# -*- fill-column: 80; org-confirm-babel-evaluate: nil -*-

#+TITLE:     Assignment 13, Data-Structures
#+AUTHOR:    Oleg Sivokon
#+EMAIL:     olegsivokon@gmail.com
#+DATE:      <2016-04-09 Sat>
#+DESCRIPTION: Third assignment in the course Data-Structures
#+KEYWORDS: Data-Structures, Algorithms, Assignment
#+LANGUAGE: en
#+LaTeX_CLASS: article
#+LATEX_HEADER: \usepackage{commath}
#+LATEX_HEADER: \usepackage{pgf}
#+LATEX_HEADER: \usepackage{tikz}
#+LATEX_HEADER: \usetikzlibrary{shapes,backgrounds}
#+LATEX_HEADER: \usepackage{marginnote}
#+LATEX_HEADER: \usepackage{listings}
#+LATEX_HEADER: \usepackage{enumerate}
#+LATEX_HEADER: \usepackage{algpseudocode}
#+LATEX_HEADER: \usepackage{algorithm}
#+LATEX_HEADER: \usepackage{mathtools}
#+LATEX_HEADER: \setlength{\parskip}{16pt plus 2pt minus 2pt}
#+LATEX_HEADER: \renewcommand{\arraystretch}{1.6}

#+BEGIN_SRC emacs-lisp :exports none
  (setq org-latex-pdf-process
        '("latexmk -pdflatex='pdflatex -shell-escape -interaction nonstopmode' -pdf -f %f")
        org-latex-listings t
        org-src-fontify-natively t
        org-babel-latex-htlatex "htlatex")

  (defmacro by-backend (&rest body)
    `(progn
       (cl-case org-export-current-backend ,@body)))

  ;; (defmacro by-backend (&rest body)
  ;;   `(cl-case (when (boundp 'backend)
  ;;               (org-export-backend-name backend))
  ;;      ,@body))
#+END_SRC

#+RESULTS:
: by-backend

#+BEGIN_LATEX
\definecolor{codebg}{rgb}{0.96,0.99,0.8}
\definecolor{codestr}{rgb}{0.46,0.09,0.2}
\lstset{%
  backgroundcolor=\color{codebg},
  basicstyle=\ttfamily\scriptsize,
  breakatwhitespace=false,
  breaklines=false,
  captionpos=b,
  framexleftmargin=10pt,
  xleftmargin=10pt,
  framerule=0pt,
  frame=tb,
  keepspaces=true,
  keywordstyle=\color{blue},
  showspaces=false,
  showstringspaces=false,
  showtabs=false,
  stringstyle=\color{codestr},
  tabsize=2
}
\lstnewenvironment{maxima}{%
  \lstset{%
    backgroundcolor=\color{codebg},
    escapeinside={(*@}{@*)},
    aboveskip=20pt,
    captionpos=b,
    label=,
    caption=,
    showstringspaces=false,
    frame=single,
    framerule=0pt,
    basicstyle=\ttfamily\scriptsize,
    columns=fixed}}{}
}
\makeatletter
\newcommand{\verbatimfont}[1]{\renewcommand{\verbatim@font}{\ttfamily#1}}
\makeatother
\verbatimfont{\small}%
\clearpage
#+END_LATEX

* Problems

** Problem 1
   1. How many comparisons does the =partition= algorithm have to make before it
      completes on an input of size $n$, provided the input is sorted in
      ascending order?
   2. What if the input is sorted in descending order?

*** Answer 1
    It doesn't matter if input is sorted, or that it has duplicates.
    =partition= will do $n-1$ comparisons (provided it selects the first element
    to be the pivot element).  This is so because it has to compare each element
    to the pivot element, but there is no need to compare them more than once.

** Problem 2
   Array is said to be almost sorted with error of size $k$ when for any two
   elements it holds that if there is more than $k$ elements between them,
   then they are in correct order.

   1. Modify =quicksort= algorithm to produce almost sorted arrays.
   2. What is the running time of this new algorithm?

*** Answer 2
    1. The modification is as follows: before we proceed to partition a slice of
       array we check that its length is greater than $k$.  This gives us the
       running time of $O(\lg(n-k)n)$.  Partitioning of larger slices of
       array ensures that $\forall e \in A[x\dots x+k]: e \leq A[x+k+1\dots]$,
       but this is exactly our requirement.
    2. As suggested above, the running time is $O(\lg(n-k)n)$ since we skip
       the processing of the subtrees at the depth greater than $n-k$.
       
    Reference implementation is given below:

    #+BEGIN_SRC C
      pair three_way_partition(array partitioned, comparison_fn_t cmp) {
          time_t t;
          size_t lidx, ridx, i, mid = 1;
          printable* left;
          printable* right;
          pair result = make_pair();
          int default_val = 0;

          if (partitioned->length <= 1) {
              result->first = int_element_generator(&default_val);
              result->last = int_element_generator(&default_val);
              return result;
          }
          srand((unsigned)time(&t));
          lidx = rand() % (partitioned->length / 2);
          ridx = (partitioned->length / 2) +
                 rand() % (partitioned->length / 2);
          left = partitioned->elements[lidx];
          right = partitioned->elements[ridx];
          if (cmp(&left, &right) > 0) {
              swap(partitioned, lidx, ridx);
              left = partitioned->elements[lidx];
              right = partitioned->elements[ridx];
          }
          swap(partitioned, 0, lidx);
          swap(partitioned, partitioned->length - 1, ridx);
          lidx = 1;
          ridx = partitioned->length - 1;
          for (i = 0; i < partitioned->length - 2; i++) {
              if (cmp(&partitioned->elements[mid], &left) < 0) {
                  swap(partitioned, lidx, mid);
                  lidx++;
                  mid++;
              } else if ((cmp(&partitioned->elements[mid], &right) < 0)) {
                  mid++;
              } else {
                  swap(partitioned, ridx - 1, mid);
                  ridx--;
              }
          }
          int lidx_int = (int)lidx;
          int ridx_int = (int)ridx;
          result->first = int_element_generator(&lidx_int);
          result->last = int_element_generator(&ridx_int);
          return result;
      }
    #+END_SRC

    #+BEGIN_SRC C
      void quicksort(array unsorted, comparison_fn_t cmp, size_t error) {
          if (unsorted->length <= error) return;
          switch (unsorted->length) {
              case 1: break;
              case 2:
                  if (cmp(&unsorted->elements[0],
                          &unsorted->elements[1]) > 0)
                      swap(unsorted, 0, 1);
                  break;
              default: {
                  pair walls = three_way_partition(unsorted, cmp);
                  int low = *(int*)walls->first->val;
                  int high = *(int*)walls->last->val;
                  if (low > 1)
                      quicksort(slice(unsorted, 0, (size_t)low),
                                cmp, error);
                  if (low + 1 < high)
                      quicksort(
                          slice(unsorted, (size_t)low, (size_t)high),
                          cmp, error);
                  if (high + 1 < unsorted->length)
                      quicksort(slice(unsorted, (size_t)high,
                                      unsorted->length), cmp, error);
              }
          }
      }
    #+END_SRC

** Problem 3
   Show the steps taken by the algorithm =randomized-select= on the sequence
   $S = (60, 70, 80, 90, 100, 1, 5, 9, 11, 15, 19, 21, 25, 29, 30)$ for $k = 5$.

*** Answer 3
    This algorithm will use the same =random-partition= procedure used in
    =quicksort=:
    1. $\textit{random-partition}(S, S_1, S_2)$, randomly selected 19, hence:
       $S_1 = (1, 5, 9, 11, 15, 19)$ and 
       $S_2 = (60, 70, 80, 90, 100, 21, 25, 29, 30)$.
    2. Since $\abs{S_1} = 6 > k$ the $k^{th}$ element must be in $S_1$, recure
       on it.
    3. $\textit{random-partition}(S_1, S_3, S_4)$, randomly select 5, hence:
       $S_3 = (1, 5)$ and
       $S_4 = (9, 11, 15, 19)$.
    4. Since $\abs{S_3} = 2 < k$, decrease $k$ by 2 and recurse on $S_4$:
    5. $k = k - 2$.
    6. $\textit{random-partition}(S_4, S_5, S_6)$, randomly select 11, hence:
       $S_5 = (9, 11)$ and
       $S_6 = (15, 19)$.
    7. Since $\abs{S_5} = 2 = k$ we are done, the $k^{th}$ element therefore
       is 11.

** Problem 4
   1. Prove that a sequence $S$ of length $n$ has at most three elements that
      repeat more than $\lfloor\frac{n}{4}\rfloor$ times.
   2. Write an algorithm finding all elements which repeat more than
      $\lfloor\frac{n}{4}\rfloor$ times.  Running time $O(n)$.

*** Answer 4
    Assume for contradiction that there are four elements in $S$ that repeat
    $\lfloor\frac{n}{4}\rfloor$ times.  That is exist fourth element in the
    sequence that repeats $\lfloor\frac{n}{4} + 1\rfloor$ times.  Note that
    $3\lfloor\frac{n}{4}\rfloor + \lfloor\frac{n}{4} + 1\rfloor > n$ contrary to
    assumed.  Hence, by contracition, there are at most three such elements.

*** Answer 5
    Simple solution involves using hash-table: loop over the sequence, using
    elements as keys in the hash-table.  Increment the key whenever you
    encounter the element.  Finally, loop over the hash-table to collect all
    those for which the condition holds.

    This can be improved by using $\lfloor\frac{n}{4}\rfloor$ hash-tables, first
    for the elements which have been seen just once, second---for the elements
    seen twice and so on.  Thus, instead of icnrementing the counter, the
    elements would be promoted to the last hash-table.  This removes the
    requirement of the final loop, but doesn't change the running time
    significantly.

    A more complicated way to do this, without using hash-tables is to do the
    following:
    1. Partition in three parts using the same partition procedure used in
       =quick-sort= algorithm.  If a partition is smaller than
       $\lfloor\frac{n}{4}\rfloor$, throw it away and repeat.
    2. At this point, the following could happen:
       - You have three partitions each containing the same element---you are
         done.
       - Further partitioning is impossible: no such elements exist---you are
         done.
       - Some partitions only contain same elements (store the element, throw
         away the partition), others contain different element: throw them away
         (they aren't candidates).

** Problem 5
   1. Given a sequence of real numbers: $(a_0, a_1, a_2, \dots, a_n)$ define:
      #+HEADER: :exports results
      #+HEADER: :results (by-backend (pdf "latex") (t "raw"))
      #+BEGIN_SRC latex
        \begin{align*}
          m &= \min\{a_i\;|\; 0 \leq i \leq n\} \\
          M &= \max\{a_i\;|\; 0 \leq i \leq n\}
        \end{align*}
      #+END_SRC

      Show that there exist $a_i, a_j$ s.t.:
      #+HEADER: :exports results
      #+HEADER: :results (by-backend (pdf "latex") (t "raw"))
      #+BEGIN_SRC latex
        \begin{align*}
          \abs{a_i - a_j} &\leq \frac{M - m}{n}
        \end{align*}
      #+END_SRC

   2. Write the algorithm for finding the $a_i, a_j$ from the question above.

*** Answer 5
    Note that the sequence with the given $M$ and $m$ will have $M - m = k \geq
    n$ elements.  Suppose now we arrange the elements in increasing order.
    Suppose, for contradiction, that none of the elements of the sequence
    satisfies $\abs{a_i - a_j} \leq \frac{M - m}{n}$, then it also means that
    the difference between every two adjacent elements must be at least
    $\frac{M - m + \epsilon}{n}$ for some positive $\epsilon$.  since there are
    k pairs of adjoint elements, we get that:

    #+HEADER: :exports results
    #+HEADER: :results (by-backend (pdf "latex") (t "raw"))
    #+BEGIN_SRC latex
      \begin{align*}
        \sum_{i=1}^k\left(a_i - a_{i-1}\right) &= \sum_{i=1}^k\left(\frac{M - m + \epsilon}{n}\right) \\
        &= \frac{k}{n}\left(M - m\right) + k \epsilon \geq M - m
      \end{align*}
    #+END_SRC

    contrary to assumed.  Hence, by contradiction, there must be at least one
    pair $a_i, a_j$ s.t. $\abs{a_i - a_j} \leq \frac{M - m}{n}$.
    
*** Answer 6
    The general idea for the algorithm is to normalize all members of the given
    sequence by subtracting the minimum and mutliplying by the ratio of one less
    than the length of the sequence and the difference between the maximum and
    the minumum.  Once done, do the insertion sort: two elements which fall in
    the same cell will have a distance between them less than the one between
    the minimum and the maximum divided into one less than the number of elements.

    #+HEADER: :exports results
    #+HEADER: :results (by-backend (pdf "latex") (t "raw"))
    #+BEGIN_SRC latex
      \begin{algorithm}
        \caption{Find $x, y \in Elts$ s.t. $\abs{x - y} \leq \frac{\max(Elts) - \min(Elts)}{\abs{Elts} - 1}$}
        \begin{algorithmic}
          \Procedure {$\textit{min-pair}$}{$elements$}
          \State \Call {$max \leftarrow \textit{max}$}{$elements$}
          \State \Call {$min \leftarrow \textit{min}$}{$elements$}
          \State \Call {$size \leftarrow \textit{size}$}{$elements$}
          \State \Call {$copy \leftarrow \textit{make-vector}$}{$size, nil$}
          \For {$element \in elements$} \Do
          \State {$index \leftarrow \lfloor \frac{(elt - min) \times (size - 1)}{max - min} \rfloor$}
          \If {$copy_{index} = nil$} \Then
          \State {$copy_{index} = element$}
          \Else
          \State \Return {$element, copy_{index}$}
          \EndIf
          \EndFor
          \EndProcedure
        \end{algorithmic}
      \end{algorithm}
    #+END_SRC