# -*- fill-column: 80; org-confirm-babel-evaluate: nil -*- #+TITLE: Assignment 13, Data-Structures #+AUTHOR: Oleg Sivokon #+EMAIL: olegsivokon@gmail.com #+DATE: <2016-04-09 Sat> #+DESCRIPTION: Third assignment in the course Data-Structures #+KEYWORDS: Data-Structures, Algorithms, Assignment #+LANGUAGE: en #+LaTeX_CLASS: article #+LATEX_HEADER: \usepackage{commath} #+LATEX_HEADER: \usepackage{pgf} #+LATEX_HEADER: \usepackage{tikz} #+LATEX_HEADER: \usetikzlibrary{shapes,backgrounds} #+LATEX_HEADER: \usepackage{marginnote} #+LATEX_HEADER: \usepackage{listings} #+LATEX_HEADER: \usepackage{enumerate} #+LATEX_HEADER: \usepackage{algpseudocode} #+LATEX_HEADER: \usepackage{algorithm} #+LATEX_HEADER: \usepackage{mathtools} #+LATEX_HEADER: \setlength{\parskip}{16pt plus 2pt minus 2pt} #+LATEX_HEADER: \renewcommand{\arraystretch}{1.6} #+BEGIN_SRC emacs-lisp :exports none (setq org-latex-pdf-process '("latexmk -pdflatex='pdflatex -shell-escape -interaction nonstopmode' -pdf -f %f") org-latex-listings t org-src-fontify-natively t org-babel-latex-htlatex "htlatex") (defmacro by-backend (&rest body) `(progn (cl-case org-export-current-backend ,@body))) ;; (defmacro by-backend (&rest body) ;; `(cl-case (when (boundp 'backend) ;; (org-export-backend-name backend)) ;; ,@body)) #+END_SRC #+RESULTS: : by-backend #+BEGIN_LATEX \definecolor{codebg}{rgb}{0.96,0.99,0.8} \definecolor{codestr}{rgb}{0.46,0.09,0.2} \lstset{% backgroundcolor=\color{codebg}, basicstyle=\ttfamily\scriptsize, breakatwhitespace=false, breaklines=false, captionpos=b, framexleftmargin=10pt, xleftmargin=10pt, framerule=0pt, frame=tb, keepspaces=true, keywordstyle=\color{blue}, showspaces=false, showstringspaces=false, showtabs=false, stringstyle=\color{codestr}, tabsize=2 } \lstnewenvironment{maxima}{% \lstset{% backgroundcolor=\color{codebg}, escapeinside={(*@}{@*)}, aboveskip=20pt, captionpos=b, label=, caption=, showstringspaces=false, frame=single, framerule=0pt, basicstyle=\ttfamily\scriptsize, columns=fixed}}{} } \makeatletter \newcommand{\verbatimfont}[1]{\renewcommand{\verbatim@font}{\ttfamily#1}} \makeatother \verbatimfont{\small}% \clearpage #+END_LATEX * Problems ** Problem 1 1. How many comparisons does the =partition= algorithm have to make before it completes on an input of size $n$, provided the input is sorted in ascending order? 2. What if the input is sorted in descending order? *** Answer 1 It doesn't matter if input is sorted, or that it has duplicates. =partition= will do $n-1$ comparisons (provided it selects the first element to be the pivot element). This is so because it has to compare each element to the pivot element, but there is no need to compare them more than once. ** Problem 2 Array is said to be almost sorted with error of size $k$ when for any two elements it holds that if there is more than $k$ elements between them, then they are in correct order. 1. Modify =quicksort= algorithm to produce almost sorted arrays. 2. What is the running time of this new algorithm? *** Answer 2 1. The modification is as follows: before we proceed to partition a slice of array we check that its length is greater than $k$. This gives us the running time of $O(\lg(n-k)n)$. Partitioning of larger slices of array ensures that $\forall e \in A[x\dots x+k]: e \leq A[x+k+1\dots]$, but this is exactly our requirement. 2. As suggested above, the running time is $O(\lg(n-k)n)$ since we skip the processing of the subtrees at the depth greater than $n-k$. Reference implementation is given below: #+BEGIN_SRC C pair three_way_partition(array partitioned, comparison_fn_t cmp) { time_t t; size_t lidx, ridx, i, mid = 1; printable* left; printable* right; pair result = make_pair(); int default_val = 0; if (partitioned->length <= 1) { result->first = int_element_generator(&default_val); result->last = int_element_generator(&default_val); return result; } srand((unsigned)time(&t)); lidx = rand() % (partitioned->length / 2); ridx = (partitioned->length / 2) + rand() % (partitioned->length / 2); left = partitioned->elements[lidx]; right = partitioned->elements[ridx]; if (cmp(&left, &right) > 0) { swap(partitioned, lidx, ridx); left = partitioned->elements[lidx]; right = partitioned->elements[ridx]; } swap(partitioned, 0, lidx); swap(partitioned, partitioned->length - 1, ridx); lidx = 1; ridx = partitioned->length - 1; for (i = 0; i < partitioned->length - 2; i++) { if (cmp(&partitioned->elements[mid], &left) < 0) { swap(partitioned, lidx, mid); lidx++; mid++; } else if ((cmp(&partitioned->elements[mid], &right) < 0)) { mid++; } else { swap(partitioned, ridx - 1, mid); ridx--; } } int lidx_int = (int)lidx; int ridx_int = (int)ridx; result->first = int_element_generator(&lidx_int); result->last = int_element_generator(&ridx_int); return result; } #+END_SRC #+BEGIN_SRC C void quicksort(array unsorted, comparison_fn_t cmp, size_t error) { if (unsorted->length <= error) return; switch (unsorted->length) { case 1: break; case 2: if (cmp(&unsorted->elements[0], &unsorted->elements[1]) > 0) swap(unsorted, 0, 1); break; default: { pair walls = three_way_partition(unsorted, cmp); int low = *(int*)walls->first->val; int high = *(int*)walls->last->val; if (low > 1) quicksort(slice(unsorted, 0, (size_t)low), cmp, error); if (low + 1 < high) quicksort( slice(unsorted, (size_t)low, (size_t)high), cmp, error); if (high + 1 < unsorted->length) quicksort(slice(unsorted, (size_t)high, unsorted->length), cmp, error); } } } #+END_SRC ** Problem 3 Show the steps taken by the algorithm =randomized-select= on the sequence $S = (60, 70, 80, 90, 100, 1, 5, 9, 11, 15, 19, 21, 25, 29, 30)$ for $k = 5$. *** Answer 3 This algorithm will use the same =random-partition= procedure used in =quicksort=: 1. $\textit{random-partition}(S, S_1, S_2)$, randomly selected 19, hence: $S_1 = (1, 5, 9, 11, 15, 19)$ and $S_2 = (60, 70, 80, 90, 100, 21, 25, 29, 30)$. 2. Since $\abs{S_1} = 6 > k$ the $k^{th}$ element must be in $S_1$, recure on it. 3. $\textit{random-partition}(S_1, S_3, S_4)$, randomly select 5, hence: $S_3 = (1, 5)$ and $S_4 = (9, 11, 15, 19)$. 4. Since $\abs{S_3} = 2 < k$, decrease $k$ by 2 and recurse on $S_4$: 5. $k = k - 2$. 6. $\textit{random-partition}(S_4, S_5, S_6)$, randomly select 11, hence: $S_5 = (9, 11)$ and $S_6 = (15, 19)$. 7. Since $\abs{S_5} = 2 = k$ we are done, the $k^{th}$ element therefore is 11. ** Problem 4 1. Prove that a sequence $S$ of length $n$ has at most three elements that repeat more than $\lfloor\frac{n}{4}\rfloor$ times. 2. Write an algorithm finding all elements which repeat more than $\lfloor\frac{n}{4}\rfloor$ times. Running time $O(n)$. *** Answer 4 Assume for contradiction that there are four elements in $S$ that repeat $\lfloor\frac{n}{4}\rfloor$ times. That is exist fourth element in the sequence that repeats $\lfloor\frac{n}{4} + 1\rfloor$ times. Note that $3\lfloor\frac{n}{4}\rfloor + \lfloor\frac{n}{4} + 1\rfloor > n$ contrary to assumed. Hence, by contracition, there are at most three such elements. *** Answer 5 Simple solution involves using hash-table: loop over the sequence, using elements as keys in the hash-table. Increment the key whenever you encounter the element. Finally, loop over the hash-table to collect all those for which the condition holds. This can be improved by using $\lfloor\frac{n}{4}\rfloor$ hash-tables, first for the elements which have been seen just once, second---for the elements seen twice and so on. Thus, instead of icnrementing the counter, the elements would be promoted to the last hash-table. This removes the requirement of the final loop, but doesn't change the running time significantly. A more complicated way to do this, without using hash-tables is to do the following: 1. Partition in three parts using the same partition procedure used in =quick-sort= algorithm. If a partition is smaller than $\lfloor\frac{n}{4}\rfloor$, throw it away and repeat. 2. At this point, the following could happen: - You have three partitions each containing the same element---you are done. - Further partitioning is impossible: no such elements exist---you are done. - Some partitions only contain same elements (store the element, throw away the partition), others contain different element: throw them away (they aren't candidates). ** Problem 5 1. Given a sequence of real numbers: $(a_0, a_1, a_2, \dots, a_n)$ define: #+HEADER: :exports results #+HEADER: :results (by-backend (pdf "latex") (t "raw")) #+BEGIN_SRC latex \begin{align*} m &= \min\{a_i\;|\; 0 \leq i \leq n\} \\ M &= \max\{a_i\;|\; 0 \leq i \leq n\} \end{align*} #+END_SRC Show that there exist $a_i, a_j$ s.t.: #+HEADER: :exports results #+HEADER: :results (by-backend (pdf "latex") (t "raw")) #+BEGIN_SRC latex \begin{align*} \abs{a_i - a_j} &\leq \frac{M - m}{n} \end{align*} #+END_SRC 2. Write the algorithm for finding the $a_i, a_j$ from the question above. *** Answer 5 Note that the sequence with the given $M$ and $m$ will have $M - m = k \geq n$ elements. Suppose now we arrange the elements in increasing order. Suppose, for contradiction, that none of the elements of the sequence satisfies $\abs{a_i - a_j} \leq \frac{M - m}{n}$, then it also means that the difference between every two adjacent elements must be at least $\frac{M - m + \epsilon}{n}$ for some positive $\epsilon$. since there are k pairs of adjoint elements, we get that: #+HEADER: :exports results #+HEADER: :results (by-backend (pdf "latex") (t "raw")) #+BEGIN_SRC latex \begin{align*} \sum_{i=1}^k\left(a_i - a_{i-1}\right) &= \sum_{i=1}^k\left(\frac{M - m + \epsilon}{n}\right) \\ &= \frac{k}{n}\left(M - m\right) + k \epsilon \geq M - m \end{align*} #+END_SRC contrary to assumed. Hence, by contradiction, there must be at least one pair $a_i, a_j$ s.t. $\abs{a_i - a_j} \leq \frac{M - m}{n}$. *** Answer 6 The general idea for the algorithm is to normalize all members of the given sequence by subtracting the minimum and mutliplying by the ratio of one less than the length of the sequence and the difference between the maximum and the minumum. Once done, do the insertion sort: two elements which fall in the same cell will have a distance between them less than the one between the minimum and the maximum divided into one less than the number of elements. #+HEADER: :exports results #+HEADER: :results (by-backend (pdf "latex") (t "raw")) #+BEGIN_SRC latex \begin{algorithm} \caption{Find $x, y \in Elts$ s.t. $\abs{x - y} \leq \frac{\max(Elts) - \min(Elts)}{\abs{Elts} - 1}$} \begin{algorithmic} \Procedure {$\textit{min-pair}$}{$elements$} \State \Call {$max \leftarrow \textit{max}$}{$elements$} \State \Call {$min \leftarrow \textit{min}$}{$elements$} \State \Call {$size \leftarrow \textit{size}$}{$elements$} \State \Call {$copy \leftarrow \textit{make-vector}$}{$size, nil$} \For {$element \in elements$} \Do \State {$index \leftarrow \lfloor \frac{(elt - min) \times (size - 1)}{max - min} \rfloor$} \If {$copy_{index} = nil$} \Then \State {$copy_{index} = element$} \Else \State \Return {$element, copy_{index}$} \EndIf \EndFor \EndProcedure \end{algorithmic} \end{algorithm} #+END_SRC