\documentclass[12pt]{article}
\usepackage{fullpage}
\usepackage{microtype} % microtypography
\usepackage{array}
\usepackage{amsmath,amssymb,amsfonts}
\usepackage{amsthm}

%% Header
\usepackage{fancyhdr}
\fancyhf{}
\fancyhead[C]{COMP 136 - 2020s - HW2 Submission}
\fancyfoot[C]{\thepage} % page number
\renewcommand\headrulewidth{0pt}
\pagestyle{fancy}

%% Hyperlinks colored consistently (black), with no boxes drawn around them
\usepackage[hyphens]{url}
\usepackage[colorlinks=true,allcolors=black,pdfborder={0 0 0}]{hyperref}

%%% Doc layout
\usepackage{parskip}
\usepackage{times}

%%% Write out problem statements in blue, solutions in black
\usepackage{color}
\newcommand{\officialdirections}[1]{{\color{blue} #1}}

%%% Avoid automatic section numbers (we'll provide our own)
\setcounter{secnumdepth}{0}

\begin{document}

~~\\ %% add vert space

{\Large{\bf Student Name: TODO}}

{\Large{\bf Collaboration Statement:}}

Turning in this assignment indicates you have abided by the course Collaboration Policy:
\url{www.cs.tufts.edu/comp/136/2020s/index.html#collaboration-policy}

Total hours spent: TODO

I consulted the following resources:
\begin{itemize}
\item TODO
\item TODO
\item $\ldots$
\end{itemize}

FYI: Official instructions for all problems can be found at:
\url{www.cs.tufts.edu/comp/136/2020s/hw2.html}

\tableofcontents

\newpage

\officialdirections{
\subsection*{1a: Problem Statement}
Consider the estimator $\hat{\sigma}^2$:
\begin{align}
\hat{\sigma}^2(x_1, \ldots, x_N) = \frac{1}{N} \sum_{n=1}^N (x_n - \mu_{\text{true}})^2
\end{align}
Compute the expected value of this estimator under the sampling distribution of the observations $x$. You should assume each $x_n$ is drawn i.i.d.\ from $x_n \sim \mathcal{N}(\mu_{\text{true}}, \sigma^2_{\text{true}})$.
}

\subsection{1a: Solution}
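One way this computation could go (a sketch, not presented as the intended route: it uses only linearity of expectation and the fact that, under the i.i.d.\ assumption stated above, $\mathbb{E}[(x_n - \mu_{\text{true}})^2] = \sigma^2_{\text{true}}$ for every $n$ by the definition of variance):
\begin{align}
\mathbb{E}\left[ \hat{\sigma}^2(x_1, \ldots, x_N) \right]
&= \frac{1}{N} \sum_{n=1}^N \mathbb{E}\left[ (x_n - \mu_{\text{true}})^2 \right] \\
&= \frac{1}{N} \sum_{n=1}^N \sigma^2_{\text{true}}
 = \sigma^2_{\text{true}}
\end{align}
If this sketch holds, the expectation equals the true variance exactly, which is the quantity 1b asks about.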
\newpage

\officialdirections{
\subsection*{1b: Problem Statement}
Given your result above, describe whether the estimator is biased or unbiased.
}

\subsection{1b: Solution}
TODO

\officialdirections{
\subsection*{1c: Problem Statement}
How does your answer about bias in 1b compare to what we learned about whether the maximum likelihood estimator for the variance is biased? Provide some justification.
}

\subsection{1c: Solution}
TODO

\newpage

\officialdirections{
\subsection*{2a: Problem Statement}
Show that the SGD estimator is an \emph{unbiased} estimator of the whole-dataset gradient. That is, show:
\begin{align}
\mathbb{E}_{i \sim U}[ \hat{G}(\theta, x_i) ] = G(\theta, x)
\end{align}
}

\subsection{2a: Solution}
TODO

\newpage

\officialdirections{
\subsection*{2b: Problem Statement}
How will the $\theta$ value estimated by whole-dataset gradient descent (which has no randomness) typically compare to the resulting $\theta$ from SGD? Why is it important that the SGD estimator is unbiased?
}

\subsection{2b: Solution}
TODO

\newpage

\officialdirections{
\subsection*{3a: Problem Statement}
Given the log PDF below, show that $x$ has a univariate Gaussian distribution.
\begin{align}
\log p(x) = \text{const} - \frac{1}{2} a x^2 + bx
\end{align}
where $a > 0$ and $b$ is any number.
}

\subsection{3a: Solution}
TODO

\newpage

\officialdirections{
\subsection*{3b: Problem Statement}
Given the log PDF below, show that vector $x$ has a multivariate Gaussian distribution.
\begin{align}
\log p(x) = \text{const} - \frac{1}{2} x^T A x + b^T x
\end{align}
where $A$ is symmetric and positive definite, and $b$ is any vector.
}

\subsection{3b: Solution}
TODO

\newpage

\officialdirections{
\subsection*{4a: Problem Statement}
For the case $M=1$, show that we can write $S_{N+1}^{-1} = S_N^{-1} + v^2$ for some scalar value $v$.
}

\subsection{4a: Solution}
TODO

\newpage

\officialdirections{
\subsection*{4b: Problem Statement}
For the case $M=1$, write an expression for $\sigma^2_{N+1} - \sigma^2_{N}$, simplifying as much as possible.
}

\subsection{4b: Solution}
TODO

\newpage

\officialdirections{
\subsection*{4c: Problem Statement}
By studying the terms of your expression from \textbf{4b}, prove the following statement:
\begin{align}
\sigma_{N+1}^2(x_*) \leq \sigma_N^2(x_*)
\end{align}
}

\subsection{4c: Solution}
TODO

\newpage

\officialdirections{
\subsection*{5a: Problem Statement}
Show that we can write $S_{N+1}^{-1} = S_N^{-1} + vv^T$ for some vector $v$.
}

\subsection{5a: Solution}
TODO

\newpage

\officialdirections{
\subsection*{5b: Problem Statement}
\begin{align}
(A + vv^T)^{-1} &= A^{-1} - \frac{ (A^{-1}v)(v^T A^{-1})}{ 1 + v^T A^{-1} v}
\end{align}
Substitute $A = S_N^{-1}$ and $v$ from 5a, then simplify to write an expression for $S_{N+1}$ in terms of $S_{N}$.
}

\subsection{5b: Solution}
TODO

\officialdirections{
\subsection*{5c: Problem Statement}
Show that $\sigma^2_{N+1}(x_*) - \sigma^2_{N}(x_*) = \phi(x_*)^T \left[ S_{N+1} - S_{N} \right] \phi(x_*)$.
}

\subsection{5c: Solution}
TODO

\newpage

\officialdirections{
\subsection*{5d: Problem Statement}
Finally, plug your result from 5b into 5c, plus the fact that $S_N$ must be positive definite, to show that:
\begin{align}
\sigma_{N+1}^2(x_*) \leq \sigma_N^2(x_*), \quad \text{or equivalently} \quad \sigma_{N+1}^2(x_*) - \sigma_N^2(x_*) \leq 0
\end{align}
}

\subsection{5d: Solution}
TODO

\end{document}