# Description: Utility functions for MMIDS

# Libraries
import numpy as np
from numpy import linalg as LA
from scipy.stats import multivariate_normal

# matplotlib, networkx and torch are only used by a handful of plotting,
# graph and training helpers below. Importing them defensively keeps the
# purely numerical routines usable when one of these packages is missing;
# the helpers that need them raise a clear ImportError when called.
try:
    import matplotlib.pyplot as plt
except ImportError:
    plt = None
try:
    import networkx as nx
except ImportError:
    nx = None
try:
    import torch
except ImportError:
    torch = None

#seed = 535
#rng = np.random.default_rng(seed)


def _require(module, name):
    """Raise an ImportError naming the optional package if it is missing."""
    if module is None:
        raise ImportError(f"{name} is required to use this function")


# k-means clustering

def opt_reps(X, k, assign):
    """
    Calculate the representative point (mean) of each cluster.

    Parameters:
    - X (numpy.ndarray): The input data matrix of shape (n, d).
    - k (int): The number of clusters.
    - assign (numpy.ndarray): The assignment array of shape (n,) where
      assign[i] is the cluster index of data point X[i].

    Returns:
    numpy.ndarray: The representative points, of shape (k, d). The row of a
    cluster with no assigned point is filled with NaN.
    """
    d = X.shape[1]
    assign = np.asarray(assign)
    reps = np.zeros((k, d))
    for i in range(k):
        members = X[assign == i, :]
        if len(members) == 0:
            # The mean of an empty cluster is undefined; mark it explicitly
            # instead of relying on a 0/0 division.
            reps[i, :] = np.nan
        else:
            reps[i, :] = np.sum(members, axis=0) / len(members)
    return reps


def opt_clust(X, k, reps):
    """
    Assign each data point to its closest representative.

    Parameters:
    - X: numpy array, shape (n, d), the data points.
    - k: int, the number of clusters.
    - reps: numpy array, shape (k, d), the cluster representatives.

    Returns:
    - assign: numpy array, shape (n,), the cluster index of each data point.

    Side effects:
    Prints the k-means objective (sum of squared distances to the assigned
    representatives) to monitor progress.
    """
    n = X.shape[0]
    dist = np.zeros(n)
    assign = np.zeros(n, dtype=int)
    for j in range(n):
        dist_to_i = np.array(
            [LA.norm(X[j, :] - reps[i, :]) for i in range(k)]
        )
        # np.argmin returns the position of a NaN when one is present, which
        # would send every point to an undefined (empty) cluster.
        dist_to_i[np.isnan(dist_to_i)] = np.inf
        assign[j] = np.argmin(dist_to_i)
        dist[j] = dist_to_i[assign[j]]
    G = np.sum(dist ** 2)
    print(G)  # Print current objective to monitor progress
    return assign


def kmeans(rng, X, k, maxiter=5):
    """
    Perform k-means clustering on the given data.

    Parameters:
    - rng: numpy.random.Generator used for the random initial assignment.
    - X: numpy.ndarray of shape (n, d).
    - k: int, the number of clusters.
    - maxiter: int, optional. Number of iterations to perform. Default is 5.

    Returns:
    - assign: numpy.ndarray of shape (n,), the cluster index of each point.
    """
    n = X.shape[0]
    assign = rng.integers(0, k, n)
    for _ in range(maxiter):
        reps = opt_reps(X, k, assign)
        assign = opt_clust(X, k, reps)
    return assign


# k-NN regression

def knnregression(x, y, k, xnew):
    """
    Perform k-nearest neighbors regression.

    Parameters:
    - x (array-like): The input feature values.
    - y (array-like): The target values.
    - k (int): The number of nearest neighbors to consider.
    - xnew (float): The new input feature value for prediction.

    Returns:
    - float: The mean target value over the k inputs closest to xnew.
    """
    closest = np.argsort(np.absolute(np.asarray(x) - xnew))
    return np.mean(np.asarray(y)[closest[0:k]])


# Algorithms for linear systems

def backsubs(R, b):
    """
    Solve the upper triangular system Rx = b by back substitution.

    Parameters:
    - R (numpy.ndarray): Upper triangular matrix of shape (m, m).
    - b (numpy.ndarray): Right-hand side vector of shape (m,).

    Returns:
    - x (numpy.ndarray): The solution vector of shape (m,).
    """
    m = b.shape[0]
    x = np.zeros(m)
    for i in reversed(range(m)):
        x[i] = (b[i] - np.dot(R[i, i + 1:m], x[i + 1:m])) / R[i, i]
    return x


def forwardsubs(L, b):
    """
    Solve the lower triangular system Lx = b by forward substitution.

    Parameters:
    L (numpy.ndarray): The lower triangular matrix of shape (m, m).
    b (numpy.ndarray): The right-hand side vector of shape (m,).

    Returns:
    x (numpy.ndarray): The solution vector of shape (m,).
    """
    m = b.shape[0]
    x = np.zeros(m)
    for i in range(m):
        x[i] = (b[i] - np.dot(L[i, 0:i], x[0:i])) / L[i, i]
    return x


def cholesky(B):
    """
    Perform Cholesky decomposition on a given matrix.

    Parameters:
    B (numpy.ndarray): The input matrix (only its lower triangle is read).

    Returns:
    numpy.ndarray: The lower triangular matrix L such that B = LL^T.
    """
    n = B.shape[0]
    L = np.zeros((n, n))
    for j in range(n):
        # Row j of L left of the diagonal solves a triangular system built
        # from the rows already computed.
        L[j, 0:j] = forwardsubs(L[0:j, 0:j], B[j, 0:j])
        L[j, j] = np.sqrt(B[j, j] - LA.norm(L[j, 0:j]) ** 2)
    return L


def ls_by_chol(A, b):
    """
    Solve the linear least squares problem via the normal equations and a
    Cholesky decomposition of A^T A.

    Parameters:
    A (numpy.ndarray): The coefficient matrix.
    b (numpy.ndarray): The dependent variable vector.

    Returns:
    numpy.ndarray: The vector x minimizing ||Ax - b||^2.
    """
    L = cholesky(A.T @ A)
    z = forwardsubs(L, A.T @ b)
    return backsubs(L.T, z)


def gramschmidt(A):
    """
    Perform the Gram-Schmidt process on the columns of A.

    Parameters:
    A (numpy.ndarray): The input matrix of shape (n, m).

    Returns:
    Q (numpy.ndarray): Matrix of shape (n, m) with orthonormal columns.
    R (numpy.ndarray): Upper triangular matrix of shape (m, m).
    """
    (n, m) = A.shape
    Q = np.zeros((n, m))
    R = np.zeros((m, m))
    for j in range(m):
        # Work on a float copy so integer input does not break the in-place
        # subtraction below.
        v = A[:, j].astype(float)
        for i in range(j):
            R[i, j] = np.dot(Q[:, i], A[:, j])
            v -= R[i, j] * Q[:, i]
        R[j, j] = LA.norm(v)
        Q[:, j] = v / R[j, j]
    return Q, R


def householder(A, b):
    """
    Triangularize A with Householder reflections and apply the same
    reflections to b.

    Parameters:
    A (numpy.ndarray): The input matrix of shape (n, m).
    b (numpy.ndarray): The input vector of shape (n,).

    Returns:
    R (numpy.ndarray): The upper triangular factor, of shape (m, m).
    Qtb (numpy.ndarray): The first m entries of the reflected b.
    """
    n, m = A.shape
    # Float copies: the updates below are done in place and would be
    # truncated if the inputs had an integer dtype.
    R = np.array(A, dtype=float)
    Qtb = np.array(b, dtype=float)
    for k in range(m):
        y = R[k:n, k]
        norm_y = LA.norm(y)
        if norm_y == 0:
            # Column is already zero from the diagonal down.
            continue
        # computing z; the sign is taken as +1 when y[0] == 0 (np.sign would
        # return 0 there and the reflection would not zero the column)
        z = y.copy()
        z[0] += norm_y if y[0] >= 0 else -norm_y
        z = z / LA.norm(z)
        H = 2 * np.outer(z, z)
        # updating R
        R[k:n, k:m] = R[k:n, k:m] - H @ R[k:n, k:m]
        # updating Qtb
        Qtb[k:n] = Qtb[k:n] - H @ Qtb[k:n]
    return R[0:m, 0:m], Qtb[0:m]


def ls_by_qr(A, b):
    """
    Solve the linear least squares problem using a QR decomposition obtained
    by Gram-Schmidt.

    Parameters:
    A (numpy.ndarray): The coefficient matrix.
    b (numpy.ndarray): The right-hand side vector.

    Returns:
    numpy.ndarray: The solution of Rx = Q^T b.
    """
    Q, R = gramschmidt(A)
    return backsubs(R, Q.T @ b)


# Spectral and SVD methods

def topsing(rng, A, maxiter=10):
    """
    Compute the top singular triplet of a matrix A by power iteration.

    Parameters:
    rng (numpy.random.Generator): Random number generator.
    A (ndarray): Input matrix.
    maxiter (int): Number of power iterations. Default is 10.

    Returns:
    u (ndarray): Estimate of the top left singular vector.
    s (float): Estimate of the largest singular value.
    v (ndarray): Estimate of the top right singular vector.
    """
    x = rng.normal(0, 1, np.shape(A)[1])
    B = A.T @ A
    for _ in range(maxiter):
        x = B @ x
    v = x / LA.norm(x)
    s = LA.norm(A @ v)
    u = A @ v / s
    return u, s, v


def svd(rng, A, l, maxiter=100):
    """
    Compute l singular triplets of A by orthogonal iteration.

    Parameters:
    rng (numpy.random.Generator): Random number generator.
    A (ndarray): Input matrix of shape (m, n).
    l (int): Number of singular values to compute.
    maxiter (int, optional): Number of iterations. Default is 100.

    Returns:
    U (ndarray): Left singular vectors of shape (m, l).
    S (list): Singular values.
    V (ndarray): Right singular vectors of shape (n, l).
    """
    V = rng.normal(0, 1, (np.size(A, 1), l))
    for _ in range(maxiter):
        W = A @ V
        Z = A.T @ W
        V, _ = gramschmidt(Z)
    W = A @ V
    S = [LA.norm(W[:, i]) for i in range(np.size(W, 1))]
    U = np.stack([W[:, i] / S[i] for i in range(np.size(W, 1))], axis=-1)
    return U, S, V


def pca(X, l):
    """
    Perform Principal Component Analysis (PCA) on the input data.

    Parameters:
    X (numpy.ndarray): Input data matrix of shape (n_samples, n_features).
    l (int): Number of principal components to keep.

    Returns:
    numpy.ndarray: Transformed data matrix of shape (n_samples, l).
    """
    mean = np.mean(X, axis=0)
    Y = X - mean
    U, S, _ = LA.svd(Y, full_matrices=False)
    return U[:, :l] @ np.diag(S[:l])


# Data simulation

def one_cluster(rng, d, n, w):
    """
    DEPRECATED: See spherical_gaussian

    Generate a single cluster of data points.

    Parameters:
    - rng (numpy.random.Generator): The random number generator.
    - d (int): The dimensionality of the data points.
    - n (int): The number of data points to generate.
    - w (float): The mean of the first coordinate (other means are 0).

    Returns:
    - X (ndarray): An array of shape (n, d) containing the data points.
    """
    X = np.stack(
        [np.concatenate(([w], np.zeros(d - 1))) + rng.normal(0, 1, d)
         for _ in range(n)]
    )
    return X


def two_clusters(d, n, w, rng=None):
    """
    DEPRECATED: See two_separate_clusters

    Generate two clusters of data points centered at -w and +w along the
    first coordinate.

    Parameters:
    - d (int): The dimensionality of the data points.
    - n (int): The number of data points in each cluster.
    - w (float): Offset of each cluster center from the origin.
    - rng (numpy.random.Generator, optional): The random number generator.
      A fresh default generator is created when omitted.

    Returns:
    - X1 (ndarray): The data points in the first cluster, shape (n, d).
    - X2 (ndarray): The data points in the second cluster, shape (n, d).
    """
    if rng is None:
        rng = np.random.default_rng()
    X1 = one_cluster(rng, d, n, -w)
    X2 = one_cluster(rng, d, n, w)
    return X1, X2


def spherical_gaussian(rng, d, n, mu, sig):
    """
    Generate samples from a spherical Gaussian distribution.

    Parameters:
    - rng (numpy.random.Generator): The random number generator.
    - d (int): The dimensionality of the samples.
    - n (int): The number of samples to generate.
    - mu (float or ndarray): The mean; broadcast against shape (n, d).
    - sig (float): The standard deviation of the distribution.

    Returns:
    - X (ndarray): An array of shape (n, d) containing the samples.
    """
    X = mu + sig * rng.normal(0, 1, (n, d))
    return X


def gmm2spherical(rng, d, n, phi0, phi1, mu0, sig0, mu1, sig1):
    """
    Generate samples from a two-component Gaussian Mixture Model (GMM) with
    spherical Gaussian components.

    Parameters:
    - rng (numpy.random.Generator): The random number generator.
    - d (int): The dimensionality of the samples.
    - n (int): The number of samples to generate.
    - phi0 (float): The weight of the first component.
    - phi1 (float): The weight of the second component.
    - mu0 (ndarray): The mean vector of the first component.
    - sig0 (float): The standard deviation of the first component.
    - mu1 (ndarray): The mean vector of the second component.
    - sig1 (float): The standard deviation of the second component.

    Returns:
    - X (ndarray): The generated samples, with shape (n, d).
    """
    # merge components into matrices
    phi = np.stack((phi0, phi1))
    mu = np.stack((mu0, mu1))
    sig = np.stack((sig0, sig1))

    # initialization
    X = np.zeros((n, d))

    # choose components of each data point, then generate samples
    component = rng.choice(2, size=n, p=phi)
    for i in range(n):
        X[i, :] = spherical_gaussian(
            rng, d, 1, mu[component[i], :], sig[component[i]])
    return X


def gmm2(rng, d, n, phi0, phi1, mu0, sigma0, mu1, sigma1):
    """
    Generate samples from a Gaussian Mixture Model (GMM) with 2 components.

    Parameters:
    - rng (numpy.random.Generator): The random number generator.
    - d (int): The dimensionality of the samples.
    - n (int): The number of samples to generate.
    - phi0 (float): The mixing coefficient for component 0.
    - phi1 (float): The mixing coefficient for component 1.
    - mu0 (ndarray): The mean vector for component 0.
    - sigma0 (ndarray): The covariance matrix for component 0.
    - mu1 (ndarray): The mean vector for component 1.
    - sigma1 (ndarray): The covariance matrix for component 1.

    Returns:
    - X (ndarray): The generated samples, with shape (n, d).
    """
    # merge components into tensors
    phi = np.stack((phi0, phi1))
    mu = np.stack((mu0, mu1))
    sigma = np.stack((sigma0, sigma1))

    # initialization
    X = np.zeros((n, d))

    # choose components of each data point, then generate samples
    component = rng.choice(2, size=n, p=phi)
    for i in range(n):
        X[i, :] = rng.multivariate_normal(
            mu[component[i], :], sigma[component[i], :, :])
    return X


def two_mixed_clusters(rng, d, n, w):
    """
    Generate a dataset drawn from an equal-weight mixture of two unit-variance
    spherical Gaussians centered at +w and -w along the first coordinate.

    Parameters:
    - rng (numpy.random.Generator): The random number generator.
    - d (int): The dimensionality of the dataset.
    - n (int): The number of data points to generate.
    - w (float): Offset of each cluster center from the origin.

    Returns:
    - ndarray: The generated dataset with shape (n, d).
    """
    mu0 = np.hstack(([w], np.zeros(d - 1)))
    mu1 = np.hstack(([-w], np.zeros(d - 1)))
    return gmm2spherical(rng, d, n, 0.5, 0.5, mu0, 1, mu1, 1)


def two_separate_clusters(rng, d, n, w):
    """
    Generate two separate clusters of samples in d-dimensional space.

    Parameters:
    rng (numpy.random.Generator): The random number generator to use.
    d (int): The dimensionality of the samples.
    n (int): The number of samples to generate for each cluster.
    w (float): Offset of each cluster center from the origin.

    Returns:
    tuple: Two arrays X0 and X1 of shape (n, d), sampled around +w and -w
    along the first coordinate respectively.
    """
    mu0 = np.concatenate(([w], np.zeros(d - 1)))
    mu1 = np.concatenate(([-w], np.zeros(d - 1)))
    X0 = spherical_gaussian(rng, d, n, mu0, 1)
    X1 = spherical_gaussian(rng, d, n, mu1, 1)
    return X0, X1


# Spectral graph theory algorithms

def cut_ratio(A, order, k):
    """
    Calculate the cut ratio of the cut separating the first k+1 vertices of
    an ordering from the remaining ones.

    Parameters:
    A (numpy.ndarray): The adjacency matrix of the graph.
    order (array-like): The ordering of the vertices.
    k (int): The position in `order` of the last vertex on the first side.

    Returns:
    float: Total weight of the edges crossing the cut divided by the size of
    the smaller side.
    """
    n = A.shape[0]  # number of vertices
    order = np.asarray(order)
    inside, outside = order[:k + 1], order[k + 1:n]
    # Sum of the adjacency entries between the two sides of the cut.
    edge_boundary = A[np.ix_(inside, outside)].sum()
    denominator = np.minimum(k + 1, n - k - 1)
    return edge_boundary / denominator


def spectral_cut2(A):
    """
    Partition a graph in two using the eigenvector of the Laplacian
    associated with its second smallest eigenvalue.

    Parameters:
    A (numpy.ndarray): The adjacency matrix of the graph.

    Returns:
    tuple: Two numpy arrays with the vertex indices of the two sides of the
    cut that minimizes the cut ratio along the eigenvector ordering.
    """
    n = A.shape[0]  # number of vertices

    # laplacian
    degrees = A.sum(axis=1)
    D = np.diag(degrees)
    L = D - A

    # spectral decomposition
    w, v = LA.eigh(L)
    # index of entries in increasing order
    order = np.argsort(v[:, np.argsort(w)[1]])

    # cut ratios
    phi = np.zeros(n - 1)  # initialize cut ratios
    for k in range(n - 1):
        phi[k] = cut_ratio(A, order, k)
    imin = np.argmin(phi)  # find best cut ratio
    return order[0:imin + 1], order[imin + 1:n]


def viz_cut(G, s, pos, node_size=100, with_labels=False):
    """
    Visualize a cut in a graph by coloring one side differently.

    Parameters:
    - G: NetworkX graph object to visualize.
    - s: index or array of indices used to select the entries of the
      per-node color array that are set to 1 (all others are 0).
    - pos: dict of node positions, passed to networkx.draw.
    - node_size: int, optional. Size of the drawn nodes. Default is 100.
    - with_labels: bool, optional. Whether to draw node labels.
      Default is False.

    Returns:
    None
    """
    _require(nx, "networkx")
    _require(plt, "matplotlib")
    n = G.number_of_nodes()
    assign = np.zeros(n)
    assign[s] = 1
    nx.draw(G, node_color=assign, pos=pos, with_labels=with_labels,
            cmap='spring', node_size=node_size, font_color='k')
    plt.show()


def inhomogeneous_er_random_graph(rng, n, M):
    """
    Generate an inhomogeneous Erdős-Rényi random graph.

    Parameters:
    - rng (numpy.random.Generator): A random number generator.
    - n (int): The number of nodes in the graph.
    - M (numpy.ndarray): An n x n matrix of edge probabilities; only entries
      above the diagonal are read.

    Returns:
    - G (networkx.Graph): The generated random graph.
    """
    _require(nx, "networkx")
    G = nx.Graph()
    G.add_nodes_from(range(n))
    for i in range(n):
        for j in range(i + 1, n):
            if rng.random() < M[i, j]:
                G.add_edge(i, j)
    return G


# Optimization algorithms

def desc_update(grad_f, x, alpha):
    """
    Perform one gradient descent update on x.

    Parameters:
    - grad_f: Callable returning the gradient of f at a given point.
    - x: The current value of the variable.
    - alpha: The learning rate or step size for the update.

    Returns:
    - The updated value x - alpha * grad_f(x).
    """
    return x - alpha * grad_f(x)


def gd(f, grad_f, x0, alpha=1e-3, niters=int(1e6)):
    """
    Run a fixed number of gradient descent steps to minimize a function.

    Parameters:
    f (function): The objective function to be minimized.
    grad_f (function): The gradient function of the objective function.
    x0 (float or array-like): The initial point for optimization.
    alpha (float, optional): The step size. Defaults to 1e-3.
    niters (int, optional): The number of iterations. Defaults to int(1e6).

    Returns:
    tuple: The final point and the value of f at that point.
    """
    xk = x0
    for _ in range(niters):
        xk = desc_update(grad_f, xk, alpha)
    return xk, f(xk)


# Markov chains algorithms

def SamplePath(rng, mu, P, T):
    """
    Generate a sample path from a Markov chain with states labeled 1..n.

    Parameters:
    rng (numpy.random.Generator): The random number generator.
    mu (numpy.ndarray): The initial distribution, of shape (n,).
    P (numpy.ndarray): The transition matrix, of shape (n, n).
    T (int): The number of transitions to simulate.

    Returns:
    numpy.ndarray: Float array of length T+1 holding the visited states.
    """
    n = mu.shape[0]
    states = np.arange(start=1, stop=n + 1)
    X = np.zeros(T + 1)
    for i in range(T + 1):
        if i == 0:
            X[i] = rng.choice(a=states, p=mu)
        else:
            # States are 1-based, rows of P are 0-based.
            X[i] = rng.choice(a=states, p=P[int(X[i - 1] - 1), :])
    return X


def transition_from_adjacency(A):
    """
    Compute the random-walk transition matrix from an adjacency matrix.

    Rows that sum to zero (sinks) are given a self-loop so that every row of
    the result sums to 1. Existing self-loops are kept.

    Parameters:
    A (numpy.ndarray): The adjacency matrix.

    Returns:
    numpy.ndarray: The row-normalized transition matrix.
    """
    P = np.array(A, dtype=float)
    sinks = np.flatnonzero(P.sum(axis=1) == 0.)
    # Only sink rows get a diagonal entry; overwriting the whole diagonal
    # would erase self-loops and can leave an all-zero row behind.
    P[sinks, sinks] = 1.
    out_deg = P.sum(axis=1)
    P = P / out_deg[:, np.newaxis]
    return P


def add_damping(P, alpha, mu):
    """
    Mix a transition matrix with a teleportation distribution.

    Parameters:
    P (numpy.ndarray): The transition matrix.
    alpha (float): The damping factor, ranging from 0 to 1.
    mu (numpy.ndarray): The teleportation term, broadcast against P.

    Returns:
    numpy.ndarray: The damped matrix alpha * P + (1-alpha) * mu.
    """
    Q = alpha * P + (1 - alpha) * mu
    return Q


def pagerank(A, alpha=0.85, max_iter=100):
    """
    Calculate the PageRank scores for a given adjacency matrix.

    Parameters:
    - A: numpy.ndarray, the adjacency matrix of the graph.
    - alpha: float, optional. The damping factor. Default is 0.85.
    - max_iter: int, optional. Number of power iterations. Default is 100.

    Returns:
    - v: numpy.ndarray, the PageRank score of each node.
    """
    n = A.shape[0]
    mu = np.ones(n) / n
    P = transition_from_adjacency(A)
    Q = add_damping(P, alpha, mu)
    v = mu
    for _ in range(max_iter):
        v = Q.T @ v
    return v


def ppr(A, mu, alpha=0.85, max_iter=100):
    """
    Calculate the Personalized PageRank (PPR) vector for a given adjacency
    matrix.

    Parameters:
    A (numpy.ndarray): The adjacency matrix representing the graph.
    mu (numpy.ndarray): The teleportation distribution; also used as the
        starting vector of the iteration.
    alpha (float, optional): The damping factor. Default is 0.85.
    max_iter (int, optional): Number of power iterations. Default is 100.

    Returns:
    numpy.ndarray: The PPR vector.
    """
    P = transition_from_adjacency(A)
    Q = add_damping(P, alpha, mu)
    v = mu
    for _ in range(max_iter):
        v = Q.T @ v
    return v


# Probabilistic models

def gaussian_pdf(X, Y, mean, cov):
    """
    Evaluate the density of a 2D Gaussian distribution on a grid.

    Parameters:
    X (ndarray): X-coordinates of the grid points.
    Y (ndarray): Y-coordinates of the grid points.
    mean (ndarray): Mean vector of the Gaussian distribution.
    cov (ndarray): Covariance matrix of the Gaussian distribution.

    Returns:
    ndarray: The density values, with the same shape as X.
    """
    xy = np.stack([X.flatten(), Y.flatten()], axis=-1)
    return multivariate_normal.pdf(xy, mean=mean, cov=cov).reshape(X.shape)


def gmm2_pdf(X, Y, mean1, cov1, pi1, mean2, cov2, pi2):
    """
    Evaluate the density of a two-component Gaussian Mixture Model on a grid.

    Parameters:
    X (ndarray): Input array of X coordinates.
    Y (ndarray): Input array of Y coordinates.
    mean1 (ndarray): Mean vector of the first Gaussian component.
    cov1 (ndarray): Covariance matrix of the first Gaussian component.
    pi1 (float): Mixing coefficient of the first Gaussian component.
    mean2 (ndarray): Mean vector of the second Gaussian component.
    cov2 (ndarray): Covariance matrix of the second Gaussian component.
    pi2 (float): Mixing coefficient of the second Gaussian component.

    Returns:
    ndarray: The density values, with the same shape as X.
    """
    xy = np.stack([X.flatten(), Y.flatten()], axis=-1)
    Z1 = multivariate_normal.pdf(xy, mean=mean1, cov=cov1).reshape(X.shape)
    Z2 = multivariate_normal.pdf(xy, mean=mean2, cov=cov2).reshape(X.shape)
    return pi1 * Z1 + pi2 * Z2


def make_surface_plot(X, Y, Z):
    """
    Create and show a 3D surface plot of the given X, Y, and Z data.

    Parameters:
    X (array-like): The X-coordinates of the data points.
    Y (array-like): The Y-coordinates of the data points.
    Z (array-like): The Z-coordinates of the data points.

    Returns:
    None
    """
    _require(plt, "matplotlib")
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_surface(X, Y, Z, cmap=plt.cm.viridis, antialiased=False)
    plt.show()


def nb_fit_table(N_km, alpha=1., beta=1.):
    """
    Fit a Naive Bayes model to a contingency table.

    Parameters:
    - N_km (ndarray): Contingency table of shape (K, M) where K is the number
      of classes and M is the number of features.
    - alpha (float): Smoothing parameter for the class probabilities.
      Default is 1.
    - beta (float): Smoothing parameter for the feature probabilities.
      Default is 1.

    Returns:
    - pi_k (ndarray): Smoothed estimates of the class probabilities, of
      shape (K,).
    - p_km (ndarray): Smoothed estimates of the feature probabilities, of
      shape (K, M).
    """
    K, M = N_km.shape
    N_k = np.sum(N_km, axis=-1)
    N = np.sum(N_k)

    # MLE for pi_k's
    pi_k = (N_k + alpha) / (N + K * alpha)

    # MLE for p_km's
    p_km = (N_km + beta) / (N_k[:, None] + 2 * beta)

    return pi_k, p_km


def _bernoulli_scores(pi_k, p_km, x):
    """Return, for each class k, minus the log of pi_k times the Bernoulli
    likelihood of x under p_km[k, :]."""
    K = len(pi_k)
    p = np.asarray(p_km)[:K, :]
    log_lik = np.sum(x * np.log(p) + (1 - x) * np.log(1 - p), axis=1)
    return -np.log(np.asarray(pi_k, dtype=float)) - log_lik


def nb_predict(pi_k, p_km, x, label_set):
    """
    Predict the label of an input using the Naive Bayes classifier.

    Parameters:
    - pi_k (list): The prior probabilities for each class.
    - p_km (ndarray): The conditional probabilities for each feature given
      each class.
    - x (ndarray): The input features.
    - label_set (list): The set of possible labels.

    Returns:
    - predicted_label: The label whose negative log-posterior score is
      smallest.
    """
    score_k = _bernoulli_scores(pi_k, p_km, x)
    return label_set[np.argmin(score_k, axis=0)]


def responsibility(pi_k, p_km, x):
    """
    Compute the responsibilities of each component in a Bernoulli mixture
    model for one observation.

    Parameters:
    - pi_k (array-like): The mixing coefficients for each component.
    - p_km (array-like): The Bernoulli parameters for each component.
    - x (array-like): The observed data.

    Returns:
    - r_k (array-like): The responsibilities for each component.
    """
    score_k = _bernoulli_scores(pi_k, p_km, x)
    # Subtracting the smallest score leaves the ratios unchanged but keeps
    # the exponentials from all underflowing to zero (which gave 0/0).
    weights = np.exp(-(score_k - np.min(score_k)))
    return weights / np.sum(weights)


def update_parameters(eta_km, eta_k, eta, alpha, beta):
    """
    Update the parameters of the mixture model from (soft) counts.

    Parameters:
    - eta_km: numpy.ndarray, shape (K, M). Count of feature m in component k.
    - eta_k: numpy.ndarray, shape (K,). Total count of each component.
    - eta: float. The total count over all components.
    - alpha: float. Smoothing added to the component counts.
    - beta: float. Smoothing added to the feature counts.

    Returns:
    - pi_k: numpy.ndarray, shape (K,). The estimated mixing coefficients.
    - p_km: numpy.ndarray, shape (K, M). The estimated feature probabilities.
    """
    K = len(eta_k)

    # MLE for pi_k's
    pi_k = (eta_k + alpha) / (eta + K * alpha)

    # MLE for p_km's
    p_km = (eta_km + beta) / (eta_k[:, None] + 2 * beta)

    return pi_k, p_km


def em_bern(X, K, pi_0, p_0, maxiters=10, alpha=0., beta=0.):
    """
    Expectation-Maximization algorithm for a Bernoulli Mixture Model.

    Parameters:
    - X: numpy.ndarray of shape (n, M), n samples and M features.
    - K: int, number of mixture components.
    - pi_0: numpy.ndarray, initial mixing coefficients of shape (K,).
    - p_0: numpy.ndarray, initial Bernoulli parameters of shape (K, M).
    - maxiters: int, optional. Number of EM iterations. Default is 10.
    - alpha: float, optional. Smoothing for the mixing coefficients.
      Default is 0.
    - beta: float, optional. Smoothing for the Bernoulli parameters.
      Default is 0. With beta = 0 a parameter can become exactly 0 or 1,
      which makes the log-likelihood terms non-finite.

    Returns:
    - pi_k: numpy.ndarray, estimated mixing coefficients of shape (K,).
    - p_km: numpy.ndarray, estimated Bernoulli parameters of shape (K, M).
    """
    n, M = X.shape
    pi_k = pi_0
    p_km = p_0

    for _ in range(maxiters):

        # E Step
        r_ki = np.zeros((K, n))
        for i in range(n):
            r_ki[:, i] = responsibility(pi_k, p_km, X[i, :])

        # M Step
        eta_k = np.sum(r_ki, axis=-1)
        eta = np.sum(eta_k)
        # eta_km[k, m] = sum_i r_ki[k, i] * X[i, m]
        eta_km = r_ki @ X
        pi_k, p_km = update_parameters(eta_km, eta_k, eta, alpha, beta)

    return pi_k, p_km


def hard_responsibility(pi_k, p_km, x):
    """
    Compute hard (one-hot) responsibilities for one observation.

    Parameters:
    - pi_k (numpy.ndarray): The probabilities of each cluster.
    - p_km (numpy.ndarray): The probabilities of each feature given each
      cluster.
    - x (numpy.ndarray): The observed data.

    Returns:
    - r_k (numpy.ndarray): Vector of zeros with a 1 at the cluster with the
      smallest negative log-posterior score.
    """
    score_k = _bernoulli_scores(pi_k, p_km, x)
    r_k = np.zeros(len(pi_k))
    r_k[np.argmin(score_k, axis=0)] = 1
    return r_k


def hard_em_bern(X, K, pi_0, p_0, maxiters=10, alpha=0., beta=0.):
    """
    Hard expectation-maximization (EM) algorithm for a Bernoulli mixture
    model.

    Parameters:
    - X: numpy.ndarray of shape (n, M), n samples and M features.
    - K: int, number of mixture components.
    - pi_0: numpy.ndarray, initial mixing coefficients of shape (K,).
    - p_0: numpy.ndarray, initial Bernoulli parameters of shape (K, M).
    - maxiters: int, optional. Number of EM iterations. Default is 10.
    - alpha: float, optional. Smoothing for the mixing coefficients.
      Default is 0.
    - beta: float, optional. Smoothing for the Bernoulli parameters.
      Default is 0. With beta = 0 a parameter can become exactly 0 or 1,
      which makes the log-likelihood terms non-finite.

    Returns:
    - pi_k: numpy.ndarray, estimated mixing coefficients of shape (K,).
    - p_km: numpy.ndarray, estimated Bernoulli parameters of shape (K, M).
    """
    n, M = X.shape
    pi_k = pi_0
    p_km = p_0

    for _ in range(maxiters):

        # E Step
        r_ki = np.zeros((K, n))
        for i in range(n):
            r_ki[:, i] = hard_responsibility(pi_k, p_km, X[i, :])

        # M Step
        eta_k = np.sum(r_ki, axis=-1)
        eta = np.sum(eta_k)
        # eta_km[k, m] = sum_i r_ki[k, i] * X[i, m]
        eta_km = r_ki @ X
        pi_k, p_km = update_parameters(eta_km, eta_k, eta, alpha, beta)

    return pi_k, p_km


# Linear Gaussian models

def lgSamplePath(rng, ss, os, F, H, Q, R, init_mu, init_Sig, T):
    """
    Generate a sample path from a linear Gaussian state-space model.

    Parameters:
    rng (numpy.random.Generator): The random number generator.
    ss (int): The number of state variables.
    os (int): The number of observation variables.
    F (ndarray): The state transition matrix of shape (ss, ss).
    H (ndarray): The observation matrix of shape (os, ss).
    Q (ndarray): The state noise covariance matrix of shape (ss, ss).
    R (ndarray): The observation noise covariance matrix of shape (os, os).
    init_mu (ndarray): The initial state mean vector of shape (ss,).
    init_Sig (ndarray): The initial state covariance of shape (ss, ss).
    T (int): The number of time steps.

    Returns:
    x (ndarray): The generated state path of shape (ss, T).
    y (ndarray): The generated observation path of shape (os, T). No
        observation is drawn at time 0, so y[:, 0] is left at zero.
    """
    x = np.zeros((ss, T))
    y = np.zeros((os, T))

    x[:, 0] = rng.multivariate_normal(init_mu, init_Sig)
    for t in range(1, T):
        x[:, t] = rng.multivariate_normal(F @ x[:, t - 1], Q)
        y[:, t] = rng.multivariate_normal(H @ x[:, t], R)

    return x, y


def kalmanUpdate(ss, A, C, Q, R, y_t, mu_prev, Sig_prev):
    """
    Perform one predict/update step of the Kalman filter.

    Args:
    ss (int): State size.
    A (ndarray): State transition matrix.
    C (ndarray): Observation matrix.
    Q (ndarray): Process noise covariance matrix.
    R (ndarray): Measurement noise covariance matrix.
    y_t (ndarray): Measurement vector at time t. If any entry is NaN the
        measurement is treated as missing and only the prediction is
        returned.
    mu_prev (ndarray): Previous state estimate.
    Sig_prev (ndarray): Previous state covariance matrix.

    Returns:
    tuple: Updated state estimate (mu_new) and covariance (Sig_new).
    """
    mu_pred = A @ mu_prev
    Sig_pred = A @ Sig_prev @ A.T + Q

    if np.any(np.isnan(y_t)):
        return mu_pred, Sig_pred

    e_t = y_t - C @ mu_pred  # error at time t
    S = C @ Sig_pred @ C.T + R
    Sinv = LA.inv(S)
    K = Sig_pred @ C.T @ Sinv  # Kalman gain matrix
    mu_new = mu_pred + K @ e_t
    Sig_new = (np.eye(ss) - K @ C) @ Sig_pred
    return mu_new, Sig_new


def kalmanFilter(ss, os, y, A, C, Q, R, init_mu, init_Sig, T):
    """
    Apply the Kalman filter to estimate the hidden states of a linear
    dynamical system.

    Parameters:
    ss (int): The number of hidden states.
    os (int): The number of observed states.
    y (ndarray): The observations at each time step, shape (os, T).
    A (ndarray): The state transition matrix, shape (ss, ss).
    C (ndarray): The observation matrix, shape (os, ss).
    Q (ndarray): The process noise covariance matrix, shape (ss, ss).
    R (ndarray): The observation noise covariance matrix, shape (os, os).
    init_mu (ndarray): The initial mean of the hidden states, shape (ss,).
    init_Sig (ndarray): The initial covariance of the hidden states,
        shape (ss, ss).
    T (int): The number of time steps.

    Returns:
    mu (ndarray): The estimated means at each time step, shape (ss, T).
    Sig (ndarray): The estimated covariances at each time step,
        shape (ss, ss, T).
    """
    mu = np.zeros((ss, T))
    Sig = np.zeros((ss, ss, T))
    mu[:, 0] = init_mu
    Sig[:, :, 0] = init_Sig

    for t in range(1, T):
        mu[:, t], Sig[:, :, t] = kalmanUpdate(
            ss, A, C, Q, R, y[:, t], mu[:, t - 1], Sig[:, :, t - 1])

    return mu, Sig


# Gibbs sampling for RBMs

def sigmoid(z):
    """
    Compute the sigmoid function.

    Parameters:
    z (float or array-like): The input value(s).

    Returns:
    float or array-like: 1 / (1 + exp(-z)).
    """
    return 1 / (1 + np.exp(-z))


def rbm_mean_hidden(v, W, c):
    """
    Compute the mean activation of the hidden units of a Restricted
    Boltzmann Machine (RBM).

    Parameters:
    v (numpy.ndarray): Visible units. The hidden bias is added as a column,
        so v should be 2-D with one column per sample.
    W (numpy.ndarray): Weight matrix connecting visible and hidden units.
    c (numpy.ndarray): Bias vector for hidden units.

    Returns:
    numpy.ndarray: Mean activation of hidden units.
    """
    return sigmoid(W @ v + c.reshape(len(c), 1))


def rbm_mean_visible(h, W, b):
    """
    Compute the mean of the visible units of a Restricted Boltzmann
    Machine (RBM).

    Parameters:
    h (numpy.ndarray): Hidden units. The visible bias is added as a column,
        so h should be 2-D with one column per sample.
    W (numpy.ndarray): Weight matrix connecting hidden and visible units.
    b (numpy.ndarray): Bias vector for the visible units.

    Returns:
    numpy.ndarray: Mean of the visible units.
    """
    return sigmoid(W.T @ h + b.reshape(len(b), 1))


def rbm_gibbs_update(rng, v, W, b, c):
    """
    Perform one Gibbs sampling update step for a Restricted Boltzmann
    Machine (RBM).

    Args:
    rng (numpy.random.Generator): Random number generator.
    v (ndarray): Visible units of the RBM.
    W (ndarray): Weight matrix connecting visible and hidden units.
    b (ndarray): Bias vector for the visible units.
    c (ndarray): Bias vector for the hidden units.

    Returns:
    ndarray: Updated visible units after one Gibbs sampling step.
    """
    p_hidden = rbm_mean_hidden(v, W, c)
    h = rng.binomial(1, p_hidden, p_hidden.shape)
    p_visible = rbm_mean_visible(h, W, b)
    v = rng.binomial(1, p_visible, p_visible.shape)
    return v


def rbm_gibbs_sampling(rng, k, v_0, W, b, c):
    """
    Perform k steps of Gibbs sampling in a Restricted Boltzmann Machine.

    Parameters:
    rng (object): The random number generator object.
    k (int): The number of Gibbs sampling steps to perform.
    v_0 (array-like): The initial visible layer state.
    W (array-like): The weight matrix of the RBM.
    b (array-like): The bias vector of the visible layer.
    c (array-like): The bias vector of the hidden layer.

    Returns:
    array-like: The visible layer state after k steps of Gibbs sampling.
    """
    v = v_0
    for _ in range(k):
        v = rbm_gibbs_update(rng, v, W, b, c)
    return v


def plot_imgs(z, n_imgs, nx_pixels, ny_pixels):
    """
    Plot a grid of images.

    Parameters:
    - z: numpy array of shape (n_imgs, nx_pixels * ny_pixels).
      The array of images to be plotted, one per row.
    - n_imgs: int. The number of images, used to size the grid.
    - nx_pixels: int. The number of pixels along the first image axis.
    - ny_pixels: int. The number of pixels along the second image axis.
    """
    _require(plt, "matplotlib")
    # Choose a near-square grid with at least n_imgs cells (a floor x ceil
    # grid is too small for e.g. 3 or 7 images).
    ny_imgs = max(1, int(np.ceil(np.sqrt(n_imgs))))
    nx_imgs = max(1, int(np.ceil(n_imgs / ny_imgs)))
    plt.figure(figsize=(8, 8))
    for i, comp in enumerate(z):
        plt.subplot(nx_imgs, ny_imgs, i + 1)
        plt.imshow(comp.reshape((nx_pixels, ny_pixels)), cmap=plt.cm.gray_r)
        plt.xticks([])
        plt.yticks([])
    plt.show()


# PyTorch and neural networks

def train(dataloader, model, loss_fn, optimizer, device):
    """
    Train the model for one pass over the dataloader.

    Args:
    dataloader (torch.utils.data.DataLoader): Yields (X, y) training batches.
    model (torch.nn.Module): The model to be trained.
    loss_fn (torch.nn.Module): The loss function applied to (pred, y).
    optimizer (torch.optim.Optimizer): The optimizer stepping the parameters.
    device (torch.device): The device the batches are moved to.

    Returns:
    None
    """
    model.train()
    for X, y in dataloader:
        X, y = X.to(device), y.to(device)
        pred = model(X)
        loss = loss_fn(pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


def training_loop(train_loader, model, loss_fn, optimizer, device, epochs=3):
    """
    Run the training pass for a given number of epochs, printing progress.

    Args:
    train_loader (torch.utils.data.DataLoader): The training data loader.
    model (torch.nn.Module): The model to be trained.
    loss_fn (torch.nn.Module): The loss function to be used.
    optimizer (torch.optim.Optimizer): The optimizer updating the model
        parameters.
    device (torch.device): The device on which training is performed.
    epochs (int, optional): The number of epochs to train for. Defaults to 3.
    """
    for epoch in range(epochs):
        train(train_loader, model, loss_fn, optimizer, device)
        print(f"Epoch {epoch+1}/{epochs}")


def test(dataloader, model, loss_fn, device):
    """
    Evaluate the classification accuracy of a model on a dataset and print it.

    Args:
    dataloader (torch.utils.data.DataLoader): The test data loader.
    model (torch.nn.Module): The model to be evaluated.
    loss_fn (torch.nn.Module): Accepted for symmetry with `train`; not used.
    device (torch.device): The device the batches are moved to.

    Returns:
    None
    """
    _require(torch, "torch")
    size = len(dataloader.dataset)
    correct = 0
    model.eval()
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            correct += (pred.argmax(dim=1) == y).type(torch.float).sum().item()

    print(f"Test error: {(100*(correct / size)):>0.1f}% accuracy")


# Keep pytest from collecting this helper as a test case when the module is
# star-imported into a test file.
test.__test__ = False


def FashionMNIST_get_class_name(label):
    """
    Return the FashionMNIST class name for an integer label.

    Parameters:
    label (int): Index into the list of the ten class names.

    Returns:
    str: The class name at that index.
    """
    class_names = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
                   "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"]
    return class_names[label]