{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Copy and paste from\n",
    "# https://towardsdatascience.com/visualising-high-dimensional-datasets-using-pca-and-t-sne-in-python-8ef87e7915b\n",
    "# with trivial modifications\n",
    "#\n",
    "# Author: Nobody in Computer Vision\n",
    "# Date: 2022-03-04\n",
    "# \n",
    "# Contacts:\n",
    "# Company: Longer Vision Technology\n",
    "# Email: jiapei@longervision.com\n",
    "# Website: https://www.longervision.com"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from __future__ import print_function\n",
    "import time\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.datasets import fetch_openml\n",
    "from sklearn.decomposition import PCA\n",
    "from sklearn.manifold import TSNE\n",
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "from mpl_toolkits.mplot3d import Axes3D\n",
    "import seaborn as sns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(70000, 784) (70000,)\n"
     ]
    }
   ],
   "source": [
    "# Pin the dataset version and request pandas objects explicitly: the next\n",
    "# cell calls Series.apply on y, which fails if the library default hands\n",
    "# back NumPy arrays instead of a DataFrame/Series.\n",
    "mnist = fetch_openml(\"mnist_784\", version=1, as_frame=True)\n",
    "X = mnist.data / 255.0  # divide the raw feature values by 255\n",
    "y = mnist.target\n",
    "print(X.shape, y.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Size of the dataframe: (70000, 786)\n"
     ]
    }
   ],
   "source": [
    "feat_cols = ['pixel' + str(i + 1) for i in range(X.shape[1])]\n",
    "# Relabel the columns by position. Passing columns= together with an\n",
    "# existing DataFrame selects columns by label instead, which would silently\n",
    "# produce all-NaN columns if the downloaded names ever differed from feat_cols.\n",
    "df = pd.DataFrame(X.to_numpy(), index=X.index, columns=feat_cols)\n",
    "df['y'] = [int(digit) for digit in y]  # class as an integer\n",
    "df['label'] = y.apply(str)  # same class, as text\n",
    "print('Size of the dataframe: {}'.format(df.shape))\n",
    "# Drop the names bound to the intermediate objects; df now carries the data.\n",
    "X, y = None, None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# For reproducibility of the results: seed NumPy's global RNG before\n",
    "# drawing the random row permutation.\n",
    "np.random.seed(42)\n",
    "rndperm = np.random.permutation(df.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
{ "data": { "text/plain": [ "