{
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.manifold import TSNE\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"import pickle\n",
"import plotly.express as px\n",
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Load the pickled IMDB DataFrame from the notebook's working directory.\n",
"# NOTE: unpickling can execute arbitrary code, so only load pickle files\n",
"# that come from a trusted source.\n",
"IMDB_PICKLE_PATH = \"./imdb.pkl\"\n",
"df = pd.read_pickle(IMDB_PICKLE_PATH)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Collect the per-row embeddings into a plain 2-D ndarray (one row per\n",
"# DataFrame record). np.matrix is no longer recommended by NumPy and is\n",
"# rejected by recent scikit-learn estimators, so a regular array is used.\n",
"mat = np.asarray(df['embeddings'].tolist())\n",
"\n",
"# Fail early with a readable message if the embeddings do not form a matrix.\n",
"assert mat.ndim == 2, f'expected a 2-D embedding matrix, got shape {mat.shape}'"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Lower dim data has shape (6012, 2)\n"
]
}
],
"source": [
"# Reduce the embedding matrix to two dimensions with t-SNE.\n",
"# The seed is pinned via random_state; n_jobs=-1 asks for all available cores.\n",
"tsne_model = TSNE(\n",
"    n_components=2,\n",
"    n_jobs=-1,\n",
"    random_state=42,\n",
")\n",
"low_dim_data = tsne_model.fit_transform(mat)\n",
"\n",
"# Report the shape of the projected data.\n",
"print('Lower dim data has shape', low_dim_data.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# TODO: the colour scheme can definitely be improved.\n",
"\n",
"# Build the frame straight from the 2-D t-SNE output so x/y stay numeric;\n",
"# stacking the layer column into the same array would coerce every column\n",
"# to one common dtype.\n",
"tsne_df = pd.DataFrame(low_dim_data, columns=['x', 'y'])\n",
"\n",
"# Attach the layers by position (to_numpy sidesteps index alignment with df)\n",
"# and convert them to string labels so plotly assigns discrete colours.\n",
"# Only a trailing '.0' is stripped, so integral floats read '3', not '3.0'.\n",
"layer_labels = pd.Series(df['layers'].to_numpy(), index=tsne_df.index).astype(str)\n",
"tsne_df['layers'] = layer_labels.str.replace(r'\\.0$', '', regex=True)\n",
"\n",
"# Refer to columns by name; px.scatter resolves them against tsne_df.\n",
"fig = px.scatter(\n",
"    tsne_df, x='x', y='y', color='layers',\n",
"    title='t-SNE projection of embeddings, coloured by layer'\n",
")\n",
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"hovertemplate": "x=%{x}
y=%{y}
layers=%{marker.color}