# %% [markdown]
# # Font images: pickles to CSV
#
# Loads one pickle per font from `dataset/pickles`, previews a random font,
# keeps only the fonts whose character images all have the expected shape,
# and writes the stacked images to `dataset/csv/images.csv` (one row per font).

# %%
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# %%
# Configuration.
# Seed NumPy's global RNG so the random font picks below are repeatable.
np.random.seed(31)
sns.set()

# Shape every character image must have for a font to be kept.
EXPECTED_SHAPE = (28, 28)

# %% [markdown]
# ## Load the data

# %%
dataset_path = os.path.join(os.getcwd(), 'dataset', 'pickles')

# os.listdir() returns entries in arbitrary order. Sorting makes the seeded
# random picks and the row order of the exported CSV reproducible across
# machines. Hidden entries and sub-directories are skipped because they are
# not font pickles and would make the load fail.
filenames = sorted(
    name
    for name in os.listdir(dataset_path)
    if not name.startswith('.')
    and os.path.isfile(os.path.join(dataset_path, name))
)

data = {}
for filename in filenames:
    # splitext drops only the final extension, so font names that contain a
    # dot are not truncated (and cannot collide with each other).
    font_name = os.path.splitext(filename)[0]
    # NOTE(review): read_pickle can execute arbitrary code while unpickling;
    # only point this at pickles from a trusted source.
    data[font_name] = pd.read_pickle(os.path.join(dataset_path, filename))

# %%
font_names = list(data.keys())
print(len(font_names), *np.random.choice(font_names, 5), sep='\n')

# %% [markdown]
# ## Preview one font

# %%
def plot_data(font):
    """Show every character image of one font side by side.

    Parameters
    ----------
    font : str
        Key into the module-level ``data`` dict.

    Side effects
    ------------
    Displays a one-row figure with one grayscale panel per image, then prints
    the shape of the images stacked into a single array.
    """
    # presumably a list/array of 2-D arrays, one per character; TODO confirm
    glyphs = data[font]['images']
    count = len(glyphs)

    if count:
        # squeeze=False keeps `axes` two-dimensional even for a single image,
        # so the loop below needs no special case.
        fig, axes = plt.subplots(1, count, squeeze=False)
        # Iterate positionally instead of indexing glyphs[i], which would be
        # label-based if the container were a pandas Series.
        for ax, glyph in zip(axes[0], glyphs):
            ax.imshow(glyph, cmap='gray')
            ax.axis('off')
        plt.show()

    # Stack only after plotting, so a font whose images cannot be stacked
    # still gets drawn before the conversion fails.
    print(np.array(glyphs).shape)

# %%
font = np.random.choice(font_names)
print(font)

plot_data(font)

# %% [markdown]
# ## Keep only fonts whose images all have the expected shape

# %%
# Build a new dict instead of popping from `data` in place: `font_names` and
# `plot_data` keep working on the unfiltered data, and this cell gives the
# same result no matter how often or in which order it is re-run.
data_clean = {}
for key, entry in data.items():
    if all(np.shape(char) == EXPECTED_SHAPE for char in entry['images']):
        data_clean[key] = entry
    else:
        # Report every font that is being dropped.
        print(key)

# %%
images = np.array([entry['images'] for entry in data_clean.values()])

# save_csv expects a (fonts, chars, height, width) stack; fail early with a
# clear message if the fonts could not be stacked that way.
assert images.ndim == 4 and images.shape[2:] == EXPECTED_SHAPE, (
    f'unexpected image stack shape: {images.shape}'
)

# %%
images.shape

# %% [markdown]
# ## Export to CSV

# %%
def save_csv(images, csv_dir=None, filename='images.csv', fmt='%.18e'):
    """Write a stack of font images as CSV, one flattened row per font.

    Parameters
    ----------
    images : array-like
        Four-dimensional stack ``(num_fonts, num_chars, h, w)`` with ``h == w``.
    csv_dir : str, optional
        Output directory. Defaults to ``<cwd>/dataset/csv``; it is created
        (including missing parents) when it does not exist.
    filename : str, optional
        Name of the CSV file inside ``csv_dir``.
    fmt : str, optional
        Number format passed to ``np.savetxt``. The default is ``np.savetxt``'s
        own default, so existing callers get byte-identical output. Pass
        ``'%d'`` for integer pixel data to get a much smaller file.

    Returns
    -------
    str
        Path of the file that was written.

    Raises
    ------
    ValueError
        If ``images`` is not four-dimensional (raised by the shape unpacking).
    AssertionError
        If the images are not square.
    """
    stack = np.asarray(images)
    num_fonts, num_chars, h, w = stack.shape
    # presumably required so a reader can recover h and w from the row length
    # alone; TODO confirm against the code that consumes the CSV
    assert h == w, f'expected square images, got {h}x{w}'

    # reshape never modifies its input, so no defensive copy is needed.
    flat = stack.reshape(num_fonts, num_chars * h * w)

    if csv_dir is None:
        csv_dir = os.path.join(os.getcwd(), 'dataset', 'csv')
    # exist_ok avoids the check-then-create race and also creates parents.
    os.makedirs(csv_dir, exist_ok=True)

    csv_filename = os.path.join(csv_dir, filename)
    np.savetxt(csv_filename, flat, delimiter=',', fmt=fmt)
    return csv_filename

# %%
csv_file = save_csv(images)