{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Prepare a CSV file for upload to Zooniverse Project Builder" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import os\n", "import glob\n", "import re" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
columnimagepagerowvolume
332N193-022_0184-col-2-0.jpg01840N193-022
322N193-022_0184-col-2-1.jpg01841N193-022
392N193-022_0184-col-2-2.jpg01842N193-022
402N193-022_0184-col-2-3.jpg01843N193-022
262N193-022_0184-col-2-4.jpg01844N193-022
\n", "
" ], "text/plain": [ " column image page row volume\n", "33 2 N193-022_0184-col-2-0.jpg 0184 0 N193-022\n", "32 2 N193-022_0184-col-2-1.jpg 0184 1 N193-022\n", "39 2 N193-022_0184-col-2-2.jpg 0184 2 N193-022\n", "40 2 N193-022_0184-col-2-3.jpg 0184 3 N193-022\n", "26 2 N193-022_0184-col-2-4.jpg 0184 4 N193-022" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "file_pattern = 'data/columns/rows/N193-022_0184-col-*'\n", "images = glob.glob(file_pattern)\n", "data = []\n", "for image in images:\n", " if 'header' not in image:\n", " filename = image.split('/')[-1]\n", " volume = re.search(r'(N193-\\d+)_', filename).group(1)\n", " page = re.search(r'N193-\\d+_(\\d+)-col', filename).group(1)\n", " column = re.search(r'N193-\\d+_\\d+-col-(\\d+)', filename).group(1)\n", " row = re.search(r'N193-\\d+_\\d+-col-\\d+-(\\d+)', filename).group(1)\n", " data.append({'image': filename, 'volume': volume, 'page': page, 'column': column, 'row': row})\n", "df = pd.DataFrame(data)\n", "df['row'] = pd.to_numeric(df['row'])\n", "df = df.sort_values('row') \n", "df.head() " ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "df.to_csv('sample_rows.csv', index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }