{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Preview the EAWAG scans\n", "Let's see what we did there..." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import platform\n", "import os\n", "import glob\n", "import pandas\n", "import imageio\n", "import numpy\n", "import matplotlib.pyplot as plt\n", "from matplotlib_scalebar.scalebar import ScaleBar\n", "import seaborn\n", "import dask\n", "import dask_image.imread\n", "from dask.distributed import Client, LocalCluster\n", "import skimage\n", "from tqdm.auto import tqdm, trange" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Set dask temporary folder\n", "# Do this before creating a client: https://stackoverflow.com/a/62804525/323100\n", "import tempfile\n", "if 'Linux' in platform.system():\n", " tmp = os.path.join(os.sep, 'media', 'habi', 'Fast_SSD')\n", "elif 'Darwin' in platform.system():\n", " tmp = tempfile.gettempdir()\n", "else:\n", " if 'anaklin' in platform.node():\n", " tmp = os.path.join('F:\\\\')\n", " else:\n", " tmp = os.path.join('D:\\\\')\n", "dask.config.set({'temporary_directory': os.path.join(tmp, 'tmp')})\n", "print('Dask temporarry files go to %s' % dask.config.get('temporary_directory'))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Start cluster and client now, after setting tempdir\n", "cluster = LocalCluster()\n", "client = Client(cluster)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print('You can seee what DASK is doing at \"http://localhost:%s/status\"' % client.scheduler_info()['services']['dashboard'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# # Ignore warnings in the notebook\n", "# import warnings\n", "# warnings.filterwarnings(\"ignore\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Set up figure defaults\n", "plt.rc('image', cmap='gray', interpolation='nearest') # Display all images in b&w and with 'nearest' interpolation\n", "plt.rcParams['figure.figsize'] = (16, 9) # Size up figures a bit\n", "plt.rcParams['figure.dpi'] = 200" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Setup scale bar defaults\n", "plt.rcParams['scalebar.location'] = 'lower right'\n", "plt.rcParams['scalebar.frameon'] = False\n", "plt.rcParams['scalebar.color'] = 'white'" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Display all plots identically\n", "lines = 3\n", "# And then do something like\n", "# plt.subplot(lines, int(numpy.ceil(len(Data) / float(lines))), c + 1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Different locations if running either on Linux or Windows\n", "FastSSD = False\n", "overthere = False # Load the data directly from the iee-research_storage drive\n", "nanoct = True # Load the data directly from the iee-research_storage drive\n", "# to speed things up significantly\n", "if 'Linux' in platform.system():\n", " if FastSSD:\n", " BasePath = os.path.join(os.sep, 'media', 'habi', 'Fast_SSD')\n", " else:\n", " BasePath = os.path.join(os.sep, 'home', 'habi', '1272')\n", "elif 'Darwin' in platform.system():\n", " FastSSD = False\n", " BasePath = os.path.join('/Users/habi/Dev/EAWAG/Data')\n", "elif 'Windows' in 
platform.system():\n", " if FastSSD:\n", " BasePath = os.path.join('F:\\\\')\n", " else:\n", " if 'anaklin' in platform.node():\n", " BasePath = os.path.join('S:\\\\')\n", " else:\n", " BasePath = os.path.join('D:\\\\Results')\n", "Root = os.path.join(BasePath, 'EAWAG')\n", "if overthere:\n", " Root = os.path.join('I:\\\\microCTupload')\n", "if nanoct:\n", " Root = os.path.join(os.path.sep, 'home', 'habi', '2214', 'EAWAG') \n", "print('We are loading all the data from %s' % Root)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_pixelsize(logfile):\n", " \"\"\"Get the pixel size from the scan log file\"\"\"\n", " with open(logfile, 'r') as f:\n", " for line in f:\n", " if 'Image Pixel' in line and 'Scaled' not in line:\n", " pixelsize = float(line.split('=')[1])\n", " return(pixelsize)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_projectionsize(logfile):\n", " \"\"\"How big did we set the camera?\"\"\"\n", " with open(logfile, 'r') as f:\n", " for line in f:\n", " if 'Number Of Rows' in line:\n", " y = int(line.split('=')[1])\n", " if 'Number Of Columns' in line:\n", " x = int(line.split('=')[1]) \n", " return(x*y)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_filter(logfile):\n", " \"\"\"Get the filter we used whole scanning from the scan log file\"\"\"\n", " with open(logfile, 'r') as f:\n", " for line in f:\n", " if 'Filter=' in line:\n", " whichfilter = line.split('=')[1].strip()\n", " return(whichfilter)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_exposuretime(logfile):\n", " \"\"\"Get the exposure time size from the scan log file\"\"\"\n", " with open(logfile, 'r') as f:\n", " for line in f:\n", " if 'Exposure' in line:\n", " exposuretime = int(line.split('=')[1])\n", " return(exposuretime)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_ringartefact(logfile):\n", " \"\"\"Get the ring artefact correction from the scan log file\"\"\"\n", " with open(logfile, 'r') as f:\n", " for line in f:\n", " if 'Ring Artifact' in line:\n", " ringartefactcorrection = int(line.split('=')[1])\n", " return(ringartefactcorrection)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_reconstruction_grayvalue(logfile):\n", " grayvalue = None\n", " \"\"\"How did we map the brightness of the reconstructions?\"\"\"\n", " with open(logfile, 'r') as f:\n", " for line in f:\n", " if 'Maximum for' in line:\n", " grayvalue = float(line.split('=')[1])\n", " return(grayvalue)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_beamhardening(logfile):\n", " \"\"\"Get the beamhardening correction from the scan log file\"\"\"\n", " with open(logfile, 'r') as f:\n", " for line in f:\n", " if 'Hardening' in line:\n", " beamhardeningcorrection = int(line.split('=')[1])\n", " return(beamhardeningcorrection)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_rotationstep(logfile):\n", " \"\"\"Get the rotation step from the scan log file\"\"\"\n", " with open(logfile, 'r') as f:\n", " for line in f:\n", " if 'Rotation Step' in line:\n", " rotstep = float(line.split('=')[1])\n", " return(rotstep)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, 
"outputs": [], "source": [ "def get_frameaveraging(logfile):\n", " \"\"\"Get the frame averaging from the scan log file\"\"\"\n", " with open(logfile, 'r') as f:\n", " for line in f:\n", " if 'Averaging' in line:\n", " avg = line.split('=')[1]\n", " return(avg)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_defectpixelmasking(logfile):\n", " \"\"\"Check the 'defect pixel masking' setting\"\"\"\n", " with open(logfile, 'r') as f:\n", " for line in f:\n", " if 'defect pixel mask' in line:\n", " defectmask = int(line.split('=')[1].strip())\n", " return(defectmask)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_machine(logfile):\n", " \"\"\"Get the machine we used to scan\"\"\"\n", " with open(logfile, 'r') as f:\n", " for line in f:\n", " if 'Scanner' in line:\n", " machine = line.split('=')[1].strip()\n", " return(machine)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_scantime(logfile):\n", " \"\"\"How long did we scan?\"\"\"\n", " with open(logfile, 'r') as f:\n", " for line in f:\n", " if 'Scan duration' in line:\n", " time = line.split('=')[1].strip()\n", " return(pandas.to_timedelta(time))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_stacks(logfile):\n", " \"\"\"How many stacks/connected scans did we make?\"\"\"\n", " stacks = 1\n", " with open(logfile, 'r') as f:\n", " for line in f:\n", " if 'conn' in line:\n", " stacks = int(line.split('=')[1])\n", " return(stacks)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_scandate(logfile, verbose=False):\n", " \"\"\"When did we scan the fish?\"\"\"\n", " with open(logfile, 'r') as f:\n", " for line in f:\n", " if 'Study Date and Time' in line:\n", " if verbose:\n", " print('Found \"date\" line: %s' % line.strip())\n", " datestring = line.split('=')[1].strip().replace(' ', ' ')\n", " if verbose:\n", " print('The date string is: %s' % datestring)\n", " date = pandas.to_datetime(datestring , format='%d %b %Y %Hh:%Mm:%Ss')\n", " if verbose:\n", " print('Parsed to: %s' % date)\n", " (date)\n", " return(date.isoformat())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_git_hash():\n", " '''\n", " Get the current git hash from the repository.\n", " Based on http://stackoverflow.com/a/949391/323100 and\n", " http://stackoverflow.com/a/18283905/323100\n", " '''\n", " from subprocess import Popen, PIPE\n", " import os\n", " gitprocess = Popen(['git',\n", " '--git-dir',\n", " os.path.join(os.getcwd(), '.git'),\n", " 'rev-parse',\n", " '--short',\n", " '--verify',\n", " 'HEAD'],\n", " stdout=PIPE)\n", " (output, _) = gitprocess.communicate()\n", " return output.strip().decode(\"utf-8\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# # Make directory for output\n", "# OutPutDir = os.path.join(os.getcwd(), 'Output', get_git_hash())\n", "# print('We are saving all the output to %s' % OutPutDir)\n", "# os.makedirs(OutPutDir, exist_ok=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Make us a dataframe for saving all that we need\n", "Data = pandas.DataFrame()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Get *all* log files\n", "# Sort them by time, 
not name\n", "Data['LogFile'] = [f for f in sorted(glob.glob(os.path.join(Root, '**', '*.log'), recursive=True), key=os.path.getmtime)]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "Data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Get all folders\n", "Data['Folder'] = [os.path.dirname(f) for f in Data['LogFile']]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Check for samples which are not yet reconstructed\n", "for c, row in Data.iterrows():\n", " # Iterate over every 'proj' folder\n", " if 'proj' in row.Folder:\n", " if not 'TScopy' in row.Folder and not 'PR' in row.Folder:\n", " # If there's nothing with 'rec*' on the same level, then tell us \n", " if not glob.glob(row.Folder.replace('proj', '*rec*')):\n", " print('- %s is missing matching reconstructions' % row.LogFile[len(Root)+1:])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "Data['XYAlignment'] = [glob.glob(os.path.join(f, '*.csv')) for f in Data['Folder']]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Check for samples which are missing the .csv-files for the XY-alignment\n", "for c, row in Data.iterrows():\n", " # Iterate over every 'proj' folder\n", " if 'proj' in row.Folder:\n", " if not len(row.XYAlignment):\n", " if not any(x in row.LogFile for x in ['rectmp.log', 'proj_nofilter', 'TJ3\\\\jaw_v1', \n", " '28\\\\full_188um', '75\\\\proj_stuck',\n", " '106985\\\\proj\\\\106985_', '43\\\\head_30um',\n", " '21322\\\\jaw', '21322\\\\whole', '31\\\\moved_proj',\n", " '95\\\\proj\\\\14295']):\n", " # 'rectmp.log' because we only exclude it afterwards :)\n", " # 'proj_nofilter`, since these two scans of single teeth don't contain a reference scan\n", " # 'TJ3\\jaw_v1' for TJ3\\jaw_v1 which has no reference scan\n", " # 'full_188um' for 10628\\full_188um which has no reference scan\n", " # 'stuck' for 103375\\proj_stuck which got stuck\n", " # 106985 is a b0rked scan \n", " # 'head_30um' for 161543\\head_30um which has no reference scan\n", " # '21322' for two scans if 21322 which have no reference scan\n", " # 'moved_proj' for MA31\\moved_proj which moved\n", " # '14295_' for 14295\\proj\\ which has no reference scan\n", " print('- %s has *not* been X/Y aligned' % row.LogFile[len(Root)+1:])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Get rid of all non-rec logfiles\n", "for c, row in Data.iterrows():\n", " if 'rec' not in row.Folder:\n", " Data.drop([c], inplace=True)\n", " elif 'SubScan' in row.Folder:\n", " Data.drop([c], inplace=True) \n", " elif 'rectmp.log' in row.LogFile:\n", " Data.drop([c], inplace=True)\n", "# Reset dataframe to something that we would get if we only would have loaded the 'rec' files\n", "Data = Data.reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Generate us some meaningful colums\n", "Data['Fish'] = [l[len(Root)+1:].split(os.sep)[0] for l in Data['LogFile']]\n", "Data['Scan'] = ['_'.join(l[len(Root)+1:].split(os.sep)[1:-1]) for l in Data['LogFile']]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "tags": [] }, "outputs": [], "source": [ "Data.tail()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Get the file names of the reconstructions\n", 
"Data['Reconstructions'] = [sorted(glob.glob(os.path.join(f, '*rec0*.png'))) for f in Data['Folder']]\n", "Data['Number of reconstructions'] = [len(r) for r in Data.Reconstructions]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Drop samples which have either not been reconstructed yet or of which we deleted the reconstructions with\n", "# `find . -name \"*rec*.png\" -type f -mtime +333 -delete`\n", "# Based on https://stackoverflow.com/a/13851602\n", "# for c,row in Data.iterrows():\n", "# if not row['Number of reconstructions']:\n", "# print('%s contains no PNG files, we might be currently reconstructing it' % row.Folder)\n", "Data = Data[Data['Number of reconstructions'] > 0]\n", "Data.reset_index(drop=True, inplace=True)\n", "print('We have %s folders with reconstructions' % (len(Data)))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Get parameters to doublecheck from logfiles\n", "Data['Voxelsize'] = [get_pixelsize(log) for log in Data['LogFile']]\n", "Data['Filter'] = [get_filter(log) for log in Data['LogFile']]\n", "Data['Exposuretime'] = [get_exposuretime(log) for log in Data['LogFile']]\n", "Data['Scanner'] = [get_machine(log) for log in Data['LogFile']]\n", "Data['Averaging'] = [get_frameaveraging(log) for log in Data['LogFile']]\n", "Data['ProjectionSize'] = [get_projectionsize(log) for log in Data['LogFile']]\n", "Data['RotationStep'] = [get_rotationstep(log) for log in Data['LogFile']]\n", "Data['CameraWindow'] = [round((ps ** 0.5)/100)*100 for ps in Data['ProjectionSize']]\n", "Data['Grayvalue'] = [get_reconstruction_grayvalue(log) for log in Data['LogFile']]\n", "Data['RingartefactCorrection'] = [get_ringartefact(log) for log in Data['LogFile']]\n", "Data['BeamHardeningCorrection'] = [get_beamhardening(log) for log in Data['LogFile']]\n", "Data['DefectPixelMasking'] = [get_defectpixelmasking(log) for log in Data['LogFile']]\n", "Data['Scan date'] = [get_scandate(log) for log in Data['LogFile']]\n", "Data['Scan time'] = [get_scantime(log) for log in Data['LogFile']]\n", "Data['Stacks'] = [get_stacks(log) for log in Data['LogFile']]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Check ringremoval parameters\n", "for machine in Data['Scanner'].unique():\n", " print('For the %s we have '\n", " 'ringartefact-correction values of %s' % (machine,\n", " Data[Data.Scanner==machine]['RingartefactCorrection'].unique()))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for rac in sorted(Data['RingartefactCorrection'].unique()):\n", " print('Ringartefact-correction %02s is found in %03s scans' % (rac, Data[Data.RingartefactCorrection==rac]['RingartefactCorrection'].count()))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Some consistency checks" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# seaborn.scatterplot(data=Data, x='Fish', y='RingartefactCorrection', hue='Scanner')\n", "# plt.title('Ringartefact correction')\n", "# plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Display ringremoval parameters\n", "for scanner in Data.Scanner.unique():\n", " print('----', scanner, '----')\n", " for c, row in Data[Data.Scanner==scanner].iterrows():\n", " if row.RingartefactCorrection != 0:\n", " print('Fish %s scan %s was reconstructed with RAC of %s' % 
(row.Fish,\n", " row.Scan,\n", " row.RingartefactCorrection))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Check beamhardening parameters\n", "for scanner in Data.Scanner.unique():\n", " print('For the %s we have '\n", " 'beamhardening correction values of %s' % (scanner,\n", " Data[Data.Scanner==scanner]['BeamHardeningCorrection'].unique()))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Display beamhardening parameters\n", "for scanner in Data.Scanner.unique():\n", " print('----', scanner, '----')\n", " for c, row in Data[Data.Scanner==scanner].iterrows():\n", " if row.BeamHardeningCorrection != 0:\n", " print('Scan %s of fish %s was reconstructed with beam hardening correction of %s' % (row.Scan,\n", " row.Fish,\n", " row.BeamHardeningCorrection))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# seaborn.scatterplot(data=Data,\n", "# x='Fish',\n", "# y='BeamHardeningCorrection',\n", "# hue='Scanner')\n", "# plt.title('Beamhardening correction')\n", "# plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Check defect pixel masking parameters\n", "for scanner in Data.Scanner.unique():\n", " print('For the %s we have '\n", " 'defect pixel masking values of %s' % (scanner,\n", " Data[Data.Scanner==scanner]['DefectPixelMasking'].unique()))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for dpm in sorted(Data['DefectPixelMasking'].unique()):\n", " print('A defect pixel masking of %02s is found in %03s scans' % (dpm, Data[Data.DefectPixelMasking==dpm]['DefectPixelMasking'].count()))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# seaborn.scatterplot(data=Data, x='Fish', y='DefectPixelMasking', hue='Scanner')\n", "# plt.title('Defect pixel masking')\n", "# plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Display defect pixel masking parameters\n", "for scanner in Data.Scanner.unique():\n", " print('----', scanner, '----')\n", " for c, row in Data[Data.Scanner==scanner].iterrows():\n", " if row.Scanner == 'SkyScan1272' and row.DefectPixelMasking != 50:\n", " print('Fish %s scan %s was reconstructed with DPM of %s' % (row.Fish,\n", " row.Scan,\n", " row.DefectPixelMasking))\n", " if row.Scanner == 'SkyScan2214' and row.DefectPixelMasking != 0:\n", " print('Fish %s scan %s was reconstructed with DPM of %s' % (row.Fish,\n", " row.Scan,\n", " row.DefectPixelMasking)) " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Check and display scan times" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "Data['Scan time total'] = [ st * stk for st, stk in zip(Data['Scan time'], Data['Stacks'])]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# # https://www.geeksforgeeks.org/iterating-over-rows-and-columns-in-pandas-dataframe/\n", "# columns = list(Data)\n", "# columns.remove('Folder') \n", "# columns.remove('Fish')\n", "# columns.remove('LogFile')\n", "# columns.remove('Reconstructions')\n", "# columns.remove('Number of reconstructions')\n", "# columns.remove('Grayvalue')\n", "# columns.remove('Scan time')\n", "# columns.remove('Scan time total')\n", "# columns.remove('Scan date')\n", "# print(columns)\n", "# for col in 
columns:\n", "# print(col)\n", "# print(Data[col].unique())\n", "# print(80*'-') " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# # Check voxel sizes (*rounded* to two after-comma values)\n", "# # If different, spit out which values\n", "# roundto = 2\n", "# if len(Data['Voxelsize'].round(roundto).unique()) > 1:\n", "# print('We scanned all datasets with %s different voxel sizes' % len(Data['Voxelsize'].round(roundto).unique()))\n", "# for vs in sorted(Data['Voxelsize'].round(roundto).unique()):\n", "# print('-', vs, 'um for ', end='')\n", "# for c, row in Data.iterrows():\n", "# if float(vs) == round(row['Voxelsize'], roundto):\n", "# print(os.path.join(row['Fish'], row['Scan']), end=', ')\n", "# print('')\n", "# else:\n", "# print('We scanned all datasets with equal voxel size, namely %s um.' % float(Data['Voxelsize'].round(roundto).unique()))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# if len(Data['Grayvalue'].unique()) > 1:\n", "# print('We reconstructed the datasets with different maximum gray values, namely')\n", "# for gv in Data['Grayvalue'].unique():\n", "# print(gv, 'for Samples ', end='')\n", "# for c, row in Data.iterrows():\n", "# if float(gv) == row['Grayvalue']:\n", "# print(os.path.join(row['Fish'], row['Scan']), end=', ')\n", "# print('')\n", "# else:\n", "# print('We reconstructed all datasets with equal maximum gray value, namely %s.' % Data['Grayvalue'].unique()[0])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Data[['Fish', 'Scan',\n", "# 'Voxelsize', 'Scanner',\n", "# 'Scan date', 'CameraWindow', 'RotationStep', 'Averaging',\n", "# 'Scan time', 'Stacks', 'Scan time total']]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Get an overview over the total scan time\n", "# Nice output based on https://stackoverflow.com/a/8907407/323100\n", "total_seconds = int(Data['Scan time total'].sum().total_seconds())\n", "hours, remainder = divmod(total_seconds,60*60)\n", "minutes, seconds = divmod(remainder,60)\n", "print('In total, we scanned for %s hours and %s minutes)' % (hours, minutes))\n", "for machine in Data['Scanner'].unique():\n", " total_seconds = int(Data[Data['Scanner'] == machine]['Scan time total'].sum().total_seconds())\n", " hours, remainder = divmod(total_seconds,60*60)\n", " minutes, seconds = divmod(remainder,60)\n", " print('\\t - Of these, we scanned %s hours and %s minutes on the %s,'\n", " ' for %s scans' % (hours,\n", " minutes,\n", " machine,\n", " len(Data[Data['Scanner'] == machine])))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Sort our dataframe by scan date\n", "# Data.sort_values(by='Scan date')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "Data.tail()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "Data['PreviewImagePath'] = [sorted(glob.glob(os.path.join(f, '*_spr.bmp')))[0] for f in Data['Folder']]\n", "# Data['PreviewImage'] = [dask_image.imread.imread(pip).squeeze()\n", "# if pip\n", "# else numpy.random.random((100, 100)) for pip in Data['PreviewImagePath']]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Make an approximately square overview image\n", "# lines = 10" ] }, { "cell_type": "code", "execution_count": null, 
"metadata": {}, "outputs": [], "source": [ "# for c, row in Data.iterrows():\n", "# plt.subplot(lines, int(numpy.ceil(len(Data) / float(lines))), c + 1)\n", "# plt.imshow(row.PreviewImage.squeeze())\n", "# plt.title(os.path.join(row['Fish'], row['Scan']))\n", "# plt.gca().add_artist(ScaleBar(row['Voxelsize'],\n", "# 'um',\n", "# color='black',\n", "# frameon=True))\n", "# plt.axis('off')\n", "# plt.tight_layout()\n", "# plt.savefig(os.path.join(Root, 'ScanOverviews.png'),\n", "# bbox_inches='tight')\n", "# plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "tags": [] }, "outputs": [], "source": [ "# Load all reconstructions into ephemereal DASK arrays\n", "Reconstructions = [None] * len(Data)\n", "for c, row in tqdm(Data.iterrows(),\n", " desc='Load reconstructions',\n", " total=len(Data)):\n", " Reconstructions[c] = dask_image.imread.imread(os.path.join(row['Folder'],\n", " '*rec*.png'))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "tags": [] }, "outputs": [], "source": [ "# Check if something went wrong\n", "# for file in Data['OutputNameRec']:\n", "# print(file)\n", "# dask.array.from_zarr(file)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# How big are the datasets?\n", "Data['Size'] = [rec.shape for rec in Reconstructions]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# The three cardinal directions\n", "directions = ['Axial',\n", " 'Coronal',\n", " 'Sagittal']" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# # Read or calculate the middle slices, put them into the dataframe and save them to disk\n", "# for d, direction in enumerate(directions):\n", "# Data['Mid_' + direction] = [None] * len(Reconstructions)\n", "# for c, row in tqdm(Data.iterrows(), desc='Middle images', total=len(Data), leave=False):\n", "# for d, direction in tqdm(enumerate(directions),\n", "# desc='%s/%s' % (row['Fish'], row['Scan']),\n", "# leave=False,\n", "# total=len(directions)):\n", "# outfilepath = os.path.join(os.path.dirname(row['Folder']),\n", "# '%s.%s.Middle.%s.png' % (row['Fish'],\n", "# row['Scan'],\n", "# direction))\n", "# if os.path.exists(outfilepath):\n", "# Data.at[c, 'Mid_' + direction] = dask_image.imread.imread(outfilepath)\n", "# else:\n", "# # Generate requested axial view\n", "# if 'Axial' in direction:\n", "# Data.at[c, 'Mid_' + direction] = Reconstructions[c][Data['Size'][c][0] // 2].compute()\n", "# if 'Sagittal' in direction:\n", "# Data.at[c, 'Mid_' + direction] = Reconstructions[c][:, Data['Size'][c][1] // 2, :].compute()\n", "# if 'Coronal' in direction:\n", "# Data.at[c, 'Mid_' + direction] = Reconstructions[c][:, :, Data['Size'][c][2] // 2].compute()\n", "# # Save the calculated 'direction' view to disk\n", "# imageio.imwrite(outfilepath, (Data.at[c, 'Mid_' + direction]))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# # Show middle slices\n", "# for c, row in tqdm(Data.iterrows(),\n", "# desc='Saving middle images overview',\n", "# total=len(Data),\n", "# leave=False):\n", "# outfilepath = os.path.join(os.path.dirname(row['Folder']),\n", "# '%s.%s.MiddleSlices.png' % (row['Fish'], row['Scan']))\n", "# if not os.path.exists(outfilepath): \n", "# for d, direction in tqdm(enumerate(directions),\n", "# desc='%s/%s' % (row['Fish'], row['Scan']),\n", "# leave=False,\n", "# total=len(directions)):\n", "# plt.subplot(1, 3, d + 
1)\n", "# plt.imshow(row['Mid_' + direction].squeeze())\n", "# if d == 0:\n", "# plt.axhline(row.Size[1] // 2, c=seaborn.color_palette()[0])\n", "# plt.axvline(row.Size[2] // 2, c=seaborn.color_palette()[1])\n", "# plt.gca().add_artist(ScaleBar(row['Voxelsize'],\n", "# 'um',\n", "# color=seaborn.color_palette()[2]))\n", "# elif d == 1:\n", "# plt.axhline(row.Size[0] // 2, c=seaborn.color_palette()[2])\n", "# plt.axvline(row.Size[d] // 2, c=seaborn.color_palette()[1])\n", "# plt.gca().add_artist(ScaleBar(row['Voxelsize'],\n", "# 'um',\n", "# color=seaborn.color_palette()[0]))\n", "# else:\n", "# plt.axhline(row.Size[0] // 2, c=seaborn.color_palette()[2])\n", "# plt.axvline(row.Size[d] // 2, c=seaborn.color_palette()[0])\n", "# plt.gca().add_artist(ScaleBar(row['Voxelsize'],\n", "# 'um',\n", "# color=seaborn.color_palette()[1]))\n", "# plt.title('%s, %s' % (os.path.join(row['Fish'], row['Scan']),\n", "# direction + ' Middle slice'))\n", "# plt.axis('off')\n", "# plt.savefig(outfilepath,\n", "# transparent=True,\n", "# bbox_inches='tight')\n", "# plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Read or calculate the directional MIPs, put them into the dataframe and save them to disk\n", "for d, direction in enumerate(directions):\n", " Data['MIP_' + direction] = [None] * len(Reconstructions)\n", "for c, row in tqdm(Data.iterrows(), desc='MIPs', total=len(Data), leave=False):\n", " for d, direction in tqdm(enumerate(directions),\n", " desc='%s/%s' % (row['Fish'], row['Scan']),\n", " leave=False,\n", " total=len(directions)):\n", " outfilepath = os.path.join(os.path.dirname(row['Folder']),\n", " '%s.%s.MIP.%s.png' % (row['Fish'],\n", " row['Scan'],\n", " direction))\n", " if os.path.exists(outfilepath):\n", " Data.at[c, 'MIP_' + direction] = dask_image.imread.imread(outfilepath)\n", " else:\n", " # Generate MIP\n", " Data.at[c, 'MIP_' + direction] = Reconstructions[c].max(axis=d).compute()\n", " # Save it out\n", " imageio.imwrite(outfilepath, Data.at[c, 'MIP_' + direction].astype('uint8'))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Show MIP slices\n", "for c, row in tqdm(Data.iterrows(),\n", " desc='Saving MIP images overview',\n", " total=len(Data),\n", " leave=False):\n", " outfilepath = os.path.join(os.path.dirname(row['Folder']),\n", " '%s.%s.MIPs.png' % (row['Fish'], row['Scan']))\n", " if not os.path.exists(outfilepath): \n", " for d, direction in tqdm(enumerate(directions),\n", " desc='%s/%s' % (row['Fish'], row['Scan']),\n", " leave=False,\n", " total=len(directions)):\n", " plt.subplot(1, 3, d + 1)\n", " plt.imshow(row['MIP_' + direction].squeeze())\n", " plt.gca().add_artist(ScaleBar(row['Voxelsize'],\n", " 'um'))\n", " plt.title('%s, %s' % (os.path.join(row['Fish'], row['Scan']),\n", " direction + ' MIP'))\n", " plt.axis('off')\n", " plt.savefig(outfilepath,\n", " transparent=True,\n", " bbox_inches='tight')\n", " plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Calculate the histograms of one of the MIPs\n", "# Caveat: dask.da.histogram returns histogram AND bins, making each histogram a 'nested' list of [h, b]\n", "Data['Histogram'] = [dask.array.histogram(dask.array.array(mip.squeeze()),\n", " bins=2**8,\n", " range=[0, 2**8]) for mip in Data['MIP_Coronal']]\n", "# Actually compute the data and put only h into the dataframe, since we use it quite often below.\n", "# Discard the bins\n", 
"Data['Histogram'] = [h for h,b in Data['Histogram']]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def overeexposecheck(item, threshold=250, howmanypercent=0.1, whichone='Coronal', verbose=False):\n", " '''Function to check if a certain amount of voxels are brighter than a certain value'''\n", " if (Data['MIP_%s' % whichone][item]>threshold).sum() > (Data['MIP_Sagittal'][item].size * howmanypercent / 100):\n", " if verbose:\n", " plt.imshow(Data['MIP_%s' % whichone][item].squeeze())\n", " plt.imshow(numpy.ma.masked_equal(Data['MIP_%s' % whichone][item].squeeze()>threshold, False),\n", " cmap='viridis_r',\n", " alpha=.618)\n", " plt.title('%s/%s\\n%s of %s Mpixels (>%s%%) are brighter '\n", " 'than %s' % (Data['Fish'][item],\n", " Data['Scan'][item],\n", " (Data['MIP_%s' % whichone][item]>threshold).sum().compute(),\n", " round(1e-6 * Data['MIP_%s' % whichone][item].size,2),\n", " howmanypercent,\n", " threshold))\n", " plt.axis('off')\n", " plt.gca().add_artist(ScaleBar(Data['Voxelsize'][item],\n", " 'um'))\n", " plt.show()\n", " return(True)\n", " else:\n", " return(False) " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Check if 'too much' of the MIP is overexposed\n", "# TODO: How much is 'too much'?\n", "Data['OverExposed'] = [overeexposecheck(c,\n", " whichone='Coronal',\n", " verbose=True) for c, row in Data.iterrows()]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "for c, row in sorted(Data.iterrows()):\n", " plt.semilogy(row.Histogram, label='%s/%s' % (row.Fish, row.Scan))\n", "plt.xlim([0, 255])\n", "plt.legend()\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print('At the moment, we have previewed %s scans of %s fishes in %s' % (len(Data),\n", " len(Data.Fish.unique()),\n", " Root))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 4 }