{ "metadata": { "name": "" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Vyhodnocovac\u00ed syst\u00e9m" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Hodnocen\u00ed prac\u00ed prob\u00edh\u00e1 pomoc\u00ed n\u00e1sleduj\u00edc\u00edho k\u00f3du. Pr\u00e1ce jsou automaticky sta\u017eeny z adres zadan\u00fdch v \"solutions_list.ipynb\". Pak jsou spu\u0161t\u011bny a v\u0161echny otestov\u00e1ny na shodn\u00e9 n\u00e1hodn\u00e9 podskupin\u011b testovac\u00edho datasetu. Na v\u00fdsledky je pak mo\u017eno nav\u00e1zat ve skriptu \"ZDO2014tabulka.ipynb\"." ] }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Pozn\u00e1mky pro spou\u0161t\u011bn\u00ed testu" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "V p\u0159\u00edpad\u011b opakovan\u00e9ho spou\u0161t\u011bn\u00ed testov\u00e1n\u00ed je z d\u016fvodu opakovan\u00fdch import\u016f modul\u016f se stejn\u00fdmi jm\u00e9ny nezbytn\u00e9 restartovat kernel. 
To lze ud\u011blat pomoc\u00ed Kernel -> Restart\n", "\n", "V\u00fdchoz\u00ed adres\u00e1\u0159 s testovac\u00edmi daty je nastaven na n\u00e1sleduj\u00edc\u00ed cestu\n", "\n", " /home/mjirik/data/zdo2014/zdo2014-znacky-testing/" ] }, { "cell_type": "code", "collapsed": false, "input": [ "\n", "import signal\n", "from contextlib import contextmanager\n", "import skimage\n", "import skimage.io\n", "import numpy as np\n", "import urllib\n", "import glob\n", "import zipfile\n", "import shutil\n", "import sys\n", "import os\n", "import os.path\n", "import traceback\n", "import datetime\n", "import pickle\n", "import random\n", "\n", "#print dir(urllib)\n", "#import urllib.request\n", "\n", "# ziskani seznamu \n", "import solutions_list\n", "%run solutions_list\n", "\n", "# cesta k trenovacim datum\n", "test_data_path = '/home/mjirik/data/zdo2014/zdo2014-znacky-testing/'\n", "\n", "# Nastaveni logovaciho souboru\n", "import logging\n", "logger = logging.getLogger()\n", "logger.setLevel(logging.DEBUG)\n", "logging.basicConfig(filename='znacky.log', level=logging.DEBUG, format='%(asctime)s %(levelname)s:%(message)s')" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Na\u010d\u00edt\u00e1n\u00ed dat" ] }, { "cell_type": "code", "collapsed": false, "input": [ "def readImageDir(path):\n", " \"\"\"\n", " Na\u010d\u00edt\u00e1n\u00ed dat z adres\u00e1\u0159ov\u00e9 struktury. 
Funkce vrac\u00ed seznam soubor\u016f a seznam label\u016f.\n", " Ty jsou z\u00edsk\u00e1v\u00e1ny z n\u00e1zv\u016f jednotliv\u00fdch slo\u017eek.\n", " \"\"\"\n", " dirs = glob.glob(os.path.join(os.path.normpath(path) ,'*'))\n", " labels = []\n", " files = []\n", " for onedir in dirs:\n", " #print onedir\n", " base, lab = os.path.split(onedir)\n", " if os.path.isdir(onedir):\n", " filesInDir = glob.glob(os.path.join(onedir, '*'))\n", " for onefile in filesInDir:\n", " labels.append(lab)\n", " files.append(onefile)\n", " \n", " return files, labels" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Funkce pro spu\u0161t\u011bn\u00ed studentsk\u00fdch \u0159e\u0161en\u00ed" ] }, { "cell_type": "code", "collapsed": false, "input": [ "class TimeoutException(Exception): pass\n", "\n", "@contextmanager\n", "def time_limit(seconds):\n", " def signal_handler(signum, frame):\n", " raise TimeoutException, \"Timed out!\"\n", " signal.signal(signal.SIGALRM, signal_handler)\n", " signal.alarm(seconds)\n", " try:\n", " yield\n", " finally:\n", " signal.alarm(0)\n", "\n", " \n", "def kontrola(ukazatel, obrazky, reseni):\n", " \"\"\"\n", " Kontrola jednoho studentsk\u00e9ho \u0159e\u0161en\u00ed\n", " \"\"\"\n", " studentske_reseni = ukazatel() # tim je zavol\u00e1n v\u00e1\u0161 konstruktor __init__\n", " \n", " vysledky = []\n", " \n", " for i in range(0, len(obrazky)):\n", " im = skimage.io.imread(obrazky[i])\n", "\n", " # Zde je vol\u00e1na va\u0161e funkce rozpoznejZnacku\n", " try:\n", " with time_limit(1):\n", " result = studentske_reseni.rozpoznejZnacku(im)\n", " except TimeoutException, msg:\n", " print \"Timed out!\"\n", " result = None\n", " \n", " vysledky.append(result)\n", "\n", " hodnoceni = np.array(reseni) == np.array(vysledky)\n", " \n", " skore = np.sum(hodnoceni.astype(np.int)) / np.float(len(reseni))\n", " \n", " return skore, vysledky\n", "\n", "def my_import(name):\n", 
" __import__(name)\n", " return sys.modules[name]\n", "\n", "def downloadAll(soldir):\n", " try:\n", " shutil.rmtree(soldir)\n", " except:\n", " print \"Problem with rmtree\"\n", " traceback.print_exc()\n", " \n", " try:\n", " os.mkdir(soldir)\n", " open(os.path.join(soldir, \"__init__.py\"), 'a').close() \n", " except:\n", " print \"Problem with mkdir\"\n", " traceback.print_exc()\n", " \n", " # download all solutions\n", " for one in solutions_list:\n", " zip_path = os.path.join(soldir, one[1] + '.zip')\n", " urllib.urlretrieve(one[0], zip_path)\n", " \n", " zf = zipfile.ZipFile(zip_path, \"r\")\n", " zf.extractall(soldir)\n", " \n", " teamsoldir = zip_path = os.path.join(soldir, one[1])\n", " # create __init__.py \n", " open(os.path.join(teamsoldir, \"__init__.py\"), 'a').close() \n", " \n", " # remove '-' from package path\n", " teamsoldirnew = teamsoldir.replace('-', '') + one[4]\n", " os.rename(teamsoldir, teamsoldirnew)\n", " \n", "def kontrolaVse():\n", " \"\"\"\n", " Kontrola v\u0161ech studentsk\u00fdch \u0159e\u0161en\u00ed\n", " \"\"\"\n", " \n", " soldir = 'ZDO2014students'\n", " downloadAll(soldir)\n", " \n", " \n", " scores = []\n", " teams = []\n", " stamps = []\n", " classifs = []\n", " \n", " \n", "\n", " filesall, labelsall = readImageDir(test_data_path)\n", " \n", " #sklearn.cross_validation.train_test_split(\n", "\n", " init_seed = int(random.random()*5.0)\n", " \n", " obrazky = filesall[init_seed::6]\n", " reseni = labelsall[init_seed::6]\n", " \n", " classifs.append(reseni)\n", " \n", " print \"Total image number: \", len(obrazky)\n", " logger.debug(\"Total image number: \" + str(len(obrazky)))\n", " # evaluate solutions\n", " for one in solutions_list:\n", " logger.debug(\"Team: \" + one[3])\n", " scoreone = 0\n", " vysledky = []\n", " try:\n", " imp1 = my_import('ZDO2014students.' + one[1].replace('-', '') + one[4])\n", " imp2 = my_import('ZDO2014students.' + one[1].replace('-', '') + one[4]\n", " + '.' 
+ one[2])\n", " #print imp2\n", " pointer = imp2.Znacky\n", " #import ZDO2014sample_solution\n", " #pointer = ZDO2014sample_solution.Znacky\n", " scoreone, vysledky = kontrola(pointer, obrazky, reseni)\n", " except:\n", " \n", " print \"Problem with: \" + one[3]\n", " exc = traceback.format_exc()\n", " print exc\n", " logger.debug(\"Problem with: \" + one[3])\n", " logger.debug(exc)\n", " print \"------------------------\"\n", " vysledky = reseni\n", " teams.append(one[3])\n", " scores.append(scoreone)\n", " classifs.append(vysledky)\n", " \n", "\n", " #this is whole timestamp. I want only date\n", " dat = datetime.datetime.now()\n", " #nw = datetime.datetime.now()\n", " #dat = datetime.date(nw.year, nw.month, nw.day)\n", " stamps.append(dat)\n", " \n", " \n", " #print teams\n", " #print scores\n", " return teams, scores, stamps, classifs" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 3 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Ukl\u00e1d\u00e1n\u00ed v\u00fdsledk\u016f" ] }, { "cell_type": "code", "collapsed": false, "input": [ "def saveAndMergeEvaluation(teams, scores, stamps, \n", " filename=\"ZDO2014evaluation.csv\"):\n", " # write to csv\n", " import pandas as pd\n", " try:\n", " orig_data = pd.read_csv(filename)\n", " #print 'orig data'\n", " #print orig_data\n", " except:\n", " print \"Cannot read stored data\"\n", " \n", " dates = []\n", " times = []\n", " for stamp in stamps:\n", " dat = stamp.date()\n", " tm = stamp.time()\n", " dates.append(dat)\n", " times.append(tm)\n", " \n", " npdata = np.array([teams, scores, dates, times]).T\n", " df = pd.DataFrame(npdata, columns=['team', 'score', 'date', 'time'])\n", " \n", " try:\n", " # there could be wrong format\n", " df = orig_data.append(df, ignore_index=True)\n", " except:\n", " print \"wrong format of input csv, rewriting\"\n", " \n", " df.to_csv(filename, index=False)\n", " \n", " return df\n", "\n", "def saveClassifs(teams, classifs, 
filename='ZDO2014classifs.csv'):\n", " \"\"\"\n", " Z\u00e1pis ve\u0161ker\u00fdch klasifikac\u00ed do csv souboru\n", " \"\"\"\n", " import pandas as pd\n", " cl = teams[:]\n", " cl.insert(0, 'reference')\n", " # vytvo\u0159 vhodn\u00fd form\u00e1t pro ulo\u017een\u00ed\n", " aa = {cl[i]:classifs[i] for i in range(0, len(cl))}\n", " \n", " df = pd.DataFrame(aa)\n", " df.to_csv(filename, index=False)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 4 }, { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Vyhodnocen\u00ed" ] }, { "cell_type": "code", "collapsed": false, "input": [ "if __name__ == \"__main__\":\n", " teams, scores, stamps, classifs = kontrolaVse()\n", " tdf = saveAndMergeEvaluation(teams, scores, stamps)\n", " saveClassifs(teams, classifs)\n", " \n", " #printEvaluation(teams, scores, stamps)\n" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stderr", "text": [ "DEBUG:root:Total image number: 103\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ "DEBUG:root:Team: sample M. Jirik\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ "DEBUG:ZDO2014students.ZDO2014sample_solutionmaster.ZDO2014sample_solution:Ukazka loggovani, nacteni klasifikatoru ok\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ "DEBUG:root:Team: cervenym\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ "DEBUG:root:Team: nedvedj\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ "/usr/lib/python2.7/dist-packages/skimage/exposure/exposure.py:51: UserWarning: This might be a color image. The histogram will be computed on the flattened image. You can instead apply this function to each color channel.\n", " warnings.warn(\"This might be a color image. 
The histogram will be \"\n", "DEBUG:root:Team: RakHejdovaJuna\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ "DEBUG:ZDO2014students.Semestralkamaster1.ZDO2014juna_rak_hejdova_hog:Ukazka loggovani, nacteni klasifikatoru ok\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Total image number: 103\n" ] } ], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "print tdf.tail(50)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ " team score date time\n", "122 RakHejdovaJuna2 0.7692308 2014-06-02 11:02:35.514358\n", "123 nedvedj2 0.009615385 2014-06-02 11:02:40.192876\n", "124 sample M. Jirik 0.2718447 2014-06-02 11:05:16.284897\n", "125 cervenym 0.6019417 2014-06-02 11:05:25.691898\n", "126 nedvedj 0 2014-06-02 11:05:25.967018\n", "127 RakHejdovaJuna 0 2014-06-02 11:05:26.084831\n", "128 RakHejdovaJuna2 0.7378641 2014-06-02 11:05:31.869983\n", "129 nedvedj2 0.01941748 2014-06-02 11:05:36.611275\n", "130 sample M. Jirik 0 2014-06-10 10:24:30.635871\n", "131 cervenym 0.6172616 2014-06-10 10:26:27.343067\n", "132 nedvedj 0.3239946 2014-06-10 10:27:20.666130\n", "133 RakHejdovaJuna 0.9408043 2014-06-10 10:28:26.399551\n", "134 RakHejdovaJuna2 0 2014-06-10 10:28:26.485093\n", "135 nedvedj2 0 2014-06-10 10:28:26.662503\n", "136 sample M. Jirik 0 2014-06-10 10:30:10.339569\n", "137 cervenym 0.6184448 2014-06-10 10:32:06.897540\n", "138 nedvedj 0.3449367 2014-06-10 10:32:59.784266\n", "139 RakHejdovaJuna 0.9398734 2014-06-10 10:34:05.136954\n", "140 RakHejdovaJuna2 0 2014-06-10 10:34:05.223635\n", "141 nedvedj2 0 2014-06-10 10:34:05.401491\n", "142 sample M. 
Jirik 0 2014-06-10 10:40:05.238037\n", "143 cervenym 0.6292948 2014-06-10 10:42:01.262574\n", "144 nedvedj 0.3227848 2014-06-10 10:42:54.287133\n", "145 RakHejdovaJuna 0.9407776 2014-06-10 10:43:58.248363\n", "146 RakHejdovaJuna2 0 2014-06-10 10:43:58.333504\n", "147 nedvedj2 0 2014-06-10 10:43:58.510197\n", "148 sample M. Jirik 0.3004971 2014-06-10 10:53:00.830229\n", "149 cervenym 0.6172616 2014-06-10 10:54:55.937289\n", "150 nedvedj 0.3239946 2014-06-10 10:55:49.244489\n", "151 RakHejdovaJuna 0.9408043 2014-06-10 10:56:52.098387\n", "152 sample M. Jirik 0.2983725 2014-06-20 09:47:57.243949\n", "153 cervenym 0.6184448 2014-06-20 09:49:51.402369\n", "154 nedvedj 0 2014-06-20 09:49:51.403685\n", "155 RakHejdovaJuna 0.9398734 2014-06-20 09:50:55.445074\n", "156 sample M. Jirik 0.25 2014-06-23 09:26:35.657323\n", "157 cervenym 0.6730769 2014-06-23 09:26:42.847864\n", "158 nedvedj 0.6730769 2014-06-23 09:26:44.600640\n", "159 RakHejdovaJuna 0.7596154 2014-06-23 09:26:49.038464\n", "160 sample M. Jirik 0.223301 2014-06-23 09:50:27.159108\n", "161 cervenym 0.592233 2014-06-23 09:50:33.911572\n", "162 nedvedj 0.6990291 2014-06-23 09:50:35.571370\n", "163 RakHejdovaJuna 0.7184466 2014-06-23 09:50:39.678260\n", "164 sample M. Jirik 0.223301 2014-06-23 09:55:08.823339\n", "165 cervenym 0.592233 2014-06-23 09:55:15.655003\n", "166 nedvedj 0.6990291 2014-06-23 09:55:17.301895\n", "167 RakHejdovaJuna 0.7184466 2014-06-23 09:55:21.398045\n", "168 sample M. Jirik 0.1747573 2014-06-23 09:57:23.405620\n", "169 cervenym 0.5436893 2014-06-23 09:57:30.615811\n", "170 nedvedj 0.6893204 2014-06-23 09:57:32.268525\n", "171 RakHejdovaJuna 0.8543689 2014-06-23 09:57:36.362442\n", "\n", "[50 rows x 4 columns]\n" ] } ], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 6 } ], "metadata": {} } ] }