{ "metadata": { "name": "" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Kosti\u0107, \u0110. 1999. Frekvencijski re\u010dnik savremenog srpskog jezika (Frequency Dictionary of Contemporary Serbian Language). Institute for Experimental Phonetics and Speech Pathology & Laboratory of Experimental Psychology, University of Belgrade, Serbia.\n", "\n", "Baayen, R. H., Milin, P., Filipovi\u0107 \u0110ur\u0111evi\u0107, D., Hendrix, P. and Marelli, M. 2011. \"An amorphous model for morphological processing in visual comprehension based on naive discriminative learning.\" *Psychological Review* 118:438-482.\n" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import pandas as pd\n", "import pandas.rpy.common as com\n", "import numpy as np\n", "from sklearn.feature_extraction import DictVectorizer\n", "\n", "%load_ext autoreload\n", "%autoreload 2\n", "\n", "%load_ext rmagic\n", "\n", "%precision 2" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 1, "text": [ "u'%.2f'" ] } ], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "from ndl import *" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "%%R\n", "library(ndl)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "display_data", "text": [ "This is ndl version 0.2.16. \n", "For an overview of the package, type 'help(\"ndl.package\")'.\n" ] } ], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "serbian = com.load_data('serbian')\n", "serbian['Cues'] = orthoCoding(serbian.WordForm,grams=2)\n", "serbian['Outcomes'] = [tuple(c.split('_')) for c in serbian.LemmaCase]\n", "serbian.head()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
WordFormLemmaCaseFrequencyCuesOutcomes
1 yena yena_nom_Sg 576 (#y, ye, en, na, a#) (yena, nom, Sg)
2 yene yena_gen_Sg 229 (#y, ye, en, ne, e#) (yena, gen, Sg)
3 yeni yena_dat_Sg 55 (#y, ye, en, ni, i#) (yena, dat, Sg)
4 yenu yena_acc_Sg 167 (#y, ye, en, nu, u#) (yena, acc, Sg)
5 yenom yena_ins_Sg 39 (#y, ye, en, no, om, m#) (yena, ins, Sg)
\n", "

5 rows \u00d7 5 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 4, "text": [ " WordForm LemmaCase Frequency Cues Outcomes\n", "1 yena yena_nom_Sg 576 (#y, ye, en, na, a#) (yena, nom, Sg)\n", "2 yene yena_gen_Sg 229 (#y, ye, en, ne, e#) (yena, gen, Sg)\n", "3 yeni yena_dat_Sg 55 (#y, ye, en, ni, i#) (yena, dat, Sg)\n", "4 yenu yena_acc_Sg 167 (#y, ye, en, nu, u#) (yena, acc, Sg)\n", "5 yenom yena_ins_Sg 39 (#y, ye, en, no, om, m#) (yena, ins, Sg)\n", "\n", "[5 rows x 5 columns]" ] } ], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [ "sw = ndl(serbian)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "sw.head()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PlSgaccakademijaaparatbitkabogbojabolborboracbrazdabrdobridbrigabrigadabrodburacenacesta
#a-0.467714 0.952442 0.326035 0.471536 0.506614-0.001006-0.032020 0.037598-0.028174 0.015433-0.011195 0.006800 0.003051 0.000134 0.015739-0.039569 0.017378-0.002766 0.030046-0.052284...
#b-0.069955 0.556007 0.095385-0.005611-0.012985 0.079436 0.121653 0.066181 0.254183 0.097360 0.109091 0.027724-0.003189 0.009077 0.036710 0.022223 0.106195 0.000795-0.000225-0.019009...
#c-0.099439 0.585776 0.110130 0.002622-0.021386 0.010436-0.004947 0.016928-0.016833-0.005711-0.021122-0.009816 0.002113-0.007284 0.023586-0.012067 0.007555 0.000627 0.577554 0.344233...
#d-0.017994 0.500962 0.077627-0.014584-0.004215-0.002597 0.015613-0.035485 0.024504 0.006509 0.007348-0.012459-0.045132-0.002656 0.027050-0.018789 0.013864 0.000444-0.025700 0.006292...
#e-0.465341 0.925997-0.093245 0.038236-0.056175 0.024248 0.112225-0.042413-0.031564-0.030352-0.037280-0.028162 0.002051 0.010243 0.078581 0.000173-0.073940 0.000415-0.023138 0.004618...
\n", "

5 rows \u00d7 278 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 6, "text": [ " Pl Sg acc akademija aparat bitka bog \\\n", "#a -0.467714 0.952442 0.326035 0.471536 0.506614 -0.001006 -0.032020 \n", "#b -0.069955 0.556007 0.095385 -0.005611 -0.012985 0.079436 0.121653 \n", "#c -0.099439 0.585776 0.110130 0.002622 -0.021386 0.010436 -0.004947 \n", "#d -0.017994 0.500962 0.077627 -0.014584 -0.004215 -0.002597 0.015613 \n", "#e -0.465341 0.925997 -0.093245 0.038236 -0.056175 0.024248 0.112225 \n", "\n", " boja bol bor borac brazda brdo brid \\\n", "#a 0.037598 -0.028174 0.015433 -0.011195 0.006800 0.003051 0.000134 \n", "#b 0.066181 0.254183 0.097360 0.109091 0.027724 -0.003189 0.009077 \n", "#c 0.016928 -0.016833 -0.005711 -0.021122 -0.009816 0.002113 -0.007284 \n", "#d -0.035485 0.024504 0.006509 0.007348 -0.012459 -0.045132 -0.002656 \n", "#e -0.042413 -0.031564 -0.030352 -0.037280 -0.028162 0.002051 0.010243 \n", "\n", " briga brigada brod bura cena cesta \n", "#a 0.015739 -0.039569 0.017378 -0.002766 0.030046 -0.052284 ... \n", "#b 0.036710 0.022223 0.106195 0.000795 -0.000225 -0.019009 ... \n", "#c 0.023586 -0.012067 0.007555 0.000627 0.577554 0.344233 ... \n", "#d 0.027050 -0.018789 0.013864 0.000444 -0.025700 0.006292 ... \n", "#e 0.078581 0.000173 -0.073940 0.000415 -0.023138 0.004618 ... 
\n", "\n", "[5 rows x 278 columns]" ] } ], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "# Labels of the two inflectional outcome classes; any other outcome label is treated as a lemma.\n", "num = ['Sg','Pl']\n", "case = ['nom','gen','dat','acc','ins','loc']\n", "infl = num + case\n", "\n", "# For every word form, predict the top-ranked lemma, case and number.\n", "predict = [ ]\n", "for cue in serbian.Cues:\n", "    # assumes activation() returns a pandas Series indexed by outcome label -- TODO confirm\n", "    A = activation(cue,sw)\n", "    # relies on Series.sort reordering A in place (old pandas API), largest value first\n", "    A.sort(ascending=False)\n", "    # slots are (lemma, case, number), the same order as the tuples in serbian.Outcomes\n", "    res = [ None, None, None ]\n", "    for ind in A.index:\n", "        if ind in num:\n", "            slot = 2\n", "        elif ind in case:\n", "            slot = 1\n", "        else:\n", "            slot = 0\n", "        # Fill each slot only once: labels arrive from highest to lowest rank, so a later\n", "        # label of the same kind must not overwrite the one already chosen.\n", "        if res[slot] is None:\n", "            res[slot] = ind\n", "            if None not in res:\n", "                break\n", "    predict.append(tuple(res))\n", "serbian['Predicted'] = predict" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [ "serbian" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
WordFormLemmaCaseFrequencyCuesOutcomesPredicted
1 yena yena_nom_Sg 576 (#y, ye, en, na, a#) (yena, nom, Sg) (yena, nom, Sg)
2 yene yena_gen_Sg 229 (#y, ye, en, ne, e#) (yena, gen, Sg) (yena, nom, Pl)
3 yeni yena_dat_Sg 55 (#y, ye, en, ni, i#) (yena, dat, Sg) (yena, nom, Sg)
4 yenu yena_acc_Sg 167 (#y, ye, en, nu, u#) (yena, acc, Sg) (yena, acc, Sg)
5 yenom yena_ins_Sg 39 (#y, ye, en, no, om, m#) (yena, ins, Sg) (yena, ins, Sg)
6 yeni yena_loc_Sg 16 (#y, ye, en, ni, i#) (yena, loc, Sg) (yena, nom, Sg)
7 yene yena_nom_Pl 415 (#y, ye, en, ne, e#) (yena, nom, Pl) (yena, nom, Pl)
8 yena yena_gen_Pl 336 (#y, ye, en, na, a#) (yena, gen, Pl) (yena, nom, Sg)
9 yenama yena_dat_Pl 33 (#y, ye, en, na, am, ma, a#) (yena, dat, Pl) (yena, loc, Pl)
10 yene yena_acc_Pl 136 (#y, ye, en, ne, e#) (yena, acc, Pl) (yena, nom, Pl)
11 yenama yena_ins_Pl 24 (#y, ye, en, na, am, ma, a#) (yena, ins, Pl) (yena, loc, Pl)
12 yenama yena_loc_Pl 4 (#y, ye, en, na, am, ma, a#) (yena, loc, Pl) (yena, loc, Pl)
13 yeqa yeqa_nom_Sg 179 (#y, ye, eq, qa, a#) (yeqa, nom, Sg) (yeqa, nom, Sg)
14 yeqe yeqa_gen_Sg 54 (#y, ye, eq, qe, e#) (yeqa, gen, Sg) (yeqa, gen, Sg)
15 yeqi yeqa_dat_Sg 7 (#y, ye, eq, qi, i#) (yeqa, dat, Sg) (yeqa, loc, Sg)
16 yequ yeqa_acc_Sg 95 (#y, ye, eq, qu, u#) (yeqa, acc, Sg) (yeqa, acc, Sg)
17 yeqom yeqa_ins_Sg 30 (#y, ye, eq, qo, om, m#) (yeqa, ins, Sg) (yeqa, ins, Sg)
18 yeqi yeqa_loc_Sg 43 (#y, ye, eq, qi, i#) (yeqa, loc, Sg) (yeqa, loc, Sg)
19 yeqe yeqa_nom_Pl 102 (#y, ye, eq, qe, e#) (yeqa, nom, Pl) (yeqa, gen, Sg)
20 yeqa yeqa_gen_Pl 164 (#y, ye, eq, qa, a#) (yeqa, gen, Pl) (yeqa, nom, Sg)
21 yeqama yeqa_dat_Pl 3 (#y, ye, eq, qa, am, ma, a#) (yeqa, dat, Pl) (yeqa, loc, Pl)
22 yeqe yeqa_acc_Pl 84 (#y, ye, eq, qe, e#) (yeqa, acc, Pl) (yeqa, gen, Sg)
23 yeqama yeqa_ins_Pl 14 (#y, ye, eq, qa, am, ma, a#) (yeqa, ins, Pl) (yeqa, loc, Pl)
24 yeqama yeqa_loc_Pl 7 (#y, ye, eq, qa, am, ma, a#) (yeqa, loc, Pl) (yeqa, loc, Pl)
25 yivot yivot_nom_Sg 991 (#y, yi, iv, vo, ot, t#) (yivot, nom, Sg) (yivot, nom, Sg)
26 yivota yivot_gen_Sg 1004 (#y, yi, iv, vo, ot, ta, a#) (yivot, gen, Sg) (yivot, gen, Sg)
27 yivotu yivot_dat_Sg 100 (#y, yi, iv, vo, ot, tu, u#) (yivot, dat, Sg) (yivot, loc, Sg)
28 yivot yivot_acc_Sg 799 (#y, yi, iv, vo, ot, t#) (yivot, acc, Sg) (yivot, nom, Sg)
29 yivotom yivot_ins_Sg 142 (#y, yi, iv, vo, ot, to, om, m#) (yivot, ins, Sg) (yivot, ins, Sg)
30 yivotu yivot_loc_Sg 248 (#y, yi, iv, vo, ot, tu, u#) (yivot, loc, Sg) (yivot, loc, Sg)
31 yivoti yivot_nom_Pl 22 (#y, yi, iv, vo, ot, ti, i#) (yivot, nom, Pl) (yivot, gen, Sg)
32 yivota yivot_gen_Pl 30 (#y, yi, iv, vo, ot, ta, a#) (yivot, gen, Pl) (yivot, gen, Sg)
33 yivotima yivot_dat_Pl 3 (#y, yi, iv, vo, ot, ti, im, ma, a#) (yivot, dat, Pl) (yivot, ins, Pl)
34 yivote yivot_acc_Pl 52 (#y, yi, iv, vo, ot, te, e#) (yivot, acc, Pl) (yivot, gen, Sg)
35 yivotima yivot_ins_Pl 5 (#y, yi, iv, vo, ot, ti, im, ma, a#) (yivot, ins, Pl) (yivot, ins, Pl)
36 yivotima yivot_loc_Pl 2 (#y, yi, iv, vo, ot, ti, im, ma, a#) (yivot, loc, Pl) (yivot, ins, Pl)
37 {etwa {etwa_nom_Sg 33 (#{, {e, et, tw, wa, a#) ({etwa, nom, Sg) ({etwa, gen, Sg)
38 {etwe {etwa_gen_Sg 10 (#{, {e, et, tw, we, e#) ({etwa, gen, Sg) ({etwa, nom, Sg)
39 {etwi {etwa_dat_Sg 1 (#{, {e, et, tw, wi, i#) ({etwa, dat, Sg) ({etwa, nom, Pl)
40 {etwu {etwa_acc_Sg 29 (#{, {e, et, tw, wu, u#) ({etwa, acc, Sg) ({etwa, loc, Sg)
41 {etwom {etwa_ins_Sg 5 (#{, {e, et, tw, wo, om, m#) ({etwa, ins, Sg) ({etwa, ins, Sg)
42 {etwi {etwa_loc_Sg 12 (#{, {e, et, tw, wi, i#) ({etwa, loc, Sg) ({etwa, nom, Pl)
43 {etwe {etwa_nom_Pl 6 (#{, {e, et, tw, we, e#) ({etwa, nom, Pl) ({etwa, nom, Sg)
44 {etwi {etwa_gen_Pl 5 (#{, {e, et, tw, wi, i#) ({etwa, gen, Pl) ({etwa, nom, Pl)
45 {etwama {etwa_dat_Pl 1 (#{, {e, et, tw, wa, am, ma, a#) ({etwa, dat, Pl) ({etwa, ins, Pl)
46 {etwe {etwa_acc_Pl 11 (#{, {e, et, tw, we, e#) ({etwa, acc, Pl) ({etwa, nom, Sg)
47 {etwama {etwa_ins_Pl 2 (#{, {e, et, tw, wa, am, ma, a#) ({etwa, ins, Pl) ({etwa, ins, Pl)
48 {etwama {etwa_loc_Pl 2 (#{, {e, et, tw, wa, am, ma, a#) ({etwa, loc, Pl) ({etwa, ins, Pl)
49 {irina {irina_nom_Sg 16 (#{, {i, ir, ri, in, na, a#) ({irina, nom, Sg) ({irina, gen, Sg)
50 {irine {irina_gen_Sg 28 (#{, {i, ir, ri, in, ne, e#) ({irina, gen, Sg) ({irina, acc, Pl)
51 {irini {irina_dat_Sg 3 (#{, {i, ir, ri, in, ni, i#) ({irina, dat, Sg) ({irina, loc, Sg)
52 {irinu {irina_acc_Sg 17 (#{, {i, ir, ri, in, nu, u#) ({irina, acc, Sg) ({irina, acc, Sg)
53 {irinom {irina_ins_Sg 20 (#{, {i, ir, ri, in, no, om, m#) ({irina, ins, Sg) ({irina, ins, Sg)
54 {irini {irina_loc_Sg 17 (#{, {i, ir, ri, in, ni, i#) ({irina, loc, Sg) ({irina, loc, Sg)
55 {irine {irina_nom_Pl 11 (#{, {i, ir, ri, in, ne, e#) ({irina, nom, Pl) ({irina, acc, Pl)
56 {irina {irina_gen_Pl 12 (#{, {i, ir, ri, in, na, a#) ({irina, gen, Pl) ({irina, gen, Sg)
57 {irinama {irina_dat_Pl 2 (#{, {i, ir, ri, in, na, am, ma, a#) ({irina, dat, Pl) ({irina, loc, Pl)
58 {irine {irina_acc_Pl 23 (#{, {i, ir, ri, in, ne, e#) ({irina, acc, Pl) ({irina, acc, Pl)
59 {irinama {irina_ins_Pl 2 (#{, {i, ir, ri, in, na, am, ma, a#) ({irina, ins, Pl) ({irina, loc, Pl)
60 {irinama {irina_loc_Pl 3 (#{, {i, ir, ri, in, na, am, ma, a#) ({irina, loc, Pl) ({irina, loc, Pl)
..................
\n", "

3240 rows \u00d7 6 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 8, "text": [ " WordForm LemmaCase Frequency Cues \\\n", "1 yena yena_nom_Sg 576 (#y, ye, en, na, a#) \n", "2 yene yena_gen_Sg 229 (#y, ye, en, ne, e#) \n", "3 yeni yena_dat_Sg 55 (#y, ye, en, ni, i#) \n", "4 yenu yena_acc_Sg 167 (#y, ye, en, nu, u#) \n", "5 yenom yena_ins_Sg 39 (#y, ye, en, no, om, m#) \n", "6 yeni yena_loc_Sg 16 (#y, ye, en, ni, i#) \n", "7 yene yena_nom_Pl 415 (#y, ye, en, ne, e#) \n", "8 yena yena_gen_Pl 336 (#y, ye, en, na, a#) \n", "9 yenama yena_dat_Pl 33 (#y, ye, en, na, am, ma, a#) \n", "10 yene yena_acc_Pl 136 (#y, ye, en, ne, e#) \n", "11 yenama yena_ins_Pl 24 (#y, ye, en, na, am, ma, a#) \n", "12 yenama yena_loc_Pl 4 (#y, ye, en, na, am, ma, a#) \n", "13 yeqa yeqa_nom_Sg 179 (#y, ye, eq, qa, a#) \n", "14 yeqe yeqa_gen_Sg 54 (#y, ye, eq, qe, e#) \n", "15 yeqi yeqa_dat_Sg 7 (#y, ye, eq, qi, i#) \n", "16 yequ yeqa_acc_Sg 95 (#y, ye, eq, qu, u#) \n", "17 yeqom yeqa_ins_Sg 30 (#y, ye, eq, qo, om, m#) \n", "18 yeqi yeqa_loc_Sg 43 (#y, ye, eq, qi, i#) \n", "19 yeqe yeqa_nom_Pl 102 (#y, ye, eq, qe, e#) \n", "20 yeqa yeqa_gen_Pl 164 (#y, ye, eq, qa, a#) \n", "21 yeqama yeqa_dat_Pl 3 (#y, ye, eq, qa, am, ma, a#) \n", "22 yeqe yeqa_acc_Pl 84 (#y, ye, eq, qe, e#) \n", "23 yeqama yeqa_ins_Pl 14 (#y, ye, eq, qa, am, ma, a#) \n", "24 yeqama yeqa_loc_Pl 7 (#y, ye, eq, qa, am, ma, a#) \n", "25 yivot yivot_nom_Sg 991 (#y, yi, iv, vo, ot, t#) \n", "26 yivota yivot_gen_Sg 1004 (#y, yi, iv, vo, ot, ta, a#) \n", "27 yivotu yivot_dat_Sg 100 (#y, yi, iv, vo, ot, tu, u#) \n", "28 yivot yivot_acc_Sg 799 (#y, yi, iv, vo, ot, t#) \n", "29 yivotom yivot_ins_Sg 142 (#y, yi, iv, vo, ot, to, om, m#) \n", "30 yivotu yivot_loc_Sg 248 (#y, yi, iv, vo, ot, tu, u#) \n", "31 yivoti yivot_nom_Pl 22 (#y, yi, iv, vo, ot, ti, i#) \n", "32 yivota yivot_gen_Pl 30 (#y, yi, iv, vo, ot, ta, a#) \n", "33 yivotima yivot_dat_Pl 3 (#y, yi, iv, vo, ot, ti, im, ma, a#) \n", "34 yivote yivot_acc_Pl 52 (#y, yi, iv, vo, ot, te, e#) 
\n", "35 yivotima yivot_ins_Pl 5 (#y, yi, iv, vo, ot, ti, im, ma, a#) \n", "36 yivotima yivot_loc_Pl 2 (#y, yi, iv, vo, ot, ti, im, ma, a#) \n", "37 {etwa {etwa_nom_Sg 33 (#{, {e, et, tw, wa, a#) \n", "38 {etwe {etwa_gen_Sg 10 (#{, {e, et, tw, we, e#) \n", "39 {etwi {etwa_dat_Sg 1 (#{, {e, et, tw, wi, i#) \n", "40 {etwu {etwa_acc_Sg 29 (#{, {e, et, tw, wu, u#) \n", "41 {etwom {etwa_ins_Sg 5 (#{, {e, et, tw, wo, om, m#) \n", "42 {etwi {etwa_loc_Sg 12 (#{, {e, et, tw, wi, i#) \n", "43 {etwe {etwa_nom_Pl 6 (#{, {e, et, tw, we, e#) \n", "44 {etwi {etwa_gen_Pl 5 (#{, {e, et, tw, wi, i#) \n", "45 {etwama {etwa_dat_Pl 1 (#{, {e, et, tw, wa, am, ma, a#) \n", "46 {etwe {etwa_acc_Pl 11 (#{, {e, et, tw, we, e#) \n", "47 {etwama {etwa_ins_Pl 2 (#{, {e, et, tw, wa, am, ma, a#) \n", "48 {etwama {etwa_loc_Pl 2 (#{, {e, et, tw, wa, am, ma, a#) \n", "49 {irina {irina_nom_Sg 16 (#{, {i, ir, ri, in, na, a#) \n", "50 {irine {irina_gen_Sg 28 (#{, {i, ir, ri, in, ne, e#) \n", "51 {irini {irina_dat_Sg 3 (#{, {i, ir, ri, in, ni, i#) \n", "52 {irinu {irina_acc_Sg 17 (#{, {i, ir, ri, in, nu, u#) \n", "53 {irinom {irina_ins_Sg 20 (#{, {i, ir, ri, in, no, om, m#) \n", "54 {irini {irina_loc_Sg 17 (#{, {i, ir, ri, in, ni, i#) \n", "55 {irine {irina_nom_Pl 11 (#{, {i, ir, ri, in, ne, e#) \n", "56 {irina {irina_gen_Pl 12 (#{, {i, ir, ri, in, na, a#) \n", "57 {irinama {irina_dat_Pl 2 (#{, {i, ir, ri, in, na, am, ma, a#) \n", "58 {irine {irina_acc_Pl 23 (#{, {i, ir, ri, in, ne, e#) \n", "59 {irinama {irina_ins_Pl 2 (#{, {i, ir, ri, in, na, am, ma, a#) \n", "60 {irinama {irina_loc_Pl 3 (#{, {i, ir, ri, in, na, am, ma, a#) \n", " ... ... ... ... 
\n", "\n", " Outcomes Predicted \n", "1 (yena, nom, Sg) (yena, nom, Sg) \n", "2 (yena, gen, Sg) (yena, nom, Pl) \n", "3 (yena, dat, Sg) (yena, nom, Sg) \n", "4 (yena, acc, Sg) (yena, acc, Sg) \n", "5 (yena, ins, Sg) (yena, ins, Sg) \n", "6 (yena, loc, Sg) (yena, nom, Sg) \n", "7 (yena, nom, Pl) (yena, nom, Pl) \n", "8 (yena, gen, Pl) (yena, nom, Sg) \n", "9 (yena, dat, Pl) (yena, loc, Pl) \n", "10 (yena, acc, Pl) (yena, nom, Pl) \n", "11 (yena, ins, Pl) (yena, loc, Pl) \n", "12 (yena, loc, Pl) (yena, loc, Pl) \n", "13 (yeqa, nom, Sg) (yeqa, nom, Sg) \n", "14 (yeqa, gen, Sg) (yeqa, gen, Sg) \n", "15 (yeqa, dat, Sg) (yeqa, loc, Sg) \n", "16 (yeqa, acc, Sg) (yeqa, acc, Sg) \n", "17 (yeqa, ins, Sg) (yeqa, ins, Sg) \n", "18 (yeqa, loc, Sg) (yeqa, loc, Sg) \n", "19 (yeqa, nom, Pl) (yeqa, gen, Sg) \n", "20 (yeqa, gen, Pl) (yeqa, nom, Sg) \n", "21 (yeqa, dat, Pl) (yeqa, loc, Pl) \n", "22 (yeqa, acc, Pl) (yeqa, gen, Sg) \n", "23 (yeqa, ins, Pl) (yeqa, loc, Pl) \n", "24 (yeqa, loc, Pl) (yeqa, loc, Pl) \n", "25 (yivot, nom, Sg) (yivot, nom, Sg) \n", "26 (yivot, gen, Sg) (yivot, gen, Sg) \n", "27 (yivot, dat, Sg) (yivot, loc, Sg) \n", "28 (yivot, acc, Sg) (yivot, nom, Sg) \n", "29 (yivot, ins, Sg) (yivot, ins, Sg) \n", "30 (yivot, loc, Sg) (yivot, loc, Sg) \n", "31 (yivot, nom, Pl) (yivot, gen, Sg) \n", "32 (yivot, gen, Pl) (yivot, gen, Sg) \n", "33 (yivot, dat, Pl) (yivot, ins, Pl) \n", "34 (yivot, acc, Pl) (yivot, gen, Sg) \n", "35 (yivot, ins, Pl) (yivot, ins, Pl) \n", "36 (yivot, loc, Pl) (yivot, ins, Pl) \n", "37 ({etwa, nom, Sg) ({etwa, gen, Sg) \n", "38 ({etwa, gen, Sg) ({etwa, nom, Sg) \n", "39 ({etwa, dat, Sg) ({etwa, nom, Pl) \n", "40 ({etwa, acc, Sg) ({etwa, loc, Sg) \n", "41 ({etwa, ins, Sg) ({etwa, ins, Sg) \n", "42 ({etwa, loc, Sg) ({etwa, nom, Pl) \n", "43 ({etwa, nom, Pl) ({etwa, nom, Sg) \n", "44 ({etwa, gen, Pl) ({etwa, nom, Pl) \n", "45 ({etwa, dat, Pl) ({etwa, ins, Pl) \n", "46 ({etwa, acc, Pl) ({etwa, nom, Sg) \n", "47 ({etwa, ins, Pl) ({etwa, ins, Pl) 
\n", "48 ({etwa, loc, Pl) ({etwa, ins, Pl) \n", "49 ({irina, nom, Sg) ({irina, gen, Sg) \n", "50 ({irina, gen, Sg) ({irina, acc, Pl) \n", "51 ({irina, dat, Sg) ({irina, loc, Sg) \n", "52 ({irina, acc, Sg) ({irina, acc, Sg) \n", "53 ({irina, ins, Sg) ({irina, ins, Sg) \n", "54 ({irina, loc, Sg) ({irina, loc, Sg) \n", "55 ({irina, nom, Pl) ({irina, acc, Pl) \n", "56 ({irina, gen, Pl) ({irina, gen, Sg) \n", "57 ({irina, dat, Pl) ({irina, loc, Pl) \n", "58 ({irina, acc, Pl) ({irina, acc, Pl) \n", "59 ({irina, ins, Pl) ({irina, loc, Pl) \n", "60 ({irina, loc, Pl) ({irina, loc, Pl) \n", " ... ... \n", "\n", "[3240 rows x 6 columns]" ] } ], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "# Share of rows whose predicted (lemma, case, number) tuple equals the true one.\n", "(serbian.Outcomes == serbian.Predicted).mean()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 9, "text": [ "0.37" ] } ], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }