{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Load segmented affix occurrences from supplement" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'6e6389a913cc01020d03ac16217bc1c63c9d0e16b78179b4c931741c0d5a69cf'" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import hashlib\n", "import io\n", "import pathlib\n", "import shutil\n", "import urllib.request\n", "import zipfile\n", "\n", "URL = 'https://zenodo.org/record/841982/files/xflr6/portmanteaus-v1.0.zip'\n", "\n", "CSV = pathlib.Path('esm3-analyses.csv')\n", "\n", "def sha256sum(filepath: pathlib.Path | str, /) -> str:\n", " with open(filepath, 'rb') as f:\n", " return hashlib.file_digest(f, hashlib.sha256).hexdigest()\n", "\n", "def download_archive_path(url: str, /, filename: str, *, target: pathlib.Path,\n", " clear_cache: bool = False) -> pathlib.Path:\n", " if clear_cache or not target.exists():\n", " with io.BytesIO() as b:\n", " with urllib.request.urlopen(url) as u:\n", " shutil.copyfileobj(u, b)\n", " with zipfile.ZipFile(b) as z:\n", " (i,) = (i for i in z.infolist() if pathlib.Path(i.filename).name == filename)\n", " i.filename = target.name\n", " z.extract(i)\n", " assert target.exists()\n", " return target\n", "\n", "sha256sum(download_archive_path(URL, CSV.name, target=CSV))" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "\n", "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "import scipy.stats\n", "\n", "def pearsonr(df, /, left, right, *, func=scipy.stats.pearsonr) -> pd.Series:\n", " df = df[[left, right]].dropna()\n", " name = f'{left} & {right}'\n", " result = func(df[left], df[right])\n", " return pd.Series(result, index=('r', 'p'), name=name)\n", "\n", "plt.style.use('classic')\n", "plt.rcParams.update({'figure.figsize': (6, 4), 'figure.facecolor': 'w',\n", " 
'figure.subplot.bottom': .125, 'font.size': 10, 'savefig.dpi': 72})\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 2818 entries, 0 to 2817\n", "Data columns (total 5 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Language 2818 non-null object\n", " 1 Cell 2818 non-null object\n", " 2 Position 2818 non-null int64 \n", " 3 Form 2818 non-null object\n", " 4 Meaning 2818 non-null object\n", "dtypes: int64(1), object(4)\n", "memory usage: 110.2+ KB\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
LanguageCellPositionFormMeaning
0Ainu1s-1kuSA[+1 +sg]
1Ainu1p1asS[+1 +pl]
2Ainu2s-1eSAP[-3 +sg]
3Ainu2p-1eciSAP[+2]
4Ainux1anS[-1 -2 -3]
5Ainu1s->2s-1eciSAP[+2]
6Ainu1s->2p-1eciSAP[+2]
7Ainu1s->3s-1kuSA[+1 +sg]
8Ainu1s->3p-1kuSA[+1 +sg]
9Ainu1s->x-2kuSA[+1 +sg]
\n", "
" ], "text/plain": [ " Language Cell Position Form Meaning\n", "0 Ainu 1s -1 ku SA[+1 +sg]\n", "1 Ainu 1p 1 as S[+1 +pl]\n", "2 Ainu 2s -1 e SAP[-3 +sg]\n", "3 Ainu 2p -1 eci SAP[+2]\n", "4 Ainu x 1 an S[-1 -2 -3]\n", "5 Ainu 1s->2s -1 eci SAP[+2]\n", "6 Ainu 1s->2p -1 eci SAP[+2]\n", "7 Ainu 1s->3s -1 ku SA[+1 +sg]\n", "8 Ainu 1s->3p -1 ku SA[+1 +sg]\n", "9 Ainu 1s->x -2 ku SA[+1 +sg]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "RENAME = {'Quechua (Ayacucho)': 'Ayacucho',\n", " 'Tlachichilco Tepehuan': 'Tepehua',\n", " 'Lakhota': 'Lakota'}\n", "\n", "cf = (pd.read_csv(CSV, encoding='utf-8')\n", " .assign(Language=lambda x: x['Language'].replace(RENAME))\n", " .sort_values(by='Language', kind='mergesort')\n", " .reset_index(drop=True))\n", "\n", "cf.info()\n", "assert cf.set_index(['Language', 'Cell', 'Position']).index.is_unique\n", "cf.head(10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Reconcatenate word-forms with stem symbol" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "MultiIndex: 1317 entries, ('Ainu', '1s') to ('Yimas', '3p->3p')\n", "Data columns (total 1 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Form 1317 non-null object\n", "dtypes: object(1)\n", "memory usage: 17.3+ KB\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Form
LanguageCell
Ainu1skuΣ
1pΣas
2s
2peciΣ
xΣan
1s->2seciΣ
1s->2peciΣ
1s->3skuΣ
1s->3pkuΣ
1s->xkuiΣ
\n", "
" ], "text/plain": [ " Form\n", "Language Cell \n", "Ainu 1s kuΣ\n", " 1p Σas\n", " 2s eΣ\n", " 2p eciΣ\n", " x Σan\n", " 1s->2s eciΣ\n", " 1s->2p eciΣ\n", " 1s->3s kuΣ\n", " 1s->3p kuΣ\n", " 1s->x kuiΣ" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "STEM = 'Σ'\n", "\n", "assert not cf['Form'].str.contains(STEM).any()\n", "\n", "_cf = (cf.drop('Meaning', axis=1)\n", " .assign(cell_index=lambda x: x.groupby(['Language', 'Cell'], sort=False).ngroup()))\n", "\n", "_sf = (_cf.drop_duplicates('cell_index')\n", " .assign(Position=0, Form=STEM))\n", "\n", "df = (pd.concat([_cf, _sf])\n", " .sort_values(by=['cell_index', 'Position'])\n", " .groupby(['cell_index', 'Language', 'Cell'])[['Form']]\n", " .agg(''.join)\n", " .reset_index('cell_index', drop=True))\n", "\n", "df.info()\n", "assert df.index.is_unique\n", "df.head(10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Tag cells as 1/2<->1/2, 1/2<->3, and other" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "MultiIndex: 1317 entries, ('Ainu', '1s') to ('Yimas', '3p->3p')\n", "Data columns (total 2 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 d_local 912 non-null object\n", " 1 Form 1317 non-null object\n", "dtypes: object(2)\n", "memory usage: 59.9+ KB\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
d_localForm
LanguageCell
Ainu1sNonekuΣ
1pNoneΣas
2sNone
2pNoneeciΣ
xNoneΣan
1s->2sTrueeciΣ
1s->2pTrueeciΣ
1s->3sFalsekuΣ
1s->3pFalsekuΣ
1s->xFalsekuiΣ
1p->2sTrueeciΣ
1p->2pTrueeciΣ
1p->3sFalseciΣ
1p->3pFalseciΣ
1p->xFalseaiΣ
2s->1sTrueenΣ
2s->1pTrueunΣ
2s->3sFalse
2s->3pFalse
2s->xFalseeiΣ
2p->1sTrueecienΣ
2p->1pTrueeciunΣ
2p->3sFalseeciΣ
2p->3pFalseeciΣ
2p->xFalseeciiΣ
3s->1sFalseenΣ
3s->1pFalseunΣ
3s->2sFalse
3s->2pFalseeciΣ
3s->xNone
\n", "
" ], "text/plain": [ " d_local Form\n", "Language Cell \n", "Ainu 1s None kuΣ\n", " 1p None Σas\n", " 2s None eΣ\n", " 2p None eciΣ\n", " x None Σan\n", " 1s->2s True eciΣ\n", " 1s->2p True eciΣ\n", " 1s->3s False kuΣ\n", " 1s->3p False kuΣ\n", " 1s->x False kuiΣ\n", " 1p->2s True eciΣ\n", " 1p->2p True eciΣ\n", " 1p->3s False ciΣ\n", " 1p->3p False ciΣ\n", " 1p->x False aiΣ\n", " 2s->1s True enΣ\n", " 2s->1p True unΣ\n", " 2s->3s False eΣ\n", " 2s->3p False eΣ\n", " 2s->x False eiΣ\n", " 2p->1s True ecienΣ\n", " 2p->1p True eciunΣ\n", " 2p->3s False eciΣ\n", " 2p->3p False eciΣ\n", " 2p->x False eciiΣ\n", " 3s->1s False enΣ\n", " 3s->1p False unΣ\n", " 3s->2s False eΣ\n", " 3s->2p False eciΣ\n", " 3s->x None iΣ" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "SEP = '->'\n", "\n", "def is_distinct_local(cellkey, /, *, sep=SEP, persons=('1', '2')) -> bool | None:\n", " (subj, trans, obj) = cellkey.partition(sep)\n", " (local_subj, local_obj) = (any(p in arg for p in persons) for arg in (subj, obj))\n", " if local_subj and local_obj:\n", " return True\n", " elif trans and (local_subj or local_obj):\n", " return False\n", " else:\n", " return None\n", "\n", "df.insert(0, 'd_local', df.index.get_level_values('Cell').map(is_distinct_local))\n", "\n", "df.info()\n", "assert df.index.is_unique\n", "df.head(30)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Neutralization ratios (1/2<->3 vs. 1/2<->1/2)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sizenuniqueneutratioratio (norm)
d_localFalseTrueFalseTrueFalseTrueFalseTrueFalseTrue
Language
Ainu24814510343.47826142.8571430.5035970.496403
Aleut3618135231365.71428676.4705880.4621750.537825
Ayacucho2081169247.36842128.5714290.6237620.376238
Bella Coola1681363220.00000028.5714290.4117650.588235
Chuckchi158768257.14285728.5714290.6666670.333333
Darai1681155333.33333342.8571430.4375000.562500
Fox2081664221.05263228.5714290.4242420.575758
Hixkaryana1867311364.70588260.0000000.5188680.481132
Jaqaru63531020.0000000.0000001.0000000.000000
Jumjum2481787030.4347830.0000001.0000000.000000
Karuk168957346.66666742.8571430.5212770.478723
Ket48822826055.3191490.0000001.0000000.000000
Kunama4818227261155.31914964.7058820.4608970.539103
Lakota2081367236.84210528.5714290.5632180.436782
Maricopa1683213686.66666785.7142860.5027620.497238
Maung60845415425.42372957.1428570.3079180.692082
Mordvin1681046440.00000057.1428570.4117650.588235
Nocte1686310566.66666771.4285710.4827590.517241
Reyesano168828653.33333385.7142860.3835620.616438
Sahu60830830050.8474580.0000001.0000000.000000
Siuslawan48184817010.0000005.8823530.0000001.000000
Tepehua2081545426.31578957.1428570.3153150.684685
Thangmi1681066240.00000028.5714290.5833330.416667
Turkana168739560.00000071.4285710.4565220.543478
Wardaman248237114.34782614.2857140.2333330.766667
Yimas361830176117.1428575.8823530.7445260.255474
\n", "
" ], "text/plain": [ " size nunique neut ratio \\\n", "d_local False True False True False True False True \n", "Language \n", "Ainu 24 8 14 5 10 3 43.478261 42.857143 \n", "Aleut 36 18 13 5 23 13 65.714286 76.470588 \n", "Ayacucho 20 8 11 6 9 2 47.368421 28.571429 \n", "Bella Coola 16 8 13 6 3 2 20.000000 28.571429 \n", "Chuckchi 15 8 7 6 8 2 57.142857 28.571429 \n", "Darai 16 8 11 5 5 3 33.333333 42.857143 \n", "Fox 20 8 16 6 4 2 21.052632 28.571429 \n", "Hixkaryana 18 6 7 3 11 3 64.705882 60.000000 \n", "Jaqaru 6 3 5 3 1 0 20.000000 0.000000 \n", "Jumjum 24 8 17 8 7 0 30.434783 0.000000 \n", "Karuk 16 8 9 5 7 3 46.666667 42.857143 \n", "Ket 48 8 22 8 26 0 55.319149 0.000000 \n", "Kunama 48 18 22 7 26 11 55.319149 64.705882 \n", "Lakota 20 8 13 6 7 2 36.842105 28.571429 \n", "Maricopa 16 8 3 2 13 6 86.666667 85.714286 \n", "Maung 60 8 45 4 15 4 25.423729 57.142857 \n", "Mordvin 16 8 10 4 6 4 40.000000 57.142857 \n", "Nocte 16 8 6 3 10 5 66.666667 71.428571 \n", "Reyesano 16 8 8 2 8 6 53.333333 85.714286 \n", "Sahu 60 8 30 8 30 0 50.847458 0.000000 \n", "Siuslawan 48 18 48 17 0 1 0.000000 5.882353 \n", "Tepehua 20 8 15 4 5 4 26.315789 57.142857 \n", "Thangmi 16 8 10 6 6 2 40.000000 28.571429 \n", "Turkana 16 8 7 3 9 5 60.000000 71.428571 \n", "Wardaman 24 8 23 7 1 1 4.347826 14.285714 \n", "Yimas 36 18 30 17 6 1 17.142857 5.882353 \n", "\n", " ratio (norm) \n", "d_local False True \n", "Language \n", "Ainu 0.503597 0.496403 \n", "Aleut 0.462175 0.537825 \n", "Ayacucho 0.623762 0.376238 \n", "Bella Coola 0.411765 0.588235 \n", "Chuckchi 0.666667 0.333333 \n", "Darai 0.437500 0.562500 \n", "Fox 0.424242 0.575758 \n", "Hixkaryana 0.518868 0.481132 \n", "Jaqaru 1.000000 0.000000 \n", "Jumjum 1.000000 0.000000 \n", "Karuk 0.521277 0.478723 \n", "Ket 1.000000 0.000000 \n", "Kunama 0.460897 0.539103 \n", "Lakota 0.563218 0.436782 \n", "Maricopa 0.502762 0.497238 \n", "Maung 0.307918 0.692082 \n", "Mordvin 0.411765 0.588235 \n", "Nocte 0.482759 0.517241 \n", "Reyesano 
0.383562 0.616438 \n", "Sahu 1.000000 0.000000 \n", "Siuslawan 0.000000 1.000000 \n", "Tepehua 0.315315 0.684685 \n", "Thangmi 0.583333 0.416667 \n", "Turkana 0.456522 0.543478 \n", "Wardaman 0.233333 0.766667 \n", "Yimas 0.744526 0.255474 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "xf = (df.dropna(subset=['d_local'])\n", " .groupby(['Language', 'd_local'])['Form']\n", " .agg(['size', 'nunique']))\n", "\n", "xf['neut'] = xf['size'] - xf['nunique']\n", "xf['ratio'] = 100 * xf['neut'] / (xf['size'] - 1)\n", "xf['ratio (norm)'] = xf['ratio'] / xf['ratio'].groupby(level='Language').sum()\n", "xf.loc[xf['ratio (norm)'].isnull(), 'ratio'] = None\n", "\n", "xfp = xf.reset_index('d_local')\n", "xf = xf.unstack()\n", "\n", "xf" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "(xf.sort_values(by=('ratio (norm)', True), kind='mergesort')['ratio']\n", " .plot.bar(stacked=True, figsize=(15, 5), cmap=plt.cm.gray));" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "(xf['ratio (norm)'].sort_values(by=True, kind='mergesort')\n", " .plot.bar(stacked=True, figsize=(15, 5), cmap=plt.cm.gray));" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Test for neutralization differences" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
rp
d_local & ratio-0.0438860.757379
d_local & ratio (norm)-0.1599710.257281
\n", "
" ], "text/plain": [ " r p\n", "d_local & ratio -0.043886 0.757379\n", "d_local & ratio (norm) -0.159971 0.257281" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.DataFrame([pearsonr(xfp, 'd_local', c) for c in ['ratio', 'ratio (norm)']])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Count 1/2 subcategory (number, gender) neutralizations" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['1s' '1p' '2s' '2p' '3s' '3p' 'x' '1d' '2d' '3d' '1pe' '1pi' '1' '12' '2'\n", " '3' '1di' '3s.m' '3s.f' '3s.n' '3p.m' '3p.f' '3p.n' '1de' '3.I' '3.III'\n", " '3.II' '3.IV' '3.V' '3.VI']\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
d_localsizenuniqueneutratio
LanguagegroupX
Ainu1->2:A1pTrue211100.0
1sTrue211100.0
1->2:P2pTrue211100.0
2sTrue211100.0
1->X:P3pFalse2200.0
3sFalse2200.0
xFalse2200.0
2->1:A2pTrue2200.0
2sTrue2200.0
2->1:P1pTrue2200.0
1sTrue2200.0
2->X:P3pFalse2200.0
3sFalse2200.0
xFalse2200.0
\n", "
" ], "text/plain": [ " d_local size nunique neut ratio\n", "Language group X \n", "Ainu 1->2:A 1p True 2 1 1 100.0\n", " 1s True 2 1 1 100.0\n", " 1->2:P 2p True 2 1 1 100.0\n", " 2s True 2 1 1 100.0\n", " 1->X:P 3p False 2 2 0 0.0\n", " 3s False 2 2 0 0.0\n", " x False 2 2 0 0.0\n", " 2->1:A 2p True 2 2 0 0.0\n", " 2s True 2 2 0 0.0\n", " 2->1:P 1p True 2 2 0 0.0\n", " 1s True 2 2 0 0.0\n", " 2->X:P 3p False 2 2 0 0.0\n", " 3s False 2 2 0 0.0\n", " x False 2 2 0 0.0" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ff = df.dropna(subset=['d_local']).reset_index()\n", "ff[['A', 'P']] = ff['Cell'].str.partition(SEP)[[0, 2]]\n", "print(pd.concat([ff['A'], ff['P']]).unique())\n", "\n", "(a_first, p_first) = (ff[x].str.contains(r'1') for x in ('A', 'P'))\n", "(a_second, p_second) = (ff[x].str.contains(r'2|[dp]i') for x in ('A', 'P'))\n", "(a_third, p_third) = ~a_first & ~a_second, ~p_first & ~p_second\n", "\n", "# treat inclusive cells as first person only\n", "a_second &= ~a_first; p_second &= ~p_first\n", "assert (pd.concat([a_first, a_second, a_third], axis=1).sum(axis=1) == 1).all()\n", "assert (pd.concat([p_first, p_second, p_third], axis=1).sum(axis=1) == 1).all()\n", "\n", "groups = {'1->X:P': a_first & p_third,\n", " '2->X:P': a_second & p_third,\n", " 'X->1:A': a_third & p_first,\n", " 'X->2:A': a_third & p_second,\n", " #\n", " '1->2:A': a_first & p_second,\n", " '2->1:A': a_second & p_first,\n", " '1->2:P': a_first & p_second,\n", " '2->1:P': a_second & p_first}\n", "\n", "lf = (pd.concat([ff[c].groupby(['Language', 'd_local', g.rpartition(':')[-1]])['Form']\n", " .agg(['size', 'nunique'])\n", " .assign(group=g)\n", " .set_index('group', append=True)\n", " .swaplevel()\n", " .reset_index('d_local')\n", " for g, c in groups.items()])\n", " .sort_index())\n", "lf.index.rename('X', level=2, inplace=True)\n", "\n", "lf['neut'] = lf['size'] - lf['nunique']\n", "lf['ratio'] = 100 * lf['neut'] / (lf['size'] - 1)\n", 
"\n", "lf.head(14)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1/2 subcategory neutralization ratios (1/2<->3 vs. 1/2<->1/2)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ratioratio (norm)
d_localFalseTrueFalseTrue
Language
Ainu0.00000050.0000000.0000001.000000
Aleut62.50000062.5000000.5000000.500000
Ayacucho6.25000025.0000000.2000000.800000
Bella Coola0.00000012.5000000.0000001.000000
Chuckchi28.57142925.0000000.5333330.466667
Darai25.00000037.5000000.4000000.600000
Fox12.50000025.0000000.3333330.666667
Hixkaryana50.00000060.0000000.4545450.545455
JaqaruNaNNaNNaNNaN
JumjumNaNNaNNaNNaN
Karuk0.00000037.5000000.0000001.000000
KetNaNNaNNaNNaN
Kunama31.25000054.1666670.3658540.634146
Lakota31.25000025.0000000.5555560.444444
Maricopa100.000000100.0000000.5000000.500000
Maung0.00000050.0000000.0000001.000000
Mordvin25.00000050.0000000.3333330.666667
Nocte50.00000075.0000000.4000000.600000
Reyesano0.00000050.0000000.0000001.000000
SahuNaNNaNNaNNaN
SiuslawanNaNNaNNaNNaN
Tepehua25.00000050.0000000.3333330.666667
Thangmi0.00000025.0000000.0000001.000000
Turkana25.00000050.0000000.3333330.666667
Wardaman0.00000012.5000000.0000001.000000
Yimas0.0000004.1666670.0000001.000000
\n", "
" ], "text/plain": [ " ratio ratio (norm) \n", "d_local False True False True \n", "Language \n", "Ainu 0.000000 50.000000 0.000000 1.000000\n", "Aleut 62.500000 62.500000 0.500000 0.500000\n", "Ayacucho 6.250000 25.000000 0.200000 0.800000\n", "Bella Coola 0.000000 12.500000 0.000000 1.000000\n", "Chuckchi 28.571429 25.000000 0.533333 0.466667\n", "Darai 25.000000 37.500000 0.400000 0.600000\n", "Fox 12.500000 25.000000 0.333333 0.666667\n", "Hixkaryana 50.000000 60.000000 0.454545 0.545455\n", "Jaqaru NaN NaN NaN NaN\n", "Jumjum NaN NaN NaN NaN\n", "Karuk 0.000000 37.500000 0.000000 1.000000\n", "Ket NaN NaN NaN NaN\n", "Kunama 31.250000 54.166667 0.365854 0.634146\n", "Lakota 31.250000 25.000000 0.555556 0.444444\n", "Maricopa 100.000000 100.000000 0.500000 0.500000\n", "Maung 0.000000 50.000000 0.000000 1.000000\n", "Mordvin 25.000000 50.000000 0.333333 0.666667\n", "Nocte 50.000000 75.000000 0.400000 0.600000\n", "Reyesano 0.000000 50.000000 0.000000 1.000000\n", "Sahu NaN NaN NaN NaN\n", "Siuslawan NaN NaN NaN NaN\n", "Tepehua 25.000000 50.000000 0.333333 0.666667\n", "Thangmi 0.000000 25.000000 0.000000 1.000000\n", "Turkana 25.000000 50.000000 0.333333 0.666667\n", "Wardaman 0.000000 12.500000 0.000000 1.000000\n", "Yimas 0.000000 4.166667 0.000000 1.000000" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rf = lf.pivot_table('ratio', ['Language', 'd_local'], aggfunc='mean')\n", "\n", "rf['ratio (norm)'] = rf['ratio'] / rf['ratio'].groupby(level='Language').sum()\n", "rf.loc[rf['ratio (norm)'].isnull(), 'ratio'] = None\n", "\n", "rfp = rf.reset_index('d_local')\n", "rf = rf.unstack()\n", "\n", "rf" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "(rf.sort_values(by=('ratio (norm)', True), kind='mergesort')['ratio']\n", " .plot.bar(stacked=True, figsize=(15, 5), cmap=plt.cm.gray));" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "(rf['ratio (norm)'].sort_values(by=True, kind='mergesort')\n", " .plot.bar(stacked=True, figsize=(15, 5), cmap=plt.cm.gray));" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Test for 1/2 subcategory neutralization differences" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
rp
d_local & ratio0.3759821.413199e-02
d_local & ratio (norm)0.7656243.494391e-09
\n", "
" ], "text/plain": [ " r p\n", "d_local & ratio 0.375982 1.413199e-02\n", "d_local & ratio (norm) 0.765624 3.494391e-09" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.DataFrame([pearsonr(rfp, 'd_local', c) for c in ['ratio', 'ratio (norm)']])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Absence of non-person features in learned meanings" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
n
0
+pl638
+1622
+3618
+2595
+sg475
-1437
-3366
-2359
-sg243
+an105
-pl83
+du73
-du47
-obv42
-masc40
+masc34
+hum27
+obv20
+fem13
-hum9
-fem3
\n", "
" ], "text/plain": [ " n\n", "0 \n", "+pl 638\n", "+1 622\n", "+3 618\n", "+2 595\n", "+sg 475\n", "-1 437\n", "-3 366\n", "-2 359\n", "-sg 243\n", "+an 105\n", "-pl 83\n", "+du 73\n", "-du 47\n", "-obv 42\n", "-masc 40\n", "+masc 34\n", "+hum 27\n", "+obv 20\n", "+fem 13\n", "-hum 9\n", "-fem 3" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cf['Meaning'].str.extractall(r'([+-]\\w+)')[0].value_counts().to_frame('n')" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
LanguageCellPositionFormMeaningPersonOnly
0Ainu1s-1kuSA[+1 +sg]False
1Ainu1p1asS[+1 +pl]False
2Ainu2s-1eSAP[-3 +sg]False
3Ainu2p-1eciSAP[+2]True
4Ainux1anS[-1 -2 -3]True
5Ainu1s->2s-1eciSAP[+2]True
6Ainu1s->2p-1eciSAP[+2]True
7Ainu1s->3s-1kuSA[+1 +sg]False
8Ainu1s->3p-1kuSA[+1 +sg]False
9Ainu1s->x-2kuSA[+1 +sg]False
10Ainu1s->x-1iP[-1 -2 -3]True
11Ainu1p->2s-1eciSAP[+2]True
12Ainu1p->2p-1eciSAP[+2]True
13Ainu1p->3s-1ci[+1 +pl]A->P[+3]False
\n", "
" ], "text/plain": [ " Language Cell Position Form Meaning PersonOnly\n", "0 Ainu 1s -1 ku SA[+1 +sg] False\n", "1 Ainu 1p 1 as S[+1 +pl] False\n", "2 Ainu 2s -1 e SAP[-3 +sg] False\n", "3 Ainu 2p -1 eci SAP[+2] True\n", "4 Ainu x 1 an S[-1 -2 -3] True\n", "5 Ainu 1s->2s -1 eci SAP[+2] True\n", "6 Ainu 1s->2p -1 eci SAP[+2] True\n", "7 Ainu 1s->3s -1 ku SA[+1 +sg] False\n", "8 Ainu 1s->3p -1 ku SA[+1 +sg] False\n", "9 Ainu 1s->x -2 ku SA[+1 +sg] False\n", "10 Ainu 1s->x -1 i P[-1 -2 -3] True\n", "11 Ainu 1p->2s -1 eci SAP[+2] True\n", "12 Ainu 1p->2p -1 eci SAP[+2] True\n", "13 Ainu 1p->3s -1 ci [+1 +pl]A->P[+3] False" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "NONPERSON = r'[+-]\\D+\\b'\n", "\n", "nf = cf.assign(PersonOnly=lambda x: ~x['Meaning'].str.contains(NONPERSON))\n", "\n", "nf.head(14)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
d_localPersonOnly
LanguageCell
Ainu1sNoneFalse
1pNoneFalse
2sNoneFalse
2pNoneTrue
xNoneTrue
1s->2sTrueTrue
1s->2pTrueTrue
1s->3sFalseFalse
1s->3pFalseFalse
1s->xFalseFalse
1p->2sTrueTrue
1p->2pTrueTrue
1p->3sFalseFalse
1p->3pFalseFalse
\n", "
" ], "text/plain": [ " d_local PersonOnly\n", "Language Cell \n", "Ainu 1s None False\n", " 1p None False\n", " 2s None False\n", " 2p None True\n", " x None True\n", " 1s->2s True True\n", " 1s->2p True True\n", " 1s->3s False False\n", " 1s->3p False False\n", " 1s->x False False\n", " 1p->2s True True\n", " 1p->2p True True\n", " 1p->3s False False\n", " 1p->3p False False" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnf = (nf.groupby(['Language', 'Cell'], sort=False)['PersonOnly'].all()\n", " .to_frame('PersonOnly'))\n", "\n", "cnf.insert(0, 'd_local', cnf.index.get_level_values('Cell').map(is_distinct_local))\n", "\n", "cnf.head(14)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ratioratio (norm)
d_localFalseTrueFalseTrue
Language
Ainu0.2916670.5000000.3684210.631579
Aleut0.3333330.0000001.0000000.000000
Ayacucho0.2500000.1250000.6666670.333333
Bella CoolaNaNNaNNaNNaN
ChuckchiNaNNaNNaNNaN
Darai0.1250000.0000001.0000000.000000
Fox0.0000000.1250000.0000001.000000
Hixkaryana0.7777780.6666670.5384620.461538
Jaqaru0.6666670.0000001.0000000.000000
Jumjum0.0416670.0000001.0000000.000000
Karuk0.1250000.2500000.3333330.666667
Ket0.0833330.0000001.0000000.000000
Kunama0.0416670.0000001.0000000.000000
Lakota0.2000000.1250000.6153850.384615
Maricopa1.0000001.0000000.5000000.500000
Maung0.0166670.0000001.0000000.000000
Mordvin0.4375000.7500000.3684210.631579
Nocte0.6250000.7500000.4545450.545455
Reyesano0.2500000.5000000.3333330.666667
Sahu0.2666670.0000001.0000000.000000
Siuslawan0.0416670.0000001.0000000.000000
Tepehua0.6000000.2500000.7058820.294118
Thangmi0.1250000.0000001.0000000.000000
Turkana0.4375000.5000000.4666670.533333
Wardaman0.3333330.5000000.4000000.600000
YimasNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " ratio ratio (norm) \n", "d_local False True False True \n", "Language \n", "Ainu 0.291667 0.500000 0.368421 0.631579\n", "Aleut 0.333333 0.000000 1.000000 0.000000\n", "Ayacucho 0.250000 0.125000 0.666667 0.333333\n", "Bella Coola NaN NaN NaN NaN\n", "Chuckchi NaN NaN NaN NaN\n", "Darai 0.125000 0.000000 1.000000 0.000000\n", "Fox 0.000000 0.125000 0.000000 1.000000\n", "Hixkaryana 0.777778 0.666667 0.538462 0.461538\n", "Jaqaru 0.666667 0.000000 1.000000 0.000000\n", "Jumjum 0.041667 0.000000 1.000000 0.000000\n", "Karuk 0.125000 0.250000 0.333333 0.666667\n", "Ket 0.083333 0.000000 1.000000 0.000000\n", "Kunama 0.041667 0.000000 1.000000 0.000000\n", "Lakota 0.200000 0.125000 0.615385 0.384615\n", "Maricopa 1.000000 1.000000 0.500000 0.500000\n", "Maung 0.016667 0.000000 1.000000 0.000000\n", "Mordvin 0.437500 0.750000 0.368421 0.631579\n", "Nocte 0.625000 0.750000 0.454545 0.545455\n", "Reyesano 0.250000 0.500000 0.333333 0.666667\n", "Sahu 0.266667 0.000000 1.000000 0.000000\n", "Siuslawan 0.041667 0.000000 1.000000 0.000000\n", "Tepehua 0.600000 0.250000 0.705882 0.294118\n", "Thangmi 0.125000 0.000000 1.000000 0.000000\n", "Turkana 0.437500 0.500000 0.466667 0.533333\n", "Wardaman 0.333333 0.500000 0.400000 0.600000\n", "Yimas NaN NaN NaN NaN" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "xnf = (cnf.dropna(subset=['d_local'])\n", " .groupby(['Language', 'd_local']).mean()\n", " .rename(columns={'PersonOnly': 'ratio'}))\n", "\n", "xnf['ratio (norm)'] = xnf['ratio'] / xnf['ratio'].groupby(level='Language').sum()\n", "xnf.loc[xnf['ratio (norm)'].isnull(), 'ratio'] = None\n", "\n", "xnfp = xnf.reset_index('d_local')\n", "xnf = xnf.unstack()\n", "\n", "xnf" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "(xnf['ratio (norm)'].sort_values(by=True, kind='mergesort')\n", " .plot.bar(stacked=True, figsize=(15, 5), cmap=plt.cm.gray));" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
rp
d_local & ratio-0.0778410.607115
d_local & ratio (norm)-0.5171180.000233
\n", "
" ], "text/plain": [ " r p\n", "d_local & ratio -0.077841 0.607115\n", "d_local & ratio (norm) -0.517118 0.000233" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.DataFrame([pearsonr(xnfp, 'd_local', c) for c in ['ratio', 'ratio (norm)']])" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.14.0" } }, "nbformat": 4, "nbformat_minor": 4 }