{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Index: 30 entries, Ainu to Yimas\n", "Data columns (total 11 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Family 30 non-null object \n", " 1 LID 29 non-null float64\n", " 2 LOID 30 non-null int64 \n", " 3 Continent 30 non-null object \n", " 4 Area 30 non-null object \n", " 5 Analysis 29 non-null object \n", " 6 Forms 30 non-null object \n", " 7 Elim 1 non-null object \n", " 8 Status 30 non-null object \n", " 9 Comments 8 non-null object \n", " 10 Impression 22 non-null object \n", "dtypes: float64(1), int64(1), object(9)\n", "memory usage: 2.8+ KB\n" ] } ], "source": [
 "%matplotlib inline\n",
 "\n",
 "import pathlib\n",
 "\n",
 "import matplotlib.pyplot as plt\n",
 "from mpl_toolkits.basemap import Basemap\n",
 "import pandas as pd\n",
 "\n",
 "# Web page holding the sample table, and the local CSV file it is cached in.\n",
 "SAMPLE = 'http://proalki.uni-leipzig.de/wiki/Project:Portmanteau_Analyses'\n",
 "SAMPLE_CSV = pathlib.Path('portmanteau_sample.csv')\n",
 "ENCODING = 'utf-8'\n",
 "\n",
 "# Language names from the sample table that are replaced by another spelling.\n",
 "RENAME = {\n",
 "    'Quechua (Ayacucho)': 'Ayacucho',\n",
 "    'Tepehuan': 'Tepehua',\n",
 "    'Lakhota': 'Lakota',\n",
 "}\n",
 "\n",
 "# Fetch the table only when there is no cached copy yet.\n",
 "if not SAMPLE_CSV.exists():\n",
 "    [_sf] = pd.read_html(SAMPLE, header=0, index_col='Language')\n",
 "    _sf.to_csv(SAMPLE_CSV, encoding=ENCODING)\n",
 "\n",
 "# Always read back from the cache so that both code paths yield the same frame.\n",
 "sf = pd.read_csv(SAMPLE_CSV, encoding=ENCODING)\n",
 "sf['Language'] = sf['Language'].replace(RENAME)\n",
 "sf = sf.rename(columns={'Languoid': 'Family'}).set_index('Language')\n",
 "\n",
 "sf.info()\n",
 "assert sf.index.is_unique"
 ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FamilyLIDContinentAreaAnalysis
Language
AinuAinu12N-C AsiaN Coast AsiaAin/Paradigms/to X
AleutAleut18N-C AsiaN Coast AsiaAle/Paradigms/to X
Bella CoolaBella Coola995W N AmericaAlaska-OregonBlc/Paradigms/to X
ChuckchiChukotkan56N-C AsiaN Coast AsiaCkt/Paradigms/to X
DaraiIndo-Iranian1399S/SE AsiaIndicDry/Paradigms/to all/Npst
\n", "
" ], "text/plain": [ " Family LID Continent Area \\\n", "Language \n", "Ainu Ainu 12 N-C Asia N Coast Asia \n", "Aleut Aleut 18 N-C Asia N Coast Asia \n", "Bella Coola Bella Coola 995 W N America Alaska-Oregon \n", "Chuckchi Chukotkan 56 N-C Asia N Coast Asia \n", "Darai Indo-Iranian 1399 S/SE Asia Indic \n", "\n", " Analysis \n", "Language \n", "Ainu Ain/Paradigms/to X \n", "Aleut Ale/Paradigms/to X \n", "Bella Coola Blc/Paradigms/to X \n", "Chuckchi Ckt/Paradigms/to X \n", "Darai Dry/Paradigms/to all/Npst " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Keep the rows whose Status is 'ready' and the columns up to 'Analysis', without LOID.\n",
 "# Missing LIDs are filled with 0 so that the column can be downcast to an integer dtype.\n",
 "df = (sf.loc[sf['Status'].eq('ready'), :'Analysis']\n",
 "      .drop(columns='LOID')\n",
 "      .assign(LID=lambda x: pd.to_numeric(x['LID'].fillna(0), downcast='integer')))\n",
 "\n",
 "df.head()"
 ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Index: 2913 entries, 199 to 3213\n", "Data columns (total 15 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 ISO639.3 2846 non-null object \n", " 1 language 2913 non-null object \n", " 2 alt.language.name 2873 non-null object \n", " 3 language.search 2913 non-null object \n", " 4 lsbranch 301 non-null object \n", " 5 ssbranch 529 non-null object \n", " 6 sbranch 1322 non-null object \n", " 7 mbranch 2070 non-null object \n", " 8 stock 2913 non-null object \n", " 9 alt.stock.name 366 non-null object \n", " 10 stock.search 2913 non-null object \n", " 11 longitude 2913 non-null float64\n", " 12 latitude 2913 non-null float64\n", " 13 area 2913 non-null object \n", " 14 continent 2913 non-null object \n", "dtypes: float64(2), object(13)\n", "memory usage: 364.1+ KB\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ISO639.3languagealt.language.namelanguage.searchlsbranchssbranchsbranchmbranchstockalt.stock.namestock.searchlongitudelatitudeareacontinent
LID
199naqNamaNama, KhoekhoeNama, Nama, KhoekhoeNaNNaNNaNKhoekhoeKwadi-KhoeCentral KhoisanKwadi-Khoe, Central Khoisan18.00-25.50S AfricaAfrica
148knw!Kung!Xu, !Kung, \\t!Hu, !Khung, !Ku, !Kung, !Xu, !X...!Kung, !Xu, !Kung, \\t!Hu, !Khung, !Ku, !Kung, ...NaNNaNNaNNaNJuNorthern KhoisanJu, Northern Khoisan18.00-19.67S AfricaAfrica
94htsHadzaHadzaHadza, HadzaNaNNaNNaNNaNHadzaNaNHadza35.17-3.75S AfricaAfrica
347sadSandaweSandaweSandawe, SandaweNaNNaNNaNNaNSandaweNaNSandawe35.00-5.00S AfricaAfrica
151kwzKwadiKwadi, Cuepe, CurocaKwadi, Kwadi, Cuepe, CurocaNaNNaNNaNKwadiKwadi-KhoeCentral KhoisanKwadi-Khoe, Central Khoisan12.00-16.00S AfricaAfrica
\n", "
" ], "text/plain": [ " ISO639.3 language alt.language.name \\\n", "LID \n", "199 naq Nama Nama, Khoekhoe \n", "148 knw !Kung !Xu, !Kung, \\t!Hu, !Khung, !Ku, !Kung, !Xu, !X... \n", "94 hts Hadza Hadza \n", "347 sad Sandawe Sandawe \n", "151 kwz Kwadi Kwadi, Cuepe, Curoca \n", "\n", " language.search lsbranch ssbranch \\\n", "LID \n", "199 Nama, Nama, Khoekhoe NaN NaN \n", "148 !Kung, !Xu, !Kung, \\t!Hu, !Khung, !Ku, !Kung, ... NaN NaN \n", "94 Hadza, Hadza NaN NaN \n", "347 Sandawe, Sandawe NaN NaN \n", "151 Kwadi, Kwadi, Cuepe, Curoca NaN NaN \n", "\n", " sbranch mbranch stock alt.stock.name \\\n", "LID \n", "199 NaN Khoekhoe Kwadi-Khoe Central Khoisan \n", "148 NaN NaN Ju Northern Khoisan \n", "94 NaN NaN Hadza NaN \n", "347 NaN NaN Sandawe NaN \n", "151 NaN Kwadi Kwadi-Khoe Central Khoisan \n", "\n", " stock.search longitude latitude area continent \n", "LID \n", "199 Kwadi-Khoe, Central Khoisan 18.00 -25.50 S Africa Africa \n", "148 Ju, Northern Khoisan 18.00 -19.67 S Africa Africa \n", "94 Hadza 35.17 -3.75 S Africa Africa \n", "347 Sandawe 35.00 -5.00 S Africa Africa \n", "151 Kwadi-Khoe, Central Khoisan 12.00 -16.00 S Africa Africa " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "AUTOTYP = 'https://www.autotyp.uzh.ch/download/release_2013/autotyp.csv'\n",
 "\n",
 "# Local cache file, named after the last path segment of the URL.\n",
 "AUTOTYP_CSV = pathlib.Path(AUTOTYP.rsplit('/', 1)[-1])\n",
 "\n",
 "# Shared read_csv options: only the empty string is read as missing\n",
 "# (the default NA markers of pandas are switched off), and LID is the index.\n",
 "AUTOTYP_FORMAT = dict(encoding='utf-8',\n",
 "                      na_values='', keep_default_na=False,\n",
 "                      index_col='LID')\n",
 "\n",
 "# Download only when there is no cached copy yet.\n",
 "if not AUTOTYP_CSV.exists():\n",
 "    _af = pd.read_csv(AUTOTYP, **AUTOTYP_FORMAT)\n",
 "    _af.to_csv(AUTOTYP_CSV, encoding=AUTOTYP_FORMAT['encoding'])\n",
 "\n",
 "af = pd.read_csv(AUTOTYP_CSV, **AUTOTYP_FORMAT)\n",
 "\n",
 "af.info()\n",
 "assert af.index.is_unique\n",
 "af.head()"
 ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nunique
language2913
stock399
mbranch236
sbranch154
area24
continent10
\n", "
" ], "text/plain": [ " nunique\n", "language 2913\n", "stock 399\n", "mbranch 236\n", "sbranch 154\n", "area 24\n", "continent 10" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Number of distinct values in each of the listed columns.\n",
 "(af.loc[:, ['language', 'stock', 'mbranch', 'sbranch', 'area', 'continent']]\n",
 " .nunique()\n",
 " .to_frame(name='nunique'))"
 ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stockmbranch
Chibchan165
Great Andamanese54
Morehead and Upper Maro Rivers31
\n", "
" ], "text/plain": [ " stock mbranch\n", "Chibchan 16 5\n", "Great Andamanese 5 4\n", "Morehead and Upper Maro Rivers 3 1" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Names that occur both as a stock and as an mbranch, with the number of rows for each use.\n",
 "_levels = ['stock', 'mbranch']\n",
 "\n",
 "pd.concat([af.groupby(level).size() for level in _levels],\n",
 "          axis=1, join='inner', keys=_levels)"
 ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "593" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# The family of a language is its mbranch, or its stock where mbranch is missing.\n",
 "af['family'] = af['mbranch'].where(af['mbranch'].notna(), af['stock'])\n",
 "\n",
 "af['family'].nunique()"
 ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
n
family
Malayo-Polynesian326
Bantoid164
Indo-Iranian109
West Semitic51
Germanic50
\n", "
" ], "text/plain": [ " n\n", "family \n", "Malayo-Polynesian 326\n", "Bantoid 164\n", "Indo-Iranian 109\n", "West Semitic 51\n", "Germanic 50" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# The five families with the most languages.\n",
 "(af['family']\n",
 " .value_counts()\n",
 " .head()\n",
 " .to_frame(name='n'))"
 ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ISO639.3familycontinentarealatitudelongitude
Language
AinuainAinuN-C AsiaN Coast Asia43.0143.00
AleutaleAleutN-C AsiaN Coast Asia54.0-166.00
Bella CoolablcSalishanW N AmericaAlaska-Oregon52.5-126.67
ChuckchicktChukotkanN-C AsiaN Coast Asia67.0170.00
DaraidryIndo-IranianS/SE AsiaIndic24.084.00
\n", "
" ], "text/plain": [ " ISO639.3 family continent area latitude \\\n", "Language \n", "Ainu ain Ainu N-C Asia N Coast Asia 43.0 \n", "Aleut ale Aleut N-C Asia N Coast Asia 54.0 \n", "Bella Coola blc Salishan W N America Alaska-Oregon 52.5 \n", "Chuckchi ckt Chukotkan N-C Asia N Coast Asia 67.0 \n", "Darai dry Indo-Iranian S/SE Asia Indic 24.0 \n", "\n", " longitude \n", "Language \n", "Ainu 143.00 \n", "Aleut -166.00 \n", "Bella Coola -126.67 \n", "Chuckchi 170.00 \n", "Darai 84.00 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Jumjum is missing in autotyp, so its row is supplied by hand.\n",
 "JUM = pd.Series(['jum', 'Western Nilotic', 'Africa', 'African Savannah', 33.7494, 10.4349],\n",
 "                index=['ISO639.3', 'family', 'continent', 'area', 'longitude', 'latitude'])\n",
 "\n",
 "_columns = ['ISO639.3', 'family', 'continent', 'area', 'latitude', 'longitude']\n",
 "\n",
 "# Look up the sample languages in autotyp by LID, then index the result by language name again.\n",
 "_by_lid = df[['LID']].reset_index().set_index('LID')\n",
 "jf = _by_lid.join(af).set_index('Language')[_columns]\n",
 "\n",
 "# The assignment aligns JUM with the columns by label, so the order of its entries does not matter.\n",
 "jf.loc['Jumjum'] = JUM\n",
 "assert not jf.isnull().any().any()\n",
 "\n",
 "jf.head()"
 ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ISO639.3familycontinentarean
Language
JumjumjumWestern NiloticAfricaAfrican Savannah375
KunamakunKunamaAfricaGreater Abyssinia61
TurkanatuvEastern NiloticAfricaS Africa190
MaungmphIwaidjanAustraliaN Australia108
WardamanwrrWagiman - WardamanAustraliaN Australia108
TepehuateeTotonac-TepehuanC AmericaMesoamerica173
LakotalktSiouanE N AmericaBasin and Plains68
MaricopamrcYumanE N AmericaBasin and Plains68
FoxsacAlgonquianE N AmericaE North America69
KetketYeniseianN-C AsiaInner Asia113
MordvinmyvFinno-UgricN-C AsiaInner Asia113
AinuainAinuN-C AsiaN Coast Asia27
AleutaleAleutN-C AsiaN Coast Asia27
ChuckchicktChukotkanN-C AsiaN Coast Asia27
YimasyeeLower SepikNG and OceaniaN Coast New Guinea163
SahusajNorth HalmaheranNG and OceaniaOceania240
JaqarujqrAymaranS AmericaAndean47
AyacuchoquyQuechuanS AmericaAndean47
HixkaryanahixCaribanS AmericaNE South America192
ReyesanoreyTacananS AmericaNE South America192
DaraidryIndo-IranianS/SE AsiaIndic203
ThangmithfRemnant HimalayishS/SE AsiaIndic203
NoctenjbBrahmaputran (Sal)S/SE AsiaSoutheast Asia216
Bella CoolablcSalishanW N AmericaAlaska-Oregon59
SiuslawansisSiuslawanW N AmericaAlaska-Oregon59
KarukkyhKarokW N AmericaCalifornia47
\n", "
" ], "text/plain": [ " ISO639.3 family continent area \\\n", "Language \n", "Jumjum jum Western Nilotic Africa African Savannah \n", "Kunama kun Kunama Africa Greater Abyssinia \n", "Turkana tuv Eastern Nilotic Africa S Africa \n", "Maung mph Iwaidjan Australia N Australia \n", "Wardaman wrr Wagiman - Wardaman Australia N Australia \n", "Tepehua tee Totonac-Tepehuan C America Mesoamerica \n", "Lakota lkt Siouan E N America Basin and Plains \n", "Maricopa mrc Yuman E N America Basin and Plains \n", "Fox sac Algonquian E N America E North America \n", "Ket ket Yeniseian N-C Asia Inner Asia \n", "Mordvin myv Finno-Ugric N-C Asia Inner Asia \n", "Ainu ain Ainu N-C Asia N Coast Asia \n", "Aleut ale Aleut N-C Asia N Coast Asia \n", "Chuckchi ckt Chukotkan N-C Asia N Coast Asia \n", "Yimas yee Lower Sepik NG and Oceania N Coast New Guinea \n", "Sahu saj North Halmaheran NG and Oceania Oceania \n", "Jaqaru jqr Aymaran S America Andean \n", "Ayacucho quy Quechuan S America Andean \n", "Hixkaryana hix Cariban S America NE South America \n", "Reyesano rey Tacanan S America NE South America \n", "Darai dry Indo-Iranian S/SE Asia Indic \n", "Thangmi thf Remnant Himalayish S/SE Asia Indic \n", "Nocte njb Brahmaputran (Sal) S/SE Asia Southeast Asia \n", "Bella Coola blc Salishan W N America Alaska-Oregon \n", "Siuslawan sis Siuslawan W N America Alaska-Oregon \n", "Karuk kyh Karok W N America California \n", "\n", " n \n", "Language \n", "Jumjum 375 \n", "Kunama 61 \n", "Turkana 190 \n", "Maung 108 \n", "Wardaman 108 \n", "Tepehua 173 \n", "Lakota 68 \n", "Maricopa 68 \n", "Fox 69 \n", "Ket 113 \n", "Mordvin 113 \n", "Ainu 27 \n", "Aleut 27 \n", "Chuckchi 27 \n", "Yimas 163 \n", "Sahu 240 \n", "Jaqaru 47 \n", "Ayacucho 47 \n", "Hixkaryana 192 \n", "Reyesano 192 \n", "Darai 203 \n", "Thangmi 203 \n", "Nocte 216 \n", "Bella Coola 59 \n", "Siuslawan 59 \n", "Karuk 47 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Sample languages together with n, the number of autotyp languages in the same area.\n",
 "_area_sizes = af.groupby('area').size().to_frame('n')\n",
 "\n",
 "(jf.drop(columns=['latitude', 'longitude'])\n",
 " .merge(_area_sizes, left_on='area', right_index=True)\n",
 " .sort_values(['continent', 'area']))"
 ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
languageLanguage
continentarea
AfricaN Africa28NaN
AustraliaS Australia81NaN
NG and OceaniaInterior New Guinea81NaN
S New Guinea67NaN
S AmericaSE South America32NaN
W and SW EurasiaEurope142NaN
Greater Mesopotamia131NaN
\n", "
" ], "text/plain": [ " language Language\n", "continent area \n", "Africa N Africa 28 NaN\n", "Australia S Australia 81 NaN\n", "NG and Oceania Interior New Guinea 81 NaN\n", " S New Guinea 67 NaN\n", "S America SE South America 32 NaN\n", "W and SW Eurasia Europe 142 NaN\n", " Greater Mesopotamia 131 NaN" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Areas of autotyp without any sample language: the left merge leaves 'Language' missing there.\n",
 "_per_area = af.groupby(['area', 'continent'], as_index=False)['language'].count()\n",
 "_sampled = jf[['area']].reset_index()\n",
 "\n",
 "(_per_area\n",
 " .merge(_sampled, how='left', on='area')\n",
 " .set_index(['continent', 'area'])\n",
 " .sort_index()\n",
 " .loc[lambda x: x['Language'].isna()])"
 ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
presentleft_onlyboth
languages5622351
\n", "
" ], "text/plain": [ "present left_only both\n", "languages 562 2351" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Total number of autotyp languages in areas without ('left_only') and with ('both')\n",
 "# at least one sample language.\n",
 "_in_autotyp = af.groupby('area').size().to_frame('languages')\n",
 "_in_sample = jf.groupby('area').size().to_frame()\n",
 "\n",
 "(_in_autotyp\n",
 " .merge(_in_sample, how='left', left_index=True, right_index=True, indicator='present')\n",
 " .groupby('present', observed=False)['languages']\n",
 " .sum()\n",
 " .drop('right_only')  # this category cannot occur in a left merge\n",
 " .astype(int)\n",
 " .to_frame()\n",
 " .transpose())"
 ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nunique
family593
continent10
area24
\n", "
" ], "text/plain": [ " nunique\n", "family 593\n", "continent 10\n", "area 24" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Distinct families, continents and areas in all of autotyp.\n",
 "(af.loc[:, ['family', 'continent', 'area']]\n",
 " .nunique()\n",
 " .to_frame(name='nunique'))"
 ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nunique
family26
continent9
area17
\n", "
" ], "text/plain": [ " nunique\n", "family 26\n", "continent 9\n", "area 17" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Distinct families, continents and areas covered by the sample languages.\n",
 "(jf.loc[:, ['family', 'continent', 'area']]\n",
 " .nunique()\n",
 " .to_frame(name='nunique'))"
 ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
 "fig, ax = plt.subplots(figsize=(12, 6))\n",
 "\n",
 "m = Basemap(ax=ax, projection='eck4', lon_0=155)\n",
 "m.fillcontinents()\n",
 "# Optional grid lines, currently switched off:\n",
 "#m.drawparallels(range(-90, 120, 30), dashes=[], linewidth=.25, labels=[1, 0, 0, 0])\n",
 "#m.drawmeridians(range(0, 360, 60), dashes=[], linewidth=.25, labels=[0, 0, 0, 1])\n",
 "\n",
 "# Label offsets per language, added to the projected marker position;\n",
 "# languages that are not listed get the default offset used in the loop below.\n",
 "offsets = {\n",
 "    'Jumjum': (3e5, -1e5),\n",
 "    'Thangmi': (0, 3e5), 'Nocte': (3e5, -2e5), 'Darai': (3e5, -4e5),\n",
 "    'Wardaman': (3e5, -2e5),\n",
 "    'Siuslawan': (-2.6e6, 0), 'Karuk': (2e5, -3e5), 'Maricopa': (3e5, -2e5),\n",
 "    'Lakota': (1e5, 2e5), 'Fox': (2e5, -3e5),\n",
 "    'Jaqaru': (0, 5e5), 'Ayacucho': (0e5, -6e5),\n",
 "}\n",
 "\n",
 "# One dot and one text label per sample language.\n",
 "for name, lon, lat in zip(jf.index, jf['longitude'], jf['latitude']):\n",
 "    px, py = m(lon, lat)  # longitude/latitude -> map projection coordinates\n",
 "    m.plot(px, py, marker='.', color='b', markersize=7)\n",
 "    dx, dy = offsets.get(name, (2e5, 1e5))\n",
 "    ax.text(px + dx, py + dy, name)\n",
 "\n",
 "#fig.savefig('map.pdf', bbox_inches='tight', pad_inches=.01)"
 ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.9" } }, "nbformat": 4, "nbformat_minor": 4 }