{ "cells": [ { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# Expanding the measurement of culture with a sample of two billion humans" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "## Replication Data and Code" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "This notebook provides all the steps to replicate the results of our paper [Expanding the measurement of culture with a sample of two billion humans](https://doi.org/10.1098/rsif.2022.0085) published in the *Journal of the Royal Society Interface 19:20220085* (2022)." ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# Setup and Pre-requisites" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "-" } }, "source": [ "Let's start by importing the required packages" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "#%pylab --no-import-all\n", "%matplotlib inline\n", "\n", "import sys, os, time\n", "import numpy as np\n", "import pandas as pd\n", "pd.set_option('display.width', 160)\n", "import matplotlib.pyplot as plt\n", "import matplotlib as mpl\n", "import matplotlib.patches as mpatches\n", "import statsmodels.api as sm\n", "import statsmodels.formula.api as smf\n", "from statsmodels.iolib.summary2 import summary_col\n", "from sklearn.metrics.pairwise import cosine_similarity, cosine_distances, manhattan_distances, pairwise_distances\n", "from scipy.stats import zscore\n", "from scipy.cluster.hierarchy import dendrogram, linkage\n", "from scipy import spatial, stats\n", "from scipy.stats import zscore\n", "import MantelTest.MantelTest as MantelTest\n", "import re\n", "import seaborn as sns" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "Let's setup our paths" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "pathfb = './data/'\n", "pathfbor = './data/OriginalData/'\n", "pathout = pathfb + 'Regs/Euc/'\n", "if os.path.exists(pathout) == False:\n", " os.mkdir(pathout)\n", "pathshare = pathout\n", "if os.path.exists(pathfbor) == False:\n", " os.mkdir(pathfbor)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "Let's load the pairwise distance data" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "mypairs = pd.read_stata(pathout + 'AllDists.dta')\n", "mypairs.drop([x for x in mypairs.columns if x.endswith('uk') or x.endswith('usa')], inplace=True, axis=1)\n", "mypairs.drop([x for x in mypairs.columns if x.find('cognate')!=-1], inplace=True, axis=1)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | ISO_CODE_1 | \n", "ISO_CODE_2 | \n", "EucDist1 | \n", "EucDist2 | \n", "EucDist3 | \n", "EucDist4 | \n", "EucDist5 | \n", "EucDist6 | \n", "EucDist7 | \n", "EucDist8 | \n", "... | \n", "total_non_binary | \n", "EucDistAll | \n", "EucDistBin | \n", "EucDistOptions | \n", "EucDistScale | \n", "FBDist | \n", "dist | \n", "distcap | \n", "distw | \n", "distwces | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "AD | \n", "AE | \n", "1.029294 | \n", "1.266393 | \n", "1.266393 | \n", "1.252900 | \n", "1.246856 | \n", "1.246856 | \n", "1.202870 | \n", "1.005192 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "4.033901 | \n", "5209.694824 | \n", "5209.694824 | \n", "5239.464994 | \n", "5239.175640 | \n", "
1 | \n", "AD | \n", "AF | \n", "0.278367 | \n", "1.294190 | \n", "1.274065 | \n", "1.118237 | \n", "1.148596 | \n", "1.147732 | \n", "1.099786 | \n", "0.879229 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "4.436711 | \n", "5806.358887 | \n", "5806.358887 | \n", "5712.403090 | \n", "5707.325970 | \n", "
2 | \n", "AD | \n", "AG | \n", "0.712423 | \n", "1.224756 | \n", "1.224756 | \n", "1.224756 | \n", "1.224756 | \n", "1.224756 | \n", "1.179946 | \n", "0.977644 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "4.033308 | \n", "6565.212402 | \n", "6565.212402 | \n", "6574.278222 | \n", "6574.205836 | \n", "
3 | \n", "AD | \n", "AI | \n", "1.306302 | \n", "1.363464 | \n", "1.363464 | \n", "1.363464 | \n", "1.363464 | \n", "1.363464 | \n", "1.323359 | \n", "1.146645 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "4.658854 | \n", "6589.531250 | \n", "6589.531250 | \n", "6593.265340 | \n", "6593.264953 | \n", "
4 | \n", "AD | \n", "AL | \n", "0.096556 | \n", "1.361331 | \n", "1.198027 | \n", "1.199096 | \n", "1.199077 | \n", "1.199096 | \n", "1.153289 | \n", "0.945299 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "4.053534 | \n", "1519.550659 | \n", "1519.550659 | \n", "1523.718420 | \n", "1523.040130 | \n", "
5 rows × 46 columns
\n", "