{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ], "text/vnd.plotly.v1+html": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import scipy\n", "import numpy as np\n", "import pandas as pd\n", "import itertools as it\n", "\n", "from math import sin\n", "import collections\n", "\n", "def recursively_default_dict():\n", " return collections.defaultdict(recursively_default_dict)\n", "\n", "from sklearn.neighbors import KernelDensity\n", "from sklearn.decomposition import PCA\n", "from sklearn.model_selection import GridSearchCV\n", "from sklearn.cluster import MeanShift, estimate_bandwidth\n", "from sklearn.metrics.pairwise import pairwise_distances\n", "from sklearn.metrics.pairwise import euclidean_distances\n", "from sklearn.preprocessing import scale\n", "\n", "from scipy.stats.stats import pearsonr \n", "\n", "from scipy.stats import invgamma \n", "from scipy.stats import beta\n", "import matplotlib.pyplot as plt\n", "\n", "import plotly\n", "import plotly.plotly as py\n", "import plotly.graph_objs as go\n", "from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot\n", "from plotly.graph_objs import *\n", "import plotly.figure_factory as ff\n", "\n", "from sklearn.metrics import silhouette_samples, silhouette_score\n", "\n", "\n", "from ipywidgets import interact, interactive, fixed, interact_manual\n", "import ipywidgets as widgets\n", "init_notebook_mode(connected=True)\n", "\n", "import Lab_modules.StructE_tools as Ste\n", "from Lab_modules.Generate_freq_vectors import generate_vectors_Beta" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(2000, 100)\n", "(2000, 150)\n" ] } ], "source": [ "Nbranches= 4\n", "L= 150\n", "n= 100\n", "rangeA= [1,2.5]\n", "rangeB = [.1,.6]\n", "steps= 20\n", "n_comp = 100\n", "density= 50\n", "\n", "\n", "vector_lib= generate_vectors_Beta(L,n,rangeA,rangeB,steps,n_comp)\n", "\n", "pca = PCA(n_components=n_comp, whiten=False,svd_solver='randomized').fit(vector_lib)\n", "features = pca.transform(vector_lib)\n", "\n", "\n", "#features, vector_lib= generate_Branches_Beta(4,50,L,n,rangeA,rangeB,steps,n_comp)\n", "print(features.shape)\n", "print(vector_lib.shape)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "### Functions to manipulate genetic structure.\n", "\n", "def sin_prior(coords,target,vector2,angle,fst_max= 0.2,passport= False):\n", " ID= 'sinusoid'\n", " \n", " coords[target[0]] = coords[target[0]] - [(fst_max * 10 - 1) * sin(angle) * x for x in vector2]\n", " \n", " if passport:\n", " return coords,ID\n", " else:\n", " return coords\n", "\n", "\n", "def linear_prior(coords,target,vector2,angle,region= [-5,5],slope= 1,passport= False):\n", " ID= 'linear'\n", " \n", " if angle >= region[0] and angle <= region[1]:\n", " progression= abs(angle - region[0]) / (region[1] - region[0])\n", " coords[target[0]] = coords[target[0]] + [progression * x * slope for x in vector2]\n", " \n", " if passport:\n", " return coords,ID\n", " else:\n", " return coords\n", "\n", "\n", "def introgression_prior(coords,target,vector2,angle, region,passport= False):\n", " ID= 'introgression'\n", " \n", " if angle >= region[0] and angle <= region[1]:\n", " coords[target[0]] = coords[1]\n", " \n", " if passport:\n", " return coords,ID\n", " else:\n", " return coords\n", " \n", "\n", "def alien_prior_I(coords,target,vector2,angle,fst= .2,region= [-5,5],passport= False):\n", " ID= 'alien I'\n", " \n", " if angle >= region[0] and angle <= region[1]:\n", " coords[target[0]] = coords[target[0]] + [(10 * fst - 1) * x for x in vector2]\n", " #coords[target[0]] = coords[target[0]] + [sin(angle) * x for x in vector2] remember to try this though\n", " #coords[target[0]] = coords[len(coords) - 1]\n", " else:\n", " coords[target[0]] = coords[target[1]]\n", " \n", " if passport:\n", " return coords,ID\n", " else:\n", " return coords\n", "\n", "\n", "def alien_prior_II(coords,target,vector2,angle,fst= .2,region= [-5,5],passport= False):\n", " ID= 'alien II'\n", " \n", " if angle >= region[0] and angle <= region[1]:\n", " coords[target[0]] = coords[target[0]] + [(10 * fst - 1) * x for x in vector2]\n", " #coords[target[0]] = coords[target[0]] + [sin(angle) * x for x in vector2] remember to try this though\n", " #coords[target[0]] = coords[len(coords) - 1]\n", " \n", " if passport:\n", " return coords,ID\n", " else:\n", " return coords\n", "\n", "\n", "def alien_prior_III(coords,target,vector2,angle,fst_a= 0.2,fst_b= .2,region= [-5,5],passport= False):\n", " ID= 'alien III'\n", " \n", " if angle >= region[0] and angle <= region[1]:\n", " coords[target[0]] = coords[target[0]] + [(10 * fst_b-1) * x for x in vector2]\n", " #coords[target[0]] = coords[target[0]] + [sin(angle) * x for x in vector2] remember to try this though\n", " #coords[target[0]] = coords[len(coords) - 1]\n", " else:\n", " coords[target[0]] = coords[target[0]] + [(10 * fst_a-1) * x for x in vector2]\n", " \n", " if passport:\n", " return coords,ID\n", " else:\n", " return coords\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\jgarcia\\Desktop\\Jupyter_stuff\\Genetic-data-analysis\\StructE_tools.py:398: RuntimeWarning:\n", "\n", "invalid value encountered in double_scalars\n", "\n" ] }, { "data": { "application/vnd.plotly.v1+json": { "config": { "linkText": "Export to plot.ly", "plotlyServerURL": "https://plot.ly", "showLink": true }, "data": [ { "mode": "markers", "name": "(0, 1)", "type": "scatter", "uid": "5c08c48b-2e25-40dc-a81a-55ad1f6391b8", "x": [ -10000, -9797, -9595, -9393, -9191, -8989, -8787, -8585, -8383, -8181, -7979, -7777, -7575, -7373, -7171, -6969, -6767, -6565, -6363, -6161, -5959, -5757, -5555, -5353, -5151, -4949, -4747, -4545, -4343, -4141, -3939, -3737, -3535, -3333, -3131, -2929, -2727, -2525, -2323, -2121, -1919, -1717, -1515, -1313, -1111, -909, -707, -505, -303, -101, 101, 303, 505, 707, 909, 1111, 1313, 1515, 1717, 1919, 2121, 2323, 2525, 2727, 2929, 3131, 3333, 3535, 3737, 3939, 4141, 4343, 4545, 4747, 4949, 5151, 5353, 5555, 5757, 5959, 6161, 6363, 6565, 6767, 6969, 7171, 7373, 7575, 7777, 7979, 8181, 8383, 8585, 8787, 8989, 9191, 9393, 9595, 9797, 10000 ], "y": [ -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, 0.27065714465332924, 0.27065714465332924, 0.27065714465332924, 0.27065714465332924, 0.27065714465332924, 0.27065714465332924, 0.27065714465332924, 0.27065714465332924, 0.27065714465332924, 0.27065714465332924, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17, -1.84857820227674e-17 ] }, { "mode": "markers", "name": "(0, 2)", "type": "scatter", "uid": "d4f2a47e-2a2f-4fec-98e8-6409cf0aa272", "x": [ -10000, -9797, -9595, -9393, -9191, -8989, -8787, -8585, -8383, -8181, -7979, -7777, -7575, -7373, -7171, -6969, -6767, -6565, -6363, -6161, -5959, -5757, -5555, -5353, -5151, -4949, -4747, -4545, -4343, -4141, -3939, -3737, -3535, -3333, -3131, -2929, -2727, -2525, -2323, -2121, -1919, -1717, -1515, -1313, -1111, -909, -707, -505, -303, -101, 101, 303, 505, 707, 909, 1111, 1313, 1515, 1717, 1919, 2121, 2323, 2525, 2727, 2929, 3131, 3333, 3535, 3737, 3939, 4141, 4343, 4545, 4747, 4949, 5151, 5353, 5555, 5757, 5959, 6161, 6363, 6565, 6767, 6969, 7171, 7373, 7575, 7777, 7979, 8181, 8383, 8585, 8787, 8989, 9191, 9393, 9595, 9797, 10000 ], "y": [ 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.204208618275184, 0.204208618275184, 0.204208618275184, 0.204208618275184, 0.204208618275184, 0.204208618275184, 0.204208618275184, 0.204208618275184, 0.204208618275184, 0.204208618275184, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352, 0.08617340880985352 ] }, { "mode": "markers", "name": "(0, 3)", "type": "scatter", "uid": "3436bf95-9d8a-4348-90a4-9960fc872a86", "x": [ -10000, -9797, -9595, -9393, -9191, -8989, -8787, -8585, -8383, -8181, -7979, -7777, -7575, -7373, -7171, -6969, -6767, -6565, -6363, -6161, -5959, -5757, -5555, -5353, -5151, -4949, -4747, -4545, -4343, -4141, -3939, -3737, -3535, -3333, -3131, -2929, -2727, -2525, -2323, -2121, -1919, -1717, -1515, -1313, -1111, -909, -707, -505, -303, -101, 101, 303, 505, 707, 909, 1111, 1313, 1515, 1717, 1919, 2121, 2323, 2525, 2727, 2929, 3131, 3333, 3535, 3737, 3939, 4141, 4343, 4545, 4747, 4949, 5151, 5353, 5555, 5757, 5959, 6161, 6363, 6565, 6767, 6969, 7171, 7373, 7575, 7777, 7979, 8181, 8383, 8585, 8787, 8989, 9191, 9393, 9595, 9797, 10000 ], "y": [ 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.1939710774484019, 0.1939710774484019, 0.1939710774484019, 0.1939710774484019, 0.1939710774484019, 0.1939710774484019, 0.1939710774484019, 0.1939710774484019, 0.1939710774484019, 0.1939710774484019, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908 ] }, { "mode": "markers", "name": "(1, 2)", "type": "scatter", "uid": "c02378af-0716-45de-8801-7410fa7626eb", "x": [ -10000, -9797, -9595, -9393, -9191, -8989, -8787, -8585, -8383, -8181, -7979, -7777, -7575, -7373, -7171, -6969, -6767, -6565, -6363, -6161, -5959, -5757, -5555, -5353, -5151, -4949, -4747, -4545, -4343, -4141, -3939, -3737, -3535, -3333, -3131, -2929, -2727, -2525, -2323, -2121, -1919, -1717, -1515, -1313, -1111, -909, -707, -505, -303, -101, 101, 303, 505, 707, 909, 1111, 1313, 1515, 1717, 1919, 2121, 2323, 2525, 2727, 2929, 3131, 3333, 3535, 3737, 3939, 4141, 4343, 4545, 4747, 4949, 5151, 5353, 5555, 5757, 5959, 6161, 6363, 6565, 6767, 6969, 7171, 7373, 7575, 7777, 7979, 8181, 8383, 8585, 8787, 8989, 9191, 9393, 9595, 9797, 10000 ], "y": [ 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535, 0.0861734088098535 ] }, { "mode": "markers", "name": "(1, 3)", "type": "scatter", "uid": "1f5a8a1d-430d-42ca-bd3b-10089648a140", "x": [ -10000, -9797, -9595, -9393, -9191, -8989, -8787, -8585, -8383, -8181, -7979, -7777, -7575, -7373, -7171, -6969, -6767, -6565, -6363, -6161, -5959, -5757, -5555, -5353, -5151, -4949, -4747, -4545, -4343, -4141, -3939, -3737, -3535, -3333, -3131, -2929, -2727, -2525, -2323, -2121, -1919, -1717, -1515, -1313, -1111, -909, -707, -505, -303, -101, 101, 303, 505, 707, 909, 1111, 1313, 1515, 1717, 1919, 2121, 2323, 2525, 2727, 2929, 3131, 3333, 3535, 3737, 3939, 4141, 4343, 4545, 4747, 4949, 5151, 5353, 5555, 5757, 5959, 6161, 6363, 6565, 6767, 6969, 7171, 7373, 7575, 7777, 7979, 8181, 8383, 8585, 8787, 8989, 9191, 9393, 9595, 9797, 10000 ], "y": [ 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908, 0.09504173357133908 ] }, { "mode": "markers", "name": "(2, 3)", "type": "scatter", "uid": "5bf481d4-4b4b-4843-9817-41daca9aa2a0", "x": [ -10000, -9797, -9595, -9393, -9191, -8989, -8787, -8585, -8383, -8181, -7979, -7777, -7575, -7373, -7171, -6969, -6767, -6565, -6363, -6161, -5959, -5757, -5555, -5353, -5151, -4949, -4747, -4545, -4343, -4141, -3939, -3737, -3535, -3333, -3131, -2929, -2727, -2525, -2323, -2121, -1919, -1717, -1515, -1313, -1111, -909, -707, -505, -303, -101, 101, 303, 505, 707, 909, 1111, 1313, 1515, 1717, 1919, 2121, 2323, 2525, 2727, 2929, 3131, 3333, 3535, 3737, 3939, 4141, 4343, 4545, 4747, 4949, 5151, 5353, 5555, 5757, 5959, 6161, 6363, 6565, 6767, 6969, 7171, 7373, 7575, 7777, 7979, 8181, 8383, 8585, 8787, 8989, 9191, 9393, 9595, 9797, 10000 ], "y": [ 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354, 0.09530394327317354 ] } ], "layout": { "title": "Fst across sets. prior: alien III", "xaxis": { "title": "eucledian distance in feature space" }, "yaxis": { "range": [ 0, 0.5 ], "title": "fsts" } } }, "text/html": [ "
" ], "text/vnd.plotly.v1+html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from Generate_samples import Gen_samples, Check_Path, plot_GenFst\n", "\n", "\n", "Sizes= [100,100,100,100]\n", "\n", "Npops= len(Sizes)\n", "\n", "range_diff= [-10,10]\n", "\n", "\n", "prior_kwargs= {\n", " 'fst_a': .0,\n", " 'fst_b': 0.3,\n", " 'region': [-3,-1]\n", "}\n", "\n", "prior_func= alien_prior_III\n", "\n", "fig, Pops, prior= Check_Path(Npops,vector_lib,prior_func,prior_kwargs,Pops= [],random= True,n_comp= L,range_diff= range_diff,steps= 100)\n", "\n", "iplot(fig)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\jgarcia\\Desktop\\Jupyter_stuff\\Genetic-data-analysis\\StructE_tools.py:398: RuntimeWarning:\n", "\n", "invalid value encountered in double_scalars\n", "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Done.\n" ] } ], "source": [ "# (Pops,Sizes,vector_lib,return_pca= False,n_comp= 100,prior= 'sinusoid',range_diff= [-10,10],steps= 100)\n", "\n", "SequenceStore, Fst_windows= Gen_samples(Pops,Sizes,vector_lib,prior_func,prior_kwargs,range_diff= range_diff,steps= 100)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'Fst_windows' is not defined", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m### Check that we got what we wanted.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mplot_GenFst\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mFst_windows\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mNpops\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;32m~\\Desktop\\Jupyter_stuff\\Genetic-data-analysis\\Generate_samples.py\u001b[0m in \u001b[0;36mplot_GenFst\u001b[1;34m(Fst_lib, Npops, Chr)\u001b[0m\n\u001b[0;32m 176\u001b[0m \u001b[0mmode\u001b[0m\u001b[1;33m=\u001b[0m \u001b[1;34m'markers'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 177\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m=\u001b[0m \u001b[1;34m'{}'\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mx\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcombinations\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mNpops\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 178\u001b[1;33m ) for i in range(len([x for x in it.combinations(range(Npops),2)]))\n\u001b[0m\u001b[0;32m 179\u001b[0m ]\n\u001b[0;32m 180\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Desktop\\Jupyter_stuff\\Genetic-data-analysis\\Generate_samples.py\u001b[0m in \u001b[0;36m\u001b[1;34m(.0)\u001b[0m\n\u001b[0;32m 176\u001b[0m \u001b[0mmode\u001b[0m\u001b[1;33m=\u001b[0m \u001b[1;34m'markers'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 177\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m=\u001b[0m \u001b[1;34m'{}'\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mx\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcombinations\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mNpops\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 178\u001b[1;33m ) for i in range(len([x for x in it.combinations(range(Npops),2)]))\n\u001b[0m\u001b[0;32m 179\u001b[0m ]\n\u001b[0;32m 180\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mNameError\u001b[0m: name 'Fst_windows' is not defined" ] } ], "source": [ "### Check that we got what we wanted.\n", "\n", "plot_GenFst(Fst_windows,Npops,1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Statistics on simulated data sets.\n", "\n", "### i. AMOVA" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'Sizes' is not defined", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 10\u001b[0m \u001b[0madmx_N\u001b[0m\u001b[1;33m=\u001b[0m \u001b[1;36m50\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 11\u001b[1;33m \u001b[0madmx_indx\u001b[0m\u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mchoice\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msum\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mSizes\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0madmx_N\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mreplace\u001b[0m\u001b[1;33m=\u001b[0m \u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 12\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 13\u001b[0m \u001b[0mref_indx\u001b[0m\u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mx\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msum\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mSizes\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mx\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[0madmx_indx\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mNameError\u001b[0m: name 'Sizes' is not defined" ] } ], "source": [ "\n", "### Perform Distance and association analysis on the data sets generated\n", "\n", "### Define reference and admixed associations:\n", "### for the purpose of this analysis exploration will be driven by\n", "### structure distance profiles. Since KDE will be used for exploration, \n", "### a set of accessions can be analysed that do not contribute to \n", "### distance profiles.\n", "\n", "admx_N= 50\n", "admx_indx= np.random.choice(range(sum(Sizes)),admx_N,replace= False)\n", "\n", "ref_indx= [x for x in range(sum(Sizes)) if x not in admx_indx]\n", "\n", "labels_pops= np.repeat(range(len(Sizes)),Sizes)\n", "\n", "##\n", "\n", "refs_lib= {x:[i for i in range(sum(Sizes)) if labels_pops[i] == x] for x in range(len(Sizes))}\n", "\n", "admx_lib= {(max(refs_lib.keys()) + 1): admx_indx}\n", "\n", "\n", "admx_lib.update(refs_lib)\n", "Geneo= admx_lib\n", "\n", "### Define parameters and libraries of analyses.\n", "DIMr = 'PCA'\n", "Bandwidth_split= 30\n", "ncomp_local= 4\n", "clsize= 15\n", "\n", "Results = {x:recursively_default_dict() for x in SequenceStore.keys()}\n", "\n", "Clover= []\n", "Coordinates= []\n", "Clusters_coords= []\n", "\n", "Dist_vectors= []\n", "Struct_dist_vectors= recursively_default_dict()\n", "\n", "Construct = recursively_default_dict()\n", "Distances_vectors= recursively_default_dict()\n", "PC_var= recursively_default_dict()\n", "center_distances= []\n", "\n", "Ref_stats= []\n", "\n", "\n", "for CHR in SequenceStore.keys():\n", " print('going on CHR: '+ str(CHR))\n", " for c in SequenceStore[CHR].keys():\n", " \n", " print('data set: {}'.format(c))\n", " ### PCA and MeanShift of information from each window copied from *FM36_Galaxy.py.\n", " Sequences = SequenceStore[CHR][c]\n", " \n", " Sequences= np.nan_to_num(Sequences)\n", " \n", " print(Sequences.shape)\n", " \n", " #### Dimensionality reduction\n", " \n", " if DIMr == 'PCA':\n", " pca = PCA(n_components=ncomp_local, whiten=False,svd_solver='randomized').fit(Sequences)\n", " data = pca.transform(Sequences)\n", " PC_var[CHR][c]= [x for x in pca.explained_variance_]\n", " \n", " if DIMr == 'NMF':\n", " from sklearn.decomposition import NMF\n", " data = NMF(n_components=ncomp_local, init='random', random_state=0).fit_transform(Sequences)\n", " \n", " Accurate = []\n", " \n", " params = {'bandwidth': np.linspace(np.min(data), np.max(data),Bandwidth_split)}\n", " grid = GridSearchCV(KernelDensity(algorithm = \"ball_tree\",breadth_first = False), params,verbose=0)\n", " \n", " ######################################\n", " ####### TEST global Likelihood #######\n", " ######################################\n", " Focus_labels = [z for z in it.chain(*refs_lib.values())]\n", " \n", " #### Mean Shift approach\n", " ## from sklearn.cluster import MeanShift, estimate_bandwidth\n", "\n", " bandwidth = estimate_bandwidth(data, quantile=0.2, n_samples=len(Focus_labels))\n", " if bandwidth <= 1e-3:\n", " bandwidth = 0.1\n", "\n", " ms = MeanShift(bandwidth=bandwidth, cluster_all=False, min_bin_freq=15)\n", " ms.fit(data[Focus_labels,:])\n", " labels = ms.labels_\n", " \n", "\n", " Tree = {x:[Focus_labels[y] for y in range(len(labels)) if labels[y] == x] for x in [g for g in list(set(labels)) if g != -1]}\n", " Keep= [x for x in Tree.keys() if len(Tree[x]) > clsize]\n", " \n", " Tree= {x:Tree[x] for x in Keep}\n", " \n", " print('N clusters: {}, Sizes: {}'.format(len(Tree),[len(x) for x in Tree.values()]))\n", " SpaceX = {x:data[Tree[x],:] for x in Tree.keys()}\n", " \n", " ### Extract MScluster likelihood by sample\n", " \n", " for hill in SpaceX.keys():\n", " \n", " grid.fit(data[Tree[hill],:])\n", " \n", " # use the best estimator to compute the kernel density estimate\n", " kde = grid.best_estimator_\n", " \n", " P_dist = kde.score_samples(data[Tree[hill],:])\n", " Dist = kde.score_samples(data)\n", " P_dist= np.nan_to_num(P_dist)\n", " Dist= np.nan_to_num(Dist)\n", " if np.std(P_dist) == 0:\n", " Dist= np.array([int(Dist[x] in P_dist) for x in range(len(Dist))])\n", " else:\n", " Dist = scipy.stats.norm(np.mean(P_dist),np.std(P_dist)).cdf(Dist)\n", " Dist= np.nan_to_num(Dist)\n", " Construct[CHR][c][hill] = Dist\n", " \n", " ######################################### \n", " ############# AMOVA ################\n", " #########################################\n", " \n", " Who = [x for x in range(len(labels)) if labels[x] != -1 and labels[x] in Keep]\n", " labels = [labels[x] for x in Who]\n", " Who= [Focus_labels[x] for x in Who]\n", " \n", " AMOVA,Cig = Ste.findPhiPT(Sequences[Who,:],labels,n_boot=0)\n", " \n", " ########################################################################\n", " ############## Distances ###############################################\n", " ########################################################################\n", " \n", " Dist_vectors= Ste.Distance_profiles(data,Tree,1000,Bandwidth_split)\n", " \n", " if Dist_vectors:\n", " Distances_vectors[CHR][c]= Dist_vectors\n", " \n", " Structures= Ste.Structure_profiles(data,Tree,1000,Bandwidth_split)\n", " if Structures:\n", " Struct_dist_vectors[CHR][c]= Structures\n", " \n", " ################ Other stats\n", " SIL = silhouette_score(data[Who,:],labels)\n", " \n", " df_between = len(Tree) - 1\n", " df_within = sum([len(Tree[x]) for x in Tree.keys()]) - len(Tree)\n", " df_total = sum([len(Tree[x]) for x in Tree.keys()]) - 1\n", " \n", " Total = data[Who,:]\n", " mu = np.mean(Total)\n", " sd = np.std(Total)\n", " Total = (Total - mu) / sd\n", " \n", " T = np.sqrt(((Total - Total.mean(axis=0))**2).sum(axis=1)).sum()**2 / len(Total)\n", " Y = (np.sqrt(((Total - Total.mean(axis=0))**2).sum(axis = 1))**2).sum()\n", " \n", " SS_between = -T\n", " SS_within = Y \n", " \n", " t_s = np.empty([len(Total),0])\n", " \n", " for gp in Tree.keys():\n", " Gp = Total[[x for x in range(Total.shape[0]) if labels[x] == gp],:]\n", " Gp = (Gp - mu) / sd\n", " SS_between += (np.sqrt(((Gp - Total.mean(axis=0))**2).sum(axis = 1)).sum()**2 / len(Gp))\n", " SS_within -= (np.sqrt(((Gp - Total.mean(axis=0))**2).sum(axis = 1)**2).sum() / len(Gp))\n", " t_s = np.append(t_s,np.sqrt(((Gp - Gp.mean(axis=0))**2)).sum(axis = 1))\n", " \n", " \n", " SS_total = Y - T\n", " MS_between = SS_between / df_between\n", " MS_within = SS_within / df_within\n", " \n", " sqrt_center_dists= np.sqrt(((Total - Total.mean(axis=0))**2)).sum(axis = 1)\n", " \n", " t_stat,p_val = scipy.stats.ttest_rel(t_s,sqrt_center_dists)\n", " \n", " F = MS_between / float(MS_within)\n", " N_PC= len(Tree)\n", " \n", " eta_sqrd = SS_between / SS_total\n", " om_sqrd = (SS_between - (df_between * MS_within))/(SS_total + MS_within)\n", " \n", " Results[CHR][c] = [N_PC,F,om_sqrd,MS_between,t_stat,AMOVA,SIL,len(Tree),]\n", "\n" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(40, 9)\n" ] }, { "data": { "application/vnd.plotly.v1+json": { "data": [ { "mode": "markers", "name": "(0, 1)", "type": "scatter", "x": [ 0.09824091498382699, 0.09824091498382699, 0.09824091498382699, 0.09824091498382699, 0.09824091498382699, 0.09824091498382699, 0.09824091498382699, 0.09824091498382699, 0.09824091498382699, 0.09824091498382699, 1.212532881991734e-13, 1.212532881991734e-13, 1.212532881991734e-13, 1.212532881991734e-13, 1.212532881991734e-13, 1.212532881991734e-13, 1.212532881991734e-13, 1.212532881991734e-13, 1.212532881991734e-13, 1.212532881991734e-13, 0.09824091498382699, 0.09824091498382699, 0.09824091498382699, 0.09824091498382699, 0.09824091498382699, 0.09824091498382699, 0.09824091498382699, 0.09824091498382699, 0.09824091498382699, 0.09824091498382699, 0.09824091498382699, 0.09824091498382699, 0.09824091498382699, 0.09824091498382699, 0.09824091498382699, 0.09824091498382699, 0.09824091498382699, 0.09824091498382699, 0.09824091498382699, 0.09824091498382699 ], "y": [ 0.2201672071377841, 0.22519583708824992, 0.22986757827281867, 0.2240035213057148, 0.2196106339864295, 0.2170025917464763, 0.21229836791183182, 0.2236800102197745, 0.21917141772083876, 0.22794808387718124, 0.25593454851933484, 0.25795754321773273, 0.2513012672416773, 0.25520338867214076, 0.2618768018728513, 0.2664984346252583, 0.25162297362694996, 0.2563951495319642, 0.2484933833978819, 0.2555562722868498, 0.22628044113560944, 0.22405078800207792, 0.22779922037302056, 0.22455191626265536, 0.22542371840554465, 0.22332050149611785, 0.22353155910291533, 0.2221481527702643, 0.2165619539908821, 0.22829118995650172, 0.22267841971176713, 0.22228193395759058, 0.2380596965791873, 0.22890653594300273, 0.23101982968328233, 0.2325107493229701, 0.22881296000196913, 0.2277022679440118, 0.21782852302132574, 0.22955682893961402 ] }, { "mode": "markers", "name": "(0, 2)", "type": "scatter", "x": [ 0.09256619730862459, 0.09256619730862459, 0.09256619730862459, 0.09256619730862459, 0.09256619730862459, 0.09256619730862459, 0.09256619730862459, 0.09256619730862459, 0.09256619730862459, 0.09256619730862459, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.09256619730862459, 0.09256619730862459, 0.09256619730862459, 0.09256619730862459, 0.09256619730862459, 0.09256619730862459, 0.09256619730862459, 0.09256619730862459, 0.09256619730862459, 0.09256619730862459, 0.09256619730862459, 0.09256619730862459, 0.09256619730862459, 0.09256619730862459, 0.09256619730862459, 0.09256619730862459, 0.09256619730862459, 0.09256619730862459, 0.09256619730862459, 0.09256619730862459 ], "y": [ 0.2201672071377841, 0.22519583708824992, 0.22986757827281867, 0.2240035213057148, 0.2196106339864295, 0.2170025917464763, 0.21229836791183182, 0.2236800102197745, 0.21917141772083876, 0.22794808387718124, 0.25593454851933484, 0.25795754321773273, 0.2513012672416773, 0.25520338867214076, 0.2618768018728513, 0.2664984346252583, 0.25162297362694996, 0.2563951495319642, 0.2484933833978819, 0.2555562722868498, 0.22628044113560944, 0.22405078800207792, 0.22779922037302056, 0.22455191626265536, 0.22542371840554465, 0.22332050149611785, 0.22353155910291533, 0.2221481527702643, 0.2165619539908821, 0.22829118995650172, 0.22267841971176713, 0.22228193395759058, 0.2380596965791873, 0.22890653594300273, 0.23101982968328233, 0.2325107493229701, 0.22881296000196913, 0.2277022679440118, 0.21782852302132574, 0.22955682893961402 ] }, { "mode": "markers", "name": "(1, 2)", "type": "scatter", "x": [ 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485, 0.1091932504058485 ], "y": [ 0.2201672071377841, 0.22519583708824992, 0.22986757827281867, 0.2240035213057148, 0.2196106339864295, 0.2170025917464763, 0.21229836791183182, 0.2236800102197745, 0.21917141772083876, 0.22794808387718124, 0.25593454851933484, 0.25795754321773273, 0.2513012672416773, 0.25520338867214076, 0.2618768018728513, 0.2664984346252583, 0.25162297362694996, 0.2563951495319642, 0.2484933833978819, 0.2555562722868498, 0.22628044113560944, 0.22405078800207792, 0.22779922037302056, 0.22455191626265536, 0.22542371840554465, 0.22332050149611785, 0.22353155910291533, 0.2221481527702643, 0.2165619539908821, 0.22829118995650172, 0.22267841971176713, 0.22228193395759058, 0.2380596965791873, 0.22890653594300273, 0.23101982968328233, 0.2325107493229701, 0.22881296000196913, 0.2277022679440118, 0.21782852302132574, 0.22955682893961402 ] } ], "layout": { "title": "Stats", "xaxis": { "title": "Manipulated Fst" }, "yaxis": { "title": "AMOVA" } } }, "text/html": [ "
" ], "text/vnd.plotly.v1+html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "Fst_pairs= [i for i in it.combinations(range(len(Sizes)),2)]\n", "\n", "Fst_lib= []\n", "for pair in range(len(Fst_pairs)):\n", " Fst_crawl= [[Fst_windows[Chr][n].fst[pair] for n in Fst_windows[Chr].keys()] for Chr in Fst_windows.keys()]\n", "\n", " Fst_crawl= [z for z in it.chain(*Fst_crawl)]\n", " \n", " Fst_lib.append(Fst_crawl)\n", "\n", "Other_stats= [[[Chr,wind,*Results[Chr][wind]] for wind in Results[Chr].keys()] for Chr in Results.keys()]\n", "Other_stats= np.array([y for y in it.chain(*Other_stats)])\n", "\n", "print(Other_stats.shape)\n", "\n", "# columns [CHR,window,N_PC,F,om_sqrd,MS_between,t_stat,AMOVA,SIL]\n", "\n", "fig_data= [go.Scatter(\n", "x= Fst_lib[i],\n", "y= Other_stats[:,7],\n", "mode= 'markers',\n", "name= str(Fst_pairs[i])\n", ") for i in range(len(Fst_pairs))]\n", "\n", "layout = go.Layout(\n", " title= 'Stats',\n", " yaxis=dict(\n", " title='AMOVA'),\n", " xaxis=dict(\n", " title='Manipulated Fst')\n", ")\n", "\n", "fig= go.Figure(data=fig_data, layout=layout)\n", "iplot(fig)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "data": [ { "mode": "markers", "name": "Results", "type": "scatter", "x": [ 0.6229019715385639, 0.6298822714407467, 0.6337030337688662, 0.6206982428597211, 0.6148760345937221, 0.6081529514795814, 0.6136670184928292, 0.6236810458515997, 0.6217799583540478, 0.6337144402727556, 0.6377827675911979, 0.6447595516017303, 0.6359483673815086, 0.6442520328721892, 0.6526286519495939, 0.6584177424992343, 0.6417532522933405, 0.6428642071420255, 0.6358710212582372, 0.6491556247386855, 0.6285672965549307, 0.628644295817551, 0.621375834327133, 0.620183281566735, 0.6260214091457058, 0.6267661898079866, 0.6286515761807213, 0.6351570231955426, 0.6251636622891935, 0.6249210898758731, 0.6230297343766263, 0.6229155186627381, 0.6303576391676838, 0.6362297734958873, 0.6196872463475834, 0.6395968175874038, 0.626491615774999, 0.6385725274334607, 0.6235924146080484, 0.6397973101335551 ], "y": [ 0.2201672071377841, 0.22519583708824992, 0.22986757827281867, 0.2240035213057148, 0.2196106339864295, 0.2170025917464763, 0.21229836791183182, 0.2236800102197745, 0.21917141772083876, 0.22794808387718124, 0.25593454851933484, 0.25795754321773273, 0.2513012672416773, 0.25520338867214076, 0.2618768018728513, 0.2664984346252583, 0.25162297362694996, 0.2563951495319642, 0.2484933833978819, 0.2555562722868498, 0.22628044113560944, 0.22405078800207792, 0.22779922037302056, 0.22455191626265536, 0.22542371840554465, 0.22332050149611785, 0.22353155910291533, 0.2221481527702643, 0.2165619539908821, 0.22829118995650172, 0.22267841971176713, 0.22228193395759058, 0.2380596965791873, 0.22890653594300273, 0.23101982968328233, 0.2325107493229701, 0.22881296000196913, 0.2277022679440118, 0.21782852302132574, 0.22955682893961402 ] } ], "layout": { "title": "Stats", "xaxis": { "title": "SIL" }, "yaxis": { "title": "AMOVA" } } }, "text/html": [ "
" ], "text/vnd.plotly.v1+html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig_data= [go.Scatter(\n", "x= Other_stats[:,8],\n", "y= Other_stats[:,7],\n", "mode= 'markers',\n", "name= 'Results'\n", ")]\n", "\n", "layout = go.Layout(\n", " title= 'Stats',\n", " yaxis=dict(\n", " title='AMOVA'),\n", " xaxis=dict(\n", " title='SIL')\n", ")\n", "\n", "fig= go.Figure(data=fig_data, layout=layout)\n", "iplot(fig)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "PC1: 0.3; PC2: 0.24; PC3: 0.017; PC4: 0.013\n", "features shape: (110, 4)\n" ] }, { "data": { "application/vnd.plotly.v1+json": { "data": [ { "marker": { "line": { "width": 0 }, "opacity": 1, "size": 4, "symbol": "circle" }, "mode": "markers", "name": "Cl: 0", "text": [ "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0" ], "type": "scatter3d", "x": [ 3.7705705945961157, 3.4192463138768447, 3.9051463675244067, 3.579515607270975, 3.6433598436196313, 3.6436574542691607, 3.7133044005534073, 3.681728564180892, 4.051434858749373, 3.3893195831104364, 3.4584151573982687, 3.57382295621053, 3.5916551710573086, 3.458820146570336, 3.4130209110177074, 3.352246750939878, 3.265238465272443, 3.2376713419257452, 3.4142396576590284, 3.4881720322944085, 3.711826999852226, 3.6448413367089225, 3.892850449051025, 3.8270996757498668, 3.776954146569246, 3.428225328965673, 3.392195546042662, 3.3497771513002546, 3.619747261576664, 3.6186581889244254, 3.6158047549787526, 3.6126489025033863, 3.548999110399357, 3.5528235673690194, 3.673500981136255, 3.5221598230336753, 3.874840669644421, 3.5036992559506497, 3.47188194017248, 3.604110796725653 ], "y": [ -0.12292385352551176, -0.1087347666907387, -0.12830867620597095, -0.1152287315932492, -0.11778674701633227, -0.11782352284663407, -0.12063406603222654, -0.11933936127367069, -0.13424353767113165, -0.10754196348171091, -0.1103164520460867, -0.11500255646170822, -0.11569454765585527, -0.11034479584954365, -0.10852199034118692, -0.10604657198819505, -0.10256197277565175, -0.10140579811531072, -0.10852029078579176, -0.11154548479034586, -0.12052869020659691, -0.11784281238799572, -0.12785574843558445, -0.12519940579606992, -0.12316600184922175, -0.1091095829815844, -0.10764084612653392, -0.10594938395158343, -0.11685455939131914, -0.11680951698849072, -0.1166418470639531, -0.1165261105615946, -0.11397947527861933, -0.11417962733572784, -0.1190147470420068, -0.11293773120242313, -0.12714643808353163, -0.11216656475550849, -0.11086638878439367, -0.11621605393656355 ], "z": [ 0.2700057247765197, -0.0535221427708672, 0.3014763222977312, 0.15035279259375744, 0.1581842237292691, 0.22774495983080786, 0.26279467827726916, 0.20757785041621987, 0.5114970132241557, -0.049891070788849134, -0.042447454102020124, 0.14548728127984223, 0.10884257052474938, 0.008917562461017476, 0.033624690339024974, -0.08437234819107395, -0.09637782762678718, -0.20683139007405155, -0.08840449190673032, 0.06518340016913575, 0.1915396686911814, 0.15406872943029287, 0.38988089931810543, 0.319394200075584, 0.25372065294835244, -0.014181422831377506, -0.0825181394790446, -0.10193587575248066, 0.18244226271146785, 0.17281069133864596, 0.06615194201436482, 0.0807120239045832, 0.07903328451151657, 0.1823607838242619, 0.20590844418371185, 0.15745775989552183, 0.4079996714023242, 0.07579633663320601, 0.009192585328595541, 0.1566071058369039 ] }, { "marker": { "line": { "width": 0 }, "opacity": 1, "size": 4, "symbol": "circle" }, "mode": "markers", "name": "Cl: 1", "text": [ "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1" ], "type": "scatter3d", "x": [ -1.8888330888818408, -1.9285725070707223, -2.0429905359940834, -2.0424342398474518, -2.023312828704846, -2.0289595810995142, -2.0128767045481055, -2.036233381318145, -1.915515639583193, -1.9711375553515795, -1.8854451046755945, -1.8339922834396203, -1.9792880478489512, -1.9433058964826473, -1.9268748303520102, -1.941450764003067, -1.9848784570186941, -1.9080285829518753, -1.9488348630871672, -2.0026199946132897, -1.9935793881550128, -2.0837208612496316, -2.0971702683685716, -2.060934343168008, -1.9958533674743104, -2.0292058119484357, -1.977975618184646, -1.7934401920596985, -2.134939969581798, -2.107004649881707 ], "y": [ -3.126596722164929, -3.210351374933282, -3.348888959587218, -3.378919757449134, -3.3412414934692274, -3.3291277379295, -3.3042741160814164, -3.3215922372955737, -3.175793967347201, -3.2462779430238142, -3.1672567962328375, -3.05355948130418, -3.320633931026012, -3.2534929483731627, -3.1977758720386165, -3.266881556959095, -3.2922014692212747, -3.1731525340117215, -3.1980195009779275, -3.3056388147890545, -3.32828744140226, -3.401769030443802, -3.426799259280715, -3.413221668579543, -3.2961716932269662, -3.3679867837712836, -3.3001384428736595, -3.048376637161387, -3.471074593159022, -3.4419325946260093 ], "z": [ -0.20945332688619445, -0.40531889275143534, -0.09430119976741656, -0.277686623447269, -0.38635650795630166, -0.2378687954006072, -0.057545796979897985, 0.004330655032052149, -0.30968419295021016, -0.34492371451819703, -0.5725944265783425, -0.2881386882982061, -0.7039462439567465, -0.5534440805439591, -0.3776249286951275, -0.8573096921414703, -0.3063026020059674, -0.3994496387291621, 0.0514015319423935, -0.32872470506321444, -0.544008465508981, -0.07278846222808946, -0.14913692754658922, -0.3996652941221834, -0.4494004727025436, -0.5146754301890052, -0.6607034247054666, -0.9511998154034075, -0.07890999004662005, -0.21908972943894667 ] }, { "marker": { "line": { "width": 0 }, "opacity": 1, "size": 4, "symbol": "circle" }, "mode": "markers", "name": "Cl: 2", "text": [ "gp: 2", "gp: 2", "gp: 2", "gp: 2", "gp: 2", "gp: 2", "gp: 2", "gp: 2", "gp: 2", "gp: 2", "gp: 2", "gp: 2", "gp: 2", "gp: 2", "gp: 2", "gp: 2", "gp: 2", "gp: 2", "gp: 2", "gp: 2", "gp: 2", "gp: 2", "gp: 2", "gp: 2", "gp: 2", "gp: 2", "gp: 2" ], "type": "scatter3d", "x": [ -1.7568581566025356, -1.797334882045229, -1.6358642256008469, -1.8338838127321235, -1.6316104457839542, -1.6625161410404368, -1.7450983219574452, -1.6737789981610303, -1.812691416686582, -1.6790090182724615, -1.7430788712152339, -1.6744002303935737, -1.721488783166913, -1.6584142046449377, -1.7987342658822199, -1.8076500539418763, -1.6004988449510316, -1.6355266637774692, -1.6534703376574067, -1.5775096166315694, -1.6013787197207945, -1.7365565279053974, -1.571684456005316, -1.5887253043591294, -1.7511409468320214, -1.8006222861123435, -1.681633613120429 ], "y": [ 3.476668021067126, 3.5091575547523233, 3.2644648009433994, 3.5581734085981767, 3.3000097854186934, 3.3132293249096674, 3.4244164140619255, 3.2872850592740552, 3.522828218050016, 3.332659966341833, 3.4163667333057317, 3.2811503047824364, 3.407428835597281, 3.2960129052939253, 3.4906734756680113, 3.541004836445198, 3.2214419697300993, 3.2483618700025567, 3.2989423434953293, 3.128056850733062, 3.2341298099266362, 3.4570674815875844, 3.1950070481712, 3.1896613979954727, 3.39835114722501, 3.546994929660185, 3.313925205571215 ], "z": [ -0.3478910763601543, -0.3113100881541635, -0.5092266018808983, -0.021471882893554537, -0.9225804995614147, -0.5769027062478737, -0.3367802539037315, -0.13052084598877584, -0.1959746572226101, -0.5932335767376531, -0.1927618986331216, -0.09180707788969372, -0.5684818954518617, -0.3392523353651947, -0.025026956581288222, -0.3166676604990051, -0.6440233056023068, -0.3629823006402827, -0.5375721438157037, -0.16398646671674158, -0.8111358308423713, -0.7761430250238188, -0.8508807619833437, -0.6420659712470614, 0.2574075241573552, -0.4429669568988667, -0.3686689344074161 ] }, { "marker": { "line": { "width": 0 }, "opacity": 1, "size": 4, "symbol": "circle" }, "mode": "markers", "name": "Cl: 3", "text": [ "gp: 3", "gp: 3", "gp: 3" ], "type": "scatter3d", "x": [ -3.2500937518291915, -3.4648979015706542, -3.295995958927121 ], "y": [ 0.08295983898169498, 0.1803793550684863, 0.2945632593856547 ], "z": [ 2.173865727421791, 2.5933786480650394, 1.8456812296754288 ] }, { "marker": { "line": { "width": 0 }, "opacity": 1, "size": 4, "symbol": "circle" }, "mode": "markers", "name": "Cl: 4", "text": [ "gp: 4", "gp: 4", "gp: 4" ], "type": "scatter3d", "x": [ -3.2163606169484718, -3.2505387310690916, -3.2802967490808155 ], "y": [ 0.22041877850277422, -0.005492911860674184, 0.6590187400373559 ], "z": [ 0.924811770416763, 1.4681575067032495, 1.5222610146416415 ] }, { "marker": { "line": { "width": 0 }, "opacity": 1, "size": 4, "symbol": "circle" }, "mode": "markers", "name": "Cl: 5", "text": [ "gp: 5", "gp: 5" ], "type": "scatter3d", "x": [ -3.2309453879850993, -3.4220660297079633 ], "y": [ 0.02216402683943777, 0.28175451313978495 ], "z": [ 1.9489235140444694, 2.4656835059239532 ] }, { "marker": { "line": { "width": 0 }, "opacity": 1, "size": 4, "symbol": "circle" }, "mode": "markers", "name": "Cl: 6", "text": [ "gp: 6" ], "type": "scatter3d", "x": [ -3.425000967854712 ], "y": [ 0.2505004946491857 ], "z": [ 2.0214985687768716 ] }, { "marker": { "line": { "width": 0 }, "opacity": 1, "size": 4, "symbol": "circle" }, "mode": "markers", "name": "Cl: 7", "text": [ "gp: 7" ], "type": "scatter3d", "x": [ -3.0398975519953315 ], "y": [ 0.47021425056561417 ], "z": [ 0.4924600148988797 ] } ], "layout": { "scene": { "xaxis": { "title": "0.3" }, "yaxis": { "title": "0.24" }, "zaxis": { "title": "0.017" } }, "title": "PCA red + MS clustering KDE profiles across windows" } }, "text/html": [ "
" ], "text/vnd.plotly.v1+html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#####\n", "KDE_stats= [[[Construct[Chr][wind][gp] for gp in sorted(Construct[Chr][wind].keys())] for wind in sorted(Construct[Chr].keys())] for Chr in Construct.keys()]\n", "KDE_stats= np.array([y for y in it.chain(*[z for z in it.chain(*KDE_stats)])])\n", "\n", "KDE_stats= np.array(KDE_stats)\n", "\n", "n_comp = 4\n", "\n", "pca1 = PCA(n_components=n_comp, whiten=False,svd_solver='randomized')\n", "likelihood_feats = pca1.fit_transform(KDE_stats)\n", "var_comps= pca1.explained_variance_ratio_\n", "\n", "print(\"; \".join(['PC{0}: {1}'.format(x+1,round(pca1.explained_variance_ratio_[x],3)) for x in range(n_comp)]))\n", "print('features shape: {}'.format(likelihood_feats.shape))\n", "\n", "## perform MeanShift clustering.\n", "iu1 = np.triu_indices(KDE_stats.shape[0])\n", "\n", "bandwidth = estimate_bandwidth(likelihood_feats, quantile=0.20)\n", "params = {'bandwidth': np.linspace(np.min(likelihood_feats), np.max(likelihood_feats),30)}\n", "grid = GridSearchCV(KernelDensity(algorithm = \"ball_tree\",breadth_first = False), params,verbose=0)\n", "\n", "ms = MeanShift(bandwidth=bandwidth, bin_seeding=False, cluster_all=False, min_bin_freq=15)\n", "ms.fit(likelihood_feats)\n", "labels1 = ms.labels_\n", "label_select = {y:[x for x in range(len(labels1)) if labels1[x] == y] for y in sorted(list(set(labels1))) if y != -1}\n", "\n", "MS_centroids= [np.mean(likelihood_feats[label_select[z],:],axis= 0) for z in label_select.keys()]\n", "MS_pair_dist= pairwise_distances(MS_centroids,metric= 'euclidean')\n", "#MS_pair_dist= MS_pair_dist[iu_control]\n", "\n", "\n", "\n", "fig_data= [go.Scatter3d(\n", " x = likelihood_feats[label_select[i],0],\n", " y = likelihood_feats[label_select[i],1],\n", " z = likelihood_feats[label_select[i],2],\n", " type='scatter3d',\n", " mode= \"markers\",\n", " name= 'Cl: {}'.format(i),\n", " text= ['gp: {}'.format(i) for x in label_select[i]],\n", " marker= {\n", " 'line': {'width': 0},\n", " 'size': 4,\n", " 'symbol': 'circle',\n", " \"opacity\": 1\n", " }\n", " ) for i in label_select.keys()]\n", "\n", "\n", "\n", "layout = go.Layout(\n", " title= 'PCA red + MS clustering KDE profiles across windows',\n", " \n", " scene= Scene(\n", " yaxis=dict(\n", " title='{}'.format(round(var_comps[1],3))),\n", " xaxis=dict(\n", " title= '{}'.format(round(var_comps[0],3))),\n", " zaxis=dict(\n", " title= '{}'.format(round(var_comps[2],3))))\n", ")\n", "\n", "\n", "fig = go.Figure(data=fig_data,layout= layout)\n", "iplot(fig)\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Distances" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Iter: 0, vectors selected: [ 80 72 108 94], hap length: 150\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\jgarcia\\Desktop\\Jupyter_stuff\\Distance_stats\\StructE_tools.py:398: RuntimeWarning:\n", "\n", "invalid value encountered in double_scalars\n", "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Iter: 1, vectors selected: [159 138 146 6 158 118], hap length: 150\n", "Iter: 2, vectors selected: [132 134 42 72], hap length: 150\n", "Iter: 3, vectors selected: [ 26 72 181 93 177 165], hap length: 150\n", "Iter: 4, vectors selected: [ 71 39 120], hap length: 150\n", "Iter: 5, vectors selected: [ 22 116 123 184 111], hap length: 150\n", "Iter: 6, vectors selected: [ 84 90 172 78 137], hap length: 150\n", "Iter: 7, vectors selected: [ 46 118 23 19 27 127], hap length: 150\n", "Iter: 8, vectors selected: [194 143 170 13 17 0 14], hap length: 150\n", "Iter: 9, vectors selected: [168 120 177 8], hap length: 150\n", "Iter: 10, vectors selected: [189 14 73 31 85 123 33], hap length: 150\n", "Iter: 11, vectors selected: [ 88 123 83 129 142], hap length: 150\n", "Iter: 12, vectors selected: [ 57 74 171 39 93], hap length: 150\n", "Iter: 13, vectors selected: [ 45 94 167 107 199 142 143], hap length: 150\n", "Iter: 14, vectors selected: [ 94 77 104], hap length: 150\n", "Iter: 15, vectors selected: [163 19 172], hap length: 150\n", "Iter: 16, vectors selected: [178 164 76 81], hap length: 150\n", "Iter: 17, vectors selected: [133 15 73], hap length: 150\n", "Iter: 18, vectors selected: [130 65 101 105], hap length: 150\n", "Iter: 19, vectors selected: [ 32 74 85 140 93], hap length: 150\n" ] } ], "source": [ "### Converting distances to fst's.\n", "\n", "### Select pre and post processing measures. \n", "Eigen = False\n", "Scale= False\n", "Center= True\n", "\n", "MixL= False # select if to mix N_Pops or not.\n", "Length_increment= False\n", "L_step= 5\n", "MixP= True # select if to mix lengths or not. \n", "Pairs= False # select if comparing Pairs of distances or the distances themselves\n", "Control_inc= True\n", "Predict= False\n", "\n", "length_haps= vector_lib.shape[1]\n", "length_range= [75,vector_lib.shape[1]]\n", "length_step= 10\n", "\n", "pop_max= 8 # Number of pops\n", "\n", "n_comp= 10 # components to keep following PCA\n", "\n", "Iter= 20 # repeats\n", "\n", "N_sims= 100 # number of haplotypes to generate from each pop in the unbiased scenario.\n", "\n", "#### Predict\n", "predicted= []\n", "\n", "#def controled_fsts(vector_lib,Eigen,length_haps,Scale,Center,N_pops,n_comp,Iter,N_sims,MixL,MixP,Pairs):\n", "lengths_vector= []\n", "\n", "### Control set to include in the transformation:\n", "control_vecs= np.random.choice(range(vector_lib.shape[0]),2)\n", "control_labels= np.repeat([0,1],N_sims)\n", "### Control set distances\n", "control_even_distances= []\n", "control_bias_distances= []\n", "\n", "### store distances between centroids\n", "biased_pairwise= []\n", "unbiased_pairwise= []\n", "corrected_pairwise= []\n", "\n", "### store PC projection:\n", "dist_PC_even= {x:[] for x in range(n_comp)}\n", "dist_PC_bias= {x:[] for x in range(n_comp)}\n", "dist_PC_corrected= {x:[] for x in range(n_comp)}\n", "\n", "### store increemental PC distances\n", "dist_increment_even= {x:[] for x in range(1,n_comp)}\n", "dist_increment_bias= {x:[] for x in range(1,n_comp)} \n", "\n", "### store fsts\n", "fst_store= []\n", "\n", "\n", "### proceed.\n", "\n", "for rep in range(Iter):\n", " \n", " if MixP:\n", " N_pops= np.random.choice(range(3,pop_max),1,replace= False)[0]\n", " else: \n", " N_pops= pop_max\n", " \n", " if MixL:\n", " length_haps= np.random.choice(length_range,1)[0]\n", " \n", " if Length_increment:\n", " length_haps= int(length_range[0] + L_step * np.floor(rep / L_step))\n", " \n", " ## Population Sizes and labels\n", " bias_scheme= np.random.choice(range(25,200),N_pops,replace= False)\n", " unbiased_sheme= np.repeat(N_sims,N_pops)\n", "\n", " bias_labels= np.repeat(np.array([x for x in range(N_pops)]),bias_scheme)\n", " unbias_labels= np.repeat(np.array([x for x in range(N_pops)]),unbiased_sheme)\n", "\n", " ### triangular matrices extract.\n", " iu1= np.triu_indices(N_pops,1) # for centroid comparison\n", "\n", " iu_unbias= np.triu_indices(sum(unbiased_sheme),1)\n", " iu_bias= np.triu_indices(sum(bias_scheme),1)\n", " \n", " iu_control= np.triu_indices(2,1)\n", " \n", " Pops= np.random.choice(vector_lib.shape[0],N_pops,replace= False)\n", " print('Iter: {}, vectors selected: {}, hap length: {}'.format(rep,Pops,length_haps))\n", " ########## FST\n", "\n", " freqs_selected= vector_lib[Pops,:length_haps]\n", " Pairwise= Ste.return_fsts2(freqs_selected)\n", "\n", " #fsts_compare = scale(Pairwise.fst)\n", " fsts_compare= Pairwise.fst\n", " if Pairs:\n", " t= fsts_compare\n", " fsts_compare= [min([t[y] for y in z]) / max([t[y] for y in z]) for z in it.combinations(range(len(t)),2)]\n", " \n", " fst_store.extend(fsts_compare)\n", "\n", " ## lengths\n", " lengths_vector.extend([length_haps] * len(fsts_compare))\n", " \n", " #########################################################\n", " ########### PCA ####################################\n", " #########################################################\n", " ### control sample\n", " \n", " control_data= []\n", "\n", " for k in range(2):\n", "\n", " probs= vector_lib[control_vecs[k],:length_haps]\n", " probs[probs < 0] = 0\n", " probs[probs > 1] = 1\n", " m= unbiased_sheme[k]\n", " Haps= [[np.random.choice([1,0],p= [1-probs[x],probs[x]]) for x in range(length_haps)] for acc in range(m)]\n", " \n", " control_data.extend(Haps)\n", "\n", " control_data= np.array(control_data)\n", "\n", " #### generate data and perform PCA.\n", " data= []\n", "\n", " for k in range(N_pops):\n", "\n", " probs= vector_lib[Pops[k],:length_haps]\n", " probs[probs < 0] = 0\n", " probs[probs > 1] = 1\n", " m= unbiased_sheme[k]\n", " Haps= [[np.random.choice([1,0],p= [1-probs[x],probs[x]]) for x in range(length_haps)] for acc in range(m)]\n", "\n", " data.extend(Haps)\n", "\n", " data1= np.array(data)\n", "\n", " if Scale:\n", " data1= scale(data1)\n", " \n", " if Control_inc:\n", " pca = PCA(n_components=n_comp, whiten=False,svd_solver='randomized').fit(np.vstack([control_data,data1]))\n", " control_unbias_feat= pca.transform(control_data)\n", " else:\n", " pca = PCA(n_components=n_comp, whiten=False,svd_solver='randomized').fit(data1)\n", " \n", " feat_unbias= pca.transform(data1)\n", "\n", " if Eigen:\n", " feat_unbias= feat_unbias * pca.explained_variance_ratio_\n", "\n", " ####### centroid comparison\n", " #### Controls\n", " if Control_inc:\n", " control_centroids= [np.mean(control_unbias_feat[[y for y in range(control_unbias_feat.shape[0]) if control_labels[y] == z],:],axis= 0) for z in range(2)]\n", " control_centroids= np.array(control_centroids)\n", "\n", " unbias_control_dist= pairwise_distances(control_centroids,metric= 'euclidean')\n", " unbias_control_dist= unbias_control_dist[iu_control]\n", "\n", " control_even_distances.extend(unbias_control_dist)\n", "\n", " ####\n", " unbias_centroids= [np.mean(feat_unbias[[y for y in range(feat_unbias.shape[0]) if unbias_labels[y] == z],:],axis= 0) for z in range(N_pops)]\n", " unbias_centroids= np.array(unbias_centroids)\n", "\n", " unbias_pair_dist= pairwise_distances(unbias_centroids,metric= 'euclidean')\n", " unbias_pair_dist= unbias_pair_dist[iu1]\n", " \n", " \n", " if Predict:\n", " fst_pred= [np.exp(m_coeff*np.log(x) + b) for x in unbias_pair_dist]\n", " predicted.extend(fst_pred)\n", " #print(np.array([fst_pred,fsts_compare]).T)\n", " \n", " #unbias_pair_dist= scale(unbias_pair_dist)\n", " unbiased_pairwise.extend(unbias_pair_dist)\n", " \n", " #################################################\n", " ############## biased sample\n", "\n", " #### generate data and perform PCA\n", " data= []\n", "\n", " for k in range(N_pops):\n", "\n", " probs= vector_lib[Pops[k],:]\n", "\n", " m= bias_scheme[k]\n", " Haps= [[np.random.choice([1,0],p= [1-probs[x],probs[x]]) for x in range(length_haps)] for acc in range(m)]\n", "\n", " data.extend(Haps)\n", "\n", " data2= np.array(data)\n", "\n", " if Scale:\n", " data2= scale(data2)\n", " \n", " if Control_inc:\n", " pca = PCA(n_components=n_comp, whiten=False,svd_solver='randomized').fit(np.vstack([control_data,data2]))\n", " control_bias_feat= pca.transform(control_data)\n", " else:\n", " pca = PCA(n_components=n_comp, whiten=False,svd_solver='randomized').fit(data2)\n", " \n", " feat_bias= pca.transform(data2)\n", "\n", " if Eigen:\n", " feat_bias= feat_bias * pca.explained_variance_ratio_\n", "\n", " #### Centroid distances\n", " #### Controls\n", " if Control_inc:\n", " control_centroids= [np.mean(control_bias_feat[[y for y in range(control_bias_feat.shape[0]) if control_labels[y] == z],:],axis= 0) for z in range(2)]\n", " control_centroids= np.array(control_centroids)\n", "\n", " bias_control_dist= pairwise_distances(control_centroids,metric= 'euclidean')\n", " bias_control_dist= bias_control_dist[iu_control]\n", "\n", " control_bias_distances.extend(bias_control_dist)\n", " \n", " bias_centroids= [np.mean(feat_bias[[y for y in range(feat_bias.shape[0]) if bias_labels[y] == z],:],axis= 0) for z in range(N_pops)]\n", " bias_centroids= np.array(bias_centroids)\n", "\n", " bias_pair_dist= pairwise_distances(bias_centroids,metric= 'euclidean')\n", " bias_pair_dist= bias_pair_dist[iu1]\n", " #bias_pair_dist= scale(bias_pair_dist)\n", " if Pairs:\n", " t= bias_pair_dist\n", " bias_pair_dist= [min([t[y] for y in z]) / max([t[y] for y in z]) for z in it.combinations(range(len(t)),2)]\n", " \n", " biased_pairwise.extend(bias_pair_dist)\n", "\n", " \n", " \n", "\n", "t= np.array([\n", "fsts_compare,\n", "unbias_pair_dist,\n", "bias_pair_dist\n", "]).T\n", "\n", "\n", "Size= length_haps\n", "fst_lm_range= [.02,.3]\n", "\n", "Lindexes= [x for x in range(len(lengths_vector)) if lengths_vector[x] == Size and fst_store[x] >= fst_lm_range[0] and fst_store[x] <= fst_lm_range[1]]\n", "y_true= [np.log(biased_pairwise[x]) for x in Lindexes]\n", "m_coeff,b= np.polyfit(y_true,[np.log(fst_store[x]) for x in Lindexes],1)\n", "\n", "\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Structure distance profiles" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "Structure_vertice= [2]\n", "\n", "Combis= []\n", "for chromo in Struct_dist_vectors.keys():\n", " for winds in Struct_dist_vectors[chromo].keys():\n", " for grp in Struct_dist_vectors[chromo][winds].keys():\n", " if len(grp) in Structure_vertice:\n", " Combis.append([chromo,winds,grp])\n", "\n", "#Struct_distance_stats= np.array([y for y in it.chain(*Struct_distance_stats)])\n", "Struct_distance_stats= [[[Struct_dist_vectors[Chr][wind][gp] for gp in sorted(Struct_dist_vectors[Chr][wind].keys()) if len(gp) in Structure_vertice] for wind in sorted(Struct_dist_vectors[Chr].keys())] for Chr in Struct_dist_vectors.keys()]\n", "Struct_distance_stats= np.array([y for y in it.chain(*[z for z in it.chain(*Struct_distance_stats)])])\n", "\n", "Struct_distance_stats= np.array(Struct_distance_stats)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "PC1: 0.894; PC2: 0.06; PC3: 0.014; PC4: 0.011\n", "features shape: (100, 4)\n" ] }, { "data": { "application/vnd.plotly.v1+json": { "data": [ { "marker": { "line": { "width": 0 }, "opacity": 1, "size": 4, "symbol": "circle" }, "mode": "markers", "name": "Cl: 0", "text": [ "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0", "gp: 0" ], "type": "scatter3d", "x": [ -0.8562575161415769, -0.515308738587501, -1.6260494041380666, -0.26074445926841483, -0.7538693261524219, -1.2821287567407371, -0.8129179614869332, -1.6632954570831733, -1.6454797700206318, -0.34019413240536966, -1.2452881463449974, -0.8412271411638341, -0.5922327831383818, -0.9289473187549078, -0.6889618780863965, -1.2476167374143396, -1.0823364260035406, -1.4036056162440889, -0.39700023406600987, -1.1661819418364328, -1.4704640105522944, -1.2743450230099707, -1.2189287991708557, -1.130238149779237, -1.0527936610067403, -1.0349265716176081, -0.7821819729247104, -0.2648693758125176, -1.4158807020540969, -0.6716310922538224, -0.16358740678573913, -1.238556774932543, -0.3258256801131826, -0.21410608151536756, -0.5825341521834763, -1.4035395225281406, -1.0145770768165554, -0.23946302265831695 ], "y": [ -0.6503123996417243, -0.28661493204164895, -0.3452112789867424, -0.10423155747045171, -0.02148519578245907, 0.18212425365406415, 0.518658658981641, -0.07287739932590753, -0.2528017812872517, -0.3008412571655196, -0.1264551188313871, -0.4402498250836186, -0.01571698417735231, -0.19480963554649644, 0.43289775498666105, -0.6452102101454512, 0.5689600474811288, -0.3262370758160184, -0.02777684557704655, -0.15073645514239478, -0.49409905022898376, 0.3436530199345924, 0.23557079279410434, 0.027620038114217642, -0.09898830440677132, 0.1551015261091222, -0.0634686404976111, 0.06586612227476075, -0.14619248027786144, -0.07250342645213806, -0.13038769754028284, 0.20961493626747282, 0.5154609405833994, 0.023659258802545654, -0.33853576412508585, -0.4227363079099199, 0.012670915851241709, 0.3865932366339936 ], "z": [ -0.12298824600108284, -0.03953881322124773, -0.14479743460209435, -0.18156862778447236, 0.08423253304842637, 0.058513993297188045, 0.12688671090184553, -0.0837368550839448, -0.1841608678572448, 0.028616023438937804, -0.13689186560916344, 0.07820998671175063, -0.10421333908360336, -0.22815258651042483, 0.19516668287212993, -0.23914743301798874, 0.050277689955783585, -0.36633996396810686, -0.07979421154497301, 0.0004929698741697131, -0.32712547170910855, -0.11578585192320391, -0.0639178495518307, -0.19436630868157145, 0.019858240696930284, 0.008489593332244309, 0.08824271199235272, 0.005701957530658913, -0.3330874161047771, -0.010737643505297151, 0.1516627435681318, 0.2603010334745327, 0.23088900483701588, -0.16288607904689845, 0.1262794739100601, -0.3321227818489887, -0.07987957977108888, 0.052532865032107516 ] }, { "marker": { "line": { "width": 0 }, "opacity": 1, "size": 4, "symbol": "circle" }, "mode": "markers", "name": "Cl: 1", "text": [ "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1", "gp: 1" ], "type": "scatter3d", "x": [ 1.5628484460802063, 1.244518067252911, 1.993096902820546, 1.715875012869176, 2.0731214012798556, 1.788227079753076, 1.0545064450743944, 2.6774758089213027, 2.33838434438881, 2.048794593329249, 1.9554136983270158, 1.943534299587883, 2.170133210869908, 1.195545049637283, 2.414383944865259, 2.1553164232212905, 2.6421054663534975, 2.4345312527640606, 1.7119681774234634, 1.8529211624060762, 2.410322672168821, 1.5942629014618075, 1.2757749400434013, 1.834779425720727, 2.3616300919775277, 1.9186644872338463, 1.688425744455568, 1.1029995545306572, 1.3197337258651851 ], "y": [ 0.5036578534696156, -0.6291488759459257, -0.17336097183123747, -0.050920902506800564, 0.023036253572093112, -0.17513546565146382, 0.10254242777540064, 0.03501119291151275, -0.37520716405002413, 0.667928735500602, -0.43270217548883066, 0.6026722445264633, -0.026400055482982805, 0.086942105745368, 0.10181568280880844, -0.7237177556096747, 0.18552028419676828, -0.05084304675897492, -0.7606077930455831, 0.11062498826810348, -0.020102913307871575, 0.1595824543217424, -0.20211000762106363, -0.542049295645266, -0.4451964856977657, 0.13498025736108565, -0.38228762890794615, -0.11899822788534478, -0.24326254338437728 ], "z": [ 0.0851599965449606, 0.20087892485974518, -0.060050469547042684, 0.29920905001098186, -0.0675041376026348, 0.18774499444296247, 0.01163199666310819, -0.06850954472321295, 0.1347516312988349, -0.027942813365203007, 0.07781152444993826, -0.3419498554265243, 0.02114395121064448, -0.1282950437132997, -0.1446276962319107, 0.20222299885595976, -0.13753401112903388, -0.24267106818860826, 0.36102590743414414, 0.04849420621256773, -0.23327949750735236, -0.029853428398785794, -0.15740086231372893, 0.23591381337564357, 0.10569869006021344, -0.059031989484307906, 0.09311876838778749, 0.166569136895315, 0.009339379038852017 ] } ], "layout": { "scene": { "xaxis": { "title": "0.894" }, "yaxis": { "title": "0.06" }, "zaxis": { "title": "0.014" } }, "title": "PCA red + MS clustering PCA dist profiles across windows" } }, "text/html": [ "
" ], "text/vnd.plotly.v1+html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "\n", "n_comp = 4\n", "\n", "pcaS = PCA(n_components=n_comp, whiten=False,svd_solver='randomized')\n", "Struct_dist_feats = pcaS.fit_transform(Struct_distance_stats)\n", "var_compsQ= pcaS.explained_variance_ratio_\n", "\n", "print(\"; \".join(['PC{0}: {1}'.format(x+1,round(pcaS.explained_variance_ratio_[x],3)) for x in range(n_comp)]))\n", "print('features shape: {}'.format(Struct_dist_feats.shape))\n", "\n", "bandwidth = estimate_bandwidth(Struct_dist_feats, quantile=0.2)\n", "params = {'bandwidth': np.linspace(np.min(Struct_dist_feats), np.max(Struct_dist_feats),30)}\n", "grid = GridSearchCV(KernelDensity(algorithm = \"ball_tree\",breadth_first = False), params,verbose=0)\n", "\n", "ms = MeanShift(bandwidth=bandwidth, bin_seeding=False, cluster_all=False, min_bin_freq=25)\n", "ms.fit(Struct_dist_feats)\n", "labels1 = ms.labels_\n", "#labels1= [int(1 in x[2]) for x in Combis]\n", "#labels1= [len(x[2]) for x in Combis]\n", "#print(Combis[:10])\n", "label_select = {y:[x for x in range(len(labels1)) if labels1[x] == y] for y in sorted(list(set(labels1))) if y != -1}\n", "\n", "\n", "#### Let's plot the first 3 coordinates nonetheless.\n", "####\n", "fig_data= [go.Scatter3d(\n", " x = Struct_dist_feats[label_select[i],0],\n", " y = Struct_dist_feats[label_select[i],1],\n", " z = Struct_dist_feats[label_select[i],2],\n", " type='scatter3d',\n", " mode= \"markers\",\n", " name= 'Cl: {}'.format(i),\n", " text= ['gp: {}'.format(i) for x in label_select[i]],\n", " marker= {\n", " 'line': {'width': 0},\n", " 'size': 4,\n", " 'symbol': 'circle',\n", " \"opacity\": 1\n", " }\n", " ) for i in label_select.keys()]\n", "\n", "\n", "\n", "\n", "layout = go.Layout(\n", " title= 'PCA red + MS clustering PCA dist profiles across windows',\n", " \n", " scene= Scene(\n", " yaxis=dict(\n", " title='{}'.format(round(var_compsQ[1],3))),\n", " xaxis=dict(\n", " title= '{}'.format(round(var_compsQ[0],3))),\n", " zaxis=dict(\n", " title= '{}'.format(round(var_compsQ[2],3))))\n", ")\n", "\n", "\n", "\n", "fig = go.Figure(data=fig_data, layout=layout)\n", "iplot(fig)\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "12f433d0783341b09c9d50bae00d6964", "version_major": 2, "version_minor": 0 }, "text/html": [ "

Failed to display Jupyter Widget of type interactive.

\n", "

\n", " If you're reading this message in the Jupyter Notebook or JupyterLab Notebook, it may mean\n", " that the widgets JavaScript is still loading. If this message persists, it\n", " likely means that the widgets JavaScript library is either not installed or\n", " not enabled. See the Jupyter\n", " Widgets Documentation for setup instructions.\n", "

\n", "

\n", " If you're reading this message in another frontend (for example, a static\n", " rendering on GitHub or NBViewer),\n", " it may mean that your frontend doesn't currently support widgets.\n", "

\n" ], "text/plain": [ "interactive(children=(Dropdown(description='selected_group', options=(0, 1, 2), value=0), Output()), _dom_classes=('widget-interact',))" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "def plot_density(selected_group):\n", " Distances= Struct_distance_stats[label_select[selected_group],:]\n", " print(Distances.shape)\n", " sum_profile= np.mean(Distances,axis=0)\n", " \n", " fig_data= [go.Scatter(\n", " x= [np.exp(m_coeff * np.log(x) + b) for x in np.linspace(0,8,Distances.shape[1])],\n", " y= sum_profile,\n", " mode= 'markers',\n", " name= 'Results'\n", " )]\n", "\n", " layout = go.Layout(\n", " title= 'Stats',\n", " yaxis=dict(\n", " title='density'),\n", " xaxis=dict(\n", " title= 'Fst')\n", " )\n", "\n", " fig= go.Figure(data=fig_data, layout=layout)\n", " iplot(fig)\n", "\n", "\n", "\n", "interact(plot_density,selected_group= label_select.keys()) \n", "\n" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "PC1: 0.299; PC2: 0.242; PC3: 0.013; PC4: 0.012\n", "features shape: (300, 4)\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c511e2c750f648789972279b448a061b", "version_major": 2, "version_minor": 0 }, "text/html": [ "

Failed to display Jupyter Widget of type interactive.

\n", "

\n", " If you're reading this message in the Jupyter Notebook or JupyterLab Notebook, it may mean\n", " that the widgets JavaScript is still loading. If this message persists, it\n", " likely means that the widgets JavaScript library is either not installed or\n", " not enabled. See the Jupyter\n", " Widgets Documentation for setup instructions.\n", "

\n", "

\n", " If you're reading this message in another frontend (for example, a static\n", " rendering on GitHub or NBViewer),\n", " it may mean that your frontend doesn't currently support widgets.\n", "

\n" ], "text/plain": [ "interactive(children=(Dropdown(description='gp', options=(-1, 0, 1, 2), value=-1), Output()), _dom_classes=('widget-interact',))" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Test= Struct_distance_stats.T\n", "Novel= KDE_stats.T\n", "\n", "\n", "n_comp = 4\n", "\n", "pca1 = PCA(n_components=n_comp, whiten=False,svd_solver='randomized')\n", "likelihood_feats = pca1.fit_transform(Novel)\n", "var_comps= pca1.explained_variance_ratio_\n", "\n", "print(\"; \".join(['PC{0}: {1}'.format(x+1,round(pca1.explained_variance_ratio_[x],3)) for x in range(n_comp)]))\n", "print('features shape: {}'.format(likelihood_feats.shape))\n", "\n", "\n", "def kde_distances(gp):\n", " if gp == -1:\n", " apparel= Geneo\n", " fig_data= [go.Scatter3d(\n", " x = likelihood_feats[apparel[i],0],\n", " y = likelihood_feats[apparel[i],1],\n", " z = likelihood_feats[apparel[i],2],\n", " type='scatter3d',\n", " mode= \"markers\",\n", " name= 'Cl: {}'.format(i),\n", " text= ['gp: {}'.format(i) for x in apparel[i]],\n", " marker= {\n", " 'line': {'width': 0},\n", " 'size': 4,\n", " 'symbol': 'circle',\n", " \"opacity\": 1\n", " }\n", " ) for i in apparel.keys()]\n", " else:\n", " sought_after= label_select[gp]\n", " print(len(sought_after))\n", " sought_after= [Combis[x] for x in sought_after]\n", " #print(sought_after[:4])\n", " \n", " Likes= [z for z in it.chain(*[[Construct[el[0]][el[1]][y] for y in el[2]] for el in sought_after])]\n", " \n", " Likes= np.array([x for x in Likes if len(x) == sum(Sizes)])\n", " \n", " print(Likes.shape)\n", " Likes= np.mean(Likes, axis= 0).T\n", " print(Likes.shape)\n", " \n", " fig_data= [go.Scatter3d(\n", " x = likelihood_feats[:,0],\n", " y = likelihood_feats[:,1],\n", " z = likelihood_feats[:,2],\n", " type='scatter3d',\n", " mode= \"markers\",\n", " marker= {\n", " 'color': Likes,\n", " 'colorbar': go.ColorBar(\n", " title= 'ColorBar'\n", " ),\n", " 'colorscale': 'Viridis',\n", " 'line': {'width': 0},\n", " 'size': 4,\n", " 'symbol': 'circle',\n", " \"opacity\": 1\n", " }\n", " )]\n", " \n", "\n", " layout = go.Layout(\n", " title= 'PCA red + MS clustering KDE profiles across windows',\n", "\n", " scene= Scene(\n", " yaxis=dict(\n", " title='{}'.format(round(var_comps[1],3))),\n", " xaxis=dict(\n", " title= '{}'.format(round(var_comps[0],3))),\n", " zaxis=dict(\n", " title= '{}'.format(round(var_comps[2],3))))\n", " )\n", "\n", "\n", " fig = go.Figure(data=fig_data,layout= layout)\n", " iplot(fig)\n", "\n", "interact(kde_distances,gp= [-1,*[x for x in label_select.keys()]])" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c6817f193a8844ff9f7aefded349012b", "version_major": 2, "version_minor": 0 }, "text/html": [ "

Failed to display Jupyter Widget of type interactive.

\n", "

\n", " If you're reading this message in the Jupyter Notebook or JupyterLab Notebook, it may mean\n", " that the widgets JavaScript is still loading. If this message persists, it\n", " likely means that the widgets JavaScript library is either not installed or\n", " not enabled. See the Jupyter\n", " Widgets Documentation for setup instructions.\n", "

\n", "

\n", " If you're reading this message in another frontend (for example, a static\n", " rendering on GitHub or NBViewer),\n", " it may mean that your frontend doesn't currently support widgets.\n", "

\n" ], "text/plain": [ "interactive(children=(Dropdown(description='selected_group', options=(0, 1, 2, 3), value=0), Output()), _dom_classes=('widget-interact',))" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "def plot_density(selected_group):\n", " sought_after= label_select[selected_group]\n", " \n", " sought_after= [Combis[x] for x in sought_after]\n", " #print(sought_after[:4])\n", " Cl_number= len(sought_after)\n", "\n", " Likes= [z for z in it.chain(*[[Construct[el[0]][el[1]][y] for y in el[2]] for el in sought_after])]\n", "\n", " Likes= np.array([x for x in Likes if len(x) == sum(Sizes)])\n", " \n", " Likes= np.mean(Likes, axis= 0).T\n", " \n", " dense_plot= ff.create_distplot([Likes], [selected_group])\n", " dense_plot['layout'].update(title='likelihood density. N vertices: {} .N_clusters: {}'.format(Structure_vertice,Cl_number))\n", " \n", " iplot(dense_plot)\n", "\n", "interact(plot_density,selected_group= [x for x in label_select.keys()]) " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.3" } }, "nbformat": 4, "nbformat_minor": 2 }