{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load related library"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# related library\n",
    "%matplotlib inline\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "import geopandas as gpd\n",
    "\n",
    "import ipywidgets\n",
    "from ipywidgets import widgets \n",
    "from ipywidgets import *  \n",
    "from IPython.display import display,clear_output\n",
    "\n",
    "from ipywidgets import Layout\n",
    "from traitlets import directional_link\n",
    "\n",
    "from datetime import datetime\n",
    "from datetime import date\n",
    "from dateutil import rrule\n",
    "\n",
    "from boto.s3.connection import S3Connection\n",
    "import requests\n",
    "from io import BytesIO\n",
    "# # when I run it in mybinder, it comes up with RuntimeWarning:\n",
    "# /srv/conda/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88\n",
    "#   return f(*args, **kwds)\n",
    "# I looked for information online, it said this warning could be ignored safely."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "#ipywidgets.__version__,matplotlib.__version__,pd.__version__,np.__version__,gpd.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "#ipywidgets.__version__,matplotlib.__version__,pd.__version__,np.__version__,gpd.__version__,quilt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "#ipywidgets.__version__,matplotlib.__version__,pd.__version__,np.__version__,gpd.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# prepare data for dropdown: continent list and country list\n",
    "\n",
    "# get world info\n",
    "world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))\n",
    "# get continent list plus \"all\"\n",
    "continent_list = [\"all\"] + list(set(world['continent']))\n",
    "#continent_list.append(\"all\")\n",
    "\n",
    "continent_country_dict = {}\n",
    "for continent in continent_list:\n",
    "    continent_country_dict[continent] = [\"all\"] + list(world[world['continent']== continent]['name'])\n",
    "    #continent_country_dict[continent].append(\"all\")\n",
    "    \n",
    "#print (\"continent list for dropdown:\")\n",
    "#print (continent_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load dataset\n",
    "\n",
    "# If you want to do it locally, you can download dataset from the links below and load it using pd.read_csv just like comments did.\n",
    "# https://www.dropbox.com/sh/mt7by5f1wgl6n3z/AACddwkFPq5lPpH3ry83MgSDa?dl=0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load local dataset.\n",
    "a_people = pd.read_csv(\"src/article_people_score_date_geo.csv\")\n",
    "t_people = pd.read_csv(\"src/talk_people_score_date_geo.csv\")\n",
    "a_events = pd.read_csv(\"src/article_events_score_date_geo.csv\")\n",
    "t_events = pd.read_csv(\"src/talk_events_score_date_geo.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "complete_df = {}\n",
    "complete_df['Articles'] = {}\n",
    "complete_df['Talks'] = {}\n",
    "complete_df['Articles']['People'] = a_people\n",
    "complete_df['Articles']['Events'] = a_events\n",
    "complete_df['Talks']['People'] = t_people\n",
    "complete_df['Talks']['Events'] = t_events"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# side functions\n",
    "# Please run them before ploting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "def exclude_BC(df):\n",
    "    bc_list = [one for one in df['date'] if one.startswith(\"-\")]\n",
    "    df_withoutBC = df[~df['date'].isin(bc_list)]\n",
    "    #print (\"There are %d dates before Christ,%d entities left after excluding\"%(len(bc_list),len(df_withoutBC)))\n",
    "    return df_withoutBC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_datetime_column(df,unit):\n",
    "    # unit: date,month,year\n",
    "    datetime_format = \"\"\n",
    "    if unit == \"date\":\n",
    "        datetime_format = \"%Y-%m-%d\"\n",
    "    elif unit == \"month\":\n",
    "        datetime_format = \"%Y-%m\"\n",
    "    elif unit == \"year\":\n",
    "        datetime_format = \"%Y\"\n",
    "    new_list = [datetime.strptime(one,datetime_format) for one in df[unit]]\n",
    "    if unit == \"date\":\n",
    "        df = df.assign(datetime_date=pd.Series(new_list).values)\n",
    "    elif unit == \"month\":\n",
    "        df = df.assign(datetime_month=pd.Series(new_list).values)\n",
    "    elif unit == \"year\":\n",
    "        df = df.assign(datetime_year=pd.Series(new_list).values)\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_month_column(df):\n",
    "    new_list = [one[:7] for one in df['date']]\n",
    "    df = df.assign(month=pd.Series(new_list).values)\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_year_column(df):\n",
    "    new_list = [one[:4] for one in df['date']]\n",
    "    df = df.assign(year=pd.Series(new_list).values)\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "def score_median_group_by_column(df,tcn,scn):\n",
    "    #print (df)\n",
    "    out_df = df.groupby(tcn).agg({scn:np.median}).reset_index()\n",
    "    #print (df)\n",
    "    return out_df\n",
    "# df_month_score=score_median_group_by_column(monthSenti,'month','score')\n",
    "# print (df_month_score.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "def score_percentile_group_by_column(df,tcn,scn,percentile, lex):\n",
    "    #print (df)\n",
    "    \n",
    "    # Debug:\n",
    "    per_str = '{}%'.format(int(percentile*100))\n",
    "    \n",
    "    df_describe = df.groupby(tcn).describe()\n",
    "    df_pos = df_describe[f'pos_score_{lex}'][[per_str]].rename(columns={per_str:f'pos_score_{lex}'})\n",
    "    df_neg = df_describe[f'neg_score_{lex}'][[per_str]].rename(columns={per_str:f'neg_score_{lex}'})\n",
    "    df_len = df_describe['length'][[per_str]].rename(columns={per_str:'length'})\n",
    "    df_tot = df_describe['total'][[per_str]].rename(columns={per_str:'total'})\n",
    "    out_df = pd.concat([df_pos, df_neg, df_len, df_tot], axis=1).reset_index()\n",
    "    #out_df = df.groupby(tcn).quantile(percentile).reset_index()\n",
    "    #print (tcn)\n",
    "    #print (out_df)\n",
    "    return out_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "# plot function\n",
    "def prepare_flowplot_percentiles(lexicon,sentiment,group,domain,geo,times,ra,window,unit,min_length):\n",
    "    # lexicon,sentiment,group,domain,geo,time,ra,unit\n",
    "    # lexicon: OL, MPQA, LIWC, ANEW\n",
    "    # sentiment: pos, neg, total\n",
    "    # group: [group_people,group_events,group_others] boolean value\n",
    "    # domain: Articles, Talks\n",
    "    # geo: [continent,country] continent,country could be 'all'\n",
    "    # times: [start_year,start_month,end_year,end_month] int\n",
    "    # unit: 'year' or 'month'\n",
    "    # output_text\n",
    "    statistics = []\n",
    "    dict_a_people = {\"total\":\"1,146,257\",\"date\":\"775,664\",\"BC\":\"27\"}\n",
    "    dict_a_events = {\"total\":\"54,071\",\"date\":\"22,582\",\"BC\":\"33\"}\n",
    "    dict_t_people = {\"total\":\"415,124\",\"date\":\"289,108\",\"BC\":\"26\"}\n",
    "    dict_t_events = {\"total\":\"21,621\",\"date\":\"10,283\",\"BC\":\"27\"}\n",
    "    statistics_dict = {\"Articles\":{\"People\":dict_a_people,\"Events\":dict_a_events},\"Talks\":{\"People\":dict_t_people,\"Events\":dict_t_events}}\n",
    "    birth_occurrance = {\"People\":\"birth\",\"Events\":\"occurrance\"}\n",
    "    \n",
    "    # prepare showed dates depending on time range\n",
    "    # get time range\n",
    "    start_yy,start_mm,end_yy,end_mm = times\n",
    "    start_date = date(start_yy,start_mm,1)\n",
    "    end_date = date(end_yy,end_mm,1)\n",
    "    # prepare x-axis\n",
    "    xaxis_value = [day for day in rrule.rrule(rrule.MONTHLY,dtstart=start_date,until=end_date)]\n",
    "    #print (\"value for x-axis looks like this: \\n%s \\nlen(x-axis): %d\"%(xaxis_value[-1],len(xaxis_value)))\n",
    "    \n",
    "    # prepare dataframe to display\n",
    "    df = pd.DataFrame()\n",
    "    \n",
    "    #group_people,group_events,group_others = group\n",
    "    \n",
    "    # choose corresponding dataframe depend on domain and group\n",
    "    for one in group:\n",
    "        if one.value:\n",
    "            # add statistics for total entities for this group\n",
    "            statistics.append(\"There are totally %s %s entities in Wikipedia %s.\"%(statistics_dict[domain][one.description]['total'],one.description,domain))\n",
    "            statistics.append(\"Among them, %s have %s date information. Inside them we exclude %s entities whose date before Christ (BC).\"%\n",
    "                              (statistics_dict[domain][one.description]['date'],birth_occurrance[one.description],statistics_dict[domain][one.description]['BC']))\n",
    "            if df.empty:\n",
    "                df = complete_df[domain][one.description][['pos_score_'+lexicon,'neg_score_'+lexicon,'date','country','continent','length']]\n",
    "            else:\n",
    "                df = pd.concat([df,complete_df[domain][one.description][['pos_score_'+lexicon,'neg_score_'+lexicon,'date','country','continent','length']]])\n",
    "\n",
    "    if df.empty:\n",
    "        statistics.append(\"Please select value for Group.\")\n",
    "        return False,False,False,False,False,False,False,statistics,False,False\n",
    "    statistics.append(\"After all, there are {:,d} entities with AC date information in this run.\".format(len(df)))\n",
    "    \n",
    "    # filter length\n",
    "    df = df[df['length']>=min_length]\n",
    "    #print (len(df))\n",
    "    statistics.append(\"After filtering with the length of the document, there are {:,d} entities left.\".format(len(df)))\n",
    "                      \n",
    "    # filter out entities based on geo\n",
    "    continent,country = geo\n",
    "    # if continent == 'all', do nothing\n",
    "    if not continent == 'all':\n",
    "        if country == 'all':\n",
    "            # filter out entities with target continent\n",
    "            df = df[df['continent']==continent]\n",
    "        else:\n",
    "            if country == 'United States of America':\n",
    "                country = 'United States'\n",
    "            # filter out entities with target country\n",
    "            df = df[df['country']==country]\n",
    "    statistics.append(\"After filtering with area, there are {:,d} entities left.\".format(len(df)))\n",
    "    \n",
    "    df_test = df.copy()\n",
    "    if len(df)==0:\n",
    "        statistics.append(\"No entities fit the requirements, please change the settings.\")\n",
    "        return False,False,False,False,False,False,False,statistics,False,False\n",
    "          \n",
    "    # add target unit and corresponding datetime type for entities\n",
    "    if unit == \"month\":\n",
    "        df = add_month_column(df)\n",
    "    elif unit == \"year\":\n",
    "        df = add_year_column(df)\n",
    "\n",
    "        \n",
    "    # we need datetime type because we use it to filter out entities within time range\n",
    "    df = add_datetime_column(df,unit)\n",
    "    \n",
    "    # filter out entities based on time range\n",
    "    df = df[df[\"datetime_\"+unit].isin(xaxis_value)]\n",
    "    statistics.append(\"After filtering with date, there are {:,d} entities left, and collected in the plot.\".format(len(df)))\n",
    "    \n",
    "    if len(df)==0:\n",
    "        statistics.append(\"No entities fit the requirements, please change the settings.\")\n",
    "        return False,False,False,False,False,False,False,statistics,False,False\n",
    "    \n",
    "    # get score depending on lexicon and sentiment\n",
    "    df = df.assign(total=df[\"pos_score_\"+lexicon]+df[\"neg_score_\"+lexicon])\n",
    "    if sentiment == \"total\":        \n",
    "        score_column = \"total\"\n",
    "    else:\n",
    "        score_column = sentiment+\"_score_\"+lexicon\n",
    "        \n",
    "\n",
    "    # plot flowplot    \n",
    "    # get median\n",
    "    df_median = score_median_group_by_column(df,unit,score_column)\n",
    "    df_median = add_datetime_column(df_median,unit)\n",
    "    df_25percentile = score_percentile_group_by_column(df,'datetime_'+unit,score_column,0.25, lexicon)\n",
    "    df_75percentile = score_percentile_group_by_column(df,'datetime_'+unit,score_column,0.75, lexicon)\n",
    "    #print (\"here\")\n",
    "    #print (df_25percentile)\n",
    "    \n",
    "    # filling blank\n",
    "    fill_temp = pd.DataFrame(xaxis_value,columns=['datetime_'+unit])\n",
    "    fill_df_median = fill_temp.merge(df_median,how='left')\n",
    "    fill_df_median.loc[fill_df_median[score_column].isnull(),score_column]=0\n",
    "    \n",
    "    # Debug\n",
    "    fill_df_25percentile = fill_temp.merge(df_25percentile,how='left', left_on='datetime_month', right_on='datetime_month')\n",
    "    fill_df_25percentile.loc[fill_df_25percentile[score_column].isnull(),score_column]=0\n",
    "    \n",
    "    # Debug\n",
    "    fill_df_75percentile = fill_temp.merge(df_75percentile,how='left', left_on='datetime_month', right_on='datetime_month')\n",
    "    fill_df_75percentile.loc[fill_df_75percentile[score_column].isnull(),score_column]=0\n",
    "    \n",
    "    if ra == True:\n",
    "        # get rolling average\n",
    "#         df_median[\"ra\"] = df_median[score_column].rolling(window,center=True).mean()\n",
    "#         df_25percentile[\"ra\"] = df_25percentile[score_column].rolling(window,center=True).mean()\n",
    "#         df_75percentile[\"ra\"] = df_75percentile[score_column].rolling(window,center=True).mean()    \n",
    "        fill_df_median[\"ra\"] = fill_df_median[score_column].rolling(window,center=True).mean()\n",
    "        fill_df_25percentile[\"ra\"] = fill_df_25percentile[score_column].rolling(window,center=True).mean()\n",
    "        fill_df_75percentile[\"ra\"] = fill_df_75percentile[score_column].rolling(window,center=True).mean()\n",
    "    \n",
    "    \n",
    "    # plot flowplot\n",
    "    #ax.plot(df_median['datetime_'+unit],df_median[score_column],'r-')\n",
    "    temp_x = fill_df_median['datetime_'+unit]\n",
    "    temp_x = temp_x.values\n",
    "    #temp_x = pd.Series.values(temp_x)\n",
    "    #ax.fill_between(temp_x,df_25percentile[score_column],df_75percentile[score_column],color='b',alpha=0.2)\n",
    "    \n",
    "    # Debug\n",
    "    df_time_size = df.groupby('datetime_'+unit).size().reset_index(name='size')\n",
    "    fill_df_time_size = fill_temp.merge(df_time_size,how='left').fillna(0)\n",
    "    #print (df_time_size)\n",
    "    \n",
    "    return True, fill_df_median,fill_df_25percentile,fill_df_75percentile,score_column,temp_x,fill_df_time_size,statistics,start_date,end_date\n",
    "\n",
    "#     # set ax\n",
    "#     myfontsize = 10\n",
    "#     ax.set_title(\"median,percentile of \"+sentiment+\" score for \"+group+\" based on \"+lexicon,fontsize=myfontsize)\n",
    "#     ax.set_xlabel('time')\n",
    "#     ax.set_ylabel('score')\n",
    "#     ax.set_xlim([start_date,end_date])\n",
    "#     [[item.set_color('b') for item in bp_dict[key]['boxes']] for key in bp.keys()]\n",
    "#     [[item.set_color('b') for item in bp_dict[key]['whiskers']] for key in bp.keys()]\n",
    "#     [[item.set_color('r') for item in bp_dict[key]['medians']] for key in bp.keys()]\n",
    "    \n",
    "#     for tick in bp_axes.get_xticklabels():\n",
    "#                 tick.set_rotation(90)\n",
    "    #plt.show()\n",
    "#prepare_flowplot_percentiles(\"OL\",\"total\",\"People\",domain,geo,times,ra,window,unit)   \n",
    "#flowplot_percentiles(\"OL\",\"total\",\"People\",[\"Europe\",\"all\"],\"Articles\",\"month\",[1940,1,1],[1960,1,1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "# widget 1: median of sentiment score for Wikipedia concepts over time grouped by month"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "# define actions while click update botton\n",
    "def on_button_clicked(b):\n",
    "    # prepare and filter dataset\n",
    "    # get parameters from input\n",
    "    # get geo\n",
    "    continent = dropdown_continent.value\n",
    "    country = dropdown_country.value\n",
    "    geo = [continent,country]\n",
    "    # get time\n",
    "    time_start_year = dropdown_start_year.value\n",
    "    time_start_month = dropdown_start_month.value\n",
    "    time_end_year = dropdown_end_year.value\n",
    "    time_end_month = dropdown_end_month.value\n",
    "    time = [time_start_year,time_start_month,time_end_year,time_end_month]\n",
    "    # get lexicon\n",
    "    lexicon = radio_button_lexicon.value\n",
    "    # get sentiment\n",
    "    sentiment = sen_dict[radio_button_sentiment.value]\n",
    "    # get domain\n",
    "    domain = radio_button_domain.value\n",
    "    # get group\n",
    "    # group_people,group_events,group_others = cb_container.children\n",
    "    group = cb_container.children\n",
    "    # get rolling average\n",
    "    ra = checkbox_ra.value\n",
    "    window = dropdown_ra.value\n",
    "    \n",
    "    # get minimum length \n",
    "    min_length = dropdown_length.value\n",
    "    \n",
    "    # define time unit to month\n",
    "    unit = \"month\"\n",
    "    \n",
    "    # output text\n",
    "    #output_label_list = []\n",
    "    output_label_str_container = widgets.VBox(layout=Layout(width='100%',border='solid 1px'))\n",
    "    \n",
    "    exist, df50,df25,df75,score_column,percentile_x,df_time_size,output_label_list,s_date,e_date =prepare_flowplot_percentiles(lexicon,sentiment,group,domain,geo,time,ra,window,unit,min_length)\n",
    "    \n",
    "    if exist == False:\n",
    "        fig = None\n",
    "    else:\n",
    "        # if rolling average==True, get column 'ra' instead of \n",
    "        if ra == True:\n",
    "            score_column = 'ra'\n",
    "\n",
    "\n",
    "\n",
    "        # draw\n",
    "        # Debug\n",
    "        fig,axes = plt.subplots(2,1,sharex='row', figsize=(15,8))\n",
    "        ax0,ax1 = axes.flatten()\n",
    "        ax = [ax0,ax1]\n",
    "        time_size_loc = np.arange(len(df_time_size['datetime_'+unit].tolist()))\n",
    "        ax[0].bar(time_size_loc,df_time_size['size'],width=0.8, align='edge')\n",
    "        df50.plot(x='datetime_'+unit, y=score_column, color='r', ax=ax[1])\n",
    "        ax[1].fill_between(percentile_x,df25[score_column],df75[score_column],color='b',alpha=0.2)\n",
    "\n",
    "        ax[0].set_xlim(0,time_size_loc.shape[0])\n",
    "        ax[0].get_xaxis().set_visible(False)\n",
    "        ax[1].get_legend().remove()\n",
    "\n",
    "        # add label for plots\n",
    "        plot_title_dict = {\"ra\":\"rolling average of median score\"}\n",
    "        ax[0].set_title(\"Number of entities from %s to %s matching the current setting (grouped by month)\"%(s_date.strftime(\"%B %Y\"),e_date.strftime(\"%B %Y\")))\n",
    "        ax[0].set_ylabel(\"number of entities\")\n",
    "        ax[1].set_title(\"Corresponding %s for each month\"%(plot_title_dict.get(score_column,\"median score\")))\n",
    "        ax[1].set_ylabel(\"score\")\n",
    "        ax[1].set_xlabel(\"time\")\n",
    "    \n",
    "    #output_label_list.append(\"Totally %d entities are collected in the plot.\"%(sum(df_time_size['size'].tolist())))\n",
    "    output_labels = [widgets.HTML(value=i) for i in output_label_list]\n",
    "    output_label_str_container.children = [i for i in output_labels]\n",
    "    with box_out:\n",
    "        clear_output(wait=True)\n",
    "        display(fig)\n",
    "    with text_out:\n",
    "        clear_output(wait=True)\n",
    "        display(output_label_str_container)\n",
    "    if fig:\n",
    "        plt.close(fig)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5381d8d5ad7945019f9494ab3e52313c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "VBox(children=(VBox(children=(HTML(value='<h1>WikiSentiFlow</h1><br>\\n<p>Introduction\\n<ul>\\n<li>This widget i…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# framework\n",
    "#     lexicon container: Which lexicon you want to choose? (OL, MPQA, LIWC)\n",
    "#         title html\n",
    "#         radio_button\n",
    "#     group container: Which group you want to show? (People, Events)\n",
    "#         title html\n",
    "#         checkboxes container:\n",
    "#             checkbox\n",
    "#             checkbox\n",
    "#     area filter: Do you want to filter out the continent or country for entities?\n",
    "#         title html\n",
    "#         dropdown_continent\n",
    "#         dropdown_country\n",
    "#     domain container: What domain you want to use? (Article, Talks)\n",
    "#         title html\n",
    "#         checkboxes container:\n",
    "#             radio_button\n",
    "#     time filter: Set the start and stop of time range\n",
    "#         dropdown\n",
    "#         \n",
    "\n",
    "# change label to HTML (for set style more flexible)\n",
    "    \n",
    "container_width = 'auto'\n",
    "with_border_layout = Layout(border='solid 0.5px')\n",
    "\n",
    "\n",
    "# preparing a container for header\n",
    "header_container = widgets.VBox(layout=Layout(width='100%',border='solid 0.5px'))\n",
    "header_text = \"\"\"<h1>WikiSentiFlow</h1><br>\n",
    "<p>Introduction\n",
    "<ul>\n",
    "<li>This widget is used to show the changes of sentiment scores for Wikipedia entities (concepts) with time. It shows 25%, 50% (median), 75% quantile of sentiment score for each month to present the sentiment distribution.</li>\n",
    "<li>Wikipedia entities contains entities in Wikipedia Article and Wikipedia Talk. Talk page is an area for editors to discuss about corresponding article, which can be visited from upper left side of article page.</li>\n",
    "<li>This widget includes entities both People and Events. The date for people indicates birth date, while the date for events indicates occurrance date.</li>\n",
    "<li>The text of Articles and Talks is extracted from Wikipedia Dump, and time stamps are extracted from DBPedia.</li>\n",
    "<li>The scores are calculated with term frequency for sentiment words based on certain lexicons (OL, MPQA, LIWC, ANEW). For ANEW we take valency into account too.</li>\n",
    "</ul></p>\n",
    "<p>How to use\n",
    "<ol>\n",
    "<li>Select value for Lexicon, Sentiment, Group, Domain, Geolocation, Time, Minimum Length.</li>\n",
    "<li>Rolling average is optional. Tick it and set a window size then a rolling average of median will be showed. Otherwise the median will be showed.</li>\n",
    "<li>Click \"Go\" to run it.</li>\n",
    "</ol>\n",
    "</p>\n",
    "<p>Results\n",
    "<ul><li>For each run there will be two plots sharing x-axis. \n",
    "The first plot is the number of entities matching the setting.\n",
    "The second plot shows a red line with purple shadows representing the corresponding scores. \n",
    "The red line presents median (50% quantile) of scores based on your settings.\n",
    "And the shadows cover 25% to 75% quantile of the scores.\n",
    "If rolling average is ticked, then the shadows and red line refer to rolling average of 25%, 50%, 75% quantile.\n",
    "The time is splited with month as unit.</li>\n",
    "<li>In the bottom you will get the data characteristics for the current run.</li></ul></p>\"\"\"\n",
    "html_header = widgets.HTML(value=header_text)\n",
    "header_container.children=[html_header]\n",
    "\n",
    "\n",
    "# for lexicon\n",
    "lexicon_container = widgets.VBox(layout=Layout(width='8%',border='solid 0.5px'))\n",
    "# add title\n",
    "html_lexicon = widgets.HTML(value=\"<b>Lexicon</b>\")\n",
    "#label_lexicon = widgets.Label(value=\"sentiment lexicon\")\n",
    "# preparing a container to put in radio buttons\n",
    "radio_button_lexicon = widgets.RadioButtons(\n",
    "    options=['OL', 'MPQA', 'LIWC','ANEW'],\n",
    "    #description='sentiment lexicon',\n",
    "    #style=style,\n",
    "    disabled=False\n",
    ")\n",
    "# put text and button into lexicon container\n",
    "lexicon_container.children = [html_lexicon,radio_button_lexicon]\n",
    "\n",
    "\n",
    "# for sentiment\n",
    "sentiment_container = widgets.VBox(layout=lexicon_container.layout)\n",
    "# add title\n",
    "html_sentiment = widgets.HTML(value=\"<b>Sentiment</b>\")\n",
    "# preparing a container to put in radio buttons\n",
    "radio_button_sentiment = widgets.RadioButtons(\n",
    "    options=['Total', 'Positive', 'Negative'],\n",
    "    #description='',\n",
    "    #style=style,\n",
    "    disabled=False\n",
    ")\n",
    "# put text and button into lexicon container\n",
    "sentiment_container.children = [html_sentiment,radio_button_sentiment]\n",
    "\n",
    "\n",
    "# for target group\n",
    "group_container = widgets.VBox(layout=lexicon_container.layout)\n",
    "html_group = widgets.HTML(value=\"<b>Group</b>\")\n",
    "# checkboxes container\n",
    "cb_container = widgets.VBox(layout=Layout(\n",
    ")) \n",
    "# preparing a container to put in created checkbox\n",
    "checkboxes = []  \n",
    "# create checkbox\n",
    "checkboxes.append(widgets.Checkbox(description = 'People', value=False,layout=Layout(left='-80px')))\n",
    "checkboxes.append(widgets.Checkbox(description = 'Events', value=False,layout=Layout(left='-80px')))\n",
    "# put check box into checkboxes container\n",
    "cb_container.children=[i for i in checkboxes]\n",
    "#display(cb_container)\n",
    "# # add a new container to control the arrangement\n",
    "# temp_container = widgets.HBox()\n",
    "# temp = widgets.Label(description='choose target group')\n",
    "# temp_container.children=[cb_container,temp]\n",
    "group_container.children=[html_group,cb_container]\n",
    "\n",
    "\n",
    "# for area \n",
    "area_container = widgets.VBox(layout=Layout(width='24%',border='solid 0.5px'))\n",
    "html_area = widgets.HTML(value=\"<b>Geolocation</b>\")\n",
    "#dropdown_container = widgets.HBox()\n",
    "#dropdown\n",
    "dropdown_continent = widgets.Dropdown(\n",
    "    options = continent_list,\n",
    "    value = 'all',\n",
    "    description='Continent:',\n",
    "    layout=Layout(width='240px',left='-10px'),\n",
    "    disabled=False,\n",
    ")  \n",
    "dropdown_country = widgets.Dropdown(\n",
    "    #options = country_list,\n",
    "    description='Country:',\n",
    "    #options = [\"all\"],\n",
    "    #value='all',\n",
    "    layout=Layout(width='240px',left='-10px'),\n",
    "    disabled = False,\n",
    ")\n",
    "def transform(case):\n",
    "    return continent_country_dict[case]\n",
    "directional_link((dropdown_continent,'value'),(dropdown_country,'options'),transform)\n",
    "area_container.children=[html_area,dropdown_continent,dropdown_country]\n",
    "\n",
    "\n",
    "# for domain\n",
    "domain_container = widgets.VBox(layout=lexicon_container.layout)\n",
    "# add title\n",
    "html_domain = widgets.HTML(value=\"<b>Domain</b>\")\n",
    "# preparing a container to put in radio buttons\n",
    "radio_button_domain = widgets.RadioButtons(\n",
    "    options=['Articles', 'Talks'],\n",
    "    disabled=False\n",
    ")\n",
    "# put text and button into domain container\n",
    "domain_container.children = [html_domain,radio_button_domain]\n",
    "\n",
    "\n",
    "# for date\n",
    "time_container = widgets.VBox(layout=Layout(width='26%',border='solid 0.5px'))\n",
    "#add title\n",
    "html_time = widgets.HTML(value=\"<b>Time</b>\")\n",
    "# prepare time year list\n",
    "# dates = [day for day in rrule.rrule(rrule.YEARLY, dtstart=date(1700,1,1), until=date.today())]\n",
    "# year_list = [(i.strftime('%Y'),i) for i in dates]\n",
    "year_list = list(range(1500,2019,1))\n",
    "month_list = list(range(1,13,1))\n",
    "#time_list = [1940,1960,1992,1993]\n",
    "time_style = {'description_width': '63%'}\n",
    "dropdown_start_year = widgets.Dropdown(\n",
    "    options = year_list,\n",
    "    value=year_list[-100],\n",
    "    description='Start (year, month):',\n",
    "    style=time_style,\n",
    "    layout=Layout(width='190px'),\n",
    "    #style={'description_width':'initial'},\n",
    "    disabled=False,\n",
    ")\n",
    "dropdown_start_month = widgets.Dropdown(\n",
    "    options = month_list,\n",
    "    #description='',\n",
    "    layout=Layout(width='50px'),\n",
    "    disabled=False,\n",
    ")\n",
    "time_container_start = widgets.HBox()\n",
    "#time_container_start.children=[widgets.Label('start (year,month):'),dropdown_start_year,dropdown_start_month]\n",
    "time_container_start.children=[dropdown_start_year,dropdown_start_month]\n",
    "\n",
    "dropdown_end_year = widgets.Dropdown(\n",
    "    options = year_list,\n",
    "    description='End (year, month):',\n",
    "    style=time_style,\n",
    "    #options = [\"all\"],\n",
    "    value=year_list[-1],\n",
    "    layout=Layout(width='190px'),\n",
    "    disabled = False,\n",
    ")\n",
    "dropdown_end_month = widgets.Dropdown(\n",
    "    options = month_list,\n",
    "    #description='',\n",
    "    #options = [\"all\"],\n",
    "    #value=month_list[-1],\n",
    "    layout=Layout(width='50px'),\n",
    "    disabled = False,\n",
    ")\n",
    "time_container_end = widgets.HBox()\n",
    "time_container_end.children = [dropdown_end_year,dropdown_end_month]\n",
    "time_container.children = [html_time,time_container_start,time_container_end]\n",
    "\n",
    "\n",
    "# for rolling average\n",
    "ra_container = widgets.VBox(layout=Layout(align_items='center',width='100%'))\n",
    "#mystyle={'description_width':'initial'}\n",
    "checkbox_ra = widgets.Checkbox(layout=Layout(left='-13px'),value=False,description='Rolling Average')\n",
    "checkbox_ra_container = widgets.HBox()\n",
    "checkbox_ra_container.children = [checkbox_ra]\n",
    "ra_list = list(range(3,50,1))\n",
    "ra_style = {'description_width': '60%'}\n",
    "dropdown_ra = widgets.Dropdown(\n",
    "    options = ra_list,\n",
    "    description='Windows Size:',\n",
    "    style=ra_style,\n",
    "    \n",
    "    #options = [\"all\"],\n",
    "    #value=year_list[-1],\n",
    "    layout=Layout(width='170px',left='-6px'),\n",
    "    disabled = True,\n",
    ")\n",
    "dropdown_ra_container = widgets.HBox()\n",
    "dropdown_ra_container.children = [dropdown_ra]\n",
    "def transform_ra(case):\n",
    "    return {True:False,False:True}[case]\n",
    "\n",
    "directional_link((checkbox_ra,'value'),(dropdown_ra,'disabled'),transform_ra)\n",
    "ra_container.children = [checkbox_ra_container,dropdown_ra_container]\n",
    "\n",
    "# for min length\n",
    "length_list = list(range(0,50,5))\n",
    "length_style = {'description_width':'70%'}\n",
    "dropdown_length = widgets.Dropdown(\n",
    "    options = length_list,\n",
    "    description='Minimum Length:',\n",
    "    style = length_style,\n",
    "    layout=Layout(width='176px',left='-9px'),\n",
    "    disabled=False,\n",
    ")\n",
    "\n",
    "\n",
    "#for button\n",
    "update_container = widgets.VBox(layout=Layout(#display='flex',\n",
    "                                              #flex_flow='column',\n",
    "                                              align_items='center',\n",
    "                                              width='16%',\n",
    "                                              #width=container_width,\n",
    "                                              #border='solid 0.5px'\n",
    "))\n",
    "#add button that updates the graph based on the checkboxes\n",
    "button = widgets.Button(description=\"Go\",button_style='primary',layout=Layout(width='90%'))\n",
    "update_container.children=[ra_container,dropdown_length,button]\n",
    "\n",
    "\n",
    "# preparing a container for input panel\n",
    "input_container = widgets.HBox(layout=Layout(\n",
    "    display='flex',\n",
    "    flex_flow='row',\n",
    "    align_items='stretch',\n",
    "    border='solid 0.5px',\n",
    "    #height='120px'\n",
    "    #width='30%'\n",
    "))\n",
    "input_container.children=[lexicon_container, sentiment_container,group_container,domain_container,area_container,time_container,update_container]\n",
    "\n",
    "\n",
    "# for plot\n",
    "box_out = ipywidgets.Output(layout=Layout(width='100%',height='500px',border='solid 0.5px'))\n",
    "# for out_text\n",
    "#label_out = widgets.Label(value=\"Here is the statistics of output...\",layout=Layout(width='100%'))\n",
    "text_out = widgets.Output(layout=Layout(width='100%',border='solid 0.5px'))\n",
    "\n",
    "\n",
    "# for output\n",
    "output_container = widgets.VBox(layout=Layout(border='solid 0.5px'))\n",
    "output_container.children = [box_out,text_out]\n",
    "\n",
    "\n",
    "# container for all: including header, input and output two sections\n",
    "all_container = widgets.VBox(layout=with_border_layout)\n",
    "all_container.children = [header_container,input_container,output_container]\n",
    "\n",
    "sen_dict = {\"Total\":\"total\",\"Positive\":\"pos\",\"Negative\":\"neg\"}\n",
    "\n",
    "# run the widget\n",
    "display(all_container)\n",
    "button.on_click(on_button_clicked)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Link to [WikiSentiScatter](WikiSentiScatter.ipynb)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.1"
  },
  "widgets": {
   "state": {
    "b6e532f28b6046a99112df0faea8550d": {
     "views": [
      {
       "cell_index": 26
      }
     ]
    }
   },
   "version": "1.2.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}