{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Curate_StudyAppDownloadReason_Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-28T19:54:55.009598Z",
     "start_time": "2018-12-28T19:54:37.669242Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Welcome, Abhishek Pratap!\n",
      "\n"
     ]
    }
   ],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import datetime as dt\n",
    "import itertools as it\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "from IPython.core.interactiveshell import InteractiveShell\n",
    "import synapseclient\n",
    "from synapseclient import Activity, File, Schema, Table, as_table_columns\n",
    "from tqdm import tqdm\n",
    "\n",
    "InteractiveShell.ast_node_interactivity = 'all'\n",
    "syn = synapseclient.Synapse()\n",
    "syn.login()\n",
    "\n",
    "tqdm.pandas()\n",
    "\n",
    "def isnum(x):\n",
    "    if x is None:\n",
    "        return False\n",
    "    try:\n",
    "        float(x)\n",
    "        return True\n",
    "    except ValueError:\n",
    "        return False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-28T19:54:57.272327Z",
     "start_time": "2018-12-28T19:54:55.020146Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>brightenid</th>\n",
       "      <th>start</th>\n",
       "      <th>week</th>\n",
       "      <th>user_id</th>\n",
       "      <th>sent_time_local</th>\n",
       "      <th>sent_time_utc</th>\n",
       "      <th>response_local</th>\n",
       "      <th>response_utc</th>\n",
       "      <th>response_id</th>\n",
       "      <th>Why did you download this app?</th>\n",
       "      <th>For \"other\", please type in box</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>BLUE-00048</td>\n",
       "      <td>2014-08-01</td>\n",
       "      <td>0</td>\n",
       "      <td>10431</td>\n",
       "      <td>NaT</td>\n",
       "      <td>NaT</td>\n",
       "      <td>2014-08-01 07:00:09</td>\n",
       "      <td>2014-08-01 11:00:09</td>\n",
       "      <td>166331</td>\n",
       "      <td>for fun|for mental health reasons [e.g. depres...</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>BLUE-00049</td>\n",
       "      <td>2014-08-01</td>\n",
       "      <td>0</td>\n",
       "      <td>10470</td>\n",
       "      <td>NaT</td>\n",
       "      <td>NaT</td>\n",
       "      <td>2014-08-01 12:31:58</td>\n",
       "      <td>2014-08-01 16:31:58</td>\n",
       "      <td>166824</td>\n",
       "      <td>for brain health [e.g. better memory]|to impro...</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>BLUE-00050</td>\n",
       "      <td>2014-08-09</td>\n",
       "      <td>0</td>\n",
       "      <td>10519</td>\n",
       "      <td>NaT</td>\n",
       "      <td>NaT</td>\n",
       "      <td>2014-08-08 19:31:06</td>\n",
       "      <td>2014-08-09 02:31:06</td>\n",
       "      <td>173157</td>\n",
       "      <td>for management of daily problems|for brain hea...</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>BLUE-00050</td>\n",
       "      <td>2014-08-09</td>\n",
       "      <td>4</td>\n",
       "      <td>10519</td>\n",
       "      <td>2014-09-06 09:00:01</td>\n",
       "      <td>2014-09-06 16:00:01</td>\n",
       "      <td>2014-09-08 18:21:29</td>\n",
       "      <td>2014-09-09 01:21:29</td>\n",
       "      <td>198454</td>\n",
       "      <td>for management of daily problems|for brain hea...</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>BLUE-00050</td>\n",
       "      <td>2014-08-09</td>\n",
       "      <td>12</td>\n",
       "      <td>10519</td>\n",
       "      <td>2014-11-01 08:00:05</td>\n",
       "      <td>2014-11-01 16:00:05</td>\n",
       "      <td>2014-11-02 09:16:22</td>\n",
       "      <td>2014-11-02 17:16:22</td>\n",
       "      <td>256858</td>\n",
       "      <td>for mood [e.g. sadness]|for brain health [e.g....</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   brightenid      start  week  user_id     sent_time_local  \\\n",
       "0  BLUE-00048 2014-08-01     0    10431                 NaT   \n",
       "1  BLUE-00049 2014-08-01     0    10470                 NaT   \n",
       "2  BLUE-00050 2014-08-09     0    10519                 NaT   \n",
       "3  BLUE-00050 2014-08-09     4    10519 2014-09-06 09:00:01   \n",
       "4  BLUE-00050 2014-08-09    12    10519 2014-11-01 08:00:05   \n",
       "\n",
       "        sent_time_utc      response_local        response_utc  response_id  \\\n",
       "0                 NaT 2014-08-01 07:00:09 2014-08-01 11:00:09       166331   \n",
       "1                 NaT 2014-08-01 12:31:58 2014-08-01 16:31:58       166824   \n",
       "2                 NaT 2014-08-08 19:31:06 2014-08-09 02:31:06       173157   \n",
       "3 2014-09-06 16:00:01 2014-09-08 18:21:29 2014-09-09 01:21:29       198454   \n",
       "4 2014-11-01 16:00:05 2014-11-02 09:16:22 2014-11-02 17:16:22       256858   \n",
       "\n",
       "                      Why did you download this app?  \\\n",
       "0  for fun|for mental health reasons [e.g. depres...   \n",
       "1  for brain health [e.g. better memory]|to impro...   \n",
       "2  for management of daily problems|for brain hea...   \n",
       "3  for management of daily problems|for brain hea...   \n",
       "4  for mood [e.g. sadness]|for brain health [e.g....   \n",
       "\n",
       "  For \"other\", please type in box  \n",
       "0                             NaN  \n",
       "1                             NaN  \n",
       "2                             NaN  \n",
       "3                             NaN  \n",
       "4                             NaN  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>username</th>\n",
       "      <th>Why did you download this app?</th>\n",
       "      <th>createdAt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>EN05039</td>\n",
       "      <td>Fun,My mental health,My mood,Managing daily is...</td>\n",
       "      <td>2016-09-03 17:13:21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EN05331</td>\n",
       "      <td>My mental health,Brain health,Improve work</td>\n",
       "      <td>2017-01-22 23:04:11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>EN00387</td>\n",
       "      <td>My mental health,My mood,Brain health,Fun</td>\n",
       "      <td>2016-10-28 08:37:06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>EN00322</td>\n",
       "      <td>My mental health</td>\n",
       "      <td>2016-09-07 21:32:32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>EN00478</td>\n",
       "      <td>My mental health,Managing daily issues,My mood...</td>\n",
       "      <td>2016-11-11 07:21:42</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  username                     Why did you download this app?  \\\n",
       "0  EN05039  Fun,My mental health,My mood,Managing daily is...   \n",
       "1  EN05331         My mental health,Brain health,Improve work   \n",
       "2  EN00387          My mental health,My mood,Brain health,Fun   \n",
       "3  EN00322                                   My mental health   \n",
       "4  EN00478  My mental health,Managing daily issues,My mood...   \n",
       "\n",
       "            createdAt  \n",
       "0 2016-09-03 17:13:21  \n",
       "1 2017-01-22 23:04:11  \n",
       "2 2016-10-28 08:37:06  \n",
       "3 2016-09-07 21:32:32  \n",
       "4 2016-11-11 07:21:42  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "v1sid, v2sid = 'syn10250489', 'syn17023091'\n",
    "\n",
    "v1r = pd.read_excel(syn.get(v1sid).path)\n",
    "v2r = pd.read_csv(syn.get(v2sid).path, parse_dates=['createdAt'])\n",
    "\n",
    "v1r.head()\n",
    "v2r.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### V1 Data Prep"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-28T19:54:57.410111Z",
     "start_time": "2018-12-28T19:54:57.275614Z"
    }
   },
   "outputs": [],
   "source": [
    "# drop the uneeded columns\n",
    "v1 = v1r.drop(columns=[\n",
    "    'week',\t'user_id',\t'sent_time_local',\t'sent_time_utc', 'response_utc', 'response_id'\n",
    "]).rename(columns={\n",
    "    'brightenid': 'participant_id',\n",
    "    'response_local':'dt_response',\n",
    "    'Why did you download this app?':'apps',\n",
    "    'For \"other\", please type in box':'other_description'\n",
    "})\n",
    "\n",
    "# conver to lowercase for lookups\n",
    "v1.apps = v1.apps.apply(lambda x: x.lower() if not isnum(x) else 'none')\n",
    "\n",
    "# add indicators for different app usage\n",
    "v1['fun'] = v1.apps.apply(lambda x: int(x.find('fun') > -1))\n",
    "v1['mental_health'] =  v1.apps.apply(lambda x: int(x.find('mental health') > -1))\n",
    "v1['mood'] =  v1.apps.apply(lambda x: int(x.find('mood') > -1))\n",
    "v1['managing_daily_issues'] =  v1.apps.apply(lambda x: int(x.find('management of daily ') > -1))\n",
    "v1['improve_work'] =  v1.apps.apply(lambda x: int(x.find('improve work') > -1))\n",
    "v1['brain_health'] =  v1.apps.apply(lambda x: int(x.find('brain health') > -1))\n",
    "v1['improve_relationships'] =  v1.apps.apply(lambda x: int(x.find('improve relationships') > -1))\n",
    "v1['other'] =  v1.apps.apply(lambda x: int(x.find('other') > -1))\n",
    "\n",
    "v1.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-28T19:54:57.423884Z",
     "start_time": "2018-12-28T19:54:57.414380Z"
    }
   },
   "outputs": [],
   "source": [
    "# print the unique application reasons\n",
    "t = [print(a) for a in pd.unique(list(it.chain(*[t.split('|') for t in v1.apps])))]; del t"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-28T19:54:57.724949Z",
     "start_time": "2018-12-28T19:54:57.429488Z"
    }
   },
   "outputs": [],
   "source": [
    "plt.hist([len(a) for a in v1.other_description if not isnum(a)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-28T19:54:58.216979Z",
     "start_time": "2018-12-28T19:54:57.728659Z"
    }
   },
   "outputs": [],
   "source": [
    "v1['day'] = [(t.dt_response - t.start).days + 1 for t in v1.itertuples()]\n",
    "v1.day.hist()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Extract two common topics I saw a quick read through the other descriptions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-28T19:54:58.243131Z",
     "start_time": "2018-12-28T19:54:58.223308Z"
    },
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "incentive_words = [\n",
    "    'paid', 'pay', 'mone', 'compens', 'gift', 'incentive', '$', 'finan', 'incom', 'reimb', 'craigs', 'pd'\n",
    "]\n",
    "\n",
    "def is_incentive(s):\n",
    "    if not isinstance(s, str):\n",
    "        return 0\n",
    "    else:\n",
    "        return int(any(s.find(a) > -1 for a in incentive_words))\n",
    "    \n",
    "v1['happ_inc'] = pd.to_numeric(v1.other_description.apply(is_incentive), downcast='integer')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-28T19:54:58.299327Z",
     "start_time": "2018-12-28T19:54:58.248110Z"
    }
   },
   "outputs": [],
   "source": [
    "for_the_study_words = [\n",
    "    'part', 'require', 'for study', 'told to', 'asked to', 'request', 'to be', 'to do', 'brighten', 'assignment', 'ucsf', 'for a', 'study'\n",
    "]\n",
    "\n",
    "def is_for_the_study(s):\n",
    "    if not isinstance(s, str):\n",
    "        return int(0)\n",
    "    else:\n",
    "        return int(any(s.find(a) > -1 for a in for_the_study_words))\n",
    "    \n",
    "v1['happ_fts'] = v1.other_description.apply(is_for_the_study)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-28T19:54:58.360346Z",
     "start_time": "2018-12-28T19:54:58.303805Z"
    }
   },
   "outputs": [],
   "source": [
    "v1 = v1.drop(columns=['start', 'day'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### V2 Data Prep"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-28T19:54:58.439887Z",
     "start_time": "2018-12-28T19:54:58.364685Z"
    }
   },
   "outputs": [],
   "source": [
    "# drop the uneeded columns\n",
    "v2 = v2r.rename(columns={\n",
    "    'username': 'participant_id',\n",
    "    'createdAt':'dt_response',\n",
    "    'Why did you download this app?':'apps'\n",
    "})\n",
    "\n",
    "# conver to lowercase for lookups\n",
    "v2.apps = v2.apps.apply(lambda x: x.lower() if not isnum(x) else 'none')\n",
    "\n",
    "# add indicators for different app usage\n",
    "v2['fun'] = v2.apps.apply(lambda x: int(x.find('fun') > -1))\n",
    "v2['mental_health'] =  v2.apps.apply(lambda x: int(x.find('my mental health') > -1))\n",
    "v2['mood'] =  v2.apps.apply(lambda x: int(x.find('my mood') > -1))\n",
    "v2['managing_daily_issues'] =  v2.apps.apply(lambda x: int(x.find('managing daily issues') > -1))\n",
    "v2['improve_work'] =  v2.apps.apply(lambda x: int(x.find('improve work') > -1))\n",
    "v2['brain_health'] =  v2.apps.apply(lambda x: int(x.find('brain health') > -1))\n",
    "v2['improve_relationships'] =  v2.apps.apply(lambda x: int(x.find('improve relationships') > -1))\n",
    "v2['other'] =  v2.apps.apply(lambda x: int(x.find('other') > -1))\n",
    "\n",
    "# add the study flag\n",
    "v2.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-28T19:54:58.463531Z",
     "start_time": "2018-12-28T19:54:58.444738Z"
    }
   },
   "outputs": [],
   "source": [
    "t = [print(a) for a in pd.unique(list(it.chain(*[t.split(',') for t in v2.apps])))]; del t"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Combine the DataFrames"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-28T19:54:58.541761Z",
     "start_time": "2018-12-28T19:54:58.468717Z"
    }
   },
   "outputs": [],
   "source": [
    "combined = pd.concat([v1, v2], sort=False)\n",
    "\n",
    "combined = combined.drop(columns=['apps']).rename(columns={\n",
    "    'brain_health': 'happ_bh',\n",
    "    'fun': 'happ_f',\n",
    "    'improve_relationships': 'happ_ir',\n",
    "    'improve_work': 'happ_iw',\n",
    "    'managing_daily_issues': 'happ_mdi',\n",
    "    'mental_health': 'happ_mh',\n",
    "    'mood':'happ_m',\n",
    "    'other': 'happ_o',\n",
    "    'other_description':'happ_o_description'\n",
    "}).loc[:, [\n",
    "    'participant_id', 'dt_response', \n",
    "    'happ_bh', 'happ_f', 'happ_fts', 'happ_inc', 'happ_ir', 'happ_iw', 'happ_m', 'happ_mdi', 'happ_mh', 'happ_o',\n",
    "    'happ_o_description'\n",
    "]]\n",
    "\n",
    "combined = combined.fillna(0)\n",
    "\n",
    "# make sure they're all the correct type. for some reason pandas was converting fts and inc to floats\n",
    "# but we don't need that many bits. plus i like consistency\n",
    "for c in combined.columns:\n",
    "    if c.find('description') > -1:\n",
    "        continue \n",
    "        \n",
    "    if c.find('app') > -1:\n",
    "        combined[c] = combined[c].astype(int)\n",
    "\n",
    "combined.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Add week into study"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "metasid = 'syn27082597'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-28T19:54:59.936285Z",
     "start_time": "2018-12-28T19:54:58.545530Z"
    }
   },
   "outputs": [],
   "source": [
    "metadata = syn.tableQuery(f'SELECT participant_id, startdate FROM {metasid}').asDataFrame(convert_to_datetime=True)\n",
    "metadata.startdate = pd.to_datetime(metadata.startdate)\n",
    "\n",
    "# add in the participants start date as a new column\n",
    "combined = pd.merge(combined, metadata, on='participant_id', how='left')\n",
    "\n",
    "# get the time difference in weeks as a float\n",
    "combined['week'] = [\n",
    "    d.days/7 for d in (\n",
    "        combined.dt_response.apply(\n",
    "            lambda x: dt.datetime(year=x.year, month=x.month, day=x.day))-combined.startdate\n",
    "    )\n",
    "]\n",
    "\n",
    "# convert the week number to an int by taking the floor\n",
    "combined.week = combined.week.progress_apply(lambda x: np.int16(np.floor(x))+1)# if not pd.isnull(x) else np.nan)\n",
    "\n",
    "# remove the start date\n",
    "combined = combined.drop(columns=['startdate'], errors='ignore')\n",
    "\n",
    "# reorder the columns\n",
    "cols = list(combined.columns)\n",
    "cols = cols[0:2] + ['week'] + cols[2:-1]\n",
    "combined = combined.reindex(columns=cols)\n",
    "\n",
    "combined.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Localize timestamps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-28T19:55:00.056246Z",
     "start_time": "2018-12-28T19:54:59.951047Z"
    }
   },
   "outputs": [],
   "source": [
    "# localize timestamps\n",
    "combined['dt_response'] = [\n",
    "    str(t.tz_localize('UTC'))\n",
    "    for t in combined.dt_response\n",
    "]\n",
    "\n",
    "combined.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Set provenance and upload to Synapse"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-28T19:55:21.790792Z",
     "start_time": "2018-12-28T19:55:00.061005Z"
    }
   },
   "outputs": [],
   "source": [
    "t = syn.delete(\n",
    "    syn.tableQuery('select * from syn17022426')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "start_time": "2018-12-28T19:54:37.763Z"
    }
   },
   "outputs": [],
   "source": [
    "final = syn.store(Table(\n",
    "    Schema(\n",
    "            name='Health Applications',\n",
    "            columns=as_table_columns(combined), \n",
    "            parent='syn10848316'),\n",
    "        combined\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "start_time": "2018-12-28T19:54:37.767Z"
    }
   },
   "outputs": [],
   "source": [
    "final = syn.setProvenance(\n",
    "    'syn17022426',\n",
    "    activity=Activity(\n",
    "        name='Combine V1 and V2 data',\n",
    "        description='Process and combine the data collected during study 1 and study 2',\n",
    "        used=['syn17023091', v1sid, v2sid, 'syn12181332', metasid],\n",
    "        executed=[\n",
    "            dict(\n",
    "                name='Curate_StudyAppDownloadReason_Data',\n",
    "                url='https://github.com/apratap/BRIGHTEN-Data-Release/blob/master/Curate_StudyAppDownloadReason_Data.ipynb'\n",
    "            )\n",
    "        ]\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}