{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Created by: [SmirkyGraphs](https://smirkygraphs.github.io/). Code: [GitHub](https://github.com/SmirkyGraphs). Source: [RIDE](http://infoworks.ride.ri.gov/).\n",
    "<hr></hr>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Cleaning RI Public Schools Testing Scores"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "using pandas to clean and combine multiple different datasets covering PRACC, SAT, NECAP, Graduation, Attendance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import glob\n",
    "import csv\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "necap_dict = csv.reader(open(\"./dicts/necap.csv\"))\n",
    "map_necap = dict(necap_dict)\n",
    "\n",
    "pracc_dict = csv.reader(open(\"./dicts/pracc.csv\"))\n",
    "map_pracc = dict(pracc_dict)\n",
    "\n",
    "middle_dict = csv.reader(open(\"./dicts/middle_schools.csv\"))\n",
    "middle_map = dict(middle_dict)\n",
    "\n",
    "region_dict = csv.reader(open(\"./dicts/region_map.csv\"))\n",
    "region_map = dict(region_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "#NECAP Cleaning\n",
    "\n",
    "fp = './raw/NECAP'\n",
    "\n",
    "allFiles = glob.glob(fp + \"/*.csv\")\n",
    "frame = pd.DataFrame()\n",
    "data = []\n",
    "for csv in allFiles:\n",
    "    \n",
    "        df = pd.read_csv(csv, skiprows=3)\n",
    "        df['filename'] = os.path.basename(csv)\n",
    "        df['category'] = 'NECAP'\n",
    "        df = df.iloc[1:]\n",
    "        \n",
    "        # Unpivot the columns\n",
    "        x = ['District', 'School', 'category', 'filename']\n",
    "        df = pd.melt(df, id_vars=x, value_vars=df.columns.drop(x).tolist())\n",
    "        \n",
    "        df = df.rename(index=str, columns={'variable': 'year'})\n",
    "        df['year'] = df.year.str[13:-3]\n",
    "        \n",
    "        df = df.rename(index=str, columns={'filename': 'subject'})\n",
    "        df['subject'] = df.subject.str[7:-12]\n",
    "        \n",
    "        df['level'] = df.subject.map(map_necap)\n",
    "        \n",
    "        df = df.replace('no data', np.nan)\n",
    "        df = df.replace('too few data', np.nan)\n",
    "        df['value'] = df['value'].str.rstrip('%').astype('float') / 100.0\n",
    "        \n",
    "        df['rank'] = df.groupby(['subject', 'year'])['value'].rank(na_option='bottom', method='dense', ascending=0)\n",
    "        df['percentile'] = df.groupby(['subject', 'year'])['value'].rank(na_option='top', pct=True)\n",
    "        \n",
    "        data.append(df)\n",
    "frame = pd.concat(data)\n",
    "\n",
    "cols = ['District', 'School', 'level', 'category', 'subject', 'year', 'value', 'rank', 'percentile']\n",
    "frame = frame[cols]\n",
    "\n",
    "necap = frame\n",
    "necap.to_csv('./clean/necap_clean.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>District</th>\n",
       "      <th>School</th>\n",
       "      <th>level</th>\n",
       "      <th>category</th>\n",
       "      <th>subject</th>\n",
       "      <th>year</th>\n",
       "      <th>value</th>\n",
       "      <th>rank</th>\n",
       "      <th>percentile</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Barrington</td>\n",
       "      <td>Barrington High School</td>\n",
       "      <td>High School</td>\n",
       "      <td>NECAP</td>\n",
       "      <td>11th Grade Math</td>\n",
       "      <td>2008</td>\n",
       "      <td>0.71</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Bristol Warren</td>\n",
       "      <td>Mt. Hope High School</td>\n",
       "      <td>High School</td>\n",
       "      <td>NECAP</td>\n",
       "      <td>11th Grade Math</td>\n",
       "      <td>2008</td>\n",
       "      <td>0.40</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0.827586</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Burrillville</td>\n",
       "      <td>Burrillville High School</td>\n",
       "      <td>High School</td>\n",
       "      <td>NECAP</td>\n",
       "      <td>11th Grade Math</td>\n",
       "      <td>2008</td>\n",
       "      <td>0.30</td>\n",
       "      <td>16.0</td>\n",
       "      <td>0.672414</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Central Falls</td>\n",
       "      <td>Central Falls High School</td>\n",
       "      <td>High School</td>\n",
       "      <td>NECAP</td>\n",
       "      <td>11th Grade Math</td>\n",
       "      <td>2008</td>\n",
       "      <td>0.04</td>\n",
       "      <td>32.0</td>\n",
       "      <td>0.206897</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Chariho</td>\n",
       "      <td>Chariho Regional High School</td>\n",
       "      <td>High School</td>\n",
       "      <td>NECAP</td>\n",
       "      <td>11th Grade Math</td>\n",
       "      <td>2008</td>\n",
       "      <td>0.31</td>\n",
       "      <td>15.0</td>\n",
       "      <td>0.724138</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         District                        School        level category  \\\n",
       "0      Barrington        Barrington High School  High School    NECAP   \n",
       "1  Bristol Warren          Mt. Hope High School  High School    NECAP   \n",
       "2    Burrillville      Burrillville High School  High School    NECAP   \n",
       "3   Central Falls     Central Falls High School  High School    NECAP   \n",
       "4         Chariho  Chariho Regional High School  High School    NECAP   \n",
       "\n",
       "           subject  year  value  rank  percentile  \n",
       "0  11th Grade Math  2008   0.71   1.0    1.000000  \n",
       "1  11th Grade Math  2008   0.40   9.0    0.827586  \n",
       "2  11th Grade Math  2008   0.30  16.0    0.672414  \n",
       "3  11th Grade Math  2008   0.04  32.0    0.206897  \n",
       "4  11th Grade Math  2008   0.31  15.0    0.724138  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "necap.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "#PRACC Cleaning\n",
    "\n",
    "fp = './raw/PRACC'\n",
    "\n",
    "allFiles = glob.glob(fp + \"/*.csv\")\n",
    "frame = pd.DataFrame()\n",
    "data = []\n",
    "for csv in allFiles:\n",
    "    \n",
    "        df = pd.read_csv(csv, skiprows=3)\n",
    "        df['filename'] = os.path.basename(csv)\n",
    "        df['category'] = 'PRACC'\n",
    "        df = df.iloc[1:]\n",
    "        \n",
    "        # Unpivot the columns\n",
    "        x = ['District', 'School', 'category', 'filename']\n",
    "        df = pd.melt(df, id_vars=x, value_vars=df.columns.drop(x).tolist())\n",
    "        \n",
    "        df = df.rename(index=str, columns={'variable': 'year'})\n",
    "        df['year'] = df.year.str[29:-3]\n",
    "        \n",
    "        df = df.rename(index=str, columns={'filename': 'subject'})\n",
    "        df['subject'] = df.subject.str[7:-12]\n",
    "        \n",
    "        df['level'] = df.subject.map(map_pracc)\n",
    "        df['level2'] = df.School.map(middle_map)        \n",
    "        df['level3'] = np.where((df['subject'] == 'Geometry Mathematics')\n",
    "                                & (df['level2'] == 'Middle School')\n",
    "                                | (df['subject'] == 'Algebra I Mathematics') \n",
    "                                & (df['level2'] == 'Middle School'),'Middle School', None)\n",
    "        \n",
    "        df.level3.fillna(df.level, inplace=True)\n",
    "        \n",
    "        df = df.drop(['level'], axis=1)\n",
    "        df = df.drop(['level2'], axis=1)\n",
    "        df = df.rename(index=str, columns={'level3': 'level'})\n",
    "\n",
    "        df = df.replace('no data', np.nan)\n",
    "        df = df.replace('too few data', np.nan)\n",
    "        df['value'] = df['value'].str.rstrip('%').astype('float') / 100.0\n",
    "        \n",
    "        df['rank'] = df.groupby(['subject', 'year'])['value'].rank(na_option='bottom', method='dense', ascending=0)\n",
    "        df['percentile'] = df.groupby(['subject', 'year'])['value'].rank(na_option='top', pct=True)\n",
    "        \n",
    "        data.append(df)\n",
    "frame = pd.concat(data)\n",
    "\n",
    "cols = ['District', 'School', 'level', 'category', 'subject', 'year', 'value', 'rank', 'percentile']\n",
    "frame = frame[cols]\n",
    "\n",
    "pracc = frame\n",
    "pracc.to_csv('./clean/pracc_clean.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>District</th>\n",
       "      <th>School</th>\n",
       "      <th>level</th>\n",
       "      <th>category</th>\n",
       "      <th>subject</th>\n",
       "      <th>year</th>\n",
       "      <th>value</th>\n",
       "      <th>rank</th>\n",
       "      <th>percentile</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Barrington</td>\n",
       "      <td>Barrington High School</td>\n",
       "      <td>High School</td>\n",
       "      <td>PRACC</td>\n",
       "      <td>10th Grade ELA_Literacy</td>\n",
       "      <td>2014</td>\n",
       "      <td>0.7510</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.983871</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Bristol Warren</td>\n",
       "      <td>Mt. Hope High School</td>\n",
       "      <td>High School</td>\n",
       "      <td>PRACC</td>\n",
       "      <td>10th Grade ELA_Literacy</td>\n",
       "      <td>2014</td>\n",
       "      <td>0.2418</td>\n",
       "      <td>31.0</td>\n",
       "      <td>0.516129</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Burrillville</td>\n",
       "      <td>Burrillville High School</td>\n",
       "      <td>High School</td>\n",
       "      <td>PRACC</td>\n",
       "      <td>10th Grade ELA_Literacy</td>\n",
       "      <td>2014</td>\n",
       "      <td>0.2636</td>\n",
       "      <td>29.0</td>\n",
       "      <td>0.548387</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Central Falls</td>\n",
       "      <td>Central Falls High School</td>\n",
       "      <td>High School</td>\n",
       "      <td>PRACC</td>\n",
       "      <td>10th Grade ELA_Literacy</td>\n",
       "      <td>2014</td>\n",
       "      <td>NaN</td>\n",
       "      <td>52.0</td>\n",
       "      <td>0.096774</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Chariho</td>\n",
       "      <td>Chariho Regional High School</td>\n",
       "      <td>High School</td>\n",
       "      <td>PRACC</td>\n",
       "      <td>10th Grade ELA_Literacy</td>\n",
       "      <td>2014</td>\n",
       "      <td>0.4220</td>\n",
       "      <td>15.0</td>\n",
       "      <td>0.774194</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         District                        School        level category  \\\n",
       "0      Barrington        Barrington High School  High School    PRACC   \n",
       "1  Bristol Warren          Mt. Hope High School  High School    PRACC   \n",
       "2    Burrillville      Burrillville High School  High School    PRACC   \n",
       "3   Central Falls     Central Falls High School  High School    PRACC   \n",
       "4         Chariho  Chariho Regional High School  High School    PRACC   \n",
       "\n",
       "                   subject  year   value  rank  percentile  \n",
       "0  10th Grade ELA_Literacy  2014  0.7510   2.0    0.983871  \n",
       "1  10th Grade ELA_Literacy  2014  0.2418  31.0    0.516129  \n",
       "2  10th Grade ELA_Literacy  2014  0.2636  29.0    0.548387  \n",
       "3  10th Grade ELA_Literacy  2014     NaN  52.0    0.096774  \n",
       "4  10th Grade ELA_Literacy  2014  0.4220  15.0    0.774194  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pracc.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "#SAT Cleaning\n",
    "\n",
    "fp = './raw/SAT'\n",
    "\n",
    "allFiles = glob.glob(fp + \"/*.csv\")\n",
    "frame = pd.DataFrame()\n",
    "data = []\n",
    "for csv in allFiles:\n",
    "    \n",
    "        df = pd.read_csv(csv, skiprows=3)\n",
    "        df['filename'] = os.path.basename(csv)\n",
    "        df['category'] = 'SAT'\n",
    "        df = df.iloc[1:]\n",
    "        \n",
    "        df = df.replace('no data',np.nan)\n",
    "        df = df.replace('too few data',np.nan)\n",
    "        \n",
    "        col_list = ['Mathematics Average', 'Reading Average', 'Writing Average']\n",
    "        df['Total Average'] = df[col_list].astype(float).sum(axis=1)\n",
    "        \n",
    "        # Unpivot the columns\n",
    "        x = ['District', 'School', 'category', 'filename']\n",
    "        df = pd.melt(df, id_vars=x, value_vars=df.columns.drop(x).tolist())\n",
    "        \n",
    "        df = df.rename(index=str, columns={'variable': 'subject'})\n",
    "        df['subject'] = df.subject.str[:-7]\n",
    "        df['subject'] = 'SAT ' + df['subject'].astype(str) + 'Avg'\n",
    "        \n",
    "        df['level'] = 'High School'\n",
    "        \n",
    "        df = df.rename(index=str, columns={'filename': 'year'})\n",
    "        df['year'] = df.year.str[:-9]\n",
    "        \n",
    "        df['value'] = df['value'].astype('float')\n",
    "        df['rank'] = df.groupby(['subject', 'year'])['value'].rank(na_option='bottom', method='dense', ascending=0)\n",
    "        df['percentile'] = df.groupby(['subject', 'year'])['value'].rank(na_option='top', pct=True)\n",
    "        \n",
    "        \n",
    "        data.append(df)\n",
    "frame = pd.concat(data)\n",
    "\n",
    "cols = ['District', 'School', 'level', 'category', 'subject', 'year', 'value', 'rank', 'percentile']\n",
    "frame = frame[cols]\n",
    "\n",
    "sat = frame\n",
    "sat.to_csv('./clean/sat_clean.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>District</th>\n",
       "      <th>School</th>\n",
       "      <th>level</th>\n",
       "      <th>category</th>\n",
       "      <th>subject</th>\n",
       "      <th>year</th>\n",
       "      <th>value</th>\n",
       "      <th>rank</th>\n",
       "      <th>percentile</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Barrington</td>\n",
       "      <td>Barrington High School</td>\n",
       "      <td>High School</td>\n",
       "      <td>SAT</td>\n",
       "      <td>SAT Mathematics Avg</td>\n",
       "      <td>2010</td>\n",
       "      <td>576.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.982456</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Bristol Warren</td>\n",
       "      <td>Mt. Hope High School</td>\n",
       "      <td>High School</td>\n",
       "      <td>SAT</td>\n",
       "      <td>SAT Mathematics Avg</td>\n",
       "      <td>2010</td>\n",
       "      <td>484.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0.622807</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Burrillville</td>\n",
       "      <td>Burrillville High School</td>\n",
       "      <td>High School</td>\n",
       "      <td>SAT</td>\n",
       "      <td>SAT Mathematics Avg</td>\n",
       "      <td>2010</td>\n",
       "      <td>488.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>0.649123</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Central Falls</td>\n",
       "      <td>Central Falls High School</td>\n",
       "      <td>High School</td>\n",
       "      <td>SAT</td>\n",
       "      <td>SAT Mathematics Avg</td>\n",
       "      <td>2010</td>\n",
       "      <td>374.0</td>\n",
       "      <td>39.0</td>\n",
       "      <td>0.263158</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Chariho</td>\n",
       "      <td>Chariho Regional High School</td>\n",
       "      <td>High School</td>\n",
       "      <td>SAT</td>\n",
       "      <td>SAT Mathematics Avg</td>\n",
       "      <td>2010</td>\n",
       "      <td>520.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0.842105</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         District                        School        level category  \\\n",
       "0      Barrington        Barrington High School  High School      SAT   \n",
       "1  Bristol Warren          Mt. Hope High School  High School      SAT   \n",
       "2    Burrillville      Burrillville High School  High School      SAT   \n",
       "3   Central Falls     Central Falls High School  High School      SAT   \n",
       "4         Chariho  Chariho Regional High School  High School      SAT   \n",
       "\n",
       "               subject   year  value  rank  percentile  \n",
       "0  SAT Mathematics Avg  2010   576.0   2.0    0.982456  \n",
       "1  SAT Mathematics Avg  2010   484.0  19.0    0.622807  \n",
       "2  SAT Mathematics Avg  2010   488.0  18.0    0.649123  \n",
       "3  SAT Mathematics Avg  2010   374.0  39.0    0.263158  \n",
       "4  SAT Mathematics Avg  2010   520.0   9.0    0.842105  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sat.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Graduation Rate\n",
    "\n",
    "fp = './raw/Graduation Rate'\n",
    "\n",
    "allFiles = glob.glob(fp + \"/*.csv\")\n",
    "frame = pd.DataFrame()\n",
    "data = []\n",
    "for csv in allFiles:\n",
    "    \n",
    "        df = pd.read_csv(csv, skiprows=4)\n",
    "        df['filename'] = os.path.basename(csv)\n",
    "        df['category'] = 'GRAD'\n",
    "        df = df.iloc[1:, [0,1,2,6,7]]\n",
    "        \n",
    "        # Unpivot the columns\n",
    "        x = ['District', 'School', 'category', 'filename']\n",
    "        df = pd.melt(df, id_vars=x, value_vars=df.columns.drop(x).tolist())\n",
    "        \n",
    "        df = df.rename(index=str, columns={'variable': 'subject'})\n",
    "        df['subject'] = 'Pct. Graduated 4 Yrs'\n",
    "        \n",
    "        df['level'] = 'High School'\n",
    "        \n",
    "        df = df.rename(index=str, columns={'filename': 'year'})\n",
    "        df['year'] = df.year.str[0:4]\n",
    "        \n",
    "        df = df.replace('no data', np.nan)\n",
    "        df = df.replace('too few data', np.nan)\n",
    "        df['value'] = df['value'].str.rstrip('%').astype('float') / 100.0\n",
    "        \n",
    "        df['rank'] = df.groupby(['subject', 'year'])['value'].rank(na_option='bottom', method='dense', ascending=0)\n",
    "        df['percentile'] = df.groupby(['subject', 'year'])['value'].rank(na_option='top', pct=True)\n",
    "        \n",
    "        \n",
    "        data.append(df)\n",
    "frame = pd.concat(data)\n",
    "\n",
    "cols = ['District', 'School', 'category', 'level', 'subject', 'year', 'value', 'rank', 'percentile']\n",
    "frame = frame[cols]\n",
    "\n",
    "grad = frame\n",
    "grad.to_csv('./clean/grad_clean.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>District</th>\n",
       "      <th>School</th>\n",
       "      <th>category</th>\n",
       "      <th>level</th>\n",
       "      <th>subject</th>\n",
       "      <th>year</th>\n",
       "      <th>value</th>\n",
       "      <th>rank</th>\n",
       "      <th>percentile</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Barrington</td>\n",
       "      <td>Barrington High School</td>\n",
       "      <td>GRAD</td>\n",
       "      <td>High School</td>\n",
       "      <td>Pct. Graduated 4 Yrs</td>\n",
       "      <td>2010</td>\n",
       "      <td>0.966</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.982456</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Bristol Warren</td>\n",
       "      <td>Mt. Hope High School</td>\n",
       "      <td>GRAD</td>\n",
       "      <td>High School</td>\n",
       "      <td>Pct. Graduated 4 Yrs</td>\n",
       "      <td>2010</td>\n",
       "      <td>0.881</td>\n",
       "      <td>14.0</td>\n",
       "      <td>0.754386</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Burrillville</td>\n",
       "      <td>Burrillville High School</td>\n",
       "      <td>GRAD</td>\n",
       "      <td>High School</td>\n",
       "      <td>Pct. Graduated 4 Yrs</td>\n",
       "      <td>2010</td>\n",
       "      <td>0.868</td>\n",
       "      <td>16.0</td>\n",
       "      <td>0.719298</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Central Falls</td>\n",
       "      <td>Central Falls High School</td>\n",
       "      <td>GRAD</td>\n",
       "      <td>High School</td>\n",
       "      <td>Pct. Graduated 4 Yrs</td>\n",
       "      <td>2010</td>\n",
       "      <td>0.708</td>\n",
       "      <td>37.0</td>\n",
       "      <td>0.315789</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Chariho</td>\n",
       "      <td>Chariho Regional High School</td>\n",
       "      <td>GRAD</td>\n",
       "      <td>High School</td>\n",
       "      <td>Pct. Graduated 4 Yrs</td>\n",
       "      <td>2010</td>\n",
       "      <td>0.893</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.824561</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         District                        School category        level  \\\n",
       "0      Barrington        Barrington High School     GRAD  High School   \n",
       "1  Bristol Warren          Mt. Hope High School     GRAD  High School   \n",
       "2    Burrillville      Burrillville High School     GRAD  High School   \n",
       "3   Central Falls     Central Falls High School     GRAD  High School   \n",
       "4         Chariho  Chariho Regional High School     GRAD  High School   \n",
       "\n",
       "                subject  year  value  rank  percentile  \n",
       "0  Pct. Graduated 4 Yrs  2010  0.966   2.0    0.982456  \n",
       "1  Pct. Graduated 4 Yrs  2010  0.881  14.0    0.754386  \n",
       "2  Pct. Graduated 4 Yrs  2010  0.868  16.0    0.719298  \n",
       "3  Pct. Graduated 4 Yrs  2010  0.708  37.0    0.315789  \n",
       "4  Pct. Graduated 4 Yrs  2010  0.893  10.0    0.824561  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grad.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attendance\n",
    "\n",
    "fp = './raw/Attendance'\n",
    "\n",
    "allFiles = glob.glob(fp + \"/*.csv\")\n",
    "frame = pd.DataFrame()\n",
    "data = []\n",
    "for csv in allFiles:\n",
    "    \n",
    "        df = pd.read_csv(csv, skiprows=4)\n",
    "        df['filename'] = os.path.basename(csv)\n",
    "        df['category'] = 'Attend'\n",
    "        df = df.iloc[1:]\n",
    "        \n",
    "        # Unpivot the columns\n",
    "        x = ['District', 'School', 'category', 'filename']\n",
    "        df = pd.melt(df, id_vars=x, value_vars=df.columns.drop(x).tolist())\n",
    "        \n",
    "        df = df.rename(index=str, columns={'variable': 'subject'})\n",
    "        df['subject'] = 'Attendance Rate'\n",
    "        \n",
    "        df['level'] = df.filename.str[-7:-5]\n",
    "        df = df.replace('ES', 'Elementary School')\n",
    "        df = df.replace('MS', 'Middle School')\n",
    "        df = df.replace('HS', 'High School')\n",
    "        \n",
    "        df = df.rename(index=str, columns={'filename': 'year'})\n",
    "        df['year'] = df.year.str[0:4]\n",
    "        \n",
    "        df = df.replace('no data', np.nan)\n",
    "        df = df.replace('too few data', np.nan)\n",
    "        df['value'] = df['value'].str.rstrip('%').astype('float') / 100.0\n",
    "        \n",
    "        df['rank'] = df.groupby(['subject', 'year'])['value'].rank(na_option='bottom', method='dense', ascending=0)\n",
    "        df['percentile'] = df.groupby(['subject', 'year'])['value'].rank(na_option='top', pct=True)\n",
    "        \n",
    "        \n",
    "        data.append(df)\n",
    "frame = pd.concat(data)\n",
    "\n",
    "cols = ['District', 'School', 'level', 'category', 'subject', 'year', 'value', 'rank', 'percentile']\n",
    "frame = frame[cols]\n",
    "\n",
    "attend = frame\n",
    "attend.to_csv('./clean/attend_clean.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>District</th>\n",
       "      <th>School</th>\n",
       "      <th>level</th>\n",
       "      <th>category</th>\n",
       "      <th>subject</th>\n",
       "      <th>year</th>\n",
       "      <th>value</th>\n",
       "      <th>rank</th>\n",
       "      <th>percentile</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Barrington</td>\n",
       "      <td>Hampden Meadows School</td>\n",
       "      <td>Elementary School</td>\n",
       "      <td>Attend</td>\n",
       "      <td>Attendance Rate</td>\n",
       "      <td>2010</td>\n",
       "      <td>0.9659</td>\n",
       "      <td>21.0</td>\n",
       "      <td>0.887179</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Barrington</td>\n",
       "      <td>Nayatt School</td>\n",
       "      <td>Elementary School</td>\n",
       "      <td>Attend</td>\n",
       "      <td>Attendance Rate</td>\n",
       "      <td>2010</td>\n",
       "      <td>0.9585</td>\n",
       "      <td>50.0</td>\n",
       "      <td>0.712821</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Barrington</td>\n",
       "      <td>Primrose Hill School</td>\n",
       "      <td>Elementary School</td>\n",
       "      <td>Attend</td>\n",
       "      <td>Attendance Rate</td>\n",
       "      <td>2010</td>\n",
       "      <td>0.9620</td>\n",
       "      <td>36.0</td>\n",
       "      <td>0.810256</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Barrington</td>\n",
       "      <td>Sowams Elementary School</td>\n",
       "      <td>Elementary School</td>\n",
       "      <td>Attend</td>\n",
       "      <td>Attendance Rate</td>\n",
       "      <td>2010</td>\n",
       "      <td>0.9638</td>\n",
       "      <td>26.0</td>\n",
       "      <td>0.861538</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Bristol Warren</td>\n",
       "      <td>Colt Andrews School</td>\n",
       "      <td>Elementary School</td>\n",
       "      <td>Attend</td>\n",
       "      <td>Attendance Rate</td>\n",
       "      <td>2010</td>\n",
       "      <td>0.9494</td>\n",
       "      <td>92.0</td>\n",
       "      <td>0.425641</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         District                    School              level category  \\\n",
       "0      Barrington    Hampden Meadows School  Elementary School   Attend   \n",
       "1      Barrington             Nayatt School  Elementary School   Attend   \n",
       "2      Barrington      Primrose Hill School  Elementary School   Attend   \n",
       "3      Barrington  Sowams Elementary School  Elementary School   Attend   \n",
       "4  Bristol Warren       Colt Andrews School  Elementary School   Attend   \n",
       "\n",
       "           subject  year   value  rank  percentile  \n",
       "0  Attendance Rate  2010  0.9659  21.0    0.887179  \n",
       "1  Attendance Rate  2010  0.9585  50.0    0.712821  \n",
       "2  Attendance Rate  2010  0.9620  36.0    0.810256  \n",
       "3  Attendance Rate  2010  0.9638  26.0    0.861538  \n",
       "4  Attendance Rate  2010  0.9494  92.0    0.425641  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "attend.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Chronic Absence\n",
    "\n",
    "fp = './raw/Chronic Absenteeism'\n",
    "\n",
    "allFiles = glob.glob(fp + \"/*.csv\")\n",
    "frame = pd.DataFrame()\n",
    "data = []\n",
    "for csv in allFiles:\n",
    "    \n",
    "        df = pd.read_csv(csv, skiprows=4)\n",
    "        df['filename'] = os.path.basename(csv)\n",
    "        df['category'] = 'Chronic'\n",
    "        df = df.iloc[1:]\n",
    "        \n",
    "        # Unpivot the columns\n",
    "        x = ['District', 'School', 'category', 'filename']\n",
    "        df = pd.melt(df, id_vars=x, value_vars=df.columns.drop(x).tolist())\n",
    "        \n",
    "        df = df.rename(index=str, columns={'variable': 'subject'})\n",
    "        df['subject'] = 'Chronic Absence'\n",
    "        \n",
    "        df['level'] = df.filename.str[-7:-5]\n",
    "        df = df.replace('ES', 'Elementary School')\n",
    "        df = df.replace('MS', 'Middle School')\n",
    "        df = df.replace('HS', 'High School')\n",
    "        \n",
    "        df = df.rename(index=str, columns={'filename': 'year'})\n",
    "        df['year'] = df.year.str[0:4]\n",
    "        \n",
    "        df = df.replace('no data', np.nan)\n",
    "        df = df.replace('too few data', np.nan)\n",
    "        df['value'] = df['value'].str.rstrip('%').astype('float') / 100.0\n",
    "        \n",
    "        df['rank'] = df.groupby(['subject', 'year'])['value'].rank(na_option='bottom', method='dense', ascending=0)\n",
    "        df['percentile'] = df.groupby(['subject', 'year'])['value'].rank(na_option='top', pct=True)\n",
    "        \n",
    "        \n",
    "        data.append(df)\n",
    "frame = pd.concat(data)\n",
    "\n",
    "cols = ['District', 'School', 'category', 'level', 'subject', 'year', 'value', 'rank', 'percentile']\n",
    "frame = frame[cols]\n",
    "\n",
    "chronic = frame\n",
    "chronic.to_csv('./clean/chronic_clean.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>District</th>\n",
       "      <th>School</th>\n",
       "      <th>category</th>\n",
       "      <th>level</th>\n",
       "      <th>subject</th>\n",
       "      <th>year</th>\n",
       "      <th>value</th>\n",
       "      <th>rank</th>\n",
       "      <th>percentile</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Barrington</td>\n",
       "      <td>Hampden Meadows School</td>\n",
       "      <td>Chronic</td>\n",
       "      <td>Elementary School</td>\n",
       "      <td>Chronic Absence</td>\n",
       "      <td>2012</td>\n",
       "      <td>0.0175</td>\n",
       "      <td>177.0</td>\n",
       "      <td>0.056995</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Barrington</td>\n",
       "      <td>Nayatt School</td>\n",
       "      <td>Chronic</td>\n",
       "      <td>Elementary School</td>\n",
       "      <td>Chronic Absence</td>\n",
       "      <td>2012</td>\n",
       "      <td>0.0746</td>\n",
       "      <td>126.0</td>\n",
       "      <td>0.341969</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Barrington</td>\n",
       "      <td>Primrose Hill School</td>\n",
       "      <td>Chronic</td>\n",
       "      <td>Elementary School</td>\n",
       "      <td>Chronic Absence</td>\n",
       "      <td>2012</td>\n",
       "      <td>0.0785</td>\n",
       "      <td>121.0</td>\n",
       "      <td>0.367876</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Barrington</td>\n",
       "      <td>Sowams Elementary School</td>\n",
       "      <td>Chronic</td>\n",
       "      <td>Elementary School</td>\n",
       "      <td>Chronic Absence</td>\n",
       "      <td>2012</td>\n",
       "      <td>0.0560</td>\n",
       "      <td>144.0</td>\n",
       "      <td>0.243523</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Bristol Warren</td>\n",
       "      <td>Colt Andrews School</td>\n",
       "      <td>Chronic</td>\n",
       "      <td>Elementary School</td>\n",
       "      <td>Chronic Absence</td>\n",
       "      <td>2012</td>\n",
       "      <td>0.1162</td>\n",
       "      <td>73.0</td>\n",
       "      <td>0.616580</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         District                    School category              level  \\\n",
       "0      Barrington    Hampden Meadows School  Chronic  Elementary School   \n",
       "1      Barrington             Nayatt School  Chronic  Elementary School   \n",
       "2      Barrington      Primrose Hill School  Chronic  Elementary School   \n",
       "3      Barrington  Sowams Elementary School  Chronic  Elementary School   \n",
       "4  Bristol Warren       Colt Andrews School  Chronic  Elementary School   \n",
       "\n",
       "           subject  year   value   rank  percentile  \n",
       "0  Chronic Absence  2012  0.0175  177.0    0.056995  \n",
       "1  Chronic Absence  2012  0.0746  126.0    0.341969  \n",
       "2  Chronic Absence  2012  0.0785  121.0    0.367876  \n",
       "3  Chronic Absence  2012  0.0560  144.0    0.243523  \n",
       "4  Chronic Absence  2012  0.1162   73.0    0.616580  "
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "chronic.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "school_region_map =  {\n",
    "    'Academy for Career Exploration' : 'Charter', \n",
    "    \"NE Laborers'/Cranston Public Schools Construction Career Academy\" : 'Charter',\n",
    "    'Times2 Academy' : 'Charter'}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = region_map"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Combine ALl\n",
    "\n",
    "fp = './clean/'\n",
    "\n",
    "allFiles = glob.glob(fp + \"/*.csv\")\n",
    "frame = pd.DataFrame()\n",
    "data = []\n",
    "for csv in allFiles:\n",
    "    \n",
    "        df = pd.read_csv(csv)     \n",
    "        \n",
    "        data.append(df)\n",
    "frame = pd.concat(data, sort=True)\n",
    "\n",
    "cols = ['District', 'School', 'category', 'level', 'subject', 'year', 'value', 'rank', 'percentile']\n",
    "frame = frame[cols]\n",
    "\n",
    "final = frame\n",
    "\n",
    "final['region'] = final.District.map(region_map)\n",
    "final['region2'] = final.School.map(school_region_map)\n",
    "\n",
    "final.region2.fillna(final.region, inplace=True)\n",
    "\n",
    "final = final.drop(['region'], axis=1)\n",
    "\n",
    "final['subject'] = final['subject'].str.replace('Mathematics', 'Math')\n",
    "final['subject'] = final['subject'].str.replace('ELA_Literacy', 'English')\n",
    "\n",
    "final = final.rename(columns={'region2': 'region'})\n",
    "\n",
    "final = final[final.District != 'Regional Collaborative']\n",
    "\n",
    "cols = ['District', 'region', 'School', 'category', 'level', 'subject', 'year', 'value', 'rank', 'percentile']\n",
    "final = final[cols]\n",
    "\n",
    "final.to_csv('./clean/final_clean.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>District</th>\n",
       "      <th>region</th>\n",
       "      <th>School</th>\n",
       "      <th>category</th>\n",
       "      <th>level</th>\n",
       "      <th>subject</th>\n",
       "      <th>year</th>\n",
       "      <th>value</th>\n",
       "      <th>rank</th>\n",
       "      <th>percentile</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Barrington</td>\n",
       "      <td>Suburban</td>\n",
       "      <td>Hampden Meadows School</td>\n",
       "      <td>Attend</td>\n",
       "      <td>Elementary School</td>\n",
       "      <td>Attendance Rate</td>\n",
       "      <td>2010</td>\n",
       "      <td>0.9659</td>\n",
       "      <td>21.0</td>\n",
       "      <td>0.887179</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Barrington</td>\n",
       "      <td>Suburban</td>\n",
       "      <td>Nayatt School</td>\n",
       "      <td>Attend</td>\n",
       "      <td>Elementary School</td>\n",
       "      <td>Attendance Rate</td>\n",
       "      <td>2010</td>\n",
       "      <td>0.9585</td>\n",
       "      <td>50.0</td>\n",
       "      <td>0.712821</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Barrington</td>\n",
       "      <td>Suburban</td>\n",
       "      <td>Primrose Hill School</td>\n",
       "      <td>Attend</td>\n",
       "      <td>Elementary School</td>\n",
       "      <td>Attendance Rate</td>\n",
       "      <td>2010</td>\n",
       "      <td>0.9620</td>\n",
       "      <td>36.0</td>\n",
       "      <td>0.810256</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Barrington</td>\n",
       "      <td>Suburban</td>\n",
       "      <td>Sowams Elementary School</td>\n",
       "      <td>Attend</td>\n",
       "      <td>Elementary School</td>\n",
       "      <td>Attendance Rate</td>\n",
       "      <td>2010</td>\n",
       "      <td>0.9638</td>\n",
       "      <td>26.0</td>\n",
       "      <td>0.861538</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Bristol Warren</td>\n",
       "      <td>Regional</td>\n",
       "      <td>Colt Andrews School</td>\n",
       "      <td>Attend</td>\n",
       "      <td>Elementary School</td>\n",
       "      <td>Attendance Rate</td>\n",
       "      <td>2010</td>\n",
       "      <td>0.9494</td>\n",
       "      <td>92.0</td>\n",
       "      <td>0.425641</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         District    region                    School category  \\\n",
       "0      Barrington  Suburban    Hampden Meadows School   Attend   \n",
       "1      Barrington  Suburban             Nayatt School   Attend   \n",
       "2      Barrington  Suburban      Primrose Hill School   Attend   \n",
       "3      Barrington  Suburban  Sowams Elementary School   Attend   \n",
       "4  Bristol Warren  Regional       Colt Andrews School   Attend   \n",
       "\n",
       "               level          subject  year   value  rank  percentile  \n",
       "0  Elementary School  Attendance Rate  2010  0.9659  21.0    0.887179  \n",
       "1  Elementary School  Attendance Rate  2010  0.9585  50.0    0.712821  \n",
       "2  Elementary School  Attendance Rate  2010  0.9620  36.0    0.810256  \n",
       "3  Elementary School  Attendance Rate  2010  0.9638  26.0    0.861538  \n",
       "4  Elementary School  Attendance Rate  2010  0.9494  92.0    0.425641  "
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "final.head()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}