{ "cells": [ { "cell_type": "code", "execution_count": 172, "metadata": {}, "outputs": [], "source": [ "#the tools we need\n", "import requests\n", "import pandas as pd\n", "import numpy as np\n", "import pysal\n", "\n", "pd.set_option('display.max_columns', None)" ] }, { "cell_type": "code", "execution_count": 155, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[{\"group_ak4kk32/Colour\":\"brown\",\"_notes\":[],\"group_mo76v47/Total_bulk_finds\":\"5\",\"_bamboo_dataset_id\":\"\",\"_tags\":[],\"group_ak4kk32/Soil_Composition_Percentage_Clay\":\"20\",\"group_bu1ca84_row_1/group_bu1ca84_row_1_column\":\"Few\",\"group_ye2st39/Filled_out_by\":\"edward_stewart\",\"group_bu1ca84_row_2/group_bu1ca84_row_2_column_1\":\"No Tufo was found\",\"group_bu1ca84_row_2/group_bu1ca84_row_2_column\":\"None\",\"group_mf1hs18/group_qy7um66/Photo_models\":\"no\",\"group_mf1hs18/Position\":\"all around the trench\",\"_xform_id_string\":\"aFQDtwfYLpWjyYb6Uc5nMp\",\"group_ye2st39/SU_Closed\":\"no\",\"group_sb6os40_currentrelate/group_sb6os40_currentrelate_SU2\":\"99999\",\"group_sb6os40_currentrelate/group_sb6os40_currentrelate_SU1\":\"9999\",\"group_mf1hs18/Formation_Process\":\"accumulation\",\"group_ak4kk32/Soil_Composition_Percentage_Silt\":\"40\",\"group_ak4kk32/Matrix\":\"cohesive\",\"group_mf1hs18/Approximate_date_of_SU\":\"21st C AD\",\"end\":\"2019-06-27T04:06:58.479-04:00\",\"group_qu1tl04/Eastern_limit\":\"excavation_lim\",\"group_mf1hs18/g\n" ] } ], "source": [ "#fetch new SU data from kobotoolbox via the old API. Probably this will change when they finish the new API\n", "url = \"https://kc.kobotoolbox.org/api/v1/data/300075\"\n", "\n", "headers = {\n", " 'content-type': \"multipart/form-data\",\n", " 'Authorization': \"Token 0239eed59b53ae940ca8835302b6ad360c8fc540\",\n", " }\n", "\n", "#querystring = {\"group_mf1hs18/Stratigraphic_Reliability\":\"poor\"}\n", "kobo = requests.get(url, headers=headers)\n", "#response_json = response.json()\n", "#print(response.text)\n", "#print first 4000 characters of response\n", "print(kobo.text[:1000])\n", "#print(response_json)" ] }, { "cell_type": "code", "execution_count": 156, "metadata": {}, "outputs": [], "source": [ "#optional step: back up new data to a local file. \n", "new_file=open(\"C:\\\\temp\\\\kobo.json\",mode=\"w\",encoding=\"utf-8\")\n", "new_file.write(kobo.text)\n", "new_file.close()" ] }, { "cell_type": "code", "execution_count": 173, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AreaDefinitionExcavation_YearFormation_ProcessPublishing_StatusSU_001SU_TypeStratigraphic_Reliability__version___attachments_bamboo_dataset_id_geolocation_id_notes_status_submission_time_submitted_by_tags_uuid_validation_status_xform_id_stringendformhub/uuidgroup_aa6pz93/Alignmentgroup_aa6pz93/Binding_Agentgroup_aa6pz93/Building_Techniquegroup_aa6pz93/Dimensionsgroup_aa6pz93/Floor_Typegroup_aa6pz93/Foundationsgroup_aa6pz93/Structure_Descriptiongroup_aa6pz93/Wall_Facinggroup_aa6pz93/Wall_Finishinggroup_ak4kk32/Colourgroup_ak4kk32/Compactiongroup_ak4kk32/Matrixgroup_ak4kk32/Soil_Composition_Percentage_Claygroup_ak4kk32/Soil_Composition_Percentage_Sandgroup_ak4kk32/Soil_Composition_Percentage_Siltgroup_bu1ca84_row/group_bu1ca84_row_columngroup_bu1ca84_row/group_bu1ca84_row_column_1group_bu1ca84_row_1/group_bu1ca84_row_1_columngroup_bu1ca84_row_1/group_bu1ca84_row_1_column_1group_bu1ca84_row_2/group_bu1ca84_row_2_columngroup_bu1ca84_row_2/group_bu1ca84_row_2_column_1group_cp91a70/Inclusion_Observationsgroup_cp91a70/Interfacegroup_cp91a70/Surfacegroup_cp91a70/Thickness_Observationsgroup_dn7bg68group_ey32a12/Observationsgroup_ey32a12/Position_001group_ey32a12/Shapegroup_inclusionsgroup_iz3xs59group_lg3yr63/Non_Soil_Samplinggroup_lg3yr63/Sievinggroup_lg3yr63/Soil_Samplinggroup_matrixgroup_mf1hs18/Approximate_date_of_SUgroup_mf1hs18/Areagroup_mf1hs18/Chronological_Periodgroup_mf1hs18/Date_of_Layer_observationsgroup_mf1hs18/Definitiongroup_mf1hs18/End_of_Use_Phasegroup_mf1hs18/Excavation_Yeargroup_mf1hs18/Formation_Phasegroup_mf1hs18/Formation_Processgroup_mf1hs18/Layer_distinguished_bygroup_mf1hs18/Loss_phasegroup_mf1hs18/Positiongroup_mf1hs18/SU_Typegroup_mf1hs18/Stratigraphic_Reliabilitygroup_mf1hs18/group_qy7um66/Photo_modelsgroup_mf1hs18/group_qy7um66/Photosgroup_mo76v47/Finds_Observationsgroup_mo76v47/Finds_Storage_Notesgroup_mo76v47/Total_bulk_findsgroup_mo76v47/Total_bulk_finds_weightgroup_mo76v47/group_ht7hf81group_mo76v47/group_jr3of51group_mo76v47/group_lq6ss10group_qu1tl04/Depth_Limitgroup_qu1tl04/Eastern_limitgroup_qu1tl04/Northern_Limitgroup_qu1tl04/Southern_Limitgroup_qu1tl04/Unit_Limit_Notesgroup_qu1tl04/Western_Limitgroup_sb6os40_currentrelate/group_sb6os40_currentrelate_Harris_relationshipgroup_sb6os40_currentrelate/group_sb6os40_currentrelate_SU1group_sb6os40_currentrelate/group_sb6os40_currentrelate_SU2group_xg9ph94group_ye2st39/Date_of_Finds_Studygroup_ye2st39/Edited_bygroup_ye2st39/Edited_ongroup_ye2st39/Filled_out_bygroup_ye2st39/Filled_out_ongroup_ye2st39/Finds_Studied_bygroup_ye2st39/SU_Closedgroup_yp0fv41/Cut_bottomgroup_yp0fv41/Cut_bottom_edgegroup_yp0fv41/Cut_edgesgroup_yp0fv41/Cut_observationsgroup_yp0fv41/Cut_sidesgroup_yp0fv41/Cut_top_edgemeta/deprecatedIDmeta/instanceIDstarttodayusername
0area_iA test SU for the purpose of this form's creat...2019.0intentional_debeta9999anthropogenicgoodvASoWNKpGrMiWMYtnspLZ9[][None, None]30823853[]submitted_via_web2019-06-23 19:29:45NaN[]4f0fd312-ecc5-49a5-93a2-fe3a668d3c97{'by_whom': 'eddiecharlesstewart', 'timestamp'...aFQDtwfYLpWjyYb6Uc5nMp2019-06-27T04:06:58.479-04:0043a407dc130741098c714bd8b8620040NaNNaNNaNNaNNaNNaNNaNNaNNaNbrownfriablecohesive20.040.040.0Manymany fragments of terra Sigilata were uncovere...FewA few tile fragments were uncoveredNoneNo Tufo was foundNaNcommingledgrassyNaNNaNit is grassy, excavated with steel toe capped ...NaNNaN[{'group_inclusions/inclusionsfreqency': 'medi...NaNnonono[{'group_matrix/harrisrelationship': 'covers',...21st C ADarea_imodernThe grass is probably not that oldGrassNaN2019.0NaNaccumulationcolour compositionNaNall around the trenchnaturalpoornonoLots of change was found hereIt is now in my pocket5.05.0[{'group_mo76v47/group_ht7hf81/Details': '5x 1...[{'group_mo76v47/group_jr3of51/Spot_Dates': '2...[{'group_mo76v47/group_lq6ss10/Bases': '1', 'g...originalexcavation_limexcavation_limexcavation_limNaNexcavation_limcovers9999.099999.0[{'group_xg9ph94/Interpretations': 'This SU is...NaNNaNNaNedward_stewart2019-06-27NaNnoNaNNaNNaNNaNNaNNaNuuid:3214a397-fc61-4eb7-b9c0-e33931ed6995uuid:4f0fd312-ecc5-49a5-93a2-fe3a668d3c972019-06-23T20:24:52.369+01:00NaNNaN
1NaNNaNNaNNaNNaN9999NaNNaNvT5AVpWUbRN4hXkW6KpKek[][None, None]31149181[]submitted_via_web2019-06-27 08:23:34NaN[]37aae4fb-30e8-4adb-baff-c4b732fd8d5f{'by_whom': 'eddiecharlesstewart', 'timestamp'...aFQDtwfYLpWjyYb6Uc5nMp2019-06-27T03:53:46.686-04:0043a407dc130741098c714bd8b8620040NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN[{'group_mo76v47/group_jr3of51/Spot_Dates': '2...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNuuid:37aae4fb-30e8-4adb-baff-c4b732fd8d5f2019-06-27T03:52:51.969-04:00NaNNaN
2NaNNaNNaNNaNbeta9998NaNNaNvjuQCNvQx2tVSXENUzPoHx[][None, None]31152598[]submitted_via_web2019-06-27 09:13:16NaN[]c77bcd48-d876-4b17-b33e-7f1f9947192b{'by_whom': 'eddiecharlesstewart', 'timestamp'...aFQDtwfYLpWjyYb6Uc5nMp2019-06-27T05:32:53.854-04:0043a407dc130741098c714bd8b8620040NaNNaNNaNNaNNaNNaNNaNNaNNaNgreyfriablecohesive50.030.020.0NaNNaNNaNNaNNaNNaNlots of broken house thingssharplumpyquite hefty[{'group_dn7bg68/Insert_link_to_connected_form...Excavated with a toothpickcentre of house structureround in plan[{'group_inclusions/inclusionsfreqency': 'freq...NaNyesyesyes[{'group_matrix/harrisrelationship': 'abuts', ...2019area_ijulio_claudian02/12/1997A large deposit of rubblephase_a_32019.0phase_a_1collapsecompaction compositionphase_a_4ain centre of house structureanthropogenicfairyesyeslots of broken house stuffstored in a garden shed in michigan27.0348.0[{'group_mo76v47/group_ht7hf81/Details': 'arch...NaN[{'group_mo76v47/group_lq6ss10/Maximum_Size_in...originaloriginaloriginaloriginalSU is cut to the west by a quarrying cutnot_originalNaNNaNNaN[{'group_xg9ph94/Interpretations': 'A layer fo...2019-06-19abe_thompson2019-06-26abigail_trowbr2019-06-03aida_aliyesNaNNaNNaNNaNNaNNaNuuid:5bbece0f-a5f5-4998-8ef2-31ec22e99117uuid:c77bcd48-d876-4b17-b33e-7f1f9947192b2019-06-27T04:57:19.848-04:00NaNNaN
3NaNNaNNaNNaNbeta1314NaNNaNvjuQCNvQx2tVSXENUzPoHx[][None, None]31154436[]submitted_via_web2019-06-27 09:52:48NaN[]56908e9d-bc9f-47f0-989a-d98de51813d0{'by_whom': 'eddiecharlesstewart', 'timestamp'...aFQDtwfYLpWjyYb6Uc5nMp2019-06-27T05:50:50.334-04:0043a407dc130741098c714bd8b8620040NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNsharpNaNNaNNaNThere was a strong gale and heavy rain during ...Near the large well shaftelephant shaped cutNaNNaNnonono[{'group_matrix/harrisrelationship': 'cuts', '...1066ADarea_imedieval_perioThis date is based off of the finding of a spo...A spoliation cutphase_a_32018.0phase_a_1cuttingcompaction compositionphase_a_4cNext to a wall in the west of area IanthropogenicpoornoyesNaNNaNNaNNaNNaN[{'group_mo76v47/group_jr3of51/Spot_Dates': 'c...NaNoriginaloriginaloriginaloriginalNaNoriginalNaNNaNNaN[{'group_xg9ph94/Interpretations': 'It is a la...2019-06-17abigail_trowbr2019-06-17david_yelsey2019-06-25evelyn_adkinsnoconcavesharproundedNaNconcavesharpNaNuuid:56908e9d-bc9f-47f0-989a-d98de51813d02019-06-27T05:33:50.763-04:00NaNNaN
4NaNNaNNaNNaNNaN9999NaNNaNvPzHY9gqVnLowgERod6kw8[{'mimetype': 'image/jpeg', 'download_url': 'h...[None, None]31482851[]submitted_via_web2019-07-01 14:09:57NaN[]b67f09c8-ecaa-42a9-a06b-3ebfb780f760{'by_whom': 'eddiecharlesstewart', 'timestamp'...aFQDtwfYLpWjyYb6Uc5nMp2019-07-01T15:09:48.963+01:0043a407dc130741098c714bd8b8620040NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN[{'group_iz3xs59/Insert_a_photo': 'DSC_1141-15...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNuuid:b67f09c8-ecaa-42a9-a06b-3ebfb780f7602019-07-01T15:02:30.493+01:002019-07-01username not found
\n", "
" ], "text/plain": [ " Area Definition Excavation_Year \\\n", "0 area_i A test SU for the purpose of this form's creat... 2019.0 \n", "1 NaN NaN NaN \n", "2 NaN NaN NaN \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " Formation_Process Publishing_Status SU_001 SU_Type \\\n", "0 intentional_de beta 9999 anthropogenic \n", "1 NaN NaN 9999 NaN \n", "2 NaN beta 9998 NaN \n", "3 NaN beta 1314 NaN \n", "4 NaN NaN 9999 NaN \n", "\n", " Stratigraphic_Reliability __version__ \\\n", "0 good vASoWNKpGrMiWMYtnspLZ9 \n", "1 NaN vT5AVpWUbRN4hXkW6KpKek \n", "2 NaN vjuQCNvQx2tVSXENUzPoHx \n", "3 NaN vjuQCNvQx2tVSXENUzPoHx \n", "4 NaN vPzHY9gqVnLowgERod6kw8 \n", "\n", " _attachments _bamboo_dataset_id \\\n", "0 [] \n", "1 [] \n", "2 [] \n", "3 [] \n", "4 [{'mimetype': 'image/jpeg', 'download_url': 'h... \n", "\n", " _geolocation _id _notes _status _submission_time \\\n", "0 [None, None] 30823853 [] submitted_via_web 2019-06-23 19:29:45 \n", "1 [None, None] 31149181 [] submitted_via_web 2019-06-27 08:23:34 \n", "2 [None, None] 31152598 [] submitted_via_web 2019-06-27 09:13:16 \n", "3 [None, None] 31154436 [] submitted_via_web 2019-06-27 09:52:48 \n", "4 [None, None] 31482851 [] submitted_via_web 2019-07-01 14:09:57 \n", "\n", " _submitted_by _tags _uuid \\\n", "0 NaN [] 4f0fd312-ecc5-49a5-93a2-fe3a668d3c97 \n", "1 NaN [] 37aae4fb-30e8-4adb-baff-c4b732fd8d5f \n", "2 NaN [] c77bcd48-d876-4b17-b33e-7f1f9947192b \n", "3 NaN [] 56908e9d-bc9f-47f0-989a-d98de51813d0 \n", "4 NaN [] b67f09c8-ecaa-42a9-a06b-3ebfb780f760 \n", "\n", " _validation_status _xform_id_string \\\n", "0 {'by_whom': 'eddiecharlesstewart', 'timestamp'... aFQDtwfYLpWjyYb6Uc5nMp \n", "1 {'by_whom': 'eddiecharlesstewart', 'timestamp'... aFQDtwfYLpWjyYb6Uc5nMp \n", "2 {'by_whom': 'eddiecharlesstewart', 'timestamp'... aFQDtwfYLpWjyYb6Uc5nMp \n", "3 {'by_whom': 'eddiecharlesstewart', 'timestamp'... aFQDtwfYLpWjyYb6Uc5nMp \n", "4 {'by_whom': 'eddiecharlesstewart', 'timestamp'... aFQDtwfYLpWjyYb6Uc5nMp \n", "\n", " end formhub/uuid \\\n", "0 2019-06-27T04:06:58.479-04:00 43a407dc130741098c714bd8b8620040 \n", "1 2019-06-27T03:53:46.686-04:00 43a407dc130741098c714bd8b8620040 \n", "2 2019-06-27T05:32:53.854-04:00 43a407dc130741098c714bd8b8620040 \n", "3 2019-06-27T05:50:50.334-04:00 43a407dc130741098c714bd8b8620040 \n", "4 2019-07-01T15:09:48.963+01:00 43a407dc130741098c714bd8b8620040 \n", "\n", " group_aa6pz93/Alignment group_aa6pz93/Binding_Agent \\\n", "0 NaN NaN \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", "4 NaN NaN \n", "\n", " group_aa6pz93/Building_Technique group_aa6pz93/Dimensions \\\n", "0 NaN NaN \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", "4 NaN NaN \n", "\n", " group_aa6pz93/Floor_Type group_aa6pz93/Foundations \\\n", "0 NaN NaN \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", "4 NaN NaN \n", "\n", " group_aa6pz93/Structure_Description group_aa6pz93/Wall_Facing \\\n", "0 NaN NaN \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", "4 NaN NaN \n", "\n", " group_aa6pz93/Wall_Finishing group_ak4kk32/Colour group_ak4kk32/Compaction \\\n", "0 NaN brown friable \n", "1 NaN NaN NaN \n", "2 NaN grey friable \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " group_ak4kk32/Matrix group_ak4kk32/Soil_Composition_Percentage_Clay \\\n", "0 cohesive 20.0 \n", "1 NaN NaN \n", "2 cohesive 50.0 \n", "3 NaN NaN \n", "4 NaN NaN \n", "\n", " group_ak4kk32/Soil_Composition_Percentage_Sand \\\n", "0 40.0 \n", "1 NaN \n", "2 30.0 \n", "3 NaN \n", "4 NaN \n", "\n", " group_ak4kk32/Soil_Composition_Percentage_Silt \\\n", "0 40.0 \n", "1 NaN \n", "2 20.0 \n", "3 NaN \n", "4 NaN \n", "\n", " group_bu1ca84_row/group_bu1ca84_row_column \\\n", "0 Many \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", " group_bu1ca84_row/group_bu1ca84_row_column_1 \\\n", "0 many fragments of terra Sigilata were uncovere... \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", " group_bu1ca84_row_1/group_bu1ca84_row_1_column \\\n", "0 Few \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", " group_bu1ca84_row_1/group_bu1ca84_row_1_column_1 \\\n", "0 A few tile fragments were uncovered \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", " group_bu1ca84_row_2/group_bu1ca84_row_2_column \\\n", "0 None \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", " group_bu1ca84_row_2/group_bu1ca84_row_2_column_1 \\\n", "0 No Tufo was found \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", " group_cp91a70/Inclusion_Observations group_cp91a70/Interface \\\n", "0 NaN commingled \n", "1 NaN NaN \n", "2 lots of broken house things sharp \n", "3 NaN sharp \n", "4 NaN NaN \n", "\n", " group_cp91a70/Surface group_cp91a70/Thickness_Observations \\\n", "0 grassy NaN \n", "1 NaN NaN \n", "2 lumpy quite hefty \n", "3 NaN NaN \n", "4 NaN NaN \n", "\n", " group_dn7bg68 \\\n", "0 NaN \n", "1 NaN \n", "2 [{'group_dn7bg68/Insert_link_to_connected_form... \n", "3 NaN \n", "4 NaN \n", "\n", " group_ey32a12/Observations \\\n", "0 it is grassy, excavated with steel toe capped ... \n", "1 NaN \n", "2 Excavated with a toothpick \n", "3 There was a strong gale and heavy rain during ... \n", "4 NaN \n", "\n", " group_ey32a12/Position_001 group_ey32a12/Shape \\\n", "0 NaN NaN \n", "1 NaN NaN \n", "2 centre of house structure round in plan \n", "3 Near the large well shaft elephant shaped cut \n", "4 NaN NaN \n", "\n", " group_inclusions \\\n", "0 [{'group_inclusions/inclusionsfreqency': 'medi... \n", "1 NaN \n", "2 [{'group_inclusions/inclusionsfreqency': 'freq... \n", "3 NaN \n", "4 NaN \n", "\n", " group_iz3xs59 \\\n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 [{'group_iz3xs59/Insert_a_photo': 'DSC_1141-15... \n", "\n", " group_lg3yr63/Non_Soil_Sampling group_lg3yr63/Sieving \\\n", "0 no no \n", "1 NaN NaN \n", "2 yes yes \n", "3 no no \n", "4 NaN NaN \n", "\n", " group_lg3yr63/Soil_Sampling \\\n", "0 no \n", "1 NaN \n", "2 yes \n", "3 no \n", "4 NaN \n", "\n", " group_matrix \\\n", "0 [{'group_matrix/harrisrelationship': 'covers',... \n", "1 NaN \n", "2 [{'group_matrix/harrisrelationship': 'abuts', ... \n", "3 [{'group_matrix/harrisrelationship': 'cuts', '... \n", "4 NaN \n", "\n", " group_mf1hs18/Approximate_date_of_SU group_mf1hs18/Area \\\n", "0 21st C AD area_i \n", "1 NaN NaN \n", "2 2019 area_i \n", "3 1066AD area_i \n", "4 NaN NaN \n", "\n", " group_mf1hs18/Chronological_Period \\\n", "0 modern \n", "1 NaN \n", "2 julio_claudian \n", "3 medieval_perio \n", "4 NaN \n", "\n", " group_mf1hs18/Date_of_Layer_observations \\\n", "0 The grass is probably not that old \n", "1 NaN \n", "2 02/12/1997 \n", "3 This date is based off of the finding of a spo... \n", "4 NaN \n", "\n", " group_mf1hs18/Definition group_mf1hs18/End_of_Use_Phase \\\n", "0 Grass NaN \n", "1 NaN NaN \n", "2 A large deposit of rubble phase_a_3 \n", "3 A spoliation cut phase_a_3 \n", "4 NaN NaN \n", "\n", " group_mf1hs18/Excavation_Year group_mf1hs18/Formation_Phase \\\n", "0 2019.0 NaN \n", "1 NaN NaN \n", "2 2019.0 phase_a_1 \n", "3 2018.0 phase_a_1 \n", "4 NaN NaN \n", "\n", " group_mf1hs18/Formation_Process group_mf1hs18/Layer_distinguished_by \\\n", "0 accumulation colour composition \n", "1 NaN NaN \n", "2 collapse compaction composition \n", "3 cutting compaction composition \n", "4 NaN NaN \n", "\n", " group_mf1hs18/Loss_phase group_mf1hs18/Position \\\n", "0 NaN all around the trench \n", "1 NaN NaN \n", "2 phase_a_4a in centre of house structure \n", "3 phase_a_4c Next to a wall in the west of area I \n", "4 NaN NaN \n", "\n", " group_mf1hs18/SU_Type group_mf1hs18/Stratigraphic_Reliability \\\n", "0 natural poor \n", "1 NaN NaN \n", "2 anthropogenic fair \n", "3 anthropogenic poor \n", "4 NaN NaN \n", "\n", " group_mf1hs18/group_qy7um66/Photo_models group_mf1hs18/group_qy7um66/Photos \\\n", "0 no no \n", "1 NaN NaN \n", "2 yes yes \n", "3 no yes \n", "4 NaN NaN \n", "\n", " group_mo76v47/Finds_Observations group_mo76v47/Finds_Storage_Notes \\\n", "0 Lots of change was found here It is now in my pocket \n", "1 NaN NaN \n", "2 lots of broken house stuff stored in a garden shed in michigan \n", "3 NaN NaN \n", "4 NaN NaN \n", "\n", " group_mo76v47/Total_bulk_finds group_mo76v47/Total_bulk_finds_weight \\\n", "0 5.0 5.0 \n", "1 NaN NaN \n", "2 27.0 348.0 \n", "3 NaN NaN \n", "4 NaN NaN \n", "\n", " group_mo76v47/group_ht7hf81 \\\n", "0 [{'group_mo76v47/group_ht7hf81/Details': '5x 1... \n", "1 NaN \n", "2 [{'group_mo76v47/group_ht7hf81/Details': 'arch... \n", "3 NaN \n", "4 NaN \n", "\n", " group_mo76v47/group_jr3of51 \\\n", "0 [{'group_mo76v47/group_jr3of51/Spot_Dates': '2... \n", "1 [{'group_mo76v47/group_jr3of51/Spot_Dates': '2... \n", "2 NaN \n", "3 [{'group_mo76v47/group_jr3of51/Spot_Dates': 'c... \n", "4 NaN \n", "\n", " group_mo76v47/group_lq6ss10 \\\n", "0 [{'group_mo76v47/group_lq6ss10/Bases': '1', 'g... \n", "1 NaN \n", "2 [{'group_mo76v47/group_lq6ss10/Maximum_Size_in... \n", "3 NaN \n", "4 NaN \n", "\n", " group_qu1tl04/Depth_Limit group_qu1tl04/Eastern_limit \\\n", "0 original excavation_lim \n", "1 NaN NaN \n", "2 original original \n", "3 original original \n", "4 NaN NaN \n", "\n", " group_qu1tl04/Northern_Limit group_qu1tl04/Southern_Limit \\\n", "0 excavation_lim excavation_lim \n", "1 NaN NaN \n", "2 original original \n", "3 original original \n", "4 NaN NaN \n", "\n", " group_qu1tl04/Unit_Limit_Notes group_qu1tl04/Western_Limit \\\n", "0 NaN excavation_lim \n", "1 NaN NaN \n", "2 SU is cut to the west by a quarrying cut not_original \n", "3 NaN original \n", "4 NaN NaN \n", "\n", " group_sb6os40_currentrelate/group_sb6os40_currentrelate_Harris_relationship \\\n", "0 covers \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", " group_sb6os40_currentrelate/group_sb6os40_currentrelate_SU1 \\\n", "0 9999.0 \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", " group_sb6os40_currentrelate/group_sb6os40_currentrelate_SU2 \\\n", "0 99999.0 \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", " group_xg9ph94 \\\n", "0 [{'group_xg9ph94/Interpretations': 'This SU is... \n", "1 NaN \n", "2 [{'group_xg9ph94/Interpretations': 'A layer fo... \n", "3 [{'group_xg9ph94/Interpretations': 'It is a la... \n", "4 NaN \n", "\n", " group_ye2st39/Date_of_Finds_Study group_ye2st39/Edited_by \\\n", "0 NaN NaN \n", "1 NaN NaN \n", "2 2019-06-19 abe_thompson \n", "3 2019-06-17 abigail_trowbr \n", "4 NaN NaN \n", "\n", " group_ye2st39/Edited_on group_ye2st39/Filled_out_by \\\n", "0 NaN edward_stewart \n", "1 NaN NaN \n", "2 2019-06-26 abigail_trowbr \n", "3 2019-06-17 david_yelsey \n", "4 NaN NaN \n", "\n", " group_ye2st39/Filled_out_on group_ye2st39/Finds_Studied_by \\\n", "0 2019-06-27 NaN \n", "1 NaN NaN \n", "2 2019-06-03 aida_ali \n", "3 2019-06-25 evelyn_adkins \n", "4 NaN NaN \n", "\n", " group_ye2st39/SU_Closed group_yp0fv41/Cut_bottom \\\n", "0 no NaN \n", "1 NaN NaN \n", "2 yes NaN \n", "3 no concave \n", "4 NaN NaN \n", "\n", " group_yp0fv41/Cut_bottom_edge group_yp0fv41/Cut_edges \\\n", "0 NaN NaN \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 sharp rounded \n", "4 NaN NaN \n", "\n", " group_yp0fv41/Cut_observations group_yp0fv41/Cut_sides \\\n", "0 NaN NaN \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN concave \n", "4 NaN NaN \n", "\n", " group_yp0fv41/Cut_top_edge meta/deprecatedID \\\n", "0 NaN uuid:3214a397-fc61-4eb7-b9c0-e33931ed6995 \n", "1 NaN NaN \n", "2 NaN uuid:5bbece0f-a5f5-4998-8ef2-31ec22e99117 \n", "3 sharp NaN \n", "4 NaN NaN \n", "\n", " meta/instanceID start \\\n", "0 uuid:4f0fd312-ecc5-49a5-93a2-fe3a668d3c97 2019-06-23T20:24:52.369+01:00 \n", "1 uuid:37aae4fb-30e8-4adb-baff-c4b732fd8d5f 2019-06-27T03:52:51.969-04:00 \n", "2 uuid:c77bcd48-d876-4b17-b33e-7f1f9947192b 2019-06-27T04:57:19.848-04:00 \n", "3 uuid:56908e9d-bc9f-47f0-989a-d98de51813d0 2019-06-27T05:33:50.763-04:00 \n", "4 uuid:b67f09c8-ecaa-42a9-a06b-3ebfb780f760 2019-07-01T15:02:30.493+01:00 \n", "\n", " today username \n", "0 NaN NaN \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", "4 2019-07-01 username not found " ] }, "execution_count": 173, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#convert kobo json to a pandas dataframe\n", "kobo_su=pd.read_json(kobo.text)\n", "kobo_su.head()" ] }, { "cell_type": "code", "execution_count": 171, "metadata": {}, "outputs": [ { "ename": "AttributeError", "evalue": "'DataFrame' object has no attribute 'set_option'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[0mlegacy_su\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'https://raw.githubusercontent.com/ropitz/gabii_experiments/master/data/su_report.csv'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0msep\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m';'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[0mlegacy_su\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[0mlegacy_su\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mset_option\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'display.max_columns'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36m__getattr__\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m 4370\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_info_axis\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_can_hold_identifiers_and_holds_name\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4371\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 4372\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4373\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4374\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__setattr__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mAttributeError\u001b[0m: 'DataFrame' object has no attribute 'set_option'" ] } ], "source": [ "#read in legacy data from the ARK\n", "legacy_su = pd.read_csv('https://raw.githubusercontent.com/ropitz/gabii_experiments/master/data/su_report.csv',sep=';')\n", "legacy_su.head()" ] }, { "cell_type": "code", "execution_count": 159, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SU_001AreaDefinitionPublishing_Status
09999area_iA test SU for the purpose of this form's creat...beta
19999NaNNaNNaN
29998NaNNaNbeta
31314NaNNaNbeta
49999NaNNaNNaN
57800NaNNaNNaN
69987NaNNaNbeta
77234NaNNaNNaN
89999NaNNaNpublished
\n", "
" ], "text/plain": [ " SU_001 Area Definition \\\n", "0 9999 area_i A test SU for the purpose of this form's creat... \n", "1 9999 NaN NaN \n", "2 9998 NaN NaN \n", "3 1314 NaN NaN \n", "4 9999 NaN NaN \n", "5 7800 NaN NaN \n", "6 9987 NaN NaN \n", "7 7234 NaN NaN \n", "8 9999 NaN NaN \n", "\n", " Publishing_Status \n", "0 beta \n", "1 NaN \n", "2 beta \n", "3 beta \n", "4 NaN \n", "5 NaN \n", "6 beta \n", "7 NaN \n", "8 published " ] }, "execution_count": 159, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#optional: select the parts of the kobo SU sheet you want by their column names\n", "kobo_su_sub = kobo_su[[\"SU_001\",\"Area\",\"Definition\",\"Publishing_Status\"]]\n", "kobo_su_sub" ] }, { "cell_type": "code", "execution_count": 160, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDEXCAV_AREADEFINITIONPUB_STATUS
00ANaNPublished
11ABedrock identified in 2009Beta
23AFill of cut SU4Beta
34ARectangular cut in bedrockBeta
45ALayer of brown, silty clayBeta
\n", "
" ], "text/plain": [ " ID EXCAV_AREA DEFINITION PUB_STATUS\n", "0 0 A NaN Published\n", "1 1 A Bedrock identified in 2009 Beta\n", "2 3 A Fill of cut SU4 Beta\n", "3 4 A Rectangular cut in bedrock Beta\n", "4 5 A Layer of brown, silty clay Beta" ] }, "execution_count": 160, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#select the parts of the legacy SU sheet you want by their column names. This should be the same set as the kobo selection\n", "legacy_su_sub = legacy_su[[\"ID\",\"EXCAV_AREA\",\"DEFINITION\",\"PUB_STATUS\"]]\n", "legacy_su_sub.head()" ] }, { "cell_type": "code", "execution_count": 161, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\gabii4\\Anaconda3\\lib\\site-packages\\pandas\\core\\frame.py:3778: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", " return super(DataFrame, self).rename(**kwargs)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SUAreaDefinitionPublishing_Status
09999area_iA test SU for the purpose of this form's creat...beta
19999NaNNaNNaN
29998NaNNaNbeta
31314NaNNaNbeta
49999NaNNaNNaN
\n", "
" ], "text/plain": [ " SU Area Definition \\\n", "0 9999 area_i A test SU for the purpose of this form's creat... \n", "1 9999 NaN NaN \n", "2 9998 NaN NaN \n", "3 1314 NaN NaN \n", "4 9999 NaN NaN \n", "\n", " Publishing_Status \n", "0 beta \n", "1 NaN \n", "2 beta \n", "3 beta \n", "4 NaN " ] }, "execution_count": 161, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#rename any silly column names for each dataframe\n", "kobo_su_sub.rename(columns={'SU_001':'SU'}, \n", " inplace=True)\n", "kobo_su_sub.head()" ] }, { "cell_type": "code", "execution_count": 162, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\gabii4\\Anaconda3\\lib\\site-packages\\pandas\\core\\frame.py:3778: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", " return super(DataFrame, self).rename(**kwargs)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SUAreaDefinitionPublishing_Status
00ANaNPublished
11ABedrock identified in 2009Beta
23AFill of cut SU4Beta
34ARectangular cut in bedrockBeta
45ALayer of brown, silty clayBeta
\n", "
" ], "text/plain": [ " SU Area Definition Publishing_Status\n", "0 0 A NaN Published\n", "1 1 A Bedrock identified in 2009 Beta\n", "2 3 A Fill of cut SU4 Beta\n", "3 4 A Rectangular cut in bedrock Beta\n", "4 5 A Layer of brown, silty clay Beta" ] }, "execution_count": 162, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#rename any silly column names for each dataframe\n", "legacy_su_sub.rename(columns={'ID':'SU','EXCAV_AREA':'Area','DEFINITION':'Definition','PUB_STATUS':'Publishing_Status'}, \n", " inplace=True)\n", "legacy_su_sub.head()" ] }, { "cell_type": "code", "execution_count": 163, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SUAreaDefinitionPublishing_Status
09999area_iA test SU for the purpose of this form's creat...beta
19999NaNNaNNaN
29998NaNNaNbeta
31314NaNNaNbeta
49999NaNNaNNaN
57800NaNNaNNaN
69987NaNNaNbeta
77234NaNNaNNaN
89999NaNNaNpublished
00ANaNPublished
11ABedrock identified in 2009Beta
23AFill of cut SU4Beta
34ARectangular cut in bedrockBeta
45ALayer of brown, silty clayBeta
56ACut in bedrockBeta
67ALayer of rocks mixed with brown clayBeta
78ACluster of mortar on Northwest side of cutBeta
89ACluster of mortar in SE side of SU4Beta
910ACluster of roof tiles in SW corner of SU4Beta
1011ALayer of brown sandy clayPublished
\n", "
" ], "text/plain": [ " SU Area Definition \\\n", "0 9999 area_i A test SU for the purpose of this form's creat... \n", "1 9999 NaN NaN \n", "2 9998 NaN NaN \n", "3 1314 NaN NaN \n", "4 9999 NaN NaN \n", "5 7800 NaN NaN \n", "6 9987 NaN NaN \n", "7 7234 NaN NaN \n", "8 9999 NaN NaN \n", "0 0 A NaN \n", "1 1 A Bedrock identified in 2009 \n", "2 3 A Fill of cut SU4 \n", "3 4 A Rectangular cut in bedrock \n", "4 5 A Layer of brown, silty clay \n", "5 6 A Cut in bedrock \n", "6 7 A Layer of rocks mixed with brown clay \n", "7 8 A Cluster of mortar on Northwest side of cut \n", "8 9 A Cluster of mortar in SE side of SU4 \n", "9 10 A Cluster of roof tiles in SW corner of SU4 \n", "10 11 A Layer of brown sandy clay \n", "\n", " Publishing_Status \n", "0 beta \n", "1 NaN \n", "2 beta \n", "3 beta \n", "4 NaN \n", "5 NaN \n", "6 beta \n", "7 NaN \n", "8 published \n", "0 Published \n", "1 Beta \n", "2 Beta \n", "3 Beta \n", "4 Beta \n", "5 Beta \n", "6 Beta \n", "7 Beta \n", "8 Beta \n", "9 Beta \n", "10 Published " ] }, "execution_count": 163, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#stack the two together to integrate, preview the first twenty entries\n", "su_stack = pd.concat([kobo_su_sub, legacy_su_sub], axis=0)\n", "su_stack.head(20)" ] }, { "cell_type": "code", "execution_count": 164, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SUAreaDefinitionPublishing_Status
89999NaNNaNpublished
00ANaNPublished
1011ALayer of brown sandy clayPublished
360363AN-S wallPublished
457462AOval cut filled by SU451Published
617999DBeater SU. This SU is a testPublished
6341016BPost-abandonment colluvial layerPublished
6761058BWall running N-S in western part of area B - s...Published
7421124Bupper fill of cappucina tomb Lordy GagaPublished
7531135Brubble wall - perpendicular to 1058 - west to ...Published
7741156Brubble layerPublished
7761158Bsurface inside 'room' at S edge of excavation ...Published
7801162Bperpendicular continuation of rubble wall 1135Published
7811163Brubble wall continuation of 1135Published
7831165Byellowish, silty soil southPublished
7861168Blayer containing soil and gravelPublished
7871169Byellowish layer north of wall 1135Published
7881170Bditch for rubble wall 1135Published
7891171Bcut for trench built wall 1163Published
7911173BCrushed tufo floorPublished
\n", "
" ], "text/plain": [ " SU Area Definition \\\n", "8 9999 NaN NaN \n", "0 0 A NaN \n", "10 11 A Layer of brown sandy clay \n", "360 363 A N-S wall \n", "457 462 A Oval cut filled by SU451 \n", "617 999 D Beater SU. This SU is a test \n", "634 1016 B Post-abandonment colluvial layer \n", "676 1058 B Wall running N-S in western part of area B - s... \n", "742 1124 B upper fill of cappucina tomb Lordy Gaga \n", "753 1135 B rubble wall - perpendicular to 1058 - west to ... \n", "774 1156 B rubble layer \n", "776 1158 B surface inside 'room' at S edge of excavation ... \n", "780 1162 B perpendicular continuation of rubble wall 1135 \n", "781 1163 B rubble wall continuation of 1135 \n", "783 1165 B yellowish, silty soil south \n", "786 1168 B layer containing soil and gravel \n", "787 1169 B yellowish layer north of wall 1135 \n", "788 1170 B ditch for rubble wall 1135 \n", "789 1171 B cut for trench built wall 1163 \n", "791 1173 B Crushed tufo floor \n", "\n", " Publishing_Status \n", "8 published \n", "0 Published \n", "10 Published \n", "360 Published \n", "457 Published \n", "617 Published \n", "634 Published \n", "676 Published \n", "742 Published \n", "753 Published \n", "774 Published \n", "776 Published \n", "780 Published \n", "781 Published \n", "783 Published \n", "786 Published \n", "787 Published \n", "788 Published \n", "789 Published \n", "791 Published " ] }, "execution_count": 164, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#filter on field values from both lists\n", "published = ['Published','published']\n", "published_su = su_stack.loc[su_stack['Publishing_Status'].isin(published)]\n", "published_su.head(20)" ] }, { "cell_type": "code", "execution_count": 165, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SUAreaDefinitionPublishing_Status
09999IA test SU for the purpose of this form's creat...beta
19999NaNNaNNaN
29998NaNNaNbeta
31314NaNNaNbeta
49999NaNNaNNaN
57800NaNNaNNaN
69987NaNNaNbeta
77234NaNNaNNaN
89999NaNNaNPublished
00ANaNPublished
11ABedrock identified in 2009Beta
23AFill of cut SU4Beta
34ARectangular cut in bedrockBeta
45ALayer of brown, silty clayBeta
56ACut in bedrockBeta
67ALayer of rocks mixed with brown clayBeta
78ACluster of mortar on Northwest side of cutBeta
89ACluster of mortar in SE side of SU4Beta
910ACluster of roof tiles in SW corner of SU4Beta
1011ALayer of brown sandy clayPublished
\n", "
" ], "text/plain": [ " SU Area Definition \\\n", "0 9999 I A test SU for the purpose of this form's creat... \n", "1 9999 NaN NaN \n", "2 9998 NaN NaN \n", "3 1314 NaN NaN \n", "4 9999 NaN NaN \n", "5 7800 NaN NaN \n", "6 9987 NaN NaN \n", "7 7234 NaN NaN \n", "8 9999 NaN NaN \n", "0 0 A NaN \n", "1 1 A Bedrock identified in 2009 \n", "2 3 A Fill of cut SU4 \n", "3 4 A Rectangular cut in bedrock \n", "4 5 A Layer of brown, silty clay \n", "5 6 A Cut in bedrock \n", "6 7 A Layer of rocks mixed with brown clay \n", "7 8 A Cluster of mortar on Northwest side of cut \n", "8 9 A Cluster of mortar in SE side of SU4 \n", "9 10 A Cluster of roof tiles in SW corner of SU4 \n", "10 11 A Layer of brown sandy clay \n", "\n", " Publishing_Status \n", "0 beta \n", "1 NaN \n", "2 beta \n", "3 beta \n", "4 NaN \n", "5 NaN \n", "6 beta \n", "7 NaN \n", "8 Published \n", "0 Published \n", "1 Beta \n", "2 Beta \n", "3 Beta \n", "4 Beta \n", "5 Beta \n", "6 Beta \n", "7 Beta \n", "8 Beta \n", "9 Beta \n", "10 Published " ] }, "execution_count": 165, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#go back a step and clean up values in the columns to match between tables\n", "su_stack.replace({'area_i':'I','published':'Published'},inplace=True)\n", "su_stack.head(20)" ] }, { "cell_type": "code", "execution_count": 166, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SUAreaDefinitionPublishing_Status
00AnanPublished
11ABedrock identified in 2009Beta
23AFill of cut SU4Beta
34ARectangular cut in bedrockBeta
45ALayer of brown, silty clayBeta
\n", "
" ], "text/plain": [ " SU Area Definition Publishing_Status\n", "0 0 A nan Published\n", "1 1 A Bedrock identified in 2009 Beta\n", "2 3 A Fill of cut SU4 Beta\n", "3 4 A Rectangular cut in bedrock Beta\n", "4 5 A Layer of brown, silty clay Beta" ] }, "execution_count": 166, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#merge multiple rows with the same SU id to merge changes. Come up with a better way to do this. \n", "su_stack_clean = su_stack.groupby(['SU']).agg(lambda x: ';;'.join(x.astype(str))).reset_index()\n", "su_stack_clean.head()" ] }, { "cell_type": "code", "execution_count": 167, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SUAreaDefinitionPublishing_Status
45819999I;;nan;;nan;;nan;;HA test SU for the purpose of this form's creat...beta;;nan;;nan;;Published;;Beta
\n", "
" ], "text/plain": [ " SU Area \\\n", "4581 9999 I;;nan;;nan;;nan;;H \n", "\n", " Definition \\\n", "4581 A test SU for the purpose of this form's creat... \n", "\n", " Publishing_Status \n", "4581 beta;;nan;;nan;;Published;;Beta " ] }, "execution_count": 167, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#choose a row with a merged info set to see what it looks like\n", "su_stack_clean.loc[su_stack_clean['SU'] == 9999]" ] }, { "cell_type": "code", "execution_count": 170, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "ID 4594\n", "PUB_STATUS 4177\n", "PUB_YEAR 4594\n", "DEFINITION 4578\n", "FORMATION_PROCESS 4526\n", "LAYER_DATE_START 4594\n", "LAYER_DATE_START_SUFFIX 1502\n", "LAYER_DATE_END 4594\n", "LAYER_DATE_END_SUFFIX 1497\n", "LAYER_DATE_OBSERVATIONS 1041\n", "STRAT_RELIABILITY 4316\n", "EXCAV_YEAR 4594\n", "EXCAV_AREA 4581\n", "EXCAV_SECTOR 2\n", "SU_TYPE 4480\n", "ELEV_MAX 4594\n", "ELEV_MIN 4594\n", "IN_CROSS_SECTION 4594\n", "IN_ELEVATION_DRAWING 4594\n", "HAS_PHOTOS 4594\n", "HAS_PHOTOMODELS 4594\n", "FILLED_BY 4594\n", "FILLED_ON 2723\n", "REVISED_BY 4594\n", "REVISED_ON 2259\n", "CLAY 4594\n", "SILT 4594\n", "SAND 4594\n", "SOIL_MATRIX 2204\n", "SOIL_COMPACTION 2318\n", " ... \n", "ORIG_PDF_DATE 1191\n", "ORIG_SIGNED_DATE 678\n", "ORIG_FINDS_SIGNED_DATE 137\n", "RECORD_FILLED 4594\n", "RECORD_REVISED 4594\n", "RECORD_FILLED_DATE 1264\n", "RECORD_REVISED_DATE 4594\n", "FINDS_BY 528\n", "FINDS_ON 494\n", "GEOLOGIC_INCLUSIONS 1663\n", "ORGANIC_INCLUSIONS 1717\n", "ANTHROPIC_INCLUSIONS 2122\n", "LAYER_DISTINGUISHED_BY 3358\n", "CHRONOLOGICAL_PERIOD 95\n", "LIMITS_MULTI 4465\n", "CUTS_MULTI 870\n", "STRUCTURAL_REMAINS_MULTI 959\n", "MATRIX1 4080\n", "MATRIX2 4422\n", "ABUTS 696\n", "IS_ABUTTED_BY 539\n", "COVERS 2296\n", "IS_COVERED_BY 3528\n", "CUTS 792\n", "IS_CUT_BY 907\n", "FILLS 1236\n", "IS_FILLED_BY 867\n", "IS_BOUND_TO 87\n", "BINDS_TO 87\n", "EQUALS 275\n", "Length: 105, dtype: int64" ] }, "execution_count": 170, "metadata": {}, "output_type": "execute_result" } ], "source": [ "legacy_su.count()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " token is: 0239eed59b53ae940ca8835302b6ad360c8fc540\n", " project is: aFQDtwfYLpWjyYb6Uc5nMp\n", "\n", " form is: 300227 gabii dev\n", "form is: 300075 gabii SU" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }