{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "0c4964ac", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Last run: 2023-05-13 07:22:00.289469\n" ] } ], "source": [ "from datetime import datetime\n", "start_time = datetime.now()\n", "print(\"Last run: \", start_time)" ] }, { "cell_type": "code", "execution_count": 2, "id": "360c0e42", "metadata": {}, "outputs": [], "source": [ "import urllib3, json\n", "import pandas as pd \n", "http = urllib3.PoolManager() \n", "pd.set_option(\"display.max.columns\", None) \n", " \n", "url = \"https://litteraturbanken.se/api/get_authors\"\n", "r = http.request('GET', url)\n", "data = json.loads(r.data)\n", "dfLitt = pd.json_normalize(data[\"data\"])" ] }, { "cell_type": "code", "execution_count": 3, "id": "04aae65b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Int64Index: 3850 entries, 2 to 4922\n", "Data columns (total 50 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 authorid 3850 non-null object \n", " 1 authorid_norm 3850 non-null object \n", " 2 db_checksum 3850 non-null object \n", " 3 db_timestamp 3850 non-null int64 \n", " 4 doc_type 3850 non-null object \n", " 5 full_name 3850 non-null object \n", " 6 gender 3850 non-null object \n", " 7 imported 3422 non-null object \n", " 8 intro 724 non-null object \n", " 9 name_for_index 3850 non-null object \n", " 10 pictureinfo 255 non-null object \n", " 11 searchable 3850 non-null bool \n", " 12 show 3850 non-null bool \n", " 13 surname 3850 non-null object \n", " 14 updated 3422 non-null object \n", " 15 birth.date 3612 non-null object \n", " 16 birth.plain 3850 non-null object \n", " 17 death.date 2183 non-null object \n", " 18 death.plain 3359 non-null object \n", " 19 librisid 2298 non-null object \n", " 20 wikidata.birthplace 2394 non-null object \n", " 21 wikidata.birthplace_label 2394 non-null object \n", " 22 wikidata.deathplace 2072 non-null object \n", " 23 wikidata.deathplace_label 2072 non-null object \n", " 24 wikidata.image 1845 non-null object \n", " 25 wikidata.sbl_link 927 non-null object \n", " 26 wikidata.skbl_link 183 non-null object \n", " 27 wikidata.sol_link 145 non-null object \n", " 28 wikidata.wikidata_id 3307 non-null object \n", " 29 wikidata.wikipedia 2303 non-null object \n", " 30 db_timestamp_updated 3102 non-null float64\n", " 31 intro_text 724 non-null object \n", " 32 popularity 2587 non-null float64\n", " 33 pseudonym 149 non-null object \n", " 34 dramawebben.intro 103 non-null object \n", " 35 dramawebben.intro_author 102 non-null object \n", " 36 dramawebben.intro_author_norm 102 non-null object \n", " 37 dramawebben.legacy_url 109 non-null object \n", " 38 dramawebben.picture 76 non-null object \n", " 39 sources 528 non-null object \n", " 40 other_name 111 non-null object \n", " 41 intro_author 396 non-null object \n", " 42 intro_author_norm 396 non-null object \n", " 43 dramawebben.picture_info 71 non-null object \n", " 44 picture 341 non-null object \n", " 45 bibliography 18 non-null object \n", " 46 external_ref 8 non-null object \n", " 47 presentation 36 non-null object \n", " 48 seemore 4 non-null object \n", " 49 dramawebben.sources 6 non-null object \n", "dtypes: bool(2), float64(2), int64(1), object(45)\n", "memory usage: 1.4+ MB\n" ] } ], "source": [ "dfLittShowTrue = dfLitt[dfLitt[\"show\"] == True].copy()\n", "dfLittShowTrue.info()" ] }, { "cell_type": "code", "execution_count": 4, "id": "abafbc5a", "metadata": {}, "outputs": [], "source": [ "# pip install sparqlwrapper\n", "# https://rdflib.github.io/sparqlwrapper/\n", "\n", "import sys,json\n", "import pandas as pd \n", "\n", "from SPARQLWrapper import SPARQLWrapper, JSON\n", "\n", "endpoint_url = \"https://query.wikidata.org/sparql\"\n", "\n", "# \n", "query = \"\"\"SELECT (REPLACE(STR(?item), \".*Q\", \"Q\") AS ?WikidataID) ?authorid ?SBL ?SKBL WHERE {\n", "?item wdt:P31 wd:Q5.\n", "?item wdt:P5101 ?authorid\n", "OPTIONAL {?item wdt:P3217 ?SBL}\n", "OPTIONAL {?item wdt:P4963 ?SKBL}\n", "} order by ?authorid\"\"\"\n", " \n", " \n", "def get_sparql_dataframe(endpoint_url, query):\n", " \"\"\"\n", " Helper function to convert SPARQL results into a Pandas data frame.\n", " \"\"\"\n", " user_agent = \"salgo60/%s.%s\" % (sys.version_info[0], sys.version_info[1])\n", " \n", " sparql = SPARQLWrapper(endpoint_url, agent=user_agent)\n", " sparql.setQuery(query)\n", " sparql.setReturnFormat(JSON)\n", " result = sparql.query()\n", "\n", " processed_results = json.load(result.response)\n", " cols = processed_results['head']['vars']\n", " out = []\n", " for row in processed_results['results']['bindings']:\n", " item = []\n", " for c in cols:\n", " item.append(row.get(c, {}).get('value'))\n", " out.append(item)\n", "\n", " return pd.DataFrame(out, columns=cols)\n", "\n", "WDLittbanktot = get_sparql_dataframe(endpoint_url, query)" ] }, { "cell_type": "code", "execution_count": 5, "id": "d13c58ad", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
WikidataIDauthoridSBLSKBL
0Q11967131AasenENoneNone
1Q4933592AbeniusMNoneMargitAbenius
2Q24680938AbrahamssonANoneNone
3Q4934135AbrahamssonSNoneNone
4Q365923AchariusE5503None
...............
3482Q108406266ÖstergrenCNoneNone
3483Q6257795ÖstergrenCLNoneNone
3484Q100752816ÖstinONoneNone
3485Q6258216ÖstmanKNoneNone
3486Q11978200ØverlandJNoneNone
\n", "

3487 rows × 4 columns

\n", "
" ], "text/plain": [ " WikidataID authorid SBL SKBL\n", "0 Q11967131 AasenE None None\n", "1 Q4933592 AbeniusM None MargitAbenius\n", "2 Q24680938 AbrahamssonA None None\n", "3 Q4934135 AbrahamssonS None None\n", "4 Q365923 AchariusE 5503 None\n", "... ... ... ... ...\n", "3482 Q108406266 ÖstergrenC None None\n", "3483 Q6257795 ÖstergrenCL None None\n", "3484 Q100752816 ÖstinO None None\n", "3485 Q6258216 ÖstmanK None None\n", "3486 Q11978200 ØverlandJ None None\n", "\n", "[3487 rows x 4 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "WDLittbanktot" ] }, { "cell_type": "code", "execution_count": 6, "id": "0a0f2614", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "both 3319\n", "left_only 531\n", "right_only 168\n", "Name: WD_Littbank_merge_Outer, dtype: int64" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# df = pd.merge(dfA, dfB, on=['a','b'], how=\"outer\", indicator=True)\n", "WDLittbank_WD_merge= pd.merge(dfLittShowTrue,WDLittbanktot, how=\"outer\", on='authorid',indicator=True)\n", "WDLittbank_WD_merge.rename(columns={\"_merge\": \"WD_Littbank_merge_Outer\"},inplace = True)\n", "\n", "WDLittbank_WD_merge[\"WD_Littbank_merge_Outer\"].value_counts()\n", "\n" ] }, { "cell_type": "code", "execution_count": 7, "id": "32e7903e", "metadata": {}, "outputs": [], "source": [ "WDLittbank_WD_merge_Not_connected = \\\n", "WDLittbank_WD_merge[WDLittbank_WD_merge[\"WD_Littbank_merge_Outer\"] == \"left_only\"].copy()" ] }, { "cell_type": "code", "execution_count": 8, "id": "692fdcbd", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
authoridauthorid_normdb_checksumdb_timestampdoc_typefull_namegenderimportedintroname_for_indexpictureinfosearchableshowsurnameupdatedbirth.datebirth.plaindeath.datedeath.plainlibrisidwikidata.birthplacewikidata.birthplace_labelwikidata.deathplacewikidata.deathplace_labelwikidata.imagewikidata.sbl_linkwikidata.skbl_linkwikidata.sol_linkwikidata.wikidata_idwikidata.wikipediadb_timestamp_updatedintro_textpopularitypseudonymdramawebben.introdramawebben.intro_authordramawebben.intro_author_normdramawebben.legacy_urldramawebben.picturesourcesother_nameintro_authorintro_author_normdramawebben.picture_infopicturebibliographyexternal_refpresentationseemoredramawebben.sourcesWikidataIDSBLSKBLWD_Littbank_merge_Outer
7AddäosAddaosda5e35f5f2e32602a17344e44b2b6e931.682496e+12authorAddäosmale2014-08-22NoneAddäosNoneFalseTrueAddäos2016-04-12-3500000NaN0000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNleft_only
30AhlbergLAhlbergLee16e9ff206c29114ebb85c687f26a3d1.682496e+12authorLudvig Ahlbergmale2021-03-01NoneAhlberg, LudvigNoneTrueTrueAhlberg2021-03-01NaN180918881888NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.682496e+12NaN67.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNleft_only
39AhlmanCAhlmanCded5b5ffeea552a3c1ea1778d44303641.682496e+12authorCarl O. Ahlmanmale2019-02-25NoneAhlman, Carl O.NoneFalseTrueAhlman2019-02-2518300000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.682496e+12NaN290.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNleft_only
49AhrenbergBAhrenbergBb1741fb810e253a036639d61e7a3563f1.682496e+12authorBertha Ahrenbergfemale2022-01-31NoneAhrenberg, BerthaNoneTrueTrueAhrenberg2022-01-3118510000NaN0000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.682496e+12NaN818.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNleft_only
66AllvarssonAAllvarssonA3dfce487ff899fd930a9216c072f4f211.682496e+12authorAnders Allvarssonmale2023-03-27NoneAllvarsson, AndersNoneTrueTrueAllvarsson2023-03-2718710000NaN0000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.682496e+12NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNleft_only
.....................................................................................................................................................................
3815ÅkerlundPAAkerlundPA305970795ebaa9fc00e54d3889a241f11.682496e+12authorP. A. Åkerlundnot known2021-04-06NoneÅkerlund, P. A.NoneTrueTrueÅkerlund2021-04-0618430000NaN0000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.682496e+12NaN139.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNleft_only
3819ÅlundPAlundP7e20f1ee10e297c8935b8413bff606101.682496e+12authorPaulus Ålundmale2019-02-25NoneÅlund, PaulusNoneTrueTrueÅlund2019-02-2518200000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.682496e+12NaN565.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNleft_only
3838OhmanFAOhmanFAeed1bf5666f0b20a897d2dd6ac37270c1.682496e+12authorFrans August ÖhmanmaleNaNNoneÖhman, Frans AugustNoneFalseTrueÖhmanNaN1821182118921892NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNleft_only
3845ÖstergrenPJOstergrenPJ27ee5cfe5cf3d88be34963b41f68331f1.682496e+12authorP. J. Östergrenmale2021-11-29NoneÖstergren, P. J.NoneTrueTrueÖstergren2021-11-2918700000NaN0000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.682496e+12NaN116.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNleft_only
3847ÖstmanCOstmanC6a20a9700a39284e25991764ae0d769a1.682496e+12authorCarin Östmanfemale2014-02-11NoneÖstman, CarinNoneTrueTrueÖstman2014-03-1719581958NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.682496e+12NaN367.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNleft_only
\n", "

531 rows × 54 columns

\n", "
" ], "text/plain": [ " authorid authorid_norm db_checksum \\\n", "7 Addäos Addaos da5e35f5f2e32602a17344e44b2b6e93 \n", "30 AhlbergL AhlbergL ee16e9ff206c29114ebb85c687f26a3d \n", "39 AhlmanC AhlmanC ded5b5ffeea552a3c1ea1778d4430364 \n", "49 AhrenbergB AhrenbergB b1741fb810e253a036639d61e7a3563f \n", "66 AllvarssonA AllvarssonA 3dfce487ff899fd930a9216c072f4f21 \n", "... ... ... ... \n", "3815 ÅkerlundPA AkerlundPA 305970795ebaa9fc00e54d3889a241f1 \n", "3819 ÅlundP AlundP 7e20f1ee10e297c8935b8413bff60610 \n", "3838 OhmanFA OhmanFA eed1bf5666f0b20a897d2dd6ac37270c \n", "3845 ÖstergrenPJ OstergrenPJ 27ee5cfe5cf3d88be34963b41f68331f \n", "3847 ÖstmanC OstmanC 6a20a9700a39284e25991764ae0d769a \n", "\n", " db_timestamp doc_type full_name gender imported intro \\\n", "7 1.682496e+12 author Addäos male 2014-08-22 None \n", "30 1.682496e+12 author Ludvig Ahlberg male 2021-03-01 None \n", "39 1.682496e+12 author Carl O. Ahlman male 2019-02-25 None \n", "49 1.682496e+12 author Bertha Ahrenberg female 2022-01-31 None \n", "66 1.682496e+12 author Anders Allvarsson male 2023-03-27 None \n", "... ... ... ... ... ... ... \n", "3815 1.682496e+12 author P. A. Åkerlund not known 2021-04-06 None \n", "3819 1.682496e+12 author Paulus Ålund male 2019-02-25 None \n", "3838 1.682496e+12 author Frans August Öhman male NaN None \n", "3845 1.682496e+12 author P. J. Östergren male 2021-11-29 None \n", "3847 1.682496e+12 author Carin Östman female 2014-02-11 None \n", "\n", " name_for_index pictureinfo searchable show surname \\\n", "7 Addäos None False True Addäos \n", "30 Ahlberg, Ludvig None True True Ahlberg \n", "39 Ahlman, Carl O. None False True Ahlman \n", "49 Ahrenberg, Bertha None True True Ahrenberg \n", "66 Allvarsson, Anders None True True Allvarsson \n", "... ... ... ... ... ... \n", "3815 Åkerlund, P. A. None True True Åkerlund \n", "3819 Ålund, Paulus None True True Ålund \n", "3838 Öhman, Frans August None False True Öhman \n", "3845 Östergren, P. J. None True True Östergren \n", "3847 Östman, Carin None True True Östman \n", "\n", " updated birth.date birth.plain death.date death.plain librisid \\\n", "7 2016-04-12 -350 0000 NaN 0000 NaN \n", "30 2021-03-01 NaN 1809 1888 1888 NaN \n", "39 2019-02-25 1830 0000 NaN NaN NaN \n", "49 2022-01-31 1851 0000 NaN 0000 NaN \n", "66 2023-03-27 1871 0000 NaN 0000 NaN \n", "... ... ... ... ... ... ... \n", "3815 2021-04-06 1843 0000 NaN 0000 NaN \n", "3819 2019-02-25 1820 0000 NaN NaN NaN \n", "3838 NaN 1821 1821 1892 1892 NaN \n", "3845 2021-11-29 1870 0000 NaN 0000 NaN \n", "3847 2014-03-17 1958 1958 NaN NaN NaN \n", "\n", " wikidata.birthplace wikidata.birthplace_label wikidata.deathplace \\\n", "7 NaN NaN NaN \n", "30 NaN NaN NaN \n", "39 NaN NaN NaN \n", "49 NaN NaN NaN \n", "66 NaN NaN NaN \n", "... ... ... ... \n", "3815 NaN NaN NaN \n", "3819 NaN NaN NaN \n", "3838 NaN NaN NaN \n", "3845 NaN NaN NaN \n", "3847 NaN NaN NaN \n", "\n", " wikidata.deathplace_label wikidata.image wikidata.sbl_link \\\n", "7 NaN NaN NaN \n", "30 NaN NaN NaN \n", "39 NaN NaN NaN \n", "49 NaN NaN NaN \n", "66 NaN NaN NaN \n", "... ... ... ... \n", "3815 NaN NaN NaN \n", "3819 NaN NaN NaN \n", "3838 NaN NaN NaN \n", "3845 NaN NaN NaN \n", "3847 NaN NaN NaN \n", "\n", " wikidata.skbl_link wikidata.sol_link wikidata.wikidata_id \\\n", "7 NaN NaN NaN \n", "30 NaN NaN NaN \n", "39 NaN NaN NaN \n", "49 NaN NaN NaN \n", "66 NaN NaN NaN \n", "... ... ... ... \n", "3815 NaN NaN NaN \n", "3819 NaN NaN NaN \n", "3838 NaN NaN NaN \n", "3845 NaN NaN NaN \n", "3847 NaN NaN NaN \n", "\n", " wikidata.wikipedia db_timestamp_updated intro_text popularity \\\n", "7 NaN NaN NaN NaN \n", "30 NaN 1.682496e+12 NaN 67.0 \n", "39 NaN 1.682496e+12 NaN 290.0 \n", "49 NaN 1.682496e+12 NaN 818.0 \n", "66 NaN 1.682496e+12 NaN 0.0 \n", "... ... ... ... ... \n", "3815 NaN 1.682496e+12 NaN 139.0 \n", "3819 NaN 1.682496e+12 NaN 565.0 \n", "3838 NaN NaN NaN NaN \n", "3845 NaN 1.682496e+12 NaN 116.0 \n", "3847 NaN 1.682496e+12 NaN 367.0 \n", "\n", " pseudonym dramawebben.intro dramawebben.intro_author \\\n", "7 NaN NaN NaN \n", "30 NaN NaN NaN \n", "39 NaN NaN NaN \n", "49 NaN NaN NaN \n", "66 NaN NaN NaN \n", "... ... ... ... \n", "3815 NaN NaN NaN \n", "3819 NaN NaN NaN \n", "3838 NaN NaN NaN \n", "3845 NaN NaN NaN \n", "3847 NaN NaN NaN \n", "\n", " dramawebben.intro_author_norm dramawebben.legacy_url dramawebben.picture \\\n", "7 NaN NaN NaN \n", "30 NaN NaN NaN \n", "39 NaN NaN NaN \n", "49 NaN NaN NaN \n", "66 NaN NaN NaN \n", "... ... ... ... \n", "3815 NaN NaN NaN \n", "3819 NaN NaN NaN \n", "3838 NaN NaN NaN \n", "3845 NaN NaN NaN \n", "3847 NaN NaN NaN \n", "\n", " sources other_name intro_author intro_author_norm \\\n", "7 NaN NaN NaN NaN \n", "30 NaN NaN NaN NaN \n", "39 NaN NaN NaN NaN \n", "49 NaN NaN NaN NaN \n", "66 NaN NaN NaN NaN \n", "... ... ... ... ... \n", "3815 NaN NaN NaN NaN \n", "3819 NaN NaN NaN NaN \n", "3838 NaN NaN NaN NaN \n", "3845 NaN NaN NaN NaN \n", "3847 NaN NaN NaN NaN \n", "\n", " dramawebben.picture_info picture bibliography external_ref presentation \\\n", "7 NaN NaN NaN NaN NaN \n", "30 NaN NaN NaN NaN NaN \n", "39 NaN NaN NaN NaN NaN \n", "49 NaN NaN NaN NaN NaN \n", "66 NaN NaN NaN NaN NaN \n", "... ... ... ... ... ... \n", "3815 NaN NaN NaN NaN NaN \n", "3819 NaN NaN NaN NaN NaN \n", "3838 NaN NaN NaN NaN NaN \n", "3845 NaN NaN NaN NaN NaN \n", "3847 NaN NaN NaN NaN NaN \n", "\n", " seemore dramawebben.sources WikidataID SBL SKBL WD_Littbank_merge_Outer \n", "7 NaN NaN NaN NaN NaN left_only \n", "30 NaN NaN NaN NaN NaN left_only \n", "39 NaN NaN NaN NaN NaN left_only \n", "49 NaN NaN NaN NaN NaN left_only \n", "66 NaN NaN NaN NaN NaN left_only \n", "... ... ... ... ... ... ... \n", "3815 NaN NaN NaN NaN NaN left_only \n", "3819 NaN NaN NaN NaN NaN left_only \n", "3838 NaN NaN NaN NaN NaN left_only \n", "3845 NaN NaN NaN NaN NaN left_only \n", "3847 NaN NaN NaN NaN NaN left_only \n", "\n", "[531 rows x 54 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "WDLittbank_WD_merge_Not_connected" ] }, { "cell_type": "code", "execution_count": 9, "id": "b07d52ca", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
authoridlibrisidshowfull_name
165ArfwidssonNhftwzq01397wbbdTrueNils Arfwidsson
1517Homeros0xbdfjvj5rjnrc0TrueHomeros
3515ValbäckFJgdsvzxn032qd1hmTrueFrans Johan Valbäck
3679WhitlockA97mpsnpt0lf558sTrueAnna Whitlock
\n", "
" ], "text/plain": [ " authorid librisid show full_name\n", "165 ArfwidssonN hftwzq01397wbbd True Nils Arfwidsson\n", "1517 Homeros 0xbdfjvj5rjnrc0 True Homeros\n", "3515 ValbäckFJ gdsvzxn032qd1hm True Frans Johan Valbäck\n", "3679 WhitlockA 97mpsnpt0lf558s True Anna Whitlock" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "WDLittbank_WD_merge_Not_connected[WDLittbank_WD_merge_Not_connected['librisid']. \n", " notna()][[\"authorid\",\"librisid\",\"show\",\"full_name\"]]" ] }, { "cell_type": "code", "execution_count": 10, "id": "2a6098a0", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Int64Index: 531 entries, 7 to 3847\n", "Data columns (total 54 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 authorid 531 non-null object \n", " 1 authorid_norm 531 non-null object \n", " 2 db_checksum 531 non-null object \n", " 3 db_timestamp 531 non-null float64 \n", " 4 doc_type 531 non-null object \n", " 5 full_name 531 non-null object \n", " 6 gender 531 non-null object \n", " 7 imported 409 non-null object \n", " 8 intro 21 non-null object \n", " 9 name_for_index 531 non-null object \n", " 10 pictureinfo 4 non-null object \n", " 11 searchable 531 non-null object \n", " 12 show 531 non-null object \n", " 13 surname 531 non-null object \n", " 14 updated 409 non-null object \n", " 15 birth.date 500 non-null object \n", " 16 birth.plain 531 non-null object \n", " 17 death.date 141 non-null object \n", " 18 death.plain 411 non-null object \n", " 19 librisid 4 non-null object \n", " 20 wikidata.birthplace 3 non-null object \n", " 21 wikidata.birthplace_label 3 non-null object \n", " 22 wikidata.deathplace 3 non-null object \n", " 23 wikidata.deathplace_label 3 non-null object \n", " 24 wikidata.image 4 non-null object \n", " 25 wikidata.sbl_link 0 non-null object \n", " 26 wikidata.skbl_link 0 non-null object \n", " 27 wikidata.sol_link 0 non-null object \n", " 28 wikidata.wikidata_id 23 non-null object \n", " 29 wikidata.wikipedia 11 non-null object \n", " 30 db_timestamp_updated 414 non-null float64 \n", " 31 intro_text 21 non-null object \n", " 32 popularity 344 non-null float64 \n", " 33 pseudonym 9 non-null object \n", " 34 dramawebben.intro 0 non-null object \n", " 35 dramawebben.intro_author 0 non-null object \n", " 36 dramawebben.intro_author_norm 0 non-null object \n", " 37 dramawebben.legacy_url 1 non-null object \n", " 38 dramawebben.picture 0 non-null object \n", " 39 sources 9 non-null object \n", " 40 other_name 10 non-null object \n", " 41 intro_author 5 non-null object \n", " 42 intro_author_norm 5 non-null object \n", " 43 dramawebben.picture_info 0 non-null object \n", " 44 picture 8 non-null object \n", " 45 bibliography 0 non-null object \n", " 46 external_ref 0 non-null object \n", " 47 presentation 0 non-null object \n", " 48 seemore 0 non-null object \n", " 49 dramawebben.sources 0 non-null object \n", " 50 WikidataID 0 non-null object \n", " 51 SBL 0 non-null object \n", " 52 SKBL 0 non-null object \n", " 53 WD_Littbank_merge_Outer 531 non-null category\n", "dtypes: category(1), float64(3), object(50)\n", "memory usage: 224.7+ KB\n" ] } ], "source": [ "WDLittbank_WD_merge_Not_connected.info()" ] }, { "cell_type": "code", "execution_count": 11, "id": "5689f84c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "hftwzq01397wbbd 1\n", "0xbdfjvj5rjnrc0 1\n", "gdsvzxn032qd1hm 1\n", "97mpsnpt0lf558s 1\n", "Name: librisid, dtype: int64" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "WDLittbank_WD_merge_Not_connected[\"librisid\"].value_counts()" ] }, { "cell_type": "code", "execution_count": 12, "id": "0d3dffa1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
authoridauthorid_normdb_checksumdb_timestampdoc_typefull_namegenderimportedintroname_for_indexpictureinfosearchableshowsurnameupdatedbirth.datebirth.plaindeath.datedeath.plainlibrisidwikidata.birthplacewikidata.birthplace_labelwikidata.deathplacewikidata.deathplace_labelwikidata.imagewikidata.sbl_linkwikidata.skbl_linkwikidata.sol_linkwikidata.wikidata_idwikidata.wikipediadb_timestamp_updatedintro_textpopularitypseudonymdramawebben.introdramawebben.intro_authordramawebben.intro_author_normdramawebben.legacy_urldramawebben.picturesourcesother_nameintro_authorintro_author_normdramawebben.picture_infopicturebibliographyexternal_refpresentationseemoredramawebben.sourcesWikidataIDSBLSKBLWD_Littbank_merge_Outer
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [authorid, authorid_norm, db_checksum, db_timestamp, doc_type, full_name, gender, imported, intro, name_for_index, pictureinfo, searchable, show, surname, updated, birth.date, birth.plain, death.date, death.plain, librisid, wikidata.birthplace, wikidata.birthplace_label, wikidata.deathplace, wikidata.deathplace_label, wikidata.image, wikidata.sbl_link, wikidata.skbl_link, wikidata.sol_link, wikidata.wikidata_id, wikidata.wikipedia, db_timestamp_updated, intro_text, popularity, pseudonym, dramawebben.intro, dramawebben.intro_author, dramawebben.intro_author_norm, dramawebben.legacy_url, dramawebben.picture, sources, other_name, intro_author, intro_author_norm, dramawebben.picture_info, picture, bibliography, external_ref, presentation, seemore, dramawebben.sources, WikidataID, SBL, SKBL, WD_Littbank_merge_Outer]\n", "Index: []" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "WDLittbank_WD_merge_Not_connected[WDLittbank_WD_merge_Not_connected[\"librisid\"] ==\"20dgkckl01lmd80\"]" ] }, { "cell_type": "code", "execution_count": 13, "id": "4c88a147", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Series([], Name: wikidata.sbl_link, dtype: int64)" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "WDLittbank_WD_merge_Not_connected[\"wikidata.sbl_link\"].value_counts()" ] }, { "cell_type": "code", "execution_count": 14, "id": "1f56fa8e", "metadata": {}, "outputs": [], "source": [ "#WDLittbank_WD_merge_Not_connected[\"sources\"].value_counts()" ] }, { "cell_type": "code", "execution_count": 15, "id": "9490054f", "metadata": {}, "outputs": [], "source": [ "#WDLittbank_WD_merge_Not_connected[\"popularity\"].value_counts()" ] }, { "cell_type": "code", "execution_count": 16, "id": "dd7680a3", "metadata": {}, "outputs": [], "source": [ "# WDLittbank_WD_merge_Not_connected[\"intro_text\"].value_counts()" ] }, { "cell_type": "markdown", "id": "60380950", "metadata": {}, "source": [ "### check WD" ] }, { "cell_type": "code", "execution_count": 17, "id": "bef62966", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "both 3319\n", "left_only 531\n", "right_only 168\n", "Name: WD_Littbank_merge_Outer, dtype: int64" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# df = pd.merge(dfA, dfB, on=['a','b'], how=\"outer\", indicator=True)\n", "WDLittbank_WD_onWD_merge= pd.merge(dfLittShowTrue,WDLittbanktot, how=\"outer\", on='authorid',indicator=True)\n", "WDLittbank_WD_onWD_merge.rename(columns={\"_merge\": \"WD_Littbank_merge_Outer\"},inplace = True)\n", "\n", "WDLittbank_WD_onWD_merge[\"WD_Littbank_merge_Outer\"].value_counts()\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" } }, "nbformat": 4, "nbformat_minor": 5 }