{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import pandas as pd\n",
"\n",
"from bbw import bbw\n",
"from IPython.core.display import display, HTML"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" col0 | \n",
" col1 | \n",
" col2 | \n",
" col3 | \n",
"
\n",
" \n",
" 1 | \n",
" Mannheim | \n",
" Rhine | \n",
" 97 | \n",
" Baden-Württemberg | \n",
"
\n",
" \n",
" 2 | \n",
" Edinburgh | \n",
" River Forth | \n",
" 47 | \n",
" City of Edinburgh | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 2 3\n",
"0 col0 col1 col2 col3\n",
"1 Mannheim Rhine 97 Baden-Württemberg\n",
"2 Edinburgh River Forth 47 City of Edinburgh"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = [\n",
" ['col0', 'col1', 'col2', 'col3'],\n",
" ['Mannheim','Rhine', '97', 'Baden-Württemberg'],\n",
" ['Edinburgh','River Forth', '47', 'City of Edinburgh']\n",
"]\n",
"df = pd.DataFrame(data)\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Simple workflow for semantic annotations"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"[web_table, url_table, label_table, cpa, cea, cta] = bbw.annotate(df)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" \n",
" \n",
" | \n",
" col0 | \n",
" col1 | \n",
" col2 | \n",
" col3 | \n",
"
\n",
" \n",
" \n",
" \n",
" 1 | \n",
" Mannheim | \n",
" Rhine | \n",
" 97 | \n",
" Baden-Württemberg | \n",
"
\n",
" \n",
" 2 | \n",
" Edinburgh | \n",
" River Forth | \n",
" 47 | \n",
" City of Edinburgh | \n",
"
\n",
" \n",
" property | \n",
" | \n",
" located in or next to body of water | \n",
" elevation above sea level | \n",
" located in the administrative territorial entity | \n",
"
\n",
" \n",
" type | \n",
" city | \n",
" river | \n",
" | \n",
" administrative territorial entity of a specific level | \n",
"
\n",
" \n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"display(HTML(web_table.to_html(escape=False)))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Up to here the examples worked without SearX, because that is not installed locally along this Jupyter notebook.\n",
"\n",
"## Metalookup via SearX\n",
"\n",
"However, we can use a public instance https://searx.space/# for trying it out (but carefully as this only works for a handful examples at once)."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'https://searx.monicz.pl/'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# For example\n",
"os.environ[\"BBW_SEARX_URL\"] = \"https://searx.monicz.pl/\"\n",
"os.environ[\"BBW_SEARX_URL\"]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[['Mannheim'], ['Edinburgh', 'edinburgh', 'Dingbur']]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Use searx to get the bestname for a string with mistakes\n",
"[bbw.get_searx_bestname('Monnhem'), bbw.get_searx_bestname('dingbur')]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" col0 | \n",
" col1 | \n",
" col2 | \n",
" col3 | \n",
"
\n",
" \n",
" 1 | \n",
" Monnheim | \n",
" Rhine | \n",
" 97 | \n",
" Baden-Württemberg | \n",
"
\n",
" \n",
" 2 | \n",
" dingbur | \n",
" River Forth | \n",
" 47 | \n",
" City of Edinburgh | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 2 3\n",
"0 col0 col1 col2 col3\n",
"1 Monnheim Rhine 97 Baden-Württemberg\n",
"2 dingbur River Forth 47 City of Edinburgh"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[0][1] = \"Monnheim\"\n",
"df[0][2] = \"dingbur\"\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" \n",
" \n",
" | \n",
" col0 | \n",
" col1 | \n",
" col2 | \n",
" col3 | \n",
"
\n",
" \n",
" \n",
" \n",
" 1 | \n",
" Mannheim | \n",
" Rhine | \n",
" 97 | \n",
" Baden-Württemberg | \n",
"
\n",
" \n",
" 2 | \n",
" Edinburgh | \n",
" River Forth | \n",
" 47 | \n",
" City of Edinburgh | \n",
"
\n",
" \n",
" property | \n",
" | \n",
" located in or next to body of water | \n",
" elevation above sea level | \n",
" located in the administrative territorial entity | \n",
"
\n",
" \n",
" type | \n",
" city | \n",
" river | \n",
" | \n",
" administrative territorial entity of a specific level | \n",
"
\n",
" \n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"[web_table, url_table, label_table, cpa, cea, cta] = bbw.annotate(df)\n",
"display(HTML(web_table.to_html(escape=False)))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## GUI\n",
"The GUI runs on a special port `8501` which you can access from the current URL by replacing the `notebooks/bbw.ipynb` with `proxy/8501/`.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.8"
}
},
"nbformat": 4,
"nbformat_minor": 4
}