# Drugs4Covid Search Engine -- "Search" cell.
#
# Preparation (run once in the notebook before this cell):
#   !pip install pysolr
#   !wget -N "https://raw.githubusercontent.com/oeg-upm/drugs4covid19-nlp/master/notebooks/auxfunctions.py"
#
# What this cell does:
#   1. Looks the keyword up as a drug and as a disease.
#   2. Searches paragraphs whose text contains the keyword or that are
#      annotated with one of the drugs/diseases found in step 1.
#   3. Shows a table of matching articles/paragraphs and four donut plots
#      summarising the drug and disease annotations of those paragraphs.

from urllib.parse import quote

import auxfunctions
import requests
import pysolr
import pandas as pd
from IPython.display import display, HTML

keywords = "Hydroxychloroquine"  #@param {type:"string"}

number = 5  #@param {type:"slider", min:1, max:20, step:1}

# Annotation fields are probed for levels 0..MAX_ANNOTATION_LEVELS-1
# (bionlp_<kind>_C<level> / bionlp_<kind>_N<level>).
MAX_ANNOTATION_LEVELS = 20
# Maximum number of characters of paragraph text shown in the table.
SNIPPET_LENGTH = 300


def solr_phrase(value):
    """Return ``value`` as a double-quoted Solr phrase.

    Backslashes and double quotes are escaped so that user input containing
    spaces or query syntax characters is matched literally instead of
    changing the structure of the query.
    """
    escaped = str(value).replace("\\", "\\\\").replace('"', '\\"')
    return '"' + escaped + '"'


def level_filters(docs, field_prefix):
    """Build one ``<field_prefix><level_i>:"<id>"`` clause per document.

    Documents lacking ``level_i`` or ``id`` are skipped instead of raising
    ``KeyError``.
    """
    return [
        field_prefix + str(doc['level_i']) + ":" + solr_phrase(doc['id'])
        for doc in docs
        if 'level_i' in doc and 'id' in doc
    ]


def collect_annotations(paragraph, kind, codes_by_level, names_by_level):
    """Accumulate the ``bionlp_<kind>_C*`` / ``_N*`` values of a paragraph.

    Both dictionaries are keyed by the *code* field name
    (``bionlp_<kind>_C<level>``) and are updated in place. A paragraph that
    has codes for a level but no names contributes no names for that level.
    """
    for level in range(MAX_ANNOTATION_LEVELS):
        code_key = 'bionlp_' + kind + '_C' + str(level)
        if code_key not in paragraph:
            continue
        name_key = 'bionlp_' + kind + '_N' + str(level)
        codes_by_level.setdefault(code_key, []).extend(paragraph[code_key])
        names_by_level.setdefault(code_key, []).extend(paragraph.get(name_key, []))


search_term = keywords.strip()
if not search_term:
    raise ValueError("keywords must not be empty")
phrase = solr_phrase(search_term)

# Solr collections
solr_drugs = pysolr.Solr('http://librairy.linkeddata.es/data/atc')
solr_diseases = pysolr.Solr('http://librairy.linkeddata.es/data/diseases')
solr_articles = pysolr.Solr('http://librairy.linkeddata.es/data/covid')
solr_paragraphs = pysolr.Solr('http://librairy.linkeddata.es/data/covid-paragraphs')

# NOTE: Solr's standard query parser only recognises upper-case boolean
# operators; a lower-case "or" is parsed as a search term.

# keyword as drug
drugs = solr_drugs.search(q=" OR ".join([
    "label_t:" + phrase,
    "code_s:" + phrase,
    "id:" + phrase,
]))
filter_by_drugs = level_filters(drugs, "bionlp_drugs_C")

# keyword as disease
# The last clause previously read `mappings="..."`; `:` is the field
# separator, so `=` never queried the `mappings` field.
diseases = solr_diseases.search(q=" OR ".join([
    "name_t:" + phrase,
    "id:" + phrase,
    "synonyms:" + phrase,
    "mappings:" + phrase,
]))
filter_by_diseases = level_filters(diseases, "bionlp_diseases_C")

# Solr query: keyword in the text, or any drug/disease annotation found above
solr_query = " OR ".join(["text_t:" + phrase] + filter_by_drugs + filter_by_diseases)

# Get paragraphs
paragraphs = solr_paragraphs.search(q=solr_query, rows=number)

related_drugs_code = {}
related_diseases_code = {}
related_drugs_name = {}
related_diseases_name = {}

# Rows are collected in a list and turned into a DataFrame once, rather
# than growing the DataFrame one row at a time.
article_rows = []
for paragraph in paragraphs:
    title = "unknown"
    url = ""
    purl = ("https://librairy.linkeddata.es/solr/covid-paragraphs/select?q=id:"
            + quote(str(paragraph['id']), safe=''))

    # Resolve the parent article; title/url keep their defaults when the
    # paragraph has no article id or the article lacks either field.
    article_id = paragraph.get('article_id_s')
    if article_id:
        for article in solr_articles.search("id:" + solr_phrase(article_id)):
            if 'name_s' in article and 'url_s' in article:
                title = article['name_s']
                url = article['url_s']

    text = paragraph.get('text_t', '')
    snippet = text[:SNIPPET_LENGTH]
    if len(text) > SNIPPET_LENGTH:
        # Only mark the snippet as shortened when something was cut off.
        snippet += "..."

    # NOTE(review): the table is rendered with escape=False below; whether
    # make_clickable HTML-escapes its label is not visible here -- confirm.
    article_rows.append([
        auxfunctions.make_clickable(title, url),
        auxfunctions.make_clickable(snippet, purl),
    ])

    collect_annotations(paragraph, 'diseases', related_diseases_code, related_diseases_name)
    collect_annotations(paragraph, 'drugs', related_drugs_code, related_drugs_name)

articles_df = pd.DataFrame(article_rows, columns=['Article', 'Paragraph'])

# Show table of articles
display(HTML(articles_df.to_html(justify='left', escape=False)))

# Show plot with diseases
auxfunctions.build_donut_plot(related_diseases_code, "diseases_mesh").show()
auxfunctions.build_donut_plot(related_diseases_name, "diseases_name").show()

# Show plot with drugs
auxfunctions.build_donut_plot(related_drugs_code, "drugs_atc").show()
auxfunctions.build_donut_plot(related_drugs_name, "drugs_name").show()