{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Load the RDF data dump" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "27864 triples in Biii data graph\n", "CPU times: user 1.45 s, sys: 40.5 ms, total: 1.49 s\n", "Wall time: 1.81 s\n" ] } ], "source": [
 "%%time\n",
 "from rdflib import ConjunctiveGraph\n",
 "\n",
 "# Prefixes handed to every SPARQL query through initNs\n",
 "ns = {\"nb\": \"http://bise-eu.info/core-ontology#\",\n",
 "      \"dc\": \"http://dcterms/\",\n",
 "      \"p-plan\": \"http://purl.org/net/p-plan#\",\n",
 "      \"edam\": \"http://purl.obolibrary.org/obo/edam#\"}\n",
 "\n",
 "g = ConjunctiveGraph()\n",
 "\n",
 "### latest version of the RDF dataset dump\n",
 "g.parse(\"https://raw.githubusercontent.com/bio-tools/content/master/datasets/bise-ontology-biii-dump.ttl\", format=\"turtle\")\n",
 "# ontology file read from a path relative to the working directory\n",
 "g.parse(\"bise-linked-data-webapp/static/data/EDAM-bioimaging_alpha03.owl\")\n",
 "print(f'{len(g)} triples in Biii data graph')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Critical Entries +++" ] },
{ "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "No author and no download link: 115\n", "Missing authors or download link: 533\n", "No EDAM annotations: 319\n", "No License (free text): 1055\n", "CPU times: user 1.04 s, sys: 9.52 ms, total: 1.05 s\n", "Wall time: 1.05 s\n" ] } ], "source": [
 "%%time\n",
 "def count_entries(query, label):\n",
 "    \"\"\"Run a COUNT query on `g`, print `label` followed by the count, and return the count.\n",
 "\n",
 "    The query must bind ?nb_soft. Returns an empty string if the query yields no row.\n",
 "    \"\"\"\n",
 "    count = \"\"\n",
 "    for row in g.query(query, initNs=ns):\n",
 "        count = row[\"nb_soft\"]\n",
 "        print(label + str(count))\n",
 "    return count\n",
 "\n",
 "\n",
 "# Entries without authors and without download link\n",
 "q = \"\"\"\n",
 "SELECT (count(?s) as ?nb_soft) WHERE {\n",
 " ?s rdf:type .\n",
 " FILTER ( NOT EXISTS {?s nb:hasAuthor ?author} \n",
 " && NOT EXISTS {?s nb:hasLocation ?location} ).\n",
 "}\n",
 "\"\"\"\n",
 "no_authors_no_link = count_entries(q, 'No author and no download link: ')\n",
 "\n",
 "\n",
 "# Entries with missing authors or download link\n",
 "q = \"\"\"\n",
 "SELECT (count(?s) as ?nb_soft) WHERE {\n",
 " ?s rdf:type .\n",
 " FILTER ( NOT EXISTS {?s nb:hasAuthor ?author} \n",
 " || NOT EXISTS {?s nb:hasLocation ?location} ).\n",
 "}\n",
 "\"\"\"\n",
 "# stored under its own name so it does not overwrite no_authors_no_link\n",
 "missing_authors_or_link = count_entries(q, 'Missing authors or download link: ')\n",
 "\n",
 "\n",
 "# Entries without any EDAM operation\n",
 "q = \"\"\"\n",
 "SELECT (count(?s) as ?nb_soft) WHERE {\n",
 " ?s rdf:type .\n",
 " FILTER ( NOT EXISTS {?s nb:hasFunction ?function} \n",
 " && NOT EXISTS {?s nb:hasTopic ?topic} ).\n",
 "}\n",
 "\"\"\"\n",
 "no_EDAM = count_entries(q, 'No EDAM annotations: ')\n",
 "\n",
 "\n",
 "# Entries without license\n",
 "q = \"\"\"\n",
 "SELECT (count(?s) as ?nb_soft) WHERE {\n",
 " ?s rdf:type .\n",
 " FILTER ( NOT EXISTS {?s nb:hasLicense ?license} ).\n",
 "}\n",
 "\"\"\"\n",
 "no_License = count_entries(q, 'No License (free text): ')" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Bioimage informatics : 505\n", "Fluorescence microscopy : 88\n", "Machine learning : 67\n", "Imaging : 55\n", "Digital histology : 40\n", "Microscopy : 39\n", "Data sharing : 35\n", "Statistics : 31\n", "Electron microscopy : 28\n", "Light-sheet microscopy : 21\n", "High content screening : 19\n", "Light microscopy : 19\n", "Confocal microscopy : 17\n", "Super-resolution microscopy : 16\n", "Confocal fluorescence microscopy : 15\n", "Single molecule localization microscopy : 15\n", "Live sample imaging : 13\n", "Bright field microscopy : 13\n", "Magnetic resonance imaging : 10\n", "Widefield microscopy : 9\n", "Phase contrast microscopy : 9\n", 
"Tomography : 6\n", "Correlative Light and Electron Microscopy : 6\n", "Supervised learning : 6\n", "Structured illumination microscopy : 6\n", "Serial block-face scanning electron microscopy : 4\n", "In-silico reconstruction : 4\n", "Topic : 3\n", "Fluorescence correlative spectroscopy : 3\n", "Focused ion beam scanning electron microscopy : 3\n", "Computerized tomography : 3\n", "Multi-photon microscopy : 3\n", "Scanning electron microscopy : 3\n", "Electron tomography : 2\n", "In-vivo imaging : 2\n", "Fluctuation based microscopy : 2\n", "Spectroscopy : 2\n", "Image correlation spectroscopy : 2\n", "In-vitro imaging : 2\n", "Fluorescence lifetime imaging microscopy : 2\n", "Point-scanning microscopy : 1\n", "Transmission light microscopy : 1\n", "Cryo electron tomography : 1\n", "CPU times: user 107 ms, sys: 5.47 ms, total: 112 ms\n", "Wall time: 114 ms\n" ] } ], "source": [ "%%time\n", "q = \"\"\"\n", "SELECT ?label (count(distinct ?s1) as ?soft_count) WHERE { \n", " ?s1 a .\n", " ?s1 ?edam_class .\n", " ?edam_class rdfs:label ?label\n", "}\n", "GROUP BY ?edam_class ?label\n", "ORDER BY DESC(?soft_count)\n", "\"\"\"\n", "\n", "results = g.query(q, initNs=ns)\n", "\n", "for r in results:\n", " print (r[\"label\"] + \" : \" + r[\"soft_count\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Count the number of tools without any reference publication" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 2 µs, sys: 0 ns, total: 2 µs\n", "Wall time: 6.2 µs\n", "973\n" ] } ], "source": [
 "%%time\n",
 "# %%time (cell magic) times the whole cell; the bare %time line magic used before timed an empty statement.\n",
 "# count(distinct ?s) counts each tool once, even when it carries several dc:title values;\n",
 "# ?s is no longer projected next to the aggregate, which needs a GROUP BY in standard SPARQL.\n",
 "no_doi_query = \"\"\"\n",
 "SELECT (count(distinct ?s) as ?nb_soft) WHERE {\n",
 " ?s rdf:type .\n",
 " ?s dc:title ?title .\n",
 " FILTER NOT EXISTS {?s nb:hasReferencePublication ?publication} .\n",
 "}\n",
 "\"\"\"\n",
 "results = g.query(no_doi_query, initNs=ns)\n",
 "for r in results:\n",
 "    print(r[\"nb_soft\"])" ] }, { "cell_type": "markdown", "metadata": {}, 
"source": [ "## Count the number of tools with a reference publication" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "757\n", "CPU times: user 58.9 ms, sys: 2.33 ms, total: 61.3 ms\n", "Wall time: 60.7 ms\n" ] } ], "source": [
 "%%time\n",
 "# count(distinct ?s) counts each tool once. Counting ?title rows yields one row per\n",
 "# (title, publication) pair, so a tool with several reference publications was counted several times.\n",
 "doi_query = \"\"\"\n",
 "SELECT (count(distinct ?s) as ?nb_soft) WHERE {\n",
 " ?s rdf:type .\n",
 " ?s dc:title ?title .\n",
 " ?s nb:hasReferencePublication ?publication .\n",
 "}\n",
 "\"\"\"\n",
 "results = g.query(doi_query, initNs=ns)\n",
 "for r in results:\n",
 "    print(r[\"nb_soft\"])" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Bioimage informatics : 505\n", "Fluorescence microscopy : 88\n", "Machine learning : 67\n", "Imaging : 55\n", "Digital histology : 40\n", "Microscopy : 39\n", "Data sharing : 35\n", "Statistics : 31\n", "Electron microscopy : 28\n", "Light-sheet microscopy : 21\n", "High content screening : 19\n", "Light microscopy : 19\n", "Confocal microscopy : 17\n", "Super-resolution microscopy : 16\n", "Confocal fluorescence microscopy : 15\n", "Single molecule localization microscopy : 15\n", "Live sample imaging : 13\n", "Bright field microscopy : 13\n", "Magnetic resonance imaging : 10\n", "Widefield microscopy : 9\n", "Phase contrast microscopy : 9\n", "Tomography : 6\n", "Correlative Light and Electron Microscopy : 6\n", "Supervised learning : 6\n", "Structured illumination microscopy : 6\n", "Serial block-face scanning electron microscopy : 4\n", "In-silico reconstruction : 4\n", "Topic : 3\n", "Fluorescence correlative spectroscopy : 3\n", "Focused ion beam scanning electron microscopy : 3\n", "Computerized tomography : 3\n", "Multi-photon microscopy : 3\n", "Scanning electron microscopy : 3\n", "Electron tomography : 2\n", "In-vivo imaging : 2\n", "Fluctuation based microscopy : 2\n", "Spectroscopy : 2\n", "Image correlation spectroscopy : 2\n", "In-vitro imaging : 2\n", "Fluorescence lifetime imaging microscopy : 2\n", "Point-scanning microscopy : 1\n", "Transmission light microscopy : 1\n", "Cryo electron tomography : 1\n", "CPU times: user 102 ms, sys: 3.85 ms, total: 106 ms\n", "Wall time: 107 ms\n" ] } ], "source": [
 "%%time\n",
 "# NOTE(review): same query and same recorded output as the earlier per-label count cell\n",
 "# (execution count 22); one of the two is probably redundant.\n",
 "how_many_tools_per_topic = \"\"\"\n",
 "SELECT ?label (count(distinct ?s1) as ?soft_count) WHERE { \n",
 " ?s1 a .\n",
 " ?s1 ?edam_class .\n",
 " \n",
 " ?edam_class rdfs:label ?label .\n",
 "}\n",
 "GROUP BY ?edam_class ?label\n",
 "ORDER BY DESC(?soft_count)\n",
 "\"\"\"\n",
 "\n",
 "results = g.query(how_many_tools_per_topic, initNs=ns)\n",
 "for r in results:\n",
 "    print(f\"{r['label']} : {r['soft_count']}\")" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Image segmentation : 116\n", "Data handling : 82\n", "Image processing : 73\n", "Visualisation : 70\n", "Image analysis : 69\n", "Image visualisation : 60\n", "Cell segmentation : 56\n", "Object detection : 53\n", "Image registration : 52\n", "Object tracking : 51\n", "Analysis : 44\n", "Filament tracing : 42\n", "Image annotation : 41\n", "Spot detection : 40\n", "Object feature extraction : 40\n", "Image generation : 40\n", "Morphological operation : 37\n", "Neuron image analysis : 32\n", "Image thresholding : 30\n", "Cell tracking : 25\n", "Image deconvolution : 24\n", "Isolated object detection : 24\n", "Colocalisation analysis : 23\n", "Isolated object classification : 23\n", "Image enhancement : 23\n", "Object counting : 22\n", "Interactive segmentation : 22\n", "Shape features : 20\n", "Annotation : 20\n", "Image denoising : 20\n", "Watershed : 19\n", "Rigid registration : 19\n", "Alignment construction : 19\n", "Particle tracking : 18\n", "Pixel classification : 18\n", "Image correction : 18\n", "Image stitching : 18\n", "Active contours : 17\n", "Volume rendering : 17\n", "Image classification : 
17\n", "Editing : 17\n", "Smoothing : 17\n", "Image reconstruction : 16\n", "Frequency analysis : 16\n", "Affine registration : 16\n", "Geometrical transform : 16\n", "Conversion : 14\n", "Deformable registration : 14\n", "Synthetic image generation : 14\n", "Skeletonisation : 13\n", "Trajectory analysis : 12\n", "Clustering : 12\n", "Plotting : 12\n", "Object-based colocalisation : 11\n", "Affine : 11\n", "Classification : 11\n", "Image convolution : 11\n", "Illumination correction : 10\n", "Image projection : 10\n", "Pattern recognition : 10\n", "Single molecule localisation : 9\n", "Image crop : 8\n", "Feature enhancement : 8\n", "Landmark detection : 8\n", "Chromatic aberration correction : 7\n", "Pixel-based colocalisation : 7\n", "Distance transform : 7\n", "Drift correction : 7\n", "Rotation : 7\n", "Object registration : 7\n", "Texture extraction : 6\n", "Warping : 6\n", "Closing : 6\n", "Selective plane illumination microscopy reconstruction : 5\n", "Optical flow analysis : 5\n", "Prediction and recognition : 4\n", "Geometric distortion correction : 4\n", "Adaptative thresholding : 4\n", "Overlay : 4\n", "Montage : 4\n", "Isolated object tracking : 4\n", "Dilation : 3\n", "Validation : 3\n", "Operation : 2\n", "Erosion : 2\n", "Local thresholding : 2\n", "Spectral unmixing : 2\n", "Image validation : 2\n", "Optimisation or refinement : 2\n", "Collective object tracking : 2\n", "Wound-healing analysis : 2\n", "Opening : 1\n", "Generation : 1\n", "Homogeneity : 1\n", "Tomography reconstruction : 1\n", "Structured illumination reconstruction : 1\n", "CPU times: user 140 ms, sys: 4.45 ms, total: 144 ms\n", "Wall time: 144 ms\n" ] } ], "source": [
 "%%time\n",
 "# Number of distinct tools per label of the class they are linked to, most frequent first\n",
 "how_many_tools_per_operation = \"\"\"\n",
 "SELECT ?label (count(distinct ?s1) as ?soft_count) WHERE { \n",
 " ?s1 a .\n",
 " ?s1 ?edam_class .\n",
 " \n",
 " ?edam_class rdfs:label ?label .\n",
 "}\n",
 "GROUP BY ?edam_class ?label\n",
 "ORDER BY DESC(?soft_count)\n",
 "\"\"\"\n",
 "\n",
 "results = g.query(how_many_tools_per_operation, initNs=ns)\n",
 "for r in results:\n",
 "    print(f\"{r['label']} : {r['soft_count']}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Count the number of tools for 2018" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "169 registered softwares in 2018\n", "\n", "CPU times: user 211 ms, sys: 2.95 ms, total: 214 ms\n", "Wall time: 213 ms\n" ] } ], "source": [
 "%%time\n",
 "# Half-open interval [2018-01-01, 2019-01-01): the lower bound is inclusive so an entry\n",
 "# created exactly at 2018-01-01T00:00:00 is counted.\n",
 "date_query = \"\"\"\n",
 "SELECT (count(?soft) as ?nb_soft) WHERE {\n",
 " ?soft a .\n",
 " ?soft dc:created ?date .\n",
 " FILTER ((?date >= \"2018-01-01T00:00:00\"^^xsd:dateTime) \n",
 " && (?date < \"2019-01-01T00:00:00\"^^xsd:dateTime))\n",
 "}\n",
 "\"\"\"\n",
 "results = g.query(date_query, initNs=ns)\n",
 "for r in results:\n",
 "    print('{} registered softwares in 2018'.format(r['nb_soft']))\n",
 "print()\n" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import matplotlib.dates as mdates" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [
 "%matplotlib inline\n",
 "\n",
 "# One row per (entry, dc:created value)\n",
 "date_query = \"\"\"\n",
 "SELECT ?soft ?date WHERE {\n",
 " ?soft a .\n",
 " ?soft dc:created ?date .\n",
 "}\n",
 "\"\"\"\n",
 "results = g.query(date_query, initNs=ns)\n",
 "soft_dates = [{'soft': str(r['soft']), 'created': str(r['date'])} for r in results]\n",
 "\n",
 "# `df` keeps its name: the per-year table cell further down reads df[\"created\"].dt.year\n",
 "df = pd.DataFrame(soft_dates)\n",
 "# pd.to_datetime replaces astype(\"datetime64\"), whose unit-less dtype is rejected by current pandas;\n",
 "# utc=True gives one consistent dtype even if some values carry a UTC offset\n",
 "df[\"created\"] = pd.to_datetime(df[\"created\"], utc=True)\n",
 "\n",
 "soft_per_year = df.groupby(df[\"created\"].dt.year)[\"soft\"].count()\n",
 "\n",
 "fig, ax = plt.subplots(figsize=(16, 10))\n",
 "soft_per_year.cumsum().plot(kind='bar', ax=ax)\n",
 "ax.set(title=\"Cumulative number of entries per creation year\",\n",
 "       xlabel=\"creation year\", ylabel=\"cumulative number of entries\");" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
soft
created
2013697
2014150
201513
201653
201799
2018169
2019108
202047
202118
20227
20231
\n", "
" ], "text/plain": [ " soft\n", "created \n", "2013 697\n", "2014 150\n", "2015 13\n", "2016 53\n", "2017 99\n", "2018 169\n", "2019 108\n", "2020 47\n", "2021 18\n", "2022 7\n", "2023 1" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import seaborn as sns\n", "\n", "per_year_df = df.groupby([df[\"created\"].dt.year]).count()[['soft']]\n", "per_year_df.to_csv('counts_per_year.csv')\n", "per_year_df\n", "\n", "#sns.countplot(per_year_df['soft'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Ideas for plots: \n", " - cumulative stacked histogram with \"no EDAM annotation\" / \"some EDAM annotation\", to show that we improve the quality of tool descriptions\n", " - same thing with 4-5 categories for the main Topics\n", " - same thing for Tools vs. Training material\n", " - Open Source vs. Commercial software \n", " " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TopicCount
0Bioimage informatics505
1Imaging291
2Machine learning70
3Digital histology40
4Data sharing35
5Statistics31
6High content screening19
7In-silico reconstruction4
\n", "
" ], "text/plain": [ " Topic Count\n", "0 Bioimage informatics 505\n", "1 Imaging 291\n", "2 Machine learning 70\n", "3 Digital histology 40\n", "4 Data sharing 35\n", "5 Statistics 31\n", "6 High content screening 19\n", "7 In-silico reconstruction 4" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "#get the main Topics\n",
 "# ?c ranges over direct subclasses of a root class; rdfs:subClassOf* folds every descendant\n",
 "# class a tool is linked to into its ancestor ?c, and each tool is counted once per ?c.\n",
 "how_many_tools_per_topic = \"\"\"\n",
 "SELECT ?l (count(distinct ?s1) as ?soft_count) WHERE { \n",
 "#SELECT ?subclass WHERE { \n",
 " ?s1 a .\n",
 " ?c rdfs:subClassOf .\n",
 " ?c rdfs:label ?l .\n",
 " ?subclass rdfs:subClassOf* ?c .\n",
 " ?s1 ?subclass .\n",
 "}\n",
 "GROUP BY ?c ?l\n",
 "ORDER BY DESC(?soft_count)\n",
 "\"\"\"\n",
 "\n",
 "results = g.query(how_many_tools_per_topic, initNs=ns)\n",
 "# Count is converted to int (it was stored as text) so the column is numeric\n",
 "list_of_topics = [{'Topic': str(r['l']), 'Count': int(r['soft_count'])} for r in results]\n",
 "\n",
 "# dedicated name: reusing `df` here clobbered the creation-date frame used by the per-year cells\n",
 "topics_df = pd.DataFrame(list_of_topics)\n",
 "topics_df" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OperationCount
0Analysis543
1Image processing355
2Generation130
3Visualisation128
4Data handling97
5Alignment construction74
6Classification62
7Annotation54
8Conversion14
9Prediction and recognition13
10Clustering12
11Validation5
12Optimisation or refinement2
\n", "
" ], "text/plain": [ " Operation Count\n", "0 Analysis 543\n", "1 Image processing 355\n", "2 Generation 130\n", "3 Visualisation 128\n", "4 Data handling 97\n", "5 Alignment construction 74\n", "6 Classification 62\n", "7 Annotation 54\n", "8 Conversion 14\n", "9 Prediction and recognition 13\n", "10 Clustering 12\n", "11 Validation 5\n", "12 Optimisation or refinement 2" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "#get the main Operations\n",
 "# Same roll-up as for topics: each tool is counted once per direct subclass ?c of the root class\n",
 "how_many_tools_per_op = \"\"\"\n",
 "SELECT ?l (count(distinct ?s1) as ?soft_count) WHERE { \n",
 "#SELECT ?subclass ?l WHERE { \n",
 " ?s1 a .\n",
 " ?c rdfs:subClassOf .\n",
 " ?c rdfs:label ?l .\n",
 " ?subclass rdfs:subClassOf* ?c .\n",
 " ?s1 ?subclass .\n",
 "}\n",
 "GROUP BY ?c ?l\n",
 "ORDER BY DESC(?soft_count)\n",
 "\"\"\"\n",
 "\n",
 "results = g.query(how_many_tools_per_op, initNs=ns)\n",
 "# Count is converted to int (it was stored as text) so the column is numeric\n",
 "list_of_op = [{'Operation': str(r['l']), 'Count': int(r['soft_count'])} for r in results]\n",
 "\n",
 "# dedicated name instead of reusing `df`\n",
 "operations_df = pd.DataFrame(list_of_op)\n",
 "operations_df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 42, "metadata": { "scrolled": true }, "outputs": [], "source": [
 "# One row per (tool, main operation label, main topic label) binding. The query has no DISTINCT,\n",
 "# so the same triple of values can appear more than once.\n",
 "list_tools_op_topic = \"\"\"\n",
 "SELECT ?s1 ?ol ?tl WHERE { \n",
 " ?s1 a .\n",
 " ?oc rdfs:subClassOf .\n",
 " ?oc rdfs:label ?ol .\n",
 " ?osc rdfs:subClassOf* ?oc .\n",
 " ?s1 ?osc .\n",
 " \n",
 " ?tc rdfs:subClassOf .\n",
 " ?tc rdfs:label ?tl .\n",
 " ?tsc rdfs:subClassOf* ?tc .\n",
 " ?s1 ?tsc .\n",
 "}\n",
 "\"\"\"\n",
 "results = g.query(list_tools_op_topic, initNs=ns)\n",
 "tools = [\n",
 "    {'tool': str(r['s1']), 'operation': str(r['ol']), 'topic': str(r['tl'])}\n",
 "    for r in results\n",
 "]" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Text(0, 0, 'Annotation'),\n", " Text(1, 0, 'Visualisation'),\n", 
" Text(2, 0, 'Data handling'),\n", " Text(3, 0, 'Prediction and recognition'),\n", " Text(4, 0, 'Alignment construction'),\n", " Text(5, 0, 'Analysis'),\n", " Text(6, 0, 'Classification'),\n", " Text(7, 0, 'Generation'),\n", " Text(8, 0, 'Conversion'),\n", " Text(9, 0, 'Image processing'),\n", " Text(10, 0, 'Clustering'),\n", " Text(11, 0, 'Optimisation or refinement'),\n", " Text(12, 0, 'Validation')]" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [
 "fig, ax = plt.subplots(figsize=(16, 10))\n",
 "\n",
 "# `tools` holds one row per (tool, operation, topic); keep one row per (tool, operation)\n",
 "# so a tool with several topics is not counted several times for the same operation\n",
 "tool_operations = pd.DataFrame(tools)[[\"tool\", \"operation\"]].drop_duplicates()\n",
 "sns.countplot(x=\"operation\", data=tool_operations, ax=ax)\n",
 "ax.set(title=\"Number of distinct tools per main operation\", ylabel=\"distinct tools\")\n",
 "ax.set_xticklabels(ax.get_xticklabels(), rotation=45);" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Text(0, 0, 'Machine learning'),\n", " Text(1, 0, 'Bioimage informatics'),\n", " Text(2, 0, 'Imaging'),\n", " Text(3, 0, 'Data sharing'),\n", " Text(4, 0, 'Digital histology'),\n", " Text(5, 0, 'High content screening'),\n", " Text(6, 0, 'In-silico reconstruction'),\n", " Text(7, 0, 'Statistics')]" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [
 "fig, ax = plt.subplots(figsize=(16, 10))\n",
 "\n",
 "# `tools` holds one row per (tool, operation, topic); keep one row per (tool, topic)\n",
 "# so a tool with several operations is not counted several times for the same topic\n",
 "tool_topics = pd.DataFrame(tools)[[\"tool\", \"topic\"]].drop_duplicates()\n",
 "sns.countplot(x=\"topic\", data=tool_topics, ax=ax)\n",
 "ax.set(title=\"Number of distinct tools per main topic\", ylabel=\"distinct tools\")\n",
 "ax.set_xticklabels(ax.get_xticklabels(), rotation=45);" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Text(0.5, 0, 'Bioimage informatics'),\n", " Text(1.5, 0, 'Data sharing'),\n", " Text(2.5, 0, 'Digital histology'),\n", " Text(3.5, 0, 'High content screening'),\n", " Text(4.5, 0, 'Imaging'),\n", " Text(5.5, 0, 'In-silico reconstruction'),\n", " Text(6.5, 0, 'Machine learning'),\n", " Text(7.5, 0, 'Statistics')]" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [
 "fig, ax = plt.subplots(figsize=(16, 10))\n",
 "\n",
 "# drop repeated (tool, operation, topic) rows so each tool counts once per heatmap cell\n",
 "df_heatmap = pd.DataFrame(tools).drop_duplicates()\n",
 "\n",
 "# rows: main operation, columns: main topic, values: number of tools carrying both\n",
 "df_crosstab = pd.crosstab(df_heatmap['operation'], df_heatmap['topic'])\n",
 "\n",
 "sns.heatmap(df_crosstab, linewidths=5, ax=ax)\n",
 "ax.set_title(\"Number of distinct tools per main operation and main topic\")\n",
 "ax.set_xticklabels(ax.get_xticklabels(), rotation=45);" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.10" } }, "nbformat": 4, "nbformat_minor": 2 }