{ "cells": [ { "cell_type": "code", "execution_count": 422, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: SPARQLWrapper in c:\\users\\aleja\\anaconda3\\lib\\site-packages (1.8.5)\n", "Requirement already satisfied: rdflib>=4.0 in c:\\users\\aleja\\anaconda3\\lib\\site-packages (from SPARQLWrapper) (5.0.0)\n", "Requirement already satisfied: six in c:\\users\\aleja\\anaconda3\\lib\\site-packages (from rdflib>=4.0->SPARQLWrapper) (1.14.0)\n", "Requirement already satisfied: isodate in c:\\users\\aleja\\anaconda3\\lib\\site-packages (from rdflib>=4.0->SPARQLWrapper) (0.6.0)\n", "Requirement already satisfied: pyparsing in c:\\users\\aleja\\anaconda3\\lib\\site-packages (from rdflib>=4.0->SPARQLWrapper) (2.4.0)\n", "Requirement already satisfied: wikipedia in c:\\users\\aleja\\anaconda3\\lib\\site-packages (1.4.0)\n", "Requirement already satisfied: requests<3.0.0,>=2.0.0 in c:\\users\\aleja\\anaconda3\\lib\\site-packages (from wikipedia) (2.23.0)\n", "Requirement already satisfied: beautifulsoup4 in c:\\users\\aleja\\anaconda3\\lib\\site-packages (from wikipedia) (4.7.1)\n", "Requirement already satisfied: chardet<4,>=3.0.2 in c:\\users\\aleja\\anaconda3\\lib\\site-packages (from requests<3.0.0,>=2.0.0->wikipedia) (3.0.4)\n", "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\aleja\\anaconda3\\lib\\site-packages (from requests<3.0.0,>=2.0.0->wikipedia) (2019.11.28)\n", "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in c:\\users\\aleja\\anaconda3\\lib\\site-packages (from requests<3.0.0,>=2.0.0->wikipedia) (1.25.8)\n", "Requirement already satisfied: idna<3,>=2.5 in c:\\users\\aleja\\anaconda3\\lib\\site-packages (from requests<3.0.0,>=2.0.0->wikipedia) (2.9)\n", "Requirement already satisfied: soupsieve>=1.2 in c:\\users\\aleja\\anaconda3\\lib\\site-packages (from beautifulsoup4->wikipedia) (1.8)\n" ] } ], "source": [ "# En primer lugar se instalan e importan las librerías y dependencias que serán utilizadas en el proceso.\n", "!pip install SPARQLWrapper \n", "!pip install wikipedia \n", "import pandas as pd\n", "from pandas.io.json import json_normalize\n", "from SPARQLWrapper import SPARQLWrapper, JSON, XML\n", "import wikipedia\n", "import json\n", "import xml.etree.ElementTree as ET\n", "import time" ] }, { "cell_type": "code", "execution_count": 423, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "¿Cual es el identificador de la clase del cual se quiere generar un diagrama?\n", "Q1004\n", "¿Cual es el idioma en el que quieres generar el diagrama (ES:español, EN:ingles, ...)?\n", "ES\n" ] } ], "source": [ "# A continuación se pregunta al usuario por el identificador de la clase del cual se quiere generar un diagrama\n", "print(\"¿Cual es el identificador de la clase del cual se quiere generar un diagrama?\")\n", "clase = input()\n", "print(\"¿Cual es el idioma en el que quieres generar el diagrama (ES:español, EN:ingles, ...)?\")\n", "idioma = input()" ] }, { "cell_type": "code", "execution_count": 424, "metadata": {}, "outputs": [], "source": [ "#Se define la función para consultar Wikidata. Se consulta el endpoint señalado y \n", "#devuelve los resultados como un DataFrame de Pandas.\n", "\n", "def query_wikidata(sparql_query, sparql_service_url): \n", " \n", " # crear la conexión con el endpoint\n", " sparql = SPARQLWrapper(sparql_service_url) \n", " sparql.setQuery(sparql_query)\n", " sparql.setReturnFormat(JSON)\n", "\n", " # solicitar resultados\n", " result = sparql.query().convert()\n", " return json_normalize(result[\"results\"][\"bindings\"])\n", "\n", "#Se genera la consulta para obtener el numero de instancias de la clase a representar\n", "sparql_query1 = \"\"\"SELECT DISTINCT ?clase ?claseLabel\n", "WHERE {\n", "?clase wdt:P31 wd:\"\"\"+clase+\"\"\".\n", "SERVICE wikibase:label { bd:serviceParam wikibase:language \"es\". }\n", "}\n", "\"\"\"\n", "#para consultar otro endpoint, cambiar la URL del servicio y la consulta.\n", "sparql_service_url = \"https://query.wikidata.org/sparql\"\n", "instancias_wikidata = query_wikidata(sparql_query1, sparql_service_url)" ] }, { "cell_type": "code", "execution_count": 425, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
clase.typeclase.valueclaseLabel.typeclaseLabel.valueclaseLabel.xml:lang
0urihttp://www.wikidata.org/entity/Q91879literalQ91879NaN
1urihttp://www.wikidata.org/entity/Q91892literalQ91892NaN
2urihttp://www.wikidata.org/entity/Q91912literalQ91912NaN
3urihttp://www.wikidata.org/entity/Q91943literalQ91943NaN
4urihttp://www.wikidata.org/entity/Q92191literalQ92191NaN
\n", "
" ], "text/plain": [ " clase.type clase.value claseLabel.type \\\n", "0 uri http://www.wikidata.org/entity/Q91879 literal \n", "1 uri http://www.wikidata.org/entity/Q91892 literal \n", "2 uri http://www.wikidata.org/entity/Q91912 literal \n", "3 uri http://www.wikidata.org/entity/Q91943 literal \n", "4 uri http://www.wikidata.org/entity/Q92191 literal \n", "\n", " claseLabel.value claseLabel.xml:lang \n", "0 Q91879 NaN \n", "1 Q91892 NaN \n", "2 Q91912 NaN \n", "3 Q91943 NaN \n", "4 Q92191 NaN " ] }, "execution_count": 425, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Se confirma que la consulta haya salido bien\n", "instancias_wikidata.head()" ] }, { "cell_type": "code", "execution_count": 426, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4592" ] }, "execution_count": 426, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Se obtiene el numero de instancias de la clase de entrada\n", "numero_de_instancias = instancias_wikidata.shape[0]\n", "numero_de_instancias" ] }, { "cell_type": "code", "execution_count": 427, "metadata": {}, "outputs": [], "source": [ "#Se genera la consulta para obtener las propiedades de la clase de entrada\n", "sparql_query2 = \"\"\"SELECT DISTINCT ?p ?pLabel\n", "WHERE {\n", "?clase wdt:P31 wd:\"\"\"+clase+\"\"\";\n", "?p ?x.\n", "#to obtain property labels:\n", "?property wikibase:directClaim ?p\n", "#to obtain labels in spanish:\n", "SERVICE wikibase:label { bd:serviceParam wikibase:language \"es\". }\n", "} \n", " \"\"\"\n", "#para consultar otro endpoint, cambiar la URL del servicio y la consulta.\n", "sparql_service_url = \"https://query.wikidata.org/sparql\"\n", "propiedades_wikidata = query_wikidata(sparql_query2, sparql_service_url)" ] }, { "cell_type": "code", "execution_count": 428, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
p.typep.valuepLabel.typepLabel.value
0urihttp://www.wikidata.org/prop/direct/P279literalhttp://www.wikidata.org/prop/direct/P279
1urihttp://www.wikidata.org/prop/direct/P407literalhttp://www.wikidata.org/prop/direct/P407
2urihttp://www.wikidata.org/prop/direct/P495literalhttp://www.wikidata.org/prop/direct/P495
3urihttp://www.wikidata.org/prop/direct/P571literalhttp://www.wikidata.org/prop/direct/P571
4urihttp://www.wikidata.org/prop/direct/P580literalhttp://www.wikidata.org/prop/direct/P580
\n", "
" ], "text/plain": [ " p.type p.value pLabel.type \\\n", "0 uri http://www.wikidata.org/prop/direct/P279 literal \n", "1 uri http://www.wikidata.org/prop/direct/P407 literal \n", "2 uri http://www.wikidata.org/prop/direct/P495 literal \n", "3 uri http://www.wikidata.org/prop/direct/P571 literal \n", "4 uri http://www.wikidata.org/prop/direct/P580 literal \n", "\n", " pLabel.value \n", "0 http://www.wikidata.org/prop/direct/P279 \n", "1 http://www.wikidata.org/prop/direct/P407 \n", "2 http://www.wikidata.org/prop/direct/P495 \n", "3 http://www.wikidata.org/prop/direct/P571 \n", "4 http://www.wikidata.org/prop/direct/P580 " ] }, "execution_count": 428, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Se confirma que la consulta haya salido bien\n", "propiedades_wikidata.head()" ] }, { "cell_type": "code", "execution_count": 429, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "227\n" ] } ], "source": [ "#Se genera la consulta para obtener las propiedades de la clase.\n", "sparql_query2 = \"\"\"SELECT DISTINCT ?p ?pLabel\n", "WHERE {\n", "?clase wdt:P31 wd:\"\"\"+clase+\"\"\";\n", "?p ?x.\n", "#to obtain property labels:\n", "?property wikibase:directClaim ?p\n", "#to obtain labels in spanish:\n", "SERVICE wikibase:label { bd:serviceParam wikibase:language \"es\". }\n", "} \n", " \"\"\"\n", "#para consultar otro endpoint, cambiar la URL del servicio y la consulta.\n", "sparql_service_url = \"https://query.wikidata.org/sparql\"\n", "propiedades_wikidata = query_wikidata(sparql_query2, sparql_service_url)\n", "\n", "#Se listan las propiedades de salida de la clase y se le asignan a la variable \"lista_propiedades\"\n", "lista_completa = propiedades_wikidata[\"p.value\"].tolist()\n", "lista_propiedades = []\n", "for ele in lista_completa:\n", " ele = ele[36:]\n", " lista_propiedades.append(ele)\n", "print(len(lista_propiedades))" ] }, { "cell_type": "code", "execution_count": 434, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['P495', ['Q6256', 'Q7275', 'Q3024240'], 'P674', ['Q95074'], 'P6338', ['Q5'], 'P50', ['Q5', 'Q43229'], 'P110', ['Q5'], 'P123', ['Q11032', 'Q2085381'], 'P136', ['Q483394'], 'P840', ['Q82794', 'Q3895768'], 'P910', ['Q4167836'], 'P941', ['Q5'], 'P17', ['Q6256', 'Q43702'], 'P170', ['Q5'], 'P1434', ['Q559618'], 'P1080', ['Q559618'], 'P2354', ['Q13406463'], 'P166', ['Q618779'], 'P5008', ['Q16695773'], 'P8345', ['Q196600'], 'P747', ['Q3331189'], 'P161', ['Q5'], 'P58', ['Q5'], 'P449', ['Q15265344'], 'P462', ['Q1075'], 'P57', ['Q5'], 'P106', ['Q28640', 'Q17305127'], 'P767', ['Q5'], 'P676', ['Q5'], 'P736', ['Q5'], 'P86', ['Q5'], 'P1411', ['Q618779'], 'P98', ['Q5'], 'P5202', ['Q5'], 'P655', ['Q5'], 'P793', ['Q1190554'], 'P2679', ['Q5'], 'P135', ['Q2198855'], 'P175', ['Q215380'], 'P7936', ['Q815823'], 'P1877', ['Q5'], 'P275', ['Q79719'], 'P5769', ['Q5'], 'P112', ['Q5'], 'P437', ['Q81941037'], 'P488', ['Q5'], 'P641', ['Q31629']]\n" ] } ], "source": [ "lista_resultado_propiedades_clases = []\n", "#Para cada propiedad de la lista:\n", "for prop in lista_propiedades:\n", " \n", " #Se obtienen de las propiedades anteriores aquellas que son object properties. Se almacenan en una lista.\n", " #Aquí se genera la consulta. Esta se hace para ver si la propiedad con la que se esta tratando es object property o no.\n", " sparql= SPARQLWrapper(\"https://query.wikidata.org/sparql\")\n", " sparql.setQuery(\"\"\"ASK WHERE {\n", " wdt:\"\"\"+prop+\"\"\" rdf:type owl:ObjectProperty\n", " } \n", " \"\"\")\n", " sparql.setReturnFormat(XML)\n", " results = sparql.query().convert()\n", " string = results.toxml()\n", " \n", " #Se obtiene el valor booleano del xml string, siendo este \"true\" en caso afirmativo\n", " es_OP = string[105:109]\n", " \n", " #En caso de que la variable es_OP sea \"true\", es decir la propiedad sea object propery:\n", " if es_OP == \"true\":\n", " \n", " #Se genera la consulta. Se realiza esta consulta para obtener las clases que pertenecen a dicha propiedad.\n", " #Se realiza esta consulta para todas las object properties encontradas anteriormente.\n", " sparql1 = SPARQLWrapper(\"https://query.wikidata.org/sparql\")\n", " sparql1.setQuery(\"\"\"\n", " SELECT DISTINCT ?claseInicial ?claseInicialLabel ?prop ?propLabel ?Clase ?ClaseLabel\n", " WHERE {\n", " #occupation (P106) must be Formula 1 driver (Q10841764)\n", " ?claseInicial wdt:P31 wd:\"\"\"+clase+\"\"\" .\n", " ?claseInicial wdt:\"\"\"+prop +\"\"\" ?prop .\n", " ?prop wdt:P31 ?Clase .\n", " #to obtain labels in spanish:\n", " SERVICE wikibase:label { bd:serviceParam wikibase:language \"es\". }\n", " } \n", " \"\"\")\n", " sparql1.setReturnFormat(JSON)\n", " results = sparql1.query().convert()\n", " \n", " #Se añade a la lista \"lista_clases_para_una_propiedad_completo\" todas las clases que resultan de la consulta anterior. \n", " lista_clases_para_una_propiedad_completo = []\n", " for result in results[\"results\"][\"bindings\"]:\n", " lista_clases_para_una_propiedad_completo.append(result[\"Clase\"][\"value\"])\n", " \n", " #Al tener estos valores en su inicio la URL \"https:..\", esto se elimina dejando solo el identificador de la clase.\n", " lista_clases_para_una_propiedad = []\n", " for ele in lista_clases_para_una_propiedad_completo:\n", " ele = ele[31:]\n", " lista_clases_para_una_propiedad.append(ele)\n", " \n", " #Se tiene la lista \"lista_clases_para_una_propiedad\" en la cual se encuentran todas las clases de la propiedad \n", " #estudiada.\n", " \n", " #Se obtienen las value type constraint de las propiedades. La consulta siguiente las obtiene.\n", " sparql2 = SPARQLWrapper(\"https://query.wikidata.org/sparql\")\n", " sparql2.setQuery(\"\"\"SELECT ?Property_ ?Property_Label ?Property_Description ?class_ ?class_Label ?relation_ ?relation_Label\n", " WHERE {\n", " ?Property_ wikibase:directClaim wdt:\"\"\"+prop+\"\"\" .\n", " ?Property_ p:P2302 ?constraint_statement .\n", " ?constraint_statement ps:P2302 wd:Q21510865 .\n", " OPTIONAL {?constraint_statement pq:P2308 ?class_ .}\n", " OPTIONAL {?constraint_statement pq:P2309 ?relation_ .}\n", "\n", " SERVICE wikibase:label { bd:serviceParam wikibase:language \"[AUTO_LANGUAGE],en\". }\n", " } \n", " \"\"\")\n", " sparql2.setReturnFormat(JSON)\n", " results = sparql2.query().convert()\n", "\n", " #En la lista \"lista_value_type_constraints_completa\" se almacenan todas las value type constraint de la propiedad \n", " #estudiada.\n", " lista_value_type_constraints_completa = []\n", " for result in results[\"results\"][\"bindings\"]:\n", " lista_value_type_constraints_completa.append(result[\"class_\"][\"value\"])\n", " \n", " #Al tener estos valores en su inicio la URL \"https:..\", esto se elimina dejando solo el identificador de la clase.\n", " lista_value_type_constraints = []\n", " for ele in lista_value_type_constraints_completa:\n", " ele = ele[31:]\n", " lista_value_type_constraints.append(ele)\n", " \n", " #Se tiene la lista \"lista_value_type_constraints\" con las value type constraint de la propiedad estudiada.\n", " if (lista_clases_para_una_propiedad != []) & (lista_value_type_constraints != []):\n", " #Se guardan en la lista \"lista_clases_a_representar\" aquellos identificadores que esten en ambas listas.\n", " lista_clases_a_representar = []\n", " for ele_VTC in lista_value_type_constraints:\n", " for ele_CPUP in lista_clases_para_una_propiedad:\n", " if ele_VTC == ele_CPUP:\n", " lista_clases_a_representar.append(ele_VTC)\n", " \n", " #Se tiene la lista \"lista_clases_a_representar\" con las clases que hay que representar para la propiedad estudiada.\n", " \n", " #Se eliminan los duplicados\n", " if lista_clases_a_representar != []: \n", " result = []\n", " for ele in lista_clases_a_representar:\n", " if ele not in result:\n", " result.append(ele)\n", " \n", " #Se añade a la lista final el valor de la propiedad\n", " lista_resultado_propiedades_clases.append(prop)\n", " \n", " result2 = result\n", " result3 = result\n", " #Se eliminan las clases que sean subclases de otra clase de la misma lista\n", " if len(result) != 1:\n", " for ele in result:\n", " #Se obtienen las subclases de la clase ele\n", " sparql3 = SPARQLWrapper(\"https://query.wikidata.org/sparql\")\n", " sparql3.setQuery(\"\"\"SELECT ?s ?desc\n", " WHERE{\n", " ?s wdt:P279+ wd:\"\"\"+ele+\"\"\" .\n", " OPTIONAL {\n", " ?s rdfs:label ?desc filter (lang(?desc) = \"es\").}\n", " }\n", " \"\"\")\n", " sparql3.setReturnFormat(JSON)\n", " results = sparql3.query().convert()\n", "\n", " #En la lista \"lista_value_type_constraints_completa\" se almacenan todas las value type constraint de \n", " #la propiedad estudiada.\n", " lista_subclases_de_ele_completa = []\n", " for result in results[\"results\"][\"bindings\"]:\n", " lista_subclases_de_ele_completa.append(result[\"s\"][\"value\"])\n", " \n", " #Al tener estos valores en su inicio la URL \"https:..\", esto se elimina dejando solo el identificador \n", " #de la clase.\n", " lista_subclases_de_ele = []\n", " for ele2 in lista_subclases_de_ele_completa:\n", " ele2 = ele2[31:]\n", " lista_subclases_de_ele.append(ele2)\n", " \n", " #Se comparan todos los elementos de la lista \"result2\" con los de la lista \"lista_subclase_de_ele\" y \n", " #se eliman de la lista final si estos son iguales.\n", " for ele4 in result2:\n", " for ele3 in lista_subclases_de_ele:\n", " if ele4 == ele3:\n", " result3.remove(ele4)\n", " \n", " #Se añaden las listas con las clases a representar en la lista final.\n", " lista_resultado_propiedades_clases.append(result3)\n", " \n", " \n", "#Se imprime la lista resultado final con las propiedades a representar y sus respectivas clases\n", "print(lista_resultado_propiedades_clases)" ] }, { "cell_type": "code", "execution_count": 435, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[['Q5'], 'P6338', 'P110', 'P941', 'P170', 'P161', 'P58', 'P57', 'P767', 'P676', 'P736', 'P86', 'P98', 'P5202', 'P655', 'P2679', 'P1877', 'P5769', 'P112', 'P488', ['Q559618'], 'P1434', 'P1080', ['Q618779'], 'P166', 'P1411']\n", "\n", "\n", "['P495', ['Q6256', 'Q7275', 'Q3024240'], 'P674', ['Q95074'], 'P50', ['Q5', 'Q43229'], 'P123', ['Q11032', 'Q2085381'], 'P136', ['Q483394'], 'P840', ['Q82794', 'Q3895768'], 'P910', ['Q4167836'], 'P17', ['Q6256', 'Q43702'], 'P2354', ['Q13406463'], 'P5008', ['Q16695773'], 'P8345', ['Q196600'], 'P747', ['Q3331189'], 'P449', ['Q15265344'], 'P462', ['Q1075'], 'P106', ['Q28640', 'Q17305127'], 'P793', ['Q1190554'], 'P135', ['Q2198855'], 'P175', ['Q215380'], 'P7936', ['Q815823'], 'P275', ['Q79719'], 'P437', ['Q81941037'], 'P641', ['Q31629']]\n" ] } ], "source": [ "#Se generan las listas \"lista_varias_propiedades_una_clase\" y \"lista_varias_clases_una_propiedad\".\n", "#En la primera aparecen todas las propiedades que apuntan a una sola clase.\n", "#En la segunda aparecen todas las clases a las que apunta una sola propiedad.\n", "result = []\n", "result2 = []\n", "for ele in range(0,len(lista_resultado_propiedades_clases)):\n", " if lista_resultado_propiedades_clases[ele] not in result:\n", " result.append(lista_resultado_propiedades_clases[ele])\n", " else:\n", " if lista_resultado_propiedades_clases[ele] not in result2:\n", " result2.append(lista_resultado_propiedades_clases[ele])\n", " m=[i for i,x in enumerate(lista_resultado_propiedades_clases) if x==lista_resultado_propiedades_clases[ele]]\n", " for i in m:\n", " result2.append(lista_resultado_propiedades_clases[i-1])\n", "\n", "lista_varias_propiedades_una_clase = result2\n", "lista_varias_clases_una_propiedad = lista_resultado_propiedades_clases\n", "\n", "def eliminar_valores_de_una_lista(the_list, val):\n", " return [value for value in the_list if value != val]\n", "\n", "for x in lista_varias_clases_una_propiedad:\n", " for y in lista_varias_propiedades_una_clase:\n", " if x == y:\n", " lista_varias_clases_una_propiedad = eliminar_valores_de_una_lista(lista_varias_clases_una_propiedad, y)\n", "\n", "print(lista_varias_propiedades_una_clase)\n", "print(\"\\n\")\n", "print(lista_varias_clases_una_propiedad)" ] }, { "cell_type": "code", "execution_count": 436, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[['ser humano (Q5)'], 'colorista (P6338)', 'ilustrador (P110)', 'inspirado por (P941)', 'creador (P170)', 'miembro del reparto (P161)', 'guionista (P58)', 'director (P57)', 'colaborador (P767)', 'letra de (P676)', 'artista de la portada (P736)', 'compositor (P86)', 'editor (P98)', 'adaptación de (P5202)', 'traductor (P655)', 'autor del prefacio (P2679)', 'inspirado en la obra de (P1877)', 'redactor en jefe (P5769)', 'fundador (P112)', 'presidente (P488)', ['universo de ficción (Q559618)'], 'describe el universo ficticio (P1434)', 'del universo ficticio (P1080)', ['distinción (Q618779)'], 'premio recibido (P166)', 'nominado a (P1411)']\n", "\n", "\n", "['país de origen (P495)', ['país (Q6256)', 'Estado (Q7275)', 'Estado desaparecido (Q3024240)'], 'personajes (P674)', ['personaje de ficción (Q95074)'], 'autor (P50)', ['ser humano (Q5)', 'organización (Q43229)'], 'editorial (P123)', ['periódico (Q11032)', 'editorial (Q2085381)'], 'género (P136)', ['género (Q483394)'], 'lugar de la narración (P840)', ['región geográfica (Q82794)', 'lugar ficticio (Q3895768)'], 'categoría principal del tema (P910)', ['categoría de Wikimedia (Q4167836)'], 'país (P17)', ['país (Q6256)', 'federación (Q43702)'], 'lista del elemento (P2354)', ['artículo de lista de Wikimedia (Q13406463)'], 'lista de interés para el proyecto Wikimedia (P5008)', ['wikiproyecto (Q16695773)'], 'franquicia de medios (P8345)', ['franquicia de medios (Q196600)'], 'edición o traducción (P747)', ['edición, traducción o versión (Q3331189)'], 'emisora original (P449)', ['difusora (Q15265344)'], 'color (P462)', ['color (Q1075)'], 'ocupación (P106)', ['profesión (Q28640)', 'ocupación ficticia (Q17305127)'], 'evento significativo (P793)', ['acontecimiento (Q1190554)'], 'movimiento (P135)', ['movimiento cultural (Q2198855)'], 'intérprete (P175)', ['grupo musical (Q215380)'], 'modelo de negocio (P7936)', ['modelo de negocio (Q815823)'], 'licencia (P275)', ['licencia (Q79719)'], 'distribución (P437)', ['método de distribución del producto (Q81941037)'], 'deporte (P641)', ['tipo de deporte (Q31629)']]\n" ] } ], "source": [ "#Se busca la etiqueta de cada uno de los elementos de las dos listas y se superponen al identificador anterior\n", "for pos in range(0,len(lista_varias_propiedades_una_clase)):\n", " res = isinstance(lista_varias_propiedades_una_clase[pos], str) \n", " #Para las propiedades\n", " if str(res) == \"True\":\n", " #Se obtienen las subclases de la clase ele\n", " sparql4 = SPARQLWrapper(\"https://query.wikidata.org/sparql\")\n", " sparql4.setQuery(\"\"\"SELECT *\n", " WHERE {\n", " wd:\"\"\"+lista_varias_propiedades_una_clase[pos]+\"\"\" rdfs:label ?label .\n", " FILTER (langMatches( lang(?label),'\"\"\"+idioma+\"\"\"' ) )\n", " } \n", " LIMIT 1\n", " \"\"\")\n", " sparql4.setReturnFormat(JSON)\n", " results = sparql4.query().convert()\n", " lista_label = []\n", " for result in results[\"results\"][\"bindings\"]:\n", " lista_label.append(result[\"label\"][\"value\"])\n", " for ele2 in lista_label:\n", " label = ele2\n", " label = label + \" (\"+lista_varias_propiedades_una_clase[pos]+\")\"\n", " lista_varias_propiedades_una_clase[pos] = label\n", " #Para las listas de clases\n", " else:\n", " for ele in range(0,len(lista_varias_propiedades_una_clase[pos])):\n", " #Se obtienen las subclases de la clase ele\n", " sparql4 = SPARQLWrapper(\"https://query.wikidata.org/sparql\")\n", " sparql4.setQuery(\"\"\"SELECT *\n", " WHERE {\n", " wd:\"\"\"+lista_varias_propiedades_una_clase[pos][ele]+\"\"\" rdfs:label ?label .\n", " FILTER (langMatches( lang(?label), '\"\"\"+idioma+\"\"\"' ) )\n", " } \n", " LIMIT 1\n", " \"\"\")\n", " sparql4.setReturnFormat(JSON)\n", " results = sparql4.query().convert()\n", " lista_label = []\n", " for result in results[\"results\"][\"bindings\"]:\n", " lista_label.append(result[\"label\"][\"value\"])\n", " for ele2 in lista_label:\n", " label = ele2\n", " label = label + \" (\"+lista_varias_propiedades_una_clase[pos][ele]+\")\"\n", " lista_varias_propiedades_una_clase[pos][ele] = label \n", "\n", "for pos in range(0,len(lista_varias_clases_una_propiedad)):\n", " res = isinstance(lista_varias_clases_una_propiedad[pos], str) \n", " #Para las propiedades\n", " if str(res) == \"True\":\n", " #Se obtienen las subclases de la clase ele\n", " sparql4 = SPARQLWrapper(\"https://query.wikidata.org/sparql\")\n", " sparql4.setQuery(\"\"\"SELECT *\n", " WHERE {\n", " wd:\"\"\"+lista_varias_clases_una_propiedad[pos]+\"\"\" rdfs:label ?label .\n", " FILTER (langMatches( lang(?label), '\"\"\"+idioma+\"\"\"' ) )\n", " } \n", " LIMIT 1\n", " \"\"\")\n", " sparql4.setReturnFormat(JSON)\n", " results = sparql4.query().convert()\n", " lista_label = []\n", " for result in results[\"results\"][\"bindings\"]:\n", " lista_label.append(result[\"label\"][\"value\"])\n", " for ele2 in lista_label:\n", " label = ele2\n", " label = label + \" (\"+lista_varias_clases_una_propiedad[pos]+\")\"\n", " lista_varias_clases_una_propiedad[pos] = label\n", " #Para las listas de clases\n", " else:\n", " for ele in range(0,len(lista_varias_clases_una_propiedad[pos])):\n", " #Se obtienen las subclases de la clase ele\n", " sparql5 = SPARQLWrapper(\"https://query.wikidata.org/sparql\")\n", " sparql5.setQuery(\"\"\"SELECT *\n", " WHERE {\n", " wd:\"\"\"+lista_varias_clases_una_propiedad[pos][ele]+\"\"\" rdfs:label ?label .\n", " FILTER (langMatches( lang(?label), '\"\"\"+idioma+\"\"\"' ) )\n", " } \n", " LIMIT 1\n", " \"\"\")\n", " sparql5.setReturnFormat(JSON)\n", " results = sparql5.query().convert()\n", " lista_label = []\n", " for result in results[\"results\"][\"bindings\"]:\n", " lista_label.append(result[\"label\"][\"value\"])\n", " for ele2 in lista_label:\n", " label = ele2\n", " label = label + \" (\"+lista_varias_clases_una_propiedad[pos][ele]+\")\"\n", " lista_varias_clases_una_propiedad[pos][ele] = label \n", " \n", "print(lista_varias_propiedades_una_clase)\n", "print(\"\\n\")\n", "print(lista_varias_clases_una_propiedad)" ] }, { "cell_type": "code", "execution_count": 437, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['P279', 'P407', 'P495', 'P571', 'P580', 'P577', 'P646', 'P674', 'P856', 'P1433', 'P2671', 'P3589', 'P4969', 'P6338', 'P7818', 'P31', 'P50', 'P110', 'P123', 'P136', 'P156', 'P155', 'P179', 'P212', 'P840', 'P910', 'P941', 'P957', 'P1417', 'P1476', 'P2408', 'P3265', 'P3417', 'P5905', 'P17', 'P170', 'P582', 'P1274', 'P1296', 'P1434', 'P3365', 'P4480', 'P127', 'P154', 'P361', 'P393', 'P1889', 'P1984', 'P2638', 'P4087', 'P6839', 'P1080', 'P2031', 'P244', 'P373', 'P527', 'P1424', 'P1448', 'P2003', 'P2013', 'P2163', 'P2354', 'P3984', 'P4073', 'P5357', 'P7859', 'P18', 'P214', 'P227', 'P166', 'P735', 'P7047', 'P21', 'P921', 'P1267', 'P1985', 'P4485', 'P8873', 'P1705', 'P5008', 'P8345', 'P144', 'P747', 'P2047', 'P2318', 'P6175', 'P161', 'P268', 'P6366', 'P58', 'P449', 'P1113', 'P345', 'P364', 'P462', 'P480', 'P905', 'P1258', 'P1265', 'P1545', 'P1970', 'P2334', 'P2465', 'P2508', 'P2509', 'P2529', 'P2603', 'P2704', 'P2747', 'P3107', 'P3143', 'P3138', 'P3212', 'P3302', 'P3704', 'P3868', 'P3933', 'P4277', 'P4282', 'P4529', 'P4947', 'P5032', 'P5786', 'P5849', 'P6127', 'P6133', 'P8033', 'P57', 'P291', 'P1104', 'P576', 'P3912', 'P138', 'P648', 'P724', 'P5099', 'P2969', 'P8383', 'P6262', 'P5396', 'P106', 'P2284', 'P3275', 'P8619', 'P6981', 'P7363', 'P236', 'P2093', 'P767', 'P433', 'P676', 'P736', 'P973', 'P86', 'P3943', 'P2002', 'P8687', 'P2635', 'P1343', 'P243', 'P3959', 'P7266', 'P8406', 'P585', 'P287', 'P6947', 'P1814', 'P7704', 'P1273', 'P1411', 'P8179', 'P195', 'P1392', 'P1441', 'P98', 'P1092', 'P8632', 'P5202', 'P5749', 'P655', 'P793', 'P1680', 'P1922', 'P2679', 'P3132', 'P180', 'P460', 'P6164', 'P5331', 'P2913', 'P135', 'P175', 'P1269', 'P2687', 'P5733', 'P2397', 'P750', 'P276', 'P8644', 'P51', 'P7936', 'P629', 'P1085', 'P1877', 'P3631', 'P3962', 'P304', 'P478', 'P1235', 'P6623', 'P275', 'P6216', 'P8091', 'P5769', 'P112', 'P131', 'P953', 'P437', 'P675', 'P2067', 'P3999', 'P159', 'P488', 'P625', 'P2896', 'P641', 'P1435']\n", "227\n", "['P279', 'P407', 'P495', 'P571', 'P580', 'P577', 'P674', 'P856', 'P1433', 'P4969', 'P6338', 'P31', 'P50', 'P110', 'P123', 'P136', 'P156', 'P155', 'P179', 'P840', 'P910', 'P941', 'P1476', 'P2408', 'P17', 'P170', 'P582', 'P1434', 'P4480', 'P127', 'P154', 'P361', 'P393', 'P1889', 'P1080', 'P2031', 'P527', 'P1424', 'P1448', 'P2354', 'P18', 'P166', 'P735', 'P7047', 'P21', 'P921', 'P1705', 'P5008', 'P8345', 'P144', 'P747', 'P2047', 'P2318', 'P6175', 'P161', 'P58', 'P449', 'P1113', 'P364', 'P462', 'P1545', 'P2747', 'P57', 'P291', 'P1104', 'P576', 'P3912', 'P138', 'P106', 'P2284', 'P3275', 'P8619', 'P2093', 'P767', 'P433', 'P676', 'P736', 'P973', 'P86', 'P1343', 'P7266', 'P585', 'P287', 'P1814', 'P1411', 'P195', 'P1441', 'P98', 'P8632', 'P5202', 'P655', 'P793', 'P1680', 'P2679', 'P180', 'P460', 'P2913', 'P135', 'P175', 'P1269', 'P750', 'P276', 'P51', 'P7936', 'P629', 'P1877', 'P304', 'P478', 'P275', 'P6216', 'P8091', 'P5769', 'P112', 'P131', 'P953', 'P437', 'P2067', 'P3999', 'P159', 'P488', 'P625', 'P2896', 'P641', 'P1435']\n", "124\n" ] } ], "source": [ "lista_a_no_representar = ['P646','P957','P1417','P3265','P3417','P3589','P7818','P212','P1274','P1296','P2638','P3365','P2671',\n", " 'P6839','P244','P373','P2003','P2013','P2163','P3984','P4073','P214','P227','P1267','P7859','P268',\n", " 'P4485','P8873','P6366','P345','P480','P648','P724','P905','P1258','P1265','P1970','P2334','P2465',\n", " 'P2508','P2509','P2529','P2603','P2704','P3107','P3143','P3138','P3212','P3302','P3704','P3868',\n", " 'P3933','P4277','P4282','P4529','P4947','P5032','P5786','P5849','P6127','P6133','P8033','P1392',\n", " 'P1092','P5396','P2969','P8383','P7363','P236','P243','P2002','P5331','P5099','P3943','P6262','P8406',\n", " 'P6947','P8687','P1273','P8179','P2397','P2635','P6981','P3959','P5749','P7704','P2687','P5733',\n", " 'P1922','P3132','P6164','P8644','P1085','P3962','P1235','P3631','P6623','P675','P1984','P4087',\n", " 'P5905','P1985','P5357','P2581','P3222','P3219','P4342','P5019','P7818','P8349','P244','P1036','P1430',\n", " 'P7772','P8408','P7982','P2209','P2209','P9100','P935','P349','P691','P1014','P1051','P1245','P1417',\n", " 'P2347','P2924','P3221','P3509','P3827','P4613','P4854','P5250','P5429','P4212','P5513','P508','P4235',\n", " 'P1807','P2003','P1282','P6766']\n", "print(lista_propiedades)\n", "print(len(lista_propiedades))\n", "lista_propiedades_a_analizar=[]\n", "for prop in lista_propiedades:\n", " if prop not in lista_a_no_representar:\n", " lista_propiedades_a_analizar.append(prop)\n", "print(lista_propiedades_a_analizar)\n", "print(len(lista_propiedades_a_analizar))" ] }, { "cell_type": "code", "execution_count": 438, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['fecha de fundación o creación (P571)', 'fecha de inicio (P580)', 'fecha de publicación (P577)', 'título (P1476)', 'fecha de fin (P582)', 'número de edición (P393)', 'inicio del periodo de actividad (P2031)', 'nombre oficial (P1448)', 'nombre en la lengua nativa (P1705)', 'duración (P2047)', 'número de episodios (P1113)', 'orden dentro de la serie (P1545)', 'número de páginas (P1104)', 'fecha de disolución, retirada o demolición (P576)', 'precio (P2284)', 'nombre del autor (P2093)', 'número (issue) (P433)', 'fecha (P585)', 'nombre en kana (P1814)', 'subtítulo (P1680)', 'fecha representada (P2913)', 'página/s (P304)', 'tomo (P478)', 'masa (P2067)', 'fecha de clausura oficial (P3999)', 'coordenadas (P625)', 'periodicidad (P2896)']\n", "['P571', 'P580', 'P577', 'P1476', 'P582', 'P393', 'P2031', 'P1448', 'P1705', 'P2047', 'P1113', 'P1545', 'P1104', 'P576', 'P2284', 'P2093', 'P433', 'P585', 'P1814', 'P1680', 'P2913', 'P304', 'P478', 'P2067', 'P3999', 'P625', 'P2896']\n" ] } ], "source": [ "#Se obtiene la lista de data properties\n", "lista_data_properties = []\n", "lista_data_properties_id = []\n", "for prop in lista_propiedades_a_analizar: \n", " sparql= SPARQLWrapper(\"https://query.wikidata.org/sparql\")\n", " sparql.setQuery(\"\"\"ASK WHERE {\n", " wdt:\"\"\"+prop+\"\"\" rdf:type owl:ObjectProperty\n", " } \n", " \"\"\")\n", " sparql.setReturnFormat(XML)\n", " results = sparql.query().convert()\n", " string = results.toxml()\n", " #Se obtiene el valor booleano del xml string, siendo este \"true\" en caso afirmativo\n", " es_OP = string[105:110]\n", " \n", " #En caso de que la variable es_OP sea \"false\", es decir la propiedad sea data propery:\n", " if es_OP == \"false\":\n", " #Se obtienen las subclases de la clase ele\n", " sparql5 = SPARQLWrapper(\"https://query.wikidata.org/sparql\")\n", " sparql5.setQuery(\"\"\"SELECT *\n", " WHERE {\n", " wd:\"\"\"+prop+\"\"\" rdfs:label ?label .\n", " FILTER (langMatches( lang(?label), '\"\"\"+idioma+\"\"\"' ) )\n", " } \n", " LIMIT 1\n", " \"\"\")\n", " sparql5.setReturnFormat(JSON)\n", " results = sparql5.query().convert()\n", " lista_label = []\n", " for result in results[\"results\"][\"bindings\"]:\n", " lista_label.append(result[\"label\"][\"value\"])\n", " if(lista_label != []):\n", " for ele2 in lista_label:\n", " label = ele2\n", " label = label + \" (\"+prop+\")\"\n", " lista_data_properties.append(label)\n", " lista_data_properties_id.append(prop)\n", "print(lista_data_properties)\n", "print(lista_data_properties_id)" ] }, { "cell_type": "code", "execution_count": 439, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['P275', 21, 'P279', 5, 'P276', 1, 'P268', 7, 'P268', 7, 'P287', 1, 'P279', 5, 'P276', 1, 'P275', 21, 'P287', 1, 'P268', 7, 'P304', 3, 'P291', 23, 'P291', 23, 'P304', 3, 'P345', 6, 'P345', 5, 'P373', 14, 'P364', 7, 'P361', 43, 'P373', 14, 'P361', 43, 'P364', 7, 'P393', 11, 'P393', 11, 'P433', 5, 'P437', 1, 'P437', 1, 'P433', 5, 'P407', 1649, 'P407', 1649, 'P449', 1, 'P462', 2, 'P460', 1, 'P478', 3, 'P449', 1, 'P478', 3, 'P462', 2, 'P460', 1, 'P480', 1, 'P488', 1, 'P480', 1, 'P488', 1, 'P527', 8, 'P495', 1327, 'P527', 8, 'P495', 1327, 'P571', 49, 'P571', 49, 'P582', 17, 'P576', 6, 'P580', 360, 'P585', 12, 'P576', 6, 'P577', 411, 'P582', 17, 'P585', 12, 'P577', 411, 'P580', 360, 'P625', 2, 'P629', 2, 'P629', 2, 'P625', 2, 'P641', 1, 'P655', 2, 'P648', 5, 'P648', 5, 'P641', 1, 'P646', 729, 'P646', 729, 'P648', 5, 'P655', 2, 'P676', 1, 'P675', 1, 'P674', 101, 'P646', 729, 'P675', 1, 'P674', 101, 'P676', 1, 'P724', 3, 'P735', 3, 'P724', 3, 'P735', 3, 'P736', 3, 'P750', 1, 'P767', 1, 'P747', 7, 'P736', 3, 'P767', 1, 'P747', 7, 'P750', 1, 'P793', 4, 'P793', 4, 'P856', 91, 'P840', 83, 'P840', 83, 'P856', 91, 'P910', 18, 'P921', 35, 'P905', 1, 'P905', 1, 'P910', 18, 'P921', 35, 'P953', 6, 'P941', 18, 'P957', 31, 'P957', 31, 'P957', 31, 'P953', 6, 'P941', 18, 'P973', 5, 'P973', 5, 'P1080', 7, 'P1085', 2, 'P1085', 2, 'P1080', 7, 'P1092', 1, 'P1104', 43, 'P1113', 1, 'P1092', 1, 'P1104', 43, 'P1113', 1, 'P1235', 1, 'P1235', 1, 'P1267', 1, 'P1265', 1, 'P1269', 1, 'P1274', 3, 'P1273', 1, 'P1258', 1, 'P1269', 1, 'P1267', 1, 'P1265', 1, 'P1258', 1, 'P1274', 3, 'P1273', 1, 'P1296', 1, 'P1296', 1, 'P1343', 3, 'P1343', 3, 'P1392', 1, 'P1392', 1, 'P1424', 1, 'P1411', 4, 'P1435', 1, 'P1434', 75, 'P1417', 7, 'P1433', 139, 'P1411', 4, 'P1424', 1, 'P1417', 7, 'P1435', 1, 'P1434', 75, 'P1433', 139, 'P1441', 2, 'P1448', 1, 'P1441', 2, 'P1448', 1, 'P1476', 207, 'P1476', 207, 'P1545', 192, 'P1545', 192, 'P1680', 4, 'P1680', 4, 'P1705', 2, 'P1705', 2, 'P1814', 1, 'P1814', 1, 'P1877', 1, 'P1877', 1, 'P1889', 51, 'P1889', 51, 'P1922', 1, 'P1922', 1, 'P1970', 1, 'P1970', 1, 'P2002', 5, 'P2003', 5, 'P1984', 23, 'P1985', 8, 'P2013', 14, 'P1984', 23, 'P1985', 8, 'P2002', 5, 'P2003', 5, 'P2013', 14, 'P2031', 2, 'P2047', 1, 'P2047', 1, 'P2031', 2, 'P2067', 1, 'P2067', 1, 'P2093', 20, 'P2093', 20, 'P2163', 2, 'P2163', 2, 'P2163', 2, 'P2284', 2, 'P2284', 2, 'P2318', 1, 'P2334', 1, 'P2334', 1, 'P2318', 1, 'P2354', 2, 'P2354', 2, 'P2397', 2, 'P2397', 2, 'P2408', 9, 'P2408', 9, 'P2465', 1, 'P2465', 1, 'P2508', 1, 'P2509', 1, 'P2508', 1, 'P2509', 1, 'P2529', 1, 'P2529', 1, 'P2603', 4, 'P2603', 4, 'P2638', 1, 'P2635', 3, 'P2635', 3, 'P2638', 1, 'P2679', 2, 'P2687', 1, 'P2671', 260, 'P2679', 2, 'P2671', 260, 'P2687', 1, 'P2704', 1, 'P2671', 260, 'P2704', 1, 'P2747', 1, 'P2747', 1, 'P2896', 1, 'P2896', 1, 'P2913', 1, 'P2913', 1, 'P2969', 10, 'P2969', 10, 'P3107', 1, 'P3132', 1, 'P3107', 1, 'P3132', 1, 'P3143', 1, 'P3138', 2, 'P3138', 2, 'P3143', 1, 'P3212', 1, 'P3212', 1, 'P3212', 1, 'P3265', 1, 'P3275', 3, 'P3265', 1, 'P3275', 3, 'P3302', 1, 'P3302', 1, 'P3365', 2, 'P3365', 2, 'P3417', 16, 'P3417', 16, 'P3589', 86, 'P3589', 86, 'P3631', 2, 'P3631', 2, 'P3704', 1, 'P3704', 1, 'P3868', 1, 'P3868', 1, 'P3912', 2, 'P3933', 1, 'P3912', 2, 'P3933', 1, 'P3943', 2, 'P3959', 3, 'P3962', 2, 'P3959', 3, 'P3943', 2, 'P3962', 2, 'P3984', 1, 'P3999', 1, 'P3984', 1, 'P3999', 1, 'P4087', 22, 'P4073', 2, 'P4087', 22, 'P4073', 2, 'P4277', 1, 'P4282', 1, 'P4277', 1, 'P4282', 1, 'P4485', 23, 'P4480', 8, 'P4480', 8, 'P4485', 23, 'P4529', 3, 'P4529', 3, 'P4947', 1, 'P4947', 1, 'P4969', 20, 'P4969', 20, 'P5008', 7, 'P5008', 7, 'P5032', 1, 'P5032', 1, 'P5099', 6, 'P5099', 6, 'P5202', 2, 'P5202', 2, 'P5331', 2, 'P5331', 2, 'P5331', 2, 'P5357', 14, 'P5357', 14, 'P5396', 6, 'P5396', 6, 'P5733', 1, 'P5749', 3, 'P5749', 3, 'P5733', 1, 'P5786', 1, 'P5769', 1, 'P5769', 1, 'P5786', 1, 'P5849', 24, 'P5849', 24, 'P5905', 15, 'P5905', 15, 'P6133', 1, 'P6127', 1, 'P6133', 1, 'P6127', 1, 'P6164', 1, 'P6175', 1, 'P6164', 1, 'P6175', 1, 'P6216', 21, 'P6216', 21, 'P6262', 44, 'P6262', 44, 'P6338', 115, 'P6366', 1, 'P6338', 115, 'P6366', 1, 'P6366', 1, 'P6623', 2, 'P6623', 2, 'P6839', 4, 'P6839', 4, 'P6947', 1, 'P6947', 1, 'P6981', 1, 'P6981', 1, 'P7047', 1, 'P7047', 1, 'P7266', 3, 'P7266', 3, 'P7363', 3, 'P7363', 3, 'P7704', 1, 'P7704', 1, 'P7704', 1, 'P7818', 4, 'P7818', 4, 'P7859', 2, 'P7859', 2, 'P7936', 1, 'P7936', 1, 'P8033', 1, 'P8033', 1, 'P8091', 20, 'P8091', 20, 'P8179', 1, 'P8179', 1, 'P8345', 50, 'P8345', 50, 'P8383', 12, 'P8383', 12, 'P8406', 1, 'P8406', 1, 'P8632', 2, 'P8619', 1, 'P8619', 1, 'P8632', 2, 'P8644', 1, 'P8644', 1, 'P8687', 3, 'P8687', 3, 'P8873', 1, 'P8873', 1, 'P18', 29, 'P17', 120, 'P21', 6, 'P21', 6, 'P18', 29, 'P17', 120, 'P51', 1, 'P58', 25, 'P57', 2, 'P51', 1, 'P58', 25, 'P57', 2, 'P86', 1, 'P86', 1, 's/core#altLabel', 793, 'P50', 1976, 'P112', 2, 'P98', 6, 'P106', 4, 'P110', 151, 'P127', 3, 'P98', 6, 'P50', 1976, 'P112', 2, 'P106', 4, 'P127', 3, 'P123', 523, 'P110', 151, 'P135', 2, 'P144', 30, 'P131', 3, 'P154', 4, 'P138', 6, 'P159', 1, 'P123', 523, 'P155', 184, 'P131', 3, 'P156', 175, 'P135', 2, 'P144', 30, 'P136', 312, 'P138', 6, 'P159', 1, 'P154', 4, 'P156', 175, 'P166', 11, 'P31', 4592, 'P155', 182, 'P136', 312, 'P180', 9, 'P161', 7, 'P175', 1, 'P31', 4592, 'P170', 47, 'P180', 9, 'P161', 7, 'P166', 11, 'P170', 47, 'P175', 1, 'yntax-ns#type', 2, 'P214', 5, 'P195', 2, 'P212', 55, 'P179', 1408, 'P214', 5, 'P195', 2, 'P214', 5, 'P212', 55, 'P212', 55, 'P243', 6, 'P179', 1408, 'P227', 3, 'P227', 3, 'P244', 5, 'P236', 3, 'P236', 3, 'P227', 3, 'P244', 5, 'P244', 5, 'P243', 6, 'P243', 6, 'P236', 3, '', 3861, 'ersion', 4592, 'dified', 4592, 'a#label', 4592, 'estamp', 1570, 'tements', 4592, 'elinks', 4592, 'ers', 4592]\n", "960\n" ] } ], "source": [ "#Se obtienen el numero de instancias de cada una de las propiedades\n", "sparql5 = SPARQLWrapper(\"https://query.wikidata.org/sparql\")\n", "sparql5.setQuery(\"\"\"SELECT DISTINCT ?p (COUNT( DISTINCT ?s) as ?sCount)\n", "WHERE\n", "{\n", "?s wdt:P31 wd:\"\"\"+clase+\"\"\" .\n", "?s ?p ?o .\n", "}\n", "GROUP BY ?p\n", "\"\"\")\n", "sparql5.setReturnFormat(JSON)\n", "results = sparql5.query().convert()\n", "\n", "lista_numero_propiedades = []\n", "for result in results[\"results\"][\"bindings\"]:\n", " if len(result[\"p\"][\"value\"]) == 33:\n", " lista_numero_propiedades.append(result[\"p\"][\"value\"][29:])\n", " elif len(result[\"p\"][\"value\"]) == 40:\n", " lista_numero_propiedades.append(result[\"p\"][\"value\"][36:])\n", " elif len(result[\"p\"][\"value\"]) == 51:\n", " lista_numero_propiedades.append(result[\"p\"][\"value\"][47:])\n", " elif len(result[\"p\"][\"value\"]) == 34:\n", " lista_numero_propiedades.append(result[\"p\"][\"value\"][29:])\n", " elif len(result[\"p\"][\"value\"]) == 39:\n", " lista_numero_propiedades.append(result[\"p\"][\"value\"][36:]) \n", " elif len(result[\"p\"][\"value\"]) == 32:\n", " lista_numero_propiedades.append(result[\"p\"][\"value\"][29:]) \n", " elif len(result[\"p\"][\"value\"]) == 41:\n", " lista_numero_propiedades.append(result[\"p\"][\"value\"][36:]) \n", " elif len(result[\"p\"][\"value\"]) == 42:\n", " lista_numero_propiedades.append(result[\"p\"][\"value\"][35:])\n", " elif len(result[\"p\"][\"value\"]) == 52:\n", " lista_numero_propiedades.append(result[\"p\"][\"value\"][47:])\n", " elif len(result[\"p\"][\"value\"]) == 35:\n", " lista_numero_propiedades.append(result[\"p\"][\"value\"][29:])\n", " elif len(result[\"p\"][\"value\"]) == 25:\n", " lista_numero_propiedades.append(result[\"p\"][\"value\"][19:])\n", " elif len(result[\"p\"][\"value\"]) == 30:\n", " lista_numero_propiedades.append(result[\"p\"][\"value\"][24:])\n", " elif len(result[\"p\"][\"value\"]) == 44:\n", " lista_numero_propiedades.append(result[\"p\"][\"value\"][29:])\n", " elif len(result[\"p\"][\"value\"]) == 36:\n", " lista_numero_propiedades.append(result[\"p\"][\"value\"][29:])\n", " elif len(result[\"p\"][\"value\"]) == 52:\n", " lista_numero_propiedades.append(result[\"p\"][\"value\"][47:])\n", " else:\n", " lista_numero_propiedades.append(result[\"p\"][\"value\"][34:])\n", " lista_numero_propiedades.append(int(result[\"sCount\"][\"value\"]))\n", "\n", "print(lista_numero_propiedades)\n", "print(len(lista_numero_propiedades))" ] }, { "cell_type": "code", "execution_count": 440, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Lista a ordenar: ['P304', 3, 'P393', 11, 'P433', 5, 'P478', 3, 'P571', 49, 'P582', 17, 'P576', 6, 'P580', 360, 'P585', 12, 'P577', 411, 'P625', 2, 'P1104', 43, 'P1113', 1, 'P1448', 1, 'P1476', 207, 'P1545', 192, 'P1680', 4, 'P1705', 2, 'P1814', 1, 'P2031', 2, 'P2047', 1, 'P2067', 1, 'P2093', 20, 'P2284', 2, 'P2896', 1, 'P2913', 1, 'P3999', 1]\n", "Lista ordenada: ['P577', 'P580', 'P1476', 'P1545', 'P571', 'P1104', 'P2093', 'P582', 'P585', 'P393', 'P576', 'P433', 'P1680', 'P304', 'P478', 'P625', 'P1705', 'P2031', 'P2284', 'P1113', 'P1448', 'P1814', 'P2047', 'P2067', 'P2896', 'P2913', 'P3999']\n" ] } ], "source": [ "lista_a_representar = []\n", "for data_prop in range(0,len(lista_numero_propiedades)):\n", " if lista_numero_propiedades[data_prop] in lista_data_properties_id:\n", " if lista_numero_propiedades[data_prop] not in lista_a_representar:\n", " lista_a_representar.append(lista_numero_propiedades[data_prop])\n", " lista_a_representar.append(lista_numero_propiedades[data_prop+1])\n", "print(\"Lista a ordenar: \", lista_a_representar)\n", "\n", "lista_numero_instancias = []\n", "lista_propiedades_representar = []\n", "for ele in range(0,len(lista_a_representar)):\n", " if ele % 2 != 0:\n", " lista_numero_instancias.append(lista_a_representar[ele])\n", " else:\n", " lista_propiedades_representar.append(lista_a_representar[ele])\n", "\n", "lista_final=[]\n", "for ele in range(0,len(lista_numero_instancias)):\n", " pos_max = lista_numero_instancias.index(max(lista_numero_instancias))\n", " lista_final.append(lista_propiedades_representar[pos_max]) \n", " lista_numero_instancias.remove(lista_numero_instancias[pos_max])\n", " lista_propiedades_representar.remove(lista_propiedades_representar[pos_max])\n", "print(\"Lista ordenada: \", lista_final)" ] }, { "cell_type": "code", "execution_count": 441, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['fecha de publicación (P577)', 'fecha de inicio (P580)', 'título (P1476)', 'orden dentro de la serie (P1545)', 'fecha de fundación o creación (P571)', 'número de páginas (P1104)']\n" ] } ], "source": [ "lista_a_meter_en_caja = [lista_final[0],lista_final[1],lista_final[2],lista_final[3],lista_final[4],lista_final[5]]\n", "lista_a_meter_en_caja_label = []\n", "for prop in lista_a_meter_en_caja:\n", " #Se obtienen las etiquetas\n", " sparql5 = SPARQLWrapper(\"https://query.wikidata.org/sparql\")\n", " sparql5.setQuery(\"\"\"SELECT *\n", " WHERE {\n", " wd:\"\"\"+prop+\"\"\" rdfs:label ?label .\n", " FILTER (langMatches( lang(?label), '\"\"\"+idioma+\"\"\"' ) )\n", " } \n", " LIMIT 1\n", " \"\"\")\n", " sparql5.setReturnFormat(JSON)\n", " results = sparql5.query().convert()\n", " lista_label = []\n", " for result in results[\"results\"][\"bindings\"]:\n", " lista_label.append(result[\"label\"][\"value\"])\n", " if(lista_label != []):\n", " for ele2 in lista_label:\n", " label = ele2\n", " label = label + \" (\"+prop+\")\"\n", " lista_a_meter_en_caja_label.append(label)\n", "print(lista_a_meter_en_caja_label)" ] }, { "cell_type": "code", "execution_count": 442, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "a=ET.Element('mxfile')\n", "a.set('type','device')\n", "a.set('etag','d2m_BCRs5NTD1uJDUOON')\n", "a.set('version','13.9.1')\n", "a.set('agent','5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36')\n", "a.set('modified','2020-11-05T11:56:57.085Z')\n", "a.set('host','app.diagrams.net')\n", "\n", "b = ET.SubElement(a, 'diagram')\n", "b.set('id','E2t0QyFFZp_JITEdpmB3')\n", "\n", "c = ET.SubElement(b, 'mxGraphModel')\n", "c.set('shadow','0')\n", "c.set('math','0')\n", "c.set('pageHeight','1169')\n", "c.set('pageWidth','827')\n", "c.set('pageScale','1')\n", "c.set('page','1')\n", "c.set('fold','1')\n", "c.set('arrows','1')\n", "c.set('connect','1')\n", "c.set('tooltips','1')\n", "c.set('guides','1')\n", "c.set('gridSize','10')\n", "c.set('grid','1')\n", "c.set('dy','18481')\n", "c.set('dx','21749')\n", "\n", "d = ET.SubElement(c, 'root')\n", "\n", "e = ET.SubElement(d, 'mxCell')\n", "e.set('id','0')\n", "f = ET.SubElement(d, 'mxCell')\n", "f.set('id','1')\n", "f.set('parent','0')\n", "\n", "#Caja con la clase inicial\n", "sparql6 = SPARQLWrapper(\"https://query.wikidata.org/sparql\")\n", "sparql6.setQuery(\"\"\"SELECT *\n", "WHERE {\n", "wd:\"\"\"+clase+\"\"\" rdfs:label ?label .\n", "FILTER (langMatches( lang(?label), '\"\"\"+idioma+\"\"\"' ) )\n", "} \n", "LIMIT 1\n", "\"\"\")\n", "sparql6.setReturnFormat(JSON)\n", "results = sparql6.query().convert()\n", "lista_label = []\n", "for result in results[\"results\"][\"bindings\"]:\n", " lista_label.append(result[\"label\"][\"value\"])\n", "for ele2 in lista_label:\n", " label = ele2\n", "label = label + \" (\"+clase+\")\"\n", " \n", "g = ET.SubElement(d, 'mxCell')\n", "g.set('id','2')\n", "g.set('parent','1')\n", "g.set('style','rounded=0;whiteSpace=wrap;html=1;fillColor=#fff2cc;strokeColor=#d6b656')\n", "g.set('vertex','1')\n", "g.set('value',label)\n", "g1 = ET.SubElement(g, 'mxGeometry')\n", "g1.set('as','geometry')\n", "g1.set('y','-16200')\n", "g1.set('x','-18500')\n", "g1.set('height','27')\n", "g1.set('width','212')\n", "\n", "#Caja con los dataType Strings\n", "aj = ET.SubElement(d,'mxCell')\n", "aj.set('id','3')\n", "aj.set('parent','1')\n", "aj.set('style','rounded=0;whiteSpace=wrap;html=1;align=left;dashed=1')\n", "aj.set('vertex','1')\n", "aj.set('value',lista_a_meter_en_caja_label[0]+'
'+lista_a_meter_en_caja_label[1]+'
'+lista_a_meter_en_caja_label[2]\n", " +'
'+lista_a_meter_en_caja_label[3]+'
'+lista_a_meter_en_caja_label[4]+'
'+lista_a_meter_en_caja_label[5])\n", "aj1=ET.SubElement(aj,'mxGeometry')\n", "aj1.set('as','geometry')\n", "aj1.set('y','-16173')\n", "aj1.set('x','-18500')\n", "aj1.set('height','85')\n", "aj1.set('width','212')\n", "\n", "x=-20700\n", "y=-16230\n", "iteracion = 0\n", "identificador = 4\n", "for i in range(0,len(lista_varias_clases_una_propiedad)):\n", " \n", " #Para las propiedades\n", " if iteracion % 2 == 0:\n", " #Si tiene que enlazar con mas de una clase\n", " if len(lista_varias_clases_una_propiedad[i+1]) > 1:\n", " #Simbolo de union\n", " ak = ET.SubElement(d,'mxCell')\n", " ak.set('id',str(identificador))\n", " ak.set('parent','1')\n", " ak.set('style','ellipse;whiteSpace=wrap;html=1;aspect=fixed;fontSize=17;')\n", " ak.set('vertex','1')\n", " ak.set('value','U')\n", " ak1=ET.SubElement(ak,'mxGeometry')\n", " ak1.set('as','geometry')\n", " ak1.set('y',str(y+50))\n", " ak1.set('x',str(x+110))\n", " ak1.set('height','30')\n", " ak1.set('width','30')\n", " \n", " union = identificador\n", " identificador = identificador + 1\n", " \n", " #Flecha con la propiedad\n", " bk = ET.SubElement(d, 'mxCell')\n", " bk.set('id',str(identificador))\n", " bk.set('parent','1')\n", " bk.set('target',str(identificador-1))\n", " bk.set('source','2')\n", " bk.set('edge','1')\n", " bk.set('style','edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;dashed=1;endArrow=classic;endFill=1;')\n", " bk.set('value',lista_varias_clases_una_propiedad[i])\n", " bk1 = ET.SubElement(bk, 'mxGeometry')\n", " bk1.set('as','geometry')\n", " bk1.set('relative','1')\n", " \n", " identificador = identificador + 1\n", "\n", " for ele in lista_varias_clases_una_propiedad[i+1]:\n", " #Flecha con la propiedad\n", " ck = ET.SubElement(d, 'mxCell')\n", " ck.set('id',str(identificador))\n", " ck.set('parent','1')\n", " ck.set('target',str(identificador+len(lista_varias_clases_una_propiedad[i+1])))\n", " ck.set('source',str(union))\n", " ck.set('edge','1')\n", " ck.set('style','edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;dashed=1;endArrow=classic;endFill=1;')\n", " ck1 = ET.SubElement(ck, 'mxGeometry')\n", " ck1.set('as','geometry')\n", " ck1.set('relative','1')\n", " identificador = identificador + 1\n", " identificador = identificador - 1\n", " \n", " #Si solo tiene que enlazar con una clase\n", " if len(lista_varias_clases_una_propiedad[i+1]) == 1:\n", " #Estamos tratando una propiedad\n", " #Flecha con la propiedad\n", " k = ET.SubElement(d, 'mxCell')\n", " k.set('id',str(identificador))\n", " k.set('parent','1')\n", " k.set('target',str(identificador+1))\n", " k.set('source','2')\n", " k.set('edge','1')\n", " k.set('style','edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;dashed=1;endArrow=classic;endFill=1;')\n", " k.set('value',lista_varias_clases_una_propiedad[i])\n", " k1 = ET.SubElement(k, 'mxGeometry')\n", " k1.set('as','geometry')\n", " k1.set('relative','1')\n", " \n", " #Para las clases\n", " else:\n", " #Estamos tratando una lista de clases\n", " num_elem = 1\n", " for j in lista_varias_clases_una_propiedad[i]:\n", " if num_elem > 1:\n", " identificador = identificador +1\n", " #Caja con clase\n", " z = ET.SubElement(d, 'mxCell')\n", " z.set('id',str(identificador))\n", " z.set('parent','1')\n", " z.set('style','rounded=0;whiteSpace=wrap;html=1')\n", " z.set('vertex','1')\n", " z.set('value',j)\n", " z1 = ET.SubElement(z, 'mxGeometry')\n", " z1.set('as','geometry')\n", " z1.set('y',str(y))\n", " z1.set('x',str(x))\n", " z1.set('height','27')\n", " z1.set('width','212')\n", " num_elem = num_elem + 1\n", " x=x+50\n", " y=y-50\n", " \n", " x=x+50\n", " y=y-50\n", " iteracion = iteracion + 1\n", " identificador = identificador + 1\n", "\n", "x1 = -18900\n", "y1 = -16020\n", "clase2 = 0\n", "for i in range(0,len(lista_varias_propiedades_una_clase)):\n", " res = isinstance(lista_varias_propiedades_una_clase[i], str) \n", " #Si son propiedades\n", " if str(res) == \"True\":\n", " #Flecha con la propiedad\n", " bk = ET.SubElement(d, 'mxCell')\n", " bk.set('id',str(identificador))\n", " bk.set('parent','1')\n", " bk.set('target',str(clase2))\n", " bk.set('source','2')\n", " bk.set('edge','1')\n", " bk.set('style','edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;dashed=1;endArrow=classic;endFill=1;')\n", " bk.set('value',str(lista_varias_propiedades_una_clase[i]))\n", " bk1 = ET.SubElement(bk, 'mxGeometry')\n", " bk1.set('as','geometry')\n", " bk1.set('relative','1')\n", " identificador = identificador + 1\n", " #Si son clases\n", " else:\n", " #Caja con la clase\n", " z = ET.SubElement(d, 'mxCell')\n", " z.set('id',str(identificador))\n", " z.set('parent','1')\n", " z.set('style','rounded=0;whiteSpace=wrap;html=1')\n", " z.set('vertex','1')\n", " z.set('value',str(lista_varias_propiedades_una_clase[i][0]))\n", " z1 = ET.SubElement(z, 'mxGeometry')\n", " z1.set('as','geometry')\n", " z1.set('y',str(y1))\n", " z1.set('x',str(x1))\n", " z1.set('height','27')\n", " z1.set('width','212')\n", " clase2 = identificador\n", " identificador = identificador + 1\n", " x1 = x1 - 50\n", " y1 = y1 + 50\n", "ET.dump(a)\n", "tree = ET.ElementTree(a)\n", "tree.write(\"output_final.xml\", encoding='utf-8', xml_declaration=True) " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 4 }