{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Discovery API Test" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## APIの初期化" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: six in /miniconda3/lib/python3.7/site-packages (from websocket-client==0.48.0->ibm_watson) (1.12.0)\r\n" ] } ], "source": [ "# 必要ライブラリの導入\n", "!pip install ibm_watson | tail -n 1" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# credential情報 (個別に設定します)\n", "\n", "discovery_credentials = {\n", " \"apikey\": \"xxxx\",\n", " \"url\": \"https://gateway.watsonplatform.net/discovery/api\"\n", "}" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Discovery APIの初期化\n", "\n", "import json\n", "import os\n", "from ibm_watson import DiscoveryV1\n", "\n", "version = '2019-04-30'\n", "\n", "discovery = DiscoveryV1(\n", " version=version,\n", " iam_apikey=discovery_credentials['apikey'],\n", " url=discovery_credentials['url']\n", ")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "environment_id: 2c134ad3-42c4-48fe-bf84-3731d0f8cfe3\n", "collection_id: 601acd99-511a-4b2a-bda7-c6c8f4f35ad4\n", "configuration_id: 1a09551f-bf55-4d4a-9741-b2612e5e61fd\n" ] } ], "source": [ "# environment_id、collection_id、configuration_id の取得\n", "# すでにUIで1つのprivate collectionが作成済みであることが前提\n", "\n", "# environment id の取得\n", "environment_id = discovery.list_environments().get_result()['environments'][1]['environment_id']\n", "print('environment_id: ', environment_id)\n", "\n", "# collection id の取得\n", "collection_id = discovery.list_collections(environment_id ).get_result()['collections'][1]['collection_id']\n", "print('collection_id: ' , collection_id)\n", "\n", "# configuration_idの取得\n", "configuration_id = discovery.list_configurations(environment_id).get_result()['configurations'][0]['configuration_id']\n", "print('configuration_id: ', configuration_id)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 文書のロードと削除" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# 文書ロード関数\n", "# collection_id: 対象コレクション\n", "# sample_data: 書き込み対象テキスト (json形式の配列)\n", "# key_name: 文書のユニークキー名称\n", "\n", "def load_text( collection_id, sample_data, key_name):\n", " for item in sample_data:\n", " \n", " # itemごとにワークのjsonファイルを作成\n", " print(item)\n", " key = item.get(key_name)\n", " filename = str(key) + '.json'\n", " f = open(filename, 'w')\n", " json.dump(item, f)\n", " f.close()\n", " \n", " # 書き込み可能かのチェック\n", " collection = discovery.get_collection(environment_id, collection_id).get_result()\n", " proc_docs = collection['document_counts']['processing']\n", " while True:\n", " if proc_docs < 20:\n", " break\n", " print('busy. waiting..')\n", " time.sleep(10)\n", " collection = discovery.get_collection(environment_id, collection_id)\n", " proc_docs = collection['document_counts']['processing']\n", "\n", " # jsonファイル名を引数にDiscoveryへデータロード\n", " with open(filename) as f:\n", " add_doc = discovery.add_document(environment_id, collection_id, file = f)\n", " os.remove(filename)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# 特定のコレクションの全文書を削除する関数\n", "# collection_id: 対象コレクション\n", "\n", "def delete_all_docs(collection_id):\n", "\n", " # 文書件数取得\n", " collection = discovery.get_collection(environment_id, collection_id).get_result()\n", " doc_count = collection['document_counts']['available']\n", "\n", " results = discovery.query(environment_id, collection_id, return_fields='id', count=doc_count).get_result()[\"results\"]\n", " ids = [item[\"id\"] for item in results]\n", "\n", " for id in ids:\n", " print('deleting doc: id =' + id)\n", " discovery.delete_document(environment_id, collection_id, id)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# ロードテスト用テキスト\n", "sample_data = [\n", " {'app_id': 1, 'title': '最初のテキスト', 'text': 'サンプルテキストその1。'},\n", " {'app_id': 2, 'title': '2番目のテキスト', 'text': '新幹線はやぶさが好きです。'},\n", " {'app_id': 3, 'title': '3番目のテキスト', 'text': '令和元年に転職しました。'},\n", "]" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'app_id': 1, 'title': '最初のテキスト', 'text': 'サンプルテキストその1。'}\n", "{'app_id': 2, 'title': '2番目のテキスト', 'text': '新幹線はやぶさが好きです。'}\n", "{'app_id': 3, 'title': '3番目のテキスト', 'text': '令和元年に転職しました。'}\n" ] } ], "source": [ "# 文書ロードテスト\n", "load_text(collection_id, sample_data, 'app_id')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "deleting doc: id =095023d6-7b9f-4fc9-8cae-220f0dac5b64\n", "deleting doc: id =a2d805d8-ac39-4f13-adbf-ebdf60f214b0\n", "deleting doc: id =53cb408b-abc6-4fe0-8fa2-5753251f4cef\n" ] } ], "source": [ "# 全件削除テスト\n", "delete_all_docs(collection_id)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 文書検索" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# 検索用関数\n", "# collection_id: 検索対象コレクション\n", "# query_text: 検索条件式\n", "# return_fields: 出力項目\n", "\n", "def query_documents(collection_id, query_text, return_fields):\n", " # 文書件数取得\n", " collection = discovery.get_collection(environment_id, collection_id).get_result()\n", " doc_count = collection['document_counts']['available']\n", " print('doc_count: ', doc_count)\n", "\n", " query_results = discovery.query(environment_id, collection_id, \n", " query=query_text, \n", " count=doc_count, \n", " return_fields=return_fields).get_result()[ \"results\"]\n", " return query_results" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### textフィールドに「サンプル」を含む文書の検索" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "doc_count: 2\n", "[\n", " {\n", " \"id\": \"965f5a5a-2bd1-4118-9517-3c187c47a02c\",\n", " \"result_metadata\": {\n", " \"confidence\": 0.08408801890816446,\n", " \"score\": 1.0226655\n", " },\n", " \"text\": \"サンプルテキストその1。\",\n", " \"title\": \"最初のテキスト\",\n", " \"app_id\": 1\n", " }\n", "]\n" ] } ], "source": [ "query_text = 'text:サンプル'\n", "return_fields = 'app_id,title,text'\n", "query_results = query_documents(collection_id, query_text, return_fields)\n", "\n", "print(json.dumps(query_results, indent=2, ensure_ascii=False))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }