{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Discovery API Test"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## APIの初期化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: six in /miniconda3/lib/python3.7/site-packages (from websocket-client==0.48.0->ibm_watson) (1.12.0)\r\n"
     ]
    }
   ],
   "source": [
    "# Install the required library (only the last line of pip's output is shown).\n",
    "# Pinned to the 3.x line: the client in this notebook is constructed with\n",
    "# `iam_apikey`, which later major releases of ibm_watson no longer accept.\n",
    "!pip install \"ibm_watson==3.*\" | tail -n 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Credentials (set these for your own service instance).\n",
    "# Values are taken from the DISCOVERY_APIKEY / DISCOVERY_URL environment\n",
    "# variables when they are set, so the real key need not be saved in the\n",
    "# notebook; otherwise the placeholders below are used and must be edited.\n",
    "\n",
    "import os\n",
    "\n",
    "discovery_credentials = {\n",
    "  \"apikey\": os.environ.get(\"DISCOVERY_APIKEY\", \"xxxx\"),\n",
    "  \"url\": os.environ.get(\"DISCOVERY_URL\", \"https://gateway.watsonplatform.net/discovery/api\")\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the Discovery V1 API client\n",
    "\n",
    "import json\n",
    "import os\n",
    "from ibm_watson import DiscoveryV1\n",
    "\n",
    "# API version date passed to the client constructor\n",
    "version = '2019-04-30'\n",
    "\n",
    "# The key and endpoint come from the discovery_credentials dict defined earlier\n",
    "discovery = DiscoveryV1(version=version,\n",
    "                        iam_apikey=discovery_credentials['apikey'],\n",
    "                        url=discovery_credentials['url'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "environment_id:  2c134ad3-42c4-48fe-bf84-3731d0f8cfe3\n",
      "collection_id:  601acd99-511a-4b2a-bda7-c6c8f4f35ad4\n",
      "configuration_id:  1a09551f-bf55-4d4a-9741-b2612e5e61fd\n"
     ]
    }
   ],
   "source": [
    "# Look up environment_id, collection_id and configuration_id.\n",
    "# Premise: one private collection has already been created through the UI.\n",
    "\n",
    "# Environment id: the second listed environment is used\n",
    "# (presumably index 0 is a built-in/system environment - TODO confirm).\n",
    "environment_id = discovery.list_environments().get_result()['environments'][1]['environment_id']\n",
    "print('environment_id: ', environment_id)\n",
    "\n",
    "# Collection id: keep using the second collection when there is one, but fall\n",
    "# back to the only collection so the single-collection premise above works\n",
    "# instead of raising IndexError.\n",
    "collection_list = discovery.list_collections(environment_id).get_result()['collections']\n",
    "collection_id = collection_list[1 if len(collection_list) > 1 else 0]['collection_id']\n",
    "print('collection_id: ' , collection_id)\n",
    "\n",
    "# Configuration id: the first listed configuration is used\n",
    "configuration_id = discovery.list_configurations(environment_id).get_result()['configurations'][0]['configuration_id']\n",
    "print('configuration_id: ', configuration_id)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 文書のロードと削除"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Document load function\n",
    "\n",
    "def load_text( collection_id, sample_data, key_name):\n",
    "    \"\"\"Upload each item of sample_data to a Discovery collection as one document.\n",
    "\n",
    "    Each item is printed, dumped to a work file named after the item's key\n",
    "    value plus '.json' in the current directory, uploaded with\n",
    "    discovery.add_document, and the work file is then removed.\n",
    "\n",
    "    Args:\n",
    "        collection_id: ID of the target collection.\n",
    "        sample_data: Iterable of JSON-serialisable dicts to upload.\n",
    "        key_name: Name of the item field holding the document's unique key.\n",
    "    \"\"\"\n",
    "    # Local import: the notebook's import cell does not bring in `time`,\n",
    "    # which the busy-wait below needs.\n",
    "    import time\n",
    "\n",
    "    for item in sample_data:\n",
    "        print(item)\n",
    "\n",
    "        # Write the item to its own work file\n",
    "        filename = str(item.get(key_name)) + '.json'\n",
    "        with open(filename, 'w') as f:\n",
    "            json.dump(item, f)\n",
    "\n",
    "        try:\n",
    "            # Wait until fewer than 20 documents are still being processed.\n",
    "            # The collection is re-read on every pass and always unwrapped\n",
    "            # with get_result() before its counts are inspected.\n",
    "            while True:\n",
    "                collection = discovery.get_collection(environment_id, collection_id).get_result()\n",
    "                if collection['document_counts']['processing'] < 20:\n",
    "                    break\n",
    "                print('busy. waiting..')\n",
    "                time.sleep(10)\n",
    "\n",
    "            # Upload the work file to Discovery\n",
    "            with open(filename) as f:\n",
    "                discovery.add_document(environment_id, collection_id, file = f)\n",
    "        finally:\n",
    "            # Remove the work file even when polling or the upload raised\n",
    "            os.remove(filename)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Function that deletes the documents of one collection\n",
    "\n",
    "def delete_all_docs(collection_id):\n",
    "    \"\"\"Delete the documents found by a query sized to the available-document count.\n",
    "\n",
    "    Args:\n",
    "        collection_id: ID of the collection to empty.\n",
    "    \"\"\"\n",
    "    # The available-document count is used as the query's result limit\n",
    "    collection_info = discovery.get_collection(environment_id, collection_id).get_result()\n",
    "    available = collection_info['document_counts']['available']\n",
    "\n",
    "    # Collect every returned document id before deleting anything\n",
    "    response = discovery.query(environment_id, collection_id, return_fields='id', count=available)\n",
    "    doc_ids = []\n",
    "    for hit in response.get_result()[\"results\"]:\n",
    "        doc_ids.append(hit[\"id\"])\n",
    "\n",
    "    for doc_id in doc_ids:\n",
    "        print('deleting doc: id =' + doc_id)\n",
    "        discovery.delete_document(environment_id, collection_id, doc_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample documents for the load test: one dict per document,\n",
    "# with app_id serving as the unique key.\n",
    "sample_data = [\n",
    "    dict(app_id=1, title='最初のテキスト', text='サンプルテキストその1。'),\n",
    "    dict(app_id=2, title='2番目のテキスト', text='新幹線はやぶさが好きです。'),\n",
    "    dict(app_id=3, title='3番目のテキスト', text='令和元年に転職しました。'),\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "deleting doc: id =095023d6-7b9f-4fc9-8cae-220f0dac5b64\n",
      "deleting doc: id =a2d805d8-ac39-4f13-adbf-ebdf60f214b0\n",
      "deleting doc: id =53cb408b-abc6-4fe0-8fa2-5753251f4cef\n"
     ]
    }
   ],
   "source": [
    "# Full-delete test.\n",
    "# Placed before the load test, matching the recorded execution order, so that\n",
    "# a top-to-bottom run leaves the sample documents in the collection for the\n",
    "# search section instead of deleting them right after loading.\n",
    "delete_all_docs(collection_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'app_id': 1, 'title': '最初のテキスト', 'text': 'サンプルテキストその1。'}\n",
      "{'app_id': 2, 'title': '2番目のテキスト', 'text': '新幹線はやぶさが好きです。'}\n",
      "{'app_id': 3, 'title': '3番目のテキスト', 'text': '令和元年に転職しました。'}\n"
     ]
    }
   ],
   "source": [
    "# Document load test: uploads the sample documents, keyed by app_id\n",
    "load_text(collection_id, sample_data, 'app_id')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 文書検索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Search function\n",
    "\n",
    "def query_documents(collection_id, query_text, return_fields):\n",
    "    \"\"\"Query a collection and return the results list of the response.\n",
    "\n",
    "    Args:\n",
    "        collection_id: ID of the collection to search.\n",
    "        query_text: Query expression.\n",
    "        return_fields: Fields to include in the output.\n",
    "\n",
    "    Returns:\n",
    "        The 'results' entry of the query response.\n",
    "    \"\"\"\n",
    "    # The available-document count is printed and used as the result limit\n",
    "    info = discovery.get_collection(environment_id, collection_id).get_result()\n",
    "    doc_count = info['document_counts']['available']\n",
    "    print('doc_count: ', doc_count)\n",
    "\n",
    "    response = discovery.query(\n",
    "        environment_id,\n",
    "        collection_id,\n",
    "        query=query_text,\n",
    "        count=doc_count,\n",
    "        return_fields=return_fields,\n",
    "    )\n",
    "    return response.get_result()[\"results\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### textフィールドに「サンプル」を含む文書の検索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "doc_count:  2\n",
      "[\n",
      "  {\n",
      "    \"id\": \"965f5a5a-2bd1-4118-9517-3c187c47a02c\",\n",
      "    \"result_metadata\": {\n",
      "      \"confidence\": 0.08408801890816446,\n",
      "      \"score\": 1.0226655\n",
      "    },\n",
      "    \"text\": \"サンプルテキストその1。\",\n",
      "    \"title\": \"最初のテキスト\",\n",
      "    \"app_id\": 1\n",
      "  }\n",
      "]\n"
     ]
    }
   ],
   "source": [
    "# Query on the text field, returning only app_id, title and text\n",
    "query_text = 'text:サンプル'\n",
    "return_fields = 'app_id,title,text'\n",
    "query_results = query_documents(\n",
    "    collection_id,\n",
    "    query_text=query_text,\n",
    "    return_fields=return_fields,\n",
    ")\n",
    "\n",
    "# ensure_ascii=False prints non-ASCII text as-is rather than as \\\\u escapes\n",
    "print(json.dumps(query_results, ensure_ascii=False, indent=2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}