{"cells": [{"attachments": {}, "cell_type": "markdown", "id": "9c48213d-6e6a-4c10-838a-2a7c710c3a05", "metadata": {}, "source": ["# Langchain Output Parsing\n"]}, {"attachments": {}, "cell_type": "markdown", "id": "6f4af25e", "metadata": {}, "source": ["Download Data\n"]}, {"cell_type": "code", "execution_count": null, "id": "04d0bb21", "metadata": {}, "outputs": [], "source": ["%pip install llama-index-llms-openai"]}, {"cell_type": "code", "execution_count": null, "id": "b9635dc3", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Will not apply HSTS. The HSTS database must be a regular and non-world-writable file.\n", "ERROR: could not open HSTS store at '/home/loganm/.wget-hsts'. HSTS will be disabled.\n", "--2023-12-11 10:24:04-- https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.108.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 75042 (73K) [text/plain]\n", "Saving to: ‘data/paul_graham/paul_graham_essay.txt’\n", "\n", "data/paul_graham/pa 100%[===================>] 73.28K --.-KB/s in 0.04s \n", "\n", "2023-12-11 10:24:04 (1.74 MB/s) - ‘data/paul_graham/paul_graham_essay.txt’ saved [75042/75042]\n", "\n"]}], "source": ["!mkdir -p 'data/paul_graham/'\n", "!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'"]}, {"cell_type": "markdown", "id": "50d3b817-b70e-4667-be4f-d3a0fe4bd119", "metadata": {}, "source": ["#### Load documents, build the VectorStoreIndex\n"]}, {"cell_type": "code", "execution_count": null, "id": "690a6918-7c75-4f95-9ccc-d2c4a1fe00d7", "metadata": {}, "outputs": [], "source": ["import logging\n", "import sys\n", "\n", "logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n", "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))\n", "\n", "from llama_index.core import VectorStoreIndex, SimpleDirectoryReader\n", "from IPython.display import Markdown, display\n", "\n", "import os\n", "\n", "os.environ[\"OPENAI_API_KEY\"] = \"sk-...\""]}, {"cell_type": "code", "execution_count": null, "id": "03d1691e-544b-454f-825b-5ee12f7faa8a", "metadata": {}, "outputs": [], "source": ["# load documents\n", "documents = SimpleDirectoryReader(\"./data/paul_graham/\").load_data()"]}, {"cell_type": "code", "execution_count": null, "id": "ad144ee7-96da-4dd6-be00-fd6cf0c78e58", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", "HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", "HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings 
"HTTP/1.1 200 OK\"\n", "HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"]}], "source": ["index = VectorStoreIndex.from_documents(documents, chunk_size=512)"]}, {"cell_type": "markdown", "id": "8b7d7c61-b5d7-4b8f-b90b-3ebee1103f27", "metadata": {}, "source": ["### Define Query + Langchain Output Parser Modules\n"]}, {"cell_type": "code", "execution_count": null, "id": "6fb88295-0840-4e2d-b79b-def0b0a63a7f", "metadata": {}, "outputs": [], "source": ["from llama_index.core.output_parsers import LangchainOutputParser\n", "from langchain.output_parsers import StructuredOutputParser, ResponseSchema"]}, {"cell_type": "markdown", "id": "bc25edf7-9343-4e82-a3f1-eec4281a9371", "metadata": {}, "source": ["**Define custom QA and Refine Prompts**\n"]}, {"cell_type": "code", "execution_count": null, "id": "a4b9201d-fe16-4cc0-8135-a08d9928625d", "metadata": {}, "outputs": [], "source": ["response_schemas = [\n", "    ResponseSchema(\n", "        name=\"Education\",\n", "        description=(\n", "            \"Describes the author's educational experience/background.\"\n", "        ),\n", "    ),\n", "    ResponseSchema(\n", "        name=\"Work\",\n", "        description=\"Describes the author's work experience/background.\",\n", "    ),\n", "]"]}, {"cell_type": "code", "execution_count": null, "id": "e73b87b8-90da-4ab8-9ff7-e40880277d9b", "metadata": {}, "outputs": [], "source": ["lc_output_parser = StructuredOutputParser.from_response_schemas(\n", "    response_schemas\n", ")\n", "output_parser = LangchainOutputParser(lc_output_parser)"]}, {"cell_type": "code", "execution_count": null, "id": "1ba18a80-35f4-4fd4-9b13-9f13f84db4fe", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Context information is below.\n", "---------------------\n", "{context_str}\n", "---------------------\n", "Given the context information and not prior knowledge, answer the query.\n", "Query: {query_str}\n", "Answer: \n", "\n", "The output should be a markdown code snippet formatted in the following schema, including the leading and trailing 
\"```json\" and \"```\":\n", "\n", "```json\n", "{{\n", "\t\"Education\": string // Describes the author's educational experience/background.\n", "\t\"Work\": string // Describes the author's work experience/background.\n", "}}\n", "```\n"]}], "source": ["from llama_index.core.prompts.default_prompts import (\n", "    DEFAULT_TEXT_QA_PROMPT_TMPL,\n", ")\n", "\n", "fmt_qa_tmpl = output_parser.format(DEFAULT_TEXT_QA_PROMPT_TMPL)\n", "print(fmt_qa_tmpl)"]}, {"cell_type": "markdown", "id": "b6caf93b-6345-4c65-a346-a95b0f1746c4", "metadata": {}, "source": ["### Query Index\n"]}, {"cell_type": "code", "execution_count": null, "id": "fb9cdf43-0f31-4c36-869b-df9fa50aebdb", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", "HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"]}, {"name": "stdout", "output_type": "stream", "text": ["INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", "HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n"]}], "source": ["from llama_index.llms.openai import OpenAI\n", "\n", "llm = OpenAI(output_parser=output_parser)\n", "\n", "query_engine = index.as_query_engine(\n", "    llm=llm,\n", ")\n", "response = query_engine.query(\n", "    \"What are a few things the author did growing up?\",\n", ")"]}, {"cell_type": "code", "execution_count": null, "id": "bc7760b6-5be3-4303-b97e-3f5edacf674b", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["{'Education': 'The author did not plan to study programming in college, but initially planned to study philosophy.', 'Work': 'Growing up, the author worked on writing short stories and programming. 
They wrote simple games, a program to predict rocket heights, and a word processor.'}\n"]}], "source": ["print(response)"]}], "metadata": {"kernelspec": {"display_name": "llama-index-4a-wkI5X-py3.11", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3"}}, "nbformat": 4, "nbformat_minor": 5}