{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "8f2b4d1e",
   "metadata": {},
   "source": [
    "<a href=\"https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/docs/examples/data_connectors/FaissDemo.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"在 Colab 中打开\"/></a>\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5d974136",
   "metadata": {},
   "source": [
    "# Faiss 读取器\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "1f59b68b",
   "metadata": {},
   "source": [
    "如果您在Colab上打开这个笔记本,您可能需要安装LlamaIndex 🦙。\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c50f69e6",
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install llama-index-readers-faiss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4c4defdb",
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install llama-index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4026b434",
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "import sys\n",
    "\n",
    "logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n",
    "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b541d8ec",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Every name used by the later cells is imported here so the notebook\n",
    "# survives Restart Kernel -> Run All.\n",
    "import numpy as np\n",
    "from IPython.display import Markdown, display\n",
    "from llama_index.core import SummaryIndex\n",
    "from llama_index.readers.faiss import FaissReader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "90d37078",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the Faiss index.\n",
    "# For a guide on getting started with Faiss, see:\n",
    "# https://github.com/facebookresearch/faiss/wiki/Getting-started\n",
    "# Some example code is provided below.\n",
    "\n",
    "import faiss\n",
    "\n",
    "# # Example code\n",
    "# d = 8\n",
    "# docs = np.array([\n",
    "#     [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1],\n",
    "#     [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2],\n",
    "#     [0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3],\n",
    "#     [0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4],\n",
    "#     [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]\n",
    "# ])\n",
    "# # id_to_text_map is used for query retrieval\n",
    "# id_to_text_map = {\n",
    "#     0: \"aaaaaaaaa bbbbbbb cccccc\",\n",
    "#     1: \"foooooo barrrrrr\",\n",
    "#     2: \"tmp tmptmp tmp\",\n",
    "#     3: \"hello world hello world\",\n",
    "#     4: \"cat dog cat dog\"\n",
    "# }\n",
    "# # Build the index\n",
    "# index = faiss.IndexFlatL2(d)\n",
    "# index.add(docs)\n",
    "\n",
    "# Placeholders: replace the mapping and the `...` with your own data and a\n",
    "# built Faiss index before running the cells below.\n",
    "# NOTE(review): the commented example above keys the map by integer position,\n",
    "# while this placeholder uses string ids; confirm which key type\n",
    "# FaissReader.load_data expects.\n",
    "id_to_text_map = {\n",
    "    \"id1\": \"文本块1\",\n",
    "    \"id2\": \"文本块2\",\n",
    "}\n",
    "index = ..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fd470a09",
   "metadata": {},
   "outputs": [],
   "source": [
    "reader = FaissReader(index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c33084c5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# When loading data from the Faiss index, you must specify:\n",
    "# k: the number of nearest neighbors\n",
    "# query: a 2D embedding representation of the queries (one query per row)\n",
    "k = 4\n",
    "# The `...` below are placeholders for your own query embedding vectors.\n",
    "query1 = np.array([...])\n",
    "query2 = np.array([...])\n",
    "query = np.array([query1, query2])\n",
    "\n",
    "documents = reader.load_data(query=query, id_to_text_map=id_to_text_map, k=k)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0b74697a",
   "metadata": {},
   "source": [
    "### 创建索引\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e85d7e5b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use a distinct name so the Faiss `index` defined earlier is not overwritten.\n",
    "summary_index = SummaryIndex.from_documents(documents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "31c3b68f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set the logging level to DEBUG for more detailed output.\n",
    "# Replace the empty string with your query text.\n",
    "query_engine = summary_index.as_query_engine()\n",
    "response = query_engine.query(\"\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "56fce3fb",
   "metadata": {},
   "outputs": [],
   "source": [
    "display(Markdown(f\"{response}\"))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}