{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "intro-overview",
   "metadata": {},
   "source": [
    "# Permission-aware RAG with SpiceDB, Pinecone and OpenAI\n",
    "\n",
    "This notebook stores three example documents in a Pinecone index, records in SpiceDB which agent may read which document, and only passes a document's text to the OpenAI chat model when the calling agent has the `read` permission on it.\n",
    "\n",
    "**Prerequisites:** a `.env` file (or environment) that provides `SPICEDB_ADDR`, `SPICEDB_API_KEY`, `PINECONE_API_KEY` and `OPENAI_API_KEY`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7bc368b2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ✅ Install dependencies\n",
    "# `python-dotenv` provides the `dotenv` module imported in the next cell;\n",
    "# `pinecone` is the current name of the SDK formerly published as `pinecone-client`.\n",
    "%pip install openai pinecone authzed python-dotenv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6bd936f3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ✅ Load environment and import everything the notebook uses\n",
    "import os\n",
    "import re\n",
    "\n",
    "from authzed.api.v1 import (\n",
    "    Client,\n",
    "    LookupResourcesRequest,\n",
    "    ObjectReference,\n",
    "    Relationship,\n",
    "    RelationshipUpdate,\n",
    "    SubjectReference,\n",
    "    WriteRelationshipsRequest,\n",
    "    WriteSchemaRequest,\n",
    ")\n",
    "from dotenv import load_dotenv\n",
    "from grpcutil import insecure_bearer_token_credentials\n",
    "from openai import OpenAI\n",
    "from openai.types.chat import ChatCompletion\n",
    "from pinecone import Pinecone, ServerlessSpec\n",
    "\n",
    "load_dotenv(override=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4cfa1fd3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fail early with a readable message instead of passing None to the clients.\n",
    "REQUIRED_ENV_VARS = (\"SPICEDB_ADDR\", \"SPICEDB_API_KEY\", \"PINECONE_API_KEY\", \"OPENAI_API_KEY\")\n",
    "missing_vars = [name for name in REQUIRED_ENV_VARS if not os.getenv(name)]\n",
    "if missing_vars:\n",
    "    raise RuntimeError(f\"Missing environment variables: {', '.join(missing_vars)}\")\n",
    "\n",
    "# Connect to SpiceDB\n",
    "# NOTE: never print the API key here - notebook outputs are saved with the file.\n",
    "# The insecure_* credentials send the token without TLS (local/dev SpiceDB).\n",
    "spicedb_client = Client(\n",
    "    os.getenv(\"SPICEDB_ADDR\"),\n",
    "    insecure_bearer_token_credentials(os.getenv(\"SPICEDB_API_KEY\")),\n",
    ")\n",
    "\n",
    "# Connect to Pinecone\n",
    "pc = Pinecone(api_key=os.getenv(\"PINECONE_API_KEY\"))\n",
    "\n",
    "index_name = \"agents\"\n",
    "\n",
    "# Create the index only when it is missing so this cell can be re-run;\n",
    "# create_index fails if an index with this name already exists.\n",
    "if index_name not in pc.list_indexes().names():\n",
    "    pc.create_index(\n",
    "        name=index_name,\n",
    "        dimension=1536,  # size of text-embedding-ada-002 vectors\n",
    "        metric=\"cosine\",\n",
    "        spec=ServerlessSpec(cloud=\"aws\", region=\"us-east-1\"),\n",
    "    )\n",
    "\n",
    "index = pc.Index(index_name)\n",
    "\n",
    "# Connect to OpenAI\n",
    "openai_client = OpenAI(api_key=os.getenv(\"OPENAI_API_KEY\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dbff5c64",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "✅ SpiceDB schema applied.\n"
     ]
    }
   ],
   "source": [
    "# ✅ Define SpiceDB Schema\n",
    "# An agent that is a `reader` of a document gets the `read` permission on it.\n",
    "SCHEMA = \"\"\"\n",
    "definition agent {}\n",
    "\n",
    "definition document {\n",
    "    relation reader: agent\n",
    "    permission read = reader\n",
    "}\n",
    "\"\"\"\n",
    "\n",
    "try:\n",
    "    await spicedb_client.WriteSchema(WriteSchemaRequest(schema=SCHEMA))\n",
    "    print(\"✅ SpiceDB schema applied.\")\n",
    "except Exception as e:\n",
    "    print(f\"❌ Schema error: {type(e).__name__}: {e}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "778ff0fd",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "✅ Relationships written.\n"
     ]
    }
   ],
   "source": [
    "# ✅ Grant read access: (document id, agent id) pairs\n",
    "relationships = [\n",
    "    (\"doc-1\", \"sales-agent\"),\n",
    "    (\"doc-2\", \"sales-agent\"),\n",
    "    (\"doc-3\", \"hr-agent\"),\n",
    "]\n",
    "\n",
    "# OPERATION_TOUCH upserts the relationship, so re-running this cell is safe.\n",
    "updates = [\n",
    "    RelationshipUpdate(\n",
    "        operation=RelationshipUpdate.Operation.OPERATION_TOUCH,\n",
    "        relationship=Relationship(\n",
    "            resource=ObjectReference(object_type=\"document\", object_id=doc),\n",
    "            relation=\"reader\",\n",
    "            subject=SubjectReference(\n",
    "                object=ObjectReference(object_type=\"agent\", object_id=agent)\n",
    "            ),\n",
    "        ),\n",
    "    )\n",
    "    for doc, agent in relationships\n",
    "]\n",
    "\n",
    "try:\n",
    "    await spicedb_client.WriteRelationships(WriteRelationshipsRequest(updates=updates))\n",
    "    print(\"✅ Relationships written.\")\n",
    "except Exception as e:\n",
    "    print(f\"❌ Relationship error: {type(e).__name__}: {e}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "adcda264",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "✅ Documents upserted to Pinecone.\n"
     ]
    }
   ],
   "source": [
    "# ✅ Upsert example documents to Pinecone\n",
    "def get_query_embedding(text):\n",
    "    \"\"\"Return the `text-embedding-ada-002` embedding vector for `text`.\"\"\"\n",
    "    response = openai_client.embeddings.create(\n",
    "        model=\"text-embedding-ada-002\", input=text\n",
    "    )\n",
    "    return response.data[0].embedding\n",
    "\n",
    "documents = [\n",
    "    {\"doc_id\": \"doc-1\", \"text\": \"Sales reports for Q4 2024, 33% increase in revenue this quarter!\"},\n",
    "    {\"doc_id\": \"doc-2\", \"text\": \"Customer usecases for sales team\"},\n",
    "    {\"doc_id\": \"doc-3\", \"text\": \"Employee handbook for company policies and benefits.\"}\n",
    "]\n",
    "\n",
    "# The vector id doubles as the document id; the text travels in the metadata\n",
    "# so it can be read back from query and fetch results.\n",
    "to_upsert = [\n",
    "    {\n",
    "        \"id\": doc[\"doc_id\"],\n",
    "        \"values\": get_query_embedding(doc[\"text\"]),\n",
    "        \"metadata\": {\"doc_id\": doc[\"doc_id\"], \"text\": doc[\"text\"]},\n",
    "    }\n",
    "    for doc in documents\n",
    "]\n",
    "\n",
    "index.upsert(vectors=to_upsert)\n",
    "print(\"✅ Documents upserted to Pinecone.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "21557557",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ✅ Helper: Get authorized documents for agent\n",
    "async def get_authorized_documents(agent_id: str):\n",
    "    \"\"\"Return the IDs of all documents on which `agent_id` has `read` permission.\n",
    "\n",
    "    Issues a SpiceDB LookupResources request for resource type `document` and\n",
    "    collects `resource_object_id` from every streamed response into a list.\n",
    "    \"\"\"\n",
    "    subject = SubjectReference(\n",
    "        object=ObjectReference(object_type=\"agent\", object_id=agent_id)\n",
    "    )\n",
    "    lookup = spicedb_client.LookupResources(\n",
    "        LookupResourcesRequest(\n",
    "            subject=subject,\n",
    "            permission=\"read\",\n",
    "            resource_object_type=\"document\",\n",
    "        )\n",
    "    )\n",
    "\n",
    "    return [res.resource_object_id async for res in lookup]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a8e18a87",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ✅ Main query function with authorization check on every retrieved match\n",
    "async def query_rag_with_authz(agent_id, user_query):\n",
    "    \"\"\"Answer `user_query` using only documents that `agent_id` may read.\n",
    "\n",
    "    The query is embedded, the closest Pinecone matches are retrieved, and each\n",
    "    match is checked against the agent's SpiceDB `read` permissions. The text of\n",
    "    an unauthorized match is never added to the prompt; a placeholder line is\n",
    "    added instead. If the query names an unauthorized document ID explicitly,\n",
    "    a refusal string is returned without calling the chat model.\n",
    "\n",
    "    Returns the chat model's answer, a refusal message, or a message saying\n",
    "    that no matching documents were found.\n",
    "    \"\"\"\n",
    "    vector = get_query_embedding(user_query)\n",
    "    pinecone_results = index.query(vector=vector, top_k=5, include_metadata=True)\n",
    "\n",
    "    allowed_ids = await get_authorized_documents(agent_id)\n",
    "    print(f\"🎯 Allowed IDs: {allowed_ids}\")\n",
    "    allowed_lookup = set(allowed_ids)\n",
    "\n",
    "    results_output = []\n",
    "    unauthorized_docs = []\n",
    "\n",
    "    for match in pinecone_results[\"matches\"]:\n",
    "        metadata = match[\"metadata\"] or {}\n",
    "        # Vectors are upserted with id == doc_id, so fall back to the vector id\n",
    "        # when the metadata has no doc_id (re.escape(None) below would raise).\n",
    "        doc_id = metadata.get(\"doc_id\") or match[\"id\"]\n",
    "        print(f\"🔎 Checking doc_id: {doc_id}\")\n",
    "\n",
    "        if doc_id in allowed_lookup:\n",
    "            text = metadata.get(\"text\", \"\")\n",
    "            results_output.append(f\"[Authorized: {doc_id}]\\n{text}\")\n",
    "        else:\n",
    "            results_output.append(f\"[Not authorized: {doc_id}]\\nYou are not authorized to view the contents of document '{doc_id}'.\")\n",
    "            unauthorized_docs.append(doc_id)\n",
    "\n",
    "    # Refuse outright when the query mentions an unauthorized document by ID.\n",
    "    for doc_id in unauthorized_docs:\n",
    "        pattern = r'\\b' + re.escape(doc_id) + r'\\b'\n",
    "        if re.search(pattern, user_query):\n",
    "            return f\"You are not authorized to view the contents of document '{doc_id}'.\"\n",
    "\n",
    "    if not results_output:\n",
    "        return \"⛔ No matching documents found.\"\n",
    "\n",
    "    prompt = (\n",
    "        \"You are an AI assistant. Answer ONLY based on the following context.\\n\\n\"\n",
    "        + \"\\n\\n\".join(results_output)\n",
    "        + f\"\\n\\nQ: {user_query}\\nA:\"\n",
    "    )\n",
    "\n",
    "    chat = openai_client.chat.completions.create(\n",
    "        model=\"gpt-4\",\n",
    "        messages=[{\"role\": \"user\", \"content\": prompt}],\n",
    "        temperature=0\n",
    "    )\n",
    "\n",
    "    return chat.choices[0].message.content"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6ca7dfd2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ✅ Demo test\n",
    "await query_rag_with_authz(\"sales-agent\", \"What is the content of project doc-1?\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "09211b07",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate a summary of all the documents the AI agent has access to\n",
    "\n",
    "async def summarize_accessible_docs(agent_id: str):\n",
    "    \"\"\"Summarize every document on which `agent_id` has `read` permission.\n",
    "\n",
    "    Looks up the readable document IDs in SpiceDB, fetches their stored text\n",
    "    from Pinecone by ID, and asks the chat model for a summary of that text.\n",
    "\n",
    "    Returns the summary, or a message string when the agent has no accessible\n",
    "    documents or none of them has stored text.\n",
    "    \"\"\"\n",
    "    # Step 1: Get documents the agent has \"read\" permission on\n",
    "    document_ids = await get_authorized_documents(agent_id)\n",
    "    print(f\"🔍 {agent_id} can access: {document_ids}\")\n",
    "\n",
    "    if not document_ids:\n",
    "        return \"❌ No accessible documents.\"\n",
    "\n",
    "    # ✅ Step 2: Fetch content from Pinecone using fetch\n",
    "    fetch_response = index.fetch(ids=document_ids)\n",
    "\n",
    "    docs_to_summarize = []\n",
    "    for doc_id in document_ids:\n",
    "        vector_data = fetch_response.vectors.get(doc_id)\n",
    "        if not vector_data:\n",
    "            print(f\"⚠️ No vector found for {doc_id}\")\n",
    "            continue\n",
    "        content = (vector_data.metadata or {}).get(\"text\", \"\")\n",
    "        if not content:\n",
    "            print(f\"⚠️ No text stored for {doc_id}\")\n",
    "            continue\n",
    "        docs_to_summarize.append(content)\n",
    "\n",
    "    if not docs_to_summarize:\n",
    "        return \"❌ No content found to summarize.\"\n",
    "\n",
    "    combined_text = \"\\n\\n\".join(docs_to_summarize)\n",
    "\n",
    "    # Step 3: Summarize with OpenAI, using grounded prompt\n",
    "    summary_prompt = (\n",
    "        \"You are an AI assistant. Based ONLY on the following documents, \"\n",
    "        \"generate a concise summary of their contents. Do not use any outside knowledge.\\n\\n\"\n",
    "        + combined_text\n",
    "        + \"\\n\\nSummary:\"\n",
    "    )\n",
    "\n",
    "    chat_response: ChatCompletion = openai_client.chat.completions.create(\n",
    "        messages=[{\"role\": \"user\", \"content\": summary_prompt}],\n",
    "        model=\"gpt-4\",\n",
    "        temperature=0  # for factual, deterministic summaries\n",
    "    )\n",
    "\n",
    "    return chat_response.choices[0].message.content"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ec2c7fbf",
   "metadata": {},
   "outputs": [],
   "source": [
    "summary = await summarize_accessible_docs(\"sales-agent\")\n",
    "print(summary)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a539495b",
   "metadata": {},
   "source": [
    "**Expected output of the previous cell** (the wording of the summary may vary):\n",
    "\n",
    "```text\n",
    "🔍 sales-agent can access: ['doc-1', 'doc-2']\n",
    "The documents contain sales reports for Q4 2024, indicating a 33% increase in revenue for that quarter. They also include customer use cases for the sales team.\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dd1d26fd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ❌ Remove permission for sales-agent to view doc-2\n",
    "\n",
    "try:\n",
    "    await spicedb_client.WriteRelationships(\n",
    "        WriteRelationshipsRequest(\n",
    "            updates=[\n",
    "                RelationshipUpdate(\n",
    "                    operation=RelationshipUpdate.Operation.OPERATION_DELETE,\n",
    "                    relationship=Relationship(\n",
    "                        resource=ObjectReference(object_type=\"document\", object_id=\"doc-2\"),\n",
    "                        relation=\"reader\",\n",
    "                        subject=SubjectReference(\n",
    "                            object=ObjectReference(\n",
    "                                object_type=\"agent\",\n",
    "                                object_id=\"sales-agent\",\n",
    "                            )\n",
    "                        ),\n",
    "                    ),\n",
    "                ),\n",
    "            ]\n",
    "        )\n",
    "    )\n",
    "    print(\"✅ Relationship removed.\")\n",
    "except Exception as e:\n",
    "    print(f\"Write relationships error: {type(e).__name__}: {e}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fea41ae1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "🎯 Allowed IDs: ['doc-1']\n",
      "🔎 Checking doc_id: doc-3\n",
      "🔎 Checking doc_id: doc-2\n",
      "🔎 Checking doc_id: doc-1\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "\"You are not authorized to view the contents of document 'doc-2'.\""
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# ✅ Demo test\n",
    "await query_rag_with_authz(\"sales-agent\", \"What is the content of project doc-2?\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}