---
name: vectordb
description: Vector database operations for embeddings and semantic search. Activate for Pinecone, Weaviate, Chroma, pgvector, RAG, and similarity search.
allowed-tools:
  - Bash
  - Read
  - Write
  - Edit
  - Glob
  - Grep
---

# Vector Database Skill

Provides comprehensive vector database capabilities for the Golden Armada AI Agent Fleet Platform.

## When to Use This Skill

Activate this skill when working with:

- Semantic search implementation
- RAG (Retrieval-Augmented Generation)
- Embedding storage and retrieval
- Similarity search
- Vector index management

## Embedding Generation

```python
import os

from openai import OpenAI

client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

# Single embedding
def get_openai_embedding(text: str) -> list[float]:
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=text,
    )
    return response.data[0].embedding

# Batch embeddings: one request for many inputs
def get_batch_embeddings(texts: list[str]) -> list[list[float]]:
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=texts,
    )
    return [item.embedding for item in response.data]
```

## Pinecone

```python
import os

from pinecone import Pinecone, ServerlessSpec

# Initialize the client
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

# Create a serverless index (dimension must match the embedding model)
pc.create_index(
    name="agents",
    dimension=1536,
    metric="cosine",
    spec=ServerlessSpec(cloud="aws", region="us-west-2"),
)

# Get a handle to the index
index = pc.Index("agents")

# Upsert vectors
index.upsert(
    vectors=[
        {
            "id": "agent-1",
            "values": embedding,
            "metadata": {
                "name": "Claude Agent",
                "type": "claude",
                "description": "General assistant",
            },
        }
    ],
    namespace="production",
)

# Query with a metadata filter
results = index.query(
    vector=query_embedding,
    top_k=10,
    include_metadata=True,
    namespace="production",
    filter={"type": {"$eq": "claude"}},
)

for match in results.matches:
    print(f"{match.id}: {match.score} - {match.metadata}")

# Delete by ID
index.delete(ids=["agent-1"], namespace="production")
```

## Chroma

```python
import chromadb

# Initialize a persistent client (data is stored on disk)
client = chromadb.PersistentClient(path="./chroma_db")

# Create (or fetch) a collection that uses cosine distance
collection = client.get_or_create_collection(
    name="agents",
    metadata={"hnsw:space": "cosine"},
)

# Add documents
collection.add(
    ids=["agent-1", "agent-2"],
    embeddings=[embedding1, embedding2],
    documents=["Document 1 text", "Document 2 text"],
    metadatas=[
        {"type": "claude", "version": "3"},
        {"type": "gpt", "version": "4"},
    ],
)

# Query with a metadata filter
results = collection.query(
    query_embeddings=[query_embedding],
    n_results=10,
    where={"type": "claude"},
    include=["documents", "metadatas", "distances"],
)

# Update metadata in place
collection.update(
    ids=["agent-1"],
    metadatas=[{"type": "claude", "version": "3.5"}],
)

# Delete by ID
collection.delete(ids=["agent-1"])
```

## pgvector (PostgreSQL)

```sql
-- Enable the extension
CREATE EXTENSION vector;

-- Create table
CREATE TABLE documents (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    content TEXT NOT NULL,
    embedding vector(1536),
    metadata JSONB DEFAULT '{}'
);

-- Create index (IVFFlat scales well on larger datasets)
CREATE INDEX ON documents USING ivfflat (embedding vector_cosine_ops)
WITH (lists = 100);

-- Or HNSW for better recall
CREATE INDEX ON documents USING hnsw (embedding vector_cosine_ops);

-- Insert
INSERT INTO documents (content, embedding, metadata)
VALUES ('Document content', '[0.1, 0.2, ...]', '{"type": "manual"}');

-- Similarity search (<=> is cosine distance, so 1 - distance = similarity)
SELECT id, content, 1 - (embedding <=> $1) AS similarity
FROM documents
ORDER BY embedding <=> $1
LIMIT 10;

-- With a metadata filter
SELECT id, content, 1 - (embedding <=> $1) AS similarity
FROM documents
WHERE metadata->>'type' = 'manual'
ORDER BY embedding <=> $1
LIMIT 10;
```
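### Raw SQL from Python

Running the queries above from application code requires a driver that understands the `vector` type. This is a minimal sketch, assuming psycopg 3 and the `pgvector` Python package's adapter; the `DATABASE_URL` environment variable and the random placeholder embedding are illustrative assumptions, not part of the skill above.

```python
import os

import numpy as np
import psycopg
from pgvector.psycopg import register_vector

# Connect and register the vector type adapter (DATABASE_URL is assumed)
conn = psycopg.connect(os.environ["DATABASE_URL"])
register_vector(conn)

# Placeholder embedding; in practice, call get_openai_embedding()
query_embedding = np.random.rand(1536).astype(np.float32)

# Parameterized version of the similarity search above
rows = conn.execute(
    """
    SELECT id, content, 1 - (embedding <=> %s) AS similarity
    FROM documents
    ORDER BY embedding <=> %s
    LIMIT 10
    """,
    (query_embedding, query_embedding),
).fetchall()

for doc_id, content, similarity in rows:
    print(f"{doc_id}: {similarity:.3f}")
```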
### Python with pgvector

```python
import uuid

from pgvector.sqlalchemy import Vector
from sqlalchemy import JSON, Column, String, select
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class Document(Base):
    __tablename__ = "documents"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    content = Column(String, nullable=False)
    embedding = Column(Vector(1536))
    # "metadata" is reserved on declarative models, so map the attribute
    # under another name while keeping the underlying column name
    doc_metadata = Column("metadata", JSON, default=dict)

# Insert
doc = Document(
    content="Document content",
    embedding=embedding,
    doc_metadata={"type": "manual"},
)
session.add(doc)
session.commit()

# Query: pgvector attaches distance comparators to the column itself
results = session.execute(
    select(Document)
    .order_by(Document.embedding.cosine_distance(query_embedding))
    .limit(10)
).scalars().all()
```

## RAG Implementation

```python
class RAGService:
    def __init__(self, vector_store, llm_client):
        self.vector_store = vector_store
        self.llm = llm_client

    async def query(self, question: str, top_k: int = 5) -> str:
        # 1. Generate the query embedding
        # (assumes an embedding helper like those in Embedding Generation)
        query_embedding = await self.get_embedding(question)

        # 2. Retrieve relevant documents
        docs = await self.vector_store.search(
            embedding=query_embedding,
            top_k=top_k,
        )

        # 3. Build the context block
        context = "\n\n".join(
            f"Document {i + 1}:\n{doc.content}" for i, doc in enumerate(docs)
        )

        # 4. Generate the response
        prompt = f"""Answer the question based on the following context.

Context:
{context}

Question: {question}

Answer:"""
        return await self.llm.generate(prompt)

    async def add_document(self, content: str, metadata: dict | None = None):
        metadata = metadata or {}

        # Chunk the document
        chunks = self.chunk_text(content)

        # Generate embeddings for all chunks in one batch
        embeddings = await self.get_batch_embeddings(chunks)

        # Store one vector per chunk
        for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)):
            await self.vector_store.upsert(
                id=f"{metadata.get('doc_id', 'doc')}-{i}",
                embedding=embedding,
                content=chunk,
                metadata=metadata,
            )

    def chunk_text(
        self, text: str, chunk_size: int = 1000, overlap: int = 200
    ) -> list[str]:
        # Fixed-size chunks with overlap so context isn't cut mid-thought
        chunks = []
        start = 0
        while start < len(text):
            chunks.append(text[start:start + chunk_size])
            start += chunk_size - overlap
        return chunks
```

## Best Practices

1. **Choose the appropriate index type** (HNSW for recall, IVFFlat for scale)
2. **Chunk documents appropriately** (typically 500-1000 tokens)
3. **Include overlap** between chunks (10-20%)
4. **Store metadata** for filtering
5. **Use namespaces/collections** to organize data
6. **Monitor query latency** and index performance
7. **Batch operations** for bulk inserts (see the sketch after this list)
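A minimal sketch of practice 7, batching upserts against the Pinecone `index` handle from earlier; the batch size of 100 is an assumption, so check your provider's per-request limits.

```python
def batch_upsert(
    index,
    vectors: list[dict],
    namespace: str = "production",
    batch_size: int = 100,  # assumed limit; check your provider's docs
) -> None:
    """Upsert vectors in fixed-size batches to stay under request limits."""
    for start in range(0, len(vectors), batch_size):
        index.upsert(
            vectors=vectors[start:start + batch_size],
            namespace=namespace,
        )
```

Batching keeps each request under payload limits and makes partial failures easier to recover from: retry only the failed batch instead of re-sending the whole corpus.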