# Turbopuffer Quickstart

```elixir
Mix.install([
  {:turbopuffer, "~> 0.1"},
  {:req, "~> 0.4"},
  {:kino, "~> 0.12"}
])
```

## Setup

Configure your Turbopuffer API key and create a client.

```elixir
# API tokens are created in the dashboard: https://turbopuffer.com/dashboard
api_key_input = Kino.Input.password("Turbopuffer API Key")
```

```elixir
api_key = Kino.Input.read(api_key_input)

# Pick the right region: https://turbopuffer.com/docs/regions
client =
  Turbopuffer.new(
    api_key: api_key,
    region: :gcp_us_central1
  )

namespace = Turbopuffer.namespace(client, "quickstart-example-ex")
```

## Embedding Generation

We'll create a helper function to generate embeddings using OpenAI or random vectors as fallback.

```elixir
openai_key_input = Kino.Input.password("OpenAI API Key (optional)")
```

```elixir
openai_api_key = Kino.Input.read(openai_key_input)

# Helper function to create embeddings with OpenAI, or use random vectors as fallback.
#
# Takes the text to embed and returns a list of floats:
#   * the OpenAI embedding when a key is set and the request succeeds,
#   * a random 1536-element vector when a key is set but the request fails,
#   * a random 128-element vector when no key is set.
create_embedding = fn text ->
  if openai_api_key && openai_api_key != "" do
    # Use OpenAI API to generate embeddings
    response =
      Req.post(
        "https://api.openai.com/v1/embeddings",
        json: %{
          model: "text-embedding-3-small",
          input: text
        },
        headers: [
          {"authorization", "Bearer #{openai_api_key}"},
          {"content-type", "application/json"}
        ]
      )

    case response do
      # The embedding's shape is matched in the clause head so that a 200 response
      # with an unexpected body falls through to the fallback below instead of
      # raising while digging through `nil`.
      {:ok, %{status: 200, body: %{"data" => [%{"embedding" => embedding} | _]}}} ->
        embedding

      _ ->
        IO.puts("Failed to get OpenAI embedding, using random vectors")
        # Generate random vector of dimension 1536 (text-embedding-3-small dimension)
        for _ <- 1..1536, do: :rand.uniform()
    end
  else
    IO.puts("OpenAI API key not set, using random vectors")
    # For demo purposes, using smaller dimension when no API key
    for _ <- 1..128, do: :rand.uniform()
  end
end
```

## Write Documents with Schema Configuration

Insert documents with vectors and attributes, configuring the schema on first write.
```elixir
# Each tuple is {id, name, public, text}; the text is embedded and also stored
# as an attribute so it can be used for full-text search later on.
rows =
  for {id, name, public, text} <- [
        {1, "foo", 1, "walrus narwhal"},
        {2, "foo", 0, "elephant walrus rhino"},
        {3, "bar", 1, "quick brown fox"}
      ] do
    %{id: id, vector: create_embedding.(text), name: name, public: public, text: text}
  end
```

```elixir
# Upsert the rows; the schema passed here enables full-text search on "text".
# Attributes can be provided either nested under "attributes" key or directly in the row
{:ok, _} =
  Turbopuffer.write(
    namespace,
    upsert_rows: rows,
    distance_metric: "cosine_distance",
    schema: %{"text" => %{"type" => "string", "full_text_search" => true}}
  )

IO.puts("Documents written successfully with schema configuration!")
```

## Query Vectors

Query nearest neighbors with filters.

```elixir
query_vector = create_embedding.("walrus narwhal")
```

```elixir
# Nearest-neighbour query restricted by attribute filters
{:ok, results} =
  Turbopuffer.query(
    namespace,
    vector: query_vector,
    top_k: 10,
    filters: %{"name" => "foo", "public" => 1},
    include_attributes: ["name", "text"]
  )

IO.puts("Vector search results:")

Enum.each(results, fn hit ->
  # A result without a distance is shown as "N/A"
  shown = if hit.dist, do: Float.round(hit.dist, 4), else: "N/A"
  IO.puts("ID: #{hit.id}, Distance: #{shown}, Name: #{hit.attributes["name"]}")
end)

results
```

## Full-Text Search

Perform full-text search on the text attribute.

```elixir
# Text query against the "text" attribute, restricted by an attribute filter
{:ok, text_results} =
  Turbopuffer.text_search(
    namespace,
    query: "quick walrus",
    attribute: "text",
    top_k: 10,
    filters: %{"name" => "foo"}
  )

IO.puts("Full-text search results:")

Enum.each(text_results, fn hit ->
  # A result without a score is shown as "N/A"
  shown = if hit.dist, do: Float.round(hit.dist, 4), else: "N/A"
  IO.puts("ID: #{hit.id}, Score: #{shown}, Text: #{hit.attributes["text"]}")
end)

text_results
```

## Hybrid Search

Combine vector and text search for better results.
```elixir
query_vector = create_embedding.("walrus")
```

```elixir
# One search that uses both the query vector and a text query on "text"
{:ok, hybrid_results} =
  Turbopuffer.hybrid_search(
    namespace,
    vector: query_vector,
    text_query: "walrus",
    text_attribute: "text",
    top_k: 10
  )

IO.puts("Hybrid search results:")

Enum.each(hybrid_results, fn hit ->
  # A result without a score is shown as "N/A"
  shown = if hit.dist, do: Float.round(hit.dist, 4), else: "N/A"
  IO.puts("ID: #{hit.id}, Score: #{shown}")
end)

hybrid_results
```

## Update Vectors

Update existing vectors by providing new data for existing IDs.

```elixir
rows = [
  %{id: 1, vector: create_embedding.("updated content"), name: "updated", public: 1}
]
```

```elixir
# Writing a row whose ID already exists updates that vector.
# Attributes can be provided directly in the row
{:ok, _} = Turbopuffer.write(namespace, upsert_rows: rows, distance_metric: "cosine_distance")

IO.puts("Vector 1 updated successfully!")
```

```elixir
query_vector = create_embedding.("updated")
```

```elixir
# Query again to inspect the stored names after the update
{:ok, updated_results} =
  Turbopuffer.query(
    namespace,
    vector: query_vector,
    top_k: 10,
    include_attributes: ["name", "text"]
  )

IO.puts("\nVectors after update:")

# Kept as a comprehension: it is the last expression, so its value is the cell result.
for hit <- updated_results do
  label = hit.attributes["name"] || "N/A"
  IO.puts("ID: #{hit.id}, Name: #{label}")
end
```

## Delete Vectors

Delete vectors by their IDs using the write function.

```elixir
# Deleting is a write that lists the IDs to remove
{:ok, _} = Turbopuffer.write(namespace, deletes: [1, 3])

IO.puts("Vectors 1 and 3 deleted successfully!")

# Query once more to see what is left
{:ok, remaining} =
  Turbopuffer.query(
    namespace,
    vector: create_embedding.("test"),
    top_k: 10,
    include_attributes: ["name"]
  )

if Enum.empty?(remaining) do
  IO.puts("No vectors remaining")
else
  # One "ID/Name" line per remaining vector, joined with newlines
  listing =
    Enum.map_join(remaining, "\n", fn vec ->
      "ID: #{vec.id}, Name: #{vec.attributes["name"] || "N/A"}"
    end)

  IO.puts("Remaining vectors: #{listing}")
end
```

## Clean Up

Optionally delete the namespace when done.
```elixir
# Uncomment to delete the namespace
# (left commented out so that evaluating the whole notebook does not delete the namespace)
# {:ok, _} = Turbopuffer.delete_namespace(namespace)
# IO.puts("Namespace deleted")
```

## Summary

This Livebook demonstrates:

1. **Vector Operations**: Writing and querying vectors with the Turbopuffer API
2. **Full-Text Search**: Using BM25 ranking for text search
3. **Hybrid Search**: Combining vector and text search using multi-query
4. **Filtering**: Applying metadata filters to queries
5. **Updates and Deletes**: Managing vector lifecycle with the write function

Key differences from the Python version:

* Uses Elixir's pattern matching and functional approach
* Results are returned as `Result` structs with fields: `id`, `dist`, `attributes`, `vector`
* All operations return `{:ok, result}` or `{:error, reason}` tuples
* Uses Kino for interactive inputs in Livebook
* Attributes can be provided either nested under an `attributes` key or directly in the row map
* Filters use simple map syntax: `%{"name" => "foo", "public" => 1}`

For more information, see:

* [Turbopuffer Documentation](https://turbopuffer.com/docs)
* [Hybrid Search Guide](https://turbopuffer.com/docs/hybrid-search)
* [API Reference](https://turbopuffer.com/docs/reference)