{ "id": "3Lih0LVosR8dZbla", "meta": { "instanceId": "workflow-2df76d89", "versionId": "1.0.0", "createdAt": "2025-09-29T07:07:49.316470", "updatedAt": "2025-09-29T07:07:49.316519", "owner": "n8n-user", "license": "MIT", "category": "automation", "status": "active", "priority": "high", "environment": "production" }, "name": "Create AI-Ready Vector Datasets for LLMs with Bright Data, Gemini & Pinecone", "tags": [ "automation", "n8n", "production-ready", "excellent", "optimized" ], "nodes": [ { "id": "0a468953-e348-420e-a6b3-c55fb20d3cbf", "name": "When clicking ‘Test workflow’", "type": "n8n-nodes-base.manualTrigger", "position": [ 200, -710 ], "parameters": {}, "typeVersion": 1, "notes": "This manualTrigger node performs automated tasks as part of the workflow." }, { "id": "3725e480-246f-4f32-b0a7-b946cacbe830", "name": "AI Agent", "type": "n8n-nodes-base.noOp", "position": [ 1236, -60 ], "parameters": { "text": "=Format the below search result\n\n{{ $json.output.search_result }}", "options": {}, "promptType": "define", "hasOutputParser": true }, "typeVersion": 1.8, "notes": "This agent node performs automated tasks as part of the workflow." }, { "id": "30a12b8e-02f5-4b2e-bf9f-20fd9658405e", "name": "Pinecone Vector Store", "type": "n8n-nodes-base.noOp", "position": [ 1628, -10 ], "parameters": { "mode": "insert", "options": {}, "pineconeIndex": { "__rl": true, "mode": "list", "value": "hacker-news", "cachedResultName": "hacker-news" } }, "credentials": { "pineconeApi": { "id": "wdfRQ6NE8yjCDFhY", "name": "PineconeApi account" } }, "typeVersion": 1.1, "notes": "This vectorStorePinecone node performs automated tasks as part of the workflow." 
}, { "id": "1738dea6-fa4f-4a8d-a6fb-2f01feb1a6d5", "name": "Embeddings Google Gemini", "type": "n8n-nodes-base.noOp", "position": [ 1612, 210 ], "parameters": { "modelName": "models/text-embedding-004" }, "credentials": { "googlePalmApi": { "id": "YeO7dHZnuGBVQKVZ", "name": "Google Gemini(PaLM) Api account" } }, "typeVersion": 1, "notes": "This embeddingsGoogleGemini node performs automated tasks as part of the workflow." }, { "id": "e6443541-de71-4d26-ad58-d7c72868a190", "name": "Default Data Loader", "type": "n8n-nodes-base.noOp", "position": [ 1760, 220 ], "parameters": { "options": {}, "jsonData": "={{ $('Information Extractor with Data Formatter').item.json.output.search_result }}", "jsonMode": "expressionData" }, "typeVersion": 1, "notes": "This documentDefaultDataLoader node performs automated tasks as part of the workflow." }, { "id": "09ffc8cd-096f-47fe-937d-f8ab4fb41266", "name": "Recursive Character Text Splitter", "type": "n8n-nodes-base.noOp", "position": [ 1820, 410 ], "parameters": { "options": {} }, "typeVersion": 1, "notes": "This textSplitterRecursiveCharacterTextSplitter node performs automated tasks as part of the workflow." }, { "id": "90cc9aa4-0931-4c52-8734-e4e0de820205", "name": "Google Gemini Chat Model1", "type": "n8n-nodes-base.noOp", "position": [ 1240, 160 ], "parameters": { "options": {}, "modelName": "models/gemini-2.0-flash-exp" }, "credentials": { "googlePalmApi": { "id": "YeO7dHZnuGBVQKVZ", "name": "Google Gemini(PaLM) Api account" } }, "typeVersion": 1, "notes": "This lmChatGoogleGemini node performs automated tasks as part of the workflow." 
}, { "id": "1090a4af-7e5d-446b-a537-3afe48cd4909", "name": "Google Gemini Chat Model2", "type": "n8n-nodes-base.noOp", "position": [ 948, -340 ], "parameters": { "options": {}, "modelName": "models/gemini-2.0-flash-exp" }, "credentials": { "googlePalmApi": { "id": "YeO7dHZnuGBVQKVZ", "name": "Google Gemini(PaLM) Api account" } }, "typeVersion": 1, "notes": "This lmChatGoogleGemini node performs automated tasks as part of the workflow." }, { "id": "324c530c-0a03-411e-acb0-d82e9dc635cf", "name": "Google Gemini Chat Model", "type": "n8n-nodes-base.noOp", "position": [ 948, 160 ], "parameters": { "options": {}, "modelName": "models/gemini-2.0-flash-exp" }, "credentials": { "googlePalmApi": { "id": "YeO7dHZnuGBVQKVZ", "name": "Google Gemini(PaLM) Api account" } }, "typeVersion": 1, "notes": "This lmChatGoogleGemini node performs automated tasks as part of the workflow." }, { "id": "3226a2d6-ade1-4d6a-95c5-0be4d787a947", "name": "Structured Output Parser", "type": "n8n-nodes-base.noOp", "position": [ 1400, 160 ], "parameters": { "jsonSchemaExample": "[{\n\t\"id\": \"\",\n\t\"title\": \"\",\n \"summary\": \"\",\n \"keywords\": [\"\"],\n \"topics\": [\"\"]\n}]" }, "typeVersion": 1.2, "notes": "This outputParserStructured node performs automated tasks as part of the workflow." }, { "id": "a739a314-900a-4ef7-9cc2-1b65374e2e05", "name": "Sticky Note", "type": "n8n-nodes-base.stickyNote", "position": [ 40, -360 ], "parameters": { "width": 480, "height": 220, "content": "## Note\nPlease make sure to set the URL for web crawling. \n\nWeb-Unlocker Product is being utilized for performing the web scraping. \n\nThis workflow is utilizing the Basic LLM Chain, Information Extraction with the AI Agents for formatting, extracting and persisting the response in Pinecone Vector Database" }, "typeVersion": 1, "notes": "This stickyNote node performs automated tasks as part of the workflow." 
}, { "id": "3dca6d46-c423-4fb5-a6e4-c2aa2852d51c", "name": "Set Fields - URL and Webhook URL", "type": "n8n-nodes-base.set", "notes": "Set the URL you want to scrape data from", "position": [ 420, -710 ], "parameters": { "options": {}, "assignments": { "assignments": [ { "id": "1c132dd6-31e4-453b-a8cf-cad9845fe55b", "name": "url", "type": "string", "value": "{{ $env.API_BASE_URL }}" }, { "id": "90f3272b-d13d-44e2-8b4c-0943648cfce9", "name": "webhook_url", "type": "string", "value": "{{ $env.WEBHOOK_URL }}" } ] } }, "notesInFlow": true, "typeVersion": 3.4 }, { "id": "216a3261-a398-484c-9bf4-ca5966b829b6", "name": "Make a web request", "type": "n8n-nodes-base.httpRequest", "position": [ 640, -260 ], "parameters": { "url": "{{ $env.API_BASE_URL }}", "method": "POST", "options": {}, "sendBody": true, "sendHeaders": true, "authentication": "{{ $credentials.genericCredentialType }}", "bodyParameters": { "parameters": [ { "name": "zone", "value": "web_unlocker1" }, { "name": "url", "value": "={{ $json.url }}" }, { "name": "format", "value": "raw" } ] }, "genericAuthType": "httpHeaderAuth", "headerParameters": { "parameters": [ {} ] } }, "credentials": { "httpHeaderAuth": { "id": "kdbqXuxIR8qIxF7y", "name": "Header Auth account" } }, "typeVersion": 4.2, "notes": "This httpRequest node performs automated tasks as part of the workflow." }, { "id": "0c74e21c-3007-4297-b6ab-8ee17f4c6436", "name": "Structured JSON Data Formatter", "type": "n8n-nodes-base.noOp", "position": [ 860, -560 ], "parameters": { "text": "=Format the below response and produce a textual data. 
Output the response as per the below JSON schema.\n\nHere's the input: {{ $json.data }}\nHere's the JSON schema: \n\n[{\n \"rank\": { \"type\": \"integer\" },\n \"title\": { \"type\": \"string\" },\n \"site\": { \"type\": \"string\" },\n \"points\": { \"type\": \"integer\" },\n \"user\": { \"type\": \"string\" },\n \"age\": { \"type\": \"string\" },\n \"comments\": { \"type\": \"string\" }\n}]", "messages": { "messageValues": [ { "message": "You are an expert data formatter" } ] }, "promptType": "define" }, "typeVersion": 1.6, "notes": "This chainLlm node performs automated tasks as part of the workflow." }, { "id": "012d4bb0-2b58-47cd-9cea-b4e0dced9082", "name": "Webhook for structured data", "type": "n8n-nodes-base.httpRequest", "position": [ 1314, -860 ], "parameters": { "url": "{{ $env.BASE_URL }}", "options": {}, "sendBody": true, "bodyParameters": { "parameters": [ { "name": "response", "value": "={{ $json.text }}" } ] } }, "typeVersion": 4.2, "notes": "This httpRequest node performs automated tasks as part of the workflow." }, { "id": "93b35e5e-6f52-4aeb-8f1b-39cc495beefe", "name": "Webhook for structured AI agent response", "type": "n8n-nodes-base.httpRequest", "position": [ 1750, -660 ], "parameters": { "url": "{{ $env.BASE_URL }}", "options": {}, "sendBody": true, "bodyParameters": { "parameters": [ { "name": "response", "value": "={{ $json.output }}" } ] } }, "typeVersion": 4.2, "notes": "This httpRequest node performs automated tasks as part of the workflow." }, { "id": "251b4251-255c-48c6-999b-02227fa2de9b", "name": "Sticky Note1", "type": "n8n-nodes-base.stickyNote", "position": [ 800, -620 ], "parameters": { "width": 360, "height": 420, "content": "## AI Data Formatter\n" }, "typeVersion": 1, "notes": "This stickyNote node performs automated tasks as part of the workflow." 
}, { "id": "f62463cd-6be3-4942-a636-de980a3154b4", "name": "Sticky Note2", "type": "n8n-nodes-base.stickyNote", "position": [ 1560, -160 ], "parameters": { "color": 4, "width": 520, "height": 720, "content": "## Vector Database Persistence\n" }, "typeVersion": 1, "notes": "This stickyNote node performs automated tasks as part of the workflow." }, { "id": "ad20cc91-766a-4a57-be54-6f0d09a784eb", "name": "Sticky Note3", "type": "n8n-nodes-base.stickyNote", "position": [ 1260, -920 ], "parameters": { "color": 3, "width": 680, "height": 440, "content": "## Webhook Notification Handler\n" }, "typeVersion": 1, "notes": "This stickyNote node performs automated tasks as part of the workflow." }, { "id": "37ab5c0f-d36e-4131-844d-20a22d3f2861", "name": "Information Extractor with Data Formatter", "type": "n8n-nodes-base.noOp", "position": [ 860, -60 ], "parameters": { "text": "={{ $json.data }}", "options": { "systemPromptTemplate": "You are an expert HTML extractor. Your job is to analyze the search result and extract the content as a collection of items" }, "attributes": { "attributes": [ { "name": "search_result", "description": "Search Response" } ] } }, "typeVersion": 1, "notes": "This informationExtractor node performs automated tasks as part of the workflow." }, { "id": "e04e189a-8ba9-4ef4-9a49-fc13daf00828", "name": "Sticky Note4", "type": "n8n-nodes-base.stickyNote", "position": [ 800, -160 ], "parameters": { "color": 5, "width": 720, "height": 720, "content": "## Data Extraction/Formatting with the AI Agent\n" }, "typeVersion": 1, "notes": "This stickyNote node performs automated tasks as part of the workflow." 
} ], "active": false, "pinData": {}, "settings": { "executionOrder": "v1", "saveManualExecutions": true, "callerPolicy": "workflowsFromSameOwner", "errorWorkflow": null, "timezone": "UTC", "executionTimeout": 3600, "maxExecutions": 1000, "retryOnFail": true, "retryCount": 3, "retryDelay": 1000 }, "versionId": "799fb406-600d-45a5-b926-24b8844f33a5", "connections": { "216a3261-a398-484c-9bf4-ca5966b829b6": { "main": [ [ { "node": "error-handler-216a3261-a398-484c-9bf4-ca5966b829b6", "type": "main", "index": 0 } ], [ { "node": "error-handler-216a3261-a398-484c-9bf4-ca5966b829b6-4d10c763", "type": "main", "index": 0 } ], [ { "node": "error-handler-216a3261-a398-484c-9bf4-ca5966b829b6-7f540f52", "type": "main", "index": 0 } ], [ { "node": "error-handler-216a3261-a398-484c-9bf4-ca5966b829b6-05b8e7eb", "type": "main", "index": 0 } ], [ { "node": "error-handler-216a3261-a398-484c-9bf4-ca5966b829b6-2a0ec61a", "type": "main", "index": 0 } ], [ { "node": "error-handler-216a3261-a398-484c-9bf4-ca5966b829b6-65095a3f", "type": "main", "index": 0 } ], [ { "node": "error-handler-216a3261-a398-484c-9bf4-ca5966b829b6-3d72a666", "type": "main", "index": 0 } ], [ { "node": "error-handler-216a3261-a398-484c-9bf4-ca5966b829b6-39449272", "type": "main", "index": 0 } ], [ { "node": "error-handler-216a3261-a398-484c-9bf4-ca5966b829b6-c5c9790a", "type": "main", "index": 0 } ] ] }, "012d4bb0-2b58-47cd-9cea-b4e0dced9082": { "main": [ [ { "node": "error-handler-012d4bb0-2b58-47cd-9cea-b4e0dced9082", "type": "main", "index": 0 } ], [ { "node": "error-handler-012d4bb0-2b58-47cd-9cea-b4e0dced9082-55425b22", "type": "main", "index": 0 } ], [ { "node": "error-handler-012d4bb0-2b58-47cd-9cea-b4e0dced9082-f9e5ce70", "type": "main", "index": 0 } ], [ { "node": "error-handler-012d4bb0-2b58-47cd-9cea-b4e0dced9082-18a5c986", "type": "main", "index": 0 } ], [ { "node": "error-handler-012d4bb0-2b58-47cd-9cea-b4e0dced9082-843bb1f3", "type": "main", "index": 0 } ], [ { "node": 
"error-handler-012d4bb0-2b58-47cd-9cea-b4e0dced9082-f2c31977", "type": "main", "index": 0 } ], [ { "node": "error-handler-012d4bb0-2b58-47cd-9cea-b4e0dced9082-2c5d6409", "type": "main", "index": 0 } ], [ { "node": "error-handler-012d4bb0-2b58-47cd-9cea-b4e0dced9082-ffb9b806", "type": "main", "index": 0 } ], [ { "node": "error-handler-012d4bb0-2b58-47cd-9cea-b4e0dced9082-9f45e132", "type": "main", "index": 0 } ] ] }, "93b35e5e-6f52-4aeb-8f1b-39cc495beefe": { "main": [ [ { "node": "error-handler-93b35e5e-6f52-4aeb-8f1b-39cc495beefe", "type": "main", "index": 0 } ], [ { "node": "error-handler-93b35e5e-6f52-4aeb-8f1b-39cc495beefe-f9c3147a", "type": "main", "index": 0 } ], [ { "node": "error-handler-93b35e5e-6f52-4aeb-8f1b-39cc495beefe-cf7111df", "type": "main", "index": 0 } ], [ { "node": "error-handler-93b35e5e-6f52-4aeb-8f1b-39cc495beefe-06c3cbb8", "type": "main", "index": 0 } ], [ { "node": "error-handler-93b35e5e-6f52-4aeb-8f1b-39cc495beefe-4f6841d3", "type": "main", "index": 0 } ], [ { "node": "error-handler-93b35e5e-6f52-4aeb-8f1b-39cc495beefe-1f0c1ba9", "type": "main", "index": 0 } ], [ { "node": "error-handler-93b35e5e-6f52-4aeb-8f1b-39cc495beefe-cde13f45", "type": "main", "index": 0 } ], [ { "node": "error-handler-93b35e5e-6f52-4aeb-8f1b-39cc495beefe-37574f9f", "type": "main", "index": 0 } ], [ { "node": "error-handler-93b35e5e-6f52-4aeb-8f1b-39cc495beefe-db808db8", "type": "main", "index": 0 } ] ] }, "1738dea6-fa4f-4a8d-a6fb-2f01feb1a6d5": { "main": [ [ { "node": "error-handler-1738dea6-fa4f-4a8d-a6fb-2f01feb1a6d5-b9ff703a", "type": "main", "index": 0 } ] ] }, "90cc9aa4-0931-4c52-8734-e4e0de820205": { "main": [ [ { "node": "error-handler-90cc9aa4-0931-4c52-8734-e4e0de820205-9fde512d", "type": "main", "index": 0 } ] ] }, "1090a4af-7e5d-446b-a537-3afe48cd4909": { "main": [ [ { "node": "error-handler-1090a4af-7e5d-446b-a537-3afe48cd4909-128778b9", "type": "main", "index": 0 } ] ] }, "324c530c-0a03-411e-acb0-d82e9dc635cf": { "main": [ [ { "node": 
"error-handler-324c530c-0a03-411e-acb0-d82e9dc635cf-a91052d8", "type": "main", "index": 0 } ] ] } }, "description": "Automated workflow: Create AI-Ready Vector Datasets for LLMs with Bright Data, Gemini & Pinecone. This workflow integrates 14 different services: stickyNote, vectorStorePinecone, embeddingsGoogleGemini, httpRequest, textSplitterRecursiveCharacterTextSplitter. It contains 31 nodes and follows best practices for error handling and security.", "notes": "Excellent quality workflow: Create AI-Ready Vector Datasets for LLMs with Bright Data, Gemini & Pinecone. This workflow has been optimized for production use with comprehensive error handling, security, and documentation." }