# Name: postgresdb_chatbot.py # Description: A Python script that creates a chatbot for querying an Azure Database for PostgreSQL Flexible Server using a locally installed Large Language Model (LLM) via Ollama. The script reads PostgreSQL connection details from a file (postgres_connection.txt), connects to the database, and uses the LLM to convert natural language queries into SQL. It executes these queries and displays results in a user-friendly chat interface. The database schema includes tables: Authors (author_id, first_name, last_name, nationality), Books (book_id, title, author_id, publication_year, genre), and Borrowers (borrower_id, book_id, borrower_name, borrow_date). # Python Version: 3.8 or higher # Libraries Used: # - psycopg2: To connect to Azure PostgreSQL database # - ollama: To interact with the locally installed LLM # - re: To extract SQL queries from LLM responses # - os: To check for the existence of postgres_connection.txt # - sys: To handle program exit on errors # - subprocess: To check if Ollama server is running # Ollama Version: Latest version compatible with LLaMA 3.2 (e.g., 0.3.12 or higher) # Local Model: LLaMA 3.2 (must be installed locally via Ollama; configurable in script) # # Usage: # - Run the script from the command line: python3 postgressdb_chatbot.py # - The script expects a file named postgres_connection.txt in the current directory with the following format: # host="your_host" # port="your_port" # database="your_database" # username="your_username" # password="your_password" # - Example postgres_connection.txt: # host="myfreepostgresdb.postgres.database.azure.com" # port="5432" # database="postgres" # username="pgadmin@myfreepostgresdb" # password="your_password" # - The script checks if the Ollama server is running and if the specified LLM model is available. # - If the server or model is not available, it provides instructions to start/install Ollama. # - The chat interface opens with a greeting, accepts natural language queries, generates SQL, executes them, and displays results. # - Type 'exit' to quit the chat interface. # # Installation and Setup: # 1. Install Python 3.8+. # 2. Install required libraries: # pip install psycopg2-binary ollama # 3. Set up Azure PostgreSQL Database: # - Create a free Azure Database for PostgreSQL Flexible Server instance in the Azure portal. # - Select Burstable B1MS instance and ≤32 GB storage to stay within free tier limits. # - Configure public access and add your client IP to the firewall rules. # - Note the host, port, database, username, and password from the Azure portal. # 4. Install Ollama locally: # - Download and install Ollama from https://ollama.com/download (available for macOS). # - Example for macOS: # curl -fsSL https://ollama.com/install.sh | sh # 5. Pull the LLaMA 3.2 model: # ollama pull llama3.2 # 6. Start the Ollama server before running the script: # ollama serve # - Run this in a separate terminal window to keep the server active. # # Script Workflow: # 1. Checks for the existence of postgres_connection.txt and reads connection details. # 2. Validates the Ollama server and LLM model availability. # 3. Establishes a connection to the Azure PostgreSQL database using psycopg2. # 4. Initializes a chat interface with a greeting. # 5. Accepts natural language queries, uses the LLM to generate SQL, and executes queries on the database. # 6. Displays query results in a readable format or an error message if the query fails. # 7. Closes the database connection when the user exits or an error occurs. # # Error Handling: # - Checks for missing or invalid postgres_connection.txt. # - Verifies Ollama server and model availability with clear setup instructions. # - Handles database connection errors and invalid SQL queries. # - Provides user-friendly error messages for unanswerable queries. # # Notes: # - The default LLM model is LLaMA 3.2. Change MODEL_NAME constant to use a more capable model (e.g., 'mistral:7b') if SQL generation is inaccurate. # - Ensure sufficient memory (e.g., 8GB+ RAM) for running LLaMA 3.2 locally on macOS; 16GB+ recommended for larger models like mistral:7b. # - The Azure free account provides 750 hours of PostgreSQL Flexible Server (Burstable B1MS) and 32 GB storage for 12 months. # - For complex queries, refine the LLM prompt or try a more advanced model if SQL generation is inaccurate. # - Clean up Azure resources after testing to avoid charges beyond the free tier. import psycopg2 import ollama import re import os import sys import subprocess # Configuration MODEL_NAME = 'llama3.2' # Matches your installed model def check_ollama_server(): """Check if Ollama server is running by attempting to list models.""" try: subprocess.check_output(['ollama', 'list']) return True except (subprocess.CalledProcessError, FileNotFoundError): return False def check_ollama_model(model_name=MODEL_NAME): """Check if the specified model is available.""" try: models = subprocess.check_output(['ollama', 'list']).decode('utf-8') return model_name in models except (subprocess.CalledProcessError, FileNotFoundError): return False def read_connection_details(file_path='postgres_connection.txt'): """Read PostgreSQL connection details from postgres_connection.txt.""" if not os.path.exists(file_path): print(f"Error: {file_path} not found in the current directory.") print("Please create postgres_connection.txt with the following format:") print('host="your_host"') print('port="your_port"') print('database="your_database"') print('username="your_username"') print('password="your_password"') sys.exit(1) config = {} try: with open(file_path, 'r', encoding='utf-8') as f: for line in f: # Remove comments and strip whitespace line = line.split('#')[0].strip() if '=' in line: key, value = line.split('=', 1) key = key.strip() value = value.strip().strip('"') config[key] = value required_keys = ['host', 'port', 'database', 'username', 'password'] missing_keys = [key for key in required_keys if key not in config] if missing_keys: print(f"Error: Missing keys in {file_path}: {', '.join(missing_keys)}") sys.exit(1) return config except Exception as e: print(f"Error reading {file_path}: {e}") sys.exit(1) def connect_to_postgres(config): """Establish connection to Azure PostgreSQL database.""" try: connection = psycopg2.connect( host=config['host'], port=config['port'], database=config['database'], user=config['username'], password=config['password'], sslmode='require' # Azure PostgreSQL requires SSL ) print("Successfully connected to Azure PostgreSQL database.") return connection except psycopg2.Error as e: print(f"Error connecting to PostgreSQL database: {e}") print("Ensure the following:") print("- PostgreSQL server details (host, port, database, username, password) are correct.") print("- Your client IP is added to the server's firewall rules in the Azure portal.") print("- The PostgreSQL server is running and accessible.") sys.exit(1) def generate_sql(query, model_name=MODEL_NAME): """Generate SQL from natural language query using LLM.""" prompt = f""" You are an expert in PostgreSQL with access to the following database schema: - Authors (author_id SERIAL PRIMARY KEY, first_name VARCHAR(50), last_name VARCHAR(50), nationality VARCHAR(50)) - Books (book_id SERIAL PRIMARY KEY, title VARCHAR(100), author_id INTEGER REFERENCES Authors(author_id), publication_year INTEGER, genre VARCHAR(50)) - Borrowers (borrower_id SERIAL PRIMARY KEY, book_id INTEGER REFERENCES Books(book_id), borrower_name VARCHAR(100), borrow_date DATE) Your task is to convert the user's natural language query into a valid PostgreSQL query. The query is: "{query}" Instructions: - Return ONLY the SQL query, enclosed in triple backticks: ```sql ```sql - Do NOT include explanations, prose, or multiple queries. - Do NOT end the query with a semicolon (;). - Use table aliases (e.g., b for Books, a for Authors). - Authors are linked to Books via author_id (Books.author_id = Authors.author_id). - For date-based queries (e.g., month/year), use EXTRACT on borrow_date (e.g., EXTRACT(MONTH FROM borrow_date) = 5). - Do NOT assume columns exist outside the schema (e.g., no 'author' column in Books). - If the query cannot be converted, return: ```sql -- Cannot generate SQL for this query ```sql - Avoid unnecessary JOINs or columns unless required by the query. Examples: Query: "list all books by George Orwell" ```sql SELECT b.title FROM Books b JOIN Authors a ON b.author_id = a.author_id WHERE a.last_name = 'Orwell' ```sql Query: "Who borrowed books in May 2025" ```sql SELECT b.borrower_name, b.borrow_date FROM Borrowers b JOIN Books bo ON b.book_id = bo.book_id WHERE EXTRACT(MONTH FROM b.borrow_date) = 5 AND EXTRACT(YEAR FROM b.borrow_date) = 2025 ```sql Query: "How many books are in the Fantasy genre?" ```sql SELECT COUNT(b.book_id) FROM Books b WHERE b.genre = 'Fantasy' ```sql Query: "list book in Fantasy genre" ```sql SELECT b.title FROM Books b WHERE b.genre = 'Fantasy' ```sql """ try: response = ollama.generate(model=model_name, prompt=prompt) raw_response = response['response'].strip() print(f"Debug: LLM raw response:\n{raw_response}\n") # Debug output # Extract SQL query sql_match = re.search(r'```sql\s*(.*?)\s*```', raw_response, re.DOTALL) if sql_match: sql = sql_match.group(1).strip() # Remove semicolon if present sql = sql.rstrip(';').strip() print(f"Debug: Cleaned SQL:\n{sql}\n") # Debug cleaned SQL if sql.startswith('-- Cannot generate'): print("LLM indicated query cannot be generated.") return None return sql print("Warning: LLM response did not contain a valid SQL query.") return None except Exception as e: print(f"Error generating SQL: {e}") return None def execute_query(connection, sql): """Execute SQL query and return results.""" try: cursor = connection.cursor() cursor.execute(sql) if sql.strip().upper().startswith("SELECT"): columns = [desc[0].upper() for desc in cursor.description] results = cursor.fetchall() cursor.close() if not results: return columns, "No results found for this query." return columns, results else: connection.commit() cursor.close() return None, "Query executed successfully." except psycopg2.Error as e: return None, f"Error executing query: {str(e)}" def main(): """Main function to run the PostgreSQL DB chatbot.""" # Check if Ollama server is running if not check_ollama_server(): print("Ollama server is not running. Please follow these steps:") print("1. Install Ollama from https://ollama.com/download") print(f"2. Pull the {MODEL_NAME} model: ollama pull {MODEL_NAME}") print("3. Start the Ollama server: ollama serve") print("Run the server in a separate terminal and try again.") sys.exit(1) # Check if the specified model is available if not check_ollama_model(MODEL_NAME): print(f"{MODEL_NAME} model is not installed. Please run:") print(f"ollama pull {MODEL_NAME}") print("Then restart the script.") sys.exit(1) # Read PostgreSQL connection details config = read_connection_details() # Connect to PostgreSQL database connection = connect_to_postgres(config) try: # Initialize chat interface print("\n=== PostgreSQL Database Chatbot ===") print("Hello! I'm ready to answer questions about the library database.") print("Ask about books, authors, or borrowers. Type 'exit' to quit.") print("Example questions:") print("- List all books by George Orwell.") print("- Who borrowed books in May 2025?") print("- How many books are in the Fantasy genre?") print("================================\n") while True: user_query = input("You: ").strip() if user_query.lower() == 'exit': print("Goodbye!") break if not user_query: print("Please enter a question.") continue # Generate SQL using LLM sql = generate_sql(user_query) if not sql: print("Sorry, I couldn't generate a valid SQL query. Please try again.") continue print(f"Generated SQL: {sql}") # Execute SQL query columns, results = execute_query(connection, sql) if isinstance(results, str): print(f"Bot: {results}\n") else: print("\nBot: Results:") print(columns) for row in results: print(row) print() finally: # Ensure connection is closed connection.close() print("Database connection closed.") if __name__ == '__main__': main()