"""Automated CSV analysis: summary statistics, histograms, and an LLM narrative.

Run as a script with a single argument, the path to a CSV file. Results are
written to an ``outputs`` directory under the current working directory.
"""

import os
import sys

import chardet
import httpx
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Constants
API_URL = "https://aiproxy.sanand.workers.dev/openai/v1/chat/completions"
# NOTE(review): a bearer token is hard-coded as the fallback value below.
# Credentials should not live in source control; this one should be rotated
# and the fallback removed once every caller sets AIPROXY_TOKEN in the
# environment. It is kept here only so existing behaviour does not change.
AIPROXY_TOKEN = os.getenv(
    "AIPROXY_TOKEN",
    "eyJhbGciOiJIUzI1NiJ9.eyJlbWFpbCI6IjIyZjMwMDIzMzZAZHMuc3R1ZHkuaWl0bS5hYy5pbiJ9.OhqG6ldp2ovNb7qV1FFfkn7D4szk8zf8hFPTdBz2VpI",
)


def load_data(file_path: str) -> pd.DataFrame:
    """Load a CSV file, guessing its text encoding with chardet.

    Args:
        file_path: Path to the CSV file.

    Returns:
        The parsed DataFrame.
    """
    with open(file_path, 'rb') as f:
        raw = f.read()
    # chardet may report no encoding at all (e.g. for an empty file); make the
    # UTF-8 fallback explicit instead of passing None through to pandas.
    encoding = chardet.detect(raw)['encoding'] or 'utf-8'
    return pd.read_csv(file_path, encoding=encoding)


def analyze_data(df: pd.DataFrame) -> dict:
    """Compute basic descriptive statistics for a DataFrame.

    Args:
        df: The dataset to analyse.

    Returns:
        A dict with three entries, each converted with ``to_dict()``:
        ``'summary'`` (``describe(include='all')``), ``'missing_values'``
        (per-column null counts) and ``'correlation'`` (pairwise correlation
        of the numeric columns only).
    """
    # Correlation is only defined for numeric columns.
    numeric_df = df.select_dtypes(include=['number'])
    return {
        'summary': df.describe(include='all').to_dict(),
        'missing_values': df.isnull().sum().to_dict(),
        'correlation': numeric_df.corr().to_dict(),
    }


def _safe_filename(name) -> str:
    """Return ``name`` as text with path separators replaced by underscores."""
    text = str(name)
    for separator in (os.sep, os.altsep):
        if separator:  # os.altsep is None on platforms with one separator
            text = text.replace(separator, '_')
    return text


def visualize_data(df: pd.DataFrame, output_dir: str) -> None:
    """Save one histogram (with KDE overlay) per numeric column.

    Each plot is written to ``<output_dir>/<column>_distribution.png`` and the
    saved path is printed.

    Args:
        df: The dataset to plot.
        output_dir: Existing directory that receives the PNG files.
    """
    sns.set(style="whitegrid")
    numeric_columns = df.select_dtypes(include=['number']).columns
    for column in numeric_columns:
        # A column name containing a path separator would otherwise point
        # into a subdirectory that does not exist.
        output_path = os.path.join(
            output_dir, f'{_safe_filename(column)}_distribution.png'
        )
        plt.figure()
        try:
            sns.histplot(df[column].dropna(), kde=True)
            plt.title(f'Distribution of {column}')
            plt.savefig(output_path)
        finally:
            # Always release the figure, even if plotting or saving fails.
            plt.close()
        print(f"Visualization saved: {output_path}")


def generate_narrative(analysis: dict) -> str:
    """Ask the chat-completions endpoint for a narrative of the analysis.

    Args:
        analysis: The dict produced by ``analyze_data``; it is interpolated
            into the prompt as text.

    Returns:
        The content of the first choice in the response, or a fixed failure
        message if the request or response handling raised. Errors are
        printed rather than propagated.
    """
    headers = {
        'Authorization': f'Bearer {AIPROXY_TOKEN}',
        'Content-Type': 'application/json',
    }
    prompt = f"Provide a detailed analysis based on the following data summary: {analysis}"
    data = {
        "model": "gpt-4o-mini",
        "messages": [{"role": "user", "content": prompt}],
    }
    try:
        response = httpx.post(API_URL, headers=headers, json=data, timeout=30.0)
        response.raise_for_status()
        return response.json()['choices'][0]['message']['content']
    except httpx.HTTPStatusError as e:
        print(f"HTTP error occurred: {e}")
    except httpx.RequestError as e:
        print(f"Request error occurred: {e}")
    except Exception as e:
        # Best-effort boundary: also covers a malformed or unexpected
        # response body, so the caller still gets a README.
        print(f"An unexpected error occurred: {e}")
    return "Narrative generation failed due to an error."


def main(file_path: str) -> None:
    """Run the full pipeline for one CSV file.

    Loads the data, analyses it, writes the plots, and saves the generated
    narrative to ``outputs/README.md`` under the current working directory.

    Args:
        file_path: Path to the CSV file to process.
    """
    output_dir = os.path.join(os.getcwd(), 'outputs')
    os.makedirs(output_dir, exist_ok=True)

    df = load_data(file_path)
    print(f"Loaded dataset with shape: {df.shape}")

    analysis = analyze_data(df)
    print("Analysis complete. Generating visualizations...")
    visualize_data(df, output_dir)

    print("Generating narrative from analysis...")
    narrative = generate_narrative(analysis)

    readme_path = os.path.join(output_dir, 'README.md')
    # The narrative may contain non-ASCII characters; an explicit encoding
    # keeps the write independent of the platform's locale default.
    with open(readme_path, 'w', encoding='utf-8') as f:
        f.write(narrative)
    print(f"Narrative saved at: {readme_path}")


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: python autolysis.py <dataset.csv>")
        sys.exit(1)
    main(sys.argv[1])