---
name: report-generator
description: Generate professional markdown and HTML reports from data with charts, tables, and analysis.
---

# Report Generator Skill

Generate professional markdown and HTML reports from data with charts, tables, and analysis.

## Instructions

You are a report generation expert. When invoked:

1. **Analyze Data**:
   - Understand data structure and content
   - Identify key metrics and insights
   - Calculate statistics and trends
   - Detect patterns and anomalies
   - Generate executive summaries

2. **Create Report Structure**:
   - Design clear, logical sections
   - Create table of contents
   - Add executive summary
   - Include detailed analysis
   - Provide recommendations

3. **Generate Visualizations**:
   - Create tables for structured data
   - Generate charts (bar, line, pie, scatter)
   - Add badges and indicators
   - Include code blocks and examples
   - Format numbers and percentages

4. **Format Output**:
   - Generate markdown reports
   - Create HTML reports with styling
   - Export to PDF
   - Add branding and customization
   - Ensure responsive design

## Usage Examples

```
@report-generator data.csv
@report-generator --format html
@report-generator --template executive-summary
@report-generator --charts --pdf
@report-generator --compare baseline.json current.json
```

## Report Types

### Executive Summary Report

```python
def generate_executive_summary(data, title="Executive Summary"):
    """
    Generate high-level executive summary report.

    data: input dataset consumed by calculate_key_metrics().
    Returns the report as a markdown string.
    """
    from datetime import datetime

    report = f"""# {title}

**Generated:** {datetime.now().strftime('%B %d, %Y at %I:%M %p')}

---

## Key Highlights

"""

    # Calculate key metrics
    metrics = calculate_key_metrics(data)

    for metric in metrics:
        icon = "✅" if metric['status'] == 'good' else "⚠️" if metric['status'] == 'warning' else "❌"
        report += f"{icon} **{metric['name']}**: {metric['value']}\n"

    report += """
---

## Performance Overview

| Metric | Current | Previous | Change |
|--------|---------|----------|--------|
"""

    # Only metrics that carry a 'previous' value can be compared
    for metric in metrics:
        if 'previous' in metric:
            change = calculate_change(metric['current'], metric['previous'])
            arrow = "↑" if change > 0 else "↓" if change < 0 else "→"
            report += f"| {metric['name']} | {metric['current']:,} | {metric['previous']:,} | {arrow} {abs(change):.1f}% |\n"

    report += """
---

## Recommendations

"""

    recommendations = generate_recommendations(metrics)
    for i, rec in enumerate(recommendations, 1):
        priority = rec.get('priority', 'medium')
        emoji = "🔴" if priority == 'high' else "🟡" if priority == 'medium' else "🟢"
        report += f"{i}. {emoji} **{rec['title']}**\n"
        report += f"   {rec['description']}\n\n"

    return report
```

### Data Analysis Report

```python
import pandas as pd
import numpy as np
from datetime import datetime


def generate_data_analysis_report(df, title="Data Analysis Report"):
    """
    Generate comprehensive data analysis report for a pandas DataFrame.

    Covers overview, data quality, statistics, distributions,
    correlations, and insights. Returns a markdown string.
    """
    report = f"""# {title}

**Date:** {datetime.now().strftime('%Y-%m-%d')}
**Dataset:** {len(df):,} rows × {len(df.columns)} columns

---

## Table of Contents

1. [Dataset Overview](#dataset-overview)
2. [Data Quality](#data-quality)
3. [Statistical Summary](#statistical-summary)
4. [Distributions](#distributions)
5. [Correlations](#correlations)
6. [Insights](#insights)

---

## Dataset Overview

### Basic Information

- **Total Rows:** {len(df):,}
- **Total Columns:** {len(df.columns)}
- **Memory Usage:** {df.memory_usage(deep=True).sum() / 1024**2:.2f} MB
- **Duplicate Rows:** {df.duplicated().sum():,}

### Column Information

| Column | Type | Non-Null | Unique | Sample Values |
|--------|------|----------|--------|---------------|
"""

    for col in df.columns:
        dtype = str(df[col].dtype)
        non_null = df[col].count()
        unique = df[col].nunique()
        samples = df[col].dropna().head(3).tolist()
        sample_str = ", ".join(str(s) for s in samples)
        report += f"| {col} | {dtype} | {non_null:,} | {unique:,} | {sample_str} |\n"

    report += """
---

## Data Quality

### Missing Values

"""

    missing = df.isnull().sum()
    if missing.sum() > 0:
        report += "| Column | Missing Count | Missing % |\n"
        report += "|--------|---------------|----------|\n"
        for col in missing[missing > 0].index:
            count = missing[col]
            pct = (count / len(df)) * 100
            report += f"| {col} | {count:,} | {pct:.1f}% |\n"
    else:
        report += "✅ No missing values detected.\n"

    report += "\n### Data Type Issues\n\n"

    # Check for potential type issues in object (string) columns
    type_issues = []
    for col in df.select_dtypes(include=['object']):
        # Check if column should be numeric
        try:
            pd.to_numeric(df[col], errors='raise')
            type_issues.append(f"- `{col}` appears to be numeric but stored as string")
        except (ValueError, TypeError):
            pass

        # Check if column should be datetime (require an ISO-like date pattern
        # so free text that happens to parse is not flagged)
        try:
            pd.to_datetime(df[col], errors='raise')
            if df[col].str.contains(r'\d{4}-\d{2}-\d{2}').any():
                type_issues.append(f"- `{col}` appears to be datetime but stored as string")
        except (ValueError, TypeError):
            pass

    if type_issues:
        report += "\n".join(type_issues) + "\n"
    else:
        report += "✅ No data type issues detected.\n"

    report += """
---

## Statistical Summary

### Numeric Columns

"""

    # Add statistics for numeric columns
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    if len(numeric_cols) > 0:
        stats = df[numeric_cols].describe()
        report += stats.to_markdown() + "\n"

        # Add additional statistics
        report += "\n### Additional Statistics\n\n"
        report += "| Column | Median | Mode | Std Dev | Variance |\n"
        report += "|--------|--------|------|---------|----------|\n"
        for col in numeric_cols:
            median = df[col].median()
            mode = df[col].mode().iloc[0] if not df[col].mode().empty else "N/A"
            std = df[col].std()
            var = df[col].var()
            report += f"| {col} | {median:.2f} | {mode} | {std:.2f} | {var:.2f} |\n"

    report += """
### Categorical Columns

"""

    categorical_cols = df.select_dtypes(include=['object']).columns
    if len(categorical_cols) > 0:
        for col in categorical_cols[:5]:  # Limit to first 5
            report += f"\n#### {col}\n\n"
            value_counts = df[col].value_counts().head(10)
            report += "| Value | Count | Percentage |\n"
            report += "|-------|-------|------------|\n"
            for value, count in value_counts.items():
                pct = (count / len(df)) * 100
                report += f"| {value} | {count:,} | {pct:.1f}% |\n"

    report += """
---

## Distributions

"""

    # Analyze distributions of numeric columns
    for col in numeric_cols[:5]:  # Limit to first 5
        report += f"\n### {col} Distribution\n\n"
        q1 = df[col].quantile(0.25)
        q2 = df[col].quantile(0.50)
        q3 = df[col].quantile(0.75)
        iqr = q3 - q1

        # Detect outliers with the standard 1.5×IQR fences
        lower_bound = q1 - 1.5 * iqr
        upper_bound = q3 + 1.5 * iqr
        outliers = df[(df[col] < lower_bound) | (df[col] > upper_bound)]

        report += f"""
**Quartiles:**
- Q1 (25%): {q1:.2f}
- Q2 (50%, Median): {q2:.2f}
- Q3 (75%): {q3:.2f}
- IQR: {iqr:.2f}

**Outliers:** {len(outliers)} ({len(outliers)/len(df)*100:.1f}%)
- Lower bound: {lower_bound:.2f}
- Upper bound: {upper_bound:.2f}

"""

    report += """
---

## Correlations

"""

    if len(numeric_cols) > 1:
        corr_matrix = df[numeric_cols].corr()
        report += "\n### Correlation Matrix\n\n"
        report += corr_matrix.to_markdown() + "\n"

        # Find strong correlations (upper triangle only, so each pair once)
        report += "\n### Strong Correlations (|r| > 0.7)\n\n"
        strong_corr = []
        for i in range(len(corr_matrix.columns)):
            for j in range(i + 1, len(corr_matrix.columns)):
                corr_val = corr_matrix.iloc[i, j]
                if abs(corr_val) > 0.7:
                    col1 = corr_matrix.columns[i]
                    col2 = corr_matrix.columns[j]
                    strong_corr.append((col1, col2, corr_val))

        if strong_corr:
            for col1, col2, corr_val in strong_corr:
                direction = "positive" if corr_val > 0 else "negative"
                report += f"- **{col1}** ↔ **{col2}**: {corr_val:.3f} ({direction})\n"
        else:
            report += "No strong correlations found.\n"

    report += """
---

## Insights

"""

    # Generate insights
    insights = generate_insights(df)
    for insight in insights:
        report += f"### {insight['title']}\n\n"
        report += f"{insight['description']}\n\n"
        if 'details' in insight:
            for detail in insight['details']:
                report += f"- {detail}\n"
            report += "\n"

    return report


def generate_insights(df):
    """Generate data insights (completeness and duplicate checks)."""
    insights = []

    # Insight: Completeness — share of missing cells across the whole frame
    missing_pct = (df.isnull().sum().sum() / (len(df) * len(df.columns))) * 100
    if missing_pct < 1:
        status = "excellent"
        emoji = "✅"
    elif missing_pct < 5:
        status = "good"
        emoji = "👍"
    else:
        status = "needs attention"
        emoji = "⚠️"

    insights.append({
        "title": f"{emoji} Data Completeness: {status.title()}",
        "description": f"Overall data completeness is {100-missing_pct:.1f}% with {missing_pct:.1f}% missing values.",
        "details": [
            f"Total cells: {len(df) * len(df.columns):,}",
            f"Missing cells: {df.isnull().sum().sum():,}"
        ]
    })

    # Insight: Duplicates
    dup_count = df.duplicated().sum()
    if dup_count > 0:
        insights.append({
            "title": "⚠️ Duplicate Records Found",
            "description": f"Found {dup_count:,} duplicate rows ({dup_count/len(df)*100:.1f}% of dataset)",
            "details": [
                "Consider removing duplicates for accurate analysis",
                "Review business logic for duplicate handling"
            ]
        })

    return insights
```

### Performance Report

```python
from datetime import datetime


def generate_performance_report(metrics, baseline=None):
    """
    Generate performance comparison report.

    metrics: mapping of metric name -> current numeric value.
    baseline: optional mapping of metric name -> baseline value;
              when given, a comparison table is produced.
    Returns a markdown string.
    """
    report = f"""# Performance Report

**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

---

## Summary

"""

    if baseline:
        report += "### Comparison with Baseline\n\n"
        report += "| Metric | Current | Baseline | Change | Status |\n"
        report += "|--------|---------|----------|--------|--------|\n"
        for metric_name, current_value in metrics.items():
            if metric_name in baseline:
                baseline_value = baseline[metric_name]
                # Guard against a zero baseline to avoid ZeroDivisionError
                change = ((current_value - baseline_value) / baseline_value) * 100 if baseline_value else 0.0
                if abs(change) < 5:
                    status = "🟢 Stable"
                else:
                    # is_improvement already accounts for the metric's "good"
                    # direction, so its verdict maps directly to the status.
                    status = "🟢 Improved" if is_improvement(metric_name, change) else "🔴 Degraded"
                report += f"| {metric_name} | {current_value:.2f} | {baseline_value:.2f} | {change:+.1f}% | {status} |\n"
    else:
        report += "### Current Metrics\n\n"
        report += "| Metric | Value | Status |\n"
        report += "|--------|-------|--------|\n"
        for metric_name, value in metrics.items():
            threshold = get_threshold(metric_name)
            status = evaluate_metric(value, threshold)
            report += f"| {metric_name} | {value:.2f} | {status} |\n"

    report += """
---

## Detailed Analysis

"""

    for metric_name, value in metrics.items():
        report += f"### {metric_name}\n\n"
        if baseline and metric_name in baseline:
            baseline_value = baseline[metric_name]
            # Same zero-baseline guard as the summary table
            change = ((value - baseline_value) / baseline_value) * 100 if baseline_value else 0.0
            report += f"- **Current:** {value:.2f}\n"
            report += f"- **Baseline:** {baseline_value:.2f}\n"
            report += f"- **Change:** {change:+.1f}%\n\n"
            if abs(change) > 10:
                report += "⚠️ Significant change detected. "
                report += "Review recent changes that may have impacted this metric.\n\n"
        else:
            report += f"- **Value:** {value:.2f}\n\n"

    return report


def is_improvement(metric_name, change):
    """Determine if change is improvement based on metric type"""
    # Lower is better for these metrics
    lower_is_better = ['response_time', 'error_rate', 'latency', 'load_time']
    for pattern in lower_is_better:
        if pattern in metric_name.lower():
            return change < 0
    return change > 0
```

## HTML Report Generation

```python
def generate_html_report(data, title="Report", template="default"):
    """
    Generate styled HTML report
    """
    # CSS styles
    css = """
    """
    # Generate HTML content
    html = f"""
| {header} | " html += "
|---|
| {cell} | " html += "