/**
 * Gemini Explicit Context Caching Example
 *
 * This example demonstrates how to use Gemini's explicit context caching
 * for long-running multi-turn agentic flows where AxGen is called repeatedly.
 *
 * Usage: Simply pass `contextCache: {}` to enable caching. The system
 * automatically handles cache creation, reuse, and TTL refresh.
 *
 * Benefits:
 * - Predictable cost savings (90% discount on cached tokens for Gemini 2.5)
 * - Lower latency for repeated requests with shared context
 * - Cache persists across multiple AxGen calls within a session
 * - Automatic TTL refresh keeps cache alive during long flows
 *
 * Supported models:
 * - Gemini 3 Flash/Pro (preview)
 * - Gemini 2.5 Pro/Flash/Flash-Lite
 * - Gemini 2.0 Flash/Flash-Lite
 *
 * Note: The same contextCache option works with Anthropic models too,
 * automatically injecting cache_control for ephemeral caching.
 */

import {
  AxAIGoogleGemini,
  AxAIGoogleGeminiModel,
  AxGen,
  AxMemory,
} from '@ax-llm/ax';

// Large system prompt that benefits from caching
const LARGE_SYSTEM_PROMPT = `
You are an expert code reviewer and software architect with deep knowledge of:

1. Software Design Patterns
   - Creational patterns (Singleton, Factory, Builder, Prototype)
   - Structural patterns (Adapter, Bridge, Composite, Decorator, Facade)
   - Behavioral patterns (Observer, Strategy, Command, State, Template Method)

2. Clean Code Principles
   - SOLID principles
   - DRY (Don't Repeat Yourself)
   - KISS (Keep It Simple, Stupid)
   - YAGNI (You Aren't Gonna Need It)

3. Programming Languages
   - TypeScript/JavaScript: Modern ES features, async/await, modules
   - Python: Type hints, dataclasses, async programming
   - Rust: Ownership, borrowing, lifetimes, traits
   - Go: Goroutines, channels, interfaces

4. Architecture Patterns
   - Microservices architecture
   - Event-driven architecture
   - CQRS and Event Sourcing
   - Domain-Driven Design

5. Testing Strategies
   - Unit testing best practices
   - Integration testing patterns
   - Test-driven development (TDD)
   - Behavior-driven development (BDD)

6. Performance Optimization
   - Algorithmic complexity analysis
   - Memory optimization techniques
   - Caching strategies
   - Database query optimization

When reviewing code:
- Focus on maintainability and readability first
- Suggest concrete improvements with examples
- Explain the reasoning behind each suggestion
- Consider edge cases and error handling
- Look for potential security vulnerabilities

Your responses should be structured, clear, and actionable.
`.repeat(5); // Repeat to ensure we exceed 2048 token minimum

async function runWithContextCache() {
  // Initialize Gemini with your API key
  const ai = new AxAIGoogleGemini({
    apiKey: process.env.GOOGLE_APIKEY,
    config: {
      model: AxAIGoogleGeminiModel.Gemini25Flash,
    },
  });

  // Create a shared memory for the session
  const mem = new AxMemory();
  const sessionId = `code-review-${Date.now()}`;

  // Create a code review generator with a large system prompt
  const codeReviewer = new AxGen<
    { code: string; language: string },
    { review: string; suggestions: string[] }
  >(
    `code:string "the code to review", language:string "programming language" -> review:string "detailed code review", suggestions:string[] "list of improvement suggestions"`,
    {
      description: LARGE_SYSTEM_PROMPT,
    }
  );

  console.log('Starting code review session with context caching...\n');

  // Simulate a multi-turn code review session
  const codeSnippets = [
    {
      language: 'typescript',
      code: `
function fetchUser(id: string) {
  return fetch('/api/users/' + id)
    .then(r => r.json());
}`,
    },
    {
      language: 'python',
      code: `
def process_data(items):
    result = []
    for item in items:
        if item > 0:
            result.append(item * 2)
    return result`,
    },
    {
      language: 'typescript',
      code: `
class UserService {
  private cache = new Map();
  
  async getUser(id) {
    if (this.cache.has(id)) {
      return this.cache.get(id);
    }
    const user = await fetch('/api/users/' + id).then(r => r.json());
    this.cache.set(id, user);
    return user;
  }
}`,
    },
  ];

  for (let i = 0; i < codeSnippets.length; i++) {
    const snippet = codeSnippets[i];
    console.log(`\n--- Review ${i + 1}: ${snippet.language} code ---\n`);

    const startTime = Date.now();

    const result = await codeReviewer.forward(ai, snippet, {
      mem,
      sessionId,
      // Enable context caching - presence of this object enables caching
      contextCache: {
        ttlSeconds: 3600, // 1 hour TTL
      },
    });

    const elapsed = Date.now() - startTime;

    console.log(`Review (${elapsed}ms):`);
    console.log(result.review);
    console.log('\nSuggestions:');
    result.suggestions.forEach((s, idx) => console.log(`  ${idx + 1}. ${s}`));

    // On subsequent calls, the cached system prompt should provide faster responses
    if (i > 0) {
      console.log(
        '\n(Cache should be reused - check usage metadata for cacheReadTokens)'
      );
    }
  }

  console.log('\n--- Session complete ---');
  console.log(
    'The system prompt was cached after the first call and reused for subsequent calls.'
  );
  console.log(
    'Check the modelUsage.tokens.cacheReadTokens field in debug logs for cache hits.'
  );
}

// Alternative: Using explicit cache management
async function runWithExplicitCacheManagement() {
  const ai = new AxAIGoogleGemini({
    apiKey: process.env.GOOGLE_APIKEY,
    config: {
      model: AxAIGoogleGeminiModel.Gemini25Flash,
    },
  });

  const sessionId = 'explicit-cache-demo';

  // Create generator with a large system prompt
  const gen = new AxGen<{ question: string }, { answer: string }>(
    'question -> answer',
    {
      description: `${LARGE_SYSTEM_PROMPT}\nAnswer questions concisely and accurately.`,
    }
  );

  // First call: Creates the cache automatically
  console.log('First call - cache will be created automatically...');
  const result1 = await gen.forward(
    ai,
    { question: 'What is the SOLID principle?' },
    {
      sessionId,
      contextCache: {
        ttlSeconds: 7200, // 2 hours TTL
      },
    }
  );
  console.log('Answer:', result1.answer);

  // Subsequent calls: Cache is reused automatically (same sessionId + content hash)
  console.log('\nSecond call - cache is reused automatically...');
  const result2 = await gen.forward(
    ai,
    { question: 'Explain the Decorator pattern.' },
    {
      sessionId,
      contextCache: {}, // Just enable caching, uses defaults
    }
  );
  console.log('Answer:', result2.answer);

  // Third call: Cache continues to be reused
  // TTL is auto-refreshed when near expiration (within refreshWindowSeconds)
  console.log('\nThird call - cache continues to be reused...');
  const result3 = await gen.forward(
    ai,
    { question: 'What are goroutines in Go?' },
    {
      sessionId,
      contextCache: {
        ttlSeconds: 14400, // New TTL for any refresh operations
      },
    }
  );
  console.log('Answer:', result3.answer);
}

// Run the example
async function main() {
  if (!process.env.GOOGLE_APIKEY) {
    console.error('Please set GOOGLE_API_KEY environment variable');
    process.exit(1);
  }

  console.log('=== Gemini Context Caching Demo ===\n');

  try {
    await runWithContextCache();
    console.log('\n\n=== Explicit Cache Management Demo ===\n');
    await runWithExplicitCacheManagement();
  } catch (error) {
    console.error('Error:', error);
    process.exit(1);
  }
}

main();