#!/bin/bash

# SD-Scripts LoRA Training Script
# This script automates the LoRA training workflow for sd-scripts
# Supports SDXL and Anima models

set -e  # Abort as soon as any command fails

# ==================== CONFIGURATION ====================
# Edit the defaults below, or override any of them from the environment.
# A value already present in the environment always wins; the text after ":="
# is used only when the variable is unset or empty.

# Install parameters
: "${GFX_NAME:=gfx1151}"                # Last path segment of the ROCm wheel index URL used by "setup"
: "${SD_SCRIPTS_INSTALL_DIR:=$HOME}"    # Directory that holds (or will hold) the sd-scripts checkout

# Model paths (provide the paths to your model files)
: "${DIT_MODEL:=}"          # Path to base model
: "${VAE_MODEL:=}"          # Path to VAE model (for Anima)
: "${T5XXL_TOKENIZER:=}"    # Path to T5-XXL tokenizer (for Anima, optional)
: "${QWEN3_MODEL:=}"        # Path to Qwen3 model (for Anima)
: "${CLIP_L_MODEL:=}"       # Path to CLIP-L model (for SDXL)
: "${CLIP_G_MODEL:=}"       # Path to CLIP-G model (for SDXL)

# Project configuration
: "${PROJECT_NAME:=}"       # Name for your project (e.g: my-lora)
: "${MODEL_VERSION:=sdxl}"  # Model type: "sdxl" or "anima"

# Training parameters (written into the generated TOML files)
: "${NETWORK_DIM:=32}"
: "${NETWORK_ALPHA:=16}"
: "${LEARNING_RATE:=1e-4}"
: "${MAX_EPOCHS:=30}"
: "${SAVE_EVERY_N:=2}"      # Used for both save_every_n_epochs and sample_every_n_epochs
: "${BATCH_SIZE:=2}"
: "${RESOLUTION:=1024}"

# ======================================================

# Runtime vars: the project lives in a sub-directory of the current working
# directory, named after the project.
PROJECT_DIR="${PWD}/${PROJECT_NAME}"
DATASET_DIR="${PROJECT_DIR}/dataset"
OUTPUT_DIR="${PROJECT_DIR}/output"

# Filled in by init_env according to MODEL_VERSION.
TRAINING_SCRIPT=""
NETWORK_MODULE=""
EXTRA_TRAINING_CONFIG=""

# ANSI colour escape sequences, expanded later by the log_* helpers.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Print an informational message ($1) on stdout, prefixed with a green [INFO]
# tag. Backslash escapes in the whole line are interpreted.
log_info() {
    printf '%b\n' "${GREEN}[INFO]${NC} ${1}"
}

# Print a warning message ($1) on stdout, prefixed with a yellow [WARN] tag.
# Backslash escapes in the whole line are interpreted.
log_warn() {
    printf '%b\n' "${YELLOW}[WARN]${NC} ${1}"
}

# Print an error message ($1), prefixed with a red [ERROR] tag.
# The message goes to stderr so that it stays visible when stdout is piped or
# captured, and so that it never pollutes data written to stdout.
log_error() {
    echo -e "${RED}[ERROR]${NC} $1" >&2
}

# Initialize the runtime environment shared by the create/train/convert actions.
#
# Globals read:    PROJECT_NAME, MODEL_VERSION, SD_SCRIPTS_INSTALL_DIR, VIRTUAL_ENV
# Globals written: TRAINING_SCRIPT, NETWORK_MODULE, EXTRA_TRAINING_CONFIG
# Exports:         MIOPEN_FIND_MODE, TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL,
#                  TORCH_BLAS_PREFER_HIPBLASLT
# Exits (status 1) when PROJECT_NAME is empty, MODEL_VERSION is neither "sdxl"
# nor "anima", or no virtual environment is active and none can be found under
# SD_SCRIPTS_INSTALL_DIR.
init_env() {
    # Every project path is derived from the project name, so it is mandatory.
    if [ -z "$PROJECT_NAME" ]; then
        log_error "PROJECT_NAME is not set. Please set it in the configuration section."
        exit 1
    fi

    # Select the training entry point and LoRA network module for the model family.
    case "$MODEL_VERSION" in
        sdxl)
            TRAINING_SCRIPT="sdxl_train_network.py"
            NETWORK_MODULE="networks.lora"
            EXTRA_TRAINING_CONFIG="cache_latents = true"
            ;;
        anima)
            TRAINING_SCRIPT="anima_train_network.py"
            NETWORK_MODULE="networks.lora_anima"
            EXTRA_TRAINING_CONFIG="cache_latents = true"
            ;;
        *)
            log_error "MODEL_VERSION must be 'sdxl' or 'anima'."
            exit 1
            ;;
    esac

    # AMD GPU (ROCm) settings
    export MIOPEN_FIND_MODE=FAST
    export TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1
    export TORCH_BLAS_PREFER_HIPBLASLT=1

    # Ensure a virtual environment is active. An already active one is left
    # untouched; otherwise the one created by the setup action is activated.
    if [ -z "$VIRTUAL_ENV" ]; then
        local activate_script="${SD_SCRIPTS_INSTALL_DIR}/sd-scripts/.venv/bin/activate"
        # Fail with an actionable message instead of a bare "No such file" from source.
        if [ ! -f "$activate_script" ]; then
            log_error "Virtual environment not found: ${activate_script}. Run '$0 setup' first."
            exit 1
        fi
        source "$activate_script"
    fi
}

# Verify that the external tools needed by the setup action (git and uv) can be
# found by the shell. Exits with status 1 on the first missing tool.
check_dependencies() {
    local tool

    log_info "Checking dependencies..."

    for tool in git uv; do
        if command -v "$tool" > /dev/null 2>&1; then
            continue
        fi
        log_error "$tool is not installed. Please install it first."
        exit 1
    done

    log_info "All dependencies found."
}

# Install (or refresh) the sd-scripts environment under SD_SCRIPTS_INSTALL_DIR.
#
# Steps: verify git/uv are available, clone the repository if it is not there
# yet, create a Python 3.12 virtual environment if it is not there yet, then
# install PyTorch and the repository requirements using the ROCm wheel index
# selected by GFX_NAME. The clone and venv steps are skipped when their target
# directory already exists. Leaves the shell inside the sd-scripts directory
# with the virtual environment activated.
setup_sd_scripts() {
    check_dependencies

    log_info "Setting up sd-scripts environment..."

    # Both pip invocations below resolve packages against the same extra index.
    local rocm_index="https://rocm.nightlies.amd.com/v2-staging/$GFX_NAME"

    # Make sure the install location exists, then work from inside it.
    mkdir -p "$SD_SCRIPTS_INSTALL_DIR"
    cd "$SD_SCRIPTS_INSTALL_DIR"

    # Fetch the sources only on the first run.
    if [[ ! -d "sd-scripts" ]]; then
        log_info "Cloning sd-scripts repository..."
        git clone https://github.com/kohya-ss/sd-scripts.git
    fi

    cd sd-scripts

    # Create the virtual environment only on the first run.
    if [[ ! -d ".venv" ]]; then
        log_info "Creating Python virtual environment..."
        uv venv --python 3.12
    fi

    # The installs below must land in the project venv.
    source .venv/bin/activate

    log_info "Installing PyTorch with AMD GPU support..."
    uv pip install torch torchvision torchaudio triton --extra-index-url "$rocm_index"

    log_info "Installing sd-scripts requirements..."
    uv pip install --upgrade -r requirements.txt --extra-index-url "$rocm_index"

    log_info "sd-scripts environment setup complete."
}

# Helper for validate_inputs: abort unless the variable named by $1 is non-empty.
# $2 is an optional suffix (e.g. " for SDXL") inserted into the error message.
_require_set() {
    local var_name="$1" context="${2:-}"
    if [ -z "${!var_name}" ]; then
        log_error "${var_name} is not set${context}. Please set it in the configuration section."
        exit 1
    fi
}

# Helper for validate_inputs: abort unless the variable named by $1 holds the
# path of an existing regular file.
_require_file() {
    local var_name="$1"
    if [ ! -f "${!var_name}" ]; then
        log_error "${var_name} not found: ${!var_name}"
        exit 1
    fi
}

# Validate the model paths required by the selected MODEL_VERSION.
#
# Always required: DIT_MODEL.
# sdxl:  CLIP_L_MODEL and CLIP_G_MODEL.
# anima: VAE_MODEL and QWEN3_MODEL; T5XXL_TOKENIZER is optional, but when it is
#        set the path must exist.
# Exits with status 1 on the first violation.
validate_inputs() {
    log_info "Validating inputs..."

    _require_set DIT_MODEL
    _require_file DIT_MODEL

    case "$MODEL_VERSION" in
        sdxl)
            _require_set CLIP_L_MODEL " for SDXL"
            _require_set CLIP_G_MODEL " for SDXL"
            _require_file CLIP_L_MODEL
            _require_file CLIP_G_MODEL
            ;;
        anima)
            _require_set VAE_MODEL " for Anima"
            if [ -z "$T5XXL_TOKENIZER" ]; then
                log_info "T5XXL_TOKENIZER is not set for Anima. Using default configuration."
            elif [ ! -e "$T5XXL_TOKENIZER" ]; then
                # -e rather than -f: SOURCE does not say whether the tokenizer
                # path is a file or a directory, so accept either.
                log_error "T5XXL_TOKENIZER not found: $T5XXL_TOKENIZER"
                exit 1
            fi
            _require_set QWEN3_MODEL " for Anima"
            _require_file VAE_MODEL
            _require_file QWEN3_MODEL
            ;;
    esac

    log_info "All inputs validated successfully."
}

# Create the dataset and output directories of the project (including any
# missing parents) and report the resulting locations.
create_project_dirs() {
    local dir

    log_info "Creating project directories..."

    for dir in "$DATASET_DIR" "$OUTPUT_DIR"; do
        mkdir -p "$dir"
    done

    log_info "Project directories ready:"
    log_info "  Project: ${PROJECT_DIR}"
    log_info "  Dataset: ${DATASET_DIR}"
    log_info "  Output: ${OUTPUT_DIR}"
}

# Write ${PROJECT_DIR}/dataset.toml from RESOLUTION, BATCH_SIZE and DATASET_DIR.
# An existing file is never overwritten, so manual edits survive a re-run.
create_dataset_config() {
    local config_path="${PROJECT_DIR}/dataset.toml"

    log_info "Creating dataset configuration..."

    if [ -f "$config_path" ]; then
        log_info "Dataset config already exists at ${config_path}, skipping creation"
        return
    fi

    # One argument per output line; empty arguments produce the blank lines.
    printf '%s\n' \
        '[general]' \
        'caption_extension = ".txt"' \
        '' \
        '[[datasets]]' \
        "resolution = ${RESOLUTION}" \
        "batch_size = ${BATCH_SIZE}" \
        'enable_bucket = true' \
        'bucket_no_upscale = true' \
        '' \
        '  [[datasets.subsets]]' \
        "  image_dir = \"${DATASET_DIR}\"" \
        '  num_repeats = 1' \
        > "$config_path"

    log_info "Dataset config created at ${config_path}"
}

# Write a commented template to ${PROJECT_DIR}/reference_prompts.txt explaining
# how to add sample prompts. An existing file is never overwritten.
create_reference_prompts() {
    local prompts_path="${PROJECT_DIR}/reference_prompts.txt"

    log_info "Creating reference prompts..."

    if [ -f "$prompts_path" ]; then
        log_info "Reference prompts already exist at ${prompts_path}, skipping creation"
        return
    fi

    # The template is static text, so the here-document is quoted (no expansion).
    cat > "$prompts_path" << 'EOF'
# Add prompts one per line to create sample images. Add as many as you need but remember that it takes time to generate them.
# You will also want to add a few parameters at the end of each prompt (on the same line). Most important ones are:
# --w: image width (eg: --w 1024)
# --h: image height (eg: --h 1024)
# --d: the seed. Setting a fixed seed is a good idea to make samples more comparable to each other (eg: --d 42)
# --s: the number of steps. A number between 30-50 will work fine for Anima (e.g: --s 30)
EOF

    log_info "Reference prompts created at ${prompts_path}"
    log_warn "Please edit the prompts to match your desired style!"
}

# Write ${PROJECT_DIR}/training.toml for the selected MODEL_VERSION.
#
# Globals read: PROJECT_DIR, OUTPUT_DIR, PROJECT_NAME, MODEL_VERSION, the model
#               path variables, NETWORK_MODULE, EXTRA_TRAINING_CONFIG and the
#               training parameters (NETWORK_DIM, NETWORK_ALPHA, LEARNING_RATE,
#               MAX_EPOCHS, SAVE_EVERY_N).
# NETWORK_MODULE and EXTRA_TRAINING_CONFIG are filled in by init_env, which
# must therefore have run first. An existing file is never overwritten.
create_training_config() {
    local config_path="${PROJECT_DIR}/training.toml"
    local model_args=""

    log_info "Creating training configuration..."

    if [ -f "$config_path" ]; then
        log_info "Training config already exists at ${config_path}, skipping creation"
        return
    fi

    # Build the model-specific lines of the [general] section.
    case "$MODEL_VERSION" in
        sdxl)
            model_args="pretrained_model_name_or_path = \"${DIT_MODEL}\"
clip_l = \"${CLIP_L_MODEL}\"
clip_g = \"${CLIP_G_MODEL}\""
            ;;
        anima)
            model_args="pretrained_model_name_or_path = \"${DIT_MODEL}\"
vae = \"${VAE_MODEL}\""
            # The tokenizer is optional: leave the key out entirely when it is
            # unset instead of writing an empty path into the config.
            if [ -n "$T5XXL_TOKENIZER" ]; then
                model_args+=$'\n'"t5xxl = \"${T5XXL_TOKENIZER}\""
            fi
            model_args+=$'\n'"qwen3 = \"${QWEN3_MODEL}\""
            ;;
    esac

    cat > "$config_path" << EOF
[general]
${model_args}
dataset_config = "${PROJECT_DIR}/dataset.toml"
persistent_data_loader_workers = true
max_data_loader_n_workers = 2
compile = true
compile_mode = "default"

[network]
network_module = "${NETWORK_MODULE}"
network_dim = ${NETWORK_DIM}
network_alpha = ${NETWORK_ALPHA}

[optimizer]
optimizer_type = "AdamW"
learning_rate = ${LEARNING_RATE}

[training]
seed = 42
max_train_epochs = ${MAX_EPOCHS}
mixed_precision = "bf16"
sdpa = true
${EXTRA_TRAINING_CONFIG}

[output]
output_dir = "${OUTPUT_DIR}"
output_name = "${PROJECT_NAME}"
save_every_n_epochs = ${SAVE_EVERY_N}
save_state = true
sample_prompts = "${PROJECT_DIR}/reference_prompts.txt"
sample_every_n_epochs = ${SAVE_EVERY_N}
sample_at_first = true
EOF

    log_info "Training config created at ${config_path}"
}

# "create" action: initialise the environment, validate the configured model
# paths, then generate the project directories and the three project files
# (dataset config, reference prompts, training config). Finishes by printing
# the follow-up steps for the user.
create() {
    log_info "Creating project..."
    init_env

    validate_inputs
    create_project_dirs
    create_dataset_config
    create_reference_prompts
    create_training_config

    log_info "LoRA training project created successfully at ${PROJECT_DIR}"

    # Plain (uncoloured) instructions on stdout.
    cat << EOF
Next steps:
1. Add your training images to: ${DATASET_DIR}
2. Add captions for your images (.txt files)
3. Edit reference prompts in: ${PROJECT_DIR}/reference_prompts.txt
4. Run the  training:

   $0 train

EOF
}

# "train" action: run the model-specific training script through
# `accelerate launch`, resuming from a previously saved training state when one
# exists in OUTPUT_DIR. For Anima models the newest LoRA is converted for
# ComfyUI afterwards.
#
# Globals read: SD_SCRIPTS_INSTALL_DIR, OUTPUT_DIR, PROJECT_NAME, PROJECT_DIR,
#               TRAINING_SCRIPT (set by init_env), MODEL_VERSION.
# Side effect:  changes the working directory to the sd-scripts checkout.
train_lora() {
    log_info "Initializing environment..."
    init_env

    # The training script name is relative to the sd-scripts checkout.
    cd "${SD_SCRIPTS_INSTALL_DIR}/sd-scripts"

    # Look for a saved training state to resume from: directories named
    # "<project>*state" directly inside OUTPUT_DIR. They are listed as
    # "<mtime> <path>", sorted numerically, and the newest one is kept.
    # The -name pattern is quoted so that find, not the shell, expands it.
    local resume_path=""
    if [ -d "$OUTPUT_DIR" ]; then
        resume_path=$(find "${OUTPUT_DIR}" -maxdepth 1 -type d -name "${PROJECT_NAME}*state" -printf '%T@ %p\n' 2>/dev/null \
            | sort -n | tail -n 1 | cut -d' ' -f2-)

        if [ -n "$resume_path" ]; then
            log_info "Found most recently saved training state: ${resume_path}"
        fi
    fi

    log_info "Starting training..."

    # Keep the arguments in an array so paths containing spaces stay intact.
    # NOTE(review): the config path is passed without the ".toml" suffix while
    # create_training_config writes "training.toml" — presumably sd-scripts
    # appends the extension itself; confirm.
    local -a launch_args=(
        --num_cpu_threads_per_process 1
        --mixed_precision bf16
        "$TRAINING_SCRIPT"
        --config_file "${PROJECT_DIR}/training"
    )

    if [ -n "$resume_path" ]; then
        log_info "Resuming from checkpoint: ${resume_path}"
        launch_args+=(--resume "$resume_path")
    fi

    accelerate launch "${launch_args[@]}"

    log_info "Training completed!"
    log_info "Your LoRA checkpoints are in: ${OUTPUT_DIR}"

    # Automatically convert anima LoRA for ComfyUI
    if [ "$MODEL_VERSION" == "anima" ]; then
        convert_lora
    fi
}

# "convert" action: convert Anima LoRA checkpoints for ComfyUI compatibility.
#
# Arguments: zero or more checkpoint paths. With no arguments, the most recently
#            modified "<project>*.safetensors" file directly inside OUTPUT_DIR
#            is used; files that are themselves conversion outputs
#            ("*_comfyui.safetensors") are ignored so they are never converted
#            a second time.
# Output:    each result is written to
#            ${OUTPUT_DIR}/<input basename>_comfyui.safetensors.
# Returns:   1 when no checkpoint could be auto-detected or when any given
#            path does not exist (the remaining paths are still processed);
#            0 otherwise.
convert_lora() {
    init_env

    local input_path base_name output_path
    local failed=0

    # Auto-detect latest checkpoint if no arguments provided
    if [ $# -eq 0 ]; then
        input_path=$(find "${OUTPUT_DIR}" -maxdepth 1 -type f \
            -name "${PROJECT_NAME}*.safetensors" ! -name '*_comfyui.safetensors' \
            -printf '%T@ %p\n' 2>/dev/null | sort -n | tail -n 1 | cut -d' ' -f2-)

        if [ -z "$input_path" ]; then
            log_error "No LoRA checkpoint found in ${OUTPUT_DIR}"
            log_info "Usage: $0 convert [checkpoint_path1] [checkpoint_path2] ..."
            return 1
        fi

        log_info "No checkpoint specified. Using latest: ${input_path}"
        # Continue as if the detected file had been passed explicitly.
        set -- "$input_path"
    fi

    # Process all provided input paths
    for input_path in "$@"; do
        if [ ! -f "$input_path" ]; then
            log_error "Checkpoint not found: $input_path"
            # Remember the failure but keep converting the other inputs.
            failed=1
            continue
        fi

        # Derive the output name from the input file name.
        base_name=$(basename -- "$input_path" .safetensors)
        output_path="${OUTPUT_DIR}/${base_name}_comfyui.safetensors"

        log_info "Converting LoRA for ComfyUI compatibility..."
        log_info "Input:  ${input_path}"
        log_info "Output: ${output_path}"

        python "${SD_SCRIPTS_INSTALL_DIR}/sd-scripts/networks/convert_anima_lora_to_comfy.py" \
            "$input_path" "$output_path"

        log_info "Conversion complete!"
    done

    return "$failed"
}

# Print the usage summary (available actions) on stdout.
help() {
    cat << EOF
Usage: $0 {setup|create|train|convert}

Actions:
  setup    Install sd-scripts environment
  create   Create a new LoRA training project
  train    Train the LoRA
  convert  Convert anima LoRA for ComfyUI
           Usage: $0 convert [checkpoint_path1] [checkpoint_path2] ...
EOF
}

# Entry point: the first command-line argument selects the action. Anything
# else (including no argument at all) prints the usage text and exits with
# status 1. For "convert", every argument after the action name is forwarded
# as a checkpoint path.
case "$1" in
    setup)   setup_sd_scripts ;;
    create)  create ;;
    train)   train_lora ;;
    convert) convert_lora "${@:2}" ;;
    *)
        help
        exit 1
        ;;
esac