---
name: model-optimization
description: Quantization, pruning, AutoML, hyperparameter tuning, and performance optimization. Use for improving model performance, reducing size, or automated ML.
sasmp_version: "1.3.0"
bonded_agent: 06-mlops-deployment
bond_type: SECONDARY_BOND
---

# Model Optimization

Optimize models for better performance, efficiency, and faster inference.

## Hyperparameter Tuning

### Grid Search

```python
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [5, 10, 15],
    'min_samples_split': [2, 5, 10]
}

grid_search = GridSearchCV(
    RandomForestClassifier(),
    param_grid,
    cv=5,
    scoring='f1_weighted',
    n_jobs=-1
)
grid_search.fit(X_train, y_train)

print(f"Best params: {grid_search.best_params_}")
print(f"Best score: {grid_search.best_score_:.3f}")
```

### Bayesian Optimization

```python
import xgboost as xgb
from skopt import BayesSearchCV

param_space = {
    'n_estimators': (100, 500),
    'max_depth': (5, 50),
    'learning_rate': (0.01, 0.3, 'log-uniform')
}

bayes_search = BayesSearchCV(
    xgb.XGBClassifier(),
    param_space,
    n_iter=50,
    cv=5,
    scoring='f1_weighted'
)
bayes_search.fit(X_train, y_train)
```

### Optuna

```python
import optuna
import xgboost as xgb
from sklearn.model_selection import cross_val_score

def objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3)
    }
    model = xgb.XGBClassifier(**params)
    score = cross_val_score(model, X_train, y_train, cv=5, scoring='f1').mean()
    return score

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

print(f"Best params: {study.best_params}")
print(f"Best score: {study.best_value:.3f}")
```

## Model Compression

### Quantization (PyTorch)

```python
import torch

# Post-training dynamic quantization
model_fp32 = MyModel()
model_int8 = torch.quantization.quantize_dynamic(
    model_fp32,
    {torch.nn.Linear},
    dtype=torch.qint8
)
# Roughly 4x smaller model, typically 2-4x faster inference on CPU

# Quantization-aware training
model = MyModel()
model.train()
model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
model_prepared = torch.quantization.prepare_qat(model)

# Train with fake quantization enabled
for epoch in range(epochs):
    train(model_prepared)

# Switch to eval mode before converting to a true INT8 model
model_prepared.eval()
model_quantized = torch.quantization.convert(model_prepared)
```

### Pruning

```python
import torch.nn.utils.prune as prune

# Global unstructured pruning over all Linear layers
parameters_to_prune = [
    (module, 'weight')
    for module in model.modules()
    if isinstance(module, torch.nn.Linear)
]

prune.global_unstructured(
    parameters_to_prune,
    pruning_method=prune.L1Unstructured,
    amount=0.2  # Remove 20% of weights
)

# Remove pruning reparametrization (make the pruning permanent)
for module, _ in parameters_to_prune:
    prune.remove(module, 'weight')
```

### Knowledge Distillation

```python
import torch
import torch.nn.functional as F

def distillation_loss(student_logits, teacher_logits, labels, T=3.0, alpha=0.5):
    """
    Distillation loss: a combination of soft targets from the teacher
    and hard targets from the ground truth.
    """
    # Soft targets (knowledge from the teacher)
    soft_targets = F.softmax(teacher_logits / T, dim=1)
    soft_prob = F.log_softmax(student_logits / T, dim=1)
    soft_loss = F.kl_div(soft_prob, soft_targets, reduction='batchmean') * (T ** 2)

    # Hard targets (ground truth)
    hard_loss = F.cross_entropy(student_logits, labels)

    # Combined loss
    return alpha * soft_loss + (1 - alpha) * hard_loss

# Train the student model
teacher_model.eval()
student_model.train()

for images, labels in train_loader:
    with torch.no_grad():
        teacher_logits = teacher_model(images)

    student_logits = student_model(images)
    loss = distillation_loss(student_logits, teacher_logits, labels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
```
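### Verifying Size Reductions

The compression techniques above quote size savings (e.g. roughly 4x from dynamic quantization). A minimal sketch to verify them, assuming `model_fp32` and `model_int8` from the quantization example; the helper names are illustrative, not a library API:

```python
import os
import torch

def model_size_mb(model, path="tmp_weights.pt"):
    """Save the state dict to disk and return its size in megabytes."""
    torch.save(model.state_dict(), path)
    size_mb = os.path.getsize(path) / 1e6
    os.remove(path)
    return size_mb

def count_parameters(model):
    """Total number of parameters exposed by the model."""
    return sum(p.numel() for p in model.parameters())

# Compare the FP32 baseline against the dynamically quantized model
print(f"FP32 model: {model_size_mb(model_fp32):.1f} MB, "
      f"{count_parameters(model_fp32):,} params")
print(f"INT8 model: {model_size_mb(model_int8):.1f} MB")
```

Only the on-disk size is reported for the quantized model, since packed INT8 weights are not returned by `parameters()`.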
## AutoML

### Auto-sklearn

```python
import autosklearn.classification

automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=3600,  # 1 hour
    per_run_time_limit=300,
    memory_limit=3072
)
automl.fit(X_train, y_train)

predictions = automl.predict(X_test)
print(automl.leaderboard())
print(automl.show_models())
```

### H2O AutoML

```python
import h2o
import pandas as pd
from h2o.automl import H2OAutoML

h2o.init()

train = h2o.H2OFrame(pd.concat([X_train, y_train], axis=1))
test = h2o.H2OFrame(pd.concat([X_test, y_test], axis=1))

aml = H2OAutoML(max_runtime_secs=3600, max_models=20)
# 'target' is the name of the label column in the H2OFrame
aml.train(x=X_train.columns.tolist(), y='target', training_frame=train)

# Leaderboard
lb = aml.leaderboard
print(lb)

# Best model
best_model = aml.leader
predictions = best_model.predict(test)
```

### TPOT

```python
from tpot import TPOTClassifier

tpot = TPOTClassifier(
    generations=5,
    population_size=50,
    verbosity=2,
    random_state=42,
    n_jobs=-1
)
tpot.fit(X_train, y_train)

print(f"Score: {tpot.score(X_test, y_test):.3f}")

# Export best pipeline
tpot.export('best_pipeline.py')
```

## Feature Selection

```python
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import (
    SelectKBest, f_classif, RFE, SelectFromModel
)

# Univariate selection
selector = SelectKBest(f_classif, k=10)
X_new = selector.fit_transform(X, y)

# Recursive Feature Elimination
estimator = RandomForestClassifier()
rfe = RFE(estimator, n_features_to_select=10)
X_new = rfe.fit_transform(X, y)

# Model-based selection
selector = SelectFromModel(RandomForestClassifier(), max_features=10)
X_new = selector.fit_transform(X, y)
```

## Performance Optimization

### Inference Optimization (ONNX)

```python
import torch
import torch.onnx

# Export PyTorch to ONNX
dummy_input = torch.randn(1, 3, 224, 224)
torch.onnx.export(
    model,
    dummy_input,
    "model.onnx",
    opset_version=11,
    input_names=['input'],
    output_names=['output']
)

# Run with ONNX Runtime
import onnxruntime as ort

session = ort.InferenceSession("model.onnx")
input_name = session.get_inputs()[0].name
# input_data: numpy array matching the exported input shape, e.g. (1, 3, 224, 224)
output = session.run(None, {input_name: input_data})
```

### TensorRT (NVIDIA GPU)

```python
import tensorrt as trt

# Convert ONNX to TensorRT
logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)
# The ONNX parser requires an explicit-batch network
network = builder.create_network(
    1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
)
parser = trt.OnnxParser(network, logger)

with open('model.onnx', 'rb') as f:
    parser.parse(f.read())

config = builder.create_builder_config()
config.max_workspace_size = 1 << 30  # 1GB

engine = builder.build_engine(network, config)
# Often several times faster inference on NVIDIA GPUs
```

## Learning Rate Scheduling

```python
from torch.optim.lr_scheduler import StepLR, CosineAnnealingLR

# Step decay
scheduler = StepLR(optimizer, step_size=30, gamma=0.1)

# Cosine annealing
scheduler = CosineAnnealingLR(optimizer, T_max=100)

# Training loop
for epoch in range(epochs):
    train(model, optimizer)
    scheduler.step()
```

## Early Stopping

```python
class EarlyStopping:
    def __init__(self, patience=7, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss):
        if self.best_loss is None:
            self.best_loss = val_loss
        elif val_loss > self.best_loss - self.min_delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_loss = val_loss
            self.counter = 0

# Usage
early_stopping = EarlyStopping(patience=10)

for epoch in range(epochs):
    train_loss = train(model)
    val_loss = validate(model)
    early_stopping(val_loss)

    if early_stopping.early_stop:
        print("Early stopping triggered")
        break
```
## Best Practices

1. **Start simple**: Build a baseline model first
2. **Profile before optimizing**: Find the real bottlenecks (see the profiling sketch after this list)
3. **Measure everything**: Track metrics for every change
4. **Mind the trade-offs**: Accuracy vs. speed vs. size
5. **Validate improvements**: Confirm gains with A/B testing
6. **Automate**: Use AutoML for initial exploration
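A minimal profiling sketch for practice 2, assuming a PyTorch `model` and a 224x224 image input (adjust to your own model and input shape). The operator-level table shows where inference time actually goes, and the timing loop gives an end-to-end latency baseline to compare before and after optimization:

```python
import time
import torch
from torch.profiler import profile, ProfilerActivity

model.eval()
dummy_input = torch.randn(1, 3, 224, 224)  # assumed input shape

# Operator-level profile: which ops dominate inference time?
with profile(activities=[ProfilerActivity.CPU], record_shapes=True) as prof:
    with torch.no_grad():
        model(dummy_input)
print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=10))

# End-to-end latency: average over repeated runs after a warm-up
def benchmark_latency(model, x, n_warmup=10, n_runs=100):
    with torch.no_grad():
        for _ in range(n_warmup):
            model(x)
        start = time.perf_counter()
        for _ in range(n_runs):
            model(x)
    return (time.perf_counter() - start) / n_runs * 1000  # ms per run

print(f"Mean latency: {benchmark_latency(model, dummy_input):.2f} ms")
```

Re-run the same measurement after quantization, pruning, or an ONNX/TensorRT export so speed-up claims are backed by numbers on your own hardware; on GPU, call `torch.cuda.synchronize()` around the timed region.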