---
name: deep-learning
description: Neural networks, CNNs, RNNs, and Transformers with PyTorch and Hugging Face. Use for image classification, NLP, sequence modeling, or complex pattern recognition.
sasmp_version: "1.3.0"
bonded_agent: 04-machine-learning-ai
bond_type: PRIMARY_BOND
---

# Deep Learning

Build neural networks for computer vision, NLP, and complex data patterns.

## Quick Start with PyTorch

```python
import torch
import torch.nn as nn
import torch.optim as optim

# Define a simple fully connected network
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

# Initialize (784 = 28x28 flattened MNIST-style images)
model = NeuralNet(input_size=784, hidden_size=128, num_classes=10)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop (assumes a train_loader DataLoader is defined)
for epoch in range(10):
    for images, labels in train_loader:
        images = images.view(images.size(0), -1)  # flatten to [batch, 784]

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
```

## CNN for Image Classification

```python
import torch.nn.functional as F

class CNN(nn.Module):
    def __init__(self, num_classes=10):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # 64 * 8 * 8 assumes 32x32 inputs (e.g. CIFAR-10) halved by two pools
        self.fc1 = nn.Linear(64 * 8 * 8, 512)
        self.fc2 = nn.Linear(512, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)  # flatten for the classifier head
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x
```

## Transfer Learning

```python
import torchvision.models as models

# Load pre-trained ResNet (the old `pretrained=True` flag is deprecated
# in torchvision >= 0.13 in favor of the `weights` argument)
model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)

# Freeze layers
for param in model.parameters():
    param.requires_grad = False

# Replace final layer
num_classes = 10  # set to your task's class count
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, num_classes)

# Only train the final layer
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)
```

## LSTM for Sequences

```python
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Note: LSTM dropout only applies between layers (num_layers > 1)
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, dropout=0.2)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # Create initial states on the same device as the input
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, device=x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, device=x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])  # classify from the last time step
        return out
```

## Transformers with Hugging Face

```python
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments

model = AutoModelForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=2
)

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
)

# Assumes train_dataset / eval_dataset are pre-tokenized datasets
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset
)

trainer.train()
```

## Tips & Tricks

**Regularization:**
- Dropout: `nn.Dropout(0.5)`
- Batch Normalization: `nn.BatchNorm2d(channels)`
- Weight Decay: `optimizer = optim.Adam(params, weight_decay=0.01)`
- Early Stopping: Monitor validation loss (see the sketch after this list)
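Core PyTorch has no built-in early-stopping utility, so below is a minimal hand-rolled sketch. It reuses `model`, `criterion`, `optimizer`, and `train_loader` from the Quick Start; the `val_loader` name and the patience of 5 are illustrative assumptions, not part of the snippets above.

```python
# Minimal early-stopping sketch (assumes model, criterion, optimizer,
# train_loader exist as above; val_loader and patience are assumptions)
best_val_loss = float('inf')
patience, epochs_without_improvement = 5, 0

for epoch in range(100):
    model.train()
    for images, labels in train_loader:
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

    # Evaluate on the validation set
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for images, labels in val_loader:
            val_loss += criterion(model(images), labels).item()
    val_loss /= len(val_loader)

    # Stop when validation loss has not improved for `patience` epochs
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        epochs_without_improvement = 0
        torch.save(model.state_dict(), 'best_model.pt')  # checkpoint the best model
    else:
        epochs_without_improvement += 1
        if epochs_without_improvement >= patience:
            break
```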
**Optimization:**
- Learning Rate Scheduling:
  ```python
  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
  ```
- Gradient Clipping:
  ```python
  torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
  ```

**Data Augmentation:**
```python
from torchvision import transforms

transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    # ImageNet channel statistics
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
```

## Common Issues

**Overfitting:**
- Add dropout
- Use data augmentation
- Apply early stopping
- Reduce model complexity

**Underfitting:**
- Increase model capacity
- Train longer
- Reduce regularization
- Engineer better features

**Vanishing Gradients:**
- Use ReLU activations
- Add batch normalization
- Add residual connections (see the sketch below)
- Use proper weight initialization
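To make the residual-connection remedy concrete, here is a minimal sketch of a ResNet-style block; the channel count and two-convolution layout are illustrative assumptions, not a prescribed architecture.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class ResidualBlock(nn.Module):
    """Minimal residual block: the skip connection gives gradients a
    direct identity path backward, easing vanishing gradients."""
    def __init__(self, channels):
        super().__init__()
        self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(channels)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(channels)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        return F.relu(out + x)  # skip connection: add the input back before activation

# Usage: shapes are preserved, so blocks stack freely
block = ResidualBlock(64)
y = block(torch.randn(1, 64, 32, 32))
```

Because `out + x` keeps an identity path through every block, very deep stacks of these blocks still propagate usable gradients where equally deep plain stacks stall.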