---
name: database-migration-patterns
description: Database schema migration patterns and best practices. Use when creating database migrations, implementing zero-downtime schema changes, version control for databases, or managing data migrations.
---

# Database Migration Patterns

Best practices for safe database schema migrations.

## Migration File Structure

```
migrations/
├── versions/
│   ├── 001_initial_schema.py
│   ├── 002_add_users_table.py
│   ├── 003_add_user_email_index.py
│   └── 004_add_orders_table.py
├── alembic.ini
└── env.py
```

## Alembic Migration Template

```python
"""Add users table

Revision ID: 002
Revises: 001
Create Date: 2024-01-15 10:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers
revision = '002'
down_revision = '001'
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.create_table(
        'users',
        sa.Column('id', postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column('email', sa.String(255), nullable=False),
        sa.Column('name', sa.String(255), nullable=True),
        sa.Column('status', sa.String(50), nullable=False, server_default='pending'),
        sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.func.now()),
        sa.Column('updated_at', sa.DateTime(timezone=True), onupdate=sa.func.now()),
    )

    op.create_index('ix_users_email', 'users', ['email'], unique=True)
    op.create_index('ix_users_status', 'users', ['status'])


def downgrade() -> None:
    op.drop_index('ix_users_status')
    op.drop_index('ix_users_email')
    op.drop_table('users')
```

## Zero-Downtime Migration Patterns

### Pattern 1: Expand and Contract

```python
# Step 1: Add new column (nullable)
def upgrade_step1():
    op.add_column('users', sa.Column('email_new', sa.String(255), nullable=True))

# Step 2: Backfill data (run separately, possibly in batches)
def backfill():
    connection = op.get_bind()
    connection.execute("""
        UPDATE users
        SET email_new = email
        WHERE email_new IS NULL
        LIMIT 10000
    """)

# Step 3: Make new column non-nullable, drop old column
def upgrade_step3():
    op.alter_column('users', 'email_new', nullable=False)
    op.drop_column('users', 'email')
    op.alter_column('users', 'email_new', new_column_name='email')
```

### Pattern 2: Safe Column Rename

```python
# DON'T: Direct rename causes downtime
# op.alter_column('users', 'username', new_column_name='email')

# DO: Expand-Contract pattern
def upgrade():
    # 1. Add new column
    op.add_column('users', sa.Column('email', sa.String(255)))

    # 2. Create trigger to sync data (PostgreSQL)
    op.execute("""
        CREATE OR REPLACE FUNCTION sync_username_to_email()
        RETURNS TRIGGER AS $$
        BEGIN
            NEW.email = NEW.username;
            RETURN NEW;
        END;
        $$ LANGUAGE plpgsql;

        CREATE TRIGGER sync_username_email
        BEFORE INSERT OR UPDATE ON users
        FOR EACH ROW EXECUTE FUNCTION sync_username_to_email();
    """)

    # 3. Backfill existing data
    op.execute("UPDATE users SET email = username WHERE email IS NULL")

# Later migration after app updated:
def upgrade_cleanup():
    op.execute("DROP TRIGGER sync_username_email ON users")
    op.execute("DROP FUNCTION sync_username_to_email()")
    op.drop_column('users', 'username')
```

### Pattern 3: Safe Index Creation

```python
def upgrade():
    # Create index concurrently to avoid locking
    op.execute("""
        CREATE INDEX CONCURRENTLY ix_users_email
        ON users (email)
    """)

def downgrade():
    op.execute("DROP INDEX CONCURRENTLY ix_users_email")
```

### Pattern 4: Adding NOT NULL Constraint

```python
def upgrade():
    # 1. Add column as nullable
    op.add_column('users', sa.Column('verified', sa.Boolean(), nullable=True))

    # 2. Set default for new rows
    op.alter_column('users', 'verified', server_default=sa.false())

    # 3. Backfill existing rows (in batches for large tables)
    op.execute("""
        UPDATE users
        SET verified = false
        WHERE verified IS NULL
    """)

    # 4. Add NOT NULL constraint
    op.alter_column('users', 'verified', nullable=False)
```

## Batch Processing for Large Tables

```python
from sqlalchemy import text

def backfill_in_batches(connection, batch_size=10000):
    """Backfill data in batches to avoid long locks."""
    while True:
        result = connection.execute(text("""
            UPDATE users
            SET new_column = old_column
            WHERE id IN (
                SELECT id FROM users
                WHERE new_column IS NULL
                LIMIT :batch_size
                FOR UPDATE SKIP LOCKED
            )
            RETURNING id
        """), {"batch_size": batch_size})

        updated = result.rowcount
        connection.commit()

        if updated == 0:
            break

        print(f"Updated {updated} rows")
        time.sleep(0.1)  # Small delay to reduce load
```

## Data Migration Patterns

### Separate Data Migrations

```python
# Schema migration (runs during deploy)
def upgrade():
    op.add_column('orders', sa.Column('total_cents', sa.BigInteger()))

# Data migration (runs separately)
# data_migrations/migrate_order_totals.py
def run_data_migration():
    """Convert total from dollars to cents."""
    with engine.connect() as conn:
        while True:
            result = conn.execute(text("""
                UPDATE orders
                SET total_cents = total * 100
                WHERE total_cents IS NULL
                AND id IN (
                    SELECT id FROM orders
                    WHERE total_cents IS NULL
                    LIMIT 5000
                )
            """))

            if result.rowcount == 0:
                break

            conn.commit()
```

## Foreign Key Constraints

### Safe Foreign Key Addition

```python
def upgrade():
    # 1. Add column without constraint
    op.add_column('orders',
        sa.Column('user_id', postgresql.UUID(), nullable=True)
    )

    # 2. Backfill data
    op.execute("""
        UPDATE orders o
        SET user_id = (
            SELECT id FROM users u
            WHERE u.legacy_id = o.legacy_user_id
        )
    """)

    # 3. Add constraint with NOT VALID (PostgreSQL)
    op.execute("""
        ALTER TABLE orders
        ADD CONSTRAINT fk_orders_user_id
        FOREIGN KEY (user_id) REFERENCES users(id)
        NOT VALID
    """)

    # 4. Validate constraint in background
    op.execute("""
        ALTER TABLE orders
        VALIDATE CONSTRAINT fk_orders_user_id
    """)
```

## Version Control Integration

```yaml
# CI/CD pipeline for migrations
name: Database Migration

on:
  push:
    paths:
      - 'migrations/**'

jobs:
  validate:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Validate migration
        run: |
          alembic check
          alembic upgrade head --sql > /dev/null

      - name: Check for destructive operations
        run: |
          # Fail if migration contains DROP without review
          if grep -r "op.drop" migrations/versions/*.py; then
            echo "::warning::Migration contains DROP operations"
          fi

  deploy-staging:
    needs: validate
    runs-on: ubuntu-latest
    environment: staging
    steps:
      - name: Run migration
        run: alembic upgrade head
```

## Rollback Strategies

```python
# Always implement downgrade
def downgrade():
    # For additive changes, downgrade is straightforward
    op.drop_column('users', 'new_column')

# For destructive changes, preserve data
def upgrade():
    # Rename instead of drop
    op.rename_table('old_table', '_old_table_backup')

def downgrade():
    op.rename_table('_old_table_backup', 'old_table')
```

## Testing Migrations

```python
# tests/test_migrations.py
import pytest
from alembic.config import Config
from alembic import command

@pytest.fixture
def alembic_config():
    config = Config("alembic.ini")
    return config

def test_upgrade_downgrade(alembic_config, test_database):
    """Test that all migrations can upgrade and downgrade."""
    # Upgrade to head
    command.upgrade(alembic_config, "head")

    # Downgrade to base
    command.downgrade(alembic_config, "base")

    # Upgrade again
    command.upgrade(alembic_config, "head")

def test_migration_is_reversible(alembic_config, test_database):
    """Test each migration individually."""
    revisions = get_all_revisions()

    for rev in revisions:
        command.upgrade(alembic_config, rev)
        command.downgrade(alembic_config, "-1")
        command.upgrade(alembic_config, rev)
```

## References

- [Alembic Documentation](https://alembic.sqlalchemy.org/)
- [PostgreSQL ALTER TABLE](https://www.postgresql.org/docs/current/sql-altertable.html)
- [Zero-Downtime Migrations](https://blog.cloudflare.com/why-we-use-postgresqls-advisory-locks/)
- [Safe Database Migrations](https://www.braintreepayments.com/blog/safe-operations-for-high-volume-postgresql/)