In [None]:
#@markdown Check type of GPU and VRAM available.
#!nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv,noheader
!nvidia-smi

https://github.com/ShivamShrirao/diffusers/tree/main/examples/dreambooth
with changes to make it work on SageMaker Studio Lab

In [None]:
#%conda remove --name studiolab --all
# Restore/Reset env again with Studio default please use the Yaml file from /opt/amazon/sagemaker/environments/
#%conda env create -f /opt/amazon/sagemaker/environments/studiolab.yaml # or default.yaml

## Install Requirements

In [None]:
# Prepare for installation of xformers for SageMaker Studio lab
%conda install -y -c pytorch -c conda-forge cudatoolkit=11.6 pytorch=1.12.1 torchvision==0.13.1

In [None]:
!wget -q https://github.com/ShivamShrirao/diffusers/raw/main/examples/dreambooth/train_dreambooth.py
!wget -q https://github.com/ShivamShrirao/diffusers/raw/main/scripts/convert_diffusers_to_original_stable_diffusion.py
%pip install -qq git+https://github.com/ShivamShrirao/diffusers
#!wget -q https://github.com/Miraculix200/diffusers/raw/main/examples/dreambooth/train_dreambooth.py
#!wget -q https://github.com/Miraculix200/diffusers/raw/main/scripts/convert_diffusers_to_original_stable_diffusion.py
#%pip install -qq git+https://github.com/Miraculix200/diffusers
%pip install -q -U --pre triton
%pip install -q accelerate==0.12.0 transformers ftfy bitsandbytes tensorboard natsort

In [4]:
# Hugginface token, necessary to download the model
# You only need to do this once, unless you delete the ~/.huggingface folder

import ipywidgets as widgets

token_textbox = widgets.Text(
 value='Enter Huggingface token here',
 description='Token:',
)
token_textbox

Text(value='Enter Huggingface token here', description='Token:')

In [5]:
# Save Huggingface token to ~/.huggingface

!mkdir -p ~/.huggingface
!echo -n "{token_textbox.value}" > ~/.huggingface/token

### Install xformers

In [None]:
# Installation of xformers for SageMaker Studio lab
#%conda install -y -c pytorch -c conda-forge cudatoolkit=11.6 pytorch=1.12.1 
%conda install -y xformers -c xformers/label/dev

In [7]:
#ignore this cell

#%pip install -q https://github.com/metrolobo/xformers_wheels/releases/download/1d31a3ac_various_6/xformers-0.0.14.dev0-cp37-cp37m-linux_x86_64.whl
# These were compiled on Tesla T4, should also work on P100, thanks to https://github.com/metrolobo

# If precompiled wheels don't work, install it with the following command. It will take around 40 minutes to compile.
#%pip install git+https://github.com/facebookresearch/xformers@1d31a3a#egg=xformers

## Settings and run

In [None]:
#@markdown If model weights should be saved directly in google drive (takes around 4-5 GB).

# Saving to gdrive won't work on SageMaker Studio Lab, so don't change this variable
save_to_gdrive = False #@param {type:"boolean"}
#if save_to_gdrive:
# from google.colab import drive
# drive.mount('~/sagemaker-studiolab-notebooks/dreambooth/content/drive')

#@markdown Name/Path of the initial model.
MODEL_NAME = "runwayml/stable-diffusion-v1-5" #@param {type:"string"}

#@markdown Enter the directory name to save model at.

OUTPUT_DIR = "stable_diffusion_weights/zwx" #@param {type:"string"}
if save_to_gdrive:
 OUTPUT_DIR = "content/drive/MyDrive/" + OUTPUT_DIR
else:
 OUTPUT_DIR = "content/" + OUTPUT_DIR

print(f"[*] Weights will be saved at {OUTPUT_DIR}")

!mkdir -p $OUTPUT_DIR

# Start Training

Use the table below to choose the best flags based on your memory and speed requirements. Tested on Tesla T4 GPU.


| `fp16` | `train_batch_size` | `gradient_accumulation_steps` | `gradient_checkpointing` | `use_8bit_adam` | GB VRAM usage | Speed (it/s) |
| ---- | ------------------ | ----------------------------- | ----------------------- | --------------- | ---------- | ------------ |
| fp16 | 1 | 1 | TRUE | TRUE | 9.92 | 0.93 |
| no | 1 | 1 | TRUE | TRUE | 10.08 | 0.42 |
| fp16 | 2 | 1 | TRUE | TRUE | 10.4 | 0.66 |
| fp16 | 1 | 1 | FALSE | TRUE | 11.17 | 1.14 |
| no | 1 | 1 | FALSE | TRUE | 11.17 | 0.49 |
| fp16 | 1 | 2 | TRUE | TRUE | 11.56 | 1 |
| fp16 | 2 | 1 | FALSE | TRUE | 13.67 | 0.82 |
| fp16 | 1 | 2 | FALSE | TRUE | 13.7 | 0.83 |
| fp16 | 1 | 1 | TRUE | FALSE | 15.79 | 0.77 |


Add `--gradient_checkpointing` flag for around 9.92 GB VRAM usage.

remove `--use_8bit_adam` flag for full precision. Requires 15.79 GB with `--gradient_checkpointing` else 17.8 GB.

remove `--train_text_encoder` flag to reduce memory usage further, degrades output quality.

In [28]:
# You can also add multiple concepts here. Try tweaking `--max_train_steps` accordingly.

concepts_list = [
 {
 "instance_prompt": "photo of zwx dog",
 "class_prompt": "photo of a dog",
 "instance_data_dir": "content/data/zwx",
 "class_data_dir": "content/data/dog"
 },
# {
# "instance_prompt": "photo of ukj person",
# "class_prompt": "photo of a person",
# "instance_data_dir": "/content/data/ukj",
# "class_data_dir": "/content/data/person"
# }
]

# `class_data_dir` contains regularization images
import json
import os
for c in concepts_list:
 os.makedirs(c["instance_data_dir"], exist_ok=True)

with open("concepts_list.json", "w") as f:
 json.dump(concepts_list, f, indent=4)

In [10]:
# SageMaker Studio Lab:
# manually upload images to /content/data/zwx
# or if you only train a single concept, run this cell to open a file upload dialog

from ipywidgets import FileUpload
from IPython.display import display
upload = FileUpload(accept='.jpg', multiple=True)
display(upload)

FileUpload(value={}, accept='.jpg', description='Upload', multiple=True)

In [None]:
# if you only train a single concept, run this cell if you used the file upload dialog to upload images

INSTANCE_DIR = concepts_list[0]["instance_data_dir"]
print("Creating instance folder: " + INSTANCE_DIR)
!mkdir -p $INSTANCE_DIR
 
for name, file_info in upload.value.items():
 with open(name, 'wb') as fp:
 print("Uploading: " + name)
 fp.write(file_info['content'])
 !mv $name $INSTANCE_DIR
 
print("### Upload complete ###")

In [None]:
!accelerate launch train_dreambooth.py \
 --pretrained_model_name_or_path=$MODEL_NAME \
 --pretrained_vae_name_or_path="stabilityai/sd-vae-ft-mse" \
 --output_dir=$OUTPUT_DIR \
 --with_prior_preservation --prior_loss_weight=1.0 \
 --seed=1337 \
 --resolution=512 \
 --train_batch_size=1 \
 --train_text_encoder \
 --mixed_precision="fp16" \
 --use_8bit_adam \
 --gradient_accumulation_steps=1 \
 --learning_rate=1e-6 \
 --lr_scheduler="constant" \
 --lr_warmup_steps=0 \
 --num_class_images=50 \
 --sample_batch_size=4 \
 --max_train_steps=1500 \
 --save_interval=10000 \
 --save_sample_prompt="photo of zwx dog" \
 --concepts_list="concepts_list.json"

# Reduce the `--save_interval` to lower than `--max_train_steps` to save weights from intermediate steps.
# `--save_sample_prompt` can be same as `--instance_prompt` to generate intermediate samples (saved along with weights in samples directory).

In [None]:
#@markdown Run to generate a grid of preview images from the last saved weights.
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

weights_folder = OUTPUT_DIR
folders = sorted([f for f in os.listdir(weights_folder) if f != "0"], key=lambda x: int(x))

row = len(folders)
col = len(os.listdir(os.path.join(weights_folder, folders[0], "samples")))
scale = 4
fig, axes = plt.subplots(row, col, figsize=(col*scale, row*scale), gridspec_kw={'hspace': 0, 'wspace': 0})

for i, folder in enumerate(folders):
 folder_path = os.path.join(weights_folder, folder)
 image_folder = os.path.join(folder_path, "samples")
 images = [f for f in os.listdir(image_folder)]
 for j, image in enumerate(images):
 if row == 1:
 currAxes = axes[j]
 else:
 currAxes = axes[i, j]
 if i == 0:
 currAxes.set_title(f"Image {j}")
 if j == 0:
 currAxes.text(-0.1, 0.5, folder, rotation=0, va='center', ha='center', transform=currAxes.transAxes)
 image_path = os.path.join(image_folder, image)
 img = mpimg.imread(image_path)
 currAxes.imshow(img, cmap='gray')
 currAxes.axis('off')
 
plt.tight_layout()
plt.savefig('grid.png', dpi=72)

In [None]:
#@markdown Specify the weights directory to use (leave blank for latest)
WEIGHTS_DIR = "" #@param {type:"string"}
if WEIGHTS_DIR == "":
 from natsort import natsorted
 from glob import glob
 import os
 WEIGHTS_DIR = natsorted(glob(OUTPUT_DIR + os.sep + "*"))[-1]
print(f"[*] WEIGHTS_DIR={WEIGHTS_DIR}")

## Convert weights to ckpt to use in web UIs like AUTOMATIC1111.

In [None]:
#@markdown Run conversion.
ckpt_path = WEIGHTS_DIR + "/model.ckpt"

half_arg = ""
#@markdown Whether to convert to fp16, takes half the space (2GB).
fp16 = False #@param {type: "boolean"}
if fp16:
 half_arg = "--half"
!python convert_diffusers_to_original_stable_diffusion.py --model_path $WEIGHTS_DIR --checkpoint_path $ckpt_path $half_arg
print(f"[*] Converted ckpt saved at {ckpt_path}")

# Upload .ckpt to MEGA

In [None]:
# install mega.py 
%pip install mega.py

In [23]:
# Prompt for username

import ipywidgets as widgets

username_textbox = widgets.Text(
 value='MEGA username',
 description='Username:',
)
username_textbox


Text(value='MEGA username', description='Username:')

In [24]:
# Prompt for password

password_textbox = widgets.Text(
 value='MEGA password',
 description='Password:',
)
password_textbox

Text(value='MEGA password', description='Password:')

In [None]:
# upload model.ckpt to mega.nz
import time

start = time.time()

from mega import Mega
mega = Mega()
m = mega.login(username_textbox.value, password_textbox.value)

print("Uploading to mega.nz. This can take 20-30 minutes...")
file = m.upload(ckpt_path)
flink = m.get_upload_link(file)

end = time.time()
elapsed = int(time.time() - start)

print("Upload complete after " + str(elapsed) + " seconds")
print("Public download link: " + flink);
print("The content folder can now be deleted to free up space")
print("Alternatively, if you want to use the downladed weights again, use the next cell to just delete model.ckpt")

#files = m.get_files()
#for f in files:
# print(f)

In [26]:
# Delete model.ckpt to free space
!rm $ckpt_path

In [None]:
#@title Free runtime memory
exit()