# fastai OOM memory recovery


In [None]:
# (re)load the autoreload extension and let it reload all modules before running each cell
%reload_ext autoreload
%autoreload 2
# render matplotlib figures inline in the notebook
%matplotlib inline

In [None]:
from fastai import *
from fastai.vision import *
from ipyexperiments import IPyExperimentsPytorch

In [None]:
# pick the first GPU when CUDA is available, otherwise fall back to the CPU ...
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# ... but everything below measures GPU memory, so stop right here if no GPU was found
assert str(device) == 'cuda:0', f"we want GPU, got {device}"

In [None]:
# NOTE(review): presumably this forces CUDA to initialise up front, so that its one-off
# memory cost is not attributed to the cells measured later — confirm
torch.ones(1).cuda() # preload

tensor([1.], device='cuda:0')

In [None]:
import pynvml, torch, gc

# NVML has to be initialised before any other pynvml call is made
pynvml.nvmlInit()
# index of the CUDA device torch currently uses; mem_free() passes it to NVML to look up the device
# NOTE(review): `id` shadows the Python builtin, and this assumes NVML and CUDA number the
# devices identically (may not hold when CUDA_VISIBLE_DEVICES is set) — confirm
id = torch.cuda.current_device()
def mem_free():
    " return the free GPU memory in MBs (as an int), as reported by NVML for device `id` "
    # release what can be released first, so that the number reflects memory that is really in use:
    # collect unreachable python objects, then ask pytorch to give up its unused cached blocks
    gc.collect()
    torch.cuda.empty_cache()
    nvml_mem = pynvml.nvmlDeviceGetMemoryInfo(pynvml.nvmlDeviceGetHandleByIndex(id))
    free_bytes = nvml_mem.free
    return int(free_bytes / 2**20) # bytes -> MBs

def mem_report():
    " print the amount of currently free GPU memory, in MBs "
    free_mbs = mem_free()
    print(f"free mem={free_mbs}")

def mem_allocate_mbs(n, fatal=False):
    """Allocate roughly `n` MBs of GPU memory and return the tensor holding it.

    Args:
        n: amount to allocate, in MBs (2**20 bytes). Requests below 6MB are
            not attempted.
        fatal: when False (default) any allocation failure is swallowed and
            None is returned; when True the original exception propagates.

    Returns:
        A square CUDA tensor of ones occupying about `n` MBs, or None when
        `n` < 6 or when the allocation failed and `fatal` is False.
    """
    if n < 6: return None # don't try to allocate less than 6MB
    try:
        # a (d, d) tensor of 4-byte floats (assuming the default float32 dtype) takes
        # d*d*4 bytes; with d = 2**9 * sqrt(n) that is 2**18 * n * 4 = n * 2**20 bytes
        d = int(2**9*n**0.5)
        # create the tensor directly on the GPU: building it on the CPU first and
        # copying it over would transiently need the same n MBs of host RAM as well.
        # A freshly created tensor is already contiguous, so no extra call is needed.
        return torch.ones((d, d), device="cuda")
    except Exception:
        if not fatal: return None
        raise # bare raise re-raises the active exception with its traceback intact
        
def leave_free_mbs(n):
    " allocate a buffer sized so that only n MBs of GPU memory stay free, return that buffer "
    free_now = mem_free()
    # there is nothing to consume if we are already at or below the requested amount
    assert free_now > n, f"already have less available mem than desired {n}MBs"
    to_take = free_now - n
    print(f"consuming {to_take}MB to bring free mem to {n}MBs")
    # fatal=True: a failed allocation propagates instead of silently returning None
    held = mem_allocate_mbs(to_take, fatal=True)
    return held

# keep the returned tensor in `buf`: the memory stays consumed until `buf` is deleted in the last cell
buf = leave_free_mbs(1000)

consuming 6614MB to bring free mem to 1000MBs


In [None]:
# start an ipyexperiments session; as the outputs below show, it prints the current state
# and then a CPU/GPU memory report after each subsequent cell
exp = IPyExperimentsPytorch()


*** Experiment started with the Pytorch backend
Device: ID 0, GeForce GTX 1070 Ti (8119 RAM)


*** Current state:
RAM:  Used  Free  Total      Util
CPU:  2284 19978  31588 MB   7.23% 
GPU:  7119  1000   8119 MB  87.68% 


･ RAM: △Consumed △Peaked  Used Total | Exec time 0:00:00.000
･ CPU:         0       0     2284 MB |
･ GPU:         0       0     7119 MB |


In [None]:
# local path of the PETS dataset, and the two sub-folders used from it
path = untar_data(URLs.PETS)
path_anno = path/'annotations'
path_img = path/'images'
fnames = get_image_files(path_img)
# fixed numpy seed — presumably so that the random split done when the DataBunch
# is created is the same on every run
np.random.seed(2)
# the label is the part of the file name that precedes the trailing _<digits>.jpg;
# the dot is escaped so that it matches a literal '.' only, not any character
pat = re.compile(r'/([^/]+)_\d+\.jpg$')

･ RAM: △Consumed △Peaked  Used Total | Exec time 0:00:00.044
･ CPU:         7       0     2291 MB |
･ GPU:         0       0     7119 MB |


In [None]:
# batch size for the DataBunch (128 is left here as an alternative value to switch to)
bs = 32

# labels are extracted from the file names with `pat`; no transforms, 224px images,
# normalized with the imagenet statistics
data = ImageDataBunch.from_name_re(
    path_img, fnames, pat,
    ds_tfms=None, size=224, bs=bs,
).normalize(imagenet_stats)

･ RAM: △Consumed △Peaked  Used Total | Exec time 0:00:00.239
･ CPU:         1       3     2343 MB |
･ GPU:         0       0     7119 MB |


In [None]:
# resnet34-based learner on `data`, reporting accuracy; per the report below
# creating it consumes about 110MB of GPU memory
learn = create_cnn(data, models.resnet34, metrics=accuracy)

･ RAM: △Consumed △Peaked  Used Total | Exec time 0:00:00.446
･ CPU:         0       0     2480 MB |
･ GPU:       110       0     7229 MB |


In [None]:
# with only ~1000MB of GPU memory left free by `buf`, this training run is expected
# to fail with a CUDA out-of-memory RuntimeError — that failure is the point of the test
learn.fit_one_cycle(1,1e-2)

epoch,train_loss,valid_loss,accuracy


RuntimeError: CUDA out of memory. Tried to allocate 6.12 MiB (GPU 0; 7.93 GiB total capacity; 7.34 GiB already allocated; 2.56 MiB free; 3.15 MiB cached)

･ RAM: △Consumed △Peaked  Used Total | Exec time 0:00:01.641
･ CPU:         1       0     2487 MB |
･ GPU:         6     882     7235 MB |


In [None]:
# Now run this cell, after the OOM cell above

# free memory right after the OOM failure, while the test buffer is still allocated
mem_report()
del buf # that was just the buffer to make the test reliable

# Now you can get all of the GPU memory back:
# with `buf` released, this second report should show most of the GPU memory free again
mem_report()

free mem=884
free mem=7498
･ RAM: △Consumed △Peaked  Used Total | Exec time 0:00:00.066
･ CPU:         0       0     2487 MB |
･ GPU:     -6614    6614      621 MB |
