{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Download and resize ImageNet\n",
    "\n",
    "Requests an AWS spot instance, mounts EFS on it, downloads ImageNet from Kaggle and stores resized copies in EFS."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "1. Create spot instance\n",
    "2. Mount EFS\n",
    "3. Download ImageNet from Kaggle and untar\n",
    "4. Resize images to 80, 160, 320, 375"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%reload_ext autoreload\n",
    "%autoreload 2\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Path is used below for local file locations; import it explicitly so the\n",
    "# notebook does not rely on the star import to provide it.\n",
    "from pathlib import Path\n",
    "\n",
    "from aws_setup import *"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Define parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Tag name of the VPC; the EFS tag and instance name below are derived from it\n",
    "vpc_name = 'fast-ai'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Get existing VPC by tag name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ec2.Vpc(id='vpc-6e6b2a17')"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vpc = get_vpc(vpc_name); vpc"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Create EFS (if you haven't already)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "efs_tag = f'{vpc_name}-efs'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Skip this cell if the EFS with this tag already exists\n",
    "efs = create_efs(efs_tag, vpc, performance_mode='maxIO')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Request Spot instance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "instance_name = f'{vpc_name}-instance'\n",
    "# Recommend a high compute instance as we need to do multi-threaded resizing later on\n",
    "instance_type = 'c5.4xlarge'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'0.301400'"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "spot_price = get_spot_prices()[instance_type]\n",
    "# Bid at a multiple of the current spot price, formatted to 4 decimal places\n",
    "bid_multiplier = 3\n",
    "bid_price = f'{float(spot_price) * bid_multiplier:.4f}'\n",
    "print(f'Spot price: {spot_price}, Bid price: {bid_price}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "launch_specs = LaunchSpecs(vpc, instance_type=instance_type).build()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Override the size of the first block device mapping; EC2 expresses VolumeSize in GiB\n",
    "launch_specs['BlockDeviceMappings'][0]['Ebs']['VolumeSize'] = 1000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'BlockDeviceMappings': [{'DeviceName': '/dev/sda1',\n",
       " 'Ebs': {'DeleteOnTermination': True,\n",
       " 'VolumeSize': 1000,\n",
       " 'VolumeType': 'gp2'}}],\n",
       " 'ImageId': 'ami-8c4288f4',\n",
       " 'InstanceType': 'c5.4xlarge',\n",
       " 'KeyName': 'aws-key-fast-ai',\n",
       " 'NetworkInterfaces': [{'AssociatePublicIpAddress': True,\n",
       " 'DeviceIndex': 0,\n",
       " 'Groups': ['sg-f60fca88'],\n",
       " 'SubnetId': 'subnet-f056ff89'}]}"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "launch_specs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Waiting on spot fullfillment...\n",
      "Fullfillment completed. InstanceId: i-0cabe3a45ec1ef32c\n",
      "Rebooting...\n",
      "Completed. SSH: ssh -i ~/.ssh/aws-key-fast-ai.pem ubuntu@54.202.209.226\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "ec2.Instance(id='i-0cabe3a45ec1ef32c')"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "instance = create_spot_instance(instance_name, launch_specs, spot_price=bid_price); instance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Alternative to the request above (presumably looks up an existing instance by\n",
    "# name - confirm in aws_setup); uncomment to use it instead:\n",
    "# instance = get_instance(instance_name); instance\n",
    "get_ssh_command(instance)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### SSH"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Connecting to SSH...\n",
      "Connected!\n"
     ]
    }
   ],
   "source": [
    "client = connect_to_instance(instance)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Mount EFS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'fs-0ea233a7.efs.us-west-2.amazonaws.com'"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Reuse efs_tag so the lookup stays in sync with vpc_name\n",
    "efs_addr = get_efs_address(efs_tag); efs_addr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# -p: do not fail when the mount point already exists (safe to re-run)\n",
    "_ = run_command(client, 'mkdir -p ~/efs_mount')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "efs_mount_cmd = f'sudo mount -t nfs -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2 {efs_addr}:/ ~/efs_mount'\n",
    "_ = run_command(client, efs_mount_cmd)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "run_command returned: \n",
      "efs_saved.txt\r\n",
      "\n"
     ]
    }
   ],
   "source": [
    "_ = run_command(client, 'ls efs_mount') # no reformatting"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "## Tmux"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "tsess = TmuxSession(client, 'sess')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Download dataset from Kaggle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# -p: do not fail when the directory already exists (safe to re-run)\n",
    "_ = run_command(client, 'mkdir -p ~/.kaggle')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('', '')"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Copy the local Kaggle API token to the instance\n",
    "kaggle_file = Path.home()/'.kaggle/kaggle.json'\n",
    "upload_file(client, str(kaggle_file), '.kaggle/kaggle.json')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The Kaggle API docs recommend making the token readable by its owner only\n",
    "_ = run_command(client, 'chmod 600 ~/.kaggle/kaggle.json')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('', '')"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "download_kaggle_file = Path.cwd()/'upload_scripts/download_kaggle_imagenet.sh'\n",
    "upload_file(client, str(download_kaggle_file), 'download_kaggle_imagenet.sh')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('', '')"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Run inside the tmux session created above\n",
    "tsess.run_cmd('bash download_kaggle_imagenet.sh')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Upload image resize scripts and run"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('', '')"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# imagenet_formatting.sh uses this for multithreaded resizing\n",
    "# resize_images.py methods are taken from fast.ai dataset.py\n",
    "upload_path = Path.cwd()/'upload_scripts/resize_images.py'\n",
    "upload_file(client, str(upload_path), 'resize_images.py')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('', '')"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# creates sizes 80, 160, 320, 375 and stores files in EFS\n",
    "upload_path = Path.cwd()/'upload_scripts/imagenet_formatting.sh'\n",
    "upload_file(client, str(upload_path), 'imagenet_formatting.sh')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Run inside the tmux session created above\n",
    "tsess.run_cmd('bash imagenet_formatting.sh')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}