{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"2022-01-11-personalize-datalayer.ipynb","provenance":[{"file_id":"https://github.com/recohut/nbs/blob/main/raw/P190661%20%7C%20Amazon%20Personalize%20Generic%20Module%20-%20Data%20Layer.ipynb","timestamp":1644602894195}],"collapsed_sections":[],"mount_file_id":"1wVdSxH2YFwulsT2AL6s03NCcrMbIwjgt","authorship_tag":"ABX9TyM+uca+DD3+3a6ZMULUu3d1"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["# Amazon Personalize Generic Module - Data Layer"],"metadata":{"id":"ac4NjDi0-JIz"}},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"7ZJPAbfaJcTE","executionInfo":{"status":"ok","timestamp":1630047493290,"user_tz":-330,"elapsed":720,"user":{"displayName":"Sparsh Agarwal","photoUrl":"","userId":"13037694610922482904"}},"outputId":"bded3fca-1d3b-4955-8b9a-f767ea8c65b9"},"source":["!mkdir -p code/cloudformation\n","!wget -q --show-progress -O code/cloudformation/immersion_day.yaml https://personalization-at-amazon.s3.amazonaws.com/amazon-personalize/AmazonPersonalizeImmersionDay.yaml"],"execution_count":null,"outputs":[{"output_type":"stream","text":["\r          code/clou   0%[                    ]       0  --.-KB/s               \rcode/cloudformation 100%[===================>]   2.57K  --.-KB/s    in 0s      \n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"dco-CyHmKTAb","executionInfo":{"status":"ok","timestamp":1630047507321,"user_tz":-330,"elapsed":737,"user":{"displayName":"Sparsh Agarwal","photoUrl":"","userId":"13037694610922482904"}},"outputId":"5e012369-a54c-4841-9897-d1b7f1892473"},"source":["!cat code/cloudformation/immersion_day.yaml"],"execution_count":null,"outputs":[{"output_type":"stream","text":["---\n","AWSTemplateFormatVersion: '2010-09-09'\n","\n","Description: Creates an S3 Bucket, IAM Policies, and SageMaker Notebook to work with Personalize.\n","\n","Parameters:\n","  NotebookName:\n","    Type: String\n","    Default: AmazonPersonalizeImmersionDay\n","    Description: Enter the name of the SageMaker notebook instance. Default is PersonalizeImmersionDay.\n","\n","  VolumeSize:\n","    Type: Number\n","    Default: 64\n","    MinValue: 5\n","    MaxValue: 16384\n","    ConstraintDescription: Must be an integer between 5 (GB) and 16384 (16 TB).\n","    Description: Enter the size of the EBS volume in GB.\n","    \n","  domain:\n","    Type: String\n","    Default: Media\n","    Description: Enter the name of the domain (Retail, Media, or CPG) you would like to use in your Amazon Personalize Immersion Day.\n","\n","\n","Resources:\n","  SAMArtifactsBucket:\n","    Type: AWS::S3::Bucket\n","  # SageMaker Execution Role\n","  SageMakerIamRole:\n","    Type: \"AWS::IAM::Role\"\n","    Properties:\n","      AssumeRolePolicyDocument:\n","        Version: \"2012-10-17\"\n","        Statement:\n","          -\n","            Effect: Allow\n","            Principal:\n","              Service: sagemaker.amazonaws.com\n","            Action: sts:AssumeRole\n","      Path: \"/\"\n","      ManagedPolicyArns:\n","        - \"arn:aws:iam::aws:policy/IAMFullAccess\"\n","        - \"arn:aws:iam::aws:policy/AWSCloudFormationFullAccess\"\n","        - \"arn:aws:iam::aws:policy/AmazonS3FullAccess\"\n","        - \"arn:aws:iam::aws:policy/AmazonSageMakerFullAccess\"\n","        - \"arn:aws:iam::aws:policy/AWSStepFunctionsFullAccess\"\n","        - \"arn:aws:iam::aws:policy/AWSLambda_FullAccess\"\n","        - \"arn:aws:iam::aws:policy/AmazonSNSFullAccess\"\n","        - \"arn:aws:iam::aws:policy/service-role/AmazonPersonalizeFullAccess\"\n","        \n","        \n","\n","  # SageMaker notebook\n","  NotebookInstance:\n","    Type: \"AWS::SageMaker::NotebookInstance\"\n","    Properties:\n","      InstanceType: \"ml.t2.medium\"\n","      NotebookInstanceName: !Ref NotebookName\n","      RoleArn: !GetAtt SageMakerIamRole.Arn\n","      VolumeSizeInGB: !Ref VolumeSize\n","      LifecycleConfigName: !GetAtt AmazonPersonalizeMLOpsLifecycleConfig.NotebookInstanceLifecycleConfigName\n","\n","\n","  AmazonPersonalizeMLOpsLifecycleConfig:\n","    Type: \"AWS::SageMaker::NotebookInstanceLifecycleConfig\"\n","    Properties:\n","      OnStart:\n","        - Content:\n","            Fn::Base64: \n","              !Sub |\n","                #!/bin/bash\n","                sudo -u ec2-user -i <<'EOF'\n","                cd /home/ec2-user/SageMaker/\n","                git clone https://github.com/aws-samples/amazon-personalize-immersion-day.git\n","                cd /home/ec2-user/SageMaker/amazon-personalize-immersion-day/automation/ml_ops/\n","                nohup sh deploy.sh \"${SAMArtifactsBucket}\" \"${domain}\" &\n","                EOF"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"am9RumpUcLWx"},"source":["## Data Preparation"]},{"cell_type":"code","metadata":{"id":"MVMVDeLWKYzk"},"source":["import time\n","from time import sleep\n","import json\n","from datetime import datetime\n","import pandas as pd"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"2UVWuFVVaF81","executionInfo":{"status":"ok","timestamp":1630047692472,"user_tz":-330,"elapsed":437,"user":{"displayName":"Sparsh Agarwal","photoUrl":"","userId":"13037694610922482904"}},"outputId":"111803e5-aa5d-4010-965e-a041645eead2"},"source":["original_data = pd.read_csv('./data/bronze/ml-latest-small/ratings.csv')\n","original_data.info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["<class 'pandas.core.frame.DataFrame'>\n","RangeIndex: 100836 entries, 0 to 100835\n","Data columns (total 4 columns):\n"," #   Column     Non-Null Count   Dtype  \n","---  ------     --------------   -----  \n"," 0   userId     100836 non-null  int64  \n"," 1   movieId    100836 non-null  int64  \n"," 2   rating     100836 non-null  float64\n"," 3   timestamp  100836 non-null  int64  \n","dtypes: float64(1), int64(3)\n","memory usage: 3.1 MB\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"GLJtDOC7aZmV"},"source":["The int64 format is clearly suitable for userId and movieId. However, we need to dive deeper to understand the timestamps in the data. To use Amazon Personalize, you need to save timestamps in Unix Epoch format. Currently, the timestamp values are not human-readable. So let's grab an arbitrary timestamp value and figure out how to interpret it. Do a quick sanity check on the transformed dataset by picking an arbitrary timestamp and transforming it to a human-readable format."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"5KhFzjnHaoIV","executionInfo":{"status":"ok","timestamp":1630047743488,"user_tz":-330,"elapsed":461,"user":{"displayName":"Sparsh Agarwal","photoUrl":"","userId":"13037694610922482904"}},"outputId":"f2966174-a31b-4484-e4bd-edce90c03721"},"source":["arb_time_stamp = original_data.iloc[50]['timestamp']\n","print(arb_time_stamp)\n","print(datetime.utcfromtimestamp(arb_time_stamp).strftime('%Y-%m-%d %H:%M:%S'))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["964982681.0\n","2000-07-30 18:44:41\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"LfF4C69ea9CK"},"source":["Since this is a dataset of an explicit feedback movie ratings, it includes movies rated from 1 to 5. We want to include only moves that were \"liked\" by the users, and simulate a dataset of data that would be gathered by a VOD platform. In order to do that, we will filter out all interactions under 2 out of 5, and create two EVENT_Types \"click\" and and \"watch\". We will then assign all movies rated 2 and above as \"click\" and movies rated 4 and above as both \"click\" and \"watch\".\n","\n","Note that this is to correspond with the events we are modeling, for a real data set you would actually model based on implicit feedback such as clicks, watches and/or explicit feedback such as ratings, likes etc."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":578},"id":"3TmlOUgNa-Sr","executionInfo":{"status":"ok","timestamp":1630047929545,"user_tz":-330,"elapsed":613,"user":{"displayName":"Sparsh Agarwal","photoUrl":"","userId":"13037694610922482904"}},"outputId":"69a4ab49-f41c-4db4-b4d3-25ad8e3dc7ae"},"source":["watched_df = original_data.copy()\n","watched_df = watched_df[watched_df['rating'] > 3]\n","watched_df = watched_df[['userId', 'movieId', 'timestamp']]\n","watched_df['EVENT_TYPE']='watch'\n","display(watched_df.head())\n","\n","clicked_df = original_data.copy()\n","clicked_df = clicked_df[clicked_df['rating'] > 1]\n","clicked_df = clicked_df[['userId', 'movieId', 'timestamp']]\n","clicked_df['EVENT_TYPE']='click'\n","display(clicked_df.head())\n","\n","interactions_df = clicked_df.copy()\n","interactions_df = interactions_df.append(watched_df)\n","interactions_df.sort_values(\"timestamp\", axis = 0, ascending = True, \n","                 inplace = True, na_position ='last') \n","interactions_df.info()"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>userId</th>\n","      <th>movieId</th>\n","      <th>timestamp</th>\n","      <th>EVENT_TYPE</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>964982703</td>\n","      <td>watch</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>1</td>\n","      <td>3</td>\n","      <td>964981247</td>\n","      <td>watch</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>1</td>\n","      <td>6</td>\n","      <td>964982224</td>\n","      <td>watch</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>1</td>\n","      <td>47</td>\n","      <td>964983815</td>\n","      <td>watch</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>1</td>\n","      <td>50</td>\n","      <td>964982931</td>\n","      <td>watch</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["   userId  movieId  timestamp EVENT_TYPE\n","0       1        1  964982703      watch\n","1       1        3  964981247      watch\n","2       1        6  964982224      watch\n","3       1       47  964983815      watch\n","4       1       50  964982931      watch"]},"metadata":{}},{"output_type":"display_data","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>userId</th>\n","      <th>movieId</th>\n","      <th>timestamp</th>\n","      <th>EVENT_TYPE</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>964982703</td>\n","      <td>click</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>1</td>\n","      <td>3</td>\n","      <td>964981247</td>\n","      <td>click</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>1</td>\n","      <td>6</td>\n","      <td>964982224</td>\n","      <td>click</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>1</td>\n","      <td>47</td>\n","      <td>964983815</td>\n","      <td>click</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>1</td>\n","      <td>50</td>\n","      <td>964982931</td>\n","      <td>click</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["   userId  movieId  timestamp EVENT_TYPE\n","0       1        1  964982703      click\n","1       1        3  964981247      click\n","2       1        6  964982224      click\n","3       1       47  964983815      click\n","4       1       50  964982931      click"]},"metadata":{}},{"output_type":"stream","text":["<class 'pandas.core.frame.DataFrame'>\n","Int64Index: 158371 entries, 66679 to 81092\n","Data columns (total 4 columns):\n"," #   Column      Non-Null Count   Dtype \n","---  ------      --------------   ----- \n"," 0   userId      158371 non-null  int64 \n"," 1   movieId     158371 non-null  int64 \n"," 2   timestamp   158371 non-null  int64 \n"," 3   EVENT_TYPE  158371 non-null  object\n","dtypes: int64(3), object(1)\n","memory usage: 6.0+ MB\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"NXV-FREFbX4_"},"source":["Amazon Personalize has default column names for users, items, and timestamp. These default column names are USER_ID, ITEM_ID, AND TIMESTAMP. So the final modification to the dataset is to replace the existing column headers with the default headers."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":204},"id":"JuhWwOV7bbnJ","executionInfo":{"status":"ok","timestamp":1630048048895,"user_tz":-330,"elapsed":545,"user":{"displayName":"Sparsh Agarwal","photoUrl":"","userId":"13037694610922482904"}},"outputId":"f1f62fd8-566c-4333-a124-df2150d74259"},"source":["interactions_df.rename(columns = {'userId':'USER_ID', 'movieId':'ITEM_ID', \n","                              'timestamp':'TIMESTAMP'}, inplace = True)\n","interactions_df.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>USER_ID</th>\n","      <th>ITEM_ID</th>\n","      <th>TIMESTAMP</th>\n","      <th>EVENT_TYPE</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>66679</th>\n","      <td>429</td>\n","      <td>222</td>\n","      <td>828124615</td>\n","      <td>watch</td>\n","    </tr>\n","    <tr>\n","      <th>66681</th>\n","      <td>429</td>\n","      <td>227</td>\n","      <td>828124615</td>\n","      <td>click</td>\n","    </tr>\n","    <tr>\n","      <th>66719</th>\n","      <td>429</td>\n","      <td>595</td>\n","      <td>828124615</td>\n","      <td>watch</td>\n","    </tr>\n","    <tr>\n","      <th>66718</th>\n","      <td>429</td>\n","      <td>592</td>\n","      <td>828124615</td>\n","      <td>watch</td>\n","    </tr>\n","    <tr>\n","      <th>66717</th>\n","      <td>429</td>\n","      <td>590</td>\n","      <td>828124615</td>\n","      <td>watch</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["       USER_ID  ITEM_ID  TIMESTAMP EVENT_TYPE\n","66679      429      222  828124615      watch\n","66681      429      227  828124615      click\n","66719      429      595  828124615      watch\n","66718      429      592  828124615      watch\n","66717      429      590  828124615      watch"]},"metadata":{},"execution_count":15}]},{"cell_type":"code","metadata":{"id":"X_bHOc_0b1HK"},"source":["interactions_df.to_csv('./data/silver/ml-latest-small/interactions.csv', index=False, float_format='%.0f')"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"H03qEFm25Otd","executionInfo":{"status":"ok","timestamp":1630055796089,"user_tz":-330,"elapsed":537,"user":{"displayName":"Sparsh Agarwal","photoUrl":"","userId":"13037694610922482904"}},"outputId":"a4c83035-39c1-4159-b564-9c65bab224c2"},"source":["original_data = pd.read_csv('./data/bronze/ml-latest-small/movies.csv')\n","original_data.info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["<class 'pandas.core.frame.DataFrame'>\n","RangeIndex: 9742 entries, 0 to 9741\n","Data columns (total 3 columns):\n"," #   Column   Non-Null Count  Dtype \n","---  ------   --------------  ----- \n"," 0   movieId  9742 non-null   int64 \n"," 1   title    9742 non-null   object\n"," 2   genres   9742 non-null   object\n","dtypes: int64(1), object(2)\n","memory usage: 228.5+ KB\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":204},"id":"GR6mdTge5Yzz","executionInfo":{"status":"ok","timestamp":1630055932177,"user_tz":-330,"elapsed":622,"user":{"displayName":"Sparsh Agarwal","photoUrl":"","userId":"13037694610922482904"}},"outputId":"0891fc8c-67cc-45c5-92a1-21fafab26734"},"source":["original_data['year'] =original_data['title'].str.extract('.*\\((.*)\\).*',expand = False)\n","original_data = original_data.dropna(axis=0)\n","\n","itemmetadata_df = original_data.copy()\n","itemmetadata_df = itemmetadata_df[['movieId', 'genres', 'year']]\n","itemmetadata_df.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>movieId</th>\n","      <th>genres</th>\n","      <th>year</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>1</td>\n","      <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n","      <td>1995</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>2</td>\n","      <td>Adventure|Children|Fantasy</td>\n","      <td>1995</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>3</td>\n","      <td>Comedy|Romance</td>\n","      <td>1995</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>4</td>\n","      <td>Comedy|Drama|Romance</td>\n","      <td>1995</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>5</td>\n","      <td>Comedy</td>\n","      <td>1995</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["   movieId                                       genres  year\n","0        1  Adventure|Animation|Children|Comedy|Fantasy  1995\n","1        2                   Adventure|Children|Fantasy  1995\n","2        3                               Comedy|Romance  1995\n","3        4                         Comedy|Drama|Romance  1995\n","4        5                                       Comedy  1995"]},"metadata":{},"execution_count":24}]},{"cell_type":"markdown","metadata":{"id":"XwgGUdk654pB"},"source":["We will add a new dataframe to help us generate a creation timestamp. If you don’t provide the CREATION_TIMESTAMP for an item, the model infers this information from the interaction dataset and uses the timestamp of the item’s earliest interaction as its corresponding release date. If an item doesn’t have an interaction, its release date is set as the timestamp of the latest interaction in the training set and it is considered a new item. For the current dataset we will set the CREATION_TIMESTAMP to 0."]},{"cell_type":"code","metadata":{"id":"pHUtEDT2522e"},"source":["itemmetadata_df['CREATION_TIMESTAMP'] = 0\n","itemmetadata_df.rename(columns = {'genres':'GENRE', 'movieId':'ITEM_ID', 'year':'YEAR'}, inplace = True) \n","itemmetadata_df.to_csv('./data/silver/ml-latest-small/item-meta.csv', index=False, float_format='%.0f')"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"eBy5V4Z4b2rC"},"source":["## AWS Personalize"]},{"cell_type":"code","metadata":{"id":"C4xjNqJZcw2a"},"source":["!pip install -q boto3\n","import boto3\n","import json\n","import time"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"1oZPIuJFcXN1"},"source":["!mkdir -p ~/.aws && cp /content/drive/MyDrive/AWS/d01_admin/* ~/.aws"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"XeBuVJpGFsvb"},"source":["### ETL Job for Interactions data without using generic data loading module"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"iwv-AGiWcJeD","executionInfo":{"status":"ok","timestamp":1630048308723,"user_tz":-330,"elapsed":402,"user":{"displayName":"Sparsh Agarwal","photoUrl":"","userId":"13037694610922482904"}},"outputId":"8c08cc2d-607c-4ed2-8077-ae0cb909bce9"},"source":["# Configure the SDK to Personalize:\n","personalize = boto3.client('personalize')\n","personalize_runtime = boto3.client('personalize-runtime')\n","print(\"We can communicate with Personalize!\")"],"execution_count":null,"outputs":[{"output_type":"stream","text":["We can communicate with Personalize!\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"XkvOHA9ack0x"},"source":["# create the dataset group (the highest level of abstraction)\n","create_dataset_group_response = personalize.create_dataset_group(\n","    name = \"immersion-day-dataset-group-movielens-latest\"\n",")\n","\n","dataset_group_arn = create_dataset_group_response['datasetGroupArn']\n","print(json.dumps(create_dataset_group_response, indent=2))\n","\n","# wait for it to become active\n","max_time = time.time() + 3*60*60 # 3 hours\n","while time.time() < max_time:\n","    describe_dataset_group_response = personalize.describe_dataset_group(\n","        datasetGroupArn = dataset_group_arn\n","    )\n","    status = describe_dataset_group_response[\"datasetGroup\"][\"status\"]\n","    print(\"DatasetGroup: {}\".format(status))\n","    \n","    if status == \"ACTIVE\" or status == \"CREATE FAILED\":\n","        break\n","        \n","    time.sleep(60)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"k-6qvw4oflsH"},"source":["interactions_schema = schema = {\n","    \"type\": \"record\",\n","    \"name\": \"Interactions\",\n","    \"namespace\": \"com.amazonaws.personalize.schema\",\n","    \"fields\": [\n","        {\n","            \"name\": \"USER_ID\",\n","            \"type\": \"string\"\n","        },\n","        {\n","            \"name\": \"ITEM_ID\",\n","            \"type\": \"string\"\n","        },\n","        {\n","            \"name\": \"EVENT_TYPE\",\n","            \"type\": \"string\"\n","        },\n","        {\n","            \"name\": \"TIMESTAMP\",\n","            \"type\": \"long\"\n","        }\n","    ],\n","    \"version\": \"1.0\"\n","}\n","\n","create_schema_response = personalize.create_schema(\n","    name = \"personalize-poc-movielens-interactions\",\n","    schema = json.dumps(interactions_schema)\n",")\n","\n","interaction_schema_arn = create_schema_response['schemaArn']\n","print(json.dumps(create_schema_response, indent=2))\n","\n","dataset_type = \"INTERACTIONS\"\n","create_dataset_response = personalize.create_dataset(\n","    name = \"personalize-poc-movielens-ints\",\n","    datasetType = dataset_type,\n","    datasetGroupArn = dataset_group_arn,\n","    schemaArn = interaction_schema_arn\n",")\n","\n","interactions_dataset_arn = create_dataset_response['datasetArn']\n","print(json.dumps(create_dataset_response, indent=2))"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"8fN7IWbagYkT","executionInfo":{"status":"ok","timestamp":1630049380307,"user_tz":-330,"elapsed":472,"user":{"displayName":"Sparsh Agarwal","photoUrl":"","userId":"13037694610922482904"}},"outputId":"998ba781-2064-492f-9ff0-9bbdc8e1aef3"},"source":["region = 'us-east-1'\n","s3 = boto3.client('s3')\n","account_id = boto3.client('sts').get_caller_identity().get('Account')\n","bucket_name = account_id + \"-\" + region + \"-\" + \"personalizepocvod\"\n","print(bucket_name)\n","if region == \"us-east-1\":\n","    s3.create_bucket(Bucket=bucket_name)\n","else:\n","    s3.create_bucket(\n","        Bucket=bucket_name,\n","        CreateBucketConfiguration={'LocationConstraint': region}\n","        )"],"execution_count":null,"outputs":[{"output_type":"stream","text":["746888961694-us-east-1-personalizepocvod\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"SQsQs56hg6I2"},"source":["interactions_file_path = './data/silver/ml-latest-small/interactions.csv'\n","interactions_filename = 'interactions.csv'\n","boto3.Session().resource('s3').Bucket(bucket_name).Object(interactions_filename).upload_file(interactions_file_path)\n","interactions_s3DataPath = \"s3://\"+bucket_name+\"/\"+interactions_filename"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"uj3Ua9zOhfFW"},"source":["policy = {\n","    \"Version\": \"2012-10-17\",\n","    \"Id\": \"PersonalizeS3BucketAccessPolicy\",\n","    \"Statement\": [\n","        {\n","            \"Sid\": \"PersonalizeS3BucketAccessPolicy\",\n","            \"Effect\": \"Allow\",\n","            \"Principal\": {\n","                \"Service\": \"personalize.amazonaws.com\"\n","            },\n","            \"Action\": [\n","                \"s3:*Object\",\n","                \"s3:ListBucket\"\n","            ],\n","            \"Resource\": [\n","                \"arn:aws:s3:::{}\".format(bucket_name),\n","                \"arn:aws:s3:::{}/*\".format(bucket_name)\n","            ]\n","        }\n","    ]\n","}\n","\n","s3.put_bucket_policy(Bucket=bucket_name, Policy=json.dumps(policy))"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"jI5D1AlViHAZ","executionInfo":{"status":"ok","timestamp":1630049797032,"user_tz":-330,"elapsed":60956,"user":{"displayName":"Sparsh Agarwal","photoUrl":"","userId":"13037694610922482904"}},"outputId":"69e83310-bb91-4ed2-d439-a5783e33a56a"},"source":["iam = boto3.client(\"iam\")\n","\n","role_name = \"PersonalizeRolePOC\"\n","assume_role_policy_document = {\n","    \"Version\": \"2012-10-17\",\n","    \"Statement\": [\n","        {\n","          \"Effect\": \"Allow\",\n","          \"Principal\": {\n","            \"Service\": \"personalize.amazonaws.com\"\n","          },\n","          \"Action\": \"sts:AssumeRole\"\n","        }\n","    ]\n","}\n","\n","create_role_response = iam.create_role(\n","    RoleName = role_name,\n","    AssumeRolePolicyDocument = json.dumps(assume_role_policy_document)\n",")\n","\n","# AmazonPersonalizeFullAccess provides access to any S3 bucket with a name that includes \"personalize\" or \"Personalize\" \n","# if you would like to use a bucket with a different name, please consider creating and attaching a new policy\n","# that provides read access to your bucket or attaching the AmazonS3ReadOnlyAccess policy to the role\n","policy_arn = \"arn:aws:iam::aws:policy/service-role/AmazonPersonalizeFullAccess\"\n","iam.attach_role_policy(\n","    RoleName = role_name,\n","    PolicyArn = policy_arn\n",")\n","\n","# Now add S3 support\n","iam.attach_role_policy(\n","    PolicyArn='arn:aws:iam::aws:policy/AmazonS3FullAccess',\n","    RoleName=role_name\n",")\n","time.sleep(60) # wait for a minute to allow IAM role policy attachment to propagate\n","\n","role_arn = create_role_response[\"Role\"][\"Arn\"]\n","print(role_arn)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["arn:aws:iam::746888961694:role/PersonalizeRolePOC\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"asaTiB0oiRGr"},"source":["create_dataset_import_job_response = personalize.create_dataset_import_job(\n","    jobName = \"personalize-poc-import1\",\n","    datasetArn = interactions_dataset_arn,\n","    dataSource = {\n","        \"dataLocation\": \"s3://{}/{}\".format(bucket_name, interactions_filename)\n","    },\n","    roleArn = role_arn\n",")\n","\n","dataset_import_job_arn = create_dataset_import_job_response['datasetImportJobArn']\n","print(json.dumps(create_dataset_import_job_response, indent=2))\n","\n","# wait fir this import job to gets activated\n","\n","max_time = time.time() + 6*60*60 # 6 hours\n","while time.time() < max_time:\n","    describe_dataset_import_job_response = personalize.describe_dataset_import_job(\n","        datasetImportJobArn = dataset_import_job_arn\n","    )\n","    status = describe_dataset_import_job_response[\"datasetImportJob\"]['status']\n","    print(\"DatasetImportJob: {}\".format(status))\n","    \n","    if status == \"ACTIVE\" or status == \"CREATE FAILED\":\n","        break\n","        \n","    time.sleep(60)"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"7jPFKY3qFHwh"},"source":["### ETL Job for Item meta using generic data loading module"]},{"cell_type":"code","metadata":{"id":"Y5zv_4760056"},"source":["import sys\n","sys.path.insert(0,'./code')\n","\n","from generic_modules.import_dataset import personalize_dataset"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"91OcwOQm9Ai6"},"source":["dataset_group_arn = 'arn:aws:personalize:us-east-1:746888961694:dataset-group/immersion-day-dataset-group-movielens-latest'\n","bucket_name = '746888961694-us-east-1-personalizepocvod'\n","role_arn = 'arn:aws:iam::746888961694:role/PersonalizeRolePOC'\n","\n","dataset_type = 'ITEMS'\n","source_data_path = './data/silver/ml-latest-small/item-meta.csv'\n","target_file_name = 'item-meta.csv'"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"rIv3OWn18qVV"},"source":["personalize_item_meta = personalize_dataset(\n","    dataset_group_arn = dataset_group_arn,\n","    bucket_name = bucket_name,\n","    role_arn = role_arn,\n","    dataset_type = dataset_type,\n","    source_data_path = source_data_path,\n","    target_file_name = target_file_name,\n","    dataset_arn = dataset_arn,\n",")"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"pVwE0fkL-b51","executionInfo":{"status":"ok","timestamp":1630057962385,"user_tz":-330,"elapsed":7,"user":{"displayName":"Sparsh Agarwal","photoUrl":"","userId":"13037694610922482904"}},"outputId":"369468f7-275c-45f1-8794-d503260db547"},"source":["personalize_item_meta.setup_connection()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["SUCCESS | We can communicate with Personalize!\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"2OaRUWs8-hAE"},"source":["itemmetadata_schema = {\n","    \"type\": \"record\",\n","    \"name\": \"Items\",\n","    \"namespace\": \"com.amazonaws.personalize.schema\",\n","    \"fields\": [\n","        {\n","            \"name\": \"ITEM_ID\",\n","            \"type\": \"string\"\n","        },\n","        {\n","            \"name\": \"GENRE\",\n","            \"type\": \"string\",\n","            \"categorical\": True\n","        },{\n","            \"name\": \"YEAR\",\n","            \"type\": \"int\",\n","        },\n","        {\n","            \"name\": \"CREATION_TIMESTAMP\",\n","            \"type\": \"long\",\n","        }\n","    ],\n","    \"version\": \"1.0\"\n","}"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"ELGoa3ng-uD1"},"source":["personalize_item_meta.create_dataset(schema=itemmetadata_schema,\n","                                     schema_name='personalize-poc-movielens-item',\n","                                     dataset_name='personalize-poc-movielens-items')"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":35},"id":"BrwOj2VpCemy","executionInfo":{"status":"ok","timestamp":1630058213530,"user_tz":-330,"elapsed":635,"user":{"displayName":"Sparsh Agarwal","photoUrl":"","userId":"13037694610922482904"}},"outputId":"bd706745-e758-43d2-ac2e-4b44e47baff7"},"source":["personalize_item_meta.dataset_arn"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"application/vnd.google.colaboratory.intrinsic+json":{"type":"string"},"text/plain":["'arn:aws:personalize:us-east-1:746888961694:dataset/immersion-day-dataset-group-movielens-latest/ITEMS'"]},"metadata":{},"execution_count":55}]},{"cell_type":"code","metadata":{"id":"9ZksZbihBmBz"},"source":["personalize_item_meta.upload_data_to_s3()"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"0fYXJO3lB6EP"},"source":["personalize_item_meta.import_data_from_s3(import_job_name='personalize-poc-item-import1')"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"2M5MUMAeGpFN"},"source":["\n","import boto3\n","import json\n","import time\n","\n","\n","class personalize_dataset:\n","    def __init__(self,\n","                 dataset_group_arn=None,\n","                 schema_arn=None,\n","                 dataset_arn=None,\n","                 dataset_type='INTERACTIONS',\n","                 region='us-east-1',\n","                 bucket_name=None,\n","                 role_arn=None,\n","                 source_data_path=None,\n","                 target_file_name=None,\n","                 dataset_import_job_arn=None\n","                 ):\n","        self.personalize = None\n","        self.personalize_runtime = None\n","        self.s3 = None\n","        self.iam = None\n","        self.dataset_group_arn = dataset_group_arn\n","        self.schema_arn = schema_arn\n","        self.dataset_arn = dataset_arn\n","        self.dataset_type = dataset_type\n","        self.region = region\n","        self.bucket_name = bucket_name\n","        self.role_arn = role_arn\n","        self.source_data_path = source_data_path\n","        self.target_file_name = target_file_name\n","        self.dataset_import_job_arn = dataset_import_job_arn\n","\n","    def setup_connection(self):\n","        try:\n","            self.personalize = boto3.client('personalize')\n","            self.personalize_runtime = boto3.client('personalize-runtime')\n","            self.s3 = boto3.client('s3')\n","            self.iam = boto3.client(\"iam\")\n","            print(\"SUCCESS | We can communicate with Personalize!\")\n","        except:\n","            print(\"ERROR | Connection can't be established!\")\n","    \n","    def create_dataset_group(self, dataset_group_name=None):\n","        \"\"\"\n","        The highest level of isolation and abstraction with Amazon Personalize\n","        is a dataset group. Information stored within one of these dataset groups\n","        has no impact on any other dataset group or models created from one. they\n","        are completely isolated. This allows you to run many experiments and is\n","        part of how we keep your models private and fully trained only on your data.\n","        \"\"\"\n","        create_dataset_group_response = self.personalize.create_dataset_group(name=dataset_group_name)\n","        self.dataset_group_arn = create_dataset_group_response['datasetGroupArn']\n","        # print(json.dumps(create_dataset_group_response, indent=2))\n","\n","        # Before we can use the dataset group, it must be active. \n","        # This can take a minute or two. Execute the cell below and wait for it\n","        # to show the ACTIVE status. It checks the status of the dataset group\n","        # every minute, up to a maximum of 3 hours.\n","        max_time = time.time() + 3*60*60 # 3 hours\n","        while time.time() < max_time:\n","            status = self.check_dataset_group_status()\n","            print(\"DatasetGroup: {}\".format(status))\n","            if status == \"ACTIVE\" or status == \"CREATE FAILED\":\n","                break\n","            time.sleep(60)\n","\n","    def check_dataset_group_status(self):\n","        \"\"\"\n","        Check the status of dataset group\n","        \"\"\"\n","        describe_dataset_group_response = self.personalize.describe_dataset_group(\n","            datasetGroupArn = self.dataset_group_arn\n","            )\n","        status = describe_dataset_group_response[\"datasetGroup\"][\"status\"]\n","        return status\n","\n","    def create_dataset(self, schema=None, schema_name=None, dataset_name=None):\n","        \"\"\"\n","        First, define a schema to tell Amazon Personalize what type of dataset\n","        you are uploading. There are several reserved and mandatory keywords\n","        required in the schema, based on the type of dataset. More detailed\n","        information can be found in the documentation.\n","        \"\"\"\n","        create_schema_response = self.personalize.create_schema(\n","            name = schema_name,\n","            schema = json.dumps(schema)\n","        )\n","        self.schema_arn = create_schema_response['schemaArn']\n","\n","        \"\"\"\n","        With a schema created, you can create a dataset within the dataset group.\n","        Note that this does not load the data yet, it just defines the schema for\n","        the data. The data will be loaded a few steps later.\n","        \"\"\"\n","        create_dataset_response = self.personalize.create_dataset(\n","            name = dataset_name,\n","            datasetType = self.dataset_type,\n","            datasetGroupArn = self.dataset_group_arn,\n","            schemaArn = self.schema_arn\n","        )\n","        self.dataset_arn = create_dataset_response['datasetArn']\n","    \n","    def create_s3_bucket(self):\n","        if region == \"us-east-1\":\n","            self.s3.create_bucket(Bucket=self.bucket_name)\n","        else:\n","            self.s3.create_bucket(\n","                Bucket=self.bucket_name,\n","                CreateBucketConfiguration={'LocationConstraint': self.region}\n","                )\n","    \n","    def upload_data_to_s3(self):\n","        \"\"\"\n","        Now that your Amazon S3 bucket has been created, upload the CSV file of\n","        our user-item-interaction data.\n","        \"\"\"\n","        boto3.Session().resource('s3').Bucket(self.bucket_name).Object(self.target_file_name).upload_file(self.source_data_path)\n","        s3DataPath = \"s3://\"+self.bucket_name+\"/\"+self.target_file_name\n","    \n","    def set_s3_bucket_policy(self, policy=None):\n","        \"\"\"\n","        Amazon Personalize needs to be able to read the contents of your S3\n","        bucket. So add a bucket policy which allows that.\n","        \"\"\"\n","        if not policy:\n","            policy = {\n","                \"Version\": \"2012-10-17\",\n","                \"Id\": \"PersonalizeS3BucketAccessPolicy\",\n","                \"Statement\": [\n","                    {\n","                        \"Sid\": \"PersonalizeS3BucketAccessPolicy\",\n","                        \"Effect\": \"Allow\",\n","                        \"Principal\": {\n","                            \"Service\": \"personalize.amazonaws.com\"\n","                        },\n","                        \"Action\": [\n","                            \"s3:*Object\",\n","                            \"s3:ListBucket\"\n","                        ],\n","                        \"Resource\": [\n","                            \"arn:aws:s3:::{}\".format(self.bucket_name),\n","                            \"arn:aws:s3:::{}/*\".format(self.bucket_name)\n","                        ]\n","                    }\n","                ]\n","            }\n","\n","        self.s3.put_bucket_policy(Bucket=self.bucket_name, Policy=json.dumps(policy))\n","\n","    def create_iam_role(self, role_name=None):\n","        \"\"\"\n","        Amazon Personalize needs the ability to assume roles in AWS in order to\n","        have the permissions to execute certain tasks. Let's create an IAM role\n","        and attach the required policies to it. The code below attaches very permissive\n","        policies; please use more restrictive policies for any production application.\n","        \"\"\"\n","        assume_role_policy_document = {\n","            \"Version\": \"2012-10-17\",\n","            \"Statement\": [\n","                {\n","                \"Effect\": \"Allow\",\n","                \"Principal\": {\n","                    \"Service\": \"personalize.amazonaws.com\"\n","                },\n","                \"Action\": \"sts:AssumeRole\"\n","                }\n","            ]\n","        }\n","        create_role_response = self.iam.create_role(\n","            RoleName = role_name,\n","            AssumeRolePolicyDocument = json.dumps(assume_role_policy_document)\n","        )\n","\n","        # AmazonPersonalizeFullAccess provides access to any S3 bucket with a name that includes \"personalize\" or \"Personalize\" \n","        # if you would like to use a bucket with a different name, please consider creating and attaching a new policy\n","        # that provides read access to your bucket or attaching the AmazonS3ReadOnlyAccess policy to the role\n","        policy_arn = \"arn:aws:iam::aws:policy/service-role/AmazonPersonalizeFullAccess\"\n","        self.iam.attach_role_policy(\n","            RoleName = role_name,\n","            PolicyArn = policy_arn\n","        )\n","        # Now add S3 support\n","        self.iam.attach_role_policy(\n","            PolicyArn='arn:aws:iam::aws:policy/AmazonS3FullAccess',\n","            RoleName=role_name\n","        )\n","        time.sleep(60) # wait for a minute to allow IAM role policy attachment to propagate\n","        self.role_arn = create_role_response[\"Role\"][\"Arn\"]\n","\n","    def import_data_from_s3(self, import_job_name=None):\n","        \"\"\"\n","        Earlier you created the dataset group and dataset to house your information,\n","        so now you will execute an import job that will load the data from the S3\n","        bucket into the Amazon Personalize dataset.\n","        \"\"\"\n","        create_dataset_import_job_response = self.personalize.create_dataset_import_job(\n","        jobName = import_job_name,\n","        datasetArn = self.dataset_arn,\n","        dataSource = {\n","            \"dataLocation\": \"s3://{}/{}\".format(self.bucket_name, self.target_file_name)\n","        },\n","        roleArn = self.role_arn\n","        )\n","        self.dataset_import_job_arn = create_dataset_import_job_response['datasetImportJobArn']\n","\n","        \"\"\"\n","        Before we can use the dataset, the import job must be active. Execute the\n","        cell below and wait for it to show the ACTIVE status. It checks the status\n","        of the import job every minute, up to a maximum of 6 hours.\n","        Importing the data can take some time, depending on the size of the dataset.\n","        In this workshop, the data import job should take around 15 minutes.\n","        \"\"\"\n","        max_time = time.time() + 6*60*60 # 6 hours\n","        while time.time() < max_time:\n","            describe_dataset_import_job_response = personalize.describe_dataset_import_job(\n","                datasetImportJobArn = dataset_import_job_arn\n","            )\n","            status = self.check_import_job_status()\n","            print(\"DatasetImportJob: {}\".format(status))\n","            if status == \"ACTIVE\" or status == \"CREATE FAILED\":\n","                break\n","            time.sleep(60)\n","    \n","    def check_import_job_status(self):\n","        describe_dataset_import_job_response = self.personalize.describe_dataset_import_job(\n","            datasetImportJobArn = self.dataset_import_job_arn\n","        )\n","        status = describe_dataset_import_job_response[\"datasetImportJob\"]['status']\n","        return status\n","\n","    def __getstate__(self):\n","        attributes = self.__dict__.copy()\n","        del attributes['personalize']\n","        del attributes['personalize_runtime']\n","        del attributes['s3']\n","        del attributes['iam']\n","        return attributes"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"fq131onaDHkM","executionInfo":{"status":"ok","timestamp":1630059835081,"user_tz":-330,"elapsed":613,"user":{"displayName":"Sparsh Agarwal","photoUrl":"","userId":"13037694610922482904"}},"outputId":"c101b288-131e-457b-d2c5-20dcf755308c"},"source":["dataset_arn = 'arn:aws:personalize:us-east-1:746888961694:dataset/immersion-day-dataset-group-movielens-latest/ITEMS'\n","dataset_import_job_arn = 'arn:aws:personalize:us-east-1:746888961694:dataset-import-job/personalize-poc-item-import1'\n","\n","personalize_item_meta = personalize_dataset(\n","    dataset_group_arn = dataset_group_arn,\n","    bucket_name = bucket_name,\n","    role_arn = role_arn,\n","    dataset_type = dataset_type,\n","    source_data_path = source_data_path,\n","    target_file_name = target_file_name,\n","    dataset_arn = dataset_arn,\n","    dataset_import_job_arn = dataset_import_job_arn\n",")\n","\n","personalize_item_meta.setup_connection()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["SUCCESS | We can communicate with Personalize!\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":35},"id":"Au1JEi8UDSI7","executionInfo":{"status":"ok","timestamp":1630059836609,"user_tz":-330,"elapsed":11,"user":{"displayName":"Sparsh Agarwal","photoUrl":"","userId":"13037694610922482904"}},"outputId":"7f2a0e7b-f374-4eb7-d0d5-ed9502f8d361"},"source":["personalize_item_meta.check_import_job_status()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"application/vnd.google.colaboratory.intrinsic+json":{"type":"string"},"text/plain":["'ACTIVE'"]},"metadata":{},"execution_count":88}]},{"cell_type":"markdown","metadata":{"id":"-AU3W_1IEub6"},"source":["### Saving the state"]},{"cell_type":"code","metadata":{"id":"z1zTY8rOGWyz"},"source":["import pickle\n","\n","with open('./artifacts/etc/personalize_item_meta.pkl', 'wb') as outp:\n","    pickle.dump(personalize_item_meta, outp, pickle.HIGHEST_PROTOCOL)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"x_cde5DTHbS-","executionInfo":{"status":"ok","timestamp":1630059841764,"user_tz":-330,"elapsed":426,"user":{"displayName":"Sparsh Agarwal","photoUrl":"","userId":"13037694610922482904"}},"outputId":"fe78a27b-cc40-42b4-b621-6860f8277727"},"source":["personalize_item_meta.__getstate__()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["{'bucket_name': '746888961694-us-east-1-personalizepocvod',\n"," 'dataset_arn': 'arn:aws:personalize:us-east-1:746888961694:dataset/immersion-day-dataset-group-movielens-latest/ITEMS',\n"," 'dataset_group_arn': 'arn:aws:personalize:us-east-1:746888961694:dataset-group/immersion-day-dataset-group-movielens-latest',\n"," 'dataset_import_job_arn': 'arn:aws:personalize:us-east-1:746888961694:dataset-import-job/personalize-poc-item-import1',\n"," 'dataset_type': 'ITEMS',\n"," 'region': 'us-east-1',\n"," 'role_arn': 'arn:aws:iam::746888961694:role/PersonalizeRolePOC',\n"," 'schema_arn': None,\n"," 'source_data_path': './data/silver/ml-latest-small/item-meta.csv',\n"," 'target_file_name': 'item-meta.csv'}"]},"metadata":{},"execution_count":89}]}]}