{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Load the \"autoreload\" extension. \"%reload_ext\" loads it on a fresh kernel and\n", "# reloads it when this cell is re-run, so re-executing the cell no longer\n", "# prints the \"already loaded\" notice that \"%load_ext\" produced.\n", "%reload_ext autoreload\n", "\n", "# always reload modules marked with \"%aimport\"\n", "%autoreload 1\n", "\n", "import os\n", "import sys\n", "from sklearn.metrics import roc_curve\n", "\n", "# add the 'src' directory as one where we can import modules;\n", "# the membership check keeps re-runs from appending duplicate entries\n", "src_dir = os.path.join(os.getcwd(), os.pardir, 'src')\n", "if src_dir not in sys.path:\n", "    sys.path.append(src_dir)\n", "\n", "# mark the project modules for autoreload, then import the helpers from them\n", "%aimport data.read_data\n", "%aimport models.train_model\n", "%aimport features.build_features\n", "%aimport visualization.visualize\n", "from data.read_data import read_data, get_stopwords\n", "from models.train_model import split_train, score_function, get_fasttext, model_ridge, model_xgb, model_lightgbm\n", "from features.build_features import get_vec, to_categorical, replace_na, to_tfidf, stack_sparse, to_sparse_int\n", "from visualization.visualize import plot_roc, plot_scatter" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# read the data with test=False and keep its 'Target' column as y\n", "train = read_data(test=False)\n", "y = train['Target']\n", "stopwords = get_stopwords()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | ID | \n", "review_content | \n", "review_title | \n", "review_stars | \n", "product | \n", "Target | \n", "
---|---|---|---|---|---|---|
0 | \n", "0 | \n", "En appelant un acheteur pour demander si l'écr... | \n", "La Police s'inscrit en acheteur privé sur Pric... | \n", "5 | \n", "2fbb619e3606f9b7c213e858a109cda771aa2c47ce50d5... | \n", "0 | \n", "
1 | \n", "1 | \n", "Alors, là, on a affaire au plus grand Navet ja... | \n", "Chef D'Oeuvre Absolu en vue... | \n", "5 | \n", "7b56d9d378d9e999d293f301ac43d044cd7b4786d09afb... | \n", "1 | \n", "
2 | \n", "2 | \n", "Effet garanti sur la terrase. Ils donnent immé... | \n", "Effet garanti sur la terrase. Ils donnent immé... | \n", "3 | \n", "7b37bf5dcb2fafd9229897910318a7dfa11a04ca36893c... | \n", "0 | \n", "
3 | \n", "3 | \n", "tres bon rapport qualite prix tre pratique en ... | \n", "bon produit | \n", "4 | \n", "77d2dbd504b933ab3aaf7cb0cd81c22f7c3549012f4f88... | \n", "1 | \n", "
4 | \n", "4 | \n", "Ordinateur de bureau trés bien pour quelqu'un ... | \n", "Apple Power MAC G4 | \n", "3 | \n", "f574512e7d2dd1dd73c7f8f804bf16f14c932c5651a01b... | \n", "1 | \n", "