{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "accelerator": "GPU", "colab": { "name": "02_PlayStore_PreProc_Classification_Pandas.ipynb", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "Q5TZE9JpVaCi" }, "source": [ "# Obsei Tutorial 02\n", "## This example shows the following Obsei workflow\n", " 1. Observe: Play Store's app reviews\n", " 2. Pre-process: Clean review text properly\n", " 3. Analyze: Classify review text within the given category list\n", " 4. Inform: Provide all data in Pandas DataFrame\n", " 5. Store: Store data in Google Drive in CSV format" ] }, { "cell_type": "markdown", "metadata": { "id": "yoBarQI4UFAY" }, "source": [ "## Install Obsei from latest code, perform these steps -\n", "- Select GPU RunType for faster computation \n", "- Restart Runtime after installation" ] }, { "cell_type": "code", "metadata": { "id": "Oh74E2T9HO-F", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "9465415f-1629-4cf4-9cda-513b5b0becf8" }, "source": [ "!pip install obsei[all]" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "Collecting git+https://github.com/lalitpagaria/obsei.git\n", " Cloning https://github.com/lalitpagaria/obsei.git to /tmp/pip-req-build-9q4fz4j2\n", " Running command git clone -q https://github.com/lalitpagaria/obsei.git /tmp/pip-req-build-9q4fz4j2\n", "Requirement already satisfied (use --upgrade to upgrade): obsei==0.0.9 from git+https://github.com/lalitpagaria/obsei.git in /usr/local/lib/python3.7/dist-packages\n", "Requirement already satisfied: app-store-reviews-reader==1.2 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (1.2)\n", "Requirement already satisfied: atlassian-python-api==3.10.0 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (3.10.0)\n", "Requirement already satisfied: beautifulsoup4==4.9.3 
in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (4.9.3)\n", "Requirement already satisfied: blis==0.7.4 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (0.7.4)\n", "Requirement already satisfied: cachetools==4.2.2 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (4.2.2)\n", "Requirement already satisfied: catalogue==2.0.4 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (2.0.4)\n", "Requirement already satisfied: certifi==2021.5.30 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (2021.5.30)\n", "Requirement already satisfied: chardet==4.0.0 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (4.0.0)\n", "Requirement already satisfied: click==7.1.2 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (7.1.2)\n", "Requirement already satisfied: courlan==0.4.0 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (0.4.0)\n", "Requirement already satisfied: cssselect==1.1.0 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (1.1.0)\n", "Requirement already satisfied: cymem==2.0.5 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (2.0.5)\n", "Requirement already satisfied: dateparser==1.0.0 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (1.0.0)\n", "Requirement already satisfied: deprecated==1.2.12 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (1.2.12)\n", "Requirement already satisfied: elasticsearch==7.13.1 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (7.13.1)\n", "Requirement already satisfied: feedparser==6.0.2 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (6.0.2)\n", "Requirement already satisfied: filelock==3.0.12 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (3.0.12)\n", "Requirement already satisfied: gnews==0.1.3 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (0.1.3)\n", "Requirement already satisfied: google-api-core==1.30.0 in 
/usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (1.30.0)\n", "Requirement already satisfied: google-api-python-client==2.8.0 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (2.8.0)\n", "Requirement already satisfied: google-auth==1.30.2 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (1.30.2)\n", "Requirement already satisfied: google-auth-httplib2==0.1.0 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (0.1.0)\n", "Requirement already satisfied: google-play-scraper==1.0.0 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (1.0.0)\n", "Requirement already satisfied: googleapis-common-protos==1.53.0 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (1.53.0)\n", "Requirement already satisfied: greenlet==1.1.0 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (1.1.0)\n", "Requirement already satisfied: htmldate==0.8.1 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (0.8.1)\n", "Requirement already satisfied: httplib2==0.19.1 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (0.19.1)\n", "Requirement already satisfied: huggingface-hub==0.0.8 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (0.0.8)\n", "Requirement already satisfied: idna==2.10 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (2.10)\n", "Requirement already satisfied: importlib-metadata==4.5.0 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (4.5.0)\n", "Requirement already satisfied: jinja2==3.0.1 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (3.0.1)\n", "Requirement already satisfied: joblib==1.0.1 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (1.0.1)\n", "Requirement already satisfied: justext==2.2.0 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (2.2.0)\n", "Requirement already satisfied: lxml==4.6.3 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (4.6.3)\n", "Requirement already satisfied: 
markupsafe==2.0.1 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (2.0.1)\n", "Requirement already satisfied: mmh3==3.0.0 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (3.0.0)\n", "Requirement already satisfied: murmurhash==1.0.5 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (1.0.5)\n", "Requirement already satisfied: nltk==3.6.2 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (3.6.2)\n", "Requirement already satisfied: numpy==1.20.3 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (1.20.3)\n", "Requirement already satisfied: oauthlib==3.1.1 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (3.1.1)\n", "Requirement already satisfied: packaging==20.9 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (20.9)\n", "Requirement already satisfied: pandas==1.2.4 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (1.2.4)\n", "Requirement already satisfied: pathy==0.5.2 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (0.5.2)\n", "Requirement already satisfied: praw==7.2.0 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (7.2.0)\n", "Requirement already satisfied: prawcore==2.1.0 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (2.1.0)\n", "Requirement already satisfied: preshed==3.0.5 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (3.0.5)\n", "Requirement already satisfied: presidio-analyzer==2.2.1 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (2.2.1)\n", "Requirement already satisfied: presidio-anonymizer==2.2.1 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (2.2.1)\n", "Requirement already satisfied: protobuf==3.17.3 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (3.17.3)\n", "Requirement already satisfied: pyasn1==0.4.8 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (0.4.8)\n", "Requirement already satisfied: pyasn1-modules==0.2.8 in 
/usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (0.2.8)\n", "Requirement already satisfied: pycryptodome==3.10.1 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (3.10.1)\n", "Requirement already satisfied: pydantic==1.7.4 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (1.7.4)\n", "Requirement already satisfied: pyparsing==2.4.7 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (2.4.7)\n", "Requirement already satisfied: python-dateutil==2.8.1 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (2.8.1)\n", "Requirement already satisfied: python-facebook-api==0.9.2 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (0.9.2)\n", "Requirement already satisfied: pytz==2021.1 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (2021.1)\n", "Requirement already satisfied: pyyaml==5.4.1 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (5.4.1)\n", "Requirement already satisfied: readability-lxml==0.8.1 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (0.8.1)\n", "Requirement already satisfied: reddit-rss-reader==1.3.2 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (1.3.2)\n", "Requirement already satisfied: regex==2020.11.13 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (2020.11.13)\n", "Requirement already satisfied: requests==2.25.1 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (2.25.1)\n", "Requirement already satisfied: requests-file==1.5.1 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (1.5.1)\n", "Requirement already satisfied: requests-oauthlib==1.3.0 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (1.3.0)\n", "Requirement already satisfied: rsa==4.7.2 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (4.7.2)\n", "Requirement already satisfied: sacremoses==0.0.45 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (0.0.45)\n", "Requirement already satisfied: 
searchtweets-v2==1.0.7 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (1.0.7)\n", "Requirement already satisfied: sentencepiece==0.1.95 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (0.1.95)\n", "Requirement already satisfied: sgmllib3k==1.0.0 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (1.0.0)\n", "Requirement already satisfied: six==1.16.0 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (1.16.0)\n", "Requirement already satisfied: slack-sdk==3.6.0 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (3.6.0)\n", "Requirement already satisfied: smart-open==3.0.0 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (3.0.0)\n", "Requirement already satisfied: soupsieve==2.2.1 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (2.2.1)\n", "Requirement already satisfied: spacy==3.0.5 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (3.0.5)\n", "Requirement already satisfied: spacy-legacy==3.0.5 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (3.0.5)\n", "Requirement already satisfied: sqlalchemy==1.4.17 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (1.4.17)\n", "Requirement already satisfied: srsly==2.4.1 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (2.4.1)\n", "Requirement already satisfied: thinc==8.0.4 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (8.0.4)\n", "Requirement already satisfied: tld==0.12.6 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (0.12.6)\n", "Requirement already satisfied: tldextract==3.1.0 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (3.1.0)\n", "Requirement already satisfied: tokenizers==0.10.3 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (0.10.3)\n", "Requirement already satisfied: tqdm==4.61.0 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (4.61.0)\n", "Requirement already satisfied: trafilatura==0.8.2 in 
/usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (0.8.2)\n", "Requirement already satisfied: transformers==4.6.1 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (4.6.1)\n", "Requirement already satisfied: tweet-preprocessor==0.6.0 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (0.6.0)\n", "Requirement already satisfied: typer==0.3.2 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (0.3.2)\n", "Requirement already satisfied: typing-extensions==3.10.0.0 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (3.10.0.0)\n", "Requirement already satisfied: tzlocal==2.1 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (2.1)\n", "Requirement already satisfied: update-checker==0.18.0 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (0.18.0)\n", "Requirement already satisfied: uritemplate==3.0.1 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (3.0.1)\n", "Requirement already satisfied: urllib3==1.26.5 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (1.26.5)\n", "Requirement already satisfied: vadersentiment==3.3.2 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (3.3.2)\n", "Requirement already satisfied: wasabi==0.8.2 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (0.8.2)\n", "Requirement already satisfied: websocket-client==1.0.1 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (1.0.1)\n", "Requirement already satisfied: wrapt==1.12.1 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (1.12.1)\n", "Requirement already satisfied: zenpy==2.0.24 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (2.0.24)\n", "Requirement already satisfied: zipp==3.4.1 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (3.4.1)\n", "Requirement already satisfied: torch==1.8.1 in /usr/local/lib/python3.7/dist-packages (from obsei==0.0.9) (1.8.1)\n", "Requirement already satisfied: setuptools>=40.3.0 in 
/usr/local/lib/python3.7/dist-packages (from google-api-core==1.30.0->obsei==0.0.9) (57.0.0)\n", "Requirement already satisfied: responses>=0.11 in /usr/local/lib/python3.7/dist-packages (from python-facebook-api==0.9.2->obsei==0.0.9) (0.13.3)\n", "Requirement already satisfied: cattrs<2.0,>=1.1; python_version >= \"3.7\" and python_version < \"4.0\" in /usr/local/lib/python3.7/dist-packages (from python-facebook-api==0.9.2->obsei==0.0.9) (1.7.1)\n", "Requirement already satisfied: attrs<21.0.0,>=20.1.0 in /usr/local/lib/python3.7/dist-packages (from python-facebook-api==0.9.2->obsei==0.0.9) (20.3.0)\n", "Building wheels for collected packages: obsei\n", " Building wheel for obsei (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for obsei: filename=obsei-0.0.9-cp37-none-any.whl size=65557 sha256=bc7c8c937eed4a7b325b3ef8e46de64e44778e40914d99267356cc4ce36c7c27\n", " Stored in directory: /tmp/pip-ephem-wheel-cache-qhkx9sy8/wheels/49/1a/6e/2fd83c9a275b7096fc615a0edef2d55b1fc33c3751ba45c1ad\n", "Successfully built obsei\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "w-avdxnAUkrR" }, "source": [ "## Mount your Google Drive to store CSV" ] }, { "cell_type": "code", "metadata": { "id": "nn216pBrEzIz", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "288fed19-0714-4c36-b012-4ec365b63c8c" }, "source": [ "from google.colab import drive\n", "drive.mount('/content/drive')" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "DjV4pKGEU0IG" }, "source": [ "## Configure following input -\n", "- `name`: Brand name of App\n", "- `category_list`: List of categories to perform review text classification\n", "- `identifier`: Package name of the app, it can be found at the end of the url of app 
in the Play Store\n", "- `country`: Country of reviews\n", "- `lookup_period`: How far back to look for reviews, e.g. `365d` (**Note**: Google rate-limits requests and provides a maximum of 200 reviews)\n", "- `extra_stop_words`: Extra stop words to remove from review text\n", "\n" ] }, { "cell_type": "code", "metadata": { "id": "-ex2Uj01oyte" }, "source": [ "name = \"zomato\"\n", "category_list = [\"easyOrder placement\", \"Realtime order tracking\", \"easy payment options\",\"Rewards and discounts\",\"user interface\",\"social media Integration\",]\n", "identifier = \"com.application.zomato\"\n", "country = \"in\"\n", "lookup_period = \"365d\"\n", "extra_stop_words = [\"i\", \"-\", \"day\", \"will\", \".\", \"use\", \"n\", \"without\", \"please\", \"app\", \"ha\", \"ho\", \"nt\", \"wa\", \n", " \"thi\", \"plz\", \"pleas\", \"ff\", \"ya\", \"thank\", \"you\", \"thanks\", \"mai\"]" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "cBslruvycDfR" }, "source": [ "## Configure columns of Pandas DataFrame\n", "Only the columns listed in `included_cols` will be returned by Pandas Sink, and `rename_cols_dict` will rename the selected `included_cols` columns to the desired names" ] }, { "cell_type": "code", "metadata": { "id": "r5FUKda1cDCI" }, "source": [ "included_cols = [f\"segmented_data_classifier_data_{category}\" for category in category_list]\n", "included_cols.append(\"segmented_data_classifier_data_positive\")\n", "included_cols.append(\"segmented_data_classifier_data_negative\")\n", "included_cols.append(\"processed_text\")\n", "included_cols.append(\"meta_at\")\n", "included_cols.append(\"meta_date\")\n", "included_cols.append(\"meta_published date\")\n", "included_cols.append(\"meta_score\")\n", "# included_cols.append(\"meta_title\")\n", "included_cols.append(\"meta_publisher_title\")\n", "\n", "rename_cols_dict = {f\"segmented_data_classifier_data_{category}\": category for category in category_list}\n", "rename_cols_dict[\"segmented_data_classifier_data_positive\"] = 
\"positive\"\n", "rename_cols_dict[\"segmented_data_classifier_data_negative\"] = \"negative\"\n", "rename_cols_dict[\"processed_text\"] = \"text\"\n", "rename_cols_dict[\"meta_at\"] = \"time\"\n", "rename_cols_dict[\"meta_date\"] = \"time\"\n", "rename_cols_dict[\"meta_published date\"] = \"time\"\n", "rename_cols_dict[\"meta_score\"] = \"ratings\"\n", "# rename_cols_dict[\"meta_title\"] = \"title\"\n", "rename_cols_dict[\"meta_publisher_title\"] = \"news publisher\"\n", "rename_cols_dict['Unnamed: 0'] = 'reviews'" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "aBCbHvGnU0qw" }, "source": [ "## Configure Play Store Review Observer\n", "\n", "\n" ] }, { "cell_type": "code", "metadata": { "id": "Yr8ucKlJHRUg" }, "source": [ "from obsei.source.playstore_scrapper import (\n", " PlayStoreScrapperSource,\n", " PlayStoreScrapperConfig,\n", ")\n", "\n", "source_config = PlayStoreScrapperConfig(\n", " countries=[country],\n", " package_name=identifier,\n", " lookup_period=lookup_period\n", ")\n", "\n", "source = PlayStoreScrapperSource()" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "joXahJ_6U2XQ" }, "source": [ "## Configure TextCleaner as Pre-Processor to clean review text\n", "These cleaning function will run serially" ] }, { "cell_type": "code", "metadata": { "id": "esmT-8IhpHvp", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "5ddd4c5a-6828-4145-8b1e-a30639a16223" }, "source": [ "from obsei.preprocessor.text_cleaner import TextCleaner, TextCleanerConfig\n", "from obsei.preprocessor.text_cleaning_function import *\n", "\n", "text_cleaner_config = TextCleanerConfig(\n", " stop_words=extra_stop_words,\n", " cleaning_functions = [\n", " ToLowerCase(),\n", " RemoveWhiteSpaceAndEmptyToken(),\n", " RemovePunctuation(),\n", " RemoveSpecialChars(),\n", " DecodeUnicode(),\n", " RemoveDateTime(),\n", " RemoveStopWords(),\n", " 
RemoveStopWords(stop_words=extra_stop_words),\n", " RemoveWhiteSpaceAndEmptyToken(),\n", " ]\n", ")\n", "\n", "text_cleaner = TextCleaner()" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "[nltk_data] Downloading package stopwords to /root/nltk_data...\n", "[nltk_data] Package stopwords is already up-to-date!\n" ], "name": "stderr" } ] }, { "cell_type": "markdown", "metadata": { "id": "yiLCi925U1YN" }, "source": [ "## Configure Classification Analyzer\n", "**Note**: Select a model from https://huggingface.co/models?pipeline_tag=zero-shot-classification if you want to try a different one" ] }, { "cell_type": "code", "metadata": { "id": "26ASFOm_HW2s" }, "source": [ "from obsei.analyzer.classification_analyzer import ClassificationAnalyzerConfig, ZeroShotClassificationAnalyzer\n", "\n", "analyzer_config=ClassificationAnalyzerConfig(\n", " labels=category_list,\n", ")\n", "\n", "text_analyzer = ZeroShotClassificationAnalyzer(\n", " model_name_or_path=\"typeform/mobilebert-uncased-mnli\",\n", " device=\"auto\"\n", ")" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "51iUR7QOU1zC" }, "source": [ "## Configure Pandas DataFrame Informer\n", "\n", "\n" ] }, { "cell_type": "code", "metadata": { "id": "1aLYa-QBHaDs" }, "source": [ "from pandas import DataFrame\n", "from obsei.sink.pandas_sink import PandasSink, PandasSinkConfig\n", "\n", "sink_config = PandasSinkConfig(\n", " dataframe=DataFrame(),\n", " include_columns_list=included_cols\n", ")\n", "sink = PandasSink()" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "xvy-TwzQU3rb" }, "source": [ "## Fetch app reviews" ] }, { "cell_type": "code", "metadata": { "id": "YmTz7qKayCTW" }, "source": [ "source_response_list = source.lookup(source_config)" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "JDCsk9QoU4pu" }, "source": [ "## PreProcess review text to clean it\n", 
"\n" ] }, { "cell_type": "code", "metadata": { "id": "TnbjTr4vyHOk", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "69115baf-4b22-462f-c8fd-faaeb0601ba7" }, "source": [ "cleaner_response_list = text_cleaner.preprocess_input(\n", " input_list=source_response_list,\n", " config=text_cleaner_config\n", ")" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "07/11/2021 17:09:25 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:25 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:25 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:25 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:25 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - 
obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time 
format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - 
obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time 
format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - 
obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time 
format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - 
obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time 
format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - 
obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time 
format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - 
obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n", "07/11/2021 17:09:26 - WARNING - obsei.preprocessor.text_cleaning_function - Token contain invalid date time format\n" ], "name": "stderr" } ] }, { "cell_type": "markdown", "metadata": { "id": "tjoWwW42U4J6" }, "source": [ "## Analyze reviews to perform classification\n", "**Note**: This is compute heavy step" ] }, { "cell_type": "code", "metadata": { "id": "qZyRJIcjyEIu", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "678f8211-9bcc-4fd7-ab95-0c73fda47938" }, "source": [ "analyzer_response_list = text_analyzer.analyze_input(\n", " source_response_list=cleaner_response_list,\n", " analyzer_config=analyzer_config\n", ")" ], "execution_count": null, "outputs": [ { "output_type": "stream", 
"text": [ "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n" ], "name": "stderr" } ] }, { "cell_type": "markdown", "metadata": { "id": "9bihm518VDSJ" }, "source": [ "## Inform review data in form of Pandas DataFrame" ] }, { "cell_type": "code", "metadata": { "id": "zFPdISnWHr9j", "colab": { "base_uri": "https://localhost:8080/", "height": 419 }, "outputId": "14316ee7-22d4-4542-c0b8-e6dbe1dff1f2" }, "source": [ "dataframe = sink.send_data(analyzer_response_list, sink_config)\n", "dataframe.rename(rename_cols_dict,axis=1,inplace=True)\n", "\n", "\n", "dataframe[\"brand\"] = name\n", "dataframe" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", " | text | \n", "positive | \n", "easy payment options | \n", "easyOrder placement | \n", "user interface | \n", "Realtime order tracking | \n", "Rewards and discounts | \n", "social media Integration | \n", "negative | \n", "ratings | \n", "time | \n", "brand | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "good | \n", "1.00 | \n", "0.67 | \n", "0.65 | \n", "0.60 | \n", "0.43 | \n", "0.35 | \n", "0.06 | \n", "0.00 | \n", "5 | \n", "2021-07-11 17:09:17 | \n", "zomato | \n", "
1 | \n", "excellent loving | \n", "1.00 | \n", "0.20 | \n", "0.19 | \n", "0.32 | \n", "0.10 | \n", "0.11 | \n", "0.01 | \n", "0.00 | \n", "5 | \n", "2021-07-11 17:08:09 | \n", "zomato | \n", "
2 | \n", "delievered wrong house | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.26 | \n", "0.00 | \n", "0.02 | \n", "0.03 | \n", "0.99 | \n", "1 | \n", "2021-07-11 17:07:36 | \n", "zomato | \n", "
3 | \n", "superb excellent | \n", "1.00 | \n", "0.55 | \n", "0.57 | \n", "0.71 | \n", "0.28 | \n", "0.20 | \n", "0.02 | \n", "0.00 | \n", "5 | \n", "2021-07-11 17:07:17 | \n", "zomato | \n", "
4 | \n", "good | \n", "1.00 | \n", "0.67 | \n", "0.65 | \n", "0.60 | \n", "0.43 | \n", "0.35 | \n", "0.06 | \n", "0.00 | \n", "4 | \n", "2021-07-11 17:05:58 | \n", "zomato | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
195 | \n", "sellers cheat users selling less quantity cont... | \n", "0.18 | \n", "0.00 | \n", "0.04 | \n", "0.07 | \n", "0.04 | \n", "0.08 | \n", "0.03 | \n", "0.68 | \n", "1 | \n", "2021-07-11 16:08:05 | \n", "zomato | \n", "
196 | \n", "nice service | \n", "0.99 | \n", "0.81 | \n", "0.40 | \n", "0.60 | \n", "0.12 | \n", "0.28 | \n", "0.02 | \n", "0.00 | \n", "5 | \n", "2021-07-11 16:07:52 | \n", "zomato | \n", "
197 | \n", "amazing experience far | \n", "0.99 | \n", "0.02 | \n", "0.04 | \n", "0.21 | \n", "0.09 | \n", "0.02 | \n", "0.01 | \n", "0.00 | \n", "5 | \n", "2021-07-11 16:07:53 | \n", "zomato | \n", "
198 | \n", "delivery fast less offers cash delivery | \n", "0.94 | \n", "0.94 | \n", "0.17 | \n", "0.62 | \n", "0.13 | \n", "0.06 | \n", "0.03 | \n", "0.42 | \n", "2 | \n", "2021-07-11 16:07:38 | \n", "zomato | \n", "
199 | \n", "good food fast delivery | \n", "0.99 | \n", "0.95 | \n", "0.30 | \n", "0.52 | \n", "0.09 | \n", "0.33 | \n", "0.00 | \n", "0.00 | \n", "5 | \n", "2021-07-11 16:07:23 | \n", "zomato | \n", "
200 rows × 12 columns
\n", "