{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "collapsed_sections": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "nteract": { "version": "0.12.3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "code", "metadata": { "id": "V58rxea0HqSa", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "46268099-2d42-466a-a88a-da21749488fa" }, "source": [ "import os\n", "\n", "# Spark release to install, written as 'spark-X.Y.Z'. Every release is kept at\n", "# https://archive.apache.org/dist/spark/ (the main download mirror only keeps the newest ones,\n", "# so a pinned older version would otherwise fail to download).\n", "spark_version = 'spark-3.3.0'\n", "# Exported so the shell commands below can expand $SPARK_VERSION.\n", "os.environ['SPARK_VERSION'] = spark_version\n", "\n", "# Install Java and Spark\n", "!apt-get update\n", "!apt-get install openjdk-11-jdk-headless -qq > /dev/null\n", "!wget -q https://archive.apache.org/dist/spark/$SPARK_VERSION/$SPARK_VERSION-bin-hadoop3.tgz\n", "!tar xf $SPARK_VERSION-bin-hadoop3.tgz\n", "!pip install -q findspark\n", "\n", "# Tell findspark where the JDK and the Spark distribution live.\n", "os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-11-openjdk-amd64\"\n", "os.environ[\"SPARK_HOME\"] = f\"/content/{spark_version}-bin-hadoop3\"\n", "\n", "# Make pyspark importable; the SparkSession itself is created in a later cell.\n", "import findspark\n", "findspark.init()" ], "execution_count": 1, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\r0% [Working]\r \rGet:1 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB]\n", "Get:2 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease [3,626 B]\n", "Ign:3 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 InRelease\n", "Get:4 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 InRelease [1,581 B]\n", "Hit:5 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 Release\n", "Hit:6 http://archive.ubuntu.com/ubuntu bionic InRelease\n", "Hit:7 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu 
bionic InRelease\n", "Get:8 http://archive.ubuntu.com/ubuntu bionic-updates InRelease [88.7 kB]\n", "Hit:9 http://ppa.launchpad.net/cran/libgit2/ubuntu bionic InRelease\n", "Hit:10 http://ppa.launchpad.net/deadsnakes/ppa/ubuntu bionic InRelease\n", "Get:11 http://archive.ubuntu.com/ubuntu bionic-backports InRelease [83.3 kB]\n", "Hit:12 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu bionic InRelease\n", "Get:13 http://security.ubuntu.com/ubuntu bionic-security/universe amd64 Packages [1,546 kB]\n", "Get:14 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 Packages [950 kB]\n", "Get:15 http://security.ubuntu.com/ubuntu bionic-security/main amd64 Packages [2,992 kB]\n", "Get:17 http://archive.ubuntu.com/ubuntu bionic-updates/universe amd64 Packages [2,324 kB]\n", "Get:18 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 Packages [3,424 kB]\n", "Fetched 11.5 MB in 5s (2,154 kB/s)\n", "Reading package lists... Done\n" ] } ] }, { "cell_type": "code", "metadata": { "id": "_xKwTpATHqSe", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "eee2acb2-759f-4087-cb23-2011d661384c" }, "source": [ "# Download the Postgres JDBC driver that lets Spark talk to Postgres.\n", "# -nc (no-clobber) skips the download when the jar is already present, so re-running\n", "# this cell does not leave a duplicate 'postgresql-42.5.0.jar.1' behind.\n", "!wget -nc https://jdbc.postgresql.org/download/postgresql-42.5.0.jar" ], "execution_count": 2, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "--2022-10-08 20:59:00-- https://jdbc.postgresql.org/download/postgresql-42.5.0.jar\n", "Resolving jdbc.postgresql.org (jdbc.postgresql.org)... 72.32.157.228, 2001:4800:3e1:1::228\n", "Connecting to jdbc.postgresql.org (jdbc.postgresql.org)|72.32.157.228|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 1046274 (1022K) [application/java-archive]\n", "Saving to: ‘postgresql-42.5.0.jar’\n", "\n", "postgresql-42.5.0.j 100%[===================>] 1022K 5.47MB/s in 0.2s \n", "\n", "2022-10-08 20:59:00 (5.47 MB/s) - ‘postgresql-42.5.0.jar’ saved [1046274/1046274]\n", "\n" ] } ] }, { "cell_type": "code", "metadata": { "id": "MMqDAjVS0KN9" }, "source": [ "from pyspark.sql import SparkSession\n", "\n", "# Build (or reuse) the session with the Postgres JDBC jar on the driver classpath.\n", "session_builder = SparkSession.builder.appName(\"M16-Amazon-Challenge\")\n", "session_builder = session_builder.config(\"spark.driver.extraClassPath\", \"/content/postgresql-42.5.0.jar\")\n", "spark = session_builder.getOrCreate()" ], "execution_count": 3, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "cyBsySGuY-9V" }, "source": [ "### Load Amazon Data into Spark DataFrame" ] }, { "cell_type": "code", "metadata": { "id": "CtCmBhQJY-9Z", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "5f22937f-420a-44de-a92b-1c41e74db081" }, "source": [ "from pyspark import SparkFiles\n", "\n", "# Register the remote gzipped TSV of Toys reviews with the Spark context.\n", "url = \"https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_Toys_v1_00.tsv.gz\"\n", "spark.sparkContext.addFile(url)\n", "\n", "# Look up the local path of the file that addFile() registered.\n", "reviews_path = SparkFiles.get(\"amazon_reviews_us_Toys_v1_00.tsv.gz\")\n", "\n", "# Tab-separated, first row is the header, column types inferred from the data.\n", "reader = spark.read.option(\"encoding\", \"UTF-8\")\n", "df = reader.csv(reviews_path, sep=\"\\t\", header=True, inferSchema=True)\n", "df.show()" ], "execution_count": 4, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "+-----------+-----------+--------------+----------+--------------+--------------------+----------------+-----------+-------------+-----------+----+-----------------+--------------------+--------------------+-------------------+\n", "|marketplace|customer_id| review_id|product_id|product_parent| product_title|product_category|star_rating|helpful_votes|total_votes|vine|verified_purchase| review_headline| review_body| review_date|\n", 
"+-----------+-----------+--------------+----------+--------------+--------------------+----------------+-----------+-------------+-----------+----+-----------------+--------------------+--------------------+-------------------+\n", "| US| 18778586| RDIJS7QYB6XNR|B00EDBY7X8| 122952789|Monopoly Junior B...| Toys| 5| 0| 0| N| Y| Five Stars| Excellent!!!|2015-08-31 00:00:00|\n", "| US| 24769659|R36ED1U38IELG8|B00D7JFOPC| 952062646|56 Pieces of Wood...| Toys| 5| 0| 0| N| Y|Good quality trac...|Great quality woo...|2015-08-31 00:00:00|\n", "| US| 44331596| R1UE3RPRGCOLD|B002LHA74O| 818126353|Super Jumbo Playi...| Toys| 2| 1| 1| N| Y| Two Stars|Cards are not as ...|2015-08-31 00:00:00|\n", "| US| 23310293|R298788GS6I901|B00ARPLCGY| 261944918|Barbie Doll and F...| Toys| 5| 0| 0| N| Y|my daughter loved...|my daughter loved...|2015-08-31 00:00:00|\n", "| US| 38745832| RNX4EXOBBPN5|B00UZOPOFW| 717410439|Emazing Lights eL...| Toys| 1| 1| 1| N| Y| DONT BUY THESE!|Do not buy these!...|2015-08-31 00:00:00|\n", "| US| 13394189|R3BPETL222LMIM|B009B7F6CA| 873028700|Melissa & Doug Wa...| Toys| 5| 0| 0| N| Y| Five Stars|Great item. 
Pictu...|2015-08-31 00:00:00|\n", "| US| 2749569|R3SORMPJZO3F2J|B0101EHRSM| 723424342|Big Bang Cosmic P...| Toys| 3| 2| 2| N| Y| Three Stars|To keep together,...|2015-08-31 00:00:00|\n", "| US| 41137196|R2RDOJQ0WBZCF6|B00407S11Y| 383363775|Fun Express Insec...| Toys| 5| 0| 0| N| Y| Five Stars|I was pleased wit...|2015-08-31 00:00:00|\n", "| US| 433677|R2B8VBEPB4YEZ7|B00FGPU7U2| 780517568|Fisher-Price Octo...| Toys| 5| 0| 0| N| Y| Five Stars| Children like it|2015-08-31 00:00:00|\n", "| US| 1297934|R1CB783I7B0U52|B0013OY0S0| 269360126|Claw Climber Goli...| Toys| 1| 0| 1| N| Y|Shame on the sell...|Showed up not how...|2015-08-31 00:00:00|\n", "| US| 52006292| R2D90RQQ3V8LH|B00519PJTW| 493486387|100 Foot Multicol...| Toys| 5| 0| 0| N| Y| Five Stars|Really liked thes...|2015-08-31 00:00:00|\n", "| US| 32071052|R1Y4ZOUGFMJ327|B001TCY2DO| 459122467|Pig Jumbo Foil Ba...| Toys| 5| 0| 0| N| Y| Nice huge balloon|Nice huge balloon...|2015-08-31 00:00:00|\n", "| US| 7360347|R2BUV9QJI2A00X|B00DOQCWF8| 226984155|Minecraft Animal ...| Toys| 5| 0| 1| N| Y| Five Stars| Great deal|2015-08-31 00:00:00|\n", "| US| 11613707| RSUHRJFJIRB3Z|B004C04I4I| 375659886|Disney Baby: Eeyo...| Toys| 4| 0| 0| N| Y| Four Stars| As Advertised|2015-08-31 00:00:00|\n", "| US| 13545982|R1T96CG98BBA15|B00NWGEKBY| 933734136|Team Losi 8IGHT-E...| Toys| 3| 2| 4| N| Y|... servo so expe...|Comes w a 15$ ser...|2015-08-31 00:00:00|\n", "| US| 43880421| R2ATXF4QQ30YW|B00000JS5S| 341842639|Hot Wheels 48- Ca...| Toys| 5| 0| 0| N| Y| Five Stars| awesome ! 
Thanks!|2015-08-31 00:00:00|\n", "| US| 1662075|R1YS3DS218NNMD|B00XPWXYDK| 210135375|ZuZo 2.4GHz 4 CH ...| Toys| 5| 4| 4| N| N|The closest relev...|I got this item f...|2015-08-31 00:00:00|\n", "| US| 18461411|R2SDXLTLF92O0H|B00VPXX92W| 705054378|Teenage Mutant Ni...| Toys| 5| 0| 0| N| Y| Five Stars|It was a birthday...|2015-08-31 00:00:00|\n", "| US| 27225859| R4R337CCDWLNG|B00YRA3H4U| 223420727|Franklin Sports M...| Toys| 3| 0| 1| Y| N|Got wrong product...|Got a wrong produ...|2015-08-31 00:00:00|\n", "| US| 20494593|R32Z6UA4S5Q630|B009T8BSQY| 787701676|Alien Frontiers: ...| Toys| 1| 0| 0| N| Y| Overpriced.|You need expansio...|2015-08-31 00:00:00|\n", "+-----------+-----------+--------------+----------+--------------+--------------------+----------------+-----------+-------------+-----------+----+-----------------+--------------------+--------------------+-------------------+\n", "only showing top 20 rows\n", "\n" ] } ] }, { "cell_type": "code", "source": [ "# Keep only reviews that received at least 20 votes in total.\n", "df_total_Votes = df.where(\"total_votes >= 20\")\n", "df_total_Votes.show()" ], "metadata": { "id": "JqaZ1NsZaOcR" }, "execution_count": 11, "outputs": [] }, { "cell_type": "code", "source": [ "# Share of helpful votes among all votes cast on each review.\n", "helpful_ratio = df_total_Votes[\"helpful_votes\"] / df_total_Votes[\"total_votes\"]\n", "\n", "# Attach the ratio as a column, then keep reviews where it is strictly above 0.50.\n", "df_with_ratio = df_total_Votes.withColumn(\"helpfulByTotal\", helpful_ratio)\n", "df_votes = df_with_ratio.where(\"helpful_votes/total_votes > 0.50\")\n", "df_votes.show()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "irVfVEeQbP3x", "outputId": "364a23d8-a71a-4c25-b9ea-8520ee21288a" }, "execution_count": 17, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "+-----------+-----------+--------------+----------+--------------+--------------------+----------------+-----------+-------------+-----------+----+-----------------+--------------------+--------------------+-------------------+------------------+\n", "|marketplace|customer_id| review_id|product_id|product_parent| 
product_title|product_category|star_rating|helpful_votes|total_votes|vine|verified_purchase| review_headline| review_body| review_date| helpfulByTotal|\n", "+-----------+-----------+--------------+----------+--------------+--------------------+----------------+-----------+-------------+-----------+----+-----------------+--------------------+--------------------+-------------------+------------------+\n", "| US| 11025729| ROP6ITXO8K5V2|B00LE9MZMW| 778942419|YJ Moyu 13x13x13 ...| Toys| 5| 23| 27| N| Y| Five Stars|Great cube, moves...|2015-08-31 00:00:00|0.8518518518518519|\n", "| US| 50123725|R3ND1LVU7AXCVF|B00LOV24VC| 363255349|Marshmallow Furni...| Toys| 1| 21| 21| N| Y|If you want a qua...|i bought this cha...|2015-08-31 00:00:00| 1.0|\n", "| US| 53067113| R9I5FOLKU99RY|B00U1KA12G| 278300201|Qixels S1 Turbo D...| Toys| 5| 19| 20| Y| N|11 Year Old Loves...|My daughter who i...|2015-08-31 00:00:00| 0.95|\n", "| US| 44167324|R1QS8AOD6HX3ED|B011BWUDGG| 501779560|Leray Self Balanc...| Toys| 4| 59| 81| N| N| Solid warranty...|[[VIDEOID:164d1b1...|2015-08-31 00:00:00|0.7283950617283951|\n", "| US| 44759540|R3ED60RC69CCQ6|B00VJKTLMS| 917726757|Fisher-Price Nick...| Toys| 5| 22| 23| Y| N|Lots of fun if yo...|This toy is a kno...|2015-08-31 00:00:00|0.9565217391304348|\n", "| US| 43450674|R2JM687C525WR9|B00KH6BQJG| 116308869|OgoSport OgoBild ...| Toys| 3| 33| 33| Y| N|Not bad, but we t...|cWe've had the Hu...|2015-08-31 00:00:00| 1.0|\n", "| US| 38209184|R2LWX4TZ67FWPT|B00IYOCSE0| 825711912|Flexible Flyer Pl...| Toys| 4| 50| 50| N| N|its a great swing...|its a great swing...|2015-08-31 00:00:00| 1.0|\n", "| US| 7719843|R29IYHPYD14AGI|B000F3V2MW| 253592398|ALEX Toys Craft M...| Toys| 3| 84| 84| N| Y| preteen projects|I purchased this ...|2015-08-31 00:00:00| 1.0|\n", "| US| 32035019|R1F2I723WRK5QV|B00YU88U6O| 480317972|SueSport 4-way Tu...| Toys| 5| 20| 20| N| Y| Great for big cats!|Bought this for m...|2015-08-31 00:00:00| 1.0|\n", "| US| 21817391| RRO8C1IVZMD6H|B011KD5CGW| 
984270738|Playskool Heroes ...| Toys| 5| 22| 22| N| N|Rescan line is sm...|When buying rescu...|2015-08-31 00:00:00| 1.0|\n", "| US| 16379976|R2GHOWIWDVLN18|B00X9JDPYC| 681351265|UDI HD+ Drone wit...| Toys| 1| 28| 36| N| Y| One Star|The damn thing fl...|2015-08-31 00:00:00|0.7777777777777778|\n", "| US| 28820853|R30PJ9XUHFTY5D|B003VYAJMG| 103705238|Little Tikes Bold...| Toys| 5| 42| 43| N| N|My Granddaughter ...|My 18 month old g...|2015-08-31 00:00:00|0.9767441860465116|\n", "| US| 9988621|R2F7D9N3SSQC20|B00YI1CHHG| 150124290|CloudPets 12in - ...| Toys| 4| 32| 34| N| N|My daughter and I...|This bear is incr...|2015-08-31 00:00:00|0.9411764705882353|\n", "| US| 27803490| RN4VK9FV66YPS|B00WJ1O76G| 701543085|LeapFrog Number L...| Toys| 5| 29| 33| N| N| LOVE THIS!!|[[VIDEOID:9050e95...|2015-08-31 00:00:00|0.8787878787878788|\n", "| US| 50970965|R18T8SFBDXAKSY|B002TLDRI0| 198159965|JIGBOARD 1500 - J...| Toys| 5| 95| 95| N| Y|Love it. With fiv...|Love it. With fi...|2015-08-31 00:00:00| 1.0|\n", "| US| 8109655|R36SKVJXZ0I98A|8499000606| 536492553|Syma S107/S107G R...| Toys| 5| 124| 127| N| N|Nice, light weigh...|The Syma Helicopt...|2015-08-31 00:00:00|0.9763779527559056|\n", "| US| 182750|R3PAIHQSO707G4|B010TU7LP2| 3331341|Exploding Kittens...| Toys| 5| 189| 221| N| N|NSFW = NOT SAFE F...|Exploding Kittens...|2015-08-31 00:00:00|0.8552036199095022|\n", "| US| 1449184|R2TETIV0IVWFSJ|B008R67UKM| 775534268|Good cosplay stra...| Toys| 4| 80| 82| N| Y|and fits nicely. 
...|Well constructed,...|2015-08-31 00:00:00| 0.975609756097561|\n", "| US| 6105658|R1VBVD23GPZW0E|B00YSWUDGM| 412449765|100 Assorted Poke...| Toys| 5| 55| 58| N| Y|So that was prett...|Came in a brown b...|2015-08-31 00:00:00|0.9482758620689655|\n", "| US| 48638849|R1AJKXVHURPVCZ|B00DCENMWY| 376397735|Mice and Mystics:...| Toys| 3| 12| 22| N| Y|Lacking in produc...|Keeping everythin...|2015-08-31 00:00:00|0.5454545454545454|\n", "+-----------+-----------+--------------+----------+--------------+--------------------+----------------+-----------+-------------+-----------+----+-----------------+--------------------+--------------------+-------------------+------------------+\n", "only showing top 20 rows\n", "\n" ] } ] }, { "cell_type": "code", "source": [ "# Rows whose vine flag is 'Y'; later cells report these as the \"paid\" reviews.\n", "df_vine_paid = df_votes.where(\"vine='Y'\")\n", "df_vine_paid.show()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "qLKa71webTpw", "outputId": "e8ae9e3d-7e41-4b22-d66d-1f2771baa109" }, "execution_count": 32, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "+-----------+-----------+--------------+----------+--------------+--------------------+----------------+-----------+-------------+-----------+----+-----------------+--------------------+--------------------+-------------------+------------------+\n", "|marketplace|customer_id| review_id|product_id|product_parent| product_title|product_category|star_rating|helpful_votes|total_votes|vine|verified_purchase| review_headline| review_body| review_date| helpfulByTotal|\n", "+-----------+-----------+--------------+----------+--------------+--------------------+----------------+-----------+-------------+-----------+----+-----------------+--------------------+--------------------+-------------------+------------------+\n", "| US| 53067113| R9I5FOLKU99RY|B00U1KA12G| 278300201|Qixels S1 Turbo D...| Toys| 5| 19| 20| Y| N|11 Year Old Loves...|My daughter who i...|2015-08-31 00:00:00| 0.95|\n", "| US| 
44759540|R3ED60RC69CCQ6|B00VJKTLMS| 917726757|Fisher-Price Nick...| Toys| 5| 22| 23| Y| N|Lots of fun if yo...|This toy is a kno...|2015-08-31 00:00:00|0.9565217391304348|\n", "| US| 43450674|R2JM687C525WR9|B00KH6BQJG| 116308869|OgoSport OgoBild ...| Toys| 3| 33| 33| Y| N|Not bad, but we t...|cWe've had the Hu...|2015-08-31 00:00:00| 1.0|\n", "| US| 45716675| RUB4AUWGHG16G|B00T03U0OO| 754581011|Disney Frozen Els...| Toys| 4| 25| 27| Y| N|and of course lit...|This might be the...|2015-08-30 00:00:00|0.9259259259259259|\n", "| US| 52221911|R1S3IDYJUA6V2G|B00U5UH9CC| 205348267|FurReal Friends J...| Toys| 5| 28| 32| Y| N|Video Shows It Be...|[[VIDEOID:b560180...|2015-08-27 00:00:00| 0.875|\n", "| US| 49620639| RZFLN8FD5Y2M6|B00TF8Z7ZG| 552869173|Thames & Kosmos R...| Toys| 4| 25| 28| Y| N| A good challenge|This was a bit fr...|2015-08-27 00:00:00|0.8928571428571429|\n", "| US| 26903314|R1V8NDXD9SETO5|B00SOG4W56| 133690318|FurReal Friends S...| Toys| 3| 320| 352| Y| N|In our opinion, n...|Don't get me wron...|2015-08-27 00:00:00|0.9090909090909091|\n", "| US| 49161703|R2H4VEFWHP50M3|B00UPAYYCA| 626860150|Chrono Bomb Original| Toys| 5| 26| 36| Y| N|FANTASTIC GAME!!!...|This is an awesom...|2015-08-27 00:00:00|0.7222222222222222|\n", "| US| 27140716|R3DP6Y0A8WXUOV|B00V3DHJC0| 663413144|My First Lab Mini...| Toys| 5| 21| 21| Y| N|Outstanding! A r...|My First Lab Mini...|2015-08-26 00:00:00| 1.0|\n", "| US| 35391116| R49L85GZIKH4D|1601257457| 505075355|Pathfinder Advent...| Toys| 3| 33| 36| Y| N|Fun and better th...|I was kind of sur...|2015-08-26 00:00:00|0.9166666666666666|\n", "| US| 48357724| R2RQKWRUS11DZ|B00TVIFZ1Q| 901252756|Nerf Rebelle Secr...| Toys| 5| 18| 20| Y| N|KID-TESTED! 
5 ki...|UPDATE on Nov 30,...|2015-08-25 00:00:00| 0.9|\n", "| US| 46693594| RZL7TPXLJPR2Z|B00VJKTFYW| 748575702|Fisher-Price Thom...| Toys| 4| 19| 20| Y| N|Your cat will hat...|I have a four yea...|2015-08-25 00:00:00| 0.95|\n", "| US| 47107073|R34JSTH01HK1J8|B00T03U0OO| 754581011|Disney Frozen Els...| Toys| 4| 75| 80| Y| N| Ice Palace|[[VIDEOID:2e52e58...|2015-08-25 00:00:00| 0.9375|\n", "| US| 50116247|R1OI8PAJW4MWUY|B00QKLLC1O| 146052104|Schleich Dragon F...| Toys| 5| 20| 20| Y| N|Another high qual...|Schleich is a Ger...|2015-08-25 00:00:00| 1.0|\n", "| US| 47030935|R1RXSJILN39UY1|B00VJKT0QU| 660207223|Fisher-Price Brig...| Toys| 5| 48| 53| Y| N| Lights, music, fun!|I ordered this to...|2015-08-24 00:00:00|0.9056603773584906|\n", "| US| 34571698| R79XS8S5EYX59|B00T0GY3U8| 977011021|Dimension: The Sp...| Toys| 3| 20| 22| Y| N|A very original g...|Over the past two...|2015-08-24 00:00:00|0.9090909090909091|\n", "| US| 20047667|R24STRS5M3QEXQ|B00U26LQDC| 28745879|Mega Bloks First ...| Toys| 5| 60| 65| Y| N|Mega Bloks First ...|A must have toy f...|2015-08-24 00:00:00|0.9230769230769231|\n", "| US| 45378171|R15HJU8XNX1XG1|B00XUZH8DE| 501465614|VTech Go! Go! Sma...| Toys| 4| 25| 26| Y| N|Excellent product...|The VTech Go! 
Go!...|2015-08-23 00:00:00|0.9615384615384616|\n", "| US| 52008531|R1NMYF3BJM72A0|B00PNAGN7Q| 236099298|Moose Toys Chocol...| Toys| 4| 57| 60| Y| N|A Fun Afternoon o...|So many people th...|2015-08-23 00:00:00| 0.95|\n", "| US| 49129501| R8XSDH6E6YWMX|B00UCWQMZ8| 738713791|Laugh & Learn Sma...| Toys| 4| 22| 24| Y| N| Good For Infants|This is a cute, e...|2015-08-23 00:00:00|0.9166666666666666|\n", "+-----------+-----------+--------------+----------+--------------+--------------------+----------------+-----------+-------------+-----------+----+-----------------+--------------------+--------------------+-------------------+------------------+\n", "only showing top 20 rows\n", "\n" ] } ] }, { "cell_type": "code", "source": [ "# Rows whose vine flag is 'N'; later cells report these as the \"unpaid\" reviews.\n", "df_vine_not_paid = df_votes.where(\"vine='N'\")\n", "df_vine_not_paid.show()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "PQZrqLENdRA5", "outputId": "bc843c77-c268-4eed-9a22-88df89be30ef" }, "execution_count": 33, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "+-----------+-----------+--------------+----------+--------------+--------------------+----------------+-----------+-------------+-----------+----+-----------------+--------------------+--------------------+-------------------+------------------+\n", "|marketplace|customer_id| review_id|product_id|product_parent| product_title|product_category|star_rating|helpful_votes|total_votes|vine|verified_purchase| review_headline| review_body| review_date| helpfulByTotal|\n", "+-----------+-----------+--------------+----------+--------------+--------------------+----------------+-----------+-------------+-----------+----+-----------------+--------------------+--------------------+-------------------+------------------+\n", "| US| 11025729| ROP6ITXO8K5V2|B00LE9MZMW| 778942419|YJ Moyu 13x13x13 ...| Toys| 5| 23| 27| N| Y| Five Stars|Great cube, moves...|2015-08-31 00:00:00|0.8518518518518519|\n", "| US| 50123725|R3ND1LVU7AXCVF|B00LOV24VC| 
363255349|Marshmallow Furni...| Toys| 1| 21| 21| N| Y|If you want a qua...|i bought this cha...|2015-08-31 00:00:00| 1.0|\n", "| US| 44167324|R1QS8AOD6HX3ED|B011BWUDGG| 501779560|Leray Self Balanc...| Toys| 4| 59| 81| N| N| Solid warranty...|[[VIDEOID:164d1b1...|2015-08-31 00:00:00|0.7283950617283951|\n", "| US| 38209184|R2LWX4TZ67FWPT|B00IYOCSE0| 825711912|Flexible Flyer Pl...| Toys| 4| 50| 50| N| N|its a great swing...|its a great swing...|2015-08-31 00:00:00| 1.0|\n", "| US| 7719843|R29IYHPYD14AGI|B000F3V2MW| 253592398|ALEX Toys Craft M...| Toys| 3| 84| 84| N| Y| preteen projects|I purchased this ...|2015-08-31 00:00:00| 1.0|\n", "| US| 32035019|R1F2I723WRK5QV|B00YU88U6O| 480317972|SueSport 4-way Tu...| Toys| 5| 20| 20| N| Y| Great for big cats!|Bought this for m...|2015-08-31 00:00:00| 1.0|\n", "| US| 21817391| RRO8C1IVZMD6H|B011KD5CGW| 984270738|Playskool Heroes ...| Toys| 5| 22| 22| N| N|Rescan line is sm...|When buying rescu...|2015-08-31 00:00:00| 1.0|\n", "| US| 16379976|R2GHOWIWDVLN18|B00X9JDPYC| 681351265|UDI HD+ Drone wit...| Toys| 1| 28| 36| N| Y| One Star|The damn thing fl...|2015-08-31 00:00:00|0.7777777777777778|\n", "| US| 28820853|R30PJ9XUHFTY5D|B003VYAJMG| 103705238|Little Tikes Bold...| Toys| 5| 42| 43| N| N|My Granddaughter ...|My 18 month old g...|2015-08-31 00:00:00|0.9767441860465116|\n", "| US| 9988621|R2F7D9N3SSQC20|B00YI1CHHG| 150124290|CloudPets 12in - ...| Toys| 4| 32| 34| N| N|My daughter and I...|This bear is incr...|2015-08-31 00:00:00|0.9411764705882353|\n", "| US| 27803490| RN4VK9FV66YPS|B00WJ1O76G| 701543085|LeapFrog Number L...| Toys| 5| 29| 33| N| N| LOVE THIS!!|[[VIDEOID:9050e95...|2015-08-31 00:00:00|0.8787878787878788|\n", "| US| 50970965|R18T8SFBDXAKSY|B002TLDRI0| 198159965|JIGBOARD 1500 - J...| Toys| 5| 95| 95| N| Y|Love it. With fiv...|Love it. 
With fi...|2015-08-31 00:00:00| 1.0|\n", "| US| 8109655|R36SKVJXZ0I98A|8499000606| 536492553|Syma S107/S107G R...| Toys| 5| 124| 127| N| N|Nice, light weigh...|The Syma Helicopt...|2015-08-31 00:00:00|0.9763779527559056|\n", "| US| 182750|R3PAIHQSO707G4|B010TU7LP2| 3331341|Exploding Kittens...| Toys| 5| 189| 221| N| N|NSFW = NOT SAFE F...|Exploding Kittens...|2015-08-31 00:00:00|0.8552036199095022|\n", "| US| 1449184|R2TETIV0IVWFSJ|B008R67UKM| 775534268|Good cosplay stra...| Toys| 4| 80| 82| N| Y|and fits nicely. ...|Well constructed,...|2015-08-31 00:00:00| 0.975609756097561|\n", "| US| 6105658|R1VBVD23GPZW0E|B00YSWUDGM| 412449765|100 Assorted Poke...| Toys| 5| 55| 58| N| Y|So that was prett...|Came in a brown b...|2015-08-31 00:00:00|0.9482758620689655|\n", "| US| 48638849|R1AJKXVHURPVCZ|B00DCENMWY| 376397735|Mice and Mystics:...| Toys| 3| 12| 22| N| Y|Lacking in produc...|Keeping everythin...|2015-08-31 00:00:00|0.5454545454545454|\n", "| US| 12153405|R2QDNZYVEX9X14|B00TOVMXW4| 559725484|Playskool Heroes ...| Toys| 2| 18| 20| N| N|More junk from Re...|Any parent famili...|2015-08-31 00:00:00| 0.9|\n", "| US| 29775971|R32RD311POLW7G|B00IEH9FQG| 303166933|KidKraft Airplane...| Toys| 3| 25| 27| N| Y|Adorable but asse...|Aesthetically, th...|2015-08-30 00:00:00|0.9259259259259259|\n", "| US| 35208712|R1VXO073B9HOGC|B0033JB3UG| 676646525| Buttercup Dollhouse| Toys| 4| 29| 30| N| Y|Cute little doll ...|You have to prep ...|2015-08-30 00:00:00|0.9666666666666667|\n", "+-----------+-----------+--------------+----------+--------------+--------------------+----------------+-----------+-------------+-----------+----+-----------------+--------------------+--------------------+-------------------+------------------+\n", "only showing top 20 rows\n", "\n" ] } ] }, { "cell_type": "code", "source": [ "# Number of rows in the paid (vine='Y') and unpaid (vine='N') subsets.\n", "total_paid_reviews = df_vine_paid.count()\n", "total_not_paid_reviews = df_vine_not_paid.count()\n", "\n", 
"# Number of 5-star rows within each subset.\n", "total_paid_5star_reviews = df_vine_paid.filter(\"star_rating=5\").count()\n", "total_unpaid_5star_reviews = df_vine_not_paid.filter(\"star_rating=5\").count()" ], "metadata": { "id": "-4X322sbdaHR" }, "execution_count": 35, "outputs": [] }, { "cell_type": "code", "source": [ "def five_star_pct(five_star_count, total_count):\n", "    \"\"\"Return five_star_count as a percentage of total_count.\n", "\n", "    Returns 0.0 when total_count is 0, so a subset with no rows\n", "    does not raise ZeroDivisionError.\n", "    \"\"\"\n", "    if total_count == 0:\n", "        return 0.0\n", "    return (five_star_count / total_count) * 100\n", "\n", "\n", "print(f\"Paid Total reviews: {total_paid_reviews}\")\n", "print(f\"Paid 5-star reviews: {total_paid_5star_reviews}\")\n", "print(f\"Unpaid Total reviews: {total_not_paid_reviews}\")\n", "print(f\"Unpaid 5-star reviews: {total_unpaid_5star_reviews}\")\n", "print(f\"% of Paid 5-star Reviews: {five_star_pct(total_paid_5star_reviews, total_paid_reviews)}%\")\n", "print(f\"% of Unpaid 5-star Reviews: {five_star_pct(total_unpaid_5star_reviews, total_not_paid_reviews)}%\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "7xKjS2oZeSLX", "outputId": "6ca6aba3-282a-44da-d3d0-623929c10be8" }, "execution_count": 37, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Paid Total reviews: 1266\n", "Paid 5-star reviews: 432\n", "Unpaid Total reviews: 61849\n", "Unpaid 5-star reviews: 29950\n", "% of Paid 5-star Reviews: 34.12322274881517%\n", "% of Unpaid 5-star Reviews: 48.42438842988569%\n" ] } ] } ] }