{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Word Count Example with Apache Spark (PySpark)\n", "\n", "In this notebook we will go through the traditional Word Count example, and along the way we will cover `map`, `flatMap`, `filter`, `count`, `reduceByKey`, `sortByKey` and an enhanced word count.\n", "\n", "`\n", "@author: Anindya Saha \n", "@email: mail.anindya@gmail.com\n", "`" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from pyspark.sql import SparkSession" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "spark = SparkSession.builder.master('local[*]').appName('wordcount-pyspark').getOrCreate()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
SparkSession - in-memory
\n", " \n", "SparkContext
\n", "\n", " \n", "\n", "v2.3.0
local[*]
wordcount-pyspark