{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+--------+-----+\n", "| word|count|\n", "+--------+-----+\n", "| rat| 2|\n", "| cat| 2|\n", "|elephant| 1|\n", "+--------+-----+\n", "\n", "(0, array([ 0.69630218], dtype=float32), array([ 0.00331201], dtype=float32))\n", "(20, array([ 0.28020507], dtype=float32), array([ 0.21137346], dtype=float32))\n", "(40, array([ 0.15433531], dtype=float32), array([ 0.2732774], dtype=float32))\n", "(60, array([ 0.11638315], dtype=float32), array([ 0.29194263], dtype=float32))\n", "(80, array([ 0.10493983], dtype=float32), array([ 0.29757056], dtype=float32))\n", "(100, array([ 0.10148945], dtype=float32), array([ 0.2992675], dtype=float32))\n", "(120, array([ 0.10044909], dtype=float32), array([ 0.29977915], dtype=float32))\n", "(140, array([ 0.10013542], dtype=float32), array([ 0.2999334], dtype=float32))\n", "(160, array([ 0.10004086], dtype=float32), array([ 0.29997993], dtype=float32))\n", "(180, array([ 0.10001232], dtype=float32), array([ 0.29999396], dtype=float32))\n", "(200, array([ 0.10000371], dtype=float32), array([ 0.29999819], dtype=float32))\n" ] } ], "source": [ "import urllib2 \n", "import tensorflow as tf \n", "import numpy as np \n", "import findspark \n", " \n", "findspark.init() \n", "from pyspark import SparkContext, SparkFiles, SQLContext\n", " \n", "if not 'sc' in locals(): \n", " sc = SparkContext() \n", " \n", "if not 'sqlContext' in locals():\n", " sqlContext = SQLContext(sc)\n", " \n", "wordsDF = sqlContext.createDataFrame([('cat',), ('elephant',), ('rat',), ('rat',), ('cat', )], ['word']) \n", "wordCountsDF = wordsDF.groupBy(\"word\").count() \n", "wordCountsDF.show() \n", " \n", "x_data = np.random.rand(100).astype(np.float32) \n", "y_data = x_data * 0.1 + 0.3 \n", " \n", "W = tf.Variable(tf.random_uniform([1], -1.0, 1.0)) \n", "b = tf.Variable(tf.zeros([1])) \n", "y = W * x_data + b \n", " \n", "loss = tf.reduce_mean(tf.square(y - y_data)) \n", "optimizer = tf.train.GradientDescentOptimizer(0.5) \n", "train = optimizer.minimize(loss) \n", " \n", "init = tf.initialize_all_variables() \n", " \n", "sess = tf.Session() \n", "sess.run(init) \n", " \n", "for step in range(201): \n", " sess.run(train) \n", " if step % 20 == 0: \n", " print(step, sess.run(W), sess.run(b)) " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python [Root]", "language": "python", "name": "Python [Root]" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.12" } }, "nbformat": 4, "nbformat_minor": 0 }