{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Environment smoke test: Spark + TensorFlow\n",
    "\n",
    "Checks that PySpark and TensorFlow both work in this kernel:\n",
    "\n",
    "1. Spark: build a tiny DataFrame of words and count the occurrences of each word.\n",
    "2. TensorFlow: fit `y = W * x + b` by gradient descent on synthetic data generated with `W = 0.1`, `b = 0.3`; the printed values should converge to those numbers."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------+-----+\n",
      "|    word|count|\n",
      "+--------+-----+\n",
      "|     rat|    2|\n",
      "|     cat|    2|\n",
      "|elephant|    1|\n",
      "+--------+-----+\n",
      "\n",
      "(0, array([-0.07621384], dtype=float32), array([ 0.56465578], dtype=float32))\n",
      "(20, array([ 0.04050734], dtype=float32), array([ 0.33296335], dtype=float32))\n",
      "(40, array([ 0.0856716], dtype=float32), array([ 0.30793899], dtype=float32))\n",
      "(60, array([ 0.09654912], dtype=float32), array([ 0.30191207], dtype=float32))\n",
      "(80, array([ 0.09916888], dtype=float32), array([ 0.30046052], dtype=float32))\n",
      "(100, array([ 0.09979983], dtype=float32), array([ 0.30011091], dtype=float32))\n",
      "(120, array([ 0.0999518], dtype=float32), array([ 0.30002671], dtype=float32))\n",
      "(140, array([ 0.09998839], dtype=float32), array([ 0.30000645], dtype=float32))\n",
      "(160, array([ 0.0999972], dtype=float32), array([ 0.30000156], dtype=float32))\n",
      "(180, array([ 0.09999932], dtype=float32), array([ 0.30000037], dtype=float32))\n",
      "(200, array([ 0.09999985], dtype=float32), array([ 0.3000001], dtype=float32))\n"
     ]
    }
   ],
   "source": [
    "import urllib2\n",
    "import tensorflow as tf\n",
    "import numpy as np\n",
    "import findspark\n",
    "from sklearn import linear_model\n",
    "\n",
    "# findspark.init() has to run before pyspark is imported (it locates the\n",
    "# Spark installation), which is why this import sits below the others.\n",
    "findspark.init()\n",
    "from pyspark import SparkContext, SparkFiles, SQLContext\n",
    "\n",
    "# ---------------------------------------------------------------------\n",
    "# Configuration for the TensorFlow fit below.\n",
    "# ---------------------------------------------------------------------\n",
    "TRUE_W = 0.1          # slope used to generate the synthetic targets\n",
    "TRUE_B = 0.3          # intercept used to generate the synthetic targets\n",
    "NUM_SAMPLES = 100     # number of synthetic (x, y) points\n",
    "LEARNING_RATE = 0.5   # gradient-descent step size\n",
    "NUM_STEPS = 201       # training iterations (steps 0..200)\n",
    "LOG_EVERY = 20        # print W and b every LOG_EVERY steps\n",
    "\n",
    "# ---------------------------------------------------------------------\n",
    "# Spark: count word occurrences.\n",
    "# ---------------------------------------------------------------------\n",
    "# getOrCreate() returns the already-running context when this cell is\n",
    "# re-executed, so a second SparkContext is never constructed.\n",
    "sc = SparkContext.getOrCreate()\n",
    "sqlContext = SQLContext.getOrCreate(sc)\n",
    "\n",
    "wordsDF = sqlContext.createDataFrame(\n",
    "    [('cat',), ('elephant',), ('rat',), ('rat',), ('cat',)], ['word'])\n",
    "wordCountsDF = wordsDF.groupBy(\"word\").count()\n",
    "wordCountsDF.show()\n",
    "\n",
    "# ---------------------------------------------------------------------\n",
    "# TensorFlow: recover TRUE_W and TRUE_B with gradient descent.\n",
    "# ---------------------------------------------------------------------\n",
    "# The inputs and the initial W are random and unseeded, so intermediate\n",
    "# values differ between runs; only the converged result is stable.\n",
    "x_data = np.random.rand(NUM_SAMPLES).astype(np.float32)\n",
    "y_data = x_data * TRUE_W + TRUE_B\n",
    "\n",
    "# Build the model in its own graph so that re-running this cell does not\n",
    "# keep adding variables and ops to the global default graph.\n",
    "graph = tf.Graph()\n",
    "with graph.as_default():\n",
    "    W = tf.Variable(tf.random_uniform([1], -1.0, 1.0))\n",
    "    b = tf.Variable(tf.zeros([1]))\n",
    "    y = W * x_data + b\n",
    "\n",
    "    # Mean squared error between the prediction and the targets.\n",
    "    loss = tf.reduce_mean(tf.square(y - y_data))\n",
    "    optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)\n",
    "    train = optimizer.minimize(loss)\n",
    "\n",
    "    # Newer TensorFlow releases renamed initialize_all_variables to\n",
    "    # global_variables_initializer; use the new name when it exists and\n",
    "    # fall back to the old one otherwise.\n",
    "    make_init_op = getattr(tf, 'global_variables_initializer', None)\n",
    "    if make_init_op is None:\n",
    "        make_init_op = tf.initialize_all_variables\n",
    "    init = make_init_op()\n",
    "\n",
    "# The context manager closes the session (and frees its resources) even\n",
    "# if training raises.\n",
    "with tf.Session(graph=graph) as sess:\n",
    "    sess.run(init)\n",
    "\n",
    "    for step in range(NUM_STEPS):\n",
    "        sess.run(train)\n",
    "        if step % LOG_EVERY == 0:\n",
    "            # Fetch both variables in a single run() call.\n",
    "            w_val, b_val = sess.run([W, b])\n",
    "            print(step, w_val, b_val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [Root]",
   "language": "python",
   "name": "Python [Root]"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}