{"cells":[{"cell_type":"markdown","source":["Basic Operations"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"46af7284-f53f-48c1-b520-4d37f4ba9dd6"}}},{"cell_type":"code","source":["from pyspark.sql import SparkSession"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"370b95ce-b77c-4944-89c0-970fe27d7400"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["spark = SparkSession.builder.appName(\"Operations\").getOrCreate()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"2426619a-bed7-4414-a929-06019ec815a8"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["df = spark.read.csv(\"dbfs:/FileStore/shared_uploads/dizhen@hsph.harvard.edu/appl_stock.csv\",inferSchema=True,header=True)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"af7d4359-8d8c-4ed0-8cb9-782d327b4859"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["df.show()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"9c0374d6-ce01-4299-808b-cbe765ccd7bd"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"+-------------------+------------------+------------------+------------------+------------------+---------+------------------+\n| Date| Open| High| Low| Close| Volume| Adj Close|\n+-------------------+------------------+------------------+------------------+------------------+---------+------------------+\n|2010-01-04 00:00:00| 213.429998| 214.499996|212.38000099999996| 214.009998|123432400| 27.727039|\n|2010-01-05 00:00:00| 214.599998| 215.589994| 213.249994| 214.379993|150476200|27.774976000000002|\n|2010-01-06 00:00:00| 214.379993| 215.23| 210.750004| 210.969995|138040000|27.333178000000004|\n|2010-01-07 00:00:00| 211.75| 212.000006| 209.050005| 210.58|119282800| 27.28265|\n|2010-01-08 00:00:00| 210.299994| 212.000006|209.06000500000002|211.98000499999998|111902700| 27.464034|\n|2010-01-11 00:00:00|212.79999700000002| 213.000002| 208.450005|210.11000299999998|115557400| 27.221758|\n|2010-01-12 00:00:00|209.18999499999998|209.76999500000002| 206.419998| 207.720001|148614900| 26.91211|\n|2010-01-13 00:00:00| 207.870005|210.92999500000002| 204.099998| 210.650002|151473000| 27.29172|\n|2010-01-14 00:00:00|210.11000299999998|210.45999700000002| 209.020004| 209.43|108223500| 27.133657|\n|2010-01-15 00:00:00|210.92999500000002|211.59999700000003| 205.869999| 205.93|148516900|26.680197999999997|\n|2010-01-19 00:00:00| 208.330002|215.18999900000003| 207.240004| 215.039995|182501900|27.860484999999997|\n|2010-01-20 00:00:00| 214.910006| 215.549994| 209.500002| 211.73|153038200| 27.431644|\n|2010-01-21 00:00:00| 212.079994|213.30999599999998| 207.210003| 208.069996|152038600| 26.957455|\n|2010-01-22 00:00:00|206.78000600000001| 207.499996| 197.16| 197.75|220441900| 25.620401|\n|2010-01-25 00:00:00|202.51000200000001| 204.699999| 200.190002| 203.070002|266424900|26.309658000000002|\n|2010-01-26 00:00:00|205.95000100000001| 213.710005| 202.580004| 205.940001|466777500| 26.681494|\n|2010-01-27 00:00:00| 206.849995| 210.58| 199.530001| 207.880005|430642100|26.932840000000002|\n|2010-01-28 00:00:00| 204.930004| 205.500004| 198.699995| 199.289995|293375600|25.819922000000002|\n|2010-01-29 00:00:00| 201.079996| 202.199995| 190.250002| 192.060003|311488100| 24.883208|\n|2010-02-01 00:00:00|192.36999699999998| 196.0|191.29999899999999| 194.729998|187469100| 25.229131|\n+-------------------+------------------+------------------+------------------+------------------+---------+------------------+\nonly showing top 20 rows\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["+-------------------+------------------+------------------+------------------+------------------+---------+------------------+\n| Date| Open| High| Low| Close| Volume| Adj Close|\n+-------------------+------------------+------------------+------------------+------------------+---------+------------------+\n|2010-01-04 00:00:00| 213.429998| 214.499996|212.38000099999996| 214.009998|123432400| 27.727039|\n|2010-01-05 00:00:00| 214.599998| 215.589994| 213.249994| 214.379993|150476200|27.774976000000002|\n|2010-01-06 00:00:00| 214.379993| 215.23| 210.750004| 210.969995|138040000|27.333178000000004|\n|2010-01-07 00:00:00| 211.75| 212.000006| 209.050005| 210.58|119282800| 27.28265|\n|2010-01-08 00:00:00| 210.299994| 212.000006|209.06000500000002|211.98000499999998|111902700| 27.464034|\n|2010-01-11 00:00:00|212.79999700000002| 213.000002| 208.450005|210.11000299999998|115557400| 27.221758|\n|2010-01-12 00:00:00|209.18999499999998|209.76999500000002| 206.419998| 207.720001|148614900| 26.91211|\n|2010-01-13 00:00:00| 207.870005|210.92999500000002| 204.099998| 210.650002|151473000| 27.29172|\n|2010-01-14 00:00:00|210.11000299999998|210.45999700000002| 209.020004| 209.43|108223500| 27.133657|\n|2010-01-15 00:00:00|210.92999500000002|211.59999700000003| 205.869999| 205.93|148516900|26.680197999999997|\n|2010-01-19 00:00:00| 208.330002|215.18999900000003| 207.240004| 215.039995|182501900|27.860484999999997|\n|2010-01-20 00:00:00| 214.910006| 215.549994| 209.500002| 211.73|153038200| 27.431644|\n|2010-01-21 00:00:00| 212.079994|213.30999599999998| 207.210003| 208.069996|152038600| 26.957455|\n|2010-01-22 00:00:00|206.78000600000001| 207.499996| 197.16| 197.75|220441900| 25.620401|\n|2010-01-25 00:00:00|202.51000200000001| 204.699999| 200.190002| 203.070002|266424900|26.309658000000002|\n|2010-01-26 00:00:00|205.95000100000001| 213.710005| 202.580004| 205.940001|466777500| 26.681494|\n|2010-01-27 00:00:00| 206.849995| 210.58| 199.530001| 207.880005|430642100|26.932840000000002|\n|2010-01-28 00:00:00| 204.930004| 205.500004| 198.699995| 199.289995|293375600|25.819922000000002|\n|2010-01-29 00:00:00| 201.079996| 202.199995| 190.250002| 192.060003|311488100| 24.883208|\n|2010-02-01 00:00:00|192.36999699999998| 196.0|191.29999899999999| 194.729998|187469100| 25.229131|\n+-------------------+------------------+------------------+------------------+------------------+---------+------------------+\nonly showing top 20 rows\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["df.printSchema()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"3a1321f4-7cf6-46a5-98cc-1b5f5f70704b"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"root\n |-- Date: timestamp (nullable = true)\n |-- Open: double (nullable = true)\n |-- High: double (nullable = true)\n |-- Low: double (nullable = true)\n |-- Close: double (nullable = true)\n |-- Volume: integer (nullable = true)\n |-- Adj Close: double (nullable = true)\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["root\n |-- Date: timestamp (nullable = true)\n |-- Open: double (nullable = true)\n |-- High: double (nullable = true)\n |-- Low: double (nullable = true)\n |-- Close: double (nullable = true)\n |-- Volume: integer (nullable = true)\n |-- Adj Close: double (nullable = true)\n\n"]}}],"execution_count":0},{"cell_type":"markdown","source":["Filter Data"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"548aa2aa-b82b-46e1-95d8-eaee1df8f995"}}},{"cell_type":"code","source":["df.filter(\"Close<500\").show()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"e331ec7f-724e-40fa-9f13-2151965adc9b"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"+-------------------+------------------+------------------+------------------+------------------+---------+------------------+\n| Date| Open| High| Low| Close| Volume| Adj Close|\n+-------------------+------------------+------------------+------------------+------------------+---------+------------------+\n|2010-01-04 00:00:00| 213.429998| 214.499996|212.38000099999996| 214.009998|123432400| 27.727039|\n|2010-01-05 00:00:00| 214.599998| 215.589994| 213.249994| 214.379993|150476200|27.774976000000002|\n|2010-01-06 00:00:00| 214.379993| 215.23| 210.750004| 210.969995|138040000|27.333178000000004|\n|2010-01-07 00:00:00| 211.75| 212.000006| 209.050005| 210.58|119282800| 27.28265|\n|2010-01-08 00:00:00| 210.299994| 212.000006|209.06000500000002|211.98000499999998|111902700| 27.464034|\n|2010-01-11 00:00:00|212.79999700000002| 213.000002| 208.450005|210.11000299999998|115557400| 27.221758|\n|2010-01-12 00:00:00|209.18999499999998|209.76999500000002| 206.419998| 207.720001|148614900| 26.91211|\n|2010-01-13 00:00:00| 207.870005|210.92999500000002| 204.099998| 210.650002|151473000| 27.29172|\n|2010-01-14 00:00:00|210.11000299999998|210.45999700000002| 209.020004| 209.43|108223500| 27.133657|\n|2010-01-15 00:00:00|210.92999500000002|211.59999700000003| 205.869999| 205.93|148516900|26.680197999999997|\n|2010-01-19 00:00:00| 208.330002|215.18999900000003| 207.240004| 215.039995|182501900|27.860484999999997|\n|2010-01-20 00:00:00| 214.910006| 215.549994| 209.500002| 211.73|153038200| 27.431644|\n|2010-01-21 00:00:00| 212.079994|213.30999599999998| 207.210003| 208.069996|152038600| 26.957455|\n|2010-01-22 00:00:00|206.78000600000001| 207.499996| 197.16| 197.75|220441900| 25.620401|\n|2010-01-25 00:00:00|202.51000200000001| 204.699999| 200.190002| 203.070002|266424900|26.309658000000002|\n|2010-01-26 00:00:00|205.95000100000001| 213.710005| 202.580004| 205.940001|466777500| 26.681494|\n|2010-01-27 00:00:00| 206.849995| 210.58| 199.530001| 207.880005|430642100|26.932840000000002|\n|2010-01-28 00:00:00| 204.930004| 205.500004| 198.699995| 199.289995|293375600|25.819922000000002|\n|2010-01-29 00:00:00| 201.079996| 202.199995| 190.250002| 192.060003|311488100| 24.883208|\n|2010-02-01 00:00:00|192.36999699999998| 196.0|191.29999899999999| 194.729998|187469100| 25.229131|\n+-------------------+------------------+------------------+------------------+------------------+---------+------------------+\nonly showing top 20 rows\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["+-------------------+------------------+------------------+------------------+------------------+---------+------------------+\n| Date| Open| High| Low| Close| Volume| Adj Close|\n+-------------------+------------------+------------------+------------------+------------------+---------+------------------+\n|2010-01-04 00:00:00| 213.429998| 214.499996|212.38000099999996| 214.009998|123432400| 27.727039|\n|2010-01-05 00:00:00| 214.599998| 215.589994| 213.249994| 214.379993|150476200|27.774976000000002|\n|2010-01-06 00:00:00| 214.379993| 215.23| 210.750004| 210.969995|138040000|27.333178000000004|\n|2010-01-07 00:00:00| 211.75| 212.000006| 209.050005| 210.58|119282800| 27.28265|\n|2010-01-08 00:00:00| 210.299994| 212.000006|209.06000500000002|211.98000499999998|111902700| 27.464034|\n|2010-01-11 00:00:00|212.79999700000002| 213.000002| 208.450005|210.11000299999998|115557400| 27.221758|\n|2010-01-12 00:00:00|209.18999499999998|209.76999500000002| 206.419998| 207.720001|148614900| 26.91211|\n|2010-01-13 00:00:00| 207.870005|210.92999500000002| 204.099998| 210.650002|151473000| 27.29172|\n|2010-01-14 00:00:00|210.11000299999998|210.45999700000002| 209.020004| 209.43|108223500| 27.133657|\n|2010-01-15 00:00:00|210.92999500000002|211.59999700000003| 205.869999| 205.93|148516900|26.680197999999997|\n|2010-01-19 00:00:00| 208.330002|215.18999900000003| 207.240004| 215.039995|182501900|27.860484999999997|\n|2010-01-20 00:00:00| 214.910006| 215.549994| 209.500002| 211.73|153038200| 27.431644|\n|2010-01-21 00:00:00| 212.079994|213.30999599999998| 207.210003| 208.069996|152038600| 26.957455|\n|2010-01-22 00:00:00|206.78000600000001| 207.499996| 197.16| 197.75|220441900| 25.620401|\n|2010-01-25 00:00:00|202.51000200000001| 204.699999| 200.190002| 203.070002|266424900|26.309658000000002|\n|2010-01-26 00:00:00|205.95000100000001| 213.710005| 202.580004| 205.940001|466777500| 26.681494|\n|2010-01-27 00:00:00| 206.849995| 210.58| 199.530001| 207.880005|430642100|26.932840000000002|\n|2010-01-28 00:00:00| 204.930004| 205.500004| 198.699995| 199.289995|293375600|25.819922000000002|\n|2010-01-29 00:00:00| 201.079996| 202.199995| 190.250002| 192.060003|311488100| 24.883208|\n|2010-02-01 00:00:00|192.36999699999998| 196.0|191.29999899999999| 194.729998|187469100| 25.229131|\n+-------------------+------------------+------------------+------------------+------------------+---------+------------------+\nonly showing top 20 rows\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["df.filter(\"Close<500\").select('Open').show()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"878cfab4-d587-46bf-87e2-72a0695b2372"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"+------------------+\n| Open|\n+------------------+\n| 213.429998|\n| 214.599998|\n| 214.379993|\n| 211.75|\n| 210.299994|\n|212.79999700000002|\n|209.18999499999998|\n| 207.870005|\n|210.11000299999998|\n|210.92999500000002|\n| 208.330002|\n| 214.910006|\n| 212.079994|\n|206.78000600000001|\n|202.51000200000001|\n|205.95000100000001|\n| 206.849995|\n| 204.930004|\n| 201.079996|\n|192.36999699999998|\n+------------------+\nonly showing top 20 rows\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["+------------------+\n| Open|\n+------------------+\n| 213.429998|\n| 214.599998|\n| 214.379993|\n| 211.75|\n| 210.299994|\n|212.79999700000002|\n|209.18999499999998|\n| 207.870005|\n|210.11000299999998|\n|210.92999500000002|\n| 208.330002|\n| 214.910006|\n| 212.079994|\n|206.78000600000001|\n|202.51000200000001|\n|205.95000100000001|\n| 206.849995|\n| 204.930004|\n| 201.079996|\n|192.36999699999998|\n+------------------+\nonly showing top 20 rows\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["df.filter(\"Close<500\").select(['Open','Close']).show()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"54f5254f-1fba-4929-b8e8-df242a7f8b60"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"+------------------+------------------+\n| Open| Close|\n+------------------+------------------+\n| 213.429998| 214.009998|\n| 214.599998| 214.379993|\n| 214.379993| 210.969995|\n| 211.75| 210.58|\n| 210.299994|211.98000499999998|\n|212.79999700000002|210.11000299999998|\n|209.18999499999998| 207.720001|\n| 207.870005| 210.650002|\n|210.11000299999998| 209.43|\n|210.92999500000002| 205.93|\n| 208.330002| 215.039995|\n| 214.910006| 211.73|\n| 212.079994| 208.069996|\n|206.78000600000001| 197.75|\n|202.51000200000001| 203.070002|\n|205.95000100000001| 205.940001|\n| 206.849995| 207.880005|\n| 204.930004| 199.289995|\n| 201.079996| 192.060003|\n|192.36999699999998| 194.729998|\n+------------------+------------------+\nonly showing top 20 rows\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["+------------------+------------------+\n| Open| Close|\n+------------------+------------------+\n| 213.429998| 214.009998|\n| 214.599998| 214.379993|\n| 214.379993| 210.969995|\n| 211.75| 210.58|\n| 210.299994|211.98000499999998|\n|212.79999700000002|210.11000299999998|\n|209.18999499999998| 207.720001|\n| 207.870005| 210.650002|\n|210.11000299999998| 209.43|\n|210.92999500000002| 205.93|\n| 208.330002| 215.039995|\n| 214.910006| 211.73|\n| 212.079994| 208.069996|\n|206.78000600000001| 197.75|\n|202.51000200000001| 203.070002|\n|205.95000100000001| 205.940001|\n| 206.849995| 207.880005|\n| 204.930004| 199.289995|\n| 201.079996| 192.060003|\n|192.36999699999998| 194.729998|\n+------------------+------------------+\nonly showing top 20 rows\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["df.filter(df[\"Close\"] < 200).show()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"eddc4b6c-6c68-466f-b8d3-8f51c7b9109b"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"+-------------------+------------------+------------------+------------------+------------------+---------+------------------+\n| Date| Open| High| Low| Close| Volume| Adj Close|\n+-------------------+------------------+------------------+------------------+------------------+---------+------------------+\n|2010-01-22 00:00:00|206.78000600000001| 207.499996| 197.16| 197.75|220441900| 25.620401|\n|2010-01-28 00:00:00| 204.930004| 205.500004| 198.699995| 199.289995|293375600|25.819922000000002|\n|2010-01-29 00:00:00| 201.079996| 202.199995| 190.250002| 192.060003|311488100| 24.883208|\n|2010-02-01 00:00:00|192.36999699999998| 196.0|191.29999899999999| 194.729998|187469100| 25.229131|\n|2010-02-02 00:00:00| 195.909998| 196.319994|193.37999299999998| 195.859997|174585600|25.375532999999997|\n|2010-02-03 00:00:00| 195.169994| 200.200003| 194.420004| 199.229994|153832000|25.812148999999998|\n|2010-02-04 00:00:00| 196.730003| 198.370001| 191.570005| 192.050003|189413000| 24.881912|\n|2010-02-05 00:00:00|192.63000300000002| 196.0| 190.850002| 195.460001|212576700|25.323710000000002|\n|2010-02-08 00:00:00| 195.690006|197.88000300000002| 193.999994|194.11999699999998|119567700| 25.1501|\n|2010-02-09 00:00:00| 196.419996| 197.499994| 194.749998|196.19000400000002|158221700| 25.418289|\n|2010-02-10 00:00:00| 195.889997| 196.6| 194.26|195.12000700000002| 92590400| 25.27966|\n|2010-02-11 00:00:00| 194.880001| 199.750006|194.05999599999998| 198.669994|137586400| 25.739595|\n|2010-02-23 00:00:00| 199.999998| 201.330002| 195.709993| 197.059998|143773700| 25.531005|\n|2014-06-09 00:00:00| 92.699997| 93.879997| 91.75| 93.699997| 75415000| 88.906324|\n|2014-06-10 00:00:00| 94.730003| 95.050003| 93.57| 94.25| 62777000| 89.428189|\n|2014-06-11 00:00:00| 94.129997| 94.760002| 93.470001| 93.860001| 45681000| 89.058142|\n|2014-06-12 00:00:00| 94.040001| 94.120003| 91.900002| 92.290001| 54749000| 87.568463|\n|2014-06-13 00:00:00| 92.199997| 92.440002| 90.879997| 91.279999| 54525000| 86.610132|\n|2014-06-16 00:00:00| 91.510002| 92.75| 91.449997| 92.199997| 35561000| 87.483064|\n|2014-06-17 00:00:00| 92.309998| 92.699997| 91.800003| 92.08000200000001| 29726000| 87.36920699999999|\n+-------------------+------------------+------------------+------------------+------------------+---------+------------------+\nonly showing top 20 rows\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["+-------------------+------------------+------------------+------------------+------------------+---------+------------------+\n| Date| Open| High| Low| Close| Volume| Adj Close|\n+-------------------+------------------+------------------+------------------+------------------+---------+------------------+\n|2010-01-22 00:00:00|206.78000600000001| 207.499996| 197.16| 197.75|220441900| 25.620401|\n|2010-01-28 00:00:00| 204.930004| 205.500004| 198.699995| 199.289995|293375600|25.819922000000002|\n|2010-01-29 00:00:00| 201.079996| 202.199995| 190.250002| 192.060003|311488100| 24.883208|\n|2010-02-01 00:00:00|192.36999699999998| 196.0|191.29999899999999| 194.729998|187469100| 25.229131|\n|2010-02-02 00:00:00| 195.909998| 196.319994|193.37999299999998| 195.859997|174585600|25.375532999999997|\n|2010-02-03 00:00:00| 195.169994| 200.200003| 194.420004| 199.229994|153832000|25.812148999999998|\n|2010-02-04 00:00:00| 196.730003| 198.370001| 191.570005| 192.050003|189413000| 24.881912|\n|2010-02-05 00:00:00|192.63000300000002| 196.0| 190.850002| 195.460001|212576700|25.323710000000002|\n|2010-02-08 00:00:00| 195.690006|197.88000300000002| 193.999994|194.11999699999998|119567700| 25.1501|\n|2010-02-09 00:00:00| 196.419996| 197.499994| 194.749998|196.19000400000002|158221700| 25.418289|\n|2010-02-10 00:00:00| 195.889997| 196.6| 194.26|195.12000700000002| 92590400| 25.27966|\n|2010-02-11 00:00:00| 194.880001| 199.750006|194.05999599999998| 198.669994|137586400| 25.739595|\n|2010-02-23 00:00:00| 199.999998| 201.330002| 195.709993| 197.059998|143773700| 25.531005|\n|2014-06-09 00:00:00| 92.699997| 93.879997| 91.75| 93.699997| 75415000| 88.906324|\n|2014-06-10 00:00:00| 94.730003| 95.050003| 93.57| 94.25| 62777000| 89.428189|\n|2014-06-11 00:00:00| 94.129997| 94.760002| 93.470001| 93.860001| 45681000| 89.058142|\n|2014-06-12 00:00:00| 94.040001| 94.120003| 91.900002| 92.290001| 54749000| 87.568463|\n|2014-06-13 00:00:00| 92.199997| 92.440002| 90.879997| 91.279999| 54525000| 86.610132|\n|2014-06-16 00:00:00| 91.510002| 92.75| 91.449997| 92.199997| 35561000| 87.483064|\n|2014-06-17 00:00:00| 92.309998| 92.699997| 91.800003| 92.08000200000001| 29726000| 87.36920699999999|\n+-------------------+------------------+------------------+------------------+------------------+---------+------------------+\nonly showing top 20 rows\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["df.filter( (df[\"Close\"] < 200) & (df['Open'] > 200) ).show()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"91156948-ca41-40b3-8f8e-21edce25ac61"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"+-------------------+------------------+----------+----------+----------+---------+------------------+\n| Date| Open| High| Low| Close| Volume| Adj Close|\n+-------------------+------------------+----------+----------+----------+---------+------------------+\n|2010-01-22 00:00:00|206.78000600000001|207.499996| 197.16| 197.75|220441900| 25.620401|\n|2010-01-28 00:00:00| 204.930004|205.500004|198.699995|199.289995|293375600|25.819922000000002|\n|2010-01-29 00:00:00| 201.079996|202.199995|190.250002|192.060003|311488100| 24.883208|\n+-------------------+------------------+----------+----------+----------+---------+------------------+\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["+-------------------+------------------+----------+----------+----------+---------+------------------+\n| Date| Open| High| Low| Close| Volume| Adj Close|\n+-------------------+------------------+----------+----------+----------+---------+------------------+\n|2010-01-22 00:00:00|206.78000600000001|207.499996| 197.16| 197.75|220441900| 25.620401|\n|2010-01-28 00:00:00| 204.930004|205.500004|198.699995|199.289995|293375600|25.819922000000002|\n|2010-01-29 00:00:00| 201.079996|202.199995|190.250002|192.060003|311488100| 24.883208|\n+-------------------+------------------+----------+----------+----------+---------+------------------+\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["df.filter( (df[\"Close\"] < 200) | (df['Open'] > 200) ).show()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"78303c02-a711-431e-b0c3-78a5597b77a3"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"+-------------------+------------------+------------------+------------------+------------------+---------+------------------+\n| Date| Open| High| Low| Close| Volume| Adj Close|\n+-------------------+------------------+------------------+------------------+------------------+---------+------------------+\n|2010-01-04 00:00:00| 213.429998| 214.499996|212.38000099999996| 214.009998|123432400| 27.727039|\n|2010-01-05 00:00:00| 214.599998| 215.589994| 213.249994| 214.379993|150476200|27.774976000000002|\n|2010-01-06 00:00:00| 214.379993| 215.23| 210.750004| 210.969995|138040000|27.333178000000004|\n|2010-01-07 00:00:00| 211.75| 212.000006| 209.050005| 210.58|119282800| 27.28265|\n|2010-01-08 00:00:00| 210.299994| 212.000006|209.06000500000002|211.98000499999998|111902700| 27.464034|\n|2010-01-11 00:00:00|212.79999700000002| 213.000002| 208.450005|210.11000299999998|115557400| 27.221758|\n|2010-01-12 00:00:00|209.18999499999998|209.76999500000002| 206.419998| 207.720001|148614900| 26.91211|\n|2010-01-13 00:00:00| 207.870005|210.92999500000002| 204.099998| 210.650002|151473000| 27.29172|\n|2010-01-14 00:00:00|210.11000299999998|210.45999700000002| 209.020004| 209.43|108223500| 27.133657|\n|2010-01-15 00:00:00|210.92999500000002|211.59999700000003| 205.869999| 205.93|148516900|26.680197999999997|\n|2010-01-19 00:00:00| 208.330002|215.18999900000003| 207.240004| 215.039995|182501900|27.860484999999997|\n|2010-01-20 00:00:00| 214.910006| 215.549994| 209.500002| 211.73|153038200| 27.431644|\n|2010-01-21 00:00:00| 212.079994|213.30999599999998| 207.210003| 208.069996|152038600| 26.957455|\n|2010-01-22 00:00:00|206.78000600000001| 207.499996| 197.16| 197.75|220441900| 25.620401|\n|2010-01-25 00:00:00|202.51000200000001| 204.699999| 200.190002| 203.070002|266424900|26.309658000000002|\n|2010-01-26 00:00:00|205.95000100000001| 213.710005| 202.580004| 205.940001|466777500| 26.681494|\n|2010-01-27 00:00:00| 206.849995| 210.58| 199.530001| 207.880005|430642100|26.932840000000002|\n|2010-01-28 00:00:00| 204.930004| 205.500004| 198.699995| 199.289995|293375600|25.819922000000002|\n|2010-01-29 00:00:00| 201.079996| 202.199995| 190.250002| 192.060003|311488100| 24.883208|\n|2010-02-01 00:00:00|192.36999699999998| 196.0|191.29999899999999| 194.729998|187469100| 25.229131|\n+-------------------+------------------+------------------+------------------+------------------+---------+------------------+\nonly showing top 20 rows\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["+-------------------+------------------+------------------+------------------+------------------+---------+------------------+\n| Date| Open| High| Low| Close| Volume| Adj Close|\n+-------------------+------------------+------------------+------------------+------------------+---------+------------------+\n|2010-01-04 00:00:00| 213.429998| 214.499996|212.38000099999996| 214.009998|123432400| 27.727039|\n|2010-01-05 00:00:00| 214.599998| 215.589994| 213.249994| 214.379993|150476200|27.774976000000002|\n|2010-01-06 00:00:00| 214.379993| 215.23| 210.750004| 210.969995|138040000|27.333178000000004|\n|2010-01-07 00:00:00| 211.75| 212.000006| 209.050005| 210.58|119282800| 27.28265|\n|2010-01-08 00:00:00| 210.299994| 212.000006|209.06000500000002|211.98000499999998|111902700| 27.464034|\n|2010-01-11 00:00:00|212.79999700000002| 213.000002| 208.450005|210.11000299999998|115557400| 27.221758|\n|2010-01-12 00:00:00|209.18999499999998|209.76999500000002| 206.419998| 207.720001|148614900| 26.91211|\n|2010-01-13 00:00:00| 207.870005|210.92999500000002| 204.099998| 210.650002|151473000| 27.29172|\n|2010-01-14 00:00:00|210.11000299999998|210.45999700000002| 209.020004| 209.43|108223500| 27.133657|\n|2010-01-15 00:00:00|210.92999500000002|211.59999700000003| 205.869999| 205.93|148516900|26.680197999999997|\n|2010-01-19 00:00:00| 208.330002|215.18999900000003| 207.240004| 215.039995|182501900|27.860484999999997|\n|2010-01-20 00:00:00| 214.910006| 215.549994| 209.500002| 211.73|153038200| 27.431644|\n|2010-01-21 00:00:00| 212.079994|213.30999599999998| 207.210003| 208.069996|152038600| 26.957455|\n|2010-01-22 00:00:00|206.78000600000001| 207.499996| 197.16| 197.75|220441900| 25.620401|\n|2010-01-25 00:00:00|202.51000200000001| 204.699999| 200.190002| 203.070002|266424900|26.309658000000002|\n|2010-01-26 00:00:00|205.95000100000001| 213.710005| 202.580004| 205.940001|466777500| 26.681494|\n|2010-01-27 00:00:00| 206.849995| 210.58| 199.530001| 207.880005|430642100|26.932840000000002|\n|2010-01-28 00:00:00| 204.930004| 205.500004| 198.699995| 199.289995|293375600|25.819922000000002|\n|2010-01-29 00:00:00| 201.079996| 202.199995| 190.250002| 192.060003|311488100| 24.883208|\n|2010-02-01 00:00:00|192.36999699999998| 196.0|191.29999899999999| 194.729998|187469100| 25.229131|\n+-------------------+------------------+------------------+------------------+------------------+---------+------------------+\nonly showing top 20 rows\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["df.filter( (df[\"Close\"] < 200) & ~(df['Open'] < 200) ).show()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"3c299897-7bca-4629-b2cb-b656c7462ca7"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"+-------------------+------------------+----------+----------+----------+---------+------------------+\n| Date| Open| High| Low| Close| Volume| Adj Close|\n+-------------------+------------------+----------+----------+----------+---------+------------------+\n|2010-01-22 00:00:00|206.78000600000001|207.499996| 197.16| 197.75|220441900| 25.620401|\n|2010-01-28 00:00:00| 204.930004|205.500004|198.699995|199.289995|293375600|25.819922000000002|\n|2010-01-29 00:00:00| 201.079996|202.199995|190.250002|192.060003|311488100| 24.883208|\n+-------------------+------------------+----------+----------+----------+---------+------------------+\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["+-------------------+------------------+----------+----------+----------+---------+------------------+\n| Date| Open| High| Low| Close| Volume| Adj Close|\n+-------------------+------------------+----------+----------+----------+---------+------------------+\n|2010-01-22 00:00:00|206.78000600000001|207.499996| 197.16| 197.75|220441900| 25.620401|\n|2010-01-28 00:00:00| 204.930004|205.500004|198.699995|199.289995|293375600|25.819922000000002|\n|2010-01-29 00:00:00| 201.079996|202.199995|190.250002|192.060003|311488100| 24.883208|\n+-------------------+------------------+----------+----------+----------+---------+------------------+\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["df.filter(df[\"Low\"] == 197.16).show()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"e840e0bc-542a-4b5a-83e4-6b47fe40962f"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"+-------------------+------------------+----------+------+------+---------+---------+\n| Date| Open| High| Low| Close| Volume|Adj Close|\n+-------------------+------------------+----------+------+------+---------+---------+\n|2010-01-22 00:00:00|206.78000600000001|207.499996|197.16|197.75|220441900|25.620401|\n+-------------------+------------------+----------+------+------+---------+---------+\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["+-------------------+------------------+----------+------+------+---------+---------+\n| Date| Open| High| Low| Close| Volume|Adj Close|\n+-------------------+------------------+----------+------+------+---------+---------+\n|2010-01-22 00:00:00|206.78000600000001|207.499996|197.16|197.75|220441900|25.620401|\n+-------------------+------------------+----------+------+------+---------+---------+\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["df.filter(df[\"Low\"] == 197.16).collect()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"d19327ae-3a96-4f91-a08c-4a6eb435c428"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"Out[14]: [Row(Date=datetime.datetime(2010, 1, 22, 0, 0), Open=206.78000600000001, High=207.499996, Low=197.16, Close=197.75, Volume=220441900, Adj Close=25.620401)]","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["Out[14]: [Row(Date=datetime.datetime(2010, 1, 22, 0, 0), Open=206.78000600000001, High=207.499996, Low=197.16, Close=197.75, Volume=220441900, Adj Close=25.620401)]"]}}],"execution_count":0},{"cell_type":"code","source":["result = df.filter(df[\"Low\"] == 197.16).collect()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"c0fecd46-5642-4607-8024-7015f90ac713"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["type(result[0])"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"e23f1768-04f3-49f4-8204-3450c2f80392"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"Out[16]: pyspark.sql.types.Row","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["Out[16]: pyspark.sql.types.Row"]}}],"execution_count":0},{"cell_type":"code","source":["row = result[0]"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"b4a9b26b-4df9-4252-b657-8da0e4f33c53"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["row.asDict()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"c5a15b9c-29b8-47b8-b1bf-3535ce55fd7a"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"Out[19]: {'Date': datetime.datetime(2010, 1, 22, 0, 0),\n 'Open': 206.78000600000001,\n 'High': 207.499996,\n 'Low': 197.16,\n 'Close': 197.75,\n 'Volume': 220441900,\n 'Adj Close': 25.620401}","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["Out[19]: {'Date': datetime.datetime(2010, 1, 22, 0, 0),\n 'Open': 206.78000600000001,\n 'High': 207.499996,\n 'Low': 197.16,\n 'Close': 197.75,\n 'Volume': 220441900,\n 'Adj Close': 25.620401}"]}}],"execution_count":0},{"cell_type":"code","source":["for item in result[0]:\n print(item)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"81e70bb0-783c-4a9f-9a7a-3c2477dce85d"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"2010-01-22 00:00:00\n206.78000600000001\n207.499996\n197.16\n197.75\n220441900\n25.620401\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["2010-01-22 00:00:00\n206.78000600000001\n207.499996\n197.16\n197.75\n220441900\n25.620401\n"]}}],"execution_count":0},{"cell_type":"markdown","source":["Dates and Timestamps"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"82ea413b-ff05-4bcd-b6e3-f4f2341a1864"}}},{"cell_type":"code","source":["from pyspark.sql.functions import format_number,dayofmonth,hour,dayofyear,month,year,weekofyear,date_format"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"6458596b-1b77-4394-9bf5-d046fb66bcdf"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["df.select(hour(df['Date'])).show()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"d859a165-6981-41ef-9fec-5ad5fce00b67"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"+----------+\n|hour(Date)|\n+----------+\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n+----------+\nonly showing top 20 rows\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["+----------+\n|hour(Date)|\n+----------+\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n| 0|\n+----------+\nonly showing top 20 rows\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["df.select(dayofyear(df['Date'])).show()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"3b00caba-399b-4025-9f20-ed8ae7d76655"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"+---------------+\n|dayofyear(Date)|\n+---------------+\n| 4|\n| 5|\n| 6|\n| 7|\n| 8|\n| 11|\n| 12|\n| 13|\n| 14|\n| 15|\n| 19|\n| 20|\n| 21|\n| 22|\n| 25|\n| 26|\n| 27|\n| 28|\n| 29|\n| 32|\n+---------------+\nonly showing top 20 rows\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["+---------------+\n|dayofyear(Date)|\n+---------------+\n| 4|\n| 5|\n| 6|\n| 7|\n| 8|\n| 11|\n| 12|\n| 13|\n| 14|\n| 15|\n| 19|\n| 20|\n| 21|\n| 22|\n| 25|\n| 26|\n| 27|\n| 28|\n| 29|\n| 32|\n+---------------+\nonly showing top 20 rows\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["df.select(month(df['Date'])).show()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"cf389c0e-7195-4ef1-b0c9-9034fc0bd665"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"+-----------+\n|month(Date)|\n+-----------+\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 2|\n+-----------+\nonly showing top 20 rows\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["+-----------+\n|month(Date)|\n+-----------+\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 1|\n| 2|\n+-----------+\nonly showing top 20 rows\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["df.select(year(df['Date'])).show()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"673fb41b-6d41-476c-ad0d-b140be622212"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"+----------+\n|year(Date)|\n+----------+\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n+----------+\nonly showing top 20 rows\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["+----------+\n|year(Date)|\n+----------+\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n| 2010|\n+----------+\nonly showing top 20 rows\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["df.withColumn(\"Year\",year(df['Date'])).show()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"a043971e-217f-480d-b2cc-533c04c0315e"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"+-------------------+------------------+------------------+------------------+------------------+---------+------------------+----+\n| Date| Open| High| Low| Close| Volume| Adj Close|Year|\n+-------------------+------------------+------------------+------------------+------------------+---------+------------------+----+\n|2010-01-04 00:00:00| 213.429998| 214.499996|212.38000099999996| 214.009998|123432400| 27.727039|2010|\n|2010-01-05 00:00:00| 214.599998| 215.589994| 213.249994| 214.379993|150476200|27.774976000000002|2010|\n|2010-01-06 00:00:00| 214.379993| 215.23| 210.750004| 210.969995|138040000|27.333178000000004|2010|\n|2010-01-07 00:00:00| 211.75| 212.000006| 209.050005| 210.58|119282800| 27.28265|2010|\n|2010-01-08 00:00:00| 210.299994| 212.000006|209.06000500000002|211.98000499999998|111902700| 27.464034|2010|\n|2010-01-11 00:00:00|212.79999700000002| 213.000002| 208.450005|210.11000299999998|115557400| 27.221758|2010|\n|2010-01-12 00:00:00|209.18999499999998|209.76999500000002| 206.419998| 207.720001|148614900| 26.91211|2010|\n|2010-01-13 00:00:00| 207.870005|210.92999500000002| 204.099998| 210.650002|151473000| 27.29172|2010|\n|2010-01-14 00:00:00|210.11000299999998|210.45999700000002| 209.020004| 209.43|108223500| 27.133657|2010|\n|2010-01-15 00:00:00|210.92999500000002|211.59999700000003| 205.869999| 205.93|148516900|26.680197999999997|2010|\n|2010-01-19 00:00:00| 208.330002|215.18999900000003| 207.240004| 215.039995|182501900|27.860484999999997|2010|\n|2010-01-20 00:00:00| 214.910006| 215.549994| 209.500002| 211.73|153038200| 27.431644|2010|\n|2010-01-21 00:00:00| 212.079994|213.30999599999998| 207.210003| 208.069996|152038600| 26.957455|2010|\n|2010-01-22 00:00:00|206.78000600000001| 207.499996| 197.16| 197.75|220441900| 25.620401|2010|\n|2010-01-25 00:00:00|202.51000200000001| 204.699999| 200.190002| 203.070002|266424900|26.309658000000002|2010|\n|2010-01-26 00:00:00|205.95000100000001| 213.710005| 202.580004| 205.940001|466777500| 26.681494|2010|\n|2010-01-27 00:00:00| 206.849995| 210.58| 199.530001| 207.880005|430642100|26.932840000000002|2010|\n|2010-01-28 00:00:00| 204.930004| 205.500004| 198.699995| 199.289995|293375600|25.819922000000002|2010|\n|2010-01-29 00:00:00| 201.079996| 202.199995| 190.250002| 192.060003|311488100| 24.883208|2010|\n|2010-02-01 00:00:00|192.36999699999998| 196.0|191.29999899999999| 194.729998|187469100| 25.229131|2010|\n+-------------------+------------------+------------------+------------------+------------------+---------+------------------+----+\nonly showing top 20 rows\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["+-------------------+------------------+------------------+------------------+------------------+---------+------------------+----+\n| Date| Open| High| Low| Close| Volume| Adj Close|Year|\n+-------------------+------------------+------------------+------------------+------------------+---------+------------------+----+\n|2010-01-04 00:00:00| 213.429998| 214.499996|212.38000099999996| 214.009998|123432400| 27.727039|2010|\n|2010-01-05 00:00:00| 214.599998| 215.589994| 213.249994| 214.379993|150476200|27.774976000000002|2010|\n|2010-01-06 00:00:00| 214.379993| 215.23| 210.750004| 210.969995|138040000|27.333178000000004|2010|\n|2010-01-07 00:00:00| 211.75| 212.000006| 209.050005| 210.58|119282800| 27.28265|2010|\n|2010-01-08 00:00:00| 210.299994| 212.000006|209.06000500000002|211.98000499999998|111902700| 27.464034|2010|\n|2010-01-11 00:00:00|212.79999700000002| 213.000002| 208.450005|210.11000299999998|115557400| 27.221758|2010|\n|2010-01-12 00:00:00|209.18999499999998|209.76999500000002| 206.419998| 207.720001|148614900| 26.91211|2010|\n|2010-01-13 00:00:00| 207.870005|210.92999500000002| 204.099998| 210.650002|151473000| 27.29172|2010|\n|2010-01-14 00:00:00|210.11000299999998|210.45999700000002| 209.020004| 209.43|108223500| 27.133657|2010|\n|2010-01-15 00:00:00|210.92999500000002|211.59999700000003| 205.869999| 205.93|148516900|26.680197999999997|2010|\n|2010-01-19 00:00:00| 208.330002|215.18999900000003| 207.240004| 215.039995|182501900|27.860484999999997|2010|\n|2010-01-20 00:00:00| 214.910006| 215.549994| 209.500002| 211.73|153038200| 27.431644|2010|\n|2010-01-21 00:00:00| 212.079994|213.30999599999998| 207.210003| 208.069996|152038600| 26.957455|2010|\n|2010-01-22 00:00:00|206.78000600000001| 207.499996| 197.16| 197.75|220441900| 25.620401|2010|\n|2010-01-25 00:00:00|202.51000200000001| 204.699999| 200.190002| 203.070002|266424900|26.309658000000002|2010|\n|2010-01-26 00:00:00|205.95000100000001| 213.710005| 202.580004| 205.940001|466777500| 26.681494|2010|\n|2010-01-27 00:00:00| 206.849995| 210.58| 199.530001| 207.880005|430642100|26.932840000000002|2010|\n|2010-01-28 00:00:00| 204.930004| 205.500004| 198.699995| 199.289995|293375600|25.819922000000002|2010|\n|2010-01-29 00:00:00| 201.079996| 202.199995| 190.250002| 192.060003|311488100| 24.883208|2010|\n|2010-02-01 00:00:00|192.36999699999998| 196.0|191.29999899999999| 194.729998|187469100| 25.229131|2010|\n+-------------------+------------------+------------------+------------------+------------------+---------+------------------+----+\nonly showing top 20 rows\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["newdf = df.withColumn(\"Year\",year(df['Date']))\nnewdf.groupBy(\"Year\").mean()[['avg(Year)','avg(Close)']].show()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"520c02fd-2b87-4a98-9944-aa3d330b96cd"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"+---------+------------------+\n|avg(Year)| avg(Close)|\n+---------+------------------+\n| 2015.0|120.03999980555547|\n| 2013.0| 472.6348802857143|\n| 2014.0| 295.4023416507935|\n| 2012.0| 576.0497195640002|\n| 2016.0|104.60400786904763|\n| 2010.0| 259.8424600000002|\n| 2011.0|364.00432532142867|\n+---------+------------------+\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["+---------+------------------+\n|avg(Year)| avg(Close)|\n+---------+------------------+\n| 2015.0|120.03999980555547|\n| 2013.0| 472.6348802857143|\n| 2014.0| 295.4023416507935|\n| 2012.0| 576.0497195640002|\n| 2016.0|104.60400786904763|\n| 2010.0| 259.8424600000002|\n| 2011.0|364.00432532142867|\n+---------+------------------+\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["result = newdf.groupBy(\"Year\").mean()[['avg(Year)','avg(Close)']]\nresult = result.withColumnRenamed(\"avg(Year)\",\"Year\")\nresult = result.select('Year',format_number('avg(Close)',2).alias(\"Mean Close\")).show()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"e077340c-574e-47ae-929e-496e62e20276"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"+------+----------+\n| Year|Mean Close|\n+------+----------+\n|2015.0| 120.04|\n|2013.0| 472.63|\n|2014.0| 295.40|\n|2012.0| 576.05|\n|2016.0| 104.60|\n|2010.0| 259.84|\n|2011.0| 364.00|\n+------+----------+\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["+------+----------+\n| Year|Mean Close|\n+------+----------+\n|2015.0| 120.04|\n|2013.0| 472.63|\n|2014.0| 295.40|\n|2012.0| 576.05|\n|2016.0| 104.60|\n|2010.0| 259.84|\n|2011.0| 364.00|\n+------+----------+\n\n"]}}],"execution_count":0}],"metadata":{"application/vnd.databricks.v1+notebook":{"notebookName":"apple-stock","dashboards":[],"notebookMetadata":{"pythonIndentUnit":4},"language":"python","widgets":{},"notebookOrigID":2198394635151779}},"nbformat":4,"nbformat_minor":0}