{ "cells": [ { "cell_type": "code", "execution_count": 4, "metadata": { "pycharm": { "is_executing": false } }, "outputs": [ { "data": { "text/plain": [ "(4094, 8)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "# Load the quote CSV; `file` is reused by later cells that re-read the same data.\n", "file = r\"data_set/3-3 600519.csv\"\n", "df_gzmt = pd.read_csv(file, encoding='utf-8')\n", "# (rows, columns) of the loaded frame\n", "df_gzmt.shape" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "is_executing": false, "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 4094 entries, 0 to 4093\n", "Data columns (total 8 columns):\n", "day 4094 non-null object\n", "STOCK_CODE 4094 non-null int64\n", "open 4094 non-null float64\n", "close 4094 non-null float64\n", "maximum 4094 non-null float64\n", "minimum 4094 non-null float64\n", "volume 4094 non-null int64\n", "TURNOVER 4094 non-null int64\n", "dtypes: float64(4), int64(3), object(1)\n", "memory usage: 256.0+ KB\n" ] } ], "source": [ "# info() summarises the frame: each column's name, non-null count and dtype, plus overall memory usage.\n", "df_gzmt.info()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "is_executing": false, "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "array([6005191], dtype=int64)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# unique() lists the distinct values of a column; here it checks how many different STOCK_CODE values the file holds.\n", "df_gzmt['STOCK_CODE'].unique()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "is_executing": false, "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
STOCK_CODEopenclosemaximumminimumvolumeTURNOVER
count4094.04094.0000004094.0000004094.0000004094.0000004094.0000004.094000e+03
mean6005191.0182.161910182.435230184.879680179.80135327092.5258926.487986e+08
std0.0165.857882165.994905168.082099163.75206524623.4720999.550262e+08
min6005191.020.90000020.88000021.00000020.710000238.0000001.421413e+06
25%6005191.047.40500047.48250048.22250046.70500010186.7500006.203505e+07
50%6005191.0159.930000159.980000161.910000157.87500022491.0000003.727192e+08
75%6005191.0208.500000209.457500212.175000205.94500036759.5000007.486596e+08
max6005191.0800.000000799.190000803.500000788.880000406318.0000001.066339e+10
\n", "
" ], "text/plain": [ " STOCK_CODE open close maximum minimum \\\n", "count 4094.0 4094.000000 4094.000000 4094.000000 4094.000000 \n", "mean 6005191.0 182.161910 182.435230 184.879680 179.801353 \n", "std 0.0 165.857882 165.994905 168.082099 163.752065 \n", "min 6005191.0 20.900000 20.880000 21.000000 20.710000 \n", "25% 6005191.0 47.405000 47.482500 48.222500 46.705000 \n", "50% 6005191.0 159.930000 159.980000 161.910000 157.875000 \n", "75% 6005191.0 208.500000 209.457500 212.175000 205.945000 \n", "max 6005191.0 800.000000 799.190000 803.500000 788.880000 \n", "\n", " volume TURNOVER \n", "count 4094.000000 4.094000e+03 \n", "mean 27092.525892 6.487986e+08 \n", "std 24623.472099 9.550262e+08 \n", "min 238.000000 1.421413e+06 \n", "25% 10186.750000 6.203505e+07 \n", "50% 22491.000000 3.727192e+08 \n", "75% 36759.500000 7.486596e+08 \n", "max 406318.000000 1.066339e+10 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# describe() gives quick summary statistics for every numeric column:\n", "# count, mean, standard deviation, min, quartiles and max.\n", "df_gzmt.describe()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "is_executing": false, "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
daySTOCK_CODEopenclosemaximumminimumvolumeTURNOVERplacecode
02001-08-27600519134.5135.5537.7832.8540631814103470081600519
12001-08-28600519134.9936.8637.0034.611296474634630081600519
22001-08-29600519136.9836.3837.0036.10532521946890001600519
32001-08-30600519136.2837.1037.5136.00480131775580001600519
42001-08-31600519137.1537.0137.6236.8023231862310001600519
.................................
40892018-10-296005191549.09549.09549.09549.094907626947140801600519
40902018-10-306005191510.00524.00543.00509.02204397106633925121600519
40912018-10-316005191527.00548.90555.00522.009688552398530561600519
40922018-11-016005191555.00563.00585.50551.259810755810273281600519
40932018-11-026005191585.00599.90600.00572.248915752276951041600519
\n", "

4094 rows × 10 columns

\n", "
" ], "text/plain": [ " day STOCK_CODE open close maximum minimum volume \\\n", "0 2001-08-27 6005191 34.51 35.55 37.78 32.85 406318 \n", "1 2001-08-28 6005191 34.99 36.86 37.00 34.61 129647 \n", "2 2001-08-29 6005191 36.98 36.38 37.00 36.10 53252 \n", "3 2001-08-30 6005191 36.28 37.10 37.51 36.00 48013 \n", "4 2001-08-31 6005191 37.15 37.01 37.62 36.80 23231 \n", "... ... ... ... ... ... ... ... \n", "4089 2018-10-29 6005191 549.09 549.09 549.09 549.09 49076 \n", "4090 2018-10-30 6005191 510.00 524.00 543.00 509.02 204397 \n", "4091 2018-10-31 6005191 527.00 548.90 555.00 522.00 96885 \n", "4092 2018-11-01 6005191 555.00 563.00 585.50 551.25 98107 \n", "4093 2018-11-02 6005191 585.00 599.90 600.00 572.24 89157 \n", "\n", " TURNOVER place code \n", "0 1410347008 1 600519 \n", "1 463463008 1 600519 \n", "2 194689000 1 600519 \n", "3 177558000 1 600519 \n", "4 86231000 1 600519 \n", "... ... ... ... \n", "4089 2694714080 1 600519 \n", "4090 10663392512 1 600519 \n", "4091 5239853056 1 600519 \n", "4092 5581027328 1 600519 \n", "4093 5227695104 1 600519 \n", "\n", "[4094 rows x 10 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Split STOCK_CODE into two new columns:\n", "#   place - its last digit (1 means listed on the Shanghai Stock Exchange)\n", "#   code  - the remaining leading digits\n", "df_gzmt['place'] = df_gzmt['STOCK_CODE'].mod(10)\n", "df_gzmt['code'] = df_gzmt['STOCK_CODE'].floordiv(10)\n", "df_gzmt" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "is_executing": false, "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
daySTOCK_CODEopenclosemaximumminimumvolumeTURNOVERplacecodem_increaseprice_mean
02001-08-27600519134.5135.5537.7832.85406318141034700816005191.043471.042405
12001-08-28600519134.9936.8637.0034.6112964746346300816005191.873574.807038
22001-08-29600519136.9836.3837.0036.10532521946890001600519-0.603655.994141
32001-08-30600519136.2837.1037.5136.004801317755800016005190.823698.123425
42001-08-31600519137.1537.0137.6236.8023231862310001600519-0.143711.893590
.......................................
40892018-10-296005191549.09549.09549.09549.0949076269471408016005190.0054908.999918
40902018-10-306005191510.00524.00543.00509.0220439710663392512160051914.0052170.005000
40912018-10-316005191527.00548.90555.00522.00968855239853056160051921.9054083.222955
40922018-11-016005191555.00563.00585.50551.2598107558102732816005198.0056887.146972
40932018-11-026005191585.00599.90600.00572.24891575227695104160051914.9058634.712967
\n", "

4094 rows × 12 columns

\n", "
" ], "text/plain": [ " day STOCK_CODE open close maximum minimum volume \\\n", "0 2001-08-27 6005191 34.51 35.55 37.78 32.85 406318 \n", "1 2001-08-28 6005191 34.99 36.86 37.00 34.61 129647 \n", "2 2001-08-29 6005191 36.98 36.38 37.00 36.10 53252 \n", "3 2001-08-30 6005191 36.28 37.10 37.51 36.00 48013 \n", "4 2001-08-31 6005191 37.15 37.01 37.62 36.80 23231 \n", "... ... ... ... ... ... ... ... \n", "4089 2018-10-29 6005191 549.09 549.09 549.09 549.09 49076 \n", "4090 2018-10-30 6005191 510.00 524.00 543.00 509.02 204397 \n", "4091 2018-10-31 6005191 527.00 548.90 555.00 522.00 96885 \n", "4092 2018-11-01 6005191 555.00 563.00 585.50 551.25 98107 \n", "4093 2018-11-02 6005191 585.00 599.90 600.00 572.24 89157 \n", "\n", " TURNOVER place code m_increase price_mean \n", "0 1410347008 1 600519 1.04 3471.042405 \n", "1 463463008 1 600519 1.87 3574.807038 \n", "2 194689000 1 600519 -0.60 3655.994141 \n", "3 177558000 1 600519 0.82 3698.123425 \n", "4 86231000 1 600519 -0.14 3711.893590 \n", "... ... ... ... ... ... \n", "4089 2694714080 1 600519 0.00 54908.999918 \n", "4090 10663392512 1 600519 14.00 52170.005000 \n", "4091 5239853056 1 600519 21.90 54083.222955 \n", "4092 5581027328 1 600519 8.00 56887.146972 \n", "4093 5227695104 1 600519 14.90 58634.712967 \n", "\n", "[4094 rows x 12 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Intraday change (close minus open), stored in the new column m_increase.\n", "df_gzmt['m_increase'] = df_gzmt['close'].sub(df_gzmt['open'])\n", "# Average traded price (turnover divided by volume), stored in price_mean.\n", "# NOTE(review): the result is about 100x the quoted prices (3471.04 on the first day, whose\n", "# prices range from 32.85 to 37.78), so volume is presumably counted in lots of 100 shares --\n", "# confirm against the data source before reading price_mean as a per-share price.\n", "df_gzmt['price_mean'] = df_gzmt['TURNOVER'].div(df_gzmt['volume'])\n", "df_gzmt" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "is_executing": false, "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "0 True\n", "1 True\n", "2 True\n", "3 True\n", "4 True\n", " ... 
\n", "4089 False\n", "4090 False\n", "4091 False\n", "4092 False\n", "4093 False\n", "Name: close, Length: 4094, dtype: bool" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Element-wise comparison: a boolean Series that is True where the close price is at most 500.\n", "df_gzmt['close'].le(500)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "is_executing": false, "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
daySTOCK_CODEopenclosemaximumminimumvolumeTURNOVERplacecodem_increaseprice_mean
38962018-01-096005191752.21782.52783.00752.21645925001164544160051930.3177426.996284
38972018-01-106005191785.00785.71788.88773.4847714373199315216005190.7178215.893700
38992018-01-126005191773.77788.42788.80767.02459883577281776160051914.6577787.287466
39002018-01-156005191793.46785.37799.06779.025247341366454881600519-8.0978833.790483
39912018-06-046005191752.35781.97782.90745.88738065697540352160051929.6277196.167683
39922018-06-056005191786.50788.05794.70777.2352630413657857616005191.5578597.350865
39932018-06-066005191788.00785.75800.95782.304896938706167201600519-2.2579042.184239
39942018-06-076005191789.98780.97795.55778.903916830781374721600519-9.0178588.068627
39972018-06-126005191778.00799.19803.50776.50552874385043200160051921.1979314.182358
39982018-06-136005191800.00790.33802.62788.883524428034416801600519-9.6779543.799796
39992018-06-146005191790.00786.13793.80775.183616028305142241600519-3.8778277.495133
\n", "
" ], "text/plain": [ " day STOCK_CODE open close maximum minimum volume \\\n", "3896 2018-01-09 6005191 752.21 782.52 783.00 752.21 64592 \n", "3897 2018-01-10 6005191 785.00 785.71 788.88 773.48 47714 \n", "3899 2018-01-12 6005191 773.77 788.42 788.80 767.02 45988 \n", "3900 2018-01-15 6005191 793.46 785.37 799.06 779.02 52473 \n", "3991 2018-06-04 6005191 752.35 781.97 782.90 745.88 73806 \n", "3992 2018-06-05 6005191 786.50 788.05 794.70 777.23 52630 \n", "3993 2018-06-06 6005191 788.00 785.75 800.95 782.30 48969 \n", "3994 2018-06-07 6005191 789.98 780.97 795.55 778.90 39168 \n", "3997 2018-06-12 6005191 778.00 799.19 803.50 776.50 55287 \n", "3998 2018-06-13 6005191 800.00 790.33 802.62 788.88 35244 \n", "3999 2018-06-14 6005191 790.00 786.13 793.80 775.18 36160 \n", "\n", " TURNOVER place code m_increase price_mean \n", "3896 5001164544 1 600519 30.31 77426.996284 \n", "3897 3731993152 1 600519 0.71 78215.893700 \n", "3899 3577281776 1 600519 14.65 77787.287466 \n", "3900 4136645488 1 600519 -8.09 78833.790483 \n", "3991 5697540352 1 600519 29.62 77196.167683 \n", "3992 4136578576 1 600519 1.55 78597.350865 \n", "3993 3870616720 1 600519 -2.25 79042.184239 \n", "3994 3078137472 1 600519 -9.01 78588.068627 \n", "3997 4385043200 1 600519 21.19 79314.182358 \n", "3998 2803441680 1 600519 -9.67 79543.799796 \n", "3999 2830514224 1 600519 -3.87 78277.495133 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# The usual use of a boolean mask is row filtering: keep the rows whose close price is at least 780.\n", "df_gzmt.loc[df_gzmt['close'].ge(780)]" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "is_executing": false, "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
daySTOCK_CODEopenclosemaximumminimumvolumeTURNOVERplacecodem_increaseprice_mean
38952018-01-086005191735.02752.13756.50735.02522053899951728160051917.1174704.563318
39032018-01-186005191747.93750.74765.00744.0958979444303129616005192.8175332.428424
39042018-01-196005191752.90750.18758.90739.025092638180219841600519-2.7274971.958999
39132018-02-016005191767.00757.73767.30752.925058338464216161600519-9.2776041.785106
39402018-03-196005191743.97753.48753.99739.0038667289734854416005199.5174930.781907
39892018-05-316005191730.25751.13751.62730.24718535356222464160051920.8874544.173020
40032018-06-216005191765.22759.32778.89758.773918330138676481600519-5.9076917.735957
40062018-06-266005191765.31754.80769.88747.004657835169454401600519-10.5175506.579072
40202018-07-166005191752.30754.68764.11751.0332475245450160016005192.3875581.265589
\n", "
" ], "text/plain": [ " day STOCK_CODE open close maximum minimum volume \\\n", "3895 2018-01-08 6005191 735.02 752.13 756.50 735.02 52205 \n", "3903 2018-01-18 6005191 747.93 750.74 765.00 744.09 58979 \n", "3904 2018-01-19 6005191 752.90 750.18 758.90 739.02 50926 \n", "3913 2018-02-01 6005191 767.00 757.73 767.30 752.92 50583 \n", "3940 2018-03-19 6005191 743.97 753.48 753.99 739.00 38667 \n", "3989 2018-05-31 6005191 730.25 751.13 751.62 730.24 71853 \n", "4003 2018-06-21 6005191 765.22 759.32 778.89 758.77 39183 \n", "4006 2018-06-26 6005191 765.31 754.80 769.88 747.00 46578 \n", "4020 2018-07-16 6005191 752.30 754.68 764.11 751.03 32475 \n", "\n", " TURNOVER place code m_increase price_mean \n", "3895 3899951728 1 600519 17.11 74704.563318 \n", "3903 4443031296 1 600519 2.81 75332.428424 \n", "3904 3818021984 1 600519 -2.72 74971.958999 \n", "3913 3846421616 1 600519 -9.27 76041.785106 \n", "3940 2897348544 1 600519 9.51 74930.781907 \n", "3989 5356222464 1 600519 20.88 74544.173020 \n", "4003 3013867648 1 600519 -5.90 76917.735957 \n", "4006 3516945440 1 600519 -10.51 75506.579072 \n", "4020 2454501600 1 600519 2.38 75581.265589 " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Boolean masks can be combined element-wise before filtering (| is or, & is and, ~ is not).\n", "# Here: rows whose close price lies between 750 and 760, both ends included.\n", "at_least_750 = df_gzmt['close'] >= 750\n", "at_most_760 = df_gzmt['close'] <= 760\n", "df_gzmt[at_least_750 & at_most_760]" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "is_executing": false, "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
daySTOCK_CODEopenclosemaximumminimumvolumeTURNOVERplacecodem_increaseprice_mean
26162012-10-086005191245.30239.94245.30237.48236085665275681600519-5.3623997.270756
28532013-10-086005191135.76136.67137.40134.195686777329416016005190.9113598.293562
30982014-10-086005191161.00160.73161.10157.80456537250302241600519-0.2715881.327054
33422015-10-086005191198.78197.52201.58197.00343926841380481600519-1.2619892.360084
40742018-10-086005191715.41686.15719.00686.158274557987215361600519-29.2670079.419131
\n", "
" ], "text/plain": [ " day STOCK_CODE open close maximum minimum volume \\\n", "2616 2012-10-08 6005191 245.30 239.94 245.30 237.48 23608 \n", "2853 2013-10-08 6005191 135.76 136.67 137.40 134.19 56867 \n", "3098 2014-10-08 6005191 161.00 160.73 161.10 157.80 45653 \n", "3342 2015-10-08 6005191 198.78 197.52 201.58 197.00 34392 \n", "4074 2018-10-08 6005191 715.41 686.15 719.00 686.15 82745 \n", "\n", " TURNOVER place code m_increase price_mean \n", "2616 566527568 1 600519 -5.36 23997.270756 \n", "2853 773294160 1 600519 0.91 13598.293562 \n", "3098 725030224 1 600519 -0.27 15881.327054 \n", "3342 684138048 1 600519 -1.26 19892.360084 \n", "4074 5798721536 1 600519 -29.26 70079.419131 " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# isin() handles a non-contiguous set of values: select every 8 October from 2012 to 2018.\n", "# Dates that have no row in the data are simply absent from the result\n", "# (the stored output has no rows for 2016 and 2017).\n", "df_gzmt[df_gzmt['day'].isin([\n", " '2012-10-08',\n", " '2013-10-08',\n", " '2014-10-08',\n", " '2015-10-08',\n", " '2016-10-08',\n", " '2017-10-08',\n", " '2018-10-08',\n", "])]" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "is_executing": false, "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "182.435229604299" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# axis=0 aggregates vertically (down the rows); axis=1 aggregates horizontally (across the columns).\n", "# skipna=True leaves missing values out of the mean.\n", "df_gzmt['close'].mean(axis=0, skipna=True)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "is_executing": false, "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "(3997, 489)" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Index labels of the highest close price and of the lowest open price.\n", "# A cell only displays its last expression, so both results are returned together as one tuple;\n", "# as two separate statements the idxmax result would be computed and silently discarded.\n", "(df_gzmt['close'].idxmax(axis=0), df_gzmt['open'].idxmin(axis=0))" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "is_executing": 
false, "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "Index(['STOCK_CODE', 'open', 'close', 'maximum', 'minimum', 'volume',\n", " 'TURNOVER'],\n", " dtype='object')" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Re-read the CSV, this time using its first column as the row index.\n", "df_mt = pd.read_csv(file, encoding='utf-8', index_col=0)\n", "# Only the last expression of a cell is displayed, so the next three lines produce no output here;\n", "# run them one at a time to inspect the frame, its index and the index name.\n", "df_mt\n", "df_mt.index\n", "df_mt.index.name\n", "df_mt.columns" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "is_executing": false, "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "STOCK_CODE 6.005191e+06\n", "open 3.451000e+01\n", "close 3.555000e+01\n", "maximum 3.778000e+01\n", "minimum 3.285000e+01\n", "volume 4.063180e+05\n", "TURNOVER 1.410347e+09\n", "Name: 2001-08-27, dtype: float64\n", "type is \n", "2001-08-27 GZMT close price is 35.55 yuan\n" ] } ], "source": [ "# Row iteration by index label. Demonstration only: the break stops after the first row.\n", "for day in df_mt.index:\n", "    day_row = df_mt.loc[day]  # label-based lookup of one row\n", "    print(day_row)\n", "    print('type is', type(day_row))\n", "    print(\"{0} GZMT close price is {1} yuan\".format(day, day_row['close']))\n", "    break" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "is_executing": false, "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "day\n", "2001-08-27 6005191\n", "2001-08-28 6005191\n", "2001-08-29 6005191\n", "2001-08-30 6005191\n", "2001-08-31 6005191\n", " ... 
\n", "2018-10-29 6005191\n", "2018-10-30 6005191\n", "2018-10-31 6005191\n", "2018-11-01 6005191\n", "2018-11-02 6005191\n", "Name: STOCK_CODE, Length: 4094, dtype: int64\n", "type is \n" ] } ], "source": [ "# Column iteration mirrors row iteration: loop over the column names instead of the index labels.\n", "# Demonstration only: the break stops after the first column.\n", "for col in df_mt.columns:\n", "    column = df_mt[col]  # plain column selection\n", "    print(column)\n", "    print(df_mt.loc[:, col])  # the same column selected through .loc\n", "    print('type is', type(column))\n", "    break" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "is_executing": false, "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2001-08-27\n", "STOCK_CODE 6.005191e+06\n", "open 3.451000e+01\n", "close 3.555000e+01\n", "maximum 3.778000e+01\n", "minimum 3.285000e+01\n", "volume 4.063180e+05\n", "TURNOVER 1.410347e+09\n", "Name: 2001-08-27, dtype: float64\n", "\n" ] } ], "source": [ "# The built-in iterrows() and itertuples() behave like generators: they yield one row at a time.\n", "# iterrows() yields (index label, row) pairs.\n", "for label, record in df_mt.iterrows():\n", "    print(label)\n", "    print(record)\n", "    print(type(record))\n", "    break" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "is_executing": false, "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Pandas(Index='2001-08-27', STOCK_CODE=6005191, open=34.51, close=35.55, maximum=37.78, minimum=32.85, volume=406318, TURNOVER=1410347008) \n", "2001-08-27\n", "34.51\n", "35.55\n", "1410347008\n", "(406318,)\n" ] } ], "source": [ "# Like iterrows(), itertuples() walks the rows, but it yields each row as a namedtuple\n", "# whose first element is the index label.\n", "for row_tuple in df_mt.itertuples():\n", "    print(row_tuple, type(row_tuple))\n", "    print(row_tuple.Index)  # index label\n", "    print(row_tuple.open)  # field access by attribute\n", "    print(getattr(row_tuple, 'close'))  # field access by name\n", "    print(row_tuple[-1])  # positional access: last field\n", "    print(row_tuple[-2:-1])  # slicing returns a plain tuple\n", "    break" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, 
"pycharm": { "is_executing": false, "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "STOCK_CODE\n", "day\n", "2001-08-27 6005191\n", "2001-08-28 6005191\n", "2001-08-29 6005191\n", "2001-08-30 6005191\n", "2001-08-31 6005191\n", " ... \n", "2018-10-29 6005191\n", "2018-10-30 6005191\n", "2018-10-31 6005191\n", "2018-11-01 6005191\n", "2018-11-02 6005191\n", "Name: STOCK_CODE, Length: 4094, dtype: int64\n", "\n", "open\n", "day\n", "2001-08-27 34.51\n", "2001-08-28 34.99\n", "2001-08-29 36.98\n", "2001-08-30 36.28\n", "2001-08-31 37.15\n", " ... \n", "2018-10-29 549.09\n", "2018-10-30 510.00\n", "2018-10-31 527.00\n", "2018-11-01 555.00\n", "2018-11-02 585.00\n", "Name: open, Length: 4094, dtype: float64\n", "\n", "close\n", "day\n", "2001-08-27 35.55\n", "2001-08-28 36.86\n", "2001-08-29 36.38\n", "2001-08-30 37.10\n", "2001-08-31 37.01\n", " ... \n", "2018-10-29 549.09\n", "2018-10-30 524.00\n", "2018-10-31 548.90\n", "2018-11-01 563.00\n", "2018-11-02 599.90\n", "Name: close, Length: 4094, dtype: float64\n", "\n", "maximum\n", "day\n", "2001-08-27 37.78\n", "2001-08-28 37.00\n", "2001-08-29 37.00\n", "2001-08-30 37.51\n", "2001-08-31 37.62\n", " ... \n", "2018-10-29 549.09\n", "2018-10-30 543.00\n", "2018-10-31 555.00\n", "2018-11-01 585.50\n", "2018-11-02 600.00\n", "Name: maximum, Length: 4094, dtype: float64\n", "\n", "minimum\n", "day\n", "2001-08-27 32.85\n", "2001-08-28 34.61\n", "2001-08-29 36.10\n", "2001-08-30 36.00\n", "2001-08-31 36.80\n", " ... \n", "2018-10-29 549.09\n", "2018-10-30 509.02\n", "2018-10-31 522.00\n", "2018-11-01 551.25\n", "2018-11-02 572.24\n", "Name: minimum, Length: 4094, dtype: float64\n", "\n", "volume\n", "day\n", "2001-08-27 406318\n", "2001-08-28 129647\n", "2001-08-29 53252\n", "2001-08-30 48013\n", "2001-08-31 23231\n", " ... 
\n", "2018-10-29 49076\n", "2018-10-30 204397\n", "2018-10-31 96885\n", "2018-11-01 98107\n", "2018-11-02 89157\n", "Name: volume, Length: 4094, dtype: int64\n", "\n", "TURNOVER\n", "day\n", "2001-08-27 1410347008\n", "2001-08-28 463463008\n", "2001-08-29 194689000\n", "2001-08-30 177558000\n", "2001-08-31 86231000\n", " ... \n", "2018-10-29 2694714080\n", "2018-10-30 10663392512\n", "2018-10-31 5239853056\n", "2018-11-01 5581027328\n", "2018-11-02 5227695104\n", "Name: TURNOVER, Length: 4094, dtype: int64\n", "\n" ] } ], "source": [ "# items() iterates column by column, yielding (column name, column contents) pairs.\n", "# It is used instead of iteritems(), which is deprecated and was removed in pandas 2.0.\n", "# Demonstration only: the break stops after the first column.\n", "for col_name, column in df_mt.items():\n", "    print(col_name)\n", "    print(column)\n", "    print(type(column))\n", "    break" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "is_executing": false, "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Hand-rolled cumulative sum of TURNOVER, written out step by step for illustration\n", "# (pandas' built-in counterpart is Series.cumsum()).\n", "%matplotlib inline\n", "import matplotlib.pyplot as plt  # the plotting API lives in matplotlib.pyplot, not in the top-level package\n", "# Step 1: accumulate the running total row by row.\n", "df = pd.read_csv(file, encoding='utf-8', index_col=0)\n", "cum_sum = []\n", "last_cum = 0\n", "for index, row in df.iterrows():\n", "    last_cum = row['TURNOVER'] + last_cum\n", "    cum_sum.append(last_cum)\n", "# Step 2: add a new column holding the running totals from step 1.\n", "df['Turnover_CUMSUM'] = cum_sum\n", "# Step 3: visualise the result.\n", "df['Turnover_CUMSUM'].plot()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" }, "pycharm": { "stem_cell": { "cell_type": "raw", "metadata": { "collapsed": false }, "source": [] } } }, "nbformat": 4, "nbformat_minor": 4 }