{ "cells": [ { "cell_type": "markdown", "id": "7667d6b3-a08e-423d-b609-1933290de3a8", "metadata": {}, "source": [ "# Part 0: Import 套件" ] }, { "cell_type": "code", "execution_count": null, "id": "cbe07c5d-e659-4ef8-902c-a7e676989099", "metadata": {}, "outputs": [], "source": [ "# 下載資料套件\n", "import requests as r\n", "\n", "#資料處理套件\n", "import pandas as pd\n", "import json\n", "from datetime import datetime, date" ] }, { "cell_type": "markdown", "id": "24e7dfd0-bbd3-4b40-9ae6-436c8f70edb9", "metadata": {}, "source": [ "# Part 1: 下載單日三大法人買賣超日報\n", "- 資料來源:[證交所 三大法人買賣超日報](https://www.twse.com.tw/zh/page/trading/fund/T86.html)" ] }, { "cell_type": "code", "execution_count": null, "id": "65ffd525-69bf-4a7a-af44-365826cb19c1", "metadata": {}, "outputs": [], "source": [ "url = 'https://www.twse.com.tw/fund/T86?response=json&date=20210827&selectType=ALL'" ] }, { "cell_type": "code", "execution_count": null, "id": "886236ed-116c-4506-83dc-cc830ec55879", "metadata": {}, "outputs": [], "source": [ "res = r.get(url)\n", "res" ] }, { "cell_type": "code", "execution_count": null, "id": "3cc28f9a-4402-42a8-ba3e-9e54e45ee42f", "metadata": {}, "outputs": [], "source": [ "inv_json = res.json()\n", "inv_json" ] }, { "cell_type": "code", "execution_count": null, "id": "8c1d8687-f70e-47da-b061-cb229e394d31", "metadata": {}, "outputs": [], "source": [ "inv_json['data']" ] }, { "cell_type": "code", "execution_count": null, "id": "6f29debe-a0d8-4bc9-8fe3-7cec6f6d2a53", "metadata": {}, "outputs": [], "source": [ "df = pd.DataFrame.from_dict(inv_json['data'])\n", "df.head()" ] }, { "cell_type": "code", "execution_count": null, "id": "d2f4d7fe-c38b-436e-ae66-d083585c4644", "metadata": {}, "outputs": [], "source": [ "inv_json['fields']" ] }, { "cell_type": "code", "execution_count": null, "id": "ba1fb4de-ada7-4ca1-b45a-4c25eb7dcd96", "metadata": {}, "outputs": [], "source": [ "df.columns = inv_json['fields']" ] }, { "cell_type": "code", "execution_count": null, "id": "4a5a3160-557a-441a-8612-016d5372a284", "metadata": {}, "outputs": [], "source": [ "df.head()" ] }, { "cell_type": "markdown", "id": "9f569843-df50-46b1-a3f3-170c95923980", "metadata": {}, "source": [ "# Part 2: 下載多天以上的三大法人買賣超日報" ] }, { "cell_type": "code", "execution_count": null, "id": "f5c3b9a1-1abe-406b-b51f-10d141176c0f", "metadata": {}, "outputs": [], "source": [ "url = 'https://www.twse.com.tw/fund/T86?response=json&date=20210827&selectType=ALL'" ] }, { "cell_type": "code", "execution_count": null, "id": "816dd66f-f27e-463e-9a4b-a927e0eb8e12", "metadata": {}, "outputs": [], "source": [ "pd.date_range('2021-08-23', '2021-08-27', freq='D').strftime(\"%Y%m%d\").tolist()" ] }, { "cell_type": "code", "execution_count": null, "id": "254ecedc-b1df-42ac-8d85-9b6c46c6562b", "metadata": {}, "outputs": [], "source": [ "date_list = pd.date_range('2021-08-23', '2021-08-27', freq='D').strftime(\"%Y%m%d\").tolist()\n", "\n", "for day in date_list:\n", " print(day)" ] }, { "cell_type": "code", "execution_count": null, "id": "559ecfba-37df-4b11-a5eb-8aedc83a1b55", "metadata": {}, "outputs": [], "source": [ "df = pd.DataFrame()\n", "df" ] }, { "cell_type": "code", "execution_count": null, "id": "c5c1cbfc-2ed2-4c14-8bfb-f469a4684846", "metadata": {}, "outputs": [], "source": [ "for day in date_list:\n", " url = 'https://www.twse.com.tw/fund/T86?response=json&date=' + day + '&selectType=24'\n", " res = r.get(url)\n", " inv_json = res.json()\n", " df_inv = pd.DataFrame.from_dict(inv_json['data'])\n", " df = df.append(df_inv, ignore_index = True) " ] }, { "cell_type": "code", "execution_count": null, "id": "4d3f855a-15fc-46b9-9c9d-88cc1954e6d2", "metadata": {}, "outputs": [], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": null, "id": "11c853ef-a97a-4028-90df-5a4b7ef6f24c", "metadata": {}, "outputs": [], "source": [ "df.columns = ['證券代號', '證券名稱', '外陸資買進股數(不含外資自營商)', '外陸資賣出股數(不含外資自營商)', '外陸資買賣超股數(不含外資自營商)', '外資自營商買進股數', '外資自營商賣出股數', '外資自營商買賣超股數', '投信買進股數', '投信賣出股數', '投信買賣超股數', '自營商買賣超股數', '自營商買進股數(自行買賣)', '自營商賣出股數(自行買賣)', '自營商買賣超股數(自行買賣)', '自營商買進股數(避險)', '自營商賣出股數(避險)', '自營商買賣超股數(避險)', '三大法人買賣超股數']" ] }, { "cell_type": "code", "execution_count": null, "id": "84341ed2-150f-48f6-b6a0-3588048e7202", "metadata": {}, "outputs": [], "source": [ "df.head()" ] }, { "cell_type": "markdown", "id": "d20b3810-e2b7-4e66-9b1c-fee0eebd4152", "metadata": {}, "source": [ "## Part 2.1: 沒日期怎麼分辦是哪一天的資料?!" ] }, { "cell_type": "code", "execution_count": null, "id": "3ba50237-1e28-4dbd-b04d-86f7c9d1ee7d", "metadata": {}, "outputs": [], "source": [ "df[df['證券代號'] == '2330']" ] }, { "cell_type": "code", "execution_count": null, "id": "09236cfb-43e8-4006-8a50-d89af5a3d197", "metadata": {}, "outputs": [], "source": [ "url = 'https://www.twse.com.tw/fund/T86?response=json&date=20210827&selectType=ALL'\n", "res = r.get(url)\n", "inv_json = res.json()\n", "df = pd.DataFrame.from_dict(inv_json['data'])\n", "df.head()" ] }, { "cell_type": "code", "execution_count": null, "id": "0e39173a-fb8f-4c38-91c9-b17cbae48443", "metadata": {}, "outputs": [], "source": [ "df.insert(0, '日期', datetime(2021, 8, 27))" ] }, { "cell_type": "code", "execution_count": null, "id": "e31a2808-a7ab-4e73-962e-b70a6b7e60af", "metadata": {}, "outputs": [], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": null, "id": "d8459fdd-7d51-4818-a7cc-464d835cbbbe", "metadata": {}, "outputs": [], "source": [ "for day in date_list:\n", " print(day, int(day[:4]), int(day[4:6]), int(day[6:]), datetime(int(day[:4]), int(day[4:6]), int(day[6:])))" ] }, { "cell_type": "code", "execution_count": null, "id": "0dd37283-980e-4b4b-be29-ee9bfa4ea187", "metadata": {}, "outputs": [], "source": [ "df = pd.DataFrame()\n", "for day in date_list:\n", " url = 'https://www.twse.com.tw/fund/T86?response=json&date=' + day + '&selectType=ALL'\n", " res = r.get(url)\n", " inv_json = res.json()\n", " df_inv = pd.DataFrame.from_dict(inv_json['data'])\n", " df_inv.insert(0, '日期', datetime(int(day[:4]), int(day[4:6]), int(day[6:])))\n", " df = df.append(df_inv, ignore_index = True)" ] }, { "cell_type": "code", "execution_count": null, "id": "4eb1c663-e486-4560-9770-600b541e2bcd", "metadata": {}, "outputs": [], "source": [ "df.head(100)" ] }, { "cell_type": "code", "execution_count": null, "id": "369d0adc-57fb-4ff6-a6d7-0c638dce2147", "metadata": {}, "outputs": [], "source": [ "df.columns = ['日期', '證券代號', '證券名稱', '外陸資買進股數(不含外資自營商)', '外陸資賣出股數(不含外資自營商)', '外陸資買賣超股數(不含外資自營商)', '外資自營商買進股數', '外資自營商賣出股數', '外資自營商買賣超股數', '投信買進股數', '投信賣出股數', '投信買賣超股數', '自營商買賣超股數', '自營商買進股數(自行買賣)', '自營商賣出股數(自行買賣)', '自營商買賣超股數(自行買賣)', '自營商買進股數(避險)', '自營商賣出股數(避險)', '自營商買賣超股數(避險)', '三大法人買賣超股數']" ] }, { "cell_type": "code", "execution_count": null, "id": "323cc477-c904-4ebb-8571-ab58ee2a93c1", "metadata": {}, "outputs": [], "source": [ "df.head(100)" ] }, { "cell_type": "code", "execution_count": null, "id": "bd07671d-5703-4937-a5d6-42c256171158", "metadata": {}, "outputs": [], "source": [ "df[df['證券代號'] == '2330']" ] }, { "cell_type": "code", "execution_count": null, "id": "b7f87eea-3b2e-45c9-9ff4-821aded8333e", "metadata": {}, "outputs": [], "source": [ "def get_tw_investor_data(start_year, start_month, start_day, end_year, end_month, end_day):\n", " start_date = str(date(start_year, start_month, start_day))\n", " end_date = str(date(end_year, end_month, end_day))\n", "\n", " date_list = pd.date_range(start_date, end_date, freq='D').strftime(\"%Y%m%d\").tolist()\n", " \n", " df = pd.DataFrame()\n", " for day in date_list:\n", " url = 'https://www.twse.com.tw/fund/T86?response=json&date=' + day + '&selectType=24'\n", " res = r.get(url)\n", " inv_json = res.json()\n", " df_inv = pd.DataFrame.from_dict(inv_json['data'])\n", " #加入日期\n", " df_inv.insert(0, '日期', datetime(int(day[:4]), int(day[4:6]), int(day[6:])))\n", " df = df.append(df_inv, ignore_index = True)\n", " \n", " df.columns = ['日期', '證券代號', '證券名稱', '外陸資買進股數(不含外資自營商)', '外陸資賣出股數(不含外資自營商)', '外陸資買賣超股數(不含外資自營商)', '外資自營商買進股數', '外資自營商賣出股數', '外資自營商買賣超股數', '投信買進股數', '投信賣出股數', '投信買賣超股數', '自營商買賣超股數', '自營商買進股數(自行買賣)', '自營商賣出股數(自行買賣)', '自營商買賣超股數(自行買賣)', '自營商買進股數(避險)', '自營商賣出股數(避險)', '自營商買賣超股數(避險)', '三大法人買賣超股數']\n", " \n", " return df" ] }, { "cell_type": "code", "execution_count": null, "id": "c906c81b-d1c2-476a-98b0-a6d928282673", "metadata": {}, "outputs": [], "source": [ "df = get_tw_investor_data(start_year = 2021, \n", " start_month = 8, \n", " start_day = 23, \n", " end_year = 2021, \n", " end_month = 8, \n", " end_day = 27)\n", "df" ] }, { "cell_type": "markdown", "id": "c1868dc8-5bc8-4a5f-b452-84734efa242c", "metadata": {}, "source": [ "## Part 2.2: 怎麼下載不到資料?!" ] }, { "cell_type": "code", "execution_count": null, "id": "4ddd5968-8f15-4c87-8ac8-9e1f18f13f5c", "metadata": {}, "outputs": [], "source": [ "df = get_tw_investor_data(start_year = 2021, \n", " start_month = 8, \n", " start_day = 22, \n", " end_year = 2021, \n", " end_month = 8, \n", " end_day = 27) \n", "\n", "df" ] }, { "cell_type": "code", "execution_count": null, "id": "37ad2b6e-952c-4852-940a-e789062de5c0", "metadata": {}, "outputs": [], "source": [ "url = 'https://www.twse.com.tw/fund/T86?response=json&date=20210822&selectType=ALL'\n", "res = r.get(url)\n", "res" ] }, { "cell_type": "code", "execution_count": null, "id": "0797672f-03b8-4204-bd05-3cc76f73583c", "metadata": {}, "outputs": [], "source": [ "res.json()" ] }, { "cell_type": "code", "execution_count": null, "id": "879be36a-7848-4922-93c9-9bf6b443c7a5", "metadata": {}, "outputs": [], "source": [ "def get_tw_investor_data(start_year, start_month, start_day, end_year, end_month, end_day, stock_code):\n", " start_date = str(date(start_year, start_month, start_day))\n", " end_date = str(date(end_year, end_month, end_day))\n", "\n", " date_list = pd.date_range(start_date, end_date, freq='D').strftime(\"%Y%m%d\").tolist()\n", " \n", " df = pd.DataFrame()\n", " for day in date_list:\n", " url = 'https://www.twse.com.tw/fund/T86?response=json&date=' + day + '&selectType=ALL'\n", " res = r.get(url)\n", " inv_json = res.json()\n", " # 加入非開盤日就跳過的下載的判斷式\n", " if res.json()['stat'] == '很抱歉,沒有符合條件的資料!':\n", " continue\n", " else:\n", " df_inv = pd.DataFrame.from_dict(inv_json['data'])\n", " df_inv.insert(0, '日期', datetime(int(day[:4]), int(day[4:6]), int(day[6:])))\n", " df = df.append(df_inv, ignore_index = True)\n", " \n", " df.columns = ['日期', '證券代號', '證券名稱', '外陸資買進股數(不含外資自營商)', '外陸資賣出股數(不含外資自營商)', '外陸資買賣超股數(不含外資自營商)', '外資自營商買進股數', '外資自營商賣出股數', '外資自營商買賣超股數', '投信買進股數', '投信賣出股數', '投信買賣超股數', '自營商買賣超股數', '自營商買進股數(自行買賣)', '自營商賣出股數(自行買賣)', '自營商買賣超股數(自行買賣)', '自營商買進股數(避險)', '自營商賣出股數(避險)', '自營商買賣超股數(避險)', '三大法人買賣超股數']\n", " \n", " return df" ] }, { "cell_type": "code", "execution_count": null, "id": "eac53c6c-ef13-476f-9ebb-7de577007039", "metadata": {}, "outputs": [], "source": [ "df = get_tw_investor_data(start_year = 2021, \n", " start_month = 8, \n", " start_day = 22, \n", " end_year = 2021, \n", " end_month = 8, \n", " end_day = 27, \n", " stock_code = None)\n", "df" ] }, { "cell_type": "markdown", "id": "2efaa83e-b339-4a5a-b194-8d20f72f3325", "metadata": {}, "source": [ "## Part 2.3: 想篩選特定股票怎麼做?" ] }, { "cell_type": "code", "execution_count": null, "id": "63a01fe8-718d-4014-aee4-278e12491b67", "metadata": {}, "outputs": [], "source": [ "df[df['證券代號'] == '2330']" ] }, { "cell_type": "code", "execution_count": null, "id": "a120d643-8c63-49e6-8fe7-cb3e1a2e0d35", "metadata": {}, "outputs": [], "source": [ "def get_tw_investor_data(start_year, start_month, start_day, end_year, end_month, end_day, stock_code):\n", " start_date = str(date(start_year, start_month, start_day))\n", " end_date = str(date(end_year, end_month, end_day))\n", "\n", " date_list = pd.date_range(start_date, end_date, freq='D').strftime(\"%Y%m%d\").tolist()\n", " \n", " df = pd.DataFrame()\n", " for day in date_list:\n", " url = 'https://www.twse.com.tw/fund/T86?response=json&date=' + day + '&selectType=ALL'\n", " res = r.get(url)\n", " inv_json = res.json()\n", " if res.json()['stat'] == '很抱歉,沒有符合條件的資料!':\n", " continue\n", " else:\n", " df_inv = pd.DataFrame.from_dict(inv_json['data'])\n", " df_inv.insert(0, '日期', datetime(int(day[:4]), int(day[4:6]), int(day[6:])))\n", " df = df.append(df_inv, ignore_index = True)\n", " \n", " df.columns = ['日期', '證券代號', '證券名稱', '外陸資買進股數(不含外資自營商)', '外陸資賣出股數(不含外資自營商)', '外陸資買賣超股數(不含外資自營商)', '外資自營商買進股數', '外資自營商賣出股數', '外資自營商買賣超股數', '投信買進股數', '投信賣出股數', '投信買賣超股數', '自營商買賣超股數', '自營商買進股數(自行買賣)', '自營商賣出股數(自行買賣)', '自營商買賣超股數(自行買賣)', '自營商買進股數(避險)', '自營商賣出股數(避險)', '自營商買賣超股數(避險)', '三大法人買賣超股數']\n", " \n", " # 加入股票代碼篩選\n", " if stock_code == None:\n", " pass\n", " else:\n", " df = df[df['證券代號'] == str(stock_code)]\n", " \n", " return df" ] }, { "cell_type": "code", "execution_count": null, "id": "6b4bd7f1-92e4-4887-93be-39e7fe196df8", "metadata": {}, "outputs": [], "source": [ "df = get_tw_investor_data(start_year = 2021, \n", " start_month = 8, \n", " start_day = 22, \n", " end_year = 2021, \n", " end_month = 8, \n", " end_day = 27, \n", " stock_code = 2330)\n", "df" ] }, { "cell_type": "markdown", "id": "d708845d-0f57-4a1a-85a4-65eb4d21797c", "metadata": {}, "source": [ "## Part 2.4: 資料轉型 - 把數字資料從字串(string) 轉成浮點數 (float)" ] }, { "cell_type": "code", "execution_count": null, "id": "309307b6-380c-4a9b-96da-1f974cb17745", "metadata": {}, "outputs": [], "source": [ "for col in range(3, 20):\n", " for row in range(df.shape[0]):\n", " df.iloc[row, col] = float(df.iloc[row,col].replace(',', ''))" ] }, { "cell_type": "code", "execution_count": null, "id": "dd347aba-fbcc-4cce-a42d-5d17ad8c9e79", "metadata": {}, "outputs": [], "source": [ "df" ] }, { "cell_type": "markdown", "id": "697aaae9-5ddb-4c7b-a131-393b8a77cdc8", "metadata": {}, "source": [ "# Part 3: 另存成csv檔" ] }, { "cell_type": "code", "execution_count": null, "id": "d230c70d-732a-43f4-bab1-71bf61e4070a", "metadata": {}, "outputs": [], "source": [ "df.to_csv('investor.csv')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" } }, "nbformat": 4, "nbformat_minor": 5 }