{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Train, backtest and analyze\n",
    "\n",
    "This notebook fits an `LGBModel` on `Alpha158` features, backtests a `TopkDropoutStrategy` on the model's test-period predictions, and then draws the report, risk-analysis, score-IC and model-performance graphs."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# All imports live in this cell so that the notebook's dependencies are visible up front.\n",
    "import sys\n",
    "from pathlib import Path\n",
    "\n",
    "import qlib\n",
    "import pandas as pd\n",
    "from qlib.config import REG_CN\n",
    "from qlib.contrib.model.gbdt import LGBModel\n",
    "from qlib.contrib.estimator.handler import Alpha158\n",
    "from qlib.contrib.strategy.strategy import TopkDropoutStrategy\n",
    "from qlib.contrib.evaluate import (\n",
    "    backtest as normal_backtest,\n",
    "    risk_analysis,\n",
    ")\n",
    "from qlib.contrib.report import analysis_model, analysis_position\n",
    "from qlib.data import D\n",
    "from qlib.utils import exists_qlib_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# use default data\n",
    "# NOTE: need to download data from remote: python scripts/get_data.py qlib_data_cn --target_dir ~/.qlib/qlib_data/cn_data\n",
    "provider_uri = \"~/.qlib/qlib_data/cn_data\"  # target_dir\n",
    "if not exists_qlib_data(provider_uri):\n",
    "    print(f\"Qlib data is not found in {provider_uri}\")\n",
    "    # Put <parent of the working directory>/scripts on sys.path so that get_data can be imported.\n",
    "    sys.path.append(str(Path.cwd().parent.joinpath(\"scripts\")))\n",
    "    from get_data import GetData\n",
    "\n",
    "    GetData().qlib_data_cn(target_dir=provider_uri)\n",
    "qlib.init(provider_uri=provider_uri, region=REG_CN)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Instrument universe and benchmark shared by the data handler, the backtest and the analysis cells.\n",
    "MARKET = \"csi300\"\n",
    "BENCHMARK = \"SH000300\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# train model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "DATA_HANDLER_CONFIG = {\n",
    "    \"dropna_label\": True,\n",
    "    \"start_date\": \"2008-01-01\",\n",
    "    \"end_date\": \"2020-08-01\",\n",
    "    \"market\": MARKET,\n",
    "}\n",
    "\n",
    "# Consecutive train / validate / test date ranges passed to get_split_data.\n",
    "TRAINER_CONFIG = {\n",
    "    \"train_start_date\": \"2008-01-01\",\n",
    "    \"train_end_date\": \"2014-12-31\",\n",
    "    \"validate_start_date\": \"2015-01-01\",\n",
    "    \"validate_end_date\": \"2016-12-31\",\n",
    "    \"test_start_date\": \"2017-01-01\",\n",
    "    \"test_end_date\": \"2020-08-01\",\n",
    "}\n",
    "\n",
    "# use default DataHandler\n",
    "# custom DataHandler, refer to: TODO: DataHandler api url\n",
    "x_train, y_train, x_validate, y_validate, x_test, y_test = Alpha158(**DATA_HANDLER_CONFIG).get_split_data(\n",
    "    **TRAINER_CONFIG\n",
    ")\n",
    "\n",
    "MODEL_CONFIG = {\n",
    "    \"loss\": \"mse\",\n",
    "    \"colsample_bytree\": 0.8879,\n",
    "    \"learning_rate\": 0.0421,\n",
    "    \"subsample\": 0.8789,\n",
    "    \"lambda_l1\": 205.6999,\n",
    "    \"lambda_l2\": 580.9768,\n",
    "    \"max_depth\": 8,\n",
    "    \"num_leaves\": 210,\n",
    "    \"num_threads\": 20,\n",
    "}\n",
    "# use default model\n",
    "# custom Model, refer to: TODO: Model api url\n",
    "model = LGBModel(**MODEL_CONFIG)\n",
    "model.fit(x_train, y_train, x_validate, y_validate)\n",
    "\n",
    "# Wrap the predictions in a DataFrame that reuses the test index and the label column names.\n",
    "_pred = model.predict(x_test)\n",
    "_pred = pd.DataFrame(_pred, index=x_test.index, columns=y_test.columns)\n",
    "\n",
    "# backtest requires pred_score: keep only the first prediction column, under the name \"score\"\n",
    "pred_score = pd.DataFrame(index=_pred.index)\n",
    "pred_score[\"score\"] = _pred.iloc[:, 0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# backtest"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "STRATEGY_CONFIG = {\n",
    "    \"topk\": 50,\n",
    "    \"n_drop\": 5,\n",
    "}\n",
    "BACKTEST_CONFIG = {\n",
    "    \"verbose\": False,\n",
    "    \"limit_threshold\": 0.095,\n",
    "    \"account\": 100000000,\n",
    "    \"benchmark\": BENCHMARK,\n",
    "    \"deal_price\": \"close\",\n",
    "    \"open_cost\": 0.0005,\n",
    "    \"close_cost\": 0.0015,\n",
    "    \"min_cost\": 5,\n",
    "}\n",
    "\n",
    "# use default strategy\n",
    "# custom Strategy, refer to: TODO: Strategy api url\n",
    "strategy = TopkDropoutStrategy(**STRATEGY_CONFIG)\n",
    "report_normal, positions_normal = normal_backtest(\n",
    "    pred_score,\n",
    "    strategy=strategy,\n",
    "    **BACKTEST_CONFIG,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# analyze"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# If need a more detailed analysis, refer to: examples/train_and_backtest.ipynb\n",
    "# Risk metrics of the return in excess of the benchmark, before and after subtracting the cost column.\n",
    "analysis = dict()\n",
    "analysis[\"excess_return_without_cost\"] = risk_analysis(report_normal[\"return\"] - report_normal[\"bench\"])\n",
    "analysis[\"excess_return_with_cost\"] = risk_analysis(\n",
    "    report_normal[\"return\"] - report_normal[\"bench\"] - report_normal[\"cost\"]\n",
    ")\n",
    "analysis_df = pd.concat(analysis)  # type: pd.DataFrame\n",
    "analysis_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# analyze graphs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Aliases used by the graph cells below; `positions` is not referenced by any later cell in this notebook.\n",
    "pred_df_dates = pred_score.index.get_level_values(level='datetime')\n",
    "report_normal_df = report_normal\n",
    "positions = positions_normal\n",
    "pred_df = pred_score"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## analysis position"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): stock_ret is not consumed by any later cell in this notebook.\n",
    "stock_ret = D.features(\n",
    "    D.instruments(MARKET),\n",
    "    ['Ref($close, -1)/$close - 1'],\n",
    "    pred_df_dates.min(),\n",
    "    pred_df_dates.max(),\n",
    ")\n",
    "stock_ret.columns = ['label']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "analysis_position.report_graph(report_normal_df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### risk analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "analysis_position.risk_analysis_graph(analysis_df, report_normal_df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## analysis model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Label series over the prediction date range; joined with the scores in the next code cell.\n",
    "label_df = D.features(\n",
    "    D.instruments(MARKET),\n",
    "    ['Ref($close, -2)/Ref($close, -1) - 1'],\n",
    "    pred_df_dates.min(),\n",
    "    pred_df_dates.max(),\n",
    ")\n",
    "label_df.columns = ['label']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### score IC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Put label and score side by side, keeping exactly the rows of label_df.\n",
    "pred_label = pd.concat([label_df, pred_df], axis=1, sort=True).reindex(label_df.index)\n",
    "analysis_position.score_ic_graph(pred_label)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### model performance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "analysis_model.model_performance_graph(pred_label)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}