{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Time-series cross-validation from scratch\n",
    "\n",
    "A minimal expanding-window `TimeSeriesSplit` written with NumPy, checked fold by fold against `sklearn.model_selection.TimeSeriesSplit` used with its default settings."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from sklearn.datasets import load_diabetes\n",
    "from sklearn.model_selection import TimeSeriesSplit as skTimeSeriesSplit"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Implementation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "class TimeSeriesSplit:\n",
    "    \"\"\"Expanding-window cross-validator for ordered samples.\n",
    "\n",
    "    Every fold trains on all samples that precede its test window, so\n",
    "    the training set grows from fold to fold while the test window\n",
    "    keeps a fixed size.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    n_splits : int, default=5\n",
    "        Number of (train, test) pairs produced by ``split``.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, n_splits=5):\n",
    "        self.n_splits = n_splits\n",
    "\n",
    "    def split(self, X, y=None, groups=None):\n",
    "        \"\"\"Yield ``(train_indices, test_indices)`` for each fold.\n",
    "\n",
    "        Parameters\n",
    "        ----------\n",
    "        X : array-like\n",
    "            Only its number of samples (first dimension) is used.\n",
    "        y, groups : object, optional\n",
    "            Ignored; accepted so the call signature matches the\n",
    "            scikit-learn splitter interface.\n",
    "\n",
    "        Yields\n",
    "        ------\n",
    "        train, test : numpy.ndarray\n",
    "            Integer index arrays; ``train`` is ``[0, test_start)`` and\n",
    "            ``test`` is the ``test_size`` samples that follow it.\n",
    "\n",
    "        Raises\n",
    "        ------\n",
    "        ValueError\n",
    "            If ``n_splits + 1`` exceeds the number of samples. Because\n",
    "            this is a generator, the error surfaces on first iteration.\n",
    "        \"\"\"\n",
    "        n_samples = X.shape[0] if hasattr(X, \"shape\") else len(X)\n",
    "        n_folds = self.n_splits + 1\n",
    "        if n_folds > n_samples:\n",
    "            # Without this guard test_size would be 0 and np.arange would\n",
    "            # fail on a zero step with an unhelpful message.\n",
    "            raise ValueError(\n",
    "                f\"Cannot have number of folds={n_folds} greater than \"\n",
    "                f\"the number of samples={n_samples}.\"\n",
    "            )\n",
    "        indices = np.arange(n_samples)\n",
    "        test_size = n_samples // n_folds\n",
    "        # The remainder of the integer division is absorbed by the first\n",
    "        # training window, so every test window has exactly test_size rows.\n",
    "        test_starts = np.arange(test_size + n_samples % n_folds,\n",
    "                                n_samples, test_size)\n",
    "        for test_start in test_starts:\n",
    "            yield (indices[:test_start],\n",
    "                   indices[test_start:test_start + test_size])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Check against scikit-learn\n",
    "\n",
    "Only the number of samples matters here. The diabetes dataset (442 samples) does not divide evenly into 6 folds, so the remainder handling is exercised."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "N_SPLITS = 5\n",
    "\n",
    "X, y = load_diabetes(return_X_y=True)\n",
    "cv1 = TimeSeriesSplit(n_splits=N_SPLITS)\n",
    "cv2 = skTimeSeriesSplit(n_splits=N_SPLITS)\n",
    "folds1 = list(cv1.split(X, y))\n",
    "folds2 = list(cv2.split(X, y))\n",
    "\n",
    "# zip() stops at the shorter input, so compare the fold counts first;\n",
    "# otherwise a splitter that yields too few folds would pass silently.\n",
    "assert len(folds1) == len(folds2) == N_SPLITS\n",
    "for (train1, test1), (train2, test2) in zip(folds1, folds2):\n",
    "    assert np.array_equal(train1, train2)\n",
    "    assert np.array_equal(test1, test2)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "dev",
   "language": "python",
   "name": "dev"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}