{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Bayesian Temporal Regularized Tensor Factorization\n", "\n", "**Published**: December 27, 2020\n", "\n", "**Author**: Xinyu Chen [[**GitHub homepage**](https://github.com/xinychen)]\n", "\n", "**Download**: This Jupyter notebook is at our GitHub repository. If you want to evaluate the code, please download the notebook from the [**transdim**](https://github.com/xinychen/transdim/blob/master/predictor/BTRTF.ipynb) repository.\n", "\n", "This notebook shows how to implement the Bayesian Temporal Regularized Tensor Factorization (BTRTF), a fully Bayesian matrix factorization model, on some real-world data sets. To overcome the missing data problem in multivariate time series, BTRTF takes into account both low-rank matrix structure and time series autoregression. For an in-depth discussion of BTRTF, please see [1].\n", "\n", "
\n", "\n", "[1] Xinyu Chen, Lijun Sun (2019). Bayesian temporal factorization for multidimensional time series prediction. arXiv:1910.06366. [PDF] \n", "\n", "
" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "from numpy.linalg import inv as inv\n", "from numpy.random import normal as normrnd\n", "from numpy.random import multivariate_normal as mvnrnd\n", "from scipy.linalg import khatri_rao as kr_prod\n", "from scipy.stats import wishart\n", "from scipy.stats import invwishart\n", "from numpy.linalg import solve as solve\n", "from numpy.linalg import cholesky as cholesky_lower\n", "from scipy.linalg import cholesky as cholesky_upper\n", "from scipy.linalg import solve_triangular as solve_ut\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def mvnrnd_pre(mu, Lambda):\n", " src = normrnd(size = (mu.shape[0],))\n", " return solve_ut(cholesky_upper(Lambda, overwrite_a = True, check_finite = False), \n", " src, lower = False, check_finite = False, overwrite_b = True) + mu\n", "\n", "def cov_mat(mat, mat_bar):\n", " mat = mat - mat_bar\n", " return mat.T @ mat\n", "\n", "def ten2mat(tensor, mode):\n", " return np.reshape(np.moveaxis(tensor, mode, 0), (tensor.shape[mode], -1), order = 'F')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "def sample_factor_u(tau_sparse_tensor, tau_ind, U, V, X, beta0 = 1):\n", " \"\"\"Sampling M-by-R factor matrix U and its hyperparameters (mu_u, Lambda_u).\"\"\"\n", " \n", " dim1, rank = U.shape\n", " U_bar = np.mean(U, axis = 0)\n", " temp = dim1 / (dim1 + beta0)\n", " var_mu_hyper = temp * U_bar\n", " var_U_hyper = inv(np.eye(rank) + cov_mat(U, U_bar) + temp * beta0 * np.outer(U_bar, U_bar))\n", " var_Lambda_hyper = wishart.rvs(df = dim1 + rank, scale = var_U_hyper)\n", " var_mu_hyper = mvnrnd_pre(var_mu_hyper, (dim1 + beta0) * var_Lambda_hyper)\n", "\n", " var1 = kr_prod(X, V).T\n", " var2 = kr_prod(var1, var1)\n", " var3 = (var2 @ ten2mat(tau_ind, 0).T).reshape([rank, rank, dim1]) + var_Lambda_hyper[:, :, None]\n", " var4 = var1 @ ten2mat(tau_sparse_tensor, 0).T + (var_Lambda_hyper @ var_mu_hyper)[:, None]\n", " for i in range(dim1):\n", " U[i, :] = mvnrnd_pre(solve(var3[:, :, i], var4[:, i]), var3[:, :, i])\n", " \n", " return U" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "def sample_factor_v(tau_sparse_tensor, tau_ind, U, V, X, beta0 = 1):\n", " \"\"\"Sampling N-by-R factor matrix V and its hyperparameters (mu_v, Lambda_v).\"\"\"\n", " \n", " dim2, rank = V.shape\n", " V_bar = np.mean(V, axis = 0)\n", " temp = dim2 / (dim2 + beta0)\n", " var_mu_hyper = temp * V_bar\n", " var_V_hyper = inv(np.eye(rank) + cov_mat(V, V_bar) + temp * beta0 * np.outer(V_bar, V_bar))\n", " var_Lambda_hyper = wishart.rvs(df = dim2 + rank, scale = var_V_hyper)\n", " var_mu_hyper = mvnrnd_pre(var_mu_hyper, (dim2 + beta0) * var_Lambda_hyper)\n", "\n", " var1 = kr_prod(X, U).T\n", " var2 = kr_prod(var1, var1)\n", " var3 = (var2 @ ten2mat(tau_ind, 1).T).reshape([rank, rank, dim2]) + var_Lambda_hyper[:, :, None]\n", " var4 = var1 @ ten2mat(tau_sparse_tensor, 1).T + (var_Lambda_hyper @ var_mu_hyper)[:, None]\n", " for j in range(dim2):\n", " V[j, :] = mvnrnd_pre(solve(var3[:, :, j], var4[:, j]), var3[:, :, j])\n", " \n", " return V" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "def sample_theta(X, theta, Lambda_x, time_lags, beta0 = 1):\n", " \n", " dim, rank = X.shape\n", " d = time_lags.shape[0]\n", " tmax = np.max(time_lags)\n", " theta_bar = np.mean(theta, axis = 0)\n", " temp = d / (d + beta0)\n", " var_theta_hyper = inv(np.eye(rank) + cov_mat(theta, theta_bar) \n", " + temp * beta0 * np.outer(theta_bar, theta_bar))\n", " var_Lambda_hyper = wishart.rvs(df = d + rank, scale = var_theta_hyper)\n", " var_mu_hyper = mvnrnd_pre(temp * theta_bar, (d + beta0) * var_Lambda_hyper)\n", " \n", " for k in range(d):\n", " theta0 = theta.copy()\n", " theta0[k, :] = 0\n", " mat0 = np.zeros((dim - tmax, rank))\n", " for L in range(d):\n", " mat0 += X[tmax - time_lags[L] : dim - time_lags[L], :] @ np.diag(theta0[L, :])\n", " varPi = X[tmax : dim, :] - mat0\n", " var0 = X[tmax - time_lags[k] : dim - time_lags[k], :]\n", " var = np.einsum('ij, jk, ik -> j', var0, Lambda_x, varPi)\n", " var_Lambda = np.einsum('ti, tj, ij -> ij', var0, var0, Lambda_x) + var_Lambda_hyper\n", " theta[k, :] = mvnrnd_pre(solve(var_Lambda, var + var_Lambda_hyper @ var_mu_hyper), var_Lambda)\n", " \n", " return theta" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "def sample_Lambda_x(X, theta, time_lags):\n", " \n", " dim, rank = X.shape\n", " d = time_lags.shape[0]\n", " tmax = np.max(time_lags)\n", " mat = X[: tmax, :].T @ X[: tmax, :]\n", " temp = np.zeros((dim - tmax, rank, d))\n", " for k in range(d):\n", " temp[:, :, k] = X[tmax - time_lags[k] : dim - time_lags[k], :]\n", " new_mat = X[tmax : dim, :] - np.einsum('kr, irk -> ir', theta, temp)\n", " Lambda_x = wishart.rvs(df = dim + rank, scale = inv(np.eye(rank) + mat + new_mat.T @ new_mat))\n", " \n", " return Lambda_x" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "def sample_factor_x(tau_sparse_tensor, tau_ind, time_lags, U, V, X, theta, Lambda_x):\n", " \"\"\"Sampling T-by-R factor matrix X.\"\"\"\n", "\n", " dim3, rank = X.shape\n", " tmax = np.max(time_lags)\n", " tmin = np.min(time_lags)\n", " d = time_lags.shape[0]\n", " A = np.zeros((d * rank, rank))\n", " for k in range(d):\n", " A[k * rank : (k + 1) * rank, :] = np.diag(theta[k, :])\n", " A0 = np.dstack([A] * d)\n", " for k in range(d):\n", " A0[k * rank : (k + 1) * rank, :, k] = 0\n", " mat0 = Lambda_x @ A.T\n", " mat1 = np.einsum('kij, jt -> kit', A.reshape([d, rank, rank]), Lambda_x)\n", " mat2 = np.einsum('kit, kjt -> ij', mat1, A.reshape([d, rank, rank]))\n", " \n", " var1 = kr_prod(V, U).T\n", " var2 = kr_prod(var1, var1)\n", " var3 = (var2 @ ten2mat(tau_ind, 2).T).reshape([rank, rank, dim3]) + Lambda_x[:, :, None]\n", " var4 = var1 @ ten2mat(tau_sparse_tensor, 2).T\n", " for t in range(dim3):\n", " Mt = np.zeros((rank, rank))\n", " Nt = np.zeros(rank)\n", " Qt = mat0 @ X[t - time_lags, :].reshape(rank * d)\n", " index = list(range(0, d))\n", " if t >= dim3 - tmax and t < dim3 - tmin:\n", " index = list(np.where(t + time_lags < dim3))[0]\n", " elif t < tmax:\n", " Qt = np.zeros(rank)\n", " index = list(np.where(t + time_lags >= tmax))[0]\n", " if t < dim3 - tmin:\n", " Mt = mat2.copy()\n", " temp = np.zeros((rank * d, len(index)))\n", " n = 0\n", " for k in index:\n", " temp[:, n] = X[t + time_lags[k] - time_lags, :].reshape(rank * d)\n", " n += 1\n", " temp0 = X[t + time_lags[index], :].T - np.einsum('ijk, ik -> jk', A0[:, :, index], temp)\n", " Nt = np.einsum('kij, jk -> i', mat1[index, :, :], temp0)\n", " \n", " var3[:, :, t] = var3[:, :, t] + Mt\n", " if t < tmax:\n", " var3[:, :, t] = var3[:, :, t] - Lambda_x + np.eye(rank)\n", " X[t, :] = mvnrnd_pre(solve(var3[:, :, t], var4[:, t] + Nt + Qt), var3[:, :, t])\n", "\n", " return X" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "def compute_mape(var, var_hat):\n", " return np.sum(np.abs(var - var_hat) / var) / var.shape[0]\n", "\n", "def compute_rmse(var, var_hat):\n", " return np.sqrt(np.sum((var - var_hat) ** 2) / var.shape[0])" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "def ar4cast(theta, X, Lambda_x, time_lags, multi_step):\n", " dim, rank = X.shape\n", " d = time_lags.shape[0]\n", " X_new = np.append(X, np.zeros((multi_step, rank)), axis = 0)\n", " for t in range(multi_step):\n", " X_new[dim + t, :] = mvnrnd_pre(np.einsum('kr, kr -> r', theta, X_new[dim + t - time_lags, :]), Lambda_x)\n", " return X_new" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### BTRTF Implementation\n", "\n" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "def BTRTF(dense_tensor, sparse_tensor, init, rank, time_lags, burn_iter, gibbs_iter, multi_step = 1):\n", " \"\"\"Bayesian Temporal Regularized Tensor Factorization, BTRTF.\"\"\"\n", " \n", " dim1, dim2, dim3 = sparse_tensor.shape\n", " d = time_lags.shape[0]\n", " U = init[\"U\"]\n", " V = init[\"V\"]\n", " X = init[\"X\"]\n", " theta = 0.01 * np.random.randn(d, rank)\n", " if np.isnan(sparse_tensor).any() == False:\n", " ind = sparse_tensor != 0\n", " pos_obs = np.where(ind)\n", " pos_test = np.where((dense_tensor != 0) & (sparse_tensor == 0))\n", " elif np.isnan(sparse_tensor).any() == True:\n", " pos_test = np.where((dense_tensor != 0) & (np.isnan(sparse_tensor)))\n", " ind = ~np.isnan(sparse_tensor)\n", " pos_obs = np.where(ind)\n", " sparse_tensor[np.isnan(sparse_tensor)] = 0\n", " dense_test = dense_tensor[pos_test]\n", " del dense_tensor\n", " U_plus = np.zeros((dim1, rank, gibbs_iter))\n", " V_plus = np.zeros((dim2, rank, gibbs_iter))\n", " X_plus = np.zeros((dim3 + multi_step, rank, gibbs_iter))\n", " theta_plus = np.zeros((d, rank, gibbs_iter))\n", " tau_plus = np.zeros(gibbs_iter)\n", " Lambda_plus = np.zeros((rank, rank, gibbs_iter))\n", " temp_hat = np.zeros(len(pos_test[0]))\n", " show_iter = 500\n", " tau = 1\n", " tensor_hat_plus = np.zeros(sparse_tensor.shape)\n", " tensor_new_plus = np.zeros((dim1, dim2, multi_step))\n", " for it in range(burn_iter + gibbs_iter):\n", " tau_ind = tau * ind\n", " tau_sparse_tensor = tau * sparse_tensor\n", " U = sample_factor_u(tau_sparse_tensor, tau_ind, U, V, X)\n", " V = sample_factor_v(tau_sparse_tensor, tau_ind, U, V, X)\n", " Lambda_x = sample_Lambda_x(X, theta, time_lags)\n", " theta = sample_theta(X, theta, Lambda_x, time_lags)\n", " X = sample_factor_x(tau_sparse_tensor, tau_ind, time_lags, U, V, X, theta, Lambda_x)\n", " tensor_hat = np.einsum('is, js, ts -> ijt', U, V, X)\n", " tau = np.random.gamma(1e-6 + 0.5 * np.sum(ind), \n", " 1 / (1e-6 + 0.5 * np.sum(((sparse_tensor - tensor_hat) ** 2) * ind)))\n", " temp_hat += tensor_hat[pos_test]\n", " if (it + 1) % show_iter == 0 and it < burn_iter:\n", " temp_hat = temp_hat / show_iter\n", " print('Iter: {}'.format(it + 1))\n", " print('MAPE: {:.6}'.format(compute_mape(dense_test, temp_hat)))\n", " print('RMSE: {:.6}'.format(compute_rmse(dense_test, temp_hat)))\n", " temp_hat = np.zeros(len(pos_test[0]))\n", " print()\n", " if it + 1 > burn_iter:\n", " U_plus[:, :, it - burn_iter] = U\n", " V_plus[:, :, it - burn_iter] = V\n", " theta_plus[:, :, it - burn_iter] = theta\n", " Lambda_plus[:, :, it - burn_iter] = Lambda_x\n", " tau_plus[it - burn_iter] = tau\n", " tensor_hat_plus += tensor_hat\n", " X0 = ar4cast(theta, X, Lambda_x, time_lags, multi_step)\n", " X_plus[:, :, it - burn_iter] = X0\n", " tensor_new_plus += np.einsum('is, js, ts -> ijt', U, V, X0[- multi_step :, :])\n", " tensor_hat = tensor_hat_plus / gibbs_iter\n", " print('Imputation MAPE: {:.6}'.format(compute_mape(dense_test, tensor_hat[:, :, : dim3][pos_test])))\n", " print('Imputation RMSE: {:.6}'.format(compute_rmse(dense_test, tensor_hat[:, :, : dim3][pos_test])))\n", " print()\n", " tensor_hat = np.append(tensor_hat, tensor_new_plus / gibbs_iter, axis = 2)\n", " tensor_hat[tensor_hat < 0] = 0\n", " \n", " return tensor_hat, U_plus, V_plus, X_plus, theta_plus, Lambda_plus, tau_plus" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "def sample_factor_x_partial(tau_sparse_tensor, tau_ind, time_lags, U, V, X, theta, Lambda_x, back_step):\n", " \"\"\"Sampling T-by-R factor matrix X.\"\"\"\n", " \n", " dim3, rank = X.shape\n", " tmax = np.max(time_lags)\n", " tmin = np.min(time_lags)\n", " d = time_lags.shape[0]\n", " A = np.zeros((d * rank, rank))\n", " for k in range(d):\n", " A[k * rank : (k + 1) * rank, :] = np.diag(theta[k, :])\n", " A0 = np.dstack([A] * d)\n", " for k in range(d):\n", " A0[k * rank : (k + 1) * rank, :, k] = 0\n", " mat0 = Lambda_x @ A.T\n", " mat1 = np.einsum('kij, jt -> kit', A.reshape([d, rank, rank]), Lambda_x)\n", " mat2 = np.einsum('kit, kjt -> ij', mat1, A.reshape([d, rank, rank]))\n", " \n", " var1 = kr_prod(V, U).T\n", " var2 = kr_prod(var1, var1)\n", " var3 = (var2 @ ten2mat(tau_ind[:, :, - back_step :], 2).T).reshape([rank, rank, back_step]) + Lambda_x[:, :, None]\n", " var4 = var1 @ ten2mat(tau_sparse_tensor[:, :, - back_step :], 2).T\n", " for t in range(dim3 - back_step, dim3):\n", " Mt = np.zeros((rank, rank))\n", " Nt = np.zeros(rank)\n", " Qt = mat0 @ X[t - time_lags, :].reshape(rank * d)\n", " index = list(range(0, d))\n", " if t >= dim3 - tmax and t < dim3 - tmin:\n", " index = list(np.where(t + time_lags < dim3))[0]\n", " if t < dim3 - tmin:\n", " Mt = mat2.copy()\n", " temp = np.zeros((rank * d, len(index)))\n", " n = 0\n", " for k in index:\n", " temp[:, n] = X[t + time_lags[k] - time_lags, :].reshape(rank * d)\n", " n += 1\n", " temp0 = X[t + time_lags[index], :].T - np.einsum('ijk, ik -> jk', A0[:, :, index], temp)\n", " Nt = np.einsum('kij, jk -> i', mat1[index, :, :], temp0)\n", " var3[:, :, t + back_step - dim3] = var3[:, :, t + back_step - dim3] + Mt\n", " X[t, :] = mvnrnd_pre(solve(var3[:, :, t + back_step - dim3], \n", " var4[:, t + back_step - dim3] + Nt + Qt), var3[:, :, t + back_step - dim3])\n", " return X" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "def BTRTF_partial(dense_tensor, sparse_tensor, init, rank, time_lags, burn_iter, gibbs_iter, multi_step = 1, gamma = 10):\n", " \"\"\"Bayesian Temporal Regularized Tensor Factorization, BTRTF.\"\"\"\n", " \n", " dim1, dim2, dim3 = sparse_tensor.shape\n", " U_plus = init[\"U_plus\"]\n", " V_plus = init[\"V_plus\"]\n", " X_plus = init[\"X_plus\"]\n", " theta_plus = init[\"theta_plus\"]\n", " Lambda_plus = init[\"Lambda_plus\"]\n", " tau_plus = init[\"tau_plus\"]\n", " if np.isnan(sparse_tensor).any() == False:\n", " ind = sparse_tensor != 0\n", " pos_obs = np.where(ind)\n", " elif np.isnan(sparse_tensor).any() == True:\n", " ind = ~np.isnan(sparse_tensor)\n", " pos_obs = np.where(ind)\n", " sparse_tensor[np.isnan(sparse_tensor)] = 0\n", " X_new_plus = np.zeros((dim3 + multi_step, rank, gibbs_iter))\n", " tensor_new_plus = np.zeros((dim1, dim2, multi_step))\n", " back_step = gamma * multi_step\n", " for it in range(gibbs_iter):\n", " tau_ind = tau_plus[it] * ind\n", " tau_sparse_tensor = tau_plus[it] * sparse_tensor\n", " X = sample_factor_x_partial(tau_sparse_tensor, tau_ind, time_lags, U_plus[:, :, it], V_plus[:, :, it],\n", " X_plus[:, :, it], theta_plus[:, :, it], Lambda_plus[:, :, it], back_step)\n", " X0 = ar4cast(theta_plus[:, :, it], X, Lambda_plus[:, :, it], time_lags, multi_step)\n", " X_new_plus[:, :, it] = X0\n", " tensor_new_plus += np.einsum('is, js, ts -> ijt', U_plus[:, :, it], V_plus[:, :, it], X0[- multi_step :, :])\n", " tensor_hat = tensor_new_plus / gibbs_iter\n", " tensor_hat[tensor_hat < 0] = 0\n", " \n", " return tensor_hat, U_plus, V_plus, X_new_plus, theta_plus, Lambda_plus, tau_plus" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "from ipywidgets import IntProgress\n", "from IPython.display import display\n", "\n", "def BTRTF_forecast(dense_tensor, sparse_tensor, pred_step, multi_step, \n", " rank, time_lags, burn_iter, gibbs_iter, gamma = 10):\n", " dim1, dim2, T = dense_tensor.shape\n", " start_time = T - pred_step\n", " max_count = int(np.ceil(pred_step / multi_step))\n", " tensor_hat = np.zeros((dim1, dim2, max_count * multi_step))\n", " f = IntProgress(min = 0, max = max_count) # instantiate the bar\n", " display(f) # display the bar\n", " for t in range(max_count):\n", " if t == 0:\n", " init = {\"U\": 0.1 * np.random.randn(dim1, rank), \n", " \"V\": 0.1 * np.random.randn(dim2, rank),\n", " \"X\": 0.1 * np.random.randn(start_time, rank)}\n", " tensor, U, V, X_new, theta, Lambda_x, tau = BTRTF(dense_tensor[:, :, : start_time], \n", " sparse_tensor[:, :, : start_time], init, rank, time_lags, burn_iter, gibbs_iter, multi_step)\n", " else:\n", " init = {\"U_plus\": U, \"V_plus\": V, \"X_plus\": X_new, \"theta_plus\": theta, \"Lambda_plus\": Lambda_x, \"tau_plus\": tau}\n", " tensor, U, V, X_new, theta, Lambda_x, tau = BTRTF_partial(dense_tensor[:, :, : start_time + t * multi_step], \n", " sparse_tensor[:, :, : start_time + t * multi_step], init, \n", " rank, time_lags, burn_iter, gibbs_iter, multi_step, gamma)\n", " tensor_hat[:, :, t * multi_step : (t + 1) * multi_step] = tensor[:, :, - multi_step :]\n", " f.value = t\n", " small_dense_tensor = dense_tensor[:, :, start_time : T]\n", " pos = np.where(small_dense_tensor != 0)\n", " print('Prediction MAPE: {:.6}'.format(compute_mape(small_dense_tensor[pos], tensor_hat[pos])))\n", " print('Prediction RMSE: {:.6}'.format(compute_rmse(small_dense_tensor[pos], tensor_hat[pos])))\n", " print()\n", " return tensor_hat" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Eavluation on NYC Taxi Flow Data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Scenario setting**:\n", "\n", "- Tensor size: $30\\times 30\\times 1461$ (origin, destination, time)\n", "- Test on original data\n" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "import scipy.io\n", "import warnings\n", "warnings.simplefilter('ignore')\n", "\n", "dense_tensor = scipy.io.loadmat('../datasets/NYC-data-set/tensor.mat')['tensor'].astype(np.float32)\n", "sparse_tensor = dense_tensor.copy()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Model setting**:\n", "\n", "- Low rank: 30\n", "- Total (rolling) prediction horizons: 7 * 24\n", "- Time lags: {1, 2, 24, 24 + 1, 24 + 2, 7 * 24, 7 * 24 + 1, 7 * 24 + 2}\n", "- The number of burn-in iterations: 1000\n", "- The number of Gibbs iterations: 200" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Prediction time horizon (delta) = 2.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "53cb17060afd49cab9b0f01fb2abe301", "version_major": 2, "version_minor": 0 }, "text/plain": [ "IntProgress(value=0, max=84)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Iter: 500\n", "MAPE: nan\n", "RMSE: nan\n", "\n", "Iter: 1000\n", "MAPE: nan\n", "RMSE: nan\n", "\n", "Imputation MAPE: nan\n", "Imputation RMSE: nan\n", "\n", "Prediction MAPE: 0.507437\n", "Prediction RMSE: 5.46837\n", "\n", "Running time: 1665 seconds\n", "\n", "Prediction time horizon (delta) = 4.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b91b447cfdec46ff916e1fd4909ae4c2", "version_major": 2, "version_minor": 0 }, "text/plain": [ "IntProgress(value=0, max=42)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Iter: 500\n", "MAPE: nan\n", "RMSE: nan\n", "\n", "Iter: 1000\n", "MAPE: nan\n", "RMSE: nan\n", "\n", "Imputation MAPE: nan\n", "Imputation RMSE: nan\n", "\n", "Prediction MAPE: 0.508875\n", "Prediction RMSE: 5.71892\n", "\n", "Running time: 1594 seconds\n", "\n", "Prediction time horizon (delta) = 6.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9ef67febe26b4fe4b2506212313ef465", "version_major": 2, "version_minor": 0 }, "text/plain": [ "IntProgress(value=0, max=28)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Iter: 500\n", "MAPE: nan\n", "RMSE: nan\n", "\n", "Iter: 1000\n", "MAPE: nan\n", "RMSE: nan\n", "\n", "Imputation MAPE: nan\n", "Imputation RMSE: nan\n", "\n", "Prediction MAPE: 0.503395\n", "Prediction RMSE: 5.9437\n", "\n", "Running time: 1638 seconds\n", "\n" ] } ], "source": [ "import time\n", "rank = 30\n", "pred_step = 7 * 24\n", "time_lags = np.array([1, 2, 3, 24, 25, 26, 7 * 24, 7 * 24 + 1, 7 * 24 + 2])\n", "burn_iter = 1000\n", "gibbs_iter = 200\n", "for multi_step in [2, 4, 6]:\n", " start = time.time()\n", " print('Prediction time horizon (delta) = {}.'.format(multi_step))\n", " tensor_hat = BTRTF_forecast(dense_tensor, sparse_tensor, pred_step, multi_step, rank, time_lags, burn_iter, gibbs_iter)\n", " end = time.time()\n", " print('Running time: %d seconds'%(end - start))\n", " print()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Scenario setting**:\n", "\n", "- Tensor size: $30\\times 30\\times 1461$ (origin, destination, time)\n", "- Random missing (RM)\n", "- 40% missing rate\n" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "import scipy.io\n", "import numpy as np\n", "np.random.seed(1000)\n", "\n", "dense_tensor = scipy.io.loadmat('../datasets/NYC-data-set/tensor.mat')['tensor'].astype(np.float32)\n", "dim = dense_tensor.shape\n", "missing_rate = 0.4 # Random missing (RM)\n", "sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1], dim[2]) + 0.5 - missing_rate)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Model setting**:\n", "\n", "- Low rank: 30\n", "- Total (rolling) prediction horizons: 7 * 24\n", "- Time lags: {1, 2, 24, 24 + 1, 24 + 2, 7 * 24, 7 * 24 + 1, 7 * 24 + 2}\n", "- The number of burn-in iterations: 1000\n", "- The number of Gibbs iterations: 200" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Prediction time horizon (delta) = 2.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "4ab1248e87e14ea98a4cb054e2d419d9", "version_major": 2, "version_minor": 0 }, "text/plain": [ "IntProgress(value=0, max=84)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Iter: 500\n", "MAPE: 0.478202\n", "RMSE: 4.84207\n", "\n", "Iter: 1000\n", "MAPE: 0.484866\n", "RMSE: 4.93907\n", "\n", "Imputation MAPE: 0.484998\n", "Imputation RMSE: 4.9549\n", "\n", "Prediction MAPE: 0.485314\n", "Prediction RMSE: 6.46555\n", "\n", "Running time: 1516 seconds\n", "\n", "Prediction time horizon (delta) = 4.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7d9c7a1325744b14abc725b0575f6f35", "version_major": 2, "version_minor": 0 }, "text/plain": [ "IntProgress(value=0, max=42)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Iter: 500\n", "MAPE: 0.511286\n", "RMSE: 4.78762\n", "\n", "Iter: 1000\n", "MAPE: 0.52187\n", "RMSE: 4.85227\n", "\n", "Imputation MAPE: 0.523542\n", "Imputation RMSE: 4.86575\n", "\n", "Prediction MAPE: 0.506951\n", "Prediction RMSE: 5.75299\n", "\n", "Running time: 1396 seconds\n", "\n", "Prediction time horizon (delta) = 6.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "cb349a47aa764e578e3d6790b00a63a6", "version_major": 2, "version_minor": 0 }, "text/plain": [ "IntProgress(value=0, max=28)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Iter: 500\n", "MAPE: 0.50354\n", "RMSE: 4.76326\n", "\n", "Iter: 1000\n", "MAPE: 0.513024\n", "RMSE: 4.81782\n", "\n", "Imputation MAPE: 0.514024\n", "Imputation RMSE: 4.82617\n", "\n", "Prediction MAPE: 0.508809\n", "Prediction RMSE: 5.91545\n", "\n", "Running time: 1822 seconds\n", "\n" ] } ], "source": [ "import time\n", "rank = 30\n", "pred_step = 7 * 24\n", "time_lags = np.array([1, 2, 3, 24, 25, 26, 7 * 24, 7 * 24 + 1, 7 * 24 + 2])\n", "burn_iter = 1000\n", "gibbs_iter = 200\n", "for multi_step in [2, 4, 6]:\n", " start = time.time()\n", " print('Prediction time horizon (delta) = {}.'.format(multi_step))\n", " tensor_hat = BTRTF_forecast(dense_tensor, sparse_tensor, pred_step, multi_step, rank, time_lags, burn_iter, gibbs_iter)\n", " end = time.time()\n", " print('Running time: %d seconds'%(end - start))\n", " print()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Scenario setting**:\n", "\n", "- Tensor size: $30\\times 30\\times 1461$ (origin, destination, time)\n", "- Random missing (RM)\n", "- 60% missing rate\n" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "import scipy.io\n", "import numpy as np\n", "np.random.seed(1000)\n", "\n", "dense_tensor = scipy.io.loadmat('../datasets/NYC-data-set/tensor.mat')['tensor'].astype(np.float32)\n", "dim = dense_tensor.shape\n", "missing_rate = 0.6 # Random missing (RM)\n", "sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1], dim[2]) + 0.5 - missing_rate)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Model setting**:\n", "\n", "- Low rank: 30\n", "- Total (rolling) prediction horizons: 7 * 24\n", "- Time lags: {1, 2, 24, 24 + 1, 24 + 2, 7 * 24, 7 * 24 + 1, 7 * 24 + 2}\n", "- The number of burn-in iterations: 1000\n", "- The number of Gibbs iterations: 200" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Prediction time horizon (delta) = 2.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "8630171126f14026bf0fac9c1754fa6c", "version_major": 2, "version_minor": 0 }, "text/plain": [ "IntProgress(value=0, max=84)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Iter: 500\n", "MAPE: 0.486001\n", "RMSE: 5.26879\n", "\n", "Iter: 1000\n", "MAPE: 0.491243\n", "RMSE: 5.26529\n", "\n", "Imputation MAPE: 0.491764\n", "Imputation RMSE: 5.30838\n", "\n", "Prediction MAPE: 0.493182\n", "Prediction RMSE: 7.27568\n", "\n", "Running time: 1628 seconds\n", "\n", "Prediction time horizon (delta) = 4.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "bd9599f8285f4a4ba60a481ce38723c4", "version_major": 2, "version_minor": 0 }, "text/plain": [ "IntProgress(value=0, max=42)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Iter: 500\n", "MAPE: 0.51287\n", "RMSE: 5.07456\n", "\n", "Iter: 1000\n", "MAPE: 0.52265\n", "RMSE: 5.17151\n", "\n", "Imputation MAPE: 0.525277\n", "Imputation RMSE: 5.18165\n", "\n", "Prediction MAPE: 0.505567\n", "Prediction RMSE: 5.94686\n", "\n", "Running time: 1808 seconds\n", "\n", "Prediction time horizon (delta) = 6.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "63a8b8f35919474f9199a988849b7192", "version_major": 2, "version_minor": 0 }, "text/plain": [ "IntProgress(value=0, max=28)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Iter: 500\n", "MAPE: 0.516195\n", "RMSE: 5.02277\n", "\n", "Iter: 1000\n", "MAPE: 0.522737\n", "RMSE: 5.08686\n", "\n", "Imputation MAPE: 0.520651\n", "Imputation RMSE: 5.14705\n", "\n", "Prediction MAPE: 0.50992\n", "Prediction RMSE: 6.15616\n", "\n", "Running time: 1347 seconds\n", "\n" ] } ], "source": [ "import time\n", "rank = 30\n", "pred_step = 7 * 24\n", "time_lags = np.array([1, 2, 3, 24, 25, 26, 7 * 24, 7 * 24 + 1, 7 * 24 + 2])\n", "burn_iter = 1000\n", "gibbs_iter = 200\n", "for multi_step in [2, 4, 6]:\n", " start = time.time()\n", " print('Prediction time horizon (delta) = {}.'.format(multi_step))\n", " tensor_hat = BTRTF_forecast(dense_tensor, sparse_tensor, pred_step, multi_step, rank, time_lags, burn_iter, gibbs_iter)\n", " end = time.time()\n", " print('Running time: %d seconds'%(end - start))\n", " print()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Scenario setting**:\n", "\n", "- Tensor size: $30\\times 30\\times 1461$ (origin, destination, time)\n", "- Non-random missing (NM)\n", "- 40% missing rate\n" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "import scipy.io\n", "import numpy as np\n", "np.random.seed(1000)\n", "\n", "dense_tensor = scipy.io.loadmat('../datasets/NYC-data-set/tensor.mat')['tensor']\n", "dim = dense_tensor.shape\n", "nm_tensor = np.random.rand(dim[0], dim[1], dim[2])\n", "missing_rate = 0.4 # Non-random missing (NM)\n", "binary_tensor = np.zeros(dense_tensor.shape)\n", "for i1 in range(dim[0]):\n", " for i2 in range(dim[1]):\n", " for i3 in range(61):\n", " binary_tensor[i1, i2, i3 * 24 : (i3 + 1) * 24] = np.round(nm_tensor[i1, i2, i3] + 0.5 - missing_rate)\n", "sparse_tensor = dense_tensor * binary_tensor" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Model setting**:\n", "\n", "- Low rank: 30\n", "- Total (rolling) prediction horizons: 7 * 24\n", "- Time lags: {1, 2, 24, 24 + 1, 24 + 2, 7 * 24, 7 * 24 + 1, 7 * 24 + 2}\n", "- The number of burn-in iterations: 1000\n", "- The number of Gibbs iterations: 200" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Prediction time horizon (delta) = 2.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "41551ac08593448fa47e1eb33ef54c10", "version_major": 2, "version_minor": 0 }, "text/plain": [ "IntProgress(value=0, max=84)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Iter: 500\n", "MAPE: 0.47176\n", "RMSE: 4.99266\n", "\n", "Iter: 1000\n", "MAPE: 0.478566\n", "RMSE: 5.16785\n", "\n", "Imputation MAPE: 0.478796\n", "Imputation RMSE: 5.19043\n", "\n", "Prediction MAPE: 0.485589\n", "Prediction RMSE: 6.30386\n", "\n", "Running time: 1599 seconds\n", "\n", "Prediction time horizon (delta) = 4.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f08f5ee5b6d74d4298526db132ae9b49", "version_major": 2, "version_minor": 0 }, "text/plain": [ "IntProgress(value=0, max=42)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Iter: 500\n", "MAPE: 0.509326\n", "RMSE: 4.92351\n", "\n", "Iter: 1000\n", "MAPE: 0.52759\n", "RMSE: 5.02248\n", "\n", "Imputation MAPE: 0.527748\n", "Imputation RMSE: 5.04027\n", "\n", "Prediction MAPE: 0.503853\n", "Prediction RMSE: 5.87668\n", "\n", "Running time: 1518 seconds\n", "\n", "Prediction time horizon (delta) = 6.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "94f8570a076148f0a78290e1091a8426", "version_major": 2, "version_minor": 0 }, "text/plain": [ "IntProgress(value=0, max=28)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Iter: 500\n", "MAPE: 0.51339\n", "RMSE: 4.87887\n", "\n", "Iter: 1000\n", "MAPE: 0.525974\n", "RMSE: 5.02457\n", "\n", "Imputation MAPE: 0.528036\n", "Imputation RMSE: 5.03888\n", "\n", "Prediction MAPE: 0.500864\n", "Prediction RMSE: 6.11974\n", "\n", "Running time: 1512 seconds\n", "\n" ] } ], "source": [ "import time\n", "rank = 30\n", "pred_step = 7 * 24\n", "time_lags = np.array([1, 2, 3, 24, 25, 26, 7 * 24, 7 * 24 + 1, 7 * 24 + 2])\n", "burn_iter = 1000\n", "gibbs_iter = 200\n", "for multi_step in [2, 4, 6]:\n", " start = time.time()\n", " print('Prediction time horizon (delta) = {}.'.format(multi_step))\n", " tensor_hat = BTRTF_forecast(dense_tensor, sparse_tensor, pred_step, multi_step, rank, time_lags, burn_iter, gibbs_iter)\n", " end = time.time()\n", " print('Running time: %d seconds'%(end - start))\n", " print()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Evaluation on Pacific Surface Temperature Data\n", "\n", "**Scenario setting**:\n", "\n", "- Tensor size: $30\\times 84\\times 396$ (location x, location y, month)\n", "- Test on original data" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import warnings\n", "warnings.simplefilter('ignore')\n", "\n", "dense_tensor = np.load('../datasets/Temperature-data-set/tensor.npy').astype(np.float32)\n", "pos = np.where(dense_tensor[:, 0, :] > 50)\n", "dense_tensor[pos[0], :, pos[1]] = 0\n", "sparse_tensor = dense_tensor.copy()" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Prediction time horizon (delta) = 2.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "255aa0b73c5a4373bd0010405ff5be88", "version_major": 2, "version_minor": 0 }, "text/plain": [ "IntProgress(value=0, max=60)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Iter: 500\n", "MAPE: nan\n", "RMSE: nan\n", "\n", "Iter: 1000\n", "MAPE: nan\n", "RMSE: nan\n", "\n", "Imputation MAPE: nan\n", "Imputation RMSE: nan\n", "\n", "Prediction MAPE: 0.163308\n", "Prediction RMSE: 5.58539\n", "\n", "Running time: 849 seconds\n", "\n" ] } ], "source": [ "import time\n", "rank = 30\n", "pred_step = 10 * 12\n", "time_lags = np.array([1, 2, 3, 12, 13, 14, 2 * 12, 2 * 12 + 1, 2 * 12 + 2])\n", "burn_iter = 1000\n", "gibbs_iter = 200\n", "multi_step = 2\n", "start = time.time()\n", "print('Prediction time horizon (delta) = {}.'.format(multi_step))\n", "tensor_hat = BTRTF_forecast(dense_tensor, sparse_tensor, pred_step, multi_step, \n", " rank, time_lags, burn_iter, gibbs_iter, gamma = 30)\n", "end = time.time()\n", "print('Running time: %d seconds'%(end - start))\n", "print()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Prediction time horizon (delta) = 4.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b22f1064663842849bb46d9baf494f66", "version_major": 2, "version_minor": 0 }, "text/plain": [ "IntProgress(value=0, max=30)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Iter: 500\n", "MAPE: nan\n", "RMSE: nan\n", "\n", "Iter: 1000\n", "MAPE: nan\n", "RMSE: nan\n", "\n", "Imputation MAPE: nan\n", "Imputation RMSE: nan\n", "\n", "Prediction MAPE: 0.219357\n", "Prediction RMSE: 7.52495\n", "\n", "Running time: 676 seconds\n", "\n", "Prediction time horizon (delta) = 6.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a7e24e74bf6c4658884243b570327a40", "version_major": 2, "version_minor": 0 }, "text/plain": [ "IntProgress(value=0, max=20)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Iter: 500\n", "MAPE: nan\n", "RMSE: nan\n", "\n", "Iter: 1000\n", "MAPE: nan\n", "RMSE: nan\n", "\n", "Imputation MAPE: nan\n", "Imputation RMSE: nan\n", "\n", "Prediction MAPE: 0.172542\n", "Prediction RMSE: 5.59173\n", "\n", "Running time: 642 seconds\n", "\n" ] } ], "source": [ "import time\n", "rank = 30\n", "pred_step = 10 * 12\n", "time_lags = np.array([1, 2, 3, 12, 13, 14, 2 * 12, 2 * 12 + 1, 2 * 12 + 2])\n", "burn_iter = 1000\n", "gibbs_iter = 200\n", "for multi_step in [4, 6]:\n", " start = time.time()\n", " print('Prediction time horizon (delta) = {}.'.format(multi_step))\n", " tensor_hat = BTRTF_forecast(dense_tensor, sparse_tensor, pred_step, multi_step, \n", " rank, time_lags, burn_iter, gibbs_iter, gamma = 30)\n", " end = time.time()\n", " print('Running time: %d seconds'%(end - start))\n", " print()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Scenario setting**:\n", "\n", "- Tensor size: $30\\times 84\\times 396$ (location x, location y, month)\n", "- Random missing (RM)\n", "- 40% missing rate" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "np.random.seed(1000)\n", "\n", "dense_tensor = np.load('../datasets/Temperature-data-set/tensor.npy').astype(np.float32)\n", "pos = np.where(dense_tensor[:, 0, :] > 50)\n", "dense_tensor[pos[0], :, pos[1]] = 0\n", "random_tensor = np.random.rand(dense_tensor.shape[0], dense_tensor.shape[1], dense_tensor.shape[2])\n", "missing_rate = 0.4\n", "\n", "## Random missing (RM)\n", "binary_tensor = np.round(random_tensor + 0.5 - missing_rate)\n", "sparse_tensor = dense_tensor.copy()\n", "sparse_tensor[binary_tensor == 0] = np.nan\n", "sparse_tensor[sparse_tensor == 0] = np.nan" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Prediction time horizon (delta) = 2.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "26b1bc794a2946c0a50a62072de2e264", "version_major": 2, "version_minor": 0 }, "text/plain": [ "IntProgress(value=0, max=60)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Iter: 500\n", "MAPE: 0.0146505\n", "RMSE: 0.488159\n", "\n", "Iter: 1000\n", "MAPE: 0.0146261\n", "RMSE: 0.484394\n", "\n", "Imputation MAPE: 0.0145402\n", "Imputation RMSE: 0.480634\n", "\n", "Prediction MAPE: 0.100703\n", "Prediction RMSE: 3.29517\n", "\n", "Running time: 759 seconds\n", "\n" ] } ], "source": [ "import time\n", "rank = 30\n", "pred_step = 10 * 12\n", "time_lags = np.array([1, 2, 3, 12, 13, 14, 2 * 12, 2 * 12 + 1, 2 * 12 + 2])\n", "burn_iter = 1000\n", "gibbs_iter = 200\n", "multi_step = 2\n", "start = time.time()\n", "print('Prediction time horizon (delta) = {}.'.format(multi_step))\n", "tensor_hat = BTRTF_forecast(dense_tensor, sparse_tensor, pred_step, multi_step, \n", " rank, time_lags, burn_iter, gibbs_iter, gamma = 30)\n", "end = time.time()\n", "print('Running time: %d seconds'%(end - start))\n", "print()" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Prediction time horizon (delta) = 4.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "cf0421fd18ec4dcc9936402b0a6f358f", "version_major": 2, "version_minor": 0 }, "text/plain": [ "IntProgress(value=0, max=30)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Iter: 500\n", "MAPE: 0.0147999\n", "RMSE: 0.495504\n", "\n", "Iter: 1000\n", "MAPE: 0.0145005\n", "RMSE: 0.482804\n", "\n", "Imputation MAPE: 0.0144735\n", "Imputation RMSE: 0.481308\n", "\n", "Prediction MAPE: 0.146639\n", "Prediction RMSE: 4.71453\n", "\n", "Running time: 691 seconds\n", "\n", "Prediction time horizon (delta) = 6.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "450be49ca6a044299704b853a8b50a21", "version_major": 2, "version_minor": 0 }, "text/plain": [ "IntProgress(value=0, max=20)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Iter: 500\n", "MAPE: 0.0148263\n", "RMSE: 0.492939\n", "\n", "Iter: 1000\n", "MAPE: 0.0144807\n", "RMSE: 0.479886\n", "\n", "Imputation MAPE: 0.0144055\n", "Imputation RMSE: 0.476987\n", "\n", "Prediction MAPE: 0.145679\n", "Prediction RMSE: 4.71419\n", "\n", "Running time: 664 seconds\n", "\n" ] } ], "source": [ "import time\n", "rank = 30\n", "pred_step = 10 * 12\n", "time_lags = np.array([1, 2, 3, 12, 13, 14, 2 * 12, 2 * 12 + 1, 2 * 12 + 2])\n", "burn_iter = 1000\n", "gibbs_iter = 200\n", "for multi_step in [4, 6]:\n", " start = time.time()\n", " print('Prediction time horizon (delta) = {}.'.format(multi_step))\n", " tensor_hat = BTRTF_forecast(dense_tensor, sparse_tensor, pred_step, multi_step, \n", " rank, time_lags, burn_iter, gibbs_iter, gamma = 30)\n", " end = time.time()\n", " print('Running time: %d seconds'%(end - start))\n", " print()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Scenario setting**:\n", "\n", "- Tensor size: $30\\times 84\\times 396$ (location x, location y, month)\n", "- Random missing (RM)\n", "- 60% missing rate" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "np.random.seed(1000)\n", "\n", "dense_tensor = np.load('../datasets/Temperature-data-set/tensor.npy').astype(np.float32)\n", "pos = np.where(dense_tensor[:, 0, :] > 50)\n", "dense_tensor[pos[0], :, pos[1]] = 0\n", "random_tensor = np.random.rand(dense_tensor.shape[0], dense_tensor.shape[1], dense_tensor.shape[2])\n", "missing_rate = 0.6\n", "\n", "## Random missing (RM)\n", "binary_tensor = np.round(random_tensor + 0.5 - missing_rate)\n", "sparse_tensor = dense_tensor.copy()\n", "sparse_tensor[binary_tensor == 0] = np.nan\n", "sparse_tensor[sparse_tensor == 0] = np.nan" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Prediction time horizon (delta) = 2.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9513cde623344147b9b7c37f74dc2595", "version_major": 2, "version_minor": 0 }, "text/plain": [ "IntProgress(value=0, max=60)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Iter: 500\n", "MAPE: 0.0150097\n", "RMSE: 0.502107\n", "\n", "Iter: 1000\n", "MAPE: 0.0148378\n", "RMSE: 0.49437\n", "\n", "Imputation MAPE: 0.0147177\n", "Imputation RMSE: 0.489617\n", "\n", "Prediction MAPE: 0.175718\n", "Prediction RMSE: 5.57326\n", "\n", "Running time: 790 seconds\n", "\n" ] } ], "source": [ "import time\n", "rank = 30\n", "pred_step = 10 * 12\n", "time_lags = np.array([1, 2, 3, 12, 13, 14, 2 * 12, 2 * 12 + 1, 2 * 12 + 2])\n", "burn_iter = 1000\n", "gibbs_iter = 200\n", "multi_step = 2\n", "start = time.time()\n", "print('Prediction time horizon (delta) = {}.'.format(multi_step))\n", "tensor_hat = BTRTF_forecast(dense_tensor, sparse_tensor, pred_step, multi_step, \n", " rank, time_lags, burn_iter, gibbs_iter, gamma = 30)\n", "end = time.time()\n", "print('Running time: %d seconds'%(end - start))\n", "print()" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Prediction time horizon (delta) = 4.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9aa8e1b9bd8b48c09794e698b653658b", "version_major": 2, "version_minor": 0 }, "text/plain": [ "IntProgress(value=0, max=30)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Iter: 500\n", "MAPE: 0.0149364\n", "RMSE: 0.496529\n", "\n", "Iter: 1000\n", "MAPE: 0.0146882\n", "RMSE: 0.486221\n", "\n", "Imputation MAPE: 0.0146582\n", "Imputation RMSE: 0.485313\n", "\n", "Prediction MAPE: 0.133308\n", "Prediction RMSE: 4.16352\n", "\n", "Running time: 704 seconds\n", "\n", "Prediction time horizon (delta) = 6.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "39d391694fec4c9bb15006591ec9a104", "version_major": 2, "version_minor": 0 }, "text/plain": [ "IntProgress(value=0, max=20)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Iter: 500\n", "MAPE: 0.0148235\n", "RMSE: 0.493898\n", "\n", "Iter: 1000\n", "MAPE: 0.0144327\n", "RMSE: 0.480168\n", "\n", "Imputation MAPE: 0.0143945\n", "Imputation RMSE: 0.478618\n", "\n", "Prediction MAPE: 0.0907117\n", "Prediction RMSE: 3.1397\n", "\n", "Running time: 674 seconds\n", "\n" ] } ], "source": [ "import time\n", "rank = 30\n", "pred_step = 10 * 12\n", "time_lags = np.array([1, 2, 3, 12, 13, 14, 2 * 12, 2 * 12 + 1, 2 * 12 + 2])\n", "burn_iter = 1000\n", "gibbs_iter = 200\n", "for multi_step in [4, 6]:\n", " start = time.time()\n", " print('Prediction time horizon (delta) = {}.'.format(multi_step))\n", " tensor_hat = BTRTF_forecast(dense_tensor, sparse_tensor, pred_step, multi_step, \n", " rank, time_lags, burn_iter, gibbs_iter, gamma = 30)\n", " end = time.time()\n", " print('Running time: %d seconds'%(end - start))\n", " print()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Scenario setting**:\n", "\n", "- Tensor size: $30\\times 84\\times 396$ (location x, location y, month)\n", "- Non-random missing (NM)\n", "- 40% missing rate" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "np.random.seed(1000)\n", "\n", "dense_tensor = np.load('../datasets/Temperature-data-set/tensor.npy').astype(np.float32)\n", "pos = np.where(dense_tensor[:, 0, :] > 50)\n", "dense_tensor[pos[0], :, pos[1]] = 0\n", "random_tensor = np.random.rand(dense_tensor.shape[0], dense_tensor.shape[1], int(dense_tensor.shape[2] / 3))\n", "missing_rate = 0.4\n", "\n", "## Non-random missing (NM)\n", "binary_tensor = np.zeros(dense_tensor.shape)\n", "for i1 in range(dense_tensor.shape[0]):\n", " for i2 in range(dense_tensor.shape[1]):\n", " for i3 in range(int(dense_tensor.shape[2] / 3)):\n", " binary_tensor[i1, i2, i3 * 3 : (i3 + 1) * 3] = np.round(random_tensor[i1, i2, i3] + 0.5 - missing_rate)\n", "sparse_tensor = dense_tensor.copy()\n", "sparse_tensor[binary_tensor == 0] = np.nan\n", "sparse_tensor[sparse_tensor == 0] = np.nan" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Prediction time horizon (delta) = 2.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "e45bb13602534685a01cafcd132a3483", "version_major": 2, "version_minor": 0 }, "text/plain": [ "IntProgress(value=0, max=60)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Iter: 500\n", "MAPE: 0.0144751\n", "RMSE: 0.481766\n", "\n", "Iter: 1000\n", "MAPE: 0.0144173\n", "RMSE: 0.477133\n", "\n", "Imputation MAPE: 0.0143819\n", "Imputation RMSE: 0.476134\n", "\n", "Prediction MAPE: 0.285081\n", "Prediction RMSE: 9.44928\n", "\n", "Running time: 788 seconds\n", "\n" ] } ], "source": [ "import time\n", "rank = 30\n", "pred_step = 10 * 12\n", "time_lags = np.array([1, 2, 3, 12, 13, 14, 2 * 12, 2 * 12 + 1, 2 * 12 + 2])\n", "burn_iter = 1000\n", "gibbs_iter = 200\n", "multi_step = 2\n", "start = time.time()\n", "print('Prediction time horizon (delta) = {}.'.format(multi_step))\n", "tensor_hat = BTRTF_forecast(dense_tensor, sparse_tensor, pred_step, multi_step, \n", " rank, time_lags, burn_iter, gibbs_iter, gamma = 30)\n", "end = time.time()\n", "print('Running time: %d seconds'%(end - start))\n", "print()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Prediction time horizon (delta) = 4.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "daa295cfbf8440978c75392bffddb96a", "version_major": 2, "version_minor": 0 }, "text/plain": [ "IntProgress(value=0, max=30)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Iter: 500\n", "MAPE: 0.0147757\n", "RMSE: 0.491789\n", "\n", "Iter: 1000\n", "MAPE: 0.0146142\n", "RMSE: 0.484462\n", "\n", "Imputation MAPE: 0.0145513\n", "Imputation RMSE: 0.481513\n", "\n", "Prediction MAPE: 0.103239\n", "Prediction RMSE: 3.34341\n", "\n", "Running time: 708 seconds\n", "\n", "Prediction time horizon (delta) = 6.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "67d870f2eb05490099a5d0ac279bcf29", "version_major": 2, "version_minor": 0 }, "text/plain": [ "IntProgress(value=0, max=20)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Iter: 500\n", "MAPE: 0.0149642\n", "RMSE: 0.497966\n", "\n", "Iter: 1000\n", "MAPE: 0.0147473\n", "RMSE: 0.487566\n", "\n", "Imputation MAPE: 0.0146458\n", "Imputation RMSE: 0.484409\n", "\n", "Prediction MAPE: 0.296197\n", "Prediction RMSE: 9.49325\n", "\n", "Running time: 757 seconds\n", "\n" ] } ], "source": [ "import time\n", "rank = 30\n", "pred_step = 10 * 12\n", "time_lags = np.array([1, 2, 3, 12, 13, 14, 2 * 12, 2 * 12 + 1, 2 * 12 + 2])\n", "burn_iter = 1000\n", "gibbs_iter = 200\n", "for multi_step in [4, 6]:\n", " start = time.time()\n", " print('Prediction time horizon (delta) = {}.'.format(multi_step))\n", " tensor_hat = BTRTF_forecast(dense_tensor, sparse_tensor, pred_step, multi_step, \n", " rank, time_lags, burn_iter, gibbs_iter, gamma = 30)\n", " end = time.time()\n", " print('Running time: %d seconds'%(end - start))\n", " print()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### License\n", "\n", "
\n", "This work is released under the MIT license.\n", "
" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" } }, "nbformat": 4, "nbformat_minor": 2 }