""" ================================= Latent Variable Hierarchical CRF ================================= Solving a 2d grid toy problem by introducing an additional layer of latent variables. """ import numpy as np import itertools from pystruct.models import GraphCRF, LatentNodeCRF from pystruct.learners import NSlackSSVM, OneSlackSSVM, LatentSSVM from pystruct.datasets import make_simple_2x2 from pystruct.utils import make_grid_edges, plot_grid import matplotlib.pyplot as plt def plot_boxes(boxes, size=4, title=""): cmap = plt.cm.gray if boxes[0].size == size * size: fig, ax = plt.subplots(1, len(boxes), figsize=(8, 0.7)) for a, x in zip(ax, boxes): plot_grid(x[:size * size].reshape(size, size), cmap=cmap, axes=a, border_color="green") else: # have hidden states fig, ax = plt.subplots(2, len(boxes), figsize=(8, 1)) for a, x in zip(ax[0], boxes): plot_grid(x[size * size:].reshape(size / 2, size / 2), cmap=cmap, axes=a, border_color="green") for a, x in zip(ax[1], boxes): plot_grid(x[:size * size].reshape(size, size), cmap=cmap, axes=a, border_color="green") fig.subplots_adjust(.01, .03, .98, .75, .2, .05) fig.suptitle(title) # learn the "easy" 2x2 boxes dataset. # a 2x2 box is placed randomly in a 4x4 grid # we add a latent variable for each 2x2 patch # that should make the model fairly simple X, Y = make_simple_2x2(seed=1) # flatten X and Y X_flat = [x.reshape(-1, 1).astype(np.float) for x in X] Y_flat = [y.ravel() for y in Y] # first, use standard graph CRF. Can't do much, high loss. crf = GraphCRF() svm = NSlackSSVM(model=crf, max_iter=200, C=1, n_jobs=1) G = [make_grid_edges(x) for x in X] X_grid_edges = list(zip(X_flat, G)) svm.fit(X_grid_edges, Y_flat) plot_boxes(svm.predict(X_grid_edges), title="Non-latent SSVM predictions") print("Training score binary grid CRF: %f" % svm.score(X_grid_edges, Y_flat)) # using one latent variable for each 2x2 rectangle latent_crf = LatentNodeCRF(n_labels=2, n_features=1, n_hidden_states=2, inference_method='lp') ssvm = OneSlackSSVM(model=latent_crf, max_iter=200, C=100, n_jobs=-1, show_loss_every=10, inference_cache=50) latent_svm = LatentSSVM(ssvm) # make edges for hidden states: edges = [] node_indices = np.arange(4 * 4).reshape(4, 4) for i, (x, y) in enumerate(itertools.product([0, 2], repeat=2)): for j in range(x, x + 2): for k in range(y, y + 2): edges.append([i + 4 * 4, node_indices[j, k]]) G = [np.vstack([make_grid_edges(x), edges]) for x in X] # Random initialization H_init = [np.hstack([y.ravel(), np.random.randint(2, 4, size=2 * 2)]) for y in Y] plot_boxes(H_init, title="Top: Random initial hidden states. Bottom: Ground" "truth labeling.") X_ = list(zip(X_flat, G, [2 * 2 for x in X_flat])) latent_svm.fit(X_, Y_flat, H_init) print("Training score with latent nodes: %f " % latent_svm.score(X_, Y_flat)) H = latent_svm.predict_latent(X_) plot_boxes(H, title="Top: Hidden states after training. Bottom: Prediction.") plt.show()