#ifndef CAFFE_COMMON_LAYERS_HPP_ #define CAFFE_COMMON_LAYERS_HPP_ #include #include #include #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/layers/data_layer.hpp" #include "caffe/layer.hpp" #include "caffe/layers/loss_layer.hpp" #include "caffe/layers/neuron_layer.hpp" #include "caffe/proto/caffe.pb.h" namespace caffe { /** * @brief Compute the index of the @f$ K @f$ max values for each datum across * all dimensions @f$ (C \times H \times W) @f$. * * Intended for use after a classification layer to produce a prediction. * If parameter out_max_val is set to true, output is a vector of pairs * (max_ind, max_val) for each image. * * NOTE: does not implement Backwards operation. */ template class ArgMaxLayer : public Layer { public: /** * @param param provides ArgMaxParameter argmax_param, * with ArgMaxLayer options: * - top_k (\b optional uint, default 1). * the number @f$ K @f$ of maximal items to output. * - out_max_val (\b optional bool, default false). * if set, output a vector of pairs (max_ind, max_val) for each image. */ explicit ArgMaxLayer(const LayerParameter& param) : Layer(param) {} virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); virtual void Reshape(const vector*>& bottom, const vector*>& top); virtual inline const char* type() const { return "ArgMax"; } virtual inline int ExactNumBottomBlobs() const { return 1; } virtual inline int ExactNumTopBlobs() const { return 1; } protected: /** * @param bottom input Blob vector (length 1) * -# @f$ (N \times C \times H \times W) @f$ * the inputs @f$ x @f$ * @param top output Blob vector (length 1) * -# @f$ (N \times 1 \times K \times 1) @f$ or, if out_max_val * @f$ (N \times 2 \times K \times 1) @f$ * the computed outputs @f$ * y_n = \arg\max\limits_i x_{ni} * @f$ (for @f$ K = 1 @f$). */ virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); /// @brief Not implemented (non-differentiable function) virtual void Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { NOT_IMPLEMENTED; } bool out_max_val_; size_t top_k_; }; /** * @brief Takes at least two Blob%s and concatenates them along either the num * or channel dimension, outputting the result. */ template class ConcatLayer : public Layer { public: explicit ConcatLayer(const LayerParameter& param) : Layer(param) {} virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); virtual void Reshape(const vector*>& bottom, const vector*>& top); virtual inline const char* type() const { return "Concat"; } virtual inline int MinBottomBlobs() const { return 1; } virtual inline int ExactNumTopBlobs() const { return 1; } protected: /** * @param bottom input Blob vector (length 2+) * -# @f$ (N \times C \times H \times W) @f$ * the inputs @f$ x_1 @f$ * -# @f$ (N \times C \times H \times W) @f$ * the inputs @f$ x_2 @f$ * -# ... * - K @f$ (N \times C \times H \times W) @f$ * the inputs @f$ x_K @f$ * @param top output Blob vector (length 1) * -# @f$ (KN \times C \times H \times W) @f$ if axis == 0, or * @f$ (N \times KC \times H \times W) @f$ if axis == 1: * the concatenated output @f$ * y = [\begin{array}{cccc} x_1 & x_2 & ... & x_K \end{array}] * @f$ */ virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); /** * @brief Computes the error gradient w.r.t. the concatenate inputs. * * @param top output Blob vector (length 1), providing the error gradient with * respect to the outputs * -# @f$ (KN \times C \times H \times W) @f$ if axis == 0, or * @f$ (N \times KC \times H \times W) @f$ if axis == 1: * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ * with respect to concatenated outputs @f$ y @f$ * @param propagate_down see Layer::Backward. * @param bottom input Blob vector (length K), into which the top gradient * @f$ \frac{\partial E}{\partial y} @f$ is deconcatenated back to the * inputs @f$ * \left[ \begin{array}{cccc} * \frac{\partial E}{\partial x_1} & * \frac{\partial E}{\partial x_2} & * ... & * \frac{\partial E}{\partial x_K} * \end{array} \right] = * \frac{\partial E}{\partial y} * @f$ */ virtual void Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); int count_; int num_concats_; int concat_input_size_; int concat_axis_; }; /** * @brief Compute elementwise operations, such as product and sum, * along multiple input Blobs. * * TODO(dox): thorough documentation for Forward, Backward, and proto params. */ template class EltwiseLayer : public Layer { public: explicit EltwiseLayer(const LayerParameter& param) : Layer(param) {} virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); virtual void Reshape(const vector*>& bottom, const vector*>& top); virtual inline const char* type() const { return "Eltwise"; } virtual inline int MinBottomBlobs() const { return 2; } virtual inline int ExactNumTopBlobs() const { return 1; } protected: virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); virtual void Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); EltwiseParameter_EltwiseOp op_; vector coeffs_; Blob max_idx_; bool stable_prod_grad_; }; /** * @brief A layer for learning "embeddings" of one-hot vector input. * Equivalent to an InnerProductLayer with one-hot vectors as input, but * for efficiency the input is the "hot" index of each column itself. * * TODO(dox): thorough documentation for Forward, Backward, and proto params. */ template class EmbedLayer : public Layer { public: explicit EmbedLayer(const LayerParameter& param) : Layer(param) {} virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); virtual void Reshape(const vector*>& bottom, const vector*>& top); virtual inline const char* type() const { return "Embed"; } virtual inline int ExactNumBottomBlobs() const { return 1; } virtual inline int ExactNumTopBlobs() const { return 1; } protected: virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); virtual void Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); int M_; int K_; int N_; bool bias_term_; Blob bias_multiplier_; }; /** * @brief Takes two+ Blobs, interprets last Blob as a selector and * filter remaining Blobs accordingly with selector data (0 means that * the corresponding item has to be filtered, non-zero means that corresponding * item needs to stay). */ template class FilterLayer : public Layer { public: explicit FilterLayer(const LayerParameter& param) : Layer(param) {} virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); virtual void Reshape(const vector*>& bottom, const vector*>& top); virtual inline const char* type() const { return "Filter"; } virtual inline int MinBottomBlobs() const { return 2; } virtual inline int MinTopBlobs() const { return 1; } protected: /** * @param bottom input Blob vector (length 2+) * -# @f$ (N \times C \times H \times W) @f$ * the inputs to be filtered @f$ x_1 @f$ * -# ... * -# @f$ (N \times C \times H \times W) @f$ * the inputs to be filtered @f$ x_K @f$ * -# @f$ (N \times 1 \times 1 \times 1) @f$ * the selector blob * @param top output Blob vector (length 1+) * -# @f$ (S \times C \times H \times W) @f$ () * the filtered output @f$ x_1 @f$ * where S is the number of items * that haven't been filtered * @f$ (S \times C \times H \times W) @f$ * the filtered output @f$ x_K @f$ * where S is the number of items * that haven't been filtered */ virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); /** * @brief Computes the error gradient w.r.t. the forwarded inputs. * * @param top output Blob vector (length 1+), providing the error gradient with * respect to the outputs * @param propagate_down see Layer::Backward. * @param bottom input Blob vector (length 2+), into which the top error * gradient is copied */ virtual void Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); bool first_reshape_; vector indices_to_forward_; }; /** * @brief Reshapes the input Blob into flat vectors. * * Note: because this layer does not change the input values -- merely the * dimensions -- it can simply copy the input. The copy happens "virtually" * (thus taking effectively 0 real time) by setting, in Forward, the data * pointer of the top Blob to that of the bottom Blob (see Blob::ShareData), * and in Backward, the diff pointer of the bottom Blob to that of the top Blob * (see Blob::ShareDiff). */ template class FlattenLayer : public Layer { public: explicit FlattenLayer(const LayerParameter& param) : Layer(param) {} virtual void Reshape(const vector*>& bottom, const vector*>& top); virtual inline const char* type() const { return "Flatten"; } virtual inline int ExactNumBottomBlobs() const { return 1; } virtual inline int ExactNumTopBlobs() const { return 1; } protected: /** * @param bottom input Blob vector (length 2+) * -# @f$ (N \times C \times H \times W) @f$ * the inputs * @param top output Blob vector (length 1) * -# @f$ (N \times CHW \times 1 \times 1) @f$ * the outputs -- i.e., the (virtually) copied, flattened inputs */ virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); /** * @brief Computes the error gradient w.r.t. the concatenate inputs. * * @param top output Blob vector (length 1), providing the error gradient with * respect to the outputs * @param propagate_down see Layer::Backward. * @param bottom input Blob vector (length K), into which the top error * gradient is (virtually) copied */ virtual void Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); }; /** * @brief Also known as a "fully-connected" layer, computes an inner product * with a set of learned weights, and (optionally) adds biases. * * TODO(dox): thorough documentation for Forward, Backward, and proto params. */ template class InnerProductLayer : public Layer { public: explicit InnerProductLayer(const LayerParameter& param) : Layer(param) {} virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); virtual void Reshape(const vector*>& bottom, const vector*>& top); virtual inline const char* type() const { return "InnerProduct"; } virtual inline int ExactNumBottomBlobs() const { return 1; } virtual inline int ExactNumTopBlobs() const { return 1; } protected: virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); virtual void Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); int M_; int K_; int N_; bool bias_term_; Blob bias_multiplier_; }; /** * @brief Normalizes the input to have 0-mean and/or unit (1) variance. * * TODO(dox): thorough documentation for Forward, Backward, and proto params. */ template class MVNLayer : public Layer { public: explicit MVNLayer(const LayerParameter& param) : Layer(param) {} virtual void Reshape(const vector*>& bottom, const vector*>& top); virtual inline const char* type() const { return "MVN"; } virtual inline int ExactNumBottomBlobs() const { return 1; } virtual inline int ExactNumTopBlobs() const { return 1; } protected: virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); virtual void Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); Blob mean_, variance_, temp_; /// sum_multiplier is used to carry out sum using BLAS Blob sum_multiplier_; Dtype eps_; }; /* * @brief Reshapes the input Blob into an arbitrary-sized output Blob. * * Note: similarly to FlattenLayer, this layer does not change the input values * (see FlattenLayer, Blob::ShareData and Blob::ShareDiff). */ template class ReshapeLayer : public Layer { public: explicit ReshapeLayer(const LayerParameter& param) : Layer(param) {} virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); virtual void Reshape(const vector*>& bottom, const vector*>& top); virtual inline const char* type() const { return "Reshape"; } virtual inline int ExactNumBottomBlobs() const { return 1; } virtual inline int ExactNumTopBlobs() const { return 1; } protected: virtual void Forward_cpu(const vector*>& bottom, const vector*>& top) {} virtual void Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) {} virtual void Forward_gpu(const vector*>& bottom, const vector*>& top) {} virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) {} /// @brief vector of axes indices whose dimensions we'll copy from the bottom vector copy_axes_; /// @brief the index of the axis whose dimension we infer, or -1 if none int inferred_axis_; /// @brief the product of the "constant" output dimensions int constant_count_; }; /** * @brief Compute "reductions" -- operations that return a scalar output Blob * for an input Blob of arbitrary size, such as the sum, absolute sum, * and sum of squares. * * TODO(dox): thorough documentation for Forward, Backward, and proto params. */ template class ReductionLayer : public Layer { public: explicit ReductionLayer(const LayerParameter& param) : Layer(param) {} virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); virtual void Reshape(const vector*>& bottom, const vector*>& top); virtual inline const char* type() const { return "Reduction"; } virtual inline int ExactNumBottomBlobs() const { return 1; } virtual inline int ExactNumTopBlobs() const { return 1; } protected: virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); virtual void Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); /// @brief the reduction operation performed by the layer ReductionParameter_ReductionOp op_; /// @brief a scalar coefficient applied to all outputs Dtype coeff_; /// @brief the index of the first input axis to reduce int axis_; /// @brief the number of reductions performed int num_; /// @brief the input size of each reduction int dim_; /// @brief a helper Blob used for summation (op_ == SUM) Blob sum_multiplier_; }; /** * @brief Ignores bottom blobs while producing no top blobs. (This is useful * to suppress outputs during testing.) */ template class SilenceLayer : public Layer { public: explicit SilenceLayer(const LayerParameter& param) : Layer(param) {} virtual void Reshape(const vector*>& bottom, const vector*>& top) {} virtual inline const char* type() const { return "Silence"; } virtual inline int MinBottomBlobs() const { return 1; } virtual inline int ExactNumTopBlobs() const { return 0; } protected: virtual void Forward_cpu(const vector*>& bottom, const vector*>& top) {} // We can't define Forward_gpu here, since STUB_GPU will provide // its own definition for CPU_ONLY mode. virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); virtual void Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); }; /** * @brief Computes the softmax function. * * TODO(dox): thorough documentation for Forward, Backward, and proto params. */ template class SoftmaxLayer : public Layer { public: explicit SoftmaxLayer(const LayerParameter& param) : Layer(param) {} virtual void Reshape(const vector*>& bottom, const vector*>& top); virtual inline const char* type() const { return "Softmax"; } virtual inline int ExactNumBottomBlobs() const { return 1; } virtual inline int ExactNumTopBlobs() const { return 1; } protected: virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); virtual void Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); int outer_num_; int inner_num_; int softmax_axis_; /// sum_multiplier is used to carry out sum using BLAS Blob sum_multiplier_; /// scale is an intermediate Blob to hold temporary results. Blob scale_; }; #ifdef USE_CUDNN /** * @brief cuDNN implementation of SoftmaxLayer. * Fallback to SoftmaxLayer for CPU mode. */ template class CuDNNSoftmaxLayer : public SoftmaxLayer { public: explicit CuDNNSoftmaxLayer(const LayerParameter& param) : SoftmaxLayer(param), handles_setup_(false) {} virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); virtual void Reshape(const vector*>& bottom, const vector*>& top); virtual ~CuDNNSoftmaxLayer(); protected: virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); bool handles_setup_; cudnnHandle_t handle_; cudnnTensorDescriptor_t bottom_desc_; cudnnTensorDescriptor_t top_desc_; }; #endif /** * @brief Creates a "split" path in the network by copying the bottom Blob * into multiple top Blob%s to be used by multiple consuming layers. * * TODO(dox): thorough documentation for Forward, Backward, and proto params. */ template class SplitLayer : public Layer { public: explicit SplitLayer(const LayerParameter& param) : Layer(param) {} virtual void Reshape(const vector*>& bottom, const vector*>& top); virtual inline const char* type() const { return "Split"; } virtual inline int ExactNumBottomBlobs() const { return 1; } virtual inline int MinTopBlobs() const { return 1; } protected: virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); virtual void Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); int count_; }; /** * @brief Takes a Blob and slices it along either the num or channel dimension, * outputting multiple sliced Blob results. * * TODO(dox): thorough documentation for Forward, Backward, and proto params. */ template class SliceLayer : public Layer { public: explicit SliceLayer(const LayerParameter& param) : Layer(param) {} virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); virtual void Reshape(const vector*>& bottom, const vector*>& top); virtual inline const char* type() const { return "Slice"; } virtual inline int ExactNumBottomBlobs() const { return 1; } virtual inline int MinTopBlobs() const { return 1; } protected: virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); virtual void Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); int count_; int num_slices_; int slice_size_; int slice_axis_; vector slice_point_; }; /** * @brief Copy a Blob along specified dimensions. */ template class TileLayer : public Layer { public: explicit TileLayer(const LayerParameter& param) : Layer(param) {} virtual void Reshape(const vector*>& bottom, const vector*>& top); virtual inline const char* type() const { return "Tile"; } virtual inline int ExactNumBottomBlobs() const { return 1; } virtual inline int ExactNumTopBlobs() const { return 1; } protected: virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); virtual void Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); unsigned int axis_, tiles_, outer_dim_, inner_dim_; }; } // namespace caffe #endif // CAFFE_COMMON_LAYERS_HPP_