Caffe2 - C++ API
A deep learning, cross-platform ML framework
utility_ops_gpu.cc
1 #include "caffe2/core/context_gpu.h"
2 #include "caffe2/operators/flatten_op.h"
3 #include "caffe2/operators/utility_ops.h"
4 #include "caffe2/utils/math.h"
5 
6 namespace caffe2 {
7 
// CUDAContext specialization of WeightedSumOp::RunOnDevice.
// Dispatches on the element type of Input(0) only: float and float16 are
// forwarded to DoRunWithType<T>() and its result is returned; any other type
// goes to CAFFE_THROW("Unsupported inputs"). No other input's type is checked
// in this function.
8 template <>
9 bool WeightedSumOp<CUDAContext>::RunOnDevice() {
10  if (Input(0).IsType<float>()) {
11  return DoRunWithType<float>();
12  } else if (Input(0).IsType<float16>()) {
13  return DoRunWithType<float16>();
14  } else {
15  CAFFE_THROW("Unsupported inputs");
16  }
// NOTE(review): reached only if CAFFE_THROW returns; presumably kept so that
// every control path ends in a return statement -- confirm.
17  return false;
18 }
19 
// CUDAContext specialization of SumOp::RunOnDevice.
// Dispatches on the element type of Input(0) only: float calls
// DoRunWithType<float, float>() and float16 calls
// DoRunWithType<float16, float16>(), returning that result; any other type
// goes to CAFFE_THROW("Unsupported inputs").
20 template <>
21 bool SumOp<CUDAContext>::RunOnDevice() {
22  if (Input(0).IsType<float>()) {
23  return DoRunWithType<float, float>();
24  } else if (Input(0).IsType<float16>()) {
25  return DoRunWithType<float16, float16>();
26  } else {
27  CAFFE_THROW("Unsupported inputs");
28  }
// NOTE(review): reached only if CAFFE_THROW returns; presumably kept so that
// every control path ends in a return statement -- confirm.
29  return false;
30 }
31 
// Explicit specialization of an operator class deriving from
// Operator<CUDAContext>; RunOnDevice copies Input(0) into Output(0).
// NOTE(review): the class-head line (listing line 33) is absent from this
// listing, so the specialization's template arguments are not visible here.
// The class name CopyOnDeviceLikeOp is taken from the constructor below.
32 template <>
34  : public Operator<CUDAContext> {
35  public:
// Forwards the operator definition and workspace to the Operator<CUDAContext>
// base; no additional state is initialized.
36  CopyOnDeviceLikeOp(const OperatorDef& operator_def, Workspace* ws)
37  : Operator<CUDAContext>(operator_def, ws) {}
38  USE_OPERATOR_FUNCTIONS(CUDAContext);
39 
// Copies Input(0) into Output(0) and always returns true.
// Input(1) is used only for its data pointer (to pick a GPU id); its contents
// are not read in this function.
40  bool RunOnDevice() override {
41  auto& input = Input(0);
// Output 0 is fetched as a Tensor<CUDAContext>.
42  auto* output = OperatorBase::Output<Tensor<CUDAContext>>(0);
// Local context constructed from the GPU id that GetGPUIDForPointer reports
// for Input(1)'s data pointer; the copy below is issued through this context.
43  CUDAContext context(GetGPUIDForPointer(Input(1).raw_data()));
44  output->ResizeLike(input);
// Copies input.size() items described by input.meta() from the input buffer
// into the output buffer; the output buffer is requested with the input's
// type meta.
45  context.template CopyItems<CUDAContext, CUDAContext>(
46  input.meta(),
47  input.size(),
48  input.raw_data(),
49  output->raw_mutable_data(input.meta()));
50  return true;
51  }
52 };
53 
// REGISTER_CUDA_OPERATOR(Name, Class) invocations pairing each operator name
// with a CUDAContext operator class.
// NOTE(review): several invocations below are truncated in this listing (the
// listing numbers skip 64, 68, 74, 77 and 84); the missing arguments are not
// reconstructed here -- consult the original utility_ops_gpu.cc.
54 REGISTER_CUDA_OPERATOR(Print, PrintOp<CUDAContext>);
55 REGISTER_CUDA_OPERATOR(Flatten, FlattenOp<CUDAContext>);
56 REGISTER_CUDA_OPERATOR(FlattenToVec, FlattenToVecOp<CUDAContext>);
57 REGISTER_CUDA_OPERATOR(Alias, AliasOp<CUDAContext>);
58 REGISTER_CUDA_OPERATOR(ResizeLike, ResizeLikeOp<CUDAContext>);
59 REGISTER_CUDA_OPERATOR(Sum, SumOp<CUDAContext>);
60 REGISTER_CUDA_OPERATOR(WeightedSum, WeightedSumOp<CUDAContext>);
61 // From whatever the current context, ensure the output is TensorCPU
62 REGISTER_CUDA_OPERATOR(
63  EnsureCPUOutput,
// NOTE(review): listing line 64 (remaining macro argument and closing) is missing.
65 // From CPU, copy it to whatever the current context
66 REGISTER_CUDA_OPERATOR(
67  CopyFromCPUInput,
// NOTE(review): listing line 68 (remaining macro argument and closing) is missing.
69 
70 // CopyGPUToCPU and CopyCPUToGPU should both be carried out in a cuda context,
71 // since gpu code will be involved.
72 REGISTER_CUDA_OPERATOR(
73  CopyGPUToCPU,
// NOTE(review): listing line 74 (remaining macro argument and closing) is missing.
75 REGISTER_CUDA_OPERATOR(
76  CopyCPUToGPU,
// NOTE(review): listing line 77 (remaining macro argument and closing) is missing.
78 // If we only specify Copy, we assume that it is a gpu to gpu copy - maybe
79 // involving different GPUs.
80 REGISTER_CUDA_OPERATOR(Copy, CopyOp<CUDAContext, CUDAContext, CUDAContext>);
81 
82 REGISTER_CUDA_OPERATOR(
83  CopyOnDeviceLike,
// NOTE(review): listing line 84 (remaining macro argument and closing) is missing.
85 
86 REGISTER_CUDA_OPERATOR(UnsafeCoalesce, UnsafeCoalesceOp<CUDAContext>);
87 
88 } // namespace caffe2
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:47
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
int GetGPUIDForPointer(const void *ptr)
Gets the GPU id that the current pointer is located at.
Definition: common_gpu.cc:133
Alias op makes the output and the input share the same underlying storage.
Definition: utility_ops.h:164