Caffe2 - C++ API
A deep learning, cross platform ML framework
rewrite_net.cc
1 
2 #include "rewrite_net.h"
3 #include "caffe2/core/operator.h"
4 #include "caffe2/utils/proto_utils.h"
5 #include <unordered_map>
6 
7 namespace caffe2 {
8 
// Result of walking a NetDef operator-by-operator and giving every blob a
// version number each time it is (re)written.
struct Analysis {
  // Versions of the blobs touched by a single operator.
  struct SSA {
    using BlobVersions = std::unordered_map<std::string, size_t>;
    // Blob name -> version the operator reads.
    BlobVersions inVersions;
    // Blob name -> version the operator writes.
    BlobVersions outVersions;
  };

  // Blob version -> indices of the operators that read that version.
  using VersionUsages = std::unordered_map<size_t, std::vector<size_t>>;

  // One entry per operator, in the order the operators appear in the net.
  std::vector<SSA> ssa;
  // Blob name -> (version -> reading operator indices).
  std::unordered_map<std::string, VersionUsages> inUsages;
};
18 
19 static Analysis analyzeNet(const NetDef& net) {
20  Analysis::SSA::BlobVersions frontier;
21  Analysis analysis;
22 
23  auto play = [&](size_t i, const OperatorDef& op) {
24  Analysis::SSA::BlobVersions inVersions;
25  for (const auto& s : op.input()) {
26  inVersions[s] = frontier[s];
27  analysis.inUsages[s][frontier[s]].push_back(i);
28  }
29  Analysis::SSA::BlobVersions outVersions;
30  for (const auto& s : op.output()) {
31  if (frontier.find(s) != frontier.end()) {
32  frontier[s] += 1;
33  }
34  outVersions[s] = frontier[s];
35  }
36  analysis.ssa.push_back(Analysis::SSA{inVersions, outVersions});
37  };
38 
39  for (auto i = 0; i < net.op_size(); ++i) {
40  play(i, net.op(i));
41  }
42  return analysis;
43 }
44 
45 static void insertCopyFromGLOp(NetDef& predictNet, const std::string& cpu_blob) {
46  auto* op = predictNet.add_op();
47  op->set_name("CopyFromGL");
48  op->set_type("CopyFromGL");
49  op->add_input(cpu_blob + "_M");
50  op->add_output(cpu_blob);
51 }
52 
53 static NetDef insertInputOutputCopyOps(const NetDef& def, std::unordered_set<std::string>& cpuOp) {
54  // Do some validation of the outputs. For this version, we require:
55  // - a single input (first element of external_input()) is consumed by the NetDef
56  // - a single output (first element of external_output()) is produced by the NetDef.
57  // - the input is consumed by def.op(0), and this is the only consumer.
58  // - the output is produced by def.op(-1).
59  CAFFE_ENFORCE_GE(def.external_input_size(), 1);
60  CAFFE_ENFORCE_GE(def.external_output_size(), 1);
61  auto analysis = analyzeNet(def);
62  // enforce a single use of the input blob.
63  CAFFE_ENFORCE_GE(def.op_size(), 1);
64 
65  const auto& inputBlob = def.external_input(0);
66  // Enforce that the input blob has a single usage - in the first operator.
67  CAFFE_ENFORCE(analysis.inUsages[inputBlob][0] == (std::vector<size_t>{0}));
68  // Enforce that the external_output(0) blob is produced by the last operator in this sequence.
69  const auto& outputBlob = def.external_output(0);
70  CAFFE_ENFORCE(analysis.ssa.back().outVersions.find(outputBlob) !=
71  analysis.ssa.back().outVersions.end());
72  const auto& outputBlobVersion = analysis.ssa.back().outVersions[outputBlob];
73  // This should hold true by definition of the SSA analysis.
74  CAFFE_ENFORCE(analysis.inUsages[outputBlob].find(outputBlobVersion) ==
75  analysis.inUsages[outputBlob].end());
76 
77  NetDef mdef;
78  mdef.CopyFrom(def);
79  mdef.clear_op();
80 
81  std::unordered_map<std::string, std::set<size_t>> cpu_blobs, gpu_blobs;
82  cpu_blobs[def.external_input(0)].insert(0);
83 
84  for (auto i = 0; i < def.op_size(); i++) {
85  const auto& currentOp = def.op(i);
86  if (cpuOp.count(currentOp.type()) > 0) {
87  // CPU Op
88  // insert copyFromOpenGLOp
89  for (auto j = 0; j < currentOp.input_size(); j++) {
90  auto& input = currentOp.input(j);
91  auto version = analysis.ssa[i].inVersions[input];
92  if (gpu_blobs[input].count(version) > 0) {
93  insertCopyFromGLOp(mdef, input);
94  }
95  }
96  auto* op = mdef.add_op();
97  op->CopyFrom(currentOp);
98  for (auto j = 0; j < currentOp.output_size(); j++) {
99  auto& output = currentOp.output(j);
100  auto version = analysis.ssa[i].outVersions[output];
101  cpu_blobs[output].insert(version);
102  }
103  } else {
104  // OpenGL Op
105  auto* op = mdef.add_op();
106  op->CopyFrom(currentOp);
107 
108  for (auto j = 0; j < op->input_size(); j++) {
109  auto* input = op->mutable_input(j);
110  auto version = analysis.ssa[i].inVersions[*input];
111  if (gpu_blobs[*input].count(version) > 0) {
112  *input = *input + "_M";
113  }
114  }
115 
116  for (auto j = 0; j < currentOp.output_size(); j++) {
117  auto& output = currentOp.output(j);
118  auto version = analysis.ssa[i].outVersions[output];
119  gpu_blobs[output].insert(version);
120  // add _M to intermediate OpenGL op outputs
121  auto* output_ = op->mutable_output(j);
122  bool inter = true;
123  for(auto k = 0; k < def.external_output_size(); k++) {
124  if (*output_ == def.external_output(k)) {
125  inter = false;
126  }
127  }
128  if (inter) {
129  *output_ = *output_ + "_M";
130  }
131  }
132  }
133  }
134  return mdef;
135 }
136 
137 static bool tryFuseAdjacentOps(const OperatorDef& currentOp,
138  const OperatorDef& nextOp,
139  OperatorDef* fusedOp,
140  std::unordered_set<std::string>& glOps) {
141  // Check for possible invalid opportunities.
142  if (currentOp.output_size() != 1 || nextOp.output_size() != 1) {
143  return false;
144  }
145  // The fused op cannot be inplace
146  if (currentOp.output(0) != nextOp.input(0) || currentOp.input(0) == nextOp.output(0)) {
147  return false;
148  }
149 
150  static const std::map<std::pair<std::string, std::string>, std::string> fusionOpportunities = {
151  {{"OpenGLInstanceNorm", "OpenGLPRelu"}, "OpenGLInstanceNormPRelu"},
152  {{"OpenGLConv", "OpenGLPRelu"}, "OpenGLConvPRelu"},
153  {{"OpenGLConv", "OpenGLRelu"}, "OpenGLConvRelu"},
154  {{"OpenGLConvTranspose", "OpenGLPRelu"}, "OpenGLConvTransposePRelu"}};
155  auto it = fusionOpportunities.find({currentOp.type(), nextOp.type()});
156  if (it == fusionOpportunities.end()) {
157  return false;
158  }
159 
160  glOps.insert(it->second);
161  fusedOp->CopyFrom(currentOp);
162  fusedOp->set_output(0, nextOp.output(0));
163  fusedOp->set_type(it->second);
164  for (auto i = 1; i < nextOp.input_size(); i++) {
165  fusedOp->add_input(nextOp.input(i));
166  }
167  return true;
168 }
169 
170 static NetDef runOpenGLFusion(const NetDef& def, std::unordered_set<std::string>& glOps) {
171  CHECK_GE(def.op_size(), 1);
172  NetDef mdef;
173  mdef.CopyFrom(def);
174  mdef.clear_op();
175  auto i = 0;
176 
177  while (i < def.op_size()) {
178  if (i == def.op_size() - 1) {
179  VLOG(2) << "Last operator, skipping";
180  auto* op = mdef.add_op();
181  op->CopyFrom(def.op(i));
182  i += 1;
183  continue;
184  }
185 
186  const auto& currentOp = def.op(i);
187  const auto& nextOp = def.op(i + 1);
188  OperatorDef fusedOp;
189  if (tryFuseAdjacentOps(currentOp, nextOp, &fusedOp, glOps)) {
190  VLOG(2) << "Found an adjacent fusion for: " << currentOp.type() << ", " << nextOp.type();
191  // We can fuse.
192  auto* op = mdef.add_op();
193  op->CopyFrom(fusedOp);
194  i += 2;
195  continue;
196  }
197  VLOG(2) << "No fusion available for: " << currentOp.type() << ", " << nextOp.type();
198  // Just emit the current type.
199  auto* op = mdef.add_op();
200  op->CopyFrom(currentOp);
201  i += 1;
202  }
203  return mdef;
204 }
205 
206 void dumpDefForOpenGL(const NetDef& d) {
207  for (const auto& op : d.op()) {
208  LOG(INFO) << op.input(0) << " -> " << op.type() << " -> " << op.output(0);
209  }
210 }
211 
212 // // For debugging
213 // void dumpDefForOpenGL(const NetDef &net) {
214 // for (const auto &op : net.op()) {
215 // printf("***Operator: %s\n", op.type().c_str());
216 // for (auto input : op.input()) {
217 // printf("\tInput: %s\n", input.c_str());
218 // }
219 //
220 // for (auto output : op.output()) {
221 // printf("\tOutput: %s\n", output.c_str());
222 // }
223 // }
224 //}
225 
226 NetDef rewritePredictNetForOpenGL(const NetDef& predictNet, bool runFusion, std::unordered_set<std::string> cpuOps) {
227  CAFFE_ENFORCE_GE(predictNet.op_size(), 1);
228  NetDef net;
229  net.CopyFrom(predictNet);
230 
231  // if (runFusion) {
232  // net = runOpenGLFusion(net, openGLOps);
233  // }
234 
235  net = insertInputOutputCopyOps(net, cpuOps);
236  net.set_type("opengl");
237 
238  for (auto i = 0; i < net.op().size(); ++i) {
239  auto op = net.mutable_op(i);
240  if (std::find(cpuOps.begin(), cpuOps.end(), op->type()) == cpuOps.end()) {
241  op->mutable_device_option()->set_device_type(OPENGL);
242  }
243  }
244 
245  return net;
246 }
247 
248 bool tryConvertToOpenGL(const NetDef& predictNet,
249  NetDef* glPredictNet,
250  bool runFusion,
251  std::unordered_set<std::string> cpuOps) {
252  try {
253  // Throws if unsupported operators are found.
254  *glPredictNet = rewritePredictNetForOpenGL(predictNet, runFusion, cpuOps);
255  dumpDefForOpenGL(*glPredictNet);
256  // Throws if unsupported parameters are found.
257  LOG(INFO) << "OpenGL is successfully enabled";
258  return true;
259  } catch (const std::exception& e) {
260  LOG(ERROR) << "Caught exception trying to convert NetDef to OpenGL: " << e.what();
261  return false;
262  }
263 }
264 } // namespace caffe2
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...