Caffe2 - C++ API
A deep learning, cross platform ML framework
convert_encoded_to_raw_leveldb.cc
1 
// This script converts a leveldb of encoded images into a leveldb of raw,
// resized images.
//
// caffe2::FLAGS_input_db_name is the leveldb to read; each of its values is a
// serialized TensorProtos holding the encoded image bytes followed by the
// label. caffe2::FLAGS_output_db_name is the leveldb to create; it receives
// the decoded pixels and the label under the same keys.
24 
25 #include <opencv2/opencv.hpp>
26 
27 #include <fstream> // NOLINT(readability/streams)
28 #include <memory>
29 #include <random>
30 #include <string>
31 
32 #include "caffe2/core/init.h"
33 #include "caffe2/proto/caffe2.pb.h"
34 #include "caffe2/core/logging.h"
35 #include "leveldb/db.h"
36 #include "leveldb/write_batch.h"
37 
38 CAFFE2_DEFINE_string(input_db_name, "", "The input image file name.");
39 CAFFE2_DEFINE_string(output_db_name, "", "The output training leveldb name.");
40 CAFFE2_DEFINE_bool(color, true, "If set, load images in color.");
41 CAFFE2_DEFINE_int(scale, 256,
42  "If caffe2::FLAGS_raw is set, scale all the images' shorter edge to the given "
43  "value.");
44 CAFFE2_DEFINE_bool(warp, false, "If warp is set, warp the images to square.");
45 
46 
47 namespace caffe2 {
48 
49 using std::string;
50 using std::unique_ptr;
51 
52 void ConvertToRawDataset(
53  const string& input_db_name, const string& output_db_name) {
54  // input leveldb
55  std::unique_ptr<leveldb::DB> input_db;
56  LOG(INFO) << "Opening input leveldb " << input_db_name;
57  {
58  leveldb::Options options;
59  options.create_if_missing = false;
60  leveldb::DB* db_temp;
61  leveldb::Status status = leveldb::DB::Open(
62  options, input_db_name, &db_temp);
63  CAFFE_ENFORCE(status.ok(), "Failed to open leveldb ", input_db_name, ".");
64  input_db.reset(db_temp);
65  }
66 
67  // output leveldb
68  std::unique_ptr<leveldb::DB> output_db;
69  std::unique_ptr<leveldb::WriteBatch> batch;
70  LOG(INFO) << "Opening leveldb " << output_db_name;
71  {
72  leveldb::Options options;
73  options.error_if_exists = true;
74  options.create_if_missing = true;
75  options.write_buffer_size = 268435456;
76  leveldb::DB* db_temp;
77  leveldb::Status status = leveldb::DB::Open(
78  options, output_db_name, &db_temp);
79  CAFFE_ENFORCE(
80  status.ok(),
81  "Failed to open leveldb ",
82  output_db_name,
83  ". Is it already existing?");
84  output_db.reset(db_temp);
85  }
86  batch.reset(new leveldb::WriteBatch());
87 
88  TensorProtos input_protos;
89  TensorProtos output_protos;
90  TensorProto* data = output_protos.add_protos();
91  TensorProto* label = output_protos.add_protos();
92  data->set_data_type(TensorProto::BYTE);
93  data->add_dims(0);
94  data->add_dims(0);
95  if (caffe2::FLAGS_color) {
96  data->add_dims(3);
97  }
98  string value;
99 
100  unique_ptr<leveldb::Iterator> iter;
101  iter.reset(input_db->NewIterator(leveldb::ReadOptions()));
102  iter->SeekToFirst();
103  int count = 0;
104  for (; iter->Valid(); iter->Next()) {
105  CAFFE_ENFORCE(input_protos.ParseFromString(iter->value().ToString()));
106  label->CopyFrom(input_protos.protos(1));
107  const string& encoded_image = input_protos.protos(0).string_data(0);
108  int encoded_size = encoded_image.size();
109  cv::Mat img = cv::imdecode(
110  cv::Mat(1, &encoded_size, CV_8UC1,
111  const_cast<char*>(encoded_image.data())),
112  caffe2::FLAGS_color ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE);
113  cv::Mat resized_img;
114  int scaled_width, scaled_height;
115  if (caffe2::FLAGS_warp) {
116  scaled_width = caffe2::FLAGS_scale;
117  scaled_height = caffe2::FLAGS_scale;
118  } else if (img.rows > img.cols) {
119  scaled_width = caffe2::FLAGS_scale;
120  scaled_height = static_cast<float>(img.rows) * caffe2::FLAGS_scale / img.cols;
121  } else {
122  scaled_height = caffe2::FLAGS_scale;
123  scaled_width = static_cast<float>(img.cols) * caffe2::FLAGS_scale / img.rows;
124  }
125  cv::resize(img, resized_img, cv::Size(scaled_width, scaled_height), 0, 0,
126  cv::INTER_LINEAR);
127  data->set_dims(0, scaled_height);
128  data->set_dims(1, scaled_width);
129  DCHECK(resized_img.isContinuous());
130  data->set_byte_data(resized_img.ptr(),
131  scaled_height * scaled_width * (caffe2::FLAGS_color ? 3 : 1));
132  output_protos.SerializeToString(&value);
133  // Put in db
134  batch->Put(iter->key(), value);
135  if (++count % 1000 == 0) {
136  output_db->Write(leveldb::WriteOptions(), batch.get());
137  batch.reset(new leveldb::WriteBatch());
138  LOG(INFO) << "Processed " << count << " files.";
139  }
140  }
141  // write the last batch
142  if (count % 1000 != 0) {
143  output_db->Write(leveldb::WriteOptions(), batch.get());
144  }
145  LOG(INFO) << "Processed a total of " << count << " files.";
146 }
147 
148 } // namespace caffe2
149 
150 
151 int main(int argc, char** argv) {
152  caffe2::GlobalInit(&argc, &argv);
153  caffe2::ConvertToRawDataset(
154  caffe2::FLAGS_input_db_name, caffe2::FLAGS_output_db_name);
155  return 0;
156 }
bool GlobalInit(int *pargc, char ***pargv)
Initialize the global environment of caffe2.
Definition: init.cc:18
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...