1 #ifndef CAFFE2_CORE_BLOB_SERIALIZATION_H_ 2 #define CAFFE2_CORE_BLOB_SERIALIZATION_H_ 7 #include <google/protobuf/repeated_field.h> 9 #include "caffe2/core/blob.h" 10 #include "caffe2/core/blob_serializer_base.h" 11 #include "caffe2/core/tensor.h" 12 #include "caffe2/core/typeid.h" 13 #include "caffe2/core/types.h" 14 #include "caffe2/utils/simple_queue.h" 16 CAFFE2_DECLARE_int(caffe2_tensor_chunk_size);
17 CAFFE2_DECLARE_int(caffe2_max_tensor_serializer_threads);
18 CAFFE2_DECLARE_bool(caffe2_serialize_fp16_as_bytes);
// String stored in BlobProto::type to mark a serialized Tensor blob.
constexpr auto kTensorBlobType = "Tensor";
// Separator placed between the blob name and the chunk index when a tensor
// is serialized in multiple chunks (e.g. "weights#%3" is chunk 3 of "weights").
constexpr auto kChunkIdSeparator = "#%";
// Declares the registry mapping a CaffeTypeId to a BlobSerializerBase factory.
// NOTE(review): this macro invocation is truncated in this extraction — the
// remaining arguments and the closing ");" are missing; restore them from the
// original header before compiling.
27 CAFFE_DECLARE_TYPED_REGISTRY(
28 BlobSerializerRegistry,
32 #define REGISTER_BLOB_SERIALIZER(id, ...) \ 33 CAFFE_REGISTER_TYPED_CLASS(BlobSerializerRegistry, id, __VA_ARGS__) 35 inline unique_ptr<BlobSerializerBase> CreateSerializer(CaffeTypeId
id) {
36 return BlobSerializerRegistry()->Create(
id);
// TensorSerializer<Context> member declarations.
// NOTE(review): the enclosing `class TensorSerializer ...` header and several
// surrounding lines are missing from this extraction — restore from the
// original header before editing.
45 template <
class Context>
// Whole-blob Serialize entry point (the opening of this declaration is
// missing here).
57 SerializationAcceptor acceptor)
override;
// Like Serialize, but with an explicit chunk size controlling how many
// elements go into each emitted proto.
58 void SerializeWithChunkSize(
61 SerializationAcceptor acceptor,
62 int chunk_size)
override;
// Chunk worker: serializes elements [chunkBegin, chunkBegin + chunkSize)
// into *proto (the leading part of this declaration is missing here).
65 TensorProto* proto,
size_t chunkBegin, int32_t chunkSize);
// Records the tensor's device information into the proto.
69 void StoreDeviceDetail(
const Tensor<Context>& input, TensorProto* proto);
// BlobDeserializerBase interface method (the class header is missing from
// this extraction): deserializes `proto` into `blob`.
82 virtual void Deserialize(
const BlobProto& proto,
Blob* blob) = 0;
86 #define REGISTER_BLOB_DESERIALIZER(name, ...) \ 87 CAFFE_REGISTER_CLASS(BlobDeserializerRegistry, name, __VA_ARGS__) 89 inline unique_ptr<BlobDeserializerBase> CreateDeserializer(
const string& type) {
90 return BlobDeserializerRegistry()->Create(type);
// TensorDeserializer<Context> member declaration (the class header is
// missing from this extraction): deserializes a BlobProto into a
// tensor-holding Blob.
101 template <
class Context>
104 void Deserialize(
const BlobProto& proto,
Blob* blob)
override;
113 template <
typename SrcType,
typename DstType,
class Context>
114 inline void CopyToProtoAsIs(
117 google::protobuf::RepeatedField<DstType>* field,
120 sizeof(SrcType) ==
sizeof(DstType),
121 "The source type and dest type cannot be copied as-is. Did " 122 "you mean CopyToProtoWithCast?");
123 field->Reserve(size);
124 for (
int i = 0; i < size; ++i) {
127 context->template Copy<SrcType, Context, CPUContext>(
128 size, src,
reinterpret_cast<SrcType*
>(field->mutable_data()));
130 context->FinishDeviceComputation();
133 template <
typename SrcType,
typename DstType,
class Context>
134 inline void CopyToProtoWithCast(
137 google::protobuf::RepeatedField<DstType>* field,
141 unique_ptr<SrcType[]> buffer(
new SrcType[size]);
142 context->template Copy<SrcType, Context, CPUContext>(
143 size, src, buffer.get());
144 context->FinishDeviceComputation();
145 field->Reserve(size);
146 for (
int i = 0; i < size; ++i) {
147 field->Add(static_cast<DstType>(buffer[i]));
151 template <
typename SrcType,
typename DstType,
class Context>
152 inline void CopyFromProtoAsIs(
154 const google::protobuf::RepeatedField<SrcType>& field,
158 sizeof(SrcType) ==
sizeof(DstType),
159 "The source type and dest type cannot be copied as-is. Did " 160 "you mean CopyFromProtoWithCast?");
161 CAFFE_ENFORCE_EQ(size, field.size(),
"Incorrect proto field size.");
162 context->template Copy<DstType, CPUContext, Context>(
163 size,
reinterpret_cast<const DstType*
>(field.data()), dst);
166 template <
typename SrcType,
typename DstType,
class Context>
167 inline void CopyFromProtoWithCast(
169 const google::protobuf::RepeatedField<SrcType>& field,
172 CAFFE_ENFORCE_EQ(size, field.size(),
"Incorrect proto field size.");
175 unique_ptr<DstType[]> buffer(
new DstType[size]);
176 const SrcType* src = field.data();
177 for (
int i = 0; i < size; ++i) {
178 buffer[i] =
static_cast<DstType
>(src[i]);
180 context->template Copy<DstType, CPUContext, Context>(size, buffer.get(), dst);
185 template <
class Context>
189 BlobSerializerBase::SerializationAcceptor acceptor) {
190 this->SerializeWithChunkSize(blob, name, acceptor, kDefaultChunkSize);
// TensorSerializer<Context>::SerializeWithChunkSize — serializes the tensor
// held by a Blob, splitting it into chunks of `chunk_size` elements and
// invoking `acceptor` once per chunk with key "<name>#%<chunk index>".
// NOTE(review): this extraction is missing several lines (the blob/name
// parameters of the signature, the acceptor(...) call opening, the worker
// `task` lambda and queue declarations, and most closing braces); restore
// from the original header before editing any logic.
193 template <
class Context>
197 BlobSerializerBase::SerializationAcceptor acceptor,
// kNoChunking => a single chunk covering the whole tensor (+1 so empty
// tensors still yield one chunk); kDefaultChunkSize => use the gflag value.
200 const auto& tensor = blob.template Get<Tensor<Context>>();
201 if (chunk_size == kNoChunking) {
202 chunk_size = tensor.size() + 1;
203 }
else if (chunk_size == kDefaultChunkSize) {
204 chunk_size = FLAGS_caffe2_tensor_chunk_size;
// processChunk serializes [chunkStart, chunkStart + chunk_size) into a
// BlobProto and hands the encoded bytes to the acceptor.
207 auto processChunk = [&](int64_t chunkStart) {
208 BlobProto blob_proto;
209 blob_proto.set_name(name);
210 blob_proto.set_type(kTensorBlobType);
211 TensorProto& proto = *blob_proto.mutable_tensor();
212 proto.set_name(name);
214 tensor, name, blob_proto.mutable_tensor(), chunkStart, chunk_size);
216 MakeString(name, kChunkIdSeparator, chunkStart / chunk_size),
217 blob_proto.SerializeAsString());
// Worker threads drain a queue of chunk offsets; they are only spawned
// below when the tensor is larger than a single chunk.
221 std::vector<std::future<void>> futures;
226 while (chunkQueue.Pop(&chunkStart)) {
227 processChunk(chunkStart);
230 if (tensor.size() > chunk_size) {
231 for (
int i = 0; i < FLAGS_caffe2_max_tensor_serializer_threads; ++i) {
232 futures.emplace_back(std::async(std::launch::async, task));
237 VLOG(1) <<
"Serializing blob " << name;
// The max(size, 1) bound makes even an empty tensor run the loop once so
// its shape still gets serialized into an otherwise empty proto.
240 for (
size_t chunkBegin = 0;
241 chunkBegin < std::max(tensor.size(),
static_cast<TIndex
>(1));
242 chunkBegin += chunk_size) {
243 VLOG(2) <<
"Starting a chunk at " << chunkBegin;
// Large tensors push offsets to the worker queue; small ones are
// processed synchronously on this thread.
245 if (tensor.size() > chunk_size) {
246 chunkQueue.Push(chunkBegin);
249 processChunk(chunkBegin);
253 processChunk(chunkBegin);
// Signal end-of-work and wait for all worker futures to finish.
258 chunkQueue.NoMoreJobs();
259 for (
auto& fut : futures) {
// TensorSerializer<Context>::Serialize — chunk overload. Writes elements
// [chunkBegin, chunkBegin + chunkSize) of `input`, plus metadata (segment
// range, dims, data type, device detail), into *proto_ptr.
// NOTE(review): this extraction is missing many lines (enforce openings,
// the `switch (data_type)` header, `break;` statements and closing braces).
// Restore from the original header before changing any logic.
265 template <
class Context>
269 TensorProto* proto_ptr,
// The chunk must start inside the tensor; a chunk overrunning the end is
// clamped to the remaining elements.
273 chunkBegin <= input.
size(),
274 "Chunk begin is out of tensor: ",
278 if (chunkBegin + chunkSize > input.
size()) {
279 chunkSize = input.
size() - chunkBegin;
284 "The input does not have data input yet. This is probably because you " 285 "created a tensor of non-zero shape but never filled its data via " 286 "mutable_data() calls. This means that it makes no sense to serialize " 287 "the tensor content.");
// Record which element range this proto covers, then shape/dtype/device.
289 TensorProto& proto = *proto_ptr;
290 proto.mutable_segment()->set_begin(chunkBegin);
291 proto.mutable_segment()->set_end(chunkBegin + chunkSize);
293 for (
int i = 0; i < input.
ndim(); ++i) {
294 proto.add_dims(input.
dim(i));
296 const TensorProto::DataType data_type = TypeMetaToDataType(input.
meta());
297 proto.set_data_type(data_type);
298 StoreDeviceDetail(input, &proto);
// Per-dtype dispatch: same-width types are copied as-is into the matching
// field; bool and the narrow integer types are widened into int32_data.
302 case TensorProto_DataType_FLOAT:
303 detail::CopyToProtoAsIs(
305 input.template data<float>() + chunkBegin,
306 proto.mutable_float_data(),
309 case TensorProto_DataType_INT32:
310 detail::CopyToProtoAsIs(
312 input.template data<int>() + chunkBegin,
313 proto.mutable_int32_data(),
// BYTE is a deprecated wire type; serializers must emit UINT8 instead.
316 case TensorProto_DataType_BYTE:
317 LOG(FATAL) <<
"This should not happen. When serializing, " 318 "BYTE is deprecated and moved to UINT8.";
// Strings are appended to string_data element by element.
320 case TensorProto_DataType_STRING:
322 proto.mutable_string_data()->Reserve(chunkSize);
323 const string* content = input.template data<string>();
324 for (
int i = chunkBegin; i < chunkBegin + chunkSize; ++i) {
325 proto.add_string_data(content[i]);
329 case TensorProto_DataType_BOOL:
330 detail::CopyToProtoWithCast(
332 input.template data<bool>() + chunkBegin,
333 proto.mutable_int32_data(),
336 case TensorProto_DataType_UINT8:
337 detail::CopyToProtoWithCast(
339 input.template data<uint8_t>() + chunkBegin,
340 proto.mutable_int32_data(),
343 case TensorProto_DataType_INT8:
344 detail::CopyToProtoWithCast(
346 input.template data<int8_t>() + chunkBegin,
347 proto.mutable_int32_data(),
350 case TensorProto_DataType_UINT16:
351 detail::CopyToProtoWithCast(
353 input.template data<uint16_t>() + chunkBegin,
354 proto.mutable_int32_data(),
357 case TensorProto_DataType_INT16:
358 detail::CopyToProtoWithCast(
360 input.template data<int16_t>() + chunkBegin,
361 proto.mutable_int32_data(),
364 case TensorProto_DataType_INT64:
365 detail::CopyToProtoAsIs(
367 input.template data<int64_t>() + chunkBegin,
368 proto.mutable_int64_data(),
// FLOAT16: with the flag set, raw bytes go into byte_data (little-endian
// only — enforced via the kValue endianness probe); otherwise each half
// value is widened into int32_data.
371 case TensorProto_DataType_FLOAT16: {
372 if (FLAGS_caffe2_serialize_fp16_as_bytes) {
373 const int kValue = 1;
375 reinterpret_cast<const char*>(&kValue)[0],
377 "Serialization of FLOAT16 on big endian platform " 378 "is not written yet.");
379 unique_ptr<char[]> buffer(
new char[2 * chunkSize]);
380 this->context_.template Copy<char, Context, CPUContext>(
382 reinterpret_cast<const char*
>(
383 input.template data<float16>() + chunkBegin),
385 this->context_.FinishDeviceComputation();
// NOTE(review): set_byte_data copies its argument, yet release() abandons
// ownership of the staging buffer — this looks like a 2*chunkSize-byte
// leak; buffer.get() appears intended. Confirm against upstream.
386 proto.set_byte_data(buffer.release(), 2 * chunkSize);
388 detail::CopyToProtoWithCast(
390 reinterpret_cast<const uint16_t*>(input.template data<float16>()) +
392 proto.mutable_int32_data(),
396 case TensorProto_DataType_DOUBLE:
397 detail::CopyToProtoAsIs(
399 input.template data<double>() + chunkBegin,
400 proto.mutable_double_data(),
// UNDEFINED: each element's raw bytes are handed (via a temp_blob whose
// construction lines are missing from this extraction) to Blob::Serialize
// and appended to string_data.
403 case TensorProto_DataType_UNDEFINED: {
404 proto.mutable_string_data()->Reserve(chunkSize);
406 const char* raw_data =
static_cast<const char*
>(input.
raw_data());
407 for (
int i = chunkBegin; i < chunkBegin + chunkSize; ++i) {
409 const_cast<char*>(raw_data + i * input.
itemsize()), input.
meta());
410 proto.add_string_data(temp_blob.
Serialize(
""));
// TensorDeserializer<Context>::Deserialize — BlobProto overload. Its body is
// missing from this extraction; it presumably forwards blob_proto.tensor()
// to the TensorProto overload below — confirm against the original header.
418 template <
class Context>
420 const BlobProto& blob_proto,
// TensorProto overload: rebuilds the tensor's shape from proto.dims() and
// copies the segment [chunkBegin, chunkEnd) out of the matching data field.
// NOTE(review): many lines are missing here (signature opening, the Resize
// call, enforce openings, `break;`s, closing braces) — restore from the
// original header before editing.
425 template <
class Context>
427 const TensorProto& proto,
// The copies run on the device recorded at serialization time.
431 Context context(proto.device_detail());
432 context.SwitchToDevice(0);
434 for (
const TIndex d : proto.dims()) {
// Default to the whole tensor; honor an explicit segment when present, and
// validate it against the tensor's size.
439 int64_t chunkBegin = 0;
440 auto chunkEnd = tensor->
size();
441 if (proto.has_segment()) {
442 chunkBegin = proto.segment().begin();
443 chunkEnd = proto.segment().end();
446 0 <= chunkBegin && chunkBegin <= chunkEnd && chunkEnd <= tensor->size(),
451 " with total tensor size ",
453 auto chunkSize = chunkEnd - chunkBegin;
// Per-dtype dispatch mirroring the serializer: as-is copies for same-width
// types, casts back (via CopyFromProtoWithCast) for the narrow types.
455 switch (proto.data_type()) {
456 case TensorProto_DataType_FLOAT:
457 detail::CopyFromProtoAsIs(
460 tensor->template mutable_data<float>() + chunkBegin,
463 case TensorProto_DataType_INT32:
464 detail::CopyFromProtoAsIs(
467 tensor->template mutable_data<int>() + chunkBegin,
// Legacy BYTE data is copied straight out of byte_data into uint8 storage.
470 case TensorProto_DataType_BYTE:
474 chunkSize, proto.byte_data().size(),
"Incorrect proto field size.");
475 context.template Copy<uint8_t, Context, CPUContext>(
477 reinterpret_cast<const uint8_t*
>(proto.byte_data().data()),
478 tensor->template mutable_data<uint8_t>() + chunkBegin);
480 case TensorProto_DataType_STRING:
483 string* content = tensor->template mutable_data<string>();
484 for (
int i = 0; i < chunkSize; ++i) {
485 content[i + chunkBegin] = proto.string_data(i);
489 case TensorProto_DataType_BOOL:
490 detail::CopyFromProtoWithCast(
493 tensor->template mutable_data<bool>() + chunkBegin,
496 case TensorProto_DataType_UINT8:
497 detail::CopyFromProtoWithCast(
500 tensor->template mutable_data<uint8_t>() + chunkBegin,
503 case TensorProto_DataType_INT8:
504 detail::CopyFromProtoWithCast(
507 tensor->template mutable_data<int8_t>() + chunkBegin,
510 case TensorProto_DataType_UINT16:
511 detail::CopyFromProtoWithCast(
514 tensor->template mutable_data<uint16_t>() + chunkBegin,
517 case TensorProto_DataType_INT16:
518 detail::CopyFromProtoWithCast(
521 tensor->template mutable_data<int16_t>() + chunkBegin,
524 case TensorProto_DataType_INT64:
525 detail::CopyFromProtoAsIs(
528 tensor->template mutable_data<int64_t>() + chunkBegin,
// FLOAT16 arrives either as raw bytes in byte_data (little-endian hosts
// only — enforced via the kValue probe) or as widened values in int32_data.
531 case TensorProto_DataType_FLOAT16:
532 if (proto.has_byte_data()) {
533 const int kValue = 1;
535 reinterpret_cast<const char*>(&kValue)[0],
537 "Serialization of FLOAT16 on big endian platform " 538 "is not written yet.");
541 proto.byte_data().size(),
542 "Incorrect proto field size.");
543 context.template Copy<float16, Context, CPUContext>(
545 reinterpret_cast<const float16*
>(proto.byte_data().data()),
546 tensor->template mutable_data<float16>() + chunkBegin);
549 detail::CopyFromProtoWithCast(
552 reinterpret_cast<uint16_t*
>(
553 tensor->template mutable_data<float16>()) +
558 case TensorProto_DataType_DOUBLE:
559 detail::CopyFromProtoAsIs(
562 tensor->template mutable_data<double>() + chunkBegin,
// UNDEFINED: elements are deserialized one by one into the raw storage
// (the per-element Blob deserialization lines are missing from this
// extraction).
565 case TensorProto_DataType_UNDEFINED: {
567 void* raw_ptr =
nullptr;
568 for (
int i = 0; i < chunkSize; ++i) {
575 static_cast<char*
>(raw_ptr) +
// Ensure all device copies issued above have completed before returning.
581 context.FinishDeviceComputation();
586 #endif // CAFFE2_CORE_BLOB_SERIALIZATION_H_

Blob is a general container that hosts a typed pointer.
size_t itemsize() const
Return the number of bytes each item takes in the tensor.
std::remove_const< T >::type * ShareExternal(typename std::remove_const< T >::type *allocated)
Sets the underlying object to the allocated one, but does not take over the ownership of the passed in pointer.
const TypeMeta & meta() const
Returns the TypeMeta object associated with the current data type.
TIndex dim(const int i) const
Returns the i-th dimension of the tensor.
TensorSerializer is the serializer for Tensors.
BlobDeserializerBase is an abstract class that deserializes a blob from a BlobProto or a TensorProto...
Tensor is the basic class in Caffe2 that stores a contiguous memory with its shape information...
TIndex size() const
Returns the size (i.e., the number of items) of the tensor.
void Serialize(const Blob &blob, const string &name, SerializationAcceptor acceptor) override
Serializes a Blob.
void Serialize(const string &name, BlobSerializerBase::SerializationAcceptor acceptor, int chunk_size=kDefaultChunkSize) const
Serializes the current blob, if possible.
void Resize(Ts...dim_source)
Resizes a tensor.
TensorDeserializer is the deserializer for Tensors.
A global dictionary that holds information about what Caffe2 modules have been loaded in the current runtime environment.
const void * raw_data() const
Returns a const raw void* pointer of the underlying storage.
T * GetMutable(bool *is_new_object=nullptr)
Gets a mutable pointer to the stored object.
const TypeMeta & meta() const
Returns the meta info of the blob.
void Deserialize(const string &content)
Deserializes from a string containing either BlobProto or TensorProto.
bool IsType() const
Checks if the content stored in the blob is of type T.
int ndim() const
Returns the number of dimensions of the data.
void * raw_mutable_data(const TypeMeta &meta)
Returns a mutable raw pointer of the underlying storage.
BlobSerializerBase is an abstract class that serializes a blob to a string.