@@ -14,6 +14,8 @@ limitations under the License.
1414==============================================================================*/
1515
1616#include " tensorflow/core/framework/op_kernel.h"
17+ #include " tensorflow_io/core/kernels/io_interface.h"
18+ #include " tensorflow_io/core/kernels/stream.h"
1719
1820#include < hdf5.h>
1921#include < hdf5_hl.h>
@@ -320,5 +322,156 @@ REGISTER_KERNEL_BUILDER(Name("ReadHDF5").Device(DEVICE_CPU),
320322
321323
322324} // namespace
325+
326+
327+ class HDF5Indexable : public IOIndexableInterface {
328+ public:
329+ HDF5Indexable (Env* env)
330+ : env_(env) {}
331+
332+ ~HDF5Indexable () {}
333+ Status Init (const std::vector<string>& input, const std::vector<string>& metadata, const void * memory_data, const int64 memory_size) override {
334+ if (input.size () > 1 ) {
335+ return errors::InvalidArgument (" more than 1 filename is not supported" );
336+ }
337+ const string& filename = input[0 ];
338+ file_.reset (new SizedRandomAccessFile (env_, filename, memory_data, memory_size));
339+ TF_RETURN_IF_ERROR (file_->GetFileSize (&file_size_));
340+
341+ file_image_.reset (new HDF5FileImage (env_, filename, " " ));
342+ H5::H5File *file = file_image_->GetFile ();
343+ if (file == nullptr ) {
344+ return errors::InvalidArgument (" unable to open hdf5 file: " , filename);
345+ }
346+
347+ H5O_info_t info;
348+ file->getObjinfo (info);
349+ HDF5Iterate data (info.addr );
350+ herr_t err = H5Literate (file->getId (), H5_INDEX_NAME, H5_ITER_NATIVE, NULL , HDF5Iterate::Iterate, (void *)&data);
351+ for (size_t i = 0 ; i < data.datasets_ .size (); i++) {
352+ columns_.emplace_back (data.datasets_ [i]);
353+ columns_index_[data.datasets_ [i]] = i;
354+ }
355+
356+ for (size_t i = 0 ; i < columns_.size (); i++) {
357+ ::tensorflow::DataType dtype;
358+ string dataset = columns_[i];
359+ H5::DataSet data_set = file->openDataSet (dataset);
360+
361+ H5::DataSpace data_space = data_set.getSpace ();
362+ int rank = data_space.getSimpleExtentNdims ();
363+ absl::InlinedVector<hsize_t , 4 > dims (rank);
364+ data_space.getSimpleExtentDims (dims.data ());
365+
366+ H5::DataType data_type = data_set.getDataType ();
367+ hid_t native_type = H5Tget_native_type (data_type.getId (), H5T_DIR_ASCEND);
368+ if (H5Tequal (native_type, H5T_NATIVE_INT)) {
369+ dtype = DT_INT32;
370+ } else if (H5Tequal (native_type, H5T_NATIVE_UINT32)) {
371+ dtype = DT_UINT32;
372+ } else if (H5Tequal (native_type, H5T_NATIVE_LONG)) {
373+ dtype = DT_INT64;
374+ } else if (H5Tequal (native_type, H5T_NATIVE_FLOAT)) {
375+ dtype = DT_FLOAT;
376+ } else if (H5Tequal (native_type, H5T_NATIVE_DOUBLE)) {
377+ dtype = DT_DOUBLE;
378+ } else {
379+ return errors::InvalidArgument (" unsupported data type: " , native_type);
380+ }
381+ dtypes_.emplace_back (dtype);
382+ absl::InlinedVector<int64, 4 > shape_dims (rank);
383+ for (int r = 0 ; r < rank; r++) {
384+ shape_dims[r] = dims[r];
385+ }
386+ shapes_.emplace_back (TensorShape (shape_dims));
387+ }
388+ return Status::OK ();
389+ }
390+ Status Component (Tensor* component) override {
391+ *component = Tensor (DT_STRING, TensorShape ({static_cast <int64>(columns_.size ())}));
392+ for (size_t i = 0 ; i < columns_.size (); i++) {
393+ component->flat <string>()(i) = columns_[i];
394+ }
395+ return Status::OK ();
396+ }
397+ Status Spec (const Tensor& component, PartialTensorShape* shape, DataType* dtype) override {
398+ const int64 column_index = columns_index_[component.scalar <string>()()];
399+ *shape = shapes_[column_index];
400+ *dtype = dtypes_[column_index];
401+ return Status::OK ();
402+ }
403+
404+ Status GetItem (const int64 start, const int64 stop, const int64 step, const Tensor& component, Tensor* tensor) override {
405+ if (step != 1 ) {
406+ return errors::InvalidArgument (" step " , step, " is not supported" );
407+ }
408+ const string& column = component.scalar <string>()();
409+
410+ H5::H5File *file = file_image_->GetFile ();
411+ try {
412+ H5::DataSet data_set = file->openDataSet (column);
413+ H5::DataSpace data_space = data_set.getSpace ();
414+
415+ int rank = data_space.getSimpleExtentNdims ();
416+ absl::InlinedVector<hsize_t , 4 > dims (rank);
417+ data_space.getSimpleExtentDims (dims.data ());
418+
419+ if (start > dims[0 ] || stop > dims[0 ]) {
420+ return errors::InvalidArgument (" dataset " , column, " selection is out of boundary" );
421+ }
422+ // Find the border of the dims start and dims
423+ absl::InlinedVector<hsize_t , 4 > dims_start (dims.size (), 0 );
424+ dims_start[0 ] = start;
425+ dims[0 ] = stop - start;
426+
427+ H5::DataSpace memory_space (dims.size (), dims.data ());
428+
429+ data_space.selectHyperslab (H5S_SELECT_SET, dims.data (), dims_start.data ());
430+
431+ H5::DataType data_type = data_set.getDataType ();
432+ hid_t native_type = H5Tget_native_type (data_type.getId (), H5T_DIR_ASCEND);
433+ if (H5Tequal (native_type, H5T_NATIVE_INT)) {
434+ data_set.read (tensor->flat <int32>().data (), H5::PredType::NATIVE_INT, memory_space, data_space);
435+ } else if (H5Tequal (native_type, H5T_NATIVE_UINT32)) {
436+ data_set.read (tensor->flat <uint32>().data (), H5::PredType::NATIVE_UINT32, memory_space, data_space);
437+ } else if (H5Tequal (native_type, H5T_NATIVE_LONG)) {
438+ data_set.read (tensor->flat <int64>().data (), H5::PredType::NATIVE_LONG, memory_space, data_space);
439+ } else if (H5Tequal (native_type, H5T_NATIVE_FLOAT)) {
440+ data_set.read (tensor->flat <float >().data (), H5::PredType::NATIVE_FLOAT, memory_space, data_space);
441+ } else if (H5Tequal (native_type, H5T_NATIVE_DOUBLE)) {
442+ data_set.read (tensor->flat <double >().data (), H5::PredType::NATIVE_DOUBLE, memory_space, data_space);
443+ } else {
444+ return errors::Unimplemented (" data type not supported yet: " , data_set.getTypeClass ());
445+ }
446+ } catch (H5::FileIException e){
447+ return errors::InvalidArgument (" unable to open dataset" , e.getCDetailMsg ());
448+ }
449+
450+ return Status::OK ();
451+ }
452+
453+ string DebugString () const override {
454+ mutex_lock l (mu_);
455+ return strings::StrCat (" HDF5Indexable" );
456+ }
457+ private:
458+ mutable mutex mu_;
459+ Env* env_ GUARDED_BY (mu_);
460+ std::unique_ptr<SizedRandomAccessFile> file_ GUARDED_BY (mu_);
461+ uint64 file_size_ GUARDED_BY (mu_);
462+ std::unique_ptr<HDF5FileImage> file_image_;
463+
464+ std::vector<DataType> dtypes_;
465+ std::vector<TensorShape> shapes_;
466+ std::vector<string> columns_;
467+ std::unordered_map<string, int64> columns_index_;
468+ };
469+
// Registers three CPU kernels backed by HDF5Indexable: the Init and Spec ops
// use the IOInterface*Op templates and the GetItem op uses
// IOIndexableGetItemOp.
// NOTE(review): the op-name literals below carry a leading space as found in
// this file; confirm the names against the corresponding op registrations.
470+ REGISTER_KERNEL_BUILDER (Name(" HDF5IndexableInit" ).Device(DEVICE_CPU),
471+ IOInterfaceInitOp<HDF5Indexable>);
472+ REGISTER_KERNEL_BUILDER (Name(" HDF5IndexableSpec" ).Device(DEVICE_CPU),
473+ IOInterfaceSpecOp<HDF5Indexable>);
474+ REGISTER_KERNEL_BUILDER (Name(" HDF5IndexableGetItem" ).Device(DEVICE_CPU),
475+ IOIndexableGetItemOp<HDF5Indexable>);
323476} // namespace data
324477} // namespace tensorflow
0 commit comments