@@ -14,6 +14,8 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow_io/core/kernels/io_interface.h"
+#include "tensorflow_io/core/kernels/stream.h"
 
 #include <hdf5.h>
 #include <hdf5_hl.h>
@@ -320,5 +322,168 @@ REGISTER_KERNEL_BUILDER(Name("ReadHDF5").Device(DEVICE_CPU),
 
 
 }  // namespace
+
+
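+// HDF5Indexable exposes the datasets of an HDF5 file as an indexable
+// collection of tensors that can be read by row range through the
+// IOIndexableInterface kernels registered at the bottom of this file.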
+class HDF5Indexable : public IOIndexableInterface {
+ public:
+  HDF5Indexable(Env* env)
+      : env_(env) {}
+
+  ~HDF5Indexable() {}
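+  // Opens the HDF5 file named by `input[0]` (or, presumably, an in-memory
+  // image when `memory_data` is provided) and resolves the columns (datasets)
+  // to expose, along with their dtypes and shapes.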
+  Status Init(const std::vector<string>& input, const std::vector<string>& metadata, const void* memory_data, const int64 memory_size) override {
+    if (input.size() > 1) {
+      return errors::InvalidArgument("more than 1 filename is not supported");
+    }
+    const string& filename = input[0];
+    file_.reset(new SizedRandomAccessFile(env_, filename, memory_data, memory_size));
+    TF_RETURN_IF_ERROR(file_->GetFileSize(&file_size_));
+
+    file_image_.reset(new HDF5FileImage(env_, filename, ""));
+    H5::H5File *file = file_image_->GetFile();
+    if (file == nullptr) {
+      return errors::InvalidArgument("unable to open hdf5 file: ", filename);
+    }
+
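+    // Columns may be selected explicitly with "column: <name>" metadata entries.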
347+ for (size_t i = 0 ; i < metadata.size (); i++) {
348+ if (metadata[i].find_first_of (" column: " ) == 0 ) {
349+ columns_.emplace_back (metadata[i].substr (8 ));
350+ }
351+ }
352+
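+    // If no columns were specified, enumerate every dataset in the file.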
+    if (columns_.size() == 0) {
+      H5O_info_t info;
+      file->getObjinfo(info);
+      HDF5Iterate data(info.addr);
+      herr_t err = H5Literate(file->getId(), H5_INDEX_NAME, H5_ITER_NATIVE, NULL, HDF5Iterate::Iterate, (void *)&data);
+      if (err < 0) {
+        return errors::InvalidArgument("unable to iterate over datasets in: ", filename);
+      }
+      for (size_t i = 0; i < data.datasets_.size(); i++) {
+        columns_.emplace_back(data.datasets_[i]);
+      }
+    }
+
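+    // Record the TensorFlow dtype and shape of every selected dataset.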
+    for (size_t i = 0; i < columns_.size(); i++) {
+      ::tensorflow::DataType dtype;
+      string dataset = columns_[i];
+      H5::DataSet data_set = file->openDataSet(dataset);
+
+      H5::DataSpace data_space = data_set.getSpace();
+      int rank = data_space.getSimpleExtentNdims();
+      absl::InlinedVector<hsize_t, 4> dims(rank);
+      data_space.getSimpleExtentDims(dims.data());
+
+      H5::DataType data_type = data_set.getDataType();
+      hid_t native_type = H5Tget_native_type(data_type.getId(), H5T_DIR_ASCEND);
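+      // Map the HDF5 native type to the corresponding TensorFlow dtype.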
+      if (H5Tequal(native_type, H5T_NATIVE_INT)) {
+        dtype = DT_INT32;
+      } else if (H5Tequal(native_type, H5T_NATIVE_UINT32)) {
+        dtype = DT_UINT32;
+      } else if (H5Tequal(native_type, H5T_NATIVE_LONG)) {
+        dtype = DT_INT64;
+      } else if (H5Tequal(native_type, H5T_NATIVE_FLOAT)) {
+        dtype = DT_FLOAT;
+      } else if (H5Tequal(native_type, H5T_NATIVE_DOUBLE)) {
+        dtype = DT_DOUBLE;
+      } else {
+        return errors::InvalidArgument("unsupported data type: ", native_type);
+      }
+      dtypes_.emplace_back(dtype);
+      absl::InlinedVector<int64, 4> shape_dims(rank);
+      for (int r = 0; r < rank; r++) {
+        shape_dims[r] = dims[r];
+      }
+      shapes_.emplace_back(TensorShape(shape_dims));
+    }
+    return Status::OK();
+  }
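+  // Returns the dtypes and shapes recorded in Init(), one entry per column.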
+  Status Spec(std::vector<DataType>& dtypes, std::vector<PartialTensorShape>& shapes) override {
+    dtypes.clear();
+    for (size_t i = 0; i < dtypes_.size(); i++) {
+      dtypes.push_back(dtypes_[i]);
+    }
+    shapes.clear();
+    for (size_t i = 0; i < shapes_.size(); i++) {
+      shapes.push_back(shapes_[i]);
+    }
+    return Status::OK();
+  }
+
+  Status Extra(std::vector<Tensor>* extra) override {
+    // Expose columns
+    Tensor columns(DT_STRING, TensorShape({static_cast<int64>(columns_.size())}));
+    for (size_t i = 0; i < columns_.size(); i++) {
+      columns.flat<string>()(i) = columns_[i];
+    }
+    extra->push_back(columns);
+    return Status::OK();
+  }
+
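+  // Reads rows [start, stop) with step 1 from every column into the
+  // caller-provided tensors.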
+  Status GetItem(const int64 start, const int64 stop, const int64 step, std::vector<Tensor>& tensors) override {
+    if (step != 1) {
+      return errors::InvalidArgument("step ", step, " is not supported");
+    }
+    H5::H5File *file = file_image_->GetFile();
+    for (size_t i = 0; i < tensors.size(); i++) {
+      try {
+        H5::DataSet data_set = file->openDataSet(columns_[i]);
+        H5::DataSpace data_space = data_set.getSpace();
+
+        int rank = data_space.getSimpleExtentNdims();
+        absl::InlinedVector<hsize_t, 4> dims(rank);
+        data_space.getSimpleExtentDims(dims.data());
+
+        if (start > dims[0] || stop > dims[0]) {
+          return errors::InvalidArgument("dataset ", columns_[i], " selection is out of bounds");
+        }
+        // Select rows [start, stop) along the outermost dimension.
+        absl::InlinedVector<hsize_t, 4> dims_start(dims.size(), 0);
+        dims_start[0] = start;
+        dims[0] = stop - start;
+
+        H5::DataSpace memory_space(dims.size(), dims.data());
+
+        data_space.selectHyperslab(H5S_SELECT_SET, dims.data(), dims_start.data());
+
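+        // Read the selected slab directly into the output tensor's buffer,
+        // dispatching on the dataset's native type.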
+        H5::DataType data_type = data_set.getDataType();
+        hid_t native_type = H5Tget_native_type(data_type.getId(), H5T_DIR_ASCEND);
+        if (H5Tequal(native_type, H5T_NATIVE_INT)) {
+          data_set.read(tensors[i].flat<int32>().data(), H5::PredType::NATIVE_INT, memory_space, data_space);
+        } else if (H5Tequal(native_type, H5T_NATIVE_UINT32)) {
+          data_set.read(tensors[i].flat<uint32>().data(), H5::PredType::NATIVE_UINT32, memory_space, data_space);
+        } else if (H5Tequal(native_type, H5T_NATIVE_LONG)) {
+          data_set.read(tensors[i].flat<int64>().data(), H5::PredType::NATIVE_LONG, memory_space, data_space);
+        } else if (H5Tequal(native_type, H5T_NATIVE_FLOAT)) {
+          data_set.read(tensors[i].flat<float>().data(), H5::PredType::NATIVE_FLOAT, memory_space, data_space);
+        } else if (H5Tequal(native_type, H5T_NATIVE_DOUBLE)) {
+          data_set.read(tensors[i].flat<double>().data(), H5::PredType::NATIVE_DOUBLE, memory_space, data_space);
+        } else {
+          return errors::Unimplemented("data type not supported yet: ", data_set.getTypeClass());
+        }
+      } catch (const H5::FileIException& e) {
+        return errors::InvalidArgument("unable to open dataset: ", e.getCDetailMsg());
+      }
+    }
+
+    return Status::OK();
+  }
+
+  string DebugString() const override {
+    mutex_lock l(mu_);
+    return strings::StrCat("HDF5Indexable");
+  }
+ private:
+  mutable mutex mu_;
+  Env* env_ GUARDED_BY(mu_);
+  std::unique_ptr<SizedRandomAccessFile> file_ GUARDED_BY(mu_);
+  uint64 file_size_ GUARDED_BY(mu_);
+  std::unique_ptr<HDF5FileImage> file_image_;
+
+  std::vector<DataType> dtypes_;
+  std::vector<TensorShape> shapes_;
+  std::vector<string> columns_;
+};
+
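+// Register the kernels that create an HDF5Indexable and read slices from it.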
+REGISTER_KERNEL_BUILDER(Name("HDF5IndexableInit").Device(DEVICE_CPU),
+                        IOInterfaceInitOp<HDF5Indexable>);
+REGISTER_KERNEL_BUILDER(Name("HDF5IndexableGetItem").Device(DEVICE_CPU),
+                        IOIndexableGetItemOp<HDF5Indexable>);
 }  // namespace data
 }  // namespace tensorflow