Skip to content
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ Other enhancements
compression library. Compression was also added to the low-level Stata-file writers
:class:`~pandas.io.stata.StataWriter`, :class:`~pandas.io.stata.StataWriter117`,
and :class:`~pandas.io.stata.StataWriterUTF8` (:issue:`26599`).
- :meth:`HDFStore.put` now accepts a ``track_times`` parameter, which is passed to the ``create_table`` method of ``PyTables`` (:issue:`32682`).

.. ---------------------------------------------------------------------------

Expand Down
14 changes: 13 additions & 1 deletion pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -984,6 +984,7 @@ def put(
data_columns: Optional[List[str]] = None,
encoding=None,
errors: str = "strict",
track_times: bool = True,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you update the doc-string, make sure to add a 1.1 versionadded tag

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

doc string added

):
"""
Store object in HDFStore.
Expand All @@ -1010,6 +1011,12 @@ def put(
Provide an encoding for strings.
dropna : bool, default False, do not write an ALL nan row to
The store settable by the option 'io.hdf.dropna_table'.
track_times : bool, default True
Parameter is propagated to the 'create_table' method of 'PyTables'.
If set to False, it makes it possible to produce the same h5 files
(same hashes) independent of creation time.

.. versionadded:: 1.1.0
"""
if format is None:
format = get_option("io.hdf.default_format") or "fixed"
Expand All @@ -1027,6 +1034,7 @@ def put(
data_columns=data_columns,
encoding=encoding,
errors=errors,
track_times=track_times,
)

def remove(self, key: str, where=None, start=None, stop=None):
Expand Down Expand Up @@ -1626,6 +1634,7 @@ def _write_to_group(
data_columns=None,
encoding=None,
errors: str = "strict",
track_times: bool = True,
):
group = self.get_node(key)

Expand Down Expand Up @@ -1688,6 +1697,7 @@ def _write_to_group(
dropna=dropna,
nan_rep=nan_rep,
data_columns=data_columns,
track_times=track_times,
)

if isinstance(s, Table) and index:
Expand Down Expand Up @@ -4106,8 +4116,8 @@ def write(
dropna=False,
nan_rep=None,
data_columns=None,
track_times=True,
):

if not append and self.is_exists:
self._handle.remove_node(self.group, "table")

Expand Down Expand Up @@ -4137,6 +4147,8 @@ def write(
# set the table attributes
table.set_attrs()

options["track_times"] = track_times

# create the table
table._handle.create_table(table.group, **options)

Expand Down
45 changes: 45 additions & 0 deletions pandas/tests/io/pytables/test_store.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import datetime
from datetime import timedelta
from distutils.version import LooseVersion
import hashlib
from io import BytesIO
import os
from pathlib import Path
import re
import time
from warnings import catch_warnings, simplefilter

import numpy as np
Expand Down Expand Up @@ -296,6 +298,49 @@ def test_keys(self, setup_path):
assert set(store.keys()) == expected
assert set(store) == expected

def test_no_track_times(self, setup_path):
    # GH 32682
    # ``track_times`` is handed through to ``create_table`` of ``pytables``
    # (see its documentation); this test compares file hashes with the
    # option switched off and on.

    def checksum(filename, hash_factory=hashlib.md5, chunk_num_blocks=128):
        # Hash the file piece by piece instead of reading it in one go.
        hasher = hash_factory()
        read_size = chunk_num_blocks * hasher.block_size
        with open(filename, "rb") as fh:
            while True:
                piece = fh.read(read_size)
                if piece == b"":
                    break
                hasher.update(piece)
        return hasher.digest()

    def create_h5_and_return_checksum(track_times):
        # Write a one-row table into a fresh file and return the file's hash.
        frame = pd.DataFrame({"a": [1]})
        put_kwargs = {
            "format": "table",
            "data_columns": True,
            "index": None,
            "track_times": track_times,
        }
        with ensure_clean_path(setup_path) as path:
            with pd.HDFStore(path, mode="w") as store:
                store.put("table", frame, **put_kwargs)

            return checksum(path)

    first_untracked = create_h5_and_return_checksum(track_times=False)
    first_tracked = create_h5_and_return_checksum(track_times=True)

    # pause so the second pair of files gets a different creation time
    time.sleep(1)

    second_untracked = create_h5_and_return_checksum(track_times=False)
    second_tracked = create_h5_and_return_checksum(track_times=True)

    # with track_times=False the two files hash identically
    assert first_untracked == second_untracked

    # with track_times=True the hashes must differ
    assert first_tracked != second_tracked

def test_keys_ignore_hdf_softlink(self, setup_path):

# GH 20523
Expand Down