Skip to content

fix: fix progress bar for RDataFrame with Range limits #19294

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions tree/dataframe/inc/ROOT/RDFHelpers.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <ROOT/RDF/RActionBase.hxx>
#include <ROOT/RDF/RResultMap.hxx>
#include <ROOT/RResultHandle.hxx> // users of RunGraphs might rely on this transitive include
#include <ROOT/RResultPtr.hxx>
#include <ROOT/TypeTraits.hxx>

#include <array>
Expand Down Expand Up @@ -276,8 +277,9 @@ SnapshotPtr_t VariationsFor(SnapshotPtr_t resPtr);
/// \brief Add ProgressBar to a ROOT::RDF::RNode
/// \param[in] df RDataFrame node at which ProgressBar is called.
///
/// The ProgressBar can be added not only at the RDataFrame head node, but also at any any computational node,
/// such as Filter or Define.
/// The ProgressBar can be added not only at the RDataFrame head node, but also at any computational node,
/// such as Filter or Define. To correctly account for the entries processed, place the progress bar
/// after transformations that reduce the number of events (e.g. `Range`).
/// ### Example usage:
/// ~~~{.cpp}
/// ROOT::RDataFrame df("tree", "file.root");
Expand Down Expand Up @@ -360,6 +362,8 @@ private:
bool fUseShellColours;

std::shared_ptr<TTree> fTree{nullptr};
ROOT::RDF::RResultPtr<ULong64_t> fTotalEntries{};


public:
/// Create a progress helper.
Expand All @@ -370,7 +374,8 @@ public:
/// \param useColors Use shell colour codes to colour the output. Automatically disabled when
/// we are not writing to a tty.
ProgressHelper(std::size_t increment, unsigned int totalFiles = 1, unsigned int progressBarWidth = 40,
unsigned int printInterval = 1, bool useColors = true);
unsigned int printInterval = 1, bool useColors = true,
ROOT::RDF::RResultPtr<ULong64_t> totalEntries = {});

~ProgressHelper() = default;

Expand Down
14 changes: 9 additions & 5 deletions tree/dataframe/src/RDFHelpers.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "ROOT/RDF/RLoopManager.hxx" // for RLoopManager
#include "ROOT/RDF/Utils.hxx"
#include "ROOT/RResultHandle.hxx" // for RResultHandle, RunGraphs
#include "ROOT/RResultPtr.hxx"

#include "TROOT.h" // IsImplicitMTEnabled
#include "TError.h" // Warning
Expand Down Expand Up @@ -157,7 +158,8 @@ void ThreadsPerTH3(unsigned int N)
}

ProgressHelper::ProgressHelper(std::size_t increment, unsigned int totalFiles, unsigned int progressBarWidth,
unsigned int printInterval, bool useColors)
unsigned int printInterval, bool useColors,
ROOT::RDF::RResultPtr<ULong64_t> totalEntries)
: fPrintInterval(printInterval),
fIncrement{increment},
fBarWidth{progressBarWidth = int(get_tty_size() / 4)},
Expand All @@ -169,6 +171,7 @@ ProgressHelper::ProgressHelper(std::size_t increment, unsigned int totalFiles, u
fIsTTY{isatty(fileno(stdout)) == 1},
fUseShellColours{useColors && fIsTTY} // Control characters only with terminals.
#endif
, fTotalEntries(std::move(totalEntries))
{
}

Expand Down Expand Up @@ -241,7 +244,7 @@ void ProgressHelper::PrintStats(std::ostream &stream, std::size_t currentEventCo
{
RestoreStreamState restore(stream);
auto evtpersec = EvtPerSec();
auto GetNEventsOfCurrentFile = ComputeNEventsSoFar();
auto GetNEventsOfCurrentFile = fTotalEntries && fTotalEntries.IsReady() ? const_cast<ROOT::RDF::RResultPtr<ULong64_t>&>(fTotalEntries).GetValue() : ComputeNEventsSoFar();
auto currentFileIdx = ComputeCurrentFileIdx();
auto totalFiles = fTotalFiles;

Expand Down Expand Up @@ -287,7 +290,7 @@ void ProgressHelper::PrintStats(std::ostream &stream, std::size_t currentEventCo
void ProgressHelper::PrintStatsFinal(std::ostream &stream, std::chrono::seconds elapsedSeconds) const
{
RestoreStreamState restore(stream);
auto totalEvents = ComputeNEventsSoFar();
auto totalEvents = fTotalEntries && fTotalEntries.IsReady() ? const_cast<ROOT::RDF::RResultPtr<ULong64_t>&>(fTotalEntries).GetValue() : ComputeNEventsSoFar();
auto totalFiles = fTotalFiles;

if (fUseShellColours)
Expand Down Expand Up @@ -316,7 +319,7 @@ void ProgressHelper::PrintStatsFinal(std::ostream &stream, std::chrono::seconds
/// Print a progress bar of width `ProgressHelper::fBarWidth` if the total number of events is known (i.e. non-zero).
void ProgressHelper::PrintProgressBar(std::ostream &stream, std::size_t currentEventCount) const
{
auto GetNEventsOfCurrentFile = ComputeNEventsSoFar();
auto GetNEventsOfCurrentFile = fTotalEntries && fTotalEntries.IsReady() ? const_cast<ROOT::RDF::RResultPtr<ULong64_t>&>(fTotalEntries).GetValue() : ComputeNEventsSoFar();
if (GetNEventsOfCurrentFile == 0)
return;

Expand Down Expand Up @@ -387,7 +390,8 @@ class ProgressBarAction final : public ROOT::Detail::RDF::RActionImpl<ProgressBa
void AddProgressBar(ROOT::RDF::RNode node)
{
auto total_files = node.GetNFiles();
auto progress = std::make_shared<ProgressHelper>(1000, total_files);
auto totalEntries = node.Count();
auto progress = std::make_shared<ProgressHelper>(1000, total_files, 40, 1, true, totalEntries);
ProgressBarAction c(progress);
auto r = node.Book<>(c);
r.OnPartialResultSlot(1000, [progress](unsigned int slot, auto &&arg) { (*progress)(slot, arg); });
Expand Down
15 changes: 15 additions & 0 deletions tutorials/analysis/dataframe/df108_ProgressRange.C
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/// Minimal example showing how a progress bar interacts with `Range`.
///
/// The progress bar must be attached to the node *after* the range is applied
/// so the total number of entries displayed corresponds to the ranged dataset
/// and not the original input.
///
/// \macro_code
void df108_ProgressRange()
{
ROOT::RDataFrame df(100);
auto ranged = df.Range(0, 10);
ROOT::RDF::Experimental::AddProgressBar(ranged);
auto h = ranged.Define("x", []() { return gRandom->Rndm(); }).Histo1D("x");
std::cout << h->GetEntries() << std::endl;
}
Loading