From 4c90e94bd7088f9469c4fa6c9b7c25ba2874c856 Mon Sep 17 00:00:00 2001 From: Florine de Geus Date: Fri, 11 Jul 2025 15:40:19 +0200 Subject: [PATCH 1/3] [df] Remove duplicated tests Templated snapshotting has been removed, so the "*Templated" and "*JITted" tests have become identical. --- .../test/dataframe_snapshot_ntuple.cxx | 104 ++---------------- 1 file changed, 8 insertions(+), 96 deletions(-) diff --git a/tree/dataframe/test/dataframe_snapshot_ntuple.cxx b/tree/dataframe/test/dataframe_snapshot_ntuple.cxx index d31d8aa68df91..70fdd5a687b3c 100644 --- a/tree/dataframe/test/dataframe_snapshot_ntuple.cxx +++ b/tree/dataframe/test/dataframe_snapshot_ntuple.cxx @@ -35,9 +35,9 @@ class FileRAII { std::string GetPath() const { return fPath; } }; -TEST(RDFSnapshotRNTuple, FromScratchTemplated) +TEST(RDFSnapshotRNTuple, FromScratch) { - FileRAII fileGuard{"RDFSnapshotRNTuple_from_scratch_templated.root"}; + FileRAII fileGuard{"RDFSnapshotRNTuple_from_scratch.root"}; const std::vector columns = {"x"}; auto df = ROOT::RDataFrame(25ull).Define("x", [] { return 10; }); @@ -59,30 +59,6 @@ TEST(RDFSnapshotRNTuple, FromScratchTemplated) } } -TEST(RDFSnapshotRNTuple, FromScratchJITted) -{ - FileRAII fileGuard{"RDFSnapshotRNTuple_from_scratch_jitted.root"}; - const std::vector columns = {"x"}; - - auto df = ROOT::RDataFrame(25ull).Define("x", [] { return 10; }); - - RSnapshotOptions opts; - opts.fOutputFormat = ROOT::RDF::ESnapshotOutputFormat::kRNTuple; - - auto sdf = df.Snapshot("ntuple", fileGuard.GetPath(), "x", opts); - - EXPECT_EQ(columns, sdf->GetColumnNames()); - - // Verify we actually snapshotted to an RNTuple. - auto ntuple = RNTupleReader::Open("ntuple", fileGuard.GetPath()); - EXPECT_EQ(25ull, ntuple->GetNEntries()); - - auto x = ntuple->GetView("x"); - for (const auto i : ntuple->GetEntryRange()) { - EXPECT_EQ(10, x(i)); - } -} - void BookLazySnapshot(std::string_view filename) { auto d = ROOT::RDataFrame(1); @@ -172,26 +148,9 @@ TEST_F(RDFSnapshotRNTupleTest, DefaultFormatWarning) "in RSnapshotOptions. Note that this current default behaviour might change in the future."); } -TEST_F(RDFSnapshotRNTupleTest, DefaultToRNTupleTemplated) -{ - FileRAII fileGuard{"RDFSnapshotRNTuple_snap_templated.root"}; - - auto df = ROOT::RDataFrame(fNtplName, fFileName); - auto sdf = df.Define("x", [] { return 10; }).Snapshot("ntuple", fileGuard.GetPath(), {"pt", "x"}, fSnapshotOpts); - - auto ntuple = RNTupleReader::Open("ntuple", fileGuard.GetPath()); - EXPECT_EQ(1ull, ntuple->GetNEntries()); - - auto pt = ntuple->GetView("pt"); - auto x = ntuple->GetView("x"); - - EXPECT_FLOAT_EQ(42.0, pt(0)); - EXPECT_EQ(10, x(0)); -} - -TEST_F(RDFSnapshotRNTupleTest, DefaultToRNTupleJITted) +TEST_F(RDFSnapshotRNTupleTest, DefaultToRNTuple) { - FileRAII fileGuard{"RDFSnapshotRNTuple_snap_jitted.root"}; + FileRAII fileGuard{"RDFSnapshotRNTuple_snap.root"}; auto df = ROOT::RDataFrame(fNtplName, fFileName); auto sdf = df.Define("x", [] { return 10; }).Snapshot("ntuple", fileGuard.GetPath(), {"pt", "x"}, fSnapshotOpts); @@ -206,35 +165,9 @@ TEST_F(RDFSnapshotRNTupleTest, DefaultToRNTupleJITted) EXPECT_EQ(10, x(0)); } -TEST_F(RDFSnapshotRNTupleTest, ToTTreeTemplated) -{ - FileRAII fileGuard{"RDFSnapshotRNTuple_to_ttree_templated.root"}; - - auto df = ROOT::RDataFrame(fNtplName, fFileName); - - fSnapshotOpts.fOutputFormat = ROOT::RDF::ESnapshotOutputFormat::kTTree; - - auto sdf = df.Define("x", [] { return 10; }).Snapshot("tree", fileGuard.GetPath(), {"pt", "x"}, fSnapshotOpts); - - TFile file(fileGuard.GetPath().c_str()); - auto tree = file.Get("tree"); - EXPECT_EQ(1ull, tree->GetEntries()); - - float pt; - int x; - - tree->SetBranchAddress("pt", &pt); - tree->SetBranchAddress("x", &x); - - tree->GetEntry(0); - - EXPECT_FLOAT_EQ(42.0, pt); - EXPECT_EQ(10, x); -} - -TEST_F(RDFSnapshotRNTupleTest, ToTTreeJITted) +TEST_F(RDFSnapshotRNTupleTest, ToTTree) { - FileRAII fileGuard{"RDFSnapshotRNTuple_to_ttree_jitted.root"}; + FileRAII fileGuard{"RDFSnapshotRNTuple_to_ttree.root"}; auto df = ROOT::RDataFrame(fNtplName, fFileName); @@ -524,31 +457,10 @@ void WriteTestTree(const std::string &tname, const std::string &fname) t.Write(); } -TEST(RDFSnapshotRNTuple, DisallowFromTTreeTemplated) -{ - const auto treename = "tree"; - FileRAII fileGuard{"RDFSnapshotRNTuple_disallow_from_ttree_templated.root"}; - - WriteTestTree(treename, fileGuard.GetPath()); - - auto df = ROOT::RDataFrame(treename, fileGuard.GetPath()); - - RSnapshotOptions opts; - opts.fOutputFormat = ROOT::RDF::ESnapshotOutputFormat::kRNTuple; - - try { - auto sdf = df.Define("x", [] { return 10; }).Snapshot("ntuple", fileGuard.GetPath(), {"pt", "x"}, opts); - FAIL() << "snapshotting from RNTuple to TTree is not (yet) possible"; - } catch (const std::runtime_error &err) { - EXPECT_STREQ(err.what(), "Snapshotting from TTree to RNTuple is not yet supported. The current recommended way " - "to convert TTrees to RNTuple is through the RNTupleImporter."); - } -} - -TEST(RDFSnapshotRNTuple, DisallowFromTTreeJITted) +TEST(RDFSnapshotRNTuple, DisallowFromTTree) { const auto treename = "tree"; - FileRAII fileGuard{"RDFSnapshotRNTuple_disallow_from_ttree_jitted.root"}; + FileRAII fileGuard{"RDFSnapshotRNTuple_disallow_from_ttree.root"}; WriteTestTree(treename, fileGuard.GetPath()); From bc3a50bdf578c464616801216f988f614067abd4 Mon Sep 17 00:00:00 2001 From: Florine de Geus Date: Mon, 14 Jul 2025 17:14:46 +0200 Subject: [PATCH 2/3] [df] Allow snapshotting from TTree to RNTuple --- tree/dataframe/inc/ROOT/RDF/RInterface.hxx | 5 - tree/dataframe/test/NTupleStruct.hxx | 1 + tree/dataframe/test/TwoFloats.h | 1 - .../test/dataframe_snapshot_ntuple.cxx | 109 ++++++++++++++++-- 4 files changed, 99 insertions(+), 17 deletions(-) diff --git a/tree/dataframe/inc/ROOT/RDF/RInterface.hxx b/tree/dataframe/inc/ROOT/RDF/RInterface.hxx index 7e84de0d33116..3064581dfd946 100644 --- a/tree/dataframe/inc/ROOT/RDF/RInterface.hxx +++ b/tree/dataframe/inc/ROOT/RDF/RInterface.hxx @@ -1379,11 +1379,6 @@ public: }; if (options.fOutputFormat == ESnapshotOutputFormat::kRNTuple) { - if (RDFInternal::GetDataSourceLabel(*this) == "TTreeDS") { - throw std::runtime_error("Snapshotting from TTree to RNTuple is not yet supported. The current recommended " - "way to convert TTrees to RNTuple is through the RNTupleImporter."); - } - // The data source of the RNTuple resulting from the Snapshot action does not exist yet here, so we create one // without a data source for now, and set it once the actual data source can be created (i.e., after // writing the RNTuple). diff --git a/tree/dataframe/test/NTupleStruct.hxx b/tree/dataframe/test/NTupleStruct.hxx index 99d4504890ff2..00ed7b092f42a 100644 --- a/tree/dataframe/test/NTupleStruct.hxx +++ b/tree/dataframe/test/NTupleStruct.hxx @@ -10,6 +10,7 @@ struct Electron { float pt; + friend bool operator==(const Electron &left, const Electron &right) { return left.pt == right.pt; } friend bool operator<(const Electron &left, const Electron &right) { return left.pt < right.pt; } }; diff --git a/tree/dataframe/test/TwoFloats.h b/tree/dataframe/test/TwoFloats.h index 08e84cc912085..481ec63e5a6de 100644 --- a/tree/dataframe/test/TwoFloats.h +++ b/tree/dataframe/test/TwoFloats.h @@ -14,4 +14,3 @@ class TwoFloats { } ClassDef(TwoFloats, 2) }; - diff --git a/tree/dataframe/test/dataframe_snapshot_ntuple.cxx b/tree/dataframe/test/dataframe_snapshot_ntuple.cxx index 70fdd5a687b3c..fab940a02f720 100644 --- a/tree/dataframe/test/dataframe_snapshot_ntuple.cxx +++ b/tree/dataframe/test/dataframe_snapshot_ntuple.cxx @@ -35,6 +35,18 @@ class FileRAII { std::string GetPath() const { return fPath; } }; +template +void expect_vec_eq(const ROOT::RVec &v1, const ROOT::RVec &v2) +{ + ASSERT_EQ(v1.size(), v2.size()) << "Vectors 'v1' and 'v2' are of unequal length"; + for (std::size_t i = 0ull; i < v1.size(); ++i) { + if constexpr (std::is_floating_point_v) + EXPECT_FLOAT_EQ(v1[i], v2[i]) << "Vectors 'v1' and 'v2' differ at index " << i; + else + EXPECT_EQ(v1[i], v2[i]) << "Vectors 'v1' and 'v2' differ at index " << i; + } +} + TEST(RDFSnapshotRNTuple, FromScratch) { FileRAII fileGuard{"RDFSnapshotRNTuple_from_scratch.root"}; @@ -448,19 +460,39 @@ void WriteTestTree(const std::string &tname, const std::string &fname) { TFile file(fname.c_str(), "RECREATE"); TTree t(tname.c_str(), tname.c_str()); - float pt; + + float pt = 42.f; + std::vector photons{1.f, 2.f, 3.f}; + Electron electron{137.f}; + Jet jets; + jets.electrons.emplace_back(Electron{122.f}); + jets.electrons.emplace_back(Electron{125.f}); + jets.electrons.emplace_back(Electron{129.f}); + + Int_t nmuons = 1; + float muon_pt[3] = {10.f, 20.f, 30.f}; + + struct { + Int_t x = 1; + Int_t y = 2; + } point; + t.Branch("pt", &pt); + t.Branch("photons", &photons); + t.Branch("electron", &electron); + t.Branch("jets", &jets); + t.Branch("nmuons", &nmuons); + t.Branch("muon_pt", muon_pt, "muon_pt[nmuons]"); + t.Branch("point", &point, "x/I:y/I"); - pt = 42.0; t.Fill(); - t.Write(); } -TEST(RDFSnapshotRNTuple, DisallowFromTTree) +TEST(RDFSnapshotRNTuple, FromTTree) { const auto treename = "tree"; - FileRAII fileGuard{"RDFSnapshotRNTuple_disallow_from_ttree.root"}; + FileRAII fileGuard{"RDFSnapshotRNTuple_from_ttree.root"}; WriteTestTree(treename, fileGuard.GetPath()); @@ -469,13 +501,68 @@ TEST(RDFSnapshotRNTuple, DisallowFromTTree) RSnapshotOptions opts; opts.fOutputFormat = ROOT::RDF::ESnapshotOutputFormat::kRNTuple; - try { - auto sdf = df.Define("x", [] { return 10; }).Snapshot("ntuple", fileGuard.GetPath(), {"pt", "x"}, opts); - FAIL() << "snapshotting from RNTuple to TTree is not (yet) possible"; - } catch (const std::runtime_error &err) { - EXPECT_STREQ(err.what(), "Snapshotting from TTree to RNTuple is not yet supported. The current recommended way " - "to convert TTrees to RNTuple is through the RNTupleImporter."); + { + // FIXME(fdegeus): snapshotting leaflist branches as-is (i.e. without explicitly providing their leafs) is not + // supported, because we have no way of reconstructing the memory layout of the branch itself from only the + // TTree's on-disk information without JITting. For RNTuple, we would be able to do this using anonymous record + // fields, however. Once this is implemented, this test should be changed to check the result of snapshotting + // "point" fully. + auto sdf = df.Define("x", [] { return 10; }) + .Snapshot("ntuple", fileGuard.GetPath(), + {"x", "pt", "photons", "electron", "jets", "muon_pt", "point.x", "point.y"}, opts); + + auto x = sdf->Take("x"); + auto pt = sdf->Take("pt"); + auto photons = sdf->Take>("photons"); + auto electron = sdf->Take("electron"); + auto jet_electrons = sdf->Take>("jets.electrons"); + auto nMuons = sdf->Take("nmuons"); + auto muonPt = sdf->Take>("muon_pt"); + auto pointX = sdf->Take("point_x"); + auto pointY = sdf->Take("point_y"); + + ASSERT_EQ(1UL, x->size()); + ASSERT_EQ(1UL, pt->size()); + ASSERT_EQ(1UL, photons->size()); + ASSERT_EQ(1UL, electron->size()); + ASSERT_EQ(1UL, jet_electrons->size()); + ASSERT_EQ(1UL, nMuons->size()); + ASSERT_EQ(1UL, muonPt->size()); + ASSERT_EQ(1UL, pointX->size()); + ASSERT_EQ(1UL, pointY->size()); + + EXPECT_EQ(10, x->front()); + EXPECT_EQ(42.f, pt->front()); + expect_vec_eq({1.f, 2.f, 3.f}, photons->front()); + EXPECT_EQ(Electron{137.f}, electron->front()); + expect_vec_eq({Electron{122.f}, Electron{125.f}, Electron{129.f}}, jet_electrons->front()); + EXPECT_EQ(1, nMuons->front()); + expect_vec_eq({10.f}, muonPt->front()); + EXPECT_EQ(1, pointX->front()); + EXPECT_EQ(2, pointY->front()); } + + auto reader = RNTupleReader::Open("ntuple", fileGuard.GetPath()); + + auto x = reader->GetView("x"); + auto pt = reader->GetView("pt"); + auto photons = reader->GetView>("photons"); + auto electron = reader->GetView("electron"); + auto jet_electrons = reader->GetView>("jets.electrons"); + auto nMuons = reader->GetView("nmuons"); + auto muonPt = reader->GetView>("muon_pt"); + auto pointX = reader->GetView("point_x"); + auto pointY = reader->GetView("point_y"); + + EXPECT_EQ(10, x(0)); + EXPECT_EQ(42.f, pt(0)); + expect_vec_eq({1.f, 2.f, 3.f}, photons(0)); + EXPECT_EQ(Electron{137.f}, electron(0)); + expect_vec_eq({Electron{122.f}, Electron{125.f}, Electron{129.f}}, jet_electrons(0)); + EXPECT_EQ(1, nMuons(0)); + expect_vec_eq({10.f}, muonPt(0)); + EXPECT_EQ(1, pointX(0)); + EXPECT_EQ(2, pointY(0)); } #ifdef R__USE_IMT From d46cd896001d2258663f137d258dcbe860a49b16 Mon Sep 17 00:00:00 2001 From: Florine de Geus Date: Thu, 17 Jul 2025 15:18:46 +0200 Subject: [PATCH 3/3] [df] Expand `FromTTree` tests Also check the resulting on-disk layout, as well as the effects of turning the `kVectorToRVec` snapshot option off. --- .../test/dataframe_snapshot_ntuple.cxx | 47 +++++++++++++++++-- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/tree/dataframe/test/dataframe_snapshot_ntuple.cxx b/tree/dataframe/test/dataframe_snapshot_ntuple.cxx index fab940a02f720..ca0fc74200603 100644 --- a/tree/dataframe/test/dataframe_snapshot_ntuple.cxx +++ b/tree/dataframe/test/dataframe_snapshot_ntuple.cxx @@ -489,7 +489,7 @@ void WriteTestTree(const std::string &tname, const std::string &fname) t.Write(); } -TEST(RDFSnapshotRNTuple, FromTTree) +void TestFromTTree(bool vector2RVec = false) { const auto treename = "tree"; FileRAII fileGuard{"RDFSnapshotRNTuple_from_ttree.root"}; @@ -498,10 +498,11 @@ TEST(RDFSnapshotRNTuple, FromTTree) auto df = ROOT::RDataFrame(treename, fileGuard.GetPath()); - RSnapshotOptions opts; - opts.fOutputFormat = ROOT::RDF::ESnapshotOutputFormat::kRNTuple; - { + RSnapshotOptions opts; + opts.fOutputFormat = ROOT::RDF::ESnapshotOutputFormat::kRNTuple; + opts.fVector2RVec = vector2RVec; + // FIXME(fdegeus): snapshotting leaflist branches as-is (i.e. without explicitly providing their leafs) is not // supported, because we have no way of reconstructing the memory layout of the branch itself from only the // TTree's on-disk information without JITting. For RNTuple, we would be able to do this using anonymous record @@ -544,6 +545,34 @@ TEST(RDFSnapshotRNTuple, FromTTree) auto reader = RNTupleReader::Open("ntuple", fileGuard.GetPath()); + auto &descriptor = reader->GetDescriptor(); + + int nTopLevelFields = 0; + for (const auto &_ : descriptor.GetTopLevelFields()) { + nTopLevelFields++; + } + EXPECT_EQ(9, nTopLevelFields); + EXPECT_EQ("std::int32_t", descriptor.GetFieldDescriptor(descriptor.FindFieldId("x")).GetTypeName()); + EXPECT_EQ("float", descriptor.GetFieldDescriptor(descriptor.FindFieldId("pt")).GetTypeName()); + if (vector2RVec) { + EXPECT_EQ("ROOT::VecOps::RVec", + descriptor.GetFieldDescriptor(descriptor.FindFieldId("photons")).GetTypeName()); + } else { + EXPECT_EQ("std::vector", descriptor.GetFieldDescriptor(descriptor.FindFieldId("photons")).GetTypeName()); + } + EXPECT_EQ("Electron", descriptor.GetFieldDescriptor(descriptor.FindFieldId("electron")).GetTypeName()); + auto jetsId = descriptor.FindFieldId("jets"); + EXPECT_EQ("Jet", descriptor.GetFieldDescriptor(jetsId).GetTypeName()); + auto electronsId = descriptor.FindFieldId("electrons", jetsId); + EXPECT_EQ("std::vector", descriptor.GetFieldDescriptor(electronsId).GetTypeName()); + EXPECT_EQ("std::int32_t", descriptor.GetFieldDescriptor(descriptor.FindFieldId("nmuons")).GetTypeName()); + EXPECT_EQ("ROOT::VecOps::RVec", + descriptor.GetFieldDescriptor(descriptor.FindFieldId("muon_pt")).GetTypeName()); + EXPECT_EQ("std::int32_t", descriptor.GetFieldDescriptor(descriptor.FindFieldId("point_x")).GetTypeName()); + EXPECT_EQ("std::int32_t", descriptor.GetFieldDescriptor(descriptor.FindFieldId("point_y")).GetTypeName()); + // sanity check to make sure we don't snapshot the internal RDF size columns + EXPECT_EQ(ROOT::kInvalidDescriptorId, descriptor.FindFieldId("R_rdf_sizeof_photons")); + auto x = reader->GetView("x"); auto pt = reader->GetView("pt"); auto photons = reader->GetView>("photons"); @@ -565,6 +594,16 @@ TEST(RDFSnapshotRNTuple, FromTTree) EXPECT_EQ(2, pointY(0)); } +TEST(RDFSnapshotRNTuple, FromTTree) +{ + TestFromTTree(); +} + +TEST(RDFSnapshotRNTuple, FromTTreeNoVector2RVec) +{ + TestFromTTree(/*vector2RVec=*/false); +} + #ifdef R__USE_IMT struct TIMTEnabler { TIMTEnabler(unsigned int nSlots) { ROOT::EnableImplicitMT(nSlots); }