From 5ed5b4bf9df28f0dc70fbbfde10f0487277e7a83 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Mon, 23 Oct 2023 09:42:11 -0700 Subject: [PATCH 1/7] Delete table integrity checking (DO NOT MERGE) --- subprojects/tskit/tskit/tables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subprojects/tskit/tskit/tables.c b/subprojects/tskit/tskit/tables.c index 8eea85f5a..e26386c37 100644 --- a/subprojects/tskit/tskit/tables.c +++ b/subprojects/tskit/tskit/tables.c @@ -9609,7 +9609,7 @@ simplifier_init(simplifier_t *self, const tsk_id_t *samples, tsk_size_t num_samp * tests to ensure we're doing sensible things with duplicate sites. * (Particularly, re TSK_SIMPLIFY_REDUCE_TO_SITE_TOPOLOGY.) */ ret_id = tsk_table_collection_check_integrity(tables, - TSK_CHECK_EDGE_ORDERING | TSK_CHECK_SITE_ORDERING | TSK_CHECK_SITE_DUPLICATES); + TSK_CHECK_SITE_ORDERING | TSK_CHECK_SITE_DUPLICATES); if (ret_id != 0) { ret = (int) ret_id; goto out; From 0f7903ce2f5840dc1e8d05fdf12e3b9f8756eb17 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Mon, 23 Oct 2023 08:51:58 -0700 Subject: [PATCH 2/7] add naive edge buffer --- Cargo.toml | 3 + ...aploid_wright_fisher_simple_edge_buffer.rs | 196 ++++++++++++++++++ 2 files changed, 199 insertions(+) create mode 100644 examples/haploid_wright_fisher_simple_edge_buffer.rs diff --git a/Cargo.toml b/Cargo.toml index 8c790fd29..0b5d9e14c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -58,3 +58,6 @@ rustdoc-args = ["--cfg", "doc_cfg"] # Not run during tests [[example]] name = "tree_traversals" + +[[example]] +name = "haploid_wright_fisher_simple_edge_buffer" diff --git a/examples/haploid_wright_fisher_simple_edge_buffer.rs b/examples/haploid_wright_fisher_simple_edge_buffer.rs new file mode 100644 index 000000000..943fbb746 --- /dev/null +++ b/examples/haploid_wright_fisher_simple_edge_buffer.rs @@ -0,0 +1,196 @@ +// This is a rust implementation of the example +// found in tskit-c + +use std::collections::HashMap; + +use anyhow::Result; +use clap::Parser; +#[cfg(test)] +use rand::distributions::Distribution; +use rand::prelude::*; +use rand::SeedableRng; + +#[derive(Debug)] +struct Edge { + left: tskit::Position, + right: tskit::Position, + child: tskit::NodeId, + previous: Option, +} + +#[derive(Default)] +struct EdgeBuffer { + parent: Vec, + last: HashMap, + edges: Vec, +} + +impl EdgeBuffer { + fn buffer_edge( + &mut self, + left: tskit::Position, + right: tskit::Position, + parent: tskit::NodeId, + child: tskit::NodeId, + ) -> Result<()> { + if let Some(last) = self.last.get_mut(&parent) { + let pchild = self.edges[*last].child; + assert!(child >= pchild); + self.edges.push(Edge { + left, + right, + child, + previous: Some(*last), + }); + *last = self.edges.len() - 1; + } else { + self.edges.push(Edge { + left, + right, + child, + previous: None, + }); + self.last.insert(parent, self.edges.len() - 1); + self.parent.push(parent); + } + Ok(()) + } + + fn clear(&mut self) { + self.parent.clear(); + self.last.clear(); + self.edges.clear(); + } +} + +fn rotate_edges(bookmark: &tskit::types::Bookmark, tables: &mut tskit::TableCollection) { + let num_edges = tables.edges().num_rows().as_usize(); + let left = + unsafe { std::slice::from_raw_parts_mut((*tables.as_mut_ptr()).edges.left, num_edges) }; + let right = + unsafe { std::slice::from_raw_parts_mut((*tables.as_mut_ptr()).edges.right, num_edges) }; + let parent = + unsafe { std::slice::from_raw_parts_mut((*tables.as_mut_ptr()).edges.parent, num_edges) }; + let child = + unsafe { std::slice::from_raw_parts_mut((*tables.as_mut_ptr()).edges.child, num_edges) }; + let mid = bookmark.edges().as_usize(); + left.rotate_left(mid); + right.rotate_left(mid); + parent.rotate_left(mid); + child.rotate_left(mid); +} + +// ANCHOR: haploid_wright_fisher +fn simulate( + seed: u64, + popsize: usize, + num_generations: i32, + simplify_interval: i32, +) -> Result { + if popsize == 0 { + return Err(anyhow::Error::msg("popsize must be > 0")); + } + if num_generations == 0 { + return Err(anyhow::Error::msg("num_generations must be > 0")); + } + if simplify_interval == 0 { + return Err(anyhow::Error::msg("simplify_interval must be > 0")); + } + let mut tables = tskit::TableCollection::new(1.0)?; + + // create parental nodes + let mut parents_and_children = { + let mut temp = vec![]; + let parental_time = f64::from(num_generations); + for _ in 0..popsize { + let node = tables.add_node(0, parental_time, -1, -1)?; + temp.push(node); + } + temp + }; + + // allocate space for offspring nodes + parents_and_children.resize(2 * parents_and_children.len(), tskit::NodeId::NULL); + + // Construct non-overlapping mutable slices into our vector. + let (mut parents, mut children) = parents_and_children.split_at_mut(popsize); + + let parent_picker = rand::distributions::Uniform::new(0, popsize); + let breakpoint_generator = rand::distributions::Uniform::new(0.0, 1.0); + let mut rng = rand::rngs::StdRng::seed_from_u64(seed); + let mut bookmark = tskit::types::Bookmark::default(); + + let mut buffer = EdgeBuffer::default(); + for birth_time in (0..num_generations).rev() { + for c in children.iter_mut() { + let bt = f64::from(birth_time); + let child = tables.add_node(0, bt, -1, -1)?; + let left_parent = parents + .get(parent_picker.sample(&mut rng)) + .ok_or_else(|| anyhow::Error::msg("invalid left_parent index"))?; + let right_parent = parents + .get(parent_picker.sample(&mut rng)) + .ok_or_else(|| anyhow::Error::msg("invalid right_parent index"))?; + let breakpoint = breakpoint_generator.sample(&mut rng); + buffer.buffer_edge(0_f64.into(), breakpoint.into(), *left_parent, child)?; + buffer.buffer_edge(breakpoint.into(), 1_f64.into(), *right_parent, child)?; + *c = child; + } + + if birth_time % simplify_interval == 0 { + for &parent in buffer.parent.iter().rev() { + let mut last = buffer.last.get(&parent).cloned(); + while let Some(previous) = last { + let edge = &buffer.edges[previous]; + tables.add_edge(edge.left, edge.right, parent, edge.child)?; + last = edge.previous; + } + } + buffer.clear(); + rotate_edges(&bookmark, &mut tables); + if let Some(idmap) = + tables.simplify(children, tskit::SimplificationOptions::default(), true)? + { + // remap child nodes + for o in children.iter_mut() { + *o = idmap[usize::try_from(*o)?]; + } + } + bookmark.set_edges(tables.edges().num_rows()); + } + std::mem::swap(&mut parents, &mut children); + } + + tables.build_index()?; + let treeseq = tables.tree_sequence(tskit::TreeSequenceFlags::default())?; + + Ok(treeseq) +} +// ANCHOR_END: haploid_wright_fisher + +#[derive(Clone, clap::Parser)] +struct SimParams { + seed: u64, + popsize: usize, + num_generations: i32, + simplify_interval: i32, + treefile: Option, + #[clap(short, long, help = "Use bookmark to avoid sorting entire edge table.")] + bookmark: bool, +} + +fn main() -> Result<()> { + let params = SimParams::parse(); + let treeseq = simulate( + params.seed, + params.popsize, + params.num_generations, + params.simplify_interval, + )?; + + if let Some(treefile) = ¶ms.treefile { + treeseq.dump(treefile, 0)?; + } + + Ok(()) +} From abebc5062b33b355ba9cea1345c58d95c5623c00 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Mon, 23 Oct 2023 12:16:05 -0700 Subject: [PATCH 3/7] remove unneeded assert --- examples/haploid_wright_fisher_simple_edge_buffer.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/haploid_wright_fisher_simple_edge_buffer.rs b/examples/haploid_wright_fisher_simple_edge_buffer.rs index 943fbb746..4a3b062bf 100644 --- a/examples/haploid_wright_fisher_simple_edge_buffer.rs +++ b/examples/haploid_wright_fisher_simple_edge_buffer.rs @@ -34,8 +34,6 @@ impl EdgeBuffer { child: tskit::NodeId, ) -> Result<()> { if let Some(last) = self.last.get_mut(&parent) { - let pchild = self.edges[*last].child; - assert!(child >= pchild); self.edges.push(Edge { left, right, From 040a4cbf95ad6dc54190eb6e9929c0f326e8f872 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Mon, 23 Oct 2023 12:20:38 -0700 Subject: [PATCH 4/7] remove uneeded Err path --- examples/haploid_wright_fisher_simple_edge_buffer.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/examples/haploid_wright_fisher_simple_edge_buffer.rs b/examples/haploid_wright_fisher_simple_edge_buffer.rs index 4a3b062bf..6726682f2 100644 --- a/examples/haploid_wright_fisher_simple_edge_buffer.rs +++ b/examples/haploid_wright_fisher_simple_edge_buffer.rs @@ -32,7 +32,7 @@ impl EdgeBuffer { right: tskit::Position, parent: tskit::NodeId, child: tskit::NodeId, - ) -> Result<()> { + ) { if let Some(last) = self.last.get_mut(&parent) { self.edges.push(Edge { left, @@ -51,7 +51,6 @@ impl EdgeBuffer { self.last.insert(parent, self.edges.len() - 1); self.parent.push(parent); } - Ok(()) } fn clear(&mut self) { @@ -130,8 +129,8 @@ fn simulate( .get(parent_picker.sample(&mut rng)) .ok_or_else(|| anyhow::Error::msg("invalid right_parent index"))?; let breakpoint = breakpoint_generator.sample(&mut rng); - buffer.buffer_edge(0_f64.into(), breakpoint.into(), *left_parent, child)?; - buffer.buffer_edge(breakpoint.into(), 1_f64.into(), *right_parent, child)?; + buffer.buffer_edge(0_f64.into(), breakpoint.into(), *left_parent, child); + buffer.buffer_edge(breakpoint.into(), 1_f64.into(), *right_parent, child); *c = child; } From 7f13450f10a6016fa3476cfd718ea8575680fcd0 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Mon, 23 Oct 2023 12:55:13 -0700 Subject: [PATCH 5/7] simplify buffer update logic --- ...aploid_wright_fisher_simple_edge_buffer.rs | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/examples/haploid_wright_fisher_simple_edge_buffer.rs b/examples/haploid_wright_fisher_simple_edge_buffer.rs index 6726682f2..2e27be2e1 100644 --- a/examples/haploid_wright_fisher_simple_edge_buffer.rs +++ b/examples/haploid_wright_fisher_simple_edge_buffer.rs @@ -33,24 +33,20 @@ impl EdgeBuffer { parent: tskit::NodeId, child: tskit::NodeId, ) { + let mut previous = None; if let Some(last) = self.last.get_mut(&parent) { - self.edges.push(Edge { - left, - right, - child, - previous: Some(*last), - }); - *last = self.edges.len() - 1; + previous = Some(*last); + *last = self.edges.len(); } else { - self.edges.push(Edge { - left, - right, - child, - previous: None, - }); - self.last.insert(parent, self.edges.len() - 1); + self.last.insert(parent, self.edges.len()); self.parent.push(parent); } + self.edges.push(Edge { + left, + right, + child, + previous, + }); } fn clear(&mut self) { From 6dc483deca494e8ed311274846c4bd47fc66e228 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Mon, 23 Oct 2023 13:21:08 -0700 Subject: [PATCH 6/7] offload liftover logic to fn --- ...aploid_wright_fisher_simple_edge_buffer.rs | 29 ++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/examples/haploid_wright_fisher_simple_edge_buffer.rs b/examples/haploid_wright_fisher_simple_edge_buffer.rs index 2e27be2e1..89e24a3be 100644 --- a/examples/haploid_wright_fisher_simple_edge_buffer.rs +++ b/examples/haploid_wright_fisher_simple_edge_buffer.rs @@ -56,6 +56,24 @@ impl EdgeBuffer { } } +fn liftover_buffered_edges( + bookmark: &tskit::types::Bookmark, + buffer: &mut EdgeBuffer, + tables: &mut tskit::TableCollection, +) -> Result<()> { + for &parent in buffer.parent.iter().rev() { + let mut last = buffer.last.get(&parent.into()).cloned(); + while let Some(previous) = last { + let edge = &buffer.edges[previous]; + tables.add_edge(edge.left, edge.right, parent, edge.child)?; + last = edge.previous; + } + } + buffer.clear(); + rotate_edges(bookmark, tables); + Ok(()) +} + fn rotate_edges(bookmark: &tskit::types::Bookmark, tables: &mut tskit::TableCollection) { let num_edges = tables.edges().num_rows().as_usize(); let left = @@ -131,16 +149,7 @@ fn simulate( } if birth_time % simplify_interval == 0 { - for &parent in buffer.parent.iter().rev() { - let mut last = buffer.last.get(&parent).cloned(); - while let Some(previous) = last { - let edge = &buffer.edges[previous]; - tables.add_edge(edge.left, edge.right, parent, edge.child)?; - last = edge.previous; - } - } - buffer.clear(); - rotate_edges(&bookmark, &mut tables); + liftover_buffered_edges(&bookmark, &mut buffer, &mut tables)?; if let Some(idmap) = tables.simplify(children, tskit::SimplificationOptions::default(), true)? { From c37e13c997022917f4d132a8e3ec590a784a5297 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Mon, 23 Oct 2023 13:26:19 -0700 Subject: [PATCH 7/7] fix lints --- examples/haploid_wright_fisher_simple_edge_buffer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/haploid_wright_fisher_simple_edge_buffer.rs b/examples/haploid_wright_fisher_simple_edge_buffer.rs index 89e24a3be..8438c1f7a 100644 --- a/examples/haploid_wright_fisher_simple_edge_buffer.rs +++ b/examples/haploid_wright_fisher_simple_edge_buffer.rs @@ -62,7 +62,7 @@ fn liftover_buffered_edges( tables: &mut tskit::TableCollection, ) -> Result<()> { for &parent in buffer.parent.iter().rev() { - let mut last = buffer.last.get(&parent.into()).cloned(); + let mut last = buffer.last.get(&parent).cloned(); while let Some(previous) = last { let edge = &buffer.edges[previous]; tables.add_edge(edge.left, edge.right, parent, edge.child)?;