From 8e09387a40df594070b3178b9cbea1ebc1a739c7 Mon Sep 17 00:00:00 2001
From: "Kevin R. Thornton"
Date: Tue, 29 Nov 2022 11:35:54 -0800
Subject: [PATCH] feature: TreeSequence::edge_differences_iter

Returns a lending iterator providing further
sub-Iterators over edge differences.
---
 src/edge_differences.rs | 284 ++++++++++++++++++++++++++++++++++++++++
 src/lib.rs              |   2 +
 src/trees.rs            |  63 +++++++++
 3 files changed, 349 insertions(+)
 create mode 100644 src/edge_differences.rs

diff --git a/src/edge_differences.rs b/src/edge_differences.rs
new file mode 100644
index 000000000..ac5c7e3cb
--- /dev/null
+++ b/src/edge_differences.rs
@@ -0,0 +1,284 @@
+use crate::NodeId;
+use crate::Position;
+use crate::TreeSequence;
+
+use crate::bindings;
+
+/// Owning wrapper around the `C` edge-difference iterator.
+///
+/// `'ts` borrows the [`TreeSequence`] whose raw pointer is handed to
+/// `tsk_diff_iter_init`, so this wrapper cannot outlive that tree sequence.
+// NOTE(review): presumes the C struct retains that pointer -- confirm in tskit.
+#[repr(transparent)]
+struct LLEdgeDifferenceIterator<'ts>(
+    bindings::tsk_diff_iter_t,
+    std::marker::PhantomData<&'ts TreeSequence>,
+);
+
+impl std::ops::Deref for LLEdgeDifferenceIterator<'_> {
+    type Target = bindings::tsk_diff_iter_t;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl std::ops::DerefMut for LLEdgeDifferenceIterator<'_> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}
+
+impl Drop for LLEdgeDifferenceIterator<'_> {
+    fn drop(&mut self) {
+        // SAFETY: `self.0` was initialized by `tsk_diff_iter_init` in
+        // `new_from_treeseq`, the only place a value is constructed.
+        unsafe { bindings::tsk_diff_iter_free(&mut self.0) };
+    }
+}
+
+impl<'ts> LLEdgeDifferenceIterator<'ts> {
+    /// Initialize the `C` iterator for `treeseq`.
+    ///
+    /// Returns `None` if `tsk_diff_iter_init` returns a negative value.
+    pub fn new_from_treeseq(
+        treeseq: &'ts TreeSequence,
+        flags: bindings::tsk_flags_t,
+    ) -> Option<Self> {
+        let mut inner = std::mem::MaybeUninit::<bindings::tsk_diff_iter_t>::uninit();
+        // SAFETY: `inner` is writable storage of the expected type and
+        // `treeseq` is a live reference.
+        match unsafe { bindings::tsk_diff_iter_init(inner.as_mut_ptr(), treeseq.as_ptr(), flags) } {
+            x if x < 0 => None,
+            // SAFETY: a non-negative code is taken to mean `inner` was initialized.
+            _ => Some(Self(unsafe { inner.assume_init() }, std::marker::PhantomData)),
+        }
+    }
+}
+
+/// Marker type for edge insertion.
+#[derive(Debug, Clone, Copy)]
+pub struct Insertion {}
+
+/// Marker type for edge removal.
+#[derive(Debug, Clone, Copy)]
+pub struct Removal {}
+
+mod private {
+    pub trait EdgeDifferenceIteration {}
+
+    impl EdgeDifferenceIteration for super::Insertion {}
+    impl EdgeDifferenceIteration for super::Removal {}
+}
+
+struct LLEdgeList<T: private::EdgeDifferenceIteration> {
+    inner: bindings::tsk_edge_list_t,
+    marker: std::marker::PhantomData<T>,
+}
+
+macro_rules! build_lledgelist {
+    ($name: ident, $generic: ty) => {
+        type $name = LLEdgeList<$generic>;
+
+        impl Default for $name {
+            fn default() -> Self {
+                Self {
+                    inner: bindings::tsk_edge_list_t {
+                        head: std::ptr::null_mut(),
+                        tail: std::ptr::null_mut(),
+                    },
+                    marker: std::marker::PhantomData::<$generic> {},
+                }
+            }
+        }
+    };
+}
+
+build_lledgelist!(LLEdgeInsertionList, Insertion);
+build_lledgelist!(LLEdgeRemovalList, Removal);
+
+/// Concrete type implementing [`Iterator`] over [`EdgeInsertion`] or [`EdgeRemoval`].
+/// Created by [`EdgeDifferencesIterator::edge_insertions`] or
+/// [`EdgeDifferencesIterator::edge_removals`], respectively.
+pub struct EdgeDifferences<'a, T: private::EdgeDifferenceIteration> {
+    // Next node to yield; null once the list is exhausted.
+    current: *mut bindings::tsk_edge_list_node_t,
+    // Keeps the source list borrowed while `current` points into it.
+    marker: std::marker::PhantomData<&'a LLEdgeList<T>>,
+}
+
+impl<'a, T: private::EdgeDifferenceIteration> EdgeDifferences<'a, T> {
+    fn new(inner: &'a LLEdgeList<T>) -> Self {
+        Self {
+            current: inner.inner.head,
+            marker: std::marker::PhantomData,
+        }
+    }
+}
+
+/// An edge difference. Edge insertions and removals are differentiated by
+/// marker types [`Insertion`] and [`Removal`], respectively.
+#[derive(Debug, Copy, Clone)]
+pub struct EdgeDifference<T: private::EdgeDifferenceIteration> {
+    left: Position,
+    right: Position,
+    parent: NodeId,
+    child: NodeId,
+    marker: std::marker::PhantomData<T>,
+}
+
+impl<T: private::EdgeDifferenceIteration> EdgeDifference<T> {
+    fn new<P: Into<Position>, N: Into<NodeId>>(left: P, right: P, parent: N, child: N) -> Self {
+        Self {
+            left: left.into(),
+            right: right.into(),
+            parent: parent.into(),
+            child: child.into(),
+            marker: std::marker::PhantomData::<T> {},
+        }
+    }
+
+    /// The `left` value of the edge.
+    pub fn left(&self) -> Position {
+        self.left
+    }
+    /// The `right` value of the edge.
+    pub fn right(&self) -> Position {
+        self.right
+    }
+    /// The `parent` node of the edge.
+    pub fn parent(&self) -> NodeId {
+        self.parent
+    }
+    /// The `child` node of the edge.
+    pub fn child(&self) -> NodeId {
+        self.child
+    }
+}
+
+impl<T> std::fmt::Display for EdgeDifference<T>
+where
+    T: private::EdgeDifferenceIteration,
+{
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "left: {}, right: {}, parent: {}, child: {}",
+            self.left(),
+            self.right(),
+            self.parent(),
+            self.child()
+        )
+    }
+}
+
+/// Type alias for [`EdgeDifference<Insertion>`]
+pub type EdgeInsertion = EdgeDifference<Insertion>;
+/// Type alias for [`EdgeDifference<Removal>`]
+pub type EdgeRemoval = EdgeDifference<Removal>;
+
+impl<'a, T> Iterator for EdgeDifferences<'a, T>
+where
+    T: private::EdgeDifferenceIteration,
+{
+    type Item = EdgeDifference<T>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        // SAFETY: `current` is either null or a node of the list that is
+        // borrowed for `'a`; `as_ref` maps null to `None`.
+        let node = unsafe { self.current.as_ref() }?;
+        // Step before returning so that an exhausted iterator stays
+        // exhausted instead of restarting from the head.
+        self.current = node.next;
+        let edge = &node.edge;
+        Some(Self::Item::new(edge.left, edge.right, edge.parent, edge.child))
+    }
+}
+
+/// Manages iteration over trees to obtain
+/// edge differences.
+///
+/// `'ts` is the borrow of the [`TreeSequence`] being iterated over.
+pub struct EdgeDifferencesIterator<'ts> {
+    inner: LLEdgeDifferenceIterator<'ts>,
+    insertion: LLEdgeInsertionList,
+    removal: LLEdgeRemovalList,
+    left: f64,
+    right: f64,
+    // Last return value of `tsk_diff_iter_next`.
+    advanced: i32,
+}
+
+impl<'ts> EdgeDifferencesIterator<'ts> {
+    // NOTE: will return None if tskit-c cannot
+    // allocate memory for internal structures.
+    pub(crate) fn new_from_treeseq(
+        treeseq: &'ts TreeSequence,
+        flags: bindings::tsk_flags_t,
+    ) -> Option<Self> {
+        LLEdgeDifferenceIterator::new_from_treeseq(treeseq, flags).map(|inner| Self {
+            inner,
+            insertion: LLEdgeInsertionList::default(),
+            removal: LLEdgeRemovalList::default(),
+            left: f64::default(),
+            right: f64::default(),
+            advanced: 0,
+        })
+    }
+
+    fn advance_tree(&mut self) {
+        // SAFETY: `'ts` keeps the tree sequence borrowed for as long as
+        // `self.inner` exists, and all out-parameters are owned by `self`.
+        self.advanced = unsafe {
+            bindings::tsk_diff_iter_next(
+                &mut self.inner.0,
+                &mut self.left,
+                &mut self.right,
+                &mut self.removal.inner,
+                &mut self.insertion.inner,
+            )
+        };
+    }
+
+    /// Left coordinate reported by the most recent advance.
+    pub fn left(&self) -> Position {
+        self.left.into()
+    }
+
+    /// Right coordinate reported by the most recent advance.
+    pub fn right(&self) -> Position {
+        self.right.into()
+    }
+
+    /// `(left, right)` as reported by the most recent advance.
+    pub fn interval(&self) -> (Position, Position) {
+        (self.left(), self.right())
+    }
+
+    /// Iterate over the edges removed by the most recent advance.
+    pub fn edge_removals(&self) -> impl Iterator<Item = EdgeRemoval> + '_ {
+        EdgeDifferences::<Removal>::new(&self.removal)
+    }
+
+    /// Iterate over the edges inserted by the most recent advance.
+    pub fn edge_insertions(&self) -> impl Iterator<Item = EdgeInsertion> + '_ {
+        EdgeDifferences::<Insertion>::new(&self.insertion)
+    }
+}
+
+impl<'ts> streaming_iterator::StreamingIterator for EdgeDifferencesIterator<'ts> {
+    type Item = EdgeDifferencesIterator<'ts>;
+
+    fn advance(&mut self) {
+        self.advance_tree()
+    }
+
+    fn get(&self) -> Option<&Self::Item> {
+        // Zero and negative return codes both stop the iteration.
+        if self.advanced > 0 {
+            Some(self)
+        } else {
+            None
+        }
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 45a345c60..8c5cf0455 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -79,6 +79,7 @@
 pub mod bindings;
 
 mod _macros; // Starts w/_ to be sorted at front by rustfmt!
+mod edge_differences;
 mod edge_table;
 pub mod error;
 mod flags;
@@ -427,6 +428,7 @@ impl_time_position_arithmetic!(Position, Time);
 /// "Null" identifier value.
 pub(crate) const TSK_NULL: tsk_id_t = -1;
 
+pub use edge_differences::*;
 pub use edge_table::{EdgeTable, EdgeTableRow, OwningEdgeTable};
 pub use error::TskitError;
 pub use flags::*;
diff --git a/src/trees.rs b/src/trees.rs
index 4b6dc3bb0..f6abb51be 100644
--- a/src/trees.rs
+++ b/src/trees.rs
@@ -538,6 +538,19 @@ impl TreeSequence {
     }
 
     delegate_table_view_api!();
+
+    /// Build a lending iterator over edge differences.
+    ///
+    /// # Returns
+    ///
+    /// * `None` if the `C` back end is unable to allocate
+    ///   needed memory
+    /// * `Some(iterator)` otherwise.
+    pub fn edge_differences_iter(
+        &self,
+    ) -> Option<crate::edge_differences::EdgeDifferencesIterator> {
+        crate::edge_differences::EdgeDifferencesIterator::new_from_treeseq(self, 0)
+    }
 }
 
 impl TryFrom<TableCollection> for TreeSequence {
@@ -861,6 +874,56 @@ pub(crate) mod test_trees {
             panic!("Expected a tree.");
         }
     }
+
+    // TODO: use trybuild to add tests that the iterator
+    // lifetime is indeed coupled to that of the treeseq
+    #[test]
+    fn test_edge_diffs_lending_iterator_num_trees() {
+        {
+            let treeseq = treeseq_from_small_table_collection_two_trees();
+            let num_nodes: usize = treeseq.nodes().num_rows().try_into().unwrap();
+            let mut parents = vec![NodeId::NULL; num_nodes + 1];
+            if let Some(mut ediff_iter) = treeseq.edge_differences_iter() {
+                let mut tree_iter = treeseq.tree_iterator(0).unwrap();
+                let mut ntrees = 0;
+                while let Some(diffs) = ediff_iter.next() {
+                    let tree = tree_iter.next().unwrap();
+
+                    for edge_out in diffs.edge_removals() {
+                        let c: usize = edge_out.child().try_into().unwrap();
+                        parents[c] = NodeId::NULL;
+                    }
+
+                    for edge_in in diffs.edge_insertions() {
+                        let c: usize = edge_in.child().try_into().unwrap();
+                        parents[c] = edge_in.parent();
+                    }
+
+                    assert_eq!(tree.parent_array(), &parents);
+                    ntrees += 1;
+                }
+                assert_eq!(ntrees, 2);
+            } else {
+                panic!("expected an edge differences iterator");
+            }
+        }
+
+        {
+            let treeseq = treeseq_from_small_table_collection_two_trees();
+            let mut ediff_iter = treeseq.edge_differences_iter().unwrap();
+
+            let mut ntrees = 0;
+            while let Some(diffs) = ediff_iter.next() {
+                if ntrees == 0 {
+                    assert_eq!(diffs.interval(), (0.0.into(), 500.0.into()));
+                } else {
+                    assert_eq!(diffs.interval(), (500.0.into(), 1000.0.into()));
+                }
+                ntrees += 1;
+            }
+            assert_eq!(ntrees, 2);
+        }
+    }
 }
 
 #[cfg(test)]