diff --git a/src/_macros.rs b/src/_macros.rs index 5a6d19653..1798e241c 100644 --- a/src/_macros.rs +++ b/src/_macros.rs @@ -27,130 +27,12 @@ macro_rules! panic_on_tskit_error { }; } -macro_rules! unsafe_tsk_ragged_column_access { - ($i: expr, $lo: expr, $hi: expr, $owner: expr, $array: ident, $offset_array: ident, $offset_array_len: ident, $output_id_type: ty) => {{ - let i = $crate::SizeType::try_from($i).ok()?; - if $i < $lo || i >= $hi { - None - } else if $owner.$offset_array_len == 0 { - None - } else { - debug_assert!(!$owner.$array.is_null()); - if $owner.$array.is_null() { - return None; - } - let offset = i.as_usize() as isize; - // SAFETY: we have checked bounds and ensured not null - let start = unsafe { *$owner.$offset_array.offset(offset) }; - let stop = if i < $hi { - unsafe { *$owner.$offset_array.offset((offset + 1) as isize) } - } else { - $owner.$offset_array_len as tsk_size_t - }; - if start == stop { - None - } else { - Some(unsafe { - std::slice::from_raw_parts( - $owner.$array.offset(start as isize) as *const $output_id_type, - stop as usize - start as usize, - ) - }) - } - } - }}; -} - -// Allow this to be unused for default features -// to pass clippy checks -#[allow(unused_macros)] -macro_rules! unsafe_tsk_ragged_char_column_access { - ($i: expr, $lo: expr, $hi: expr, $owner: expr, $array: ident, $offset_array: ident, $offset_array_len: ident) => {{ - let i = $crate::SizeType::try_from($i).ok()?; - if $i < $lo || i >= $hi { - None - } else if $owner.$offset_array_len == 0 { - None - } else { - assert!(!$owner.$array.is_null()); - assert!(!$owner.$offset_array.is_null()); - let start = unsafe { *$owner.$offset_array.offset($i as isize) }; - let stop = if i < $hi { - unsafe { *$owner.$offset_array.offset(($i + 1) as isize) } - } else { - $owner.$offset_array_len as tsk_size_t - }; - if start == stop { - None - } else { - let mut buffer = String::new(); - for i in start..stop { - buffer.push(unsafe { *$owner.$array.offset(i as isize) as u8 as char }); - } - Some(buffer) - } - } - }}; -} - -#[cfg(feature = "provenance")] -macro_rules! unsafe_tsk_ragged_char_column_access_to_slice_u8 { - ($i: expr, $lo: expr, $hi: expr, $owner: expr, $array: ident, $offset_array: ident, $offset_array_len: ident) => {{ - let i = match $crate::SizeType::try_from($i).ok() { - Some(j) => j, - None => $crate::SizeType::from(u64::MAX), - }; - if $i < $lo || i >= $hi { - None - } else if $owner.$offset_array_len == 0 { - None - } else { - assert!(!$owner.$array.is_null()); - assert!(!$owner.$offset_array.is_null()); - let offset = i.as_usize() as isize; - let start = unsafe { *$owner.$offset_array.offset(offset) }; - let stop = if i < $hi { - unsafe { *$owner.$offset_array.offset((offset + 1) as isize) } - } else { - $owner.$offset_array_len as tsk_size_t - }; - if start == stop { - None - } else { - let ptr = unsafe { $owner.$array.offset(start as isize) as *const u8 }; - let len = (stop - start) as usize; - let slice = unsafe { std::slice::from_raw_parts(ptr, len) }; - Some(slice) - } - } - }}; -} - -macro_rules! metadata_to_vector { - ($outer: ident, $table: expr, $row: expr) => { - $crate::metadata::char_column_to_slice( - $outer, - $table.metadata, - $table.metadata_offset, - $row, - $table.num_rows, - $table.metadata_length, - ) - }; -} - macro_rules! decode_metadata_row { ($T: ty, $buffer: expr) => { <$T as $crate::metadata::MetadataRoundtrip>::decode($buffer) }; } -macro_rules! table_row_decode_metadata { - ($owner: ident, $table: ident, $pos: ident) => { - metadata_to_vector!($owner, $table, $pos) - }; -} - macro_rules! process_state_input { ($state: expr) => { match $state { @@ -1093,14 +975,13 @@ macro_rules! build_owned_table_type { } macro_rules! raw_metadata_getter_for_tables { - ($idtype: ident) => { + ($idtype: ty) => { fn raw_metadata(&self, row: $idtype) -> Option<&[u8]> { - $crate::metadata::char_column_to_slice( - self, + $crate::sys::tsk_ragged_column_access::<'_, u8, $idtype, _, _>( + row.into(), self.as_ref().metadata, + self.num_rows(), self.as_ref().metadata_offset, - row.into(), - self.num_rows().into(), self.as_ref().metadata_length, ) } diff --git a/src/edge_table.rs b/src/edge_table.rs index aa1cf5221..2464766b3 100644 --- a/src/edge_table.rs +++ b/src/edge_table.rs @@ -31,14 +31,13 @@ impl PartialEq for EdgeTableRow { } fn make_edge_table_row(table: &EdgeTable, pos: tsk_id_t) -> Option { - let table_ref = table.as_ref(); Some(EdgeTableRow { id: pos.into(), left: table.left(pos)?, right: table.right(pos)?, parent: table.parent(pos)?, child: table.child(pos)?, - metadata: table_row_decode_metadata!(table, table_ref, pos).map(|m| m.to_vec()), + metadata: table.raw_metadata(pos.into()).map(|m| m.to_vec()), }) } @@ -238,8 +237,7 @@ impl EdgeTable { &self, row: EdgeId, ) -> Option> { - let table_ref = self.as_ref(); - let buffer = metadata_to_vector!(self, table_ref, row.into())?; + let buffer = self.raw_metadata(row)?; Some(decode_metadata_row!(T, buffer).map_err(|e| e.into())) } diff --git a/src/individual_table.rs b/src/individual_table.rs index 13e335653..1b0ded86a 100644 --- a/src/individual_table.rs +++ b/src/individual_table.rs @@ -6,7 +6,7 @@ use crate::sys; use crate::IndividualFlags; use crate::IndividualId; use crate::Location; -use crate::{tsk_id_t, tsk_size_t, TskitError}; +use crate::{tsk_id_t, TskitError}; use ll_bindings::{tsk_individual_table_free, tsk_individual_table_init}; /// Row of a [`IndividualTable`] @@ -109,13 +109,12 @@ pub struct IndividualTable { } fn make_individual_table_row(table: &IndividualTable, pos: tsk_id_t) -> Option { - let table_ref = table.as_ref(); Some(IndividualTableRow { id: pos.into(), flags: table.flags(pos)?, location: table.location(pos).map(|s| s.to_vec()), parents: table.parents(pos).map(|s| s.to_vec()), - metadata: table_row_decode_metadata!(table, table_ref, pos).map(|m| m.to_vec()), + metadata: table.raw_metadata(pos.into()).map(|m| m.to_vec()), }) } @@ -186,15 +185,12 @@ impl IndividualTable { /// * `Some(location)` if `row` is valid. /// * `None` otherwise. pub fn location + Copy>(&self, row: I) -> Option<&[Location]> { - unsafe_tsk_ragged_column_access!( + sys::tsk_ragged_column_access( row.into(), - 0, + self.as_ref().location, self.num_rows(), - self.as_ref(), - location, - location_offset, - location_length, - Location + self.as_ref().location_offset, + self.as_ref().location_length, ) } @@ -205,15 +201,12 @@ impl IndividualTable { /// * `Some(parents)` if `row` is valid. /// * `None` otherwise. pub fn parents + Copy>(&self, row: I) -> Option<&[IndividualId]> { - unsafe_tsk_ragged_column_access!( + sys::tsk_ragged_column_access( row.into(), - 0, + self.as_ref().parents, self.num_rows(), - self.as_ref(), - parents, - parents_offset, - parents_length, - IndividualId + self.as_ref().parents_offset, + self.as_ref().parents_length, ) } @@ -391,8 +384,7 @@ match tables.individuals().metadata::(0.into()) &self, row: IndividualId, ) -> Option> { - let table_ref = self.as_ref(); - let buffer = metadata_to_vector!(self, table_ref, row.into())?; + let buffer = self.raw_metadata(row)?; Some(decode_metadata_row!(T, buffer).map_err(|e| e.into())) } diff --git a/src/metadata.rs b/src/metadata.rs index b62cf8016..4c356c339 100644 --- a/src/metadata.rs +++ b/src/metadata.rs @@ -163,7 +163,6 @@ //! * We have not yet tested importing metadata encoded using `rust` //! into `Python` via the `tskit` `Python API`. -use crate::bindings::{tsk_id_t, tsk_size_t}; use crate::SizeType; use thiserror::Error; @@ -250,59 +249,6 @@ pub enum MetadataError { }, } -pub(crate) fn char_column_to_slice( - _lifetime: &T, - column: *const libc::c_char, - column_offset: *const tsk_size_t, - row: tsk_id_t, - num_rows: tsk_size_t, - column_length: tsk_size_t, -) -> Option<&[u8]> { - let row = match tsk_size_t::try_from(row).ok() { - Some(r) if r < num_rows => r, - _ => return None, - }; - if column_length == 0 { - return None; - } - let row_isize = match isize::try_from(row).ok() { - Some(x) => x, - None => return None, - }; - debug_assert!(!column.is_null()); - debug_assert!(!column_offset.is_null()); - if column.is_null() { - return None; - } - if column_offset.is_null() { - return None; - } - // SAFETY: not null and best effort bounds check - let start = unsafe { *column_offset.offset(row_isize) }; - let stop = if (row as tsk_size_t) < num_rows { - unsafe { *column_offset.offset(row_isize + 1) } - } else { - column_length - }; - if start >= stop { - return None; - } - if column_length == 0 { - return None; - } - let istart = match isize::try_from(start).ok() { - Some(v) => v, - None => return None, - }; - let ustop = match usize::try_from(stop).ok() { - Some(v) => v, - None => return None, - }; - Some(unsafe { - std::slice::from_raw_parts(column.offset(istart) as *const u8, ustop - istart as usize) - }) -} - #[cfg(test)] mod tests { use super::*; diff --git a/src/migration_table.rs b/src/migration_table.rs index 47308aa51..83eee3677 100644 --- a/src/migration_table.rs +++ b/src/migration_table.rs @@ -37,7 +37,6 @@ impl PartialEq for MigrationTableRow { } fn make_migration_table_row(table: &MigrationTable, pos: tsk_id_t) -> Option { - let table_ref = table.as_ref(); Some(MigrationTableRow { id: pos.into(), left: table.left(pos)?, @@ -46,7 +45,7 @@ fn make_migration_table_row(table: &MigrationTable, pos: tsk_id_t) -> Option Option> { - let table_ref = self.as_ref(); - let buffer = metadata_to_vector!(self, table_ref, row.into())?; + let buffer = self.raw_metadata(row)?; Some(decode_metadata_row!(T, buffer).map_err(|e| e.into())) } diff --git a/src/mutation_table.rs b/src/mutation_table.rs index c479edb4a..807a3ea1c 100644 --- a/src/mutation_table.rs +++ b/src/mutation_table.rs @@ -37,7 +37,6 @@ fn make_mutation_table_row(table: &MutationTable, pos: tsk_id_t) -> Option { - let table_ref = table.as_ref(); let derived_state = table.derived_state(pos).map(|s| s.to_vec()); Some(MutationTableRow { id: pos.into(), @@ -46,7 +45,7 @@ fn make_mutation_table_row(table: &MutationTable, pos: tsk_id_t) -> Option None, @@ -245,12 +244,11 @@ impl MutationTable { /// Will return [``IndexError``](crate::TskitError::IndexError) /// if ``row`` is out of range. pub fn derived_state>(&self, row: M) -> Option<&[u8]> { - metadata::char_column_to_slice( - self, + sys::tsk_ragged_column_access( + row.into(), self.as_ref().derived_state, + self.num_rows(), self.as_ref().derived_state_offset, - row.into().into(), - self.as_ref().num_rows, self.as_ref().derived_state_length, ) } @@ -275,8 +273,7 @@ impl MutationTable { &self, row: MutationId, ) -> Option> { - let table_ref = self.as_ref(); - let buffer = metadata_to_vector!(self, table_ref, row.into())?; + let buffer = self.raw_metadata(row)?; Some(decode_metadata_row!(T, buffer).map_err(|e| e.into())) } diff --git a/src/node_table.rs b/src/node_table.rs index cb8e8dc71..40d1a5137 100644 --- a/src/node_table.rs +++ b/src/node_table.rs @@ -33,14 +33,13 @@ impl PartialEq for NodeTableRow { } fn make_node_table_row(table: &NodeTable, pos: tsk_id_t) -> Option { - let table_ref = table.as_ref(); Some(NodeTableRow { id: pos.into(), time: table.time(pos)?, flags: table.flags(pos)?, population: table.population(pos)?, individual: table.individual(pos)?, - metadata: table_row_decode_metadata!(table, table_ref, pos).map(|m| m.to_vec()), + metadata: table.raw_metadata(pos.into()).map(|m| m.to_vec()), }) } @@ -340,8 +339,7 @@ impl NodeTable { &self, row: NodeId, ) -> Option> { - let table_ref = self.as_ref(); - let buffer = metadata_to_vector!(self, table_ref, row.into())?; + let buffer = self.raw_metadata(row)?; Some(decode_metadata_row!(T, buffer).map_err(|e| e.into())) } diff --git a/src/population_table.rs b/src/population_table.rs index 7ce23c0fa..d52be8fbd 100644 --- a/src/population_table.rs +++ b/src/population_table.rs @@ -22,12 +22,11 @@ impl PartialEq for PopulationTableRow { } fn make_population_table_row(table: &PopulationTable, pos: tsk_id_t) -> Option { - let table_ref = table.as_ref(); let index = ll_bindings::tsk_size_t::try_from(pos).ok()?; match index { i if i < table.num_rows() => { - let metadata = table_row_decode_metadata!(table, table_ref, pos).map(|s| s.to_vec()); + let metadata = table.raw_metadata(pos.into()).map(|m| m.to_vec()); Some(PopulationTableRow { id: pos.into(), metadata, @@ -162,8 +161,7 @@ impl PopulationTable { &self, row: PopulationId, ) -> Option> { - let table_ref = self.as_ref(); - let buffer = metadata_to_vector!(self, table_ref, row.into())?; + let buffer = self.raw_metadata(row)?; Some(decode_metadata_row!(T, buffer).map_err(TskitError::from)) } diff --git a/src/provenance.rs b/src/provenance.rs index 7031caeeb..df289b4d0 100644 --- a/src/provenance.rs +++ b/src/provenance.rs @@ -13,6 +13,7 @@ use std::ptr::NonNull; use crate::bindings as ll_bindings; +use crate::sys; use crate::SizeType; use crate::{tsk_id_t, tsk_size_t, ProvenanceId}; use ll_bindings::{tsk_provenance_table_free, tsk_provenance_table_init}; @@ -185,14 +186,12 @@ impl ProvenanceTable { /// # } /// ``` pub fn timestamp + Copy>(&self, row: P) -> Option<&str> { - let timestamp_slice = unsafe_tsk_ragged_char_column_access_to_slice_u8!( + let timestamp_slice = sys::tsk_ragged_column_access( row.into(), - 0, + self.as_ref().timestamp, self.num_rows(), - self.as_ref(), - timestamp, - timestamp_offset, - timestamp_length + self.as_ref().timestamp_offset, + self.as_ref().timestamp_length, ); match timestamp_slice { Some(tstamp) => std::str::from_utf8(tstamp).ok(), @@ -221,14 +220,12 @@ impl ProvenanceTable { /// # panic!("Expected Some(timestamp)"); /// # } pub fn record + Copy>(&self, row: P) -> Option<&str> { - let record_slice = unsafe_tsk_ragged_char_column_access_to_slice_u8!( + let record_slice = sys::tsk_ragged_column_access( row.into(), - 0, + self.as_ref().record, self.num_rows(), - self.as_ref(), - record, - record_offset, - record_length + self.as_ref().record_offset, + self.as_ref().record_length, ); match record_slice { Some(rec) => std::str::from_utf8(rec).ok(), diff --git a/src/site_table.rs b/src/site_table.rs index 7f6ef02fe..e2e09237a 100644 --- a/src/site_table.rs +++ b/src/site_table.rs @@ -29,13 +29,12 @@ impl PartialEq for SiteTableRow { } fn make_site_table_row(table: &SiteTable, pos: tsk_id_t) -> Option { - let table_ref = table.as_ref(); let ancestral_state = table.ancestral_state(pos).map(|s| s.to_vec()); Some(SiteTableRow { id: pos.into(), position: table.position(pos)?, ancestral_state, - metadata: table_row_decode_metadata!(table, table_ref, pos).map(|m| m.to_vec()), + metadata: table.raw_metadata(pos.into()).map(|m| m.to_vec()), }) } @@ -181,12 +180,11 @@ impl SiteTable { /// * `Some(ancestral state)` if `row` is valid. /// * `None` otherwise. pub fn ancestral_state>(&self, row: S) -> Option<&[u8]> { - crate::metadata::char_column_to_slice( - self, + sys::tsk_ragged_column_access( + row.into(), self.as_ref().ancestral_state, + self.num_rows(), self.as_ref().ancestral_state_offset, - row.into().into(), - self.as_ref().num_rows, self.as_ref().ancestral_state_length, ) } @@ -211,8 +209,7 @@ impl SiteTable { &self, row: SiteId, ) -> Option> { - let table_ref = self.as_ref(); - let buffer = metadata_to_vector!(self, table_ref, row.into())?; + let buffer = self.raw_metadata(row)?; Some(decode_metadata_row!(T, buffer).map_err(TskitError::from)) } diff --git a/src/sys.rs b/src/sys.rs index 78c85de29..39dae7858 100644 --- a/src/sys.rs +++ b/src/sys.rs @@ -30,3 +30,59 @@ pub fn tsk_column_access< ) -> Option { tsk_column_access_detail(row, column, column_length).map(|v| v.into()) } + +fn tsk_ragged_column_access_detail< + R: Into, + L: Into, + T: Copy, +>( + row: R, + column: *const T, + column_length: L, + offset: *const bindings::tsk_size_t, + offset_length: bindings::tsk_size_t, +) -> Option<(*const T, usize)> { + let row = row.into(); + let column_length = column_length.into(); + if row < 0 || row as bindings::tsk_size_t > column_length || offset_length == 0 { + None + } else { + assert!(!column.is_null()); + assert!(!offset.is_null()); + // SAFETY: pointers are not null + // and *_length are given by tskit-c + let index = row as isize; + let start = unsafe { *offset.offset(index) }; + let stop = if (row as bindings::tsk_size_t) < column_length { + unsafe { *offset.offset(index + 1) } + } else { + offset_length + }; + if start == stop { + None + } else { + Some(( + unsafe { column.offset(start as isize) }, + stop as usize - start as usize, + )) + } + } +} + +pub fn tsk_ragged_column_access< + 'a, + O, + R: Into, + L: Into, + T: Copy, +>( + row: R, + column: *const T, + column_length: L, + offset: *const bindings::tsk_size_t, + offset_length: bindings::tsk_size_t, +) -> Option<&'a [O]> { + // SAFETY: see tsk_ragged_column_access_detail + tsk_ragged_column_access_detail(row, column, column_length, offset, offset_length) + .map(|(p, n)| unsafe { std::slice::from_raw_parts(p.cast::(), n) }) +}