From a5e5186a33e78202b251de0cf70ae30814c21dea Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Mon, 8 Dec 2025 15:18:13 +0200 Subject: [PATCH 1/9] Refactor. --- .../src/chaining_cost_function.rs | 54 +++++++++---------- .../src/chaining_cost_function/cost_array.rs | 8 +-- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/lib_ts_chainalign/src/chaining_cost_function.rs b/lib_ts_chainalign/src/chaining_cost_function.rs index a27f45e..2bd5c0f 100644 --- a/lib_ts_chainalign/src/chaining_cost_function.rs +++ b/lib_ts_chainalign/src/chaining_cost_function.rs @@ -6,26 +6,26 @@ use crate::{ ts_kind::{TsAncestor, TsDescendant, TsKind}, }, anchors::Anchors, - chaining_cost_function::cost_array::CostArray2D, + chaining_cost_function::cost_array::ChainingCostArray, chaining_lower_bounds::ChainingLowerBounds, }; mod cost_array; pub struct ChainingCostFunction { - primary: CostArray2D, - secondary_11: CostArray2D, - secondary_12: CostArray2D, - secondary_21: CostArray2D, - secondary_22: CostArray2D, - jump_12_to_11: CostArray2D, - jump_12_to_12: CostArray2D, - jump_12_to_21: CostArray2D, - jump_12_to_22: CostArray2D, - jump_34_from_11: CostArray2D, - jump_34_from_12: CostArray2D, - jump_34_from_21: CostArray2D, - jump_34_from_22: CostArray2D, + primary: ChainingCostArray, + secondary_11: ChainingCostArray, + secondary_12: ChainingCostArray, + secondary_21: ChainingCostArray, + secondary_22: ChainingCostArray, + jump_12_to_11: ChainingCostArray, + jump_12_to_12: ChainingCostArray, + jump_12_to_21: ChainingCostArray, + jump_12_to_22: ChainingCostArray, + jump_34_from_11: ChainingCostArray, + jump_34_from_12: ChainingCostArray, + jump_34_from_21: ChainingCostArray, + jump_34_from_22: ChainingCostArray, } impl ChainingCostFunction { @@ -37,7 +37,7 @@ impl ChainingCostFunction { ) -> Self { let k = usize::try_from(chaining_lower_bounds.max_match_run() + 1).unwrap(); - let mut primary = CostArray2D::new_from_cost( + let mut primary = ChainingCostArray::new_from_cost( [anchors.primary.len() + 2, anchors.primary.len() + 2], Cost::max_value(), ); @@ -65,7 +65,7 @@ impl ChainingCostFunction { } } - let mut secondary_11 = CostArray2D::new_from_cost( + let mut secondary_11 = ChainingCostArray::new_from_cost( [anchors.secondary_11.len(), anchors.secondary_11.len()], Cost::max_value(), ); @@ -81,7 +81,7 @@ impl ChainingCostFunction { } } - let mut secondary_12 = CostArray2D::new_from_cost( + let mut secondary_12 = ChainingCostArray::new_from_cost( [anchors.secondary_12.len(), anchors.secondary_12.len()], Cost::max_value(), ); @@ -97,7 +97,7 @@ impl ChainingCostFunction { } } - let mut secondary_21 = CostArray2D::new_from_cost( + let mut secondary_21 = ChainingCostArray::new_from_cost( [anchors.secondary_21.len(), anchors.secondary_21.len()], Cost::max_value(), ); @@ -113,7 +113,7 @@ impl ChainingCostFunction { } } - let mut secondary_22 = CostArray2D::new_from_cost( + let mut secondary_22 = ChainingCostArray::new_from_cost( [anchors.secondary_22.len(), anchors.secondary_22.len()], Cost::max_value(), ); @@ -129,7 +129,7 @@ impl ChainingCostFunction { } } - let mut jump_12_to_11 = CostArray2D::new_from_cost( + let mut jump_12_to_11 = ChainingCostArray::new_from_cost( [anchors.primary.len() + 2, anchors.secondary_11.len()], Cost::max_value(), ); @@ -143,7 +143,7 @@ impl ChainingCostFunction { } } - let mut jump_12_to_12 = CostArray2D::new_from_cost( + let mut jump_12_to_12 = ChainingCostArray::new_from_cost( [anchors.primary.len() + 2, anchors.secondary_12.len()], Cost::max_value(), ); @@ -157,7 +157,7 @@ impl ChainingCostFunction { } } - let mut jump_12_to_21 = CostArray2D::new_from_cost( + let mut jump_12_to_21 = ChainingCostArray::new_from_cost( [anchors.primary.len() + 2, anchors.secondary_21.len()], Cost::max_value(), ); @@ -171,7 +171,7 @@ impl ChainingCostFunction { } } - let mut jump_12_to_22 = CostArray2D::new_from_cost( + let mut jump_12_to_22 = ChainingCostArray::new_from_cost( [anchors.primary.len() + 2, anchors.secondary_22.len()], Cost::max_value(), ); @@ -185,7 +185,7 @@ impl ChainingCostFunction { } } - let mut jump_34_from_11 = CostArray2D::new_from_cost( + let mut jump_34_from_11 = ChainingCostArray::new_from_cost( [anchors.secondary_11.len(), anchors.primary.len() + 2], Cost::max_value(), ); @@ -199,7 +199,7 @@ impl ChainingCostFunction { } } - let mut jump_34_from_12 = CostArray2D::new_from_cost( + let mut jump_34_from_12 = ChainingCostArray::new_from_cost( [anchors.secondary_12.len(), anchors.primary.len() + 2], Cost::max_value(), ); @@ -213,7 +213,7 @@ impl ChainingCostFunction { } } - let mut jump_34_from_21 = CostArray2D::new_from_cost( + let mut jump_34_from_21 = ChainingCostArray::new_from_cost( [anchors.secondary_21.len(), anchors.primary.len() + 2], Cost::max_value(), ); @@ -227,7 +227,7 @@ impl ChainingCostFunction { } } - let mut jump_34_from_22 = CostArray2D::new_from_cost( + let mut jump_34_from_22 = ChainingCostArray::new_from_cost( [anchors.secondary_22.len(), anchors.primary.len() + 2], Cost::max_value(), ); diff --git a/lib_ts_chainalign/src/chaining_cost_function/cost_array.rs b/lib_ts_chainalign/src/chaining_cost_function/cost_array.rs index 8189c0a..9702af2 100644 --- a/lib_ts_chainalign/src/chaining_cost_function/cost_array.rs +++ b/lib_ts_chainalign/src/chaining_cost_function/cost_array.rs @@ -2,13 +2,13 @@ use std::ops::{Index, IndexMut}; use bitvec::{bitvec, order::LocalBits, vec::BitVec}; -pub struct CostArray2D { +pub struct ChainingCostArray { len: [usize; 2], data: Vec, is_exact: BitVec, } -impl CostArray2D { +impl ChainingCostArray { pub fn new_from_cost(len: [usize; 2], cost: Cost) -> Self where Cost: Clone, @@ -35,7 +35,7 @@ impl CostArray2D { } } -impl Index<[usize; 2]> for CostArray2D { +impl Index<[usize; 2]> for ChainingCostArray { type Output = Cost; fn index(&self, index: [usize; 2]) -> &Self::Output { @@ -43,7 +43,7 @@ impl Index<[usize; 2]> for CostArray2D { } } -impl IndexMut<[usize; 2]> for CostArray2D { +impl IndexMut<[usize; 2]> for ChainingCostArray { fn index_mut(&mut self, index: [usize; 2]) -> &mut Self::Output { &mut self.data[coordinates_to_index(index[0], index[1], self.len)] } From a898ad8cf7c332d766ed0030128f6553f6b756d3 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Mon, 8 Dec 2025 15:39:32 +0200 Subject: [PATCH 2/9] Iter primary-primary chainings in cost order. --- lib_ts_chainalign/src/chain_align/chainer.rs | 152 +++++++++--------- .../src/chaining_cost_function.rs | 10 ++ .../src/chaining_cost_function/cost_array.rs | 27 ++++ 3 files changed, 115 insertions(+), 74 deletions(-) diff --git a/lib_ts_chainalign/src/chain_align/chainer.rs b/lib_ts_chainalign/src/chain_align/chainer.rs index 2aec206..e25f174 100644 --- a/lib_ts_chainalign/src/chain_align/chainer.rs +++ b/lib_ts_chainalign/src/chain_align/chainer.rs @@ -162,85 +162,89 @@ impl AStarContext for Context<'_, '_, '_, Cost> { })), ); } - Identifier::Primary { index } => output.extend( - (0..self.anchors.primary.len()) - .flat_map(|successor_index| { - if DEBUG_CHAINER { - println!( - "Checking anchor P-{successor_index}: {}", - self.anchors.primary[successor_index] - ); - } + Identifier::Primary { index } => { + output.extend( + self.chaining_cost_function + .iter_primary_in_cost_order(index) + .flat_map(|(successor_index, chaining_cost)| { + if DEBUG_CHAINER { + println!( + "Checking anchor P-{successor_index}: {}", + self.anchors.primary[successor_index] + ); + } - let cost = predecessor_cost.checked_add( - &self.chaining_cost_function.primary(index, successor_index), - )?; - if DEBUG_CHAINER { - println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); - } + let cost = predecessor_cost.checked_add(&chaining_cost)?; + if DEBUG_CHAINER { + println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); + } - (cost != Cost::max_value()).then_some(Node { - identifier: Identifier::Primary { - index: successor_index, - }, - predecessor, - cost, - }) - }) - .chain(TsKind::iter().flat_map(|ts_kind| { - (0..self.anchors.secondary(ts_kind).len()) - .zip(iter::repeat(&self)) - .flat_map(move |(successor_index, context)| { - if DEBUG_CHAINER { - println!( - "Checking anchor S{}-{successor_index}: {}", - ts_kind.digits(), - context.anchors.secondary(ts_kind)[successor_index] - ); - } + (cost != Cost::max_value()).then_some(Node { + identifier: Identifier::Primary { + index: successor_index, + }, + predecessor, + cost, + }) + }), + ); + output.extend( + TsKind::iter() + .flat_map(|ts_kind| { + (0..self.anchors.secondary(ts_kind).len()) + .zip(iter::repeat(&self)) + .flat_map(move |(successor_index, context)| { + if DEBUG_CHAINER { + println!( + "Checking anchor S{}-{successor_index}: {}", + ts_kind.digits(), + context.anchors.secondary(ts_kind)[successor_index] + ); + } - let cost = predecessor_cost.checked_add( - &context.chaining_cost_function.jump_12( - index, - successor_index, - ts_kind, - ), - )?; - if DEBUG_CHAINER { - println!( - "Cost: {}+{}", - predecessor_cost, - cost - predecessor_cost - ); - } + let cost = predecessor_cost.checked_add( + &context.chaining_cost_function.jump_12( + index, + successor_index, + ts_kind, + ), + )?; + if DEBUG_CHAINER { + println!( + "Cost: {}+{}", + predecessor_cost, + cost - predecessor_cost + ); + } - (cost != Cost::max_value()).then_some(Node { - identifier: Identifier::Secondary { - index: successor_index, - ts_kind, - first_secondary_index: successor_index, - }, - predecessor, - cost, + (cost != Cost::max_value()).then_some(Node { + identifier: Identifier::Secondary { + index: successor_index, + ts_kind, + first_secondary_index: successor_index, + }, + predecessor, + cost, + }) }) - }) - })) - .chain(iter::once({ - let cost = predecessor_cost - .checked_add(&self.chaining_cost_function.primary_to_end(index)) - .unwrap(); - if DEBUG_CHAINER { - println!("Checking anchor end"); - println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); - } - debug_assert_ne!(cost, Cost::max_value()); - Node { - identifier: Identifier::End, - predecessor, - cost, - } - })), - ), + }) + .chain(iter::once({ + let cost = predecessor_cost + .checked_add(&self.chaining_cost_function.primary_to_end(index)) + .unwrap(); + if DEBUG_CHAINER { + println!("Checking anchor end"); + println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); + } + debug_assert_ne!(cost, Cost::max_value()); + Node { + identifier: Identifier::End, + predecessor, + cost, + } + })), + ); + } Identifier::Secondary { index, ts_kind, diff --git a/lib_ts_chainalign/src/chaining_cost_function.rs b/lib_ts_chainalign/src/chaining_cost_function.rs index 2bd5c0f..2f58425 100644 --- a/lib_ts_chainalign/src/chaining_cost_function.rs +++ b/lib_ts_chainalign/src/chaining_cost_function.rs @@ -888,4 +888,14 @@ impl ChainingCostFunction { *target = cost; result } + + pub fn iter_primary_in_cost_order( + &mut self, + from_primary_index: usize, + ) -> impl Iterator + where + Cost: Copy + Ord, + { + self.primary.iter_in_cost_order(from_primary_index + 1) + } } diff --git a/lib_ts_chainalign/src/chaining_cost_function/cost_array.rs b/lib_ts_chainalign/src/chaining_cost_function/cost_array.rs index 9702af2..46fa224 100644 --- a/lib_ts_chainalign/src/chaining_cost_function/cost_array.rs +++ b/lib_ts_chainalign/src/chaining_cost_function/cost_array.rs @@ -5,6 +5,8 @@ use bitvec::{bitvec, order::LocalBits, vec::BitVec}; pub struct ChainingCostArray { len: [usize; 2], data: Vec, + /// For each ordinate 0, store the indices of the corresponding ordinates 1, ordered by their cost. + cost_order_permutation: Vec>, is_exact: BitVec, } @@ -16,6 +18,7 @@ impl ChainingCostArray { Self { len, data: vec![cost; len[0] * len[1]], + cost_order_permutation: vec![Vec::new(); len[0]], is_exact: bitvec![usize, LocalBits; 0; len[0] * len[1]], } } @@ -33,6 +36,29 @@ impl ChainingCostArray { pub fn dim(&self) -> (usize, usize) { (self.len[0], self.len[1]) } + + /// Iterate over all ordinates `c2` that belong to this `c1` in order of their cost. + pub fn iter_in_cost_order(&mut self, c1: usize) -> impl Iterator + where + Cost: Copy + Ord, + { + if self.cost_order_permutation[c1].is_empty() { + self.cost_order_permutation[c1].extend((0..).take(self.len[1])); + self.cost_order_permutation[c1].sort_unstable_by_key(|c2| { + self.data[coordinates_to_index(c1, usize::try_from(*c2).unwrap(), self.len)] + }); + } + + let data = &self.data; + let len = self.len; + self.cost_order_permutation[c1] + .iter() + .copied() + .map(move |c2| { + let c2 = usize::try_from(c2).unwrap(); + (c2, data[coordinates_to_index(c1, c2, len)]) + }) + } } impl Index<[usize; 2]> for ChainingCostArray { @@ -45,6 +71,7 @@ impl Index<[usize; 2]> for ChainingCostArray { impl IndexMut<[usize; 2]> for ChainingCostArray { fn index_mut(&mut self, index: [usize; 2]) -> &mut Self::Output { + self.cost_order_permutation[index[0]].clear(); &mut self.data[coordinates_to_index(index[0], index[1], self.len)] } } From bb0894831b5e06f03ed29b1985b3ca8a70fa49b8 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Wed, 10 Dec 2025 11:22:13 +0200 Subject: [PATCH 3/9] Add chaining successors in order. --- lib_ts_chainalign/src/chain_align/chainer.rs | 329 ++++++++---------- .../src/chaining_cost_function.rs | 94 +++++ 2 files changed, 248 insertions(+), 175 deletions(-) diff --git a/lib_ts_chainalign/src/chain_align/chainer.rs b/lib_ts_chainalign/src/chain_align/chainer.rs index e25f174..1ce35d1 100644 --- a/lib_ts_chainalign/src/chain_align/chainer.rs +++ b/lib_ts_chainalign/src/chain_align/chainer.rs @@ -80,8 +80,9 @@ impl AStarContext for Context<'_, '_, '_, Cost> { match node.identifier { Identifier::Start => { output.extend( - (0..self.anchors.primary.len()) - .flat_map(|successor_index| { + self.chaining_cost_function + .iter_primary_from_start_in_cost_order() + .flat_map(|(successor_index, chaining_cost)| { if DEBUG_CHAINER { println!( "Checking anchor P-{successor_index}: {}", @@ -89,13 +90,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { ); } - let cost = predecessor_cost - .checked_add( - &self - .chaining_cost_function - .primary_from_start(successor_index), - ) - .unwrap(); + let cost = predecessor_cost.checked_add(&chaining_cost).unwrap(); if DEBUG_CHAINER { println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); } @@ -108,60 +103,60 @@ impl AStarContext for Context<'_, '_, '_, Cost> { predecessor, cost, }) - }) - .chain(TsKind::iter().flat_map(|ts_kind| { - (0..self.anchors.secondary(ts_kind).len()) - .zip(iter::repeat(&self)) - .flat_map(move |(successor_index, context)| { - if DEBUG_CHAINER { - println!( - "Checking anchor S{}-{successor_index}: {}", - ts_kind.digits(), - context.anchors.secondary(ts_kind)[successor_index] - ); - } - - let cost = predecessor_cost.checked_add( - &context - .chaining_cost_function - .jump_12_from_start(successor_index, ts_kind), - )?; - if DEBUG_CHAINER { - println!( - "Cost: {}+{}", - predecessor_cost, - cost - predecessor_cost - ); - } - - (cost != Cost::max_value()).then_some(Node { - identifier: Identifier::Secondary { - index: successor_index, - ts_kind, - first_secondary_index: successor_index, - }, - predecessor, - cost, - }) - }) - })) - .chain(iter::once({ - let cost = predecessor_cost - .checked_add(&self.chaining_cost_function.start_to_end()) - .unwrap(); - if DEBUG_CHAINER { - println!("Checking anchor end"); - println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); - } - debug_assert_ne!(cost, Cost::max_value()); - Node { - identifier: Identifier::End, - predecessor, - cost, - } - })), + }), ); + for ts_kind in TsKind::iter() { + output.extend( + self.chaining_cost_function + .iter_jump_12_from_start_in_cost_order(ts_kind) + .zip(iter::repeat(self.anchors)) + .flat_map(move |((successor_index, chaining_cost), anchors)| { + if DEBUG_CHAINER { + println!( + "Checking anchor S{}-{successor_index}: {}", + ts_kind.digits(), + anchors.secondary(ts_kind)[successor_index] + ); + } + + let cost = predecessor_cost.checked_add(&chaining_cost)?; + if DEBUG_CHAINER { + println!( + "Cost: {}+{}", + predecessor_cost, + cost - predecessor_cost + ); + } + + (cost != Cost::max_value()).then_some(Node { + identifier: Identifier::Secondary { + index: successor_index, + ts_kind, + first_secondary_index: successor_index, + }, + predecessor, + cost, + }) + }), + ); + } + output.extend(iter::once({ + let cost = predecessor_cost + .checked_add(&self.chaining_cost_function.start_to_end()) + .unwrap(); + if DEBUG_CHAINER { + println!("Checking anchor end"); + println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); + } + debug_assert_ne!(cost, Cost::max_value()); + Node { + identifier: Identifier::End, + predecessor, + cost, + } + })); } + Identifier::Primary { index } => { output.extend( self.chaining_cost_function @@ -188,98 +183,91 @@ impl AStarContext for Context<'_, '_, '_, Cost> { }) }), ); - output.extend( - TsKind::iter() - .flat_map(|ts_kind| { - (0..self.anchors.secondary(ts_kind).len()) - .zip(iter::repeat(&self)) - .flat_map(move |(successor_index, context)| { - if DEBUG_CHAINER { - println!( - "Checking anchor S{}-{successor_index}: {}", - ts_kind.digits(), - context.anchors.secondary(ts_kind)[successor_index] - ); - } - - let cost = predecessor_cost.checked_add( - &context.chaining_cost_function.jump_12( - index, - successor_index, - ts_kind, - ), - )?; - if DEBUG_CHAINER { - println!( - "Cost: {}+{}", - predecessor_cost, - cost - predecessor_cost - ); - } - - (cost != Cost::max_value()).then_some(Node { - identifier: Identifier::Secondary { - index: successor_index, - ts_kind, - first_secondary_index: successor_index, - }, - predecessor, - cost, - }) + for ts_kind in TsKind::iter() { + output.extend( + self.chaining_cost_function + .iter_jump_12_in_cost_order(index, ts_kind) + .zip(iter::repeat(self.anchors)) + .flat_map(move |((successor_index, chaining_cost), anchors)| { + if DEBUG_CHAINER { + println!( + "Checking anchor S{}-{successor_index}: {}", + ts_kind.digits(), + anchors.secondary(ts_kind)[successor_index] + ); + } + + let cost = predecessor_cost.checked_add(&chaining_cost)?; + if DEBUG_CHAINER { + println!( + "Cost: {}+{}", + predecessor_cost, + cost - predecessor_cost + ); + } + + (cost != Cost::max_value()).then_some(Node { + identifier: Identifier::Secondary { + index: successor_index, + ts_kind, + first_secondary_index: successor_index, + }, + predecessor, + cost, }) - }) - .chain(iter::once({ - let cost = predecessor_cost - .checked_add(&self.chaining_cost_function.primary_to_end(index)) - .unwrap(); - if DEBUG_CHAINER { - println!("Checking anchor end"); - println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); - } - debug_assert_ne!(cost, Cost::max_value()); - Node { - identifier: Identifier::End, - predecessor, - cost, - } - })), - ); + }), + ); + } + output.extend(iter::once({ + let cost = predecessor_cost + .checked_add(&self.chaining_cost_function.primary_to_end(index)) + .unwrap(); + if DEBUG_CHAINER { + println!("Checking anchor end"); + println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); + } + debug_assert_ne!(cost, Cost::max_value()); + Node { + identifier: Identifier::End, + predecessor, + cost, + } + })); } + Identifier::Secondary { index, ts_kind, first_secondary_index, } => { - output.extend((0..self.anchors.secondary(ts_kind).len()).flat_map( - |successor_index| { - if DEBUG_CHAINER { - println!( - "Checking anchor S{}-{successor_index}: {}", - ts_kind.digits(), - self.anchors.secondary(ts_kind)[successor_index] - ); - } + output.extend( + self.chaining_cost_function + .iter_secondary_in_cost_order(index, ts_kind) + .flat_map(|(successor_index, chaining_cost)| { + if DEBUG_CHAINER { + println!( + "Checking anchor S{}-{successor_index}: {}", + ts_kind.digits(), + self.anchors.secondary(ts_kind)[successor_index] + ); + } - let cost = predecessor_cost.checked_add( - &self - .chaining_cost_function - .secondary(index, successor_index, ts_kind), - )?; - if DEBUG_CHAINER { - println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); - } + let cost = predecessor_cost.checked_add(&chaining_cost)?; + if DEBUG_CHAINER { + println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); + } - (cost != Cost::max_value()).then_some(Node { - identifier: Identifier::Secondary { - index: successor_index, - ts_kind, - first_secondary_index, - }, - predecessor, - cost, - }) - }, - )); + (cost != Cost::max_value()).then_some(Node { + identifier: Identifier::Secondary { + index: successor_index, + ts_kind, + first_secondary_index, + }, + predecessor, + cost, + }) + }), + ); let first_anchor = &self.anchors.secondary(ts_kind)[first_secondary_index]; let ts_length = first_anchor.ts_length_until( @@ -290,8 +278,9 @@ impl AStarContext for Context<'_, '_, '_, Cost> { if self.ts_limits.length_23.contains(&ts_length) { output.extend( - (0..self.anchors.primary.len()) - .flat_map(|successor_index| { + self.chaining_cost_function + .iter_jump_34_in_cost_order(index, ts_kind) + .flat_map(|(successor_index, chaining_cost)| { if DEBUG_CHAINER { println!( "Checking anchor P-{successor_index}: {}", @@ -299,13 +288,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { ); } - let cost = predecessor_cost.checked_add( - &self.chaining_cost_function.jump_34( - index, - successor_index, - ts_kind, - ), - )?; + let cost = predecessor_cost.checked_add(&chaining_cost)?; if DEBUG_CHAINER { println!( "Cost: {}+{}", @@ -321,29 +304,25 @@ impl AStarContext for Context<'_, '_, '_, Cost> { predecessor, cost, }) - }) - .chain(iter::once({ - let cost = predecessor_cost - .checked_add( - &self.chaining_cost_function.jump_34_to_end(index, ts_kind), - ) - .unwrap(); - if DEBUG_CHAINER { - println!("Checking anchor end"); - println!( - "Cost: {}+{}", - predecessor_cost, - cost - predecessor_cost - ); - } - debug_assert_ne!(cost, Cost::max_value()); - Node { - identifier: Identifier::End, - predecessor, - cost, - } - })), + }), ); + output.extend(iter::once({ + let cost = predecessor_cost + .checked_add( + &self.chaining_cost_function.jump_34_to_end(index, ts_kind), + ) + .unwrap(); + if DEBUG_CHAINER { + println!("Checking anchor end"); + println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); + } + debug_assert_ne!(cost, Cost::max_value()); + Node { + identifier: Identifier::End, + predecessor, + cost, + } + })); } } diff --git a/lib_ts_chainalign/src/chaining_cost_function.rs b/lib_ts_chainalign/src/chaining_cost_function.rs index 2f58425..4eacdbb 100644 --- a/lib_ts_chainalign/src/chaining_cost_function.rs +++ b/lib_ts_chainalign/src/chaining_cost_function.rs @@ -898,4 +898,98 @@ impl ChainingCostFunction { { self.primary.iter_in_cost_order(from_primary_index + 1) } + + pub fn iter_primary_from_start_in_cost_order(&mut self) -> impl Iterator + where + Cost: Copy + Ord, + { + self.primary.iter_in_cost_order(0) + } + + pub fn iter_jump_12_in_cost_order( + &mut self, + from_primary_index: usize, + ts_kind: TsKind, + ) -> impl Iterator + where + Cost: Copy + Ord, + { + match (ts_kind.ancestor, ts_kind.descendant) { + (TsAncestor::Seq1, TsDescendant::Seq1) => self + .jump_12_to_11 + .iter_in_cost_order(from_primary_index + 1), + (TsAncestor::Seq1, TsDescendant::Seq2) => self + .jump_12_to_12 + .iter_in_cost_order(from_primary_index + 1), + (TsAncestor::Seq2, TsDescendant::Seq1) => self + .jump_12_to_21 + .iter_in_cost_order(from_primary_index + 1), + (TsAncestor::Seq2, TsDescendant::Seq2) => self + .jump_12_to_22 + .iter_in_cost_order(from_primary_index + 1), + } + } + + pub fn iter_jump_12_from_start_in_cost_order( + &mut self, + ts_kind: TsKind, + ) -> impl Iterator + where + Cost: Copy + Ord, + { + match (ts_kind.ancestor, ts_kind.descendant) { + (TsAncestor::Seq1, TsDescendant::Seq1) => self.jump_12_to_11.iter_in_cost_order(0), + (TsAncestor::Seq1, TsDescendant::Seq2) => self.jump_12_to_12.iter_in_cost_order(0), + (TsAncestor::Seq2, TsDescendant::Seq1) => self.jump_12_to_21.iter_in_cost_order(0), + (TsAncestor::Seq2, TsDescendant::Seq2) => self.jump_12_to_22.iter_in_cost_order(0), + } + } + + pub fn iter_secondary_in_cost_order( + &mut self, + from_secondary_index: usize, + ts_kind: TsKind, + ) -> impl Iterator + where + Cost: Copy + Ord, + { + match (ts_kind.ancestor, ts_kind.descendant) { + (TsAncestor::Seq1, TsDescendant::Seq1) => { + self.secondary_11.iter_in_cost_order(from_secondary_index) + } + (TsAncestor::Seq1, TsDescendant::Seq2) => { + self.secondary_12.iter_in_cost_order(from_secondary_index) + } + (TsAncestor::Seq2, TsDescendant::Seq1) => { + self.secondary_21.iter_in_cost_order(from_secondary_index) + } + (TsAncestor::Seq2, TsDescendant::Seq2) => { + self.secondary_22.iter_in_cost_order(from_secondary_index) + } + } + } + + pub fn iter_jump_34_in_cost_order( + &mut self, + from_secondary_index: usize, + ts_kind: TsKind, + ) -> impl Iterator + where + Cost: Copy + Ord, + { + match (ts_kind.ancestor, ts_kind.descendant) { + (TsAncestor::Seq1, TsDescendant::Seq1) => self + .jump_34_from_11 + .iter_in_cost_order(from_secondary_index), + (TsAncestor::Seq1, TsDescendant::Seq2) => self + .jump_34_from_12 + .iter_in_cost_order(from_secondary_index), + (TsAncestor::Seq2, TsDescendant::Seq1) => self + .jump_34_from_21 + .iter_in_cost_order(from_secondary_index), + (TsAncestor::Seq2, TsDescendant::Seq2) => self + .jump_34_from_22 + .iter_in_cost_order(from_secondary_index), + } + } } From a565fc60b75dea617d0b6ec2691951a1377a91e0 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Wed, 10 Dec 2025 11:51:11 +0200 Subject: [PATCH 4/9] Fix chainalign successor iteration. --- lib_ts_chainalign/src/chain_align/chainer.rs | 117 ++++++++---------- .../src/chaining_cost_function.rs | 15 ++- .../src/chaining_cost_function/cost_array.rs | 4 +- 3 files changed, 67 insertions(+), 69 deletions(-) diff --git a/lib_ts_chainalign/src/chain_align/chainer.rs b/lib_ts_chainalign/src/chain_align/chainer.rs index 1ce35d1..4a36c64 100644 --- a/lib_ts_chainalign/src/chain_align/chainer.rs +++ b/lib_ts_chainalign/src/chain_align/chainer.rs @@ -84,10 +84,14 @@ impl AStarContext for Context<'_, '_, '_, Cost> { .iter_primary_from_start_in_cost_order() .flat_map(|(successor_index, chaining_cost)| { if DEBUG_CHAINER { - println!( - "Checking anchor P-{successor_index}: {}", - self.anchors.primary[successor_index] - ); + if successor_index == self.anchors.primary.len() { + println!("Checking anchor end"); + } else { + println!( + "Checking anchor P-{successor_index}: {}", + self.anchors.primary[successor_index] + ); + } } let cost = predecessor_cost.checked_add(&chaining_cost).unwrap(); @@ -97,8 +101,12 @@ impl AStarContext for Context<'_, '_, '_, Cost> { debug_assert_ne!(cost, Cost::max_value()); Some(Node { - identifier: Identifier::Primary { - index: successor_index, + identifier: if successor_index == self.anchors.primary.len() { + Identifier::End + } else { + Identifier::Primary { + index: successor_index, + } }, predecessor, cost, @@ -140,21 +148,6 @@ impl AStarContext for Context<'_, '_, '_, Cost> { }), ); } - output.extend(iter::once({ - let cost = predecessor_cost - .checked_add(&self.chaining_cost_function.start_to_end()) - .unwrap(); - if DEBUG_CHAINER { - println!("Checking anchor end"); - println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); - } - debug_assert_ne!(cost, Cost::max_value()); - Node { - identifier: Identifier::End, - predecessor, - cost, - } - })); } Identifier::Primary { index } => { @@ -163,10 +156,14 @@ impl AStarContext for Context<'_, '_, '_, Cost> { .iter_primary_in_cost_order(index) .flat_map(|(successor_index, chaining_cost)| { if DEBUG_CHAINER { - println!( - "Checking anchor P-{successor_index}: {}", - self.anchors.primary[successor_index] - ); + if successor_index == self.anchors.primary.len() { + println!("Checking anchor end"); + } else { + println!( + "Checking anchor P-{successor_index}: {}", + self.anchors.primary[successor_index] + ); + } } let cost = predecessor_cost.checked_add(&chaining_cost)?; @@ -174,9 +171,18 @@ impl AStarContext for Context<'_, '_, '_, Cost> { println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); } + // Assert that we can always chain to end. + debug_assert!( + successor_index != self.anchors.primary.len() + || cost != Cost::max_value() + ); (cost != Cost::max_value()).then_some(Node { - identifier: Identifier::Primary { - index: successor_index, + identifier: if successor_index == self.anchors.primary.len() { + Identifier::End + } else { + Identifier::Primary { + index: successor_index, + } }, predecessor, cost, @@ -218,21 +224,6 @@ impl AStarContext for Context<'_, '_, '_, Cost> { }), ); } - output.extend(iter::once({ - let cost = predecessor_cost - .checked_add(&self.chaining_cost_function.primary_to_end(index)) - .unwrap(); - if DEBUG_CHAINER { - println!("Checking anchor end"); - println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); - } - debug_assert_ne!(cost, Cost::max_value()); - Node { - identifier: Identifier::End, - predecessor, - cost, - } - })); } Identifier::Secondary { @@ -282,10 +273,14 @@ impl AStarContext for Context<'_, '_, '_, Cost> { .iter_jump_34_in_cost_order(index, ts_kind) .flat_map(|(successor_index, chaining_cost)| { if DEBUG_CHAINER { - println!( - "Checking anchor P-{successor_index}: {}", - self.anchors.primary[successor_index] - ); + if successor_index == self.anchors.primary.len() { + println!("Checking anchor end"); + } else { + println!( + "Checking anchor P-{successor_index}: {}", + self.anchors.primary[successor_index] + ); + } } let cost = predecessor_cost.checked_add(&chaining_cost)?; @@ -297,32 +292,24 @@ impl AStarContext for Context<'_, '_, '_, Cost> { ); } + // Assert that we can always chain to end. + debug_assert!( + successor_index != self.anchors.primary.len() + || cost != Cost::max_value() + ); (cost != Cost::max_value()).then_some(Node { - identifier: Identifier::Primary { - index: successor_index, + identifier: if successor_index == self.anchors.primary.len() { + Identifier::End + } else { + Identifier::Primary { + index: successor_index, + } }, predecessor, cost, }) }), ); - output.extend(iter::once({ - let cost = predecessor_cost - .checked_add( - &self.chaining_cost_function.jump_34_to_end(index, ts_kind), - ) - .unwrap(); - if DEBUG_CHAINER { - println!("Checking anchor end"); - println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); - } - debug_assert_ne!(cost, Cost::max_value()); - Node { - identifier: Identifier::End, - predecessor, - cost, - } - })); } } diff --git a/lib_ts_chainalign/src/chaining_cost_function.rs b/lib_ts_chainalign/src/chaining_cost_function.rs index 4eacdbb..7fca1b4 100644 --- a/lib_ts_chainalign/src/chaining_cost_function.rs +++ b/lib_ts_chainalign/src/chaining_cost_function.rs @@ -896,14 +896,22 @@ impl ChainingCostFunction { where Cost: Copy + Ord, { - self.primary.iter_in_cost_order(from_primary_index + 1) + self.primary + .iter_in_cost_order(from_primary_index + 1) + .filter_map(|(to_primary_index, chaining_cost)| { + (to_primary_index != 0).then(|| (to_primary_index - 1, chaining_cost)) + }) } pub fn iter_primary_from_start_in_cost_order(&mut self) -> impl Iterator where Cost: Copy + Ord, { - self.primary.iter_in_cost_order(0) + self.primary + .iter_in_cost_order(0) + .filter_map(|(to_primary_index, chaining_cost)| { + (to_primary_index != 0).then(|| (to_primary_index - 1, chaining_cost)) + }) } pub fn iter_jump_12_in_cost_order( @@ -991,5 +999,8 @@ impl ChainingCostFunction { .jump_34_from_22 .iter_in_cost_order(from_secondary_index), } + .filter_map(|(to_primary_index, chaining_cost)| { + (to_primary_index != 0).then(|| (to_primary_index - 1, chaining_cost)) + }) } } diff --git a/lib_ts_chainalign/src/chaining_cost_function/cost_array.rs b/lib_ts_chainalign/src/chaining_cost_function/cost_array.rs index 46fa224..6a9423a 100644 --- a/lib_ts_chainalign/src/chaining_cost_function/cost_array.rs +++ b/lib_ts_chainalign/src/chaining_cost_function/cost_array.rs @@ -54,9 +54,9 @@ impl ChainingCostArray { self.cost_order_permutation[c1] .iter() .copied() - .map(move |c2| { + .filter_map(move |c2| { let c2 = usize::try_from(c2).unwrap(); - (c2, data[coordinates_to_index(c1, c2, len)]) + (c1 != c2).then(|| (c2, data[coordinates_to_index(c1, c2, len)])) }) } } From 70355a7df7bdd379d54ba4bcd8a6386ad1247f2d Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Wed, 10 Dec 2025 13:21:47 +0200 Subject: [PATCH 5/9] Refactor anchors and chaining costs. --- lib_ts_chainalign/src/alignment/ts_kind.rs | 18 + lib_ts_chainalign/src/anchors.rs | 175 ++-- lib_ts_chainalign/src/anchors/index.rs | 77 ++ lib_ts_chainalign/src/anchors/tests.rs | 30 +- lib_ts_chainalign/src/chain_align.rs | 63 +- lib_ts_chainalign/src/chain_align/chainer.rs | 43 +- .../src/chaining_cost_function.rs | 963 +++++------------- .../src/chaining_cost_function/cost_array.rs | 63 +- 8 files changed, 531 insertions(+), 901 deletions(-) create mode 100644 lib_ts_chainalign/src/anchors/index.rs diff --git a/lib_ts_chainalign/src/alignment/ts_kind.rs b/lib_ts_chainalign/src/alignment/ts_kind.rs index d9e987d..2cfbb92 100644 --- a/lib_ts_chainalign/src/alignment/ts_kind.rs +++ b/lib_ts_chainalign/src/alignment/ts_kind.rs @@ -44,6 +44,24 @@ impl TsKind { [Self::TS11, Self::TS12, Self::TS21, Self::TS22].into_iter() } + /// Index of this [`TsKind`] in the iterator produced by [`Self::iter()`]. + /// + /// ```rust + /// # use lib_ts_chainalign::alignment::ts_kind::TsKind; + /// assert_eq!( + /// TsKind::iter().enumerate().collect::>(), + /// TsKind::iter().map(|ts_kind| (ts_kind.index(), ts_kind)).collect::>() + /// ); + /// ``` + pub fn index(&self) -> usize { + match (self.ancestor, self.descendant) { + (TsAncestor::Seq1, TsDescendant::Seq1) => 0, + (TsAncestor::Seq1, TsDescendant::Seq2) => 1, + (TsAncestor::Seq2, TsDescendant::Seq1) => 2, + (TsAncestor::Seq2, TsDescendant::Seq2) => 3, + } + } + pub fn digits(&self) -> &'static str { match (self.ancestor, self.descendant) { (TsAncestor::Seq1, TsDescendant::Seq1) => "11", diff --git a/lib_ts_chainalign/src/anchors.rs b/lib_ts_chainalign/src/anchors.rs index 6507808..a4ea442 100644 --- a/lib_ts_chainalign/src/anchors.rs +++ b/lib_ts_chainalign/src/anchors.rs @@ -7,14 +7,16 @@ use crate::{ alignment::{ coordinates::AlignmentCoordinates, sequences::AlignmentSequences, - ts_kind::{TsAncestor, TsDescendant, TsKind}, + ts_kind::{TsDescendant, TsKind}, }, anchors::{ + index::AnchorIndex, kmer_matches::find_kmer_matches, kmers::{Kmer, KmerStore}, }, }; +pub mod index; pub mod kmer_matches; pub mod kmers; #[cfg(test)] @@ -22,11 +24,8 @@ mod tests; #[derive(Debug, PartialEq, Eq)] pub struct Anchors { - pub primary: Vec, - pub secondary_11: Vec, - pub secondary_12: Vec, - pub secondary_21: Vec, - pub secondary_22: Vec, + primary: Vec, + secondaries: [Vec; 4], } #[derive(Debug, PartialEq, Eq)] @@ -110,34 +109,35 @@ impl Anchors { .into_iter() .map(|(seq1, seq2)| PrimaryAnchor { seq1, seq2 }) .collect(); - let mut secondary_11: Vec<_> = find_kmer_matches(&s1_rc_kmers, &s1_kmers) + let secondary_11: Vec<_> = find_kmer_matches(&s1_rc_kmers, &s1_kmers) .into_iter() .map(|(ancestor, descendant)| SecondaryAnchor { ancestor: s1.len() - ancestor, descendant, }) .collect(); - let mut secondary_12: Vec<_> = find_kmer_matches(&s1_rc_kmers, &s2_kmers) + let secondary_12: Vec<_> = find_kmer_matches(&s1_rc_kmers, &s2_kmers) .into_iter() .map(|(ancestor, descendant)| SecondaryAnchor { ancestor: s1.len() - ancestor, descendant, }) .collect(); - let mut secondary_21: Vec<_> = find_kmer_matches(&s2_rc_kmers, &s1_kmers) + let secondary_21: Vec<_> = find_kmer_matches(&s2_rc_kmers, &s1_kmers) .into_iter() .map(|(ancestor, descendant)| SecondaryAnchor { ancestor: s2.len() - ancestor, descendant, }) .collect(); - let mut secondary_22: Vec<_> = find_kmer_matches(&s2_rc_kmers, &s2_kmers) + let secondary_22: Vec<_> = find_kmer_matches(&s2_rc_kmers, &s2_kmers) .into_iter() .map(|(ancestor, descendant)| SecondaryAnchor { ancestor: s2.len() - ancestor, descendant, }) .collect(); + let mut secondaries = [secondary_11, secondary_12, secondary_21, secondary_22]; // Sort anchors. primary.sort_unstable_by_key(|primary_anchor| { @@ -147,65 +147,67 @@ impl Anchors { primary_anchor.seq2, ) }); - secondary_11.sort_unstable_by_key(|secondary_anchor| { - ( - secondary_anchor.ancestor.min(secondary_anchor.descendant), - secondary_anchor.ancestor, - secondary_anchor.descendant, - ) - }); - secondary_12.sort_unstable_by_key(|secondary_anchor| { - ( - secondary_anchor.ancestor.min(secondary_anchor.descendant), - secondary_anchor.ancestor, - secondary_anchor.descendant, - ) - }); - secondary_21.sort_unstable_by_key(|secondary_anchor| { - ( - secondary_anchor.ancestor.min(secondary_anchor.descendant), - secondary_anchor.ancestor, - secondary_anchor.descendant, - ) - }); - secondary_22.sort_unstable_by_key(|secondary_anchor| { - ( - secondary_anchor.ancestor.min(secondary_anchor.descendant), - secondary_anchor.ancestor, - secondary_anchor.descendant, - ) - }); + for secondary in &mut secondaries { + secondary.sort_unstable_by_key(|secondary_anchor| { + ( + secondary_anchor.ancestor.min(secondary_anchor.descendant), + secondary_anchor.ancestor, + secondary_anchor.descendant, + ) + }); + } info!( "Found {} anchors ({} + {} + {} + {} + {})", - primary.len() - + secondary_11.len() - + secondary_12.len() - + secondary_21.len() - + secondary_22.len(), + primary.len() + secondaries.iter().map(Vec::len).sum::(), primary.len(), - secondary_11.len(), - secondary_12.len(), - secondary_21.len(), - secondary_22.len(), + secondaries[0].len(), + secondaries[1].len(), + secondaries[2].len(), + secondaries[3].len(), ); Self { primary, - secondary_11, - secondary_12, - secondary_21, - secondary_22, + secondaries, } } - pub fn secondary(&self, ts_kind: TsKind) -> &[SecondaryAnchor] { - match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => &self.secondary_11, - (TsAncestor::Seq1, TsDescendant::Seq2) => &self.secondary_12, - (TsAncestor::Seq2, TsDescendant::Seq1) => &self.secondary_21, - (TsAncestor::Seq2, TsDescendant::Seq2) => &self.secondary_22, - } + pub fn primary(&self, index: AnchorIndex) -> &PrimaryAnchor { + &self.primary[index.as_usize()] + } + + pub fn primary_len(&self) -> AnchorIndex { + self.primary.len().into() + } + + pub fn enumerate_primaries(&self) -> impl Iterator { + self.primary + .iter() + .enumerate() + .map(|(index, anchor)| (index.into(), anchor)) + } + + fn secondary_anchor_vec(&self, ts_kind: TsKind) -> &Vec { + &self.secondaries[ts_kind.index()] + } + + pub fn secondary(&self, index: AnchorIndex, ts_kind: TsKind) -> &SecondaryAnchor { + &self.secondary_anchor_vec(ts_kind)[index.as_usize()] + } + + pub fn secondary_len(&self, ts_kind: TsKind) -> AnchorIndex { + self.secondary_anchor_vec(ts_kind).len().into() + } + + pub fn enumerate_secondaries( + &self, + ts_kind: TsKind, + ) -> impl Iterator { + self.secondary_anchor_vec(ts_kind) + .iter() + .enumerate() + .map(|(index, anchor)| (index.into(), anchor)) } } @@ -419,53 +421,26 @@ impl Display for Anchors { } writeln!(f, "]")?; - write!(f, "S11: [")?; - let mut once = true; - for secondary_anchor in &self.secondary_11 { - if once { - once = false; - } else { - write!(f, ", ")?; - } - write!(f, "{secondary_anchor}")?; - } - writeln!(f, "]")?; - - write!(f, "S12: [")?; - let mut once = true; - for secondary_anchor in &self.secondary_12 { - if once { - once = false; - } else { - write!(f, ", ")?; - } - write!(f, "{secondary_anchor}")?; - } - writeln!(f, "]")?; - - write!(f, "S21: [")?; - let mut once = true; - for secondary_anchor in &self.secondary_21 { - if once { - once = false; + let mut ts_kind_once = true; + for ts_kind in TsKind::iter() { + if ts_kind_once { + ts_kind_once = false; } else { - write!(f, ", ")?; + writeln!(f)?; } - write!(f, "{secondary_anchor}")?; - } - writeln!(f, "]")?; - write!(f, "S22: [")?; - let mut once = true; - for secondary_anchor in &self.secondary_22 { - if once { - once = false; - } else { - write!(f, ", ")?; + write!(f, "S{}: [", ts_kind.digits())?; + let mut once = true; + for secondary_anchor in self.secondary_anchor_vec(ts_kind) { + if once { + once = false; + } else { + write!(f, ", ")?; + } + write!(f, "{secondary_anchor}")?; } - write!(f, "{secondary_anchor}")?; + write!(f, "]")?; } - write!(f, "]")?; Ok(()) } diff --git a/lib_ts_chainalign/src/anchors/index.rs b/lib_ts_chainalign/src/anchors/index.rs new file mode 100644 index 0000000..dd1286d --- /dev/null +++ b/lib_ts_chainalign/src/anchors/index.rs @@ -0,0 +1,77 @@ +use std::{ + fmt::Display, + ops::{Add, Sub}, +}; + +use num_traits::Zero; + +type IndexType = u32; + +#[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Hash)] +pub struct AnchorIndex(IndexType); + +impl AnchorIndex { + pub fn as_usize(self) -> usize { + self.0.try_into().unwrap() + } +} + +impl From for AnchorIndex { + fn from(value: IndexType) -> Self { + Self(value) + } +} + +impl From for AnchorIndex { + fn from(value: usize) -> Self { + Self(value.try_into().unwrap()) + } +} + +impl Add for AnchorIndex { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + Self(self.0.checked_add(rhs.0).unwrap()) + } +} + +impl Sub for AnchorIndex { + type Output = Self; + + fn sub(self, rhs: Self) -> Self::Output { + Self(self.0.checked_sub(rhs.0).unwrap()) + } +} + +impl Add for AnchorIndex { + type Output = Self; + + fn add(self, rhs: IndexType) -> Self::Output { + self + Self::from(rhs) + } +} + +impl Sub for AnchorIndex { + type Output = Self; + + fn sub(self, rhs: IndexType) -> Self::Output { + self - Self::from(rhs) + } +} + +impl Zero for AnchorIndex { + fn zero() -> Self { + Self(0) + } + + fn is_zero(&self) -> bool { + self.0.is_zero() + } +} + +impl Display for AnchorIndex { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} diff --git a/lib_ts_chainalign/src/anchors/tests.rs b/lib_ts_chainalign/src/anchors/tests.rs index 03a8eab..1fe7bee 100644 --- a/lib_ts_chainalign/src/anchors/tests.rs +++ b/lib_ts_chainalign/src/anchors/tests.rs @@ -1,7 +1,7 @@ use lib_tsalign::a_star_aligner::alignment_geometry::AlignmentRange; use crate::{ - alignment::sequences::AlignmentSequences, + alignment::{sequences::AlignmentSequences, ts_kind::TsKind}, anchors::{Anchors, PrimaryAnchor, SecondaryAnchor}, }; @@ -23,18 +23,18 @@ fn test_coordinates() { let anchors = Anchors::new(&sequences, range, k, &rc_fn); assert_eq!(anchors.primary, [(0, 0), (2, 0)].map(PrimaryAnchor::from)); - assert!(anchors.secondary_11.is_empty()); + assert!(anchors.secondary_anchor_vec(TsKind::TS11).is_empty()); assert_eq!( - anchors.secondary_12, - [(2, 2), (4, 2)].map(SecondaryAnchor::from) + anchors.secondary_anchor_vec(TsKind::TS12), + &[(2, 2), (4, 2)].map(SecondaryAnchor::from) ); assert_eq!( - anchors.secondary_21, - [(4, 0), (4, 2)].map(SecondaryAnchor::from) + anchors.secondary_anchor_vec(TsKind::TS21), + &[(4, 0), (4, 2)].map(SecondaryAnchor::from) ); assert_eq!( - anchors.secondary_22, - [(4, 0), (3, 1), (2, 2)].map(SecondaryAnchor::from) + anchors.secondary_anchor_vec(TsKind::TS22), + &[(4, 0), (3, 1), (2, 2)].map(SecondaryAnchor::from) ); } @@ -46,17 +46,17 @@ fn test_coordinates_rev() { let anchors = Anchors::new(&sequences, range, k, &rc_fn); assert_eq!(anchors.primary, [(0, 0), (0, 2)].map(PrimaryAnchor::from)); - assert!(anchors.secondary_22.is_empty()); + assert!(anchors.secondary_anchor_vec(TsKind::TS22).is_empty()); assert_eq!( - anchors.secondary_21, - [(2, 2), (4, 2)].map(SecondaryAnchor::from) + anchors.secondary_anchor_vec(TsKind::TS21), + &[(2, 2), (4, 2)].map(SecondaryAnchor::from) ); assert_eq!( - anchors.secondary_12, - [(4, 0), (4, 2)].map(SecondaryAnchor::from) + anchors.secondary_anchor_vec(TsKind::TS12), + &[(4, 0), (4, 2)].map(SecondaryAnchor::from) ); assert_eq!( - anchors.secondary_11, - [(4, 0), (3, 1), (2, 2)].map(SecondaryAnchor::from) + anchors.secondary_anchor_vec(TsKind::TS11), + &[(4, 0), (3, 1), (2, 2)].map(SecondaryAnchor::from) ); } diff --git a/lib_ts_chainalign/src/chain_align.rs b/lib_ts_chainalign/src/chain_align.rs index 4e6666a..c741a99 100644 --- a/lib_ts_chainalign/src/chain_align.rs +++ b/lib_ts_chainalign/src/chain_align.rs @@ -105,7 +105,7 @@ pub fn align( match identifier { Identifier::Start => write!(s, "start").unwrap(), Identifier::Primary { index } => { - write!(s, "P{}", anchors.primary[*index]).unwrap() + write!(s, "P{}", anchors.primary(*index)).unwrap() } Identifier::Secondary { index, @@ -115,8 +115,8 @@ pub fn align( s, "S{}{}->{}", ts_kind.digits(), - anchors.secondary(*ts_kind)[*first_secondary_index], - anchors.secondary(*ts_kind)[*index], + anchors.secondary(*first_secondary_index, *ts_kind), + anchors.secondary(*index, *ts_kind), ) .unwrap(), Identifier::End => write!(s, "end").unwrap(), @@ -329,7 +329,7 @@ fn evaluate_chain( current_upper_bound += chaining_cost_function.start_to_end(); } (Identifier::Start, Identifier::Primary { index }) => { - let end = anchors.primary[index].start(); + let end = anchors.primary(index).start(); if final_evaluation || !chaining_cost_function.is_primary_from_start_exact(index) { let alignment = GapAffineAlignment::new( start, @@ -341,7 +341,7 @@ fn evaluate_chain( ); trace!( "Aligning from start to P{index}{} costs {}", - anchors.primary[index], + anchors.primary(index), alignment.cost() ); if !final_evaluation { @@ -356,7 +356,7 @@ fn evaluate_chain( current_upper_bound += chaining_cost_function.primary_from_start(index); } (Identifier::Start, Identifier::Secondary { index, ts_kind, .. }) => { - let end = anchors.secondary(ts_kind)[index].start(ts_kind); + let end = anchors.secondary(index, ts_kind).start(ts_kind); if final_evaluation || !chaining_cost_function.is_jump_12_from_start_exact(index, ts_kind) { @@ -371,7 +371,7 @@ fn evaluate_chain( trace!( "Aligning from start to S{}[{index}]{} costs {}", ts_kind.digits(), - anchors.secondary(ts_kind)[index], + anchors.secondary(index, ts_kind), alignment.cost() ); if !final_evaluation { @@ -387,7 +387,7 @@ fn evaluate_chain( current_upper_bound += chaining_cost_function.jump_12_from_start(index, ts_kind); } (Identifier::Primary { index }, Identifier::End) => { - let start = anchors.primary[index].end(k); + let start = anchors.primary(index).end(k); if final_evaluation || !chaining_cost_function.is_primary_to_end_exact(index) { let alignment = GapAffineAlignment::new( start, @@ -399,7 +399,7 @@ fn evaluate_chain( ); trace!( "Aligning from P{index}{} to end costs {}", - anchors.primary[index], + anchors.primary(index), alignment.cost() ); if !final_evaluation { @@ -411,7 +411,7 @@ fn evaluate_chain( current_upper_bound += chaining_cost_function.primary_to_end(index); } (Identifier::Secondary { index, ts_kind, .. }, Identifier::End) => { - let start = anchors.secondary(ts_kind)[index].end(ts_kind, k); + let start = anchors.secondary(index, ts_kind).end(ts_kind, k); if final_evaluation || !chaining_cost_function.is_jump_34_to_end_exact(index, ts_kind) { @@ -426,7 +426,7 @@ fn evaluate_chain( trace!( "Aligning from S{}[{index}]{} to end costs {}", ts_kind.digits(), - anchors.secondary(ts_kind)[index], + anchors.secondary(index, ts_kind), alignment.cost() ); if !final_evaluation { @@ -446,14 +446,16 @@ fn evaluate_chain( Identifier::Primary { index: from_index }, Identifier::Primary { index: to_index }, ) => { - if anchors.primary[from_index].is_direct_predecessor_of(&anchors.primary[to_index]) + if anchors + .primary(from_index) + .is_direct_predecessor_of(&anchors.primary(to_index)) { alignments.push(Alignment::from(vec![AlignmentType::Match])); continue; } - let start = anchors.primary[from_index].end(k); - let end = anchors.primary[to_index].start(); + let start = anchors.primary(from_index).end(k); + let end = anchors.primary(to_index).start(); if final_evaluation || !chaining_cost_function.is_primary_exact(from_index, to_index) { @@ -467,8 +469,8 @@ fn evaluate_chain( ); trace!( "Aligning from P{from_index}{} to P{to_index}{} costs {}", - anchors.primary[from_index], - anchors.primary[to_index], + anchors.primary(from_index), + anchors.primary(to_index), alignment.cost() ); if !final_evaluation { @@ -492,8 +494,8 @@ fn evaluate_chain( .. }, ) => { - let start = anchors.primary[from_index].end(k); - let end = anchors.secondary(ts_kind)[to_index].start(ts_kind); + let start = anchors.primary(from_index).end(k); + let end = anchors.secondary(to_index, ts_kind).start(ts_kind); if final_evaluation || !chaining_cost_function.is_jump_12_exact(from_index, to_index, ts_kind) { @@ -516,9 +518,9 @@ fn evaluate_chain( } trace!( "Aligning from P{from_index}{} to S{}[{to_index}]{} costs {}", - anchors.primary[from_index], + anchors.primary(from_index), ts_kind.digits(), - anchors.secondary(ts_kind)[to_index], + anchors.secondary(to_index, ts_kind), alignment.cost() ); alignments.push(iter::repeat_n(AlignmentType::Match, k).collect()); @@ -540,14 +542,15 @@ fn evaluate_chain( }, ) => { assert_eq!(ts_kind, to_ts_kind); - if anchors.secondary(ts_kind)[from_index] - .is_direct_predecessor_of(&anchors.secondary(ts_kind)[to_index]) + if anchors + .secondary(from_index, ts_kind) + .is_direct_predecessor_of(&anchors.secondary(to_index, ts_kind)) { alignments.push(Alignment::from(vec![AlignmentType::Match])); continue; } - let start = anchors.secondary(ts_kind)[from_index].end(ts_kind, k); - let end = anchors.secondary(ts_kind)[to_index].start(ts_kind); + let start = anchors.secondary(from_index, ts_kind).end(ts_kind, k); + let end = anchors.secondary(to_index, ts_kind).start(ts_kind); if final_evaluation || !chaining_cost_function.is_secondary_exact(from_index, to_index, ts_kind) { @@ -562,9 +565,9 @@ fn evaluate_chain( trace!( "Aligning from S{}[{from_index}]{} to S{}[{to_index}]{} costs {}", ts_kind.digits(), - anchors.secondary(ts_kind)[from_index], + anchors.secondary(from_index, ts_kind), ts_kind.digits(), - anchors.secondary(ts_kind)[to_index], + anchors.secondary(to_index, ts_kind), alignment.cost() ); if !final_evaluation { @@ -590,8 +593,8 @@ fn evaluate_chain( }, Identifier::Primary { index: to_index }, ) => { - let start = anchors.secondary(ts_kind)[from_index].end(ts_kind, k); - let end = anchors.primary[to_index].start(); + let start = anchors.secondary(from_index, ts_kind).end(ts_kind, k); + let end = anchors.primary(to_index).start(); if final_evaluation || !chaining_cost_function.is_jump_34_exact(from_index, to_index, ts_kind) { @@ -615,8 +618,8 @@ fn evaluate_chain( trace!( "Aligning from S{}[{from_index}]{} to P{to_index}{} (S{} to P{}) costs {}", ts_kind.digits(), - anchors.secondary(ts_kind)[from_index], - anchors.primary[to_index], + anchors.secondary(from_index, ts_kind), + anchors.primary(to_index), start, end, alignment.cost() diff --git a/lib_ts_chainalign/src/chain_align/chainer.rs b/lib_ts_chainalign/src/chain_align/chainer.rs index 4a36c64..7b97332 100644 --- a/lib_ts_chainalign/src/chain_align/chainer.rs +++ b/lib_ts_chainalign/src/chain_align/chainer.rs @@ -3,7 +3,9 @@ use std::{fmt::Display, iter}; use generic_a_star::{AStarContext, AStarNode, cost::AStarCost, reset::Reset}; use crate::{ - alignment::ts_kind::TsKind, anchors::Anchors, chaining_cost_function::ChainingCostFunction, + alignment::ts_kind::TsKind, + anchors::{Anchors, index::AnchorIndex}, + chaining_cost_function::ChainingCostFunction, costs::TsLimits, }; @@ -27,15 +29,15 @@ pub struct Node { pub enum Identifier { Start, Primary { - index: usize, + index: AnchorIndex, }, Secondary { - index: usize, + index: AnchorIndex, ts_kind: TsKind, /// The first secondary anchor that is part of the current template switch. /// /// Used to estimate the length of the resulting template switch. - first_secondary_index: usize, + first_secondary_index: AnchorIndex, }, End, } @@ -72,6 +74,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { fn generate_successors(&mut self, node: &Self::Node, output: &mut impl Extend) { let predecessor = Some(node.identifier); let predecessor_cost = node.cost; + let primary_end_anchor_index = AnchorIndex::from(self.anchors.primary_len()); if DEBUG_CHAINER { println!("Generating successors of {node}"); @@ -84,12 +87,12 @@ impl AStarContext for Context<'_, '_, '_, Cost> { .iter_primary_from_start_in_cost_order() .flat_map(|(successor_index, chaining_cost)| { if DEBUG_CHAINER { - if successor_index == self.anchors.primary.len() { + if successor_index == primary_end_anchor_index { println!("Checking anchor end"); } else { println!( "Checking anchor P-{successor_index}: {}", - self.anchors.primary[successor_index] + self.anchors.primary(successor_index) ); } } @@ -101,7 +104,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { debug_assert_ne!(cost, Cost::max_value()); Some(Node { - identifier: if successor_index == self.anchors.primary.len() { + identifier: if successor_index == primary_end_anchor_index { Identifier::End } else { Identifier::Primary { @@ -123,7 +126,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { println!( "Checking anchor S{}-{successor_index}: {}", ts_kind.digits(), - anchors.secondary(ts_kind)[successor_index] + anchors.secondary(successor_index, ts_kind) ); } @@ -156,12 +159,12 @@ impl AStarContext for Context<'_, '_, '_, Cost> { .iter_primary_in_cost_order(index) .flat_map(|(successor_index, chaining_cost)| { if DEBUG_CHAINER { - if successor_index == self.anchors.primary.len() { + if successor_index == primary_end_anchor_index { println!("Checking anchor end"); } else { println!( "Checking anchor P-{successor_index}: {}", - self.anchors.primary[successor_index] + self.anchors.primary(successor_index) ); } } @@ -173,11 +176,11 @@ impl AStarContext for Context<'_, '_, '_, Cost> { // Assert that we can always chain to end. debug_assert!( - successor_index != self.anchors.primary.len() + successor_index != primary_end_anchor_index || cost != Cost::max_value() ); (cost != Cost::max_value()).then_some(Node { - identifier: if successor_index == self.anchors.primary.len() { + identifier: if successor_index == primary_end_anchor_index { Identifier::End } else { Identifier::Primary { @@ -199,7 +202,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { println!( "Checking anchor S{}-{successor_index}: {}", ts_kind.digits(), - anchors.secondary(ts_kind)[successor_index] + anchors.secondary(successor_index, ts_kind) ); } @@ -239,7 +242,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { println!( "Checking anchor S{}-{successor_index}: {}", ts_kind.digits(), - self.anchors.secondary(ts_kind)[successor_index] + self.anchors.secondary(successor_index, ts_kind) ); } @@ -260,9 +263,9 @@ impl AStarContext for Context<'_, '_, '_, Cost> { }), ); - let first_anchor = &self.anchors.secondary(ts_kind)[first_secondary_index]; + let first_anchor = &self.anchors.secondary(first_secondary_index, ts_kind); let ts_length = first_anchor.ts_length_until( - &self.anchors.secondary(ts_kind)[index], + &self.anchors.secondary(index, ts_kind), ts_kind, self.k, ); @@ -273,12 +276,12 @@ impl AStarContext for Context<'_, '_, '_, Cost> { .iter_jump_34_in_cost_order(index, ts_kind) .flat_map(|(successor_index, chaining_cost)| { if DEBUG_CHAINER { - if successor_index == self.anchors.primary.len() { + if successor_index == primary_end_anchor_index { println!("Checking anchor end"); } else { println!( "Checking anchor P-{successor_index}: {}", - self.anchors.primary[successor_index] + self.anchors.primary(successor_index) ); } } @@ -294,11 +297,11 @@ impl AStarContext for Context<'_, '_, '_, Cost> { // Assert that we can always chain to end. debug_assert!( - successor_index != self.anchors.primary.len() + successor_index != primary_end_anchor_index || cost != Cost::max_value() ); (cost != Cost::max_value()).then_some(Node { - identifier: if successor_index == self.anchors.primary.len() { + identifier: if successor_index == primary_end_anchor_index { Identifier::End } else { Identifier::Primary { diff --git a/lib_ts_chainalign/src/chaining_cost_function.rs b/lib_ts_chainalign/src/chaining_cost_function.rs index 7fca1b4..ab1c5b0 100644 --- a/lib_ts_chainalign/src/chaining_cost_function.rs +++ b/lib_ts_chainalign/src/chaining_cost_function.rs @@ -1,11 +1,10 @@ use generic_a_star::cost::AStarCost; +use itertools::Itertools; +use num_traits::Zero; use crate::{ - alignment::{ - coordinates::AlignmentCoordinates, - ts_kind::{TsAncestor, TsDescendant, TsKind}, - }, - anchors::Anchors, + alignment::{coordinates::AlignmentCoordinates, ts_kind::TsKind}, + anchors::{Anchors, index::AnchorIndex}, chaining_cost_function::cost_array::ChainingCostArray, chaining_lower_bounds::ChainingLowerBounds, }; @@ -14,18 +13,9 @@ mod cost_array; pub struct ChainingCostFunction { primary: ChainingCostArray, - secondary_11: ChainingCostArray, - secondary_12: ChainingCostArray, - secondary_21: ChainingCostArray, - secondary_22: ChainingCostArray, - jump_12_to_11: ChainingCostArray, - jump_12_to_12: ChainingCostArray, - jump_12_to_21: ChainingCostArray, - jump_12_to_22: ChainingCostArray, - jump_34_from_11: ChainingCostArray, - jump_34_from_12: ChainingCostArray, - jump_34_from_21: ChainingCostArray, - jump_34_from_22: ChainingCostArray, + secondaries: [ChainingCostArray; 4], + jump_12s: [ChainingCostArray; 4], + jump_34s: [ChainingCostArray; 4], } impl ChainingCostFunction { @@ -36,24 +26,28 @@ impl ChainingCostFunction { end: AlignmentCoordinates, ) -> Self { let k = usize::try_from(chaining_lower_bounds.max_match_run() + 1).unwrap(); + let primary_anchor_amount = AnchorIndex::from(anchors.primary_len() + 2); + let primary_start_anchor_index = AnchorIndex::zero(); + let primary_end_anchor_index = primary_anchor_amount - 1; let mut primary = ChainingCostArray::new_from_cost( - [anchors.primary.len() + 2, anchors.primary.len() + 2], + [primary_anchor_amount, primary_anchor_amount], Cost::max_value(), ); let gap1 = end.primary_ordinate_a().unwrap() - start.primary_ordinate_a().unwrap(); let gap2 = end.primary_ordinate_b().unwrap() - start.primary_ordinate_b().unwrap(); - primary[[0, anchors.primary.len() + 1]] = + primary[[primary_start_anchor_index, primary_end_anchor_index]] = chaining_lower_bounds.primary_lower_bound(gap1, gap2); - for (from_index, from_anchor) in anchors.primary.iter().enumerate() { + for (from_index, from_anchor) in anchors.enumerate_primaries() { let from_index = from_index + 1; let (gap1, gap2) = from_anchor.chaining_gaps_from_start(start); - primary[[0, from_index]] = chaining_lower_bounds.primary_lower_bound(gap1, gap2); + primary[[primary_start_anchor_index, from_index]] = + chaining_lower_bounds.primary_lower_bound(gap1, gap2); let (gap1, gap2) = from_anchor.chaining_gaps_to_end(end, k); - primary[[from_index, anchors.primary.len() + 1]] = + primary[[from_index, primary_end_anchor_index]] = chaining_lower_bounds.primary_lower_bound(gap1, gap2); - for (to_index, to_anchor) in anchors.primary.iter().enumerate() { + for (to_index, to_anchor) in anchors.enumerate_primaries() { let to_index = to_index + 1; if let Some((gap1, gap2)) = from_anchor.chaining_gaps(to_anchor, k) { primary[[from_index, to_index]] = @@ -65,553 +59,259 @@ impl ChainingCostFunction { } } - let mut secondary_11 = ChainingCostArray::new_from_cost( - [anchors.secondary_11.len(), anchors.secondary_11.len()], - Cost::max_value(), - ); - for (from_index, from_anchor) in anchors.secondary_11.iter().enumerate() { - for (to_index, to_anchor) in anchors.secondary_11.iter().enumerate() { - if let Some((gap1, gap2)) = from_anchor.chaining_gaps(to_anchor, TsKind::TS11, k) { - secondary_11[[from_index, to_index]] = - chaining_lower_bounds.secondary_lower_bound(gap1, gap2); - } - if from_anchor.is_direct_predecessor_of(to_anchor) { - secondary_11[[from_index, to_index]] = Cost::zero(); - } - } - } - - let mut secondary_12 = ChainingCostArray::new_from_cost( - [anchors.secondary_12.len(), anchors.secondary_12.len()], - Cost::max_value(), - ); - for (from_index, from_anchor) in anchors.secondary_12.iter().enumerate() { - for (to_index, to_anchor) in anchors.secondary_12.iter().enumerate() { - if let Some((gap1, gap2)) = from_anchor.chaining_gaps(to_anchor, TsKind::TS12, k) { - secondary_12[[from_index, to_index]] = - chaining_lower_bounds.secondary_lower_bound(gap1, gap2); - } - if from_anchor.is_direct_predecessor_of(to_anchor) { - secondary_12[[from_index, to_index]] = Cost::zero(); - } - } - } - - let mut secondary_21 = ChainingCostArray::new_from_cost( - [anchors.secondary_21.len(), anchors.secondary_21.len()], - Cost::max_value(), - ); - for (from_index, from_anchor) in anchors.secondary_21.iter().enumerate() { - for (to_index, to_anchor) in anchors.secondary_21.iter().enumerate() { - if let Some((gap1, gap2)) = from_anchor.chaining_gaps(to_anchor, TsKind::TS21, k) { - secondary_21[[from_index, to_index]] = - chaining_lower_bounds.secondary_lower_bound(gap1, gap2); - } - if from_anchor.is_direct_predecessor_of(to_anchor) { - secondary_21[[from_index, to_index]] = Cost::zero(); - } - } - } - - let mut secondary_22 = ChainingCostArray::new_from_cost( - [anchors.secondary_22.len(), anchors.secondary_22.len()], - Cost::max_value(), - ); - for (from_index, from_anchor) in anchors.secondary_22.iter().enumerate() { - for (to_index, to_anchor) in anchors.secondary_22.iter().enumerate() { - if let Some((gap1, gap2)) = from_anchor.chaining_gaps(to_anchor, TsKind::TS22, k) { - secondary_22[[from_index, to_index]] = - chaining_lower_bounds.secondary_lower_bound(gap1, gap2); - } - if from_anchor.is_direct_predecessor_of(to_anchor) { - secondary_22[[from_index, to_index]] = Cost::zero(); - } - } - } - - let mut jump_12_to_11 = ChainingCostArray::new_from_cost( - [anchors.primary.len() + 2, anchors.secondary_11.len()], - Cost::max_value(), - ); - for (from_index, from_anchor) in anchors.primary.iter().enumerate() { - let from_index = from_index + 1; - for (to_index, to_anchor) in anchors.secondary_11.iter().enumerate() { - if let Some(gap) = from_anchor.chaining_jump_gap(to_anchor, TsKind::TS11, k) { - jump_12_to_11[[from_index, to_index]] = - chaining_lower_bounds.jump_12_lower_bound(gap); - } - } - } - - let mut jump_12_to_12 = ChainingCostArray::new_from_cost( - [anchors.primary.len() + 2, anchors.secondary_12.len()], - Cost::max_value(), - ); - for (from_index, from_anchor) in anchors.primary.iter().enumerate() { - let from_index = from_index + 1; - for (to_index, to_anchor) in anchors.secondary_12.iter().enumerate() { - if let Some(gap) = from_anchor.chaining_jump_gap(to_anchor, TsKind::TS12, k) { - jump_12_to_12[[from_index, to_index]] = - chaining_lower_bounds.jump_12_lower_bound(gap); - } - } - } - - let mut jump_12_to_21 = ChainingCostArray::new_from_cost( - [anchors.primary.len() + 2, anchors.secondary_21.len()], - Cost::max_value(), - ); - for (from_index, from_anchor) in anchors.primary.iter().enumerate() { - let from_index = from_index + 1; - for (to_index, to_anchor) in anchors.secondary_21.iter().enumerate() { - if let Some(gap) = from_anchor.chaining_jump_gap(to_anchor, TsKind::TS21, k) { - jump_12_to_21[[from_index, to_index]] = - chaining_lower_bounds.jump_12_lower_bound(gap); - } - } - } - - let mut jump_12_to_22 = ChainingCostArray::new_from_cost( - [anchors.primary.len() + 2, anchors.secondary_22.len()], - Cost::max_value(), - ); - for (from_index, from_anchor) in anchors.primary.iter().enumerate() { - let from_index = from_index + 1; - for (to_index, to_anchor) in anchors.secondary_22.iter().enumerate() { - if let Some(gap) = from_anchor.chaining_jump_gap(to_anchor, TsKind::TS22, k) { - jump_12_to_22[[from_index, to_index]] = - chaining_lower_bounds.jump_12_lower_bound(gap); - } - } - } - - let mut jump_34_from_11 = ChainingCostArray::new_from_cost( - [anchors.secondary_11.len(), anchors.primary.len() + 2], - Cost::max_value(), - ); - for (from_index, from_anchor) in anchors.secondary_11.iter().enumerate() { - for (to_index, to_anchor) in anchors.primary.iter().enumerate() { - let to_index = to_index + 1; - if let Some(gap) = from_anchor.chaining_jump_gap(to_anchor, TsKind::TS11, k) { - jump_34_from_11[[from_index, to_index]] = - chaining_lower_bounds.jump_34_lower_bound(gap); - } - } - } - - let mut jump_34_from_12 = ChainingCostArray::new_from_cost( - [anchors.secondary_12.len(), anchors.primary.len() + 2], - Cost::max_value(), - ); - for (from_index, from_anchor) in anchors.secondary_12.iter().enumerate() { - for (to_index, to_anchor) in anchors.primary.iter().enumerate() { - let to_index = to_index + 1; - if let Some(gap) = from_anchor.chaining_jump_gap(to_anchor, TsKind::TS12, k) { - jump_34_from_12[[from_index, to_index]] = - chaining_lower_bounds.jump_34_lower_bound(gap); - } - } - } - - let mut jump_34_from_21 = ChainingCostArray::new_from_cost( - [anchors.secondary_21.len(), anchors.primary.len() + 2], - Cost::max_value(), - ); - for (from_index, from_anchor) in anchors.secondary_21.iter().enumerate() { - for (to_index, to_anchor) in anchors.primary.iter().enumerate() { - let to_index = to_index + 1; - if let Some(gap) = from_anchor.chaining_jump_gap(to_anchor, TsKind::TS21, k) { - jump_34_from_21[[from_index, to_index]] = - chaining_lower_bounds.jump_34_lower_bound(gap); + let mut secondaries = TsKind::iter() + .map(|ts_kind| { + ChainingCostArray::new_from_cost( + [ + anchors.secondary_len(ts_kind), + anchors.secondary_len(ts_kind), + ], + Cost::max_value(), + ) + }) + .collect_array() + .unwrap(); + for (ts_kind, secondary) in TsKind::iter().zip(&mut secondaries) { + for (from_index, from_anchor) in anchors.enumerate_secondaries(ts_kind) { + for (to_index, to_anchor) in anchors.enumerate_secondaries(ts_kind) { + if let Some((gap1, gap2)) = from_anchor.chaining_gaps(to_anchor, ts_kind, k) { + secondary[[from_index, to_index]] = + chaining_lower_bounds.secondary_lower_bound(gap1, gap2); + } + if from_anchor.is_direct_predecessor_of(to_anchor) { + secondary[[from_index, to_index]] = Cost::zero(); + } + } + } + } + + let mut jump_12s = TsKind::iter() + .map(|ts_kind| { + ChainingCostArray::new_from_cost( + [primary_anchor_amount, anchors.secondary_len(ts_kind)], + Cost::max_value(), + ) + }) + .collect_array() + .unwrap(); + for (ts_kind, jump_12) in TsKind::iter().zip(&mut jump_12s) { + for (from_index, from_anchor) in anchors.enumerate_primaries() { + let from_index = from_index + 1; + for (to_index, to_anchor) in anchors.enumerate_secondaries(ts_kind) { + if let Some(gap) = from_anchor.chaining_jump_gap(to_anchor, ts_kind, k) { + jump_12[[from_index, to_index]] = + chaining_lower_bounds.jump_12_lower_bound(gap); + } + } + } + } + + let mut jump_34s = TsKind::iter() + .map(|ts_kind| { + ChainingCostArray::new_from_cost( + [anchors.secondary_len(ts_kind), primary_anchor_amount], + Cost::max_value(), + ) + }) + .collect_array() + .unwrap(); + for (ts_kind, jump_34) in TsKind::iter().zip(&mut jump_34s) { + for (from_index, from_anchor) in anchors.enumerate_secondaries(ts_kind) { + for (to_index, to_anchor) in anchors.enumerate_primaries() { + let to_index = to_index + 1; + if let Some(gap) = from_anchor.chaining_jump_gap(to_anchor, ts_kind, k) { + jump_34[[from_index, to_index]] = + chaining_lower_bounds.jump_34_lower_bound(gap); + } } } } - let mut jump_34_from_22 = ChainingCostArray::new_from_cost( - [anchors.secondary_22.len(), anchors.primary.len() + 2], - Cost::max_value(), - ); - for (from_index, from_anchor) in anchors.secondary_22.iter().enumerate() { - for (to_index, to_anchor) in anchors.primary.iter().enumerate() { - let to_index = to_index + 1; - if let Some(gap) = from_anchor.chaining_jump_gap(to_anchor, TsKind::TS22, k) { - jump_34_from_22[[from_index, to_index]] = - chaining_lower_bounds.jump_34_lower_bound(gap); - } + for (ts_kind, (jump_12, jump_34)) in + TsKind::iter().zip(jump_12s.iter_mut().zip(&mut jump_34s)) + { + for (index, anchor) in anchors.enumerate_secondaries(ts_kind) { + let gap = anchor.chaining_jump_gap_from_start(start, ts_kind); + jump_12[[primary_start_anchor_index, index]] = + chaining_lower_bounds.jump_12_lower_bound(gap); + let gap = anchor.chaining_jump_gap_to_end(end, ts_kind, k); + jump_34[[index, primary_end_anchor_index]] = + chaining_lower_bounds.jump_34_lower_bound(gap); } } - for (index, anchor) in anchors.secondary_11.iter().enumerate() { - let gap = anchor.chaining_jump_gap_from_start(start, TsKind::TS11); - jump_12_to_11[[0, index]] = chaining_lower_bounds.jump_12_lower_bound(gap); - let gap = anchor.chaining_jump_gap_to_end(end, TsKind::TS11, k); - jump_34_from_11[[index, anchors.primary.len() + 1]] = - chaining_lower_bounds.jump_34_lower_bound(gap); - } - - for (index, anchor) in anchors.secondary_12.iter().enumerate() { - let gap = anchor.chaining_jump_gap_from_start(start, TsKind::TS12); - jump_12_to_12[[0, index]] = chaining_lower_bounds.jump_12_lower_bound(gap); - let gap = anchor.chaining_jump_gap_to_end(end, TsKind::TS12, k); - jump_34_from_12[[index, anchors.primary.len() + 1]] = - chaining_lower_bounds.jump_34_lower_bound(gap); - } - - for (index, anchor) in anchors.secondary_21.iter().enumerate() { - let gap = anchor.chaining_jump_gap_from_start(start, TsKind::TS21); - jump_12_to_21[[0, index]] = chaining_lower_bounds.jump_12_lower_bound(gap); - let gap = anchor.chaining_jump_gap_to_end(end, TsKind::TS21, k); - jump_34_from_21[[index, anchors.primary.len() + 1]] = - chaining_lower_bounds.jump_34_lower_bound(gap); - } - - for (index, anchor) in anchors.secondary_22.iter().enumerate() { - let gap = anchor.chaining_jump_gap_from_start(start, TsKind::TS22); - jump_12_to_22[[0, index]] = chaining_lower_bounds.jump_12_lower_bound(gap); - let gap = anchor.chaining_jump_gap_to_end(end, TsKind::TS22, k); - jump_34_from_22[[index, anchors.primary.len() + 1]] = - chaining_lower_bounds.jump_34_lower_bound(gap); - } - Self { primary, - secondary_11, - secondary_12, - secondary_21, - secondary_22, - jump_12_to_11, - jump_12_to_12, - jump_12_to_21, - jump_12_to_22, - jump_34_from_11, - jump_34_from_12, - jump_34_from_21, - jump_34_from_22, + secondaries, + jump_12s, + jump_34s, } } - pub fn primary(&self, from_primary_index: usize, to_primary_index: usize) -> Cost { - self.primary[[from_primary_index + 1, to_primary_index + 1]] + fn primary_start_anchor_index() -> AnchorIndex { + AnchorIndex::zero() } - pub fn primary_from_start(&self, primary_index: usize) -> Cost { - self.primary[[0, primary_index + 1]] + fn primary_end_anchor_index(&self) -> AnchorIndex { + self.primary.dim().0 - 1 } - pub fn primary_to_end(&self, primary_index: usize) -> Cost { - self.primary[[primary_index + 1, self.primary.dim().1 - 1]] + fn secondary_array(&self, ts_kind: TsKind) -> &ChainingCostArray { + &self.secondaries[ts_kind.index()] } - pub fn start_to_end(&self) -> Cost { - self.primary[[0, self.primary.dim().1 - 1]] + fn secondary_array_mut(&mut self, ts_kind: TsKind) -> &mut ChainingCostArray { + &mut self.secondaries[ts_kind.index()] } - pub fn jump_12_to_11(&self, from_primary_index: usize, to_secondary_11_index: usize) -> Cost { - self.jump_12_to_11[[from_primary_index + 1, to_secondary_11_index]] + fn jump_12_array(&self, ts_kind: TsKind) -> &ChainingCostArray { + &self.jump_12s[ts_kind.index()] } - pub fn jump_12_to_12(&self, from_primary_index: usize, to_secondary_12_index: usize) -> Cost { - self.jump_12_to_12[[from_primary_index + 1, to_secondary_12_index]] + fn jump_12_array_mut(&mut self, ts_kind: TsKind) -> &mut ChainingCostArray { + &mut self.jump_12s[ts_kind.index()] } - pub fn jump_12_to_21(&self, from_primary_index: usize, to_secondary_21_index: usize) -> Cost { - self.jump_12_to_21[[from_primary_index + 1, to_secondary_21_index]] + fn jump_34_array(&self, ts_kind: TsKind) -> &ChainingCostArray { + &self.jump_34s[ts_kind.index()] } - pub fn jump_12_to_22(&self, from_primary_index: usize, to_secondary_22_index: usize) -> Cost { - self.jump_12_to_22[[from_primary_index + 1, to_secondary_22_index]] + fn jump_34_array_mut(&mut self, ts_kind: TsKind) -> &mut ChainingCostArray { + &mut self.jump_34s[ts_kind.index()] } - pub fn jump_12( - &self, - from_primary_index: usize, - to_secondary_index: usize, - ts_kind: TsKind, - ) -> Cost { - match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => { - self.jump_12_to_11(from_primary_index, to_secondary_index) - } - (TsAncestor::Seq1, TsDescendant::Seq2) => { - self.jump_12_to_12(from_primary_index, to_secondary_index) - } - (TsAncestor::Seq2, TsDescendant::Seq1) => { - self.jump_12_to_21(from_primary_index, to_secondary_index) - } - (TsAncestor::Seq2, TsDescendant::Seq2) => { - self.jump_12_to_22(from_primary_index, to_secondary_index) - } - } + pub fn primary(&self, from_primary_index: AnchorIndex, to_primary_index: AnchorIndex) -> Cost { + self.primary[[from_primary_index + 1, to_primary_index + 1]] } - pub fn jump_12_to_11_from_start(&self, to_secondary_11_index: usize) -> Cost { - self.jump_12_to_11[[0, to_secondary_11_index]] + pub fn primary_from_start(&self, primary_index: AnchorIndex) -> Cost { + self.primary[[Self::primary_start_anchor_index(), primary_index + 1]] } - pub fn jump_12_to_12_from_start(&self, to_secondary_12_index: usize) -> Cost { - self.jump_12_to_12[[0, to_secondary_12_index]] + pub fn primary_to_end(&self, primary_index: AnchorIndex) -> Cost { + self.primary[[primary_index + 1, self.primary_end_anchor_index()]] } - pub fn jump_12_to_21_from_start(&self, to_secondary_21_index: usize) -> Cost { - self.jump_12_to_21[[0, to_secondary_21_index]] + pub fn start_to_end(&self) -> Cost { + self.primary[[ + Self::primary_start_anchor_index(), + self.primary_end_anchor_index(), + ]] } - pub fn jump_12_to_22_from_start(&self, to_secondary_22_index: usize) -> Cost { - self.jump_12_to_22[[0, to_secondary_22_index]] + pub fn jump_12( + &self, + from_primary_index: AnchorIndex, + to_secondary_index: AnchorIndex, + ts_kind: TsKind, + ) -> Cost { + self.jump_12_array(ts_kind)[[from_primary_index + 1, to_secondary_index]] } - pub fn jump_12_from_start(&self, to_secondary_index: usize, ts_kind: TsKind) -> Cost { - match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => { - self.jump_12_to_11_from_start(to_secondary_index) - } - (TsAncestor::Seq1, TsDescendant::Seq2) => { - self.jump_12_to_12_from_start(to_secondary_index) - } - (TsAncestor::Seq2, TsDescendant::Seq1) => { - self.jump_12_to_21_from_start(to_secondary_index) - } - (TsAncestor::Seq2, TsDescendant::Seq2) => { - self.jump_12_to_22_from_start(to_secondary_index) - } - } + pub fn jump_12_from_start(&self, to_secondary_index: AnchorIndex, ts_kind: TsKind) -> Cost { + self.jump_12_array(ts_kind)[[Self::primary_start_anchor_index(), to_secondary_index]] } pub fn secondary( &self, - from_secondary_index: usize, - to_secondary_index: usize, + from_secondary_index: AnchorIndex, + to_secondary_index: AnchorIndex, ts_kind: TsKind, ) -> Cost { - match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => { - self.secondary_11[[from_secondary_index, to_secondary_index]] - } - (TsAncestor::Seq1, TsDescendant::Seq2) => { - self.secondary_12[[from_secondary_index, to_secondary_index]] - } - (TsAncestor::Seq2, TsDescendant::Seq1) => { - self.secondary_21[[from_secondary_index, to_secondary_index]] - } - (TsAncestor::Seq2, TsDescendant::Seq2) => { - self.secondary_22[[from_secondary_index, to_secondary_index]] - } - } + self.secondary_array(ts_kind)[[from_secondary_index, to_secondary_index]] } pub fn jump_34( &self, - from_secondary_index: usize, - to_primary_index: usize, + from_secondary_index: AnchorIndex, + to_primary_index: AnchorIndex, ts_kind: TsKind, ) -> Cost { - match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => { - self.jump_34_from_11[[from_secondary_index, to_primary_index + 1]] - } - (TsAncestor::Seq1, TsDescendant::Seq2) => { - self.jump_34_from_12[[from_secondary_index, to_primary_index + 1]] - } - (TsAncestor::Seq2, TsDescendant::Seq1) => { - self.jump_34_from_21[[from_secondary_index, to_primary_index + 1]] - } - (TsAncestor::Seq2, TsDescendant::Seq2) => { - self.jump_34_from_22[[from_secondary_index, to_primary_index + 1]] - } - } + self.jump_34_array(ts_kind)[[from_secondary_index, to_primary_index + 1]] } - pub fn jump_34_to_end(&self, from_secondary_index: usize, ts_kind: TsKind) -> Cost { - match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => { - self.jump_34_from_11[[from_secondary_index, self.jump_34_from_11.dim().1 - 1]] - } - (TsAncestor::Seq1, TsDescendant::Seq2) => { - self.jump_34_from_12[[from_secondary_index, self.jump_34_from_12.dim().1 - 1]] - } - (TsAncestor::Seq2, TsDescendant::Seq1) => { - self.jump_34_from_21[[from_secondary_index, self.jump_34_from_21.dim().1 - 1]] - } - (TsAncestor::Seq2, TsDescendant::Seq2) => { - self.jump_34_from_22[[from_secondary_index, self.jump_34_from_22.dim().1 - 1]] - } - } + pub fn jump_34_to_end(&self, from_secondary_index: AnchorIndex, ts_kind: TsKind) -> Cost { + self.jump_34_array(ts_kind)[[from_secondary_index, self.primary_end_anchor_index()]] } - pub fn is_primary_exact(&self, from_primary_index: usize, to_primary_index: usize) -> bool { + pub fn is_primary_exact( + &self, + from_primary_index: AnchorIndex, + to_primary_index: AnchorIndex, + ) -> bool { self.primary .is_exact(from_primary_index + 1, to_primary_index + 1) } - pub fn is_primary_from_start_exact(&self, primary_index: usize) -> bool { - self.primary.is_exact(0, primary_index + 1) + pub fn is_primary_from_start_exact(&self, primary_index: AnchorIndex) -> bool { + self.primary + .is_exact(Self::primary_start_anchor_index(), primary_index + 1) } - pub fn is_primary_to_end_exact(&self, primary_index: usize) -> bool { + pub fn is_primary_to_end_exact(&self, primary_index: AnchorIndex) -> bool { self.primary - .is_exact(primary_index + 1, self.primary.dim().1 - 1) + .is_exact(primary_index + 1, self.primary_end_anchor_index()) } pub fn is_start_to_end_exact(&self) -> bool { - self.primary.is_exact(0, self.primary.dim().1 - 1) - } - - pub fn is_jump_12_to_11_exact( - &self, - from_primary_index: usize, - to_secondary_11_index: usize, - ) -> bool { - self.jump_12_to_11 - .is_exact(from_primary_index + 1, to_secondary_11_index) - } - - pub fn is_jump_12_to_12_exact( - &self, - from_primary_index: usize, - to_secondary_12_index: usize, - ) -> bool { - self.jump_12_to_12 - .is_exact(from_primary_index + 1, to_secondary_12_index) + self.primary.is_exact( + Self::primary_start_anchor_index(), + self.primary_end_anchor_index(), + ) } - pub fn is_jump_12_to_21_exact( - &self, - from_primary_index: usize, - to_secondary_21_index: usize, - ) -> bool { - self.jump_12_to_21 - .is_exact(from_primary_index + 1, to_secondary_21_index) - } - - pub fn is_jump_12_to_22_exact( + pub fn is_jump_12_exact( &self, - from_primary_index: usize, - to_secondary_22_index: usize, + from_primary_index: AnchorIndex, + to_secondary_index: AnchorIndex, + ts_kind: TsKind, ) -> bool { - self.jump_12_to_22 - .is_exact(from_primary_index + 1, to_secondary_22_index) + self.jump_12_array(ts_kind) + .is_exact(from_primary_index + 1, to_secondary_index) } - pub fn is_jump_12_exact( + pub fn is_jump_12_from_start_exact( &self, - from_primary_index: usize, - to_secondary_index: usize, + to_secondary_index: AnchorIndex, ts_kind: TsKind, ) -> bool { - match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => { - self.is_jump_12_to_11_exact(from_primary_index, to_secondary_index) - } - (TsAncestor::Seq1, TsDescendant::Seq2) => { - self.is_jump_12_to_12_exact(from_primary_index, to_secondary_index) - } - (TsAncestor::Seq2, TsDescendant::Seq1) => { - self.is_jump_12_to_21_exact(from_primary_index, to_secondary_index) - } - (TsAncestor::Seq2, TsDescendant::Seq2) => { - self.is_jump_12_to_22_exact(from_primary_index, to_secondary_index) - } - } - } - - pub fn is_jump_12_to_11_from_start_exact(&self, to_secondary_11_index: usize) -> bool { - self.jump_12_to_11.is_exact(0, to_secondary_11_index) - } - - pub fn is_jump_12_to_12_from_start_exact(&self, to_secondary_12_index: usize) -> bool { - self.jump_12_to_12.is_exact(0, to_secondary_12_index) - } - - pub fn is_jump_12_to_21_from_start_exact(&self, to_secondary_21_index: usize) -> bool { - self.jump_12_to_21.is_exact(0, to_secondary_21_index) - } - - pub fn is_jump_12_to_22_from_start_exact(&self, to_secondary_22_index: usize) -> bool { - self.jump_12_to_22.is_exact(0, to_secondary_22_index) - } - - pub fn is_jump_12_from_start_exact(&self, to_secondary_index: usize, ts_kind: TsKind) -> bool { - match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => { - self.is_jump_12_to_11_from_start_exact(to_secondary_index) - } - (TsAncestor::Seq1, TsDescendant::Seq2) => { - self.is_jump_12_to_12_from_start_exact(to_secondary_index) - } - (TsAncestor::Seq2, TsDescendant::Seq1) => { - self.is_jump_12_to_21_from_start_exact(to_secondary_index) - } - (TsAncestor::Seq2, TsDescendant::Seq2) => { - self.is_jump_12_to_22_from_start_exact(to_secondary_index) - } - } + self.jump_12_array(ts_kind) + .is_exact(Self::primary_start_anchor_index(), to_secondary_index) } pub fn is_secondary_exact( &self, - from_secondary_index: usize, - to_secondary_index: usize, + from_secondary_index: AnchorIndex, + to_secondary_index: AnchorIndex, ts_kind: TsKind, ) -> bool { - match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => self - .secondary_11 - .is_exact(from_secondary_index, to_secondary_index), - (TsAncestor::Seq1, TsDescendant::Seq2) => self - .secondary_12 - .is_exact(from_secondary_index, to_secondary_index), - (TsAncestor::Seq2, TsDescendant::Seq1) => self - .secondary_21 - .is_exact(from_secondary_index, to_secondary_index), - (TsAncestor::Seq2, TsDescendant::Seq2) => self - .secondary_22 - .is_exact(from_secondary_index, to_secondary_index), - } + self.secondary_array(ts_kind) + .is_exact(from_secondary_index, to_secondary_index) } pub fn is_jump_34_exact( &self, - from_secondary_index: usize, - to_primary_index: usize, + from_secondary_index: AnchorIndex, + to_primary_index: AnchorIndex, ts_kind: TsKind, ) -> bool { - match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => self - .jump_34_from_11 - .is_exact(from_secondary_index, to_primary_index + 1), - (TsAncestor::Seq1, TsDescendant::Seq2) => self - .jump_34_from_12 - .is_exact(from_secondary_index, to_primary_index + 1), - (TsAncestor::Seq2, TsDescendant::Seq1) => self - .jump_34_from_21 - .is_exact(from_secondary_index, to_primary_index + 1), - (TsAncestor::Seq2, TsDescendant::Seq2) => self - .jump_34_from_22 - .is_exact(from_secondary_index, to_primary_index + 1), - } + self.jump_34_array(ts_kind) + .is_exact(from_secondary_index, to_primary_index + 1) } - pub fn is_jump_34_to_end_exact(&self, from_secondary_index: usize, ts_kind: TsKind) -> bool { - match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => self - .jump_34_from_11 - .is_exact(from_secondary_index, self.jump_34_from_11.dim().1 - 1), - (TsAncestor::Seq1, TsDescendant::Seq2) => self - .jump_34_from_12 - .is_exact(from_secondary_index, self.jump_34_from_12.dim().1 - 1), - (TsAncestor::Seq2, TsDescendant::Seq1) => self - .jump_34_from_21 - .is_exact(from_secondary_index, self.jump_34_from_21.dim().1 - 1), - (TsAncestor::Seq2, TsDescendant::Seq2) => self - .jump_34_from_22 - .is_exact(from_secondary_index, self.jump_34_from_22.dim().1 - 1), - } + pub fn is_jump_34_to_end_exact( + &self, + from_secondary_index: AnchorIndex, + ts_kind: TsKind, + ) -> bool { + self.jump_34_array(ts_kind) + .is_exact(from_secondary_index, self.primary_end_anchor_index()) } pub fn update_primary( &mut self, - from_primary_index: usize, - to_primary_index: usize, + from_primary_index: AnchorIndex, + to_primary_index: AnchorIndex, cost: Cost, is_exact: bool, ) -> bool { @@ -628,14 +328,15 @@ impl ChainingCostFunction { pub fn update_primary_from_start( &mut self, - primary_index: usize, + primary_index: AnchorIndex, cost: Cost, is_exact: bool, ) -> bool { if is_exact { - self.primary.set_exact(0, primary_index + 1); + self.primary + .set_exact(Self::primary_start_anchor_index(), primary_index + 1); } - let target = &mut self.primary[[0, primary_index + 1]]; + let target = &mut self.primary[[Self::primary_start_anchor_index(), primary_index + 1]]; assert!(*target <= cost); let result = *target < cost; *target = cost; @@ -644,11 +345,11 @@ impl ChainingCostFunction { pub fn update_primary_to_end( &mut self, - primary_index: usize, + primary_index: AnchorIndex, cost: Cost, is_exact: bool, ) -> bool { - let end_index = self.primary.dim().1 - 1; + let end_index = self.primary_end_anchor_index(); if is_exact { self.primary.set_exact(primary_index + 1, end_index); } @@ -662,9 +363,10 @@ impl ChainingCostFunction { pub fn update_start_to_end(&mut self, cost: Cost, is_exact: bool) -> bool { let end_index = self.primary.dim().1 - 1; if is_exact { - self.primary.set_exact(0, end_index); + self.primary + .set_exact(Self::primary_start_anchor_index(), end_index); } - let target = &mut self.primary[[0, end_index]]; + let target = &mut self.primary[[Self::primary_start_anchor_index(), end_index]]; assert!(*target <= cost); let result = *target < cost; *target = cost; @@ -673,42 +375,18 @@ impl ChainingCostFunction { pub fn update_jump_12( &mut self, - from_primary_index: usize, - to_secondary_index: usize, + from_primary_index: AnchorIndex, + to_secondary_index: AnchorIndex, ts_kind: TsKind, cost: Cost, is_exact: bool, ) -> bool { - let target = match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => { - if is_exact { - self.jump_12_to_11 - .set_exact(from_primary_index + 1, to_secondary_index); - } - &mut self.jump_12_to_11[[from_primary_index + 1, to_secondary_index]] - } - (TsAncestor::Seq1, TsDescendant::Seq2) => { - if is_exact { - self.jump_12_to_12 - .set_exact(from_primary_index + 1, to_secondary_index); - } - &mut self.jump_12_to_12[[from_primary_index + 1, to_secondary_index]] - } - (TsAncestor::Seq2, TsDescendant::Seq1) => { - if is_exact { - self.jump_12_to_21 - .set_exact(from_primary_index + 1, to_secondary_index); - } - &mut self.jump_12_to_21[[from_primary_index + 1, to_secondary_index]] - } - (TsAncestor::Seq2, TsDescendant::Seq2) => { - if is_exact { - self.jump_12_to_22 - .set_exact(from_primary_index + 1, to_secondary_index); - } - &mut self.jump_12_to_22[[from_primary_index + 1, to_secondary_index]] - } - }; + if is_exact { + self.jump_12_array_mut(ts_kind) + .set_exact(from_primary_index + 1, to_secondary_index); + } + let target = + &mut self.jump_12_array_mut(ts_kind)[[from_primary_index + 1, to_secondary_index]]; assert!(*target <= cost); let result = *target < cost; *target = cost; @@ -717,37 +395,17 @@ impl ChainingCostFunction { pub fn update_jump_12_from_start( &mut self, - to_secondary_index: usize, + to_secondary_index: AnchorIndex, ts_kind: TsKind, cost: Cost, is_exact: bool, ) -> bool { - let target = match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => { - if is_exact { - self.jump_12_to_11.set_exact(0, to_secondary_index); - } - &mut self.jump_12_to_11[[0, to_secondary_index]] - } - (TsAncestor::Seq1, TsDescendant::Seq2) => { - if is_exact { - self.jump_12_to_12.set_exact(0, to_secondary_index); - } - &mut self.jump_12_to_12[[0, to_secondary_index]] - } - (TsAncestor::Seq2, TsDescendant::Seq1) => { - if is_exact { - self.jump_12_to_21.set_exact(0, to_secondary_index); - } - &mut self.jump_12_to_21[[0, to_secondary_index]] - } - (TsAncestor::Seq2, TsDescendant::Seq2) => { - if is_exact { - self.jump_12_to_22.set_exact(0, to_secondary_index); - } - &mut self.jump_12_to_22[[0, to_secondary_index]] - } - }; + if is_exact { + self.jump_12_array_mut(ts_kind) + .set_exact(Self::primary_start_anchor_index(), to_secondary_index); + } + let target = &mut self.jump_12_array_mut(ts_kind) + [[Self::primary_start_anchor_index(), to_secondary_index]]; assert!(*target <= cost); let result = *target < cost; *target = cost; @@ -756,42 +414,18 @@ impl ChainingCostFunction { pub fn update_secondary( &mut self, - from_secondary_index: usize, - to_secondary_index: usize, + from_secondary_index: AnchorIndex, + to_secondary_index: AnchorIndex, ts_kind: TsKind, cost: Cost, is_exact: bool, ) -> bool { - let target = match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => { - if is_exact { - self.secondary_11 - .set_exact(from_secondary_index, to_secondary_index); - } - &mut self.secondary_11[[from_secondary_index, to_secondary_index]] - } - (TsAncestor::Seq1, TsDescendant::Seq2) => { - if is_exact { - self.secondary_12 - .set_exact(from_secondary_index, to_secondary_index); - } - &mut self.secondary_12[[from_secondary_index, to_secondary_index]] - } - (TsAncestor::Seq2, TsDescendant::Seq1) => { - if is_exact { - self.secondary_21 - .set_exact(from_secondary_index, to_secondary_index); - } - &mut self.secondary_21[[from_secondary_index, to_secondary_index]] - } - (TsAncestor::Seq2, TsDescendant::Seq2) => { - if is_exact { - self.secondary_22 - .set_exact(from_secondary_index, to_secondary_index); - } - &mut self.secondary_22[[from_secondary_index, to_secondary_index]] - } - }; + if is_exact { + self.secondary_array_mut(ts_kind) + .set_exact(from_secondary_index, to_secondary_index); + } + let target = + &mut self.secondary_array_mut(ts_kind)[[from_secondary_index, to_secondary_index]]; assert!(*target <= cost); let result = *target < cost; *target = cost; @@ -800,42 +434,18 @@ impl ChainingCostFunction { pub fn update_jump_34( &mut self, - from_secondary_index: usize, - to_primary_index: usize, + from_secondary_index: AnchorIndex, + to_primary_index: AnchorIndex, ts_kind: TsKind, cost: Cost, is_exact: bool, ) -> bool { - let target = match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => { - if is_exact { - self.jump_34_from_11 - .set_exact(from_secondary_index, to_primary_index + 1); - } - &mut self.jump_34_from_11[[from_secondary_index, to_primary_index + 1]] - } - (TsAncestor::Seq1, TsDescendant::Seq2) => { - if is_exact { - self.jump_34_from_12 - .set_exact(from_secondary_index, to_primary_index + 1); - } - &mut self.jump_34_from_12[[from_secondary_index, to_primary_index + 1]] - } - (TsAncestor::Seq2, TsDescendant::Seq1) => { - if is_exact { - self.jump_34_from_21 - .set_exact(from_secondary_index, to_primary_index + 1); - } - &mut self.jump_34_from_21[[from_secondary_index, to_primary_index + 1]] - } - (TsAncestor::Seq2, TsDescendant::Seq2) => { - if is_exact { - self.jump_34_from_22 - .set_exact(from_secondary_index, to_primary_index + 1); - } - &mut self.jump_34_from_22[[from_secondary_index, to_primary_index + 1]] - } - }; + if is_exact { + self.jump_34_array_mut(ts_kind) + .set_exact(from_secondary_index, to_primary_index + 1); + } + let target = + &mut self.jump_34_array_mut(ts_kind)[[from_secondary_index, to_primary_index + 1]]; assert!(*target <= cost); let result = *target < cost; *target = cost; @@ -844,45 +454,17 @@ impl ChainingCostFunction { pub fn update_jump_34_to_end( &mut self, - from_secondary_index: usize, + from_secondary_index: AnchorIndex, ts_kind: TsKind, cost: Cost, is_exact: bool, ) -> bool { - let target = match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => { - let end_index = self.jump_34_from_11.dim().1 - 1; - if is_exact { - self.jump_34_from_11 - .set_exact(from_secondary_index, end_index); - } - &mut self.jump_34_from_11[[from_secondary_index, end_index]] - } - (TsAncestor::Seq1, TsDescendant::Seq2) => { - let end_index = self.jump_34_from_12.dim().1 - 1; - if is_exact { - self.jump_34_from_12 - .set_exact(from_secondary_index, end_index); - } - &mut self.jump_34_from_12[[from_secondary_index, end_index]] - } - (TsAncestor::Seq2, TsDescendant::Seq1) => { - let end_index = self.jump_34_from_21.dim().1 - 1; - if is_exact { - self.jump_34_from_21 - .set_exact(from_secondary_index, end_index); - } - &mut self.jump_34_from_21[[from_secondary_index, end_index]] - } - (TsAncestor::Seq2, TsDescendant::Seq2) => { - let end_index = self.jump_34_from_22.dim().1 - 1; - if is_exact { - self.jump_34_from_22 - .set_exact(from_secondary_index, end_index); - } - &mut self.jump_34_from_22[[from_secondary_index, end_index]] - } - }; + let end_index = self.primary_end_anchor_index(); + if is_exact { + self.jump_34_array_mut(ts_kind) + .set_exact(from_secondary_index, end_index); + } + let target = &mut self.jump_34_array_mut(ts_kind)[[from_secondary_index, end_index]]; assert!(*target <= cost); let result = *target < cost; *target = cost; @@ -891,116 +473,81 @@ impl ChainingCostFunction { pub fn iter_primary_in_cost_order( &mut self, - from_primary_index: usize, - ) -> impl Iterator + from_primary_index: AnchorIndex, + ) -> impl Iterator where Cost: Copy + Ord, { self.primary .iter_in_cost_order(from_primary_index + 1) .filter_map(|(to_primary_index, chaining_cost)| { - (to_primary_index != 0).then(|| (to_primary_index - 1, chaining_cost)) + (to_primary_index != AnchorIndex::zero()) + .then(|| (to_primary_index - 1, chaining_cost)) }) } - pub fn iter_primary_from_start_in_cost_order(&mut self) -> impl Iterator + pub fn iter_primary_from_start_in_cost_order( + &mut self, + ) -> impl Iterator where Cost: Copy + Ord, { self.primary - .iter_in_cost_order(0) + .iter_in_cost_order(AnchorIndex::zero()) .filter_map(|(to_primary_index, chaining_cost)| { - (to_primary_index != 0).then(|| (to_primary_index - 1, chaining_cost)) + (to_primary_index != AnchorIndex::zero()) + .then(|| (to_primary_index - 1, chaining_cost)) }) } pub fn iter_jump_12_in_cost_order( &mut self, - from_primary_index: usize, + from_primary_index: AnchorIndex, ts_kind: TsKind, - ) -> impl Iterator + ) -> impl Iterator where Cost: Copy + Ord, { - match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => self - .jump_12_to_11 - .iter_in_cost_order(from_primary_index + 1), - (TsAncestor::Seq1, TsDescendant::Seq2) => self - .jump_12_to_12 - .iter_in_cost_order(from_primary_index + 1), - (TsAncestor::Seq2, TsDescendant::Seq1) => self - .jump_12_to_21 - .iter_in_cost_order(from_primary_index + 1), - (TsAncestor::Seq2, TsDescendant::Seq2) => self - .jump_12_to_22 - .iter_in_cost_order(from_primary_index + 1), - } + self.jump_12_array_mut(ts_kind) + .iter_in_cost_order(from_primary_index + 1) } pub fn iter_jump_12_from_start_in_cost_order( &mut self, ts_kind: TsKind, - ) -> impl Iterator + ) -> impl Iterator where Cost: Copy + Ord, { - match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => self.jump_12_to_11.iter_in_cost_order(0), - (TsAncestor::Seq1, TsDescendant::Seq2) => self.jump_12_to_12.iter_in_cost_order(0), - (TsAncestor::Seq2, TsDescendant::Seq1) => self.jump_12_to_21.iter_in_cost_order(0), - (TsAncestor::Seq2, TsDescendant::Seq2) => self.jump_12_to_22.iter_in_cost_order(0), - } + self.jump_12_array_mut(ts_kind) + .iter_in_cost_order(Self::primary_start_anchor_index()) } pub fn iter_secondary_in_cost_order( &mut self, - from_secondary_index: usize, + from_secondary_index: AnchorIndex, ts_kind: TsKind, - ) -> impl Iterator + ) -> impl Iterator where Cost: Copy + Ord, { - match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => { - self.secondary_11.iter_in_cost_order(from_secondary_index) - } - (TsAncestor::Seq1, TsDescendant::Seq2) => { - self.secondary_12.iter_in_cost_order(from_secondary_index) - } - (TsAncestor::Seq2, TsDescendant::Seq1) => { - self.secondary_21.iter_in_cost_order(from_secondary_index) - } - (TsAncestor::Seq2, TsDescendant::Seq2) => { - self.secondary_22.iter_in_cost_order(from_secondary_index) - } - } + self.secondary_array_mut(ts_kind) + .iter_in_cost_order(from_secondary_index) } pub fn iter_jump_34_in_cost_order( &mut self, - from_secondary_index: usize, + from_secondary_index: AnchorIndex, ts_kind: TsKind, - ) -> impl Iterator + ) -> impl Iterator where Cost: Copy + Ord, { - match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => self - .jump_34_from_11 - .iter_in_cost_order(from_secondary_index), - (TsAncestor::Seq1, TsDescendant::Seq2) => self - .jump_34_from_12 - .iter_in_cost_order(from_secondary_index), - (TsAncestor::Seq2, TsDescendant::Seq1) => self - .jump_34_from_21 - .iter_in_cost_order(from_secondary_index), - (TsAncestor::Seq2, TsDescendant::Seq2) => self - .jump_34_from_22 - .iter_in_cost_order(from_secondary_index), - } - .filter_map(|(to_primary_index, chaining_cost)| { - (to_primary_index != 0).then(|| (to_primary_index - 1, chaining_cost)) - }) + self.jump_34_array_mut(ts_kind) + .iter_in_cost_order(from_secondary_index) + .filter_map(|(to_primary_index, chaining_cost)| { + (to_primary_index != AnchorIndex::zero()) + .then(|| (to_primary_index - 1, chaining_cost)) + }) } } diff --git a/lib_ts_chainalign/src/chaining_cost_function/cost_array.rs b/lib_ts_chainalign/src/chaining_cost_function/cost_array.rs index 6a9423a..28c0ec9 100644 --- a/lib_ts_chainalign/src/chaining_cost_function/cost_array.rs +++ b/lib_ts_chainalign/src/chaining_cost_function/cost_array.rs @@ -2,81 +2,88 @@ use std::ops::{Index, IndexMut}; use bitvec::{bitvec, order::LocalBits, vec::BitVec}; +use crate::anchors::index::AnchorIndex; + pub struct ChainingCostArray { - len: [usize; 2], + len: [AnchorIndex; 2], data: Vec, /// For each ordinate 0, store the indices of the corresponding ordinates 1, ordered by their cost. - cost_order_permutation: Vec>, + cost_order_permutation: Vec>, is_exact: BitVec, } impl ChainingCostArray { - pub fn new_from_cost(len: [usize; 2], cost: Cost) -> Self + pub fn new_from_cost(dim: [impl Into; 2], cost: Cost) -> Self where Cost: Clone, { + let dim = dim.map(Into::into); + Self { - len, - data: vec![cost; len[0] * len[1]], - cost_order_permutation: vec![Vec::new(); len[0]], - is_exact: bitvec![usize, LocalBits; 0; len[0] * len[1]], + len: dim, + data: vec![cost; dim[0].as_usize() * dim[1].as_usize()], + cost_order_permutation: vec![Vec::new(); dim[0].as_usize()], + is_exact: bitvec![usize, LocalBits; 0; dim[0].as_usize() * dim[1].as_usize()], } } - pub fn is_exact(&self, c1: usize, c2: usize) -> bool { + pub fn is_exact(&self, c1: AnchorIndex, c2: AnchorIndex) -> bool { self.is_exact[coordinates_to_index(c1, c2, self.len)] } - pub fn set_exact(&mut self, c1: usize, c2: usize) { + pub fn set_exact(&mut self, c1: AnchorIndex, c2: AnchorIndex) { debug_assert!(!self.is_exact(c1, c2)); self.is_exact .set(coordinates_to_index(c1, c2, self.len), true); } - pub fn dim(&self) -> (usize, usize) { + pub fn dim(&self) -> (AnchorIndex, AnchorIndex) { (self.len[0], self.len[1]) } /// Iterate over all ordinates `c2` that belong to this `c1` in order of their cost. - pub fn iter_in_cost_order(&mut self, c1: usize) -> impl Iterator + pub fn iter_in_cost_order( + &mut self, + c1: AnchorIndex, + ) -> impl Iterator where Cost: Copy + Ord, { - if self.cost_order_permutation[c1].is_empty() { - self.cost_order_permutation[c1].extend((0..).take(self.len[1])); - self.cost_order_permutation[c1].sort_unstable_by_key(|c2| { - self.data[coordinates_to_index(c1, usize::try_from(*c2).unwrap(), self.len)] - }); + if self.cost_order_permutation[c1.as_usize()].is_empty() { + self.cost_order_permutation[c1.as_usize()].extend( + (0usize..) + .take(self.len[1].as_usize()) + .map(AnchorIndex::from), + ); + self.cost_order_permutation[c1.as_usize()] + .sort_unstable_by_key(|c2| self.data[coordinates_to_index(c1, *c2, self.len)]); } let data = &self.data; let len = self.len; - self.cost_order_permutation[c1] + self.cost_order_permutation[c1.as_usize()] .iter() .copied() - .filter_map(move |c2| { - let c2 = usize::try_from(c2).unwrap(); - (c1 != c2).then(|| (c2, data[coordinates_to_index(c1, c2, len)])) - }) + .filter_map(move |c2| (c1 != c2).then(|| (c2, data[coordinates_to_index(c1, c2, len)]))) } } -impl Index<[usize; 2]> for ChainingCostArray { +impl Index<[AnchorIndex; 2]> for ChainingCostArray { type Output = Cost; - fn index(&self, index: [usize; 2]) -> &Self::Output { + fn index(&self, index: [AnchorIndex; 2]) -> &Self::Output { &self.data[coordinates_to_index(index[0], index[1], self.len)] } } -impl IndexMut<[usize; 2]> for ChainingCostArray { - fn index_mut(&mut self, index: [usize; 2]) -> &mut Self::Output { - self.cost_order_permutation[index[0]].clear(); +impl IndexMut<[AnchorIndex; 2]> for ChainingCostArray { + fn index_mut(&mut self, index: [AnchorIndex; 2]) -> &mut Self::Output { + self.cost_order_permutation[index[0].as_usize()].clear(); &mut self.data[coordinates_to_index(index[0], index[1], self.len)] } } #[inline] -fn coordinates_to_index(c1: usize, c2: usize, len: [usize; 2]) -> usize { - c1 * len[1] + c2 +fn coordinates_to_index(c1: AnchorIndex, c2: AnchorIndex, len: [AnchorIndex; 2]) -> usize { + c1.as_usize() * len[1].as_usize() + c2.as_usize() } From 8982c98705f118f3bc2afb445406f5f267453287 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Wed, 10 Dec 2025 15:15:40 +0200 Subject: [PATCH 6/9] Step through anchor successors instead of generating all at once. --- lib_ts_chainalign/src/chain_align.rs | 151 ++++- lib_ts_chainalign/src/chain_align/chainer.rs | 544 ++++++++++++------ .../chainer/max_successors_iterator.rs | 61 ++ .../src/chaining_cost_function.rs | 18 +- .../src/chaining_cost_function/cost_array.rs | 36 +- lib_ts_chainalign/src/lib.rs | 3 + tsalign/src/align.rs | 7 + tsalign/src/align/a_star_chain_ts.rs | 10 +- 8 files changed, 632 insertions(+), 198 deletions(-) create mode 100644 lib_ts_chainalign/src/chain_align/chainer/max_successors_iterator.rs diff --git a/lib_ts_chainalign/src/chain_align.rs b/lib_ts_chainalign/src/chain_align.rs index c741a99..f13ac01 100644 --- a/lib_ts_chainalign/src/chain_align.rs +++ b/lib_ts_chainalign/src/chain_align.rs @@ -12,6 +12,7 @@ use lib_tsalign::a_star_aligner::{ template_switch_distance::{EqualCostRange, TemplateSwitchDirection}, }; use log::{debug, trace}; +use num_traits::Zero; use std::{ fmt::Write, iter, @@ -34,11 +35,19 @@ use crate::{ mod chainer; +pub struct AlignmentParameters { + /// The step width for generating successors during chaining. + /// + /// At most `max_successors` will be generated at a time, but at least all with minimum chaining cost. + pub max_successors: usize, +} + #[expect(clippy::too_many_arguments)] pub fn align( sequences: &AlignmentSequences, start: AlignmentCoordinates, end: AlignmentCoordinates, + parameters: &AlignmentParameters, alignment_costs: &AlignmentCosts, rc_fn: &dyn Fn(u8) -> u8, max_match_run: u32, @@ -58,6 +67,7 @@ pub fn align( chaining_cost_function, &alignment_costs.ts_limits, k, + parameters.max_successors, ); let mut astar = AStar::new(context); let mut chaining_execution_count = 0; @@ -104,21 +114,55 @@ pub fn align( } match identifier { Identifier::Start => write!(s, "start").unwrap(), - Identifier::Primary { index } => { - write!(s, "P{}", anchors.primary(*index)).unwrap() + Identifier::StartToPrimary { offset } => { + write!(s, "start-to-primary-{offset}").unwrap() + } + Identifier::StartToSecondary { ts_kind, offset } => { + write!(s, "start-to-secondary{}-{offset}", ts_kind.digits()) + .unwrap() } - Identifier::Secondary { + Identifier::PrimaryToPrimary { index, offset } => { + write!(s, "P{}-to-primary-{offset}", anchors.primary(*index)) + .unwrap() + } + Identifier::PrimaryToSecondary { + index, + ts_kind, + offset, + } => write!( + s, + "P{}-to-secondary{}-{offset}", + anchors.primary(*index), + ts_kind.digits() + ) + .unwrap(), + Identifier::SecondaryToSecondary { index, ts_kind, first_secondary_index, + offset, } => write!( s, - "S{}{}->{}", + "S{}{}->{}-to-secondary-{offset}", ts_kind.digits(), anchors.secondary(*first_secondary_index, *ts_kind), anchors.secondary(*index, *ts_kind), ) .unwrap(), + Identifier::SecondaryToPrimary { + index, + ts_kind, + first_secondary_index, + offset, + } => write!( + s, + "S{}{}->{}-to-primary-{offset}", + ts_kind.digits(), + anchors.secondary(*first_secondary_index, *ts_kind), + anchors.secondary(*index, *ts_kind), + ) + .unwrap(), + Identifier::End => write!(s, "end").unwrap(), } } @@ -305,9 +349,29 @@ fn evaluate_chain( let k = usize::try_from(max_match_run + 1).unwrap(); let mut current_upper_bound = Cost::zero(); let mut alignments = Vec::new(); - for window in chain.windows(2) { - let from_anchor = window[0]; - let to_anchor = window[1]; + let mut current_from_index = 0; + + loop { + let from_anchor = chain[current_from_index]; + let Some((to_anchor_index, to_anchor)) = chain + .iter() + .copied() + .enumerate() + .skip(current_from_index + 1) + .find(|(_, identifier)| match identifier { + Identifier::Start => true, + Identifier::StartToPrimary { .. } => false, + Identifier::StartToSecondary { .. } => false, + Identifier::PrimaryToPrimary { offset, .. } + | Identifier::PrimaryToSecondary { offset, .. } + | Identifier::SecondaryToSecondary { offset, .. } + | Identifier::SecondaryToPrimary { offset, .. } => offset.is_zero(), + Identifier::End => true, + }) + else { + break; + }; + current_from_index = to_anchor_index; match (from_anchor, to_anchor) { (Identifier::Start, Identifier::End) => { @@ -328,7 +392,11 @@ fn evaluate_chain( } current_upper_bound += chaining_cost_function.start_to_end(); } - (Identifier::Start, Identifier::Primary { index }) => { + ( + Identifier::Start, + Identifier::PrimaryToPrimary { index, .. } + | Identifier::PrimaryToSecondary { index, .. }, + ) => { let end = anchors.primary(index).start(); if final_evaluation || !chaining_cost_function.is_primary_from_start_exact(index) { let alignment = GapAffineAlignment::new( @@ -355,7 +423,11 @@ fn evaluate_chain( } current_upper_bound += chaining_cost_function.primary_from_start(index); } - (Identifier::Start, Identifier::Secondary { index, ts_kind, .. }) => { + ( + Identifier::Start, + Identifier::SecondaryToPrimary { index, ts_kind, .. } + | Identifier::SecondaryToSecondary { index, ts_kind, .. }, + ) => { let end = anchors.secondary(index, ts_kind).start(ts_kind); if final_evaluation || !chaining_cost_function.is_jump_12_from_start_exact(index, ts_kind) @@ -386,7 +458,7 @@ fn evaluate_chain( } current_upper_bound += chaining_cost_function.jump_12_from_start(index, ts_kind); } - (Identifier::Primary { index }, Identifier::End) => { + (Identifier::PrimaryToPrimary { index, .. }, Identifier::End) => { let start = anchors.primary(index).end(k); if final_evaluation || !chaining_cost_function.is_primary_to_end_exact(index) { let alignment = GapAffineAlignment::new( @@ -410,7 +482,7 @@ fn evaluate_chain( } current_upper_bound += chaining_cost_function.primary_to_end(index); } - (Identifier::Secondary { index, ts_kind, .. }, Identifier::End) => { + (Identifier::SecondaryToPrimary { index, ts_kind, .. }, Identifier::End) => { let start = anchors.secondary(index, ts_kind).end(ts_kind, k); if final_evaluation || !chaining_cost_function.is_jump_34_to_end_exact(index, ts_kind) @@ -443,8 +515,15 @@ fn evaluate_chain( current_upper_bound += chaining_cost_function.jump_34_to_end(index, ts_kind); } ( - Identifier::Primary { index: from_index }, - Identifier::Primary { index: to_index }, + Identifier::PrimaryToPrimary { + index: from_index, .. + }, + Identifier::PrimaryToPrimary { + index: to_index, .. + } + | Identifier::PrimaryToSecondary { + index: to_index, .. + }, ) => { if anchors .primary(from_index) @@ -487,8 +566,15 @@ fn evaluate_chain( current_upper_bound += chaining_cost_function.primary(from_index, to_index); } ( - Identifier::Primary { index: from_index }, - Identifier::Secondary { + Identifier::PrimaryToSecondary { + index: from_index, .. + }, + Identifier::SecondaryToSecondary { + index: to_index, + ts_kind, + .. + } + | Identifier::SecondaryToPrimary { index: to_index, ts_kind, .. @@ -530,12 +616,17 @@ fn evaluate_chain( chaining_cost_function.jump_12(from_index, to_index, ts_kind); } ( - Identifier::Secondary { + Identifier::SecondaryToSecondary { index: from_index, ts_kind, .. }, - Identifier::Secondary { + Identifier::SecondaryToSecondary { + index: to_index, + ts_kind: to_ts_kind, + .. + } + | Identifier::SecondaryToPrimary { index: to_index, ts_kind: to_ts_kind, .. @@ -586,12 +677,17 @@ fn evaluate_chain( chaining_cost_function.secondary(from_index, to_index, ts_kind); } ( - Identifier::Secondary { + Identifier::SecondaryToPrimary { index: from_index, ts_kind, .. }, - Identifier::Primary { index: to_index }, + Identifier::PrimaryToPrimary { + index: to_index, .. + } + | Identifier::PrimaryToSecondary { + index: to_index, .. + }, ) => { let start = anchors.secondary(from_index, ts_kind).end(ts_kind, k); let end = anchors.primary(to_index).start(); @@ -630,7 +726,22 @@ fn evaluate_chain( current_upper_bound += chaining_cost_function.jump_34(from_index, to_index, ts_kind); } - (Identifier::End, _) | (_, Identifier::Start) => unreachable!(), + (Identifier::End, _) + | (_, Identifier::Start) + | ( + Identifier::SecondaryToPrimary { .. } | Identifier::PrimaryToPrimary { .. }, + Identifier::SecondaryToPrimary { .. } | Identifier::SecondaryToSecondary { .. }, + ) + | ( + Identifier::PrimaryToSecondary { .. } | Identifier::SecondaryToSecondary { .. }, + Identifier::PrimaryToPrimary { .. } + | Identifier::PrimaryToSecondary { .. } + | Identifier::End, + ) + | (Identifier::StartToPrimary { .. } | Identifier::StartToSecondary { .. }, _) + | (_, Identifier::StartToPrimary { .. } | Identifier::StartToSecondary { .. }) => { + unreachable!() + } } } diff --git a/lib_ts_chainalign/src/chain_align/chainer.rs b/lib_ts_chainalign/src/chain_align/chainer.rs index 7b97332..7cb85fc 100644 --- a/lib_ts_chainalign/src/chain_align/chainer.rs +++ b/lib_ts_chainalign/src/chain_align/chainer.rs @@ -1,14 +1,18 @@ use std::{fmt::Display, iter}; use generic_a_star::{AStarContext, AStarNode, cost::AStarCost, reset::Reset}; +use num_traits::Zero; use crate::{ alignment::ts_kind::TsKind, anchors::{Anchors, index::AnchorIndex}, + chain_align::chainer::max_successors_iterator::MaxSuccessorsIterator, chaining_cost_function::ChainingCostFunction, costs::TsLimits, }; +mod max_successors_iterator; + const DEBUG_CHAINER: bool = false; pub struct Context<'anchors, 'chaining_cost_function, 'ts_limits, Cost> { @@ -16,6 +20,7 @@ pub struct Context<'anchors, 'chaining_cost_function, 'ts_limits, Cost> { pub chaining_cost_function: &'chaining_cost_function mut ChainingCostFunction, pub ts_limits: &'ts_limits TsLimits, pub k: usize, + pub max_successors: usize, } #[derive(Debug, Clone, Copy, Eq, PartialEq)] @@ -28,16 +33,39 @@ pub struct Node { #[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Hash)] pub enum Identifier { Start, - Primary { + StartToPrimary { + offset: AnchorIndex, + }, + StartToSecondary { + ts_kind: TsKind, + offset: AnchorIndex, + }, + PrimaryToPrimary { + index: AnchorIndex, + offset: AnchorIndex, + }, + PrimaryToSecondary { + index: AnchorIndex, + ts_kind: TsKind, + offset: AnchorIndex, + }, + SecondaryToSecondary { index: AnchorIndex, + ts_kind: TsKind, + /// The first secondary anchor that is part of the current template switch. + /// + /// Used to estimate the length of the resulting template switch. + first_secondary_index: AnchorIndex, + offset: AnchorIndex, }, - Secondary { + SecondaryToPrimary { index: AnchorIndex, ts_kind: TsKind, /// The first secondary anchor that is part of the current template switch. /// /// Used to estimate the length of the resulting template switch. first_secondary_index: AnchorIndex, + offset: AnchorIndex, }, End, } @@ -50,12 +78,14 @@ impl<'anchors, 'chaining_cost_function, 'ts_limits, Cost> chaining_cost_function: &'chaining_cost_function mut ChainingCostFunction, ts_limits: &'ts_limits TsLimits, k: usize, + max_successors: usize, ) -> Self { Self { anchors, chaining_cost_function, ts_limits, k, + max_successors, } } } @@ -81,90 +111,132 @@ impl AStarContext for Context<'_, '_, '_, Cost> { } match node.identifier { - Identifier::Start => { - output.extend( + Identifier::Start => output.extend( + iter::once(Identifier::StartToPrimary { + offset: AnchorIndex::zero(), + }) + .chain(TsKind::iter().map(|ts_kind| Identifier::StartToSecondary { + ts_kind, + offset: AnchorIndex::zero(), + })) + .map(|identifier| Node { + identifier, + predecessor, + cost: predecessor_cost, + }), + ), + + Identifier::StartToPrimary { offset } => { + let mut iter = MaxSuccessorsIterator::new( self.chaining_cost_function - .iter_primary_from_start_in_cost_order() - .flat_map(|(successor_index, chaining_cost)| { + .iter_primary_from_start_in_cost_order(offset), + self.max_successors, + ); + + output.extend( + iter.by_ref() + .map_while(|(successor_index, chaining_cost)| { if DEBUG_CHAINER { if successor_index == primary_end_anchor_index { println!("Checking anchor end"); } else { println!( "Checking anchor P-{successor_index}: {}", - self.anchors.primary(successor_index) + self.anchors.primary(successor_index), ); } } - let cost = predecessor_cost.checked_add(&chaining_cost).unwrap(); + let cost = predecessor_cost.checked_add(&chaining_cost)?; if DEBUG_CHAINER { println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); } - debug_assert_ne!(cost, Cost::max_value()); - Some(Node { - identifier: if successor_index == primary_end_anchor_index { - Identifier::End - } else { - Identifier::Primary { - index: successor_index, - } - }, + Some(generate_primary_successors( + successor_index, predecessor, cost, - }) - }), + primary_end_anchor_index, + )) + }) + .flatten(), ); - for ts_kind in TsKind::iter() { - output.extend( - self.chaining_cost_function - .iter_jump_12_from_start_in_cost_order(ts_kind) - .zip(iter::repeat(self.anchors)) - .flat_map(move |((successor_index, chaining_cost), anchors)| { - if DEBUG_CHAINER { - println!( - "Checking anchor S{}-{successor_index}: {}", - ts_kind.digits(), - anchors.secondary(successor_index, ts_kind) - ); - } - let cost = predecessor_cost.checked_add(&chaining_cost)?; - if DEBUG_CHAINER { - println!( - "Cost: {}+{}", - predecessor_cost, - cost - predecessor_cost - ); - } - - (cost != Cost::max_value()).then_some(Node { - identifier: Identifier::Secondary { - index: successor_index, - ts_kind, - first_secondary_index: successor_index, - }, - predecessor, - cost, - }) - }), - ); + if let Some(next_cost) = iter.next_cost() { + output.extend(iter::once(Node { + identifier: Identifier::StartToPrimary { + offset: offset + AnchorIndex::from(iter.successor_count()), + }, + predecessor, + cost: next_cost, + })); } } - Identifier::Primary { index } => { + Identifier::StartToSecondary { ts_kind, offset } => { + let mut iter = MaxSuccessorsIterator::new( + self.chaining_cost_function + .iter_jump_12_from_start_in_cost_order(ts_kind, offset), + self.max_successors, + ); + output.extend( + iter.by_ref() + .map_while(|(successor_index, chaining_cost)| { + if DEBUG_CHAINER { + println!( + "Checking anchor S{}-{successor_index}: {}", + ts_kind.digits(), + self.anchors.secondary(successor_index, ts_kind), + ); + } + + let cost = predecessor_cost.checked_add(&chaining_cost)?; + if DEBUG_CHAINER { + println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); + } + + Some(generate_secondary_successors( + successor_index, + ts_kind, + successor_index, + predecessor, + cost, + self.ts_limits.length_23.contains(&self.k), + )) + }) + .flatten(), + ); + + if let Some(next_cost) = iter.next_cost() { + output.extend(iter::once(Node { + identifier: Identifier::StartToSecondary { + ts_kind, + offset: offset + AnchorIndex::from(iter.successor_count()), + }, + predecessor, + cost: next_cost, + })); + } + } + + Identifier::PrimaryToPrimary { index, offset } => { + let mut iter = MaxSuccessorsIterator::new( self.chaining_cost_function - .iter_primary_in_cost_order(index) - .flat_map(|(successor_index, chaining_cost)| { + .iter_primary_in_cost_order(index, offset), + self.max_successors, + ); + + output.extend( + iter.by_ref() + .map_while(|(successor_index, chaining_cost)| { if DEBUG_CHAINER { if successor_index == primary_end_anchor_index { println!("Checking anchor end"); } else { println!( "Checking anchor P-{successor_index}: {}", - self.anchors.primary(successor_index) + self.anchors.primary(successor_index), ); } } @@ -174,75 +246,100 @@ impl AStarContext for Context<'_, '_, '_, Cost> { println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); } - // Assert that we can always chain to end. - debug_assert!( - successor_index != primary_end_anchor_index - || cost != Cost::max_value() - ); - (cost != Cost::max_value()).then_some(Node { - identifier: if successor_index == primary_end_anchor_index { - Identifier::End - } else { - Identifier::Primary { - index: successor_index, - } - }, + Some(generate_primary_successors( + successor_index, predecessor, cost, - }) - }), + primary_end_anchor_index, + )) + }) + .flatten(), ); - for ts_kind in TsKind::iter() { - output.extend( - self.chaining_cost_function - .iter_jump_12_in_cost_order(index, ts_kind) - .zip(iter::repeat(self.anchors)) - .flat_map(move |((successor_index, chaining_cost), anchors)| { - if DEBUG_CHAINER { - println!( - "Checking anchor S{}-{successor_index}: {}", - ts_kind.digits(), - anchors.secondary(successor_index, ts_kind) - ); - } - let cost = predecessor_cost.checked_add(&chaining_cost)?; - if DEBUG_CHAINER { - println!( - "Cost: {}+{}", - predecessor_cost, - cost - predecessor_cost - ); - } + if let Some(next_cost) = iter.next_cost() { + output.extend(iter::once(Node { + identifier: Identifier::PrimaryToPrimary { + index, + offset: offset + AnchorIndex::from(iter.successor_count()), + }, + predecessor, + cost: next_cost, + })); + } + } - (cost != Cost::max_value()).then_some(Node { - identifier: Identifier::Secondary { - index: successor_index, - ts_kind, - first_secondary_index: successor_index, - }, - predecessor, - cost, - }) - }), - ); + Identifier::PrimaryToSecondary { + index, + ts_kind, + offset, + } => { + let mut iter = MaxSuccessorsIterator::new( + self.chaining_cost_function + .iter_jump_12_in_cost_order(index, ts_kind, offset), + self.max_successors, + ); + + output.extend( + iter.by_ref() + .map_while(|(successor_index, chaining_cost)| { + if DEBUG_CHAINER { + println!( + "Checking anchor S{}-{successor_index}: {}", + ts_kind.digits(), + self.anchors.secondary(successor_index, ts_kind), + ); + } + + let cost = predecessor_cost.checked_add(&chaining_cost)?; + if DEBUG_CHAINER { + println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); + } + + Some(generate_secondary_successors( + successor_index, + ts_kind, + successor_index, + predecessor, + cost, + self.ts_limits.length_23.contains(&self.k), + )) + }) + .flatten(), + ); + + if let Some(next_cost) = iter.next_cost() { + output.extend(iter::once(Node { + identifier: Identifier::PrimaryToSecondary { + index, + ts_kind, + offset: offset + AnchorIndex::from(iter.successor_count()), + }, + predecessor, + cost: next_cost, + })); } } - Identifier::Secondary { + Identifier::SecondaryToSecondary { index, ts_kind, first_secondary_index, + offset, } => { - output.extend( + let mut iter = MaxSuccessorsIterator::new( self.chaining_cost_function - .iter_secondary_in_cost_order(index, ts_kind) - .flat_map(|(successor_index, chaining_cost)| { + .iter_secondary_in_cost_order(index, ts_kind, offset), + self.max_successors, + ); + + output.extend( + iter.by_ref() + .map_while(|(successor_index, chaining_cost)| { if DEBUG_CHAINER { println!( "Checking anchor S{}-{successor_index}: {}", ts_kind.digits(), - self.anchors.secondary(successor_index, ts_kind) + self.anchors.secondary(successor_index, ts_kind), ); } @@ -251,68 +348,92 @@ impl AStarContext for Context<'_, '_, '_, Cost> { println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); } - (cost != Cost::max_value()).then_some(Node { - identifier: Identifier::Secondary { - index: successor_index, - ts_kind, - first_secondary_index, - }, + let first_anchor = + &self.anchors.secondary(first_secondary_index, ts_kind); + let ts_length = first_anchor.ts_length_until( + &self.anchors.secondary(successor_index, ts_kind), + ts_kind, + self.k, + ); + + Some(generate_secondary_successors( + successor_index, + ts_kind, + first_secondary_index, predecessor, cost, - }) - }), + self.ts_limits.length_23.contains(&ts_length), + )) + }) + .flatten(), ); - let first_anchor = &self.anchors.secondary(first_secondary_index, ts_kind); - let ts_length = first_anchor.ts_length_until( - &self.anchors.secondary(index, ts_kind), - ts_kind, - self.k, - ); + if let Some(next_cost) = iter.next_cost() { + output.extend(iter::once(Node { + identifier: Identifier::SecondaryToSecondary { + index, + ts_kind, + first_secondary_index, + offset: offset + AnchorIndex::from(iter.successor_count()), + }, + predecessor, + cost: next_cost, + })); + } + } - if self.ts_limits.length_23.contains(&ts_length) { - output.extend( - self.chaining_cost_function - .iter_jump_34_in_cost_order(index, ts_kind) - .flat_map(|(successor_index, chaining_cost)| { - if DEBUG_CHAINER { - if successor_index == primary_end_anchor_index { - println!("Checking anchor end"); - } else { - println!( - "Checking anchor P-{successor_index}: {}", - self.anchors.primary(successor_index) - ); - } - } + Identifier::SecondaryToPrimary { + index, + ts_kind, + first_secondary_index, + offset, + } => { + let mut iter = MaxSuccessorsIterator::new( + self.chaining_cost_function + .iter_jump_34_in_cost_order(index, ts_kind, offset), + self.max_successors, + ); - let cost = predecessor_cost.checked_add(&chaining_cost)?; - if DEBUG_CHAINER { + output.extend( + iter.by_ref() + .map_while(|(successor_index, chaining_cost)| { + if DEBUG_CHAINER { + if successor_index == primary_end_anchor_index { + println!("Checking anchor end"); + } else { println!( - "Cost: {}+{}", - predecessor_cost, - cost - predecessor_cost + "Checking anchor P-{successor_index}: {}", + self.anchors.primary(successor_index), ); } + } - // Assert that we can always chain to end. - debug_assert!( - successor_index != primary_end_anchor_index - || cost != Cost::max_value() - ); - (cost != Cost::max_value()).then_some(Node { - identifier: if successor_index == primary_end_anchor_index { - Identifier::End - } else { - Identifier::Primary { - index: successor_index, - } - }, - predecessor, - cost, - }) - }), - ); + let cost = predecessor_cost.checked_add(&chaining_cost)?; + if DEBUG_CHAINER { + println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); + } + + Some(generate_primary_successors( + successor_index, + predecessor, + cost, + primary_end_anchor_index, + )) + }) + .flatten(), + ); + + if let Some(next_cost) = iter.next_cost() { + output.extend(iter::once(Node { + identifier: Identifier::SecondaryToPrimary { + index, + ts_kind, + first_secondary_index, + offset: offset + AnchorIndex::from(iter.successor_count()), + }, + predecessor, + cost: next_cost, + })); } } @@ -333,6 +454,72 @@ impl AStarContext for Context<'_, '_, '_, Cost> { } } +fn generate_primary_successors( + index: AnchorIndex, + predecessor: Option, + cost: Cost, + primary_end_anchor_index: AnchorIndex, +) -> impl Iterator> { + (index == primary_end_anchor_index) + .then(|| Identifier::End) + .into_iter() + .chain( + (index != primary_end_anchor_index) + .then(move || { + [Identifier::PrimaryToPrimary { + index, + offset: AnchorIndex::zero(), + }] + .into_iter() + .chain( + TsKind::iter().map(move |ts_kind| Identifier::PrimaryToSecondary { + index, + ts_kind, + offset: AnchorIndex::zero(), + }), + ) + }) + .into_iter() + .flatten(), + ) + .map(move |identifier| Node { + identifier, + predecessor, + cost, + }) +} + +fn generate_secondary_successors( + index: AnchorIndex, + ts_kind: TsKind, + first_secondary_index: AnchorIndex, + predecessor: Option, + cost: Cost, + can_34_jump: bool, +) -> impl Iterator> { + [ + can_34_jump.then(|| Identifier::SecondaryToPrimary { + index, + ts_kind, + first_secondary_index, + offset: AnchorIndex::zero(), + }), + Some(Identifier::SecondaryToSecondary { + index, + ts_kind, + first_secondary_index, + offset: AnchorIndex::zero(), + }), + ] + .into_iter() + .filter_map(|i| i) + .map(move |identifier| Node { + identifier, + predecessor, + cost, + }) +} + impl Reset for Context<'_, '_, '_, Cost> { fn reset(&mut self) { // Nothing to do. @@ -393,12 +580,43 @@ impl Display for Identifier { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Identifier::Start => write!(f, "start"), - Identifier::Primary { index } => write!(f, "P-{index}"), - Identifier::Secondary { + Identifier::StartToPrimary { offset } => write!(f, "start-to-primary-{offset}"), + Identifier::StartToSecondary { ts_kind, offset } => { + write!(f, "start-to-secondary{}-{offset}", ts_kind.digits()) + } + Identifier::PrimaryToPrimary { index, offset } => { + write!(f, "primary-to-primary-{index}-{offset}") + } + Identifier::PrimaryToSecondary { + index, + ts_kind, + offset, + } => write!( + f, + "primary-to-secondary{}-{index}-{offset}", + ts_kind.digits() + ), + Identifier::SecondaryToSecondary { + index, + ts_kind, + first_secondary_index, + offset, + } => write!( + f, + "secondary{}-to-secondary{}-{first_secondary_index}-{index}-{offset}", + ts_kind.digits(), + ts_kind.digits() + ), + Identifier::SecondaryToPrimary { index, ts_kind, first_secondary_index, - } => write!(f, "S{}-{first_secondary_index}-{index}", ts_kind.digits()), + offset, + } => write!( + f, + "secondary{}-to-primary-{first_secondary_index}-{index}-{offset}", + ts_kind.digits() + ), Identifier::End => write!(f, "end"), } } diff --git a/lib_ts_chainalign/src/chain_align/chainer/max_successors_iterator.rs b/lib_ts_chainalign/src/chain_align/chainer/max_successors_iterator.rs new file mode 100644 index 0000000..7c74440 --- /dev/null +++ b/lib_ts_chainalign/src/chain_align/chainer/max_successors_iterator.rs @@ -0,0 +1,61 @@ +use generic_a_star::cost::AStarCost; + +use crate::anchors::index::AnchorIndex; + +pub struct MaxSuccessorsIterator, Cost> { + iter: Iter, + count: usize, + max_count: usize, + initial_cost: Cost, + next_cost: Option, +} + +impl, Cost: AStarCost> + MaxSuccessorsIterator +{ + pub fn new(iter: Iter, max_count: usize) -> Self { + Self { + iter, + count: 0, + max_count, + initial_cost: Cost::zero(), + next_cost: None, + } + } + + pub fn successor_count(&self) -> usize { + self.count + } + + pub fn next_cost(&self) -> Option { + self.next_cost + } +} + +impl, Cost: AStarCost> Iterator + for MaxSuccessorsIterator +{ + type Item = (AnchorIndex, Cost); + + fn next(&mut self) -> Option { + let Some((anchor_index, cost)) = self.iter.next() else { + return None; + }; + + if cost == Cost::max_value() { + return None; + } + + if self.count == 0 { + self.count += 1; + self.initial_cost = cost; + Some((anchor_index, cost)) + } else if self.count < self.max_count || cost == self.initial_cost { + self.count += 1; + Some((anchor_index, cost)) + } else { + self.next_cost = Some(cost); + None + } + } +} diff --git a/lib_ts_chainalign/src/chaining_cost_function.rs b/lib_ts_chainalign/src/chaining_cost_function.rs index ab1c5b0..d55c440 100644 --- a/lib_ts_chainalign/src/chaining_cost_function.rs +++ b/lib_ts_chainalign/src/chaining_cost_function.rs @@ -474,12 +474,13 @@ impl ChainingCostFunction { pub fn iter_primary_in_cost_order( &mut self, from_primary_index: AnchorIndex, + offset: AnchorIndex, ) -> impl Iterator where Cost: Copy + Ord, { self.primary - .iter_in_cost_order(from_primary_index + 1) + .iter_in_cost_order_from(from_primary_index + 1, offset) .filter_map(|(to_primary_index, chaining_cost)| { (to_primary_index != AnchorIndex::zero()) .then(|| (to_primary_index - 1, chaining_cost)) @@ -488,12 +489,13 @@ impl ChainingCostFunction { pub fn iter_primary_from_start_in_cost_order( &mut self, + offset: AnchorIndex, ) -> impl Iterator where Cost: Copy + Ord, { self.primary - .iter_in_cost_order(AnchorIndex::zero()) + .iter_in_cost_order_from(AnchorIndex::zero(), offset) .filter_map(|(to_primary_index, chaining_cost)| { (to_primary_index != AnchorIndex::zero()) .then(|| (to_primary_index - 1, chaining_cost)) @@ -504,47 +506,51 @@ impl ChainingCostFunction { &mut self, from_primary_index: AnchorIndex, ts_kind: TsKind, + offset: AnchorIndex, ) -> impl Iterator where Cost: Copy + Ord, { self.jump_12_array_mut(ts_kind) - .iter_in_cost_order(from_primary_index + 1) + .iter_in_cost_order_from(from_primary_index + 1, offset) } pub fn iter_jump_12_from_start_in_cost_order( &mut self, ts_kind: TsKind, + offset: AnchorIndex, ) -> impl Iterator where Cost: Copy + Ord, { self.jump_12_array_mut(ts_kind) - .iter_in_cost_order(Self::primary_start_anchor_index()) + .iter_in_cost_order_from(Self::primary_start_anchor_index(), offset) } pub fn iter_secondary_in_cost_order( &mut self, from_secondary_index: AnchorIndex, ts_kind: TsKind, + offset: AnchorIndex, ) -> impl Iterator where Cost: Copy + Ord, { self.secondary_array_mut(ts_kind) - .iter_in_cost_order(from_secondary_index) + .iter_in_cost_order_from(from_secondary_index, offset) } pub fn iter_jump_34_in_cost_order( &mut self, from_secondary_index: AnchorIndex, ts_kind: TsKind, + offset: AnchorIndex, ) -> impl Iterator where Cost: Copy + Ord, { self.jump_34_array_mut(ts_kind) - .iter_in_cost_order(from_secondary_index) + .iter_in_cost_order_from(from_secondary_index, offset) .filter_map(|(to_primary_index, chaining_cost)| { (to_primary_index != AnchorIndex::zero()) .then(|| (to_primary_index - 1, chaining_cost)) diff --git a/lib_ts_chainalign/src/chaining_cost_function/cost_array.rs b/lib_ts_chainalign/src/chaining_cost_function/cost_array.rs index 28c0ec9..6f75e32 100644 --- a/lib_ts_chainalign/src/chaining_cost_function/cost_array.rs +++ b/lib_ts_chainalign/src/chaining_cost_function/cost_array.rs @@ -1,6 +1,7 @@ use std::ops::{Index, IndexMut}; use bitvec::{bitvec, order::LocalBits, vec::BitVec}; +use num_traits::Zero; use crate::anchors::index::AnchorIndex; @@ -41,30 +42,49 @@ impl ChainingCostArray { (self.len[0], self.len[1]) } - /// Iterate over all ordinates `c2` that belong to this `c1` in order of their cost. - pub fn iter_in_cost_order( - &mut self, - c1: AnchorIndex, - ) -> impl Iterator + fn compute_cost_order_permutation_if_missing(&mut self, c1: AnchorIndex) where Cost: Copy + Ord, { if self.cost_order_permutation[c1.as_usize()].is_empty() { self.cost_order_permutation[c1.as_usize()].extend( - (0usize..) - .take(self.len[1].as_usize()) + ((0..c1.as_usize()).chain(c1.as_usize() + 1..)) + .take(self.len[1].as_usize() - 1) .map(AnchorIndex::from), ); self.cost_order_permutation[c1.as_usize()] .sort_unstable_by_key(|c2| self.data[coordinates_to_index(c1, *c2, self.len)]); } + } + + /// Iterate over all ordinates `c2` that belong to this `c1` in order of their cost. + #[expect(dead_code)] + pub fn iter_in_cost_order( + &mut self, + c1: AnchorIndex, + ) -> impl Iterator + where + Cost: Copy + Ord, + { + self.iter_in_cost_order_from(c1, AnchorIndex::zero()) + } + pub fn iter_in_cost_order_from( + &mut self, + c1: AnchorIndex, + offset: AnchorIndex, + ) -> impl Iterator + where + Cost: Copy + Ord, + { + self.compute_cost_order_permutation_if_missing(c1); let data = &self.data; let len = self.len; self.cost_order_permutation[c1.as_usize()] .iter() .copied() - .filter_map(move |c2| (c1 != c2).then(|| (c2, data[coordinates_to_index(c1, c2, len)]))) + .skip(offset.as_usize()) + .map(move |c2| (c2, data[coordinates_to_index(c1, c2, len)])) } } diff --git a/lib_ts_chainalign/src/lib.rs b/lib_ts_chainalign/src/lib.rs index cbfe68e..4490bd7 100644 --- a/lib_ts_chainalign/src/lib.rs +++ b/lib_ts_chainalign/src/lib.rs @@ -9,6 +9,7 @@ use log::{debug, info, trace}; use crate::{ alignment::{coordinates::AlignmentCoordinates, sequences::AlignmentSequences}, anchors::Anchors, + chain_align::AlignmentParameters, chaining_cost_function::ChainingCostFunction, chaining_lower_bounds::ChainingLowerBounds, costs::AlignmentCosts, @@ -40,6 +41,7 @@ pub fn align( reference: Vec, query: Vec, range: AlignmentRange, + parameters: &AlignmentParameters, rc_fn: &dyn Fn(u8) -> u8, reference_name: &str, query_name: &str, @@ -71,6 +73,7 @@ pub fn align( &sequences, start, end, + parameters, chaining_lower_bounds.alignment_costs(), rc_fn, chaining_lower_bounds.max_match_run(), diff --git a/tsalign/src/align.rs b/tsalign/src/align.rs index 1bda7dc..1793a9e 100644 --- a/tsalign/src/align.rs +++ b/tsalign/src/align.rs @@ -109,6 +109,13 @@ pub struct Cli { #[clap(long, default_value = "maximise")] ts_total_length_strategy: TemplateSwitchTotalLengthStrategySelector, + /// The maximum amount of successors to generate while processing a node during chaining. + /// Can be tuned to optimise performance. + /// + /// This applies only to tschainalign. + #[clap(long, default_value = "5")] + max_chaining_successors: usize, + /// If set, template switches are not allowed. /// /// Use this to compare a template switch alignment against an alignment with out template switches. diff --git a/tsalign/src/align/a_star_chain_ts.rs b/tsalign/src/align/a_star_chain_ts.rs index 9620611..1d0a9ca 100644 --- a/tsalign/src/align/a_star_chain_ts.rs +++ b/tsalign/src/align/a_star_chain_ts.rs @@ -2,7 +2,10 @@ use compact_genome::interface::{ alphabet::{Alphabet, AlphabetCharacter}, sequence::GenomeSequence, }; -use lib_ts_chainalign::{chaining_lower_bounds::ChainingLowerBounds, costs::AlignmentCosts}; +use lib_ts_chainalign::{ + chain_align::AlignmentParameters, chaining_lower_bounds::ChainingLowerBounds, + costs::AlignmentCosts, +}; use lib_tsalign::{ a_star_aligner::alignment_geometry::AlignmentRange, config::TemplateSwitchConfig, }; @@ -90,10 +93,15 @@ pub fn align_a_star_chain_ts< let reference = reference.clone_as_vec(); let query = query.clone_as_vec(); + let parameters = AlignmentParameters { + max_successors: cli.max_chaining_successors, + }; + let alignment = lib_ts_chainalign::align::( reference, query, range, + ¶meters, &|c| { AlphabetType::character_to_ascii( AlphabetType::ascii_to_character(c).unwrap().complement(), From 00aab139f4405a6144c5ca8459bed6f29bf32da0 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Mon, 15 Dec 2025 09:58:47 +0200 Subject: [PATCH 7/9] Update development rust version. --- generic_a_star/src/lib.rs | 6 ++++++ rust-toolchain.toml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/generic_a_star/src/lib.rs b/generic_a_star/src/lib.rs index 9f0baa1..58faef6 100644 --- a/generic_a_star/src/lib.rs +++ b/generic_a_star/src/lib.rs @@ -100,6 +100,12 @@ pub trait AStarContext: Reset { } } +/// The closed list for the A* algorithm. +pub trait AStarClosedList: Reset {} + +/// The open list for the A* algorithm. +pub trait AStarOpenList: Reset {} + #[derive(Debug, Default)] pub struct AStarPerformanceCounters { pub opened_nodes: usize, diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 6017514..2289e56 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,3 +1,3 @@ [toolchain] -channel = "1.85.1" +channel = "1.92.0" components = ["rust-analyzer"] From 77b3b46f7c503086cb89e1998f9eb97f946ec85d Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Mon, 15 Dec 2025 10:01:30 +0200 Subject: [PATCH 8/9] Use macos-latest in CI. --- .github/workflows/python.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 50c689d..2c34e63 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -73,9 +73,9 @@ jobs: strategy: matrix: platform: - - runner: macos-13 + - runner: macos-latest target: x86_64 - - runner: macos-14 + - runner: macos-latest target: aarch64 steps: - uses: actions/checkout@v4 From bea05d5cbc37bd6d484beddfd44d009918791cd9 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Mon, 15 Dec 2025 10:08:53 +0200 Subject: [PATCH 9/9] Fix new lints. --- .vscode/settings.json | 3 ++- lib_ts_chainalign/src/chain_align.rs | 4 ++-- lib_ts_chainalign/src/chain_align/chainer.rs | 8 ++++---- .../chainer/max_successors_iterator.rs | 4 +--- .../src/chaining_cost_function.rs | 20 +++++++------------ lib_ts_chainalign/src/lib.rs | 1 + 6 files changed, 17 insertions(+), 23 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 8b978f6..ad4029f 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -20,5 +20,6 @@ "Cargo.toml", "*.fa", "*.toml" - ] + ], + "rust-analyzer.check.command": "clippy" } \ No newline at end of file diff --git a/lib_ts_chainalign/src/chain_align.rs b/lib_ts_chainalign/src/chain_align.rs index f13ac01..35af6cd 100644 --- a/lib_ts_chainalign/src/chain_align.rs +++ b/lib_ts_chainalign/src/chain_align.rs @@ -527,7 +527,7 @@ fn evaluate_chain( ) => { if anchors .primary(from_index) - .is_direct_predecessor_of(&anchors.primary(to_index)) + .is_direct_predecessor_of(anchors.primary(to_index)) { alignments.push(Alignment::from(vec![AlignmentType::Match])); continue; @@ -635,7 +635,7 @@ fn evaluate_chain( assert_eq!(ts_kind, to_ts_kind); if anchors .secondary(from_index, ts_kind) - .is_direct_predecessor_of(&anchors.secondary(to_index, ts_kind)) + .is_direct_predecessor_of(anchors.secondary(to_index, ts_kind)) { alignments.push(Alignment::from(vec![AlignmentType::Match])); continue; diff --git a/lib_ts_chainalign/src/chain_align/chainer.rs b/lib_ts_chainalign/src/chain_align/chainer.rs index 7cb85fc..1b74422 100644 --- a/lib_ts_chainalign/src/chain_align/chainer.rs +++ b/lib_ts_chainalign/src/chain_align/chainer.rs @@ -104,7 +104,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { fn generate_successors(&mut self, node: &Self::Node, output: &mut impl Extend) { let predecessor = Some(node.identifier); let predecessor_cost = node.cost; - let primary_end_anchor_index = AnchorIndex::from(self.anchors.primary_len()); + let primary_end_anchor_index = self.anchors.primary_len(); if DEBUG_CHAINER { println!("Generating successors of {node}"); @@ -351,7 +351,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { let first_anchor = &self.anchors.secondary(first_secondary_index, ts_kind); let ts_length = first_anchor.ts_length_until( - &self.anchors.secondary(successor_index, ts_kind), + self.anchors.secondary(successor_index, ts_kind), ts_kind, self.k, ); @@ -461,7 +461,7 @@ fn generate_primary_successors( primary_end_anchor_index: AnchorIndex, ) -> impl Iterator> { (index == primary_end_anchor_index) - .then(|| Identifier::End) + .then_some(Identifier::End) .into_iter() .chain( (index != primary_end_anchor_index) @@ -512,7 +512,7 @@ fn generate_secondary_successors( }), ] .into_iter() - .filter_map(|i| i) + .flatten() .map(move |identifier| Node { identifier, predecessor, diff --git a/lib_ts_chainalign/src/chain_align/chainer/max_successors_iterator.rs b/lib_ts_chainalign/src/chain_align/chainer/max_successors_iterator.rs index 7c74440..0c0c511 100644 --- a/lib_ts_chainalign/src/chain_align/chainer/max_successors_iterator.rs +++ b/lib_ts_chainalign/src/chain_align/chainer/max_successors_iterator.rs @@ -38,9 +38,7 @@ impl, Cost: AStarCost> Iterator type Item = (AnchorIndex, Cost); fn next(&mut self) -> Option { - let Some((anchor_index, cost)) = self.iter.next() else { - return None; - }; + let (anchor_index, cost) = self.iter.next()?; if cost == Cost::max_value() { return None; diff --git a/lib_ts_chainalign/src/chaining_cost_function.rs b/lib_ts_chainalign/src/chaining_cost_function.rs index d55c440..9635f7e 100644 --- a/lib_ts_chainalign/src/chaining_cost_function.rs +++ b/lib_ts_chainalign/src/chaining_cost_function.rs @@ -26,7 +26,7 @@ impl ChainingCostFunction { end: AlignmentCoordinates, ) -> Self { let k = usize::try_from(chaining_lower_bounds.max_match_run() + 1).unwrap(); - let primary_anchor_amount = AnchorIndex::from(anchors.primary_len() + 2); + let primary_anchor_amount = anchors.primary_len() + 2; let primary_start_anchor_index = AnchorIndex::zero(); let primary_end_anchor_index = primary_anchor_amount - 1; @@ -481,10 +481,8 @@ impl ChainingCostFunction { { self.primary .iter_in_cost_order_from(from_primary_index + 1, offset) - .filter_map(|(to_primary_index, chaining_cost)| { - (to_primary_index != AnchorIndex::zero()) - .then(|| (to_primary_index - 1, chaining_cost)) - }) + .filter(|(to_primary_index, _)| *to_primary_index != AnchorIndex::zero()) + .map(|(to_primary_index, chaining_cost)| (to_primary_index - 1, chaining_cost)) } pub fn iter_primary_from_start_in_cost_order( @@ -496,10 +494,8 @@ impl ChainingCostFunction { { self.primary .iter_in_cost_order_from(AnchorIndex::zero(), offset) - .filter_map(|(to_primary_index, chaining_cost)| { - (to_primary_index != AnchorIndex::zero()) - .then(|| (to_primary_index - 1, chaining_cost)) - }) + .filter(|(to_primary_index, _)| *to_primary_index != AnchorIndex::zero()) + .map(|(to_primary_index, chaining_cost)| (to_primary_index - 1, chaining_cost)) } pub fn iter_jump_12_in_cost_order( @@ -551,9 +547,7 @@ impl ChainingCostFunction { { self.jump_34_array_mut(ts_kind) .iter_in_cost_order_from(from_secondary_index, offset) - .filter_map(|(to_primary_index, chaining_cost)| { - (to_primary_index != AnchorIndex::zero()) - .then(|| (to_primary_index - 1, chaining_cost)) - }) + .filter(|(to_primary_index, _)| *to_primary_index != AnchorIndex::zero()) + .map(|(to_primary_index, chaining_cost)| (to_primary_index - 1, chaining_cost)) } } diff --git a/lib_ts_chainalign/src/lib.rs b/lib_ts_chainalign/src/lib.rs index 4490bd7..d008636 100644 --- a/lib_ts_chainalign/src/lib.rs +++ b/lib_ts_chainalign/src/lib.rs @@ -37,6 +37,7 @@ pub fn preprocess( ChainingLowerBounds::new(max_n, max_match_run, alignment_costs) } +#[expect(clippy::too_many_arguments)] pub fn align( reference: Vec, query: Vec,