Skip to content

Commit 223e14c

Browse files
author
Ethan Pailes
committed
Flatten onepass member of the OnePassCompiler
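Embedding the OnePass DFA to be compiled in the OnePassCompiler caused a few values to be unnecessarily duplicated and added an extra level of indirection. This patch resolves that issue by storing the table, num_byte_classes, and only_utf8 fields directly on the compiler and by moving the program's prefixes and byte_classes into the finished OnePass instead of cloning them up front, which takes advantage of these move semantics I'm always hearing about.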
1 parent 103f20e commit 223e14c

File tree

1 file changed

+39
-38
lines changed

1 file changed

+39
-38
lines changed

src/onepass.rs

Lines changed: 39 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -50,22 +50,17 @@ pub struct OnePass {
5050
/// DFA states.
5151
table: Vec<StatePtr>,
5252
/// The prefixes.
53-
///
54-
/// TODO(ethan):yakshaving This guy is getting computed 4 times
55-
/// at the moment, which seems a bit extra.
56-
/// talk with @burntsushi to figure out how
57-
/// to drop the duplicated work.
5853
prefixes: LiteralSearcher,
5954
/// The stride.
6055
num_byte_classes: usize,
6156
/// The byte classes of this regex.
6257
byte_classes: Vec<u8>,
6358
/// The starting state.
6459
start_state: StatePtr,
65-
/// True if the regex is anchored at the end.
66-
is_anchored_end: bool,
6760
/// True if the regex is anchored at the start.
6861
is_anchored_start: bool,
62+
/// True if the regex is anchored at the end.
63+
is_anchored_end: bool,
6964
/// True if this regex ought to only accept utf8 strings.
7065
only_utf8: bool,
7166
}
@@ -468,13 +463,19 @@ impl OnePass {
468463

469464
/// Compiler for a OnePass DFA
470465
pub struct OnePassCompiler {
471-
onepass: OnePass,
466+
/// The flattened transition table AKA the baked form of the DFA.
467+
table: Vec<StatePtr>,
468+
469+
num_byte_classes: usize,
470+
only_utf8: bool,
471+
472+
/// The program to be compiled.
472473
prog: Program,
473474

474-
/// A mapping from instruction indicies to their transitions
475+
/// A mapping from instruction indices to their transitions
475476
transitions: Vec<Option<TransitionTable>>,
476477

477-
/// A mapping from instruction indicies to flags indicating
478+
/// A mapping from instruction indices to flags indicating
478479
/// if they should have the STATE_MATCH flag set.
479480
accepting_states: Vec<bool>,
480481

@@ -555,16 +556,10 @@ impl OnePassCompiler {
555556

556557
trace!("new compiler for:\n{:?}", prog);
557558
Ok(OnePassCompiler {
558-
onepass: OnePass {
559-
table: vec![],
560-
prefixes: prog.prefixes.clone(),
561-
num_byte_classes: num_byte_classes,
562-
byte_classes: prog.byte_classes.clone(),
563-
start_state: 0,
564-
is_anchored_end: prog.is_anchored_end,
565-
is_anchored_start: prog.is_anchored_start,
566-
only_utf8: only_utf8,
567-
},
559+
table: vec![],
560+
num_byte_classes: num_byte_classes,
561+
only_utf8: only_utf8,
562+
568563
transitions: {
569564
let mut x = Vec::new();
570565
for _ in 0..prog.len() {
@@ -594,9 +589,17 @@ impl OnePassCompiler {
594589
// Now emit the transitions in a form that we can actually
595590
// execute.
596591
self.emit_transitions();
597-
self.onepass.start_state = 0 | STATE_ACTION;
598592

599-
Ok(self.onepass)
593+
Ok(OnePass {
594+
table: self.table,
595+
prefixes: self.prog.prefixes,
596+
num_byte_classes: self.num_byte_classes,
597+
byte_classes: self.prog.byte_classes,
598+
start_state: 0 | STATE_ACTION,
599+
is_anchored_start: self.prog.is_anchored_start,
600+
is_anchored_end: self.prog.is_anchored_end,
601+
only_utf8: self.only_utf8,
602+
})
600603
}
601604

602605
/// Compile the stage 1 transition table for the state corresponding
@@ -629,7 +632,7 @@ impl OnePassCompiler {
629632

630633
let mut trans = TransitionTable(
631634
vec![Transition { tgt: TransitionTarget::Die, priority: 0 };
632-
self.onepass.num_byte_classes]);
635+
self.num_byte_classes]);
633636

634637
// Start at priority 1 because everything is higher priority than
635638
// the initial list of `TransitionTarget::Die` pointers.
@@ -648,7 +651,7 @@ impl OnePassCompiler {
648651
// closed-open ranges.
649652
for byte in (inst.start as usize)..(inst.end as usize + 1) {
650653
let byte = byte as u8;
651-
let bc = self.onepass.byte_classes[byte as usize];
654+
let bc = self.prog.byte_classes[byte as usize];
652655
trans.0[bc as usize] = Transition {
653656
tgt: TransitionTarget::BytesInst(child_idx),
654657
priority: priority
@@ -784,19 +787,19 @@ impl OnePassCompiler {
784787
/// Once all the per-instruction transition tables have been worked
785788
/// out, we can bake them into the single flat transition table we
786789
/// are going to use for the actual DFA. This function creates the
787-
/// baked form, storing it in `self.onepass.table`.
790+
/// baked form, storing it in `self.table`.
788791
fn emit_transitions(&mut self) {
789792
// pre-compute the state indices
790793
let mut state_starts = Vec::with_capacity(self.prog.len());
791794
let mut off = 0;
792795
for inst_idx in 0..self.prog.len() {
793796
state_starts.push(off);
794797
if self.transitions[inst_idx].is_some() {
795-
off += self.onepass.num_byte_classes;
798+
off += self.num_byte_classes;
796799

797800
match &self.prog[inst_idx] {
798801
&Inst::EmptyLook(_) | &Inst::Save(_) => {
799-
off += self.onepass.num_byte_classes;
802+
off += self.num_byte_classes;
800803
}
801804
_ => {}
802805
}
@@ -811,12 +814,10 @@ impl OnePassCompiler {
811814
p
812815
};
813816

814-
self.onepass.table.reserve(
815-
state_starts[state_starts.len() - 1]
816-
+ self.onepass.num_byte_classes);
817+
self.table.reserve(state_starts[state_starts.len() - 1]
818+
+ self.num_byte_classes);
817819
for inst_idx in 0..self.prog.len() {
818-
let mut trans = Vec::with_capacity(
819-
self.onepass.num_byte_classes * 2);
820+
let mut trans = Vec::with_capacity(self.num_byte_classes * 2);
820821

821822
match &self.transitions[inst_idx] {
822823
&None => continue,
@@ -834,26 +835,26 @@ impl OnePassCompiler {
834835
}
835836
}
836837

837-
self.onepass.table.extend(trans);
838+
self.table.extend(trans);
838839

839840
// emit all the right window dressing for the action, if
840841
// there is one.
841842
match &self.prog[inst_idx] {
842843
&Inst::Save(ref inst) => {
843-
debug_assert!(self.onepass.num_byte_classes >= 2);
844+
debug_assert!(self.num_byte_classes >= 2);
844845

845846
let mut save_args = vec![
846847
Action::Save as StatePtr,
847848
inst.slot as StatePtr];
848849
save_args.extend(vec![STATE_POISON;
849-
self.onepass.num_byte_classes - 2]);
850-
self.onepass.table.extend(save_args);
850+
self.num_byte_classes - 2]);
851+
self.table.extend(save_args);
851852
}
852853
&Inst::EmptyLook(ref inst) => {
853854
let mut el_args = vec![self.empty_look_action(inst.look)];
854855
el_args.extend(vec![STATE_POISON;
855-
self.onepass.num_byte_classes - 1]);
856-
self.onepass.table.extend(el_args);
856+
self.num_byte_classes - 1]);
857+
self.table.extend(el_args);
857858
}
858859
_ => {}
859860
}

0 commit comments

Comments
 (0)