@@ -286,11 +286,15 @@ impl OnePass {
286
286
// No need to mask because no flags are set.
287
287
state_ptr = self . follow ( state_ptr as usize , byte_class) ;
288
288
} else {
289
+ // STATE_HALT and STATE_DEAD must always be checked
290
+ // first because they have STATE_ACTION and STATE_MATCH
291
+ // set, even though those flags don't apply. It would
292
+ // probably be better for performance to check them last,
293
+ // so it may be worthwhile to try to rejigger the
294
+ // representation of StatePtrs.
289
295
if state_ptr == STATE_HALT {
290
296
break ;
291
- }
292
-
293
- if state_ptr == STATE_DEAD {
297
+ } else if state_ptr == STATE_DEAD {
294
298
trace ! ( "::exec_ drain-dead" ) ;
295
299
slots[ FULL_MATCH_CAPTURE_END ] = last_match;
296
300
return last_match. is_some ( ) ;
@@ -474,7 +478,7 @@ pub struct OnePassCompiler {
474
478
/// if they should have the STATE_MATCH flag set.
475
479
accepting_states : Vec < bool > ,
476
480
477
- /// A DAG of forwarding relationship indicating when
481
+ /// A DAG of forwarding relationships indicating when
478
482
/// a state needs to be forwarded to an Action state
479
483
/// once that Action state has been fully constructed.
480
484
forwards : Forwards ,
@@ -595,6 +599,13 @@ impl OnePassCompiler {
595
599
Ok ( self . onepass )
596
600
}
597
601
602
+ /// Compile the stage 1 transition table for the state corresponding
603
+ /// to the given instruction.
604
+ ///
605
+ /// The result of `inst_trans` will end up in `self.transitions`.
606
+ ///
607
+ /// Returns a list of child instructions which must be compiled
608
+ /// via `inst_trans`.
598
609
fn inst_trans (
599
610
& mut self ,
600
611
inst_idx : usize
@@ -671,6 +682,45 @@ impl OnePassCompiler {
671
682
Ok ( children)
672
683
}
673
684
685
+ /// Topologically sort the forwarding jobs so that we
686
+ /// start with jobs that have no dependencies and then
687
+ /// shuffle the transitions over. Mutate `self.transitions`
688
+ /// in place.
689
+ ///
690
+ /// To make that a little more concrete, consider the program snippet:
691
+ ///
692
+ /// 0000: Bytes(a, a)
693
+ /// 0001: Save(2)
694
+ /// 0002: Bytes(b, b)
695
+ ///
696
+ /// Here the state for `Bytes(a, a)` needs to transition to
697
+ /// the state for `Save(2)`, but it does not know when to do
698
+ /// so. The right answer is that it should transition to
699
+ /// the `Save(2)` state when it sees a `b`, but it is hard
700
+ /// to know what children `Save(2)` has from where `Bytes(a, a)`
701
+ /// stands. To handle this we just emit a forwarding job
702
+ /// that says "when you know enough about the `Save(2)` state,
703
+ /// please forward `Bytes(a, a)` to `Save(2)`." We need to use
704
+ /// a full DAG for this because there could be multiple forwarding
705
+ /// states in a row:
706
+ ///
707
+ /// 0000: Bytes(a, a)
708
+ /// 0001: Save(2)
709
+ /// 0002: Save(3)
710
+ /// 0003: Bytes(b, b)
711
+ ///
712
+ /// Here we will end up with two forwarding jobs:
713
+ ///
714
+ /// 1. Forward from `Bytes(a, a)` to `Save(2)`.
715
+ /// 2. Forward from `Save(2)` to `Save(3)`.
716
+ ///
717
+ /// Which we structure as a DAG that looks like:
718
+ ///
719
+ /// (2) --> (1)
720
+ ///
721
+ /// The arrow flows in a funny direction because we want the jobs
722
+ /// with no dependencies to live at the roots of the DAG so that
723
+ /// we can process them first.
674
724
fn solve_forwards ( & mut self ) -> Result < ( ) , OnePassError > {
675
725
// TODO(ethan):yakshaving drop the clone
676
726
for fwd in self . forwards . clone ( ) . into_iter_topo ( ) {
@@ -718,9 +768,9 @@ impl OnePassCompiler {
718
768
}
719
769
720
770
// Finally, if a match instruction is reachable through
721
- // a save fwd, the from state is accepting.
722
- match ( & self . prog [ fwd. to ] , & self . prog [ fwd . from ] ) {
723
- ( & Inst :: Save ( _ ) , _) => {
771
+ // a save fwd (which can never fail), the from state is accepting.
772
+ match & self . prog [ fwd. to ] {
773
+ & Inst :: Save ( _) => {
724
774
self . accepting_states [ fwd. from ] =
725
775
self . accepting_states [ fwd. to ] ;
726
776
}
@@ -731,11 +781,12 @@ impl OnePassCompiler {
731
781
Ok ( ( ) )
732
782
}
733
783
734
- // Once all the per-instruction transition tables have been worked
735
- // out, we can bake them into the single flat transition table we
736
- // are going to use for the actual DFA.
784
+ /// Once all the per-instruction transition tables have been worked
785
+ /// out, we can bake them into the single flat transition table we
786
+ /// are going to use for the actual DFA. This function creates the
787
+ /// baked form, storing it in `self.onepass.table`.
737
788
fn emit_transitions ( & mut self ) {
738
- // pre-compute the state indicies
789
+ // pre-compute the state indices
739
790
let mut state_starts = Vec :: with_capacity ( self . prog . len ( ) ) ;
740
791
let mut off = 0 ;
741
792
for inst_idx in 0 ..self . prog . len ( ) {
@@ -760,10 +811,12 @@ impl OnePassCompiler {
760
811
p
761
812
} ;
762
813
814
+ self . onepass . table . reserve (
815
+ state_starts[ state_starts. len ( ) - 1 ]
816
+ + self . onepass . num_byte_classes ) ;
763
817
for inst_idx in 0 ..self . prog . len ( ) {
764
818
let mut trans = Vec :: with_capacity (
765
- state_starts[ state_starts. len ( ) - 1 ]
766
- + self . onepass . num_byte_classes ) ;
819
+ self . onepass . num_byte_classes * 2 ) ;
767
820
768
821
match & self . transitions [ inst_idx] {
769
822
& None => continue ,
@@ -1102,7 +1155,7 @@ const STATE_DEAD: StatePtr = STATE_MATCH + 1;
1102
1155
/// from DEAD only in that an accepting state that transitions to HALT
1103
1156
/// still accepts, while an accepting state which transitions to DEAD
1104
1157
/// does not.
1105
- const STATE_HALT : StatePtr = STATE_MATCH + 2 ;
1158
+ const STATE_HALT : StatePtr = STATE_ACTION + 1 ;
1106
1159
1107
1160
/// The maximum state pointer. This is useful to mask out the "valid" state
1108
1161
/// pointer from a state with the "start" or "match" bits set.
0 commit comments