Skip to content

Commit 9cca6b8

Browse files
author
Takanori MAEHARA
authored
Segment Recognizer (evaluate automaton run in O(|M|) time)
1 parent 4f3455f commit 9cca6b8

File tree

1 file changed

+149
-0
lines changed

1 file changed

+149
-0
lines changed
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
//
2+
// Segment Recognizer
3+
//
4+
// Description:
5+
// Let M be an automaton and x be a sequence of input symbols.
6+
// The segment recognizer computes the transitioned state
7+
// starting from s and reading x[i,j) in O(|M|) time.
8+
// The preprocessing requires O(|M| |x|) time and space.
9+
//
10+
// The same method is implemented by the segment tree,
11+
// where the time complexity is O(log n) and the space
12+
// complexity is O(n log n). Thus, the segment recognizer
13+
// is efficient if |M| is small.
14+
//
15+
// Algorithm:
16+
// Basically, it stores all the runs from all initial
17+
// position i and initial state s. To reduce the space,
18+
// it merges two runs if they yield the same state.
19+
//
20+
// Reference
21+
// Mikolaj Bojanczyk (2009): "Factorization forests",
22+
// International Conference on Developments in Language Theory,
23+
// pp. 1--17.
24+
//
25+
#include <bits/stdc++.h>
26+
27+
using namespace std;
28+
29+
#define fst first
30+
#define snd second
31+
#define all(c) ((c).begin()), ((c).end())
32+
#define TEST(s) if (!(s)) { cout << __LINE__ << " " << #s << endl; exit(-1); }
33+
34+
35+
// === tick a time ===
36+
#include <ctime>
37+
// Stopwatch helper based on std::clock().
//
// Returns the clock() time, converted to seconds, that has passed since the
// previous call of tick(). The baseline is a zero-initialised function-local
// static, so the very first call reports the clock() reading itself.
// The shared static makes this helper unsuitable for concurrent use.
double tick() {
  static std::clock_t oldtick;  // zero-initialised before the first call
  const std::clock_t newtick = std::clock();
  const double diff = 1.0 * (newtick - oldtick) / CLOCKS_PER_SEC;
  oldtick = newtick;
  return diff;
}
44+
45+
// Automaton with MOD states whose state is the running sum of the letters
// read so far, taken modulo MOD.
//
//   init      : initial state (0).
//   size()    : number of states (MOD).
//   next(s,d) : state reached from s after reading the integer letter d.
//               The result is always normalised into [0, MOD), also when
//               s + d is negative, so it can be used as an array index.
//   accept(s) : non-zero iff s is the state 0.
template <int MOD>
struct ModuloAutomaton {
  const int init = 0;
  int size() const { return MOD; }
  int next(int s, int d) const {
    // Widen before adding so that s + d cannot overflow int, then fold the
    // possibly negative remainder of % back into the valid state range.
    long long r = (static_cast<long long>(s) + d) % MOD;
    if (r < 0) r += MOD;
    return static_cast<int>(r);
  }
  int accept(int s) const { return s == 0; }
};
52+
53+
// Three-state automaton over the letters d in {0, 1}.
//
// States:
//   0: free
//   1: selected
//   2: bottom (absorbing, the only rejecting state)
//
// Transitions:
//   from 0: go to d            (0 -> 0 on d == 0, 0 -> 1 on d == 1)
//   from 1: go to 2 * d        (1 -> 0 on d == 0, 1 -> 2 on d == 1)
//   from 2: stay in 2
//
// NOTE(review): letters are presumably restricted to {0, 1}; any other d is
// passed through the formulas above unchecked -- confirm against callers.
struct IndependenceAutomaton {
  const int init = 0;
  int size() const { return 3; }
  int next(int s, int d) const {
    if (s == 0) return d;
    if (s == 1) return 2 * d;
    // State 2 is absorbing. An unknown state is mapped to it as well, so the
    // function never flows off its end without returning a value.
    return 2;
  }
  int accept(int s) const { return s != 2; }
};
66+
67+
// Answers "which state does M reach when it starts in state s at position i
// and reads x[i, j)?" after preprocessing the whole input once.
//
// Automaton must provide
//   int size() const;             number of states, numbered 0 .. size()-1
//   int next(int s, int d) const; transition function
//
// Data layout:
//   tapes       : every tape stores one run of the automaton, i.e. the states
//                 at the consecutive positions begin, begin+1, ...
//   index[i][s] : id of the tape that holds state s at position i.
// At every position each state lies on exactly one tape. When two runs would
// reach the same state only the first keeps growing; the other tape stops and
// a fresh tape is opened for a state that no run reached.
//
// The file header states O(|M| |x|) preprocessing time/space and O(|M|) time
// per query.
template <class Automaton>
struct SegmentRecognizer {
  Automaton M;
  std::vector<int> x;

  struct Tape {
    int begin;                  // position of sequence[0]
    std::vector<int> sequence;  // states at positions begin, begin + 1, ...
  };
  std::vector<std::vector<int>> index;
  std::vector<Tape> tapes;

  // Builds the tapes for automaton M and input word x.
  SegmentRecognizer(Automaton M, std::vector<int> x)
      : M(std::move(M)), x(std::move(x)) {
    // The parameters have been moved from; only the members are used below.
    const Automaton& automaton = this->M;
    const std::vector<int>& input = this->x;
    const int states = automaton.size();
    const int length = static_cast<int>(input.size());

    index.assign(length + 1, std::vector<int>(states));

    // stripe[r] is the id of the tape that stripe r is currently extending.
    std::vector<int> stripe(states);
    tapes.reserve(states);
    for (int r = 0; r < states; ++r) {
      stripe[r] = r;
      index[0][r] = r;
      tapes.push_back(Tape{0, std::vector<int>(1, r)});
    }

    std::vector<char> taken(states);  // taken[s]: s already owned at i + 1
    std::vector<int> reallocate;      // stripes whose run merged into another
    for (int i = 0; i < length; ++i) {
      taken.assign(states, 0);
      reallocate.clear();
      for (int r = 0; r < states; ++r) {
        Tape& tape = tapes[stripe[r]];
        const int next = automaton.next(tape.sequence.back(), input[i]);
        // A target outside the state range is handled like a collision, so
        // it is never used as an array index here.
        if (next >= 0 && next < states && !taken[next]) {
          taken[next] = 1;
          index[i + 1][next] = stripe[r];
          tape.sequence.push_back(next);
        } else {
          reallocate.push_back(r);
        }
      }
      // There are exactly as many unreached states as stopped stripes, so
      // the scan below always finds a free state before running off the end.
      int free_state = 0;
      for (int r : reallocate) {
        while (taken[free_state]) ++free_state;
        stripe[r] = static_cast<int>(tapes.size());
        index[i + 1][free_state] = stripe[r];
        tapes.push_back(Tape{i + 1, std::vector<int>(1, free_state)});
        taken[free_state] = 1;
      }
    }
  }

  // Returns the state reached from state s at position i after reading
  // x[i, j). Requires 0 <= i <= j <= x.size() and 0 <= s < M.size();
  // throws std::out_of_range otherwise.
  int getState(int i, int s, int j) const {
    if (i < 0 || j < i || j > static_cast<int>(x.size()) ||
        s < 0 || s >= M.size()) {
      throw std::out_of_range("SegmentRecognizer::getState: invalid (i, s, j)");
    }
    while (true) {
      const Tape& tape = tapes[index[i][s]];
      const int offset = j - tape.begin;  // >= 0: the tape covers i and i <= j
      const int covered = static_cast<int>(tape.sequence.size());
      if (offset < covered) {
        return tape.sequence[offset];
      }
      // The tape stops before j: take one explicit transition from its last
      // state and continue on the tape that owns the resulting state.
      i = tape.begin + covered;
      s = M.next(tape.sequence.back(), x[i - 1]);
    }
  }
};
124+
template <class Automaton>
125+
SegmentRecognizer<Automaton> makeSegmentRecognizer(Automaton M, vector<int> s) {
126+
return SegmentRecognizer<Automaton>(M, s);
127+
}
128+
129+
int main() {
130+
IndependenceAutomaton M;
131+
132+
for (int n = 2; n < (1<<24); n*=2) {
133+
vector<int> x(n);
134+
for (int i = 0; i < n; ++i) {
135+
x[i] = (rand() % 10 == 0);
136+
}
137+
auto recognizer = makeSegmentRecognizer(M, x);
138+
139+
tick();
140+
int count = 0;
141+
for (int iter = 0; iter < n; ++iter) {
142+
int v = (rand() % n) + 1;
143+
int u = rand() % v;
144+
count += recognizer.getState(u, 0, v);
145+
}
146+
double t = tick();
147+
cout << n << " " << t / n << endl;
148+
}
149+
}

0 commit comments

Comments
 (0)