Skip to content

Commit 207f9c7

Browse files
committed
[GR-49987] TRegex: add InputStringGenerator.
PullRequest: graal/17235
2 parents ad294c6 + 852d2f8 commit 207f9c7

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+1670
-4825
lines changed
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
/*
2+
* Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* The Universal Permissive License (UPL), Version 1.0
6+
*
7+
* Subject to the condition set forth below, permission is hereby granted to any
8+
* person obtaining a copy of this software, associated documentation and/or
9+
* data (collectively the "Software"), free of charge and under any and all
10+
* copyright rights in the Software, and any and all patent rights owned or
11+
* freely licensable by each licensor hereunder covering either (i) the
12+
* unmodified Software as contributed to or provided by such licensor, or (ii)
13+
* the Larger Works (as defined below), to deal in both
14+
*
15+
* (a) the Software, and
16+
*
17+
* (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
18+
* one is included with the Software each a "Larger Work" to which the Software
19+
* is contributed by such licensors),
20+
*
21+
* without restriction, including without limitation the rights to copy, create
22+
* derivative works of, display, perform, and distribute the Software and make,
23+
* use, sell, offer for sale, import, export, have made, and have sold the
24+
* Software and the Larger Work(s), and to sublicense the foregoing rights on
25+
* either these or other terms.
26+
*
27+
* This license is subject to the following condition:
28+
*
29+
* The above copyright notice and either this complete permission notice or at a
30+
* minimum a reference to the UPL must be included in all copies or substantial
31+
* portions of the Software.
32+
*
33+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
36+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
37+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
38+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
39+
* SOFTWARE.
40+
*/
41+
package com.oracle.truffle.regex.tregex.test;
42+
43+
import java.util.Random;
44+
45+
import com.oracle.truffle.api.strings.TruffleString;
46+
import org.graalvm.polyglot.Value;
47+
import org.junit.Assert;
48+
import org.junit.Test;
49+
50+
import com.oracle.truffle.api.source.Source;
51+
import com.oracle.truffle.regex.RegexLanguage;
52+
import com.oracle.truffle.regex.RegexSource;
53+
import com.oracle.truffle.regex.analysis.InputStringGenerator;
54+
import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer;
55+
import com.oracle.truffle.regex.tregex.parser.ast.RegexAST;
56+
import com.oracle.truffle.regex.tregex.string.Encodings;
57+
58+
public class InputStringGeneratorTests extends RegexTestBase {
59+
60+
private final RegexLanguage language = new RegexLanguage();
61+
private final Random rng = new Random(1234);
62+
63+
@Override
64+
String getEngineOptions() {
65+
return "";
66+
}
67+
68+
@Override
69+
Encodings.Encoding getTRegexEncoding() {
70+
return Encodings.UTF_16_RAW;
71+
}
72+
73+
@Test
74+
public void testBenchmarkRegexes() {
75+
testInputStringGenerator("(((\\w+):\\/\\/)([^\\/:]*)(:(\\d+))?)?([^#?]*)(\\?([^#]*))?(#(.*))?");
76+
testInputStringGenerator("([aeiouAEIOU]+)");
77+
testInputStringGenerator("((([1-3][0-9])|[1-9])\\/((1[0-2])|0?[1-9])\\/[0-9]{4})|((([1-3][0-9])|[1-9])-((1[0-2])|0?[1-9])-[0-9]{4})|((([1-3][0-9])|[1-9])\\.((1[0-2])|0?[1-9])\\.[0-9]{4})");
78+
testInputStringGenerator("((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)");
79+
testInputStringGenerator("([A-Fa-f0-9]{1,4}:){7}[A-Fa-f0-9]{1,4}");
80+
testInputStringGenerator("([A-Fa-f0-9]{1,4}:){6}(([A-Fa-f0-9]{1,4}:[A-Fa-f0-9]{1,4})|(((25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])\\.){3}(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])))");
81+
testInputStringGenerator(
82+
"([-!#-''*+/-9=?A-Z^-~]+(\\.[-!#-''*+/-9=?A-Z^-~]+)*|\"([ ]!#-[^-~ ]|(\\\\[-~ ]))+\")@[0-9A-Za-z]([0-9A-Za-z-]{0,61}[0-9A-Za-z])?(\\.[0-9A-Za-z]([0-9A-Za-z-]{0,61}[0-9A-Za-z])?)+");
83+
testInputStringGenerator(
84+
"([-!#-''*+/-9=?A-Z^-~]+(\\.[-!#-''*+/-9=?A-Z^-~]+)*|\"([ ]!#-[^-~ ]|(\\\\[-~ ]))+\")@[0-9A-Za-z]([0-9A-Za-z-]*[0-9A-Za-z])?(\\.[0-9A-Za-z]([0-9A-Za-z-]*[0-9A-Za-z])?)+");
85+
testInputStringGenerator("(\\S+) (\\S+) (\\S+) \\[([A-Za-z0-9_:/]+\\s[-+]\\d{4})\\] \"(\\S+)\\s?(\\S+)?\\s?(\\S+)?\" (\\d{3}|-) (\\d+|-)\\s?\"?([^\"]*)\"?\\s?\"?([^\"]*)?\"?");
86+
}
87+
88+
private TruffleString generateInputString(String pattern, String flags, String options, Encodings.Encoding encoding) {
89+
String sourceString = createSourceString(pattern, flags, options, encoding);
90+
Source source = Source.newBuilder("regex", sourceString, "regexSource").build();
91+
RegexSource regexSource = RegexLanguage.createRegexSource(source);
92+
RegexAST ast = regexSource.getOptions().getFlavor().createParser(language, regexSource, new CompilationBuffer(regexSource.getEncoding())).parse();
93+
return InputStringGenerator.generate(ast, rng.nextLong());
94+
}
95+
96+
void testInputStringGenerator(String pattern) {
97+
testInputStringGenerator(pattern, "", getEngineOptions(), getTRegexEncoding());
98+
}
99+
100+
void testInputStringGenerator(String pattern, String flags, String options, Encodings.Encoding encoding) {
101+
Value compiledRegex = compileRegex(pattern, flags);
102+
testInputStringGenerator(pattern, flags, options, encoding, compiledRegex);
103+
}
104+
105+
private void testInputStringGenerator(String pattern, String flags, String options, Encodings.Encoding encoding, Value compiledRegex) {
106+
for (int i = 0; i < 20; i++) {
107+
TruffleString input = generateInputString(pattern, flags, options, encoding);
108+
Assert.assertNotNull(input);
109+
Value result = execRegex(compiledRegex, encoding, input, 0);
110+
Assert.assertTrue(result.getMember("isMatch").asBoolean());
111+
}
112+
}
113+
}

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/JsTests.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ public void nestedQuantifiers() {
7373
test("(x??)?", "", "x", 1, true, 1, 1, -1, -1);
7474
test("(x??)*", "", "x", 0, true, 0, 1, 0, 1);
7575
test("(x??)*", "", "x", 1, true, 1, 1, -1, -1);
76+
test("X(.?){8,8}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8);
7677
}
7778

7879
@Test

0 commit comments

Comments
 (0)