Skip to content

Commit dacb26d

Browse files
committed
[GR-37073] TruffleStrings: introduce ErrorHandling parameter.
PullRequest: graal/12040
2 parents: da8fe80 + 8c70d62; commit: dacb26d

File tree

45 files changed: +1200 / -535 lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

45 files changed: +1200 / -535 lines changed

compiler/src/org.graalvm.compiler.truffle.test/src/org/graalvm/compiler/truffle/test/strings/TStringOpsCalcStringAttributesUTF8Test.java

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@
2828
import java.util.ArrayList;
2929
import java.util.Arrays;
3030

31+
import com.oracle.truffle.api.profiles.ConditionProfile;
3132
import org.graalvm.compiler.replacements.amd64.AMD64CalcStringAttributesNode;
3233
import org.junit.Test;
3334
import org.junit.runner.RunWith;
@@ -134,8 +135,8 @@ public TStringOpsCalcStringAttributesUTF8Test(Object array, int offset, int leng
134135

135136
@Test
136137
public void testUtf8() {
137-
ResolvedJavaMethod method = getTStringOpsMethod("calcStringAttributesUTF8", Object.class, int.class, int.class, boolean.class, boolean.class);
138-
test(method, null, DUMMY_LOCATION, array, offset, length, true, false);
139-
test(method, null, DUMMY_LOCATION, array, offset, length, false, false);
138+
ResolvedJavaMethod method = getTStringOpsMethod("calcStringAttributesUTF8", Object.class, int.class, int.class, boolean.class, boolean.class, ConditionProfile.class);
139+
test(method, null, DUMMY_LOCATION, array, offset, length, true, false, ConditionProfile.getUncached());
140+
test(method, null, DUMMY_LOCATION, array, offset, length, false, false, ConditionProfile.getUncached());
140141
}
141142
}

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/RegexObject.java

Lines changed: 17 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,6 @@
6868
import com.oracle.truffle.regex.tregex.TRegexCompilationRequest;
6969
import com.oracle.truffle.regex.tregex.parser.flavors.PythonFlags;
7070
import com.oracle.truffle.regex.tregex.parser.flavors.RubyFlags;
71-
import com.oracle.truffle.regex.tregex.string.Encodings;
7271
import com.oracle.truffle.regex.util.TruffleReadOnlyKeysArray;
7372

7473
/**
@@ -275,56 +274,56 @@ Object invokeMember(String member, Object[] args,
275274
if (fromIndex > Integer.MAX_VALUE) {
276275
return RegexResult.getNoMatchInstance();
277276
}
278-
return invokeCache.execute(member, this, input, (int) fromIndex, source.getEncoding());
277+
return invokeCache.execute(member, this, input, (int) fromIndex);
279278
}
280279

281280
@ImportStatic(RegexObject.class)
282281
@GenerateUncached
283282
abstract static class InvokeCacheNode extends Node {
284283

285-
abstract Object execute(String symbol, RegexObject receiver, Object input, int fromIndex, Encodings.Encoding encoding)
284+
abstract Object execute(String symbol, RegexObject receiver, Object input, int fromIndex)
286285
throws UnsupportedMessageException, ArityException, UnsupportedTypeException, UnknownIdentifierException;
287286

288287
@SuppressWarnings("unused")
289288
@Specialization(guards = {"symbol == cachedSymbol", "cachedSymbol.equals(PROP_EXEC)"}, limit = N_METHODS)
290-
Object execIdentity(String symbol, RegexObject receiver, Object input, int fromIndex, Encodings.Encoding encoding,
289+
Object execIdentity(String symbol, RegexObject receiver, Object input, int fromIndex,
291290
@Cached("symbol") String cachedSymbol,
292291
@Cached ExpectStringOrTruffleObjectNode expectStringOrTruffleObjectNode,
293292
@Cached ExecCompiledRegexNode execNode) throws UnsupportedMessageException, ArityException, UnsupportedTypeException {
294-
return execNode.execute(receiver.getExecCallTarget(), expectStringOrTruffleObjectNode.execute(input, encoding), fromIndex);
293+
return execNode.execute(receiver.getExecCallTarget(), expectStringOrTruffleObjectNode.execute(input), fromIndex);
295294
}
296295

297296
@SuppressWarnings("unused")
298297
@Specialization(guards = {"symbol.equals(cachedSymbol)", "cachedSymbol.equals(PROP_EXEC)"}, limit = N_METHODS, replaces = "execIdentity")
299-
Object execEquals(String symbol, RegexObject receiver, Object input, int fromIndex, Encodings.Encoding encoding,
298+
Object execEquals(String symbol, RegexObject receiver, Object input, int fromIndex,
300299
@Cached("symbol") String cachedSymbol,
301300
@Cached ExpectStringOrTruffleObjectNode expectStringOrTruffleObjectNode,
302301
@Cached ExecCompiledRegexNode execNode) throws UnsupportedMessageException, ArityException, UnsupportedTypeException {
303-
return execNode.execute(receiver.getExecCallTarget(), expectStringOrTruffleObjectNode.execute(input, encoding), fromIndex);
302+
return execNode.execute(receiver.getExecCallTarget(), expectStringOrTruffleObjectNode.execute(input), fromIndex);
304303
}
305304

306305
@SuppressWarnings("unused")
307306
@Specialization(guards = {"symbol == cachedSymbol", "cachedSymbol.equals(PROP_EXEC_BOOLEAN)"}, limit = N_METHODS)
308-
boolean execBooleanIdentity(String symbol, RegexObject receiver, Object input, int fromIndex, Encodings.Encoding encoding,
307+
boolean execBooleanIdentity(String symbol, RegexObject receiver, Object input, int fromIndex,
309308
@Cached("symbol") String cachedSymbol,
310309
@Cached ExpectStringOrTruffleObjectNode expectStringOrTruffleObjectNode,
311310
@Cached ExecCompiledRegexNode execNode) throws UnsupportedMessageException, ArityException, UnsupportedTypeException {
312-
return execNode.execute(receiver.getExecBooleanCallTarget(), expectStringOrTruffleObjectNode.execute(input, encoding), fromIndex) != RegexResult.getNoMatchInstance();
311+
return execNode.execute(receiver.getExecBooleanCallTarget(), expectStringOrTruffleObjectNode.execute(input), fromIndex) != RegexResult.getNoMatchInstance();
313312
}
314313

315314
@SuppressWarnings("unused")
316315
@Specialization(guards = {"symbol.equals(cachedSymbol)", "cachedSymbol.equals(PROP_EXEC_BOOLEAN)"}, limit = N_METHODS, replaces = "execBooleanIdentity")
317-
boolean execBooleanEquals(String symbol, RegexObject receiver, Object input, int fromIndex, Encodings.Encoding encoding,
316+
boolean execBooleanEquals(String symbol, RegexObject receiver, Object input, int fromIndex,
318317
@Cached("symbol") String cachedSymbol,
319318
@Cached ExpectStringOrTruffleObjectNode expectStringOrTruffleObjectNode,
320319
@Cached ExecCompiledRegexNode execNode) throws UnsupportedMessageException, ArityException, UnsupportedTypeException {
321-
return execNode.execute(receiver.getExecBooleanCallTarget(), expectStringOrTruffleObjectNode.execute(input, encoding), fromIndex) != RegexResult.getNoMatchInstance();
320+
return execNode.execute(receiver.getExecBooleanCallTarget(), expectStringOrTruffleObjectNode.execute(input), fromIndex) != RegexResult.getNoMatchInstance();
322321
}
323322

324323
// DEPRECATED
325324
@SuppressWarnings("unused")
326325
@Specialization(guards = {"symbol == cachedSymbol", "cachedSymbol.equals(PROP_EXEC_BYTES)"}, limit = N_METHODS)
327-
Object execBytesIdentity(String symbol, RegexObject receiver, Object input, int fromIndex, @SuppressWarnings("unused") Encodings.Encoding encoding,
326+
Object execBytesIdentity(String symbol, RegexObject receiver, Object input, int fromIndex,
328327
@Cached("symbol") String cachedSymbol,
329328
@Cached ExpectByteArrayHostObjectNode expectByteArrayHostObjectNode,
330329
@Cached ExecCompiledRegexNode execNode) throws UnsupportedMessageException, ArityException, UnsupportedTypeException {
@@ -334,7 +333,7 @@ Object execBytesIdentity(String symbol, RegexObject receiver, Object input, int
334333
// DEPRECATED
335334
@SuppressWarnings("unused")
336335
@Specialization(guards = {"symbol.equals(cachedSymbol)", "cachedSymbol.equals(PROP_EXEC_BYTES)"}, limit = N_METHODS, replaces = "execBytesIdentity")
337-
Object execBytesEquals(String symbol, RegexObject receiver, Object input, int fromIndex, @SuppressWarnings("unused") Encodings.Encoding encoding,
336+
Object execBytesEquals(String symbol, RegexObject receiver, Object input, int fromIndex,
338337
@Cached("symbol") String cachedSymbol,
339338
@Cached ExpectByteArrayHostObjectNode expectByteArrayHostObjectNode,
340339
@Cached ExecCompiledRegexNode execNode) throws UnsupportedMessageException, ArityException, UnsupportedTypeException {
@@ -343,15 +342,15 @@ Object execBytesEquals(String symbol, RegexObject receiver, Object input, int fr
343342

344343
@ReportPolymorphism.Megamorphic
345344
@Specialization(replaces = {"execEquals", "execBooleanEquals", "execBytesEquals"})
346-
static Object invokeGeneric(String symbol, RegexObject receiver, Object input, int fromIndex, Encodings.Encoding encoding,
345+
static Object invokeGeneric(String symbol, RegexObject receiver, Object input, int fromIndex,
347346
@Cached ExpectStringOrTruffleObjectNode expectStringOrTruffleObjectNode,
348347
@Cached ExpectByteArrayHostObjectNode expectByteArrayHostObjectNode,
349348
@Cached ExecCompiledRegexNode execNode) throws UnsupportedMessageException, ArityException, UnsupportedTypeException, UnknownIdentifierException {
350349
switch (symbol) {
351350
case PROP_EXEC:
352-
return execNode.execute(receiver.getExecCallTarget(), expectStringOrTruffleObjectNode.execute(input, encoding), fromIndex);
351+
return execNode.execute(receiver.getExecCallTarget(), expectStringOrTruffleObjectNode.execute(input), fromIndex);
353352
case PROP_EXEC_BOOLEAN:
354-
return execNode.execute(receiver.getExecBooleanCallTarget(), expectStringOrTruffleObjectNode.execute(input, encoding), fromIndex) != RegexResult.getNoMatchInstance();
353+
return execNode.execute(receiver.getExecBooleanCallTarget(), expectStringOrTruffleObjectNode.execute(input), fromIndex) != RegexResult.getNoMatchInstance();
355354
case PROP_EXEC_BYTES:
356355
return execNode.execute(receiver.getExecCallTarget(), expectByteArrayHostObjectNode.execute(input), fromIndex);
357356
default:
@@ -389,7 +388,7 @@ Object execute(Object[] args,
389388
CompilerDirectives.transferToInterpreterAndInvalidate();
390389
throw ArityException.create(2, 2, args.length);
391390
}
392-
Object input = expectStringOrTruffleObjectNode.execute(args[0], regex.source.getEncoding());
391+
Object input = expectStringOrTruffleObjectNode.execute(args[0]);
393392
long fromIndex = toLongNode.execute(args[1]);
394393
if (fromIndex > Integer.MAX_VALUE) {
395394
return RegexResult.getNoMatchInstance();
@@ -432,7 +431,7 @@ boolean execute(Object[] args,
432431
CompilerDirectives.transferToInterpreterAndInvalidate();
433432
throw ArityException.create(2, 2, args.length);
434433
}
435-
Object input = expectStringOrTruffleObjectNode.execute(args[0], regex.source.getEncoding());
434+
Object input = expectStringOrTruffleObjectNode.execute(args[0]);
436435
long fromIndex = toLongNode.execute(args[1]);
437436
if (fromIndex > Integer.MAX_VALUE) {
438437
return false;

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/RegexOptions.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -505,7 +505,7 @@ private int parseEncoding(int i) throws RegexSyntaxException {
505505
encoding = Encodings.ASCII;
506506
return expectValue(iVal, Encodings.ASCII.getName(), Encodings.ALL_NAMES);
507507
case 'B':
508-
encoding = Encodings.LATIN_1;
508+
encoding = Encodings.BYTES;
509509
return expectValue(iVal, "BYTES", Encodings.ALL_NAMES);
510510
case 'L':
511511
encoding = Encodings.LATIN_1;

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/literal/LiteralRegexExecNode.java

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -103,7 +103,9 @@ RegexResult doString(String input, int fromIndex) {
103103
}
104104

105105
@Specialization
106-
RegexResult doTString(TruffleString input, int fromIndex) {
106+
RegexResult doTString(TruffleString input, int fromIndex,
107+
@Cached TruffleString.MaterializeNode materializeNode) {
108+
materializeNode.execute(input, getEncoding().getTStringEncoding());
107109
return implNode.execute(input, fromIndex, getEncoding(), true);
108110
}
109111

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/runtime/nodes/ExpectStringOrTruffleObjectNode.java

Lines changed: 7 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -41,36 +41,32 @@
4141
package com.oracle.truffle.regex.runtime.nodes;
4242

4343
import com.oracle.truffle.api.CompilerDirectives;
44-
import com.oracle.truffle.api.dsl.Cached;
4544
import com.oracle.truffle.api.dsl.GenerateUncached;
4645
import com.oracle.truffle.api.dsl.Specialization;
4746
import com.oracle.truffle.api.interop.InteropLibrary;
4847
import com.oracle.truffle.api.interop.UnsupportedMessageException;
4948
import com.oracle.truffle.api.interop.UnsupportedTypeException;
50-
import com.oracle.truffle.api.strings.TruffleString;
5149
import com.oracle.truffle.api.library.CachedLibrary;
5250
import com.oracle.truffle.api.nodes.Node;
53-
import com.oracle.truffle.regex.tregex.string.Encodings;
51+
import com.oracle.truffle.api.strings.TruffleString;
5452

5553
@GenerateUncached
5654
public abstract class ExpectStringOrTruffleObjectNode extends Node {
5755

58-
public abstract Object execute(Object arg, Encodings.Encoding encoding) throws UnsupportedTypeException;
56+
public abstract Object execute(Object arg) throws UnsupportedTypeException;
5957

6058
@Specialization
61-
static String doString(String input, @SuppressWarnings("unused") Encodings.Encoding encoding) {
59+
static String doString(String input) {
6260
return input;
6361
}
6462

6563
@Specialization
66-
static TruffleString doTString(TruffleString input, Encodings.Encoding encoding,
67-
@Cached TruffleString.MaterializeNode materializeNode) {
68-
materializeNode.execute(input, encoding.getTStringEncoding());
64+
static TruffleString doTString(TruffleString input) {
6965
return input;
7066
}
7167

7268
@Specialization(guards = "inputs.isString(input)", limit = "2")
73-
static String doBoxedString(Object input, @SuppressWarnings("unused") Encodings.Encoding encoding,
69+
static String doBoxedString(Object input,
7470
@CachedLibrary("input") InteropLibrary inputs) throws UnsupportedTypeException {
7571
try {
7672
return inputs.asString(input);
@@ -80,8 +76,9 @@ static String doBoxedString(Object input, @SuppressWarnings("unused") Encodings.
8076
}
8177
}
8278

79+
// Deprecated
8380
@Specialization(guards = "inputs.hasArrayElements(input)", limit = "2")
84-
static Object doBoxedCharArray(Object input, @SuppressWarnings("unused") Encodings.Encoding encoding,
81+
static Object doBoxedCharArray(Object input,
8582
@CachedLibrary("input") InteropLibrary inputs) throws UnsupportedTypeException {
8683
try {
8784
final long inputLength = inputs.getArraySize(input);

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/TRegexOptions.java

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,7 @@
5050
import com.oracle.truffle.regex.tregex.nodes.dfa.DFACaptureGroupPartialTransition;
5151
import com.oracle.truffle.regex.tregex.nodes.dfa.TRegexDFAExecutorNode;
5252
import com.oracle.truffle.regex.tregex.nodes.dfa.TraceFinderDFAStateNode;
53+
import com.oracle.truffle.regex.tregex.nodes.nfa.TRegexBacktrackingNFAExecutorNode;
5354
import com.oracle.truffle.regex.tregex.nodesplitter.DFANodeSplit;
5455
import com.oracle.truffle.regex.tregex.parser.ast.Group;
5556
import com.oracle.truffle.regex.tregex.parser.ast.RegexAST;
@@ -219,6 +220,12 @@ public class TRegexOptions {
219220
*/
220221
public static final int TRegexMaxPureNFATransitions = 1_000_000;
221222

223+
/**
224+
* Maximum number of {@link PureNFA} states allowed to be exploded in
225+
* {@link TRegexBacktrackingNFAExecutorNode}.
226+
*/
227+
public static final int TRegexMaxBackTrackerMergeExplodeSize = 4_000;
228+
222229
static {
223230
assert TRegexTraceFinderMaxNumberOfResults <= 254;
224231
assert TRegexParserTreeMaxSize <= Integer.MAX_VALUE;

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/nodes/TRegexExecutorEntryNode.java

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -131,9 +131,11 @@ Object doStringNonCompact(String input, int fromIndex, int index, int maxIndex)
131131

132132
@Specialization(guards = "codeRangeEqualsNode.execute(input, cachedCodeRange)", limit = "5")
133133
Object doTString(TruffleString input, int fromIndex, int index, int maxIndex,
134+
@Cached TruffleString.MaterializeNode materializeNode,
134135
@Cached @SuppressWarnings("unused") TruffleString.GetCodeRangeNode codeRangeNode,
135136
@Cached @SuppressWarnings("unused") TruffleString.CodeRangeEqualsNode codeRangeEqualsNode,
136137
@Cached("codeRangeNode.execute(input, executor.getEncoding().getTStringEncoding())") TruffleString.CodeRange cachedCodeRange) {
138+
materializeNode.execute(input, executor.getEncoding().getTStringEncoding());
137139
return executor.execute(executor.createLocals(input, fromIndex, index, maxIndex), cachedCodeRange, true);
138140
}
139141

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/nodes/TRegexExecutorNode.java

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -195,7 +195,8 @@ public int inputReadAndDecode(TRegexExecutorLocals locals, int index) {
195195
return codepoint | (c & (0xff >>> nBytes)) << (6 * (nBytes - 1));
196196
}
197197
} else {
198-
assert getEncoding() == Encodings.UTF_16_RAW || getEncoding() == Encodings.UTF_32 || getEncoding() == Encodings.LATIN_1 || getEncoding() == Encodings.ASCII;
198+
assert getEncoding() == Encodings.UTF_16_RAW || getEncoding() == Encodings.UTF_32 || getEncoding() == Encodings.LATIN_1 || getEncoding() == Encodings.BYTES ||
199+
getEncoding() == Encodings.ASCII;
199200
locals.setNextIndex(inputIncRaw(index));
200201
return inputReadRaw(locals);
201202
}
@@ -274,7 +275,8 @@ protected void inputSkipIntl(TRegexExecutorLocals locals, boolean forward) {
274275
} while (inputHasNext(locals, false) && inputUTF8IsTrailingByte(c));
275276
}
276277
} else {
277-
assert getEncoding() == Encodings.UTF_16_RAW || getEncoding() == Encodings.UTF_32 || getEncoding() == Encodings.LATIN_1 || getEncoding() == Encodings.ASCII;
278+
assert getEncoding() == Encodings.UTF_16_RAW || getEncoding() == Encodings.UTF_32 || getEncoding() == Encodings.LATIN_1 || getEncoding() == Encodings.BYTES ||
279+
getEncoding() == Encodings.ASCII;
278280
inputIncRaw(locals, forward);
279281
}
280282
}

0 commit comments

Comments
 (0)