Skip to content

Commit 29fa37d

Browse files
committed
TruffleStrings: docs and style cleanup
1 parent 7d9dacd commit 29fa37d

File tree

10 files changed

+126
-136
lines changed

10 files changed

+126
-136
lines changed

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/jmh/TRegexVSJavaBenchmarks.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -139,8 +139,8 @@ public void setUp() {
139139
context.enter();
140140
ParameterSet p = benchmarks.get(benchName);
141141
javaPattern = Pattern.compile(p.regex, toJavaFlags(p.flags));
142-
tregexBool = context.parse(TRegexTestDummyLanguage.ID, "__BENCH__GenerateDFAImmediately=true/" + p.regex + '/' + p.flags);
143-
tregexCG = context.parse(TRegexTestDummyLanguage.ID, "__BENCH_CG__GenerateDFAImmediately=true/" + p.regex + '/' + p.flags);
142+
tregexBool = context.parse(TRegexTestDummyLanguage.ID, TRegexTestDummyLanguage.BENCH_PREFIX + "GenerateDFAImmediately=true/" + p.regex + '/' + p.flags);
143+
tregexCG = context.parse(TRegexTestDummyLanguage.ID, TRegexTestDummyLanguage.BENCH_CG_PREFIX + "GenerateDFAImmediately=true/" + p.regex + '/' + p.flags);
144144
input = "_".repeat(200) + p.input;
145145
}
146146

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/TRegexTestDummyLanguage.java

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@
5656
import com.oracle.truffle.api.nodes.Node;
5757
import com.oracle.truffle.api.nodes.RootNode;
5858
import com.oracle.truffle.api.source.Source;
59-
import com.oracle.truffle.api.strings.TruffleString;
6059
import com.oracle.truffle.regex.RegexLanguage;
6160

6261
@TruffleLanguage.Registration(name = TRegexTestDummyLanguage.NAME, id = TRegexTestDummyLanguage.ID, characterMimeTypes = TRegexTestDummyLanguage.MIME_TYPE, version = "0.1", dependentLanguages = RegexLanguage.ID)
@@ -65,14 +64,15 @@ public class TRegexTestDummyLanguage extends TruffleLanguage<TRegexTestDummyLang
6564
public static final String NAME = "REGEXDUMMYLANG";
6665
public static final String ID = "regexDummyLang";
6766
public static final String MIME_TYPE = "application/tregexdummy";
67+
public static final String BENCH_PREFIX = "__BENCH__";
68+
public static final String BENCH_CG_PREFIX = "__BENCH_CG__";
6869

6970
@Override
7071
protected CallTarget parse(ParsingRequest parsingRequest) {
7172
String src = parsingRequest.getSource().getCharacters().toString();
72-
String benchPrefix = "__BENCH__";
73-
if (src.startsWith(benchPrefix)) {
73+
if (src.startsWith(BENCH_PREFIX)) {
7474
final Object regex = DummyLanguageContext.get(null).getEnv().parseInternal(
75-
Source.newBuilder(RegexLanguage.ID, "BooleanMatch=true," + src.substring(benchPrefix.length()), parsingRequest.getSource().getName()).internal(true).build()).call();
75+
Source.newBuilder(RegexLanguage.ID, "BooleanMatch=true," + src.substring(BENCH_PREFIX.length()), parsingRequest.getSource().getName()).internal(true).build()).call();
7676
return new RootNode(this) {
7777

7878
private final Object compiledRegex = regex;
@@ -86,10 +86,9 @@ public Object execute(VirtualFrame frame) {
8686
}
8787
}.getCallTarget();
8888
}
89-
String benchCGPrefix = "__BENCH_CG__";
90-
if (src.startsWith(benchCGPrefix)) {
89+
if (src.startsWith(BENCH_CG_PREFIX)) {
9190
final Object regex = DummyLanguageContext.get(null).getEnv().parseInternal(
92-
Source.newBuilder(RegexLanguage.ID, src.substring(benchCGPrefix.length()), parsingRequest.getSource().getName()).internal(true).build()).call();
91+
Source.newBuilder(RegexLanguage.ID, src.substring(BENCH_CG_PREFIX.length()), parsingRequest.getSource().getName()).internal(true).build()).call();
9392
return new RootNode(this) {
9493

9594
private final Object compiledRegex = regex;
@@ -116,10 +115,9 @@ abstract static class RegexBenchNode extends Node {
116115

117116
@Specialization(guards = "objs.isMemberInvocable(compiledRegex, EXEC)", limit = "3")
118117
static boolean run(Object compiledRegex, String input, int fromIndex,
119-
@Cached(inline = false) TruffleString.FromJavaStringNode fromJavaStringNode,
120118
@CachedLibrary("compiledRegex") InteropLibrary objs) {
121119
try {
122-
return (boolean) objs.invokeMember(compiledRegex, EXEC, fromJavaStringNode.execute(input, TruffleString.Encoding.UTF_16), fromIndex);
120+
return (boolean) objs.invokeMember(compiledRegex, EXEC, input, fromIndex);
123121
} catch (UnsupportedMessageException | UnsupportedTypeException | ArityException | UnknownIdentifierException e) {
124122
throw CompilerDirectives.shouldNotReachHere(e);
125123
}
@@ -135,14 +133,13 @@ abstract static class RegexBenchCGNode extends Node {
135133

136134
@Specialization(guards = "objs.isMemberInvocable(compiledRegex, EXEC)", limit = "3")
137135
static int run(Node node, Object compiledRegex, String input, int fromIndex,
138-
@Cached(inline = false) TruffleString.FromJavaStringNode fromJavaStringNode,
139136
@CachedLibrary("compiledRegex") InteropLibrary objs,
140137
@Cached RegexBenchCGGetStartNode getStart0,
141138
@Cached RegexBenchCGGetStartNode getStart1,
142139
@Cached RegexBenchCGGetEndNode getEnd0,
143140
@Cached RegexBenchCGGetEndNode getEnd1) {
144141
try {
145-
Object result = objs.invokeMember(compiledRegex, EXEC, fromJavaStringNode.execute(input, TruffleString.Encoding.UTF_16), fromIndex);
142+
Object result = objs.invokeMember(compiledRegex, EXEC, input, fromIndex);
146143
int start0 = getStart0.execute(node, result, 0);
147144
int end0 = getEnd0.execute(node, result, 0);
148145
int start1 = getStart1.execute(node, result, 1);

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/dfa/DFAGenerator.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ public final class DFAGenerator implements JsonConvertible {
148148
private TRegexDFAExecutorNode innerLiteralPrefixMatcher = null;
149149

150150
private final SequentialMatchers.Builder matchersBuilder;
151-
private final List<TruffleString.ByteIndexOfCodePointSetNode.CodePointSet> indexOfParams = new ArrayList<>();
151+
private final List<TruffleString.CodePointSet> indexOfParams = new ArrayList<>();
152152

153153
public DFAGenerator(TRegexCompilationRequest compilationRequest, NFA nfa, TRegexDFAExecutorProperties executorProps, CompilationBuffer compilationBuffer) {
154154
this.compilationRequest = compilationRequest;
@@ -338,7 +338,7 @@ public TRegexDFAExecutorNode createDFAExecutor() {
338338
executorProps.setSimpleCGMustCopy(simpleCGMustCopy);
339339
TRegexDFAExecutorDebugRecorder debugRecorder = TRegexDFAExecutorDebugRecorder.create(getOptions(), this);
340340
return new TRegexDFAExecutorNode(nfa.getAst().getSource(), executorProps, getNfa().getAst().getNumberOfCaptureGroups(), maxNumberOfNfaStates,
341-
indexOfParams.toArray(TruffleString.ByteIndexOfCodePointSetNode.CodePointSet[]::new), states,
341+
indexOfParams.toArray(TruffleString.CodePointSet[]::new), states,
342342
debugRecorder, innerLiteralPrefixMatcher);
343343
}
344344

@@ -861,7 +861,7 @@ private DFAAbstractStateNode[] createDFAExecutorStates() {
861861
loopToSelf = (short) i;
862862
CodePointSet loopMB = s.getSuccessors()[i].getCodePointSet();
863863
if (coversCharSpace && !loopMB.matchesEverything(getEncoding())) {
864-
TruffleString.ByteIndexOfCodePointSetNode.CodePointSet indexOfParam = TruffleString.ByteIndexOfCodePointSetNode.CodePointSet.fromRanges(
864+
TruffleString.CodePointSet indexOfParam = TruffleString.CodePointSet.fromRanges(
865865
loopMB.createInverse(getEncoding()).getRanges(), getEncoding().getTStringEncoding());
866866
indexOfIsFast = checkIndexOfIsFast(indexOfParam);
867867
if (indexOfIsFast != 0) {
@@ -907,7 +907,7 @@ private DFAAbstractStateNode[] createDFAExecutorStates() {
907907
return ret;
908908
}
909909

910-
private static byte checkIndexOfIsFast(TruffleString.ByteIndexOfCodePointSetNode.CodePointSet indexOfParam) {
910+
private static byte checkIndexOfIsFast(TruffleString.CodePointSet indexOfParam) {
911911
byte indexOfIsFast = 0;
912912
for (TruffleString.CodeRange codeRange : TruffleString.CodeRange.values()) {
913913
if (indexOfParam.isIntrinsicCandidate(codeRange)) {

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/nodes/dfa/DFAStateNode.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ void beforeFindSuccessor(TRegexDFAExecutorLocals locals, TRegexDFAExecutorNode e
183183

184184
/**
185185
* Gets called after every call to
186-
* {@link InputIndexOfNode#execute(TruffleString, int, int, TruffleString.ByteIndexOfCodePointSetNode.CodePointSet, Encodings.Encoding)},
186+
* {@link InputIndexOfNode#execute(TruffleString, int, int, TruffleString.CodePointSet, Encodings.Encoding)},
187187
* which we call an {@code indexOf}-operation.
188188
*
189189
* @param preLoopIndex the starting index of the {@code indexOf}-operation.

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/nodes/dfa/TRegexDFAExecutorNode.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ public final class TRegexDFAExecutorNode extends TRegexExecutorNode {
7272
public static final int NO_MATCH = -2;
7373
private final TRegexDFAExecutorProperties props;
7474
private final int maxNumberOfNFAStates;
75-
@CompilationFinal(dimensions = 1) private final TruffleString.ByteIndexOfCodePointSetNode.CodePointSet[] indexOfParameters;
75+
@CompilationFinal(dimensions = 1) private final TruffleString.CodePointSet[] indexOfParameters;
7676
@CompilationFinal(dimensions = 1) private final DFAAbstractStateNode[] states;
7777
@CompilationFinal(dimensions = 1) private final int[] cgResultOrder;
7878
private final TRegexDFAExecutorDebugRecorder debugRecorder;
@@ -86,7 +86,7 @@ public TRegexDFAExecutorNode(
8686
TRegexDFAExecutorProperties props,
8787
int numberOfCaptureGroups,
8888
int maxNumberOfNFAStates,
89-
TruffleString.ByteIndexOfCodePointSetNode.CodePointSet[] indexOfParameters,
89+
TruffleString.CodePointSet[] indexOfParameters,
9090
DFAAbstractStateNode[] states,
9191
TRegexDFAExecutorDebugRecorder debugRecorder,
9292
TRegexDFAExecutorNode innerLiteralPrefixMatcher) {
@@ -101,7 +101,7 @@ public TRegexDFAExecutorNode(
101101
int numberOfCaptureGroups,
102102
int numberOfTransitions,
103103
int maxNumberOfNFAStates,
104-
TruffleString.ByteIndexOfCodePointSetNode.CodePointSet[] indexOfParameters,
104+
TruffleString.CodePointSet[] indexOfParameters,
105105
DFAAbstractStateNode[] states,
106106
int[] cgResultOrder,
107107
TRegexDFAExecutorDebugRecorder debugRecorder,
@@ -371,7 +371,7 @@ public Object execute(VirtualFrame frame, final TRegexExecutorLocals abstractLoc
371371
if (injectBranchProbability(CONTINUE_PROBABILITY, isForward() && state.canDoIndexOf(codeRange) && inputHasNext(locals))) {
372372
int indexOfNodeId = state.getIndexOfNodeId();
373373
InputIndexOfNode indexOfNode = getIndexOfNode(indexOfNodeId);
374-
TruffleString.ByteIndexOfCodePointSetNode.CodePointSet indexOfParameter = indexOfParameters[indexOfNodeId];
374+
TruffleString.CodePointSet indexOfParameter = indexOfParameters[indexOfNodeId];
375375
CompilerAsserts.partialEvaluationConstant(indexOfNodeId);
376376
CompilerAsserts.partialEvaluationConstant(indexOfNode);
377377
CompilerAsserts.partialEvaluationConstant(indexOfParameter);

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/nodes/input/InputIndexOfNode.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,10 @@
4949

5050
public abstract class InputIndexOfNode extends Node {
5151

52-
public abstract int execute(TruffleString input, int fromIndex, int maxIndex, TruffleString.ByteIndexOfCodePointSetNode.CodePointSet codePointSet, Encoding encoding);
52+
public abstract int execute(TruffleString input, int fromIndex, int maxIndex, TruffleString.CodePointSet codePointSet, Encoding encoding);
5353

5454
@Specialization
55-
public int doTString(TruffleString input, int fromIndex, int maxIndex, TruffleString.ByteIndexOfCodePointSetNode.CodePointSet codePointSet, Encoding encoding,
55+
public int doTString(TruffleString input, int fromIndex, int maxIndex, TruffleString.CodePointSet codePointSet, Encoding encoding,
5656
@Cached TruffleString.ByteIndexOfCodePointSetNode indexOfNode) {
5757
CompilerAsserts.partialEvaluationConstant(codePointSet);
5858
return indexOfNode.execute(input, fromIndex << encoding.getStride(), maxIndex << encoding.getStride(), codePointSet) >> encoding.getStride();

truffle/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ This changelog summarizes major changes between Truffle versions relevant to lan
4646
* GR-42539 (change of behavior) Unclosed polyglot engines are no longer closed automatically on VM shutdown. They just die with the VM. As a result, `TruffleInstrument#onDispose` is not called for active instruments on unclosed engines in the event of VM shutdown. In case an instrument is supposed to do some specific action before its disposal, e.g. print some kind of summary, it should be done in `TruffleInstrument#onFinalize`.
4747
* GR-42961 Added `TruffleString.ByteIndexOfCodePointSetNode`, which allows fast searching for a given set of codepoints.
4848
* GR-42961 Added `TruffleString.GetCodeRangeImpreciseNode`, which allows querying the currently known code range without triggering a string scan.
49+
* GR-42961 `TruffleString.FromJavaStringNode` no longer eagerly scans strings for their code range. To still get eager scanning of constant strings, use `fromConstant(String)`.
4950

5051
## Version 22.3.0
5152

truffle/src/com.oracle.truffle.api.strings.test/src/com/oracle/truffle/api/strings/test/ops/TStringByteIndexOfCodePointSetTest.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ public void testAll() throws Exception {
195195
if (!isUTF(encoding) && ranges[ranges.length - 1] > 0x7f) {
196196
continue;
197197
}
198-
TruffleString.ByteIndexOfCodePointSetNode.CodePointSet codePointSet = TruffleString.ByteIndexOfCodePointSetNode.CodePointSet.fromRanges(ranges, encoding);
198+
TruffleString.CodePointSet codePointSet = TruffleString.CodePointSet.fromRanges(ranges, encoding);
199199
for (int i = 0; i < strings.length; i++) {
200200
int expected = indexOfRanges(codepoints[i], ranges, byteIndices[i]);
201201
int actual = node.execute(strings[i], 0, strings[i].byteLength(encoding), codePointSet);
@@ -226,13 +226,13 @@ private static int indexOfRanges(int[] codepoints, int[] ranges, int[] byteIndic
226226

227227
@Test
228228
public void testNull() throws Exception {
229-
checkNullSE((s, e) -> node.execute(s, 0, 1, TruffleString.ByteIndexOfCodePointSetNode.CodePointSet.fromRanges(new int[]{0, 0}, e)));
229+
checkNullSE((s, e) -> node.execute(s, 0, 1, TruffleString.CodePointSet.fromRanges(new int[]{0, 0}, e)));
230230
expectNullPointerException(() -> node.execute(S_UTF8, 0, 1, null));
231231
}
232232

233233
@Test
234234
public void testOutOfBounds() throws Exception {
235235
checkOutOfBoundsFromTo(true, 0, Encodings.PRIMARY_ENCODINGS,
236-
(a, fromIndex, toIndex, encoding) -> node.execute(a, fromIndex, toIndex, TruffleString.ByteIndexOfCodePointSetNode.CodePointSet.fromRanges(new int[]{0, 0}, encoding)));
236+
(a, fromIndex, toIndex, encoding) -> node.execute(a, fromIndex, toIndex, TruffleString.CodePointSet.fromRanges(new int[]{0, 0}, encoding)));
237237
}
238238
}

truffle/src/com.oracle.truffle.api.strings/snapshot.sigtest

Lines changed: 9 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -41,14 +41,6 @@ CLSS public abstract interface !annotation com.oracle.truffle.api.dsl.ImportStat
4141
intf java.lang.annotation.Annotation
4242
meth public abstract java.lang.Class<?>[] value()
4343

44-
CLSS public abstract interface !annotation com.oracle.truffle.api.dsl.ReportPolymorphism
45-
anno 0 java.lang.annotation.Inherited()
46-
anno 0 java.lang.annotation.Retention(java.lang.annotation.RetentionPolicy value=CLASS)
47-
anno 0 java.lang.annotation.Target(java.lang.annotation.ElementType[] value=[TYPE])
48-
innr public abstract interface static !annotation Exclude
49-
innr public abstract interface static !annotation Megamorphic
50-
intf java.lang.annotation.Annotation
51-
5244
CLSS public abstract com.oracle.truffle.api.nodes.Node
5345
cons protected init()
5446
innr public abstract interface static !annotation Child
@@ -343,6 +335,7 @@ innr public final static !enum CodeRange
343335
innr public final static !enum CompactionLevel
344336
innr public final static !enum Encoding
345337
innr public final static !enum ErrorHandling
338+
innr public final static CodePointSet
346339
innr public final static IllegalByteArrayLengthException
347340
innr public final static NumberFormatException
348341
innr public final static WithMask
@@ -404,19 +397,11 @@ supr com.oracle.truffle.api.nodes.Node
404397

405398
CLSS public abstract static com.oracle.truffle.api.strings.TruffleString$ByteIndexOfCodePointSetNode
406399
outer com.oracle.truffle.api.strings.TruffleString
407-
innr public final static CodePointSet
408-
meth public abstract int execute(com.oracle.truffle.api.strings.AbstractTruffleString,int,int,com.oracle.truffle.api.strings.TruffleString$ByteIndexOfCodePointSetNode$CodePointSet)
400+
meth public abstract int execute(com.oracle.truffle.api.strings.AbstractTruffleString,int,int,com.oracle.truffle.api.strings.TruffleString$CodePointSet)
409401
meth public static com.oracle.truffle.api.strings.TruffleString$ByteIndexOfCodePointSetNode create()
410402
meth public static com.oracle.truffle.api.strings.TruffleString$ByteIndexOfCodePointSetNode getUncached()
411403
supr com.oracle.truffle.api.nodes.Node
412404

413-
CLSS public final static com.oracle.truffle.api.strings.TruffleString$ByteIndexOfCodePointSetNode$CodePointSet
414-
outer com.oracle.truffle.api.strings.TruffleString$ByteIndexOfCodePointSetNode
415-
meth public boolean isIntrinsicCandidate(com.oracle.truffle.api.strings.TruffleString$CodeRange)
416-
meth public static com.oracle.truffle.api.strings.TruffleString$ByteIndexOfCodePointSetNode$CodePointSet fromRanges(int[],com.oracle.truffle.api.strings.TruffleString$Encoding)
417-
supr java.lang.Object
418-
hfds encoding,indexOfNodes,ranges
419-
420405
CLSS public abstract static com.oracle.truffle.api.strings.TruffleString$ByteIndexOfStringNode
421406
outer com.oracle.truffle.api.strings.TruffleString
422407
meth public final int execute(com.oracle.truffle.api.strings.AbstractTruffleString,com.oracle.truffle.api.strings.AbstractTruffleString,int,int,com.oracle.truffle.api.strings.TruffleString$Encoding)
@@ -477,6 +462,13 @@ meth public static com.oracle.truffle.api.strings.TruffleString$CodePointLengthN
477462
meth public static com.oracle.truffle.api.strings.TruffleString$CodePointLengthNode getUncached()
478463
supr com.oracle.truffle.api.nodes.Node
479464

465+
CLSS public final static com.oracle.truffle.api.strings.TruffleString$CodePointSet
466+
outer com.oracle.truffle.api.strings.TruffleString
467+
meth public boolean isIntrinsicCandidate(com.oracle.truffle.api.strings.TruffleString$CodeRange)
468+
meth public static com.oracle.truffle.api.strings.TruffleString$CodePointSet fromRanges(int[],com.oracle.truffle.api.strings.TruffleString$Encoding)
469+
supr java.lang.Object
470+
hfds encoding,indexOfNodes,ranges
471+
480472
CLSS public final static !enum com.oracle.truffle.api.strings.TruffleString$CodeRange
481473
outer com.oracle.truffle.api.strings.TruffleString
482474
fld public final static com.oracle.truffle.api.strings.TruffleString$CodeRange ASCII
@@ -1256,12 +1248,6 @@ CLSS public abstract interface !annotation java.lang.annotation.Documented
12561248
anno 0 java.lang.annotation.Target(java.lang.annotation.ElementType[] value=[ANNOTATION_TYPE])
12571249
intf java.lang.annotation.Annotation
12581250

1259-
CLSS public abstract interface !annotation java.lang.annotation.Inherited
1260-
anno 0 java.lang.annotation.Documented()
1261-
anno 0 java.lang.annotation.Retention(java.lang.annotation.RetentionPolicy value=RUNTIME)
1262-
anno 0 java.lang.annotation.Target(java.lang.annotation.ElementType[] value=[ANNOTATION_TYPE])
1263-
intf java.lang.annotation.Annotation
1264-
12651251
CLSS public abstract interface !annotation java.lang.annotation.Retention
12661252
anno 0 java.lang.annotation.Documented()
12671253
anno 0 java.lang.annotation.Retention(java.lang.annotation.RetentionPolicy value=RUNTIME)

0 commit comments

Comments
 (0)