Skip to content

Commit e15f073

Browse files
authored
[browser][non-icu] HybridGlobalization faster encoding for change case. (#85516)
* This speeds up changing case roughly 2x.
* JS's TextDecoder always replaces invalid code points (unpaired surrogates become U+FFFD).
* Optimization idea by @kg.
1 parent 0be256e commit e15f073

File tree

3 files changed

+24
-25
lines changed

3 files changed

+24
-25
lines changed

src/libraries/System.Globalization/tests/System/Globalization/TextInfoTests.cs

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -325,12 +325,14 @@ public void ToLower_Netcore(string name, string str, string expected)
325325
[Fact]
326326
public void ToLower_InvalidSurrogates()
327327
{
328+
bool usesTextDecoder = PlatformDetection.IsHybridGlobalizationOnBrowser && PlatformDetection.IsBrowserDomSupportedOrNodeJS;
329+
328330
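// Invalid UTF-16 in a string (mismatched surrogate pairs) should be unchanged, except when the JS TextDecoder is used (usesTextDecoder): it replaces each unpaired surrogate with U+FFFD.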
329331
foreach (string cultureName in new string[] { "", "en-US", "fr" })
330332
{
331-
ToLower(cultureName, "BE CAREFUL, \uD83C\uD83C, THIS ONE IS TRICKY", "be careful, \uD83C\uD83C, this one is tricky");
332-
ToLower(cultureName, "BE CAREFUL, \uDF08\uD83C, THIS ONE IS TRICKY", "be careful, \uDF08\uD83C, this one is tricky");
333-
ToLower(cultureName, "BE CAREFUL, \uDF08\uDF08, THIS ONE IS TRICKY", "be careful, \uDF08\uDF08, this one is tricky");
333+
ToLower(cultureName, "\uD83C\uD83C", usesTextDecoder ? "\uFFFD\uFFFD" : "\uD83C\uD83C");
334+
ToLower(cultureName, "BE CAREFUL, \uDF08\uD83C, THIS ONE IS TRICKY", usesTextDecoder ? "be careful, \uFFFD\uFFFD, this one is tricky" : "be careful, \uDF08\uD83C, this one is tricky");
335+
ToLower(cultureName, "BE CAREFUL, \uDF08\uDF08, THIS ONE IS TRICKY", usesTextDecoder ? "be careful, \uFFFD\uFFFD, this one is tricky" : "be careful, \uDF08\uDF08, this one is tricky");
334336
}
335337
}
336338

@@ -452,12 +454,14 @@ public void ToUpper_netcore(string name, string str, string expected)
452454
[Fact]
453455
public void ToUpper_InvalidSurrogates()
454456
{
457+
bool usesTextDecoder = PlatformDetection.IsHybridGlobalizationOnBrowser && PlatformDetection.IsBrowserDomSupportedOrNodeJS;
458+
455459
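// Invalid UTF-16 in a string (mismatched surrogate pairs) should be unchanged, except when the JS TextDecoder is used (usesTextDecoder): it replaces each unpaired surrogate with U+FFFD.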
456460
foreach (string cultureName in new string[] { "", "en-US", "fr"})
457461
{
458-
ToUpper(cultureName, "be careful, \uD83C\uD83C, this one is tricky", "BE CAREFUL, \uD83C\uD83C, THIS ONE IS TRICKY");
459-
ToUpper(cultureName, "be careful, \uDF08\uD83C, this one is tricky", "BE CAREFUL, \uDF08\uD83C, THIS ONE IS TRICKY");
460-
ToUpper(cultureName, "be careful, \uDF08\uDF08, this one is tricky", "BE CAREFUL, \uDF08\uDF08, THIS ONE IS TRICKY");
462+
ToUpper(cultureName, "be careful, \uD83C\uD83C, this one is tricky", usesTextDecoder ? "BE CAREFUL, \uFFFD\uFFFD, THIS ONE IS TRICKY" : "BE CAREFUL, \uD83C\uD83C, THIS ONE IS TRICKY");
463+
ToUpper(cultureName, "be careful, \uDF08\uD83C, this one is tricky", usesTextDecoder ? "BE CAREFUL, \uFFFD\uFFFD, THIS ONE IS TRICKY" : "BE CAREFUL, \uDF08\uD83C, THIS ONE IS TRICKY");
464+
ToUpper(cultureName, "be careful, \uDF08\uDF08, this one is tricky", usesTextDecoder ? "BE CAREFUL, \uFFFD\uFFFD, THIS ONE IS TRICKY" : "BE CAREFUL, \uDF08\uDF08, THIS ONE IS TRICKY");
461465
}
462466
}
463467

src/mono/wasm/runtime/hybrid-globalization.ts

Lines changed: 9 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1,24 +1,23 @@
11
// Licensed to the .NET Foundation under one or more agreements.
22
// The .NET Foundation licenses this file to you under the MIT license.
33

4-
import { Module } from "./globals";
54
import { mono_wasm_new_external_root } from "./roots";
65
import { MonoString, MonoStringRef } from "./types";
76
import { Int32Ptr } from "./types/emscripten";
87
import { conv_string_root, js_string_to_mono_string_root, string_decoder } from "./strings";
9-
import { setU16 } from "./memory";
8+
import { setU16_unchecked } from "./memory";
109

11-
export function mono_wasm_change_case_invariant(exceptionMessage: Int32Ptr, src: number, srcLength: number, dst: number, dstLength: number, toUpper: number): void {
12-
try {
13-
const input = get_utf16_string(src, srcLength);
10+
export function mono_wasm_change_case_invariant(exceptionMessage: Int32Ptr, src: number, srcLength: number, dst: number, dstLength: number, toUpper: number) : void{
11+
try{
12+
const input = string_decoder.decode(<any>src, <any>(src + 2*srcLength));
1413
let result = toUpper ? input.toUpperCase() : input.toLowerCase();
1514
// Unicode defines some codepoints which expand into multiple codepoints,
1615
// we do not support this expansion, so when the result would not fit we keep the input unchanged
1716
if (result.length > dstLength)
1817
result = input;
1918

20-
for (let i = 0; i < result.length; i++)
21-
setU16(dst + i * 2, result.charCodeAt(i));
19+
for (let i = 0, j = dst; i < result.length; i++, j += 2)
20+
setU16_unchecked(j, result.charCodeAt(i));
2221
}
2322
catch (ex: any) {
2423
pass_exception_details(ex, exceptionMessage);
@@ -31,13 +30,13 @@ export function mono_wasm_change_case(exceptionMessage: Int32Ptr, culture: MonoS
3130
const cultureName = conv_string_root(cultureRoot);
3231
if (!cultureName)
3332
throw new Error("Cannot change case, the culture name is null.");
34-
const input = get_utf16_string(src, srcLength);
33+
const input = string_decoder.decode(<any>src, <any>(src + 2*srcLength));
3534
let result = toUpper ? input.toLocaleUpperCase(cultureName) : input.toLocaleLowerCase(cultureName);
3635
if (result.length > destLength)
3736
result = input;
3837

39-
for (let i = 0; i < destLength; i++)
40-
setU16(dst + i * 2, result.charCodeAt(i));
38+
for (let i = 0, j = dst; i < result.length; i++, j += 2)
39+
setU16_unchecked(j, result.charCodeAt(i));
4140
}
4241
catch (ex: any) {
4342
pass_exception_details(ex, exceptionMessage);
@@ -47,14 +46,6 @@ export function mono_wasm_change_case(exceptionMessage: Int32Ptr, culture: MonoS
4746
}
4847
}
4948

50-
function get_utf16_string(ptr: number, length: number): string {
51-
const view = new Uint16Array(Module.HEAPU16.buffer, ptr, length);
52-
let string = "";
53-
for (let i = 0; i < length; i++)
54-
string += String.fromCharCode(view[i]);
55-
return string;
56-
}
57-
5849
export function mono_wasm_compare_string(exceptionMessage: Int32Ptr, culture: MonoStringRef, str1: number, str1Length: number, str2: number, str2Length: number, options: number): number {
5950
const cultureRoot = mono_wasm_new_external_root<MonoString>(culture);
6051
try {

src/mono/wasm/runtime/memory.ts

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,10 +2,10 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33

44
import monoWasmThreads from "consts:monoWasmThreads";
5-
import { Module, runtimeHelpers } from "./globals";
65
import { mono_assert, MemOffset, NumberOrPointer } from "./types";
76
import { VoidPtr, CharPtr } from "./types/emscripten";
87
import cwraps, { I52Error } from "./cwraps";
8+
import { Module, runtimeHelpers } from "./globals";
99

1010
const alloca_stack: Array<VoidPtr> = [];
1111
const alloca_buffer_size = 32 * 1024;
@@ -73,6 +73,10 @@ export function setU16(offset: MemOffset, value: number): void {
7373
Module.HEAPU16[<any>offset >>> 1] = value;
7474
}
7575

76+
export function setU16_unchecked(offset: MemOffset, value: number): void {
77+
Module.HEAPU16[<any>offset >>> 1] = value;
78+
}
79+
7680
export function setU32_unchecked(offset: MemOffset, value: NumberOrPointer): void {
7781
Module.HEAPU32[<any>offset >>> 2] = <number><any>value;
7882
}

0 commit comments

Comments
 (0)