Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions stdlib/public/core/StringGraphemeBreaking.swift
Original file line number Diff line number Diff line change
Expand Up @@ -669,8 +669,12 @@ extension _GraphemeBreakingState {
}

let x = Unicode._GraphemeBreakProperty(from: scalar1)
let y = Unicode._GraphemeBreakProperty(from: scalar2)


// GB4 is handled here because we don't need to know `y` for this case
if x == .control {
return true
}

// This variable and the defer statement help toggle the isInEmojiSequence
// state variable to false after every decision of 'shouldBreak'. If we
// happen to see a rhs .extend or .zwj, then it's a signal that we should
Expand All @@ -684,6 +688,8 @@ extension _GraphemeBreakingState {
self.isInEmojiSequence = enterEmojiSequence
self.isInIndicSequence = enterIndicSequence
}

let y = Unicode._GraphemeBreakProperty(from: scalar2)

switch (x, y) {

Expand All @@ -692,9 +698,7 @@ extension _GraphemeBreakingState {
case (.any, .any):
return true

// GB4
case (.control, _):
return true
// (GB4 is handled above)

// GB5
case (_, .control):
Expand Down
1 change: 1 addition & 0 deletions stdlib/public/core/UnicodeBreakProperty.swift
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ extension Unicode {
case v
case zwj

@inline(__always)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The purpose of making sure this gets inlined is to allow the compiler enough visibility to combine the switch statement here with the switch statement in the caller, rather than going switch -> flag -> switch

init(from scalar: Unicode.Scalar) {
switch scalar.value {
// Some fast paths for ASCII characters...
Expand Down
166 changes: 83 additions & 83 deletions stdlib/public/stubs/Unicode/Common/GraphemeData.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2021 - 2023 Apple Inc. and the Swift project authors
// Copyright (c) 2021-2024 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
Expand All @@ -18,89 +18,89 @@

#include "swift/shims/SwiftStdint.h"

#define GRAPHEME_BREAK_DATA_COUNT 637
#define GRAPHEME_BREAK_DATA_COUNT 638

static const __swift_uint32_t _swift_stdlib_graphemeBreakProperties[637] = {
0x3E00000, 0x400007F, 0x800000A9, 0xAD, 0x800000AE, 0x2DE00300, 0x20C00483, 0x25800591,
0x200005BF, 0x202005C1, 0x202005C4, 0x200005C7, 0x40A00600, 0x21400610, 0x61C, 0x2280064B,
0x20000670, 0x20C006D6, 0x400006DD, 0x20A006DF, 0x202006E7, 0x206006EA, 0x4000070F, 0x20000711,
0x23400730, 0x214007A6, 0x210007EB, 0x200007FD, 0x20600816, 0x2100081B, 0x20400825, 0x20800829,
0x20400859, 0x40200890, 0x20E00898, 0x22E008CA, 0x400008E2, 0x23E008E3, 0x60000903, 0x2000093A,
0x6000093B, 0x2000093C, 0x6040093E, 0x20E00941, 0x60600949, 0x2000094D, 0x6020094E, 0x20C00951,
0x20200962, 0x20000981, 0x60200982, 0x200009BC, 0x200009BE, 0x602009BF, 0x206009C1, 0x602009C7,
0x602009CB, 0x200009CD, 0x200009D7, 0x202009E2, 0x200009FE, 0x20200A01, 0x60000A03, 0x20000A3C,
0x60400A3E, 0x20200A41, 0x20200A47, 0x20400A4B, 0x20000A51, 0x20200A70, 0x20000A75, 0x20200A81,
0x60000A83, 0x20000ABC, 0x60400ABE, 0x20800AC1, 0x20200AC7, 0x60000AC9, 0x60200ACB, 0x20000ACD,
0x20200AE2, 0x20A00AFA, 0x20000B01, 0x60200B02, 0x20000B3C, 0x20200B3E, 0x60000B40, 0x20600B41,
0x60200B47, 0x60200B4B, 0x20000B4D, 0x20400B55, 0x20200B62, 0x20000B82, 0x20000BBE, 0x60000BBF,
0x20000BC0, 0x60200BC1, 0x60400BC6, 0x60400BCA, 0x20000BCD, 0x20000BD7, 0x20000C00, 0x60400C01,
0x20000C04, 0x20000C3C, 0x20400C3E, 0x60600C41, 0x20400C46, 0x20600C4A, 0x20200C55, 0x20200C62,
0x20000C81, 0x60200C82, 0x20000CBC, 0x60000CBE, 0x20000CBF, 0x60200CC0, 0x20000CC2, 0x60200CC3,
0x20000CC6, 0x60200CC7, 0x60200CCA, 0x20200CCC, 0x20200CD5, 0x20200CE2, 0x60000CF3, 0x20200D00,
0x60200D02, 0x20200D3B, 0x20000D3E, 0x60200D3F, 0x20600D41, 0x60400D46, 0x60400D4A, 0x20000D4D,
0x40000D4E, 0x20000D57, 0x20200D62, 0x20000D81, 0x60200D82, 0x20000DCA, 0x20000DCF, 0x60200DD0,
0x20400DD2, 0x20000DD6, 0x60C00DD8, 0x20000DDF, 0x60200DF2, 0x20000E31, 0x60000E33, 0x20C00E34,
0x20E00E47, 0x20000EB1, 0x60000EB3, 0x21000EB4, 0x20C00EC8, 0x20200F18, 0x20000F35, 0x20000F37,
0x20000F39, 0x60200F3E, 0x21A00F71, 0x60000F7F, 0x20800F80, 0x20200F86, 0x21400F8D, 0x24600F99,
0x20000FC6, 0x2060102D, 0x60001031, 0x20A01032, 0x20201039, 0x6020103B, 0x2020103D, 0x60201056,
0x20201058, 0x2040105E, 0x20601071, 0x20001082, 0x60001084, 0x20201085, 0x2000108D, 0x2000109D,
0x2040135D, 0x20401712, 0x60001715, 0x20201732, 0x60001734, 0x20201752, 0x20201772, 0x202017B4,
0x600017B6, 0x20C017B7, 0x60E017BE, 0x200017C6, 0x602017C7, 0x214017C9, 0x200017DD, 0x2040180B,
0x180E, 0x2000180F, 0x20201885, 0x200018A9, 0x20401920, 0x60601923, 0x20201927, 0x60401929,
0x60201930, 0x20001932, 0x60A01933, 0x20401939, 0x20201A17, 0x60201A19, 0x20001A1B, 0x60001A55,
0x20001A56, 0x60001A57, 0x20C01A58, 0x20001A60, 0x20001A62, 0x20E01A65, 0x60A01A6D, 0x21201A73,
0x20001A7F, 0x23C01AB0, 0x20601B00, 0x60001B04, 0x20C01B34, 0x60001B3B, 0x20001B3C, 0x60801B3D,
0x20001B42, 0x60201B43, 0x21001B6B, 0x20201B80, 0x60001B82, 0x60001BA1, 0x20601BA2, 0x60201BA6,
0x20201BA8, 0x60001BAA, 0x20401BAB, 0x20001BE6, 0x60001BE7, 0x20201BE8, 0x60401BEA, 0x20001BED,
0x60001BEE, 0x20401BEF, 0x60201BF2, 0x60E01C24, 0x20E01C2C, 0x60201C34, 0x20201C36, 0x20401CD0,
0x21801CD4, 0x60001CE1, 0x20C01CE2, 0x20001CED, 0x20001CF4, 0x60001CF7, 0x20201CF8, 0x27E01DC0,
0x200B, 0x2000200C, 0x20200E, 0xC02028, 0x8000203C, 0x80002049, 0x1E02060, 0x240020D0, 0x80002122,
0x80002139, 0x80A02194, 0x802021A9, 0x8020231A, 0x80002328, 0x80002388, 0x800023CF, 0x814023E9,
0x804023F8, 0x800024C2, 0x802025AA, 0x800025B6, 0x800025C0, 0x806025FB, 0x80A02600, 0x81602607,
0x8E202614, 0x8EA02690, 0x81402708, 0x80002714, 0x80002716, 0x8000271D, 0x80002721, 0x80002728,
0x80202733, 0x80002744, 0x80002747, 0x8000274C, 0x8000274E, 0x80402753, 0x80002757, 0x80802763,
0x80402795, 0x800027A1, 0x800027B0, 0x800027BF, 0x80202934, 0x80402B05, 0x80202B1B, 0x80002B50,
0x80002B55, 0x20402CEF, 0x20002D7F, 0x23E02DE0, 0x20A0302A, 0x80003030, 0x8000303D, 0x20203099,
0x80003297, 0x80003299, 0x2060A66F, 0x2120A674, 0x2020A69E, 0x2020A6F0, 0x2000A802, 0x2000A806,
0x2000A80B, 0x6020A823, 0x2020A825, 0x6000A827, 0x2000A82C, 0x6020A880, 0x61E0A8B4, 0x2020A8C4,
0x2220A8E0, 0x2000A8FF, 0x20E0A926, 0x2140A947, 0x6020A952, 0x2040A980, 0x6000A983, 0x2000A9B3,
0x6020A9B4, 0x2060A9B6, 0x6020A9BA, 0x2020A9BC, 0x6040A9BE, 0x2000A9E5, 0x20A0AA29, 0x6020AA2F,
0x2020AA31, 0x6020AA33, 0x2020AA35, 0x2000AA43, 0x2000AA4C, 0x6000AA4D, 0x2000AA7C, 0x2000AAB0,
0x2040AAB2, 0x2020AAB7, 0x2020AABE, 0x2000AAC1, 0x6000AAEB, 0x2020AAEC, 0x6020AAEE, 0x6000AAF5,
0x2000AAF6, 0x6020ABE3, 0x2000ABE5, 0x6020ABE6, 0x2000ABE8, 0x6020ABE9, 0x6000ABEC, 0x2000ABED,
0x2000FB1E, 0x21E0FE00, 0x21E0FE20, 0xFEFF, 0x2020FF9E, 0x160FFF0, 0x200101FD, 0x200102E0,
0x20810376, 0x20410A01, 0x20210A05, 0x20610A0C, 0x20410A38, 0x20010A3F, 0x20210AE5, 0x20610D24,
0x20210EAB, 0x20410EFD, 0x21410F46, 0x20610F82, 0x60011000, 0x20011001, 0x60011002, 0x21C11038,
0x20011070, 0x20211073, 0x2041107F, 0x60011082, 0x604110B0, 0x206110B3, 0x602110B7, 0x202110B9,
0x400110BD, 0x200110C2, 0x400110CD, 0x20411100, 0x20811127, 0x6001112C, 0x20E1112D, 0x60211145,
0x20011173, 0x20211180, 0x60011182, 0x604111B3, 0x210111B6, 0x602111BF, 0x402111C2, 0x206111C9,
0x600111CE, 0x200111CF, 0x6041122C, 0x2041122F, 0x60211232, 0x20011234, 0x60011235, 0x20211236,
0x2001123E, 0x20011241, 0x200112DF, 0x604112E0, 0x20E112E3, 0x20211300, 0x60211302, 0x2021133B,
0x2001133E, 0x6001133F, 0x20011340, 0x60611341, 0x60211347, 0x6041134B, 0x20011357, 0x60211362,
0x20C11366, 0x20811370, 0x60411435, 0x20E11438, 0x60211440, 0x20411442, 0x60011445, 0x20011446,
0x2001145E, 0x200114B0, 0x602114B1, 0x20A114B3, 0x600114B9, 0x200114BA, 0x602114BB, 0x200114BD,
0x600114BE, 0x202114BF, 0x600114C1, 0x202114C2, 0x200115AF, 0x602115B0, 0x206115B2, 0x606115B8,
0x202115BC, 0x600115BE, 0x202115BF, 0x202115DC, 0x60411630, 0x20E11633, 0x6021163B, 0x2001163D,
0x6001163E, 0x2021163F, 0x200116AB, 0x600116AC, 0x200116AD, 0x602116AE, 0x20A116B0, 0x600116B6,
0x200116B7, 0x2041171D, 0x20611722, 0x60011726, 0x20811727, 0x6041182C, 0x2101182F, 0x60011838,
0x20211839, 0x20011930, 0x60811931, 0x60211937, 0x2021193B, 0x6001193D, 0x2001193E, 0x4001193F,
0x60011940, 0x40011941, 0x60011942, 0x20011943, 0x604119D1, 0x206119D4, 0x202119DA, 0x606119DC,
0x200119E0, 0x600119E4, 0x21211A01, 0x20A11A33, 0x60011A39, 0x40011A3A, 0x20611A3B, 0x20011A47,
0x20A11A51, 0x60211A57, 0x20411A59, 0x40A11A84, 0x21811A8A, 0x60011A97, 0x20211A98, 0x60011C2F,
0x20C11C30, 0x20A11C38, 0x60011C3E, 0x20011C3F, 0x22A11C92, 0x60011CA9, 0x20C11CAA, 0x60011CB1,
0x20211CB2, 0x60011CB4, 0x20211CB5, 0x20A11D31, 0x20011D3A, 0x20211D3C, 0x20C11D3F, 0x40011D46,
0x20011D47, 0x60811D8A, 0x20211D90, 0x60211D93, 0x20011D95, 0x60011D96, 0x20011D97, 0x20211EF3,
0x60211EF5, 0x20211F00, 0x40011F02, 0x60011F03, 0x60211F34, 0x20811F36, 0x60211F3E, 0x20011F40,
0x60011F41, 0x20011F42, 0x1E13430, 0x20013440, 0x21C13447, 0x20816AF0, 0x20C16B30, 0x20016F4F,
0x66C16F51, 0x20616F8F, 0x20016FE4, 0x60216FF0, 0x2021BC9D, 0x61BCA0, 0x25A1CF00, 0x22C1CF30,
0x2001D165, 0x6001D166, 0x2041D167, 0x6001D16D, 0x2081D16E, 0xE1D173, 0x20E1D17B, 0x20C1D185,
0x2061D1AA, 0x2041D242, 0x26C1DA00, 0x2621DA3B, 0x2001DA75, 0x2001DA84, 0x2081DA9B, 0x21C1DAA1,
0x20C1E000, 0x2201E008, 0x20C1E01B, 0x2021E023, 0x2081E026, 0x2001E08F, 0x20C1E130, 0x2001E2AE,
0x2061E2EC, 0x2061E4EC, 0x20C1E8D0, 0x20C1E944, 0x9FE1F000, 0x8041F10D, 0x8001F12F, 0x80A1F16C,
0x8021F17E, 0x8001F18E, 0x8121F191, 0x8701F1AD, 0x81C1F201, 0x8001F21A, 0x8001F22F, 0x8101F232,
0x8061F23C, 0xB621F249, 0x2081F3FB, 0xA7A1F400, 0xA121F546, 0x8FE1F680, 0x8161F774, 0x8541F7D5,
0x8061F80C, 0x80E1F848, 0x80A1F85A, 0x80E1F888, 0x8A21F8AE, 0x85C1F90C, 0x8121F93C, 0xB701F947,
0x3EE0000, 0x2BEE0020, 0xFEE0080, 0x3DEE0100,
static const __swift_uint32_t _swift_stdlib_graphemeBreakProperties[638] = {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't really need the dummy 0th element, but also it doesn't really matter =)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But the 4 bytes of binary size Steve!

0x0, 0x2020FF9E, 0x60201C34, 0x604119D1, 0x20200D00, 0x20A0302A, 0x60211347, 0x2021BC9D,
0x20000A3C, 0x202017B4, 0x800025C0, 0x6040A9BE, 0x20811127, 0x60411630, 0x20011D3A, 0x9FE1F000,
0x20800829, 0x60000BBF, 0x20000F37, 0x21201A73, 0x80002049, 0x8000274E, 0x2000A82C, 0x6000AAEB,
0x60011000, 0x60211232, 0x600114B9, 0x20811727, 0x21811A8A, 0x60211F34, 0x2001DA75, 0xA121F546,
0x2280064B, 0x20C00951, 0x20000ACD, 0x20200C62, 0x60200DD0, 0x60201056, 0x60401929, 0x60201BA6,
0x60001CF7, 0x80002328, 0x80002716, 0x80202934, 0x2020A69E, 0x6020A952, 0x2000AA4C, 0x2000ABE8,
0x20410A38, 0x604110B0, 0x210111B6, 0x20E112E3, 0x60211440, 0x200115AF, 0x200116AD, 0x2021193B,
0x60011A39, 0x22A11C92, 0x20011D95, 0x21C13447, 0x2081D16E, 0x2081E026, 0x81C1F201, 0x8A21F8AE,
0x25800591, 0x20000711, 0x2000093A, 0x602009C7, 0x20200A81, 0x20600B41, 0x60400C01, 0x60200CC3,
0x20000D4D, 0x20C00E34, 0x24600F99, 0x2000109D, 0x2040180B, 0x60001A55, 0x60801B3D, 0x20001BED,
0x60001CE1, 0x2000200C, 0x80002139, 0x804023F8, 0x8E202614, 0x80202733, 0x80402795, 0x80002B55,
0x80003297, 0x2000A80B, 0x2220A8E0, 0x6020A9B4, 0x2020AA31, 0x2040AAB2, 0x2000AAF6, 0x2000FB1E,
0x20810376, 0x20210EAB, 0x20011070, 0x400110BD, 0x20011173, 0x600111CE, 0x2001123E, 0x2001133E,
0x20C11366, 0x2001145E, 0x600114BE, 0x202115BC, 0x6001163E, 0x200116B7, 0x20211839, 0x60011940,
0x200119E0, 0x20A11A51, 0x20C11C30, 0x20211CB2, 0x20011D47, 0x60211EF5, 0x60011F41, 0x66C16F51,
0x2001D165, 0x2061D1AA, 0x20C1E000, 0x2061E2EC, 0x8021F17E, 0x8061F23C, 0x8061F80C, 0x3EE0000,
0xAD, 0x200005C7, 0x20A006DF, 0x200007FD, 0x22E008CA, 0x20E00941, 0x200009BC, 0x202009E2,
0x20400A4B, 0x20800AC1, 0x60200B02, 0x20400B55, 0x60400BCA, 0x60600C41, 0x60000CBE, 0x20200CCC,
0x60200D3F, 0x20000D81, 0x20000DDF, 0x21000EB4, 0x60000F7F, 0x20A01032, 0x20001082, 0x20201732,
0x200017C6, 0x200018A9, 0x20401939, 0x20001A60, 0x60001B04, 0x20201B80, 0x20001BE6, 0x60E01C24,
0x20401CD0, 0x20001CED, 0x27E01DC0, 0xC02028, 0x240020D0, 0x802021A9, 0x800023CF, 0x802025AA,
0x80A02600, 0x81402708, 0x80002721, 0x80002747, 0x80002757, 0x800027B0, 0x80202B1B, 0x20002D7F,
0x8000303D, 0x2060A66F, 0x2000A802, 0x2020A825, 0x61E0A8B4, 0x20E0A926, 0x6000A983, 0x6020A9BA,
0x20A0AA29, 0x2020AA35, 0x2000AA7C, 0x2020AABE, 0x6020AAEE, 0x2000ABE5, 0x6000ABEC, 0x21E0FE20,
0x200101FD, 0x20210A05, 0x20210AE5, 0x21410F46, 0x60011002, 0x2041107F, 0x602110B7, 0x400110CD,
0x20E1112D, 0x60011182, 0x402111C2, 0x6041122C, 0x60011235, 0x200112DF, 0x60211302, 0x20011340,
0x20011357, 0x60411435, 0x60011445, 0x602114B1, 0x602114BB, 0x600114C1, 0x206115B2, 0x202115BF,
0x6021163B, 0x200116AB, 0x20A116B0, 0x20611722, 0x2101182F, 0x60811931, 0x2001193E, 0x60011942,
0x202119DA, 0x21211A01, 0x20611A3B, 0x20411A59, 0x20211A98, 0x60011C3E, 0x20C11CAA, 0x20211CB5,
0x20C11D3F, 0x20211D90, 0x20011D97, 0x40011F02, 0x60211F3E, 0x1E13430, 0x20C16B30, 0x20016FE4,
0x25A1CF00, 0x2041D167, 0x20E1D17B, 0x26C1DA00, 0x2081DA9B, 0x20C1E01B, 0x20C1E130, 0x20C1E8D0,
0x8001F12F, 0x8121F191, 0x8001F22F, 0x2081F3FB, 0x8161F774, 0x80A1F85A, 0x8121F93C, 0xFEE0080,
0x400007F, 0x2DE00300, 0x202005C1, 0x21400610, 0x20C006D6, 0x206006EA, 0x214007A6, 0x2100081B,
0x40200890, 0x23E008E3, 0x2000093C, 0x2000094D, 0x20000981, 0x602009BF, 0x200009CD, 0x20200A01,
0x20200A41, 0x20200A70, 0x20000ABC, 0x60000AC9, 0x20A00AFA, 0x20200B3E, 0x60200B4B, 0x20000B82,
0x60200BC1, 0x20000BD7, 0x20000C3C, 0x20600C4A, 0x60200C82, 0x60200CC0, 0x60200CC7, 0x20200CE2,
0x20200D3B, 0x60400D46, 0x20000D57, 0x20000DCA, 0x20000DD6, 0x20000E31, 0x20000EB1, 0x20200F18,
0x60200F3E, 0x20200F86, 0x2060102D, 0x6020103B, 0x2040105E, 0x20201085, 0x20401712, 0x20201752,
0x20C017B7, 0x214017C9, 0x2000180F, 0x60601923, 0x20001932, 0x60201A19, 0x60001A57, 0x20E01A65,
0x23C01AB0, 0x60001B3B, 0x60201B43, 0x60001BA1, 0x60001BAA, 0x20201BE8, 0x20401BEF, 0x20E01C2C,
0x20201C36, 0x21801CD4, 0x20C01CE2, 0x20001CF4, 0x20201CF8, 0x200B, 0x20200E, 0x8000203C,
0x1E02060, 0x80002122, 0x80A02194, 0x8020231A, 0x80002388, 0x814023E9, 0x800024C2, 0x800025B6,
0x806025FB, 0x81602607, 0x8EA02690, 0x80002714, 0x8000271D, 0x80002728, 0x80002744, 0x8000274C,
0x80402753, 0x80802763, 0x800027A1, 0x800027BF, 0x80402B05, 0x80002B50, 0x20402CEF, 0x23E02DE0,
0x80003030, 0x20203099, 0x80003299, 0x2120A674, 0x2020A6F0, 0x2000A806, 0x6020A823, 0x6000A827,
0x6020A880, 0x2020A8C4, 0x2000A8FF, 0x2140A947, 0x2040A980, 0x2000A9B3, 0x2060A9B6, 0x2020A9BC,
0x2000A9E5, 0x6020AA2F, 0x6020AA33, 0x2000AA43, 0x6000AA4D, 0x2000AAB0, 0x2020AAB7, 0x2000AAC1,
0x2020AAEC, 0x6000AAF5, 0x6020ABE3, 0x6020ABE6, 0x6020ABE9, 0x2000ABED, 0x21E0FE00, 0xFEFF,
0x160FFF0, 0x200102E0, 0x20410A01, 0x20610A0C, 0x20010A3F, 0x20610D24, 0x20410EFD, 0x20610F82,
0x20011001, 0x21C11038, 0x20211073, 0x60011082, 0x206110B3, 0x202110B9, 0x200110C2, 0x20411100,
0x6001112C, 0x60211145, 0x20211180, 0x604111B3, 0x602111BF, 0x206111C9, 0x200111CF, 0x2041122F,
0x20011234, 0x20211236, 0x20011241, 0x604112E0, 0x20211300, 0x2021133B, 0x6001133F, 0x60611341,
0x6041134B, 0x60211362, 0x20811370, 0x20E11438, 0x20411442, 0x20011446, 0x200114B0, 0x20A114B3,
0x200114BA, 0x200114BD, 0x202114BF, 0x202114C2, 0x602115B0, 0x606115B8, 0x600115BE, 0x202115DC,
0x20E11633, 0x2001163D, 0x2021163F, 0x600116AC, 0x602116AE, 0x600116B6, 0x2041171D, 0x60011726,
0x6041182C, 0x60011838, 0x20011930, 0x60211937, 0x6001193D, 0x4001193F, 0x40011941, 0x20011943,
0x206119D4, 0x606119DC, 0x600119E4, 0x20A11A33, 0x40011A3A, 0x20011A47, 0x60211A57, 0x40A11A84,
0x60011A97, 0x60011C2F, 0x20A11C38, 0x20011C3F, 0x60011CA9, 0x60011CB1, 0x60011CB4, 0x20A11D31,
0x20211D3C, 0x40011D46, 0x60811D8A, 0x60211D93, 0x60011D96, 0x20211EF3, 0x20211F00, 0x60011F03,
0x20811F36, 0x20011F40, 0x20011F42, 0x20013440, 0x20816AF0, 0x20016F4F, 0x20616F8F, 0x60216FF0,
0x61BCA0, 0x22C1CF30, 0x6001D166, 0x6001D16D, 0xE1D173, 0x20C1D185, 0x2041D242, 0x2621DA3B,
0x2001DA84, 0x21C1DAA1, 0x2201E008, 0x2021E023, 0x2001E08F, 0x2001E2AE, 0x2061E4EC, 0x20C1E944,
0x8041F10D, 0x80A1F16C, 0x8001F18E, 0x8701F1AD, 0x8001F21A, 0x8101F232, 0xB621F249, 0xA7A1F400,
0x8FE1F680, 0x8541F7D5, 0x80E1F848, 0x80E1F888, 0x85C1F90C, 0xB701F947, 0x2BEE0020, 0x3DEE0100,
0x3E00000, 0x800000A9, 0x800000AE, 0x20C00483, 0x200005BF, 0x202005C4, 0x40A00600, 0x61C,
0x20000670, 0x400006DD, 0x202006E7, 0x4000070F, 0x23400730, 0x210007EB, 0x20600816, 0x20400825,
0x20400859, 0x20E00898, 0x400008E2, 0x60000903, 0x6000093B, 0x6040093E, 0x60600949, 0x6020094E,
0x20200962, 0x60200982, 0x200009BE, 0x206009C1, 0x602009CB, 0x200009D7, 0x200009FE, 0x60000A03,
0x60400A3E, 0x20200A47, 0x20000A51, 0x20000A75, 0x60000A83, 0x60400ABE, 0x20200AC7, 0x60200ACB,
0x20200AE2, 0x20000B01, 0x20000B3C, 0x60000B40, 0x60200B47, 0x20000B4D, 0x20200B62, 0x20000BBE,
0x20000BC0, 0x60400BC6, 0x20000BCD, 0x20000C00, 0x20000C04, 0x20400C3E, 0x20400C46, 0x20200C55,
0x20000C81, 0x20000CBC, 0x20000CBF, 0x20000CC2, 0x20000CC6, 0x60200CCA, 0x20200CD5, 0x60000CF3,
0x60200D02, 0x20000D3E, 0x20600D41, 0x60400D4A, 0x40000D4E, 0x20200D62, 0x60200D82, 0x20000DCF,
0x20400DD2, 0x60C00DD8, 0x60200DF2, 0x60000E33, 0x20E00E47, 0x60000EB3, 0x20C00EC8, 0x20000F35,
0x20000F39, 0x21A00F71, 0x20800F80, 0x21400F8D, 0x20000FC6, 0x60001031, 0x20201039, 0x2020103D,
0x20201058, 0x20601071, 0x60001084, 0x2000108D, 0x2040135D, 0x60001715, 0x60001734, 0x20201772,
0x600017B6, 0x60E017BE, 0x602017C7, 0x200017DD, 0x180E, 0x20201885, 0x20401920, 0x20201927,
0x60201930, 0x60A01933, 0x20201A17, 0x20001A1B, 0x20001A56, 0x20C01A58, 0x20001A62, 0x60A01A6D,
0x20001A7F, 0x20601B00, 0x20C01B34, 0x20001B3C, 0x20001B42, 0x21001B6B, 0x60001B82, 0x20601BA2,
0x20201BA8, 0x20401BAB, 0x60001BE7, 0x60401BEA, 0x60001BEE, 0x60201BF2,
};

static const __swift_uint16_t _swift_stdlib_linkingConsonant_ranks[165] = {
Expand Down
40 changes: 16 additions & 24 deletions stdlib/public/stubs/Unicode/UnicodeGrapheme.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,48 +18,40 @@
#include "swift/shims/UnicodeData.h"
#include <limits>


Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Azoy could you look over the changes to the C++ code and the strategy used?

SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint8_t _swift_stdlib_getGraphemeBreakProperty(__swift_uint32_t scalar) {
#if !SWIFT_STDLIB_ENABLE_UNICODE_DATA
swift::swift_abortDisabledUnicodeSupport();
#else
auto low = 0;
auto high = GRAPHEME_BREAK_DATA_COUNT - 1;

while (high >= low) {
auto idx = low + (high - low) / 2;

auto entry = _swift_stdlib_graphemeBreakProperties[idx];

auto index = 1; //0th element is a dummy element
while (index < GRAPHEME_BREAK_DATA_COUNT) {
auto entry = _swift_stdlib_graphemeBreakProperties[index];

// Shift the enum and range count out of the value.
auto lower = (entry << 11) >> 11;

// Shift the enum out first, then shift out the scalar value.
auto upper = lower + ((entry << 3) >> 24);

// Shift everything out.
auto enumValue = (__swift_uint8_t)(entry >> 29);

// Special case: extendedPictographic, which uses an extra bit for the range.
if (enumValue == 5) {
upper = lower + ((entry << 2) >> 23);
}

if (scalar >= lower && scalar <= upper) {
return enumValue;
}

if (scalar > upper) {
low = idx + 1;
continue;
}


// The left child of the current node in our virtual tree is at index * 2;
// the right child is at (index * 2) + 1.
if (scalar < lower) {
Copy link
Contributor Author

@Catfish-Man Catfish-Man Feb 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This branch could be hoisted above fetching upper and enumValue, but the optimizer successfully does that for us, and it reads better this way

high = idx - 1;
continue;
index = 2 * index;
} else if (scalar <= upper) {
return enumValue;
} else {
index = 2 * index + 1;
}
}

// If we made it out here, then our scalar was not found in the grapheme
// array (this occurs when a scalar doesn't map to any grapheme break
// property). Return the max value here to indicate .any.
Expand Down
Loading