Skip to content

Commit de6bcc2

Browse files
authored
Fix packed enum decoding benchmark (#979)
While investigating why decoding packed enums is so much slower in AOT compared to decoding packed int32 (both are varints on the wire) I noticed that the enum decoding benchmark should actually be slower, because currently TFA is able to specialize the enum int value to Dart enum value mapping to a direct call, in this function: ```dart ProtobufEnum? _decodeEnum( int tagNumber, ExtensionRegistry? registry, int rawValue) { final f = valueOfFunc(tagNumber); if (f != null) { return f(rawValue); // <------------------- HERE } ... } ``` Wasm code for this function, before this PR: ```wat (func $BuilderInfo._decodeEnum (;641;) (param $var0 (ref $BuilderInfo_214)) (param $var1 i64) (param $var2 i64) (result (ref null $Enum)) (local $var3 (ref null $FieldInfo_223)) local.get $var0 struct.get $BuilderInfo_214 $field4 i32.const 71 local.get $var1 struct.new $BoxedInt call $_DefaultMap&_HashFieldBase&MapMixin&_HashBase&_OperatorEqualsAndHashCode&_LinkedHashMapMixin.[] ref.cast null $FieldInfo_223 local.tee $var3 ref.is_null if (result (ref null $#Closure-0-1_815)) ref.null none else local.get $var3 struct.get $FieldInfo_223 $field9 end ref.is_null i32.eqz if local.get $var2 call $Enum.valueOf return end ref.null none ) ``` Note that this calls `$Enum.valueOf` even though this function is generic on the enum type. With this PR we add another enum to the proto file and decode it in setup, so that TFA is unable to specialize `_decodeEnum` to one specific enum type. 
New code: ```wat (func $BuilderInfo._decodeEnum (;643;) (param $var0 (ref $BuilderInfo_214)) (param $var1 i64) (param $var2 i64) (result (ref null $ProtobufEnum)) (local $var3 (ref null $FieldInfo_229)) (local $var4 (ref null $#Closure-0-1)) (local $var5 (ref $#Closure-0-1)) local.get $var0 struct.get $BuilderInfo_214 $field4 i32.const 71 local.get $var1 struct.new $BoxedInt call $_DefaultMap&_HashFieldBase&MapMixin&_HashBase&_OperatorEqualsAndHashCode&_LinkedHashMapMixin.[] ref.cast null $FieldInfo_229 local.tee $var3 ref.is_null if (result (ref null $#Closure-0-1)) ref.null none else local.get $var3 struct.get $FieldInfo_229 $field9 end local.tee $var4 ref.is_null i32.eqz if local.get $var4 ref.as_non_null local.tee $var5 struct.get $#Closure-0-1 $field2 i32.const 71 local.get $var2 struct.new $BoxedInt local.get $var5 struct.get $#Closure-0-1 $field3 struct.get $#Vtable-0-1 $field1 call_ref $type39 ref.cast null $ProtobufEnum return end ref.null none ) ``` Wasm benchmark results: ``` // Before protobuf_PackedEnumDecoding(RunTimeRaw): 41120.0 us. // After protobuf_PackedEnumDecoding(RunTimeRaw): 52750.0 us. ``` VM benchmark results: ``` // Before protobuf_PackedEnumDecoding(RunTimeRaw): 45051.520000000004 us. // After protobuf_PackedEnumDecoding(RunTimeRaw): 54661.125 us. ```
1 parent 9daf5ca commit de6bcc2

File tree

2 files changed

+40
-9
lines changed

2 files changed

+40
-9
lines changed

benchmarks/bin/binary_decode_packed.dart

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,13 +146,35 @@ class PackedEnumDecodingBenchmark extends BenchmarkBase {
146146
PackedEnumDecodingBenchmark() : super('PackedEnumDecoding') {
147147
final rand = Random(123);
148148
final message = PackedFields();
149-
final numEnums = Enum.values.length;
149+
final numEnums = Enum1.values.length;
150150
for (var i = 0; i < 1000000; i += 1) {
151-
message.packedEnum.add(Enum.values[rand.nextInt(numEnums)]);
151+
message.packedEnum1.add(Enum1.values[rand.nextInt(numEnums)]);
152152
}
153153
encoded = message.writeToBuffer();
154154
}
155155

156+
@override
157+
void setup() {
158+
// Decode different enums to prevent TFA from specializing enum decoding
159+
// code to one type.
160+
final rand = Random(123);
161+
final message = PackedFields();
162+
for (var i = 0; i < 100; i += 1) {
163+
message.packedEnum1.add(Enum1.values[rand.nextInt(Enum1.values.length)]);
164+
}
165+
for (var i = 0; i < 100; i += 1) {
166+
message.packedEnum2.add(Enum2.values[rand.nextInt(Enum2.values.length)]);
167+
}
168+
final encoded = message.writeToBuffer();
169+
final decoded = PackedFields()..mergeFromBuffer(encoded);
170+
if (decoded.packedEnum1.length != 100) {
171+
throw AssertionError('BUG');
172+
}
173+
if (decoded.packedEnum2.length != 100) {
174+
throw AssertionError('BUG');
175+
}
176+
}
177+
156178
@override
157179
void run() {
158180
sink = PackedFields()..mergeFromBuffer(encoded);

benchmarks/protos/packed_fields.proto

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,22 @@ message PackedFields {
88
repeated sint32 packedSint32 = 5 [packed = true];
99
repeated sint64 packedSint64 = 6 [packed = true];
1010
repeated bool packedBool = 7 [packed = true];
11-
repeated Enum packedEnum = 8 [packed = true];
11+
repeated Enum1 packedEnum1 = 8 [packed = true];
12+
repeated Enum2 packedEnum2 = 9 [packed = true];
1213
}
1314

14-
enum Enum {
15-
ENUM_1 = 0;
16-
ENUM_2 = 1;
17-
ENUM_3 = 2;
18-
ENUM_4 = 4;
19-
ENUM_5 = 5;
15+
enum Enum1 {
16+
ENUM_1_1 = 0;
17+
ENUM_1_2 = 1;
18+
ENUM_1_3 = 2;
19+
ENUM_1_4 = 4;
20+
ENUM_1_5 = 5;
21+
}
22+
23+
enum Enum2 {
24+
ENUM_2_1 = 0;
25+
ENUM_2_2 = 1;
26+
ENUM_2_3 = 2;
27+
ENUM_2_4 = 4;
28+
ENUM_2_5 = 5;
2029
}

0 commit comments

Comments
 (0)