From 158f2643ac6bb48cc4b12553ef7c7fbca39a0a52 Mon Sep 17 00:00:00 2001 From: hankluo6 Date: Sun, 27 Jul 2025 18:38:02 -0700 Subject: [PATCH 1/9] Fix duplicated attribute nodes in MLIR bytecode deserialization --- mlir/include/mlir/AsmParser/AsmParser.h | 3 ++- mlir/lib/AsmParser/DialectSymbolParser.cpp | 25 +++++++++++++++++++-- mlir/lib/AsmParser/ParserState.h | 3 +++ mlir/lib/Bytecode/Reader/BytecodeReader.cpp | 6 ++++- 4 files changed, 33 insertions(+), 4 deletions(-) diff --git a/mlir/include/mlir/AsmParser/AsmParser.h b/mlir/include/mlir/AsmParser/AsmParser.h index 33daf7ca26f49..f39b3bd853a2a 100644 --- a/mlir/include/mlir/AsmParser/AsmParser.h +++ b/mlir/include/mlir/AsmParser/AsmParser.h @@ -53,7 +53,8 @@ parseAsmSourceFile(const llvm::SourceMgr &sourceMgr, Block *block, /// null terminated. Attribute parseAttribute(llvm::StringRef attrStr, MLIRContext *context, Type type = {}, size_t *numRead = nullptr, - bool isKnownNullTerminated = false); + bool isKnownNullTerminated = false, + llvm::StringMap *attributesCache = nullptr); /// This parses a single MLIR type to an MLIR context if it was valid. If not, /// an error diagnostic is emitted to the context. diff --git a/mlir/lib/AsmParser/DialectSymbolParser.cpp b/mlir/lib/AsmParser/DialectSymbolParser.cpp index 8b14e71118c3a..de8e3c1fc1e72 100644 --- a/mlir/lib/AsmParser/DialectSymbolParser.cpp +++ b/mlir/lib/AsmParser/DialectSymbolParser.cpp @@ -245,6 +245,14 @@ static Symbol parseExtendedSymbol(Parser &p, AsmParserState *asmState, return nullptr; } + if constexpr (std::is_same_v) { + auto &cache = p.getState().symbols.attributesCache; + + auto cacheIt = cache.find(symbolData); + if (cacheIt != cache.end()) { + return cacheIt->second; + } + } return createSymbol(dialectName, symbolData, loc); } @@ -337,6 +345,7 @@ Type Parser::parseExtendedType() { template static T parseSymbol(StringRef inputStr, MLIRContext *context, size_t *numReadOut, bool isKnownNullTerminated, + llvm::StringMap *attributesCache, ParserFn &&parserFn) { // Set the buffer name to the string being parsed, so that it appears in error // diagnostics. @@ -348,6 +357,9 @@ static T parseSymbol(StringRef inputStr, MLIRContext *context, SourceMgr sourceMgr; sourceMgr.AddNewSourceBuffer(std::move(memBuffer), SMLoc()); SymbolState aliasState; + if (attributesCache) + aliasState.attributesCache = *attributesCache; + ParserConfig config(context); ParserState state(sourceMgr, config, aliasState, /*asmState=*/nullptr, /*codeCompleteContext=*/nullptr); @@ -358,6 +370,13 @@ static T parseSymbol(StringRef inputStr, MLIRContext *context, if (!symbol) return T(); + if constexpr (std::is_same_v) { + // Cache key is the symbol data without the dialect prefix. + StringRef cacheKey = inputStr.split('.').second; + if (attributesCache && !cacheKey.empty()) { + (*attributesCache)[cacheKey] = symbol; + } + } // Provide the number of bytes that were read. Token endTok = parser.getToken(); size_t numRead = @@ -374,13 +393,15 @@ static T parseSymbol(StringRef inputStr, MLIRContext *context, Attribute mlir::parseAttribute(StringRef attrStr, MLIRContext *context, Type type, size_t *numRead, - bool isKnownNullTerminated) { + bool isKnownNullTerminated, + llvm::StringMap *attributesCache) { return parseSymbol( - attrStr, context, numRead, isKnownNullTerminated, + attrStr, context, numRead, isKnownNullTerminated, attributesCache, [type](Parser &parser) { return parser.parseAttribute(type); }); } Type mlir::parseType(StringRef typeStr, MLIRContext *context, size_t *numRead, bool isKnownNullTerminated) { return parseSymbol(typeStr, context, numRead, isKnownNullTerminated, + /*attributesCache=*/nullptr, [](Parser &parser) { return parser.parseType(); }); } diff --git a/mlir/lib/AsmParser/ParserState.h b/mlir/lib/AsmParser/ParserState.h index 159058a18fa4e..aa53032107cbf 100644 --- a/mlir/lib/AsmParser/ParserState.h +++ b/mlir/lib/AsmParser/ParserState.h @@ -40,6 +40,9 @@ struct SymbolState { /// A map from unique integer identifier to DistinctAttr. DenseMap distinctAttributes; + + /// A map from unique string identifier to Attribute. + llvm::StringMap attributesCache; }; //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Bytecode/Reader/BytecodeReader.cpp b/mlir/lib/Bytecode/Reader/BytecodeReader.cpp index 44458d010c6c8..0f97443433774 100644 --- a/mlir/lib/Bytecode/Reader/BytecodeReader.cpp +++ b/mlir/lib/Bytecode/Reader/BytecodeReader.cpp @@ -895,6 +895,10 @@ class AttrTypeReader { SmallVector attributes; SmallVector types; + /// The map of cached attributes, used to avoid re-parsing the same + /// attribute multiple times. + llvm::StringMap attributesCache; + /// A location used for error emission. Location fileLoc; @@ -1235,7 +1239,7 @@ LogicalResult AttrTypeReader::parseAsmEntry(T &result, EncodingReader &reader, ::parseType(asmStr, context, &numRead, /*isKnownNullTerminated=*/true); else result = ::parseAttribute(asmStr, context, Type(), &numRead, - /*isKnownNullTerminated=*/true); + /*isKnownNullTerminated=*/true, &attributesCache); if (!result) return failure(); From 5e3bddf42fa5f7abbf43d2a6515c904ce7f2cbdb Mon Sep 17 00:00:00 2001 From: hankluo6 Date: Thu, 31 Jul 2025 00:59:27 -0700 Subject: [PATCH 2/9] Add test --- .../Dialect/LLVMIR/recursive-debuginfo.mlir | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 mlir/test/Dialect/LLVMIR/recursive-debuginfo.mlir diff --git a/mlir/test/Dialect/LLVMIR/recursive-debuginfo.mlir b/mlir/test/Dialect/LLVMIR/recursive-debuginfo.mlir new file mode 100644 index 0000000000000..45617ef422482 --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/recursive-debuginfo.mlir @@ -0,0 +1,32 @@ +// RUN: mlir-opt -emit-bytecode %s | mlir-translate --mlir-to-llvmir | FileCheck %s + +#di_basic_type = #llvm.di_basic_type +#di_file = #llvm.di_file<"foo.c" in "/mlir/"> +#di_file1 = #llvm.di_file<"foo.c" in "/mlir/"> +#di_subprogram = #llvm.di_subprogram, isRecSelf = true> +#di_compile_unit = #llvm.di_compile_unit, sourceLanguage = DW_LANG_C11, file = #di_file, producer = "MLIR", isOptimized = true, emissionKind = Full, nameTableKind = None> +#di_local_variable = #llvm.di_local_variable +#di_subroutine_type = #llvm.di_subroutine_type +#di_subprogram1 = #llvm.di_subprogram, id = distinct[2]<>, compileUnit = #di_compile_unit, scope = #di_file1, name = "main", file = #di_file1, line = 1, scopeLine = 1, subprogramFlags = "Definition|Optimized", type = #di_subroutine_type, retainedNodes = #di_local_variable> +#di_local_variable1 = #llvm.di_local_variable + +module attributes {dlti.dl_spec = #dlti.dl_spec : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>>, llvm.ident = "MLIR", llvm.target_triple = "x86_64-unknown-linux-gnu"} { + llvm.module_flags [#llvm.mlir.module_flag] + + // CHECK: define i32 @main + llvm.func @main() -> i32 attributes {passthrough = ["noinline"]} { + %0 = llvm.mlir.constant(0 : i32) : i32 loc(#loc3) + llvm.intr.dbg.value #di_local_variable1 = %0 : i32 loc(#loc7) + llvm.return %0 : i32 loc(#loc8) + } loc(#loc6) +} loc(#loc) +#loc = loc("foo.c":0:0) +#loc1 = loc("main") +#loc2 = loc("foo.c":1:0) +#loc3 = loc(unknown) +#loc4 = loc("foo.c":0:0) +#loc5 = loc("foo.c":3:0) +#loc6 = loc(fused<#di_subprogram1>[#loc1, #loc2]) +#loc7 = loc(fused<#di_subprogram1>[#loc4]) +#loc8 = loc(fused<#di_subprogram1>[#loc5]) + From 5864ec2f5ac451d29ec05a78dd0d78539c37d2d2 Mon Sep 17 00:00:00 2001 From: hankluo6 Date: Sat, 2 Aug 2025 01:48:40 -0700 Subject: [PATCH 3/9] Reduce to minimal test --- .../Dialect/LLVMIR/recursive-debuginfo.mlir | 35 ++++--------------- 1 file changed, 7 insertions(+), 28 deletions(-) diff --git a/mlir/test/Dialect/LLVMIR/recursive-debuginfo.mlir b/mlir/test/Dialect/LLVMIR/recursive-debuginfo.mlir index 45617ef422482..937a2f31368fa 100644 --- a/mlir/test/Dialect/LLVMIR/recursive-debuginfo.mlir +++ b/mlir/test/Dialect/LLVMIR/recursive-debuginfo.mlir @@ -1,32 +1,11 @@ -// RUN: mlir-opt -emit-bytecode %s | mlir-translate --mlir-to-llvmir | FileCheck %s +// RUN: mlir-opt -emit-bytecode %s | mlir-opt --mlir-print-debuginfo | FileCheck %s -#di_basic_type = #llvm.di_basic_type +// CHECK: llvm.di_subprogram +// CHECK-NOT: llvm.di_subprogram #di_file = #llvm.di_file<"foo.c" in "/mlir/"> -#di_file1 = #llvm.di_file<"foo.c" in "/mlir/"> #di_subprogram = #llvm.di_subprogram, isRecSelf = true> -#di_compile_unit = #llvm.di_compile_unit, sourceLanguage = DW_LANG_C11, file = #di_file, producer = "MLIR", isOptimized = true, emissionKind = Full, nameTableKind = None> -#di_local_variable = #llvm.di_local_variable -#di_subroutine_type = #llvm.di_subroutine_type -#di_subprogram1 = #llvm.di_subprogram, id = distinct[2]<>, compileUnit = #di_compile_unit, scope = #di_file1, name = "main", file = #di_file1, line = 1, scopeLine = 1, subprogramFlags = "Definition|Optimized", type = #di_subroutine_type, retainedNodes = #di_local_variable> -#di_local_variable1 = #llvm.di_local_variable - -module attributes {dlti.dl_spec = #dlti.dl_spec : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>>, llvm.ident = "MLIR", llvm.target_triple = "x86_64-unknown-linux-gnu"} { - llvm.module_flags [#llvm.mlir.module_flag] - - // CHECK: define i32 @main - llvm.func @main() -> i32 attributes {passthrough = ["noinline"]} { - %0 = llvm.mlir.constant(0 : i32) : i32 loc(#loc3) - llvm.intr.dbg.value #di_local_variable1 = %0 : i32 loc(#loc7) - llvm.return %0 : i32 loc(#loc8) - } loc(#loc6) -} loc(#loc) -#loc = loc("foo.c":0:0) -#loc1 = loc("main") -#loc2 = loc("foo.c":1:0) -#loc3 = loc(unknown) -#loc4 = loc("foo.c":0:0) -#loc5 = loc("foo.c":3:0) -#loc6 = loc(fused<#di_subprogram1>[#loc1, #loc2]) -#loc7 = loc(fused<#di_subprogram1>[#loc4]) -#loc8 = loc(fused<#di_subprogram1>[#loc5]) +#di_basic_type = #llvm.di_basic_type +#di_local_variable = #llvm.di_local_variable +module attributes {test.alias = #di_local_variable} { +} loc(fused<#di_subprogram>[]) \ No newline at end of file From 57310ed07b6a2ef4ec503fab88e901de70918833 Mon Sep 17 00:00:00 2001 From: hankluo6 Date: Thu, 7 Aug 2025 01:18:02 -0700 Subject: [PATCH 4/9] Use symbolData as cache key --- mlir/lib/AsmParser/DialectSymbolParser.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mlir/lib/AsmParser/DialectSymbolParser.cpp b/mlir/lib/AsmParser/DialectSymbolParser.cpp index de8e3c1fc1e72..57e13e13bb510 100644 --- a/mlir/lib/AsmParser/DialectSymbolParser.cpp +++ b/mlir/lib/AsmParser/DialectSymbolParser.cpp @@ -252,6 +252,8 @@ static Symbol parseExtendedSymbol(Parser &p, AsmParserState *asmState, if (cacheIt != cache.end()) { return cacheIt->second; } + cache[symbolData] = createSymbol(dialectName, symbolData, loc); + return cache[symbolData]; } return createSymbol(dialectName, symbolData, loc); } @@ -371,12 +373,10 @@ static T parseSymbol(StringRef inputStr, MLIRContext *context, return T(); if constexpr (std::is_same_v) { - // Cache key is the symbol data without the dialect prefix. - StringRef cacheKey = inputStr.split('.').second; - if (attributesCache && !cacheKey.empty()) { - (*attributesCache)[cacheKey] = symbol; - } + if (attributesCache) + *attributesCache = state.symbols.attributesCache; } + // Provide the number of bytes that were read. Token endTok = parser.getToken(); size_t numRead = From 6d6fd154817599df56f10d07eb9b74152e6f7103 Mon Sep 17 00:00:00 2001 From: hankluo6 Date: Thu, 7 Aug 2025 22:59:59 -0700 Subject: [PATCH 5/9] Update test --- mlir/test/Dialect/LLVMIR/recursive-debuginfo.mlir | 11 ----------- mlir/test/IR/recursive-distinct-attr.mlir | 9 +++++++++ 2 files changed, 9 insertions(+), 11 deletions(-) delete mode 100644 mlir/test/Dialect/LLVMIR/recursive-debuginfo.mlir create mode 100644 mlir/test/IR/recursive-distinct-attr.mlir diff --git a/mlir/test/Dialect/LLVMIR/recursive-debuginfo.mlir b/mlir/test/Dialect/LLVMIR/recursive-debuginfo.mlir deleted file mode 100644 index 937a2f31368fa..0000000000000 --- a/mlir/test/Dialect/LLVMIR/recursive-debuginfo.mlir +++ /dev/null @@ -1,11 +0,0 @@ -// RUN: mlir-opt -emit-bytecode %s | mlir-opt --mlir-print-debuginfo | FileCheck %s - -// CHECK: llvm.di_subprogram -// CHECK-NOT: llvm.di_subprogram -#di_file = #llvm.di_file<"foo.c" in "/mlir/"> -#di_subprogram = #llvm.di_subprogram, isRecSelf = true> -#di_basic_type = #llvm.di_basic_type -#di_local_variable = #llvm.di_local_variable - -module attributes {test.alias = #di_local_variable} { -} loc(fused<#di_subprogram>[]) \ No newline at end of file diff --git a/mlir/test/IR/recursive-distinct-attr.mlir b/mlir/test/IR/recursive-distinct-attr.mlir new file mode 100644 index 0000000000000..863440fcde86d --- /dev/null +++ b/mlir/test/IR/recursive-distinct-attr.mlir @@ -0,0 +1,9 @@ +// RUN: mlir-opt -emit-bytecode %s | mlir-opt --mlir-print-debuginfo | FileCheck %s + +// CHECK: distinct[0] +// CHECK-NOT: distinct[1] +#attr_ugly = #test end> +#attr_ugly1 = #test + +module attributes {test.alias = #attr_ugly} { +} loc(fused<#attr_ugly1>[]) \ No newline at end of file From c164d0086985a75d91eb04984d02acc027cf55b8 Mon Sep 17 00:00:00 2001 From: hankluo6 Date: Thu, 7 Aug 2025 23:00:44 -0700 Subject: [PATCH 6/9] Skip cached for attribute with type --- mlir/lib/AsmParser/DialectSymbolParser.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/mlir/lib/AsmParser/DialectSymbolParser.cpp b/mlir/lib/AsmParser/DialectSymbolParser.cpp index 57e13e13bb510..bb31b018bc754 100644 --- a/mlir/lib/AsmParser/DialectSymbolParser.cpp +++ b/mlir/lib/AsmParser/DialectSymbolParser.cpp @@ -247,13 +247,12 @@ static Symbol parseExtendedSymbol(Parser &p, AsmParserState *asmState, if constexpr (std::is_same_v) { auto &cache = p.getState().symbols.attributesCache; - auto cacheIt = cache.find(symbolData); - if (cacheIt != cache.end()) { + // Skip cached attribute if it has type. + if (cacheIt != cache.end() && !p.getToken().is(Token::colon)) return cacheIt->second; - } - cache[symbolData] = createSymbol(dialectName, symbolData, loc); - return cache[symbolData]; + + return cache[symbolData] = createSymbol(dialectName, symbolData, loc); } return createSymbol(dialectName, symbolData, loc); } From 2650cc83800601ac33b6f0fb9de45f03a624bbe9 Mon Sep 17 00:00:00 2001 From: hankluo6 Date: Thu, 14 Aug 2025 00:41:40 -0700 Subject: [PATCH 7/9] Fix format issue --- mlir/lib/AsmParser/DialectSymbolParser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/AsmParser/DialectSymbolParser.cpp b/mlir/lib/AsmParser/DialectSymbolParser.cpp index bb31b018bc754..416d8eb5f40e0 100644 --- a/mlir/lib/AsmParser/DialectSymbolParser.cpp +++ b/mlir/lib/AsmParser/DialectSymbolParser.cpp @@ -251,7 +251,7 @@ static Symbol parseExtendedSymbol(Parser &p, AsmParserState *asmState, // Skip cached attribute if it has type. if (cacheIt != cache.end() && !p.getToken().is(Token::colon)) return cacheIt->second; - + return cache[symbolData] = createSymbol(dialectName, symbolData, loc); } return createSymbol(dialectName, symbolData, loc); From b6799722b2308f8763ea74f922fd445ed2a1481c Mon Sep 17 00:00:00 2001 From: hankluo6 Date: Thu, 14 Aug 2025 00:41:48 -0700 Subject: [PATCH 8/9] Update test --- mlir/test/IR/recursive-distinct-attr.mlir | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/test/IR/recursive-distinct-attr.mlir b/mlir/test/IR/recursive-distinct-attr.mlir index 863440fcde86d..f5d0e95dd1798 100644 --- a/mlir/test/IR/recursive-distinct-attr.mlir +++ b/mlir/test/IR/recursive-distinct-attr.mlir @@ -5,5 +5,5 @@ #attr_ugly = #test end> #attr_ugly1 = #test -module attributes {test.alias = #attr_ugly} { -} loc(fused<#attr_ugly1>[]) \ No newline at end of file +module attributes {test.alias = #attr_ugly, test.alias1 = #attr_ugly1} { +} \ No newline at end of file From bc0c4207af9f3c070d831838d3fb41f7838b2b46 Mon Sep 17 00:00:00 2001 From: hankluo6 Date: Thu, 14 Aug 2025 09:11:32 -0700 Subject: [PATCH 9/9] Add comment --- mlir/test/IR/recursive-distinct-attr.mlir | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mlir/test/IR/recursive-distinct-attr.mlir b/mlir/test/IR/recursive-distinct-attr.mlir index f5d0e95dd1798..5afb5c59e0fcf 100644 --- a/mlir/test/IR/recursive-distinct-attr.mlir +++ b/mlir/test/IR/recursive-distinct-attr.mlir @@ -1,5 +1,9 @@ // RUN: mlir-opt -emit-bytecode %s | mlir-opt --mlir-print-debuginfo | FileCheck %s +// Verify that the distinct attribute which is used transitively +// through two aliases does not end up duplicated when round-tripped +// through bytecode. + // CHECK: distinct[0] // CHECK-NOT: distinct[1] #attr_ugly = #test end>