From 8b046ed6ac109ef37e407674265d22e875bfbaaf Mon Sep 17 00:00:00 2001 From: Peter Sollich Date: Wed, 23 Nov 2022 10:46:10 +0100 Subject: [PATCH 1/3] This fixes Github issue 78206 - a heap corruption problem associated with mark stack overflow. Dumps provided by the customer showed in all cases that the min_overflow_address/max_overflow_address fields had values different from their initial values of MAX_PTR and 0. This implies that a mark stack overflow has occurred, but has not been properly handled. Looking at the code, we realized that we may still have objects in the mark prefetch queue as we enter process_mark_overflow. These objects may cause another mark stack overflow when they are traced. So we need to drain the mark prefetch queue before we check the min_overflow_address/max_overflow_address fields. We provided a private build of clrgc.dll to the customer reporting the issue, and customer has validated that the fix resolves the issue. --- src/coreclr/gc/gc.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index da45255030bbe1..59da418959d3fc 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -25893,6 +25893,7 @@ BOOL gc_heap::process_mark_overflow(int condemned_gen_number) BOOL overflow_p = FALSE; recheck: + drain_mark_queue(); if ((! (max_overflow_address == 0) || ! (min_overflow_address == MAX_PTR))) { From 269fdd17c6f41c4f8392d60361610298ac3481cd Mon Sep 17 00:00:00 2001 From: Peter Sollich Date: Thu, 24 Nov 2022 11:16:43 +0100 Subject: [PATCH 2/3] Adressed code review feedback - replaced calls to drain_mark_queue where the mark queue should be empty with asserts. --- src/coreclr/gc/gc.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index 59da418959d3fc..d980881dd57bfa 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -26158,7 +26158,8 @@ void gc_heap::scan_dependent_handles (int condemned_gen_number, ScanContext *sc, if (process_mark_overflow(condemned_gen_number)) fUnscannedPromotions = true; - drain_mark_queue(); + // mark queue must be empty after process_mark_overflow + assert (mark_queue.get_next_marked() == nullptr); // Perform the scan and set the flag if any promotions resulted. if (GCScan::GcDhReScan(sc)) @@ -26776,7 +26777,9 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) // handle table has been fully promoted. GCScan::GcDhInitialScan(GCHeap::Promote, condemned_gen_number, max_generation, &sc); scan_dependent_handles(condemned_gen_number, &sc, true); - drain_mark_queue(); + + // mark queue must be empty after scan_dependent_handles + assert (mark_queue.get_next_marked() == nullptr); fire_mark_event (ETW::GC_ROOT_DH_HANDLES, current_promoted_bytes, last_promoted_bytes); #ifdef MULTIPLE_HEAPS @@ -26866,7 +26869,9 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) // Scan dependent handles again to promote any secondaries associated with primaries that were promoted // for finalization. As before scan_dependent_handles will also process any mark stack overflow. scan_dependent_handles(condemned_gen_number, &sc, false); - drain_mark_queue(); + + // mark queue must be empty after scan_dependent_handles + assert (mark_queue.get_next_marked() == nullptr); fire_mark_event (ETW::GC_ROOT_DH_HANDLES, current_promoted_bytes, last_promoted_bytes); #endif //FEATURE_PREMORTEM_FINALIZATION From e621aaf54b9b217ce4f96ef450097d28ecbb0e67 Mon Sep 17 00:00:00 2001 From: Peter Sollich Date: Thu, 24 Nov 2022 11:31:43 +0100 Subject: [PATCH 3/3] Make code slightly more explicit by using exisiting method mark_queue_t::verify_empty instead of an assert testing the result from mark_queue_t::get_next_marked(). --- src/coreclr/gc/gc.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index d980881dd57bfa..91c15e8d464e01 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -26159,7 +26159,7 @@ void gc_heap::scan_dependent_handles (int condemned_gen_number, ScanContext *sc, fUnscannedPromotions = true; // mark queue must be empty after process_mark_overflow - assert (mark_queue.get_next_marked() == nullptr); + mark_queue.verify_empty(); // Perform the scan and set the flag if any promotions resulted. if (GCScan::GcDhReScan(sc)) @@ -26779,7 +26779,7 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) scan_dependent_handles(condemned_gen_number, &sc, true); // mark queue must be empty after scan_dependent_handles - assert (mark_queue.get_next_marked() == nullptr); + mark_queue.verify_empty(); fire_mark_event (ETW::GC_ROOT_DH_HANDLES, current_promoted_bytes, last_promoted_bytes); #ifdef MULTIPLE_HEAPS @@ -26871,7 +26871,7 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) scan_dependent_handles(condemned_gen_number, &sc, false); // mark queue must be empty after scan_dependent_handles - assert (mark_queue.get_next_marked() == nullptr); + mark_queue.verify_empty(); fire_mark_event (ETW::GC_ROOT_DH_HANDLES, current_promoted_bytes, last_promoted_bytes); #endif //FEATURE_PREMORTEM_FINALIZATION