From aa17e4994a5872a129233b06360ea6cb5ab43d76 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Wed, 8 Mar 2023 16:20:58 +0800 Subject: [PATCH 1/2] Workaround a synchronization bug. If a worker thread spuriously wakes up, it may observe the pending coordinator work counter going from 0 to 1 when the EndOfGC work packet starts to be executed. That'll trigger an assertion failure. It is a workaround because a full solution should explicitly address the need to prevent opening more work packets when the coordinator (or any worker) is adding work packets to related buckets. --- src/scheduler/controller.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/scheduler/controller.rs b/src/scheduler/controller.rs index 0c082f3167..62a6ef2efd 100644 --- a/src/scheduler/controller.rs +++ b/src/scheduler/controller.rs @@ -8,7 +8,7 @@ use std::sync::Arc; use crate::plan::gc_requester::GCRequester; use crate::scheduler::gc_work::{EndOfGC, ScheduleCollection}; -use crate::scheduler::CoordinatorMessage; +use crate::scheduler::{CoordinatorMessage, GCWork}; use crate::util::VMWorkerThread; use crate::vm::VMBinding; use crate::MMTK; @@ -131,7 +131,16 @@ impl GCController { let mut end_of_gc = EndOfGC { elapsed: gc_start.elapsed(), }; - self.initiate_coordinator_work(&mut end_of_gc, false); + + // Note: We cannot use `initiate_coordinator_work` here. If we increment the + // `pending_coordinator_packets` counter when a worker spuriously wakes up, it may try to + // open new buckets and result in an assertion error. That counter was there to prevent + // GC workers from opening more work packets while the coordinator is scheduling more work + // packets for the workers. The only two other coordinator, i.e. `ScheduleCollection` and + // `StopMutators`, belong to that category. `EndOfGC` doesn't add new work packets, so it + // doesn't need to prevent the workers from opening buckets (as there are none to be + // opened.) + end_of_gc.do_work_with_stat(&mut self.coordinator_worker, self.mmtk); self.scheduler.debug_assert_all_buckets_deactivated(); } From c7be6a11c18c2c1122f0b42ce97bd7a37707855a Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Thu, 9 Mar 2023 19:08:30 +0800 Subject: [PATCH 2/2] Mention the issues in the comment. --- src/scheduler/controller.rs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/scheduler/controller.rs b/src/scheduler/controller.rs index 62a6ef2efd..fc45848b9d 100644 --- a/src/scheduler/controller.rs +++ b/src/scheduler/controller.rs @@ -134,12 +134,17 @@ impl GCController { // Note: We cannot use `initiate_coordinator_work` here. If we increment the // `pending_coordinator_packets` counter when a worker spuriously wakes up, it may try to - // open new buckets and result in an assertion error. That counter was there to prevent - // GC workers from opening more work packets while the coordinator is scheduling more work - // packets for the workers. The only two other coordinator, i.e. `ScheduleCollection` and - // `StopMutators`, belong to that category. `EndOfGC` doesn't add new work packets, so it - // doesn't need to prevent the workers from opening buckets (as there are none to be - // opened.) + // open new buckets and result in an assertion error. + // See: https://github.com/mmtk/mmtk-core/issues/770 + // + // The `pending_coordinator_packets` counter and the `initiate_coordinator_work` function + // were introduced to prevent any buckets from being opened while `ScheduleCollection` or + // `StopMutators` is being executed. (See the doc comment of `initiate_coordinator_work`.) + // `EndOfGC` doesn't add any new work packets, therefore it does not need this layer of + // synchronization. + // + // FIXME: We should redesign the synchronization mechanism to properly address the opening + // condition of buckets. See: https://github.com/mmtk/mmtk-core/issues/774 end_of_gc.do_work_with_stat(&mut self.coordinator_worker, self.mmtk); self.scheduler.debug_assert_all_buckets_deactivated();