Skip to content

Commit b69d1b5

Browse files
Ivan Walulya and Thomas Schatzl committed
8327042: G1: Parallelism used for redirty logged cards needs better control.
Co-authored-by: Thomas Schatzl <[email protected]>
Reviewed-by: tschatzl, ayang
1 parent e889b46 commit b69d1b5

File tree

5 files changed, with 48 additions and 27 deletions.

src/hotspot/share/gc/g1/g1ParScanThreadState.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -113,15 +113,15 @@ G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h,
113113
initialize_numa_stats();
114114
}
115115

116-
size_t G1ParScanThreadState::flush_stats(size_t* surviving_young_words, uint num_workers) {
117-
_rdc_local_qset.flush();
116+
size_t G1ParScanThreadState::flush_stats(size_t* surviving_young_words, uint num_workers, BufferNodeList* rdc_buffers) {
117+
*rdc_buffers = _rdc_local_qset.flush();
118118
flush_numa_stats();
119119
// Update allocation statistics.
120120
_plab_allocator->flush_and_retire_stats(num_workers);
121121
_g1h->policy()->record_age_table(&_age_table);
122122

123123
if (_evacuation_failed_info.has_failed()) {
124-
_g1h->gc_tracer_stw()->report_evacuation_failed(_evacuation_failed_info);
124+
_g1h->gc_tracer_stw()->report_evacuation_failed(_evacuation_failed_info);
125125
}
126126

127127
size_t sum = 0;
@@ -593,7 +593,6 @@ const size_t* G1ParScanThreadStateSet::surviving_young_words() const {
593593

594594
void G1ParScanThreadStateSet::flush_stats() {
595595
assert(!_flushed, "thread local state from the per thread states should be flushed once");
596-
597596
for (uint worker_id = 0; worker_id < _num_workers; ++worker_id) {
598597
G1ParScanThreadState* pss = _states[worker_id];
599598
assert(pss != nullptr, "must be initialized");
@@ -604,7 +603,7 @@ void G1ParScanThreadStateSet::flush_stats() {
604603
// because it resets the PLAB allocator where we get this info from.
605604
size_t lab_waste_bytes = pss->lab_waste_words() * HeapWordSize;
606605
size_t lab_undo_waste_bytes = pss->lab_undo_waste_words() * HeapWordSize;
607-
size_t copied_bytes = pss->flush_stats(_surviving_young_words_total, _num_workers) * HeapWordSize;
606+
size_t copied_bytes = pss->flush_stats(_surviving_young_words_total, _num_workers, &_rdc_buffers[worker_id]) * HeapWordSize;
608607
size_t evac_fail_enqueued_cards = pss->evac_failure_enqueued_cards();
609608

610609
p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, copied_bytes, G1GCPhaseTimes::MergePSSCopiedBytes);
@@ -615,6 +614,11 @@ void G1ParScanThreadStateSet::flush_stats() {
615614
delete pss;
616615
_states[worker_id] = nullptr;
617616
}
617+
618+
G1DirtyCardQueueSet& dcq = G1BarrierSet::dirty_card_queue_set();
619+
dcq.merge_bufferlists(rdcqs());
620+
rdcqs()->verify_empty();
621+
618622
_flushed = true;
619623
}
620624

@@ -706,13 +710,15 @@ G1ParScanThreadStateSet::G1ParScanThreadStateSet(G1CollectedHeap* g1h,
706710
_rdcqs(G1BarrierSet::dirty_card_queue_set().allocator()),
707711
_preserved_marks_set(true /* in_c_heap */),
708712
_states(NEW_C_HEAP_ARRAY(G1ParScanThreadState*, num_workers, mtGC)),
713+
_rdc_buffers(NEW_C_HEAP_ARRAY(BufferNodeList, num_workers, mtGC)),
709714
_surviving_young_words_total(NEW_C_HEAP_ARRAY(size_t, collection_set->young_region_length() + 1, mtGC)),
710715
_num_workers(num_workers),
711716
_flushed(false),
712717
_evac_failure_regions(evac_failure_regions) {
713718
_preserved_marks_set.init(num_workers);
714719
for (uint i = 0; i < num_workers; ++i) {
715720
_states[i] = nullptr;
721+
_rdc_buffers[i] = BufferNodeList();
716722
}
717723
memset(_surviving_young_words_total, 0, (collection_set->young_region_length() + 1) * sizeof(size_t));
718724
}
@@ -721,5 +727,6 @@ G1ParScanThreadStateSet::~G1ParScanThreadStateSet() {
721727
assert(_flushed, "thread local state from the per thread states should have been flushed");
722728
FREE_C_HEAP_ARRAY(G1ParScanThreadState*, _states);
723729
FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_total);
730+
FREE_C_HEAP_ARRAY(BufferNodeList, _rdc_buffers);
724731
_preserved_marks_set.reclaim();
725732
}

src/hotspot/share/gc/g1/g1ParScanThreadState.hpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ class G1ParScanThreadState : public CHeapObj<mtGC> {
166166

167167
// Pass locally gathered statistics to global state. Returns the total number of
168168
// HeapWords copied.
169-
size_t flush_stats(size_t* surviving_young_words, uint num_workers);
169+
size_t flush_stats(size_t* surviving_young_words, uint num_workers, BufferNodeList* buffer_log);
170170

171171
private:
172172
void do_partial_array(PartialArrayScanTask task);
@@ -247,6 +247,7 @@ class G1ParScanThreadStateSet : public StackObj {
247247
G1RedirtyCardsQueueSet _rdcqs;
248248
PreservedMarksSet _preserved_marks_set;
249249
G1ParScanThreadState** _states;
250+
BufferNodeList* _rdc_buffers;
250251
size_t* _surviving_young_words_total;
251252
uint _num_workers;
252253
bool _flushed;
@@ -260,12 +261,14 @@ class G1ParScanThreadStateSet : public StackObj {
260261
~G1ParScanThreadStateSet();
261262

262263
G1RedirtyCardsQueueSet* rdcqs() { return &_rdcqs; }
264+
BufferNodeList* rdc_buffers() { return _rdc_buffers; }
263265
PreservedMarksSet* preserved_marks_set() { return &_preserved_marks_set; }
264266

265267
void flush_stats();
266268
void record_unused_optional_region(HeapRegion* hr);
267269

268270
G1ParScanThreadState* state_for_worker(uint worker_id);
271+
uint num_workers() const { return _num_workers; }
269272

270273
const size_t* surviving_young_words() const;
271274
};

src/hotspot/share/gc/g1/g1RedirtyCardsQueue.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,12 @@ void G1RedirtyCardsLocalQueueSet::enqueue(void* value) {
6565
}
6666
}
6767

68-
void G1RedirtyCardsLocalQueueSet::flush() {
68+
BufferNodeList G1RedirtyCardsLocalQueueSet::flush() {
6969
flush_queue(_queue);
70+
BufferNodeList cur_buffers = _buffers;
7071
_shared_qset->add_bufferlist(_buffers);
7172
_buffers = BufferNodeList();
73+
return cur_buffers;
7274
}
7375

7476
// G1RedirtyCardsLocalQueueSet::Queue

src/hotspot/share/gc/g1/g1RedirtyCardsQueue.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,9 @@ class G1RedirtyCardsLocalQueueSet : private PtrQueueSet {
5656
void enqueue(void* value);
5757

5858
// Transfer all completed buffers to the shared qset.
59-
void flush();
59+
// Returns the flushed BufferNodeList which is later used
60+
// as a shortcut into the shared qset.
61+
BufferNodeList flush();
6062
};
6163

6264
// Card table entries to be redirtied and the cards reprocessed later.

src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.cpp

Lines changed: 26 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -590,37 +590,41 @@ class G1PostEvacuateCollectionSetCleanupTask2::ProcessEvacuationFailedRegionsTas
590590
};
591591

592592
class G1PostEvacuateCollectionSetCleanupTask2::RedirtyLoggedCardsTask : public G1AbstractSubTask {
593-
G1RedirtyCardsQueueSet* _rdcqs;
594-
BufferNode* volatile _nodes;
593+
BufferNodeList* _rdc_buffers;
594+
uint _num_buffer_lists;
595595
G1EvacFailureRegions* _evac_failure_regions;
596596

597597
public:
598-
RedirtyLoggedCardsTask(G1RedirtyCardsQueueSet* rdcqs, G1EvacFailureRegions* evac_failure_regions) :
598+
RedirtyLoggedCardsTask(G1EvacFailureRegions* evac_failure_regions, BufferNodeList* rdc_buffers, uint num_buffer_lists) :
599599
G1AbstractSubTask(G1GCPhaseTimes::RedirtyCards),
600-
_rdcqs(rdcqs),
601-
_nodes(rdcqs->all_completed_buffers()),
600+
_rdc_buffers(rdc_buffers),
601+
_num_buffer_lists(num_buffer_lists),
602602
_evac_failure_regions(evac_failure_regions) { }
603603

604-
virtual ~RedirtyLoggedCardsTask() {
605-
G1DirtyCardQueueSet& dcq = G1BarrierSet::dirty_card_queue_set();
606-
dcq.merge_bufferlists(_rdcqs);
607-
_rdcqs->verify_empty();
608-
}
609-
610604
double worker_cost() const override {
611605
// Needs more investigation.
612606
return G1CollectedHeap::heap()->workers()->active_workers();
613607
}
614608

615609
void do_work(uint worker_id) override {
616610
RedirtyLoggedCardTableEntryClosure cl(G1CollectedHeap::heap(), _evac_failure_regions);
617-
BufferNode* next = Atomic::load(&_nodes);
618-
while (next != nullptr) {
619-
BufferNode* node = next;
620-
next = Atomic::cmpxchg(&_nodes, node, node->next());
621-
if (next == node) {
622-
cl.apply_to_buffer(node, worker_id);
623-
next = node->next();
611+
612+
uint start = worker_id;
613+
for (uint i = 0; i < _num_buffer_lists; i++) {
614+
uint index = (start + i) % _num_buffer_lists;
615+
616+
BufferNode* next = Atomic::load(&_rdc_buffers[index]._head);
617+
BufferNode* tail = Atomic::load(&_rdc_buffers[index]._tail);
618+
619+
while (next != nullptr) {
620+
BufferNode* node = next;
621+
next = Atomic::cmpxchg(&_rdc_buffers[index]._head, node, (node != tail ) ? node->next() : nullptr);
622+
if (next == node) {
623+
cl.apply_to_buffer(node, worker_id);
624+
next = (node != tail ) ? node->next() : nullptr;
625+
} else {
626+
break; // If there is contention, move to the next BufferNodeList
627+
}
624628
}
625629
}
626630
record_work_item(worker_id, 0, cl.num_dirtied());
@@ -970,7 +974,10 @@ G1PostEvacuateCollectionSetCleanupTask2::G1PostEvacuateCollectionSetCleanupTask2
970974
add_parallel_task(new RestorePreservedMarksTask(per_thread_states->preserved_marks_set()));
971975
add_parallel_task(new ProcessEvacuationFailedRegionsTask(evac_failure_regions));
972976
}
973-
add_parallel_task(new RedirtyLoggedCardsTask(per_thread_states->rdcqs(), evac_failure_regions));
977+
add_parallel_task(new RedirtyLoggedCardsTask(evac_failure_regions,
978+
per_thread_states->rdc_buffers(),
979+
per_thread_states->num_workers()));
980+
974981
if (UseTLAB && ResizeTLAB) {
975982
add_parallel_task(new ResizeTLABsTask());
976983
}

0 commit comments

Comments
 (0)