Skip to content

Commit e8224f7

Browse files
committed
8282089: [BACKOUT] Parallel: Refactor PSCardTable::scavenge_contents_parallel
Reviewed-by: tschatzl
1 parent 834d55c commit e8224f7

File tree

2 files changed

+174
-175
lines changed

2 files changed

+174
-175
lines changed

src/hotspot/share/gc/parallel/psCardTable.cpp

Lines changed: 172 additions & 160 deletions
Original file line numberDiff line numberDiff line change
@@ -117,88 +117,18 @@ class CheckForPreciseMarks : public BasicOopIterateClosure {
117117
virtual void do_oop(narrowOop* p) { CheckForPreciseMarks::do_oop_work(p); }
118118
};
119119

120-
static void prefetch_write(void *p) {
121-
if (PrefetchScanIntervalInBytes >= 0) {
122-
Prefetch::write(p, PrefetchScanIntervalInBytes);
123-
}
124-
}
125-
126-
// postcondition: ret is a dirty card or end_card
127-
CardTable::CardValue* PSCardTable::find_first_dirty_card(CardValue* const start_card,
128-
CardValue* const end_card) {
129-
for (CardValue* i_card = start_card; i_card < end_card; ++i_card) {
130-
if (*i_card != PSCardTable::clean_card_val()) {
131-
return i_card;
132-
}
133-
}
134-
return end_card;
135-
}
136-
137-
// postcondition: ret is a clean card or end_card
138-
// Note: if a part of an object is on a dirty card, all cards this object
139-
// resides on are considered dirty.
140-
CardTable::CardValue* PSCardTable::find_first_clean_card(ObjectStartArray* const start_array,
141-
CardValue* const start_card,
142-
CardValue* const end_card) {
143-
assert(start_card == end_card ||
144-
*start_card != PSCardTable::clean_card_val(), "precondition");
145-
// Skip the first dirty card.
146-
CardValue* i_card = start_card + 1;
147-
while (i_card < end_card) {
148-
if (*i_card != PSCardTable::clean_card_val()) {
149-
i_card++;
150-
continue;
151-
}
152-
assert(i_card - 1 >= start_card, "inv");
153-
assert(*(i_card - 1) != PSCardTable::clean_card_val(), "prev card must be dirty");
154-
// Find the final obj on the prev dirty card.
155-
HeapWord* obj_addr = start_array->object_start(addr_for(i_card)-1);
156-
HeapWord* obj_end_addr = obj_addr + cast_to_oop(obj_addr)->size();
157-
CardValue* final_card_by_obj = byte_for(obj_end_addr - 1);
158-
assert(final_card_by_obj < end_card, "inv");
159-
if (final_card_by_obj <= i_card) {
160-
return i_card;
161-
}
162-
// This final obj extends beyond i_card, check if this new card is dirty.
163-
if (*final_card_by_obj == PSCardTable::clean_card_val()) {
164-
return final_card_by_obj;
165-
}
166-
// This new card is dirty, continuing the search...
167-
i_card = final_card_by_obj + 1;
168-
}
169-
return end_card;
170-
}
171-
172-
void PSCardTable::clear_cards(CardValue* const start, CardValue* const end) {
173-
for (CardValue* i_card = start; i_card < end; ++i_card) {
174-
*i_card = clean_card;
175-
}
176-
}
177-
178-
void PSCardTable::scan_objects_in_range(PSPromotionManager* pm,
179-
HeapWord* start,
180-
HeapWord* end) {
181-
HeapWord* obj_addr = start;
182-
while (obj_addr < end) {
183-
oop obj = cast_to_oop(obj_addr);
184-
assert(oopDesc::is_oop(obj), "inv");
185-
prefetch_write(obj_addr);
186-
pm->push_contents(obj);
187-
obj_addr += obj->size();
188-
}
189-
pm->drain_stacks_cond_depth();
190-
}
191-
192120
// We get passed the space_top value to prevent us from traversing into
193121
// the old_gen promotion labs, which cannot be safely parsed.
194122

195123
// Do not call this method if the space is empty.
196124
// It is a waste to start tasks and get here only to
197-
// do no work. This method is just a no-op if space_top == sp->bottom().
125+
// do no work. If this method needs to be called
126+
// when the space is empty, fix the calculation of
127+
// end_card to allow sp_top == sp->bottom().
198128

199129
// The generation (old gen) is divided into slices, which are further
200130
// subdivided into stripes, with one stripe per GC thread. The size of
201-
// a stripe is a constant, num_cards_in_stripe.
131+
// a stripe is a constant, ssize.
202132
//
203133
// +===============+ slice 0
204134
// | stripe 0 |
@@ -222,106 +152,188 @@ void PSCardTable::scan_objects_in_range(PSPromotionManager* pm,
222152
// In this case there are 4 threads, so 4 stripes. A GC thread first works on
223153
// its stripe within slice 0 and then moves to its stripe in the next slice
224154
// until it has exceeded the top of the generation. The distance to stripe in
225-
// the next slice is calculated based on the number of stripes. After finishing
226-
// stripe 0 in slice 0, the thread finds the stripe 0 in slice 1 by adding
227-
// slice_size_in_words to the start of stripe 0 in slice 0 to get to the start
228-
// of stripe 0 in slice 1.
155+
// the next slice is calculated based on the number of stripes. The next
156+
// stripe is at ssize * number_of_stripes (= slice_stride). So after
157+
// finishing stripe 0 in slice 0, the thread finds the stripe 0 in slice 1 by
158+
// adding slice_stride to the start of stripe 0 in slice 0 to get to the start
159+
// of stripe 0 in slice 1.
229160

230161
void PSCardTable::scavenge_contents_parallel(ObjectStartArray* start_array,
231162
MutableSpace* sp,
232163
HeapWord* space_top,
233164
PSPromotionManager* pm,
234-
uint stripe_index,
235-
uint n_stripes) {
236-
const size_t num_cards_in_stripe = 128;
237-
const size_t stripe_size_in_words = num_cards_in_stripe * _card_size_in_words;
238-
const size_t slice_size_in_words = stripe_size_in_words * n_stripes;
239-
240-
HeapWord* cur_stripe_addr = sp->bottom() + stripe_index * stripe_size_in_words;
241-
242-
for (/* empty */; cur_stripe_addr < space_top; cur_stripe_addr += slice_size_in_words) {
243-
// exclusive
244-
HeapWord* const cur_stripe_end_addr = MIN2(cur_stripe_addr + stripe_size_in_words,
245-
space_top);
246-
247-
// Process a stripe iff it contains any obj-start
248-
if (!start_array->object_starts_in_range(cur_stripe_addr, cur_stripe_end_addr)) {
165+
uint stripe_number,
166+
uint stripe_total) {
167+
int ssize = 128; // Naked constant! Work unit = 64k.
168+
169+
// It is a waste to get here if empty.
170+
assert(sp->bottom() < sp->top(), "Should not be called if empty");
171+
oop* sp_top = (oop*)space_top;
172+
CardValue* start_card = byte_for(sp->bottom());
173+
CardValue* end_card = byte_for(sp_top - 1) + 1;
174+
oop* last_scanned = NULL; // Prevent scanning objects more than once
175+
// The width of the slice, ssize*stripe_total, must be
176+
// consistent with the number of stripes so that the complete slice
177+
// is covered.
178+
size_t slice_width = ssize * stripe_total;
179+
for (CardValue* slice = start_card; slice < end_card; slice += slice_width) {
180+
CardValue* worker_start_card = slice + stripe_number * ssize;
181+
if (worker_start_card >= end_card)
182+
return; // We're done.
183+
184+
CardValue* worker_end_card = worker_start_card + ssize;
185+
if (worker_end_card > end_card)
186+
worker_end_card = end_card;
187+
188+
// We do not want to scan objects more than once. In order to accomplish
189+
// this, we assert that any object with an object head inside our 'slice'
190+
// belongs to us. We may need to extend the range of scanned cards if the
191+
// last object continues into the next 'slice'.
192+
//
193+
// Note! ending cards are exclusive!
194+
HeapWord* slice_start = addr_for(worker_start_card);
195+
HeapWord* slice_end = MIN2((HeapWord*) sp_top, addr_for(worker_end_card));
196+
197+
// If there are no objects starting within the chunk, skip it.
198+
if (!start_array->object_starts_in_range(slice_start, slice_end)) {
249199
continue;
250200
}
251-
252-
// Constraints:
253-
// 1. range of cards checked for being dirty or clean: [iter_limit_l, iter_limit_r)
254-
// 2. range of cards can be cleared: [clear_limit_l, clear_limit_r)
255-
// 3. range of objs (obj-start) can be scanned: [first_obj_addr, cur_stripe_end_addr)
256-
257-
CardValue* iter_limit_l;
258-
CardValue* iter_limit_r;
259-
CardValue* clear_limit_l;
260-
CardValue* clear_limit_r;
261-
262-
// Identify left ends and the first obj-start inside this stripe.
263-
HeapWord* first_obj_addr = start_array->object_start(cur_stripe_addr);
264-
if (first_obj_addr < cur_stripe_addr) {
265-
// this obj belongs to previous stripe; can't clear any cards it occupies
266-
first_obj_addr += cast_to_oop(first_obj_addr)->size();
267-
clear_limit_l = byte_for(first_obj_addr - 1) + 1;
268-
iter_limit_l = byte_for(first_obj_addr);
269-
} else {
270-
assert(first_obj_addr == cur_stripe_addr, "inv");
271-
iter_limit_l = clear_limit_l = byte_for(cur_stripe_addr);
201+
// Update our beginning addr
202+
HeapWord* first_object = start_array->object_start(slice_start);
203+
debug_only(oop* first_object_within_slice = (oop*) first_object;)
204+
if (first_object < slice_start) {
205+
last_scanned = (oop*)(first_object + cast_to_oop(first_object)->size());
206+
debug_only(first_object_within_slice = last_scanned;)
207+
worker_start_card = byte_for(last_scanned);
272208
}
273209

274-
assert(cur_stripe_addr <= first_obj_addr, "inside this stripe");
275-
assert(first_obj_addr <= cur_stripe_end_addr, "can be empty");
276-
277-
{
278-
// Identify right ends.
279-
HeapWord* obj_addr = start_array->object_start(cur_stripe_end_addr - 1);
280-
HeapWord* obj_end_addr = obj_addr + cast_to_oop(obj_addr)->size();
281-
assert(obj_end_addr >= cur_stripe_end_addr, "inv");
282-
clear_limit_r = byte_for(obj_end_addr);
283-
iter_limit_r = byte_for(obj_end_addr - 1) + 1;
210+
// Update the ending addr
211+
if (slice_end < (HeapWord*)sp_top) {
212+
// The subtraction is important! An object may start precisely at slice_end.
213+
HeapWord* last_object = start_array->object_start(slice_end - 1);
214+
slice_end = last_object + cast_to_oop(last_object)->size();
215+
// worker_end_card is exclusive, so bump it one past the end of last_object's
216+
// covered span.
217+
worker_end_card = byte_for(slice_end) + 1;
218+
219+
if (worker_end_card > end_card)
220+
worker_end_card = end_card;
284221
}
285222

286-
assert(iter_limit_l <= clear_limit_l &&
287-
clear_limit_r <= iter_limit_r, "clear cards only if we iterate over them");
288-
289-
// Process dirty chunks, i.e. consecutive dirty cards [dirty_l, dirty_r),
290-
// chunk by chunk inside [iter_limit_l, iter_limit_r).
291-
CardValue* dirty_l;
292-
CardValue* dirty_r;
293-
294-
for (CardValue* cur_card = iter_limit_l; cur_card < iter_limit_r; cur_card = dirty_r + 1) {
295-
dirty_l = find_first_dirty_card(cur_card, iter_limit_r);
296-
dirty_r = find_first_clean_card(start_array, dirty_l, iter_limit_r);
297-
assert(dirty_l <= dirty_r, "inv");
298-
299-
// empty
300-
if (dirty_l == dirty_r) {
301-
assert(dirty_r == iter_limit_r, "no more dirty cards in this stripe");
302-
break;
223+
assert(slice_end <= (HeapWord*)sp_top, "Last object in slice crosses space boundary");
224+
assert(is_valid_card_address(worker_start_card), "Invalid worker start card");
225+
assert(is_valid_card_address(worker_end_card), "Invalid worker end card");
226+
// Note that worker_start_card >= worker_end_card is legal, and happens when
227+
// an object spans an entire slice.
228+
assert(worker_start_card <= end_card, "worker start card beyond end card");
229+
assert(worker_end_card <= end_card, "worker end card beyond end card");
230+
231+
CardValue* current_card = worker_start_card;
232+
while (current_card < worker_end_card) {
233+
// Find an unclean card.
234+
while (current_card < worker_end_card && card_is_clean(*current_card)) {
235+
current_card++;
303236
}
304-
305-
assert(*dirty_l != clean_card, "inv");
306-
assert(*dirty_r == clean_card || dirty_r == iter_limit_r, "inv");
307-
308-
// Process this non-empty dirty chunk in two steps:
309-
{
310-
// 1. Clear card in [dirty_l, dirty_r) subject to [clear_limit_l, clear_limit_r) constraint
311-
clear_cards(MAX2(dirty_l, clear_limit_l),
312-
MIN2(dirty_r, clear_limit_r));
237+
CardValue* first_unclean_card = current_card;
238+
239+
// Find the end of a run of contiguous unclean cards
240+
while (current_card < worker_end_card && !card_is_clean(*current_card)) {
241+
while (current_card < worker_end_card && !card_is_clean(*current_card)) {
242+
current_card++;
243+
}
244+
245+
if (current_card < worker_end_card) {
246+
// Some objects may be large enough to span several cards. If such
247+
// an object has more than one dirty card, separated by a clean card,
248+
// we will attempt to scan it twice. The test against "last_scanned"
249+
// prevents the redundant object scan, but it does not prevent newly
250+
// marked cards from being cleaned.
251+
HeapWord* last_object_in_dirty_region = start_array->object_start(addr_for(current_card)-1);
252+
size_t size_of_last_object = cast_to_oop(last_object_in_dirty_region)->size();
253+
HeapWord* end_of_last_object = last_object_in_dirty_region + size_of_last_object;
254+
CardValue* ending_card_of_last_object = byte_for(end_of_last_object);
255+
assert(ending_card_of_last_object <= worker_end_card, "ending_card_of_last_object is greater than worker_end_card");
256+
if (ending_card_of_last_object > current_card) {
257+
// This means the object spans the next complete card.
258+
// We need to bump the current_card to ending_card_of_last_object
259+
current_card = ending_card_of_last_object;
260+
}
261+
}
313262
}
314-
315-
{
316-
// 2. Scan objs in [dirty_l, dirty_r) subject to [first_obj_addr, cur_stripe_end_addr) constraint
317-
HeapWord* obj_l = MAX2(start_array->object_start(addr_for(dirty_l)),
318-
first_obj_addr);
319-
320-
HeapWord* obj_r = MIN2(addr_for(dirty_r),
321-
cur_stripe_end_addr);
322-
323-
scan_objects_in_range(pm, obj_l, obj_r);
263+
CardValue* following_clean_card = current_card;
264+
265+
if (first_unclean_card < worker_end_card) {
266+
oop* p = (oop*) start_array->object_start(addr_for(first_unclean_card));
267+
assert((HeapWord*)p <= addr_for(first_unclean_card), "checking");
268+
// "p" should always be >= "last_scanned" because newly GC dirtied
269+
// cards are no longer scanned again (see comment at end
270+
// of loop on the increment of "current_card"). Test that
271+
// hypothesis before removing this code.
272+
// If this code is removed, deal with the first time through
273+
// the loop when the last_scanned is the object starting in
274+
// the previous slice.
275+
assert((p >= last_scanned) ||
276+
(last_scanned == first_object_within_slice),
277+
"Should no longer be possible");
278+
if (p < last_scanned) {
279+
// Avoid scanning more than once; this can happen because
280+
// newgen cards set by GC may be a different set than the
281+
// originally dirty set
282+
p = last_scanned;
283+
}
284+
oop* to = (oop*)addr_for(following_clean_card);
285+
286+
// Test slice_end first!
287+
if ((HeapWord*)to > slice_end) {
288+
to = (oop*)slice_end;
289+
} else if (to > sp_top) {
290+
to = sp_top;
291+
}
292+
293+
// we know which cards to scan, now clear them
294+
if (first_unclean_card <= worker_start_card+1)
295+
first_unclean_card = worker_start_card+1;
296+
if (following_clean_card >= worker_end_card-1)
297+
following_clean_card = worker_end_card-1;
298+
299+
while (first_unclean_card < following_clean_card) {
300+
*first_unclean_card++ = clean_card;
301+
}
302+
303+
const int interval = PrefetchScanIntervalInBytes;
304+
// scan all objects in the range
305+
if (interval != 0) {
306+
while (p < to) {
307+
Prefetch::write(p, interval);
308+
oop m = cast_to_oop(p);
309+
assert(oopDesc::is_oop_or_null(m), "Expected an oop or NULL for header field at " PTR_FORMAT, p2i(m));
310+
pm->push_contents(m);
311+
p += m->size();
312+
}
313+
pm->drain_stacks_cond_depth();
314+
} else {
315+
while (p < to) {
316+
oop m = cast_to_oop(p);
317+
assert(oopDesc::is_oop_or_null(m), "Expected an oop or NULL for header field at " PTR_FORMAT, p2i(m));
318+
pm->push_contents(m);
319+
p += m->size();
320+
}
321+
pm->drain_stacks_cond_depth();
322+
}
323+
last_scanned = p;
324324
}
325+
// "current_card" is still the "following_clean_card" or
326+
// the current_card is >= the worker_end_card so the
327+
// loop will not execute again.
328+
assert((current_card == following_clean_card) ||
329+
(current_card >= worker_end_card),
330+
"current_card should only be incremented if it still equals "
331+
"following_clean_card");
332+
// Increment current_card so that it is not processed again.
333+
// It may now be dirty because an old-to-young pointer was
334+
// found on it and updated. If it is now dirty, it cannot
335+
// be safely cleaned in the next iteration.
336+
current_card++;
325337
}
326338
}
327339
}

src/hotspot/share/gc/parallel/psCardTable.hpp

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -42,19 +42,6 @@ class PSCardTable: public CardTable {
4242
verify_card = CT_MR_BS_last_reserved + 5
4343
};
4444

45-
CardValue* find_first_dirty_card(CardValue* const start_card,
46-
CardValue* const end_card);
47-
48-
CardValue* find_first_clean_card(ObjectStartArray* start_array,
49-
CardValue* const start_card,
50-
CardValue* const end_card);
51-
52-
void clear_cards(CardValue* const start, CardValue* const end);
53-
54-
void scan_objects_in_range(PSPromotionManager* pm,
55-
HeapWord* start,
56-
HeapWord* end);
57-
5845
public:
5946
PSCardTable(MemRegion whole_heap) : CardTable(whole_heap) {}
6047

@@ -66,8 +53,8 @@ class PSCardTable: public CardTable {
6653
MutableSpace* sp,
6754
HeapWord* space_top,
6855
PSPromotionManager* pm,
69-
uint stripe_index,
70-
uint n_stripes);
56+
uint stripe_number,
57+
uint stripe_total);
7158

7259
bool addr_is_marked_imprecise(void *addr);
7360
bool addr_is_marked_precise(void *addr);

0 commit comments

Comments
 (0)