8282089: [BACKOUT] Parallel: Refactor PSCardTable::scavenge_contents_parallel

albertnetymk · albertnetymk · commit e8224f7de9e4 · 2022-02-18T09:54:28.000Z
Reviewed-by: tschatzl
diff --git a/src/hotspot/share/gc/parallel/psCardTable.cpp b/src/hotspot/share/gc/parallel/psCardTable.cpp
@@ -117,88 +117,18 @@ class CheckForPreciseMarks : public BasicOopIterateClosure {
   virtual void do_oop(narrowOop* p) { CheckForPreciseMarks::do_oop_work(p); }
 };
 
-static void prefetch_write(void *p) {
-  if (PrefetchScanIntervalInBytes >= 0) {
-    Prefetch::write(p, PrefetchScanIntervalInBytes);
-  }
-}
-
-// postcondition: ret is a dirty card or end_card
-CardTable::CardValue* PSCardTable::find_first_dirty_card(CardValue* const start_card,
-                                                         CardValue* const end_card) {
-  for (CardValue* i_card = start_card; i_card < end_card; ++i_card) {
-    if (*i_card != PSCardTable::clean_card_val()) {
-      return i_card;
-    }
-  }
-  return end_card;
-}
-
-// postcondition: ret is a clean card or end_card
-// Note: if a part of an object is on a dirty card, all cards this object
-// resides on are considered dirty.
-CardTable::CardValue* PSCardTable::find_first_clean_card(ObjectStartArray* const start_array,
-                                                         CardValue* const start_card,
-                                                         CardValue* const end_card) {
-  assert(start_card == end_card ||
-         *start_card != PSCardTable::clean_card_val(), "precondition");
-  // Skip the first dirty card.
-  CardValue* i_card = start_card + 1;
-  while (i_card < end_card) {
-    if (*i_card != PSCardTable::clean_card_val()) {
-      i_card++;
-      continue;
-    }
-    assert(i_card - 1 >= start_card, "inv");
-    assert(*(i_card - 1) != PSCardTable::clean_card_val(), "prev card must be dirty");
-    // Find the final obj on the prev dirty card.
-    HeapWord* obj_addr = start_array->object_start(addr_for(i_card)-1);
-    HeapWord* obj_end_addr = obj_addr + cast_to_oop(obj_addr)->size();
-    CardValue* final_card_by_obj = byte_for(obj_end_addr - 1);
-    assert(final_card_by_obj < end_card, "inv");
-    if (final_card_by_obj <= i_card) {
-      return i_card;
-    }
-    // This final obj extends beyond i_card, check if this new card is dirty.
-    if (*final_card_by_obj == PSCardTable::clean_card_val()) {
-      return final_card_by_obj;
-    }
-    // This new card is dirty, continuing the search...
-    i_card = final_card_by_obj + 1;
-  }
-  return end_card;
-}
-
-void PSCardTable::clear_cards(CardValue* const start, CardValue* const end) {
-  for (CardValue* i_card = start; i_card < end; ++i_card) {
-    *i_card = clean_card;
-  }
-}
-
-void PSCardTable::scan_objects_in_range(PSPromotionManager* pm,
-                                        HeapWord* start,
-                                        HeapWord* end) {
-  HeapWord* obj_addr = start;
-  while (obj_addr < end) {
-    oop obj = cast_to_oop(obj_addr);
-    assert(oopDesc::is_oop(obj), "inv");
-    prefetch_write(obj_addr);
-    pm->push_contents(obj);
-    obj_addr += obj->size();
-  }
-  pm->drain_stacks_cond_depth();
-}
-
 // We get passed the space_top value to prevent us from traversing into
 // the old_gen promotion labs, which cannot be safely parsed.
 
 // Do not call this method if the space is empty.
 // It is a waste to start tasks and get here only to
-// do no work. This method is just a no-op if space_top == sp->bottom().
+// do no work.  If this method needs to be called
+// when the space is empty, fix the calculation of
+// end_card to allow sp_top == sp->bottom().
 
 // The generation (old gen) is divided into slices, which are further
 // subdivided into stripes, with one stripe per GC thread. The size of
-// a stripe is a constant, num_cards_in_stripe.
+// a stripe is a constant, ssize.
 //
 //      +===============+        slice 0
 //      |  stripe 0     |
@@ -222,106 +152,188 @@ void PSCardTable::scan_objects_in_range(PSPromotionManager* pm,
 // In this case there are 4 threads, so 4 stripes.  A GC thread first works on
 // its stripe within slice 0 and then moves to its stripe in the next slice
 // until it has exceeded the top of the generation.  The distance to stripe in
-// the next slice is calculated based on the number of stripes. After finishing
-// stripe 0 in slice 0, the thread finds the stripe 0 in slice 1 by adding
-// slice_size_in_words to the start of stripe 0 in slice 0 to get to the start
-// of stripe 0 in slice 1.
+// the next slice is calculated based on the number of stripes.  The next
+// stripe is at ssize * number_of_stripes (= slice_stride)..  So after
+// finishing stripe 0 in slice 0, the thread finds the stripe 0 in slice1 by
+// adding slice_stride to the start of stripe 0 in slice 0 to get to the start
+// of stride 0 in slice 1.
 
 void PSCardTable::scavenge_contents_parallel(ObjectStartArray* start_array,
                                              MutableSpace* sp,
                                              HeapWord* space_top,
                                              PSPromotionManager* pm,
-                                             uint stripe_index,
-                                             uint n_stripes) {
-  const size_t num_cards_in_stripe = 128;
-  const size_t stripe_size_in_words = num_cards_in_stripe * _card_size_in_words;
-  const size_t slice_size_in_words = stripe_size_in_words * n_stripes;
-
-  HeapWord* cur_stripe_addr = sp->bottom() + stripe_index * stripe_size_in_words;
-
-  for (/* empty */; cur_stripe_addr < space_top; cur_stripe_addr += slice_size_in_words) {
-    // exclusive
-    HeapWord* const cur_stripe_end_addr = MIN2(cur_stripe_addr + stripe_size_in_words,
-                                               space_top);
-
-    // Process a stripe iff it contains any obj-start
-    if (!start_array->object_starts_in_range(cur_stripe_addr, cur_stripe_end_addr)) {
+                                             uint stripe_number,
+                                             uint stripe_total) {
+  int ssize = 128; // Naked constant!  Work unit = 64k.
+
+  // It is a waste to get here if empty.
+  assert(sp->bottom() < sp->top(), "Should not be called if empty");
+  oop* sp_top = (oop*)space_top;
+  CardValue* start_card = byte_for(sp->bottom());
+  CardValue* end_card   = byte_for(sp_top - 1) + 1;
+  oop* last_scanned = NULL; // Prevent scanning objects more than once
+  // The width of the stripe ssize*stripe_total must be
+  // consistent with the number of stripes so that the complete slice
+  // is covered.
+  size_t slice_width = ssize * stripe_total;
+  for (CardValue* slice = start_card; slice < end_card; slice += slice_width) {
+    CardValue* worker_start_card = slice + stripe_number * ssize;
+    if (worker_start_card >= end_card)
+      return; // We're done.
+
+    CardValue* worker_end_card = worker_start_card + ssize;
+    if (worker_end_card > end_card)
+      worker_end_card = end_card;
+
+    // We do not want to scan objects more than once. In order to accomplish
+    // this, we assert that any object with an object head inside our 'slice'
+    // belongs to us. We may need to extend the range of scanned cards if the
+    // last object continues into the next 'slice'.
+    //
+    // Note! ending cards are exclusive!
+    HeapWord* slice_start = addr_for(worker_start_card);
+    HeapWord* slice_end = MIN2((HeapWord*) sp_top, addr_for(worker_end_card));
+
+    // If there are not objects starting within the chunk, skip it.
+    if (!start_array->object_starts_in_range(slice_start, slice_end)) {
       continue;
     }
-
-    // Constraints:
-    // 1. range of cards checked for being dirty or clean: [iter_limit_l, iter_limit_r)
-    // 2. range of cards can be cleared: [clear_limit_l, clear_limit_r)
-    // 3. range of objs (obj-start) can be scanned: [first_obj_addr, cur_stripe_end_addr)
-
-    CardValue* iter_limit_l;
-    CardValue* iter_limit_r;
-    CardValue* clear_limit_l;
-    CardValue* clear_limit_r;
-
-    // Identify left ends and the first obj-start inside this stripe.
-    HeapWord* first_obj_addr = start_array->object_start(cur_stripe_addr);
-    if (first_obj_addr < cur_stripe_addr) {
-      // this obj belongs to previous stripe; can't clear any cards it occupies
-      first_obj_addr += cast_to_oop(first_obj_addr)->size();
-      clear_limit_l = byte_for(first_obj_addr - 1) + 1;
-      iter_limit_l = byte_for(first_obj_addr);
-    } else {
-      assert(first_obj_addr == cur_stripe_addr, "inv");
-      iter_limit_l = clear_limit_l = byte_for(cur_stripe_addr);
+    // Update our beginning addr
+    HeapWord* first_object = start_array->object_start(slice_start);
+    debug_only(oop* first_object_within_slice = (oop*) first_object;)
+    if (first_object < slice_start) {
+      last_scanned = (oop*)(first_object + cast_to_oop(first_object)->size());
+      debug_only(first_object_within_slice = last_scanned;)
+      worker_start_card = byte_for(last_scanned);
     }
 
-    assert(cur_stripe_addr <= first_obj_addr, "inside this stripe");
-    assert(first_obj_addr <= cur_stripe_end_addr, "can be empty");
-
-    {
-      // Identify right ends.
-      HeapWord* obj_addr = start_array->object_start(cur_stripe_end_addr - 1);
-      HeapWord* obj_end_addr = obj_addr + cast_to_oop(obj_addr)->size();
-      assert(obj_end_addr >= cur_stripe_end_addr, "inv");
-      clear_limit_r = byte_for(obj_end_addr);
-      iter_limit_r = byte_for(obj_end_addr - 1) + 1;
+    // Update the ending addr
+    if (slice_end < (HeapWord*)sp_top) {
+      // The subtraction is important! An object may start precisely at slice_end.
+      HeapWord* last_object = start_array->object_start(slice_end - 1);
+      slice_end = last_object + cast_to_oop(last_object)->size();
+      // worker_end_card is exclusive, so bump it one past the end of last_object's
+      // covered span.
+      worker_end_card = byte_for(slice_end) + 1;
+
+      if (worker_end_card > end_card)
+        worker_end_card = end_card;
     }
 
-    assert(iter_limit_l <= clear_limit_l &&
-           clear_limit_r <= iter_limit_r, "clear cards only if we iterate over them");
-
-    // Process dirty chunks, i.e. consecutive dirty cards [dirty_l, dirty_r),
-    // chunk by chunk inside [iter_limit_l, iter_limit_r).
-    CardValue* dirty_l;
-    CardValue* dirty_r;
-
-    for (CardValue* cur_card = iter_limit_l; cur_card < iter_limit_r; cur_card = dirty_r + 1) {
-      dirty_l = find_first_dirty_card(cur_card, iter_limit_r);
-      dirty_r = find_first_clean_card(start_array, dirty_l, iter_limit_r);
-      assert(dirty_l <= dirty_r, "inv");
-
-      // empty
-      if (dirty_l == dirty_r) {
-        assert(dirty_r == iter_limit_r, "no more dirty cards in this stripe");
-        break;
+    assert(slice_end <= (HeapWord*)sp_top, "Last object in slice crosses space boundary");
+    assert(is_valid_card_address(worker_start_card), "Invalid worker start card");
+    assert(is_valid_card_address(worker_end_card), "Invalid worker end card");
+    // Note that worker_start_card >= worker_end_card is legal, and happens when
+    // an object spans an entire slice.
+    assert(worker_start_card <= end_card, "worker start card beyond end card");
+    assert(worker_end_card <= end_card, "worker end card beyond end card");
+
+    CardValue* current_card = worker_start_card;
+    while (current_card < worker_end_card) {
+      // Find an unclean card.
+      while (current_card < worker_end_card && card_is_clean(*current_card)) {
+        current_card++;
       }
-
-      assert(*dirty_l != clean_card, "inv");
-      assert(*dirty_r == clean_card || dirty_r == iter_limit_r, "inv");
-
-      // Process this non-empty dirty chunk in two steps:
-      {
-        // 1. Clear card in [dirty_l, dirty_r) subject to [clear_limit_l, clear_limit_r) constraint
-        clear_cards(MAX2(dirty_l, clear_limit_l),
-                    MIN2(dirty_r, clear_limit_r));
+      CardValue* first_unclean_card = current_card;
+
+      // Find the end of a run of contiguous unclean cards
+      while (current_card < worker_end_card && !card_is_clean(*current_card)) {
+        while (current_card < worker_end_card && !card_is_clean(*current_card)) {
+          current_card++;
+        }
+
+        if (current_card < worker_end_card) {
+          // Some objects may be large enough to span several cards. If such
+          // an object has more than one dirty card, separated by a clean card,
+          // we will attempt to scan it twice. The test against "last_scanned"
+          // prevents the redundant object scan, but it does not prevent newly
+          // marked cards from being cleaned.
+          HeapWord* last_object_in_dirty_region = start_array->object_start(addr_for(current_card)-1);
+          size_t size_of_last_object = cast_to_oop(last_object_in_dirty_region)->size();
+          HeapWord* end_of_last_object = last_object_in_dirty_region + size_of_last_object;
+          CardValue* ending_card_of_last_object = byte_for(end_of_last_object);
+          assert(ending_card_of_last_object <= worker_end_card, "ending_card_of_last_object is greater than worker_end_card");
+          if (ending_card_of_last_object > current_card) {
+            // This means the object spans the next complete card.
+            // We need to bump the current_card to ending_card_of_last_object
+            current_card = ending_card_of_last_object;
+          }
+        }
       }
-
-      {
-        // 2. Scan objs in [dirty_l, dirty_r) subject to [first_obj_addr, cur_stripe_end_addr) constraint
-        HeapWord* obj_l = MAX2(start_array->object_start(addr_for(dirty_l)),
-                               first_obj_addr);
-
-        HeapWord* obj_r = MIN2(addr_for(dirty_r),
-                               cur_stripe_end_addr);
-
-        scan_objects_in_range(pm, obj_l, obj_r);
+      CardValue* following_clean_card = current_card;
+
+      if (first_unclean_card < worker_end_card) {
+        oop* p = (oop*) start_array->object_start(addr_for(first_unclean_card));
+        assert((HeapWord*)p <= addr_for(first_unclean_card), "checking");
+        // "p" should always be >= "last_scanned" because newly GC dirtied
+        // cards are no longer scanned again (see comment at end
+        // of loop on the increment of "current_card").  Test that
+        // hypothesis before removing this code.
+        // If this code is removed, deal with the first time through
+        // the loop when the last_scanned is the object starting in
+        // the previous slice.
+        assert((p >= last_scanned) ||
+               (last_scanned == first_object_within_slice),
+               "Should no longer be possible");
+        if (p < last_scanned) {
+          // Avoid scanning more than once; this can happen because
+          // newgen cards set by GC may a different set than the
+          // originally dirty set
+          p = last_scanned;
+        }
+        oop* to = (oop*)addr_for(following_clean_card);
+
+        // Test slice_end first!
+        if ((HeapWord*)to > slice_end) {
+          to = (oop*)slice_end;
+        } else if (to > sp_top) {
+          to = sp_top;
+        }
+
+        // we know which cards to scan, now clear them
+        if (first_unclean_card <= worker_start_card+1)
+          first_unclean_card = worker_start_card+1;
+        if (following_clean_card >= worker_end_card-1)
+          following_clean_card = worker_end_card-1;
+
+        while (first_unclean_card < following_clean_card) {
+          *first_unclean_card++ = clean_card;
+        }
+
+        const int interval = PrefetchScanIntervalInBytes;
+        // scan all objects in the range
+        if (interval != 0) {
+          while (p < to) {
+            Prefetch::write(p, interval);
+            oop m = cast_to_oop(p);
+            assert(oopDesc::is_oop_or_null(m), "Expected an oop or NULL for header field at " PTR_FORMAT, p2i(m));
+            pm->push_contents(m);
+            p += m->size();
+          }
+          pm->drain_stacks_cond_depth();
+        } else {
+          while (p < to) {
+            oop m = cast_to_oop(p);
+            assert(oopDesc::is_oop_or_null(m), "Expected an oop or NULL for header field at " PTR_FORMAT, p2i(m));
+            pm->push_contents(m);
+            p += m->size();
+          }
+          pm->drain_stacks_cond_depth();
+        }
+        last_scanned = p;
       }
+      // "current_card" is still the "following_clean_card" or
+      // the current_card is >= the worker_end_card so the
+      // loop will not execute again.
+      assert((current_card == following_clean_card) ||
+             (current_card >= worker_end_card),
+        "current_card should only be incremented if it still equals "
+        "following_clean_card");
+      // Increment current_card so that it is not processed again.
+      // It may now be dirty because a old-to-young pointer was
+      // found on it an updated.  If it is now dirty, it cannot be
+      // be safely cleaned in the next iteration.
+      current_card++;
     }
   }
 }
diff --git a/src/hotspot/share/gc/parallel/psCardTable.hpp b/src/hotspot/share/gc/parallel/psCardTable.hpp
@@ -42,19 +42,6 @@ class PSCardTable: public CardTable {
     verify_card       = CT_MR_BS_last_reserved + 5
   };
 
-  CardValue* find_first_dirty_card(CardValue* const start_card,
-                                   CardValue* const end_card);
-
-  CardValue* find_first_clean_card(ObjectStartArray* start_array,
-                                   CardValue* const start_card,
-                                   CardValue* const end_card);
-
-  void clear_cards(CardValue* const start, CardValue* const end);
-
-  void scan_objects_in_range(PSPromotionManager* pm,
-                             HeapWord* start,
-                             HeapWord* end);
-
  public:
   PSCardTable(MemRegion whole_heap) : CardTable(whole_heap) {}
 
@@ -66,8 +53,8 @@ class PSCardTable: public CardTable {
                                   MutableSpace* sp,
                                   HeapWord* space_top,
                                   PSPromotionManager* pm,
-                                  uint stripe_index,
-                                  uint n_stripes);
+                                  uint stripe_number,
+                                  uint stripe_total);
 
   bool addr_is_marked_imprecise(void *addr);
   bool addr_is_marked_precise(void *addr);