From 8d6b6f71a6fa2501fb76ee72b650168b2549dc53 Mon Sep 17 00:00:00 2001
From: Florence Morris <florence@cockroachlabs.com>
Date: Wed, 29 Oct 2025 15:08:26 -0400
Subject: [PATCH] In available-metrics-in-metrics-list.csv, added 8
 sql.routine.* metrics.

Updated metrics.yaml by running `./cockroach gen metric-list > metrics.yaml` with the v25.4.0-rc.1 binary.

In v25.3/essential-metrics.md and v25.4/essential-metrics.md, fixed bug where it mentioned self-hosted on Advanced page.

In v25.4/essential-metrics.md, added sections for Physical Replication and Logical Replication.
---
 .../available-metrics-in-metrics-list.csv     |    8 +
 src/current/_data/v25.4/metrics/metrics.yaml  | 1630 +++++++++++------
 .../_includes/v25.3/essential-metrics.md      |    6 +-
 .../_includes/v25.4/essential-metrics.md      |   14 +-
 4 files changed, 1047 insertions(+), 611 deletions(-)

diff --git a/src/current/_data/v25.4/metrics/available-metrics-in-metrics-list.csv b/src/current/_data/v25.4/metrics/available-metrics-in-metrics-list.csv
index c9f2c0e5c52..9d3adad338f 100644
--- a/src/current/_data/v25.4/metrics/available-metrics-in-metrics-list.csv
+++ b/src/current/_data/v25.4/metrics/available-metrics-in-metrics-list.csv
@@ -483,3 +483,11 @@ rebalancing.range.rebalances
 rebalancing.replicas.cpunanospersecond
 rebalancing.replicas.queriespersecond
 rebalancing.state.imbalanced_overfull_options_exhausted
+sql.routine.delete.count
+sql.routine.delete.started.count
+sql.routine.insert.count
+sql.routine.insert.started.count
+sql.routine.select.count
+sql.routine.select.started.count
+sql.routine.update.count
+sql.routine.update.started.count
diff --git a/src/current/_data/v25.4/metrics/metrics.yaml b/src/current/_data/v25.4/metrics/metrics.yaml
index 7fe4416b4ae..fb0a9bb3ae6 100644
--- a/src/current/_data/v25.4/metrics/metrics.yaml
+++ b/src/current/_data/v25.4/metrics/metrics.yaml
@@ -85,6 +85,28 @@ layers:
       derivative: NONE
       how_to_use: Changefeeds use protected timestamps to protect the data from being garbage collected. Ensure the protected timestamp age does not significantly exceed the GC TTL zone configuration. Alert on this metric if the protected timestamp age is greater than 3 times the GC TTL.
       essential: true
+  - name: CROSS_CLUSTER_REPLICATION
+    metrics:
+    - name: physical_replication.logical_bytes
+      exported_name: physical_replication_logical_bytes
+      description: Logical bytes (sum of keys + values) ingested by all replication jobs
+      y_axis_label: Bytes
+      type: COUNTER
+      unit: BYTES
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+      how_to_use: Track PCR throughput
+      essential: true
+    - name: physical_replication.replicated_time_seconds
+      exported_name: physical_replication_replicated_time_seconds
+      description: The replicated time of the physical replication stream in seconds since the unix epoch.
+      y_axis_label: Seconds
+      type: GAUGE
+      unit: SECONDS
+      aggregation: AVG
+      derivative: NONE
+      how_to_use: Track replication lag via current time - physical_replication.replicated_time_seconds
+      essential: true
   - name: DISTRIBUTED
     metrics:
     - name: distsender.errors.notleaseholder
@@ -107,6 +129,58 @@ layers:
       derivative: NON_NEGATIVE_DERIVATIVE
       how_to_use: RPC errors do not necessarily indicate a problem. This metric tracks remote procedure calls that return a status value other than "success". A non-success status of an RPC should not be misconstrued as a network transport issue. It is database code logic executed on another cluster node. The non-success status is a result of an orderly execution of an RPC that reports a specific logical condition.
       essential: true
+  - name: LOGICAL_DATA_REPLICATION
+    metrics:
+    - name: logical_replication.commit_latency
+      exported_name: logical_replication_commit_latency
+      description: 'Event commit latency: the difference between the event MVCC timestamp and the time it was flushed to disk. If events are batched, the difference between the oldest event in the batch and the flush time is recorded.'
+      y_axis_label: Nanoseconds
+      type: HISTOGRAM
+      unit: NANOSECONDS
+      aggregation: AVG
+      derivative: NONE
+      how_to_use: track the latency of applying events from source to destination
+      essential: true
+    - name: logical_replication.events_dlqed
+      exported_name: logical_replication_events_dlqed
+      description: Row update events sent to DLQ
+      y_axis_label: Failures
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+      how_to_use: track events sent to the dead letter queue
+      essential: true
+    - name: logical_replication.events_ingested
+      exported_name: logical_replication_events_ingested
+      description: Events ingested by all replication jobs
+      y_axis_label: Events
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+      how_to_use: track events (e.g. updates, deletes, inserts) ingested
+      essential: true
+    - name: logical_replication.logical_bytes
+      exported_name: logical_replication_logical_bytes
+      description: Logical bytes (sum of keys + values) received by all replication jobs
+      y_axis_label: Bytes
+      type: COUNTER
+      unit: BYTES
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+      how_to_use: track logical data replication throughput
+      essential: true
+    - name: logical_replication.replicated_time_seconds
+      exported_name: logical_replication_replicated_time_seconds
+      description: The replicated time of the logical replication stream in seconds since the unix epoch.
+      y_axis_label: Seconds
+      type: GAUGE
+      unit: SECONDS
+      aggregation: AVG
+      derivative: NONE
+      how_to_use: Track replication lag via current time - logical_replication.replicated_time_seconds
+      essential: true
   - name: NETWORKING
     metrics:
     - name: clock-offset.meannanos
@@ -124,6 +198,10 @@ layers:
       description: |
         Sum of exponentially weighted moving average of round-trip latencies, as measured through a gRPC RPC.
 
+        Since this metric is based on gRPC RPCs, it is affected by application-level
+        processing delays and CPU overload effects. See rpc.connection.tcp_rtt for a
+        metric that is obtained from the kernel's TCP stack.
+
         Dividing this Gauge by rpc.connection.healthy gives an approximation of average
         latency, but the top-level round-trip-latency histogram is more useful. Instead,
         users should consult the label families of this metric if they are available
@@ -191,6 +269,40 @@ layers:
       derivative: NON_NEGATIVE_DERIVATIVE
       how_to_use: See Description.
       essential: true
+    - name: rpc.connection.tcp_rtt
+      exported_name: rpc_connection_tcp_rtt
+      description: |
+        Kernel-level TCP round-trip time as measured by the Linux TCP stack.
+
+        This metric reports the smoothed round-trip time (SRTT) as maintained by the
+        kernel's TCP implementation. Unlike application-level RPC latency measurements,
+        this reflects pure network latency and is less affected by CPU overload effects.
+
+        This metric is only available on Linux.
+      y_axis_label: Latency
+      type: GAUGE
+      unit: NANOSECONDS
+      aggregation: AVG
+      derivative: NONE
+      how_to_use: High TCP RTT values indicate network issues outside of CockroachDB that could be impacting the user's workload.
+      essential: true
+    - name: rpc.connection.tcp_rtt_var
+      exported_name: rpc_connection_tcp_rtt_var
+      description: |
+        Kernel-level TCP round-trip time variance as measured by the Linux TCP stack.
+
+        This metric reports the smoothed round-trip time variance (RTTVAR) as maintained
+        by the kernel's TCP implementation. This measures the stability of the
+        connection latency.
+
+        This metric is only available on Linux.
+      y_axis_label: Latency Variance
+      type: GAUGE
+      unit: NANOSECONDS
+      aggregation: AVG
+      derivative: NONE
+      how_to_use: High TCP RTT variance values indicate network stability issues outside of CockroachDB that could be impacting the user's workload.
+      essential: true
     - name: rpc.connection.unhealthy
       exported_name: rpc_connection_unhealthy
       description: Gauge of current connections in an unhealthy state (not bidirectionally connected or heartbeating)
@@ -218,6 +330,39 @@ layers:
       essential: true
   - name: SQL
     metrics:
+    - name: jobs.auto_create_partial_stats.currently_paused
+      exported_name: jobs_auto_create_partial_stats_currently_paused
+      labeled_name: 'jobs{name: auto_create_partial_stats, status: currently_paused}'
+      description: Number of auto_create_partial_stats jobs currently considered Paused
+      y_axis_label: jobs
+      type: GAUGE
+      unit: COUNT
+      aggregation: AVG
+      derivative: NONE
+      how_to_use: This metric is a high-level indicator that automatically generated partial statistics jobs are paused which can lead to the query optimizer running with stale statistics. Stale statistics can cause suboptimal query plans to be selected leading to poor query performance.
+      essential: true
+    - name: jobs.auto_create_partial_stats.currently_running
+      exported_name: jobs_auto_create_partial_stats_currently_running
+      labeled_name: 'jobs{type: auto_create_partial_stats, status: currently_running}'
+      description: Number of auto_create_partial_stats jobs currently running in Resume or OnFailOrCancel state
+      y_axis_label: jobs
+      type: GAUGE
+      unit: COUNT
+      aggregation: AVG
+      derivative: NONE
+      how_to_use: This metric tracks the number of active automatically generated partial statistics jobs that could also be consuming resources. Ensure that foreground SQL traffic is not impacted by correlating this metric with SQL latency and query volume metrics.
+      essential: true
+    - name: jobs.auto_create_partial_stats.resume_failed
+      exported_name: jobs_auto_create_partial_stats_resume_failed
+      labeled_name: 'jobs.resume{name: auto_create_partial_stats, status: failed}'
+      description: Number of auto_create_partial_stats jobs which failed with a non-retriable error
+      y_axis_label: jobs
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+      how_to_use: This metric is a high-level indicator that the automatic generation of partial table statistics is failing. Failed statistics creation can lead to the query optimizer running with stale statistics. Stale statistics can cause suboptimal query plans to be selected, leading to poor query performance.
+      essential: true
     - name: jobs.auto_create_stats.currently_paused
       exported_name: jobs_auto_create_stats_currently_paused
       labeled_name: 'jobs{name: auto_create_stats, status: currently_paused}'
@@ -282,7 +427,7 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NONE
-      how_to_use: This metric tracks the number of active create statistics jobs that may be consuming resources. Ensure that foreground SQL traffic is not impacted by correlating this metric with SQL latency and query volume metrics.
+      how_to_use: This metric tracks the number of active create statistics jobs that could also be consuming resources. Ensure that foreground SQL traffic is not impacted by correlating this metric with SQL latency and query volume metrics.
       essential: true
     - name: schedules.BACKUP.failed
       exported_name: schedules_BACKUP_failed
@@ -373,6 +518,25 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
+    - name: sql.delete.started.count
+      exported_name: sql_delete_started_count
+      labeled_name: 'sql.started.count{query_type: delete}'
+      description: Number of SQL DELETE statements started
+      y_axis_label: SQL Statements
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+      how_to_use: This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the SQL Activity pages to investigate interesting outliers or patterns. For example, on the Transactions page and the Statements page, sort on the Execution Count column. To find problematic sessions, on the Sessions page, sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application.
+    - name: sql.delete.started.count.internal
+      exported_name: sql_delete_started_count_internal
+      labeled_name: 'sql.started.count{query_type: delete, query_internal: true}'
+      description: Number of SQL DELETE statements started (internal queries)
+      y_axis_label: SQL Internal Statements
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
     - name: sql.distsql.contended_queries.count
       exported_name: sql_distsql_contended_queries_count
       description: Number of SQL queries that experienced contention
@@ -439,6 +603,25 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
+    - name: sql.insert.started.count
+      exported_name: sql_insert_started_count
+      labeled_name: 'sql.started.count{query_type: insert}'
+      description: Number of SQL INSERT statements started
+      y_axis_label: SQL Statements
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+      how_to_use: This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the SQL Activity pages to investigate interesting outliers or patterns. For example, on the Transactions page and the Statements page, sort on the Execution Count column. To find problematic sessions, on the Sessions page, sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application.
+    - name: sql.insert.started.count.internal
+      exported_name: sql_insert_started_count_internal
+      labeled_name: 'sql.started.count{query_type: insert, query_internal: true}'
+      description: Number of SQL INSERT statements started (internal queries)
+      y_axis_label: SQL Internal Statements
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
     - name: sql.mem.root.current
       exported_name: sql_mem_root_current
       description: Current sql statement memory usage for root
@@ -459,6 +642,162 @@ layers:
       derivative: NON_NEGATIVE_DERIVATIVE
       how_to_use: The rate of this metric shows how frequently new connections are being established. This can be useful in determining if a high rate of incoming new connections is causing additional load on the server due to a misconfigured application.
       essential: true
+    - name: sql.routine.delete.count
+      exported_name: sql_routine_delete_count
+      labeled_name: 'sql.count{query_type: routine-delete}'
+      description: Number of SQL DELETE statements successfully executed within routine invocation
+      y_axis_label: SQL Statements
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+      how_to_use: This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the SQL Activity pages to investigate interesting outliers or patterns. For example, on the Transactions page and the Statements page, sort on the Execution Count column. To find problematic sessions, on the Sessions page, sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application.
+      essential: true
+    - name: sql.routine.delete.count.internal
+      exported_name: sql_routine_delete_count_internal
+      labeled_name: 'sql.count{query_type: routine-delete, query_internal: true}'
+      description: Number of SQL DELETE statements successfully executed within routine invocation (internal queries)
+      y_axis_label: SQL Internal Statements
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: sql.routine.delete.started.count
+      exported_name: sql_routine_delete_started_count
+      labeled_name: 'sql.started.count{query_type: routine-started-delete}'
+      description: Number of SQL DELETE statements started within routine invocation
+      y_axis_label: SQL Statements
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+      how_to_use: This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the SQL Activity pages to investigate interesting outliers or patterns. For example, on the Transactions page and the Statements page, sort on the Execution Count column. To find problematic sessions, on the Sessions page, sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application.
+    - name: sql.routine.delete.started.count.internal
+      exported_name: sql_routine_delete_started_count_internal
+      labeled_name: 'sql.started.count{query_type: routine-started-delete, query_internal: true}'
+      description: Number of SQL DELETE statements started within routine invocation (internal queries)
+      y_axis_label: SQL Internal Statements
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: sql.routine.insert.count
+      exported_name: sql_routine_insert_count
+      labeled_name: 'sql.count{query_type: routine-insert}'
+      description: Number of SQL INSERT statements successfully executed within routine invocation
+      y_axis_label: SQL Statements
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+      how_to_use: This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the SQL Activity pages to investigate interesting outliers or patterns. For example, on the Transactions page and the Statements page, sort on the Execution Count column. To find problematic sessions, on the Sessions page, sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application.
+      essential: true
+    - name: sql.routine.insert.count.internal
+      exported_name: sql_routine_insert_count_internal
+      labeled_name: 'sql.count{query_type: routine-insert, query_internal: true}'
+      description: Number of SQL INSERT statements successfully executed within routine invocation (internal queries)
+      y_axis_label: SQL Internal Statements
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: sql.routine.insert.started.count
+      exported_name: sql_routine_insert_started_count
+      labeled_name: 'sql.started.count{query_type: routine-started-insert}'
+      description: Number of SQL INSERT statements started within routine invocation
+      y_axis_label: SQL Statements
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+      how_to_use: This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the SQL Activity pages to investigate interesting outliers or patterns. For example, on the Transactions page and the Statements page, sort on the Execution Count column. To find problematic sessions, on the Sessions page, sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application.
+    - name: sql.routine.insert.started.count.internal
+      exported_name: sql_routine_insert_started_count_internal
+      labeled_name: 'sql.started.count{query_type: routine-started-insert, query_internal: true}'
+      description: Number of SQL INSERT statements started within routine invocation (internal queries)
+      y_axis_label: SQL Internal Statements
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: sql.routine.select.count
+      exported_name: sql_routine_select_count
+      labeled_name: 'sql.count{query_type: routine-select}'
+      description: Number of SQL SELECT statements successfully executed within routine invocation
+      y_axis_label: SQL Statements
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+      how_to_use: This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the SQL Activity pages to investigate interesting outliers or patterns. For example, on the Transactions page and the Statements page, sort on the Execution Count column. To find problematic sessions, on the Sessions page, sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application.
+      essential: true
+    - name: sql.routine.select.count.internal
+      exported_name: sql_routine_select_count_internal
+      labeled_name: 'sql.count{query_type: routine-select, query_internal: true}'
+      description: Number of SQL SELECT statements successfully executed within routine invocation (internal queries)
+      y_axis_label: SQL Internal Statements
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: sql.routine.select.started.count
+      exported_name: sql_routine_select_started_count
+      labeled_name: 'sql.started.count{query_type: routine-started-select}'
+      description: Number of SQL SELECT statements started within routine invocation
+      y_axis_label: SQL Statements
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+      how_to_use: This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the SQL Activity pages to investigate interesting outliers or patterns. For example, on the Transactions page and the Statements page, sort on the Execution Count column. To find problematic sessions, on the Sessions page, sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application.
+    - name: sql.routine.select.started.count.internal
+      exported_name: sql_routine_select_started_count_internal
+      labeled_name: 'sql.started.count{query_type: routine-started-select, query_internal: true}'
+      description: Number of SQL SELECT statements started within routine invocation (internal queries)
+      y_axis_label: SQL Internal Statements
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: sql.routine.update.count
+      exported_name: sql_routine_update_count
+      labeled_name: 'sql.count{query_type: routine-update}'
+      description: Number of SQL UPDATE statements successfully executed within routine invocation
+      y_axis_label: SQL Statements
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+      how_to_use: This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the SQL Activity pages to investigate interesting outliers or patterns. For example, on the Transactions page and the Statements page, sort on the Execution Count column. To find problematic sessions, on the Sessions page, sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application.
+      essential: true
+    - name: sql.routine.update.count.internal
+      exported_name: sql_routine_update_count_internal
+      labeled_name: 'sql.count{query_type: routine-update, query_internal: true}'
+      description: Number of SQL UPDATE statements successfully executed within routine invocation (internal queries)
+      y_axis_label: SQL Internal Statements
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: sql.routine.update.started.count
+      exported_name: sql_routine_update_started_count
+      labeled_name: 'sql.started.count{query_type: routine-started-update}'
+      description: Number of SQL UPDATE statements started within routine invocation
+      y_axis_label: SQL Statements
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+      how_to_use: This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the SQL Activity pages to investigate interesting outliers or patterns. For example, on the Transactions page and the Statements page, sort on the Execution Count column. To find problematic sessions, on the Sessions page, sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application.
+    - name: sql.routine.update.started.count.internal
+      exported_name: sql_routine_update_started_count_internal
+      labeled_name: 'sql.started.count{query_type: routine-started-update, query_internal: true}'
+      description: Number of SQL UPDATE statements started within routine invocation (internal queries)
+      y_axis_label: SQL Internal Statements
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
     - name: sql.select.count
       exported_name: sql_select_count
       labeled_name: 'sql.count{query_type: select}'
@@ -479,6 +818,25 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
+    - name: sql.select.started.count
+      exported_name: sql_select_started_count
+      labeled_name: 'sql.started.count{query_type: select}'
+      description: Number of SQL SELECT statements started
+      y_axis_label: SQL Statements
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+      how_to_use: This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the SQL Activity pages to investigate interesting outliers or patterns. For example, on the Transactions page and the Statements page, sort on the Execution Count column. To find problematic sessions, on the Sessions page, sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application.
+    - name: sql.select.started.count.internal
+      exported_name: sql_select_started_count_internal
+      labeled_name: 'sql.started.count{query_type: select, query_internal: true}'
+      description: Number of SQL SELECT statements started (internal queries)
+      y_axis_label: SQL Internal Statements
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
     - name: sql.service.latency
       exported_name: sql_service_latency
       description: Latency of SQL request execution
@@ -643,6 +1001,25 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
+    - name: sql.update.started.count
+      exported_name: sql_update_started_count
+      labeled_name: 'sql.started.count{query_type: update}'
+      description: Number of SQL UPDATE statements started
+      y_axis_label: SQL Statements
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+      how_to_use: This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the SQL Activity pages to investigate interesting outliers or patterns. For example, on the Transactions page and the Statements page, sort on the Execution Count column. To find problematic sessions, on the Sessions page, sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application.
+    - name: sql.update.started.count.internal
+      exported_name: sql_update_started_count_internal
+      labeled_name: 'sql.started.count{query_type: update, query_internal: true}'
+      description: Number of SQL UPDATE statements started (internal queries)
+      y_axis_label: SQL Internal Statements
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
     - name: txn.restarts.serializable
       exported_name: txn_restarts_serializable
       description: Number of restarts due to a forwarded commit timestamp and isolation=SERIALIZABLE
@@ -864,6 +1241,14 @@ layers:
       unit: NANOSECONDS
       aggregation: AVG
       derivative: NONE
+    - name: auth.ldap.conn.latency.internal
+      exported_name: auth_ldap_conn_latency_internal
+      description: Internal auth latency to establish and authenticate a SQL connection using LDAP (excludes external LDAP calls)
+      y_axis_label: Nanoseconds
+      type: HISTOGRAM
+      unit: NANOSECONDS
+      aggregation: AVG
+      derivative: NONE
     - name: auth.password.conn.latency
       exported_name: auth_password_conn_latency
       description: Latency to establish and authenticate a SQL connection using password
@@ -1211,7 +1596,7 @@ layers:
     - name: changefeed.checkpoint_hist_nanos
       exported_name: changefeed_checkpoint_hist_nanos
       description: Time spent checkpointing changefeed progress
-      y_axis_label: Changefeeds
+      y_axis_label: Nanoseconds
       type: HISTOGRAM
       unit: NANOSECONDS
       aggregation: AVG
@@ -1416,6 +1801,22 @@ layers:
       unit: NANOSECONDS
       aggregation: AVG
       derivative: NONE
+    - name: changefeed.progress_skew.span
+      exported_name: changefeed_progress_skew_span
+      description: The time difference between the fastest and slowest span's resolved timestamp
+      y_axis_label: Nanoseconds
+      type: GAUGE
+      unit: NANOSECONDS
+      aggregation: AVG
+      derivative: NONE
+    - name: changefeed.progress_skew.table
+      exported_name: changefeed_progress_skew_table
+      description: The time difference between the fastest and slowest table's resolved timestamp
+      y_axis_label: Nanoseconds
+      type: GAUGE
+      unit: NANOSECONDS
+      aggregation: AVG
+      derivative: NONE
     - name: changefeed.queue_time_nanos
       exported_name: changefeed_queue_time_nanos
       description: Time KV event spent waiting to be processed
@@ -1456,6 +1857,14 @@ layers:
       unit: NANOSECONDS
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
+    - name: changefeed.sink_backpressure_nanos
+      exported_name: changefeed_sink_backpressure_nanos
+      description: Time spent waiting for quota when emitting to the sink (back-pressure). Only populated for sinks using the batching_sink wrapper. As of writing, this includes Kafka (v2), Pub/Sub (v2), and Webhook (v2).
+      y_axis_label: Nanoseconds
+      type: HISTOGRAM
+      unit: NANOSECONDS
+      aggregation: AVG
+      derivative: NONE
     - name: changefeed.sink_batch_hist_nanos
       exported_name: changefeed_sink_batch_hist_nanos
       description: Time spent batched in the sink buffer before being flushed and acknowledged
@@ -1520,6 +1929,14 @@ layers:
       unit: NANOSECONDS
       aggregation: AVG
       derivative: NONE
+    - name: changefeed.stage.frontier_persistence.latency
+      exported_name: changefeed_stage_frontier_persistence_latency
+      description: 'Latency of the changefeed stage: persisting frontier to job info'
+      y_axis_label: Latency
+      type: HISTOGRAM
+      unit: NANOSECONDS
+      aggregation: AVG
+      derivative: NONE
     - name: changefeed.stage.kv_feed_buffer.latency
       exported_name: changefeed_stage_kv_feed_buffer_latency
       description: 'Latency of the changefeed stage: waiting to buffer kv events'
@@ -1536,6 +1953,30 @@ layers:
       unit: NANOSECONDS
       aggregation: AVG
       derivative: NONE
+    - name: changefeed.stage.pts.create.latency
+      exported_name: changefeed_stage_pts_create_latency
+      description: 'Latency of the changefeed stage: Time spent creating protected timestamp records on changefeed creation'
+      y_axis_label: Latency
+      type: HISTOGRAM
+      unit: NANOSECONDS
+      aggregation: AVG
+      derivative: NONE
+    - name: changefeed.stage.pts.manage.latency
+      exported_name: changefeed_stage_pts_manage_latency
+      description: 'Latency of the changefeed stage: Time spent successfully managing protected timestamp records on highwater advance, including time spent creating new protected timestamps when needed'
+      y_axis_label: Latency
+      type: HISTOGRAM
+      unit: NANOSECONDS
+      aggregation: AVG
+      derivative: NONE
+    - name: changefeed.stage.pts.manage_error.latency
+      exported_name: changefeed_stage_pts_manage_error_latency
+      description: 'Latency of the changefeed stage: Time spent managing protected timestamp when we eventually error'
+      y_axis_label: Latency
+      type: HISTOGRAM
+      unit: NANOSECONDS
+      aggregation: AVG
+      derivative: NONE
     - name: changefeed.stage.rangefeed_buffer_checkpoint.latency
       exported_name: changefeed_stage_rangefeed_buffer_checkpoint_latency
       description: 'Latency of the changefeed stage: buffering rangefeed checkpoint events'
@@ -2956,6 +3397,18 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
+    - name: distsender.rpc.flushlocktable.sent
+      exported_name: distsender_rpc_flushlocktable_sent
+      description: |-
+        Number of FlushLockTable requests processed.
+
+        This counts the requests in batches handed to DistSender, not the RPCs
+        sent to individual Ranges as a result.
+      y_axis_label: RPCs
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
     - name: distsender.rpc.gc.sent
       exported_name: distsender_rpc_gc_sent
       description: |-
@@ -3443,15 +3896,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.auto_config_env_runner.fail_or_cancel_failed
-      exported_name: jobs_auto_config_env_runner_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: auto_config_env_runner, status: failed}'
-      description: Number of auto_config_env_runner jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.auto_config_env_runner.fail_or_cancel_retry_error
       exported_name: jobs_auto_config_env_runner_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: auto_config_env_runner, status: retry_error}'
@@ -3551,15 +3995,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.auto_config_runner.fail_or_cancel_failed
-      exported_name: jobs_auto_config_runner_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: auto_config_runner, status: failed}'
-      description: Number of auto_config_runner jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.auto_config_runner.fail_or_cancel_retry_error
       exported_name: jobs_auto_config_runner_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: auto_config_runner, status: retry_error}'
@@ -3659,15 +4094,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.auto_config_task.fail_or_cancel_failed
-      exported_name: jobs_auto_config_task_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: auto_config_task, status: failed}'
-      description: Number of auto_config_task jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.auto_config_task.fail_or_cancel_retry_error
       exported_name: jobs_auto_config_task_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: auto_config_task, status: retry_error}'
@@ -3731,33 +4157,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NONE
-    - name: jobs.auto_create_partial_stats.currently_paused
-      exported_name: jobs_auto_create_partial_stats_currently_paused
-      labeled_name: 'jobs{name: auto_create_partial_stats, status: currently_paused}'
-      description: Number of auto_create_partial_stats jobs currently considered Paused
-      y_axis_label: jobs
-      type: GAUGE
-      unit: COUNT
-      aggregation: AVG
-      derivative: NONE
-    - name: jobs.auto_create_partial_stats.currently_running
-      exported_name: jobs_auto_create_partial_stats_currently_running
-      labeled_name: 'jobs{type: auto_create_partial_stats, status: currently_running}'
-      description: Number of auto_create_partial_stats jobs currently running in Resume or OnFailOrCancel state
-      y_axis_label: jobs
-      type: GAUGE
-      unit: COUNT
-      aggregation: AVG
-      derivative: NONE
-    - name: jobs.auto_create_partial_stats.expired_pts_records
-      exported_name: jobs_auto_create_partial_stats_expired_pts_records
-      labeled_name: 'jobs.expired_pts_records{type: auto_create_partial_stats}'
-      description: Number of expired protected timestamp records owned by auto_create_partial_stats jobs
-      y_axis_label: records
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.auto_create_partial_stats.fail_or_cancel_completed
       exported_name: jobs_auto_create_partial_stats_fail_or_cancel_completed
       labeled_name: 'jobs.fail_or_cancel{name: auto_create_partial_stats, status: completed}'
@@ -3767,15 +4166,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.auto_create_partial_stats.fail_or_cancel_failed
-      exported_name: jobs_auto_create_partial_stats_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: auto_create_partial_stats, status: failed}'
-      description: Number of auto_create_partial_stats jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.auto_create_partial_stats.fail_or_cancel_retry_error
       exported_name: jobs_auto_create_partial_stats_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: auto_create_partial_stats, status: retry_error}'
@@ -3785,24 +4175,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.auto_create_partial_stats.protected_age_sec
-      exported_name: jobs_auto_create_partial_stats_protected_age_sec
-      labeled_name: 'jobs.protected_age_sec{type: auto_create_partial_stats}'
-      description: The age of the oldest PTS record protected by auto_create_partial_stats jobs
-      y_axis_label: seconds
-      type: GAUGE
-      unit: SECONDS
-      aggregation: AVG
-      derivative: NONE
-    - name: jobs.auto_create_partial_stats.protected_record_count
-      exported_name: jobs_auto_create_partial_stats_protected_record_count
-      labeled_name: 'jobs.protected_record_count{type: auto_create_partial_stats}'
-      description: Number of protected timestamp records held by auto_create_partial_stats jobs
-      y_axis_label: records
-      type: GAUGE
-      unit: COUNT
-      aggregation: AVG
-      derivative: NONE
     - name: jobs.auto_create_partial_stats.resume_completed
       exported_name: jobs_auto_create_partial_stats_resume_completed
       labeled_name: 'jobs.resume{name: auto_create_partial_stats, status: completed}'
@@ -3812,15 +4184,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.auto_create_partial_stats.resume_failed
-      exported_name: jobs_auto_create_partial_stats_resume_failed
-      labeled_name: 'jobs.resume{name: auto_create_partial_stats, status: failed}'
-      description: Number of auto_create_partial_stats jobs which failed with a non-retriable error
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.auto_create_partial_stats.resume_retry_error
       exported_name: jobs_auto_create_partial_stats_resume_retry_error
       labeled_name: 'jobs.resume{name: auto_create_partial_stats, status: retry_error}'
@@ -3839,15 +4202,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NONE
-    - name: jobs.auto_create_stats.expired_pts_records
-      exported_name: jobs_auto_create_stats_expired_pts_records
-      labeled_name: 'jobs.expired_pts_records{type: auto_create_stats}'
-      description: Number of expired protected timestamp records owned by auto_create_stats jobs
-      y_axis_label: records
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.auto_create_stats.fail_or_cancel_completed
       exported_name: jobs_auto_create_stats_fail_or_cancel_completed
       labeled_name: 'jobs.fail_or_cancel{name: auto_create_stats, status: completed}'
@@ -3857,15 +4211,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.auto_create_stats.fail_or_cancel_failed
-      exported_name: jobs_auto_create_stats_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: auto_create_stats, status: failed}'
-      description: Number of auto_create_stats jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.auto_create_stats.fail_or_cancel_retry_error
       exported_name: jobs_auto_create_stats_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: auto_create_stats, status: retry_error}'
@@ -3875,24 +4220,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.auto_create_stats.protected_age_sec
-      exported_name: jobs_auto_create_stats_protected_age_sec
-      labeled_name: 'jobs.protected_age_sec{type: auto_create_stats}'
-      description: The age of the oldest PTS record protected by auto_create_stats jobs
-      y_axis_label: seconds
-      type: GAUGE
-      unit: SECONDS
-      aggregation: AVG
-      derivative: NONE
-    - name: jobs.auto_create_stats.protected_record_count
-      exported_name: jobs_auto_create_stats_protected_record_count
-      labeled_name: 'jobs.protected_record_count{type: auto_create_stats}'
-      description: Number of protected timestamp records held by auto_create_stats jobs
-      y_axis_label: records
-      type: GAUGE
-      unit: COUNT
-      aggregation: AVG
-      derivative: NONE
     - name: jobs.auto_create_stats.resume_completed
       exported_name: jobs_auto_create_stats_resume_completed
       labeled_name: 'jobs.resume{name: auto_create_stats, status: completed}'
@@ -3956,15 +4283,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.auto_schema_telemetry.fail_or_cancel_failed
-      exported_name: jobs_auto_schema_telemetry_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: auto_schema_telemetry, status: failed}'
-      description: Number of auto_schema_telemetry jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.auto_schema_telemetry.fail_or_cancel_retry_error
       exported_name: jobs_auto_schema_telemetry_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: auto_schema_telemetry, status: retry_error}'
@@ -4064,15 +4382,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.auto_span_config_reconciliation.fail_or_cancel_failed
-      exported_name: jobs_auto_span_config_reconciliation_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: auto_span_config_reconciliation, status: failed}'
-      description: Number of auto_span_config_reconciliation jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.auto_span_config_reconciliation.fail_or_cancel_retry_error
       exported_name: jobs_auto_span_config_reconciliation_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: auto_span_config_reconciliation, status: retry_error}'
@@ -4172,15 +4481,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.auto_sql_stats_compaction.fail_or_cancel_failed
-      exported_name: jobs_auto_sql_stats_compaction_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: auto_sql_stats_compaction, status: failed}'
-      description: Number of auto_sql_stats_compaction jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.auto_sql_stats_compaction.fail_or_cancel_retry_error
       exported_name: jobs_auto_sql_stats_compaction_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: auto_sql_stats_compaction, status: retry_error}'
@@ -4280,15 +4580,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.auto_update_sql_activity.fail_or_cancel_failed
-      exported_name: jobs_auto_update_sql_activity_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: auto_update_sql_activity, status: failed}'
-      description: Number of auto_update_sql_activity jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.auto_update_sql_activity.fail_or_cancel_retry_error
       exported_name: jobs_auto_update_sql_activity_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: auto_update_sql_activity, status: retry_error}'
@@ -4370,15 +4661,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.backup.fail_or_cancel_failed
-      exported_name: jobs_backup_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: backup, status: failed}'
-      description: Number of backup jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.backup.fail_or_cancel_retry_error
       exported_name: jobs_backup_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: backup, status: retry_error}'
@@ -4469,15 +4751,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.changefeed.fail_or_cancel_failed
-      exported_name: jobs_changefeed_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: changefeed, status: failed}'
-      description: Number of changefeed jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.changefeed.fail_or_cancel_retry_error
       exported_name: jobs_changefeed_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: changefeed, status: retry_error}'
@@ -4549,15 +4822,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NONE
-    - name: jobs.create_stats.expired_pts_records
-      exported_name: jobs_create_stats_expired_pts_records
-      labeled_name: 'jobs.expired_pts_records{type: create_stats}'
-      description: Number of expired protected timestamp records owned by create_stats jobs
-      y_axis_label: records
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.create_stats.fail_or_cancel_completed
       exported_name: jobs_create_stats_fail_or_cancel_completed
       labeled_name: 'jobs.fail_or_cancel{name: create_stats, status: completed}'
@@ -4567,15 +4831,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.create_stats.fail_or_cancel_failed
-      exported_name: jobs_create_stats_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: create_stats, status: failed}'
-      description: Number of create_stats jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.create_stats.fail_or_cancel_retry_error
       exported_name: jobs_create_stats_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: create_stats, status: retry_error}'
@@ -4585,24 +4840,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.create_stats.protected_age_sec
-      exported_name: jobs_create_stats_protected_age_sec
-      labeled_name: 'jobs.protected_age_sec{type: create_stats}'
-      description: The age of the oldest PTS record protected by create_stats jobs
-      y_axis_label: seconds
-      type: GAUGE
-      unit: SECONDS
-      aggregation: AVG
-      derivative: NONE
-    - name: jobs.create_stats.protected_record_count
-      exported_name: jobs_create_stats_protected_record_count
-      labeled_name: 'jobs.protected_record_count{type: create_stats}'
-      description: Number of protected timestamp records held by create_stats jobs
-      y_axis_label: records
-      type: GAUGE
-      unit: COUNT
-      aggregation: AVG
-      derivative: NONE
     - name: jobs.create_stats.resume_completed
       exported_name: jobs_create_stats_resume_completed
       labeled_name: 'jobs.resume{name: create_stats, status: completed}'
@@ -4675,15 +4912,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.history_retention.fail_or_cancel_failed
-      exported_name: jobs_history_retention_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: history_retention, status: failed}'
-      description: Number of history_retention jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.history_retention.fail_or_cancel_retry_error
       exported_name: jobs_history_retention_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: history_retention, status: retry_error}'
@@ -4783,15 +5011,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.hot_ranges_logger.fail_or_cancel_failed
-      exported_name: jobs_hot_ranges_logger_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: hot_ranges_logger, status: failed}'
-      description: Number of hot_ranges_logger jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.hot_ranges_logger.fail_or_cancel_retry_error
       exported_name: jobs_hot_ranges_logger_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: hot_ranges_logger, status: retry_error}'
@@ -4891,15 +5110,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.import.fail_or_cancel_failed
-      exported_name: jobs_import_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: import, status: failed}'
-      description: Number of import jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.import.fail_or_cancel_retry_error
       exported_name: jobs_import_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: import, status: retry_error}'
@@ -4981,15 +5191,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NONE
-    - name: jobs.import_rollback.expired_pts_records
-      exported_name: jobs_import_rollback_expired_pts_records
-      labeled_name: 'jobs.expired_pts_records{type: import_rollback}'
-      description: Number of expired protected timestamp records owned by import_rollback jobs
-      y_axis_label: records
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.import_rollback.fail_or_cancel_completed
       exported_name: jobs_import_rollback_fail_or_cancel_completed
       labeled_name: 'jobs.fail_or_cancel{name: import_rollback, status: completed}'
@@ -4999,69 +5200,165 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.import_rollback.fail_or_cancel_failed
-      exported_name: jobs_import_rollback_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: import_rollback, status: failed}'
-      description: Number of import_rollback jobs which failed with a non-retriable error on their failure or cancelation process
+    - name: jobs.import_rollback.fail_or_cancel_retry_error
+      exported_name: jobs_import_rollback_fail_or_cancel_retry_error
+      labeled_name: 'jobs.fail_or_cancel{name: import_rollback, status: retry_error}'
+      description: Number of import_rollback jobs which failed with a retriable error on their failure or cancelation process
+      y_axis_label: jobs
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: jobs.import_rollback.resume_completed
+      exported_name: jobs_import_rollback_resume_completed
+      labeled_name: 'jobs.resume{name: import_rollback, status: completed}'
+      description: Number of import_rollback jobs which successfully resumed to completion
+      y_axis_label: jobs
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: jobs.import_rollback.resume_failed
+      exported_name: jobs_import_rollback_resume_failed
+      labeled_name: 'jobs.resume{name: import_rollback, status: failed}'
+      description: Number of import_rollback jobs which failed with a non-retriable error
+      y_axis_label: jobs
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: jobs.import_rollback.resume_retry_error
+      exported_name: jobs_import_rollback_resume_retry_error
+      labeled_name: 'jobs.resume{name: import_rollback, status: retry_error}'
+      description: Number of import_rollback jobs which failed with a retriable error
+      y_axis_label: jobs
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: jobs.inspect.currently_idle
+      exported_name: jobs_inspect_currently_idle
+      labeled_name: 'jobs{type: inspect, status: currently_idle}'
+      description: Number of inspect jobs currently considered Idle and can be freely shut down
+      y_axis_label: jobs
+      type: GAUGE
+      unit: COUNT
+      aggregation: AVG
+      derivative: NONE
+    - name: jobs.inspect.currently_paused
+      exported_name: jobs_inspect_currently_paused
+      labeled_name: 'jobs{name: inspect, status: currently_paused}'
+      description: Number of inspect jobs currently considered Paused
+      y_axis_label: jobs
+      type: GAUGE
+      unit: COUNT
+      aggregation: AVG
+      derivative: NONE
+    - name: jobs.inspect.currently_running
+      exported_name: jobs_inspect_currently_running
+      labeled_name: 'jobs{type: inspect, status: currently_running}'
+      description: Number of inspect jobs currently running in Resume or OnFailOrCancel state
+      y_axis_label: jobs
+      type: GAUGE
+      unit: COUNT
+      aggregation: AVG
+      derivative: NONE
+    - name: jobs.inspect.expired_pts_records
+      exported_name: jobs_inspect_expired_pts_records
+      labeled_name: 'jobs.expired_pts_records{type: inspect}'
+      description: Number of expired protected timestamp records owned by inspect jobs
+      y_axis_label: records
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: jobs.inspect.fail_or_cancel_completed
+      exported_name: jobs_inspect_fail_or_cancel_completed
+      labeled_name: 'jobs.fail_or_cancel{name: inspect, status: completed}'
+      description: Number of inspect jobs which successfully completed their failure or cancelation process
       y_axis_label: jobs
       type: COUNTER
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.import_rollback.fail_or_cancel_retry_error
-      exported_name: jobs_import_rollback_fail_or_cancel_retry_error
-      labeled_name: 'jobs.fail_or_cancel{name: import_rollback, status: retry_error}'
-      description: Number of import_rollback jobs which failed with a retriable error on their failure or cancelation process
+    - name: jobs.inspect.fail_or_cancel_retry_error
+      exported_name: jobs_inspect_fail_or_cancel_retry_error
+      labeled_name: 'jobs.fail_or_cancel{name: inspect, status: retry_error}'
+      description: Number of inspect jobs which failed with a retriable error on their failure or cancelation process
       y_axis_label: jobs
       type: COUNTER
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.import_rollback.protected_age_sec
-      exported_name: jobs_import_rollback_protected_age_sec
-      labeled_name: 'jobs.protected_age_sec{type: import_rollback}'
-      description: The age of the oldest PTS record protected by import_rollback jobs
+    - name: jobs.inspect.issues_found
+      exported_name: jobs_inspect_issues_found
+      description: Total count of issues found by INSPECT jobs
+      y_axis_label: Issues
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: jobs.inspect.protected_age_sec
+      exported_name: jobs_inspect_protected_age_sec
+      labeled_name: 'jobs.protected_age_sec{type: inspect}'
+      description: The age of the oldest PTS record protected by inspect jobs
       y_axis_label: seconds
       type: GAUGE
       unit: SECONDS
       aggregation: AVG
       derivative: NONE
-    - name: jobs.import_rollback.protected_record_count
-      exported_name: jobs_import_rollback_protected_record_count
-      labeled_name: 'jobs.protected_record_count{type: import_rollback}'
-      description: Number of protected timestamp records held by import_rollback jobs
+    - name: jobs.inspect.protected_record_count
+      exported_name: jobs_inspect_protected_record_count
+      labeled_name: 'jobs.protected_record_count{type: inspect}'
+      description: Number of protected timestamp records held by inspect jobs
       y_axis_label: records
       type: GAUGE
       unit: COUNT
       aggregation: AVG
       derivative: NONE
-    - name: jobs.import_rollback.resume_completed
-      exported_name: jobs_import_rollback_resume_completed
-      labeled_name: 'jobs.resume{name: import_rollback, status: completed}'
-      description: Number of import_rollback jobs which successfully resumed to completion
+    - name: jobs.inspect.resume_completed
+      exported_name: jobs_inspect_resume_completed
+      labeled_name: 'jobs.resume{name: inspect, status: completed}'
+      description: Number of inspect jobs which successfully resumed to completion
       y_axis_label: jobs
       type: COUNTER
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.import_rollback.resume_failed
-      exported_name: jobs_import_rollback_resume_failed
-      labeled_name: 'jobs.resume{name: import_rollback, status: failed}'
-      description: Number of import_rollback jobs which failed with a non-retriable error
+    - name: jobs.inspect.resume_failed
+      exported_name: jobs_inspect_resume_failed
+      labeled_name: 'jobs.resume{name: inspect, status: failed}'
+      description: Number of inspect jobs which failed with a non-retriable error
       y_axis_label: jobs
       type: COUNTER
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.import_rollback.resume_retry_error
-      exported_name: jobs_import_rollback_resume_retry_error
-      labeled_name: 'jobs.resume{name: import_rollback, status: retry_error}'
-      description: Number of import_rollback jobs which failed with a retriable error
+    - name: jobs.inspect.resume_retry_error
+      exported_name: jobs_inspect_resume_retry_error
+      labeled_name: 'jobs.resume{name: inspect, status: retry_error}'
+      description: Number of inspect jobs which failed with a retriable error
       y_axis_label: jobs
       type: COUNTER
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
+    - name: jobs.inspect.runs
+      exported_name: jobs_inspect_runs
+      description: Number of INSPECT jobs executed
+      y_axis_label: Jobs
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: jobs.inspect.runs_with_issues
+      exported_name: jobs_inspect_runs_with_issues
+      description: Number of INSPECT jobs that found at least one issue
+      y_axis_label: Jobs
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.key_visualizer.currently_idle
       exported_name: jobs_key_visualizer_currently_idle
       labeled_name: 'jobs{type: key_visualizer, status: currently_idle}'
@@ -5107,15 +5404,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.key_visualizer.fail_or_cancel_failed
-      exported_name: jobs_key_visualizer_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: key_visualizer, status: failed}'
-      description: Number of key_visualizer jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.key_visualizer.fail_or_cancel_retry_error
       exported_name: jobs_key_visualizer_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: key_visualizer, status: retry_error}'
@@ -5215,15 +5503,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.logical_replication.fail_or_cancel_failed
-      exported_name: jobs_logical_replication_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: logical_replication, status: failed}'
-      description: Number of logical_replication jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.logical_replication.fail_or_cancel_retry_error
       exported_name: jobs_logical_replication_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: logical_replication, status: retry_error}'
@@ -5331,15 +5610,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.migration.fail_or_cancel_failed
-      exported_name: jobs_migration_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: migration, status: failed}'
-      description: Number of migration jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.migration.fail_or_cancel_retry_error
       exported_name: jobs_migration_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: migration, status: retry_error}'
@@ -5439,15 +5709,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.mvcc_statistics_update.fail_or_cancel_failed
-      exported_name: jobs_mvcc_statistics_update_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: mvcc_statistics_update, status: failed}'
-      description: Number of mvcc_statistics_update jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.mvcc_statistics_update.fail_or_cancel_retry_error
       exported_name: jobs_mvcc_statistics_update_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: mvcc_statistics_update, status: retry_error}'
@@ -5547,15 +5808,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.new_schema_change.fail_or_cancel_failed
-      exported_name: jobs_new_schema_change_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: new_schema_change, status: failed}'
-      description: Number of new_schema_change jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.new_schema_change.fail_or_cancel_retry_error
       exported_name: jobs_new_schema_change_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: new_schema_change, status: retry_error}'
@@ -5655,15 +5907,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.poll_jobs_stats.fail_or_cancel_failed
-      exported_name: jobs_poll_jobs_stats_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: poll_jobs_stats, status: failed}'
-      description: Number of poll_jobs_stats jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.poll_jobs_stats.fail_or_cancel_retry_error
       exported_name: jobs_poll_jobs_stats_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: poll_jobs_stats, status: retry_error}'
@@ -5763,15 +6006,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.replication_stream_ingestion.fail_or_cancel_failed
-      exported_name: jobs_replication_stream_ingestion_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: replication_stream_ingestion, status: failed}'
-      description: Number of replication_stream_ingestion jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.replication_stream_ingestion.fail_or_cancel_retry_error
       exported_name: jobs_replication_stream_ingestion_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: replication_stream_ingestion, status: retry_error}'
@@ -5871,15 +6105,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.replication_stream_producer.fail_or_cancel_failed
-      exported_name: jobs_replication_stream_producer_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: replication_stream_producer, status: failed}'
-      description: Number of replication_stream_producer jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.replication_stream_producer.fail_or_cancel_retry_error
       exported_name: jobs_replication_stream_producer_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: replication_stream_producer, status: retry_error}'
@@ -5979,15 +6204,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.restore.fail_or_cancel_failed
-      exported_name: jobs_restore_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: restore, status: failed}'
-      description: Number of restore jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.restore.fail_or_cancel_retry_error
       exported_name: jobs_restore_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: restore, status: retry_error}'
@@ -6077,15 +6293,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.row_level_ttl.fail_or_cancel_failed
-      exported_name: jobs_row_level_ttl_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: row_level_ttl, status: failed}'
-      description: Number of row_level_ttl jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.row_level_ttl.fail_or_cancel_retry_error
       exported_name: jobs_row_level_ttl_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: row_level_ttl, status: retry_error}'
@@ -6183,15 +6390,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.schema_change.fail_or_cancel_failed
-      exported_name: jobs_schema_change_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: schema_change, status: failed}'
-      description: Number of schema_change jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.schema_change.fail_or_cancel_retry_error
       exported_name: jobs_schema_change_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: schema_change, status: retry_error}'
@@ -6291,15 +6489,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.schema_change_gc.fail_or_cancel_failed
-      exported_name: jobs_schema_change_gc_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: schema_change_gc, status: failed}'
-      description: Number of schema_change_gc jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.schema_change_gc.fail_or_cancel_retry_error
       exported_name: jobs_schema_change_gc_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: schema_change_gc, status: retry_error}'
@@ -6399,15 +6588,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.sql_activity_flush.fail_or_cancel_failed
-      exported_name: jobs_sql_activity_flush_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: sql_activity_flush, status: failed}'
-      description: Number of sql_activity_flush jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.sql_activity_flush.fail_or_cancel_retry_error
       exported_name: jobs_sql_activity_flush_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: sql_activity_flush, status: retry_error}'
@@ -6507,15 +6687,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.standby_read_ts_poller.fail_or_cancel_failed
-      exported_name: jobs_standby_read_ts_poller_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: standby_read_ts_poller, status: failed}'
-      description: Number of standby_read_ts_poller jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.standby_read_ts_poller.fail_or_cancel_retry_error
       exported_name: jobs_standby_read_ts_poller_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: standby_read_ts_poller, status: retry_error}'
@@ -6615,15 +6786,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.typedesc_schema_change.fail_or_cancel_failed
-      exported_name: jobs_typedesc_schema_change_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: typedesc_schema_change, status: failed}'
-      description: Number of typedesc_schema_change jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.typedesc_schema_change.fail_or_cancel_retry_error
       exported_name: jobs_typedesc_schema_change_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: typedesc_schema_change, status: retry_error}'
@@ -6723,15 +6885,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: jobs.update_table_metadata_cache.fail_or_cancel_failed
-      exported_name: jobs_update_table_metadata_cache_fail_or_cancel_failed
-      labeled_name: 'jobs.fail_or_cancel{name: update_table_metadata_cache, status: failed}'
-      description: Number of update_table_metadata_cache jobs which failed with a non-retriable error on their failure or cancelation process
-      y_axis_label: jobs
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: jobs.update_table_metadata_cache.fail_or_cancel_retry_error
       exported_name: jobs_update_table_metadata_cache_fail_or_cancel_retry_error
       labeled_name: 'jobs.fail_or_cancel{name: update_table_metadata_cache, status: retry_error}'
@@ -6882,24 +7035,9 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: logical_replication.commit_latency
-      exported_name: logical_replication_commit_latency
-      description: 'Event commit latency: a difference between event MVCC timestamp and the time it was flushed into disk. If we batch events, then the difference between the oldest event in the batch and flush is recorded'
-      y_axis_label: Nanoseconds
-      type: HISTOGRAM
-      unit: NANOSECONDS
-      aggregation: AVG
-      derivative: NONE
-    - name: logical_replication.events_dlqed
-      exported_name: logical_replication_events_dlqed
-      description: Row update events sent to DLQ
-      y_axis_label: Failures
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: logical_replication.events_dlqed_age
       exported_name: logical_replication_events_dlqed_age
+      labeled_name: 'logical_replication.events{type: dlqed_age}'
       description: Row update events sent to DLQ due to reaching the maximum time allowed in the retry queue
       y_axis_label: Failures
       type: COUNTER
@@ -6916,6 +7054,7 @@ layers:
       derivative: NON_NEGATIVE_DERIVATIVE
     - name: logical_replication.events_dlqed_errtype
       exported_name: logical_replication_events_dlqed_errtype
+      labeled_name: 'logical_replication.events{type: dlqed_errtype}'
       description: Row update events sent to DLQ due to an error not considered retryable
       y_axis_label: Failures
       type: COUNTER
@@ -6924,20 +7063,13 @@ layers:
       derivative: NON_NEGATIVE_DERIVATIVE
     - name: logical_replication.events_dlqed_space
       exported_name: logical_replication_events_dlqed_space
+      labeled_name: 'logical_replication.events{type: dlqed_space}'
       description: Row update events sent to DLQ due to capacity of the retry queue
       y_axis_label: Failures
       type: COUNTER
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: logical_replication.events_ingested
-      exported_name: logical_replication_events_ingested
-      description: Events ingested by all replication jobs
-      y_axis_label: Events
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: logical_replication.events_ingested_by_label
       exported_name: logical_replication_events_ingested_by_label
       description: Events ingested by all replication jobs by label
@@ -6948,6 +7080,7 @@ layers:
       derivative: NON_NEGATIVE_DERIVATIVE
     - name: logical_replication.events_initial_failure
       exported_name: logical_replication_events_initial_failure
+      labeled_name: 'logical_replication.events{type: initial_failure}'
       description: Failed attempts to apply an incoming row update
       y_axis_label: Failures
       type: COUNTER
@@ -6956,14 +7089,16 @@ layers:
       derivative: NON_NEGATIVE_DERIVATIVE
     - name: logical_replication.events_initial_success
       exported_name: logical_replication_events_initial_success
+      labeled_name: 'logical_replication.events{type: initial_success}'
       description: Successful applications of an incoming row update
-      y_axis_label: Failures
+      y_axis_label: Successes
       type: COUNTER
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
     - name: logical_replication.events_retry_failure
       exported_name: logical_replication_events_retry_failure
+      labeled_name: 'logical_replication.events{type: retry_failure}'
       description: Failed re-attempts to apply a row update
       y_axis_label: Failures
       type: COUNTER
@@ -6972,8 +7107,9 @@ layers:
       derivative: NON_NEGATIVE_DERIVATIVE
     - name: logical_replication.events_retry_success
       exported_name: logical_replication_events_retry_success
+      labeled_name: 'logical_replication.events{type: retry_success}'
       description: Row update events applied after one or more retries
-      y_axis_label: Failures
+      y_axis_label: Successes
       type: COUNTER
       unit: COUNT
       aggregation: AVG
@@ -6994,14 +7130,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: logical_replication.logical_bytes
-      exported_name: logical_replication_logical_bytes
-      description: Logical bytes (sum of keys + values) received by all replication jobs
-      y_axis_label: Bytes
-      type: COUNTER
-      unit: BYTES
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: logical_replication.replan_count
       exported_name: logical_replication_replan_count
       description: Total number of dist sql replanning events
@@ -7018,14 +7146,6 @@ layers:
       unit: SECONDS
       aggregation: AVG
       derivative: NONE
-    - name: logical_replication.replicated_time_seconds
-      exported_name: logical_replication_replicated_time_seconds
-      description: The replicated time of the logical replication stream in seconds since the unix epoch.
-      y_axis_label: Seconds
-      type: GAUGE
-      unit: SECONDS
-      aggregation: AVG
-      derivative: NONE
     - name: logical_replication.retry_queue_bytes
       exported_name: logical_replication_retry_queue_bytes
       description: Logical bytes (sum of keys+values) in the retry queue
@@ -7146,22 +7266,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: physical_replication.logical_bytes
-      exported_name: physical_replication_logical_bytes
-      description: Logical bytes (sum of keys + values) ingested by all replication jobs
-      y_axis_label: Bytes
-      type: COUNTER
-      unit: BYTES
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
-    - name: physical_replication.replicated_time_seconds
-      exported_name: physical_replication_replicated_time_seconds
-      description: The replicated time of the physical replication stream in seconds since the unix epoch.
-      y_axis_label: Seconds
-      type: GAUGE
-      unit: SECONDS
-      aggregation: AVG
-      derivative: NONE
     - name: physical_replication.resolved_events_ingested
       exported_name: physical_replication_resolved_events_ingested
       description: Resolved events ingested by all replication jobs
@@ -7192,6 +7296,17 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NONE
+    - name: round-trip-default-class-latency
+      exported_name: round_trip_default_class_latency
+      description: |
+        Distribution of round-trip latencies with other nodes.
+
+        Similar to round-trip-latency, but only for default class connections.
+      y_axis_label: Round-trip time
+      type: HISTOGRAM
+      unit: NANOSECONDS
+      aggregation: AVG
+      derivative: NONE
     - name: round-trip-latency
       exported_name: round_trip_latency
       description: |
@@ -7209,6 +7324,39 @@ layers:
       unit: NANOSECONDS
       aggregation: AVG
       derivative: NONE
+    - name: round-trip-raft-class-latency
+      exported_name: round_trip_raft_class_latency
+      description: |
+        Distribution of round-trip latencies with other nodes.
+
+        Similar to round-trip-latency, but only for raft class connections.
+      y_axis_label: Round-trip time
+      type: HISTOGRAM
+      unit: NANOSECONDS
+      aggregation: AVG
+      derivative: NONE
+    - name: round-trip-rangefeed-class-latency
+      exported_name: round_trip_rangefeed_class_latency
+      description: |
+        Distribution of round-trip latencies with other nodes.
+
+        Similar to round-trip-latency, but only for rangefeed class connections.
+      y_axis_label: Round-trip time
+      type: HISTOGRAM
+      unit: NANOSECONDS
+      aggregation: AVG
+      derivative: NONE
+    - name: round-trip-system-class-latency
+      exported_name: round_trip_system_class_latency
+      description: |
+        Distribution of round-trip latencies with other nodes.
+
+        Similar to round-trip-latency, but only for system class connections.
+      y_axis_label: Round-trip time
+      type: HISTOGRAM
+      unit: NANOSECONDS
+      aggregation: AVG
+      derivative: NONE
     - name: rpc.client.bytes.egress
       exported_name: rpc_client_bytes_egress
       description: Counter of TCP bytes sent via gRPC on connections we initiated.
@@ -7653,22 +7801,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: sql.delete.started.count
-      exported_name: sql_delete_started_count
-      description: Number of SQL DELETE statements started
-      y_axis_label: SQL Statements
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
-    - name: sql.delete.started.count.internal
-      exported_name: sql_delete_started_count_internal
-      description: Number of SQL DELETE statements started (internal queries)
-      y_axis_label: SQL Internal Statements
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: sql.disk.distsql.current
       exported_name: sql_disk_distsql_current
       description: Current sql statement disk usage for distsql
@@ -8093,22 +8225,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: sql.insert.started.count
-      exported_name: sql_insert_started_count
-      description: Number of SQL INSERT statements started
-      y_axis_label: SQL Statements
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
-    - name: sql.insert.started.count.internal
-      exported_name: sql_insert_started_count_internal
-      description: Number of SQL INSERT statements started (internal queries)
-      y_axis_label: SQL Internal Statements
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: sql.insights.anomaly_detection.evictions
       exported_name: sql_insights_anomaly_detection_evictions
       description: Evictions of fingerprint latency summaries due to memory pressure
@@ -8773,22 +8889,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NONE
-    - name: sql.select.started.count
-      exported_name: sql_select_started_count
-      description: Number of SQL SELECT statements started
-      y_axis_label: SQL Statements
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
-    - name: sql.select.started.count.internal
-      exported_name: sql_select_started_count_internal
-      description: Number of SQL SELECT statements started (internal queries)
-      y_axis_label: SQL Internal Statements
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: sql.service.latency.consistent
       exported_name: sql_service_latency_consistent
       description: Latency of SQL request execution of non-historical queries
@@ -8932,7 +9032,23 @@ layers:
       type: COUNTER
       unit: COUNT
       aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: sql.stats.ingester.num_processed
+      exported_name: sql_stats_ingester_num_processed
+      description: Number of items processed by the SQL stats ingester
+      y_axis_label: Items
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: sql.stats.ingester.queue_size
+      exported_name: sql_stats_ingester_queue_size
+      description: Current number of items queued in the SQL stats ingester
+      y_axis_label: Items
+      type: GAUGE
+      unit: COUNT
+      aggregation: AVG
+      derivative: NONE
     - name: sql.stats.mem.current
       exported_name: sql_stats_mem_current
       description: Current memory usage for fingerprint storage
@@ -9213,22 +9329,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: sql.update.started.count
-      exported_name: sql_update_started_count
-      description: Number of SQL UPDATE statements started
-      y_axis_label: SQL Statements
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
-    - name: sql.update.started.count.internal
-      exported_name: sql_update_started_count_internal
-      description: Number of SQL UPDATE statements started (internal queries)
-      y_axis_label: SQL Internal Statements
-      type: COUNTER
-      unit: COUNT
-      aggregation: AVG
-      derivative: NON_NEGATIVE_DERIVATIVE
     - name: sql.vecindex.pending_splits_merges
       exported_name: sql_vecindex_pending_splits_merges
       description: Total number of vector index splits and merges waiting to be processed
@@ -9837,6 +9937,86 @@ layers:
       derivative: NON_NEGATIVE_DERIVATIVE
       how_to_use: This metric measures the length of time, in seconds, that the CockroachDB process has been running. Monitor this metric to detect events such as node restarts, which may require investigation or intervention.
       essential: true
+  - name: NETWORKING
+    metrics:
+    - name: sys.host.net.send.tcp.fast_retrans_segs
+      exported_name: sys_host_net_send_tcp_fast_retrans_segs
+      description: |-
+        Segments retransmitted due to the fast retransmission mechanism in TCP.
+        Fast retransmissions occur when the sender learns that intermediate segments have been lost.
+      y_axis_label: Segments
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: sys.host.net.send.tcp.loss_probes
+      exported_name: sys_host_net_send_tcp_loss_probes
+      description: |2-
+
+        Number of TCP tail loss probes sent. Loss probes are an optimization to detect
+        loss of the last packet earlier than the retransmission timer, and can indicate
+        network issues. Tail loss probes are aggressive, so the base rate is often nonzero
+        even in healthy networks.
+      y_axis_label: Probes
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: sys.host.net.send.tcp.retrans_segs
+      exported_name: sys_host_net_send_tcp_retrans_segs
+      description: |2
+
+        The number of TCP segments retransmitted across all network interfaces.
+        This can indicate packet loss occurring in the network. However, it can
+        also be caused by recipient nodes not consuming packets in a timely manner,
+        or the local node overflowing its outgoing buffers, for example due to overload.
+
+        Retransmissions also occur in the absence of problems, as modern TCP stacks
+        err on the side of aggressively retransmitting segments.
+
+        The Linux tool 'ss -i' can show the Linux kernel's smoothed view of round-trip
+        latency and variance on a per-connection basis.  Additionally, 'netstat -s'
+        shows all TCP counters maintained by the kernel.
+      y_axis_label: Segments
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+      how_to_use: |2
+
+        Phase changes, especially when occurring on groups of nodes, can indicate packet
+        loss in the network or a slow consumer of packets. On slow consumers, the
+        'sys.host.net.rcvd.drop' metric may be elevated; on overloaded senders, it
+        is worth checking the 'sys.host.net.send.drop' metric.
+        Additionally, the 'sys.host.net.send.tcp.*' metrics may provide more insight into the
+        specific type of retransmission.
+      essential: true
+    - name: sys.host.net.send.tcp.slow_start_retrans
+      exported_name: sys_host_net_send_tcp_slow_start_retrans
+      description: |2
+
+        Number of TCP retransmissions in slow start. This can indicate that the network
+        is unable to support the initial fast ramp-up in window size, and can be a sign
+        of packet loss or congestion.
+      y_axis_label: Segments
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: sys.host.net.send.tcp_timeouts
+      exported_name: sys_host_net_send_tcp_timeouts
+      description: |2
+
+        Number of TCP retransmission timeouts. These typically imply that a packet has
+        not been acknowledged within at least 200ms.  Modern TCP stacks use
+        optimizations such as fast retransmissions and loss probes to avoid hitting
+        retransmission timeouts. Anecdotally, they still occasionally present themselves
+        even in supposedly healthy cloud environments.
+      y_axis_label: Timeouts
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
   - name: UNSET
     metrics:
     - name: build.timestamp
@@ -9903,6 +10083,30 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
+    - name: log.otlp.sink.grpc.transparent_retries
+      exported_name: log_otlp_sink_grpc_transparent_retries
+      description: Number of transparent retries done by otlp-server logging sinks when using gRPC
+      y_axis_label: Retries
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: log.otlp.sink.write.attempts
+      exported_name: log_otlp_sink_write_attempts
+      description: Number of write attempts experienced by otlp-server logging sinks
+      y_axis_label: Attempts
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: log.otlp.sink.write.errors
+      exported_name: log_otlp_sink_write_errors
+      description: Number of write errors experienced by otlp-server logging sinks
+      y_axis_label: Errors
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
     - name: sys.cgo.allocbytes
       exported_name: sys_cgo_allocbytes
       description: Current bytes of memory allocated by cgo
@@ -10047,6 +10251,14 @@ layers:
       unit: BYTES
       aggregation: AVG
       derivative: NONE
+    - name: sys.go.limitbytes
+      exported_name: sys_go_limitbytes
+      description: Go soft memory limit
+      y_axis_label: Memory
+      type: GAUGE
+      unit: BYTES
+      aggregation: AVG
+      derivative: NONE
     - name: sys.go.pause.other.ns
       exported_name: sys_go_pause_other_ns
       description: Estimated non-GC-related total pause time
@@ -10457,13 +10669,13 @@ layers:
       essential: true
     - name: storage.wal.fsync.latency
       exported_name: storage_wal_fsync_latency
-      description: The write ahead log fsync latency
+      description: The fsync latency to the Write-Ahead Log device.
       y_axis_label: Fsync Latency
       type: HISTOGRAM
       unit: NANOSECONDS
       aggregation: AVG
       derivative: NONE
-      how_to_use: If this value is greater than `100ms`, it is an indication of a disk stall. To mitigate the effects of disk stalls, consider deploying your cluster with WAL failover configured.
+      how_to_use: If this value is greater than 100ms, it is an indication of a disk stall. To mitigate the effects of disk stalls, consider deploying your cluster with WAL failover configured. When WAL failover is configured, the more relevant metric is storage.wal.failover_write_and_sync.latency, as this metric reflects the fsync latency of the primary and/or the secondary WAL device.
       essential: true
     - name: storage.write-stalls
       exported_name: storage_write_stalls
@@ -12216,6 +12428,14 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NONE
+    - name: kv.concurrency.locks_shed_due_to_memory_limit
+      exported_name: kv_concurrency_locks_shed_due_to_memory_limit
+      description: The number of locks that were shed because the lock table ran into memory limits
+      y_axis_label: Locks
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
     - name: kv.concurrency.locks_with_wait_queues
       exported_name: kv_concurrency_locks_with_wait_queues
       description: Number of active locks held in lock tables with active wait-queues
@@ -12248,6 +12468,14 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NONE
+    - name: kv.concurrency.num_lock_shed_due_to_memory_limit_events
+      exported_name: kv_concurrency_num_lock_shed_due_to_memory_limit_events
+      description: The number of times locks were shed by the lock table because it ran into memory limits
+      y_axis_label: Lock Shed Events
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
     - name: kv.loadsplitter.cleardirection
       exported_name: kv_loadsplitter_cleardirection
       description: Load-based splitter observed an access direction greater than 80% left or right in the samples.
@@ -12424,14 +12652,6 @@ layers:
       unit: BYTES
       aggregation: AVG
       derivative: NONE
-    - name: kv.rangefeed.mux_stream_send.latency
-      exported_name: kv_rangefeed_mux_stream_send_latency
-      description: Latency of sending RangeFeed events to the client
-      y_axis_label: Latency
-      type: HISTOGRAM
-      unit: NANOSECONDS
-      aggregation: AVG
-      derivative: NONE
     - name: kv.rangefeed.mux_stream_send.slow_events
       exported_name: kv_rangefeed_mux_stream_send_slow_events
       description: Number of RangeFeed events that took longer than 10s to send to the client
@@ -13651,6 +13871,38 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
+    - name: queue.replicate.enqueue.add
+      exported_name: queue_replicate_enqueue_add
+      description: Number of replicas successfully added to the replicate queue
+      y_axis_label: Replicas
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: queue.replicate.enqueue.failedprecondition
+      exported_name: queue_replicate_enqueue_failedprecondition
+      description: Number of replicas that failed the precondition checks and were therefore not added to the replicate queue
+      y_axis_label: Replicas
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: queue.replicate.enqueue.noaction
+      exported_name: queue_replicate_enqueue_noaction
+      description: Number of replicas for which ShouldQueue determined no action was needed and were therefore not added to the replicate queue
+      y_axis_label: Replicas
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: queue.replicate.enqueue.unexpectederror
+      exported_name: queue_replicate_enqueue_unexpectederror
+      description: Number of replicas that were expected to be enqueued (ShouldQueue returned true or the caller decided to add to the replicate queue directly), but failed to be enqueued due to unexpected errors
+      y_axis_label: Replicas
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
     - name: queue.replicate.nonvoterpromotions
       exported_name: queue_replicate_nonvoterpromotions
       description: Number of non-voters promoted to voters by the replicate queue
@@ -13667,6 +13919,22 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NONE
+    - name: queue.replicate.priority_inversion.requeue
+      exported_name: queue_replicate_priority_inversion_requeue
+      description: Number of priority inversions in the replicate queue that resulted in requeuing of the replicas. A priority inversion occurs when the priority at processing time ends up being lower than at enqueue time. When the priority has changed from a high priority repair action to rebalance, the change is requeued to avoid unfairness.
+      y_axis_label: Replicas
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: queue.replicate.priority_inversion.total
+      exported_name: queue_replicate_priority_inversion_total
+      description: Total number of priority inversions in the replicate queue. A priority inversion occurs when the priority at processing time ends up being lower than at enqueue time
+      y_axis_label: Replicas
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
     - name: queue.replicate.process.failure
       exported_name: queue_replicate_process_failure
       description: Number of replicas which failed processing in the replicate queue
@@ -13699,6 +13967,14 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NONE
+    - name: queue.replicate.queue_full
+      exported_name: queue_replicate_queue_full
+      description: Number of times a replica was dropped from the queue due to queue fullness
+      y_axis_label: Replicas
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
     - name: queue.replicate.rebalancenonvoterreplica
       exported_name: queue_replicate_rebalancenonvoterreplica
       description: Number of non-voter replica rebalancer-initiated additions attempted by the replicate queue
@@ -15073,6 +15349,60 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NONE
+    - name: ranges.decommissioning.nudger.enqueue
+      exported_name: ranges_decommissioning_nudger_enqueue
+      labeled_name: 'ranges.decommissioning.nudger.enqueue{status: enqueue}'
+      description: 'Number of attempts by the decommissioning nudger to enqueue a range for decommissioning. Note: This metric tracks when the nudger attempts to enqueue, but the replica might not end up being enqueued by the priority queue due to various filtering or failure conditions.'
+      y_axis_label: Ranges
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: ranges.decommissioning.nudger.enqueue.failure
+      exported_name: ranges_decommissioning_nudger_enqueue_failure
+      labeled_name: ranges.decommissioning.nudger.enqueue.failure
+      description: Number of ranges that failed to enqueue at the replicate queue
+      y_axis_label: Ranges
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: ranges.decommissioning.nudger.enqueue.success
+      exported_name: ranges_decommissioning_nudger_enqueue_success
+      labeled_name: ranges.decommissioning.nudger.enqueue.success
+      description: Number of ranges that were successfully enqueued by the decommissioning nudger
+      y_axis_label: Ranges
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: ranges.decommissioning.nudger.not_leaseholder_or_invalid_lease
+      exported_name: ranges_decommissioning_nudger_not_leaseholder_or_invalid_lease
+      labeled_name: ranges.decommissioning.nudger.not_leaseholder_or_invalid_lease
+      description: Number of ranges that were not the leaseholder or had an invalid lease at the decommissioning nudger
+      y_axis_label: Ranges
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: ranges.decommissioning.nudger.process.failure
+      exported_name: ranges_decommissioning_nudger_process_failure
+      labeled_name: ranges.decommissioning.nudger.process.failure
+      description: Number of ranges enqueued by the decommissioning nudger that the replicate queue failed to process
+      y_axis_label: Ranges
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
+    - name: ranges.decommissioning.nudger.process.success
+      exported_name: ranges_decommissioning_nudger_process_success
+      labeled_name: ranges.decommissioning.nudger.process.success
+      description: Number of ranges enqueued by the decommissioning nudger that were successfully processed by the replicate queue
+      y_axis_label: Ranges
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
     - name: ranges.overreplicated
       exported_name: ranges_overreplicated
       description: Number of ranges with more live replicas than the replication target
@@ -15381,7 +15711,7 @@ layers:
       description: Number of disk reads per query
       y_axis_label: Disk Reads per Query
       type: GAUGE
-      unit: COUNT
+      unit: CONST
       aggregation: AVG
       derivative: NONE
     - name: rocksdb.table-readers-mem-estimate
@@ -15544,6 +15874,14 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
+    - name: rpc.method.flushlocktable.recv
+      exported_name: rpc_method_flushlocktable_recv
+      description: Number of FlushLockTable requests processed
+      y_axis_label: RPCs
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
     - name: rpc.method.gc.recv
       exported_name: rpc_method_gc_recv
       description: Number of GC requests processed
@@ -15993,6 +16331,78 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
+    - name: storage.compression.cr
+      exported_name: storage_compression_cr
+      description: Average compression ratio of sstable and blob value data.
+      y_axis_label: Ratio
+      type: GAUGE
+      unit: CONST
+      aggregation: AVG
+      derivative: NONE
+    - name: storage.compression.minlz.bytes
+      exported_name: storage_compression_minlz_bytes
+      description: Total on disk size of sstable and blob value data that is compressed with the MinLZ algorithm.
+      y_axis_label: Bytes
+      type: GAUGE
+      unit: BYTES
+      aggregation: AVG
+      derivative: NONE
+    - name: storage.compression.minlz.cr
+      exported_name: storage_compression_minlz_cr
+      description: Average compression ratio of sstable and blob value data that is compressed with the MinLZ algorithm.
+      y_axis_label: Ratio
+      type: GAUGE
+      unit: CONST
+      aggregation: AVG
+      derivative: NONE
+    - name: storage.compression.none.bytes
+      exported_name: storage_compression_none_bytes
+      description: Total on disk size of sstable and blob value data that is not compressed.
+      y_axis_label: Bytes
+      type: GAUGE
+      unit: BYTES
+      aggregation: AVG
+      derivative: NONE
+    - name: storage.compression.snappy.bytes
+      exported_name: storage_compression_snappy_bytes
+      description: Total on disk size of sstable and blob value data that is compressed with the Snappy algorithm.
+      y_axis_label: Bytes
+      type: GAUGE
+      unit: BYTES
+      aggregation: AVG
+      derivative: NONE
+    - name: storage.compression.snappy.cr
+      exported_name: storage_compression_snappy_cr
+      description: Average compression ratio of sstable and blob value data that is compressed with the Snappy algorithm.
+      y_axis_label: Ratio
+      type: GAUGE
+      unit: CONST
+      aggregation: AVG
+      derivative: NONE
+    - name: storage.compression.unknown.bytes
+      exported_name: storage_compression_unknown_bytes
+      description: Total on disk size of sstable and blob value data that is compressed but for which we have no compression statistics.
+      y_axis_label: Bytes
+      type: GAUGE
+      unit: BYTES
+      aggregation: AVG
+      derivative: NONE
+    - name: storage.compression.zstd.bytes
+      exported_name: storage_compression_zstd_bytes
+      description: Total on disk size of sstable and blob value data that is compressed with the Zstd algorithm.
+      y_axis_label: Bytes
+      type: GAUGE
+      unit: BYTES
+      aggregation: AVG
+      derivative: NONE
+    - name: storage.compression.zstd.cr
+      exported_name: storage_compression_zstd_cr
+      description: Average compression ratio of sstable and blob value data that is compressed with the Zstd algorithm.
+      y_axis_label: Ratio
+      type: GAUGE
+      unit: CONST
+      aggregation: AVG
+      derivative: NONE
     - name: storage.disk-slow
       exported_name: storage_disk_slow
       description: Number of instances of disk operations taking longer than 10s
@@ -16009,6 +16419,14 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
+    - name: storage.disk-unhealthy.duration
+      exported_name: storage_disk_unhealthy_duration
+      description: Total disk unhealthy duration in nanos
+      y_axis_label: Nanoseconds
+      type: COUNTER
+      unit: NANOSECONDS
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
     - name: storage.disk.io.time
       exported_name: storage_disk_io_time
       description: Time spent reading from or writing to the store's disk since this process started (as reported by the OS)
@@ -16033,6 +16451,14 @@ layers:
       unit: BYTES
       aggregation: AVG
       derivative: NONE
+    - name: storage.disk.read-max.iops
+      exported_name: storage_disk_read_max_iops
+      description: Maximum rate of read operations performed on the disk (as reported by the OS)
+      y_axis_label: Operations
+      type: GAUGE
+      unit: COUNT
+      aggregation: AVG
+      derivative: NONE
     - name: storage.disk.read.bytes
       exported_name: storage_disk_read_bytes
       description: Bytes read from the store's disk since this process started (as reported by the OS)
@@ -16073,6 +16499,14 @@ layers:
       unit: BYTES
       aggregation: AVG
       derivative: NONE
+    - name: storage.disk.write-max.iops
+      exported_name: storage_disk_write_max_iops
+      description: Maximum rate of write operations performed on the disk (as reported by the OS)
+      y_axis_label: Operations
+      type: GAUGE
+      unit: COUNT
+      aggregation: AVG
+      derivative: NONE
     - name: storage.disk.write.bytes
       exported_name: storage_disk_write_bytes
       description: Bytes written to the store's disk since this process started (as reported by the OS)
@@ -16588,7 +17022,7 @@ layers:
       description: Compaction score of level 0
       y_axis_label: Score
       type: GAUGE
-      unit: COUNT
+      unit: CONST
       aggregation: AVG
       derivative: NONE
     - name: storage.l0-level-size
@@ -16628,7 +17062,7 @@ layers:
       description: Compaction score of level 1
       y_axis_label: Score
       type: GAUGE
-      unit: COUNT
+      unit: CONST
       aggregation: AVG
       derivative: NONE
     - name: storage.l1-level-size
@@ -16652,7 +17086,7 @@ layers:
       description: Compaction score of level 2
       y_axis_label: Score
       type: GAUGE
-      unit: COUNT
+      unit: CONST
       aggregation: AVG
       derivative: NONE
     - name: storage.l2-level-size
@@ -16676,7 +17110,7 @@ layers:
       description: Compaction score of level 3
       y_axis_label: Score
       type: GAUGE
-      unit: COUNT
+      unit: CONST
       aggregation: AVG
       derivative: NONE
     - name: storage.l3-level-size
@@ -16700,7 +17134,7 @@ layers:
       description: Compaction score of level 4
       y_axis_label: Score
       type: GAUGE
-      unit: COUNT
+      unit: CONST
       aggregation: AVG
       derivative: NONE
     - name: storage.l4-level-size
@@ -16724,7 +17158,7 @@ layers:
       description: Compaction score of level 5
       y_axis_label: Score
       type: GAUGE
-      unit: COUNT
+      unit: CONST
       aggregation: AVG
       derivative: NONE
     - name: storage.l5-level-size
@@ -16748,7 +17182,7 @@ layers:
       description: Compaction score of level 6
       y_axis_label: Score
       type: GAUGE
-      unit: COUNT
+      unit: CONST
       aggregation: AVG
       derivative: NONE
     - name: storage.l6-level-size
@@ -16903,38 +17337,6 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
-    - name: storage.sstable.compression.none.count
-      exported_name: storage_sstable_compression_none_count
-      description: Count of SSTables that are uncompressed.
-      y_axis_label: SSTables
-      type: GAUGE
-      unit: COUNT
-      aggregation: AVG
-      derivative: NONE
-    - name: storage.sstable.compression.snappy.count
-      exported_name: storage_sstable_compression_snappy_count
-      description: Count of SSTables that have been compressed with the snappy compression algorithm.
-      y_axis_label: SSTables
-      type: GAUGE
-      unit: COUNT
-      aggregation: AVG
-      derivative: NONE
-    - name: storage.sstable.compression.unknown.count
-      exported_name: storage_sstable_compression_unknown_count
-      description: Count of SSTables that have an unknown compression algorithm.
-      y_axis_label: SSTables
-      type: GAUGE
-      unit: COUNT
-      aggregation: AVG
-      derivative: NONE
-    - name: storage.sstable.compression.zstd.count
-      exported_name: storage_sstable_compression_zstd_count
-      description: Count of SSTables that have been compressed with the zstd compression algorithm.
-      y_axis_label: SSTables
-      type: GAUGE
-      unit: COUNT
-      aggregation: AVG
-      derivative: NONE
     - name: storage.sstable.remote.bytes
       exported_name: storage_sstable_remote_bytes
       description: Bytes in SSTables that are stored off-disk (remotely) in object storage.
@@ -16991,9 +17393,17 @@ layers:
       unit: BYTES
       aggregation: AVG
       derivative: NONE
+    - name: storage.value_separation.value_retrieval.count
+      exported_name: storage_value_separation_value_retrieval_count
+      description: The number of value retrievals of values separated into blob files.
+      y_axis_label: Events
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
     - name: storage.wal.bytes_in
       exported_name: storage_wal_bytes_in
-      description: The number of logical bytes the storage engine has written to the WAL
+      description: The number of logical bytes the storage engine has written to the Write-Ahead Log.
       y_axis_label: Events
       type: COUNTER
       unit: COUNT
@@ -17001,7 +17411,7 @@ layers:
       derivative: NON_NEGATIVE_DERIVATIVE
     - name: storage.wal.bytes_written
       exported_name: storage_wal_bytes_written
-      description: The number of bytes the storage engine has written to the WAL
+      description: The number of bytes the storage engine has written to the Write-Ahead Log.
       y_axis_label: Events
       type: COUNTER
       unit: COUNT
@@ -17009,20 +17419,22 @@ layers:
       derivative: NON_NEGATIVE_DERIVATIVE
     - name: storage.wal.failover.primary.duration
       exported_name: storage_wal_failover_primary_duration
-      description: Cumulative time spent writing to the primary WAL directory. Only populated when WAL failover is configured
+      description: Cumulative time spent writing to the primary WAL directory.
       y_axis_label: Nanoseconds
       type: COUNTER
       unit: NANOSECONDS
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
+      how_to_use: Only populated when WAL failover is configured.
     - name: storage.wal.failover.secondary.duration
       exported_name: storage_wal_failover_secondary_duration
-      description: Cumulative time spent writing to the secondary WAL directory. Only populated when WAL failover is configured
+      description: Cumulative time spent writing to the secondary WAL directory.
       y_axis_label: Nanoseconds
       type: COUNTER
       unit: NANOSECONDS
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
+      how_to_use: Only populated when WAL failover is configured.
     - name: storage.wal.failover.switch.count
       exported_name: storage_wal_failover_switch_count
       description: Count of the number of times WAL writing has switched from primary to secondary and vice versa.
@@ -17031,14 +17443,16 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
+      how_to_use: Only populated when WAL failover is configured. A high switch count indicates that many disk stalls were encountered.
     - name: storage.wal.failover.write_and_sync.latency
       exported_name: storage_wal_failover_write_and_sync_latency
-      description: The observed latency for writing and syncing to the write ahead log. Only populated when WAL failover is configured
+      description: The observed latency for writing and syncing to the logical Write-Ahead Log.
       y_axis_label: Nanoseconds
       type: HISTOGRAM
       unit: NANOSECONDS
       aggregation: AVG
       derivative: NONE
+      how_to_use: Only populated when WAL failover is configured. Without WAL failover, the relevant metric is storage.wal.fsync.latency.
     - name: storage.write-amplification
       exported_name: storage_write_amplification
       description: |-
@@ -17204,6 +17618,14 @@ layers:
       unit: COUNT
       aggregation: AVG
       derivative: NON_NEGATIVE_DERIVATIVE
+    - name: subsume.locks_written
+      exported_name: subsume_locks_written
+      description: Number of locks written to storage during subsume (range merge)
+      y_axis_label: Locks Written
+      type: COUNTER
+      unit: COUNT
+      aggregation: AVG
+      derivative: NON_NEGATIVE_DERIVATIVE
     - name: sysbytes
       exported_name: sysbytes
       description: Number of bytes in system KV pairs
diff --git a/src/current/_includes/v25.3/essential-metrics.md b/src/current/_includes/v25.3/essential-metrics.md
index 6f02ba25f05..d9a445fca22 100644
--- a/src/current/_includes/v25.3/essential-metrics.md
+++ b/src/current/_includes/v25.3/essential-metrics.md
@@ -1,8 +1,6 @@
 {% assign version = page.version.version | replace: ".", "" %}
 {% comment %}DEBUG: {{ version }}{% endcomment %}
 
-These essential CockroachDB metrics let you monitor your CockroachDB {{ site.data.products.core }} cluster. Use them to build custom dashboards with the following tools:
-
 {% comment %} STEP 1. Assign variables specific to deployment {% endcomment %}
 {% if include.deployment == 'self-hosted' %}
   {% assign metrics_datadog = site.data[version].metrics.datadog-cockroachdb %}
@@ -10,6 +8,8 @@ These essential CockroachDB metrics let you monitor your CockroachDB {{ site.dat
   {% assign datadog_prefix = "cockroachdb" %}
   {% assign category_order = "HARDWARE,STORAGE,OVERLOAD,NETWORKING,DISTRIBUTED,REPLICATION,SQL,CHANGEFEEDS,TTL,UNSET," %}
 
+These essential CockroachDB metrics let you monitor your CockroachDB {{ site.data.products.core }} cluster. Use them to build custom dashboards with the following tools:
+
 - [Grafana]({% link {{ page.version.version }}/monitor-cockroachdb-with-prometheus.md %}#step-5-visualize-metrics-in-grafana)
 - [Datadog Integration]({% link {{ page.version.version }}/datadog.md %}): The [**Datadog Integration Metric Name**]({{ datadog_link }}) column lists the corresponding Datadog metric which requires the `{{ datadog_prefix }}.` prefix.
 
@@ -20,6 +20,8 @@ These essential CockroachDB metrics let you monitor your CockroachDB {{ site.dat
 {% comment %} Removed NETWORKING category for advanced deployment {% endcomment %}
   {% assign category_order = "HARDWARE,STORAGE,OVERLOAD,DISTRIBUTED,REPLICATION,SQL,CHANGEFEEDS,TTL,UNSET," %}
 
+These essential CockroachDB metrics let you monitor your CockroachDB {{ site.data.products.advanced }} cluster. Use them to build custom dashboards with the following tools:
+
 - [Datadog integration]({% link cockroachcloud/tools-page.md %}#monitor-cockroachdb-cloud-with-datadog) - The [**Datadog Integration Metric Name**]({{ datadog_link }}) column lists the corresponding Datadog metric which requires the `{{ datadog_prefix }}` prefix.
 - [Metrics export]({% link cockroachcloud/export-metrics-advanced.md %})
 
diff --git a/src/current/_includes/v25.4/essential-metrics.md b/src/current/_includes/v25.4/essential-metrics.md
index 6f02ba25f05..4f4627a9dcb 100644
--- a/src/current/_includes/v25.4/essential-metrics.md
+++ b/src/current/_includes/v25.4/essential-metrics.md
@@ -1,14 +1,14 @@
 {% assign version = page.version.version | replace: ".", "" %}
 {% comment %}DEBUG: {{ version }}{% endcomment %}
 
-These essential CockroachDB metrics let you monitor your CockroachDB {{ site.data.products.core }} cluster. Use them to build custom dashboards with the following tools:
-
 {% comment %} STEP 1. Assign variables specific to deployment {% endcomment %}
 {% if include.deployment == 'self-hosted' %}
   {% assign metrics_datadog = site.data[version].metrics.datadog-cockroachdb %}
   {% assign datadog_link = "https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics" %}
   {% assign datadog_prefix = "cockroachdb" %}
-  {% assign category_order = "HARDWARE,STORAGE,OVERLOAD,NETWORKING,DISTRIBUTED,REPLICATION,SQL,CHANGEFEEDS,TTL,UNSET," %}
+  {% assign category_order = "HARDWARE,STORAGE,OVERLOAD,NETWORKING,DISTRIBUTED,REPLICATION,SQL,CHANGEFEEDS,TTL,CROSS_CLUSTER_REPLICATION,LOGICAL_DATA_REPLICATION,UNSET," %}
+
+These essential CockroachDB metrics let you monitor your CockroachDB {{ site.data.products.core }} cluster. Use them to build custom dashboards with the following tools:
 
 - [Grafana]({% link {{ page.version.version }}/monitor-cockroachdb-with-prometheus.md %}#step-5-visualize-metrics-in-grafana)
 - [Datadog Integration]({% link {{ page.version.version }}/datadog.md %}): The [**Datadog Integration Metric Name**]({{ datadog_link }}) column lists the corresponding Datadog metric which requires the `{{ datadog_prefix }}.` prefix.
@@ -18,7 +18,9 @@ These essential CockroachDB metrics let you monitor your CockroachDB {{ site.dat
   {% assign datadog_link = "https://docs.datadoghq.com/integrations/cockroach-cloud/#metrics" %}
   {% assign datadog_prefix = "crdb_dedicated" %}
 {% comment %} Removed NETWORKING category for advanced deployment {% endcomment %}
-  {% assign category_order = "HARDWARE,STORAGE,OVERLOAD,DISTRIBUTED,REPLICATION,SQL,CHANGEFEEDS,TTL,UNSET," %}
+  {% assign category_order = "HARDWARE,STORAGE,OVERLOAD,DISTRIBUTED,REPLICATION,SQL,CHANGEFEEDS,TTL,CROSS_CLUSTER_REPLICATION,LOGICAL_DATA_REPLICATION,UNSET," %}
+
+These essential CockroachDB metrics let you monitor your CockroachDB {{ site.data.products.advanced }} cluster. Use them to build custom dashboards with the following tools:
 
 - [Datadog integration]({% link cockroachcloud/tools-page.md %}#monitor-cockroachdb-cloud-with-datadog) - The [**Datadog Integration Metric Name**]({{ datadog_link }}) column lists the corresponding Datadog metric which requires the `{{ datadog_prefix }}` prefix.
 - [Metrics export]({% link cockroachcloud/export-metrics-advanced.md %})
@@ -56,7 +58,7 @@ The **Usage** column explains why each metric is important to visualize and how
 
 {% comment %} Order categories, NOTE: new categories may break this order, however all relevant categories will be displayed though not in the desired order{% endcomment %}
 {% comment %}DEBUG: category_names_string = {{ category_names_string }}{% endcomment %}
-{% assign category_names_string_ordered = category_names_string | replace: "CHANGEFEEDS,DISTRIBUTED,NETWORKING,SQL,TTL,UNSET,HARDWARE,OVERLOAD,REPLICATION,STORAGE,", category_order  %}
+{% assign category_names_string_ordered = category_names_string | replace: "CHANGEFEEDS,CROSS_CLUSTER_REPLICATION,DISTRIBUTED,LOGICAL_DATA_REPLICATION,NETWORKING,SQL,TTL,UNSET,HARDWARE,OVERLOAD,REPLICATION,STORAGE,", category_order  %}
 {% comment %}DEBUG: category_names_string_ordered = {{ category_names_string_ordered }}{% endcomment %}
 {% assign category_names_array = category_names_string_ordered | split: "," %}
 
@@ -90,6 +92,8 @@ The **Usage** column explains why each metric is important to visualize and how
       {% elsif category_name == "REPLICATION" %}{% assign category_display_name = "KV Replication" %}
       {% elsif category_name == "CHANGEFEEDS" %}{% assign category_display_name = "Changefeeds" %}
       {% elsif category_name == "TTL" %}{% assign category_display_name = "Row-level TTL" %}
+      {% elsif category_name == "CROSS_CLUSTER_REPLICATION" %}{% assign category_display_name = "Physical Replication" %}
+      {% elsif category_name == "LOGICAL_DATA_REPLICATION" %}{% assign category_display_name = "Logical Replication" %}
       {% else %}{% assign category_display_name = category_name %}{% comment %} For example, SQL {% endcomment %}
       {% endif %}