replace api

yangw-dev · yangw-dev · commit 5231499b45bb · 2025-09-08T11:25:40.000-07:00
ghstack-source-id: 4192ac0 Pull-Request: #7112
diff --git a/torchci/clickhouse_queries/compilers_benchmark_api_query/params.json b/torchci/clickhouse_queries/compilers_benchmark_api_query/params.json
@@ -0,0 +1,17 @@
+{
+  "params": {
+    "branches": "Array(String)",
+    "commits": "Array(String)",
+    "compilers": "Array(String)",
+    "device": "String",
+    "arch": "String",
+    "dtype": "String",
+    "granularity": "String",
+    "mode": "String",
+    "startTime": "DateTime64(3)",
+    "stopTime": "DateTime64(3)",
+    "suites": "Array(String)",
+    "workflowId": "Int64"
+  },
+  "tests": []
+}
diff --git a/torchci/clickhouse_queries/compilers_benchmark_api_query/query.sql b/torchci/clickhouse_queries/compilers_benchmark_api_query/query.sql
@@ -0,0 +1,36 @@
+-- Returns raw rows from benchmark.oss_ci_benchmark_torchinductor for every
+-- commit (head_sha) that has at least one row with a timestamp inside
+-- [startTime, stopTime). Only the commit selection is time-bounded; the outer
+-- query additionally filters by branch (an empty `branches` array matches
+-- all branches), dtype, mode, device, and a case-insensitive substring match
+-- on arch.
+SELECT
+    workflow_id,
+    job_id,
+    head_sha,
+    replaceOne(head_branch, 'refs/heads/', '') AS head_branch,
+    suite,
+    model_name AS model,
+    metric_name AS metric,
+    value,
+    metric_extra_info AS extra_info,
+    benchmark_extra_info['output'] AS output,
+    timestamp,
+    DATE_TRUNC({granularity: String}, fromUnixTimestamp(timestamp))
+        AS granularity_bucket
+FROM benchmark.oss_ci_benchmark_torchinductor
+WHERE
+    (head_sha) IN (
+        SELECT DISTINCT head_sha
+        FROM benchmark.oss_ci_benchmark_torchinductor
+        PREWHERE
+            timestamp >= toUnixTimestamp({startTime: DateTime64(3)})
+            AND timestamp < toUnixTimestamp({stopTime: DateTime64(3)})
+    )
+    AND (
+        has(
+            {branches: Array(String)},
+            replaceOne(head_branch, 'refs/heads/', '')
+        )
+        OR empty({branches: Array(String)})
+    )
+    AND benchmark_dtype = {dtype: String}
+    AND benchmark_mode = {mode: String}
+    AND device = {device: String}
+    AND positionCaseInsensitive(arch, {arch: String}) > 0
+
+SETTINGS session_timezone = 'UTC';
diff --git a/torchci/lib/benchmark/api_helper/compilers/precompute.ts b/torchci/lib/benchmark/api_helper/compilers/precompute.ts
@@ -6,34 +6,29 @@ import {
   getPassingModels,
 } from "lib/benchmark/compilerUtils";
 import { queryClickhouseSaved } from "lib/clickhouse";
-import {
-  BenchmarkTimeSeriesResponse,
-  CommitRow,
-  groupByBenchmarkData,
-  toCommitRowMap,
-} from "../utils";
-
-const BENCNMARK_TABLE_NAME = "compilers_benchmark_performance";
-const BENCNMARK_COMMIT_NAME = "compilers_benchmark_performance_branches";
+import { CompilerPerformanceData } from "lib/types";
+import { BenchmarkTimeSeriesResponse, groupByBenchmarkData } from "../utils";
+// NOTE(review): presumably sample values for the "arch" query parameter — confirm: ["x86_64","NVIDIA A10G","NVIDIA H100 80GB HBM3"]
+const COMPILER_BENCHMARK_TABLE_NAME = "compilers_benchmark_api_query";
 
 // TODO(elainewy): improve the fetch performance
-export async function getCompilerBenchmarkData(inputparams: any) {
+export async function getCompilerBenchmarkData(
+  inputparams: any,
+  query_table: string = ""
+) {
+  let table = COMPILER_BENCHMARK_TABLE_NAME;
+  if (query_table.length > 0) {
+    table = query_table;
+  }
+
   const start = Date.now();
-  const rows = await queryClickhouseSaved(BENCNMARK_TABLE_NAME, inputparams);
+  let rows = await queryClickhouseSaved(table, inputparams);
   const end = Date.now();
-  console.log("time to get data", end - start);
-
-  const startc = Date.now();
-  const commits = await queryClickhouseSaved(
-    BENCNMARK_COMMIT_NAME,
-    inputparams
-  );
-  const endc = Date.now();
-  console.log("time to get commit data", endc - startc);
-  const commitMap = toCommitRowMap(commits);
+  console.log("time to get compiler timeseris data", end - start);
 
   if (rows.length === 0) {
     const response: BenchmarkTimeSeriesResponse = {
+      total_rows: 0,
       time_series: [],
       time_range: {
         start: "",
@@ -43,11 +38,26 @@ export async function getCompilerBenchmarkData(inputparams: any) {
     return response;
   }
 
+  // Extract the backend from `output` at runtime instead of in the query, since regex matching is expensive there.
+  // TODO(elainewy): add the backend as a database column to reduce runtime logic.
+  rows.map((row) => {
+    const backend =
+      row.backend && row.backend !== ""
+        ? row.backend
+        : extractBackendSqlStyle(
+            row.output,
+            row.suite,
+            inputparams.dtype,
+            inputparams.mode,
+            inputparams.device
+          );
+    row["backend"] = backend;
+  });
+
   // TODO(elainewy): add logics to handle the case to return raw data
   const benchmark_time_series_response = toPrecomputeCompiler(
     rows,
     inputparams,
-    commitMap,
     "time_series"
   );
   return benchmark_time_series_response;
@@ -56,18 +66,16 @@ export async function getCompilerBenchmarkData(inputparams: any) {
 function toPrecomputeCompiler(
   rawData: any[],
   inputparams: any,
-  commitMap: Record<string, CommitRow>,
   type: string = "time_series"
 ) {
   const data = convertToCompilerPerformanceData(rawData);
+  const commit_map = toWorkflowIdMap(data);
   const models = getPassingModels(data);
-
   const passrate = computePassrate(data, models);
   const geomean = computeGeomean(data, models);
   const peakMemory = computeMemoryCompressionRatio(data, models);
 
   const all_data = [passrate, geomean, peakMemory].flat();
-
   const earliest_timestamp = Math.min(
     ...all_data.map((row) => new Date(row.granularity_bucket).getTime())
   );
@@ -81,9 +89,8 @@ function toPrecomputeCompiler(
     row["arch"] = inputparams["arch"];
     row["device"] = inputparams["device"];
     row["mode"] = inputparams["mode"];
-    // always keep this:
-    row["commit"] = commitMap[row["workflow_id"]]?.head_sha;
-    row["branch"] = commitMap[row["workflow_id"]]?.head_branch;
+    row["commit"] = commit_map.get(row.workflow_id)?.commit;
+    row["branch"] = commit_map.get(row.workflow_id)?.branch;
   });
 
   let res: any[] = [];
@@ -163,11 +170,44 @@ function toPrecomputeCompiler(
   }
 
   const response: BenchmarkTimeSeriesResponse = {
-    time_series: res,
+    total_rows: res.length,
+    total_raw_rows: rawData.length,
     time_range: {
       start: new Date(earliest_timestamp).toISOString(),
       end: new Date(latest_timestamp).toISOString(),
     },
+    time_series: res,
   };
   return response;
 }
+
+/**
+ * Derives the backend name from a benchmark output path.
+ *
+ * The first occurrence of `_<suite>_<dtype>_<mode>_<device>_` and everything
+ * after it is removed, then the last path segment (after the final `/` or
+ * `\`) of what remains is returned.
+ *
+ * @param output - Output path recorded for the benchmark row.
+ * @param suite - Suite name expected inside the file name.
+ * @param dtype - Dtype expected inside the file name.
+ * @param mode - Mode expected inside the file name.
+ * @param device - Device expected inside the file name.
+ * @returns The extracted segment, or `null` when `output` is not a non-empty
+ *   string or contains no path separator followed by a non-empty segment.
+ */
+export function extractBackendSqlStyle(
+  output: string,
+  suite: string,
+  dtype: string,
+  mode: string,
+  device: string
+): string | null {
+  // Rows are untyped at the call site, so `output` may be missing at runtime;
+  // report "no backend" instead of throwing a TypeError.
+  if (typeof output !== "string" || output.length === 0) {
+    return null;
+  }
+
+  // Escape regex metacharacters so the parameter values are matched literally.
+  const esc = (s: string) => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+  const tail = `_${esc(suite)}_${esc(dtype)}_${esc(mode)}_${esc(device)}_`;
+
+  // Drop the suffix that starts at the suite/dtype/mode/device marker.
+  const temp = output.replace(new RegExp(`${tail}.*$`), "");
+
+  // Keep only the final path segment; a value without a separator yields null.
+  const m = temp.match(/.*[\/\\]([^\/\\]+)$/);
+  return m ? m[1] : null;
+}
+
+/**
+ * Indexes compiler performance rows by workflow id.
+ *
+ * Keys are the workflow id rendered as a string. Each value holds the row's
+ * `commit`, `branch`, and that same stringified `workflow_id`. When several
+ * rows share a workflow id, the last one in `data` wins.
+ */
+export function toWorkflowIdMap(data: CompilerPerformanceData[]) {
+  return data.reduce((byWorkflowId, entry) => {
+    const key = `${entry.workflow_id}`;
+    // Map.prototype.set returns the map itself, which feeds the next step.
+    return byWorkflowId.set(key, {
+      commit: entry.commit,
+      branch: entry.branch,
+      workflow_id: key,
+    });
+  }, new Map<string, any>());
+}
diff --git a/torchci/lib/benchmark/api_helper/utils.ts b/torchci/lib/benchmark/api_helper/utils.ts
@@ -136,8 +136,10 @@ export function getNestedField(obj: any, path: string): any {
 }
 
+// Time-series response shape; the compiler precompute helper populates it.
 export type BenchmarkTimeSeriesResponse = {
+  // Number of entries in `time_series` (0 when the query returned no rows).
+  total_rows: number;
   time_series: any[];
   time_range: { start: string; end: string };
+  // Number of raw query rows the series was computed from, when reported.
+  total_raw_rows?: number;
 };
 
 export type CommitRow = {
diff --git a/torchci/lib/benchmark/compilerUtils.ts b/torchci/lib/benchmark/compilerUtils.ts
@@ -440,6 +440,8 @@ export function convertToCompilerPerformanceData(data: BenchmarkData[]) {
         suite: r.suite,
         workflow_id: r.workflow_id,
         job_id: r.job_id,
+        branch: r.head_branch,
+        commit: r.head_sha,
       };
     }
 
diff --git a/torchci/lib/types.ts b/torchci/lib/types.ts
@@ -204,6 +204,8 @@ export interface CompilerPerformanceData {
   suite: string;
   workflow_id: number;
   job_id?: number;
+  branch?: string;
+  commit?: string;
 }
 
 export interface TritonBenchPerformanceData {
@@ -231,6 +233,8 @@ export interface BenchmarkData {
   suite: string;
   value: number;
   workflow_id: number;
+  head_sha?: string;
+  head_branch?: string;
 }
 
 export interface RepoBranchAndCommit {

Original file line number	Diff line number	Diff line change
`@@ -136,8 +136,10 @@ export function getNestedField(obj: any, path: string): any {`
`136`	`136`	`}`
`137`	`137`
`138`	`138`	`export type BenchmarkTimeSeriesResponse = {`
	`139`	`+ total_rows: number;`
`139`	`140`	`time_series: any[];`
`140`	`141`	`time_range: { start: string; end: string };`
	`142`	`+ total_raw_rows?: number;`
`141`	`143`	`};`
`142`	`144`
`143`	`145`	`export type CommitRow = {`
Original file line number	Diff line number	Diff line change
`@@ -440,6 +440,8 @@ export function convertToCompilerPerformanceData(data: BenchmarkData[]) {`
`440`	`440`	`suite: r.suite,`
`441`	`441`	`workflow_id: r.workflow_id,`
`442`	`442`	`job_id: r.job_id,`
	`443`	`+ branch: r.head_branch,`
	`444`	`+ commit: r.head_sha,`
`443`	`445`	`};`
`444`	`446`	`}`
`445`	`447`