From ff6b33b60614b6ce584f2146e20f42a83f18debe Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Thu, 7 Aug 2025 14:32:58 -0700 Subject: [PATCH 01/60] Working prototype --- CODEOWNERS | 3 ++- filepush/.gitignore | 1 + filepush/REDME.md | 13 ++++++++++ .../databricks_template_schema.json | 23 +++++++++++++++++ .../filepush-template/library/variables.tmpl | 3 +++ .../template/__preamble.tmpl | 5 ++++ .../{{.connector_name}}/databricks.yml.tmpl | 25 +++++++++++++++++++ .../{{.connector_name}}/env.json.tmpl | 3 +++ .../{{.connector_name}}_job.yml.tmpl | 11 ++++++++ .../{{.connector_name}}_pipeline.yml.tmpl | 13 ++++++++++ .../{{.connector_name}}_volume.yml.tmpl | 9 +++++++ .../{{.connector_name}}_ingestion.py.tmpl | 13 ++++++++++ ...connector_name}}_readfiles_kernel.sql.tmpl | 6 +++++ 13 files changed, 127 insertions(+), 1 deletion(-) create mode 100644 filepush/.gitignore create mode 100644 filepush/REDME.md create mode 100644 filepush/filepush-template/databricks_template_schema.json create mode 100644 filepush/filepush-template/library/variables.tmpl create mode 100644 filepush/filepush-template/template/__preamble.tmpl create mode 100644 filepush/filepush-template/template/{{.connector_name}}/databricks.yml.tmpl create mode 100644 filepush/filepush-template/template/{{.connector_name}}/env.json.tmpl create mode 100644 filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_job.yml.tmpl create mode 100644 filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_pipeline.yml.tmpl create mode 100644 filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_volume.yml.tmpl create mode 100644 filepush/filepush-template/template/{{.connector_name}}/src/pipelines/{{.connector_name}}_ingestion.py.tmpl create mode 100644 filepush/filepush-template/template/{{.connector_name}}/src/pipelines/{{.connector_name}}_readfiles_kernel.sql.tmpl diff --git a/CODEOWNERS b/CODEOWNERS index d984c513..6350370d 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -11,6 +11,7 @@ conversational-agent-app @vivian-xie-db @yuanchaoma-db database-diagram-builder @alexott downstreams @nfx @alexott feature-registry-app @yang-chengg @mparkhe @mingyangge-db @stephanielu5 +filepush @chi-yang-db go-libs @nfx @alexott ip_access_list_analyzer @alexott ka-chat-bot @taiga-db @@ -19,4 +20,4 @@ runtime-packages @nfx @alexott sql_migration_copilot @robertwhiffin tacklebox @Jonathan-Choi uc-catalog-cloning @esiol-db @vasco-lopes -.github @nfx @alexott @gueniai \ No newline at end of file +.github @nfx @alexott @gueniai diff --git a/filepush/.gitignore b/filepush/.gitignore new file mode 100644 index 00000000..722d5e71 --- /dev/null +++ b/filepush/.gitignore @@ -0,0 +1 @@ +.vscode diff --git a/filepush/REDME.md b/filepush/REDME.md new file mode 100644 index 00000000..195287c9 --- /dev/null +++ b/filepush/REDME.md @@ -0,0 +1,13 @@ +--- +title: "Managed File Push" +language: python +author: "Chi Yang" +date: 2025-08-07 + +tags: +- ingestion +- file +- nocode +--- + +# Managed File Push diff --git a/filepush/filepush-template/databricks_template_schema.json b/filepush/filepush-template/databricks_template_schema.json new file mode 100644 index 00000000..6ce6f13b --- /dev/null +++ b/filepush/filepush-template/databricks_template_schema.json @@ -0,0 +1,23 @@ +{ + "properties": { + "connector_name": { + "type": "string", + "default": "filepushconnector", + "description": "Name of the filepush connector.", + "order": 1 + }, + "catalog_name": { + "type": "string", + 
"default": "{{default_catalog}}", + "description": "Name of the catalog where tables and pipelines will be created.", + "order": 2 + }, + "schema_name": { + "type": "string", + "default": "default", + "description": "Name of the schema where tables and pipelines will be created.", + "order": 3 + } + }, + "success_message": "\nYour bundle '{{.connector_name}}' has been created." +} diff --git a/filepush/filepush-template/library/variables.tmpl b/filepush/filepush-template/library/variables.tmpl new file mode 100644 index 00000000..a8acc137 --- /dev/null +++ b/filepush/filepush-template/library/variables.tmpl @@ -0,0 +1,3 @@ +{{ define `volume_path` -}} + /Volumes/{{.catalog_name}}/{{.schema_name}}/{{.connector_name}}_volume +{{- end }} diff --git a/filepush/filepush-template/template/__preamble.tmpl b/filepush/filepush-template/template/__preamble.tmpl new file mode 100644 index 00000000..b538c75a --- /dev/null +++ b/filepush/filepush-template/template/__preamble.tmpl @@ -0,0 +1,5 @@ +# Preamble + +This file only template directives; it is skipped for the actual output. + +{{skip "__preamble"}} diff --git a/filepush/filepush-template/template/{{.connector_name}}/databricks.yml.tmpl b/filepush/filepush-template/template/{{.connector_name}}/databricks.yml.tmpl new file mode 100644 index 00000000..50042472 --- /dev/null +++ b/filepush/filepush-template/template/{{.connector_name}}/databricks.yml.tmpl @@ -0,0 +1,25 @@ +# databricks.yml +# This is the configuration for the Databricks Asset Bundle {{.connector_name}}. + +bundle: + name: {{.connector_name}} + +include: + - resources/*.yml + +targets: + # The deployment targets. See https://docs.databricks.com/en/dev-tools/bundles/deployment-modes.html + dev: + mode: development + default: true + workspace: + host: {{workspace_host}} + + prod: + mode: production + workspace: + host: {{workspace_host}} + root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} + permissions: + - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} + level: CAN_MANAGE diff --git a/filepush/filepush-template/template/{{.connector_name}}/env.json.tmpl b/filepush/filepush-template/template/{{.connector_name}}/env.json.tmpl new file mode 100644 index 00000000..2f5917c4 --- /dev/null +++ b/filepush/filepush-template/template/{{.connector_name}}/env.json.tmpl @@ -0,0 +1,3 @@ +{ + "volume_path": "{{template `volume_path` .}}" +} diff --git a/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_job.yml.tmpl b/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_job.yml.tmpl new file mode 100644 index 00000000..095518de --- /dev/null +++ b/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_job.yml.tmpl @@ -0,0 +1,11 @@ +# {{.connector_name}}_job.yml +# The main job for {{.connector_name}} + +resources: + jobs: + {{.connector_name}}_job: + name: {{.connector_name}}_job + tasks: + - task_key: {{.connector_name}}_pipeline_refresh + pipeline_task: + pipeline_id: ${resources.pipelines.{{.connector_name}}_pipeline.id} diff --git a/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_pipeline.yml.tmpl b/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_pipeline.yml.tmpl new file mode 100644 index 00000000..4e45c1a7 --- /dev/null +++ b/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_pipeline.yml.tmpl @@ -0,0 +1,13 @@ +# 
{{.connector_name}}_pipeline.yml +# The refresh pipeline for {{.connector_name}} + +resources: + pipelines: + {{.connector_name}}_pipeline: + name: {{.connector_name}}_pipeline + catalog: {{.catalog_name}} + schema: {{.schema_name}} + serverless: true + libraries: + - file: + path: ../src/pipelines/{{.connector_name}}_ingestion.py diff --git a/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_volume.yml.tmpl b/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_volume.yml.tmpl new file mode 100644 index 00000000..b3456934 --- /dev/null +++ b/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_volume.yml.tmpl @@ -0,0 +1,9 @@ +# {{.connector_name}}_volume.yml +# The volume for {{.connector_name}} + +resources: + volumes: + {{.connector_name}}_volume: + name: {{.connector_name}}_volume + catalog_name: {{.catalog_name}} + schema_name: {{.schema_name}} diff --git a/filepush/filepush-template/template/{{.connector_name}}/src/pipelines/{{.connector_name}}_ingestion.py.tmpl b/filepush/filepush-template/template/{{.connector_name}}/src/pipelines/{{.connector_name}}_ingestion.py.tmpl new file mode 100644 index 00000000..a1af93f1 --- /dev/null +++ b/filepush/filepush-template/template/{{.connector_name}}/src/pipelines/{{.connector_name}}_ingestion.py.tmpl @@ -0,0 +1,13 @@ +import dlt + +@dlt.table( + name="{{.connector_name}}_raw", + comment="A streaming table created by filepush bundle {{.connector_name}}.", + table_properties={ + "volume_path": "{{template `volume_path` .}}" + } +) +def {{.connector_name}}_raw(): + with open(f"./{{.connector_name}}_readfiles_kernel.sql", "r") as f: + kernel_query = f.read() + return spark.sql(kernel_query) diff --git a/filepush/filepush-template/template/{{.connector_name}}/src/pipelines/{{.connector_name}}_readfiles_kernel.sql.tmpl b/filepush/filepush-template/template/{{.connector_name}}/src/pipelines/{{.connector_name}}_readfiles_kernel.sql.tmpl new file mode 100644 index 00000000..a04774eb --- /dev/null +++ b/filepush/filepush-template/template/{{.connector_name}}/src/pipelines/{{.connector_name}}_readfiles_kernel.sql.tmpl @@ -0,0 +1,6 @@ +SELECT + * +FROM + read_files( + '{{template `volume_path` .}}' + ) From 02da2bf646c3cf71689e4f7003a164de12d6bf08 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Thu, 7 Aug 2025 16:02:26 -0700 Subject: [PATCH 02/60] Add basic tool scripts --- .../tools/get_push_endpoint_from_table.sh.tmpl | 2 ++ .../template/{{.connector_name}}/tools/trigger_refresh.sh.tmpl | 2 ++ .../template/{{.connector_name}}/tools/upload_to_volume.sh.tmpl | 2 ++ 3 files changed, 6 insertions(+) create mode 100644 filepush/filepush-template/template/{{.connector_name}}/tools/get_push_endpoint_from_table.sh.tmpl create mode 100644 filepush/filepush-template/template/{{.connector_name}}/tools/trigger_refresh.sh.tmpl create mode 100644 filepush/filepush-template/template/{{.connector_name}}/tools/upload_to_volume.sh.tmpl diff --git a/filepush/filepush-template/template/{{.connector_name}}/tools/get_push_endpoint_from_table.sh.tmpl b/filepush/filepush-template/template/{{.connector_name}}/tools/get_push_endpoint_from_table.sh.tmpl new file mode 100644 index 00000000..30a953a3 --- /dev/null +++ b/filepush/filepush-template/template/{{.connector_name}}/tools/get_push_endpoint_from_table.sh.tmpl @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +databricks tables get {{template `raw_table_name` .}} --output json | jq '.properties.volume_path' \ No newline at end of file 
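
The tool scripts in this patch are thin wrappers around the Databricks CLI. As a minimal usage sketch, assuming the template is rendered with the default connector_name of filepushconnector and that the .tmpl suffix is dropped when the template is materialized, the flow might look like:

    # Render the template, then deploy the resulting bundle to the dev target
    databricks bundle init ./filepush-template      # prompts for connector_name, catalog_name, schema_name
    cd filepushconnector
    databricks bundle deploy --target dev

    # Push a local file into the staging volume, then read back the push endpoint
    ./tools/upload_to_volume.sh ./sample.csv
    ./tools/get_push_endpoint_from_table.sh         # prints the volume_path table property via jq

Here sample.csv and the prompt order are illustrative assumptions; only the underlying subcommands (bundle init, bundle deploy, fs cp, tables get) are standard Databricks CLI calls.
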
diff --git a/filepush/filepush-template/template/{{.connector_name}}/tools/trigger_refresh.sh.tmpl b/filepush/filepush-template/template/{{.connector_name}}/tools/trigger_refresh.sh.tmpl new file mode 100644 index 00000000..4eb18a5a --- /dev/null +++ b/filepush/filepush-template/template/{{.connector_name}}/tools/trigger_refresh.sh.tmpl @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +databricks pipelines start-update --pipeline-id ${resources.pipelines.{{.connector_name}}_pipeline.id} \ No newline at end of file diff --git a/filepush/filepush-template/template/{{.connector_name}}/tools/upload_to_volume.sh.tmpl b/filepush/filepush-template/template/{{.connector_name}}/tools/upload_to_volume.sh.tmpl new file mode 100644 index 00000000..9a52bde2 --- /dev/null +++ b/filepush/filepush-template/template/{{.connector_name}}/tools/upload_to_volume.sh.tmpl @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +databricks fs cp $1 dbfs:{{template `volume_path` .}} \ No newline at end of file From a3a7683c00aabc5510bf9f2498aa17838479f91c Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Thu, 7 Aug 2025 16:02:51 -0700 Subject: [PATCH 03/60] Parameterize raw table name --- filepush/filepush-template/library/variables.tmpl | 4 ++++ .../src/pipelines/{{.connector_name}}_ingestion.py.tmpl | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/filepush/filepush-template/library/variables.tmpl b/filepush/filepush-template/library/variables.tmpl index a8acc137..e15d2bf5 100644 --- a/filepush/filepush-template/library/variables.tmpl +++ b/filepush/filepush-template/library/variables.tmpl @@ -1,3 +1,7 @@ {{ define `volume_path` -}} /Volumes/{{.catalog_name}}/{{.schema_name}}/{{.connector_name}}_volume {{- end }} + +{{ define `raw_table_name` -}} + {{.catalog_name}}/{{.schema_name}}/{{.connector_name}}_raw +{{- end}} diff --git a/filepush/filepush-template/template/{{.connector_name}}/src/pipelines/{{.connector_name}}_ingestion.py.tmpl b/filepush/filepush-template/template/{{.connector_name}}/src/pipelines/{{.connector_name}}_ingestion.py.tmpl index a1af93f1..16c50973 100644 --- a/filepush/filepush-template/template/{{.connector_name}}/src/pipelines/{{.connector_name}}_ingestion.py.tmpl +++ b/filepush/filepush-template/template/{{.connector_name}}/src/pipelines/{{.connector_name}}_ingestion.py.tmpl @@ -1,13 +1,13 @@ import dlt @dlt.table( - name="{{.connector_name}}_raw", - comment="A streaming table created by filepush bundle {{.connector_name}}.", + name="{{template `raw_table_name` .}}", + comment="A streaming table created by filepush bundle {{.connector_name}}. 
This holds the raw data from the uploaded files.", table_properties={ "volume_path": "{{template `volume_path` .}}" } ) -def {{.connector_name}}_raw(): +def {{template `raw_table_name` .}}(): with open(f"./{{.connector_name}}_readfiles_kernel.sql", "r") as f: kernel_query = f.read() return spark.sql(kernel_query) From 1d79e2baefd7a5c9e42ec3d1b3a9907c7569ffb5 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Thu, 7 Aug 2025 16:52:11 -0700 Subject: [PATCH 04/60] Finish the dev scripts --- filepush/.gitignore | 3 +++ filepush/filepush-template/library/variables.tmpl | 2 +- .../template/{{.connector_name}}/tools/get_pipeline_id.sh.tmpl | 2 ++ .../tools/get_push_endpoint_from_table.sh.tmpl | 0 .../template/{{.connector_name}}/tools/trigger_refresh.sh.tmpl | 2 +- .../{{.connector_name}}/tools/upload_to_volume.sh.tmpl | 0 6 files changed, 7 insertions(+), 2 deletions(-) create mode 100755 filepush/filepush-template/template/{{.connector_name}}/tools/get_pipeline_id.sh.tmpl mode change 100644 => 100755 filepush/filepush-template/template/{{.connector_name}}/tools/get_push_endpoint_from_table.sh.tmpl mode change 100644 => 100755 filepush/filepush-template/template/{{.connector_name}}/tools/trigger_refresh.sh.tmpl mode change 100644 => 100755 filepush/filepush-template/template/{{.connector_name}}/tools/upload_to_volume.sh.tmpl diff --git a/filepush/.gitignore b/filepush/.gitignore index 722d5e71..0e53a123 100644 --- a/filepush/.gitignore +++ b/filepush/.gitignore @@ -1 +1,4 @@ .vscode +up.sh +down.sh +conf.json diff --git a/filepush/filepush-template/library/variables.tmpl b/filepush/filepush-template/library/variables.tmpl index e15d2bf5..8bd4c960 100644 --- a/filepush/filepush-template/library/variables.tmpl +++ b/filepush/filepush-template/library/variables.tmpl @@ -3,5 +3,5 @@ {{- end }} {{ define `raw_table_name` -}} - {{.catalog_name}}/{{.schema_name}}/{{.connector_name}}_raw + {{.connector_name}}_raw {{- end}} diff --git a/filepush/filepush-template/template/{{.connector_name}}/tools/get_pipeline_id.sh.tmpl b/filepush/filepush-template/template/{{.connector_name}}/tools/get_pipeline_id.sh.tmpl new file mode 100755 index 00000000..da3fd63f --- /dev/null +++ b/filepush/filepush-template/template/{{.connector_name}}/tools/get_pipeline_id.sh.tmpl @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +databricks bundle summary --output json | jq -r .resources.pipelines.{{.connector_name}}_pipeline.id \ No newline at end of file diff --git a/filepush/filepush-template/template/{{.connector_name}}/tools/get_push_endpoint_from_table.sh.tmpl b/filepush/filepush-template/template/{{.connector_name}}/tools/get_push_endpoint_from_table.sh.tmpl old mode 100644 new mode 100755 diff --git a/filepush/filepush-template/template/{{.connector_name}}/tools/trigger_refresh.sh.tmpl b/filepush/filepush-template/template/{{.connector_name}}/tools/trigger_refresh.sh.tmpl old mode 100644 new mode 100755 index 4eb18a5a..87df798f --- a/filepush/filepush-template/template/{{.connector_name}}/tools/trigger_refresh.sh.tmpl +++ b/filepush/filepush-template/template/{{.connector_name}}/tools/trigger_refresh.sh.tmpl @@ -1,2 +1,2 @@ #!/usr/bin/env bash -databricks pipelines start-update --pipeline-id ${resources.pipelines.{{.connector_name}}_pipeline.id} \ No newline at end of file +databricks pipelines start-update $($(dirname $0)/get_pipeline_id.sh) \ No newline at end of file diff --git a/filepush/filepush-template/template/{{.connector_name}}/tools/upload_to_volume.sh.tmpl 
b/filepush/filepush-template/template/{{.connector_name}}/tools/upload_to_volume.sh.tmpl old mode 100644 new mode 100755 From 6f2b5a40cc3b9fa170cd51da25a5a5e7f1a9ead4 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Thu, 7 Aug 2025 17:13:43 -0700 Subject: [PATCH 05/60] Fix an identifier in script --- filepush/.gitignore | 1 + .../tools/get_push_endpoint_from_table.sh.tmpl | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/filepush/.gitignore b/filepush/.gitignore index 0e53a123..2a722220 100644 --- a/filepush/.gitignore +++ b/filepush/.gitignore @@ -2,3 +2,4 @@ up.sh down.sh conf.json +filepushconnector/ diff --git a/filepush/filepush-template/template/{{.connector_name}}/tools/get_push_endpoint_from_table.sh.tmpl b/filepush/filepush-template/template/{{.connector_name}}/tools/get_push_endpoint_from_table.sh.tmpl index 30a953a3..7b6e488c 100755 --- a/filepush/filepush-template/template/{{.connector_name}}/tools/get_push_endpoint_from_table.sh.tmpl +++ b/filepush/filepush-template/template/{{.connector_name}}/tools/get_push_endpoint_from_table.sh.tmpl @@ -1,2 +1,2 @@ #!/usr/bin/env bash -databricks tables get {{template `raw_table_name` .}} --output json | jq '.properties.volume_path' \ No newline at end of file +databricks tables get {{.catalog_name}}.{{.schema_name}}.{{template `raw_table_name` .}} --output json | jq '.properties.volume_path' \ No newline at end of file From 37700e80c86a87f2fd5fbf6b6de19786da5557b3 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Fri, 8 Aug 2025 10:05:52 -0700 Subject: [PATCH 06/60] User file trigger --- filepush/filepush-template/library/variables.tmpl | 4 ++++ .../resources/{{.connector_name}}_job.yml.tmpl | 3 +++ .../{{.connector_name}}/tools/get_pipeline_id.sh.tmpl | 2 +- ...le.sh.tmpl => get_volume_path_from_table_property.sh.tmpl} | 2 +- .../{{.connector_name}}/tools/trigger_refresh.sh.tmpl | 2 +- .../{{.connector_name}}/tools/upload_to_volume.sh.tmpl | 2 +- 6 files changed, 11 insertions(+), 4 deletions(-) rename filepush/filepush-template/template/{{.connector_name}}/tools/{get_push_endpoint_from_table.sh.tmpl => get_volume_path_from_table_property.sh.tmpl} (96%) diff --git a/filepush/filepush-template/library/variables.tmpl b/filepush/filepush-template/library/variables.tmpl index 8bd4c960..0e642063 100644 --- a/filepush/filepush-template/library/variables.tmpl +++ b/filepush/filepush-template/library/variables.tmpl @@ -2,6 +2,10 @@ /Volumes/{{.catalog_name}}/{{.schema_name}}/{{.connector_name}}_volume {{- end }} +{{ define `volume_path_url` -}} + dbfs:{{template `volume_path` .}} +{{- end }} + {{ define `raw_table_name` -}} {{.connector_name}}_raw {{- end}} diff --git a/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_job.yml.tmpl b/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_job.yml.tmpl index 095518de..4beb422c 100644 --- a/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_job.yml.tmpl +++ b/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_job.yml.tmpl @@ -9,3 +9,6 @@ resources: - task_key: {{.connector_name}}_pipeline_refresh pipeline_task: pipeline_id: ${resources.pipelines.{{.connector_name}}_pipeline.id} + trigger: + file_arrival: + url: {{template `volume_path_url` .}} diff --git a/filepush/filepush-template/template/{{.connector_name}}/tools/get_pipeline_id.sh.tmpl b/filepush/filepush-template/template/{{.connector_name}}/tools/get_pipeline_id.sh.tmpl index 
da3fd63f..92645102 100755 --- a/filepush/filepush-template/template/{{.connector_name}}/tools/get_pipeline_id.sh.tmpl +++ b/filepush/filepush-template/template/{{.connector_name}}/tools/get_pipeline_id.sh.tmpl @@ -1,2 +1,2 @@ #!/usr/bin/env bash -databricks bundle summary --output json | jq -r .resources.pipelines.{{.connector_name}}_pipeline.id \ No newline at end of file +databricks bundle summary --output json | jq -r .resources.pipelines.{{.connector_name}}_pipeline.id diff --git a/filepush/filepush-template/template/{{.connector_name}}/tools/get_push_endpoint_from_table.sh.tmpl b/filepush/filepush-template/template/{{.connector_name}}/tools/get_volume_path_from_table_property.sh.tmpl similarity index 96% rename from filepush/filepush-template/template/{{.connector_name}}/tools/get_push_endpoint_from_table.sh.tmpl rename to filepush/filepush-template/template/{{.connector_name}}/tools/get_volume_path_from_table_property.sh.tmpl index 7b6e488c..5ecf816f 100755 --- a/filepush/filepush-template/template/{{.connector_name}}/tools/get_push_endpoint_from_table.sh.tmpl +++ b/filepush/filepush-template/template/{{.connector_name}}/tools/get_volume_path_from_table_property.sh.tmpl @@ -1,2 +1,2 @@ #!/usr/bin/env bash -databricks tables get {{.catalog_name}}.{{.schema_name}}.{{template `raw_table_name` .}} --output json | jq '.properties.volume_path' \ No newline at end of file +databricks tables get {{.catalog_name}}.{{.schema_name}}.{{template `raw_table_name` .}} --output json | jq '.properties.volume_path' diff --git a/filepush/filepush-template/template/{{.connector_name}}/tools/trigger_refresh.sh.tmpl b/filepush/filepush-template/template/{{.connector_name}}/tools/trigger_refresh.sh.tmpl index 87df798f..7e297d6a 100755 --- a/filepush/filepush-template/template/{{.connector_name}}/tools/trigger_refresh.sh.tmpl +++ b/filepush/filepush-template/template/{{.connector_name}}/tools/trigger_refresh.sh.tmpl @@ -1,2 +1,2 @@ #!/usr/bin/env bash -databricks pipelines start-update $($(dirname $0)/get_pipeline_id.sh) \ No newline at end of file +databricks bundle run {{.connector_name}}_pipeline diff --git a/filepush/filepush-template/template/{{.connector_name}}/tools/upload_to_volume.sh.tmpl b/filepush/filepush-template/template/{{.connector_name}}/tools/upload_to_volume.sh.tmpl index 9a52bde2..8eaad52e 100755 --- a/filepush/filepush-template/template/{{.connector_name}}/tools/upload_to_volume.sh.tmpl +++ b/filepush/filepush-template/template/{{.connector_name}}/tools/upload_to_volume.sh.tmpl @@ -1,2 +1,2 @@ #!/usr/bin/env bash -databricks fs cp $1 dbfs:{{template `volume_path` .}} \ No newline at end of file +databricks fs cp $1 {{template `volume_path_url` .}} From 2e0cbbf628ff092fc3bc7c6c2f37068b6c9e48c7 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Fri, 8 Aug 2025 10:20:09 -0700 Subject: [PATCH 07/60] Fix file trigger path --- filepush/filepush-template/library/variables.tmpl | 2 +- .../template/{{.connector_name}}/env.json.tmpl | 3 --- .../resources/{{.connector_name}}_job.yml.tmpl | 2 +- 3 files changed, 2 insertions(+), 5 deletions(-) delete mode 100644 filepush/filepush-template/template/{{.connector_name}}/env.json.tmpl diff --git a/filepush/filepush-template/library/variables.tmpl b/filepush/filepush-template/library/variables.tmpl index 0e642063..073bc1ba 100644 --- a/filepush/filepush-template/library/variables.tmpl +++ b/filepush/filepush-template/library/variables.tmpl @@ -1,5 +1,5 @@ {{ define `volume_path` -}} - /Volumes/{{.catalog_name}}/{{.schema_name}}/{{.connector_name}}_volume 
+ /Volumes/{{.catalog_name}}/{{.schema_name}}/{{.connector_name}}_volume/ {{- end }} {{ define `volume_path_url` -}} diff --git a/filepush/filepush-template/template/{{.connector_name}}/env.json.tmpl b/filepush/filepush-template/template/{{.connector_name}}/env.json.tmpl deleted file mode 100644 index 2f5917c4..00000000 --- a/filepush/filepush-template/template/{{.connector_name}}/env.json.tmpl +++ /dev/null @@ -1,3 +0,0 @@ -{ - "volume_path": "{{template `volume_path` .}}" -} diff --git a/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_job.yml.tmpl b/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_job.yml.tmpl index 4beb422c..cc416f49 100644 --- a/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_job.yml.tmpl +++ b/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_job.yml.tmpl @@ -11,4 +11,4 @@ resources: pipeline_id: ${resources.pipelines.{{.connector_name}}_pipeline.id} trigger: file_arrival: - url: {{template `volume_path_url` .}} + url: {{template `volume_path` .}} From 4fa17eb039d0a1400f9fb549c82a26fd9f0e2bd8 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Fri, 8 Aug 2025 14:39:22 -0700 Subject: [PATCH 08/60] Fix gitignore --- filepush/.gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/filepush/.gitignore b/filepush/.gitignore index 2a722220..0e53a123 100644 --- a/filepush/.gitignore +++ b/filepush/.gitignore @@ -2,4 +2,3 @@ up.sh down.sh conf.json -filepushconnector/ From 2a6062bd5fc85733e58651849b50e890bcaea23a Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Fri, 8 Aug 2025 14:55:50 -0700 Subject: [PATCH 09/60] Unpause file trigger by default --- .../resources/{{.connector_name}}_job.yml.tmpl | 1 + 1 file changed, 1 insertion(+) diff --git a/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_job.yml.tmpl b/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_job.yml.tmpl index cc416f49..e31dde57 100644 --- a/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_job.yml.tmpl +++ b/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_job.yml.tmpl @@ -12,3 +12,4 @@ resources: trigger: file_arrival: url: {{template `volume_path` .}} + pause_status: UNPAUSED From 26ff855b46c3865da594e1bd51a0ae89008cd11f Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Mon, 11 Aug 2025 15:44:59 -0700 Subject: [PATCH 10/60] Switch to streaming query --- .../src/pipelines/{{.connector_name}}_ingestion.py.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/filepush/filepush-template/template/{{.connector_name}}/src/pipelines/{{.connector_name}}_ingestion.py.tmpl b/filepush/filepush-template/template/{{.connector_name}}/src/pipelines/{{.connector_name}}_ingestion.py.tmpl index 16c50973..ce8cbc94 100644 --- a/filepush/filepush-template/template/{{.connector_name}}/src/pipelines/{{.connector_name}}_ingestion.py.tmpl +++ b/filepush/filepush-template/template/{{.connector_name}}/src/pipelines/{{.connector_name}}_ingestion.py.tmpl @@ -9,5 +9,5 @@ import dlt ) def {{template `raw_table_name` .}}(): with open(f"./{{.connector_name}}_readfiles_kernel.sql", "r") as f: - kernel_query = f.read() + kernel_query = kernel_query.replace("read_files(", "STREAM read_files(") return spark.sql(kernel_query) From c5212cfe4bfa7fd047f3cf56853da2d668881919 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Tue, 12 Aug 2025 
08:38:34 -0700 Subject: [PATCH 11/60] Successful dynamic table prototype --- .../filepush-template/library/variables.tmpl | 12 +++++ .../{{.connector_name}}_ingestion.py.tmpl | 54 +++++++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/filepush/filepush-template/library/variables.tmpl b/filepush/filepush-template/library/variables.tmpl index 073bc1ba..35950f56 100644 --- a/filepush/filepush-template/library/variables.tmpl +++ b/filepush/filepush-template/library/variables.tmpl @@ -2,6 +2,18 @@ /Volumes/{{.catalog_name}}/{{.schema_name}}/{{.connector_name}}_volume/ {{- end }} +{{ define `volume_data_path` -}} + dbfs:{{template `volume_path` .}}data/ +{{- end }} + +{{ define `volume_baddata_path` -}} + dbfs:{{template `volume_path` .}}baddata/ +{{- end }} + +{{ define `volume_archive_path` -}} + dbfs:{{template `volume_path` .}}archive/ +{{- end }} + {{ define `volume_path_url` -}} dbfs:{{template `volume_path` .}} {{- end }} diff --git a/filepush/filepush-template/template/{{.connector_name}}/src/pipelines/{{.connector_name}}_ingestion.py.tmpl b/filepush/filepush-template/template/{{.connector_name}}/src/pipelines/{{.connector_name}}_ingestion.py.tmpl index ce8cbc94..719eecc2 100644 --- a/filepush/filepush-template/template/{{.connector_name}}/src/pipelines/{{.connector_name}}_ingestion.py.tmpl +++ b/filepush/filepush-template/template/{{.connector_name}}/src/pipelines/{{.connector_name}}_ingestion.py.tmpl @@ -1,4 +1,6 @@ import dlt +from dbruntime.dbutils import FileInfo +import re @dlt.table( name="{{template `raw_table_name` .}}", @@ -9,5 +11,57 @@ import dlt ) def {{template `raw_table_name` .}}(): with open(f"./{{.connector_name}}_readfiles_kernel.sql", "r") as f: + kernel_query = f.read() kernel_query = kernel_query.replace("read_files(", "STREAM read_files(") return spark.sql(kernel_query) + +# Dynamic Tables +def sanitize_table_name(name: str) -> str: + """ + Make a valid, reasonably human-friendly table name from a folder name. + - Lowercase + - Replace non [a-z0-9_] with underscores + - Ensure it doesn't start with a digit + """ + n = name.strip().lower() + n = re.sub(r"[^a-z0-9_]", "_", n) + if re.match(r"^[0-9]", n): + n = f"t_{n}" + n = re.sub(r"_+", "_", n).strip("_") + return n or "t_unnamed" + +def dbfs_is_dir(f: FileInfo): + is_dir_attr = getattr(f, "isDir", None) + return is_dir_attr() if callable(is_dir_attr) else f.name.endswith("/") + +def list_immediate_subdirs(path: str): + items = dbutils.fs.ls(path) + out = [] + for f in items: + if dbfs_is_dir(f): + # f.name often ends with '/', drop it for a clean folder name + clean_name = f.name[:-1] if f.name.endswith("/") else f.name + out.append((clean_name, f.path.removeprefix('dbfs:'))) + return out + +def make_dlt_table(subdir_name: str, subdir_path: str): + """ + Defines a DLT table for a given subfolder at import time. + Uses Auto Loader (streaming) if `streaming=True`, else batch reader. 
+ """ + + table_name = sanitize_table_name(subdir_name) + + if len(dbutils.fs.ls(subdir_path)) > 0: + @dlt.table( + name=table_name, + comment=f"Auto-created from subfolder: {subdir_path} (streaming via Auto Loader)", + table_properties={ + "volume_path": f"{subdir_path}" + } + ) + def _auto_loader_table(): + return spark.readStream.format("cloudFiles").option("cloudFiles.format","csv").load(subdir_path) + +for subdir_name, subdir_path in list_immediate_subdirs('{{template `volume_path` .}}'): + make_dlt_table(subdir_name, subdir_path) \ No newline at end of file From efb8e4aa620b4a02c0d443c3c4f6ce88c0e7a570 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Tue, 12 Aug 2025 10:19:37 -0700 Subject: [PATCH 12/60] Eliminate connector name --- .../databricks_template_schema.json | 8 +------- filepush/filepush-template/library/variables.tmpl | 4 ++-- .../resources/{{.connector_name}}_job.yml.tmpl | 15 --------------- .../{{.connector_name}}_pipeline.yml.tmpl | 13 ------------- .../resources/{{.connector_name}}_volume.yml.tmpl | 9 --------- .../get_volume_path_from_table_property.sh.tmpl | 2 -- .../tools/trigger_refresh.sh.tmpl | 2 -- .../tools/upload_to_volume.sh.tmpl | 2 -- .../databricks.yml.tmpl | 4 ++-- .../{{.schema_name}}/resources/job.yml.tmpl | 15 +++++++++++++++ .../{{.schema_name}}/resources/pipeline.yml.tmpl | 14 ++++++++++++++ .../{{.schema_name}}/resources/schema.yml.tmpl | 7 +++++++ .../{{.schema_name}}/resources/volume.yml.tmpl | 8 ++++++++ .../pipelines/{{.schema_name}}_ingestion.py.tmpl} | 13 ------------- .../{{.schema_name}}_readfiles_kernel.sql.tmpl} | 0 .../tools/get_pipeline_id.sh.tmpl | 2 +- .../get_volume_path_from_table_property.sh.tmpl | 2 ++ .../tools/trigger_refresh.sh.tmpl | 2 ++ .../tools/upload_to_volume.sh.tmpl | 2 ++ 19 files changed, 56 insertions(+), 68 deletions(-) delete mode 100644 filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_job.yml.tmpl delete mode 100644 filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_pipeline.yml.tmpl delete mode 100644 filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_volume.yml.tmpl delete mode 100755 filepush/filepush-template/template/{{.connector_name}}/tools/get_volume_path_from_table_property.sh.tmpl delete mode 100755 filepush/filepush-template/template/{{.connector_name}}/tools/trigger_refresh.sh.tmpl delete mode 100755 filepush/filepush-template/template/{{.connector_name}}/tools/upload_to_volume.sh.tmpl rename filepush/filepush-template/template/{{{.connector_name}} => {{.schema_name}}}/databricks.yml.tmpl (83%) create mode 100644 filepush/filepush-template/template/{{.schema_name}}/resources/job.yml.tmpl create mode 100644 filepush/filepush-template/template/{{.schema_name}}/resources/pipeline.yml.tmpl create mode 100644 filepush/filepush-template/template/{{.schema_name}}/resources/schema.yml.tmpl create mode 100644 filepush/filepush-template/template/{{.schema_name}}/resources/volume.yml.tmpl rename filepush/filepush-template/template/{{{.connector_name}}/src/pipelines/{{.connector_name}}_ingestion.py.tmpl => {{.schema_name}}/src/pipelines/{{.schema_name}}_ingestion.py.tmpl} (77%) rename filepush/filepush-template/template/{{{.connector_name}}/src/pipelines/{{.connector_name}}_readfiles_kernel.sql.tmpl => {{.schema_name}}/src/pipelines/{{.schema_name}}_readfiles_kernel.sql.tmpl} (100%) rename filepush/filepush-template/template/{{{.connector_name}} => {{.schema_name}}}/tools/get_pipeline_id.sh.tmpl (69%) create 
mode 100755 filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_table_property.sh.tmpl create mode 100755 filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh.tmpl create mode 100755 filepush/filepush-template/template/{{.schema_name}}/tools/upload_to_volume.sh.tmpl diff --git a/filepush/filepush-template/databricks_template_schema.json b/filepush/filepush-template/databricks_template_schema.json index 6ce6f13b..256f185a 100644 --- a/filepush/filepush-template/databricks_template_schema.json +++ b/filepush/filepush-template/databricks_template_schema.json @@ -1,11 +1,5 @@ { "properties": { - "connector_name": { - "type": "string", - "default": "filepushconnector", - "description": "Name of the filepush connector.", - "order": 1 - }, "catalog_name": { "type": "string", "default": "{{default_catalog}}", @@ -19,5 +13,5 @@ "order": 3 } }, - "success_message": "\nYour bundle '{{.connector_name}}' has been created." + "success_message": "\nYour file push bundle in {{.catalog_name}}.{{.schema_name}} has been created." } diff --git a/filepush/filepush-template/library/variables.tmpl b/filepush/filepush-template/library/variables.tmpl index 35950f56..34b8f79e 100644 --- a/filepush/filepush-template/library/variables.tmpl +++ b/filepush/filepush-template/library/variables.tmpl @@ -1,5 +1,5 @@ {{ define `volume_path` -}} - /Volumes/{{.catalog_name}}/{{.schema_name}}/{{.connector_name}}_volume/ + /Volumes/{{.catalog_name}}/{{.schema_name}}/{{.schema_name}}_volume/ {{- end }} {{ define `volume_data_path` -}} @@ -18,6 +18,6 @@ dbfs:{{template `volume_path` .}} {{- end }} -{{ define `raw_table_name` -}} +{{ define `raw_table_name_format` -}} {{.connector_name}}_raw {{- end}} diff --git a/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_job.yml.tmpl b/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_job.yml.tmpl deleted file mode 100644 index e31dde57..00000000 --- a/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_job.yml.tmpl +++ /dev/null @@ -1,15 +0,0 @@ -# {{.connector_name}}_job.yml -# The main job for {{.connector_name}} - -resources: - jobs: - {{.connector_name}}_job: - name: {{.connector_name}}_job - tasks: - - task_key: {{.connector_name}}_pipeline_refresh - pipeline_task: - pipeline_id: ${resources.pipelines.{{.connector_name}}_pipeline.id} - trigger: - file_arrival: - url: {{template `volume_path` .}} - pause_status: UNPAUSED diff --git a/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_pipeline.yml.tmpl b/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_pipeline.yml.tmpl deleted file mode 100644 index 4e45c1a7..00000000 --- a/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_pipeline.yml.tmpl +++ /dev/null @@ -1,13 +0,0 @@ -# {{.connector_name}}_pipeline.yml -# The refresh pipeline for {{.connector_name}} - -resources: - pipelines: - {{.connector_name}}_pipeline: - name: {{.connector_name}}_pipeline - catalog: {{.catalog_name}} - schema: {{.schema_name}} - serverless: true - libraries: - - file: - path: ../src/pipelines/{{.connector_name}}_ingestion.py diff --git a/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_volume.yml.tmpl b/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_volume.yml.tmpl deleted file mode 100644 index b3456934..00000000 --- 
a/filepush/filepush-template/template/{{.connector_name}}/resources/{{.connector_name}}_volume.yml.tmpl +++ /dev/null @@ -1,9 +0,0 @@ -# {{.connector_name}}_volume.yml -# The volume for {{.connector_name}} - -resources: - volumes: - {{.connector_name}}_volume: - name: {{.connector_name}}_volume - catalog_name: {{.catalog_name}} - schema_name: {{.schema_name}} diff --git a/filepush/filepush-template/template/{{.connector_name}}/tools/get_volume_path_from_table_property.sh.tmpl b/filepush/filepush-template/template/{{.connector_name}}/tools/get_volume_path_from_table_property.sh.tmpl deleted file mode 100755 index 5ecf816f..00000000 --- a/filepush/filepush-template/template/{{.connector_name}}/tools/get_volume_path_from_table_property.sh.tmpl +++ /dev/null @@ -1,2 +0,0 @@ -#!/usr/bin/env bash -databricks tables get {{.catalog_name}}.{{.schema_name}}.{{template `raw_table_name` .}} --output json | jq '.properties.volume_path' diff --git a/filepush/filepush-template/template/{{.connector_name}}/tools/trigger_refresh.sh.tmpl b/filepush/filepush-template/template/{{.connector_name}}/tools/trigger_refresh.sh.tmpl deleted file mode 100755 index 7e297d6a..00000000 --- a/filepush/filepush-template/template/{{.connector_name}}/tools/trigger_refresh.sh.tmpl +++ /dev/null @@ -1,2 +0,0 @@ -#!/usr/bin/env bash -databricks bundle run {{.connector_name}}_pipeline diff --git a/filepush/filepush-template/template/{{.connector_name}}/tools/upload_to_volume.sh.tmpl b/filepush/filepush-template/template/{{.connector_name}}/tools/upload_to_volume.sh.tmpl deleted file mode 100755 index 8eaad52e..00000000 --- a/filepush/filepush-template/template/{{.connector_name}}/tools/upload_to_volume.sh.tmpl +++ /dev/null @@ -1,2 +0,0 @@ -#!/usr/bin/env bash -databricks fs cp $1 {{template `volume_path_url` .}} diff --git a/filepush/filepush-template/template/{{.connector_name}}/databricks.yml.tmpl b/filepush/filepush-template/template/{{.schema_name}}/databricks.yml.tmpl similarity index 83% rename from filepush/filepush-template/template/{{.connector_name}}/databricks.yml.tmpl rename to filepush/filepush-template/template/{{.schema_name}}/databricks.yml.tmpl index 50042472..e1fe600b 100644 --- a/filepush/filepush-template/template/{{.connector_name}}/databricks.yml.tmpl +++ b/filepush/filepush-template/template/{{.schema_name}}/databricks.yml.tmpl @@ -1,8 +1,8 @@ # databricks.yml -# This is the configuration for the Databricks Asset Bundle {{.connector_name}}. +# This is the configuration for the file push DAB {{.schema_name}}. 
bundle: - name: {{.connector_name}} + name: {{.schema_name}} include: - resources/*.yml diff --git a/filepush/filepush-template/template/{{.schema_name}}/resources/job.yml.tmpl b/filepush/filepush-template/template/{{.schema_name}}/resources/job.yml.tmpl new file mode 100644 index 00000000..97d09b12 --- /dev/null +++ b/filepush/filepush-template/template/{{.schema_name}}/resources/job.yml.tmpl @@ -0,0 +1,15 @@ +# The main job for schema {{.schema_name}} +# This job will trigger in the schema pipeline + +resources: + jobs: + {{.schema_name}}_job: + name: {{.schema_name}}_job + tasks: + - task_key: {{.schema_name}}_pipeline_refresh + pipeline_task: + pipeline_id: ${resources.pipelines.{{.schema_name}}_pipeline.id} + trigger: + file_arrival: + url: /Volumes/{{.catalog_name}}/${resources.schemas.{{.schema_name}}.name}/{{.schema_name}}_volume/ + pause_status: UNPAUSED diff --git a/filepush/filepush-template/template/{{.schema_name}}/resources/pipeline.yml.tmpl b/filepush/filepush-template/template/{{.schema_name}}/resources/pipeline.yml.tmpl new file mode 100644 index 00000000..7c609a38 --- /dev/null +++ b/filepush/filepush-template/template/{{.schema_name}}/resources/pipeline.yml.tmpl @@ -0,0 +1,14 @@ +# The table refresh pipeline for schema {{.schema_name}} + +resources: + pipelines: + {{.schema_name}}_pipeline: + name: {{.schema_name}}_pipeline + catalog: {{.catalog_name}} + schema: {{.schema_name}} + serverless: true + libraries: + - file: + path: ../src/pipelines/{{.schema_name}}_ingestion.py + configuration: + volume_path: /Volumes/{{.catalog_name}}/${resources.schemas.{{.schema_name}}.name}/{{.schema_name}}_volume/ diff --git a/filepush/filepush-template/template/{{.schema_name}}/resources/schema.yml.tmpl b/filepush/filepush-template/template/{{.schema_name}}/resources/schema.yml.tmpl new file mode 100644 index 00000000..abfc1ba1 --- /dev/null +++ b/filepush/filepush-template/template/{{.schema_name}}/resources/schema.yml.tmpl @@ -0,0 +1,7 @@ +# The schema {{.schema_name}} + +resources: + schemas: + {{.schema_name}}: + name: {{.schema_name}} + catalog_name: {{.catalog_name}} \ No newline at end of file diff --git a/filepush/filepush-template/template/{{.schema_name}}/resources/volume.yml.tmpl b/filepush/filepush-template/template/{{.schema_name}}/resources/volume.yml.tmpl new file mode 100644 index 00000000..ce3782bd --- /dev/null +++ b/filepush/filepush-template/template/{{.schema_name}}/resources/volume.yml.tmpl @@ -0,0 +1,8 @@ +# The file staging volume for schema{{.schema_name}} + +resources: + volumes: + {{.schema_name}}_volume: + name: {{.schema_name}}_volume + catalog_name: {{.catalog_name}} + schema_name: {{.schema_name}} diff --git a/filepush/filepush-template/template/{{.connector_name}}/src/pipelines/{{.connector_name}}_ingestion.py.tmpl b/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{.schema_name}}_ingestion.py.tmpl similarity index 77% rename from filepush/filepush-template/template/{{.connector_name}}/src/pipelines/{{.connector_name}}_ingestion.py.tmpl rename to filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{.schema_name}}_ingestion.py.tmpl index 719eecc2..05b7f44d 100644 --- a/filepush/filepush-template/template/{{.connector_name}}/src/pipelines/{{.connector_name}}_ingestion.py.tmpl +++ b/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{.schema_name}}_ingestion.py.tmpl @@ -2,19 +2,6 @@ import dlt from dbruntime.dbutils import FileInfo import re -@dlt.table( - name="{{template `raw_table_name` .}}", - 
comment="A streaming table created by filepush bundle {{.connector_name}}. This holds the raw data from the uploaded files.", - table_properties={ - "volume_path": "{{template `volume_path` .}}" - } -) -def {{template `raw_table_name` .}}(): - with open(f"./{{.connector_name}}_readfiles_kernel.sql", "r") as f: - kernel_query = f.read() - kernel_query = kernel_query.replace("read_files(", "STREAM read_files(") - return spark.sql(kernel_query) - # Dynamic Tables def sanitize_table_name(name: str) -> str: """ diff --git a/filepush/filepush-template/template/{{.connector_name}}/src/pipelines/{{.connector_name}}_readfiles_kernel.sql.tmpl b/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{.schema_name}}_readfiles_kernel.sql.tmpl similarity index 100% rename from filepush/filepush-template/template/{{.connector_name}}/src/pipelines/{{.connector_name}}_readfiles_kernel.sql.tmpl rename to filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{.schema_name}}_readfiles_kernel.sql.tmpl diff --git a/filepush/filepush-template/template/{{.connector_name}}/tools/get_pipeline_id.sh.tmpl b/filepush/filepush-template/template/{{.schema_name}}/tools/get_pipeline_id.sh.tmpl similarity index 69% rename from filepush/filepush-template/template/{{.connector_name}}/tools/get_pipeline_id.sh.tmpl rename to filepush/filepush-template/template/{{.schema_name}}/tools/get_pipeline_id.sh.tmpl index 92645102..a0ffe5e3 100755 --- a/filepush/filepush-template/template/{{.connector_name}}/tools/get_pipeline_id.sh.tmpl +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/get_pipeline_id.sh.tmpl @@ -1,2 +1,2 @@ #!/usr/bin/env bash -databricks bundle summary --output json | jq -r .resources.pipelines.{{.connector_name}}_pipeline.id +databricks bundle summary --output json | jq -r .resources.pipelines.{{.schema_name}}_pipeline.id diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_table_property.sh.tmpl b/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_table_property.sh.tmpl new file mode 100755 index 00000000..7e2ac123 --- /dev/null +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_table_property.sh.tmpl @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +databricks tables get {{.catalog_name}}.{{.schema_name}}.$1 --output json | jq '.properties.volume_path' diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh.tmpl b/filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh.tmpl new file mode 100755 index 00000000..2206e20f --- /dev/null +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh.tmpl @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +databricks bundle run {{.schema_name}}_pipeline diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/upload_to_volume.sh.tmpl b/filepush/filepush-template/template/{{.schema_name}}/tools/upload_to_volume.sh.tmpl new file mode 100755 index 00000000..285d1ae1 --- /dev/null +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/upload_to_volume.sh.tmpl @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +databricks fs cp $2 {{template `volume_path_url` .}}/$1 From 44e4ce6b4d90518154b2e39e00f5a62dd6e2ddde Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Tue, 12 Aug 2025 10:47:11 -0700 Subject: [PATCH 13/60] Fix shell script --- .../template/{{.schema_name}}/tools/env.sh.tmpl | 14 ++++++++++++++ .../{{.schema_name}}/tools/get_pipeline_id.sh.tmpl | 2 -- 
.../get_volume_path_from_table_property.sh.tmpl | 3 ++- .../tools/upload_to_volume.sh.tmpl | 4 +++- 4 files changed, 19 insertions(+), 4 deletions(-) create mode 100644 filepush/filepush-template/template/{{.schema_name}}/tools/env.sh.tmpl delete mode 100755 filepush/filepush-template/template/{{.schema_name}}/tools/get_pipeline_id.sh.tmpl diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/env.sh.tmpl b/filepush/filepush-template/template/{{.schema_name}}/tools/env.sh.tmpl new file mode 100644 index 00000000..619cef3c --- /dev/null +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/env.sh.tmpl @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +# Prevent running directly; this file must be *sourced* +(return 0 2>/dev/null) || { echo "Source this file: . $(basename "$0")"; exit 1; } + +# Idempotent guard +if [[ -n "${_FILEPUSH_ENV_LOADED:-}" ]]; then + return 0 +fi +export _FILEPUSH_ENV_LOADED=1 + +summary=$(databricks bundle summary --output json) +export FILEPUSH_CATALOG_NAME={{.catalog_name}} +export FILEPUSH_SCHEMA_NAME=$(echo $summary | jq -r '.resources.schemas.{{.schema_name}}.name') +export FILEPUSH_VOLUME_PATH=/Volumes/{{.catalog_name}}/${FILEPUSH_SCHEMA_NAME}/{{.schema_name}}_volume/ diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/get_pipeline_id.sh.tmpl b/filepush/filepush-template/template/{{.schema_name}}/tools/get_pipeline_id.sh.tmpl deleted file mode 100755 index a0ffe5e3..00000000 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/get_pipeline_id.sh.tmpl +++ /dev/null @@ -1,2 +0,0 @@ -#!/usr/bin/env bash -databricks bundle summary --output json | jq -r .resources.pipelines.{{.schema_name}}_pipeline.id diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_table_property.sh.tmpl b/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_table_property.sh.tmpl index 7e2ac123..744a80b0 100755 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_table_property.sh.tmpl +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_table_property.sh.tmpl @@ -1,2 +1,3 @@ #!/usr/bin/env bash -databricks tables get {{.catalog_name}}.{{.schema_name}}.$1 --output json | jq '.properties.volume_path' +. $(dirname $0)/env.sh +databricks tables get ${FILEPUSH_CATALOG_NAME}.${FILEPUSH_SCHEMA_NAME}.$1 --output json | jq '.properties.volume_path' diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/upload_to_volume.sh.tmpl b/filepush/filepush-template/template/{{.schema_name}}/tools/upload_to_volume.sh.tmpl index 285d1ae1..1ebd46d4 100755 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/upload_to_volume.sh.tmpl +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/upload_to_volume.sh.tmpl @@ -1,2 +1,4 @@ #!/usr/bin/env bash -databricks fs cp $2 {{template `volume_path_url` .}}/$1 +. 
$(dirname $0)/env.sh +databricks fs mkdir dbfs:${FILEPUSH_VOLUME_PATH}$1/ +databricks fs cp $2 dbfs:${FILEPUSH_VOLUME_PATH}$1/ From 9a9e93ba68f9372b11515b26b03538f360cdce36 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Tue, 12 Aug 2025 14:27:29 -0700 Subject: [PATCH 14/60] Working prototype for dynamic table kernel --- .../resources/volume.yml.tmpl | 4 +- .../{{.schema_name}}_ingestion.py.tmpl | 42 +++++++++++++++---- ...{{.schema_name}}_readfiles_kernel.sql.tmpl | 9 +++- .../tools/trigger_refresh.sh.tmpl | 1 + 4 files changed, 46 insertions(+), 10 deletions(-) diff --git a/filepush/filepush-template/template/{{.schema_name}}/resources/volume.yml.tmpl b/filepush/filepush-template/template/{{.schema_name}}/resources/volume.yml.tmpl index ce3782bd..95904249 100644 --- a/filepush/filepush-template/template/{{.schema_name}}/resources/volume.yml.tmpl +++ b/filepush/filepush-template/template/{{.schema_name}}/resources/volume.yml.tmpl @@ -1,8 +1,8 @@ -# The file staging volume for schema{{.schema_name}} +# The file staging volume for schema {{.schema_name}} resources: volumes: {{.schema_name}}_volume: name: {{.schema_name}}_volume catalog_name: {{.catalog_name}} - schema_name: {{.schema_name}} + schema_name: ${resources.schemas.{{.schema_name}}.name} diff --git a/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{.schema_name}}_ingestion.py.tmpl b/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{.schema_name}}_ingestion.py.tmpl index 05b7f44d..ff22d361 100644 --- a/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{.schema_name}}_ingestion.py.tmpl +++ b/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{.schema_name}}_ingestion.py.tmpl @@ -1,11 +1,12 @@ import dlt from dbruntime.dbutils import FileInfo import re +import os # Dynamic Tables def sanitize_table_name(name: str) -> str: """ - Make a valid, reasonably human-friendly table name from a folder name. + Sanitize a table name. - Lowercase - Replace non [a-z0-9_] with underscores - Ensure it doesn't start with a digit @@ -17,6 +18,16 @@ def sanitize_table_name(name: str) -> str: n = re.sub(r"_+", "_", n).strip("_") return n or "t_unnamed" +def is_valid_table_name(name: str) -> bool: + """ + Validate a table name. + - Must be alphanumeric + - Must not start with a digit + - Must not contain any special characters + """ + pat = re.compile(r'^[A-Za-z0-9_]+$') + return pat.match(name) is not None + def dbfs_is_dir(f: FileInfo): is_dir_attr = getattr(f, "isDir", None) return is_dir_attr() if callable(is_dir_attr) else f.name.endswith("/") @@ -28,16 +39,27 @@ def list_immediate_subdirs(path: str): if dbfs_is_dir(f): # f.name often ends with '/', drop it for a clean folder name clean_name = f.name[:-1] if f.name.endswith("/") else f.name - out.append((clean_name, f.path.removeprefix('dbfs:'))) + if is_valid_table_name(clean_name): + out.append((clean_name, f.path.removeprefix('dbfs:'))) + else: + print(f"Skipping invalid table name: {clean_name}. It must be alphanumeric connected by underscores and not start with a digit.") return out def make_dlt_table(subdir_name: str, subdir_path: str): """ Defines a DLT table for a given subfolder at import time. - Uses Auto Loader (streaming) if `streaming=True`, else batch reader. + If table does not exist, it will create a read_files kernel and use that to create the table. 
""" - table_name = sanitize_table_name(subdir_name) + kernel_file_name = f"./{{.schema_name}}_{table_name}_readfiles_kernel.sql" + + if not os.path.exists(kernel_file_name): + print(f"Initialize table {table_name}") + with open(f"./{{.schema_name}}_readfiles_kernel.sql", "r") as f: + kernel_query_fmt = f.read() + with open(kernel_file_name, "w") as f: + table_kernel_query = kernel_query_fmt % subdir_path + f.write(table_kernel_query) if len(dbutils.fs.ls(subdir_path)) > 0: @dlt.table( @@ -48,7 +70,13 @@ def make_dlt_table(subdir_name: str, subdir_path: str): } ) def _auto_loader_table(): - return spark.readStream.format("cloudFiles").option("cloudFiles.format","csv").load(subdir_path) - -for subdir_name, subdir_path in list_immediate_subdirs('{{template `volume_path` .}}'): + with open(kernel_file_name, "r") as f: + table_kernel_query = f.read() + print(table_kernel_query.replace("read_files(", "STREAM read_files(")) + return spark.sql(table_kernel_query.replace("read_files(", "STREAM read_files(")) + else: + print(f"Waiting for files to land in {subdir_path}") + +volume_path_root = spark.conf.get("volume_path") +for subdir_name, subdir_path in list_immediate_subdirs(volume_path_root): make_dlt_table(subdir_name, subdir_path) \ No newline at end of file diff --git a/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{.schema_name}}_readfiles_kernel.sql.tmpl b/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{.schema_name}}_readfiles_kernel.sql.tmpl index a04774eb..bf4b68b5 100644 --- a/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{.schema_name}}_readfiles_kernel.sql.tmpl +++ b/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{.schema_name}}_readfiles_kernel.sql.tmpl @@ -1,6 +1,13 @@ +-- Kernel template for read_files SELECT * FROM read_files( - '{{template `volume_path` .}}' + '%s' + , + -- Do not change anything above + -- Add any additional options below + -- Example: + -- header => 'true', + -- escape => '"' ) diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh.tmpl b/filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh.tmpl index 2206e20f..e279f57d 100755 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh.tmpl +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh.tmpl @@ -1,2 +1,3 @@ #!/usr/bin/env bash +. $(dirname $0)/env.sh databricks bundle run {{.schema_name}}_pipeline From 6eb2c6336a1077e3e3befd527889756f75b79da3 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Tue, 12 Aug 2025 15:06:27 -0700 Subject: [PATCH 15/60] Fix comma and trigger job instead --- filepush/filepush-template/databricks_template_schema.json | 2 +- .../src/pipelines/{{.schema_name}}_readfiles_kernel.sql.tmpl | 2 +- .../template/{{.schema_name}}/tools/trigger_refresh.sh.tmpl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/filepush/filepush-template/databricks_template_schema.json b/filepush/filepush-template/databricks_template_schema.json index 256f185a..66159bdc 100644 --- a/filepush/filepush-template/databricks_template_schema.json +++ b/filepush/filepush-template/databricks_template_schema.json @@ -13,5 +13,5 @@ "order": 3 } }, - "success_message": "\nYour file push bundle in {{.catalog_name}}.{{.schema_name}} has been created." + "success_message": "\nYour file push bundle under catalog and schema {{.catalog_name}}.{{.schema_name}} has been created." 
} diff --git a/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{.schema_name}}_readfiles_kernel.sql.tmpl b/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{.schema_name}}_readfiles_kernel.sql.tmpl index bf4b68b5..2fdcdc5e 100644 --- a/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{.schema_name}}_readfiles_kernel.sql.tmpl +++ b/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{.schema_name}}_readfiles_kernel.sql.tmpl @@ -4,10 +4,10 @@ SELECT FROM read_files( '%s' - , -- Do not change anything above -- Add any additional options below -- Example: + -- , -- header => 'true', -- escape => '"' ) diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh.tmpl b/filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh.tmpl index e279f57d..70652af9 100755 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh.tmpl +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh.tmpl @@ -1,3 +1,3 @@ #!/usr/bin/env bash . $(dirname $0)/env.sh -databricks bundle run {{.schema_name}}_pipeline +databricks bundle run {{.schema_name}}_job From 69436f691d6d235b1eb334ccb9a5e4ed1e01e28e Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Wed, 13 Aug 2025 12:34:41 -0700 Subject: [PATCH 16/60] Update property to all resource --- .../template/{{.schema_name}}/resources/pipeline.yml.tmpl | 4 ++-- .../template/{{.schema_name}}/resources/schema.yml.tmpl | 4 +++- ...{.schema_name}}_ingestion.py.tmpl => ingestion.py.tmpl} | 4 ++-- .../template/{{.schema_name}}/tools/env.sh.tmpl | 7 +++++++ .../tools/get_volume_path_from_pipeline_config.sh | 3 +++ .../tools/get_volume_path_from_schema_dbproperty.sh | 3 +++ ...erty.sh.tmpl => get_volume_path_from_table_property.sh} | 2 +- .../tools/set_volume_path_to_schema_dbproperty.sh | 3 +++ .../tools/{trigger_refresh.sh.tmpl => trigger_refresh.sh} | 2 +- .../{upload_to_volume.sh.tmpl => upload_to_volume.sh} | 4 ++++ 10 files changed, 29 insertions(+), 7 deletions(-) rename filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{{.schema_name}}_ingestion.py.tmpl => ingestion.py.tmpl} (96%) create mode 100755 filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_pipeline_config.sh create mode 100755 filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_schema_dbproperty.sh rename filepush/filepush-template/template/{{.schema_name}}/tools/{get_volume_path_from_table_property.sh.tmpl => get_volume_path_from_table_property.sh} (61%) create mode 100755 filepush/filepush-template/template/{{.schema_name}}/tools/set_volume_path_to_schema_dbproperty.sh rename filepush/filepush-template/template/{{.schema_name}}/tools/{trigger_refresh.sh.tmpl => trigger_refresh.sh} (50%) rename filepush/filepush-template/template/{{.schema_name}}/tools/{upload_to_volume.sh.tmpl => upload_to_volume.sh} (60%) diff --git a/filepush/filepush-template/template/{{.schema_name}}/resources/pipeline.yml.tmpl b/filepush/filepush-template/template/{{.schema_name}}/resources/pipeline.yml.tmpl index 7c609a38..21b124be 100644 --- a/filepush/filepush-template/template/{{.schema_name}}/resources/pipeline.yml.tmpl +++ b/filepush/filepush-template/template/{{.schema_name}}/resources/pipeline.yml.tmpl @@ -9,6 +9,6 @@ resources: serverless: true libraries: - file: - path: ../src/pipelines/{{.schema_name}}_ingestion.py + path: ../src/pipelines/ingestion.py configuration: - volume_path: 
/Volumes/{{.catalog_name}}/${resources.schemas.{{.schema_name}}.name}/{{.schema_name}}_volume/ + filepush.volume_path: /Volumes/{{.catalog_name}}/${resources.schemas.{{.schema_name}}.name}/{{.schema_name}}_volume/ diff --git a/filepush/filepush-template/template/{{.schema_name}}/resources/schema.yml.tmpl b/filepush/filepush-template/template/{{.schema_name}}/resources/schema.yml.tmpl index abfc1ba1..032b7b9d 100644 --- a/filepush/filepush-template/template/{{.schema_name}}/resources/schema.yml.tmpl +++ b/filepush/filepush-template/template/{{.schema_name}}/resources/schema.yml.tmpl @@ -4,4 +4,6 @@ resources: schemas: {{.schema_name}}: name: {{.schema_name}} - catalog_name: {{.catalog_name}} \ No newline at end of file + catalog_name: {{.catalog_name}} + properties: + filepush.volume_path: /Volumes/{{.catalog_name}}/{{.schema_name}}/{{.schema_name}}_volume/ \ No newline at end of file diff --git a/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{.schema_name}}_ingestion.py.tmpl b/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/ingestion.py.tmpl similarity index 96% rename from filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{.schema_name}}_ingestion.py.tmpl rename to filepush/filepush-template/template/{{.schema_name}}/src/pipelines/ingestion.py.tmpl index ff22d361..9c837652 100644 --- a/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{.schema_name}}_ingestion.py.tmpl +++ b/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/ingestion.py.tmpl @@ -66,7 +66,7 @@ def make_dlt_table(subdir_name: str, subdir_path: str): name=table_name, comment=f"Auto-created from subfolder: {subdir_path} (streaming via Auto Loader)", table_properties={ - "volume_path": f"{subdir_path}" + "filepush.volume_path": f"{subdir_path}" } ) def _auto_loader_table(): @@ -77,6 +77,6 @@ def make_dlt_table(subdir_name: str, subdir_path: str): else: print(f"Waiting for files to land in {subdir_path}") -volume_path_root = spark.conf.get("volume_path") +volume_path_root = spark.conf.get("filepush.volume_path") for subdir_name, subdir_path in list_immediate_subdirs(volume_path_root): make_dlt_table(subdir_name, subdir_path) \ No newline at end of file diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/env.sh.tmpl b/filepush/filepush-template/template/{{.schema_name}}/tools/env.sh.tmpl index 619cef3c..e9898794 100644 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/env.sh.tmpl +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/env.sh.tmpl @@ -1,8 +1,13 @@ #!/usr/bin/env bash +# This file is used to set the environment variables for the filepush bundle. +# It is sourced by the other scripts in the tools directory. +# This should be deployed **after** the bundle is deployed. + # Prevent running directly; this file must be *sourced* (return 0 2>/dev/null) || { echo "Source this file: . 
$(basename "$0")"; exit 1; } # Idempotent guard +# Check if the environment is already set and non-empty if [[ -n "${_FILEPUSH_ENV_LOADED:-}" ]]; then return 0 fi @@ -12,3 +17,5 @@ summary=$(databricks bundle summary --output json) export FILEPUSH_CATALOG_NAME={{.catalog_name}} export FILEPUSH_SCHEMA_NAME=$(echo $summary | jq -r '.resources.schemas.{{.schema_name}}.name') export FILEPUSH_VOLUME_PATH=/Volumes/{{.catalog_name}}/${FILEPUSH_SCHEMA_NAME}/{{.schema_name}}_volume/ +export FILEPUSH_PIPELINE_ID=$(echo $summary | jq -r '.resources.pipelines.{{.schema_name}}_pipeline.id') +export FILEPUSH_JOB_NAME={{.schema_name}}_job diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_pipeline_config.sh b/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_pipeline_config.sh new file mode 100755 index 00000000..9f618af4 --- /dev/null +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_pipeline_config.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +. $(dirname $0)/env.sh +databricks pipelines get $FILEPUSH_PIPELINE_ID --output json | jq '.spec.configuration["filepush.volume_path"]' diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_schema_dbproperty.sh b/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_schema_dbproperty.sh new file mode 100755 index 00000000..952e9098 --- /dev/null +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_schema_dbproperty.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +. $(dirname $0)/env.sh +databricks schemas get ${FILEPUSH_CATALOG_NAME}.${FILEPUSH_SCHEMA_NAME} --output json | jq '.properties["filepush.volume_path"]' diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_table_property.sh.tmpl b/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_table_property.sh similarity index 61% rename from filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_table_property.sh.tmpl rename to filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_table_property.sh index 744a80b0..df4b46c6 100755 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_table_property.sh.tmpl +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_table_property.sh @@ -1,3 +1,3 @@ #!/usr/bin/env bash . $(dirname $0)/env.sh -databricks tables get ${FILEPUSH_CATALOG_NAME}.${FILEPUSH_SCHEMA_NAME}.$1 --output json | jq '.properties.volume_path' +databricks tables get ${FILEPUSH_CATALOG_NAME}.${FILEPUSH_SCHEMA_NAME}.$1 --output json | jq '.properties["filepush.volume_path"]' diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/set_volume_path_to_schema_dbproperty.sh b/filepush/filepush-template/template/{{.schema_name}}/tools/set_volume_path_to_schema_dbproperty.sh new file mode 100755 index 00000000..0996e96f --- /dev/null +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/set_volume_path_to_schema_dbproperty.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +. 
$(dirname $0)/env.sh +databricks schemas update ${FILEPUSH_CATALOG_NAME}.${FILEPUSH_SCHEMA_NAME} --json '{ "properties": { "filepush.volume_path": "'${FILEPUSH_VOLUME_PATH}'" } }' diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh.tmpl b/filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh similarity index 50% rename from filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh.tmpl rename to filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh index 70652af9..bb724b10 100755 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh.tmpl +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh @@ -1,3 +1,3 @@ #!/usr/bin/env bash . $(dirname $0)/env.sh -databricks bundle run {{.schema_name}}_job +databricks bundle run $FILEPUSH_JOB_NAME diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/upload_to_volume.sh.tmpl b/filepush/filepush-template/template/{{.schema_name}}/tools/upload_to_volume.sh similarity index 60% rename from filepush/filepush-template/template/{{.schema_name}}/tools/upload_to_volume.sh.tmpl rename to filepush/filepush-template/template/{{.schema_name}}/tools/upload_to_volume.sh index 1ebd46d4..ee926ddc 100755 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/upload_to_volume.sh.tmpl +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/upload_to_volume.sh @@ -1,4 +1,8 @@ #!/usr/bin/env bash . $(dirname $0)/env.sh +if [ -z "$1" ] || [ -z "$2" ]; then + echo "Usage: $0 " + exit 1 +fi databricks fs mkdir dbfs:${FILEPUSH_VOLUME_PATH}$1/ databricks fs cp $2 dbfs:${FILEPUSH_VOLUME_PATH}$1/ From 28a28917085cf0377d2d1d1c88eb0405220bc7be Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Wed, 13 Aug 2025 15:21:21 -0700 Subject: [PATCH 17/60] Fix order number --- filepush/filepush-template/databricks_template_schema.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/filepush/filepush-template/databricks_template_schema.json b/filepush/filepush-template/databricks_template_schema.json index 66159bdc..f150630b 100644 --- a/filepush/filepush-template/databricks_template_schema.json +++ b/filepush/filepush-template/databricks_template_schema.json @@ -4,13 +4,13 @@ "type": "string", "default": "{{default_catalog}}", "description": "Name of the catalog where tables and pipelines will be created.", - "order": 2 + "order": 1 }, "schema_name": { "type": "string", "default": "default", "description": "Name of the schema where tables and pipelines will be created.", - "order": 3 + "order": 2 } }, "success_message": "\nYour file push bundle under catalog and schema {{.catalog_name}}.{{.schema_name}} has been created." 
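For reference, the same "filepush.volume_path" lookups that the helper scripts above perform with the databricks CLI and jq can be expressed with the Databricks Python SDK. This is an editor's sketch, not part of any patch: the catalog, schema, and table names are placeholders, and the pipeline id would come from `databricks bundle summary` just as in the scripts.

from databricks.sdk import WorkspaceClient

ws = WorkspaceClient()

# Analogue of get_volume_path_from_schema_dbproperty.sh
schema = ws.schemas.get(full_name="main.filepushschema")
print((schema.properties or {}).get("filepush.volume_path"))

# Analogue of get_volume_path_from_table_property.sh
table = ws.tables.get(full_name="main.filepushschema.my_table")
print((table.properties or {}).get("filepush.volume_path"))

# Analogue of get_volume_path_from_pipeline_config.sh
pipeline = ws.pipelines.get(pipeline_id="<pipeline-id>")
print((pipeline.spec.configuration or {}).get("filepush.volume_path"))

# Analogue of set_volume_path_to_schema_dbproperty.sh (path shown is a placeholder)
ws.schemas.update(
    full_name="main.filepushschema",
    properties={"filepush.volume_path": "/Volumes/main/filepushschema/filepushschema_volume/"},
)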
From c8e5d0eb165a443b342ec49680741741096dfe49 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Wed, 13 Aug 2025 16:50:13 -0700 Subject: [PATCH 18/60] Enable bundle target in tool scripts --- .../{{.schema_name}}/tools/env.sh.tmpl | 22 ++++++++++++++++++- .../get_volume_path_from_pipeline_config.sh | 6 ++++- .../get_volume_path_from_schema_dbproperty.sh | 6 ++++- .../get_volume_path_from_table_property.sh | 6 ++++- .../set_volume_path_to_schema_dbproperty.sh | 6 ++++- .../{{.schema_name}}/tools/trigger_refresh.sh | 6 ++++- .../tools/upload_to_volume.sh | 8 +++++-- 7 files changed, 52 insertions(+), 8 deletions(-) diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/env.sh.tmpl b/filepush/filepush-template/template/{{.schema_name}}/tools/env.sh.tmpl index e9898794..74bea68a 100644 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/env.sh.tmpl +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/env.sh.tmpl @@ -13,7 +13,27 @@ if [[ -n "${_FILEPUSH_ENV_LOADED:-}" ]]; then fi export _FILEPUSH_ENV_LOADED=1 -summary=$(databricks bundle summary --output json) +# Sets the target for the bundle +ARG_TARGET="dev" +ARG_POSITIONAL=() + +while [[ $# -gt 0 ]]; do +case "$1" in + --target) [[ $# -ge 2 ]] || { echo "Error: --target needs a value"; return 2; } + ARG_TARGET="$2"; shift 2 ;; + --target=*) ARG_TARGET="${1#*=}"; shift ;; + -t) [[ $# -ge 2 ]] || { echo "Error: -t needs a value"; return 2; } + ARG_TARGET="$2"; shift 2 ;; + --) shift; ARG_POSITIONAL+=("$@"); break ;; + -h|--help) usage; return 1 ;; + -*) echo "Unknown option: $1"; usage; return 2 ;; + *) ARG_POSITIONAL+=("$1"); shift ;; +esac +done + +export BUNDLE_TARGET=$ARG_TARGET + +summary=$(databricks bundle summary -t $BUNDLE_TARGET --output json) export FILEPUSH_CATALOG_NAME={{.catalog_name}} export FILEPUSH_SCHEMA_NAME=$(echo $summary | jq -r '.resources.schemas.{{.schema_name}}.name') export FILEPUSH_VOLUME_PATH=/Volumes/{{.catalog_name}}/${FILEPUSH_SCHEMA_NAME}/{{.schema_name}}_volume/ diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_pipeline_config.sh b/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_pipeline_config.sh index 9f618af4..f163b33f 100755 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_pipeline_config.sh +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_pipeline_config.sh @@ -1,3 +1,7 @@ #!/usr/bin/env bash -. $(dirname $0)/env.sh +usage() { + echo "Usage: $(basename $0) [--target=dev|prod]" +} +export -f usage +. $(dirname $0)/env.sh $@ databricks pipelines get $FILEPUSH_PIPELINE_ID --output json | jq '.spec.configuration["filepush.volume_path"]' diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_schema_dbproperty.sh b/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_schema_dbproperty.sh index 952e9098..b81776b7 100755 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_schema_dbproperty.sh +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_schema_dbproperty.sh @@ -1,3 +1,7 @@ #!/usr/bin/env bash -. $(dirname $0)/env.sh +usage() { + echo "Usage: $(basename $0) [--target=dev|prod]" +} +export -f usage +. 
$(dirname $0)/env.sh $@ databricks schemas get ${FILEPUSH_CATALOG_NAME}.${FILEPUSH_SCHEMA_NAME} --output json | jq '.properties["filepush.volume_path"]' diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_table_property.sh b/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_table_property.sh index df4b46c6..704471be 100755 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_table_property.sh +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_table_property.sh @@ -1,3 +1,7 @@ #!/usr/bin/env bash -. $(dirname $0)/env.sh +usage() { + echo "Usage: $(basename $0) [--target=dev|prod]" +} +export -f usage +. $(dirname $0)/env.sh $@ databricks tables get ${FILEPUSH_CATALOG_NAME}.${FILEPUSH_SCHEMA_NAME}.$1 --output json | jq '.properties["filepush.volume_path"]' diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/set_volume_path_to_schema_dbproperty.sh b/filepush/filepush-template/template/{{.schema_name}}/tools/set_volume_path_to_schema_dbproperty.sh index 0996e96f..69ffbc41 100755 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/set_volume_path_to_schema_dbproperty.sh +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/set_volume_path_to_schema_dbproperty.sh @@ -1,3 +1,7 @@ #!/usr/bin/env bash -. $(dirname $0)/env.sh +usage() { + echo "Usage: $(basename $0) [--target=dev|prod]" +} +export -f usage +. $(dirname $0)/env.sh $@ databricks schemas update ${FILEPUSH_CATALOG_NAME}.${FILEPUSH_SCHEMA_NAME} --json '{ "properties": { "filepush.volume_path": "'${FILEPUSH_VOLUME_PATH}'" } }' diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh b/filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh index bb724b10..e2536dd8 100755 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh @@ -1,3 +1,7 @@ #!/usr/bin/env bash -. $(dirname $0)/env.sh +usage() { + echo "Usage: $(basename $0) [--target=dev|prod]" +} +export -f usage +. $(dirname $0)/env.sh $@ databricks bundle run $FILEPUSH_JOB_NAME diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/upload_to_volume.sh b/filepush/filepush-template/template/{{.schema_name}}/tools/upload_to_volume.sh index ee926ddc..2d7dc8ce 100755 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/upload_to_volume.sh +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/upload_to_volume.sh @@ -1,7 +1,11 @@ #!/usr/bin/env bash -. $(dirname $0)/env.sh +usage() { + echo "Usage: $(basename $0) [--target=dev|prod]" +} +export -f usage +. 
$(dirname $0)/env.sh $@ if [ -z "$1" ] || [ -z "$2" ]; then - echo "Usage: $0 " + usage exit 1 fi databricks fs mkdir dbfs:${FILEPUSH_VOLUME_PATH}$1/ From 54faae01073aae107a70b549e15ec4c71f9a3326 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Wed, 13 Aug 2025 17:06:10 -0700 Subject: [PATCH 19/60] Embed bundle target to all scripts --- .../tools/get_volume_path_from_pipeline_config.sh | 2 +- .../tools/get_volume_path_from_schema_dbproperty.sh | 2 +- .../tools/get_volume_path_from_table_property.sh | 2 +- .../tools/set_volume_path_to_schema_dbproperty.sh | 2 +- .../template/{{.schema_name}}/tools/trigger_refresh.sh | 2 +- .../template/{{.schema_name}}/tools/upload_to_volume.sh | 4 ++-- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_pipeline_config.sh b/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_pipeline_config.sh index f163b33f..4fef63c6 100755 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_pipeline_config.sh +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_pipeline_config.sh @@ -4,4 +4,4 @@ usage() { } export -f usage . $(dirname $0)/env.sh $@ -databricks pipelines get $FILEPUSH_PIPELINE_ID --output json | jq '.spec.configuration["filepush.volume_path"]' +databricks pipelines get $FILEPUSH_PIPELINE_ID -t $BUNDLE_TARGET --output json | jq '.spec.configuration["filepush.volume_path"]' diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_schema_dbproperty.sh b/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_schema_dbproperty.sh index b81776b7..14da35c1 100755 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_schema_dbproperty.sh +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_schema_dbproperty.sh @@ -4,4 +4,4 @@ usage() { } export -f usage . $(dirname $0)/env.sh $@ -databricks schemas get ${FILEPUSH_CATALOG_NAME}.${FILEPUSH_SCHEMA_NAME} --output json | jq '.properties["filepush.volume_path"]' +databricks schemas get ${FILEPUSH_CATALOG_NAME}.${FILEPUSH_SCHEMA_NAME} -t $BUNDLE_TARGET --output json | jq '.properties["filepush.volume_path"]' diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_table_property.sh b/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_table_property.sh index 704471be..fb93f468 100755 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_table_property.sh +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_table_property.sh @@ -4,4 +4,4 @@ usage() { } export -f usage . 
$(dirname $0)/env.sh $@ -databricks tables get ${FILEPUSH_CATALOG_NAME}.${FILEPUSH_SCHEMA_NAME}.$1 --output json | jq '.properties["filepush.volume_path"]' +databricks tables get ${FILEPUSH_CATALOG_NAME}.${FILEPUSH_SCHEMA_NAME}.$1 -t $BUNDLE_TARGET --output json | jq '.properties["filepush.volume_path"]' diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/set_volume_path_to_schema_dbproperty.sh b/filepush/filepush-template/template/{{.schema_name}}/tools/set_volume_path_to_schema_dbproperty.sh index 69ffbc41..3169f1c0 100755 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/set_volume_path_to_schema_dbproperty.sh +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/set_volume_path_to_schema_dbproperty.sh @@ -4,4 +4,4 @@ usage() { } export -f usage . $(dirname $0)/env.sh $@ -databricks schemas update ${FILEPUSH_CATALOG_NAME}.${FILEPUSH_SCHEMA_NAME} --json '{ "properties": { "filepush.volume_path": "'${FILEPUSH_VOLUME_PATH}'" } }' +databricks schemas update ${FILEPUSH_CATALOG_NAME}.${FILEPUSH_SCHEMA_NAME} -t $BUNDLE_TARGET --json '{ "properties": { "filepush.volume_path": "'${FILEPUSH_VOLUME_PATH}'" } }' diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh b/filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh index e2536dd8..f85c34e1 100755 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh @@ -4,4 +4,4 @@ usage() { } export -f usage . $(dirname $0)/env.sh $@ -databricks bundle run $FILEPUSH_JOB_NAME +databricks bundle run $FILEPUSH_JOB_NAME -t $BUNDLE_TARGET diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/upload_to_volume.sh b/filepush/filepush-template/template/{{.schema_name}}/tools/upload_to_volume.sh index 2d7dc8ce..31e5fbde 100755 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/upload_to_volume.sh +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/upload_to_volume.sh @@ -8,5 +8,5 @@ if [ -z "$1" ] || [ -z "$2" ]; then usage exit 1 fi -databricks fs mkdir dbfs:${FILEPUSH_VOLUME_PATH}$1/ -databricks fs cp $2 dbfs:${FILEPUSH_VOLUME_PATH}$1/ +databricks fs mkdir dbfs:${FILEPUSH_VOLUME_PATH}$1/ -t $BUNDLE_TARGET +databricks fs cp $2 dbfs:${FILEPUSH_VOLUME_PATH}$1/ -t $BUNDLE_TARGET From 96811b80a301310d220ba2b24fd0887dd8241cd1 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Thu, 14 Aug 2025 13:26:45 -0700 Subject: [PATCH 20/60] Added CRUD --- filepush/create_filepush_schema.sh | 18 +++++++++++++++ filepush/drop_filepush_schema.sh | 22 +++++++++++++++++++ .../{{.schema_name}}/databricks.yml.tmpl | 3 +++ filepush/push_file_to_table.sh | 15 +++++++++++++ 4 files changed, 58 insertions(+) create mode 100755 filepush/create_filepush_schema.sh create mode 100755 filepush/drop_filepush_schema.sh create mode 100755 filepush/push_file_to_table.sh diff --git a/filepush/create_filepush_schema.sh b/filepush/create_filepush_schema.sh new file mode 100755 index 00000000..3815470d --- /dev/null +++ b/filepush/create_filepush_schema.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +usage() { + echo "Usage: $(basename $0) " +} +if [ -z "$1" ] || [ -z "$2" ]; then + usage + exit 1 +fi +if ! 
databricks catalogs get "$1" >/dev/null 2>&1; then + echo "Catalog \`$1\` not found (or no permission)" + exit 1 +fi +databricks bundle init filepush-template --config-file <(echo "{\"catalog_name\": \"$1\", \"schema_name\": \"$2\"}") +working_dir=$(pwd) +schema_name=$2 +cd $schema_name +databricks bundle deploy --force-lock --auto-approve -t prod +cd $working_dir \ No newline at end of file diff --git a/filepush/drop_filepush_schema.sh b/filepush/drop_filepush_schema.sh new file mode 100755 index 00000000..adbd2521 --- /dev/null +++ b/filepush/drop_filepush_schema.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +usage() { + echo "Usage: $(basename $0) " +} +if [ -z "$1" ] || [ -z "$2" ]; then + usage + exit 1 +fi +if ! databricks catalogs get "$1" >/dev/null 2>&1; then + echo "Catalog \`$1\` not found (or no permission)" + exit 1 +fi +volume_path=$(databricks schemas get $1.$2 --output json | jq -r '.properties["filepush.volume_path"]') +if [ -z "$volume_path" ] || [ "$volume_path" == "null" ]; then + echo "Schema \`$1.$2\` is not a filepush schema. Did you run create_filepush_schema.sh to create it?" + exit 1 +fi +working_dir=$(pwd) +schema_name=$2 +cd $schema_name +databricks bundle destroy --force-lock -t prod +cd $working_dir \ No newline at end of file diff --git a/filepush/filepush-template/template/{{.schema_name}}/databricks.yml.tmpl b/filepush/filepush-template/template/{{.schema_name}}/databricks.yml.tmpl index e1fe600b..d32c1802 100644 --- a/filepush/filepush-template/template/{{.schema_name}}/databricks.yml.tmpl +++ b/filepush/filepush-template/template/{{.schema_name}}/databricks.yml.tmpl @@ -7,6 +7,9 @@ bundle: include: - resources/*.yml +experimental: + skip_name_prefix_for_schema: true + targets: # The deployment targets. See https://docs.databricks.com/en/dev-tools/bundles/deployment-modes.html dev: diff --git a/filepush/push_file_to_table.sh b/filepush/push_file_to_table.sh new file mode 100755 index 00000000..e652b6b8 --- /dev/null +++ b/filepush/push_file_to_table.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +usage() { + echo "Usage: $(basename $0) " +} +if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ] || [ -z "$4" ]; then + usage + exit 1 +fi +volume_path=$(databricks schemas get $1.$2 --output json | jq -r '.properties["filepush.volume_path"]') +if [ -z "$volume_path" ] || [ "$volume_path" == "null" ]; then + echo "Schema \`$1.$2\` is not a filepush schema. Did you run create_filepush_schema.sh to create it?" 
+ exit 1 +fi +databricks fs mkdir dbfs:${volume_path}$3/ +databricks fs cp $4 dbfs:${volume_path}$3/ \ No newline at end of file From fd73830b458989868c547463d31647ead23ce08e Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Fri, 15 Aug 2025 17:06:17 -0700 Subject: [PATCH 21/60] Add new default options to kernel --- .../src/pipelines/{{.schema_name}}_readfiles_kernel.sql.tmpl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{.schema_name}}_readfiles_kernel.sql.tmpl b/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{.schema_name}}_readfiles_kernel.sql.tmpl index 2fdcdc5e..55666737 100644 --- a/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{.schema_name}}_readfiles_kernel.sql.tmpl +++ b/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{.schema_name}}_readfiles_kernel.sql.tmpl @@ -3,7 +3,9 @@ SELECT * FROM read_files( - '%s' + '%s', + ignoreCorruptFiles => 'true', -- If a different malformed file pushed and RocksDB has it enlisted, this will ignore the error when the file is deleted. + ignoreMissingFiles => 'true' -- If a different file format is accidentally pushed and RocksDB has it enlisted, this will ignore the error when the file is deleted. -- Do not change anything above -- Add any additional options below -- Example: From 0409893e1479ff6b8f59b3d6283bedc81df69a4d Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Tue, 19 Aug 2025 17:47:32 -0700 Subject: [PATCH 22/60] Add helper script to open all resource --- .../template/{{.schema_name}}/tools/env.sh.tmpl | 1 + .../{{.schema_name}}/tools/open_all_resources.sh | 10 ++++++++++ 2 files changed, 11 insertions(+) create mode 100755 filepush/filepush-template/template/{{.schema_name}}/tools/open_all_resources.sh diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/env.sh.tmpl b/filepush/filepush-template/template/{{.schema_name}}/tools/env.sh.tmpl index 74bea68a..a5a64d7c 100644 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/env.sh.tmpl +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/env.sh.tmpl @@ -34,6 +34,7 @@ done export BUNDLE_TARGET=$ARG_TARGET summary=$(databricks bundle summary -t $BUNDLE_TARGET --output json) +export FILEPUSH_BUNDLE_NAME={{.schema_name}} export FILEPUSH_CATALOG_NAME={{.catalog_name}} export FILEPUSH_SCHEMA_NAME=$(echo $summary | jq -r '.resources.schemas.{{.schema_name}}.name') export FILEPUSH_VOLUME_PATH=/Volumes/{{.catalog_name}}/${FILEPUSH_SCHEMA_NAME}/{{.schema_name}}_volume/ diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/open_all_resources.sh b/filepush/filepush-template/template/{{.schema_name}}/tools/open_all_resources.sh new file mode 100755 index 00000000..6abf8173 --- /dev/null +++ b/filepush/filepush-template/template/{{.schema_name}}/tools/open_all_resources.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash +usage() { + echo "Usage: $(basename $0) [--target=dev|prod]" +} +export -f usage +. 
$(dirname $0)/env.sh $@ +databricks bundle open ${FILEPUSH_BUNDLE_NAME} -t $BUNDLE_TARGET +databricks bundle open ${FILEPUSH_BUNDLE_NAME}_job -t $BUNDLE_TARGET +databricks bundle open ${FILEPUSH_BUNDLE_NAME}_pipeline -t $BUNDLE_TARGET +databricks bundle open ${FILEPUSH_BUNDLE_NAME}_volume -t $BUNDLE_TARGET \ No newline at end of file From 644345e1209199f00dfd2b7625dfd7ffca335eca Mon Sep 17 00:00:00 2001 From: chi-yang-db <117940157+chi-yang-db@users.noreply.github.com> Date: Thu, 11 Sep 2025 15:10:54 -0700 Subject: [PATCH 23/60] Before migrating to new CUJ From b0896381ec78f4bd79e97333d6147a71c716f150 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Thu, 11 Sep 2025 17:22:40 -0700 Subject: [PATCH 24/60] Successful deployment after dab conversion --- filepush/dab/databricks.yml | 36 +++++++++++++++++++ filepush/dab/resources/job.yml | 25 +++++++++++++ filepush/dab/resources/pipeline.yml | 12 +++++++ filepush/dab/resources/schema.yml | 7 ++++ filepush/dab/resources/volume.yml | 8 +++++ filepush/dab/src/initialization.py | 0 .../src/pipelines/dab_readfiles_kernel.sql | 15 ++++++++ filepush/dab/src/pipelines/ingestion.py | 0 8 files changed, 103 insertions(+) create mode 100644 filepush/dab/databricks.yml create mode 100644 filepush/dab/resources/job.yml create mode 100644 filepush/dab/resources/pipeline.yml create mode 100644 filepush/dab/resources/schema.yml create mode 100644 filepush/dab/resources/volume.yml create mode 100644 filepush/dab/src/initialization.py create mode 100644 filepush/dab/src/pipelines/dab_readfiles_kernel.sql create mode 100644 filepush/dab/src/pipelines/ingestion.py diff --git a/filepush/dab/databricks.yml b/filepush/dab/databricks.yml new file mode 100644 index 00000000..28003832 --- /dev/null +++ b/filepush/dab/databricks.yml @@ -0,0 +1,36 @@ +# databricks.yml +# This is the configuration for the file push DAB dab. + +bundle: + name: dab + +include: + - resources/*.yml + +# experimental: +# skip_name_prefix_for_schema: true + +targets: + # The deployment targets. See https://docs.databricks.com/en/dev-tools/bundles/deployment-modes.html + dev: + mode: development + default: true + workspace: + host: https://e2-dogfood.staging.cloud.databricks.com + + prod: + mode: production + workspace: + host: https://e2-dogfood.staging.cloud.databricks.com + # root_path: /Workspace/Users/chi.yang@databricks.com/.bundle/${bundle.name}/${bundle.target} + permissions: + - user_name: chi.yang@databricks.com + level: CAN_MANAGE + +variables: + catalog_name: + description: The existing catalog where the schema will be created. + default: main + schema_name: + description: The name of the schema where the tables and ingestion pipeline will be created. 
+ default: filepushschema diff --git a/filepush/dab/resources/job.yml b/filepush/dab/resources/job.yml new file mode 100644 index 00000000..08b13bb6 --- /dev/null +++ b/filepush/dab/resources/job.yml @@ -0,0 +1,25 @@ +# The main job for schema dab +# This job will trigger in the schema pipeline + +resources: + jobs: + filetrigger_job: + name: ${var.schema_name}_filetrigger_job + tasks: + - task_key: pipeline_refresh + pipeline_task: + pipeline_id: ${resources.pipelines.refresh_pipeline.id} + configuration_job: + name: ${var.schema_name}_configuration_job + tasks: + - task_key: initialization + spark_python_task: + python_file: ../src/initialization.py + environment_key: serverless + - task_key: trigger_refresh + run_job_task: + job_id: ${resources.jobs.filetrigger_job.id} + environments: + - environment_key: serverless + spec: + client: "3" diff --git a/filepush/dab/resources/pipeline.yml b/filepush/dab/resources/pipeline.yml new file mode 100644 index 00000000..f1dee3b1 --- /dev/null +++ b/filepush/dab/resources/pipeline.yml @@ -0,0 +1,12 @@ +# The table refresh pipeline for schema dab + +resources: + pipelines: + refresh_pipeline: + name: ${var.schema_name}_refresh_pipeline + catalog: ${var.catalog_name} + schema: ${var.schema_name} + serverless: true + libraries: + - file: + path: ../src/pipelines/ingestion.py diff --git a/filepush/dab/resources/schema.yml b/filepush/dab/resources/schema.yml new file mode 100644 index 00000000..28eae88a --- /dev/null +++ b/filepush/dab/resources/schema.yml @@ -0,0 +1,7 @@ +# The schema dab + +resources: + schemas: + main_schema: + name: ${var.schema_name} + catalog_name: ${var.catalog_name} \ No newline at end of file diff --git a/filepush/dab/resources/volume.yml b/filepush/dab/resources/volume.yml new file mode 100644 index 00000000..b479d607 --- /dev/null +++ b/filepush/dab/resources/volume.yml @@ -0,0 +1,8 @@ +# The file staging volume for schema dab + +resources: + volumes: + filepush_volume: + name: ${var.schema_name}_filepush_volume + catalog_name: ${var.catalog_name} + schema_name: ${var.schema_name} diff --git a/filepush/dab/src/initialization.py b/filepush/dab/src/initialization.py new file mode 100644 index 00000000..e69de29b diff --git a/filepush/dab/src/pipelines/dab_readfiles_kernel.sql b/filepush/dab/src/pipelines/dab_readfiles_kernel.sql new file mode 100644 index 00000000..55666737 --- /dev/null +++ b/filepush/dab/src/pipelines/dab_readfiles_kernel.sql @@ -0,0 +1,15 @@ +-- Kernel template for read_files +SELECT + * +FROM + read_files( + '%s', + ignoreCorruptFiles => 'true', -- If a different malformed file pushed and RocksDB has it enlisted, this will ignore the error when the file is deleted. + ignoreMissingFiles => 'true' -- If a different file format is accidentally pushed and RocksDB has it enlisted, this will ignore the error when the file is deleted. 
+ -- Do not change anything above + -- Add any additional options below + -- Example: + -- , + -- header => 'true', + -- escape => '"' + ) diff --git a/filepush/dab/src/pipelines/ingestion.py b/filepush/dab/src/pipelines/ingestion.py new file mode 100644 index 00000000..e69de29b From 7b5c85f6e95bd74b99f627c965edc100610bfb77 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Fri, 12 Sep 2025 16:52:53 -0700 Subject: [PATCH 25/60] Add path property if possible --- filepush/dab/databricks.yml | 4 ++++ filepush/dab/resources/job.yml | 7 +++++-- filepush/dab/resources/pipeline.yml | 4 +++- filepush/dab/resources/schema.yml | 2 +- filepush/dab/resources/volume.yml | 2 +- 5 files changed, 14 insertions(+), 5 deletions(-) diff --git a/filepush/dab/databricks.yml b/filepush/dab/databricks.yml index 28003832..411ab284 100644 --- a/filepush/dab/databricks.yml +++ b/filepush/dab/databricks.yml @@ -34,3 +34,7 @@ variables: schema_name: description: The name of the schema where the tables and ingestion pipeline will be created. default: filepushschema + resource_name_prefix: + description: The prefix for the resource names. + default: ${var.catalog_name}_${var.schema_name}_ + \ No newline at end of file diff --git a/filepush/dab/resources/job.yml b/filepush/dab/resources/job.yml index 08b13bb6..1648ed6a 100644 --- a/filepush/dab/resources/job.yml +++ b/filepush/dab/resources/job.yml @@ -4,13 +4,16 @@ resources: jobs: filetrigger_job: - name: ${var.schema_name}_filetrigger_job + name: ${var.resource_name_prefix}filetrigger_job tasks: - task_key: pipeline_refresh pipeline_task: pipeline_id: ${resources.pipelines.refresh_pipeline.id} + trigger: + file_arrival: + url: ${resources.volumes.filepush_volume.volume_path}/data/ configuration_job: - name: ${var.schema_name}_configuration_job + name: ${var.resource_name_prefix}configuration_job tasks: - task_key: initialization spark_python_task: diff --git a/filepush/dab/resources/pipeline.yml b/filepush/dab/resources/pipeline.yml index f1dee3b1..84a74f67 100644 --- a/filepush/dab/resources/pipeline.yml +++ b/filepush/dab/resources/pipeline.yml @@ -3,10 +3,12 @@ resources: pipelines: refresh_pipeline: - name: ${var.schema_name}_refresh_pipeline + name: ${var.resource_name_prefix}refresh_pipeline catalog: ${var.catalog_name} schema: ${var.schema_name} serverless: true libraries: - file: path: ../src/pipelines/ingestion.py + configuration: + filepush.volume_path: ${resources.volumes.filepush_volume.volume_path} diff --git a/filepush/dab/resources/schema.yml b/filepush/dab/resources/schema.yml index 28eae88a..72500a02 100644 --- a/filepush/dab/resources/schema.yml +++ b/filepush/dab/resources/schema.yml @@ -4,4 +4,4 @@ resources: schemas: main_schema: name: ${var.schema_name} - catalog_name: ${var.catalog_name} \ No newline at end of file + catalog_name: ${var.catalog_name} diff --git a/filepush/dab/resources/volume.yml b/filepush/dab/resources/volume.yml index b479d607..ac8929c8 100644 --- a/filepush/dab/resources/volume.yml +++ b/filepush/dab/resources/volume.yml @@ -3,6 +3,6 @@ resources: volumes: filepush_volume: - name: ${var.schema_name}_filepush_volume + name: ${var.resource_name_prefix}filepush_volume catalog_name: ${var.catalog_name} schema_name: ${var.schema_name} From 14657340a3f66813feec8b284937819d8223bfe5 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Mon, 15 Sep 2025 10:08:17 -0700 Subject: [PATCH 26/60] Successfully pass in parameters --- filepush/dab/resources/job.yml | 12 ++++++++++++ filepush/dab/src/initialization.py | 9 +++++++++ 2 files changed, 
21 insertions(+) diff --git a/filepush/dab/resources/job.yml b/filepush/dab/resources/job.yml index 1648ed6a..744ba835 100644 --- a/filepush/dab/resources/job.yml +++ b/filepush/dab/resources/job.yml @@ -18,11 +18,23 @@ resources: - task_key: initialization spark_python_task: python_file: ../src/initialization.py + parameters: + - "--catalog_name" + - "{{job.parameters.catalog_name}}" + - "--schema_name" + - "{{job.parameters.schema_name}}" environment_key: serverless - task_key: trigger_refresh run_job_task: job_id: ${resources.jobs.filetrigger_job.id} + depends_on: + - task_key: initialization environments: - environment_key: serverless spec: client: "3" + parameters: + - name: catalog_name + default: ${var.catalog_name} + - name: schema_name + default: ${resources.schemas.main_schema.name} diff --git a/filepush/dab/src/initialization.py b/filepush/dab/src/initialization.py index e69de29b..46b10aaa 100644 --- a/filepush/dab/src/initialization.py +++ b/filepush/dab/src/initialization.py @@ -0,0 +1,9 @@ +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument("--catalog_name", type=str, required=True) +parser.add_argument("--schema_name", type=str, required=True) +args = parser.parse_args() + +print(f"Catalog: {args.catalog_name}") +print(f"Schema: {args.schema_name}") \ No newline at end of file From 9eb37b7764dd66095d5dfdb530dc17377c55aff0 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Mon, 15 Sep 2025 13:44:20 -0700 Subject: [PATCH 27/60] Create basic folder structure in volume --- filepush/dab/resources/job.yml | 10 ++++++++ filepush/dab/resources/pipeline.yml | 3 ++- filepush/dab/src/initialization.py | 39 +++++++++++++++++++++++++++-- 3 files changed, 49 insertions(+), 3 deletions(-) diff --git a/filepush/dab/resources/job.yml b/filepush/dab/resources/job.yml index 744ba835..ad6d1568 100644 --- a/filepush/dab/resources/job.yml +++ b/filepush/dab/resources/job.yml @@ -23,6 +23,12 @@ resources: - "{{job.parameters.catalog_name}}" - "--schema_name" - "{{job.parameters.schema_name}}" + - "--volume_path_root" + - "{{job.parameters.volume_path_root}}" + - "--volume_path_data" + - "{{job.parameters.volume_path_data}}" + - "--logging_level" + - "${bundle.target}" environment_key: serverless - task_key: trigger_refresh run_job_task: @@ -38,3 +44,7 @@ resources: default: ${var.catalog_name} - name: schema_name default: ${resources.schemas.main_schema.name} + - name: volume_path_root + default: ${resources.volumes.filepush_volume.volume_path} + - name: volume_path_data + default: ${resources.volumes.filepush_volume.volume_path}/data diff --git a/filepush/dab/resources/pipeline.yml b/filepush/dab/resources/pipeline.yml index 84a74f67..bb0d1757 100644 --- a/filepush/dab/resources/pipeline.yml +++ b/filepush/dab/resources/pipeline.yml @@ -11,4 +11,5 @@ resources: - file: path: ../src/pipelines/ingestion.py configuration: - filepush.volume_path: ${resources.volumes.filepush_volume.volume_path} + filepush.volume_path_root: ${resources.volumes.filepush_volume.volume_path} + filepush.volume_path_data: ${resources.volumes.filepush_volume.volume_path}/data diff --git a/filepush/dab/src/initialization.py b/filepush/dab/src/initialization.py index 46b10aaa..eaeea593 100644 --- a/filepush/dab/src/initialization.py +++ b/filepush/dab/src/initialization.py @@ -1,9 +1,44 @@ import argparse +import logging +from databricks.sdk import WorkspaceClient +# Parse arguments parser = argparse.ArgumentParser() parser.add_argument("--catalog_name", type=str, required=True) 
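# (Editor's note, illustration only) These flags are not meant to be typed by hand:
# the configuration_job task above forwards them as spark_python_task parameters,
# e.g. "--catalog_name {{job.parameters.catalog_name}}", and the job-level
# parameters default to ${var.catalog_name} and the deployed schema name, so
# parse_args() below effectively receives something like
#   ["--catalog_name", "main", "--schema_name", "filepushschema"].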
parser.add_argument("--schema_name", type=str, required=True) +parser.add_argument("--volume_path_root", type=str, required=True) +parser.add_argument("--volume_path_data", type=str, required=True) +parser.add_argument("--logging_level", type=str, required=False, default="dev") args = parser.parse_args() -print(f"Catalog: {args.catalog_name}") -print(f"Schema: {args.schema_name}") \ No newline at end of file +catalog_name = args.catalog_name +schema_name = args.schema_name +volume_path_root = args.volume_path_root +volume_path_data = args.volume_path_data +logging_level = logging.DEBUG if args.logging_level == "dev" else logging.INFO + +# Logging +logging.basicConfig( + level=logging_level, + format="%(asctime)s [%(levelname)s] %(module)s - %(message)s" +) +logger = logging.getLogger(__name__) # per-module logger + +# Initialize workspace client +ws = WorkspaceClient() + +# Set property to schema +logger.info(f"Setting property to schema {catalog_name}.{schema_name}") +logger.debug(f"Volume path root: {volume_path_root}") +logger.debug(f"Volume path data: {volume_path_data}") +ws.schemas.update(full_name=f"{catalog_name}.{schema_name}", properties={ + "filepush.volume_path_root": volume_path_root, + "filepush.volume_path_data": volume_path_data +}) +logger.info(f"Schema {catalog_name}.{schema_name} configured") + +# Initialize volume folder structure +logger.info(f"Initializing volume folder structure {volume_path_root}") +logger.debug(f"Creating volume directory {volume_path_data}") +ws.files.create_directory(volume_path_data) +logger.info(f"Volume {volume_path_root} configured") From 53788238fed4bce057117f763ebde6b66a12ea1f Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Mon, 15 Sep 2025 15:33:04 -0700 Subject: [PATCH 28/60] Infer data path --- filepush/dab/resources/job.yml | 4 ---- filepush/dab/resources/pipeline.yml | 3 +-- filepush/dab/src/initialization.py | 5 ++--- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/filepush/dab/resources/job.yml b/filepush/dab/resources/job.yml index ad6d1568..3fc21603 100644 --- a/filepush/dab/resources/job.yml +++ b/filepush/dab/resources/job.yml @@ -25,8 +25,6 @@ resources: - "{{job.parameters.schema_name}}" - "--volume_path_root" - "{{job.parameters.volume_path_root}}" - - "--volume_path_data" - - "{{job.parameters.volume_path_data}}" - "--logging_level" - "${bundle.target}" environment_key: serverless @@ -46,5 +44,3 @@ resources: default: ${resources.schemas.main_schema.name} - name: volume_path_root default: ${resources.volumes.filepush_volume.volume_path} - - name: volume_path_data - default: ${resources.volumes.filepush_volume.volume_path}/data diff --git a/filepush/dab/resources/pipeline.yml b/filepush/dab/resources/pipeline.yml index bb0d1757..037b8061 100644 --- a/filepush/dab/resources/pipeline.yml +++ b/filepush/dab/resources/pipeline.yml @@ -5,11 +5,10 @@ resources: refresh_pipeline: name: ${var.resource_name_prefix}refresh_pipeline catalog: ${var.catalog_name} - schema: ${var.schema_name} + schema: ${resources.schemas.main_schema.name} serverless: true libraries: - file: path: ../src/pipelines/ingestion.py configuration: filepush.volume_path_root: ${resources.volumes.filepush_volume.volume_path} - filepush.volume_path_data: ${resources.volumes.filepush_volume.volume_path}/data diff --git a/filepush/dab/src/initialization.py b/filepush/dab/src/initialization.py index eaeea593..351927fd 100644 --- a/filepush/dab/src/initialization.py +++ b/filepush/dab/src/initialization.py @@ -7,14 +7,13 @@ 
parser.add_argument("--catalog_name", type=str, required=True) parser.add_argument("--schema_name", type=str, required=True) parser.add_argument("--volume_path_root", type=str, required=True) -parser.add_argument("--volume_path_data", type=str, required=True) parser.add_argument("--logging_level", type=str, required=False, default="dev") args = parser.parse_args() catalog_name = args.catalog_name schema_name = args.schema_name volume_path_root = args.volume_path_root -volume_path_data = args.volume_path_data +volume_path_data = args.volume_path_root + "/data" logging_level = logging.DEBUG if args.logging_level == "dev" else logging.INFO # Logging @@ -39,6 +38,6 @@ # Initialize volume folder structure logger.info(f"Initializing volume folder structure {volume_path_root}") -logger.debug(f"Creating volume directory {volume_path_data}") +logger.debug(f"Creating data directory {volume_path_data}") ws.files.create_directory(volume_path_data) logger.info(f"Volume {volume_path_root} configured") From ebff04a5a9975ee947ad9928c3161c02d2658d9f Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Mon, 15 Sep 2025 16:18:14 -0700 Subject: [PATCH 29/60] tidy up directory --- filepush/dab/resources/pipeline.yml | 3 ++- filepush/dab/src/{pipelines => }/ingestion.py | 0 .../dab/src/pipelines/dab_readfiles_kernel.sql | 15 --------------- 3 files changed, 2 insertions(+), 16 deletions(-) rename filepush/dab/src/{pipelines => }/ingestion.py (100%) delete mode 100644 filepush/dab/src/pipelines/dab_readfiles_kernel.sql diff --git a/filepush/dab/resources/pipeline.yml b/filepush/dab/resources/pipeline.yml index 037b8061..e30c4ae6 100644 --- a/filepush/dab/resources/pipeline.yml +++ b/filepush/dab/resources/pipeline.yml @@ -9,6 +9,7 @@ resources: serverless: true libraries: - file: - path: ../src/pipelines/ingestion.py + path: ../src/ingestion.py + root_path: ../src configuration: filepush.volume_path_root: ${resources.volumes.filepush_volume.volume_path} diff --git a/filepush/dab/src/pipelines/ingestion.py b/filepush/dab/src/ingestion.py similarity index 100% rename from filepush/dab/src/pipelines/ingestion.py rename to filepush/dab/src/ingestion.py diff --git a/filepush/dab/src/pipelines/dab_readfiles_kernel.sql b/filepush/dab/src/pipelines/dab_readfiles_kernel.sql deleted file mode 100644 index 55666737..00000000 --- a/filepush/dab/src/pipelines/dab_readfiles_kernel.sql +++ /dev/null @@ -1,15 +0,0 @@ --- Kernel template for read_files -SELECT - * -FROM - read_files( - '%s', - ignoreCorruptFiles => 'true', -- If a different malformed file pushed and RocksDB has it enlisted, this will ignore the error when the file is deleted. - ignoreMissingFiles => 'true' -- If a different file format is accidentally pushed and RocksDB has it enlisted, this will ignore the error when the file is deleted. 
- -- Do not change anything above - -- Add any additional options below - -- Example: - -- , - -- header => 'true', - -- escape => '"' - ) From 802549cbda2729fa66b21f604be4a18c7cbf4aa5 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Mon, 15 Sep 2025 20:53:34 -0700 Subject: [PATCH 30/60] Migrate filed and add initialization script --- filepush/dab/resources/job.yml | 2 +- filepush/dab/src/configs/tables.json | 11 +++++++ filepush/dab/src/debug_table.py | 31 +++++++++++++++++++ .../dab/src/{ => utils}/initialization.py | 12 ++++++- 4 files changed, 54 insertions(+), 2 deletions(-) create mode 100644 filepush/dab/src/configs/tables.json create mode 100644 filepush/dab/src/debug_table.py rename filepush/dab/src/{ => utils}/initialization.py (84%) diff --git a/filepush/dab/resources/job.yml b/filepush/dab/resources/job.yml index 3fc21603..f8fdaac9 100644 --- a/filepush/dab/resources/job.yml +++ b/filepush/dab/resources/job.yml @@ -17,7 +17,7 @@ resources: tasks: - task_key: initialization spark_python_task: - python_file: ../src/initialization.py + python_file: ../src/utils/initialization.py parameters: - "--catalog_name" - "{{job.parameters.catalog_name}}" diff --git a/filepush/dab/src/configs/tables.json b/filepush/dab/src/configs/tables.json new file mode 100644 index 00000000..81cb1560 --- /dev/null +++ b/filepush/dab/src/configs/tables.json @@ -0,0 +1,11 @@ +[ + { + "name": "dummy", + "format": "csv", + "format_options": { + "header": "true", + "escape": "\"" + }, + "schema_hints": "id int, name string" + } +] diff --git a/filepush/dab/src/debug_table.py b/filepush/dab/src/debug_table.py new file mode 100644 index 00000000..ef8559ec --- /dev/null +++ b/filepush/dab/src/debug_table.py @@ -0,0 +1,31 @@ +# Databricks notebook source +import json +import os + +# Widget +dbutils.widgets.text("table_name", "", "Table Name") + +# Load configs to environment json +environment_path = "./configs/environment.json" +table_configs_path = "./configs/tables.json" + +assert os.path.exists(environment_path), f"Missing environment file: {environment_path}. Have you run `databricks bundle run configuration_job`?" +assert os.path.exists(table_configs_path), f"Missing table configs file: {table_configs_path}" + +with open(environment_path, "r") as f: + configs = json.load(f) +with open(table_configs_path, "r") as f: + table_configs = json.load(f) + +catalog_name = configs["catalog_name"] +schema_name = configs["schema_name"] +table_name = dbutils.widgets.get("table_name") +table_volume_path_data = configs["volume_path_data"] + f"/{table_name}" + +# Locate table config +matches = [table_config for table_config in table_configs if table_config.get("name") == "dummy"] +assert len(matches) == 1, f"Expect exactly 1 config for table `{table_name}`. Found {len(matches)}. Please fix the config file." 
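# --- Editor's illustration (not part of the notebook) --------------------------
# configs/tables.json is expected to hold a list with one object per table, keyed
# by "name"; the lookup above relies on exactly one entry matching. A second
# (hypothetical) entry would follow the same shape as the bundled "dummy" example:
#
#   {
#     "name": "all_employees",
#     "format": "csv",
#     "format_options": {"header": "true", "escape": "\""},
#     "schema_hints": "id int, name string"
#   }
# --------------------------------------------------------------------------------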
+table_config = matches[0] + +print(f"Table Volume Path: {table_volume_path_data}") +print(f"Table Config: {table_config}") diff --git a/filepush/dab/src/initialization.py b/filepush/dab/src/utils/initialization.py similarity index 84% rename from filepush/dab/src/initialization.py rename to filepush/dab/src/utils/initialization.py index 351927fd..28b7fe64 100644 --- a/filepush/dab/src/initialization.py +++ b/filepush/dab/src/utils/initialization.py @@ -1,6 +1,7 @@ +from databricks.sdk import WorkspaceClient import argparse +import json import logging -from databricks.sdk import WorkspaceClient # Parse arguments parser = argparse.ArgumentParser() @@ -41,3 +42,12 @@ logger.debug(f"Creating data directory {volume_path_data}") ws.files.create_directory(volume_path_data) logger.info(f"Volume {volume_path_root} configured") + +# Dump configs to environment json +with open("./configs/environment.json", "w") as f: + json.dump({ + "catalog_name": catalog_name, + "schema_name": schema_name, + "volume_path_root": volume_path_root, + "volume_path_data": volume_path_data + }, f) From 5ef1e055afeb9200fe4cbdcad4374070c25847c2 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Mon, 15 Sep 2025 20:56:14 -0700 Subject: [PATCH 31/60] Fix relative path issue --- filepush/dab/src/utils/initialization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/filepush/dab/src/utils/initialization.py b/filepush/dab/src/utils/initialization.py index 28b7fe64..2d7459f4 100644 --- a/filepush/dab/src/utils/initialization.py +++ b/filepush/dab/src/utils/initialization.py @@ -44,7 +44,7 @@ logger.info(f"Volume {volume_path_root} configured") # Dump configs to environment json -with open("./configs/environment.json", "w") as f: +with open("../configs/environment.json", "w") as f: json.dump({ "catalog_name": catalog_name, "schema_name": schema_name, From b9546d06098b2793c2612153e0246ca1c1cac7c1 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Mon, 15 Sep 2025 21:51:53 -0700 Subject: [PATCH 32/60] Add config manager and a working debug notebook --- filepush/dab/src/debug_table.py | 14 +++++++++++++- filepush/dab/src/utils/configmanager.py | 0 filepush/dab/src/utils/initialization.py | 5 +++++ 3 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 filepush/dab/src/utils/configmanager.py diff --git a/filepush/dab/src/debug_table.py b/filepush/dab/src/debug_table.py index ef8559ec..a7343fe3 100644 --- a/filepush/dab/src/debug_table.py +++ b/filepush/dab/src/debug_table.py @@ -20,12 +20,24 @@ catalog_name = configs["catalog_name"] schema_name = configs["schema_name"] table_name = dbutils.widgets.get("table_name") +assert table_name, "Please provide a table name" table_volume_path_data = configs["volume_path_data"] + f"/{table_name}" # Locate table config -matches = [table_config for table_config in table_configs if table_config.get("name") == "dummy"] +matches = [table_config for table_config in table_configs if table_config.get("name") == table_name] assert len(matches) == 1, f"Expect exactly 1 config for table `{table_name}`. Found {len(matches)}. Please fix the config file." 
table_config = matches[0] print(f"Table Volume Path: {table_volume_path_data}") print(f"Table Config: {table_config}") + +# COMMAND ---------- + +import tempfile +from utils import configmanager + +with tempfile.TemporaryDirectory() as tmpdir: + reader = spark.readStream.format("cloudFiles") + reader = configmanager.apply_table_config(reader, table_config) + reader.option("cloudFiles.schemaLocation", tmpdir) + display(reader.load(table_volume_path_data)) \ No newline at end of file diff --git a/filepush/dab/src/utils/configmanager.py b/filepush/dab/src/utils/configmanager.py new file mode 100644 index 00000000..e69de29b diff --git a/filepush/dab/src/utils/initialization.py b/filepush/dab/src/utils/initialization.py index 2d7459f4..a2fdd6d6 100644 --- a/filepush/dab/src/utils/initialization.py +++ b/filepush/dab/src/utils/initialization.py @@ -41,6 +41,11 @@ logger.info(f"Initializing volume folder structure {volume_path_root}") logger.debug(f"Creating data directory {volume_path_data}") ws.files.create_directory(volume_path_data) +with open("../configs/tables.json", "r") as f: + for table in json.load(f): + table_volume_path_data = {volume_path_data}/{table['name']} + logger.debug(f"Creating table directory {table_volume_path_data}") + ws.files.create_directory(table_volume_path_data) logger.info(f"Volume {volume_path_root} configured") # Dump configs to environment json From 299997ee685d88ed5bca23c3912431fabef42c71 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Tue, 16 Sep 2025 11:25:59 -0700 Subject: [PATCH 33/60] Working debug notebook --- filepush/dab/src/debug_table.py | 43 ------------ filepush/dab/src/debug_table_config.py | 85 ++++++++++++++++++++++++ filepush/dab/src/utils/configmanager.py | 18 +++++ filepush/dab/src/utils/initialization.py | 28 ++++---- 4 files changed, 117 insertions(+), 57 deletions(-) delete mode 100644 filepush/dab/src/debug_table.py create mode 100644 filepush/dab/src/debug_table_config.py diff --git a/filepush/dab/src/debug_table.py b/filepush/dab/src/debug_table.py deleted file mode 100644 index a7343fe3..00000000 --- a/filepush/dab/src/debug_table.py +++ /dev/null @@ -1,43 +0,0 @@ -# Databricks notebook source -import json -import os - -# Widget -dbutils.widgets.text("table_name", "", "Table Name") - -# Load configs to environment json -environment_path = "./configs/environment.json" -table_configs_path = "./configs/tables.json" - -assert os.path.exists(environment_path), f"Missing environment file: {environment_path}. Have you run `databricks bundle run configuration_job`?" -assert os.path.exists(table_configs_path), f"Missing table configs file: {table_configs_path}" - -with open(environment_path, "r") as f: - configs = json.load(f) -with open(table_configs_path, "r") as f: - table_configs = json.load(f) - -catalog_name = configs["catalog_name"] -schema_name = configs["schema_name"] -table_name = dbutils.widgets.get("table_name") -assert table_name, "Please provide a table name" -table_volume_path_data = configs["volume_path_data"] + f"/{table_name}" - -# Locate table config -matches = [table_config for table_config in table_configs if table_config.get("name") == table_name] -assert len(matches) == 1, f"Expect exactly 1 config for table `{table_name}`. Found {len(matches)}. Please fix the config file." 
-table_config = matches[0] - -print(f"Table Volume Path: {table_volume_path_data}") -print(f"Table Config: {table_config}") - -# COMMAND ---------- - -import tempfile -from utils import configmanager - -with tempfile.TemporaryDirectory() as tmpdir: - reader = spark.readStream.format("cloudFiles") - reader = configmanager.apply_table_config(reader, table_config) - reader.option("cloudFiles.schemaLocation", tmpdir) - display(reader.load(table_volume_path_data)) \ No newline at end of file diff --git a/filepush/dab/src/debug_table_config.py b/filepush/dab/src/debug_table_config.py new file mode 100644 index 00000000..08259d2f --- /dev/null +++ b/filepush/dab/src/debug_table_config.py @@ -0,0 +1,85 @@ +# Databricks notebook source +# MAGIC %md +# MAGIC # Assign the table config JSON you would like to debug to variable `table_config` +# MAGIC For example, +# MAGIC ``` +# MAGIC table_config = r''' +# MAGIC { +# MAGIC "name": "all_employees", +# MAGIC "format": "csv", +# MAGIC "format_options": { +# MAGIC "header": "true", +# MAGIC "escape": "\"" +# MAGIC } +# MAGIC "schema_hints": "id int, name string" +# MAGIC } +# MAGIC ''' +# MAGIC ``` + +# COMMAND ---------- + +table_config = r''' +{ + "name": "all_employees", + "format": "csv", + "format_options": { + "header": "true", + "escape": "\"" + }, + "schema_hints": "id int, name string" +} +''' + +# COMMAND ---------- + +import json +import os +from databricks.sdk import WorkspaceClient +from databricks.sdk.errors.platform import NotFound + +# Initialize workspace client +ws = WorkspaceClient() + +# Load configs from environment json +environment_path = "./configs/environment.json" +assert os.path.exists(environment_path), f"Missing environment file: {environment_path}. Have you run `databricks bundle run configuration_job`?" +with open(environment_path, "r") as f: + configs = json.load(f) + +catalog_name = configs["catalog_name"] +schema_name = configs["schema_name"] +table_config_json = json.loads(table_config) +table_name = table_config_json["name"] +assert table_name, "Please provide a table name in the table_config" + +# Load table configs +table_configs_path = "./configs/tables.json" +assert os.path.exists(table_configs_path), f"Missing table configs file: {table_configs_path}. Please following README.md to create one, deploy and run configuration_job." +with open(table_configs_path, "r") as f: + table_configs = json.load(f) +matches = [table_config for table_config in table_configs if table_config.get("name") == table_name] +assert len(matches) == 1, f"Expect exactly 1 config for table `{table_name}`. Found {len(matches)}. Please fix the config file and run configuration_job" + +table_volume_path_data = configs["volume_path_data"] + f"/{table_name}" +try: + ws.files.get_directory_metadata(table_volume_path_data) + iter = ws.files.list_directory_contents(table_volume_path_data) + next(iter) +except NotFound: + assert False, f"Table data path not found for table `{table_name}`. Have you run `databricks bundle run configuration_job`?" +except StopIteration: + assert False, f"No data file found in {table_volume_path_data}. Please upload at least 1 file." 
+ +print(f"Table Volume Path: {table_volume_path_data}") +print(f"Table Config:\n{table_config}") + +# COMMAND ---------- + +import tempfile +from utils import configmanager + +with tempfile.TemporaryDirectory() as tmpdir: + reader = spark.readStream.format("cloudFiles") + reader = configmanager.apply_table_config(reader, table_config_json) + reader.option("cloudFiles.schemaLocation", tmpdir) + display(reader.load(table_volume_path_data)) diff --git a/filepush/dab/src/utils/configmanager.py b/filepush/dab/src/utils/configmanager.py index e69de29b..4817ba5e 100644 --- a/filepush/dab/src/utils/configmanager.py +++ b/filepush/dab/src/utils/configmanager.py @@ -0,0 +1,18 @@ +from pyspark.sql.streaming import DataStreamReader + +def apply_table_config(reader: DataStreamReader, table_config: dict) -> DataStreamReader: + fmt = table_config.get("format") + assert fmt is not None, f"format is required for table {table_config.get('name')}" + reader = reader.option("cloudFiles.format", fmt) + + # format-specific options + fmt_opts = table_config.get("format_options", {}) + for k, v in fmt_opts.items(): + reader = reader.option(k, v) + + # schema hints + schema_hints = table_config.get("schema_hints") + if schema_hints: + reader = reader.option("cloudFiles.schemaHints", schema_hints) + + return reader \ No newline at end of file diff --git a/filepush/dab/src/utils/initialization.py b/filepush/dab/src/utils/initialization.py index a2fdd6d6..09f79947 100644 --- a/filepush/dab/src/utils/initialization.py +++ b/filepush/dab/src/utils/initialization.py @@ -19,8 +19,8 @@ # Logging logging.basicConfig( - level=logging_level, - format="%(asctime)s [%(levelname)s] %(module)s - %(message)s" + level=logging_level, + format="%(asctime)s [%(levelname)s] %(module)s - %(message)s" ) logger = logging.getLogger(__name__) # per-module logger @@ -32,8 +32,8 @@ logger.debug(f"Volume path root: {volume_path_root}") logger.debug(f"Volume path data: {volume_path_data}") ws.schemas.update(full_name=f"{catalog_name}.{schema_name}", properties={ - "filepush.volume_path_root": volume_path_root, - "filepush.volume_path_data": volume_path_data + "filepush.volume_path_root": volume_path_root, + "filepush.volume_path_data": volume_path_data }) logger.info(f"Schema {catalog_name}.{schema_name} configured") @@ -42,17 +42,17 @@ logger.debug(f"Creating data directory {volume_path_data}") ws.files.create_directory(volume_path_data) with open("../configs/tables.json", "r") as f: - for table in json.load(f): - table_volume_path_data = {volume_path_data}/{table['name']} - logger.debug(f"Creating table directory {table_volume_path_data}") - ws.files.create_directory(table_volume_path_data) + for table in json.load(f): + table_volume_path_data = f"{volume_path_data}/{table['name']}" + logger.debug(f"Creating table directory {table_volume_path_data}") + ws.files.create_directory(table_volume_path_data) logger.info(f"Volume {volume_path_root} configured") # Dump configs to environment json with open("../configs/environment.json", "w") as f: - json.dump({ - "catalog_name": catalog_name, - "schema_name": schema_name, - "volume_path_root": volume_path_root, - "volume_path_data": volume_path_data - }, f) + json.dump({ + "catalog_name": catalog_name, + "schema_name": schema_name, + "volume_path_root": volume_path_root, + "volume_path_data": volume_path_data + }, f) From e7eb8efaaac7502aa93b88ec4a85c4ff1907d8f1 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Tue, 16 Sep 2025 16:33:07 -0700 Subject: [PATCH 34/60] Refactor managers --- 
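This refactor splits the old configmanager in two: envmanager loads configs/environment.json (the file written by configuration_job) and returns it as a dict, while tablemanager, the renamed configmanager, turns a per-table config dict into Auto Loader reader options. The sketch below only illustrates how the two helpers are meant to be combined from a Databricks notebook; it assumes the notebook globals `spark` and `display`, that the notebook sits next to the utils package, and a hypothetical `employees` CSV table config. It is not code added by this patch.

    import tempfile
    from utils import envmanager, tablemanager

    env = envmanager.get_env_config()            # contents of configs/environment.json
    table_cfg = {                                # hypothetical table config
        "name": "employees",
        "format": "csv",
        "format_options": {"header": "true", "escape": "\""},
        "schema_hints": "id int, name string",
    }
    data_path = f"{env['volume_path_data']}/{table_cfg['name']}"

    with tempfile.TemporaryDirectory() as schema_dir:
        reader = spark.readStream.format("cloudFiles")               # notebook-provided SparkSession
        reader = tablemanager.apply_table_config(reader, table_cfg)  # format, options, schema hints
        reader = reader.option("cloudFiles.schemaLocation", schema_dir)
        display(reader.load(data_path))                              # Databricks display()

Keeping environment discovery separate from reader construction lets the debug notebook and, later in the series, the DLT ingestion pipeline share the same table-config handling.
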
filepush/dab/src/utils/envmanager.py | 9 +++++++++ .../dab/src/utils/{configmanager.py => tablemanager.py} | 5 +++++ 2 files changed, 14 insertions(+) create mode 100644 filepush/dab/src/utils/envmanager.py rename filepush/dab/src/utils/{configmanager.py => tablemanager.py} (77%) diff --git a/filepush/dab/src/utils/envmanager.py b/filepush/dab/src/utils/envmanager.py new file mode 100644 index 00000000..7a92547c --- /dev/null +++ b/filepush/dab/src/utils/envmanager.py @@ -0,0 +1,9 @@ +import os +import json + +def get_env_config() -> dict: + environment_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "configs", "environment.json") + assert os.path.exists(environment_path), f"Missing environment file: {environment_path}. Have you run `databricks bundle run configuration_job`?" + with open(environment_path, "r") as f: + configs = json.load(f) + return configs diff --git a/filepush/dab/src/utils/configmanager.py b/filepush/dab/src/utils/tablemanager.py similarity index 77% rename from filepush/dab/src/utils/configmanager.py rename to filepush/dab/src/utils/tablemanager.py index 4817ba5e..d25ce408 100644 --- a/filepush/dab/src/utils/configmanager.py +++ b/filepush/dab/src/utils/tablemanager.py @@ -1,5 +1,10 @@ + from pyspark.sql.streaming import DataStreamReader +def get_table_configs() -> dict: + config_path = json_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "configs", "config.json") + return load_json(config_path) + def apply_table_config(reader: DataStreamReader, table_config: dict) -> DataStreamReader: fmt = table_config.get("format") assert fmt is not None, f"format is required for table {table_config.get('name')}" From a4cbc520d4492eeae7793259f6be42da25569964 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Tue, 16 Sep 2025 17:42:50 -0700 Subject: [PATCH 35/60] Successfully create placeholder table --- filepush/dab/src/debug_table_config.py | 44 +++++++------------------- filepush/dab/src/ingestion.py | 18 +++++++++++ filepush/dab/src/utils/envmanager.py | 8 ++--- filepush/dab/src/utils/tablemanager.py | 44 +++++++++++++++++++++++--- 4 files changed, 73 insertions(+), 41 deletions(-) diff --git a/filepush/dab/src/debug_table_config.py b/filepush/dab/src/debug_table_config.py index 08259d2f..7e003c3f 100644 --- a/filepush/dab/src/debug_table_config.py +++ b/filepush/dab/src/debug_table_config.py @@ -1,6 +1,6 @@ # Databricks notebook source # MAGIC %md -# MAGIC # Assign the table config JSON you would like to debug to variable `table_config` +# MAGIC # Paste the table config JSON you would like to debug and assign to variable `table_config` # MAGIC For example, # MAGIC ``` # MAGIC table_config = r''' @@ -34,52 +34,32 @@ import json import os -from databricks.sdk import WorkspaceClient -from databricks.sdk.errors.platform import NotFound - -# Initialize workspace client -ws = WorkspaceClient() +from utils import envmanager +from utils import tablemanager # Load configs from environment json -environment_path = "./configs/environment.json" -assert os.path.exists(environment_path), f"Missing environment file: {environment_path}. Have you run `databricks bundle run configuration_job`?" 
-with open(environment_path, "r") as f: - configs = json.load(f) +config = envmanager.get_config() +catalog_name = config["catalog_name"] +schema_name = config["schema_name"] -catalog_name = configs["catalog_name"] -schema_name = configs["schema_name"] +# Load table configs table_config_json = json.loads(table_config) table_name = table_config_json["name"] -assert table_name, "Please provide a table name in the table_config" - -# Load table configs -table_configs_path = "./configs/tables.json" -assert os.path.exists(table_configs_path), f"Missing table configs file: {table_configs_path}. Please following README.md to create one, deploy and run configuration_job." -with open(table_configs_path, "r") as f: - table_configs = json.load(f) +assert table_name, "Please provide a table name in the table_config json" +table_configs = tablemanager.get_configs() matches = [table_config for table_config in table_configs if table_config.get("name") == table_name] assert len(matches) == 1, f"Expect exactly 1 config for table `{table_name}`. Found {len(matches)}. Please fix the config file and run configuration_job" - -table_volume_path_data = configs["volume_path_data"] + f"/{table_name}" -try: - ws.files.get_directory_metadata(table_volume_path_data) - iter = ws.files.list_directory_contents(table_volume_path_data) - next(iter) -except NotFound: - assert False, f"Table data path not found for table `{table_name}`. Have you run `databricks bundle run configuration_job`?" -except StopIteration: - assert False, f"No data file found in {table_volume_path_data}. Please upload at least 1 file." +table_volume_path_data = tablemanager.get_table_volume_path(table_name) +assert tablemanager.has_data_file(table_name), f"No data file found in {table_volume_path_data}. Please upload at least 1 file." 
print(f"Table Volume Path: {table_volume_path_data}") -print(f"Table Config:\n{table_config}") # COMMAND ---------- import tempfile -from utils import configmanager with tempfile.TemporaryDirectory() as tmpdir: reader = spark.readStream.format("cloudFiles") - reader = configmanager.apply_table_config(reader, table_config_json) + reader = tablemanager.apply_table_config(reader, table_config_json) reader.option("cloudFiles.schemaLocation", tmpdir) display(reader.load(table_volume_path_data)) diff --git a/filepush/dab/src/ingestion.py b/filepush/dab/src/ingestion.py index e69de29b..72f11b7d 100644 --- a/filepush/dab/src/ingestion.py +++ b/filepush/dab/src/ingestion.py @@ -0,0 +1,18 @@ +import dlt +from utils import tablemanager + +table_configs = tablemanager.get_configs() + +for table_config in table_configs: + tablemanager.validate_config(table_config) + table_name = table_config['name'] + table_volume_path = tablemanager.get_table_volume_path(table_name) + @dlt.table( + name = table_name, + comment = "File push created table", + table_properties = {"filepush.table_volume_path_data": tablemanager.get_table_volume_path(table_name)} + ) + def create_table(): + reader = spark.readStream.format("cloudFiles") + reader = tablemanager.apply_table_config(reader, table_config) + return reader.load(table_volume_path) \ No newline at end of file diff --git a/filepush/dab/src/utils/envmanager.py b/filepush/dab/src/utils/envmanager.py index 7a92547c..0eee314b 100644 --- a/filepush/dab/src/utils/envmanager.py +++ b/filepush/dab/src/utils/envmanager.py @@ -1,9 +1,9 @@ import os import json -def get_env_config() -> dict: - environment_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "configs", "environment.json") - assert os.path.exists(environment_path), f"Missing environment file: {environment_path}. Have you run `databricks bundle run configuration_job`?" - with open(environment_path, "r") as f: +def get_config() -> dict: + json_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "configs", "environment.json") + assert os.path.exists(json_path), f"Missing environment file: {json_path}. Have you run `databricks bundle run configuration_job`?" + with open(json_path, "r") as f: configs = json.load(f) return configs diff --git a/filepush/dab/src/utils/tablemanager.py b/filepush/dab/src/utils/tablemanager.py index d25ce408..fae57a6e 100644 --- a/filepush/dab/src/utils/tablemanager.py +++ b/filepush/dab/src/utils/tablemanager.py @@ -1,9 +1,40 @@ - +import os +import json +from . import envmanager from pyspark.sql.streaming import DataStreamReader +from databricks.sdk import WorkspaceClient +from databricks.sdk.errors.platform import NotFound + +def validate_config(config: dict): + pass + +def get_table_volume_path(table_name: str) -> str: + # Initialize workspace client + ws = WorkspaceClient() + table_volume_path_data = os.path.join(envmanager.get_config()["volume_path_data"], table_name) + try: + ws.files.get_directory_metadata(table_volume_path_data) + except NotFound: + assert False, f"Table data path not found for table `{table_name}`. Have you run `databricks bundle run configuration_job`?" 
+ return table_volume_path_data + +def has_data_file(table_name: str) -> bool: + # Initialize workspace client + ws = WorkspaceClient() + table_volume_path_data = get_table_volume_path(table_name) + try: + iter = ws.files.list_directory_contents(table_volume_path_data) + next(iter) + except StopIteration: + return False + return True -def get_table_configs() -> dict: - config_path = json_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "configs", "config.json") - return load_json(config_path) +def get_configs() -> list: + json_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "configs", "tables.json") + assert os.path.exists(json_path), f"Missing table configs file: {json_path}. Please following README.md to create one, deploy and run configuration_job." + with open(json_path, "r") as f: + configs = json.load(f) + return configs def apply_table_config(reader: DataStreamReader, table_config: dict) -> DataStreamReader: fmt = table_config.get("format") @@ -16,8 +47,11 @@ def apply_table_config(reader: DataStreamReader, table_config: dict) -> DataStre reader = reader.option(k, v) # schema hints + # always have _rescued_data + reader = reader.schema("_rescued_data STRING") schema_hints = table_config.get("schema_hints") if schema_hints: reader = reader.option("cloudFiles.schemaHints", schema_hints) - return reader \ No newline at end of file + return reader + \ No newline at end of file From 322bc4827f80cf6681b6ec6c9188ca7eab608912 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Wed, 17 Sep 2025 14:29:18 -0700 Subject: [PATCH 36/60] Solve the empty table DLT resolve issue --- filepush/dab/src/ingestion.py | 13 ++++++++++--- filepush/dab/src/utils/tablemanager.py | 15 +++++++++++++-- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/filepush/dab/src/ingestion.py b/filepush/dab/src/ingestion.py index 72f11b7d..99f05c1a 100644 --- a/filepush/dab/src/ingestion.py +++ b/filepush/dab/src/ingestion.py @@ -7,12 +7,19 @@ tablemanager.validate_config(table_config) table_name = table_config['name'] table_volume_path = tablemanager.get_table_volume_path(table_name) - @dlt.table( + + dlt.create_streaming_table( name = table_name, comment = "File push created table", table_properties = {"filepush.table_volume_path_data": tablemanager.get_table_volume_path(table_name)} ) - def create_table(): + if not tablemanager.has_data_file(table_name) and not tablemanager.is_table_created(table_name): + @dlt.append_flow(target = table_name) + def noop_to_table(): + return tablemanager.get_placeholder_stream(spark.readStream) + else: + @dlt.append_flow(target = table_name) + def append_to_table(): reader = spark.readStream.format("cloudFiles") reader = tablemanager.apply_table_config(reader, table_config) - return reader.load(table_volume_path) \ No newline at end of file + return reader.load(table_volume_path) diff --git a/filepush/dab/src/utils/tablemanager.py b/filepush/dab/src/utils/tablemanager.py index fae57a6e..d5790cac 100644 --- a/filepush/dab/src/utils/tablemanager.py +++ b/filepush/dab/src/utils/tablemanager.py @@ -29,6 +29,19 @@ def has_data_file(table_name: str) -> bool: return False return True +def is_table_created(table_name: str) -> bool: + # Initialize workspace client + ws = WorkspaceClient() + return ws.tables.exists(full_name=f"{envmanager.get_config()["catalog_name"]}.{envmanager.get_config()["schema_name"]}.{table_name}").table_exists + +def get_placeholder_stream(reader: DataStreamReader) -> DataStreamReader: + # Streaming source that produces empty 
micro-batches (but is STILL streaming) + return ( + reader.format("rate").option("rowsPerSecond", 1).load() + .selectExpr("CAST(NULL AS STRING) AS _rescued_data") + .where("1=0") # no rows, just preserves streaming lineage + ) + def get_configs() -> list: json_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "configs", "tables.json") assert os.path.exists(json_path), f"Missing table configs file: {json_path}. Please following README.md to create one, deploy and run configuration_job." @@ -47,8 +60,6 @@ def apply_table_config(reader: DataStreamReader, table_config: dict) -> DataStre reader = reader.option(k, v) # schema hints - # always have _rescued_data - reader = reader.schema("_rescued_data STRING") schema_hints = table_config.get("schema_hints") if schema_hints: reader = reader.option("cloudFiles.schemaHints", schema_hints) From 04b1662fa3682cdef321247847839b18fec7c8b5 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Wed, 17 Sep 2025 15:28:57 -0700 Subject: [PATCH 37/60] Better way to solve the empty DLT resolve --- filepush/dab/src/debug_table_config.py | 22 +++++++++++----------- filepush/dab/src/ingestion.py | 17 +++++++---------- filepush/dab/src/utils/tablemanager.py | 10 +--------- 3 files changed, 19 insertions(+), 30 deletions(-) diff --git a/filepush/dab/src/debug_table_config.py b/filepush/dab/src/debug_table_config.py index 7e003c3f..2e697a94 100644 --- a/filepush/dab/src/debug_table_config.py +++ b/filepush/dab/src/debug_table_config.py @@ -19,15 +19,15 @@ # COMMAND ---------- table_config = r''' -{ - "name": "all_employees", - "format": "csv", - "format_options": { - "header": "true", - "escape": "\"" - }, - "schema_hints": "id int, name string" -} + { + "name": "dummy", + "format": "csv", + "format_options": { + "header": "true", + "escape": "\"" + }, + "schema_hints": "id int, name string" + } ''' # COMMAND ---------- @@ -50,7 +50,7 @@ matches = [table_config for table_config in table_configs if table_config.get("name") == table_name] assert len(matches) == 1, f"Expect exactly 1 config for table `{table_name}`. Found {len(matches)}. Please fix the config file and run configuration_job" table_volume_path_data = tablemanager.get_table_volume_path(table_name) -assert tablemanager.has_data_file(table_name), f"No data file found in {table_volume_path_data}. Please upload at least 1 file." +assert tablemanager.has_data_file(table_name), f"No data file found in {table_volume_path_data}. 
Please upload at least 1 file to {table_volume_path_data}" print(f"Table Volume Path: {table_volume_path_data}") @@ -62,4 +62,4 @@ reader = spark.readStream.format("cloudFiles") reader = tablemanager.apply_table_config(reader, table_config_json) reader.option("cloudFiles.schemaLocation", tmpdir) - display(reader.load(table_volume_path_data)) + display(reader.load(table_volume_path_data)) \ No newline at end of file diff --git a/filepush/dab/src/ingestion.py b/filepush/dab/src/ingestion.py index 99f05c1a..622cc3d6 100644 --- a/filepush/dab/src/ingestion.py +++ b/filepush/dab/src/ingestion.py @@ -13,13 +13,10 @@ comment = "File push created table", table_properties = {"filepush.table_volume_path_data": tablemanager.get_table_volume_path(table_name)} ) - if not tablemanager.has_data_file(table_name) and not tablemanager.is_table_created(table_name): - @dlt.append_flow(target = table_name) - def noop_to_table(): - return tablemanager.get_placeholder_stream(spark.readStream) - else: - @dlt.append_flow(target = table_name) - def append_to_table(): - reader = spark.readStream.format("cloudFiles") - reader = tablemanager.apply_table_config(reader, table_config) - return reader.load(table_volume_path) + @dlt.append_flow(target = table_name) + def append_to_table(): + reader = spark.readStream.format("cloudFiles") + reader = tablemanager.apply_table_config(reader, table_config) + if not tablemanager.has_data_file(table_name) and not tablemanager.is_table_created(table_name): + reader.schema("_rescued_data STRING") # Use _rescued_data as placeholder + return reader.load(table_volume_path) diff --git a/filepush/dab/src/utils/tablemanager.py b/filepush/dab/src/utils/tablemanager.py index d5790cac..7cf85a14 100644 --- a/filepush/dab/src/utils/tablemanager.py +++ b/filepush/dab/src/utils/tablemanager.py @@ -32,15 +32,7 @@ def has_data_file(table_name: str) -> bool: def is_table_created(table_name: str) -> bool: # Initialize workspace client ws = WorkspaceClient() - return ws.tables.exists(full_name=f"{envmanager.get_config()["catalog_name"]}.{envmanager.get_config()["schema_name"]}.{table_name}").table_exists - -def get_placeholder_stream(reader: DataStreamReader) -> DataStreamReader: - # Streaming source that produces empty micro-batches (but is STILL streaming) - return ( - reader.format("rate").option("rowsPerSecond", 1).load() - .selectExpr("CAST(NULL AS STRING) AS _rescued_data") - .where("1=0") # no rows, just preserves streaming lineage - ) + return ws.tables.exists(full_name=f"{envmanager.get_config()['catalog_name']}.{envmanager.get_config()['schema_name']}.{table_name}").table_exists def get_configs() -> list: json_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "configs", "tables.json") From d1f83736bf42977c7a4d06d05fc6f931f72ee554 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Wed, 17 Sep 2025 16:33:14 -0700 Subject: [PATCH 38/60] Fix a flow name conflict issue --- filepush/dab/src/configs/tables.json | 14 +++++++++++++- filepush/dab/src/ingestion.py | 5 ++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/filepush/dab/src/configs/tables.json b/filepush/dab/src/configs/tables.json index 81cb1560..aba5d80d 100644 --- a/filepush/dab/src/configs/tables.json +++ b/filepush/dab/src/configs/tables.json @@ -1,6 +1,18 @@ [ { - "name": "dummy", + "name": "dummy1", + "format": "csv" + }, + { + "name": "dummy2", + "format": "csv", + "format_options": { + "header": "true", + "escape": "\"" + } + }, + { + "name": "dummy3", "format": "csv", "format_options": { "header": 
"true", diff --git a/filepush/dab/src/ingestion.py b/filepush/dab/src/ingestion.py index 622cc3d6..291a0ea0 100644 --- a/filepush/dab/src/ingestion.py +++ b/filepush/dab/src/ingestion.py @@ -13,7 +13,10 @@ comment = "File push created table", table_properties = {"filepush.table_volume_path_data": tablemanager.get_table_volume_path(table_name)} ) - @dlt.append_flow(target = table_name) + @dlt.append_flow( + target = table_name, + name = table_name + ) def append_to_table(): reader = spark.readStream.format("cloudFiles") reader = tablemanager.apply_table_config(reader, table_config) From 29b8609706890d89f689c75721f51e7a09594eae Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Thu, 18 Sep 2025 11:14:00 -0700 Subject: [PATCH 39/60] Working format manager --- filepush/dab/src/configs/tables.json | 2 - filepush/dab/src/debug_table_config.py | 10 ++-- filepush/dab/src/ingestion.py | 5 +- filepush/dab/src/utils/envmanager.py | 3 +- filepush/dab/src/utils/formatmanager.py | 71 +++++++++++++++++++++++++ filepush/dab/src/utils/tablemanager.py | 26 ++++----- 6 files changed, 92 insertions(+), 25 deletions(-) create mode 100644 filepush/dab/src/utils/formatmanager.py diff --git a/filepush/dab/src/configs/tables.json b/filepush/dab/src/configs/tables.json index aba5d80d..8bb40fa4 100644 --- a/filepush/dab/src/configs/tables.json +++ b/filepush/dab/src/configs/tables.json @@ -7,7 +7,6 @@ "name": "dummy2", "format": "csv", "format_options": { - "header": "true", "escape": "\"" } }, @@ -15,7 +14,6 @@ "name": "dummy3", "format": "csv", "format_options": { - "header": "true", "escape": "\"" }, "schema_hints": "id int, name string" diff --git a/filepush/dab/src/debug_table_config.py b/filepush/dab/src/debug_table_config.py index 2e697a94..537b17ff 100644 --- a/filepush/dab/src/debug_table_config.py +++ b/filepush/dab/src/debug_table_config.py @@ -8,8 +8,8 @@ # MAGIC "name": "all_employees", # MAGIC "format": "csv", # MAGIC "format_options": { -# MAGIC "header": "true", -# MAGIC "escape": "\"" +# MAGIC "escape": "\"", +# MAGIC "multiLine": "false" # MAGIC } # MAGIC "schema_hints": "id int, name string" # MAGIC } @@ -20,10 +20,9 @@ table_config = r''' { - "name": "dummy", + "name": "dummy1", "format": "csv", "format_options": { - "header": "true", "escape": "\"" }, "schema_hints": "id int, name string" @@ -59,7 +58,6 @@ import tempfile with tempfile.TemporaryDirectory() as tmpdir: - reader = spark.readStream.format("cloudFiles") - reader = tablemanager.apply_table_config(reader, table_config_json) + reader = tablemanager.apply_table_config(spark.readStream, table_config_json) reader.option("cloudFiles.schemaLocation", tmpdir) display(reader.load(table_volume_path_data)) \ No newline at end of file diff --git a/filepush/dab/src/ingestion.py b/filepush/dab/src/ingestion.py index 291a0ea0..bbc51abc 100644 --- a/filepush/dab/src/ingestion.py +++ b/filepush/dab/src/ingestion.py @@ -4,14 +4,13 @@ table_configs = tablemanager.get_configs() for table_config in table_configs: - tablemanager.validate_config(table_config) table_name = table_config['name'] table_volume_path = tablemanager.get_table_volume_path(table_name) dlt.create_streaming_table( name = table_name, comment = "File push created table", - table_properties = {"filepush.table_volume_path_data": tablemanager.get_table_volume_path(table_name)} + table_properties = {"filepush.table_volume_path_data": table_volume_path} ) @dlt.append_flow( target = table_name, @@ -20,6 +19,6 @@ def append_to_table(): reader = spark.readStream.format("cloudFiles") reader = 
tablemanager.apply_table_config(reader, table_config) - if not tablemanager.has_data_file(table_name) and not tablemanager.is_table_created(table_name): + if not tablemanager.has_data_file(table_name): reader.schema("_rescued_data STRING") # Use _rescued_data as placeholder return reader.load(table_volume_path) diff --git a/filepush/dab/src/utils/envmanager.py b/filepush/dab/src/utils/envmanager.py index 0eee314b..3f215c77 100644 --- a/filepush/dab/src/utils/envmanager.py +++ b/filepush/dab/src/utils/envmanager.py @@ -3,7 +3,8 @@ def get_config() -> dict: json_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "configs", "environment.json") - assert os.path.exists(json_path), f"Missing environment file: {json_path}. Have you run `databricks bundle run configuration_job`?" + if not os.path.exists(json_path): + raise RuntimeError(f"Missing environment file: {json_path}. Have you run `databricks bundle run configuration_job`?") with open(json_path, "r") as f: configs = json.load(f) return configs diff --git a/filepush/dab/src/utils/formatmanager.py b/filepush/dab/src/utils/formatmanager.py new file mode 100644 index 00000000..acf944e9 --- /dev/null +++ b/filepush/dab/src/utils/formatmanager.py @@ -0,0 +1,71 @@ +from dataclasses import dataclass + +@dataclass(frozen=True, slots=True) +class AutoLoaderOption: + key: str + value: str + hidden: bool = False + def __iter__(self): + yield (self.key, self) + +class AutoLoaderFormat: + def __init__(self): + self.name = None + self.options: set[AutoLoaderOption] = { + AutoLoaderOption("cloudFiles.inferColumnTypes", "true", True), + AutoLoaderOption("cloudFiles.schemaEvolutionMode", "addNewColumns", True), + } + + def __iter__(self): + yield (self.name, self) + + def get_userfacing_options(self) -> dict[str, str]: + return {opt.key: opt.value for opt in self.options if not opt.hidden} + + def validate_user_options(self, options: dict[str, str]) -> None: + allowed = set(self.get_userfacing_options()) + illegal = set(options) - allowed + if illegal: + raise ValueError( + f"Unsupported or protected options: {sorted(illegal)}. " + f"Allowed user options: {sorted(allowed)}" + ) + + def get_modified_options(self, options: dict[str, str]) -> dict[str, str]: + self.validate_user_options(options) + defaults = self.get_userfacing_options() + return {k: v for k, v in options.items() if k in defaults and v != defaults[k]} + +class CSV(AutoLoaderFormat): + def __init__(self): + super().__init__() + self.name = "CSV" + self.options |= { + AutoLoaderOption("header", "true", True), + AutoLoaderOption("mergeSchema", "true", True), + AutoLoaderOption("delimiter", ","), + AutoLoaderOption("escape", "\""), + AutoLoaderOption("multiLine", "false"), + } + +class JSON(AutoLoaderFormat): + def __init__(self): + super().__init__() + self.name = "JSON" + self.options |= { + AutoLoaderOption("mergeSchema", "true", True), + AutoLoaderOption("allowComments", "true"), + AutoLoaderOption("allowSingleQuotes", "true"), + AutoLoaderOption("inferTimestamp", "true"), + AutoLoaderOption("multiLine", "true"), + } + +_supported_formats: dict[str, AutoLoaderFormat] = {f.name: f for f in (CSV(), JSON())} + +def get_format_manager(fmt: str) -> dict[str, str]: + key = fmt.strip().upper() + try: + return _supported_formats[key] + except KeyError: + supported = ", ".join(sorted(_supported_formats)) + raise ValueError(f"{fmt!r} is not a supported format. 
Supported formats: {supported}") diff --git a/filepush/dab/src/utils/tablemanager.py b/filepush/dab/src/utils/tablemanager.py index 7cf85a14..6b5baa25 100644 --- a/filepush/dab/src/utils/tablemanager.py +++ b/filepush/dab/src/utils/tablemanager.py @@ -1,25 +1,21 @@ import os import json from . import envmanager +from . import formatmanager from pyspark.sql.streaming import DataStreamReader from databricks.sdk import WorkspaceClient from databricks.sdk.errors.platform import NotFound -def validate_config(config: dict): - pass - def get_table_volume_path(table_name: str) -> str: - # Initialize workspace client ws = WorkspaceClient() table_volume_path_data = os.path.join(envmanager.get_config()["volume_path_data"], table_name) try: ws.files.get_directory_metadata(table_volume_path_data) except NotFound: - assert False, f"Table data path not found for table `{table_name}`. Have you run `databricks bundle run configuration_job`?" + raise RuntimeError(f"Table data path not found for table `{table_name}`. Have you run `databricks bundle run configuration_job`?") return table_volume_path_data def has_data_file(table_name: str) -> bool: - # Initialize workspace client ws = WorkspaceClient() table_volume_path_data = get_table_volume_path(table_name) try: @@ -30,25 +26,29 @@ def has_data_file(table_name: str) -> bool: return True def is_table_created(table_name: str) -> bool: - # Initialize workspace client ws = WorkspaceClient() return ws.tables.exists(full_name=f"{envmanager.get_config()['catalog_name']}.{envmanager.get_config()['schema_name']}.{table_name}").table_exists def get_configs() -> list: json_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "configs", "tables.json") - assert os.path.exists(json_path), f"Missing table configs file: {json_path}. Please following README.md to create one, deploy and run configuration_job." + if not os.path.exists(json_path): + raise RuntimeError(f"Missing table configs file: {json_path}. 
Please following README.md to create one, deploy and run configuration_job.") with open(json_path, "r") as f: configs = json.load(f) return configs def apply_table_config(reader: DataStreamReader, table_config: dict) -> DataStreamReader: fmt = table_config.get("format") - assert fmt is not None, f"format is required for table {table_config.get('name')}" - reader = reader.option("cloudFiles.format", fmt) + if fmt is None: + raise ValueError(f"format is required for table {table_config.get('name')}") + + # format-specific options from user input + user_fmt_opts = table_config.get("format_options", {}) + # validate and get the final modified options + final_fmt_opts = formatmanager.get_format_manager(fmt).get_modified_options(user_fmt_opts) - # format-specific options - fmt_opts = table_config.get("format_options", {}) - for k, v in fmt_opts.items(): + reader = reader.format("cloudFiles").option("cloudFiles.format", fmt) + for k, v in final_fmt_opts.items(): reader = reader.option(k, v) # schema hints From 1caa333ef863a84d48d7bdf8c6f8480b2e8220c8 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Thu, 18 Sep 2025 11:38:51 -0700 Subject: [PATCH 40/60] Fix multi table bug --- filepush/dab/src/ingestion.py | 36 +++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/filepush/dab/src/ingestion.py b/filepush/dab/src/ingestion.py index bbc51abc..69f2975a 100644 --- a/filepush/dab/src/ingestion.py +++ b/filepush/dab/src/ingestion.py @@ -1,24 +1,28 @@ import dlt from utils import tablemanager +def _make_append_flow(table_name, table_config, table_volume_path): + def _body(): + reader = tablemanager.apply_table_config(spark.readStream, table_config) + if not tablemanager.has_data_file(table_name): + reader = reader.schema("_rescued_data STRING") + return reader.load(table_volume_path) + + # give the function a unique name (nice for logs / debug) + _body.__name__ = f"append_{table_name.lower()}" + + # apply the decorator programmatically + return dlt.append_flow(target=table_name, name=table_name)(_body) + table_configs = tablemanager.get_configs() -for table_config in table_configs: - table_name = table_config['name'] - table_volume_path = tablemanager.get_table_volume_path(table_name) +for cfg in table_configs: + tbl = cfg["name"] + path = tablemanager.get_table_volume_path(tbl) dlt.create_streaming_table( - name = table_name, - comment = "File push created table", - table_properties = {"filepush.table_volume_path_data": table_volume_path} - ) - @dlt.append_flow( - target = table_name, - name = table_name + name=tbl, + comment="File push created table", + table_properties={"filepush.table_volume_path_data": path}, ) - def append_to_table(): - reader = spark.readStream.format("cloudFiles") - reader = tablemanager.apply_table_config(reader, table_config) - if not tablemanager.has_data_file(table_name): - reader.schema("_rescued_data STRING") # Use _rescued_data as placeholder - return reader.load(table_volume_path) + _make_append_flow(tbl, cfg, path) From 3e906e6b9382cca1c779c9a285a15a95bf0db305 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Thu, 18 Sep 2025 16:16:39 -0700 Subject: [PATCH 41/60] Fix option merge issue --- filepush/dab/src/configs/tables.json | 13 +------------ filepush/dab/src/debug_table_config.py | 2 +- filepush/dab/src/utils/formatmanager.py | 8 ++++++++ filepush/dab/src/utils/tablemanager.py | 2 +- 4 files changed, 11 insertions(+), 14 deletions(-) diff --git a/filepush/dab/src/configs/tables.json b/filepush/dab/src/configs/tables.json 
index 8bb40fa4..98c4591f 100644 --- a/filepush/dab/src/configs/tables.json +++ b/filepush/dab/src/configs/tables.json @@ -1,17 +1,6 @@ [ { - "name": "dummy1", - "format": "csv" - }, - { - "name": "dummy2", - "format": "csv", - "format_options": { - "escape": "\"" - } - }, - { - "name": "dummy3", + "name": "employees", "format": "csv", "format_options": { "escape": "\"" diff --git a/filepush/dab/src/debug_table_config.py b/filepush/dab/src/debug_table_config.py index 537b17ff..a134b2af 100644 --- a/filepush/dab/src/debug_table_config.py +++ b/filepush/dab/src/debug_table_config.py @@ -20,7 +20,7 @@ table_config = r''' { - "name": "dummy1", + "name": "employees", "format": "csv", "format_options": { "escape": "\"" diff --git a/filepush/dab/src/utils/formatmanager.py b/filepush/dab/src/utils/formatmanager.py index acf944e9..302cb547 100644 --- a/filepush/dab/src/utils/formatmanager.py +++ b/filepush/dab/src/utils/formatmanager.py @@ -35,6 +35,14 @@ def get_modified_options(self, options: dict[str, str]) -> dict[str, str]: self.validate_user_options(options) defaults = self.get_userfacing_options() return {k: v for k, v in options.items() if k in defaults and v != defaults[k]} + + def get_merged_options(self, options: dict[str, str]) -> dict[str, str]: + self.validate_user_options(options) + defaults = self.get_userfacing_options() + + merged = defaults.copy() + merged.update({k: v for k, v in options.items() if k in defaults}) + return merged class CSV(AutoLoaderFormat): def __init__(self): diff --git a/filepush/dab/src/utils/tablemanager.py b/filepush/dab/src/utils/tablemanager.py index 6b5baa25..82106fea 100644 --- a/filepush/dab/src/utils/tablemanager.py +++ b/filepush/dab/src/utils/tablemanager.py @@ -45,7 +45,7 @@ def apply_table_config(reader: DataStreamReader, table_config: dict) -> DataStre # format-specific options from user input user_fmt_opts = table_config.get("format_options", {}) # validate and get the final modified options - final_fmt_opts = formatmanager.get_format_manager(fmt).get_modified_options(user_fmt_opts) + final_fmt_opts = formatmanager.get_format_manager(fmt).get_merged_options(user_fmt_opts) reader = reader.format("cloudFiles").option("cloudFiles.format", fmt) for k, v in final_fmt_opts.items(): From 1c5ba0f35ef79bfdc00e2726dc5ebe9d8afa4a7e Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Thu, 18 Sep 2025 16:28:14 -0700 Subject: [PATCH 42/60] Add corrupted record columm to CSV and JSON option --- filepush/dab/src/utils/formatmanager.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/filepush/dab/src/utils/formatmanager.py b/filepush/dab/src/utils/formatmanager.py index 302cb547..29aa0a68 100644 --- a/filepush/dab/src/utils/formatmanager.py +++ b/filepush/dab/src/utils/formatmanager.py @@ -51,6 +51,7 @@ def __init__(self): self.options |= { AutoLoaderOption("header", "true", True), AutoLoaderOption("mergeSchema", "true", True), + AutoLoaderOption("columnNameOfCorruptRecord", "_corrupt_record", True), AutoLoaderOption("delimiter", ","), AutoLoaderOption("escape", "\""), AutoLoaderOption("multiLine", "false"), @@ -62,6 +63,7 @@ def __init__(self): self.name = "JSON" self.options |= { AutoLoaderOption("mergeSchema", "true", True), + AutoLoaderOption("columnNameOfCorruptRecord", "_corrupt_record", True), AutoLoaderOption("allowComments", "true"), AutoLoaderOption("allowSingleQuotes", "true"), AutoLoaderOption("inferTimestamp", "true"), From 11540ce9216fe5cb45e6866e77e36f14d58d93a2 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Fri, 19 Sep 2025 16:23:32 -0700 
Subject: [PATCH 43/60] Refactor and add expectation --- filepush/dab/src/debug_table_config.py | 29 ++++++------------- filepush/dab/src/ingestion.py | 5 ++++ filepush/dab/src/utils/formatmanager.py | 9 ++++++ filepush/dab/src/utils/tablemanager.py | 38 ++++++++++++++++--------- 4 files changed, 47 insertions(+), 34 deletions(-) diff --git a/filepush/dab/src/debug_table_config.py b/filepush/dab/src/debug_table_config.py index a134b2af..f0ff40bb 100644 --- a/filepush/dab/src/debug_table_config.py +++ b/filepush/dab/src/debug_table_config.py @@ -32,32 +32,19 @@ # COMMAND ---------- import json -import os -from utils import envmanager +import tempfile from utils import tablemanager -# Load configs from environment json -config = envmanager.get_config() -catalog_name = config["catalog_name"] -schema_name = config["schema_name"] - -# Load table configs +# Load table config table_config_json = json.loads(table_config) +tablemanager.validate_config(table_config_json) table_name = table_config_json["name"] -assert table_name, "Please provide a table name in the table_config json" -table_configs = tablemanager.get_configs() -matches = [table_config for table_config in table_configs if table_config.get("name") == table_name] -assert len(matches) == 1, f"Expect exactly 1 config for table `{table_name}`. Found {len(matches)}. Please fix the config file and run configuration_job" table_volume_path_data = tablemanager.get_table_volume_path(table_name) -assert tablemanager.has_data_file(table_name), f"No data file found in {table_volume_path_data}. Please upload at least 1 file to {table_volume_path_data}" - -print(f"Table Volume Path: {table_volume_path_data}") +table_reader = tablemanager.apply_table_config(spark.readStream, table_config_json) -# COMMAND ---------- - -import tempfile +assert tablemanager.has_data_file(table_name), f"No data file found in {table_volume_path_data}. 
Please upload at least 1 file to {table_volume_path_data}" +# Put schema location in temp directory with tempfile.TemporaryDirectory() as tmpdir: - reader = tablemanager.apply_table_config(spark.readStream, table_config_json) - reader.option("cloudFiles.schemaLocation", tmpdir) - display(reader.load(table_volume_path_data)) \ No newline at end of file + table_reader.option("cloudFiles.schemaLocation", tmpdir) + display(table_reader.load(table_volume_path_data)) \ No newline at end of file diff --git a/filepush/dab/src/ingestion.py b/filepush/dab/src/ingestion.py index 69f2975a..264484a2 100644 --- a/filepush/dab/src/ingestion.py +++ b/filepush/dab/src/ingestion.py @@ -1,9 +1,11 @@ import dlt from utils import tablemanager +from utils import formatmanager def _make_append_flow(table_name, table_config, table_volume_path): def _body(): reader = tablemanager.apply_table_config(spark.readStream, table_config) + # use _rescued_data as placeholder when no data file is present if not tablemanager.has_data_file(table_name): reader = reader.schema("_rescued_data STRING") return reader.load(table_volume_path) @@ -17,12 +19,15 @@ def _body(): table_configs = tablemanager.get_configs() for cfg in table_configs: + tablemanager.validate_config(cfg) tbl = cfg["name"] path = tablemanager.get_table_volume_path(tbl) + expts = formatmanager.get_format_manager(cfg["format"]).expectations dlt.create_streaming_table( name=tbl, comment="File push created table", table_properties={"filepush.table_volume_path_data": path}, ) + dlt.expect_all(expts) _make_append_flow(tbl, cfg, path) diff --git a/filepush/dab/src/utils/formatmanager.py b/filepush/dab/src/utils/formatmanager.py index 29aa0a68..ccc9d462 100644 --- a/filepush/dab/src/utils/formatmanager.py +++ b/filepush/dab/src/utils/formatmanager.py @@ -15,6 +15,9 @@ def __init__(self): AutoLoaderOption("cloudFiles.inferColumnTypes", "true", True), AutoLoaderOption("cloudFiles.schemaEvolutionMode", "addNewColumns", True), } + self.expectations: dict[str, str] = { + "Rescued data should be null": "_rescued_data IS NULL" + } def __iter__(self): yield (self.name, self) @@ -56,6 +59,9 @@ def __init__(self): AutoLoaderOption("escape", "\""), AutoLoaderOption("multiLine", "false"), } + self.expectations |= { + "Corrupted record should be null": "_corrupt_record IS NULL" + } class JSON(AutoLoaderFormat): def __init__(self): @@ -69,6 +75,9 @@ def __init__(self): AutoLoaderOption("inferTimestamp", "true"), AutoLoaderOption("multiLine", "true"), } + self.expectations |= { + "Corrupted record should be null": "_corrupt_record IS NULL" + } _supported_formats: dict[str, AutoLoaderFormat] = {f.name: f for f in (CSV(), JSON())} diff --git a/filepush/dab/src/utils/tablemanager.py b/filepush/dab/src/utils/tablemanager.py index 82106fea..6845e39a 100644 --- a/filepush/dab/src/utils/tablemanager.py +++ b/filepush/dab/src/utils/tablemanager.py @@ -6,6 +6,29 @@ from databricks.sdk import WorkspaceClient from databricks.sdk.errors.platform import NotFound +def validate_config(table_config: dict): + if not table_config.get("name"): + raise ValueError("name is required for table config") + if not table_config.get("format"): + raise ValueError("format is required for table config") + +def validate_configs(table_configs: list): + names = [cfg.get("name") for cfg in table_configs] + duplicates = set([name for name in names if names.count(name) > 1 and name is not None]) + if duplicates: + raise ValueError(f"Duplicate table names found in table configs: {sorted(duplicates)}") + for table_config 
in table_configs: + validate_config(table_config) + +def get_configs() -> list: + json_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "configs", "tables.json") + if not os.path.exists(json_path): + raise RuntimeError(f"Missing table configs file: {json_path}. Please following README.md to create one, deploy and run configuration_job.") + with open(json_path, "r") as f: + configs = json.load(f) + validate_configs(configs) + return configs + def get_table_volume_path(table_name: str) -> str: ws = WorkspaceClient() table_volume_path_data = os.path.join(envmanager.get_config()["volume_path_data"], table_name) @@ -29,24 +52,13 @@ def is_table_created(table_name: str) -> bool: ws = WorkspaceClient() return ws.tables.exists(full_name=f"{envmanager.get_config()['catalog_name']}.{envmanager.get_config()['schema_name']}.{table_name}").table_exists -def get_configs() -> list: - json_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "configs", "tables.json") - if not os.path.exists(json_path): - raise RuntimeError(f"Missing table configs file: {json_path}. Please following README.md to create one, deploy and run configuration_job.") - with open(json_path, "r") as f: - configs = json.load(f) - return configs - def apply_table_config(reader: DataStreamReader, table_config: dict) -> DataStreamReader: + validate_config(table_config) fmt = table_config.get("format") - if fmt is None: - raise ValueError(f"format is required for table {table_config.get('name')}") - # format-specific options from user input + # format options user_fmt_opts = table_config.get("format_options", {}) - # validate and get the final modified options final_fmt_opts = formatmanager.get_format_manager(fmt).get_merged_options(user_fmt_opts) - reader = reader.format("cloudFiles").option("cloudFiles.format", fmt) for k, v in final_fmt_opts.items(): reader = reader.option(k, v) From b0d27c8e085c49c3f9e5254a0e201d82f84c2352 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Fri, 19 Sep 2025 16:43:28 -0700 Subject: [PATCH 44/60] Add warning for default storage --- filepush/dab/src/debug_table_config.py | 4 ++++ filepush/dab/src/utils/envmanager.py | 27 ++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/filepush/dab/src/debug_table_config.py b/filepush/dab/src/debug_table_config.py index f0ff40bb..31e64754 100644 --- a/filepush/dab/src/debug_table_config.py +++ b/filepush/dab/src/debug_table_config.py @@ -34,6 +34,10 @@ import json import tempfile from utils import tablemanager +from utils import envmanager + +if not envmanager.has_default_storage(): + print("WARNING: Current catalog is not using default storage, some file push feature may not be available") # Load table config table_config_json = json.loads(table_config) diff --git a/filepush/dab/src/utils/envmanager.py b/filepush/dab/src/utils/envmanager.py index 3f215c77..82c17f45 100644 --- a/filepush/dab/src/utils/envmanager.py +++ b/filepush/dab/src/utils/envmanager.py @@ -1,5 +1,6 @@ import os import json +from databricks.sdk import WorkspaceClient def get_config() -> dict: json_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "configs", "environment.json") @@ -8,3 +9,29 @@ def get_config() -> dict: with open(json_path, "r") as f: configs = json.load(f) return configs + +def has_default_storage() -> bool: + catalog = get_config()["catalog_name"] + + w = WorkspaceClient() + + # Try SDK model first + info = w.catalogs.get(catalog) + storage_root = getattr(info, "storage_root", None) + storage_location = getattr(info, 
"storage_location", None) + props = getattr(info, "properties", {}) or {} + + # Some workspaces expose fields only via raw JSON; fall back if all empty + if not (storage_root or storage_location or props): + j = w.api_client.do("GET", f"/api/2.1/unity-catalog/catalogs/{catalog}") + storage_root = j.get("storage_root") or j.get("storageLocation") + storage_location = j.get("storage_location") or j.get("storageLocation") + props = j.get("properties", {}) or {} + + # Heuristics: any of these indicates “default storage” is set + return bool( + storage_root or + storage_location or + props.get("defaultManagedLocation") or + props.get("delta.defaultLocation") + ) \ No newline at end of file From b0beb24988dd763e57604d40e9a537247bd1b253 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Fri, 19 Sep 2025 16:47:56 -0700 Subject: [PATCH 45/60] Clean up workspace --- filepush/.gitignore | 3 - filepush/create_filepush_schema.sh | 18 ---- filepush/drop_filepush_schema.sh | 22 ----- .../databricks_template_schema.json | 17 ---- .../filepush-template/library/variables.tmpl | 23 ------ .../template/__preamble.tmpl | 5 -- .../{{.schema_name}}/databricks.yml.tmpl | 28 ------- .../{{.schema_name}}/resources/job.yml.tmpl | 15 ---- .../resources/pipeline.yml.tmpl | 14 ---- .../resources/schema.yml.tmpl | 9 -- .../resources/volume.yml.tmpl | 8 -- .../src/pipelines/ingestion.py.tmpl | 82 ------------------- ...{{.schema_name}}_readfiles_kernel.sql.tmpl | 15 ---- .../{{.schema_name}}/tools/env.sh.tmpl | 42 ---------- .../get_volume_path_from_pipeline_config.sh | 7 -- .../get_volume_path_from_schema_dbproperty.sh | 7 -- .../get_volume_path_from_table_property.sh | 7 -- .../tools/open_all_resources.sh | 10 --- .../set_volume_path_to_schema_dbproperty.sh | 7 -- .../{{.schema_name}}/tools/trigger_refresh.sh | 7 -- .../tools/upload_to_volume.sh | 12 --- filepush/push_file_to_table.sh | 15 ---- 22 files changed, 373 deletions(-) delete mode 100755 filepush/create_filepush_schema.sh delete mode 100755 filepush/drop_filepush_schema.sh delete mode 100644 filepush/filepush-template/databricks_template_schema.json delete mode 100644 filepush/filepush-template/library/variables.tmpl delete mode 100644 filepush/filepush-template/template/__preamble.tmpl delete mode 100644 filepush/filepush-template/template/{{.schema_name}}/databricks.yml.tmpl delete mode 100644 filepush/filepush-template/template/{{.schema_name}}/resources/job.yml.tmpl delete mode 100644 filepush/filepush-template/template/{{.schema_name}}/resources/pipeline.yml.tmpl delete mode 100644 filepush/filepush-template/template/{{.schema_name}}/resources/schema.yml.tmpl delete mode 100644 filepush/filepush-template/template/{{.schema_name}}/resources/volume.yml.tmpl delete mode 100644 filepush/filepush-template/template/{{.schema_name}}/src/pipelines/ingestion.py.tmpl delete mode 100644 filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{.schema_name}}_readfiles_kernel.sql.tmpl delete mode 100644 filepush/filepush-template/template/{{.schema_name}}/tools/env.sh.tmpl delete mode 100755 filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_pipeline_config.sh delete mode 100755 filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_schema_dbproperty.sh delete mode 100755 filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_table_property.sh delete mode 100755 filepush/filepush-template/template/{{.schema_name}}/tools/open_all_resources.sh delete mode 100755 
filepush/filepush-template/template/{{.schema_name}}/tools/set_volume_path_to_schema_dbproperty.sh delete mode 100755 filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh delete mode 100755 filepush/filepush-template/template/{{.schema_name}}/tools/upload_to_volume.sh delete mode 100755 filepush/push_file_to_table.sh diff --git a/filepush/.gitignore b/filepush/.gitignore index 0e53a123..722d5e71 100644 --- a/filepush/.gitignore +++ b/filepush/.gitignore @@ -1,4 +1 @@ .vscode -up.sh -down.sh -conf.json diff --git a/filepush/create_filepush_schema.sh b/filepush/create_filepush_schema.sh deleted file mode 100755 index 3815470d..00000000 --- a/filepush/create_filepush_schema.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env bash -usage() { - echo "Usage: $(basename $0) " -} -if [ -z "$1" ] || [ -z "$2" ]; then - usage - exit 1 -fi -if ! databricks catalogs get "$1" >/dev/null 2>&1; then - echo "Catalog \`$1\` not found (or no permission)" - exit 1 -fi -databricks bundle init filepush-template --config-file <(echo "{\"catalog_name\": \"$1\", \"schema_name\": \"$2\"}") -working_dir=$(pwd) -schema_name=$2 -cd $schema_name -databricks bundle deploy --force-lock --auto-approve -t prod -cd $working_dir \ No newline at end of file diff --git a/filepush/drop_filepush_schema.sh b/filepush/drop_filepush_schema.sh deleted file mode 100755 index adbd2521..00000000 --- a/filepush/drop_filepush_schema.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash -usage() { - echo "Usage: $(basename $0) " -} -if [ -z "$1" ] || [ -z "$2" ]; then - usage - exit 1 -fi -if ! databricks catalogs get "$1" >/dev/null 2>&1; then - echo "Catalog \`$1\` not found (or no permission)" - exit 1 -fi -volume_path=$(databricks schemas get $1.$2 --output json | jq -r '.properties["filepush.volume_path"]') -if [ -z "$volume_path" ] || [ "$volume_path" == "null" ]; then - echo "Schema \`$1.$2\` is not a filepush schema. Did you run create_filepush_schema.sh to create it?" - exit 1 -fi -working_dir=$(pwd) -schema_name=$2 -cd $schema_name -databricks bundle destroy --force-lock -t prod -cd $working_dir \ No newline at end of file diff --git a/filepush/filepush-template/databricks_template_schema.json b/filepush/filepush-template/databricks_template_schema.json deleted file mode 100644 index f150630b..00000000 --- a/filepush/filepush-template/databricks_template_schema.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "properties": { - "catalog_name": { - "type": "string", - "default": "{{default_catalog}}", - "description": "Name of the catalog where tables and pipelines will be created.", - "order": 1 - }, - "schema_name": { - "type": "string", - "default": "default", - "description": "Name of the schema where tables and pipelines will be created.", - "order": 2 - } - }, - "success_message": "\nYour file push bundle under catalog and schema {{.catalog_name}}.{{.schema_name}} has been created." 
-} diff --git a/filepush/filepush-template/library/variables.tmpl b/filepush/filepush-template/library/variables.tmpl deleted file mode 100644 index 34b8f79e..00000000 --- a/filepush/filepush-template/library/variables.tmpl +++ /dev/null @@ -1,23 +0,0 @@ -{{ define `volume_path` -}} - /Volumes/{{.catalog_name}}/{{.schema_name}}/{{.schema_name}}_volume/ -{{- end }} - -{{ define `volume_data_path` -}} - dbfs:{{template `volume_path` .}}data/ -{{- end }} - -{{ define `volume_baddata_path` -}} - dbfs:{{template `volume_path` .}}baddata/ -{{- end }} - -{{ define `volume_archive_path` -}} - dbfs:{{template `volume_path` .}}archive/ -{{- end }} - -{{ define `volume_path_url` -}} - dbfs:{{template `volume_path` .}} -{{- end }} - -{{ define `raw_table_name_format` -}} - {{.connector_name}}_raw -{{- end}} diff --git a/filepush/filepush-template/template/__preamble.tmpl b/filepush/filepush-template/template/__preamble.tmpl deleted file mode 100644 index b538c75a..00000000 --- a/filepush/filepush-template/template/__preamble.tmpl +++ /dev/null @@ -1,5 +0,0 @@ -# Preamble - -This file only template directives; it is skipped for the actual output. - -{{skip "__preamble"}} diff --git a/filepush/filepush-template/template/{{.schema_name}}/databricks.yml.tmpl b/filepush/filepush-template/template/{{.schema_name}}/databricks.yml.tmpl deleted file mode 100644 index d32c1802..00000000 --- a/filepush/filepush-template/template/{{.schema_name}}/databricks.yml.tmpl +++ /dev/null @@ -1,28 +0,0 @@ -# databricks.yml -# This is the configuration for the file push DAB {{.schema_name}}. - -bundle: - name: {{.schema_name}} - -include: - - resources/*.yml - -experimental: - skip_name_prefix_for_schema: true - -targets: - # The deployment targets. See https://docs.databricks.com/en/dev-tools/bundles/deployment-modes.html - dev: - mode: development - default: true - workspace: - host: {{workspace_host}} - - prod: - mode: production - workspace: - host: {{workspace_host}} - root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} - permissions: - - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} - level: CAN_MANAGE diff --git a/filepush/filepush-template/template/{{.schema_name}}/resources/job.yml.tmpl b/filepush/filepush-template/template/{{.schema_name}}/resources/job.yml.tmpl deleted file mode 100644 index 97d09b12..00000000 --- a/filepush/filepush-template/template/{{.schema_name}}/resources/job.yml.tmpl +++ /dev/null @@ -1,15 +0,0 @@ -# The main job for schema {{.schema_name}} -# This job will trigger in the schema pipeline - -resources: - jobs: - {{.schema_name}}_job: - name: {{.schema_name}}_job - tasks: - - task_key: {{.schema_name}}_pipeline_refresh - pipeline_task: - pipeline_id: ${resources.pipelines.{{.schema_name}}_pipeline.id} - trigger: - file_arrival: - url: /Volumes/{{.catalog_name}}/${resources.schemas.{{.schema_name}}.name}/{{.schema_name}}_volume/ - pause_status: UNPAUSED diff --git a/filepush/filepush-template/template/{{.schema_name}}/resources/pipeline.yml.tmpl b/filepush/filepush-template/template/{{.schema_name}}/resources/pipeline.yml.tmpl deleted file mode 100644 index 21b124be..00000000 --- a/filepush/filepush-template/template/{{.schema_name}}/resources/pipeline.yml.tmpl +++ /dev/null @@ -1,14 +0,0 @@ -# The table refresh pipeline for schema {{.schema_name}} - -resources: - pipelines: - {{.schema_name}}_pipeline: - name: {{.schema_name}}_pipeline - catalog: {{.catalog_name}} - schema: {{.schema_name}} - serverless: true - libraries: - 
- file: - path: ../src/pipelines/ingestion.py - configuration: - filepush.volume_path: /Volumes/{{.catalog_name}}/${resources.schemas.{{.schema_name}}.name}/{{.schema_name}}_volume/ diff --git a/filepush/filepush-template/template/{{.schema_name}}/resources/schema.yml.tmpl b/filepush/filepush-template/template/{{.schema_name}}/resources/schema.yml.tmpl deleted file mode 100644 index 032b7b9d..00000000 --- a/filepush/filepush-template/template/{{.schema_name}}/resources/schema.yml.tmpl +++ /dev/null @@ -1,9 +0,0 @@ -# The schema {{.schema_name}} - -resources: - schemas: - {{.schema_name}}: - name: {{.schema_name}} - catalog_name: {{.catalog_name}} - properties: - filepush.volume_path: /Volumes/{{.catalog_name}}/{{.schema_name}}/{{.schema_name}}_volume/ \ No newline at end of file diff --git a/filepush/filepush-template/template/{{.schema_name}}/resources/volume.yml.tmpl b/filepush/filepush-template/template/{{.schema_name}}/resources/volume.yml.tmpl deleted file mode 100644 index 95904249..00000000 --- a/filepush/filepush-template/template/{{.schema_name}}/resources/volume.yml.tmpl +++ /dev/null @@ -1,8 +0,0 @@ -# The file staging volume for schema {{.schema_name}} - -resources: - volumes: - {{.schema_name}}_volume: - name: {{.schema_name}}_volume - catalog_name: {{.catalog_name}} - schema_name: ${resources.schemas.{{.schema_name}}.name} diff --git a/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/ingestion.py.tmpl b/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/ingestion.py.tmpl deleted file mode 100644 index 9c837652..00000000 --- a/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/ingestion.py.tmpl +++ /dev/null @@ -1,82 +0,0 @@ -import dlt -from dbruntime.dbutils import FileInfo -import re -import os - -# Dynamic Tables -def sanitize_table_name(name: str) -> str: - """ - Sanitize a table name. - - Lowercase - - Replace non [a-z0-9_] with underscores - - Ensure it doesn't start with a digit - """ - n = name.strip().lower() - n = re.sub(r"[^a-z0-9_]", "_", n) - if re.match(r"^[0-9]", n): - n = f"t_{n}" - n = re.sub(r"_+", "_", n).strip("_") - return n or "t_unnamed" - -def is_valid_table_name(name: str) -> bool: - """ - Validate a table name. - - Must be alphanumeric - - Must not start with a digit - - Must not contain any special characters - """ - pat = re.compile(r'^[A-Za-z0-9_]+$') - return pat.match(name) is not None - -def dbfs_is_dir(f: FileInfo): - is_dir_attr = getattr(f, "isDir", None) - return is_dir_attr() if callable(is_dir_attr) else f.name.endswith("/") - -def list_immediate_subdirs(path: str): - items = dbutils.fs.ls(path) - out = [] - for f in items: - if dbfs_is_dir(f): - # f.name often ends with '/', drop it for a clean folder name - clean_name = f.name[:-1] if f.name.endswith("/") else f.name - if is_valid_table_name(clean_name): - out.append((clean_name, f.path.removeprefix('dbfs:'))) - else: - print(f"Skipping invalid table name: {clean_name}. It must be alphanumeric connected by underscores and not start with a digit.") - return out - -def make_dlt_table(subdir_name: str, subdir_path: str): - """ - Defines a DLT table for a given subfolder at import time. - If table does not exist, it will create a read_files kernel and use that to create the table. 
- """ - table_name = sanitize_table_name(subdir_name) - kernel_file_name = f"./{{.schema_name}}_{table_name}_readfiles_kernel.sql" - - if not os.path.exists(kernel_file_name): - print(f"Initialize table {table_name}") - with open(f"./{{.schema_name}}_readfiles_kernel.sql", "r") as f: - kernel_query_fmt = f.read() - with open(kernel_file_name, "w") as f: - table_kernel_query = kernel_query_fmt % subdir_path - f.write(table_kernel_query) - - if len(dbutils.fs.ls(subdir_path)) > 0: - @dlt.table( - name=table_name, - comment=f"Auto-created from subfolder: {subdir_path} (streaming via Auto Loader)", - table_properties={ - "filepush.volume_path": f"{subdir_path}" - } - ) - def _auto_loader_table(): - with open(kernel_file_name, "r") as f: - table_kernel_query = f.read() - print(table_kernel_query.replace("read_files(", "STREAM read_files(")) - return spark.sql(table_kernel_query.replace("read_files(", "STREAM read_files(")) - else: - print(f"Waiting for files to land in {subdir_path}") - -volume_path_root = spark.conf.get("filepush.volume_path") -for subdir_name, subdir_path in list_immediate_subdirs(volume_path_root): - make_dlt_table(subdir_name, subdir_path) \ No newline at end of file diff --git a/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{.schema_name}}_readfiles_kernel.sql.tmpl b/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{.schema_name}}_readfiles_kernel.sql.tmpl deleted file mode 100644 index 55666737..00000000 --- a/filepush/filepush-template/template/{{.schema_name}}/src/pipelines/{{.schema_name}}_readfiles_kernel.sql.tmpl +++ /dev/null @@ -1,15 +0,0 @@ --- Kernel template for read_files -SELECT - * -FROM - read_files( - '%s', - ignoreCorruptFiles => 'true', -- If a different malformed file pushed and RocksDB has it enlisted, this will ignore the error when the file is deleted. - ignoreMissingFiles => 'true' -- If a different file format is accidentally pushed and RocksDB has it enlisted, this will ignore the error when the file is deleted. - -- Do not change anything above - -- Add any additional options below - -- Example: - -- , - -- header => 'true', - -- escape => '"' - ) diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/env.sh.tmpl b/filepush/filepush-template/template/{{.schema_name}}/tools/env.sh.tmpl deleted file mode 100644 index a5a64d7c..00000000 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/env.sh.tmpl +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env bash -# This file is used to set the environment variables for the filepush bundle. -# It is sourced by the other scripts in the tools directory. -# This should be deployed **after** the bundle is deployed. - -# Prevent running directly; this file must be *sourced* -(return 0 2>/dev/null) || { echo "Source this file: . 
$(basename "$0")"; exit 1; } - -# Idempotent guard -# Check if the environment is already set and non-empty -if [[ -n "${_FILEPUSH_ENV_LOADED:-}" ]]; then - return 0 -fi -export _FILEPUSH_ENV_LOADED=1 - -# Sets the target for the bundle -ARG_TARGET="dev" -ARG_POSITIONAL=() - -while [[ $# -gt 0 ]]; do -case "$1" in - --target) [[ $# -ge 2 ]] || { echo "Error: --target needs a value"; return 2; } - ARG_TARGET="$2"; shift 2 ;; - --target=*) ARG_TARGET="${1#*=}"; shift ;; - -t) [[ $# -ge 2 ]] || { echo "Error: -t needs a value"; return 2; } - ARG_TARGET="$2"; shift 2 ;; - --) shift; ARG_POSITIONAL+=("$@"); break ;; - -h|--help) usage; return 1 ;; - -*) echo "Unknown option: $1"; usage; return 2 ;; - *) ARG_POSITIONAL+=("$1"); shift ;; -esac -done - -export BUNDLE_TARGET=$ARG_TARGET - -summary=$(databricks bundle summary -t $BUNDLE_TARGET --output json) -export FILEPUSH_BUNDLE_NAME={{.schema_name}} -export FILEPUSH_CATALOG_NAME={{.catalog_name}} -export FILEPUSH_SCHEMA_NAME=$(echo $summary | jq -r '.resources.schemas.{{.schema_name}}.name') -export FILEPUSH_VOLUME_PATH=/Volumes/{{.catalog_name}}/${FILEPUSH_SCHEMA_NAME}/{{.schema_name}}_volume/ -export FILEPUSH_PIPELINE_ID=$(echo $summary | jq -r '.resources.pipelines.{{.schema_name}}_pipeline.id') -export FILEPUSH_JOB_NAME={{.schema_name}}_job diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_pipeline_config.sh b/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_pipeline_config.sh deleted file mode 100755 index 4fef63c6..00000000 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_pipeline_config.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env bash -usage() { - echo "Usage: $(basename $0) [--target=dev|prod]" -} -export -f usage -. $(dirname $0)/env.sh $@ -databricks pipelines get $FILEPUSH_PIPELINE_ID -t $BUNDLE_TARGET --output json | jq '.spec.configuration["filepush.volume_path"]' diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_schema_dbproperty.sh b/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_schema_dbproperty.sh deleted file mode 100755 index 14da35c1..00000000 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_schema_dbproperty.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env bash -usage() { - echo "Usage: $(basename $0) [--target=dev|prod]" -} -export -f usage -. $(dirname $0)/env.sh $@ -databricks schemas get ${FILEPUSH_CATALOG_NAME}.${FILEPUSH_SCHEMA_NAME} -t $BUNDLE_TARGET --output json | jq '.properties["filepush.volume_path"]' diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_table_property.sh b/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_table_property.sh deleted file mode 100755 index fb93f468..00000000 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/get_volume_path_from_table_property.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env bash -usage() { - echo "Usage: $(basename $0) [--target=dev|prod]" -} -export -f usage -. 
$(dirname $0)/env.sh $@ -databricks tables get ${FILEPUSH_CATALOG_NAME}.${FILEPUSH_SCHEMA_NAME}.$1 -t $BUNDLE_TARGET --output json | jq '.properties["filepush.volume_path"]' diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/open_all_resources.sh b/filepush/filepush-template/template/{{.schema_name}}/tools/open_all_resources.sh deleted file mode 100755 index 6abf8173..00000000 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/open_all_resources.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env bash -usage() { - echo "Usage: $(basename $0) [--target=dev|prod]" -} -export -f usage -. $(dirname $0)/env.sh $@ -databricks bundle open ${FILEPUSH_BUNDLE_NAME} -t $BUNDLE_TARGET -databricks bundle open ${FILEPUSH_BUNDLE_NAME}_job -t $BUNDLE_TARGET -databricks bundle open ${FILEPUSH_BUNDLE_NAME}_pipeline -t $BUNDLE_TARGET -databricks bundle open ${FILEPUSH_BUNDLE_NAME}_volume -t $BUNDLE_TARGET \ No newline at end of file diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/set_volume_path_to_schema_dbproperty.sh b/filepush/filepush-template/template/{{.schema_name}}/tools/set_volume_path_to_schema_dbproperty.sh deleted file mode 100755 index 3169f1c0..00000000 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/set_volume_path_to_schema_dbproperty.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env bash -usage() { - echo "Usage: $(basename $0) [--target=dev|prod]" -} -export -f usage -. $(dirname $0)/env.sh $@ -databricks schemas update ${FILEPUSH_CATALOG_NAME}.${FILEPUSH_SCHEMA_NAME} -t $BUNDLE_TARGET --json '{ "properties": { "filepush.volume_path": "'${FILEPUSH_VOLUME_PATH}'" } }' diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh b/filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh deleted file mode 100755 index f85c34e1..00000000 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/trigger_refresh.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env bash -usage() { - echo "Usage: $(basename $0) [--target=dev|prod]" -} -export -f usage -. $(dirname $0)/env.sh $@ -databricks bundle run $FILEPUSH_JOB_NAME -t $BUNDLE_TARGET diff --git a/filepush/filepush-template/template/{{.schema_name}}/tools/upload_to_volume.sh b/filepush/filepush-template/template/{{.schema_name}}/tools/upload_to_volume.sh deleted file mode 100755 index 31e5fbde..00000000 --- a/filepush/filepush-template/template/{{.schema_name}}/tools/upload_to_volume.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env bash -usage() { - echo "Usage: $(basename $0) [--target=dev|prod]" -} -export -f usage -. $(dirname $0)/env.sh $@ -if [ -z "$1" ] || [ -z "$2" ]; then - usage - exit 1 -fi -databricks fs mkdir dbfs:${FILEPUSH_VOLUME_PATH}$1/ -t $BUNDLE_TARGET -databricks fs cp $2 dbfs:${FILEPUSH_VOLUME_PATH}$1/ -t $BUNDLE_TARGET diff --git a/filepush/push_file_to_table.sh b/filepush/push_file_to_table.sh deleted file mode 100755 index e652b6b8..00000000 --- a/filepush/push_file_to_table.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env bash -usage() { - echo "Usage: $(basename $0) " -} -if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ] || [ -z "$4" ]; then - usage - exit 1 -fi -volume_path=$(databricks schemas get $1.$2 --output json | jq -r '.properties["filepush.volume_path"]') -if [ -z "$volume_path" ] || [ "$volume_path" == "null" ]; then - echo "Schema \`$1.$2\` is not a filepush schema. Did you run create_filepush_schema.sh to create it?" 
- exit 1 -fi -databricks fs mkdir dbfs:${volume_path}$3/ -databricks fs cp $4 dbfs:${volume_path}$3/ \ No newline at end of file From 36bad25fb30b294ac830ac3725f0b17f4521fffd Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Mon, 22 Sep 2025 11:07:15 -0700 Subject: [PATCH 46/60] Create cleansource move folder --- filepush/dab/src/utils/initialization.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/filepush/dab/src/utils/initialization.py b/filepush/dab/src/utils/initialization.py index 09f79947..211d26d8 100644 --- a/filepush/dab/src/utils/initialization.py +++ b/filepush/dab/src/utils/initialization.py @@ -15,6 +15,7 @@ schema_name = args.schema_name volume_path_root = args.volume_path_root volume_path_data = args.volume_path_root + "/data" +volume_path_archive = args.volume_path_root + "/archive" logging_level = logging.DEBUG if args.logging_level == "dev" else logging.INFO # Logging @@ -33,7 +34,8 @@ logger.debug(f"Volume path data: {volume_path_data}") ws.schemas.update(full_name=f"{catalog_name}.{schema_name}", properties={ "filepush.volume_path_root": volume_path_root, - "filepush.volume_path_data": volume_path_data + "filepush.volume_path_data": volume_path_data, + "filepush.volume_path_archive": volume_path_archive }) logger.info(f"Schema {catalog_name}.{schema_name} configured") @@ -41,11 +43,16 @@ logger.info(f"Initializing volume folder structure {volume_path_root}") logger.debug(f"Creating data directory {volume_path_data}") ws.files.create_directory(volume_path_data) +logger.debug(f"Creating archive directory {volume_path_archive}") +ws.files.create_directory(volume_path_archive) with open("../configs/tables.json", "r") as f: for table in json.load(f): table_volume_path_data = f"{volume_path_data}/{table['name']}" logger.debug(f"Creating table directory {table_volume_path_data}") ws.files.create_directory(table_volume_path_data) + table_volume_path_archive = f"{volume_path_archive}/{table['name']}" + logger.debug(f"Creating table archive directory {table_volume_path_archive}") + ws.files.create_directory(table_volume_path_archive) logger.info(f"Volume {volume_path_root} configured") # Dump configs to environment json @@ -54,5 +61,6 @@ "catalog_name": catalog_name, "schema_name": schema_name, "volume_path_root": volume_path_root, - "volume_path_data": volume_path_data + "volume_path_data": volume_path_data, + "volume_path_archive": volume_path_archive }, f) From b88869be2c776b25b391e58176780a2e9371dc86 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Mon, 22 Sep 2025 11:28:50 -0700 Subject: [PATCH 47/60] Include cleansource move destination --- filepush/dab/src/utils/formatmanager.py | 12 +++++++++++- filepush/dab/src/utils/tablemanager.py | 3 ++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/filepush/dab/src/utils/formatmanager.py b/filepush/dab/src/utils/formatmanager.py index ccc9d462..ce496121 100644 --- a/filepush/dab/src/utils/formatmanager.py +++ b/filepush/dab/src/utils/formatmanager.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from . 
import envmanager @dataclass(frozen=True, slots=True) class AutoLoaderOption: @@ -14,6 +15,9 @@ def __init__(self): self.options: set[AutoLoaderOption] = { AutoLoaderOption("cloudFiles.inferColumnTypes", "true", True), AutoLoaderOption("cloudFiles.schemaEvolutionMode", "addNewColumns", True), + AutoLoaderOption("cloudFiles.cleanSource", "MOVE", True), + AutoLoaderOption("cloudFiles.cleanSource.retentionDuration", "14 days", True), + AutoLoaderOption("cloudFiles.cleanSource.moveDestination", f"{envmanager.get_config()['volume_path_archive']}/{{table_name}}", True) } self.expectations: dict[str, str] = { "Rescued data should be null": "_rescued_data IS NULL" @@ -39,12 +43,18 @@ def get_modified_options(self, options: dict[str, str]) -> dict[str, str]: defaults = self.get_userfacing_options() return {k: v for k, v in options.items() if k in defaults and v != defaults[k]} - def get_merged_options(self, options: dict[str, str]) -> dict[str, str]: + def get_merged_options(self, options: dict[str, str], table_name: str) -> dict[str, str]: self.validate_user_options(options) defaults = self.get_userfacing_options() merged = defaults.copy() merged.update({k: v for k, v in options.items() if k in defaults}) + + # Format the moveDestination if table_name is supplied + move_dest_key = "cloudFiles.cleanSource.moveDestination" + if table_name is not None and move_dest_key in merged: + merged[move_dest_key] = merged[move_dest_key].format(table_name=table_name) + return merged class CSV(AutoLoaderFormat): diff --git a/filepush/dab/src/utils/tablemanager.py b/filepush/dab/src/utils/tablemanager.py index 6845e39a..4d4095bd 100644 --- a/filepush/dab/src/utils/tablemanager.py +++ b/filepush/dab/src/utils/tablemanager.py @@ -54,11 +54,12 @@ def is_table_created(table_name: str) -> bool: def apply_table_config(reader: DataStreamReader, table_config: dict) -> DataStreamReader: validate_config(table_config) + name = table_config.get("name") fmt = table_config.get("format") # format options user_fmt_opts = table_config.get("format_options", {}) - final_fmt_opts = formatmanager.get_format_manager(fmt).get_merged_options(user_fmt_opts) + final_fmt_opts = formatmanager.get_format_manager(fmt).get_merged_options(user_fmt_opts, name) reader = reader.format("cloudFiles").option("cloudFiles.format", fmt) for k, v in final_fmt_opts.items(): reader = reader.option(k, v) From 7a89c08e9b3531123e7afb53fa86e51a13b51596 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Mon, 22 Sep 2025 15:42:59 -0700 Subject: [PATCH 48/60] Fix issue on _corrupted_record column --- filepush/dab/src/ingestion.py | 13 +++++++------ filepush/dab/src/utils/envmanager.py | 3 ++- filepush/dab/src/utils/formatmanager.py | 5 +++++ filepush/dab/src/utils/tablemanager.py | 7 +++++-- 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/filepush/dab/src/ingestion.py b/filepush/dab/src/ingestion.py index 264484a2..d6c3b996 100644 --- a/filepush/dab/src/ingestion.py +++ b/filepush/dab/src/ingestion.py @@ -2,19 +2,19 @@ from utils import tablemanager from utils import formatmanager -def _make_append_flow(table_name, table_config, table_volume_path): +def _make_append_flow(table_name, table_config, table_volume_path, format_mgr): def _body(): reader = tablemanager.apply_table_config(spark.readStream, table_config) # use _rescued_data as placeholder when no data file is present if not tablemanager.has_data_file(table_name): - reader = reader.schema("_rescued_data STRING") + reader = reader.schema(",".join(format_mgr.default_schema)) return 
reader.load(table_volume_path) # give the function a unique name (nice for logs / debug) _body.__name__ = f"append_{table_name.lower()}" # apply the decorator programmatically - return dlt.append_flow(target=table_name, name=table_name)(_body) + dlt.append_flow(target=table_name, name=table_name)(_body) table_configs = tablemanager.get_configs() @@ -22,12 +22,13 @@ def _body(): tablemanager.validate_config(cfg) tbl = cfg["name"] path = tablemanager.get_table_volume_path(tbl) - expts = formatmanager.get_format_manager(cfg["format"]).expectations + fmt = formatmanager.get_format_manager(cfg["format"]) + expts = fmt.expectations dlt.create_streaming_table( name=tbl, comment="File push created table", table_properties={"filepush.table_volume_path_data": path}, + expect_all=expts ) - dlt.expect_all(expts) - _make_append_flow(tbl, cfg, path) + _make_append_flow(tbl, cfg, path, fmt) diff --git a/filepush/dab/src/utils/envmanager.py b/filepush/dab/src/utils/envmanager.py index 82c17f45..ba822a98 100644 --- a/filepush/dab/src/utils/envmanager.py +++ b/filepush/dab/src/utils/envmanager.py @@ -34,4 +34,5 @@ def has_default_storage() -> bool: storage_location or props.get("defaultManagedLocation") or props.get("delta.defaultLocation") - ) \ No newline at end of file + ) + \ No newline at end of file diff --git a/filepush/dab/src/utils/formatmanager.py b/filepush/dab/src/utils/formatmanager.py index ce496121..5f61ca7c 100644 --- a/filepush/dab/src/utils/formatmanager.py +++ b/filepush/dab/src/utils/formatmanager.py @@ -22,6 +22,7 @@ def __init__(self): self.expectations: dict[str, str] = { "Rescued data should be null": "_rescued_data IS NULL" } + self.default_schema: set[str] = {"_rescued_data STRING"} def __iter__(self): yield (self.name, self) @@ -64,6 +65,7 @@ def __init__(self): self.options |= { AutoLoaderOption("header", "true", True), AutoLoaderOption("mergeSchema", "true", True), + AutoLoaderOption("mode", "PERMISSIVE", True), AutoLoaderOption("columnNameOfCorruptRecord", "_corrupt_record", True), AutoLoaderOption("delimiter", ","), AutoLoaderOption("escape", "\""), @@ -72,6 +74,7 @@ def __init__(self): self.expectations |= { "Corrupted record should be null": "_corrupt_record IS NULL" } + self.default_schema |= {"_corrupt_record STRING"} class JSON(AutoLoaderFormat): def __init__(self): @@ -79,6 +82,7 @@ def __init__(self): self.name = "JSON" self.options |= { AutoLoaderOption("mergeSchema", "true", True), + AutoLoaderOption("mode", "PERMISSIVE", True), AutoLoaderOption("columnNameOfCorruptRecord", "_corrupt_record", True), AutoLoaderOption("allowComments", "true"), AutoLoaderOption("allowSingleQuotes", "true"), @@ -88,6 +92,7 @@ def __init__(self): self.expectations |= { "Corrupted record should be null": "_corrupt_record IS NULL" } + self.default_schema |= {"_corrupt_record STRING"} _supported_formats: dict[str, AutoLoaderFormat] = {f.name: f for f in (CSV(), JSON())} diff --git a/filepush/dab/src/utils/tablemanager.py b/filepush/dab/src/utils/tablemanager.py index 4d4095bd..83c8a97c 100644 --- a/filepush/dab/src/utils/tablemanager.py +++ b/filepush/dab/src/utils/tablemanager.py @@ -56,10 +56,11 @@ def apply_table_config(reader: DataStreamReader, table_config: dict) -> DataStre validate_config(table_config) name = table_config.get("name") fmt = table_config.get("format") + fmt_mgr = formatmanager.get_format_manager(fmt) # format options user_fmt_opts = table_config.get("format_options", {}) - final_fmt_opts = formatmanager.get_format_manager(fmt).get_merged_options(user_fmt_opts, name) + 
final_fmt_opts = fmt_mgr.get_merged_options(user_fmt_opts, name) reader = reader.format("cloudFiles").option("cloudFiles.format", fmt) for k, v in final_fmt_opts.items(): reader = reader.option(k, v) @@ -67,7 +68,9 @@ def apply_table_config(reader: DataStreamReader, table_config: dict) -> DataStre # schema hints schema_hints = table_config.get("schema_hints") if schema_hints: - reader = reader.option("cloudFiles.schemaHints", schema_hints) + reader = reader.option("cloudFiles.schemaHints", ",".join({schema_hints} | fmt_mgr.default_schema)) + else: + reader = reader.option("cloudFiles.schemaHints", ",".join(fmt_mgr.default_schema)) return reader \ No newline at end of file From c26bf421a8f54b12a3cf0d8142c80caa2cd1408f Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Mon, 22 Sep 2025 17:28:56 -0700 Subject: [PATCH 49/60] Tidy up --- filepush/dab/src/debug_table_config.py | 4 +-- filepush/dab/src/ingestion.py | 10 +++---- filepush/dab/src/utils/formatmanager.py | 8 +++--- filepush/dab/src/utils/tablemanager.py | 35 ++++++++++++++++++++----- 4 files changed, 38 insertions(+), 19 deletions(-) diff --git a/filepush/dab/src/debug_table_config.py b/filepush/dab/src/debug_table_config.py index 31e64754..22928500 100644 --- a/filepush/dab/src/debug_table_config.py +++ b/filepush/dab/src/debug_table_config.py @@ -44,11 +44,9 @@ tablemanager.validate_config(table_config_json) table_name = table_config_json["name"] table_volume_path_data = tablemanager.get_table_volume_path(table_name) -table_reader = tablemanager.apply_table_config(spark.readStream, table_config_json) assert tablemanager.has_data_file(table_name), f"No data file found in {table_volume_path_data}. Please upload at least 1 file to {table_volume_path_data}" # Put schema location in temp directory with tempfile.TemporaryDirectory() as tmpdir: - table_reader.option("cloudFiles.schemaLocation", tmpdir) - display(table_reader.load(table_volume_path_data)) \ No newline at end of file + display(tablemanager.get_df_with_config(spark, table_config_json, tmpdir)) \ No newline at end of file diff --git a/filepush/dab/src/ingestion.py b/filepush/dab/src/ingestion.py index d6c3b996..1046a140 100644 --- a/filepush/dab/src/ingestion.py +++ b/filepush/dab/src/ingestion.py @@ -2,13 +2,13 @@ from utils import tablemanager from utils import formatmanager -def _make_append_flow(table_name, table_config, table_volume_path, format_mgr): +def _make_append_flow(table_name, table_config, table_volume_path): def _body(): - reader = tablemanager.apply_table_config(spark.readStream, table_config) # use _rescued_data as placeholder when no data file is present if not tablemanager.has_data_file(table_name): - reader = reader.schema(",".join(format_mgr.default_schema)) - return reader.load(table_volume_path) + return tablemanager.get_placeholder_df_with_config(spark, table_config) + else: + return tablemanager.get_df_with_config(spark, table_config) # give the function a unique name (nice for logs / debug) _body.__name__ = f"append_{table_name.lower()}" @@ -31,4 +31,4 @@ def _body(): table_properties={"filepush.table_volume_path_data": path}, expect_all=expts ) - _make_append_flow(tbl, cfg, path, fmt) + _make_append_flow(tbl, cfg, path) diff --git a/filepush/dab/src/utils/formatmanager.py b/filepush/dab/src/utils/formatmanager.py index 5f61ca7c..663b897a 100644 --- a/filepush/dab/src/utils/formatmanager.py +++ b/filepush/dab/src/utils/formatmanager.py @@ -24,8 +24,8 @@ def __init__(self): } self.default_schema: set[str] = {"_rescued_data STRING"} - def 
__iter__(self): - yield (self.name, self) + def get_default_schema(self) -> str: + return ", ".join(self.default_schema) def get_userfacing_options(self) -> dict[str, str]: return {opt.key: opt.value for opt in self.options if not opt.hidden} @@ -51,9 +51,9 @@ def get_merged_options(self, options: dict[str, str], table_name: str) -> dict[s merged = defaults.copy() merged.update({k: v for k, v in options.items() if k in defaults}) - # Format the moveDestination if table_name is supplied + # Format the moveDestination with table_name move_dest_key = "cloudFiles.cleanSource.moveDestination" - if table_name is not None and move_dest_key in merged: + if move_dest_key in merged: merged[move_dest_key] = merged[move_dest_key].format(table_name=table_name) return merged diff --git a/filepush/dab/src/utils/tablemanager.py b/filepush/dab/src/utils/tablemanager.py index 83c8a97c..33376ded 100644 --- a/filepush/dab/src/utils/tablemanager.py +++ b/filepush/dab/src/utils/tablemanager.py @@ -3,6 +3,7 @@ from . import envmanager from . import formatmanager from pyspark.sql.streaming import DataStreamReader +from pyspark.sql import DataFrame, SparkSession from databricks.sdk import WorkspaceClient from databricks.sdk.errors.platform import NotFound @@ -52,25 +53,45 @@ def is_table_created(table_name: str) -> bool: ws = WorkspaceClient() return ws.tables.exists(full_name=f"{envmanager.get_config()['catalog_name']}.{envmanager.get_config()['schema_name']}.{table_name}").table_exists -def apply_table_config(reader: DataStreamReader, table_config: dict) -> DataStreamReader: - validate_config(table_config) +def _apply_table_options(reader: DataStreamReader, table_config: dict, fmt_mgr) -> DataStreamReader: name = table_config.get("name") fmt = table_config.get("format") - fmt_mgr = formatmanager.get_format_manager(fmt) # format options user_fmt_opts = table_config.get("format_options", {}) final_fmt_opts = fmt_mgr.get_merged_options(user_fmt_opts, name) - reader = reader.format("cloudFiles").option("cloudFiles.format", fmt) + reader = reader.option("cloudFiles.format", fmt) for k, v in final_fmt_opts.items(): reader = reader.option(k, v) # schema hints schema_hints = table_config.get("schema_hints") if schema_hints: - reader = reader.option("cloudFiles.schemaHints", ",".join({schema_hints} | fmt_mgr.default_schema)) + reader = reader.option("cloudFiles.schemaHints", ", ".join({schema_hints} | fmt_mgr.default_schema)) else: - reader = reader.option("cloudFiles.schemaHints", ",".join(fmt_mgr.default_schema)) + reader = reader.option("cloudFiles.schemaHints", ", ".join(fmt_mgr.default_schema)) return reader - \ No newline at end of file + +def get_df_with_config(spark: SparkSession, table_config: dict, schema_location: str = None) -> DataFrame: + validate_config(table_config) + fmt = table_config.get("format") + fmt_mgr = formatmanager.get_format_manager(fmt) + + reader = spark.readStream.format("cloudFiles") + reader = _apply_table_options(reader, table_config, fmt_mgr) + if schema_location: + reader = reader.option("cloudFiles.schemaLocation", schema_location) + + # include file metadata + return reader.load(get_table_volume_path(table_config.get("name"))).selectExpr("*", "_metadata") + +def get_placeholder_df_with_config(spark: SparkSession, table_config: dict) -> DataFrame: + validate_config(table_config) + fmt = table_config.get("format") + fmt_mgr = formatmanager.get_format_manager(fmt) + + reader = spark.readStream.format("cloudFiles") + reader = _apply_table_options(reader, table_config, 
fmt_mgr).schema(fmt_mgr.get_default_schema()) + return reader.load(get_table_volume_path(table_config.get("name"))) \ No newline at end of file From 628753e266b6bf422b39db231bf961fde0f29720 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Tue, 23 Sep 2025 13:24:25 -0700 Subject: [PATCH 50/60] Fix file name --- filepush/{REDME.md => README.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename filepush/{REDME.md => README.md} (100%) diff --git a/filepush/REDME.md b/filepush/README.md similarity index 100% rename from filepush/REDME.md rename to filepush/README.md From 2fdaea37e3b60f982aab1548d8e286ea1867cbb1 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Tue, 23 Sep 2025 14:54:30 -0700 Subject: [PATCH 51/60] Update default target and description --- filepush/dab/databricks.yml | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/filepush/dab/databricks.yml b/filepush/dab/databricks.yml index 411ab284..bed0f416 100644 --- a/filepush/dab/databricks.yml +++ b/filepush/dab/databricks.yml @@ -7,24 +7,21 @@ bundle: include: - resources/*.yml -# experimental: -# skip_name_prefix_for_schema: true - targets: # The deployment targets. See https://docs.databricks.com/en/dev-tools/bundles/deployment-modes.html dev: mode: development - default: true workspace: host: https://e2-dogfood.staging.cloud.databricks.com prod: mode: production + default: true workspace: host: https://e2-dogfood.staging.cloud.databricks.com - # root_path: /Workspace/Users/chi.yang@databricks.com/.bundle/${bundle.name}/${bundle.target} + root_path: /Workspace/Users/${workspace.current_user.userName}/.bundle/${bundle.name}/${bundle.target} permissions: - - user_name: chi.yang@databricks.com + - user_name: ${workspace.current_user.userName} level: CAN_MANAGE variables: catalog_name: description: The existing catalog where the schema will be created. default: main schema_name: - description: The name of the schema where the tables and ingestion pipeline will be created. + description: The name of the NEW schema where the tables will be created. default: filepushschema resource_name_prefix: description: The prefix for the resource names. 
default: ${var.catalog_name}_${var.schema_name}_ - \ No newline at end of file From c17b7324f8a8ec268192982ab758005a61b4b21c Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Tue, 23 Sep 2025 14:54:54 -0700 Subject: [PATCH 52/60] Print environment to console --- filepush/dab/src/utils/initialization.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/filepush/dab/src/utils/initialization.py b/filepush/dab/src/utils/initialization.py index 211d26d8..aa106072 100644 --- a/filepush/dab/src/utils/initialization.py +++ b/filepush/dab/src/utils/initialization.py @@ -56,11 +56,14 @@ logger.info(f"Volume {volume_path_root} configured") # Dump configs to environment json +all_configs = { + "catalog_name": catalog_name, + "schema_name": schema_name, + "volume_path_root": volume_path_root, + "volume_path_data": volume_path_data, + "volume_path_archive": volume_path_archive +} with open("../configs/environment.json", "w") as f: - json.dump({ - "catalog_name": catalog_name, - "schema_name": schema_name, - "volume_path_root": volume_path_root, - "volume_path_data": volume_path_data, - "volume_path_archive": volume_path_archive - }, f) + json.dump(all_configs, f) + +logger.info(f"==========\n%s\n==========", "\n".join(f"{k}: {v}" for k, v in all_configs.items())) From e3fb6334b7ef02f703eabc87981f1442a939cd1d Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Tue, 23 Sep 2025 16:16:15 -0700 Subject: [PATCH 53/60] More doc in debug notebook --- filepush/dab/src/debug_table_config.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/filepush/dab/src/debug_table_config.py b/filepush/dab/src/debug_table_config.py index 22928500..0d697fcd 100644 --- a/filepush/dab/src/debug_table_config.py +++ b/filepush/dab/src/debug_table_config.py @@ -1,6 +1,6 @@ # Databricks notebook source # MAGIC %md -# MAGIC # Paste the table config JSON you would like to debug and assign to variable `table_config` +# MAGIC ## Paste the table config JSON you would like to debug from `./configs/tables.json` and assign to variable `table_config` # MAGIC For example, # MAGIC ``` # MAGIC table_config = r''' @@ -15,6 +15,7 @@ # MAGIC } # MAGIC ''' # MAGIC ``` +# MAGIC Only `name` and `format` are required for a table. # COMMAND ---------- @@ -31,6 +32,11 @@ # COMMAND ---------- +# MAGIC %md +# MAGIC ## Click `Run all` and inspect the parsed result. 
Iterate on the config until the result looks good + +# COMMAND ---------- + import json import tempfile from utils import tablemanager @@ -49,4 +55,9 @@ # Put schema location in temp directory with tempfile.TemporaryDirectory() as tmpdir: - display(tablemanager.get_df_with_config(spark, table_config_json, tmpdir)) \ No newline at end of file + display(tablemanager.get_df_with_config(spark, table_config_json, tmpdir)) + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ## Copy and paste the modified config back to the `./configs/tables.json` in the DAB folder \ No newline at end of file From 7dc7dcfd4d760bb7d1315a0ea166c642783cd117 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Tue, 23 Sep 2025 16:18:44 -0700 Subject: [PATCH 54/60] Add README --- filepush/README.md | 117 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) diff --git a/filepush/README.md b/filepush/README.md index 195287c9..a4321c40 100644 --- a/filepush/README.md +++ b/filepush/README.md @@ -11,3 +11,120 @@ tags: --- # Managed File Push +## Table of Contents +- [Quick Start](#quick-start) +- [Debug Table Issues](#debug-table-issues) + +## Quick Start +### Step 1. Configure tables +Define the catalog and a NEW schema name where the tables will land in `./dab/databricks.yml` +``` +variables: + catalog_name: + description: The existing catalog where the NEW schema will be created. + default: main + schema_name: + description: The name of the NEW schema where the tables will be created. + default: filepushschema + +``` +Edit the table configs in `./dab/src/configs/tables.json`. Only `name` and `format` are required for a table. + +For possible `format_options` checkout [Auto Loader Options article](https://docs.databricks.com/aws/en/ingestion/cloud-object-storage/auto-loader/options). Not all options are supported. If you are not sure, feel free to specify only the `name` and `format`, or follow steps in [Debug Table Issues](#debug-table-issues) section to help come up with the proper options. +``` +[ + { + "name": "table1", + "format": "csv", + "format_options": { + "escape": "\"" + }, + "schema_hints": "id int, name string" + }, + { + "name": "table2", + "format": "json" + } + , + ... +] + +``` + +### Step 2. Deploy & setup +``` +$ cd dab +$ databricks bundle deploy +$ databricks bundle run configuration_job +``` +Wait for the configuration job to finish before moving to the next step. + +### Step 3. Retrieve endpoint & push files +Get the volume path for uploading the files +``` +$ databricks tables get main.filepushschema.table1 --output json | jq '.properties["filepush.table_volume_path_data"]' +``` +Example output: +``` +"/Volumes/main/filepushschema/main_filepushschema_filepush_volume/data/table1" +``` +Upload files to the path above using the [UC Volume APIs of your choice](https://docs.databricks.com/aws/en/volumes/volume-files#methods-for-managing-files-in-volumes). Here is an example using the **REST API**: +``` +$ curl --request PUT https:///api/2.0/fs/files/Volumes/main/filepushschema/main_filepushschema_filepush_volume/data/table1/datafile1.csv" \ + --header "Authorization: Bearer " \ + --header "Content-Type: application/octet-stream \ + --data-binary "@/local/file/path/datafile1.csv" +``` +Here is another example using the **Databricks CLI**. This way you do not need to specify the file name at destination. 
Pay attention to the `dbfs:` URL scheme for the destination path: +``` +$ databricks fs cp /local/file/path/datafile1.csv dbfs:/Volumes/main/filepushschema/main_filepushschema_filepush_volume/data/table1 +``` + +After maximum 1 minute, the data should land the corresponding table e.g. `main.filepushschema.table1` + +## Debug Table Issues +In case the data is not parsed correctly in the destination table, follow the steps below to fix the table configs. +### Step 1. Configure tables to debug +Configure tables just like [Step 1 in Quick Start](#step-1-configure-tables). + +### Step 2. Deploy & Setup in ***dev mode*** +``` +$ cd dab +$ databricks bundle deploy -t dev +$ databricks bundle run configuration_job -t dev +``` +Wait for the configuration job to finish before moving to the next step. Example output: +``` +2025-09-23 22:03:04,938 [INFO] initialization - ========== +catalog_name: main +schema_name: dev_chi_yang_filepushschema +volume_path_root: /Volumes/main/dev_chi_yang_filepushschema/main_filepushschema_filepush_volume +volume_path_data: /Volumes/main/dev_chi_yang_filepushschema/main_filepushschema_filepush_volume/data +volume_path_archive: /Volumes/main/dev_chi_yang_filepushschema/main_filepushschema_filepush_volume/archive +========== +``` +Pay attention that, ***dev mode put a prefix to the schema name***, and you should use the name output by the initialization job for the remaining steps. + +### Step 3. Retrieve endpoint & push files to debug +Get the volume path for uploading the files, pay attention to the ***prefix*** name of the schema: +``` +$ databricks tables get main.dev_chi_yang_filepushschema.table1 --output json | jq '.properties["filepush.table_volume_path_data"]' +``` +Example output: +``` +"/Volumes/main/dev_chi_yang_filepushschema/main_filepushschema_filepush_volume/data/table1" +``` +Follow the remaining steps of [Step 3 in Quick Start](#step-3-retrieve-endpoint--push-files) to push files for debug. + +### Step 4. Debug table configs +Open the `refresh_pipeline` in the workspace: +``` +$ databricks bundle open refresh_pipeline -t dev +``` +Then click `Edit pipeline` to launch the development UI. Open the notebook `debug_table_config` and follow the instruction there to fix the table configs. Remember to copy over the config to the table configs in `./dab/src/configs/tables.json`. + +### Step 5. Fix the table configs in production +Go though [Step 2 in Quick Start](#step-2-deploy--setup) to deploy the updated config, then issue a full-refresh to fix the problematic data in the table: +``` +$ databricks bundle run refresh_pipeline --full-refresh table1 +``` From e5ff8f4831d9c70e0fd97578df2ba3d40c68305c Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Tue, 23 Sep 2025 16:28:42 -0700 Subject: [PATCH 55/60] Beautify README --- filepush/README.md | 160 +++++++++++++++++++++++++++++---------------- 1 file changed, 105 insertions(+), 55 deletions(-) diff --git a/filepush/README.md b/filepush/README.md index a4321c40..769121f8 100644 --- a/filepush/README.md +++ b/filepush/README.md @@ -4,21 +4,36 @@ language: python author: "Chi Yang" date: 2025-08-07 -tags: +tags: - ingestion - file - nocode --- # Managed File Push + +A lightweight, no‑code file ingestion workflow. Configure a set of tables, get a volume path for each, and drop files into those paths—your data lands in Unity Catalog tables via Auto Loader. + ## Table of Contents - [Quick Start](#quick-start) + - [Step 1. Configure tables](#step-1-configure-tables) + - [Step 2. 
Deploy & set up](#step-2-deploy--set-up) + - [Step 3. Retrieve endpoint & push files](#step-3-retrieve-endpoint--push-files) - [Debug Table Issues](#debug-table-issues) + - [Step 1. Configure tables to debug](#step-1-configure-tables-to-debug) + - [Step 2. Deploy & set up in dev mode](#step-2-deploy--set-up-in-dev-mode) + - [Step 3. Retrieve endpoint & push files to debug](#step-3-retrieve-endpoint--push-files-to-debug) + - [Step 4. Debug table configs](#step-4-debug-table-configs) + - [Step 5. Fix the table configs in production](#step-5-fix-the-table-configs-in-production) + +--- ## Quick Start + ### Step 1. Configure tables -Define the catalog and a NEW schema name where the tables will land in `./dab/databricks.yml` -``` +Define the catalog and a **new** schema name where the tables will land in `./dab/databricks.yml`: + +```yaml variables: catalog_name: description: The existing catalog where the NEW schema will be created. @@ -26,75 +41,94 @@ variables: schema_name: description: The name of the NEW schema where the tables will be created. default: filepushschema - ``` -Edit the table configs in `./dab/src/configs/tables.json`. Only `name` and `format` are required for a table. -For possible `format_options` checkout [Auto Loader Options article](https://docs.databricks.com/aws/en/ingestion/cloud-object-storage/auto-loader/options). Not all options are supported. If you are not sure, feel free to specify only the `name` and `format`, or follow steps in [Debug Table Issues](#debug-table-issues) section to help come up with the proper options. -``` +Edit table configs in `./dab/src/configs/tables.json`. Only `name` and `format` are required. + +For supported `format_options`, see the [Auto Loader options](https://docs.databricks.com/aws/en/ingestion/cloud-object-storage/auto-loader/options). Not all options are supported here. If unsure, specify only `name` and `format`, or follow [Debug Table Issues](#debug-table-issues) to discover the correct options. + +```json [ { "name": "table1", "format": "csv", - "format_options": { - "escape": "\"" - }, + "format_options": { "escape": "\\"" }, "schema_hints": "id int, name string" }, { "name": "table2", "format": "json" } - , - ... + // ... ] - ``` -### Step 2. Deploy & setup -``` -$ cd dab -$ databricks bundle deploy -$ databricks bundle run configuration_job +> **Tip:** Keep `schema_hints` minimal; Auto Loader can evolve the schema as new columns appear. + +### Step 2. Deploy & set up + +```bash +cd dab +_databricks bundle deploy +_databricks bundle run configuration_job ``` -Wait for the configuration job to finish before moving to the next step. + +Wait for the configuration job to finish before moving on. ### Step 3. Retrieve endpoint & push files -Get the volume path for uploading the files -``` -$ databricks tables get main.filepushschema.table1 --output json | jq '.properties["filepush.table_volume_path_data"]' +Fetch the volume path for uploading files to a specific table (example: `table1`): + +```bash +databricks tables get main.filepushschema.table1 --output json \ + | jq -r '.properties["filepush.table_volume_path_data"]' ``` + Example output: -``` + +```text "/Volumes/main/filepushschema/main_filepushschema_filepush_volume/data/table1" ``` -Upload files to the path above using the [UC Volume APIs of your choice](https://docs.databricks.com/aws/en/volumes/volume-files#methods-for-managing-files-in-volumes). 
Here is an example using the **REST API**: -``` -$ curl --request PUT https:///api/2.0/fs/files/Volumes/main/filepushschema/main_filepushschema_filepush_volume/data/table1/datafile1.csv" \ - --header "Authorization: Bearer " \ - --header "Content-Type: application/octet-stream \ - --data-binary "@/local/file/path/datafile1.csv" -``` -Here is another example using the **Databricks CLI**. This way you do not need to specify the file name at destination. Pay attention to the `dbfs:` URL scheme for the destination path: + +Upload files to the path above using any of the [Volumes file APIs](https://docs.databricks.com/aws/en/volumes/volume-files#methods-for-managing-files-in-volumes). + +**REST API example**: + +```bash +# prerequisites: export DATABRICKS_HOST and DATABRICKS_TOKEN +curl -X PUT "$DATABRICKS_HOST/api/2.0/fs/files/Volumes/main/filepushschema/main_filepushschema_filepush_volume/data/table1/datafile1.csv" \ + -H "Authorization: Bearer $DATABRICKS_TOKEN" \ + -H "Content-Type: application/octet-stream" \ + --data-binary @"/local/file/path/datafile1.csv" ``` -$ databricks fs cp /local/file/path/datafile1.csv dbfs:/Volumes/main/filepushschema/main_filepushschema_filepush_volume/data/table1 + +**Databricks CLI example** (destination uses the `dbfs:` scheme): + +```bash +databricks fs cp /local/file/path/datafile1.csv \ + dbfs:/Volumes/main/filepushschema/main_filepushschema_filepush_volume/data/table1 ``` -After maximum 1 minute, the data should land the corresponding table e.g. `main.filepushschema.table1` +Within about a minute, the data should appear in the table `main.filepushschema.table1`. + +--- ## Debug Table Issues -In case the data is not parsed correctly in the destination table, follow the steps below to fix the table configs. +If data isn’t parsed as expected, use **dev mode** to iterate on table options safely. + ### Step 1. Configure tables to debug -Configure tables just like [Step 1 in Quick Start](#step-1-configure-tables). +Configure tables as in [Step 1 of Quick Start](#step-1-configure-tables). -### Step 2. Deploy & Setup in ***dev mode*** -``` -$ cd dab -$ databricks bundle deploy -t dev -$ databricks bundle run configuration_job -t dev -``` -Wait for the configuration job to finish before moving to the next step. Example output: +### Step 2. Deploy & set up in **dev mode** + +```bash +cd dab +databricks bundle deploy -t dev +databricks bundle run configuration_job -t dev ``` + +Wait for the configuration job to finish. Example output: + +```text 2025-09-23 22:03:04,938 [INFO] initialization - ========== catalog_name: main schema_name: dev_chi_yang_filepushschema @@ -103,28 +137,44 @@ volume_path_data: /Volumes/main/dev_chi_yang_filepushschema/main_filepushschema_ volume_path_archive: /Volumes/main/dev_chi_yang_filepushschema/main_filepushschema_filepush_volume/archive ========== ``` -Pay attention that, ***dev mode put a prefix to the schema name***, and you should use the name output by the initialization job for the remaining steps. + +> **Note:** In **dev mode**, the schema name is **prefixed**. Use the printed schema name for the remaining steps. ### Step 3. 
Retrieve endpoint & push files to debug -Get the volume path for uploading the files, pay attention to the ***prefix*** name of the schema: -``` -$ databricks tables get main.dev_chi_yang_filepushschema.table1 --output json | jq '.properties["filepush.table_volume_path_data"]' +Get the dev volume path (note the prefixed schema): + +```bash +databricks tables get main.dev_chi_yang_filepushschema.table1 --output json \ + | jq -r '.properties["filepush.table_volume_path_data"]' ``` + Example output: -``` + +```text "/Volumes/main/dev_chi_yang_filepushschema/main_filepushschema_filepush_volume/data/table1" ``` -Follow the remaining steps of [Step 3 in Quick Start](#step-3-retrieve-endpoint--push-files) to push files for debug. + +Then follow the upload instructions from [Quick Start → Step 3](#step-3-retrieve-endpoint--push-files) to send test files. ### Step 4. Debug table configs -Open the `refresh_pipeline` in the workspace: -``` -$ databricks bundle open refresh_pipeline -t dev +Open the pipeline in the workspace: + +```bash +databricks bundle open refresh_pipeline -t dev ``` -Then click `Edit pipeline` to launch the development UI. Open the notebook `debug_table_config` and follow the instruction there to fix the table configs. Remember to copy over the config to the table configs in `./dab/src/configs/tables.json`. + +Click **Edit pipeline** to launch the development UI. Open the `debug_table_config` notebook and follow its guidance to refine the table options. When satisfied, copy the final config back to `./dab/src/configs/tables.json`. ### Step 5. Fix the table configs in production -Go though [Step 2 in Quick Start](#step-2-deploy--setup) to deploy the updated config, then issue a full-refresh to fix the problematic data in the table: -``` -$ databricks bundle run refresh_pipeline --full-refresh table1 +Redeploy the updated config and run a full refresh to correct existing data for an affected table: + +```bash +cd dab +databricks bundle deploy +databricks bundle run refresh_pipeline --full-refresh table1 ``` + +--- + +**That’s it!** You now have a managed file‑push workflow with debuggable table configs and repeatable deployments. + From 94b394af0733f799fece36bb5e26665b10c2fd72 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Tue, 23 Sep 2025 16:32:12 -0700 Subject: [PATCH 56/60] Fix a display issue in README --- filepush/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/filepush/README.md b/filepush/README.md index 769121f8..9ab7e2e5 100644 --- a/filepush/README.md +++ b/filepush/README.md @@ -52,14 +52,13 @@ For supported `format_options`, see the [Auto Loader options](https://docs.datab { "name": "table1", "format": "csv", - "format_options": { "escape": "\\"" }, + "format_options": { "escape": "\"" }, "schema_hints": "id int, name string" }, { "name": "table2", "format": "json" } - // ... 
] ``` From 9003a6c585b3564ca03b39e660567d45ce88e981 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Tue, 23 Sep 2025 16:36:38 -0700 Subject: [PATCH 57/60] newline --- filepush/dab/src/utils/tablemanager.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/filepush/dab/src/utils/tablemanager.py b/filepush/dab/src/utils/tablemanager.py index 33376ded..5aaf9a55 100644 --- a/filepush/dab/src/utils/tablemanager.py +++ b/filepush/dab/src/utils/tablemanager.py @@ -94,4 +94,5 @@ def get_placeholder_df_with_config(spark: SparkSession, table_config: dict) -> D reader = spark.readStream.format("cloudFiles") reader = _apply_table_options(reader, table_config, fmt_mgr).schema(fmt_mgr.get_default_schema()) - return reader.load(get_table_volume_path(table_config.get("name"))) \ No newline at end of file + return reader.load(get_table_volume_path(table_config.get("name"))) + \ No newline at end of file From 4df63801f150f388a3ed8b0d662d4cc60b96bdb0 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Tue, 23 Sep 2025 16:41:33 -0700 Subject: [PATCH 58/60] Update example name --- filepush/dab/src/configs/tables.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/filepush/dab/src/configs/tables.json b/filepush/dab/src/configs/tables.json index 98c4591f..3926a1bc 100644 --- a/filepush/dab/src/configs/tables.json +++ b/filepush/dab/src/configs/tables.json @@ -1,6 +1,6 @@ [ { - "name": "employees", + "name": "example_table", "format": "csv", "format_options": { "escape": "\"" From 2f55c3d276a69190ac43e47be42741ce2ef13bed Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Wed, 24 Sep 2025 09:40:46 -0700 Subject: [PATCH 59/60] Fix typo in doc and enrich instructions --- filepush/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/filepush/README.md b/filepush/README.md index 9ab7e2e5..96cf9ec6 100644 --- a/filepush/README.md +++ b/filepush/README.md @@ -68,8 +68,8 @@ For supported `format_options`, see the [Auto Loader options](https://docs.datab ```bash cd dab -_databricks bundle deploy -_databricks bundle run configuration_job +databricks bundle deploy +databricks bundle run configuration_job ``` Wait for the configuration job to finish before moving on. 
@@ -93,7 +93,7 @@ Upload files to the path above using any of the [Volumes file APIs](https://docs **REST API example**: ```bash -# prerequisites: export DATABRICKS_HOST and DATABRICKS_TOKEN +# prerequisites: export DATABRICKS_HOST and DATABRICKS_TOKEN (PAT token) curl -X PUT "$DATABRICKS_HOST/api/2.0/fs/files/Volumes/main/filepushschema/main_filepushschema_filepush_volume/data/table1/datafile1.csv" \ -H "Authorization: Bearer $DATABRICKS_TOKEN" \ -H "Content-Type: application/octet-stream" \ From ce62a8b5328d1424fad2a756782897cce8fb4355 Mon Sep 17 00:00:00 2001 From: chi-yang-db Date: Mon, 29 Sep 2025 14:44:33 -0700 Subject: [PATCH 60/60] Add more comments --- filepush/README.md | 4 ++-- filepush/dab/databricks.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/filepush/README.md b/filepush/README.md index 96cf9ec6..64750f30 100644 --- a/filepush/README.md +++ b/filepush/README.md @@ -85,7 +85,7 @@ databricks tables get main.filepushschema.table1 --output json \ Example output: ```text -"/Volumes/main/filepushschema/main_filepushschema_filepush_volume/data/table1" +/Volumes/main/filepushschema/main_filepushschema_filepush_volume/data/table1 ``` Upload files to the path above using any of the [Volumes file APIs](https://docs.databricks.com/aws/en/volumes/volume-files#methods-for-managing-files-in-volumes). @@ -150,7 +150,7 @@ databricks tables get main.dev_chi_yang_filepushschema.table1 --output json \ Example output: ```text -"/Volumes/main/dev_chi_yang_filepushschema/main_filepushschema_filepush_volume/data/table1" +/Volumes/main/dev_chi_yang_filepushschema/main_filepushschema_filepush_volume/data/table1 ``` Then follow the upload instructions from [Quick Start → Step 3](#step-3-retrieve-endpoint--push-files) to send test files. diff --git a/filepush/dab/databricks.yml b/filepush/dab/databricks.yml index bed0f416..c9c1729b 100644 --- a/filepush/dab/databricks.yml +++ b/filepush/dab/databricks.yml @@ -26,8 +26,8 @@ targets: variables: catalog_name: - description: The existing catalog where the schema will be created. - default: main + description: The existing catalog where the NEW schema will be created. + default: chi_catalog schema_name: description: The name of the NEW schema where the tables will be created. default: filepushschema
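For reference, the upload flow that the README walks through with the REST API and the Databricks CLI can also be scripted with the Databricks Python SDK, which the bundle's own utilities already use. The sketch below is a minimal, illustrative client, not part of the bundle: it assumes the bundle has been deployed, the configuration job has stamped `filepush.table_volume_path_data` onto the target table, and it reuses the `main.filepushschema.example_table` names from the examples above; the helper name `push_file` is hypothetical.

```python
# Minimal sketch (assumptions noted above): resolve a filepush table's upload path
# from its table property and push a local file there with the Databricks Python SDK.
from pathlib import Path

from databricks.sdk import WorkspaceClient


def push_file(full_table_name: str, local_file: str) -> str:
    # Credentials come from the environment or ~/.databrickscfg, as for the CLI examples.
    w = WorkspaceClient()

    # Read the per-table upload path set by the configuration job (initialization.py).
    table = w.tables.get(full_name=full_table_name)
    volume_path_data = (table.properties or {}).get("filepush.table_volume_path_data")
    if not volume_path_data:
        raise ValueError(f"{full_table_name} does not look like a filepush table")

    # Upload into the table's data folder; Auto Loader picks the file up on the next refresh.
    destination = f"{volume_path_data}/{Path(local_file).name}"
    with open(local_file, "rb") as f:
        w.files.upload(destination, f, overwrite=True)
    return destination


if __name__ == "__main__":
    # Hypothetical example using the catalog/schema/table names from the README.
    print(push_file("main.filepushschema.example_table", "/local/file/path/datafile1.csv"))
```

The destination it builds mirrors the `databricks fs cp` example in the README; only the transport differs.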