diff --git a/.vscode/settings.json b/.vscode/settings.json
index 236981053..878eab807 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -3,7 +3,7 @@
"editor.cursorBlinking": "solid",
"editor.fontFamily": "ui-monospace, Menlo, Monaco, 'Cascadia Mono', 'Segoe UI Mono', 'Roboto Mono', 'Oxygen Mono', 'Ubuntu Monospace', 'Source Code Pro', 'Fira Mono', 'Droid Sans Mono', 'Courier New', monospace",
"editor.fontLigatures": false,
- "editor.fontSize": 22,
+ "editor.fontSize": 12,
"editor.formatOnPaste": true,
"editor.formatOnSave": true,
"editor.lineNumbers": "on",
@@ -16,8 +16,7 @@
"explorer.openEditors.visible": 0,
"files.autoSave": "afterDelay",
"screencastMode.onlyKeyboardShortcuts": true,
- "terminal.integrated.fontSize": 18,
- "workbench.activityBar.visible": true,
+ "terminal.integrated.fontSize": 12,
"workbench.colorTheme": "Visual Studio Dark",
"workbench.fontAliasing": "antialiased",
"workbench.statusBar.visible": true
diff --git a/assets/tutorial_files/models/docs/schema.yml b/assets/tutorial_files/models/docs/schema.yml
index c1dfcb9fc..1d0f8b2d9 100644
--- a/assets/tutorial_files/models/docs/schema.yml
+++ b/assets/tutorial_files/models/docs/schema.yml
@@ -11,15 +11,11 @@ models:
- name: all_other_areas
description: '{{ doc("all_other_areas") }}'
- - name: bronze_parking_violations
+ - name: bronze_parking_violations
description: Raw data related to parking violations in 2023, encompassing various details about each violation.
columns:
- name: summons_number
description: '{{ doc("summons_number") }}'
- tests:
- - unique
- - not_null
- - generic_not_null
- name: registration_state
description: '{{ doc("registration_state") }}'
- name: plate_type
diff --git a/data/nyc_parking_violations.db b/data/nyc_parking_violations.db
index 19e587528..81f7cd1f2 100644
Binary files a/data/nyc_parking_violations.db and b/data/nyc_parking_violations.db differ
diff --git a/data/prod_nyc_parking_violations.db b/data/prod_nyc_parking_violations.db
index 9a2bd4ad7..008479de5 100644
Binary files a/data/prod_nyc_parking_violations.db and b/data/prod_nyc_parking_violations.db differ
diff --git a/my_database.duckdb b/my_database.duckdb
new file mode 100644
index 000000000..4965164bb
Binary files /dev/null and b/my_database.duckdb differ
diff --git a/nyc_parking_violations/.gitignore b/nyc_parking_violations/.gitignore
index 9a7943d2d..49f147cb9 100644
--- a/nyc_parking_violations/.gitignore
+++ b/nyc_parking_violations/.gitignore
@@ -1,4 +1,4 @@
-.user.yml
+
target/
dbt_packages/
logs/
diff --git a/nyc_parking_violations/.user.yml b/nyc_parking_violations/.user.yml
new file mode 100644
index 000000000..665b03e87
--- /dev/null
+++ b/nyc_parking_violations/.user.yml
@@ -0,0 +1 @@
+id: 57e9178d-61d6-4af0-b0a1-75dab50afcea
diff --git a/nyc_parking_violations/dbt_project.yml b/nyc_parking_violations/dbt_project.yml
index ec48605d3..abab4f061 100644
--- a/nyc_parking_violations/dbt_project.yml
+++ b/nyc_parking_violations/dbt_project.yml
@@ -1,13 +1,12 @@
-
# Name your project! Project names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
-name: 'nyc_parking_violations'
-version: '1.0.0'
+name: "nyc_parking_violations"
+version: "1.0.0"
config-version: 2
# This setting configures which "profile" dbt uses for this project.
-profile: 'nyc_parking_violations'
+profile: "nyc_parking_violations"
# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that models in this project can be
@@ -19,17 +18,22 @@ seed-paths: ["seeds"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]
-clean-targets: # directories to be removed by `dbt clean`
+clean-targets: # directories to be removed by `dbt clean`
- "target"
- "dbt_packages"
-
# Configuring models
# Full documentation: https://docs.getdbt.com/docs/configuring-models
# In this example config, we tell dbt to build all models in the example/
# directory as views. These settings can be overridden in the individual model
# files using the `{{ config(...) }}` macro.
+# models:
+# nyc_parking_violations:
+# # Config indicated by + and applies to all files under models/example/
+# example:
+# +materialized: view
+
models:
nyc_parking_violations:
# Config indicated by + and applies to all files under models/example/
diff --git a/nyc_parking_violations/models/bronze/bronze_parking_violation_codes.sql b/nyc_parking_violations/models/bronze/bronze_parking_violation_codes.sql
index 217a5f263..4074ab031 100644
--- a/nyc_parking_violations/models/bronze/bronze_parking_violation_codes.sql
+++ b/nyc_parking_violations/models/bronze/bronze_parking_violation_codes.sql
@@ -4,4 +4,4 @@ SELECT
manhattan_96th_st_below,
all_other_areas
FROM
- parking_violation_codes
+ parking_violation_codes
\ No newline at end of file
diff --git a/nyc_parking_violations/models/docs/docs_blocks.md b/nyc_parking_violations/models/docs/docs_block.md
similarity index 100%
rename from nyc_parking_violations/models/docs/docs_blocks.md
rename to nyc_parking_violations/models/docs/docs_block.md
diff --git a/nyc_parking_violations/models/docs/schema.yml b/nyc_parking_violations/models/docs/schema.yml
index c1dfcb9fc..ba62ad7b6 100644
--- a/nyc_parking_violations/models/docs/schema.yml
+++ b/nyc_parking_violations/models/docs/schema.yml
@@ -11,7 +11,7 @@ models:
- name: all_other_areas
description: '{{ doc("all_other_areas") }}'
- - name: bronze_parking_violations
+ - name: bronze_parking_violations
description: Raw data related to parking violations in 2023, encompassing various details about each violation.
columns:
- name: summons_number
diff --git a/nyc_parking_violations/models/example/first_model.sql b/nyc_parking_violations/models/example/first_model.sql
index 942e472df..e89f7f0f8 100644
--- a/nyc_parking_violations/models/example/first_model.sql
+++ b/nyc_parking_violations/models/example/first_model.sql
@@ -1 +1,2 @@
-SELECT * FROM parking_violation_codes
\ No newline at end of file
+SELECT *
+FROM parking_violation_codes
diff --git a/nyc_parking_violations/models/example/ref_model.sql b/nyc_parking_violations/models/example/ref_model.sql
index 9bb4f51c6..8b3adac88 100644
--- a/nyc_parking_violations/models/example/ref_model.sql
+++ b/nyc_parking_violations/models/example/ref_model.sql
@@ -1,4 +1,2 @@
-SELECT
- COUNT(*)
-FROM
- {{ref('first_model')}}
+SELECT count(*)
+FROM {{ ref('first_model') }}
\ No newline at end of file
diff --git a/nyc_parking_violations/models/gold/gold_vehicles_metrics.sql b/nyc_parking_violations/models/gold/gold_vehicle_metrics.sql
similarity index 100%
rename from nyc_parking_violations/models/gold/gold_vehicles_metrics.sql
rename to nyc_parking_violations/models/gold/gold_vehicle_metrics.sql
diff --git a/nyc_parking_violations/profiles.yml b/nyc_parking_violations/profiles.yml
index cabe18c54..88545bab0 100644
--- a/nyc_parking_violations/profiles.yml
+++ b/nyc_parking_violations/profiles.yml
@@ -1,12 +1,9 @@
nyc_parking_violations:
outputs:
- dev:
- type: duckdb
- path: '../data/nyc_parking_violations.db'
- prod:
- type: duckdb
- # note that path is slightly different as GitHub actions
- # start in the root directory and not in the
- # nyc_parking_violations directory
- path: './data/prod_nyc_parking_violations.db'
- target: dev
\ No newline at end of file
+ dev:
+ type: duckdb
+ path: ../data/nyc_parking_violations.db
+ prod:
+ type: duckdb
+ path: ./data/prod_nyc_parking_violations.db
+ target: dev
diff --git a/nyc_parking_violations/tests/generic/generic_not_null.sql b/nyc_parking_violations/tests/generic/generic_not_null.sql
index e49c2b087..25843a106 100644
--- a/nyc_parking_violations/tests/generic/generic_not_null.sql
+++ b/nyc_parking_violations/tests/generic/generic_not_null.sql
@@ -1,8 +1,7 @@
--- source: https://docs.getdbt.com/guides/best-practices/writing-custom-generic-tests#generic-tests-with-default-config-values
{% test generic_not_null(model, column_name) %}
- select *
- from {{ model }}
- where {{ column_name }} is null
+select *
+from {{ model }}
+where {{ column_name }} is null
-{% endtest %}
\ No newline at end of file
+{% endtest %}
diff --git a/nyc_parking_violations/tests/violation_codes_revenue.sql b/nyc_parking_violations/tests/violation_codes_revenue.sql
index 091ddfdd4..2dde94e7c 100644
--- a/nyc_parking_violations/tests/violation_codes_revenue.sql
+++ b/nyc_parking_violations/tests/violation_codes_revenue.sql
@@ -1,11 +1,10 @@
-{{ config(severity = 'warn') }}
-
-SELECT
- violation_code,
- SUM(fee_usd) AS total_revenue_usd
+{{config(severity='warn')}}
+SELECT
+violation_code,
+sum(fee_usd) AS total_revenue_usd
FROM
- {{ref('silver_parking_violation_codes')}}
+{{ref('silver_parking_violation_codes')}}
GROUP BY
- violation_code
+violation_code
HAVING
- NOT(total_revenue_usd >= 1)
+NOT(total_revenue_usd >= 1)
\ No newline at end of file
diff --git a/run_sql_queries_here.ipynb b/run_sql_queries_here.ipynb
index f7c54e6c9..24bfb07a2 100644
--- a/run_sql_queries_here.ipynb
+++ b/run_sql_queries_here.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -12,9 +12,116 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 30,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " bronze_parking_violation_codes | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " bronze_parking_violations | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " first_model | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " gold_ticket_metrics | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " gold_vehicle_metrics | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " my_first_dbt_model | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " my_second_dbt_model | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " parking_violation_codes | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " parking_violations_2023 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " ref_model | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " silver_parking_violation_codes | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " silver_parking_violations | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " silver_violation_tickets | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " silver_violation_vehicles | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " name\n",
+ "0 bronze_parking_violation_codes\n",
+ "1 bronze_parking_violations\n",
+ "2 first_model\n",
+ "3 gold_ticket_metrics\n",
+ "4 gold_vehicle_metrics\n",
+ "5 my_first_dbt_model\n",
+ "6 my_second_dbt_model\n",
+ "7 parking_violation_codes\n",
+ "8 parking_violations_2023\n",
+ "9 ref_model\n",
+ "10 silver_parking_violation_codes\n",
+ "11 silver_parking_violations\n",
+ "12 silver_violation_tickets\n",
+ "13 silver_violation_vehicles"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
"sql_query = '''\n",
"show tables\n",
@@ -23,11 +130,397 @@
"with duckdb.connect('data/nyc_parking_violations.db') as con:\n",
" display(con.sql(sql_query).df())"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import duckdb\n",
+ "\n",
+ "# Connect to your DuckDB database\n",
+ "db_path = \"data/nyc_parking_violations.db\"\n",
+ "with duckdb.connect(db_path) as con:\n",
+ " # Get all tables and views from the main schema\n",
+ " objects = con.execute(\"\"\"\n",
+ " SELECT table_name, table_type \n",
+ " FROM information_schema.tables \n",
+ " WHERE table_schema = 'main'\n",
+ " \"\"\").fetchall()\n",
+ "\n",
+ " # Drop each table or view\n",
+ " for table_name, table_type in objects:\n",
+ " if table_type.upper() == 'VIEW':\n",
+ " con.execute(f\"DROP VIEW IF EXISTS {table_name}\")\n",
+ " elif table_type.upper() == 'BASE TABLE':\n",
+ " con.execute(f\"DROP TABLE IF EXISTS {table_name}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sql_query_import_1 = '''\n",
+ "CREATE OR REPLACE TABLE parking_violation_codes AS\n",
+ "SELECT *\n",
+ "from read_csv_auto('data/dof_parking_violation_codes.csv', \n",
+ "normalize_names=True)\n",
+ "'''\n",
+ "\n",
+ "sql_query_import_2 = '''\n",
+ "CREATE OR REPLACE TABLE parking_violations_2023 AS\n",
+ "SELECT *\n",
+ "from read_csv_auto('data/parking_violations_issued_fiscal_year_2023_sample.csv',\n",
+ "normalize_names=True)\n",
+ "'''\n",
+ "\n",
+ "with duckdb.connect('data/nyc_parking_violations.db') as con:\n",
+ " con.sql(sql_query_import_1)\n",
+ " con.sql(sql_query_import_2)\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " registration_state | \n",
+ " ticket_count | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " NJ | \n",
+ " 9258 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " PA | \n",
+ " 3514 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " FL | \n",
+ " 2414 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " registration_state ticket_count\n",
+ "0 NJ 9258\n",
+ "1 PA 3514\n",
+ "2 FL 2414"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "sql_query = '''\n",
+ "select * from gold_vehicle_metrics limit 3\n",
+ "'''\n",
+ "\n",
+ "with duckdb.connect('data/nyc_parking_violations.db') as con:\n",
+ " display(con.sql(sql_query).df())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "┌────────────────┬───────────────────┐\n",
+ "│ violation_code │ total_revenue_usd │\n",
+ "│ int64 │ int128 │\n",
+ "├────────────────┼───────────────────┤\n",
+ "│ 41 │ 0 │\n",
+ "└────────────────┴───────────────────┘"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "sql_query = '''\n",
+ "select * from \"nyc_parking_violations\".\"main_dbt_test__audit\".\"violation_codes_revenue\"\n",
+ "'''\n",
+ "\n",
+ "with duckdb.connect('data/nyc_parking_violations.db') as con:\n",
+ " display(con.sql(sql_query))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sql_query_import_1 = '''\n",
+ "CREATE OR REPLACE TABLE parking_violation_codes AS\n",
+ "SELECT *\n",
+ "from read_csv_auto('data/dof_parking_violation_codes.csv', \n",
+ "normalize_names=True)\n",
+ "'''\n",
+ "\n",
+ "sql_query_import_2 = '''\n",
+ "CREATE OR REPLACE TABLE parking_violations_2023 AS\n",
+ "SELECT *\n",
+ "from read_csv_auto('data/parking_violations_issued_fiscal_year_2023_sample.csv',\n",
+ "normalize_names=True)\n",
+ "'''\n",
+ "\n",
+ "with duckdb.connect('data/prod_nyc_parking_violations.db') as con:\n",
+ " con.sql(sql_query_import_1)\n",
+ " con.sql(sql_query_import_2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " bronze_parking_violation_codes | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " bronze_parking_violations | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " gold_ticket_metrics | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " gold_vehicle_metrics | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " gold_vehicles_metrics | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " parking_violation_codes | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " parking_violations_2023 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " silver_violation_tickets | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " silver_violation_vehicles | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " name\n",
+ "0 bronze_parking_violation_codes\n",
+ "1 bronze_parking_violations\n",
+ "2 gold_ticket_metrics\n",
+ "3 gold_vehicle_metrics\n",
+ "4 gold_vehicles_metrics\n",
+ "5 parking_violation_codes\n",
+ "6 parking_violations_2023\n",
+ "7 silver_violation_tickets\n",
+ "8 silver_violation_vehicles"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "sql_query = '''\n",
+ "show tables\n",
+ "'''\n",
+ "\n",
+ "with duckdb.connect('data/prod_nyc_parking_violations.db') as con:\n",
+ " display(con.sql(sql_query).df())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " count | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 97 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " count\n",
+ "0 97"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "sql_query = '''\n",
+ "select count(*) as count\n",
+ "from prod_nyc_parking_violations.bronze_parking_violation_codes\n",
+ "'''\n",
+ "\n",
+ "with duckdb.connect('data/prod_nyc_parking_violations.db') as con:\n",
+ " display(con.sql(sql_query).df())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " count | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 76 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " count\n",
+ "0 76"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "sql_query = '''\n",
+ "select count(*) as count\n",
+ "from prod_nyc_parking_violations.gold_ticket_metrics\n",
+ "'''\n",
+ "\n",
+ "with duckdb.connect('data/prod_nyc_parking_violations.db') as con:\n",
+ " display(con.sql(sql_query).df())"
+ ]
}
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "venv11",
"language": "python",
"name": "python3"
},
@@ -41,14 +534,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.12"
+ "version": "3.11.11"
},
- "orig_nbformat": 4,
- "vscode": {
- "interpreter": {
- "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
- }
- }
+ "orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2