diff --git a/.vscode/settings.json b/.vscode/settings.json index 236981053..ca20f9585 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -17,7 +17,6 @@ "files.autoSave": "afterDelay", "screencastMode.onlyKeyboardShortcuts": true, "terminal.integrated.fontSize": 18, - "workbench.activityBar.visible": true, "workbench.colorTheme": "Visual Studio Dark", "workbench.fontAliasing": "antialiased", "workbench.statusBar.visible": true diff --git a/data/nyc_parking_violations.db b/data/nyc_parking_violations.db index 19e587528..615dec769 100644 Binary files a/data/nyc_parking_violations.db and b/data/nyc_parking_violations.db differ diff --git a/data/prod_nyc_parking_violations.db b/data/prod_nyc_parking_violations.db index 9a2bd4ad7..84c4f6bf3 100644 Binary files a/data/prod_nyc_parking_violations.db and b/data/prod_nyc_parking_violations.db differ diff --git a/nyc_parking_violations/.gitignore b/nyc_parking_violations/.gitignore index 9a7943d2d..49f147cb9 100644 --- a/nyc_parking_violations/.gitignore +++ b/nyc_parking_violations/.gitignore @@ -1,4 +1,4 @@ -.user.yml + target/ dbt_packages/ logs/ diff --git a/nyc_parking_violations/.user.yml b/nyc_parking_violations/.user.yml new file mode 100644 index 000000000..af81a6ec4 --- /dev/null +++ b/nyc_parking_violations/.user.yml @@ -0,0 +1 @@ +id: f536095e-1e73-4858-b08e-1ceabadf31b1 diff --git a/nyc_parking_violations/dbt_project.yml b/nyc_parking_violations/dbt_project.yml index ec48605d3..12edc44f1 100644 --- a/nyc_parking_violations/dbt_project.yml +++ b/nyc_parking_violations/dbt_project.yml @@ -41,12 +41,12 @@ models: silver_parking_violation_codes: +materialized: ephemeral silver_parking_violations: - +materialized: ephemeral + +materialized: ephemeral silver_violation_tickets: - +materialized: view + +materialized: view silver_violation_vehicles: +materialized: view gold: - +materialized: table + +materialized: table tests: - +store_failures: true + +store_failures: true \ No newline at end of file diff --git a/nyc_parking_violations/models/bronze/bronze_parking_violation_codes.sql b/nyc_parking_violations/models/bronze/bronze_parking_violation_codes.sql index 217a5f263..3b0e1339f 100644 --- a/nyc_parking_violations/models/bronze/bronze_parking_violation_codes.sql +++ b/nyc_parking_violations/models/bronze/bronze_parking_violation_codes.sql @@ -1,7 +1,7 @@ -SELECT - code AS violation_code, - definition, - manhattan_96th_st_below, - all_other_areas -FROM - parking_violation_codes +SELECT + code as violation_code + ,definition + ,manhattan_96th_st_below + ,all_other_areas +FROM + parking_violation_codes \ No newline at end of file diff --git a/nyc_parking_violations/models/bronze/bronze_parking_violations.sql b/nyc_parking_violations/models/bronze/bronze_parking_violations.sql index 46525da2a..c50fce7d2 100644 --- a/nyc_parking_violations/models/bronze/bronze_parking_violations.sql +++ b/nyc_parking_violations/models/bronze/bronze_parking_violations.sql @@ -20,4 +20,4 @@ SELECT vehicle_color, vehicle_year, FROM - parking_violations_2023 \ No newline at end of file + parking_violation_2023 \ No newline at end of file diff --git a/nyc_parking_violations/models/docs/docs_blocks.md b/nyc_parking_violations/models/docs/docs_block.md similarity index 99% rename from nyc_parking_violations/models/docs/docs_blocks.md rename to nyc_parking_violations/models/docs/docs_block.md index cd410941a..aa403dff5 100644 --- a/nyc_parking_violations/models/docs/docs_blocks.md +++ b/nyc_parking_violations/models/docs/docs_block.md @@ -104,4 +104,4 @@ The total number of tickets issued for a specific violation code. {% docs total_revenue_usd %} The total revenue accumulated from tickets, based on the violation code. This sum is represented in USD. -{% enddocs %} +{% enddocs %} \ No newline at end of file diff --git a/nyc_parking_violations/models/docs/schema.yml b/nyc_parking_violations/models/docs/schema.yml index c1dfcb9fc..337eb0ccf 100644 --- a/nyc_parking_violations/models/docs/schema.yml +++ b/nyc_parking_violations/models/docs/schema.yml @@ -179,10 +179,10 @@ models: - name: total_revenue_usd description: '{{ doc("total_revenue_usd") }}' - - name: gold_vehicles_metrics + - name: gold_vehicle_metrics description: "Aggregated metrics detailing the number of tickets per vehicle, identified by the plate ID." columns: - name: registration_state description: '{{ doc("registration_state") }}' - name: ticket_count - description: '{{ doc("ticket_count") }}' + description: '{{ doc("ticket_count") }}' \ No newline at end of file diff --git a/nyc_parking_violations/models/example/ref_model.sql b/nyc_parking_violations/models/example/ref_model.sql index 9bb4f51c6..64b53a444 100644 --- a/nyc_parking_violations/models/example/ref_model.sql +++ b/nyc_parking_violations/models/example/ref_model.sql @@ -1,4 +1,2 @@ -SELECT - COUNT(*) -FROM - {{ref('first_model')}} +SELECT COUNT(*) +FROM {{ref('first_model')}} \ No newline at end of file diff --git a/nyc_parking_violations/models/gold/gold_vehicles_metrics.sql b/nyc_parking_violations/models/gold/gold_vehicle_metrics.sql similarity index 100% rename from nyc_parking_violations/models/gold/gold_vehicles_metrics.sql rename to nyc_parking_violations/models/gold/gold_vehicle_metrics.sql diff --git a/nyc_parking_violations/profiles.yml b/nyc_parking_violations/profiles.yml index cabe18c54..3d89d2f1e 100644 --- a/nyc_parking_violations/profiles.yml +++ b/nyc_parking_violations/profiles.yml @@ -8,5 +8,5 @@ nyc_parking_violations: # note that path is slightly different as GitHub actions # start in the root directory and not in the # nyc_parking_violations directory - path: './data/prod_nyc_parking_violations.db' + path: '../data/prod_nyc_parking_violations.db' target: dev \ No newline at end of file diff --git a/nyc_parking_violations/tests/violation_codes_revenue.sql b/nyc_parking_violations/tests/violation_codes_revenue.sql index 091ddfdd4..8e72b97ea 100644 --- a/nyc_parking_violations/tests/violation_codes_revenue.sql +++ b/nyc_parking_violations/tests/violation_codes_revenue.sql @@ -1,11 +1,9 @@ -{{ config(severity = 'warn') }} - -SELECT - violation_code, - SUM(fee_usd) AS total_revenue_usd -FROM - {{ref('silver_parking_violation_codes')}} -GROUP BY - violation_code -HAVING - NOT(total_revenue_usd >= 1) +{{config(severity = 'warn')}} +SELECT violation_code + ,sum(fee_usd) as total_revenue_usd +FROM + {{ref('silver_parking_violation_codes')}} +GROUP BY + violation_code +HAVING + NOT((total_revenue_usd >=1)) diff --git a/run_sql_queries_here.ipynb b/run_sql_queries_here.ipynb index f7c54e6c9..6fe06381b 100644 --- a/run_sql_queries_here.ipynb +++ b/run_sql_queries_here.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -12,17 +12,224 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
violation_codetotal_revenue_usd
0410.0
\n", + "
" + ], + "text/plain": [ + " violation_code total_revenue_usd\n", + "0 41 0.0" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "sql_query = '''\n", - "show tables\n", + " select * from \"nyc_parking_violations\".\"main_dbt_test__audit\".\"violation_codes_revenue\"\n", + " \n", "'''\n", "\n", "with duckdb.connect('data/nyc_parking_violations.db') as con:\n", " display(con.sql(sql_query).df())" ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "sql_query_import_1= '''\n", + "CREATE OR REPLACE TABLE parking_violation_codes AS\n", + "SELECT * \n", + "FROM read_csv_auto(\n", + " 'data/dof_parking_violation_codes.csv',\n", + " normalize_names = True\n", + ")\n", + "'''\n", + "\n", + "sql_query_import_2= '''\n", + "CREATE OR REPLACE TABLE parking_violation_2023 AS\n", + "SELECT * \n", + "FROM read_csv_auto(\n", + " 'data/parking_violations_issued_fiscal_year_2023_sample.csv',\n", + " normalize_names = True\n", + ")\n", + "'''\n", + "\n", + "with duckdb.connect('data/nyc_parking_violations.db') as con:\n", + " con.sql(sql_query_import_1)\n", + " con.sql(sql_query_import_2)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "sql_query_import_1= '''\n", + "CREATE OR REPLACE TABLE parking_violation_codes AS\n", + "SELECT * \n", + "FROM read_csv_auto(\n", + " 'data/dof_parking_violation_codes.csv',\n", + " normalize_names = True\n", + ")\n", + "'''\n", + "\n", + "sql_query_import_2= '''\n", + "CREATE OR REPLACE TABLE parking_violation_2023 AS\n", + "SELECT * \n", + "FROM read_csv_auto(\n", + " 'data/parking_violations_issued_fiscal_year_2023_sample.csv',\n", + " normalize_names = True\n", + ")\n", + "'''\n", + "\n", + "with duckdb.connect('data/prod_nyc_parking_violations.db') as con:\n", + " con.sql(sql_query_import_1)\n", + " con.sql(sql_query_import_2)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
name
0bronze_parking_violation_codes
1bronze_parking_violations
2gold_ticket_metrics
3gold_vehicles_metrics
4parking_violation_2023
5parking_violation_codes
6parking_violations_2023
7silver_violation_tickets
8silver_violation_vehicles
\n", + "
" + ], + "text/plain": [ + " name\n", + "0 bronze_parking_violation_codes\n", + "1 bronze_parking_violations\n", + "2 gold_ticket_metrics\n", + "3 gold_vehicles_metrics\n", + "4 parking_violation_2023\n", + "5 parking_violation_codes\n", + "6 parking_violations_2023\n", + "7 silver_violation_tickets\n", + "8 silver_violation_vehicles" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sql_query = '''\n", + " show tables\n", + " \n", + "'''\n", + "\n", + "with duckdb.connect('data/prod_nyc_parking_violations.db') as con:\n", + " display(con.sql(sql_query).df())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {