From 12cbd1676c7feaaa7fce7f6bf32cf85c432f8224 Mon Sep 17 00:00:00 2001 From: Shweta Adhikari Date: Tue, 28 Oct 2025 23:48:53 +0000 Subject: [PATCH 1/4] agency grain census data --- .../01_agency_grain_census.ipynb | 1230 ++++++++++++++++- 1 file changed, 1208 insertions(+), 22 deletions(-) diff --git a/transit_provider_dashboard/01_agency_grain_census.ipynb b/transit_provider_dashboard/01_agency_grain_census.ipynb index c4b990934..bbe2f6f14 100644 --- a/transit_provider_dashboard/01_agency_grain_census.ipynb +++ b/transit_provider_dashboard/01_agency_grain_census.ipynb @@ -3,18 +3,79 @@ { "cell_type": "code", "execution_count": 1, + "id": "873ef61b-098a-49da-9c28-2667af4ffd64", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: shared_utils in /opt/conda/lib/python3.11/site-packages (4.2)\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install shared_utils" + ] + }, + { + "cell_type": "code", + "execution_count": 2, "id": "0cdda776-857c-4e47-8ce8-940bfc49bb29", "metadata": { "tags": [] }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pygris in /opt/conda/lib/python3.11/site-packages (0.2.0)\n", + "Requirement already satisfied: geopandas>=0.9 in /opt/conda/lib/python3.11/site-packages (from pygris) (0.14.4)\n", + "Requirement already satisfied: requests in /opt/conda/lib/python3.11/site-packages (from pygris) (2.32.5)\n", + "Requirement already satisfied: appdirs in /opt/conda/lib/python3.11/site-packages (from pygris) (1.4.4)\n", + "Requirement already satisfied: fiona>=1.8.21 in /opt/conda/lib/python3.11/site-packages (from geopandas>=0.9->pygris) (1.10.1)\n", + "Requirement already satisfied: numpy>=1.22 in /opt/conda/lib/python3.11/site-packages (from geopandas>=0.9->pygris) (1.26.4)\n", + "Requirement already satisfied: packaging in 
/opt/conda/lib/python3.11/site-packages (from geopandas>=0.9->pygris) (25.0)\n", + "Requirement already satisfied: pandas>=1.4.0 in /opt/conda/lib/python3.11/site-packages (from geopandas>=0.9->pygris) (1.5.3)\n", + "Requirement already satisfied: pyproj>=3.3.0 in /opt/conda/lib/python3.11/site-packages (from geopandas>=0.9->pygris) (3.7.2)\n", + "Requirement already satisfied: shapely>=1.8.0 in /opt/conda/lib/python3.11/site-packages (from geopandas>=0.9->pygris) (2.1.1)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in /opt/conda/lib/python3.11/site-packages (from requests->pygris) (3.4.3)\n", + "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.11/site-packages (from requests->pygris) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.11/site-packages (from requests->pygris) (2.5.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.11/site-packages (from requests->pygris) (2025.8.3)\n", + "Requirement already satisfied: attrs>=19.2.0 in /opt/conda/lib/python3.11/site-packages (from fiona>=1.8.21->geopandas>=0.9->pygris) (25.3.0)\n", + "Requirement already satisfied: click~=8.0 in /opt/conda/lib/python3.11/site-packages (from fiona>=1.8.21->geopandas>=0.9->pygris) (8.2.1)\n", + "Requirement already satisfied: click-plugins>=1.0 in /opt/conda/lib/python3.11/site-packages (from fiona>=1.8.21->geopandas>=0.9->pygris) (1.1.1.2)\n", + "Requirement already satisfied: cligj>=0.5 in /opt/conda/lib/python3.11/site-packages (from fiona>=1.8.21->geopandas>=0.9->pygris) (0.7.2)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in /opt/conda/lib/python3.11/site-packages (from pandas>=1.4.0->geopandas>=0.9->pygris) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.11/site-packages (from pandas>=1.4.0->geopandas>=0.9->pygris) (2025.2)\n", + "Requirement already satisfied: six>=1.5 in 
/opt/conda/lib/python3.11/site-packages (from python-dateutil>=2.8.1->pandas>=1.4.0->geopandas>=0.9->pygris) (1.17.0)\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install pygris" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "94b52594-1c8e-45e0-bedc-957467ef9959", + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "#pip install pygris" + "import sys\n", + "sys.path.append('../ahsc_grant')" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 4, "id": "dfd19a35-4791-4c64-a90a-88ec37c3b4b9", "metadata": { "tags": [] @@ -29,6 +90,8 @@ "import requests\n", "from pygris import tracts \n", "from calitp_data_analysis.sql import get_engine\n", + "from shared_utils import schedule_rt_utils \n", + "from gtfs_key_ntd_crosswalk import filter_to_valid_dates\n", "db_engine = get_engine()\n", "credentials, project = google.auth.default()\n", "fs = gcsfs.GCSFileSystem()\n", @@ -38,7 +101,18 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, + "id": "af0504b4-642c-4977-b166-d800acfc82a0", + "metadata": {}, + "outputs": [], + "source": [ + "GCS_FILE_PATH = 'gs://calitp-analytics-data/data-analyses'\n", + "analysis_date = \"2025-08-20\"" + ] + }, + { + "cell_type": "code", + "execution_count": 6, "id": "bcece4ad-cb36-47e5-8ec9-1ab7d909f472", "metadata": { "tags": [] @@ -51,7 +125,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 7, "id": "2fc77411-cfa0-45b8-9103-f1972f92e9ad", "metadata": { "tags": [] @@ -78,7 +152,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 8, "id": "5b08cfbf-e7ed-4b4a-8e38-68e66a760086", "metadata": {}, "outputs": [], @@ -88,7 +162,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 9, "id": "ad038c7f-7aef-4a3d-a878-00190d1b9fb9", "metadata": { "tags": [] @@ -100,7 +174,7 @@ }, { "cell_type": "code", - "execution_count": 7, + 
"execution_count": 10, "id": "00b4b017-18de-42fa-98b0-6d91950a9474", "metadata": { "tags": [] @@ -112,7 +186,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 11, "id": "94684cc6-092a-4ed3-8872-76e3e7a4ef33", "metadata": { "tags": [] @@ -129,7 +203,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 12, "id": "b69bf253-d227-4ef5-9601-f4f3605f1877", "metadata": { "tags": [] @@ -141,7 +215,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 13, "id": "5a03871e-f6b4-46fc-99d2-0dbfa04d1f38", "metadata": { "tags": [] @@ -153,7 +227,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 14, "id": "a0808315-79b0-42ee-8653-222785770048", "metadata": { "tags": [] @@ -179,7 +253,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 15, "id": "e3ddd869-a418-4bbd-9349-92bdba346012", "metadata": { "tags": [] @@ -193,7 +267,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 16, "id": "6c08efd5-8cf5-4333-bd86-94c515ecfb07", "metadata": { "tags": [] @@ -486,7 +560,7 @@ "4 400500 06001400500 Alameda " ] }, - "execution_count": 13, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -497,7 +571,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 17, "id": "6647104f-db0a-4cb5-9908-c30d009e568e", "metadata": { "tags": [] @@ -512,7 +586,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 18, "id": "5b637751-060f-46ce-9b8c-41c94dd07620", "metadata": { "tags": [] @@ -525,7 +599,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 19, "id": "55013195-b315-4889-abf6-951091d09e05", "metadata": {}, "outputs": [ @@ -545,7 +619,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 20, "id": "986d2bcf-3e7f-4746-abe4-627460f68406", "metadata": {}, "outputs": [], @@ -557,6 +631,30 @@ { "cell_type": "code", "execution_count": 21, + 
"id": "0381662d-9cfe-4efd-a971-47fda3567c85", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "tracts_ca_acs.to_crs(crs=3310, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "0bd06747-c70c-4418-b136-eb34f8a1fd7d", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "tracts_ca_acs[\"area_m2\"] = tracts_ca_acs.geometry.area" + ] + }, + { + "cell_type": "code", + "execution_count": 23, "id": "1a89c9b5-4b7c-45c8-9d1e-632e1006b653", "metadata": { "tags": [] @@ -567,18 +665,1106 @@ "with db_engine.connect() as connection:\n", " query = \"\"\"\n", " SELECT\n", - " source_record_id, organization_type, ntd_id, ntd_agency_info_key, \n", - " public_currently_operating, _is_current,_valid_from, _valid_to\n", + " key, name, organization_type, ntd_id, ntd_agency_info_key, \n", + " public_currently_operating, _is_current, _valid_from, _valid_to\n", " FROM \n", " cal-itp-data-infra.mart_transit_database.dim_organizations\n", " \"\"\"\n", - " dim_orgs= pd.read_sql(query, connection)" + " \n", + " #localize timestamps\n", + " dim_orgs = (\n", + " pd.read_sql(query, connection)\n", + " .pipe(schedule_rt_utils.localize_timestamp_col, [\"_valid_from\", \"_valid_to\"])\n", + " )\n", + " \n", + " \n", + " dim_orgs = dim_orgs[\n", + " (dim_orgs['public_currently_operating'] == True) & \n", + " (dim_orgs['_is_current'] == True)\n", + " ].reset_index(drop=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "4211d3bf-0742-4a8f-9fd3-5ab435168ddf", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Filtering the provider gtfs data to valid dates \n", + "valid_organization_full = filter_to_valid_dates(dim_orgs, [analysis_date])" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "ca225047-9991-4ab1-bbde-e5e720418782", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
keynameorganization_typentd_idntd_agency_info_keypublic_currently_operating_is_current_valid_from_valid_to_valid_from_local_valid_to_local
09b5971d16d58e4fcafa694ee7fa33b12Alpine CountyCounty9R02-91116rec02Is8jSIBDkwM0TrueTrue2025-03-06 00:00:00+00:002098-12-31 23:59:59.999999+00:002025-03-05 16:00:002098-12-31 15:59:59.999999
173ed19bf64f9ba305091973b3f45d553Camarillo Health Care DistrictIndependent AgencyNoneNoneTrueTrue2025-03-06 00:00:00+00:002098-12-31 23:59:59.999999+00:002025-03-05 16:00:002098-12-31 15:59:59.999999
2402b2852ff46b95557801fbf3038ae7cChemehuevi Indian TribeTribe99316reclUB9NcCQrSImfdTrueTrue2025-03-06 00:00:00+00:002098-12-31 23:59:59.999999+00:002025-03-05 16:00:002098-12-31 15:59:59.999999
33a93c944381ee6c34646fa2dbf8b3d8fCity of AtascaderoCity/Town90194recMmQSjQCzABlmh1TrueTrue2025-03-06 00:00:00+00:002098-12-31 23:59:59.999999+00:002025-03-05 16:00:002098-12-31 15:59:59.999999
4e56f748b8cf235ca2acee940b9f60d64City of AzusaCity/Town90250recbLanAuzm5QituETrueTrue2025-03-06 00:00:00+00:002098-12-31 23:59:59.999999+00:002025-03-05 16:00:002098-12-31 15:59:59.999999
\n", + "
" + ], + "text/plain": [ + " key name \\\n", + "0 9b5971d16d58e4fcafa694ee7fa33b12 Alpine County \n", + "1 73ed19bf64f9ba305091973b3f45d553 Camarillo Health Care District \n", + "2 402b2852ff46b95557801fbf3038ae7c Chemehuevi Indian Tribe \n", + "3 3a93c944381ee6c34646fa2dbf8b3d8f City of Atascadero \n", + "4 e56f748b8cf235ca2acee940b9f60d64 City of Azusa \n", + "\n", + " organization_type ntd_id ntd_agency_info_key \\\n", + "0 County 9R02-91116 rec02Is8jSIBDkwM0 \n", + "1 Independent Agency None None \n", + "2 Tribe 99316 reclUB9NcCQrSImfd \n", + "3 City/Town 90194 recMmQSjQCzABlmh1 \n", + "4 City/Town 90250 recbLanAuzm5QituE \n", + "\n", + " public_currently_operating _is_current _valid_from \\\n", + "0 True True 2025-03-06 00:00:00+00:00 \n", + "1 True True 2025-03-06 00:00:00+00:00 \n", + "2 True True 2025-03-06 00:00:00+00:00 \n", + "3 True True 2025-03-06 00:00:00+00:00 \n", + "4 True True 2025-03-06 00:00:00+00:00 \n", + "\n", + " _valid_to _valid_from_local \\\n", + "0 2098-12-31 23:59:59.999999+00:00 2025-03-05 16:00:00 \n", + "1 2098-12-31 23:59:59.999999+00:00 2025-03-05 16:00:00 \n", + "2 2098-12-31 23:59:59.999999+00:00 2025-03-05 16:00:00 \n", + "3 2098-12-31 23:59:59.999999+00:00 2025-03-05 16:00:00 \n", + "4 2098-12-31 23:59:59.999999+00:00 2025-03-05 16:00:00 \n", + "\n", + " _valid_to_local \n", + "0 2098-12-31 15:59:59.999999 \n", + "1 2098-12-31 15:59:59.999999 \n", + "2 2098-12-31 15:59:59.999999 \n", + "3 2098-12-31 15:59:59.999999 \n", + "4 2098-12-31 15:59:59.999999 " + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "valid_organization_full.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "ed1cd1ac-5a21-4a5d-9b1a-5522564896c1", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Querying bridge organizations and gtfs_datasets\n", + "with db_engine.connect() as connection:\n", + " query = \"\"\"\n", + " SELECT\n", + " organization_key, 
gtfs_dataset_key, organization_name\n", + " FROM\n", + " cal-itp-data-infra.mart_transit_database.bridge_organizations_x_gtfs_datasets_produced\n", + " \"\"\"\n", + " dim_orgs_GTFS= pd.read_sql(query, connection)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "d28f4c79-d5b4-46ba-9c3c-54f1bd6e9d89", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "dim_orgs_merged = pd.merge(\n", + " dim_orgs.dropna(subset=['key', 'name']),\n", + " dim_orgs_GTFS.dropna(subset=['organization_key', 'organization_name']),\n", + " left_on=['key', 'name'],\n", + " right_on=['organization_key', 'organization_name'],\n", + " how='left'\n", + ") " + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "c66cf805-de18-4723-b46b-be7bf84fa2b1", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "dim_orgs_merged = dim_orgs_merged.dropna(subset=['organization_key', 'gtfs_dataset_key'])" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "01eccdb1-4bd8-4670-8f9c-3a2a0e94948d", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "dim_orgs_final = dim_orgs_merged[['key', 'name', 'organization_type', 'gtfs_dataset_key', 'ntd_id', 'ntd_agency_info_key']]" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "457b636a-705d-4085-91f8-db64d0142ef1", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
keynameorganization_typegtfs_dataset_keyntd_idntd_agency_info_key
16306bafde22fe614e0a6af2269625d8f6City of Menlo ParkCity/Townb76861f44c68f440d922c54ac1231d31NoneNone
2150e751fbc179dae727bb433686f934acCity of Newport BeachCity/Townf1f6de14d52cf6ea1b24e87e494063c9NoneNone
2250e751fbc179dae727bb433686f934acCity of Newport BeachCity/Townaa3f2efbc6864841e4c6103dd1cbe7f4NoneNone
2350e751fbc179dae727bb433686f934acCity of Newport BeachCity/Town8c3c59f936e34fe19396593f08436c55NoneNone
2450e751fbc179dae727bb433686f934acCity of Newport BeachCity/Townf47fca258cc9089401297e36ced29101NoneNone
\n", + "
" + ], + "text/plain": [ + " key name organization_type \\\n", + "16 306bafde22fe614e0a6af2269625d8f6 City of Menlo Park City/Town \n", + "21 50e751fbc179dae727bb433686f934ac City of Newport Beach City/Town \n", + "22 50e751fbc179dae727bb433686f934ac City of Newport Beach City/Town \n", + "23 50e751fbc179dae727bb433686f934ac City of Newport Beach City/Town \n", + "24 50e751fbc179dae727bb433686f934ac City of Newport Beach City/Town \n", + "\n", + " gtfs_dataset_key ntd_id ntd_agency_info_key \n", + "16 b76861f44c68f440d922c54ac1231d31 None None \n", + "21 f1f6de14d52cf6ea1b24e87e494063c9 None None \n", + "22 aa3f2efbc6864841e4c6103dd1cbe7f4 None None \n", + "23 8c3c59f936e34fe19396593f08436c55 None None \n", + "24 f47fca258cc9089401297e36ced29101 None None " + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dim_orgs_final.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "b7292811-58ec-4672-a2e3-b9c133e79723", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Int64Index: 461 entries, 16 to 543\n", + "Data columns (total 6 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 key 461 non-null object\n", + " 1 name 461 non-null object\n", + " 2 organization_type 461 non-null object\n", + " 3 gtfs_dataset_key 461 non-null object\n", + " 4 ntd_id 433 non-null object\n", + " 5 ntd_agency_info_key 383 non-null object\n", + "dtypes: object(6)\n", + "memory usage: 25.2+ KB\n" + ] + } + ], + "source": [ + "dim_orgs_final.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "dc862f09-81a6-4748-93d3-29a84ada4c8d", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
keynameorganization_typegtfs_dataset_keyntd_idntd_agency_info_key
16306bafde22fe614e0a6af2269625d8f6City of Menlo ParkCity/Townb76861f44c68f440d922c54ac1231d31NoneNone
2150e751fbc179dae727bb433686f934acCity of Newport BeachCity/Townf1f6de14d52cf6ea1b24e87e494063c9NoneNone
2250e751fbc179dae727bb433686f934acCity of Newport BeachCity/Townaa3f2efbc6864841e4c6103dd1cbe7f4NoneNone
2350e751fbc179dae727bb433686f934acCity of Newport BeachCity/Town8c3c59f936e34fe19396593f08436c55NoneNone
2450e751fbc179dae727bb433686f934acCity of Newport BeachCity/Townf47fca258cc9089401297e36ced29101NoneNone
\n", + "
" + ], + "text/plain": [ + " key name organization_type \\\n", + "16 306bafde22fe614e0a6af2269625d8f6 City of Menlo Park City/Town \n", + "21 50e751fbc179dae727bb433686f934ac City of Newport Beach City/Town \n", + "22 50e751fbc179dae727bb433686f934ac City of Newport Beach City/Town \n", + "23 50e751fbc179dae727bb433686f934ac City of Newport Beach City/Town \n", + "24 50e751fbc179dae727bb433686f934ac City of Newport Beach City/Town \n", + "\n", + " gtfs_dataset_key ntd_id ntd_agency_info_key \n", + "16 b76861f44c68f440d922c54ac1231d31 None None \n", + "21 f1f6de14d52cf6ea1b24e87e494063c9 None None \n", + "22 aa3f2efbc6864841e4c6103dd1cbe7f4 None None \n", + "23 8c3c59f936e34fe19396593f08436c55 None None \n", + "24 f47fca258cc9089401297e36ced29101 None None " + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dim_orgs_final.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "b826d41d-720f-49ba-9b77-b7186dd95bf6", + "metadata": {}, + "outputs": [], + "source": [ + "def prep_stops(analysis_date: str):\n", + " stops = gpd.read_parquet(\n", + " f\"{GCS_FILE_PATH}/rt_vs_schedule/stop_times_direction_{analysis_date}.parquet\",\n", + " columns=[\"schedule_gtfs_dataset_key\", \"feed_key\", \"stop_id\", \"stop_name\", \"geometry\"],\n", + " storage_options={'token': credentials.token}\n", + " )\n", + "\n", + " return stops" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "f1cd0c41-8577-4733-b67c-7d1fe08f9aa1", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 4595211 entries, 0 to 4595210\n", + "Data columns (total 5 columns):\n", + " # Column Dtype \n", + "--- ------ ----- \n", + " 0 schedule_gtfs_dataset_key object \n", + " 1 feed_key object \n", + " 2 stop_id object \n", + " 3 stop_name object \n", + " 4 geometry geometry\n", + "dtypes: geometry(1), object(4)\n", + "memory usage: 
175.3+ MB\n" + ] + } + ], + "source": [ + "stops = prep_stops(analysis_date)\n", + "stops.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "160c660b-d683-4eb2-ae38-7f9d1a412cda", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
schedule_gtfs_dataset_keyfeed_keystop_idstop_namegeometry
0723210f3a6d61ee3936df401e18a563615b542ef6dbfd2903710095179e84b25TL-3Terminal 1POINT (147834.197 -450957.957)
1723210f3a6d61ee3936df401e18a563615b542ef6dbfd2903710095179e84b25TL-4Terminal 2POINT (147598.785 -450990.106)
2723210f3a6d61ee3936df401e18a563615b542ef6dbfd2903710095179e84b25TL-5Terminal 3POINT (147265.199 -451037.318)
3723210f3a6d61ee3936df401e18a563615b542ef6dbfd2903710095179e84b25TL-6International TerminalPOINT (147144.316 -451145.363)
4723210f3a6d61ee3936df401e18a563615b542ef6dbfd2903710095179e84b25TL-7Terminal 4POINT (147272.606 -451317.665)
\n", + "
" + ], + "text/plain": [ + " schedule_gtfs_dataset_key feed_key stop_id \\\n", + "0 723210f3a6d61ee3936df401e18a5636 15b542ef6dbfd2903710095179e84b25 TL-3 \n", + "1 723210f3a6d61ee3936df401e18a5636 15b542ef6dbfd2903710095179e84b25 TL-4 \n", + "2 723210f3a6d61ee3936df401e18a5636 15b542ef6dbfd2903710095179e84b25 TL-5 \n", + "3 723210f3a6d61ee3936df401e18a5636 15b542ef6dbfd2903710095179e84b25 TL-6 \n", + "4 723210f3a6d61ee3936df401e18a5636 15b542ef6dbfd2903710095179e84b25 TL-7 \n", + "\n", + " stop_name geometry \n", + "0 Terminal 1 POINT (147834.197 -450957.957) \n", + "1 Terminal 2 POINT (147598.785 -450990.106) \n", + "2 Terminal 3 POINT (147265.199 -451037.318) \n", + "3 International Terminal POINT (147144.316 -451145.363) \n", + "4 Terminal 4 POINT (147272.606 -451317.665) " + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stops.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "1f77a566-31f2-4f9e-9dda-d8d74b79487a", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "orgs_stops = stops.dropna(subset = ['stop_id', 'stop_name']).merge(\n", + " dim_orgs_final,\n", + " right_on = 'gtfs_dataset_key',\n", + " left_on = 'schedule_gtfs_dataset_key',\n", + " how = 'inner'\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "7d76cc04-e782-46b8-a748-1c9e43e077df", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "orgs_stops = orgs_stops.drop_duplicates()" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "2ac56dc1-0c04-48e6-9114-04535ed00c76", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Int64Index: 53511 entries, 0 to 2788870\n", + "Data columns (total 11 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 schedule_gtfs_dataset_key 53511 non-null object \n", + " 1 feed_key 53511 non-null object 
\n", + " 2 stop_id 53511 non-null object \n", + " 3 stop_name 53511 non-null object \n", + " 4 geometry 53511 non-null geometry\n", + " 5 key 53511 non-null object \n", + " 6 name 53511 non-null object \n", + " 7 organization_type 53511 non-null object \n", + " 8 gtfs_dataset_key 53511 non-null object \n", + " 9 ntd_id 52343 non-null object \n", + " 10 ntd_agency_info_key 48936 non-null object \n", + "dtypes: geometry(1), object(10)\n", + "memory usage: 4.9+ MB\n" + ] + } + ], + "source": [ + "orgs_stops.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "c8563478-a79b-4aef-9dff-225472a79fda", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "orgs_stops = orgs_stops.to_crs(tracts_ca_acs.crs)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "806da12f-9cf6-4306-81c0-dd057f31d8a9", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "orgs_stop_buffered = gpd.GeoDataFrame(\n", + " orgs_stops.copy(), \n", + " geometry=orgs_stops.geometry.buffer(500),\n", + " crs=orgs_stops.crs\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "2e231b7d-d744-419d-b442-810b166fcd3d", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "geometry_intersect = gpd.overlay(orgs_stop_buffered, tracts_ca_acs, how = 'intersection', keep_geom_type=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "fa657799-42aa-46d7-a789-c4d86b226c13", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "geometry_intersect['area_2'] = geometry_intersect.geometry.area" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "59ce3254-f021-40cd-869b-3c7a21f9002e", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
schedule_gtfs_dataset_keyfeed_keystop_idstop_namekeynameorganization_typegtfs_dataset_keyntd_idntd_agency_info_keySTATEFPCOUNTYFPTRACTCEGEOIDFQGEOIDNAMENAMELSADSTUSPSNAMELSADCOSTATE_NAMELSADALANDAWATERtotal_poppoverty_popnon_us_citizenmale_65_to_66male_67_to_69male_70_to_74male_75_to_79male_80_to_84male_85_and_overfemale_65_to_66female_67_to_69female_70_to_74female_75_to_79female_80_to_84female_85_and_overincome_less_10000income_10000_14999income_15000_24999income_25000_34999income_35000_49999income_50000_64999income_65000_74999workers_with_no_carhouseholds_with_no_carsdisabled_poppublic_asst_popstatecountytractcounty_nameinc_extremelylowinc_verylowinc_lowmale_seniorsfemale_seniorsarea_m2geometryarea_2
0cc53a0dbf5df90e3009b9cb5d89d80ba49f469dcf8712b562e3c970aa1b897315961046Echo Park Ave & Donaldson St (Southbound)123beaa13b8cfbd650a48cdfd4647088City of Los AngelesCity/Towncc53a0dbf5df90e3009b9cb5d89d80ba90147reccTizvO7pe1k1CS060371974101400000US06037197410060371974101974.10Census Tract 1974.10CALos Angeles CountyCaliforniaCT139331203805239429252553758320497177352728813828822642640215129513805170106037197410Los Angeles7146525532182591.406431e+06POLYGON ((162046.722 -434622.218, 162034.799 -...725281.427873
1cc53a0dbf5df90e3009b9cb5d89d80ba49f469dcf8712b562e3c970aa1b897315797231Echo Park Ave & Baxter St (Southbound) (4052)123beaa13b8cfbd650a48cdfd4647088City of Los AngelesCity/Towncc53a0dbf5df90e3009b9cb5d89d80ba90147reccTizvO7pe1k1CS060371974101400000US06037197410060371974101974.10Census Tract 1974.10CALos Angeles CountyCaliforniaCT139331203805239429252553758320497177352728813828822642640215129513805170106037197410Los Angeles7146525532182591.406431e+06POLYGON ((161975.618 -434794.073, 161968.418 -...590112.226428
\n", + "
" + ], + "text/plain": [ + " schedule_gtfs_dataset_key feed_key \\\n", + "0 cc53a0dbf5df90e3009b9cb5d89d80ba 49f469dcf8712b562e3c970aa1b89731 \n", + "1 cc53a0dbf5df90e3009b9cb5d89d80ba 49f469dcf8712b562e3c970aa1b89731 \n", + "\n", + " stop_id stop_name \\\n", + "0 5961046 Echo Park Ave & Donaldson St (Southbound) \n", + "1 5797231 Echo Park Ave & Baxter St (Southbound) (4052) \n", + "\n", + " key name organization_type \\\n", + "0 123beaa13b8cfbd650a48cdfd4647088 City of Los Angeles City/Town \n", + "1 123beaa13b8cfbd650a48cdfd4647088 City of Los Angeles City/Town \n", + "\n", + " gtfs_dataset_key ntd_id ntd_agency_info_key STATEFP \\\n", + "0 cc53a0dbf5df90e3009b9cb5d89d80ba 90147 reccTizvO7pe1k1CS 06 \n", + "1 cc53a0dbf5df90e3009b9cb5d89d80ba 90147 reccTizvO7pe1k1CS 06 \n", + "\n", + " COUNTYFP TRACTCE GEOIDFQ GEOID NAME \\\n", + "0 037 197410 1400000US06037197410 06037197410 1974.10 \n", + "1 037 197410 1400000US06037197410 06037197410 1974.10 \n", + "\n", + " NAMELSAD STUSPS NAMELSADCO STATE_NAME LSAD ALAND \\\n", + "0 Census Tract 1974.10 CA Los Angeles County California CT 1393312 \n", + "1 Census Tract 1974.10 CA Los Angeles County California CT 1393312 \n", + "\n", + " AWATER total_pop poverty_pop non_us_citizen male_65_to_66 \\\n", + "0 0 3805 239 429 25 \n", + "1 0 3805 239 429 25 \n", + "\n", + " male_67_to_69 male_70_to_74 male_75_to_79 male_80_to_84 \\\n", + "0 25 53 75 8 \n", + "1 25 53 75 8 \n", + "\n", + " male_85_and_over female_65_to_66 female_67_to_69 female_70_to_74 \\\n", + "0 32 0 49 71 \n", + "1 32 0 49 71 \n", + "\n", + " female_75_to_79 female_80_to_84 female_85_and_over income_less_10000 \\\n", + "0 77 35 27 288 \n", + "1 77 35 27 288 \n", + "\n", + " income_10000_14999 income_15000_24999 income_25000_34999 \\\n", + "0 138 288 226 \n", + "1 138 288 226 \n", + "\n", + " income_35000_49999 income_50000_64999 income_65000_74999 \\\n", + "0 426 402 151 \n", + "1 426 402 151 \n", + "\n", + " workers_with_no_car households_with_no_cars 
disabled_pop \\\n", + "0 29 51 3805 \n", + "1 29 51 3805 \n", + "\n", + " public_asst_pop state county tract county_name inc_extremelylow \\\n", + "0 1701 06 037 197410 Los Angeles 714 \n", + "1 1701 06 037 197410 Los Angeles 714 \n", + "\n", + " inc_verylow inc_low male_seniors female_seniors area_m2 \\\n", + "0 652 553 218 259 1.406431e+06 \n", + "1 652 553 218 259 1.406431e+06 \n", + "\n", + " geometry area_2 \n", + "0 POLYGON ((162046.722 -434622.218, 162034.799 -... 725281.427873 \n", + "1 POLYGON ((161975.618 -434794.073, 161968.418 -... 590112.226428 " + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "geometry_intersect.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "ed44c28b-0eb9-4d7c-b9a5-3739ea9f5133", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "geometry_intersect['adjusted_total_pop'] = geometry_intersect['total_pop'] * (geometry_intersect['area_2'] / geometry_intersect['area_m2'])" ] }, { "cell_type": "code", "execution_count": null, - "id": "5763be02-ce61-46fe-aeba-9fd273e9ca81", + "id": "8653de80-582e-435a-9642-69ac80089dba", "metadata": {}, "outputs": [], "source": [] From 45d5b3bed96693dd7cfde7aea4304132586f6a72 Mon Sep 17 00:00:00 2001 From: Shweta Adhikari Date: Fri, 31 Oct 2025 21:17:06 +0000 Subject: [PATCH 2/4] agency level acs data --- .../01_agency_grain_census.ipynb | 1036 +++++++++++++---- 1 file changed, 784 insertions(+), 252 deletions(-) diff --git a/transit_provider_dashboard/01_agency_grain_census.ipynb b/transit_provider_dashboard/01_agency_grain_census.ipynb index bbe2f6f14..11a68831b 100644 --- a/transit_provider_dashboard/01_agency_grain_census.ipynb +++ b/transit_provider_dashboard/01_agency_grain_census.ipynb @@ -24,6 +24,10 @@ "execution_count": 2, "id": "0cdda776-857c-4e47-8ce8-940bfc49bb29", "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + }, "tags": [] }, "outputs": [ @@ -119,8 
+123,8 @@ }, "outputs": [], "source": [ - "with open (\"ACS_apikey\", \"r\") as file:\n", - " api_key = file.read().strip()" + "# with open (\"ACS_apikey\", \"r\") as file:\n", + "# api_key = file.read().strip()" ] }, { @@ -132,142 +136,116 @@ }, "outputs": [], "source": [ - "# County Level Metrics required: \"Total Population\", \"Total Veteran Population\", \"Total Senior Population\", \"Total Low Income Population\"\n", - "variables = [\n", - " \"B01003_001E\", # Total Population\n", - " \"B17001_002E\", # Population with Income in the past 12 months below poverty level\n", - " \"B16008_037E\", # Non US Citizen Population\n", - " \"B01001_020E\", \"B01001_021E\", \"B01001_022E\", \"B01001_023E\", \"B01001_024E\", \"B01001_025E\", # Male senior population : 65 and above\n", - " \"B01001_044E\", \"B01001_045E\", \"B01001_046E\", \"B01001_047E\", \"B01001_048E\", \"B01001_049E\", # Female senior population : 65 and above\n", - " \"B06010_004E\", \"B06010_005E\", \"B06010_006E\", # Population with extremely low income\n", - " \"B06010_007E\", \"B06010_008E\", # Population with very low income\n", - " \"B06010_009E\", \"B06010_010E\", # Population with low income \n", - " \"B08014_002E\", \"B08201_002E\", # Workers and Households with no cars\n", - " \"B18101_001E\", # Total Population with Disability\n", - " \"B19058_001E\" # Public Assistance Income or Food Stamps/SNAP in past 12 months for Households\n", - "]\n", + "# # County Level Metrics required: \"Total Population\", \"Total Veteran Population\", \"Total Senior Population\", \"Total Low Income Population\"\n", + "# variables = [\n", + "# \"B01003_001E\", # Total Population\n", + "# \"B17001_002E\", # Population with Income in the past 12 months below poverty level\n", + "# \"B16008_037E\", # Non US Citizen Population\n", + "# \"B01001_020E\", \"B01001_021E\", \"B01001_022E\", \"B01001_023E\", \"B01001_024E\", \"B01001_025E\", # Male senior population : 65 and above\n", + "# \"B01001_044E\", \"B01001_045E\", 
\"B01001_046E\", \"B01001_047E\", \"B01001_048E\", \"B01001_049E\", # Female senior population : 65 and above\n", + "# \"B06010_004E\", \"B06010_005E\", \"B06010_006E\", # Population with extremely low income\n", + "# \"B06010_007E\", \"B06010_008E\", # Population with very low income\n", + "# \"B06010_009E\", \"B06010_010E\", # Population with low income \n", + "# \"B08014_002E\", \"B08201_002E\", # Workers and Households with no cars\n", + "# \"B18101_001E\", # Total Population with Disability\n", + "# \"B19058_001E\", # Public Assistance Income or Food Stamps/SNAP in past 12 months for Households\n", + "# \"B21001_002E\" # Population with veteran status: 18 and above\n", + "# ]\n", "\n", " " ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 53, "id": "5b08cfbf-e7ed-4b4a-8e38-68e66a760086", "metadata": {}, "outputs": [], "source": [ - "variable_str = \"NAME,\" + \",\".join(variables)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "ad038c7f-7aef-4a3d-a878-00190d1b9fb9", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "url = f\"https://api.census.gov/data/2023/acs/acs5?get={variable_str}&for=tract:*&in=state:06&key={api_key}\"" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "00b4b017-18de-42fa-98b0-6d91950a9474", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "response = requests.get(url)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "94684cc6-092a-4ed3-8872-76e3e7a4ef33", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "if response.status_code == 200:\n", - " data = response.json()\n", - " census_data = pd.DataFrame(data[1:], columns=data[0])\n", + "# variable_str = \"NAME,\" + \",\".join(variables)\n", + "# url = f\"https://api.census.gov/data/2023/acs/acs5?get={variable_str}&for=tract:*&in=state:06&key={api_key}\"\n", + "# response = requests.get(url)\n", + "\n", + "# if response.status_code == 200:\n", + "# data = 
response.json()\n", + "# census_data = pd.DataFrame(data[1:], columns=data[0])\n", " \n", - " # Create GEOID column\n", - " census_data[\"GEOID\"] = census_data[\"state\"] + census_data[\"county\"] + census_data[\"tract\"]" + "# # Create GEOID column\n", + "# census_data[\"GEOID\"] = census_data[\"state\"] + census_data[\"county\"] + census_data[\"tract\"]\n", + "\n", + "# census_data['county_name'] = census_data['NAME'].str.extract(r';\\s*([A-Za-z\\s]+) County;')\n", + "# census_data = census_data.drop(columns=['NAME'])\n" ] }, { "cell_type": "code", - "execution_count": 12, - "id": "b69bf253-d227-4ef5-9601-f4f3605f1877", + "execution_count": 14, + "id": "a0808315-79b0-42ee-8653-222785770048", "metadata": { "tags": [] }, "outputs": [], "source": [ - "census_data['county_name'] = census_data['NAME'].str.extract(r';\\s*([A-Za-z\\s]+) County;')" + "# census_data = census_data.rename(columns = {\n", + "# 'B01003_001E': 'total_pop',\n", + "# 'B17001_002E': 'poverty_pop',\n", + "# 'B16008_037E': 'non_us_citizen',\n", + "# 'B01001_020E': 'male_65_to_66', 'B01001_021E': 'male_67_to_69', 'B01001_022E': 'male_70_to_74', \n", + "# 'B01001_023E': 'male_75_to_79', 'B01001_024E': 'male_80_to_84', 'B01001_025E': 'male_85_and_over',\n", + "# 'B01001_044E': 'female_65_to_66', 'B01001_045E': 'female_67_to_69', 'B01001_046E': 'female_70_to_74', \n", + "# 'B01001_047E': 'female_75_to_79', 'B01001_048E': 'female_80_to_84', 'B01001_049E': 'female_85_and_over',\n", + "# 'B06010_004E': 'income_less_10000', 'B06010_005E': 'income_10000_14999', 'B06010_006E': 'income_15000_24999', \n", + "# 'B06010_007E': 'income_25000_34999', 'B06010_008E': 'income_35000_49999',\n", + "# 'B06010_009E': 'income_50000_64999', 'B06010_010E': 'income_65000_74999',\n", + "# 'B08014_002E': 'workers_with_no_car', 'B08201_002E': 'households_with_no_cars',\n", + "# 'B18101_001E': 'disabled_pop',\n", + "# 'B19058_001E': 'public_asst_pop',\n", + "# 'B21001_002E': 'veteran_pop'\n", + "# })" ] }, { "cell_type": 
"code", - "execution_count": 13, - "id": "5a03871e-f6b4-46fc-99d2-0dbfa04d1f38", + "execution_count": 15, + "id": "e3ddd869-a418-4bbd-9349-92bdba346012", "metadata": { "tags": [] }, "outputs": [], "source": [ - "census_data = census_data.drop(columns=['NAME'])" + "# exclude = ['state', 'county', 'tract', 'county_name', 'GEOID']\n", + "# cols_to_numeric = [col for col in census_data.columns if col not in exclude]\n", + "# census_data[cols_to_numeric] = census_data[cols_to_numeric].apply(pd.to_numeric, errors='coerce')" ] }, { "cell_type": "code", - "execution_count": 14, - "id": "a0808315-79b0-42ee-8653-222785770048", + "execution_count": 16, + "id": "c0b7ce56-ca45-4a7d-8f45-ddab3e28606f", "metadata": { "tags": [] }, "outputs": [], "source": [ - "census_data = census_data.rename(columns = {\n", - " 'B01003_001E': 'total_pop',\n", - " 'B17001_002E': 'poverty_pop',\n", - " 'B16008_037E': 'non_us_citizen',\n", - " 'B01001_020E': 'male_65_to_66', 'B01001_021E': 'male_67_to_69', 'B01001_022E': 'male_70_to_74', \n", - " 'B01001_023E': 'male_75_to_79', 'B01001_024E': 'male_80_to_84', 'B01001_025E': 'male_85_and_over',\n", - " 'B01001_044E': 'female_65_to_66', 'B01001_045E': 'female_67_to_69', 'B01001_046E': 'female_70_to_74', \n", - " 'B01001_047E': 'female_75_to_79', 'B01001_048E': 'female_80_to_84', 'B01001_049E': 'female_85_and_over',\n", - " 'B06010_004E': 'income_less_10000', 'B06010_005E': 'income_10000_14999', 'B06010_006E': 'income_15000_24999', \n", - " 'B06010_007E': 'income_25000_34999', 'B06010_008E': 'income_35000_49999',\n", - " 'B06010_009E': 'income_50000_64999', 'B06010_010E': 'income_65000_74999',\n", - " 'B08014_002E': 'workers_with_no_car', 'B08201_002E': 'households_with_no_cars',\n", - " 'B18101_001E': 'disabled_pop',\n", - " 'B19058_001E': 'public_asst_pop'\n", - "})" + "# # Store data in warehouse\n", + "# with fs.open(f\"{GCS_FILE_PATH}/transit_provider_dashboard/census_data_2023.parquet\", \"wb\") as f:\n", + "# census_data.to_parquet(f, 
index=False)" ] }, { "cell_type": "code", - "execution_count": 15, - "id": "e3ddd869-a418-4bbd-9349-92bdba346012", - "metadata": { - "tags": [] - }, + "execution_count": 17, + "id": "f7477af7-e3ee-44f5-b292-bd423280a0f2", + "metadata": {}, "outputs": [], "source": [ - "exclude = ['state', 'county', 'tract', 'county_name', 'GEOID']\n", - "cols_to_numeric = [col for col in census_data.columns if col not in exclude]\n", - "census_data[cols_to_numeric] = census_data[cols_to_numeric].apply(pd.to_numeric, errors='coerce')" + "with fs.open(f\"{GCS_FILE_PATH}/transit_provider_dashboard/census_data_2023.parquet\", \"rb\") as f:\n", + " census_data = pd.read_parquet(f)" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 18, "id": "6c08efd5-8cf5-4333-bd86-94c515ecfb07", "metadata": { "tags": [] @@ -320,6 +298,7 @@ " households_with_no_cars\n", " disabled_pop\n", " public_asst_pop\n", + " veteran_pop\n", " state\n", " county\n", " tract\n", @@ -356,6 +335,7 @@ " 85\n", " 3094\n", " 1316\n", + " 129\n", " 06\n", " 001\n", " 400100\n", @@ -390,6 +370,7 @@ " 95\n", " 2093\n", " 861\n", + " 38\n", " 06\n", " 001\n", " 400200\n", @@ -424,6 +405,7 @@ " 416\n", " 5727\n", " 2713\n", + " 80\n", " 06\n", " 001\n", " 400300\n", @@ -458,6 +440,7 @@ " 204\n", " 4376\n", " 1803\n", + " 88\n", " 06\n", " 001\n", " 400400\n", @@ -492,6 +475,7 @@ " 169\n", " 3822\n", " 1655\n", + " 115\n", " 06\n", " 001\n", " 400500\n", @@ -545,22 +529,22 @@ "3 178 87 134 \n", "4 387 244 74 \n", "\n", - " households_with_no_cars disabled_pop public_asst_pop state county \\\n", - "0 85 3094 1316 06 001 \n", - "1 95 2093 861 06 001 \n", - "2 416 5727 2713 06 001 \n", - "3 204 4376 1803 06 001 \n", - "4 169 3822 1655 06 001 \n", - "\n", - " tract GEOID county_name \n", - "0 400100 06001400100 Alameda \n", - "1 400200 06001400200 Alameda \n", - "2 400300 06001400300 Alameda \n", - "3 400400 06001400400 Alameda \n", - "4 400500 06001400500 Alameda " + " households_with_no_cars disabled_pop 
public_asst_pop veteran_pop state \\\n", + "0 85 3094 1316 129 06 \n", + "1 95 2093 861 38 06 \n", + "2 416 5727 2713 80 06 \n", + "3 204 4376 1803 88 06 \n", + "4 169 3822 1655 115 06 \n", + "\n", + " county tract GEOID county_name \n", + "0 001 400100 06001400100 Alameda \n", + "1 001 400200 06001400200 Alameda \n", + "2 001 400300 06001400300 Alameda \n", + "3 001 400400 06001400400 Alameda \n", + "4 001 400500 06001400500 Alameda " ] }, - "execution_count": 16, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -571,7 +555,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 19, "id": "6647104f-db0a-4cb5-9908-c30d009e568e", "metadata": { "tags": [] @@ -586,7 +570,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 20, "id": "5b637751-060f-46ce-9b8c-41c94dd07620", "metadata": { "tags": [] @@ -599,7 +583,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 21, "id": "55013195-b315-4889-abf6-951091d09e05", "metadata": {}, "outputs": [ @@ -619,7 +603,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 22, "id": "986d2bcf-3e7f-4746-abe4-627460f68406", "metadata": {}, "outputs": [], @@ -630,7 +614,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 23, "id": "0381662d-9cfe-4efd-a971-47fda3567c85", "metadata": { "tags": [] @@ -642,7 +626,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 24, "id": "0bd06747-c70c-4418-b136-eb34f8a1fd7d", "metadata": { "tags": [] @@ -654,52 +638,79 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 25, "id": "1a89c9b5-4b7c-45c8-9d1e-632e1006b653", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# Querying dim organization\n", - "with db_engine.connect() as connection:\n", - " query = \"\"\"\n", - " SELECT\n", - " key, name, organization_type, ntd_id, ntd_agency_info_key, \n", - " public_currently_operating, _is_current, 
_valid_from, _valid_to\n", - " FROM \n", - " cal-itp-data-infra.mart_transit_database.dim_organizations\n", - " \"\"\"\n", + "# # Querying dim organization\n", + "# with db_engine.connect() as connection:\n", + "# query = \"\"\"\n", + "# SELECT\n", + "# key, name, organization_type, ntd_id, ntd_agency_info_key, \n", + "# public_currently_operating, _is_current, _valid_from, _valid_to\n", + "# FROM \n", + "# cal-itp-data-infra.mart_transit_database.dim_organizations\n", + "# \"\"\"\n", " \n", - " #localize timestamps\n", - " dim_orgs = (\n", - " pd.read_sql(query, connection)\n", - " .pipe(schedule_rt_utils.localize_timestamp_col, [\"_valid_from\", \"_valid_to\"])\n", - " )\n", + "# #localize timestamps\n", + "# dim_orgs = (\n", + "# pd.read_sql(query, connection)\n", + "# .pipe(schedule_rt_utils.localize_timestamp_col, [\"_valid_from\", \"_valid_to\"])\n", + "# )\n", " \n", " \n", - " dim_orgs = dim_orgs[\n", - " (dim_orgs['public_currently_operating'] == True) & \n", - " (dim_orgs['_is_current'] == True)\n", - " ].reset_index(drop=True)\n" + "# dim_orgs = dim_orgs[\n", + "# (dim_orgs['public_currently_operating'] == True) & \n", + "# (dim_orgs['_is_current'] == True)\n", + "# ].reset_index(drop=True)\n" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 26, "id": "4211d3bf-0742-4a8f-9fd3-5ab435168ddf", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# Filtering the provider gtfs data to valid dates \n", - "valid_organization_full = filter_to_valid_dates(dim_orgs, [analysis_date])" + "# # Filtering the provider gtfs data to valid dates \n", + "# valid_organization_full = filter_to_valid_dates(dim_orgs, [analysis_date])" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 27, + "id": "fa8ce12e-e993-446c-a4b0-aafca6974275", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# #Store data in warehouse\n", + "# with 
fs.open(f\"{GCS_FILE_PATH}/transit_provider_dashboard/organization_data_2025_08_20.parquet\", \"wb\") as f:\n", + "# valid_organization_full.to_parquet(f, index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "4916dd04-523b-4dda-aca7-599a90560b19", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "with fs.open(f\"{GCS_FILE_PATH}/transit_provider_dashboard/organization_data_2025_08_20.parquet\", \"rb\") as f:\n", + " valid_organization_full = pd.read_parquet(f)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, "id": "ca225047-9991-4ab1-bbde-e5e720418782", "metadata": { "tags": [] @@ -829,12 +840,12 @@ "3 City/Town 90194 recMmQSjQCzABlmh1 \n", "4 City/Town 90250 recbLanAuzm5QituE \n", "\n", - " public_currently_operating _is_current _valid_from \\\n", - "0 True True 2025-03-06 00:00:00+00:00 \n", - "1 True True 2025-03-06 00:00:00+00:00 \n", - "2 True True 2025-03-06 00:00:00+00:00 \n", - "3 True True 2025-03-06 00:00:00+00:00 \n", - "4 True True 2025-03-06 00:00:00+00:00 \n", + " public_currently_operating _is_current _valid_from \\\n", + "0 True True 2025-03-06 00:00:00+00:00 \n", + "1 True True 2025-03-06 00:00:00+00:00 \n", + "2 True True 2025-03-06 00:00:00+00:00 \n", + "3 True True 2025-03-06 00:00:00+00:00 \n", + "4 True True 2025-03-06 00:00:00+00:00 \n", "\n", " _valid_to _valid_from_local \\\n", "0 2098-12-31 23:59:59.999999+00:00 2025-03-05 16:00:00 \n", @@ -851,7 +862,7 @@ "4 2098-12-31 15:59:59.999999 " ] }, - "execution_count": 25, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -862,7 +873,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 30, "id": "ed1cd1ac-5a21-4a5d-9b1a-5522564896c1", "metadata": { "tags": [] @@ -882,7 +893,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 32, "id": "d28f4c79-d5b4-46ba-9c3c-54f1bd6e9d89", "metadata": { "tags": [] @@ -890,7 +901,7 @@ "outputs": [], "source": [ "dim_orgs_merged 
= pd.merge(\n", - " dim_orgs.dropna(subset=['key', 'name']),\n", + " valid_organization_full.dropna(subset=['key', 'name']),\n", " dim_orgs_GTFS.dropna(subset=['organization_key', 'organization_name']),\n", " left_on=['key', 'name'],\n", " right_on=['organization_key', 'organization_name'],\n", @@ -900,7 +911,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 33, "id": "c66cf805-de18-4723-b46b-be7bf84fa2b1", "metadata": { "tags": [] @@ -912,7 +923,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 34, "id": "01eccdb1-4bd8-4670-8f9c-3a2a0e94948d", "metadata": { "tags": [] @@ -924,7 +935,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 35, "id": "457b636a-705d-4085-91f8-db64d0142ef1", "metadata": { "tags": [] @@ -970,38 +981,38 @@ " None\n", " \n", " \n", - " 21\n", - " 50e751fbc179dae727bb433686f934ac\n", - " City of Newport Beach\n", + " 32\n", + " 1906a01d5cb664c5e898a95276912bfe\n", + " Town of Truckee\n", " City/Town\n", - " f1f6de14d52cf6ea1b24e87e494063c9\n", - " None\n", - " None\n", + " 6fda78099793184fe08dd78945d188c0\n", + " 9R02-91101\n", + " receHP6eQInAo7sSP\n", " \n", " \n", - " 22\n", - " 50e751fbc179dae727bb433686f934ac\n", - " City of Newport Beach\n", + " 33\n", + " 1906a01d5cb664c5e898a95276912bfe\n", + " Town of Truckee\n", " City/Town\n", - " aa3f2efbc6864841e4c6103dd1cbe7f4\n", - " None\n", - " None\n", + " 683da99e57acc29ac600a24cbd96feda\n", + " 9R02-91101\n", + " receHP6eQInAo7sSP\n", " \n", " \n", - " 23\n", - " 50e751fbc179dae727bb433686f934ac\n", - " City of Newport Beach\n", - " City/Town\n", - " 8c3c59f936e34fe19396593f08436c55\n", + " 34\n", + " aad5befa7fcfce979f2113e373e48aa6\n", + " Yosemite National Park\n", + " Federal Government\n", + " 31f91d59f493cbee9ae0eeb824f44d0e\n", " None\n", " None\n", " \n", " \n", - " 24\n", - " 50e751fbc179dae727bb433686f934ac\n", - " City of Newport Beach\n", - " City/Town\n", - " f47fca258cc9089401297e36ced29101\n", + " 
35\n", + " aad5befa7fcfce979f2113e373e48aa6\n", + " Yosemite National Park\n", + " Federal Government\n", + " 31152914d10e2d0977b8b2fabb167922\n", " None\n", " None\n", " \n", @@ -1010,22 +1021,29 @@ "" ], "text/plain": [ - " key name organization_type \\\n", - "16 306bafde22fe614e0a6af2269625d8f6 City of Menlo Park City/Town \n", - "21 50e751fbc179dae727bb433686f934ac City of Newport Beach City/Town \n", - "22 50e751fbc179dae727bb433686f934ac City of Newport Beach City/Town \n", - "23 50e751fbc179dae727bb433686f934ac City of Newport Beach City/Town \n", - "24 50e751fbc179dae727bb433686f934ac City of Newport Beach City/Town \n", - "\n", - " gtfs_dataset_key ntd_id ntd_agency_info_key \n", - "16 b76861f44c68f440d922c54ac1231d31 None None \n", - "21 f1f6de14d52cf6ea1b24e87e494063c9 None None \n", - "22 aa3f2efbc6864841e4c6103dd1cbe7f4 None None \n", - "23 8c3c59f936e34fe19396593f08436c55 None None \n", - "24 f47fca258cc9089401297e36ced29101 None None " + " key name \\\n", + "16 306bafde22fe614e0a6af2269625d8f6 City of Menlo Park \n", + "32 1906a01d5cb664c5e898a95276912bfe Town of Truckee \n", + "33 1906a01d5cb664c5e898a95276912bfe Town of Truckee \n", + "34 aad5befa7fcfce979f2113e373e48aa6 Yosemite National Park \n", + "35 aad5befa7fcfce979f2113e373e48aa6 Yosemite National Park \n", + "\n", + " organization_type gtfs_dataset_key ntd_id \\\n", + "16 City/Town b76861f44c68f440d922c54ac1231d31 None \n", + "32 City/Town 6fda78099793184fe08dd78945d188c0 9R02-91101 \n", + "33 City/Town 683da99e57acc29ac600a24cbd96feda 9R02-91101 \n", + "34 Federal Government 31f91d59f493cbee9ae0eeb824f44d0e None \n", + "35 Federal Government 31152914d10e2d0977b8b2fabb167922 None \n", + "\n", + " ntd_agency_info_key \n", + "16 None \n", + "32 receHP6eQInAo7sSP \n", + "33 receHP6eQInAo7sSP \n", + "34 None \n", + "35 None " ] }, - "execution_count": 30, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -1036,7 +1054,7 @@ }, { "cell_type": "code", - 
"execution_count": 31, + "execution_count": 36, "id": "b7292811-58ec-4672-a2e3-b9c133e79723", "metadata": { "tags": [] @@ -1047,18 +1065,18 @@ "output_type": "stream", "text": [ "\n", - "Int64Index: 461 entries, 16 to 543\n", + "Int64Index: 441 entries, 16 to 522\n", "Data columns (total 6 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 key 461 non-null object\n", - " 1 name 461 non-null object\n", - " 2 organization_type 461 non-null object\n", - " 3 gtfs_dataset_key 461 non-null object\n", - " 4 ntd_id 433 non-null object\n", - " 5 ntd_agency_info_key 383 non-null object\n", + " 0 key 441 non-null object\n", + " 1 name 441 non-null object\n", + " 2 organization_type 441 non-null object\n", + " 3 gtfs_dataset_key 441 non-null object\n", + " 4 ntd_id 417 non-null object\n", + " 5 ntd_agency_info_key 370 non-null object\n", "dtypes: object(6)\n", - "memory usage: 25.2+ KB\n" + "memory usage: 24.1+ KB\n" ] } ], @@ -1068,7 +1086,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 37, "id": "dc862f09-81a6-4748-93d3-29a84ada4c8d", "metadata": { "tags": [] @@ -1114,38 +1132,38 @@ " None\n", " \n", " \n", - " 21\n", - " 50e751fbc179dae727bb433686f934ac\n", - " City of Newport Beach\n", + " 32\n", + " 1906a01d5cb664c5e898a95276912bfe\n", + " Town of Truckee\n", " City/Town\n", - " f1f6de14d52cf6ea1b24e87e494063c9\n", - " None\n", - " None\n", + " 6fda78099793184fe08dd78945d188c0\n", + " 9R02-91101\n", + " receHP6eQInAo7sSP\n", " \n", " \n", - " 22\n", - " 50e751fbc179dae727bb433686f934ac\n", - " City of Newport Beach\n", + " 33\n", + " 1906a01d5cb664c5e898a95276912bfe\n", + " Town of Truckee\n", " City/Town\n", - " aa3f2efbc6864841e4c6103dd1cbe7f4\n", - " None\n", - " None\n", + " 683da99e57acc29ac600a24cbd96feda\n", + " 9R02-91101\n", + " receHP6eQInAo7sSP\n", " \n", " \n", - " 23\n", - " 50e751fbc179dae727bb433686f934ac\n", - " City of Newport Beach\n", - " City/Town\n", - " 
8c3c59f936e34fe19396593f08436c55\n", + " 34\n", + " aad5befa7fcfce979f2113e373e48aa6\n", + " Yosemite National Park\n", + " Federal Government\n", + " 31f91d59f493cbee9ae0eeb824f44d0e\n", " None\n", " None\n", " \n", " \n", - " 24\n", - " 50e751fbc179dae727bb433686f934ac\n", - " City of Newport Beach\n", - " City/Town\n", - " f47fca258cc9089401297e36ced29101\n", + " 35\n", + " aad5befa7fcfce979f2113e373e48aa6\n", + " Yosemite National Park\n", + " Federal Government\n", + " 31152914d10e2d0977b8b2fabb167922\n", " None\n", " None\n", " \n", @@ -1154,22 +1172,29 @@ "" ], "text/plain": [ - " key name organization_type \\\n", - "16 306bafde22fe614e0a6af2269625d8f6 City of Menlo Park City/Town \n", - "21 50e751fbc179dae727bb433686f934ac City of Newport Beach City/Town \n", - "22 50e751fbc179dae727bb433686f934ac City of Newport Beach City/Town \n", - "23 50e751fbc179dae727bb433686f934ac City of Newport Beach City/Town \n", - "24 50e751fbc179dae727bb433686f934ac City of Newport Beach City/Town \n", - "\n", - " gtfs_dataset_key ntd_id ntd_agency_info_key \n", - "16 b76861f44c68f440d922c54ac1231d31 None None \n", - "21 f1f6de14d52cf6ea1b24e87e494063c9 None None \n", - "22 aa3f2efbc6864841e4c6103dd1cbe7f4 None None \n", - "23 8c3c59f936e34fe19396593f08436c55 None None \n", - "24 f47fca258cc9089401297e36ced29101 None None " + " key name \\\n", + "16 306bafde22fe614e0a6af2269625d8f6 City of Menlo Park \n", + "32 1906a01d5cb664c5e898a95276912bfe Town of Truckee \n", + "33 1906a01d5cb664c5e898a95276912bfe Town of Truckee \n", + "34 aad5befa7fcfce979f2113e373e48aa6 Yosemite National Park \n", + "35 aad5befa7fcfce979f2113e373e48aa6 Yosemite National Park \n", + "\n", + " organization_type gtfs_dataset_key ntd_id \\\n", + "16 City/Town b76861f44c68f440d922c54ac1231d31 None \n", + "32 City/Town 6fda78099793184fe08dd78945d188c0 9R02-91101 \n", + "33 City/Town 683da99e57acc29ac600a24cbd96feda 9R02-91101 \n", + "34 Federal Government 31f91d59f493cbee9ae0eeb824f44d0e None \n", + "35 
Federal Government 31152914d10e2d0977b8b2fabb167922 None \n", + "\n", + " ntd_agency_info_key \n", + "16 None \n", + "32 receHP6eQInAo7sSP \n", + "33 receHP6eQInAo7sSP \n", + "34 None \n", + "35 None " ] }, - "execution_count": 32, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } @@ -1180,7 +1205,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 38, "id": "b826d41d-720f-49ba-9b77-b7186dd95bf6", "metadata": {}, "outputs": [], @@ -1197,7 +1222,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 39, "id": "f1cd0c41-8577-4733-b67c-7d1fe08f9aa1", "metadata": { "tags": [] @@ -1229,7 +1254,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 40, "id": "160c660b-d683-4eb2-ae38-7f9d1a412cda", "metadata": { "tags": [] @@ -1324,7 +1349,7 @@ "4 Terminal 4 POINT (147272.606 -451317.665) " ] }, - "execution_count": 35, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } @@ -1335,7 +1360,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 41, "id": "1f77a566-31f2-4f9e-9dda-d8d74b79487a", "metadata": { "tags": [] @@ -1352,7 +1377,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 42, "id": "7d76cc04-e782-46b8-a748-1c9e43e077df", "metadata": { "tags": [] @@ -1364,7 +1389,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 43, "id": "2ac56dc1-0c04-48e6-9114-04535ed00c76", "metadata": {}, "outputs": [ @@ -1399,7 +1424,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 44, "id": "c8563478-a79b-4aef-9dff-225472a79fda", "metadata": { "tags": [] @@ -1411,7 +1436,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 45, "id": "806da12f-9cf6-4306-81c0-dd057f31d8a9", "metadata": { "tags": [] @@ -1427,7 +1452,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 46, "id": "2e231b7d-d744-419d-b442-810b166fcd3d", "metadata": 
{ "tags": [] @@ -1439,7 +1464,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 47, "id": "fa657799-42aa-46d7-a789-c4d86b226c13", "metadata": { "tags": [] @@ -1451,7 +1476,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 48, "id": "59ce3254-f021-40cd-869b-3c7a21f9002e", "metadata": { "tags": [] @@ -1527,6 +1552,7 @@ " households_with_no_cars\n", " disabled_pop\n", " public_asst_pop\n", + " veteran_pop\n", " state\n", " county\n", " tract\n", @@ -1593,6 +1619,7 @@ " 51\n", " 3805\n", " 1701\n", + " 95\n", " 06\n", " 037\n", " 197410\n", @@ -1657,6 +1684,7 @@ " 51\n", " 3805\n", " 1701\n", + " 95\n", " 06\n", " 037\n", " 197410\n", @@ -1727,20 +1755,24 @@ "0 29 51 3805 \n", "1 29 51 3805 \n", "\n", - " public_asst_pop state county tract county_name inc_extremelylow \\\n", - "0 1701 06 037 197410 Los Angeles 714 \n", - "1 1701 06 037 197410 Los Angeles 714 \n", + " public_asst_pop veteran_pop state county tract county_name \\\n", + "0 1701 95 06 037 197410 Los Angeles \n", + "1 1701 95 06 037 197410 Los Angeles \n", "\n", - " inc_verylow inc_low male_seniors female_seniors area_m2 \\\n", - "0 652 553 218 259 1.406431e+06 \n", - "1 652 553 218 259 1.406431e+06 \n", + " inc_extremelylow inc_verylow inc_low male_seniors female_seniors \\\n", + "0 714 652 553 218 259 \n", + "1 714 652 553 218 259 \n", "\n", - " geometry area_2 \n", - "0 POLYGON ((162046.722 -434622.218, 162034.799 -... 725281.427873 \n", - "1 POLYGON ((161975.618 -434794.073, 161968.418 -... 590112.226428 " + " area_m2 geometry \\\n", + "0 1.406431e+06 POLYGON ((162046.722 -434622.218, 162034.799 -... \n", + "1 1.406431e+06 POLYGON ((161975.618 -434794.073, 161968.418 -... 
\n", + "\n", + " area_2 \n", + "0 725281.427873 \n", + "1 590112.226428 " ] }, - "execution_count": 43, + "execution_count": 48, "metadata": {}, "output_type": "execute_result" } @@ -1751,7 +1783,7 @@ }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 49, "id": "ed44c28b-0eb9-4d7c-b9a5-3739ea9f5133", "metadata": { "tags": [] @@ -1763,9 +1795,509 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 50, "id": "8653de80-582e-435a-9642-69ac80089dba", "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
schedule_gtfs_dataset_keyfeed_keystop_idstop_namekeynameorganization_typegtfs_dataset_keyntd_idntd_agency_info_keySTATEFPCOUNTYFPTRACTCEGEOIDFQGEOIDNAMENAMELSADSTUSPSNAMELSADCOSTATE_NAMELSADALANDAWATERtotal_poppoverty_popnon_us_citizenmale_65_to_66male_67_to_69male_70_to_74male_75_to_79male_80_to_84male_85_and_overfemale_65_to_66female_67_to_69female_70_to_74female_75_to_79female_80_to_84female_85_and_overincome_less_10000income_10000_14999income_15000_24999income_25000_34999income_35000_49999income_50000_64999income_65000_74999workers_with_no_carhouseholds_with_no_carsdisabled_poppublic_asst_popveteran_popstatecountytractcounty_nameinc_extremelylowinc_verylowinc_lowmale_seniorsfemale_seniorsarea_m2geometryarea_2adjusted_total_pop
0cc53a0dbf5df90e3009b9cb5d89d80ba49f469dcf8712b562e3c970aa1b897315961046Echo Park Ave & Donaldson St (Southbound)123beaa13b8cfbd650a48cdfd4647088City of Los AngelesCity/Towncc53a0dbf5df90e3009b9cb5d89d80ba90147reccTizvO7pe1k1CS060371974101400000US06037197410060371974101974.10Census Tract 1974.10CALos Angeles CountyCaliforniaCT13933120380523942925255375832049717735272881382882264264021512951380517019506037197410Los Angeles7146525532182591.406431e+06POLYGON ((162046.722 -434622.218, 162034.799 -...725281.4278731962.197505
1cc53a0dbf5df90e3009b9cb5d89d80ba49f469dcf8712b562e3c970aa1b897315797231Echo Park Ave & Baxter St (Southbound) (4052)123beaa13b8cfbd650a48cdfd4647088City of Los AngelesCity/Towncc53a0dbf5df90e3009b9cb5d89d80ba90147reccTizvO7pe1k1CS060371974101400000US06037197410060371974101974.10Census Tract 1974.10CALos Angeles CountyCaliforniaCT13933120380523942925255375832049717735272881382882264264021512951380517019506037197410Los Angeles7146525532182591.406431e+06POLYGON ((161975.618 -434794.073, 161968.418 -...590112.2264281596.506809
\n", + "
" + ], + "text/plain": [ + " schedule_gtfs_dataset_key feed_key \\\n", + "0 cc53a0dbf5df90e3009b9cb5d89d80ba 49f469dcf8712b562e3c970aa1b89731 \n", + "1 cc53a0dbf5df90e3009b9cb5d89d80ba 49f469dcf8712b562e3c970aa1b89731 \n", + "\n", + " stop_id stop_name \\\n", + "0 5961046 Echo Park Ave & Donaldson St (Southbound) \n", + "1 5797231 Echo Park Ave & Baxter St (Southbound) (4052) \n", + "\n", + " key name organization_type \\\n", + "0 123beaa13b8cfbd650a48cdfd4647088 City of Los Angeles City/Town \n", + "1 123beaa13b8cfbd650a48cdfd4647088 City of Los Angeles City/Town \n", + "\n", + " gtfs_dataset_key ntd_id ntd_agency_info_key STATEFP \\\n", + "0 cc53a0dbf5df90e3009b9cb5d89d80ba 90147 reccTizvO7pe1k1CS 06 \n", + "1 cc53a0dbf5df90e3009b9cb5d89d80ba 90147 reccTizvO7pe1k1CS 06 \n", + "\n", + " COUNTYFP TRACTCE GEOIDFQ GEOID NAME \\\n", + "0 037 197410 1400000US06037197410 06037197410 1974.10 \n", + "1 037 197410 1400000US06037197410 06037197410 1974.10 \n", + "\n", + " NAMELSAD STUSPS NAMELSADCO STATE_NAME LSAD ALAND \\\n", + "0 Census Tract 1974.10 CA Los Angeles County California CT 1393312 \n", + "1 Census Tract 1974.10 CA Los Angeles County California CT 1393312 \n", + "\n", + " AWATER total_pop poverty_pop non_us_citizen male_65_to_66 \\\n", + "0 0 3805 239 429 25 \n", + "1 0 3805 239 429 25 \n", + "\n", + " male_67_to_69 male_70_to_74 male_75_to_79 male_80_to_84 \\\n", + "0 25 53 75 8 \n", + "1 25 53 75 8 \n", + "\n", + " male_85_and_over female_65_to_66 female_67_to_69 female_70_to_74 \\\n", + "0 32 0 49 71 \n", + "1 32 0 49 71 \n", + "\n", + " female_75_to_79 female_80_to_84 female_85_and_over income_less_10000 \\\n", + "0 77 35 27 288 \n", + "1 77 35 27 288 \n", + "\n", + " income_10000_14999 income_15000_24999 income_25000_34999 \\\n", + "0 138 288 226 \n", + "1 138 288 226 \n", + "\n", + " income_35000_49999 income_50000_64999 income_65000_74999 \\\n", + "0 426 402 151 \n", + "1 426 402 151 \n", + "\n", + " workers_with_no_car households_with_no_cars 
disabled_pop \\\n", + "0 29 51 3805 \n", + "1 29 51 3805 \n", + "\n", + " public_asst_pop veteran_pop state county tract county_name \\\n", + "0 1701 95 06 037 197410 Los Angeles \n", + "1 1701 95 06 037 197410 Los Angeles \n", + "\n", + " inc_extremelylow inc_verylow inc_low male_seniors female_seniors \\\n", + "0 714 652 553 218 259 \n", + "1 714 652 553 218 259 \n", + "\n", + " area_m2 geometry \\\n", + "0 1.406431e+06 POLYGON ((162046.722 -434622.218, 162034.799 -... \n", + "1 1.406431e+06 POLYGON ((161975.618 -434794.073, 161968.418 -... \n", + "\n", + " area_2 adjusted_total_pop \n", + "0 725281.427873 1962.197505 \n", + "1 590112.226428 1596.506809 " + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "geometry_intersect.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "597c67ad-9913-435f-8c9b-ddc1a9fbd297", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "#Calculating weighted average for other metrics based on total population \n", + "geometry_intersect['pop_weight'] = geometry_intersect['adjusted_total_pop'] / geometry_intersect['total_pop']\n", + "\n", + "cols_to_weight = ['poverty_pop', 'non_us_citizen', 'workers_with_no_car', \n", + " 'households_with_no_cars', 'disabled_pop', 'public_asst_pop', \n", + " 'inc_extremelylow', 'inc_verylow', 'inc_low', 'male_seniors', 'female_seniors',\n", + " 'veteran_pop']\n", + "\n", + "geometry_intersect[[f'{col}_adj' for col in cols_to_weight]] = (\n", + " geometry_intersect[cols_to_weight].multiply(geometry_intersect['pop_weight'], axis=0)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "a2d96744-d297-483e-a19a-892a194be041", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nameorganization_typentd_idntd_agency_info_keystop_idstop_nameschedule_gtfs_dataset_keyfeed_keyGEOIDFQgeometryarea_2adjusted_total_poppop_weightpoverty_pop_adjnon_us_citizen_adjworkers_with_no_car_adjhouseholds_with_no_cars_adjdisabled_pop_adjpublic_asst_pop_adjinc_extremelylow_adjinc_verylow_adjinc_low_adjmale_seniors_adjfemale_seniors_adjveteran_pop_adj
0City of Los AngelesCity/Town90147reccTizvO7pe1k1CS5961046Echo Park Ave & Donaldson St (Southbound)cc53a0dbf5df90e3009b9cb5d89d80ba49f469dcf8712b562e3c970aa1b897311400000US06037197410POLYGON ((162046.722 -434622.218, 162034.799 -...725281.4278731962.1975050.515689123.249725221.23067814.95498826.3001511962.197505877.187373368.202107336.229375285.176142112.420251133.56351048.990476
1City of Los AngelesCity/Town90147reccTizvO7pe1k1CS5797231Echo Park Ave & Baxter St (Southbound) (4052)cc53a0dbf5df90e3009b9cb5d89d80ba49f469dcf8712b562e3c970aa1b897311400000US06037197410POLYGON ((161975.618 -434794.073, 161968.418 -...590112.2264281596.5068090.419581100.279928180.00037312.16785721.3986461596.506809713.707775299.581041273.567001232.02845391.468721108.67155439.860223
\n", + "
" + ], + "text/plain": [ + " name organization_type ntd_id ntd_agency_info_key stop_id \\\n", + "0 City of Los Angeles City/Town 90147 reccTizvO7pe1k1CS 5961046 \n", + "1 City of Los Angeles City/Town 90147 reccTizvO7pe1k1CS 5797231 \n", + "\n", + " stop_name \\\n", + "0 Echo Park Ave & Donaldson St (Southbound) \n", + "1 Echo Park Ave & Baxter St (Southbound) (4052) \n", + "\n", + " schedule_gtfs_dataset_key feed_key \\\n", + "0 cc53a0dbf5df90e3009b9cb5d89d80ba 49f469dcf8712b562e3c970aa1b89731 \n", + "1 cc53a0dbf5df90e3009b9cb5d89d80ba 49f469dcf8712b562e3c970aa1b89731 \n", + "\n", + " GEOIDFQ geometry \\\n", + "0 1400000US06037197410 POLYGON ((162046.722 -434622.218, 162034.799 -... \n", + "1 1400000US06037197410 POLYGON ((161975.618 -434794.073, 161968.418 -... \n", + "\n", + " area_2 adjusted_total_pop pop_weight poverty_pop_adj \\\n", + "0 725281.427873 1962.197505 0.515689 123.249725 \n", + "1 590112.226428 1596.506809 0.419581 100.279928 \n", + "\n", + " non_us_citizen_adj workers_with_no_car_adj households_with_no_cars_adj \\\n", + "0 221.230678 14.954988 26.300151 \n", + "1 180.000373 12.167857 21.398646 \n", + "\n", + " disabled_pop_adj public_asst_pop_adj inc_extremelylow_adj \\\n", + "0 1962.197505 877.187373 368.202107 \n", + "1 1596.506809 713.707775 299.581041 \n", + "\n", + " inc_verylow_adj inc_low_adj male_seniors_adj female_seniors_adj \\\n", + "0 336.229375 285.176142 112.420251 133.563510 \n", + "1 273.567001 232.028453 91.468721 108.671554 \n", + "\n", + " veteran_pop_adj \n", + "0 48.990476 \n", + "1 39.860223 " + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "filtered_final_data = geometry_intersect[['name', 'organization_type', 'ntd_id', 'ntd_agency_info_key', 'stop_id', 'stop_name', 'schedule_gtfs_dataset_key', \n", + " 'feed_key', 'GEOIDFQ', 'geometry', 'area_2',\t'adjusted_total_pop', 'pop_weight',\t'poverty_pop_adj',\t\n", + " 
'non_us_citizen_adj',\t'workers_with_no_car_adj',\t'households_with_no_cars_adj',\t'disabled_pop_adj',\t\n", + " 'public_asst_pop_adj', 'inc_extremelylow_adj', 'inc_verylow_adj',\t'inc_low_adj',\t'male_seniors_adj',\t\n", + " 'female_seniors_adj', 'veteran_pop_adj']]\n", + "\n", + "filtered_final_data.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6cc3be8-b52b-4ec0-97c6-1fc3c7e108d9", + "metadata": {}, "outputs": [], "source": [] } From 893ddf799432d12a5b75a66b7be5d4de9ec7d649 Mon Sep 17 00:00:00 2001 From: Shweta Adhikari Date: Mon, 3 Nov 2025 17:54:28 +0000 Subject: [PATCH 3/4] comments and steps added --- .../01_agency_grain_census.ipynb | 239 +++++++++--------- 1 file changed, 116 insertions(+), 123 deletions(-) diff --git a/transit_provider_dashboard/01_agency_grain_census.ipynb b/transit_provider_dashboard/01_agency_grain_census.ipynb index 11a68831b..d5e2fd02c 100644 --- a/transit_provider_dashboard/01_agency_grain_census.ipynb +++ b/transit_provider_dashboard/01_agency_grain_census.ipynb @@ -1,5 +1,25 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "22ae14b5-df6a-4892-98c5-2176b3fae594", + "metadata": {}, + "source": [ + "# Agency-Grain Census Data Summary Table\n", + "- **Purpose:** To define and quantify the service population of each Cal-ITP partner transit agency using census and related demographic data.\n", + "- **Goal:** Provide agency-level summaries that describe the characteristics of populations served, such as size, demographics, income, and travel behavior, to illustrate the reach and impact of Cal-ITP services.\n", + "- **Use:** Support data-driven storytelling and performance reporting by supplying key statistics for communications about the benefits, adoption, and equity potential of Cal-ITP initiatives (e.g., open-loop payment systems).\n", + "\n", + "- **Steps:**\n", + " - Querying ACS data via the Census API and upload results to a GCS bucket for later usage.\n", + " - Census Tract Geometry 
Processing\n", + " - Querying Organization Data from the Data Warehouse and Storing in GCS\n", + " - Querying Bridge Organization GTFS Datasets and Merging with Dim Organizations Table\n", + " - Loading Transit Stop Data and Merging Stop Data with Organization Information\n", + " - Spatial Analysis: Stop Buffers and Census Tract Intersections\n", + " - Adjusting Population and Demographic Metrics for Stop Service Areas\n" + ] + }, { "cell_type": "code", "execution_count": 1, @@ -86,6 +106,7 @@ }, "outputs": [], "source": [ + "# Importing necessary package \n", "import pandas as pd \n", "import geopandas as gpd\n", "import google.auth\n", @@ -111,7 +132,23 @@ "outputs": [], "source": [ "GCS_FILE_PATH = 'gs://calitp-analytics-data/data-analyses'\n", - "analysis_date = \"2025-08-20\"" + "analysis_date = \"2025-08-20\" # Selecting weekday to account for most agencies " + ] + }, + { + "cell_type": "markdown", + "id": "c8ed8f1c-0f71-4408-9af2-c15926c26c99", + "metadata": {}, + "source": [ + "## Querying ACS data via the Census API and upload results to a GCS bucket for later usage." + ] + }, + { + "cell_type": "markdown", + "id": "152f62a1-dfe6-4100-8d74-5e4ceffbee11", + "metadata": {}, + "source": [ + "Uncomment and run the cells below as needed to include additional ACS variables." 
] }, { @@ -239,6 +276,7 @@ "metadata": {}, "outputs": [], "source": [ + "# Load the stored ACS dataset from the specified GCS file path.\n", "with fs.open(f\"{GCS_FILE_PATH}/transit_provider_dashboard/census_data_2023.parquet\", \"rb\") as f:\n", " census_data = pd.read_parquet(f)" ] @@ -562,7 +600,7 @@ }, "outputs": [], "source": [ - "#Creating custom income variables \n", + "## Aggregate ACS income brackets into broader income group categories: extremely low, very low, and low income.\n", "census_data['inc_extremelylow'] = census_data['income_less_10000'] + census_data['income_10000_14999'] + census_data['income_15000_24999']\n", "census_data['inc_verylow'] = census_data['income_25000_34999'] + census_data['income_35000_49999']\n", "census_data['inc_low'] = census_data['income_50000_64999'] + census_data['income_65000_74999']" @@ -577,10 +615,19 @@ }, "outputs": [], "source": [ + "# Sum all senior age brackets (65+) to calculate total male and female senior populations.\n", "census_data['male_seniors'] = census_data.loc[:, \"male_65_to_66\":\"male_85_and_over\"].sum(axis=1)\n", "census_data['female_seniors'] = census_data.loc[:, \"female_65_to_66\":\"female_85_and_over\"].sum(axis=1)" ] }, + { + "cell_type": "markdown", + "id": "958f1dfb-710b-4ef5-a6b1-22e8d89814bc", + "metadata": {}, + "source": [ + "## Census Tract Geometry Processing" + ] + }, { "cell_type": "code", "execution_count": 21, @@ -621,6 +668,7 @@ }, "outputs": [], "source": [ + "# Reproject California census tract geometries to EPSG:3310 (California Albers projection).\n", "tracts_ca_acs.to_crs(crs=3310, inplace=True)" ] }, @@ -633,9 +681,26 @@ }, "outputs": [], "source": [ + "# Calculate the area of each census tract in square meters.\n", "tracts_ca_acs[\"area_m2\"] = tracts_ca_acs.geometry.area" ] }, + { + "cell_type": "markdown", + "id": "8fdaa98a-c420-415f-946b-c2db711a4cf6", + "metadata": {}, + "source": [ + "## Querying Organization Data from the Data Warehouse and Storing in GCS" + ] + }, + 
{ + "cell_type": "markdown", + "id": "8ca77331-4e37-43b7-b4a9-83a823a3e087", + "metadata": {}, + "source": [ + "Uncomment and run the cells below as needed to include additional columns from dim_organization table." + ] + }, { "cell_type": "code", "execution_count": 25, @@ -704,6 +769,7 @@ }, "outputs": [], "source": [ + "# Load the stored organization dataset from the specified GCS file path.\n", "with fs.open(f\"{GCS_FILE_PATH}/transit_provider_dashboard/organization_data_2025_08_20.parquet\", \"rb\") as f:\n", " valid_organization_full = pd.read_parquet(f)" ] @@ -871,6 +937,14 @@ "valid_organization_full.head(5)" ] }, + { + "cell_type": "markdown", + "id": "4c0c1a26-1cba-43b0-9cd0-e356ce40efcb", + "metadata": {}, + "source": [ + "## Querying Bridge Organization GTFS Datasets and Merging with Dim Organizations Table" + ] + }, { "cell_type": "code", "execution_count": 30, @@ -893,13 +967,13 @@ }, { "cell_type": "code", - "execution_count": 32, - "id": "d28f4c79-d5b4-46ba-9c3c-54f1bd6e9d89", - "metadata": { - "tags": [] - }, + "execution_count": null, + "id": "a142623a-62cf-4411-9cf6-9d85f1b5ce22", + "metadata": {}, "outputs": [], "source": [ + "# Merge validated organization data with GTFS organization dimension data\n", + "# based on matching keys and names, keeping all rows from the validated dataset.\n", "dim_orgs_merged = pd.merge(\n", " valid_organization_full.dropna(subset=['key', 'name']),\n", " dim_orgs_GTFS.dropna(subset=['organization_key', 'organization_name']),\n", @@ -918,6 +992,7 @@ }, "outputs": [], "source": [ + "# Drop rows where either 'organization_key' or 'gtfs_dataset_key' is missing.\n", "dim_orgs_merged = dim_orgs_merged.dropna(subset=['organization_key', 'gtfs_dataset_key'])" ] }, @@ -930,6 +1005,7 @@ }, "outputs": [], "source": [ + "# Select relevant columns for the final organization dataset.\n", "dim_orgs_final = dim_orgs_merged[['key', 'name', 'organization_type', 'gtfs_dataset_key', 'ntd_id', 'ntd_agency_info_key']]" ] }, @@ -1085,122 
+1161,11 @@ ] }, { - "cell_type": "code", - "execution_count": 37, - "id": "dc862f09-81a6-4748-93d3-29a84ada4c8d", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
keynameorganization_typegtfs_dataset_keyntd_idntd_agency_info_key
16306bafde22fe614e0a6af2269625d8f6City of Menlo ParkCity/Townb76861f44c68f440d922c54ac1231d31NoneNone
321906a01d5cb664c5e898a95276912bfeTown of TruckeeCity/Town6fda78099793184fe08dd78945d188c09R02-91101receHP6eQInAo7sSP
331906a01d5cb664c5e898a95276912bfeTown of TruckeeCity/Town683da99e57acc29ac600a24cbd96feda9R02-91101receHP6eQInAo7sSP
34aad5befa7fcfce979f2113e373e48aa6Yosemite National ParkFederal Government31f91d59f493cbee9ae0eeb824f44d0eNoneNone
35aad5befa7fcfce979f2113e373e48aa6Yosemite National ParkFederal Government31152914d10e2d0977b8b2fabb167922NoneNone
\n", - "
" - ], - "text/plain": [ - " key name \\\n", - "16 306bafde22fe614e0a6af2269625d8f6 City of Menlo Park \n", - "32 1906a01d5cb664c5e898a95276912bfe Town of Truckee \n", - "33 1906a01d5cb664c5e898a95276912bfe Town of Truckee \n", - "34 aad5befa7fcfce979f2113e373e48aa6 Yosemite National Park \n", - "35 aad5befa7fcfce979f2113e373e48aa6 Yosemite National Park \n", - "\n", - " organization_type gtfs_dataset_key ntd_id \\\n", - "16 City/Town b76861f44c68f440d922c54ac1231d31 None \n", - "32 City/Town 6fda78099793184fe08dd78945d188c0 9R02-91101 \n", - "33 City/Town 683da99e57acc29ac600a24cbd96feda 9R02-91101 \n", - "34 Federal Government 31f91d59f493cbee9ae0eeb824f44d0e None \n", - "35 Federal Government 31152914d10e2d0977b8b2fabb167922 None \n", - "\n", - " ntd_agency_info_key \n", - "16 None \n", - "32 receHP6eQInAo7sSP \n", - "33 receHP6eQInAo7sSP \n", - "34 None \n", - "35 None " - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], + "cell_type": "markdown", + "id": "2e7fb6c3-b176-4d1d-9a35-8a7f7bbb8a5c", + "metadata": {}, "source": [ - "dim_orgs_final.head(5)" + "## Loading Transit Stop Data and Merging Stop Data with Organization Information" ] }, { @@ -1210,6 +1175,7 @@ "metadata": {}, "outputs": [], "source": [ + "#Load stop data for a given analysis date from GCS and return as a GeoDataFrame.\n", "def prep_stops(analysis_date: str):\n", " stops = gpd.read_parquet(\n", " f\"{GCS_FILE_PATH}/rt_vs_schedule/stop_times_direction_{analysis_date}.parquet\",\n", @@ -1367,6 +1333,7 @@ }, "outputs": [], "source": [ + "# Merge stop data with the final organization dataset, keeping only stops with valid IDs and names.\n", "orgs_stops = stops.dropna(subset = ['stop_id', 'stop_name']).merge(\n", " dim_orgs_final,\n", " right_on = 'gtfs_dataset_key',\n", @@ -1422,6 +1389,14 @@ "orgs_stops.info()" ] }, + { + "cell_type": "markdown", + "id": "703d3c9d-c755-45d9-aa74-467b315549dc", + "metadata": {}, + "source": [ + "## Spatial Analysis: Stop 
Buffers and Census Tract Intersections" + ] + }, { "cell_type": "code", "execution_count": 44, @@ -1431,6 +1406,7 @@ }, "outputs": [], "source": [ + "# Reproject stops to match the CRS of California census tracts.\n", "orgs_stops = orgs_stops.to_crs(tracts_ca_acs.crs)" ] }, @@ -1443,6 +1419,7 @@ }, "outputs": [], "source": [ + "# Create a 500-meter buffer around each stop.\n", "orgs_stop_buffered = gpd.GeoDataFrame(\n", " orgs_stops.copy(), \n", " geometry=orgs_stops.geometry.buffer(500),\n", @@ -1459,6 +1436,7 @@ }, "outputs": [], "source": [ + "# Compute the intersection between buffered stops and census tracts.\n", "geometry_intersect = gpd.overlay(orgs_stop_buffered, tracts_ca_acs, how = 'intersection', keep_geom_type=True)" ] }, @@ -1471,6 +1449,7 @@ }, "outputs": [], "source": [ + "# Calculate the area of each intersected geometry in square meters.\n", "geometry_intersect['area_2'] = geometry_intersect.geometry.area" ] }, @@ -1781,6 +1760,14 @@ "geometry_intersect.head(2)" ] }, + { + "cell_type": "markdown", + "id": "92eb767d-b477-4e14-ab51-fac6f615e621", + "metadata": {}, + "source": [ + "## Adjusting Population and Demographic Metrics for Stop Service Areas" + ] + }, { "cell_type": "code", "execution_count": 49, @@ -1790,6 +1777,7 @@ }, "outputs": [], "source": [ + "# Adjust total population by the proportion of the tract area that intersects the stop buffer.\n", "geometry_intersect['adjusted_total_pop'] = geometry_intersect['total_pop'] * (geometry_intersect['area_2'] / geometry_intersect['area_m2'])" ] }, @@ -2110,14 +2098,19 @@ }, "outputs": [], "source": [ - "#Calculating weighted average for other metrics based on total population \n", + "# Calculate a population weight for each intersected geometry, which represents\n", + "# the fraction of the tract's total population within the stop buffer.\n", "geometry_intersect['pop_weight'] = geometry_intersect['adjusted_total_pop'] / geometry_intersect['total_pop']\n", "\n", + "\n", + "# Define the demographic 
and socioeconomic columns to be adjusted based on the population weight.\n", "cols_to_weight = ['poverty_pop', 'non_us_citizen', 'workers_with_no_car', \n", " 'households_with_no_cars', 'disabled_pop', 'public_asst_pop', \n", " 'inc_extremelylow', 'inc_verylow', 'inc_low', 'male_seniors', 'female_seniors',\n", " 'veteran_pop']\n", "\n", + "# Apply the population weight to each selected metric to create adjusted versions\n", + "# representing the portion of each population subgroup within the stop buffer.\n", "geometry_intersect[[f'{col}_adj' for col in cols_to_weight]] = (\n", " geometry_intersect[cols_to_weight].multiply(geometry_intersect['pop_weight'], axis=0)\n", ")" From 4ee79dc19cd3c289a97cef0fd1af2b1747ffb5fd Mon Sep 17 00:00:00 2001 From: Shweta Adhikari Date: Fri, 7 Nov 2025 00:12:56 +0000 Subject: [PATCH 4/4] comments addressed --- .../01_agency_grain_census.ipynb | 1218 +++++++++-------- 1 file changed, 633 insertions(+), 585 deletions(-) diff --git a/transit_provider_dashboard/01_agency_grain_census.ipynb b/transit_provider_dashboard/01_agency_grain_census.ipynb index d5e2fd02c..b388eb628 100644 --- a/transit_provider_dashboard/01_agency_grain_census.ipynb +++ b/transit_provider_dashboard/01_agency_grain_census.ipynb @@ -44,10 +44,6 @@ "execution_count": 2, "id": "0cdda776-857c-4e47-8ce8-940bfc49bb29", "metadata": { - "collapsed": true, - "jupyter": { - "outputs_hidden": true - }, "tags": [] }, "outputs": [ @@ -114,6 +110,7 @@ "import gcsfs\n", "import requests\n", "from pygris import tracts \n", + "from pygris.utils import erase_water\n", "from calitp_data_analysis.sql import get_engine\n", "from shared_utils import schedule_rt_utils \n", "from gtfs_key_ntd_crosswalk import filter_to_valid_dates\n", @@ -180,6 +177,7 @@ "# \"B16008_037E\", # Non US Citizen Population\n", "# \"B01001_020E\", \"B01001_021E\", \"B01001_022E\", \"B01001_023E\", \"B01001_024E\", \"B01001_025E\", # Male senior population : 65 and above\n", "# \"B01001_044E\", \"B01001_045E\", 
\"B01001_046E\", \"B01001_047E\", \"B01001_048E\", \"B01001_049E\", # Female senior population : 65 and above\n", + "# \"B19013_001E\", # Median household income in the past 12 months (2023 Inflation adjusted dollars)\n", "# \"B06010_004E\", \"B06010_005E\", \"B06010_006E\", # Population with extremely low income\n", "# \"B06010_007E\", \"B06010_008E\", # Population with very low income\n", "# \"B06010_009E\", \"B06010_010E\", # Population with low income \n", @@ -194,7 +192,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 8, "id": "5b08cfbf-e7ed-4b4a-8e38-68e66a760086", "metadata": {}, "outputs": [], @@ -216,7 +214,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 9, "id": "a0808315-79b0-42ee-8653-222785770048", "metadata": { "tags": [] @@ -231,6 +229,7 @@ "# 'B01001_023E': 'male_75_to_79', 'B01001_024E': 'male_80_to_84', 'B01001_025E': 'male_85_and_over',\n", "# 'B01001_044E': 'female_65_to_66', 'B01001_045E': 'female_67_to_69', 'B01001_046E': 'female_70_to_74', \n", "# 'B01001_047E': 'female_75_to_79', 'B01001_048E': 'female_80_to_84', 'B01001_049E': 'female_85_and_over',\n", + "# 'B19013_001E': 'median_household_income',\n", "# 'B06010_004E': 'income_less_10000', 'B06010_005E': 'income_10000_14999', 'B06010_006E': 'income_15000_24999', \n", "# 'B06010_007E': 'income_25000_34999', 'B06010_008E': 'income_35000_49999',\n", "# 'B06010_009E': 'income_50000_64999', 'B06010_010E': 'income_65000_74999',\n", @@ -243,7 +242,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 10, "id": "e3ddd869-a418-4bbd-9349-92bdba346012", "metadata": { "tags": [] @@ -257,7 +256,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 11, "id": "c0b7ce56-ca45-4a7d-8f45-ddab3e28606f", "metadata": { "tags": [] @@ -271,7 +270,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 12, "id": "f7477af7-e3ee-44f5-b292-bd423280a0f2", "metadata": {}, "outputs": [], @@ -283,7 
+282,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 13, "id": "6c08efd5-8cf5-4333-bd86-94c515ecfb07", "metadata": { "tags": [] @@ -325,6 +324,7 @@ " female_75_to_79\n", " female_80_to_84\n", " female_85_and_over\n", + " median_household_income\n", " income_less_10000\n", " income_10000_14999\n", " income_15000_24999\n", @@ -362,6 +362,7 @@ " 85\n", " 105\n", " 107\n", + " 250001\n", " 188\n", " 75\n", " 134\n", @@ -397,6 +398,7 @@ " 96\n", " 34\n", " 13\n", + " 225880\n", " 75\n", " 70\n", " 89\n", @@ -432,6 +434,7 @@ " 158\n", " 13\n", " 142\n", + " 157731\n", " 383\n", " 201\n", " 300\n", @@ -467,6 +470,7 @@ " 43\n", " 23\n", " 30\n", + " 159612\n", " 187\n", " 105\n", " 287\n", @@ -502,6 +506,7 @@ " 50\n", " 60\n", " 203\n", + " 96250\n", " 256\n", " 91\n", " 244\n", @@ -546,43 +551,50 @@ "3 55 105 104 43 \n", "4 19 47 51 50 \n", "\n", - " female_80_to_84 female_85_and_over income_less_10000 income_10000_14999 \\\n", - "0 105 107 188 75 \n", - "1 34 13 75 70 \n", - "2 13 142 383 201 \n", - "3 23 30 187 105 \n", - "4 60 203 256 91 \n", - "\n", - " income_15000_24999 income_25000_34999 income_35000_49999 \\\n", - "0 134 157 87 \n", - "1 89 12 207 \n", - "2 300 251 400 \n", - "3 287 215 207 \n", - "4 244 213 385 \n", - "\n", - " income_50000_64999 income_65000_74999 workers_with_no_car \\\n", - "0 129 70 28 \n", - "1 77 32 92 \n", - "2 148 291 157 \n", - "3 178 87 134 \n", - "4 387 244 74 \n", - "\n", - " households_with_no_cars disabled_pop public_asst_pop veteran_pop state \\\n", - "0 85 3094 1316 129 06 \n", - "1 95 2093 861 38 06 \n", - "2 416 5727 2713 80 06 \n", - "3 204 4376 1803 88 06 \n", - "4 169 3822 1655 115 06 \n", - "\n", - " county tract GEOID county_name \n", - "0 001 400100 06001400100 Alameda \n", - "1 001 400200 06001400200 Alameda \n", - "2 001 400300 06001400300 Alameda \n", - "3 001 400400 06001400400 Alameda \n", - "4 001 400500 06001400500 Alameda " + " female_80_to_84 female_85_and_over median_household_income 
\\\n", + "0 105 107 250001 \n", + "1 34 13 225880 \n", + "2 13 142 157731 \n", + "3 23 30 159612 \n", + "4 60 203 96250 \n", + "\n", + " income_less_10000 income_10000_14999 income_15000_24999 \\\n", + "0 188 75 134 \n", + "1 75 70 89 \n", + "2 383 201 300 \n", + "3 187 105 287 \n", + "4 256 91 244 \n", + "\n", + " income_25000_34999 income_35000_49999 income_50000_64999 \\\n", + "0 157 87 129 \n", + "1 12 207 77 \n", + "2 251 400 148 \n", + "3 215 207 178 \n", + "4 213 385 387 \n", + "\n", + " income_65000_74999 workers_with_no_car households_with_no_cars \\\n", + "0 70 28 85 \n", + "1 32 92 95 \n", + "2 291 157 416 \n", + "3 87 134 204 \n", + "4 244 74 169 \n", + "\n", + " disabled_pop public_asst_pop veteran_pop state county tract \\\n", + "0 3094 1316 129 06 001 400100 \n", + "1 2093 861 38 06 001 400200 \n", + "2 5727 2713 80 06 001 400300 \n", + "3 4376 1803 88 06 001 400400 \n", + "4 3822 1655 115 06 001 400500 \n", + "\n", + " GEOID county_name \n", + "0 06001400100 Alameda \n", + "1 06001400200 Alameda \n", + "2 06001400300 Alameda \n", + "3 06001400400 Alameda \n", + "4 06001400500 Alameda " ] }, - "execution_count": 18, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -591,9 +603,27 @@ "census_data.head(5)" ] }, + { + "cell_type": "markdown", + "id": "eb8dd45d-7dac-4b3f-a6e6-4187b210d2a5", + "metadata": {}, + "source": [ + "“Low-income households” are those with household incomes at or below 80 percent of the statewide median income\"" + ] + }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 14, + "id": "ab510e4b-8b0e-463f-85cd-7baa0bc8b92f", + "metadata": {}, + "outputs": [], + "source": [ + "#Finding low income number: \"" + ] + }, + { + "cell_type": "code", + "execution_count": 15, "id": "6647104f-db0a-4cb5-9908-c30d009e568e", "metadata": { "tags": [] @@ -608,7 +638,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 16, "id": "5b637751-060f-46ce-9b8c-41c94dd07620", "metadata": 
{ "tags": [] @@ -630,7 +660,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 17, "id": "55013195-b315-4889-abf6-951091d09e05", "metadata": {}, "outputs": [ @@ -645,12 +675,32 @@ "source": [ "#Retrieving Tract Geometries for California\n", "ca_tracts = tracts(state = \"CA\", cb = True,\n", - " year = 2023, cache = True)" + " year = 2023, cache = True)\n", + "\n" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 18, + "id": "e444d31c-b572-4831-83ec-b705970da9c8", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.11/site-packages/geopandas/geodataframe.py:2475: UserWarning: `keep_geom_type=True` in overlay resulted in 549 dropped geometries of different geometry types than df1 has. Set `keep_geom_type=False` to retain all geometries\n", + " return geopandas.overlay(\n" + ] + } + ], + "source": [ + "ca_tracts = erase_water(ca_tracts)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, "id": "986d2bcf-3e7f-4746-abe4-627460f68406", "metadata": {}, "outputs": [], @@ -661,7 +711,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 20, "id": "0381662d-9cfe-4efd-a971-47fda3567c85", "metadata": { "tags": [] @@ -674,7 +724,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 21, "id": "0bd06747-c70c-4418-b136-eb34f8a1fd7d", "metadata": { "tags": [] @@ -703,7 +753,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 22, "id": "1a89c9b5-4b7c-45c8-9d1e-632e1006b653", "metadata": { "tags": [] @@ -735,7 +785,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 23, "id": "4211d3bf-0742-4a8f-9fd3-5ab435168ddf", "metadata": { "tags": [] @@ -748,7 +798,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 24, "id": "fa8ce12e-e993-446c-a4b0-aafca6974275", "metadata": { "tags": [] @@ -762,7 +812,7 @@ }, { "cell_type": "code", - 
"execution_count": 28, + "execution_count": 25, "id": "4916dd04-523b-4dda-aca7-599a90560b19", "metadata": { "tags": [] @@ -776,7 +826,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 26, "id": "ca225047-9991-4ab1-bbde-e5e720418782", "metadata": { "tags": [] @@ -928,7 +978,7 @@ "4 2098-12-31 15:59:59.999999 " ] }, - "execution_count": 29, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -947,7 +997,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 27, "id": "ed1cd1ac-5a21-4a5d-9b1a-5522564896c1", "metadata": { "tags": [] @@ -967,7 +1017,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "id": "a142623a-62cf-4411-9cf6-9d85f1b5ce22", "metadata": {}, "outputs": [], @@ -985,7 +1035,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 29, "id": "c66cf805-de18-4723-b46b-be7bf84fa2b1", "metadata": { "tags": [] @@ -998,7 +1048,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 30, "id": "01eccdb1-4bd8-4670-8f9c-3a2a0e94948d", "metadata": { "tags": [] @@ -1011,7 +1061,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 31, "id": "457b636a-705d-4085-91f8-db64d0142ef1", "metadata": { "tags": [] @@ -1079,7 +1129,7 @@ " aad5befa7fcfce979f2113e373e48aa6\n", " Yosemite National Park\n", " Federal Government\n", - " 31f91d59f493cbee9ae0eeb824f44d0e\n", + " 31152914d10e2d0977b8b2fabb167922\n", " None\n", " None\n", " \n", @@ -1088,7 +1138,7 @@ " aad5befa7fcfce979f2113e373e48aa6\n", " Yosemite National Park\n", " Federal Government\n", - " 31152914d10e2d0977b8b2fabb167922\n", + " 31f91d59f493cbee9ae0eeb824f44d0e\n", " None\n", " None\n", " \n", @@ -1108,8 +1158,8 @@ "16 City/Town b76861f44c68f440d922c54ac1231d31 None \n", "32 City/Town 6fda78099793184fe08dd78945d188c0 9R02-91101 \n", "33 City/Town 683da99e57acc29ac600a24cbd96feda 9R02-91101 \n", - "34 Federal Government 
31f91d59f493cbee9ae0eeb824f44d0e None \n", - "35 Federal Government 31152914d10e2d0977b8b2fabb167922 None \n", + "34 Federal Government 31152914d10e2d0977b8b2fabb167922 None \n", + "35 Federal Government 31f91d59f493cbee9ae0eeb824f44d0e None \n", "\n", " ntd_agency_info_key \n", "16 None \n", @@ -1119,7 +1169,7 @@ "35 None " ] }, - "execution_count": 35, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -1130,7 +1180,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 32, "id": "b7292811-58ec-4672-a2e3-b9c133e79723", "metadata": { "tags": [] @@ -1141,18 +1191,18 @@ "output_type": "stream", "text": [ "\n", - "Int64Index: 441 entries, 16 to 522\n", + "Int64Index: 444 entries, 16 to 525\n", "Data columns (total 6 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 key 441 non-null object\n", - " 1 name 441 non-null object\n", - " 2 organization_type 441 non-null object\n", - " 3 gtfs_dataset_key 441 non-null object\n", - " 4 ntd_id 417 non-null object\n", - " 5 ntd_agency_info_key 370 non-null object\n", + " 0 key 444 non-null object\n", + " 1 name 444 non-null object\n", + " 2 organization_type 444 non-null object\n", + " 3 gtfs_dataset_key 444 non-null object\n", + " 4 ntd_id 420 non-null object\n", + " 5 ntd_agency_info_key 373 non-null object\n", "dtypes: object(6)\n", - "memory usage: 24.1+ KB\n" + "memory usage: 24.3+ KB\n" ] } ], @@ -1170,7 +1220,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 33, "id": "b826d41d-720f-49ba-9b77-b7186dd95bf6", "metadata": {}, "outputs": [], @@ -1188,7 +1238,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 34, "id": "f1cd0c41-8577-4733-b67c-7d1fe08f9aa1", "metadata": { "tags": [] @@ -1220,7 +1270,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 35, "id": "160c660b-d683-4eb2-ae38-7f9d1a412cda", "metadata": { "tags": [] @@ -1315,7 +1365,7 @@ "4 Terminal 
4 POINT (147272.606 -451317.665) " ] }, - "execution_count": 40, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -1326,7 +1376,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 36, "id": "1f77a566-31f2-4f9e-9dda-d8d74b79487a", "metadata": { "tags": [] @@ -1344,7 +1394,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 37, "id": "7d76cc04-e782-46b8-a748-1c9e43e077df", "metadata": { "tags": [] @@ -1356,7 +1406,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 38, "id": "2ac56dc1-0c04-48e6-9114-04535ed00c76", "metadata": {}, "outputs": [ @@ -1399,7 +1449,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 39, "id": "c8563478-a79b-4aef-9dff-225472a79fda", "metadata": { "tags": [] @@ -1412,24 +1462,44 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 40, "id": "806da12f-9cf6-4306-81c0-dd057f31d8a9", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# Create a 500-meter buffer around each stop.\n", + "# Create a 1/2 mile buffer around each stop.\n", "orgs_stop_buffered = gpd.GeoDataFrame(\n", " orgs_stops.copy(), \n", - " geometry=orgs_stops.geometry.buffer(500),\n", + " geometry=orgs_stops.geometry.buffer(804.672),\n", " crs=orgs_stops.crs\n", ")" ] }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 41, + "id": "bb20127e-2092-4074-a710-5070e39806f7", + "metadata": {}, + "outputs": [], + "source": [ + "orgs_stop_dissolved = orgs_stop_buffered.dissolve(by='key')" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "9de1e91a-aa0b-4eea-b6c2-05537cf21aea", + "metadata": {}, + "outputs": [], + "source": [ + "orgs_stop_dissolved = orgs_stop_dissolved.reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 43, "id": "2e231b7d-d744-419d-b442-810b166fcd3d", "metadata": { "tags": [] @@ -1437,12 +1507,16 @@ "outputs": [], "source": [ "# Compute the intersection 
between buffered stops and census tracts.\n", - "geometry_intersect = gpd.overlay(orgs_stop_buffered, tracts_ca_acs, how = 'intersection', keep_geom_type=True)" + "geometry_intersect = gpd.overlay(\n", + " orgs_stop_dissolved, \n", + " tracts_ca_acs, \n", + " how = 'intersection', \n", + " keep_geom_type=True)" ] }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 44, "id": "fa657799-42aa-46d7-a789-c4d86b226c13", "metadata": { "tags": [] @@ -1455,7 +1529,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 45, "id": "59ce3254-f021-40cd-869b-3c7a21f9002e", "metadata": { "tags": [] @@ -1482,11 +1556,11 @@ " \n", " \n", " \n", + " key\n", " schedule_gtfs_dataset_key\n", " feed_key\n", " stop_id\n", " stop_name\n", - " key\n", " name\n", " organization_type\n", " gtfs_dataset_key\n", @@ -1520,6 +1594,7 @@ " female_75_to_79\n", " female_80_to_84\n", " female_85_and_over\n", + " median_household_income\n", " income_less_10000\n", " income_10000_14999\n", " income_15000_24999\n", @@ -1549,209 +1624,211 @@ " \n", " \n", " 0\n", - " cc53a0dbf5df90e3009b9cb5d89d80ba\n", - " 49f469dcf8712b562e3c970aa1b89731\n", - " 5961046\n", - " Echo Park Ave & Donaldson St (Southbound)\n", - " 123beaa13b8cfbd650a48cdfd4647088\n", - " City of Los Angeles\n", - " City/Town\n", - " cc53a0dbf5df90e3009b9cb5d89d80ba\n", - " 90147\n", - " reccTizvO7pe1k1CS\n", + " 0119506e03bed4c4d8b094ab1177cd78\n", + " 524ea6209600e9a2de34a02cf9068729\n", + " d9e1e77d0754b712fc608741ae3836f5\n", + " bSAT\n", + " Santa Maria-Ihop Bus Stop\n", + " San Joaquin Joint Powers Authority\n", + " Independent Agency\n", + " 524ea6209600e9a2de34a02cf9068729\n", + " None\n", + " None\n", " 06\n", - " 037\n", - " 197410\n", - " 1400000US06037197410\n", - " 06037197410\n", - " 1974.10\n", - " Census Tract 1974.10\n", + " 001\n", + " 450741\n", + " 1400000US06001450741\n", + " 06001450741\n", + " 4507.41\n", + " Census Tract 4507.41\n", " CA\n", - " Los Angeles County\n", + " 
Alameda County\n", " California\n", " CT\n", - " 1393312\n", - " 0\n", - " 3805\n", - " 239\n", - " 429\n", - " 25\n", - " 25\n", - " 53\n", - " 75\n", - " 8\n", - " 32\n", + " 2182370\n", " 0\n", - " 49\n", - " 71\n", - " 77\n", - " 35\n", - " 27\n", - " 288\n", - " 138\n", - " 288\n", - " 226\n", - " 426\n", - " 402\n", - " 151\n", - " 29\n", - " 51\n", - " 3805\n", - " 1701\n", + " 5469\n", + " 937\n", + " 750\n", + " 37\n", + " 67\n", + " 73\n", + " 117\n", + " 60\n", + " 41\n", + " 43\n", + " 165\n", + " 182\n", " 95\n", + " 74\n", + " 175\n", + " 154609\n", + " 657\n", + " 516\n", + " 245\n", + " 252\n", + " 260\n", + " 63\n", + " 77\n", + " 55\n", + " 228\n", + " 5301\n", + " 1941\n", + " 82\n", " 06\n", - " 037\n", - " 197410\n", - " Los Angeles\n", - " 714\n", - " 652\n", - " 553\n", - " 218\n", - " 259\n", - " 1.406431e+06\n", - " POLYGON ((162046.722 -434622.218, 162034.799 -...\n", - " 725281.427873\n", + " 001\n", + " 450741\n", + " Alameda\n", + " 1418\n", + " 512\n", + " 140\n", + " 395\n", + " 734\n", + " 2.191555e+06\n", + " POLYGON ((-165021.160 -37973.000, -165009.573 ...\n", + " 116531.265316\n", " \n", " \n", " 1\n", - " cc53a0dbf5df90e3009b9cb5d89d80ba\n", - " 49f469dcf8712b562e3c970aa1b89731\n", - " 5797231\n", - " Echo Park Ave & Baxter St (Southbound) (4052)\n", - " 123beaa13b8cfbd650a48cdfd4647088\n", - " City of Los Angeles\n", - " City/Town\n", - " cc53a0dbf5df90e3009b9cb5d89d80ba\n", - " 90147\n", - " reccTizvO7pe1k1CS\n", + " 0119506e03bed4c4d8b094ab1177cd78\n", + " 524ea6209600e9a2de34a02cf9068729\n", + " d9e1e77d0754b712fc608741ae3836f5\n", + " bSAT\n", + " Santa Maria-Ihop Bus Stop\n", + " San Joaquin Joint Powers Authority\n", + " Independent Agency\n", + " 524ea6209600e9a2de34a02cf9068729\n", + " None\n", + " None\n", " 06\n", - " 037\n", - " 197410\n", - " 1400000US06037197410\n", - " 06037197410\n", - " 1974.10\n", - " Census Tract 1974.10\n", + " 001\n", + " 450300\n", + " 1400000US06001450300\n", + " 06001450300\n", + " 
4503\n", + " Census Tract 4503\n", " CA\n", - " Los Angeles County\n", + " Alameda County\n", " California\n", " CT\n", - " 1393312\n", + " 3133641\n", " 0\n", - " 3805\n", - " 239\n", - " 429\n", - " 25\n", - " 25\n", + " 4967\n", + " 67\n", + " 582\n", + " 48\n", " 53\n", - " 75\n", - " 8\n", - " 32\n", - " 0\n", " 49\n", - " 71\n", - " 77\n", - " 35\n", - " 27\n", - " 288\n", - " 138\n", - " 288\n", - " 226\n", - " 426\n", - " 402\n", - " 151\n", - " 29\n", + " 70\n", + " 72\n", + " 32\n", + " 69\n", " 51\n", - " 3805\n", - " 1701\n", - " 95\n", + " 68\n", + " 101\n", + " 113\n", + " 75\n", + " 147875\n", + " 363\n", + " 142\n", + " 221\n", + " 197\n", + " 428\n", + " 330\n", + " 196\n", + " 64\n", + " 84\n", + " 4967\n", + " 1849\n", + " 179\n", " 06\n", - " 037\n", - " 197410\n", - " Los Angeles\n", - " 714\n", - " 652\n", - " 553\n", - " 218\n", - " 259\n", - " 1.406431e+06\n", - " POLYGON ((161975.618 -434794.073, 161968.418 -...\n", - " 590112.226428\n", + " 001\n", + " 450300\n", + " Alameda\n", + " 726\n", + " 625\n", + " 526\n", + " 324\n", + " 477\n", + " 3.149375e+06\n", + " POLYGON ((-167866.792 -33259.734, -167870.667 ...\n", + " 212536.639123\n", " \n", " \n", "\n", "" ], "text/plain": [ - " schedule_gtfs_dataset_key feed_key \\\n", - "0 cc53a0dbf5df90e3009b9cb5d89d80ba 49f469dcf8712b562e3c970aa1b89731 \n", - "1 cc53a0dbf5df90e3009b9cb5d89d80ba 49f469dcf8712b562e3c970aa1b89731 \n", + " key schedule_gtfs_dataset_key \\\n", + "0 0119506e03bed4c4d8b094ab1177cd78 524ea6209600e9a2de34a02cf9068729 \n", + "1 0119506e03bed4c4d8b094ab1177cd78 524ea6209600e9a2de34a02cf9068729 \n", "\n", - " stop_id stop_name \\\n", - "0 5961046 Echo Park Ave & Donaldson St (Southbound) \n", - "1 5797231 Echo Park Ave & Baxter St (Southbound) (4052) \n", + " feed_key stop_id stop_name \\\n", + "0 d9e1e77d0754b712fc608741ae3836f5 bSAT Santa Maria-Ihop Bus Stop \n", + "1 d9e1e77d0754b712fc608741ae3836f5 bSAT Santa Maria-Ihop Bus Stop \n", "\n", - " key name organization_type 
\\\n", - "0 123beaa13b8cfbd650a48cdfd4647088 City of Los Angeles City/Town \n", - "1 123beaa13b8cfbd650a48cdfd4647088 City of Los Angeles City/Town \n", + " name organization_type \\\n", + "0 San Joaquin Joint Powers Authority Independent Agency \n", + "1 San Joaquin Joint Powers Authority Independent Agency \n", "\n", " gtfs_dataset_key ntd_id ntd_agency_info_key STATEFP \\\n", - "0 cc53a0dbf5df90e3009b9cb5d89d80ba 90147 reccTizvO7pe1k1CS 06 \n", - "1 cc53a0dbf5df90e3009b9cb5d89d80ba 90147 reccTizvO7pe1k1CS 06 \n", + "0 524ea6209600e9a2de34a02cf9068729 None None 06 \n", + "1 524ea6209600e9a2de34a02cf9068729 None None 06 \n", "\n", " COUNTYFP TRACTCE GEOIDFQ GEOID NAME \\\n", - "0 037 197410 1400000US06037197410 06037197410 1974.10 \n", - "1 037 197410 1400000US06037197410 06037197410 1974.10 \n", + "0 001 450741 1400000US06001450741 06001450741 4507.41 \n", + "1 001 450300 1400000US06001450300 06001450300 4503 \n", "\n", - " NAMELSAD STUSPS NAMELSADCO STATE_NAME LSAD ALAND \\\n", - "0 Census Tract 1974.10 CA Los Angeles County California CT 1393312 \n", - "1 Census Tract 1974.10 CA Los Angeles County California CT 1393312 \n", + " NAMELSAD STUSPS NAMELSADCO STATE_NAME LSAD ALAND \\\n", + "0 Census Tract 4507.41 CA Alameda County California CT 2182370 \n", + "1 Census Tract 4503 CA Alameda County California CT 3133641 \n", "\n", " AWATER total_pop poverty_pop non_us_citizen male_65_to_66 \\\n", - "0 0 3805 239 429 25 \n", - "1 0 3805 239 429 25 \n", + "0 0 5469 937 750 37 \n", + "1 0 4967 67 582 48 \n", "\n", " male_67_to_69 male_70_to_74 male_75_to_79 male_80_to_84 \\\n", - "0 25 53 75 8 \n", - "1 25 53 75 8 \n", + "0 67 73 117 60 \n", + "1 53 49 70 72 \n", "\n", " male_85_and_over female_65_to_66 female_67_to_69 female_70_to_74 \\\n", - "0 32 0 49 71 \n", - "1 32 0 49 71 \n", + "0 41 43 165 182 \n", + "1 32 69 51 68 \n", "\n", - " female_75_to_79 female_80_to_84 female_85_and_over income_less_10000 \\\n", - "0 77 35 27 288 \n", - "1 77 35 27 288 \n", + " 
female_75_to_79 female_80_to_84 female_85_and_over \\\n", + "0 95 74 175 \n", + "1 101 113 75 \n", "\n", - " income_10000_14999 income_15000_24999 income_25000_34999 \\\n", - "0 138 288 226 \n", - "1 138 288 226 \n", + " median_household_income income_less_10000 income_10000_14999 \\\n", + "0 154609 657 516 \n", + "1 147875 363 142 \n", "\n", - " income_35000_49999 income_50000_64999 income_65000_74999 \\\n", - "0 426 402 151 \n", - "1 426 402 151 \n", + " income_15000_24999 income_25000_34999 income_35000_49999 \\\n", + "0 245 252 260 \n", + "1 221 197 428 \n", "\n", - " workers_with_no_car households_with_no_cars disabled_pop \\\n", - "0 29 51 3805 \n", - "1 29 51 3805 \n", + " income_50000_64999 income_65000_74999 workers_with_no_car \\\n", + "0 63 77 55 \n", + "1 330 196 64 \n", "\n", - " public_asst_pop veteran_pop state county tract county_name \\\n", - "0 1701 95 06 037 197410 Los Angeles \n", - "1 1701 95 06 037 197410 Los Angeles \n", + " households_with_no_cars disabled_pop public_asst_pop veteran_pop state \\\n", + "0 228 5301 1941 82 06 \n", + "1 84 4967 1849 179 06 \n", "\n", - " inc_extremelylow inc_verylow inc_low male_seniors female_seniors \\\n", - "0 714 652 553 218 259 \n", - "1 714 652 553 218 259 \n", + " county tract county_name inc_extremelylow inc_verylow inc_low \\\n", + "0 001 450741 Alameda 1418 512 140 \n", + "1 001 450300 Alameda 726 625 526 \n", "\n", - " area_m2 geometry \\\n", - "0 1.406431e+06 POLYGON ((162046.722 -434622.218, 162034.799 -... \n", - "1 1.406431e+06 POLYGON ((161975.618 -434794.073, 161968.418 -... \n", + " male_seniors female_seniors area_m2 \\\n", + "0 395 734 2.191555e+06 \n", + "1 324 477 3.149375e+06 \n", "\n", - " area_2 \n", - "0 725281.427873 \n", - "1 590112.226428 " + " geometry area_2 \n", + "0 POLYGON ((-165021.160 -37973.000, -165009.573 ... 116531.265316 \n", + "1 POLYGON ((-167866.792 -33259.734, -167870.667 ... 
212536.639123 " ] }, - "execution_count": 48, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -1770,7 +1847,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 46, "id": "ed44c28b-0eb9-4d7c-b9a5-3739ea9f5133", "metadata": { "tags": [] @@ -1778,12 +1855,13 @@ "outputs": [], "source": [ "# Adjust total population by the proportion of the tract area that intersects the stop buffer.\n", - "geometry_intersect['adjusted_total_pop'] = geometry_intersect['total_pop'] * (geometry_intersect['area_2'] / geometry_intersect['area_m2'])" + "# Calculate the proportion of each tract's area that intersects the stop buffer\n", + "geometry_intersect['area_ratio'] = geometry_intersect['area_2'] / geometry_intersect['area_m2']" ] }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 47, "id": "8653de80-582e-435a-9642-69ac80089dba", "metadata": {}, "outputs": [ @@ -1808,11 +1886,11 @@ " \n", " \n", " \n", + " key\n", " schedule_gtfs_dataset_key\n", " feed_key\n", " stop_id\n", " stop_name\n", - " key\n", " name\n", " organization_type\n", " gtfs_dataset_key\n", @@ -1846,6 +1924,7 @@ " female_75_to_79\n", " female_80_to_84\n", " female_85_and_over\n", + " median_household_income\n", " income_less_10000\n", " income_10000_14999\n", " income_15000_24999\n", @@ -1870,217 +1949,223 @@ " area_m2\n", " geometry\n", " area_2\n", - " adjusted_total_pop\n", + " area_ratio\n", " \n", " \n", " \n", " \n", " 0\n", - " cc53a0dbf5df90e3009b9cb5d89d80ba\n", - " 49f469dcf8712b562e3c970aa1b89731\n", - " 5961046\n", - " Echo Park Ave & Donaldson St (Southbound)\n", - " 123beaa13b8cfbd650a48cdfd4647088\n", - " City of Los Angeles\n", - " City/Town\n", - " cc53a0dbf5df90e3009b9cb5d89d80ba\n", - " 90147\n", - " reccTizvO7pe1k1CS\n", + " 0119506e03bed4c4d8b094ab1177cd78\n", + " 524ea6209600e9a2de34a02cf9068729\n", + " d9e1e77d0754b712fc608741ae3836f5\n", + " bSAT\n", + " Santa Maria-Ihop Bus Stop\n", + " San Joaquin Joint Powers 
Authority\n", + " Independent Agency\n", + " 524ea6209600e9a2de34a02cf9068729\n", + " None\n", + " None\n", " 06\n", - " 037\n", - " 197410\n", - " 1400000US06037197410\n", - " 06037197410\n", - " 1974.10\n", - " Census Tract 1974.10\n", + " 001\n", + " 450741\n", + " 1400000US06001450741\n", + " 06001450741\n", + " 4507.41\n", + " Census Tract 4507.41\n", " CA\n", - " Los Angeles County\n", + " Alameda County\n", " California\n", " CT\n", - " 1393312\n", - " 0\n", - " 3805\n", - " 239\n", - " 429\n", - " 25\n", - " 25\n", - " 53\n", - " 75\n", - " 8\n", - " 32\n", + " 2182370\n", " 0\n", - " 49\n", - " 71\n", - " 77\n", - " 35\n", - " 27\n", - " 288\n", - " 138\n", - " 288\n", - " 226\n", - " 426\n", - " 402\n", - " 151\n", - " 29\n", - " 51\n", - " 3805\n", - " 1701\n", + " 5469\n", + " 937\n", + " 750\n", + " 37\n", + " 67\n", + " 73\n", + " 117\n", + " 60\n", + " 41\n", + " 43\n", + " 165\n", + " 182\n", " 95\n", + " 74\n", + " 175\n", + " 154609\n", + " 657\n", + " 516\n", + " 245\n", + " 252\n", + " 260\n", + " 63\n", + " 77\n", + " 55\n", + " 228\n", + " 5301\n", + " 1941\n", + " 82\n", " 06\n", - " 037\n", - " 197410\n", - " Los Angeles\n", - " 714\n", - " 652\n", - " 553\n", - " 218\n", - " 259\n", - " 1.406431e+06\n", - " POLYGON ((162046.722 -434622.218, 162034.799 -...\n", - " 725281.427873\n", - " 1962.197505\n", + " 001\n", + " 450741\n", + " Alameda\n", + " 1418\n", + " 512\n", + " 140\n", + " 395\n", + " 734\n", + " 2.191555e+06\n", + " POLYGON ((-165021.160 -37973.000, -165009.573 ...\n", + " 116531.265316\n", + " 0.053173\n", " \n", " \n", " 1\n", - " cc53a0dbf5df90e3009b9cb5d89d80ba\n", - " 49f469dcf8712b562e3c970aa1b89731\n", - " 5797231\n", - " Echo Park Ave & Baxter St (Southbound) (4052)\n", - " 123beaa13b8cfbd650a48cdfd4647088\n", - " City of Los Angeles\n", - " City/Town\n", - " cc53a0dbf5df90e3009b9cb5d89d80ba\n", - " 90147\n", - " reccTizvO7pe1k1CS\n", + " 0119506e03bed4c4d8b094ab1177cd78\n", + " 524ea6209600e9a2de34a02cf9068729\n", + " 
d9e1e77d0754b712fc608741ae3836f5\n", + " bSAT\n", + " Santa Maria-Ihop Bus Stop\n", + " San Joaquin Joint Powers Authority\n", + " Independent Agency\n", + " 524ea6209600e9a2de34a02cf9068729\n", + " None\n", + " None\n", " 06\n", - " 037\n", - " 197410\n", - " 1400000US06037197410\n", - " 06037197410\n", - " 1974.10\n", - " Census Tract 1974.10\n", + " 001\n", + " 450300\n", + " 1400000US06001450300\n", + " 06001450300\n", + " 4503\n", + " Census Tract 4503\n", " CA\n", - " Los Angeles County\n", + " Alameda County\n", " California\n", " CT\n", - " 1393312\n", + " 3133641\n", " 0\n", - " 3805\n", - " 239\n", - " 429\n", - " 25\n", - " 25\n", + " 4967\n", + " 67\n", + " 582\n", + " 48\n", " 53\n", - " 75\n", - " 8\n", - " 32\n", - " 0\n", " 49\n", - " 71\n", - " 77\n", - " 35\n", - " 27\n", - " 288\n", - " 138\n", - " 288\n", - " 226\n", - " 426\n", - " 402\n", - " 151\n", - " 29\n", + " 70\n", + " 72\n", + " 32\n", + " 69\n", " 51\n", - " 3805\n", - " 1701\n", - " 95\n", + " 68\n", + " 101\n", + " 113\n", + " 75\n", + " 147875\n", + " 363\n", + " 142\n", + " 221\n", + " 197\n", + " 428\n", + " 330\n", + " 196\n", + " 64\n", + " 84\n", + " 4967\n", + " 1849\n", + " 179\n", " 06\n", - " 037\n", - " 197410\n", - " Los Angeles\n", - " 714\n", - " 652\n", - " 553\n", - " 218\n", - " 259\n", - " 1.406431e+06\n", - " POLYGON ((161975.618 -434794.073, 161968.418 -...\n", - " 590112.226428\n", - " 1596.506809\n", + " 001\n", + " 450300\n", + " Alameda\n", + " 726\n", + " 625\n", + " 526\n", + " 324\n", + " 477\n", + " 3.149375e+06\n", + " POLYGON ((-167866.792 -33259.734, -167870.667 ...\n", + " 212536.639123\n", + " 0.067485\n", " \n", " \n", "\n", "" ], "text/plain": [ - " schedule_gtfs_dataset_key feed_key \\\n", - "0 cc53a0dbf5df90e3009b9cb5d89d80ba 49f469dcf8712b562e3c970aa1b89731 \n", - "1 cc53a0dbf5df90e3009b9cb5d89d80ba 49f469dcf8712b562e3c970aa1b89731 \n", + " key schedule_gtfs_dataset_key \\\n", + "0 0119506e03bed4c4d8b094ab1177cd78 
524ea6209600e9a2de34a02cf9068729 \n", + "1 0119506e03bed4c4d8b094ab1177cd78 524ea6209600e9a2de34a02cf9068729 \n", "\n", - " stop_id stop_name \\\n", - "0 5961046 Echo Park Ave & Donaldson St (Southbound) \n", - "1 5797231 Echo Park Ave & Baxter St (Southbound) (4052) \n", + " feed_key stop_id stop_name \\\n", + "0 d9e1e77d0754b712fc608741ae3836f5 bSAT Santa Maria-Ihop Bus Stop \n", + "1 d9e1e77d0754b712fc608741ae3836f5 bSAT Santa Maria-Ihop Bus Stop \n", "\n", - " key name organization_type \\\n", - "0 123beaa13b8cfbd650a48cdfd4647088 City of Los Angeles City/Town \n", - "1 123beaa13b8cfbd650a48cdfd4647088 City of Los Angeles City/Town \n", + " name organization_type \\\n", + "0 San Joaquin Joint Powers Authority Independent Agency \n", + "1 San Joaquin Joint Powers Authority Independent Agency \n", "\n", " gtfs_dataset_key ntd_id ntd_agency_info_key STATEFP \\\n", - "0 cc53a0dbf5df90e3009b9cb5d89d80ba 90147 reccTizvO7pe1k1CS 06 \n", - "1 cc53a0dbf5df90e3009b9cb5d89d80ba 90147 reccTizvO7pe1k1CS 06 \n", + "0 524ea6209600e9a2de34a02cf9068729 None None 06 \n", + "1 524ea6209600e9a2de34a02cf9068729 None None 06 \n", "\n", " COUNTYFP TRACTCE GEOIDFQ GEOID NAME \\\n", - "0 037 197410 1400000US06037197410 06037197410 1974.10 \n", - "1 037 197410 1400000US06037197410 06037197410 1974.10 \n", + "0 001 450741 1400000US06001450741 06001450741 4507.41 \n", + "1 001 450300 1400000US06001450300 06001450300 4503 \n", "\n", - " NAMELSAD STUSPS NAMELSADCO STATE_NAME LSAD ALAND \\\n", - "0 Census Tract 1974.10 CA Los Angeles County California CT 1393312 \n", - "1 Census Tract 1974.10 CA Los Angeles County California CT 1393312 \n", + " NAMELSAD STUSPS NAMELSADCO STATE_NAME LSAD ALAND \\\n", + "0 Census Tract 4507.41 CA Alameda County California CT 2182370 \n", + "1 Census Tract 4503 CA Alameda County California CT 3133641 \n", "\n", " AWATER total_pop poverty_pop non_us_citizen male_65_to_66 \\\n", - "0 0 3805 239 429 25 \n", - "1 0 3805 239 429 25 \n", + "0 0 5469 937 750 37 \n", + 
"1 0 4967 67 582 48 \n", "\n", " male_67_to_69 male_70_to_74 male_75_to_79 male_80_to_84 \\\n", - "0 25 53 75 8 \n", - "1 25 53 75 8 \n", + "0 67 73 117 60 \n", + "1 53 49 70 72 \n", "\n", " male_85_and_over female_65_to_66 female_67_to_69 female_70_to_74 \\\n", - "0 32 0 49 71 \n", - "1 32 0 49 71 \n", + "0 41 43 165 182 \n", + "1 32 69 51 68 \n", "\n", - " female_75_to_79 female_80_to_84 female_85_and_over income_less_10000 \\\n", - "0 77 35 27 288 \n", - "1 77 35 27 288 \n", + " female_75_to_79 female_80_to_84 female_85_and_over \\\n", + "0 95 74 175 \n", + "1 101 113 75 \n", "\n", - " income_10000_14999 income_15000_24999 income_25000_34999 \\\n", - "0 138 288 226 \n", - "1 138 288 226 \n", + " median_household_income income_less_10000 income_10000_14999 \\\n", + "0 154609 657 516 \n", + "1 147875 363 142 \n", "\n", - " income_35000_49999 income_50000_64999 income_65000_74999 \\\n", - "0 426 402 151 \n", - "1 426 402 151 \n", + " income_15000_24999 income_25000_34999 income_35000_49999 \\\n", + "0 245 252 260 \n", + "1 221 197 428 \n", "\n", - " workers_with_no_car households_with_no_cars disabled_pop \\\n", - "0 29 51 3805 \n", - "1 29 51 3805 \n", + " income_50000_64999 income_65000_74999 workers_with_no_car \\\n", + "0 63 77 55 \n", + "1 330 196 64 \n", "\n", - " public_asst_pop veteran_pop state county tract county_name \\\n", - "0 1701 95 06 037 197410 Los Angeles \n", - "1 1701 95 06 037 197410 Los Angeles \n", + " households_with_no_cars disabled_pop public_asst_pop veteran_pop state \\\n", + "0 228 5301 1941 82 06 \n", + "1 84 4967 1849 179 06 \n", "\n", - " inc_extremelylow inc_verylow inc_low male_seniors female_seniors \\\n", - "0 714 652 553 218 259 \n", - "1 714 652 553 218 259 \n", + " county tract county_name inc_extremelylow inc_verylow inc_low \\\n", + "0 001 450741 Alameda 1418 512 140 \n", + "1 001 450300 Alameda 726 625 526 \n", "\n", - " area_m2 geometry \\\n", - "0 1.406431e+06 POLYGON ((162046.722 -434622.218, 162034.799 -... 
\n", - "1 1.406431e+06 POLYGON ((161975.618 -434794.073, 161968.418 -... \n", + " male_seniors female_seniors area_m2 \\\n", + "0 395 734 2.191555e+06 \n", + "1 324 477 3.149375e+06 \n", "\n", - " area_2 adjusted_total_pop \n", - "0 725281.427873 1962.197505 \n", - "1 590112.226428 1596.506809 " + " geometry area_2 \\\n", + "0 POLYGON ((-165021.160 -37973.000, -165009.573 ... 116531.265316 \n", + "1 POLYGON ((-167866.792 -33259.734, -167870.667 ... 212536.639123 \n", + "\n", + " area_ratio \n", + "0 0.053173 \n", + "1 0.067485 " ] }, - "execution_count": 50, + "execution_count": 47, "metadata": {}, "output_type": "execute_result" } @@ -2091,192 +2176,35 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 48, "id": "597c67ad-9913-435f-8c9b-ddc1a9fbd297", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# Calculate a population weight for each intersected geometry, which represents\n", - "# the fraction of the tract's total population within the stop buffer.\n", - "geometry_intersect['pop_weight'] = geometry_intersect['adjusted_total_pop'] / geometry_intersect['total_pop']\n", + "# Define demographic and socioeconomic columns to be adjusted by area ratio\n", + "cols_to_weight = [\n", + " 'total_pop', 'poverty_pop', 'non_us_citizen', 'workers_with_no_car', \n", + " 'households_with_no_cars', 'disabled_pop', 'public_asst_pop', \n", + " 'inc_extremelylow', 'inc_verylow', 'inc_low', \n", + " 'male_seniors', 'female_seniors', 'veteran_pop'\n", + "]\n", "\n", - "\n", - "# Define the demographic and socioeconomic columns to be adjusted based on the population weight.\n", - "cols_to_weight = ['poverty_pop', 'non_us_citizen', 'workers_with_no_car', \n", - " 'households_with_no_cars', 'disabled_pop', 'public_asst_pop', \n", - " 'inc_extremelylow', 'inc_verylow', 'inc_low', 'male_seniors', 'female_seniors',\n", - " 'veteran_pop']\n", - "\n", - "# Apply the population weight to each selected metric to create adjusted versions\n", - "# representing 
the portion of each population subgroup within the stop buffer.\n", + "# Apply area ratio to create adjusted metrics\n", "geometry_intersect[[f'{col}_adj' for col in cols_to_weight]] = (\n", - " geometry_intersect[cols_to_weight].multiply(geometry_intersect['pop_weight'], axis=0)\n", + " geometry_intersect[cols_to_weight].multiply(geometry_intersect['area_ratio'], axis=0)\n", ")" ] }, { "cell_type": "code", - "execution_count": 52, - "id": "a2d96744-d297-483e-a19a-892a194be041", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
nameorganization_typentd_idntd_agency_info_keystop_idstop_nameschedule_gtfs_dataset_keyfeed_keyGEOIDFQgeometryarea_2adjusted_total_poppop_weightpoverty_pop_adjnon_us_citizen_adjworkers_with_no_car_adjhouseholds_with_no_cars_adjdisabled_pop_adjpublic_asst_pop_adjinc_extremelylow_adjinc_verylow_adjinc_low_adjmale_seniors_adjfemale_seniors_adjveteran_pop_adj
0City of Los AngelesCity/Town90147reccTizvO7pe1k1CS5961046Echo Park Ave & Donaldson St (Southbound)cc53a0dbf5df90e3009b9cb5d89d80ba49f469dcf8712b562e3c970aa1b897311400000US06037197410POLYGON ((162046.722 -434622.218, 162034.799 -...725281.4278731962.1975050.515689123.249725221.23067814.95498826.3001511962.197505877.187373368.202107336.229375285.176142112.420251133.56351048.990476
1City of Los AngelesCity/Town90147reccTizvO7pe1k1CS5797231Echo Park Ave & Baxter St (Southbound) (4052)cc53a0dbf5df90e3009b9cb5d89d80ba49f469dcf8712b562e3c970aa1b897311400000US06037197410POLYGON ((161975.618 -434794.073, 161968.418 -...590112.2264281596.5068090.419581100.279928180.00037312.16785721.3986461596.506809713.707775299.581041273.567001232.02845391.468721108.67155439.860223
\n", - "
" - ], - "text/plain": [ - " name organization_type ntd_id ntd_agency_info_key stop_id \\\n", - "0 City of Los Angeles City/Town 90147 reccTizvO7pe1k1CS 5961046 \n", - "1 City of Los Angeles City/Town 90147 reccTizvO7pe1k1CS 5797231 \n", - "\n", - " stop_name \\\n", - "0 Echo Park Ave & Donaldson St (Southbound) \n", - "1 Echo Park Ave & Baxter St (Southbound) (4052) \n", - "\n", - " schedule_gtfs_dataset_key feed_key \\\n", - "0 cc53a0dbf5df90e3009b9cb5d89d80ba 49f469dcf8712b562e3c970aa1b89731 \n", - "1 cc53a0dbf5df90e3009b9cb5d89d80ba 49f469dcf8712b562e3c970aa1b89731 \n", - "\n", - " GEOIDFQ geometry \\\n", - "0 1400000US06037197410 POLYGON ((162046.722 -434622.218, 162034.799 -... \n", - "1 1400000US06037197410 POLYGON ((161975.618 -434794.073, 161968.418 -... \n", - "\n", - " area_2 adjusted_total_pop pop_weight poverty_pop_adj \\\n", - "0 725281.427873 1962.197505 0.515689 123.249725 \n", - "1 590112.226428 1596.506809 0.419581 100.279928 \n", - "\n", - " non_us_citizen_adj workers_with_no_car_adj households_with_no_cars_adj \\\n", - "0 221.230678 14.954988 26.300151 \n", - "1 180.000373 12.167857 21.398646 \n", - "\n", - " disabled_pop_adj public_asst_pop_adj inc_extremelylow_adj \\\n", - "0 1962.197505 877.187373 368.202107 \n", - "1 1596.506809 713.707775 299.581041 \n", - "\n", - " inc_verylow_adj inc_low_adj male_seniors_adj female_seniors_adj \\\n", - "0 336.229375 285.176142 112.420251 133.563510 \n", - "1 273.567001 232.028453 91.468721 108.671554 \n", - "\n", - " veteran_pop_adj \n", - "0 48.990476 \n", - "1 39.860223 " - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "id": "e1e4d484-1748-4027-83b2-bbb8dc441ed9", + "metadata": {}, + "outputs": [], "source": [ + "# Stop level demography data \n", "filtered_final_data = geometry_intersect[['name', 'organization_type', 'ntd_id', 'ntd_agency_info_key', 'stop_id', 'stop_name', 'schedule_gtfs_dataset_key', \n", " 'feed_key', 
'GEOIDFQ', 'geometry', 'area_2',\t'adjusted_total_pop', 'pop_weight',\t'poverty_pop_adj',\t\n", " 'non_us_citizen_adj',\t'workers_with_no_car_adj',\t'households_with_no_cars_adj',\t'disabled_pop_adj',\t\n", @@ -2286,13 +2214,133 @@ "filtered_final_data.head(2)" ] }, + { + "cell_type": "markdown", + "id": "6119ef50-cc16-43b9-a2aa-28d36a4428f8", + "metadata": {}, + "source": [ + "## Agency Level Demography Data " + ] + }, { "cell_type": "code", - "execution_count": null, - "id": "d6cc3be8-b52b-4ec0-97c6-1fc3c7e108d9", + "execution_count": 58, + "id": "971c8504-f654-45f9-b880-613b71a93c88", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "group_key = ['key', 'name']\n", + "\n", + "# Identify adjusted demographic columns\n", + "adj_cols = [col for col in geometry_intersect.columns if col.endswith('_adj')]\n", + "\n", + "# Non-aggregated attributes that are unique per agency\n", + "extra_cols = [\n", + " 'organization_type', 'ntd_id', 'ntd_agency_info_key',\n", + " 'schedule_gtfs_dataset_key', 'feed_key'\n", + "]\n", + "\n", + "# Build aggregation dictionary\n", + "agg_dict = {col: 'sum' for col in adj_cols}\n", + "agg_dict.update({col: 'first' for col in extra_cols})\n", + "\n", + "# Aggregate by agency\n", + "agency_summary = (\n", + " geometry_intersect\n", + " .groupby(group_key, as_index=False)\n", + " .agg(agg_dict)\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "a7fd704c-6a7b-4573-85d1-8d27bbaf43a7", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 84 entries, 0 to 83\n", + "Data columns (total 20 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 key 84 non-null object \n", + " 1 name 84 non-null object \n", + " 2 total_pop_adj 84 non-null float64\n", + " 3 poverty_pop_adj 84 non-null float64\n", + " 4 non_us_citizen_adj 84 non-null float64\n", + " 5 workers_with_no_car_adj 84 
non-null float64\n", + " 6 households_with_no_cars_adj 84 non-null float64\n", + " 7 disabled_pop_adj 84 non-null float64\n", + " 8 public_asst_pop_adj 84 non-null float64\n", + " 9 inc_extremelylow_adj 84 non-null float64\n", + " 10 inc_verylow_adj 84 non-null float64\n", + " 11 inc_low_adj 84 non-null float64\n", + " 12 male_seniors_adj 84 non-null float64\n", + " 13 female_seniors_adj 84 non-null float64\n", + " 14 veteran_pop_adj 84 non-null float64\n", + " 15 organization_type 84 non-null object \n", + " 16 ntd_id 77 non-null object \n", + " 17 ntd_agency_info_key 68 non-null object \n", + " 18 schedule_gtfs_dataset_key 84 non-null object \n", + " 19 feed_key 84 non-null object \n", + "dtypes: float64(13), object(7)\n", + "memory usage: 13.3+ KB\n" + ] + } + ], + "source": [ + "agency_summary.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "366986a8-b17b-4e19-b7c2-82608e14d250", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "def export_gdf(gdf, filename: str):\n", + " \n", + " gdf.to_parquet(f\"{filename}.parquet\")\n", + " \n", + " fs.put(\n", + " f\"{filename}.parquet\",\n", + " f\"{GCS_FILE_PATH}/transit_provider_dashboard/{filename}.parquet\",\n", + " token = credentials.token\n", + " )\n", + " \n", + " os.remove(f\"{filename}.parquet\")\n", + " print(f\"saved {GCS_FILE_PATH}/transit_provider_dashboard/{filename}.parquet\")\n", + " \n", + " return" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "d6cc3be8-b52b-4ec0-97c6-1fc3c7e108d9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "saved gs://calitp-analytics-data/data-analyses/transit_provider_dashboard/agency_stop_level_census_data.parquet.parquet\n" + ] + } + ], + "source": [ + "# Store data in warehouse\n", + "export_gdf(agency_summary, \"agency_level_census_data\")" + ] + } ], "metadata": {