diff --git a/transit_provider_dashboard/01_agency_grain_census.ipynb b/transit_provider_dashboard/01_agency_grain_census.ipynb index c4b990934..b388eb628 100644 --- a/transit_provider_dashboard/01_agency_grain_census.ipynb +++ b/transit_provider_dashboard/01_agency_grain_census.ipynb @@ -1,26 +1,108 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "22ae14b5-df6a-4892-98c5-2176b3fae594", + "metadata": {}, + "source": [ + "# Agency-Grain Census Data Summary Table\n", + "- **Purpose:** To define and quantify the service population of each Cal-ITP partner transit agency using census and related demographic data.\n", + "- **Goal:** Provide agency-level summaries that describe the characteristics of populations served, such as size, demographics, income, and travel behavior, to illustrate the reach and impact of Cal-ITP services.\n", + "- **Use:** Support data-driven storytelling and performance reporting by supplying key statistics for communications about the benefits, adoption, and equity potential of Cal-ITP initiatives (e.g., open-loop payment systems).\n", + "\n", + "- **Steps:**\n", + " - Querying ACS data via the Census API and uploading the results to a GCS bucket for later use.\n", + " - Census Tract Geometry Processing\n", + " - Querying Organization Data from the Data Warehouse and Storing in GCS\n", + " - Querying Bridge Organization GTFS Datasets and Merging with Dim Organizations Table\n", + " - Loading Transit Stop Data and Merging Stop Data with Organization Information\n", + " - Spatial Analysis: Stop Buffers and Census Tract Intersections\n", + " - Adjusting Population and Demographic Metrics for Stop Service Areas\n" + ] + }, { "cell_type": "code", "execution_count": 1, + "id": "873ef61b-098a-49da-9c28-2667af4ffd64", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: shared_utils in /opt/conda/lib/python3.11/site-packages (4.2)\n", + "Note: you may need to restart the
kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install shared_utils" + ] + }, + { + "cell_type": "code", + "execution_count": 2, "id": "0cdda776-857c-4e47-8ce8-940bfc49bb29", "metadata": { "tags": [] }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pygris in /opt/conda/lib/python3.11/site-packages (0.2.0)\n", + "Requirement already satisfied: geopandas>=0.9 in /opt/conda/lib/python3.11/site-packages (from pygris) (0.14.4)\n", + "Requirement already satisfied: requests in /opt/conda/lib/python3.11/site-packages (from pygris) (2.32.5)\n", + "Requirement already satisfied: appdirs in /opt/conda/lib/python3.11/site-packages (from pygris) (1.4.4)\n", + "Requirement already satisfied: fiona>=1.8.21 in /opt/conda/lib/python3.11/site-packages (from geopandas>=0.9->pygris) (1.10.1)\n", + "Requirement already satisfied: numpy>=1.22 in /opt/conda/lib/python3.11/site-packages (from geopandas>=0.9->pygris) (1.26.4)\n", + "Requirement already satisfied: packaging in /opt/conda/lib/python3.11/site-packages (from geopandas>=0.9->pygris) (25.0)\n", + "Requirement already satisfied: pandas>=1.4.0 in /opt/conda/lib/python3.11/site-packages (from geopandas>=0.9->pygris) (1.5.3)\n", + "Requirement already satisfied: pyproj>=3.3.0 in /opt/conda/lib/python3.11/site-packages (from geopandas>=0.9->pygris) (3.7.2)\n", + "Requirement already satisfied: shapely>=1.8.0 in /opt/conda/lib/python3.11/site-packages (from geopandas>=0.9->pygris) (2.1.1)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in /opt/conda/lib/python3.11/site-packages (from requests->pygris) (3.4.3)\n", + "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.11/site-packages (from requests->pygris) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.11/site-packages (from requests->pygris) (2.5.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in 
/opt/conda/lib/python3.11/site-packages (from requests->pygris) (2025.8.3)\n", + "Requirement already satisfied: attrs>=19.2.0 in /opt/conda/lib/python3.11/site-packages (from fiona>=1.8.21->geopandas>=0.9->pygris) (25.3.0)\n", + "Requirement already satisfied: click~=8.0 in /opt/conda/lib/python3.11/site-packages (from fiona>=1.8.21->geopandas>=0.9->pygris) (8.2.1)\n", + "Requirement already satisfied: click-plugins>=1.0 in /opt/conda/lib/python3.11/site-packages (from fiona>=1.8.21->geopandas>=0.9->pygris) (1.1.1.2)\n", + "Requirement already satisfied: cligj>=0.5 in /opt/conda/lib/python3.11/site-packages (from fiona>=1.8.21->geopandas>=0.9->pygris) (0.7.2)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in /opt/conda/lib/python3.11/site-packages (from pandas>=1.4.0->geopandas>=0.9->pygris) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.11/site-packages (from pandas>=1.4.0->geopandas>=0.9->pygris) (2025.2)\n", + "Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.11/site-packages (from python-dateutil>=2.8.1->pandas>=1.4.0->geopandas>=0.9->pygris) (1.17.0)\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install pygris" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "94b52594-1c8e-45e0-bedc-957467ef9959", + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "#pip install pygris" + "import sys\n", + "sys.path.append('../ahsc_grant')" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 4, "id": "dfd19a35-4791-4c64-a90a-88ec37c3b4b9", "metadata": { "tags": [] }, "outputs": [], "source": [ + "# Importing necessary package \n", "import pandas as pd \n", "import geopandas as gpd\n", "import google.auth\n", @@ -28,7 +110,10 @@ "import gcsfs\n", "import requests\n", "from pygris import tracts \n", + "from pygris.utils import erase_water\n", "from calitp_data_analysis.sql import 
get_engine\n", + "from shared_utils import schedule_rt_utils \n", + "from gtfs_key_ntd_crosswalk import filter_to_valid_dates\n", "db_engine = get_engine()\n", "credentials, project = google.auth.default()\n", "fs = gcsfs.GCSFileSystem()\n", @@ -38,157 +123,161 @@ }, { "cell_type": "code", - "execution_count": 3, - "id": "bcece4ad-cb36-47e5-8ec9-1ab7d909f472", - "metadata": { - "tags": [] - }, + "execution_count": 5, + "id": "af0504b4-642c-4977-b166-d800acfc82a0", + "metadata": {}, "outputs": [], "source": [ - "with open (\"ACS_apikey\", \"r\") as file:\n", - " api_key = file.read().strip()" + "GCS_FILE_PATH = 'gs://calitp-analytics-data/data-analyses'\n", + "analysis_date = \"2025-08-20\" # Selecting weekday to account for most agencies " ] }, { - "cell_type": "code", - "execution_count": 4, - "id": "2fc77411-cfa0-45b8-9103-f1972f92e9ad", - "metadata": { - "tags": [] - }, - "outputs": [], + "cell_type": "markdown", + "id": "c8ed8f1c-0f71-4408-9af2-c15926c26c99", + "metadata": {}, "source": [ - "# County Level Metrics required: \"Total Population\", \"Total Veteran Population\", \"Total Senior Population\", \"Total Low Income Population\"\n", - "variables = [\n", - " \"B01003_001E\", # Total Population\n", - " \"B17001_002E\", # Population with Income in the past 12 months below poverty level\n", - " \"B16008_037E\", # Non US Citizen Population\n", - " \"B01001_020E\", \"B01001_021E\", \"B01001_022E\", \"B01001_023E\", \"B01001_024E\", \"B01001_025E\", # Male senior population : 65 and above\n", - " \"B01001_044E\", \"B01001_045E\", \"B01001_046E\", \"B01001_047E\", \"B01001_048E\", \"B01001_049E\", # Female senior population : 65 and above\n", - " \"B06010_004E\", \"B06010_005E\", \"B06010_006E\", # Population with extremely low income\n", - " \"B06010_007E\", \"B06010_008E\", # Population with very low income\n", - " \"B06010_009E\", \"B06010_010E\", # Population with low income \n", - " \"B08014_002E\", \"B08201_002E\", # Workers and Households with no cars\n", 
- " \"B18101_001E\", # Total Population with Disability\n", - " \"B19058_001E\" # Public Assistance Income or Food Stamps/SNAP in past 12 months for Households\n", - "]\n", - "\n", - " " + "## Querying ACS data via the Census API and uploading the results to a GCS bucket for later use." ] }, { - "cell_type": "code", - "execution_count": 5, - "id": "5b08cfbf-e7ed-4b4a-8e38-68e66a760086", + "cell_type": "markdown", + "id": "152f62a1-dfe6-4100-8d74-5e4ceffbee11", "metadata": {}, - "outputs": [], "source": [ - "variable_str = \"NAME,\" + \",\".join(variables)" + "Uncomment and run the cells below as needed to include additional ACS variables." ] }, { "cell_type": "code", "execution_count": 6, - "id": "ad038c7f-7aef-4a3d-a878-00190d1b9fb9", + "id": "bcece4ad-cb36-47e5-8ec9-1ab7d909f472", "metadata": { "tags": [] }, "outputs": [], "source": [ - "url = f\"https://api.census.gov/data/2023/acs/acs5?get={variable_str}&for=tract:*&in=state:06&key={api_key}\"" + "# with open (\"ACS_apikey\", \"r\") as file:\n", + "# api_key = file.read().strip()" ] }, { "cell_type": "code", "execution_count": 7, - "id": "00b4b017-18de-42fa-98b0-6d91950a9474", + "id": "2fc77411-cfa0-45b8-9103-f1972f92e9ad", "metadata": { "tags": [] }, "outputs": [], "source": [ - "response = requests.get(url)" + "# # County Level Metrics required: \"Total Population\", \"Total Veteran Population\", \"Total Senior Population\", \"Total Low Income Population\"\n", + "# variables = [\n", + "# \"B01003_001E\", # Total Population\n", + "# \"B17001_002E\", # Population with Income in the past 12 months below poverty level\n", + "# \"B16008_037E\", # Non US Citizen Population\n", + "# \"B01001_020E\", \"B01001_021E\", \"B01001_022E\", \"B01001_023E\", \"B01001_024E\", \"B01001_025E\", # Male senior population : 65 and above\n", + "# \"B01001_044E\", \"B01001_045E\", \"B01001_046E\", \"B01001_047E\", \"B01001_048E\", \"B01001_049E\", # Female senior population : 65 and above\n", + "# \"B19013_001E\", # Median household
income in the past 12 months (2023 Inflation adjusted dollars)\n", + "# \"B06010_004E\", \"B06010_005E\", \"B06010_006E\", # Population with extremely low income\n", + "# \"B06010_007E\", \"B06010_008E\", # Population with very low income\n", + "# \"B06010_009E\", \"B06010_010E\", # Population with low income \n", + "# \"B08014_002E\", \"B08201_002E\", # Workers and Households with no cars\n", + "# \"B18101_001E\", # Total Population with Disability\n", + "# \"B19058_001E\", # Public Assistance Income or Food Stamps/SNAP in past 12 months for Households\n", + "# \"B21001_002E\" # Population with veteran status: 18 and above\n", + "# ]\n", + "\n", + " " ] }, { "cell_type": "code", "execution_count": 8, - "id": "94684cc6-092a-4ed3-8872-76e3e7a4ef33", - "metadata": { - "tags": [] - }, + "id": "5b08cfbf-e7ed-4b4a-8e38-68e66a760086", + "metadata": {}, "outputs": [], "source": [ - "if response.status_code == 200:\n", - " data = response.json()\n", - " census_data = pd.DataFrame(data[1:], columns=data[0])\n", + "# variable_str = \"NAME,\" + \",\".join(variables)\n", + "# url = f\"https://api.census.gov/data/2023/acs/acs5?get={variable_str}&for=tract:*&in=state:06&key={api_key}\"\n", + "# response = requests.get(url)\n", + "\n", + "# if response.status_code == 200:\n", + "# data = response.json()\n", + "# census_data = pd.DataFrame(data[1:], columns=data[0])\n", " \n", - " # Create GEOID column\n", - " census_data[\"GEOID\"] = census_data[\"state\"] + census_data[\"county\"] + census_data[\"tract\"]" + "# # Create GEOID column\n", + "# census_data[\"GEOID\"] = census_data[\"state\"] + census_data[\"county\"] + census_data[\"tract\"]\n", + "\n", + "# census_data['county_name'] = census_data['NAME'].str.extract(r';\\s*([A-Za-z\\s]+) County;')\n", + "# census_data = census_data.drop(columns=['NAME'])\n" ] }, { "cell_type": "code", "execution_count": 9, - "id": "b69bf253-d227-4ef5-9601-f4f3605f1877", + "id": "a0808315-79b0-42ee-8653-222785770048", "metadata": { "tags": [] }, 
"outputs": [], "source": [ - "census_data['county_name'] = census_data['NAME'].str.extract(r';\\s*([A-Za-z\\s]+) County;')" + "# census_data = census_data.rename(columns = {\n", + "# 'B01003_001E': 'total_pop',\n", + "# 'B17001_002E': 'poverty_pop',\n", + "# 'B16008_037E': 'non_us_citizen',\n", + "# 'B01001_020E': 'male_65_to_66', 'B01001_021E': 'male_67_to_69', 'B01001_022E': 'male_70_to_74', \n", + "# 'B01001_023E': 'male_75_to_79', 'B01001_024E': 'male_80_to_84', 'B01001_025E': 'male_85_and_over',\n", + "# 'B01001_044E': 'female_65_to_66', 'B01001_045E': 'female_67_to_69', 'B01001_046E': 'female_70_to_74', \n", + "# 'B01001_047E': 'female_75_to_79', 'B01001_048E': 'female_80_to_84', 'B01001_049E': 'female_85_and_over',\n", + "# 'B19013_001E': 'median_household_income',\n", + "# 'B06010_004E': 'income_less_10000', 'B06010_005E': 'income_10000_14999', 'B06010_006E': 'income_15000_24999', \n", + "# 'B06010_007E': 'income_25000_34999', 'B06010_008E': 'income_35000_49999',\n", + "# 'B06010_009E': 'income_50000_64999', 'B06010_010E': 'income_65000_74999',\n", + "# 'B08014_002E': 'workers_with_no_car', 'B08201_002E': 'households_with_no_cars',\n", + "# 'B18101_001E': 'disabled_pop',\n", + "# 'B19058_001E': 'public_asst_pop',\n", + "# 'B21001_002E': 'veteran_pop'\n", + "# })" ] }, { "cell_type": "code", "execution_count": 10, - "id": "5a03871e-f6b4-46fc-99d2-0dbfa04d1f38", + "id": "e3ddd869-a418-4bbd-9349-92bdba346012", "metadata": { "tags": [] }, "outputs": [], "source": [ - "census_data = census_data.drop(columns=['NAME'])" + "# exclude = ['state', 'county', 'tract', 'county_name', 'GEOID']\n", + "# cols_to_numeric = [col for col in census_data.columns if col not in exclude]\n", + "# census_data[cols_to_numeric] = census_data[cols_to_numeric].apply(pd.to_numeric, errors='coerce')" ] }, { "cell_type": "code", "execution_count": 11, - "id": "a0808315-79b0-42ee-8653-222785770048", + "id": "c0b7ce56-ca45-4a7d-8f45-ddab3e28606f", "metadata": { "tags": [] }, "outputs": [], 
"source": [ - "census_data = census_data.rename(columns = {\n", - " 'B01003_001E': 'total_pop',\n", - " 'B17001_002E': 'poverty_pop',\n", - " 'B16008_037E': 'non_us_citizen',\n", - " 'B01001_020E': 'male_65_to_66', 'B01001_021E': 'male_67_to_69', 'B01001_022E': 'male_70_to_74', \n", - " 'B01001_023E': 'male_75_to_79', 'B01001_024E': 'male_80_to_84', 'B01001_025E': 'male_85_and_over',\n", - " 'B01001_044E': 'female_65_to_66', 'B01001_045E': 'female_67_to_69', 'B01001_046E': 'female_70_to_74', \n", - " 'B01001_047E': 'female_75_to_79', 'B01001_048E': 'female_80_to_84', 'B01001_049E': 'female_85_and_over',\n", - " 'B06010_004E': 'income_less_10000', 'B06010_005E': 'income_10000_14999', 'B06010_006E': 'income_15000_24999', \n", - " 'B06010_007E': 'income_25000_34999', 'B06010_008E': 'income_35000_49999',\n", - " 'B06010_009E': 'income_50000_64999', 'B06010_010E': 'income_65000_74999',\n", - " 'B08014_002E': 'workers_with_no_car', 'B08201_002E': 'households_with_no_cars',\n", - " 'B18101_001E': 'disabled_pop',\n", - " 'B19058_001E': 'public_asst_pop'\n", - "})" + "# # Store data in warehouse\n", + "# with fs.open(f\"{GCS_FILE_PATH}/transit_provider_dashboard/census_data_2023.parquet\", \"wb\") as f:\n", + "# census_data.to_parquet(f, index=False)" ] }, { "cell_type": "code", "execution_count": 12, - "id": "e3ddd869-a418-4bbd-9349-92bdba346012", - "metadata": { - "tags": [] - }, + "id": "f7477af7-e3ee-44f5-b292-bd423280a0f2", + "metadata": {}, "outputs": [], "source": [ - "exclude = ['state', 'county', 'tract', 'county_name', 'GEOID']\n", - "cols_to_numeric = [col for col in census_data.columns if col not in exclude]\n", - "census_data[cols_to_numeric] = census_data[cols_to_numeric].apply(pd.to_numeric, errors='coerce')" + "# Load the stored ACS dataset from the specified GCS file path.\n", + "with fs.open(f\"{GCS_FILE_PATH}/transit_provider_dashboard/census_data_2023.parquet\", \"rb\") as f:\n", + " census_data = pd.read_parquet(f)" ] }, { @@ -235,6 +324,7 @@ " 
female_75_to_79\n", " female_80_to_84\n", " female_85_and_over\n", + " median_household_income\n", " income_less_10000\n", " income_10000_14999\n", " income_15000_24999\n", @@ -246,6 +336,7 @@ " households_with_no_cars\n", " disabled_pop\n", " public_asst_pop\n", + " veteran_pop\n", " state\n", " county\n", " tract\n", @@ -271,6 +362,7 @@ " 85\n", " 105\n", " 107\n", + " 250001\n", " 188\n", " 75\n", " 134\n", @@ -282,6 +374,7 @@ " 85\n", " 3094\n", " 1316\n", + " 129\n", " 06\n", " 001\n", " 400100\n", @@ -305,6 +398,7 @@ " 96\n", " 34\n", " 13\n", + " 225880\n", " 75\n", " 70\n", " 89\n", @@ -316,6 +410,7 @@ " 95\n", " 2093\n", " 861\n", + " 38\n", " 06\n", " 001\n", " 400200\n", @@ -339,6 +434,7 @@ " 158\n", " 13\n", " 142\n", + " 157731\n", " 383\n", " 201\n", " 300\n", @@ -350,6 +446,7 @@ " 416\n", " 5727\n", " 2713\n", + " 80\n", " 06\n", " 001\n", " 400300\n", @@ -373,6 +470,7 @@ " 43\n", " 23\n", " 30\n", + " 159612\n", " 187\n", " 105\n", " 287\n", @@ -384,6 +482,7 @@ " 204\n", " 4376\n", " 1803\n", + " 88\n", " 06\n", " 001\n", " 400400\n", @@ -407,6 +506,7 @@ " 50\n", " 60\n", " 203\n", + " 96250\n", " 256\n", " 91\n", " 244\n", @@ -418,6 +518,7 @@ " 169\n", " 3822\n", " 1655\n", + " 115\n", " 06\n", " 001\n", " 400500\n", @@ -450,40 +551,47 @@ "3 55 105 104 43 \n", "4 19 47 51 50 \n", "\n", - " female_80_to_84 female_85_and_over income_less_10000 income_10000_14999 \\\n", - "0 105 107 188 75 \n", - "1 34 13 75 70 \n", - "2 13 142 383 201 \n", - "3 23 30 187 105 \n", - "4 60 203 256 91 \n", + " female_80_to_84 female_85_and_over median_household_income \\\n", + "0 105 107 250001 \n", + "1 34 13 225880 \n", + "2 13 142 157731 \n", + "3 23 30 159612 \n", + "4 60 203 96250 \n", "\n", - " income_15000_24999 income_25000_34999 income_35000_49999 \\\n", - "0 134 157 87 \n", - "1 89 12 207 \n", - "2 300 251 400 \n", - "3 287 215 207 \n", - "4 244 213 385 \n", + " income_less_10000 income_10000_14999 income_15000_24999 \\\n", + "0 188 75 134 \n", + "1 75 70 89 
\n", + "2 383 201 300 \n", + "3 187 105 287 \n", + "4 256 91 244 \n", "\n", - " income_50000_64999 income_65000_74999 workers_with_no_car \\\n", - "0 129 70 28 \n", - "1 77 32 92 \n", - "2 148 291 157 \n", - "3 178 87 134 \n", - "4 387 244 74 \n", - "\n", - " households_with_no_cars disabled_pop public_asst_pop state county \\\n", - "0 85 3094 1316 06 001 \n", - "1 95 2093 861 06 001 \n", - "2 416 5727 2713 06 001 \n", - "3 204 4376 1803 06 001 \n", - "4 169 3822 1655 06 001 \n", - "\n", - " tract GEOID county_name \n", - "0 400100 06001400100 Alameda \n", - "1 400200 06001400200 Alameda \n", - "2 400300 06001400300 Alameda \n", - "3 400400 06001400400 Alameda \n", - "4 400500 06001400500 Alameda " + " income_25000_34999 income_35000_49999 income_50000_64999 \\\n", + "0 157 87 129 \n", + "1 12 207 77 \n", + "2 251 400 148 \n", + "3 215 207 178 \n", + "4 213 385 387 \n", + "\n", + " income_65000_74999 workers_with_no_car households_with_no_cars \\\n", + "0 70 28 85 \n", + "1 32 92 95 \n", + "2 291 157 416 \n", + "3 87 134 204 \n", + "4 244 74 169 \n", + "\n", + " disabled_pop public_asst_pop veteran_pop state county tract \\\n", + "0 3094 1316 129 06 001 400100 \n", + "1 2093 861 38 06 001 400200 \n", + "2 5727 2713 80 06 001 400300 \n", + "3 4376 1803 88 06 001 400400 \n", + "4 3822 1655 115 06 001 400500 \n", + "\n", + " GEOID county_name \n", + "0 06001400100 Alameda \n", + "1 06001400200 Alameda \n", + "2 06001400300 Alameda \n", + "3 06001400400 Alameda \n", + "4 06001400500 Alameda " ] }, "execution_count": 13, @@ -495,16 +603,34 @@ "census_data.head(5)" ] }, + { + "cell_type": "markdown", + "id": "eb8dd45d-7dac-4b3f-a6e6-4187b210d2a5", + "metadata": {}, + "source": [ + "“Low-income households” are those with household incomes at or below 80 percent of the statewide median income." + ] + }, { "cell_type": "code", "execution_count": 14, + "id": "ab510e4b-8b0e-463f-85cd-7baa0bc8b92f", + "metadata": {}, + "outputs": [], + "source": [ + "#Finding low income
number: \"" + ] + }, + { + "cell_type": "code", + "execution_count": 15, "id": "6647104f-db0a-4cb5-9908-c30d009e568e", "metadata": { "tags": [] }, "outputs": [], "source": [ - "#Creating custom income variables \n", + "## Aggregate ACS income brackets into broader income group categories: extremely low, very low, and low income.\n", "census_data['inc_extremelylow'] = census_data['income_less_10000'] + census_data['income_10000_14999'] + census_data['income_15000_24999']\n", "census_data['inc_verylow'] = census_data['income_25000_34999'] + census_data['income_35000_49999']\n", "census_data['inc_low'] = census_data['income_50000_64999'] + census_data['income_65000_74999']" @@ -512,20 +638,29 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "id": "5b637751-060f-46ce-9b8c-41c94dd07620", "metadata": { "tags": [] }, "outputs": [], "source": [ + "# Sum all senior age brackets (65+) to calculate total male and female senior populations.\n", "census_data['male_seniors'] = census_data.loc[:, \"male_65_to_66\":\"male_85_and_over\"].sum(axis=1)\n", "census_data['female_seniors'] = census_data.loc[:, \"female_65_to_66\":\"female_85_and_over\"].sum(axis=1)" ] }, + { + "cell_type": "markdown", + "id": "958f1dfb-710b-4ef5-a6b1-22e8d89814bc", + "metadata": {}, + "source": [ + "## Census Tract Geometry Processing" + ] + }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "id": "55013195-b315-4889-abf6-951091d09e05", "metadata": {}, "outputs": [ @@ -540,12 +675,32 @@ "source": [ "#Retrieving Tract Geometries for California\n", "ca_tracts = tracts(state = \"CA\", cb = True,\n", - " year = 2023, cache = True)" + " year = 2023, cache = True)\n", + "\n" ] }, { "cell_type": "code", "execution_count": 18, + "id": "e444d31c-b572-4831-83ec-b705970da9c8", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.11/site-packages/geopandas/geodataframe.py:2475: UserWarning: 
`keep_geom_type=True` in overlay resulted in 549 dropped geometries of different geometry types than df1 has. Set `keep_geom_type=False` to retain all geometries\n", + " return geopandas.overlay(\n" + ] + } + ], + "source": [ + "ca_tracts = erase_water(ca_tracts)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, "id": "986d2bcf-3e7f-4746-abe4-627460f68406", "metadata": {}, "outputs": [], @@ -554,34 +709,1638 @@ "tracts_ca_acs = ca_tracts.merge(census_data, how=\"inner\", on=\"GEOID\")" ] }, + { + "cell_type": "code", + "execution_count": 20, + "id": "0381662d-9cfe-4efd-a971-47fda3567c85", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Reproject California census tract geometries to EPSG:3310 (California Albers projection).\n", + "tracts_ca_acs.to_crs(crs=3310, inplace=True)" + ] + }, { "cell_type": "code", "execution_count": 21, + "id": "0bd06747-c70c-4418-b136-eb34f8a1fd7d", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Calculate the area of each census tract in square meters.\n", + "tracts_ca_acs[\"area_m2\"] = tracts_ca_acs.geometry.area" + ] + }, + { + "cell_type": "markdown", + "id": "8fdaa98a-c420-415f-946b-c2db711a4cf6", + "metadata": {}, + "source": [ + "## Querying Organization Data from the Data Warehouse and Storing in GCS" + ] + }, + { + "cell_type": "markdown", + "id": "8ca77331-4e37-43b7-b4a9-83a823a3e087", + "metadata": {}, + "source": [ + "Uncomment and run the cells below as needed to include additional columns from the dim_organizations table."
+ ] + }, + { + "cell_type": "code", + "execution_count": 22, "id": "1a89c9b5-4b7c-45c8-9d1e-632e1006b653", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# Querying dim organization\n", + "# # Querying dim organization\n", + "# with db_engine.connect() as connection:\n", + "# query = \"\"\"\n", + "# SELECT\n", + "# key, name, organization_type, ntd_id, ntd_agency_info_key, \n", + "# public_currently_operating, _is_current, _valid_from, _valid_to\n", + "# FROM \n", + "# cal-itp-data-infra.mart_transit_database.dim_organizations\n", + "# \"\"\"\n", + " \n", + "# #localize timestamps\n", + "# dim_orgs = (\n", + "# pd.read_sql(query, connection)\n", + "# .pipe(schedule_rt_utils.localize_timestamp_col, [\"_valid_from\", \"_valid_to\"])\n", + "# )\n", + " \n", + " \n", + "# dim_orgs = dim_orgs[\n", + "# (dim_orgs['public_currently_operating'] == True) & \n", + "# (dim_orgs['_is_current'] == True)\n", + "# ].reset_index(drop=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "4211d3bf-0742-4a8f-9fd3-5ab435168ddf", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# # Filtering the provider gtfs data to valid dates \n", + "# valid_organization_full = filter_to_valid_dates(dim_orgs, [analysis_date])" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "fa8ce12e-e993-446c-a4b0-aafca6974275", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# #Store data in warehouse\n", + "# with fs.open(f\"{GCS_FILE_PATH}/transit_provider_dashboard/organization_data_2025_08_20.parquet\", \"wb\") as f:\n", + "# valid_organization_full.to_parquet(f, index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "4916dd04-523b-4dda-aca7-599a90560b19", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Load the stored organization dataset from the specified GCS file path.\n", + "with 
fs.open(f\"{GCS_FILE_PATH}/transit_provider_dashboard/organization_data_2025_08_20.parquet\", \"rb\") as f:\n", + " valid_organization_full = pd.read_parquet(f)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "ca225047-9991-4ab1-bbde-e5e720418782", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
keynameorganization_typentd_idntd_agency_info_keypublic_currently_operating_is_current_valid_from_valid_to_valid_from_local_valid_to_local
09b5971d16d58e4fcafa694ee7fa33b12Alpine CountyCounty9R02-91116rec02Is8jSIBDkwM0TrueTrue2025-03-06 00:00:00+00:002098-12-31 23:59:59.999999+00:002025-03-05 16:00:002098-12-31 15:59:59.999999
173ed19bf64f9ba305091973b3f45d553Camarillo Health Care DistrictIndependent AgencyNoneNoneTrueTrue2025-03-06 00:00:00+00:002098-12-31 23:59:59.999999+00:002025-03-05 16:00:002098-12-31 15:59:59.999999
2402b2852ff46b95557801fbf3038ae7cChemehuevi Indian TribeTribe99316reclUB9NcCQrSImfdTrueTrue2025-03-06 00:00:00+00:002098-12-31 23:59:59.999999+00:002025-03-05 16:00:002098-12-31 15:59:59.999999
33a93c944381ee6c34646fa2dbf8b3d8fCity of AtascaderoCity/Town90194recMmQSjQCzABlmh1TrueTrue2025-03-06 00:00:00+00:002098-12-31 23:59:59.999999+00:002025-03-05 16:00:002098-12-31 15:59:59.999999
4e56f748b8cf235ca2acee940b9f60d64City of AzusaCity/Town90250recbLanAuzm5QituETrueTrue2025-03-06 00:00:00+00:002098-12-31 23:59:59.999999+00:002025-03-05 16:00:002098-12-31 15:59:59.999999
\n", + "
" + ], + "text/plain": [ + " key name \\\n", + "0 9b5971d16d58e4fcafa694ee7fa33b12 Alpine County \n", + "1 73ed19bf64f9ba305091973b3f45d553 Camarillo Health Care District \n", + "2 402b2852ff46b95557801fbf3038ae7c Chemehuevi Indian Tribe \n", + "3 3a93c944381ee6c34646fa2dbf8b3d8f City of Atascadero \n", + "4 e56f748b8cf235ca2acee940b9f60d64 City of Azusa \n", + "\n", + " organization_type ntd_id ntd_agency_info_key \\\n", + "0 County 9R02-91116 rec02Is8jSIBDkwM0 \n", + "1 Independent Agency None None \n", + "2 Tribe 99316 reclUB9NcCQrSImfd \n", + "3 City/Town 90194 recMmQSjQCzABlmh1 \n", + "4 City/Town 90250 recbLanAuzm5QituE \n", + "\n", + " public_currently_operating _is_current _valid_from \\\n", + "0 True True 2025-03-06 00:00:00+00:00 \n", + "1 True True 2025-03-06 00:00:00+00:00 \n", + "2 True True 2025-03-06 00:00:00+00:00 \n", + "3 True True 2025-03-06 00:00:00+00:00 \n", + "4 True True 2025-03-06 00:00:00+00:00 \n", + "\n", + " _valid_to _valid_from_local \\\n", + "0 2098-12-31 23:59:59.999999+00:00 2025-03-05 16:00:00 \n", + "1 2098-12-31 23:59:59.999999+00:00 2025-03-05 16:00:00 \n", + "2 2098-12-31 23:59:59.999999+00:00 2025-03-05 16:00:00 \n", + "3 2098-12-31 23:59:59.999999+00:00 2025-03-05 16:00:00 \n", + "4 2098-12-31 23:59:59.999999+00:00 2025-03-05 16:00:00 \n", + "\n", + " _valid_to_local \n", + "0 2098-12-31 15:59:59.999999 \n", + "1 2098-12-31 15:59:59.999999 \n", + "2 2098-12-31 15:59:59.999999 \n", + "3 2098-12-31 15:59:59.999999 \n", + "4 2098-12-31 15:59:59.999999 " + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "valid_organization_full.head(5)" + ] + }, + { + "cell_type": "markdown", + "id": "4c0c1a26-1cba-43b0-9cd0-e356ce40efcb", + "metadata": {}, + "source": [ + "## Querying Bridge Organization GTFS Datasets and Merging with Dim Organizations Table" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "ed1cd1ac-5a21-4a5d-9b1a-5522564896c1", + "metadata": { + 
"tags": [] + }, + "outputs": [], + "source": [ + "# Querying bridge organizations and gtfs_datasets\n", "with db_engine.connect() as connection:\n", " query = \"\"\"\n", " SELECT\n", - " source_record_id, organization_type, ntd_id, ntd_agency_info_key, \n", - " public_currently_operating, _is_current,_valid_from, _valid_to\n", - " FROM \n", - " cal-itp-data-infra.mart_transit_database.dim_organizations\n", + " organization_key, gtfs_dataset_key, organization_name\n", + " FROM\n", + " cal-itp-data-infra.mart_transit_database.bridge_organizations_x_gtfs_datasets_produced\n", " \"\"\"\n", - " dim_orgs= pd.read_sql(query, connection)" + " dim_orgs_GTFS= pd.read_sql(query, connection)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "a142623a-62cf-4411-9cf6-9d85f1b5ce22", + "metadata": {}, + "outputs": [], + "source": [ + "# Merge validated organization data with GTFS organization dimension data\n", + "# based on matching keys and names, keeping all rows from the validated dataset.\n", + "dim_orgs_merged = pd.merge(\n", + " valid_organization_full.dropna(subset=['key', 'name']),\n", + " dim_orgs_GTFS.dropna(subset=['organization_key', 'organization_name']),\n", + " left_on=['key', 'name'],\n", + " right_on=['organization_key', 'organization_name'],\n", + " how='left'\n", + ") " + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "c66cf805-de18-4723-b46b-be7bf84fa2b1", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Drop rows where either 'organization_key' or 'gtfs_dataset_key' is missing.\n", + "dim_orgs_merged = dim_orgs_merged.dropna(subset=['organization_key', 'gtfs_dataset_key'])" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "01eccdb1-4bd8-4670-8f9c-3a2a0e94948d", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Select relevant columns for the final organization dataset.\n", + "dim_orgs_final = dim_orgs_merged[['key', 'name', 'organization_type', 
'gtfs_dataset_key', 'ntd_id', 'ntd_agency_info_key']]" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "457b636a-705d-4085-91f8-db64d0142ef1", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
keynameorganization_typegtfs_dataset_keyntd_idntd_agency_info_key
16306bafde22fe614e0a6af2269625d8f6City of Menlo ParkCity/Townb76861f44c68f440d922c54ac1231d31NoneNone
321906a01d5cb664c5e898a95276912bfeTown of TruckeeCity/Town6fda78099793184fe08dd78945d188c09R02-91101receHP6eQInAo7sSP
331906a01d5cb664c5e898a95276912bfeTown of TruckeeCity/Town683da99e57acc29ac600a24cbd96feda9R02-91101receHP6eQInAo7sSP
34aad5befa7fcfce979f2113e373e48aa6Yosemite National ParkFederal Government31152914d10e2d0977b8b2fabb167922NoneNone
35aad5befa7fcfce979f2113e373e48aa6Yosemite National ParkFederal Government31f91d59f493cbee9ae0eeb824f44d0eNoneNone
\n", + "
" + ], + "text/plain": [ + " key name \\\n", + "16 306bafde22fe614e0a6af2269625d8f6 City of Menlo Park \n", + "32 1906a01d5cb664c5e898a95276912bfe Town of Truckee \n", + "33 1906a01d5cb664c5e898a95276912bfe Town of Truckee \n", + "34 aad5befa7fcfce979f2113e373e48aa6 Yosemite National Park \n", + "35 aad5befa7fcfce979f2113e373e48aa6 Yosemite National Park \n", + "\n", + " organization_type gtfs_dataset_key ntd_id \\\n", + "16 City/Town b76861f44c68f440d922c54ac1231d31 None \n", + "32 City/Town 6fda78099793184fe08dd78945d188c0 9R02-91101 \n", + "33 City/Town 683da99e57acc29ac600a24cbd96feda 9R02-91101 \n", + "34 Federal Government 31152914d10e2d0977b8b2fabb167922 None \n", + "35 Federal Government 31f91d59f493cbee9ae0eeb824f44d0e None \n", + "\n", + " ntd_agency_info_key \n", + "16 None \n", + "32 receHP6eQInAo7sSP \n", + "33 receHP6eQInAo7sSP \n", + "34 None \n", + "35 None " + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dim_orgs_final.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "b7292811-58ec-4672-a2e3-b9c133e79723", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Int64Index: 444 entries, 16 to 525\n", + "Data columns (total 6 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 key 444 non-null object\n", + " 1 name 444 non-null object\n", + " 2 organization_type 444 non-null object\n", + " 3 gtfs_dataset_key 444 non-null object\n", + " 4 ntd_id 420 non-null object\n", + " 5 ntd_agency_info_key 373 non-null object\n", + "dtypes: object(6)\n", + "memory usage: 24.3+ KB\n" + ] + } + ], + "source": [ + "dim_orgs_final.info()" + ] + }, + { + "cell_type": "markdown", + "id": "2e7fb6c3-b176-4d1d-9a35-8a7f7bbb8a5c", + "metadata": {}, + "source": [ + "## Loading Transit Stop Data and Merging Stop Data with Organization Information" + ] + }, + { + 
# %%
def prep_stops(analysis_date: str):
    """
    Read the `stop_times_direction` parquet for one analysis date from GCS.

    Only the schedule dataset key, feed key, stop id, stop name and geometry
    columns are loaded. The bucket prefix and the auth token come from the
    notebook-level `GCS_FILE_PATH` and `credentials` objects.

    Parameters
    ----------
    analysis_date : str
        Date string embedded in the parquet file name.

    Returns
    -------
    The frame produced by `gpd.read_parquet` for that file.
    """
    stop_columns = ["schedule_gtfs_dataset_key", "feed_key", "stop_id", "stop_name", "geometry"]
    parquet_path = f"{GCS_FILE_PATH}/rt_vs_schedule/stop_times_direction_{analysis_date}.parquet"

    return gpd.read_parquet(
        parquet_path,
        columns=stop_columns,
        storage_options={'token': credentials.token},
    )

# %%
stops = prep_stops(analysis_date)
stops.info()
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
schedule_gtfs_dataset_keyfeed_keystop_idstop_namegeometry
0723210f3a6d61ee3936df401e18a563615b542ef6dbfd2903710095179e84b25TL-3Terminal 1POINT (147834.197 -450957.957)
1723210f3a6d61ee3936df401e18a563615b542ef6dbfd2903710095179e84b25TL-4Terminal 2POINT (147598.785 -450990.106)
2723210f3a6d61ee3936df401e18a563615b542ef6dbfd2903710095179e84b25TL-5Terminal 3POINT (147265.199 -451037.318)
3723210f3a6d61ee3936df401e18a563615b542ef6dbfd2903710095179e84b25TL-6International TerminalPOINT (147144.316 -451145.363)
4723210f3a6d61ee3936df401e18a563615b542ef6dbfd2903710095179e84b25TL-7Terminal 4POINT (147272.606 -451317.665)
\n", + "
# %%
stops.head(5)

# %%
# Link each stop to the organization(s) that produce its schedule feed.
# Stops lacking an id or a name are discarded first; the inner join then keeps
# only stops whose schedule dataset key appears in dim_orgs_final, and exact
# duplicate rows are removed at the end.
orgs_stops = (
    stops
    .dropna(subset=['stop_id', 'stop_name'])
    .merge(
        dim_orgs_final,
        how='inner',
        left_on='schedule_gtfs_dataset_key',
        right_on='gtfs_dataset_key',
    )
    .drop_duplicates()
)
# %%
orgs_stops.info()

# %% [markdown]
# ## Spatial Analysis: Stop Buffers and Census Tract Intersections

# %%
# Put the stops in the same CRS as the California census tracts so the two
# layers can be overlaid.
orgs_stops = orgs_stops.to_crs(tracts_ca_acs.crs)

# %%
# Half a mile expressed in metres (1609.344 / 2).
# NOTE(review): this assumes the tract CRS uses metres as its linear unit - confirm.
HALF_MILE_IN_METERS = 804.672

# Replace every stop point with a half-mile circle around it.
orgs_stop_buffered = orgs_stops.assign(
    geometry=orgs_stops.geometry.buffer(HALF_MILE_IN_METERS)
)

# %%
# Union all buffers belonging to one organization ('key') into a single service-area
# geometry, then turn 'key' back into a regular column. The remaining attribute
# columns (stop_id, stop_name, feed_key, ...) hold one representative value per
# organization after the dissolve, not per-stop values.
orgs_stop_dissolved = orgs_stop_buffered.dissolve(by='key').reset_index()
# %%
# Cut each organization's dissolved service area by the census tracts: one output
# row per (organization, tract) pair that overlaps, carrying the columns of both layers.
geometry_intersect = orgs_stop_dissolved.overlay(
    tracts_ca_acs,
    how='intersection',
    keep_geom_type=True,
)

# %%
# Area of every intersection piece, in the squared units of the layer CRS
# (square metres if the CRS is metre-based, as the original comment states).
geometry_intersect['area_2'] = geometry_intersect.area
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
keyschedule_gtfs_dataset_keyfeed_keystop_idstop_namenameorganization_typegtfs_dataset_keyntd_idntd_agency_info_keySTATEFPCOUNTYFPTRACTCEGEOIDFQGEOIDNAMENAMELSADSTUSPSNAMELSADCOSTATE_NAMELSADALANDAWATERtotal_poppoverty_popnon_us_citizenmale_65_to_66male_67_to_69male_70_to_74male_75_to_79male_80_to_84male_85_and_overfemale_65_to_66female_67_to_69female_70_to_74female_75_to_79female_80_to_84female_85_and_overmedian_household_incomeincome_less_10000income_10000_14999income_15000_24999income_25000_34999income_35000_49999income_50000_64999income_65000_74999workers_with_no_carhouseholds_with_no_carsdisabled_poppublic_asst_popveteran_popstatecountytractcounty_nameinc_extremelylowinc_verylowinc_lowmale_seniorsfemale_seniorsarea_m2geometryarea_2
00119506e03bed4c4d8b094ab1177cd78524ea6209600e9a2de34a02cf9068729d9e1e77d0754b712fc608741ae3836f5bSATSanta Maria-Ihop Bus StopSan Joaquin Joint Powers AuthorityIndependent Agency524ea6209600e9a2de34a02cf9068729NoneNone060014507411400000US06001450741060014507414507.41Census Tract 4507.41CAAlameda CountyCaliforniaCT2182370054699377503767731176041431651829574175154609657516245252260637755228530119418206001450741Alameda14185121403957342.191555e+06POLYGON ((-165021.160 -37973.000, -165009.573 ...116531.265316
10119506e03bed4c4d8b094ab1177cd78524ea6209600e9a2de34a02cf9068729d9e1e77d0754b712fc608741ae3836f5bSATSanta Maria-Ihop Bus StopSan Joaquin Joint Powers AuthorityIndependent Agency524ea6209600e9a2de34a02cf9068729NoneNone060014503001400000US06001450300060014503004503Census Tract 4503CAAlameda CountyCaliforniaCT313364104967675824853497072326951681011137514787536314222119742833019664844967184917906001450300Alameda7266255263244773.149375e+06POLYGON ((-167866.792 -33259.734, -167870.667 ...212536.639123
\n", + "
" + ], + "text/plain": [ + " key schedule_gtfs_dataset_key \\\n", + "0 0119506e03bed4c4d8b094ab1177cd78 524ea6209600e9a2de34a02cf9068729 \n", + "1 0119506e03bed4c4d8b094ab1177cd78 524ea6209600e9a2de34a02cf9068729 \n", + "\n", + " feed_key stop_id stop_name \\\n", + "0 d9e1e77d0754b712fc608741ae3836f5 bSAT Santa Maria-Ihop Bus Stop \n", + "1 d9e1e77d0754b712fc608741ae3836f5 bSAT Santa Maria-Ihop Bus Stop \n", + "\n", + " name organization_type \\\n", + "0 San Joaquin Joint Powers Authority Independent Agency \n", + "1 San Joaquin Joint Powers Authority Independent Agency \n", + "\n", + " gtfs_dataset_key ntd_id ntd_agency_info_key STATEFP \\\n", + "0 524ea6209600e9a2de34a02cf9068729 None None 06 \n", + "1 524ea6209600e9a2de34a02cf9068729 None None 06 \n", + "\n", + " COUNTYFP TRACTCE GEOIDFQ GEOID NAME \\\n", + "0 001 450741 1400000US06001450741 06001450741 4507.41 \n", + "1 001 450300 1400000US06001450300 06001450300 4503 \n", + "\n", + " NAMELSAD STUSPS NAMELSADCO STATE_NAME LSAD ALAND \\\n", + "0 Census Tract 4507.41 CA Alameda County California CT 2182370 \n", + "1 Census Tract 4503 CA Alameda County California CT 3133641 \n", + "\n", + " AWATER total_pop poverty_pop non_us_citizen male_65_to_66 \\\n", + "0 0 5469 937 750 37 \n", + "1 0 4967 67 582 48 \n", + "\n", + " male_67_to_69 male_70_to_74 male_75_to_79 male_80_to_84 \\\n", + "0 67 73 117 60 \n", + "1 53 49 70 72 \n", + "\n", + " male_85_and_over female_65_to_66 female_67_to_69 female_70_to_74 \\\n", + "0 41 43 165 182 \n", + "1 32 69 51 68 \n", + "\n", + " female_75_to_79 female_80_to_84 female_85_and_over \\\n", + "0 95 74 175 \n", + "1 101 113 75 \n", + "\n", + " median_household_income income_less_10000 income_10000_14999 \\\n", + "0 154609 657 516 \n", + "1 147875 363 142 \n", + "\n", + " income_15000_24999 income_25000_34999 income_35000_49999 \\\n", + "0 245 252 260 \n", + "1 221 197 428 \n", + "\n", + " income_50000_64999 income_65000_74999 workers_with_no_car \\\n", + "0 63 77 55 \n", + "1 330 196 
# %%
geometry_intersect.head(2)

# %% [markdown]
# ## Adjusting Population and Demographic Metrics for Stop Service Areas

# %%
# Fraction of each tract that falls inside the organization's buffered service area:
# intersected area divided by the tract's 'area_m2' column.
# NOTE(review): 'area_m2' arrives with the tract layer; presumably it is the full
# tract area in the same units as 'area_2' - confirm where it is computed.
geometry_intersect['area_ratio'] = geometry_intersect['area_2'].div(
    geometry_intersect['area_m2']
)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
keyschedule_gtfs_dataset_keyfeed_keystop_idstop_namenameorganization_typegtfs_dataset_keyntd_idntd_agency_info_keySTATEFPCOUNTYFPTRACTCEGEOIDFQGEOIDNAMENAMELSADSTUSPSNAMELSADCOSTATE_NAMELSADALANDAWATERtotal_poppoverty_popnon_us_citizenmale_65_to_66male_67_to_69male_70_to_74male_75_to_79male_80_to_84male_85_and_overfemale_65_to_66female_67_to_69female_70_to_74female_75_to_79female_80_to_84female_85_and_overmedian_household_incomeincome_less_10000income_10000_14999income_15000_24999income_25000_34999income_35000_49999income_50000_64999income_65000_74999workers_with_no_carhouseholds_with_no_carsdisabled_poppublic_asst_popveteran_popstatecountytractcounty_nameinc_extremelylowinc_verylowinc_lowmale_seniorsfemale_seniorsarea_m2geometryarea_2area_ratio
00119506e03bed4c4d8b094ab1177cd78524ea6209600e9a2de34a02cf9068729d9e1e77d0754b712fc608741ae3836f5bSATSanta Maria-Ihop Bus StopSan Joaquin Joint Powers AuthorityIndependent Agency524ea6209600e9a2de34a02cf9068729NoneNone060014507411400000US06001450741060014507414507.41Census Tract 4507.41CAAlameda CountyCaliforniaCT2182370054699377503767731176041431651829574175154609657516245252260637755228530119418206001450741Alameda14185121403957342.191555e+06POLYGON ((-165021.160 -37973.000, -165009.573 ...116531.2653160.053173
10119506e03bed4c4d8b094ab1177cd78524ea6209600e9a2de34a02cf9068729d9e1e77d0754b712fc608741ae3836f5bSATSanta Maria-Ihop Bus StopSan Joaquin Joint Powers AuthorityIndependent Agency524ea6209600e9a2de34a02cf9068729NoneNone060014503001400000US06001450300060014503004503Census Tract 4503CAAlameda CountyCaliforniaCT313364104967675824853497072326951681011137514787536314222119742833019664844967184917906001450300Alameda7266255263244773.149375e+06POLYGON ((-167866.792 -33259.734, -167870.667 ...212536.6391230.067485
\n", + "
" + ], + "text/plain": [ + " key schedule_gtfs_dataset_key \\\n", + "0 0119506e03bed4c4d8b094ab1177cd78 524ea6209600e9a2de34a02cf9068729 \n", + "1 0119506e03bed4c4d8b094ab1177cd78 524ea6209600e9a2de34a02cf9068729 \n", + "\n", + " feed_key stop_id stop_name \\\n", + "0 d9e1e77d0754b712fc608741ae3836f5 bSAT Santa Maria-Ihop Bus Stop \n", + "1 d9e1e77d0754b712fc608741ae3836f5 bSAT Santa Maria-Ihop Bus Stop \n", + "\n", + " name organization_type \\\n", + "0 San Joaquin Joint Powers Authority Independent Agency \n", + "1 San Joaquin Joint Powers Authority Independent Agency \n", + "\n", + " gtfs_dataset_key ntd_id ntd_agency_info_key STATEFP \\\n", + "0 524ea6209600e9a2de34a02cf9068729 None None 06 \n", + "1 524ea6209600e9a2de34a02cf9068729 None None 06 \n", + "\n", + " COUNTYFP TRACTCE GEOIDFQ GEOID NAME \\\n", + "0 001 450741 1400000US06001450741 06001450741 4507.41 \n", + "1 001 450300 1400000US06001450300 06001450300 4503 \n", + "\n", + " NAMELSAD STUSPS NAMELSADCO STATE_NAME LSAD ALAND \\\n", + "0 Census Tract 4507.41 CA Alameda County California CT 2182370 \n", + "1 Census Tract 4503 CA Alameda County California CT 3133641 \n", + "\n", + " AWATER total_pop poverty_pop non_us_citizen male_65_to_66 \\\n", + "0 0 5469 937 750 37 \n", + "1 0 4967 67 582 48 \n", + "\n", + " male_67_to_69 male_70_to_74 male_75_to_79 male_80_to_84 \\\n", + "0 67 73 117 60 \n", + "1 53 49 70 72 \n", + "\n", + " male_85_and_over female_65_to_66 female_67_to_69 female_70_to_74 \\\n", + "0 41 43 165 182 \n", + "1 32 69 51 68 \n", + "\n", + " female_75_to_79 female_80_to_84 female_85_and_over \\\n", + "0 95 74 175 \n", + "1 101 113 75 \n", + "\n", + " median_household_income income_less_10000 income_10000_14999 \\\n", + "0 154609 657 516 \n", + "1 147875 363 142 \n", + "\n", + " income_15000_24999 income_25000_34999 income_35000_49999 \\\n", + "0 245 252 260 \n", + "1 221 197 428 \n", + "\n", + " income_50000_64999 income_65000_74999 workers_with_no_car \\\n", + "0 63 77 55 \n", + "1 330 196 
# %%
geometry_intersect.head(2)

# %%
# Count-type demographic and socioeconomic columns that are scaled by area ratio.
cols_to_weight = [
    'total_pop', 'poverty_pop', 'non_us_citizen', 'workers_with_no_car',
    'households_with_no_cars', 'disabled_pop', 'public_asst_pop',
    'inc_extremelylow', 'inc_verylow', 'inc_low',
    'male_seniors', 'female_seniors', 'veteran_pop'
]

# Areal weighting: each tract count is multiplied by the share of the tract
# covered by the service area, producing one '<column>_adj' column per input.
geometry_intersect[[f'{col}_adj' for col in cols_to_weight]] = (
    geometry_intersect[cols_to_weight].multiply(geometry_intersect['area_ratio'], axis=0)
)

# %%
# Agency x tract level demography data.
# geometry_intersect is built from buffers dissolved by organization key, so each
# row is one (organization, census tract) piece rather than one stop; 'stop_id',
# 'stop_name', 'schedule_gtfs_dataset_key' and 'feed_key' hold a single
# representative value per organization.
#
# The previous selection asked for 'adjusted_total_pop' and 'pop_weight', which are
# never created and raised a KeyError. The weighted population is 'total_pop_adj';
# the only weight column on the frame is 'area_ratio'
# (NOTE(review): confirm 'area_ratio' is what 'pop_weight' was meant to be).
filtered_final_data = geometry_intersect[[
    'name', 'organization_type', 'ntd_id', 'ntd_agency_info_key',
    'stop_id', 'stop_name', 'schedule_gtfs_dataset_key', 'feed_key',
    'GEOIDFQ', 'geometry', 'area_2', 'total_pop_adj', 'area_ratio',
    'poverty_pop_adj', 'non_us_citizen_adj', 'workers_with_no_car_adj',
    'households_with_no_cars_adj', 'disabled_pop_adj', 'public_asst_pop_adj',
    'inc_extremelylow_adj', 'inc_verylow_adj', 'inc_low_adj',
    'male_seniors_adj', 'female_seniors_adj', 'veteran_pop_adj'
]]

filtered_final_data.head(2)

# %% [markdown]
# ## Agency Level Demography Data

# %%
group_key = ['key', 'name']

# Every area-weighted demographic column created above.
adj_cols = [col for col in geometry_intersect.columns if col.endswith('_adj')]

# Descriptive attributes carried along with 'first'. The organization-level ones
# (type, NTD ids) are constant within a group. The dataset/feed keys are NOT
# necessarily unique per agency - an organization can produce several GTFS
# datasets - so only one representative key is kept here.
extra_cols = [
    'organization_type', 'ntd_id', 'ntd_agency_info_key',
    'schedule_gtfs_dataset_key', 'feed_key'
]

# Sum the weighted counts, keep the first value of the descriptive attributes.
agg_dict = {col: 'sum' for col in adj_cols}
agg_dict.update({col: 'first' for col in extra_cols})

# One row per agency.
agency_summary = (
    geometry_intersect
    .groupby(group_key, as_index=False)
    .agg(agg_dict)
)
# %%
agency_summary.info()

# %%
def export_gdf(gdf, filename: str):
    """
    Write a (Geo)DataFrame to parquet and upload it to the dashboard folder in GCS.

    The frame is first written to a local `<name>.parquet` file, uploaded to
    `{GCS_FILE_PATH}/transit_provider_dashboard/<name>.parquet` through the
    notebook-level `fs` filesystem object, and the local copy is then deleted.

    Parameters
    ----------
    gdf
        Any object exposing `to_parquet(path)`.
    filename : str
        Base name of the output file. A trailing ".parquet" is accepted and
        ignored, so "table" and "table.parquet" both produce "table.parquet"
        (previously the latter produced "table.parquet.parquet").

    Returns
    -------
    None
    """
    suffix = ".parquet"
    # Normalize so the extension is never doubled.
    stem = filename[:-len(suffix)] if filename.endswith(suffix) else filename

    local_path = f"{stem}{suffix}"
    remote_path = f"{GCS_FILE_PATH}/transit_provider_dashboard/{stem}{suffix}"

    gdf.to_parquet(local_path)

    try:
        fs.put(
            local_path,
            remote_path,
            token = credentials.token
        )
    finally:
        # Remove the temporary local file even when the upload raises.
        os.remove(local_path)

    print(f"saved {remote_path}")

    return

# %%
# Store data in warehouse
# NOTE(review): earlier runs wrote "<name>.parquet.parquet"; any reader pointing at
# the double-extension path needs to be updated to "<name>.parquet".
export_gdf(agency_summary, "agency_level_census_data")