diff --git a/transit_provider_dashboard/01_agency_grain_census.ipynb b/transit_provider_dashboard/01_agency_grain_census.ipynb
index c4b990934..b388eb628 100644
--- a/transit_provider_dashboard/01_agency_grain_census.ipynb
+++ b/transit_provider_dashboard/01_agency_grain_census.ipynb
@@ -1,26 +1,108 @@
{
"cells": [
+ {
+ "cell_type": "markdown",
+ "id": "22ae14b5-df6a-4892-98c5-2176b3fae594",
+ "metadata": {},
+ "source": [
+ "# Agency-Grain Census Data Summary Table\n",
+ "- **Purpose:** To define and quantify the service population of each Cal-ITP partner transit agency using census and related demographic data.\n",
+ "- **Goal:** Provide agency-level summaries that describe the characteristics of populations served, such as size, demographics, income, and travel behavior, to illustrate the reach and impact of Cal-ITP services.\n",
+ "- **Use:** Support data-driven storytelling and performance reporting by supplying key statistics for communications about the benefits, adoption, and equity potential of Cal-ITP initiatives (e.g., open-loop payment systems).\n",
+ "\n",
+ "- **Steps:**\n",
+ " - Querying ACS data via the Census API and uploading the results to a GCS bucket for later use.\n",
+ " - Census Tract Geometry Processing\n",
+ " - Querying Organization Data from the Data Warehouse and Storing in GCS\n",
+ " - Querying Bridge Organization GTFS Datasets and Merging with Dim Organizations Table\n",
+ " - Loading Transit Stop Data and Merging Stop Data with Organization Information\n",
+ " - Spatial Analysis: Stop Buffers and Census Tract Intersections\n",
+ " - Adjusting Population and Demographic Metrics for Stop Service Areas\n"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 1,
+ "id": "873ef61b-098a-49da-9c28-2667af4ffd64",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: shared_utils in /opt/conda/lib/python3.11/site-packages (4.2)\n",
+ "Note: you may need to restart the kernel to use updated packages.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Install into the environment of the running kernel (explicit %pip magic rather than automagic).\n",
+ "%pip install shared_utils"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
"id": "0cdda776-857c-4e47-8ce8-940bfc49bb29",
"metadata": {
"tags": []
},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: pygris in /opt/conda/lib/python3.11/site-packages (0.2.0)\n",
+ "Requirement already satisfied: geopandas>=0.9 in /opt/conda/lib/python3.11/site-packages (from pygris) (0.14.4)\n",
+ "Requirement already satisfied: requests in /opt/conda/lib/python3.11/site-packages (from pygris) (2.32.5)\n",
+ "Requirement already satisfied: appdirs in /opt/conda/lib/python3.11/site-packages (from pygris) (1.4.4)\n",
+ "Requirement already satisfied: fiona>=1.8.21 in /opt/conda/lib/python3.11/site-packages (from geopandas>=0.9->pygris) (1.10.1)\n",
+ "Requirement already satisfied: numpy>=1.22 in /opt/conda/lib/python3.11/site-packages (from geopandas>=0.9->pygris) (1.26.4)\n",
+ "Requirement already satisfied: packaging in /opt/conda/lib/python3.11/site-packages (from geopandas>=0.9->pygris) (25.0)\n",
+ "Requirement already satisfied: pandas>=1.4.0 in /opt/conda/lib/python3.11/site-packages (from geopandas>=0.9->pygris) (1.5.3)\n",
+ "Requirement already satisfied: pyproj>=3.3.0 in /opt/conda/lib/python3.11/site-packages (from geopandas>=0.9->pygris) (3.7.2)\n",
+ "Requirement already satisfied: shapely>=1.8.0 in /opt/conda/lib/python3.11/site-packages (from geopandas>=0.9->pygris) (2.1.1)\n",
+ "Requirement already satisfied: charset_normalizer<4,>=2 in /opt/conda/lib/python3.11/site-packages (from requests->pygris) (3.4.3)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.11/site-packages (from requests->pygris) (3.10)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.11/site-packages (from requests->pygris) (2.5.0)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.11/site-packages (from requests->pygris) (2025.8.3)\n",
+ "Requirement already satisfied: attrs>=19.2.0 in /opt/conda/lib/python3.11/site-packages (from fiona>=1.8.21->geopandas>=0.9->pygris) (25.3.0)\n",
+ "Requirement already satisfied: click~=8.0 in /opt/conda/lib/python3.11/site-packages (from fiona>=1.8.21->geopandas>=0.9->pygris) (8.2.1)\n",
+ "Requirement already satisfied: click-plugins>=1.0 in /opt/conda/lib/python3.11/site-packages (from fiona>=1.8.21->geopandas>=0.9->pygris) (1.1.1.2)\n",
+ "Requirement already satisfied: cligj>=0.5 in /opt/conda/lib/python3.11/site-packages (from fiona>=1.8.21->geopandas>=0.9->pygris) (0.7.2)\n",
+ "Requirement already satisfied: python-dateutil>=2.8.1 in /opt/conda/lib/python3.11/site-packages (from pandas>=1.4.0->geopandas>=0.9->pygris) (2.9.0.post0)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.11/site-packages (from pandas>=1.4.0->geopandas>=0.9->pygris) (2025.2)\n",
+ "Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.11/site-packages (from python-dateutil>=2.8.1->pandas>=1.4.0->geopandas>=0.9->pygris) (1.17.0)\n",
+ "Note: you may need to restart the kernel to use updated packages.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Install into the environment of the running kernel; pinned to the version this notebook was last run with.\n",
+ "%pip install pygris==0.2.0"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "94b52594-1c8e-45e0-bedc-957467ef9959",
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
- "#pip install pygris"
+ "import sys\n",
+ "\n",
+ "# Expose modules in the sibling ahsc_grant directory to this notebook's imports.\n",
+ "# Guarded so that re-running the cell does not add duplicate sys.path entries.\n",
+ "AHSC_GRANT_DIR = '../ahsc_grant'\n",
+ "if AHSC_GRANT_DIR not in sys.path:\n",
+ "    sys.path.append(AHSC_GRANT_DIR)"
]
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 4,
"id": "dfd19a35-4791-4c64-a90a-88ec37c3b4b9",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
+ "# Import the packages used throughout the notebook.\n",
"import pandas as pd \n",
"import geopandas as gpd\n",
"import google.auth\n",
@@ -28,7 +110,10 @@
"import gcsfs\n",
"import requests\n",
"from pygris import tracts \n",
+ "from pygris.utils import erase_water\n",
"from calitp_data_analysis.sql import get_engine\n",
+ "from shared_utils import schedule_rt_utils \n",
+ "from gtfs_key_ntd_crosswalk import filter_to_valid_dates\n",
"db_engine = get_engine()\n",
"credentials, project = google.auth.default()\n",
"fs = gcsfs.GCSFileSystem()\n",
@@ -38,157 +123,161 @@
},
{
"cell_type": "code",
- "execution_count": 3,
- "id": "bcece4ad-cb36-47e5-8ec9-1ab7d909f472",
- "metadata": {
- "tags": []
- },
+ "execution_count": 5,
+ "id": "af0504b4-642c-4977-b166-d800acfc82a0",
+ "metadata": {},
"outputs": [],
"source": [
- "with open (\"ACS_apikey\", \"r\") as file:\n",
- " api_key = file.read().strip()"
+ "# Root GCS location of the cached datasets read by this notebook.\n",
+ "GCS_FILE_PATH = \"gs://calitp-analytics-data/data-analyses\"\n",
+ "\n",
+ "# Service date for the analysis; a weekday is selected to account for most agencies.\n",
+ "analysis_date = \"2025-08-20\""
]
},
{
- "cell_type": "code",
- "execution_count": 4,
- "id": "2fc77411-cfa0-45b8-9103-f1972f92e9ad",
- "metadata": {
- "tags": []
- },
- "outputs": [],
+ "cell_type": "markdown",
+ "id": "c8ed8f1c-0f71-4408-9af2-c15926c26c99",
+ "metadata": {},
"source": [
- "# County Level Metrics required: \"Total Population\", \"Total Veteran Population\", \"Total Senior Population\", \"Total Low Income Population\"\n",
- "variables = [\n",
- " \"B01003_001E\", # Total Population\n",
- " \"B17001_002E\", # Population with Income in the past 12 months below poverty level\n",
- " \"B16008_037E\", # Non US Citizen Population\n",
- " \"B01001_020E\", \"B01001_021E\", \"B01001_022E\", \"B01001_023E\", \"B01001_024E\", \"B01001_025E\", # Male senior population : 65 and above\n",
- " \"B01001_044E\", \"B01001_045E\", \"B01001_046E\", \"B01001_047E\", \"B01001_048E\", \"B01001_049E\", # Female senior population : 65 and above\n",
- " \"B06010_004E\", \"B06010_005E\", \"B06010_006E\", # Population with extremely low income\n",
- " \"B06010_007E\", \"B06010_008E\", # Population with very low income\n",
- " \"B06010_009E\", \"B06010_010E\", # Population with low income \n",
- " \"B08014_002E\", \"B08201_002E\", # Workers and Households with no cars\n",
- " \"B18101_001E\", # Total Population with Disability\n",
- " \"B19058_001E\" # Public Assistance Income or Food Stamps/SNAP in past 12 months for Households\n",
- "]\n",
- "\n",
- " "
+ "## Querying ACS data via the Census API and uploading the results to a GCS bucket for later use."
]
},
{
- "cell_type": "code",
- "execution_count": 5,
- "id": "5b08cfbf-e7ed-4b4a-8e38-68e66a760086",
+ "cell_type": "markdown",
+ "id": "152f62a1-dfe6-4100-8d74-5e4ceffbee11",
"metadata": {},
- "outputs": [],
"source": [
- "variable_str = \"NAME,\" + \",\".join(variables)"
+ "Uncomment and run the cells below as needed to include additional ACS variables."
]
},
{
"cell_type": "code",
"execution_count": 6,
- "id": "ad038c7f-7aef-4a3d-a878-00190d1b9fb9",
+ "id": "bcece4ad-cb36-47e5-8ec9-1ab7d909f472",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
- "url = f\"https://api.census.gov/data/2023/acs/acs5?get={variable_str}&for=tract:*&in=state:06&key={api_key}\""
+ "# with open (\"ACS_apikey\", \"r\") as file:\n",
+ "# api_key = file.read().strip()"
]
},
{
"cell_type": "code",
"execution_count": 7,
- "id": "00b4b017-18de-42fa-98b0-6d91950a9474",
+ "id": "2fc77411-cfa0-45b8-9103-f1972f92e9ad",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
- "response = requests.get(url)"
+ "# # County Level Metrics required: \"Total Population\", \"Total Veteran Population\", \"Total Senior Population\", \"Total Low Income Population\"\n",
+ "# variables = [\n",
+ "# \"B01003_001E\", # Total Population\n",
+ "# \"B17001_002E\", # Population with Income in the past 12 months below poverty level\n",
+ "# \"B16008_037E\", # Non US Citizen Population\n",
+ "# \"B01001_020E\", \"B01001_021E\", \"B01001_022E\", \"B01001_023E\", \"B01001_024E\", \"B01001_025E\", # Male senior population : 65 and above\n",
+ "# \"B01001_044E\", \"B01001_045E\", \"B01001_046E\", \"B01001_047E\", \"B01001_048E\", \"B01001_049E\", # Female senior population : 65 and above\n",
+ "# \"B19013_001E\", # Median household income in the past 12 months (2023 Inflation adjusted dollars)\n",
+ "# \"B06010_004E\", \"B06010_005E\", \"B06010_006E\", # Population with extremely low income\n",
+ "# \"B06010_007E\", \"B06010_008E\", # Population with very low income\n",
+ "# \"B06010_009E\", \"B06010_010E\", # Population with low income \n",
+ "# \"B08014_002E\", \"B08201_002E\", # Workers and Households with no cars\n",
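+ "# \"B18101_001E\", # Total Population with Disability -- NOTE(review): _001E appears to be the B18101 table total (its whole universe), not the count with a disability; confirm the intended variable(s)\n",
+ "# \"B19058_001E\", # Public Assistance Income or Food Stamps/SNAP in past 12 months for Households -- NOTE(review): _001E appears to be the B19058 table total (all households); _002E looks like the with-assistance count -- confirm\n",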
+ "# \"B21001_002E\" # Population with veteran status: 18 and above\n",
+ "# ]\n",
+ "\n",
+ " "
]
},
{
"cell_type": "code",
"execution_count": 8,
- "id": "94684cc6-092a-4ed3-8872-76e3e7a4ef33",
- "metadata": {
- "tags": []
- },
+ "id": "5b08cfbf-e7ed-4b4a-8e38-68e66a760086",
+ "metadata": {},
"outputs": [],
"source": [
- "if response.status_code == 200:\n",
- " data = response.json()\n",
- " census_data = pd.DataFrame(data[1:], columns=data[0])\n",
+ "# variable_str = \"NAME,\" + \",\".join(variables)\n",
+ "# url = f\"https://api.census.gov/data/2023/acs/acs5?get={variable_str}&for=tract:*&in=state:06&key={api_key}\"\n",
+ "# response = requests.get(url)\n",
+ "\n",
+ "# if response.status_code == 200:\n",
+ "# data = response.json()\n",
+ "# census_data = pd.DataFrame(data[1:], columns=data[0])\n",
" \n",
- " # Create GEOID column\n",
- " census_data[\"GEOID\"] = census_data[\"state\"] + census_data[\"county\"] + census_data[\"tract\"]"
+ "# # Create GEOID column\n",
+ "# census_data[\"GEOID\"] = census_data[\"state\"] + census_data[\"county\"] + census_data[\"tract\"]\n",
+ "\n",
+ "# census_data['county_name'] = census_data['NAME'].str.extract(r';\\s*([A-Za-z\\s]+) County;')\n",
+ "# census_data = census_data.drop(columns=['NAME'])\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
- "id": "b69bf253-d227-4ef5-9601-f4f3605f1877",
+ "id": "a0808315-79b0-42ee-8653-222785770048",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
- "census_data['county_name'] = census_data['NAME'].str.extract(r';\\s*([A-Za-z\\s]+) County;')"
+ "# census_data = census_data.rename(columns = {\n",
+ "# 'B01003_001E': 'total_pop',\n",
+ "# 'B17001_002E': 'poverty_pop',\n",
+ "# 'B16008_037E': 'non_us_citizen',\n",
+ "# 'B01001_020E': 'male_65_to_66', 'B01001_021E': 'male_67_to_69', 'B01001_022E': 'male_70_to_74', \n",
+ "# 'B01001_023E': 'male_75_to_79', 'B01001_024E': 'male_80_to_84', 'B01001_025E': 'male_85_and_over',\n",
+ "# 'B01001_044E': 'female_65_to_66', 'B01001_045E': 'female_67_to_69', 'B01001_046E': 'female_70_to_74', \n",
+ "# 'B01001_047E': 'female_75_to_79', 'B01001_048E': 'female_80_to_84', 'B01001_049E': 'female_85_and_over',\n",
+ "# 'B19013_001E': 'median_household_income',\n",
+ "# 'B06010_004E': 'income_less_10000', 'B06010_005E': 'income_10000_14999', 'B06010_006E': 'income_15000_24999', \n",
+ "# 'B06010_007E': 'income_25000_34999', 'B06010_008E': 'income_35000_49999',\n",
+ "# 'B06010_009E': 'income_50000_64999', 'B06010_010E': 'income_65000_74999',\n",
+ "# 'B08014_002E': 'workers_with_no_car', 'B08201_002E': 'households_with_no_cars',\n",
+ "# 'B18101_001E': 'disabled_pop',\n",
+ "# 'B19058_001E': 'public_asst_pop',\n",
+ "# 'B21001_002E': 'veteran_pop'\n",
+ "# })"
]
},
{
"cell_type": "code",
"execution_count": 10,
- "id": "5a03871e-f6b4-46fc-99d2-0dbfa04d1f38",
+ "id": "e3ddd869-a418-4bbd-9349-92bdba346012",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
- "census_data = census_data.drop(columns=['NAME'])"
+ "# exclude = ['state', 'county', 'tract', 'county_name', 'GEOID']\n",
+ "# cols_to_numeric = [col for col in census_data.columns if col not in exclude]\n",
+ "# census_data[cols_to_numeric] = census_data[cols_to_numeric].apply(pd.to_numeric, errors='coerce')"
]
},
{
"cell_type": "code",
"execution_count": 11,
- "id": "a0808315-79b0-42ee-8653-222785770048",
+ "id": "c0b7ce56-ca45-4a7d-8f45-ddab3e28606f",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
- "census_data = census_data.rename(columns = {\n",
- " 'B01003_001E': 'total_pop',\n",
- " 'B17001_002E': 'poverty_pop',\n",
- " 'B16008_037E': 'non_us_citizen',\n",
- " 'B01001_020E': 'male_65_to_66', 'B01001_021E': 'male_67_to_69', 'B01001_022E': 'male_70_to_74', \n",
- " 'B01001_023E': 'male_75_to_79', 'B01001_024E': 'male_80_to_84', 'B01001_025E': 'male_85_and_over',\n",
- " 'B01001_044E': 'female_65_to_66', 'B01001_045E': 'female_67_to_69', 'B01001_046E': 'female_70_to_74', \n",
- " 'B01001_047E': 'female_75_to_79', 'B01001_048E': 'female_80_to_84', 'B01001_049E': 'female_85_and_over',\n",
- " 'B06010_004E': 'income_less_10000', 'B06010_005E': 'income_10000_14999', 'B06010_006E': 'income_15000_24999', \n",
- " 'B06010_007E': 'income_25000_34999', 'B06010_008E': 'income_35000_49999',\n",
- " 'B06010_009E': 'income_50000_64999', 'B06010_010E': 'income_65000_74999',\n",
- " 'B08014_002E': 'workers_with_no_car', 'B08201_002E': 'households_with_no_cars',\n",
- " 'B18101_001E': 'disabled_pop',\n",
- " 'B19058_001E': 'public_asst_pop'\n",
- "})"
+ "# # Store data in warehouse\n",
+ "# with fs.open(f\"{GCS_FILE_PATH}/transit_provider_dashboard/census_data_2023.parquet\", \"wb\") as f:\n",
+ "# census_data.to_parquet(f, index=False)"
]
},
{
"cell_type": "code",
"execution_count": 12,
- "id": "e3ddd869-a418-4bbd-9349-92bdba346012",
- "metadata": {
- "tags": []
- },
+ "id": "f7477af7-e3ee-44f5-b292-bd423280a0f2",
+ "metadata": {},
"outputs": [],
"source": [
- "exclude = ['state', 'county', 'tract', 'county_name', 'GEOID']\n",
- "cols_to_numeric = [col for col in census_data.columns if col not in exclude]\n",
- "census_data[cols_to_numeric] = census_data[cols_to_numeric].apply(pd.to_numeric, errors='coerce')"
+ "# Read the cached 2023 ACS table back from GCS instead of re-querying the Census API.\n",
+ "census_parquet_uri = f\"{GCS_FILE_PATH}/transit_provider_dashboard/census_data_2023.parquet\"\n",
+ "with fs.open(census_parquet_uri, \"rb\") as parquet_file:\n",
+ "    census_data = pd.read_parquet(parquet_file)"
]
},
{
@@ -235,6 +324,7 @@
"
female_75_to_79 | \n",
" female_80_to_84 | \n",
" female_85_and_over | \n",
+ " median_household_income | \n",
" income_less_10000 | \n",
" income_10000_14999 | \n",
" income_15000_24999 | \n",
@@ -246,6 +336,7 @@
" households_with_no_cars | \n",
" disabled_pop | \n",
" public_asst_pop | \n",
+ " veteran_pop | \n",
" state | \n",
" county | \n",
" tract | \n",
@@ -271,6 +362,7 @@
" 85 | \n",
" 105 | \n",
" 107 | \n",
+ " 250001 | \n",
" 188 | \n",
" 75 | \n",
" 134 | \n",
@@ -282,6 +374,7 @@
" 85 | \n",
" 3094 | \n",
" 1316 | \n",
+ " 129 | \n",
" 06 | \n",
" 001 | \n",
" 400100 | \n",
@@ -305,6 +398,7 @@
" 96 | \n",
" 34 | \n",
" 13 | \n",
+ " 225880 | \n",
" 75 | \n",
" 70 | \n",
" 89 | \n",
@@ -316,6 +410,7 @@
" 95 | \n",
" 2093 | \n",
" 861 | \n",
+ " 38 | \n",
" 06 | \n",
" 001 | \n",
" 400200 | \n",
@@ -339,6 +434,7 @@
" 158 | \n",
" 13 | \n",
" 142 | \n",
+ " 157731 | \n",
" 383 | \n",
" 201 | \n",
" 300 | \n",
@@ -350,6 +446,7 @@
" 416 | \n",
" 5727 | \n",
" 2713 | \n",
+ " 80 | \n",
" 06 | \n",
" 001 | \n",
" 400300 | \n",
@@ -373,6 +470,7 @@
" 43 | \n",
" 23 | \n",
" 30 | \n",
+ " 159612 | \n",
" 187 | \n",
" 105 | \n",
" 287 | \n",
@@ -384,6 +482,7 @@
" 204 | \n",
" 4376 | \n",
" 1803 | \n",
+ " 88 | \n",
" 06 | \n",
" 001 | \n",
" 400400 | \n",
@@ -407,6 +506,7 @@
" 50 | \n",
" 60 | \n",
" 203 | \n",
+ " 96250 | \n",
" 256 | \n",
" 91 | \n",
" 244 | \n",
@@ -418,6 +518,7 @@
" 169 | \n",
" 3822 | \n",
" 1655 | \n",
+ " 115 | \n",
" 06 | \n",
" 001 | \n",
" 400500 | \n",
@@ -450,40 +551,47 @@
"3 55 105 104 43 \n",
"4 19 47 51 50 \n",
"\n",
- " female_80_to_84 female_85_and_over income_less_10000 income_10000_14999 \\\n",
- "0 105 107 188 75 \n",
- "1 34 13 75 70 \n",
- "2 13 142 383 201 \n",
- "3 23 30 187 105 \n",
- "4 60 203 256 91 \n",
+ " female_80_to_84 female_85_and_over median_household_income \\\n",
+ "0 105 107 250001 \n",
+ "1 34 13 225880 \n",
+ "2 13 142 157731 \n",
+ "3 23 30 159612 \n",
+ "4 60 203 96250 \n",
"\n",
- " income_15000_24999 income_25000_34999 income_35000_49999 \\\n",
- "0 134 157 87 \n",
- "1 89 12 207 \n",
- "2 300 251 400 \n",
- "3 287 215 207 \n",
- "4 244 213 385 \n",
+ " income_less_10000 income_10000_14999 income_15000_24999 \\\n",
+ "0 188 75 134 \n",
+ "1 75 70 89 \n",
+ "2 383 201 300 \n",
+ "3 187 105 287 \n",
+ "4 256 91 244 \n",
"\n",
- " income_50000_64999 income_65000_74999 workers_with_no_car \\\n",
- "0 129 70 28 \n",
- "1 77 32 92 \n",
- "2 148 291 157 \n",
- "3 178 87 134 \n",
- "4 387 244 74 \n",
- "\n",
- " households_with_no_cars disabled_pop public_asst_pop state county \\\n",
- "0 85 3094 1316 06 001 \n",
- "1 95 2093 861 06 001 \n",
- "2 416 5727 2713 06 001 \n",
- "3 204 4376 1803 06 001 \n",
- "4 169 3822 1655 06 001 \n",
- "\n",
- " tract GEOID county_name \n",
- "0 400100 06001400100 Alameda \n",
- "1 400200 06001400200 Alameda \n",
- "2 400300 06001400300 Alameda \n",
- "3 400400 06001400400 Alameda \n",
- "4 400500 06001400500 Alameda "
+ " income_25000_34999 income_35000_49999 income_50000_64999 \\\n",
+ "0 157 87 129 \n",
+ "1 12 207 77 \n",
+ "2 251 400 148 \n",
+ "3 215 207 178 \n",
+ "4 213 385 387 \n",
+ "\n",
+ " income_65000_74999 workers_with_no_car households_with_no_cars \\\n",
+ "0 70 28 85 \n",
+ "1 32 92 95 \n",
+ "2 291 157 416 \n",
+ "3 87 134 204 \n",
+ "4 244 74 169 \n",
+ "\n",
+ " disabled_pop public_asst_pop veteran_pop state county tract \\\n",
+ "0 3094 1316 129 06 001 400100 \n",
+ "1 2093 861 38 06 001 400200 \n",
+ "2 5727 2713 80 06 001 400300 \n",
+ "3 4376 1803 88 06 001 400400 \n",
+ "4 3822 1655 115 06 001 400500 \n",
+ "\n",
+ " GEOID county_name \n",
+ "0 06001400100 Alameda \n",
+ "1 06001400200 Alameda \n",
+ "2 06001400300 Alameda \n",
+ "3 06001400400 Alameda \n",
+ "4 06001400500 Alameda "
]
},
"execution_count": 13,
@@ -495,16 +603,34 @@
"census_data.head(5)"
]
},
+ {
+ "cell_type": "markdown",
+ "id": "eb8dd45d-7dac-4b3f-a6e6-4187b210d2a5",
+ "metadata": {},
+ "source": [
+ "“Low-income households” are those with household incomes at or below 80 percent of the statewide median income."
+ ]
+ },
{
"cell_type": "code",
"execution_count": 14,
+ "id": "ab510e4b-8b0e-463f-85cd-7baa0bc8b92f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# TODO: find the low-income threshold (80 percent of the statewide median income)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
"id": "6647104f-db0a-4cb5-9908-c30d009e568e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
- "#Creating custom income variables \n",
+ "# Roll the ACS income brackets up into three broader groups (extremely low, very low, low)\n",
+ "# by adding the bracket columns of each group together, left to right.\n",
+ "income_groups = {\n",
+ "    'inc_extremelylow': ['income_less_10000', 'income_10000_14999', 'income_15000_24999'],\n",
+ "    'inc_verylow': ['income_25000_34999', 'income_35000_49999'],\n",
+ "    'inc_low': ['income_50000_64999', 'income_65000_74999'],\n",
+ "}\n",
+ "for group_col, (first_bracket, *other_brackets) in income_groups.items():\n",
+ "    census_data[group_col] = sum(\n",
+ "        (census_data[col] for col in other_brackets),\n",
+ "        start=census_data[first_bracket],\n",
+ "    )"
@@ -512,20 +638,29 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 16,
"id": "5b637751-060f-46ce-9b8c-41c94dd07620",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
+ "# Sum the 65+ age brackets into total male and female senior populations.\n",
+ "# Columns are selected by name rather than with a 'first':'last' label slice, so the totals\n",
+ "# cannot silently pick up an unrelated column if the stored table's column order changes.\n",
+ "senior_age_brackets = ['65_to_66', '67_to_69', '70_to_74', '75_to_79', '80_to_84', '85_and_over']\n",
+ "for sex in ('male', 'female'):\n",
+ "    senior_cols = [f'{sex}_{bracket}' for bracket in senior_age_brackets]\n",
+ "    census_data[f'{sex}_seniors'] = census_data[senior_cols].sum(axis=1)"
]
},
+ {
+ "cell_type": "markdown",
+ "id": "958f1dfb-710b-4ef5-a6b1-22e8d89814bc",
+ "metadata": {},
+ "source": [
+ "## Census Tract Geometry Processing"
+ ]
+ },
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 17,
"id": "55013195-b315-4889-abf6-951091d09e05",
"metadata": {},
"outputs": [
@@ -540,12 +675,32 @@
"source": [
"#Retrieving Tract Geometries for California\n",
"ca_tracts = tracts(state = \"CA\", cb = True,\n",
- " year = 2023, cache = True)"
+ " year = 2023, cache = True)\n",
+ "\n"
]
},
{
"cell_type": "code",
"execution_count": 18,
+ "id": "e444d31c-b572-4831-83ec-b705970da9c8",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/conda/lib/python3.11/site-packages/geopandas/geodataframe.py:2475: UserWarning: `keep_geom_type=True` in overlay resulted in 549 dropped geometries of different geometry types than df1 has. Set `keep_geom_type=False` to retain all geometries\n",
+ " return geopandas.overlay(\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Erase water areas from the tract polygons (pygris' default area threshold applies).\n",
+ "# cache=True reuses previously downloaded water files on re-runs, like the cached tracts() call above.\n",
+ "ca_tracts = erase_water(ca_tracts, cache=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
"id": "986d2bcf-3e7f-4746-abe4-627460f68406",
"metadata": {},
"outputs": [],
@@ -554,34 +709,1638 @@
"tracts_ca_acs = ca_tracts.merge(census_data, how=\"inner\", on=\"GEOID\")"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "0381662d-9cfe-4efd-a971-47fda3567c85",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Project the merged tract layer into California Albers (EPSG:3310).\n",
+ "CA_ALBERS_EPSG = 3310\n",
+ "tracts_ca_acs = tracts_ca_acs.to_crs(epsg=CA_ALBERS_EPSG)"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 21,
+ "id": "0bd06747-c70c-4418-b136-eb34f8a1fd7d",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Store each tract polygon's area; the layer is in EPSG:3310, so the values are in square meters.\n",
+ "tract_area_m2 = tracts_ca_acs.area\n",
+ "tracts_ca_acs[\"area_m2\"] = tract_area_m2"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8fdaa98a-c420-415f-946b-c2db711a4cf6",
+ "metadata": {},
+ "source": [
+ "## Querying Organization Data from the Data Warehouse and Storing in GCS"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8ca77331-4e37-43b7-b4a9-83a823a3e087",
+ "metadata": {},
+ "source": [
+ "Uncomment and run the cells below as needed to include additional columns from the dim_organizations table."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
"id": "1a89c9b5-4b7c-45c8-9d1e-632e1006b653",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
- "# Querying dim organization\n",
+ "# # Querying dim organization\n",
+ "# with db_engine.connect() as connection:\n",
+ "# query = \"\"\"\n",
+ "# SELECT\n",
+ "# key, name, organization_type, ntd_id, ntd_agency_info_key, \n",
+ "# public_currently_operating, _is_current, _valid_from, _valid_to\n",
+ "# FROM \n",
+ "# cal-itp-data-infra.mart_transit_database.dim_organizations\n",
+ "# \"\"\"\n",
+ " \n",
+ "# #localize timestamps\n",
+ "# dim_orgs = (\n",
+ "# pd.read_sql(query, connection)\n",
+ "# .pipe(schedule_rt_utils.localize_timestamp_col, [\"_valid_from\", \"_valid_to\"])\n",
+ "# )\n",
+ " \n",
+ " \n",
+ "# dim_orgs = dim_orgs[\n",
+ "# (dim_orgs['public_currently_operating'] == True) & \n",
+ "# (dim_orgs['_is_current'] == True)\n",
+ "# ].reset_index(drop=True)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "4211d3bf-0742-4a8f-9fd3-5ab435168ddf",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# # Filtering the provider gtfs data to valid dates \n",
+ "# valid_organization_full = filter_to_valid_dates(dim_orgs, [analysis_date])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "fa8ce12e-e993-446c-a4b0-aafca6974275",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# #Store data in warehouse\n",
+ "# with fs.open(f\"{GCS_FILE_PATH}/transit_provider_dashboard/organization_data_2025_08_20.parquet\", \"wb\") as f:\n",
+ "# valid_organization_full.to_parquet(f, index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "4916dd04-523b-4dda-aca7-599a90560b19",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Load the stored organization dataset from the specified GCS file path.\n",
+ "with fs.open(f\"{GCS_FILE_PATH}/transit_provider_dashboard/organization_data_2025_08_20.parquet\", \"rb\") as f:\n",
+ " valid_organization_full = pd.read_parquet(f)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "ca225047-9991-4ab1-bbde-e5e720418782",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " key | \n",
+ " name | \n",
+ " organization_type | \n",
+ " ntd_id | \n",
+ " ntd_agency_info_key | \n",
+ " public_currently_operating | \n",
+ " _is_current | \n",
+ " _valid_from | \n",
+ " _valid_to | \n",
+ " _valid_from_local | \n",
+ " _valid_to_local | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 9b5971d16d58e4fcafa694ee7fa33b12 | \n",
+ " Alpine County | \n",
+ " County | \n",
+ " 9R02-91116 | \n",
+ " rec02Is8jSIBDkwM0 | \n",
+ " True | \n",
+ " True | \n",
+ " 2025-03-06 00:00:00+00:00 | \n",
+ " 2098-12-31 23:59:59.999999+00:00 | \n",
+ " 2025-03-05 16:00:00 | \n",
+ " 2098-12-31 15:59:59.999999 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 73ed19bf64f9ba305091973b3f45d553 | \n",
+ " Camarillo Health Care District | \n",
+ " Independent Agency | \n",
+ " None | \n",
+ " None | \n",
+ " True | \n",
+ " True | \n",
+ " 2025-03-06 00:00:00+00:00 | \n",
+ " 2098-12-31 23:59:59.999999+00:00 | \n",
+ " 2025-03-05 16:00:00 | \n",
+ " 2098-12-31 15:59:59.999999 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 402b2852ff46b95557801fbf3038ae7c | \n",
+ " Chemehuevi Indian Tribe | \n",
+ " Tribe | \n",
+ " 99316 | \n",
+ " reclUB9NcCQrSImfd | \n",
+ " True | \n",
+ " True | \n",
+ " 2025-03-06 00:00:00+00:00 | \n",
+ " 2098-12-31 23:59:59.999999+00:00 | \n",
+ " 2025-03-05 16:00:00 | \n",
+ " 2098-12-31 15:59:59.999999 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 3a93c944381ee6c34646fa2dbf8b3d8f | \n",
+ " City of Atascadero | \n",
+ " City/Town | \n",
+ " 90194 | \n",
+ " recMmQSjQCzABlmh1 | \n",
+ " True | \n",
+ " True | \n",
+ " 2025-03-06 00:00:00+00:00 | \n",
+ " 2098-12-31 23:59:59.999999+00:00 | \n",
+ " 2025-03-05 16:00:00 | \n",
+ " 2098-12-31 15:59:59.999999 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " e56f748b8cf235ca2acee940b9f60d64 | \n",
+ " City of Azusa | \n",
+ " City/Town | \n",
+ " 90250 | \n",
+ " recbLanAuzm5QituE | \n",
+ " True | \n",
+ " True | \n",
+ " 2025-03-06 00:00:00+00:00 | \n",
+ " 2098-12-31 23:59:59.999999+00:00 | \n",
+ " 2025-03-05 16:00:00 | \n",
+ " 2098-12-31 15:59:59.999999 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " key name \\\n",
+ "0 9b5971d16d58e4fcafa694ee7fa33b12 Alpine County \n",
+ "1 73ed19bf64f9ba305091973b3f45d553 Camarillo Health Care District \n",
+ "2 402b2852ff46b95557801fbf3038ae7c Chemehuevi Indian Tribe \n",
+ "3 3a93c944381ee6c34646fa2dbf8b3d8f City of Atascadero \n",
+ "4 e56f748b8cf235ca2acee940b9f60d64 City of Azusa \n",
+ "\n",
+ " organization_type ntd_id ntd_agency_info_key \\\n",
+ "0 County 9R02-91116 rec02Is8jSIBDkwM0 \n",
+ "1 Independent Agency None None \n",
+ "2 Tribe 99316 reclUB9NcCQrSImfd \n",
+ "3 City/Town 90194 recMmQSjQCzABlmh1 \n",
+ "4 City/Town 90250 recbLanAuzm5QituE \n",
+ "\n",
+ " public_currently_operating _is_current _valid_from \\\n",
+ "0 True True 2025-03-06 00:00:00+00:00 \n",
+ "1 True True 2025-03-06 00:00:00+00:00 \n",
+ "2 True True 2025-03-06 00:00:00+00:00 \n",
+ "3 True True 2025-03-06 00:00:00+00:00 \n",
+ "4 True True 2025-03-06 00:00:00+00:00 \n",
+ "\n",
+ " _valid_to _valid_from_local \\\n",
+ "0 2098-12-31 23:59:59.999999+00:00 2025-03-05 16:00:00 \n",
+ "1 2098-12-31 23:59:59.999999+00:00 2025-03-05 16:00:00 \n",
+ "2 2098-12-31 23:59:59.999999+00:00 2025-03-05 16:00:00 \n",
+ "3 2098-12-31 23:59:59.999999+00:00 2025-03-05 16:00:00 \n",
+ "4 2098-12-31 23:59:59.999999+00:00 2025-03-05 16:00:00 \n",
+ "\n",
+ " _valid_to_local \n",
+ "0 2098-12-31 15:59:59.999999 \n",
+ "1 2098-12-31 15:59:59.999999 \n",
+ "2 2098-12-31 15:59:59.999999 \n",
+ "3 2098-12-31 15:59:59.999999 \n",
+ "4 2098-12-31 15:59:59.999999 "
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "valid_organization_full.head(5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4c0c1a26-1cba-43b0-9cd0-e356ce40efcb",
+ "metadata": {},
+ "source": [
+ "## Querying Bridge Organization GTFS Datasets and Merging with Dim Organizations Table"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "ed1cd1ac-5a21-4a5d-9b1a-5522564896c1",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Querying bridge organizations and gtfs_datasets\n",
"with db_engine.connect() as connection:\n",
" query = \"\"\"\n",
" SELECT\n",
- " source_record_id, organization_type, ntd_id, ntd_agency_info_key, \n",
- " public_currently_operating, _is_current,_valid_from, _valid_to\n",
- " FROM \n",
- " cal-itp-data-infra.mart_transit_database.dim_organizations\n",
+ " organization_key, gtfs_dataset_key, organization_name\n",
+ " FROM\n",
+ " cal-itp-data-infra.mart_transit_database.bridge_organizations_x_gtfs_datasets_produced\n",
" \"\"\"\n",
- " dim_orgs= pd.read_sql(query, connection)"
+ " dim_orgs_GTFS= pd.read_sql(query, connection)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "a142623a-62cf-4411-9cf6-9d85f1b5ce22",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Left-join the date-valid organizations to the organization / GTFS-dataset bridge on both\n",
+ "# key and name; rows with a null join field are removed from each side before joining.\n",
+ "orgs_with_keys = valid_organization_full.dropna(subset=['key', 'name'])\n",
+ "bridge_with_keys = dim_orgs_GTFS.dropna(subset=['organization_key', 'organization_name'])\n",
+ "dim_orgs_merged = orgs_with_keys.merge(\n",
+ "    bridge_with_keys,\n",
+ "    how='left',\n",
+ "    left_on=['key', 'name'],\n",
+ "    right_on=['organization_key', 'organization_name'],\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "c66cf805-de18-4723-b46b-be7bf84fa2b1",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Keep only rows where both 'organization_key' and 'gtfs_dataset_key' are present.\n",
+ "has_bridge_keys = dim_orgs_merged[['organization_key', 'gtfs_dataset_key']].notna().all(axis=1)\n",
+ "dim_orgs_merged = dim_orgs_merged[has_bridge_keys]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "01eccdb1-4bd8-4670-8f9c-3a2a0e94948d",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Narrow the merged table to the columns kept in the final organization dataset.\n",
+ "final_org_columns = [\n",
+ "    'key', 'name', 'organization_type',\n",
+ "    'gtfs_dataset_key', 'ntd_id', 'ntd_agency_info_key',\n",
+ "]\n",
+ "dim_orgs_final = dim_orgs_merged[final_org_columns]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "457b636a-705d-4085-91f8-db64d0142ef1",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " key | \n",
+ " name | \n",
+ " organization_type | \n",
+ " gtfs_dataset_key | \n",
+ " ntd_id | \n",
+ " ntd_agency_info_key | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 16 | \n",
+ " 306bafde22fe614e0a6af2269625d8f6 | \n",
+ " City of Menlo Park | \n",
+ " City/Town | \n",
+ " b76861f44c68f440d922c54ac1231d31 | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 32 | \n",
+ " 1906a01d5cb664c5e898a95276912bfe | \n",
+ " Town of Truckee | \n",
+ " City/Town | \n",
+ " 6fda78099793184fe08dd78945d188c0 | \n",
+ " 9R02-91101 | \n",
+ " receHP6eQInAo7sSP | \n",
+ "
\n",
+ " \n",
+ " | 33 | \n",
+ " 1906a01d5cb664c5e898a95276912bfe | \n",
+ " Town of Truckee | \n",
+ " City/Town | \n",
+ " 683da99e57acc29ac600a24cbd96feda | \n",
+ " 9R02-91101 | \n",
+ " receHP6eQInAo7sSP | \n",
+ "
\n",
+ " \n",
+ " | 34 | \n",
+ " aad5befa7fcfce979f2113e373e48aa6 | \n",
+ " Yosemite National Park | \n",
+ " Federal Government | \n",
+ " 31152914d10e2d0977b8b2fabb167922 | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 35 | \n",
+ " aad5befa7fcfce979f2113e373e48aa6 | \n",
+ " Yosemite National Park | \n",
+ " Federal Government | \n",
+ " 31f91d59f493cbee9ae0eeb824f44d0e | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " key name \\\n",
+ "16 306bafde22fe614e0a6af2269625d8f6 City of Menlo Park \n",
+ "32 1906a01d5cb664c5e898a95276912bfe Town of Truckee \n",
+ "33 1906a01d5cb664c5e898a95276912bfe Town of Truckee \n",
+ "34 aad5befa7fcfce979f2113e373e48aa6 Yosemite National Park \n",
+ "35 aad5befa7fcfce979f2113e373e48aa6 Yosemite National Park \n",
+ "\n",
+ " organization_type gtfs_dataset_key ntd_id \\\n",
+ "16 City/Town b76861f44c68f440d922c54ac1231d31 None \n",
+ "32 City/Town 6fda78099793184fe08dd78945d188c0 9R02-91101 \n",
+ "33 City/Town 683da99e57acc29ac600a24cbd96feda 9R02-91101 \n",
+ "34 Federal Government 31152914d10e2d0977b8b2fabb167922 None \n",
+ "35 Federal Government 31f91d59f493cbee9ae0eeb824f44d0e None \n",
+ "\n",
+ " ntd_agency_info_key \n",
+ "16 None \n",
+ "32 receHP6eQInAo7sSP \n",
+ "33 receHP6eQInAo7sSP \n",
+ "34 None \n",
+ "35 None "
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dim_orgs_final.head(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "b7292811-58ec-4672-a2e3-b9c133e79723",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Int64Index: 444 entries, 16 to 525\n",
+ "Data columns (total 6 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 key 444 non-null object\n",
+ " 1 name 444 non-null object\n",
+ " 2 organization_type 444 non-null object\n",
+ " 3 gtfs_dataset_key 444 non-null object\n",
+ " 4 ntd_id 420 non-null object\n",
+ " 5 ntd_agency_info_key 373 non-null object\n",
+ "dtypes: object(6)\n",
+ "memory usage: 24.3+ KB\n"
+ ]
+ }
+ ],
+ "source": [
+ "dim_orgs_final.info()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2e7fb6c3-b176-4d1d-9a35-8a7f7bbb8a5c",
+ "metadata": {},
+ "source": [
+ "## Loading Transit Stop Data and Merging Stop Data with Organization Information"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "id": "b826d41d-720f-49ba-9b77-b7186dd95bf6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def prep_stops(analysis_date: str):\n",
+ "    \"\"\"Read the stop_times_direction parquet for ``analysis_date`` from GCS.\n",
+ "\n",
+ "    Only the schedule GTFS dataset key, feed key, stop id, stop name and geometry\n",
+ "    columns are loaded; the request authenticates with ``credentials.token``.\n",
+ "\n",
+ "    Returns the result of ``gpd.read_parquet`` unchanged.\n",
+ "    \"\"\"\n",
+ "    parquet_path = f\"{GCS_FILE_PATH}/rt_vs_schedule/stop_times_direction_{analysis_date}.parquet\"\n",
+ "    stop_columns = [\"schedule_gtfs_dataset_key\", \"feed_key\", \"stop_id\", \"stop_name\", \"geometry\"]\n",
+ "\n",
+ "    return gpd.read_parquet(\n",
+ "        parquet_path,\n",
+ "        columns=stop_columns,\n",
+ "        storage_options={'token': credentials.token},\n",
+ "    )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "id": "f1cd0c41-8577-4733-b67c-7d1fe08f9aa1",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 4595211 entries, 0 to 4595210\n",
+ "Data columns (total 5 columns):\n",
+ " # Column Dtype \n",
+ "--- ------ ----- \n",
+ " 0 schedule_gtfs_dataset_key object \n",
+ " 1 feed_key object \n",
+ " 2 stop_id object \n",
+ " 3 stop_name object \n",
+ " 4 geometry geometry\n",
+ "dtypes: geometry(1), object(4)\n",
+ "memory usage: 175.3+ MB\n"
+ ]
+ }
+ ],
+ "source": [
+ "stops = prep_stops(analysis_date)\n",
+ "stops.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "id": "160c660b-d683-4eb2-ae38-7f9d1a412cda",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " schedule_gtfs_dataset_key | \n",
+ " feed_key | \n",
+ " stop_id | \n",
+ " stop_name | \n",
+ " geometry | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 723210f3a6d61ee3936df401e18a5636 | \n",
+ " 15b542ef6dbfd2903710095179e84b25 | \n",
+ " TL-3 | \n",
+ " Terminal 1 | \n",
+ " POINT (147834.197 -450957.957) | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 723210f3a6d61ee3936df401e18a5636 | \n",
+ " 15b542ef6dbfd2903710095179e84b25 | \n",
+ " TL-4 | \n",
+ " Terminal 2 | \n",
+ " POINT (147598.785 -450990.106) | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 723210f3a6d61ee3936df401e18a5636 | \n",
+ " 15b542ef6dbfd2903710095179e84b25 | \n",
+ " TL-5 | \n",
+ " Terminal 3 | \n",
+ " POINT (147265.199 -451037.318) | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 723210f3a6d61ee3936df401e18a5636 | \n",
+ " 15b542ef6dbfd2903710095179e84b25 | \n",
+ " TL-6 | \n",
+ " International Terminal | \n",
+ " POINT (147144.316 -451145.363) | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 723210f3a6d61ee3936df401e18a5636 | \n",
+ " 15b542ef6dbfd2903710095179e84b25 | \n",
+ " TL-7 | \n",
+ " Terminal 4 | \n",
+ " POINT (147272.606 -451317.665) | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " schedule_gtfs_dataset_key feed_key stop_id \\\n",
+ "0 723210f3a6d61ee3936df401e18a5636 15b542ef6dbfd2903710095179e84b25 TL-3 \n",
+ "1 723210f3a6d61ee3936df401e18a5636 15b542ef6dbfd2903710095179e84b25 TL-4 \n",
+ "2 723210f3a6d61ee3936df401e18a5636 15b542ef6dbfd2903710095179e84b25 TL-5 \n",
+ "3 723210f3a6d61ee3936df401e18a5636 15b542ef6dbfd2903710095179e84b25 TL-6 \n",
+ "4 723210f3a6d61ee3936df401e18a5636 15b542ef6dbfd2903710095179e84b25 TL-7 \n",
+ "\n",
+ " stop_name geometry \n",
+ "0 Terminal 1 POINT (147834.197 -450957.957) \n",
+ "1 Terminal 2 POINT (147598.785 -450990.106) \n",
+ "2 Terminal 3 POINT (147265.199 -451037.318) \n",
+ "3 International Terminal POINT (147144.316 -451145.363) \n",
+ "4 Terminal 4 POINT (147272.606 -451317.665) "
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "stops.head(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "id": "1f77a566-31f2-4f9e-9dda-d8d74b79487a",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Keep stops that have both an id and a name, then inner-join them to the organizations\n",
+ "# through the GTFS dataset key so every remaining stop row carries its organization fields.\n",
+ "orgs_stops = (\n",
+ "    stops\n",
+ "    .dropna(subset=['stop_id', 'stop_name'])\n",
+ "    .merge(\n",
+ "        dim_orgs_final,\n",
+ "        how='inner',\n",
+ "        left_on='schedule_gtfs_dataset_key',\n",
+ "        right_on='gtfs_dataset_key',\n",
+ "    )\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "id": "7d76cc04-e782-46b8-a748-1c9e43e077df",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "orgs_stops = orgs_stops.drop_duplicates()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "id": "2ac56dc1-0c04-48e6-9114-04535ed00c76",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Int64Index: 53511 entries, 0 to 2788870\n",
+ "Data columns (total 11 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 schedule_gtfs_dataset_key 53511 non-null object \n",
+ " 1 feed_key 53511 non-null object \n",
+ " 2 stop_id 53511 non-null object \n",
+ " 3 stop_name 53511 non-null object \n",
+ " 4 geometry 53511 non-null geometry\n",
+ " 5 key 53511 non-null object \n",
+ " 6 name 53511 non-null object \n",
+ " 7 organization_type 53511 non-null object \n",
+ " 8 gtfs_dataset_key 53511 non-null object \n",
+ " 9 ntd_id 52343 non-null object \n",
+ " 10 ntd_agency_info_key 48936 non-null object \n",
+ "dtypes: geometry(1), object(10)\n",
+ "memory usage: 4.9+ MB\n"
+ ]
+ }
+ ],
+ "source": [
+ "orgs_stops.info()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "703d3c9d-c755-45d9-aa74-467b315549dc",
+ "metadata": {},
+ "source": [
+ "## Spatial Analysis: Stop Buffers and Census Tract Intersections"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "id": "c8563478-a79b-4aef-9dff-225472a79fda",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Put the stops in the same coordinate reference system as the census tract layer.\n",
+ "tract_crs = tracts_ca_acs.crs\n",
+ "orgs_stops = orgs_stops.to_crs(tract_crs)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "id": "806da12f-9cf6-4306-81c0-dd057f31d8a9",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Replace each stop point with a half-mile circle around it.\n",
+ "# The buffer distance is expressed in CRS units; 804.672 is 0.5 mi in meters,\n",
+ "# so this assumes the stops' CRS is metric -- TODO confirm.\n",
+ "half_mile_buffers = orgs_stops.geometry.buffer(804.672)\n",
+ "orgs_stop_buffered = gpd.GeoDataFrame(\n",
+ "    orgs_stops.copy(),\n",
+ "    crs=orgs_stops.crs,\n",
+ "    geometry=half_mile_buffers,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "id": "bb20127e-2092-4074-a710-5070e39806f7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "orgs_stop_dissolved = orgs_stop_buffered.dissolve(by='key')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "id": "9de1e91a-aa0b-4eea-b6c2-05537cf21aea",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "orgs_stop_dissolved = orgs_stop_dissolved.reset_index()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "id": "2e231b7d-d744-419d-b442-810b166fcd3d",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Clip the dissolved per-organization buffers against the census tracts. Each resulting\n",
+ "# row is an (organization, tract) overlap carrying the attributes of both layers.\n",
+ "geometry_intersect = gpd.overlay(\n",
+ "    orgs_stop_dissolved,\n",
+ "    tracts_ca_acs,\n",
+ "    keep_geom_type=True,\n",
+ "    how='intersection',\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "id": "fa657799-42aa-46d7-a789-c4d86b226c13",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Record the area of each overlap polygon in squared CRS units (square meters if the CRS is metric).\n",
+ "overlap_area = geometry_intersect.geometry.area\n",
+ "geometry_intersect['area_2'] = overlap_area"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "id": "59ce3254-f021-40cd-869b-3c7a21f9002e",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " key | \n",
+ " schedule_gtfs_dataset_key | \n",
+ " feed_key | \n",
+ " stop_id | \n",
+ " stop_name | \n",
+ " name | \n",
+ " organization_type | \n",
+ " gtfs_dataset_key | \n",
+ " ntd_id | \n",
+ " ntd_agency_info_key | \n",
+ " STATEFP | \n",
+ " COUNTYFP | \n",
+ " TRACTCE | \n",
+ " GEOIDFQ | \n",
+ " GEOID | \n",
+ " NAME | \n",
+ " NAMELSAD | \n",
+ " STUSPS | \n",
+ " NAMELSADCO | \n",
+ " STATE_NAME | \n",
+ " LSAD | \n",
+ " ALAND | \n",
+ " AWATER | \n",
+ " total_pop | \n",
+ " poverty_pop | \n",
+ " non_us_citizen | \n",
+ " male_65_to_66 | \n",
+ " male_67_to_69 | \n",
+ " male_70_to_74 | \n",
+ " male_75_to_79 | \n",
+ " male_80_to_84 | \n",
+ " male_85_and_over | \n",
+ " female_65_to_66 | \n",
+ " female_67_to_69 | \n",
+ " female_70_to_74 | \n",
+ " female_75_to_79 | \n",
+ " female_80_to_84 | \n",
+ " female_85_and_over | \n",
+ " median_household_income | \n",
+ " income_less_10000 | \n",
+ " income_10000_14999 | \n",
+ " income_15000_24999 | \n",
+ " income_25000_34999 | \n",
+ " income_35000_49999 | \n",
+ " income_50000_64999 | \n",
+ " income_65000_74999 | \n",
+ " workers_with_no_car | \n",
+ " households_with_no_cars | \n",
+ " disabled_pop | \n",
+ " public_asst_pop | \n",
+ " veteran_pop | \n",
+ " state | \n",
+ " county | \n",
+ " tract | \n",
+ " county_name | \n",
+ " inc_extremelylow | \n",
+ " inc_verylow | \n",
+ " inc_low | \n",
+ " male_seniors | \n",
+ " female_seniors | \n",
+ " area_m2 | \n",
+ " geometry | \n",
+ " area_2 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0119506e03bed4c4d8b094ab1177cd78 | \n",
+ " 524ea6209600e9a2de34a02cf9068729 | \n",
+ " d9e1e77d0754b712fc608741ae3836f5 | \n",
+ " bSAT | \n",
+ " Santa Maria-Ihop Bus Stop | \n",
+ " San Joaquin Joint Powers Authority | \n",
+ " Independent Agency | \n",
+ " 524ea6209600e9a2de34a02cf9068729 | \n",
+ " None | \n",
+ " None | \n",
+ " 06 | \n",
+ " 001 | \n",
+ " 450741 | \n",
+ " 1400000US06001450741 | \n",
+ " 06001450741 | \n",
+ " 4507.41 | \n",
+ " Census Tract 4507.41 | \n",
+ " CA | \n",
+ " Alameda County | \n",
+ " California | \n",
+ " CT | \n",
+ " 2182370 | \n",
+ " 0 | \n",
+ " 5469 | \n",
+ " 937 | \n",
+ " 750 | \n",
+ " 37 | \n",
+ " 67 | \n",
+ " 73 | \n",
+ " 117 | \n",
+ " 60 | \n",
+ " 41 | \n",
+ " 43 | \n",
+ " 165 | \n",
+ " 182 | \n",
+ " 95 | \n",
+ " 74 | \n",
+ " 175 | \n",
+ " 154609 | \n",
+ " 657 | \n",
+ " 516 | \n",
+ " 245 | \n",
+ " 252 | \n",
+ " 260 | \n",
+ " 63 | \n",
+ " 77 | \n",
+ " 55 | \n",
+ " 228 | \n",
+ " 5301 | \n",
+ " 1941 | \n",
+ " 82 | \n",
+ " 06 | \n",
+ " 001 | \n",
+ " 450741 | \n",
+ " Alameda | \n",
+ " 1418 | \n",
+ " 512 | \n",
+ " 140 | \n",
+ " 395 | \n",
+ " 734 | \n",
+ " 2.191555e+06 | \n",
+ " POLYGON ((-165021.160 -37973.000, -165009.573 ... | \n",
+ " 116531.265316 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 0119506e03bed4c4d8b094ab1177cd78 | \n",
+ " 524ea6209600e9a2de34a02cf9068729 | \n",
+ " d9e1e77d0754b712fc608741ae3836f5 | \n",
+ " bSAT | \n",
+ " Santa Maria-Ihop Bus Stop | \n",
+ " San Joaquin Joint Powers Authority | \n",
+ " Independent Agency | \n",
+ " 524ea6209600e9a2de34a02cf9068729 | \n",
+ " None | \n",
+ " None | \n",
+ " 06 | \n",
+ " 001 | \n",
+ " 450300 | \n",
+ " 1400000US06001450300 | \n",
+ " 06001450300 | \n",
+ " 4503 | \n",
+ " Census Tract 4503 | \n",
+ " CA | \n",
+ " Alameda County | \n",
+ " California | \n",
+ " CT | \n",
+ " 3133641 | \n",
+ " 0 | \n",
+ " 4967 | \n",
+ " 67 | \n",
+ " 582 | \n",
+ " 48 | \n",
+ " 53 | \n",
+ " 49 | \n",
+ " 70 | \n",
+ " 72 | \n",
+ " 32 | \n",
+ " 69 | \n",
+ " 51 | \n",
+ " 68 | \n",
+ " 101 | \n",
+ " 113 | \n",
+ " 75 | \n",
+ " 147875 | \n",
+ " 363 | \n",
+ " 142 | \n",
+ " 221 | \n",
+ " 197 | \n",
+ " 428 | \n",
+ " 330 | \n",
+ " 196 | \n",
+ " 64 | \n",
+ " 84 | \n",
+ " 4967 | \n",
+ " 1849 | \n",
+ " 179 | \n",
+ " 06 | \n",
+ " 001 | \n",
+ " 450300 | \n",
+ " Alameda | \n",
+ " 726 | \n",
+ " 625 | \n",
+ " 526 | \n",
+ " 324 | \n",
+ " 477 | \n",
+ " 3.149375e+06 | \n",
+ " POLYGON ((-167866.792 -33259.734, -167870.667 ... | \n",
+ " 212536.639123 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " key schedule_gtfs_dataset_key \\\n",
+ "0 0119506e03bed4c4d8b094ab1177cd78 524ea6209600e9a2de34a02cf9068729 \n",
+ "1 0119506e03bed4c4d8b094ab1177cd78 524ea6209600e9a2de34a02cf9068729 \n",
+ "\n",
+ " feed_key stop_id stop_name \\\n",
+ "0 d9e1e77d0754b712fc608741ae3836f5 bSAT Santa Maria-Ihop Bus Stop \n",
+ "1 d9e1e77d0754b712fc608741ae3836f5 bSAT Santa Maria-Ihop Bus Stop \n",
+ "\n",
+ " name organization_type \\\n",
+ "0 San Joaquin Joint Powers Authority Independent Agency \n",
+ "1 San Joaquin Joint Powers Authority Independent Agency \n",
+ "\n",
+ " gtfs_dataset_key ntd_id ntd_agency_info_key STATEFP \\\n",
+ "0 524ea6209600e9a2de34a02cf9068729 None None 06 \n",
+ "1 524ea6209600e9a2de34a02cf9068729 None None 06 \n",
+ "\n",
+ " COUNTYFP TRACTCE GEOIDFQ GEOID NAME \\\n",
+ "0 001 450741 1400000US06001450741 06001450741 4507.41 \n",
+ "1 001 450300 1400000US06001450300 06001450300 4503 \n",
+ "\n",
+ " NAMELSAD STUSPS NAMELSADCO STATE_NAME LSAD ALAND \\\n",
+ "0 Census Tract 4507.41 CA Alameda County California CT 2182370 \n",
+ "1 Census Tract 4503 CA Alameda County California CT 3133641 \n",
+ "\n",
+ " AWATER total_pop poverty_pop non_us_citizen male_65_to_66 \\\n",
+ "0 0 5469 937 750 37 \n",
+ "1 0 4967 67 582 48 \n",
+ "\n",
+ " male_67_to_69 male_70_to_74 male_75_to_79 male_80_to_84 \\\n",
+ "0 67 73 117 60 \n",
+ "1 53 49 70 72 \n",
+ "\n",
+ " male_85_and_over female_65_to_66 female_67_to_69 female_70_to_74 \\\n",
+ "0 41 43 165 182 \n",
+ "1 32 69 51 68 \n",
+ "\n",
+ " female_75_to_79 female_80_to_84 female_85_and_over \\\n",
+ "0 95 74 175 \n",
+ "1 101 113 75 \n",
+ "\n",
+ " median_household_income income_less_10000 income_10000_14999 \\\n",
+ "0 154609 657 516 \n",
+ "1 147875 363 142 \n",
+ "\n",
+ " income_15000_24999 income_25000_34999 income_35000_49999 \\\n",
+ "0 245 252 260 \n",
+ "1 221 197 428 \n",
+ "\n",
+ " income_50000_64999 income_65000_74999 workers_with_no_car \\\n",
+ "0 63 77 55 \n",
+ "1 330 196 64 \n",
+ "\n",
+ " households_with_no_cars disabled_pop public_asst_pop veteran_pop state \\\n",
+ "0 228 5301 1941 82 06 \n",
+ "1 84 4967 1849 179 06 \n",
+ "\n",
+ " county tract county_name inc_extremelylow inc_verylow inc_low \\\n",
+ "0 001 450741 Alameda 1418 512 140 \n",
+ "1 001 450300 Alameda 726 625 526 \n",
+ "\n",
+ " male_seniors female_seniors area_m2 \\\n",
+ "0 395 734 2.191555e+06 \n",
+ "1 324 477 3.149375e+06 \n",
+ "\n",
+ " geometry area_2 \n",
+ "0 POLYGON ((-165021.160 -37973.000, -165009.573 ... 116531.265316 \n",
+ "1 POLYGON ((-167866.792 -33259.734, -167870.667 ... 212536.639123 "
+ ]
+ },
+ "execution_count": 45,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "geometry_intersect.head(2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "92eb767d-b477-4e14-ab51-fac6f615e621",
+ "metadata": {},
+ "source": [
+ "## Adjusting Population and Demographic Metrics for Stop Service Areas"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "id": "ed44c28b-0eb9-4d7c-b9a5-3739ea9f5133",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Ratio of the overlap area (area_2) to the area_m2 value carried over from the tract layer.\n",
+ "# This ratio is the weight applied to the tract-level counts further down.\n",
+ "geometry_intersect['area_ratio'] = geometry_intersect['area_2'].div(geometry_intersect['area_m2'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "id": "8653de80-582e-435a-9642-69ac80089dba",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " key | \n",
+ " schedule_gtfs_dataset_key | \n",
+ " feed_key | \n",
+ " stop_id | \n",
+ " stop_name | \n",
+ " name | \n",
+ " organization_type | \n",
+ " gtfs_dataset_key | \n",
+ " ntd_id | \n",
+ " ntd_agency_info_key | \n",
+ " STATEFP | \n",
+ " COUNTYFP | \n",
+ " TRACTCE | \n",
+ " GEOIDFQ | \n",
+ " GEOID | \n",
+ " NAME | \n",
+ " NAMELSAD | \n",
+ " STUSPS | \n",
+ " NAMELSADCO | \n",
+ " STATE_NAME | \n",
+ " LSAD | \n",
+ " ALAND | \n",
+ " AWATER | \n",
+ " total_pop | \n",
+ " poverty_pop | \n",
+ " non_us_citizen | \n",
+ " male_65_to_66 | \n",
+ " male_67_to_69 | \n",
+ " male_70_to_74 | \n",
+ " male_75_to_79 | \n",
+ " male_80_to_84 | \n",
+ " male_85_and_over | \n",
+ " female_65_to_66 | \n",
+ " female_67_to_69 | \n",
+ " female_70_to_74 | \n",
+ " female_75_to_79 | \n",
+ " female_80_to_84 | \n",
+ " female_85_and_over | \n",
+ " median_household_income | \n",
+ " income_less_10000 | \n",
+ " income_10000_14999 | \n",
+ " income_15000_24999 | \n",
+ " income_25000_34999 | \n",
+ " income_35000_49999 | \n",
+ " income_50000_64999 | \n",
+ " income_65000_74999 | \n",
+ " workers_with_no_car | \n",
+ " households_with_no_cars | \n",
+ " disabled_pop | \n",
+ " public_asst_pop | \n",
+ " veteran_pop | \n",
+ " state | \n",
+ " county | \n",
+ " tract | \n",
+ " county_name | \n",
+ " inc_extremelylow | \n",
+ " inc_verylow | \n",
+ " inc_low | \n",
+ " male_seniors | \n",
+ " female_seniors | \n",
+ " area_m2 | \n",
+ " geometry | \n",
+ " area_2 | \n",
+ " area_ratio | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0119506e03bed4c4d8b094ab1177cd78 | \n",
+ " 524ea6209600e9a2de34a02cf9068729 | \n",
+ " d9e1e77d0754b712fc608741ae3836f5 | \n",
+ " bSAT | \n",
+ " Santa Maria-Ihop Bus Stop | \n",
+ " San Joaquin Joint Powers Authority | \n",
+ " Independent Agency | \n",
+ " 524ea6209600e9a2de34a02cf9068729 | \n",
+ " None | \n",
+ " None | \n",
+ " 06 | \n",
+ " 001 | \n",
+ " 450741 | \n",
+ " 1400000US06001450741 | \n",
+ " 06001450741 | \n",
+ " 4507.41 | \n",
+ " Census Tract 4507.41 | \n",
+ " CA | \n",
+ " Alameda County | \n",
+ " California | \n",
+ " CT | \n",
+ " 2182370 | \n",
+ " 0 | \n",
+ " 5469 | \n",
+ " 937 | \n",
+ " 750 | \n",
+ " 37 | \n",
+ " 67 | \n",
+ " 73 | \n",
+ " 117 | \n",
+ " 60 | \n",
+ " 41 | \n",
+ " 43 | \n",
+ " 165 | \n",
+ " 182 | \n",
+ " 95 | \n",
+ " 74 | \n",
+ " 175 | \n",
+ " 154609 | \n",
+ " 657 | \n",
+ " 516 | \n",
+ " 245 | \n",
+ " 252 | \n",
+ " 260 | \n",
+ " 63 | \n",
+ " 77 | \n",
+ " 55 | \n",
+ " 228 | \n",
+ " 5301 | \n",
+ " 1941 | \n",
+ " 82 | \n",
+ " 06 | \n",
+ " 001 | \n",
+ " 450741 | \n",
+ " Alameda | \n",
+ " 1418 | \n",
+ " 512 | \n",
+ " 140 | \n",
+ " 395 | \n",
+ " 734 | \n",
+ " 2.191555e+06 | \n",
+ " POLYGON ((-165021.160 -37973.000, -165009.573 ... | \n",
+ " 116531.265316 | \n",
+ " 0.053173 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 0119506e03bed4c4d8b094ab1177cd78 | \n",
+ " 524ea6209600e9a2de34a02cf9068729 | \n",
+ " d9e1e77d0754b712fc608741ae3836f5 | \n",
+ " bSAT | \n",
+ " Santa Maria-Ihop Bus Stop | \n",
+ " San Joaquin Joint Powers Authority | \n",
+ " Independent Agency | \n",
+ " 524ea6209600e9a2de34a02cf9068729 | \n",
+ " None | \n",
+ " None | \n",
+ " 06 | \n",
+ " 001 | \n",
+ " 450300 | \n",
+ " 1400000US06001450300 | \n",
+ " 06001450300 | \n",
+ " 4503 | \n",
+ " Census Tract 4503 | \n",
+ " CA | \n",
+ " Alameda County | \n",
+ " California | \n",
+ " CT | \n",
+ " 3133641 | \n",
+ " 0 | \n",
+ " 4967 | \n",
+ " 67 | \n",
+ " 582 | \n",
+ " 48 | \n",
+ " 53 | \n",
+ " 49 | \n",
+ " 70 | \n",
+ " 72 | \n",
+ " 32 | \n",
+ " 69 | \n",
+ " 51 | \n",
+ " 68 | \n",
+ " 101 | \n",
+ " 113 | \n",
+ " 75 | \n",
+ " 147875 | \n",
+ " 363 | \n",
+ " 142 | \n",
+ " 221 | \n",
+ " 197 | \n",
+ " 428 | \n",
+ " 330 | \n",
+ " 196 | \n",
+ " 64 | \n",
+ " 84 | \n",
+ " 4967 | \n",
+ " 1849 | \n",
+ " 179 | \n",
+ " 06 | \n",
+ " 001 | \n",
+ " 450300 | \n",
+ " Alameda | \n",
+ " 726 | \n",
+ " 625 | \n",
+ " 526 | \n",
+ " 324 | \n",
+ " 477 | \n",
+ " 3.149375e+06 | \n",
+ " POLYGON ((-167866.792 -33259.734, -167870.667 ... | \n",
+ " 212536.639123 | \n",
+ " 0.067485 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " key schedule_gtfs_dataset_key \\\n",
+ "0 0119506e03bed4c4d8b094ab1177cd78 524ea6209600e9a2de34a02cf9068729 \n",
+ "1 0119506e03bed4c4d8b094ab1177cd78 524ea6209600e9a2de34a02cf9068729 \n",
+ "\n",
+ " feed_key stop_id stop_name \\\n",
+ "0 d9e1e77d0754b712fc608741ae3836f5 bSAT Santa Maria-Ihop Bus Stop \n",
+ "1 d9e1e77d0754b712fc608741ae3836f5 bSAT Santa Maria-Ihop Bus Stop \n",
+ "\n",
+ " name organization_type \\\n",
+ "0 San Joaquin Joint Powers Authority Independent Agency \n",
+ "1 San Joaquin Joint Powers Authority Independent Agency \n",
+ "\n",
+ " gtfs_dataset_key ntd_id ntd_agency_info_key STATEFP \\\n",
+ "0 524ea6209600e9a2de34a02cf9068729 None None 06 \n",
+ "1 524ea6209600e9a2de34a02cf9068729 None None 06 \n",
+ "\n",
+ " COUNTYFP TRACTCE GEOIDFQ GEOID NAME \\\n",
+ "0 001 450741 1400000US06001450741 06001450741 4507.41 \n",
+ "1 001 450300 1400000US06001450300 06001450300 4503 \n",
+ "\n",
+ " NAMELSAD STUSPS NAMELSADCO STATE_NAME LSAD ALAND \\\n",
+ "0 Census Tract 4507.41 CA Alameda County California CT 2182370 \n",
+ "1 Census Tract 4503 CA Alameda County California CT 3133641 \n",
+ "\n",
+ " AWATER total_pop poverty_pop non_us_citizen male_65_to_66 \\\n",
+ "0 0 5469 937 750 37 \n",
+ "1 0 4967 67 582 48 \n",
+ "\n",
+ " male_67_to_69 male_70_to_74 male_75_to_79 male_80_to_84 \\\n",
+ "0 67 73 117 60 \n",
+ "1 53 49 70 72 \n",
+ "\n",
+ " male_85_and_over female_65_to_66 female_67_to_69 female_70_to_74 \\\n",
+ "0 41 43 165 182 \n",
+ "1 32 69 51 68 \n",
+ "\n",
+ " female_75_to_79 female_80_to_84 female_85_and_over \\\n",
+ "0 95 74 175 \n",
+ "1 101 113 75 \n",
+ "\n",
+ " median_household_income income_less_10000 income_10000_14999 \\\n",
+ "0 154609 657 516 \n",
+ "1 147875 363 142 \n",
+ "\n",
+ " income_15000_24999 income_25000_34999 income_35000_49999 \\\n",
+ "0 245 252 260 \n",
+ "1 221 197 428 \n",
+ "\n",
+ " income_50000_64999 income_65000_74999 workers_with_no_car \\\n",
+ "0 63 77 55 \n",
+ "1 330 196 64 \n",
+ "\n",
+ " households_with_no_cars disabled_pop public_asst_pop veteran_pop state \\\n",
+ "0 228 5301 1941 82 06 \n",
+ "1 84 4967 1849 179 06 \n",
+ "\n",
+ " county tract county_name inc_extremelylow inc_verylow inc_low \\\n",
+ "0 001 450741 Alameda 1418 512 140 \n",
+ "1 001 450300 Alameda 726 625 526 \n",
+ "\n",
+ " male_seniors female_seniors area_m2 \\\n",
+ "0 395 734 2.191555e+06 \n",
+ "1 324 477 3.149375e+06 \n",
+ "\n",
+ " geometry area_2 \\\n",
+ "0 POLYGON ((-165021.160 -37973.000, -165009.573 ... 116531.265316 \n",
+ "1 POLYGON ((-167866.792 -33259.734, -167870.667 ... 212536.639123 \n",
+ "\n",
+ " area_ratio \n",
+ "0 0.053173 \n",
+ "1 0.067485 "
+ ]
+ },
+ "execution_count": 47,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "geometry_intersect.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "id": "597c67ad-9913-435f-8c9b-ddc1a9fbd297",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Tract-level counts that get scaled down to the portion of the tract inside the stop buffer.\n",
+ "cols_to_weight = [\n",
+ "    'total_pop', 'poverty_pop', 'non_us_citizen',\n",
+ "    'workers_with_no_car', 'households_with_no_cars',\n",
+ "    'disabled_pop', 'public_asst_pop',\n",
+ "    'inc_extremelylow', 'inc_verylow', 'inc_low',\n",
+ "    'male_seniors', 'female_seniors', 'veteran_pop',\n",
+ "]\n",
+ "\n",
+ "# Multiply every count by the row's area_ratio and store the result as '<column>_adj'.\n",
+ "adj_names = [f'{col}_adj' for col in cols_to_weight]\n",
+ "weighted_counts = geometry_intersect[cols_to_weight].mul(geometry_intersect['area_ratio'], axis=0)\n",
+ "geometry_intersect[adj_names] = weighted_counts"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "5763be02-ce61-46fe-aeba-9fd273e9ca81",
+ "id": "e1e4d484-1748-4027-83b2-bbb8dc441ed9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Organization x tract overlap rows with their area-weighted demographic estimates.\n",
+ "# The weighted counts are named '<column>_adj' and the weight itself is 'area_ratio';\n",
+ "# the earlier 'adjusted_total_pop' / 'pop_weight' labels are never created on\n",
+ "# geometry_intersect, so selecting them raised a KeyError.\n",
+ "# NOTE(review): stop_id / stop_name survive the per-organization dissolve as a single\n",
+ "# value per organization -- confirm whether they belong in this table.\n",
+ "filtered_cols = [\n",
+ "    'name', 'organization_type', 'ntd_id', 'ntd_agency_info_key',\n",
+ "    'stop_id', 'stop_name', 'schedule_gtfs_dataset_key', 'feed_key',\n",
+ "    'GEOIDFQ', 'geometry', 'area_2', 'total_pop_adj', 'area_ratio',\n",
+ "    'poverty_pop_adj', 'non_us_citizen_adj', 'workers_with_no_car_adj',\n",
+ "    'households_with_no_cars_adj', 'disabled_pop_adj', 'public_asst_pop_adj',\n",
+ "    'inc_extremelylow_adj', 'inc_verylow_adj', 'inc_low_adj',\n",
+ "    'male_seniors_adj', 'female_seniors_adj', 'veteran_pop_adj',\n",
+ "]\n",
+ "filtered_final_data = geometry_intersect[filtered_cols]\n",
+ "\n",
+ "filtered_final_data.head(2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6119ef50-cc16-43b9-a2aa-28d36a4428f8",
+ "metadata": {},
+ "source": [
+ "## Agency-Level Demographic Data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "id": "971c8504-f654-45f9-b880-613b71a93c88",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# One output row per organization.\n",
+ "group_key = ['key', 'name']\n",
+ "\n",
+ "# Area-weighted counts (every '*_adj' column) are summed across the organization's rows.\n",
+ "adj_cols = [col for col in geometry_intersect.columns if col.endswith('_adj')]\n",
+ "\n",
+ "# Descriptive attributes are carried over by taking the first value in each group.\n",
+ "# NOTE(review): an organization can own several GTFS datasets (see dim_orgs_final), so\n",
+ "# 'first' keeps just one schedule_gtfs_dataset_key / feed_key per organization.\n",
+ "extra_cols = [\n",
+ "    'organization_type', 'ntd_id', 'ntd_agency_info_key',\n",
+ "    'schedule_gtfs_dataset_key', 'feed_key',\n",
+ "]\n",
+ "\n",
+ "# Map each column to its aggregation: sums first, then the carried-over attributes.\n",
+ "agg_dict = {\n",
+ "    **dict.fromkeys(adj_cols, 'sum'),\n",
+ "    **dict.fromkeys(extra_cols, 'first'),\n",
+ "}\n",
+ "\n",
+ "agency_summary = geometry_intersect.groupby(group_key, as_index=False).agg(agg_dict)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "id": "a7fd704c-6a7b-4573-85d1-8d27bbaf43a7",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 84 entries, 0 to 83\n",
+ "Data columns (total 20 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 key 84 non-null object \n",
+ " 1 name 84 non-null object \n",
+ " 2 total_pop_adj 84 non-null float64\n",
+ " 3 poverty_pop_adj 84 non-null float64\n",
+ " 4 non_us_citizen_adj 84 non-null float64\n",
+ " 5 workers_with_no_car_adj 84 non-null float64\n",
+ " 6 households_with_no_cars_adj 84 non-null float64\n",
+ " 7 disabled_pop_adj 84 non-null float64\n",
+ " 8 public_asst_pop_adj 84 non-null float64\n",
+ " 9 inc_extremelylow_adj 84 non-null float64\n",
+ " 10 inc_verylow_adj 84 non-null float64\n",
+ " 11 inc_low_adj 84 non-null float64\n",
+ " 12 male_seniors_adj 84 non-null float64\n",
+ " 13 female_seniors_adj 84 non-null float64\n",
+ " 14 veteran_pop_adj 84 non-null float64\n",
+ " 15 organization_type 84 non-null object \n",
+ " 16 ntd_id 77 non-null object \n",
+ " 17 ntd_agency_info_key 68 non-null object \n",
+ " 18 schedule_gtfs_dataset_key 84 non-null object \n",
+ " 19 feed_key 84 non-null object \n",
+ "dtypes: float64(13), object(7)\n",
+ "memory usage: 13.3+ KB\n"
+ ]
+ }
+ ],
+ "source": [
+ "agency_summary.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "id": "366986a8-b17b-4e19-b7c2-82608e14d250",
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "def export_gdf(gdf, filename: str):\n",
+ "    \"\"\"Write ``gdf`` to a local parquet file, upload it to GCS, then delete the local copy.\n",
+ "\n",
+ "    Args:\n",
+ "        gdf: object exposing ``to_parquet`` (DataFrame or GeoDataFrame).\n",
+ "        filename: output name, with or without a trailing ``.parquet``; the\n",
+ "            extension is appended exactly once.\n",
+ "\n",
+ "    The file is uploaded to ``{GCS_FILE_PATH}/transit_provider_dashboard/``.\n",
+ "    \"\"\"\n",
+ "    # Callers sometimes include the extension already; avoid 'name.parquet.parquet'.\n",
+ "    if filename.endswith(\".parquet\"):\n",
+ "        filename = filename[:-len(\".parquet\")]\n",
+ "\n",
+ "    local_path = f\"{filename}.parquet\"\n",
+ "    gcs_path = f\"{GCS_FILE_PATH}/transit_provider_dashboard/{filename}.parquet\"\n",
+ "\n",
+ "    gdf.to_parquet(local_path)\n",
+ "    try:\n",
+ "        fs.put(\n",
+ "            local_path,\n",
+ "            gcs_path,\n",
+ "            token = credentials.token\n",
+ "        )\n",
+ "    finally:\n",
+ "        # Remove the scratch file even when the upload fails.\n",
+ "        os.remove(local_path)\n",
+ "\n",
+ "    print(f\"saved {gcs_path}\")\n",
+ "\n",
+ "    return"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "id": "d6cc3be8-b52b-4ec0-97c6-1fc3c7e108d9",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "saved gs://calitp-analytics-data/data-analyses/transit_provider_dashboard/agency_stop_level_census_data.parquet.parquet\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Upload the agency-level summary to GCS. export_gdf adds the '.parquet' suffix,\n",
+ "# so pass the bare name to avoid a doubled '.parquet.parquet' extension.\n",
+ "export_gdf(agency_summary, \"agency_level_census_data\")"
+ ]
}
],
"metadata": {