diff --git a/chapter5/swiss_trains_20240902.csv b/chapter5/swiss_trains_20240902.csv index ae57d68..150fd51 100644 --- a/chapter5/swiss_trains_20240902.csv +++ b/chapter5/swiss_trains_20240902.csv @@ -1,4 +1,4 @@ -Betriebstag;Fahrt Bezeichner;Betreiber ID;Betreiber Abkürzung;Betreiber Name;Produkt ID;Linie;Linien Text;circulation id;Verkehrsmittel Text;Zusatzfahrt TF;Fällt aus;BPUIC;Haltestellen Name;Ankunftszeit;An Prognose;An Prognose Status;Abfahrtszeit;departure forecast;Ab Prognsoe Status;is passing;Ankunftsverspätung;departure delay;Didok-Nummer;Name Haltestelle;Abkuerzung Bahnhof;lod;Geoposition;sloid;Transportunternehmung (Nummer) +OperatingDay;TripIdentifier;OperatorID;OperatorAbbreviation;OperatorName;ProductID;Line;LineDescription;CirculationID;VehicleTypeDescription;AdditionalTrip;Canceled;RailwayInfrastructureCode;StopName;ArrivalTime;ArrivalForecast;ArrivalForecastStatus;DepartureTime;DepartureForecast;DepartureForecastStatus;IsPassing;ArrivalDelay;DepartureDelay;SwissStopIdentifier;StopName2;StationAbbreviation;LevelOfDetail;Geoposition;StopLocationIdentifier;TransportationCompanyNumber 2024-09-01;ch:1:sjyid:100001:21255-001;85:11;SBB;Schweizerische Bundesbahnen SBB;Zug;21255;S2;;S;false;false;8502231;Zug Oberwil;2024-09-01T15:13:00;2024-09-01T15:13:43;REAL;2024-09-01T15:13:00;2024-09-01T15:14:10;REAL;false;false;false;2231;Zug Oberwil;ZGO;http://lod.opentransportdata.swiss/didok/didok85;47.14768364168662, 8.50994051686946;ch:1:sloid:2231;11 2024-09-01;ch:1:sjyid:100001:21255-001;85:11;SBB;Schweizerische Bundesbahnen SBB;Zug;21255;S2;;S;false;false;8505006;Schwyz;2024-09-01T15:36:00;2024-09-01T15:36:35;REAL;2024-09-01T15:36:00;2024-09-01T15:37:02;REAL;false;false;false;5006;Schwyz;SCHW;http://lod.opentransportdata.swiss/didok/didok85;47.02639719104317, 8.632151708388978;ch:1:sloid:5006;11 2024-09-01;ch:1:sjyid:100001:21258-001;85:11;SBB;Schweizerische Bundesbahnen SBB;Zug;21258;S2;;S;false;false;8505114;Erstfeld;;;;2024-09-01T15:01:00;2024-09-01T15:01:23;REAL;false;false;false;5114;Erstfeld;ER;http://lod.opentransportdata.swiss/didok/didok85;46.82033940886055, 8.650454640226307;ch:1:sloid:5114;11 diff --git a/chapter5/swiss_trains_20240913.csv b/chapter5/swiss_trains_20240913.csv index 78b541a..1557b4c 100644 --- a/chapter5/swiss_trains_20240913.csv +++ b/chapter5/swiss_trains_20240913.csv @@ -1,4 +1,4 @@ -Betriebstag;Fahrt Bezeichner;Betreiber ID;Betreiber Abkürzung;Betreiber Name;Produkt ID;Linie;Linien Text;circulation id;Verkehrsmittel Text;Zusatzfahrt TF;Fällt aus;BPUIC;Haltestellen Name;Ankunftszeit;An Prognose;An Prognose Status;Abfahrtszeit;departure forecast;Ab Prognsoe Status;is passing;Ankunftsverspätung;departure delay;Didok-Nummer;Name Haltestelle;Abkuerzung Bahnhof;lod;Geoposition;sloid;Transportunternehmung (Nummer) +OperatingDay;TripIdentifier;OperatorID;OperatorAbbreviation;OperatorName;ProductID;Line;LineDescription;CirculationID;VehicleTypeDescription;AdditionalTrip;Canceled;RailwayInfrastructureCode;StopName;ArrivalTime;ArrivalForecast;ArrivalForecastStatus;DepartureTime;DepartureForecast;DepartureForecastStatus;IsPassing;ArrivalDelay;DepartureDelay;SwissStopIdentifier;StopName2;StationAbbreviation;LevelOfDetail;Geoposition;StopLocationIdentifier;TransportationCompanyNumber 2024-09-12;ch:1:sjyid:100001:102-001;85:11;SBB;Schweizerische Bundesbahnen SBB;Zug;102;ICE;;ICE;false;false;8500090;Basel Bad Bf;2024-09-12T17:19:00;2024-09-12T17:20:00;PROGNOSE;2024-09-12T17:21:00;2024-09-12T17:31:00;PROGNOSE;false;false;true;90;Basel Bad Bf;BAD;http://lod.opentransportdata.swiss/didok/didok85;47.567307905698264, 7.6069204184730825;ch:1:sloid:90;612 2024-09-12;ch:1:sjyid:100001:103-001;85:11;SBB;Schweizerische Bundesbahnen SBB;Zug;103;ICE;;ICE;false;false;8500090;Basel Bad Bf;2024-09-12T12:36:00;2024-09-12T12:38:00;PROGNOSE;2024-09-12T12:40:00;2024-09-12T12:41:00;PROGNOSE;false;false;false;90;Basel Bad Bf;BAD;http://lod.opentransportdata.swiss/didok/didok85;47.567307905698264, 7.6069204184730825;ch:1:sloid:90;612 2024-09-12;ch:1:sjyid:100001:1058-011;85:11;SBB;Schweizerische Bundesbahnen SBB;Zug;1058;IC61;;IC;false;false;8507492;Interlaken Ost;;;;2024-09-12T06:30:00;2024-09-12T06:30:41;REAL;false;false;false;7492;Interlaken Ost;IO;http://lod.opentransportdata.swiss/didok/didok85;46.69049999618799, 7.869000004346448;ch:1:sloid:7492;35 diff --git a/chapter5/swiss_trains_elasticsearch.ipynb b/chapter5/swiss_trains_elasticsearch.ipynb deleted file mode 100644 index fc37879..0000000 --- a/chapter5/swiss_trains_elasticsearch.ipynb +++ /dev/null @@ -1,687 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "d8f5c542", - "metadata": {}, - "source": [ - "## Swiss Trains Data to Elasticsearch\n", - "This notebook demonstrates how to load a CSV file containing Swiss train data into a Pandas DataFrame, process the data, and insert it into an Elasticsearch index using the `eland` library.\n", - "You will need to modify the `cloud_id` and `api_key` variables with your Elasticsearch Cloud credentials." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "206ed8f8", - "metadata": {}, - "outputs": [], - "source": [ - "# Import necessary libraries\n", - "import eland as ed\n", - "import pandas as pd\n", - "from elasticsearch import Elasticsearch" - ] - }, - { - "cell_type": "markdown", - "id": "0bc068c9", - "metadata": {}, - "source": [ - "### Step 1: Define your variables\n", - "Specify the `filename` for the CSV file, and provide the `cloud_id` and `api_key` for your Elasticsearch cluster. You must replace `'CHANGE_ME'` with your actual credentials." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "848da3a4", - "metadata": {}, - "outputs": [], - "source": [ - "# Define variables for the filename, cloud ID, and API key\n", - "filename = 'swiss_trains_20240913.csv'\n", - "cloud_id = 'CHANGEME' # Replace with your actual Elasticsearch Cloud ID\n", - "api_key = 'CHANGEME' # Replace with your actual Elasticsearch API key" - ] - }, - { - "cell_type": "markdown", - "id": "b890ec42", - "metadata": {}, - "source": [ - "### Step 2: Read the CSV file\n", - "The file is read into a pandas DataFrame using `pd.read_csv`. This reads the Swiss train data from the CSV file and stores it in a structured format." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "5d76fc67", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
| \n", - " | Betriebstag | \n", - "Fahrt Bezeichner | \n", - "Betreiber ID | \n", - "Betreiber Abkürzung | \n", - "Betreiber Name | \n", - "Produkt ID | \n", - "Linie | \n", - "Linien Text | \n", - "circulation id | \n", - "Verkehrsmittel Text | \n", - "... | \n", - "is passing | \n", - "Ankunftsverspätung | \n", - "departure delay | \n", - "Didok-Nummer | \n", - "Name Haltestelle | \n", - "Abkuerzung Bahnhof | \n", - "lod | \n", - "Geoposition | \n", - "sloid | \n", - "Transportunternehmung (Nummer) | \n", - "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", - "2024-09-12 | \n", - "ch:1:sjyid:100001:102-001 | \n", - "85:11 | \n", - "SBB | \n", - "Schweizerische Bundesbahnen SBB | \n", - "Zug | \n", - "102 | \n", - "ICE | \n", - "NaN | \n", - "ICE | \n", - "... | \n", - "False | \n", - "False | \n", - "True | \n", - "90.0 | \n", - "Basel Bad Bf | \n", - "BAD | \n", - "http://lod.opentransportdata.swiss/didok/didok85 | \n", - "47.567307905698264, 7.6069204184730825 | \n", - "ch:1:sloid:90 | \n", - "612.0 | \n", - "
| 1 | \n", - "2024-09-12 | \n", - "ch:1:sjyid:100001:103-001 | \n", - "85:11 | \n", - "SBB | \n", - "Schweizerische Bundesbahnen SBB | \n", - "Zug | \n", - "103 | \n", - "ICE | \n", - "NaN | \n", - "ICE | \n", - "... | \n", - "False | \n", - "False | \n", - "False | \n", - "90.0 | \n", - "Basel Bad Bf | \n", - "BAD | \n", - "http://lod.opentransportdata.swiss/didok/didok85 | \n", - "47.567307905698264, 7.6069204184730825 | \n", - "ch:1:sloid:90 | \n", - "612.0 | \n", - "
| 2 | \n", - "2024-09-12 | \n", - "ch:1:sjyid:100001:1058-011 | \n", - "85:11 | \n", - "SBB | \n", - "Schweizerische Bundesbahnen SBB | \n", - "Zug | \n", - "1058 | \n", - "IC61 | \n", - "NaN | \n", - "IC | \n", - "... | \n", - "False | \n", - "False | \n", - "False | \n", - "7492.0 | \n", - "Interlaken Ost | \n", - "IO | \n", - "http://lod.opentransportdata.swiss/didok/didok85 | \n", - "46.69049999618799, 7.869000004346448 | \n", - "ch:1:sloid:7492 | \n", - "35.0 | \n", - "
| 3 | \n", - "2024-09-12 | \n", - "ch:1:sjyid:100001:1058-011 | \n", - "85:11 | \n", - "SBB | \n", - "Schweizerische Bundesbahnen SBB | \n", - "Zug | \n", - "1058 | \n", - "IC61 | \n", - "NaN | \n", - "IC | \n", - "... | \n", - "False | \n", - "False | \n", - "False | \n", - "7493.0 | \n", - "Interlaken West | \n", - "IW | \n", - "http://lod.opentransportdata.swiss/didok/didok85 | \n", - "46.68262798035661, 7.851453137595281 | \n", - "ch:1:sloid:7493 | \n", - "33.0 | \n", - "
| 4 | \n", - "2024-09-12 | \n", - "ch:1:sjyid:100001:1058-011 | \n", - "85:11 | \n", - "SBB | \n", - "Schweizerische Bundesbahnen SBB | \n", - "Zug | \n", - "1058 | \n", - "IC61 | \n", - "NaN | \n", - "IC | \n", - "... | \n", - "False | \n", - "False | \n", - "False | \n", - "7483.0 | \n", - "Spiez | \n", - "SP | \n", - "http://lod.opentransportdata.swiss/didok/didok85 | \n", - "46.68639566834843, 7.680103057796672 | \n", - "ch:1:sloid:7483 | \n", - "33.0 | \n", - "
5 rows × 30 columns
\n", - "| \n", - " | OperatingDay | \n", - "TripIdentifier | \n", - "OperatorID | \n", - "OperatorAbbreviation | \n", - "OperatorName | \n", - "ProductID | \n", - "Line | \n", - "LineDescription | \n", - "CirculationID | \n", - "VehicleTypeDescription | \n", - "... | \n", - "IsPassing | \n", - "ArrivalDelay | \n", - "DepartureDelay | \n", - "SwissStopIdentifier | \n", - "StopName | \n", - "StationAbbreviation | \n", - "LevelOfDetail | \n", - "Geoposition | \n", - "StopLocationIdentifier | \n", - "TransportationCompanyNumber | \n", - "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", - "2024-09-12 | \n", - "ch:1:sjyid:100001:102-001 | \n", - "85:11 | \n", - "SBB | \n", - "Schweizerische Bundesbahnen SBB | \n", - "Zug | \n", - "102 | \n", - "ICE | \n", - "NaN | \n", - "ICE | \n", - "... | \n", - "False | \n", - "False | \n", - "True | \n", - "90.0 | \n", - "Basel Bad Bf | \n", - "BAD | \n", - "http://lod.opentransportdata.swiss/didok/didok85 | \n", - "47.567307905698264, 7.6069204184730825 | \n", - "ch:1:sloid:90 | \n", - "612.0 | \n", - "
| 1 | \n", - "2024-09-12 | \n", - "ch:1:sjyid:100001:103-001 | \n", - "85:11 | \n", - "SBB | \n", - "Schweizerische Bundesbahnen SBB | \n", - "Zug | \n", - "103 | \n", - "ICE | \n", - "NaN | \n", - "ICE | \n", - "... | \n", - "False | \n", - "False | \n", - "False | \n", - "90.0 | \n", - "Basel Bad Bf | \n", - "BAD | \n", - "http://lod.opentransportdata.swiss/didok/didok85 | \n", - "47.567307905698264, 7.6069204184730825 | \n", - "ch:1:sloid:90 | \n", - "612.0 | \n", - "
| 2 | \n", - "2024-09-12 | \n", - "ch:1:sjyid:100001:1058-011 | \n", - "85:11 | \n", - "SBB | \n", - "Schweizerische Bundesbahnen SBB | \n", - "Zug | \n", - "1058 | \n", - "IC61 | \n", - "NaN | \n", - "IC | \n", - "... | \n", - "False | \n", - "False | \n", - "False | \n", - "7492.0 | \n", - "Interlaken Ost | \n", - "IO | \n", - "http://lod.opentransportdata.swiss/didok/didok85 | \n", - "46.69049999618799, 7.869000004346448 | \n", - "ch:1:sloid:7492 | \n", - "35.0 | \n", - "
| 3 | \n", - "2024-09-12 | \n", - "ch:1:sjyid:100001:1058-011 | \n", - "85:11 | \n", - "SBB | \n", - "Schweizerische Bundesbahnen SBB | \n", - "Zug | \n", - "1058 | \n", - "IC61 | \n", - "NaN | \n", - "IC | \n", - "... | \n", - "False | \n", - "False | \n", - "False | \n", - "7493.0 | \n", - "Interlaken West | \n", - "IW | \n", - "http://lod.opentransportdata.swiss/didok/didok85 | \n", - "46.68262798035661, 7.851453137595281 | \n", - "ch:1:sloid:7493 | \n", - "33.0 | \n", - "
| 4 | \n", - "2024-09-12 | \n", - "ch:1:sjyid:100001:1058-011 | \n", - "85:11 | \n", - "SBB | \n", - "Schweizerische Bundesbahnen SBB | \n", - "Zug | \n", - "1058 | \n", - "IC61 | \n", - "NaN | \n", - "IC | \n", - "... | \n", - "False | \n", - "False | \n", - "False | \n", - "7483.0 | \n", - "Spiez | \n", - "SP | \n", - "http://lod.opentransportdata.swiss/didok/didok85 | \n", - "46.68639566834843, 7.680103057796672 | \n", - "ch:1:sloid:7483 | \n", - "33.0 | \n", - "
5 rows × 30 columns
\n", - "