diff --git a/chapter5/swiss_trains_20240902.csv b/chapter5/swiss_trains_20240902.csv index ae57d68..150fd51 100644 --- a/chapter5/swiss_trains_20240902.csv +++ b/chapter5/swiss_trains_20240902.csv @@ -1,4 +1,4 @@ -Betriebstag;Fahrt Bezeichner;Betreiber ID;Betreiber Abkürzung;Betreiber Name;Produkt ID;Linie;Linien Text;circulation id;Verkehrsmittel Text;Zusatzfahrt TF;Fällt aus;BPUIC;Haltestellen Name;Ankunftszeit;An Prognose;An Prognose Status;Abfahrtszeit;departure forecast;Ab Prognsoe Status;is passing;Ankunftsverspätung;departure delay;Didok-Nummer;Name Haltestelle;Abkuerzung Bahnhof;lod;Geoposition;sloid;Transportunternehmung (Nummer) +OperatingDay;TripIdentifier;OperatorID;OperatorAbbreviation;OperatorName;ProductID;Line;LineDescription;CirculationID;VehicleTypeDescription;AdditionalTrip;Canceled;RailwayInfrastructureCode;StopName;ArrivalTime;ArrivalForecast;ArrivalForecastStatus;DepartureTime;DepartureForecast;DepartureForecastStatus;IsPassing;ArrivalDelay;DepartureDelay;SwissStopIdentifier;StopName2;StationAbbreviation;LevelOfDetail;Geoposition;StopLocationIdentifier;TransportationCompanyNumber 2024-09-01;ch:1:sjyid:100001:21255-001;85:11;SBB;Schweizerische Bundesbahnen SBB;Zug;21255;S2;;S;false;false;8502231;Zug Oberwil;2024-09-01T15:13:00;2024-09-01T15:13:43;REAL;2024-09-01T15:13:00;2024-09-01T15:14:10;REAL;false;false;false;2231;Zug Oberwil;ZGO;http://lod.opentransportdata.swiss/didok/didok85;47.14768364168662, 8.50994051686946;ch:1:sloid:2231;11 2024-09-01;ch:1:sjyid:100001:21255-001;85:11;SBB;Schweizerische Bundesbahnen SBB;Zug;21255;S2;;S;false;false;8505006;Schwyz;2024-09-01T15:36:00;2024-09-01T15:36:35;REAL;2024-09-01T15:36:00;2024-09-01T15:37:02;REAL;false;false;false;5006;Schwyz;SCHW;http://lod.opentransportdata.swiss/didok/didok85;47.02639719104317, 8.632151708388978;ch:1:sloid:5006;11 2024-09-01;ch:1:sjyid:100001:21258-001;85:11;SBB;Schweizerische Bundesbahnen SBB;Zug;21258;S2;;S;false;false;8505114;Erstfeld;;;;2024-09-01T15:01:00;2024-09-01T15:01:23;REAL;false;false;false;5114;Erstfeld;ER;http://lod.opentransportdata.swiss/didok/didok85;46.82033940886055, 8.650454640226307;ch:1:sloid:5114;11 diff --git a/chapter5/swiss_trains_20240913.csv b/chapter5/swiss_trains_20240913.csv index 78b541a..1557b4c 100644 --- a/chapter5/swiss_trains_20240913.csv +++ b/chapter5/swiss_trains_20240913.csv @@ -1,4 +1,4 @@ -Betriebstag;Fahrt Bezeichner;Betreiber ID;Betreiber Abkürzung;Betreiber Name;Produkt ID;Linie;Linien Text;circulation id;Verkehrsmittel Text;Zusatzfahrt TF;Fällt aus;BPUIC;Haltestellen Name;Ankunftszeit;An Prognose;An Prognose Status;Abfahrtszeit;departure forecast;Ab Prognsoe Status;is passing;Ankunftsverspätung;departure delay;Didok-Nummer;Name Haltestelle;Abkuerzung Bahnhof;lod;Geoposition;sloid;Transportunternehmung (Nummer) +OperatingDay;TripIdentifier;OperatorID;OperatorAbbreviation;OperatorName;ProductID;Line;LineDescription;CirculationID;VehicleTypeDescription;AdditionalTrip;Canceled;RailwayInfrastructureCode;StopName;ArrivalTime;ArrivalForecast;ArrivalForecastStatus;DepartureTime;DepartureForecast;DepartureForecastStatus;IsPassing;ArrivalDelay;DepartureDelay;SwissStopIdentifier;StopName2;StationAbbreviation;LevelOfDetail;Geoposition;StopLocationIdentifier;TransportationCompanyNumber 2024-09-12;ch:1:sjyid:100001:102-001;85:11;SBB;Schweizerische Bundesbahnen SBB;Zug;102;ICE;;ICE;false;false;8500090;Basel Bad Bf;2024-09-12T17:19:00;2024-09-12T17:20:00;PROGNOSE;2024-09-12T17:21:00;2024-09-12T17:31:00;PROGNOSE;false;false;true;90;Basel Bad Bf;BAD;http://lod.opentransportdata.swiss/didok/didok85;47.567307905698264, 7.6069204184730825;ch:1:sloid:90;612 2024-09-12;ch:1:sjyid:100001:103-001;85:11;SBB;Schweizerische Bundesbahnen SBB;Zug;103;ICE;;ICE;false;false;8500090;Basel Bad Bf;2024-09-12T12:36:00;2024-09-12T12:38:00;PROGNOSE;2024-09-12T12:40:00;2024-09-12T12:41:00;PROGNOSE;false;false;false;90;Basel Bad Bf;BAD;http://lod.opentransportdata.swiss/didok/didok85;47.567307905698264, 7.6069204184730825;ch:1:sloid:90;612 2024-09-12;ch:1:sjyid:100001:1058-011;85:11;SBB;Schweizerische Bundesbahnen SBB;Zug;1058;IC61;;IC;false;false;8507492;Interlaken Ost;;;;2024-09-12T06:30:00;2024-09-12T06:30:41;REAL;false;false;false;7492;Interlaken Ost;IO;http://lod.opentransportdata.swiss/didok/didok85;46.69049999618799, 7.869000004346448;ch:1:sloid:7492;35 diff --git a/chapter5/swiss_trains_elasticsearch.ipynb b/chapter5/swiss_trains_elasticsearch.ipynb deleted file mode 100644 index fc37879..0000000 --- a/chapter5/swiss_trains_elasticsearch.ipynb +++ /dev/null @@ -1,687 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "d8f5c542", - "metadata": {}, - "source": [ - "## Swiss Trains Data to Elasticsearch\n", - "This notebook demonstrates how to load a CSV file containing Swiss train data into a Pandas DataFrame, process the data, and insert it into an Elasticsearch index using the `eland` library.\n", - "You will need to modify the `cloud_id` and `api_key` variables with your Elasticsearch Cloud credentials." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "206ed8f8", - "metadata": {}, - "outputs": [], - "source": [ - "# Import necessary libraries\n", - "import eland as ed\n", - "import pandas as pd\n", - "from elasticsearch import Elasticsearch" - ] - }, - { - "cell_type": "markdown", - "id": "0bc068c9", - "metadata": {}, - "source": [ - "### Step 1: Define your variables\n", - "Specify the `filename` for the CSV file, and provide the `cloud_id` and `api_key` for your Elasticsearch cluster. You must replace `'CHANGE_ME'` with your actual credentials." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "848da3a4", - "metadata": {}, - "outputs": [], - "source": [ - "# Define variables for the filename, cloud ID, and API key\n", - "filename = 'swiss_trains_20240913.csv'\n", - "cloud_id = 'CHANGEME' # Replace with your actual Elasticsearch Cloud ID\n", - "api_key = 'CHANGEME' # Replace with your actual Elasticsearch API key" - ] - }, - { - "cell_type": "markdown", - "id": "b890ec42", - "metadata": {}, - "source": [ - "### Step 2: Read the CSV file\n", - "The file is read into a pandas DataFrame using `pd.read_csv`. This reads the Swiss train data from the CSV file and stores it in a structured format." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "5d76fc67", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
BetriebstagFahrt BezeichnerBetreiber IDBetreiber AbkürzungBetreiber NameProdukt IDLinieLinien Textcirculation idVerkehrsmittel Text...is passingAnkunftsverspätungdeparture delayDidok-NummerName HaltestelleAbkuerzung BahnhoflodGeopositionsloidTransportunternehmung (Nummer)
02024-09-12ch:1:sjyid:100001:102-00185:11SBBSchweizerische Bundesbahnen SBBZug102ICENaNICE...FalseFalseTrue90.0Basel Bad BfBADhttp://lod.opentransportdata.swiss/didok/didok8547.567307905698264, 7.6069204184730825ch:1:sloid:90612.0
12024-09-12ch:1:sjyid:100001:103-00185:11SBBSchweizerische Bundesbahnen SBBZug103ICENaNICE...FalseFalseFalse90.0Basel Bad BfBADhttp://lod.opentransportdata.swiss/didok/didok8547.567307905698264, 7.6069204184730825ch:1:sloid:90612.0
22024-09-12ch:1:sjyid:100001:1058-01185:11SBBSchweizerische Bundesbahnen SBBZug1058IC61NaNIC...FalseFalseFalse7492.0Interlaken OstIOhttp://lod.opentransportdata.swiss/didok/didok8546.69049999618799, 7.869000004346448ch:1:sloid:749235.0
32024-09-12ch:1:sjyid:100001:1058-01185:11SBBSchweizerische Bundesbahnen SBBZug1058IC61NaNIC...FalseFalseFalse7493.0Interlaken WestIWhttp://lod.opentransportdata.swiss/didok/didok8546.68262798035661, 7.851453137595281ch:1:sloid:749333.0
42024-09-12ch:1:sjyid:100001:1058-01185:11SBBSchweizerische Bundesbahnen SBBZug1058IC61NaNIC...FalseFalseFalse7483.0SpiezSPhttp://lod.opentransportdata.swiss/didok/didok8546.68639566834843, 7.680103057796672ch:1:sloid:748333.0
\n", - "

5 rows × 30 columns

\n", - "
" - ], - "text/plain": [ - " Betriebstag Fahrt Bezeichner Betreiber ID Betreiber Abkürzung \\\n", - "0 2024-09-12 ch:1:sjyid:100001:102-001 85:11 SBB \n", - "1 2024-09-12 ch:1:sjyid:100001:103-001 85:11 SBB \n", - "2 2024-09-12 ch:1:sjyid:100001:1058-011 85:11 SBB \n", - "3 2024-09-12 ch:1:sjyid:100001:1058-011 85:11 SBB \n", - "4 2024-09-12 ch:1:sjyid:100001:1058-011 85:11 SBB \n", - "\n", - " Betreiber Name Produkt ID Linie Linien Text \\\n", - "0 Schweizerische Bundesbahnen SBB Zug 102 ICE \n", - "1 Schweizerische Bundesbahnen SBB Zug 103 ICE \n", - "2 Schweizerische Bundesbahnen SBB Zug 1058 IC61 \n", - "3 Schweizerische Bundesbahnen SBB Zug 1058 IC61 \n", - "4 Schweizerische Bundesbahnen SBB Zug 1058 IC61 \n", - "\n", - " circulation id Verkehrsmittel Text ... is passing Ankunftsverspätung \\\n", - "0 NaN ICE ... False False \n", - "1 NaN ICE ... False False \n", - "2 NaN IC ... False False \n", - "3 NaN IC ... False False \n", - "4 NaN IC ... False False \n", - "\n", - " departure delay Didok-Nummer Name Haltestelle Abkuerzung Bahnhof \\\n", - "0 True 90.0 Basel Bad Bf BAD \n", - "1 False 90.0 Basel Bad Bf BAD \n", - "2 False 7492.0 Interlaken Ost IO \n", - "3 False 7493.0 Interlaken West IW \n", - "4 False 7483.0 Spiez SP \n", - "\n", - " lod \\\n", - "0 http://lod.opentransportdata.swiss/didok/didok85 \n", - "1 http://lod.opentransportdata.swiss/didok/didok85 \n", - "2 http://lod.opentransportdata.swiss/didok/didok85 \n", - "3 http://lod.opentransportdata.swiss/didok/didok85 \n", - "4 http://lod.opentransportdata.swiss/didok/didok85 \n", - "\n", - " Geoposition sloid \\\n", - "0 47.567307905698264, 7.6069204184730825 ch:1:sloid:90 \n", - "1 47.567307905698264, 7.6069204184730825 ch:1:sloid:90 \n", - "2 46.69049999618799, 7.869000004346448 ch:1:sloid:7492 \n", - "3 46.68262798035661, 7.851453137595281 ch:1:sloid:7493 \n", - "4 46.68639566834843, 7.680103057796672 ch:1:sloid:7483 \n", - "\n", - " Transportunternehmung (Nummer) \n", - "0 612.0 \n", - "1 612.0 \n", - "2 35.0 \n", - "3 33.0 \n", - "4 33.0 \n", - "\n", - "[5 rows x 30 columns]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Read the CSV file into a pandas DataFrame\n", - "df = pd.read_csv(filename, sep=';')\n", - "df.head() # Display the first few rows of the DataFrame for inspection" - ] - }, - { - "cell_type": "markdown", - "id": "a85d6c8b", - "metadata": {}, - "source": [ - "### Step 3: Rename columns\n", - "To make the data easier to work with, the column names are renamed to more user-friendly, English names. This step ensures the data can be processed more easily." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "8b7db2d7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
OperatingDayTripIdentifierOperatorIDOperatorAbbreviationOperatorNameProductIDLineLineDescriptionCirculationIDVehicleTypeDescription...IsPassingArrivalDelayDepartureDelaySwissStopIdentifierStopNameStationAbbreviationLevelOfDetailGeopositionStopLocationIdentifierTransportationCompanyNumber
02024-09-12ch:1:sjyid:100001:102-00185:11SBBSchweizerische Bundesbahnen SBBZug102ICENaNICE...FalseFalseTrue90.0Basel Bad BfBADhttp://lod.opentransportdata.swiss/didok/didok8547.567307905698264, 7.6069204184730825ch:1:sloid:90612.0
12024-09-12ch:1:sjyid:100001:103-00185:11SBBSchweizerische Bundesbahnen SBBZug103ICENaNICE...FalseFalseFalse90.0Basel Bad BfBADhttp://lod.opentransportdata.swiss/didok/didok8547.567307905698264, 7.6069204184730825ch:1:sloid:90612.0
22024-09-12ch:1:sjyid:100001:1058-01185:11SBBSchweizerische Bundesbahnen SBBZug1058IC61NaNIC...FalseFalseFalse7492.0Interlaken OstIOhttp://lod.opentransportdata.swiss/didok/didok8546.69049999618799, 7.869000004346448ch:1:sloid:749235.0
32024-09-12ch:1:sjyid:100001:1058-01185:11SBBSchweizerische Bundesbahnen SBBZug1058IC61NaNIC...FalseFalseFalse7493.0Interlaken WestIWhttp://lod.opentransportdata.swiss/didok/didok8546.68262798035661, 7.851453137595281ch:1:sloid:749333.0
42024-09-12ch:1:sjyid:100001:1058-01185:11SBBSchweizerische Bundesbahnen SBBZug1058IC61NaNIC...FalseFalseFalse7483.0SpiezSPhttp://lod.opentransportdata.swiss/didok/didok8546.68639566834843, 7.680103057796672ch:1:sloid:748333.0
\n", - "

5 rows × 30 columns

\n", - "
" - ], - "text/plain": [ - " OperatingDay TripIdentifier OperatorID OperatorAbbreviation \\\n", - "0 2024-09-12 ch:1:sjyid:100001:102-001 85:11 SBB \n", - "1 2024-09-12 ch:1:sjyid:100001:103-001 85:11 SBB \n", - "2 2024-09-12 ch:1:sjyid:100001:1058-011 85:11 SBB \n", - "3 2024-09-12 ch:1:sjyid:100001:1058-011 85:11 SBB \n", - "4 2024-09-12 ch:1:sjyid:100001:1058-011 85:11 SBB \n", - "\n", - " OperatorName ProductID Line LineDescription \\\n", - "0 Schweizerische Bundesbahnen SBB Zug 102 ICE \n", - "1 Schweizerische Bundesbahnen SBB Zug 103 ICE \n", - "2 Schweizerische Bundesbahnen SBB Zug 1058 IC61 \n", - "3 Schweizerische Bundesbahnen SBB Zug 1058 IC61 \n", - "4 Schweizerische Bundesbahnen SBB Zug 1058 IC61 \n", - "\n", - " CirculationID VehicleTypeDescription ... IsPassing ArrivalDelay \\\n", - "0 NaN ICE ... False False \n", - "1 NaN ICE ... False False \n", - "2 NaN IC ... False False \n", - "3 NaN IC ... False False \n", - "4 NaN IC ... False False \n", - "\n", - " DepartureDelay SwissStopIdentifier StopName StationAbbreviation \\\n", - "0 True 90.0 Basel Bad Bf BAD \n", - "1 False 90.0 Basel Bad Bf BAD \n", - "2 False 7492.0 Interlaken Ost IO \n", - "3 False 7493.0 Interlaken West IW \n", - "4 False 7483.0 Spiez SP \n", - "\n", - " LevelOfDetail \\\n", - "0 http://lod.opentransportdata.swiss/didok/didok85 \n", - "1 http://lod.opentransportdata.swiss/didok/didok85 \n", - "2 http://lod.opentransportdata.swiss/didok/didok85 \n", - "3 http://lod.opentransportdata.swiss/didok/didok85 \n", - "4 http://lod.opentransportdata.swiss/didok/didok85 \n", - "\n", - " Geoposition StopLocationIdentifier \\\n", - "0 47.567307905698264, 7.6069204184730825 ch:1:sloid:90 \n", - "1 47.567307905698264, 7.6069204184730825 ch:1:sloid:90 \n", - "2 46.69049999618799, 7.869000004346448 ch:1:sloid:7492 \n", - "3 46.68262798035661, 7.851453137595281 ch:1:sloid:7493 \n", - "4 46.68639566834843, 7.680103057796672 ch:1:sloid:7483 \n", - "\n", - " TransportationCompanyNumber \n", - "0 612.0 \n", - "1 612.0 \n", - "2 35.0 \n", - "3 33.0 \n", - "4 33.0 \n", - "\n", - "[5 rows x 30 columns]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Rename columns to make them more readable and user-friendly\n", - "df.rename(\n", - " columns={\n", - " 'Betriebstag': 'OperatingDay',\n", - " 'Fahrt Bezeichner': 'TripIdentifier',\n", - " 'Betreiber ID': 'OperatorID',\n", - " 'Betreiber Abkürzung': 'OperatorAbbreviation',\n", - " 'Betreiber Name': 'OperatorName',\n", - " 'Produkt ID': 'ProductID',\n", - " 'Linie': 'Line',\n", - " 'Linien Text': 'LineDescription',\n", - " 'circulation id': 'CirculationID',\n", - " 'Verkehrsmittel Text': 'VehicleTypeDescription',\n", - " 'Zusatzfahrt TF': 'AdditionalTrip',\n", - " 'Fällt aus': 'Canceled',\n", - " 'BPUIC': 'RailwayInfrastructureCode',\n", - " 'Haltestellen Name': 'StopName',\n", - " 'Ankunftszeit': 'ArrivalTime',\n", - " 'An Prognose': 'ArrivalForecast',\n", - " 'An Prognose Status': 'ArrivalForecastStatus',\n", - " 'Abfahrtszeit': 'DepartureTime',\n", - " 'departure forecast': 'DepartureForecast',\n", - " 'Ab Prognsoe Status': 'DepartureForecastStatus',\n", - " 'is passing': 'IsPassing',\n", - " 'Ankunftsverspätung': 'ArrivalDelay',\n", - " 'departure delay': 'DepartureDelay',\n", - " 'Didok-Nummer': 'SwissStopIdentifier',\n", - " 'Name Haltestelle': 'StopName',\n", - " 'Abkuerzung Bahnhof': 'StationAbbreviation',\n", - " 'lod': 'LevelOfDetail',\n", - " 'Geoposition': 'Geoposition',\n", - " 'sloid': 'StopLocationIdentifier',\n", - " 'Transportunternehmung (Nummer)': 'TransportationCompanyNumber',\n", - " },\n", - " inplace=True\n", - ")\n", - "df.head() # Check the first few rows after renaming columns" - ] - }, - { - "cell_type": "markdown", - "id": "cacd414d", - "metadata": {}, - "source": [ - "### Step 4: Connect to Elasticsearch\n", - "Use the `cloud_id` and `api_key` to establish a connection to your Elasticsearch cluster. Ensure you replace these values with your actual credentials." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "1c94dbce", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Establish a connection to Elasticsearch\n", - "es = Elasticsearch(cloud_id=cloud_id, api_key=api_key)\n", - "es.ping()" - ] - }, - { - "cell_type": "markdown", - "id": "292a3bf9", - "metadata": {}, - "source": [ - "### Step 5: Insert the data into Elasticsearch\n", - "Use the `eland.pandas_to_eland` function to insert the DataFrame into Elasticsearch. The data will be inserted into the `swiss_trains` index with appropriate type overrides for specific fields (e.g., `geo_point` for geographical data)." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "ac6ecf4a", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/acharveriat/.pyenv/versions/3.10.5/lib/python3.10/site-packages/eland/field_mappings.py:549: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.\n", - " for column, dtype in dataframe.dtypes.iteritems():\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Data successfully inserted into Elasticsearch index: swiss_trains\n" - ] - } - ], - "source": [ - "# Insert the data into Elasticsearch using eland's `pandas_to_eland` function\n", - "# The data will be inserted into the 'swiss_trains' index with data type overrides\n", - "ed_df = ed.pandas_to_eland(\n", - " df,\n", - " es_client=es,\n", - " es_dest_index='swiss_trains',\n", - " es_if_exists='replace', # Replace the index if it already exists\n", - " es_dropna=True, # Drop rows with NaN values\n", - " es_refresh=True, # Refresh the index after inserting the documents\n", - " es_type_overrides={ # Define type overrides for specific fields\n", - " 'Geoposition': 'geo_point',\n", - " 'OperatingDay': 'date',\n", - " 'DepartureTime': 'date',\n", - " 'ArrivalTime': 'date',\n", - " 'DepartureForecast': 'date',\n", - " 'ArrivalForecast': 'date',\n", - " }\n", - ")\n", - "print('Data successfully inserted into Elasticsearch index: swiss_trains')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.5" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}