diff --git a/_data/destinations/snowflake/v1/replication.yml b/_data/destinations/snowflake/v1/replication.yml index 95f344dd6..7fe9d4a74 100644 --- a/_data/destinations/snowflake/v1/replication.yml +++ b/_data/destinations/snowflake/v1/replication.yml @@ -23,17 +23,20 @@ rename-original-column-in-split: false # LOADING BEHAVIOR # # ------------------------------ # -configurable-loading-behavior: false +configurable-loading-behavior: true default-loading-behavior: "Upsert" loading-behavior-types: - "Upsert" + - "History Mode" loading-behavior-description: | + Loading behavior can be configured for {{ destination.display_name }} destinations. + The following loading behavior types are supported for {{ destination.display_name }} destinations: - {% for loading-behavior-type in site.data.destinations.microsoft-azure.v1.replication.loading-behavior-types %} + {% for loading-behavior-type in site.data.destinations.snowflake.v1.replication.loading-behavior-types %} - {{ loading-behavior-type }} {% endfor %} diff --git a/_data/sidebars/stitchnav.yml b/_data/sidebars/stitchnav.yml index d2581592e..27fdc0092 100755 --- a/_data/sidebars/stitchnav.yml +++ b/_data/sidebars/stitchnav.yml @@ -427,6 +427,9 @@ all-docs: - title: Querying append-only tables url: "{{ link.replication.append-only-querying }}" + + - title: Querying History mode tables + url: "{{ link.replication.history-mode-querying }}" - title: Resolving record rejections url: "{{ link.destinations.storage.rejected-records }}" diff --git a/_data/tooltips.yml b/_data/tooltips.yml index d66cfbd3b..604f7ca6c 100755 --- a/_data/tooltips.yml +++ b/_data/tooltips.yml @@ -45,6 +45,8 @@ destination: "Destination is the Stitch word for 'data warehouse.' A data wareho extraction-logs: "Logs detailing the Extraction phase of the replication process for a given integration. Includes error info, should an error occur." 
+history-mode: "When data is loaded using the History mode, records are appended to the end of the table as new rows. Only the `_sdc_end_date` column is updated in existing rows, to indicate when a new version was added. Multiple versions of a row can exist in a table, creating a log of how a record has changed over time." + historical-replication-job: "A Stitch replication job that replicates historical data." historical-sync: "Historical syncs are how far back from the Stitch connection date, by default, Stitch will fetch historical data." diff --git a/_data/urls.yaml b/_data/urls.yaml index 62794e366..4682d4a6d 100755 --- a/_data/urls.yaml +++ b/_data/urls.yaml @@ -277,6 +277,7 @@ replication: full-table: /replication/replication-methods/full-table append-only-querying: /replication/loading/querying-append-only-tables + history-mode-querying: /replication/loading/querying-history-mode-tables deleted-records: "/replication/deleted-record-handling" ## Replication Keys diff --git a/_destinations/choosing-a-stitch-destination.md b/_destinations/choosing-a-stitch-destination.md index db3622bbd..bf180b53b 100755 --- a/_destinations/choosing-a-stitch-destination.md +++ b/_destinations/choosing-a-stitch-destination.md @@ -76,10 +76,11 @@ sections: content: | Loading behavior determines how data is loaded into your destination. Specifically, how updates are made to existing rows in the destination. - Stitch supports two loading behavior types: + Stitch supports three loading behavior types: - **Upsert**: {{ site.data.tooltips.upsert }} - **Append-Only**: {{ site.data.tooltips.append-only }} + - **History Mode**: {{ site.data.tooltips.history-mode }} The table below lists the default loading behavior for each destination and whether it can be configured. 
diff --git a/_destinations/snowflake/guides/connecting-snowflake.md b/_destinations/snowflake/guides/connecting-snowflake.md index d784b77e7..be137a3ad 100755 --- a/_destinations/snowflake/guides/connecting-snowflake.md +++ b/_destinations/snowflake/guides/connecting-snowflake.md @@ -194,6 +194,11 @@ steps: content: | {% include shared/database-connection-settings.html type="general" %} + - title: "Define loading behavior" + anchor: "define-loading-behavior" + content: | + {% include destinations/history-mode.html %} + - title: "Save the destination" anchor: "save-destination" content: | diff --git a/_includes/destinations/history-mode.html b/_includes/destinations/history-mode.html new file mode 100644 index 000000000..6e33d659b --- /dev/null +++ b/_includes/destinations/history-mode.html @@ -0,0 +1,15 @@ +{% capture loading-setting-note %} +**Note**: Loading behavior can't be changed after the destination is created. To change {{ destination.display_name }} loading behavior, you'll need to [delete and re-create the destination]({{ link.destinations.switch-destinations | prepend: site.baseurl }}). +{% endcapture %} + +{% include note.html type="single-line" content=loading-setting-note %} + +The last step is to define how Stitch will handle changes to existing records in your {{ destination.display_name }} destination: + +- **Upsert**: Existing rows will be updated with the most recent version of the record from the source. With this option, only the most recent version of a record will exist in {{ destination.display_name }}. + +- **History Mode**: Newer versions of existing records are added as new rows to the end of tables. Each time a new version of a record is added, the `_sdc_end_date` column is updated in the previous version of the same record to indicate that it is no longer the most recent version. + +Refer to the [Understanding loading behavior guide]({{ link.destinations.storage.loading-behavior | prepend: site.baseurl }}) for more info and examples. 
+ +**Note**: This setting may impact your {{ destination.display_name }} costs. [Learn more]({{ link.destinations.overviews.bigquery-pricing | prepend: site.baseurl }}). diff --git a/_replication/loading/loading-category.md b/_replication/loading/loading-category.md index f092c4da5..69092848b 100755 --- a/_replication/loading/loading-category.md +++ b/_replication/loading/loading-category.md @@ -78,6 +78,10 @@ sections: - title: "Querying Append-Only Tables" url: "{{ link.replication.append-only-querying }}" weight: 4 + + - title: "Querying History Mode Tables" + url: "{{ link.replication.history-mode-querying }}" + weight: 5 content: | Resources and tutorials for interacting with data loaded by Stitch into your destination. diff --git a/_replication/loading/querying-history-mode-tables.md b/_replication/loading/querying-history-mode-tables.md new file mode 100644 index 000000000..3e45b20ac --- /dev/null +++ b/_replication/loading/querying-history-mode-tables.md @@ -0,0 +1,143 @@ +--- +# -------------------------- # +# PAGE INFO # +# -------------------------- # + +title: Querying History Mode Tables +permalink: /replication/loading/querying-history-mode-tables +keywords: bigquery, google bigquery data warehouse, bigquery data warehouse, bigquery etl, etl to bigquery, historical +summary: "Learn how History mode works and how to account for it in your queries." 
+ +key: "history-mode-querying" +type: "" + +layout: general +toc: true +order: 1 +content-type: "guide" + + +# -------------------------- # +# INTRO # +# -------------------------- # + +intro: | + {% capture note %} + - [Destinations configured to use History mode]({{ link.destinations.storage.loading-behavior | prepend: site.baseurl | append:"#reference--destinations-loading-behavior" }}) + {% endcapture %} + + {% include note.html first-line="**This guide is applicable to:**" content=note %} + + When data is loaded using [History mode]({{ link.destinations.storage.loading-behavior | prepend: site.baseurl | append:"#reference--destinations-loading-behavior" }}), records are appended to the end of the table as new rows. Only the `_sdc_end_date` column is updated in existing rows, to indicate when a new version was added. Multiple versions of a row can exist in a table, creating a log of how a record has changed over time. + + In this guide, we'll cover: + + {% for section in page.sections %} + - [{{ section.summary }}](#{{ section.anchor }}) + {% endfor %} + + +# -------------------------- # +# CONTENT # +# -------------------------- # + +sections: + - title: "Before using this guide" + anchor: "before-using-guide" + summary: "Things to know before using this guide" + content: | + Before using this guide, note that: + + - You may need to modify the queries in this guide to use them yourself + - Stitch Support's expertise lies in replicating data, and as such does not provide data analysis or querying assistance. We can, however, help with data discrepancies. + + If you'd like assistance with analysis or business intelligence solutions, we recommend reaching out to one of our [analytics partners]({{ site.partners }}){:target="new"}. 
+ + - title: "Retrieving the latest version of every record" + anchor: "latest-version" + summary: "A querying strategy that retrieves the latest version of every record" + content: | + {% include note.html type="single-line" content="**Note**: The queries in this section are only intended to demonstrate one approach to querying. You may need to modify the queries to use them yourself." %} + + Let's take a look at an example. Assume we have an `orders` table that contains: + + - A Primary Key of `id`, + - The system `{{ system-column.prefix }}` columns added by Stitch, and + - Other order attribute columns + + If you wanted to get all current records, you could use the following query: + + {% capture code %} + SELECT * FROM orders + WHERE + _sdc_end_date = '9999-12-31 0:00 +00:00' + {% endcapture %} + + {% assign description = "Querying all current records" %} + + {% include layout/code-snippet.html code=code code-description=description %} + + {% include note.html type="single-line" content="**Note**: Since the `_sdc_end_date` value for current records is set to `9999-12-31` UTC, it is recommended to use `9999-12-31 0:00 +00:00` in your queries to make sure you get the correct result regardless of your local time." %} + + + - title: "Retrieving the version of every record for a specific date" + anchor: "specific-date" + summary: "A querying strategy that retrieves the version of every record for a specific date" + content: | + {% include note.html type="single-line" content="**Note**: The queries in this section are only intended to demonstrate one approach to querying. You may need to modify the queries to use them yourself." %} + + Let's take a look at an example. 
Assume we have an `orders` table that contains: + + - A Primary Key of `id`, + - The system `{{ system-column.prefix }}` columns added by Stitch, and + - Other order attribute columns + + If you wanted to get all records valid on December 1st 2022, you could use the following query: + + {% capture code %} + SELECT * FROM orders + WHERE + _sdc_start_date <= '2022-12-01' + AND _sdc_end_date > '2022-12-01' + {% endcapture %} + + {% assign description = "Querying all records for a specific date" %} + + {% include layout/code-snippet.html code=code code-description=description %} + + - title: "Retrieving the version of a specific record for a date range" + anchor: "date-range" + summary: "A querying strategy that retrieves the version of a specific record for a date range" + content: | + {% include note.html type="single-line" content="**Note**: The queries in this section are only intended to demonstrate one approach to querying. You may need to modify the queries to use them yourself." %} + + Let's take a look at an example. Assume we have an `orders` table that contains: + + - A Primary Key of `id`, + - The system `{{ system-column.prefix }}` columns added by Stitch, and + - Other order attribute columns + + If you wanted to get the version of a record with the ID `694` valid in all of December 2022, you could use the following query: + + {% capture code %} + SELECT * FROM orders + WHERE + id = 694 + AND _sdc_start_date <= '2022-12-01' + AND _sdc_end_date >= '2022-12-31' + {% endcapture %} + + {% assign description = "Querying the version of a specific record valid for a date range" %} + + {% include layout/code-snippet.html code=code code-description=description %} + + - title: "Create views in your destination" + anchor: "create-destination-views" + summary: "How to simplify querying by creating a view in your destination" + content: | + To make this easier, you can turn queries like the one above into a view. 
We recommend this approach because a view will encapsulate all the logic and simplify the process of querying against the latest version of your data. + + Refer to the documentation for your destination for more info on creating views: + + - [Snowflake]({{ site.data.destinations.snowflake.resource-links.create-views }}){:target="new"} +--- \ No newline at end of file diff --git a/_replication/loading/understanding-loading-behavior.md b/_replication/loading/understanding-loading-behavior.md index 958525f72..5d09c2db3 100644 --- a/_replication/loading/understanding-loading-behavior.md +++ b/_replication/loading/understanding-loading-behavior.md @@ -58,6 +58,18 @@ sections: content: | {{ site.data.tooltips.append-only }} + - title: "History Mode" + anchor: "loading-behavior-types--history-mode" + content: | + When data is loaded using the History mode, records are appended to the end of the table as new rows. + + When a record is added, the `_sdc_start_date` column is set to the loading date, and the `_sdc_end_date` column is set to `9999-12-31` (UTC time). + When a new version of the same record is added, the `_sdc_end_date` value of the previous version is updated to the loading date of the new version. + + Multiple versions of a row can exist in a table, creating a log of how a record has changed over time. This means you can create a query that returns the version of the record for a specific date or date range. + + **Note**: Since this loading type adds two system columns in the destination table, it will decrease the maximum number of columns available for your data if the destination has a limited number of columns per table. 
+ - title: "Determining loading behavior" anchor: "loading-behavior-determined" summary: "How loading behavior is determined" @@ -89,6 +101,11 @@ sections: - The data doesn't have defined Primary Keys in the source **or** destination, **or** - The integration or table is pre-configured to use Append-Only loading + - title: "History mode" + anchor: "history-mode-conditions" + content: | + History mode is only used when the destination is configured to use History mode. + - title: "Examples" anchor: "examples" summary: "Examples of each loading behavior type" @@ -131,6 +148,26 @@ sections: {% include layout/image.html enlarge=true file="/replication/append-only-no-primary-key.png" alt="Click to enlarge: Append-Only loading as a result of no defined Primary Keys" %} + - title: "History mode example" + anchor: "example--history-mode-loading" + summary: "History mode" + content: | + In this example, the destination is configured to use History mode. The `id` column is the table's Primary Key. + + The following record is added to the destination table in a first replication job. The `_sdc_end_date` column is set to `9999-12-31` to indicate that this is the latest version of this record: + + |id|status|_sdc_start_date|_sdc_end_date| + |---|---|---|---| + |abc-123|Pending|2022-10-21|**9999-12-31**| + + + The record is then updated in the source. A second replication job creates a new version of the existing record on December 14, 2022. The previous version's `_sdc_end_date` value is updated and the new version is added to the table. The destination table now looks like this: + + |id|status|_sdc_start_date|_sdc_end_date| + |---|---|---|---| + |abc-123|Pending|2022-10-21|**2022-12-14**| + |abc-123|In progress|**2022-12-14**|9999-12-31| + - title: "Reference" anchor: "reference" summary: "References lists for destinations, integrations, and loading behavior"