datahub-project · askumar27 · Nov 18, 2025 · Nov 19, 2025 · Nov 19, 2025 · Nov 19, 2025
diff --git a/metadata-ingestion/docs/sources/fivetran/fivetran_pre.md b/metadata-ingestion/docs/sources/fivetran/fivetran_pre.md
@@ -14,6 +14,34 @@ This source extracts the following:
 3. Once initial sync up of your fivetran platform connector is done, you need to provide the fivetran platform connector's destination platform and its configuration in the recipe.
 4. We expect our users to enable automatic schema updates (default) in fivetran platform connector configured for DataHub, this ensures latest schema changes are applied and avoids inconsistency data syncs.
 
+### Database and Schema Name Handling
+
+The Fivetran source uses **quoted identifiers** for database and schema names to properly handle special characters and case-sensitive names. This follows Snowflake's quoted identifier convention, which is then transpiled to the target database dialect (Snowflake, BigQuery, or Databricks).
+
+**Important Notes:**
+
+- **Database names** are automatically wrapped in double quotes (e.g., `use database "my-database"`)
+- **Schema names** are automatically wrapped in double quotes (e.g., `"my-schema".table_name`)
+- This ensures proper handling of database and schema names containing:
+  - Hyphens (e.g., `my-database`)
+  - Spaces (e.g., `my database`)
+  - Special characters (e.g., `my.database`)
+  - Case-sensitive names (e.g., `MyDatabase`)
+
+**Migration Impact:**
+
+- If you have database or schema names with special characters, they will now be properly quoted in SQL queries
+- This change ensures consistent behavior across all supported destination platforms
+- No configuration changes are required - the quoting is handled automatically
+
+**Case Sensitivity Considerations:**
+
+- **Important**: In Snowflake, unquoted identifiers are automatically converted to uppercase when stored and resolved (e.g., `mydatabase` becomes `MYDATABASE`), while double-quoted identifiers preserve the exact case as entered (e.g., `"mydatabase"` stays as `mydatabase`). See [Snowflake's identifier documentation](https://docs.snowflake.com/en/sql-reference/identifiers-syntax#double-quoted-identifiers) for details.
+- **Backward Compatibility**: The system automatically handles backward compatibility for valid unquoted identifiers (identifiers containing only letters, numbers, and underscores). These identifiers are automatically uppercased before quoting to match Snowflake's behavior for unquoted identifiers. This means:
+  - If your database/schema name is a valid unquoted identifier (e.g., `fivetran_logs`, `MY_SCHEMA`), it will be automatically uppercased to match existing Snowflake objects created without quotes
+  - No configuration changes are required for standard identifiers (letters, numbers, underscores only)
+- **Recommended**: As a best practice, write database and schema names in your configuration using the exact case in which they are stored in Snowflake
+
 ## Concept mapping
 
 | Fivetran        | Datahub                                                                                               |
@@ -57,6 +85,7 @@ create or replace role fivetran_datahub;
 grant operate, usage on warehouse "<your-warehouse>" to role fivetran_datahub;
 
 // Grant access to view database and schema in which your log and metadata tables exist
+// Note: Database and schema names are automatically quoted, so use quoted identifiers if your names contain special characters
 grant usage on DATABASE "<fivetran-log-database>" to role fivetran_datahub;
 grant usage on SCHEMA "<fivetran-log-database>"."<fivetran-log-schema>" to role fivetran_datahub;
 

diff --git a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_log_api.py b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_log_api.py
@@ -50,13 +50,39 @@ def _initialize_fivetran_variables(
                     snowflake_destination_config.get_sql_alchemy_url(),
                     **snowflake_destination_config.get_options(),
                 )
-                engine.execute(
-                    fivetran_log_query.use_database(
-                        snowflake_destination_config.database,
+
+                """
+                Special Handling for Snowflake Backward Compatibility:
+                We have migrated to using quoted identifiers for database and schema names.
+                However, we need to support backward compatibility for existing databases and schemas that were created with unquoted identifiers.
+                When an unquoted identifier is used, we automatically convert it to an uppercase quoted identifier (this mirrors how Snowflake resolves unquoted identifiers).
+                unquoted identifier -> uppercase + quoted identifier -> Snowflake resolves the identifier
+                """
+                snowflake_database = (
+                    snowflake_destination_config.database.upper()
+                    if FivetranLogQuery._is_valid_unquoted_identifier(
+                        snowflake_destination_config.database
                     )
+                    else snowflake_destination_config.database
+                )
+                logger.info(
+                    f"Using snowflake database: {snowflake_database} (original: {snowflake_destination_config.database})"
+                )
+                engine.execute(fivetran_log_query.use_database(snowflake_database))
+
+                snowflake_schema = (
+                    snowflake_destination_config.log_schema.upper()
+                    if FivetranLogQuery._is_valid_unquoted_identifier(
+                        snowflake_destination_config.log_schema
+                    )
+                    else snowflake_destination_config.log_schema
+                )
+
+                logger.info(
+                    f"Using snowflake schema: {snowflake_schema} (original: {snowflake_destination_config.log_schema})"
                 )
                 fivetran_log_query.set_schema(
-                    snowflake_destination_config.log_schema,
+                    snowflake_schema,
                 )
                 fivetran_log_database = snowflake_destination_config.database
         elif destination_platform == "bigquery":

diff --git a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_query.py b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_query.py
@@ -1,3 +1,4 @@
+import re
 from typing import List
 
 # Safeguards to prevent fetching massive amounts of data.
@@ -29,17 +30,60 @@ def __init__(self) -> None:
         # Select query db clause
         self.schema_clause: str = ""
 
+    @staticmethod
+    def _is_valid_unquoted_identifier(identifier: str) -> bool:
+        """
+        Check if an identifier can be used unquoted in Snowflake.
+
+        Snowflake unquoted identifiers must:
+        - Start with a letter (A-Z, a-z) or underscore (_)
+        - Contain only letters, numbers, and underscores
+
+        Letter case is not checked here: Snowflake auto-converts unquoted identifiers to uppercase, so lowercase and mixed-case input is accepted.
+
+        Ref: https://docs.snowflake.com/en/sql-reference/identifiers-syntax#unquoted-identifiers
+        """
+        if not identifier:
+            return False
+
+        # Check if it's already quoted (starts and ends with double quotes)
+        if identifier.startswith('"') and identifier.endswith('"'):
+            return False
+
+        # Check if it starts with letter or underscore
+        if not (identifier[0].isalpha() or identifier[0] == "_"):
+            return False
+
+        # Check if it contains only alphanumeric characters and underscores
+        if not re.match(r"^[A-Za-z0-9_]+$", identifier):
+            return False
+
+        # For Snowflake, unquoted identifiers are case-insensitive and auto-converted to uppercase
+        # This means we have received an unquoted identifier, and we can convert it to a quoted identifier in uppercase
+        return True
+
     def use_database(self, db_name: str) -> str:
-        return f"use database {db_name}"
+        """
+        Using Snowflake quoted identifiers convention
+        Ref: https://docs.snowflake.com/en/sql-reference/identifiers-syntax#double-quoted-identifiers
+
+        Add double quotes around an identifier
+        """
+        db_name = db_name.replace(
+            '"', '""'
+        )  # Replace double quotes with two double quotes to use the double quote character inside a quoted identifier
+        return f'use database "{db_name}"'
 
     def set_schema(self, schema_name: str) -> None:
         """
         Using Snowflake quoted identifiers convention
+        Ref: https://docs.snowflake.com/en/sql-reference/identifiers-syntax#double-quoted-identifiers
 
         Add double quotes around an identifier
         Use two quotes to use the double quote character inside a quoted identifier
         """
-        schema_name = schema_name.replace('"', '""')
+        schema_name = schema_name.replace(
+            '"', '""'
+        )  # Replace double quotes with two double quotes to use the double quote character inside a quoted identifier
         self.schema_clause = f'"{schema_name}".'
 
     def get_connectors_query(self) -> str: