|
55 | 55 | )
|
56 | 56 | from data_diff.abcs.database_types import (
|
57 | 57 | Array,
|
| 58 | + ColType_UUID, |
| 59 | + FractionalType, |
58 | 60 | Struct,
|
59 | 61 | ColType,
|
60 | 62 | Integer,
|
|
74 | 76 | JSON,
|
75 | 77 | )
|
76 | 78 | from data_diff.abcs.mixins import Compilable
|
77 |
| -from data_diff.abcs.mixins import AbstractMixin_NormalizeValue |
78 | 79 |
|
79 | 80 | logger = logging.getLogger("database")
|
80 | 81 | cv_params = contextvars.ContextVar("params")
|
@@ -762,6 +763,95 @@ def to_string(self, s: str) -> str:
|
762 | 763 | def set_timezone_to_utc(self) -> str:
|
763 | 764 | "Provide SQL for setting the session timezone to UTC"
|
764 | 765 |
|
@abstractmethod
def md5_as_int(self, s: str) -> str:
    """Provide SQL for computing the md5 of ``s`` and returning it as an int.

    Declared abstract: there is no default implementation here, so each
    concrete implementation is expected to supply its own.

    ``s`` is presumably an SQL expression (already rendered to text), like the
    ``value`` arguments of the sibling ``normalize_*`` methods -- confirm
    against callers.
    """
| 769 | + |
@abstractmethod
def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
    """Create an SQL expression that converts ``value`` to a normalized timestamp.

    The returned expression must accept any SQL datetime/timestamp, and
    return a string.

    Date format: ``YYYY-MM-DD HH:mm:SS.FFFFFF``

    Precision of dates should be rounded up/down according to
    ``coltype.rounds``.

    Declared abstract: there is no default implementation here.
    """
| 780 | + |
@abstractmethod
def normalize_number(self, value: str, coltype: FractionalType) -> str:
    """Create an SQL expression that converts ``value`` to a normalized number.

    The returned expression must accept any SQL int/numeric/float, and
    return a string.

    Floats/Decimals are expected in the format
    "I.P"

    Where I is the integer part of the number (as many digits as necessary),
    and must be at least one digit (0).
    P is the fractional digits, the amount of which is specified with
    ``coltype.precision``. Trailing zeroes may be necessary.
    If P is 0, the dot is omitted.

    Note: We use 'precision' differently than most databases. For decimals,
    it's the same as ``numeric_scale``, and for floats, which use binary
    precision, it can be calculated as ``log10(2**numeric_precision)``.

    Declared abstract: there is no default implementation here.
    """
| 800 | + |
def normalize_boolean(self, value: str, _coltype: Boolean) -> str:
    """Create an SQL expression that converts ``value`` to either '0' or '1'.

    The default implementation only delegates to ``self.to_string(value)``
    and ignores ``_coltype``.

    NOTE(review): nothing here forces the '0'/'1' spelling -- that depends on
    what ``to_string()`` yields for a boolean; presumably implementations
    where it differs override this method. Confirm.
    """
    return self.to_string(value)
| 804 | + |
def normalize_uuid(self, value: str, coltype: ColType_UUID) -> str:
    """Create an SQL expression that strips uuids of artifacts like whitespace.

    When ``coltype`` is a ``String_UUID`` the expression is wrapped in
    ``TRIM(...)``; for any other ``coltype`` the value is passed through
    ``self.to_string()``.
    """
    if isinstance(coltype, String_UUID):
        return f"TRIM({value})"
    return self.to_string(value)
| 810 | + |
def normalize_json(self, value: str, _coltype: JSON) -> str:
    """Create an SQL expression that converts ``value`` to its minified json string representation.

    The default implementation only delegates to ``self.to_string(value)``
    and ignores ``_coltype``.

    NOTE(review): whether the result is actually minified depends on what
    ``to_string()`` produces for a JSON value -- confirm.
    """
    return self.to_string(value)
| 814 | + |
def normalize_array(self, value: str, _coltype: Array) -> str:
    """Create an SQL expression that serializes an array into a JSON string.

    The default implementation only delegates to ``self.to_string(value)``
    and ignores ``_coltype``.

    NOTE(review): whether the result is JSON depends on what ``to_string()``
    produces for an array value -- confirm.
    """
    return self.to_string(value)
| 818 | + |
def normalize_struct(self, value: str, _coltype: Struct) -> str:
    """Create an SQL expression that serializes a typed struct into a JSON string.

    The default implementation only delegates to ``self.to_string(value)``
    and ignores ``_coltype``.

    NOTE(review): whether the result is JSON depends on what ``to_string()``
    produces for a struct value -- confirm.
    """
    return self.to_string(value)
| 822 | + |
def normalize_value_by_type(self, value: str, coltype: ColType) -> str:
    """Create an SQL expression that converts ``value`` to a normalized representation.

    The returned expression must accept any SQL value, and return a string.

    The default implementation dispatches to a method according to
    ``coltype``. The checks run in the order listed and the first match wins:

    ::

        TemporalType   -> normalize_timestamp()
        FractionalType -> normalize_number()
        ColType_UUID   -> normalize_uuid()
        Boolean        -> normalize_boolean()
        JSON           -> normalize_json()
        Array          -> normalize_array()
        Struct         -> normalize_struct()
        *else*         -> to_string()

    (`Integer` falls in the *else* category)
    """
    # Order matters: isinstance() also matches subclasses, so keep the
    # sequence stable when adding new column types.
    if isinstance(coltype, TemporalType):
        return self.normalize_timestamp(value, coltype)
    if isinstance(coltype, FractionalType):
        return self.normalize_number(value, coltype)
    if isinstance(coltype, ColType_UUID):
        return self.normalize_uuid(value, coltype)
    if isinstance(coltype, Boolean):
        return self.normalize_boolean(value, coltype)
    if isinstance(coltype, JSON):
        return self.normalize_json(value, coltype)
    if isinstance(coltype, Array):
        return self.normalize_array(value, coltype)
    if isinstance(coltype, Struct):
        return self.normalize_struct(value, coltype)
    # No dedicated normalizer for this column type: plain string conversion.
    return self.to_string(value)
| 854 | + |
def optimizer_hints(self, hints: str) -> str:
    """Wrap ``hints`` in a ``/*+ ... */`` comment, followed by one space.

    The trailing space is part of the returned text, so the result can be
    placed directly in front of the next SQL token.
    """
    return f"/*+ {hints} */ "
|
767 | 857 |
|
@@ -960,10 +1050,7 @@ def _refine_coltypes(
|
960 | 1050 | if not text_columns:
|
961 | 1051 | return
|
962 | 1052 |
|
963 |
| - if isinstance(self.dialect, AbstractMixin_NormalizeValue): |
964 |
| - fields = [Code(self.dialect.normalize_uuid(self.dialect.quote(c), String_UUID())) for c in text_columns] |
965 |
| - else: |
966 |
| - fields = this[text_columns] |
| 1053 | + fields = [Code(self.dialect.normalize_uuid(self.dialect.quote(c), String_UUID())) for c in text_columns] |
967 | 1054 |
|
968 | 1055 | samples_by_row = self.query(
|
969 | 1056 | table(*table_path).select(*fields).where(Code(where) if where else SKIP).limit(sample_size), list
|
|
0 commit comments