Skip to content

Commit 7deb739

Browse files
authored
Add SqlCatalog _commit_table support (#265)
* sql commit
* SqlCatalog _commit_table
* better variable names
* fallback to FOR UPDATE commit when engine.dialect.supports_sane_rowcount is False
* remove stray print
* wait
* better logging
1 parent 2d30119 commit 7deb739

File tree

2 files changed

+165
-22
lines changed

2 files changed

+165
-22
lines changed

pyiceberg/catalog/sql.py

Lines changed: 110 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
union,
3232
update,
3333
)
34-
from sqlalchemy.exc import IntegrityError, OperationalError
34+
from sqlalchemy.exc import IntegrityError, NoResultFound, OperationalError
3535
from sqlalchemy.orm import (
3636
DeclarativeBase,
3737
Mapped,
@@ -48,6 +48,7 @@
4848
PropertiesUpdateSummary,
4949
)
5050
from pyiceberg.exceptions import (
51+
CommitFailedException,
5152
NamespaceAlreadyExistsError,
5253
NamespaceNotEmptyError,
5354
NoSuchNamespaceError,
@@ -59,7 +60,7 @@
5960
from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec
6061
from pyiceberg.schema import Schema
6162
from pyiceberg.serializers import FromInputFile
62-
from pyiceberg.table import CommitTableRequest, CommitTableResponse, Table
63+
from pyiceberg.table import CommitTableRequest, CommitTableResponse, Table, update_table_metadata
6364
from pyiceberg.table.metadata import new_table_metadata
6465
from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder
6566
from pyiceberg.typedef import EMPTY_DICT
@@ -268,16 +269,32 @@ def drop_table(self, identifier: Union[str, Identifier]) -> None:
268269
identifier_tuple = self.identifier_to_tuple_without_catalog(identifier)
269270
database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError)
270271
with Session(self.engine) as session:
271-
res = session.execute(
272-
delete(IcebergTables).where(
273-
IcebergTables.catalog_name == self.name,
274-
IcebergTables.table_namespace == database_name,
275-
IcebergTables.table_name == table_name,
272+
if self.engine.dialect.supports_sane_rowcount:
273+
res = session.execute(
274+
delete(IcebergTables).where(
275+
IcebergTables.catalog_name == self.name,
276+
IcebergTables.table_namespace == database_name,
277+
IcebergTables.table_name == table_name,
278+
)
276279
)
277-
)
280+
if res.rowcount < 1:
281+
raise NoSuchTableError(f"Table does not exist: {database_name}.{table_name}")
282+
else:
283+
try:
284+
tbl = (
285+
session.query(IcebergTables)
286+
.with_for_update(of=IcebergTables)
287+
.filter(
288+
IcebergTables.catalog_name == self.name,
289+
IcebergTables.table_namespace == database_name,
290+
IcebergTables.table_name == table_name,
291+
)
292+
.one()
293+
)
294+
session.delete(tbl)
295+
except NoResultFound as e:
296+
raise NoSuchTableError(f"Table does not exist: {database_name}.{table_name}") from e
278297
session.commit()
279-
if res.rowcount < 1:
280-
raise NoSuchTableError(f"Table does not exist: {database_name}.{table_name}")
281298

282299
def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: Union[str, Identifier]) -> Table:
283300
"""Rename a fully classified table name.
@@ -301,18 +318,35 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U
301318
raise NoSuchNamespaceError(f"Namespace does not exist: {to_database_name}")
302319
with Session(self.engine) as session:
303320
try:
304-
stmt = (
305-
update(IcebergTables)
306-
.where(
307-
IcebergTables.catalog_name == self.name,
308-
IcebergTables.table_namespace == from_database_name,
309-
IcebergTables.table_name == from_table_name,
321+
if self.engine.dialect.supports_sane_rowcount:
322+
stmt = (
323+
update(IcebergTables)
324+
.where(
325+
IcebergTables.catalog_name == self.name,
326+
IcebergTables.table_namespace == from_database_name,
327+
IcebergTables.table_name == from_table_name,
328+
)
329+
.values(table_namespace=to_database_name, table_name=to_table_name)
310330
)
311-
.values(table_namespace=to_database_name, table_name=to_table_name)
312-
)
313-
result = session.execute(stmt)
314-
if result.rowcount < 1:
315-
raise NoSuchTableError(f"Table does not exist: {from_table_name}")
331+
result = session.execute(stmt)
332+
if result.rowcount < 1:
333+
raise NoSuchTableError(f"Table does not exist: {from_table_name}")
334+
else:
335+
try:
336+
tbl = (
337+
session.query(IcebergTables)
338+
.with_for_update(of=IcebergTables)
339+
.filter(
340+
IcebergTables.catalog_name == self.name,
341+
IcebergTables.table_namespace == from_database_name,
342+
IcebergTables.table_name == from_table_name,
343+
)
344+
.one()
345+
)
346+
tbl.table_namespace = to_database_name
347+
tbl.table_name = to_table_name
348+
except NoResultFound as e:
349+
raise NoSuchTableError(f"Table does not exist: {from_table_name}") from e
316350
session.commit()
317351
except IntegrityError as e:
318352
raise TableAlreadyExistsError(f"Table {to_database_name}.{to_table_name} already exists") from e
@@ -329,8 +363,62 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons
329363
330364
Raises:
331365
NoSuchTableError: If a table with the given identifier does not exist.
366+
CommitFailedException: If the commit failed.
332367
"""
333-
raise NotImplementedError
368+
identifier_tuple = self.identifier_to_tuple_without_catalog(
369+
tuple(table_request.identifier.namespace.root + [table_request.identifier.name])
370+
)
371+
current_table = self.load_table(identifier_tuple)
372+
database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError)
373+
base_metadata = current_table.metadata
374+
for requirement in table_request.requirements:
375+
requirement.validate(base_metadata)
376+
377+
updated_metadata = update_table_metadata(base_metadata, table_request.updates)
378+
if updated_metadata == base_metadata:
379+
# no changes, do nothing
380+
return CommitTableResponse(metadata=base_metadata, metadata_location=current_table.metadata_location)
381+
382+
# write new metadata
383+
new_metadata_version = self._parse_metadata_version(current_table.metadata_location) + 1
384+
new_metadata_location = self._get_metadata_location(current_table.metadata.location, new_metadata_version)
385+
self._write_metadata(updated_metadata, current_table.io, new_metadata_location)
386+
387+
with Session(self.engine) as session:
388+
if self.engine.dialect.supports_sane_rowcount:
389+
stmt = (
390+
update(IcebergTables)
391+
.where(
392+
IcebergTables.catalog_name == self.name,
393+
IcebergTables.table_namespace == database_name,
394+
IcebergTables.table_name == table_name,
395+
IcebergTables.metadata_location == current_table.metadata_location,
396+
)
397+
.values(metadata_location=new_metadata_location, previous_metadata_location=current_table.metadata_location)
398+
)
399+
result = session.execute(stmt)
400+
if result.rowcount < 1:
401+
raise CommitFailedException(f"Table has been updated by another process: {database_name}.{table_name}")
402+
else:
403+
try:
404+
tbl = (
405+
session.query(IcebergTables)
406+
.with_for_update(of=IcebergTables)
407+
.filter(
408+
IcebergTables.catalog_name == self.name,
409+
IcebergTables.table_namespace == database_name,
410+
IcebergTables.table_name == table_name,
411+
IcebergTables.metadata_location == current_table.metadata_location,
412+
)
413+
.one()
414+
)
415+
tbl.metadata_location = new_metadata_location
416+
tbl.previous_metadata_location = current_table.metadata_location
417+
except NoResultFound as e:
418+
raise CommitFailedException(f"Table has been updated by another process: {database_name}.{table_name}") from e
419+
session.commit()
420+
421+
return CommitTableResponse(metadata=updated_metadata, metadata_location=new_metadata_location)
334422

335423
def _namespace_exists(self, identifier: Union[str, Identifier]) -> bool:
336424
namespace = self.identifier_to_database(identifier)

tests/catalog/test_sql.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
SortOrder,
4343
)
4444
from pyiceberg.transforms import IdentityTransform
45+
from pyiceberg.types import IntegerType
4546

4647

4748
@pytest.fixture(name="warehouse", scope="session")
@@ -87,6 +88,19 @@ def catalog_sqlite(warehouse: Path) -> Generator[SqlCatalog, None, None]:
8788
catalog.destroy_tables()
8889

8990

91+
@pytest.fixture(scope="module")
def catalog_sqlite_without_rowcount(warehouse: Path) -> Generator[SqlCatalog, None, None]:
    """Yield a SQLite-backed ``SqlCatalog`` whose dialect reports no sane rowcount.

    After the catalog is constructed, ``engine.dialect.supports_sane_rowcount``
    is forced to ``False`` so that catalog code branching on that flag takes
    its non-rowcount path. The catalog tables are created before the fixture
    value is yielded and destroyed again on teardown.

    Args:
        warehouse: Directory used to build the ``file://`` warehouse location.

    Yields:
        The configured ``SqlCatalog`` instance.
    """
    sql_catalog = SqlCatalog(
        "test_sql_catalog",
        uri="sqlite:////tmp/sql-catalog.db",
        warehouse=f"file://{warehouse}",
    )
    # Pretend the driver cannot report affected-row counts.
    sql_catalog.engine.dialect.supports_sane_rowcount = False
    sql_catalog.create_tables()
    yield sql_catalog
    sql_catalog.destroy_tables()
102+
103+
90104
def test_creation_with_no_uri() -> None:
91105
with pytest.raises(NoSuchPropertyException):
92106
SqlCatalog("test_ddb_catalog", not_uri="unused")
@@ -305,6 +319,7 @@ def test_load_table_from_self_identifier(catalog: SqlCatalog, table_schema_neste
305319
[
306320
lazy_fixture('catalog_memory'),
307321
lazy_fixture('catalog_sqlite'),
322+
lazy_fixture('catalog_sqlite_without_rowcount'),
308323
],
309324
)
310325
def test_drop_table(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None:
@@ -322,6 +337,7 @@ def test_drop_table(catalog: SqlCatalog, table_schema_nested: Schema, random_ide
322337
[
323338
lazy_fixture('catalog_memory'),
324339
lazy_fixture('catalog_sqlite'),
340+
lazy_fixture('catalog_sqlite_without_rowcount'),
325341
],
326342
)
327343
def test_drop_table_from_self_identifier(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None:
@@ -341,6 +357,7 @@ def test_drop_table_from_self_identifier(catalog: SqlCatalog, table_schema_neste
341357
[
342358
lazy_fixture('catalog_memory'),
343359
lazy_fixture('catalog_sqlite'),
360+
lazy_fixture('catalog_sqlite_without_rowcount'),
344361
],
345362
)
346363
def test_drop_table_that_does_not_exist(catalog: SqlCatalog, random_identifier: Identifier) -> None:
@@ -353,6 +370,7 @@ def test_drop_table_that_does_not_exist(catalog: SqlCatalog, random_identifier:
353370
[
354371
lazy_fixture('catalog_memory'),
355372
lazy_fixture('catalog_sqlite'),
373+
lazy_fixture('catalog_sqlite_without_rowcount'),
356374
],
357375
)
358376
def test_rename_table(
@@ -377,6 +395,7 @@ def test_rename_table(
377395
[
378396
lazy_fixture('catalog_memory'),
379397
lazy_fixture('catalog_sqlite'),
398+
lazy_fixture('catalog_sqlite_without_rowcount'),
380399
],
381400
)
382401
def test_rename_table_from_self_identifier(
@@ -403,6 +422,7 @@ def test_rename_table_from_self_identifier(
403422
[
404423
lazy_fixture('catalog_memory'),
405424
lazy_fixture('catalog_sqlite'),
425+
lazy_fixture('catalog_sqlite_without_rowcount'),
406426
],
407427
)
408428
def test_rename_table_to_existing_one(
@@ -425,6 +445,7 @@ def test_rename_table_to_existing_one(
425445
[
426446
lazy_fixture('catalog_memory'),
427447
lazy_fixture('catalog_sqlite'),
448+
lazy_fixture('catalog_sqlite_without_rowcount'),
428449
],
429450
)
430451
def test_rename_missing_table(catalog: SqlCatalog, random_identifier: Identifier, another_random_identifier: Identifier) -> None:
@@ -439,6 +460,7 @@ def test_rename_missing_table(catalog: SqlCatalog, random_identifier: Identifier
439460
[
440461
lazy_fixture('catalog_memory'),
441462
lazy_fixture('catalog_sqlite'),
463+
lazy_fixture('catalog_sqlite_without_rowcount'),
442464
],
443465
)
444466
def test_rename_table_to_missing_namespace(
@@ -664,3 +686,36 @@ def test_update_namespace_properties(catalog: SqlCatalog, database_name: str) ->
664686
else:
665687
assert k in update_report.removed
666688
assert "updated test description" == catalog.load_namespace_properties(database_name)["comment"]
689+
690+
691+
@pytest.mark.parametrize(
692+
'catalog',
693+
[
694+
lazy_fixture('catalog_memory'),
695+
lazy_fixture('catalog_sqlite'),
696+
lazy_fixture('catalog_sqlite_without_rowcount'),
697+
],
698+
)
699+
def test_commit_table(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None:
700+
database_name, _table_name = random_identifier
701+
catalog.create_namespace(database_name)
702+
table = catalog.create_table(random_identifier, table_schema_nested)
703+
704+
assert catalog._parse_metadata_version(table.metadata_location) == 0
705+
assert table.metadata.current_schema_id == 0
706+
707+
transaction = table.transaction()
708+
update = transaction.update_schema()
709+
update.add_column(path="b", field_type=IntegerType())
710+
update.commit()
711+
transaction.commit_transaction()
712+
713+
updated_table_metadata = table.metadata
714+
715+
assert catalog._parse_metadata_version(table.metadata_location) == 1
716+
assert updated_table_metadata.current_schema_id == 1
717+
assert len(updated_table_metadata.schemas) == 2
718+
new_schema = next(schema for schema in updated_table_metadata.schemas if schema.schema_id == 1)
719+
assert new_schema
720+
assert new_schema == update._apply()
721+
assert new_schema.find_field("b").field_type == IntegerType()

0 commit comments

Comments
 (0)