From e202861e4de46a172d8ff802d990b77d619a40c3 Mon Sep 17 00:00:00 2001
From: Onur Solmaz <2453968+osolmaz@users.noreply.github.com>
Date: Wed, 19 Mar 2025 21:47:40 +0100
Subject: [PATCH 01/10] Extract rich text from blocks, wip

---
 jsondoc/__init__.py          |   1 +
 jsondoc/extract_rich_text.py | 163 +++++++++++++++++++++++++++++++++++
 jsondoc/utils.py             |  10 ++-
 3 files changed, 172 insertions(+), 2 deletions(-)
 create mode 100644 jsondoc/__init__.py
 create mode 100644 jsondoc/extract_rich_text.py

diff --git a/jsondoc/__init__.py b/jsondoc/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/jsondoc/__init__.py
@@ -0,0 +1 @@
+
diff --git a/jsondoc/extract_rich_text.py b/jsondoc/extract_rich_text.py
new file mode 100644
index 0000000..493216a
--- /dev/null
+++ b/jsondoc/extract_rich_text.py
@@ -0,0 +1,163 @@
+from typing import Dict, List, Optional, Union
+
+from pydantic import BaseModel
+
+from jsondoc.convert.utils import block_supports_rich_text, get_rich_text_from_block
+from jsondoc.models.block.base import BlockBase
+from jsondoc.models.block.types.rich_text.base import RichTextBase
+from jsondoc.models.page import Page
+
+
+class BackRef(BaseModel):
+    block_id: str
+    begin_idx: int
+    end_idx: int
+
+
+class TextWithBackref(BaseModel):
+    text: str
+    backrefs: list[BackRef]
+
+
+def extract_rich_text_from_page(
+    page: Page, include_annotations: bool = False
+) -> Dict[str, Union[str, List[Dict]]]:
+    """
+    Extract all rich text content from a JSON-DOC page.
+
+    Args:
+        page: A JSON-DOC Page object
+        include_annotations: If True, includes formatting info (bold, italic, etc.) in the output
+
+    Returns:
+        A dictionary containing:
+        - 'title': The page title text
+        - 'content': A list of text content from all blocks, each item is either:
+          - A string (if include_annotations=False)
+          - A dict with 'text' and 'annotations' (if include_annotations=True)
+    """
+    result = {"title": "", "content": []}
+
+    # Extract title
+    if page.properties.title and page.properties.title.title:
+        title_texts = []
+        for rich_text in page.properties.title.title:
+            title_texts.append(rich_text.plain_text)
+        result["title"] = "".join(title_texts)
+
+    # Process all blocks recursively
+    result["content"] = extract_rich_text_from_blocks(
+        page.children, include_annotations
+    )
+
+    return result
+
+
+def _process_rich_text_items(rich_text_list, include_annotations, result):
+    """
+    Helper function to process a list of rich text items and append them to the result.
+
+    Args:
+        rich_text_list: List of rich text items to process
+        include_annotations: Whether to include formatting annotations
+        result: The result list to append items to
+    """
+    if not rich_text_list:
+        return
+
+    for rich_text in rich_text_list:
+        if include_annotations:
+            result.append(
+                {
+                    "text": rich_text.plain_text,
+                    "annotations": rich_text.annotations.model_dump()
+                    if hasattr(rich_text, "annotations")
+                    else {},
+                    "href": rich_text.href if hasattr(rich_text, "href") else None,
+                }
+            )
+        else:
+            result.append(rich_text.plain_text)
+
+
+def extract_rich_text_from_blocks(
+    blocks: List[BlockBase], include_annotations: bool = False
+) -> List[Union[str, Dict]]:
+    """
+    Extract rich text content from a list of blocks recursively.
+
+    Args:
+        blocks: List of BlockBase objects
+        include_annotations: If True, includes formatting info in the output
+
+    Returns:
+        List of text content, either as strings or annotation dictionaries
+    """
+    result = []
+
+    for block in blocks:
+        # Extract rich text if the block supports it
+        if block_supports_rich_text(block):
+            try:
+                rich_text_list = get_rich_text_from_block(block)
+                _process_rich_text_items(rich_text_list, include_annotations, result)
+            except ValueError:
+                # Block doesn't support rich text (shouldn't happen due to our check)
+                pass
+
+        # Extract captions from blocks that support them
+        if block.type == "image" and hasattr(block.image, "caption"):
+            _process_rich_text_items(block.image.caption, include_annotations, result)
+        elif block.type == "code" and hasattr(block.code, "caption"):
+            _process_rich_text_items(block.code.caption, include_annotations, result)
+
+        # Process child blocks recursively
+        if hasattr(block, "children") and block.children:
+            child_content = extract_rich_text_from_blocks(
+                block.children, include_annotations
+            )
+            result.extend(child_content)
+
+        # Handle special blocks like tables that have rich text in different structure
+        if block.type == "table_row" and hasattr(block.table_row, "cells"):
+            for cell in block.table_row.cells:
+                if isinstance(cell, list):
+                    _process_rich_text_items(cell, include_annotations, result)
+
+    return result
+
+
+def extract_plain_text_from_page(page: Page) -> str:
+    """
+    Extract all plain text content from a JSON-DOC page and return it as a single string.
+
+    Args:
+        page: A JSON-DOC Page object
+
+    Returns:
+        A string containing all the text content from the page
+    """
+    extracted = extract_rich_text_from_page(page, include_annotations=False)
+
+    # Join title and content with appropriate separators
+    result = []
+    if extracted["title"]:
+        result.append(extracted["title"])
+
+    if extracted["content"]:
+        # Filter out empty strings and join with spaces
+        content_text = " ".join([item for item in extracted["content"] if item])
+        if content_text:
+            result.append(content_text)
+
+    return "\n\n".join(result)
+
+
+def extract_text_from_jsondoc_page_with_block_backref(
+    page: Page,
+) -> list[TextWithBackref]:
+    """
+    Extract rich text from jsondoc data.
+    """
+    rich_text = extract_rich_text_from_page(page)
+    return rich_text
diff --git a/jsondoc/utils.py b/jsondoc/utils.py
index 38eefdb..8d0548c 100644
--- a/jsondoc/utils.py
+++ b/jsondoc/utils.py
@@ -6,6 +6,8 @@
 from contextlib import contextmanager
 from datetime import datetime, timezone
 
+from jsondoc.models.block.base import CreatedBy
+
 ARBITRARY_JSON_SCHEMA_OBJECT = {
     "type": "object",
     "properties": {},
@@ -188,9 +190,13 @@ def set_field_recursive(obj: any, field_name: str, value: any) -> None:
     #             set_field_recursive(v, field_name, value)
 
 
-def set_created_by(obj: any, created_by: str) -> None:
+def set_created_by(obj: any, created_by: str | CreatedBy) -> None:
     """
     Recursively sets the 'created_by' field to the given value in the given object.
     """
-    assert isinstance(created_by, str)
+    assert isinstance(created_by, (str, CreatedBy))
+
+    if isinstance(created_by, str):
+        created_by = CreatedBy(id=created_by, object="user")
+
     set_field_recursive(obj, "created_by", created_by)

From a31df579edfae094bed8d890aaa474bdaa0e3569 Mon Sep 17 00:00:00 2001
From: Onur Solmaz <2453968+osolmaz@users.noreply.github.com>
Date: Wed, 19 Mar 2025 22:12:43 +0100
Subject: [PATCH 02/10] Add TextWithBackref

---
 jsondoc/extract_rich_text.py | 167 ++++++++++++++++++++++++++++++++---
 1 file changed, 157 insertions(+), 10 deletions(-)

diff --git a/jsondoc/extract_rich_text.py b/jsondoc/extract_rich_text.py
index 493216a..c7a7c9b 100644
--- a/jsondoc/extract_rich_text.py
+++ b/jsondoc/extract_rich_text.py
@@ -1,4 +1,4 @@
-from typing import Dict, List, Optional, Union
+from typing import Dict, List, Union
 
 from pydantic import BaseModel
 
@@ -19,22 +19,165 @@ class TextWithBackref(BaseModel):
     backrefs: list[BackRef]
 
 
-def extract_rich_text_from_page(
+def extract_text_with_backref_from_page(
     page: Page, include_annotations: bool = False
-) -> Dict[str, Union[str, List[Dict]]]:
+) -> TextWithBackref:
+    """
+    Extract all rich text content from a JSON-DOC page as a single string
+    with backrefs tracking the block origins.
+
+    Args:
+        page: A JSON-DOC Page object
+        include_annotations: If True, includes formatting info (not used in backref tracking)
+
+    Returns:
+        TextWithBackref: Object containing concatenated text and backrefs
+    """
+    concat_text = ""
+    backrefs = []
+
+    # Extract title
+    title_text = ""
+    if page.properties.title and page.properties.title.title:
+        for rich_text in page.properties.title.title:
+            title_text += rich_text.plain_text
+
+    if title_text:
+        begin_idx = len(concat_text)
+        concat_text += title_text
+        end_idx = len(concat_text)
+        # Add a backref for the page title using the page's ID
+        backrefs.append(BackRef(block_id=page.id, begin_idx=begin_idx, end_idx=end_idx))
+        # Add a newline after the title
+        concat_text += "\n\n"
+
+    # Process all blocks recursively and collect their text with backrefs
+    blocks_with_text = _extract_blocks_with_text(page.children, include_annotations)
+
+    # Add all blocks to the concatenated text with their respective backrefs
+    for block_id, block_text in blocks_with_text:
+        if block_text:
+            begin_idx = len(concat_text)
+            concat_text += block_text
+            end_idx = len(concat_text)
+
+            backrefs.append(
+                BackRef(block_id=block_id, begin_idx=begin_idx, end_idx=end_idx)
+            )
+
+            # Add a space after each block
+            concat_text += " "
+
+    return TextWithBackref(text=concat_text.strip(), backrefs=backrefs)
+
+
+def _extract_blocks_with_text(
+    blocks: List[BlockBase], include_annotations: bool = False
+) -> List[tuple[str, str]]:
+    """
+    Extract text from blocks and return a list of (block_id, text) tuples.
+
+    Args:
+        blocks: List of blocks to process
+        include_annotations: Whether to include annotations (not used in this implementation)
+
+    Returns:
+        List of (block_id, text) tuples
+    """
+    result = []
+
+    for block in blocks:
+        # Get text from the current block
+        block_text = _extract_text_from_single_block(block)
+        if block_text:
+            result.append((block.id, block_text))
+
+        # Process child blocks recursively
+        if hasattr(block, "children") and block.children:
+            child_results = _extract_blocks_with_text(
+                block.children, include_annotations
+            )
+            result.extend(child_results)
+
+    return result
+
+
+def _extract_text_from_single_block(block: BlockBase) -> str:
+    """
+    Extract text from a single block without processing its children.
+
+    Args:
+        block: The block to extract text from
+
+    Returns:
+        The text content of the block
+    """
+    result = []
+
+    # Extract rich text if the block supports it
+    if block_supports_rich_text(block):
+        try:
+            rich_text_list = get_rich_text_from_block(block)
+            for rich_text in rich_text_list:
+                result.append(rich_text.plain_text)
+        except ValueError:
+            pass
+
+    # Extract captions from blocks that support them
+    if block.type == "image" and hasattr(block.image, "caption"):
+        for caption_text in block.image.caption:
+            result.append(caption_text.plain_text)
+    elif block.type == "code" and hasattr(block.code, "caption"):
+        for caption_text in block.code.caption:
+            result.append(caption_text.plain_text)
+
+    # Handle special blocks like tables
+    if block.type == "table_row" and hasattr(block.table_row, "cells"):
+        for cell in block.table_row.cells:
+            if isinstance(cell, list):
+                for item in cell:
+                    if hasattr(item, "plain_text"):
+                        result.append(item.plain_text)
+
+    return " ".join(result)
+
+
+def _extract_text_from_block(
+    block: BlockBase, include_annotations: bool = False
+) -> str:
+    """
+    Extract all text from a single block, including its children.
+
+    Args:
+        block: The block to extract text from
+        include_annotations: Whether to include annotations (not used in this implementation)
+
+    Returns:
+        A string with all text from the block
+    """
+    # Extract text from the current block
+    result = [_extract_text_from_single_block(block)]
+
+    # Process child blocks recursively
+    if hasattr(block, "children") and block.children:
+        for child in block.children:
+            child_text = _extract_text_from_block(child, include_annotations)
+            if child_text:
+                result.append(child_text)
+
+    return " ".join([text for text in result if text])
+
+
+def extract_rich_text_from_page(page: Page, include_annotations: bool = False) -> Dict:
     """
     Extract all rich text content from a JSON-DOC page.
 
     Args:
         page: A JSON-DOC Page object
-        include_annotations: If True, includes formatting info (bold, italic, etc.) in the output
+        include_annotations: If True, includes formatting info in the output
 
     Returns:
-        A dictionary containing:
-        - 'title': The page title text
-        - 'content': A list of text content from all blocks, each item is either:
-          - A string (if include_annotations=False)
-          - A dict with 'text' and 'annotations' (if include_annotations=True)
+        Dictionary containing title and content lists
     """
     result = {"title": "", "content": []}
 
@@ -53,7 +196,11 @@ def extract_rich_text_from_page(
     return result
 
 
-def _process_rich_text_items(rich_text_list, include_annotations, result):
+def _process_rich_text_items(
+    rich_text_list: list[RichTextBase],
+    include_annotations: bool,
+    result: list,
+) -> None:
     """
     Helper function to process a list of rich text items and append them to the result.
 

From 666403879273d538c1186f4ffc918dbd398b733e Mon Sep 17 00:00:00 2001
From: Onur Solmaz <2453968+osolmaz@users.noreply.github.com>
Date: Wed, 19 Mar 2025 22:19:13 +0100
Subject: [PATCH 03/10] Checkpoint

---
 .gitignore                                    |   3 +-
 jsondoc/{utils.py => utils/__init__.py}       |   0
 .../text_with_backref.py}                     | 148 +-----------------
 3 files changed, 5 insertions(+), 146 deletions(-)
 rename jsondoc/{utils.py => utils/__init__.py} (100%)
 rename jsondoc/{extract_rich_text.py => utils/text_with_backref.py} (51%)

diff --git a/.gitignore b/.gitignore
index 26fb22e..534ee83 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,5 @@
 __pycache__
 build/
 *.docx
-*.pptx
\ No newline at end of file
+*.pptx
+scratch/
\ No newline at end of file
diff --git a/jsondoc/utils.py b/jsondoc/utils/__init__.py
similarity index 100%
rename from jsondoc/utils.py
rename to jsondoc/utils/__init__.py
diff --git a/jsondoc/extract_rich_text.py b/jsondoc/utils/text_with_backref.py
similarity index 51%
rename from jsondoc/extract_rich_text.py
rename to jsondoc/utils/text_with_backref.py
index c7a7c9b..d77c8bc 100644
--- a/jsondoc/extract_rich_text.py
+++ b/jsondoc/utils/text_with_backref.py
@@ -14,14 +14,14 @@ class BackRef(BaseModel):
     end_idx: int
 
 
-class TextWithBackref(BaseModel):
+class TextWithBackrefs(BaseModel):
     text: str
     backrefs: list[BackRef]
 
 
 def extract_text_with_backref_from_page(
     page: Page, include_annotations: bool = False
-) -> TextWithBackref:
+) -> TextWithBackrefs:
     """
     Extract all rich text content from a JSON-DOC page as a single string
     with backrefs tracking the block origins.
@@ -68,7 +68,7 @@ def extract_text_with_backref_from_page(
             # Add a space after each block
             concat_text += " "
 
-    return TextWithBackref(text=concat_text.strip(), backrefs=backrefs)
+    return TextWithBackrefs(text=concat_text[: backrefs[-1].end_idx] if backrefs else "", backrefs=backrefs)
 
 
 def _extract_blocks_with_text(
@@ -166,145 +166,3 @@ def _extract_text_from_block(
                 result.append(child_text)
 
     return " ".join([text for text in result if text])
-
-
-def extract_rich_text_from_page(page: Page, include_annotations: bool = False) -> Dict:
-    """
-    Extract all rich text content from a JSON-DOC page.
-
-    Args:
-        page: A JSON-DOC Page object
-        include_annotations: If True, includes formatting info in the output
-
-    Returns:
-        Dictionary containing title and content lists
-    """
-    result = {"title": "", "content": []}
-
-    # Extract title
-    if page.properties.title and page.properties.title.title:
-        title_texts = []
-        for rich_text in page.properties.title.title:
-            title_texts.append(rich_text.plain_text)
-        result["title"] = "".join(title_texts)
-
-    # Process all blocks recursively
-    result["content"] = extract_rich_text_from_blocks(
-        page.children, include_annotations
-    )
-
-    return result
-
-
-def _process_rich_text_items(
-    rich_text_list: list[RichTextBase],
-    include_annotations: bool,
-    result: list,
-) -> None:
-    """
-    Helper function to process a list of rich text items and append them to the result.
-
-    Args:
-        rich_text_list: List of rich text items to process
-        include_annotations: Whether to include formatting annotations
-        result: The result list to append items to
-    """
-    if not rich_text_list:
-        return
-
-    for rich_text in rich_text_list:
-        if include_annotations:
-            result.append(
-                {
-                    "text": rich_text.plain_text,
-                    "annotations": rich_text.annotations.model_dump()
-                    if hasattr(rich_text, "annotations")
-                    else {},
-                    "href": rich_text.href if hasattr(rich_text, "href") else None,
-                }
-            )
-        else:
-            result.append(rich_text.plain_text)
-
-
-def extract_rich_text_from_blocks(
-    blocks: List[BlockBase], include_annotations: bool = False
-) -> List[Union[str, Dict]]:
-    """
-    Extract rich text content from a list of blocks recursively.
-
-    Args:
-        blocks: List of BlockBase objects
-        include_annotations: If True, includes formatting info in the output
-
-    Returns:
-        List of text content, either as strings or annotation dictionaries
-    """
-    result = []
-
-    for block in blocks:
-        # Extract rich text if the block supports it
-        if block_supports_rich_text(block):
-            try:
-                rich_text_list = get_rich_text_from_block(block)
-                _process_rich_text_items(rich_text_list, include_annotations, result)
-            except ValueError:
-                # Block doesn't support rich text (shouldn't happen due to our check)
-                pass
-
-        # Extract captions from blocks that support them
-        if block.type == "image" and hasattr(block.image, "caption"):
-            _process_rich_text_items(block.image.caption, include_annotations, result)
-        elif block.type == "code" and hasattr(block.code, "caption"):
-            _process_rich_text_items(block.code.caption, include_annotations, result)
-
-        # Process child blocks recursively
-        if hasattr(block, "children") and block.children:
-            child_content = extract_rich_text_from_blocks(
-                block.children, include_annotations
-            )
-            result.extend(child_content)
-
-        # Handle special blocks like tables that have rich text in different structure
-        if block.type == "table_row" and hasattr(block.table_row, "cells"):
-            for cell in block.table_row.cells:
-                if isinstance(cell, list):
-                    _process_rich_text_items(cell, include_annotations, result)
-
-    return result
-
-
-def extract_plain_text_from_page(page: Page) -> str:
-    """
-    Extract all plain text content from a JSON-DOC page and return it as a single string.
-
-    Args:
-        page: A JSON-DOC Page object
-
-    Returns:
-        A string containing all the text content from the page
-    """
-    extracted = extract_rich_text_from_page(page, include_annotations=False)
-
-    # Join title and content with appropriate separators
-    result = []
-    if extracted["title"]:
-        result.append(extracted["title"])
-
-    if extracted["content"]:
-        # Filter out empty strings and join with spaces
-        content_text = " ".join([item for item in extracted["content"] if item])
-        if content_text:
-            result.append(content_text)
-
-    return "\n\n".join(result)
-
-
-def extract_text_from_jsondoc_page_with_block_backref(
-    page: Page,
-) -> list[TextWithBackref]:
-    """
-    Extract rich text from jsondoc data.
-    """
-    rich_text = extract_rich_text_from_page(page)
-    return rich_text

From 5c76457e23182d309b5168a968685f5bd9921eb1 Mon Sep 17 00:00:00 2001
From: Onur Solmaz <2453968+osolmaz@users.noreply.github.com>
Date: Wed, 19 Mar 2025 22:31:51 +0100
Subject: [PATCH 04/10] Checkpoint

---
 jsondoc/utils/text_with_backref.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/jsondoc/utils/text_with_backref.py b/jsondoc/utils/text_with_backref.py
index d77c8bc..cf7270d 100644
--- a/jsondoc/utils/text_with_backref.py
+++ b/jsondoc/utils/text_with_backref.py
@@ -9,6 +9,7 @@
 
 
 class BackRef(BaseModel):
+    plain_text: str
     block_id: str
     begin_idx: int
     end_idx: int
@@ -47,7 +48,14 @@ def extract_text_with_backref_from_page(
         concat_text += title_text
         end_idx = len(concat_text)
         # Add a backref for the page title using the page's ID
-        backrefs.append(BackRef(block_id=page.id, begin_idx=begin_idx, end_idx=end_idx))
+        backrefs.append(
+            BackRef(
+                plain_text=title_text,
+                block_id=page.id,
+                begin_idx=begin_idx,
+                end_idx=end_idx,
+            )
+        )
         # Add a newline after the title
         concat_text += "\n\n"
 
@@ -62,7 +70,12 @@ def extract_text_with_backref_from_page(
             end_idx = len(concat_text)
 
             backrefs.append(
-                BackRef(block_id=block_id, begin_idx=begin_idx, end_idx=end_idx)
+                BackRef(
+                    plain_text=block_text,
+                    block_id=block_id,
+                    begin_idx=begin_idx,
+                    end_idx=end_idx,
+                )
             )
 
             # Add a space after each block

From dc77945e05a883d001fce596b54ecedbfa49a2bb Mon Sep 17 00:00:00 2001
From: Onur Solmaz <2453968+osolmaz@users.noreply.github.com>
Date: Thu, 20 Mar 2025 09:25:49 +0100
Subject: [PATCH 05/10] Extract blocks util

---
 jsondoc/utils/block.py | 56 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 jsondoc/utils/block.py

diff --git a/jsondoc/utils/block.py b/jsondoc/utils/block.py
new file mode 100644
index 0000000..7155048
--- /dev/null
+++ b/jsondoc/utils/block.py
@@ -0,0 +1,56 @@
+from collections import OrderedDict
+
+from jsondoc.models.block.base import BlockBase
+from jsondoc.models.page import Page
+
+
+def extract_blocks(
+    input_obj: Page | BlockBase | list[BlockBase],
+) -> dict[str, BlockBase]:
+    """
+    Map block IDs to blocks for a page, a single block, or a list of blocks.
+
+    Blocks are visited depth-first, each block before its children, and the
+    mapping keeps that order. A repeated ID keeps the block visited last.
+
+    Args:
+        input_obj: A Page (its children are walked), a block, or a list of
+            blocks. Non-block list items and other input types are ignored.
+
+    Returns:
+        An ordered dictionary mapping block IDs (strings) to block objects
+    """
+    # typing.OrderedDict is a deprecated annotation alias; build the real class.
+    block_map: dict[str, BlockBase] = OrderedDict()
+
+    if isinstance(input_obj, Page):
+        roots = input_obj.children or []
+    elif isinstance(input_obj, BlockBase):
+        roots = [input_obj]
+    elif isinstance(input_obj, list):
+        roots = [item for item in input_obj if isinstance(item, BlockBase)]
+    else:
+        roots = []
+    for root in roots:
+        _process_block_and_children(root, block_map)
+
+    return block_map
+
+
+def _process_block_and_children(
+    block: BlockBase, block_map: dict[str, BlockBase]
+) -> None:
+    """
+    Register a block and every block nested beneath it in block_map, by ID.
+
+    The block is recorded first, then each child subtree in order.
+
+    Args:
+        block: Root of the subtree to walk
+        block_map: Mapping of block IDs to blocks, updated in place
+    """
+    block_map[block.id] = block
+    # A missing, None, or empty children attribute all mean nothing to descend into.
+    descendants = getattr(block, "children", None) or []
+    for nested in descendants:
+        _process_block_and_children(nested, block_map)

From 58355bc7ed72f84bbb42d91c7b2f47bc5ccd0223 Mon Sep 17 00:00:00 2001
From: Onur Solmaz <2453968+osolmaz@users.noreply.github.com>
Date: Thu, 20 Mar 2025 09:32:41 +0100
Subject: [PATCH 06/10] Checkpoint

---
 jsondoc/utils/text_with_backref.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/jsondoc/utils/text_with_backref.py b/jsondoc/utils/text_with_backref.py
index cf7270d..2a80365 100644
--- a/jsondoc/utils/text_with_backref.py
+++ b/jsondoc/utils/text_with_backref.py
@@ -11,7 +11,7 @@
 class BackRef(BaseModel):
     plain_text: str
     block_id: str
-    begin_idx: int
+    start_idx: int
     end_idx: int
 
 
@@ -44,7 +44,7 @@ def extract_text_with_backref_from_page(
             title_text += rich_text.plain_text
 
     if title_text:
-        begin_idx = len(concat_text)
+        start_idx = len(concat_text)
         concat_text += title_text
         end_idx = len(concat_text)
         # Add a backref for the page title using the page's ID
@@ -52,7 +52,7 @@ def extract_text_with_backref_from_page(
             BackRef(
                 plain_text=title_text,
                 block_id=page.id,
-                begin_idx=begin_idx,
+                start_idx=start_idx,
                 end_idx=end_idx,
             )
         )
@@ -65,7 +65,7 @@ def extract_text_with_backref_from_page(
     # Add all blocks to the concatenated text with their respective backrefs
     for block_id, block_text in blocks_with_text:
         if block_text:
-            begin_idx = len(concat_text)
+            start_idx = len(concat_text)
             concat_text += block_text
             end_idx = len(concat_text)
 
@@ -73,7 +73,7 @@ def extract_text_with_backref_from_page(
                 BackRef(
                     plain_text=block_text,
                     block_id=block_id,
-                    begin_idx=begin_idx,
+                    start_idx=start_idx,
                     end_idx=end_idx,
                 )
             )

From 1ea814c559193c55ef43db4dae6f44d59b7715b0 Mon Sep 17 00:00:00 2001
From: Onur Solmaz <2453968+osolmaz@users.noreply.github.com>
Date: Thu, 20 Mar 2025 10:07:47 +0100
Subject: [PATCH 07/10] get_intersecting_backrefs

---
 jsondoc/utils/text_with_backref.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/jsondoc/utils/text_with_backref.py b/jsondoc/utils/text_with_backref.py
index 2a80365..a4a337c 100644
--- a/jsondoc/utils/text_with_backref.py
+++ b/jsondoc/utils/text_with_backref.py
@@ -19,6 +19,26 @@ class TextWithBackrefs(BaseModel):
     text: str
     backrefs: list[BackRef]
 
+    def get_intersecting_backrefs(self, start_idx: int, end_idx: int) -> list[BackRef]:
+        """
+        Collect the backrefs whose span overlaps the range [start_idx, end_idx).
+
+        A backref overlaps when it starts before end_idx and ends after start_idx, so
+        a span that only touches the range at a boundary is not reported.
+
+        Args:
+            start_idx: The starting index of the queried text range
+            end_idx: The ending index of the queried text range (exclusive)
+
+        Returns:
+            The overlapping BackRef objects, in the order they appear in self.backrefs
+        """
+        hits: list[BackRef] = []
+        for ref in self.backrefs:
+            if ref.end_idx > start_idx and end_idx > ref.start_idx:
+                hits.append(ref)
+        return hits
+
 
 def extract_text_with_backref_from_page(
     page: Page, include_annotations: bool = False

From f0b0d99c857dae131fd941e51b34cc095b058cdd Mon Sep 17 00:00:00 2001
From: Onur Solmaz <2453968+osolmaz@users.noreply.github.com>
Date: Thu, 20 Mar 2025 11:04:52 +0100
Subject: [PATCH 08/10] Handle unnecessary whitespace in html conversion

---
 jsondoc/convert/utils.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/jsondoc/convert/utils.py b/jsondoc/convert/utils.py
index 7056d7c..ab7c681 100644
--- a/jsondoc/convert/utils.py
+++ b/jsondoc/convert/utils.py
@@ -649,14 +649,27 @@ def _final_block_transformation(obj: BlockBase | str | RichTextBase):
         ensure_table_cell_count(obj)
     elif isinstance(obj, str):
         text_ = all_whitespace_re.sub(" ", obj)
+        if not text_.strip():
+            # Skip empty strings
+            return None
         return create_paragraph_block(text=text_)
     elif isinstance(obj, RichTextBase):
+        # if not obj.plain_text.strip():
+        #     # Skip empty rich text objects
+        #     return None
         new_obj_ = create_paragraph_block()
         new_obj_.paragraph.rich_text = [obj]
         return new_obj_
     elif isinstance(obj, PlaceholderBlockBase):
         # Make sure no placeholder blocks are left behind
         return None
+    # elif isinstance(obj, tuple(BLOCKS_WITH_RICH_TEXT)):
+    #     # Check for blocks that support rich text
+    #     rich_text = get_rich_text_from_block(obj)
+    #     if rich_text is not None:
+    #         # If the block has no rich text or only empty rich text, skip it
+    #         if not rich_text or all(not rt.plain_text.strip() for rt in rich_text):
+    #             return None
 
     return obj
 

From 7e9ba4cb47a7aeedfcf23003ced281c9ff8ed5ef Mon Sep 17 00:00:00 2001
From: Onur Solmaz <2453968+osolmaz@users.noreply.github.com>
Date: Thu, 20 Mar 2025 22:58:16 +0100
Subject: [PATCH 09/10] Fix colspan issue

---
 jsondoc/convert/html.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/jsondoc/convert/html.py b/jsondoc/convert/html.py
index c212320..082af56 100644
--- a/jsondoc/convert/html.py
+++ b/jsondoc/convert/html.py
@@ -841,10 +841,15 @@ def convert_td(self, el, convert_as_inline):
         paragraph_block.rich_text will be extracted to form table_row.cells.
         """
         # Get colspan
-        colspan = el.get("colspan", 1)
+        colspan = el.get("colspan", "1")
         # Get rowspan
         # rowspan = el.get("rowspan", 1)
         # We need to come up with a much different way to handle rowspan
+        if not isinstance(colspan, int):
+            try:
+                colspan = int(colspan)
+            except ValueError:
+                colspan = 1
 
         next_objects = []
         if colspan > 1:

From e5d27b7ef5a61f0a30d4686fb68c1b9a2416ae51 Mon Sep 17 00:00:00 2001
From: Onur Solmaz <2453968+osolmaz@users.noreply.github.com>
Date: Fri, 21 Mar 2025 16:17:08 +0100
Subject: [PATCH 10/10] Add TypeID generation for block and page ids, refactor
 HtmlToJsonDocConverter options to use Pydantic

---
 jsondoc/convert/html.py   | 120 +++++++++++++++++++++++---------------
 jsondoc/convert/utils.py  |  41 ++++++++-----
 jsondoc/utils/__init__.py |  15 ++++-
 pyproject.toml            |   1 +
 uv.lock                   |  23 ++++++++
 5 files changed, 138 insertions(+), 62 deletions(-)

diff --git a/jsondoc/convert/html.py b/jsondoc/convert/html.py
index 082af56..3548dd8 100644
--- a/jsondoc/convert/html.py
+++ b/jsondoc/convert/html.py
@@ -1,6 +1,6 @@
 import re
 from types import NoneType
-from typing import List, Union
+from typing import Callable, List, Union
 
 from bs4 import BeautifulSoup, Comment, Doctype, NavigableString
 from pydantic import BaseModel
@@ -45,7 +45,7 @@
 from jsondoc.models.page import Page
 from jsondoc.models.shared_definitions import Annotations
 from jsondoc.rules import is_block_child_allowed
-from jsondoc.utils import generate_id, get_current_time
+from jsondoc.utils import generate_block_id, get_current_time
 
 line_beginning_re = re.compile(r"^", re.MULTILINE)
 whitespace_re = re.compile(r"[\t ]+")
@@ -307,7 +307,9 @@ def reconcile_to_rich_text(
 
 
 def reconcile_to_block(
-    block: BlockBase, children: List[CHILDREN_TYPE]
+    block: BlockBase,
+    children: List[CHILDREN_TYPE],
+    typeid: bool = False,
 ) -> List[CHILDREN_TYPE]:
     """
     Given a block and a list of children,
@@ -350,7 +352,7 @@ def reconcile_to_block(
             # Get corresponding field from the block
             block_field = getattr(block, block_type)
             init_kwargs = {
-                "id": generate_id(),
+                "id": generate_block_id(typeid=typeid),
                 "created_time": child.created_time,
                 block_type: type(block_field)(),
             }
@@ -383,26 +385,20 @@ def reconcile_to_block(
 
 
 class HtmlToJsonDocConverter(object):
-    class DefaultOptions:
-        autolinks = True
-        code_language = ""
-        code_language_callback = None
-        convert = None
-        default_title = False
-        keep_inline_images_in = []
-        strip = None
-        force_page = False
-
-    class Options(DefaultOptions):
-        pass
+    class Options(BaseModel):  # validated converter options; keyword args to __init__
+        autolinks: bool = True
+        code_language: str = ""  # fallback language for <pre> code blocks
+        code_language_callback: Callable | None = None  # called with the <pre> element
+        convert: list[str] | None = None  # lower-case tag names; exclusive with strip
+        default_title: bool = False
+        keep_inline_images_in: list[str] = []  # parent tags whose inline <img> stay images
+        strip: list[str] | None = None  # lower-case tag names, tested via `tag not in strip`
+        force_page: bool = False  # wrap the result in a Page even for fragments
+        typeid: bool = False  # generate prefixed TypeID ids instead of plain UUID4 strings
 
     def __init__(self, **options):
-        # Create an options dictionary. Use DefaultOptions as a base so that
-        # it doesn't have to be extended.
-        self.options = _todict(self.DefaultOptions)
-        self.options.update(_todict(self.Options))
-        self.options.update(options)
-        if self.options["strip"] is not None and self.options["convert"] is not None:
+        self.options = self.Options(**options)
+        if self.options.strip is not None and self.options.convert is not None:
             raise ValueError(
                 "You may specify either tags to strip or tags to convert, but not both."
             )
@@ -417,7 +413,7 @@ def convert_soup(self, soup: BeautifulSoup) -> Page | BlockBase | List[BlockBase
         is_page = self._is_soup_page(soup)
 
         ret = None
-        if is_page or self.options["force_page"]:
+        if is_page or self.options.force_page:
             title = self._get_html_title(soup)
             # Ensure that children is a list
             if not isinstance(children, list):
@@ -427,6 +423,7 @@ def convert_soup(self, soup: BeautifulSoup) -> Page | BlockBase | List[BlockBase
             ret = create_page(
                 title=title,
                 children=children,
+                typeid=self.options.typeid,
             )
         else:
             ret = children
@@ -526,7 +523,11 @@ def is_nested_node(el):
         if current_level_object is None:
             objects = children_objects
         elif isinstance(current_level_object, BlockBase):
-            objects = reconcile_to_block(current_level_object, children_objects)
+            objects = reconcile_to_block(
+                current_level_object,
+                children_objects,
+                typeid=self.options.typeid,
+            )
         elif isinstance(current_level_object, RichTextBase):
             objects = reconcile_to_rich_text(current_level_object, children_objects)
         else:
@@ -615,8 +616,8 @@ def process_text(self, el):
 
     def should_convert_tag(self, tag):
         tag = tag.lower()
-        strip = self.options["strip"]
-        convert = self.options["convert"]
+        strip = self.options.strip
+        convert = self.options.convert
         if strip is not None:
             return tag not in strip
         elif convert is not None:
@@ -629,7 +630,7 @@ def convert_a(self, el, convert_as_inline):
         return ConvertOutput(main_object=create_rich_text(url=href))
 
     convert_b = abstract_inline_conversion(
-        lambda self: Annotations(bold=True)  # 2 * self.options["strong_em_symbol"]
+        lambda self: Annotations(bold=True)  # 2 * self.options.strong_em_symbol
     )
 
     def convert_blockquote(self, el, convert_as_inline):
@@ -646,7 +647,11 @@ def convert_blockquote(self, el, convert_as_inline):
             return ConvertOutput(main_object=create_rich_text())
 
         # TODO: If text has newlines, split them and add 2, 3, ... lines as children
-        return ConvertOutput(main_object=create_quote_block())
+        return ConvertOutput(
+            main_object=create_quote_block(
+                typeid=self.options.typeid,
+            )
+        )
 
     def convert_br(self, el, convert_as_inline):
         if convert_as_inline:
@@ -683,40 +688,48 @@ def convert_h1(self, el, convert_as_inline):
         if convert_as_inline:
             return ConvertOutput(main_object=create_rich_text())
 
-        return ConvertOutput(main_object=create_h1_block())
+        return ConvertOutput(main_object=create_h1_block(typeid=self.options.typeid))
 
     def convert_h2(self, el, convert_as_inline):
         if convert_as_inline:
             return ConvertOutput(main_object=create_rich_text())
 
-        return ConvertOutput(main_object=create_h2_block())
+        return ConvertOutput(main_object=create_h2_block(typeid=self.options.typeid))
 
     def convert_h3(self, el, convert_as_inline):
         if convert_as_inline:
             return ConvertOutput(main_object=create_rich_text())
 
-        return ConvertOutput(main_object=create_h3_block())
+        return ConvertOutput(main_object=create_h3_block(typeid=self.options.typeid))
 
     def convert_h4(self, el, convert_as_inline):
         if convert_as_inline:
             return ConvertOutput(main_object=create_rich_text())
 
-        return ConvertOutput(main_object=create_paragraph_block())
+        return ConvertOutput(
+            main_object=create_paragraph_block(typeid=self.options.typeid)
+        )
 
     def convert_h5(self, el, convert_as_inline):
         if convert_as_inline:
             return ConvertOutput(main_object=create_rich_text())
 
-        return ConvertOutput(main_object=create_paragraph_block())
+        return ConvertOutput(
+            main_object=create_paragraph_block(typeid=self.options.typeid)
+        )
 
     def convert_h6(self, el, convert_as_inline):
         if convert_as_inline:
             return ConvertOutput(main_object=create_rich_text())
 
-        return ConvertOutput(main_object=create_paragraph_block())
+        return ConvertOutput(
+            main_object=create_paragraph_block(typeid=self.options.typeid)
+        )
 
     def convert_hr(self, el, convert_as_inline):
-        return ConvertOutput(main_object=create_divider_block())
+        return ConvertOutput(
+            main_object=create_divider_block(typeid=self.options.typeid)
+        )
 
     convert_i = convert_em
 
@@ -730,13 +743,14 @@ def convert_img(self, el, convert_as_inline):
         # title_part = ' "%s"' % title.replace('"', r"\"") if title else ""
         if (
             convert_as_inline
-            and el.parent.name not in self.options["keep_inline_images_in"]
+            and el.parent.name not in self.options.keep_inline_images_in
         ):
             return alt
 
         return ConvertOutput(
             main_object=create_image_block(
                 url=src,
+                typeid=self.options.typeid,
                 # alt is not supported in JSON-DOC yet
                 # caption=alt,
             )
@@ -755,15 +769,21 @@ def convert_list(self, el, convert_as_inline):
     def convert_li(self, el, convert_as_inline):
         parent = el.parent
         if parent is not None and parent.name == "ol":
-            return ConvertOutput(main_object=create_numbered_list_item_block())
+            return ConvertOutput(
+                main_object=create_numbered_list_item_block(typeid=self.options.typeid)
+            )
         else:
-            return ConvertOutput(main_object=create_bullet_list_item_block())
+            return ConvertOutput(
+                main_object=create_bullet_list_item_block(typeid=self.options.typeid)
+            )
 
     def convert_p(self, el, convert_as_inline):
         if convert_as_inline:
             return ConvertOutput(main_object=create_rich_text())
 
-        return ConvertOutput(main_object=create_paragraph_block())
+        return ConvertOutput(
+            main_object=create_paragraph_block(typeid=self.options.typeid)
+        )
 
     def convert_pre(self, el, convert_as_inline):
         text = el.get_text()
@@ -771,12 +791,16 @@ def convert_pre(self, el, convert_as_inline):
         if not text:
             return None
 
-        code_language = self.options["code_language"]
+        code_language = self.options.code_language
 
-        if self.options["code_language_callback"]:
-            code_language = self.options["code_language_callback"](el) or code_language
+        if self.options.code_language_callback:
+            code_language = self.options.code_language_callback(el) or code_language
 
-        return ConvertOutput(main_object=create_code_block(language=code_language))
+        return ConvertOutput(
+            main_object=create_code_block(
+                language=code_language, typeid=self.options.typeid
+            )
+        )
 
     def convert_script(self, el, convert_as_inline):
         return None
@@ -793,19 +817,19 @@ def convert_style(self, el, convert_as_inline):
     # Notion does not have an alternative for sub and sup tags
     convert_sub = abstract_inline_conversion(
         lambda self: Annotations()
-        # self.options["sub_symbol"],
+        # self.options.sub_symbol,
     )
 
     convert_sup = abstract_inline_conversion(
         lambda self: Annotations()
-        # self.options["sup_symbol"],
+        # self.options.sup_symbol,
     )
 
     def convert_table(self, el, convert_as_inline):
         has_column_header = html_table_has_header_row(el)
         return ConvertOutput(
             main_object=create_table_block(
-                has_column_header=has_column_header,
+                has_column_header=has_column_header, typeid=self.options.typeid
             )
         )
 
@@ -868,7 +892,9 @@ def convert_tr(self, el, convert_as_inline):
         """
         Table row
         """
-        return ConvertOutput(main_object=create_table_row_block())
+        return ConvertOutput(
+            main_object=create_table_row_block(typeid=self.options.typeid)
+        )
 
 
 def html_to_jsondoc(html: str | bytes, **options) -> Page | BlockBase | List[BlockBase]:
diff --git a/jsondoc/convert/utils.py b/jsondoc/convert/utils.py
index ab7c681..a074e89 100644
--- a/jsondoc/convert/utils.py
+++ b/jsondoc/convert/utils.py
@@ -45,7 +45,7 @@
 from jsondoc.models.page import CreatedBy, LastEditedBy, Page, Parent, Properties, Title
 from jsondoc.models.shared_definitions import Annotations
 from jsondoc.rules import is_block_child_allowed
-from jsondoc.utils import generate_id, get_current_time
+from jsondoc.utils import generate_block_id, generate_page_id, get_current_time
 
 all_whitespace_re = re.compile(r"[\s]+")
 
@@ -136,10 +136,11 @@ def create_paragraph_block(
     id: str | None = None,
     created_time=None,
     metadata: dict | None = None,
+    typeid: bool = False,
     **kwargs,
 ) -> ParagraphBlock:
     if id is None:
-        id = generate_id()
+        id = generate_block_id(typeid=typeid)
     if created_time is None:
         created_time = get_current_time()
 
@@ -160,10 +161,11 @@ def create_bullet_list_item_block(
     text: str | None = None,
     id: str | None = None,
     created_time=None,
+    typeid: bool = False,
     **kwargs,
 ) -> BulletedListItemBlock:
     if id is None:
-        id = generate_id()
+        id = generate_block_id(typeid=typeid)
     if created_time is None:
         created_time = get_current_time()
 
@@ -183,10 +185,11 @@ def create_numbered_list_item_block(
     text: str | None = None,
     id: str | None = None,
     created_time=None,
+    typeid: bool = False,
     **kwargs,
 ) -> NumberedListItemBlock:
     if id is None:
-        id = generate_id()
+        id = generate_block_id(typeid=typeid)
     if created_time is None:
         created_time = get_current_time()
 
@@ -207,10 +210,11 @@ def create_code_block(
     language: str | None = None,
     id: str | None = None,
     created_time=None,
+    typeid: bool = False,
     **kwargs,
 ) -> CodeBlock:
     if id is None:
-        id = generate_id()
+        id = generate_block_id(typeid=typeid)
     if created_time is None:
         created_time = get_current_time()
 
@@ -238,9 +242,10 @@ def create_code_block(
 def create_divider_block(
     id: str | None = None,
     created_time=None,
+    typeid: bool = False,
 ) -> DividerBlock:
     if id is None:
-        id = generate_id()
+        id = generate_block_id(typeid=typeid)
     if created_time is None:
         created_time = get_current_time()
 
@@ -256,10 +261,11 @@ def create_h1_block(
     text: str | None = None,
     id: str | None = None,
     created_time=None,
+    typeid: bool = False,
     **kwargs,
 ) -> Heading1Block:
     if id is None:
-        id = generate_id()
+        id = generate_block_id(typeid=typeid)
 
     if created_time is None:
         created_time = get_current_time()
@@ -280,10 +286,11 @@ def create_h2_block(
     text: str | None = None,
     id: str | None = None,
     created_time=None,
+    typeid: bool = False,
     **kwargs,
 ) -> Heading2Block:
     if id is None:
-        id = generate_id()
+        id = generate_block_id(typeid=typeid)
 
     if created_time is None:
         created_time = get_current_time()
@@ -304,10 +311,11 @@ def create_h3_block(
     text: str | None = None,
     id: str | None = None,
     created_time=None,
+    typeid: bool = False,
     **kwargs,
 ) -> Heading3Block:
     if id is None:
-        id = generate_id()
+        id = generate_block_id(typeid=typeid)
 
     if created_time is None:
         created_time = get_current_time()
@@ -329,9 +337,10 @@ def create_image_block(
     caption: str | None = None,
     id: str | None = None,
     created_time=None,
+    typeid: bool = False,
 ) -> ImageBlock:
     if id is None:
-        id = generate_id()
+        id = generate_block_id(typeid=typeid)
 
     if created_time is None:
         created_time = get_current_time()
@@ -355,10 +364,11 @@ def create_quote_block(
     text: str | None = None,
     id: str | None = None,
     created_time=None,
+    typeid: bool = False,
     **kwargs,
 ) -> QuoteBlock:
     if id is None:
-        id = generate_id()
+        id = generate_block_id(typeid=typeid)
 
     if created_time is None:
         created_time = get_current_time()
@@ -379,9 +389,10 @@ def create_table_row_block(
     cells: List[List[RichTextBase]] = [],
     id: str | None = None,
     created_time=None,
+    typeid: bool = False,
 ) -> TableRowBlock:
     if id is None:
-        id = generate_id()
+        id = generate_block_id(typeid=typeid)
 
     if created_time is None:
         created_time = get_current_time()
@@ -401,9 +412,10 @@ def create_table_block(
     table_width: int | None = None,
     has_column_header: bool = False,
     has_row_header: bool = False,
+    typeid: bool = False,
 ) -> TableBlock:
     if id is None:
-        id = generate_id()
+        id = generate_block_id(typeid=typeid)
 
     if created_time is None:
         created_time = get_current_time()
@@ -430,6 +442,7 @@ def create_page(
     title: str | List[RichTextBase] | None = None,
     archived: bool | None = None,
     in_trash: bool | None = None,
+    typeid: bool = False,
     # parent: str | None = None,
     # icon # TBD
 ) -> Page:
@@ -437,7 +450,7 @@ def create_page(
     Creates a page with the given blocks
     """
     if id is None:
-        id = generate_id()
+        id = generate_page_id(typeid=typeid)
 
     if created_time is None:
         created_time = get_current_time()
diff --git a/jsondoc/utils/__init__.py b/jsondoc/utils/__init__.py
index 8d0548c..e1ca43c 100644
--- a/jsondoc/utils/__init__.py
+++ b/jsondoc/utils/__init__.py
@@ -6,6 +6,8 @@
 from contextlib import contextmanager
 from datetime import datetime, timezone
 
+from typeid import TypeID
+
 from jsondoc.models.block.base import CreatedBy
 
 ARBITRARY_JSON_SCHEMA_OBJECT = {
@@ -14,8 +16,19 @@
     "additionalProperties": True,
 }
 
+TYPEID_BLOCK_ID_PREFIX = "bk"
+TYPEID_PAGE_ID_PREFIX = "pg"
+
+
+def generate_block_id(typeid: bool = False) -> str:
+    """Return a fresh block id: a prefixed TypeID when ``typeid`` is set, else a UUID4."""
+    fresh = TypeID(prefix=TYPEID_BLOCK_ID_PREFIX) if typeid else uuid.uuid4()
+    return str(fresh)
+
 
-def generate_id() -> str:
+def generate_page_id(typeid: bool = False) -> str:  # new id for a Page, as a string
+    if typeid:  # opt-in: TypeID string carrying the page prefix
+        return str(TypeID(prefix=TYPEID_PAGE_ID_PREFIX))
     return str(uuid.uuid4())
 
 
diff --git a/pyproject.toml b/pyproject.toml
index eb542d5..aa17bcd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,6 +10,7 @@ dependencies = [
     "jsonschema>=4.23.0,<5",
     "pypandoc>=1.15",
     "beautifulsoup4>=4.13.3",
+    "typeid-python>=0.3.2",
 ]
 
 [project.scripts]
diff --git a/uv.lock b/uv.lock
index 1346717..1f5d494 100644
--- a/uv.lock
+++ b/uv.lock
@@ -748,6 +748,7 @@ dependencies = [
     { name = "jsonschema" },
     { name = "pydantic" },
     { name = "pypandoc" },
+    { name = "typeid-python" },
 ]
 
 [package.dev-dependencies]
@@ -767,6 +768,7 @@ requires-dist = [
     { name = "jsonschema", specifier = ">=4.23.0,<5" },
     { name = "pydantic", specifier = ">=2.7.2,<3" },
     { name = "pypandoc", specifier = ">=1.15" },
+    { name = "typeid-python", specifier = ">=0.3.2" },
 ]
 
 [package.metadata.requires-dev]
@@ -998,6 +1000,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359 },
 ]
 
+[[package]]
+name = "typeid-python"
+version = "0.3.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "uuid6" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/8d/09/b9b747c4f5f47e32fcb49d2a61c1235838df22bd02445507f60744bb6759/typeid_python-0.3.2.tar.gz", hash = "sha256:07d176af35ba75a10721ffd73f70e9582bc2705d3b4cb3d8df956e3221eaf2a6", size = 6934 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/8a/24/f5f6373f168c362c861c89fc7f7b3750968784ab90b0162bdc6cf77ad0bf/typeid_python-0.3.2-py3-none-any.whl", hash = "sha256:d4fc91e12152df9f7a468655c5fbd1824fb1b706a19ffdce0e7fcef4520ed139", size = 7229 },
+]
+
 [[package]]
 name = "typing-extensions"
 version = "4.12.2"
@@ -1016,6 +1030,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/c8/19/4ec628951a74043532ca2cf5d97b7b14863931476d117c471e8e2b1eb39f/urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df", size = 128369 },
 ]
 
+[[package]]
+name = "uuid6"
+version = "2024.7.10"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/2c/56/2560a9f1ccab9e12b1b3478a3c870796cf4d8ee5652bb19b61751cced14a/uuid6-2024.7.10.tar.gz", hash = "sha256:2d29d7f63f593caaeea0e0d0dd0ad8129c9c663b29e19bdf882e864bedf18fb0", size = 8705 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d3/3e/4ae6af487ce5781ed71d5fe10aca72e7cbc4d4f45afc31b120287082a8dd/uuid6-2024.7.10-py3-none-any.whl", hash = "sha256:93432c00ba403751f722829ad21759ff9db051dea140bf81493271e8e4dd18b7", size = 6376 },
+]
+
 [[package]]
 name = "virtualenv"
 version = "20.29.3"