From 4e4f332aac37bb90efa5d51129d6c7418bd9ddd1 Mon Sep 17 00:00:00 2001 From: edwardvaneechoud Date: Wed, 13 Aug 2025 07:46:56 +0200 Subject: [PATCH 01/10] Fix small issues in logging and missing functions --- flowfile/flowfile/api.py | 6 ++++-- flowfile/flowfile/web/__init__.py | 2 ++ .../flowfile_core/database/connection.py | 4 +--- .../flow_data_engine/cloud_storage_reader.py | 1 - .../flowfile_core/utils/validate_setup.py | 2 -- flowfile_frame/flowfile_frame/expr.py | 14 ++++++++++++++ flowfile_frame/flowfile_frame/flow_frame.py | 2 +- flowfile_frame/flowfile_frame/flow_frame.pyi | 9 +++------ flowfile_frontend/package-lock.json | 4 ++-- local_data/local_products.parquet | Bin 1997 -> 1999 bytes 10 files changed, 27 insertions(+), 17 deletions(-) diff --git a/flowfile/flowfile/api.py b/flowfile/flowfile/api.py index 5b44c39d..cfbd2021 100644 --- a/flowfile/flowfile/api.py +++ b/flowfile/flowfile/api.py @@ -206,7 +206,7 @@ def check_if_in_single_mode() -> bool: try: response: requests.Response = requests.get(f"{FLOWFILE_BASE_URL}/single_mode", timeout=1) if response.ok: - return response.json() == "1" + return response.json() except Exception: pass return False @@ -400,6 +400,8 @@ def _open_flow_in_browser(flow_id: int) -> None: logger.info(f"Unified mode detected. Opening imported flow in browser: {flow_url}") try: time.sleep(0.5) + logger.info("Attempting to open browser tab for flow...") + logger.info("Opening URL in browser: %s", flow_url) webbrowser.open_new_tab(flow_url) except Exception as wb_err: logger.warning(f"Could not automatically open browser tab: {wb_err}") @@ -452,7 +454,7 @@ def open_graph_in_editor(flow_graph: FlowGraph, storage_location: Optional[str] return False flow_id = import_flow_to_editor(flow_file_path, auth_token) - + print(flow_id, "flow_id", flow_in_single_mode, automatically_open_browser) if flow_id is not None: if flow_in_single_mode and automatically_open_browser: _open_flow_in_browser(flow_id) diff --git a/flowfile/flowfile/web/__init__.py b/flowfile/flowfile/web/__init__.py index 28929a75..7c2443f8 100644 --- a/flowfile/flowfile/web/__init__.py +++ b/flowfile/flowfile/web/__init__.py @@ -51,6 +51,8 @@ async def svg_logo(): @app.get("/single_mode") async def in_single_mode() -> bool: + print("Checking if single file mode is enabled") + print(os.environ.get('FLOWFILE_SINGLE_FILE_MODE')) return os.environ.get('FLOWFILE_SINGLE_FILE_MODE', "0") == "1" @app.get("/ui", include_in_schema=False) diff --git a/flowfile_core/flowfile_core/database/connection.py b/flowfile_core/flowfile_core/database/connection.py index 71a71bac..8117b90f 100644 --- a/flowfile_core/flowfile_core/database/connection.py +++ b/flowfile_core/flowfile_core/database/connection.py @@ -26,8 +26,6 @@ def get_app_data_dir() -> Path: base_dir = os.path.join(os.path.expanduser("~"), ".local", "share") app_dir = Path(base_dir) / app_name - - print(f"Using application data directory: {app_dir}") app_dir.mkdir(parents=True, exist_ok=True) return app_dir @@ -48,7 +46,7 @@ def get_database_url(): app_dir = get_app_data_dir() db_path = app_dir / "flowfile.db" - logger.info(f"Using database URL: sqlite:///{db_path}") + logger.debug(f"Using database URL: sqlite:///{db_path}") return f"sqlite:///{db_path}" diff --git a/flowfile_core/flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py b/flowfile_core/flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py index 19e689a9..194c8413 100644 --- a/flowfile_core/flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +++ 
b/flowfile_core/flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py @@ -68,7 +68,6 @@ def get_storage_options(connection: FullCloudStorageConnection) -> Dict[str, Any def _get_s3_storage_options(connection: 'FullCloudStorageConnection') -> Dict[str, Any]: """Build S3-specific storage options.""" auth_method = connection.auth_method - print(f"Building S3 storage options for auth_method: '{auth_method}'") if auth_method == "aws-cli": return create_storage_options_from_boto_credentials( profile_name=connection.connection_name, diff --git a/flowfile_core/flowfile_core/utils/validate_setup.py b/flowfile_core/flowfile_core/utils/validate_setup.py index ebbfc25b..21cf2df0 100644 --- a/flowfile_core/flowfile_core/utils/validate_setup.py +++ b/flowfile_core/flowfile_core/utils/validate_setup.py @@ -34,8 +34,6 @@ def validate_setup(): check_if_node_has_add_function_in_flow_graph(node) check_if_node_has_input_schema_definition(node) - print("All nodes have corresponding functions in FlowGraph and input schema definitions.") - if __name__ == "__main__": validate_setup() diff --git a/flowfile_frame/flowfile_frame/expr.py b/flowfile_frame/flowfile_frame/expr.py index 0657aec7..22d995f8 100644 --- a/flowfile_frame/flowfile_frame/expr.py +++ b/flowfile_frame/flowfile_frame/expr.py @@ -490,6 +490,20 @@ def sum(self): result.agg_func = "sum" return result + def unique_counts(self): + """ + Return the number of unique values in the column. + + Returns + ------- + Expr + A new expression with the unique counts + """ + result_expr = self.expr.unique_counts() if self.expr is not None else None + result = self._create_next_expr(method_name="unique_counts", result_expr=result_expr, is_complex=self.is_complex) + result.agg_func = "unique_counts" + return result + def implode(self): result_expr = self.expr.implode() if self.expr is not None else None result = self._create_next_expr(method_name="implode", result_expr=result_expr, is_complex=self.is_complex) diff --git a/flowfile_frame/flowfile_frame/flow_frame.py b/flowfile_frame/flowfile_frame/flow_frame.py index 13571fa0..ad2e5c72 100644 --- a/flowfile_frame/flowfile_frame/flow_frame.py +++ b/flowfile_frame/flowfile_frame/flow_frame.py @@ -565,7 +565,7 @@ def join( coalesce: bool = None, maintain_order: Literal[None, "left", "right", "left_right", "right_left"] = None, description: str = None, - ): + ) -> "FlowFrame": """ Add a join operation to the Logical Plan. diff --git a/flowfile_frame/flowfile_frame/flow_frame.pyi b/flowfile_frame/flowfile_frame/flow_frame.pyi index 7c0b4c25..1c6df182 100644 --- a/flowfile_frame/flowfile_frame/flow_frame.pyi +++ b/flowfile_frame/flowfile_frame/flow_frame.pyi @@ -80,8 +80,8 @@ class FlowFrame: def __ne__(self, other: object) -> typing.NoReturn: ... - # Create and configure a new FlowFrame instance, mimicking Polars' flexible constructor. 
- def __new__(cls, data: typing.Union[LazyFrame, collections.abc.Mapping[str, typing.Union[collections.abc.Sequence[object], collections.abc.Mapping[str, collections.abc.Sequence[object]], ForwardRef('Series')]], collections.abc.Sequence[typing.Any], ForwardRef('np.ndarray[Any, Any]'), ForwardRef('pa.Table'), ForwardRef('pd.DataFrame'), ForwardRef('ArrowArrayExportable'), ForwardRef('ArrowStreamExportable'), ForwardRef('torch.Tensor')] = None, schema: typing.Union[collections.abc.Mapping[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType'), type[int], type[float], type[bool], type[str], type['date'], type['time'], type['datetime'], type['timedelta'], type[list[typing.Any]], type[tuple[typing.Any, ...]], type[bytes], type[object], type['Decimal'], type[None], NoneType]], collections.abc.Sequence[typing.Union[str, tuple[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType'), type[int], type[float], type[bool], type[str], type['date'], type['time'], type['datetime'], type['timedelta'], type[list[typing.Any]], type[tuple[typing.Any, ...]], type[bytes], type[object], type['Decimal'], type[None], NoneType]]]], NoneType] = None, schema_overrides: collections.abc.Mapping[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType')]] | None = None, strict: bool = True, orient: typing.Optional[typing.Literal['col', 'row']] = None, infer_schema_length: int | None = 100, nan_to_null: bool = False, flow_graph: typing.Optional[flowfile_core.flowfile.flow_graph.FlowGraph] = None, node_id: typing.Optional[int] = None, parent_node_id: typing.Optional[int] = None, override_initial: bool = False) -> Self: ... + # Unified constructor for FlowFrame. + def __new__(cls, data: typing.Union[LazyFrame, collections.abc.Mapping[str, typing.Union[collections.abc.Sequence[object], collections.abc.Mapping[str, collections.abc.Sequence[object]], ForwardRef('Series')]], collections.abc.Sequence[typing.Any], ForwardRef('np.ndarray[Any, Any]'), ForwardRef('pa.Table'), ForwardRef('pd.DataFrame'), ForwardRef('ArrowArrayExportable'), ForwardRef('ArrowStreamExportable'), ForwardRef('torch.Tensor')] = None, schema: typing.Union[collections.abc.Mapping[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType'), type[int], type[float], type[bool], type[str], type['date'], type['time'], type['datetime'], type['timedelta'], type[list[typing.Any]], type[tuple[typing.Any, ...]], type[bytes], type[object], type['Decimal'], type[None], NoneType]], collections.abc.Sequence[typing.Union[str, tuple[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType'), type[int], type[float], type[bool], type[str], type['date'], type['time'], type['datetime'], type['timedelta'], type[list[typing.Any]], type[tuple[typing.Any, ...]], type[bytes], type[object], type['Decimal'], type[None], NoneType]]]], NoneType] = None, schema_overrides: collections.abc.Mapping[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType')]] | None = None, strict: bool = True, orient: typing.Optional[typing.Literal['col', 'row']] = None, infer_schema_length: int | None = 100, nan_to_null: bool = False, flow_graph: typing.Optional[flowfile_core.flowfile.flow_graph.FlowGraph] = None, node_id: typing.Optional[int] = None, parent_node_id: typing.Optional[int] = None, **kwargs) -> Self: ... def __repr__(self) -> Any: ... @@ -118,9 +118,6 @@ class FlowFrame: # Execute join using Polars code approach. 
def _execute_polars_code_join(self, other: FlowFrame, new_node_id: int, on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column], left_on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column], right_on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column], left_columns: typing.Optional[typing.List[str]], right_columns: typing.Optional[typing.List[str]], how: str, suffix: str, validate: str, nulls_equal: bool, coalesce: bool, maintain_order: typing.Literal[None, 'left', 'right', 'left_right', 'right_left'], description: str) -> 'FlowFrame': ... - # Internal constructor to create a FlowFrame instance that wraps an - def _from_existing_node(self, data: LazyFrame, flow_graph: FlowGraph, node_id: int, parent_node_id: typing.Optional[int] = None) -> 'FlowFrame': ... - # Generates the `input_df.sort(...)` Polars code string using pure expression strings. def _generate_sort_polars_code(self, pure_sort_expr_strs: typing.List[str], descending_values: typing.List[bool], nulls_last_values: typing.List[bool], multithreaded: bool, maintain_order: bool) -> str: ... @@ -231,7 +228,7 @@ class FlowFrame: def interpolate(self, description: Optional[str] = None) -> 'FlowFrame': ... # Add a join operation to the Logical Plan. - def join(self, other, on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column] = None, how: str = 'inner', left_on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column] = None, right_on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column] = None, suffix: str = '_right', validate: str = None, nulls_equal: bool = False, coalesce: bool = None, maintain_order: typing.Literal[None, 'left', 'right', 'left_right', 'right_left'] = None, description: str = None) -> Any: ... + def join(self, other, on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column] = None, how: str = 'inner', left_on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column] = None, right_on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column] = None, suffix: str = '_right', validate: str = None, nulls_equal: bool = False, coalesce: bool = None, maintain_order: typing.Literal[None, 'left', 'right', 'left_right', 'right_left'] = None, description: str = None) -> 'FlowFrame': ... # Perform an asof join. def join_asof(self, other: FlowFrame, left_on: str | None | Expr = None, right_on: str | None | Expr = None, on: str | None | Expr = None, by_left: str | Sequence[str] | None = None, by_right: str | Sequence[str] | None = None, by: str | Sequence[str] | None = None, strategy: AsofJoinStrategy = 'backward', suffix: str = '_right', tolerance: str | int | float | timedelta | None = None, allow_parallel: bool = True, force_parallel: bool = False, coalesce: bool = True, allow_exact_matches: bool = True, check_sortedness: bool = True, description: Optional[str] = None) -> 'FlowFrame': ... 
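For reference, the new `unique_counts` wrapper added to `flowfile_frame/expr.py` above delegates to the underlying Polars expression's `unique_counts`. A small plain-Polars sketch of that behaviour (note that `unique_counts` yields one count per distinct value, while `n_unique` returns how many distinct values there are):

```python
import polars as pl

df = pl.DataFrame({"category": ["a", "b", "a", "c", "a", "b"]})

# One row per distinct value, in order of first appearance: counts 3, 2, 1.
print(df.select(pl.col("category").unique_counts()))

# A single scalar: 3 distinct values.
print(df.select(pl.col("category").n_unique()))
```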
diff --git a/flowfile_frontend/package-lock.json b/flowfile_frontend/package-lock.json index a67b3afd..e8f21e6e 100644 --- a/flowfile_frontend/package-lock.json +++ b/flowfile_frontend/package-lock.json @@ -1,12 +1,12 @@ { "name": "Flowfile", - "version": "0.3.4", + "version": "0.3.7", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "Flowfile", - "version": "0.3.4", + "version": "0.3.7", "dependencies": { "@ag-grid-community/client-side-row-model": "^31.1.1", "@ag-grid-community/core": "^31.1.1", diff --git a/local_data/local_products.parquet b/local_data/local_products.parquet index 927f94428dd81ccba062d6f6ab8d79e8c187e7a0..067b839aadbd41c23bceec4a8ac80ef15233e0f8 100644 GIT binary patch delta 755 zcmX@hf1ZEC1V%#+1_lP_ocxlE%)E3~AloTFKf9O<$neO|O;vEu&rd1l2J&2UQj<%H z^7As2Cm&?AkrL>et-brQr+|I)?Hva`dOekVRiuA#rT6tWuO_c%b*L9*5Ix5ux`16r z^aLZ2%_hnwW5%GtVGIK5^$rpog|=MR7Z<2I08Q4|s{c1ZoynAs!9|!E$Uniz*njA& zMofCKL(mSn*4Kq@4j{F*pOPHhOC7k3%Nz~^W&4@x+zK5uV}MHMl>SJcUFlG0tNx6ii3zl`K=rW`F-IP#o=X)byM!vI0QaO*;V-?=u-L5f|vOY z)m~ZZ^`COL6@Dmmtct7bOLzDNbkm`&#`Xofa(>j8T&sJM8Byu5qyXs8$+0s_9o_&P z(|W$_U#V_lZt9Z^uF^*Ze=FA&s#Wt>YveQ5OO)t2e~56*aTchE2sxVNonRg3z)+A- z6TBmk#~?y9=wHE~((};{vt|N8Y`LUSV9~PD-mJNC`>Vs<9l}=#I&3ICHDj)&0aFvt zsy71l8i)8-r$!!`F!}cRoMn5D-+ec6lIczRrS2cyH@n?lAng9_w!yzD_TNu_G0!NN zFk69}H+OgL#Jib|AM569D{q;|bEZuC+%Dt8nP1v+jGhI4`>3>c|HPBe^5d3qzIo0O zvq?bw;if}orTYc$8Gbo+!=2Y(pP7j({?ZJyx{Is#+k`Mq4rGy${=u%cj?)Hfv`_9} zaTA0k0Z9pHx|n>QMUC;tWH#36Y?p*ZCB$Y;Ud-yr_X@XHviK^yIay4)vl8qUU%-=dkOD zo?ryB*+ki7%osE{X3Ycw%X$Y1j0dt^)k<#g(VJwF2#GoeuP<;230#$m&Fv%&+B$toaX4omtmCX8gRBk zA-TP#sqSKkezt=q(DPpn^8cDyB+e>xtbSjsT*R5<5Vk_lV2At8HmPg{Ca0cFSL&Gs zG!F51zwO)pE2Zazo}m@fWyA2~ds{TJ^VoZm*j1M?s->@=JUMuK3$whrh4q0?T+Tk5 zWB)$8?0fB+n(DNO!w%_-rt2+BXpQlDR>N(pKi@a;vA)em*5yAh?zP$cpes?fbGNTw zL7b6<8c){eTQg>*Urau_;{x|ep6~1orlMSv|FKF*e_>Z!$7zE#+9!9hxCz2SRZ;?) zE+#)@QDgivnS*sY+c{xT39)IDm$G{DJOeSW2#MM-s4bfOi&f4TkGa6)B=!%aeuA*5 W9fQ~&Ik6LrY8+zAHhZ(RFaiMXVkn0I From 84f19565731529fc9409a56e27431605925ac441 Mon Sep 17 00:00:00 2001 From: edwardvaneechoud Date: Wed, 13 Aug 2025 08:59:21 +0200 Subject: [PATCH 02/10] fixing layout quickstart and index.html --- docs/index.html | 507 ++++++++++++++++++++++++++++++++++----------- docs/quickstart.md | 200 ++++++++++-------- 2 files changed, 499 insertions(+), 208 deletions(-) diff --git a/docs/index.html b/docs/index.html index 377a32a2..a957f70b 100644 --- a/docs/index.html +++ b/docs/index.html @@ -2,7 +2,7 @@ - + Flowfile - Visual ETL Tool @@ -52,6 +52,8 @@ color: var(--md-default-fg-color); background-color: var(--md-default-bg-color); line-height: 1.6; + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; } pre { @@ -59,24 +61,28 @@ padding: 1rem; border-radius: 8px; overflow-x: auto; + -webkit-overflow-scrolling: touch; + font-size: 0.875rem; } code { font-family: 'Source Code Pro', 'Roboto Mono', monospace; color: var(--md-code-fg-color); + font-size: 0.875rem; } - - @@ -622,7 +869,6 @@

🐍 Code Approach
[call-to-action block: "See It For Yourself", "Start rediscovering how we bridge the gap between business users and technical users.", "Free, open-source and customizable"]
[the hunk removes and re-adds this block with updated markup; its visible text is unchanged]
\ No newline at end of file diff --git a/docs/quickstart.md b/docs/quickstart.md index fd54f99b..4edaf695 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -1,16 +1,16 @@ # Quick Start Guide -
[centered header block: Flowfile Logo, "Get Started with Flowfile in 5 Minutes"]
[the hunk removes and re-adds this header with updated markup; its text is unchanged]

## Installation -
-

[admonition "Recommended: Install from PyPI": `pip install flowfile`, "This installs everything you need - the Python API, visual editor, and all services."]
[the hunk retitles the admonition to "Recommended quickstart: Install from PyPI"; the command and description are unchanged]

### Alternative Installation Methods @@ -53,46 +53,46 @@ npm run dev:web # Terminal 3 (port 8080) ## Choose Your Path -
+
-
-

[card: Non-Technical Users]
Perfect for: Analysts, business users, Excel power users. No coding required!
  • ✅ Drag and drop interface
  • ✅ Visual data preview
  • ✅ Export to Excel/CSV
  • ✅ Built-in transformations
[the hunk removes and re-adds this card with adjusted indentation; its text is unchanged]
    - Start Visual Tutorial → + Start Visual Tutorial →
-
-

[card: Technical Users]
Perfect for: Developers, data scientists, engineers. Full programmatic control!
  • ✅ Polars-compatible API
  • ✅ Cloud storage integration
  • ✅ Version control friendly
  • ✅ Complex dynamic logic
[the hunk removes and re-adds this card with adjusted indentation; its text is unchanged]
    - Start Python Tutorial → + Start Python Tutorial →
--- -## 🎨 Quick Start for Non-Technical Users {#non-technical-quickstart} +## Quick Start for Non-Technical Users {#non-technical-quickstart} -
-Goal: Clean and analyze sales data without writing any code +
+Goal: Clean and analyze sales data without writing any code
### Step 1: Start Flowfile, and create a Flow @@ -106,7 +106,7 @@ Your browser should automatically open to the Flowfile UI. !!! warning "If the browser does not open automatically" If the browser does not open automatically, you can manually navigate to [http://127.0.0.1:63578/ui#/main/designer](http://127.0.0.1:63578/ui#/main/designer) in your web browser. -
+
**Creating your First Flow:** @@ -124,7 +124,7 @@ Your should see now an empty flow: ### Step 2: Load Your Data -
+
**Loading a CSV or Excel file:** @@ -144,11 +144,11 @@ Your should see now an empty flow: Let's remove duplicate records and filter for high-value transactions: -
+
-
-

Remove Duplicates

-
    +
    +

    Remove Duplicates

    +
    1. Drag "Drop Duplicates" node from Transform section
    2. Connect it to your Read Data node
    3. Select columns to check for duplicates
    4. @@ -156,9 +156,9 @@ Let's remove duplicate records and filter for high-value transactions:
    -
    -

    Filter Data

    -
      +
      +

      Filter Data

      +
      1. Drag "Filter Data" node from Transform section
      2. Connect it to Drop Duplicates node
      3. Enter formula: [Quantity] > 7
      4. @@ -191,7 +191,7 @@ Let's remove duplicate records and filter for high-value transactions: ### Step 5: Save Your Results -
        +
        **Export your cleaned data:** @@ -206,7 +206,7 @@ Let's remove duplicate records and filter for high-value transactions:
        -
        +
        ### Here's what your complete flow should look like: @@ -222,9 +222,9 @@ You've just built your first data pipeline! You can: - **Schedule it** to run automatically (coming soon) - **Export as Python code** if you want to see what's happening behind the scenes -
        -### Pro Tips for Non-Technical Users: -