diff --git a/examples/app_server_with_auto_scaler/app.py b/examples/app_server_with_auto_scaler/app.py
index b713bd6d1dcfc..70799827776a8 100644
--- a/examples/app_server_with_auto_scaler/app.py
+++ b/examples/app_server_with_auto_scaler/app.py
@@ -1,3 +1,4 @@
+# ! pip install torch torchvision
 from typing import Any, List
 
 import torch
@@ -22,10 +23,10 @@ class BatchResponse(BaseModel):
 class PyTorchServer(L.app.components.PythonServer):
     def __init__(self, *args, **kwargs):
         super().__init__(
-            port=L.app.utilities.network.find_free_network_port(),
             input_type=BatchRequestModel,
             output_type=BatchResponse,
-            cloud_compute=L.CloudCompute("gpu"),
+            *args,
+            **kwargs,
         )
 
     def setup(self):
@@ -57,16 +58,14 @@ def scale(self, replicas: int, metrics: dict) -> int:
         """The default scaling logic that users can override."""
         # scale out if the number of pending requests exceeds max batch size.
         max_requests_per_work = self.max_batch_size
-        pending_requests_per_running_or_pending_work = metrics["pending_requests"] / (
-            replicas + metrics["pending_works"]
-        )
-        if pending_requests_per_running_or_pending_work >= max_requests_per_work:
+        pending_requests_per_work = metrics["pending_requests"] / (replicas + metrics["pending_works"])
+        if pending_requests_per_work >= max_requests_per_work:
             return replicas + 1
 
         # scale in if the number of pending requests is below 25% of max_requests_per_work
         min_requests_per_work = max_requests_per_work * 0.25
-        pending_requests_per_running_work = metrics["pending_requests"] / replicas
-        if pending_requests_per_running_work < min_requests_per_work:
+        pending_requests_per_work = metrics["pending_requests"] / replicas
+        if pending_requests_per_work < min_requests_per_work:
             return replicas - 1
 
         return replicas
@@ -74,13 +73,17 @@ def scale(self, replicas: int, metrics: dict) -> int:
 
 app = L.LightningApp(
     MyAutoScaler(
+        # work class and args
         PyTorchServer,
-        min_replicas=2,
+        cloud_compute=L.CloudCompute("gpu"),
+        # autoscaler specific args
+        min_replicas=1,
         max_replicas=4,
         autoscale_interval=10,
         endpoint="predict",
         input_type=RequestModel,
         output_type=Any,
         timeout_batching=1,
+        max_batch_size=8,
     )
 )
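
The simplified `scale` hook above divides pending requests by running-plus-pending works when deciding to scale out, but only by running works when deciding to scale in. A minimal standalone sketch of that decision rule (the `metrics` keys mirror the ones the autoscaler passes; the enclosing class and `self.max_batch_size` are swapped for a plain parameter purely for illustration):

```python
def scale(replicas: int, metrics: dict, max_batch_size: int = 8) -> int:
    """Return the desired replica count for the given queue metrics."""
    # Scale out: each running or soon-to-run work already has a full batch pending.
    pending_per_work = metrics["pending_requests"] / (replicas + metrics["pending_works"])
    if pending_per_work >= max_batch_size:
        return replicas + 1

    # Scale in: running works see less than 25% of a batch in the queue.
    if metrics["pending_requests"] / replicas < max_batch_size * 0.25:
        return replicas - 1

    return replicas


# 2 replicas, 20 queued requests, none pending -> 10 per work >= 8, scale out.
assert scale(2, {"pending_requests": 20, "pending_works": 0}) == 3
# 2 replicas, 1 queued request -> 0.5 per work < 2, scale in.
assert scale(2, {"pending_requests": 1, "pending_works": 0}) == 1
```
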
diff --git a/src/lightning_app/CHANGELOG.md b/src/lightning_app/CHANGELOG.md
index cd1c5792a3aca..f04347eb328f4 100644
--- a/src/lightning_app/CHANGELOG.md
+++ b/src/lightning_app/CHANGELOG.md
@@ -13,7 +13,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Changed
 
--
+- Changed the default port of `PythonServer` from `7777` to a free port at runtime ([#15966](https://github.com/Lightning-AI/lightning/pull/15966))
 
 ### Deprecated
 
@@ -28,7 +28,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Fixed
 
--
+- Fixed `AutoScaler` failing due to port collision across works ([#15966](https://github.com/Lightning-AI/lightning/pull/15966))
 
 ## [1.8.4] - 2022-12-08
 
diff --git a/src/lightning_app/components/auto_scaler.py b/src/lightning_app/components/auto_scaler.py
index 62e6180c49665..c21478247f5f7 100644
--- a/src/lightning_app/components/auto_scaler.py
+++ b/src/lightning_app/components/auto_scaler.py
@@ -446,7 +446,8 @@ def workers(self) -> List[LightningWork]:
     def create_work(self) -> LightningWork:
         """Replicates a LightningWork instance with args and kwargs provided via ``__init__``."""
         # TODO: Remove `start_with_flow=False` for faster initialization on the cloud
-        return self._work_cls(*self._work_args, **self._work_kwargs, start_with_flow=False)
+        self._work_kwargs.update(dict(start_with_flow=False))
+        return self._work_cls(*self._work_args, **self._work_kwargs)
 
     def add_work(self, work) -> str:
         """Adds a new LightningWork instance.
 
diff --git a/src/lightning_app/components/serve/python_server.py b/src/lightning_app/components/serve/python_server.py
index 1868b0b357fd3..c522a25eb3f3d 100644
--- a/src/lightning_app/components/serve/python_server.py
+++ b/src/lightning_app/components/serve/python_server.py
@@ -75,8 +75,6 @@ class PythonServer(LightningWork, abc.ABC):
     @requires(["torch", "lightning_api_access"])
     def __init__(  # type: ignore
         self,
-        host: str = "127.0.0.1",
-        port: int = 7777,
         input_type: type = _DefaultInputData,
         output_type: type = _DefaultOutputData,
         **kwargs,
@@ -84,8 +82,6 @@ def __init__(  # type: ignore
         """The PythonServer Class enables to easily get your machine learning server up and running.
 
         Arguments:
-            host: Address to be used for running the server.
-            port: Port to be used to running the server.
             input_type: Optional `input_type` to be provided. This needs to be a pydantic
                 BaseModel class. The default data type is good enough for the basic usecases and it expects the data
                 to be a json object that has one key called `payload`
@@ -129,7 +125,7 @@ def predict(self, request):
         ...
         >>> app = LightningApp(SimpleServer())
         """
-        super().__init__(parallel=True, host=host, port=port, **kwargs)
+        super().__init__(parallel=True, **kwargs)
         if not issubclass(input_type, BaseModel):
             raise TypeError("input_type must be a pydantic BaseModel class")
         if not issubclass(output_type, BaseModel):
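
Dropping the explicit `host`/`port` parameters means each `PythonServer` replica now gets a port assigned at runtime instead of every replica defaulting to `7777`, which is the collision the changelog entry refers to. The usual OS-level trick behind helpers like `find_free_network_port` is binding to port `0`; a minimal sketch of that idea (this `find_free_port` is a hypothetical stand-in, not the library's exact helper, and there is a small race window between probing a port and the server binding it):

```python
import socket


def find_free_port() -> int:
    """Ask the kernel for an unused TCP port by binding to port 0."""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(("", 0))  # port 0 tells the OS to pick any free ephemeral port
        return s.getsockname()[1]  # report which port the kernel chose


# Successive calls typically return distinct ports, so several server replicas
# started on one machine no longer contend for a single hard-coded port.
print(find_free_port(), find_free_port())
```
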