feat: Optimize container infrastructure for production (#5881) · feast-dev/feast@5ebdac8
@@ -52,21 +52,42 @@
5252 type=click.INT,
5353 default=1,
5454 show_default=True,
55- help="Number of worker",
55+ help="Number of worker processes. Use -1 to auto-calculate based on CPU cores",
56+)
57+@click.option(
58+ "--worker-connections",
59+ type=click.INT,
60+ default=1000,
61+ show_default=True,
62+ help="Maximum number of simultaneous clients per worker process",
63+)
64+@click.option(
65+ "--max-requests",
66+ type=click.INT,
67+ default=1000,
68+ show_default=True,
69+ help="Maximum number of requests a worker will process before restarting (prevents memory leaks)",
70+)
71+@click.option(
72+ "--max-requests-jitter",
73+ type=click.INT,
74+ default=50,
75+ show_default=True,
76+ help="Maximum jitter to add to max-requests to prevent thundering herd on worker restart",
5677)
5778@click.option(
5879 "--keep-alive-timeout",
5980 type=click.INT,
60- default=5,
81+ default=30,
6182 show_default=True,
62- help="Timeout for keep alive",
83+ help="Timeout for keep alive connections (seconds)",
6384)
6485@click.option(
6586 "--registry_ttl_sec",
6687 "-r",
67- help="Number of seconds after which the registry is refreshed",
88+ help="Number of seconds after which the registry is refreshed. Higher values reduce refresh overhead but increase staleness",
6889 type=click.INT,
69- default=5,
90+ default=60,
7091 show_default=True,
7192)
7293@click.option(
@@ -102,11 +123,14 @@ def serve_command(
102123type_: str,
103124no_access_log: bool,
104125workers: int,
105-metrics: bool,
126+worker_connections: int,
127+max_requests: int,
128+max_requests_jitter: int,
106129keep_alive_timeout: int,
130+registry_ttl_sec: int,
107131tls_key_path: str,
108132tls_cert_path: str,
109-registry_ttl_sec: int = 5,
133+metrics: bool,
110134):
111135"""Start a feature server locally on a given port."""
112136if (tls_key_path and not tls_cert_path) or (not tls_key_path and tls_cert_path):
@@ -115,12 +139,19 @@ def serve_command(
115139 )
116140store = create_feature_store(ctx)
117141142+# Auto-calculate workers if -1 is specified
143+if workers == -1:
144+workers = max(1, (multiprocessing.cpu_count() * 2) + 1)
145+118146store.serve(
119147host=host,
120148port=port,
121149type_=type_,
122150no_access_log=no_access_log,
123151workers=workers,
152+worker_connections=worker_connections,
153+max_requests=max_requests,
154+max_requests_jitter=max_requests_jitter,
124155metrics=metrics,
125156keep_alive_timeout=keep_alive_timeout,
126157tls_key_path=tls_key_path,