feat: Optimize container infrastructure for production (#5881) · feast-dev/feast@5ebdac8

@@ -52,21 +52,42 @@

5252

type=click.INT,

5353

default=1,

5454

show_default=True,

55-

help="Number of worker",

55+

help="Number of worker processes. Use -1 to auto-calculate based on CPU cores",

56+

)

57+

@click.option(

58+

"--worker-connections",

59+

type=click.INT,

60+

default=1000,

61+

show_default=True,

62+

help="Maximum number of simultaneous clients per worker process",

63+

)

64+

@click.option(

65+

"--max-requests",

66+

type=click.INT,

67+

default=1000,

68+

show_default=True,

69+

help="Maximum number of requests a worker will process before restarting (prevents memory leaks)",

70+

)

71+

@click.option(

72+

"--max-requests-jitter",

73+

type=click.INT,

74+

default=50,

75+

show_default=True,

76+

help="Maximum jitter to add to max-requests to prevent thundering herd on worker restart",

5677

)

5778

@click.option(

5879

"--keep-alive-timeout",

5980

type=click.INT,

60-

default=5,

81+

default=30,

6182

show_default=True,

62-

help="Timeout for keep alive",

83+

help="Timeout for keep alive connections (seconds)",

6384

)

6485

@click.option(

6586

"--registry_ttl_sec",

6687

"-r",

67-

help="Number of seconds after which the registry is refreshed",

88+

help="Number of seconds after which the registry is refreshed. Higher values reduce refresh overhead but increase staleness",

6889

type=click.INT,

69-

default=5,

90+

default=60,

7091

show_default=True,

7192

)

7293

@click.option(

@@ -102,11 +123,14 @@ def serve_command(

102123

type_: str,

103124

no_access_log: bool,

104125

workers: int,

105-

metrics: bool,

126+

worker_connections: int,

127+

max_requests: int,

128+

max_requests_jitter: int,

106129

keep_alive_timeout: int,

130+

registry_ttl_sec: int,

107131

tls_key_path: str,

108132

tls_cert_path: str,

109-

registry_ttl_sec: int = 5,

133+

metrics: bool,

110134

):

111135

"""Start a feature server locally on a given port."""

112136

if (tls_key_path and not tls_cert_path) or (not tls_key_path and tls_cert_path):

@@ -115,12 +139,19 @@ def serve_command(

115139

)

116140

store = create_feature_store(ctx)

117141142+

# Auto-calculate workers if -1 is specified

143+

if workers == -1:

144+

workers = max(1, (multiprocessing.cpu_count() * 2) + 1)

145+118146

store.serve(

119147

host=host,

120148

port=port,

121149

type_=type_,

122150

no_access_log=no_access_log,

123151

workers=workers,

152+

worker_connections=worker_connections,

153+

max_requests=max_requests,

154+

max_requests_jitter=max_requests_jitter,

124155

metrics=metrics,

125156

keep_alive_timeout=keep_alive_timeout,

126157

tls_key_path=tls_key_path,