From daaaa9c5456c41cda8b5fbae087d6a423deb52fe Mon Sep 17 00:00:00 2001
From: Rasmus
Date: Mon, 22 Dec 2025 21:23:15 +0100
Subject: [PATCH] Stream server response to keep the connection alive

---
 app.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/app.py b/app.py
index 05d709e..2ef09cb 100644
--- a/app.py
+++ b/app.py
@@ -23,29 +23,35 @@ def index():
 @app.route('/about')
 def about():
     return render_template('about.html')
 
-
 @app.route('/proxy-chat', methods=['POST'])
 def proxy_chat():
     target_url = "http://192.168.0.37:5002/v1/chat/completions"
 
     try:
+        # We use stream=True so we don't load the whole response into RAM at once
         response = requests.post(
             target_url,
             json=request.json,
-            timeout=300  # Important: Give the server time to process. Set very long timeout
+            timeout=300,
+            stream=True
         )
 
+        # Generator to yield chunks of data as they arrive
+        def generate():
+            for chunk in response.iter_content(chunk_size=1024):
+                yield chunk
+
         return Response(
-            response.content,
+            generate(),
             status=response.status_code,
-            content_type=response.headers['content-type']
+            content_type=response.headers.get('content-type', 'application/json')
         )
     except requests.exceptions.Timeout:
-        return {"error": "The server took too long to answer. Try a different prompt."}, 504
+        return {"error": "The backend LLM timed out."}, 504
     except Exception as e:
-        return {"error": str(e)}, 500
-
+        app.logger.error(f"Proxy error: {str(e)}")
+        return {"error": "Internal server error"}, 500
 
 @app.route('/post/<path>/')
 def post(path):
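
For reference, a minimal client-side sketch of how the streamed endpoint can
be consumed. The host/port (localhost:5000), the model name, and the
OpenAI-style payload are assumptions here; the patch only shows the server
side, and the target URL suggests an OpenAI-compatible chat-completions
backend.

    import requests

    payload = {
        "model": "local-model",  # hypothetical model name
        "messages": [{"role": "user", "content": "Hello!"}],
        "stream": True,  # ask the backend to stream tokens as they are generated
    }

    # Mirror the proxy's stream=True so chunks are printed as they arrive
    # instead of buffering the whole completion in memory.
    with requests.post(
        "http://localhost:5000/proxy-chat",  # assumed dev-server address
        json=payload,
        stream=True,
        timeout=300,
    ) as resp:
        resp.raise_for_status()
        for chunk in resp.iter_content(chunk_size=1024):
            if chunk:
                print(chunk.decode("utf-8", errors="replace"), end="", flush=True)

Because the proxy forwards raw 1024-byte chunks, a server-sent-events payload
may arrive split mid-line; a robust client would buffer and reassemble
complete "data:" lines before parsing them.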