Stream the backend server response through the proxy to keep the connection alive

2025-12-22 21:23:15 +01:00
parent af52722bb2
commit daaaa9c545
1 changed file with 13 additions and 7 deletions
--- a/app.py
+++ b/app.py
@@ -23,29 +23,35 @@ def index():
@app.route('/about')
 def about():
    return render_template('about.html')
-
@app.route('/proxy-chat', methods=['POST'])
 def proxy_chat():
    target_url = "http://192.168.0.37:5002/v1/chat/completions"
    
    try:
+        # We use stream=True so we don't load the whole response into RAM at once
        response = requests.post(
            target_url, 
            json=request.json, 
-            timeout=300 # Important: Give the server time to process. Set very long timeout
+            timeout=300,
+            stream=True 
        )
        
+        # Generator to yield chunks of data as they arrive
+        def generate():
+            for chunk in response.iter_content(chunk_size=1024):
+                yield chunk
+
        return Response(
-            response.content, 
+            generate(), 
            status=response.status_code, 
-            content_type=response.headers['content-type']
+            content_type=response.headers.get('content-type', 'application/json')
        )
        
    except requests.exceptions.Timeout:
-        return {"error": "The server took too long to answer. Try a different prompt."}, 504
+        return {"error": "The backend LLM timed out."}, 504
    except Exception as e:
-        return {"error": str(e)}, 500
-    
+        app.logger.error(f"Proxy error: {str(e)}")
+        return {"error": "Internal server error"}, 500

@app.route('/post/<path:path>/') 
 def post(path):