From daaaa9c5456c41cda8b5fbae087d6a423deb52fe Mon Sep 17 00:00:00 2001
From: Rasmus
Date: Mon, 22 Dec 2025 21:23:15 +0100
Subject: [PATCH] Stream server response to keep the connection alive

---
 app.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/app.py b/app.py
index 05d709e..2ef09cb 100644
--- a/app.py
+++ b/app.py
@@ -23,29 +23,35 @@ def index():
 @app.route('/about')
 def about():
     return render_template('about.html')
 
-
 @app.route('/proxy-chat', methods=['POST'])
 def proxy_chat():
     target_url = "http://192.168.0.37:5002/v1/chat/completions"
 
     try:
+        # We use stream=True so we don't load the whole response into RAM at once
         response = requests.post(
             target_url,
             json=request.json,
-            timeout=300  # Important: Give the server time to process. Set very long timeout
+            timeout=300,
+            stream=True
         )
 
+        # Generator to yield chunks of data as they arrive
+        def generate():
+            for chunk in response.iter_content(chunk_size=1024):
+                yield chunk
+
         return Response(
-            response.content,
+            generate(),
             status=response.status_code,
-            content_type=response.headers['content-type']
+            content_type=response.headers.get('content-type', 'application/json')
         )
     except requests.exceptions.Timeout:
-        return {"error": "The server took too long to answer. Try a different prompt."}, 504
+        return {"error": "The backend LLM timed out."}, 504
     except Exception as e:
-        return {"error": str(e)}, 500
-
+        app.logger.error(f"Proxy error: {str(e)}")
+        return {"error": "Internal server error"}, 500
 
 @app.route('/post/<path>/')
 def post(path):
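
For reference, a minimal client-side sketch of how the streamed endpoint can
be consumed. The host/port (localhost:5000), the model name, and the
OpenAI-style payload are assumptions here; the patch only shows the server
side, and the target URL suggests an OpenAI-compatible chat-completions
backend.

    import requests

    payload = {
        "model": "local-model",  # hypothetical model name
        "messages": [{"role": "user", "content": "Hello!"}],
        "stream": True,  # ask the backend to stream tokens as they are generated
    }

    # Mirror the proxy's stream=True so chunks are printed as they arrive
    # instead of buffering the whole completion in memory.
    with requests.post(
        "http://localhost:5000/proxy-chat",  # assumed dev-server address
        json=payload,
        stream=True,
        timeout=300,
    ) as resp:
        resp.raise_for_status()
        for chunk in resp.iter_content(chunk_size=1024):
            if chunk:
                print(chunk.decode("utf-8", errors="replace"), end="", flush=True)

Because the proxy forwards raw 1024-byte chunks, a server-sent-events payload
may arrive split mid-line; a robust client would buffer and reassemble
complete "data:" lines before parsing them.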