eventstream and longer timeout
All checks were successful
Redeploy landing on Push / Explore-Gitea-Actions (push) Successful in 7s
Dockerfile
@@ -5,18 +5,14 @@ FROM python:3.11-slim
 ENV PYTHONDONTWRITEBYTECODE=1
 ENV PYTHONUNBUFFERED=1

 # Set work directory
 WORKDIR /app

 # Install dependencies
 # We do this before copying the whole app to leverage Docker cache
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt

 # Copy the rest of the application code
 COPY . .

 # Expose the port your app runs on
 EXPOSE 5001

-CMD ["gunicorn", "--bind", "0.0.0.0:5001", "app:app"]
+# Increase timeout to 5 minutes (300s) and use threads to handle long waits
+CMD ["gunicorn", "--bind", "0.0.0.0:5001", "--timeout", "300", "--worker-class", "gthread", "--threads", "4", "app:app"]
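The new CMD packs five gunicorn flags into one line. The same settings can also live in a config file that gunicorn loads with -c; a minimal sketch, assuming a file named gunicorn.conf.py next to app.py (this file is not part of the commit):

# gunicorn.conf.py -- hypothetical equivalent of the new CMD flags (not in this commit)
bind = "0.0.0.0:5001"     # matches the EXPOSEd port
timeout = 300             # only kill a worker after 5 minutes of silence
worker_class = "gthread"  # threaded workers, so one long-running LLM call...
threads = 4               # ...doesn't tie up the whole worker process

The CMD would then shrink to CMD ["gunicorn", "-c", "gunicorn.conf.py", "app:app"]. The gthread worker matters here because each streamed response holds a connection open for up to five minutes; with gunicorn's default sync worker, every such request would occupy an entire worker process for the duration.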
app.py
@@ -27,32 +27,34 @@ def about():
 def proxy_chat():
     target_url = "http://192.168.0.37:5002/v1/chat/completions"

     # Ensure 'stream' is set to True for the backend
     payload = request.json
     payload['stream'] = True

     try:
-        # We use stream=True so we don't load the whole response into RAM at once
+        # We use stream=True so requests doesn't buffer the whole response
         response = requests.post(
             target_url,
-            json=request.json,
+            json=payload,
             timeout=300,
             stream=True
         )

         # Generator to yield chunks of data as they arrive
         def generate():
-            for chunk in response.iter_content(chunk_size=1024):
-                yield chunk
+            # This yields chunks of data to the browser as they arrive
+            for chunk in response.iter_content(chunk_size=None):
+                if chunk:
+                    yield chunk

         return Response(
             generate(),
             status=response.status_code,
-            content_type=response.headers.get('content-type', 'application/json')
+            content_type='text/event-stream'  # Standard for streaming AI responses
         )

     except requests.exceptions.Timeout:
-        return {"error": "The backend LLM timed out."}, 504
+        return {"error": "Backend timed out"}, 504
     except Exception as e:
-        app.logger.error(f"Proxy error: {str(e)}")
-        return {"error": "Internal server error"}, 500
+        return {"error": str(e)}, 500

 @app.route('/post/<path:path>/')
 def post(path):
     page = pages.get_or_404(path)
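To see the streaming behaviour end to end, the proxy can be exercised with the same stream=True pattern it uses internally. A minimal sketch, assuming the route for proxy_chat is /chat (its @app.route line sits above this hunk) and a payload shape the backend at :5002 accepts; both are assumptions, not visible in this diff:

import requests

# Hypothetical values: the proxy's route path and the payload fields
# are assumptions, not shown in this commit
url = "http://localhost:5001/chat"
payload = {"messages": [{"role": "user", "content": "Hello"}]}

# stream=True mirrors what proxy_chat does against the LLM backend:
# chunks print as they arrive instead of after the full response
with requests.post(url, json=payload, stream=True, timeout=300) as resp:
    resp.raise_for_status()
    for chunk in resp.iter_content(chunk_size=None):
        if chunk:
            print(chunk.decode("utf-8", errors="replace"), end="", flush=True)

The content_type='text/event-stream' set in the handler matches the SSE framing that OpenAI-style chat-completion backends emit when stream is true, so the browser receives the backend's chunks unmodified.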