diff --git a/Dockerfile b/Dockerfile
index 6f6f3bc..4d9c7e9 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -5,18 +5,14 @@ FROM python:3.11-slim
 ENV PYTHONDONTWRITEBYTECODE=1
 ENV PYTHONUNBUFFERED=1
 
-# Set work directory
 WORKDIR /app
 
-# Install dependencies
-# We do this before copying the whole app to leverage Docker cache
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 
-# Copy the rest of the application code
 COPY . .
 
-# Expose the port your app runs on
 EXPOSE 5001
 
-CMD ["gunicorn", "--bind", "0.0.0.0:5001", "app:app"]
\ No newline at end of file
+# Raise the gunicorn worker timeout to 5 minutes (300s) and use the threaded worker (gthread, 4 threads) so one long-running streamed response does not block other requests
+CMD ["gunicorn", "--bind", "0.0.0.0:5001", "--timeout", "300", "--worker-class", "gthread", "--threads", "4", "app:app"]
\ No newline at end of file
diff --git a/app.py b/app.py
index 2ef09cb..20e54f4 100644
--- a/app.py
+++ b/app.py
@@ -27,32 +27,47 @@ def about():
 def proxy_chat():
+    """Forward the chat request to the backend LLM and stream its reply back."""
     target_url = "http://192.168.0.37:5002/v1/chat/completions"
     
+    # Reject bodies that are not a JSON object before mutating the payload
+    payload = request.get_json(silent=True)
+    if not isinstance(payload, dict):
+        return {"error": "Request body must be a JSON object"}, 400
+
+    # Ensure 'stream' is set to True for the backend
+    payload['stream'] = True
+
     try:
-        # We use stream=True so we don't load the whole response into RAM at once
+        # We use stream=True so requests doesn't buffer the whole response
         response = requests.post(
             target_url, 
-            json=request.json, 
+            json=payload, 
             timeout=300,
             stream=True 
         )
         
-        # Generator to yield chunks of data as they arrive
         def generate():
-            for chunk in response.iter_content(chunk_size=1024):
-                yield chunk
+            # This yields chunks of data to the browser as they arrive
+            try:
+                for chunk in response.iter_content(chunk_size=None):
+                    if chunk:
+                        yield chunk
+            finally:
+                # Release the backend connection even if the client disconnects
+                response.close()
 
+        # Pass through the backend's status/content type so its errors aren't masked as 200
         return Response(
             generate(), 
             status=response.status_code, 
-            content_type=response.headers.get('content-type', 'application/json')
+            content_type=response.headers.get('content-type', 'text/event-stream')
         )
         
     except requests.exceptions.Timeout:
-        return {"error": "The backend LLM timed out."}, 504
+        return {"error": "Backend timed out"}, 504
     except Exception as e:
         app.logger.error(f"Proxy error: {str(e)}")
         return {"error": "Internal server error"}, 500
 
 @app.route('/post/<path:path>/') 
 def post(path):
     page = pages.get_or_404(path)