
eventstream and longer timeout
All checks were successful
Redeploy landing on Push / Explore-Gitea-Actions (push) Successful in 7s

commit 3dfa3da397 (parent daaaa9c545)
Date: 2025-12-22 21:38:12 +01:00
2 changed files with 16 additions and 18 deletions

Dockerfile

@@ -5,18 +5,14 @@ FROM python:3.11-slim
 ENV PYTHONDONTWRITEBYTECODE=1
 ENV PYTHONUNBUFFERED=1
-# Set work directory
 WORKDIR /app
-# Install dependencies
-# We do this before copying the whole app to leverage Docker cache
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
-# Copy the rest of the application code
 COPY . .
-# Expose the port your app runs on
 EXPOSE 5001
-CMD ["gunicorn", "--bind", "0.0.0.0:5001", "app:app"]
+# Increase timeout to 5 minutes (300s) and use threads to handle long waits
+CMD ["gunicorn", "--bind", "0.0.0.0:5001", "--timeout", "300", "--worker-class", "gthread", "--threads", "4", "app:app"]

app.py (24 lines changed)

@@ -27,32 +27,34 @@ def about():
 def proxy_chat():
     target_url = "http://192.168.0.37:5002/v1/chat/completions"
+    # Ensure 'stream' is set to True for the backend
+    payload = request.json
+    payload['stream'] = True
     try:
-        # We use stream=True so we don't load the whole response into RAM at once
+        # We use stream=True so requests doesn't buffer the whole response
         response = requests.post(
             target_url,
-            json=request.json,
+            json=payload,
             timeout=300,
             stream=True
         )
-        # Generator to yield chunks of data as they arrive
         def generate():
-            for chunk in response.iter_content(chunk_size=1024):
-                yield chunk
+            # This yields chunks of data to the browser as they arrive
+            for chunk in response.iter_content(chunk_size=None):
+                if chunk:
+                    yield chunk
         return Response(
             generate(),
-            status=response.status_code,
-            content_type=response.headers.get('content-type', 'application/json')
+            content_type='text/event-stream'  # Standard for streaming AI responses
         )
     except requests.exceptions.Timeout:
-        return {"error": "The backend LLM timed out."}, 504
+        return {"error": "Backend timed out"}, 504
     except Exception as e:
-        app.logger.error(f"Proxy error: {str(e)}")
-        return {"error": "Internal server error"}, 500
+        return {"error": str(e)}, 500
 @app.route('/post/<path:path>/')
 def post(path):
     page = pages.get_or_404(path)
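
To exercise the new end-to-end streaming, a small client can post a chat request to the proxy and read the event stream incrementally instead of waiting for the full body. This is an illustrative sketch only: the proxy's route path and the OpenAI-compatible chunk format are assumptions (the @app.route decorator for proxy_chat() is outside the visible hunk), so the URL and field names may need adjusting.

# Minimal streaming-client sketch (not part of this commit).
# Assumptions: the Flask app listens on port 5001 and proxy_chat() is routed
# at /v1/chat/completions; the backend emits OpenAI-style SSE chunks.
import json
import requests

payload = {
    "model": "local-model",  # hypothetical model name for the backend LLM
    "messages": [{"role": "user", "content": "Say hello"}],
}

with requests.post("http://localhost:5001/v1/chat/completions",
                   json=payload, stream=True, timeout=300) as resp:
    # Read the text/event-stream line by line as it arrives.
    for line in resp.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data: "):
            continue
        data = line[len("data: "):]
        if data == "[DONE]":  # OpenAI-style terminator, if the backend sends one
            break
        chunk = json.loads(data)
        # Print each token delta as it streams in (OpenAI-compatible shape assumed).
        print(chunk["choices"][0]["delta"].get("content", ""), end="", flush=True)

Note that the client does not need to set "stream": true itself, since proxy_chat() now forces it on the forwarded payload.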