eventstream and longer timeout
All checks were successful
Redeploy landing on Push / Explore-Gitea-Actions (push) Successful in 7s
Dockerfile
@@ -5,18 +5,14 @@ FROM python:3.11-slim
 ENV PYTHONDONTWRITEBYTECODE=1
 ENV PYTHONUNBUFFERED=1
 
-# Set work directory
 WORKDIR /app
 
-# Install dependencies
-# We do this before copying the whole app to leverage Docker cache
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 
-# Copy the rest of the application code
 COPY . .
 
-# Expose the port your app runs on
 EXPOSE 5001
 
-CMD ["gunicorn", "--bind", "0.0.0.0:5001", "app:app"]
+# Increase timeout to 5 minutes (300s) and use threads to handle long waits
+CMD ["gunicorn", "--bind", "0.0.0.0:5001", "--timeout", "300", "--worker-class", "gthread", "--threads", "4", "app:app"]
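For reference, the same gunicorn settings can live in a config file instead of the CMD line. A minimal sketch (the gunicorn.conf.py file and this layout are an illustration, not part of the commit):

# gunicorn.conf.py: optional equivalent of the flags in the new CMD above
bind = "0.0.0.0:5001"
timeout = 300             # seconds a worker may go silent before gunicorn restarts it
worker_class = "gthread"  # threaded workers, so one long-running stream doesn't tie up a whole worker
threads = 4               # threads per worker

# The Dockerfile CMD would then shrink to:
# CMD ["gunicorn", "--config", "gunicorn.conf.py", "app:app"]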
app.py
@@ -27,32 +27,34 @@ def about():
 def proxy_chat():
     target_url = "http://192.168.0.37:5002/v1/chat/completions"
 
+    # Ensure 'stream' is set to True for the backend
+    payload = request.json
+    payload['stream'] = True
 
     try:
-        # We use stream=True so we don't load the whole response into RAM at once
+        # We use stream=True so requests doesn't buffer the whole response
         response = requests.post(
             target_url,
-            json=request.json,
+            json=payload,
             timeout=300,
             stream=True
         )
 
-        # Generator to yield chunks of data as they arrive
         def generate():
-            for chunk in response.iter_content(chunk_size=1024):
-                yield chunk
+            # This yields chunks of data to the browser as they arrive
+            for chunk in response.iter_content(chunk_size=None):
+                if chunk:
+                    yield chunk
 
         return Response(
             generate(),
-            status=response.status_code,
-            content_type=response.headers.get('content-type', 'application/json')
+            content_type='text/event-stream' # Standard for streaming AI responses
         )
 
     except requests.exceptions.Timeout:
-        return {"error": "The backend LLM timed out."}, 504
+        return {"error": "Backend timed out"}, 504
     except Exception as e:
-        app.logger.error(f"Proxy error: {str(e)}")
-        return {"error": "Internal server error"}, 500
+        return {"error": str(e)}, 500
 
 @app.route('/post/<path:path>/')
 def post(path):
     page = pages.get_or_404(path)
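A quick way to exercise the new streaming behaviour from Python, as a rough sketch: the proxy route path and request body below are assumptions (the route decorator for proxy_chat() sits above this hunk and isn't shown), so substitute the real values.

import requests

# Hypothetical URL for the Flask route that wraps proxy_chat(); adjust to the real path.
PROXY_URL = "http://localhost:5001/chat"

with requests.post(
    PROXY_URL,
    json={"messages": [{"role": "user", "content": "hello"}]},  # assumed OpenAI-style body
    stream=True,   # mirror the proxy: read the reply incrementally
    timeout=300,
) as resp:
    # The proxy now returns text/event-stream, so consume it line by line
    # as chunks arrive instead of waiting for the full body.
    for line in resp.iter_lines(decode_unicode=True):
        if line:
            print(line)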