server stream to keep alive
All checks were successful
Redeploy landing on Push / Explore-Gitea-Actions (push) Successful in 7s
All checks were successful
Redeploy landing on Push / Explore-Gitea-Actions (push) Successful in 7s
This commit is contained in:
20
app.py
20
app.py
@@ -23,29 +23,35 @@ def index():
|
||||
@app.route('/about')
|
||||
def about():
|
||||
return render_template('about.html')
|
||||
|
||||
@app.route('/proxy-chat', methods=['POST'])
|
||||
def proxy_chat():
|
||||
target_url = "http://192.168.0.37:5002/v1/chat/completions"
|
||||
|
||||
try:
|
||||
# We use stream=True so we don't load the whole response into RAM at once
|
||||
response = requests.post(
|
||||
target_url,
|
||||
json=request.json,
|
||||
timeout=300 # Important: Give the server time to process. Set very long timeout
|
||||
timeout=300,
|
||||
stream=True
|
||||
)
|
||||
|
||||
# Generator to yield chunks of data as they arrive
|
||||
def generate():
|
||||
for chunk in response.iter_content(chunk_size=1024):
|
||||
yield chunk
|
||||
|
||||
return Response(
|
||||
response.content,
|
||||
generate(),
|
||||
status=response.status_code,
|
||||
content_type=response.headers['content-type']
|
||||
content_type=response.headers.get('content-type', 'application/json')
|
||||
)
|
||||
|
||||
except requests.exceptions.Timeout:
|
||||
return {"error": "The server took too long to answer. Try a different prompt."}, 504
|
||||
return {"error": "The backend LLM timed out."}, 504
|
||||
except Exception as e:
|
||||
return {"error": str(e)}, 500
|
||||
|
||||
app.logger.error(f"Proxy error: {str(e)}")
|
||||
return {"error": "Internal server error"}, 500
|
||||
|
||||
@app.route('/post/<path:path>/')
|
||||
def post(path):
|
||||
|
||||
Reference in New Issue
Block a user