converted LLM to non-streaming
All checks were successful
Redeploy landing on Push / Explore-Gitea-Actions (push) Successful in 7s

commit c9c9610de9
parent ea43c144e0
2025-12-22 17:00:12 +01:00

app.py (32 changed lines)

@@ -23,21 +23,31 @@ def index():
 @app.route('/about')
 def about():
     return render_template('about.html')
 
 @app.route('/proxy-chat', methods=['POST'])
 def proxy_chat():
     target_url = "http://192.168.0.37:5002/v1/chat/completions"
-    # Forward the request to your local LLM
-    # We use stream=True here to get the chunks from the backend
-    req = requests.post(target_url, json=request.json, stream=True)
-
-    def generate():
-        # Yield each chunk as it arrives from the LLM
-        for chunk in req.iter_content(chunk_size=1024):
-            yield chunk
-    return Response(stream_with_context(generate()), content_type=req.headers['content-type'])
+    try:
+        # 1. Forward the request without stream=True
+        # llama.cpp will now send back one large JSON object
+        response = requests.post(
+            target_url,
+            json=request.json,
+            timeout=120  # Important: Give the i5-8400 time to think
+        )
+        # 2. Return the full JSON response to the browser
+        return Response(
+            response.content,
+            status=response.status_code,
+            content_type=response.headers['content-type']
+        )
+    except requests.exceptions.Timeout:
+        return {"error": "The server took too long to answer. Try a different prompt."}, 504
+    except Exception as e:
+        return {"error": str(e)}, 500
 
 @app.route('/post/<path:path>/')  # Adding /post/ prefix helps organize URLs
 def post(path):
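With streaming removed, a caller now receives the whole completion as one JSON document instead of a sequence of chunks. The snippet below is a minimal sketch of how a client might use the new /proxy-chat endpoint; it assumes the Flask app is reachable at http://localhost:5000 and that llama.cpp returns the usual OpenAI-style chat-completions shape (choices[0].message.content). The host, port, and prompt are placeholders, not part of the commit.

import requests

# Hypothetical client for the non-streaming /proxy-chat endpoint.
# Assumes the Flask app from this commit is running on localhost:5000.
payload = {
    "messages": [
        {"role": "user", "content": "Summarize the latest post in one sentence."}
    ],
    "stream": False,  # the proxy no longer forwards chunks, so ask for a single reply
}

# Client timeout is slightly longer than the proxy's 120 s upstream timeout,
# so the proxy's own 504 response arrives before the client gives up.
resp = requests.post("http://localhost:5000/proxy-chat", json=payload, timeout=130)
data = resp.json()

if resp.ok:
    # OpenAI-compatible response shape served by llama.cpp
    print(data["choices"][0]["message"]["content"])
else:
    # The proxy returns {"error": ...} with status 504 or 500 on failure
    print("Proxy error:", data.get("error", resp.status_code))

Compared to the streaming version, the main difference for the client is latency: nothing is shown until the full answer is ready, which is why the generous timeouts matter on slower hardware.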