diff --git a/app.py b/app.py
index ea7cb17..f29d605 100644
--- a/app.py
+++ b/app.py
@@ -1,9 +1,11 @@
-from flask import Flask, render_template
+from flask import Flask, render_template, Response, stream_with_context, request
 from flask_flatpages import FlatPages
+from werkzeug.middleware.proxy_fix import ProxyFix
+import requests
 
 app = Flask(__name__)
+app.wsgi_app = ProxyFix(app.wsgi_app, x_proto=1, x_host=1)
 
-# Consolidate Configs
 app.config.update(
     FLATPAGES_AUTO_RELOAD = True,
     FLATPAGES_EXTENSION = '.md',
@@ -22,6 +24,20 @@ def index():
 def about():
     return render_template('about.html')
 
+@app.route('/proxy-chat', methods=['POST'])
+def proxy_chat():
+    target_url = "http://192.168.0.37:5002/v1/chat/completions"
+
+    # Forward the request to the local LLM backend. stream=True relays chunks as
+    # they arrive; the (connect, read) timeout keeps a stalled backend from hanging a worker.
+    req = requests.post(target_url, json=request.json, stream=True, timeout=(5, 120))
+
+    def generate():
+        # Yield each chunk as it arrives from the LLM
+        for chunk in req.iter_content(chunk_size=1024):
+            yield chunk
+
+    return Response(stream_with_context(generate()), status=req.status_code, content_type=req.headers.get('content-type', 'application/json'))
 
 @app.route('/post/<path:path>/') # Adding /post/ prefix helps organize URLs
 def post(path):
diff --git a/requirements.txt b/requirements.txt
index 3a39c63..829e42e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 flask
 flask-flatpages
-gunicorn
\ No newline at end of file
+gunicorn
+requests
\ No newline at end of file