Here's a dummy app if you want something to work off of (I made this at one point to test something similar):
```python
from llama_index.llms.openai import OpenAI
from llama_index.core import SummaryIndex, Document
from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()


@app.get("/")
async def root():
    return {"message": "Hello World"}


@app.get("/test")
async def test():
    # Throwaway index over the bundled example document.
    index = SummaryIndex([Document.example()])

    # Passing the LLM explicitly; this uses OpenAI, so OPENAI_API_KEY must be set.
    chat_engine = index.as_chat_engine(
        chat_mode="condense_plus_context", llm=OpenAI()
    )
    response = await chat_engine.astream_chat("Tell me a fact about LLMs.")

    # Wrap the async token generator so FastAPI can stream it back to the client.
    async def gen():
        async for token in response.async_response_gen():
            yield str(token)

    return StreamingResponse(gen())


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, loop="asyncio")
```
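If you want to actually watch the tokens arrive, here's a minimal client sketch, assuming the server is running on uvicorn's default `127.0.0.1:8000` and that you have `httpx` installed:

```python
# Hypothetical client for the app above: streams /test and prints tokens as they land.
import httpx

with httpx.stream("GET", "http://127.0.0.1:8000/test", timeout=None) as resp:
    for chunk in resp.iter_text():
        print(chunk, end="", flush=True)
```

`timeout=None` is just so the first LLM token doesn't trip the default read timeout; tighten it however you like.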