Here's a very basic example I keep around for testing:
import asyncio
import os

from fastapi import FastAPI, Request, Depends
from fastapi.responses import StreamingResponse
from llama_index.core import VectorStoreIndex
from llama_index.core.chat_engine.types import BaseChatEngine
from llama_index.llms.openai import OpenAI

app = FastAPI()


def get_chat_engine() -> BaseChatEngine:
    # An empty index is enough for a smoke test; swap in real documents as needed.
    vector_store_index = VectorStoreIndex.from_documents(documents=[])
    chat_engine = vector_store_index.as_chat_engine(
        chat_mode="context",
        similarity_top_k=10,
        system_prompt="You are a helpful assistant",
    )
    return chat_engine


@app.get("/")
async def root():
    return {"message": "Hello World"}


@app.get("/stream_test")
async def stream_test(
    request: Request,
    chat_engine: BaseChatEngine = Depends(get_chat_engine),
):
    # Start the streaming chat; tokens are pulled from the generator below.
    response = await chat_engine.astream_chat(
        "Tell me a poem about raining cats and dogs!"
    )

    async def event_generator():
        async for token in response.async_response_gen():
            # Stop sending tokens once the client disconnects.
            if await request.is_disconnected():
                break
            yield token

    return StreamingResponse(event_generator(), media_type="text/plain")


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(
        app,
        loop="asyncio",
        host="0.0.0.0",
        port=8080,
    )
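
To actually watch the tokens come through, I usually hit the endpoint with a small streaming client. This is just a minimal sketch, assuming the app above is running locally on port 8080 and that httpx is installed; the URL and the chunk handling are illustrative, not part of the example above:

import asyncio

import httpx


async def main():
    # Stream the plain-text response from the /stream_test route and print
    # each chunk as it arrives (no buffering of the full body).
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream("GET", "http://localhost:8080/stream_test") as resp:
            async for chunk in resp.aiter_text():
                print(chunk, end="", flush=True)


if __name__ == "__main__":
    asyncio.run(main())

A plain curl against the route also works if you just want to eyeball the stream, but the client above makes it easier to check that chunks arrive incrementally rather than all at once.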