from typing import Any

import requests

from llama_index import ServiceContext
from llama_index.llms import (
    CustomLLM,
    CompletionResponse,
    CompletionResponseGen,
    LLMMetadata,
)
from llama_index.llms.base import llm_completion_callback


class OurLLM(CustomLLM):
    context_window: int = 3900
    num_output: int = 256
    model_name: str = "custom"
    dummy_response: str = "My response"
    API_URL: str = "SET YOUR API URL HERE"

    @property
    def metadata(self) -> LLMMetadata:
        """Get LLM metadata."""
        return LLMMetadata(
            context_window=self.context_window,
            num_output=self.num_output,
            model_name=self.model_name,
        )

    @llm_completion_callback()
    def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
        # Call your model's endpoint here. The request payload and the
        # response field below are placeholders -- adapt both to your API.
        response = requests.post(self.API_URL, json={"prompt": prompt})
        return CompletionResponse(text=response.json()["text"])

    @llm_completion_callback()
    def stream_complete(
        self, prompt: str, **kwargs: Any
    ) -> CompletionResponseGen:
        # Dummy streaming: yield the accumulated response one token at a time.
        response = ""
        for token in self.dummy_response:
            response += token
            yield CompletionResponse(text=response, delta=token)


# define our LLM
llm = OurLLM()

service_context = ServiceContext.from_defaults(
    llm=llm, embed_model="local:BAAI/bge-base-en-v1.5"
)
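With the service context in place, the custom LLM can be used in a standard query flow. The sketch below is illustrative rather than part of the original snippet: it assumes the legacy ServiceContext-based API shown above, a local "data/" directory of documents, and an example query string.

from llama_index import SimpleDirectoryReader, VectorStoreIndex

# Load documents from a local directory (path is a placeholder).
documents = SimpleDirectoryReader("data").load_data()

# Build an index that uses our custom LLM via the service context.
index = VectorStoreIndex.from_documents(
    documents, service_context=service_context
)

# Query the index; completions are served by OurLLM.
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")
print(response)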