

i have Python llama.cpp running in a container exposing the API. how can i connect to it with LlamaIndex? when i import it (from llama_index.llms import LlamaCPP) it wants to run llama.cpp on my localhost, but i want to connect to another host
ohh that looks complicated πŸ™‚
basically i want my model to run in a container with some more horsepower than the rest of my stack
Plain Text
from typing import Any

import requests

from llama_index import ServiceContext
from llama_index.llms import (
    CustomLLM,
    CompletionResponse,
    CompletionResponseGen,
    LLMMetadata,
)
from llama_index.llms.base import llm_completion_callback


class OurLLM(CustomLLM):
    context_window: int = 3900
    num_output: int = 256
    model_name: str = "custom"
    dummy_response: str = "My response"
    API_URL = "SET YOUR API URL HERE"

    @property
    def metadata(self) -> LLMMetadata:
        """Get LLM metadata."""
        return LLMMetadata(
            context_window=self.context_window,
            num_output=self.num_output,
            model_name=self.model_name,
        )

    @llm_completion_callback()
    def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
        # POST the prompt to your API URL here; adjust the payload to match
        # whatever your server expects
        response = requests.post(self.API_URL, json={"prompt": prompt})
        # pull the generated text out of your model's response (adjust the
        # parsing to your API's response format)
        return CompletionResponse(text=response.json()["choices"][0]["text"])

    @llm_completion_callback()
    def stream_complete(
        self, prompt: str, **kwargs: Any
    ) -> CompletionResponseGen:
        response = ""
        for token in self.dummy_response:
            response += token
            yield CompletionResponse(text=response, delta=token)


# define our LLM
llm = OurLLM()

service_context = ServiceContext.from_defaults(
    llm=llm, embed_model="local:BAAI/bge-base-en-v1.5"
)
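For reference, here is a rough sketch of how that service_context could then be wired into an index and queried. The ./data folder and the query string are just placeholders, adjust them to your own setup:

Plain Text
from llama_index import VectorStoreIndex, SimpleDirectoryReader

# load whatever documents you want to index (example path)
documents = SimpleDirectoryReader("./data").load_data()

# build the index using the custom LLM via the service_context
index = VectorStoreIndex.from_documents(
    documents, service_context=service_context
)

query_engine = index.as_query_engine()
print(query_engine.query("your question about the docs here"))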
omg thanks i think i can work from this
sorry i can't figure it out :/

my api lives at: http://localhost:5055/v1/completions
the response text looks like it comes from response.choices[0].text
i'm completely new to Python, so my syntax is probably wrong too
i wish i could just clone LlamaCPP from llms and point it at a different URL πŸ™‚
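(A quick way to double-check that response shape, assuming the server speaks the OpenAI-style completions format, is to hit the endpoint directly and look at the JSON:)

Plain Text
import requests

# sanity check against the local llama.cpp server (prompt/max_tokens are just examples)
resp = requests.post(
    "http://localhost:5055/v1/completions",
    json={"prompt": "Hello", "max_tokens": 16},
)
print(resp.json())  # the generated text should be under choices[0]["text"]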
chatgpt helped me, this is what it looks like for future reference:

import requests
from llama_index.llms import CustomLLM, CompletionResponse, CompletionResponseGen, LLMMetadata
from llama_index.llms.base import llm_completion_callback
from typing import Any


class OurLLM(CustomLLM):
    context_window: int = 3900
    num_output: int = 256
    model_name: str = "custom"
    dummy_response: str = "My response"
    API_URL: str = "http://llamacpp:5000/v1/completions"

    @property
    def metadata(self) -> LLMMetadata:
        """Get LLM metadata."""
        return LLMMetadata(
            context_window=self.context_window,
            num_output=self.num_output,
            model_name=self.model_name,
        )

    @llm_completion_callback()
    def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
        # Define the API payload
        payload = {"prompt": prompt, "stop": ["\n", "###"]}

        # Make the POST request to the API
        response = requests.post(self.API_URL, json=payload)

        # Parse the API response
        choices = response.json().get("choices", [])
        if choices:
            completion_text = choices[0].get("text", "")
            return CompletionResponse(text=completion_text)
        else:
            # Handle the case where there are no choices
            return CompletionResponse(text="")

    @llm_completion_callback()
    def stream_complete(
        self, prompt: str, **kwargs: Any
    ) -> CompletionResponseGen:
        response = ""
        for token in self.dummy_response:
            response += token
            yield CompletionResponse(text=response, delta=token)
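One caveat for anyone copying this: stream_complete above still streams the hard-coded dummy_response instead of calling the API. If you don't need real token streaming, a simple workaround (a sketch, not tested against llama.cpp's streaming mode) is to fall back to the non-streaming complete() call and yield the whole text once:

Plain Text
    @llm_completion_callback()
    def stream_complete(
        self, prompt: str, **kwargs: Any
    ) -> CompletionResponseGen:
        # no real streaming: call the regular endpoint once and yield
        # the full completion as a single chunk
        full_text = self.complete(prompt, **kwargs).text
        yield CompletionResponse(text=full_text, delta=full_text)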