i have Python llamacpp running in an

At a glance

i have Python llamacpp running in an container exposing the API, how can i connect to it with Llamaindex ?? when i import (from llama_index.llms import LlamaCPP) it wants to run llamaCPP on my local host but i want to connect to another host

13 comments

WWhiteFang_Jr

You can use Custom LLM class from LlamaIndex to interact with your hosted LLM via the API.

https://docs.llamaindex.ai/en/stable/module_guides/models/llms/usage_custom.html#example-using-a-custom-llm-model-advanced

hhansson0728

ohh that looks complicated 🙂

WWhiteFang_Jr

No no

hhansson0728

basically i want my model to run in a container with some more horse power than the rest of my stack

WWhiteFang_Jr

Plain Text

from llama_index.llms import (
    CustomLLM,
    CompletionResponse,
    CompletionResponseGen,
    LLMMetadata,
)
from llama_index.llms.base import llm_completion_callback


class OurLLM(CustomLLM):
    context_window: int = 3900
    num_output: int = 256
    model_name: str = "custom"
    dummy_response: str = "My response"
    API_URL = "SET YOUR API URL HERE"

    @property
    def metadata(self) -> LLMMetadata:
        """Get LLM metadata."""
        return LLMMetadata(
            context_window=self.context_window,
            num_output=self.num_output,
            model_name=self.model_name,
        )

    @llm_completion_callback()
    def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
        response  = requests.post(CALL YOUR URL HERE)
        return CompletionResponse(text=ADD THE RESPONSE FROM YOUR MODEL HERE)

    @llm_completion_callback()
    def stream_complete(
        self, prompt: str, **kwargs: Any
    ) -> CompletionResponseGen:
        response = ""
        for token in self.dummy_response:
            response += token
            yield CompletionResponse(text=response, delta=token)


# define our LLM
llm = OurLLM()

service_context = ServiceContext.from_defaults(
    llm=llm, embed_model="local:BAAI/bge-base-en-v1.5"
)

hhansson0728

omg thanks i think i can work from this

hhansson0728

sorry i cat figure it out :/

my api lives on: http://localhost:5055/v1/completions

hhansson0728

the response looks like its from response.choises[0].text

hhansson0728

iam completly new to python so my syntax i probably wrong alos

hhansson0728

also

hhansson0728

i wish i could just clone Llamacpp from llms and put a diffrent URL on it 🙂

hhansson0728

chatgpt helped me, this is what it looks like for future refrence:

import requests
from llama_index.llms import CustomLLM, CompletionResponse, CompletionResponseGen, LLMMetadata
from llama_index.llms.base import llm_completion_callback
from typing import Any

class OurLLM(CustomLLM):
context_window: int = 3900
num_output: int = 256
model_name: str = "custom"
dummy_response: str = "My response"
API_URL = "http://llamacpp:5000/v1/completions"

@property
def metadata(self) -> LLMMetadata:
"""Get LLM metadata."""
return LLMMetadata(
context_window=self.context_window,
num_output=self.num_output,
model_name=self.model_name,
)

@llm_completion_callback()
def complete(self, prompt: str, kwargs: Any) -> CompletionResponse: # Define the API payload payload = { "prompt": prompt, "stop": ["\n", "###"] } # Make the POST request to the API response = requests.post(self.API_URL, json=payload) # Parse the API response choices = response.json().get("choices", []) if choices: completion_text = choices[0].get("text", "") return CompletionResponse(text=completion_text) else: # Handle the case where there are no choices return CompletionResponse(text="") @llm_completion_callback() def stream_complete( self, prompt: str, kwargs: Any
) -> CompletionResponseGen:
response = ""
for token in self.dummy_response:
response += token
yield CompletionResponse(text=response, delta=token)

WWhiteFang_Jr

Awesome!!

Add a reply

Find answers from the community

i have Python llamacpp running in an