Palm

Has anyone successfully integrated llama_index into Vertex/PaLM? I've found some examples, but all of them always seem to want an OpenAI key - I'm trying to do this 100% with Google.
What does your current approach look like?

You'll need to set both an embed_model and llm_predictor to fully remove openai
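For reference, a rough sketch of the end state (assuming the legacy ServiceContext API and LangChain's Vertex AI wrappers; the model name here is an assumption):

Plain Text
from llama_index import LangchainEmbedding, LLMPredictor, ServiceContext
from langchain.llms import VertexAI
from langchain.embeddings import VertexAIEmbeddings

# Both the LLM and the embedding model come from Google,
# so nothing falls back to the OpenAI defaults.
llm_predictor = LLMPredictor(llm=VertexAI(model_name="text-bison@001"))
embed_model = LangchainEmbedding(VertexAIEmbeddings())

service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    embed_model=embed_model,
)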
I saw someone was creating a custom model with this code:

Plain Text
def query_google_llm(chat, query):
    response = chat.send_message(query)
    print(response.text)
    return response.text


chat = build_google_llm()


class PaLM(LLM):
    model_name = "Bard"
    total_tokens_used = 0
    last_token_usage = 0

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        print("prompt: ", prompt)
        response = query_google_llm(chat, prompt)
        print("response: ", response)
        return response

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        return {"name_of_model": self.model_name}

    @property
    def _llm_type(self) -> str:
        return "custom"
(new to discord)
But I'm not sure what he was doing with the "build_google_llm" function.
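My guess is that build_google_llm just starts a Vertex AI chat session; something like this, using the vertexai SDK (the chat-bison model name and the init call are assumptions, since the original function wasn't shown):

Plain Text
import vertexai
from vertexai.language_models import ChatModel


def build_google_llm():
    # Assumes vertexai.init(project=..., location=...) was called beforehand;
    # "chat-bison@001" is a guess at the model the original author used.
    chat_model = ChatModel.from_pretrained("chat-bison@001")
    return chat_model.start_chat()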
Also tried to do this

Plain Text
from google.cloud import aiplatform
from langchain.llms.base import LLM
from llama_index import LLMPredictor, ServiceContext


# create a custom PaLM model using Google Vertex AI
def create_palm_model(project_id: str, location: str, model_id: str) -> LLMPredictor:
    # Use the PredictionServiceClient to create a client for the PredictionService API.
    client_options = {"api_endpoint": f"{location}-aiplatform.googleapis.com"}
    client = aiplatform.gapic.PredictionServiceClient(client_options=client_options)

    # Create the model name from the project ID, location, and model ID.
    model_name = f"projects/{project_id}/locations/{location}/models/{model_id}"

    # Create a new LLMPredictor instance and set the llm property to the LLM model
    # returned by the PredictionService API.
    palm = LLMPredictor(llm=None)
    palm.llm = LLM.from_dict(client.predict(endpoint=model_name)["llm"])
    return palm


# define our LLM
llm_predictor = create_palm_model(
    project_id="gcp-enterprise-data-chat",
    location="us-west1",
    model_id="chat-bison@001",
)

service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor, context_window=context_window, num_output=num_output
)
Right -- you'll also need an embed model too, otherwise it defaults to text-embedding-ada-002 from OpenAI

Something like this maybe

Plain Text
from langchain.embeddings import VertexAIEmbeddings
from llama_index import LangchainEmbedding

embed_model = LangchainEmbedding(VertexAIEmbeddings())

service_context = ServiceContext.from_defaults(..., embed_model=embed_model)
That helps!

Here is what I have thus far:

Plain Text
import os
from typing import List, Optional, Mapping, Any

from google.cloud import aiplatform
from google.protobuf.struct_pb2 import Value
from google.protobuf import json_format

from llama_index import (
    LangchainEmbedding,
    ListIndex,
    LLMPredictor,
    ServiceContext,
    SimpleDirectoryReader,
)

from langchain.llms import VertexAI
from langchain.embeddings import VertexAIEmbeddings


# Placeholder chat client used while wiring things up - it just echoes the prompt back
class Chat:
    def __init__(self):
        self.messages = []

    def send_message(self, message):
        self.messages.append(message)
        return "Response to " + message


os.environ[
    "GOOGLE_APPLICATION_CREDENTIALS"
] = "../gcp-enterprise-data-chat-1c02e4fff19e.json"

project = "gcp-enterprise-data-chat"
location = "us-west1"

chat = Chat()

# set context window size
context_window = 4096
# set number of output tokens
num_output = 256


class PaLM(VertexAI):
    model_name = "text-bison@001"
    total_tokens_used = 0
    last_token_usage = 0

    def __init__(self):
        super().__init__()

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        # NOTE: this routes through the placeholder Chat above, not an actual Vertex call yet
        response = query_google_llm(chat, prompt)
        return response

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        return {"name_of_model": self.model_name}

    @property
    def _llm_type(self) -> str:
        return "custom"


def query_google_llm(chat, query):
    response = chat.send_message(query)
    return response


# load in the Vertex AI embedding model via langchain
embed_model = LangchainEmbedding(VertexAIEmbeddings())

# define our LLM
llm_predictor = LLMPredictor(llm=PaLM())

service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    context_window=context_window,
    num_output=num_output,
    embed_model=embed_model,
)

# Load your data
documents = SimpleDirectoryReader("../data/llama_index").load_data()
index = ListIndex.from_documents(documents, service_context=service_context)

# Query and print response
query_engine = index.as_query_engine()
response = query_engine.query("<query_text>")
print(response)
Need to figure out the context_window and num_output
The context window is 8192, it looks like

num_output should be whatever the default output limit for text-bison is - looks like it's 1024?
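So, assuming those limits are right for text-bison@001, the two settings from the script above would just become:

Plain Text
# assumed limits for text-bison@001: 8192 input tokens, 1024 output tokens
context_window = 8192
num_output = 1024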
My approach
Plain Text
from llama_index.llms.palm import PaLM
from llama_index.embeddings import GoogleUnivSentEncoderEmbedding

service_context = ServiceContext.from_defaults(
    llm=PaLM(api_key=api_key, model_name='models/text-bison-001'),
    embed_model=GoogleUnivSentEncoderEmbedding(),
)

it is working
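For completeness, the indexing and querying part is the same as in the earlier script; a quick sketch reusing that service_context, with the data path and query text as placeholders:

Plain Text
from llama_index import ListIndex, SimpleDirectoryReader

documents = SimpleDirectoryReader("../data/llama_index").load_data()
index = ListIndex.from_documents(documents, service_context=service_context)

query_engine = index.as_query_engine()
print(query_engine.query("<query_text>"))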