Here is the full code:
import os
from getpass import getpass
from llama_index.core import VectorStoreIndex, Settings, SimpleDirectoryReader
from llama_index.core.text_splitter import SentenceSplitter
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama
from dotenv import load_dotenv, find_dotenv
import mlflow
_ = load_dotenv(find_dotenv())
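# Example .env entries (the variable names match the os.environ lookups below;
# the model values are placeholders, use models you have pulled with `ollama pull`):
#   OLLAMA_EMBED_MODEL=nomic-embed-text
#   OLLAMA_LLM_MODEL=llama3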
# os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")
embed_model = OllamaEmbedding(model_name=os.environ['OLLAMA_EMBED_MODEL'])
llm = Ollama(model=os.environ['OLLAMA_LLM_MODEL'], system_prompt="Talk like a Personal AI assistant.")
# To switch to OpenAI models, set OPENAI_API_KEY above and uncomment these lines
# along with the matching imports:
# from llama_index.embeddings.openai import OpenAIEmbedding
# from llama_index.llms.openai import OpenAI
# embed_model = OpenAIEmbedding(model=os.environ['OPENAI_EMBED_MODEL'])
# llm = OpenAI(model=os.environ['OPENAI_LLM_MODEL'])
Settings.embed_model = embed_model
Settings.llm = llm
# load the local data directory and chunk the data for further processing
print("------------- Example Document used to Enrich LLM Context -------------")
docs = SimpleDirectoryReader(input_dir="data", required_exts=[".pdf"]).load_data(show_progress=True)
text_parser = SentenceSplitter(chunk_size=512, chunk_overlap=100)  # ~512-token chunks with 100-token overlap
Settings.transformations = [text_parser]
print("------------- vector store indexing -------------")
index = VectorStoreIndex.from_documents(documents=docs, transformations=Settings.transformations)
print("------------- mlflow tracking start -------------")
mlflow.llama_index.autolog() # This is for enabling tracing
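# Assumes a local MLflow tracking server is already running, started with something like:
#   mlflow server --host 127.0.0.1 --port 3000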
mlflow.set_tracking_uri("http://127.0.0.1:3000")
mlflow.set_experiment("experiment-9")
with mlflow.start_run() as run:
    mlflow.llama_index.log_model(
        index,
        artifact_path="llama_index",
        engine_type="query",  # defines the pyfunc and spark_udf inference type
        input_example="What are Context LLMs?",  # used to infer the model signature
        registered_model_name="my_llama_index_vector_store",  # stores an instance in the model registry
    )
    run_id = run.info.run_id
    model_uri = f"runs:/{run_id}/llama_index"
print(f"Unique identifier for the model location for loading: {model_uri}")
user_query = "Which datasets does the paper use for its experiments?"
print("\n------------- Inference via Llama Index -------------")
loaded_index = mlflow.llama_index.load_model(model_uri)
query_response = loaded_index.as_query_engine().query(user_query)
print(query_response)
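Because the model was logged with engine_type="query", the same artifact can also be served through MLflow's generic pyfunc interface. A minimal sketch, assuming the same tracking server and that version 1 of the registered model exists:

import mlflow

mlflow.set_tracking_uri("http://127.0.0.1:3000")

# Load version 1 of the registered model from the model registry
# (the runs:/ URI printed above works here as well).
predictor = mlflow.pyfunc.load_model("models:/my_llama_index_vector_store/1")

# With engine_type="query", predict() accepts a plain query string
# and returns the query engine's response.
print(predictor.predict("What are Context LLMs?"))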