yep, sure, it's pretty much the same example from the docs:
import torch
from llama_index import ServiceContext, SimpleDirectoryReader, VectorStoreIndex
from llama_index.llms import HuggingFaceLLM

llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=256,
    # note: with do_sample=False decoding is greedy, so temperature is ignored;
    # set do_sample=True if you actually want sampling at 0.7
    generate_kwargs={"temperature": 0.7, "do_sample": False},
    system_prompt=system_prompt,  # defined earlier in my script
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="facebook/opt-350m",
    model_name="facebook/opt-350m",
    device_map="auto",
    # these stopping ids are from the StableLM example in the docs;
    # they likely don't apply to OPT (its eos token id is 2)
    stopping_ids=[50278, 50279, 50277, 1, 0],
    tokenizer_kwargs={"max_length": 4096},
    # uncomment this if using CUDA to reduce memory usage
    # model_kwargs={"torch_dtype": torch.float16}
)
documents = SimpleDirectoryReader("./documents").load_data()
service_context = ServiceContext.from_defaults(chunk_size=1024, llm=llm)
index = VectorStoreIndex.from_documents(
    documents, service_context=service_context
)
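
In case it helps, here's a minimal sketch of actually querying the index once it's built (the question string is just a placeholder):

# the query engine wraps retrieval over the index plus LLM response synthesis
query_engine = index.as_query_engine()
response = query_engine.query("What is this document about?")
print(response)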