Here is my code (the setup of `connection_string`, `db_name`, `datadir`, and `documents` happens earlier and is omitted):
llm = LlamaOpenAI(temperature=0, model="gpt-4")
embed_model = OpenAIEmbedding()
text_splitter = SentenceSplitter(chunk_size=512, chunk_overlap=256)
prompt_helper = PromptHelper(
context_window=8192,
num_output=256,
chunk_overlap_ratio=0.1,
chunk_size_limit=None,
)
service_context = ServiceContext.from_defaults(
llm=llm,
embed_model=embed_model,
text_splitter=text_splitter,
prompt_helper=prompt_helper,
)
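# For context, connection_string is a standard SQLAlchemy Postgres URL, e.g.
# "postgresql://user:password@localhost:5432/mydb" (placeholder values, not my real ones).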
url = make_url(connection_string)
port = url.port or 5432
vector_store = PGVectorStore.from_params(
    database=db_name,
    host=url.host,
    password=url.password,
    port=port,
    user=url.username,
    table_name="llama_index",
    embed_dim=1536,  # dimension of OpenAI's text-embedding-ada-002
)
# An explicit docstore, since refresh_ref_docs() uses it to track document
# hashes for deduplication.
docstore = SimpleDocumentStore()
storage_context = StorageContext.from_defaults(
    vector_store=vector_store, docstore=docstore
)
# Build an empty index first; refresh_ref_docs() then upserts the real
# documents, skipping any whose stored hash is unchanged.
index = VectorStoreIndex.from_documents(
    documents=[],
    service_context=service_context,
    storage_context=storage_context,
    show_progress=True,
    store_nodes_override=True,
)
print(index.refresh_ref_docs(documents))
# Persist after the refresh so the on-disk docstore includes the new documents.
storage_context.persist(persist_dir=datadir)
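In case it matters, this is roughly how I reload everything on a later run. Treat it as a sketch: it assumes the same `datadir` and `service_context` as above, a `vector_store` rebuilt with the same `PGVectorStore.from_params(...)` call, and the query string is made up.

from llama_index import StorageContext, load_index_from_storage

# Restore the persisted docstore/index store and reattach the pgvector store.
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,  # rebuilt exactly as above
    persist_dir=datadir,
)
index = load_index_from_storage(storage_context, service_context=service_context)

query_engine = index.as_query_engine()
print(query_engine.query("What do these documents cover?"))  # made-up query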