service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor
)
gpt_pinecone_index = GPTVectorStoreIndex.from_documents(
    documents, pinecone_index=pinecone_index, service_context=service_context
)

response_stream = query_engine.query("...")
print(type(response_stream))
# output:
# <class 'llama_index.response.schema.Response'>
>>> from llama_index import ServiceContext, LLMPredictor
>>> from langchain.chat_models import ChatOpenAI
>>> llm_predictor = LLMPredictor(llm=ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, streaming=True))
>>> service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)
>>> index = GPTVectorStoreIndex.from_documents(docs, service_context=service_context)
>>> query_engine = index.as_query_engine(streaming=True)
>>> response = query_engine.query("what did the author do growing up?")
>>> type(response)
<class 'llama_index.response.schema.StreamingResponse'>
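Once the engine is created with streaming=True, the StreamingResponse can be consumed incrementally. A small sketch, assuming the query_engine from the transcript above:

# assumes query_engine from the session above (created with streaming=True)
response = query_engine.query("what did the author do growing up?")

# response.print_response_stream() would print tokens to stdout as they arrive;
# alternatively, iterate the underlying generator yourself:
for token in response.response_gen:
    print(token, end="", flush=True)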
import os
import shutil

import pinecone
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader

pinecone.init(api_key="...", environment="...")
index_name = '...'

def construct_pinecone_index(directory_path):
    # create the Pinecone index if it doesn't exist yet
    if index_name not in pinecone.list_indexes():
        pinecone.create_index(
            index_name,
            dimension=1536,
            metric="euclidean",
            pod_type="Starter"
        )
    pinecone_index = pinecone.Index(index_name)

    # load the documents and build the vector index on top of Pinecone
    documents = SimpleDirectoryReader(directory_path).load_data()
    gpt_pinecone_index = GPTVectorStoreIndex.from_documents(
        documents, pinecone_index=pinecone_index, service_context=service_context
    )

    # move the indexed source files out of docs/ so they aren't re-indexed
    absolute_path = os.path.dirname(__file__)
    src_folder = os.path.join(absolute_path, "docs/")
    dest_folder = os.path.join(absolute_path, "indexed_documents/")
    files = os.listdir(src_folder)
    for file in files:
        if file != "do_not_delete.txt":
            src_path = os.path.join(src_folder, file)
            dest_path = os.path.join(dest_folder, file)
            shutil.move(src_path, dest_path)

    return gpt_pinecone_index

index = construct_pinecone_index("docs")
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 11 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens
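For reference, these token_counter INFO lines typically only show up once standard-library logging is routed to stdout; a minimal, assumed setup (any equivalent logging configuration works):

# assumed logging setup that surfaces the llama_index token_counter INFO lines
import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.INFO)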
Pass the service_context into the query engine as well:

query_engine = index.as_query_engine(..., service_context=service_context)

Here is the full example using PineconeVectorStore with a StorageContext:

import os

from llama_index import (
    GPTVectorStoreIndex,
    GPTSimpleKeywordTableIndex,
    SimpleDirectoryReader,
    LLMPredictor,
    ServiceContext,
    StorageContext
)
from llama_index.vector_stores import PineconeVectorStore
from langchain.llms.openai import OpenAIChat

api_key = "<api-key>"
environment = "asia-southeast1-gcp-free"
index_name = "quickstart"
os.environ['PINECONE_API_KEY'] = api_key

llm_predictor_chatgpt = LLMPredictor(
    llm=OpenAIChat(temperature=0, model_name="gpt-3.5-turbo", streaming=True)
)
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor_chatgpt)

vector_store = PineconeVectorStore(
    index_name=index_name,
    environment=environment,
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

documents = SimpleDirectoryReader("./paul_graham").load_data()
index = GPTVectorStoreIndex.from_documents(
    documents, storage_context=storage_context, service_context=service_context
)

query_engine = index.as_query_engine(streaming=True)
response = query_engine.query("What did the author do growing up?")
print(type(response))
response.print_response_stream()
I tried passing in an existing pinecone_index object too, and that also worked fine:

pinecone.init(api_key=api_key, environment=environment)
pinecone_index = pinecone.Index(index_name)
vector_store = PineconeVectorStore(
    pinecone_index=pinecone_index
)
...
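Presumably the elided part mirrors the StorageContext-based construction above; a sketch under that assumption (not from the original message):

# assumption: the rest of the pinecone_index variant follows the same pattern
# as the StorageContext example above (documents and service_context as before)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = GPTVectorStoreIndex.from_documents(
    documents, storage_context=storage_context, service_context=service_context
)
query_engine = index.as_query_engine(streaming=True)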
You can use Flask's stream_with_context with the generator from llama index (i.e. response.response_gen).
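A minimal sketch of what that could look like, assuming a query_engine built with streaming=True as above; the route and app setup here are illustrative, not from the original thread:

# illustrative Flask endpoint: stream LlamaIndex tokens to the client as they
# are generated (assumes query_engine was created with streaming=True)
from flask import Flask, Response, stream_with_context

app = Flask(__name__)

@app.route("/query")
def query_route():
    streaming_response = query_engine.query("What did the author do growing up?")
    # response_gen yields text chunks incrementally
    return Response(
        stream_with_context(streaming_response.response_gen),
        mimetype="text/plain",
    )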