Hello everyone,
I am struggling to combine two data loaders into a single index. How can I merge competitor_index with index so that I can query both at the same time? competitor_index uses the bs4 (BeautifulSoup) data connector, and index uses the YouTube data connector.
from llama_index import (
LLMPredictor,
PromptHelper,
ServiceContext,
GPTSimpleVectorIndex,
download_loader
)
from langchain.chat_models import ChatOpenAI
import os
# Set the OpenAI API key in the process environment.
os.environ["OPENAI_API_KEY"] = "xxx"

# Tunables shared by the prompt helper and the chat model below.
max_input_size = 4096
num_output = 512
max_chunk_overlap = 200
temperature = 0

# Prompt helper configured from the size limits above.
prompt_helper = PromptHelper(
    max_input_size=max_input_size,
    num_output=num_output,
    max_chunk_overlap=max_chunk_overlap,
)

# Chat model wrapped in an LLMPredictor; max_tokens reuses num_output.
chat_model = ChatOpenAI(
    temperature=temperature,
    model_name="gpt-3.5-turbo",
    max_tokens=num_output,
)
llm_predictor = LLMPredictor(llm=chat_model)

# Service context handed to every index built later in this script.
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    prompt_helper=prompt_helper,
)
# Competitor pages: fetched through the BeautifulSoupWebReader loader and
# indexed into their own vector index.
BeautifulSoupWebReader = download_loader("BeautifulSoupWebReader")
competitor_loader = BeautifulSoupWebReader()

competitor_urls = [
    'https://url1.com',
    'https://url2.com',
    'https://url3.com',
]
competitor_documents = competitor_loader.load_data(urls=competitor_urls)

competitor_index = GPTSimpleVectorIndex.from_documents(
    competitor_documents,
    service_context=service_context,
)
# YouTube videos: transcripts loaded through the YoutubeTranscriptReader
# loader and indexed into their own vector index.
YoutubeTranscriptReader = download_loader("YoutubeTranscriptReader")
loader = YoutubeTranscriptReader()

youtube_links = [
    'https://www.youtube.com/watch?v=xxx',
    'https://www.youtube.com/watch?v=xxx',
    'https://www.youtube.com/watch?v=xxx',
]
documents = loader.load_data(ytlinks=youtube_links)

index = GPTSimpleVectorIndex.from_documents(
    documents,
    service_context=service_context,
)
# Build one vector index over the documents from BOTH loaders so a single
# query searches the competitor pages and the YouTube transcripts together.
# Unpacking into a new list leaves the two original document collections
# untouched and works for any iterable of documents.
# NOTE(review): this builds a third index from scratch in addition to
# competitor_index and index; if the separate indices are not needed,
# building only this one presumably avoids indexing the documents twice.
combined_competitor_index_and_index = GPTSimpleVectorIndex.from_documents(
    [*competitor_documents, *documents],
    service_context=service_context,
)