python loader = PyMuPDFReader() docs0 = loader.load(file_path=Path("./data/company_rule.pdf")) doc_text = "\n\n".join([d.get_content() for d in docs0]) docs = [Document(text=doc_text)] llm = OpenAI(model="gpt-4-0125-preview") chunk_sizes = [128, 256, 512, 1024] nodes_list = [] vector_indices = [] for chunk_size in chunk_sizes: print(f"Chunk Size: {chunk_size}") splitter = SentenceSplitter(chunk_size=chunk_size, chunk_overlap=chunk_size // 2) nodes = splitter.get_nodes_from_documents(docs) for node in nodes: node.metadata["chunk_size"] = chunk_size node.excluded_embed_metadata_keys = ["chunk_size"] node.excluded_llm_metadata_keys = ["chunk_size"] nodes_list.append(nodes) vector_index = VectorStoreIndex(nodes) vector_indices.append(vector_index) retriever_dict = {} retriever_nodes = [] for chunk_size, vector_index in zip(chunk_sizes, vector_indices): node_id = f"chunk_{chunk_size}" node = IndexNode( text=( "rule context retrieves (chunk size") f"{chunk_size})" ), index_id=node_id, ) retriever_nodes.append(node) retriever_dict[node_id] = vector_index.as_retriever() summary_index = SummaryIndex(retriever_nodes) retriever = RecursiveRetriever( root_id="root", retriever_dict={"root": summary_index.as_retriever(), **retriever_dict}, ) nodes = await retriever.aretrieve( "About working hours" ) print(f"Number of nodes: {len(nodes)}") for node in nodes: print(node.node.metadata["chunk_size"]) print(node.node.get_text()) reranker = LLMRerank() print(reranker) query_engine = RetrieverQueryEngine(retriever, node_postprocessors=[reranker]) response = query_engine.query( "About working hours" ) display_response( response, show_source=True, source_length=500, show_source_metadata=True ) #finalanswer