# RAG query pipeline: rewrite the user query with an LLM, retrieve + rerank
# context nodes, then answer from that context with a second LLM call.
# Assumes a local OpenAI-compatible server (e.g. vLLM/TGI) on 127.0.0.1:8000
# and source documents in ./data.

llm = OpenAILike(
    model="TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ",
    api_base="http://127.0.0.1:8000/v1",
    api_key="EMPTY",
    api_type="fake",
)

# Load documents and build the vector index with a local embedding model.
docs = SimpleDirectoryReader("./data").load_data()
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-m3",
    max_length=512,
    embed_batch_size=1,
)
# chunk_overlap is a token count, not a fraction — 0.2 was invalid; use an int.
service_context = ServiceContext.from_defaults(
    chunk_size=256,
    chunk_overlap=20,
    llm=None,  # embedding-only context; generation is handled by the pipeline LLMs
    embed_model=embed_model,
)
# NOTE: from_documents does not accept an `llm` kwarg — removed the spurious one.
index = VectorStoreIndex.from_documents(docs, service_context=service_context)

# Stage 1 prompt: query rewriting (Mixtral [INST] format).
prompt_str = "[INST] rewrite the query: {query} [/INST]"
prompt_tmpl = PromptTemplate(prompt_str)

# Stage 2 prompt: answer from retrieved context.
prompt_tmpl2 = PromptTemplate(
    "[INST] <<SYS>> You are a helpful assistant <</SYS>>\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "From the context answer the Query:\n"
    "Query: {query_str}\n"
    "Answer: [/INST]"
)

retriever = index.as_retriever(similarity_top_k=3)
rerank = SentenceTransformerRerank(
    model="cross-encoder/ms-marco-MiniLM-L-12-v2",
    top_n=3,
)

p = QueryPipeline(verbose=True)
p.add_modules({
    "llm": llm,
    "prompt_tmpl": prompt_tmpl,
    "retriever": retriever,
    "reranker": rerank,
    "llm2": llm,
    "prompt_tmpl2": prompt_tmpl2,
})

# Wiring: rewrite -> retrieve -> rerank -> fill answer prompt -> answer.
p.add_link("prompt_tmpl", "llm")
p.add_link("llm", "retriever")
p.add_link("retriever", "reranker", dest_key="nodes")
p.add_link("llm", "reranker", dest_key="query_str")
# BUGFIX: the reranked nodes and rewritten query must feed the answer PROMPT
# (which declares {context_str}/{query_str}), not llm2 directly — llm2 only
# consumes the formatted prompt produced by prompt_tmpl2.
p.add_link("reranker", "prompt_tmpl2", dest_key="context_str")
p.add_link("llm", "prompt_tmpl2", dest_key="query_str")
p.add_link("prompt_tmpl2", "llm2")

response = p.run(query=" quelle sont les musée du 1er arrondissement")
print(str(response))