# Generate questions for each node and append `dpo_dataset_dict` to `fp` as
# one JSON line per question. `nodes`, `fp`, `dpo_dataset_dict`,
# `question_gen_query` and the service contexts must already be defined
# before this point.
try:
    for idx, node in enumerate(nodes):
        # Ask the question-generation model for questions about this node only.
        dataset_generator = DatasetGenerator(
            [node],
            question_gen_query=question_gen_query,
            service_context=gpt_4_context,
            metadata_mode="all",
        )
        node_questions_0 = dataset_generator.generate_questions_from_nodes(num=5)
        print(f"[Node {idx}] Generated questions:\n {node_questions_0}")

        # for each question, get a response
        for question in tqdm(node_questions_0):
            # Two indexes over the same node, one per model context.
            index = DocumentSummaryIndex([node], service_context=gpt_40613_context)
            index2 = DocumentSummaryIndex([node], service_context=gpt35_context)

            # NOTE(review): the step that queries `index` / `index2` and fills
            # the "prompt" / "chosen" / "rejected" lists of `dpo_dataset_dict`
            # is not visible in this excerpt -- restore it here.
            # NOTE(review): the original indentation was lost; writing once
            # per question is assumed -- confirm against the original script.

            # Write the current entry to the file
            fp.write(json.dumps(dpo_dataset_dict) + "\n")

            # Clear the dictionary for the next entry
            dpo_dataset_dict = {
                "prompt": [],
                "chosen": [],
                "rejected": [],
            }
finally:
    # Close the output file even if a generation or query call raises, so
    # already-written lines are flushed and the handle is not leaked.
    fp.close()
The GPT-3.5 response is used as the rejected completion, and the gpt-0613 response (`gpt_40613_context`) is used as the accepted one.