# Legacy (pre-0.10) llama_index API: wrap a local llama.cpp model in a
# LangChain LlamaCpp LLM and hand it to a ServiceContext.
from llama_index import LLMPredictor, ServiceContext
from langchain.llms import LlamaCpp

# Load the quantized GGML model; n_ctx should match the context_window
# passed to the ServiceContext below (LangChain's default of 512 is too small).
llm = LlamaCpp(model_path="./ggml-model-q4_0.bin", n_ctx=2048)
llm_predictor = LLMPredictor(llm=llm)

service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    chunk_size=512,
    # Must match the model's training context length: 2048 here, assuming a
    # LLaMA-1-era GGML model; use 4096 for a LLaMA 2 model.
    context_window=2048,
)
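To put the service_context to use, the sketch below builds and queries a small index over a local folder of documents. It is a minimal example against the same legacy (pre-0.10) llama_index API; the ./data directory and the query string are assumptions, and note that from_defaults will still default to OpenAI embeddings unless an embed_model (e.g. embed_model="local") is also supplied.

from llama_index import SimpleDirectoryReader, VectorStoreIndex

# Assumed: a ./data directory containing the documents to index.
documents = SimpleDirectoryReader("./data").load_data()

# Build the index with the ServiceContext configured above, so queries
# run through the local llama.cpp model rather than a hosted LLM.
index = VectorStoreIndex.from_documents(documents, service_context=service_context)

query_engine = index.as_query_engine()
print(query_engine.query("What do these documents cover?"))  # hypothetical query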