Wow, that is a pretty bad page in the docs.
To me, it seems like the docs should probably just have something like this instead:
# cache metadata dicts
def save_metadata_dicts(path, data):
    """Write ``data`` to ``path`` as JSON.

    Args:
        path: Destination file path; the file is created or overwritten.
        data: JSON-serializable object to store.

    Raises:
        OSError: If the file cannot be opened for writing.
        TypeError: If ``data`` holds a value ``json.dump`` cannot serialize.
    """
    # Pin the encoding so the output does not depend on the platform's
    # locale default.
    with open(path, "w", encoding="utf-8") as fp:
        json.dump(data, fp)
def load_metadata_dicts(path):
    """Read the JSON file at ``path`` and return the decoded object.

    Args:
        path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` decodes from the file.

    Raises:
        OSError: If the file cannot be opened for reading.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8; decode it explicitly instead of relying on the
    # platform's locale default, which can mis-read non-ASCII content.
    with open(path, "r", encoding="utf-8") as fp:
        return json.load(fp)
# Merge the metadata produced by every extractor into a single dict per
# node, keyed by node id, then write the merged mapping to disk.
node_to_metadata = {}
for extractor in extractors:
    metadata_dicts = extractor.extract(base_nodes)
    # zip pairs each node with the metadata dict at the same position.
    for node, metadata in zip(base_nodes, metadata_dicts):
        # Merge into a dict owned by this mapping instead of storing the
        # extractor's dict directly, so later updates never mutate an
        # object returned by an extractor.
        node_to_metadata.setdefault(node.node_id, {}).update(metadata)

save_metadata_dicts("data/llama2_metadata_dicts.json", node_to_metadata)
# Reload the saved metadata and create one IndexNode per metadata value,
# with index_id set to the id of the node the metadata belongs to.
node_to_metadata = load_metadata_dicts("data/llama2_metadata_dicts.json")

# Start from a deep copy so appending does not modify base_nodes.
all_nodes = copy.deepcopy(base_nodes)
for node_id, metadata in node_to_metadata.items():
    all_nodes.extend(
        IndexNode(text=val, index_id=node_id) for val in metadata.values()
    )