Okay, I see my doc is being chunked because of the metadata:
# NOTE: Consider metadata info str that will be added
# to the chunk at query time. This reduces the effective
# chunk size that we can have
if metadata_str is not None:
    # NOTE: extra 2 newline chars for formatting when prepending in query
    num_extra_tokens = len(self.tokenizer(f"{metadata_str}\n\n")) + 1
    effective_chunk_size = self._chunk_size - num_extra_tokens
    if effective_chunk_size <= 0:
        raise ValueError(
            "Effective chunk size is non positive "
            "after considering metadata"
        )
else:
    effective_chunk_size = self._chunk_size
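
To see how much room the metadata actually eats, here is a minimal sketch of the same calculation outside the splitter. It assumes a tiktoken encoder as the tokenizer and hypothetical values for chunk_size and metadata_str; all three are stand-ins for whatever your splitter and nodes are actually configured with:

import tiktoken

# Hypothetical stand-ins; the real values come from your splitter's
# configuration and your document's metadata.
tokenizer = tiktoken.get_encoding("cl100k_base").encode
chunk_size = 1024
metadata_str = "file_name: report.pdf\npage_label: 12"

# Same math as the snippet above: tokens for the metadata plus the
# two separator newlines, plus one extra token of headroom.
num_extra_tokens = len(tokenizer(f"{metadata_str}\n\n")) + 1
effective_chunk_size = chunk_size - num_extra_tokens

print(num_extra_tokens, effective_chunk_size)

So every token of metadata comes straight out of the chunk budget: the longer the metadata string, the smaller the effective chunk size, and the more chunks the document gets split into.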