Should I use a different encoding?
# Load "my_file.pdf" through FlatReader.
# NOTE(review): per the traceback below, FlatReader.load_data opens the file
# as text with encoding="utf-8" and reads it in one call. A PDF is a binary
# container, not a text file, so the decode fails with UnicodeDecodeError.
# Choosing another text encoding may silence the error but will not produce
# the document's readable text. Consider a PDF-aware reader instead
# (presumably PDFReader from llama_index.readers.file -- confirm it is
# available in the installed version).
from llama_index.readers.file import FlatReader
from pathlib import Path
reader = FlatReader()
docs_2021 = reader.load_data(Path("my_file.pdf"))
File ~/.local/lib/python3.10/site-packages/llama_index/readers/file/flat/base.py:28, in FlatReader.load_data(self, file, extra_info)
26 """Parse file into string."""
27 with open(file, encoding="utf-8") as f:
---> 28 content = f.read()
29 metadata = {"filename": file.name, "extension": file.suffix}
30 if extra_info:
File /usr/lib/python3.10/codecs.py:322, in BufferedIncrementalDecoder.decode(self, input, final)
319 def decode(self, input, final=False):
320 # decode input (taking the buffer into account)
321 data = self.buffer + input
--> 322 (result, consumed) = self._buffer_decode(data, self.errors, final)
323 # keep undecoded input until the next call
324 self.buffer = data[consumed:]
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe2 in position 10: invalid continuation byte