diff --git a/backend/data_import/pipeline/parsers.py b/backend/data_import/pipeline/parsers.py
index 151d3ca0..5fc35e5a 100644
--- a/backend/data_import/pipeline/parsers.py
+++ b/backend/data_import/pipeline/parsers.py
@@ -136,8 +136,8 @@ class TextFileParser(Parser):
 
     def parse(self, filename: str) -> Iterator[Dict[Any, Any]]:
         encoding = decide_encoding(filename, self.encoding)
-        with open(filename, encoding=encoding) as f:
-            yield {DEFAULT_TEXT_COLUMN: f.read()}
+        with open(filename, encoding=encoding, errors="replace") as f:
+            yield {DEFAULT_TEXT_COLUMN: f.read().replace("\x00", "\uFFFD")}
 
 
 class CSVParser(Parser):