From f092a291d65251bd25b522978cd21c27fa00a728 Mon Sep 17 00:00:00 2001
From: spodlesny
Date: Thu, 26 Oct 2023 14:59:21 +0200
Subject: [PATCH] =?UTF-8?q?fix:=20use=20unicode=20error=20symbol=20?=
 =?UTF-8?q?=EF=BF=BD=20instead=20of=20failing=20to=20import=20data?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 backend/data_import/pipeline/parsers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/backend/data_import/pipeline/parsers.py b/backend/data_import/pipeline/parsers.py
index 151d3ca0..5fc35e5a 100644
--- a/backend/data_import/pipeline/parsers.py
+++ b/backend/data_import/pipeline/parsers.py
@@ -136,8 +136,8 @@ class TextFileParser(Parser):
 
     def parse(self, filename: str) -> Iterator[Dict[Any, Any]]:
         encoding = decide_encoding(filename, self.encoding)
-        with open(filename, encoding=encoding) as f:
-            yield {DEFAULT_TEXT_COLUMN: f.read()}
+        with open(filename, encoding=encoding, errors="replace") as f:
+            yield {DEFAULT_TEXT_COLUMN: f.read().replace("\x00", "\uFFFD")}
 
 
 class CSVParser(Parser):