diff --git a/backend/api/tests/test_tasks.py b/backend/api/tests/test_tasks.py
index 851019a3..ca25e4d0 100644
--- a/backend/api/tests/test_tasks.py
+++ b/backend/api/tests/test_tasks.py
@@ -134,6 +134,13 @@ class TestIngestClassificationData(TestIngestData):
         response = self.ingest_data(filename, file_format)
         self.assert_parse_error(response)
 
+    def test_wrong_excel(self):
+        # A JSONL file declared as 'Excel' is not a spreadsheet pyexcel can read.
+        filename = 'text_classification/example.jsonl'
+        file_format = 'Excel'
+        response = self.ingest_data(filename, file_format)
+        self.assert_parse_error(response)
+
 
 class TestIngestSequenceLabelingData(TestIngestData):
     task = SEQUENCE_LABELING
diff --git a/backend/api/views/upload/dataset.py b/backend/api/views/upload/dataset.py
index e5bb89af..0a4d7bb1 100644
--- a/backend/api/views/upload/dataset.py
+++ b/backend/api/views/upload/dataset.py
@@ -7,6 +7,7 @@ from typing import Dict, Iterator, List, Optional, Type
 import chardet
 import pydantic.error_wrappers
 import pyexcel
+import pyexcel.exceptions
 from chardet.universaldetector import UniversalDetector
 from seqeval.scheme import BILOU, IOB2, IOBES, IOE2, Tokens
 
@@ -199,8 +200,23 @@ class ExcelDataset(Dataset):
 
     def load(self, filename: str) -> Iterator[Record]:
+        """Lazily yield one record per spreadsheet row of ``filename``.
+
+        Raises:
+            FileParseException: if pyexcel does not support the file type.
+        """
         records = pyexcel.iget_records(file_name=filename)
-        for line_num, row in enumerate(records, start=1):
-            yield self.from_row(filename, row, line_num)
+        try:
+            for line_num, row in enumerate(records, start=1):
+                yield self.from_row(filename, row, line_num)
+        except pyexcel.exceptions.FileTypeNotSupported as err:
+            message = 'This file type is not supported.'
+            raise FileParseException(
+                filename, line_num=-1, message=message
+            ) from err
+        finally:
+            # pyexcel's streaming "i*" readers keep file handles open until
+            # free_resources() is called; release them once iteration ends.
+            pyexcel.free_resources()
 
 
 class FastTextDataset(Dataset):