Browse Source

Improve error handling for Excel parser

pull/1544/head
Hironsan 3 years ago
parent
commit
c887e64b83
2 changed files with 13 additions and 2 deletions
  1. 6
      backend/api/tests/test_tasks.py
  2. 9
      backend/api/views/upload/dataset.py

6
backend/api/tests/test_tasks.py

@ -134,6 +134,12 @@ class TestIngestClassificationData(TestIngestData):
response = self.ingest_data(filename, file_format)
self.assert_parse_error(response)
def test_wrong_excel(self):
    # The fixture is a JSONL file; only the declared format says 'Excel',
    # so ingestion is expected to end in a parse error.
    response = self.ingest_data('text_classification/example.jsonl', 'Excel')
    self.assert_parse_error(response)
class TestIngestSequenceLabelingData(TestIngestData):
task = SEQUENCE_LABELING

9
backend/api/views/upload/dataset.py

@ -7,6 +7,7 @@ from typing import Dict, Iterator, List, Optional, Type
import chardet
import pydantic.error_wrappers
import pyexcel
import pyexcel.exceptions
from chardet.universaldetector import UniversalDetector
from seqeval.scheme import BILOU, IOB2, IOBES, IOE2, Tokens
@ -199,8 +200,12 @@ class ExcelDataset(Dataset):
def load(self, filename: str) -> Iterator[Record]:
    """Lazily yield one record per row read from ``filename`` via pyexcel.

    Args:
        filename: Path handed to ``pyexcel.iget_records``.

    Yields:
        The result of ``self.from_row`` for each row, numbered from 1.

    Raises:
        FileParseException: With ``line_num=-1`` when pyexcel raises
            ``FileTypeNotSupported`` while the rows are being read.
    """
    records = pyexcel.iget_records(file_name=filename)
    # NOTE(review): pyexcel documents free_resources() for its i* readers --
    # confirm whether the underlying file handle should be released here.
    try:
        # The guard wraps the iteration itself: rows are consumed only here,
        # and exactly once.
        for line_num, row in enumerate(records, start=1):
            yield self.from_row(filename, row, line_num)
    except pyexcel.exceptions.FileTypeNotSupported as err:
        message = 'This file type is not supported.'
        # Chain the original error so the pyexcel traceback is preserved.
        raise FileParseException(filename, line_num=-1, message=message) from err
class FastTextDataset(Dataset):

Loading…
Cancel
Save