Browse Source

Improve error handling for excel parser

pull/1544/head
Hironsan 3 years ago
parent
commit
c887e64b83
2 changed files with 13 additions and 2 deletions
  1. 6
      backend/api/tests/test_tasks.py
  2. 9
      backend/api/views/upload/dataset.py

6
backend/api/tests/test_tasks.py

@@ -134,6 +134,12 @@ class TestIngestClassificationData(TestIngestData):
response = self.ingest_data(filename, file_format) response = self.ingest_data(filename, file_format)
self.assert_parse_error(response) self.assert_parse_error(response)
def test_wrong_excel(self):
filename = 'text_classification/example.jsonl'
file_format = 'Excel'
response = self.ingest_data(filename, file_format)
self.assert_parse_error(response)
class TestIngestSequenceLabelingData(TestIngestData): class TestIngestSequenceLabelingData(TestIngestData):
task = SEQUENCE_LABELING task = SEQUENCE_LABELING

9
backend/api/views/upload/dataset.py

@@ -7,6 +7,7 @@ from typing import Dict, Iterator, List, Optional, Type
import chardet import chardet
import pydantic.error_wrappers import pydantic.error_wrappers
import pyexcel import pyexcel
import pyexcel.exceptions
from chardet.universaldetector import UniversalDetector from chardet.universaldetector import UniversalDetector
from seqeval.scheme import BILOU, IOB2, IOBES, IOE2, Tokens from seqeval.scheme import BILOU, IOB2, IOBES, IOE2, Tokens
@@ -199,8 +200,12 @@ class ExcelDataset(Dataset):
def load(self, filename: str) -> Iterator[Record]: def load(self, filename: str) -> Iterator[Record]:
records = pyexcel.iget_records(file_name=filename) records = pyexcel.iget_records(file_name=filename)
for line_num, row in enumerate(records, start=1):
yield self.from_row(filename, row, line_num)
try:
for line_num, row in enumerate(records, start=1):
yield self.from_row(filename, row, line_num)
except pyexcel.exceptions.FileTypeNotSupported:
message = 'This file type is not supported.'
raise FileParseException(filename, line_num=-1, message=message)
class FastTextDataset(Dataset): class FastTextDataset(Dataset):

Loading…
Cancel
Save