Browse Source

Add a validator to the text field

pull/1573/head
Hironsan 3 years ago
parent
commit
e33713a99c
3 changed files with 21 additions and 5 deletions
  1. 3
      backend/api/tests/test_tasks.py
  2. 9
      backend/api/views/upload/data.py
  3. 14
      backend/api/views/upload/dataset.py

3
backend/api/tests/test_tasks.py

@ -27,6 +27,7 @@ class TestIngestClassificationData(TestIngestData):
task = DOCUMENT_CLASSIFICATION
def assert_examples(self, dataset):
self.assertEqual(Example.objects.count(), len(dataset))
for text, expected_labels in dataset:
example = Example.objects.get(text=text)
labels = set(cat.label.text for cat in example.categories.all())
@ -151,6 +152,7 @@ class TestIngestSequenceLabelingData(TestIngestData):
task = SEQUENCE_LABELING
def assert_examples(self, dataset):
self.assertEqual(Example.objects.count(), len(dataset))
for text, expected_labels in dataset:
example = Example.objects.get(text=text)
labels = [[span.start_offset, span.end_offset, span.label.text] for span in example.spans.all()]
@ -193,6 +195,7 @@ class TestIngestSeq2seqData(TestIngestData):
task = SEQ2SEQ
def assert_examples(self, dataset):
self.assertEqual(Example.objects.count(), len(dataset))
for text, expected_labels in dataset:
example = Example.objects.get(text=text)
labels = set(text_label.text for text_label in example.texts.all())

9
backend/api/views/upload/data.py

@ -1,7 +1,7 @@
import abc
from typing import Dict
from pydantic import BaseModel
from pydantic import BaseModel, validator
class BaseData(BaseModel, abc.ABC):
@ -19,6 +19,13 @@ class BaseData(BaseModel, abc.ABC):
class TextData(BaseData):
    """Data record that carries a required ``text`` string."""

    text: str

    @validator('text')
    def text_is_not_empty(cls, value: str) -> str:
        """Reject an empty ``text`` value.

        Args:
            value: The candidate value for the ``text`` field.

        Returns:
            ``value`` unchanged when it is non-empty.

        Raises:
            ValueError: If ``value`` is empty. Raised inside a pydantic
                validator, so it is reported as a validation error on
                the ``text`` field.
        """
        # Guard clause: a falsy value (the empty string) is rejected up front.
        if not value:
            # The message must describe the requirement that was violated;
            # the previous wording ('is not empty.') said the opposite.
            raise ValueError('must not be empty.')
        return value
class FileData(BaseData):
pass

14
backend/api/views/upload/dataset.py

@ -5,10 +5,10 @@ import os
from typing import Dict, Iterator, List, Optional, Type
import chardet
import pydantic.error_wrappers
import pyexcel
import pyexcel.exceptions
from chardet.universaldetector import UniversalDetector
from pydantic import ValidationError
from seqeval.scheme import BILOU, IOB2, IOBES, IOE2, Tokens
from .data import BaseData
@ -67,7 +67,7 @@ class Dataset:
for filename in self.filenames:
try:
yield from self.load(filename)
except UnicodeDecodeError as err:
except (UnicodeDecodeError, ValidationError) as err:
message = str(err)
raise FileParseException(filename, line_num=-1, message=message)
@ -113,9 +113,15 @@ class Dataset:
label = [label] if isinstance(label, str) else label
try:
label = [self.label_class.parse(o) for o in label]
except (pydantic.error_wrappers.ValidationError, TypeError):
except (ValidationError, TypeError):
label = []
data = self.data_class.parse(text=text, filename=filename, meta=row)
try:
data = self.data_class.parse(text=text, filename=filename, meta=row)
except ValidationError:
message = 'The empty text is not allowed.'
raise FileParseException(filename, line_num, message)
record = Record(data=data, label=label)
return record

Loading…
Cancel
Save