Browse Source

Add a validator to the text field

pull/1573/head
Hironsan 3 years ago
parent
commit
e33713a99c
3 changed files with 21 additions and 5 deletions
  1. 3
      backend/api/tests/test_tasks.py
  2. 9
      backend/api/views/upload/data.py
  3. 14
      backend/api/views/upload/dataset.py

3
backend/api/tests/test_tasks.py

@ -27,6 +27,7 @@ class TestIngestClassificationData(TestIngestData):
task = DOCUMENT_CLASSIFICATION task = DOCUMENT_CLASSIFICATION
def assert_examples(self, dataset): def assert_examples(self, dataset):
self.assertEqual(Example.objects.count(), len(dataset))
for text, expected_labels in dataset: for text, expected_labels in dataset:
example = Example.objects.get(text=text) example = Example.objects.get(text=text)
labels = set(cat.label.text for cat in example.categories.all()) labels = set(cat.label.text for cat in example.categories.all())
@ -151,6 +152,7 @@ class TestIngestSequenceLabelingData(TestIngestData):
task = SEQUENCE_LABELING task = SEQUENCE_LABELING
def assert_examples(self, dataset): def assert_examples(self, dataset):
self.assertEqual(Example.objects.count(), len(dataset))
for text, expected_labels in dataset: for text, expected_labels in dataset:
example = Example.objects.get(text=text) example = Example.objects.get(text=text)
labels = [[span.start_offset, span.end_offset, span.label.text] for span in example.spans.all()] labels = [[span.start_offset, span.end_offset, span.label.text] for span in example.spans.all()]
@ -193,6 +195,7 @@ class TestIngestSeq2seqData(TestIngestData):
task = SEQ2SEQ task = SEQ2SEQ
def assert_examples(self, dataset): def assert_examples(self, dataset):
self.assertEqual(Example.objects.count(), len(dataset))
for text, expected_labels in dataset: for text, expected_labels in dataset:
example = Example.objects.get(text=text) example = Example.objects.get(text=text)
labels = set(text_label.text for text_label in example.texts.all()) labels = set(text_label.text for text_label in example.texts.all())

9
backend/api/views/upload/data.py

@ -1,7 +1,7 @@
import abc import abc
from typing import Dict from typing import Dict
from pydantic import BaseModel
from pydantic import BaseModel, validator
class BaseData(BaseModel, abc.ABC): class BaseData(BaseModel, abc.ABC):
@ -19,6 +19,13 @@ class BaseData(BaseModel, abc.ABC):
class TextData(BaseData): class TextData(BaseData):
text: str text: str
@validator('text')
def text_is_not_empty(cls, value: str) -> str:
    """Reject an empty ``text`` value.

    Args:
        value: The candidate value for the ``text`` field.

    Returns:
        The value unchanged when it is non-empty.

    Raises:
        ValueError: If ``value`` is falsy (e.g. the empty string).
    """
    if not value:
        # The message names the rule that was violated; the previous
        # wording ('is not empty.') said the opposite of what went wrong.
        raise ValueError('must not be empty.')
    return value
class FileData(BaseData): class FileData(BaseData):
pass pass

14
backend/api/views/upload/dataset.py

@ -5,10 +5,10 @@ import os
from typing import Dict, Iterator, List, Optional, Type from typing import Dict, Iterator, List, Optional, Type
import chardet import chardet
import pydantic.error_wrappers
import pyexcel import pyexcel
import pyexcel.exceptions import pyexcel.exceptions
from chardet.universaldetector import UniversalDetector from chardet.universaldetector import UniversalDetector
from pydantic import ValidationError
from seqeval.scheme import BILOU, IOB2, IOBES, IOE2, Tokens from seqeval.scheme import BILOU, IOB2, IOBES, IOE2, Tokens
from .data import BaseData from .data import BaseData
@ -67,7 +67,7 @@ class Dataset:
for filename in self.filenames: for filename in self.filenames:
try: try:
yield from self.load(filename) yield from self.load(filename)
except UnicodeDecodeError as err:
except (UnicodeDecodeError, ValidationError) as err:
message = str(err) message = str(err)
raise FileParseException(filename, line_num=-1, message=message) raise FileParseException(filename, line_num=-1, message=message)
@ -113,9 +113,15 @@ class Dataset:
label = [label] if isinstance(label, str) else label label = [label] if isinstance(label, str) else label
try: try:
label = [self.label_class.parse(o) for o in label] label = [self.label_class.parse(o) for o in label]
except (pydantic.error_wrappers.ValidationError, TypeError):
except (ValidationError, TypeError):
label = [] label = []
data = self.data_class.parse(text=text, filename=filename, meta=row)
try:
data = self.data_class.parse(text=text, filename=filename, meta=row)
except ValidationError:
message = 'The empty text is not allowed.'
raise FileParseException(filename, line_num, message)
record = Record(data=data, label=label) record = Record(data=data, label=label)
return record return record

Loading…
Cancel
Save