Browse Source

Refactor test_task.py

pull/1799/head
Hironsan 3 years ago
parent
commit
1e0f5fc128
3 changed files with 339 additions and 503 deletions
  1. 4
      backend/data_export/models.py
  2. 4
      backend/data_export/pipeline/formatters.py
  3. 834
      backend/data_export/tests/test_task.py

4
backend/data_export/models.py

@@ -5,10 +5,12 @@ from django.db import models
from examples.models import Example
from labels.models import Category, Relation, Span, TextLabel
DATA = "data"
class ExportedExample(Example):
def to_dict(self) -> Dict[str, Any]:
-return {"id": self.id, "data": self.text if self.project.is_text_project else self.upload_name, **self.meta}
+return {"id": self.id, DATA: self.text if self.project.is_text_project else self.upload_name, **self.meta}
class Meta:
proxy = True

4
backend/data_export/pipeline/formatters.py

@@ -5,6 +5,8 @@ import abc
import pandas as pd
from data_export.models import DATA
class Formatter(abc.ABC):
def __init__(self, target_column: str):
@@ -43,7 +45,7 @@ class FastTextCategoryFormatter(Formatter):
"""Format the label column to `__label__LabelA __label__LabelB` format.
Also, drop the columns except for `data` and `self.target_column`.
"""
-dataset = dataset[["data", self.target_column]]
+dataset = dataset[[DATA, self.target_column]]
dataset[self.target_column] = dataset[self.target_column].apply(
lambda labels: sorted(f"__label__{label.to_string()}" for label in labels)
)

834
backend/data_export/tests/test_task.py
File diff suppressed because it is too large
View File

Loading…
Cancel
Save