Browse Source

Update frontend to handle relation dataset import

pull/1823/head
Hironsan 2 years ago
parent
commit
370c123ff4
11 changed files with 281 additions and 98 deletions
  1. 4
      backend/data_import/celery_tasks.py
  2. 17
      backend/data_import/datasets.py
  3. 263
      backend/data_import/pipeline/catalog.py
  4. 53
      backend/data_import/tests/test_tasks.py
  5. 9
      backend/data_import/views.py
  6. 6
      frontend/domain/models/upload/catalog.ts
  7. 2
      frontend/domain/models/upload/parseRepository.ts
  8. 14
      frontend/pages/projects/_id/dataset/import.vue
  9. 3
      frontend/repositories/upload/apiParseRepository.ts
  10. 4
      frontend/services/application/upload/catalogData.ts
  11. 4
      frontend/services/application/upload/parseApplicationService.ts

4
backend/data_import/celery_tasks.py

@@ -53,7 +53,7 @@ def check_uploaded_files(upload_ids: List[str], file_format: str):
@shared_task
def import_dataset(user_id, project_id, file_format: str, upload_ids: List[str], **kwargs):
def import_dataset(user_id, project_id, file_format: str, upload_ids: List[str], task: str, **kwargs):
project = get_object_or_404(Project, pk=project_id)
user = get_object_or_404(get_user_model(), pk=user_id)
@ -64,7 +64,7 @@ def import_dataset(user_id, project_id, file_format: str, upload_ids: List[str],
for tu in temporary_uploads
]
dataset = load_dataset(file_format, filenames, project, **kwargs)
dataset = load_dataset(task, file_format, filenames, project, **kwargs)
dataset.save(user, batch_size=settings.IMPORT_BATCH_SIZE)
upload_to_store(temporary_uploads)
errors.extend(dataset.errors)

17
backend/data_import/datasets.py

@ -4,6 +4,7 @@ from typing import List, Type
from django.contrib.auth.models import User
from .models import DummyLabelType
from .pipeline.catalog import RELATION_EXTRACTION
from .pipeline.data import BaseData, BinaryData, TextData
from .pipeline.exceptions import FileParseException
from .pipeline.factories import create_parser
@ -194,23 +195,23 @@ class CategoryAndSpanDataset(Dataset):
return self.reader.errors + self.example_maker.errors + self.category_maker.errors + self.span_maker.errors
def select_dataset(task: str, project: Project) -> Type[Dataset]:
    """Return the Dataset class that should ingest data for *task*.

    The relation-extraction dataset is selected by an explicit task id
    (RELATION_EXTRACTION) instead of the project's ``use_relation`` flag,
    so one sequence-labeling project can import both plain span data and
    relation data.

    Args:
        task: Task identifier sent by the client; may be a pseudo task
            such as RELATION_EXTRACTION rather than a project type.
        project: The project the data is imported into; its
            ``project_type`` is the fallback when *task* is unknown.

    Returns:
        The Dataset subclass to instantiate.
    """
    mapping = {
        DOCUMENT_CLASSIFICATION: TextClassificationDataset,
        SEQUENCE_LABELING: SequenceLabelingDataset,
        RELATION_EXTRACTION: RelationExtractionDataset,
        SEQ2SEQ: Seq2seqDataset,
        INTENT_DETECTION_AND_SLOT_FILLING: CategoryAndSpanDataset,
        IMAGE_CLASSIFICATION: BinaryDataset,
        SPEECH2TEXT: BinaryDataset,
    }
    if task not in mapping:
        # Unknown/omitted task id: fall back to the project's own type.
        # NOTE(review): if the fallback is also unknown this raises
        # KeyError — confirm upstream validation guarantees a valid type.
        task = project.project_type
    return mapping[task]
def load_dataset(task: str, file_format: str, data_files: List[FileName], project: Project, **kwargs) -> Dataset:
    """Build a Dataset that parses *data_files* for the given task.

    Args:
        task: Task identifier (forwarded to ``select_dataset``).
        file_format: Name of the upload format (e.g. "JSONL", "CoNLL").
        data_files: Files to read.
        project: Target project.
        **kwargs: Extra options forwarded to both the parser factory and
            the dataset constructor (e.g. column names, encoding).

    Returns:
        An unsaved Dataset instance wrapping a Reader over the files.
    """
    parser = create_parser(file_format, **kwargs)
    reader = Reader(data_files, parser)
    dataset_class = select_dataset(task, project)
    return dataset_class(reader, project, **kwargs)

263
backend/data_import/pipeline/catalog.py

@@ -1,4 +1,5 @@
from collections import defaultdict
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Type
@ -14,7 +15,20 @@ from projects.models import (
SPEECH2TEXT,
)
# Define the example directories
EXAMPLE_DIR = Path(__file__).parent.resolve() / "examples"
TASK_AGNOSTIC_DIR = EXAMPLE_DIR / "task_agnostic"
TEXT_CLASSIFICATION_DIR = EXAMPLE_DIR / "text_classification"
SEQUENCE_LABELING_DIR = EXAMPLE_DIR / "sequence_labeling"
RELATION_EXTRACTION_DIR = EXAMPLE_DIR / "relation_extraction"
SEQ2SEQ_DIR = EXAMPLE_DIR / "sequence_to_sequence"
INTENT_DETECTION_DIR = EXAMPLE_DIR / "intent_detection"
IMAGE_CLASSIFICATION_DIR = EXAMPLE_DIR / "image_classification"
SPEECH_TO_TEXT_DIR = EXAMPLE_DIR / "speech_to_text"
# Define the task identifiers
RELATION_EXTRACTION = "RelationExtraction"
encodings = Literal[
"Auto",
"ascii",
@ -177,85 +191,236 @@ class AudioFile(Format):
accept_types = "audio/ogg, audio/aac, audio/mpeg, audio/wav"
class OptionColumn(BaseModel):
class ArgColumn(BaseModel):
encoding: encodings = "utf_8"
column_data: str = "text"
column_label: str = "label"
class OptionDelimiter(OptionColumn):
class ArgDelimiter(ArgColumn):
encoding: encodings = "utf_8"
delimiter: Literal[",", "\t", ";", "|", " "] = ","
class OptionEncoding(BaseModel):
class ArgEncoding(BaseModel):
encoding: encodings = "utf_8"
class OptionCoNLL(BaseModel):
class ArgCoNLL(BaseModel):
encoding: encodings = "utf_8"
scheme: Literal["IOB2", "IOE2", "IOBES", "BILOU"] = "IOB2"
delimiter: Literal[" ", ""] = " "
class OptionNone(BaseModel):
class ArgNone(BaseModel):
pass
@dataclass
class Option:
    """One selectable import format for a task, served by the catalog API."""

    display_name: str  # label shown in the frontend's format selector
    task_id: str  # task identifier used as the registry key in Options
    file_format: Type[Format]  # format class; contributes name/accept types via .dict()
    arg: Type[BaseModel]  # options model (presumably pydantic) — .schema() feeds the UI form
    file: Path  # path to the bundled example file for this format

    @property
    def example(self) -> str:
        """Return the contents of the bundled example file."""
        with open(self.file, "r", encoding="utf-8") as f:
            return f.read()

    def dict(self) -> Dict:
        """Serialize this option for the catalog API response."""
        return {
            **self.file_format.dict(),
            **self.arg.schema(),
            "example": self.example,
            "task_id": self.task_id,
            "display_name": self.display_name,
        }
class Options:
    """Process-wide registry of import options, grouped by task id.

    ``options`` is a shared class attribute; every ``register`` call
    mutates the same mapping.
    """

    options: Dict[str, List] = defaultdict(list)

    @classmethod
    def filter_by_task(cls, task_name: str, use_relation: bool = False):
        """Return serialized options for *task_name*.

        When *use_relation* is true, relation-extraction options are
        appended so relation-enabled projects can also import relation
        datasets.
        """
        options = cls.options[task_name]
        if use_relation:
            # Reuse the already-fetched list instead of a second lookup.
            options = options + cls.options[RELATION_EXTRACTION]
        return [option.dict() for option in options]

    @classmethod
    def register(cls, option: "Option") -> None:
        """Add *option* to the registry under its own task id."""
        cls.options[option.task_id].append(option)
# Task-agnostic text formats: plain files and line-per-example files are
# valid for every text-based task, so register them once per task.
text_tasks = [DOCUMENT_CLASSIFICATION, SEQUENCE_LABELING, SEQ2SEQ, INTENT_DETECTION_AND_SLOT_FILLING]
for task_id in text_tasks:
    Options.register(
        Option(
            display_name=TextFile.name,
            task_id=task_id,
            file_format=TextFile,
            arg=ArgEncoding,
            file=TASK_AGNOSTIC_DIR / "text_files.txt",
        )
    )
    Options.register(
        Option(
            display_name=TextLine.name,
            task_id=task_id,
            file_format=TextLine,
            arg=ArgEncoding,
            file=TASK_AGNOSTIC_DIR / "text_lines.txt",
        )
    )

# Text classification
Options.register(
    Option(
        display_name=CSV.name,
        task_id=DOCUMENT_CLASSIFICATION,
        file_format=CSV,
        arg=ArgDelimiter,
        file=TEXT_CLASSIFICATION_DIR / "example.csv",
    )
)
Options.register(
    Option(
        display_name=FastText.name,
        task_id=DOCUMENT_CLASSIFICATION,
        file_format=FastText,
        arg=ArgEncoding,
        file=TEXT_CLASSIFICATION_DIR / "example.txt",
    )
)
Options.register(
    Option(
        display_name=JSON.name,
        task_id=DOCUMENT_CLASSIFICATION,
        file_format=JSON,
        arg=ArgColumn,
        file=TEXT_CLASSIFICATION_DIR / "example.json",
    )
)
Options.register(
    Option(
        display_name=JSONL.name,
        task_id=DOCUMENT_CLASSIFICATION,
        file_format=JSONL,
        arg=ArgColumn,
        file=TEXT_CLASSIFICATION_DIR / "example.jsonl",
    )
)
Options.register(
    Option(
        display_name=Excel.name,
        task_id=DOCUMENT_CLASSIFICATION,
        file_format=Excel,
        arg=ArgColumn,
        # NOTE(review): Excel deliberately reuses the CSV example text —
        # confirm there is no bundled .xlsx sample.
        file=TEXT_CLASSIFICATION_DIR / "example.csv",
    )
)

# Sequence labeling
Options.register(
    Option(
        display_name=JSONL.name,
        task_id=SEQUENCE_LABELING,
        file_format=JSONL,
        arg=ArgColumn,
        file=SEQUENCE_LABELING_DIR / "example.jsonl",
    )
)
Options.register(
    Option(
        display_name=CoNLL.name,
        task_id=SEQUENCE_LABELING,
        file_format=CoNLL,
        arg=ArgCoNLL,
        file=SEQUENCE_LABELING_DIR / "example.txt",
    )
)

# Relation extraction (pseudo task appended for relation-enabled projects)
Options.register(
    Option(
        display_name="JSONL(Relation)",
        task_id=RELATION_EXTRACTION,
        file_format=JSONL,
        arg=ArgNone,
        file=RELATION_EXTRACTION_DIR / "example.jsonl",
    )
)

# Sequence to sequence
Options.register(
    Option(
        display_name=CSV.name,
        task_id=SEQ2SEQ,
        file_format=CSV,
        arg=ArgDelimiter,
        file=SEQ2SEQ_DIR / "example.csv",
    )
)
Options.register(
    Option(
        display_name=JSON.name,
        task_id=SEQ2SEQ,
        file_format=JSON,
        arg=ArgColumn,
        file=SEQ2SEQ_DIR / "example.json",
    )
)
Options.register(
    Option(
        display_name=JSONL.name,
        task_id=SEQ2SEQ,
        file_format=JSONL,
        arg=ArgColumn,
        file=SEQ2SEQ_DIR / "example.jsonl",
    )
)
Options.register(
    Option(
        display_name=Excel.name,
        task_id=SEQ2SEQ,
        file_format=Excel,
        arg=ArgColumn,
        file=SEQ2SEQ_DIR / "example.csv",
    )
)

# Intent detection and slot filling
Options.register(
    Option(
        display_name=JSONL.name,
        task_id=INTENT_DETECTION_AND_SLOT_FILLING,
        file_format=JSONL,
        arg=ArgNone,
        file=INTENT_DETECTION_DIR / "example.jsonl",
    )
)

# Image classification
Options.register(
    Option(
        display_name=ImageFile.name,
        task_id=IMAGE_CLASSIFICATION,
        file_format=ImageFile,
        arg=ArgNone,
        file=IMAGE_CLASSIFICATION_DIR / "image_files.txt",
    )
)

# Speech to text
Options.register(
    Option(
        display_name=AudioFile.name,
        task_id=SPEECH2TEXT,
        file_format=AudioFile,
        arg=ArgNone,
        file=SPEECH_TO_TEXT_DIR / "audio_files.txt",
    )
)

53
backend/data_import/tests/test_tasks.py

@ -8,6 +8,7 @@ from django_drf_filepond.models import StoredUpload, TemporaryUpload
from django_drf_filepond.utils import _get_file_id
from data_import.celery_tasks import import_dataset
from data_import.pipeline.catalog import RELATION_EXTRACTION
from examples.models import Example
from label_types.models import SpanType
from labels.models import Category, Span
@ -40,7 +41,7 @@ class TestImportData(TestCase):
except StoredUpload.DoesNotExist:
pass
def import_dataset(self, filename, file_format, kwargs=None):
def import_dataset(self, filename, file_format, task, kwargs=None):
file_path = str(self.data_path / filename)
TemporaryUpload.objects.create(
upload_id=self.upload_id,
@ -51,7 +52,7 @@ class TestImportData(TestCase):
)
upload_ids = [self.upload_id]
kwargs = kwargs or {}
return import_dataset(self.user.id, self.project.item.id, file_format, upload_ids, **kwargs)
return import_dataset(self.user.id, self.project.item.id, file_format, upload_ids, task, **kwargs)
@override_settings(MAX_UPLOAD_SIZE=0)
@ -62,7 +63,7 @@ class TestMaxFileSize(TestImportData):
filename = "text_classification/example.jsonl"
file_format = "JSONL"
kwargs = {"column_label": "labels"}
response = self.import_dataset(filename, file_format, kwargs)
response = self.import_dataset(filename, file_format, self.task, kwargs)
self.assertEqual(len(response["error"]), 1)
self.assertIn("maximum file size", response["error"][0]["message"])
@ -89,80 +90,80 @@ class TestImportClassificationData(TestImportData):
file_format = "JSONL"
kwargs = {"column_label": "labels"}
dataset = [("exampleA", ["positive"]), ("exampleB", ["positive", "negative"]), ("exampleC", [])]
self.import_dataset(filename, file_format, kwargs)
self.import_dataset(filename, file_format, self.task, kwargs)
self.assert_examples(dataset)
def test_csv(self):
filename = "text_classification/example.csv"
file_format = "CSV"
dataset = [("exampleA", ["positive"]), ("exampleB", [])]
self.import_dataset(filename, file_format)
self.import_dataset(filename, file_format, self.task)
self.assert_examples(dataset)
def test_csv_out_of_order_columns(self):
filename = "text_classification/example_out_of_order_columns.csv"
file_format = "CSV"
dataset = [("exampleA", ["positive"]), ("exampleB", [])]
self.import_dataset(filename, file_format)
self.import_dataset(filename, file_format, self.task)
self.assert_examples(dataset)
def test_fasttext(self):
filename = "text_classification/example_fasttext.txt"
file_format = "fastText"
dataset = [("exampleA", ["positive"]), ("exampleB", ["positive", "negative"]), ("exampleC", [])]
self.import_dataset(filename, file_format)
self.import_dataset(filename, file_format, self.task)
self.assert_examples(dataset)
def test_excel(self):
filename = "text_classification/example.xlsx"
file_format = "Excel"
dataset = [("exampleA", ["positive"]), ("exampleB", [])]
self.import_dataset(filename, file_format)
self.import_dataset(filename, file_format, self.task)
self.assert_examples(dataset)
def test_json(self):
filename = "text_classification/example.json"
file_format = "JSON"
dataset = [("exampleA", ["positive"]), ("exampleB", ["positive", "negative"]), ("exampleC", [])]
self.import_dataset(filename, file_format)
self.import_dataset(filename, file_format, self.task)
self.assert_examples(dataset)
def test_textfile(self):
filename = "example.txt"
file_format = "TextFile"
dataset = [("exampleA\nexampleB\n\nexampleC\n", [])]
self.import_dataset(filename, file_format)
self.import_dataset(filename, file_format, self.task)
self.assert_examples(dataset)
def test_textline(self):
filename = "example.txt"
file_format = "TextLine"
dataset = [("exampleA", []), ("exampleB", []), ("exampleC", [])]
self.import_dataset(filename, file_format)
self.import_dataset(filename, file_format, self.task)
self.assert_examples(dataset)
def test_wrong_jsonl(self):
filename = "text_classification/example.json"
file_format = "JSONL"
response = self.import_dataset(filename, file_format)
response = self.import_dataset(filename, file_format, self.task)
self.assert_parse_error(response)
def test_wrong_json(self):
filename = "text_classification/example.jsonl"
file_format = "JSON"
response = self.import_dataset(filename, file_format)
response = self.import_dataset(filename, file_format, self.task)
self.assert_parse_error(response)
def test_wrong_excel(self):
filename = "text_classification/example.jsonl"
file_format = "Excel"
response = self.import_dataset(filename, file_format)
response = self.import_dataset(filename, file_format, self.task)
self.assert_parse_error(response)
def test_wrong_csv(self):
filename = "text_classification/example.jsonl"
file_format = "CSV"
response = self.import_dataset(filename, file_format)
response = self.import_dataset(filename, file_format, self.task)
self.assert_parse_error(response)
@ -186,26 +187,26 @@ class TestImportSequenceLabelingData(TestImportData):
filename = "sequence_labeling/example.jsonl"
file_format = "JSONL"
dataset = [("exampleA", [[0, 1, "LOC"]]), ("exampleB", [])]
self.import_dataset(filename, file_format)
self.import_dataset(filename, file_format, self.task)
self.assert_examples(dataset)
def test_conll(self):
filename = "sequence_labeling/example.conll"
file_format = "CoNLL"
dataset = [("JAPAN GET", [[0, 5, "LOC"]]), ("Nadim Ladki", [[0, 11, "PER"]])]
self.import_dataset(filename, file_format)
self.import_dataset(filename, file_format, self.task)
self.assert_examples(dataset)
def test_wrong_conll(self):
filename = "sequence_labeling/example.jsonl"
file_format = "CoNLL"
response = self.import_dataset(filename, file_format)
response = self.import_dataset(filename, file_format, self.task)
self.assert_parse_error(response)
def test_jsonl_with_overlapping(self):
filename = "sequence_labeling/example_overlapping.jsonl"
file_format = "JSONL"
response = self.import_dataset(filename, file_format)
response = self.import_dataset(filename, file_format, self.task)
self.assertEqual(len(response["error"]), 0)
@ -241,7 +242,7 @@ class TestImportRelationExtractionData(TestImportData):
[[0, 6, "ORG"], [22, 39, "DATE"], [44, 54, "PERSON"], [59, 70, "PERSON"]],
),
]
self.import_dataset(filename, file_format)
self.import_dataset(filename, file_format, RELATION_EXTRACTION)
self.assert_examples(dataset)
@ -259,21 +260,21 @@ class TestImportSeq2seqData(TestImportData):
filename = "seq2seq/example.jsonl"
file_format = "JSONL"
dataset = [("exampleA", ["label1"]), ("exampleB", [])]
self.import_dataset(filename, file_format)
self.import_dataset(filename, file_format, self.task)
self.assert_examples(dataset)
def test_json(self):
filename = "seq2seq/example.json"
file_format = "JSON"
dataset = [("exampleA", ["label1"]), ("exampleB", [])]
self.import_dataset(filename, file_format)
self.import_dataset(filename, file_format, self.task)
self.assert_examples(dataset)
def test_csv(self):
filename = "seq2seq/example.csv"
file_format = "CSV"
dataset = [("exampleA", ["label1"]), ("exampleB", [])]
self.import_dataset(filename, file_format)
self.import_dataset(filename, file_format, self.task)
self.assert_examples(dataset)
@ -298,7 +299,7 @@ class TestImportIntentDetectionAndSlotFillingData(TestImportData):
("exampleC", {"cats": [], "entities": [(0, 1, "LOC")]}),
("exampleD", {"cats": [], "entities": []}),
]
self.import_dataset(filename, file_format)
self.import_dataset(filename, file_format, self.task)
self.assert_examples(dataset)
@ -308,7 +309,7 @@ class TestImportImageClassificationData(TestImportData):
def test_example(self):
filename = "images/1500x500.jpeg"
file_format = "ImageFile"
self.import_dataset(filename, file_format)
self.import_dataset(filename, file_format, self.task)
self.assertEqual(Example.objects.count(), 1)
@ -319,6 +320,6 @@ class TestFileTypeChecking(TestImportData):
def test_example(self):
filename = "images/example.ico"
file_format = "ImageFile"
response = self.import_dataset(filename, file_format)
response = self.import_dataset(filename, file_format, self.task)
self.assertEqual(len(response["error"]), 1)
self.assertIn("unexpected", response["error"][0]["message"])

9
backend/data_import/views.py

@ -16,7 +16,8 @@ class DatasetCatalog(APIView):
def get(self, request, *args, **kwargs):
    """Return the import-format catalog for one project.

    Relation-extraction formats are appended when the project has
    relations enabled.
    """
    project_id = kwargs["project_id"]
    project = get_object_or_404(Project, pk=project_id)
    # Some project subtypes may lack the flag; treat missing as disabled.
    use_relation = getattr(project, "use_relation", False)
    options = Options.filter_by_task(project.project_type, use_relation)
    return Response(data=options, status=status.HTTP_200_OK)
@ -26,11 +27,13 @@ class DatasetImportAPI(APIView):
def post(self, request, *args, **kwargs):
    """Start an asynchronous dataset import and return its Celery task id.

    Consumes ``uploadIds``, ``format`` and ``task`` from the request body;
    whatever remains is forwarded as parser/dataset options.
    """
    upload_ids = request.data.pop("uploadIds")
    file_format = request.data.pop("format")
    task = request.data.pop("task")
    # Name the Celery result "celery_task" so it does not shadow the
    # doccano task identifier popped above.
    celery_task = import_dataset.delay(
        user_id=request.user.id,
        project_id=self.kwargs["project_id"],
        file_format=file_format,
        upload_ids=upload_ids,
        task=task,
        **request.data,
    )
    return Response({"task_id": celery_task.task_id})

6
frontend/domain/models/upload/catalog.ts

@ -5,6 +5,12 @@ export class Catalog {
example: string;
properties: object;
@Expose({ name: 'task_id' })
taskId: string;
@Expose({ name: 'display_name' })
displayName: string;
@Expose({ name: 'accept_types' })
acceptTypes: string;
}

2
frontend/domain/models/upload/parseRepository.ts

@ -1,5 +1,5 @@
export interface ParseRepository {
analyze(projectId: string, format: string, uploadIds: number[], option: object): Promise<string>
analyze(projectId: string, format: string, task: string, uploadIds: number[], option: object): Promise<string>
revert(serverId: string): void
}

14
frontend/pages/projects/_id/dataset/import.vue

@ -13,7 +13,7 @@
<v-select
v-model="selected"
:items="catalog"
item-text="name"
item-text="displayName"
label="File format"
outlined
/>
@ -156,7 +156,7 @@ export default {
return this.uploadedFiles.length === 0 || this.taskId !== null || !this.valid
},
properties() {
const item = this.catalog.find(item => item.name === this.selected)
const item = this.catalog.find(item => item.displayName === this.selected)
if (item) {
return item.properties
} else {
@ -174,7 +174,7 @@ export default {
return Object.fromEntries(textFields)
},
acceptedFileTypes() {
const item = this.catalog.find(item => item.name === this.selected)
const item = this.catalog.find(item => item.displayName === this.selected)
if (item) {
return item.acceptTypes
} else {
@ -182,7 +182,7 @@ export default {
}
},
example() {
const item = this.catalog.find(item => item.name === this.selected)
const item = this.catalog.find(item => item.displayName === this.selected)
if (item) {
const column_data = 'column_data'
const column_label = 'column_label'
@ -201,7 +201,7 @@ export default {
watch: {
selected() {
const item = this.catalog.find(item => item.name === this.selected)
const item = this.catalog.find(item => item.displayName === this.selected)
for (const [key, value] of Object.entries(item.properties)) {
this.option[key] = value.default
}
@ -239,9 +239,11 @@ export default {
},
async importDataset() {
this.isImporting = true
const item = this.catalog.find(item => item.displayName === this.selected)
this.taskId = await this.$services.parse.analyze(
this.$route.params.id,
this.selected,
item.name,
item.taskId,
this.uploadedFiles.map(item => item.serverId),
this.option
)

3
frontend/repositories/upload/apiParseRepository.ts

@ -6,10 +6,11 @@ export class APIParseRepository implements ParseRepository {
private readonly request = ApiService
) {}
async analyze(projectId: string, format: string, uploadIds: number[], option: object): Promise<string> {
async analyze(projectId: string, format: string, task: string, uploadIds: number[], option: object): Promise<string> {
const url = `/projects/${projectId}/upload`
const data = {
format,
task,
uploadIds,
...option
}

4
frontend/services/application/upload/catalogData.ts

@ -6,11 +6,15 @@ export class CatalogDTO {
example: string
acceptTypes: string
properties: object
taskId: string
displayName: string
constructor(item: Catalog) {
this.name = item.name
this.example = item.example
this.acceptTypes = item.acceptTypes
this.properties = item.properties
this.displayName = item.displayName
this.taskId = item.taskId
}
}

4
frontend/services/application/upload/parseApplicationService.ts

@ -5,8 +5,8 @@ export class ParseApplicationService {
private readonly repository: ParseRepository
) {}
public async analyze(projectId: string, format: string, uploadIds: number[], option: object): Promise<string> {
const item = await this.repository.analyze(projectId, format, uploadIds, option)
public async analyze(projectId: string, format: string, task: string, uploadIds: number[], option: object): Promise<string> {
const item = await this.repository.analyze(projectId, format, task, uploadIds, option)
return item
}

Loading…
Cancel
Save