Browse Source

Add uuid field to the return value of reader

pull/1823/head
Hironsan 3 years ago
parent
commit
568843a013
3 changed files with 33 additions and 8 deletions
  1. 2
      backend/data_import/pipeline/data.py
  2. 3
      backend/data_import/pipeline/readers.py
  3. 36
      backend/data_import/tests/test_reader.py

2
backend/data_import/pipeline/data.py

@@ -1,5 +1,4 @@
import abc
import uuid
from typing import Any, Dict
from pydantic import UUID4, BaseModel, validator
@@ -15,7 +14,6 @@ class BaseData(BaseModel, abc.ABC):
meta: Dict[Any, Any] = {}
def __init__(self, **data):
data["uuid"] = uuid.uuid4()
super().__init__(**data)
@classmethod

3
backend/data_import/pipeline/readers.py

@@ -1,6 +1,7 @@
import abc
import collections.abc
import dataclasses
import uuid
from typing import Any, Dict, Iterator, List
import pandas as pd
@@ -13,6 +14,7 @@ DEFAULT_LABEL_COLUMN = "label"
LINE_NUM_COLUMN = "#line_num"
FILE_NAME_COLUMN = "filename"
UPLOAD_NAME_COLUMN = "upload_name"
UUID_COLUMN = "uuid"
class BaseReader(collections.abc.Iterable):
@@ -80,6 +82,7 @@ class Reader(BaseReader):
try:
yield {
LINE_NUM_COLUMN: line_num,
UUID_COLUMN: uuid.uuid4(),
FILE_NAME_COLUMN: filename.generated_name,
UPLOAD_NAME_COLUMN: filename.upload_name,
**row,

36
backend/data_import/tests/test_reader.py

@@ -1,10 +1,16 @@
import unittest
from unittest.mock import MagicMock
from unittest.mock import MagicMock, patch
import pandas as pd
from pandas.testing import assert_frame_equal
from data_import.pipeline.readers import FILE_NAME_COLUMN, LINE_NUM_COLUMN, Reader
from data_import.pipeline.readers import (
FILE_NAME_COLUMN,
LINE_NUM_COLUMN,
UPLOAD_NAME_COLUMN,
UUID_COLUMN,
Reader,
)
class TestReader(unittest.TestCase):
@@ -12,18 +18,36 @@ class TestReader(unittest.TestCase):
self.parser = MagicMock()
self.parser.parse.return_value = [{"a": 1}, {"a": 2}]
filename = MagicMock()
filename.generated_name = "filename"
filename.upload_name = "upload_name"
self.filenames = MagicMock()
self.filenames.__iter__.return_value = [filename]
self.rows = [
{LINE_NUM_COLUMN: 1, FILE_NAME_COLUMN: filename, "a": 1},
{LINE_NUM_COLUMN: 2, FILE_NAME_COLUMN: filename, "a": 2},
{
LINE_NUM_COLUMN: 1,
UUID_COLUMN: "uuid",
FILE_NAME_COLUMN: filename.generated_name,
UPLOAD_NAME_COLUMN: filename.upload_name,
"a": 1,
},
{
LINE_NUM_COLUMN: 2,
UUID_COLUMN: "uuid",
FILE_NAME_COLUMN: filename.generated_name,
UPLOAD_NAME_COLUMN: filename.upload_name,
"a": 2,
},
]
def test_iter_method(self):
@patch("data_import.pipeline.readers.uuid.uuid4")
def test_iter_method(self, mock):
mock.return_value = "uuid"
reader = Reader(self.filenames, self.parser)
self.assertEqual(list(reader), self.rows)
def test_batch(self):
@patch("data_import.pipeline.readers.uuid.uuid4")
def test_batch(self, mock):
mock.return_value = "uuid"
reader = Reader(self.filenames, self.parser)
batch = next(reader.batch(2))
expected_df = pd.DataFrame(self.rows)

Loading…
Cancel
Save