Browse Source

Ensure data pagination is covered in tests

pull/223/head
Clemens Wolff 5 years ago
parent
commit
c495b141cc
2 changed files with 6 additions and 5 deletions
  1. 1
      app/server/tests/test_api.py
  2. 10
      app/server/utils.py

1
app/server/tests/test_api.py

@@ -872,6 +872,7 @@ class TestFeatures(APITestCase):
self.assertFalse(response.json().get('cloud_upload'))
+@override_settings(IMPORT_BATCH_SIZE=2)
class TestParser(APITestCase):
def parser_helper(self, filename, parser, include_label=True):

10
app/server/utils.py

@@ -7,10 +7,10 @@ from collections import defaultdict
from random import Random
from django.db import transaction
+from django.conf import settings
from rest_framework.renderers import JSONRenderer
from seqeval.metrics.sequence_labeling import get_entities
-from app.settings import IMPORT_BATCH_SIZE
from .exceptions import FileParseException
from .models import Label
from .serializers import DocumentSerializer, LabelSerializer
@@ -251,7 +251,7 @@ class CoNLLParser(FileParser):
words, tags = [], []
data = []
for i, line in enumerate(file, start=1):
-if len(data) >= IMPORT_BATCH_SIZE:
+if len(data) >= settings.IMPORT_BATCH_SIZE:
yield data
data = []
line = line.decode('utf-8')
@@ -301,7 +301,7 @@ class PlainTextParser(FileParser):
def parse(self, file):
file = io.TextIOWrapper(file, encoding='utf-8')
while True:
-batch = list(itertools.islice(file, IMPORT_BATCH_SIZE))
+batch = list(itertools.islice(file, settings.IMPORT_BATCH_SIZE))
if not batch:
break
yield [{'text': line.strip()} for line in batch]
@@ -327,7 +327,7 @@ class CSVParser(FileParser):
columns = next(reader)
data = []
for i, row in enumerate(reader, start=2):
-if len(data) >= IMPORT_BATCH_SIZE:
+if len(data) >= settings.IMPORT_BATCH_SIZE:
yield data
data = []
if len(row) == len(columns) and len(row) >= 2:
@@ -347,7 +347,7 @@ class JSONParser(FileParser):
file = io.TextIOWrapper(file, encoding='utf-8')
data = []
for i, line in enumerate(file, start=1):
-if len(data) >= IMPORT_BATCH_SIZE:
+if len(data) >= settings.IMPORT_BATCH_SIZE:
yield data
data = []
try:

Loading…
Cancel
Save