Browse Source

Enable import of CSVs with arbitrary column order

pull/535/head
Clemens Wolff 4 years ago
parent
commit
3492874d5e
3 changed files with 22 additions and 2 deletions
  1. 4
      app/api/tests/data/example_out_of_order_columns.csv
  2. 15
      app/api/tests/test_api.py
  3. 5
      app/api/utils.py

4
app/api/tests/data/example_out_of_order_columns.csv

@ -0,0 +1,4 @@
label,foo,text,bar,baz
Positive,foo1,AAA,barA,baz
Positive,foo2,BBB,barB,bazz
Negative,foo3,CCC,barC,bazzz

15
app/api/tests/test_api.py

@ -1054,6 +1054,21 @@ class TestUploader(APITestCase):
file_format='csv',
expected_status=status.HTTP_201_CREATED)
def test_can_upload_classification_csv_with_out_of_order_columns(self):
    """Upload a CSV whose ``text``/``label`` columns are not the first two.

    The fixture's header is ``label,foo,text,bar,baz``; its ``label``
    column holds only the values ``Positive`` and ``Negative``.
    """
    project_id = self.classification_project.id

    # The upload itself must be accepted.
    self.upload_test_helper(
        project_id=project_id,
        filename='example_out_of_order_columns.csv',
        file_format='csv',
        expected_status=status.HTTP_201_CREATED,
    )

    # Compare the project's labels against the fixture's ``label`` column.
    labels_from_fixture = [{'text': name} for name in ('Positive', 'Negative')]
    self.label_test_helper(
        project_id=project_id,
        expected_labels=labels_from_fixture,
        expected_label_keys=[],
    )
def test_can_upload_csv_with_non_utf8_encoding(self):
self.upload_test_helper(project_id=self.classification_project.id,
filename='example.utf16.csv',

5
app/api/utils.py

@ -356,8 +356,9 @@ class ExcelParser(FileParser):
data.append({'text': row[0]})
# Text, labels and metadata columns
elif len(row) == len(columns) and len(row) >= 2:
-text, label = row[:2]
-meta = json.dumps(dict(zip(columns[2:], row[2:])))
+datum = dict(zip(columns, row))
+text, label = datum.pop('text'), datum.pop('label')
+meta = json.dumps(datum)
j = {'text': text, 'labels': [label], 'meta': meta}
data.append(j)
else:

Loading…
Cancel
Save