From 3492874d5e8b1922fa836e279ff10c6a6c732d5b Mon Sep 17 00:00:00 2001 From: Clemens Wolff Date: Mon, 13 Jan 2020 09:07:50 -0500 Subject: [PATCH] Enable import of CSVs with arbitrary column order --- .../tests/data/example_out_of_order_columns.csv | 4 ++++ app/api/tests/test_api.py | 15 +++++++++++++++ app/api/utils.py | 5 +++-- 3 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 app/api/tests/data/example_out_of_order_columns.csv diff --git a/app/api/tests/data/example_out_of_order_columns.csv b/app/api/tests/data/example_out_of_order_columns.csv new file mode 100644 index 00000000..4bba91c8 --- /dev/null +++ b/app/api/tests/data/example_out_of_order_columns.csv @@ -0,0 +1,4 @@ +label,foo,text,bar,baz +Positive,foo1,AAA,barA,baz +Positive,foo2,BBB,barB,bazz +Negative,foo3,CCC,barC,bazzz \ No newline at end of file diff --git a/app/api/tests/test_api.py b/app/api/tests/test_api.py index 0a77bb7c..c753e79f 100644 --- a/app/api/tests/test_api.py +++ b/app/api/tests/test_api.py @@ -1054,6 +1054,21 @@ class TestUploader(APITestCase): file_format='csv', expected_status=status.HTTP_201_CREATED) + def test_can_upload_classification_csv_with_out_of_order_columns(self): + self.upload_test_helper(project_id=self.classification_project.id, + filename='example_out_of_order_columns.csv', + file_format='csv', + expected_status=status.HTTP_201_CREATED) + + self.label_test_helper( + project_id=self.classification_project.id, + expected_labels=[ + {'text': 'Positive'}, + {'text': 'Negative'}, + ], + expected_label_keys=[], + ) + def test_can_upload_csv_with_non_utf8_encoding(self): self.upload_test_helper(project_id=self.classification_project.id, filename='example.utf16.csv', diff --git a/app/api/utils.py b/app/api/utils.py index 396a66b4..f40a0137 100644 --- a/app/api/utils.py +++ b/app/api/utils.py @@ -356,8 +356,9 @@ class ExcelParser(FileParser): data.append({'text': row[0]}) # Text, labels and metadata columns elif len(row) == len(columns) and len(row) >= 2: - text, label = row[:2] - meta = json.dumps(dict(zip(columns[2:], row[2:]))) + datum = dict(zip(columns, row)) + text, label = datum.pop('text'), datum.pop('label') + meta = json.dumps(datum) j = {'text': text, 'labels': [label], 'meta': meta} data.append(j) else: