diff --git a/app/api/tests/data/example_out_of_order_columns.csv b/app/api/tests/data/example_out_of_order_columns.csv
new file mode 100644
index 00000000..4bba91c8
--- /dev/null
+++ b/app/api/tests/data/example_out_of_order_columns.csv
@@ -0,0 +1,4 @@
+label,foo,text,bar,baz
+Positive,foo1,AAA,barA,baz
+Positive,foo2,BBB,barB,bazz
+Negative,foo3,CCC,barC,bazzz
\ No newline at end of file
diff --git a/app/api/tests/test_api.py b/app/api/tests/test_api.py
index 46c95466..92f699b4 100644
--- a/app/api/tests/test_api.py
+++ b/app/api/tests/test_api.py
@@ -1067,6 +1067,22 @@ class TestUploader(APITestCase):
                                 file_format='csv',
                                 expected_status=status.HTTP_201_CREATED)
 
+    def test_can_upload_classification_csv_with_out_of_order_columns(self):
+        """The text/label columns are located by header name, not position."""
+        self.upload_test_helper(project_id=self.classification_project.id,
+                                filename='example_out_of_order_columns.csv',
+                                file_format='csv',
+                                expected_status=status.HTTP_201_CREATED)
+
+        self.label_test_helper(
+            project_id=self.classification_project.id,
+            expected_labels=[
+                {'text': 'Positive'},
+                {'text': 'Negative'},
+            ],
+            expected_label_keys=[],
+        )
+
     def test_can_upload_csv_with_non_utf8_encoding(self):
         self.upload_test_helper(project_id=self.classification_project.id,
                                 filename='example.utf16.csv',
diff --git a/app/api/utils.py b/app/api/utils.py
index 396a66b4..f40a0137 100644
--- a/app/api/utils.py
+++ b/app/api/utils.py
@@ -356,8 +356,15 @@ class ExcelParser(FileParser):
                 data.append({'text': row[0]})
             # Text, labels and metadata columns
             elif len(row) == len(columns) and len(row) >= 2:
-                text, label = row[:2]
-                meta = json.dumps(dict(zip(columns[2:], row[2:])))
+                datum = dict(zip(columns, row))
+                if 'text' in datum and 'label' in datum:
+                    # Named header: column order in the file does not matter
+                    text, label = datum.pop('text'), datum.pop('label')
+                else:
+                    # Header lacks text/label names: keep the positional layout
+                    text, label = row[:2]
+                    datum = dict(zip(columns[2:], row[2:]))
+                meta = json.dumps(datum)
                 j = {'text': text, 'labels': [label], 'meta': meta}
                 data.append(j)
             else: