From 78d7575886b3aa9b0119e262639cadb4778a67ea Mon Sep 17 00:00:00 2001 From: Christian Gill Date: Tue, 30 Jun 2020 15:38:14 +0200 Subject: [PATCH 1/5] allow rows to contain less data than columns suggest --- app/api/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/api/utils.py b/app/api/utils.py index 17182696..af84c460 100644 --- a/app/api/utils.py +++ b/app/api/utils.py @@ -385,10 +385,10 @@ class ExcelParser(FileParser): yield data data = [] # Only text column - if len(row) == len(columns) and len(row) == 1: - data.append({'text': row[0]}) + if len(row) <= len(columns) and len(row) == 1: + data.append({'text': row[0] }) # Text, labels and metadata columns - elif len(row) == len(columns) and len(row) >= 2: + elif len(row) <= len(columns) and len(row) >= 2: datum = dict(zip(columns, row)) text, label = datum.pop('text'), datum.pop('label') meta = FileParser.encode_metadata(datum) From 43043e302b2a5ec8671d1e4b77edab9d53640bc9 Mon Sep 17 00:00:00 2001 From: Christian Gill Date: Tue, 30 Jun 2020 15:48:11 +0200 Subject: [PATCH 2/5] add testcase for csv upload --- app/api/tests/data/example.csv | 6 +++--- ...nvalid.1.csv => example_column_and_row_not_matching.csv} | 0 app/api/tests/test_api.py | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) rename app/api/tests/data/{example.invalid.1.csv => example_column_and_row_not_matching.csv} (100%) diff --git a/app/api/tests/data/example.csv b/app/api/tests/data/example.csv index ac64a3e3..69cfabd1 100644 --- a/app/api/tests/data/example.csv +++ b/app/api/tests/data/example.csv @@ -1,4 +1,4 @@ -text,label -AAA,Positive -BBB,Positive +text,label,meta +AAA +BBB,Positive,The following is meta data CCC,Negative \ No newline at end of file diff --git a/app/api/tests/data/example.invalid.1.csv b/app/api/tests/data/example_column_and_row_not_matching.csv similarity index 100% rename from app/api/tests/data/example.invalid.1.csv rename to app/api/tests/data/example_column_and_row_not_matching.csv diff --git a/app/api/tests/test_api.py b/app/api/tests/test_api.py index a1387a15..2451b3e1 100644 --- a/app/api/tests/test_api.py +++ b/app/api/tests/test_api.py @@ -1195,11 +1195,11 @@ class TestUploader(APITestCase): file_format='csv', expected_status=status.HTTP_201_CREATED) - def test_cannot_upload_csv_file_does_not_match_column_and_row(self): + def test_can_upload_csv_file_does_not_match_column_and_row(self): self.upload_test_helper(project_id=self.classification_project.id, - filename='example.invalid.1.csv', + filename='example_column_and_row_not_matching.csv', file_format='csv', - expected_status=status.HTTP_400_BAD_REQUEST) + expected_status=status.HTTP_201_CREATED) def test_cannot_upload_csv_file_has_too_many_columns(self): self.upload_test_helper(project_id=self.classification_project.id, From f1ff2f6066139485363c0f4309686b969f40e3b5 Mon Sep 17 00:00:00 2001 From: Christian Gill Date: Tue, 30 Jun 2020 16:14:56 +0200 Subject: [PATCH 3/5] fix csv testcases to overlook missing labels --- ...lsx => example_column_and_row_not_matching.xlsx} | Bin app/api/tests/test_api.py | 10 +++++----- 2 files changed, 5 insertions(+), 5 deletions(-) rename app/api/tests/data/{example.invalid.1.xlsx => example_column_and_row_not_matching.xlsx} (100%) diff --git a/app/api/tests/data/example.invalid.1.xlsx b/app/api/tests/data/example_column_and_row_not_matching.xlsx similarity index 100% rename from app/api/tests/data/example.invalid.1.xlsx rename to app/api/tests/data/example_column_and_row_not_matching.xlsx diff --git a/app/api/tests/test_api.py b/app/api/tests/test_api.py index 2451b3e1..8c74db01 100644 --- a/app/api/tests/test_api.py +++ b/app/api/tests/test_api.py @@ -1225,11 +1225,11 @@ class TestUploader(APITestCase): file_format='excel', expected_status=status.HTTP_201_CREATED) - def test_cannot_upload_excel_file_does_not_match_column_and_row(self): + def test_can_upload_excel_file_does_not_match_column_and_row(self): self.upload_test_helper(project_id=self.classification_project.id, - filename='example.invalid.1.xlsx', + filename='example_column_and_row_not_matching.xlsx', file_format='excel', - expected_status=status.HTTP_400_BAD_REQUEST) + expected_status=status.HTTP_201_CREATED) def test_cannot_upload_excel_file_has_too_many_columns(self): self.upload_test_helper(project_id=self.classification_project.id, @@ -1402,10 +1402,10 @@ class TestParser(APITestCase): parser=CoNLLParser()) def test_give_classification_data_to_csv_parser(self): - self.parser_helper(filename='example.csv', parser=CSVParser()) + self.parser_helper(filename='example.csv', parser=CSVParser(), include_label=False) def test_give_seq2seq_data_to_csv_parser(self): - self.parser_helper(filename='example.csv', parser=CSVParser()) + self.parser_helper(filename='example.csv', parser=CSVParser(), include_label=False) def test_give_classification_data_to_json_parser(self): self.parser_helper(filename='classification.jsonl', parser=JSONParser()) From 62f373f06191aff87d460624bb92d6589eecd593 Mon Sep 17 00:00:00 2001 From: Christian Gill Date: Wed, 22 Jul 2020 09:47:35 +0200 Subject: [PATCH 4/5] implement feedback --- app/api/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/api/utils.py b/app/api/utils.py index af84c460..194a63ed 100644 --- a/app/api/utils.py +++ b/app/api/utils.py @@ -386,9 +386,9 @@ class ExcelParser(FileParser): data = [] # Only text column if len(row) <= len(columns) and len(row) == 1: - data.append({'text': row[0] }) + data.append({'text': row[0]}) # Text, labels and metadata columns - elif len(row) <= len(columns) and len(row) >= 2: + elif 2 <= len(row) <= len(columns): datum = dict(zip(columns, row)) text, label = datum.pop('text'), datum.pop('label') meta = FileParser.encode_metadata(datum) From 303f9851c49a75dcfa613b815d35cd79f72307e0 Mon Sep 17 00:00:00 2001 From: Christian Gill Date: Mon, 27 Jul 2020 14:02:08 +0200 Subject: [PATCH 5/5] allow for uploading empty label testcase for empty label --- app/api/tests/data/example.csv | 3 ++- app/api/utils.py | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/app/api/tests/data/example.csv b/app/api/tests/data/example.csv index 69cfabd1..3ff73519 100644 --- a/app/api/tests/data/example.csv +++ b/app/api/tests/data/example.csv @@ -1,4 +1,5 @@ text,label,meta AAA BBB,Positive,The following is meta data -CCC,Negative \ No newline at end of file +CCC,Negative +DDD,,This is meta data \ No newline at end of file diff --git a/app/api/utils.py b/app/api/utils.py index 194a63ed..03bbe118 100644 --- a/app/api/utils.py +++ b/app/api/utils.py @@ -392,7 +392,10 @@ class ExcelParser(FileParser): datum = dict(zip(columns, row)) text, label = datum.pop('text'), datum.pop('label') meta = FileParser.encode_metadata(datum) - j = {'text': text, 'labels': [label], 'meta': meta} + if label != '': + j = {'text': text, 'labels': [label], 'meta': meta} + else: + j = {'text': text, 'meta': meta} data.append(j) else: raise FileParseException(line_num=i, line=row)