diff --git a/app/api/tests/data/example.csv b/app/api/tests/data/example.csv index ac64a3e3..3ff73519 100644 --- a/app/api/tests/data/example.csv +++ b/app/api/tests/data/example.csv @@ -1,4 +1,5 @@ -text,label -AAA,Positive -BBB,Positive -CCC,Negative \ No newline at end of file +text,label,meta +AAA +BBB,Positive,The following is meta data +CCC,Negative +DDD,,This is meta data \ No newline at end of file diff --git a/app/api/tests/data/example.invalid.1.csv b/app/api/tests/data/example_column_and_row_not_matching.csv similarity index 100% rename from app/api/tests/data/example.invalid.1.csv rename to app/api/tests/data/example_column_and_row_not_matching.csv diff --git a/app/api/tests/data/example.invalid.1.xlsx b/app/api/tests/data/example_column_and_row_not_matching.xlsx similarity index 100% rename from app/api/tests/data/example.invalid.1.xlsx rename to app/api/tests/data/example_column_and_row_not_matching.xlsx diff --git a/app/api/tests/test_api.py b/app/api/tests/test_api.py index 7c8b1b8a..743dca3c 100644 --- a/app/api/tests/test_api.py +++ b/app/api/tests/test_api.py @@ -1212,11 +1212,11 @@ class TestUploader(APITestCase): file_format='csv', expected_status=status.HTTP_201_CREATED) - def test_cannot_upload_csv_file_does_not_match_column_and_row(self): + def test_can_upload_csv_file_does_not_match_column_and_row(self): self.upload_test_helper(project_id=self.classification_project.id, - filename='example.invalid.1.csv', + filename='example_column_and_row_not_matching.csv', file_format='csv', - expected_status=status.HTTP_400_BAD_REQUEST) + expected_status=status.HTTP_201_CREATED) def test_cannot_upload_csv_file_has_too_many_columns(self): self.upload_test_helper(project_id=self.classification_project.id, @@ -1242,11 +1242,11 @@ class TestUploader(APITestCase): file_format='excel', expected_status=status.HTTP_201_CREATED) - def test_cannot_upload_excel_file_does_not_match_column_and_row(self): + def test_can_upload_excel_file_does_not_match_column_and_row(self): self.upload_test_helper(project_id=self.classification_project.id, - filename='example.invalid.1.xlsx', + filename='example_column_and_row_not_matching.xlsx', file_format='excel', - expected_status=status.HTTP_400_BAD_REQUEST) + expected_status=status.HTTP_201_CREATED) def test_cannot_upload_excel_file_has_too_many_columns(self): self.upload_test_helper(project_id=self.classification_project.id, @@ -1419,10 +1419,10 @@ class TestParser(APITestCase): parser=CoNLLParser()) def test_give_classification_data_to_csv_parser(self): - self.parser_helper(filename='example.csv', parser=CSVParser()) + self.parser_helper(filename='example.csv', parser=CSVParser(), include_label=False) def test_give_seq2seq_data_to_csv_parser(self): - self.parser_helper(filename='example.csv', parser=CSVParser()) + self.parser_helper(filename='example.csv', parser=CSVParser(), include_label=False) def test_give_classification_data_to_json_parser(self): self.parser_helper(filename='classification.jsonl', parser=JSONParser()) diff --git a/app/api/utils.py b/app/api/utils.py index 17182696..03bbe118 100644 --- a/app/api/utils.py +++ b/app/api/utils.py @@ -385,14 +385,17 @@ class ExcelParser(FileParser): yield data data = [] # Only text column - if len(row) == len(columns) and len(row) == 1: + if len(row) <= len(columns) and len(row) == 1: data.append({'text': row[0]}) # Text, labels and metadata columns - elif len(row) == len(columns) and len(row) >= 2: + elif 2 <= len(row) <= len(columns): datum = dict(zip(columns, row)) text, label = datum.pop('text'), datum.pop('label') meta = FileParser.encode_metadata(datum) - j = {'text': text, 'labels': [label], 'meta': meta} + if label != '': + j = {'text': text, 'labels': [label], 'meta': meta} + else: + j = {'text': text, 'meta': meta} data.append(j) else: raise FileParseException(line_num=i, line=row)