You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

56 lines
2.0 KiB

  1. import os
  2. import shutil
  3. import tempfile
  4. import unittest
  5. from ...views.upload.data import TextData
  6. from ...views.upload.dataset import CsvDataset
  7. from ...views.upload.label import CategoryLabel
  8. class TestCsvDataset(unittest.TestCase):
  9. def setUp(self):
  10. self.test_dir = tempfile.mkdtemp()
  11. self.test_file = os.path.join(self.test_dir, 'test_file.csv')
  12. def tearDown(self):
  13. shutil.rmtree(self.test_dir)
  14. def create_file(self, content):
  15. with open(self.test_file, 'w') as f:
  16. f.write(content)
  17. def assert_record(self, content, dataset, data='Text', label=None):
  18. if label is None:
  19. label = [{'text': 'Label'}]
  20. self.create_file(content)
  21. record = next(dataset.load(self.test_file))
  22. self.assertEqual(record.data['text'], data)
  23. self.assertEqual(record.label, label)
  24. def test_can_load_default_column_names(self):
  25. content = 'label,text\nLabel,Text'
  26. dataset = CsvDataset(filenames=[], label_class=CategoryLabel, data_class=TextData)
  27. self.assert_record(content, dataset)
  28. def test_can_change_delimiter(self):
  29. content = 'label\ttext\nLabel\tText'
  30. dataset = CsvDataset(filenames=[], label_class=CategoryLabel, data_class=TextData, delimiter='\t')
  31. self.assert_record(content, dataset)
  32. def test_can_specify_column_name(self):
  33. content = 'star,body\nLabel,Text'
  34. dataset = CsvDataset(filenames=[], label_class=CategoryLabel, data_class=TextData,
  35. column_data='body', column_label='star')
  36. self.assert_record(content, dataset)
  37. def test_can_load_only_text_column(self):
  38. content = 'star,text\nLabel,Text'
  39. dataset = CsvDataset(filenames=[], label_class=CategoryLabel, data_class=TextData)
  40. self.assert_record(content, dataset, label=[])
  41. def test_does_not_match_column_and_row(self):
  42. content = 'text,label\nText'
  43. dataset = CsvDataset(filenames=[], label_class=CategoryLabel, data_class=TextData)
  44. self.assert_record(content, dataset, label=[])