You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

41 lines
1.4 KiB

  1. import os
  2. import shutil
  3. import tempfile
  4. import unittest
  5. from ...views.upload.data import TextData
  6. from ...views.upload.dataset import Dataset
  7. from ...views.upload.label import Label
  8. class TestDataset(unittest.TestCase):
  9. def setUp(self):
  10. self.test_dir = tempfile.mkdtemp()
  11. self.test_file = os.path.join(self.test_dir, 'test_file.txt')
  12. self.content = 'こんにちは、世界!'
  13. def tearDown(self):
  14. shutil.rmtree(self.test_dir)
  15. def create_file(self, encoding=None):
  16. with open(self.test_file, 'w', encoding=encoding) as f:
  17. f.write(self.content)
  18. def test_can_load_utf8(self):
  19. self.create_file()
  20. dataset = Dataset(filenames=[], label_class=Label, data_class=TextData)
  21. record = next(dataset.load(self.test_file))
  22. self.assertEqual(record.data['filename'], self.test_file)
  23. def test_cannot_load_shiftjis_without_specifying_encoding(self):
  24. self.create_file('shift_jis')
  25. dataset = Dataset(filenames=[], label_class=Label, data_class=TextData)
  26. with self.assertRaises(UnicodeDecodeError):
  27. next(dataset.load(self.test_file))
  28. def test_can_load_shiftjis_with_specifying_encoding(self):
  29. self.create_file('shift_jis')
  30. dataset = Dataset(filenames=[], label_class=Label, data_class=TextData, encoding='shift_jis')
  31. record = next(dataset.load(self.test_file))
  32. self.assertEqual(record.data['filename'], self.test_file)