You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

48 lines
1.1 KiB

  1. import os
  2. import shutil
  3. import tempfile
  4. import unittest
  5. from ...views.upload.data import TextData
  6. from ...views.upload.dataset import CoNLLDataset
  7. from ...views.upload.label import OffsetLabel
  8. class TestCoNLLDataset(unittest.TestCase):
  9. def setUp(self):
  10. self.test_dir = tempfile.mkdtemp()
  11. self.test_file = os.path.join(self.test_dir, 'test_file.txt')
  12. self.content = """EU\tB-ORG
  13. rejects\tO
  14. German\tB-MISC
  15. call\tO
  16. to\tO
  17. boycott\tO
  18. British\tB-MISC
  19. lamb\tO
  20. .\tO
  21. Peter\tB-PER
  22. Blackburn\tI-PER
  23. """
  24. def tearDown(self):
  25. shutil.rmtree(self.test_dir)
  26. def create_file(self, encoding=None):
  27. with open(self.test_file, 'w', encoding=encoding) as f:
  28. f.write(self.content)
  29. def test_can_load(self):
  30. self.create_file()
  31. dataset = CoNLLDataset(
  32. filenames=[self.test_file],
  33. label_class=OffsetLabel,
  34. data_class=TextData
  35. )
  36. it = dataset.load(self.test_file)
  37. record = next(it)
  38. expected = 'EU rejects German call to boycott British lamb .'
  39. self.assertEqual(record.data['text'], expected)