You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

48 lines
1.1 KiB

import os
import shutil
import tempfile
import unittest
from ...views.upload.data import TextData
from ...views.upload.dataset import CoNLLDataset
from ...views.upload.label import OffsetLabel
class TestCoNLLDataset(unittest.TestCase):
    """Exercise ``CoNLLDataset.load`` against a small tab-separated file."""

    def setUp(self):
        """Create a scratch directory and the two-sentence CoNLL fixture."""
        self.test_dir = tempfile.mkdtemp()
        self.test_file = os.path.join(self.test_dir, 'test_file.txt')
        # CoNLL input separates sentences with an empty line. The blank
        # line after ".\tO" is what makes the first sentence end at "." so
        # that test_can_load's expected text can match; without it the
        # whole fixture is a single sentence.
        # Built from adjacent literals (not a triple-quoted string) so the
        # fixture lines do not have to sit at column 0 inside the class.
        self.content = (
            'EU\tB-ORG\n'
            'rejects\tO\n'
            'German\tB-MISC\n'
            'call\tO\n'
            'to\tO\n'
            'boycott\tO\n'
            'British\tB-MISC\n'
            'lamb\tO\n'
            '.\tO\n'
            '\n'
            'Peter\tB-PER\n'
            'Blackburn\tI-PER\n'
        )

    def tearDown(self):
        """Remove the scratch directory created in ``setUp``."""
        shutil.rmtree(self.test_dir)

    def create_file(self, encoding=None):
        """Write ``self.content`` to ``self.test_file``.

        Args:
            encoding: Text encoding passed to ``open``; ``None`` keeps the
                platform default.
        """
        with open(self.test_file, 'w', encoding=encoding) as f:
            f.write(self.content)

    def test_can_load(self):
        """The first loaded record carries the first sentence's tokens."""
        self.create_file()
        dataset = CoNLLDataset(
            filenames=[self.test_file],
            label_class=OffsetLabel,
            data_class=TextData,
        )
        records = dataset.load(self.test_file)
        # Only the first record is inspected; the second sentence is there
        # to give the first one a boundary to stop at.
        record = next(records)
        expected = 'EU rejects German call to boycott British lamb .'
        self.assertEqual(record.data['text'], expected)