Browse Source
Merge pull request #225 from CatalystCode/bugfix/conll-with-trailing-newlines
Bugfix/Fix CoNLL parsing with trailing newlines
pull/281/head
Hiroki Nakayama
5 years ago
committed by
GitHub
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with
23 additions and
2 deletions
-
app/server/tests/data/labeling.trailing.conll
-
app/server/tests/test_api.py
-
app/server/utils.py
|
|
@@ -0,0 +1,14 @@ |
|
|
|
SOCCER O |
|
|
|
- O |
|
|
|
JAPAN B-LOC |
|
|
|
GET O |
|
|
|
LUCKY O |
|
|
|
WIN O |
|
|
|
, O |
|
|
|
CHINA B-PER |
|
|
|
IN O |
|
|
|
SURPRISE O |
|
|
|
DEFEAT O |
|
|
|
. O |
|
|
|
|
|
|
|
|
|
|
@@ -808,16 +808,22 @@ class TestParser(APITestCase): |
|
|
|
|
|
|
|
def parser_helper(self, filename, parser, include_label=True): |
|
|
|
with open(os.path.join(DATA_DIR, filename), mode='rb') as f: |
|
|
|
result = parser.parse(f) |
|
|
|
result = list(parser.parse(f)) |
|
|
|
for data in result: |
|
|
|
for r in data: |
|
|
|
self.assertIn('text', r) |
|
|
|
if include_label: |
|
|
|
self.assertIn('labels', r) |
|
|
|
return result |
|
|
|
|
|
|
|
def test_give_valid_data_to_conll_parser(self): |
|
|
|
self.parser_helper(filename='labeling.conll', parser=CoNLLParser()) |
|
|
|
|
|
|
|
def test_give_valid_data_to_conll_parser_with_trailing_newlines(self): |
|
|
|
result = self.parser_helper(filename='labeling.trailing.conll', parser=CoNLLParser()) |
|
|
|
self.assertEqual(len(result), 1) |
|
|
|
self.assertEqual(len(result[0]), 1) |
|
|
|
|
|
|
|
def test_plain_parser(self): |
|
|
|
self.parser_helper(filename='example.txt', parser=PlainTextParser(), include_label=False) |
|
|
|
|
|
|
|
|
|
@@ -263,13 +263,14 @@ class CoNLLParser(FileParser): |
|
|
|
raise FileParseException(line_num=i, line=line) |
|
|
|
words.append(word) |
|
|
|
tags.append(tag) |
|
|
|
else: |
|
|
|
elif words and tags: |
|
|
|
j = self.calc_char_offset(words, tags) |
|
|
|
data.append(j) |
|
|
|
words, tags = [], [] |
|
|
|
if len(words) > 0: |
|
|
|
j = self.calc_char_offset(words, tags) |
|
|
|
data.append(j) |
|
|
|
if data: |
|
|
|
yield data |
|
|
|
|
|
|
|
@classmethod |
|
|
|