mirror of https://github.com/doccano/doccano.git
python dataset natural-language-processing data-labeling machine-learning annotation-tool datasets active-learning text-annotation
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
21 lines
577 B
21 lines
577 B
import re
|
|
import string
|
|
|
|
|
|
def get_key_choices():
    """Build the (value, label) pairs for every supported shortcut key.

    The shortcuts are generated in this order:

    1. each lowercase ASCII letter on its own (``'a'`` .. ``'z'``),
    2. each letter prefixed by a single modifier (``'ctrl a'`` ..
       ``'ctrl z'``, then ``'shift a'`` .. ``'shift z'``),
    3. each letter prefixed by both modifiers (``'ctrl shift a'`` ..),
    4. the empty string, meaning "no shortcut".

    Returns:
        tuple: ``(shortcut, shortcut)`` pairs, where value and label are the
        same string. A tuple is returned rather than a generator so the
        result can be iterated repeatedly, indexed and measured with
        ``len()`` without being exhausted after the first pass.
    """
    letters = list(string.ascii_lowercase)
    modifiers = 'ctrl shift'

    shortcuts = list(letters)
    # Single-modifier combinations: all 'ctrl x' first, then all 'shift x'.
    shortcuts += [
        modifier + ' ' + letter
        for modifier in modifiers.split()
        for letter in letters
    ]
    # Both modifiers held together.
    shortcuts += [modifiers + ' ' + letter for letter in letters]
    # Empty entry so that "no shortcut" is a valid choice.
    shortcuts.append('')

    return tuple((shortcut, shortcut) for shortcut in shortcuts)
|
|
|
|
|
|
def extract_label(tag):
    """Strip a leading ``B-``/``I-``/``E-``/``S-`` prefix from *tag*.

    Args:
        tag: The tag string to inspect, e.g. ``'B-PER'``.

    Returns:
        The text following the prefix when *tag* starts with one of the
        letters ``B``, ``I``, ``E`` or ``S`` followed by ``-`` and at least
        one more character; otherwise *tag* itself, unchanged.
    """
    found = re.match(r'(B|I|E|S)-(.+)', tag)
    if found is None:
        # No recognised prefix: hand the tag back untouched.
        return tag
    # Group 2 holds everything after the "<letter>-" prefix.
    return found.group(2)
|