Browse Source

Update task runner

pull/10/head
Hironsan 6 years ago
parent
commit
5ee503185a
2 changed files with 16 additions and 4 deletions
  1. 7
      doccano/classifier/task.py
  2. 13
      doccano/classifier/utils.py

7
doccano/classifier/task.py

@ -5,7 +5,7 @@ import numpy as np
from doccano.classifier.model import build_model
from doccano.classifier.preprocess import build_vectorizer
from doccano.classifier.utils import load_dataset, save_dataset, train_test_split
from doccano.classifier.utils import load_dataset, save_dataset, make_output, train_test_split
def run(filename):
@ -30,6 +30,5 @@ def run(filename):
y_prob = np.max(y_prob, axis=-1)
print('Saving...')
outputs = {}
# data, ids, y_pred, y_prob
#save_dataset(outputs, filename)
data = make_output(data, ids, y_pred, y_prob)
save_dataset(data, filename)

13
doccano/classifier/utils.py

@ -30,3 +30,16 @@ def save_dataset(obj, filename):
with open(filename, 'w') as f:
for line in obj:
f.write('{}\n'.format(json.dumps(line)))
def make_output(data, ids, y_pred, y_prob):
    """Attach predicted labels and probabilities to the matching records.

    Each record in ``data`` whose ``'id'`` appears in ``ids`` receives a
    ``'label'`` (``int``) and a ``'prob'`` (``float``) taken from the same
    position in ``y_pred`` / ``y_prob``. Records without a prediction are
    left untouched.

    Matching is done by id rather than by position, so ``ids`` does not need
    to follow the order of ``data``, and an id that is absent from ``data``
    no longer prevents the remaining predictions from being written.

    Args:
        data: Iterable of dict records, each carrying an ``'id'`` key.
        ids: Sequence of record ids, aligned index-by-index with ``y_pred``
            and ``y_prob``.
        y_pred: Sequence of predicted labels (convertible with ``int``).
        y_prob: Sequence of prediction confidences (convertible with
            ``float``).

    Returns:
        The same ``data`` object; its records are updated in place.
    """
    # Map each id to the prediction indices that carry it, in order, so a
    # repeated id is handed out to successive records one index at a time.
    pending = {}
    for index, record_id in enumerate(ids):
        pending.setdefault(record_id, []).append(index)

    for record in data:
        indices = pending.get(record['id'])
        if not indices:
            # No prediction (left) for this record.
            continue
        index = indices.pop(0)
        # Cast to plain Python scalars so the record stays JSON-serializable.
        record['label'] = int(y_pred[index])
        record['prob'] = float(y_prob[index])
    return data
Loading…
Cancel
Save