Browse Source

fix fasttext

pull/1897/head
Casey 2 years ago
parent
commit
73b0367363
1 changed file with 5 additions and 2 deletions
  1. 7
      backend/data_export/pipeline/formatters.py

7
backend/data_export/pipeline/formatters.py

@@ -46,12 +46,15 @@ class FastTextCategoryFormatter(Formatter):
"""Format the label column to `__label__LabelA __label__LabelB` format.
Also, drop the columns except for `data` and `self.target_column`.
"""
dataset = dataset[[DATA, self.target_column]]
dataset = dataset[[DATA, self.target_column, "Comments"]]
dataset[self.target_column] = dataset[self.target_column].apply(
lambda labels: " ".join(sorted(f"__label__{label.to_string()}" for label in labels))
)
dataset[self.target_column] = dataset[self.target_column].fillna("")
dataset = dataset[self.target_column] + " " + dataset[DATA]
dataset["Comments"] = dataset["Comments"].apply(
lambda comments: "#".join(comment.to_string() for comment in comments)
)
dataset = dataset[self.target_column] + " " + dataset[DATA] + " " + dataset["Comments"]
return dataset

Loading…
Cancel
Save