mirror of https://github.com/doccano/doccano.git
python datasets active-learning text-annotation dataset natural-language-processing data-labeling machine-learning annotation-tool
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
114 lines
3.7 KiB
114 lines
3.7 KiB
import csv
|
|
import json
|
|
from io import TextIOWrapper
|
|
|
|
from django.urls import reverse
|
|
from django.http import HttpResponse, HttpResponseRedirect
|
|
from django.shortcuts import get_object_or_404
|
|
from django.views import View
|
|
from django.views.generic import TemplateView, CreateView
|
|
from django.views.generic.list import ListView
|
|
from django.contrib.auth.mixins import LoginRequiredMixin
|
|
|
|
from .permissions import SuperUserMixin
|
|
from .forms import ProjectForm
|
|
from .models import Document, Project
|
|
|
|
|
|
class IndexView(TemplateView):
    """Render the ``index.html`` template."""

    template_name = 'index.html'
|
|
|
|
|
|
class ProjectView(LoginRequiredMixin, TemplateView):
    """Render a project page with the template the project itself names."""

    def get_template_names(self):
        """Return a one-element list holding the project's template name.

        The project is looked up by the ``project_id`` URL keyword argument;
        ``get_object_or_404`` turns a missing project into a 404 response.
        """
        project_pk = self.kwargs['project_id']
        current_project = get_object_or_404(Project, pk=project_pk)
        template = current_project.get_template_name()
        return [template]
|
|
|
|
|
|
class ProjectsView(LoginRequiredMixin, CreateView):
    """Creation view that pairs ``ProjectForm`` with ``projects.html``."""

    template_name = 'projects.html'
    form_class = ProjectForm
|
|
|
|
|
|
class DatasetView(SuperUserMixin, LoginRequiredMixin, ListView):
    """List a project's documents, five per page.

    Access is gated by ``SuperUserMixin`` and ``LoginRequiredMixin``.
    """

    paginate_by = 5
    template_name = 'admin/dataset.html'

    def get_queryset(self):
        """Return every document of the project named by ``project_id``.

        A project that does not exist results in a 404 response.
        """
        owner = get_object_or_404(Project, pk=self.kwargs['project_id'])
        documents = owner.documents.all()
        return documents
|
|
|
|
|
|
class LabelView(SuperUserMixin, LoginRequiredMixin, TemplateView):
    """Render ``admin/label.html``.

    Access is gated by ``SuperUserMixin`` and ``LoginRequiredMixin``.
    """

    template_name = 'admin/label.html'
|
|
|
|
|
|
class StatsView(SuperUserMixin, LoginRequiredMixin, TemplateView):
    """Render ``admin/stats.html``.

    Access is gated by ``SuperUserMixin`` and ``LoginRequiredMixin``.
    """

    template_name = 'admin/stats.html'
|
|
|
|
|
|
class GuidelineView(SuperUserMixin, LoginRequiredMixin, TemplateView):
    """Render ``admin/guideline.html``.

    Access is gated by ``SuperUserMixin`` and ``LoginRequiredMixin``.
    """

    template_name = 'admin/guideline.html'
|
|
|
|
|
|
class DataUpload(SuperUserMixin, LoginRequiredMixin, TemplateView):
    """Upload page that bulk-imports documents into a project.

    GET renders ``admin/dataset_upload.html``; POST reads the uploaded file
    and creates one ``Document`` per record. Access is gated by
    ``SuperUserMixin`` and ``LoginRequiredMixin``.
    """

    template_name = 'admin/dataset_upload.html'

    def post(self, request, *args, **kwargs):
        """Import the uploaded file and redirect according to the outcome.

        The form is expected to supply ``format`` (``'csv'`` or ``'json'``)
        and a ``file`` upload.

        * ``csv`` -- for sequence-labeling projects every raw line of the
          file becomes a document; otherwise the file is parsed with
          ``csv.reader`` and the first column of each non-empty row is used.
        * ``json`` -- one JSON object per non-blank line; its ``text`` key is
          used.

        Returns:
            A redirect to the ``dataset`` page on success, or back to the
            ``upload`` page when the format is missing/unsupported or the
            file cannot be read, parsed, or stored.
        """
        project = get_object_or_404(Project, pk=kwargs.get('project_id'))
        # .get() so that a missing field is handled like an unsupported
        # format instead of raising before the error handling below.
        import_format = request.POST.get('format')
        try:
            if import_format == 'csv':
                form_data = TextIOWrapper(
                    request.FILES['file'].file, encoding='utf-8')
                if project.is_type_of(Project.SEQUENCE_LABELING):
                    # Sequence-labeling input is taken line by line, as-is.
                    documents = [
                        Document(text=line.strip(), project=project)
                        for line in form_data
                    ]
                else:
                    # csv.reader yields [] for blank lines; skip those rows
                    # rather than failing the whole upload on row[0].
                    documents = [
                        Document(text=row[0].strip(), project=project)
                        for row in csv.reader(form_data)
                        if row
                    ]
            elif import_format == 'json':
                # One JSON object per line; blank lines are ignored.
                documents = [
                    Document(text=json.loads(entry)['text'], project=project)
                    for entry in request.FILES['file'].file
                    if entry.strip()
                ]
            else:
                # Nothing can be imported, so do not report success.
                return HttpResponseRedirect(
                    reverse('upload', args=[project.id]))

            Document.objects.bulk_create(documents)
            return HttpResponseRedirect(reverse('dataset', args=[project.id]))
        except Exception:
            # Best-effort import: any read/parse/database failure sends the
            # user back to the upload form. Unlike a bare ``except``, this
            # lets SystemExit and KeyboardInterrupt propagate.
            return HttpResponseRedirect(reverse('upload', args=[project.id]))
|
|
|
|
|
|
class DataDownload(SuperUserMixin, LoginRequiredMixin, View):
    """Export a project's documents as a CSV attachment.

    Access is gated by ``SuperUserMixin`` and ``LoginRequiredMixin``.
    """

    def get(self, request, *args, **kwargs):
        """Build and return the CSV download response.

        Rows come from ``make_dataset()`` of each distinct document returned
        by ``project.get_documents(is_null=False)``. The file name is the
        lower-cased project name with whitespace runs joined by underscores.
        A project that does not exist results in a 404 response.
        """
        project = get_object_or_404(Project, pk=self.kwargs['project_id'])
        documents = project.get_documents(is_null=False).distinct()

        name_parts = project.name.lower().split()
        filename = '_'.join(name_parts)
        disposition = 'attachment; filename="{}.csv"'.format(filename)

        response = HttpResponse(content_type='text/csv')
        response['Content-Disposition'] = disposition

        # The response object is file-like, so csv can write straight to it.
        csv_out = csv.writer(response)
        for document in documents:
            csv_out.writerows(document.make_dataset())
        return response
|
|
|
|
|
|
class DemoTextClassification(TemplateView):
    """Render ``demo/demo_text_classification.html``."""

    template_name = 'demo/demo_text_classification.html'
|
|
|
|
|
|
class DemoNamedEntityRecognition(TemplateView):
    """Render ``demo/demo_named_entity.html``."""

    template_name = 'demo/demo_named_entity.html'
|
|
|
|
|
|
class DemoTranslation(TemplateView):
    """Render ``demo/demo_translation.html``."""

    template_name = 'demo/demo_translation.html'
|