You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

207 lines
7.3 KiB

import csv
import json
from io import TextIOWrapper
import itertools as it
import logging
from django.contrib.auth.views import LoginView as BaseLoginView
from django.urls import reverse
from django.http import HttpResponse, HttpResponseRedirect
from django.shortcuts import get_object_or_404
from django.views import View
from django.views.generic import TemplateView, CreateView
from django.views.generic.list import ListView
from django.contrib.auth.mixins import LoginRequiredMixin
from django.contrib import messages
from .permissions import SuperUserMixin
from .forms import ProjectForm
from .models import Document, Project
from app import settings
logger = logging.getLogger(__name__)
class IndexView(TemplateView):
    """Render the ``index.html`` template."""

    template_name = 'index.html'
class ProjectView(LoginRequiredMixin, TemplateView):
    """Render a project using the template that the project itself names."""

    def get_template_names(self):
        """Return a one-element list with the project's template name.

        The project is looked up by the ``project_id`` URL keyword argument;
        a missing project results in a 404 via ``get_object_or_404``.
        """
        pk = self.kwargs['project_id']
        template = get_object_or_404(Project, pk=pk).get_template_name()
        return [template]
class ProjectsView(LoginRequiredMixin, CreateView):
    """Render ``projects.html`` and process ``ProjectForm`` via ``CreateView``."""

    template_name = 'projects.html'
    form_class = ProjectForm
class DatasetView(SuperUserMixin, LoginRequiredMixin, ListView):
    """List a project's documents in ``admin/dataset.html``, five per page."""

    template_name = 'admin/dataset.html'
    paginate_by = 5

    def get_queryset(self):
        """Return all documents of the project named by ``project_id``.

        A missing project results in a 404 via ``get_object_or_404``.
        """
        project_id = self.kwargs['project_id']
        owner = get_object_or_404(Project, pk=project_id)
        return owner.documents.all()
class LabelView(SuperUserMixin, LoginRequiredMixin, TemplateView):
    """Render the ``admin/label.html`` template."""

    template_name = 'admin/label.html'
class StatsView(SuperUserMixin, LoginRequiredMixin, TemplateView):
    """Render the ``admin/stats.html`` template."""

    template_name = 'admin/stats.html'
class GuidelineView(SuperUserMixin, LoginRequiredMixin, TemplateView):
    """Render the ``admin/guideline.html`` template."""

    template_name = 'admin/guideline.html'
class DataUpload(SuperUserMixin, LoginRequiredMixin, TemplateView):
    """Render the dataset upload page and import an uploaded file as documents.

    ``post`` reads the ``format`` form field (``'csv'`` or ``'json'``) and the
    uploaded ``file``, converts the content to ``Document`` instances and
    stores them in batches of ``settings.IMPORT_BATCH_SIZE``.
    """

    template_name = 'admin/dataset_upload.html'

    class ImportFileError(Exception):
        """Upload problem whose ``message`` is shown to the user as-is."""

        def __init__(self, message):
            # Initialise the base class too, so str(e) and log output carry
            # the message and not only the ``message`` attribute used by post().
            Exception.__init__(self, message)
            self.message = message

    def extract_metadata_csv(self, row, text_col, header_without_text):
        """Return the non-text cells of ``row`` as a JSON object string.

        Cells are paired positionally with ``header_without_text``; ``zip``
        drops any surplus cells or titles.
        """
        vals_without_text = [val for i, val in enumerate(row) if i != text_col]
        return json.dumps(dict(zip(header_without_text, vals_without_text)))

    def csv_to_documents(self, project, file, text_key='text'):
        """Convert an uploaded CSV file into unsaved ``Document`` objects.

        The first row is treated as a header when it contains ``text_key``.
        A single-column file without such a header is read as plain text rows
        (the first row is then data, not a header).

        Args:
            project: project the documents are attached to.
            file: binary file object holding UTF-8 encoded CSV.
            text_key: header title of the column that holds the document text.

        Returns:
            A lazy iterable of ``Document`` objects, or an empty list when the
            file is empty or its first row is blank.

        Raises:
            DataUpload.ImportFileError: the file has several columns and none
                of them is titled ``text_key``.
        """
        # 'utf-8-sig' decodes plain UTF-8 identically but also swallows a
        # leading BOM, which would otherwise be glued to the first header title.
        form_data = TextIOWrapper(file, encoding='utf-8-sig')
        reader = csv.reader(form_data)

        # Default of None: an empty upload means "no documents", not an error.
        maybe_header = next(reader, None)
        if not maybe_header:
            return []

        if text_key in maybe_header:
            text_col = maybe_header.index(text_key)
        elif len(maybe_header) == 1:
            # No header line: push the consumed row back in front of the rest.
            reader = it.chain([maybe_header], reader)
            text_col = 0
        else:
            raise DataUpload.ImportFileError("CSV file must have either a title with \"text\" column or have only one column ")

        header_without_text = [title for i, title in enumerate(maybe_header)
                               if i != text_col]

        return (
            Document(
                text=row[text_col],
                metadata=self.extract_metadata_csv(row, text_col, header_without_text),
                project=project
            )
            for row in reader
            # csv.reader yields [] for blank lines; skip them instead of
            # failing on row[text_col].
            if row
        )

    def extract_metadata_json(self, entry, text_key):
        """Return ``entry`` without its ``text_key`` item as a JSON string.

        ``entry`` itself is left unmodified; a shallow copy is edited.
        """
        copy = entry.copy()
        del copy[text_key]
        return json.dumps(copy)

    def json_to_documents(self, project, file, text_key='text'):
        """Convert an uploaded file with one JSON object per line into documents.

        Args:
            project: project the documents are attached to.
            file: iterable of lines, each holding one JSON object.
            text_key: key of the object member that holds the document text.

        Returns:
            A lazy generator of unsaved ``Document`` objects. Parsing errors
            and missing ``text_key`` members surface while it is consumed.
        """
        # Whitespace-only lines (e.g. a trailing newline) carry no entry.
        parsed_entries = (json.loads(line) for line in file if line.strip())

        return (
            Document(text=entry[text_key], metadata=self.extract_metadata_json(entry, text_key), project=project)
            for entry in parsed_entries
        )

    def post(self, request, *args, **kwargs):
        """Import the uploaded file into the project given by ``project_id``.

        Redirects to ``dataset`` on success. On failure an error message is
        attached to the request and the user is redirected back to ``upload``.
        Documents are written batch by batch and no transaction is opened in
        this method, so batches stored before a failure may remain
        (NOTE(review): unless atomic requests are configured elsewhere).
        """
        project = get_object_or_404(Project, pk=kwargs.get('project_id'))
        import_format = request.POST['format']
        try:
            file = request.FILES['file'].file
            # Any other format leaves this empty, so nothing is imported.
            documents = []
            if import_format == 'csv':
                documents = self.csv_to_documents(project, file)

            elif import_format == 'json':
                documents = self.json_to_documents(project, file)

            # islice() must advance one shared iterator; on a plain list it
            # would restart from the beginning on every pass.
            documents = iter(documents)
            batch_size = settings.IMPORT_BATCH_SIZE
            while True:
                batch = list(it.islice(documents, batch_size))
                if not batch:
                    break

                Document.objects.bulk_create(batch, batch_size=batch_size)
            return HttpResponseRedirect(reverse('dataset', args=[project.id]))
        except DataUpload.ImportFileError as e:
            messages.add_message(request, messages.ERROR, e.message)
            return HttpResponseRedirect(reverse('upload', args=[project.id]))
        except Exception as e:
            logger.exception(e)
            messages.add_message(request, messages.ERROR, 'Something went wrong')
            return HttpResponseRedirect(reverse('upload', args=[project.id]))
class DataDownload(SuperUserMixin, LoginRequiredMixin, TemplateView):
    """Render the ``admin/dataset_download.html`` template."""

    template_name = 'admin/dataset_download.html'
class DataDownloadFile(SuperUserMixin, LoginRequiredMixin, View):
    """Serve a project's documents as a CSV or JSON-lines file attachment."""

    def get(self, request, *args, **kwargs):
        """Return the export selected by the ``format`` query parameter.

        The exported documents are those returned by
        ``project.get_documents(is_null=False).distinct()``. The attachment
        name is the lower-cased project name with whitespace runs replaced by
        underscores. An unsupported or missing format, or any error while
        building the export, adds an error message and redirects to
        ``download``.
        """
        project_id = self.kwargs['project_id']
        project = get_object_or_404(Project, pk=project_id)
        docs = project.get_documents(is_null=False).distinct()
        export_format = request.GET.get('format')
        filename = '_'.join(project.name.lower().split())

        exporters = {'csv': self.get_csv, 'json': self.get_json}
        exporter = exporters.get(export_format)
        if exporter is None:
            # Handle this explicitly instead of relying on an unbound
            # ``response`` variable to trip the generic error handler.
            logger.warning('Unsupported export format: %r', export_format)
            messages.add_message(request, messages.ERROR, "Something went wrong")
            return HttpResponseRedirect(reverse('download', args=[project.id]))

        try:
            return exporter(filename, docs)
        except Exception as e:
            logger.exception(e)
            messages.add_message(request, messages.ERROR, "Something went wrong")
            return HttpResponseRedirect(reverse('download', args=[project.id]))

    def get_csv(self, filename, docs):
        """Build a ``text/csv`` attachment from the rows of each ``d.to_csv()``."""
        response = HttpResponse(content_type='text/csv')
        response['Content-Disposition'] = 'attachment; filename="{}.csv"'.format(filename)
        writer = csv.writer(response)
        for d in docs:
            writer.writerows(d.to_csv())
        return response

    def get_json(self, filename, docs):
        """Build an attachment with one JSON value per line from ``d.to_json()``.

        Non-ASCII characters are written unescaped (``ensure_ascii=False``).
        """
        response = HttpResponse(content_type='text/json')
        response['Content-Disposition'] = 'attachment; filename="{}.json"'.format(filename)
        for d in docs:
            dump = json.dumps(d.to_json(), ensure_ascii=False)
            response.write(dump + '\n')  # write each json object end with a newline
        return response
class LoginView(BaseLoginView):
    """Login page that also tells the template which social logins are set up."""

    template_name = 'login.html'
    redirect_authenticated_user = True
    # Each flag is true when the corresponding setting has a truthy value.
    extra_context = {
        'github_login': bool(settings.SOCIAL_AUTH_GITHUB_KEY),
        'aad_login': bool(settings.SOCIAL_AUTH_AZUREAD_TENANT_OAUTH2_TENANT_ID),
    }

    def get_context_data(self, **kwargs):
        """Extend the base context with ``social_login_enabled``.

        The flag is true when any context entry whose key ends in ``_login``
        has a truthy value.
        """
        context = super(LoginView, self).get_context_data(**kwargs)
        login_flags = [
            enabled
            for name, enabled in context.items()
            if name.endswith('_login')
        ]
        context['social_login_enabled'] = any(login_flags)
        return context
class DemoTextClassification(TemplateView):
    """Render the ``demo/demo_text_classification.html`` template."""

    template_name = 'demo/demo_text_classification.html'
class DemoNamedEntityRecognition(TemplateView):
    """Render the ``demo/demo_named_entity.html`` template."""

    template_name = 'demo/demo_named_entity.html'
class DemoTranslation(TemplateView):
    """Render the ``demo/demo_translation.html`` template."""

    template_name = 'demo/demo_translation.html'