import csv import json from io import TextIOWrapper import itertools as it import logging from django.contrib.auth.views import LoginView as BaseLoginView from django.urls import reverse from django.http import HttpResponse, HttpResponseRedirect from django.shortcuts import get_object_or_404 from django.views import View from django.views.generic import TemplateView, CreateView from django.views.generic.list import ListView from django.contrib.auth.mixins import LoginRequiredMixin from django.contrib import messages from .permissions import SuperUserMixin from .forms import ProjectForm from .models import Document, Project from app import settings logger = logging.getLogger(__name__) class IndexView(TemplateView): template_name = 'index.html' class ProjectView(LoginRequiredMixin, TemplateView): def get_template_names(self): project = get_object_or_404(Project, pk=self.kwargs['project_id']) return [project.get_template_name()] class ProjectsView(LoginRequiredMixin, CreateView): form_class = ProjectForm template_name = 'projects.html' class DatasetView(SuperUserMixin, LoginRequiredMixin, ListView): template_name = 'admin/dataset.html' paginate_by = 5 def get_queryset(self): project = get_object_or_404(Project, pk=self.kwargs['project_id']) return project.documents.all() class LabelView(SuperUserMixin, LoginRequiredMixin, TemplateView): template_name = 'admin/label.html' class StatsView(SuperUserMixin, LoginRequiredMixin, TemplateView): template_name = 'admin/stats.html' class GuidelineView(SuperUserMixin, LoginRequiredMixin, TemplateView): template_name = 'admin/guideline.html' class DataUpload(SuperUserMixin, LoginRequiredMixin, TemplateView): template_name = 'admin/dataset_upload.html' class ImportFileError(Exception): def __init__(self, message): self.message = message def extract_metadata_csv(self, row, text_col, header_without_text): vals_without_text = [val for i, val in enumerate(row) if i != text_col] return json.dumps(dict(zip(header_without_text, vals_without_text))) def csv_to_documents(self, project, file, text_key='text'): form_data = TextIOWrapper(file, encoding='utf-8') reader = csv.reader(form_data) maybe_header = next(reader) if maybe_header: if text_key in maybe_header: text_col = maybe_header.index(text_key) elif len(maybe_header) == 1: reader = it.chain([maybe_header], reader) text_col = 0 else: raise DataUpload.ImportFileError("CSV file must have either a title with \"text\" column or have only one column ") header_without_text = [title for i, title in enumerate(maybe_header) if i != text_col] return ( Document( text=row[text_col], metadata=self.extract_metadata_csv(row, text_col, header_without_text), project=project ) for row in reader ) else: return [] def extract_metadata_json(self, entry, text_key): copy = entry.copy() del copy[text_key] return json.dumps(copy) def json_to_documents(self, project, file, text_key='text'): parsed_entries = (json.loads(line) for line in file) return ( Document(text=entry[text_key], metadata=self.extract_metadata_json(entry, text_key), project=project) for entry in parsed_entries ) def post(self, request, *args, **kwargs): project = get_object_or_404(Project, pk=kwargs.get('project_id')) import_format = request.POST['format'] try: file = request.FILES['file'].file documents = [] if import_format == 'csv': documents = self.csv_to_documents(project, file) elif import_format == 'json': documents = self.json_to_documents(project, file) batch_size = settings.IMPORT_BATCH_SIZE while True: batch = list(it.islice(documents, batch_size)) if not batch: break Document.objects.bulk_create(batch, batch_size=batch_size) return HttpResponseRedirect(reverse('dataset', args=[project.id])) except DataUpload.ImportFileError as e: messages.add_message(request, messages.ERROR, e.message) return HttpResponseRedirect(reverse('upload', args=[project.id])) except Exception as e: logger.exception(e) messages.add_message(request, messages.ERROR, 'Something went wrong') return HttpResponseRedirect(reverse('upload', args=[project.id])) class DataDownload(SuperUserMixin, LoginRequiredMixin, TemplateView): template_name = 'admin/dataset_download.html' class DataDownloadFile(SuperUserMixin, LoginRequiredMixin, View): def get(self, request, *args, **kwargs): project_id = self.kwargs['project_id'] project = get_object_or_404(Project, pk=project_id) docs = project.get_documents(is_null=False).distinct() export_format = request.GET.get('format') filename = '_'.join(project.name.lower().split()) try: if export_format == 'csv': response = self.get_csv(filename, docs) elif export_format == 'json': response = self.get_json(filename, docs) return response except Exception as e: logger.exception(e) messages.add_message(request, messages.ERROR, "Something went wrong") return HttpResponseRedirect(reverse('download', args=[project.id])) def get_csv(self, filename, docs): response = HttpResponse(content_type='text/csv') response['Content-Disposition'] = 'attachment; filename="{}.csv"'.format(filename) writer = csv.writer(response) for d in docs: writer.writerows(d.to_csv()) return response def get_json(self, filename, docs): response = HttpResponse(content_type='text/json') response['Content-Disposition'] = 'attachment; filename="{}.json"'.format(filename) for d in docs: dump = json.dumps(d.to_json(), ensure_ascii=False) response.write(dump + '\n') # write each json object end with a newline return response class LoginView(BaseLoginView): template_name = 'login.html' redirect_authenticated_user = True extra_context = { 'github_login': bool(settings.SOCIAL_AUTH_GITHUB_KEY), 'aad_login': bool(settings.SOCIAL_AUTH_AZUREAD_TENANT_OAUTH2_TENANT_ID), } def get_context_data(self, **kwargs): context = super(LoginView, self).get_context_data(**kwargs) context['social_login_enabled'] = any(value for key, value in context.items() if key.endswith('_login')) return context class DemoTextClassification(TemplateView): template_name = 'demo/demo_text_classification.html' class DemoNamedEntityRecognition(TemplateView): template_name = 'demo/demo_named_entity.html' class DemoTranslation(TemplateView): template_name = 'demo/demo_translation.html'