
Merge pull request #25 from Rox0z/Adiciona-Estatisticas-2

Adds statistics 2
Leonardo Albudane committed via GitHub, 3 months ago
commit 50c9d7f6bd
8 changed files with 2735 additions and 9 deletions
  1. backend/metrics/urls.py (8 changed lines)
  2. backend/metrics/views.py (984 changed lines)
  3. frontend/components/layout/TheSideBar.vue (9 changed lines)
  4. frontend/i18n/en/index.js (2 changed lines)
  5. frontend/i18n/en/projects/perspectives.js (9 changed lines)
  6. frontend/i18n/en/projects/reports.js (27 changed lines)
  7. frontend/pages/projects/_id/statistics/index.vue (1687 changed lines)
  8. frontend/repositories/metrics/apiMetricsRepository.ts (18 changed lines)

backend/metrics/urls.py (8 changed lines)

@@ -6,6 +6,10 @@ from .views import (
    ProgressAPI,
    RelationTypeDistribution,
    SpanTypeDistribution,
    DiscrepancyStatsAPI,
    PerspectiveStatsAPI,
    LabelStatsAPI,
    ExportReportsAPI,
)

urlpatterns = [
@@ -14,4 +18,8 @@ urlpatterns = [
    path(route="category-distribution", view=CategoryTypeDistribution.as_view(), name="category_distribution"),
    path(route="relation-distribution", view=RelationTypeDistribution.as_view(), name="relation_distribution"),
    path(route="span-distribution", view=SpanTypeDistribution.as_view(), name="span_distribution"),
    path(route="discrepancy-stats", view=DiscrepancyStatsAPI.as_view(), name="discrepancy_stats"),
    path(route="perspective-stats", view=PerspectiveStatsAPI.as_view(), name="perspective_stats"),
    path(route="label-stats", view=LabelStatsAPI.as_view(), name="label_stats"),
    path(route="export", view=ExportReportsAPI.as_view(), name="export_reports"),
]
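
The four new routes sit alongside the existing distribution endpoints one-for-one with the new views. A minimal client sketch, assuming the metrics app is mounted under /v1/projects/<project_id>/metrics/ with token auth (host, project id, and auth scheme are assumptions, not part of this diff):

import requests

BASE = "http://localhost:8000/v1/projects/42/metrics"  # hypothetical host and project id
session = requests.Session()
session.headers["Authorization"] = "Token <api-token>"  # assumed auth scheme

# Discrepancy statistics, optionally narrowed to a single label
discrepancies = session.get(f"{BASE}/discrepancy-stats", params={"label": "PER"}).json()

# Perspective statistics, optionally narrowed to one question
perspectives = session.get(f"{BASE}/perspective-stats", params={"question_id": 7}).json()

# Label statistics, and a CSV export of the label report
labels = session.get(f"{BASE}/label-stats").json()
report = session.get(f"{BASE}/export", params={"type": "labels", "format": "csv"}).content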

backend/metrics/views.py (984 changed lines)

@@ -1,6 +1,10 @@
import abc
import csv
import io
from collections import defaultdict

from django.shortcuts import get_object_or_404
from django.http import HttpResponse
from rest_framework import status
from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
@@ -11,6 +15,7 @@ from label_types.models import CategoryType, LabelType, RelationType, SpanType
from labels.models import Category, Label, Relation, Span
from projects.models import Member, Project
from projects.permissions import IsProjectAdmin, IsProjectStaffAndReadOnly
from projects.perspective.models import Question, Answer, QuestionOption


class ProgressAPI(APIView):
@@ -64,3 +69,982 @@ class SpanTypeDistribution(LabelDistribution):

class RelationTypeDistribution(LabelDistribution):
    model = Relation
    label_type = RelationType

class DiscrepancyStatsAPI(APIView):
    permission_classes = [IsAuthenticated & IsProjectAdmin]

    def get(self, request, *args, **kwargs):
        project_id = self.kwargs["project_id"]
        project = get_object_or_404(Project, pk=project_id)

        # Get filter parameters
        label_filter = request.GET.get('label')

        # Get active members only
        active_members = Member.objects.filter(project=project_id).select_related('user')
        active_user_ids = set(member.user_id for member in active_members)

        # Get all examples with annotations
        examples = Example.objects.filter(project=project_id)

        total_discrepancies = 0
        total_agreements = 0
        total_examples_analyzed = 0

        for example in examples:
            # Get users assigned to this specific example
            assigned_user_ids = set(
                example.assignments.filter(assignee_id__in=active_user_ids).values_list('assignee_id', flat=True)
            )

            # Skip examples with less than 2 assigned users
            if len(assigned_user_ids) < 2:
                continue

            # Get all annotations for this example
            annotations_by_user = defaultdict(list)

            # Get spans (only from users assigned to this example)
            spans = Span.objects.filter(example=example).select_related('label', 'user')
            for span in spans:
                # Skip if user is not assigned to this specific example
                if span.user_id not in assigned_user_ids:
                    continue
                label_text = span.label.text if span.label else 'No Label'
                # Apply label filter if specified
                if not label_filter or label_text == label_filter:
                    annotations_by_user[span.user_id].append({
                        'type': 'span',
                        'label': label_text,
                        'start': span.start_offset,
                        'end': span.end_offset
                    })

            # Get categories (only from users assigned to this example)
            categories = Category.objects.filter(example=example).select_related('label', 'user')
            for category in categories:
                # Skip if user is not assigned to this specific example
                if category.user_id not in assigned_user_ids:
                    continue
                label_text = category.label.text if category.label else 'No Label'
                # Apply label filter if specified
                if not label_filter or label_text == label_filter:
                    annotations_by_user[category.user_id].append({
                        'type': 'category',
                        'label': label_text
                    })

            # Only analyze examples that have annotations from multiple users
            user_ids = list(annotations_by_user.keys())
            if len(user_ids) >= 2:
                total_examples_analyzed += 1

                # Check for discrepancies between all pairs of users
                has_discrepancy = False
                for i in range(len(user_ids)):
                    for j in range(i + 1, len(user_ids)):
                        user1_annotations = annotations_by_user[user_ids[i]]
                        user2_annotations = annotations_by_user[user_ids[j]]
                        # Compare the sets of annotations
                        if self._annotations_differ(user1_annotations, user2_annotations):
                            has_discrepancy = True
                            break
                    if has_discrepancy:
                        break

                if has_discrepancy:
                    total_discrepancies += 1
                else:
                    total_agreements += 1

        # Calculate percentages
        discrepancy_percentage = (total_discrepancies / total_examples_analyzed * 100) if total_examples_analyzed > 0 else 0
        agreement_percentage = (total_agreements / total_examples_analyzed * 100) if total_examples_analyzed > 0 else 0

        # Calculate user agreements for pairs that actually annotated the same texts
        user_agreements = self._calculate_user_agreements(project_id, label_filter)

        # Get top discrepant examples with real data
        top_discrepant_examples = self._get_top_discrepant_examples(project_id, label_filter)

        # Get label-specific discrepancies
        label_discrepancies = self._calculate_label_discrepancies(project_id, label_filter)

        # Calculate severity distribution
        severity_distribution = self._calculate_severity_distribution(total_examples_analyzed, total_discrepancies, discrepancy_percentage)

        return Response({
            'total_examples': total_examples_analyzed,
            'total_discrepancies': total_discrepancies,
            'total_agreements': total_agreements,
            'discrepancy_percentage': round(discrepancy_percentage, 2),
            'agreement_percentage': round(agreement_percentage, 2),
            'user_agreements': user_agreements,
            'top_discrepant_examples': top_discrepant_examples,
            'label_discrepancies': label_discrepancies,
            'severity_distribution': severity_distribution,
            'available_labels': self._get_available_labels(project_id),
            'filter_applied': label_filter
        })

    def _annotations_differ(self, annotations1, annotations2):
        """Compare two sets of annotations to detect discrepancies"""
        # Convert annotations to comparable format
        set1 = set()
        set2 = set()
        for ann in annotations1:
            if ann['type'] == 'span':
                set1.add((ann['label'], ann['start'], ann['end']))
            else:
                set1.add((ann['label'],))
        for ann in annotations2:
            if ann['type'] == 'span':
                set2.add((ann['label'], ann['start'], ann['end']))
            else:
                set2.add((ann['label'],))
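        # Worked example (hypothetical data): if annotations1 reduces to
        # {('PER', 0, 5)} and annotations2 to {('ORG', 0, 5)}, the sets differ
        # and the example counts as a discrepancy. Exact set equality is the
        # bar, so one extra, missing, or shifted annotation flags the pair.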
        return set1 != set2
    def _get_available_labels(self, project_id):
        labels = set()
        # Get span labels
        span_labels = SpanType.objects.filter(project=project_id).values_list('text', flat=True)
        labels.update(span_labels)
        # Get category labels
        category_labels = CategoryType.objects.filter(project=project_id).values_list('text', flat=True)
        labels.update(category_labels)
        return sorted(list(labels))

    def _calculate_user_agreements(self, project_id, label_filter=None):
        """Calculate agreement rates between pairs of users who actually annotated the same texts"""
        # Get active members only
        active_members = Member.objects.filter(project=project_id).select_related('user')
        active_user_ids = set(member.user_id for member in active_members)

        # Create a mapping of user_id to username
        user_mapping = {member.user_id: member.user.username for member in active_members}

        # Get all examples with annotations
        examples = Example.objects.filter(project=project_id)

        # Track agreements and disagreements between user pairs
        user_pair_stats = defaultdict(lambda: {'agreements': 0, 'disagreements': 0, 'total_comparisons': 0})

        for example in examples:
            # Get users assigned to this specific example
            assigned_user_ids = set(
                example.assignments.filter(assignee_id__in=active_user_ids).values_list('assignee_id', flat=True)
            )

            # Skip examples with less than 2 assigned users
            if len(assigned_user_ids) < 2:
                continue

            # Get all annotations for this example
            annotations_by_user = defaultdict(list)

            # Get spans (only from users assigned to this example)
            spans = Span.objects.filter(example=example).select_related('label', 'user')
            for span in spans:
                # Skip if user is not assigned to this specific example
                if span.user_id not in assigned_user_ids:
                    continue
                label_text = span.label.text if span.label else 'No Label'
                # Apply label filter if specified
                if not label_filter or label_text == label_filter:
                    annotations_by_user[span.user_id].append({
                        'type': 'span',
                        'label': label_text,
                        'start': span.start_offset,
                        'end': span.end_offset
                    })

            # Get categories (only from users assigned to this example)
            categories = Category.objects.filter(example=example).select_related('label', 'user')
            for category in categories:
                # Skip if user is not assigned to this specific example
                if category.user_id not in assigned_user_ids:
                    continue
                label_text = category.label.text if category.label else 'No Label'
                # Apply label filter if specified
                if not label_filter or label_text == label_filter:
                    annotations_by_user[category.user_id].append({
                        'type': 'category',
                        'label': label_text
                    })

            # Only analyze examples that have annotations from multiple users
            user_ids = list(annotations_by_user.keys())
            if len(user_ids) >= 2:
                # Compare annotations between all pairs of users
                for i in range(len(user_ids)):
                    for j in range(i + 1, len(user_ids)):
                        user1_id = user_ids[i]
                        user2_id = user_ids[j]
                        user1_annotations = annotations_by_user[user1_id]
                        user2_annotations = annotations_by_user[user2_id]

                        # Create a consistent pair key (smaller id first)
                        pair_key = (min(user1_id, user2_id), max(user1_id, user2_id))

                        # Compare the sets of annotations
                        if self._annotations_differ(user1_annotations, user2_annotations):
                            user_pair_stats[pair_key]['disagreements'] += 1
                        else:
                            user_pair_stats[pair_key]['agreements'] += 1
                        user_pair_stats[pair_key]['total_comparisons'] += 1

        # Convert to list format with agreement percentages
        user_agreements = []
        for (user1_id, user2_id), stats in user_pair_stats.items():
            if stats['total_comparisons'] > 0:
                agreement_rate = round((stats['agreements'] / stats['total_comparisons']) * 100, 2)
                user1_name = user_mapping.get(user1_id, f'User {user1_id}')
                user2_name = user_mapping.get(user2_id, f'User {user2_id}')
                user_agreements.append({
                    'user_pair': f'{user1_name} vs {user2_name}',
                    'agreement_rate': agreement_rate,
                    'agreements': stats['agreements'],
                    'disagreements': stats['disagreements'],
                    'total_comparisons': stats['total_comparisons']
                })

        # Sort by agreement rate (descending)
        user_agreements.sort(key=lambda x: x['agreement_rate'], reverse=True)
        return user_agreements

    def _get_top_discrepant_examples(self, project_id, label_filter=None, limit=10):
        """Get examples with the highest discrepancy rates"""
        # Get active members only
        active_members = Member.objects.filter(project=project_id).select_related('user')
        active_user_ids = set(member.user_id for member in active_members)

        # Create a mapping of user_id to username
        user_mapping = {member.user_id: member.user.username for member in active_members}

        # Get all examples with annotations
        examples = Example.objects.filter(project=project_id)

        discrepant_examples = []

        for example in examples:
            # Get users assigned to this specific example
            assigned_user_ids = set(
                example.assignments.filter(assignee_id__in=active_user_ids).values_list('assignee_id', flat=True)
            )

            # Skip examples with less than 2 assigned users
            if len(assigned_user_ids) < 2:
                continue

            # Get all annotations for this example
            annotations_by_user = defaultdict(list)

            # Get spans (only from users assigned to this example)
            spans = Span.objects.filter(example=example).select_related('label', 'user')
            for span in spans:
                # Skip if user is not assigned to this specific example
                if span.user_id not in assigned_user_ids:
                    continue
                label_text = span.label.text if span.label else 'No Label'
                # Apply label filter if specified
                if not label_filter or label_text == label_filter:
                    annotations_by_user[span.user_id].append({
                        'type': 'span',
                        'label': label_text,
                        'start': span.start_offset,
                        'end': span.end_offset
                    })

            # Get categories (only from users assigned to this example)
            categories = Category.objects.filter(example=example).select_related('label', 'user')
            for category in categories:
                # Skip if user is not assigned to this specific example
                if category.user_id not in assigned_user_ids:
                    continue
                label_text = category.label.text if category.label else 'No Label'
                # Apply label filter if specified
                if not label_filter or label_text == label_filter:
                    annotations_by_user[category.user_id].append({
                        'type': 'category',
                        'label': label_text
                    })

            # Only analyze examples that have annotations from multiple users
            user_ids = list(annotations_by_user.keys())
            if len(user_ids) >= 2:
                # Count disagreements and agreements for this example
                total_comparisons = 0
                disagreements = 0
                conflicting_labels = set()
                annotator_names = []

                # Compare annotations between all pairs of users
                for i in range(len(user_ids)):
                    for j in range(i + 1, len(user_ids)):
                        user1_id = user_ids[i]
                        user2_id = user_ids[j]
                        user1_annotations = annotations_by_user[user1_id]
                        user2_annotations = annotations_by_user[user2_id]
                        total_comparisons += 1

                        # Compare the sets of annotations
                        if self._annotations_differ(user1_annotations, user2_annotations):
                            disagreements += 1
                            # Collect conflicting labels
                            for ann in user1_annotations + user2_annotations:
                                conflicting_labels.add(ann['label'])

                # Collect annotator names
                for user_id in user_ids:
                    annotator_names.append(user_mapping.get(user_id, f'User {user_id}'))

                # Calculate discrepancy rate for this example
                if total_comparisons > 0:
                    discrepancy_rate = round((disagreements / total_comparisons) * 100, 2)

                    # Only include examples with discrepancies
                    if disagreements > 0:
                        discrepant_examples.append({
                            'id': example.id,
                            'text': example.text,
                            'discrepancy_rate': discrepancy_rate,
                            'conflicting_labels': list(conflicting_labels),
                            'annotator_count': len(user_ids),
                            'annotators': annotator_names,
                            'disagreements': disagreements,
                            'total_comparisons': total_comparisons
                        })

        # Sort by discrepancy rate (descending) and limit results
        discrepant_examples.sort(key=lambda x: x['discrepancy_rate'], reverse=True)
        return discrepant_examples[:limit]

    def _calculate_label_discrepancies(self, project_id, label_filter=None):
        """Calculate discrepancy rates for each label"""
        # Get active members only
        active_members = Member.objects.filter(project=project_id).select_related('user')
        active_user_ids = set(member.user_id for member in active_members)

        # Get all examples with annotations
        examples = Example.objects.filter(project=project_id)

        # Track label statistics
        label_stats = defaultdict(lambda: {'total_examples': 0, 'discrepant_examples': 0})

        for example in examples:
            # Get users assigned to this specific example
            assigned_user_ids = set(
                example.assignments.filter(assignee_id__in=active_user_ids).values_list('assignee_id', flat=True)
            )

            # Skip examples with less than 2 assigned users
            if len(assigned_user_ids) < 2:
                continue

            # Get all annotations for this example
            annotations_by_user = defaultdict(list)

            # Get spans (only from users assigned to this example)
            spans = Span.objects.filter(example=example).select_related('label', 'user')
            for span in spans:
                # Skip if user is not assigned to this specific example
                if span.user_id not in assigned_user_ids:
                    continue
                label_text = span.label.text if span.label else 'No Label'
                # Apply label filter if specified
                if not label_filter or label_text == label_filter:
                    annotations_by_user[span.user_id].append({
                        'type': 'span',
                        'label': label_text,
                        'start': span.start_offset,
                        'end': span.end_offset
                    })

            # Get categories (only from users assigned to this example)
            categories = Category.objects.filter(example=example).select_related('label', 'user')
            for category in categories:
                # Skip if user is not assigned to this specific example
                if category.user_id not in assigned_user_ids:
                    continue
                label_text = category.label.text if category.label else 'No Label'
                # Apply label filter if specified
                if not label_filter or label_text == label_filter:
                    annotations_by_user[category.user_id].append({
                        'type': 'category',
                        'label': label_text
                    })

            # Only analyze examples that have annotations from multiple users
            user_ids = list(annotations_by_user.keys())
            if len(user_ids) >= 2:
                # Collect all labels used by each user for this example
                user_label_sets = {}
                all_labels_in_example = set()
                for user_id in user_ids:
                    user_labels = set()
                    for annotation in annotations_by_user[user_id]:
                        user_labels.add(annotation['label'])
                        all_labels_in_example.add(annotation['label'])
                    user_label_sets[user_id] = user_labels

                # For each label that appears in this example, check if there's disagreement
                for label in all_labels_in_example:
                    label_stats[label]['total_examples'] += 1

                    # Check if there's disagreement about this specific label
                    # Disagreement occurs when some users include the label and others don't
                    users_with_label = []
                    users_without_label = []
                    for user_id in user_ids:
                        if label in user_label_sets[user_id]:
                            users_with_label.append(user_id)
                        else:
                            users_without_label.append(user_id)

                    # There's a discrepancy for this label if some users have it and others don't
                    if len(users_with_label) > 0 and len(users_without_label) > 0:
                        label_stats[label]['discrepant_examples'] += 1

        # Convert to list format with discrepancy rates
        label_discrepancies = []
        for label, stats in label_stats.items():
            if stats['total_examples'] > 0:
                discrepancy_rate = round((stats['discrepant_examples'] / stats['total_examples']) * 100, 2)
                label_discrepancies.append({
                    'label': label,
                    'count': stats['discrepant_examples'],
                    'rate': discrepancy_rate,
                    'total_examples': stats['total_examples']
                })

        # Sort by discrepancy rate (descending)
        label_discrepancies.sort(key=lambda x: x['rate'], reverse=True)
        return label_discrepancies

    def _calculate_severity_distribution(self, total_examples, total_discrepancies, discrepancy_percentage):
        """Calculate severity distribution based on discrepancy rates"""
        if total_examples == 0 or total_discrepancies == 0:
            return []

        # For small numbers of discrepancies, assign all to a single severity level
        # based on the overall discrepancy percentage
        if discrepancy_percentage >= 75:
            severity_level = 'critical'
        elif discrepancy_percentage >= 50:
            severity_level = 'high'
        elif discrepancy_percentage >= 25:
            severity_level = 'medium'
        else:
            severity_level = 'low'
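        # For instance (hypothetical numbers): 10 discrepancies across 30
        # analyzed examples is a 33.33% rate, which falls in the 25-50 band
        # and is bucketed as 'medium'.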
        # Return all discrepancies as a single severity level
        return [{
            'level': severity_level,
            'count': total_discrepancies,
            'percentage': 100.0
        }]

class PerspectiveStatsAPI(APIView):
    permission_classes = [IsAuthenticated & IsProjectAdmin]

    def get(self, request, *args, **kwargs):
        try:
            project_id = self.kwargs["project_id"]

            # Simplified version to test basic functionality
            stats = {
                'total_questions': 0,
                'total_answers': 0,
                'questions': [],
                'available_questions': []
            }

            # Try to get project first
            try:
                project = get_object_or_404(Project, pk=project_id)
            except Exception as e:
                print(f"Error getting project {project_id}: {e}")
                return Response(stats)

            # Try to get questions
            try:
                questions = Question.objects.filter(project=project)
                stats['total_questions'] = questions.count()

                # Get filter parameters
                question_filter = request.GET.get('question_id')
                if question_filter:
                    questions = questions.filter(id=question_filter)

                # Get total answers
                stats['total_answers'] = Answer.objects.filter(question__project=project).count()

                # Process each question
                for question in questions:
                    try:
                        answers = Answer.objects.filter(question=question)
                        question_stats = {
                            'id': question.id,
                            'text': question.text,
                            'question_type': question.question_type,
                            'answer_count': answers.count(),
                            'response_rate': 0
                        }

                        # Calculate response rate
                        try:
                            total_members = Member.objects.filter(project=project).count()
                            if total_members > 0:
                                question_stats['response_rate'] = round((answers.count() / total_members) * 100, 2)
                        except Exception as e:
                            print(f"Error calculating response rate: {e}")

                        # Get answer distribution for closed questions
                        if question.question_type == 'closed':
                            try:
                                option_stats = []
                                for option in question.options.all():
                                    option_answers = answers.filter(selected_option=option).count()
                                    option_stats.append({
                                        'id': option.id,
                                        'text': option.text,
                                        'count': option_answers,
                                        'percentage': round((option_answers / answers.count()) * 100, 2) if answers.count() > 0 else 0
                                    })
                                question_stats['options'] = option_stats
                            except Exception as e:
                                print(f"Error processing options: {e}")
                                question_stats['options'] = []

                        stats['questions'].append(question_stats)
                    except Exception as e:
                        print(f"Error processing question {question.id}: {e}")
                        continue

                # Get available questions for filter
                try:
                    all_questions = Question.objects.filter(project=project).values('id', 'text')
                    stats['available_questions'] = list(all_questions)
                except Exception as e:
                    print(f"Error getting available questions: {e}")
                    stats['available_questions'] = []
            except Exception as e:
                print(f"Error processing questions: {e}")

            return Response(stats)
        except Exception as e:
            print(f"Error in PerspectiveStatsAPI: {e}")
            import traceback
            traceback.print_exc()
            return Response({'error': str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)

class LabelStatsAPI(APIView):
    permission_classes = [IsAuthenticated & IsProjectAdmin]

    def get(self, request, *args, **kwargs):
        try:
            project_id = self.kwargs["project_id"]

            # Simple counts first
            span_count = Span.objects.filter(example__project=project_id).count()
            category_count = Category.objects.filter(example__project=project_id).count()
            total_labels = span_count + category_count

            # Get available labels
            span_labels = list(SpanType.objects.filter(project=project_id).values_list('text', flat=True))
            category_labels = list(CategoryType.objects.filter(project=project_id).values_list('text', flat=True))
            available_labels = sorted(list(set(span_labels + category_labels)))

            # Get available users
            available_users = []
            members = Member.objects.filter(project=project_id).select_related('user')
            active_user_ids = set()
            for member in members:
                available_users.append({
                    'id': member.user_id,
                    'username': member.user.username
                })
                active_user_ids.add(member.user_id)

            # Label distribution with users
            label_distribution = {}

            # Process spans (only from active members)
            spans = Span.objects.filter(example__project=project_id).select_related('label', 'user')
            for span in spans:
                # Skip if user is no longer a member of the project
                if span.user_id not in active_user_ids:
                    continue
                label_text = span.label.text if span.label else 'No Label'
                if label_text not in label_distribution:
                    label_distribution[label_text] = {
                        'label': label_text,
                        'count': 0,
                        'users': set()
                    }
                label_distribution[label_text]['count'] += 1
                label_distribution[label_text]['users'].add(span.user)

            # Process categories (only from active members)
            categories = Category.objects.filter(example__project=project_id).select_related('label', 'user')
            for category in categories:
                # Skip if user is no longer a member of the project
                if category.user_id not in active_user_ids:
                    continue
                label_text = category.label.text if category.label else 'No Label'
                if label_text not in label_distribution:
                    label_distribution[label_text] = {
                        'label': label_text,
                        'count': 0,
                        'users': set()
                    }
                label_distribution[label_text]['count'] += 1
                label_distribution[label_text]['users'].add(category.user)

            # Recalculate total labels (only from active members)
            total_labels_active = sum(data['count'] for data in label_distribution.values())

            # Convert to list and add percentages
            label_distribution_list = []
            for label_data in label_distribution.values():
                percentage = round((label_data['count'] / total_labels_active * 100), 2) if total_labels_active > 0 else 0
                # Only include users that are still active members
                users_list = [
                    {'id': user.id, 'username': user.username}
                    for user in label_data['users']
                    if user.id in active_user_ids
                ]
                label_distribution_list.append({
                    'label': label_data['label'],
                    'count': label_data['count'],
                    'percentage': percentage,
                    'users': users_list
                })

            # Sort by count
            label_distribution_list.sort(key=lambda x: x['count'], reverse=True)

            # Basic user performance
            user_performance = []
            for member in members:
                user_span_count = Span.objects.filter(example__project=project_id, user=member.user).count()
                user_category_count = Category.objects.filter(example__project=project_id, user=member.user).count()
                user_total = user_span_count + user_category_count

                user_examples = set()
                user_examples.update(Span.objects.filter(example__project=project_id, user=member.user).values_list('example_id', flat=True))
                user_examples.update(Category.objects.filter(example__project=project_id, user=member.user).values_list('example_id', flat=True))
                examples_count = len(user_examples)
                labels_per_example = round(user_total / examples_count, 2) if examples_count > 0 else 0

                user_performance.append({
                    'user_id': member.user_id,
                    'username': member.user.username,
                    'total_labels': user_total,
                    'examples_labeled': examples_count,
                    'labels_per_example': labels_per_example
                })

            # Sort by total labels
            user_performance.sort(key=lambda x: x['total_labels'], reverse=True)

            # Calculate total examples
            span_examples = set(Span.objects.filter(example__project=project_id).values_list('example_id', flat=True))
            category_examples = set(Category.objects.filter(example__project=project_id).values_list('example_id', flat=True))
            total_examples = len(span_examples.union(category_examples))
            avg_labels_per_example = round(total_labels / total_examples, 2) if total_examples > 0 else 0

            return Response({
                'total_labels': total_labels_active,  # Use active members count
                'total_examples': total_examples,
                'total_users': len(available_users),
                'avg_labels_per_example': round(total_labels_active / total_examples, 2) if total_examples > 0 else 0,
                'label_distribution': label_distribution_list,
                'user_performance': user_performance,
                'available_labels': available_labels,
                'available_users': available_users
            })
        except Exception as e:
            print(f"Error in LabelStatsAPI: {e}")
            import traceback
            traceback.print_exc()
            return Response({'error': str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)

class ExportReportsAPI(APIView):
    permission_classes = [IsAuthenticated & IsProjectAdmin]

    def get(self, request, *args, **kwargs):
        project_id = self.kwargs["project_id"]
        export_type = request.GET.get('type', 'labels')  # 'discrepancies', 'perspectives', or 'labels'
        format_type = request.GET.get('format', 'csv')  # 'csv' or 'pdf'

        if export_type == 'discrepancies':
            return self._export_discrepancies(project_id, format_type)
        elif export_type == 'perspectives':
            return self._export_perspectives(project_id, format_type)
        elif export_type == 'labels':
            return self._export_labels(project_id, format_type, request)
        else:
            return Response({'error': 'Invalid export type'}, status=status.HTTP_400_BAD_REQUEST)

    def _export_discrepancies(self, project_id, format_type):
        if format_type != 'csv':
            return Response({'error': 'Only CSV format supported for now'}, status=status.HTTP_400_BAD_REQUEST)

        # Get active members only
        active_members = Member.objects.filter(project=project_id).select_related('user')
        active_user_ids = set(member.user_id for member in active_members)

        # Get discrepancy data
        examples = Example.objects.filter(project=project_id)

        # Create CSV
        output = io.StringIO()
        writer = csv.writer(output)

        # Write header
        writer.writerow(['Example ID', 'Text', 'Has Discrepancy', 'Agreement %', 'Annotators', 'Labels', 'Annotation Details'])

        for example in examples:
            # Get users assigned to this specific example
            assigned_user_ids = set(
                example.assignments.filter(assignee_id__in=active_user_ids).values_list('assignee_id', flat=True)
            )

            # Skip examples with less than 2 assigned users
            if len(assigned_user_ids) < 2:
                continue

            # Get annotations for this example
            annotations_by_user = defaultdict(list)

            # Get spans (only from users assigned to this example)
            spans = Span.objects.filter(example=example).select_related('label', 'user')
            for span in spans:
                # Skip if user is not assigned to this specific example
                if span.user_id not in assigned_user_ids:
                    continue
                label_text = span.label.text if span.label else 'No Label'
                annotations_by_user[span.user_id].append({
                    'type': 'span',
                    'label': label_text,
                    'start': span.start_offset,
                    'end': span.end_offset
                })

            # Get categories (only from users assigned to this example)
            categories = Category.objects.filter(example=example).select_related('label', 'user')
            for category in categories:
                # Skip if user is not assigned to this specific example
                if category.user_id not in assigned_user_ids:
                    continue
                label_text = category.label.text if category.label else 'No Label'
                annotations_by_user[category.user_id].append({
                    'type': 'category',
                    'label': label_text
                })

            # Only include examples with multiple annotators
            user_ids = list(annotations_by_user.keys())
            if len(user_ids) < 2:
                continue

            # Check for discrepancies, reusing the pair-comparison helper defined
            # on DiscrepancyStatsAPI (this class defines no _annotations_differ of its own)
            has_discrepancy = False
            for i in range(len(user_ids)):
                for j in range(i + 1, len(user_ids)):
                    user1_annotations = annotations_by_user[user_ids[i]]
                    user2_annotations = annotations_by_user[user_ids[j]]
                    if DiscrepancyStatsAPI._annotations_differ(self, user1_annotations, user2_annotations):
                        has_discrepancy = True
                        break
                if has_discrepancy:
                    break

            # Calculate agreement percentage
            agreement_percentage = 0 if has_discrepancy else 100

            # Get member names
            member_names = []
            for user_id in user_ids:
                try:
                    member = Member.objects.get(project=project_id, user_id=user_id)
                    member_names.append(member.user.username)
                except Member.DoesNotExist:
                    member_names.append(f'User {user_id}')

            # Get all unique labels
            all_labels = set()
            annotation_details = []
            for user_id, user_annotations in annotations_by_user.items():
                user_labels = []
                for ann in user_annotations:
                    all_labels.add(ann['label'])
                    if ann['type'] == 'span':
                        user_labels.append(f"{ann['label']}({ann['start']}-{ann['end']})")
                    else:
                        user_labels.append(ann['label'])
                try:
                    member = Member.objects.get(project=project_id, user_id=user_id)
                    username = member.user.username
                except Member.DoesNotExist:
                    username = f'User {user_id}'
                annotation_details.append(f"{username}: {', '.join(user_labels)}")

            writer.writerow([
                example.id,
                example.text[:100] + '...' if len(example.text) > 100 else example.text,
                'Yes' if has_discrepancy else 'No',
                f'{agreement_percentage}%',
                ', '.join(member_names),
                ', '.join(sorted(all_labels)),
                ' | '.join(annotation_details)
            ])

        # Create response
        response = HttpResponse(output.getvalue(), content_type='text/csv')
        response['Content-Disposition'] = f'attachment; filename="discrepancies_report_{project_id}.csv"'
        return response

    def _export_perspectives(self, project_id, format_type):
        if format_type != 'csv':
            return Response({'error': 'Only CSV format supported for now'}, status=status.HTTP_400_BAD_REQUEST)

        # Get perspective data
        questions = Question.objects.filter(project=project_id)

        # Create CSV
        output = io.StringIO()
        writer = csv.writer(output)

        # Write header
        writer.writerow(['Question ID', 'Question Text', 'Question Type', 'Total Answers', 'Response Rate'])

        total_members = Member.objects.filter(project=project_id).count()
        for question in questions:
            answers_count = Answer.objects.filter(question=question).count()
            response_rate = round((answers_count / total_members) * 100, 2) if total_members > 0 else 0
            writer.writerow([
                question.id,
                question.text,
                question.question_type,
                answers_count,
                f'{response_rate}%'
            ])

        # Create response
        response = HttpResponse(output.getvalue(), content_type='text/csv')
        response['Content-Disposition'] = f'attachment; filename="perspectives_report_{project_id}.csv"'
        return response

    def _export_labels(self, project_id, format_type, request):
        if format_type != 'csv':
            return Response({'error': 'Only CSV format supported for now'}, status=status.HTTP_400_BAD_REQUEST)

        # Get filter parameters
        label_filter = request.GET.get('label')
        user_filter = request.GET.get('user_id')

        # Get label data
        all_spans = Span.objects.filter(example__project=project_id).select_related('label', 'user', 'example')
        all_categories = Category.objects.filter(example__project=project_id).select_related('label', 'user', 'example')

        # Apply filters
        if label_filter:
            all_spans = all_spans.filter(label__text=label_filter)
            all_categories = all_categories.filter(label__text=label_filter)
        if user_filter:
            all_spans = all_spans.filter(user_id=user_filter)
            all_categories = all_categories.filter(user_id=user_filter)

        # Create CSV
        output = io.StringIO()
        writer = csv.writer(output)

        # Write header
        writer.writerow(['Example ID', 'Text', 'Label', 'Label Type', 'User', 'Start Offset', 'End Offset', 'Created At'])

        # Write spans
        for span in all_spans:
            writer.writerow([
                span.example.id,
                span.example.text[:100] + '...' if len(span.example.text) > 100 else span.example.text,
                span.label.text if span.label else 'No Label',
                'Span',
                span.user.username,
                span.start_offset,
                span.end_offset,
                span.created_at.strftime('%Y-%m-%d %H:%M:%S') if hasattr(span, 'created_at') else ''
            ])

        # Write categories
        for category in all_categories:
            writer.writerow([
                category.example.id,
                category.example.text[:100] + '...' if len(category.example.text) > 100 else category.example.text,
                category.label.text if category.label else 'No Label',
                'Category',
                category.user.username,
                '',  # No start offset for categories
                '',  # No end offset for categories
                category.created_at.strftime('%Y-%m-%d %H:%M:%S') if hasattr(category, 'created_at') else ''
            ])

        # Create response
        response = HttpResponse(output.getvalue(), content_type='text/csv')
        response['Content-Disposition'] = f'attachment; filename="label_analysis_{project_id}.csv"'
        return response
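
The discrepancy export writes one row per multi-annotator example. A small, hedged sketch of consuming that CSV on the client side, using only the column names from the header written above (the function and variable names are illustrative, not part of this PR):

import csv
import io

def summarize_discrepancy_csv(raw_bytes: bytes) -> dict:
    """Summarize a downloaded discrepancies_report_<project_id>.csv."""
    reader = csv.DictReader(io.StringIO(raw_bytes.decode('utf-8')))
    rows = list(reader)
    flagged = [row for row in rows if row['Has Discrepancy'] == 'Yes']
    return {
        'examples': len(rows),
        'discrepant': len(flagged),
        'labels_in_conflict': sorted({label.strip()
                                      for row in flagged
                                      for label in row['Labels'].split(',')}),
    }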

frontend/components/layout/TheSideBar.vue (9 changed lines)

@@ -43,7 +43,8 @@ import {
  mdiRobotOutline,
  mdiFileDocumentOutline,
  mdiEyeOutline,
  mdiVote
  mdiVote,
  mdiFileChartOutline
} from '@mdi/js'

import { getLinkToAnnotationPage } from '~/presenter/linkToAnnotationPage'
@@ -158,6 +159,12 @@ export default {
        text: 'Voting',
        link: 'voting',
        isVisible: true
      },
      {
        icon: mdiFileChartOutline,
        text: 'Statistics',
        link: 'statistics',
        isVisible: this.isProjectAdmin
      },
      {
        icon: mdiCog,

frontend/i18n/en/index.js (2 changed lines)

@@ -18,6 +18,7 @@ import overview from './projects/overview'
import statistics from './projects/statistics'
import settings from './projects/settings'
import perspectives from './projects/perspectives'
import reports from './projects/reports'

export default {
  home,
@@ -39,5 +40,6 @@ export default {
  statistics,
  settings,
  perspectives,
  reports,
  group
}

frontend/i18n/en/projects/perspectives.js (9 changed lines)

@@ -9,12 +9,7 @@ export default {
  createQuestion: 'Create Question',
  editQuestion: 'Edit Question',
  confirmDelete: 'Confirm Delete',
  confirmDeleteAll: 'Confirm Delete Perspective',
  deleteQuestionConfirm: 'Are you sure you want to delete this question? This action cannot be undone.',
  deleteAllQuestionsConfirm: 'Are you sure you want to delete this perspective for the project?',
  deleteAllQuestionsWarning: 'This will permanently delete the entire perspective and all associated answers',
  thisActionCannotBeUndone: 'This action cannot be undone.',
  deleteAll: 'DELETE PERSPECTIVE',
  questionText: 'Question Text',
  questionType: 'Question Type',
  order: 'Order',
@@ -43,15 +38,13 @@ export default {
  questionCreatedSuccess: 'Question created successfully',
  questionUpdatedSuccess: 'Question updated successfully',
  questionDeletedSuccess: 'Question deleted successfully',
  allQuestionsDeletedSuccess: 'Perspective deleted successfully',
  answerSubmittedSuccess: 'Answer submitted successfully',
  failedToLoadQuestions: 'Failed to load questions',
  failedToLoadStatistics: 'Failed to load statistics',
  failedToSaveQuestion: 'Failed to save question',
  failedToDeleteQuestion: 'Failed to delete question',
  failedToDeleteAllQuestions: 'Failed to delete all questions',
  failedToSubmitAnswer: 'Failed to submit answer',
  databaseConnectionError: 'Unable to connect to database. Cannot delete perspective.',
  databaseConnectionError: 'Unable to connect to database. Cannot delete question.',
  databaseConnectionErrorTitle: 'Database Connection Error',

  // Validation messages

frontend/i18n/en/projects/reports.js (27 changed lines)

@@ -0,0 +1,27 @@
export default {
  reports: 'Reports',
  discrepancies: 'Discrepancies',
  perspectives: 'Perspectives',
  overview: 'Overview',
  totalExamples: 'Total Examples',
  agreements: 'Agreements',
  totalAgreements: 'Total Agreements',
  totalDiscrepancies: 'Total Discrepancies',
  availableLabels: 'Available Labels',
  filterByLabel: 'Filter by Label',
  filterByQuestion: 'Filter by Question',
  refresh: 'Refresh',
  export: 'Export',
  exportCsv: 'Export CSV',
  totalQuestions: 'Total Questions',
  totalAnswers: 'Total Answers',
  questionsDetails: 'Question Details',
  question: 'Question',
  type: 'Type',
  answers: 'Answers',
  responseRate: 'Response Rate',
  exportStarted: 'Export started successfully',
  exportFailed: 'Failed to export data',
  loadDiscrepancyFailed: 'Failed to load discrepancy statistics',
  loadPerspectiveFailed: 'Failed to load perspective statistics'
}

frontend/pages/projects/_id/statistics/index.vue (1687 changed lines)

File diff suppressed because it is too large

frontend/repositories/metrics/apiMetricsRepository.ts (18 changed lines)

@@ -22,6 +22,24 @@ export class APIMetricsRepository {
    return response.data
  }

  async fetchDiscrepancyStats(projectId: string, params: any = {}): Promise<any> {
    const url = `/projects/${projectId}/metrics/discrepancy-stats`
    const response = await this.request.get(url, { params })
    return response.data
  }
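
  // Example (hypothetical id and params): fetchDiscrepancyStats('42', { label: 'PER' })
  // issues GET /projects/42/metrics/discrepancy-stats?label=PER and resolves with
  // the JSON payload produced by DiscrepancyStatsAPI on the backend.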
  async fetchPerspectiveStats(projectId: string, params: any = {}): Promise<any> {
    const url = `/projects/${projectId}/metrics/perspective-stats`
    const response = await this.request.get(url, { params })
    return response.data
  }

  async fetchLabelStats(projectId: string, params: any = {}): Promise<any> {
    const url = `/projects/${projectId}/metrics/label-stats`
    const response = await this.request.get(url, { params })
    return response.data
  }

  async fetchMemberProgress(projectId: string): Promise<Progress> {
    const url = `/projects/${projectId}/metrics/member-progress`
    const response = await this.request.get(url)
