diff --git a/visualizer/models.py b/visualizer/models.py index 71a293eb..60e6a2a8 100644 --- a/visualizer/models.py +++ b/visualizer/models.py @@ -1,5 +1,7 @@ """ The django object models """ +import secrets + from django.conf import settings from django.contrib.auth import get_user_model from django.db import models @@ -131,22 +133,28 @@ def get_all_public(cls): return JsonConfig.objects.all().exclude(owner__in=privateUsers) def _get_unique_slug(self): - # loop until the name is unique + # Build the title-based prefix (kept for human-readable URLs and SEO). slug = slugify(self.title) if slug.endswith('json'): slug = slug[:-4] - # at most 220 chars for slug, 20 for title, leaving 15 for numbers + # at most 220 chars for slug, 20 for title, leaving 15 for suffix slug = slug[:220] - # loop until the name is unique - num = 1 - uniqueSlug = slug - while JsonConfig.objects.filter(slug=uniqueSlug).exists(): - uniqueSlug = f'{slug}-{num}' - num += 1 - - return uniqueSlug + # Append 48 bits of random hex. This has two effects: + # 1. Slugs are not enumerable. Knowing another visualization's title + # tells you nothing about its URL — previously, `/v/`, + # `/v/<title>-1`, `/v/<title>-2`, ... let anyone walk the index. + # 2. Slugs never reuse across database resets. A stale client that + # PATCHes by slug can't accidentally overwrite a different + # record that happened to land on the same counter value. + # The uniqueness loop is retained as belt-and-suspenders, but with + # 2**48 values the chance of a collision is astronomically small. + candidate = f'{slug}-{secrets.token_hex(6)}' + while JsonConfig.objects.filter(slug=candidate).exists(): + candidate = f'{slug}-{secrets.token_hex(6)}' + + return candidate def __str__(self): return f"{self.slug}: {self.title}" diff --git a/visualizer/tests/testRestApi.py b/visualizer/tests/testRestApi.py index 617d5db9..ff1436e1 100644 --- a/visualizer/tests/testRestApi.py +++ b/visualizer/tests/testRestApi.py @@ -358,19 +358,43 @@ def test_analytics(self): self.assertEqual(logs[1].method, 'GET') def test_slug_generation(self): - """ Ensure slug generation increments on the rest API """ + """ Ensure slug generation produces unique, non-enumerable slugs + that share the title prefix but have distinct random suffixes. """ self._authenticate_as('notadmin') # Upload once response = self._upload_file_for_api(filenames.ONE_ROUND) self.assertEqual(response.status_code, status.HTTP_201_CREATED) + firstSlug = TestHelpers.get_latest_upload().slug # Upload again response = self._upload_file_for_api(filenames.ONE_ROUND) self.assertEqual(response.status_code, status.HTTP_201_CREATED) + secondSlug = TestHelpers.get_latest_upload().slug + + # Both slugs start with the title prefix, but the random suffix + # makes them distinct and non-enumerable. + self.assertRegex(firstSlug, r'^one-round-[0-9a-f]{12}$') + self.assertRegex(secondSlug, r'^one-round-[0-9a-f]{12}$') + self.assertNotEqual(firstSlug, secondSlug) + + def test_detail_accepts_pk_and_slug(self): + """ Ensure detail endpoints accept both PK and slug as the lookup + value. PK is retained for backward compatibility; slug is the + preferred (and non-enumerable) form. """ + self._authenticate_as('notadmin') + response = self._upload_file_for_api(filenames.ONE_ROUND) + self.assertEqual(response.status_code, status.HTTP_201_CREATED) - oneRoundObject = TestHelpers.get_latest_upload() - self.assertEqual(oneRoundObject.slug, 'one-round-1') + obj = TestHelpers.get_latest_upload() + + pk_response = self.client.get(f'/api/visualizations/{obj.pk}/') + slug_response = self.client.get(f'/api/visualizations/{obj.slug}/') + + self.assertEqual(pk_response.status_code, status.HTTP_200_OK) + self.assertEqual(slug_response.status_code, status.HTTP_200_OK) + self.assertEqual(pk_response.data['slug'], slug_response.data['slug']) + self.assertEqual(pk_response.data['slug'], obj.slug) def test_defaults(self): """ Ensure the correct defaults are used on upload """ diff --git a/visualizer/tests/testSimple.py b/visualizer/tests/testSimple.py index c42f88f6..426e5392 100644 --- a/visualizer/tests/testSimple.py +++ b/visualizer/tests/testSimple.py @@ -5,6 +5,7 @@ from datetime import timedelta from io import StringIO import json +import re from mock import patch from django.core.files import File @@ -386,17 +387,21 @@ def test_electionbuddy_data_is_sane(self): assert summary.rounds[2].winnerNames[0] == 'Vanilla' def test_uniqueness(self): - """ Ensures filenames are not overwritten """ - slug0 = "city-of-eastpointe-macomb-county-mi" - slug1 = "city-of-eastpointe-macomb-county-mi-1" + """ Ensures filenames are not overwritten when a title repeats. """ + slugPrefix = "city-of-eastpointe-macomb-county-mi" + slugPattern = re.compile(rf'^v/{slugPrefix}-[0-9a-f]{{12}}$') response = TestHelpers.get_multiwinner_upload_response(self.client) self.assertEqual(response.status_code, 302) - self.assertEqual(response['location'], f"v/{slug0}") + self.assertRegex(response['location'], slugPattern) + slug0 = response['location'][len('v/'):] response = TestHelpers.get_multiwinner_upload_response(self.client) self.assertEqual(response.status_code, 302) - self.assertEqual(response['location'], f"v/{slug1}") + self.assertRegex(response['location'], slugPattern) + slug1 = response['location'][len('v/'):] + + self.assertNotEqual(slug0, slug1) model0 = JsonConfig.objects.get(slug=slug0) model1 = JsonConfig.objects.get(slug=slug1) @@ -408,14 +413,17 @@ def test_management_commands(self): # Upload two files TestHelpers.get_multiwinner_upload_response(self.client) TestHelpers.get_multiwinner_upload_response(self.client) - slug = 'city-of-eastpointe-macomb-county-mi' + slugPrefix = 'city-of-eastpointe-macomb-county-mi' # Load those two via the database out = StringIO() call_command('checkUploads', 0, 100, stdout=out) - self.assertIn(f'0: Successfully loaded {slug}-1', out.getvalue()) - self.assertIn(f'1: Successfully loaded {slug}', out.getvalue()) - self.assertIn('Successfully loaded configs', out.getvalue()) + output = out.getvalue() + # Both uploads should be reported as successfully loaded by the command, + # regardless of the random slug suffixes. + self.assertEqual(len(re.findall(rf'Successfully loaded {slugPrefix}-[0-9a-f]{{12}}', + output)), 2) + self.assertIn('Successfully loaded configs', output) # Load all files in the testData/ directory (one of which raises a TypeError) out = StringIO() @@ -722,12 +730,13 @@ def response_with_data_source(url): def test_slug_generation(self): """ - Tests that slug generation increments with each upload + Tests that repeated uploads produce unique, title-prefixed slugs + with random suffixes (non-enumerable). """ TestHelpers.get_multiwinner_upload_response(self.client) TestHelpers.get_multiwinner_upload_response(self.client) slug = TestHelpers.get_latest_upload().slug - self.assertEqual(slug, 'city-of-eastpointe-macomb-county-mi-1') + self.assertRegex(slug, r'^city-of-eastpointe-macomb-county-mi-[0-9a-f]{12}$') def test_public_user(self): """ diff --git a/visualizer/views.py b/visualizer/views.py index b1a55dc1..14231f63 100644 --- a/visualizer/views.py +++ b/visualizer/views.py @@ -13,7 +13,7 @@ from django.contrib.auth.mixins import LoginRequiredMixin from django.core.cache import cache from django.http import Http404, JsonResponse, HttpResponse, HttpResponseNotModified -from django.shortcuts import render +from django.shortcuts import get_object_or_404, render from django.templatetags.static import static from django.urls import Resolver404 from django.urls import resolve @@ -516,7 +516,41 @@ def post(self, request): # For django REST -class JsonOnlyViewSet(LoggingMixin, viewsets.ModelViewSet): +class PkOrSlugLookupMixin: + """ + Allow ViewSet detail operations to be addressed by either integer PK + (legacy clients) or slug (new clients). The router URL pattern captures + a single path segment; we dispatch based on whether the value parses + as a positive integer and a matching PK exists. + + Rationale: the integer PK is a sequential auto-increment, which means + after a database reset (e.g. on an ephemeral / Cloud Run deployment) + newly-created records can occupy the same PKs as prior records held + by different clients. A stale PATCH from one client could then + silently overwrite another client's data. Slugs carry 48 bits of + randomness and do not collide across resets, so addressing by slug + is intrinsically safe. Integer-PK addressing is preserved for + backward compatibility with existing clients. + """ + + def get_object(self): + queryset = self.filter_queryset(self.get_queryset()) + lookup = self.kwargs[self.lookup_url_kwarg or self.lookup_field] + + obj = None + if lookup.isdigit(): + try: + obj = queryset.get(pk=int(lookup)) + except queryset.model.DoesNotExist: + pass + if obj is None: + obj = get_object_or_404(queryset, slug=lookup) + + self.check_object_permissions(self.request, obj) + return obj + + +class JsonOnlyViewSet(PkOrSlugLookupMixin, LoggingMixin, viewsets.ModelViewSet): """ API endpoint that allows tabulated JSONs to be viewed or edited. """ queryset = JsonConfig.objects.all().order_by('-uploadedAt') serializer_class = JsonOnlySerializer @@ -526,7 +560,7 @@ def perform_create(self, serializer): serializer.save(owner=self.request.user) -class VerboseViewSet(LoggingMixin, viewsets.ModelViewSet): +class VerboseViewSet(PkOrSlugLookupMixin, LoggingMixin, viewsets.ModelViewSet): """ API endpoint that expects all arguments to be supplied. """ queryset = JsonConfig.objects.all().order_by('-uploadedAt') serializer_class = VerboseSerializer @@ -536,7 +570,7 @@ def perform_create(self, serializer): serializer.save(owner=self.request.user) -class BallotpediaViewSet(LoggingMixin, viewsets.ModelViewSet): +class BallotpediaViewSet(PkOrSlugLookupMixin, LoggingMixin, viewsets.ModelViewSet): """ API endpoint with all ballotpedia fields """ queryset = JsonConfig.objects.all().order_by('-uploadedAt') serializer_class = BallotpediaSerializer