From 1c892421e3886902c92ba3a4c501e13b814620a9 Mon Sep 17 00:00:00 2001 From: skaphan Date: Sat, 18 Apr 2026 18:17:56 -0700 Subject: [PATCH] Generate unguessable slugs; accept slug or PK on detail API The slug generator previously appended a counter (-1, -2, ...) to make slugs unique across records that shared a title. Two problems with that: 1. Slugs are enumerable. Anyone who can guess a title can walk the counter space and browse other visualizations the system has created, even those not listed publicly. 2. Counter values are reused across database resets. On an ephemeral or externally-managed instance, a stale client PATCHing by PK can silently overwrite an unrelated record that happened to land on the same integer after a reset. _get_unique_slug now appends 12 random hex chars (secrets.token_hex(6)) instead of an incrementing counter. The title prefix is preserved for SEO and admin browsing; the random suffix makes slugs non-enumerable and unique across resets. The existing uniqueness loop is retained as a belt-and-suspenders guard for the astronomically-rare 48-bit collision. No schema change, no data migration: old records keep their counter-style slugs and remain valid. To let clients address records by the new unguessable slug instead of the reusable PK, the three write-capable ModelViewSets now use a new PkOrSlugLookupMixin. Its get_object() treats digit-only lookup values as PKs (backward compat for existing clients), falling back to slug lookup if the PK is not found; non-digit values dispatch to slug. No URL-routing changes. Tests updated: slug assertions regex-match the new format. Added a new test for dual PK/slug lookup on the detail endpoint. Co-Authored-By: Claude Opus 4.7 --- visualizer/models.py | 28 ++++++++++++++-------- visualizer/tests/testRestApi.py | 30 ++++++++++++++++++++--- visualizer/tests/testSimple.py | 31 +++++++++++++++--------- visualizer/views.py | 42 +++++++++++++++++++++++++++++---- 4 files changed, 103 insertions(+), 28 deletions(-) diff --git a/visualizer/models.py b/visualizer/models.py index 71a293eb..60e6a2a8 100644 --- a/visualizer/models.py +++ b/visualizer/models.py @@ -1,5 +1,7 @@ """ The django object models """ +import secrets + from django.conf import settings from django.contrib.auth import get_user_model from django.db import models @@ -131,22 +133,28 @@ def get_all_public(cls): return JsonConfig.objects.all().exclude(owner__in=privateUsers) def _get_unique_slug(self): - # loop until the name is unique + # Build the title-based prefix (kept for human-readable URLs and SEO). slug = slugify(self.title) if slug.endswith('json'): slug = slug[:-4] - # at most 220 chars for slug, 20 for title, leaving 15 for numbers + # at most 220 chars for slug, 20 for title, leaving 15 for suffix slug = slug[:220] - # loop until the name is unique - num = 1 - uniqueSlug = slug - while JsonConfig.objects.filter(slug=uniqueSlug).exists(): - uniqueSlug = f'{slug}-{num}' - num += 1 - - return uniqueSlug + # Append 48 bits of random hex. This has two effects: + # 1. Slugs are not enumerable. Knowing another visualization's title + # tells you nothing about its URL — previously, `/v/`, + # `/v/<title>-1`, `/v/<title>-2`, ... let anyone walk the index. + # 2. Slugs never reuse across database resets. A stale client that + # PATCHes by slug can't accidentally overwrite a different + # record that happened to land on the same counter value. + # The uniqueness loop is retained as belt-and-suspenders, but with + # 2**48 values the chance of a collision is astronomically small. + candidate = f'{slug}-{secrets.token_hex(6)}' + while JsonConfig.objects.filter(slug=candidate).exists(): + candidate = f'{slug}-{secrets.token_hex(6)}' + + return candidate def __str__(self): return f"{self.slug}: {self.title}" diff --git a/visualizer/tests/testRestApi.py b/visualizer/tests/testRestApi.py index 617d5db9..ff1436e1 100644 --- a/visualizer/tests/testRestApi.py +++ b/visualizer/tests/testRestApi.py @@ -358,19 +358,43 @@ def test_analytics(self): self.assertEqual(logs[1].method, 'GET') def test_slug_generation(self): - """ Ensure slug generation increments on the rest API """ + """ Ensure slug generation produces unique, non-enumerable slugs + that share the title prefix but have distinct random suffixes. """ self._authenticate_as('notadmin') # Upload once response = self._upload_file_for_api(filenames.ONE_ROUND) self.assertEqual(response.status_code, status.HTTP_201_CREATED) + firstSlug = TestHelpers.get_latest_upload().slug # Upload again response = self._upload_file_for_api(filenames.ONE_ROUND) self.assertEqual(response.status_code, status.HTTP_201_CREATED) + secondSlug = TestHelpers.get_latest_upload().slug + + # Both slugs start with the title prefix, but the random suffix + # makes them distinct and non-enumerable. + self.assertRegex(firstSlug, r'^one-round-[0-9a-f]{12}$') + self.assertRegex(secondSlug, r'^one-round-[0-9a-f]{12}$') + self.assertNotEqual(firstSlug, secondSlug) + + def test_detail_accepts_pk_and_slug(self): + """ Ensure detail endpoints accept both PK and slug as the lookup + value. PK is retained for backward compatibility; slug is the + preferred (and non-enumerable) form. """ + self._authenticate_as('notadmin') + response = self._upload_file_for_api(filenames.ONE_ROUND) + self.assertEqual(response.status_code, status.HTTP_201_CREATED) - oneRoundObject = TestHelpers.get_latest_upload() - self.assertEqual(oneRoundObject.slug, 'one-round-1') + obj = TestHelpers.get_latest_upload() + + pk_response = self.client.get(f'/api/visualizations/{obj.pk}/') + slug_response = self.client.get(f'/api/visualizations/{obj.slug}/') + + self.assertEqual(pk_response.status_code, status.HTTP_200_OK) + self.assertEqual(slug_response.status_code, status.HTTP_200_OK) + self.assertEqual(pk_response.data['slug'], slug_response.data['slug']) + self.assertEqual(pk_response.data['slug'], obj.slug) def test_defaults(self): """ Ensure the correct defaults are used on upload """ diff --git a/visualizer/tests/testSimple.py b/visualizer/tests/testSimple.py index c42f88f6..426e5392 100644 --- a/visualizer/tests/testSimple.py +++ b/visualizer/tests/testSimple.py @@ -5,6 +5,7 @@ from datetime import timedelta from io import StringIO import json +import re from mock import patch from django.core.files import File @@ -386,17 +387,21 @@ def test_electionbuddy_data_is_sane(self): assert summary.rounds[2].winnerNames[0] == 'Vanilla' def test_uniqueness(self): - """ Ensures filenames are not overwritten """ - slug0 = "city-of-eastpointe-macomb-county-mi" - slug1 = "city-of-eastpointe-macomb-county-mi-1" + """ Ensures filenames are not overwritten when a title repeats. """ + slugPrefix = "city-of-eastpointe-macomb-county-mi" + slugPattern = re.compile(rf'^v/{slugPrefix}-[0-9a-f]{{12}}$') response = TestHelpers.get_multiwinner_upload_response(self.client) self.assertEqual(response.status_code, 302) - self.assertEqual(response['location'], f"v/{slug0}") + self.assertRegex(response['location'], slugPattern) + slug0 = response['location'][len('v/'):] response = TestHelpers.get_multiwinner_upload_response(self.client) self.assertEqual(response.status_code, 302) - self.assertEqual(response['location'], f"v/{slug1}") + self.assertRegex(response['location'], slugPattern) + slug1 = response['location'][len('v/'):] + + self.assertNotEqual(slug0, slug1) model0 = JsonConfig.objects.get(slug=slug0) model1 = JsonConfig.objects.get(slug=slug1) @@ -408,14 +413,17 @@ def test_management_commands(self): # Upload two files TestHelpers.get_multiwinner_upload_response(self.client) TestHelpers.get_multiwinner_upload_response(self.client) - slug = 'city-of-eastpointe-macomb-county-mi' + slugPrefix = 'city-of-eastpointe-macomb-county-mi' # Load those two via the database out = StringIO() call_command('checkUploads', 0, 100, stdout=out) - self.assertIn(f'0: Successfully loaded {slug}-1', out.getvalue()) - self.assertIn(f'1: Successfully loaded {slug}', out.getvalue()) - self.assertIn('Successfully loaded configs', out.getvalue()) + output = out.getvalue() + # Both uploads should be reported as successfully loaded by the command, + # regardless of the random slug suffixes. + self.assertEqual(len(re.findall(rf'Successfully loaded {slugPrefix}-[0-9a-f]{{12}}', + output)), 2) + self.assertIn('Successfully loaded configs', output) # Load all files in the testData/ directory (one of which raises a TypeError) out = StringIO() @@ -722,12 +730,13 @@ def response_with_data_source(url): def test_slug_generation(self): """ - Tests that slug generation increments with each upload + Tests that repeated uploads produce unique, title-prefixed slugs + with random suffixes (non-enumerable). """ TestHelpers.get_multiwinner_upload_response(self.client) TestHelpers.get_multiwinner_upload_response(self.client) slug = TestHelpers.get_latest_upload().slug - self.assertEqual(slug, 'city-of-eastpointe-macomb-county-mi-1') + self.assertRegex(slug, r'^city-of-eastpointe-macomb-county-mi-[0-9a-f]{12}$') def test_public_user(self): """ diff --git a/visualizer/views.py b/visualizer/views.py index b1a55dc1..14231f63 100644 --- a/visualizer/views.py +++ b/visualizer/views.py @@ -13,7 +13,7 @@ from django.contrib.auth.mixins import LoginRequiredMixin from django.core.cache import cache from django.http import Http404, JsonResponse, HttpResponse, HttpResponseNotModified -from django.shortcuts import render +from django.shortcuts import get_object_or_404, render from django.templatetags.static import static from django.urls import Resolver404 from django.urls import resolve @@ -516,7 +516,41 @@ def post(self, request): # For django REST -class JsonOnlyViewSet(LoggingMixin, viewsets.ModelViewSet): +class PkOrSlugLookupMixin: + """ + Allow ViewSet detail operations to be addressed by either integer PK + (legacy clients) or slug (new clients). The router URL pattern captures + a single path segment; we dispatch based on whether the value parses + as a positive integer and a matching PK exists. + + Rationale: the integer PK is a sequential auto-increment, which means + after a database reset (e.g. on an ephemeral / Cloud Run deployment) + newly-created records can occupy the same PKs as prior records held + by different clients. A stale PATCH from one client could then + silently overwrite another client's data. Slugs carry 48 bits of + randomness and do not collide across resets, so addressing by slug + is intrinsically safe. Integer-PK addressing is preserved for + backward compatibility with existing clients. + """ + + def get_object(self): + queryset = self.filter_queryset(self.get_queryset()) + lookup = self.kwargs[self.lookup_url_kwarg or self.lookup_field] + + obj = None + if lookup.isdigit(): + try: + obj = queryset.get(pk=int(lookup)) + except queryset.model.DoesNotExist: + pass + if obj is None: + obj = get_object_or_404(queryset, slug=lookup) + + self.check_object_permissions(self.request, obj) + return obj + + +class JsonOnlyViewSet(PkOrSlugLookupMixin, LoggingMixin, viewsets.ModelViewSet): """ API endpoint that allows tabulated JSONs to be viewed or edited. """ queryset = JsonConfig.objects.all().order_by('-uploadedAt') serializer_class = JsonOnlySerializer @@ -526,7 +560,7 @@ def perform_create(self, serializer): serializer.save(owner=self.request.user) -class VerboseViewSet(LoggingMixin, viewsets.ModelViewSet): +class VerboseViewSet(PkOrSlugLookupMixin, LoggingMixin, viewsets.ModelViewSet): """ API endpoint that expects all arguments to be supplied. """ queryset = JsonConfig.objects.all().order_by('-uploadedAt') serializer_class = VerboseSerializer @@ -536,7 +570,7 @@ def perform_create(self, serializer): serializer.save(owner=self.request.user) -class BallotpediaViewSet(LoggingMixin, viewsets.ModelViewSet): +class BallotpediaViewSet(PkOrSlugLookupMixin, LoggingMixin, viewsets.ModelViewSet): """ API endpoint with all ballotpedia fields """ queryset = JsonConfig.objects.all().order_by('-uploadedAt') serializer_class = BallotpediaSerializer