From f588ff4362d3dd5d7abb2b4a4bb072804493fd24 Mon Sep 17 00:00:00 2001 From: Daniel Gavrilov Date: Wed, 12 Mar 2025 09:48:42 +0100 Subject: [PATCH 1/2] test: random string distribution bias --- src/random.test.ts | 128 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) diff --git a/src/random.test.ts b/src/random.test.ts index b76e94c..6b91712 100644 --- a/src/random.test.ts +++ b/src/random.test.ts @@ -2,6 +2,55 @@ import { describe, expect, it, vi } from "vitest"; import { createRandomStringGenerator } from "./random"; import { getRandomValues } from "uncrypto"; +// Utility functions for distribution tests +function generateLargeRandomSample( + alphabet: "a-z" | "0-9", + sampleCount = 1000, + stringLength = 256, +): string { + const generator = createRandomStringGenerator(alphabet); + return new Array(sampleCount) + .fill(null) + .map(() => generator(stringLength)) + .join(""); +} + +function getCharCounts( + randomString: string, + expectedCharSet: string, +): Map<string, number> { + const charCounts = new Map(); + + // Initialize all character counts to 0 + for (const char of expectedCharSet) { + charCounts.set(char, 0); + } + + // Count occurrences + for (const char of randomString) { + const currentCount = charCounts.get(char) || 0; + charCounts.set(char, currentCount + 1); + } + + return charCounts; +} + +function calculateChiSquared( + charCounts: Map<string, number>, + totalChars: number, + charSetLength: number, +): number { + const expectedCount = totalChars / charSetLength; + let chiSquared = 0; + + for (const count of charCounts.values()) { + const deviation = count - expectedCount; + chiSquared += (deviation * deviation) / expectedCount; + } + + return chiSquared; +} + describe("createRandomStringGenerator", () => { it("generates a random string of specified length", () => { const generator = createRandomStringGenerator("a-z"); @@ -92,4 +141,83 @@ describe("createRandomStringGenerator", () => { vi.restoreAllMocks(); } }); + + describe("produces unbiased 
distribution across characters", () => { + it("with a 26-character alphabet", () => { + // Choose a small alphabet to make bias easier to detect + const alphabet = "a-z"; + const expectedCharSet = "abcdefghijklmnopqrstuvwxyz"; + const charSetLength = expectedCharSet.length; + + // Generate a very large sample to ensure statistical significance + const randomString = generateLargeRandomSample(alphabet); + + // Count occurrences of each character + const charCounts = getCharCounts(randomString, expectedCharSet); + + // Calculate chi-squared statistic for uniformity + const chiSquared = calculateChiSquared( + charCounts, + randomString.length, + charSetLength, + ); + + // For a 26-character alphabet (25 degrees of freedom) at 99.9% confidence, + // the critical chi-squared value is approximately 52.62 + // If our value exceeds this, the distribution is likely not uniform + // + // However, truly random values will occasionally produce high chi-squared values + // by chance. To avoid random test failures, we use a much higher threshold + // that would indicate a systematic bias rather than random variation. 
+ + // Critical value multiplied by 3 to reduce false positives + const criticalValue = 52.62 * 3; + + expect(chiSquared).toBeLessThan(criticalValue); + }); + + it("with a 10-character alphabet", () => { + // Also test the distribution with a different, non-power-of-2 alphabet + // which is more likely to expose modulo bias + const alphabet = "0-9"; // 10 characters, not a power of 2 + const expectedCharSet = "0123456789"; + const charSetLength = expectedCharSet.length; + + // Generate a very large sample to ensure statistical significance + const randomString = generateLargeRandomSample(alphabet); + + // Count occurrences of each character + const charCounts = getCharCounts(randomString, expectedCharSet); + + // Calculate chi-squared statistic for uniformity + const chiSquared = calculateChiSquared( + charCounts, + randomString.length, + charSetLength, + ); + + // For a 10-character alphabet (9 degrees of freedom) at 99.9% confidence, + // the critical chi-squared value is approximately 27.877 + // Again, we multiply by 3 to avoid false positives + const criticalValue = 27.877 * 3; + + expect(chiSquared).toBeLessThan(criticalValue); + + // Check min/max frequency difference as another bias indicator + // In a truly uniform distribution, the difference should be relatively small + const counts = Array.from(charCounts.values()); + const minCount = Math.min(...counts); + const maxCount = Math.max(...counts); + + // Calculate expected count per character in a perfect distribution + const expectedCount = randomString.length / charSetLength; + + // Maximum allowed deviation as a percentage of expected count + // The 0.1 (10%) is chosen to be high enough to avoid random failures + // but low enough to catch serious bias + const maxAllowedDeviation = expectedCount * 0.1; + + expect(maxCount - minCount).toBeLessThan(maxAllowedDeviation); + }); + }); }); From d30aab3e49a7b002233dd2742f2732438cf07c95 Mon Sep 17 00:00:00 2001 From: Len Boyette Date: Sun, 20 Apr 2025 12:49:49 
-0700 Subject: [PATCH 2/2] fix: random string distribution bias --- src/random.test.ts | 2 +- src/random.ts | 23 ++++++++++++++++++----- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/random.test.ts b/src/random.test.ts index 6b91712..5f3f722 100644 --- a/src/random.test.ts +++ b/src/random.test.ts @@ -138,7 +138,7 @@ describe("createRandomStringGenerator", () => { expect(randomString).toHaveLength(256); } finally { // Restore the original implementation - vi.restoreAllMocks(); + vi.unmock("uncrypto"); } }); diff --git a/src/random.ts b/src/random.ts index 558c111..0902a37 100644 --- a/src/random.ts +++ b/src/random.ts @@ -42,13 +42,26 @@ export function createRandomStringGenerator( charSetLength = charSet.length; } - const charArray = new Uint8Array(length); - getRandomValues(charArray); + const maxValid = Math.floor(256 / charSetLength) * charSetLength; + const buf = new Uint8Array(length * 2); + const bufLength = buf.length; let result = ""; - for (let i = 0; i < length; i++) { - const index = charArray[i] % charSetLength; - result += charSet[index]; + let bufIndex = bufLength; + let rand: number; + + while (result.length < length) { + if (bufIndex >= bufLength) { + getRandomValues(buf); + bufIndex = 0; + } + + rand = buf[bufIndex++]; + + // avoid modulo bias + if (rand < maxValid) { + result += charSet[rand % charSetLength]; + } } return result;