Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/continuous_integration.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
name: Continuous Integration
on:
workflow_dispatch: {}
schedule:
# schedule:
# Weekly every Saturday at midnight
- cron: '0 0 * * 6'
# - cron: '0 0 * * 6'

jobs:
test-lint-scan:
Expand Down
18 changes: 9 additions & 9 deletions local/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,42 +13,42 @@
class TestBase36Encode:
# Tests that the function returns "0" when given the number 0
def test_zero_returns_0(self: "TestBase36Encode") -> None:
assert utils.base36_encode(0) == "0"
assert utils.custom_base_encode(0) == "0"

# Tests that the function raises a ValueError when given a negative number
def test_negative_number_raises_value_error(self: "TestBase36Encode") -> None:
with pytest.raises(ValueError, match="Cannot encode negative integers."):
utils.base36_encode(-1)
utils.custom_base_encode(-1)

# Tests that the function can handle the maximum value for an integer
def test_max_int(self: "TestBase36Encode") -> None:
assert utils.base36_encode(2147483647) == "zik0zj"
assert utils.custom_base_encode(2147483647) == "zik0zj"

# Tests that the function returns a string consisting only of lowercase letters and digits
def test_returns_lowercase_letters_and_digits(self: "TestBase36Encode") -> None:
encoded_string = utils.base36_encode(123456789)
encoded_string = utils.custom_base_encode(123456789)
assert all(c in string.digits + string.ascii_lowercase for c in encoded_string)

# Tests that the function can handle a large prime number
def test_large_prime_number(self: "TestBase36Encode") -> None:
assert utils.base36_encode(982451653) == "g8xcjp"
assert utils.custom_base_encode(982451653) == "g8xcjp"

# Tests that the function can handle a large composite number
def test_large_composite_number(self: "TestBase36Encode") -> None:
assert utils.base36_encode(999999999) == "gjdgxr"
assert utils.custom_base_encode(999999999) == "gjdgxr"

# Tests that the function can handle a large power of 2
def test_large_power_of_2(self: "TestBase36Encode") -> None:
assert utils.base36_encode(2**50) == "b33j9ynrb4"
assert utils.custom_base_encode(2**50) == "b33j9ynrb4"

# Tests that the function can handle a large power of 10
def test_large_power_of_10(self: "TestBase36Encode") -> None:
assert utils.base36_encode(10**50) == "1ku3a4pjfxx2nd2gl07gtqboljenwn75s"
assert utils.custom_base_encode(10**50) == "1ku3a4pjfxx2nd2gl07gtqboljenwn75s"

# Tests the performance of the function for very large numbers
def test_performance_for_very_large_numbers(self: "TestBase36Encode") -> None:
assert (
utils.base36_encode(2**10000)
utils.custom_base_encode(2**10000)
== "2kqaqr9n8eopgtn6k95g23riodx51p4o3jwyma480okqkygdk2cn232qvv2svfuvbzb5dy9yeoqceom839h5k1yzf6izbx3rnrjx4pfili0r67ebjqnjhwqevgboilk8yf8ueh7pnd28hk2xttyvgmiqcew98grghfhqz4xe93yiifh69uh4kxt2ld4ba87izsm9u323ekhjh37k5tsyn9of1gds6lzq526i1r3f70gd74z9ni2b2ej456p7frfijzu9hpdkw1vpsuwds7zpbr7uwcu2qbb0o9djiehjcltlpptcqg19sxajz8vyffjeuajmdew0q9j9h7ovpgmyat92n3rx0fgfsu4kluaoi146z5v1t00q073fe6f6ijdue1g06lvqx9ijmezax2tooljo5c362eil7nkkz91d0n5ghfe1rhofsyujw4209klyhjzcu10ycc0oc19pq7sqo1ugs10ym59phjo752siiuj56z2yns5dtodwff92we0a9sgho98t2jgqtgiwilmcmksi5aighqi8h5pxsvyc6owrans06be91u2gtwi43s3i46rjkvpn85xzco0dgwfzzp8hto1axij5w3j69snadxvc0ed3971r936qzc0cyou95081lvplzcrid76kf8wbm68tj3zv4j9fj4dnl19etn1koc3hx8eixx85uda4rv92j3dl0ib8ixgeywnjmivv3kmich1balqsp1hd4b88r7aephwoc9uphu9u2eorlvjmquvqziuu7w1usbf3lmdx289lkdeyecnymd88xlb7thrlmxvzjlmnxfjzmqcnaie4sfz81mlqq6n48b4vdkh7gimhfy9rddxdy3fi2faochb1cnikvkwdz02qa5v9p3l7cnzhxqwhrqdhrcawfl2lbzvs26uzf2fyj2u5i7gv750aitt8drg8md9i551u8hpaw28r1p3qi29soq8d2iakhw2ezzeux71zlex537kgs18lgk56nw1gbrk00oqowzb5t117bvgp6221nr5slvxz3ozr2gqdzume9xuqpeeza98qd009p50qkmghyb3tqsyhmo72h9ptv98w6dh1vwrja2oa5lfi2ei234f2haziqh3jp0v8vv40tur6cydgv8dohm20yurg6gymgjgm3pb8q2hf57hxioeiyild50rersbfumcyc4ij5nhxavlg5s976mkphr0wurzuri1uk44vmrw6w3mftfozkxvo14aii0xct1t6czu9w9njatnx9igw51aqy3c7rjz7vugf0yzlo6t61a72ddp7lrd9ymgyca271cr17f16fojq5g688h6rgxg1hhb5zzedw23723s93mai3f603gd4utc7levfzpdhqt7l7t4b70efvq029zofoqrzgno8bpyg5vgz12jdun0of0ua47zzpa7wk0psnrq0yal1isbf6hbnlvfundz2hbrl4mfxnxxgobkv2jyaag9872k45x8uwau1mlzx8xhtc43829wyauwejfqifs4em5ipgih0ypn8bfjyq6b3blevcmrostiqldnqa7znph1zfm7xdjmhghmx57qit1ojxjkjliendm98redq1xtai822suagwdzhq1y8kf523m0nslhvjomttuydoqqe6pr5rf76aqe3pwx6pmcrub3gmvg6scojyjj4o429qjpzjsehoqe8y6rivp7i904dricmv1l75cfomy5x92cd34m7r8k886i7o58krwj5257b9wfq7dcj4mwq76vctcezae4v8jtz29fsjl2lbjqhxfjb8itp6x89ems9fga26i7t7zl5njmbqtp2jt9ommmhiz3ty7izh9gk5dxr26n9bz6j3swbu980hfd9v0vbdrn72ra3eiawckwkvhmdgfpi12cjamr0jf22jf268sg" # noqa: E501
)

Expand Down
47 changes: 42 additions & 5 deletions src/cuid2/cli.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,45 @@
from cuid2.generator import cuid_wrapper

generate_cuid = cuid_wrapper()

from argparse import ArgumentParser
from cuid2.generator import cuid_wrapper, MAXIMUM_LENGTH

def main() -> None:
    """Print out a CUID generated string. Used by the CLI console script.

    Command-line arguments
    ----------------------
    length : int, optional positional, default=24
        Total number of characters to generate. Values larger than
        `MAXIMUM_LENGTH` are produced by concatenating several generated chunks.
    -p, --pretty
        Print the result as dash-separated blocks.
    -pl, --pretty_length BLOCK-LENGTH
        Size of each pretty block. A positive value enables pretty printing on
        its own; the negative default (-5) only supplies the block size (5)
        used when `--pretty` is given.
    --alphabet ABCDE..
        Custom alphabet used for encoding; must contain at least two characters.
    """
    # parse cli arguments
    arguments_parser = ArgumentParser()
    arguments_parser.add_argument(
        nargs="?",
        dest="length",
        help="Length of generated cuid2",
        type=int,
        default=24,
    )
    arguments_parser.add_argument(
        "-p",
        "--pretty",
        help="Pretty print generated cuid2 in blocks",
        action="store_true",
        default=False,
    )
    arguments_parser.add_argument(
        "-pl",
        "--pretty_length",
        metavar="BLOCK-LENGTH",
        help="Set the custom length of pretty blocks",
        type=int,
        default=-5,
    )
    arguments_parser.add_argument(
        "--alphabet",
        metavar="ABCDE..",
        help="Set the custom alphabet to generate cuid2",
        type=str,
        default=None,
    )
    cli_arguments = arguments_parser.parse_args()

    # An empty alphabet makes the encoder divide by zero and a one-character
    # alphabet never reduces the number being encoded, so reject both up front.
    if cli_arguments.alphabet is not None and len(cli_arguments.alphabet) < 2:
        arguments_parser.error("--alphabet must contain at least two characters.")

    # The sign of pretty_length only signals "explicitly requested"; the block
    # size itself is the absolute value.
    pretty_enabled: bool = cli_arguments.pretty or cli_arguments.pretty_length > 0
    block_length: int = abs(cli_arguments.pretty_length)
    if pretty_enabled and block_length == 0:
        # A zero step would make range() raise ValueError further down.
        arguments_parser.error("--pretty_length must not be zero.")

    # generate cuid of arbitrary length, at most MAXIMUM_LENGTH characters per chunk
    cuid2: str = ""
    while len(cuid2) < cli_arguments.length:
        chunk_length = min(MAXIMUM_LENGTH, cli_arguments.length - len(cuid2))
        cuid2 += cuid_wrapper(chunk_length, alphabet=cli_arguments.alphabet)()

    # print
    if pretty_enabled:
        cuid2_list: list[str] = [cuid2[i : i + block_length] for i in range(0, len(cuid2), block_length)]
        print("-".join(cuid2_list))  # noqa: T201 (print statement)
    else:
        print(cuid2)  # noqa: T201 (print statement)
29 changes: 22 additions & 7 deletions src/cuid2/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def __init__(
counter: Callable[[int], Callable[[], int]] = utils.create_counter,
length: int = DEFAULT_LENGTH,
fingerprint: FingerprintCallable = utils.create_fingerprint,
alphabet: Optional[str] = None
) -> None:
"""Initialization function for the Cuid class that generates a universally unique,
base36 encoded string.
Expand All @@ -46,6 +47,9 @@ def __init__(
A length value greater than `MAXIMUM_LENGTH` (98 characters) will raise a ValueError.
fingerprint : "FingerprintCallable", default=utils.create_fingerprint
The "fingerprint" parameter is a callable function that generates a unique identifier.
alphabet : str, default=None
Alphabet to use in string encoding process.
If alphabet is not provided, the default (digits + lowercase letters) is used.

Raises
------
Expand All @@ -60,6 +64,7 @@ def __init__(
self._counter: Callable[[], int] = counter(floor(self._random.random() * INITIAL_COUNT_MAX))
self._length: int = length
self._fingerprint: str = fingerprint(random_generator=self._random)
self._alphabet = alphabet

def generate(self: Cuid, length: Optional[int] = None) -> str:
"""Generates a universally unique, base36 encoded string with a specified length.
Expand Down Expand Up @@ -88,28 +93,38 @@ def generate(self: Cuid, length: Optional[int] = None) -> str:
msg = "Length must never exceed 98 characters."
raise ValueError(msg)

first_letter: str = utils.create_letter(random_generator=self._random)
first_letter: str = utils.create_letter(random_generator=self._random, alphabet=self._alphabet)

base36_time: str = utils.base36_encode(time.time_ns())
base36_count: str = utils.base36_encode(self._counter())
base36_time: str = utils.custom_base_encode(time.time_ns(), self._alphabet)
base36_count: str = utils.custom_base_encode(self._counter(), self._alphabet)

salt: str = utils.create_entropy(length=length, random_generator=self._random)
hash_input: str = base36_time + salt + base36_count + self._fingerprint

return first_letter + utils.create_hash(hash_input)[1 : length or self._length]
return first_letter + utils.create_hash(hash_input, alphabet=self._alphabet)[1 : length or self._length]


def cuid_wrapper(length: Optional[int] = None, alphabet: Optional[str] = None) -> Callable[[], str]:
    """Wrap a single Cuid class instance and return a callable that generates a CUID string.

    Parameters
    ----------
    length : int, optional
        Length passed to `Cuid.generate` on every call of the returned callable.
        If it is not provided, the default length value provided during class initialization is used.
        A length value greater than `MAXIMUM_LENGTH` (98 characters) will raise a ValueError.
    alphabet : str, default=None
        Alphabet to use in the string encoding process.
        If alphabet is not provided, the default (digits + lowercase letters) is used.

    Returns
    -------
    Callable[[], str]
        A callable that generates a CUID string.
    """
    # One generator instance is created here and shared by every call of the closure.
    cuid_generator: Cuid = Cuid(alphabet=alphabet)

    def cuid() -> str:
        return cuid_generator.generate(length)

    return cuid
67 changes: 56 additions & 11 deletions src/cuid2/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def create_fingerprint(random_generator: Random, fingerprint_data: Optional[str]
return create_hash(fingerprint)[0:BIG_LENGTH]


def create_entropy(random_generator: Random, length: int = 4) -> str:
def create_entropy(random_generator: Random, length: int = 4, alphabet: Optional[str] = None) -> str:
"""Creates a random string of specified length using a base36 encoding.

Parameters
Expand All @@ -88,6 +88,9 @@ def create_entropy(random_generator: Random, length: int = 4) -> str:
length : int, default=4
The length parameter is an integer that specifies the length of the entropy string to be generated.
The default value is 4, but it can be set to any positive integer value.
alphabet : str, default=None
Alphabet to use in string encoding process.
If alphabet is not provided, the default (digits + lowercase letters) is used.

Returns
-------
Expand All @@ -107,12 +110,12 @@ def create_entropy(random_generator: Random, length: int = 4) -> str:
entropy: str = ""

while len(entropy) < length:
entropy += base36_encode(floor(random_generator.random() * 36))
entropy += custom_base_encode(floor(random_generator.random() * 36), alphabet=alphabet)

return entropy


def create_hash(data: str = "") -> str:
def create_hash(data: str = "", alphabet: Optional[str] = None) -> str:
"""Creates a hash value for a given string using the SHA-512 algorithm (prefers SHA3) and returns
it in base36 encoding format after dropping the first character.

Expand All @@ -121,6 +124,9 @@ def create_hash(data: str = "") -> str:
data : str, default=""
Data to be hashed. It is an optional parameter with a default value of an empty string.
If no value is provided for `data`, an empty string will be hashed.
alphabet : str, default=None
Alphabet to use in string encoding process.
If alphabet is not provided, the default (digits + lowercase letters) is used.

Returns
-------
Expand All @@ -132,10 +138,10 @@ def create_hash(data: str = "") -> str:
hashed_int: int = int.from_bytes(hashed_value.digest(), byteorder="big")

# Drop the first character because it will bias the histogram to the left.
return base36_encode(hashed_int)[1:]
return custom_base_encode(hashed_int, alphabet=alphabet)[1:]


def create_letter(random_generator: Random) -> str:
def create_letter(random_generator: Random, alphabet: Optional[str] = None) -> str:
"""Generates a random lowercase letter using a given random number generator.

Parameters
Expand All @@ -148,17 +154,55 @@ def create_letter(random_generator: Random) -> str:
str
a randomly generated lowercase letter from the English alphabet.
"""
alphabet: str = string.ascii_lowercase
if not alphabet:
alphabet = string.ascii_lowercase
return alphabet[floor(random_generator.random() * len(alphabet))]


def base36_encode(number: int) -> str:
"""Encodes a positive integer into a base36 string.
# def base36_encode(number: int) -> str:
# """Encodes a positive integer into a base36 string.

# Parameters
# ----------
# number : int
# Integer to be encoded as a base36 string.

# Returns
# -------
# str
# A string that represents the base36 encoded input integer.
# If the input integer is negative, a ValueError is raised.
# If the input integer is 0, the function returns the string "0".

# Raises
# ------
# ValueError
# If the input integer is negative.
# """
# if number < 0:
# msg = "Cannot encode negative integers."
# raise ValueError(msg)

# encoded_string: str = ""
# alphabet: str = string.digits + string.ascii_lowercase
# alphabet_length: int = len(alphabet)

# while number != 0:
# number, mod = divmod(number, alphabet_length)
# encoded_string = alphabet[mod] + encoded_string

# return encoded_string or "0"

def custom_base_encode(number: int, alphabet: Optional[str] = None) -> str:
"""Encodes a positive integer into a string with the provided alphabet.

Parameters
----------
number : int
Integer to be encoded as a base36 string.
Integer to be encoded as a string over the given alphabet.
alphabet : str
Alphabet to use in encoding process.
If alphabet is not provided, the default (digits + lowercase letters) is used.

Returns
-------
Expand All @@ -177,11 +221,12 @@ def base36_encode(number: int) -> str:
raise ValueError(msg)

encoded_string: str = ""
alphabet: str = string.digits + string.ascii_lowercase
if alphabet is None:
alphabet = string.digits + string.ascii_lowercase
alphabet_length: int = len(alphabet)

while number != 0:
number, mod = divmod(number, alphabet_length)
encoded_string = alphabet[mod] + encoded_string

return encoded_string or "0"
return encoded_string or alphabet[0]