diff --git a/.github/workflows/continuous_integration.yml b/.github/workflows/continuous_integration.yml index 2cbe2ac..68b6788 100644 --- a/.github/workflows/continuous_integration.yml +++ b/.github/workflows/continuous_integration.yml @@ -1,9 +1,9 @@ name: Continuous Integration on: workflow_dispatch: {} - schedule: + # schedule: # Weekly every Saturday at midnight - - cron: '0 0 * * 6' + # - cron: '0 0 * * 6' jobs: test-lint-scan: diff --git a/local/tests/test_utils.py b/local/tests/test_utils.py index 509a3d5..5fff4ea 100644 --- a/local/tests/test_utils.py +++ b/local/tests/test_utils.py @@ -13,42 +13,42 @@ class TestBase36Encode: # Tests that the function returns "0" when given the number 0 def test_zero_returns_0(self: "TestBase36Encode") -> None: - assert utils.base36_encode(0) == "0" + assert utils.custom_base_encode(0) == "0" # Tests that the function raises a ValueError when given a negative number def test_negative_number_raises_value_error(self: "TestBase36Encode") -> None: with pytest.raises(ValueError, match="Cannot encode negative integers."): - utils.base36_encode(-1) + utils.custom_base_encode(-1) # Tests that the function can handle the maximum value for an integer def test_max_int(self: "TestBase36Encode") -> None: - assert utils.base36_encode(2147483647) == "zik0zj" + assert utils.custom_base_encode(2147483647) == "zik0zj" # Tests that the function returns a string consisting only of lowercase letters and digits def test_returns_lowercase_letters_and_digits(self: "TestBase36Encode") -> None: - encoded_string = utils.base36_encode(123456789) + encoded_string = utils.custom_base_encode(123456789) assert all(c in string.digits + string.ascii_lowercase for c in encoded_string) # Tests that the function can handle a large prime number def test_large_prime_number(self: "TestBase36Encode") -> None: - assert utils.base36_encode(982451653) == "g8xcjp" + assert utils.custom_base_encode(982451653) == "g8xcjp" # Tests that the function can handle a large 
composite number def test_large_composite_number(self: "TestBase36Encode") -> None: - assert utils.base36_encode(999999999) == "gjdgxr" + assert utils.custom_base_encode(999999999) == "gjdgxr" # Tests that the function can handle a large power of 2 def test_large_power_of_2(self: "TestBase36Encode") -> None: - assert utils.base36_encode(2**50) == "b33j9ynrb4" + assert utils.custom_base_encode(2**50) == "b33j9ynrb4" # Tests that the function can handle a large power of 10 def test_large_power_of_10(self: "TestBase36Encode") -> None: - assert utils.base36_encode(10**50) == "1ku3a4pjfxx2nd2gl07gtqboljenwn75s" + assert utils.custom_base_encode(10**50) == "1ku3a4pjfxx2nd2gl07gtqboljenwn75s" # Tests the performance of the function for very large numbers def test_performance_for_very_large_numbers(self: "TestBase36Encode") -> None: assert ( - utils.base36_encode(2**10000) + utils.custom_base_encode(2**10000) == "2kqaqr9n8eopgtn6k95g23riodx51p4o3jwyma480okqkygdk2cn232qvv2svfuvbzb5dy9yeoqceom839h5k1yzf6izbx3rnrjx4pfili0r67ebjqnjhwqevgboilk8yf8ueh7pnd28hk2xttyvgmiqcew98grghfhqz4xe93yiifh69uh4kxt2ld4ba87izsm9u323ekhjh37k5tsyn9of1gds6lzq526i1r3f70gd74z9ni2b2ej456p7frfijzu9hpdkw1vpsuwds7zpbr7uwcu2qbb0o9djiehjcltlpptcqg19sxajz8vyffjeuajmdew0q9j9h7ovpgmyat92n3rx0fgfsu4kluaoi146z5v1t00q073fe6f6ijdue1g06lvqx9ijmezax2tooljo5c362eil7nkkz91d0n5ghfe1rhofsyujw4209klyhjzcu10ycc0oc19pq7sqo1ugs10ym59phjo752siiuj56z2yns5dtodwff92we0a9sgho98t2jgqtgiwilmcmksi5aighqi8h5pxsvyc6owrans06be91u2gtwi43s3i46rjkvpn85xzco0dgwfzzp8hto1axij5w3j69snadxvc0ed3971r936qzc0cyou95081lvplzcrid76kf8wbm68tj3zv4j9fj4dnl19etn1koc3hx8eixx85uda4rv92j3dl0ib8ixgeywnjmivv3kmich1balqsp1hd4b88r7aephwoc9uphu9u2eorlvjmquvqziuu7w1usbf3lmdx289lkdeyecnymd88xlb7thrlmxvzjlmnxfjzmqcnaie4sfz81mlqq6n48b4vdkh7gimhfy9rddxdy3fi2faochb1cnikvkwdz02qa5v9p3l7cnzhxqwhrqdhrcawfl2lbzvs26uzf2fyj2u5i7gv750aitt8drg8md9i551u8hpaw28r1p3qi29soq8d2iakhw2ezzeux71zlex537kgs18lgk56nw1gbrk00oqowzb5t117bvgp6221nr5slvxz3ozr2gqdzume9xuqpeeza98qd009p50qkmghy
b3tqsyhmo72h9ptv98w6dh1vwrja2oa5lfi2ei234f2haziqh3jp0v8vv40tur6cydgv8dohm20yurg6gymgjgm3pb8q2hf57hxioeiyild50rersbfumcyc4ij5nhxavlg5s976mkphr0wurzuri1uk44vmrw6w3mftfozkxvo14aii0xct1t6czu9w9njatnx9igw51aqy3c7rjz7vugf0yzlo6t61a72ddp7lrd9ymgyca271cr17f16fojq5g688h6rgxg1hhb5zzedw23723s93mai3f603gd4utc7levfzpdhqt7l7t4b70efvq029zofoqrzgno8bpyg5vgz12jdun0of0ua47zzpa7wk0psnrq0yal1isbf6hbnlvfundz2hbrl4mfxnxxgobkv2jyaag9872k45x8uwau1mlzx8xhtc43829wyauwejfqifs4em5ipgih0ypn8bfjyq6b3blevcmrostiqldnqa7znph1zfm7xdjmhghmx57qit1ojxjkjliendm98redq1xtai822suagwdzhq1y8kf523m0nslhvjomttuydoqqe6pr5rf76aqe3pwx6pmcrub3gmvg6scojyjj4o429qjpzjsehoqe8y6rivp7i904dricmv1l75cfomy5x92cd34m7r8k886i7o58krwj5257b9wfq7dcj4mwq76vctcezae4v8jtz29fsjl2lbjqhxfjb8itp6x89ems9fga26i7t7zl5njmbqtp2jt9ommmhiz3ty7izh9gk5dxr26n9bz6j3swbu980hfd9v0vbdrn72ra3eiawckwkvhmdgfpi12cjamr0jf22jf268sg" # noqa: E501 ) diff --git a/src/cuid2/cli.py b/src/cuid2/cli.py index c38330d..09c1dfc 100644 --- a/src/cuid2/cli.py +++ b/src/cuid2/cli.py @@ -1,8 +1,45 @@ -from cuid2.generator import cuid_wrapper - -generate_cuid = cuid_wrapper() - +from argparse import ArgumentParser +from cuid2.generator import cuid_wrapper, MAXIMUM_LENGTH def main() -> None: """Print out a CUID generated string. 
Used by the CLI console script.""" - print(generate_cuid()) # noqa: T201 (print statement) + + # parse cli arguments + arguments_parser = ArgumentParser() + arguments_parser.add_argument( + nargs="?", + dest="length", + help="Length of generated cuid2", + type=int, + default=24) + arguments_parser.add_argument( + "-p", "--pretty", + help="Pretty print generated cuid2 in blocks", + action="store_true", + default=False) + arguments_parser.add_argument( + "-pl", "--pretty_length", + metavar="BLOCK-LENGTH", + help="Set the custom length of pretty blocks", + type=int, + default=-5) + arguments_parser.add_argument( + "--alphabet", + metavar="ABCDE..", + help="Set the custom alphabet to generate cuid2", + type=str, + default=None) + cli_arguments = arguments_parser.parse_args() + + # generate cuid of arbitrary length + cuid2: str = "" + while len(cuid2) < cli_arguments.length: + cuid2 += cuid_wrapper(min(MAXIMUM_LENGTH, cli_arguments.length - len(cuid2)), alphabet=cli_arguments.alphabet)() + + # print + if cli_arguments.pretty or cli_arguments.pretty_length > 0: + cuid2_list: list[str] = [cuid2[i:i + abs(cli_arguments.pretty_length)] for i in + range(0, len(cuid2), abs(cli_arguments.pretty_length))] + print("-".join(cuid2_list)) + else: + print(cuid2) # noqa: T201 (print statement) diff --git a/src/cuid2/generator.py b/src/cuid2/generator.py index 1b8a442..4810580 100644 --- a/src/cuid2/generator.py +++ b/src/cuid2/generator.py @@ -28,6 +28,7 @@ def __init__( counter: Callable[[int], Callable[[], int]] = utils.create_counter, length: int = DEFAULT_LENGTH, fingerprint: FingerprintCallable = utils.create_fingerprint, + alphabet: Optional[str] = None ) -> None: """Initialization function for the Cuid class that generates a universally unique, base36 encoded string. @@ -46,6 +47,9 @@ def __init__( A length value greater than `MAXIMUM_LENGTH` (98 characters) will raise a ValueError. 
fingerprint : "FingerprintCallable", default=utils.create_fingerprint The "fingerprint" parameter is a callable function that generates a unique identifier. + alphabet : str, default=None + Alphabet to use in string encoding process. + If alphabet is not provided, the default lowercase+digits will be used. Raises ------ @@ -60,6 +64,7 @@ def __init__( self._counter: Callable[[], int] = counter(floor(self._random.random() * INITIAL_COUNT_MAX)) self._length: int = length self._fingerprint: str = fingerprint(random_generator=self._random) + self._alphabet = alphabet def generate(self: Cuid, length: Optional[int] = None) -> str: """Generates a universally unique, base36 encoded string with a specified length. @@ -88,28 +93,38 @@ def generate(self: Cuid, length: Optional[int] = None) -> str: msg = "Length must never exceed 98 characters." raise ValueError(msg) - first_letter: str = utils.create_letter(random_generator=self._random) + first_letter: str = utils.create_letter(random_generator=self._random, alphabet=self._alphabet) - base36_time: str = utils.base36_encode(time.time_ns()) - base36_count: str = utils.base36_encode(self._counter()) + base36_time: str = utils.custom_base_encode(time.time_ns(), self._alphabet) + base36_count: str = utils.custom_base_encode(self._counter(), self._alphabet) salt: str = utils.create_entropy(length=length, random_generator=self._random) hash_input: str = base36_time + salt + base36_count + self._fingerprint - return first_letter + utils.create_hash(hash_input)[1 : length or self._length] + return first_letter + utils.create_hash(hash_input, alphabet=self._alphabet)[1 : length or self._length] -def cuid_wrapper() -> Callable[[], str]: +def cuid_wrapper(length: Optional[int] = None, alphabet: Optional[str] = None) -> Callable[[], str]: """Wrap a single Cuid class instance and return a callable that generates a CUID string. 
+ Parameters + ---------- + length : int, optional + The length parameter is an optional integer value that specifies the length of the generated string. + If it is not provided, the default length value provided during class initialization is used. + A length value greater than `MAXIMUM_LENGTH` (98 characters) will raise a ValueError. + alphabet : str, default=None + Alphabet to use in string encoding process. + If alphabet is not provided, the default lowercase+digits will be used. + Returns ------- Callable[[], str] A callable that generates a CUID string. """ - cuid_generator: Cuid = Cuid() + cuid_generator: Cuid = Cuid(alphabet=alphabet) def cuid() -> str: - return cuid_generator.generate() + return cuid_generator.generate(length) return cuid diff --git a/src/cuid2/utils.py b/src/cuid2/utils.py index 765fd62..a99d655 100644 --- a/src/cuid2/utils.py +++ b/src/cuid2/utils.py @@ -78,7 +78,7 @@ def create_fingerprint(random_generator: Random, fingerprint_data: Optional[str] return create_hash(fingerprint)[0:BIG_LENGTH] -def create_entropy(random_generator: Random, length: int = 4) -> str: +def create_entropy(random_generator: Random, length: int = 4, alphabet: Optional[str] = None) -> str: """Creates a random string of specified length using a base36 encoding. Parameters @@ -88,6 +88,9 @@ def create_entropy(random_generator: Random, length: int = 4) -> str: length : int, default=4 The length parameter is an integer that specifies the length of the entropy string to be generated. The default value is 4, but it can be set to any positive integer value. + alphabet : str, default=None + Alphabet to use in string encoding process. + If alphabet is not provided, the default lowercase+digits will be used. 
Returns ------- @@ -107,12 +110,12 @@ def create_entropy(random_generator: Random, length: int = 4) -> str: entropy: str = "" while len(entropy) < length: - entropy += base36_encode(floor(random_generator.random() * 36)) + entropy += custom_base_encode(floor(random_generator.random() * 36), alphabet=alphabet) return entropy -def create_hash(data: str = "") -> str: +def create_hash(data: str = "", alphabet: Optional[str] = None) -> str: """Creates a hash value for a given string using the SHA-512 algorithm (prefers SHA3) and returns it in base36 encoding format after dropping the first character. @@ -121,6 +124,9 @@ def create_hash(data: str = "") -> str: data : str, default="" Data to be hashed. It is an optional parameter with a default value of an empty string. If no value is provided for `data`, an empty string will be hashed. + alphabet : str, default=None + Alphabet to use in string encoding process. + If alphabet is not provided, the default lowercase+digits will be used. Returns ------- @@ -132,10 +138,10 @@ def create_hash(data: str = "") -> str: hashed_int: int = int.from_bytes(hashed_value.digest(), byteorder="big") # Drop the first character because it will bias the histogram to the left. - return base36_encode(hashed_int)[1:] + return custom_base_encode(hashed_int, alphabet=alphabet)[1:] -def create_letter(random_generator: Random) -> str: +def create_letter(random_generator: Random, alphabet: Optional[str] = None) -> str: """Generates a random lowercase letter using a given random number generator. Parameters @@ -148,17 +154,55 @@ def create_letter(random_generator: Random) -> str: str a randomly generated lowercase letter from the English alphabet. """ - alphabet: str = string.ascii_lowercase + if not alphabet: + alphabet = string.ascii_lowercase return alphabet[floor(random_generator.random() * len(alphabet))] -def base36_encode(number: int) -> str: - """Encodes a positive integer into a base36 string. 
+# def base36_encode(number: int) -> str: +# """Encodes a positive integer into a base36 string. + +# Parameters +# ---------- +# number : int +# Integer to be encoded as a base36 string. + +# Returns +# ------- +# str +# A string that represents the base36 encoded input integer. +# If the input integer is negative, a ValueError is raised. +# If the input integer is 0, the function returns the string "0". + +# Raises +# ------ +# ValueError +# If the input integer is negative. +# """ +# if number < 0: +# msg = "Cannot encode negative integers." +# raise ValueError(msg) + +# encoded_string: str = "" +# alphabet: str = string.digits + string.ascii_lowercase +# alphabet_length: int = len(alphabet) + +# while number != 0: +# number, mod = divmod(number, alphabet_length) +# encoded_string = alphabet[mod] + encoded_string + +# return encoded_string or "0" + +def custom_base_encode(number: int, alphabet: Optional[str] = None) -> str: + """Encodes a positive integer into a string with the provided alphabet. Parameters ---------- number : int - Integer to be encoded as a base36 string. + Integer to be encoded as an alphabet-encoded string. + alphabet : str + Alphabet to use in encoding process. + If alphabet is not provided, the default lowercase+digits will be used. Returns ------- @@ -177,11 +221,12 @@ def base36_encode(number: int) -> str: raise ValueError(msg) encoded_string: str = "" - alphabet: str = string.digits + string.ascii_lowercase + if alphabet is None: + alphabet = string.digits + string.ascii_lowercase alphabet_length: int = len(alphabet) while number != 0: number, mod = divmod(number, alphabet_length) encoded_string = alphabet[mod] + encoded_string - return encoded_string or "0" + return encoded_string or alphabet[0]