diff --git a/mysql-test/main/func_xxh.result b/mysql-test/main/func_xxh.result
new file mode 100644
index 0000000000000..92813c1a83a5f
--- /dev/null
+++ b/mysql-test/main/func_xxh.result
@@ -0,0 +1,54 @@
+SELECT XXH32('abc') = '32d153ff' AS xxh32_expected;
+xxh32_expected
+1
+SELECT XXH32(11223344) = '1852349e' AS xxh32_expected;
+xxh32_expected
+1
+SELECT XXH32('abc') = XXH32('abc') AS xxh32_eq;
+xxh32_eq
+1
+SELECT XXH32(NULL) IS NULL AS xxh32_null;
+xxh32_null
+1
+SELECT XXH32('') = '02cc5d05' AS xxh32_empty;
+xxh32_empty
+1
+SELECT XXH3('abc') = '78af5f94892f3950' AS xxh3_expected;
+xxh3_expected
+1
+SELECT XXH3(11223344) = '8178f6e2d84ca479' AS xxh3_expected;
+xxh3_expected
+1
+SELECT XXH3('abc') = XXH3('abc') AS xxh3_eq;
+xxh3_eq
+1
+SELECT XXH3(NULL) IS NULL AS xxh3_null;
+xxh3_null
+1
+SELECT XXH3('') = '2d06800538d394c2' AS xxh3_empty;
+xxh3_empty
+1
+SELECT XXH3_128('abc') = '06b05ab6733a618578af5f94892f3950' AS xxh3_128_expected;
+xxh3_128_expected
+1
+SELECT XXH3_128(11223344) = '4a18cfb1c6fc9ebb0869a1f5a72ff851' AS xxh3_128_num;
+xxh3_128_num
+1
+SELECT XXH3_128('abc') = XXH3_128('abc') AS xxh3_128_eq;
+xxh3_128_eq
+1
+SELECT XXH3_128(NULL) IS NULL AS xxh3_128_null;
+xxh3_128_null
+1
+SELECT XXH3_128('') = '99aa06d3014798d86001c324468d497f' AS xxh3_128_empty;
+xxh3_128_empty
+1
+SELECT XXH32(_latin1'abc') = XXH32(_utf8mb4'abc') AS xxh32_charset_eq;
+xxh32_charset_eq
+1
+SELECT XXH3(_latin1'abc') = XXH3(_utf8mb4'abc') AS xxh3_charset_eq;
+xxh3_charset_eq
+1
+SELECT XXH3_128(_latin1'abc') = XXH3_128(_utf8mb4'abc') AS xxh3_128_charset_eq;
+xxh3_128_charset_eq
+1
diff --git a/mysql-test/main/func_xxh.test b/mysql-test/main/func_xxh.test
new file mode 100644
index 0000000000000..4bd6d7cb8243f
--- /dev/null
+++ b/mysql-test/main/func_xxh.test
@@ -0,0 +1,27 @@
+# Tests for the xxHash SQL functions XXH32(), XXH3() and XXH3_128().
+
+# XXH32: known digests, repeatability, NULL and empty-string input.
+SELECT XXH32('abc') = '32d153ff' AS xxh32_expected;
+SELECT XXH32(11223344) = '1852349e' AS xxh32_expected;
+SELECT XXH32('abc') = XXH32('abc') AS xxh32_eq;
+SELECT XXH32(NULL) IS NULL AS xxh32_null;
+SELECT XXH32('') = '02cc5d05' AS xxh32_empty;
+
+# XXH3 (64-bit): the same set of checks.
+SELECT XXH3('abc') = '78af5f94892f3950' AS xxh3_expected;
+SELECT XXH3(11223344) = '8178f6e2d84ca479' AS xxh3_expected;
+SELECT XXH3('abc') = XXH3('abc') AS xxh3_eq;
+SELECT XXH3(NULL) IS NULL AS xxh3_null;
+SELECT XXH3('') = '2d06800538d394c2' AS xxh3_empty;
+
+# XXH3_128: the same set of checks.
+SELECT XXH3_128('abc') = '06b05ab6733a618578af5f94892f3950' AS xxh3_128_expected;
+SELECT XXH3_128(11223344) = '4a18cfb1c6fc9ebb0869a1f5a72ff851' AS xxh3_128_num;
+SELECT XXH3_128('abc') = XXH3_128('abc') AS xxh3_128_eq;
+SELECT XXH3_128(NULL) IS NULL AS xxh3_128_null;
+SELECT XXH3_128('') = '99aa06d3014798d86001c324468d497f' AS xxh3_128_empty;
+
+# The same text must hash identically whatever charset the argument uses.
+SELECT XXH32(_latin1'abc') = XXH32(_utf8mb4'abc') AS xxh32_charset_eq;
+SELECT XXH3(_latin1'abc') = XXH3(_utf8mb4'abc') AS xxh3_charset_eq;
+SELECT XXH3_128(_latin1'abc') = XXH3_128(_utf8mb4'abc') AS xxh3_128_charset_eq;
diff --git a/sql/item_create.cc b/sql/item_create.cc
index f707607e1e84a..7c27edfeab966 100644
--- a/sql/item_create.cc
+++ b/sql/item_create.cc
@@ -633,6 +633,41 @@ class Create_func_crc32c : public Create_native_func
   virtual ~Create_func_crc32c() = default;
 };
 
+// Builder for the native SQL function XXH32(expr).
+class Create_func_xxh32 : public Create_native_func
+{
+public:
+  Item *create_native(THD *thd, const LEX_CSTRING *name,
+                      List<Item> *item_list) override;
+  static Create_func_xxh32 s_singleton;
+protected:
+  Create_func_xxh32() = default;
+  ~Create_func_xxh32() override = default;
+};
+
+// Builder for the native SQL function XXH3(expr).
+class Create_func_xxh3 : public Create_native_func
+{
+public:
+  Item *create_native(THD *thd, const LEX_CSTRING *name,
+                      List<Item> *item_list) override;
+  static Create_func_xxh3 s_singleton;
+protected:
+  Create_func_xxh3() = default;
+  ~Create_func_xxh3() override = default;
+};
+
+// Builder for the native SQL function XXH3_128(expr).
+class Create_func_xxh3_128 : public Create_native_func
+{
+public:
+  Item *create_native(THD *thd, const LEX_CSTRING *name,
+                      List<Item> *item_list) override;
+  static Create_func_xxh3_128 s_singleton;
+protected:
+  Create_func_xxh3_128() = default;
+  ~Create_func_xxh3_128() override = default;
+};
 
 class Create_func_datediff : public Create_func_arg2
 {
@@ -3682,6 +3717,59 @@ Create_func_crc32c::create_native(THD *thd, const LEX_CSTRING *name,
     : new (thd->mem_root) Item_func_crc32(thd, true, arg1);
 }
 
+Create_func_xxh32 Create_func_xxh32::s_singleton;
+
+// XXH32() takes exactly one argument; any other count raises an error.
+Item *Create_func_xxh32::create_native(THD *thd, const LEX_CSTRING *name,
+                                       List<Item> *item_list)
+{
+  int argc= item_list ? item_list->elements : 0;
+  if (unlikely(argc != 1))
+  {
+    my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str);
+    return nullptr;
+  }
+
+  Item *arg1= item_list->pop();
+  DBUG_ASSERT(!arg1->is_explicit_name());
+  return new (thd->mem_root) Item_func_xxh32(thd, arg1);
+}
+
+Create_func_xxh3 Create_func_xxh3::s_singleton;
+
+// XXH3() takes exactly one argument; any other count raises an error.
+Item *Create_func_xxh3::create_native(THD *thd, const LEX_CSTRING *name,
+                                      List<Item> *item_list)
+{
+  int argc= item_list ? item_list->elements : 0;
+  if (unlikely(argc != 1))
+  {
+    my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str);
+    return nullptr;
+  }
+
+  Item *arg1= item_list->pop();
+  DBUG_ASSERT(!arg1->is_explicit_name());
+  return new (thd->mem_root) Item_func_xxh3(thd, arg1);
+}
+
+Create_func_xxh3_128 Create_func_xxh3_128::s_singleton;
+
+// XXH3_128() takes exactly one argument; any other count raises an error.
+Item *Create_func_xxh3_128::create_native(THD *thd, const LEX_CSTRING *name,
+                                          List<Item> *item_list)
+{
+  int argc= item_list ? item_list->elements : 0;
+  if (unlikely(argc != 1))
+  {
+    my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str);
+    return nullptr;
+  }
+
+  Item *arg1= item_list->pop();
+  DBUG_ASSERT(!arg1->is_explicit_name());
+  return new (thd->mem_root) Item_func_xxh3_128(thd, arg1);
+}
 
 Create_func_datediff Create_func_datediff::s_singleton;
 
@@ -6338,6 +6426,9 @@ const Native_func_registry func_array[] =
   { { STRING_WITH_LEN("COT") }, BUILDER(Create_func_cot)},
   { { STRING_WITH_LEN("CRC32") }, BUILDER(Create_func_crc32)},
   { { STRING_WITH_LEN("CRC32C") }, BUILDER(Create_func_crc32c)},
+  { { STRING_WITH_LEN("XXH32") }, BUILDER(Create_func_xxh32)},
+  { { STRING_WITH_LEN("XXH3") }, BUILDER(Create_func_xxh3)},
+  { { STRING_WITH_LEN("XXH3_128") }, BUILDER(Create_func_xxh3_128)},
   { { STRING_WITH_LEN("DATABASE") }, BUILDER(Create_func_database)},
   { { STRING_WITH_LEN("DATEDIFF") }, BUILDER(Create_func_datediff)},
   { { STRING_WITH_LEN("DATE_FORMAT") }, BUILDER(Create_func_date_format)},
diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc
index d4328284b6bde..23594a87480fb 100644
--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@@ -43,6 +43,7 @@
                                 // my_make_scrambled_password_323
 #include <m_ctype.h>
 #include <my_md5.h>
+#include "../mysys/xxhash.h"
 C_MODE_START
 #include "../mysys/my_static.h"                 // For soundex_map
 C_MODE_END
@@ -4570,6 +4571,169 @@ longlong Item_func_crc32::val_int()
     (ulonglong{crc_func(uint32_t(crc), res->ptr(), res->length())});
 }
 
+namespace
+{
+/*
+  All XXH* functions hash the utf8mb4 form of their argument, so equal text
+  yields an equal digest whatever charset the argument arrived in.
+*/
+constexpr const CHARSET_INFO *XxhCharset= &my_charset_utf8mb4_bin;
+
+/**
+  Encode a binary digest as lowercase hexadecimal text.
+
+  @param digest  bytes to encode
+  @param length  number of bytes in digest, at most 32
+  @param to      destination; receives 2 * length latin1 characters
+
+  @retval false  success
+  @retval true   copying into the destination failed
+*/
+bool BytesToHexLower(const unsigned char *digest, size_t length, String *to)
+{
+  static const char hex[]= "0123456789abcdef";
+  char buffer[32 * 2];
+
+  DBUG_ASSERT(length * 2 <= sizeof(buffer));
+
+  for (size_t i= 0; i < length; ++i)
+  {
+    buffer[i * 2]= hex[digest[i] >> 4];
+    buffer[i * 2 + 1]= hex[digest[i] & 0x0f];
+  }
+
+  return to->copy(buffer, length * 2, &my_charset_latin1);
+}
+
+/**
+  Evaluate an argument and return its value encoded in XxhCharset.
+
+  @param arg              item to evaluate
+  @param value            buffer passed to arg->val_str()
+  @param converted_value  receives the converted copy; used only when the
+                          value is not already in XxhCharset
+  @param null_value       set to true when nullptr is returned, else false
+
+  @return the string to hash, or nullptr when the argument is NULL or the
+          conversion could not be performed
+*/
+String *GetStableXxhInput(Item *arg, String *value, String *converted_value,
+                          bool *null_value)
+{
+  String *input= arg->val_str(value);
+  if (!input)
+  {
+    *null_value= true;
+    return nullptr;
+  }
+
+  *null_value= false;
+
+  if (input->charset() == XxhCharset)
+    return input;
+
+  // String::copy() needs somewhere to report conversion errors; the count is
+  // not inspected, only an outright copy failure is turned into NULL.
+  uint errors= 0;
+  converted_value->length(0);
+  if (converted_value->copy(input->ptr(), input->length(),
+                            input->charset(), XxhCharset, &errors))
+  {
+    *null_value= true;
+    return nullptr;
+  }
+
+  return converted_value;
+}
+}
+
+/**
+  XXH32(): 32-bit xxHash (seed 0) of the argument as 8 lowercase hex digits.
+  Returns NULL when the argument is NULL or the result cannot be produced.
+*/
+String *Item_func_xxh32::val_str_ascii(String *to)
+{
+  DBUG_ASSERT(fixed());
+  DBUG_ASSERT(arg_count == 1);
+
+  String *input=
+    GetStableXxhInput(args[0], &value, &converted_value, &null_value);
+  if (!input)
+    return nullptr;
+
+  // XXH32() takes const void *; the char pointer needs no cast.
+  const XXH32_hash_t hash= XXH32(input->ptr(), input->length(), 0);
+
+  // Hex-encode the canonical byte form of the hash rather than its in-memory
+  // representation.
+  XXH32_canonical_t canonical;
+  XXH32_canonicalFromHash(&canonical, hash);
+  if (BytesToHexLower(canonical.digest, sizeof(canonical.digest), to))
+  {
+    null_value= true;
+    return nullptr;
+  }
+  return to;
+}
+
+/**
+  XXH3(): 64-bit XXH3 hash of the argument as 16 lowercase hex digits.
+  Returns NULL when the argument is NULL or the result cannot be produced.
+*/
+String *Item_func_xxh3::val_str_ascii(String *to)
+{
+  DBUG_ASSERT(fixed());
+  DBUG_ASSERT(arg_count == 1);
+
+  String *input=
+    GetStableXxhInput(args[0], &value, &converted_value, &null_value);
+  if (!input)
+    return nullptr;
+
+  // XXH3_64bits() takes const void *; the char pointer needs no cast.
+  const XXH64_hash_t hash= XXH3_64bits(input->ptr(), input->length());
+
+  // Hex-encode the canonical byte form of the hash rather than its in-memory
+  // representation.
+  XXH64_canonical_t canonical;
+  XXH64_canonicalFromHash(&canonical, hash);
+  if (BytesToHexLower(canonical.digest, sizeof(canonical.digest), to))
+  {
+    null_value= true;
+    return nullptr;
+  }
+  return to;
+}
+
+/**
+  XXH3_128(): 128-bit XXH3 hash of the argument as 32 lowercase hex digits.
+  Returns NULL when the argument is NULL or the result cannot be produced.
+*/
+String *Item_func_xxh3_128::val_str_ascii(String *to)
+{
+  DBUG_ASSERT(fixed());
+  DBUG_ASSERT(arg_count == 1);
+
+  String *input=
+    GetStableXxhInput(args[0], &value, &converted_value, &null_value);
+  if (!input)
+    return nullptr;
+
+  // XXH3_128bits() takes const void *; the char pointer needs no cast.
+  const XXH128_hash_t hash= XXH3_128bits(input->ptr(), input->length());
+
+  // Hex-encode the canonical byte form of the hash rather than its in-memory
+  // representation.
+  XXH128_canonical_t canonical;
+  XXH128_canonicalFromHash(&canonical, hash);
+  if (BytesToHexLower(canonical.digest, sizeof(canonical.digest), to))
+  {
+    null_value= true;
+    return nullptr;
+  }
+  return to;
+}
+
 #ifdef HAVE_COMPRESS
 #include "zlib.h"
 
diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h
index
4383f86178971..4027ef2f154b7 100644
--- a/sql/item_strfunc.h
+++ b/sql/item_strfunc.h
@@ -2317,6 +2317,90 @@ class Item_func_crc32 :public Item_long_func
   { return get_item_copy(thd, this); }
 };
 
+// XXH32(expr): 32-bit xxHash of the argument, returned as 8 lowercase hex
+// digits; NULL for a NULL argument.
+class Item_func_xxh32 : public Item_str_ascii_checksum_func
+{
+  String value;            // buffer handed to args[0]->val_str()
+  String converted_value;  // buffer for the charset-converted argument
+public:
+  Item_func_xxh32(THD *thd, Item *arg)
+    : Item_str_ascii_checksum_func(thd, arg) {}
+
+  String *val_str_ascii(String *to) override;
+
+  LEX_CSTRING func_name_cstring() const override
+  {
+    static LEX_CSTRING xxh32_name= { STRING_WITH_LEN("XXH32") };
+    return xxh32_name;
+  }
+
+  bool fix_length_and_dec(THD *) override
+  {
+    fix_length_and_charset(8, default_charset());   // 8 hex characters
+    return false;
+  }
+
+  Item *shallow_copy(THD *thd) const override
+  { return get_item_copy(thd, this); }
+};
+
+// XXH3(expr): 64-bit XXH3 hash of the argument, returned as 16 lowercase hex
+// digits; NULL for a NULL argument.
+class Item_func_xxh3 : public Item_str_ascii_checksum_func
+{
+  String value;            // buffer handed to args[0]->val_str()
+  String converted_value;  // buffer for the charset-converted argument
+public:
+  Item_func_xxh3(THD *thd, Item *arg)
+    : Item_str_ascii_checksum_func(thd, arg) {}
+
+  String *val_str_ascii(String *to) override;
+
+  LEX_CSTRING func_name_cstring() const override
+  {
+    static LEX_CSTRING xxh3_name= { STRING_WITH_LEN("XXH3") };
+    return xxh3_name;
+  }
+
+  bool fix_length_and_dec(THD *) override
+  {
+    fix_length_and_charset(16, default_charset());  // 16 hex characters
+    return false;
+  }
+
+  Item *shallow_copy(THD *thd) const override
+  { return get_item_copy(thd, this); }
+};
+
+// XXH3_128(expr): 128-bit XXH3 hash of the argument, returned as 32 lowercase
+// hex digits; NULL for a NULL argument.
+class Item_func_xxh3_128 : public Item_str_ascii_checksum_func
+{
+  String value;            // buffer handed to args[0]->val_str()
+  String converted_value;  // buffer for the charset-converted argument
+public:
+  Item_func_xxh3_128(THD *thd, Item *arg)
+    : Item_str_ascii_checksum_func(thd, arg) {}
+
+  String *val_str_ascii(String *to) override;
+
+  LEX_CSTRING func_name_cstring() const override
+  {
+    static LEX_CSTRING xxh3_128_name= { STRING_WITH_LEN("XXH3_128") };
+    return xxh3_128_name;
+  }
+
+  bool fix_length_and_dec(THD *) override
+  {
+    fix_length_and_charset(32, default_charset());  // 32 hex characters
+    return false;
+  }
+
+  Item *shallow_copy(THD *thd) const override
+  { return get_item_copy(thd, this); }
+};
+
+class
Item_func_uncompressed_length : public Item_long_func_length { String value;