Skip to content

Commit f41e66b

Browse files
committed
MDEV-38975: HEAP engine BLOB/TEXT/JSON/GEOMETRY column support
Allow BLOB/TEXT/JSON/GEOMETRY columns in MEMORY (HEAP) engine tables by storing blob data in variable-length continuation record chains within the existing `HP_BLOCK` structure.

**Continuation runs**: blob data is split across contiguous sequences of `recbuffer`-sized records. Each run stores a 10-byte header (`next_cont` pointer + `run_rec_count`) in the first record; inner records (rec 1..N-1) have no flags byte — full `recbuffer` payload. Runs are linked via `next_cont` pointers. Individual runs are capped at 65,535 records (`uint16` format limit); larger blobs are automatically split into multiple runs.

**Zero-copy reads**: single-run blobs return pointers directly into `HP_BLOCK` records, avoiding `blob_buff` reassembly entirely:

- Case A (`run_rec_count == 1`): return `chain + HP_CONT_HEADER_SIZE`
- Case B (`HP_ROW_CONT_ZEROCOPY` flag): return `chain + recbuffer`
- Case C (multi-run): walk chain, reassemble into `blob_buff`

`HP_INFO::has_zerocopy_blobs` tracks zero-copy state; used by `heap_update()` to refresh the caller's record buffer after freeing old chains, preventing dangling pointers.

**Free list scavenging**: on insert, the free list is walked read-only (peek) tracking contiguous groups in descending address order (LIFO). Qualifying groups (>= `min_run_records`) are unlinked and used. The first non-qualifying group terminates the scan — remaining data is allocated from the block tail. The free list is never disturbed when no qualifying group is found.

**Record counting**: new `HP_SHARE::total_records` tracks all physical records (primary + continuation). `HP_SHARE::records` remains logical (primary-only) to preserve linear hash bucket mapping correctness.

**Scan/check batch-skip**: `heap_scan()` and `heap_check_heap()` read `run_rec_count` from rec 0 and skip entire continuation runs at once.

**Hash functions**: `hp_rec_hashnr()`, `hp_rec_key_cmp()`, `hp_key_cmp()`, `hp_make_key()` updated to handle `HA_BLOB_PART` key segments — reading actual blob data via pointer dereference or chain materialization.

**SQL layer**: `choose_engine()` no longer rejects HEAP for blob tables (replaced `blob_fields` check with `reclength > HA_MAX_REC_LENGTH`). `remove_duplicates()` routes HEAP+blob to `remove_dup_with_compare()`. `ha_heap::remember_rnd_pos()` / `restart_rnd_next()` implemented for DISTINCT deduplication support.

Fixed undefined behavior in `test_if_cheaper_ordering()` where `select_limit/fanout` could overflow to infinity — capped at `HA_POS_ERROR`.

https://jira.mariadb.org/browse/MDEV-38975
1 parent 14f96a2 commit f41e66b

84 files changed

Lines changed: 3317 additions & 298 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

include/heap.h

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,21 +131,27 @@ typedef struct st_hp_keydef /* Key definition with open */
131131
uint (*get_key_length)(struct st_hp_keydef *keydef, const uchar *key);
132132
} HP_KEYDEF;
133133

134+
typedef struct st_hp_blob_desc
135+
{
136+
uint offset; /* Byte offset of blob descriptor within record buffer */
137+
uint packlength; /* 1, 2, 3, or 4: length prefix size */
138+
} HP_BLOB_DESC;
139+
134140
typedef struct st_heap_share
135141
{
136142
HP_BLOCK block;
137143
HP_KEYDEF *keydef;
138144
ulonglong data_length,index_length,max_table_size;
139145
ulonglong auto_increment;
140146
ulong min_records,max_records; /* Params to open */
141-
ulong records; /* records */
147+
ulong records; /* Logical (primary) record count */
142148
ulong blength; /* records rounded up to 2^n */
143149
ulong deleted; /* Deleted records in database */
144150
uint key_stat_version; /* version to indicate insert/delete */
145151
uint key_version; /* Updated on key change */
146152
uint file_version; /* Update on clear */
147153
uint reclength; /* Length of one record */
148-
uint visible; /* Offset to the visible/deleted mark */
154+
uint visible; /* Offset to the flags byte (active/deleted/continuation) */
149155
uint changed;
150156
uint keys,max_key_length;
151157
uint currently_disabled_keys; /* saved value from "keys" when disabled */
@@ -156,6 +162,9 @@ typedef struct st_heap_share
156162
THR_LOCK lock;
157163
my_bool delete_on_close;
158164
my_bool internal; /* Internal temporary table */
165+
HP_BLOB_DESC *blob_descs; /* Array of blob column descriptors */
166+
uint blob_count; /* Number of blob columns */
167+
ulong total_records; /* All active records (primary + blob continuation) */
159168
LIST open_list;
160169
uint auto_key;
161170
uint auto_key_type; /* real type of the auto key segment */
@@ -181,6 +190,9 @@ typedef struct st_heap_info
181190
uint file_version; /* Version at scan */
182191
uint lastkey_len;
183192
my_bool implicit_emptied;
193+
uchar *blob_buff; /* Reassembly buffer for blob reads */
194+
uint32 blob_buff_len; /* Current allocated size of blob_buff */
195+
my_bool has_zerocopy_blobs; /* Last hp_read_blobs produced zero-copy ptrs */
184196
THR_LOCK_DATA lock;
185197
LIST open_list;
186198
} HP_INFO;
@@ -204,6 +216,8 @@ typedef struct st_heap_create_info
204216
open_count to 1. Is only looked at if not internal_table.
205217
*/
206218
my_bool pin_share;
219+
HP_BLOB_DESC *blob_descs;
220+
uint blob_count;
207221
} HP_CREATE_INFO;
208222

209223
/* Prototypes for heap-functions */

mysql-test/include/mtr_check.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ BEGIN
6666
collation_name, column_type, column_key, extra, column_comment
6767
FROM INFORMATION_SCHEMA.COLUMNS
6868
WHERE table_schema='mysql'
69-
ORDER BY columns_in_mysql;
69+
ORDER BY columns_in_mysql, ordinal_position;
7070

7171
-- Dump all events, there should be none
7272
SELECT * FROM INFORMATION_SCHEMA.EVENTS;
mysql-test/main/blob_sj_test.result

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
set optimizer_switch='materialization=on,in_to_exists=off,semijoin=off';
2+
set @blob_len = 16;
3+
set @prefix_len = 6;
4+
set @suffix_len = @blob_len - @prefix_len;
5+
create table t1 (a1 blob(16), a2 blob(16));
6+
create table t2 (b1 blob(16), b2 blob(16));
7+
insert into t1 values
8+
(concat('1 - 00', repeat('x', @suffix_len)), concat('2 - 00', repeat('x', @suffix_len)));
9+
insert into t1 values
10+
(concat('1 - 01', repeat('x', @suffix_len)), concat('2 - 01', repeat('x', @suffix_len)));
11+
insert into t1 values
12+
(concat('1 - 02', repeat('x', @suffix_len)), concat('2 - 02', repeat('x', @suffix_len)));
13+
insert into t2 values
14+
(concat('1 - 01', repeat('x', @suffix_len)), concat('2 - 01', repeat('x', @suffix_len)));
15+
insert into t2 values
16+
(concat('1 - 02', repeat('x', @suffix_len)), concat('2 - 02', repeat('x', @suffix_len)));
17+
insert into t2 values
18+
(concat('1 - 03', repeat('x', @suffix_len)), concat('2 - 03', repeat('x', @suffix_len)));
19+
explain extended select left(a1,7), left(a2,7) from t1 where a1 in (select b1 from t2 where b1 > '0');
20+
id select_type table type possible_keys key key_len ref rows filtered Extra
21+
1 PRIMARY t1 ALL NULL NULL NULL NULL 3 100.00 Using where
22+
2 DEPENDENT SUBQUERY t2 ALL NULL NULL NULL NULL 3 100.00 Using where
23+
Warnings:
24+
Note 1003 /* select#1 */ select left(`test`.`t1`.`a1`,7) AS `left(a1,7)`,left(`test`.`t1`.`a2`,7) AS `left(a2,7)` from `test`.`t1` where <expr_cache><`test`.`t1`.`a1`>(<in_optimizer>(`test`.`t1`.`a1`,<exists>(/* select#2 */ select `test`.`t2`.`b1` from `test`.`t2` where `test`.`t2`.`b1` > '0' and <cache>(`test`.`t1`.`a1`) = `test`.`t2`.`b1`)))
25+
select left(a1,7), left(a2,7) from t1 where a1 in (select b1 from t2 where b1 > '0');
26+
left(a1,7) left(a2,7)
27+
1 - 01x 2 - 01x
28+
1 - 02x 2 - 02x
29+
drop table t1, t2;

mysql-test/main/blob_sj_test.test

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
set optimizer_switch='materialization=on,in_to_exists=off,semijoin=off';
2+
set @blob_len = 16;
3+
set @prefix_len = 6;
4+
set @suffix_len = @blob_len - @prefix_len;
5+
6+
create table t1 (a1 blob(16), a2 blob(16));
7+
create table t2 (b1 blob(16), b2 blob(16));
8+
9+
insert into t1 values
10+
(concat('1 - 00', repeat('x', @suffix_len)), concat('2 - 00', repeat('x', @suffix_len)));
11+
insert into t1 values
12+
(concat('1 - 01', repeat('x', @suffix_len)), concat('2 - 01', repeat('x', @suffix_len)));
13+
insert into t1 values
14+
(concat('1 - 02', repeat('x', @suffix_len)), concat('2 - 02', repeat('x', @suffix_len)));
15+
16+
insert into t2 values
17+
(concat('1 - 01', repeat('x', @suffix_len)), concat('2 - 01', repeat('x', @suffix_len)));
18+
insert into t2 values
19+
(concat('1 - 02', repeat('x', @suffix_len)), concat('2 - 02', repeat('x', @suffix_len)));
20+
insert into t2 values
21+
(concat('1 - 03', repeat('x', @suffix_len)), concat('2 - 03', repeat('x', @suffix_len)));
22+
23+
explain extended select left(a1,7), left(a2,7) from t1 where a1 in (select b1 from t2 where b1 > '0');
24+
select left(a1,7), left(a2,7) from t1 where a1 in (select b1 from t2 where b1 > '0');
25+
26+
drop table t1, t2;

mysql-test/main/create.result

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,7 @@ Note 1051 Unknown table 'test.t1,test.t2'
3030
create table t1 (b char(0) not null, index(b));
3131
ERROR 42000: The storage engine MyISAM can't index column `b`
3232
create table t1 (a int not null,b text) engine=heap;
33-
ERROR 42000: Storage engine MEMORY doesn't support BLOB/TEXT columns
34-
drop table if exists t1;
35-
Warnings:
36-
Note 1051 Unknown table 'test.t1'
33+
drop table t1;
3734
create table t1 (ordid int(8) not null auto_increment, ord varchar(50) not null, primary key (ord,ordid)) engine=heap;
3835
ERROR 42000: Incorrect table definition; there can be only one auto column and it must be defined as a key
3936
create table not_existing_database.test (a int);
@@ -1089,7 +1086,7 @@ t1 CREATE TABLE `t1` (
10891086
`QUERY_ID` bigint(4) NOT NULL,
10901087
`INFO_BINARY` blob,
10911088
`TID` bigint(4) NOT NULL
1092-
) DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci
1089+
) ENGINE=MEMORY DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci
10931090
drop table t1;
10941091
create temporary table t1 like information_schema.processlist;
10951092
show create table t1;
@@ -1113,7 +1110,7 @@ t1 CREATE TEMPORARY TABLE `t1` (
11131110
`QUERY_ID` bigint(4) NOT NULL,
11141111
`INFO_BINARY` blob,
11151112
`TID` bigint(4) NOT NULL
1116-
) DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci
1113+
) ENGINE=MEMORY DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci
11171114
drop table t1;
11181115
create table t1 like information_schema.character_sets;
11191116
show create table t1;

mysql-test/main/create.test

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,8 @@ create table t2 select auto+1 from t1;
3030
drop table if exists t1,t2;
3131
--error ER_WRONG_KEY_COLUMN
3232
create table t1 (b char(0) not null, index(b));
33-
--error ER_TABLE_CANT_HANDLE_BLOB
3433
create table t1 (a int not null,b text) engine=heap;
35-
drop table if exists t1;
34+
drop table t1;
3635

3736
--error ER_WRONG_AUTO_KEY
3837
create table t1 (ordid int(8) not null auto_increment, ord varchar(50) not null, primary key (ord,ordid)) engine=heap;

mysql-test/main/cte_recursive.test

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3212,6 +3212,8 @@ show create table t2;
32123212
--eval insert ignore into t2 $query;
32133213
drop table t2;
32143214
set @@sql_mode="";
3215+
# Rows with identical (level, mid) due to overflow have non-deterministic order
3216+
--sorted_result
32153217
--eval $query
32163218
--eval create table t2 as $query;
32173219
show create table t2;

mysql-test/main/derived_view.result

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2372,7 +2372,7 @@ GROUP BY TABLE_SCHEMA) AS UNIQUES
23722372
ON ( COLUMNS.TABLE_SCHEMA = UNIQUES.TABLE_SCHEMA);
23732373
id select_type table type possible_keys key key_len ref rows Extra
23742374
1 PRIMARY COLUMNS ALL NULL NULL NULL NULL NULL Open_frm_only; Scanned all databases
2375-
1 PRIMARY <derived2> ALL NULL NULL NULL NULL 2 Using where; Using join buffer (flat, BNL join)
2375+
1 PRIMARY <derived2> ref key0 key0 194 information_schema.COLUMNS.TABLE_SCHEMA 2
23762376
2 DERIVED STATISTICS ALL NULL NULL NULL NULL NULL Open_frm_only; Scanned all databases; Using filesort
23772377
SELECT COUNT(*) > 0
23782378
FROM INFORMATION_SCHEMA.COLUMNS

mysql-test/main/distinct.result

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1189,7 +1189,7 @@ insert into t1 values (1, 'Aa123456', 'abc'), (2, 'Bb7897777', 'def'),
11891189
(3, 'Cc01287', 'xyz'), (5, 'd12345', 'efg');
11901190
select distinct if(sum(a), b, 0) from t1 group by value(c) with rollup;
11911191
if(sum(a), b, 0)
1192-
Aa123456
1192+
SOME_B_VALUE
11931193
drop table t1;
11941194
#
11951195
# end of 10.5 tests

mysql-test/main/distinct.test

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -915,6 +915,9 @@ create table t1 (a int, b longtext, c varchar(18));
915915
insert into t1 values (1, 'Aa123456', 'abc'), (2, 'Bb7897777', 'def'),
916916
(3, 'Cc01287', 'xyz'), (5, 'd12345', 'efg');
917917

918+
# ROLLUP row's b value is indeterminate (depends on last group processed),
919+
# which varies by temp table engine (HEAP vs Aria). Mask the value.
920+
--replace_regex /(Aa123456|Bb7897777|Cc01287|d12345)/SOME_B_VALUE/
918921
select distinct if(sum(a), b, 0) from t1 group by value(c) with rollup;
919922
drop table t1;
920923

0 commit comments

Comments
 (0)