@@ -868,6 +868,117 @@ class AuronFunctionSuite extends AuronQueryTest with BaseAuronSQLSuite {
868868 |""" .stripMargin
869869 checkSparkAnswerAndOperator(query)
870870 }
871+
872+ test(" instr function - basic functionality" ) {
873+ withTable(" t1" ) {
874+ sql("""
875+ CREATE TABLE t1(str STRING, substr STRING) USING parquet
876+ """ )
877+ sql("""
878+ INSERT INTO t1 VALUES
879+ ('hello world', 'world'),
880+ ('hello world', 'hello'),
881+ ('hello world', 'o'),
882+ ('hello world', 'z'),
883+ (null, 'test'),
884+ ('test', null)
885+ """ )
886+
887+ // Test basic instr functionality
888+ checkSparkAnswerAndOperator(" SELECT instr(str, substr) FROM t1" )
889+ }
890+ }
891+
892+ test(" instr function - empty substring" ) {
893+ withTable(" t1" ) {
894+ sql(" CREATE TABLE t1(str STRING) USING parquet" )
895+ sql(" INSERT INTO t1 VALUES ('hello'), ('world'), ('')" )
896+
897+ // Spark returns 1 (not 0) for an empty substring, including when str itself is empty; the native result must match
898+ checkSparkAnswerAndOperator(" SELECT instr(str, '') FROM t1" )
899+ }
900+ }
901+
902+ test(" instr function - UTF-8 multi-byte characters" ) {
903+ withTable(" t1" ) {
904+ sql(" CREATE TABLE t1(str STRING, substr STRING) USING parquet" )
905+ sql("""
906+ INSERT INTO t1 VALUES
907+ ('你好世界', '世界'),
908+ ('hello世界', '世界'),
909+ ('test', '世界'),
910+ ('hello😀world', '😀'),
911+ ('test😀', '😀')
912+ """ )
913+
914+ // Test UTF-8 character position (not byte position)
915+ checkSparkAnswerAndOperator(" SELECT instr(str, substr) FROM t1" )
916+ }
917+ }
918+
919+ test(" instr function - with expressions" ) {
920+ withTable(" t1" ) {
921+ sql(" CREATE TABLE t1(str STRING, substr STRING) USING parquet" )
922+ sql(" INSERT INTO t1 VALUES ('banana', 'a'), ('testtesttest', 'test'), ('abcabcabc', 'abc')" )
923+
924+ // Both arguments are STRING columns; every row contains repeated matches, so this checks that the first occurrence is reported
925+ checkSparkAnswerAndOperator(" SELECT instr(str, substr) FROM t1" )
926+ }
927+ }
928+
929+ test(" instr function - case sensitivity" ) {
930+ withTable(" t1" ) {
931+ sql(" CREATE TABLE t1(str STRING, substr STRING) USING parquet" )
932+ sql("""
933+ INSERT INTO t1 VALUES
934+ ('Hello', 'hello'),
935+ ('HELLO', 'hello'),
936+ ('Hello', 'Hello'),
937+ ('hElLo', 'hello')
938+ """ )
939+
940+ // Instr is case-sensitive
941+ checkSparkAnswerAndOperator(" SELECT instr(str, substr) FROM t1" )
942+ }
943+ }
944+
945+ test(" instr function - in filter clause" ) {
946+ withTable(" t1" ) {
947+ sql(" CREATE TABLE t1(str STRING, substr STRING) USING parquet" )
948+ sql("""
949+ INSERT INTO t1 VALUES
950+ ('hello world', 'world'),
951+ ('hello', 'world'),
952+ ('testing', 'test'),
953+ ('abc', 'def')
954+ """ )
955+
956+ // Test instr in WHERE clause
957+ checkSparkAnswerAndOperator("""
958+ SELECT str FROM t1 WHERE instr(str, substr) > 0
959+ """ )
960+ }
961+ }
962+
963+ test(" instr function - with grouping" ) {
964+ withTable(" t1" ) {
965+ sql(" CREATE TABLE t1(str STRING, substr STRING) USING parquet" )
966+ sql("""
967+ INSERT INTO t1 VALUES
968+ ('test1', 'test'),
969+ ('test2', 'test'),
970+ ('hello', 'world'),
971+ ('testing', 'test')
972+ """ )
973+
974+ // Test instr as a WHERE filter feeding an aggregation (GROUP BY substr, ordered for a deterministic result)
975+ checkSparkAnswerAndOperator("""
976+ SELECT substr, COUNT(*) as cnt
977+ FROM t1
978+ WHERE instr(str, substr) > 0
979+ GROUP BY substr
980+ ORDER BY substr
981+ """ )
871982 }
872983 }
873984}
0 commit comments