From a112e1aae6adaa7894b8921400f8bca0331d6098 Mon Sep 17 00:00:00 2001 From: Attila Turoczy Date: Sat, 13 Jun 2026 21:59:08 +0200 Subject: [PATCH 1/2] HIVE-29664: Add syntax highlight support for beeline --- .../java/org/apache/hive/beeline/BeeLine.java | 2 +- .../org/apache/hive/beeline/BeeLineOpts.java | 9 + .../hive/beeline/HiveSqlHighlighter.java | 317 ++++++++++++++++++ beeline/src/main/resources/BeeLine.properties | 1 + .../hive/beeline/TestBeelineArgParsing.java | 26 ++ .../hive/beeline/TestHiveSqlHighlighter.java | 173 ++++++++++ 6 files changed, 527 insertions(+), 1 deletion(-) create mode 100644 beeline/src/java/org/apache/hive/beeline/HiveSqlHighlighter.java create mode 100644 beeline/src/test/org/apache/hive/beeline/TestHiveSqlHighlighter.java diff --git a/beeline/src/java/org/apache/hive/beeline/BeeLine.java b/beeline/src/java/org/apache/hive/beeline/BeeLine.java index bef953409657..50faf4f832bd 100644 --- a/beeline/src/java/org/apache/hive/beeline/BeeLine.java +++ b/beeline/src/java/org/apache/hive/beeline/BeeLine.java @@ -1477,9 +1477,9 @@ public LineReader initializeLineReader(InputStream inputStream) throws IOExcepti } builder.completer(new BeeLineCompleter(this)); + builder.highlighter(new HiveSqlHighlighter(() -> getOpts().getSyntaxHighlighting())); lineReader = builder.build(); lineReader.unsetOpt(LineReader.Option.HISTORY_TIMESTAMPED); - // need to disable expansion, otherwise commands (starting with "!") will activate history items lineReader.setOpt(LineReader.Option.DISABLE_EVENT_EXPANSION); if (this.history != null) { diff --git a/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java b/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java index fa97ebe13afd..867a2c512e94 100644 --- a/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java +++ b/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java @@ -72,6 +72,7 @@ public class BeeLineOpts implements Completer { private boolean silent = false; private Boolean report = null; private boolean color = false; + private boolean syntaxHighlighting = true; private boolean showHeader = true; private boolean escapeCRLF = false; private boolean showDbInPrompt = false; @@ -490,6 +491,14 @@ public boolean getColor() { return color; } + public void setSyntaxHighlighting(boolean syntaxHighlighting) { + this.syntaxHighlighting = syntaxHighlighting; + } + + public boolean getSyntaxHighlighting() { + return syntaxHighlighting; + } + public void setShowHeader(boolean showHeader) { this.showHeader = showHeader; } diff --git a/beeline/src/java/org/apache/hive/beeline/HiveSqlHighlighter.java b/beeline/src/java/org/apache/hive/beeline/HiveSqlHighlighter.java new file mode 100644 index 000000000000..62747ae28382 --- /dev/null +++ b/beeline/src/java/org/apache/hive/beeline/HiveSqlHighlighter.java @@ -0,0 +1,317 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hive.beeline; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; +import java.util.function.BooleanSupplier; +import java.util.regex.Pattern; + +import org.jline.reader.Highlighter; +import org.jline.reader.LineReader; +import org.jline.utils.AttributedString; +import org.jline.utils.AttributedStringBuilder; +import org.jline.utils.AttributedStyle; + +public class HiveSqlHighlighter implements Highlighter { + + static final AttributedStyle KEYWORD_STYLE = + AttributedStyle.DEFAULT.foreground(AttributedStyle.CYAN).bold(); + static final AttributedStyle TYPE_STYLE = + AttributedStyle.DEFAULT.foreground(AttributedStyle.BLUE).bold(); + static final AttributedStyle CONSTANT_STYLE = + AttributedStyle.DEFAULT.foreground(AttributedStyle.MAGENTA).bold(); + static final AttributedStyle FUNCTION_STYLE = + AttributedStyle.DEFAULT.foreground(AttributedStyle.YELLOW); + static final AttributedStyle STRING_STYLE = + AttributedStyle.DEFAULT.foreground(AttributedStyle.GREEN); + static final AttributedStyle NUMBER_STYLE = + AttributedStyle.DEFAULT.foreground(AttributedStyle.MAGENTA); + static final AttributedStyle COMMENT_STYLE = + AttributedStyle.DEFAULT.faint(); + static final AttributedStyle TABLE_STYLE = + AttributedStyle.DEFAULT.foreground(AttributedStyle.YELLOW); + static final AttributedStyle COLUMN_STYLE = + AttributedStyle.DEFAULT.foreground(AttributedStyle.WHITE); + static final AttributedStyle DEFAULT_STYLE = AttributedStyle.DEFAULT; + + // Keywords after which an identifier is (most likely) a table/relation name. + // Used by the positional table-vs-column heuristic. + static final Set TABLE_CONTEXT = immutableUpper( + "FROM", "JOIN", "INTO", "UPDATE", "TABLE", "DESCRIBE", "TRUNCATE"); + + // ---- Hive data types (matched before the generic keyword set) ------------ + static final Set TYPES = immutableUpper( + "TINYINT", "SMALLINT", "INT", "INTEGER", "BIGINT", "FLOAT", "DOUBLE", + "DECIMAL", "NUMERIC", "DEC", "REAL", "PRECISION", "BOOLEAN", "STRING", + "CHAR", "VARCHAR", "BINARY", "DATE", "DATETIME", "TIMESTAMP", + "TIMESTAMPLOCALTZ", "INTERVAL", "ARRAY", "MAP", "STRUCT", "UNIONTYPE", + "VARIANT", "LONG"); + + static final Set CONSTANTS = immutableUpper( + "TRUE", "FALSE", "NULL", "UNKNOWN"); + + private static final String KEYWORD_LIST = + "ABORT,ACTIVATE,ACTIVE,ADD,ADMIN,AFTER,ALL,ALLOC_FRACTION,ALTER,ANALYZE," + + "AND,ANY,APPLICATION,ARCHIVE,AS,ASC,AST,AT,AUTHORIZATION,AUTOCOMMIT," + + "BEFORE,BETWEEN,BOTH,BRANCH,BUCKET,BUCKETS,BY,CACHE,CASCADE,CASE,CAST," + + "CATALOG,CATALOGS,CBO,CHANGE,CHECK,CLUSTER,CLUSTERED,CLUSTERSTATUS," + + "COLLECTION,COLUMN,COLUMNS,COMMENT,COMMIT,COMPACT,COMPACTIONID," + + "COMPACTIONS,COMPUTE,CONCATENATE,CONF,CONNECTOR,CONNECTORS,CONSTRAINT," + + "CONTINUE,CONVERT,COST,CREATE,CRON,CROSS,CUBE,CURRENT,CURRENT_DATE," + + "CURRENT_TIMESTAMP,CURSOR,DATA,DATABASE,DATABASES,DAY,DAYOFWEEK,DAYS," + + "DBPROPERTIES,DCPROPERTIES,DDL,DEBUG,DEFAULT,DEFERRED,DEFINED,DELETE," + + "DELIMITED,DEPENDENCY,DESC,DESCRIBE,DETAIL,DIRECTORIES,DIRECTORY," + + "DISABLE,DISABLED,DISTINCT,DISTRIBUTE,DISTRIBUTED,DO,DROP,DUMP,ELSE," + + "ENABLE,ENABLED,END,ENFORCED,ESCAPED,EVERY,EXCEPT,EXCHANGE,EXCLUSIVE," + + "EXECUTE,EXECUTED,EXISTS,EXPIRE_SNAPSHOTS,EXPLAIN,EXPORT,EXPRESSION," + + "EXTENDED,EXTERNAL,EXTRACT,FETCH,FIELDS,FILE,FILEFORMAT,FIRST,FLOOR," + + "FOLLOWING,FOR,FORCE,FOREIGN,FORMAT,FORMATTED,FROM,FULL,FUNCTION," + + "FUNCTIONS,GRANT,GROUP,GROUPING,HAVING,HOUR,HOURS,IDXPROPERTIES,IF," + + "IGNORE,IMPORT,IN,INDEX,INDEXES,INNER,INPATH,INPUTDRIVER,INPUTFORMAT," + + "INSERT,INTERSECT,INTO,IS,ISOLATION,ITEMS,JAR,JOIN,JOINCOST,KEY,KEYS," + + "KILL,LAST,LATERAL,LEADING,LEFT,LESS,LEVEL,LIKE,LIMIT,LINES,LOAD,LOCAL," + + "LOCALLY,LOCATION,LOCK,LOCKS,LOGICAL,MACRO,MANAGED,MANAGEDLOCATION," + + "MANAGEMENT,MAPJOIN,MAPPING,MATCHED,MATERIALIZED,MERGE,METADATA,MINUS," + + "MINUTE,MINUTES,MONTH,MONTHS,MORE,MOVE,MSCK,NONE,NORELY,NOSCAN,NOT," + + "NOVALIDATE,NULLS,OF,OFFSET,OLDER,ON,ONLY,OPERATOR,OPTIMIZE,OPTION,OR," + + "ORDER,ORDERED,OUT,OUTER,OUTPUTDRIVER,OUTPUTFORMAT,OVER,OVERWRITE,OWNER," + + "PARTITION,PARTITIONED,PARTITIONS,PATH,PERCENT,PKFK_JOIN,PLAN,PLANS," + + "PLUS,POOL,PRECEDING,PREPARE,PRESERVE,PRIMARY,PRINCIPALS,PROCEDURE," + + "PROPERTIES,PURGE,QUALIFY,QUARTER,QUERY,QUERY_PARALLELISM,RANGE,READ," + + "READS,REBUILD,RECORDREADER,RECORDWRITER,REDUCE,REFERENCES,REGEXP," + + "RELOAD,RELY,REMOTE,RENAME,REOPTIMIZATION,REPAIR,REPL,REPLACE," + + "REPLICATION,RESOURCE,RESPECT,RESTRICT,RETAIN,RETENTION,REVOKE,REWRITE," + + "RIGHT,RLIKE,ROLE,ROLES,ROLLBACK,ROLLUP,ROW,ROWS,SCHEDULED," + + "SCHEDULING_POLICY,SCHEMA,SCHEMAS,SECOND,SECONDS,SELECT,SEMI,SERDE," + + "SERDEPROPERTIES,SERVER,SET,SETS,SET_CURRENT_SNAPSHOT,SHARED,SHOW," + + "SHOW_DATABASE,SKEWED,SNAPSHOT,SNAPSHOTS,SOME,SORT,SORTED,SPEC,SSL," + + "START,STATISTICS,STATUS,STORED,STREAMTABLE,SUMMARY,SYNC,SYSTEM_TIME," + + "SYSTEM_VERSION,TABLE,TABLES,TABLESAMPLE,TAG,TBLPROPERTIES,TEMPORARY," + + "TERMINATED,THAN,THEN,TIME,TO,TOUCH,TRAILING,TRANSACTION,TRANSACTIONAL," + + "TRANSACTIONS,TRANSFORM,TRIGGER,TRIM,TRUNCATE,TYPE,UNARCHIVE,UNBOUNDED," + + "UNDO,UNION,UNIQUE,UNIQUEJOIN,UNLOCK,UNMANAGED,UNSET,UNSIGNED,UPDATE," + + "URI,URL,USE,USER,USING,UTC,UTC_TMESTAMP,VALIDATE,VALUES,VECTORIZATION," + + "VIEW,VIEWS,WAIT,WEEK,WEEKS,WHEN,WHERE,WHILE,WINDOW,WITH,WITHIN,WORK," + + "WORKLOAD,WRITE,YEAR,YEARS,ZONE,ZORDER"; + + static final Set KEYWORDS; + static { + Set kw = new HashSet<>(); + for (String k : KEYWORD_LIST.split(",")) { + String t = k.trim().toUpperCase(); + if (!t.isEmpty() && !TYPES.contains(t) && !CONSTANTS.contains(t)) { + kw.add(t); + } + } + KEYWORDS = Collections.unmodifiableSet(kw); + } + + private final BooleanSupplier enabled; + + public HiveSqlHighlighter(BooleanSupplier enabled) { + this.enabled = enabled; + } + + @Override + public AttributedString highlight(LineReader reader, String buffer) { + if (buffer == null) { + return new AttributedString(""); + } + if (enabled != null && !enabled.getAsBoolean()) { + return new AttributedString(buffer); + } + return highlight(buffer); + } + + AttributedString highlight(String buffer) { + AttributedStringBuilder sb = new AttributedStringBuilder(); + int n = buffer.length(); + int i = 0; + + String prevKw = ""; + while (i < n) { + char c = buffer.charAt(i); + + if (c == '-' && i + 1 < n && buffer.charAt(i + 1) == '-') { + int end = buffer.indexOf('\n', i); + if (end < 0) { + end = n; + } + sb.append(buffer.substring(i, end), COMMENT_STYLE); + i = end; + continue; + } + + if (c == '/' && i + 1 < n && buffer.charAt(i + 1) == '*') { + int end = buffer.indexOf("*/", i + 2); + end = (end < 0) ? n : end + 2; + sb.append(buffer.substring(i, end), COMMENT_STYLE); + i = end; + continue; + } + + if (c == '\'' || c == '"') { + int end = scanString(buffer, i, c); + sb.append(buffer.substring(i, end), STRING_STYLE); + i = end; + continue; + } + + if (c == '`') { + int end = i + 1; + while (end < n && buffer.charAt(end) != '`') { + end++; + } + end = Math.min(end + 1, n); + sb.append(buffer.substring(i, end), DEFAULT_STYLE); + i = end; + continue; + } + + if (isDigit(c) || (c == '.' && i + 1 < n && isDigit(buffer.charAt(i + 1)))) { + int end = scanNumber(buffer, i); + sb.append(buffer.substring(i, end), NUMBER_STYLE); + i = end; + continue; + } + + if (isIdentStart(c)) { + int end = i + 1; + while (end < n && isIdentPart(buffer.charAt(end))) { + end++; + } + String word = buffer.substring(i, end); + String upper = word.toUpperCase(); + sb.append(word, styleForWord(upper, buffer, end, prevKw)); + if (KEYWORDS.contains(upper)) { + prevKw = upper; + } + i = end; + continue; + } + + sb.append(c); + if (c == '(' || c == ';') { + prevKw = ""; + } + i++; + } + return sb.toAttributedString(); + } + + private AttributedStyle styleForWord(String upper, String buffer, int wordEnd, String prevKw) { + if (CONSTANTS.contains(upper)) { + return CONSTANT_STYLE; + } + if (TYPES.contains(upper)) { + return TYPE_STYLE; + } + if (KEYWORDS.contains(upper)) { + return KEYWORD_STYLE; + } + // Plain identifier: classify as table, function, or column. + // A table name follows FROM/JOIN/INTO/UPDATE/TABLE/... (takes precedence so + // that e.g. CREATE TABLE t (...) colors t as a table, not a function). + if (TABLE_CONTEXT.contains(prevKw)) { + return TABLE_STYLE; + } + int j = wordEnd; + while (j < buffer.length() && Character.isWhitespace(buffer.charAt(j))) { + j++; + } + if (j < buffer.length()) { + char next = buffer.charAt(j); + if (next == '(') { + return FUNCTION_STYLE; // identifier immediately before '(' is a call + } + if (next == '.') { + return TABLE_STYLE; // qualifier in alias.col / db.tbl + } + } + return COLUMN_STYLE; + } + + private static int scanString(String s, int start, char quote) { + int n = s.length(); + int i = start + 1; + while (i < n) { + char c = s.charAt(i); + if (c == '\\') { + i += 2; + continue; + } + if (c == quote) { + return i + 1; + } + i++; + } + return n; + } + + private static int scanNumber(String s, int start) { + int n = s.length(); + int i = start; + boolean seenExp = false; + while (i < n) { + char c = s.charAt(i); + if (isDigit(c) || c == '.') { + i++; + } else if ((c == 'e' || c == 'E') && !seenExp) { + seenExp = true; + i++; + if (i < n && (s.charAt(i) == '+' || s.charAt(i) == '-')) { + i++; + } + } else { + break; + } + } + return i; + } + + private static boolean isDigit(char c) { + return c >= '0' && c <= '9'; + } + + private static boolean isIdentStart(char c) { + return Character.isLetter(c) || c == '_'; + } + + private static boolean isIdentPart(char c) { + return Character.isLetterOrDigit(c) || c == '_'; + } + + private static Set immutableUpper(String... words) { + Set s = new HashSet<>(); + for (String w : words) { + s.add(w.toUpperCase()); + } + return Collections.unmodifiableSet(s); + } + + // We do our own coloring; JLine's parser-error highlighting hooks are unused. + @Override + public void setErrorPattern(Pattern errorPattern) { + } + + @Override + public void setErrorIndex(int errorIndex) { + } +} diff --git a/beeline/src/main/resources/BeeLine.properties b/beeline/src/main/resources/BeeLine.properties index f58cf100f3ef..00634bb5c128 100644 --- a/beeline/src/main/resources/BeeLine.properties +++ b/beeline/src/main/resources/BeeLine.properties @@ -178,6 +178,7 @@ cmd-usage: Usage: java org.apache.hive.cli.beeline.BeeLine \n \ \ commands or queries.\n \ \ --property-file= the file to read connection properties (url, driver, user, password) from\n \ \ --color=[true/false] control whether color is used for display\n \ +\ --syntaxHighlighting=[true/false] highlight Hive SQL keywords on the interactive prompt (default true)\n \ \ --showHeader=[true/false] show column names in query results\n \ \ --escapeCRLF=[true/false] show carriage return and line feeds in query results as escaped \\r and \\n \n \ \ --headerInterval=ROWS; the interval between which heades are displayed\n \ diff --git a/beeline/src/test/org/apache/hive/beeline/TestBeelineArgParsing.java b/beeline/src/test/org/apache/hive/beeline/TestBeelineArgParsing.java index 9f1036247182..83c04bf35f4b 100644 --- a/beeline/src/test/org/apache/hive/beeline/TestBeelineArgParsing.java +++ b/beeline/src/test/org/apache/hive/beeline/TestBeelineArgParsing.java @@ -131,6 +131,32 @@ public void testSimpleArgs() throws Exception { Assert.assertTrue(bl.getOpts().getAuthType().equals("authType")); } + @Test + public void testSyntaxHighlightingDefaultsOn() throws Exception { + TestBeeline bl = new TestBeeline(); + String[] args = new String[] {"-u", "url", "-n", "name", "-p", "password", "-d", "driver"}; + Assert.assertEquals(0, bl.initArgs(args)); + Assert.assertTrue(bl.getOpts().getSyntaxHighlighting()); + } + + @Test + public void testSyntaxHighlightingDisabledByArg() throws Exception { + TestBeeline bl = new TestBeeline(); + String[] args = new String[] {"--syntaxHighlighting=false", "-u", "url", "-n", "name", + "-p", "password", "-d", "driver"}; + Assert.assertEquals(0, bl.initArgs(args)); + Assert.assertFalse(bl.getOpts().getSyntaxHighlighting()); + } + + @Test + public void testSyntaxHighlightingArgIsCaseInsensitive() throws Exception { + TestBeeline bl = new TestBeeline(); + String[] args = new String[] {"--syntaxhighlighting=false", "-u", "url", "-n", "name", + "-p", "password", "-d", "driver"}; + Assert.assertEquals(0, bl.initArgs(args)); + Assert.assertFalse(bl.getOpts().getSyntaxHighlighting()); + } + @Test public void testEmptyHiveConfVariable() throws Exception { ByteArrayOutputStream os = new ByteArrayOutputStream(); diff --git a/beeline/src/test/org/apache/hive/beeline/TestHiveSqlHighlighter.java b/beeline/src/test/org/apache/hive/beeline/TestHiveSqlHighlighter.java new file mode 100644 index 000000000000..31859332e4d5 --- /dev/null +++ b/beeline/src/test/org/apache/hive/beeline/TestHiveSqlHighlighter.java @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hive.beeline; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import org.jline.reader.LineReader; +import org.jline.utils.AttributedString; +import org.jline.utils.AttributedStyle; +import org.junit.Test; + +/** + * Unit tests for {@link HiveSqlHighlighter}. + */ +public class TestHiveSqlHighlighter { + + private final HiveSqlHighlighter on = new HiveSqlHighlighter(() -> true); + + /** Style at character offset {@code idx} when highlighting {@code sql}. */ + private AttributedStyle styleAt(String sql, int idx) { + return on.highlight(sql).styleAt(idx); + } + + @Test + public void testKeyword() { + assertEquals(HiveSqlHighlighter.KEYWORD_STYLE, styleAt("SELECT", 0)); + assertEquals(HiveSqlHighlighter.KEYWORD_STYLE, styleAt("INSERT OVERWRITE", 0)); + assertEquals(HiveSqlHighlighter.KEYWORD_STYLE, styleAt("INSERT OVERWRITE", 7)); + } + + @Test + public void testKeywordCaseInsensitive() { + assertEquals(HiveSqlHighlighter.KEYWORD_STYLE, styleAt("select", 0)); + assertEquals(HiveSqlHighlighter.KEYWORD_STYLE, styleAt("SeLeCt", 0)); + assertEquals(HiveSqlHighlighter.KEYWORD_STYLE, styleAt("where", 0)); + } + + @Test + public void testDataType() { + assertEquals(HiveSqlHighlighter.TYPE_STYLE, styleAt("INT", 0)); + assertEquals(HiveSqlHighlighter.TYPE_STYLE, styleAt("string", 0)); + assertEquals(HiveSqlHighlighter.TYPE_STYLE, styleAt("ARRAY", 0)); + // a type must not be reported as a generic keyword + assertTrue(HiveSqlHighlighter.TYPES.contains("INT")); + assertTrue(!HiveSqlHighlighter.KEYWORDS.contains("INT")); + } + + @Test + public void testConstant() { + assertEquals(HiveSqlHighlighter.CONSTANT_STYLE, styleAt("NULL", 0)); + assertEquals(HiveSqlHighlighter.CONSTANT_STYLE, styleAt("true", 0)); + assertEquals(HiveSqlHighlighter.CONSTANT_STYLE, styleAt("FALSE", 0)); + } + + @Test + public void testStringLiteral() { + assertEquals(HiveSqlHighlighter.STRING_STYLE, styleAt("'abc'", 0)); + assertEquals(HiveSqlHighlighter.STRING_STYLE, styleAt("'abc'", 2)); + assertEquals(HiveSqlHighlighter.STRING_STYLE, styleAt("\"abc\"", 0)); + // an escaped quote does not end the string + assertEquals(HiveSqlHighlighter.STRING_STYLE, styleAt("'a\\'b'", 4)); + } + + @Test + public void testUnterminatedStringColorsToEnd() { + String sql = "x = 'abc"; + assertEquals(HiveSqlHighlighter.STRING_STYLE, styleAt(sql, sql.length() - 1)); + } + + @Test + public void testNumberLiteral() { + assertEquals(HiveSqlHighlighter.NUMBER_STYLE, styleAt("42", 0)); + assertEquals(HiveSqlHighlighter.NUMBER_STYLE, styleAt("3.14", 0)); + assertEquals(HiveSqlHighlighter.NUMBER_STYLE, styleAt("1e9", 0)); + // a number embedded in an identifier is part of the (column) identifier + assertEquals(HiveSqlHighlighter.COLUMN_STYLE, styleAt("col1", 3)); + } + + @Test + public void testLineComment() { + assertEquals(HiveSqlHighlighter.COMMENT_STYLE, styleAt("-- a comment", 0)); + assertEquals(HiveSqlHighlighter.COMMENT_STYLE, styleAt("-- a comment", 5)); + // comment ends at newline; the next line is highlighted again + String sql = "-- c\nSELECT"; + assertEquals(HiveSqlHighlighter.KEYWORD_STYLE, styleAt(sql, 5)); + } + + @Test + public void testBlockComment() { + assertEquals(HiveSqlHighlighter.COMMENT_STYLE, styleAt("/* x */", 0)); + assertEquals(HiveSqlHighlighter.COMMENT_STYLE, styleAt("/* x */", 3)); + } + + @Test + public void testFunctionCall() { + // count is not a Hive grammar keyword; followed by '(' -> function + assertEquals(HiveSqlHighlighter.FUNCTION_STYLE, styleAt("count(*)", 0)); + assertEquals(HiveSqlHighlighter.FUNCTION_STYLE, styleAt("my_udf (x)", 0)); + // a bare identifier (no FROM context, no '(') is a column + assertEquals(HiveSqlHighlighter.COLUMN_STYLE, styleAt("mytable", 0)); + } + + @Test + public void testTableVsColumn() { + String sql = "SELECT a, b FROM sales"; + assertEquals(HiveSqlHighlighter.COLUMN_STYLE, styleAt(sql, sql.indexOf("a,"))); + assertEquals(HiveSqlHighlighter.COLUMN_STYLE, styleAt(sql, sql.indexOf("b "))); + assertEquals(HiveSqlHighlighter.TABLE_STYLE, styleAt(sql, sql.indexOf("sales"))); + + // both relations in a join are tables + String j = "FROM t1 JOIN t2"; + assertEquals(HiveSqlHighlighter.TABLE_STYLE, styleAt(j, j.indexOf("t1"))); + assertEquals(HiveSqlHighlighter.TABLE_STYLE, styleAt(j, j.indexOf("t2"))); + + // CREATE TABLE name -> table; the column inside the parens -> column + String c = "CREATE TABLE foo (id INT)"; + assertEquals(HiveSqlHighlighter.TABLE_STYLE, styleAt(c, c.indexOf("foo"))); + assertEquals(HiveSqlHighlighter.COLUMN_STYLE, styleAt(c, c.indexOf("id"))); + + // qualifier in alias.column -> alias is table-colored, the field is a column + String d = "WHERE t.amount > 0"; + assertEquals(HiveSqlHighlighter.TABLE_STYLE, styleAt(d, d.indexOf("t."))); + assertEquals(HiveSqlHighlighter.COLUMN_STYLE, styleAt(d, d.indexOf("amount"))); + } + + @Test + public void testMixedStatement() { + String sql = "SELECT count(*) FROM t1 WHERE x = 1"; + assertEquals(HiveSqlHighlighter.KEYWORD_STYLE, styleAt(sql, sql.indexOf("SELECT"))); + assertEquals(HiveSqlHighlighter.FUNCTION_STYLE, styleAt(sql, sql.indexOf("count"))); + assertEquals(HiveSqlHighlighter.KEYWORD_STYLE, styleAt(sql, sql.indexOf("FROM"))); + assertEquals(HiveSqlHighlighter.TABLE_STYLE, styleAt(sql, sql.indexOf("t1"))); + assertEquals(HiveSqlHighlighter.KEYWORD_STYLE, styleAt(sql, sql.indexOf("WHERE"))); + assertEquals(HiveSqlHighlighter.COLUMN_STYLE, styleAt(sql, sql.indexOf("x "))); + assertEquals(HiveSqlHighlighter.NUMBER_STYLE, styleAt(sql, sql.length() - 1)); + } + + @Test + public void testDisabledReturnsPlainText() { + HiveSqlHighlighter off = new HiveSqlHighlighter(() -> false); + AttributedString out = off.highlight((LineReader) null, "SELECT * FROM t"); + assertEquals("SELECT * FROM t", out.toString()); + assertEquals(AttributedStyle.DEFAULT, out.styleAt(0)); + } + + @Test + public void testEnabledThroughPublicApi() { + AttributedString out = on.highlight((LineReader) null, "SELECT"); + assertEquals(HiveSqlHighlighter.KEYWORD_STYLE, out.styleAt(0)); + assertEquals("SELECT", out.toString()); + } + + @Test + public void testNullBufferIsSafe() { + assertEquals("", on.highlight((LineReader) null, null).toString()); + } +} From 96e7f320a1291e5748d45c954460e1e936ac1a40 Mon Sep 17 00:00:00 2001 From: Attila Turoczy Date: Sun, 14 Jun 2026 13:19:23 +0200 Subject: [PATCH 2/2] HIVE-29664: Address SonarCloud findings --- .../hive/beeline/HiveSqlHighlighter.java | 160 ++++++++++-------- .../hive/beeline/TestBeelineArgParsing.java | 10 +- 2 files changed, 99 insertions(+), 71 deletions(-) diff --git a/beeline/src/java/org/apache/hive/beeline/HiveSqlHighlighter.java b/beeline/src/java/org/apache/hive/beeline/HiveSqlHighlighter.java index 62747ae28382..25ced1039521 100644 --- a/beeline/src/java/org/apache/hive/beeline/HiveSqlHighlighter.java +++ b/beeline/src/java/org/apache/hive/beeline/HiveSqlHighlighter.java @@ -142,78 +142,72 @@ public AttributedString highlight(LineReader reader, String buffer) { AttributedString highlight(String buffer) { AttributedStringBuilder sb = new AttributedStringBuilder(); + Context ctx = new Context(); int n = buffer.length(); int i = 0; - - String prevKw = ""; while (i < n) { - char c = buffer.charAt(i); - - if (c == '-' && i + 1 < n && buffer.charAt(i + 1) == '-') { - int end = buffer.indexOf('\n', i); - if (end < 0) { - end = n; - } - sb.append(buffer.substring(i, end), COMMENT_STYLE); - i = end; - continue; - } - - if (c == '/' && i + 1 < n && buffer.charAt(i + 1) == '*') { - int end = buffer.indexOf("*/", i + 2); - end = (end < 0) ? n : end + 2; - sb.append(buffer.substring(i, end), COMMENT_STYLE); - i = end; - continue; - } - - if (c == '\'' || c == '"') { - int end = scanString(buffer, i, c); - sb.append(buffer.substring(i, end), STRING_STYLE); - i = end; - continue; - } - - if (c == '`') { - int end = i + 1; - while (end < n && buffer.charAt(end) != '`') { - end++; - } - end = Math.min(end + 1, n); - sb.append(buffer.substring(i, end), DEFAULT_STYLE); - i = end; - continue; - } + i = scanToken(sb, buffer, i, ctx); + } + return sb.toAttributedString(); + } - if (isDigit(c) || (c == '.' && i + 1 < n && isDigit(buffer.charAt(i + 1)))) { - int end = scanNumber(buffer, i); - sb.append(buffer.substring(i, end), NUMBER_STYLE); - i = end; - continue; - } + /** Carry-over state between tokens: the last structural keyword seen. */ + private static final class Context { + private String prevKw = ""; + } - if (isIdentStart(c)) { - int end = i + 1; - while (end < n && isIdentPart(buffer.charAt(end))) { - end++; - } - String word = buffer.substring(i, end); - String upper = word.toUpperCase(); - sb.append(word, styleForWord(upper, buffer, end, prevKw)); - if (KEYWORDS.contains(upper)) { - prevKw = upper; - } - i = end; - continue; - } + /** Append the single token starting at {@code i}; returns the index just past it. */ + private int scanToken(AttributedStringBuilder sb, String buf, int i, Context ctx) { + char c = buf.charAt(i); + if (isLineCommentAt(buf, i)) { + int end = lineCommentEnd(buf, i); + sb.append(buf.substring(i, end), COMMENT_STYLE); + return end; + } + if (isBlockCommentAt(buf, i)) { + int end = blockCommentEnd(buf, i); + sb.append(buf.substring(i, end), COMMENT_STYLE); + return end; + } + if (c == '\'' || c == '"') { + int end = scanString(buf, i, c); + sb.append(buf.substring(i, end), STRING_STYLE); + return end; + } + if (c == '`') { + int end = scanQuotedIdentifier(buf, i); + sb.append(buf.substring(i, end), DEFAULT_STYLE); + return end; + } + if (isNumberStartAt(buf, i)) { + int end = scanNumber(buf, i); + sb.append(buf.substring(i, end), NUMBER_STYLE); + return end; + } + if (isIdentStart(c)) { + return scanWord(sb, buf, i, ctx); + } + sb.append(c); + if (c == '(' || c == ';') { + ctx.prevKw = ""; + } + return i + 1; + } - sb.append(c); - if (c == '(' || c == ';') { - prevKw = ""; - } - i++; + /** Append an identifier/keyword token, update table-context, and return the end index. */ + private int scanWord(AttributedStringBuilder sb, String buf, int i, Context ctx) { + int n = buf.length(); + int end = i + 1; + while (end < n && isIdentPart(buf.charAt(end))) { + end++; } - return sb.toAttributedString(); + String word = buf.substring(i, end); + String upper = word.toUpperCase(); + sb.append(word, styleForWord(upper, buf, end, ctx.prevKw)); + if (KEYWORDS.contains(upper)) { + ctx.prevKw = upper; + } + return end; } private AttributedStyle styleForWord(String upper, String buffer, int wordEnd, String prevKw) { @@ -286,6 +280,38 @@ private static int scanNumber(String s, int start) { return i; } + private static boolean isLineCommentAt(String s, int i) { + return s.charAt(i) == '-' && i + 1 < s.length() && s.charAt(i + 1) == '-'; + } + + private static boolean isBlockCommentAt(String s, int i) { + return s.charAt(i) == '/' && i + 1 < s.length() && s.charAt(i + 1) == '*'; + } + + private static boolean isNumberStartAt(String s, int i) { + char c = s.charAt(i); + return isDigit(c) || (c == '.' && i + 1 < s.length() && isDigit(s.charAt(i + 1))); + } + + private static int lineCommentEnd(String s, int i) { + int end = s.indexOf('\n', i); + return end < 0 ? s.length() : end; + } + + private static int blockCommentEnd(String s, int i) { + int end = s.indexOf("*/", i + 2); + return end < 0 ? s.length() : end + 2; + } + + private static int scanQuotedIdentifier(String s, int start) { + int n = s.length(); + int end = start + 1; + while (end < n && s.charAt(end) != '`') { + end++; + } + return Math.min(end + 1, n); + } + private static boolean isDigit(char c) { return c >= '0' && c <= '9'; } @@ -306,12 +332,14 @@ private static Set immutableUpper(String... words) { return Collections.unmodifiableSet(s); } - // We do our own coloring; JLine's parser-error highlighting hooks are unused. @Override public void setErrorPattern(Pattern errorPattern) { + // No-op: this highlighter colorizes tokens itself and does not use JLine's + // parser-error highlighting hooks. } @Override public void setErrorIndex(int errorIndex) { + // No-op: see setErrorPattern. } } diff --git a/beeline/src/test/org/apache/hive/beeline/TestBeelineArgParsing.java b/beeline/src/test/org/apache/hive/beeline/TestBeelineArgParsing.java index 83c04bf35f4b..57c60ee8ce23 100644 --- a/beeline/src/test/org/apache/hive/beeline/TestBeelineArgParsing.java +++ b/beeline/src/test/org/apache/hive/beeline/TestBeelineArgParsing.java @@ -132,7 +132,7 @@ public void testSimpleArgs() throws Exception { } @Test - public void testSyntaxHighlightingDefaultsOn() throws Exception { + public void testSyntaxHighlightingDefaultsOn() { TestBeeline bl = new TestBeeline(); String[] args = new String[] {"-u", "url", "-n", "name", "-p", "password", "-d", "driver"}; Assert.assertEquals(0, bl.initArgs(args)); @@ -140,19 +140,19 @@ public void testSyntaxHighlightingDefaultsOn() throws Exception { } @Test - public void testSyntaxHighlightingDisabledByArg() throws Exception { + public void testSyntaxHighlightingDisabledByArg() { TestBeeline bl = new TestBeeline(); String[] args = new String[] {"--syntaxHighlighting=false", "-u", "url", "-n", "name", - "-p", "password", "-d", "driver"}; + "-p", "password", "-d", "driver"}; Assert.assertEquals(0, bl.initArgs(args)); Assert.assertFalse(bl.getOpts().getSyntaxHighlighting()); } @Test - public void testSyntaxHighlightingArgIsCaseInsensitive() throws Exception { + public void testSyntaxHighlightingArgIsCaseInsensitive() { TestBeeline bl = new TestBeeline(); String[] args = new String[] {"--syntaxhighlighting=false", "-u", "url", "-n", "name", - "-p", "password", "-d", "driver"}; + "-p", "password", "-d", "driver"}; Assert.assertEquals(0, bl.initArgs(args)); Assert.assertFalse(bl.getOpts().getSyntaxHighlighting()); }