diff --git a/lib/rouge/lexers/scala.rb b/lib/rouge/lexers/scala.rb index 0758699b60..375ef4e9cb 100644 --- a/lib/rouge/lexers/scala.rb +++ b/lib/rouge/lexers/scala.rb @@ -13,29 +13,37 @@ class Scala < RegexLexer mimetypes 'text/x-scala', 'application/x-scala' # As documented in the ENBF section of the scala specification - # https://scala-lang.org/files/archive/spec/2.13/13-syntax-summary.html + # Scala 2: https://scala-lang.org/files/archive/spec/2.13/13-syntax-summary.html + # Scala 3: https://docs.scala-lang.org/scala3/reference/syntax.html # https://en.wikipedia.org/wiki/Unicode_character_property#General_Category whitespace = /\p{Space}/ letter = /[\p{L}$_]/ upper = /[\p{Lu}$_]/ + lower = /[\p{Ll}$_]/ digits = /[0-9]/ parens = /[(){}\[\]]/ - delims = %r([‘’".;,]) + delims = %r([''".;,]) # negative lookahead to filter out other classes op = %r( (?!#{whitespace}|#{letter}|#{digits}|#{parens}|#{delims}) - [-!#%&*/:?@\\^\p{Sm}\p{So}] + [-!#%&*/:?@\\^\p{Sm}\p{So}] # Basic operators and Unicode math/symbol chars )x - # manually removed +<=>|~ from regexp because they're in property Sm - # pp CHRS:(0x00..0x7f).map(&:chr).grep(/\p{Sm}/) + # Note: Some operators like +<=>|~ are in Unicode property Sm - idrest = %r(#{letter}(?:#{letter}|#{digits})*(?:(?<=_)#{op}+)?)x + idrest_core = %r((?:#{letter}|#{digits})*(?:_#{op}+)?)x + idrest = %r(#{lower}#{idrest_core})x + upper_idrest = %r(#{upper}#{idrest_core})x + + # For string interpolation prefixes like s"", f"" - simplified identifier + plain_interpol_id = %r(#{letter}(?:#{letter}|#{digits})*)x keywords = %w( abstract case catch def do else extends final finally for forSome if implicit lazy match new override private protected requires return sealed super this throw try val var while with yield + enum export given open transparent extension using derives then end + inline opaque infix transparent ) state :root do @@ -49,6 +57,17 @@ class Scala < RegexLexer rule %r(//.*), Comment::Single rule %r(/\*), Comment::Multiline, :comment + # Interpolated strings: s"...", f"""...""", etc. + # Must be before general string rules and identifier rules that might catch the prefix. + # s"..." + rule %r/(#{plain_interpol_id})(")((?:\\(?:["\\\/bfnrt']|u[0-9a-fA-F]{4})|[^"\\])*?)(")/ do + groups Name::Tag, Str, Str, Str + end + # s"""...""" + rule %r/(#{plain_interpol_id})(""".*?"""(?!"))/m do + groups Name::Tag, Str + end + rule %r/@#{idrest}/, Name::Decorator rule %r/(def)(\s+)(#{idrest}|#{op}+|`[^`]+`)(\s*)/ do @@ -67,7 +86,7 @@ class Scala < RegexLexer groups Name::Variable, Text, Operator, Name::Property end - rule %r/#{upper}#{idrest}\b/, Name::Class + rule %r/#{upper_idrest}\b/, Name::Class rule %r/(#{idrest})(#{whitespace}*)(\()/ do groups Name::Function, Text, Operator @@ -95,7 +114,8 @@ class Scala < RegexLexer rule %r/""".*?"""(?!")/m, Str rule %r/"(\\\\|\\"|[^"])*"/, Str - rule %r/'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'/, Str::Char + rule %r/'([^'\\]|\\[\\'"bfnrt]|\\u[0-9a-fA-F]{4})'/, Str::Char + rule %r/'[^']*'/, Str rule idrest, Name rule %r/`[^`]+`/, Name @@ -104,15 +124,40 @@ class Scala < RegexLexer rule %r/[\(\)\{\};,.#]/, Operator rule %r/#{op}+/, Operator - rule %r/([0-9][0-9]*\.[0-9]*|\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?/, Num::Float - rule %r/([0-9][0-9]*[fFdD])/, Num::Float - rule %r/0x[0-9a-fA-F]+/, Num::Hex - rule %r/[0-9]+L?/, Num::Integer + # Order: Floats, Binary, Hex, Decimal Integers. + # Floating point with optional underscores + rule %r/([0-9](?:_?[0-9])*\.(?:_?[0-9])*(?:[eE][+-]?[0-9](?:_?[0-9])*)?|\.[0-9](?:_?[0-9])*(?:[eE][+-]?[0-9](?:_?[0-9])*)?)[fFdD]?/, Num::Float + rule %r/([0-9](?:_?[0-9])*)[fFdD]/, Num::Float + + # Binary literals (e.g., 0b1010, 0B1_0_1L) + rule %r/0[bB][01](?:_?[01])*[lL]?/, Num::Integer + # Hex literals (e.g., 0xFF, 0xAB_CDL) + rule %r/0[xX][0-9a-fA-F](?:_?[0-9a-fA-F])*[lL]?/, Num::Hex + # Decimal integers (e.g., 123, 1_000_000L) + # This must be after float, hex, bin which might also start with digits. + rule %r/[0-9](?:_?[0-9])*[lL]?/, Num::Integer + rule %r/\n/, Text + + # End markers for control structures and definitions + rule %r/(end)(\s+)(if|while|for|match|try|new|this|given|extension|val|def|class|object|trait)\b/ do + groups Keyword, Text, Keyword + end + + # Type operators for union and intersection types + rule %r/[&|](?![&|])/, Operator + + # Named type arguments + rule %r/([A-Z]\w*)(\s*)(=)(?=\s*[A-Z])/ do + groups Name::Class, Text, Operator + end + + # Context function type + rule %r/\?=>/, Operator end state :class do - rule %r/(#{idrest}|#{op}+|`[^`]+`)(\s*)(\[)/ do + rule %r/(#{idrest}|#{upper_idrest}|#{op}+|`[^`]+`)(\s*)(\[)/ do groups Name::Class, Text, Operator push :typeparam end @@ -121,7 +166,7 @@ class Scala < RegexLexer rule %r/{/, Operator, :pop! rule %r/\(/, Operator, :pop! rule %r(//.*), Comment::Single, :pop! - rule %r(#{idrest}|#{op}+|`[^`]+`), Name::Class, :pop! + rule %r(#{idrest}|#{upper_idrest}|#{op}+|`[^`]+`), Name::Class, :pop! end state :type do @@ -133,7 +178,7 @@ class Scala < RegexLexer end rule %r/[\(\{]/, Operator, :type - typechunk = /(?:#{idrest}|#{op}+\`[^`]+`)/ + typechunk = /(?:#{idrest}|#{upper_idrest}|#{op}+\`[^`]+`)/ rule %r/(#{typechunk}(?:\.#{typechunk})*)(\s*)(\[)/ do groups Keyword::Type, Text, Operator pop! @@ -146,7 +191,7 @@ class Scala < RegexLexer end rule %r(//.*), Comment::Single, :pop! - rule %r/\.|#{idrest}|#{op}+|`[^`]+`/, Keyword::Type + rule %r/\.|#{idrest}|#{upper_idrest}|#{op}+|`[^`]+`/, Keyword::Type end state :typeparam do @@ -154,7 +199,7 @@ class Scala < RegexLexer rule %r/<[%:]|=>|>:|[#_\u21D2]|forSome|type/, Keyword rule %r/([\]\)\}])/, Operator, :pop! rule %r/[\(\[\{]/, Operator, :typeparam - rule %r/\.|#{idrest}|#{op}+|`[^`]+`/, Keyword::Type + rule %r/\.|#{idrest}|#{upper_idrest}|#{op}+|`[^`]+`/, Keyword::Type end state :comment do @@ -165,7 +210,12 @@ class Scala < RegexLexer end state :import do - rule %r((#{idrest}|\.)+), Name::Namespace, :pop! + # Handle 'as' imports with optional whitespace + rule %r/((#{idrest}|#{upper_idrest}|\.)+)(\s*)(as)(\s*)(#{idrest}|#{upper_idrest})/ do |m| + groups Name::Namespace, Text, Keyword, Text, Name::Namespace + pop! + end + rule %r/((#{idrest}|#{upper_idrest}|\.)+)/, Name::Namespace, :pop! end end end diff --git a/spec/visual/samples/scala b/spec/visual/samples/scala index 3fd6cfc579..111fc98fe4 100644 --- a/spec/visual/samples/scala +++ b/spec/visual/samples/scala @@ -1,7 +1,7 @@ package whatever.mine import foo.bar.{Foo, Bar => Baz} - +import baz.bar.{ Baz as Bar } /* This file /* which is totally legal scala */ should be highlighted correcty by rouge */ @@ -68,4 +68,179 @@ StorageState.table(StorageState.NewUsers()).format( keyParams('app_id).asInstanceOf[String] ) +// Scala 3 Features + +// Enums +enum Color(val rgb: Int): + case Red extends Color(0xFF0000) + case Green extends Color(0x00FF00) + case Blue extends Color(0x0000FF) + case Yellow // simple case + +// Given/Using clauses +trait Ord[T]: + def compare(x: T, y: T): Int + extension (x: T) + def < (y: T) = compare(x, y) < 0 + def > (y: T) = compare(x, y) > 0 + +given intOrd: Ord[Int] with + def compare(x: Int, y: Int) = + if x < y then -1 else if x > y then 1 else 0 + +given listOrd[T](using ord: Ord[T]): Ord[List[T]] with + def compare(xs: List[T], ys: List[T]): Int = + (xs, ys) match + case (Nil, Nil) => 0 + case (Nil, _) => -1 + case (_, Nil) => 1 + case (x :: xt, y :: yt) => + val fst = ord.compare(x,y) + if fst != 0 then fst else compare(xt, yt) + +def sort[A](xs: List[A])(using Ord[A]): List[A] = + xs.sorted // uses listOrd and intOrd implicitly for List[Int] + +val sortedInts = sort(List(3_000, 1, -20_000_000)) + +// Extension Methods +extension (s: String) + def capitalized: String = s.toUpperCase + def twice: String = s + " " + s + +val greeting = "hello".capitalized.twice + +// New Control Syntax (if/then/else, optional braces) +def checkSign(x: Int): String = + if x > 0 then + "positive" + else if x < 0 then + "negative" + else + "zero" + +var count = 3 +while count > 0 do + println(s"Count is: $count") // String interpolation + count -= 1 + +// Binary Literals +val binary1 = 0b101010 +val binary2 = 0B0011_0011 +val binaryLong = 0b1111_0000_1111_0000L + +// Multiline String Interpolation +val name = "Scala 3" +val version = 3.3 +val multilineInterpolated = s"""This is + a multiline string + for $name, version $version. + A raw one: raw"""\u0041\n""" // raw interpolator +""" + +// Export Clauses +class Service: + def operation(x: Int): String = s"Result: ${x * 2}" + +class Client(s: Service): + export s.operation + +val service = new Service +val client = new Client(service) +val clientResult = client.operation(101_010) // uses exported method + +// Opaque Type Aliases +opaque type Logarithm = Double + +object Logarithm: + def apply(d: Double): Logarithm = math.log(d) + def toDouble(l: Logarithm): Double = math.exp(l) + +extension (l: Logarithm) + def + (other: Logarithm): Logarithm = Logarithm(toDouble(l) * toDouble(other)) + +val log2 = Logarithm(2.0) +val log3 = Logarithm(3.0) +val combinedLog = log2 + log3 + +// End markers +def processList(items: List[String]): Unit = + if items.nonEmpty then + for item <- items do + println(item) + end for + println("Done with items") + end if + println("List processing finished") +end processList + +val anIdentifier_with_op_! = 42 + // Comment at EOF + +// Match expressions with end marker +def describe(x: Any): String = + match x + case i: Int if i > 0 => "positive number" + case 0 => "zero" + case s: String => s"string: $s" + case _ => "something else" + end match + +type Resettable = { def reset(): Unit } +type Growable[T] = { def add(t: T): Unit } + +def combine[T](x: Resettable & Growable[T]): Unit = ??? + +def help(id: String | Int) = + id match + case s: String => println(s"String ID: $s") + case i: Int => println(s"Int ID: $i") + +trait Animal: + def speak: String + def eat(food: String): Unit + +class Dog extends Animal: + def speak = "Woof!" + def eat(food: String) = + println(s"Dog eating $food") + end eat +end Dog + +import scala.compiletime.{error, requireConst} +transparent inline def validate(inline str: String): Unit = + requireConst(str) + if str.isEmpty then + error("Empty string not allowed") + +type Executable[T] = ExecutionContext ?=> T +def executeAsync(x: Int)(using ctx: ExecutionContext): Unit = ??? + +def generic[A, B](x: A, y: B) = (x, y) +val result = generic[A = String, B = Int]("hello", 42) + +// End markers for different constructs +object ComplexExample: + class Inner: + def process(items: List[Int]): Int = + if items.isEmpty then + println("Empty list") + 0 + else + var sum = 0 + for + x <- items + if x > 0 + y = x * 2 + do + sum += y + println(s"Processing $x") + end for + sum + end if + end process + end Inner +end ComplexExample + +// Comment at EOF \ No newline at end of file