Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 68 additions & 18 deletions lib/rouge/lexers/scala.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,29 +13,37 @@ class Scala < RegexLexer
mimetypes 'text/x-scala', 'application/x-scala'

# As documented in the EBNF section of the Scala specification
# https://scala-lang.org/files/archive/spec/2.13/13-syntax-summary.html
# Scala 2: https://scala-lang.org/files/archive/spec/2.13/13-syntax-summary.html
# Scala 3: https://docs.scala-lang.org/scala3/reference/syntax.html
# https://en.wikipedia.org/wiki/Unicode_character_property#General_Category
whitespace = /\p{Space}/
letter = /[\p{L}$_]/
upper = /[\p{Lu}$_]/
lower = /[\p{Ll}$_]/
digits = /[0-9]/
parens = /[(){}\[\]]/
delims = %r([‘’".;,])
delims = %r([''".;,])

# negative lookahead to filter out other classes
op = %r(
(?!#{whitespace}|#{letter}|#{digits}|#{parens}|#{delims})
[-!#%&*/:?@\\^\p{Sm}\p{So}]
[-!#%&*/:?@\\^\p{Sm}\p{So}] # Basic operators and Unicode math/symbol chars
)x
# manually removed +<=>|~ from regexp because they're in property Sm
# pp CHRS:(0x00..0x7f).map(&:chr).grep(/\p{Sm}/)
# Note: Some operators like +<=>|~ are in Unicode property Sm

idrest = %r(#{letter}(?:#{letter}|#{digits})*(?:(?<=_)#{op}+)?)x
idrest_core = %r((?:#{letter}|#{digits})*(?:_#{op}+)?)x
idrest = %r(#{lower}#{idrest_core})x
upper_idrest = %r(#{upper}#{idrest_core})x

# For string interpolation prefixes like s"", f"" - simplified identifier
plain_interpol_id = %r(#{letter}(?:#{letter}|#{digits})*)x

keywords = %w(
abstract case catch def do else extends final finally for forSome
if implicit lazy match new override private protected requires return
sealed super this throw try val var while with yield
enum export given open transparent extension using derives then end
inline opaque infix transparent
)

state :root do
Expand All @@ -49,6 +57,17 @@ class Scala < RegexLexer
rule %r(//.*), Comment::Single
rule %r(/\*), Comment::Multiline, :comment

# Interpolated strings: s"...", f"""...""", etc.
# Must be before general string rules and identifier rules that might catch the prefix.
# s"..."
rule %r/(#{plain_interpol_id})(")((?:\\(?:["\\\/bfnrt']|u[0-9a-fA-F]{4})|[^"\\])*?)(")/ do
groups Name::Tag, Str, Str, Str
end
# s"""..."""
rule %r/(#{plain_interpol_id})(""".*?"""(?!"))/m do
groups Name::Tag, Str
end

rule %r/@#{idrest}/, Name::Decorator

rule %r/(def)(\s+)(#{idrest}|#{op}+|`[^`]+`)(\s*)/ do
Expand All @@ -67,7 +86,7 @@ class Scala < RegexLexer
groups Name::Variable, Text, Operator, Name::Property
end

rule %r/#{upper}#{idrest}\b/, Name::Class
rule %r/#{upper_idrest}\b/, Name::Class

rule %r/(#{idrest})(#{whitespace}*)(\()/ do
groups Name::Function, Text, Operator
Expand Down Expand Up @@ -95,7 +114,8 @@ class Scala < RegexLexer

rule %r/""".*?"""(?!")/m, Str
rule %r/"(\\\\|\\"|[^"])*"/, Str
rule %r/'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'/, Str::Char
rule %r/'([^'\\]|\\[\\'"bfnrt]|\\u[0-9a-fA-F]{4})'/, Str::Char
rule %r/'[^']*'/, Str

rule idrest, Name
rule %r/`[^`]+`/, Name
Expand All @@ -104,15 +124,40 @@ class Scala < RegexLexer
rule %r/[\(\)\{\};,.#]/, Operator
rule %r/#{op}+/, Operator

rule %r/([0-9][0-9]*\.[0-9]*|\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?/, Num::Float
rule %r/([0-9][0-9]*[fFdD])/, Num::Float
rule %r/0x[0-9a-fA-F]+/, Num::Hex
rule %r/[0-9]+L?/, Num::Integer
# Order: Floats, Binary, Hex, Decimal Integers.
# Floating point with optional underscores
rule %r/([0-9](?:_?[0-9])*\.(?:_?[0-9])*(?:[eE][+-]?[0-9](?:_?[0-9])*)?|\.[0-9](?:_?[0-9])*(?:[eE][+-]?[0-9](?:_?[0-9])*)?)[fFdD]?/, Num::Float
rule %r/([0-9](?:_?[0-9])*)[fFdD]/, Num::Float

# Binary literals (e.g., 0b1010, 0B1_0_1L)
rule %r/0[bB][01](?:_?[01])*[lL]?/, Num::Integer
# Hex literals (e.g., 0xFF, 0xAB_CDL)
rule %r/0[xX][0-9a-fA-F](?:_?[0-9a-fA-F])*[lL]?/, Num::Hex
# Decimal integers (e.g., 123, 1_000_000L)
# This must be after float, hex, bin which might also start with digits.
rule %r/[0-9](?:_?[0-9])*[lL]?/, Num::Integer

rule %r/\n/, Text

# End markers for control structures and definitions
rule %r/(end)(\s+)(if|while|for|match|try|new|this|given|extension|val|def|class|object|trait)\b/ do
groups Keyword, Text, Keyword
end

# Type operators for union and intersection types
rule %r/[&|](?![&|])/, Operator

# Named type arguments
rule %r/([A-Z]\w*)(\s*)(=)(?=\s*[A-Z])/ do
groups Name::Class, Text, Operator
end

# Context function type
rule %r/\?=>/, Operator
end

state :class do
rule %r/(#{idrest}|#{op}+|`[^`]+`)(\s*)(\[)/ do
rule %r/(#{idrest}|#{upper_idrest}|#{op}+|`[^`]+`)(\s*)(\[)/ do
groups Name::Class, Text, Operator
push :typeparam
end
Expand All @@ -121,7 +166,7 @@ class Scala < RegexLexer
rule %r/{/, Operator, :pop!
rule %r/\(/, Operator, :pop!
rule %r(//.*), Comment::Single, :pop!
rule %r(#{idrest}|#{op}+|`[^`]+`), Name::Class, :pop!
rule %r(#{idrest}|#{upper_idrest}|#{op}+|`[^`]+`), Name::Class, :pop!
end

state :type do
Expand All @@ -133,7 +178,7 @@ class Scala < RegexLexer
end
rule %r/[\(\{]/, Operator, :type

# One segment of a (possibly dotted) type name: a lower- or upper-case
# identifier, a run of operator characters, or a backquoted identifier.
# The backquoted form is its own alternative, matching the sibling rules
# in this state that use `#{op}+|`...``.
typechunk = /(?:#{idrest}|#{upper_idrest}|#{op}+|`[^`]+`)/
rule %r/(#{typechunk}(?:\.#{typechunk})*)(\s*)(\[)/ do
groups Keyword::Type, Text, Operator
pop!
Expand All @@ -146,15 +191,15 @@ class Scala < RegexLexer
end

rule %r(//.*), Comment::Single, :pop!
rule %r/\.|#{idrest}|#{op}+|`[^`]+`/, Keyword::Type
rule %r/\.|#{idrest}|#{upper_idrest}|#{op}+|`[^`]+`/, Keyword::Type
end

state :typeparam do
rule %r/[\s,]+/, Text
rule %r/<[%:]|=>|>:|[#_\u21D2]|forSome|type/, Keyword
rule %r/([\]\)\}])/, Operator, :pop!
rule %r/[\(\[\{]/, Operator, :typeparam
rule %r/\.|#{idrest}|#{op}+|`[^`]+`/, Keyword::Type
rule %r/\.|#{idrest}|#{upper_idrest}|#{op}+|`[^`]+`/, Keyword::Type
end

state :comment do
Expand All @@ -165,7 +210,12 @@ class Scala < RegexLexer
end

state :import do
  # Renaming import: `import some.path as alias`.
  # - The path alternation is non-capturing, so the regex has exactly five
  #   capture groups, one per token passed to `groups`.
  # - Whitespace on both sides of `as` is mandatory; otherwise a path that
  #   merely contains the letters "as" (e.g. `foo.basic`) would be split
  #   into `foo.b`, `as`, `ic`.
  # This rule must come before the plain-path rule, which would otherwise
  # consume the path and pop before `as` is seen.
  rule %r/((?:#{idrest}|#{upper_idrest}|\.)+)(\s+)(as)(\s+)(#{idrest}|#{upper_idrest})/ do
    groups Name::Namespace, Text, Keyword, Text, Name::Namespace
    pop!
  end

  # Plain import path: identifiers and dots, then leave the state.
  rule %r/(?:#{idrest}|#{upper_idrest}|\.)+/, Name::Namespace, :pop!
end
end
end
Expand Down
177 changes: 176 additions & 1 deletion spec/visual/samples/scala
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package whatever.mine

import foo.bar.{Foo, Bar => Baz}

import baz.bar.{ Baz as Bar }
/* This file /* which is totally legal scala */ should be highlighted
correctly by rouge */

Expand Down Expand Up @@ -68,4 +68,179 @@ StorageState.table(StorageState.NewUsers()).format(
keyParams('app_id).asInstanceOf[String]
)

// Scala 3 Features

// Enums
enum Color(val rgb: Int):
case Red extends Color(0xFF0000)
case Green extends Color(0x00FF00)
case Blue extends Color(0x0000FF)
case Yellow // simple case

// Given/Using clauses
trait Ord[T]:
def compare(x: T, y: T): Int
extension (x: T)
def < (y: T) = compare(x, y) < 0
def > (y: T) = compare(x, y) > 0

given intOrd: Ord[Int] with
def compare(x: Int, y: Int) =
if x < y then -1 else if x > y then 1 else 0

given listOrd[T](using ord: Ord[T]): Ord[List[T]] with
def compare(xs: List[T], ys: List[T]): Int =
(xs, ys) match
case (Nil, Nil) => 0
case (Nil, _) => -1
case (_, Nil) => 1
case (x :: xt, y :: yt) =>
val fst = ord.compare(x,y)
if fst != 0 then fst else compare(xt, yt)

def sort[A](xs: List[A])(using Ord[A]): List[A] =
xs.sorted // uses listOrd and intOrd implicitly for List[Int]

val sortedInts = sort(List(3_000, 1, -20_000_000))

// Extension Methods
extension (s: String)
def capitalized: String = s.toUpperCase
def twice: String = s + " " + s

val greeting = "hello".capitalized.twice

// New Control Syntax (if/then/else, optional braces)
def checkSign(x: Int): String =
if x > 0 then
"positive"
else if x < 0 then
"negative"
else
"zero"

var count = 3
while count > 0 do
println(s"Count is: $count") // String interpolation
count -= 1

// Binary Literals
val binary1 = 0b101010
val binary2 = 0B0011_0011
val binaryLong = 0b1111_0000_1111_0000L

// Multiline String Interpolation
val name = "Scala 3"
val version = 3.3
val multilineInterpolated = s"""This is
a multiline string
for $name, version $version.
A raw one: raw"""\u0041\n""" // raw interpolator
"""

// Export Clauses
class Service:
def operation(x: Int): String = s"Result: ${x * 2}"

class Client(s: Service):
export s.operation

val service = new Service
val client = new Client(service)
val clientResult = client.operation(101_010) // uses exported method

// Opaque Type Aliases
opaque type Logarithm = Double

object Logarithm:
def apply(d: Double): Logarithm = math.log(d)
def toDouble(l: Logarithm): Double = math.exp(l)

extension (l: Logarithm)
def + (other: Logarithm): Logarithm = Logarithm(toDouble(l) * toDouble(other))

val log2 = Logarithm(2.0)
val log3 = Logarithm(3.0)
val combinedLog = log2 + log3

// End markers
def processList(items: List[String]): Unit =
if items.nonEmpty then
for item <- items do
println(item)
end for
println("Done with items")
end if
println("List processing finished")
end processList

val anIdentifier_with_op_! = 42

// Comment at EOF

// Match expressions with end marker
// The scrutinee precedes `match`; `end match` closes the indented match block.
def describe(x: Any): String =
  x match
    case i: Int if i > 0 => "positive number"
    case 0 => "zero"
    case s: String => s"string: $s"
    case _ => "something else"
  end match

type Resettable = { def reset(): Unit }
type Growable[T] = { def add(t: T): Unit }

def combine[T](x: Resettable & Growable[T]): Unit = ???

def help(id: String | Int) =
id match
case s: String => println(s"String ID: $s")
case i: Int => println(s"Int ID: $i")

trait Animal:
def speak: String
def eat(food: String): Unit

class Dog extends Animal:
def speak = "Woof!"
def eat(food: String) =
println(s"Dog eating $food")
end eat
end Dog

import scala.compiletime.{error, requireConst}
transparent inline def validate(inline str: String): Unit =
requireConst(str)
if str.isEmpty then
error("Empty string not allowed")

type Executable[T] = ExecutionContext ?=> T
def executeAsync(x: Int)(using ctx: ExecutionContext): Unit = ???

def generic[A, B](x: A, y: B) = (x, y)
val result = generic[A = String, B = Int]("hello", 42)

// End markers for different constructs
object ComplexExample:
class Inner:
def process(items: List[Int]): Int =
if items.isEmpty then
println("Empty list")
0
else
var sum = 0
for
x <- items
if x > 0
y = x * 2
do
sum += y
println(s"Processing $x")
end for
sum
end if
end process
end Inner
end ComplexExample

// Comment at EOF
Loading