diff --git a/pom.xml b/pom.xml index 2fe6ca3a855..bb8fd240d00 100644 --- a/pom.xml +++ b/pom.xml @@ -173,7 +173,7 @@ false - false + true ${project.build.outputDirectory} ${project.basedir}/FUNDING ${project.basedir}/CITATION.md diff --git a/src/org/rascalmpl/library/IO.rsc b/src/org/rascalmpl/library/IO.rsc index 5e3255b4f67..a1d8f235c49 100644 --- a/src/org/rascalmpl/library/IO.rsc +++ b/src/org/rascalmpl/library/IO.rsc @@ -140,8 +140,9 @@ Append a textual representation of some values to an existing or a newly created * All other values are printed as-is. * Each value is terminated by a newline character. -The existing file can be stored using any character set possible, if you know the character set, please use ((appendToFileEnc)). -Else the same method of deciding the character set is used as in ((readFile)). +The existing file can be stored using any character set possible. +If you know the character set, please use the `charset` keyword parameter. +Otherwise, the same method of deciding the character set is used as in ((readFile)). } @pitfalls{ * The same encoding pitfalls as the ((readFile)) function. diff --git a/src/org/rascalmpl/library/List.rsc b/src/org/rascalmpl/library/List.rsc index aa7be84cc1c..6a69285dca9 100644 --- a/src/org/rascalmpl/library/List.rsc +++ b/src/org/rascalmpl/library/List.rsc @@ -19,7 +19,7 @@ module List import Exception; import Map; - +import String; @synopsis{Concatenate a list of lists.} @examples{ @@ -259,8 +259,7 @@ intercalate(", ", ["zebra", "elephant", "snake", "owl"]); ``` } str intercalate(str sep, list[value] l) = - "<}>"; - + "<}>"[..-size(sep)]; @synopsis{Intersperses a list of values with a separator.} @examples{ @@ -272,9 +271,8 @@ intersperse(1, []); intersperse([], [1]); ``` } -list[&T] intersperse(&T sep, list[&T] xs) = - (isEmpty(xs))? 
[] : ([head(xs)] | it + [sep,x] | x <- tail(xs)); - +list[&T] intersperse(&T sep, list[&T] xs) = + [x, sep | &T x <- xs][..-1]; @synopsis{Test whether a list is empty.} @description{ @@ -655,6 +653,29 @@ tuple[list[&T],list[&T]] split(list[&T] l) { return ; } +@synopsis{Groups sublists for consecutive elements which are `similar`} +@description{ +This function does not change the order of the elements. Only elements +which are similar end-up in a sub-list with more than one element. The +elements which are not similar to their siblings, end up in singleton +lists. +} +@examples{ +```rascal-shell +import List; +bool bothEvenOrBothOdd(int a, int b) = (a % 2 == 0 && b % 2 == 0) || (a % 2 == 1 && b % 2 == 1); +group([1,7,3,6,2,9], bothEvenOrBothOdd); +``` +} +public list[list[&T]] group(list[&T] input, bool (&T a, &T b) similar) { + lres = while ([hd, *tl] := input) { + sim = [hd, *takeWhile(tl, bool (&T a) { return similar(a, hd); })]; + append sim; + input = drop(size(sim), input); + } + + return lres; +} @synopsis{Sum the elements of a list.} @examples{ diff --git a/src/org/rascalmpl/library/ParseTree.rsc b/src/org/rascalmpl/library/ParseTree.rsc index e1e49aa00de..2ffabf48413 100644 --- a/src/org/rascalmpl/library/ParseTree.rsc +++ b/src/org/rascalmpl/library/ParseTree.rsc @@ -10,7 +10,6 @@ @contributor{Tijs van der Storm - Tijs.van.der.Storm@cwi.nl} @contributor{Paul Klint - Paul.Klint@cwi.nl - CWI} @contributor{Arnold Lankamp - Arnold.Lankamp@cwi.nl} - @synopsis{Library functions for parse trees.} @description{ A _concrete syntax tree_ or [parse tree](http://en.wikipedia.org/wiki/Parse_tree) is an ordered, rooted tree that @@ -139,7 +138,6 @@ for the type of source locations. Therefore the annotation name has to be escape * We are in transition from deprecating the annotation `@\loc` with the keyword field `.src=|unknown:///|`. Currently the run-time already uses `.src` while the source code still uses `@\loc`. 
} - module ParseTree extend List; @@ -158,14 +156,17 @@ A `Tree` defines the trees normally found after parsing; additional constructors <3> Ambiguous subtree. <4> A single character. } - -data Tree(loc parseError = |unknown:///|(0,0,<0,0>,<0,0>)) //loc src = |unknown:///|(0,0,<0,0>,<0,0>) - = appl(Production prod, list[Tree] args) // <1> - | cycle(Symbol symbol, int cycleLength) // <2> - | amb(set[Tree] alternatives) // <3> - | char(int character) // <4> - ; - +data Tree (loc parseError = |unknown:///|(0, 0, <0, 0>, <0, 0>)) + //loc src = |unknown:///|(0,0,<0,0>,<0,0>) + = appl(Production prod, list[Tree] args) + // <1> + | cycle(Symbol symbol, int cycleLength) + // <2> + | amb(set[Tree] alternatives) + // <3> + | char(int character) + // <4> + ; @synopsis{Production in ParseTrees} @description{ @@ -184,21 +185,28 @@ construct ordered and un-ordered compositions, and associativity groups. for extending priority chains and such. <6> `error` means a node produced by error recovery. <7> `skipped` means characters skipped during error recovery, always the last child of an `appl` with a `error` production. 
-} -data Production - = prod(Symbol def, list[Symbol] symbols, set[Attr] attributes) // <1> - | regular(Symbol def) // <2> - ; - -data Production - = \priority(Symbol def, list[Production] choices) // <3> - | \associativity(Symbol def, Associativity \assoc, set[Production] alternatives) // <4> - | \reference(Symbol def, str cons) // <5> - ; +} +data Production + = prod(Symbol def, list[Symbol] symbols, set[Attr] attributes) + // <1> + | regular(Symbol def) + // <2> + ; data Production - = \error(Symbol def, Production prod, int dot) - | \skipped(Symbol def); + = \priority(Symbol def, list[Production] choices) + // <3> + | \associativity( + Symbol def, Associativity \assoc, set[Production] alternatives) + // <4> + | \reference(Symbol def, str cons) + // <5> + ; + +data Production + = \error(Symbol def, Production prod, int dot) + | \skipped(Symbol def) + ; @synopsis{A special exception that wraps errors that are (almost) certainly caused by unexpected parse errors} @description{ @@ -215,7 +223,6 @@ using try/catch you can make a language processor robust against (deeply nested) @pitfalls{ it is advised to try/catch these exception high up in the call graph of your language processor, otherwise you'll have to write try/catch in many different places } - data RuntimeException = ParseErrorRecovery(RuntimeException trigger, loc src); @synopsis{Attributes in productions.} @@ -224,24 +231,22 @@ An `Attr` (attribute) documents additional semantics of a production rule. Neith brackets are processed by the parser generator. Rather downstream processors are activated by these. Associativity is a parser generator feature though. } -data Attr - = \bracket() - | \assoc(Associativity \assoc) - ; - +data Attr + = \bracket() + | \assoc(Associativity \assoc) + ; @synopsis{Associativity attribute.} @description{ Associativity defines the various kinds of associativity of a specific production. 
-} -data Associativity +} +data Associativity = \left() - | \right() - | \assoc() + | \right() + | \assoc() | \non-assoc() ; - @synopsis{Character ranges and character class} @description{ * `CharRange` defines a range of characters. @@ -251,7 +256,6 @@ data CharRange = range(int begin, int end); alias CharClass = list[CharRange]; - @synopsis{Symbols that can occur in a ParseTree} @description{ The type `Symbol` is introduced in ((Library:module:Type)), see ((Type-Symbol)), to represent the basic Rascal types, @@ -279,43 +283,59 @@ e.g., `int`, `list`, and `rel`. Here we extend it with the symbols that may occu <19> Conditional occurrence of a symbol. } data Symbol // <1> - = \start(Symbol symbol); + = \start(Symbol symbol); // These symbols are the named non-terminals. -data Symbol - = \sort(str name) // <2> - | \lex(str name) // <3> - | \layouts(str name) // <4> - | \keywords(str name) // <5> - | \parameterized-sort(str name, list[Symbol] parameters) // <6> - | \parameterized-lex(str name, list[Symbol] parameters) // <7> - ; +data Symbol + = \sort(str name) + // <2> + | \lex(str name) + // <3> + | \layouts(str name) + // <4> + | \keywords(str name) + // <5> + | \parameterized-sort(str name, list[Symbol] parameters) + // <6> + | \parameterized-lex(str name, list[Symbol] parameters) + // <7> + ; // These are the terminal symbols. -data Symbol - = \lit(str string) // <8> - | \cilit(str string) // <9> - | \char-class(list[CharRange] ranges) // <10> - ; - +data Symbol + = \lit(str string) + // <8> + | \cilit(str string) + // <9> + | \char-class(list[CharRange] ranges) + // <10> + ; + // These are the regular expressions. 
data Symbol - = \empty() // <11> - | \opt(Symbol symbol) // <12> - | \iter(Symbol symbol) // <13> - | \iter-star(Symbol symbol) // <14> - | \iter-seps(Symbol symbol, list[Symbol] separators) // <15> - | \iter-star-seps(Symbol symbol, list[Symbol] separators) // <16> - | \alt(set[Symbol] alternatives) // <17> - | \seq(list[Symbol] symbols) // <18> - ; - + = \empty() + // <11> + | \opt(Symbol symbol) + // <12> + | \iter(Symbol symbol) + // <13> + | \iter-star(Symbol symbol) + // <14> + | \iter-seps(Symbol symbol, list[Symbol] separators) + // <15> + | \iter-star-seps(Symbol symbol, list[Symbol] separators) + // <16> + | \alt(set[Symbol] alternatives) + // <17> + | \seq(list[Symbol] symbols) + // <18> + ; + data Symbol // <19> - = \conditional(Symbol symbol, set[Condition] conditions); + = \conditional(Symbol symbol, set[Condition] conditions); bool subtype(Symbol::\sort(_), Symbol::\adt("Tree", _)) = true; - @synopsis{Datatype for declaring preconditions and postconditions on symbols} @description{ A `Condition` can be attached to a symbol; it restricts the applicability @@ -324,22 +344,23 @@ is followed by another symbol and `at-column` requires that it occurs at a certain position in the current line of the input text. 
} data Condition - = \follow(Symbol symbol) - | \not-follow(Symbol symbol) - | \precede(Symbol symbol) - | \not-precede(Symbol symbol) - | \delete(Symbol symbol) - | \at-column(int column) - | \begin-of-line() - | \end-of-line() - | \except(str label) - ; - + = \follow(Symbol symbol) + | \not-follow(Symbol symbol) + | \precede(Symbol symbol) + | \not-precede(Symbol symbol) + | \delete(Symbol symbol) + | \at-column(int column) + | \begin-of-line() + | \end-of-line() + | \except(str label) + ; @synopsis{Nested priority is flattened.} -Production priority(Symbol s, [*Production a, priority(Symbol _, list[Production] b), *Production c]) - = priority(s,a+b+c); - +Production priority( + Symbol s, + [*Production a, priority(Symbol _, list[Production] b), *Production c] +) + = priority(s, a + b + c); @synopsis{Normalization of associativity.} @description{ @@ -347,15 +368,24 @@ Production priority(Symbol s, [*Production a, priority(Symbol _, list[Production * Nested (equal) associativity is flattened. * ((ParseTree-priority)) under an associativity group defaults to choice. 
} -Production associativity(Symbol s, Associativity as, {*Production a, choice(Symbol t, set[Production] b)}) - = associativity(s, as, a+b); - -Production associativity(Symbol rhs, Associativity a, {associativity(rhs, Associativity b, set[Production] alts), *Production rest}) - = associativity(rhs, a, rest + alts); // the nested associativity, even if contradictory, is lost - -Production associativity(Symbol s, Associativity as, {*Production a, priority(Symbol t, list[Production] b)}) - = associativity(s, as, {*a, *b}); - +Production associativity( + Symbol s, Associativity as, + {*Production a, choice(Symbol t, set[Production] b)} +) + = associativity(s, as, a + b); + +Production associativity( + Symbol rhs, Associativity a, + {associativity(rhs, Associativity b, set[Production] alts), *Production rest} +) + = associativity(rhs, a, rest + alts); + +// the nested associativity, even if contradictory, is lost +Production associativity( + Symbol s, Associativity as, + {*Production a, priority(Symbol t, list[Production] b)} +) + = associativity(s, as, {*a, *b}); @synopsis{Annotate a parse tree node with a source location.} @description{ @@ -401,8 +431,12 @@ ignores @\loc annotations and whitespace and comments. * Annotated trees are strictly too big for optimal memory usage. Often `@\loc` is the first and only annotation, so it introduces a map for keyword parameters for every node. Also more nodes are different, impeding in optimal reference sharing. If you require long time storage of many parse trees it may be useful to strip them of annotations for selected categories of nodes, using ((reposition)). 
-} -anno loc Tree@\loc; +} anno + loc + Tree + @ + \loc +; @synopsis{Parse input text (from a string or a location) and return a parse tree.} @description{ @@ -477,16 +511,81 @@ catch ParseError(loc l): { } ``` } - -&T<:Tree parse(type[&T<:Tree] begin, str input, bool allowAmbiguity=false, int maxAmbDepth=2, bool allowRecovery=false, int maxRecoveryAttempts=30, int maxRecoveryTokens=3, bool hasSideEffects=false, set[Tree(Tree)] filters={}) - = parser(begin, allowAmbiguity=allowAmbiguity, maxAmbDepth=maxAmbDepth, allowRecovery=allowRecovery, maxRecoveryAttempts=maxRecoveryAttempts, maxRecoveryTokens=maxRecoveryTokens, hasSideEffects=hasSideEffects, filters=filters)(input, |unknown:///|); - -&T<:Tree parse(type[&T<:Tree] begin, str input, loc origin, bool allowAmbiguity=false, int maxAmbDepth=2, bool allowRecovery=false, int maxRecoveryAttempts=30, int maxRecoveryTokens=3, bool hasSideEffects=false, set[Tree(Tree)] filters={}) - = parser(begin, allowAmbiguity=allowAmbiguity, maxAmbDepth=maxAmbDepth, allowRecovery=allowRecovery, maxRecoveryAttempts=maxRecoveryAttempts, maxRecoveryTokens=maxRecoveryTokens, hasSideEffects=hasSideEffects, filters=filters)(input, origin); - -&T<:Tree parse(type[&T<:Tree] begin, loc input, bool allowAmbiguity=false, int maxAmbDepth=2, bool allowRecovery=false, int maxRecoveryAttempts=30, int maxRecoveryTokens=3, bool hasSideEffects=false, set[Tree(Tree)] filters={}) - = parser(begin, allowAmbiguity=allowAmbiguity, maxAmbDepth=maxAmbDepth, allowRecovery=allowRecovery, maxRecoveryAttempts=maxRecoveryAttempts, maxRecoveryTokens=maxRecoveryTokens, hasSideEffects=hasSideEffects, filters=filters)(input, input); - +&T <: Tree parse( + type[&T <: Tree] begin, + str input, + bool allowAmbiguity = false, + int maxAmbDepth = 2, + bool allowRecovery = false, + int maxRecoveryAttempts = 30, + int maxRecoveryTokens = 3, + bool hasSideEffects = false, + set[Tree(Tree)] filters = {} +) + = parser( + begin, + allowAmbiguity = allowAmbiguity, + maxAmbDepth 
= maxAmbDepth, + allowRecovery = allowRecovery, + maxRecoveryAttempts = maxRecoveryAttempts, + maxRecoveryTokens = maxRecoveryTokens, + hasSideEffects = hasSideEffects, + filters = filters + )( + input, + |unknown:///| + ); + +&T <: Tree parse( + type[&T <: Tree] begin, + str input, + loc origin, + bool allowAmbiguity = false, + int maxAmbDepth = 2, + bool allowRecovery = false, + int maxRecoveryAttempts = 30, + int maxRecoveryTokens = 3, + bool hasSideEffects = false, + set[Tree(Tree)] filters = {} +) + = parser( + begin, + allowAmbiguity = allowAmbiguity, + maxAmbDepth = maxAmbDepth, + allowRecovery = allowRecovery, + maxRecoveryAttempts = maxRecoveryAttempts, + maxRecoveryTokens = maxRecoveryTokens, + hasSideEffects = hasSideEffects, + filters = filters + )( + input, + origin + ); + +&T <: Tree parse( + type[&T <: Tree] begin, + loc input, + bool allowAmbiguity = false, + int maxAmbDepth = 2, + bool allowRecovery = false, + int maxRecoveryAttempts = 30, + int maxRecoveryTokens = 3, + bool hasSideEffects = false, + set[Tree(Tree)] filters = {} +) + = parser( + begin, + allowAmbiguity = allowAmbiguity, + maxAmbDepth = maxAmbDepth, + allowRecovery = allowRecovery, + maxRecoveryAttempts = maxRecoveryAttempts, + maxRecoveryTokens = maxRecoveryTokens, + hasSideEffects = hasSideEffects, + filters = filters + )( + input, + input + ); @synopsis{Generates a parser from an input grammar.} @description{ @@ -519,7 +618,16 @@ The parse function behaves differently depending of the given keyword parameters interpreted environment to make side effects (like a symbol table) and it can share more intermediate results as a result. 
} @javaClass{org.rascalmpl.library.Prelude} -java &T (value input, loc origin) parser(type[&T] grammar, bool allowAmbiguity=false, int maxAmbDepth=2, bool allowRecovery=false, int maxRecoveryAttempts=30, int maxRecoveryTokens=3, bool hasSideEffects=false, set[Tree(Tree)] filters={}); +java &T(value input, loc origin) parser( + type[&T] grammar, + bool allowAmbiguity = false, + int maxAmbDepth = 2, + bool allowRecovery = false, + int maxRecoveryAttempts = 30, + int maxRecoveryTokens = 3, + bool hasSideEffects = false, + set[Tree(Tree)] filters = {} +); @javaClass{org.rascalmpl.library.Prelude} @synopsis{Generates a parser function that can be used to find the left-most deepest ambiguous sub-sentence.} @@ -532,7 +640,11 @@ the tree that exhibits ambiguity. This can be done very quickly, while the whole * The returned sub-tree usually has a different type than the parameter of the type[] symbol that was passed in. The reason is that sub-trees typically have a different non-terminal than the start non-terminal of a grammar. } -java Tree (value input, loc origin) firstAmbiguityFinder(type[Tree] grammar, bool hasSideEffects=false, set[Tree(Tree)] filters={}); +java Tree(value input, loc origin) firstAmbiguityFinder( + type[Tree] grammar, + bool hasSideEffects = false, + set[Tree(Tree)] filters = {} +); @synopsis{Generates parsers from a grammar (reified type), where all non-terminals in the grammar can be used as start-symbol.} @description{ @@ -540,7 +652,16 @@ This parser generator behaves the same as the `parser` function, but it produces nonterminal parameter. This can be used to select a specific non-terminal from the grammar to use as start-symbol for parsing. 
} @javaClass{org.rascalmpl.library.Prelude} -java &U (type[&U] nonterminal, value input, loc origin) parsers(type[&T] grammar, bool allowAmbiguity=false, int maxAmbDepth=2, bool allowRecovery=false, int maxRecoveryAttempts=30, int maxRecoveryTokens=3, bool hasSideEffects=false, set[Tree(Tree)] filters={}); +java &U(type[&U] nonterminal, value input, loc origin) parsers( + type[&T] grammar, + bool allowAmbiguity = false, + int maxAmbDepth = 2, + bool allowRecovery = false, + int maxRecoveryAttempts = 30, + int maxRecoveryTokens = 3, + bool hasSideEffects = false, + set[Tree(Tree)] filters = {} +); @javaClass{org.rascalmpl.library.Prelude} @synopsis{Generates a parser function that can be used to find the left-most deepest ambiguous sub-sentence.} @@ -553,7 +674,11 @@ the tree that exhibits ambiguity. This can be done very quickly, while the whole * The returned sub-tree usually has a different type than the parameter of the type[] symbol that was passed in. The reason is that sub-trees typically have a different non-terminal than the start non-terminal of a grammar. } -java Tree (type[Tree] nonterminal, value input, loc origin) firstAmbiguityFinders(type[Tree] grammar, bool hasSideEffects=false, set[Tree(Tree)] filters={}); +java Tree(type[Tree] nonterminal, value input, loc origin) firstAmbiguityFinders( + type[Tree] grammar, + bool hasSideEffects = false, + set[Tree(Tree)] filters = {} +); @synopsis{Parse the input but instead of returning the entire tree, return the trees for the first ambiguous substring.} @description{ @@ -564,10 +689,10 @@ the cost of constructing nested ambiguity clusters. If the input sentence is not ambiguous after all, simply the entire tree is returned. 
} Tree firstAmbiguity(type[Tree] begin, str input) - = firstAmbiguityFinder(begin)(input, |unknown:///|); + = firstAmbiguityFinder(begin)(input, |unknown:///|); Tree firstAmbiguity(type[Tree] begin, loc input) - = firstAmbiguityFinder(begin)(input, input); + = firstAmbiguityFinder(begin)(input, input); @javaClass{org.rascalmpl.library.Prelude} @synopsis{Generate a parser and store it in serialized form for later reuse.} @@ -631,7 +756,16 @@ p(type(sort("E"), ()), "e+e", |src:///|); * reifiying types (use of `#`) will trigger the loading of a parser generator anyway. You have to use this notation for types to avoid that: `type(\start(sort("MySort")), ())` to avoid the computation for `#start[A]` } -java &U (type[&U] nonterminal, value input, loc origin) loadParsers(loc savedParsers, bool allowAmbiguity=false, int maxAmbDepth=2, bool allowRecovery=false, int maxRecoveryAttempts=30, int maxRecoveryTokens=3, bool hasSideEffects=false, set[Tree(Tree)] filters={}); +java &U(type[&U] nonterminal, value input, loc origin) loadParsers( + loc savedParsers, + bool allowAmbiguity = false, + int maxAmbDepth = 2, + bool allowRecovery = false, + int maxRecoveryAttempts = 30, + int maxRecoveryTokens = 3, + bool hasSideEffects = false, + set[Tree(Tree)] filters = {} +); @synopsis{Load a previously serialized parser, for a specific non-terminal, from disk for usage} @description{ @@ -639,7 +773,17 @@ This loader behaves just like ((loadParsers)), except that the resulting parser bound to a specific non-terminal. 
} @javaClass{org.rascalmpl.library.Prelude} -java &U (value input, loc origin) loadParser(type[&U] nonterminal, loc savedParsers, bool allowAmbiguity=false, int maxAmbDepth=2, bool allowRecovery=false, int maxRecoveryAttempts=30, int maxRecoveryTokens=3, bool hasSideEffects=false, set[Tree(Tree)] filters={}); +java &U(value input, loc origin) loadParser( + type[&U] nonterminal, + loc savedParsers, + bool allowAmbiguity = false, + int maxAmbDepth = 2, + bool allowRecovery = false, + int maxRecoveryAttempts = 30, + int maxRecoveryTokens = 3, + bool hasSideEffects = false, + set[Tree(Tree)] filters = {} +); @synopsis{Yield the string of characters that form the leafs of the given parse tree.} @description{ @@ -666,7 +810,6 @@ str unparse(Tree tree) = ""; java str printSymbol(Symbol sym, bool withLayout); @javaClass{org.rascalmpl.library.Prelude} - @synopsis{Implode a parse tree according to a given (ADT) type.} @description{ Given a grammar for a language, its sentences can be parsed and the result is a parse tree @@ -785,47 +928,57 @@ Can be imploded into: data Exp = add(Exp, Exp); ``` } -java &T<:value implode(type[&T<:value] t, Tree tree); +java &T <: value implode(type[&T <: value] t, Tree tree); @synopsis{Tree search result type for ((treeAt)).} -data TreeSearchResult[&T<:Tree] = treeFound(&T tree) | treeNotFound(); - - +data TreeSearchResult[&T <: Tree] + = treeFound(&T tree) + | treeNotFound() + ; @synopsis{Select the innermost Tree of a given type which is enclosed by a given location.} @description{ } -TreeSearchResult[&T<:Tree] treeAt(type[&T<:Tree] t, loc l, Tree a:appl(_, _)) { - if ((a@\loc)?, al := a@\loc, al.offset <= l.offset, al.offset + al.length >= l.offset + l.length) { - for (arg <- a.args, TreeSearchResult[&T<:Tree] r:treeFound(&T<:Tree _) := treeAt(t, l, arg)) { - return r; - } - - if (&T<:Tree tree := a) { - return treeFound(tree); - } - } - return treeNotFound(); +TreeSearchResult[&T <: Tree] treeAt( + type[&T <: Tree] t, loc l, Tree a: 
appl(_, _) +) { + if ((a@\loc)?, al := a@\loc, al.offset <= l.offset, al.offset + al.length >= l.offset + l.length) + { + for (arg <- a.args, TreeSearchResult[&T <: Tree] r: treeFound(&T <: Tree _) := treeAt(t, l, arg)) { + return r; + } + + if (&T <: Tree tree := a) { + return treeFound(tree); + } + } + return treeNotFound(); } -default TreeSearchResult[&T<:Tree] treeAt(type[&T<:Tree] t, loc l, Tree root) = treeNotFound(); +default TreeSearchResult[&T <: Tree] treeAt( + type[&T <: Tree] t, loc l, Tree root +) + = treeNotFound(); -bool sameType(label(_,Symbol s),Symbol t) = sameType(s,t); -bool sameType(Symbol s,label(_,Symbol t)) = sameType(s,t); -bool sameType(Symbol s,conditional(Symbol t,_)) = sameType(s,t); -bool sameType(conditional(Symbol s,_), Symbol t) = sameType(s,t); +bool sameType(label(_, Symbol s), Symbol t) = sameType(s, t); +bool sameType(Symbol s, label(_, Symbol t)) = sameType(s, t); +bool sameType(Symbol s, conditional(Symbol t, _)) = sameType(s, t); +bool sameType(conditional(Symbol s, _), Symbol t) = sameType(s, t); bool sameType(Symbol s, s) = true; default bool sameType(Symbol s, Symbol t) = false; - @synopsis{Determine if the given type is a non-terminal type.} bool isNonTerminalType(Symbol::\sort(str _)) = true; bool isNonTerminalType(Symbol::\lex(str _)) = true; bool isNonTerminalType(Symbol::\layouts(str _)) = true; bool isNonTerminalType(Symbol::\keywords(str _)) = true; -bool isNonTerminalType(Symbol::\parameterized-sort(str _, list[Symbol] _)) = true; -bool isNonTerminalType(Symbol::\parameterized-lex(str _, list[Symbol] _)) = true; +bool isNonTerminalType( + Symbol::\parameterized-sort(str _, list[Symbol] _) +) + = true; +bool isNonTerminalType(Symbol::\parameterized-lex(str _, list[Symbol] _)) + = true; bool isNonTerminalType(Symbol::\start(Symbol s)) = isNonTerminalType(s); default bool isNonTerminalType(Symbol s) = false; @@ -877,119 +1030,129 @@ yield of a tree should always produce the exact same locations as ((reposition)) * The 
default mark options simulate the behavior of ((parser)) functions. } &T <: Tree reposition( - &T <: Tree tree, - loc file = tree@\loc.top, - bool \markStart = true, - bool \markSyntax = true, - bool \markLexical = true, - bool \markSubLexical = true, - bool \markRegular = true, - bool \markLayout = true, - bool \markSubLayout = true, - bool \markLit = false, - bool \markSubLit = false, - bool \markAmb = false, - bool \markCycle = false, - bool \markChar = false - ) { + &T <: Tree tree, + loc file = tree@\loc.top, + bool \markStart = true, + bool \markSyntax = true, + bool \markLexical = true, + bool \markSubLexical = true, + bool \markRegular = true, + bool \markLayout = true, + bool \markSubLayout = true, + bool \markLit = false, + bool \markSubLit = false, + bool \markAmb = false, + bool \markCycle = false, + bool \markChar = false +) { // the cur variables are shared state by the `rec` local function that recurses over the entire tree int curOffset = 0; int curLine = 1; int curColumn = 0; - + @synopsis{Check if this rule is configured to be annotated} - default bool doAnno(Production _) = false; - bool doAnno(prod(\lex(_), _, _)) = markLexical; - bool doAnno(prod(\label(_, \lex(_)), _, _)) = markLexical; - bool doAnno(prod(\parameterized-lex(_, _), _, _)) = markLexical; + default bool doAnno(Production _) = false; + bool doAnno(prod(\lex(_), _, _)) = markLexical; + bool doAnno(prod(\label(_, \lex(_)), _, _)) = markLexical; + bool doAnno(prod(\parameterized-lex(_, _), _, _)) = markLexical; bool doAnno(prod(\label(_, \parameterized-lex(_, _)), _, _)) = markLexical; - bool doAnno(prod(\layouts(_), _, _)) = markLayout; + bool doAnno(prod(\layouts(_), _, _)) = markLayout; bool doAnno(prod(\label(_, \layouts(_)), _, _)) = markLayout; - bool doAnno(prod(\sort(_), _, _)) = markSyntax; - bool doAnno(prod(\label(_, \sort(_)), _, _)) = markSyntax; - bool doAnno(prod(\parameterized-sort(_, _), _, _)) = markSyntax; + bool doAnno(prod(\sort(_), _, _)) = markSyntax; + bool 
doAnno(prod(\label(_, \sort(_)), _, _)) = markSyntax; + bool doAnno(prod(\parameterized-sort(_, _), _, _)) = markSyntax; bool doAnno(prod(\label(_, \parameterized-sort(_, _)), _, _)) = markSyntax; - bool doAnno(\regular(_)) = markRegular; - bool doAnno(prod(\lit(_), _, _)) = markLit; - bool doAnno(prod(\cilit(_), _, _)) = markLit; - bool doAnno(prod(\start(_), _, _)) = markStart; - + bool doAnno(\regular(_)) = markRegular; + bool doAnno(prod(\lit(_), _, _)) = markLit; + bool doAnno(prod(\cilit(_), _, _)) = markLit; + bool doAnno(prod(\start(_), _, _)) = markStart; + @synopsis{Check if sub-structure of this rule is configured to be annotated} - default bool doSub(Production _) = true; - bool doSub(prod(\lex(_), _, _)) = \markSubLexical; - bool doSub(prod(\label(_, lex(_)), _, _)) = \markSubLexical; - bool doSub(prod(\layouts(_), _, _)) = \markSubLayout; + default bool doSub(Production _) = true; + bool doSub(prod(\lex(_), _, _)) = \markSubLexical; + bool doSub(prod(\label(_, lex(_)), _, _)) = \markSubLexical; + bool doSub(prod(\layouts(_), _, _)) = \markSubLayout; bool doSub(prod(\label(_, \layouts(_)), _, _)) = \markSubLayout; - bool doSub(prod(\lit(_), _, _)) = \markSubLit; - bool doSub(prod(\cilit(_), _, _)) = \markSubLit; - + bool doSub(prod(\lit(_), _, _)) = \markSubLit; + bool doSub(prod(\cilit(_), _, _)) = \markSubLit; + // the character nodes drive the actual current position: offset, line and column - Tree rec(Tree t:char(int ch), bool _sub) { - beginOffset = curOffset; - beginLine = curLine; - beginColumn = curColumn; - - curOffset += 1; - curColumn += 1; - - switch (t) { - case NewLineChar _ : { - curLine += 1; - curColumn = 0; + Tree rec(Tree t: char(int ch), bool _sub) { + beginOffset = curOffset; + beginLine = curLine; + beginColumn = curColumn; + + curOffset += 1; + curColumn += 1; + + switch(t) { + case NewLineChar _: { + curLine += 1; + curColumn = 0; + } } - } - - Tree washCC(Tree x) = x; // workaround for issue #2342 - - return markChar - ? 
washCC(char(ch))[@\loc=file(beginOffset, 1, , )] - : washCC(char(ch)) - ; + + Tree washCC(Tree x) = x; + + // workaround for issue #2342 + return + markChar + ? washCC(char(ch))[@\loc = file( + beginOffset, + 1, + , + + )] + : washCC(char(ch)); } - + // cycles take no space - Tree rec(cycle(Symbol s, int up), bool _sub) = markCycle - ? cycle(s, up)[@\loc=file(curOffset, 0, , )] - : cycle(s, up) - ; - + Tree rec(cycle(Symbol s, int up), bool _sub) + = markCycle + ? cycle(s, up)[@\loc = file(curOffset, 0, , )] + : cycle(s, up); + // application nodes always have children to traverse, to get to the individual characters eventually // different types of nodes lead to annotation, or not, depending on the parameters of ((reposition)) Tree rec(appl(Production prod, list[Tree] args), bool sub) { - beginOffset = curOffset; - beginLine = curLine; - beginColumn = curColumn; - - // once `sub` is false, going down, we can never turn it on again - newArgs = [mergeRec(a, sub && doSub(prod)) | a <- args]; - - return (sub && doAnno(prod)) - ? appl(prod, newArgs)[@\loc=file(beginOffset, curOffset - beginOffset, , )] - : appl(prod, newArgs) - ; - } - + beginOffset = curOffset; + beginLine = curLine; + beginColumn = curColumn; + + // once `sub` is false, going down, we can never turn it on again + newArgs = [mergeRec(a, sub && doSub(prod)) | a <- args]; + + return + (sub && doAnno(prod)) + ? appl(prod, newArgs)[@\loc = file( + beginOffset, + curOffset - beginOffset, + , + + )] + : appl(prod, newArgs); + } + // ambiguity nodes are simply choices between alternatives which each receive their own positions. Tree rec(amb(set[Tree] alts), bool sub) { - newAlts = {mergeRec(a, sub) | a <- alts}; - // inherit the outermost positions from one of the alternatives, since they are all the same by definition. - Tree x = getFirstFrom(newAlts); - return markAmb && x@\loc? - ? 
amb(newAlts)[@\loc=x@\loc] - : amb(newAlts) - ; + newAlts = {mergeRec(a, sub)| a <- alts}; + + // inherit the outermost positions from one of the alternatives, since they are all the same by definition. + Tree x = getFirstFrom(newAlts); + return markAmb && x@\loc? ? amb(newAlts)[@\loc = x@\loc] : amb(newAlts); } - + @synopsis{Recurse, but not without recovering all other keyword parameters except "src" a.k.a. @\loc from the original.} Tree mergeRec(Tree t, bool sub) { - oldParams = getKeywordParameters(t); - t = rec(t, sub); - newParams = getKeywordParameters(t); - mergedParams = (oldParams - ("src" : |unknown:///|)) + newParams; - return setKeywordParameters(t, mergedParams); + oldParams = getKeywordParameters(t); + t = rec(t, sub); + newParams = getKeywordParameters(t); + mergedParams = (oldParams - ("src" : |unknown:///|)) + newParams; + return setKeywordParameters(t, mergedParams); } - + // we start recursion at the top, not forgetting to merge its other keyword fields return mergeRec(tree, true); } - \ No newline at end of file + + diff --git a/src/org/rascalmpl/library/Type.rsc b/src/org/rascalmpl/library/Type.rsc index 8c6cafe1997..c3511cfda0b 100644 --- a/src/org/rascalmpl/library/Type.rsc +++ b/src/org/rascalmpl/library/Type.rsc @@ -30,7 +30,7 @@ The ((subtype)) relation of Rascal has all the mathematical properties of a _fin This is a core design principle of Rascal with the following benefits: * Type inference has a guaranteed least or greatest solution, always. This means that constraints are always solvable in an unambiguous manner. * A _principal type_ can always be computed, which is a most precise and unique solution of a type inference problem. Without the lattice, solution candidates could become incomparable and thus ambiguous. Without -this principal type property, type inference is predictable for programmers. +this principal type property, type inference is unpredictable for programmers. 
* Solving type inference constraints can be implemented efficiently. The algorithm, based on ((lub)) and ((glb)), makes progress _deterministically_ and does not require backtracking to find better solutions. Since the lattice is not very deep, fixed-point solutions are always found quickly. diff --git a/src/org/rascalmpl/library/analysis/diff/edits/ExecuteTextEdits.rsc b/src/org/rascalmpl/library/analysis/diff/edits/ExecuteTextEdits.rsc index f1c41d5c695..4ccf4641949 100644 --- a/src/org/rascalmpl/library/analysis/diff/edits/ExecuteTextEdits.rsc +++ b/src/org/rascalmpl/library/analysis/diff/edits/ExecuteTextEdits.rsc @@ -46,13 +46,13 @@ void executeFileSystemChange(changed(loc file, list[TextEdit] edits)) { } str executeTextEdits(str content, list[TextEdit] edits) { - assert isSorted(edits, less=bool (TextEdit e1, TextEdit e2) { - return e1.range.offset < e2.range.offset; - }); + // assert isSorted(edits, less=bool (TextEdit e1, TextEdit e2) { + // return e1.range.offset < e2.range.offset; + // }); - for (replace(loc range, str repl) <- reverse(edits)) { - content = ""; - } + int cursor = 0; - return content; + // linear-time streamed reconstruction of the entire text + return "< + cursor = range.offset + range.length;}>"; } diff --git a/src/org/rascalmpl/library/analysis/diff/edits/HiFiLayoutDiff.rsc b/src/org/rascalmpl/library/analysis/diff/edits/HiFiLayoutDiff.rsc index 65a5b722ca4..31f0e88cb38 100644 --- a/src/org/rascalmpl/library/analysis/diff/edits/HiFiLayoutDiff.rsc +++ b/src/org/rascalmpl/library/analysis/diff/edits/HiFiLayoutDiff.rsc @@ -27,6 +27,8 @@ module analysis::diff::edits::HiFiLayoutDiff extend analysis::diff::edits::HiFiTreeDiff; import ParseTree; // this should not be necessary because imported by HiFiTreeDiff import String; // this should not be be necessary because imported by HiFiTreeDiff +import lang::rascal::grammar::definition::Characters; +import IO; @synopsis{Normalization choices for case-insensitive literals.} data CaseInsensitivity 
@@ -106,22 +108,31 @@ list[TextEdit] layoutDiff(Tree original, Tree formatted, bool recoverComments = default list[TextEdit] rec( Tree t:appl(Production p, list[Tree] argsA), appl(p /* must be the same by the above assert */, list[Tree] argsB)) - = [*rec(a, b) | <- zip2(argsA, argsB)]; + = [*rec(argsA[i], argsB[i]) | i <- [0..size(argsA)]]; // first add required locations to layout nodes - original = reposition(original, markLit=true, markLayout=true, markSubLayout=true); + // TODO: check if indeed repositioning is never needed + // original = reposition(original, markLit=true, markLayout=true, markSubLayout=true); return rec(original, formatted); } +private Symbol newlineClass = \char-class([range(10,10)]); + @synopsis{Make sure the new layout still contains all the source code comments of the original layout} @description{ -This algorithm uses the @category("Comments") tag to detect source code comments inside layout substrings. If the original +This algorithm uses the `@category(/[cC]omments/)` tag to detect source code comments inside layout substrings. If the original layout contains comments, we re-introduce the comments at the expected level of indentation. New comments present in the replacement are kept and will overwrite any original comments. -This trick is complicated by the syntax of multiline comments and single line comments that have -to end with a newline. +There are corner cases with respect to the original comments: +* the single line comment that does not end with a newline itself, yet it must always end with a newline after it. +* multiple single line comments after each other + +Then there are corner cases with respect to the replacement whitespace: +* the last line of the replacement whitespace is special. This is the indentation to use for all comments. +* but there could be no newlines in the replacement whitespace; and still there is a single line comment to be included. 
+Now we need to infer an indentation level for what follows the comment from "thin air". } @benefits{ * if comments are kepts and formatted by tools like Tree2Box, then this algorithm does not overwrite these. @@ -132,7 +143,14 @@ to end with a newline. * if comments are not marked with `@category("Comment")` in the original grammar, then this algorithm recovers nothing. } private str learnComments(Tree original, Tree replacement) { - originalComments = ["" | /c:appl(prod(_,_,{\tag("category"(/^[Cc]omment$/)), *_}), _) := original]; + bool mustEndWithNewline(lit("\n")) = true; + bool mustEndWithNewline(conditional(Symbol s, _)) = mustEndWithNewline(s); + // if a comment can not contain newline characters, but everything else, then it must be followed by one: + bool mustEndWithNewline(\iter(Symbol cc:\char-class(_))) = intersection(cc, newlineClass) != newlineClass; + bool mustEndWithNewline(\iter-star(Symbol cc:\char-class(_))) = intersection(cc, newlineClass) != newlineClass; + default bool mustEndWithNewline(_) = false; + + originalComments = [ | /c:appl(prod(_,[*_,Symbol lastSym],{\tag("category"(/^[Cc]omment$/)), *_}), _) := original, str s := ""]; if (originalComments == []) { // if the original did not contain comments, stick with the replacements @@ -146,23 +164,42 @@ private str learnComments(Tree original, Tree replacement) { return ""; } - // At this point, we know that: (a) comments are not present in the replacement and (b) they used to be there in the original. - // So the old comments are going to be the new output. however, we want to learn indentation from the replacement. + // At this point, we know that: + // (a) comments are not present in the replacement and + // (b) they used to be there in the original. + // So the old comments are going to be copied to the new output. + // But, we want to indent them using the style of the replacement. 
+ + // The last line of the replacement string typically has the indentation for the construct that follows: + // | // a comment + // | if (true) { + // ^^^^ + // newIndent + // + // However, if the replacement string is on a single line, then we don't have the indentation + // for the string on the next line readily available. In this case we indent the next line + // to the start column of the replacement layout, as a proxy. + + str replString = ""; + str newIndent = split("\n", replString)[-1] ? ""; - // Drop the last newline of single-line comments, because we don't want two newlines in the output for every comment: - str dropEndNl(str line:/^.*\n$/) = (line[..-1]); - default str dropEndNl(str line) = line; + if (/\n/ !:= replString) { + // no newline in the repl string, so no indentation available for what follows the comment... + newIndent = " <}>"; + } - // the first line of the replacement ,is the indentation to use. - str replString = ""; - str replacementIndent = /^\n+$/ !:= replString - ? split("\n", replString)[0] - : ""; - - // trimming each line makes sure we forget about the original indentation, and drop accidental spaces after comment lines - return replString + indent(replacementIndent, - " - '<}>"[..-1], indentFirstLine=false) + replString; + // we always place sequential comments vertically, because we don't know if we are dealing + // we a single line comment that has to end with newline by follow restriction or by a literal "\n". + // TODO: a deeper analysis of the comment rule that's in use could also be used to discover this. 
+ str trimmedOriginals = " <- originalComments) {> + '<}><}>"; + + // we wrap the comment with the formatted whitespace to assure the proper indentation + // of its first line, and the proper indentation of what comes after this layout node + return replString + + indent(newIndent, trimmedOriginals, indentFirstLine=false) + + newIndent + ; } private Symbol delabel(label(_, Symbol t)) = t; diff --git a/src/org/rascalmpl/library/analysis/formalconcepts/CXTIO.rsc b/src/org/rascalmpl/library/analysis/formalconcepts/CXTIO.rsc index 42e239c53bb..c5950d1c815 100644 --- a/src/org/rascalmpl/library/analysis/formalconcepts/CXTIO.rsc +++ b/src/org/rascalmpl/library/analysis/formalconcepts/CXTIO.rsc @@ -6,33 +6,26 @@ http://www.eclipse.org/legal/epl-v10.html } module analysis::formalconcepts::CXTIO + import IO; import String; import List; import Set; import analysis::formalconcepts::FCA; -@synopsis{Read object attribute in .cxt format.} -public FormalContext[str, str] readCxt(loc input) { +@synopsis{Read object attribute in .cxt format.} +public FormalContext[str, str] readCxt(loc input) { list[str] d = readFileLines(input); int nRows = toInt(d[2]); int nCols = toInt(d[3]); - int theStart = 5+nRows+nCols; - list[str] e = tail(d, size(d)-theStart); + int theStart = 5 + nRows + nCols; + list[str] e = tail(d, size(d) - theStart); int idx = 5; - map [str, set[str]] vb = (); + map[str, set[str]] vb = (); for (str f <- e) { - set[str] b = {d[5+nRows+i]|int i<-[0, 1..size(f)], charAt(f,i)==88}; - vb[d[idx]] = b; - idx = idx+1; - } - return toFormalContext(vb); + set[str] b = {d[5 + nRows + i]| int i <- [0, 1..size(f)], charAt(f, i) == 88}; + vb[d[idx]] = b; + idx = idx + 1; } - -loc input = |file:///ufs/bertl/cxt/digits.cxt|; - -public void main() { - FormalContext[str, str] d = readCxt(input); - ConceptLattice[str, str] e = fca(d); - println(toDotString(e)); - } + return toFormalContext(vb); +} diff --git a/src/org/rascalmpl/library/analysis/formalconcepts/FCA.rsc 
b/src/org/rascalmpl/library/analysis/formalconcepts/FCA.rsc index a686a722d77..3700b4eb435 100644 --- a/src/org/rascalmpl/library/analysis/formalconcepts/FCA.rsc +++ b/src/org/rascalmpl/library/analysis/formalconcepts/FCA.rsc @@ -6,7 +6,6 @@ http://www.eclipse.org/legal/epl-v10.html } @contributor{Bert Lisser - Bert.Lisser@cwi.nl (CWI)} - @synopsis{Library for Formal Concept Analysis} @description{ Formal Concept Analysis is a somewhat ubiquitous tool in software analysis projects. @@ -14,7 +13,6 @@ It can be used to find latent groups of objects that share the same attributes i Typically, we apply `FCA` to a relation `rel[&O objects, &A attributes]`, which represents extracted source code artifacts and their attributes. } - module analysis::formalconcepts::FCA import Set; @@ -30,136 +28,148 @@ public alias ConceptLattice[&Object, &Attribute] = rel[Concept[&Object, &Attribu public alias Object2Attributes[&Object, &Attribute] = map[&Object, set[&Attribute]]; public alias Attribute2Objects[&Attribute, &Object] = map[&Attribute, set[&Object]]; - - @synopsis{Computes Concept Lattice given the Object Attribute Relation.} -public ConceptLattice[&Object, &Attribute] fca (FormalContext[&Object, &Attribute] fc) { +public ConceptLattice[&Object, &Attribute] fca(FormalContext[&Object, &Attribute] fc) { rel[set[&Attribute], set[&Attribute]] lat = createAttributeLattice(fc); - return {<, >|<-lat}; + return + {<, > + | <- lat + }; } - @synopsis{Computes Dot Graph from Concept Lattice.} public DotGraph toDot(ConceptLattice[&Object, &Attribute] cl) { - return toDot(cl, true); - } - + return toDot(cl, true); +} + public DotGraph toDot(ConceptLattice[&Object, &Attribute] cl, bool lab) { - map[Concept[&Object, &Attribute], int] z = makeNodes(cl); - set[Concept[&Object, &Attribute]] d = domain(z); - Stms nodes = []; - for (Concept[&Object, &Attribute] c <- d) { - nodes += compose(c, z, lab); - } - Stms edges = [ E("\"\"", "\"\"") | x<-cl]; - return digraph("fca", - [NODE( 
[<"style","filled">, <"fillcolor","cornsilk">,<"fontcolor","blue">,<"shape","ellipse">])] - +nodes+edges); - } - + map[Concept[&Object, &Attribute], int] z = makeNodes(cl); + set[Concept[&Object, &Attribute]] d = domain(z); + Stms nodes = []; + for (Concept[&Object, &Attribute] c <- d) { + nodes += compose(c, z, lab); + } + Stms edges = [E("\"\"", "\"\"") | x <- cl]; + return + digraph( + "fca", + [ + NODE( + [<"style", "filled">, + <"fillcolor", "cornsilk">, + <"fontcolor", "blue">, + <"shape", "ellipse"> ] + ) + ] + + nodes + + edges + ); +} + public Dotline toDotline(ConceptLattice[&Object, &Attribute] cl) { - return ; - } + return ; +} - public Outline toOutline(ConceptLattice[&Object, &Attribute] cl) { - map[Concept[&Object, &Attribute], int] z = makeNodes(cl); - set[Concept[&Object, &Attribute]] d = domain(z); - Outline r = (z[c]:["", ""] | Concept[&Object, &Attribute] c <- d); - return r; - } - + map[Concept[&Object, &Attribute], int] z = makeNodes(cl); + set[Concept[&Object, &Attribute]] d = domain(z); + Outline r = (z[c]: ["", ""] | Concept[&Object, &Attribute] c <- d ); + return r; +} + public FormalContext[&Object, &Attribute] toFormalContext(Object2Attributes[&Object, &Attribute] objects) { - return { | &Object object <- domain(objects), - &Attribute attribute <- objects[object]}; - } + return + { + | &Object object <- domain(objects), &Attribute attribute <- objects[object] + }; +} public FormalContext[&Object, &Attribute] toFormalContext(Attribute2Objects[&Object, &Attribute] attributes) { - return { | &Attribute attribute <- domain(attributes), - &Object object <- attributes[attribute]}; - } -/*---------------------------------------------------------------------------------------------*/ - -set[&T] intersection(set[set[&T]] st) -{ - set[&T] result = isEmpty(st)?{}:getOneFrom(st); - for(set[&T] elm <- st){ - result = result & elm; - } - return result; -} - -set[&T] union(set[set[&T]] st) -{ - set[&T] result = {}; - for(set[&T] elm <- st){ - result += 
elm; - } - return result; -} - -bool isSubset(set[set[&T]] candidate, set[&T] s ) { - for (set[&T] c <- candidate) - if (s + | &Attribute attribute <- domain(attributes), &Object object <- attributes[attribute] + }; +} + +set[&T] intersection(set[set[&T]] st) { + set[&T] result = isEmpty(st) ? {} : getOneFrom(st); + for (set[&T] elm <- st) { + result = result & elm; + } + return result; +} + +set[&T] union(set[set[&T]] st) { + set[&T] result = {}; + for (set[&T] elm <- st) { + result += elm; + } + return result; +} + +bool isSubset(set[set[&T]] candidate, set[&T] s) { + for (set[&T] c <- candidate) + if ( s < c) + return true; + return false; +} @javaClass{org.rascalmpl.library.analysis.formalconcepts.FCA} java set[&Attribute] sigma(FormalContext[&Object, &Attribute] fc, set[&Object] objects); - //= objects == {} ? fc<1> : { a | a <- fc<1>, all(obj <- objects, in fc)}; - + +//= objects == {} ? fc<1> : { a | a <- fc<1>, all(obj <- objects, in fc)}; @javaClass{org.rascalmpl.library.analysis.formalconcepts.FCA} java set[&Object] tau(FormalContext[&Object, &Attribute] fc, set[&Attributes] attributes); - //= attributes == {} ? fc<0> : { ob | ob <- fc<0>, all(a <- attributes, in fc)}; - -set[set[&T]] maxincl(set[set[&T]] c) {return {s|set[&T] s <- c, !isSubset(c, s)};} + +//= attributes == {} ? 
fc<0> : { ob | ob <- fc<0>, all(a <- attributes, in fc)}; +set[set[&T]] maxincl(set[set[&T]] c) { + return {s| set[&T] s <- c, !isSubset(c, s)}; +} rel[set[&Attribute], set[&Attribute]] createAttributeLattice(FormalContext[&Object, &Attribute] fc) { - set[&Object] G = domain(fc); - set[&Attribute] M = range(fc); - set[set[&Attribute]] layer = {M}; - set[set[&Attribute]] B = {sigma(fc, {g}) | g <- G}; - rel[set[&Attribute], set[&Attribute]] r = {}; - while (!isEmpty(layer)&& layer!={{}}) { - set[set[&Attribute]] nextLayer = {}; - for (set[&Attribute] m<-layer) { - set[set[&Attribute]] cover = maxincl({b&m|set[&Attribute] b<-B, (b&m)}; - nextLayer += cover; - } - layer = nextLayer; - } - return r; - } - - /*-----------------------------------------------------------------------------------*/ - + set[&Object] G = domain(fc); + set[&Attribute] M = range(fc); + set[set[&Attribute]] layer = {M}; + set[set[&Attribute]] B = {sigma(fc, {g})| g <- G}; + rel[set[&Attribute], set[&Attribute]] r = {}; + while (!isEmpty(layer) && layer != {{}}) { + set[set[&Attribute]] nextLayer = {}; + for (set[&Attribute] m <- layer) { + set[set[&Attribute]] cover = maxincl({b & m| set[&Attribute] b <- B, (b & m) < m}); + for (set[&Attribute] cov <- cover) + r += {}; + nextLayer += cover; + } + layer = nextLayer; + } + return r; +} + map[Concept[&Object, &Attribute], int] makeNodes(ConceptLattice[&Object, &Attribute] q) { - set[Concept[&Object, &Attribute]] c = carrier(q); - int i = 0; - map[Concept[&Object, &Attribute], int] r = (); - for (Concept[&Object, &Attribute] b<-c) { - if (!(r[b])?) 
{ - r[b] = i; - i=i+1; - } - } - return r; - } - -set[&Attribute] addConcept(ConceptLattice[&Object, &Attribute] q, Concept[&Object, &Attribute] c) { - set[Concept[&Object, &Attribute]] parents = range(domainR(q, {c})); - return c[1] - union({p[1]|Concept[&Object, &Attribute] p <-parents}); -} - -Stm compose(Concept[&Object, &Attribute] c, map[Concept[&Object, &Attribute], int] z, bool lab) { - return N("\"\"", lab?[<"label", "">]:[]); -} - - - -@synopsis{Write relation in `.dot` format.} -public str toDotString(ConceptLattice[&Object, &Attribute] q) { - DotGraph d = toDot(q); - return toString(d); - } + set[Concept[&Object, &Attribute]] c = carrier(q); + int i = 0; + map[Concept[&Object, &Attribute], int] r = (); + for (Concept[&Object, &Attribute] b <- c) { + if ( !(r[b])?) { + r[b] = i; + i = i + 1; + } + } + return r; +} + +set[&Attribute] addConcept(ConceptLattice[&Object, &Attribute] q, Concept[&Object, &Attribute] c) { + set[Concept[&Object, &Attribute]] parents = range(domainR(q, {c})); + return c[1] - union({p[1]| Concept[&Object, &Attribute] p <- parents}); +} + +Stm compose(Concept[&Object, &Attribute] c, map[Concept[&Object, &Attribute], int] z, + bool lab) { + return N("\"\"", lab ? 
[<"label", "">] : []); +} + +@synopsis{Write relation in `.dot` format.} +public str toDotString(ConceptLattice[&Object, &Attribute] q) { + DotGraph d = toDot(q); + return toString(d); +} diff --git a/src/org/rascalmpl/library/lang/box/syntax/Box.rsc b/src/org/rascalmpl/library/lang/box/syntax/Box.rsc index bf6ef897bb9..a26a1066709 100644 --- a/src/org/rascalmpl/library/lang/box/syntax/Box.rsc +++ b/src/org/rascalmpl/library/lang/box/syntax/Box.rsc @@ -10,6 +10,8 @@ @synopsis{An abstract declarative language for two dimensional text layout} module lang::box::\syntax::Box +import List; + @synopsis{Every kind of boxes encodes one or more parameterized two-dimensional text constraints.} @description{ * `H` puts their elements next to each other one the same line separated by `hs` spaces. @@ -22,9 +24,11 @@ module lang::box::\syntax::Box * `SPACE` produces `space` spaces * `L` produces A literal word. This word may only contain printable characters and no spaces; this is a required property that the formatting algorithm depends on for correctness. * `U` splices its contents in the surrounding box, for automatic flattening of overly nested structures in syntax trees. -* `G` is an additional group-by feature that reduces tot the above core features -* `SL` is a convenience box for separated syntax lists based on `G` -* `NULL()` is the group that will disappear from its context, useful for skipping content. It is based on the `U` box. +* `G` is an additional group-by feature for `list[Box]` that reduces tot the above core features. You can use it to wrap another +box around every `gs` elements. +* `AG` is an additional group-by feature for array `Row`s that reduces to the above core features. You can use it to wrap a `R` row +around every `gs` elements and then construct an `A` around those rows. +* `NULL()` is the group that will dissappear from its context, useful for skipping content. It is based on the `U` box. 
} @benefits{ * Box expressions are a declarative mechanism to express formatting rules that are flexible enough to deal @@ -38,20 +42,35 @@ set on every `I` Box according to the current preferences of the user. * `U(boxes)` is rendered as `H(boxes)` if it's the outermost Box. } data Box(int hs=1, int vs=0, int is=4) - = H(list[Box] boxes) - | V(list[Box] boxes) - | HOV(list[Box] boxes) - | HV(list[Box] boxes) - | I(list[Box] boxes) - | WD(list[Box] boxes) - | A(list[Row] rows, list[Alignment] columns=[l() | [R(list[Box] cs), *_] := rows, _ <- cs] /* learns the amount of columns from the first row */) + = H_(list[Box] boxes) + | V_(list[Box] boxes) + | HOV_(list[Box] boxes) + | HV_(list[Box] boxes) + | I_(list[Box] boxes) + | WD_(list[Box] boxes) + | A_(list[Row] rows, Box rs=NULL(), list[Alignment] columns=[]) + | AG_(list[Box] boxes, int gs=2, list[Alignment] columns=[], Box rs=NULL()) | SPACE(int space) | L(str word) - | U(list[Box] boxes) - | G(list[Box] boxes, Box(list[Box]) op = H, int gs=2) + | U_(list[Box] boxes) + | G_(list[Box] boxes, bool backwards=false, int gs=2, Box op = H([])) | NULL() ; +Box H(Box boxes..., int hs=1) = H_(boxes, hs=hs); +Box V(Box boxes..., int vs=0) = V_(boxes, vs=vs); +Box HOV(Box boxes..., int hs=1, int vs=0) = HOV_(boxes, hs=hs, vs=vs); +Box HV(Box boxes..., int hs=1, int vs=0) = HV_(boxes, hs=hs, vs=vs); +Box I(Box boxes...) = I_(boxes); +Box WD(Box boxes...) = WD_(boxes); +Box A(Row rows..., Box rs=NULL(), list[Alignment] columns=[]) + = A_(rows, rs=rs, columns=columns); +Box AG(Box boxes..., int gs=2, list[Alignment] columns=[], Box rs=NULL()) + = AG_(boxes, gs=gs, columns=columns, rs=rs); +Box U(Box boxes...) = U_(boxes); +Box G(Box boxes..., bool backwards=false, int gs=2, Box op = H([])) + = G_(boxes, backwards=backwards, gs=gs, op=op); + @synopsis{A row is a list of boxes that go into an `A` array/table.} @description{ Rows do not have parameters. These are set on the `A` level instead, @@ -59,6 +78,8 @@ or per cell Box. 
} data Row = R(list[Box] cells); +// Row R(Box cells...) = _R(cells); + data Alignment = l() | r() | c(); @synopsis{NULL can be used to return a Box that will completely disappear in the surrounding context.} @@ -81,4 +102,68 @@ algorithm starts counting boxes and widths. * Do not use `NULL` for empty Row cells, unless you do want your cells aligned to the left and filled up to the right with empty H boxes. * NULL will be formatted as `H([])` if it's the outermost Box. } -Box NULL() = U([]); \ No newline at end of file +Box NULL() = U([]); + +@synopsis{Convenience box for adding separators to an existing box list} +@description{ +Each element is wrapped by the `op` operator together with the next separator. +The resulting list is wrapped by a G box, of which the elements will be spliced +into their context. +} +Box SL(list[Box] boxes, Box sep, Box op = H([], hs=0)) + = G([b, sep | b <- boxes][..-1], op=op, gs=2); + +@synopsis{Flatten and fold U and G boxes to simplify the Box structure} +@description{ +U and G and AG boxes greatly simplify the Box tree before it is formatted. This +happens "just-in-time" for efficiency reasons. However, from a Box tree +with many U and G boxes it can become hard to see what the actual formatting +constraints are going to be. + +This function applies the semantics of G and U and returns a Box that renders +exactly the same output, but with a lot less nested structure. +} +@benefits{ +* useful to debug complex `toBox` mappings +* formatting semantics preserving transformation +} +@pitfalls{ +* only useful for debugging purposes, because it becomes a pipeline bottleneck otherwise. 
+} +Box debUG(Box b) { + list[Box] groupBy([], int _gs, Box _op) = []; + list[Box] groupBy(list[Box] boxes:[Box _, *_], int gs, Box op) + = [op[boxes=boxes[..gs]], *groupBy(boxes[gs..], gs, op)]; + + list[Box] groupByBackward([], int _gs, Box _op) = []; + list[Box] groupByBackward(list[Box] boxes:[Box _, *_], int gs, Box op) + = [op[boxes=boxes[..size(boxes) mod gs]], *groupBy(boxes[size(boxes) mod gs..], gs, op)]; + + list[Row] groupRows([], int _gs) = []; + list[Row] groupRows(list[Box] boxes:[Box _, *_], int gs) + = [R(boxes[..gs]), *groupRows(boxes[gs..], gs)]; + + return innermost visit(b) { + case [*Box pre, U_([*Box mid]), *Box post] => [*pre, *mid, *post] + case G_(list[Box] boxes, gs=gs, op=op, backwards=bw) => U_(bw ? groupByBackward(boxes, gs, op) : groupBy(boxes, gs, op)) + case AG_(list[Box] boxes, gs=gs, columns=cs, rs=rs) => A(groupRows(boxes, gs), columns=cs, rs=rs) + } +} + +@synopsis{Short-hand for `H(hs=0)``} +Box H0(Box boxes...) = H_(boxes, hs=0); + +@synopsis{Short-hand for `HOV(hs=0)``} +Box HOV0(Box boxes ...) = HOV_(boxes, hs=0); + +@synopsis{Short-hand for `H(hs=1)``} +Box H1(Box boxes...) = H_(boxes, hs=1); + +@synopsis{Short-hand for `HOV(hs=1)``} +Box HOV1(Box boxes...) = HOV_(boxes, hs=1); + +@synopsis{Short-hand for `V(vs=0)``} +Box V0(Box boxes...) = V_(boxes, vs=0); + +@synopsis{Short-hand for `V(vs=1)``} +Box V1(Box boxes...) 
= V_(boxes, vs=1); diff --git a/src/org/rascalmpl/library/lang/box/util/Box2Text.rsc b/src/org/rascalmpl/library/lang/box/util/Box2Text.rsc index 08026eaf619..39a0e3d6ae8 100644 --- a/src/org/rascalmpl/library/lang/box/util/Box2Text.rsc +++ b/src/org/rascalmpl/library/lang/box/util/Box2Text.rsc @@ -32,15 +32,15 @@ This demonstrates the semantics of the main hard constraints: ```rascal-shell import lang::box::util::Box2Text; import lang::box::\syntax::Box; -format(H([L("A"), L("B"), L("C")], hs=2)) -format(H([L("A"), L("B"), L("C")], hs=1)) -format(H([L("A"), L("B"), L("C")], hs=0)) -format(V([L("A"), L("B"), L("C")], vs=2)) -format(V([L("A"), L("B"), L("C")], vs=1)) -format(V([L("A"), L("B"), L("C")], vs=0)) -format(H([L("A"), V([L("B"), L("C")])])) -format(H([L("A"), I([L("B")]), L("C")])) -format(H([L("A"), V([L("B"), H([L("C"), L("D")])])])) +format(H(L("A"), L("B"), L("C"), hs=2)) +format(H(L("A"), L("B"), L("C"), hs=1)) +format(H(L("A"), L("B"), L("C"), hs=0)) +format(V(L("A"), L("B"), L("C"), vs=2)) +format(V(L("A"), L("B"), L("C"), vs=1)) +format(V(L("A"), L("B"), L("C"), vs=0)) +format(H(L("A"), V(L("B"), L("C")))) +format(H(L("A"), I(L("B")), L("C"))) +format(H(L("A"), V(L("B"), H(L("C"), L("D"))))) ``` The "soft" constraints change their behavior based on available horizontal room: @@ -57,9 +57,9 @@ format(HOV([L("W") | i <- [0..30]])); By cleverly combining constraints, a specifically desired behavior is easy to achieve: ```rascal-shell,continue -format(H([L("if"), H([L("("), L("true"), L(")")], hs=0), HOV([L("doSomething")])])) -format(H([L("if"), H([L("("), L("true"), L(")")], hs=0), HOV([L("W") | i <- [0..30]])])) -format(H([L("if"), H([L("("), L("true"), L(")")], hs=0), HV([L("W") | i <- [0..30]])])) +format(H(L("if"), H(L("("), L("true"), L(")"), hs=0), HOV(L("doSomething")))) +format(H(L("if"), H(L("("), L("true"), L(")"), hs=0), HOV([L("W") | i <- [0..30]]))) +format(H(L("if"), H(L("("), L("true"), L(")"), hs=0), HV([L("W") | i <- [0..30]]))) ``` 
} @pitfalls{ @@ -71,6 +71,19 @@ import util::Math; import List; import String; import lang::box::\syntax::Box; +import IO; + +@synopsis{formatting options for ((Box2Text))} +@description{ + * `maxWidth` is the constraint that makes HV and HOV boxes switch to vertical mode + * `wrapAfter` is the lowerbound that makes HV and HOV stay horizontal + * `is` is the default indentation used when an `I` box does not have an explicit `is` parameter +} +data FormatOptions( + int maxWidth=80, + int wrapAfter=70, + int is=4 +) = formatOptions(); @synopsis{Converts boxes into a string by finding an "optimal" two-dimensional layout} @description{ @@ -84,8 +97,8 @@ fit it will still be printed. We say `maxWidth` is a _soft_ constraint. * HV and HOV are the soft constraints that allow for better solutions, so use them where you can to allow for flexible layout that can handle deeply nested expressions and statements. } -public str format(Box b, int maxWidth=80, int wrapAfter=70) - = " +public str format(Box b, FormatOptions opts = formatOptions()) + = " '<}>"; @synopsis{Box2text uses list[str] as intermediate representation of the output during formatting} @@ -100,8 +113,8 @@ ANSI escape codes, and characters like \r and \n in `L` boxes _will break_ the a alias Text = list[str]; @synopsis{Converts boxes into list of lines (Unicode)} -public Text box2text(Box b, int maxWidth=80, int wrapAfter=70) - = box2data(b, options(maxWidth=maxWidth, wrapAfter=wrapAfter)); +public Text box2text(Box b, FormatOptions opts = formatOptions()) + = box2data(b, options(maxWidth=opts.maxWidth, wrapAfter=opts.wrapAfter, is=opts.is)); ////////// private functions below implement the intermediate data-structures ////////// and the constraint solver @@ -117,7 +130,7 @@ This is used during the algorithm, not for external usage. * `wrapAfter` is the threshold criterion for line fullness, to go to the next line in a HV box and to switching between horizontal and vertical for HOV boxes. 
} -data Options = options( +private data Options = options( int hs = 1, int vs = 0, int is = 4, @@ -127,7 +140,7 @@ data Options = options( @synopsis{Quickly splice in any nested U boxes, and empty H, V, HV, I or HOV boxes} list[Box] u(list[Box] boxes) { - return [*((U(list[Box] nested) := b) ? u(nested) : [b]) | b <- boxes, !isDegenerate(b)]; + return [*((U_(list[Box] nested) := b) ? u(nested) : [b]) | b <- boxes, !isDegenerate(b)]; } @synopsis{Empty H, V, HOV, HV, I boxes should not lead to accidental extra separators in their context} @@ -137,22 +150,19 @@ private bool isDegenerate(Box b) = b has boxes && b.boxes == []; private Text vv(Text a, Text b) = [*a, *b]; @synopsis{Create a string of spaces just as wide as the parameter a} -private str blank(str a) = right("", width(a)); +private str blank(str a) = right("", size(a)); @synopsis{Computes a white line with the length of the last line of a} Text wd([]) = []; Text wd([*_, str x]) = [blank(x)]; - -@synopsis{Computes the length of unescaped string s} -private int width(str s) = size(s); @synopsis{Computes the maximum width of text t} private int twidth([]) = 0; -private default int twidth(Text t) = max([width(line) | line <- t]); +private default int twidth(Text t) = max([size(line) | line <- t]); @synopsis{Computes the length of the last line of t} private int hwidth([]) = 0; -private int hwidth([*_, str last]) = width(last); +private int hwidth([*_, str last]) = size(last); @synopsis{Prepends str a before text b, all lines of b will be shifted} private Text bar(str a, []) = [a]; @@ -186,8 +196,11 @@ private Text rhh(Text a, Text b) = hh(a, b); private Text rvv(Text _, []) = []; private default Text rvv(Text a, Text b) = vv(a,b); -private Text LL(str s ) = [s]; - +private Text LL(str s) { + assert s != "" : "literal strings must never be empty for Box2Text to work correctly."; + return [s]; +} + private Text HH([], Box _, Options _opts, int _m) = []; private Text HH(list[Box] b:[_, *_], Box _, Options opts, 
int m) { @@ -203,13 +216,27 @@ private Text HH(list[Box] b:[_, *_], Box _, Options opts, int m) { return r; } +private Text GG(list[Box] boxes, Box c, Options opts, int m, int gs, Box op, bool backwards) + = \continue(c[boxes=groupBy(boxes, gs, op, backwards)], c, opts, m); + +public list[Box] groupBy(list[Box] boxes, int gs, Box op, false) = groupBy(boxes, gs, op); + +@synopsis{simulates grouping as-if done from the back, by starting to peel off the rest instead of grouping the rest at the end} +public list[Box] groupBy(list[Box] boxes, int gs, Box op, true) + = [op[boxes=boxes[..size(boxes) mod gs]], *groupBy(boxes[size(boxes) mod gs..], gs, op)]; + +public list[Box] groupBy([], int _gs, Box _op) = []; + +public list[Box] groupBy(list[Box] boxes:[Box _, *_], int gs, Box op) + = [op[boxes=boxes[..gs]], *groupBy(boxes[gs..], gs, op)]; + private Text VV([], Box _c, Options _opts, int _m) = []; private Text VV(list[Box] b:[_, *_], Box c, Options opts, int m) { Text r = []; b = reverse(b); for (a <- b) { - if (V(_) !:= c || L("") !:= a) { + if (V_(_) !:= c || L("") !:= a) { Text t = \continue(a, V([]), opts, m); r = vv(t, rvv(vskip(opts.vs), r)); } @@ -219,10 +246,10 @@ private Text VV(list[Box] b:[_, *_], Box c, Options opts, int m) { private Text II([], Box _c, Options _opts, int _m) = []; -private Text II(list[Box] b:[_, *_] , c:H(list[Box] _), Options opts, int m) +private Text II(list[Box] b:[_, *_] , c:H_(list[Box] _), Options opts, int m) = HH(b, c, opts, m); -private Text II(list[Box] b:[Box _, *Box _], c:V(list[Box] _), Options opts, int m) +private Text II(list[Box] b:[Box _, *Box _], c:V_(list[Box] _), Options opts, int m) = rhh(hskip(opts.is), \continue(V(b, vs=opts.vs), c, opts, m - opts.is)); private Text WDWD([], Box _c , Options _opts, int _m) @@ -238,7 +265,7 @@ private Text WDWD([Box head, *Box tail], Box c , Options opts, int m) { private Text ifHOV([], Box b, Box c, Options opts, int m) = []; private Text ifHOV(Text t:[str head], Box b, Box c, 
Options opts, int m) - = width(head) <= m ? t : \continue(b, c, opts, m); + = size(head) <= m ? t : \continue(b, c, opts, m); private Text ifHOV(Text t:[str head, str _, *str_], Box b, Box c, Options opts, int m) = \continue(b, c, opts, m); @@ -255,21 +282,21 @@ private Text HVHV(Text T, int s, Text a, Box A, list[Box] B, Options opts, int m if (size(a) > 1) { // Multiple lines Text T1 = \continue(A, V([]), opts, m-i); - return vv(T, rvv(vskip(v), HVHV(T1, m-hwidth(T1), B, opts, m, H([])))); + return vv(T, rvv(vskip(v), HVHV(T1, m-hwidth(T1), B, opts, m, H_([])))); } if (n <= s) { // Box A fits in current line - return HVHV(hh(lhh(T, hskip(h)), a), s-n, B, opts, m, H([])); + return HVHV(hh(lhh(T, hskip(h)), a), s-n, B, opts, m, H_([])); } else { - n -= h; // n == width(a) + n -= h; // n == size(a) if (i + n < m) { // Fits in the next line, not in current line Text T1 =\continue(A, V([]), opts, m-i); - return vv(T, rvv(vskip(v), HVHV(T1, m-n-i, B, opts, m, H([])))); + return vv(T, rvv(vskip(v), HVHV(T1, m-n-i, B, opts, m, H_([])))); } else { // Doesn't fit in either lines Text T1 = \continue(A, V([]), opts, m-i); - return vv(T, rvv(vskip(v), HVHV(T1, m-hwidth(T1), B, opts, m, H([])))); + return vv(T, rvv(vskip(v), HVHV(T1, m-hwidth(T1), B, opts, m, H_([])))); } } } @@ -285,47 +312,33 @@ private Text HVHV([], Box _, Options opts, int m) = []; private Text HVHV(list[Box] b:[Box head], Box _, Options opts, int m) - = \continue(head, V([]), opts, m); + = \continue(head, V_([]), opts, m); private Text HVHV(list[Box] b:[Box head, Box next, *Box tail], Box _, Options opts, int m) { - Text T = \continue(head, V([]), opts, m); - return HVHV(T, m - hwidth(T), [next, *tail], opts, m, H([])); + Text T = \continue(head, V_([]), opts, m); + return HVHV(T, m - hwidth(T), [next, *tail], opts, m, H_([])); } -// empty lists do not need grouping -private Text GG([], Box(list[Box]) op, int gs, Box c, Options opts, int m) - = \continue(U([]), c, opts, m); - -// the last elements are 
smaller than the group size, just wrap them up and finish -private Text GG([*Box last], Box(list[Box]) op, int gs, Box c, Options opts, int m) - = \continue(op(u(last))[hs=opts.hs][vs=opts.vs][is=opts.is], c, opts, m) - when size(last) < gs; - -// we pick the head of (size group size) and then continue with the rest -private Text GG([*Box heads, *Box tail], Box(list[Box]) op, int gs, Box c, Options opts, int m) - = \continue(op(heads)[hs=opts.hs][vs=opts.vs][is=opts.is], NULL(), opts, m) - + \continue(G(tail, op=op, hs=opts.hs, vs=opts.vs, is=opts.is, gs=gs), c, opts, m) - when size(heads) == gs; - private Text continueWith(Box b:L(str s) , Box c, Options opts, int m) = LL(s); -private Text continueWith(Box b:H(list[Box] bl) , Box c, Options opts, int m) = HH(u(bl), c, opts, m); -private Text continueWith(Box b:V(list[Box] bl) , Box c, Options opts, int m) = VV(u(bl), c, opts, m); -private Text continueWith(Box b:I(list[Box] bl) , Box c, Options opts, int m) = II(u(bl), c, opts, m); -private Text continueWith(Box b:WD(list[Box] bl) , Box c, Options opts, int m) = WDWD(u(bl), c, opts, m); -private Text continueWith(Box b:HOV(list[Box] bl), Box c, Options opts, int m) = HOVHOV(u(bl), c, opts, m); -private Text continueWith(Box b:HV(list[Box] bl) , Box c, Options opts, int m) = HVHV(u(bl), c, opts, m); +private Text continueWith(Box b:H_(list[Box] bl) , Box c, Options opts, int m) = HH(u(bl), c, opts, m); +private Text continueWith(Box b:V_(list[Box] bl) , Box c, Options opts, int m) = VV(u(bl), c, opts, m); +private Text continueWith(Box b:I_(list[Box] bl) , Box c, Options opts, int m) = II(u(bl), c, opts, m); +private Text continueWith(Box b:WD_(list[Box] bl) , Box c, Options opts, int m) = WDWD(u(bl), c, opts, m); +private Text continueWith(Box b:HOV_(list[Box] bl), Box c, Options opts, int m) = HOVHOV(u(bl), c, opts, m); +private Text continueWith(Box b:HV_(list[Box] bl) , Box c, Options opts, int m) = HVHV(u(bl), c, opts, m); private Text continueWith(Box 
b:SPACE(int n) , Box c, Options opts, int m) = hskip(n); // This is a degenerate case, an outermost U-Box without a wrapper around it. -private Text continueWith(Box b:U(list[Box] bl) , Box c, Options opts, int m) = HH(u(bl), c, opts, m); +private Text continueWith(Box b:U_(list[Box] bl) , Box c, Options opts, int m) = HH(u(bl), c, opts, m); -private Text continueWith(Box b:A(list[Row] rows), Box c, Options opts, int m) - = AA(rows, c, b.columns, opts, m); +private Text continueWith(Box b:G_(list[Box] bl), Box c, Options opts, int m) + = GG(u(bl), c, opts, m, b.gs, b.op, b.backwards); -private Text continueWith(Box b:G(list[Box] bl), Box c, Options opts, int m) = GG(u(bl), b.op, b.gs, c, opts, m); +private Text continueWith(Box b:A_(list[Row] rows), Box c, Options opts, int m) + = AA(rows, c, b.columns, b.rs, opts, m); -@synopsis{General shape of a Box operator, as a parameter to `G`} -private alias BoxOp = Box(list[Box]); +private Text continueWith(Box b:AG_(list[Box] boxes), Box c, Options opts, int m) + = AAG(u(boxes), b.gs, b.columns, b.rs, c, opts, m); @synopsis{Option inheritance layer; then continue with the next box.} @description{ @@ -333,7 +346,7 @@ The next box is either configured by itself. Options are transferred from the box to the opts parameter for easy passing on to recursive calls. 
} private Text \continue(Box b, Box c, Options opts, int m) - = continueWith(b, c, opts[hs=b.hs][vs=b.vs][is=b.is], m); + = continueWith(b, c, opts[hs=b.hs][vs=b.vs][is=(b.is?)?b.is:opts.is], m); /* ------------------------------- Alignment ------------------------------------------------------------*/ @@ -358,37 +371,86 @@ private int Acolumns(list[Row] rows) = (0 | max(it, size(row.cells)) | row <- ro @synopsis{Compute the maximum cell width for each column in an array} private list[int] Awidth(list[list[Box]] rows) - = [(0 | max(it, row[col].width) | row <- rows ) | int col <- [0..size(head(rows))]]; + = [(0 | max(it, row[col].width) | row <- rows, col < size(row) ) | int col <- [0..size(head(rows))]]; @synopsis{Adds empty cells to every row until every row has the same amount of columns.} -list[Row] AcompleteRows(list[Row] rows, int columns=Acolumns(rows)) - = [ R(u([*row.cells, *[H([]) | _ <- [0..columns - size(row.cells)]]])) | row <- rows]; +list[Row] AcompleteRows(list[Row] rows, int columns=Acolumns(rows), Box rs=NULL()) + = [ R(u([*row.cells[..-1], H_([row.cells[-1], rs],hs=0), *[SPACE(1) | _ <- [0..columns - size(row.cells)]]])) | row <- rows[..-1]] + + [ R(u([*rows[-1].cells, *[SPACE(1) | _ <- [0..columns - size(rows[-1].cells)]]]))] ; @synopsis{Helper function for aligning Text inside an array cell} private Box align(l(), Box cell, int maxWidth) = maxWidth - cell.width > 0 - ? H([cell, SPACE(maxWidth - cell.width)], hs=0) + ? H_([cell, SPACE(maxWidth - cell.width)], hs=0) : cell; private Box align(r(), Box cell, int maxWidth) = maxWidth - cell.width > 0 - ? H([SPACE(maxWidth - cell.width), cell], hs=0) + ? H_([SPACE(maxWidth - cell.width), cell], hs=0) : cell; private Box align(c(), Box cell, int maxWidth) = maxWidth - cell.width > 1 - ? H([SPACE((maxWidth - cell.width) / 2), cell, SPACE((maxWidth - cell.width) / 2)], hs=0) + ? H_([SPACE((maxWidth - cell.width) / 2), cell, SPACE((maxWidth - cell.width) / 2)], hs=0) : maxWidth - cell.width == 1 ? 
align(l(), cell, maxWidth) : cell; -private Text AA(list[Row] table, Box c, list[Alignment] alignments, Options opts, int m) { - list[list[Box]] rows = RR(AcompleteRows(table), c, opts, m); +private Text AA(list[Row] table, Box c, list[Alignment] alignments, Box rs, Options opts, int m) { + if (table == []) { + return []; + } + + // first flatten any nested U cell lists into the Rows + table = [R(u(r.cells)) | Row r <- table]; + + // we remove any H-V backtracking because table cells are too small anyway, generally. + // so we prefer the less wide V over HOV and HV. This boosts efficiency radically, because + // later, every cell will be formatted individually to an optimal width, and measured, before we even start + // to format the table. Then the same cells will be formatted again from scratch. By removing the + // backtracking, larger tables (like reified grammars) become doable. + table = visit (table) { + case Box b:HOV_(list[Box] boxes) => V_(boxes, vs=b.vs) + case Box b:HV_(list[Box] boxes) => V_(boxes, vs=b.vs) + } + + // then we can know the number of columns + int maxColumns = Acolumns(table); + + // then we fill each row up to the maximum of columns + list[list[Box]] rows = RR(AcompleteRows(table, columns=maxColumns, rs=rs), c, opts, m); + + // and we infer alignments where not provided + alignments = AcompleteAlignments(alignments, maxColumns); + + // finally we compute alignment information list[int] maxWidths = Awidth(rows); - - return \continue(V([ - H([align(al, cell, mw) | <- zip3(row, alignments, maxWidths)]) - | row <- rows - ]),c, opts, m); + + try { + // A row is simply an H box where each cell is filled with enough spaces to align for the next column + return \continue(V_([ + H_([align(al, cell, mw) | <- zip3(row, alignments, maxWidths)]) | row <- rows]), c, opts, m); + } + catch IllegalArgument(_, "List size mismatch"): { + throw IllegalArgument("Array alignments size is while there are columns."); + } } +private Text AAG([], int _gs,
list[Alignment] _columns, Box _rs, Box _c, Options _opts, int _m) = []; + +private Text AAG(list[Box] boxes:[Box _, *_], int gs, list[Alignment] columns, Box rs, Box c, Options opts, int m) + = \continue(A(groupRows(boxes, gs), columns=columns, rs=rs), c, opts, m); + +private list[Row] groupRows([], int _gs) = []; + +private list[Row] groupRows(list[Box] boxes:[Box _, *_], int gs) + = [R(boxes[..gs]), *groupRows(boxes[gs..], gs)]; + +@synopsis{Cuts off and extends the alignment spec to the width of the table} +@description{ +* if too few columns are specified: `l()`'s are added accordingly +* if too many columns are specified: they are cut off from the right +} +private list[Alignment] AcompleteAlignments(list[Alignment] alignments, int maxColumns) + = [*alignments[..maxColumns], *[l() | _ <- [0..maxColumns - size(alignments)]]]; + @synopsis{Check soft limit for HV and HOV boxes} // TODO this seems to ignore SPACE boxes? private bool noWidthOverflow(list[Box] hv, Options opts) @@ -396,46 +458,88 @@ private bool noWidthOverflow(list[Box] hv, Options opts) @synopsis{Changes all HV boxes that do fit horizontally into hard H boxes.} private Box applyHVconstraints(Box b, Options opts) = innermost visit(b) { - case HV(boxes, hs=h, is=i, vs=v) => H(boxes, hs=h, is=i, vs=v) + case Box B:HV_(list[Box] boxes, hs=h, is=i, vs=v) => H_(boxes, hs=h, is=(B.is?)?i:opts.is, vs=v) when noWidthOverflow(boxes, opts) }; @synopsis{Changes all HOV boxes that do fit horizontally into hard H boxes, and the others into hard V boxes.} private Box applyHOVconstraints(Box b, Options opts) = innermost visit(b) { - case HOV(boxes, hs=h, is=i, vs=v) => noWidthOverflow(boxes, opts) - ? H(boxes, hs=h, is=i, vs=v) - : V(boxes, hs=h, is=i, vs=v) + case Box B:HOV_(list[Box] boxes, hs=h, is=i, vs=v) => noWidthOverflow(boxes, opts) + ? 
H_(boxes, hs=h, is=(B.is?)?i:opts.is, vs=v) + : V_(boxes, hs=h, is=(B.is?)?i:opts.is, vs=v) }; @synopsis{Workhorse, that first applies hard HV and HOV limits and then starts the general algorithm} private Text box2data(Box b, Options opts) { b = applyHVconstraints(b, opts); b = applyHOVconstraints(b, opts); - return \continue(b, V([]), options(), opts.maxWidth); + return \continue(b, V_([]), options(is=opts.is), opts.maxWidth); } ///////////////// regression tests //////////////////////////////// test bool horizontalPlacement2() - = format(H([L("A"), L("B"), L("C")], hs=2)) + = format(H(L("A"), L("B"), L("C"), hs=2)) == "A B C '"; test bool horizontalPlacement3() - = format(H([L("A"), L("B"), L("C")], hs=3)) + = format(H(L("A"), L("B"), L("C"), hs=3)) == "A B C '"; +test bool horizontalIndentIsNoop1() + = format(H(L("A"), I(L("B")))) + == "A B + '"; + +test bool horizontalIndentIsNoop2() + = format(HV(L("A"), I(L("B")))) + == "A B + '"; + +test bool horizontalIndentIsNoop3() + = format(HOV(L("A"), I(L("B")))) + == "A B + '"; + +test bool emptyBoxesNoExtraSpacing1() + = format(H(L("A"), H(), L("B"))) + == "A B + '"; + +test bool emptyBoxesNoExtraSpacing2() + = format(H(L("A"), V(), L("B"))) + == "A B + '"; + +test bool emptyBoxesNoExtraSpacing3() + = format(H(L("A"), I(), L("B"))) + == "A B + '"; + +test bool emptyBoxesNoExtraSpacing4() + = format(V(L("A"), H(), L("B"))) + == "A + 'B + '"; + +test bool emptyBoxesNoExtraSpacing5() + = format(V(L("A"), V(), L("B"))) + == "A + 'B + '"; + test bool verticalPlacement0() - = format(V([L("A"), L("B"), L("C")], vs=0)) + = format(V(L("A"), L("B"), L("C"), vs=0)) == "A 'B 'C '"; test bool verticalPlacement1() - = format(V([L("A"), L("B"), L("C")], vs=1)) + = format(V(L("A"), L("B"), L("C"), vs=1)) == "A ' 'B @@ -444,14 +548,14 @@ test bool verticalPlacement1() '"; test bool verticalIndentation2() - = format(V([L("A"), I([L("B")]), L("C")])) + = format(V(L("A"), I(L("B")), L("C"))) == "A ' B 'C '"; test bool blockIndent() - = 
format(V([L("A"), I([V([L("B"), L("C")])]), L("D")])) + = format(V(L("A"), I(V(L("B"), L("C"))), L("D"))) == "A ' B ' C @@ -459,52 +563,52 @@ test bool blockIndent() '"; test bool wrappingIgnoreIndent() - = format(HV([L("A"), I([L("B")]), L("C")], hs=0), maxWidth=2, wrapAfter=2) + = format(HV(L("A"), I(L("B")), L("C"), hs=0), opts=formatOptions(maxWidth=2, wrapAfter=2)) == "AB 'C '"; test bool wrappingWithIndent() - = format(HV([L("A"), I([L("B")]), I([L("C")])], hs=0), maxWidth=2, wrapAfter=2) + = format(HV(L("A"), I(L("B")), I(L("C")), hs=0),opts=formatOptions( maxWidth=2, wrapAfter=2)) == "AB ' C '"; test bool multiBoxIndentIsVertical() - = format(I([L("A"), L("B")])) + = format(I(L("A"), L("B"))) == " A ' B '"; test bool flipping1NoIndent() - = format(HOV([L("A"), L("B"), L("C")], hs=0, vs=0), maxWidth=2, wrapAfter=2) + = format(HOV(L("A"), L("B"), L("C"), hs=0, vs=0), opts=formatOptions(maxWidth=2, wrapAfter=2)) == "A 'B 'C '"; test bool horizontalOfOneVertical() - = format(H([L("A"), V([L("B"), L("C")])])) + = format(H(L("A"), V(L("B"), L("C")))) == "A B ' C '"; test bool stairCase() - = format(H([L("A"), V([L("B"), H([L("C"), V([L("D"), H([L("E"), L("F")])])])])])) + = format(H(L("A"), V(L("B"), H(L("C"), V(L("D"), H(L("E"), L("F"))))))) == "A B ' C D ' E F '"; test bool simpleTable() - = format(A([R([L("1"),L("2"),L("3")]),R([L("4"), L("5"), L("6")]),R([L("7"), L("8"), L("9")])])) + = format(A(R([L("1"),L("2"),L("3")]),R([L("4"), L("5"), L("6")]),R([L("7"), L("8"), L("9")]))) == "1 2 3 '4 5 6 '7 8 9 '"; test bool simpleAlignedTable() - = format(A([R([L("1"),L("2"),L("3")]),R([L("44"), L("55"), L("66")]),R([L("777"), L("888"), L("999")])], + = format(A(R([L("1"),L("2"),L("3")]),R([L("44"), L("55"), L("66")]),R([L("777"), L("888"), L("999")]), columns=[l(),c(),r()])) == "1 2 3 '44 55 66 @@ -512,7 +616,7 @@ test bool simpleAlignedTable() '"; test bool simpleAlignedTableDifferentAlignment() - = format(A([R([L("1"),L("2"),L("3")]),R([L("44"), L("55"), 
L("66")]),R([L("777"), L("888"), L("999")])], + = format(A(R([L("1"),L("2"),L("3")]),R([L("44"), L("55"), L("66")]),R([L("777"), L("888"), L("999")]), columns=[r(),c(),l()])) == " 1 2 3 ' 44 55 66 @@ -520,54 +624,51 @@ test bool simpleAlignedTableDifferentAlignment() '"; test bool WDtest() { - L1 = H([L("aap")] , hs=0); - L2 = H([WD([L1]), L("noot")], hs=0); - L3 = H([WD([L2]), L("mies")], hs=0); + L1 = H(L("aap") , hs=0); + L2 = H(WD(L1), L("noot"), hs=0); + L3 = H(WD(L2), L("mies"), hs=0); - return format(V([L1, L2, L3])) + return format(V(L1, L2, L3)) == "aap ' noot ' mies '"; } -test bool groupBy() { +test bool groupByTest() { lst = [L("") | i <- [0..10]]; - g1 = G(lst, op=H, gs=3); - lst2 = [H([L(""), L(""), L("")]) | i <- [0,3..7]] + [H([L("9")])]; + g1 = G(lst, op=H(), gs=3); + lst2 = [H(L(""), L(""), L("")) | i <- [0,3..7]] + [H(L("9"))]; - return format(V([g1])) == format(V(lst2)); + return format(V(g1)) == format(V(lst2)); } -test bool noDegenerateHSeparators() - = format(H([L("a"),H([]),L("b")])) - == "a b - '"; +test bool groupByBackwardsTest() { + lst = [L("") | i <- [0..10]]; + g1 = G(lst, op=H(), gs=3, backwards=true); + lst2 = [H(L("0"))] + [H(L(""), L(""), L("")) | i <- [1, 4..10]]; -test bool noDegenerateVSeparators() - = format(V([L("a"),H([]),L("b")])) - == "a - 'b - '"; + return format(V([g1])) == format(V(lst2)); +} test bool noDegenerateHVSeparators1() - = format(HV([L("a"),V([]),L("b")])) + = format(HV(L("a"),V(),L("b"))) == "a b '"; test bool noDegenerateHVSeparators2() - = format(HV([L("a"),V([]),L("b")]), maxWidth=1, wrapAfter=1) + = format(HV(L("a"),V(),L("b")), opts=formatOptions(maxWidth=1, wrapAfter=1)) == "a 'b '"; test bool noDegenerateHOVSeparators1() - = format(HOV([L("a"),V([]),L("b")])) + = format(HOV(L("a"),V(),L("b"))) == "a b '"; test bool noDegenerateHOVSeparators2() - = format(HOV([L("a"),V([]),L("b")]), maxWidth=1, wrapAfter=1) + = format(HOV(L("a"),V(),L("b")), opts=formatOptions(maxWidth=1, wrapAfter=1)) == "a 'b '"; 
diff --git a/src/org/rascalmpl/library/lang/box/util/Tree2Box.rsc b/src/org/rascalmpl/library/lang/box/util/Tree2Box.rsc index bf580f16f4f..efdcf69f879 100644 --- a/src/org/rascalmpl/library/lang/box/util/Tree2Box.rsc +++ b/src/org/rascalmpl/library/lang/box/util/Tree2Box.rsc @@ -67,12 +67,12 @@ module lang::box::util::Tree2Box import ParseTree; import lang::box::\syntax::Box; import String; -import IO; + @synopsis{Configuration options for toBox} -data FormatOptions = formatOptions( +data FormatOptions( CaseInsensitivity ci = asIs() -); +) = formatOptions(); @synopsis{Normalization choices for case-insensitive literals.} data CaseInsensitivity @@ -92,8 +92,6 @@ by the user is necessary. default Box toBox(t:appl(Production p, list[Tree] args), FO opts = fo()) { // the big workhorse switch identifies all kinds of special cases for shapes of // grammar rules, and accidental instances (emptiness, only whitespace, etc.) - Symbol _nl = #[\n].symbol; - Symbol notNl = #![\n].symbol; switch () { // nothing should not produce additional spaces @@ -101,39 +99,48 @@ default Box toBox(t:appl(Production p, list[Tree] args), FO opts = fo()) { return NULL(); // literals are printed as-is - case : - return L(""); + case : { + str yield = ""; + return yield != "" ? L(yield) : NULL(); + } // case-insensitive literals are optionally normalized - case : - return L(ci("", opts.ci)); + case : { + str yield = ""; + return yield != "" ? L(ci("", opts.ci)) : NULL(); + } // non-existing content should not generate accidental spaces case : - return NULL(); + return NULL(); + + case : + return U([toBox(present)]); // non-separated lists should stick without spacing (probably lexical) case : return H([toBox(e, opts=opts) | e <- elements], hs=0); + // non-separated lists should stick without spacing (probably lexical) case : return H([toBox(e, opts=opts) | e <- elements], hs=0); - // comma's are usually for parameters separation + // comma's are usually for parameters separation. 
leaving it to + // parent to wrap the box in the right context. case : - return HOV([ + return U([ H([ - toBox(elements[i], opts=opts), - *[H([toBox(elements[i+2], opts=opts)], hs=1) | i + 2 < size(elements)] + toBox(elements[i], opts=opts), // element + *[L(",") | i + 2 < size(elements)] // separator ], hs=0) | int i <- [0,4..size(elements)] ]); // comma's are usually for parameters separation case : - return HOV([ + return HV([ H([ - toBox(elements[i], opts=opts), - *[H([toBox(elements[i+2], opts=opts)], hs=1) | i + 2 < size(elements)] + toBox(elements[i], opts=opts), // element + *[L(",") | i + 2 < size(elements)] // separator ], hs=0) | int i <- [0,4..size(elements)] ]); @@ -141,8 +148,8 @@ default Box toBox(t:appl(Production p, list[Tree] args), FO opts = fo()) { case : return V([ H([ - toBox(elements[i], opts=opts), - *[H([toBox(elements[i+2], opts=opts)], hs=1) | i + 2 < size(elements)] + toBox(elements[i], opts=opts), // element + *[L(";") | i + 2 < size(elements)] // separator ], hs=0) | int i <- [0,4..size(elements)] ]); @@ -150,17 +157,16 @@ default Box toBox(t:appl(Production p, list[Tree] args), FO opts = fo()) { case : return V([ H([ - toBox(elements[i], opts=opts), - *[H([toBox(elements[i+2], opts=opts)], hs=1) | i + 2 < size(elements)] + toBox(elements[i], opts=opts), // element + *[toBox(elements[i+2]) | i + 2 < size(elements)] // separator ], hs=0) | int i <- [0,4..size(elements)] ]); - // semi-colons are usually for parameters separation case : return V([ H([ - toBox(elements[i], opts=opts), - *[H([toBox(elements[i+2], opts=opts)], hs=1) | i + 2 < size(elements)] + toBox(elements[i], opts=opts), // element + *[L(";") | i + 2 < size(elements)] // separator ], hs=0) | int i <- [0,4..size(elements)] ]); @@ -168,75 +174,87 @@ default Box toBox(t:appl(Production p, list[Tree] args), FO opts = fo()) { case : return V([ H([ - toBox(elements[i], opts=opts), - *[H([toBox(elements[i+2], opts=opts)], hs=1) | i + 2 < size(elements)] + toBox(elements[i], 
opts=opts), // element + *[toBox(elements[i+2]) | i + 2 < size(elements)] // separator ], hs=0) | int i <- [0,4..size(elements)] ]); - case : - return V([G([toBox(e, opts=opts) | e <- elements], gs=4, hs=0, op=H)], hs=1); + // now we have any other literal as separator + case : + return U([ + H([ + toBox(elements[i], opts=opts), // element + *[toBox(elements[i+2]) | i + 2 < size(elements)] // separator + ], hs=0) | int i <- [0,4..size(elements)] + ]); - case : - return V([G([toBox(e, opts=opts) | e <- elements], gs=4, hs=0, op=H)], hs=1); - - // with only one separator it's probably a lexical - case : - return V([G([toBox(e, opts=opts) | e <- elements], gs=2, hs=0, op=H)], hs=0); + case : + return U([ + H([ + toBox(elements[i], opts=opts), // element + *[toBox(elements[i+2]) | i + 2 < size(elements)] // separator + ], hs=0) | int i <- [0,4..size(elements)] + ]); - case : - return V([G([toBox(e, opts=opts) | e <- elements], gs=2, hs=0, op=H)], hs=0); + + // this is a normal list + case : + return U([toBox(elements[i], opts=opts) | int i <- [0,2..size(elements)]]); + // this is likely a lexical + case : + return H([toBox(e, opts=opts) | e <- elements], hs=0); + + // this is likely a lexical + case : + return H([toBox(e, opts=opts) | e <- elements], hs=0); + + // this is a normal list + case : + return U([toBox(elements[i], opts=opts) | int i <- [0,2..size(elements)]]); + + // this is likely a lexical + case : + return H([toBox(e, opts=opts) | e <- elements], hs=0); + + // this is likely a lexical + case : + return H([toBox(e, opts=opts) | e <- elements], hs=0); + // We remove all layout node positions to make the number of children predictable // Comments can be recovered by `layoutDiff`. By not recursing into layout // positions `toBox` becomes more than twice as fast. 
case : return NULL(); - // if we are given a comment node, then we can format it here for use by layoutDiff - case : - return V([ - H([toBox(elements[0], opts=opts), - H([L(e) | e <- words("")], hs=1) - ], hs=1) - ]); - - // if we are given a comment node, then we can pretty print it here for use by layoutDiff - case : - return V([ - H([toBox(elements[0], opts=opts), - H([L(w) | e <- elements[1..], w <- words("")], hs=1) - ], hs=1) - ]); - - // multiline comments are rewrapped for the sake of readability and fitting on the page - case : - return HV([toBox(elements[0], opts=opts), // recurse in case its a ci literal - *[L(w) | e <- elements[1..-1], w <- words("")], // wrap a nice paragraph - toBox(elements[-1], opts=opts) // recurse in case its a ci literal - ], hs=1); - - // lexicals are never split in pieces, unless it's comments but those are handled above. - case : - return L(""); + // lexicals are never split in pieces + case : { + str yield = ""; + return yield != "" ? L(yield) : NULL(); + } // Now we will deal with a lot of cases for expressions and block-structured statements. // Those kinds of structures appear again and again as many languages share inspiration - // from their predecessors. Watching out not to loose any comments... + // from their predecessors.
- case : - return HOV([toBox(elements[0], opts=opts), H([toBox(e, opts=opts) | e <- elements[1..]])]); + // binary operators become flat lists + case : + return U([toBox(elements[0]), L(op), toBox(elements[-1])]); // postfix operators stick - case : + case : return H([toBox(e, opts=opts) | e <- elements], hs=0); // prefix operators stick - case : + case : return H([toBox(e, opts=opts) | e <- elements], hs=0); // brackets stick - case : - return H([toBox(e, opts=opts) | e <- elements], hs=0); + case : + return H(L("("), I(HOV(toBox(elements[2], opts=opts))), L(")"), hs=0); + + case : + return toBox(single); // if the sort name is statement-like and the structure block-like, we go for // vertical with indentation @@ -247,6 +265,10 @@ default Box toBox(t:appl(Production p, list[Tree] args), FO opts = fo()) { I([V([toBox(e, opts=opts) | Tree e <- elements[size(pre)+1..-1]])]), toBox(elements[-1], opts=opts) ]); + + // this is to simplify the tree structure for efficiency and readability + case : + return toBox(singleton); } return HV([toBox(a, opts=opts) | a <- args]); @@ -261,19 +283,92 @@ default Box toBox(c:char(_), FormatOptions opts=fo() ) = L(""); @synopsis{Cycles are invisible and zero length} default Box toBox(cycle(_, _), FO opts=fo()) = NULL(); -@synopsis{Private type alias for legibility's sake} -private alias FO = FormatOptions; +@synopsis{Create a V box of V boxes where the inner boxes are connected and the outer boxes are separated by an empty line.} +@description{ +This function learns from the input trees how vertical clusters were laid out in the original tree. +The resulting box maintains the original clustering.
+For example, such lists of declarations which are separated by a newline, remain separated after formatting with `toClusterBox` +``` +int a1 = 1; +int a2 = 2; + +int b1 = 3; +int b2 = 4; +``` +} +@benefits{ +* many programmers use vertical clustering, or "grouping statements", to indicate meaning or intent, by not throwing this +away we are not throwing away the documentative value of their grouping efforts. +} +@pitfalls{ +* ((toClusterBox)) is one of the (very) few Box functions that use layout information from the input tree to +influence the layout of the output formatted code. It replaces a call to ((toBox)) for that reason. +* ((toClusterBox)) does not work on separated lists, yet. +} +Box toClusterBox(list[Tree] lst, FO opts=fo()) { + list[Box] cluster([]) = []; -@synopsis{Removing production labels removes similar patterns in the main toBox function.} -private Production delabel(prod(label(_, Symbol s), list[Symbol] syms, set[Attr] attrs)) - = prod(s, delabel(syms), attrs); + list[Box] cluster([Tree e]) = [V([toBox(e)], vs=0)]; -private default Production delabel(Production p) = p; + list[Box] cluster([*Tree pre, Tree last, Tree first, *Tree post]) + = [V([*[toBox(p, opts=opts) | p <- pre], toBox(last, opts=opts)], vs=0), *cluster([first, *post])] + when first@\loc.begin.line - last@\loc.end.line > 1 + ; -private list[Symbol] delabel(list[Symbol] syms) = [delabel(s) | s <- syms]; + default list[Box] cluster(list[Tree] l) = [V([toBox(e, opts=opts) | e <- l], vs=0)]; + + return V(cluster(lst), vs=1); +} -private Symbol delabel(label(_, Symbol s)) = s; -private default Symbol delabel(Symbol s) = s; +Box toClusterBox(&T* lst, FO opts=fo()) = toClusterBox([e | e <- lst], opts=opts); +Box toClusterBox(&T+ lst, FO opts=fo()) = toClusterBox([e | e <- lst], opts=opts); + +@synopsis{Reusable way of dealing with large binary expression trees} +@description{ +1. the default `toBox` will flatten nested binary expressions to U lists. +2. 
the G box groups each operator with the following expression on the right hand-side, + * given an initial element (usually L("=") or L(":=")) for the assignment operators +3. the entire list is indented in case the surrounding context needs more space +4. the net result is usually in vertical mode: +``` + = operand1 + + operand2 + + operand3 +``` +or in horizontal mode: +``` += operand1 + operand2 + operand3 +``` + +By default ((toExpBox)) wraps its result in a HOV context, but you can pass +in a different `wrapper` if you like. +} +Box toExpBox(Box prefix, Tree expression, Box wrapper=HOV()) + = wrapper[boxes=[G(prefix, toBox(expression), gs=2, op=H())]]; + +@synopsis{Reusable way of dealing with large binary expression trees} +@description{ +1. the default `toBox` will flatten nested binary expressions to U lists. +2. the G box groups each operator horizontally with the following expression on the right hand-side. +3. the net result is usually in vertical mode: +``` + operand1 + operand2 + + operand3 +``` +or in horizontal mode: +``` +operand1 + operand2 + operand3 +``` + +By default ((toExpBox)) wraps its result in a HV context, but you can pass +in a different `wrapper` if you like.
+ +} +Box toExpBox(Tree expression, Box wrapper=HV()) + = wrapper[boxes=[G(toBox(expression), gs=2, backwards=true, op=H())]]; + +@synopsis{Private type alias for legibility's sake} +private alias FO = FormatOptions; @synopsis{This is a short-hand for legibility's sake} private FO fo() = formatOptions(); @@ -284,6 +379,11 @@ private str ci(str word, toUpper()) = toUpperCase(word); private str ci(str word, toCapitalized()) = capitalize(word); private str ci(str word, asIs()) = word; -@synopsis{Split a text by the supported whitespace characters} -private list[str] words(str text) - = [ x | // := text]; \ No newline at end of file +@synopsis{Removing production labels helps with case distinctions on ((Symbol)) kinds.} +private Production delabel(prod(Symbol s, list[Symbol] syms, set[Attr] attrs)) = prod(delabel(s), [delabel(x) | x <- syms], attrs); +private Production delabel(regular(Symbol s)) = regular(delabel(s)); + +@synopsis{Removing symbol labels helps with case distinctions on ((Symbol)) kinds.} +private Symbol delabel(label(_, Symbol s)) = delabel(s); +private Symbol delabel(conditional(Symbol s, _)) = delabel(s); +private default Symbol delabel(Symbol s) = s; \ No newline at end of file diff --git a/src/org/rascalmpl/library/lang/c90/syntax/C.rsc b/src/org/rascalmpl/library/lang/c90/syntax/C.rsc index 556b7fcb9fa..b054d59a69b 100644 --- a/src/org/rascalmpl/library/lang/c90/syntax/C.rsc +++ b/src/org/rascalmpl/library/lang/c90/syntax/C.rsc @@ -87,8 +87,7 @@ syntax Expression | Expression "\>\>=" Expression | Expression "&=" Expression | Expression "^=" Expression - | Expression " - | =" Expression + | Expression "|=" Expression ) > left commaExpression: Expression "," Expression ; diff --git a/src/org/rascalmpl/library/lang/pico/format/Formatting.rsc b/src/org/rascalmpl/library/lang/pico/format/Formatting.rsc index 20941f8a5f8..02649d8a481 100644 --- a/src/org/rascalmpl/library/lang/pico/format/Formatting.rsc +++ 
b/src/org/rascalmpl/library/lang/pico/format/Formatting.rsc @@ -47,18 +47,18 @@ list[TextEdit] formatPicoTree(start[Program] file) { @synopsis{Format while} Box toBox((Statement) `while do <{Statement ";"}* block> od`, FO opts = fo()) - = V([ - H([L("while"), toBox(e, opts=opts), L("do")]), - I([toBox(block, opts=opts)]), + = V( + H(L("while"), HV(toBox(e, opts=opts)), L("do")), + I(toClusterBox(block, opts=opts)), L("od") - ]); + ); @synopsis{Format if-then-else } Box toBox((Statement) `if then <{Statement ";"}* thenPart> else <{Statement ";"}* elsePart> fi`, FO opts = fo()) - = V([ - H([L("if"), toBox(e, opts=opts), L("then")]), - I([toBox(thenPart, opts=opts)]), + = V( + H(L("if"), HV(toBox(e, opts=opts)), L("then")), + I(toClusterBox(thenPart, opts=opts)), L("else"), - I([toBox(elsePart, opts=opts)]), + I(toClusterBox(elsePart, opts=opts)), L("fi") - ]); \ No newline at end of file + ); \ No newline at end of file diff --git a/src/org/rascalmpl/library/lang/pico/syntax/Main.rsc b/src/org/rascalmpl/library/lang/pico/syntax/Main.rsc index ac6d9905073..c028d99323c 100644 --- a/src/org/rascalmpl/library/lang/pico/syntax/Main.rsc +++ b/src/org/rascalmpl/library/lang/pico/syntax/Main.rsc @@ -58,9 +58,5 @@ lexical WhitespaceAndComment ; public start[Program] program(str s) { - return parse(#start[Program], s); -} - -public start[Program] program(str s, loc l) { - return parse(#start[Program], s, l); -} + return parse(#start[Program], s); +} \ No newline at end of file diff --git a/src/org/rascalmpl/library/lang/rascal/format/Grammar.rsc b/src/org/rascalmpl/library/lang/rascal/format/Grammar.rsc index d29f1ad0e30..73e27142e60 100644 --- a/src/org/rascalmpl/library/lang/rascal/format/Grammar.rsc +++ b/src/org/rascalmpl/library/lang/rascal/format/Grammar.rsc @@ -10,6 +10,10 @@ @contributor{Arnold Lankamp - Arnold.Lankamp@cwi.nl} @synopsis{Convert the Rascal internal grammar representation format (Grammar) to a syntax definition in Rascal source code.} +@pitfalls{ +This 
function does not use advanced formatting features because it is part of +components used early in Rascal's bootstrapping and standard library construction cycle. +} module lang::rascal::format::Grammar import ParseTree; diff --git a/src/org/rascalmpl/library/lang/rascal/format/Rascal.rsc b/src/org/rascalmpl/library/lang/rascal/format/Rascal.rsc new file mode 100644 index 00000000000..402e6c10b39 --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/format/Rascal.rsc @@ -0,0 +1,1099 @@ +@license{ +Copyright (c) 2022, NWO-I Centrum Wiskunde & Informatica (CWI) +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +} +@synopsis{Composes a default formatter for Rascal modules} +@description{ +This module composes and describes a "standard" formatting style for Rascal.
+There could be other styles of course. Other styles can be built by +writing different `toBox` rules. + +TODO's: + // * HOV instead of HV for constructor parameters (this is probably a good thing) + * global variable assignment indentation too deep (has to do with visibility keywords?) + * alias has no spacing around = + * `public` indents function declarations too deeply if there is no function body `;` (see also globals) + // * parse error in IO output: + // * multiple empty lines between function definitions in IO.rsc + // * copyFile has is=2? + // * missing brackets around parameters in final closure calls + * parameter lists of function calls go to vertical too soon + * single line comment at end of line should not go to its own line + * if brackets go vertical of call syntax, parameters also always go vertical. not necessary. + * "if" can get "\{" below it (HV?) in some corner cases. Not good. + * in Set ArithmeticException a string template ends with an unindented quote? + * too many spaces around = in kwparams default binding +} +@bootstrapParser +module lang::rascal::format::Rascal + +// by extending these modules we compose a `toBox` function +// which handles all relevant constructs of Rascal +extend lang::box::util::Tree2Box; +extend lang::rascal::\syntax::Rascal; + +import IO; +import ParseTree; +import String; +import analysis::diff::edits::ExecuteTextEdits; +import analysis::diff::edits::HiFiLayoutDiff; +import analysis::diff::edits::TextEdits; +import lang::box::\syntax::Box; +import lang::box::util::Box2Text; +import util::Formatters; +import util::Reflective; + +@synopsis{Format an entire Rascal file, in-place.} +void (loc) formatRascalFile = fileFormatter(#start[Module], toBox); +@synopsis{Format a Rascal module string} +str (str) formatRascalModule = stringFormatter(#start[Module], toBox); + +@synopsis{Format any Rascal module and dump the result as a string} +void debugFormatRascalFile(loc \module, bool console=false) { +
debugFileFormat(#start[Module], toBox, \module, console=console, opts=formatOptions(is=8)); +} + +void testOnLibrary() { + debugFilesFormat( + #start[Module], + toBox, + |project://rascal/src/org/rascalmpl/library/|, + "rsc", + ansi=true, + shadowFiles=false, + appendFile=true, + console=false); +} + +/* Modules */ + +Box toBox(Toplevel* toplevels) = toClusterBox(toplevels); + +Box toBox((Module) ` module `) + = V(V(toBox(tags), + H(L("module"), toBox(name))), + toClusterBox(imports), + toBox(body), vs=1); + +Box toBox(Import* imports) = toClusterBox(imports); + +Box toBox((Import) `import ;`) + = H(L("import"), H0(toBox(m), L(";"))); + +Box toBox((Import) `extend ;`) + = H(L("extend"), H0(toBox(m), L(";"))); + +Box toBox((Visibility) ``) = NULL(); + +/* Syntax definitions */ + +Box toBox((SyntaxDefinition) ` syntax = ;`) + = (production is \all || production is \first) + ? V(H(toBox(st), L("syntax"), toBox(defined)), + I(G(L("="), toBox(production), gs=2, op=H([])), + L(";"))) + : // single rule case + H(toBox(st), L("syntax"), toBox(defined), L("="), H0(toBox(production), L(";"))) + ; + +Box toBox((SyntaxDefinition) `lexical = ;`) + = (production is \all || production is \first) + ? V(H(L("lexical"), toBox(defined)), + I(G(L("="), toBox(production), gs=2, op=H([])), + L(";"))) + : // single rule case + H(L("lexical"), toBox(defined), L("="), H0(toBox(production), L(";"))) + ; + +Box toBox((SyntaxDefinition) `keyword = ;`) + = (production is \all || production is \first) + ? V(H(L("keyword"), toBox(defined)), + I(G(L("="), toBox(production), gs=2, op=H([])), + L(";"))) + : // single rule case + H(L("keyword"), toBox(defined), L("="), H0(toBox(production), L(";"))) + ; + +Box toBox((SyntaxDefinition) ` layout = ;`) + = (production is \all || production is \first) + ? 
V(H(toBox(v), L("layout"), toBox(defined)), + I(G(L("="), toBox(production), gs=2, op=H([])), + L(";"))) + : // single rule case + H(toBox(v), L("layout"), toBox(defined), L("="), H0(toBox(production), L(";"))) + ; + + +Box toBox((Prod) ` | `) + = U(toBox(lhs), L("|"), toBox(rhs)); + +Box toBox((Prod) ` \> `) + = U(toBox(lhs), L("\>"), toBox(rhs)); + +Box toBox((Prod) `:`) + = H0(L(":"), toBox(n)); + +Box toBox((Prod) ` : `) + = H([toBox(modifiers), H0(toBox(name), L(":")), *[toBox(s) | s <- syms]]); + +Box toBox((Prod) ` `) + = H([toBox(modifiers), *[toBox(s) | s <- syms]]); + +Box toBox((Prod) ` ()`) + = H(toBox(a), HOV(G(L("("), U(toBox(g)), L(")"), gs=2, op=H([])))); + +/* symbols */ +Box toBox((Sym) `{ }*`) = H0(L("{"), H1(toBox(e), toBox(sep)), L("}"), L("*")); +Box toBox((Sym) `{ }+`) = H0(L("{"), H1(toBox(e), toBox(sep)), L("}"), L("+")); +Box toBox((Sym) `*`) = H0(toBox(e), L("*")); +Box toBox((Sym) `+`) = H0(toBox(e), L("+")); +Box toBox((Sym) `?`) = H0(toBox(e), L("?")); +Box toBox((Sym) `()`) = H0(L("("), L(")")); + +Box toBox((Sym) `( )`) + = H0(L("("), H1([toBox(first), *[toBox(e) | Sym e <- sequence]]),L(")")); + +Box toBox((Sym) `start[]`) = H0(L("start"), L("["), toBox(s), L("]")); + +Box toBox((Sym) `( | <{Sym "|"}+ alternatives>)`) + = H0(L("("), H1([toBox(first), *[L("|"), toBox(e) | Sym e <- alternatives]]),L(")")); + +Box toBox((Class) `[]`) + = H0([L("["), *[toBox(r) | r <- ranges], L("]")]); + +Box toBox((Range) ` - `) + = H0(toBox(s), L("-"), toBox(e)); + +/* Declarations */ + +Box toBox((QualifiedName) `<{Name "::"}+ names>`) + = L(""); + +Box toBox((Tag) `@ `) + = H0(L("@"), toBox(n), toBox(contents)); + +Box toBox((Tag) `@ = `) + = H0(L("@"), toBox(n), L("="), toBox(exp)); + +Box toBox((Tag) `@`) + = H0(L("@"), toBox(n)); + +Box toBox(QualifiedName n) = L(""); + +Box toBox((Declaration) ` alias = ;`) + = V(toBox(t), + H(toBox(v), L("alias"), toBox(user), L("="), H0(toBox(base), L(";")))); + +Box toBox((Declaration) ` data ;`) + = 
V(toBox(tg), + H(toBox(v), L("data"), H0(toBox(typ), toBox(ps), L(";")))); + +Box toBox((Declaration) ` data = ;`) + = HV(V(toBox(tg), + H(toBox(v), L("data"), H0(toBox(typ)), toBox(ps))), + I(H(L("="), H0(toBox(va), L(";"))))); + +Box toBox((Declaration) ` data = | <{Variant "|"}+ vs>;`) + = V(toBox(tg), + H(toBox(v), L("data"), H0(toBox(typ)), toBox(ps)), + I([G([ + L("="), + toBox(va), + *[L("|"), toBox(vax) | Variant vax <- vs] // hoist the bars `|` up to the same level of `=` + ]), L(";")])); + +Box toBox((Declaration) ` = ;`) + = HV( + V( + toBox(tags), + H1(toBox(visibility), toBox(typ), toBox(name)) + ), + I(HOV(G(L("="), U([toBox(initial)])))), L(";")); + +Box toBox((Declaration) ` , <{Variable ","}+ variables>;`) + = HV(V(toBox(tags), H1(toBox(visibility), toBox(typ))), I(HOV(H0(toBox(first), L(",")), SL([toBox(v) | v <- variables], L(",")))), L(";")); + +Box toBox((Declarator) ` `) + = H1(toBox(typ), toBox(name)); + +Box toBox((Declarator) ` = `) + = HV(H(toBox(typ), toBox(name)), I(toExpBox(L("="), initial))); + +Box toBox((Declarator) ` , <{Variable ","}+ variables>`) + = HV(I(HOV(H(toBox(typ), toBox(first)), L(","), SL([toBox(v) | v <- variables], L(","))))); + +Box toBox((CommonKeywordParameters) `(<{KeywordFormal ","}+ fs>)`) + = H0(L("("), HOV(toBox(fs)), L(")")); + +Box toBox((Variant) `(<{TypeArg ","}* args>, <{KeywordFormal ","}+ kws>)`) + = HV( + H0(toBox(n), L("(")), + HOV( + I(H0(toBox(args), L(","))), + I(toBox(kws)), hs=1), + L(")"), hs=0); + +Box toBox((Variant) `(<{TypeArg ","}* args>)`) + = HV(H0(toBox(n), L("(")), + I(toBox(args)), + L(")"), hs=0); + +Box toBox((Variant) `(<{TypeArg ","}* args> + '<{KeywordFormal ","}+ kws>)`) + = HV( + H0(toBox(n), L("(")), + HOV( + I(H0(toBox(args))), + I(toBox(kws)), hs=1 + ), + L(")"), hs=0); + +Box toBox(FunctionModifier* modifiers) = H([toBox(b) | b <- modifiers]); + +Box toBox((Signature) ` throws <{Type ","}+ exs>`) + = HOV([ + H(toBox(modifiers), toBox(typ), H0(toBox(name), L("("))), + 
G(toBox(parameters), gs=1, op=I()), + H([L(")"), L("throws"), SL([toBox(e) | e <- exs], L(","))], hs=1)], hs=0); + +Box toBox((Signature) ` `) + = HOV( + H(toBox(modifiers), toBox(typ), H0(toBox(name), L("("))), + G(toBox(parameters), gs=1, op=I()), + L(")"), + hs=0); + +Box toBox((FunctionDeclaration) ` ;`) + = V( + toBox(tags), + HOV( + toBox(vis), + H0(toBox(sig), L(";")) + ) + ); + +Box toBox((FunctionDeclaration) ` = ;`) + = V(toBox(tags), + HOV( + toBox(vis), + toBox(sig), + I(H(HOV(G(L("="), toBox(exp), gs=2, op=H())), L(";"), hs=0)))) + when !(exp is \visit || exp is voidClosure || exp is closure); + +Box toBox((FunctionDeclaration) ` = { };`) + = V(toBox(tags), + HOV( + toBox(vis), + toBox(sig), + I(HOV( + H(L("="), H0(toBox(typ), L("("))), + G(toBox(parameters), gs=1, op=I()), + H(L(")"), L("{")) + ))), + I(V(toBox(statements))), + H0(L("}"), L(";"))); + +Box toBox((FunctionDeclaration) ` = { };`) + = V(toBox(tags), + HOV( + toBox(vis), + toBox(sig), + I(HOV( + H(L("="), L("(")), + G(toBox(parameters), gs=1, op=I()), + H(L(")"), L("{")) + ))), + I(V(toBox(statements))), + H0(L("}"), L(";"))); + + +Box toBox((FunctionDeclaration) ` =