diff --git a/Strata/DDM/AST.lean b/Strata/DDM/AST.lean index dc5c2c337..607aa1da0 100644 --- a/Strata/DDM/AST.lean +++ b/Strata/DDM/AST.lean @@ -6,10 +6,10 @@ module public import Std.Data.HashMap.Basic +public import Strata.DDM.AST.Datatype public import Strata.DDM.Util.ByteArray public import Strata.DDM.Util.Decimal -public import Lean.Data.Position -public import Strata.DDM.AST.Datatype +public import Strata.DDM.Util.SourceRange import Std.Data.HashMap import all Strata.DDM.Util.Array @@ -263,122 +263,6 @@ public def flatten {α} (e : ExprF α) (prev : List (ArgF α) := []) : ExprF α end ExprF -/-- -Source location information in the DDM is defined -by a range of bytes in a UTF-8 string with the input -Line/column information can be construced from a -`Lean.FileMap` - -As an example, in the string `"123abc\ndef"`, the string -`"abc"` has the position `{start := 3, stop := 6 }` while -`"def"` has the position `{start := 7, stop := 10 }`. --/ -structure SourceRange where - /-- The starting offset of the source range. -/ - start : String.Pos.Raw - /-- One past the end of the range. -/ - stop : String.Pos.Raw -deriving DecidableEq, Inhabited, Repr - -namespace SourceRange - -def none : SourceRange := { start := 0, stop := 0 } - -def isNone (loc : SourceRange) : Bool := loc.start = 0 ∧ loc.stop = 0 - -instance : ToFormat SourceRange where - format fr := f!"{fr.start}-{fr.stop}" - -end SourceRange - -inductive Uri where - | file (path: String) - deriving DecidableEq, Repr, Inhabited - -instance : ToFormat Uri where - format fr := match fr with | .file path => path - -structure FileRange where - file: Uri - range: Strata.SourceRange - deriving DecidableEq, Repr, Inhabited - -instance : ToFormat FileRange where - format fr := f!"{fr.file}:{fr.range}" - -/-- A default file range for errors without source location. -This should only be used for generated nodes that are guaranteed to be correct. -/ -def FileRange.unknown : FileRange := - { file := .file "", range := SourceRange.none } - -/-- A diagnostic model that holds a file range and a message. - This can be converted to a formatted string using a FileMap. -/ -structure DiagnosticModel where - fileRange : FileRange - message : String - deriving Repr, BEq, Inhabited - -instance : Inhabited DiagnosticModel where - default := { fileRange := FileRange.unknown, message := "" } - -/-- Create a DiagnosticModel from just a message (using default location). -This should not be called, it only exists temporarily to enabling incrementally migrating code without error locations -/ -def DiagnosticModel.fromMessage (msg : String) : DiagnosticModel := - { fileRange := FileRange.unknown, message := msg } - -/-- Create a DiagnosticModel from a Format (using default location). -This should not be called, it only exists temporarily to enabling incrementally migrating code without error locations -/ -def DiagnosticModel.fromFormat (fmt : Std.Format) : DiagnosticModel := - { fileRange := FileRange.unknown, message := toString fmt } - -/-- Create a DiagnosticModel with source location. -/ -def DiagnosticModel.withRange (fr : FileRange) (msg : Format) : DiagnosticModel := - { fileRange := fr, message := toString msg } - -/-- Format a file range using a FileMap to convert byte offsets to line/column positions. -/ -def FileRange.format (fr : FileRange) (fileMap : Option Lean.FileMap) (includeEnd? : Bool := true) : Std.Format := - let baseName := match fr.file with - | .file path => (path.splitToList (· == '/')).getLast! - match fileMap with - | some fm => - let startPos := fm.toPosition fr.range.start - let endPos := fm.toPosition fr.range.stop - if includeEnd? then - if startPos.line == endPos.line then - f!"{baseName}({startPos.line},({startPos.column}-{endPos.column}))" - else - f!"{baseName}(({startPos.line},{startPos.column})-({endPos.line},{endPos.column}))" - else - f!"{baseName}({startPos.line}, {startPos.column})" - | none => - if fr.range.isNone then - f!"" - else - f!"{baseName}({fr.range.start}-{fr.range.stop})" - -/-- Format a DiagnosticModel using a FileMap to convert byte offsets to line/column positions. -/ -def DiagnosticModel.format (dm : DiagnosticModel) (fileMap : Option Lean.FileMap) (includeEnd? : Bool := true) : Std.Format := - let rangeStr := dm.fileRange.format fileMap includeEnd? - if dm.fileRange.range.isNone then - f!"{dm.message}" - else - f!"{rangeStr} {dm.message}" - -/-- Format just the file range portion of a DiagnosticModel. -/ -def DiagnosticModel.formatRange (dm : DiagnosticModel) (fileMap : Option Lean.FileMap) (includeEnd? : Bool := true) : Std.Format := - dm.fileRange.format fileMap includeEnd? - -/-- Update the file range of a DiagnosticModel if it's currently unknown. -This should not be called, it only exists temporarily to enabling incrementally migrating code without error locations -/ -def DiagnosticModel.withRangeIfUnknown (dm : DiagnosticModel) (fr : FileRange) : DiagnosticModel := - if dm.fileRange.range.isNone then - { dm with fileRange := fr } - else - dm - -instance : ToString DiagnosticModel where - toString dm := dm.format none |> toString - abbrev Arg := ArgF SourceRange abbrev Expr := ExprF SourceRange abbrev Operation := OperationF SourceRange @@ -1739,7 +1623,7 @@ private partial def OperationF.foldBindingSpecs {α β} | some lvl => foldOverArgAtLevel m f init argDecls argsV lvl decl.newBindings.foldl (init := init) fun a b => f a op.ann b argsV else - @panic _ ⟨init⟩ "Expected arguments to match bindings" + @panic _ ⟨init⟩ s!"{op.name} expects {argDecls.size} arguments when {args.size} provided." | _ => @panic _ ⟨init⟩ s!"Unknown operation {op.name}" /-- diff --git a/Strata/DDM/Format.lean b/Strata/DDM/Format.lean index d5c08d22b..b63365bf9 100644 --- a/Strata/DDM/Format.lean +++ b/Strata/DDM/Format.lean @@ -133,10 +133,10 @@ structure FormatState where namespace FormatState /-- A format context that uses no syntactic sugar. -/ -private def empty : FormatState where +def empty : FormatState where openDialects := {} -private instance : Inhabited FormatState where +instance : Inhabited FormatState where default := .empty def pushBinding (s : FormatState) (ident : String) : FormatState := diff --git a/Strata/DDM/Ion.lean b/Strata/DDM/Ion.lean index 805022b03..c571f4397 100644 --- a/Strata/DDM/Ion.lean +++ b/Strata/DDM/Ion.lean @@ -603,7 +603,7 @@ private protected def ArgF.fromIon {α} [FromIon α] (v : Ion SymbolId) : FromIo | "ident" => let ⟨p⟩ ← .checkArgCount "ident" sexp 3 .ident <$> fromIon sexp[1] - <*> .asString "Identifier value" sexp[2] + <*> .asSymbolString "Identifier value" sexp[2] | "num" => let ⟨p⟩ ← .checkArgCount "num" sexp 3 let ann ← fromIon sexp[1] @@ -1457,7 +1457,10 @@ def fromIonFragment (f : Ion.Fragment) commands := ← fromIonFragmentCommands f } -def fileFromIon (dialects : DialectMap) (dialect : DialectName) (bytes : ByteArray) : Except String Strata.Program := do +/-- +Decodes bytes in the Ion format into a single Strata program. +-/ +def fromIon (dialects : DialectMap) (dialect : DialectName) (bytes : ByteArray) : Except String Strata.Program := do let (hdr, frag) ← match Strata.Ion.Header.parse bytes with | .error msg => diff --git a/Strata/DDM/Util/Fin.lean b/Strata/DDM/Util/Fin.lean index 16a5b5e86..f0894c110 100644 --- a/Strata/DDM/Util/Fin.lean +++ b/Strata/DDM/Util/Fin.lean @@ -35,3 +35,10 @@ end Range def range (n : Nat) : Range n := .mk end Fin + +public section +namespace Strata.Fin +export _root_.Fin(Range range) +end Strata.Fin + +end diff --git a/Strata/DDM/Util/SourceRange.lean b/Strata/DDM/Util/SourceRange.lean new file mode 100644 index 000000000..1d584740a --- /dev/null +++ b/Strata/DDM/Util/SourceRange.lean @@ -0,0 +1,38 @@ +/- + Copyright Strata Contributors + + SPDX-License-Identifier: Apache-2.0 OR MIT +-/ +module + +public section +namespace Strata + +/-- +Source location information in the DDM is defined +by a range of bytes in a UTF-8 string with the input +Line/column information can be construced from a +`Lean.FileMap` + +As an example, in the string `"123abc\ndef"`, the string +`"abc"` has the position `{start := 3, stop := 6 }` while +`"def"` has the position `{start := 7, stop := 10 }`. +-/ +structure SourceRange where + /-- The starting offset of the source range. -/ + start : String.Pos.Raw + /-- One past the end of the range. -/ + stop : String.Pos.Raw +deriving DecidableEq, Inhabited, Repr + +namespace SourceRange + +def none : SourceRange := { start := 0, stop := 0 } + +def isNone (loc : SourceRange) : Bool := loc.start = 0 ∧ loc.stop = 0 + +instance : Std.ToFormat SourceRange where + format fr := f!"{fr.start}-{fr.stop}" + +end Strata.SourceRange +end diff --git a/Strata/DL/Imperative/MetaData.lean b/Strata/DL/Imperative/MetaData.lean index ce1b84ca0..13d1c6cf4 100644 --- a/Strata/DL/Imperative/MetaData.lean +++ b/Strata/DL/Imperative/MetaData.lean @@ -6,8 +6,7 @@ import Strata.DL.Imperative.PureExpr import Strata.DL.Util.DecidableEq -import Strata.DDM.AST -import Lean.Data.Position +import Strata.Util.FileRange namespace Imperative open Strata (DiagnosticModel FileRange) diff --git a/Strata/DL/Lambda/Identifiers.lean b/Strata/DL/Lambda/Identifiers.lean index 569638011..33baf2515 100644 --- a/Strata/DL/Lambda/Identifiers.lean +++ b/Strata/DL/Lambda/Identifiers.lean @@ -4,11 +4,9 @@ SPDX-License-Identifier: Apache-2.0 OR MIT -/ - - import Strata.DL.Lambda.LTy import Strata.DL.Util.Map -import Strata.DDM.AST +import Strata.Util.FileRange --------------------------------------------------------------------- diff --git a/Strata/Languages/Python/PythonDialect.lean b/Strata/Languages/Python/PythonDialect.lean index a9996a7f1..59386bc87 100644 --- a/Strata/Languages/Python/PythonDialect.lean +++ b/Strata/Languages/Python/PythonDialect.lean @@ -7,5 +7,130 @@ import Strata.DDM.Integration.Lean namespace Strata.Python #load_dialect "../../../Tools/Python/dialects/Python.dialect.st.ion" + #strata_gen Python + +instance {α} [Inhabited α] : ToString (Strata.Python.constant α) where + toString s := toString (eformat s.toAst) + +deriving instance DecidableEq for operator + +def alias.ann {α} : alias α → α +| .mk_alias ann _ _ => ann + +def alias.name {α} : alias α → String +| .mk_alias _ name _ => name.val + +def alias.asname {α} : alias α → Option String +| .mk_alias _ _ asname => asname.val.map (·.val) + +def constant.ann {α} : constant α → α +| ConTrue ann .. => ann +| ConFalse ann .. => ann +| ConPos ann .. => ann +| ConNeg ann .. => ann +| ConString ann .. => ann +| ConFloat ann .. => ann +| ConComplex ann .. => ann +| ConNone ann .. => ann +| ConEllipsis ann .. => ann +| ConBytes ann .. => ann + +def keyword.ann {α} : keyword α → α +| .mk_keyword ann _ _ => ann + +def keyword.arg {α} : keyword α → Ann (Option (Ann String α)) α +| .mk_keyword _ arg _ => arg + +def keyword.value {α} : keyword α → expr α +| .mk_keyword _ _ value => value + +namespace int + +def value {α} (i : int α) : Int := + match i with + | IntPos _ n => n.val + | IntNeg _ n => - (n.val : Int) + +end int + + +namespace expr + +def ann {α} : expr α → α +| BoolOp a .. => a +| NamedExpr a .. => a +| BinOp a .. => a +| UnaryOp a .. => a +| Lambda a .. => a +| IfExp a .. => a +| Dict a .. => a +| Set a .. => a +| ListComp a .. => a +| SetComp a .. => a +| DictComp a .. => a +| GeneratorExp a .. => a +| Await a .. => a +| Yield a .. => a +| YieldFrom a .. => a +| Compare a .. => a +| Call a .. => a +| FormattedValue a .. => a +| Interpolation a .. => a +| JoinedStr a .. => a +| TemplateStr a .. => a +| Constant a .. => a +| Attribute a .. => a +| Subscript a .. => a +| Starred a .. => a +| Name a .. => a +| List a .. => a +| Tuple a .. => a +| Slice a .. => a + +end expr + +namespace stmt + +def ann {α} : stmt α → α +| AnnAssign a .. => a +| Assert a .. => a +| Assign a .. => a +| AsyncFor a .. => a +| AsyncFunctionDef a .. => a +| AsyncWith a .. => a +| AugAssign a .. => a +| Break a .. => a +| ClassDef a .. => a +| Continue a .. => a +| Delete a .. => a +| Expr a .. => a +| For a .. => a +| FunctionDef a .. => a +| Global a .. => a +| If a .. => a +| Import a .. => a +| ImportFrom a .. => a +| Match a .. => a +| Nonlocal a .. => a +| Pass a .. => a +| Raise a .. => a +| Return a .. => a +| Try a .. => a +| TryStar a .. => a +| TypeAlias a .. => a +| While a .. => a +| With a .. => a + +end stmt + +instance {α} [Inhabited α] : ToString (Strata.Python.expr α) where + toString s := toString (eformat s.toAst) + +def stmt.format {α} [Inhabited α] : ToString (Strata.Python.stmt α) where + toString s := toString (mformat s.toAst (.ofDialects Python_map) .empty |>.format) + +instance {α} [Inhabited α] : ToString (Strata.Python.stmt α) where + toString s := toString (mformat s.toAst (.ofDialects Python_map) .empty |>.format) + end Strata.Python diff --git a/Strata/Languages/Python/PythonToCore.lean b/Strata/Languages/Python/PythonToCore.lean index ff560bae9..14dffdeb2 100644 --- a/Strata/Languages/Python/PythonToCore.lean +++ b/Strata/Languages/Python/PythonToCore.lean @@ -78,11 +78,6 @@ def toPyCommands (a : Array Operation) : Array (Python.Command SourceRange) := | .error e => panic! s!"Failed to translate to Python.Command: {e}" | .ok cmd => cmd) -def unwrapModule (c : Python.Command SourceRange) : Array (Python.stmt SourceRange) := - match c with - | Python.Command.Module _ body _ => body.val - | _ => panic! "Expected module" - def strToCoreExpr (s: String) : Core.Expression.Expr := .strConst () s @@ -742,10 +737,7 @@ def PyClassDefToCore (s: Python.stmt SourceRange) (translation_ctx: TranslationC .proc (pythonFuncToCore (c_name.val++"_"++name) args body ret default translation_ctx)), {name := c_name.val}) | _ => panic! s!"Expected function def: {repr s}" -def pythonToCore (signatures : Python.Signatures) (pgm: Strata.Program): Core.Program := - let pyCmds := toPyCommands pgm.commands - assert! pyCmds.size == 1 - let insideMod := unwrapModule pyCmds[0]! +def pythonToCore (signatures : Python.Signatures) (insideMod : Array (Python.stmt SourceRange)) : Core.Program := let func_defs := insideMod.filter (λ s => match s with | .FunctionDef _ _ _ _ _ _ _ _ => true | _ => false) diff --git a/Strata/Languages/Python/ReadPython.lean b/Strata/Languages/Python/ReadPython.lean new file mode 100644 index 000000000..e33fbe37d --- /dev/null +++ b/Strata/Languages/Python/ReadPython.lean @@ -0,0 +1,121 @@ +/- + Copyright Strata Contributors + + SPDX-License-Identifier: Apache-2.0 OR MIT +-/ +import Strata.Languages.Python.PythonDialect + +namespace Strata.Python + +def readPythonStrataBytes (strataPath : String) (bytes : ByteArray) : Except String (Array (Strata.Python.stmt Strata.SourceRange)) := do + if ! Ion.isIonFile bytes then + throw <| s!"{strataPath} is not an Ion file." + match Strata.Program.fromIon Strata.Python.Python_map Strata.Python.Python.name bytes with + | .ok pgm => + let pyCmds ← pgm.commands.mapM fun cmd => + match Strata.Python.Command.ofAst cmd with + | .error msg => + throw s!"Error reading {strataPath}: {msg}" + | .ok r => pure r + let isTrue p := inferInstanceAs (Decidable (pyCmds.size = 1)) + | throw s!"Error reading {strataPath}: Expected Python module" + let .Module _ stmts _ := pyCmds[0] + | throw s!"Error reading {strataPath}: Expected Python module" + pure stmts.val + | .error msg => + throw s!"Error reading {strataPath}: {msg}" + +def formatParseFailureStderr (stderr : String) : Option String := do + match stderr.find? "Parse failure:\n" with + | some idx => + match idx.find? "\n" with + | some newLinePos => + let subs : Substring.Raw := { + str := stderr + startPos := newLinePos.offset + stopPos := stderr.rawEndPos + } + some subs.trimLeft.toString + | none => none + | none => none + +/-- +This runs `python -m strata.gen py_to_strata` to convert a +Python file into a Strata file, and then reads it in. + +This function if the environment isn't configured correctly +or the Python file cannot be parsed. +-/ +def pythonToStrata (dialectFile pythonFile : System.FilePath) : + EIO String (Array (Strata.Python.stmt Strata.SourceRange)) := do + let (_handle, strataFile) ← + match ← IO.FS.createTempFile |>.toBaseIO with + | .ok p => pure p + | .error msg => + throw s!"Cannot create temporary file: {msg}" + try + let spawnArgs : IO.Process.SpawnArgs := { + cmd := "python" + args := #["-m", "strata.gen", "py_to_strata", + "--dialect", dialectFile.toString, + pythonFile.toString, + strataFile.toString + ] + cwd := none + inheritEnv := true + stdin := .null + stdout := .piped + stderr := .piped + } + let child ← + match ← IO.Process.spawn spawnArgs |>.toBaseIO with + | .ok c => pure c + | .error msg => throw s!"Could not run Python: {msg}" + let stdout ← IO.asTask child.stdout.readToEnd Task.Priority.dedicated + let stderr ← + match ← child.stderr.readToEnd |>.toBaseIO with + | .ok c => pure c + | .error msg => throw s!"Could not read stderr from Python: {msg}" + let exitCode ← + match ← child.wait |>.toBaseIO with + | .ok c => pure c + | .error msg => throw s!"Could not wait for process exit code: {msg}" + let stdout ← + match stdout.get with + | .ok c => pure c + | .error msg => throw s!"Could not read stdout: {msg}" + if exitCode = 100 then + if let some msg := formatParseFailureStderr stderr then + throw <| s!"{pythonFile} parse error:\n {msg}" + if exitCode ≠ 0 then + let msg := s!"Internal: Python translation failed (exitCode = {exitCode})\n" + let msg := s!"{msg}Standard output:\n" + let msg := stdout.splitOn.foldl (init := msg) fun msg ln => s!"{msg}. {ln}\n" + let msg := s!"{msg}Standard error:\n" + let msg := stderr.splitOn.foldl (init := msg) fun msg ln => s!"{msg}. {ln}\n" + throw <| msg + let bytes ← + match ← IO.FS.readBinFile strataFile |>.toBaseIO with + | .ok b => pure b + | .error msg => + throw <| s!"Error reading Strata temp file {strataFile}: {msg}" + match readPythonStrataBytes strataFile.toString bytes with + | .ok stmts => pure stmts + | .error msg => throw msg + finally + match ← IO.FS.removeFile strataFile |>.toBaseIO with + | .ok () => pure () + | .error msg => throw s!"Internal: Error deleting temp file {strataFile}: {msg}" + +def readPythonStrata (strataPath : String) : EIO String (Array (Strata.Python.stmt Strata.SourceRange)) := do + let bytes ← + match ← IO.FS.readBinFile strataPath |>.toBaseIO with + | .ok b => + pure b + | .error msg => + throw <| s!"Error reading {strataPath}: {msg}" + match readPythonStrataBytes strataPath bytes with + | .ok r => pure r + | .error msg => throw msg + +end Strata.Python diff --git a/Strata/Languages/Python/Specs.lean b/Strata/Languages/Python/Specs.lean new file mode 100644 index 000000000..f4b11d74c --- /dev/null +++ b/Strata/Languages/Python/Specs.lean @@ -0,0 +1,970 @@ +/- + Copyright Strata Contributors + + SPDX-License-Identifier: Apache-2.0 OR MIT +-/ +import Strata.Languages.Python.Specs.DDM +import Strata.Languages.Python.ReadPython +import Strata.DDM.Util.Fin + +namespace Strata.Python.Specs + +def decideProp (p : Prop) [h : Decidable p] : Decidable p := h + +abbrev EventType := String +def importEvent : EventType := "import" + +def logEvent (event : EventType) (message : String) : BaseIO Unit := do + if event = importEvent then + let _ ← IO.eprintln s!"[{event}]: {message}" |>.toBaseIO + pure () + +inductive Pred where +| const (b : Bool) + +namespace Pred + +def not (p : Pred) : Pred := + match p with + | .const b => .const (¬ b) + +end Pred + +inductive Iterable where +| list + + + +structure SpecError where + file : System.FilePath + loc : Strata.SourceRange + message : String + +structure ModuleName where + components : Array String + componentsSizePos : components.size > 0 + +namespace ModuleName + +def ofStringAux {mod : String} (a : Array String) (start cur : String.ValidPos mod) : Except String ModuleName := + if h : cur.IsAtEnd then + let r := start.extract (e := cur) + pure { + components := a.push r + componentsSizePos := by simp + } + else + let c := cur.get h + if _ : c = '.' then + let r := start.extract (e := cur) + let next := cur.next h + ofStringAux (a.push r) next next + else + let next := cur.next h + ofStringAux a start next + termination_by cur + +def ofString (mod : String) : Except String ModuleName := + ofStringAux #[] mod.startValidPos mod.startValidPos + +instance : ToString ModuleName where + toString m := + let p : m.components.size > 0 := m.componentsSizePos + m.components.foldl (init := m.components[0]) (start := 1) fun s c => s!"{s}.{c}" + +def foldlDirs {α} (mod : ModuleName) (init : α) (f : α → String → α) : α := + mod.components.foldl (init := init) (stop := mod.components.size - 1) f + +def foldlMDirs {α m} [Monad m] (mod : ModuleName) (init : α) (f : α → String → m α) : m α := do + mod.components.foldlM (init := init) (stop := mod.components.size - 1) f + +def fileRoot (mod : ModuleName) : String := + let p := mod.componentsSizePos + mod.components.back + +def findInPath (mod : ModuleName) (searchPath : System.FilePath) : EIO String System.FilePath := do + let findComponent path comp := do + let newPath := path / comp + if !(← newPath.isDir) then + throw s!"Directory {newPath} not found" + return newPath + let searchPath ← mod.foldlMDirs (init := searchPath) findComponent + let file := searchPath / s!"{mod.fileRoot}.py" + match ← file.metadata |>.toBaseIO with + | .error err => + throw s!"{file} not found: {err}" + | .ok md => + if md.type != .file then + throw s!"{file} is not a regular file." + pure file + +def strataDir (mod : ModuleName) (root : System.FilePath) : System.FilePath := + mod.foldlDirs (init := root) fun d c => d / c + +def strataFileName (mod : ModuleName) : String := s!"{mod.fileRoot}.pyspec.st.ion" + +end ModuleName + +inductive SpecValue +| boolConst (b : Bool) +| dictValue (a : Array (String × SpecValue)) +| intConst (loc : SourceRange) (s : Int) +| metaType (name : MetadataType) +| noneConst +| stringConst (loc : SourceRange) (s : String) +| tuple (elts : Array SpecValue) +| typingOverload +| typingTypedDict +| typeValue (type : SpecType) +deriving Inhabited, Repr + +structure TypeDecl where + ident : PythonIdent + value : SpecValue + +/-- + map from python identifiers to the numbe +-/ +structure TypeSignature where + rank : Std.HashMap String (Option (Std.HashMap String SpecValue)) + +namespace TypeSignature + +protected def ofList (l : List TypeDecl) : TypeSignature where + rank := l.foldl (init := {}) fun m d => + m.alter d.ident.pythonModule fun r => + match r with + | .none => some <| some <| .ofList [(d.ident.name, d.value)] + | .some none => .some none + | .some (some m) => m |>.insert d.ident.name d.value + +protected def insert (sig : TypeSignature) (name : String) (m : Option (Std.HashMap String SpecValue)) := + { sig with rank := sig.rank.insert name m } + +end TypeSignature + +def typeIdent (tp : PythonIdent) : SpecValue := .typeValue (.ident tp) + +def preludeSig := + TypeSignature.ofList [ + .mk .builtinsBool (typeIdent .builtinsBool), + .mk .builtinsBytes (typeIdent .builtinsBytes), + .mk .builtinsDict (typeIdent .builtinsDict), + .mk .builtinsFloat (typeIdent .builtinsFloat), + .mk .builtinsInt (typeIdent .builtinsInt), + .mk .builtinsStr (typeIdent .builtinsStr), + .mk .noneType (typeIdent .noneType), + + .mk .typingAny (typeIdent .typingAny), + .mk .typingDict (.metaType .typingDict), + .mk .typingGenerator (.metaType .typingGenerator), + .mk .typingList (.metaType .typingList), + .mk .typingLiteral (.metaType .typingLiteral), + .mk .typingMapping (.metaType .typingMapping), + .mk .typingOverload .typingOverload, + .mk .typingTypedDict .typingTypedDict, + .mk .typingUnion (.metaType .typingUnion), + ] + +inductive ClassRef where +| unresolved (range : SourceRange) +| resolved + +/-- Callback that takes a module name and provides filepath to module -/ +abbrev ModuleReader := ModuleName → EIO String System.FilePath + +structure PySpecContext where + /-- Path to Python dialect. -/ + dialectFile : System.FilePath + /-- Path of current Python file being read. -/ + pythonFile : System.FilePath + /-- Path to write Strata files to. -/ + strataDir : System.FilePath + /-- Callback that takes a module name and provides filepath to module -/ + moduleReader : ModuleReader + +structure PySpecState where + typeSigs : TypeSignature := preludeSig + errors : Array SpecError := #[] + /-- + This maps global identifiers to their value. + -/ + nameMap : Std.HashMap String SpecValue := + SpecType.preludeAtoms.foldl (init := {}) fun m (nm, tp) => + m.insert nm (.typeValue tp) + typeReferences : Std.HashMap String ClassRef := {} + +class PySpecMClass (m : Type → Type) where + specError (loc : SourceRange) (message : String) : m Unit + runChecked {α} (act : m α) : m (Bool × α) + +export PySpecMClass (specError runChecked) + +abbrev PySpecM := ReaderT PySpecContext (StateT PySpecState BaseIO) + +def specErrorAt (file : System.FilePath) (loc : SourceRange) (message : String) : PySpecM Unit := do + let e : SpecError := { file, loc, message } + modify fun s => { s with errors := s.errors.push e } + +instance : PySpecMClass PySpecM where + specError loc message := do + specErrorAt (←read).pythonFile loc message + runChecked act := do + let cnt := (←get).errors.size + let r ← act + let new_cnt := (←get).errors.size + return (cnt = new_cnt, r) + +def getNameValue? (id : String) : PySpecM (Option SpecValue) := + return (←get).nameMap[id]? + +def setNameValue (id : String) (v : SpecValue) : PySpecM Unit := + modify $ fun s => { s with nameMap := s.nameMap.insert id v } + +def recordTypeDef (loc : SourceRange) (cl : String) : PySpecM Unit := do + match (←get).typeReferences[cl]? with + | some .resolved => + specError loc s!"Class {cl} already defined." + | _ => + modify fun s => { s with + typeReferences := s.typeReferences.insert cl .resolved + } + +def recordTypeRef (loc : SourceRange) (cl : String) : PySpecM Unit := do + modify fun s => { s with + typeReferences := s.typeReferences.insertIfNew cl (.unresolved loc) + } + +/-- +Type for converting AST expressions to PySpec types +-/ +structure TypeTranslator where + callback : SourceRange -> SpecValue -> PySpecM SpecType + +def checkEq {α : Type} (loc : SourceRange) (name : String) (as : Array α) (n : Nat) : + PySpecM (Option (PULift.{1, 0} (as.size = n))) := + match inferInstanceAs (Decidable (as.size = n)) with + | .isTrue p => + pure (some ⟨p⟩) + | .isFalse _ => do + specError loc s!"{name} expects {n} arguments." + pure none + +def valueAsType (loc : SourceRange) (v : SpecValue) : PySpecM SpecType := do + match v with + | .typeValue itp => + pure itp + | .noneConst => + return .ofAtom .noneType + | .stringConst loc val => + recordTypeRef loc val + return .ofAtom (.pyClass val #[]) + | _ => + specError loc s!"Expected type instead of {repr v}." + return default + +def fixedTranslator (t : PreludeType) (arity : Nat) : TypeTranslator where + callback := fun loc arg => do + if arity = 1 then + let tp ← valueAsType loc arg + return .ident t.ident #[tp] + else + let .tuple args := arg + | specError loc s!"Expected multiple args instead of {repr arg}."; return default + let some ⟨_⟩ ← checkEq loc (toString t) args arity + | return default + let args ← args.mapM (valueAsType loc) + return .ident t.ident args + +def dictTranslator : TypeTranslator := fixedTranslator .typingDict 2 + +def listTranslator : TypeTranslator := fixedTranslator .typingList 1 + +def mappingTranslator := fixedTranslator .typingMapping 2 + +def generatorTranslator := fixedTranslator .typingGenerator 3 + +def unionTranslator : TypeTranslator where + callback := fun loc arg => do + let .tuple args := arg + | specError loc s!"Union expects tuple"; return default + let .isTrue argsP := decideProp (args.size > 0) + | specError loc s!"Union expects at least one argument."; return default + let tp ← valueAsType loc args[0] + args.foldlM (start := 1) (init := tp) fun tp v => do + return tp ||| (← valueAsType loc v) + +def literalTranslator : TypeTranslator where + callback := fun loc arg => do + let args := + match arg with + | .tuple args => args + | arg => #[arg] + let .isTrue _ := decideProp (args.size > 0) + | specError loc s!"Union expects at least one argument."; return default + let trans (v : SpecValue) : PySpecM SpecAtomType := do + match v with + | .intConst _ n => + pure <| .intLiteral n + | .stringConst _ s => + pure <| .stringLiteral s + | _ => + specError loc s!"Unsupported literal value {repr v}." + pure default + return .ofArray (← args.mapM trans) + +def metadataProcessor : MetadataType → TypeTranslator +| .typingDict => dictTranslator +| .typingGenerator => generatorTranslator +| .typingList => listTranslator +| .typingLiteral => literalTranslator +| .typingMapping => mappingTranslator +| .typingUnion => unionTranslator + +def translateCall (loc : SourceRange) (func : SpecValue) + (args : Array SpecValue) (kwargs : Array (Option String × SpecValue)) + : PySpecM SpecValue := do + match func with + | .typingTypedDict => + let .isTrue argsSizep := decideProp (args.size = 2) + | specError loc "TypedDict expects two arguments"; return default + let .isTrue kwargsSizep := decideProp (kwargs.size = 1) + | specError loc "TypedDict expects one keyword"; return default + let (some "total", totalValue) := kwargs[0] + | specError loc "TypedDict expects total"; return default + let .boolConst total := totalValue + | specError loc "TypedDict expects total bool"; return default + let .stringConst _ name := args[0] + | specError loc "TypedDict expects string contant"; return default + let .dictValue fieldsPairs := args[1] + | specError loc "TypedDict expects dictionary fields"; return default + let fields := fieldsPairs |>.map (·.fst) + let values ← fieldsPairs |>.mapM fun (_name, v) => do + valueAsType loc v + return .typeValue <| .ofAtom <| .typedDict fields values total + | _ => + specError loc s!"Unknown call {repr func}." + return default + + +def translateConstant (value : constant SourceRange) : PySpecM SpecValue := do + match value with + | .ConFalse .. => + return .boolConst false + | .ConTrue .. => + return .boolConst true + | .ConNone _ => + return .noneConst + | .ConPos _ n => + return .intConst n.ann (Int.ofNat n.val) + | .ConNeg _ n => + return .intConst n.ann (Int.negOfNat n.val) + | .ConString _ name => + return .stringConst name.ann name.val + | _ => + specError value.ann s!"Could not interpret constant {value}" + return default + +def translateSubscript (paramLoc : SourceRange) (paramType : String) (sargs : SpecValue) : PySpecM SpecValue := do + match ← getNameValue? paramType with + | none => + specError paramLoc s!"Unknown parameterized type {paramType}." + return default + | some (.typeValue tpp) => + let .isTrue tpp_sizep := inferInstanceAs (Decidable (tpp.atoms.size = 1)) + | specError paramLoc s!"Expected type name" + return default + let tpa := tpp.atoms[0] + let .ident tpId tpParams := tpa + | specError paramLoc "Expected an identifier" + return default + if tpId == .builtinsDict ∧ tpParams.size = 0 then + .typeValue <$> dictTranslator.callback paramLoc sargs + else + specError paramLoc s!"Unsupported type {repr tpId}" + return default + | some (.metaType tpId) => + let t := metadataProcessor tpId + .typeValue <$> t.callback paramLoc sargs + | some _ => + specError paramLoc s!"Expected {paramType} to be a type." + return default + +def translateDictKey (loc : SourceRange) (mk : opt_expr SourceRange) : PySpecM String := do + let .some_expr _ k := mk + | specError loc s!"Dict key missing"; return default + match k with + | .Constant _ (.ConString _ key) _ => + pure key.val + | _ => + specError loc s!"Dict key value mismatch" + return default + +mutual + +def pyKeywordValue (k : keyword SourceRange) : PySpecM (Option String × SpecValue) := do + let arg : Option String := + match k.arg.val with + | none => none + | some e => e.val + pure (arg, ← pySpecValue k.value) +termination_by 2 * sizeOf k +decreasing_by + cases k + simp [keyword.value] + decreasing_tactic + +def pySpecValue (expr : expr SourceRange) : PySpecM SpecValue := do + match h : expr with + | .BinOp loc x op y => do + match op with + | .BitOr _ => + return .typeValue <| (← pySpecType x) ||| (← pySpecType y) + | _ => + specError loc s!"Unsupported binary operator {repr op}" + return default + | .Call loc pyFunc ⟨_, pyArgs⟩ ⟨_, pyKeywords⟩ => + let (success, (func, args, kwargs)) ← runChecked <| do + let func ← pySpecValue pyFunc + let args ← pyArgs.attach.mapM fun ⟨e, em⟩ => pySpecValue e + let kwargs ← pyKeywords.attach.mapM fun ⟨k, km⟩ => pyKeywordValue k + return (func, args, kwargs) + if success then + translateCall loc func args kwargs + else + return default + | .Constant _ value kind => + assert! kind.val.isNone + translateConstant value + | .Dict loc ⟨_, keys⟩ ⟨_, values⟩ => + let .isTrue size_eq := inferInstanceAs (Decidable (keys.size = values.size)) + | specError loc s!"Dict key value mismatch"; return default + let m : Array (String × SpecValue) ← Array.ofFnM fun (⟨i, _⟩ : Fin keys.size) => do + let key ← translateDictKey loc keys[i] + let v ← pySpecValue values[i] + pure ⟨key, v⟩ + return .dictValue m + | .Name _ ident (.Load _) => + let some v := ←getNameValue? ident.val + | specError expr.ann s!"Unknown identifier {ident.val}."; return default + pure v + | .Subscript _ (.Name paramLoc ⟨_, paramType⟩ (.Load _)) subscriptArgs _ => + let (success, sargs) ← runChecked <| pySpecValue subscriptArgs + if success then + translateSubscript paramLoc paramType sargs + else + pure default + | .Tuple ann ⟨_, pyElts⟩ _ctx => + let elts ← pyElts.attach.mapM fun ⟨e, em⟩ => pySpecValue e + return .tuple elts + | _ => + specError expr.ann s!"Could not interpret {expr}" + return default +termination_by 2 * sizeOf expr +decreasing_by + · decreasing_tactic + · decreasing_tactic + · decreasing_tactic + · decreasing_tactic + · decreasing_tactic + · decreasing_tactic + · decreasing_tactic + · decreasing_tactic + +def pySpecType (e : expr SourceRange) : PySpecM SpecType := do + let (success, v) ← runChecked <| pySpecValue e + if success then + valueAsType e.ann v + else + return default +termination_by 2 * sizeOf e + 1 + +end + +-- Check expression is compatible with value +def pyDefaultValue (val : expr SourceRange) (_tp : SpecType) : PySpecM Unit := do + match val with + | .Constant _ c _ => + match c with + | .ConNone _ => + pure () + | _ => + specError val.ann s!"Unexpected value {toString val}" + | _ => + specError val.ann s!"Unexpected value {toString val}" + +def pySpecArg (usedNames : Std.HashSet String) + (selfType : Option String) + (arg : Strata.Python.arg Strata.SourceRange) + (de : Option (expr SourceRange)) : PySpecM Arg := do + let .mk_arg loc name ⟨_typeLoc, type⟩ comment := arg + if name.val ∈ usedNames then + specError name.ann s!"Argument {name.val} already declared." + assert! !loc.isNone + assert! _typeLoc.isNone + let tp ← + match selfType with + | none => + match type with + | none => + specError loc s!"Missing argument to {name.val}" + pure default + | some tp => pySpecType tp + | some cl => + if type.isSome then + specError loc s!"Unexpected argument to {name.val}" + pure <| .pyClass cl #[] + assert! comment.val.isNone + let hasDefault ← + match de with + | none => + pure false + | some d => + pyDefaultValue d tp + pure true + return { + name := name.val + type := tp + hasDefault := hasDefault + } + +structure SpecAssertionContext where + filePath : System.FilePath + +/-- +State for `SpecAssertionM` + +`argc` denotes the number of named arguments. +-/ +structure SpecAssertionState (argc : Nat) where + assertions : Array (Assertion argc) := #[] + postconditions : Array (SpecPred (argc + 1)) := #[] + errors : Array SpecError := #[] + +/-- +Monad for extracting pre and post conditions from methods. +-/ +abbrev SpecAssertionM (argc : Nat) := ReaderT SpecAssertionContext (StateM (SpecAssertionState argc)) + +instance {argc} : PySpecMClass (SpecAssertionM argc) where + specError loc message := do + let file := (←read) |>.filePath + let e : SpecError := { file, loc, message } + modify fun s => { s with errors := s.errors.push e } + runChecked act := do + let cnt := (←get).errors.size + let r ← act + let new_cnt := (←get).errors.size + return (cnt = new_cnt, r) + +def transPred {argc} (_e : expr SourceRange) : SpecAssertionM argc Pred := do + -- FIXME + pure (.const true) + +def transIter {argc} (_e : expr SourceRange) : SpecAssertionM argc Iterable := do + -- FIXME + return .list + +def assumePred {argc} (_p : Pred) (act : SpecAssertionM argc Unit) : SpecAssertionM argc Unit := do + act + +mutual + +def blockStmt {argc : Nat} (s : stmt SourceRange) : SpecAssertionM argc Unit := do + match s with + | .Assign _ _targets _value _typeAnn => + pure () -- FIXME + | .AnnAssign .. => + pure () -- FIXME + | .Expr .. => + pure () -- FIXME + | .Assert .. => -- FIXME + pure () + | .Return .. =>-- FIXME + pure () + | .Raise .. =>-- FIXME + pure () + | .ClassDef .. => -- FIXME + specError s.ann s!"Inner classes are not supported." + | .For _ _target _iter _body orelse type_comment => + assert! type_comment.val.isNone + assert! orelse.val.size == 0 + pure () + | .If _ pred t f => + let p ← transPred pred + assumePred p <| blockStmts t.val + assumePred (.not p) <| blockStmts f.val + | .Pass _ => + pure () + | _ => specError s.ann s!"Unsupported statement: {eformat s.toAst}" +termination_by sizeOf s +decreasing_by + · cases t; + decreasing_tactic + · cases f; + decreasing_tactic + +def blockStmts {argc : Nat} (as : Array (stmt SourceRange)) : SpecAssertionM argc Unit := do + as.attach.forM fun ⟨b, _⟩ => blockStmt b +termination_by sizeOf as +decreasing_by +· decreasing_tactic + +end + +def collectAssertions (decls : ArgDecls) (_returnType : SpecType) (action : SpecAssertionM decls.count Unit) : PySpecM (SpecAssertionState decls.count) := do + let errors := (←get).errors + modify fun s => { s with errors := #[] } + let filePath := (←read).pythonFile + let ((), as) := action { filePath } { errors } + modify fun s => { s with errors := as.errors } + pure as + +def pySpecFunctionArgs (fnLoc : SourceRange) + (className : Option String) + (funName : String) + (arguments : arguments SourceRange) + (body : Array (Python.stmt SourceRange)) + (decorators : Array (expr SourceRange)) + (returns : Option (expr SourceRange)) : PySpecM FunctionDecl := do + let mut overload : Bool := false + for pyd in decorators do + let (success, d) ← runChecked <| pySpecValue pyd + if success then + match d with + | .typingOverload => + overload := true + | _ => + specError pyd.ann s!"Decorator {repr d} not supported." + + let .mk_arguments _ posonly ⟨_, posArgs⟩ vararg kwonly kw_defaults kwarg defaults := arguments + assert! posonly.val.size = 0 + let argc := posArgs.size + + let .up defaults_bnd ← + if h : defaults.val.size ≤ posArgs.size then + pure <| PULift.up.{1, 0} h + else + specError fnLoc s!"internal: bad index"; return default + + let .isTrue kw_bnd := inferInstanceAs (Decidable (kwonly.val.size = kw_defaults.val.size)) + | specError fnLoc s!"Keyword only arguments must have defaults."; return default + assert! vararg.val.isNone + assert! kwarg.val.isNone + let min_default := argc - defaults.val.size + let isMethod := className.isSome + if isMethod ∧ argc = 0 then + specError fnLoc "Method expecting self argument" + let mut usedNames : Std.HashSet String := {} + let mut specArgs : Array Arg := .emptyWithCapacity argc + for ⟨i, ib⟩ in Strata.Fin.range argc do + let a := posArgs[i] + -- Arguments with defaults occur at end + let d : Option _ := + if p : i ≥ min_default then + some defaults.val[i - min_default] + else + none + let self_type := + match className with + | some cl => if i = 0 then some cl else none + | none => none + let ba ← pySpecArg usedNames self_type a d + usedNames := usedNames.insert ba.name + specArgs := specArgs.push ba + let mut kwSpecArgs : Array Arg := .emptyWithCapacity kwonly.val.size + for ⟨i, ib⟩ in Fin.range kwonly.val.size do + let a := kwonly.val[i] + -- Arguments with defaults occur at end + let d : Option _ := + match kw_defaults.val[i] with + | .some_expr _ v => some v + | .missing_expr _ => none + let ba ← pySpecArg usedNames none a d + usedNames := usedNames.insert ba.name + kwSpecArgs := kwSpecArgs.push ba + let argDecls : ArgDecls := { args := specArgs, kwonly := kwSpecArgs } + let returnType : SpecType ← + match returns with + | none => pure .NoneType + | some tp => pySpecType tp + let as ← collectAssertions argDecls returnType <| body.forM blockStmt + + return { + loc := fnLoc + nameLoc := fnLoc + name := funName + args := argDecls + returnType + isOverload := overload + preconditions := as.assertions + postconditions := as.postconditions + } + +def pySpecClassBody (loc : SourceRange) (className : String) (body : Array (Strata.Python.stmt Strata.SourceRange)) : PySpecM ClassDef := do + let mut usedNames : Std.HashSet String := {} + let mut methods : Array FunctionDecl := #[] + for stmt in body do + match stmt with + | .Expr .. => pure () -- Skip expressions + | .FunctionDef loc ⟨_, name⟩ args ⟨_, body⟩ ⟨_, decorators⟩ ⟨_, returns⟩ + ⟨_, type_comment⟩ ⟨_, type_params⟩ => + assert! type_comment.isNone + assert! type_params.size = 0 + if name ∈ usedNames then + specError loc s!"{name} already defined." + let d ← pySpecFunctionArgs (className := some className) loc name args + body decorators returns + methods := methods.push d + | _ => + specError stmt.ann s!"Unknown class statement {stmt}" + return { + loc := loc + name := className + methods := methods + } + +def checkLevel (loc : SourceRange) (level : Option (int SourceRange)) : PySpecM Unit := do + match level with + | some lvl => + if lvl.value ≠ 0 then + specError loc s!"Local import {lvl.value} not supported." + | none => + specError loc s!"Missing import level." + + +def translateImportFrom (mod : String) (types : Std.HashMap String SpecValue) (names : Array (alias SourceRange)) : PySpecM Unit := do + for a in names do + let name := a.name + match types[name]? with + | none => + specError a.ann s!"{name} is not defined in module {mod}." + | some tpv => + let asname := a.asname.getD name + setNameValue asname tpv + +def getModifiedTime (f : System.FilePath) : IO IO.FS.SystemTime := do + let md ← f.metadata + pure <| md.modified + +/-- +Create a value map for module from signatures. +-/ +def signatureValueMap (mod : String) (sigs : Array Signature) : + Std.HashMap String SpecValue := + let addType (m : Std.HashMap String SpecValue) (sig : Signature) := + match sig with + | .classDef d => + let pyIdent : PythonIdent := { + pythonModule := mod + name := d.name + } + m.insert d.name (.typeValue (.ident pyIdent)) + | .functionDecl .. | .typeDef .. | .externTypeDecl .. => m + sigs.foldl (init := {}) addType + +def checkOverloadBody (stmt : stmt SourceRange) : PySpecM Unit := do + match stmt with + | .Expr _ (.Constant _ (.ConEllipsis _) _) => pure () + | _ => specError stmt.ann s!"Expected ellipsis" + +mutual + +partial def resolveModule (loc : SourceRange) (modName : String) : + PySpecM (Std.HashMap String SpecValue) := do + let mod ← + match ModuleName.ofString modName with + | .ok r => pure r + | .error msg => + specError loc msg + return default + let moduleReader := (←read).moduleReader + let pythonFile ← + match ← moduleReader mod |>.toBaseIO with + | .ok r => + pure r + | .error msg => + specError loc msg + return default + let strataDir := mod.strataDir (←read).strataDir + let strataFile := strataDir / mod.strataFileName + + let .ok pythonMetadata ← pythonFile.metadata |>.toBaseIO + | specError loc s!"Could not get file mod time."; return default + + -- Check if strataFile is newer than pythonSource + let useStrata : Bool := + match ← strataFile.metadata |>.toBaseIO with + | .ok strataMetadata => strataMetadata.modified > pythonMetadata.modified + | .error _ => false + -- If Strata is newer use it. + if useStrata then + logEvent importEvent s!"Importing {modName} from PySpec file" + match ← readDDM strataFile |>.toBaseIO with + | .ok sigs => + return signatureValueMap modName sigs + | .error msg => + specError loc s!"Could not load Strata file: {msg}" + return default + + logEvent importEvent s!"Importing {modName} from Python" + let dialectFile := (←read).dialectFile + let commands ← + match ← pythonToStrata dialectFile pythonFile |>.toBaseIO with + | .ok r => pure r + | .error msg => + specError loc msg + return default + let errors := (←get).errors + let errorCount := errors.size + modify fun s => { s with errors := #[] } + let ctx := { (←read) with pythonFile := pythonFile } + let (sigs, t) ← translateModuleAux commands |>.run ctx |>.run { errors := errors } + modify fun s => { s with errors := t.errors } + if t.errors.size > errorCount then + return default + + if let .error msg ← IO.FS.createDirAll strataDir |>.toBaseIO then + specError loc s!"Could not create directory {strataDir}: {msg}" + return default + + if let .error msg ← writeDDM strataFile sigs |>.toBaseIO then + specError loc s!"Could not write file {strataFile}: {msg}" + return default + + return signatureValueMap (toString mod) sigs + +partial def tryCachedResolveModule (loc : SourceRange) (modName : String) + : PySpecM (Option (Std.HashMap String SpecValue)) := do + match (←get).typeSigs.rank[modName]? with + | some types => + return types + | none => + let (success, r) ← runChecked <| resolveModule loc modName + let r := if success then some r else none + modify fun s => { s with typeSigs := s.typeSigs.insert modName r } + return r + +partial def translate (body : Array (Strata.Python.stmt Strata.SourceRange)) : PySpecM (Array Signature) := do + let mut elements : Array Signature := #[] + for stmt in body do + match stmt with + | .Assign loc ⟨_, targets⟩ value _typeAnn => + let (success, v) ← runChecked <| pySpecValue value + if not success then + continue + let .isTrue eq := inferInstanceAs (Decidable (targets.size = 1)) + | specError loc s!"Only single target expected."; continue + let .Name nameLoc ⟨_, name⟩ _ := targets[0] + | specError loc s!"Unsupported target {targets[0]}"; continue + assert! !nameLoc.isNone + setNameValue name v + match v with + | .typeValue tp => + recordTypeDef loc name + let d : TypeDef := { + loc := loc + nameLoc := nameLoc + name := name + definition := tp + } + elements := elements.push <| .typeDef d + | _ => + pure () + | .Expr .. => + -- Skip expressions + pure () + | .FunctionDef loc + ⟨_funNameLoc, funName⟩ + args + ⟨_bodyLoc, body⟩ + ⟨_decoratorsLoc, decorators⟩ + ⟨_returnsLoc, returns⟩ + ⟨_typeCommentLoc, typeComment⟩ + ⟨_typeParamsLoc, typeParams⟩ => + assert! _bodyLoc.isNone + -- Flag indicating this is an overload + assert! _decoratorsLoc.isNone + assert! _returnsLoc.isNone + assert! _typeCommentLoc.isNone + assert! typeComment.isNone + assert! _typeParamsLoc.isNone + assert! typeParams.size = 0 + let d ← pySpecFunctionArgs (className := none) loc funName args body decorators returns + elements := elements.push (.functionDecl d) + | .Import loc names => + specError loc s!"Import of {repr names} not supported." + | .ImportFrom loc ⟨_, pyModule⟩ ⟨_, names⟩ ⟨_, level⟩ => + let (success, ()) ← runChecked <| checkLevel loc level + if not success then + continue + let some ⟨_, mod⟩ := pyModule + | specError loc s!"Local imports not supported"; continue + if let some types ← tryCachedResolveModule loc mod then + translateImportFrom mod types names + | .ClassDef loc ⟨_classNameLoc, className⟩ bases keywords stmts decorators typeParams => + assert! _classNameLoc.isNone + assert! bases.val.size = 0 + assert! keywords.val.size = 0 + assert! decorators.val.size = 0 + assert! typeParams.val.size = 0 + let (success, _) ← runChecked <| recordTypeDef loc className + let d ← pySpecClassBody loc className stmts.val + if success then + elements := elements.push (.classDef d) + | _ => specError stmt.ann s!"Unknown statement {stmt}" + return elements + +partial def translateModuleAux (body : Array (Strata.Python.stmt Strata.SourceRange)) + : PySpecM (Array Signature) := do + let sig ← translate body + let s ← get + for ⟨cl, t⟩ in s.typeReferences do + if let .unresolved loc := t then + specError loc s!"Class {cl} not defined." + return sig + +end + +abbrev FileMaps := Std.HashMap System.FilePath Lean.FileMap + +def FileMaps.ppSourceRange (fmm : Strata.Python.Specs.FileMaps) (path : System.FilePath) (loc : SourceRange) : String := + match fmm[path]? with + | none => + panic! "Invalid path {file}" + | some fm => + let spos := fm.toPosition loc.start + let epos := fm.toPosition loc.stop + -- Render the error location information in a format VSCode understands + if spos.line == spos.line then + s!"{path}:{spos.line}:{spos.column+1}-{epos.column+1}" + else + s!"{path}:{spos.line}:{spos.column+1}" + +def translateModule + (dialectFile searchPath strataDir pythonFile : System.FilePath) + (fileMap : Lean.FileMap) + (body : Array (Strata.Python.stmt Strata.SourceRange)) : + BaseIO (FileMaps × Array Signature × Array SpecError) := do + let fmm : FileMaps := {} + let fmm := fmm.insert pythonFile fileMap + let fileMapsRef : IO.Ref FileMaps ← IO.mkRef fmm + let ctx : PySpecContext := { + dialectFile := dialectFile.toString + moduleReader := fun (mod : ModuleName) => do + let pythonPath ← mod.findInPath searchPath + logEvent "findFile" s!"Found {mod} as {pythonPath} in {searchPath}" + match ← IO.FS.readFile pythonPath |>.toBaseIO with + | .ok contents => + let fm := Lean.FileMap.ofString contents + fileMapsRef.modify fun m => m.insert pythonPath fm + pure pythonPath + | .error msg => + throw s!"Could not read file {pythonPath}: {msg}" + strataDir := strataDir + pythonFile := pythonFile + } + let (res, s) ← translateModuleAux body |>.run ctx |>.run {} + pure (←fileMapsRef.get, res, s.errors) + +end Strata.Python.Specs diff --git a/Strata/Languages/Python/Specs/DDM.lean b/Strata/Languages/Python/Specs/DDM.lean new file mode 100644 index 000000000..a0447150d --- /dev/null +++ b/Strata/Languages/Python/Specs/DDM.lean @@ -0,0 +1,244 @@ +/- + Copyright Strata Contributors + + SPDX-License-Identifier: Apache-2.0 OR MIT +-/ +import Strata.DDM.Integration.Lean +import Strata.Languages.Python.Specs.Decls + +namespace Strata.Python.Specs +namespace DDM + +#dialect +dialect PythonSpecs; + +category Int; +op natInt (x : Num) : Int => x; +op negSuccInt (x : Num) : Int => "-" x; + +category SpecType; +category FieldDecl; + +op mkFieldDecl(name : Ident, fieldType : SpecType) : FieldDecl => name " : " fieldType; + +op typeIdent (x : Ident, y : CommaSepBy SpecType) : SpecType => "ident" "(" x ", " y ")"; +op typeClass (x : Ident, y : CommaSepBy SpecType) : SpecType => "class" "(" x ", " y ")"; +op typeIntLiteral (x : Int) : SpecType => x; +op typeStringLiteral (x : Str) : SpecType => x; +op typeUnion (args : CommaSepBy SpecType) : SpecType => "union" "(" args ")"; +op typeNoneType : SpecType => "NoneType"; +op typeTypedDict (fields : CommaSepBy FieldDecl, isTotal : Bool): SpecType => + "dict" "(" fields ")" "[" "isTotal" "=" isTotal "]"; + +category ArgDecl; +op mkArgDecl (name : Str, argType : SpecType, hasDefault : Bool) : ArgDecl => + name " : " argType " (" "hasDefault" ": " hasDefault ")"; + +category FunDecl; +op mkFunDecl (name : Str, + args : Seq ArgDecl, + kwonly : Seq ArgDecl, + returnType : SpecType, + isOverload : Bool) : FunDecl => + "function " name "{\n" + indent(2, + "args" ": " args "\n" + "kwonly" ": " kwonly "\n" + "return" ": " returnType "\n" + "overload" ": " isOverload "\n") + "}"; + +op externTypeDecl (name : Ident, source : Ident) : Command => + "extern " name " from " source ";"; +op classDef (name : Str, methods : Seq FunDecl) : Command => + "class " name " {\n" + indent(2, methods) + "}\n"; +op functionDecl (decl : FunDecl) : Command => decl; +op typeDef (name : Str, definition : SpecType) : Command => + "type " name " = " definition "\n"; +#end + +#strata_gen PythonSpecs + +abbrev Signature := Command + +end DDM + +def PythonIdent.toDDM (d : PythonIdent) : Ann String SourceRange := + ⟨.none, toString d⟩ + +def toDDMInt {α} (ann : α) (i : Int) : DDM.Int α := + match i with + | .ofNat n => .natInt ann ⟨ann, n⟩ + | .negSucc n => .negSuccInt ann ⟨ann, n⟩ + +def DDM.Int.ofDDM : DDM.Int α → _root_.Int +| .natInt _ ⟨_, n⟩ => .ofNat n +| .negSuccInt _ ⟨_, n⟩ => .negSucc n + + +mutual + +def SpecAtomType.toDDM (d : SpecAtomType) : DDM.SpecType SourceRange := + match d with + | .ident nm args => .typeIdent .none nm.toDDM ⟨.none, args.map (·.toDDM)⟩ + | .pyClass name args => .typeClass .none ⟨.none, name⟩ ⟨.none, args.map (·.toDDM)⟩ + | .intLiteral i => .typeIntLiteral .none (toDDMInt .none i) + | .stringLiteral v => .typeStringLiteral .none ⟨.none, v⟩ + | .noneType => .typeNoneType .none + | .typedDict fields types isTotal => + assert! fields.size = types.size + let argc := types.size + let a := Array.ofFn fun (⟨i, ilt⟩ : Fin argc) => + .mkFieldDecl .none ⟨.none, fields[i]!⟩ types[i].toDDM + .typeTypedDict .none ⟨.none, a⟩ ⟨.none, isTotal⟩ +termination_by sizeOf d + +def SpecType.toDDM (d : SpecType) : DDM.SpecType SourceRange := + assert! d.atoms.size > 0 + if p : d.atoms.size = 1 then + d.atoms[0].toDDM + else + .typeUnion .none ⟨.none, d.atoms.map (·.toDDM)⟩ +termination_by sizeOf d +decreasing_by + all_goals { + cases d + decreasing_tactic + } + +end + + +def Arg.toDDM (d : Arg) : DDM.ArgDecl SourceRange := + .mkArgDecl .none ⟨.none, d.name⟩ d.type.toDDM ⟨.none, d.hasDefault⟩ + +def FunctionDecl.toDDM (d : FunctionDecl) : DDM.FunDecl SourceRange := + .mkFunDecl + d.loc + (name := .mk d.nameLoc d.name) + (args := ⟨.none, d.args.args.map (·.toDDM)⟩) + (kwonly := ⟨.none, d.args.kwonly.map (·.toDDM)⟩) + (returnType := d.returnType.toDDM) + (isOverload := ⟨.none, d.isOverload⟩) + +def Signature.toDDM (sig : Signature) : DDM.Signature SourceRange := + match sig with + | .externTypeDecl name source => + .externTypeDecl .none ⟨.none, name⟩ source.toDDM + | .classDef d => + .classDef d.loc (.mk .none d.name) ⟨.none, d.methods.map (·.toDDM)⟩ + | .functionDecl d => + .functionDecl d.loc d.toDDM + | .typeDef d => + .typeDef d.loc (.mk d.nameLoc d.name) d.definition.toDDM + +def DDM.SpecType.fromDDM (d : DDM.SpecType SourceRange) : Specs.SpecType := + match d with + | .typeClass _ ⟨_, cl⟩ ⟨_, args⟩ => + let a := args.map (·.fromDDM) + .ofAtom <| .pyClass cl a + | .typeIdent _ ⟨_, ident⟩ ⟨_, args⟩ => + let a := args.map (·.fromDDM) + if let some pyIdent := PythonIdent.ofString ident then + .ofAtom <| .ident pyIdent a + else + panic! "Bad identifier" + | .typeIntLiteral _ i => .ofAtom <| .intLiteral i.ofDDM + | .typeNoneType _ => .NoneType + | .typeStringLiteral _ ⟨_, s⟩ => .ofAtom <| .stringLiteral s + | .typeTypedDict _ ⟨_, fields⟩ ⟨_, isTotal⟩ => + let names := fields.map fun (.mkFieldDecl _ ⟨_, name⟩ _) => name + let types := fields.attach.map fun ⟨.mkFieldDecl _ _ tp, mem⟩ => tp.fromDDM + .ofAtom <| .typedDict names types isTotal + | .typeUnion _ ⟨_, args⟩ => + if p : args.size > 0 then + args.foldl (init := args[0].fromDDM) fun a b => a ||| b.fromDDM + else + panic! "Expected non-empty union" +termination_by sizeOf d +decreasing_by + · decreasing_tactic + · decreasing_tactic + · have szp := Array.sizeOf_lt_of_mem mem + simp_all + decreasing_tactic + · decreasing_tactic + · decreasing_tactic + +def DDM.ArgDecl.fromDDM (d : DDM.ArgDecl SourceRange) : Specs.Arg := + let .mkArgDecl _ ⟨_, name⟩ type ⟨_, hasDefault⟩ := d + { + name := name + type := type.fromDDM + hasDefault := hasDefault + } + +def DDM.FunDecl.fromDDM (d : DDM.FunDecl SourceRange) : Specs.FunctionDecl := + let .mkFunDecl loc ⟨nameLoc, name⟩ ⟨_, args⟩ ⟨_, kwonly⟩ + returnType ⟨_, isOverload⟩ := d + { + loc := loc + nameLoc := nameLoc + name := name + args := { + args := args.map (·.fromDDM) + kwonly := kwonly.map (·.fromDDM) + } + returnType := returnType.fromDDM + isOverload := isOverload + preconditions := #[] -- FIXME + postconditions := #[] -- FIXME + } + +def DDM.Command.fromDDM (cmd : DDM.Command SourceRange) : Specs.Signature := + match cmd with + | .externTypeDecl _ ⟨_, name⟩ ⟨_, ddmDefinition⟩ => + if let some definition := PythonIdent.ofString ddmDefinition then + .externTypeDecl name definition + else + panic! "Extern type decl definition has bad format." + | .classDef ann ⟨_, name⟩ ⟨_, methods⟩ => + let d : ClassDef := { + loc := ann + name := name + methods := methods.map (·.fromDDM) + } + .classDef d + | .functionDecl _ d => .functionDecl d.fromDDM + | .typeDef loc ⟨nameLoc, name⟩ definition => + let d : TypeDef := { + loc := loc + nameLoc := nameLoc + name := name + definition := definition.fromDDM + } + .typeDef d + +def readDDM (path : System.FilePath) : EIO String (Array Signature) := do + let contents ← + match ← IO.FS.readBinFile path |>.toBaseIO with + | .ok r => pure r + | .error msg => throw s!"Error reading {path}: {msg}" + match Program.fromIon DDM.PythonSpecs_map DDM.PythonSpecs.name contents with + | .ok pgm => + let r := + pgm.commands.mapM fun cmd => do + let pySig ← DDM.Command.ofAst cmd + return pySig.fromDDM + match r with + | .ok r => pure r + | .error msg => throw msg + | .error msg => throw msg + +def writeDDM (path : System.FilePath) (sigs : Array Signature) : IO Unit := do + let pgm : Strata.Program := { + dialects := DDM.PythonSpecs_map + dialect := DDM.PythonSpecs.name + commands := sigs.map fun s => s.toDDM.toAst + } + IO.FS.writeBinFile path pgm.toIon + + +end Strata.Python.Specs diff --git a/Strata/Languages/Python/Specs/Decls.lean b/Strata/Languages/Python/Specs/Decls.lean new file mode 100644 index 000000000..15346daa3 --- /dev/null +++ b/Strata/Languages/Python/Specs/Decls.lean @@ -0,0 +1,267 @@ +/- + Copyright Strata Contributors + + SPDX-License-Identifier: Apache-2.0 OR MIT +-/ +module +public import Std.Data.HashMap.Basic +public import Strata.DDM.Util.SourceRange + +public section +namespace Strata.Python.Specs + +structure PythonIdent where + pythonModule : String + name : String + deriving DecidableEq, Hashable, Ord, Repr + +namespace PythonIdent + +protected def ofString (s : String) : Option PythonIdent := + match s.revFind fun c => c = '.' with + | none => none + | some idx => + some { + pythonModule := String.Pos.Raw.extract s s.rawStartPos idx + name := String.Pos.Raw.extract s (idx.next s) s.rawEndPos + } + +instance : ToString PythonIdent where + toString i := s!"{i.pythonModule}.{i.name}" + +def builtinsBool := mk "builtins" "bool" +def builtinsBytes := mk "builtins" "bytes" +def builtinsDict := mk "builtins" "dict" +def builtinsFloat := mk "builtins" "float" +def builtinsInt := mk "builtins" "int" +def builtinsStr := mk "builtins" "str" +def noneType := mk "_types" "NoneType" + +def typingAny := mk "typing" "Any" +def typingDict := mk "typing" "Dict" +def typingGenerator := mk "typing" "Generator" +def typingList := mk "typing" "List" +def typingLiteral := mk "typing" "Literal" +def typingMapping := mk "typing" "Mapping" +def typingOverload := mk "typing" "overload" +def typingTypedDict := mk "typing" "TypedDict" +def typingUnion := mk "typing" "Union" + +end PythonIdent + +inductive MetadataType where + | typingDict + | typingGenerator + | typingList + | typingLiteral + | typingMapping + | typingUnion + deriving Repr + +inductive PreludeType where +| builtinsBool +| builtinsBytes +| builtinsDict +| builtinsFloat +| builtinsInt +| builtinsStr +| typingAny +| typingDict +| typingGenerator +| typingList +| typingMapping +| noneType +deriving DecidableEq, Hashable, Ord, Repr + +def PreludeType.ident : PreludeType -> PythonIdent +| .builtinsBool => .builtinsBool +| .builtinsBytes => .builtinsBytes +| .builtinsDict => .builtinsDict +| .builtinsFloat => .builtinsFloat +| .builtinsInt => .builtinsInt +| .builtinsStr => .builtinsStr +| .typingAny => .typingAny +| .typingDict => .typingDict +| .typingGenerator => .typingGenerator +| .typingList => .typingList +| .typingMapping => .typingMapping +| .noneType => .noneType + +instance : ToString PreludeType where + toString tp := toString tp.ident + +mutual + +/-- +An atomic type in the PySpec language +-/ +inductive SpecAtomType where +| ident (nm : PythonIdent) (args : Array SpecType) +| pyClass (name : String) (args : Array SpecType) +/- An integer literal -/ +| intLiteral (value : Int) +/-- A string literal -/ +| stringLiteral (value : String) +| noneType +/- +A typed dictionary with an array of fields and their types. The arrays +must be of the same length. +If the `isTotal` flag is set, then all fields are required, and if not the +fields are optional. +-/ +| typedDict (fields : Array String) + (fieldTypes : Array SpecType) + (isTotal : Bool) +deriving BEq, Hashable, Inhabited, Ord, Repr + +/-- +A PySpec type is a union of atom types. +-/ +structure SpecType where + atoms : Array SpecAtomType +deriving Inhabited, Ord + +end + +instance : LT SpecAtomType where + lt x y := private compare x y = .lt + +namespace SpecType + +instance : Repr SpecType where + reprPrec tp prec := private reprPrec tp.atoms.toList prec + +private partial def unionAux (x y : Array SpecAtomType) (i : Fin x.size) (j : Fin y.size) (r : Array SpecAtomType) : Array SpecAtomType := + let xe := x[i] + let ye := y[j] + match compare xe ye with + | .lt => + let i' := i.val + 1 + if xip : i' < x.size then + unionAux x y ⟨i', xip⟩ j (r.push xe) + else + r.push xe ++ y.drop j + | .eq => + let i' := i.val + 1 + let j' := j.val + 1 + if xip : i' < x.size then + if yjp : j' < y.size then + unionAux x y ⟨i', xip⟩ ⟨j', yjp⟩ (r.push xe) + else + r.push xe ++ x.drop i' + else + r.push xe ++ y.drop j + | .gt => + let j' := j.val + 1 + if yjp : j' < y.size then + unionAux x y i ⟨j', yjp⟩ (r.push xe) + else + r.push xe ++ x.drop i.val + +instance : OrOp SpecType where + or x y := private + if xp : 0 < x.atoms.size then + if yp : 0 < y.atoms.size then + { atoms := unionAux x.atoms y.atoms ⟨0, xp⟩ ⟨0, yp⟩ #[] } + else + x + else + y + +def ofAtom (atom : SpecAtomType) : SpecType := { atoms := #[atom] } + +def ident (i : PythonIdent) (args : Array SpecType := #[]) : SpecType := + .ofAtom (.ident i args) + +def preludeAtoms : List (String × SpecType) := [ + ("bool", .ident .builtinsBool), + ("bytes", .ident .builtinsBytes), + ("dict", .ident .builtinsDict), + ("float", .ident .builtinsFloat), + ("int", .ident .builtinsInt), + ("str", .ident .builtinsStr), +] + +def dict (key value : SpecType) : SpecType := .ident .typingDict #[key, value] + +def mapping (key value : SpecType) : SpecType := .ident .typingMapping #[key, value] + +def NoneType : SpecType := .ident .noneType #[] + +def ofArray (atoms : Array SpecAtomType) : SpecType := { atoms := atoms.qsort (· < ·) } + +def pyClass (name : String) (params : Array SpecType) : SpecType := ofAtom <| .pyClass name params + +def asSingleton (tp : SpecType) : Option SpecAtomType := do + if tp.atoms.size = 1 then + for atp in tp.atoms do return atp + none + +def isAtom (tp : SpecType) (atp : SpecAtomType) : Bool := tp.asSingleton.any (· == atp) + +instance : Membership SpecAtomType SpecType where + mem a e := private a.atoms.binSearchContains e (· < ·) = true + +@[instance] +def instDecidableMem (e : SpecAtomType) (tp : SpecType) : Decidable (e ∈ tp) := + inferInstanceAs (Decidable (_ = _)) + +end SpecType + +structure Arg where + name : String + type : SpecType + hasDefault : Bool +deriving Inhabited + +structure ArgDecls where + args : Array Arg + kwonly : Array Arg +deriving Inhabited + +namespace ArgDecls + +def count (ad : ArgDecls) := ad.args.size + ad.kwonly.size + +end ArgDecls + +inductive SpecPred (free : Nat) where +| placeholder +deriving Inhabited + +structure Assertion (free : Nat) where + message : String + formula : SpecPred free +deriving Inhabited + +structure FunctionDecl where + loc : SourceRange + nameLoc : SourceRange + name : String + args : ArgDecls + returnType : SpecType + isOverload : Bool + preconditions : Array (Assertion args.count) + postconditions : Array (SpecPred (args.count + 1)) +deriving Inhabited + +structure ClassDef where + loc : SourceRange + name : String + methods : Array FunctionDecl + +structure TypeDef where + loc : SourceRange + nameLoc : SourceRange + name : String + definition : SpecType + +inductive Signature where + | externTypeDecl (name : String) (source : PythonIdent) + | classDef (d : ClassDef) + | functionDecl (d : FunctionDecl) + | typeDef (d : TypeDef) + deriving Inhabited + +end Strata.Python.Specs +end diff --git a/Strata/Util/FileRange.lean b/Strata/Util/FileRange.lean new file mode 100644 index 000000000..440f1e928 --- /dev/null +++ b/Strata/Util/FileRange.lean @@ -0,0 +1,119 @@ +/- + Copyright Strata Contributors + + SPDX-License-Identifier: Apache-2.0 OR MIT +-/ +module +public import Strata.DDM.Util.SourceRange +public import Lean.Data.Position + +open Std (Format) + +public section +namespace Strata + +inductive Uri where + | file (path: String) + deriving DecidableEq, Repr, Inhabited + +instance : Std.ToFormat Uri where + format fr := private match fr with | .file path => path + +structure FileRange where + file: Uri + range: SourceRange + deriving DecidableEq, Repr, Inhabited + +instance : Std.ToFormat FileRange where + format fr := private f!"{fr.file}:{fr.range}" + +structure File2dRange where + file: Uri + start: Lean.Position + ending: Lean.Position + deriving DecidableEq, Repr + +instance : Std.ToFormat File2dRange where + format fr := private + let baseName := match fr.file with + | .file path => (path.splitToList (· == '/')).getLast! + f!"{baseName}({fr.start.line}, {fr.start.column})-({fr.ending.line}, {fr.ending.column})" + +instance : Std.ToFormat FileRange where + format fr := f!"{fr.file}:{fr.range}" + +/-- A default file range for errors without source location. +This should only be used for generated nodes that are guaranteed to be correct. -/ +def FileRange.unknown : FileRange := + { file := .file "", range := SourceRange.none } + +/-- Format a file range using a FileMap to convert byte offsets to line/column positions. -/ +def FileRange.format (fr : FileRange) (fileMap : Option Lean.FileMap) (includeEnd? : Bool := true) : Std.Format := + let baseName := match fr.file with + | .file path => (path.splitToList (· == '/')).getLast! + match fileMap with + | some fm => + let startPos := fm.toPosition fr.range.start + let endPos := fm.toPosition fr.range.stop + if includeEnd? then + if startPos.line == endPos.line then + f!"{baseName}({startPos.line},({startPos.column}-{endPos.column}))" + else + f!"{baseName}(({startPos.line},{startPos.column})-({endPos.line},{endPos.column}))" + else + f!"{baseName}({startPos.line}, {startPos.column})" + | none => + if fr.range.isNone then + f!"" + else + f!"{baseName}({fr.range.start}-{fr.range.stop})" + +/-- A diagnostic model that holds a file range and a message. + This can be converted to a formatted string using a FileMap. -/ +structure DiagnosticModel where + fileRange : FileRange + message : String + deriving Repr, BEq, Inhabited + +instance : Inhabited DiagnosticModel where + default := { fileRange := FileRange.unknown, message := "" } + +/-- Create a DiagnosticModel from just a message (using default location). +This should not be called, it only exists temporarily to enabling incrementally migrating code without error locations -/ +def DiagnosticModel.fromMessage (msg : String) : DiagnosticModel := + { fileRange := FileRange.unknown, message := msg } + +/-- Create a DiagnosticModel from a Format (using default location). +This should not be called, it only exists temporarily to enabling incrementally migrating code without error locations -/ +def DiagnosticModel.fromFormat (fmt : Std.Format) : DiagnosticModel := + { fileRange := FileRange.unknown, message := toString fmt } + +/-- Create a DiagnosticModel with source location. -/ +def DiagnosticModel.withRange (fr : FileRange) (msg : Format) : DiagnosticModel := + { fileRange := fr, message := toString msg } + +/-- Format a DiagnosticModel using a FileMap to convert byte offsets to line/column positions. -/ +def DiagnosticModel.format (dm : DiagnosticModel) (fileMap : Option Lean.FileMap) (includeEnd? : Bool := true) : Std.Format := + let rangeStr := dm.fileRange.format fileMap includeEnd? + if dm.fileRange.range.isNone then + f!"{dm.message}" + else + f!"{rangeStr} {dm.message}" + +/-- Format just the file range portion of a DiagnosticModel. -/ +def DiagnosticModel.formatRange (dm : DiagnosticModel) (fileMap : Option Lean.FileMap) (includeEnd? : Bool := true) : Std.Format := + dm.fileRange.format fileMap includeEnd? + +/-- Update the file range of a DiagnosticModel if it's currently unknown. +This should not be called, it only exists temporarily to enabling incrementally migrating code without error locations -/ +def DiagnosticModel.withRangeIfUnknown (dm : DiagnosticModel) (fr : FileRange) : DiagnosticModel := + if dm.fileRange.range.isNone then + { dm with fileRange := fr } + else + dm + +instance : ToString DiagnosticModel where + toString dm := dm.format none |> toString + +end Strata +end diff --git a/StrataMain.lean b/StrataMain.lean index 2a70578f4..5045c61e8 100644 --- a/StrataMain.lean +++ b/StrataMain.lean @@ -5,19 +5,12 @@ -/ -- Executable with utilities for working with Strata files. -import Strata.DDM.Elab -import Strata.DDM.Ion -import Strata.DDM.Util.ByteArray -import Strata.Util.IO - import Strata.DDM.Integration.Java.Gen -import Strata.Languages.Python.Python -import Strata.Transform.CoreTransform -import Strata.Transform.ProcedureInlining - -import Strata.Languages.Laurel.Grammar.LaurelGrammar import Strata.Languages.Laurel.Grammar.ConcreteToAbstractTreeTranslator import Strata.Languages.Laurel.LaurelToCoreTranslator +import Strata.Languages.Python.Python +import Strata.Languages.Python.Specs +import Strata.Transform.ProcedureInlining def exitFailure {α} (message : String) : IO α := do IO.eprintln ("Exception: " ++ message ++ "\n\nRun strata --help for additional help.") @@ -175,13 +168,78 @@ def diffCommand : Command where | _, _ => exitFailure "Cannot compare dialect def with another dialect/program." -def readPythonStrata (path : String) : IO Strata.Program := do - let bytes ← Strata.Util.readBinInputSource path +def readPythonStrata (strataPath : String) : + IO (Array (Strata.Python.stmt Strata.SourceRange)) := do + let bytes ← Strata.Util.readBinInputSource strataPath if ! Ion.isIonFile bytes then exitFailure s!"pyAnalyze expected Ion file" - match Strata.Program.fileFromIon Strata.Python.Python_map Strata.Python.Python.name bytes with - | .ok p => pure p - | .error msg => exitFailure msg + let map := Strata.Python.Python_map + match Strata.Program.fromIon map Strata.Python.Python.name bytes with + | .ok pgm => + let pyCmds ← pgm.commands.mapM fun cmd => + match Strata.Python.Command.ofAst cmd with + | .error msg => + exitFailure $ + s!"Internal: could not parse command:\n" + ++ "Message: {msg}\n" + ++ "Command: {Strata.eformat cmd}" + | .ok r => pure r + let isTrue p := inferInstanceAs (Decidable (pyCmds.size = 1)) + | exitFailure s!"Internal: Invalid number of commands" + let .Module _ stmts _ := pyCmds[0] + | exitFailure s!"Internal: Expected Python module" + pure stmts.val + | .error msg => + exitFailure msg + +def pySpecsCommand : Command where + name := "pySpecs" + args := [ "python_path", "strata_path" ] + help := "Translate a Python specification source file into binary pySpec file." + callback := fun _ v => do + let dialectFile := "Tools/Python/dialects/Python.dialect.st.ion" + let pythonFile : System.FilePath := v[0] + let strataDir : System.FilePath := v[1] + if (←pythonFile.metadata).type != .file then + exitFailure s!"Expected Python to be a regular file." + match ←strataDir.metadata |>.toBaseIO with + | .ok md => + if md.type != .dir then + exitFailure s!"Expected Strata to be a directory." + | .error _ => + IO.FS.createDir strataDir + let some searchPath := pythonFile.parent + | exitFailure s!"{pythonFile} directory unknown" + let contents ← + match ← IO.FS.readFile pythonFile |>.toBaseIO with + | .ok b => pure b + | .error msg => + exitFailure s!"Could not read {pythonFile}" + let body ← + match ← Strata.Python.pythonToStrata dialectFile pythonFile |>.toBaseIO with + | .ok r => pure r + | .error msg => + exitFailure msg + + let (fmm, sigs, errors) ← + Strata.Python.Specs.translateModule + (dialectFile := dialectFile) + (searchPath := searchPath) + (strataDir := strataDir) + (pythonFile := pythonFile) + (.ofString contents) + body + if errors.size > 0 then + IO.eprintln "Translation errors:" + for e in errors do + IO.eprintln s!"{fmm.ppSourceRange pythonFile e.loc}: {e.message}" + IO.Process.exit 1 + let some mod := pythonFile.fileStem + | exitFailure s!"No stem {pythonFile}" + let .ok mod := Strata.Python.Specs.ModuleName.ofString mod + | exitFailure s!"Invalid module {mod}" + let strataFile := strataDir / mod.strataFileName + Strata.Python.Specs.writeDDM strataFile sigs def pyTranslateCommand : Command where name := "pyTranslate" @@ -294,6 +352,7 @@ def commandList : List Command := [ diffCommand, pyAnalyzeCommand, pyTranslateCommand, + pySpecsCommand, laurelAnalyzeCommand, ] diff --git a/StrataTest/DDM/Integration/Java/TestGen.lean b/StrataTest/DDM/Integration/Java/TestGen.lean index 710223cf1..8c4d78e48 100644 --- a/StrataTest/DDM/Integration/Java/TestGen.lean +++ b/StrataTest/DDM/Integration/Java/TestGen.lean @@ -308,7 +308,7 @@ elab "#testRoundtrip" : command => do | Lean.logError "Simple dialect not found"; return let dm := Strata.DialectMap.ofList! [Strata.initDialect, simple] let ionBytes ← IO.FS.readBinFile "StrataTest/DDM/Integration/Java/testdata/comprehensive.ion" - match Strata.Program.fileFromIon dm "Simple" ionBytes with + match Strata.Program.fromIon dm "Simple" ionBytes with | .error e => Lean.logError s!"Roundtrip test failed: {e}" | .ok prog => if prog.commands.size != 1 then Lean.logError "Expected 1 command"; return diff --git a/Tools/Python/strata/gen.py b/Tools/Python/strata/gen.py index 457c6427e..fc3535be1 100755 --- a/Tools/Python/strata/gen.py +++ b/Tools/Python/strata/gen.py @@ -13,6 +13,8 @@ import strata.pythonast as pythonast import sys +sys.setrecursionlimit(2500) + def write_dialect(dialect : Dialect, dir : Path): if dir.exists(): if not dir.is_dir(): @@ -36,6 +38,9 @@ def python_dialect(path : str|None) -> Dialect: with open(path, 'rb') as f: return Dialect.from_ion(f) +# Error code for py_to_strata on parse failure. +exit_parse_failure = 100 + def py_to_strata_imp(args): PythonAST = python_dialect(args.dialect) parser = pythonast.Parser(PythonAST) @@ -44,8 +49,8 @@ def py_to_strata_imp(args): try: (_, p) = parser.parse_module(path.read_bytes(), path) except SyntaxError as e: - print(f"Error parsing {path}:\n {e}", file=sys.stderr) - sys.exit(1) + print(f"Parse failure:\n {e}", file=sys.stderr) + sys.exit(exit_parse_failure) with open(args.output, 'wb') as w: ion.dump(p.to_ion(), w, binary=True) diff --git a/Tools/Python/strata/pythonast.py b/Tools/Python/strata/pythonast.py index 45fdf1dc9..5af5b875e 100644 --- a/Tools/Python/strata/pythonast.py +++ b/Tools/Python/strata/pythonast.py @@ -45,6 +45,13 @@ def __init__(self, decl : strata.OpDecl, args : list[OpArg]): 'type': 'mk_type' } +# The ast package includes a few classes that are deprecated +# and no longer generated in AST parse trees. We ignore this +# in the DDM dialect. +ignored_ast_ops : set[type] = set([ + ast.AugLoad, ast.AugStore, ast.ExtSlice, ast.Index, ast.Param, ast.Suite +]) + def gen_dialect() -> strata.Dialect: """ Create the Python dialect. @@ -143,6 +150,8 @@ def translate_op(name : str, op : type, category : SyntaxCat): for (cat, cat_ref) in Python_catmap.items(): if cat.__subclasses__(): for op in cat.__subclasses__(): + if op in ignored_ast_ops: + continue translate_op(op.__name__, op, cat_ref) else: translate_op(f"mk_{cat.__name__}", cat, cat_ref) @@ -167,11 +176,6 @@ def source_range(mapping : FileMapping, t : object) -> SourceRange|None: end_off = mapping.byte_offset(end_lineno, end_col_offset) return SourceRange(off, end_off) -# Note these are all deprecated. -ignored_cats : set[type] = set([ - ast.AugLoad, ast.AugStore, ast.ExtSlice, ast.Index, ast.Param, ast.Suite -]) - def create_opmap(PythonAST : strata.Dialect) -> dict[type, Op]: def populate_op(name : str, op : type) -> Op: decl = getattr(PythonAST, name) @@ -201,7 +205,7 @@ def populate_op(name : str, op : type) -> Op: for cat in ast.AST.__subclasses__(): if cat.__subclasses__(): for op in cat.__subclasses__(): - if op in ignored_cats: + if op in ignored_ast_ops: continue Python_opmap[op] = populate_op(op.__name__, op) @@ -245,7 +249,7 @@ def check_ast(d : strata.Dialect): for cat in ast.AST.__subclasses__(): if cat.__subclasses__(): for op in cat.__subclasses__(): - if op in ignored_cats: + if op in ignored_ast_ops: continue check_op(d, op.__name__, op) else: @@ -333,8 +337,7 @@ def ast_to_op(self, mapping : FileMapping, t : object) -> strata.Operation: if a.name is None: v = a.missing else: - v = getattr(t, a.name) - v = self.ast_to_arg(mapping, v, a.cat) + v = self.ast_to_arg(mapping, getattr(t, a.name), a.cat) args.append(v) return decl(*args, ann=src)