Compare commits

...

5 Commits

Author SHA1 Message Date
Peter Harpending f04b7311f5 stuff 2026-06-05 00:58:53 -07:00
pharpend 10424927b1 stuff 2026-06-04 14:01:46 -07:00
pharpend fdb40dcb92 stuff 2026-06-04 11:42:48 -07:00
Peter Harpending e180dc955d stuff 2026-06-03 19:28:55 -07:00
Peter Harpending 4e54bebeba parens work... moving on to documenting work 2026-06-03 15:17:55 -07:00
10 changed files with 1019 additions and 214 deletions
+77
View File
@@ -0,0 +1,77 @@
-spec s2t_file(Signal) -> AstFile when
    Signal  :: [tk()],
    AstFile :: #ns{meta :: file, kids :: asf()}.
% @doc
% parse a whole file's signal into a `file' stem with a single block child
%
% the column of the very first token is used as the column of the file
% block; an empty signal is an `empty_file' error
s2t_file([#tk{pos = {_, Col}} | _] = Signal) ->
    RawBlock = s2t_gulp_block(Col, Signal),
    Block    = t2t_parse_tds_in_block(RawBlock),
    #ns{meta = file, kids = [Block]};
s2t_file([]) ->
    error(empty_file).
-spec s2t_gulp_block(BlkCol, Signal) -> Block when
    BlkCol :: pos_integer(),
    Signal :: [tk()],
    Block  :: #ns{meta :: block}.
% @doc
% wrap a signal in a `block' stem whose kids are its block items
%
% crashes with a badmatch if any token sits left of the block column
s2t_gulp_block(BlkCol, Signal) ->
    % sanity check: nothing may be indented less than the block itself
    true = lists:all(fun(#tk{pos = {_, Col}}) -> Col >= BlkCol end, Signal),
    Items = s2f_block_items(BlkCol, Signal),
    #ns{meta = block, kids = Items}.
-spec s2f_block_items(BCol, Signal) -> BlkItems when
    BCol     :: pos_integer(),
    Signal   :: [tk()],
    BlkItems :: [BlkItem],
    BlkItem  :: #ns{meta :: block_item,
                    kids :: asf()}.
% @doc
% cut a block's signal into `block_item' stems
%
% every item has to start on a token sitting exactly at the block column;
% anything else is a function_clause crash
s2f_block_items(BlkCol, Signal) ->
    s2f_block_items(BlkCol, [], Signal).

% next item starts here: slurp it and carry on with what is left
s2f_block_items(BlkCol, Acc, [#tk{pos = {_, BlkCol}} = Head | Tail]) ->
    {slurp, Item, Remaining} = s2t_slurp_block_item(BlkCol, Head, Tail),
    s2f_block_items(BlkCol, [Item | Acc], Remaining);
% signal exhausted: items were stacked in reverse
s2f_block_items(_BlkCol, Acc, []) ->
    lists:reverse(Acc).
% take one block item (first token plus its continuation tokens) off the
% front of the signal and wrap it in a `block_item' stem
s2t_slurp_block_item(BlkCol, First, Rest) ->
    {Tokens, Remaining} = s2s_sw_block_item(BlkCol, First, Rest),
    {slurp, #ns{meta = block_item, kids = Tokens}, Remaining}.
% sw = splitwith; kind of take/drop
%
% the item is its first token plus every immediately following token that
% is indented strictly deeper than the block column
s2s_sw_block_item(BlkCol, First, Rest) ->
    {Deeper, Remaining} =
        lists:splitwith(fun(#tk{pos = {_, Col}}) -> Col > BlkCol end, Rest),
    {[First | Deeper], Remaining}.
-spec t2t_parse_tds_in_block(Block0) -> Block1 when
    Block0 :: ast(),
    Block1 :: ast().
% tree-to-tree pass: every block_item kid of the block is replaced by the
% top decl parsed from it; the block stem itself is kept
t2t_parse_tds_in_block(#ns{meta = block, kids = Items} = Block) ->
    TopDecls = lists:map(fun t2t_parse_td_from_item/1, Items),
    Block#ns{kids = TopDecls}.
-spec t2t_parse_td_from_item(BlockItem) -> TopDecl when
    BlockItem :: #ns{meta :: block_item},
    TopDecl   :: #ns{meta :: td_meta()}.
% unwrap a block_item stem and parse the tokens it holds as a top decl
t2t_parse_td_from_item(#ns{meta = block_item, kids = Tokens}) ->
    s2t_top_decl(Tokens).
-spec s2t_top_decl(Signal) -> TdTree when
Signal :: [tk()],
TdTree :: ast().
s2t_top_decl(S0) ->
+292
View File
@@ -0,0 +1,292 @@
# Syntax
## Lexical syntax
### Comments
Single line comments start with `//` and block comments are enclosed in `/*`
and `*/` and can be nested.
### Keywords
```
contract include let switch type record datatype if elif else function
stateful payable true false mod public entrypoint private indexed namespace
interface main using as for hiding
```
### Tokens
- `Id = [a-z_][A-Za-z0-9_']*` identifiers start with a lower case letter or an underscore.
- `Con = [A-Z][A-Za-z0-9_']*` constructors start with an upper case letter.
- `QId = (Con\.)+Id` qualified identifiers (e.g. `Map.member`)
- `QCon = (Con\.)+Con` qualified constructor
- `TVar = 'Id` type variable (e.g. `'a`, `'b`)
- `Int = [0-9]+(_[0-9]+)*|0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*` integer literal with optional `_` separators
- `Bytes = #[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*` byte array literal with optional `_` separators
- `String` string literal enclosed in `"` with escape character `\`
- `Char` character literal enclosed in `'` with escape character `\`
- `AccountAddress` base58-encoded 32 byte account pubkey with `ak_` prefix
- `ContractAddress` base58-encoded 32 byte contract address with `ct_` prefix
- `Signature` base58-encoded 64 byte cryptographic signature with `sg_` prefix
Valid string escape codes are
| Escape | ASCII | |
|---------------|-------------|---|
| `\b` | 8 | |
| `\t` | 9 | |
| `\n` | 10 | |
| `\v` | 11 | |
| `\f` | 12 | |
| `\r` | 13 | |
| `\e` | 27 | |
| `\xHexDigits` | *HexDigits* | |
See the [identifier encoding scheme](https://git.qpq.swiss/QPQ-AG/protocol/src/branch/master/node/api/api_encoding.md) for the
details on the base58 literals.
## Layout blocks
Sophia uses Python-style layout rules to group declarations and statements. A
layout block with more than one element must start on a separate line and be
indented more than the currently enclosing layout block. Blocks with a single
element can be written on the same line as the previous token.
Each element of the block must share the same indentation and no part of an
element may be indented less than the indentation of the block. For instance
```sophia
contract Layout =
function foo() = 0 // no layout
function bar() = // layout block starts on next line
let x = foo() // indented more than 2 spaces
x
+ 1 // the '+' is indented more than the 'x'
```
## Notation
In describing the syntax below, we use the following conventions:
- Upper-case identifiers denote non-terminals (like `Expr`) or terminals with
some associated value (like `Id`).
- Keywords and symbols are enclosed in single quotes: `'let'` or `'='`.
- Choices are separated by vertical bars: `|`.
- Optional elements are enclosed in `[` square brackets `]`.
- `(` Parentheses `)` are used for grouping.
- Zero or more repetitions are denoted by a postfix `*`, and one or more
repetitions by a `+`.
- `Block(X)` denotes a layout block of `X`s.
- `Sep(X, S)` is short for `[X (S X)*]`, i.e. a possibly empty sequence of `X`s
separated by `S`s.
- `Sep1(X, S)` is short for `X (S X)*`, i.e. same as `Sep`, but must not be empty.
## Declarations
A Sophia file consists of a sequence of *declarations* in a layout block.
```c
File ::= Block(TopDecl)
TopDecl ::= ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl)
| 'contract' 'interface' Con [Implement] '=' Block(Decl)
| 'namespace' Con '=' Block(Decl)
| '@compiler' PragmaOp Version
| 'include' String
| Using
Implement ::= ':' Sep1(Con, ',')
Decl ::= 'type' Id ['(' TVar* ')'] '=' TypeAlias
| 'record' Id ['(' TVar* ')'] '=' RecordType
| 'datatype' Id ['(' TVar* ')'] '=' DataType
| 'let' Id [':' Type] '=' Expr
| (EModifier* 'entrypoint' | FModifier* 'function') Block(FunDecl)
| Using
FunDecl ::= Id ':' Type // Type signature
| Id Args [':' Type] '=' Block(Stmt) // Definition
| Id Args [':' Type] Block(GuardedDef) // Guarded definitions
GuardedDef ::= '|' Sep1(Expr, ',') '=' Block(Stmt)
Using ::= 'using' Con ['as' Con] [UsingParts]
UsingParts ::= 'for' '[' Sep1(Id, ',') ']'
| 'hiding' '[' Sep1(Id, ',') ']'
PragmaOp ::= '<' | '=<' | '==' | '>=' | '>'
Version ::= Sep1(Int, '.')
EModifier ::= 'payable' | 'stateful'
FModifier ::= 'stateful' | 'private'
Args ::= '(' Sep(Pattern, ',') ')'
```
Contract declarations must appear at the top-level.
For example,
```sophia
contract Test =
type t = int
entrypoint add (x : t, y : t) = x + y
```
There are three forms of type declarations: type aliases (declared with the
`type` keyword), record type definitions (`record`) and data type definitions
(`datatype`):
```c
TypeAlias ::= Type
RecordType ::= '{' Sep(FieldType, ',') '}'
DataType ::= Sep1(ConDecl, '|')
FieldType ::= Id ':' Type
ConDecl ::= Con ['(' Sep1(Type, ',') ')']
```
For example,
```sophia
record point('a) = {x : 'a, y : 'a}
datatype shape('a) = Circle(point('a), 'a) | Rect(point('a), point('a))
type int_shape = shape(int)
```
## Types
```c
Type ::= Domain '=>' Type // Function type
| Type '(' Sep(Type, ',') ')' // Type application
| '(' Type ')' // Parens
| 'unit' | Sep(Type, '*') // Tuples
| Id | QId | TVar
Domain ::= Type // Single argument
| '(' Sep(Type, ',') ')' // Multiple arguments
```
The function type arrow associates to the right.
Example,
```sophia
'a => list('a) => (int * list('a))
```
## Statements
Function bodies are blocks of *statements*, where a statement is one of the following:
```c
Stmt ::= 'switch' '(' Expr ')' Block(Case)
| 'if' '(' Expr ')' Block(Stmt)
| 'elif' '(' Expr ')' Block(Stmt)
| 'else' Block(Stmt)
| 'let' LetDef
| Using
| Expr
LetDef ::= Id Args [':' Type] '=' Block(Stmt) // Function definition
| Pattern '=' Block(Stmt) // Value definition
Case ::= Pattern '=>' Block(Stmt)
| Pattern Block(GuardedCase)
GuardedCase ::= '|' Sep1(Expr, ',') '=>' Block(Stmt)
Pattern ::= Expr
```
`if` statements can be followed by zero or more `elif` statements and an optional final `else` statement. For example,
```sophia
let x : int = 4
switch(f(x))
None => 0
Some(y) =>
if(y > 10)
"too big"
elif(y < 3)
"too small"
else
"just right"
```
## Expressions
```c
Expr ::= '(' LamArgs ')' '=>' Block(Stmt) // Anonymous function (x) => x + 1
| '(' BinOp ')' // Operator lambda (+)
| 'if' '(' Expr ')' Expr 'else' Expr // If expression if(x < y) y else x
| Expr ':' Type // Type annotation 5 : int
| Expr BinOp Expr // Binary operator x + y
| UnOp Expr // Unary operator ! b
| Expr '(' Sep(Expr, ',') ')' // Application f(x, y)
| Expr '.' Id // Projection state.x
| Expr '[' Expr ']' // Map lookup map[key]
| Expr '{' Sep(FieldUpdate, ',') '}' // Record or map update r{ fld[key].x = y }
| '[' Sep(Expr, ',') ']' // List [1, 2, 3]
| '[' Expr '|' Sep(Generator, ',') ']'
// List comprehension [k | x <- [1], if (f(x)), let k = x+1]
| '[' Expr '..' Expr ']' // List range [1..n]
| '{' Sep(FieldUpdate, ',') '}' // Record or map value {x = 0, y = 1}, {[key] = val}
| '(' Expr ')' // Parens (1 + 2) * 3
| '(' Expr '=' Expr ')' // Assign pattern (y = x::_)
| Id | Con | QId | QCon // Identifiers x, None, Map.member, AELib.Token
| Int | Bytes | String | Char // Literals 123, 0xff, #00abc123, "foo", '%'
| AccountAddress | ContractAddress // Chain identifiers
| Signature // Signature
| '???' // Hole expression 1 + ???
Generator ::= Pattern '<-' Expr // Generator
| 'if' '(' Expr ')' // Guard
| LetDef // Definition
LamArgs ::= '(' Sep(LamArg, ',') ')'
LamArg ::= Id [':' Type]
FieldUpdate ::= Path '=' Expr
Path ::= Id // Record field
| '[' Expr ']' // Map key
| Path '.' Id // Nested record field
| Path '[' Expr ']' // Nested map key
BinOp ::= '||' | '&&' | '<' | '>' | '=<' | '>=' | '==' | '!='
| '::' | '++' | '+' | '-' | '*' | '/' | 'mod' | '^'
| 'band' | 'bor' | 'bxor' | '<<' | '>>' | '|>'
UnOp ::= '-' | '!' | 'bnot'
```
## Operator types
| Operators | Type
| --- | ---
| `-` `+` `*` `/` `mod` `^` | arithmetic operators
| `!` `&&` `\|\|` | logical operators
| `band` `bor` `bxor` `bnot` `<<` `>>` | bitwise operators
| `==` `!=` `<` `>` `=<` `>=` | comparison operators
| `::` `++` | list operators
| `\|>` | functional operators
## Operator precedence
In order of highest to lowest precedence.
| Operators | Associativity
| --- | ---
| `!` `bnot`| right
| `^` | left
| `*` `/` `mod` | left
| `-` (unary) | right
| `+` `-` | left
| `<<` `>>` | left
| `::` `++` | right
| `<` `>` `=<` `>=` `==` `!=` | none
| `band` | left
| `bxor` | left
| `bor` | left
| `&&` | right
| `\|\|` | right
| `\|>` | left
+67
View File
@@ -0,0 +1,67 @@
-spec mktree(Signal) -> Tree when
    Signal :: gsc:signal(),
    Tree   :: gsc_ntree:ntree().
% @doc
% make the signal into a tree: put every token under one `vtokens' stem,
% then reroot on "=>" and after that on "*"
mktree(Signal) ->
    Flat = gsc_ntree:nstem(vtokens, Signal),
    rerootl_tkstr("*", rerootl_tkstr("=>", Flat)).
% reroot a stem on the leftmost kid token whose string is `S'
%
% input:
%   *s Root
%   |
%   +-- .l Foo
%   +-- .l "=>"
%   +-- .l Bar
% output:
%   *s "=>"
%   |
%   +-- *s Root -- .l Foo
%   +-- *s Root -- .l Bar
%
% the right-hand side is rerooted again on the same string; when no kid
% matches, the tree comes back untouched
rerootl_tkstr(S, Tree = #ns{val = Root}) ->
    Kids = gsc_ntree:deleaf0(Tree),
    case lists:splitwith(fun(Tk) -> isnt_str(S, Tk) end, Kids) of
        % not found, nothing to do
        {_AllKids, []} ->
            Tree;
        % found: OpTk becomes the new root over the two halves
        {Before, [OpTk | After]} ->
            Left  = gsc_ntree:releaf0(Root, Before),
            Right = rerootl_tkstr(S, gsc_ntree:releaf0(Root, After)),
            gsc_ntree:releaf0({op, OpTk}, [Left, Right])
    end.
%reroot_mapsto(Tree0 = #ns{val = Root0}) ->
% Kids0 = gsc_ntree:deleaf0(Tree0),
% IsntMapsto = fun(DL) -> isnt_str("=>", Tk) end,
% case lists:splitwith(IsntMapsto, Kids0) of
% % found
% {LHS1, [Tk0 | RHS1]} ->
% Root1 = Root0,
% LTree1 = gsc_ntree:releaf0(Root1, LHS1),
% RTree1 = reroot_mapsto(gsc_ntree:releaf0(Root1, RHS1)),
% NewRoot0 = {op, Tk0},
% NewKids0 = [LTree1, RTree1],
% NewTree = gsc_ntree:releaf0(NewRoot0, NewKids0),
% NewTree;
% % nothing to do
% {Kids0, []} ->
% Tree0
% end.
% negation of is_str/2
isnt_str(S, Tk) ->
    is_str(S, Tk) =:= false.
% true only for a token whose `str' field is exactly `S'; anything that is
% not a token is simply false
is_str(S, #tk{str = Str}) -> Str =:= S;
is_str(_, _)              -> false.
+273
View File
@@ -0,0 +1,273 @@
% @doc experiment centering around the file syntax node using ntree approach
-module(gsc_test_file).
-export([
main/0
]).
-include("$gsc_include/gsc.hrl").
% meta for a contract top decl, one field per element of
%   ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl)
% every field starts out as `none' (= not slurped yet) and is filled in by
% s2s_sm_ct/2
-record(ct,
{payable = none :: none | false | {true, tk()},
main = none :: none | false | {true, tk()},
contract = none :: none | tk(),
con = none :: none | tk(),
impls = none :: none | [tk()],
eq = none :: none | tk()}).
-type meta() :: #ct{}.
% meta for a type alias decl, one field per element of
%   'type' Id ['(' TVar* ')'] '='
% filled in by s2s_sm_decl_type/2; `none' again means not slurped yet
-record(decl_type,
{type = none :: none | tk(),
id = none :: none | tk(),
params = none :: none | [tk()],
eq = none :: none | tk()}).
-type decl_meta() :: #decl_type{}.
% everything that can sit in the meta slot of a stem in this module's ast;
% `nyi' / `{nyi, _}' mark parts of the signal with no parser yet
-type ast_meta() :: file
| meta()
| decl_meta()
| nyi
| {nyi, any()}
.
% kinds of top decl
% NOTE(review): s2t(top_decl, _) dispatches namespaces to the atom
% `namespace', not `ns' -- confirm which one is intended
-type target()
:: ct
| iface
| ns
| pragma
| include
| using
.
% parse targets for s2t/2 (signal -> tree)
-type s2t_target()
:: file
| top_decl
| target()
| nyi
| {nyi, any()}
.
% parse targets for s2f/2 (signal -> forest)
-type s2f_target()
:: {block_of, s2t_target()}
.
% syntax tree / forest: stems carry ast_meta(), leafs are tokens
-type ast() :: ntree(ast_meta(), tk()).
-type asf() :: nforest(ast_meta(), tk()).
% @doc
% read the `hello.aes' test contract, parse it with target `file', then
% print the source text followed by the resulting AST
main() ->
    Path = ts_utils:ct_file_abspath("hello.aes"),
    {ok, SrcBin} = file:read_file(Path),
    Signal = gsc:unsafe_signal_from_file(Path),
    Ast = s2t(file, Signal),
    % source first, fenced ...
    io:format("hello.aes:~n", []),
    io:format("```~n", []),
    io:format("~ts", [SrcBin]),
    io:format("```~n~n", []),
    % ... then the tree
    io:format("AST: ~tp~n", [Ast]),
    ok.
% // Hello World Contract
% // Copyright (c) 2025 QPQ AG
%
% contract Hello =
% type state = unit
% entrypoint init(): state =
% ()
%
% entrypoint hello(): string =
% "hello, world"
-spec s2t(ParseTarget, Signal) -> AST when
    ParseTarget :: s2t_target() | any(),
    Signal      :: [tk()],
    AST         :: ast().
% @doc
% signal-to-tree: parse `Signal' as the syntax element named by
% `ParseTarget'
%
% the dispatching targets (`top_decl', `decl') look at the leading token
% strings, pick a more specific target and recurse. any target without a
% parser of its own falls through to the last clause and comes back as a
% `{nyi, Target}' stem holding the untouched signal, which is why the
% spec admits any term as target.
%
% errors: `empty_file' for an empty file signal, `{bad_decl, Signal}' for
% an unrecognizable decl, case_clause for an unrecognizable top decl

% File ::= Block(TopDecl)
s2t(file, Signal) ->
    case Signal of
        [] -> error(empty_file);
        _  -> {ns, file, s2f({block_of, top_decl}, Signal)}
    end;
% TopDecl ::= ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl)
%           | ['payable'] 'contract' 'interface' Con [Implement] '=' Block(Decl)
%           | 'namespace' Con '=' Block(Decl)
%           | '@compiler' PragmaOp Version
%           | 'include' String
%           | Using
s2t(top_decl, Signal) ->
    % presumably the strings of the first (up to) 3 tokens -- confirm
    % against gsc_tokens:strings/2
    NewTarget =
        case gsc_tokens:strings(3, Signal) of
            ["payable", "contract", "interface"] -> iface;
            ["contract", "interface" | _]        -> iface;
            ["payable", "main", "contract"]      -> ct;
            ["payable", "contract" | _]          -> ct;
            % 'main' without 'payable' is legal per the grammar above and
            % is handled by s2s_sm_ct/2, so it must dispatch too
            ["main", "contract" | _]             -> ct;
            ["contract" | _]                     -> ct;
            ["namespace" | _]                    -> namespace;
            ["@compiler" | _]                    -> pragma;
            ["include" | _]                      -> include;
            ["using" | _]                        -> using
        end,
    s2t(NewTarget, Signal);
% ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl)
s2t(ct, S0) ->
    {slurp, CtMeta, S1} = s2s_slurp_meta(#ct{}, S0),
    {ns, CtMeta, s2f({block_of, decl}, S1)};
% Decl ::= 'type'     Id ['(' TVar* ')'] '=' TypeAlias
%        | 'record'   Id ['(' TVar* ')'] '=' RecordType
%        | 'datatype' Id ['(' TVar* ')'] '=' DataType
%        | 'let' Id [':' Type] '=' Expr
%        | (EModifier* 'entrypoint' | FModifier* 'function') Block(FunDecl)
%        | Using
s2t(decl, S0) ->
    NewTarget =
        case gsc_tokens:strings(3, S0) of
            ["type" | _]     -> decl_type;
            ["record" | _]   -> decl_record;
            ["datatype" | _] -> decl_datatype;
            ["let" | _]      -> decl_let;
            % modifiers may come first, so look for the keyword anywhere
            % in the prefix
            Pfx3 ->
                IsEp = lists:member("entrypoint", Pfx3),
                IsFn = lists:member("function", Pfx3),
                if
                    IsEp -> decl_entrypoint;
                    IsFn -> decl_function;
                    true -> error({bad_decl, S0})
                end
        end,
    s2t(NewTarget, S0);
% 'type' Id ['(' TVar* ')'] '=' TypeAlias
s2t(decl_type, S0) ->
    {slurp, Meta, S1} = s2s_slurp_meta(#decl_type{}, S0),
    % kids is a forest, so the single type tree goes in a list
    {ns, Meta, [s2t(type, S1)]};
% not-yet-implemented targets: keep the raw signal as the kids
s2t(nyi, Signal) ->
    {ns, nyi, Signal};
s2t(NYI = {nyi, _}, Signal) ->
    {ns, NYI, Signal};
s2t(NYI, Signal) ->
    {ns, {nyi, NYI}, Signal}.
-spec s2f(ForestTarget, Signal) -> Forest when
    ForestTarget :: s2f_target(),
    Signal       :: [tk()],
    Forest       :: asf().
% signal-to-forest: split the signal into block items and parse each item
% with s2t/2 using the given tree target; a signal that is not a valid
% block is a badmatch crash
s2f({block_of, ItemTarget}, Signal) ->
    {gulp, ItemSignals} = gsc_signal:gulp_block_items(Signal),
    ParseItem = fun(ItemSignal) -> s2t(ItemTarget, ItemSignal) end,
    lists:map(ParseItem, ItemSignals).
-spec s2s_slurp_meta(InitMeta, Signal) -> Result when
    InitMeta  :: Meta,
    Signal    :: [tk()],
    Result    :: {slurp, Meta, NewSignal},
    Meta      :: ast_meta(),
    NewSignal :: Signal.
% pick the slurper that goes with the kind of meta record handed in; any
% other meta is an error
s2s_slurp_meta(Meta, Signal) ->
    case Meta of
        #ct{}        -> s2s_sm_ct(Meta, Signal);
        #decl_type{} -> s2s_sm_decl_type(Meta, Signal);
        _            -> error({s2s_slurp_meta, Meta, Signal})
    end.
% slurp the head of a contract decl into a #ct{}
%   ['payable'] ['main'] 'contract' Con [Implement] '='
%
% works as a little state machine: each clause handles the first field
% that is still `none', fills it in and recurses, so the clause order is
% the order in which the tokens are expected. once no field is `none' the
% last clause returns `{slurp, Ct, RestOfSignal}'.

% optional 'payable': {true, Tk} when present, false otherwise
s2s_sm_ct(Ct = #ct{payable = none}, S0) ->
case S0 of
[#tk{str = "payable"} = T0 | S1] ->
s2s_sm_ct(Ct#ct{payable = {true, T0}}, S1);
_ ->
s2s_sm_ct(Ct#ct{payable = false}, S0)
end;
% optional 'main': same scheme as 'payable'
s2s_sm_ct(Ct = #ct{main = none}, S0) ->
case S0 of
[#tk{str = "main"} = T0 | S1] ->
s2s_sm_ct(Ct#ct{main = {true, T0}}, S1);
_ ->
s2s_sm_ct(Ct#ct{main = false}, S0)
end;
% mandatory 'contract' keyword
s2s_sm_ct(Ct = #ct{contract = none}, S0) ->
case S0 of
[#tk{str = "contract"} = T0 | S1] ->
s2s_sm_ct(Ct#ct{contract = T0}, S1);
_ ->
error({no_kwd_contract, Ct, S0})
end;
% mandatory contract name: a token of shape `con'
s2s_sm_ct(Ct = #ct{con = none}, S0) ->
case S0 of
[#tk{shape = con} = T0 | S1] ->
s2s_sm_ct(Ct#ct{con = T0}, S1);
_ ->
error({no_contract_name, Ct, S0})
end;
% optional implements list, introduced by ':'; [] when there is none
s2s_sm_ct(Ct = #ct{impls = none}, S0) ->
case gsc_tokens:strings(1, S0) of
[":"] ->
{slurp, Impls, S1} = s2f_slurp_impls(S0),
s2s_sm_ct(Ct#ct{impls = Impls}, S1);
_ ->
s2s_sm_ct(Ct#ct{impls = []}, S0)
end;
% mandatory '='
s2s_sm_ct(Ct = #ct{eq = none}, S0) ->
case S0 of
[#tk{str = "="} = T0 | S1] ->
s2s_sm_ct(Ct#ct{eq = T0}, S1);
_ ->
error({no_equal_sign, Ct, S0})
end;
% every field is filled in: done
s2s_sm_ct(Ct, S0) ->
{slurp, Ct, S0}.
% slurp an implements list `: Con, Con, ...' off the front of the signal
%
% the signal has to start with ':' followed by a `con' token (anything
% else is a function_clause crash); returns the con tokens in source
% order together with the rest of the signal
s2f_slurp_impls([#tk{str = ":"}, #tk{shape = con} = First | Rest]) ->
    s2f_slurp_impls([First], Rest).

% keep going for as long as the signal continues with `, Con'
s2f_slurp_impls(Acc, [#tk{str = ","}, #tk{shape = con} = Next | Rest]) ->
    s2f_slurp_impls([Next | Acc], Rest);
s2f_slurp_impls(Acc, Rest) ->
    {slurp, lists:reverse(Acc), Rest}.
%-record(decl_type,
% {type = none :: none | tk(),
% id = none :: none | tk(),
% params = none :: none | [tk()],
% eq = none :: none | tk()}).
% slurp the head of a type alias decl into a #decl_type{}
%   'type' Id ['(' TVar* ')'] '='
%
% same scheme as the contract slurper: each clause handles the first field
% that is still `none', fills it in and recurses; once nothing is `none'
% the last clause returns `{slurp, Meta, RestOfSignal}'.

% mandatory 'type' keyword
s2s_sm_decl_type(M = #decl_type{type = none}, S0) ->
    case S0 of
        [#tk{str = "type"} = T0 | S1] ->
            s2s_sm_decl_type(M#decl_type{type = T0}, S1);
        _ ->
            error({no_kwd_type, S0})
    end;
% mandatory type name: a token of shape `id'
s2s_sm_decl_type(M = #decl_type{id = none}, S0) ->
    case S0 of
        [#tk{shape = id} = T0 | S1] ->
            s2s_sm_decl_type(M#decl_type{id = T0}, S1);
        _ ->
            error({no_type_id, S0})
    end;
% optional type parameters: not parsed yet, so an open paren is an error
s2s_sm_decl_type(M = #decl_type{params = none}, S0) ->
    case S0 of
        [#tk{str = "("} | _] ->
            error({fixme, parens_bad});
        _ ->
            s2s_sm_decl_type(M#decl_type{params = []}, S0)
    end;
% mandatory '='
s2s_sm_decl_type(M = #decl_type{eq = none}, S0) ->
    case S0 of
        [#tk{str = "="} = T0 | S1] ->
            s2s_sm_decl_type(M#decl_type{eq = T0}, S1);
        _ ->
            error({no_equal_sign, S0})
    end;
% every field is filled in: done
s2s_sm_decl_type(M, S0) ->
    {slurp, M, S0}.
+112 -77
View File
@@ -6,6 +6,21 @@
-include("$gsc_include/gsc.hrl").
% just parsing type expressions right now, so only need
% to worry about round parens
%
% none is to indicate general-purpose grouping, for
% e.g. LHS/RHS of an op
-type syntax_meta()
:: {op, tk()}
| op_arg
| {parens, Open :: tk(), Close :: tk()}
.
-type ast() :: ntree(syntax_meta(), tk()).
-type asf() :: nforest(syntax_meta(), tk()).
-type asts() :: asf().
main() ->
x00(),
@@ -17,93 +32,113 @@ x00() ->
io:format(" SrcStr = ~p~n", [x00_src()]),
io:format(" Tokens = ~p~n", [x00_tks()]),
io:format(" Signal = ~p~n", [x00_sgl()]),
io:format(" Tree0 = ~p~n", [x00_tree0()]),
io:format(" Forest = ~p~n", [x00_fst()]),
ok.
% sample type expr, tokens, signal
x00_src() -> "foo => bar * baz".
x00_tks() -> gsc:unsafe_tokens_from_string(x00_src()).
x00_sgl() -> gsc:filter_signal(x00_tks()).
x00_tree0() -> mktree(x00_sgl()).
% records copypasta for now
-record(ns, {val :: any(), kids :: list(any())}).
-record(nl, {val :: any()}).
-type ntree(X, Y) :: gsc_ntree:ntree(X, Y).
-type ntree() :: gsc_ntree:ntree().
-type ast_stem_t() :: vtokens
| {op, tk()}
.
-type ast() :: ntree(ast_stem_t(), tk()).
x00_src() -> "(foo => (bar) * baz)".
x00_tks() -> gsc:unsafe_tokens_from_string(x00_src()).
x00_sgl() -> gsc:filter_signal(x00_tks()).
x00_fst() -> parse(x00_sgl()).
-spec mktree(Signal) -> Tree when
Signal :: gsc:signal(),
Tree :: gsc_ntree:ntree().
-spec parse(Signal) -> ASF when
Signal :: [tk()],
ASF :: asf().
% @doc make into a tree
mktree(Sig) ->
Tree0 = gsc_ntree:nstem(vtokens, Sig),
Tree1 = rerootl_tkstr("=>", Tree0),
Tree2 = rerootl_tkstr("*", Tree1),
Tree2.
parse(Signal) ->
    % key insight here is our signal is already a
    % forest, assuming the leaf type is `tk()`.
    %
    % our parser is a sequence of forest-to-forest
    % transformers.
    %
    % at the end we should end up with just one tree (i
    % think)?
    %
    % order of the passes: parens are grouped first, then "=>" is split
    % out, then "*" is split out inside whatever the "=>" pass produced
    F0 = Signal,
    F1 = f2f_parens(F0),
    F2 = f2f_op("=>", F1),
    F3 = f2f_op("*", F2),
    % hand back the output of the last pass; returning F2 here silently
    % dropped the "*" pass
    F3.
rerootl_tkstr(S, Tree0 = #ns{val = Root0}) ->
Kids0 = gsc_ntree:deleaf0(Tree0),
IsntS = fun(Tk) -> isnt_str(S, Tk) end,
case lists:splitwith(IsntS, Kids0) of
% found
% input:
% *s Root0
% |
% +-- .l Foo
% +-- .l "=>"
% +-- .l Bar
% output:
% *s "=>"
% |
% +-- *s Root0 -- .l Foo
% +-- *s Root0 -- .l Bar
{LHS1, [Tk0 | RHS1]} ->
Root1 = Root0,
LTree1 = gsc_ntree:releaf0(Root1, LHS1),
RTree1 = rerootl_tkstr(S, gsc_ntree:releaf0(Root1, RHS1)),
NewRoot0 = {op, Tk0},
NewKids0 = [LTree1, RTree1],
NewTree = gsc_ntree:releaf0(NewRoot0, NewKids0),
NewTree;
% not found, nothing to do
{Kids0, []} ->
Tree0
end.
% forest-to-forest pass for one binary operator, given as its token
% string; starts the scan with an empty left-hand stack
f2f_op(OpStr, Forest) ->
    f2f_op(OpStr, [], Forest).
%reroot_mapsto(Tree0 = #ns{val = Root0}) ->
% Kids0 = gsc_ntree:deleaf0(Tree0),
% IsntMapsto = fun(DL) -> isnt_str("=>", Tk) end,
% case lists:splitwith(IsntMapsto, Kids0) of
% % found
% {LHS1, [Tk0 | RHS1]} ->
% Root1 = Root0,
% LTree1 = gsc_ntree:releaf0(Root1, LHS1),
% RTree1 = reroot_mapsto(gsc_ntree:releaf0(Root1, RHS1)),
% NewRoot0 = {op, Tk0},
% NewKids0 = [LTree1, RTree1],
% NewTree = gsc_ntree:releaf0(NewRoot0, NewKids0),
% NewTree;
% % nothing to do
% {Kids0, []} ->
% Tree0
% end.
% ran off the end without seeing the op at this level: the forest is what
% we stacked up, in order
f2f_op(_OpStr, Acc, []) ->
    lists:reverse(Acc);
% the op token itself: what was stacked so far is the left arg, what
% follows (processed the same way) is the right arg, and the whole level
% collapses into a single op tree
f2f_op(OpStr, Acc, [#tk{str = OpStr} = OpTk | Rest]) ->
    LeftArg  = #ns{meta = op_arg, kids = lists:reverse(Acc)},
    RightArg = #ns{meta = op_arg, kids = f2f_op(OpStr, Rest)},
    [#ns{meta = {op, OpTk}, kids = [LeftArg, RightArg]}];
% a stem: run the pass over its kids, then treat it as one more item
f2f_op(OpStr, Acc, [#ns{kids = Kids} = Stem | Rest]) ->
    Stem1 = Stem#ns{kids = f2f_op(OpStr, Kids)},
    f2f_op(OpStr, [Stem1 | Acc], Rest);
% any other leaf: stack it and move on
f2f_op(OpStr, Acc, [Leaf | Rest]) ->
    f2f_op(OpStr, [Leaf | Acc], Rest).
-spec f2f_parens(Forest) -> NewForest when
    Forest    :: asts(),
    NewForest :: Forest.
% @doc
% group parenthesized runs of tokens into stems
%
% walks the flat token list left to right; every span from a `(' to its
% matching `)' (nesting included) is replaced by a single stem whose meta
% is `{parens, OpenTk, CloseTk}' and whose kids are the contents of the
% span
%
% quirk: a stray `)' at the top level is kept as an ordinary leaf, while
% an unclosed `(' is an error
f2f_parens(Forest) ->
    f2f_parens([], Forest).

% open paren: let slurp_pstem/3 eat everything up to the matching close
f2f_parens(Acc, [#tk{str = "("} = Open | AfterOpen]) ->
    {slurp, Stem, AfterClose} =
        slurp_pstem({parens, Open, none}, [], AfterOpen),
    f2f_parens([Stem | Acc], AfterClose);
% anything else is carried over as it is
f2f_parens(Acc, [Other | Rest]) ->
    f2f_parens([Other | Acc], Rest);
% done
f2f_parens(Acc, []) ->
    lists:reverse(Acc).
isnt_str(X, Y) ->
not is_str(X, Y).
is_str(S, #tk{str = S}) -> true;
is_str(_, _) -> false.
% collect the contents of one parenthesized group; the open paren has
% already been consumed and sits in the meta, whose close slot is still
% `none'
%
% close paren: the group is complete
slurp_pstem({parens, Open, none}, Acc, [#tk{str = ")"} = Close | Rest]) ->
    Stem = #ns{meta = {parens, Open, Close},
               kids = lists:reverse(Acc)},
    {slurp, Stem, Rest};
% nested open paren: slurp the inner group first, it becomes one kid
slurp_pstem(Meta, Acc, [#tk{str = "("} = InnerOpen | AfterOpen]) ->
    {slurp, Inner, AfterClose} =
        slurp_pstem({parens, InnerOpen, none}, [], AfterOpen),
    slurp_pstem(Meta, [Inner | Acc], AfterClose);
% anything else is a kid of the current group
slurp_pstem(Meta, Acc, [Other | Rest]) ->
    slurp_pstem(Meta, [Other | Acc], Rest);
% ran out of tokens before the close paren
slurp_pstem({parens, Open, none}, Acc, []) ->
    error({no_close_for, Open, Acc}).
+3 -1
View File
@@ -3,7 +3,7 @@
-export([
ct_dir/0,
ct_file/1
ct_file/1, ct_file_abspath/1
]).
@@ -14,6 +14,8 @@ ct_dir() ->
zx_daemon:get_home() ++ "/ct".
% alias for ct_file/1 (whose spec already names its result `AbsPath')
ct_file_abspath(Name) ->
ct_file(Name).
-spec ct_file(Name) -> AbsPath when
Name :: string(),
+32
View File
@@ -143,3 +143,35 @@
| #gsc_err_nyi{}
| #gsc_err_empty_file{}
| #gsc_err{}.
%----------------------------
% tree type for parsing
%----------------------------
% @doc stem record
%
% `meta' is the value carried by the stem itself, `kids' is the list of
% child trees (stems or bare leafs)
-record(ns, {meta :: any(),
kids :: list(any())}).
% @doc `ntree(S, L)' is a "node tree" (meaning stems
% have values and children)
%
% for the purposes of the compiler, the key observation
% is that a flat list of tokens is already a forest
-type ntree(S, L) :: #ns{meta :: S, kids :: [ntree(S, L)]}
| L.
% @doc forest is just a list of trees
-type nforest(S, L) :: [ntree(S, L)].
% aliases
-type nt(S, L) :: ntree(S, L).
-type nf(S, L) :: nforest(S, L).
-type ntree() :: ntree(any(), any()).
-type nforest() :: [ntree()].
-type nt() :: ntree().
-type nf() :: nforest().
+15 -27
View File
@@ -1,41 +1,29 @@
% @doc bikeshed proctrastination head into vim warmup thing
% @doc bikeshed procrastination head into vim warmup
% thing
%
% sophia compiler from scratch by PRH
%
% based on original sophia compiler
%
% parse layers:
% 1. gsc_tokens: SrcStr -> (Tokens | SigTokens)
%
% SigTokens = not comment/whitespace
%
% layers:
% a. gsc_strmatch : matches string shapes
% b. gso_scan : converts to so_scan shapes
%
%
% terminology:
%
% - `slurp`/`barf` borrowed from emacs paredit mode:
%
% slurp : (a b) c -> (a b c)
% barf : (a b c) -> a (b c)
%
% * `slurp` usually involves *transforming* input
% into a new type (e.g. slurp a token from src
% string); think of slurp as a verb meaning to
% consume and then digest
% * `barf` basically means blindly splitting off
% input
%
% based on original sophia compiler; target for version
% 0.1 is to match behavior exactly
% @end
-module(gsc).
% token and tokens
-export_type([
token/0,
signal/0
]).
% syntax tree/forest wrapper type
-export_type([
ntree/2, ntree/0,
nforest/2, nforest/0,
nt/2, nt/0,
nf/2, nf/0
]).
-export([
unsafe_tokens_from_file/1,
unsafe_tokens_from_string/1,
+37 -109
View File
@@ -1,127 +1,55 @@
-module(gsc_ntree).
-export_type([
ntree/2,
ntree/0
]).
-export([
nstem/2,
flatten/1,
deleaf0/1,
releaf0/2
nstem/2, meta/1, kids/1,
flatten_tree/1, flatten_forest/1
]).
-include("$gsc_include/gsc.hrl").
%%=====================================================
%% API: types
%%=====================================================
-record(ns, {val :: any(), kids :: list(any())}).
-record(nl, {val :: any()}).
%% @doc ntree(S, L) is a "node tree" (meaning stems
%% have values and children)
-type ntree(S, L)
:: #ns{val :: S, kids :: [ntree(S, L)]}
| #nl{val :: L}.
-type ntree() :: ntree(any(), any()).
%%=====================================================
%% API: functions
%%=====================================================
-spec nstem(Root, List) -> Tree when
Root :: X,
List :: list(Y),
Tree :: ntree(X, Y),
X :: any(),
Y :: any().
% @doc
% You *probably* want `releaf0/2' instead.
%
% This function naively wraps each element in the list
% in a leaf type, even if it's already wrapped.
%
% nstem(root, [Foo, Bar, Baz]) ~>
% *s root
% |
% +--- .l Foo
% |
% +--- .l Bar
% |
% +--- .l Baz
%
% Much more common use case is to releaf only the input
% nodes which are not already wrapped, which is what
% `releaf0/2' does.
% @end
nstem(Root, List) ->
{ns, Root, [{nl, Y} || Y <- List]}.
-spec flatten(Tree) -> LeafVals when
Tree :: ntree(any(), LeafType),
LeafVals :: [LeafType],
LeafType :: any().
flatten({nl, X}) ->
[X];
flatten({ns, _, Keeids}) ->
lists:flatten([flatten(Keeid) || Keeid <- Keeids]).
-spec deleaf0(Tree) -> Result when
Tree :: ntree(S, L),
Result :: [L | Tree],
S :: any(),
L :: any().
% @doc unwrap the leaf children, and leave the stem
% children intact
%
% ex. 1:
% (+ 1 2 (* 3 4) 5)
% ~> '(1 2 (* 3 4) 5)
%
% ex. 2:
% {ns, '+', [{nl, 1},
% {nl, 2},
% {ns, '*', [{nl, 3}, {nl, 4}]},
% {nl, 5}]}
% ~> [1, 2, {ns, '*', [{nl, 3}, {nl, 4}]}, 5]
% @end
deleaf0({nl, L}) -> [L];
deleaf0({ns, _, Ls}) -> dl0([], Ls).
dl0(Stk, []) -> lists:reverse(Stk);
dl0(Stk, [{nl, X} | Rest]) -> dl0([X | Stk], Rest);
dl0(Stk, [X | Rest]) -> dl0([X | Stk], Rest).
-spec releaf0(Root, Keeids) -> Rooted when
-spec nstem(Root, Forest) -> Tree when
Root :: S,
Keeids :: [L | ntree(S, L)],
Rooted :: ntree(S, L),
Forest :: nforest(S, L),
Tree :: ntree(S, L),
S :: any(),
L :: any().
% @doc notional inverse of `deleaf0/1'
%
% Note that this does **NOT** double-wrap leafs in the
% input
releaf0(Root, Ks) ->
#ns{val = Root,
kids = lists:map(fun rl0/1, Ks)}.
% build a stem with the given meta directly on top of the given forest;
% the kids are taken as they are, nothing gets wrapped
nstem(Meta, Kids) ->
    {ns, Meta, Kids}.
rl0(X = #ns{}) -> X;
rl0(X = #nl{}) -> X;
rl0(X) -> {nl, X}.
% accessors for the two fields of a stem; anything that is not a stem
% (i.e. a leaf) is a function_clause error
meta(#ns{meta = M}) -> M.
kids(#ns{kids = K}) -> K.
-spec flatten_tree(Tree) -> Leafs when
    Tree  :: ntree(_, L),
    Leafs :: [L],
    L     :: any().
% @doc
% all leafs of the tree, left to right; a bare leaf gives a one-element
% list
%
% leafs are never looked into, so a leaf that happens to be a list (a
% string, say) comes back as it went in
flatten_tree(T) ->
    ft(T).

-spec flatten_forest(Forest) -> Leafs when
    Forest :: nforest(_, L),
    Leafs  :: [L],
    L      :: any().
% @doc
% all leafs of every tree in the forest, left to right
flatten_forest(F) ->
    ff(F).

% a stem contributes the leafs of its kids, a leaf contributes itself
ft(#ns{kids = F}) -> ff(F);
ft(Leaf)          -> [Leaf].

% every ft/1 result is already a flat list of leafs, so appending them is
% enough; lists:flatten/1 would also tear apart list-valued leafs
ff(F) ->
    lists:flatmap(fun ft/1, F).
+111
View File
@@ -0,0 +1,111 @@
% signal = non-noisy tokens
-module(gsc_signal).
-export([
from_tokens/1,
is_block/1,
gulp_block_items/1,
block_to_items/1,
take_block_item/1
]).
-include("$gsc_include/gsc.hrl").
-spec from_tokens(Tokens) -> Signal when
    Tokens :: [tk()],
    Signal :: [tk()].
% @doc keep only the significant tokens (drops comments/whitespace)
from_tokens(Tks) ->
    gsc_tokens:filter_significant(Tks).
-spec is_block(Signal) -> Result when
    Signal :: [tk()],
    Result :: boolean().
% @doc
% a signal is a block when no token sits left of the column of its first
% token; the empty signal counts as a block
is_block([#tk{pos = {_, BlkCol}} | Rest]) ->
    LeftOfBlock = fun(#tk{pos = {_, Col}}) -> Col < BlkCol end,
    not lists:any(LeftOfBlock, Rest);
is_block([]) ->
    true.
-spec gulp_block_items(Signal) -> Result when
    Signal :: [tk()],
    Result :: {gulp, Items}
            | {error, any()},
    Items  :: [Signal].
% @doc
% split a block's signal into its items
%
% returns `{gulp, Items}' when the signal is a block (see is_block/1);
% otherwise returns `{error, {bad_block, Details}}' pointing at the first
% token that sits left of the block's starting column
gulp_block_items(S) ->
    case is_block(S) of
        true  -> {gulp, block_to_items(S)};
        false -> find_badness(S)
    end.
% locate the first token that sits left of the column of the signal's
% first token and report it as a `bad_block' error
find_badness([#tk{pos = {_, StartCol}} = StartTk | Rest]) ->
    find_badness(StartCol, StartTk, Rest).

% offending token found
find_badness(StartCol, StartTk, [#tk{pos = {_, Col}} = Tk | _])
  when Col < StartCol ->
    {error, {bad_block, [{start_col, StartCol},
                         {end_col, Col},
                         {start_tk, StartTk},
                         {end_tk, Tk}]}};
% this token is fine, keep looking
find_badness(StartCol, StartTk, [#tk{pos = {_, _}} | Rest]) ->
    find_badness(StartCol, StartTk, Rest).
-spec block_to_items(Signal) -> BlockItems when
    Signal     :: [tk()],
    BlockItems :: [Signal].
% @doc
% cut a block's signal into one signal per item
%
% naive: an item is a token plus the deeper-indented tokens following it,
% so nothing here checks that the items all start at the same indent
%
% Input:
%   foo = ...
%   bar = ...
%   baz = ...
%
% Output:
%   [foo = ...,
%    bar = ...,
%    baz = ...]
block_to_items(Signal) ->
    % the empty signal falls straight through to the empty item list
    b2is([], Signal).
% worker for block_to_items/1: peel items off the front one at a time,
% stacking them in reverse
b2is(Items, []) ->
    lists:reverse(Items);
b2is(Items, Signal) ->
    {Item, Rest} = take_block_item(Signal),
    b2is([Item | Items], Rest).
-spec take_block_item(Signal) -> Result when
    Signal    :: [tk()],
    Result    :: {Item, NewSignal},
    Item      :: Signal,
    NewSignal :: Signal.
% @doc
% split the first item off the signal: its first token plus every
% immediately following token indented strictly deeper than that token;
% the empty signal gives an empty item
take_block_item([#tk{pos = {_, ItemCol}} = First | Rest]) ->
    {Deeper, Remaining} =
        lists:splitwith(fun(#tk{pos = {_, Col}}) -> Col > ItemCol end, Rest),
    {[First | Deeper], Remaining};
take_block_item([]) ->
    {[], []}.