Compare commits
9 Commits
ef69016294
..
master
| Author | SHA1 | Date | |
|---|---|---|---|
| f04b7311f5 | |||
| 10424927b1 | |||
| fdb40dcb92 | |||
| e180dc955d | |||
| 4e54bebeba | |||
| 4f4adaa284 | |||
| 2c36a02331 | |||
| 5cae022b8b | |||
| dfb158e593 |
+12
-7
@@ -1,10 +1,15 @@
|
||||
# TODONE
|
||||
|
||||
# TODO
|
||||
|
||||
- barf for outputs, slurp for inputs
|
||||
- architecture needs more careful thought but only after something works
|
||||
- architecture needs more careful thought but only after something
|
||||
works
|
||||
- too fuzzy right now
|
||||
- possibly:
|
||||
- rename parser layers sequentially:
|
||||
- gsc_
|
||||
- undo gs_ naming fuckery.. everything is `gsc_*`. it's just
|
||||
needlessly confusing. for now let's name new things gsc_* and then
|
||||
go back and undo the stupidity
|
||||
|
||||
# TONOTDO
|
||||
|
||||
- barf for outputs, slurp for inputs
|
||||
- rename parser layers sequentially
|
||||
|
||||
# TODONE
|
||||
|
||||
@@ -0,0 +1,77 @@
|
||||
|
||||
-spec s2t_file(Signal) -> AstFile when
    Signal  :: [tk()],
    AstFile :: #ns{meta :: file, kids :: asf()}.
% @doc
% parse the signal of a whole file into a `file' node
%
% the column of the very first token is taken as the column of the
% top-level block; the resulting node has exactly one kid, that block,
% with its items already run through t2t_parse_tds_in_block/1
%
% an empty signal is rejected with error(empty_file)

s2t_file([#tk{pos = {_, Col}} | _] = Signal) ->
    RawBlock = s2t_gulp_block(Col, Signal),
    #ns{meta = file, kids = [t2t_parse_tds_in_block(RawBlock)]};
s2t_file([]) ->
    error(empty_file).
|
||||
|
||||
-spec s2t_gulp_block(BlkCol, Signal) -> Block when
    BlkCol :: pos_integer(),
    Signal :: [tk()],
    Block  :: #ns{meta :: block}.
% @doc
% consume the entire signal as one block sitting at column BlkCol
%
% crashes with a badmatch if any token lies to the left of BlkCol

s2t_gulp_block(BCol, Tks) ->
    % sanity check: every token is at or to the right of the block column
    true = lists:all(fun(#tk{pos = {_, TCol}}) -> TCol >= BCol end, Tks),
    #ns{meta = block, kids = s2f_block_items(BCol, Tks)}.
|
||||
|
||||
-spec s2f_block_items(BCol, Signal) -> BlkItems when
    BCol     :: pos_integer(),
    Signal   :: [tk()],
    BlkItems :: [BlkItem],
    BlkItem  :: #ns{meta :: block_item,
                    kids :: asf()}.
% @doc
% cut a block's signal into its items
%
% every item starts with a token sitting exactly at column BCol; a
% leading token at any other column has no matching clause

s2f_block_items(BCol, Signal) ->
    s2f_block_items(BCol, [], Signal).


% Acc collects the finished items, newest first
s2f_block_items(BCol, Acc, [#tk{pos = {_, BCol}} = First | Tail]) ->
    {slurp, Item, Remaining} = s2t_slurp_block_item(BCol, First, Tail),
    s2f_block_items(BCol, [Item | Acc], Remaining);
s2f_block_items(_BCol, Acc, []) ->
    lists:reverse(Acc).
|
||||
|
||||
|
||||
% take one block item off the front of the signal
%
% T0 is the token that opens the item, F0 is everything after it;
% returns {slurp, BlockItemNode, LeftoverSignal}
s2t_slurp_block_item(BCol, T0, F0) ->
    {Kids, Leftover} = s2s_sw_block_item(BCol, T0, F0),
    {slurp, #ns{meta = block_item, kids = Kids}, Leftover}.
|
||||
|
||||
% sw = splitwith
%
% the item is T0 plus the longest run of following tokens that sit
% strictly to the right of BCol; returns {ItemTokens, RestOfSignal}
s2s_sw_block_item(BCol, T0, F0) ->
    {Deeper, Rest} =
        lists:splitwith(fun(#tk{pos = {_, TCol}}) -> TCol > BCol end, F0),
    {[T0 | Deeper], Rest}.
|
||||
|
||||
-spec t2t_parse_tds_in_block(Block0) -> Block1 when
    Block0 :: ast(),
    Block1 :: ast().
% @doc
% replace every block_item kid of a block node with the top
% declaration parsed from it; the block node itself is kept

t2t_parse_tds_in_block(Block = #ns{meta = block, kids = Items}) ->
    Block#ns{kids = [t2t_parse_td_from_item(Item) || Item <- Items]}.
|
||||
|
||||
|
||||
-spec t2t_parse_td_from_item(BlockItem) -> TopDecl when
|
||||
BlockItem :: #ns{meta :: block_item},
|
||||
TopDecl :: #ns{meta :: td_meta()}.
|
||||
|
||||
t2t_parse_td_from_item(#ns{meta = block_item, kids = Signal}) ->
|
||||
s2t_top_decl(Signal).
|
||||
|
||||
|
||||
-spec s2t_top_decl(Signal) -> TdTree when
|
||||
Signal :: [tk()],
|
||||
TdTree :: ast().
|
||||
|
||||
s2t_top_decl(S0) ->
|
||||
@@ -0,0 +1,292 @@
|
||||
# Syntax
|
||||
|
||||
## Lexical syntax
|
||||
|
||||
### Comments
|
||||
|
||||
Single line comments start with `//` and block comments are enclosed in `/*`
|
||||
and `*/` and can be nested.
|
||||
|
||||
### Keywords
|
||||
|
||||
```
|
||||
contract include let switch type record datatype if elif else function
|
||||
stateful payable true false mod public entrypoint private indexed namespace
|
||||
interface main using as for hiding
|
||||
```
|
||||
|
||||
### Tokens
|
||||
|
||||
- `Id = [a-z_][A-Za-z0-9_']*` identifiers start with a lower case letter or an underscore.
|
||||
- `Con = [A-Z][A-Za-z0-9_']*` constructors start with an upper case letter.
|
||||
- `QId = (Con\.)+Id` qualified identifiers (e.g. `Map.member`)
|
||||
- `QCon = (Con\.)+Con` qualified constructor
|
||||
- `TVar = 'Id` type variable (e.g. `'a`, `'b`)
|
||||
- `Int = [0-9]+(_[0-9]+)*|0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*` integer literal with optional `_` separators
|
||||
- `Bytes = #[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*` byte array literal with optional `_` separators
|
||||
- `String` string literal enclosed in `"` with escape character `\`
|
||||
- `Char` character literal enclosed in `'` with escape character `\`
|
||||
- `AccountAddress` base58-encoded 32 byte account pubkey with `ak_` prefix
|
||||
- `ContractAddress` base58-encoded 32 byte contract address with `ct_` prefix
|
||||
- `Signature` base58-encoded 64 byte cryptographic signature with `sg_` prefix
|
||||
|
||||
Valid string escape codes are
|
||||
|
||||
| Escape | ASCII | |
|
||||
|---------------|-------------|---|
|
||||
| `\b` | 8 | |
|
||||
| `\t` | 9 | |
|
||||
| `\n` | 10 | |
|
||||
| `\v` | 11 | |
|
||||
| `\f` | 12 | |
|
||||
| `\r` | 13 | |
|
||||
| `\e` | 27 | |
|
||||
| `\xHexDigits` | *HexDigits* | |
|
||||
|
||||
|
||||
See the [identifier encoding scheme](https://git.qpq.swiss/QPQ-AG/protocol/src/branch/master/node/api/api_encoding.md) for the
|
||||
details on the base58 literals.
|
||||
|
||||
## Layout blocks
|
||||
|
||||
Sophia uses Python-style layout rules to group declarations and statements. A
|
||||
layout block with more than one element must start on a separate line and be
|
||||
indented more than the currently enclosing layout block. Blocks with a single
|
||||
element can be written on the same line as the previous token.
|
||||
|
||||
Each element of the block must share the same indentation and no part of an
|
||||
element may be indented less than the indentation of the block. For instance
|
||||
|
||||
```sophia
|
||||
contract Layout =
|
||||
function foo() = 0 // no layout
|
||||
function bar() = // layout block starts on next line
|
||||
let x = foo() // indented more than 2 spaces
|
||||
x
|
||||
+ 1 // the '+' is indented more than the 'x'
|
||||
```
|
||||
|
||||
## Notation
|
||||
|
||||
In describing the syntax below, we use the following conventions:
|
||||
|
||||
- Upper-case identifiers denote non-terminals (like `Expr`) or terminals with
|
||||
some associated value (like `Id`).
|
||||
- Keywords and symbols are enclosed in single quotes: `'let'` or `'='`.
|
||||
- Choices are separated by vertical bars: `|`.
|
||||
- Optional elements are enclosed in `[` square brackets `]`.
|
||||
- `(` Parentheses `)` are used for grouping.
|
||||
- Zero or more repetitions are denoted by a postfix `*`, and one or more
|
||||
repetitions by a `+`.
|
||||
- `Block(X)` denotes a layout block of `X`s.
|
||||
- `Sep(X, S)` is short for `[X (S X)*]`, i.e. a possibly empty sequence of `X`s
|
||||
separated by `S`s.
|
||||
- `Sep1(X, S)` is short for `X (S X)*`, i.e. same as `Sep`, but must not be empty.
|
||||
|
||||
|
||||
## Declarations
|
||||
|
||||
A Sophia file consists of a sequence of *declarations* in a layout block.
|
||||
|
||||
```c
|
||||
File ::= Block(TopDecl)
|
||||
|
||||
TopDecl ::= ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl)
|
||||
| 'contract' 'interface' Con [Implement] '=' Block(Decl)
|
||||
| 'namespace' Con '=' Block(Decl)
|
||||
| '@compiler' PragmaOp Version
|
||||
| 'include' String
|
||||
| Using
|
||||
|
||||
Implement ::= ':' Sep1(Con, ',')
|
||||
|
||||
Decl ::= 'type' Id ['(' TVar* ')'] '=' TypeAlias
|
||||
| 'record' Id ['(' TVar* ')'] '=' RecordType
|
||||
| 'datatype' Id ['(' TVar* ')'] '=' DataType
|
||||
| 'let' Id [':' Type] '=' Expr
|
||||
| (EModifier* 'entrypoint' | FModifier* 'function') Block(FunDecl)
|
||||
| Using
|
||||
|
||||
FunDecl ::= Id ':' Type // Type signature
|
||||
| Id Args [':' Type] '=' Block(Stmt) // Definition
|
||||
| Id Args [':' Type] Block(GuardedDef) // Guarded definitions
|
||||
|
||||
GuardedDef ::= '|' Sep1(Expr, ',') '=' Block(Stmt)
|
||||
|
||||
Using ::= 'using' Con ['as' Con] [UsingParts]
|
||||
UsingParts ::= 'for' '[' Sep1(Id, ',') ']'
|
||||
| 'hiding' '[' Sep1(Id, ',') ']'
|
||||
|
||||
PragmaOp ::= '<' | '=<' | '==' | '>=' | '>'
|
||||
Version ::= Sep1(Int, '.')
|
||||
|
||||
EModifier ::= 'payable' | 'stateful'
|
||||
FModifier ::= 'stateful' | 'private'
|
||||
|
||||
Args ::= '(' Sep(Pattern, ',') ')'
|
||||
```
|
||||
|
||||
Contract declarations must appear at the top-level.
|
||||
|
||||
For example,
|
||||
```sophia
|
||||
contract Test =
|
||||
type t = int
|
||||
entrypoint add (x : t, y : t) = x + y
|
||||
```
|
||||
|
||||
There are three forms of type declarations: type aliases (declared with the
|
||||
`type` keyword), record type definitions (`record`) and data type definitions
|
||||
(`datatype`):
|
||||
|
||||
```c
|
||||
TypeAlias ::= Type
|
||||
RecordType ::= '{' Sep(FieldType, ',') '}'
|
||||
DataType ::= Sep1(ConDecl, '|')
|
||||
|
||||
FieldType ::= Id ':' Type
|
||||
ConDecl ::= Con ['(' Sep1(Type, ',') ')']
|
||||
```
|
||||
|
||||
For example,
|
||||
```sophia
|
||||
record point('a) = {x : 'a, y : 'a}
|
||||
datatype shape('a) = Circle(point('a), 'a) | Rect(point('a), point('a))
|
||||
type int_shape = shape(int)
|
||||
```
|
||||
|
||||
## Types
|
||||
|
||||
```c
|
||||
Type ::= Domain '=>' Type // Function type
|
||||
| Type '(' Sep(Type, ',') ')' // Type application
|
||||
| '(' Type ')' // Parens
|
||||
| 'unit' | Sep(Type, '*') // Tuples
|
||||
| Id | QId | TVar
|
||||
|
||||
Domain ::= Type // Single argument
|
||||
| '(' Sep(Type, ',') ')' // Multiple arguments
|
||||
```
|
||||
|
||||
The function type arrow associates to the right.
|
||||
|
||||
Example,
|
||||
```sophia
|
||||
'a => list('a) => (int * list('a))
|
||||
```
|
||||
|
||||
## Statements
|
||||
|
||||
Function bodies are blocks of *statements*, where a statement is one of the following:
|
||||
|
||||
```c
|
||||
Stmt ::= 'switch' '(' Expr ')' Block(Case)
|
||||
| 'if' '(' Expr ')' Block(Stmt)
|
||||
| 'elif' '(' Expr ')' Block(Stmt)
|
||||
| 'else' Block(Stmt)
|
||||
| 'let' LetDef
|
||||
| Using
|
||||
| Expr
|
||||
|
||||
LetDef ::= Id Args [':' Type] '=' Block(Stmt) // Function definition
|
||||
| Pattern '=' Block(Stmt) // Value definition
|
||||
|
||||
Case ::= Pattern '=>' Block(Stmt)
|
||||
| Pattern Block(GuardedCase)
|
||||
|
||||
GuardedCase ::= '|' Sep1(Expr, ',') '=>' Block(Stmt)
|
||||
|
||||
Pattern ::= Expr
|
||||
```
|
||||
|
||||
`if` statements can be followed by zero or more `elif` statements and an optional final `else` statement. For example,
|
||||
|
||||
```sophia
|
||||
let x : int = 4
|
||||
switch(f(x))
|
||||
None => 0
|
||||
Some(y) =>
|
||||
if(y > 10)
|
||||
"too big"
|
||||
elif(y < 3)
|
||||
"too small"
|
||||
else
|
||||
"just right"
|
||||
```
|
||||
|
||||
## Expressions
|
||||
|
||||
```c
|
||||
Expr ::= '(' LamArgs ')' '=>' Block(Stmt) // Anonymous function (x) => x + 1
|
||||
| '(' BinOp ')' // Operator lambda (+)
|
||||
| 'if' '(' Expr ')' Expr 'else' Expr // If expression if(x < y) y else x
|
||||
| Expr ':' Type // Type annotation 5 : int
|
||||
| Expr BinOp Expr // Binary operator x + y
|
||||
| UnOp Expr // Unary operator ! b
|
||||
| Expr '(' Sep(Expr, ',') ')' // Application f(x, y)
|
||||
| Expr '.' Id // Projection state.x
|
||||
| Expr '[' Expr ']' // Map lookup map[key]
|
||||
| Expr '{' Sep(FieldUpdate, ',') '}' // Record or map update r{ fld[key].x = y }
|
||||
| '[' Sep(Expr, ',') ']' // List [1, 2, 3]
|
||||
| '[' Expr '|' Sep(Generator, ',') ']'
|
||||
// List comprehension [k | x <- [1], if (f(x)), let k = x+1]
|
||||
| '[' Expr '..' Expr ']' // List range [1..n]
|
||||
| '{' Sep(FieldUpdate, ',') '}' // Record or map value {x = 0, y = 1}, {[key] = val}
|
||||
| '(' Expr ')' // Parens (1 + 2) * 3
|
||||
| '(' Expr '=' Expr ')' // Assign pattern (y = x::_)
|
||||
| Id | Con | QId | QCon // Identifiers x, None, Map.member, AELib.Token
|
||||
| Int | Bytes | String | Char // Literals 123, 0xff, #00abc123, "foo", '%'
|
||||
| AccountAddress | ContractAddress // Chain identifiers
|
||||
| Signature // Signature
|
||||
| '???' // Hole expression 1 + ???
|
||||
|
||||
Generator ::= Pattern '<-' Expr // Generator
|
||||
| 'if' '(' Expr ')' // Guard
|
||||
| LetDef // Definition
|
||||
|
||||
LamArgs ::= Sep(LamArg, ',')
|
||||
LamArg ::= Id [':' Type]
|
||||
|
||||
FieldUpdate ::= Path '=' Expr
|
||||
Path ::= Id // Record field
|
||||
| '[' Expr ']' // Map key
|
||||
| Path '.' Id // Nested record field
|
||||
| Path '[' Expr ']' // Nested map key
|
||||
|
||||
BinOp ::= '||' | '&&' | '<' | '>' | '=<' | '>=' | '==' | '!='
|
||||
| '::' | '++' | '+' | '-' | '*' | '/' | 'mod' | '^'
|
||||
| 'band' | 'bor' | 'bxor' | '<<' | '>>' | '|>'
|
||||
UnOp ::= '-' | '!' | 'bnot'
|
||||
```
|
||||
|
||||
## Operator types
|
||||
|
||||
| Operators | Type
|
||||
| --- | ---
|
||||
| `-` `+` `*` `/` `mod` `^` | arithmetic operators
|
||||
| `!` `&&` `\|\|` | logical operators
|
||||
| `band` `bor` `bxor` `bnot` `<<` `>>` | bitwise operators
|
||||
| `==` `!=` `<` `>` `=<` `>=` | comparison operators
|
||||
| `::` `++` | list operators
|
||||
| `\|>` | functional operators
|
||||
|
||||
## Operator precedence
|
||||
|
||||
In order of highest to lowest precedence.
|
||||
|
||||
| Operators | Associativity
|
||||
| --- | ---
|
||||
| `!` `bnot`| right
|
||||
| `^` | left
|
||||
| `*` `/` `mod` | left
|
||||
| `-` (unary) | right
|
||||
| `+` `-` | left
|
||||
| `<<` `>>` | left
|
||||
| `::` `++` | right
|
||||
| `<` `>` `=<` `>=` `==` `!=` | none
|
||||
| `band` | left
|
||||
| `bxor` | left
|
||||
| `bor` | left
|
||||
| `&&` | right
|
||||
| `\|\|` | right
|
||||
| `\|>` | left
|
||||
@@ -0,0 +1,67 @@
|
||||
-spec mktree(Signal) -> Tree when
    Signal :: gsc:signal(),
    Tree   :: gsc_ntree:ntree().
% @doc
% make the signal into a tree
%
% the signal is first wrapped in a single `vtokens' stem, which is
% then re-rooted on "=>" and, after that, on "*"

mktree(Sig) ->
    Stem = gsc_ntree:nstem(vtokens, Sig),
    % foldl hands each operator string to rerootl_tkstr/2 in list order
    lists:foldl(fun rerootl_tkstr/2, Stem, ["=>", "*"]).
|
||||
|
||||
|
||||
% re-root a stem on the leftmost kid token whose string is S
%
% when such a token is found, the kids before it and the kids after it
% are each re-wrapped under the original root value, the right-hand
% side is re-rooted recursively, and both become the two kids of a new
% `{op, Token}' root:
%
%   Root0 [Foo, "=>", Bar]   becomes   {op, "=>"} [Root0 [Foo], Root0 [Bar]]
%
% when no kid matches, the tree is returned unchanged
%
% NOTE(review): presumably gsc_ntree:deleaf0/1 yields the kids of a
% stem and gsc_ntree:releaf0/2 builds a stem from a root value and
% kids; confirm against gsc_ntree
rerootl_tkstr(S, Tree0 = #ns{val = Root0}) ->
    Kids0 = gsc_ntree:deleaf0(Tree0),
    case lists:splitwith(fun(Tk) -> isnt_str(S, Tk) end, Kids0) of
        % not found, nothing to do
        {_AllKids, []} ->
            Tree0;
        % found
        {Before, [OpTk | After]} ->
            Left  = gsc_ntree:releaf0(Root0, Before),
            Right = rerootl_tkstr(S, gsc_ntree:releaf0(Root0, After)),
            gsc_ntree:releaf0({op, OpTk}, [Left, Right])
    end.
|
||||
|
||||
|
||||
%reroot_mapsto(Tree0 = #ns{val = Root0}) ->
|
||||
% Kids0 = gsc_ntree:deleaf0(Tree0),
|
||||
% IsntMapsto = fun(DL) -> isnt_str("=>", Tk) end,
|
||||
% case lists:splitwith(IsntMapsto, Kids0) of
|
||||
% % found
|
||||
% {LHS1, [Tk0 | RHS1]} ->
|
||||
% Root1 = Root0,
|
||||
% LTree1 = gsc_ntree:releaf0(Root1, LHS1),
|
||||
% RTree1 = reroot_mapsto(gsc_ntree:releaf0(Root1, RHS1)),
|
||||
% NewRoot0 = {op, Tk0},
|
||||
% NewKids0 = [LTree1, RTree1],
|
||||
% NewTree = gsc_ntree:releaf0(NewRoot0, NewKids0),
|
||||
% NewTree;
|
||||
% % nothing to do
|
||||
% {Kids0, []} ->
|
||||
% Tree0
|
||||
% end.
|
||||
|
||||
|
||||
|
||||
% negation of is_str/2
isnt_str(X, Y) ->
    is_str(X, Y) =:= false.


% true only for a token whose string is exactly S; anything that is
% not a token is never a match
is_str(S, #tk{str = Str}) -> Str =:= S;
is_str(_, _)              -> false.
|
||||
+68
-5
@@ -47,6 +47,8 @@ do(["list", "tests"]) ->
|
||||
do_tlist();
|
||||
do(["test"]) ->
|
||||
do_tests();
|
||||
do(["test" | Tests]) ->
|
||||
do_tests(Tests);
|
||||
do(["tests"]) ->
|
||||
do_tests();
|
||||
do(["run", "tests"]) ->
|
||||
@@ -75,18 +77,79 @@ do(Args) ->
|
||||
|
||||
do_doi() ->
|
||||
FP = zx:get_home() ++ "/priv/doi.txt",
|
||||
Cmd = "less " ++ FP,
|
||||
io:format("~s~n", [Cmd]).
|
||||
page_file(FP).
|
||||
|
||||
% show a file in the `less' pager, or dump it to stdout when `less' is
% not installed
%
% os:cmd is no use here: it runs a command in the background and
% captures its output, it cannot let the command take over the screen
page_file(FilePath) ->
    case os:find_executable("less") of
        false    -> cat_file(FilePath);
        LessPath -> less_file(LessPath, FilePath)
    end.
|
||||
|
||||
cat_file(FilePath) ->
|
||||
{ok, Bytes} = file:read_file(FilePath),
|
||||
io:format("~ts", [Bytes]).
|
||||
|
||||
% run the pager at path Less on FilePath and block until it exits
%
% returns ok on exit status 0, raises {less_exit_status, N} otherwise
less_file(Less, FilePath) ->
    % nouse_stdio keeps the port protocol off the pager's stdin/stdout,
    % exit_status makes the port report how the pager ended
    PortOpts = [{args, [FilePath]},
                nouse_stdio, exit_status],
    Port = open_port({spawn_executable, Less}, PortOpts),
    receive
        {Port, {exit_status, Status}} ->
            case Status of
                0 -> ok;
                _ -> error({less_exit_status, Status})
            end;
        % NOTE(review): only delivered as a message when the calling
        % process traps exits
        {'EXIT', Port, Reason} ->
            error(Reason)
    end.
|
||||
|
||||
do_tests() ->
|
||||
io:format("TestModules = ~p~n", [known_modules_with_prefix("ts")]),
|
||||
io:format("TestModules = ~p~n", [test_mods()]),
|
||||
do_runall_tests().
|
||||
|
||||
do_runall_tests() ->
|
||||
lists:foreach(fun run_mod_main/1, test_mods()).
|
||||
|
||||
|
||||
do_tests(List) ->
|
||||
lists:foreach(fun run_test/1, List).
|
||||
|
||||
% n
|
||||
% run a single test module picked by name
%
% TestName may be the full module name or the part after "gsc_test_";
% the full name wins when both exist, and a name matching no known
% test module raises {no_such_test, TestName}
run_test(TestName) ->
    % the two candidate module atoms, in order of preference
    Exact    = list_to_atom(TestName),
    Prefixed = list_to_atom("gsc_test_" ++ TestName),
    Known    = test_mods(),
    case [M || M <- [Exact, Prefixed], lists:member(M, Known)] of
        [Mod | _] -> rmm(Mod);
        []        -> error({no_such_test, TestName})
    end.
|
||||
|
||||
|
||||
rmm(X) -> run_mod_main(X).
|
||||
|
||||
% KnownTests = test_mods(),
|
||||
% TestMods = ensure_all_known([], List, KnownTests),
|
||||
% lists:foreach(fun run_mod_main/1, TestMods).
|
||||
|
||||
|
||||
%ensure_all_known(Acc, [], _) ->
|
||||
% lists:sort(Acc);
|
||||
%ensure_all_known(Acc, [T | Ts], Knowns) ->
|
||||
% case lists:member(T, Knowns) of
|
||||
%
|
||||
% end.
|
||||
|
||||
|
||||
test_mods() ->
|
||||
known_modules_with_prefix("gs_test").
|
||||
known_modules_with_prefix("gsc_test").
|
||||
|
||||
known_modules_with_prefix(Pfx) ->
|
||||
ModsZipBeamsZipLoaded = code:all_available(),
|
||||
@@ -134,7 +197,7 @@ do_eshell() ->
|
||||
end.
|
||||
|
||||
tokenizers_agree(File) ->
|
||||
so_tokens(File) =:= tokens(File).
|
||||
gso_tokens(File) =:= so_tokens(File).
|
||||
|
||||
|
||||
do_tokens(FilePath) ->
|
||||
|
||||
@@ -0,0 +1,273 @@
|
||||
% @doc experiment centering around the file syntax node using ntree approach
|
||||
-module(gsc_test_file).
|
||||
|
||||
-export([
|
||||
main/0
|
||||
]).
|
||||
|
||||
-include("$gsc_include/gsc.hrl").
|
||||
|
||||
|
||||
-record(ct,
|
||||
{payable = none :: none | false | {true, tk()},
|
||||
main = none :: none | false | {true, tk()},
|
||||
contract = none :: none | tk(),
|
||||
con = none :: none | tk(),
|
||||
impls = none :: none | [tk()],
|
||||
eq = none :: none | tk()}).
|
||||
|
||||
-type meta() :: #ct{}.
|
||||
|
||||
-record(decl_type,
|
||||
{type = none :: none | tk(),
|
||||
id = none :: none | tk(),
|
||||
params = none :: none | [tk()],
|
||||
eq = none :: none | tk()}).
|
||||
|
||||
-type decl_meta() :: #decl_type{}.
|
||||
|
||||
|
||||
-type ast_meta() :: file
|
||||
| meta()
|
||||
| decl_meta()
|
||||
| nyi
|
||||
| {nyi, any()}
|
||||
.
|
||||
|
||||
|
||||
-type target()
|
||||
:: ct
|
||||
| iface
|
||||
| ns
|
||||
| pragma
|
||||
| include
|
||||
| using
|
||||
.
|
||||
|
||||
-type s2t_target()
|
||||
:: file
|
||||
| top_decl
|
||||
| target()
|
||||
| nyi
|
||||
| {nyi, any()}
|
||||
.
|
||||
|
||||
-type s2f_target()
|
||||
:: {block_of, s2t_target()}
|
||||
.
|
||||
|
||||
|
||||
-type ast() :: ntree(ast_meta(), tk()).
|
||||
-type asf() :: nforest(ast_meta(), tk()).
|
||||
|
||||
% print the source of hello.aes inside a fenced block, followed by the
% tree that s2t(file, _) builds from its signal
main() ->
    Path = ts_utils:ct_file_abspath("hello.aes"),
    {ok, Source} = file:read_file(Path),
    Ast = s2t(file, gsc:unsafe_signal_from_file(Path)),
    Report =
        [{"hello.aes:~n", []},
         {"```~n",        []},
         {"~ts",          [Source]},
         {"```~n~n",      []},
         {"AST: ~tp~n",   [Ast]}],
    lists:foreach(fun({Fmt, Args}) -> io:format(Fmt, Args) end, Report).
|
||||
|
||||
% // Hello World Contract
|
||||
% // Copyright (c) 2025 QPQ AG
|
||||
%
|
||||
% contract Hello =
|
||||
% type state = unit
|
||||
% entrypoint init(): state =
|
||||
% ()
|
||||
%
|
||||
% entrypoint hello(): string =
|
||||
% "hello, world"
|
||||
|
||||
-spec s2t(ParseTarget, Signal) -> AST when
    ParseTarget :: s2t_target() | atom(),
    Signal      :: [tk()],
    AST         :: ast().
% @doc
% signal-to-tree: parse Signal as the syntax element named by
% ParseTarget
%
% a target without a dedicated clause is not an error: its signal is
% wrapped untouched in a `{nyi, Target}' node, so the unfinished parts
% of the grammar show up in the tree instead of crashing the parse
%
% raises empty_file for an empty file signal, {bad_top_decl, Signal}
% for an unrecognised top-level declaration and {bad_decl, Signal} for
% an unrecognised declaration

% File ::= Block(TopDecl)
s2t(file, Signal) ->
    case Signal of
        [] -> error(empty_file);
        _  -> {ns, file, s2f({block_of, top_decl}, Signal)}
    end;
% TopDecl ::= ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl)
%           | ['payable'] 'contract' 'interface' Con [Implement] '=' Block(Decl)
%           | 'namespace' Con '=' Block(Decl)
%           | '@compiler' PragmaOp Version
%           | 'include' String
%           | Using
s2t(top_decl, Signal) ->
    NewTarget =
        case gsc_tokens:strings(3, Signal) of
            ["payable", "contract", "interface"] -> iface;
            ["contract", "interface" | _]        -> iface;
            ["payable", "main", "contract"]      -> ct;
            ["payable", "contract" | _]          -> ct;
            % 'main' without 'payable' is allowed by the grammar
            ["main", "contract" | _]             -> ct;
            ["contract" | _]                     -> ct;
            ["namespace" | _]                    -> namespace;
            ["@compiler" | _]                    -> pragma;
            ["include" | _]                      -> include;
            ["using" | _]                        -> using;
            _                                    -> error({bad_top_decl, Signal})
        end,
    s2t(NewTarget, Signal);
% ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl)
s2t(ct, S0) ->
    {slurp, CtMeta, S1} = s2s_slurp_meta(#ct{}, S0),
    {ns, CtMeta, s2f({block_of, decl}, S1)};
% Decl ::= 'type'     Id ['(' TVar* ')'] '=' TypeAlias
%        | 'record'   Id ['(' TVar* ')'] '=' RecordType
%        | 'datatype' Id ['(' TVar* ')'] '=' DataType
%        | 'let' Id [':' Type] '=' Expr
%        | (EModifier* 'entrypoint' | FModifier* 'function') Block(FunDecl)
%        | Using
s2t(decl, S0) ->
    NewTarget =
        case gsc_tokens:strings(3, S0) of
            ["type" | _]     -> decl_type;
            ["record" | _]   -> decl_record;
            ["datatype" | _] -> decl_datatype;
            ["let" | _]      -> decl_let;
            Pfx3 ->
                % modifiers may come first, so look for the keyword
                % anywhere in the leading strings
                IsEp = lists:member("entrypoint", Pfx3),
                IsFn = lists:member("function", Pfx3),
                if
                    IsEp -> decl_entrypoint;
                    IsFn -> decl_function;
                    true -> error({bad_decl, S0})
                end
        end,
    s2t(NewTarget, S0);
% 'type' Id ['(' TVar* ')'] '=' TypeAlias
s2t(decl_type, S0) ->
    {slurp, Meta, S1} = s2s_slurp_meta(#decl_type{}, S0),
    % kids is a forest, so the single type tree goes in a list
    {ns, Meta, [s2t(type, S1)]};
s2t(nyi, Signal) ->
    {ns, nyi, Signal};
s2t(NYI = {nyi, _}, Signal) ->
    {ns, NYI, Signal};
% every target that has no clause of its own yet
s2t(NYI, Signal) ->
    {ns, {nyi, NYI}, Signal}.
|
||||
|
||||
|
||||
|
||||
-spec s2f(ForestTarget, Signal) -> Forest when
    ForestTarget :: s2f_target(),
    Signal       :: [tk()],
    Forest       :: asf().
% @doc
% signal-to-forest: split the signal into block items with
% gsc_signal:gulp_block_items/1 and parse each item as TreeTarget

s2f({block_of, TreeTarget}, S0) ->
    {gulp, Items} = gsc_signal:gulp_block_items(S0),
    lists:map(fun(Item) -> s2t(TreeTarget, Item) end, Items).
|
||||
|
||||
|
||||
-spec s2s_slurp_meta(InitMeta, Signal) -> Result when
    InitMeta  :: Meta,
    Signal    :: [tk()],
    Result    :: {slurp, Meta, NewSignal},
    Meta      :: ast_meta(),
    NewSignal :: Signal.
% @doc
% fill in a meta record from the front of the signal
%
% the record type of InitMeta selects the slurper; anything other than
% a #ct{} or #decl_type{} raises {s2s_slurp_meta, InitMeta, Signal}

s2s_slurp_meta(M, S) ->
    case M of
        #ct{}        -> s2s_sm_ct(M, S);
        #decl_type{} -> s2s_sm_decl_type(M, S);
        _            -> error({s2s_slurp_meta, M, S})
    end.
|
||||
|
||||
|
||||
% fill in a #ct{} from the head of a contract declaration
%
%   ['payable'] ['main'] 'contract' Con [Implement] '='
%
% the fields are filled strictly in record order: the first field that
% is still `none' decides which token is expected next, and once no
% field is `none' the record is returned with the rest of the signal

% 'payable' (optional)
s2s_sm_ct(Ct = #ct{payable = none}, [#tk{str = "payable"} = Tk | Rest]) ->
    s2s_sm_ct(Ct#ct{payable = {true, Tk}}, Rest);
s2s_sm_ct(Ct = #ct{payable = none}, Sig) ->
    s2s_sm_ct(Ct#ct{payable = false}, Sig);
% 'main' (optional)
s2s_sm_ct(Ct = #ct{main = none}, [#tk{str = "main"} = Tk | Rest]) ->
    s2s_sm_ct(Ct#ct{main = {true, Tk}}, Rest);
s2s_sm_ct(Ct = #ct{main = none}, Sig) ->
    s2s_sm_ct(Ct#ct{main = false}, Sig);
% 'contract' (required)
s2s_sm_ct(Ct = #ct{contract = none}, [#tk{str = "contract"} = Tk | Rest]) ->
    s2s_sm_ct(Ct#ct{contract = Tk}, Rest);
s2s_sm_ct(Ct = #ct{contract = none}, Sig) ->
    error({no_kwd_contract, Ct, Sig});
% Con (required)
s2s_sm_ct(Ct = #ct{con = none}, [#tk{shape = con} = Tk | Rest]) ->
    s2s_sm_ct(Ct#ct{con = Tk}, Rest);
s2s_sm_ct(Ct = #ct{con = none}, Sig) ->
    error({no_contract_name, Ct, Sig});
% Implement (optional)
s2s_sm_ct(Ct = #ct{impls = none}, Sig) ->
    case gsc_tokens:strings(1, Sig) of
        [":"] ->
            {slurp, Impls, Rest} = s2f_slurp_impls(Sig),
            s2s_sm_ct(Ct#ct{impls = Impls}, Rest);
        _ ->
            s2s_sm_ct(Ct#ct{impls = []}, Sig)
    end;
% '=' (required)
s2s_sm_ct(Ct = #ct{eq = none}, [#tk{str = "="} = Tk | Rest]) ->
    s2s_sm_ct(Ct#ct{eq = Tk}, Rest);
s2s_sm_ct(Ct = #ct{eq = none}, Sig) ->
    error({no_equal_sign, Ct, Sig});
% nothing left to fill in
s2s_sm_ct(Ct, Sig) ->
    {slurp, Ct, Sig}.
|
||||
|
||||
% slurp an implements list
%
%   Implement ::= ':' Sep1(Con, ',')
%
% the signal must start with ':' followed by a constructor; returns
% {slurp, Cons, Rest} with the constructor tokens in source order
s2f_slurp_impls([#tk{str = ":"}, #tk{shape = con} = First | Rest]) ->
    s2f_slurp_impls([First], Rest).


% Acc holds the constructors seen so far, newest first; the list ends
% at the first position that is not `,' followed by a constructor
s2f_slurp_impls(Acc, Sig) ->
    case Sig of
        [#tk{str = ","}, #tk{shape = con} = Next | Rest] ->
            s2f_slurp_impls([Next | Acc], Rest);
        _ ->
            {slurp, lists:reverse(Acc), Sig}
    end.
|
||||
|
||||
|
||||
%-record(decl_type,
|
||||
% {type = none :: none | tk(),
|
||||
% id = none :: none | tk(),
|
||||
% params = none :: none | [tk()],
|
||||
% eq = none :: none | tk()}).
|
||||
|
||||
% fill in a #decl_type{} from the head of a type declaration
%
%   'type' Id ['(' TVar* ')'] '='
%
% the fields are filled in record order, the first field that is still
% `none' decides which token is expected next; returns
% {slurp, Meta, RestOfSignal} once every field is set
%
% raises {no_kwd_type, S}, {no_type_id, S} or {no_equal_sign, S} when
% the expected token is missing, and {fixme, parens_bad} when the
% declaration has type parameters
s2s_sm_decl_type(M = #decl_type{type = none}, S0) ->
    case S0 of
        [#tk{str = "type"} = T0 | S1] ->
            s2s_sm_decl_type(M#decl_type{type = T0}, S1);
        _ ->
            error({no_kwd_type, S0})
    end;
s2s_sm_decl_type(M = #decl_type{id = none}, S0) ->
    case S0 of
        [#tk{shape = id} = T0 | S1] ->
            s2s_sm_decl_type(M#decl_type{id = T0}, S1);
        _ ->
            error({no_type_id, S0})
    end;
s2s_sm_decl_type(M = #decl_type{params = none}, S0) ->
    case S0 of
        % FIXME: a parameter list is recognised but not parsed yet
        [#tk{str = "("} | _] ->
            error({fixme, parens_bad});
        _ ->
            s2s_sm_decl_type(M#decl_type{params = []}, S0)
    end;
s2s_sm_decl_type(M = #decl_type{eq = none}, S0) ->
    case S0 of
        [#tk{str = "="} = T0 | S1] ->
            s2s_sm_decl_type(M#decl_type{eq = T0}, S1);
        _ ->
            error({no_equal_sign, S0})
    end;
% nothing left to fill in
s2s_sm_decl_type(M, S0) ->
    {slurp, M, S0}.
|
||||
@@ -0,0 +1,144 @@
|
||||
-module(gsc_test_ntree).
|
||||
|
||||
-export([
|
||||
main/0
|
||||
]).
|
||||
|
||||
-include("$gsc_include/gsc.hrl").
|
||||
|
||||
% just parsing type expressions right now, so only need
|
||||
% to worry about round parens
|
||||
%
|
||||
% none is to indicate general-purpose grouping, for
|
||||
% e.g. LHS/RHS of an op
|
||||
-type syntax_meta()
|
||||
:: {op, tk()}
|
||||
| op_arg
|
||||
| {parens, Open :: tk(), Close :: tk()}
|
||||
.
|
||||
|
||||
-type ast() :: ntree(syntax_meta(), tk()).
|
||||
-type asf() :: nforest(syntax_meta(), tk()).
|
||||
-type asts() :: asf().
|
||||
|
||||
|
||||
main() ->
|
||||
x00(),
|
||||
ok.
|
||||
|
||||
% x00 = example00
%
% print every stage of the example, from source string to forest; a
% stage is only computed right before its line is printed
x00() ->
    io:format("Example 00:~n", []),
    Stages =
        [{" SrcStr = ~p~n", fun x00_src/0},
         {" Tokens = ~p~n", fun x00_tks/0},
         {" Signal = ~p~n", fun x00_sgl/0},
         {" Forest = ~p~n", fun x00_fst/0}],
    lists:foreach(fun({Fmt, Stage}) -> io:format(Fmt, [Stage()]) end, Stages).
|
||||
|
||||
% sample type expr, tokens, signal
|
||||
x00_src() -> "(foo => (bar) * baz)".
|
||||
x00_tks() -> gsc:unsafe_tokens_from_string(x00_src()).
|
||||
x00_sgl() -> gsc:filter_signal(x00_tks()).
|
||||
x00_fst() -> parse(x00_sgl()).
|
||||
|
||||
|
||||
-spec parse(Signal) -> ASF when
    Signal :: [tk()],
    ASF    :: asf().
% @doc
% parse the signal of a type expression into a forest
%
% key insight here is our signal is already a forest, assuming the
% leaf type is `tk()', so the parser is just a sequence of
% forest-to-forest transformers:
%
%   1. group everything between `(' and `)' into a parens stem
%   2. split on "=>"
%   3. split on "*"
%
% at the end we should end up with just one tree (i think)?

parse(Signal) ->
    F1 = f2f_parens(Signal),
    F2 = f2f_op("=>", F1),
    % the result of the last pass is the result of the parse
    f2f_op("*", F2).
|
||||
|
||||
|
||||
% split a forest on the operator token whose string is OpStr
%
% at the first such token, everything before it and everything after
% it become the two `op_arg' kids of a single `{op, Token}' tree, and
% that tree is the whole result; the right-hand side is split again
% recursively, so repeated operators nest to the right
%
% stems met on the way are kept in place, with their kids split the
% same way
f2f_op(OpStr, Fst) ->
    f2f_op(OpStr, [], Fst).


% Seen holds what has been walked past so far, newest first

% never saw the op
f2f_op(_OpStr, Seen, []) ->
    lists:reverse(Seen);
% see op
f2f_op(OpStr, Seen, [#tk{str = OpStr} = OpTk | After]) ->
    Left  = #ns{meta = op_arg, kids = lists:reverse(Seen)},
    Right = #ns{meta = op_arg, kids = f2f_op(OpStr, After)},
    [#ns{meta = {op, OpTk}, kids = [Left, Right]}];
% see stem, descend
f2f_op(OpStr, Seen, [#ns{kids = Kids} = Stem | After]) ->
    f2f_op(OpStr, [Stem#ns{kids = f2f_op(OpStr, Kids)} | Seen], After);
% see leaf, just add
f2f_op(OpStr, Seen, [Leaf | After]) ->
    f2f_op(OpStr, [Leaf | Seen], After).
|
||||
|
||||
|
||||
-spec f2f_parens(Forest) -> NewForest when
    Forest    :: asts(),
    NewForest :: Forest.
% @doc
% recursive parens decomposition
%
% the input is the flat list of tokens; every run of tokens from a `('
% to its matching `)' is replaced by one `{parens, Open, Close}' tree
% holding the tokens in between
%
% quirk: a `(' that is never closed is an error, a stray `)' is not,
% it is simply kept as an ordinary token

f2f_parens(Fst) ->
    f2f_parens([], Fst).


% Done holds the finished part of the forest, newest first
f2f_parens(Done, [Head | Tail]) ->
    case Head of
        % open paren, we descend
        #tk{str = "("} ->
            {slurp, Group, Remaining} =
                slurp_pstem({parens, Head, none}, [], Tail),
            f2f_parens([Group | Done], Remaining);
        % something else, we continue
        _ ->
            f2f_parens([Head | Done], Tail)
    end;
% done
f2f_parens(Done, []) ->
    lists:reverse(Done).
|
||||
|
||||
|
||||
|
||||
% collect the inside of one parenthesised group
%
% the first argument is the group's meta while it is still open,
% {parens, OpenToken, none}; Inner holds the members seen so far,
% newest first; returns {slurp, ParensTree, RestAfterClose}

% ran out of tokens before close paren
slurp_pstem({parens, Open, none}, Inner, []) ->
    error({no_close_for, Open, Inner});
% hit close paren, we done
slurp_pstem({parens, Open, none}, Inner, [#tk{str = ")"} = Close | After]) ->
    Group = #ns{meta = {parens, Open, Close},
                kids = lists:reverse(Inner)},
    {slurp, Group, After};
% hit open paren, we recurse
slurp_pstem(Meta, Inner, [#tk{str = "("} = NestedOpen | After0]) ->
    {slurp, Nested, After1} =
        slurp_pstem({parens, NestedOpen, none}, [], After0),
    slurp_pstem(Meta, [Nested | Inner], After1);
% hit something else, we move along
slurp_pstem(Meta, Inner, [Other | After]) ->
    slurp_pstem(Meta, [Other | Inner], After).
|
||||
@@ -1,5 +1,5 @@
|
||||
% gsc tokenizer tests
|
||||
-module(gs_test_tokens).
|
||||
-module(gsc_test_tokens).
|
||||
|
||||
-export([
|
||||
main/0, ct_dir/0
|
||||
@@ -116,11 +116,10 @@ tokstr_concat_test_() ->
|
||||
|
||||
concat_property(FileName, FilePath) ->
|
||||
%?debugFmt("concat_property(~p, _)", [FileName]),
|
||||
{ok, FileBytes} = file:read_file(FilePath),
|
||||
FileChars = unicode:characters_to_nfc_list(FileBytes),
|
||||
FileChars = gsc:very_stable_file(FilePath),
|
||||
{FileName ++ ": file = sum(tokens)",
|
||||
fun() ->
|
||||
case gsc_tokenizer:tokens(FileChars) of
|
||||
case gsc:tokens_from_file(FileChars) of
|
||||
{ok, SfcTokens} ->
|
||||
ConcatStr = concat_token_strs(SfcTokens, []),
|
||||
?assertEqual(FileChars, ConcatStr);
|
||||
@@ -139,13 +138,15 @@ div_test_() ->
|
||||
% divergence
|
||||
DivFiles = div_files(),
|
||||
%?debugFmt("DivFiles=~p", [DivFiles]),
|
||||
{"claude tokenizer divergences fixed", [tokens_match(N, P) || {N, P} <- DivFiles]}.
|
||||
{"claude tokenizer divergences fixed",
|
||||
[tokens_match(N, P) || {N, P} <- DivFiles]}.
|
||||
|
||||
tokens_match(FileName, FilePath) ->
|
||||
%?debugFmt("tokens_match(~p, _)", [FileName]),
|
||||
% extracting data to be tested
|
||||
SoTokens = gsc_cli:so_tokens(FilePath),
|
||||
SfTokens = gsc_cli:gso_tokens(FilePath),
|
||||
% i hate this so much but lazy and this is test code so who really cares.
|
||||
SoTokens = so_tokens_from_file(FilePath),
|
||||
SfTokens = gsc:gso_tokens_from_file(FilePath),
|
||||
{FileName ++ ": tokenizers_agree",
|
||||
fun() ->
|
||||
case {SoTokens, SfTokens} of
|
||||
@@ -155,3 +156,10 @@ tokens_match(FileName, FilePath) ->
|
||||
{{error, _}, {ok, _}} -> error("so_scan failed and gso_scan succeded")
|
||||
end
|
||||
end}.
|
||||
|
||||
% run the legacy scanner on a file. the file contents are
% handed over as a plain list of the raw bytes (one integer
% per byte, no unicode decoding). crashes with badmatch if
% the file cannot be read.
so_tokens_from_file(Path) ->
    {ok, Contents} = file:read_file(Path),
    so_scan:scan(binary_to_list(Contents)).
|
||||
@@ -0,0 +1,27 @@
|
||||
% testing utilities
|
||||
-module(ts_utils).
|
||||
|
||||
-export([
|
||||
ct_dir/0,
|
||||
ct_file/1, ct_file_abspath/1
|
||||
]).
|
||||
|
||||
|
||||
-spec ct_dir() -> string().
% @doc
% directory containing the tests for the tokenizer: the
% `ct' directory under whatever `zx_daemon:get_home/0'
% returns
% @end

ct_dir() ->
    Home = zx_daemon:get_home(),
    Home ++ "/ct".
|
||||
|
||||
|
||||
% @doc alias for `ct_file/1'

ct_file_abspath(FileName) ->
    ct_file(FileName).
|
||||
|
||||
-spec ct_file(Name) -> AbsPath when
    Name    :: string(),
    AbsPath :: string().
% @doc
% path of the file called `Name' inside `ct_dir/0', e.g.
%
% ct_file("foo.aes") -> "/path/to/ct/foo.aes"
% @end

ct_file(Name) ->
    lists:append([ct_dir(), "/", Name]).
|
||||
@@ -143,3 +143,35 @@
|
||||
| #gsc_err_nyi{}
|
||||
| #gsc_err_empty_file{}
|
||||
| #gsc_err{}.
|
||||
|
||||
|
||||
%----------------------------
|
||||
% tree type for parsing
|
||||
%----------------------------
|
||||
|
||||
% @doc stem record
|
||||
-record(ns, {meta :: any(),
|
||||
kids :: list(any())}).
|
||||
|
||||
% @doc `ntree(S, L)' is a "node tree" (meaning stems
|
||||
% have values and children)
|
||||
%
|
||||
% for the purposes of the compiler, the key observation
|
||||
% is that a flat list of tokens is already a forest
|
||||
-type ntree(S, L) :: #ns{meta :: S, kids :: [ntree(S, L)]}
|
||||
| L.
|
||||
|
||||
% @doc forest is just a list of trees
|
||||
-type nforest(S, L) :: [ntree(S, L)].
|
||||
|
||||
|
||||
% aliases
|
||||
|
||||
-type nt(S, L) :: ntree(S, L).
|
||||
-type nf(S, L) :: nforest(S, L).
|
||||
|
||||
-type ntree() :: ntree(any(), any()).
|
||||
-type nforest() :: [ntree()].
|
||||
|
||||
-type nt() :: ntree().
|
||||
-type nf() :: nforest().
|
||||
|
||||
+9
-9
@@ -196,13 +196,13 @@
|
||||
%gulp_file([]) ->
|
||||
% {error, empty_file};
|
||||
%gulp_file(Tokens) ->
|
||||
% case gs_tokens:take_block(Tokens) of
|
||||
% case gsc_tokens:take_block(Tokens) of
|
||||
% {Tokens, []} ->
|
||||
% gulp_block(fun gulp_top_decl/1, Tokens);
|
||||
% %gulp_file2([], [], Tokens);
|
||||
% {A, B} ->
|
||||
% StartPos = gs_tokens:start_pos(A),
|
||||
% ErrPos = gs_tokens:start_pos(B),
|
||||
% StartPos = gsc_tokens:start_pos(A),
|
||||
% ErrPos = gsc_tokens:start_pos(B),
|
||||
% Msg = efmt("gulp_file: block starting at ~p ends at ~p instead of EOF",
|
||||
% [StartPos, ErrPos]),
|
||||
% {error, #parse_error{pos = ErrPos, msg = Msg}}
|
||||
@@ -212,7 +212,7 @@
|
||||
%
|
||||
%%gulp_file2(AccOks, AccErrs, Tokens = [_ | _]) ->
|
||||
%% % ItemTokens will be nonempty
|
||||
%% {ItemTokens, NewTokens} = gs_tokens:take_block_item(Tokens),
|
||||
%% {ItemTokens, NewTokens} = gsc_tokens:take_block_item(Tokens),
|
||||
%% case gulp_top_decl(ItemTokens) of
|
||||
%% {gulp, Ok} -> gulp_file2([Ok | AccOks], AccErrs, NewTokens);
|
||||
%% Err -> gulp_file2(AccOks, [Err | AccErrs], NewTokens)
|
||||
@@ -258,7 +258,7 @@
|
||||
%
|
||||
%gulp_block(GulpItem, AccOks, AccErrs, Tokens = [_ | _]) ->
|
||||
% % ItemTokens will be nonempty
|
||||
% {ItemTokens, NewTokens} = gs_tokens:take_block_item(Tokens),
|
||||
% {ItemTokens, NewTokens} = gsc_tokens:take_block_item(Tokens),
|
||||
% case GulpItem(ItemTokens) of
|
||||
% {gulp, Ok} -> gulp_block(GulpItem, [Ok | AccOks], AccErrs, NewTokens);
|
||||
% Err -> gulp_block(GulpItem, AccOks, [Err | AccErrs], NewTokens)
|
||||
@@ -284,7 +284,7 @@
|
||||
%% | Using
|
||||
%% @end
|
||||
%gulp_top_decl(DeclTokens) ->
|
||||
% case gs_tokens:strings(3, DeclTokens) of
|
||||
% case gsc_tokens:strings(3, DeclTokens) of
|
||||
% ["payable", "contract", "interface"] ->
|
||||
% gulp_nyi(DeclTokens);
|
||||
% ["contract", "interface" | _] ->
|
||||
@@ -410,7 +410,7 @@
|
||||
%% | (EModifier* 'entrypoint' | FModifier* 'function') Block(FunDecl)
|
||||
%% | Using
|
||||
%gulp_decl(Tokens) ->
|
||||
% case gs_tokens:strings(1, Tokens) of
|
||||
% case gsc_tokens:strings(1, Tokens) of
|
||||
% ["type"] -> gulp_type_alias(Tokens);
|
||||
% _ -> gulp_nyi(Tokens)
|
||||
% end.
|
||||
@@ -611,7 +611,7 @@
|
||||
%% Type1 = {plist, Types} () (foo) (foo, bar)
|
||||
%% | {token, #tk{}} foo Bar.baz 'quux
|
||||
%slurp_type1(Tks) ->
|
||||
% case gs_tokens:slurp_plist(Tks) of
|
||||
% case gsc_tokens:slurp_plist(Tks) of
|
||||
% % head token is NOT open paren -> must be id/qid/tvar
|
||||
% {slurp, [], [Tk | NewTks]} ->
|
||||
% TkType = Tk#tk.type,
|
||||
@@ -633,7 +633,7 @@
|
||||
%
|
||||
%
|
||||
%%slurp_type_expr_plist(Tks) ->
|
||||
%% case gs_tokens:slurp_plist(Tks) of
|
||||
%% case gsc_tokens:slurp_plist(Tks) of
|
||||
%% % head token is NOT open paren -> must be id/qid/tvar
|
||||
%% {slurp, [], [Tk | NewTks]} ->
|
||||
%% TkType = Tk#tk.type,
|
||||
|
||||
@@ -43,7 +43,7 @@
|
||||
%
|
||||
%% @doc for testing
|
||||
%unsafe_vtks_from_string(S) ->
|
||||
% {ok, SigTks} = gs_tokens:significant_tokens(S),
|
||||
% {ok, SigTks} = gsc_tokens:significant_tokens(S),
|
||||
% {gulp, Vtks} = gulp_vtks(SigTks),
|
||||
% Vtks.
|
||||
%
|
||||
@@ -110,7 +110,7 @@
|
||||
% end.
|
||||
%
|
||||
%slurp_plist_rec(Tokens = [#tk{string = "(" | _]) ->
|
||||
% case gs_tokens:slurp_plist(Tokens) of
|
||||
% case gsc_tokens:slurp_plist(Tokens) of
|
||||
% {slurp, [], _} ->
|
||||
% barf;
|
||||
% {slurp, PTokens, NewTokens} ->
|
||||
@@ -156,7 +156,7 @@
|
||||
% {_Pfx = Tks1_BeforeOpen,
|
||||
% _Sfx = Tks2_OpenNAfter
|
||||
% = [#tk{string = "("} | _]} ->
|
||||
% case gs_tokens:slurp_plist(Tks2_OpenNAfter) of
|
||||
% case gsc_tokens:slurp_plist(Tks2_OpenNAfter) of
|
||||
% {slurp, Tks2A_OpenToClose, Tks2B_AfterClose} ->
|
||||
% NewAcc = [Acc,
|
||||
% Tks1_BeforeOpen,
|
||||
|
||||
@@ -63,7 +63,7 @@
|
||||
%-spec end_pos([gsc_token()]) -> {value, tk_pos()} | none.
|
||||
%
|
||||
%end_pos([#gsc_token{pos = Pos, string = Str}]) ->
|
||||
% {value, gs_tokens:new_pos(Pos, Str)};
|
||||
% {value, gsc_tokens:new_pos(Pos, Str)};
|
||||
%end_pos([_ | T]) ->
|
||||
% end_pos(T);
|
||||
%end_pos([]) ->
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
% @doc
|
||||
% <pre>
|
||||
% T R O N A L D D U M P
|
||||
%
|
||||
% .-""""""""""""-.
|
||||
% .-' _..------.._ '-.
|
||||
% .' .' GOLDEN NFC '. '.
|
||||
% / / COMB-OVER MAP \ \
|
||||
% ; ; .-^^^^^^^^^^-. ; ;
|
||||
% | | / THEY'RE \ | |
|
||||
% | | | NOT SENDING | | |
|
||||
% | | | ASCII | | |
|
||||
% ; ; \_.--. .--._./ ; ;
|
||||
% \ \ (o)(o) / /
|
||||
% '. '. __ .' .'
|
||||
% '-._ '._==_.' _.-'
|
||||
% '-._____.-'
|
||||
% /|||\
|
||||
% / ||| \
|
||||
% / ||| \
|
||||
% .-------' ||| '-------.
|
||||
% / THE BEST NORMALIZER \
|
||||
% / VERY STABLE CODEPOINTS \
|
||||
% /_________________________________\
|
||||
% </pre>
|
||||
%
|
||||
% When unicode sends its codepoints, they're not
|
||||
% sending their best. They're not sending ASCII.
|
||||
% They're not sending ASCII. They're sending integers
|
||||
% that have lots of problems, and they're bringing
|
||||
% those problems with us. They're bringing diacritics.
|
||||
% They're bringing non-idempotent lowercasing. They're
|
||||
% bringing graphemes that don't correspond bijectively
|
||||
% with printable characters. They're bringing RTL.
|
||||
% They're bringing invisible characters. They're
|
||||
% bringing characters that draw outside the character
|
||||
% boundary. They're bringing variable-width
|
||||
% whitespace. They're bringing control characters.
|
||||
% They're bringing emojis.
|
||||
%
|
||||
% And some, I assume, are good characters.
|
||||
%
|
||||
% `SrcStr' is a unicode NFC list, not an ordinary
|
||||
% string. you think a string is a list of codepoints.
|
||||
%
|
||||
% NOOOOO.
|
||||
%
|
||||
% See it's different, because that's why.
|
||||
%
|
||||
% This is the cost of diversity, folks.
|
||||
% @end
|
||||
|
||||
+137
-34
@@ -1,45 +1,48 @@
|
||||
% @doc bikeshed proctrastination head into vim warmup thing
|
||||
% @doc bikeshed procrastination head into vim warmup
|
||||
% thing
|
||||
%
|
||||
% sophia compiler from scratch by PRH
|
||||
%
|
||||
% based on original sophia compiler
|
||||
%
|
||||
% parse layers:
|
||||
% 1. gs_tokens: SrcStr -> (Tokens | SigTokens)
|
||||
%
|
||||
% SigTokens = not comment/whitespace
|
||||
%
|
||||
% layers:
|
||||
% a. gs_strmatch : matches string shapes
|
||||
% b. gso_scan : converts to so_scan shapes
|
||||
%
|
||||
%
|
||||
% terminology:
|
||||
%
|
||||
% - `slurp`/`barf` borrowed from emacs paredit mode:
|
||||
%
|
||||
% slurp : (a b) c -> (a b c)
|
||||
% barf : (a b c) -> a (b c)
|
||||
%
|
||||
% * `slurp` usually involves *transforming* input
|
||||
% into a new type (e.g. slurp a token from src
|
||||
% string); think of slurp as a verb meaning to
|
||||
% consume and then digest
|
||||
% * `barf` basically means blindly splitting off
|
||||
% input
|
||||
%
|
||||
% based on original sophia compiler; target for version
|
||||
% 0.1 is to match behavior exactly
|
||||
% @end
|
||||
|
||||
-module(gsc).
|
||||
|
||||
% token and tokens
|
||||
-export_type([
|
||||
token/0
|
||||
token/0,
|
||||
signal/0
|
||||
]).
|
||||
|
||||
% syntax tree/forest wrapper type
|
||||
-export_type([
|
||||
ntree/2, ntree/0,
|
||||
nforest/2, nforest/0,
|
||||
nt/2, nt/0,
|
||||
nf/2, nf/0
|
||||
]).
|
||||
|
||||
|
||||
-export([
|
||||
unsafe_tokens_from_file/1,
|
||||
unsafe_tokens_from_string/1,
|
||||
unsafe_signal_from_file/1,
|
||||
unsafe_signal_from_string/1,
|
||||
filter_signal/1,
|
||||
signal_from_string/1,
|
||||
signal_from_file/1,
|
||||
sigtokens_from_file/1,
|
||||
sigtokens_from_string/1,
|
||||
tokens_from_file/1,
|
||||
tokens_from_string/1
|
||||
tokens_from_string/1,
|
||||
% sophia compatibility
|
||||
gso_tokens_from_file/1,
|
||||
gso_tokens_from_string/1,
|
||||
% unicode normalization
|
||||
very_stable_codepoints/1,
|
||||
very_stable_string/1,
|
||||
very_stable_file/1
|
||||
]).
|
||||
|
||||
-include("$gsc_include/gsc.hrl").
|
||||
@@ -50,19 +53,52 @@
|
||||
|
||||
-type token() :: tk().
|
||||
|
||||
% @doc signal means non-noise (whitespace/comment)
|
||||
% tokens; legacy name still around is "sigtokens"
|
||||
-type signal() :: [tk()].
|
||||
|
||||
%-----------------------------------------
|
||||
% functions
|
||||
% API: FUNCTIONS
|
||||
%-----------------------------------------
|
||||
|
||||
%-----------------------------------------
|
||||
% aint nobody got time for case shit
|
||||
%-----------------------------------------
|
||||
% tokens
|
||||
%% @doc like `tokens_from_file/1' but returns the bare token
%% list; crashes with badmatch on anything other than
%% `{ok, Tokens}'
unsafe_tokens_from_file(FilePath) ->
    {ok, Tokens} = tokens_from_file(FilePath),
    Tokens.
|
||||
|
||||
%% @doc like `tokens_from_string/1' but returns the bare
%% token list; crashes with badmatch on anything other than
%% `{ok, Tokens}'
unsafe_tokens_from_string(SrcStr) ->
    {ok, Tokens} = tokens_from_string(SrcStr),
    Tokens.
|
||||
|
||||
|
||||
% signal
|
||||
%% @doc like `signal_from_file/1' but returns the bare
%% signal; crashes with badmatch on anything other than
%% `{ok, Signal}'
unsafe_signal_from_file(FilePath) ->
    {ok, Signal} = signal_from_file(FilePath),
    Signal.
|
||||
|
||||
%% @doc like `signal_from_string/1' but returns the bare
%% signal; crashes with badmatch on anything other than
%% `{ok, Signal}'
unsafe_signal_from_string(SrcStr) ->
    {ok, Signal} = signal_from_string(SrcStr),
    Signal.
|
||||
|
||||
|
||||
%
|
||||
%% @doc shorthand for `gsc_tokens:filter_significant/1'
filter_signal(Tokens) ->
    gsc_tokens:filter_significant(Tokens).
|
||||
%% @doc current name for `sigtokens_from_file/1'
signal_from_file(FilePath) ->
    sigtokens_from_file(FilePath).
|
||||
%% @doc current name for `sigtokens_from_string/1'
signal_from_string(SrcStr) ->
    sigtokens_from_string(SrcStr).
|
||||
|
||||
% @doc legacy name for signal
%
% tokenize the file via `tokens_from_file/1' and keep only
% the significant tokens. anything other than `{ok, _}'
% from `tokens_from_file/1' is passed through unchanged.
%
% the filter lives in `gsc_tokens' (formerly `gs_tokens');
% only the renamed module is called here.
sigtokens_from_file(X) ->
    case tokens_from_file(X) of
        {ok, Y} -> {ok, gsc_tokens:filter_significant(Y)};
        Err     -> Err
    end.
|
||||
|
||||
% @doc legacy name for signal (string input)
%
% tokenize the string via `tokens_from_string/1' and keep
% only the significant tokens. anything other than
% `{ok, _}' from `tokens_from_string/1' is passed through
% unchanged.
%
% the filter lives in `gsc_tokens' (formerly `gs_tokens');
% only the renamed module is called here.
sigtokens_from_string(X) ->
    case tokens_from_string(X) of
        {ok, Y} -> {ok, gsc_tokens:filter_significant(Y)};
        Err     -> Err
    end.
|
||||
|
||||
@@ -81,7 +117,6 @@ tokens_from_file(FilePath) ->
|
||||
|
||||
|
||||
|
||||
|
||||
-spec tokens_from_string(SrcStr) -> Result
|
||||
when SrcStr :: string(),
|
||||
Result :: {ok, Tokens}
|
||||
@@ -89,4 +124,72 @@ tokens_from_file(FilePath) ->
|
||||
Tokens :: [tk()].
|
||||
|
||||
% tokenize a source string; thin wrapper over
% `gsc_tokens:tokens/1' (the module formerly named
% `gs_tokens'), whose result is returned as-is
tokens_from_string(SrcStr) ->
    gsc_tokens:tokens(SrcStr).
|
||||
|
||||
|
||||
|
||||
-spec gso_tokens_from_file(FilePath) -> Result when
    FilePath :: string(),
    Result   :: {ok, GsoTks} | {error, Reason},
    GsoTks   :: [gso_scan:so_token()],
    Reason   :: gsc_err() | any().

%% @doc read the file and run its bytes through
%% `gso_tokens_from_string/1'. If the read does not yield
%% `{ok, Bytes}', the result of `file:read_file/1' is
%% returned unchanged.
gso_tokens_from_file(FilePath) ->
    ReadResult = file:read_file(FilePath),
    case ReadResult of
        {ok, Contents} -> gso_tokens_from_string(Contents);
        _NotOk         -> ReadResult
    end.
|
||||
|
||||
|
||||
|
||||
-spec gso_tokens_from_string(Str) -> Result when
    Str    :: unicode:chardata(),
    Result :: {ok, GsoTks} | {error, Reason},
    GsoTks :: [gso_scan:so_token()],
    Reason :: gsc_err() | any().

%% @doc normalize `Str' with
%% `gsc_tokens:very_stable_codepoints/1' and hand the result
%% to `gso_scan:scan/1'.
%%
%% The argument type is `unicode:chardata()' rather than
%% `iolist()' because `gso_tokens_from_file/1' passes the
%% bare binary it read from disk, and a bare binary is not
%% an `iolist()'.
gso_tokens_from_string(Evil) ->
    Str = gsc_tokens:very_stable_codepoints(Evil),
    gso_scan:scan(Str).
|
||||
|
||||
|
||||
|
||||
|
||||
-spec very_stable_codepoints(String) -> Normalized when
    String     :: unicode:chardata(),
    Normalized :: string().

%% @doc normalize character data to an NFC list of
%% codepoints; delegates to
%% `gsc_tokens:very_stable_codepoints/1'.
%%
%% The argument type is `unicode:chardata()' rather than
%% `iolist()' because `very_stable_file/1' passes the bare
%% binary it read from disk, and a bare binary is not an
%% `iolist()'.
very_stable_codepoints(X) ->
    gsc_tokens:very_stable_codepoints(X).
|
||||
|
||||
|
||||
|
||||
-spec very_stable_string(String) -> Normalized when
    String     :: unicode:chardata(),
    Normalized :: string().

%% @doc alias for `very_stable_codepoints/1'
%%
%% Accepts `unicode:chardata()' (so binaries as well as
%% lists), matching what the underlying
%% `gsc_tokens:very_stable_codepoints/1' is called with
%% elsewhere in this module.
very_stable_string(X) ->
    gsc_tokens:very_stable_codepoints(X).
|
||||
|
||||
|
||||
|
||||
-spec very_stable_file(FilePath) -> Contents when
    FilePath :: string(),
    Contents :: string().

%% @doc Read a file and return its contents normalized by
%% `very_stable_codepoints/1'.
%%
%% Despite the name this is NOT very stable: when
%% `file:read_file/1' returns anything other than
%% `{ok, Bytes}' (e.g. file not found), that result is
%% raised with `error/1' instead of being returned.
%%
%% Intended mostly as a scripting/shell convenience.
very_stable_file(FilePath) ->
    case file:read_file(FilePath) of
        {ok, Bytes} ->
            very_stable_codepoints(Bytes);
        ReadError ->
            error(ReadError)
    end.
|
||||
|
||||
@@ -0,0 +1,55 @@
|
||||
-module(gsc_ntree).
|
||||
|
||||
-export([
|
||||
nstem/2, meta/1, kids/1,
|
||||
flatten_tree/1, flatten_forest/1
|
||||
]).
|
||||
|
||||
|
||||
-include("$gsc_include/gsc.hrl").
|
||||
|
||||
|
||||
%%=====================================================
|
||||
%% API: functions
|
||||
%%=====================================================
|
||||
|
||||
-spec nstem(Root, Forest) -> Tree when
    Root   :: S,
    Forest :: nforest(S, L),
    Tree   :: ntree(S, L),
    S      :: any(),
    L      :: any().

% build a stem: an `ns' tuple with `Meta' as its meta and
% `Kids' as its kids
nstem(Meta, Kids) ->
    {ns, Meta, Kids}.
|
||||
|
||||
|
||||
% meta field of a stem; function_clause on anything that is
% not an `#ns{}'
meta(#ns{meta = Meta}) ->
    Meta.
|
||||
% kids field of a stem; function_clause on anything that is
% not an `#ns{}'
kids(#ns{kids = Kids}) ->
    Kids.
|
||||
|
||||
|
||||
|
||||
-spec flatten_tree(Tree) -> Leafs when
    Tree  :: ntree(_, L),
    Leafs :: [L],
    L     :: any().

% all leaves of the tree, in order, as one flat list.
%
% note: the final step is `lists:flatten/1', so a leaf that
% is itself a list gets flattened into the result as well
flatten_tree(Tree) ->
    Nested = ft(Tree),
    lists:flatten(Nested).
|
||||
|
||||
|
||||
|
||||
-spec flatten_forest(Forest) -> Leafs when
    Forest :: nforest(_, L),
    Leafs  :: [L],
    L      :: any().

% all leaves of every tree in the forest, in order, as one
% flat list.
%
% note: the final step is `lists:flatten/1', so a leaf that
% is itself a list gets flattened into the result as well
flatten_forest(Forest) ->
    Nested = ff(Forest),
    lists:flatten(Nested).
|
||||
|
||||
|
||||
% leaves under one tree as a nested (not yet flat) list:
% a stem contributes whatever its kids contribute, anything
% else counts as a single leaf
ft(Tree) ->
    case Tree of
        #ns{kids = Kids} -> ff(Kids);
        Leaf             -> [Leaf]
    end.

% one nested leaf list per tree of the forest
ff(Forest) ->
    [ft(Tree) || Tree <- Forest].
|
||||
@@ -0,0 +1,111 @@
|
||||
% signal = non-noisy tokens
|
||||
-module(gsc_signal).
|
||||
|
||||
-export([
|
||||
from_tokens/1,
|
||||
is_block/1,
|
||||
gulp_block_items/1,
|
||||
block_to_items/1,
|
||||
take_block_item/1
|
||||
]).
|
||||
|
||||
-include("$gsc_include/gsc.hrl").
|
||||
|
||||
|
||||
-spec from_tokens(Tokens) -> Signal when
    Tokens :: [tk()],
    Signal :: [tk()].
% @doc drop the noise (comments/whitespace) from a token
% list; delegates to `gsc_tokens:filter_significant/1'

from_tokens(AllTokens) ->
    gsc_tokens:filter_significant(AllTokens).
|
||||
|
||||
|
||||
|
||||
-spec is_block(Signal) -> Result when
    Signal :: [tk()],
    Result :: boolean().
% @doc
% true when no token after the first one sits in a column
% to the left of the first token's column. the empty signal
% counts as a block.
% @end

is_block([]) ->
    true;
is_block([#tk{pos = {_, BlockCol}} | Others]) ->
    Outdented =
        fun(#tk{pos = {_, Col}}) ->
            Col < BlockCol
        end,
    not lists:any(Outdented, Others).
|
||||
|
||||
|
||||
|
||||
-spec gulp_block_items(Signal) -> Result when
    Signal :: [tk()],
    Result :: {gulp, Items}
            | {error, any()},
    Items  :: [Signal].
% @doc
% split a signal that forms a single block into its block
% items.
%
% returns `{gulp, Items}' when `is_block/1' accepts the
% signal (the whole signal is consumed, so there is no
% leftover to return); otherwise returns the
% `{error, {bad_block, _}}' produced by `find_badness/1',
% which points at the first token left of the block column
% @end

gulp_block_items(S) ->
    case is_block(S) of
        true  -> {gulp, block_to_items(S)};
        false -> find_badness(S)
    end.
|
||||
|
||||
% the first token fixes the block column; scan the rest of
% the signal for a token that violates it
find_badness([#tk{pos = {_, BlockCol}} = FirstTk | Others]) ->
    find_badness(BlockCol, FirstTk, Others).
|
||||
|
||||
% report the first token whose column is left of `StartCol'.
% there is deliberately no clause for the empty list: this
% is only reached after `is_block/1' said no, so an
% offending token is expected to be in the list
find_badness(StartCol, StartTk, [#tk{pos = {_, Col}} = Offender | _])
  when Col < StartCol ->
    Details = [{start_col, StartCol},
               {end_col, Col},
               {start_tk, StartTk},
               {end_tk, Offender}],
    {error, {bad_block, Details}};
find_badness(StartCol, StartTk, [#tk{pos = {_, _}} | Others]) ->
    find_badness(StartCol, StartTk, Others).
|
||||
|
||||
|
||||
|
||||
-spec block_to_items(Signal) -> BlockItems when
    Signal     :: [tk()],
    BlockItems :: [Signal].
% @doc
% chop a block into its items, one `take_block_item/1' at a
% time. the algorithm is naive: it does not check that all
% items start at the same indent level.
%
% Input:
%     foo = ...
%     bar = ...
%     baz = ...
%
% Output:
%     [foo = ...,
%      bar = ...,
%      baz = ...]
% @end

block_to_items(Signal) ->
    % the empty signal falls out of b2is/2 as []
    b2is([], Signal).
|
||||
|
||||
% accumulate block items newest-first until the signal is
% used up, then restore input order
b2is(Items, Signal) ->
    case Signal of
        [] ->
            lists:reverse(Items);
        _ ->
            {Item, Remaining} = take_block_item(Signal),
            b2is([Item | Items], Remaining)
    end.
|
||||
|
||||
|
||||
|
||||
-spec take_block_item(Signal) -> Result when
    Signal    :: [tk()],
    Result    :: {Item, NewSignal},
    Item      :: Signal,
    NewSignal :: Signal.
% @doc
% split off the leading block item: the first token plus
% the run of following tokens whose column is strictly to
% the right of the first token's column. the empty signal
% yields `{[], []}'.
% @end

take_block_item([]) ->
    {[], []};
take_block_item([#tk{pos = {_, ItemCol}} = First | Others]) ->
    Deeper =
        fun(#tk{pos = {_, Col}}) ->
            Col > ItemCol
        end,
    {Continuation, Remaining} = lists:splitwith(Deeper, Others),
    {[First | Continuation], Remaining}.
|
||||
@@ -70,7 +70,7 @@
|
||||
% `contract` gets tokenized as a keyword and not a variable name), and then
|
||||
% calls into this module in order to match the string shape it's looking for.
|
||||
% @end
|
||||
-module(gs_strmatch).
|
||||
-module(gsc_strmatch).
|
||||
|
||||
%-compile([export_all, nowarn_export_all]).
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
% 2. to future-proof in case we decide to incrementally incorporate the gsc
|
||||
% code into the legacy sophia compiler
|
||||
% @end
|
||||
-module(gs_tokens).
|
||||
-module(gsc_tokens).
|
||||
|
||||
% meta
|
||||
-export([
|
||||
@@ -39,6 +39,9 @@
|
||||
is_significant/1,
|
||||
filter_significant/1,
|
||||
significant_tokens/1,
|
||||
very_stable_codepoints/1,
|
||||
very_stable_string/1,
|
||||
very_stable_characters/1,
|
||||
tokens_from_iolist/1,
|
||||
tokens/1,
|
||||
slurp_token/2,
|
||||
@@ -188,13 +191,13 @@ slurp_dlist(All, Opens, [#tk{str = "["} = Tk | NewTks]) ->
|
||||
slurp_dlist(All, Opens, [#tk{str = "{"} = Tk | NewTks]) ->
|
||||
slurp_dlist([Tk | All], [Tk | Opens], NewTks);
|
||||
% sad: mismatch cases
|
||||
slurp_dlist(All, Opens, []) ->
|
||||
slurp_dlist(_, Opens, []) ->
|
||||
{error, {fixme, mismatch, Opens, none}};
|
||||
slurp_dlist(All, Opens, [#tk{str = "}"} = BadClose | _]) ->
|
||||
slurp_dlist(_, Opens, [#tk{str = "}"} = BadClose | _]) ->
|
||||
{error, {fixme, mismatch, Opens, {value, BadClose}}};
|
||||
slurp_dlist(All, Opens, [#tk{str = "]"} = BadClose | _]) ->
|
||||
slurp_dlist(_, Opens, [#tk{str = "]"} = BadClose | _]) ->
|
||||
{error, {fixme, mismatch, Opens, {value, BadClose}}};
|
||||
slurp_dlist(All, Opens, [#tk{str = ")"} = BadClose | _]) ->
|
||||
slurp_dlist(_, Opens, [#tk{str = ")"} = BadClose | _]) ->
|
||||
{error, {fixme, mismatch, Opens, {value, BadClose}}};
|
||||
% general case: non-terminal token gets pushed
|
||||
slurp_dlist(All, Opens, [Tk | NewTks]) ->
|
||||
@@ -330,6 +333,29 @@ is_significant(#tk{shape = ws}) -> false;
|
||||
is_significant(_) -> true.
|
||||
|
||||
|
||||
|
||||
% aliases
|
||||
% alias for very_stable_codepoints/1
very_stable_string(CharData) ->
    very_stable_codepoints(CharData).
|
||||
% alias for very_stable_codepoints/1
very_stable_characters(CharData) ->
    very_stable_codepoints(CharData).
|
||||
|
||||
|
||||
|
||||
-spec very_stable_codepoints(IoList) -> NfcList when
    IoList  :: unicode:chardata(),
    NfcList :: string().

%% @doc Normalize character data to a list of codepoints in
%% Unicode NFC form, via
%% `unicode:characters_to_nfc_list/1'.
%%
%% The argument type is `unicode:chardata()' rather than
%% `iolist()' so that bare binaries (e.g. the result of
%% `file:read_file/1') are covered by the spec.
%%
%% NOTE(review): `unicode:characters_to_nfc_list/1' is
%% documented to return an `{error, _, _}' tuple on invalid
%% input; that case is passed through here unhandled and is
%% not reflected in the spec.
very_stable_codepoints(S) ->
    unicode:characters_to_nfc_list(S).
|
||||
|
||||
|
||||
-spec tokens_from_iolist(SrcStr) -> Result when
|
||||
SrcStr :: iolist(),
|
||||
Result :: {ok, Tokens}
|
||||
@@ -341,6 +367,7 @@ tokens_from_iolist(S) -> tokens(S).
|
||||
|
||||
|
||||
|
||||
|
||||
-spec tokens(SrcStr) -> Result
|
||||
when SrcStr :: iolist(),
|
||||
Result :: {ok, Tokens}
|
||||
@@ -355,7 +382,8 @@ tokens_from_iolist(S) -> tokens(S).
|
||||
|
||||
% entry point: start tokenizing at position {1, 1} with an
% empty token stack
tokens(S) ->
    % defensive normalization: callers may or may not have
    % normalized already
    tokens([], {1, 1}, very_stable_codepoints(S)).
|
||||
|
||||
|
||||
tokens(Stack, _FinalPos, "") ->
|
||||
{ok, lists:reverse(Stack)};
|
||||
@@ -559,8 +587,8 @@ slurp_token_of_shape(bcom, Pos, SrcStr0) ->
|
||||
no_tokmatch
|
||||
end;
|
||||
slurp_token_of_shape(ws, Pos, SrcStr) ->
|
||||
WhitespaceMatcher = gs_strmatch:smr_sf_ws(),
|
||||
case gs_strmatch:match(WhitespaceMatcher, SrcStr) of
|
||||
WhitespaceMatcher = gsc_strmatch:smr_sf_ws(),
|
||||
case gsc_strmatch:match(WhitespaceMatcher, SrcStr) of
|
||||
no_strmatch ->
|
||||
no_tokmatch;
|
||||
{strmatch, WS, Rest} ->
|
||||
@@ -594,7 +622,7 @@ slurp_token_of_shape(kwd, Pos, SrcStr) ->
|
||||
no_tokmatch
|
||||
end;
|
||||
slurp_token_of_shape(op, Pos, SrcStr) ->
|
||||
case gs_strmatch:match(gs_strmatch:smr_sf_op(), SrcStr) of
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_op(), SrcStr) of
|
||||
{strmatch, Str, Rest} ->
|
||||
Token = #tk{shape = op, pos = Pos, str = Str},
|
||||
{tokmatch, Token, Rest};
|
||||
@@ -602,7 +630,7 @@ slurp_token_of_shape(op, Pos, SrcStr) ->
|
||||
no_tokmatch
|
||||
end;
|
||||
slurp_token_of_shape(punct, Pos, SrcStr) ->
|
||||
case gs_strmatch:match(gs_strmatch:smr_sf_punct(), SrcStr) of
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_punct(), SrcStr) of
|
||||
{strmatch, Str, Rest} ->
|
||||
Token = #tk{shape = punct, pos = Pos, str = Str},
|
||||
{tokmatch, Token, Rest};
|
||||
@@ -611,7 +639,7 @@ slurp_token_of_shape(punct, Pos, SrcStr) ->
|
||||
end;
|
||||
% SOPHIA VARIABLE NAMES: id, con, qid, qcon, tvar
|
||||
slurp_token_of_shape(id, Pos, SrcStr) ->
|
||||
case gs_strmatch:match(gs_strmatch:smr_sf_id(), SrcStr) of
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_id(), SrcStr) of
|
||||
{strmatch, IdStr, Rest} ->
|
||||
Token = #tk{shape = id, pos = Pos, str = IdStr},
|
||||
{tokmatch, Token, Rest};
|
||||
@@ -619,7 +647,7 @@ slurp_token_of_shape(id, Pos, SrcStr) ->
|
||||
no_tokmatch
|
||||
end;
|
||||
slurp_token_of_shape(con, Pos, SrcStr) ->
|
||||
case gs_strmatch:match(gs_strmatch:smr_sf_con(), SrcStr) of
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_con(), SrcStr) of
|
||||
{strmatch, Str, Rest} ->
|
||||
Token = #tk{shape = con, pos = Pos, str = Str},
|
||||
{tokmatch, Token, Rest};
|
||||
@@ -627,7 +655,7 @@ slurp_token_of_shape(con, Pos, SrcStr) ->
|
||||
no_tokmatch
|
||||
end;
|
||||
slurp_token_of_shape(qid, Pos, SrcStr) ->
|
||||
case gs_strmatch:match(gs_strmatch:smr_sf_qid(), SrcStr) of
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_qid(), SrcStr) of
|
||||
{strmatch, Str, Rest} ->
|
||||
Token = #tk{shape = qid, pos = Pos, str = Str},
|
||||
{tokmatch, Token, Rest};
|
||||
@@ -635,7 +663,7 @@ slurp_token_of_shape(qid, Pos, SrcStr) ->
|
||||
no_tokmatch
|
||||
end;
|
||||
slurp_token_of_shape(qcon, Pos, SrcStr) ->
|
||||
case gs_strmatch:match(gs_strmatch:smr_sf_qcon(), SrcStr) of
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_qcon(), SrcStr) of
|
||||
{strmatch, Str, Rest} ->
|
||||
Token = #tk{shape = qcon, pos = Pos, str = Str},
|
||||
{tokmatch, Token, Rest};
|
||||
@@ -643,7 +671,7 @@ slurp_token_of_shape(qcon, Pos, SrcStr) ->
|
||||
no_tokmatch
|
||||
end;
|
||||
slurp_token_of_shape(tvar, Pos, SrcStr) ->
|
||||
case gs_strmatch:match(gs_strmatch:smr_sf_tvar(), SrcStr) of
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_tvar(), SrcStr) of
|
||||
{strmatch, Str, Rest} ->
|
||||
Token = #tk{shape = tvar, pos = Pos, str = Str},
|
||||
{tokmatch, Token, Rest};
|
||||
@@ -651,7 +679,7 @@ slurp_token_of_shape(tvar, Pos, SrcStr) ->
|
||||
no_tokmatch
|
||||
end;
|
||||
slurp_token_of_shape(int16, Pos, SrcStr) ->
|
||||
case gs_strmatch:match(gs_strmatch:smr_sf_int16(), SrcStr) of
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_int16(), SrcStr) of
|
||||
{strmatch, Str, Rest} ->
|
||||
Token = #tk{shape = int16, pos = Pos, str = Str},
|
||||
{tokmatch, Token, Rest};
|
||||
@@ -659,7 +687,7 @@ slurp_token_of_shape(int16, Pos, SrcStr) ->
|
||||
no_tokmatch
|
||||
end;
|
||||
slurp_token_of_shape(int10, Pos, SrcStr) ->
|
||||
case gs_strmatch:match(gs_strmatch:smr_sf_int10(), SrcStr) of
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_int10(), SrcStr) of
|
||||
{strmatch, Str, Rest} ->
|
||||
Token = #tk{shape = int10, pos = Pos, str = Str},
|
||||
{tokmatch, Token, Rest};
|
||||
@@ -671,8 +699,8 @@ slurp_token_of_shape(int10, Pos, SrcStr) ->
|
||||
%
|
||||
% char: sophia char literal
|
||||
slurp_token_of_shape(ak, Pos, SrcStr) ->
|
||||
StringMatcher = gs_strmatch:smr_sf_ak(),
|
||||
case gs_strmatch:match(StringMatcher, SrcStr) of
|
||||
StringMatcher = gsc_strmatch:smr_sf_ak(),
|
||||
case gsc_strmatch:match(StringMatcher, SrcStr) of
|
||||
no_strmatch ->
|
||||
no_tokmatch;
|
||||
{strmatch, TokenStr, Rest} ->
|
||||
@@ -680,8 +708,8 @@ slurp_token_of_shape(ak, Pos, SrcStr) ->
|
||||
{tokmatch, Token, Rest}
|
||||
end;
|
||||
slurp_token_of_shape(ct, Pos, SrcStr) ->
|
||||
StringMatcher = gs_strmatch:smr_sf_ct(),
|
||||
case gs_strmatch:match(StringMatcher, SrcStr) of
|
||||
StringMatcher = gsc_strmatch:smr_sf_ct(),
|
||||
case gsc_strmatch:match(StringMatcher, SrcStr) of
|
||||
no_strmatch ->
|
||||
no_tokmatch;
|
||||
{strmatch, TokenStr, Rest} ->
|
||||
@@ -689,8 +717,8 @@ slurp_token_of_shape(ct, Pos, SrcStr) ->
|
||||
{tokmatch, Token, Rest}
|
||||
end;
|
||||
slurp_token_of_shape(sg, Pos, SrcStr) ->
|
||||
StringMatcher = gs_strmatch:smr_sf_sg(),
|
||||
case gs_strmatch:match(StringMatcher, SrcStr) of
|
||||
StringMatcher = gsc_strmatch:smr_sf_sg(),
|
||||
case gsc_strmatch:match(StringMatcher, SrcStr) of
|
||||
no_strmatch ->
|
||||
no_tokmatch;
|
||||
{strmatch, TokenStr, Rest} ->
|
||||
@@ -698,8 +726,8 @@ slurp_token_of_shape(sg, Pos, SrcStr) ->
|
||||
{tokmatch, Token, Rest}
|
||||
end;
|
||||
slurp_token_of_shape(char, Pos, SrcStr) ->
|
||||
StringMatcher = gs_strmatch:smr_sf_char(),
|
||||
case gs_strmatch:match(StringMatcher, SrcStr) of
|
||||
StringMatcher = gsc_strmatch:smr_sf_char(),
|
||||
case gsc_strmatch:match(StringMatcher, SrcStr) of
|
||||
no_strmatch ->
|
||||
no_tokmatch;
|
||||
{strmatch, TokenStr, Rest} ->
|
||||
@@ -707,7 +735,7 @@ slurp_token_of_shape(char, Pos, SrcStr) ->
|
||||
{tokmatch, Token, Rest}
|
||||
end;
|
||||
slurp_token_of_shape(string, Pos, SrcStr) ->
|
||||
case gs_strmatch:match(gs_strmatch:smr_sf_str(), SrcStr) of
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_str(), SrcStr) of
|
||||
no_strmatch ->
|
||||
no_tokmatch;
|
||||
{strmatch, TokenStr, Rest} ->
|
||||
@@ -715,7 +743,7 @@ slurp_token_of_shape(string, Pos, SrcStr) ->
|
||||
{tokmatch, Token, Rest}
|
||||
end;
|
||||
slurp_token_of_shape(bytes, Pos, SrcStr) ->
|
||||
case gs_strmatch:match(gs_strmatch:smr_sf_bytes(), SrcStr) of
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_bytes(), SrcStr) of
|
||||
no_strmatch ->
|
||||
no_tokmatch;
|
||||
{strmatch, TokenStr, Rest} ->
|
||||
+2
-2
@@ -1,6 +1,6 @@
|
||||
% @doc compatibility layer to test against so_scan
|
||||
%
|
||||
% converts gs_tokens data to so_scan tokens
|
||||
% converts gsc_tokens data to so_scan tokens
|
||||
%
|
||||
% Ref: so_scan.erl
|
||||
-module(gso_scan).
|
||||
@@ -104,7 +104,7 @@
|
||||
% @end
|
||||
|
||||
scan(SrcStr) ->
|
||||
case gs_tokens:tokens(SrcStr) of
|
||||
case gsc_tokens:tokens(SrcStr) of
|
||||
{ok, SfLTokens} ->
|
||||
SoTokens = to_so_tokens(SfLTokens),
|
||||
{ok, SoTokens};
|
||||
|
||||
Reference in New Issue
Block a user