Compare commits
9 Commits
e180dc955d
..
master
| Author | SHA1 | Date | |
|---|---|---|---|
| 3f73cd4d85 | |||
| 7c98182bcb | |||
| ba70aace96 | |||
| b88e51bb81 | |||
| ff066072e2 | |||
| f79403b97f | |||
| f04b7311f5 | |||
| 10424927b1 | |||
| fdb40dcb92 |
+8
-2
@@ -3,13 +3,19 @@
|
||||
- architecture needs more careful thought but only after something
|
||||
works
|
||||
- too fuzzy right now
|
||||
- ytree with tokens
|
||||
- dialyze
|
||||
- makefile
|
||||
- consolidate
|
||||
|
||||
# TODONE
|
||||
|
||||
- undo gs_ naming fuckery.. everything is `gsc_*`. it's just
|
||||
needlessly confusing. for now let's name new things gsc_* and then
|
||||
go back and undo the stupidity
|
||||
|
||||
# TONOTDO
|
||||
# TODONT
|
||||
|
||||
- barf for outputs, slurp for inputs
|
||||
- rename parser layers sequentially
|
||||
|
||||
# TODONE
|
||||
|
||||
@@ -1,30 +0,0 @@
|
||||
# gsc = gajumaru sophia compiler
|
||||
|
||||
**This is _NOT_ the official Sophia compiler.** If you're looking for
|
||||
that see https://git.qpq.swiss/QPQ-AG/sophia
|
||||
|
||||
This is an incomplete prototype rewrite of the legacy (official)
|
||||
sophia compiler in straightforward Erlang. It grew out of my (Peter
|
||||
Harpending) own efforts to document the language and its relationship
|
||||
to FATE (the gajumaru virtual machine).
|
||||
|
||||
The goal for version 0.1 is to mirror the success behavior of the
|
||||
legacy sophia compiler.
|
||||
|
||||
# Setup
|
||||
|
||||
```
|
||||
git clone https://git.qpq.swiss/QPQ-AG/gsc.git
|
||||
```
|
||||
|
||||
Add the following to `~/.bashrc` (or wherever you set your shell's `PATH`):
|
||||
|
||||
```
|
||||
export PATH=$PATH:/path/to/gsc/bin
|
||||
```
|
||||
|
||||
To test run
|
||||
|
||||
```
|
||||
gsc --help
|
||||
```
|
||||
@@ -2,10 +2,10 @@
|
||||
|
||||
# Bash harness for GSC command line tool
|
||||
#
|
||||
# based on similar harness for sfp
|
||||
#
|
||||
# gsc foo bar baz
|
||||
# -> zx rundir \
|
||||
# /home/pharpend/src/ioecs/gsc/cli \
|
||||
# --libs=gsc:/home/pharpend/src/ioecs/gsc \
|
||||
# -> zx rundir /home/pharpend/src/ioecs/gsc \
|
||||
# foo bar baz
|
||||
|
||||
zx_exists() {
|
||||
@@ -27,8 +27,12 @@ fi
|
||||
FP_THIS_FILE=$(readlink -f "${BASH_SOURCE[0]}")
|
||||
FP_THIS_DIR=$(dirname -- "${FP_THIS_FILE}")
|
||||
FP_PRJ_DIR=$(dirname -- "${FP_THIS_DIR}")
|
||||
FP_CLI_DIR="${FP_PRJ_DIR}/cli"
|
||||
|
||||
# simplified
|
||||
# Quote both expansions so that a project path or a command-line argument
# containing whitespace or glob characters reaches zx as one unmodified word.
zx rundir "$FP_PRJ_DIR" "$@"
|
||||
|
||||
|
||||
# commented out legacy code in case need it later:
|
||||
# # compute libs string
|
||||
# LIB_PARTS=()
|
||||
# for depname in "${LOCAL_DEPS[@]}"; do
|
||||
@@ -37,9 +41,9 @@ FP_CLI_DIR="${FP_PRJ_DIR}/cli"
|
||||
# IFS=,
|
||||
# LOCAL_LIBS="${LIB_PARTS[*]}"
|
||||
# unset IFS
|
||||
|
||||
LOCAL_LIBS="gsc:${FP_PRJ_DIR}"
|
||||
|
||||
#echo "zx rundir $FP_CLI_DIR --libs=$LOCAL_LIBS $@"
|
||||
|
||||
zx rundir $FP_CLI_DIR --libs=$LOCAL_LIBS $@
|
||||
#
|
||||
#LOCAL_LIBS=""
|
||||
#
|
||||
##echo "zx rundir $FP_CLI_DIR --libs=$LOCAL_LIBS $@"
|
||||
#
|
||||
##zx rundir $FP_CLI_DIR --libs=$LOCAL_LIBS $@
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
.eunit
|
||||
deps
|
||||
*.o
|
||||
*.beam
|
||||
*.plt
|
||||
*.swp
|
||||
erl_crash.dump
|
||||
ebin/*.beam
|
||||
doc/*.html
|
||||
doc/*.css
|
||||
doc/edoc-info
|
||||
doc/erlang.png
|
||||
rel/example_project
|
||||
.concrete/DEV_MODE
|
||||
.rebar
|
||||
@@ -1 +0,0 @@
|
||||
{"src/*", [debug_info, {i, "include/"}, {outdir, "ebin/"}]}.
|
||||
@@ -1,7 +0,0 @@
|
||||
{application,gsc_cli,
|
||||
[{description,"GSC CLI and test suite"},
|
||||
{registered,[]},
|
||||
{included_applications,[]},
|
||||
{applications,[stdlib,kernel]},
|
||||
{vsn,"0.1.0"},
|
||||
{modules,[gsc_cli]}]}.
|
||||
@@ -1,25 +0,0 @@
|
||||
% testing utilities
|
||||
-module(ts_utils).
|
||||
|
||||
-export([
|
||||
ct_dir/0,
|
||||
ct_file/1
|
||||
]).
|
||||
|
||||
|
||||
-spec ct_dir() -> string().
|
||||
|
||||
% directory containing the tests for the tokenizer
|
||||
ct_dir() ->
|
||||
zx_daemon:get_home() ++ "/ct".
|
||||
|
||||
|
||||
|
||||
-spec ct_file(Name) -> AbsPath when
|
||||
Name :: string(),
|
||||
AbsPath :: string().
|
||||
% @doc
|
||||
% ct_file("foo.aes") -> "/path/to/ct/foo.aes"
|
||||
|
||||
ct_file(Name) ->
|
||||
ct_dir() ++ "/" ++ Name.
|
||||
@@ -1,18 +0,0 @@
|
||||
{name,"GSC CLI"}.
|
||||
{type,cli}.
|
||||
{modules,[]}.
|
||||
{mod,"gsc_cli"}.
|
||||
{author,"Peter Harpending"}.
|
||||
{prefix,none}.
|
||||
{desc,"GSC CLI and test suite"}.
|
||||
{package_id,{"otpr","gsc_cli",{0,1,0}}}.
|
||||
{deps,[{"otpr","sophia",{9,0,0}},{"otpr","gsc",{0,1,0}}]}.
|
||||
{key_name,none}.
|
||||
{a_email,"peterharpending@qpq.swiss"}.
|
||||
{c_email,"peterharpending@qpq.swiss"}.
|
||||
{copyright,"Peter Harpending"}.
|
||||
{file_exts,[]}.
|
||||
{license,"GPL-3.0-only"}.
|
||||
{repo_url,[]}.
|
||||
{tags,[]}.
|
||||
{ws_url,[]}.
|
||||
+5
-1
@@ -4,4 +4,8 @@
|
||||
{included_applications,[]},
|
||||
{applications,[stdlib,kernel]},
|
||||
{vsn,"0.1.0"},
|
||||
{modules,[gsc]}]}.
|
||||
{modules,['ast-gulp',gsc_ast,gsc_bst,gsc_parse_type_expr,
|
||||
gsc_token_chunks,ifarith,parse_type_expr,test_ntree,
|
||||
unicode,gsc,gsc_cli,gsc_ntree,gsc_signal,gsc_strmatch,
|
||||
gsc_tokens,gso_scan,gsc_test_file,gsc_test_ntree,
|
||||
gsc_test_tokens,ts_utils]}]}.
|
||||
|
||||
+35
-3
@@ -23,14 +23,14 @@
|
||||
| qid % Foo.Bar.baz
|
||||
| qcon % Foo.Bar.Baz
|
||||
| tvar % 'foo, 'foo_bar, '_'foo'_'bar'''
|
||||
% kwds ops and punct are all collapsed by
|
||||
% kwds ops and sep are all collapsed by
|
||||
% so_scan:scan down to eg {'contract', {420, 69}}
|
||||
% where {420, 69} is the source location
|
||||
% these are three different parsers
|
||||
| kwd % contract, interface, payable, etc
|
||||
| op % "=!<>+-*/:&|?~@^"
|
||||
| punct % ".." | oneof(",.;()[]{}")
|
||||
% kwds and punct are kind of the same thing
|
||||
| sep % ".." | oneof(",.;()[]{}")
|
||||
% kwds and sep are kind of the same thing
|
||||
% but i'll keep them separate now for my own sanity. ok
|
||||
% i guess op or symbol or whatever is fine.
|
||||
%
|
||||
@@ -143,3 +143,35 @@
|
||||
| #gsc_err_nyi{}
|
||||
| #gsc_err_empty_file{}
|
||||
| #gsc_err{}.
|
||||
|
||||
|
||||
%----------------------------
|
||||
% tree type for parsing
|
||||
%----------------------------
|
||||
|
||||
% @doc stem record
|
||||
-record(ns, {meta :: any(),
|
||||
kids :: list(any())}).
|
||||
|
||||
% @doc `ntree(S, L)' is a "node tree" (meaning stems
|
||||
% have values and children)
|
||||
%
|
||||
% for the purposes of the compiler, the key observation
|
||||
% is that a flat list of tokens is already a forest
|
||||
-type ntree(S, L) :: #ns{meta :: S, kids :: [ntree(S, L)]}
|
||||
| L.
|
||||
|
||||
% @doc forest is just a list of trees
|
||||
-type nforest(S, L) :: [ntree(S, L)].
|
||||
|
||||
|
||||
% aliases
|
||||
|
||||
-type nt(S, L) :: ntree(S, L).
|
||||
-type nf(S, L) :: nforest(S, L).
|
||||
|
||||
-type ntree() :: ntree(any(), any()).
|
||||
-type nforest() :: [ntree()].
|
||||
|
||||
-type nt() :: ntree().
|
||||
-type nf() :: nforest().
|
||||
|
||||
@@ -131,7 +131,7 @@ slurp_ct_impls([#gsc_token{string = ":", type = op},
|
||||
slurp_ct_impls(_) ->
|
||||
reject.
|
||||
|
||||
slurp_ct_impls2([#gsc_token{string = ",", type = punct},
|
||||
slurp_ct_impls2([#gsc_token{string = ",", type = sep},
|
||||
#gsc_token{string = Con1, type = con}
|
||||
| Rest],
|
||||
Acc) ->
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
|
||||
do(["test"]) ->
|
||||
do_tests();
|
||||
do(["test" | Tests]) ->
|
||||
do_tests(Tests);
|
||||
do(["tests"]) ->
|
||||
do_tests();
|
||||
|
||||
do_runall_tests() ->
|
||||
lists:foreach(fun run_mod_main/1, test_mods()).
|
||||
|
||||
|
||||
do_tests(List) ->
|
||||
lists:foreach(fun run_test/1, List).
|
||||
|
||||
% n
|
||||
% @doc run one test module selected by name
%
% `TestName' may be a full module name ("gsc_test_tokens") or only the
% part after the "gsc_test_" prefix ("tokens"); the name as typed is
% tried first. Raises `{no_such_test, TestName}' when neither form names
% a module returned by test_mods/0.
%
% The name is compared as a string against the known modules instead of
% being converted with list_to_atom/1, so mistyped command-line input
% never creates new atoms.
run_test(TestName) ->
    Candidates = [TestName, "gsc_test_" ++ TestName],
    KnownMods = test_mods(),
    Matches = [Mod || Cand <- Candidates,
                      Mod <- KnownMods,
                      atom_to_list(Mod) =:= Cand],
    case Matches of
        [Found | _] -> rmm(Found);
        []          -> error({no_such_test, TestName})
    end.
|
||||
|
||||
|
||||
rmm(X) -> run_mod_main(X).
|
||||
|
||||
% KnownTests = test_mods(),
|
||||
% TestMods = ensure_all_known([], List, KnownTests),
|
||||
% lists:foreach(fun run_mod_main/1, TestMods).
|
||||
|
||||
|
||||
%ensure_all_known(Acc, [], _) ->
|
||||
% lists:sort(Acc);
|
||||
%ensure_all_known(Acc, [T | Ts], Knowns) ->
|
||||
% case lists:member(T, Knowns) of
|
||||
%
|
||||
% end.
|
||||
|
||||
|
||||
test_mods() ->
|
||||
known_modules_with_prefix("gsc_test").
|
||||
|
||||
known_modules_with_prefix(Pfx) ->
|
||||
ModsZipBeamsZipLoaded = code:all_available(),
|
||||
kmp(Pfx, ModsZipBeamsZipLoaded, []).
|
||||
|
||||
% @doc worker for known_modules_with_prefix/1
%
% Walks a list of `{ModuleNameString, BeamPath, Loaded}' triples and
% accumulates, as atoms, the names that start with `Pfx'. The result is
% returned sorted once the input is exhausted.
kmp(_Pfx, [], Found) ->
    lists:sort(Found);
kmp(Pfx, [{Name, _Beam, _IsLoaded} | Tail], Found) ->
    NewFound =
        case lists:prefix(Pfx, Name) of
            true  -> [list_to_atom(Name) | Found];
            false -> Found
        end,
    kmp(Pfx, Tail, NewFound).
|
||||
|
||||
% @doc print a banner naming `Mod', then call `Mod:main()'
%
% An exception raised by the call is not propagated: its class and
% reason are printed, followed by the stack trace.
run_mod_main(Mod) ->
    Banner = "========================================\n"
             "~p:main()\n"
             "========================================\n",
    io:format(Banner, [Mod]),
    try Mod:main() of
        Result -> Result
    catch
        Class:Reason:Stack ->
            io:format("~p: ~p~n", [Class, Reason]),
            io:format("Trace:~n~p~n", [Stack])
    end.
|
||||
|
||||
% @doc print the name of every module returned by test_mods/0, one per
% line
do_tlist() ->
    PrintName = fun(Mod) -> io:format("~s~n", [Mod]) end,
    lists:foreach(PrintName, test_mods()).
|
||||
|
||||
|
||||
tokenizers_agree(File) ->
|
||||
gso_tokens(File) =:= so_tokens(File).
|
||||
@@ -0,0 +1,77 @@
|
||||
|
||||
-spec s2t_file(Signal) -> AstFile when
|
||||
Signal :: [tk()],
|
||||
AstFile :: #ns{meta :: file, kids :: asf()}.
|
||||
|
||||
s2t_file([]) ->
|
||||
error(empty_file);
|
||||
s2t_file(S0 = [#tk{pos = {_, FileCol}} | _]) ->
|
||||
Blk0 = s2t_gulp_block(FileCol, S0),
|
||||
Blk1 = t2t_parse_tds_in_block(Blk0),
|
||||
#ns{meta = file, kids = [Blk1]}.
|
||||
|
||||
-spec s2t_gulp_block(BlkCol, Signal) -> Block when
    BlkCol :: pos_integer(),
    Signal :: [tk()],
    Block  :: #ns{meta :: block}.
% @doc wrap a token list in a `block' node
%
% The node's kids are the block items that s2f_block_items/2 builds from
% the tokens. Crashes with a badmatch if any token's column is smaller
% than `BCol'.
s2t_gulp_block(BCol, Tks) ->
    % sanity check: nothing may sit to the left of the block column
    true = lists:all(fun(#tk{pos = {_, Col}}) -> Col >= BCol end, Tks),
    #ns{meta = block, kids = s2f_block_items(BCol, Tks)}.
|
||||
|
||||
-spec s2f_block_items(BCol, Signal) -> BlkItems when
|
||||
BCol :: pos_integer(),
|
||||
Signal :: [tk()],
|
||||
BlkItems :: [BlkItem],
|
||||
BlkItem :: #ns{meta :: block_item,
|
||||
kids :: asf()}.
|
||||
|
||||
s2f_block_items(BCol, Signal) ->
|
||||
s2f_block_items(BCol, [], Signal).
|
||||
|
||||
|
||||
% @doc accumulator loop behind s2f_block_items/2
%
% Each iteration requires the next token to sit exactly at column
% `BCol', hands it and the remaining tokens to s2t_slurp_block_item/3,
% and pushes the resulting item. Items are returned in source order.
s2f_block_items(_BCol, Done, []) ->
    lists:reverse(Done);
s2f_block_items(BCol, Done, [Head = #tk{pos = {_, Col}} | Tail])
        when Col =:= BCol ->
    {slurp, Item, Remaining} = s2t_slurp_block_item(BCol, Head, Tail),
    s2f_block_items(BCol, [Item | Done], Remaining).
|
||||
|
||||
|
||||
s2t_slurp_block_item(BCol, T0, F0) ->
|
||||
{ItemTokens, F1} = s2s_sw_block_item(BCol, T0, F0),
|
||||
Item = #ns{meta = block_item, kids = ItemTokens},
|
||||
{slurp, Item, F1}.
|
||||
|
||||
% sw = splitwith; kind of take/drop
|
||||
% @doc split off the tokens belonging to the block item that `T0' opens
%
% Takes the leading run of tokens in `F0' whose column is strictly
% greater than `BCol' and returns `{[T0 | ThatRun], RestOfF0}'.
s2s_sw_block_item(BCol, T0, F0) ->
    Deeper = fun(#tk{pos = {_, Col}}) -> Col > BCol end,
    {ItemTail, Leftover} = lists:splitwith(Deeper, F0),
    {[T0 | ItemTail], Leftover}.
|
||||
|
||||
-spec t2t_parse_tds_in_block(Block0) -> Block1 when
|
||||
Block0 :: ast(),
|
||||
Block1 :: ast().
|
||||
|
||||
% go through and convert the block_item nodes to top
|
||||
% decls
|
||||
% @doc replace every kid of a `block' node with the result of
% t2t_parse_td_from_item/1, keeping the node's other fields as they are
t2t_parse_tds_in_block(Block = #ns{meta = block, kids = Items}) ->
    ToTopDecl = fun(Item) -> t2t_parse_td_from_item(Item) end,
    Block#ns{kids = lists:map(ToTopDecl, Items)}.
|
||||
|
||||
|
||||
-spec t2t_parse_td_from_item(BlockItem) -> TopDecl when
|
||||
BlockItem :: #ns{meta :: block_item},
|
||||
TopDecl :: #ns{meta :: td_meta()}.
|
||||
|
||||
t2t_parse_td_from_item(#ns{meta = block_item, kids = Signal}) ->
|
||||
s2t_top_decl(Signal).
|
||||
|
||||
|
||||
-spec s2t_top_decl(Signal) -> TdTree when
|
||||
Signal :: [tk()],
|
||||
TdTree :: ast().
|
||||
|
||||
s2t_top_decl(S0) ->
|
||||
@@ -0,0 +1,292 @@
|
||||
# Syntax
|
||||
|
||||
## Lexical syntax
|
||||
|
||||
### Comments
|
||||
|
||||
Single line comments start with `//` and block comments are enclosed in `/*`
|
||||
and `*/` and can be nested.
|
||||
|
||||
### Keywords
|
||||
|
||||
```
|
||||
contract include let switch type record datatype if elif else function
|
||||
stateful payable true false mod public entrypoint private indexed namespace
|
||||
interface main using as for hiding
|
||||
```
|
||||
|
||||
### Tokens
|
||||
|
||||
- `Id = [a-z_][A-Za-z0-9_']*` identifiers start with a lower case letter.
|
||||
- `Con = [A-Z][A-Za-z0-9_']*` constructors start with an upper case letter.
|
||||
- `QId = (Con\.)+Id` qualified identifiers (e.g. `Map.member`)
|
||||
- `QCon = (Con\.)+Con` qualified constructor
|
||||
- `TVar = 'Id` type variable (e.g. `'a`, `'b`)
|
||||
- `Int = [0-9]+(_[0-9]+)*|0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*` integer literal with optional `_` separators
|
||||
- `Bytes = #[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*` byte array literal with optional `_` separators
|
||||
- `String` string literal enclosed in `"` with escape character `\`
|
||||
- `Char` character literal enclosed in `'` with escape character `\`
|
||||
- `AccountAddress` base58-encoded 32 byte account pubkey with `ak_` prefix
|
||||
- `ContractAddress` base58-encoded 32 byte contract address with `ct_` prefix
|
||||
- `Signature` base58-encoded 64 byte cryptographic signature with `sg_` prefix
|
||||
|
||||
Valid string escape codes are
|
||||
|
||||
| Escape | ASCII | |
|
||||
|---------------|-------------|---|
|
||||
| `\b` | 8 | |
|
||||
| `\t` | 9 | |
|
||||
| `\n` | 10 | |
|
||||
| `\v` | 11 | |
|
||||
| `\f` | 12 | |
|
||||
| `\r` | 13 | |
|
||||
| `\e` | 27 | |
|
||||
| `\xHexDigits` | *HexDigits* | |
|
||||
|
||||
|
||||
See the [identifier encoding scheme](https://git.qpq.swiss/QPQ-AG/protocol/src/branch/master/node/api/api_encoding.md) for the
|
||||
details on the base58 literals.
|
||||
|
||||
## Layout blocks
|
||||
|
||||
Sophia uses Python-style layout rules to group declarations and statements. A
|
||||
layout block with more than one element must start on a separate line and be
|
||||
indented more than the currently enclosing layout block. Blocks with a single
|
||||
element can be written on the same line as the previous token.
|
||||
|
||||
Each element of the block must share the same indentation and no part of an
|
||||
element may be indented less than the indentation of the block. For instance
|
||||
|
||||
```sophia
|
||||
contract Layout =
|
||||
function foo() = 0 // no layout
|
||||
function bar() = // layout block starts on next line
|
||||
let x = foo() // indented more than 2 spaces
|
||||
x
|
||||
+ 1 // the '+' is indented more than the 'x'
|
||||
```
|
||||
|
||||
## Notation
|
||||
|
||||
In describing the syntax below, we use the following conventions:
|
||||
|
||||
- Upper-case identifiers denote non-terminals (like `Expr`) or terminals with
|
||||
some associated value (like `Id`).
|
||||
- Keywords and symbols are enclosed in single quotes: `'let'` or `'='`.
|
||||
- Choices are separated by vertical bars: `|`.
|
||||
- Optional elements are enclosed in `[` square brackets `]`.
|
||||
- `(` Parentheses `)` are used for grouping.
|
||||
- Zero or more repetitions are denoted by a postfix `*`, and one or more
|
||||
repetitions by a `+`.
|
||||
- `Block(X)` denotes a layout block of `X`s.
|
||||
- `Sep(X, S)` is short for `[X (S X)*]`, i.e. a possibly empty sequence of `X`s
|
||||
separated by `S`s.
|
||||
- `Sep1(X, S)` is short for `X (S X)*`, i.e. same as `Sep`, but must not be empty.
|
||||
|
||||
|
||||
## Declarations
|
||||
|
||||
A Sophia file consists of a sequence of *declarations* in a layout block.
|
||||
|
||||
```c
|
||||
File ::= Block(TopDecl)
|
||||
|
||||
TopDecl ::= ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl)
|
||||
| 'contract' 'interface' Con [Implement] '=' Block(Decl)
|
||||
| 'namespace' Con '=' Block(Decl)
|
||||
| '@compiler' PragmaOp Version
|
||||
| 'include' String
|
||||
| Using
|
||||
|
||||
Implement ::= ':' Sep1(Con, ',')
|
||||
|
||||
Decl ::= 'type' Id ['(' TVar* ')'] '=' TypeAlias
|
||||
| 'record' Id ['(' TVar* ')'] '=' RecordType
|
||||
| 'datatype' Id ['(' TVar* ')'] '=' DataType
|
||||
| 'let' Id [':' Type] '=' Expr
|
||||
| (EModifier* 'entrypoint' | FModifier* 'function') Block(FunDecl)
|
||||
| Using
|
||||
|
||||
FunDecl ::= Id ':' Type // Type signature
|
||||
| Id Args [':' Type] '=' Block(Stmt) // Definition
|
||||
| Id Args [':' Type] Block(GuardedDef) // Guarded definitions
|
||||
|
||||
GuardedDef ::= '|' Sep1(Expr, ',') '=' Block(Stmt)
|
||||
|
||||
Using ::= 'using' Con ['as' Con] [UsingParts]
|
||||
UsingParts ::= 'for' '[' Sep1(Id, ',') ']'
|
||||
| 'hiding' '[' Sep1(Id, ',') ']'
|
||||
|
||||
PragmaOp ::= '<' | '=<' | '==' | '>=' | '>'
|
||||
Version ::= Sep1(Int, '.')
|
||||
|
||||
EModifier ::= 'payable' | 'stateful'
|
||||
FModifier ::= 'stateful' | 'private'
|
||||
|
||||
Args ::= '(' Sep(Pattern, ',') ')'
|
||||
```
|
||||
|
||||
Contract declarations must appear at the top-level.
|
||||
|
||||
For example,
|
||||
```sophia
|
||||
contract Test =
|
||||
type t = int
|
||||
entrypoint add (x : t, y : t) = x + y
|
||||
```
|
||||
|
||||
There are three forms of type declarations: type aliases (declared with the
|
||||
`type` keyword), record type definitions (`record`) and data type definitions
|
||||
(`datatype`):
|
||||
|
||||
```c
|
||||
TypeAlias ::= Type
|
||||
RecordType ::= '{' Sep(FieldType, ',') '}'
|
||||
DataType ::= Sep1(ConDecl, '|')
|
||||
|
||||
FieldType ::= Id ':' Type
|
||||
ConDecl ::= Con ['(' Sep1(Type, ',') ')']
|
||||
```
|
||||
|
||||
For example,
|
||||
```sophia
|
||||
record point('a) = {x : 'a, y : 'a}
|
||||
datatype shape('a) = Circle(point('a), 'a) | Rect(point('a), point('a))
|
||||
type int_shape = shape(int)
|
||||
```
|
||||
|
||||
## Types
|
||||
|
||||
```c
|
||||
Type ::= Domain '=>' Type // Function type
|
||||
| Type '(' Sep(Type, ',') ')' // Type application
|
||||
| '(' Type ')' // Parens
|
||||
| 'unit' | Sep(Type, '*') // Tuples
|
||||
| Id | QId | TVar
|
||||
|
||||
Domain ::= Type // Single argument
|
||||
| '(' Sep(Type, ',') ')' // Multiple arguments
|
||||
```
|
||||
|
||||
The function type arrow associates to the right.
|
||||
|
||||
Example,
|
||||
```sophia
|
||||
'a => list('a) => (int * list('a))
|
||||
```
|
||||
|
||||
## Statements
|
||||
|
||||
Function bodies are blocks of *statements*, where a statement is one of the following
|
||||
|
||||
```c
|
||||
Stmt ::= 'switch' '(' Expr ')' Block(Case)
|
||||
| 'if' '(' Expr ')' Block(Stmt)
|
||||
| 'elif' '(' Expr ')' Block(Stmt)
|
||||
| 'else' Block(Stmt)
|
||||
| 'let' LetDef
|
||||
| Using
|
||||
| Expr
|
||||
|
||||
LetDef ::= Id Args [':' Type] '=' Block(Stmt) // Function definition
|
||||
| Pattern '=' Block(Stmt) // Value definition
|
||||
|
||||
Case ::= Pattern '=>' Block(Stmt)
|
||||
| Pattern Block(GuardedCase)
|
||||
|
||||
GuardedCase ::= '|' Sep1(Expr, ',') '=>' Block(Stmt)
|
||||
|
||||
Pattern ::= Expr
|
||||
```
|
||||
|
||||
`if` statements can be followed by zero or more `elif` statements and an optional final `else` statement. For example,
|
||||
|
||||
```sophia
|
||||
let x : int = 4
|
||||
switch(f(x))
|
||||
None => 0
|
||||
Some(y) =>
|
||||
if(y > 10)
|
||||
"too big"
|
||||
elif(y < 3)
|
||||
"too small"
|
||||
else
|
||||
"just right"
|
||||
```
|
||||
|
||||
## Expressions
|
||||
|
||||
```c
|
||||
Expr ::= '(' LamArgs ')' '=>' Block(Stmt) // Anonymous function (x) => x + 1
|
||||
| '(' BinOp ')' // Operator lambda (+)
|
||||
| 'if' '(' Expr ')' Expr 'else' Expr // If expression if(x < y) y else x
|
||||
| Expr ':' Type // Type annotation 5 : int
|
||||
| Expr BinOp Expr // Binary operator x + y
|
||||
| UnOp Expr // Unary operator ! b
|
||||
| Expr '(' Sep(Expr, ',') ')' // Application f(x, y)
|
||||
| Expr '.' Id // Projection state.x
|
||||
| Expr '[' Expr ']' // Map lookup map[key]
|
||||
| Expr '{' Sep(FieldUpdate, ',') '}' // Record or map update r{ fld[key].x = y }
|
||||
| '[' Sep(Expr, ',') ']' // List [1, 2, 3]
|
||||
| '[' Expr '|' Sep(Generator, ',') ']'
|
||||
// List comprehension [k | x <- [1], if (f(x)), let k = x+1]
|
||||
| '[' Expr '..' Expr ']' // List range [1..n]
|
||||
| '{' Sep(FieldUpdate, ',') '}' // Record or map value {x = 0, y = 1}, {[key] = val}
|
||||
| '(' Expr ')' // Parens (1 + 2) * 3
|
||||
| '(' Expr '=' Expr ')' // Assign pattern (y = x::_)
|
||||
| Id | Con | QId | QCon // Identifiers x, None, Map.member, AELib.Token
|
||||
| Int | Bytes | String | Char // Literals 123, 0xff, #00abc123, "foo", '%'
|
||||
| AccountAddress | ContractAddress // Chain identifiers
|
||||
| Signature // Signature
|
||||
| '???' // Hole expression 1 + ???
|
||||
|
||||
Generator ::= Pattern '<-' Expr // Generator
|
||||
| 'if' '(' Expr ')' // Guard
|
||||
| LetDef // Definition
|
||||
|
||||
LamArgs ::= Sep(LamArg, ',')
|
||||
LamArg ::= Id [':' Type]
|
||||
|
||||
FieldUpdate ::= Path '=' Expr
|
||||
Path ::= Id // Record field
|
||||
| '[' Expr ']' // Map key
|
||||
| Path '.' Id // Nested record field
|
||||
| Path '[' Expr ']' // Nested map key
|
||||
|
||||
BinOp ::= '||' | '&&' | '<' | '>' | '=<' | '>=' | '==' | '!='
|
||||
| '::' | '++' | '+' | '-' | '*' | '/' | 'mod' | '^'
|
||||
| 'band' | 'bor' | 'bxor' | '<<' | '>>' | '|>'
|
||||
UnOp ::= '-' | '!' | 'bnot'
|
||||
```
|
||||
|
||||
## Operator types
|
||||
|
||||
| Operators | Type
|
||||
| --- | ---
|
||||
| `-` `+` `*` `/` `mod` `^` | arithmetic operators
|
||||
| `!` `&&` `\|\|` | logical operators
|
||||
| `band` `bor` `bxor` `bnot` `<<` `>>` | bitwise operators
|
||||
| `==` `!=` `<` `>` `=<` `>=` | comparison operators
|
||||
| `::` `++` | list operators
|
||||
| `\|>` | functional operators
|
||||
|
||||
## Operator precedence
|
||||
|
||||
In order of highest to lowest precedence.
|
||||
|
||||
| Operators | Associativity
|
||||
| --- | ---
|
||||
| `!` `bnot`| right
|
||||
| `^` | left
|
||||
| `*` `/` `mod` | left
|
||||
| `-` (unary) | right
|
||||
| `+` `-` | left
|
||||
| `<<` `>>` | left
|
||||
| `::` `++` | right
|
||||
| `<` `>` `=<` `>=` `==` `!=` | none
|
||||
| `band` | left
|
||||
| `bxor` | left
|
||||
| `bor` | left
|
||||
| `&&` | right
|
||||
| `\|\|` | right
|
||||
| `\|>` | left
|
||||
+15
-27
@@ -1,41 +1,29 @@
|
||||
% @doc bikeshed procrastination head into vim warmup thing
|
||||
% @doc bikeshed procrastination head into vim warmup
|
||||
% thing
|
||||
%
|
||||
% sophia compiler from scratch by PRH
|
||||
%
|
||||
% based on original sophia compiler
|
||||
%
|
||||
% parse layers:
|
||||
% 1. gsc_tokens: SrcStr -> (Tokens | SigTokens)
|
||||
%
|
||||
% SigTokens = not comment/whitespace
|
||||
%
|
||||
% layers:
|
||||
% a. gsc_strmatch : matches string shapes
|
||||
% b. gso_scan : converts to so_scan shapes
|
||||
%
|
||||
%
|
||||
% terminology:
|
||||
%
|
||||
% - `slurp`/`barf` borrowed from emacs paredit mode:
|
||||
%
|
||||
% slurp : (a b) c -> (a b c)
|
||||
% barf : (a b c) -> a (b c)
|
||||
%
|
||||
% * `slurp` usually involves *transforming* input
|
||||
% into a new type (e.g. slurp a token from src
|
||||
% string); think of slurp as a verb meaning to
|
||||
% consume and then digest
|
||||
% * `barf` basically means blindly splitting off
|
||||
% input
|
||||
%
|
||||
% based on original sophia compiler; target for version
|
||||
% 0.1 is to match behavior exactly
|
||||
% @end
|
||||
|
||||
-module(gsc).
|
||||
|
||||
% token and tokens
|
||||
-export_type([
|
||||
token/0,
|
||||
signal/0
|
||||
]).
|
||||
|
||||
% syntax tree/forest wrapper type
|
||||
-export_type([
|
||||
ntree/2, ntree/0,
|
||||
nforest/2, nforest/0,
|
||||
nt/2, nt/0,
|
||||
nf/2, nf/0
|
||||
]).
|
||||
|
||||
|
||||
-export([
|
||||
unsafe_tokens_from_file/1,
|
||||
unsafe_tokens_from_string/1,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
%%% @doc
|
||||
%%% GSC CLI: explorer/harness for sfc iteration
|
||||
%%% GSC CLI: explorer/harness for gsc iteration
|
||||
%%% @end
|
||||
|
||||
-module(gsc_cli).
|
||||
@@ -8,11 +8,10 @@
|
||||
-copyright("Peter Harpending <peterharpending@qpq.swiss>").
|
||||
-license("GPL-3.0-only").
|
||||
|
||||
-export([
|
||||
tokens/1,
|
||||
so_tokens/1,
|
||||
gso_tokens/1
|
||||
]).
|
||||
%-export([
|
||||
% tokens/1,
|
||||
% gso_tokens/1
|
||||
%]).
|
||||
-export([start/1]).
|
||||
|
||||
-include("$gsc_include/gsc.hrl").
|
||||
@@ -37,37 +36,29 @@ start(["eshell"]) ->
|
||||
do_eshell(),
|
||||
ok;
|
||||
start(ArgV) ->
|
||||
%io:format("ArgV: ~p~n", [ArgV]),
|
||||
do(ArgV),
|
||||
zx:silent_stop().
|
||||
|
||||
do(["list"]) ->
|
||||
do_tlist();
|
||||
do(["list", "tests"]) ->
|
||||
do_tlist();
|
||||
do(["test"]) ->
|
||||
do_tests();
|
||||
do(["test" | Tests]) ->
|
||||
do_tests(Tests);
|
||||
do(["tests"]) ->
|
||||
do_tests();
|
||||
do(["run", "tests"]) ->
|
||||
do_tests();
|
||||
do(["tokenizers_agree", Foo]) ->
|
||||
io:format("~p~n", [tokenizers_agree(Foo)]);
|
||||
do_test(Tests);
|
||||
% slowly phasing out shitty names like lctokens
|
||||
% tokens = native gsc token representation
|
||||
do(["tokens", Foo]) -> do_tokens(Foo);
|
||||
do(["color_tokens", Foo]) -> do_color_tokens(Foo);
|
||||
do(["ctokens", Foo]) -> do_color_tokens(Foo);
|
||||
do(["colour_tokens" | _]) -> do_doi();
|
||||
% so_tokens = so_scan tokens
|
||||
do(["so", "tokens", Foo]) -> do_so_tokens(Foo);
|
||||
do(["so_tokens", Foo]) -> do_so_tokens(Foo);
|
||||
% print source file to screen with token boundaries
|
||||
% highlighted
|
||||
do(["tokens", "-c", Foo]) -> do_color_tokens(Foo);
|
||||
do(["tokens", "--color", Foo]) -> do_color_tokens(Foo);
|
||||
do(["tcat", Foo]) -> do_color_tokens(Foo);
|
||||
do(["ctokens", Foo]) -> do_color_tokens(Foo);
|
||||
do(["color_tokens", Foo]) -> do_color_tokens(Foo);
|
||||
do(["tokens", "--colour" | _]) -> do_doi();
|
||||
do(["colour_tokens" | _]) -> do_doi();
|
||||
%% so_tokens = so_scan tokens
|
||||
%do(["so", "tokens", Foo]) -> do_so_tokens(Foo);
|
||||
%do(["so_tokens", Foo]) -> do_so_tokens(Foo);
|
||||
% gso_tokens = our mockery
|
||||
do(["gso", "tokens", Foo]) -> do_gso_tokens(Foo);
|
||||
do(["gso_tokens", Foo]) -> do_gso_tokens(Foo);
|
||||
% print source file to screen with token boundaries highlighted
|
||||
do(["gso_tokens", Foo]) -> do_gso_tokens(Foo);
|
||||
% script utility
|
||||
do(["rmm", Foo]) ->
|
||||
do_rmm(Foo);
|
||||
@@ -79,6 +70,20 @@ do_doi() ->
|
||||
FP = zx:get_home() ++ "/priv/doi.txt",
|
||||
page_file(FP).
|
||||
|
||||
|
||||
do_test(Args) ->
|
||||
GscTestsMod = gsc_tests_mod(),
|
||||
GscTestsMod:cli_args(Args).
|
||||
|
||||
|
||||
% @doc compile test/gsc_tests.erl under the zx home directory, load the
% result, and return the module name
%
% The source is compiled to an in-memory binary and loaded with
% code:load_binary/3. A bare compile:file/1 only writes a .beam file into
% the current working directory and does not load it, so a later call
% into the module would depend on that directory being writable and on
% the code path, and would leave a stray .beam behind.
%
% Raises the compiler's error term if compilation fails, or
% `{load_failed, Reason}' if the compiled module cannot be loaded.
gsc_tests_mod() ->
    FilePath = zx:get_home() ++ "/test/gsc_tests.erl",
    % keep the reporting defaults of compile:file/1, add `binary'
    Opts = [binary, verbose, report_errors, report_warnings],
    case compile:file(FilePath, Opts) of
        {ok, Mod, Bin} ->
            case code:load_binary(Mod, FilePath, Bin) of
                {module, Mod}   -> Mod;
                {error, Reason} -> error({load_failed, Reason})
            end;
        Error ->
            error(Error)
    end.
|
||||
|
||||
|
||||
% thank you chatgpt
|
||||
% os:cmd didnt do nuffin because that's for running
|
||||
% stuff in the background and capturing the output, not
|
||||
@@ -107,83 +112,6 @@ less_file(Less, FilePath) ->
|
||||
error(Reason)
|
||||
end.
|
||||
|
||||
do_tests() ->
|
||||
io:format("TestModules = ~p~n", [test_mods()]),
|
||||
do_runall_tests().
|
||||
|
||||
do_runall_tests() ->
|
||||
lists:foreach(fun run_mod_main/1, test_mods()).
|
||||
|
||||
|
||||
do_tests(List) ->
|
||||
lists:foreach(fun run_test/1, List).
|
||||
|
||||
% n
|
||||
run_test(TestName) ->
|
||||
% we have two candidate atoms
|
||||
C1 = list_to_atom(TestName),
|
||||
C2 = list_to_atom("gsc_test_" ++ TestName),
|
||||
KnownMods = test_mods(),
|
||||
IsC1 = lists:member(C1, KnownMods),
|
||||
IsC2 = lists:member(C2, KnownMods),
|
||||
if
|
||||
IsC1 -> rmm(C1);
|
||||
IsC2 -> rmm(C2);
|
||||
true -> error({no_such_test, TestName})
|
||||
end.
|
||||
|
||||
|
||||
rmm(X) -> run_mod_main(X).
|
||||
|
||||
% KnownTests = test_mods(),
|
||||
% TestMods = ensure_all_known([], List, KnownTests),
|
||||
% lists:foreach(fun run_mod_main/1, TestMods).
|
||||
|
||||
|
||||
%ensure_all_known(Acc, [], _) ->
|
||||
% lists:sort(Acc);
|
||||
%ensure_all_known(Acc, [T | Ts], Knowns) ->
|
||||
% case lists:member(T, Knowns) of
|
||||
%
|
||||
% end.
|
||||
|
||||
|
||||
test_mods() ->
|
||||
known_modules_with_prefix("gsc_test").
|
||||
|
||||
known_modules_with_prefix(Pfx) ->
|
||||
ModsZipBeamsZipLoaded = code:all_available(),
|
||||
kmp(Pfx, ModsZipBeamsZipLoaded, []).
|
||||
|
||||
kmp(_Pfx, [], Acc) ->
|
||||
lists:sort(Acc);
|
||||
kmp(Pfx, [{ModStr, _BeamPath, _Loaded} | Rest], Acc) ->
|
||||
case lists:prefix(Pfx, ModStr) of
|
||||
false -> kmp(Pfx, Rest, Acc);
|
||||
true -> kmp(Pfx, Rest, [list_to_atom(ModStr) | Acc])
|
||||
end.
|
||||
|
||||
run_mod_main(Mod) ->
|
||||
io:format("========================================\n"
|
||||
"~p:main()\n"
|
||||
"========================================\n",
|
||||
[Mod]),
|
||||
try
|
||||
Mod:main()
|
||||
catch
|
||||
Err:ErrType:Trace ->
|
||||
io:format("~p: ~p~n", [Err, ErrType]),
|
||||
io:format("Trace:~n~p~n", [Trace])
|
||||
end.
|
||||
|
||||
do_tlist() ->
|
||||
lists:foreach(
|
||||
fun(ModName) ->
|
||||
io:format("~s~n", [ModName])
|
||||
end,
|
||||
test_mods()
|
||||
).
|
||||
|
||||
|
||||
-spec do_eshell() -> ok.
|
||||
% @doc start an erlang shell
|
||||
@@ -196,16 +124,11 @@ do_eshell() ->
|
||||
{error, Reason} -> error(Reason)
|
||||
end.
|
||||
|
||||
tokenizers_agree(File) ->
|
||||
gso_tokens(File) =:= so_tokens(File).
|
||||
|
||||
|
||||
do_tokens(FilePath) ->
|
||||
[io:format("~p~n", [Tk]) || Tk <- tokens(FilePath)].
|
||||
|
||||
do_so_tokens(FilePath) ->
|
||||
[io:format("~p~n", [Tk]) || Tk <- so_tokens(FilePath)].
|
||||
|
||||
do_gso_tokens(FilePath) ->
|
||||
[io:format("~p~n", [Tk]) || Tk <- gso_tokens(FilePath)].
|
||||
|
||||
@@ -219,12 +142,6 @@ do_rmm(FilePath) ->
|
||||
end.
|
||||
|
||||
|
||||
so_tokens(FilePath) ->
|
||||
{ok, FileBytes} = file:read_file(FilePath),
|
||||
FileStr = unicode:characters_to_nfc_list(FileBytes),
|
||||
{ok, Tokens} = so_scan:scan(FileStr),
|
||||
Tokens.
|
||||
|
||||
gso_tokens(FilePath) ->
|
||||
{ok, FileBytes} = file:read_file(FilePath),
|
||||
FileStr = unicode:characters_to_nfc_list(FileBytes),
|
||||
@@ -263,14 +180,26 @@ colorize_tokens(_, [], Acc) ->
|
||||
rotate([A | Rest]) ->
|
||||
{A, Rest ++ [A]}.
|
||||
|
||||
colorize_token_str(Color, #tk{str = Str}) ->
|
||||
{Pfx, Sfx} = color_fixes(Color),
|
||||
colorize_token_str(Color, T = #tk{str = Str}) ->
|
||||
SN =
|
||||
case T#tk.shape of
|
||||
bcom -> noise;
|
||||
lcom -> noise;
|
||||
ws -> noise;
|
||||
_ -> signal
|
||||
end,
|
||||
{Pfx, Sfx} = color_fixes(SN, Color),
|
||||
[Pfx, Str, Sfx].
|
||||
|
||||
color_fixes(red) -> {?ANSI_FG_RED, ?ANSI_FG_RESET};
|
||||
color_fixes(green) -> {?ANSI_FG_GREEN, ?ANSI_FG_RESET};
|
||||
color_fixes(yellow) -> {?ANSI_FG_YELLOW, ?ANSI_FG_RESET};
|
||||
color_fixes(blue) -> {?ANSI_FG_BLUE, ?ANSI_FG_RESET};
|
||||
color_fixes(magenta) -> {?ANSI_FG_MAGENTA, ?ANSI_FG_RESET};
|
||||
color_fixes(cyan) -> {?ANSI_FG_CYAN, ?ANSI_FG_RESET}.
|
||||
|
||||
% dim noisy tokens
|
||||
%color_fixes(noise, Color) ->
|
||||
% {P, S} = color_fixes(signal, Color),
|
||||
% {[?ANSI_DIM, P], [S, ?ANSI_UNDIM]};
|
||||
color_fixes(_, red) -> {?ANSI_FG_RED, ?ANSI_FG_RESET};
|
||||
color_fixes(_, green) -> {?ANSI_FG_GREEN, ?ANSI_FG_RESET};
|
||||
color_fixes(_, yellow) -> {?ANSI_FG_YELLOW, ?ANSI_FG_RESET};
|
||||
color_fixes(_, blue) -> {?ANSI_FG_BLUE, ?ANSI_FG_RESET};
|
||||
color_fixes(_, magenta) -> {?ANSI_FG_MAGENTA, ?ANSI_FG_RESET};
|
||||
color_fixes(_, cyan) -> {?ANSI_FG_CYAN, ?ANSI_FG_RESET}.
|
||||
|
||||
@@ -1,12 +1,5 @@
|
||||
-module(gsc_ntree).
|
||||
|
||||
-export_type([
|
||||
ntree/2, ntree/0,
|
||||
nforest/2, nforest/0,
|
||||
nt/2, nt/0,
|
||||
nf/2, nf/0
|
||||
]).
|
||||
|
||||
-export([
|
||||
nstem/2, meta/1, kids/1,
|
||||
flatten_tree/1, flatten_forest/1
|
||||
@@ -15,37 +8,6 @@
|
||||
|
||||
-include("$gsc_include/gsc.hrl").
|
||||
|
||||
%%=====================================================
|
||||
%% API: types
|
||||
%%=====================================================
|
||||
|
||||
% @doc stem record
|
||||
-record(ns, {meta :: any(),
|
||||
kids :: list(any())}).
|
||||
|
||||
% @doc `ntree(S, L)' is a "node tree" (meaning stems
|
||||
% have values and children)
|
||||
%
|
||||
% for the purposes of the compiler, the key observation
|
||||
% is that a flat list of tokens is already a forest
|
||||
-type ntree(S, L) :: #ns{meta :: S, kids :: [ntree(S, L)]}
|
||||
| L.
|
||||
|
||||
% @doc forest is just a list of trees
|
||||
-type nforest(S, L) :: [ntree(S, L)].
|
||||
|
||||
|
||||
% aliases
|
||||
|
||||
-type nt(S, L) :: ntree(S, L).
|
||||
-type nf(S, L) :: nforest(S, L).
|
||||
|
||||
-type ntree() :: ntree(any(), any()).
|
||||
-type nforest() :: [ntree()].
|
||||
|
||||
-type nt() :: ntree().
|
||||
-type nf() :: nforest().
|
||||
|
||||
|
||||
%%=====================================================
|
||||
%% API: functions
|
||||
@@ -91,4 +53,3 @@ ft(Leaf) -> [Leaf].
|
||||
|
||||
ff(F) ->
|
||||
[ft(T) || T <- F].
|
||||
|
||||
|
||||
@@ -0,0 +1,111 @@
|
||||
% signal = non-noisy tokens
|
||||
-module(gsc_signal).
|
||||
|
||||
-export([
|
||||
from_tokens/1,
|
||||
is_block/1,
|
||||
gulp_block_items/1,
|
||||
block_to_items/1,
|
||||
take_block_item/1
|
||||
]).
|
||||
|
||||
-include("$gsc_include/gsc.hrl").
|
||||
|
||||
|
||||
-spec from_tokens(Tokens) -> Signal when
    Tokens :: [tk()],
    Signal :: [tk()].
% @doc
% Filter out comments/whitespace. The filtering itself is delegated to
% `gsc_tokens:filter_significant/1'.
% @end

from_tokens(AllTokens) ->
    Signal = gsc_tokens:filter_significant(AllTokens),
    Signal.
|
||||
|
||||
|
||||
|
||||
-spec is_block(Signal) -> Result when
    Signal :: [tk()],
    Result :: boolean().
% @doc
% True when no token after the first one sits in a column to the left of
% the first token's column. The empty signal counts as a block.
% @end

is_block([]) ->
    true;
is_block([#tk{pos = {_, FirstCol}} | Others]) ->
    lists:all(fun(#tk{pos = {_, Col}}) -> Col >= FirstCol end, Others).
|
||||
|
||||
|
||||
|
||||
-spec gulp_block_items(Signal) -> Result when
    Signal :: [tk()],
    Result :: {gulp, Items}
            | {error, any()},
    Items  :: [Signal].
% @doc
% Consume the ENTIRE signal as one block and split it into block items.
%
% Returns `{gulp, Items}' when the signal passes `is_block/1'; there is no
% leftover signal, which is why the tag is `gulp' and not `slurp'.
% Otherwise returns the `{error, {bad_block, Details}}' tuple produced by
% `find_badness/1', describing the first token that is left of the block's
% starting column.
% @end

gulp_block_items(S) ->
    case is_block(S) of
        true  -> {gulp, block_to_items(S)};
        false -> find_badness(S)
    end.
|
||||
|
||||
% @private
% Walk the tokens after the first one and report the first token whose
% column is left of the first token's column. There is deliberately no
% clause for running out of tokens: that case crashes with function_clause.
find_badness([StartTk = #tk{pos = {_, StartCol}} | Rest]) ->
    find_badness(StartCol, StartTk, Rest).

find_badness(StartCol, StartTk, [Tk = #tk{pos = {_, TkCol}} | _])
        when TkCol < StartCol ->
    {error, {bad_block, [{start_col, StartCol},
                         {end_col,   TkCol},
                         {start_tk,  StartTk},
                         {end_tk,    Tk}]}};
find_badness(StartCol, StartTk, [#tk{pos = {_, _}} | Rest]) ->
    find_badness(StartCol, StartTk, Rest).
|
||||
|
||||
|
||||
|
||||
-spec block_to_items(Signal) -> BlockItems when
    Signal     :: [tk()],
    BlockItems :: [Signal].
% @doc
% Split a block into its items by repeatedly calling `take_block_item/1'
% until the signal is exhausted.
%
% The algorithm is naive: it does not check that all block items start at
% the same indent level.
%
% Input:
%     foo = ...
%     bar = ...
%     baz = ...
%
% Output:
%     [foo = ...,
%      bar = ...,
%      baz = ...]
% @end

block_to_items([]) ->
    [];
block_to_items(Signal) ->
    {Item, Remaining} = take_block_item(Signal),
    [Item | block_to_items(Remaining)].
|
||||
|
||||
|
||||
|
||||
-spec take_block_item(Signal) -> Result when
    Signal    :: [tk()],
    Result    :: {Item, NewSignal},
    Item      :: Signal,
    NewSignal :: Signal.
% @doc
% Take the first token plus the longest run of following tokens whose
% column is strictly greater than the first token's column. Returns that
% item and whatever signal is left. The empty signal yields `{[], []}'.
% @end

take_block_item([]) ->
    {[], []};
take_block_item([Head = #tk{pos = {_, ItemCol}} | Tail]) ->
    IsDeeper = fun(#tk{pos = {_, Col}}) -> Col > ItemCol end,
    {Continuation, Remaining} = lists:splitwith(IsDeeper, Tail),
    {[Head | Continuation], Remaining}.
|
||||
@@ -88,7 +88,7 @@
|
||||
-export([
|
||||
smr_sf_ws/0,
|
||||
smr_sf_op/0,
|
||||
smr_sf_punct/0,
|
||||
smr_sf_sep/0,
|
||||
smr_sf_id/0,
|
||||
smr_sf_con/0,
|
||||
smr_sf_qid/0,
|
||||
@@ -175,7 +175,7 @@ match(Matcher, Source) ->
|
||||
% -export([
|
||||
% smr_sf_ws/0,
|
||||
% smr_sf_op/0,
|
||||
% smr_sf_punct/0,
|
||||
% smr_sf_sep/0,
|
||||
% smr_sf_id/0,
|
||||
% smr_sf_con/0,
|
||||
% smr_sf_qid/0,
|
||||
@@ -224,7 +224,7 @@ smr_sf_op() ->
|
||||
|
||||
|
||||
|
||||
-spec smr_sf_punct() -> string_matcher().
|
||||
-spec smr_sf_sep() -> string_matcher().
|
||||
% @doc
|
||||
% String matcher for parens/braces
|
||||
%
|
||||
@@ -233,7 +233,7 @@ smr_sf_op() ->
|
||||
% , {"\\.\\.|[,.;()\\[\\]{}]", symbol()}
|
||||
% @end
|
||||
|
||||
smr_sf_punct() ->
|
||||
smr_sf_sep() ->
|
||||
M_DotDotOp = smr_string(".."),
|
||||
M_PunctChars = smr_oneofchars(",.;()[]{}"),
|
||||
smr_union([M_DotDotOp, M_PunctChars]).
|
||||
|
||||
+6
-6
@@ -254,7 +254,7 @@ token_shapes_parse_order() ->
|
||||
lists:flatten([
|
||||
% comments and whitespace
|
||||
lcom, bcom, ws,
|
||||
punct,
|
||||
sep,
|
||||
% literals
|
||||
char, string, int16, int10, bytes,
|
||||
ak, ct, sg,
|
||||
@@ -264,7 +264,7 @@ token_shapes_parse_order() ->
|
||||
% keywords need to be parsed ahead of ids
|
||||
kwd, id,
|
||||
con,
|
||||
% ops [=, =>, >>], punctuation (parens/braces)
|
||||
% ops [=, =>, >>], separators (parens/braces)
|
||||
op
|
||||
]).
|
||||
|
||||
@@ -597,7 +597,7 @@ slurp_token_of_shape(ws, Pos, SrcStr) ->
|
||||
str = WS},
|
||||
{tokmatch, Token, Rest}
|
||||
end;
|
||||
% KEYWORDS, OPERATORS, PUNCTUATION: kwd, op, punct
|
||||
% KEYWORDS, OPERATORS, PUNCTUATION: kwd, op, sep
|
||||
%
|
||||
% all the kwds are valid ids, so we match as an id and then check if it's a
|
||||
% kwd
|
||||
@@ -629,10 +629,10 @@ slurp_token_of_shape(op, Pos, SrcStr) ->
|
||||
no_strmatch ->
|
||||
no_tokmatch
|
||||
end;
|
||||
slurp_token_of_shape(punct, Pos, SrcStr) ->
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_punct(), SrcStr) of
|
||||
slurp_token_of_shape(sep, Pos, SrcStr) ->
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_sep(), SrcStr) of
|
||||
{strmatch, Str, Rest} ->
|
||||
Token = #tk{shape = punct, pos = Pos, str = Str},
|
||||
Token = #tk{shape = sep, pos = Pos, str = Str},
|
||||
{tokmatch, Token, Rest};
|
||||
no_strmatch ->
|
||||
no_tokmatch
|
||||
|
||||
+3
-3
@@ -299,7 +299,7 @@ pass_types() ->
|
||||
[lcom, % ak_AB// breaks out of id
|
||||
bcom, % ak_AB/* breaks out of id
|
||||
ws, % ak_AB\t breaks out of id
|
||||
punct, % ak_AB{ breaks out of id
|
||||
sep, % ak_AB{ breaks out of id
|
||||
string, % ak_AB" breaks out of id
|
||||
bytes, % ak_AB# breaks out of id
|
||||
ak,ct,sg, % ak_ABak [akctsg] all in base58 alphabet
|
||||
@@ -335,13 +335,13 @@ to_so_token(#tk{shape = SfTokenType,
|
||||
%
|
||||
% {contract, {420, 69}}
|
||||
%-----------------------
|
||||
% kwds ops and punct are all collapsed by
|
||||
% kwds ops and sep are all collapsed by
|
||||
% so_scan:scan down to eg {'contract', {420, 69}}
|
||||
% where {420, 69} is the source location
|
||||
% these are three different parsers
|
||||
Sym when Sym =:= kwd;
|
||||
Sym =:= op;
|
||||
Sym =:= punct ->
|
||||
Sym =:= sep ->
|
||||
Symbol = list_to_atom(SfTokenStr),
|
||||
{true, {Symbol, Pos}};
|
||||
%------------------------------------
|
||||
|
||||
@@ -0,0 +1,273 @@
|
||||
% @doc experiment centering around the file syntax node using ntree approach
|
||||
-module(gsc_test_file).
|
||||
|
||||
-export([
|
||||
main/0
|
||||
]).
|
||||
|
||||
-include("$gsc_include/gsc.hrl").
|
||||
|
||||
|
||||
-record(ct,
|
||||
{payable = none :: none | false | {true, tk()},
|
||||
main = none :: none | false | {true, tk()},
|
||||
contract = none :: none | tk(),
|
||||
con = none :: none | tk(),
|
||||
impls = none :: none | [tk()],
|
||||
eq = none :: none | tk()}).
|
||||
|
||||
-type meta() :: #ct{}.
|
||||
|
||||
-record(decl_type,
|
||||
{type = none :: none | tk(),
|
||||
id = none :: none | tk(),
|
||||
params = none :: none | [tk()],
|
||||
eq = none :: none | tk()}).
|
||||
|
||||
-type decl_meta() :: #decl_type{}.
|
||||
|
||||
|
||||
-type ast_meta() :: file
|
||||
| meta()
|
||||
| decl_meta()
|
||||
| nyi
|
||||
| {nyi, any()}
|
||||
.
|
||||
|
||||
|
||||
-type target()
|
||||
:: ct
|
||||
| iface
|
||||
| ns
|
||||
| pragma
|
||||
| include
|
||||
| using
|
||||
.
|
||||
|
||||
-type s2t_target()
|
||||
:: file
|
||||
| top_decl
|
||||
| target()
|
||||
| nyi
|
||||
| {nyi, any()}
|
||||
.
|
||||
|
||||
-type s2f_target()
|
||||
:: {block_of, s2t_target()}
|
||||
.
|
||||
|
||||
|
||||
-type ast() :: ntree(ast_meta(), tk()).
|
||||
-type asf() :: nforest(ast_meta(), tk()).
|
||||
|
||||
% @doc
% Read the `hello.aes' test contract, parse its signal as a `file', and
% print both the source text and the resulting tree.
% @end
main() ->
    SrcPath = ts_utils:ct_file_abspath("hello.aes"),
    {ok, SrcBytes} = file:read_file(SrcPath),
    Signal = gsc:unsafe_signal_from_file(SrcPath),
    Tree = s2t(file, Signal),
    io:format("hello.aes:~n"),
    io:format("```~n"),
    io:format("~ts", [SrcBytes]),
    io:format("```~n~n"),
    io:format("AST: ~tp~n", [Tree]),
    ok.
|
||||
|
||||
% // Hello World Contract
|
||||
% // Copyright (c) 2025 QPQ AG
|
||||
%
|
||||
% contract Hello =
|
||||
% type state = unit
|
||||
% entrypoint init(): state =
|
||||
% ()
|
||||
%
|
||||
% entrypoint hello(): string =
|
||||
% "hello, world"
|
||||
|
||||
-spec s2t(ParseTarget, Signal) -> AST when
    ParseTarget :: s2t_target() | atom(),
    Signal      :: [tk()],
    AST         :: ast().
% @doc
% signal-to-tree: parse `Signal' as one node of kind `ParseTarget'.
%
% `file' raises `empty_file' on an empty signal. `top_decl' and `decl' only
% inspect the leading token strings to choose a more specific target and
% then re-dispatch on it. Any target without a dedicated clause is returned
% unparsed: the signal becomes the kids of a `{nyi, Target}' stem.
% @end

% File ::= Block(TopDecl)
s2t(file, Signal) ->
    case Signal of
        [] -> error(empty_file);
        _  -> {ns, file, s2f({block_of, top_decl}, Signal)}
    end;
% TopDecl ::= ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl)
%           | ['payable'] 'contract' 'interface' Con [Implement] '=' Block(Decl)
%           | 'namespace' Con '=' Block(Decl)
%           | '@compiler' PragmaOp Version
%           | 'include' String
%           | Using
s2t(top_decl, Signal) ->
    NewTarget =
        case gsc_tokens:strings(3, Signal) of
            ["payable", "contract", "interface"] -> iface;
            ["contract", "interface" | _]        -> iface;
            ["payable", "main", "contract"]      -> ct;
            % 'main' without 'payable' is allowed by the grammar above
            ["main", "contract" | _]             -> ct;
            ["payable", "contract" | _]          -> ct;
            ["contract" | _]                     -> ct;
            ["namespace" | _]                    -> namespace;
            ["@compiler" | _]                    -> pragma;
            ["include" | _]                      -> include;
            ["using" | _]                        -> using
        end,
    s2t(NewTarget, Signal);
% ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl)
s2t(ct, S0) ->
    {slurp, CtMeta, S1} = s2s_slurp_meta(#ct{}, S0),
    {ns, CtMeta, s2f({block_of, decl}, S1)};
% Decl ::= 'type'     Id ['(' TVar* ')'] '=' TypeAlias
%        | 'record'   Id ['(' TVar* ')'] '=' RecordType
%        | 'datatype' Id ['(' TVar* ')'] '=' DataType
%        | 'let' Id [':' Type] '=' Expr
%        | (EModifier* 'entrypoint' | FModifier* 'function') Block(FunDecl)
%        | Using
s2t(decl, S0) ->
    NewTarget =
        case gsc_tokens:strings(3, S0) of
            ["type" | _]     -> decl_type;
            ["record" | _]   -> decl_record;
            ["datatype" | _] -> decl_datatype;
            ["let" | _]      -> decl_let;
            Pfx3 ->
                % modifiers may precede the keyword, so search the prefix
                IsEp = lists:member("entrypoint", Pfx3),
                IsFn = lists:member("function", Pfx3),
                if
                    IsEp -> decl_entrypoint;
                    IsFn -> decl_function;
                    true -> error({bad_decl, S0})
                end
        end,
    s2t(NewTarget, S0);
% 'type' Id ['(' TVar* ')'] '=' TypeAlias
s2t(decl_type, S0) ->
    {slurp, Meta, S1} = s2s_slurp_meta(#decl_type{}, S0),
    {ns, Meta, s2t(type, S1)};
% not-yet-implemented targets: keep the raw signal as the stem's kids
s2t(nyi, Signal) ->
    {ns, nyi, Signal};
s2t(NYI = {nyi, _}, Signal) ->
    {ns, NYI, Signal};
s2t(NYI, Signal) ->
    {ns, {nyi, NYI}, Signal}.
|
||||
|
||||
|
||||
|
||||
-spec s2f(ForestTarget, Signal) -> Forest when
    ForestTarget :: s2f_target(),
    Signal       :: [tk()],
    Forest       :: asf().
% @doc
% signal-to-forest: split the whole signal into block items and parse each
% item as `ItemTarget' via `s2t/2'. Crashes with badmatch if
% `gsc_signal:gulp_block_items/1' does not return `{gulp, Items}'.
% @end

s2f({block_of, ItemTarget}, Signal) ->
    {gulp, Items} = gsc_signal:gulp_block_items(Signal),
    lists:map(fun(Item) -> s2t(ItemTarget, Item) end, Items).
|
||||
|
||||
|
||||
-spec s2s_slurp_meta(InitMeta, Signal) -> Result when
    InitMeta  :: Meta,
    Signal    :: [tk()],
    Result    :: {slurp, Meta, NewSignal},
    Meta      :: ast_meta(),
    NewSignal :: Signal.
% @doc
% Dispatch on the kind of metadata record being filled in: `#ct{}' goes to
% `s2s_sm_ct/2', `#decl_type{}' to `s2s_sm_decl_type/2'. Anything else
% raises `{s2s_slurp_meta, Meta, Signal}'.
% @end

s2s_slurp_meta(Meta, Signal) ->
    case Meta of
        #ct{}        -> s2s_sm_ct(Meta, Signal);
        #decl_type{} -> s2s_sm_decl_type(Meta, Signal);
        _            -> error({s2s_slurp_meta, Meta, Signal})
    end.
|
||||
|
||||
|
||||
% @private
% Fill in a #ct{} one field at a time, in the order
% payable, main, contract, con, impls, eq. A field still set to `none' has
% not been visited yet; once every field is set, the record and the
% remaining signal are returned as `{slurp, Ct, Signal}'.

% optional 'payable'
s2s_sm_ct(Ct = #ct{payable = none}, [Tk = #tk{str = "payable"} | Rest]) ->
    s2s_sm_ct(Ct#ct{payable = {true, Tk}}, Rest);
s2s_sm_ct(Ct = #ct{payable = none}, Signal) ->
    s2s_sm_ct(Ct#ct{payable = false}, Signal);
% optional 'main'
s2s_sm_ct(Ct = #ct{main = none}, [Tk = #tk{str = "main"} | Rest]) ->
    s2s_sm_ct(Ct#ct{main = {true, Tk}}, Rest);
s2s_sm_ct(Ct = #ct{main = none}, Signal) ->
    s2s_sm_ct(Ct#ct{main = false}, Signal);
% mandatory 'contract' keyword
s2s_sm_ct(Ct = #ct{contract = none}, [Tk = #tk{str = "contract"} | Rest]) ->
    s2s_sm_ct(Ct#ct{contract = Tk}, Rest);
s2s_sm_ct(Ct = #ct{contract = none}, Signal) ->
    error({no_kwd_contract, Ct, Signal});
% mandatory contract name
s2s_sm_ct(Ct = #ct{con = none}, [Tk = #tk{shape = con} | Rest]) ->
    s2s_sm_ct(Ct#ct{con = Tk}, Rest);
s2s_sm_ct(Ct = #ct{con = none}, Signal) ->
    error({no_contract_name, Ct, Signal});
% optional ': Iface1, Iface2, ...'
s2s_sm_ct(Ct = #ct{impls = none}, Signal) ->
    case gsc_tokens:strings(1, Signal) of
        [":"] ->
            {slurp, Impls, Rest} = s2f_slurp_impls(Signal),
            s2s_sm_ct(Ct#ct{impls = Impls}, Rest);
        _ ->
            s2s_sm_ct(Ct#ct{impls = []}, Signal)
    end;
% mandatory '='
s2s_sm_ct(Ct = #ct{eq = none}, [Tk = #tk{str = "="} | Rest]) ->
    s2s_sm_ct(Ct#ct{eq = Tk}, Rest);
s2s_sm_ct(Ct = #ct{eq = none}, Signal) ->
    error({no_equal_sign, Ct, Signal});
% every field has been visited
s2s_sm_ct(Ct, Signal) ->
    {slurp, Ct, Signal}.
|
||||
|
||||
% @private
% Slurp `: Con, Con, ...' off the front of the signal. The signal must
% start with ":" followed by a `con' token; anything else crashes with
% function_clause. Returns the `con' tokens in source order.
s2f_slurp_impls([#tk{str = ":"}, First = #tk{shape = con} | Rest]) ->
    s2f_slurp_impls([First], Rest).

% Seen is kept newest-first and reversed on the way out.
s2f_slurp_impls(Seen, Signal) ->
    case Signal of
        [#tk{str = ","}, Next = #tk{shape = con} | Rest] ->
            s2f_slurp_impls([Next | Seen], Rest);
        _ ->
            {slurp, lists:reverse(Seen), Signal}
    end.
|
||||
|
||||
|
||||
%-record(decl_type,
|
||||
% {type = none :: none | tk(),
|
||||
% id = none :: none | tk(),
|
||||
% params = none :: none | [tk()],
|
||||
% eq = none :: none | tk()}).
|
||||
|
||||
% @private
% Fill in a #decl_type{} one field at a time, in the order
% type, id, params, eq. A field still set to `none' has not been visited
% yet. Type parameters are not supported: an opening paren after the id
% raises `{fixme, parens_bad}'.

% mandatory 'type' keyword
s2s_sm_decl_type(M = #decl_type{type = none}, [Tk = #tk{str = "type"} | Rest]) ->
    s2s_sm_decl_type(M#decl_type{type = Tk}, Rest);
s2s_sm_decl_type(#decl_type{type = none}, Signal) ->
    error({no_kwd_type, Signal});
% mandatory type id
s2s_sm_decl_type(M = #decl_type{id = none}, [Tk = #tk{shape = id} | Rest]) ->
    s2s_sm_decl_type(M#decl_type{id = Tk}, Rest);
s2s_sm_decl_type(#decl_type{id = none}, Signal) ->
    error({no_type_id, Signal});
% type parameters: not implemented
s2s_sm_decl_type(#decl_type{params = none}, [#tk{str = "("} | _]) ->
    error({fixme, parens_bad});
s2s_sm_decl_type(M = #decl_type{params = none}, Signal) ->
    s2s_sm_decl_type(M#decl_type{params = []}, Signal);
% mandatory '='
s2s_sm_decl_type(M = #decl_type{eq = none}, [Tk = #tk{str = "="} | Rest]) ->
    s2s_sm_decl_type(M#decl_type{eq = Tk}, Rest);
s2s_sm_decl_type(#decl_type{eq = none}, Signal) ->
    error({no_equal_sign, Signal});
% every field has been visited
s2s_sm_decl_type(M, Signal) ->
    {slurp, M, Signal}.
|
||||
@@ -6,29 +6,18 @@
|
||||
|
||||
-include("$gsc_include/gsc.hrl").
|
||||
|
||||
% records copypasta for now
|
||||
-record(ns, {meta :: any(), kids :: list(any())}).
|
||||
|
||||
-type ntree(X, Y) :: gsc_ntree:ntree(X, Y).
|
||||
-type nforest(X, Y) :: gsc_nforest:nforest(X, Y).
|
||||
|
||||
-type nt(X, Y) :: gsc_ntree:ntree(X, Y).
|
||||
-type nf(X, Y) :: gsc_nforest:nforest(X, Y).
|
||||
|
||||
|
||||
% just parsing type expressions right now, so only need
|
||||
% to worry about round parens
|
||||
%
|
||||
% none is to indicate general-purpose grouping, for
|
||||
% e.g. LHS/RHS of an op
|
||||
-type syntax_meta()
|
||||
:: none
|
||||
| {op, tk()}
|
||||
:: {op, tk()}
|
||||
| op_arg
|
||||
| {parens, Open :: tk(), Close :: tk()}
|
||||
.
|
||||
|
||||
-type ast() :: ntree(StemMeta :: syntax_meta(),
|
||||
LeafType :: tk()).
|
||||
%-type ast() :: ntree(syntax_meta(), tk()).
|
||||
-type asf() :: nforest(syntax_meta(), tk()).
|
||||
-type asts() :: asf().
|
||||
|
||||
@@ -70,7 +59,7 @@ parse(Signal) ->
|
||||
F1 = f2f_parens(F0),
|
||||
F2 = f2f_op("=>", F1),
|
||||
F3 = f2f_op("*", F2),
|
||||
Result = F2,
|
||||
Result = F3,
|
||||
Result.
|
||||
|
||||
|
||||
@@ -85,8 +74,8 @@ f2f_op(_opstr, Stk, []) ->
|
||||
f2f_op(OpStr, LhsStk, [#tk{str = OpStr} = OpTk | Rest]) ->
|
||||
Lhf = lists:reverse(LhsStk),
|
||||
Rhf = f2f_op(OpStr, Rest),
|
||||
Lht = #ns{meta = none, kids = Lhf},
|
||||
Rht = #ns{meta = none, kids = Rhf},
|
||||
Lht = #ns{meta = op_arg, kids = Lhf},
|
||||
Rht = #ns{meta = op_arg, kids = Rhf},
|
||||
ResultT = #ns{meta = {op, OpTk},
|
||||
kids = [Lht, Rht]},
|
||||
ResultF = [ResultT],
|
||||
@@ -11,13 +11,14 @@
|
||||
main() ->
|
||||
%io:format("~p~n", [div_files()]),
|
||||
%io:format("MAINNNNN!~n", []),
|
||||
eunit:test(?MODULE, [verbose]).
|
||||
%eunit:test(?MODULE).
|
||||
%eunit:test(?MODULE, [verbose]),
|
||||
eunit:test(?MODULE),
|
||||
ok.
|
||||
|
||||
|
||||
% directory containing the tests for the tokenizer
|
||||
ct_dir() ->
|
||||
zx_daemon:get_home() ++ "/ct".
|
||||
zx_daemon:get_home() ++ "/test/ct".
|
||||
|
||||
agreement_tests_dir() ->
|
||||
ct_dir() ++ "/tokenizers_agree".
|
||||
@@ -0,0 +1,64 @@
|
||||
% dynamic hacky module that loads all the tests
|
||||
-module(gsc_tests).
|
||||
|
||||
-export([
|
||||
main/0,
|
||||
cli_args/1
|
||||
]).
|
||||
|
||||
|
||||
|
||||
% @doc same as `cli_args([])', but always returns `ok'
main() ->
    _ = cli_args([]),
    ok.
|
||||
|
||||
% @doc
% Compile `test/ts_utils.erl' (raising the compiler's result if that does
% not yield `{ok, ts_utils}'), then run the requested test command inside
% `ts_utils:tidily/1'.
% @end
cli_args(TestNames) ->
    TsUtilsSrc = zx:get_home() ++ "/test/ts_utils.erl",
    CompileResult = compile:file(TsUtilsSrc),
    {ok, ts_utils} =:= CompileResult orelse error(CompileResult),
    % tidily loads the test deps and then cleans up any beam files
    % afterwards
    Run = fun() -> do_gsc_test(TestNames) end,
    ts_utils:tidily(Run).
|
||||
|
||||
|
||||
% @private
% Dispatch on the CLI arguments. A leading flag wins regardless of what
% follows it; anything unrecognised prints the help text.
do_gsc_test([Flag | _]) when Flag =:= "-h"; Flag =:= "--help" ->
    do_help();
do_gsc_test([Flag | _]) when Flag =:= "-l"; Flag =:= "--list" ->
    do_list();
do_gsc_test([Flag | _]) when Flag =:= "-a"; Flag =:= "--all" ->
    do_all();
do_gsc_test(["so_tokens", Path]) ->
    do_so_tokens(Path);
do_gsc_test(["so", "tokens", Path]) ->
    do_so_tokens(Path);
do_gsc_test(["tokenizers_agree", Path]) ->
    do_tokenizers_agree(Path);
do_gsc_test([TestName]) ->
    ts_utils:run_test_by_name(TestName);
do_gsc_test(_) ->
    do_help().
|
||||
|
||||
|
||||
|
||||
% @private print the (placeholder) help text
do_help() ->
    io:format("go help yourself~n", []).
|
||||
|
||||
% @private print every `{Name, Module}' pair known to ts_utils, one per line
do_list() ->
    PrintName = fun(N) -> io:format("~p~n", [N]) end,
    lists:map(PrintName, ts_utils:runnable_test_names()).
|
||||
|
||||
|
||||
% @private
% Run main/0 of every test module that compiled, with a blank line after
% each. Modules that failed to compile are ignored here.
do_all() ->
    {GoodMods, _BadMods} = ts_utils:runnable_test_mods(),
    RunOne =
        fun(Mod) ->
            ts_utils:rmm(Mod),
            io:format("~n")
        end,
    lists:map(RunOne, GoodMods).
|
||||
|
||||
|
||||
% @private
% Print the legacy tokenizer's tokens for FilePath, one per line.
%
% `ts_utils:so_tokens/1' returns the scanner's tagged result rather than a
% bare token list, so it has to be unwrapped before iterating. A scan
% error is printed instead of crashing.
do_so_tokens(FilePath) ->
    case ts_utils:so_tokens(FilePath) of
        {ok, Tokens} ->
            [io:format("~p~n", [Tk]) || Tk <- Tokens];
        {error, _} = Error ->
            io:format("~tp~n", [Error])
    end.
|
||||
|
||||
|
||||
|
||||
% @private print the result of `ts_utils:tokenizers_agree/1' for RelPath
do_tokenizers_agree(RelPath) ->
    io:format("~tp~n", [ts_utils:tokenizers_agree(RelPath)]).
|
||||
@@ -0,0 +1,356 @@
|
||||
% test suite utilities
|
||||
-module(ts_utils).
|
||||
|
||||
-export([
|
||||
tokenizers_agree/1,
|
||||
absify/1,
|
||||
so_tokens/1,
|
||||
load_test_deps/0,
|
||||
test_deps/0,
|
||||
load_dep/1,
|
||||
clean_after/1, tidily/1,
|
||||
delete_beams/0, tidy/0,
|
||||
run_test_by_name/1,
|
||||
rmm/1, run_mod_main/1,
|
||||
runnable_test_names/0,
|
||||
runnable_test_mods/0,
|
||||
load_test_erls/0,
|
||||
abspath_to_name/1,
|
||||
ls_test_erls/0,
|
||||
ls_test_beams/0,
|
||||
is_erl/1,
|
||||
is_beam/1,
|
||||
ls_test/0,
|
||||
test_dir/0,
|
||||
ct_dir/0,
|
||||
ct_file/1, ct_file_abspath/1, ct_abspath/1
|
||||
]).
|
||||
|
||||
% @doc
% Run both tokenizers on the same file and compare the outcomes: they
% agree when both succeed with exactly equal tokens, or when both fail
% (the error reasons are not compared).
% @end
tokenizers_agree(Relpath) ->
    AbsPath = absify(Relpath),
    Legacy = so_tokens(AbsPath),
    Ours = gsc:gso_tokens_from_file(AbsPath),
    case {Legacy, Ours} of
        {{ok, Same}, {ok, Same}} -> true;
        {{ok, _}, {ok, _}}       -> false;
        {{error, _}, {error, _}} -> true;
        {{ok, _}, {error, _}}    -> false;
        {{error, _}, {ok, _}}    -> false
    end.
|
||||
|
||||
|
||||
% @doc thin wrapper around `filename:absname/1'
absify(Path) ->
    filename:absname(Path).
|
||||
|
||||
|
||||
% @doc
% Read FilePath and hand its bytes, as a list, to `so_scan:scan/1'. The
% scanner's result is returned untouched; a file that cannot be read
% crashes with badmatch.
% @end
so_tokens(FilePath) ->
    {ok, Bytes} = file:read_file(FilePath),
    so_scan:scan(binary_to_list(Bytes)).
|
||||
|
||||
|
||||
% @doc run `load_dep/1' on every entry of `test_deps/0', in order
load_test_deps() ->
    lists:foreach(fun(Dep) -> load_dep(Dep) end, test_deps()).
|
||||
|
||||
% @doc the dependencies handed to `load_dep/1' before tests run
test_deps() ->
    Sophia = {"otpr", "sophia", {9, 0, 0}},
    [Sophia].
|
||||
|
||||
|
||||
% @doc
% Make sure dependency D is installed (fetching it when it is not) and
% build it. Returns whatever `zx_daemon:build/1' returns.
%
% apparently zx changes the working dir when doing all this stuff, so
% beam files get dropped in a random dep dir. The working directory is
% therefore restored afterwards, and that happens in an `after' clause so
% it is also restored when the fetch or the build crashes. Otherwise
% later cwd-relative cleanup would act on the wrong directory.
% @end
load_dep(D) ->
    {ok, Cwd} = file:get_cwd(),
    try
        ok =
            case zx_lib:installed(D) of
                false ->
                    Id = zx_daemon:fetch(D),
                    ok = zx_daemon:wait_result(Id),
                    ok;
                true ->
                    ok
            end,
        zx_daemon:build(D)
    after
        ok = file:set_cwd(Cwd)
    end.
|
||||
|
||||
|
||||
-spec clean_after(Fun) -> Result when
    Fun    :: fun(() -> Result),
    Result :: any().
% @doc
% Load the test deps, then run Fun() and return its result. Whether or not
% either step errors, `delete_beams/0' runs afterwards; it removes the
% `*.beam' files found in the current working directory.
% @end

clean_after(Fun) ->
    try
        load_test_deps(),
        Fun()
    of
        Result -> Result
    after
        delete_beams()
    end.
|
||||
|
||||
|
||||
% @doc alias for `clean_after/1'
tidily(Thunk) ->
    clean_after(Thunk).
|
||||
|
||||
|
||||
|
||||
-spec delete_beams() -> ok.
% @doc
% Delete every path returned by `ls_test_beams/0'. The result of each
% individual `file:delete/1' is ignored.
% @end

delete_beams() ->
    lists:foreach(fun(Beam) -> file:delete(Beam) end, ls_test_beams()).
|
||||
|
||||
% @doc alias for `delete_beams/0'
tidy() ->
    delete_beams().
|
||||
|
||||
|
||||
-spec run_test_by_name(Name) -> Result when
    Name   :: string(),
    Result :: ok.
% @doc
% Look the test up with `find_test_by_name/1' and run its main/0 through
% `rmm/1'. A module that did not compile, or a name that matches nothing,
% only produces a FATAL message.
% @end

run_test_by_name(Name) when is_list(Name) ->
    Lookup = find_test_by_name(Name),
    case Lookup of
        not_found ->
            io:format("FATAL: test not found: ~p~n", [Name]),
            ok;
        {bad, Mod} ->
            io:format("FATAL: Module ~tp didn't compile~n", [Mod]),
            ok;
        {good, Mod} ->
            rmm(Mod)
    end.
|
||||
|
||||
|
||||
% @doc long-named alias for `rmm/1'
run_mod_main(Module) ->
    rmm(Module).
|
||||
|
||||
% @doc
% "run mod main": print a banner and call Mod:main(), returning its
% result. Any exception is caught, printed with its class, reason and
% stack trace, and turned into `ok' so one failing test module does not
% stop the ones after it.
% @end
rmm(Mod) ->
    Rule = "=================================================~n",
    try
        io:format(Rule),
        io:format("~p:main()~n", [Mod]),
        io:format(Rule),
        Mod:main()
    catch
        Class:Reason:Stack ->
            io:format("~tp:main(): ERROR~n", [Mod]),
            io:format("~tp: ~tp~n", [Class, Reason]),
            io:format("Trace: ~tp~n", [Stack]),
            ok
    end.
|
||||
|
||||
|
||||
|
||||
% @private
% Resolve a test name to a module. Two candidates are tried: the name
% taken literally as a module (C1) and the name with the "gsc_test_"
% prefix added (C2). Modules that compiled are preferred over modules
% that failed to compile, and within each group C1 is preferred over C2.
%
% Returns `{good, Mod}', `{bad, Mod}' (the module exists but did not
% compile) or `not_found'.
find_test_by_name(Name) ->
    C1 = list_to_atom(Name),
    C2 = list_to_atom("gsc_test_" ++ Name),
    {Gd, Bd} = runnable_test_mods(),
    C1Gd = lists:member(C1, Gd),
    C2Gd = lists:member(C2, Gd),
    % the "bad" checks must look in Bd, the modules that failed to compile
    C1Bd = lists:member(C1, Bd),
    C2Bd = lists:member(C2, Bd),
    if
        C1Gd -> {good, C1};
        C2Gd -> {good, C2};
        C1Bd -> {bad, C1};
        C2Bd -> {bad, C2};
        true -> not_found
    end.
|
||||
|
||||
|
||||
-spec runnable_test_names() -> Result when
    Result :: [{string(), atom()}].
% @doc
% `{ShortName, Module}' for every runnable test module, whether or not it
% compiled, sorted by module.
% @end

runnable_test_names() ->
    {Good, Bad} = runnable_test_mods(),
    [{test_mod_name(Mod), Mod} || Mod <- lists:sort(Good ++ Bad)].
|
||||
|
||||
% @private
% gsc_test_foo ~> "foo"
% crashes with badmatch when the module name lacks the "gsc_test_" prefix
test_mod_name(TestModAtom) ->
    "gsc_test_" ++ ShortName = atom_to_list(TestModAtom),
    ShortName.
|
||||
|
||||
|
||||
|
||||
-spec runnable_test_mods() -> Result when
    Result :: {Good, Bad},
    Good   :: Mods,
    Bad    :: Mods,
    Mods   :: [atom()].
% @doc
% Compile everything in the test directory via `load_test_erls/0' and keep
% only the modules accepted by `is_runnable/1'. Good holds the ones that
% compiled, Bad the ones that did not.
% @end

runnable_test_mods() ->
    {Compiled, Failed} = load_test_erls(),
    {[M || M <- Compiled, is_runnable(M)],
     [M || M <- Failed, is_runnable(M)]}.
|
||||
|
||||
|
||||
|
||||
% @private a module is runnable when its name starts with "gsc_test_"
is_runnable(ModAtom) ->
    lists:prefix("gsc_test_", atom_to_list(ModAtom)).
|
||||
|
||||
|
||||
|
||||
-spec load_test_erls() -> {Loaded, Errs} when
    Loaded :: [atom()],
    Errs   :: [atom()].
% @doc
% Run `compile:file/1' on every path from `ls_test_erls/0', in order.
% Loaded lists the modules for which that returned `{ok, Mod}'; Errs lists
% the module names (derived from the file names) for which it returned
% anything else. Each failure is also printed.
% @end

load_test_erls() ->
    {Loaded, Errs} = lists:foldl(fun load_test_erl/2, {[], []}, ls_test_erls()),
    {lists:reverse(Loaded), lists:reverse(Errs)}.

% fold step: both accumulators are built newest-first
load_test_erl(FilePath, {Loaded, Errs}) ->
    FileName = abspath_to_name(FilePath),
    ModAtom = fp_to_mod_atom(FilePath),
    case compile:file(FilePath) of
        {ok, Mod} ->
            {[Mod | Loaded], Errs};
        Err ->
            io:format("ERROR ~tp: ~tp~n", [FileName, Err]),
            {Loaded, [ModAtom | Errs]}
    end.
|
||||
|
||||
|
||||
% @private
% "/path/to/foo.erl" ~> foo
% crashes with badmatch when the file name does not end in ".erl"
fp_to_mod_atom(FP) ->
    [ModStr, "erl"] = string:split(abspath_to_name(FP), ".", trailing),
    list_to_atom(ModStr).
|
||||
|
||||
|
||||
|
||||
-spec abspath_to_name(FilePath) -> FileName when
    FilePath :: string(),
    FileName :: string().
% @doc
% "/path/to/foo.bar" -> "foo.bar"
%
% The path is split on "/" and the last non-empty segment is returned.
% @end

abspath_to_name(FP) ->
    Segments = string:tokens(FP, "/"),
    lists:last(Segments).
|
||||
|
||||
|
||||
|
||||
-spec ls_test_erls() -> AbsPaths when
    AbsPaths :: [string()].
% @doc
% The entries of `ls_test/0' that have the ".erl" extension, e.g.
%
%     ["/path/to/gsc/test/foo.erl",
%      "/path/to/gsc/test/bar.erl",
%      "/path/to/gsc/test/baz.erl"]
% @end

ls_test_erls() ->
    [Path || Path <- ls_test(), is_erl(Path)].
|
||||
|
||||
|
||||
|
||||
-spec ls_test_beams() -> AbsPaths when
    AbsPaths :: [string()].
% @doc
% The ".beam" files in the current working directory.
%
% important: beams get dropped in the working dir, which is why this
% lists the cwd rather than the test directory
% @end

ls_test_beams() ->
    [Path || Path <- ls_pwd(), is_beam(Path)].
|
||||
|
||||
|
||||
|
||||
-spec is_beam(AbsPath) -> IsBeam when
    AbsPath :: string(),
    IsBeam  :: boolean().
% @private
% "foo.beam" ~> true
% _          ~> false

is_beam(Filename) ->
    filename:extension(Filename) =:= ".beam".
|
||||
|
||||
|
||||
|
||||
-spec is_erl(AbsPath) -> IsErl when
    AbsPath :: string(),
    IsErl   :: boolean().
% @private
% "foo.erl" ~> true
% _         ~> false

is_erl(Filename) ->
    filename:extension(Filename) =:= ".erl".
|
||||
|
||||
|
||||
|
||||
-spec ls_test() -> Abspaths when
    Abspaths :: [string()].
% @doc
% Sorted absolute paths of everything in the test directory. Nothing is
% filtered, so junk/irrelevant files are included:
%
%     ["/path/to/gsc/test/.foo.erl.swp",
%      "/path/to/gsc/test/bar.erl",
%      "/path/to/gsc/test/foo.erl"]
% @end

ls_test() ->
    Dir = test_dir(),
    {ok, Names} = file:list_dir(Dir),
    AbsPaths = lists:map(fun(Name) -> Dir ++ "/" ++ Name end, Names),
    lists:sort(AbsPaths).
|
||||
|
||||
|
||||
% @private
% Sorted absolute paths of everything in the current working directory.
ls_pwd() ->
    {ok, Cwd} = file:get_cwd(),
    {ok, Names} = file:list_dir(Cwd),
    AbsPaths = lists:map(fun(Name) -> Cwd ++ "/" ++ Name end, Names),
    lists:sort(AbsPaths).
|
||||
|
||||
|
||||
-spec test_dir() -> AbsPath when
    AbsPath :: string().
% @doc
% "/path/to/gsc/test"
%
% i.e. the "test" directory under `zx_daemon:get_home()'
% @end

test_dir() ->
    Home = zx_daemon:get_home(),
    Home ++ "/test".
|
||||
|
||||
|
||||
|
||||
-spec ct_dir() -> AbsPath when
    AbsPath :: string().
% @doc
% "/path/to/gsc/test/ct"
%
% directory containing the tests for the tokenizer
% @end

ct_dir() ->
    TestDir = test_dir(),
    TestDir ++ "/ct".
|
||||
|
||||
|
||||
|
||||
|
||||
-spec ct_file(Name) -> AbsPath when
    Name    :: string(),
    AbsPath :: string().
% @doc
% "foo.aes" -> "/path/to/ct/foo.aes"
%
% Name is appended to `ct_dir/0'; the file is not checked for existence.
% @end

ct_file(Name) ->
    Dir = ct_dir(),
    Dir ++ [$/ | Name].
|
||||
|
||||
|
||||
% @doc alias for `ct_file/1'
%
% "foo.aes" -> "/path/to/ct/foo.aes"
ct_file_abspath(FileName) ->
    ct_file(FileName).
|
||||
|
||||
% @doc alias for `ct_file/1'
%
% "foo.aes" -> "/path/to/ct/foo.aes"
ct_abspath(FileName) ->
    ct_file(FileName).
|
||||
@@ -1,15 +1,16 @@
|
||||
{name,"Gajumaru Sophia Compiler"}.
|
||||
{type,lib}.
|
||||
{type,cli}.
|
||||
{modules,[]}.
|
||||
{mod, "gsc_cli"}.
|
||||
{author,"Peter Harpending"}.
|
||||
{prefix,"gs"}.
|
||||
{prefix,"gsc"}.
|
||||
{desc,"Exploratory sophia compiler rewrite"}.
|
||||
{package_id,{"otpr","gsc",{0,1,0}}}.
|
||||
{deps,[]}.
|
||||
{key_name,none}.
|
||||
{a_email,"peterharpending@qpq.swiss"}.
|
||||
{c_email,"peterharpending@qpq.swiss"}.
|
||||
{copyright,"Peter Harpending"}.
|
||||
{copyright,"2026 QPQ AG"}.
|
||||
{file_exts,[]}.
|
||||
{license,"GPL-3.0-only"}.
|
||||
{repo_url,"https://git.qpq.swiss/QPQ-AG/gsc"}.
|
||||
|
||||
Reference in New Issue
Block a user