Compare commits

..

9 Commits

Author SHA1 Message Date
Peter Harpending 3f73cd4d85 some stuff 2026-06-08 16:35:27 -07:00
Peter Harpending 7c98182bcb stuff 2026-06-08 15:24:54 -07:00
pharpend ba70aace96 stuff 2026-06-08 14:55:47 -07:00
pharpend b88e51bb81 restructure done 2026-06-05 18:35:34 -07:00
pharpend ff066072e2 restructuring done I think 2026-06-05 18:33:18 -07:00
pharpend f79403b97f wip restructuring 2026-06-05 13:36:01 -07:00
Peter Harpending f04b7311f5 stuff 2026-06-05 00:58:53 -07:00
pharpend 10424927b1 stuff 2026-06-04 14:01:46 -07:00
pharpend fdb40dcb92 stuff 2026-06-04 11:42:48 -07:00
66 changed files with 1415 additions and 339 deletions
+8 -2
View File
@@ -3,13 +3,19 @@
- architecture needs more careful thought but only after something
works
- too fuzzy right now
- ytree with tokens
- dialyze
- makefile
- consolidate
# TODONE
- undo gs_ naming fuckery.. everything is `gsc_*`. it's just
needlessly confusing. for now let's name new things gsc_* and then
go back and undo the stupidity
# TONOTDO
# TODONT
- barf for outputs, slurp for inputs
- rename parser layers sequentially
# TODONE
-30
View File
@@ -1,30 +0,0 @@
# gsc = gajumaru sophia compiler
**This is _NOT_ the official Sophia compiler.** If you're looking for
that see https://git.qpq.swiss/QPQ-AG/sophia
This is an incomplete prototype rewrite of the legacy (official)
sophia compiler in straightforward Erlang. It grew out of my (Peter
Harpending) own efforts to document the language and its relationship
to FATE (the gajumaru virtual machine).
The goal for version 0.1 is to mirror the success behavior of the
legacy sophia compiler.
# Setup
```
git clone https://git.qpq.swiss/QPQ-AG/gsc.git
```
Add the following to `~/.bashrc` or wherever:
```
export PATH=$PATH:/path/to/gsc/bin
```
To test run
```
gsc --help
```
+14 -10
View File
@@ -2,10 +2,10 @@
# Bash harness for GSC command line tool
#
# based on similar harness for sfp
#
# gsc foo bar baz
# -> zx rundir \
# /home/pharpend/src/ioecs/gsc/cli \
# --libs=gsc:/home/pharpend/src/ioecs/gsc \
# -> zx rundir /home/pharpend/src/ioecs/gsc \
# foo bar baz
zx_exists() {
@@ -27,8 +27,12 @@ fi
FP_THIS_FILE=$(readlink -f "${BASH_SOURCE[0]}")
FP_THIS_DIR=$(dirname -- "${FP_THIS_FILE}")
FP_PRJ_DIR=$(dirname -- "${FP_THIS_DIR}")
FP_CLI_DIR="${FP_PRJ_DIR}/cli"
# simplified: run the project directory directly and forward every CLI
# argument unchanged. Both expansions are quoted so that a project path
# or an argument containing whitespace is not split into several words.
zx rundir "$FP_PRJ_DIR" "$@"
# commented out legacy code in case need it later:
# # compute libs string
# LIB_PARTS=()
# for depname in "${LOCAL_DEPS[@]}"; do
@@ -37,9 +41,9 @@ FP_CLI_DIR="${FP_PRJ_DIR}/cli"
# IFS=,
# LOCAL_LIBS="${LIB_PARTS[*]}"
# unset IFS
LOCAL_LIBS="gsc:${FP_PRJ_DIR}"
#echo "zx rundir $FP_CLI_DIR --libs=$LOCAL_LIBS $@"
zx rundir $FP_CLI_DIR --libs=$LOCAL_LIBS $@
#
#LOCAL_LIBS=""
#
##echo "zx rundir $FP_CLI_DIR --libs=$LOCAL_LIBS $@"
#
##zx rundir $FP_CLI_DIR --libs=$LOCAL_LIBS $@
-15
View File
@@ -1,15 +0,0 @@
.eunit
deps
*.o
*.beam
*.plt
*.swp
erl_crash.dump
ebin/*.beam
doc/*.html
doc/*.css
doc/edoc-info
doc/erlang.png
rel/example_project
.concrete/DEV_MODE
.rebar
-1
View File
@@ -1 +0,0 @@
{"src/*", [debug_info, {i, "include/"}, {outdir, "ebin/"}]}.
-7
View File
@@ -1,7 +0,0 @@
{application,gsc_cli,
[{description,"GSC CLI and test suite"},
{registered,[]},
{included_applications,[]},
{applications,[stdlib,kernel]},
{vsn,"0.1.0"},
{modules,[gsc_cli]}]}.
-25
View File
@@ -1,25 +0,0 @@
% testing utilities
-module(ts_utils).
-export([
ct_dir/0,
ct_file/1
]).
-spec ct_dir() -> string().
% @doc
% directory containing the tests for the tokenizer: the
% "ct" directory under zx_daemon:get_home/0
ct_dir() ->
    Home = zx_daemon:get_home(),
    Home ++ "/ct".
-spec ct_file(Name) -> AbsPath when
      Name    :: string(),
      AbsPath :: string().
% @doc
% path of the file called Name inside ct_dir/0; the two
% are joined with a single "/"
%
% ct_file("foo.aes") -> "/path/to/ct/foo.aes"
ct_file(Name) ->
    Dir = ct_dir(),
    lists:append([Dir, "/", Name]).
-18
View File
@@ -1,18 +0,0 @@
{name,"GSC CLI"}.
{type,cli}.
{modules,[]}.
{mod,"gsc_cli"}.
{author,"Peter Harpending"}.
{prefix,none}.
{desc,"GSC CLI and test suite"}.
{package_id,{"otpr","gsc_cli",{0,1,0}}}.
{deps,[{"otpr","sophia",{9,0,0}},{"otpr","gsc",{0,1,0}}]}.
{key_name,none}.
{a_email,"peterharpending@qpq.swiss"}.
{c_email,"peterharpending@qpq.swiss"}.
{copyright,"Peter Harpending"}.
{file_exts,[]}.
{license,"GPL-3.0-only"}.
{repo_url,[]}.
{tags,[]}.
{ws_url,[]}.
+5 -1
View File
@@ -4,4 +4,8 @@
{included_applications,[]},
{applications,[stdlib,kernel]},
{vsn,"0.1.0"},
{modules,[gsc]}]}.
{modules,['ast-gulp',gsc_ast,gsc_bst,gsc_parse_type_expr,
gsc_token_chunks,ifarith,parse_type_expr,test_ntree,
unicode,gsc,gsc_cli,gsc_ntree,gsc_signal,gsc_strmatch,
gsc_tokens,gso_scan,gsc_test_file,gsc_test_ntree,
gsc_test_tokens,ts_utils]}]}.
+35 -3
View File
@@ -23,14 +23,14 @@
| qid % Foo.Bar.baz
| qcon % Foo.Bar.Baz
| tvar % 'foo, 'foo_bar, '_'foo'_'bar'''
% kwds ops and punct are all collapsed by
% kwds ops and sep are all collapsed by
% so_scan:scan down to eg {'contract', {420, 69}}
% where {420, 69} is the source location
% these are three different parsers
| kwd % contract, interface, payable, etc
| op % "=!<>+-*/:&|?~@^"
| punct % ".." | oneof(",.;()[]{}")
% kwds and punct are kind of the same thing
| sep % ".." | oneof(",.;()[]{}")
% kwds and sep are kind of the same thing
% but i'll keep them separate now for my own sanity. ok
% i guess op or symbol or whatever is fine.
%
@@ -143,3 +143,35 @@
| #gsc_err_nyi{}
| #gsc_err_empty_file{}
| #gsc_err{}.
%----------------------------
% tree type for parsing
%----------------------------
% @doc stem record
-record(ns, {meta :: any(),
kids :: list(any())}).
% @doc `ntree(S, L)' is a "node tree" (meaning stems
% have values and children)
%
% for the purposes of the compiler, the key observation
% is that a flat list of tokens is already a forest
-type ntree(S, L) :: #ns{meta :: S, kids :: [ntree(S, L)]}
| L.
% @doc forest is just a list of trees
-type nforest(S, L) :: [ntree(S, L)].
% aliases
-type nt(S, L) :: ntree(S, L).
-type nf(S, L) :: nforest(S, L).
-type ntree() :: ntree(any(), any()).
-type nforest() :: [ntree()].
-type nt() :: ntree().
-type nf() :: nforest().
View File
+1 -1
View File
@@ -131,7 +131,7 @@ slurp_ct_impls([#gsc_token{string = ":", type = op},
slurp_ct_impls(_) ->
reject.
slurp_ct_impls2([#gsc_token{string = ",", type = punct},
slurp_ct_impls2([#gsc_token{string = ",", type = sep},
#gsc_token{string = Con1, type = con}
| Rest],
Acc) ->
+84
View File
@@ -0,0 +1,84 @@
% test-related CLI dispatch
%
% "test" or "tests" on its own calls do_tests/0; "test"
% followed by names passes those names to do_tests/1
%
% NOTE(review): do_tests/0 is not defined in the visible
% part of this file -- confirm it is provided before this
% is compiled
do(["test"]) ->
    do_tests();
do(["test" | Tests]) ->
    do_tests(Tests);
do(["tests"]) ->
    do_tests().
% call run_mod_main/1 on every module returned by
% test_mods/0
do_runall_tests() ->
    Mods = test_mods(),
    lists:foreach(fun(Mod) -> run_mod_main(Mod) end, Mods).
% run each named test, in the order given
do_tests(Names) ->
    lists:foreach(fun(Name) -> run_test(Name) end, Names).
% @doc
% run the test module selected by TestName
%
% TestName is either a full module name or the part that
% follows "gsc_test_"; the full name is tried first.
% Candidate names are compared as strings against the
% modules in test_mods/0, so command line input never
% creates new atoms (atoms are not garbage collected).
%
% raises error({no_such_test, TestName}) if neither
% candidate names a known test module
run_test(TestName) ->
    Candidates = [TestName, "gsc_test_" ++ TestName],
    case pick_known_mod(Candidates, test_mods()) of
        {ok, Mod} -> rmm(Mod);
        error     -> error({no_such_test, TestName})
    end.

% first candidate string that is the name of a known module
pick_known_mod([], _KnownMods) ->
    error;
pick_known_mod([Name | Names], KnownMods) ->
    case [M || M <- KnownMods, atom_to_list(M) =:= Name] of
        [Mod | _] -> {ok, Mod};
        []        -> pick_known_mod(Names, KnownMods)
    end.
% shorthand for run_mod_main/1
rmm(X) -> run_mod_main(X).
% KnownTests = test_mods(),
% TestMods = ensure_all_known([], List, KnownTests),
% lists:foreach(fun run_mod_main/1, TestMods).
%ensure_all_known(Acc, [], _) ->
% lists:sort(Acc);
%ensure_all_known(Acc, [T | Ts], Knowns) ->
% case lists:member(T, Knowns) of
%
% end.
% known modules whose name starts with "gsc_test"
test_mods() ->
    Prefix = "gsc_test",
    known_modules_with_prefix(Prefix).
% feed every {Module, BeamPath, Loaded} entry reported by
% code:all_available/0 through kmp/3, starting from an
% empty accumulator
known_modules_with_prefix(Pfx) ->
    kmp(Pfx, code:all_available(), []).
% worker for known_modules_with_prefix/1
%
% keeps the entries whose module name (a string) starts
% with Pfx, converts those names to atoms, adds them to
% Acc and returns the whole lot sorted
kmp(Pfx, Available, Acc) ->
    Keep =
        fun({ModStr, _BeamPath, _Loaded}) ->
            case lists:prefix(Pfx, ModStr) of
                true  -> {true, list_to_atom(ModStr)};
                false -> false
            end
        end,
    lists:sort(lists:filtermap(Keep, Available) ++ Acc).
% print a banner naming Mod, then call Mod:main()
%
% an exception raised by Mod:main() is not propagated:
% its class, reason and stack trace are printed instead
run_mod_main(Mod) ->
    Banner = "========================================\n"
             "~p:main()\n"
             "========================================\n",
    io:format(Banner, [Mod]),
    try Mod:main() of
        Result -> Result
    catch
        Class:Reason:Stack ->
            io:format("~p: ~p~n", [Class, Reason]),
            io:format("Trace:~n~p~n", [Stack])
    end.
% print the name of every module in test_mods/0, one per
% line
do_tlist() ->
    PrintName = fun(ModName) -> io:format("~s~n", [ModName]) end,
    lists:foreach(PrintName, test_mods()).
% true iff gso_tokens/1 and so_tokens/1 return exactly
% equal results for File
tokenizers_agree(File) ->
    Ours   = gso_tokens(File),
    Theirs = so_tokens(File),
    Ours =:= Theirs.
+77
View File
@@ -0,0 +1,77 @@
-spec s2t_file(Signal) -> AstFile when
      Signal  :: [tk()],
      AstFile :: #ns{meta :: file, kids :: asf()}.
% turn the signal of a whole file into a `file' node
%
% the column of the first token is used as the column of
% the file's single block; that block is gulped, its
% items are converted to top decls, and the result is the
% only kid of the returned node
%
% an empty signal raises error(empty_file)
s2t_file([#tk{pos = {_, Col}} | _] = Signal) ->
    RawBlock    = s2t_gulp_block(Col, Signal),
    ParsedBlock = t2t_parse_tds_in_block(RawBlock),
    #ns{meta = file, kids = [ParsedBlock]};
s2t_file([]) ->
    error(empty_file).
-spec s2t_gulp_block(BlkCol, Signal) -> Block when
      BlkCol :: pos_integer(),
      Signal :: [tk()],
      Block  :: #ns{meta :: block}.
% wrap the whole signal in a `block' node whose kids are
% the block items found by s2f_block_items/2
s2t_gulp_block(BCol, Tks) ->
    % sanity check: no token may sit left of the block
    % column; crashes with badmatch otherwise
    NotLeftOfBlock = fun(#tk{pos = {_, Col}}) -> Col >= BCol end,
    true = lists:all(NotLeftOfBlock, Tks),
    #ns{meta = block, kids = s2f_block_items(BCol, Tks)}.
-spec s2f_block_items(BCol, Signal) -> BlkItems when
      BCol     :: pos_integer(),
      Signal   :: [tk()],
      BlkItems :: [BlkItem],
      BlkItem  :: #ns{meta :: block_item,
                      kids :: asf()}.
% cut the signal into `block_item' nodes, in source order
%
% every item must begin with a token whose column is
% exactly BCol; any other leading token is a
% function_clause error
s2f_block_items(BCol, Signal) ->
    s2f_block_items(BCol, [], Signal).

% worker: Done holds the items found so far, newest first
s2f_block_items(BCol, Done, [#tk{pos = {_, BCol}} = First | Rest]) ->
    {slurp, Item, Remaining} = s2t_slurp_block_item(BCol, First, Rest),
    s2f_block_items(BCol, [Item | Done], Remaining);
s2f_block_items(_BCol, Done, []) ->
    lists:reverse(Done).
% take the block item that starts with T0 off the front
% of the signal and wrap its tokens in a `block_item'
% node; returns {slurp, Node, RemainingSignal}
s2t_slurp_block_item(BCol, T0, F0) ->
    {Tokens, Leftover} = s2s_sw_block_item(BCol, T0, F0),
    {slurp, #ns{meta = block_item, kids = Tokens}, Leftover}.
% sw = splitwith; kind of take/drop
%
% the item is T0 plus the leading run of F0 whose tokens
% sit strictly right of BCol; the second element of the
% result is whatever follows that run
s2s_sw_block_item(BCol, T0, F0) ->
    Deeper = fun(#tk{pos = {_, Col}}) -> Col > BCol end,
    Body   = lists:takewhile(Deeper, F0),
    After  = lists:dropwhile(Deeper, F0),
    {[T0 | Body], After}.
-spec t2t_parse_tds_in_block(Block0) -> Block1 when
      Block0 :: ast(),
      Block1 :: ast().
% replace every kid of the block with the result of
% t2t_parse_td_from_item/1; the rest of the block node is
% kept as it was
t2t_parse_tds_in_block(#ns{meta = block, kids = Items} = Block) ->
    ToTopDecl = fun(Item) -> t2t_parse_td_from_item(Item) end,
    Block#ns{kids = lists:map(ToTopDecl, Items)}.
-spec t2t_parse_td_from_item(BlockItem) -> TopDecl when
      BlockItem :: #ns{meta :: block_item},
      TopDecl   :: #ns{meta :: td_meta()}.
% hand the kids of a `block_item' node to s2t_top_decl/1
t2t_parse_td_from_item(Item = #ns{meta = block_item}) ->
    s2t_top_decl(Item#ns.kids).
-spec s2t_top_decl(Signal) -> TdTree when
Signal :: [tk()],
TdTree :: ast().
s2t_top_decl(S0) ->
+292
View File
@@ -0,0 +1,292 @@
# Syntax
## Lexical syntax
### Comments
Single line comments start with `//` and block comments are enclosed in `/*`
and `*/` and can be nested.
### Keywords
```
contract include let switch type record datatype if elif else function
stateful payable true false mod public entrypoint private indexed namespace
interface main using as for hiding
```
### Tokens
- `Id = [a-z_][A-Za-z0-9_']*` identifiers start with a lower case letter.
- `Con = [A-Z][A-Za-z0-9_']*` constructors start with an upper case letter.
- `QId = (Con\.)+Id` qualified identifiers (e.g. `Map.member`)
- `QCon = (Con\.)+Con` qualified constructor
- `TVar = 'Id` type variable (e.g. `'a`, `'b`)
- `Int = [0-9]+(_[0-9]+)*|0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*` integer literal with optional `_` separators
- `Bytes = #[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*` byte array literal with optional `_` separators
- `String` string literal enclosed in `"` with escape character `\`
- `Char` character literal enclosed in `'` with escape character `\`
- `AccountAddress` base58-encoded 32 byte account pubkey with `ak_` prefix
- `ContractAddress` base58-encoded 32 byte contract address with `ct_` prefix
- `Signature` base58-encoded 64 byte cryptographic signature with `sg_` prefix
Valid string escape codes are
| Escape | ASCII | |
|---------------|-------------|---|
| `\b` | 8 | |
| `\t` | 9 | |
| `\n` | 10 | |
| `\v` | 11 | |
| `\f` | 12 | |
| `\r` | 13 | |
| `\e` | 27 | |
| `\xHexDigits` | *HexDigits* | |
See the [identifier encoding scheme](https://git.qpq.swiss/QPQ-AG/protocol/src/branch/master/node/api/api_encoding.md) for the
details on the base58 literals.
## Layout blocks
Sophia uses Python-style layout rules to group declarations and statements. A
layout block with more than one element must start on a separate line and be
indented more than the currently enclosing layout block. Blocks with a single
element can be written on the same line as the previous token.
Each element of the block must share the same indentation and no part of an
element may be indented less than the indentation of the block. For instance
```sophia
contract Layout =
function foo() = 0 // no layout
function bar() = // layout block starts on next line
let x = foo() // indented more than 2 spaces
x
+ 1 // the '+' is indented more than the 'x'
```
## Notation
In describing the syntax below, we use the following conventions:
- Upper-case identifiers denote non-terminals (like `Expr`) or terminals with
some associated value (like `Id`).
- Keywords and symbols are enclosed in single quotes: `'let'` or `'='`.
- Choices are separated by vertical bars: `|`.
- Optional elements are enclosed in `[` square brackets `]`.
- `(` Parentheses `)` are used for grouping.
- Zero or more repetitions are denoted by a postfix `*`, and one or more
repetitions by a `+`.
- `Block(X)` denotes a layout block of `X`s.
- `Sep(X, S)` is short for `[X (S X)*]`, i.e. a possibly empty sequence of `X`s
separated by `S`s.
- `Sep1(X, S)` is short for `X (S X)*`, i.e. same as `Sep`, but must not be empty.
## Declarations
A Sophia file consists of a sequence of *declarations* in a layout block.
```c
File ::= Block(TopDecl)
TopDecl ::= ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl)
| 'contract' 'interface' Con [Implement] '=' Block(Decl)
| 'namespace' Con '=' Block(Decl)
| '@compiler' PragmaOp Version
| 'include' String
| Using
Implement ::= ':' Sep1(Con, ',')
Decl ::= 'type' Id ['(' TVar* ')'] '=' TypeAlias
| 'record' Id ['(' TVar* ')'] '=' RecordType
| 'datatype' Id ['(' TVar* ')'] '=' DataType
| 'let' Id [':' Type] '=' Expr
| (EModifier* 'entrypoint' | FModifier* 'function') Block(FunDecl)
| Using
FunDecl ::= Id ':' Type // Type signature
| Id Args [':' Type] '=' Block(Stmt) // Definition
| Id Args [':' Type] Block(GuardedDef) // Guarded definitions
GuardedDef ::= '|' Sep1(Expr, ',') '=' Block(Stmt)
Using ::= 'using' Con ['as' Con] [UsingParts]
UsingParts ::= 'for' '[' Sep1(Id, ',') ']'
| 'hiding' '[' Sep1(Id, ',') ']'
PragmaOp ::= '<' | '=<' | '==' | '>=' | '>'
Version ::= Sep1(Int, '.')
EModifier ::= 'payable' | 'stateful'
FModifier ::= 'stateful' | 'private'
Args ::= '(' Sep(Pattern, ',') ')'
```
Contract declarations must appear at the top-level.
For example,
```sophia
contract Test =
type t = int
entrypoint add (x : t, y : t) = x + y
```
There are three forms of type declarations: type aliases (declared with the
`type` keyword), record type definitions (`record`) and data type definitions
(`datatype`):
```c
TypeAlias ::= Type
RecordType ::= '{' Sep(FieldType, ',') '}'
DataType ::= Sep1(ConDecl, '|')
FieldType ::= Id ':' Type
ConDecl ::= Con ['(' Sep1(Type, ',') ')']
```
For example,
```sophia
record point('a) = {x : 'a, y : 'a}
datatype shape('a) = Circle(point('a), 'a) | Rect(point('a), point('a))
type int_shape = shape(int)
```
## Types
```c
Type ::= Domain '=>' Type // Function type
| Type '(' Sep(Type, ',') ')' // Type application
| '(' Type ')' // Parens
| 'unit' | Sep(Type, '*') // Tuples
| Id | QId | TVar
Domain ::= Type // Single argument
| '(' Sep(Type, ',') ')' // Multiple arguments
```
The function type arrow associates to the right.
Example,
```sophia
'a => list('a) => (int * list('a))
```
## Statements
Function bodies are blocks of *statements*, where a statement is one of the following
```c
Stmt ::= 'switch' '(' Expr ')' Block(Case)
| 'if' '(' Expr ')' Block(Stmt)
| 'elif' '(' Expr ')' Block(Stmt)
| 'else' Block(Stmt)
| 'let' LetDef
| Using
| Expr
LetDef ::= Id Args [':' Type] '=' Block(Stmt) // Function definition
| Pattern '=' Block(Stmt) // Value definition
Case ::= Pattern '=>' Block(Stmt)
| Pattern Block(GuardedCase)
GuardedCase ::= '|' Sep1(Expr, ',') '=>' Block(Stmt)
Pattern ::= Expr
```
`if` statements can be followed by zero or more `elif` statements and an optional final `else` statement. For example,
```sophia
let x : int = 4
switch(f(x))
None => 0
Some(y) =>
if(y > 10)
"too big"
elif(y < 3)
"too small"
else
"just right"
```
## Expressions
```c
Expr ::= '(' LamArgs ')' '=>' Block(Stmt) // Anonymous function (x) => x + 1
| '(' BinOp ')' // Operator lambda (+)
| 'if' '(' Expr ')' Expr 'else' Expr // If expression if(x < y) y else x
| Expr ':' Type // Type annotation 5 : int
| Expr BinOp Expr // Binary operator x + y
| UnOp Expr // Unary operator ! b
| Expr '(' Sep(Expr, ',') ')' // Application f(x, y)
| Expr '.' Id // Projection state.x
| Expr '[' Expr ']' // Map lookup map[key]
| Expr '{' Sep(FieldUpdate, ',') '}' // Record or map update r{ fld[key].x = y }
| '[' Sep(Expr, ',') ']' // List [1, 2, 3]
| '[' Expr '|' Sep(Generator, ',') ']'
// List comprehension [k | x <- [1], if (f(x)), let k = x+1]
| '[' Expr '..' Expr ']' // List range [1..n]
| '{' Sep(FieldUpdate, ',') '}' // Record or map value {x = 0, y = 1}, {[key] = val}
| '(' Expr ')' // Parens (1 + 2) * 3
| '(' Expr '=' Expr ')' // Assign pattern (y = x::_)
| Id | Con | QId | QCon // Identifiers x, None, Map.member, AELib.Token
| Int | Bytes | String | Char // Literals 123, 0xff, #00abc123, "foo", '%'
| AccountAddress | ContractAddress // Chain identifiers
| Signature // Signature
| '???' // Hole expression 1 + ???
Generator ::= Pattern '<-' Expr // Generator
| 'if' '(' Expr ')' // Guard
| LetDef // Definition
LamArgs ::= '(' Sep(LamArg, ',') ')'
LamArg ::= Id [':' Type]
FieldUpdate ::= Path '=' Expr
Path ::= Id // Record field
| '[' Expr ']' // Map key
| Path '.' Id // Nested record field
| Path '[' Expr ']' // Nested map key
BinOp ::= '||' | '&&' | '<' | '>' | '=<' | '>=' | '==' | '!='
| '::' | '++' | '+' | '-' | '*' | '/' | 'mod' | '^'
| 'band' | 'bor' | 'bxor' | '<<' | '>>' | '|>'
UnOp ::= '-' | '!' | 'bnot'
```
## Operators types
| Operators | Type
| --- | ---
| `-` `+` `*` `/` `mod` `^` | arithmetic operators
| `!` `&&` `\|\|` | logical operators
| `band` `bor` `bxor` `bnot` `<<` `>>` | bitwise operators
| `==` `!=` `<` `>` `=<` `>=` | comparison operators
| `::` `++` | list operators
| `\|>` | functional operators
## Operator precedence
In order of highest to lowest precedence.
| Operators | Associativity
| --- | ---
| `!` `bnot`| right
| `^` | left
| `*` `/` `mod` | left
| `-` (unary) | right
| `+` `-` | left
| `<<` `>>` | left
| `::` `++` | right
| `<` `>` `=<` `>=` `==` `!=` | none
| `band` | left
| `bxor` | left
| `bor` | left
| `&&` | right
| `\|\|` | right
| `\|>` | left
+15 -27
View File
@@ -1,41 +1,29 @@
% @doc bikeshed proctrastination head into vim warmup thing
% @doc bikeshed procrastination head into vim warmup
% thing
%
% sophia compiler from scratch by PRH
%
% based on original sophia compiler
%
% parse layers:
% 1. gsc_tokens: SrcStr -> (Tokens | SigTokens)
%
% SigTokens = not comment/whitespace
%
% layers:
% a. gsc_strmatch : matches string shapes
% b. gso_scan : converts to so_scan shapes
%
%
% terminology:
%
% - `slurp`/`barf` borrowed from emacs paredit mode:
%
% slurp : (a b) c -> (a b c)
% barf : (a b c) -> a (b c)
%
% * `slurp` usually involves *transforming* input
% into a new type (e.g. slurp a token from src
% string); think of slurp as a verb meaning to
% consume and then digest
% * `barf` basically means blindly splitting off
% input
%
% based on original sophia compiler; target for version
% 0.1 is to match behavior exactly
% @end
-module(gsc).
% token and tokens
-export_type([
token/0,
signal/0
]).
% syntax tree/forest wrapper type
-export_type([
ntree/2, ntree/0,
nforest/2, nforest/0,
nt/2, nt/0,
nf/2, nf/0
]).
-export([
unsafe_tokens_from_file/1,
unsafe_tokens_from_string/1,
+53 -124
View File
@@ -1,5 +1,5 @@
%%% @doc
%%% GSC CLI: explorer/harness for sfc iteration
%%% GSC CLI: explorer/harness for gsc iteration
%%% @end
-module(gsc_cli).
@@ -8,11 +8,10 @@
-copyright("Peter Harpending <peterharpending@qpq.swiss>").
-license("GPL-3.0-only").
-export([
tokens/1,
so_tokens/1,
gso_tokens/1
]).
%-export([
% tokens/1,
% gso_tokens/1
%]).
-export([start/1]).
-include("$gsc_include/gsc.hrl").
@@ -37,37 +36,29 @@ start(["eshell"]) ->
do_eshell(),
ok;
start(ArgV) ->
%io:format("ArgV: ~p~n", [ArgV]),
do(ArgV),
zx:silent_stop().
do(["list"]) ->
do_tlist();
do(["list", "tests"]) ->
do_tlist();
do(["test"]) ->
do_tests();
do(["test" | Tests]) ->
do_tests(Tests);
do(["tests"]) ->
do_tests();
do(["run", "tests"]) ->
do_tests();
do(["tokenizers_agree", Foo]) ->
io:format("~p~n", [tokenizers_agree(Foo)]);
do_test(Tests);
% slowly phasing out shitty names like lctokens
% tokens = native sfc token representation
do(["tokens", Foo]) -> do_tokens(Foo);
do(["color_tokens", Foo]) -> do_color_tokens(Foo);
do(["ctokens", Foo]) -> do_color_tokens(Foo);
do(["colour_tokens" | _]) -> do_doi();
% so_tokens = so_scan tokens
do(["so", "tokens", Foo]) -> do_so_tokens(Foo);
do(["so_tokens", Foo]) -> do_so_tokens(Foo);
% print source file to screen with token boundaries
% highlighted
do(["tokens", "-c", Foo]) -> do_color_tokens(Foo);
do(["tokens", "--color", Foo]) -> do_color_tokens(Foo);
do(["tcat", Foo]) -> do_color_tokens(Foo);
do(["ctokens", Foo]) -> do_color_tokens(Foo);
do(["color_tokens", Foo]) -> do_color_tokens(Foo);
do(["tokens", "--colour" | _]) -> do_doi();
do(["colour_tokens" | _]) -> do_doi();
%% so_tokens = so_scan tokens
%do(["so", "tokens", Foo]) -> do_so_tokens(Foo);
%do(["so_tokens", Foo]) -> do_so_tokens(Foo);
% gso_tokens = our mockery
do(["gso", "tokens", Foo]) -> do_gso_tokens(Foo);
do(["gso_tokens", Foo]) -> do_gso_tokens(Foo);
% print source file to screen with token boundaries highlighted
do(["gso_tokens", Foo]) -> do_gso_tokens(Foo);
% script utility
do(["rmm", Foo]) ->
do_rmm(Foo);
@@ -79,6 +70,20 @@ do_doi() ->
FP = zx:get_home() ++ "/priv/doi.txt",
page_file(FP).
% obtain the test driver module from gsc_tests_mod/0 and
% pass the CLI arguments to its cli_args/1
do_test(Args) ->
    (gsc_tests_mod()):cli_args(Args).
% compile test/gsc_tests.erl under zx:get_home/0 and
% return the module name; anything other than {ok, Mod}
% from compile:file/1 is raised as an error
gsc_tests_mod() ->
    Home = zx:get_home(),
    compiled_mod(compile:file(Home ++ "/test/gsc_tests.erl")).

% unwrap the result of compile:file/1
compiled_mod({ok, Mod}) -> Mod;
compiled_mod(Failure)   -> error(Failure).
% thank you chatgpt
% os:cmd didnt do nuffin because that's for running
% stuff in the background and capturing the output, not
@@ -107,83 +112,6 @@ less_file(Less, FilePath) ->
error(Reason)
end.
do_tests() ->
io:format("TestModules = ~p~n", [test_mods()]),
do_runall_tests().
do_runall_tests() ->
lists:foreach(fun run_mod_main/1, test_mods()).
do_tests(List) ->
lists:foreach(fun run_test/1, List).
% n
run_test(TestName) ->
% we have two candidate atoms
C1 = list_to_atom(TestName),
C2 = list_to_atom("gsc_test_" ++ TestName),
KnownMods = test_mods(),
IsC1 = lists:member(C1, KnownMods),
IsC2 = lists:member(C2, KnownMods),
if
IsC1 -> rmm(C1);
IsC2 -> rmm(C2);
true -> error({no_such_test, TestName})
end.
rmm(X) -> run_mod_main(X).
% KnownTests = test_mods(),
% TestMods = ensure_all_known([], List, KnownTests),
% lists:foreach(fun run_mod_main/1, TestMods).
%ensure_all_known(Acc, [], _) ->
% lists:sort(Acc);
%ensure_all_known(Acc, [T | Ts], Knowns) ->
% case lists:member(T, Knowns) of
%
% end.
test_mods() ->
known_modules_with_prefix("gsc_test").
known_modules_with_prefix(Pfx) ->
ModsZipBeamsZipLoaded = code:all_available(),
kmp(Pfx, ModsZipBeamsZipLoaded, []).
kmp(_Pfx, [], Acc) ->
lists:sort(Acc);
kmp(Pfx, [{ModStr, _BeamPath, _Loaded} | Rest], Acc) ->
case lists:prefix(Pfx, ModStr) of
false -> kmp(Pfx, Rest, Acc);
true -> kmp(Pfx, Rest, [list_to_atom(ModStr) | Acc])
end.
run_mod_main(Mod) ->
io:format("========================================\n"
"~p:main()\n"
"========================================\n",
[Mod]),
try
Mod:main()
catch
Err:ErrType:Trace ->
io:format("~p: ~p~n", [Err, ErrType]),
io:format("Trace:~n~p~n", [Trace])
end.
do_tlist() ->
lists:foreach(
fun(ModName) ->
io:format("~s~n", [ModName])
end,
test_mods()
).
-spec do_eshell() -> ok.
% @doc start an erlang shell
@@ -196,16 +124,11 @@ do_eshell() ->
{error, Reason} -> error(Reason)
end.
tokenizers_agree(File) ->
gso_tokens(File) =:= so_tokens(File).
do_tokens(FilePath) ->
[io:format("~p~n", [Tk]) || Tk <- tokens(FilePath)].
do_so_tokens(FilePath) ->
[io:format("~p~n", [Tk]) || Tk <- so_tokens(FilePath)].
do_gso_tokens(FilePath) ->
[io:format("~p~n", [Tk]) || Tk <- gso_tokens(FilePath)].
@@ -219,12 +142,6 @@ do_rmm(FilePath) ->
end.
so_tokens(FilePath) ->
{ok, FileBytes} = file:read_file(FilePath),
FileStr = unicode:characters_to_nfc_list(FileBytes),
{ok, Tokens} = so_scan:scan(FileStr),
Tokens.
gso_tokens(FilePath) ->
{ok, FileBytes} = file:read_file(FilePath),
FileStr = unicode:characters_to_nfc_list(FileBytes),
@@ -263,14 +180,26 @@ colorize_tokens(_, [], Acc) ->
rotate([A | Rest]) ->
{A, Rest ++ [A]}.
colorize_token_str(Color, #tk{str = Str}) ->
{Pfx, Sfx} = color_fixes(Color),
colorize_token_str(Color, T = #tk{str = Str}) ->
SN =
case T#tk.shape of
bcom -> noise;
lcom -> noise;
ws -> noise;
_ -> signal
end,
{Pfx, Sfx} = color_fixes(SN, Color),
[Pfx, Str, Sfx].
color_fixes(red) -> {?ANSI_FG_RED, ?ANSI_FG_RESET};
color_fixes(green) -> {?ANSI_FG_GREEN, ?ANSI_FG_RESET};
color_fixes(yellow) -> {?ANSI_FG_YELLOW, ?ANSI_FG_RESET};
color_fixes(blue) -> {?ANSI_FG_BLUE, ?ANSI_FG_RESET};
color_fixes(magenta) -> {?ANSI_FG_MAGENTA, ?ANSI_FG_RESET};
color_fixes(cyan) -> {?ANSI_FG_CYAN, ?ANSI_FG_RESET}.
% dim noisy tokens
%color_fixes(noise, Color) ->
% {P, S} = color_fixes(signal, Color),
% {[?ANSI_DIM, P], [S, ?ANSI_UNDIM]};
% {Prefix, Suffix} pair to put around a token string for
% the given color name: the matching ?ANSI_FG_* macro and
% ?ANSI_FG_RESET
%
% the first argument is ignored by every clause; a color
% name that is not listed here is a function_clause error
color_fixes(_, red) -> {?ANSI_FG_RED, ?ANSI_FG_RESET};
color_fixes(_, green) -> {?ANSI_FG_GREEN, ?ANSI_FG_RESET};
color_fixes(_, yellow) -> {?ANSI_FG_YELLOW, ?ANSI_FG_RESET};
color_fixes(_, blue) -> {?ANSI_FG_BLUE, ?ANSI_FG_RESET};
color_fixes(_, magenta) -> {?ANSI_FG_MAGENTA, ?ANSI_FG_RESET};
color_fixes(_, cyan) -> {?ANSI_FG_CYAN, ?ANSI_FG_RESET}.
-39
View File
@@ -1,12 +1,5 @@
-module(gsc_ntree).
-export_type([
ntree/2, ntree/0,
nforest/2, nforest/0,
nt/2, nt/0,
nf/2, nf/0
]).
-export([
nstem/2, meta/1, kids/1,
flatten_tree/1, flatten_forest/1
@@ -15,37 +8,6 @@
-include("$gsc_include/gsc.hrl").
%%=====================================================
%% API: types
%%=====================================================
% @doc stem record
-record(ns, {meta :: any(),
kids :: list(any())}).
% @doc `ntree(S, L)' is a "node tree" (meaning stems
% have values and children)
%
% for the purposes of the compiler, the key observation
% is that a flat list of tokens is already a forest
-type ntree(S, L) :: #ns{meta :: S, kids :: [ntree(S, L)]}
| L.
% @doc forest is just a list of trees
-type nforest(S, L) :: [ntree(S, L)].
% aliases
-type nt(S, L) :: ntree(S, L).
-type nf(S, L) :: nforest(S, L).
-type ntree() :: ntree(any(), any()).
-type nforest() :: [ntree()].
-type nt() :: ntree().
-type nf() :: nforest().
%%=====================================================
%% API: functions
@@ -91,4 +53,3 @@ ft(Leaf) -> [Leaf].
ff(F) ->
[ft(T) || T <- F].
+111
View File
@@ -0,0 +1,111 @@
% signal = non-noisy tokens
-module(gsc_signal).
-export([
from_tokens/1,
is_block/1,
gulp_block_items/1,
block_to_items/1,
take_block_item/1
]).
-include("$gsc_include/gsc.hrl").
-spec from_tokens(Tokens) -> Signal when
Tokens :: [tk()],
Signal :: [tk()].
% @doc filter out comments/whitespace
from_tokens(Tokens) ->
gsc_tokens:filter_significant(Tokens).
-spec is_block(Signal) -> Result when
Signal :: [tk()],
Result :: boolean().
is_block([]) ->
true;
is_block([#tk{pos = {_, BCol}} | Rest]) ->
InBlock =
fun(#tk{pos = {_, TCol}}) ->
BCol =< TCol
end,
lists:all(InBlock, Rest).
-spec gulp_block_items(Signal) -> Result when
Signal :: [tk()],
Result :: {slurp, Items, NewSignal}
| {error, any()},
Items :: [Signal],
NewSignal :: Signal.
gulp_block_items(S) ->
case is_block(S) of
true -> {gulp, block_to_items(S)};
false -> find_badness(S)
end.
find_badness([#tk{pos = {_, StartCol}} = StartTk | Rest]) ->
find_badness(StartCol, StartTk, Rest).
find_badness(StartCol, StartTk, [#tk{pos = {_, TkCol}} = Tk | Rest]) ->
Bad = TkCol < StartCol,
case Bad of
false -> find_badness(StartCol, StartTk, Rest);
true -> {error, {bad_block, [{start_col, StartCol},
{end_col, TkCol},
{start_tk, StartTk},
{end_tk, Tk}]}}
end.
-spec block_to_items(Signal) -> BlockItems when
Signal :: [tk()],
BlockItems :: [Signal].
% @doc
% naive algorithm, so doesn't ensure all block items
% are same indent level
%
% Input:
% foo = ...
% bar = ...
% baz = ...
%
% Output:
% [foo = ...,
% bar = ...,
% baz = ...]
block_to_items([]) ->
[];
block_to_items(S) ->
b2is([], S).
b2is(Acc, []) ->
lists:reverse(Acc);
b2is(Acc, S) ->
{Item, S1} = take_block_item(S),
b2is([Item | Acc], S1).
-spec take_block_item(Signal) -> Result when
Signal :: [tk()],
Result :: {Item, NewSignal},
Item :: Signal,
NewSignal :: Signal.
take_block_item([]) ->
{[], []};
take_block_item([#tk{pos = {_, ICol}} = T0 | S0]) ->
InItem =
fun(#tk{pos = {_, TCol}}) ->
ICol < TCol
end,
{S0_II, S1} = lists:splitwith(InItem, S0),
{[T0 | S0_II], S1}.
+4 -4
View File
@@ -88,7 +88,7 @@
-export([
smr_sf_ws/0,
smr_sf_op/0,
smr_sf_punct/0,
smr_sf_sep/0,
smr_sf_id/0,
smr_sf_con/0,
smr_sf_qid/0,
@@ -175,7 +175,7 @@ match(Matcher, Source) ->
% -export([
% smr_sf_ws/0,
% smr_sf_op/0,
% smr_sf_punct/0,
% smr_sf_sep/0,
% smr_sf_id/0,
% smr_sf_con/0,
% smr_sf_qid/0,
@@ -224,7 +224,7 @@ smr_sf_op() ->
-spec smr_sf_punct() -> string_matcher().
-spec smr_sf_sep() -> string_matcher().
% @doc
% String matcher for parens/braces
%
@@ -233,7 +233,7 @@ smr_sf_op() ->
% , {"\\.\\.|[,.;()\\[\\]{}]", symbol()}
% @end
smr_sf_punct() ->
smr_sf_sep() ->
M_DotDotOp = smr_string(".."),
M_PunctChars = smr_oneofchars(",.;()[]{}"),
smr_union([M_DotDotOp, M_PunctChars]).
+6 -6
View File
@@ -254,7 +254,7 @@ token_shapes_parse_order() ->
lists:flatten([
% comments and whitespace
lcom, bcom, ws,
punct,
sep,
% literals
char, string, int16, int10, bytes,
ak, ct, sg,
@@ -264,7 +264,7 @@ token_shapes_parse_order() ->
% keywords need to be parsed ahead of ids
kwd, id,
con,
% ops [=, =>, >>], punctuation (parens/braces)
% ops [=, =>, >>], separators (parens/braces)
op
]).
@@ -597,7 +597,7 @@ slurp_token_of_shape(ws, Pos, SrcStr) ->
str = WS},
{tokmatch, Token, Rest}
end;
% KEYWORDS, OPERATORS, PUNCTUATION: kwd, op, punct
% KEYWORDS, OPERATORS, PUNCTUATION: kwd, op, sep
%
% all the kwds are valid ids, so we match as an id and then check if it's a
% kwd
@@ -629,10 +629,10 @@ slurp_token_of_shape(op, Pos, SrcStr) ->
no_strmatch ->
no_tokmatch
end;
slurp_token_of_shape(punct, Pos, SrcStr) ->
case gsc_strmatch:match(gsc_strmatch:smr_sf_punct(), SrcStr) of
slurp_token_of_shape(sep, Pos, SrcStr) ->
case gsc_strmatch:match(gsc_strmatch:smr_sf_sep(), SrcStr) of
{strmatch, Str, Rest} ->
Token = #tk{shape = punct, pos = Pos, str = Str},
Token = #tk{shape = sep, pos = Pos, str = Str},
{tokmatch, Token, Rest};
no_strmatch ->
no_tokmatch
+3 -3
View File
@@ -299,7 +299,7 @@ pass_types() ->
[lcom, % ak_AB// breaks out of id
bcom, % ak_AB/* breaks out of id
ws, % ak_AB\t breaks out of id
punct, % ak_AB{ breaks out of id
sep, % ak_AB{ breaks out of id
string, % ak_AB" breaks out of id
bytes, % ak_AB# breaks out of id
ak,ct,sg, % ak_ABak [akctsg] all in base58 alphabet
@@ -335,13 +335,13 @@ to_so_token(#tk{shape = SfTokenType,
%
% {contract, {420, 69}}
%-----------------------
% kwds ops and punct are all collapsed by
% kwds ops and sep are all collapsed by
% so_scan:scan down to eg {'contract', {420, 69}}
% where {420, 69} is the source location
% these are three different parsers
Sym when Sym =:= kwd;
Sym =:= op;
Sym =:= punct ->
Sym =:= sep ->
Symbol = list_to_atom(SfTokenStr),
{true, {Symbol, Pos}};
%------------------------------------
View File
+273
View File
@@ -0,0 +1,273 @@
% @doc experiment centering around the file syntax node using ntree approach
-module(gsc_test_file).
-export([
main/0
]).
-include("$gsc_include/gsc.hrl").
% Metadata for a contract header:
%
%   ['payable'] ['main'] 'contract' Con [Implement] '='
%
% Every field starts out as `none', meaning "not parsed yet"; s2s_sm_ct/2
% fills the fields in declaration order. After parsing, `payable' and `main'
% are `false' when the keyword is absent or `{true, Token}' when present,
% and `impls' is `[]' when there is no ':' implements list.
-record(ct,
{payable = none :: none | false | {true, tk()},
main = none :: none | false | {true, tk()},
contract = none :: none | tk(),
con = none :: none | tk(),
impls = none :: none | [tk()],
eq = none :: none | tk()}).
-type meta() :: #ct{}.
% Metadata for a type-alias header:
%
%   'type' Id ['(' TVar* ')'] '='
%
% As with #ct{}, `none' means "not parsed yet"; s2s_sm_decl_type/2 fills the
% fields in order. `params' becomes `[]' when no '(' follows the id.
-record(decl_type,
{type = none :: none | tk(),
id = none :: none | tk(),
params = none :: none | [tk()],
eq = none :: none | tk()}).
-type decl_meta() :: #decl_type{}.
% Stem metadata carried by AST nodes. `nyi' / `{nyi, Target}' mark parse
% targets that are not yet implemented.
-type ast_meta() :: file
| meta()
| decl_meta()
| nyi
| {nyi, any()}
.
% Kinds of top-level declaration.
% NOTE(review): s2t(top_decl, _) dispatches namespaces to the atom
% `namespace', not `ns' -- confirm which spelling is intended.
-type target()
:: ct
| iface
| ns
| pragma
| include
| using
.
% Parse targets for s2t/2 (signal -> tree).
-type s2t_target()
:: file
| top_decl
| target()
| nyi
| {nyi, any()}
.
% Parse targets for s2f/2 (signal -> forest).
-type s2f_target()
:: {block_of, s2t_target()}
.
% Syntax tree / forest: stems carry ast_meta(), leaves are tokens.
-type ast() :: ntree(ast_meta(), tk()).
-type asf() :: nforest(ast_meta(), tk()).
% @doc
% Parse the `hello.aes' fixture and print its source text followed by the
% resulting AST.
main() ->
    FixtureName = "hello.aes",
    FixturePath = ts_utils:ct_file_abspath(FixtureName),
    {ok, SourceBin} = file:read_file(FixturePath),
    Tokens = gsc:unsafe_signal_from_file(FixturePath),
    Tree = s2t(file, Tokens),
    % each entry is one io:format/2 call, emitted in order
    Report =
        [{"hello.aes:~n", []},
         {"```~n", []},
         {"~ts", [SourceBin]},
         {"```~n~n", []},
         {"AST: ~tp~n", [Tree]}],
    lists:foreach(fun({Fmt, Args}) -> io:format(Fmt, Args) end, Report),
    ok.
% // Hello World Contract
% // Copyright (c) 2025 QPQ AG
%
% contract Hello =
% type state = unit
% entrypoint init(): state =
% ()
%
% entrypoint hello(): string =
% "hello, world"
-spec s2t(ParseTarget, Signal) -> AST when
    ParseTarget :: s2t_target() | atom(),
    Signal      :: [tk()],
    AST         :: ast().
% @doc
% Signal-to-tree: parse the token list `Signal' as one `ParseTarget' node.
%
% The result is always `{ns, Meta, Kids}' where `Kids' is a list (either a
% forest of sub-trees or the raw leftover tokens). Targets without a
% dedicated clause fall through to the final clauses and are wrapped as
% `{ns, {nyi, Target}, Signal}' (not yet implemented).
%
% Errors: `empty_file', `{bad_top_decl, Signal}', `{bad_decl, Signal}'.
% @end

% File ::= Block(TopDecl)
s2t(file, Signal) ->
    case Signal of
        [] -> error(empty_file);
        _  -> {ns, file, s2f({block_of, top_decl}, Signal)}
    end;
% TopDecl ::= ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl)
%           | ['payable'] 'contract' 'interface' Con [Implement] '=' Block(Decl)
%           | 'namespace' Con '=' Block(Decl)
%           | '@compiler' PragmaOp Version
%           | 'include' String
%           | Using
s2t(top_decl, Signal) ->
    NewTarget =
        case gsc_tokens:strings(3, Signal) of
            ["payable", "contract", "interface"] -> iface;
            ["contract", "interface" | _]        -> iface;
            ["payable", "main", "contract"]      -> ct;
            ["payable", "contract" | _]          -> ct;
            % 'main' without 'payable' is allowed by the grammar above
            ["main", "contract" | _]             -> ct;
            ["contract" | _]                     -> ct;
            ["namespace" | _]                    -> namespace;
            ["@compiler" | _]                    -> pragma;
            ["include" | _]                      -> include;
            ["using" | _]                        -> using;
            % explicit error, mirroring {bad_decl, _} in the decl clause
            _                                    -> error({bad_top_decl, Signal})
        end,
    s2t(NewTarget, Signal);
% ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl)
s2t(ct, S0) ->
    {slurp, CtMeta, S1} = s2s_slurp_meta(#ct{}, S0),
    {ns, CtMeta, s2f({block_of, decl}, S1)};
% Decl ::= 'type'     Id ['(' TVar* ')'] '=' TypeAlias
%        | 'record'   Id ['(' TVar* ')'] '=' RecordType
%        | 'datatype' Id ['(' TVar* ')'] '=' DataType
%        | 'let' Id [':' Type] '=' Expr
%        | (EModifier* 'entrypoint' | FModifier* 'function') Block(FunDecl)
%        | Using
s2t(decl, S0) ->
    NewTarget =
        case gsc_tokens:strings(3, S0) of
            ["type" | _]     -> decl_type;
            ["record" | _]   -> decl_record;
            ["datatype" | _] -> decl_datatype;
            ["let" | _]      -> decl_let;
            Pfx3 ->
                % modifiers may precede the keyword, so look anywhere in
                % the three-token prefix
                IsEp = lists:member("entrypoint", Pfx3),
                IsFn = lists:member("function", Pfx3),
                if
                    IsEp -> decl_entrypoint;
                    IsFn -> decl_function;
                    true -> error({bad_decl, S0})
                end
        end,
    s2t(NewTarget, S0);
% 'type' Id ['(' TVar* ')'] '=' TypeAlias
s2t(decl_type, S0) ->
    {slurp, Meta, S1} = s2s_slurp_meta(#decl_type{}, S0),
    % kids must be a list, as in every other clause
    {ns, Meta, [s2t(type, S1)]};
s2t(nyi, Signal) ->
    {ns, nyi, Signal};
s2t(NYI = {nyi, _}, Signal) ->
    {ns, NYI, Signal};
s2t(NYI, Signal) ->
    {ns, {nyi, NYI}, Signal}.
-spec s2f(ForestTarget, Signal) -> Forest when
    ForestTarget :: s2f_target(),
    Signal       :: [tk()],
    Forest       :: asf().
% @doc
% Signal-to-forest: split `Signal' into block items with
% gsc_signal:gulp_block_items/1 and parse each item as a `TreeTarget'.
% @end
s2f({block_of, TreeTarget}, Signal) ->
    {gulp, Items} = gsc_signal:gulp_block_items(Signal),
    lists:map(fun(Item) -> s2t(TreeTarget, Item) end, Items).
-spec s2s_slurp_meta(InitMeta, Signal) -> Result when
    InitMeta  :: Meta,
    Signal    :: [tk()],
    Result    :: {slurp, Meta, NewSignal},
    Meta      :: ast_meta(),
    NewSignal :: Signal.
% @doc
% Consume the header tokens belonging to the given (empty) metadata record
% and return the filled record plus the remaining tokens. Dispatches on the
% record type; anything else raises `{s2s_slurp_meta, Meta, Signal}'.
% @end
s2s_slurp_meta(Meta, Signal) when is_record(Meta, ct) ->
    s2s_sm_ct(Meta, Signal);
s2s_slurp_meta(Meta, Signal) when is_record(Meta, decl_type) ->
    s2s_sm_decl_type(Meta, Signal);
s2s_slurp_meta(Meta, Signal) ->
    error({s2s_slurp_meta, Meta, Signal}).
% Fill a #ct{} one field at a time. A field still set to `none' has not been
% visited yet; the clauses are ordered like the grammar
%
%   ['payable'] ['main'] 'contract' Con [Implement] '='
%
% so the first unvisited field decides which token is expected next.

% optional 'payable'
s2s_sm_ct(Ct = #ct{payable = none}, [Tk = #tk{str = "payable"} | Rest]) ->
    s2s_sm_ct(Ct#ct{payable = {true, Tk}}, Rest);
s2s_sm_ct(Ct = #ct{payable = none}, Signal) ->
    s2s_sm_ct(Ct#ct{payable = false}, Signal);
% optional 'main'
s2s_sm_ct(Ct = #ct{main = none}, [Tk = #tk{str = "main"} | Rest]) ->
    s2s_sm_ct(Ct#ct{main = {true, Tk}}, Rest);
s2s_sm_ct(Ct = #ct{main = none}, Signal) ->
    s2s_sm_ct(Ct#ct{main = false}, Signal);
% mandatory 'contract' keyword
s2s_sm_ct(Ct = #ct{contract = none}, [Tk = #tk{str = "contract"} | Rest]) ->
    s2s_sm_ct(Ct#ct{contract = Tk}, Rest);
s2s_sm_ct(Ct = #ct{contract = none}, Signal) ->
    error({no_kwd_contract, Ct, Signal});
% mandatory contract name
s2s_sm_ct(Ct = #ct{con = none}, [Tk = #tk{shape = con} | Rest]) ->
    s2s_sm_ct(Ct#ct{con = Tk}, Rest);
s2s_sm_ct(Ct = #ct{con = none}, Signal) ->
    error({no_contract_name, Ct, Signal});
% optional ': Iface1, Iface2, ...'
s2s_sm_ct(Ct = #ct{impls = none}, Signal) ->
    {Impls, Rest} =
        case gsc_tokens:strings(1, Signal) of
            [":"] ->
                {slurp, Found, After} = s2f_slurp_impls(Signal),
                {Found, After};
            _ ->
                {[], Signal}
        end,
    s2s_sm_ct(Ct#ct{impls = Impls}, Rest);
% mandatory '='
s2s_sm_ct(Ct = #ct{eq = none}, [Tk = #tk{str = "="} | Rest]) ->
    s2s_sm_ct(Ct#ct{eq = Tk}, Rest);
s2s_sm_ct(Ct = #ct{eq = none}, Signal) ->
    error({no_equal_sign, Ct, Signal});
% every field visited: done
s2s_sm_ct(Ct, Signal) ->
    {slurp, Ct, Signal}.
% Consume ': Con (, Con)*' from the front of the signal and return
% {slurp, ConTokensInSourceOrder, RemainingSignal}. Crashes with
% function_clause unless the signal starts with ':' followed by a `con'.
s2f_slurp_impls([#tk{str = ":"}, First = #tk{shape = con} | Rest]) ->
    s2f_slurp_impls([First], Rest).

% Acc holds the names seen so far, newest first.
s2f_slurp_impls(Acc, [#tk{str = ","}, Next = #tk{shape = con} | Rest]) ->
    s2f_slurp_impls([Next | Acc], Rest);
s2f_slurp_impls(Acc, Rest) ->
    {slurp, lists:reverse(Acc), Rest}.
%-record(decl_type,
% {type = none :: none | tk(),
% id = none :: none | tk(),
% params = none :: none | [tk()],
% eq = none :: none | tk()}).
% Fill a #decl_type{} one field at a time, following
%
%   'type' Id ['(' TVar* ')'] '='
%
% A field still set to `none' has not been visited yet.

% mandatory 'type' keyword
s2s_sm_decl_type(Meta = #decl_type{type = none}, [Tk = #tk{str = "type"} | Rest]) ->
    s2s_sm_decl_type(Meta#decl_type{type = Tk}, Rest);
s2s_sm_decl_type(#decl_type{type = none}, Signal) ->
    error({no_kwd_type, Signal});
% mandatory type id
s2s_sm_decl_type(Meta = #decl_type{id = none}, [Tk = #tk{shape = id} | Rest]) ->
    s2s_sm_decl_type(Meta#decl_type{id = Tk}, Rest);
s2s_sm_decl_type(#decl_type{id = none}, Signal) ->
    error({no_type_id, Signal});
% optional type parameters: parenthesised lists are not supported yet
s2s_sm_decl_type(#decl_type{params = none}, [#tk{str = "("} | _]) ->
    error({fixme, parens_bad});
s2s_sm_decl_type(Meta = #decl_type{params = none}, Signal) ->
    s2s_sm_decl_type(Meta#decl_type{params = []}, Signal);
% mandatory '='
s2s_sm_decl_type(Meta = #decl_type{eq = none}, [Tk = #tk{str = "="} | Rest]) ->
    s2s_sm_decl_type(Meta#decl_type{eq = Tk}, Rest);
s2s_sm_decl_type(#decl_type{eq = none}, Signal) ->
    error({no_equal_sign, Signal});
% every field visited: done
s2s_sm_decl_type(Meta, Signal) ->
    {slurp, Meta, Signal}.
@@ -6,29 +6,18 @@
-include("$gsc_include/gsc.hrl").
% records copypasta for now
-record(ns, {meta :: any(), kids :: list(any())}).
-type ntree(X, Y) :: gsc_ntree:ntree(X, Y).
-type nforest(X, Y) :: gsc_nforest:nforest(X, Y).
-type nt(X, Y) :: gsc_ntree:ntree(X, Y).
-type nf(X, Y) :: gsc_nforest:nforest(X, Y).
% just parsing type expressions right now, so only need
% to worry about round parens
%
% none is to indicate general-purpose grouping, for
% e.g. LHS/RHS of an op
-type syntax_meta()
:: none
| {op, tk()}
:: {op, tk()}
| op_arg
| {parens, Open :: tk(), Close :: tk()}
.
-type ast() :: ntree(StemMeta :: syntax_meta(),
LeafType :: tk()).
%-type ast() :: ntree(syntax_meta(), tk()).
-type asf() :: nforest(syntax_meta(), tk()).
-type asts() :: asf().
@@ -70,7 +59,7 @@ parse(Signal) ->
F1 = f2f_parens(F0),
F2 = f2f_op("=>", F1),
F3 = f2f_op("*", F2),
Result = F2,
Result = F3,
Result.
@@ -85,8 +74,8 @@ f2f_op(_opstr, Stk, []) ->
f2f_op(OpStr, LhsStk, [#tk{str = OpStr} = OpTk | Rest]) ->
Lhf = lists:reverse(LhsStk),
Rhf = f2f_op(OpStr, Rest),
Lht = #ns{meta = none, kids = Lhf},
Rht = #ns{meta = none, kids = Rhf},
Lht = #ns{meta = op_arg, kids = Lhf},
Rht = #ns{meta = op_arg, kids = Rhf},
ResultT = #ns{meta = {op, OpTk},
kids = [Lht, Rht]},
ResultF = [ResultT],
@@ -11,13 +11,14 @@
main() ->
%io:format("~p~n", [div_files()]),
%io:format("MAINNNNN!~n", []),
eunit:test(?MODULE, [verbose]).
%eunit:test(?MODULE).
%eunit:test(?MODULE, [verbose]),
eunit:test(?MODULE),
ok.
% directory containing the tests for the tokenizer
ct_dir() ->
zx_daemon:get_home() ++ "/ct".
zx_daemon:get_home() ++ "/test/ct".
agreement_tests_dir() ->
ct_dir() ++ "/tokenizers_agree".
+64
View File
@@ -0,0 +1,64 @@
% dynamic hacky module that loads all the tests
-module(gsc_tests).
-export([
main/0,
cli_args/1
]).
% @doc Entry point without arguments: same as `cli_args([])'.
main() ->
    _ = cli_args([]),
    ok.
% @doc
% Compile test/ts_utils.erl, then run the requested test command inside
% ts_utils:tidily/1, which loads the test deps first and removes compiled
% beam files afterwards. Raises the compile result if ts_utils fails to
% compile.
% @end
cli_args(TestNames) ->
    TsUtilsSrc = zx:get_home() ++ "/test/ts_utils.erl",
    CompileResult = compile:file(TsUtilsSrc),
    CompileResult =:= {ok, ts_utils} orelse error(CompileResult),
    ts_utils:tidily(fun() -> do_gsc_test(TestNames) end).
% Dispatch on the command-line words. A leading flag wins regardless of what
% follows it; anything unrecognised prints the help text.
do_gsc_test([Flag | _]) when Flag =:= "-h"; Flag =:= "--help" ->
    do_help();
do_gsc_test([Flag | _]) when Flag =:= "-l"; Flag =:= "--list" ->
    do_list();
do_gsc_test([Flag | _]) when Flag =:= "-a"; Flag =:= "--all" ->
    do_all();
do_gsc_test(["so_tokens", Path]) ->
    do_so_tokens(Path);
do_gsc_test(["so", "tokens", Path]) ->
    do_so_tokens(Path);
do_gsc_test(["tokenizers_agree", Path]) ->
    do_tokenizers_agree(Path);
do_gsc_test([TestName]) ->
    ts_utils:run_test_by_name(TestName);
do_gsc_test(_) ->
    do_help().
% Print the (placeholder) help text.
do_help() ->
    io:format("go help yourself~n", []).
% Print one `{Name, Module}' pair per line for every runnable test.
do_list() ->
    PrintName = fun(Entry) -> io:format("~p~n", [Entry]) end,
    lists:map(PrintName, ts_utils:runnable_test_names()).
% Run main/0 of every test module that compiled, with a blank line after
% each one. Modules that failed to compile are ignored here.
do_all() ->
    {Compiled, _Failed} = ts_utils:runnable_test_mods(),
    RunOne =
        fun(Mod) ->
            ts_utils:rmm(Mod),
            io:format("~n")
        end,
    lists:map(RunOne, Compiled).
% Print the legacy tokenizer's tokens for FilePath, one per line.
%
% ts_utils:so_tokens/1 returns the raw so_scan:scan/1 result, which
% ts_utils:tokenizers_agree/1 treats as `{ok, Tokens} | {error, _}'.
% Iterating that tuple directly raises `bad_generator', so unwrap it first.
% A bare token list is still accepted in case the scanner returns one.
do_so_tokens(FilePath) ->
    PrintTokens =
        fun(Tokens) ->
            lists:foreach(fun(Tk) -> io:format("~p~n", [Tk]) end, Tokens)
        end,
    case ts_utils:so_tokens(FilePath) of
        {ok, Tokens}                 -> PrintTokens(Tokens);
        Tokens when is_list(Tokens)  -> PrintTokens(Tokens);
        ScanError                    -> io:format("~tp~n", [ScanError])
    end.
% Print whether the legacy and the new tokenizer agree on the file at RelPath.
do_tokenizers_agree(RelPath) ->
    io:format("~tp~n", [ts_utils:tokenizers_agree(RelPath)]).
+356
View File
@@ -0,0 +1,356 @@
% test suite utilities
-module(ts_utils).
-export([
tokenizers_agree/1,
absify/1,
so_tokens/1,
load_test_deps/0,
test_deps/0,
load_dep/1,
clean_after/1, tidily/1,
delete_beams/0, tidy/0,
run_test_by_name/1,
rmm/1, run_mod_main/1,
runnable_test_names/0,
runnable_test_mods/0,
load_test_erls/0,
abspath_to_name/1,
ls_test_erls/0,
ls_test_beams/0,
is_erl/1,
is_beam/1,
ls_test/0,
test_dir/0,
ct_dir/0,
ct_file/1, ct_file_abspath/1, ct_abspath/1
]).
% @doc
% Run both tokenizers on the file at `Relpath' (resolved against the working
% directory). Returns `true' when both succeed with identical tokens or both
% fail, and `false' otherwise.
% @end
tokenizers_agree(Relpath) ->
    AbsPath = absify(Relpath),
    Legacy  = so_tokens(AbsPath),
    Rewrite = gsc:gso_tokens_from_file(AbsPath),
    case {Legacy, Rewrite} of
        % repeated variable: matches only when both token lists are equal
        {{ok, Tokens}, {ok, Tokens}} -> true;
        {{ok, _}, {ok, _}}           -> false;
        {{error, _}, {error, _}}     -> true;
        {{ok, _}, {error, _}}        -> false;
        {{error, _}, {ok, _}}        -> false
    end.
-spec absify(Path) -> AbsPath when
    Path    :: string(),
    AbsPath :: string().
% @doc Turn a path into an absolute one via `filename:absname/1'.
absify(Path) ->
    filename:absname(Path).
% @doc
% Read the file and hand its contents (as a list of bytes) to
% `so_scan:scan/1'; returns the scanner's result unchanged.
% @end
so_tokens(FilePath) ->
    {ok, Bytes} = file:read_file(FilePath),
    so_scan:scan(binary_to_list(Bytes)).
% @doc Fetch (if needed) and build every dependency listed by test_deps/0.
load_test_deps() ->
    _ = [load_dep(Dep) || Dep <- test_deps()],
    ok.
% @doc Package ids (`{Realm, Name, Version}') the test suite depends on.
test_deps() ->
    Sophia = {"otpr", "sophia", {9, 0, 0}},
    [Sophia].
% @doc
% Make sure dependency `D' is installed (fetching it if necessary), build
% it, and return the build result.
%
% zx apparently changes the working directory while doing this, so the
% original directory is restored afterwards. The restore sits in an `after'
% so it also happens when fetching or building crashes; otherwise
% clean_after/1 would go on to delete `*.beam' files from whatever directory
% zx left us in.
% @end
load_dep(D) ->
    {ok, Cwd} = file:get_cwd(),
    try
        ok =
            case zx_lib:installed(D) of
                false ->
                    Id = zx_daemon:fetch(D),
                    ok = zx_daemon:wait_result(Id),
                    ok;
                true ->
                    ok
            end,
        zx_daemon:build(D)
    after
        ok = file:set_cwd(Cwd)
    end.
-spec clean_after(Fun) -> Result when
Fun :: fun(() -> Result),
Result :: any().
% @doc
% Load the test dependencies, run Fun(), and return its result.
%
% delete_beams/0 runs afterwards even if loading the deps or Fun() errors.
% Note that it removes the `*.beam' files found in the current working
% directory (where compiled beams get dropped), not in gsc/test.
% @end
clean_after(Fun) ->
try
load_test_deps(),
Fun()
after
delete_beams()
end.
-spec tidily(Fun) -> Result when
    Fun    :: fun(() -> Result),
    Result :: any().
% @doc Shorter name for `clean_after/1'.
tidily(Thunk) ->
    clean_after(Thunk).
-spec delete_beams() -> ok.
% @doc
% Delete every `*.beam' file listed by ls_test_beams/0. Individual
% `file:delete/1' results are ignored.
% @end
delete_beams() ->
    _ = [file:delete(Beam) || Beam <- ls_test_beams()],
    ok.
-spec tidy() -> ok.
% @doc Shorter name for `delete_beams/0'.
tidy() ->
    delete_beams().
-spec run_test_by_name(Name) -> Result when
    Name   :: string(),
    Result :: ok.
% @doc
% Look the test up by name and run its main/0. A test whose module failed to
% compile, or that does not exist, is reported on stdout instead.
% @end
run_test_by_name(Name) when is_list(Name) ->
    case find_test_by_name(Name) of
        {good, Mod} ->
            rmm(Mod);
        {bad, Mod} ->
            % io:format/2 returns ok, which is also this function's result
            io:format("FATAL: Module ~tp didn't compile~n", [Mod]);
        not_found ->
            io:format("FATAL: test not found: ~p~n", [Name])
    end.
% @doc Long name for `rmm/1'.
run_mod_main(Module) ->
    rmm(Module).
% @doc
% Print a banner and call `Mod:main()', returning its result. Any exception
% is caught and printed (class, reason, stack trace); `ok' is returned in
% that case so one failing test does not abort the rest of a run.
% @end
rmm(Mod) ->
    Rule = "=================================================~n",
    try
        io:format(Rule),
        io:format("~p:main()~n", [Mod]),
        io:format(Rule),
        Mod:main()
    catch
        Class:Reason:Stack ->
            Report =
                [{"~tp:main(): ERROR~n", [Mod]},
                 {"~tp: ~tp~n", [Class, Reason]},
                 {"Trace: ~tp~n", [Stack]}],
            lists:foreach(fun({Fmt, Args}) -> io:format(Fmt, Args) end, Report),
            ok
    end.
% Resolve a test name to a module. `Name' may be the full module name
% ("gsc_test_file") or the short form without the "gsc_test_" prefix
% ("file"). Modules that compiled win over modules that did not, and the
% full name wins over the short form.
%
% Returns {good, Mod} | {bad, Mod} | not_found.
find_test_by_name(Name) ->
    C1 = list_to_atom(Name),
    C2 = list_to_atom("gsc_test_" ++ Name),
    {Gd, Bd} = runnable_test_mods(),
    C1Gd = lists:member(C1, Gd),
    C2Gd = lists:member(C2, Gd),
    % check the *bad* list here; testing Gd again made {bad, C1} unreachable
    C1Bd = lists:member(C1, Bd),
    C2Bd = lists:member(C2, Bd),
    if
        C1Gd -> {good, C1};
        C2Gd -> {good, C2};
        C1Bd -> {bad, C1};
        C2Bd -> {bad, C2};
        true -> not_found
    end.
-spec runnable_test_names() -> Result when
    Result :: [{string(), atom()}].
% @doc
% `{ShortName, Module}' for every runnable test module, whether or not it
% compiled, sorted by module name.
% @end
runnable_test_names() ->
    {Compiled, Failed} = runnable_test_mods(),
    [{test_mod_name(Mod), Mod} || Mod <- lists:sort(Compiled ++ Failed)].
-spec test_mod_name(TestModAtom) -> ShortName when
    TestModAtom :: atom(),
    ShortName   :: string().
% gsc_test_foo ~> "foo"; crashes (badmatch) without the "gsc_test_" prefix
test_mod_name(TestModAtom) ->
    "gsc_test_" ++ ShortName = atom_to_list(TestModAtom),
    ShortName.
-spec runnable_test_mods() -> Result when
    Result :: {Good, Bad},
    Good   :: Mods,
    Bad    :: Mods,
    Mods   :: [atom()].
% @doc
% Compile everything in the test dir and keep only the `gsc_test_*' modules:
% `Good' compiled, `Bad' did not.
% @end
runnable_test_mods() ->
    {Compiled, Failed} = load_test_erls(),
    Good = [Mod || Mod <- Compiled, is_runnable(Mod)],
    Bad  = [Mod || Mod <- Failed, is_runnable(Mod)],
    {Good, Bad}.
-spec is_runnable(ModAtom) -> boolean() when
    ModAtom :: atom().
% A module is a runnable test iff its name starts with "gsc_test_".
is_runnable(ModAtom) ->
    lists:prefix("gsc_test_", atom_to_list(ModAtom)).
-spec load_test_erls() -> {Loaded, Errs} when
    Loaded :: [atom()],
    Errs   :: [atom()].
% @doc
% Compile every `.erl' file in the test dir, in listing order. `Loaded' are
% the modules that compiled; `Errs' are the module names (derived from the
% file name) that did not. Compile failures are also printed.
% @end
load_test_erls() ->
    CompileOne =
        fun(Path, {LoadedRev, ErrsRev}) ->
            FileName = abspath_to_name(Path),
            ModAtom  = fp_to_mod_atom(Path),
            case compile:file(Path) of
                {ok, Mod} ->
                    {[Mod | LoadedRev], ErrsRev};
                Err ->
                    io:format("ERROR ~tp: ~tp~n", [FileName, Err]),
                    {LoadedRev, [ModAtom | ErrsRev]}
            end
        end,
    {LoadedRev, ErrsRev} = lists:foldl(CompileOne, {[], []}, ls_test_erls()),
    {lists:reverse(LoadedRev), lists:reverse(ErrsRev)}.
% "/path/to/foo.erl" ~> foo; crashes (badmatch) unless the name ends in ".erl"
fp_to_mod_atom(FP) ->
    FileName = lists:last(string:tokens(FP, "/")),
    [Stem, "erl"] = string:split(FileName, ".", trailing),
    list_to_atom(Stem).
-spec abspath_to_name(FilePath) -> FileName when
    FilePath :: string(),
    FileName :: string().
% @doc Last "/"-separated component: "/path/to/foo.bar" -> "foo.bar"
abspath_to_name(FP) ->
    Components = string:tokens(FP, "/"),
    lists:last(Components).
-spec ls_test_erls() -> AbsPaths when
    AbsPaths :: [string()].
% @doc
% The `.erl' files of the test dir, e.g.
%
% ["/path/to/gsc/test/bar.erl",
%  "/path/to/gsc/test/foo.erl"]
% @end
ls_test_erls() ->
    [Path || Path <- ls_test(), is_erl(Path)].
-spec ls_test_beams() -> AbsPaths when
    AbsPaths :: [string()].
% important: beams get dropped in the working dir, so that is the directory
% listed here (not the test dir)
ls_test_beams() ->
    [Path || Path <- ls_pwd(), is_beam(Path)].
-spec is_beam(AbsPath) -> IsBeam when
    AbsPath :: string(),
    IsBeam  :: boolean().
% @private
% "foo.beam" ~> true
% _          ~> false
is_beam(Filename) ->
    filename:extension(Filename) =:= ".beam".
-spec is_erl(AbsPath) -> IsErl when
    AbsPath :: string(),
    IsErl   :: boolean().
% @private
% "foo.erl" ~> true
% _         ~> false
is_erl(Filename) ->
    filename:extension(Filename) =:= ".erl".
-spec ls_test() -> Abspaths when
    Abspaths :: [string()].
% @doc
% Sorted absolute paths of everything in the test dir. Junk/irrelevant
% files are included:
%
% ["/path/to/gsc/test/.foo.erl.swp",
%  "/path/to/gsc/test/bar.erl",
%  "/path/to/gsc/test/foo.erl"]
% @end
ls_test() ->
    Dir = test_dir(),
    {ok, Entries} = file:list_dir(Dir),
    lists:sort(lists:map(fun(Entry) -> Dir ++ "/" ++ Entry end, Entries)).
% Sorted absolute paths of everything in the current working directory.
ls_pwd() ->
    {ok, Cwd} = file:get_cwd(),
    {ok, Entries} = file:list_dir(Cwd),
    lists:sort(lists:map(fun(Entry) -> Cwd ++ "/" ++ Entry end, Entries)).
-spec test_dir() -> AbsPath when
    AbsPath :: string().
% @doc The `test' directory under the zx home dir: "/path/to/gsc/test"
test_dir() ->
    lists:append(zx_daemon:get_home(), "/test").
-spec ct_dir() -> AbsPath when
    AbsPath :: string().
% @doc "/path/to/gsc/test/ct"
%
% directory containing the tests for the tokenizer
ct_dir() ->
    lists:append(test_dir(), "/ct").
-spec ct_file(Name) -> AbsPath when
    Name    :: string(),
    AbsPath :: string().
% @doc
% Absolute path of a file inside ct_dir/0:
% "foo.aes" -> "/path/to/ct/foo.aes"
% @end
ct_file(Name) ->
    lists:append([ct_dir(), "/", Name]).
-spec ct_file_abspath(Name) -> AbsPath when
    Name    :: string(),
    AbsPath :: string().
% @doc Same as `ct_file/1':
%
% "foo.aes" -> "/path/to/ct/foo.aes"
ct_file_abspath(FileName) ->
    ct_file(FileName).
-spec ct_abspath(Name) -> AbsPath when
    Name    :: string(),
    AbsPath :: string().
% @doc Same as `ct_file/1':
%
% "foo.aes" -> "/path/to/ct/foo.aes"
ct_abspath(FileName) ->
    ct_file(FileName).
+4 -3
View File
@@ -1,15 +1,16 @@
{name,"Gajumaru Sophia Compiler"}.
{type,lib}.
{type,cli}.
{modules,[]}.
{mod, "gsc_cli"}.
{author,"Peter Harpending"}.
{prefix,"gs"}.
{prefix,"gsc"}.
{desc,"Exploratory sophia compiler rewrite"}.
{package_id,{"otpr","gsc",{0,1,0}}}.
{deps,[]}.
{key_name,none}.
{a_email,"peterharpending@qpq.swiss"}.
{c_email,"peterharpending@qpq.swiss"}.
{copyright,"Peter Harpending"}.
{copyright,"2026 QPQ AG"}.
{file_exts,[]}.
{license,"GPL-3.0-only"}.
{repo_url,"https://git.qpq.swiss/QPQ-AG/gsc"}.