From fdb40dcb9247523994762bb6b358b14a28af0972 Mon Sep 17 00:00:00 2001
From: Peter Harpending
Date: Thu, 4 Jun 2026 11:42:48 -0700
Subject: [PATCH] stuff

---
 cli/src/gsc_test_file.erl  | 279 +++++++++++++++++++++++++++++++++++++
 cli/src/gsc_test_ntree.erl |  21 +--
 cli/src/ts_utils.erl       |   4 +-
 include/gsc.hrl            |  32 +++++
 src/gsc.erl                |  42 ++----
 src/gsc_ntree.erl          |  39 ------
 6 files changed, 334 insertions(+), 83 deletions(-)
 create mode 100644 cli/src/gsc_test_file.erl

diff --git a/cli/src/gsc_test_file.erl b/cli/src/gsc_test_file.erl
new file mode 100644
index 0000000..56d80c5
--- /dev/null
+++ b/cli/src/gsc_test_file.erl
@@ -0,0 +1,279 @@
+% @doc experiment centering around the file syntax node using ntree approach
+-module(gsc_test_file).
+
+-export([
+    main/0
+]).
+
+-include("$gsc_include/gsc.hrl").
+
+% top-decl kinds; these are the atoms choose_td_target/1 returns
+-type td_target()
+    :: ct
+     | iface
+     | namespace
+     | pragma
+     | include
+     | using
+     .
+
+-record(ct,
+    {payable  = none :: none | false | {true, tk()},
+     main     = none :: none | false | {true, tk()},
+     contract = none :: none | tk(),
+     con      = none :: none | tk(),
+     impls    = none :: none | [tk()],
+     eq       = none :: none | tk()}).
+
+-type td_meta() :: #ct{}.
+
+-type ast_meta() :: file
+                  | block
+                  | block_item
+                  | td_meta()
+                  | nyi
+                  | {nyi, any()}
+                  .
+
+-type s2t_target()
+    :: file
+     | {block_of, s2t_target()}
+     | nyi
+     | {nyi, any()}
+     .
+
+-type ast() :: ntree(ast_meta(), tk()).
+-type asf() :: nforest(ast_meta(), tk()).
+
+% @doc print the source of the ct file hello.aes followed by its parsed AST
+-spec main() -> ok.
+main() ->
+    HelloN = "hello.aes",
+    HelloP = ts_utils:ct_file_abspath(HelloN),
+    {ok, HelloS} = file:read_file(HelloP),
+    S0 = gsc:unsafe_signal_from_file(HelloP),
+    T1 = s2t_file(S0),
+    io:format("hello.aes:~n", []),
+    io:format("```~n", []),
+    io:format("~ts", [HelloS]),
+    io:format("```~n~n", []),
+    io:format("AST: ~tp~n", [T1]),
+    ok.
+
+-spec s2t(ParseTarget, Signal) -> Tree when
+    ParseTarget :: s2t_target(),
+    Signal :: [tk()],
+    Tree :: ast().
+
+s2t(_, _) ->
+    error(nyi).
+
+
+% // Hello World Contract
+% // Copyright (c) 2025 QPQ AG
+%
+% contract Hello =
+%     type state = unit
+%     entrypoint init(): state =
+%         ()
+%
+%     entrypoint hello(): string =
+%         "hello, world"
+
+
+% @doc parse the tokens of a whole file into a `file' node; the column of
+% the first token is used as the column of the top-level block
+-spec s2t_file(Signal) -> AstFile when
+    Signal :: [tk()],
+    AstFile :: #ns{meta :: file, kids :: asf()}.
+
+s2t_file([]) ->
+    error(empty_file);
+s2t_file([#tk{pos = {_, Col}} | _] = Signal) ->
+    Block = t2t_parse_tds_in_block(s2t_gulp_block(Col, Signal)),
+    #ns{meta = file, kids = [Block]}.
+
+
+% @doc wrap a token list in a `block' node whose kids are block items;
+% crashes (badmatch) when some token lies left of the block column
+-spec s2t_gulp_block(BlkCol, Signal) -> Block when
+    BlkCol :: pos_integer(),
+    Signal :: [tk()],
+    Block :: #ns{meta :: block}.
+
+s2t_gulp_block(BlkCol, Signal) ->
+    % sanity check
+    NotLeftOf = fun(#tk{pos = {_, Col}}) -> Col >= BlkCol end,
+    true = lists:all(NotLeftOf, Signal),
+    #ns{meta = block, kids = s2f_block_items(BlkCol, Signal)}.
+
+% @doc cut a block's tokens into `block_item' nodes; a new item starts
+% at every token that sits exactly at the block column
+-spec s2f_block_items(BCol, Signal) -> BlkItems when
+    BCol :: pos_integer(),
+    Signal :: [tk()],
+    BlkItems :: [BlkItem],
+    BlkItem :: #ns{meta :: block_item,
+                   kids :: asf()}.
+
+s2f_block_items(BCol, Signal) ->
+    s2f_block_items(BCol, [], Signal).
+
+% accumulator loop; the token at the front must sit exactly at BCol
+s2f_block_items(BCol, Acc, [#tk{pos = {_, BCol}} = Head | Tail]) ->
+    {slurp, Item, Rest} = s2t_slurp_block_item(BCol, Head, Tail),
+    s2f_block_items(BCol, [Item | Acc], Rest);
+s2f_block_items(_BCol, Acc, []) ->
+    lists:reverse(Acc).
+
+% one block_item: Head plus the run of deeper tokens right after it
+s2t_slurp_block_item(BCol, Head, Tail) ->
+    {Tokens, Rest} = s2s_sw_block_item(BCol, Head, Tail),
+    {slurp, #ns{meta = block_item, kids = Tokens}, Rest}.
+
+% sw = splitwith; kind of take/drop
+s2s_sw_block_item(BCol, Head, Tail) ->
+    Deeper = fun(#tk{pos = {_, Col}}) -> Col > BCol end,
+    {Inside, Rest} = lists:splitwith(Deeper, Tail),
+    {[Head | Inside], Rest}.
+
+
+% File ::= Block(TopDecl)
+%
+% TopDecl ::= ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl)
+%           | ['payable'] 'contract' 'interface' Con [Implement] '=' Block(Decl)
+%           | 'namespace' Con '=' Block(Decl)
+%           | '@compiler' PragmaOp Version
+%           | 'include' String
+%           | Using
+
+-spec t2t_parse_tds_in_block(Block0) -> Block1 when
+    Block0 :: ast(),
+    Block1 :: ast().
+
+% go through and convert the block_item nodes to top
+% decls
+t2t_parse_tds_in_block(B0 = #ns{meta = block, kids = F0}) ->
+    F1 = lists:map(fun t2t_parse_td_from_item/1, F0),
+    B0#ns{kids = F1}.
+
+% @doc parse the tokens held by one block_item node as a top decl
+-spec t2t_parse_td_from_item(BlockItem) -> TopDecl when
+    BlockItem :: #ns{meta :: block_item},
+    TopDecl :: #ns{meta :: td_meta() | {nyi, any()}}.
+
+t2t_parse_td_from_item(#ns{meta = block_item, kids = Signal}) ->
+    s2t_top_decl(Signal).
+
+% @doc parse one top decl; only plain contracts are parsed so far, every
+% other kind is wrapped in an {nyi, Kind} node
+-spec s2t_top_decl(Signal) -> TdTree when
+    Signal :: [tk()],
+    TdTree :: ast().
+
+s2t_top_decl(S0) ->
+    case choose_td_target(S0) of
+        ct ->
+            s2t_ct(S0);
+        iface ->
+            f2t_nyi(iface, S0);
+        namespace ->
+            f2t_nyi(namespace, S0);
+        pragma ->
+            f2t_nyi(pragma, S0);
+        include ->
+            f2t_nyi(include, S0);
+        using ->
+            f2t_nyi(using, S0)
+    end.
+
+% @doc pick the top-decl kind from the strings of the leading tokens;
+% crashes with case_clause when no alternative of TopDecl matches
+-spec choose_td_target(Signal) -> TdTarget when
+    Signal :: [tk()],
+    TdTarget :: td_target().
+
+choose_td_target(Signal) ->
+    case gsc_tokens:strings(3, Signal) of
+        ["payable", "contract", "interface"] -> iface;
+        ["contract", "interface" | _] -> iface;
+        ["payable", "main", "contract"] -> ct;
+        ["payable", "contract" | _] -> ct;
+        % 'payable' is optional, so 'main contract' must be accepted too
+        ["main", "contract" | _] -> ct;
+        ["contract" | _] -> ct;
+        ["namespace" | _] -> namespace;
+        ["@compiler" | _] -> pragma;
+        ["include" | _] -> include;
+        ["using" | _] -> using
+    end.
+
+% @doc parse a contract top decl, starting from an empty #ct{} record
+-spec s2t_ct(Signal) -> CtAst when
+    Signal :: [tk()],
+    CtAst :: ast().
+
+s2t_ct(S0) ->
+    s2t_ct(#ct{}, S0).
+
+% @doc fill a #ct{} one field at a time, in source order: optional
+% 'payable', optional 'main', 'contract', the contract name, an optional
+% ': Con, Con, ...' implements list, then '='. A field still set to
+% `none' has not been looked at yet. Whatever follows '=' becomes the
+% kids of the resulting node.
+-spec s2t_ct(#ct{}, [tk()]) -> ast().
+
+% optional 'payable'
+s2t_ct(Ct = #ct{payable = none}, [#tk{str = "payable"} = T0 | S1]) ->
+    s2t_ct(Ct#ct{payable = {true, T0}}, S1);
+s2t_ct(Ct = #ct{payable = none}, S0) ->
+    s2t_ct(Ct#ct{payable = false}, S0);
+% optional 'main'
+s2t_ct(Ct = #ct{main = none}, [#tk{str = "main"} = T0 | S1]) ->
+    s2t_ct(Ct#ct{main = {true, T0}}, S1);
+s2t_ct(Ct = #ct{main = none}, S0) ->
+    s2t_ct(Ct#ct{main = false}, S0);
+% mandatory 'contract' keyword
+s2t_ct(Ct = #ct{contract = none}, [#tk{str = "contract"} = T0 | S1]) ->
+    s2t_ct(Ct#ct{contract = T0}, S1);
+s2t_ct(Ct = #ct{contract = none}, S0) ->
+    error({no_kwd_contract, Ct, S0});
+% mandatory contract name
+s2t_ct(Ct = #ct{con = none}, [#tk{shape = con} = T0 | S1]) ->
+    s2t_ct(Ct#ct{con = T0}, S1);
+s2t_ct(Ct = #ct{con = none}, S0) ->
+    error({no_contract_name, Ct, S0});
+% optional implements list
+s2t_ct(Ct = #ct{impls = none}, S0) ->
+    case gsc_tokens:strings(1, S0) of
+        [":"] ->
+            {slurp, Impls, S1} = s2f_slurp_impls(S0),
+            s2t_ct(Ct#ct{impls = Impls}, S1);
+        _ ->
+            s2t_ct(Ct#ct{impls = []}, S0)
+    end;
+% mandatory '='
+s2t_ct(Ct = #ct{eq = none}, [#tk{str = "="} = T0 | S1]) ->
+    s2t_ct(Ct#ct{eq = T0}, S1);
+s2t_ct(Ct = #ct{eq = none}, S0) ->
+    error({no_equal_sign, Ct, S0});
+% every field is filled in
+s2t_ct(Ct, S0) ->
+    #ns{meta = Ct, kids = S0}.
+
+% slurp ': Con' followed by any number of ', Con'; gives back the Con
+% tokens in source order together with the unconsumed tokens
+s2f_slurp_impls([#tk{str = ":"}, #tk{shape = con} = I0 | S0]) ->
+    s2f_slurp_impls([I0], S0).
+
+s2f_slurp_impls(Stk, [#tk{str = ","}, #tk{shape = con} = I0 | S0]) ->
+    s2f_slurp_impls([I0 | Stk], S0);
+s2f_slurp_impls(Stk, S0) ->
+    {slurp, lists:reverse(Stk), S0}.
+
+% placeholder nodes for syntax that is not yet implemented
+f2t_nyi(F) ->
+    #ns{meta = nyi, kids = F}.
+
+f2t_nyi(Why, F) ->
+    #ns{meta = {nyi, Why}, kids = F}.
diff --git a/cli/src/gsc_test_ntree.erl b/cli/src/gsc_test_ntree.erl index 5285157..42fef53 100644 --- a/cli/src/gsc_test_ntree.erl +++ b/cli/src/gsc_test_ntree.erl @@ -6,29 +6,18 @@ -include("$gsc_include/gsc.hrl"). -% records copypasta for now --record(ns, {meta :: any(), kids :: list(any())}). - --type ntree(X, Y) :: gsc_ntree:ntree(X, Y). --type nforest(X, Y) :: gsc_nforest:nforest(X, Y). - --type nt(X, Y) :: gsc_ntree:ntree(X, Y). --type nf(X, Y) :: gsc_nforest:nforest(X, Y).
- - % just parsing type expressions right now, so only need % to worry about round parens % % none is to indicate general-purpose grouping, for % e.g. LHS/RHS of an op -type syntax_meta() - :: none - | {op, tk()} + :: {op, tk()} + | op_arg | {parens, Open :: tk(), Close :: tk()} . --type ast() :: ntree(StemMeta :: syntax_meta(), - LeafType :: tk()). +-type ast() :: ntree(syntax_meta(), tk()). -type asf() :: nforest(syntax_meta(), tk()). -type asts() :: asf(). @@ -85,8 +74,8 @@ f2f_op(_opstr, Stk, []) -> f2f_op(OpStr, LhsStk, [#tk{str = OpStr} = OpTk | Rest]) -> Lhf = lists:reverse(LhsStk), Rhf = f2f_op(OpStr, Rest), - Lht = #ns{meta = none, kids = Lhf}, - Rht = #ns{meta = none, kids = Rhf}, + Lht = #ns{meta = op_arg, kids = Lhf}, + Rht = #ns{meta = op_arg, kids = Rhf}, ResultT = #ns{meta = {op, OpTk}, kids = [Lht, Rht]}, ResultF = [ResultT], diff --git a/cli/src/ts_utils.erl b/cli/src/ts_utils.erl index 7813171..3140d9a 100644 --- a/cli/src/ts_utils.erl +++ b/cli/src/ts_utils.erl @@ -3,7 +3,7 @@ -export([ ct_dir/0, - ct_file/1 + ct_file/1, ct_file_abspath/1 ]). @@ -14,6 +14,8 @@ ct_dir() -> zx_daemon:get_home() ++ "/ct". +ct_file_abspath(Name) -> + ct_file(Name). -spec ct_file(Name) -> AbsPath when Name :: string(), diff --git a/include/gsc.hrl b/include/gsc.hrl index d601c3e..65b27df 100644 --- a/include/gsc.hrl +++ b/include/gsc.hrl @@ -143,3 +143,35 @@ | #gsc_err_nyi{} | #gsc_err_empty_file{} | #gsc_err{}. + + +%---------------------------- +% tree type for parsing +%---------------------------- + +% @doc stem record +-record(ns, {meta :: any(), + kids :: list(any())}). + +% @doc `ntree(S, L)' is a "node tree" (meaning stems +% have values and children) +% +% for the purposes of the compiler, the key observation +% is that a flat list of tokens is already a forest +-type ntree(S, L) :: #ns{meta :: S, kids :: [ntree(S, L)]} + | L. + +% @doc forest is just a list of trees +-type nforest(S, L) :: [ntree(S, L)]. + + +% aliases + +-type nt(S, L) :: ntree(S, L). 
+-type nf(S, L) :: nforest(S, L). + +-type ntree() :: ntree(any(), any()). +-type nforest() :: [ntree()]. + +-type nt() :: ntree(). +-type nf() :: nforest(). diff --git a/src/gsc.erl b/src/gsc.erl index 2fa5aad..35228be 100644 --- a/src/gsc.erl +++ b/src/gsc.erl @@ -1,41 +1,29 @@ -% @doc bikeshed proctrastination head into vim warmup thing +% @doc bikeshed procrastination head into vim warmup +% thing +% % sophia compiler from scratch by PRH % -% based on original sophia compiler -% -% parse layers: -% 1. gsc_tokens: SrcStr -> (Tokens | SigTokens) -% -% SigTokens = not comment/whitespace -% -% layers: -% a. gsc_strmatch : matches string shapes -% b. gso_scan : converts to so_scan shapes -% -% -% terminology: -% -% - `slurp`/`barf` borrowed from emacs paredit mode: -% -% slurp : (a b) c -> (a b c) -% barf : (a b c) -> a (b c) -% -% * `slurp` usually involves *transforming* input -% into a new type (e.g. slurp a token from src -% string); think of slurp as a verb meaning to -% consume and then digest -% * `barf` basically means blindly splitting off -% input -% +% based on original sophia compiler; target for version +% 0.1 is to match behavior exactly % @end -module(gsc). +% token and tokens -export_type([ token/0, signal/0 ]). +% syntax tree/forest wrapper type +-export_type([ + ntree/2, ntree/0, + nforest/2, nforest/0, + nt/2, nt/0, + nf/2, nf/0 +]). + + -export([ unsafe_tokens_from_file/1, unsafe_tokens_from_string/1, diff --git a/src/gsc_ntree.erl b/src/gsc_ntree.erl index 3090140..8f12a9a 100644 --- a/src/gsc_ntree.erl +++ b/src/gsc_ntree.erl @@ -1,12 +1,5 @@ -module(gsc_ntree). --export_type([ - ntree/2, ntree/0, - nforest/2, nforest/0, - nt/2, nt/0, - nf/2, nf/0 -]). - -export([ nstem/2, meta/1, kids/1, flatten_tree/1, flatten_forest/1 @@ -15,37 +8,6 @@ -include("$gsc_include/gsc.hrl"). 
-%%===================================================== -%% API: types -%%===================================================== - -% @doc stem record --record(ns, {meta :: any(), - kids :: list(any())}). - -% @doc `ntree(S, L)' is a "node tree" (meaning stems -% have values and children) -% -% for the purposes of the compiler, the key observation -% is that a flat list of tokens is already a forest --type ntree(S, L) :: #ns{meta :: S, kids :: [ntree(S, L)]} - | L. - -% @doc forest is just a list of trees --type nforest(S, L) :: [ntree(S, L)]. - - -% aliases - --type nt(S, L) :: ntree(S, L). --type nf(S, L) :: nforest(S, L). - --type ntree() :: ntree(any(), any()). --type nforest() :: [ntree()]. - --type nt() :: ntree(). --type nf() :: nforest(). - %%===================================================== %% API: functions @@ -91,4 +53,3 @@ ft(Leaf) -> [Leaf]. ff(F) -> [ft(T) || T <- F]. -