This commit is contained in:
2026-06-04 11:42:48 -07:00
parent e180dc955d
commit fdb40dcb92
6 changed files with 334 additions and 83 deletions
+279
View File
@@ -0,0 +1,279 @@
% @doc experiment centering around the file syntax node using ntree approach
-module(gsc_test_file).
-export([
main/0
]).
-include("$gsc_include/gsc.hrl").
% Kind of top-level declaration a block item is dispatched to.
%
% These are exactly the atoms produced by choose_td_target/1 and matched
% in s2t_top_decl/1; the namespace case is spelled `namespace' there, so
% it is spelled the same way here.
-type td_target()
    :: ct
     | iface
     | namespace
     | pragma
     | include
     | using
     .
% Accumulator for the header of a contract top-level declaration.
%
% Every field starts out as `none', meaning "not examined yet"; s2t_ct/2
% fills the fields in declaration order and uses `none' to decide which
% field to look at next.
%
%   payable  : {true, Tk} when the 'payable' keyword token was present,
%              false when it was absent
%   main     : {true, Tk} when the 'main' keyword token was present,
%              false when it was absent
%   contract : the 'contract' keyword token
%   con      : the token with shape `con' that follows 'contract'
%   impls    : the `con' tokens listed after ':' ([] when there is no ':')
%   eq       : the '=' token
-record(ct,
{payable = none :: none | false | {true, tk()},
main = none :: none | false | {true, tk()},
contract = none :: none | tk(),
con = none :: none | tk(),
impls = none :: none | [tk()],
eq = none :: none | tk()}).
% metadata carried by a parsed top-level declaration stem
-type td_meta() :: #ct{}.
% Metadata stored in the `meta' field of a stem (#ns{}) in this module:
%   file       : root stem built by s2t_file/1
%   block      : stem built by s2t_gulp_block/2
%   block_item : stem built by s2t_slurp_block_item/3
%   td_meta()  : contract header built by s2t_ct/2
%   nyi, {nyi, Why} : placeholder stems built by f2t_nyi/1,2
-type ast_meta() :: file
| block
| block_item
| td_meta()
| nyi
| {nyi, any()}
.
% Parse target selector; in this module it only appears in the spec of
% s2t/2, which is not implemented yet.
-type s2t_target()
:: file
| {block_of, s2t_target()}
| nyi
| {nyi, any()}
.
% syntax tree / forest: stems carry ast_meta(), leaves are tokens
-type ast() :: ntree(ast_meta(), tk()).
-type asf() :: nforest(ast_meta(), tk()).
% @doc Read and parse `hello.aes', then print the raw source (inside a
% fenced block) followed by the resulting syntax tree.
%
% Crashes on a badmatch if the file cannot be read; parsing happens
% before anything is printed.
main() ->
    Path = ts_utils:ct_file_abspath("hello.aes"),
    {ok, SrcText} = file:read_file(Path),
    Tree = s2t_file(gsc:unsafe_signal_from_file(Path)),
    Output = [{"hello.aes:~n", []},
              {"```~n", []},
              {"~ts", [SrcText]},
              {"```~n~n", []},
              {"AST: ~tp~n", [Tree]}],
    lists:foreach(fun({Fmt, Args}) -> io:format(Fmt, Args) end, Output),
    ok.
% Generic "signal to tree" entry point. Not implemented for any target
% yet: every call raises error `nyi'.
-spec s2t(ParseTarget, Signal) -> Tree
    when ParseTarget :: s2t_target(),
         Signal      :: [tk()],
         Tree        :: ast().
s2t(_ParseTarget, _Signal) ->
    erlang:error(nyi).
% // Hello World Contract
% // Copyright (c) 2025 QPQ AG
%
% contract Hello =
% type state = unit
% entrypoint init(): state =
% ()
%
% entrypoint hello(): string =
% "hello, world"
% Parse a whole file's signal into a `file' stem holding a single block.
%
% The column of the first token is taken as the block column of the
% file-level block. An empty signal raises error `empty_file'.
-spec s2t_file(Signal) -> AstFile
    when Signal  :: [tk()],
         AstFile :: #ns{meta :: file, kids :: asf()}.
s2t_file([#tk{pos = {_Line, Col}} | _] = Signal) ->
    RawBlock = s2t_gulp_block(Col, Signal),
    #ns{meta = file, kids = [t2t_parse_tds_in_block(RawBlock)]};
s2t_file([]) ->
    error(empty_file).
% Consume the entire signal as one block whose items start at column
% BCol, returning a `block' stem with one kid per block item.
-spec s2t_gulp_block(BlkCol, Signal) -> Block
    when BlkCol :: pos_integer(),
         Signal :: [tk()],
         Block  :: #ns{meta :: block}.
s2t_gulp_block(BCol, Tks) ->
    % sanity check: no token may sit to the left of the block column
    true = lists:all(fun(#tk{pos = {_Line, Col}}) -> Col >= BCol end, Tks),
    #ns{meta = block, kids = s2f_block_items(BCol, Tks)}.
% Split a block's signal into block items. Each item starts with a token
% sitting exactly at column BCol; a leading token at any other column
% matches no clause and crashes with function_clause.
-spec s2f_block_items(BCol, Signal) -> BlkItems
    when BCol     :: pos_integer(),
         Signal   :: [tk()],
         BlkItems :: [BlkItem],
         BlkItem  :: #ns{meta :: block_item,
                         kids :: asf()}.
s2f_block_items(BCol, Signal) ->
    s2f_block_items(BCol, [], Signal).

% Acc holds the items found so far, most recent first.
s2f_block_items(BCol, Acc, [#tk{pos = {_Line, BCol}} = First | Rest]) ->
    {slurp, Item, Remaining} = s2t_slurp_block_item(BCol, First, Rest),
    s2f_block_items(BCol, [Item | Acc], Remaining);
s2f_block_items(_BCol, Acc, []) ->
    lists:reverse(Acc).
% Slurp one block item: T0 is the item's first token, F0 the signal that
% follows it. Returns {slurp, Item, Rest}, where Item is a `block_item'
% stem holding the item's tokens and Rest is the signal left over.
s2t_slurp_block_item(BCol, T0, F0) ->
    {Tokens, Rest} = s2s_sw_block_item(BCol, T0, F0),
    {slurp, #ns{meta = block_item, kids = Tokens}, Rest}.
% sw = splitwith; kind of take/drop
%
% Takes T0 plus the longest prefix of F0 whose tokens sit strictly to
% the right of column BCol; returns {ItemTokens, RemainingSignal}.
s2s_sw_block_item(BCol, T0, F0) ->
    {Inside, After} =
        lists:splitwith(fun(#tk{pos = {_Line, Col}}) -> Col > BCol end, F0),
    {[T0 | Inside], After}.
% File ::= Block(TopDecl)
%
% TopDecl ::= ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl)
% | ['payable'] 'contract' 'interface' Con [Implement] '=' Block(Decl)
% | 'namespace' Con '=' Block(Decl)
% | '@compiler' PragmaOp Version
% | 'include' String
% | Using
% Tree-to-tree pass over a `block' stem: every block_item kid is replaced
% by the top-level declaration parsed from it; the stem itself is kept.
-spec t2t_parse_tds_in_block(Block0) -> Block1
    when Block0 :: ast(),
         Block1 :: ast().
t2t_parse_tds_in_block(#ns{meta = block, kids = Items} = Block) ->
    TopDecls = lists:map(fun(Item) -> t2t_parse_td_from_item(Item) end, Items),
    Block#ns{kids = TopDecls}.
% Parse the tokens held by one block_item stem as a top-level declaration.
%
% The result is whatever s2t_top_decl/1 builds: a stem whose meta is a
% #ct{} for contracts, or {nyi, Why} for declaration kinds that are not
% implemented yet. The spec covers both.
-spec t2t_parse_td_from_item(BlockItem) -> TopDecl
    when BlockItem :: #ns{meta :: block_item},
         TopDecl   :: #ns{meta :: td_meta() | {nyi, any()}}.
t2t_parse_td_from_item(#ns{meta = block_item, kids = Signal}) ->
    s2t_top_decl(Signal).
% Parse one top-level declaration. Contracts are parsed by s2t_ct/1;
% every other recognised kind is wrapped in a {nyi, Kind} stem for now.
-spec s2t_top_decl(Signal) -> TdTree
    when Signal :: [tk()],
         TdTree :: ast().
s2t_top_decl(Signal) ->
    case choose_td_target(Signal) of
        ct ->
            s2t_ct(Signal);
        Kind when Kind =:= iface;
                  Kind =:= namespace;
                  Kind =:= pragma;
                  Kind =:= include;
                  Kind =:= using ->
            f2t_nyi(Kind, Signal)
    end.
% Decide which kind of top-level declaration a signal starts, by looking
% at the strings gsc_tokens:strings(3, Signal) returns.
% NOTE(review): presumably those are the strings of the first (up to)
% three tokens -- confirm against gsc_tokens.
%
% Contract headers follow
%   ['payable'] ['main'] 'contract' Con ...
%   ['payable'] 'contract' 'interface' Con ...
% so a contract may start with 'main' alone, without 'payable'.
%
% A signal that starts none of the known declarations crashes with
% case_clause.
-spec choose_td_target(Signal) -> TdTarget
    when Signal   :: [tk()],
         TdTarget :: td_target().
choose_td_target(Signal) ->
    case gsc_tokens:strings(3, Signal) of
        ["payable", "contract", "interface"] -> iface;
        ["contract", "interface" | _]        -> iface;
        ["payable", "main", "contract"]      -> ct;
        ["payable", "contract" | _]          -> ct;
        % 'main' without 'payable'; s2t_ct/2 already handles this header
        ["main", "contract" | _]             -> ct;
        ["contract" | _]                     -> ct;
        ["namespace" | _]                    -> namespace;
        ["@compiler" | _]                    -> pragma;
        ["include" | _]                      -> include;
        ["using" | _]                        -> using
    end.
% Parse a contract header
%   ['payable'] ['main'] 'contract' Con [Implement] '='
% into a stem whose meta is the filled-in #ct{} and whose kids are the
% tokens that remain after '='.
-spec s2t_ct(Signal) -> CtAst
    when Signal :: [tk()],
         CtAst  :: ast().
s2t_ct(Signal) ->
    s2t_ct(#ct{}, Signal).

% The clauses below form a small state machine: the first #ct{} field
% that is still `none' selects the step, so clause order matters.

% optional 'payable'
s2t_ct(#ct{payable = none} = Acc, [#tk{str = "payable"} = Kw | Rest]) ->
    s2t_ct(Acc#ct{payable = {true, Kw}}, Rest);
s2t_ct(#ct{payable = none} = Acc, Signal) ->
    s2t_ct(Acc#ct{payable = false}, Signal);
% optional 'main'
s2t_ct(#ct{main = none} = Acc, [#tk{str = "main"} = Kw | Rest]) ->
    s2t_ct(Acc#ct{main = {true, Kw}}, Rest);
s2t_ct(#ct{main = none} = Acc, Signal) ->
    s2t_ct(Acc#ct{main = false}, Signal);
% mandatory 'contract' keyword
s2t_ct(#ct{contract = none} = Acc, [#tk{str = "contract"} = Kw | Rest]) ->
    s2t_ct(Acc#ct{contract = Kw}, Rest);
s2t_ct(#ct{contract = none} = Acc, Signal) ->
    error({no_kwd_contract, Acc, Signal});
% mandatory contract name
s2t_ct(#ct{con = none} = Acc, [#tk{shape = con} = Name | Rest]) ->
    s2t_ct(Acc#ct{con = Name}, Rest);
s2t_ct(#ct{con = none} = Acc, Signal) ->
    error({no_contract_name, Acc, Signal});
% optional ': Con, Con, ...' list
s2t_ct(#ct{impls = none} = Acc, Signal) ->
    case gsc_tokens:strings(1, Signal) of
        [":"] ->
            {slurp, Impls, Rest} = s2f_slurp_impls(Signal),
            s2t_ct(Acc#ct{impls = Impls}, Rest);
        _ ->
            s2t_ct(Acc#ct{impls = []}, Signal)
    end;
% mandatory '='
s2t_ct(#ct{eq = none} = Acc, [#tk{str = "="} = Eq | Rest]) ->
    s2t_ct(Acc#ct{eq = Eq}, Rest);
s2t_ct(#ct{eq = none} = Acc, Signal) ->
    error({no_equal_sign, Acc, Signal});
% header complete: whatever is left becomes the kids of the stem
s2t_ct(Acc, Signal) ->
    #ns{meta = Acc, kids = Signal}.
% Slurp an implements list: ':' Con {',' Con}.
%
% Returns {slurp, Cons, Rest} with the `con' tokens in source order. The
% signal must start with ':' followed by a `con' token, otherwise the
% call crashes with function_clause.
s2f_slurp_impls([#tk{str = ":"}, #tk{shape = con} = First | Rest]) ->
    s2f_slurp_impls([First], Rest).

% Acc holds the names found so far, most recent first.
s2f_slurp_impls(Acc, [#tk{str = ","}, #tk{shape = con} = Next | Rest]) ->
    s2f_slurp_impls([Next | Acc], Rest);
s2f_slurp_impls(Acc, Rest) ->
    {slurp, lists:reverse(Acc), Rest}.
% Wrap a forest in a "not yet implemented" stem.
f2t_nyi(F) ->
    #ns{meta = nyi, kids = F}.

% Same, with Why recorded in the stem's meta as {nyi, Why}.
f2t_nyi(Why, F) ->
    #ns{meta = {nyi, Why}, kids = F}.
+5 -16
View File
@@ -6,29 +6,18 @@
-include("$gsc_include/gsc.hrl"). -include("$gsc_include/gsc.hrl").
% records copypasta for now
-record(ns, {meta :: any(), kids :: list(any())}).
-type ntree(X, Y) :: gsc_ntree:ntree(X, Y).
-type nforest(X, Y) :: gsc_nforest:nforest(X, Y).
-type nt(X, Y) :: gsc_ntree:ntree(X, Y).
-type nf(X, Y) :: gsc_nforest:nforest(X, Y).
% just parsing type expressions right now, so only need % just parsing type expressions right now, so only need
% to worry about round parens % to worry about round parens
% %
% none is to indicate general-purpose grouping, for % none is to indicate general-purpose grouping, for
% e.g. LHS/RHS of an op % e.g. LHS/RHS of an op
-type syntax_meta() -type syntax_meta()
:: none :: {op, tk()}
| {op, tk()} | op_arg
| {parens, Open :: tk(), Close :: tk()} | {parens, Open :: tk(), Close :: tk()}
. .
-type ast() :: ntree(StemMeta :: syntax_meta(), -type ast() :: ntree(syntax_meta(), tk()).
LeafType :: tk()).
-type asf() :: nforest(syntax_meta(), tk()). -type asf() :: nforest(syntax_meta(), tk()).
-type asts() :: asf(). -type asts() :: asf().
@@ -85,8 +74,8 @@ f2f_op(_opstr, Stk, []) ->
f2f_op(OpStr, LhsStk, [#tk{str = OpStr} = OpTk | Rest]) -> f2f_op(OpStr, LhsStk, [#tk{str = OpStr} = OpTk | Rest]) ->
Lhf = lists:reverse(LhsStk), Lhf = lists:reverse(LhsStk),
Rhf = f2f_op(OpStr, Rest), Rhf = f2f_op(OpStr, Rest),
Lht = #ns{meta = none, kids = Lhf}, Lht = #ns{meta = op_arg, kids = Lhf},
Rht = #ns{meta = none, kids = Rhf}, Rht = #ns{meta = op_arg, kids = Rhf},
ResultT = #ns{meta = {op, OpTk}, ResultT = #ns{meta = {op, OpTk},
kids = [Lht, Rht]}, kids = [Lht, Rht]},
ResultF = [ResultT], ResultF = [ResultT],
+3 -1
View File
@@ -3,7 +3,7 @@
-export([ -export([
ct_dir/0, ct_dir/0,
ct_file/1 ct_file/1, ct_file_abspath/1
]). ]).
@@ -14,6 +14,8 @@ ct_dir() ->
zx_daemon:get_home() ++ "/ct". zx_daemon:get_home() ++ "/ct".
% Thin alias for ct_file/1: forwards Name unchanged and returns whatever
% ct_file/1 returns (its spec names that result AbsPath).
ct_file_abspath(Name) ->
ct_file(Name).
-spec ct_file(Name) -> AbsPath when -spec ct_file(Name) -> AbsPath when
Name :: string(), Name :: string(),
+32
View File
@@ -143,3 +143,35 @@
| #gsc_err_nyi{} | #gsc_err_nyi{}
| #gsc_err_empty_file{} | #gsc_err_empty_file{}
| #gsc_err{}. | #gsc_err{}.
%----------------------------
% tree type for parsing
%----------------------------
% @doc stem record
-record(ns, {meta :: any(),
kids :: list(any())}).
% @doc `ntree(S, L)' is a "node tree" (meaning stems
% have values and children)
%
% for the purposes of the compiler, the key observation
% is that a flat list of tokens is already a forest
-type ntree(S, L) :: #ns{meta :: S, kids :: [ntree(S, L)]}
| L.
% @doc forest is just a list of trees
-type nforest(S, L) :: [ntree(S, L)].
% aliases
-type nt(S, L) :: ntree(S, L).
-type nf(S, L) :: nforest(S, L).
-type ntree() :: ntree(any(), any()).
-type nforest() :: [ntree()].
-type nt() :: ntree().
-type nf() :: nforest().
+15 -27
View File
@@ -1,41 +1,29 @@
% @doc bikeshed procrastination head into vim warmup thing % @doc bikeshed procrastination head into vim warmup
% thing
%
% sophia compiler from scratch by PRH % sophia compiler from scratch by PRH
% %
% based on original sophia compiler % based on original sophia compiler; target for version
% % 0.1 is to match behavior exactly
% parse layers:
% 1. gsc_tokens: SrcStr -> (Tokens | SigTokens)
%
% SigTokens = not comment/whitespace
%
% layers:
% a. gsc_strmatch : matches string shapes
% b. gso_scan : converts to so_scan shapes
%
%
% terminology:
%
% - `slurp`/`barf` borrowed from emacs paredit mode:
%
% slurp : (a b) c -> (a b c)
% barf : (a b c) -> a (b c)
%
% * `slurp` usually involves *transforming* input
% into a new type (e.g. slurp a token from src
% string); think of slurp as a verb meaning to
% consume and then digest
% * `barf` basically means blindly splitting off
% input
%
% @end % @end
-module(gsc). -module(gsc).
% token and tokens
-export_type([ -export_type([
token/0, token/0,
signal/0 signal/0
]). ]).
% syntax tree/forest wrapper type
-export_type([
ntree/2, ntree/0,
nforest/2, nforest/0,
nt/2, nt/0,
nf/2, nf/0
]).
-export([ -export([
unsafe_tokens_from_file/1, unsafe_tokens_from_file/1,
unsafe_tokens_from_string/1, unsafe_tokens_from_string/1,
-39
View File
@@ -1,12 +1,5 @@
-module(gsc_ntree). -module(gsc_ntree).
-export_type([
ntree/2, ntree/0,
nforest/2, nforest/0,
nt/2, nt/0,
nf/2, nf/0
]).
-export([ -export([
nstem/2, meta/1, kids/1, nstem/2, meta/1, kids/1,
flatten_tree/1, flatten_forest/1 flatten_tree/1, flatten_forest/1
@@ -15,37 +8,6 @@
-include("$gsc_include/gsc.hrl"). -include("$gsc_include/gsc.hrl").
%%=====================================================
%% API: types
%%=====================================================
% @doc stem record
-record(ns, {meta :: any(),
kids :: list(any())}).
% @doc `ntree(S, L)' is a "node tree" (meaning stems
% have values and children)
%
% for the purposes of the compiler, the key observation
% is that a flat list of tokens is already a forest
-type ntree(S, L) :: #ns{meta :: S, kids :: [ntree(S, L)]}
| L.
% @doc forest is just a list of trees
-type nforest(S, L) :: [ntree(S, L)].
% aliases
-type nt(S, L) :: ntree(S, L).
-type nf(S, L) :: nforest(S, L).
-type ntree() :: ntree(any(), any()).
-type nforest() :: [ntree()].
-type nt() :: ntree().
-type nf() :: nforest().
%%===================================================== %%=====================================================
%% API: functions %% API: functions
@@ -91,4 +53,3 @@ ft(Leaf) -> [Leaf].
ff(F) -> ff(F) ->
[ft(T) || T <- F]. [ft(T) || T <- F].