parens work... moving on to documenting work

This commit is contained in:
Peter Harpending
2026-06-03 15:17:33 -07:00
parent 4f4adaa284
commit 4e54bebeba
3 changed files with 241 additions and 175 deletions
+67
View File
@@ -0,0 +1,67 @@
-spec mktree(Signal) -> Tree when
    Signal :: gsc:signal(),
    Tree   :: gsc_ntree:ntree().
% @doc
% Build a tree from a signal: wrap the signal in a stem tagged vtokens,
% reroot that stem around "=>" and then reroot the result around "*".
%
% NOTE(review): the "*" pass only scans what gsc_ntree:deleaf0/1 returns
% for the tree produced by the "=>" pass -- confirm that this gives the
% intended operator precedence.
mktree(Sig) ->
    Flat = gsc_ntree:nstem(vtokens, Sig),
    rerootl_tkstr("*", rerootl_tkstr("=>", Flat)).
% Reroot a stem around the first child token whose string is S.
%
% The children obtained from gsc_ntree:deleaf0/1 are scanned left to
% right for the first token whose str field equals S.
%
%   input:                    output:
%     *s Root                   *s {op, "=>" token}
%     |                         |
%     +-- .l Foo                +-- *s Root -- .l Foo
%     +-- .l "=>"               +-- *s Root -- .l Bar
%     +-- .l Bar
%
% Both halves are rebuilt under the original root value with
% gsc_ntree:releaf0/2. The right half is rerooted again before being
% attached, so repeated occurrences of S nest to the right. When no
% child matches, the tree is returned unchanged.
rerootl_tkstr(S, Stem = #ns{val = RootVal}) ->
    Kids = gsc_ntree:deleaf0(Stem),
    case lists:splitwith(fun(Tk) -> isnt_str(S, Tk) end, Kids) of
        % nothing left after the split: S does not occur, nothing to do
        {_All, []} ->
            Stem;
        % OpTk is the first token whose string is S
        {Before, [OpTk | After]} ->
            Left  = gsc_ntree:releaf0(RootVal, Before),
            Right = rerootl_tkstr(S, gsc_ntree:releaf0(RootVal, After)),
            gsc_ntree:releaf0({op, OpTk}, [Left, Right])
    end.
%reroot_mapsto(Tree0 = #ns{val = Root0}) ->
% Kids0 = gsc_ntree:deleaf0(Tree0),
% IsntMapsto = fun(DL) -> isnt_str("=>", Tk) end,
% case lists:splitwith(IsntMapsto, Kids0) of
% % found
% {LHS1, [Tk0 | RHS1]} ->
% Root1 = Root0,
% LTree1 = gsc_ntree:releaf0(Root1, LHS1),
% RTree1 = reroot_mapsto(gsc_ntree:releaf0(Root1, RHS1)),
% NewRoot0 = {op, Tk0},
% NewKids0 = [LTree1, RTree1],
% NewTree = gsc_ntree:releaf0(NewRoot0, NewKids0),
% NewTree;
% % nothing to do
% {Kids0, []} ->
% Tree0
% end.
% Negation of is_str/2: true when Y is not a token whose string is X.
isnt_str(X, Y) ->
    case is_str(X, Y) of
        true  -> false;
        false -> true
    end.
% True only when the second argument is a #tk{} record whose str field
% is exactly equal to S; every other input yields false.
is_str(S, #tk{str = Str}) when Str =:= S -> true;
is_str(_, _) -> false.
+112 -77
View File
@@ -6,6 +6,32 @@
-include("$gsc_include/gsc.hrl").
% records copypasta for now
-record(ns, {meta :: any(), kids :: list(any())}).
-type ntree(X, Y) :: gsc_ntree:ntree(X, Y).
-type nforest(X, Y) :: gsc_nforest:nforest(X, Y).
-type nt(X, Y) :: gsc_ntree:ntree(X, Y).
-type nf(X, Y) :: gsc_nforest:nforest(X, Y).
% only type expressions are parsed right now, so round parens are the
% only kind of bracket we need to handle
%
% `none` marks a general-purpose grouping stem, e.g. the LHS/RHS of
% an op
-type syntax_meta()
:: none
| {op, tk()}
| {parens, Open :: tk(), Close :: tk()}
.
-type ast() :: ntree(StemMeta :: syntax_meta(),
LeafType :: tk()).
-type asf() :: nforest(syntax_meta(), tk()).
-type asts() :: asf().
main() ->
x00(),
@@ -17,93 +43,102 @@ x00() ->
io:format(" SrcStr = ~p~n", [x00_src()]),
io:format(" Tokens = ~p~n", [x00_tks()]),
io:format(" Signal = ~p~n", [x00_sgl()]),
io:format(" Tree0 = ~p~n", [x00_tree0()]),
io:format(" Forest = ~p~n", [x00_fst()]),
ok.
% sample data for the x00 demo, each step derived from the previous one:
%   x00_src   - the sample type expression as a source string
%   x00_tks   - x00_src run through gsc:unsafe_tokens_from_string/1
%   x00_sgl   - x00_tks run through gsc:filter_signal/1
%   x00_tree0 - x00_sgl run through mktree/1
x00_src() -> "foo => bar * baz".
x00_tks() -> gsc:unsafe_tokens_from_string(x00_src()).
x00_sgl() -> gsc:filter_signal(x00_tks()).
x00_tree0() -> mktree(x00_sgl()).
% records copypasta for now
-record(ns, {val :: any(), kids :: list(any())}).
-record(nl, {val :: any()}).
-type ntree(X, Y) :: gsc_ntree:ntree(X, Y).
-type ntree() :: gsc_ntree:ntree().
-type ast_stem_t() :: vtokens
| {op, tk()}
.
-type ast() :: ntree(ast_stem_t(), tk()).
% sample data for the x00 demo, each step derived from the previous one:
%   x00_src - sample type expression with nested round parens
%   x00_tks - x00_src run through gsc:unsafe_tokens_from_string/1
%   x00_sgl - x00_tks run through gsc:filter_signal/1
%   x00_fst - x00_sgl run through parse/1
x00_src() -> "(foo => (bar) * baz)".
x00_tks() -> gsc:unsafe_tokens_from_string(x00_src()).
x00_sgl() -> gsc:filter_signal(x00_tks()).
x00_fst() -> parse(x00_sgl()).
-spec mktree(Signal) -> Tree when
Signal :: gsc:signal(),
Tree :: gsc_ntree:ntree().
-spec parse(Signal) -> ASF when
Signal :: [tk()],
ASF :: asf().
% @doc
% Build a tree from a signal: wrap the signal in a stem tagged vtokens,
% reroot that stem around "=>" and then reroot the result around "*".
%
% NOTE(review): the "*" pass only scans what gsc_ntree:deleaf0/1 returns
% for the tree produced by the "=>" pass -- confirm that this gives the
% intended operator precedence.
mktree(Sig) ->
    Flat = gsc_ntree:nstem(vtokens, Sig),
    rerootl_tkstr("*", rerootl_tkstr("=>", Flat)).
parse(Signal) ->
    % The signal is a flat list of tokens, which already is a forest
    % if the leaf type is taken to be `tk()`. Parsing is therefore a
    % pipeline of forest-to-forest passes, applied left to right.
    %
    % Planned next pass, not wired in yet: f2f_op("=>", Forest).
    %
    % Open question from the author: the finished pipeline should
    % probably leave exactly one tree in the forest.
    Passes = [fun f2f_parens/1],
    lists:foldl(fun(Pass, Forest) -> Pass(Forest) end, Signal, Passes).
% Reroot a stem around the first child token whose string is S.
%
% The children obtained from gsc_ntree:deleaf0/1 are scanned left to
% right for the first token whose str field equals S.
%
%   input:                    output:
%     *s Root                   *s {op, "=>" token}
%     |                         |
%     +-- .l Foo                +-- *s Root -- .l Foo
%     +-- .l "=>"               +-- *s Root -- .l Bar
%     +-- .l Bar
%
% Both halves are rebuilt under the original root value with
% gsc_ntree:releaf0/2. The right half is rerooted again before being
% attached, so repeated occurrences of S nest to the right. When no
% child matches, the tree is returned unchanged.
rerootl_tkstr(S, Stem = #ns{val = RootVal}) ->
    Kids = gsc_ntree:deleaf0(Stem),
    case lists:splitwith(fun(Tk) -> isnt_str(S, Tk) end, Kids) of
        % nothing left after the split: S does not occur, nothing to do
        {_All, []} ->
            Stem;
        % OpTk is the first token whose string is S
        {Before, [OpTk | After]} ->
            Left  = gsc_ntree:releaf0(RootVal, Before),
            Right = rerootl_tkstr(S, gsc_ntree:releaf0(RootVal, After)),
            gsc_ntree:releaf0({op, OpTk}, [Left, Right])
    end.
%f2f_op(OpStr, Fst) ->
% case f2f_op(OpStr, [], none, Fst) of
% % never saw it, no change
% ident -> Fst;
%
%
%% never saw the op
%f2f_op(_, _, none, []) ->
% ident;
%% see op
%f2f_op(OpStr, LhsStk, none, [OpTk = #tk{str = OpStr} | Rest]) ->
% Lhf = lists:reverse(LhsStk),
% Rhf = f2f_op(OpStr, Rest),
% Lht = #ns{meta = none, kids = Lhf},
% Rht = #ns{meta = none, kids = Rhf},
% Result =
%reroot_mapsto(Tree0 = #ns{val = Root0}) ->
% Kids0 = gsc_ntree:deleaf0(Tree0),
% IsntMapsto = fun(DL) -> isnt_str("=>", Tk) end,
% case lists:splitwith(IsntMapsto, Kids0) of
% % found
% {LHS1, [Tk0 | RHS1]} ->
% Root1 = Root0,
% LTree1 = gsc_ntree:releaf0(Root1, LHS1),
% RTree1 = reroot_mapsto(gsc_ntree:releaf0(Root1, RHS1)),
% NewRoot0 = {op, Tk0},
% NewKids0 = [LTree1, RTree1],
% NewTree = gsc_ntree:releaf0(NewRoot0, NewKids0),
% NewTree;
% % nothing to do
% {Kids0, []} ->
% Tree0
% end.
-spec f2f_parens(Forest) -> NewForest when
    Forest    :: asts(),
    NewForest :: Forest.
% @doc
% Recursive parens decomposition.
%
% The input is the flat list of tokens. Every run of tokens enclosed
% by "(" and ")" is replaced with a single stem holding that run;
% nested parens become nested stems.
%
% Quirk: an unmatched ")" does not raise -- it is kept as an ordinary
% element of the forest. Only an unmatched "(" raises an error.
f2f_parens(Forest) ->
    f2f_parens([], Forest).
% Walk the forest left to right. Done holds the trees handled so far,
% most recent first.
%
% input exhausted: put the handled trees back in their original order
f2f_parens(Done, []) ->
    lists:reverse(Done);
% open paren: slurp_pstem/3 consumes everything up to the matching
% close paren and hands back the resulting stem plus the unread input
f2f_parens(Done, [Open = #tk{str = "("} | Tail]) ->
    {slurp, Stem, Remaining} = slurp_pstem({parens, Open, none}, [], Tail),
    f2f_parens([Stem | Done], Remaining);
% anything else is carried over untouched
f2f_parens(Done, [Other | Tail]) ->
    f2f_parens([Other | Done], Tail).
% Negation of is_str/2: true when Y is not a token whose string is X.
isnt_str(X, Y) ->
    case is_str(X, Y) of
        true  -> false;
        false -> true
    end.
% True only when the second argument is a #tk{} record whose str field
% is exactly equal to S; every other input yields false.
is_str(S, #tk{str = Str}) when Str =:= S -> true;
is_str(_, _) -> false.
% Collect the contents of one parenthesised group.
%
% Arguments:
%   1. the meta of the group being read, {parens, OpenTk, none} while
%      its close paren has not been seen yet
%   2. the elements collected so far, most recent first
%   3. the input still to be read
%
% Returns {slurp, Stem, Rest}: Stem is an #ns{} whose meta is
% {parens, OpenTk, CloseTk} and whose kids are the collected elements
% in input order; Rest is the input after the close paren.
%
% input ran out before the close paren showed up
slurp_pstem({parens, Open, none}, Seen, []) ->
    error({no_close_for, Open, Seen});
% close paren: the group is complete
slurp_pstem({parens, Open, none}, Seen, [Close = #tk{str = ")"} | Tail]) ->
    Stem = #ns{meta = {parens, Open, Close},
               kids = lists:reverse(Seen)},
    {slurp, Stem, Tail};
% nested open paren: read the inner group first, then keep going with
% the inner stem as one collected element
slurp_pstem(Meta, Seen, [InnerOpen = #tk{str = "("} | Tail]) ->
    {slurp, InnerStem, Remaining} =
        slurp_pstem({parens, InnerOpen, none}, [], Tail),
    slurp_pstem(Meta, [InnerStem | Seen], Remaining);
% anything else is collected as is
slurp_pstem(Meta, Seen, [Other | Tail]) ->
    slurp_pstem(Meta, [Other | Seen], Tail).