wip name cleanups

This commit is contained in:
Peter Harpending
2026-06-01 18:00:37 -07:00
parent f548c7d88d
commit 9da6dbf18d
12 changed files with 804 additions and 281 deletions
+199
View File
@@ -0,0 +1,199 @@
% @doc
% Root AST node for a whole source file.
%
% Grammar: File ::= Block(TopDecl)
%
% top_decls starts out as `none`; gulp_file_decls/3 builds this record
% with the parsed top-level declarations in source order.
-record(ast_file,
{top_decls = none :: none | [top_decl()]}).
% Any AST node that gulp/2 is specced to hand back: a whole file, a
% single top-level declaration, or a not-yet-implemented placeholder
% (gulp/2 builds #ast_nyi{} with just the tokens it was given).
% NOTE(review): top_decl() and #ast_nyi{} are not declared in this part
% of the file -- presumably they live in a header; confirm.
-type ast() :: #ast_file{}
| top_decl()
| #ast_nyi{}
.
%% Decl ::= 'type' Id ['(' TVar* ')'] '=' TypeAlias
%% | 'record' Id ['(' TVar* ')'] '=' RecordType
%% | 'datatype' Id ['(' TVar* ')'] '=' DataType
%% | 'let' Id [':' Type] '=' Expr
%% | (EModifier* 'entrypoint' | FModifier* 'function') Block(FunDecl)
%% | Using
%-record(ast_type_alias,
% {name = none :: none | string(),
% tvars = none :: none | [string()],
% rewrites_to = none :: none | ast_type()}).
% Atoms naming what gulp/2 should parse a token list as.
% NOTE(review): gulp/2 also has a clause for the tuple {block_of, X},
% which is not listed here.
-type gulp_target()
:: ast_file
| top_decl
| ast_ct
| ast_nyi
.
% @doc
% Parse a complete token list as AstTarget. "gulp" means the parse must
% consume all of its input.
%
% Outcomes:
%   {gulp, Ast}  - the tokens were parsed into Ast
%   reject       - the tokens are not an instance of AstTarget
%                  (gulp_ct/2 returns this and gulp_file_decls/3
%                  matches on it)
%   {error, Err} - parse failure, or an unknown target
%                  (Err#sfc_err.atom =:= gulp_nyi)
-spec gulp(AstTarget, SigTokens) -> Perhaps
    when AstTarget :: gulp_target() | {block_of, term()},
         SigTokens :: [sfc_token()],
         Perhaps   :: {gulp, ast()}
                    | reject
                    | {error, sfc_err()}.

gulp(ast_file, Tokens) ->
    gulp_file(Tokens);
gulp(top_decl, Tokens) ->
    % candidate targets handed to gulp_oneof/2
    Targets = [ast_ct,
               ast_nyi],
    gulp_oneof(Targets, Tokens);
gulp(ast_ct, Tokens) ->
    % start from an empty contract record; gulp_ct/2 fills it in
    gulp_ct(#ast_ct{}, Tokens);
gulp(ast_nyi, Tokens) ->
    % placeholder node: keep the raw tokens
    {gulp, #ast_nyi{tokens = Tokens}};
gulp({block_of, X}, Tokens) ->
    % assertive match: crashes with badmatch unless barf/2 reports no
    % leftover tokens after the block
    {barf, ItemChunks, []} = sfc_token_chunks:barf(block_as_items, Tokens),
    gulp_block_of(X, ItemChunks);
gulp(Nyi, Tokens) ->
    % unknown target: report it instead of crashing
    Msg = io_lib:format("sfc_ast:gulp/2: unknown target: ~p", [Nyi]),
    Err = #sfc_err{atom   = gulp_nyi,
                   string = Msg,
                   extra  = [{target, Nyi}, {tokens, Tokens}]},
    {error, Err}.
% Parse a contract declaration by filling in the #ast_ct{} fields one
% at a time, in this order: payable, main, contract, name, implements,
% eq, decls. Each group of clauses handles the first field that is
% still `none` and recurses on the remaining tokens.
%
% Returns {gulp, Ast} once decls has been filled in, `reject` when the
% "contract" keyword or the contract name is missing (or when no field
% is left to fill), and {error, #sfc_err{atom = no_eq}} when the "="
% is missing.
%
% FIXME: payable and main need to be in that order i think

% payable: optional keyword
gulp_ct(Ct = #ast_ct{payable = none},
        [#sfc_token{string = "payable", type = kwd} | Rest]) ->
    gulp_ct(Ct#ast_ct{payable = payable}, Rest);
gulp_ct(Ct = #ast_ct{payable = none}, Tokens) ->
    gulp_ct(Ct#ast_ct{payable = false}, Tokens);

% main: optional keyword
gulp_ct(Ct = #ast_ct{main = none},
        [#sfc_token{string = "main", type = kwd} | Rest]) ->
    gulp_ct(Ct#ast_ct{main = main}, Rest);
gulp_ct(Ct = #ast_ct{main = none}, Tokens) ->
    gulp_ct(Ct#ast_ct{main = false}, Tokens);

% contract: mandatory keyword
gulp_ct(Ct = #ast_ct{contract = none},
        [#sfc_token{string = "contract", type = kwd} | Rest]) ->
    gulp_ct(Ct#ast_ct{contract = contract}, Rest);
% FIXME: reject logic applies to choice of branch, therefore
% should be contained in branchpoint code
%
% error reporting sketched for later:
%[#sfc_token{pos = P, string = S} | _] ->
%    {error, #sfc_err{atom = no_kwd_contract,
%                     extra = [{pos, P},
%                              {expecting, "contract"},
%                              {got, S},
%                              {ast, Ast},
%                              {tokens, Tokens}]}};
%[] ->
%    {error, #sfc_err{atom = no_kwd_contract,
%                     extra = [{pos, none},
%                              {expecting, "contract"},
%                              {got, eof},
%                              {ast, Ast},
%                              {tokens, Tokens}]}}
gulp_ct(#ast_ct{contract = none}, _Tokens) ->
    reject;

% name: mandatory `con` token
gulp_ct(Ct = #ast_ct{name = none},
        [#sfc_token{string = Name, type = con} | Rest]) ->
    gulp_ct(Ct#ast_ct{name = Name}, Rest);
gulp_ct(#ast_ct{name = none}, _Tokens) ->
    reject;

% implements: optional ": Con1, Con2, ..." list
gulp_ct(Ct = #ast_ct{implements = none}, Tokens) ->
    case slurp_ct_impls(Tokens) of
        {slurp, Impls, Rest} ->
            gulp_ct(Ct#ast_ct{implements = {':', Impls}}, Rest);
        reject ->
            % no list present: record an empty one, consume nothing
            gulp_ct(Ct#ast_ct{implements = {':', []}}, Tokens);
        Other ->
            % anything else is passed through untouched
            Other
    end;

% eq: mandatory "=" operator
gulp_ct(Ct = #ast_ct{eq = none},
        [#sfc_token{string = "=", type = op} | Rest]) ->
    gulp_ct(Ct#ast_ct{eq = '='}, Rest);
gulp_ct(#ast_ct{eq = none}, _Tokens) ->
    {error, #sfc_err{atom = no_eq}};

% decls: everything left is split into items, each gulped as `decl`
gulp_ct(Ct = #ast_ct{decls = none}, Tokens) ->
    Items = sfc_token_chunks:unsafe_block_to_items(Tokens),
    Decls = [gulp(decl, Item) || Item <- Items],
    {gulp, Ct#ast_ct{decls = Decls}};

% no field left to fill
gulp_ct(_, _) ->
    reject.
% Try to read a ": Con1, Con2, ..." list off the front of the tokens.
% Returns {slurp, Names, Rest} via slurp_ct_impls2/2 when the input
% starts with a ":" operator followed by a `con` token, otherwise
% `reject`.
slurp_ct_impls(Tokens) ->
    case Tokens of
        [#sfc_token{string = ":", type = op},
         #sfc_token{string = First, type = con}
         | Tail] ->
            slurp_ct_impls2(Tail, [First]);
        _ ->
            reject
    end.
% Keep consuming ", Con" pairs, pushing each name onto Acc (newest
% first). When the input no longer starts with a "," followed by a
% `con` token, return the names in source order plus the unconsumed
% tokens.
slurp_ct_impls2(Tokens, Acc) ->
    case Tokens of
        [#sfc_token{string = ",", type = punct},
         #sfc_token{string = Con, type = con}
         | Tail] ->
            slurp_ct_impls2(Tail, [Con | Acc]);
        _ ->
            {slurp, lists:reverse(Acc), Tokens}
    end.
-spec gulp_file(SigTokens) -> Perhaps
    when SigTokens :: [sfc_token()],
         Perhaps   :: {gulp, #ast_file{}}
                    | {error, sfc_err()}.
% @private
% `file` enforces that the entire SigTokens is one block, and chokes
% otherwise.
%
% Errors:
%   empty_file   - no tokens at all
%   bad_file     - the block starting at the first token ended before
%                  the end of input
%   bad_file_nyi - barf/2 returned something else; the raw result is
%                  kept in `extra`

gulp_file([]) ->
    {error, #sfc_err{atom = empty_file}};
gulp_file(FileTokens = [#sfc_token{pos = FilePos} | _]) ->
    case sfc_token_chunks:barf(block, FileTokens) of
        % happy path: got the whole file back
        {barf, FileTokens, []} ->
            gulp_full_file(FileTokens);
        % sad path: block terminated early. Any number of tokens may be
        % left over; the first one marks where the block ended, so match
        % on the head rather than on a one-element list.
        {barf, _, [#sfc_token{pos = EndPos} | _]} ->
            Msg = io_lib:format("block starting at ~p ends at ~p instead of EOF",
                                [FilePos, EndPos]),
            {error, #sfc_err{atom   = bad_file,
                             string = Msg}};
        Nyi ->
            {error, #sfc_err{atom = bad_file_nyi, extra = Nyi}}
    end.
% file = block(top_decl)
%
% Split the file's block tokens into per-item chunks and gulp each
% chunk as a top-level declaration, starting with no decls and no
% errors collected.
%
% FIXME: need to rethink types here in order to handle syntax errors
% from different blocks independently.
gulp_full_file(BlockTokens) ->
    gulp_file_decls([], [], sfc_token_chunks:unsafe_block_to_items(BlockTokens)).
% Gulp each item chunk as a top-level declaration, accumulating parsed
% decls and errors (both newest-first while looping).
%
% Decls : decls parsed so far
% Errs  : errors collected so far
% third argument: remaining item chunks (one token list per item)
%
% Parsing continues after a bad item so every problem in the file is
% collected. At the end of the block:
%   no errors -> {gulp, #ast_file{}} with the decls in source order
%   otherwise -> {error, #sfc_err{atom = many}} with the collected
%                errors in `extra`, in source order
gulp_file_decls(Decls, Errs, [DeclTokens | Rest]) ->
    case gulp(top_decl, DeclTokens) of
        {gulp, NewDecl} ->
            gulp_file_decls([NewDecl | Decls], Errs, Rest);
        reject ->
            ErrPos = sfc_token_chunks:start_pos(DeclTokens),
            NewErr = #sfc_err{atom  = bad_top_decl,
                              extra = [{tokens, DeclTokens},
                                       {pos, ErrPos}]},
            gulp_file_decls(Decls, [NewErr | Errs], Rest);
        {error, Err} ->
            % unwrap so Errs holds bare error records, like the
            % bad_top_decl entries above
            gulp_file_decls(Decls, [Err | Errs], Rest);
        Poison ->
            % any other result is kept verbatim so no diagnostic is lost
            gulp_file_decls(Decls, [Poison | Errs], Rest)
    end;
% end of block
gulp_file_decls(Decls, _Errs = [], _Input = []) ->
    {gulp, #ast_file{top_decls = lists:reverse(Decls)}};
gulp_file_decls(_Decls, Errs, _Input = []) ->
    {error, #sfc_err{atom  = many,
                     extra = lists:reverse(Errs)}}.
+59
View File
@@ -0,0 +1,59 @@
% @doc
% working out infix parsing bullshit on toy arith language
%
% our operators for now are
%
% [+, *, ^] in outer->inner order
-module(ifarith).
-export([main/0]).
% Sample input for the toy arithmetic language.
test_str() ->
    "1 + 2 + 3".
% Entry point: tokenize the sample string and return the token list.
main() ->
    % first going to tokenize; tokens/2 takes the token stack built so
    % far as its first argument, so start it off empty
    tokens([], test_str()).
% A lexer token.
%   type : token class; `none` until set
%   str  : the source characters the token was read from
%   val  : the token's value (integer() or atom() per the declared type)
-record(tk,
        {type = none :: none | int | op | noise,
         str  = none :: none | string(),
         val  = none :: none | integer() | atom()}).
% Tokenize a source string.
%
% Stk is the stack of tokens produced so far (newest first); the second
% argument is the remaining source characters. Returns the tokens in
% source order once the input is exhausted.
tokens(Stk, []) ->
    lists:reverse(Stk);
% [+*^] op token
tokens(Stk, [Op | Rest]) when Op =:= $+; Op =:= $*; Op =:= $^ ->
    % val is the operator as an atom ('+', '*', '^'), matching the
    % atom() in #tk.val's type. The guard limits Op to three fixed
    % characters, so list_to_atom/1 cannot grow the atom table.
    Tk = #tk{type = op, str = [Op], val = list_to_atom([Op])},
    tokens([Tk | Stk], Rest);
% [0-9] starts an int token; tk_int/3 consumes the rest of it
tokens(Stk, [D | Rest]) when $0 =< D, D =< $9 ->
    {Tk, NewSrcStr} = tk_int([D], [D], Rest),
    tokens([Tk | Stk], NewSrcStr);
% anything else is skipped
tokens(Stk, [_ | Rest]) ->
    tokens(Stk, Rest).
% tokens for now are
-spec tk_int(DigitStack, CharStack, SrcStr) -> Result when
    DigitStack :: string(),
    CharStack  :: string(),
    SrcStr     :: string(),
    Result     :: {Token, NewSrcStr},
    Token      :: #tk{},
    NewSrcStr  :: string().
% Consume the remainder of an integer literal from SrcStr.
%
% DigitStack holds the digits seen so far and CharStack every character
% seen so far (digits and `_` separators), both newest-first. The caller
% seeds both with the first digit, so DigitStack is never empty.
%
% Returns the finished int token and the unconsumed source.

% cases when still consuming the int
% [0-9]
tk_int(DigitStack, CharStack, [D | NewSrcStr]) when $0 =< D, D =< $9 ->
    tk_int([D | DigitStack], [D | CharStack], NewSrcStr);
% `_` is part of the token's text but not of its value
tk_int(DigitStack, CharStack, [$_ | NewSrcStr]) ->
    tk_int(DigitStack, [$_ | CharStack], NewSrcStr);
% otherwise done
tk_int(DigitStack, CharStack, SrcStr) ->
    Digits = lists:reverse(DigitStack),
    Tk = #tk{type = int,
             str  = lists:reverse(CharStack),
             val  = list_to_integer(Digits)},
    {Tk, SrcStr}.
+88
View File
@@ -0,0 +1,88 @@
% Loose any()-typed stand-in for an infix tree; the ifx_stem_op record
% uses it for its left/right fields.
-type ifx_tree_() :: any().
%% placeholders: any()-typed stand-ins for the AST types
-type ast_() :: any().
-type ast_type_expr_() :: any().
-type ast_te_() :: any().
% @doc
% product type: foo * bar * baz
%
% A product node always has at least two operands. This is a
% consequence of the surface syntax overloading what parentheses do;
% probably it is because `(foo)` is always the same as `foo`.
%
% - 0-tuple -> `unit`
% - 1-tuple -> the type itself
% - 2+      -> this record
-record(ast_te_prod2,
{types = none :: none | [ast_te_()]}).
% @doc
% function type: (string, string) => string
%
% dom   : list of type expressions (presumably the argument types)
% codom : a single type expression (presumably the result type)
% Both default to `none`.
-record(ast_te_ts_to_t,
{dom = none :: none | [ast_te_()],
codom = none :: none | ast_te_()}).
% @doc
% application type: map(string, int)
%
% fn   : a single type expression (presumably the type being applied,
%        `map` in the example)
% args : list of type expressions (presumably its arguments)
% Both default to `none`.
-record(ast_te_t_of_ts,
{fn = none :: none | ast_te_(),
args = none :: none | [ast_te_()]}).
% @doc
% node for a type name; holds the single token that names the type
%
% token type   examples
%   id   : string int unit
%   qid  : Foo.Bar.baz
%   tvar : 'a
-record(ast_te_name,
{name = none :: none | sfc_token()}).
% @doc
% placeholder for a type expression that is not parsed yet; holds a
% token list (presumably the raw, unparsed tokens)
-record(ast_te_nyi,
{tokens = none :: none | [sfc_token()]}).
% Type alias for the type-expression placeholder record (the
% ast_te_nyi record, not the general-purpose ast_nyi one).
-type ast_te_nyi() :: #ast_te_nyi{}.
% Every shape a type expression can take. The name node is the
% ast_te_name record, which wraps an id / qid / tvar token.
-type ast_type_expr()
    :: #ast_te_ts_to_t{}    % function       (string, string) => string
     | #ast_te_t_of_ts{}    % application    map(string, int)
     | #ast_te_prod2{}      % product        foo * bar * baz
     | #ast_te_name{}       % token          string int 'a Foo.Bar.baz
     | #ast_te_nyi{}.
% short alias for ast_type_expr()
-type ast_te() :: ast_type_expr().
% Infix-tree node for an operator.
% NOTE(review): field meanings below are read off the names and
% declared types -- confirm.
%   left / right : lists of ifx_tree_() (presumably the operands)
%   op           : an infix_op() (type not declared in this part of
%                  the file)
%   op_token     : {value, Token} wrapping an sfc_token()
% Every field defaults to `none`.
-record(ifx_stem_op,
{left = none :: none | [ifx_tree_()],
op = none :: none | infix_op(),
op_token = none :: none | {value, sfc_token()},
right = none :: none | [ifx_tree_()]}).
% Infix-tree node holding a list of items.
% NOTE(review): the `plist` name suggests a parenthesised list -- confirm.
% `items` has no default, so it is `undefined` unless set explicitly.
-record(ifx_stem_plist,
{items :: [any()]}).
% Infix-tree leaf wrapping a single token.
% NOTE(review): `idtk` presumably means "identifier token" -- confirm.
% `token` has no default, so it is `undefined` unless set explicitly.
-record(ifx_leaf_idtk,
{token :: sfc_token()}).
% An infix tree: one of the two stem records or the leaf record.
-type ifx_tree()
:: #ifx_stem_op{}
| #ifx_stem_plist{}
| #ifx_leaf_idtk{}.
% The return variable must be the one constrained in the `when`
% clause; a type variable used only once in a spec does not compile.
-spec slurp_ifx_tree(Tokens) -> SlurpedIfxTree when
    Tokens         :: [sfc_token()],
    SlurpedIfxTree :: slurped(ifx_tree()).
slurp_ifx_tree(Tokens) ->
case take_until_ifx_op(Tokens) of
+193
View File
@@ -0,0 +1,193 @@
% @doc
% helper functions for grabbing collections of tokens
% off the token stream
%
% generally assume no whitespace/comment tokens in
% input stream
-module(sfc_token_chunks).
%-export_type([
% chunk_shape/0,
% choke_reason/0
%]).
%
%-export([
% take/2,
% unsafe_block_to_items/1,
% barf/2,
% start_pos/1,
% end_pos/1
%]).
%
%% $sfc_include is so c() works from sfp eshell
%-include("$sfc_include/sfc.hrl").
%
%%------------------------------------------
%% Types
%%------------------------------------------
%
%-type chunk_shape()
% :: block
% | block_item
% | {block_item, Level :: pos_integer()}
% | block_as_items
% .
%
%% FIXME
%-type choke_reason() :: any().
%
%
%%------------------------------------------
%% functions
%%------------------------------------------
%
%% take = just split
%
%take(block, []) ->
% {[], []};
%take(block, [Hd = #sfc_token{pos = {_, BCol}} | Tl]) ->
% tw(fun(#sfc_token{pos = {_, TkCol}}) -> BCol =< TkCol end, [Hd], Tl);
%take(block_item, []) ->
% {[], []};
%take(block_item, [Hd = #sfc_token{pos = {_, ICol}} | Tl]) ->
% tw(fun(#sfc_token{pos = {_, TkCol}}) -> ICol < TkCol end, Tl).
%
%
%
%-spec start_pos([sfc_token()]) -> {value, sfc_pos()} | none.
%
%start_pos([#sfc_token{pos = P}]) -> {value, P};
%start_pos([]) -> none.
%
%
%-spec end_pos([sfc_token()]) -> {value, sfc_pos()} | none.
%
%end_pos([#sfc_token{pos = Pos, string = Str}]) ->
% {value, sfc_tokens:new_pos(Pos, Str)};
%end_pos([_ | T]) ->
% end_pos(T);
%end_pos([]) ->
% none.
%
%
%-spec barf(ChunkShape, SigTokens) -> Perhaps
% when ChunkShape :: chunk_shape(),
% SigTokens :: [Token],
% Perhaps :: {barf, Chunk, Rest}
% | {choke, Reason},
% Chunk :: [Token] % most
% | [[Token]], % block_as_items
% Rest :: [Token],
% Reason :: choke_reason(),
% Token :: sfc_token().
%
%% @doc
%% slurp/barf terminology comes from paredit mode in
%% emacs
%%
%% slurp ~= accepting input
%% barf ~= separating input
%%
%% slurp: (foo bar) baz ~> (foo bar baz)
%% barf : (foo bar baz) ~> foo (bar baz)
%
%barf(_, []) ->
% {barf, [], []};
%barf(block, [H = #sfc_token{pos = {_, BlkCol}} | T]) ->
% Take =
% fun(#sfc_token{pos = {_, TkCol}}) ->
% BlkCol =< TkCol
% end,
% {A, B} = tw(Take, T),
% {barf, [H | A], B};
%barf(block_item, [H = #sfc_token{pos = {_, BlkCol}} | T]) ->
% Take =
% fun(#sfc_token{pos = {_, TkCol}}) ->
% BlkCol < TkCol
% end,
% {A, B} = tw(Take, T),
% {barf, [H | A], B};
%% not needed for our case, future-proofing. see unsafe_block_to_items
%% for details
%barf({block_item, Level}, Tokens = [#sfc_token{pos = {_, StartLevel}} | _]) ->
% case Level =:= StartLevel of
% false -> {barf, [], Tokens};
% true -> barf(block_item, Tokens)
% end;
%% this has a fancy name in Haskell like Lens . lift ^. mapM_
%%
%% i think it's `sequence` actually, but not looking it up
%%
%% this barfs a block, and then uses unsafe_block_to_items/1 to split
%% the block tokens into individual items
%barf(block_as_items, Tokens) ->
% {barf, BlockTokens, Rest} = barf(block, Tokens),
% {barf, unsafe_block_to_items(BlockTokens), Rest};
%barf(_, _) ->
% {choke, #sfc_err_nyi{}}.
%
%
%
%
%
%
%-spec unsafe_block_to_items([Token]) -> [[Token]]
% when Token :: sfc_token().
%
%% @doc
%% PITFALL: this ASSUMES that the given list of tokens has the
%% property that all indent levels are >= that of the head... i.e. the
%% input to this is assumed to be the output of (e.g.) barf(block, _)
%%
%% the danger case is something my intuition is pointing to as a
%% possibility perhaps if you're doing some incremental parallel
%% stream parsing voodoo, naively parsing a block by greedily pulling
%% block items off the head of the list
%%
%% with the current way things work, we actually do not need to check
%% the indent level of each block item and make sure they're all the
%% same
%%
%% BLOCK =
%% foo
%% ...
%% bar
%% ...
%% baz
%% ...
%%
%% BLOCK_ITEM =
%% foo
%% ...
%%
%%
%% very important property of blocks is that each list item starts at
%% the same indent level.
%%
%% a concern would be that when we go to grab the bar item that
%% BarIndentLevel is somehow different from FooIndentLevel.
%%
%% let us reason through why it must be the case that FooIndentLevel
%% =:= BarIndentLevel
%%
%% 1. not (BarIndentLevel < FooIndentLevel); i.e.
%%
%% // impossible by call path:
%% foo ...
%% bar ...
%%
%% This is impossible because the call path ensures that all tokens
%% in BlkItems have indent level >= FooIndentLevel
%%
%% 2. not (FooIndentLevel < BarIndentLevel),
%%
%% // impossible because bar would get
%% // consumed by the foo block
%% foo ...
%% bar ...
%
%unsafe_block_to_items([]) ->
% [];
%unsafe_block_to_items(BlockTks) ->
% {barf, ItemTks, NewBlockTks} = barf(block_item, BlockTks),
% [ItemTks | unsafe_block_to_items(NewBlockTks)].