wip name cleanups
This commit is contained in:
@@ -0,0 +1,199 @@
|
||||
% @doc
% Root AST node for a source file.
%
% Grammar:  File ::= Block(TopDecl)
%
% `top_decls' defaults to `none'; otherwise it holds the list of
% top-level declarations.
-record(ast_file, {
    top_decls = none :: none | [top_decl()]
}).
|
||||
|
||||
|
||||
% Union of the AST node kinds: a whole file, a single top-level
% declaration, or a not-yet-implemented placeholder.
-type ast() ::
    #ast_file{}
    | top_decl()
    | #ast_nyi{}.
|
||||
|
||||
%% Decl ::= 'type' Id ['(' TVar* ')'] '=' TypeAlias
|
||||
%% | 'record' Id ['(' TVar* ')'] '=' RecordType
|
||||
%% | 'datatype' Id ['(' TVar* ')'] '=' DataType
|
||||
%% | 'let' Id [':' Type] '=' Expr
|
||||
%% | (EModifier* 'entrypoint' | FModifier* 'function') Block(FunDecl)
|
||||
%% | Using
|
||||
%-record(ast_type_alias,
|
||||
% {name = none :: none | string(),
|
||||
% tvars = none :: none | [string()],
|
||||
% rewrites_to = none :: none | ast_type()}).
|
||||
|
||||
|
||||
% Atoms naming the construct that gulp/2 is asked to parse.
% NOTE(review): gulp/2 also has a clause for `{block_of, X}', which is
% not listed here -- confirm whether it belongs in this type.
-type gulp_target() ::
    ast_file
    | top_decl
    | ast_ct
    | ast_nyi.
|
||||
|
||||
% @doc
% Parse `Tokens' as the construct named by `AstTarget'.
% "gulp" means the parse must consume all of its input.
%
% Returns:
%   {gulp, Ast}   - the target was parsed
%   reject        - the tokens are not an instance of the target; the
%                   ast_ct path returns this through gulp_ct/2
%   {error, Err}  - a parse error, or an unknown target (atom gulp_nyi)
-spec gulp(AstTarget, SigTokens) -> Perhaps
    when AstTarget :: gulp_target(),
         SigTokens :: [sfc_token()],
         Perhaps   :: {gulp, ast()}
                    | reject
                    | {error, sfc_err()}.

gulp(ast_file, Tokens) ->
    gulp_file(Tokens);
gulp(top_decl, Tokens) ->
    % candidate targets handed to gulp_oneof/2
    Targets = [ast_ct,
               ast_nyi],
    gulp_oneof(Targets, Tokens);
gulp(ast_ct, Tokens) ->
    % start from an empty contract record; gulp_ct/2 fills it in
    gulp_ct(#ast_ct{}, Tokens);
gulp(ast_nyi, Tokens) ->
    % placeholder node: keeps every token, never fails
    {gulp, #ast_nyi{tokens = Tokens}};
gulp({block_of, X}, Tokens) ->
    % assertive match: the block must account for every token
    {barf, ItemChunks, []} = sfc_token_chunks:barf(block_as_items, Tokens),
    gulp_block_of(X, ItemChunks);
gulp(Nyi, Tokens) ->
    % any other target is reported as an error rather than crashing
    Msg = io_lib:format("sfc_ast:gulp/2: unknown target: ~p", [Nyi]),
    Err = #sfc_err{atom   = gulp_nyi,
                   string = Msg,
                   extra  = [{target, Nyi}, {tokens, Tokens}]},
    {error, Err}.
|
||||
|
||||
|
||||
|
||||
% Parse a contract declaration by filling in the fields of an #ast_ct{}
% one at a time. A field that is still `none' has not been visited yet;
% each clause group below sets one field and recurses. Clause order
% gives the visiting order: payable, main, contract, name, implements,
% eq, decls.
%
% Returns {gulp, #ast_ct{}}, `reject', or {error, #sfc_err{}}.
%
% FIXME (original author): payable and main probably need to appear in
% that order.

% optional 'payable' keyword
gulp_ct(Ast = #ast_ct{payable = none},
        [#sfc_token{string = "payable", type = kwd} | Rest]) ->
    gulp_ct(Ast#ast_ct{payable = payable}, Rest);
gulp_ct(Ast = #ast_ct{payable = none}, Tokens) ->
    gulp_ct(Ast#ast_ct{payable = false}, Tokens);
% optional 'main' keyword
gulp_ct(Ast = #ast_ct{main = none},
        [#sfc_token{string = "main", type = kwd} | Rest]) ->
    gulp_ct(Ast#ast_ct{main = main}, Rest);
gulp_ct(Ast = #ast_ct{main = none}, Tokens) ->
    gulp_ct(Ast#ast_ct{main = false}, Tokens);
% mandatory 'contract' keyword
gulp_ct(Ast = #ast_ct{contract = none},
        [#sfc_token{string = "contract", type = kwd} | Rest]) ->
    gulp_ct(Ast#ast_ct{contract = contract}, Rest);
% FIXME (original author): reject logic applies to the choice of branch,
% so it should live in the branch-point code. A richer `no_kwd_contract'
% error (with pos / expecting / got / ast / tokens) was sketched here
% and left disabled.
gulp_ct(#ast_ct{contract = none}, _Tokens) ->
    reject;
% mandatory contract name (a `con' token)
gulp_ct(Ast = #ast_ct{name = none},
        [#sfc_token{string = Name, type = con} | Rest]) ->
    gulp_ct(Ast#ast_ct{name = Name}, Rest);
gulp_ct(#ast_ct{name = none}, _Tokens) ->
    reject;
% optional ': Con1, Con2, ...' list; recorded as {':', []} when absent
gulp_ct(Ast = #ast_ct{implements = none}, Tokens) ->
    case slurp_ct_impls(Tokens) of
        reject ->
            gulp_ct(Ast#ast_ct{implements = {':', []}}, Tokens);
        {slurp, Names, Rest} ->
            gulp_ct(Ast#ast_ct{implements = {':', Names}}, Rest);
        Poison ->
            % anything else is passed back to the caller untouched
            Poison
    end;
% mandatory '='
gulp_ct(Ast = #ast_ct{eq = none},
        [#sfc_token{string = "=", type = op} | Rest]) ->
    gulp_ct(Ast#ast_ct{eq = '='}, Rest);
gulp_ct(#ast_ct{eq = none}, _Tokens) ->
    {error, #sfc_err{atom = no_eq}};
% remaining tokens are the body block, split into items
% NOTE(review): `decl' is not a target that gulp/2 matches explicitly,
% and the gulp/2 results are stored as returned (not unwrapped) --
% confirm this is intended.
gulp_ct(Ast = #ast_ct{decls = none}, Tokens) ->
    Items = sfc_token_chunks:unsafe_block_to_items(Tokens),
    {gulp, Ast#ast_ct{decls = [gulp(decl, Item) || Item <- Items]}};
gulp_ct(_, _) ->
    reject.
|
||||
|
||||
|
||||
% Try to read an implements list (`: Con1, Con2, ...') from the front
% of the token list. A `:' op followed by a `con' token starts the
% list and the rest is handled by slurp_ct_impls2/2; any other input
% yields `reject'.
slurp_ct_impls(Tokens) ->
    case Tokens of
        [#sfc_token{string = ":", type = op},
         #sfc_token{string = First, type = con}
         | Tail] ->
            slurp_ct_impls2(Tail, [First]);
        _ ->
            reject
    end.
|
||||
|
||||
% Continue an implements list: consume `, Con' pairs for as long as
% they appear, collecting the names in reverse in `Acc'. At the first
% input that is not such a pair, return the names in source order
% together with the unconsumed tokens.
slurp_ct_impls2(Tokens, Acc) ->
    case Tokens of
        [#sfc_token{string = ",", type = punct},
         #sfc_token{string = Con, type = con}
         | Tail] ->
            slurp_ct_impls2(Tail, [Con | Acc]);
        _ ->
            {slurp, lists:reverse(Acc), Tokens}
    end.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
% @private
% Parse a whole file. `file' enforces that the entire SigTokens is one
% block and chokes otherwise.
%
% Returns {gulp, #ast_file{}} on success. Errors:
%   empty_file    - no tokens at all
%   bad_file      - the block starting at the first token ends before
%                   the end of the input
%   bad_file_nyi  - any other result from sfc_token_chunks:barf/2
-spec gulp_file(SigTokens) -> Perhaps
    when SigTokens :: [sfc_token()],
         Perhaps   :: {gulp, #ast_file{}}
                    | {error, sfc_err()}.

gulp_file([]) ->
    {error, #sfc_err{atom = empty_file}};
gulp_file(FileTokens = [#sfc_token{pos = FilePos} | _]) ->
    case sfc_token_chunks:barf(block, FileTokens) of
        % happy path: got the whole file back (FileTokens is already
        % bound, so this only matches when nothing was left over)
        {barf, FileTokens, []} ->
            gulp_full_file(FileTokens);
        % sad path: block terminated early. Match the FIRST leftover
        % token regardless of how many follow it; matching a
        % one-element list here would only catch a single stray token.
        {barf, _, [#sfc_token{pos = EndPos} | _]} ->
            Msg = io_lib:format("block starting at ~p ends at ~p instead of EOF",
                                [FilePos, EndPos]),
            {error, #sfc_err{atom   = bad_file,
                             string = Msg}};
        Nyi ->
            {error, #sfc_err{atom = bad_file_nyi, extra = Nyi}}
    end.
|
||||
|
||||
|
||||
% file = block(top_decl)
%
% Split the file's block into its items and parse each one as a
% top-level declaration, starting with no declarations and no errors.
%
% FIXME (original author): the types here need rethinking so that
% syntax errors from different blocks can be handled independently.
gulp_full_file(BlockTokens) ->
    gulp_file_decls([], [], sfc_token_chunks:unsafe_block_to_items(BlockTokens)).
|
||||
|
||||
|
||||
% Parse each chunk of tokens as a top-level declaration, accumulating
% parsed declarations and errors (both in reverse order).
%
%   Decls - declarations parsed so far, newest first
%   Errs  - errors collected so far, newest first
%   third argument - remaining item chunks, one token list per item
%
% At the end of the block: with no errors the result is
% {gulp, #ast_file{}} with the declarations in source order; otherwise
% a single `many' error whose `extra' holds the collected errors in
% source order.
gulp_file_decls(Decls, Errs, [DeclTokens | Rest]) ->
    case gulp(top_decl, DeclTokens) of
        {gulp, NewDecl} ->
            gulp_file_decls([NewDecl | Decls], Errs, Rest);
        reject ->
            ErrPos = sfc_token_chunks:start_pos(DeclTokens),
            NewErr = #sfc_err{atom  = bad_top_decl,
                              extra = [{tokens, DeclTokens},
                                       {pos, ErrPos}]},
            gulp_file_decls(Decls, [NewErr | Errs], Rest);
        {error, Err} ->
            % unwrap so the list holds error terms only, the same shape
            % as the `reject' branch pushes
            gulp_file_decls(Decls, [Err | Errs], Rest);
        Poison ->
            % any other result is kept verbatim so it is not lost
            gulp_file_decls(Decls, [Poison | Errs], Rest)
    end;
% end of block
gulp_file_decls(Decls, _Errs = [], _Input = []) ->
    {gulp, #ast_file{top_decls = lists:reverse(Decls)}};
gulp_file_decls(_Decls, Errs, _Input = []) ->
    % errors were pushed newest-first; report them in source order
    {error, #sfc_err{atom  = many,
                     extra = lists:reverse(Errs)}}.
|
||||
@@ -0,0 +1,59 @@
|
||||
% @doc
|
||||
% scratch module for working out infix parsing on a toy arithmetic language
|
||||
%
|
||||
% our operators for now are
|
||||
%
|
||||
% [+, *, ^] in outer->inner order
|
||||
-module(ifarith).
|
||||
|
||||
-export([main/0]).
|
||||
|
||||
% Sample input string for the tokenizer.
test_str() ->
    "1 + 2 + 3".
|
||||
|
||||
% Entry point: tokenize the sample string and return the token list.
main() ->
    % first going to tokenize; tokens/2 takes the (initially empty)
    % token stack followed by the source string
    tokens([], test_str()).
|
||||
|
||||
% A lexed token.
%   type - token class; `none' is the unset default, so it is part of
%          the field's type
%   str  - the source characters of the token
%   val  - the token's value
-record(tk,
        {type = none :: none | int | op | noise,
         str  = none :: none | string(),
         val  = none :: none | integer() | atom()}).
|
||||
|
||||
% Tokenize a source string.
%
%   Stk    - tokens produced so far, newest first
%   second argument - the remaining source characters
%
% Returns the tokens in source order. Characters that are neither an
% operator nor a digit are skipped.
-spec tokens([#tk{}], string()) -> [#tk{}].

tokens(Stk, []) ->
    lists:reverse(Stk);
% [+*^] op token
tokens(Stk, [Op | Rest]) when Op =:= $+; Op =:= $*; Op =:= $^ ->
    % val is the operator as an atom ('+', '*', '^'), which fits the
    % record's `atom()' type; list_to_atom/1 is safe here because the
    % guard limits Op to three fixed characters
    Tk = #tk{type = op, str = [Op], val = list_to_atom([Op])},
    tokens([Tk | Stk], Rest);
% integer token: the first digit seeds both stacks of tk_int/3
tokens(Stk, [D | Rest]) when $0 =< D, D =< $9 ->
    {Tk, NewSrcStr} = tk_int([D], [D], Rest),
    tokens([Tk | Stk], NewSrcStr);
% anything else is skipped
tokens(Stk, [_ | Rest]) ->
    tokens(Stk, Rest).
|
||||
|
||||
|
||||
% Consume the rest of an integer literal from the front of SrcStr.
%
%   DigitStack - digits seen so far, newest first (underscores excluded);
%                must be non-empty by the time the literal ends, which
%                holds when the caller seeds it with the first digit
%   CharStack  - every character of the literal seen so far, newest
%                first (underscores included)
%   SrcStr     - remaining source characters
%
% Returns {Token, NewSrcStr}: an `int' token whose `str' is the literal
% as written and whose `val' is its integer value, plus the unconsumed
% source.
-spec tk_int(DigitStack, CharStack, SrcStr) -> Result when
    DigitStack :: string(),
    CharStack  :: string(),
    SrcStr     :: string(),
    Result     :: {Token, NewSrcStr},
    Token      :: #tk{},
    NewSrcStr  :: string().

% cases when still consuming the int
% [0-9]
tk_int(DigitStack, CharStack, [D | NewSrcStr]) when $0 =< D, D =< $9 ->
    tk_int([D | DigitStack], [D | CharStack], NewSrcStr);
% `_' is kept in the token text but contributes nothing to the value
tk_int(DigitStack, CharStack, [$_ | NewSrcStr]) ->
    tk_int(DigitStack, [$_ | CharStack], NewSrcStr);
% otherwise done
tk_int(DigitStack, CharStack, SrcStr) ->
    Digits = lists:reverse(DigitStack),
    Tk = #tk{type = int,
             str  = lists:reverse(CharStack),
             val  = list_to_integer(Digits)},
    {Tk, SrcStr}.
|
||||
@@ -0,0 +1,88 @@
|
||||
% Placeholder types: each of these currently admits any term.
-type ifx_tree_()      :: term().
-type ast_()           :: term().
-type ast_type_expr_() :: term().
-type ast_te_()        :: term().
|
||||
|
||||
|
||||
% @doc
% Product type: foo * bar * baz
%
% A product always has at least two operands. This falls out of the
% syntax overloading what parentheses do; probably it is because
% `(foo)' is always the same as `foo'. So:
%
%   - 0-tuple -> `unit'
%   - 1-tuple -> the type itself
%   - 2+      -> this record
-record(ast_te_prod2, {
    types = none :: none | [ast_te_()]
}).
|
||||
|
||||
|
||||
|
||||
% @doc
% Function type: (string, string) => string
%
%   dom   - list of type expressions (the domain)
%   codom - a single type expression (the codomain)
-record(ast_te_ts_to_t, {
    dom   = none :: none | [ast_te_()],
    codom = none :: none | ast_te_()
}).
|
||||
|
||||
|
||||
% @doc
% Application type: map(string, int)
%
%   fn   - the type expression being applied
%   args - list of argument type expressions
-record(ast_te_t_of_ts, {
    fn   = none :: none | ast_te_(),
    args = none :: none | [ast_te_()]
}).
|
||||
|
||||
|
||||
% @doc
% Node for a type name; wraps a single token. Token types:
%
%   id   : string int unit
%   qid  : Foo.Bar.baz
%   tvar : 'a
-record(ast_te_name, {
    name = none :: none | sfc_token()
}).
|
||||
|
||||
|
||||
% @doc
% Placeholder type-expression node holding a list of tokens.
-record(ast_te_nyi,
        {tokens = none :: none | [sfc_token()]}).
% The type names the record defined directly above (ast_te_nyi), not
% the separate ast_nyi record.
-type ast_te_nyi() :: #ast_te_nyi{}.
|
||||
|
||||
|
||||
% Any type-expression node. `ast_te()' is a short alias.
% The name/token case uses #ast_te_name{}, the record defined in this
% file for type-name tokens.
-type ast_type_expr()
    :: #ast_te_ts_to_t{}    % function      (string, string) => string
     | #ast_te_t_of_ts{}    % application   map(string, int)
     | #ast_te_prod2{}      % product       foo * bar * baz
     | #ast_te_name{}       % token         string int 'a Foo.Bar.baz
     | #ast_te_nyi{}.
-type ast_te() :: ast_type_expr().
|
||||
|
||||
|
||||
% Infix-tree node for an operator: the operator, its token, and the
% trees to its left and right. Every field defaults to `none'.
-record(ifx_stem_op, {
    left     = none :: none | [ifx_tree_()],
    op       = none :: none | infix_op(),
    op_token = none :: none | {value, sfc_token()},
    right    = none :: none | [ifx_tree_()]
}).
|
||||
|
||||
% Infix-tree node holding a list of items (item type not yet pinned down).
-record(ifx_stem_plist, {
    items :: [any()]
}).
|
||||
|
||||
% Infix-tree leaf wrapping a single token.
-record(ifx_leaf_idtk, {
    token :: sfc_token()
}).
|
||||
|
||||
% An infix tree: an operator stem, a list stem, or a token leaf.
-type ifx_tree() ::
    #ifx_stem_op{}
    | #ifx_stem_plist{}
    | #ifx_leaf_idtk{}.
|
||||
|
||||
|
||||
% Spec for slurp_ifx_tree/1: takes a token list and returns a slurped
% infix tree. The return variable named in the head is the one
% constrained in the `when' clause.
-spec slurp_ifx_tree(Tokens) -> SlurpedIfxTree when
    Tokens         :: [sfc_token()],
    SlurpedIfxTree :: slurped(ifx_tree()).
|
||||
|
||||
slurp_ifx_tree(Tokens) ->
|
||||
case take_until_ifx_op(Tokens) of
|
||||
|
||||
@@ -0,0 +1,193 @@
|
||||
% @doc
|
||||
% helper functions for grabbing collections of tokens
|
||||
% off the token stream
|
||||
%
|
||||
% generally assume no whitespace/comment tokens in
|
||||
% input stream
|
||||
-module(sfc_token_chunks).
|
||||
|
||||
%-export_type([
|
||||
% chunk_shape/0,
|
||||
% choke_reason/0
|
||||
%]).
|
||||
%
|
||||
%-export([
|
||||
% take/2,
|
||||
% unsafe_block_to_items/1,
|
||||
% barf/2,
|
||||
% start_pos/1,
|
||||
% end_pos/1
|
||||
%]).
|
||||
%
|
||||
%% $sfc_include is so c() works from sfp eshell
|
||||
%-include("$sfc_include/sfc.hrl").
|
||||
%
|
||||
%%------------------------------------------
|
||||
%% Types
|
||||
%%------------------------------------------
|
||||
%
|
||||
%-type chunk_shape()
|
||||
% :: block
|
||||
% | block_item
|
||||
% | {block_item, Level :: pos_integer()}
|
||||
% | block_as_items
|
||||
% .
|
||||
%
|
||||
%% FIXME
|
||||
%-type choke_reason() :: any().
|
||||
%
|
||||
%
|
||||
%%------------------------------------------
|
||||
%% functions
|
||||
%%------------------------------------------
|
||||
%
|
||||
%% take = just split
|
||||
%
|
||||
%take(block, []) ->
|
||||
% {[], []};
|
||||
%take(block, [Hd = #sfc_token{pos = {_, BCol}} | Tl]) ->
|
||||
% tw(fun(#sfc_token{pos = {_, TkCol}}) -> BCol =< TkCol end, [Hd], Tl);
|
||||
%take(block_item, []) ->
|
||||
% {[], []};
|
||||
%take(block_item, [Hd = #sfc_token{pos = {_, ICol}} | Tl]) ->
|
||||
% tw(fun(#sfc_token{pos = {_, TkCol}}) -> ICol < TkCol end, Tl).
|
||||
%
|
||||
%
|
||||
%
|
||||
%-spec start_pos([sfc_token()]) -> {value, sfc_pos()} | none.
|
||||
%
|
||||
%start_pos([#sfc_token{pos = P}]) -> {value, P};
|
||||
%start_pos([]) -> none.
|
||||
%
|
||||
%
|
||||
%-spec end_pos([sfc_token()]) -> {value, sfc_pos()} | none.
|
||||
%
|
||||
%end_pos([#sfc_token{pos = Pos, string = Str}]) ->
|
||||
% {value, sfc_tokens:new_pos(Pos, Str)};
|
||||
%end_pos([_ | T]) ->
|
||||
% end_pos(T);
|
||||
%end_pos([]) ->
|
||||
% none.
|
||||
%
|
||||
%
|
||||
%-spec barf(ChunkShape, SigTokens) -> Perhaps
|
||||
% when ChunkShape :: chunk_shape(),
|
||||
% SigTokens :: [Token],
|
||||
% Perhaps :: {barf, Chunk, Rest}
|
||||
% | {choke, Reason},
|
||||
% Chunk :: [Token] % most
|
||||
% | [[Token]], % block_as_items
|
||||
% Rest :: [Token],
|
||||
% Reason :: choke_reason(),
|
||||
% Token :: sfc_token().
|
||||
%
|
||||
%% @doc
|
||||
%% slurp/barf terminology comes from paredit mode in
|
||||
%% emacs
|
||||
%%
|
||||
%% slurp ~= accepting input
|
||||
%% barf ~= separating input
|
||||
%%
|
||||
%% slurp: (foo bar) baz ~> (foo bar baz)
|
||||
%% barf : (foo bar baz) ~> foo (bar baz)
|
||||
%
|
||||
%barf(_, []) ->
|
||||
% {barf, [], []};
|
||||
%barf(block, [H = #sfc_token{pos = {_, BlkCol}} | T]) ->
|
||||
% Take =
|
||||
% fun(#sfc_token{pos = {_, TkCol}}) ->
|
||||
% BlkCol =< TkCol
|
||||
% end,
|
||||
% {A, B} = tw(Take, T),
|
||||
% {barf, [H | A], B};
|
||||
%barf(block_item, [H = #sfc_token{pos = {_, BlkCol}} | T]) ->
|
||||
% Take =
|
||||
% fun(#sfc_token{pos = {_, TkCol}}) ->
|
||||
% BlkCol < TkCol
|
||||
% end,
|
||||
% {A, B} = tw(Take, T),
|
||||
% {barf, [H | A], B};
|
||||
%% not needed for our case, future-proofing. see unsafe_block_to_items
|
||||
%% for details
|
||||
%barf({block_item, Level}, Tokens = [#sfc_token{pos = {_, StartLevel}} | _]) ->
|
||||
% case Level =:= StartLevel of
|
||||
% false -> {barf, [], Tokens};
|
||||
% true -> barf(block_item, Tokens)
|
||||
% end;
|
||||
%% this has a fancy name in Haskell like Lens . lift ^. mapM_
|
||||
%%
|
||||
%% i think it's `sequence` actually, but not looking it up
|
||||
%%
|
||||
%% this barfs a block, and then uses unsafe_block_to_items/1 to split
|
||||
%% the block tokens into individual items
|
||||
%barf(block_as_items, Tokens) ->
|
||||
% {barf, BlockTokens, Rest} = barf(block, Tokens),
|
||||
% {barf, unsafe_block_to_items(BlockTokens), Rest};
|
||||
%barf(_, _) ->
|
||||
% {choke, #sfc_err_nyi{}}.
|
||||
%
|
||||
%
|
||||
%
|
||||
%
|
||||
%
|
||||
%
|
||||
%-spec unsafe_block_to_items([Token]) -> [[Token]]
|
||||
% when Token :: sfc_token().
|
||||
%
|
||||
%% @doc
|
||||
%% PITFALL: this ASSUMES that the given list of tokens has the
|
||||
%% property that all indent levels are >= that of the head... i.e. the
|
||||
%% input to this is assumed to be the output of (e.g.) barf(block, _)
|
||||
%%
|
||||
%% the danger case is something my intuition is pointing to as a
|
||||
%% possibility perhaps if you're doing some incremental parallel
|
||||
%% stream parsing voodoo, naively parsing a block by greedily pulling
|
||||
%% block items off the head of the list
|
||||
%%
|
||||
%% with the current way things work, we actually do not need to check
|
||||
%% the indent level of each block item and make sure they're all the
|
||||
%% same
|
||||
%%
|
||||
%% BLOCK =
|
||||
%% foo
|
||||
%% ...
|
||||
%% bar
|
||||
%% ...
|
||||
%% baz
|
||||
%% ...
|
||||
%%
|
||||
%% BLOCK_ITEM =
|
||||
%% foo
|
||||
%% ...
|
||||
%%
|
||||
%%
|
||||
%% very important property of blocks is that each list item starts at
|
||||
%% the same indent level.
|
||||
%%
|
||||
%% a concern would be that when we go to grab the bar item that
|
||||
%% BarIndentLevel is somehow different from FooIndentLevel.
|
||||
%%
|
||||
%% let us reason through why it must be the case that FooIndentLevel
|
||||
%% =:= BarIndentLevel
|
||||
%%
|
||||
%% 1. not (BarIndentLevel < FooIndentLevel); i.e.
|
||||
%%
|
||||
%% // impossible by call path:
|
||||
%% foo ...
|
||||
%% bar ...
|
||||
%%
|
||||
%% This is impossible because the call path ensures that all tokens
|
||||
%% in BlkItems have indent level >= FooIndentLevel
|
||||
%%
|
||||
%% 2. not (FooIndentLevel < BarIndentLevel),
|
||||
%%
|
||||
%% // impossible because bar would get
|
||||
%% // consumed by the foo block
|
||||
%% foo ...
|
||||
%% bar ...
|
||||
%
|
||||
%unsafe_block_to_items([]) ->
|
||||
% [];
|
||||
%unsafe_block_to_items(BlockTks) ->
|
||||
% {barf, ItemTks, NewBlockTks} = barf(block_item, BlockTks),
|
||||
% [ItemTks | unsafe_block_to_items(NewBlockTks)].
|
||||
Reference in New Issue
Block a user