From 9da6dbf18dc17514f8d21746282fe89979d2b737 Mon Sep 17 00:00:00 2001 From: Peter Harpending Date: Mon, 1 Jun 2026 18:00:37 -0700 Subject: [PATCH] wip name cleanups --- include/gsc.hrl | 86 +++++------ scratch/ast-gulp.erl | 199 ++++++++++++++++++++++++++ scratch/ifarith.erl | 59 ++++++++ scratch/parse_type_expr.erl | 88 ++++++++++++ scratch/sfc_token_chunks.erl | 193 +++++++++++++++++++++++++ src/gsc.erl | 50 +++---- src/gsc_ast.erl | 108 +++++++------- src/gsc_bst.erl | 10 +- src/gsc_parse_type_expr.erl | 46 +++--- src/gsc_strmatch.erl | 4 +- src/gsc_tokens.erl | 188 ++++++++++++------------ src/{gsc_so_scan.erl => gso_scan.erl} | 54 +++---- 12 files changed, 804 insertions(+), 281 deletions(-) create mode 100644 scratch/ast-gulp.erl create mode 100644 scratch/ifarith.erl create mode 100644 scratch/parse_type_expr.erl create mode 100644 scratch/sfc_token_chunks.erl rename src/{gsc_so_scan.erl => gso_scan.erl} (91%) diff --git a/include/gsc.hrl b/include/gsc.hrl index f02f96d..a452b8d 100644 --- a/include/gsc.hrl +++ b/include/gsc.hrl @@ -1,20 +1,10 @@ -% This is a header file that contains sfc's record types +% This is a header file that contains gsc's record types % % This is in order to % 1. share records across modules; and, -% 2. allow external modules to just use the sfc records +% 2. allow external modules to just use the gsc records -%------------------------------------------------------- -% API Types: sfc internal token representation -% -% -export_type([ -% sf_token_type/0 -% sf_token/0 -% ]). -%------------------------------------------------------- - -% --type sfc_token_type() +-type tk_shape() :: bcom % /* ... */ | lcom % // | ws % whitespace @@ -50,14 +40,14 @@ . --type sfc_pos() :: {Line :: pos_integer(), Col :: pos_integer()}. +-type tk_pos() :: {Line :: pos_integer(), Col :: pos_integer()}. --record(sfc_token, - {type :: sfc_token_type(), - pos :: sfc_pos(), - string :: string()}). +-record(tk, + {shape :: tk_shape(), + pos :: tk_pos(), + str :: string()}). --type sfc_token() :: #sfc_token{}. +-type tk() :: #tk{}. % tokens are in essence the "chunk boundaries" of @@ -93,15 +83,15 @@ % lists = (_, _, _) % | [_, _, _] % | {_, _, _} -%-record(sfc_ast1_block, +%-record(gsc_ast1_block, % {indent = none :: none | pos_integer(), -% decls = none :: [sfc_ast1_decl()]}). +% decls = none :: [gsc_ast1_decl()]}). % -%-type sfc_ast() :: +%-type gsc_ast() :: % -%-type sfc_list_group() :: {'(', [sfc_token()], ')'} -% | {'[', [sfc_token()], ']'} -% | {'{', [sfc_token()], '}'} +%-type gsc_list_group() :: {'(', [tk()], ')'} +% | {'[', [tk()], ']'} +% | {'{', [tk()], '}'} % | {proof, % . @@ -112,50 +102,44 @@ % record type: unterminated block comments at the end % of files. these are ok in legacy sophia, so we have to % specifically account for this error --record(sfc_err_bcom_unterminated, - {prev_tokens :: [sfc_token()], - break_pos :: sfc_pos(), +-record(gsc_err_bcom_unterminated, + {prev_tokens :: [tk()], + break_pos :: gsc_pos(), rest :: string()}). --record(sfc_err_no_tokmatch, - {prev_tokens :: [sfc_token()], - break_pos :: sfc_pos(), +-record(gsc_err_no_tokmatch, + {prev_tokens :: [tk()], + break_pos :: gsc_pos(), rest :: string()}). --record(sfc_err_delims, - {past :: [sfc_token()], - open_stack :: [sfc_token()], - bad_close :: sfc_token(), - future :: [sfc_token()]}). +-record(gsc_err_delims, + {past :: [tk()], + open_stack :: [tk()], + bad_close :: tk(), + future :: [tk()]}). % FIXME --record(sfc_err_nyi, {}). --record(sfc_err_empty_file, {}). +-record(gsc_err_nyi, {}). +-record(gsc_err_empty_file, {}). %-record(src_parse_error, % {atom = none :: none | atom(), % string = -%j-record(sfc_err_gulp_ct, +%j-record(gsc_err_gulp_ct, %j {gulped :: % @doc % generic placeholder error for now --record(sfc_err, +-record(gsc_err, {atom :: atom(), string = none :: none | iolist(), extra = none :: none | any()}). % @doc all errors SFC can return conveniently listed in % one place --type sfc_err() :: #sfc_err_bcom_unterminated{} - | #sfc_err_no_tokmatch{} - | #sfc_err_nyi{} - | #sfc_err_empty_file{} - | #sfc_err{}. - - -%% FIXME --type sfc_ast() :: any(). - - +-type gsc_err() :: #gsc_err_bcom_unterminated{} + | #gsc_err_no_tokmatch{} + | #gsc_err_nyi{} + | #gsc_err_empty_file{} + | #gsc_err{}. diff --git a/scratch/ast-gulp.erl b/scratch/ast-gulp.erl new file mode 100644 index 0000000..6690d21 --- /dev/null +++ b/scratch/ast-gulp.erl @@ -0,0 +1,199 @@ +% @doc +% File ::= Block(TopDecl) +-record(ast_file, + {top_decls = none :: none | [top_decl()]}). + + +-type ast() :: #ast_file{} + | top_decl() + | #ast_nyi{} + . + +%% Decl ::= 'type' Id ['(' TVar* ')'] '=' TypeAlias +%% | 'record' Id ['(' TVar* ')'] '=' RecordType +%% | 'datatype' Id ['(' TVar* ')'] '=' DataType +%% | 'let' Id [':' Type] '=' Expr +%% | (EModifier* 'entrypoint' | FModifier* 'function') Block(FunDecl) +%% | Using +%-record(ast_type_alias, +% {name = none :: none | string(), +% tvars = none :: none | [string()], +% rewrites_to = none :: none | ast_type()}). + + +-type gulp_target() + :: ast_file + | top_decl + | ast_ct + | ast_nyi + . + +% gulp means it must consume all input +-spec gulp(AstTarget, SigTokens) -> Perhaps + when AstTarget :: gulp_target(), + SigTokens :: [sfc_token()], + Perhaps :: {gulp, ast()} + | {error, sfc_err()}. + +gulp(ast_file, Tokens) -> + gulp_file(Tokens); +gulp(top_decl, Tokens) -> + Targets = [ast_ct, + ast_nyi], + gulp_oneof(Targets, Tokens); +gulp(ast_ct, Tokens) -> + gulp_ct(#ast_ct{}, Tokens); +gulp(ast_nyi, Tokens) -> + {gulp, #ast_nyi{tokens = Tokens}}; +gulp({block_of, X}, Tokens) -> + {barf, ItemChunks, []} = sfc_token_chunks:barf(block_as_items, Tokens), + gulp_block_of(X, ItemChunks); +gulp(Nyi, Tokens) -> + Msg = io_lib:format("sfc_ast:gulp/2: unknown target: ~p", [Nyi]), + Err = #sfc_err{atom = gulp_nyi, + string = Msg, + extra = [{target, Nyi}, {tokens, Tokens}]}, + {error, Err}. + + + +% FIXME: payable and main need to be in that order i think +gulp_ct(Ast = #ast_ct{payable = none}, Tokens) -> + case Tokens of + [#sfc_token{string = "payable", type = kwd} | NewTokens] -> + gulp_ct(Ast#ast_ct{payable = payable}, NewTokens); + _ -> + gulp_ct(Ast#ast_ct{payable = false}, Tokens) + end; +gulp_ct(Ast = #ast_ct{main = none}, Tokens) -> + case Tokens of + [#sfc_token{string = "main", type = kwd} | NewTokens] -> + gulp_ct(Ast#ast_ct{main = main}, NewTokens); + _ -> + gulp_ct(Ast#ast_ct{main = false}, Tokens) + end; +gulp_ct(Ast = #ast_ct{contract = none}, Tokens) -> + case Tokens of + [#sfc_token{string = "contract", type = kwd} | NewTokens] -> + gulp_ct(Ast#ast_ct{contract = contract}, NewTokens); + % FIXME: reject logic applies to choice of branch, therefore + % should be contained in branchpoint code + _ -> + reject + %[#sfc_token{pos = P, string = S} | _] -> + % {error, #sfc_err{atom = no_kwd_contract, + % extra = [{pos, P}, + % {expecting, "contract"}, + % {got, S}, + % {ast, Ast}, + % {tokens, Tokens}]}}; + %[] -> + % {error, #sfc_err{atom = no_kwd_contract, + % extra = [{pos, none}, + % {expecting, "contract"}, + % {got, eof}, + % {ast, Ast}, + % {tokens, Tokens}]}} + end; +gulp_ct(Ast = #ast_ct{name = none}, Tokens) -> + case Tokens of + [#sfc_token{string = Name, type = con} | NewTokens] -> + gulp_ct(Ast#ast_ct{name = Name}, NewTokens); + _ -> + reject + end; +gulp_ct(Ast = #ast_ct{implements = none}, Tokens) -> + case slurp_ct_impls(Tokens) of + {slurp, Names, NewTokens} -> + gulp_ct(Ast#ast_ct{implements = {':', Names}}, NewTokens); + reject -> + gulp_ct(Ast#ast_ct{implements = {':', []}}, Tokens); + Poison -> Poison + end; +gulp_ct(Ast = #ast_ct{eq = none}, Tokens) -> + case Tokens of + [#sfc_token{string = "=", type = op} | NewTokens] -> + gulp_ct(Ast#ast_ct{eq = '='}, NewTokens); + _ -> + {error, #sfc_err{atom = no_eq}} + end; +gulp_ct(Ast = #ast_ct{decls = none}, Tokens) -> + Decls = [gulp(decl, Item) || Item <- sfc_token_chunks:unsafe_block_to_items(Tokens)], + {gulp, Ast#ast_ct{decls = Decls}}; +gulp_ct(_, _) -> + reject. + + +slurp_ct_impls([#sfc_token{string = ":", type = op}, + #sfc_token{string = Con1, type = con} + | Rest]) -> + slurp_ct_impls2(Rest, [Con1]); +slurp_ct_impls(_) -> + reject. + +slurp_ct_impls2([#sfc_token{string = ",", type = punct}, + #sfc_token{string = Con1, type = con} + | Rest], + Acc) -> + slurp_ct_impls2(Rest, [Con1 | Acc]); +slurp_ct_impls2(Rest, Names) -> + {slurp, lists:reverse(Names), Rest}. + + + + + +-spec gulp_file(SigTokens) -> Perhaps + when SigTokens :: [sfc_token()], + Perhaps :: {gulp, #ast_file{}} + | {error, sfc_err()}. +% @private +% `file` enforces that the entire SigTokens is one +% block, chokes otherwise + +gulp_file([]) -> + {error, #sfc_err{atom = empty_file}}; +gulp_file(FileTokens = [#sfc_token{pos = FilePos} | _]) -> + case sfc_token_chunks:barf(block, FileTokens) of + % happy path: got the whole file back + {barf, FileTokens, []} -> + gulp_full_file(FileTokens); + % sad path: block terminated + {barf, _, [#sfc_token{pos = EndPos}]} -> + Msg = io_lib:format("block starting at ~p ends at ~p instead of EOF", + [FilePos, EndPos]), + {error, #sfc_err{atom = bad_file, + string = Msg}}; + Nyi -> + {error, #sfc_err{atom = bad_file_nyi, extra = Nyi}} + end. + + +% FIXME: need to rethink types here in order to handle syntax errors +% from different blocks independently. + +% file = block(top_decl) +gulp_full_file(BlockTokens) -> + ItemChunks = sfc_token_chunks:unsafe_block_to_items(BlockTokens), + gulp_file_decls([], [], ItemChunks). + + +gulp_file_decls(Decls, Errs, [DeclTokens | Rest]) -> + case gulp(top_decl, DeclTokens) of + {gulp, NewDecl} -> + gulp_file_decls([NewDecl | Decls], Errs, Rest); + reject -> + ErrPos = sfc_token_chunks:start_pos(DeclTokens), + NewErr = #sfc_err{atom = bad_top_decl, + extra = [{tokens, DeclTokens}, + {pos, ErrPos}]}, + gulp_file_decls(Decls, [NewErr | Errs], Rest); + Poison -> + gulp_file_decls(Decls, [Poison | Errs], Rest) + end; +% end of block +gulp_file_decls(Decls, _Errs = [], _Input = []) -> + {gulp, #ast_file{top_decls = lists:reverse(Decls)}}; +gulp_file_decls(_Decls, Errs, _Input = []) -> + {error, #sfc_err{atom = many, + extra = Errs}}. diff --git a/scratch/ifarith.erl b/scratch/ifarith.erl new file mode 100644 index 0000000..ff63cca --- /dev/null +++ b/scratch/ifarith.erl @@ -0,0 +1,59 @@ +% @doc +% working out infix parsing bullshit on toy arith language +% +% our operators for now are +% +% [+, *, ^] in outer->inner order +-module(ifarith). + +-export([main/0]). + +test_str() -> + "1 + 2 + 3" + +main() -> + % first going to tokenize + Tokens = tokens(test_str()). + +-record(tk, + {type = none :: int | op | noise, + str = none :: none | string(), + val = none :: none | integer() | atom()}). + +tokens(Stk, []) -> + lists:reverse(Stk). +% [+*^] op token +tokens(Stk, [Char | Rest]) -> + case Char of + Op when $+ =:= Op; $* =:= Op; $^ =:= Op -> + Tk = #tk{type = op, str = [Op], val = list_to_tuple([Op])}, + tokens([Tk | Stk], Rest); + D when $0 =< D, D =< $9 -> + {Tk, NewSrcStr} = tk_int([D], [D], Rest), + tokens([Tk | Stk], NewSrcStr). + _ -> + tokens(Stk, Rest) + end. + + +% tokens for now are +-spec tk_int(DigitStack, CharStack, SrcStr) -> Result when + DigitStack :: string(), + CharStack :: string(), + SrcStr :: string(), + Result :: {Token, NewSrcStr}, + Token :: #tk{}, + NewSrcStr :: string(). + +tk_int(DigitStack, CharStack, SrcStr) -> + case SrcStr of + % cases when still consuming the int + % [0-9] + [D | NewSrcStr] when $0 =< D, D =< $9 -> + tk_int([D | DigitStack], [D | CharStack], NewSrcStr); + [$_ | NewSrcStr] -> + tk_int(DigitStack, [D | CharStack], NewSrcStr); + % otherwise done + _ -> + Digits = + end. diff --git a/scratch/parse_type_expr.erl b/scratch/parse_type_expr.erl new file mode 100644 index 0000000..8f02c7e --- /dev/null +++ b/scratch/parse_type_expr.erl @@ -0,0 +1,88 @@ +-type ifx_tree_() :: any(). + +%% placeholders +-type ast_() :: any(). +-type ast_type_expr_() :: any(). +-type ast_te_() :: any(). + + +% @doc +% product type: foo * bar * baz +% +% stupid weird implication from bad syntax foresight trying to be +% fancy and overload what parens do is products must always have at +% least two operands; probably this is because `(foo)` is always the +% same as `foo` +% +% - 0-tuple -> `unit` +% - 1-tuple -> type itself +% - 2+ -> here +-record(ast_te_prod2, + {types = none :: none | [ast_te_()]}). + + + +% @doc +% function type: (string, string) => string +-record(ast_te_ts_to_t, + {dom = none :: none | [ast_te_()], + codom = none :: none | ast_te_()}). + + +% @doc +% application type: map(string, int) +-record(ast_te_t_of_ts, + {fn = none :: none | ast_te_(), + args = none :: none | [ast_te_()]}). + + +% @doc +% node for a type name +% token type id : string int unit +% qid : Foo.Bar.baz +% tvar : 'a +-record(ast_te_name, + {name = none :: none | sfc_token()}). + + +% @doc +% placeholder +-record(ast_te_nyi, + {tokens = none :: none | [sfc_token()]}). +-type ast_te_nyi() :: #ast_nyi{}. + + +-type ast_type_expr() + :: #ast_te_ts_to_t{} % function (string, string) => string + | #ast_te_t_of_ts{} % application map(string, int) + | #ast_te_prod2{} % product foo * bar * baz + | #ast_te_tkid{} % token string int 'a Foo.Bar.baz + | #ast_te_nyi{}. +-type ast_te() :: ast_type_expr(). + + +-record(ifx_stem_op, + {left = none :: none | [ifx_tree_()], + op = none :: none | infix_op(), + op_token = none :: none | {value, sfc_token()}, + right = none :: none | [ifx_tree_()]}). + +-record(ifx_stem_plist, + {items :: [any()]}). + +-record(ifx_leaf_idtk, + {token :: sfc_token()}). + +-type ifx_tree() + :: #ifx_stem_op{} + | #ifx_stem_plist{} + | #ifx_leaf_idtk{}. + + +-spec slurp_ifx_tree(Tokens) -> SlurpedIfxTree when + Tokens :: [sfc_token()], + SlurpedIfxNode :: slurped(ifx_tree()). + +slurp_ifx_tree(Tokens) -> + case take_until_ifx_op(Tokens) of + diff --git a/scratch/sfc_token_chunks.erl b/scratch/sfc_token_chunks.erl new file mode 100644 index 0000000..2206e92 --- /dev/null +++ b/scratch/sfc_token_chunks.erl @@ -0,0 +1,193 @@ +% @doc +% helper functions for grabbing collections of tokens +% off the token stream +% +% generally assume no whitespace/comment tokens in +% input stream +-module(sfc_token_chunks). + +%-export_type([ +% chunk_shape/0, +% choke_reason/0 +%]). +% +%-export([ +% take/2, +% unsafe_block_to_items/1, +% barf/2, +% start_pos/1, +% end_pos/1 +%]). +% +%% $sfc_include is so c() works from sfp eshell +%-include("$sfc_include/sfc.hrl"). +% +%%------------------------------------------ +%% Types +%%------------------------------------------ +% +%-type chunk_shape() +% :: block +% | block_item +% | {block_item, Level :: pos_integer()} +% | block_as_items +% . +% +%% FIXME +%-type choke_reason() :: any(). +% +% +%%------------------------------------------ +%% functions +%%------------------------------------------ +% +%% take = just split +% +%take(block, []) -> +% {[], []}; +%take(block, [Hd = #sfc_token{pos = {_, BCol}} | Tl]) -> +% tw(fun(#sfc_token{pos = {_, TkCol}}) -> BCol =< TkCol end, [Hd], Tl); +%take(block_item, []) -> +% {[], []}; +%take(block_item, [Hd = #sfc_token{pos = {_, ICol}} | Tl]) -> +% tw(fun(#sfc_token{pos = {_, TkCol}}) -> ICol < TkCol end, Tl). +% +% +% +%-spec start_pos([sfc_token()]) -> {value, sfc_pos()} | none. +% +%start_pos([#sfc_token{pos = P}]) -> {value, P}; +%start_pos([]) -> none. +% +% +%-spec end_pos([sfc_token()]) -> {value, sfc_pos()} | none. +% +%end_pos([#sfc_token{pos = Pos, string = Str}]) -> +% {value, sfc_tokens:new_pos(Pos, Str)}; +%end_pos([_ | T]) -> +% end_pos(T); +%end_pos([]) -> +% none. +% +% +%-spec barf(ChunkShape, SigTokens) -> Perhaps +% when ChunkShape :: chunk_shape(), +% SigTokens :: [Token], +% Perhaps :: {barf, Chunk, Rest} +% | {choke, Reason}, +% Chunk :: [Token] % most +% | [[Token]], % block_as_items +% Rest :: [Token], +% Reason :: choke_reason(), +% Token :: sfc_token(). +% +%% @doc +%% slurp/barf terminology comes from paredit mode in +%% emacs +%% +%% slurp ~= accepting input +%% barf ~= separating input +%% +%% slurp: (foo bar) baz ~> (foo bar baz) +%% barf : (foo bar baz) ~> foo (bar baz) +% +%barf(_, []) -> +% {barf, [], []}; +%barf(block, [H = #sfc_token{pos = {_, BlkCol}} | T]) -> +% Take = +% fun(#sfc_token{pos = {_, TkCol}}) -> +% BlkCol =< TkCol +% end, +% {A, B} = tw(Take, T), +% {barf, [H | A], B}; +%barf(block_item, [H = #sfc_token{pos = {_, BlkCol}} | T]) -> +% Take = +% fun(#sfc_token{pos = {_, TkCol}}) -> +% BlkCol < TkCol +% end, +% {A, B} = tw(Take, T), +% {barf, [H | A], B}; +%% not needed for our case, future-proofing. see unsafe_block_to_items +%% for details +%barf({block_item, Level}, Tokens = [#sfc_token{pos = {_, StartLevel}} | _]) -> +% case Level =:= StartLevel of +% false -> {barf, [], Tokens}; +% true -> barf(block_item, Tokens) +% end; +%% this has a fancy name in Haskell like Lens . lift ^. mapM_ +%% +%% i think it's `sequence` actually, but not looking it up +%% +%% this barfs a block, and then uses unsafe_block_to_items/1 to split +%% the block tokens into individual items +%barf(block_as_items, Tokens) -> +% {barf, BlockTokens, Rest} = barf(block, Tokens), +% {barf, unsafe_block_to_items(BlockTokens), Rest}; +%barf(_, _) -> +% {choke, #sfc_err_nyi{}}. +% +% +% +% +% +% +%-spec unsafe_block_to_items([Token]) -> [[Token]] +% when Token :: sfc_token(). +% +%% @doc +%% PITFALL: this ASSUMES that the given list of tokens has the +%% property that all indent levels are >= that of the head... i.e. the +%% input to this is assumed to be the output of (e.g.) barf(block, _) +%% +%% the danger case is something my intuition is pointing to as a +%% possibility perhaps if you're doing some incremental parallel +%% stream parsing voodoo, naively parsing a block by greedily pulling +%% block items off the head of the list +%% +%% with the current way things work, we actually do not need to check +%% the indent level of each block item and make sure they're all the +%% same +%% +%% BLOCK = +%% foo +%% ... +%% bar +%% ... +%% baz +%% ... +%% +%% BLOCK_ITEM = +%% foo +%% ... +%% +%% +%% very important property of blocks is that each list item starts at +%% the same indent level. +%% +%% a concern would be that when we go to grab the bar item that +%% BarIndentLevel is somehow different from FooIndentLevel. +%% +%% let us reason through why it must be the case that FooIndentLevel +%% =:= BarIndentLevel +%% +%% 1. not (BarIndentLevel < FooIndentLevel); i.e. +%% +%% // impossible by call path: +%% foo ... +%% bar ... +%% +%% This is impossible because the call path ensures that all tokens +%% in BlkItems have indent level >= FooIndentLevel +%% +%% 2. not (FooIndentLevel < BarIndentLevel), +%% +%% // impossible because bar would get +%% // consumed by the foo block +%% foo ... +%% bar ... +% +%unsafe_block_to_items([]) -> +% []; +%unsafe_block_to_items(BlockTks) -> +% {barf, ItemTks, NewBlockTks} = barf(block_item, BlockTks), +% [ItemTks | unsafe_block_to_items(NewBlockTks)]. diff --git a/src/gsc.erl b/src/gsc.erl index 90f3475..a5948e2 100644 --- a/src/gsc.erl +++ b/src/gsc.erl @@ -4,15 +4,15 @@ % based on original sophia compiler % % parse layers: -% 1. sfc_tokenizer: SrcStr -> (Tokens | SigTokens) +% 1. gsc_tokenizer: SrcStr -> (Tokens | SigTokens) % % SigTokens = not comment/whitespace % % layers: -% a. sfc_strmatch : matches string shapes -% b. sfc_so_scan : converts to so_scan shapes +% a. gsc_strmatch : matches string shapes +% b. gsc_so_scan : converts to so_scan shapes % -% 2. sfc_ast: SigTokens -> AST +% 2. gsc_ast: SigTokens -> AST % % terminology: % @@ -36,8 +36,8 @@ % - too fuzzy right now % - possibly: % - rename parser layers sequentially: -% - sfc_ --module(sfc). +% - gsc_ +-module(gsc). -export_type([ @@ -54,13 +54,13 @@ ast_from_tokens/1 ]). --include("$sfc_include/sfc.hrl"). +-include("$gsc_include/gsc.hrl"). %----------------------------------------- % types %----------------------------------------- --type token() :: sfc_token(). +-type token() :: tk(). %----------------------------------------- % functions @@ -68,13 +68,13 @@ sigtokens_from_file(X) -> case tokens_from_file(X) of - {ok, Y} -> {ok, sfc_tokens:filter_significant(Y)}; + {ok, Y} -> {ok, gsc_tokens:filter_significant(Y)}; Err -> Err end. sigtokens_from_string(X) -> case tokens_from_string(X) of - {ok, Y} -> {ok, sfc_tokens:filter_significant(Y)}; + {ok, Y} -> {ok, gsc_tokens:filter_significant(Y)}; Err -> Err end. @@ -82,8 +82,8 @@ sigtokens_from_string(X) -> -spec tokens_from_file(FilePath) -> Perhaps when FilePath :: string(), Perhaps :: {ok, Tokens} - | {error, sfc_err() | any()}, - Tokens :: [sfc_token()]. + | {error, gsc_err() | any()}, + Tokens :: [tk()]. tokens_from_file(FilePath) -> case file:read_file(FilePath) of @@ -97,18 +97,18 @@ tokens_from_file(FilePath) -> -spec tokens_from_string(SrcStr) -> Result when SrcStr :: string(), Result :: {ok, Tokens} - | {error, sfc_err()}, - Tokens :: [sfc_token()]. + | {error, gsc_err()}, + Tokens :: [tk()]. tokens_from_string(SrcStr) -> - sfc_tokens:tokens(SrcStr). + gsc_tokens:tokens(SrcStr). -spec ast_from_file(FilePath) -> Perhaps when FilePath :: string(), - Perhaps :: {ok, AST} | {error, sfc_err()}, - AST :: sfc_ast(). + Perhaps :: {ok, AST} | {error, gsc_err()}, + AST :: gsc_ast(). ast_from_file(FilePath) -> case file:read_file(FilePath) of @@ -120,11 +120,11 @@ ast_from_file(FilePath) -> -spec ast_from_string(SrcStr) -> Perhaps when SrcStr :: string(), - Perhaps :: {ok, AST} | {error, sfc_err()}, - AST :: sfc_ast(). + Perhaps :: {ok, AST} | {error, gsc_err()}, + AST :: gsc_ast(). ast_from_string(SrcStr) -> - case sfc_tokens:significant_tokens(SrcStr) of + case gsc_tokens:significant_tokens(SrcStr) of {ok, SigTks} -> ast_from_tokens(SigTks); Error -> Error end. @@ -132,13 +132,13 @@ ast_from_string(SrcStr) -> -spec ast_from_tokens(SrcTokens) -> Perhaps - when SrcTokens :: [sfc_token()], - Perhaps :: {ok, AST} | {error, sfc_err()}, - AST :: sfc_ast(). + when SrcTokens :: [tk()], + Perhaps :: {ok, AST} | {error, gsc_err()}, + AST :: gsc_ast(). ast_from_tokens(Tks) -> - SigTks = sfc_tokens:filter_significant(Tks), - case sfc_ast:gulp_file(SigTks) of + SigTks = gsc_tokens:filter_significant(Tks), + case gsc_ast:gulp_file(SigTks) of {gulp, AST} -> {ok, AST}; Error -> Error end. diff --git a/src/gsc_ast.erl b/src/gsc_ast.erl index 6fef4f6..9431795 100644 --- a/src/gsc_ast.erl +++ b/src/gsc_ast.erl @@ -56,7 +56,7 @@ % Domain ::= Type // Single argument % | '(' Sep(Type, ',') ')' // Multiple arguments % ``` --module(sfc_ast). +-module(gsc_ast). %-compile([export_all,nowarn_export_all]). % @@ -71,16 +71,16 @@ % gulp_file/1 %]). % -%-include("$sfc_include/sfc.hrl"). +%-include("$gsc_include/gsc.hrl"). % %%%----------------------------- -%%% TYPES: sfc_ast +%%% TYPES: gsc_ast %%%----------------------------- % % %% % placeholders %-type ast_() :: any(). -%-record(ast_nyi, {tokens = none :: [sfc_token()]}). +%-record(ast_nyi, {tokens = none :: [tk()]}). %-type ast_nyi() :: #ast_nyi{}. % % @@ -109,8 +109,8 @@ %% ta = type alias %% 'type' Id ['(' TVar* ')'] '=' TypeAlias %-record(ast_ta, -% {alias = none :: none | sfc_token(), -% tvars = none :: none | [sfc_token()], +% {alias = none :: none | tk(), +% tvars = none :: none | [tk()], % points_to = none :: none | ast_type_expr()}). % %% Decl ::= 'type' Id ['(' TVar* ')'] '=' TypeAlias @@ -126,8 +126,8 @@ %-record(ast_ct, % {payable = none :: none | boolean(), % main = none :: none | boolean(), -% name = none :: none | sfc_token(), -% impls = none :: none | [sfc_token()], +% name = none :: none | tk(), +% impls = none :: none | [tk()], % decls = none :: none | [decl()]}). % %-record(ast_td_iface,{}). @@ -158,7 +158,7 @@ % %-type parse_error_() :: any(). %-record(parse_error, -% {pos = none :: none | sfc_pos(), +% {pos = none :: none | gsc_pos(), % msg = "" :: string(), % subs = [] :: [parse_error_()], % extra = none :: any()}). @@ -189,20 +189,20 @@ %%%----------------------------- % %-spec gulp_file(Tokens) -> Perhaps -% when Tokens :: [sfc_token()], +% when Tokens :: [tk()], % Perhaps :: {gulp, #ast_file{}} % | {error, #parse_error{}}. % %gulp_file([]) -> % {error, empty_file}; %gulp_file(Tokens) -> -% case sfc_tokens:take_block(Tokens) of +% case gsc_tokens:take_block(Tokens) of % {Tokens, []} -> % gulp_block(fun gulp_top_decl/1, Tokens); % %gulp_file2([], [], Tokens); % {A, B} -> -% StartPos = sfc_tokens:start_pos(A), -% ErrPos = sfc_tokens:start_pos(B), +% StartPos = gsc_tokens:start_pos(A), +% ErrPos = gsc_tokens:start_pos(B), % Msg = efmt("gulp_file: block starting at ~p ends at ~p instead of EOF", % [StartPos, ErrPos]), % {error, #parse_error{pos = ErrPos, msg = Msg}} @@ -212,7 +212,7 @@ % %%gulp_file2(AccOks, AccErrs, Tokens = [_ | _]) -> %% % ItemTokens will be nonempty -%% {ItemTokens, NewTokens} = sfc_tokens:take_block_item(Tokens), +%% {ItemTokens, NewTokens} = gsc_tokens:take_block_item(Tokens), %% case gulp_top_decl(ItemTokens) of %% {gulp, Ok} -> gulp_file2([Ok | AccOks], AccErrs, NewTokens); %% Err -> gulp_file2(AccOks, [Err | AccErrs], NewTokens) @@ -227,7 +227,7 @@ % %-spec gulp_block(GulpItem, Tokens) -> GulpedItems % when GulpItem :: fun((ItemTokens) -> GulpedItem), -% Tokens :: [sfc_token()], +% Tokens :: [tk()], % ItemTokens :: Tokens, % GulpedItem :: {gulp, Item} | {error, Reason}, % GulpedItems :: {gulp, Items} | {error, Reason}, @@ -258,7 +258,7 @@ % %gulp_block(GulpItem, AccOks, AccErrs, Tokens = [_ | _]) -> % % ItemTokens will be nonempty -% {ItemTokens, NewTokens} = sfc_tokens:take_block_item(Tokens), +% {ItemTokens, NewTokens} = gsc_tokens:take_block_item(Tokens), % case GulpItem(ItemTokens) of % {gulp, Ok} -> gulp_block(GulpItem, [Ok | AccOks], AccErrs, NewTokens); % Err -> gulp_block(GulpItem, AccOks, [Err | AccErrs], NewTokens) @@ -272,7 +272,7 @@ % % %-spec gulp_top_decl(DeclTokens) -> Result -% when DeclTokens :: [sfc_token()], +% when DeclTokens :: [tk()], % Result :: {gulp, ast()} | {error, any()}. % %% @doc @@ -284,7 +284,7 @@ %% | Using %% @end %gulp_top_decl(DeclTokens) -> -% case sfc_tokens:strings(3, DeclTokens) of +% case gsc_tokens:strings(3, DeclTokens) of % ["payable", "contract", "interface"] -> % gulp_nyi(DeclTokens); % ["contract", "interface" | _] -> @@ -311,7 +311,7 @@ % gulp_nyi(DeclTokens); % _ -> % % decl tokens will always be nonempty -% [H = #sfc_token{pos = ErrPos} | _] = DeclTokens, +% [H = #tk{pos = ErrPos} | _] = DeclTokens, % EMsg = efmt("gulp_top_decl: bad token: ~p; " % "expecting one of: [payable, main, contract, namespace, @compiler, include, using]", % [H]), @@ -320,7 +320,7 @@ % end. % % -%gulp_ct2(Ast, Tokens = [#sfc_token{pos = Pos} | _]) -> +%gulp_ct2(Ast, Tokens = [#tk{pos = Pos} | _]) -> % % need to pass through pos for error messages on premature end of % % input... for now it's a fixme thing % gulp_ct3(Ast, Tokens, Pos). @@ -331,7 +331,7 @@ %gulp_ct3(Ast = #ast_ct{name = none}, Tokens, Pos) -> % NewPos = Pos, %% fixme % case Tokens of -% [Name = #sfc_token{type = con} | NewTokens] -> +% [Name = #tk{type = con} | NewTokens] -> % NewAst = Ast#ast_ct{name = Name}, % gulp_ct3(NewAst, NewTokens, NewPos); % [Name | _] -> @@ -355,7 +355,7 @@ %% contract X : Y, Z, W = %% ^ %gulp_ct3(Ast = #ast_ct{decls = none}, -% [#sfc_token{string = "="} | Tokens], +% [#tk{string = "="} | Tokens], % _Pos) -> % case gulp_block(fun gulp_decl/1, Tokens) of % {gulp, Decls} -> @@ -370,7 +370,7 @@ % % %-spec slurp_ct_impls(Tokens) -> Slurped -% when Tokens :: [sfc_token()], +% when Tokens :: [tk()], % Slurped :: {slurp, Impls, NewTokens} % | {error, Reason}, % Impls :: Tokens, @@ -380,11 +380,11 @@ % %% FIXME: this should be a restructured a tiny bit for better error %% handling -%slurp_ct_impls([_ = #sfc_token{string = ":"}, -% Con = #sfc_token{type = con} +%slurp_ct_impls([_ = #tk{string = ":"}, +% Con = #tk{type = con} % | NewTokens]) -> % slurp_ct_impls2([Con], NewTokens); -%slurp_ct_impls(NewTokens = [#sfc_token{string = "="} | _]) -> +%slurp_ct_impls(NewTokens = [#tk{string = "="} | _]) -> % {slurp, [], NewTokens}; %slurp_ct_impls([BadToken | _]) -> % Msg = efmt("slurp_ct_impls: expecting (: Con | =), got ~p", [BadToken]), @@ -393,11 +393,11 @@ % {error, {fixme, "expecting : or =, got end-of-input"}}. % % -%slurp_ct_impls2(Stk, [_ = #sfc_token{string = ","}, -% Con = #sfc_token{type = con} +%slurp_ct_impls2(Stk, [_ = #tk{string = ","}, +% Con = #tk{type = con} % | NewTokens]) -> % slurp_ct_impls2([Con | Stk], NewTokens); -%slurp_ct_impls2(Stk, NewTokens = [#sfc_token{string = "="} | _]) -> +%slurp_ct_impls2(Stk, NewTokens = [#tk{string = "="} | _]) -> % {slurp, lists:reverse(Stk), NewTokens}; %slurp_ct_impls2(Stk, BadTokens) -> % {error, {fixme, nyi, slurp_ct_impls2, [Stk, BadTokens]}}. @@ -410,7 +410,7 @@ %% | (EModifier* 'entrypoint' | FModifier* 'function') Block(FunDecl) %% | Using %gulp_decl(Tokens) -> -% case sfc_tokens:strings(1, Tokens) of +% case gsc_tokens:strings(1, Tokens) of % ["type"] -> gulp_type_alias(Tokens); % _ -> gulp_nyi(Tokens) % end. @@ -423,7 +423,7 @@ %% 'type' Id ['(' TVar* ')'] '=' TypeAlias %% ^ %gulp_ta(Ast = #ast_ta{alias = none}, -% _ = [#sfc_token{string = "type"}, Alias = #sfc_token{type = id} +% _ = [#tk{string = "type"}, Alias = #tk{type = id} % | NewTokens]) -> % NewAst = Ast#ast_ta{alias = Alias}, % gulp_ta(NewAst, NewTokens); @@ -440,7 +440,7 @@ %% 'type' Id ['(' TVar* ')'] '=' TypeAlias %% ^ %gulp_ta(Ast = #ast_ta{points_to = none}, -% _ = [#sfc_token{string = "="} | NewTokens]) -> +% _ = [#tk{string = "="} | NewTokens]) -> % case gulp_type_expr(NewTokens) of % {gulp, TypeExpr} -> % Result = Ast#ast_ta{points_to = TypeExpr}, @@ -455,25 +455,25 @@ %% ^ %% %% "(bar, baz) = ..." ~> {slurp, [bar, baz], "= ..."} -%slurp_ta_tvars(Tks = [#sfc_token{string = "="} | _]) -> +%slurp_ta_tvars(Tks = [#tk{string = "="} | _]) -> % {slurp, [], Tks}; -%slurp_ta_tvars([#sfc_token{string = "("}, -% #sfc_token{string = ")"} +%slurp_ta_tvars([#tk{string = "("}, +% #tk{string = ")"} % | NewTokens]) -> % {slurp, [], NewTokens}; -%slurp_ta_tvars([_ = #sfc_token{string = "("}, -% TVar = #sfc_token{type = tvar} +%slurp_ta_tvars([_ = #tk{string = "("}, +% TVar = #tk{type = tvar} % | NewTokens]) -> % slurp_tavars([TVar], NewTokens). % % %slurp_tavars(Stk, -% [_ = #sfc_token{string = ","}, -% TVar = #sfc_token{type = tvar} +% [_ = #tk{string = ","}, +% TVar = #tk{type = tvar} % | NewTks]) -> % slurp_tavars([TVar | Stk], NewTks); %slurp_tavars(Stk, -% [#sfc_token{string = ")"} +% [#tk{string = ")"} % | NewTks]) -> % {slurp, lists:reverse(Stk), NewTks}. % @@ -536,7 +536,7 @@ %% (X) // legal, equiv to X %% (X, Y) // illegal, should be X * Y %% -%slurp_type_expr(Tks = [#sfc_token{pos = Pos} | _]) -> +%slurp_type_expr(Tks = [#tk{pos = Pos} | _]) -> % case slurp_type1(Tks) of % % plist cases % % function types _ => _ @@ -546,7 +546,7 @@ % % () => _ % % (_) => _ % % (_, _) => _ -% {DomainType, [#sfc_token{string = "=>"} | After]} -> +% {DomainType, [#tk{string = "=>"} | After]} -> % case slurp_type_expr(After) of % {slurp, CodomainType, NewTks} -> % TypeExpr = #ast_te_fn{dom = DomainType, @@ -572,7 +572,7 @@ % {slurp, {token, Tk}, NewTks} -> % case NewTks of % % foo => bar is fine, normalized to (foo) => bar -% [#sfc_token{string = "=>"} | After] -> +% [#tk{string = "=>"} | After] -> % case slurp_type_expr(After) of % {slurp, Codom, NewAfter} -> % ArgType = #ast_te_tk{token = ArgTypeTk}, @@ -600,7 +600,7 @@ % % ["("] -> error(nyi); % % ["*"] -> error(nyi); % %case After of -% % [#sfc_token{string = "("} | _] -> +% % [#tk{string = "("} | _] -> % % case slurp_type1_II(After) of % % {slurp, {plist, ArgTypes}, NewAfter} -> % % {slurp, #ast_te_ap{fn = FirstType @@ -609,12 +609,12 @@ % % %% Type1 = {plist, Types} () (foo) (foo, bar) -%% | {token, #sfc_token{}} foo Bar.baz 'quux +%% | {token, #tk{}} foo Bar.baz 'quux %slurp_type1(Tks) -> -% case sfc_tokens:slurp_plist(Tks) of +% case gsc_tokens:slurp_plist(Tks) of % % head token is NOT open paren -> must be id/qid/tvar % {slurp, [], [Tk | NewTks]} -> -% TkType = Tk#sfc_token.type, +% TkType = Tk#tk.type, % case TkType of % id -> {slurp, {token, Tk}, NewTks}; % qid -> {slurp, {token, Tk}, NewTks}; @@ -633,10 +633,10 @@ % % %%slurp_type_expr_plist(Tks) -> -%% case sfc_tokens:slurp_plist(Tks) of +%% case gsc_tokens:slurp_plist(Tks) of %% % head token is NOT open paren -> must be id/qid/tvar %% {slurp, [], [Tk | NewTks]} -> -%% TkType = Tk#sfc_token.type, +%% TkType = Tk#tk.type, %% case TkType of %% id -> {slurp, {token, Tk}, NewTks}; %% qid -> {slurp, {token, Tk}, NewTks}; @@ -653,23 +653,23 @@ %% Error = {error, _} -> Error %% end. % -%gulp_ptype1([#sfc_token{string = "("}, #sfc_token{string = ")"}]) -> +%gulp_ptype1([#tk{string = "("}, #tk{string = ")"}]) -> % {gulp, []}; -%gulp_ptype1([#sfc_token{string = "("} | Tail]) -> +%gulp_ptype1([#tk{string = "("} | Tail]) -> % gulp_ptype1_II([], Tail). % %gulp_ptype1_II(Stk, Tks) -> % case slurp_type_expr(Tks) of -% {slurp, NewType, [#sfc_token{string = ")"}]} -> +% {slurp, NewType, [#tk{string = ")"}]} -> % {gulp, lists:reverse([NewType | Stk])}; -% {slurp, NewType, [#sfc_token{string = ","} | NewTks]} -> +% {slurp, NewType, [#tk{string = ","} | NewTks]} -> % gulp_ptype1_II([NewType | Stk], NewTks); % Error = {error, _} -> % Error % end. % % -%%gulp_te_tk([Tk = #sfc_token{type = TkType}]) +%%gulp_te_tk([Tk = #tk{type = TkType}]) %% when id =:= TkType; %% qid =:= TkType; %% tvar =:= TkType -> diff --git a/src/gsc_bst.erl b/src/gsc_bst.erl index 5e0d7fa..7f19cd7 100644 --- a/src/gsc_bst.erl +++ b/src/gsc_bst.erl @@ -1,5 +1,5 @@ % @doc -% sfc_bst = ast second attempt but prefix so tab complete +% gsc_bst = ast second attempt but prefix so tab complete % % from docs/sophia/so_syntax.md: % @@ -38,12 +38,12 @@ % FModifier ::= 'stateful' | 'private' % % Args ::= '(' Sep(Pattern, ',') ')' --module(sfc_bst). +-module(gsc_bst). -compile([export_all, nowarn_export_all]). --include("$sfc_include/sfc.hrl"). +-include("$gsc_include/gsc.hrl"). -%-record(bst_nyi, {tokens :: [sfc_token()]). +%-record(bst_nyi, {tokens :: [tk()]). % %% ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl) %-record(bst_ct, @@ -73,7 +73,7 @@ %% | '@compiler' PragmaOp Version %% | 'include' String %% | Using -%gulp(top_decl, [#sfc_token{string = S} | Rest]) -> +%gulp(top_decl, [#tk{string = S} | Rest]) -> % case strings(3, Tokens) of % ["payable", "contract", "interface"] -> % gulp_ct(#bst_iface{payable = true, main = true}, drop(3, Tokens)); diff --git a/src/gsc_parse_type_expr.erl b/src/gsc_parse_type_expr.erl index a2faca3..917477f 100644 --- a/src/gsc_parse_type_expr.erl +++ b/src/gsc_parse_type_expr.erl @@ -1,4 +1,4 @@ --module(sfc_parse_type_expr). +-module(gsc_parse_type_expr). -export_type([ ]). @@ -9,7 +9,7 @@ take_until_ifx_op/1 ]). --include("$sfc_include/sfc.hrl"). +-include("$gsc_include/gsc.hrl"). %------------------------------------------------------ @@ -17,18 +17,18 @@ %------------------------------------------------------ -type vtk_ifx_op() :: vtk_apply_to - | {'vtk_*', sfc_token()} - | {'vtk_=>', sfc_token()}. + | {'vtk_*', tk()} + | {'vtk_=>', tk()}. --type vtk() :: sfc_token() - | {vtk_plist, [sfc_token()]} +-type vtk() :: tk() + | {vtk_plist, [tk()]} | vtk_ifx_op(). -type gulped(X) :: {gulp, X} | {error, any()}. --type slurped(X) :: {slurp, X, Rest :: [sfc_token()]} +-type slurped(X) :: {slurp, X, Rest :: [tk()]} | {error, any()}. @@ -43,7 +43,7 @@ % @doc for testing unsafe_vtks_from_string(S) -> - {ok, SigTks} = sfc_tokens:significant_tokens(S), + {ok, SigTks} = gsc_tokens:significant_tokens(S), {gulp, Vtks} = gulp_vtks(SigTks), Vtks. @@ -79,9 +79,9 @@ unsafe_vtks_from_string(S) -> -record(ast_parens, - {open = none :: none | sfc_token(), - inner = none :: none | [sfc_token()], - close = none :: none | sfc_token()}). + {open = none :: none | tk(), + inner = none :: none | [tk()], + close = none :: none | tk()}). chunk_by(Strategy, Tokens) -> chunk_by(Strategy, [], Tokens). @@ -91,7 +91,7 @@ chunk_by(Strategy, Tokens) -> Strategy :: chunk_strategy(), Oks :: [any()], Errs :: [{error, Reason :: any()}], - Tokens :: [sfc_token()], + Tokens :: [tk()], Result :: {ok, gulp_chunks_by(_, Stk, [], []) -> @@ -109,8 +109,8 @@ gulp_chunks_by(plist, Stk, Errs, Tokens) -> gulp_chunks_by(plist, Stk, [Error | Errs], Tokens); end. -slurp_plist_rec(Tokens = [#sfc_token{string = "(" | _]) -> - case sfc_tokens:slurp_plist(Tokens) of +slurp_plist_rec(Tokens = [#tk{string = "(" | _]) -> + case gsc_tokens:slurp_plist(Tokens) of {slurp, [], _} -> barf; {slurp, PTokens, NewTokens} -> @@ -118,18 +118,18 @@ slurp_plist_rec(Tokens = [#sfc_token{string = "(" | _]) -> end; %-spec gulp_ifx_tree(Tokens) -> gulped(IfxTree) when -% Tokens :: [sfc_token()], +% Tokens :: [tk()], % IfxTree :: ifx_tree(). % %-spec chunk_by(ChunkStrategy, Tokens) -> Result when % ChunkStrategy :: chunk_strategy(), -% Tokens :: [sfc_token()], +% Tokens :: [tk()], % Result :: {ChunkStrategy, -spec gulp_vtks(Tokens) -> Result when - Tokens :: [sfc_token()], + Tokens :: [tk()], Result :: gulped(VirtualTokens), VirtualTokens :: [vtk()]. @@ -155,8 +155,8 @@ gulp_vtks(Acc, Tks0) -> % ~> [..., foo, {plist, "(bar, baz)"}, ...] {_Pfx = Tks1_BeforeOpen, _Sfx = Tks2_OpenNAfter - = [#sfc_token{string = "("} | _]} -> - case sfc_tokens:slurp_plist(Tks2_OpenNAfter) of + = [#tk{string = "("} | _]} -> + case gsc_tokens:slurp_plist(Tks2_OpenNAfter) of {slurp, Tks2A_OpenToClose, Tks2B_AfterClose} -> NewAcc = [Acc, Tks1_BeforeOpen, @@ -168,7 +168,7 @@ gulp_vtks(Acc, Tks0) -> end; % product {_Pfx = Tks0_BeforeTimes, - _Sfx = [ Tk1A_Times = #sfc_token{string = "*"} + _Sfx = [ Tk1A_Times = #tk{string = "*"} | Tks1B_AfterTimes]} -> NewAcc = [Acc, Tks0_BeforeTimes, @@ -176,7 +176,7 @@ gulp_vtks(Acc, Tks0) -> gulp_vtks(NewAcc, Tks1B_AfterTimes); % funType {_Pfx = Tks0_BeforeOp, - _Sfx = [ Tk1A_Op = #sfc_token{string = "=>"} + _Sfx = [ Tk1A_Op = #tk{string = "=>"} | Tks1B_AfterOp]} -> NewAcc = [Acc, Tks0_BeforeOp, @@ -187,7 +187,7 @@ gulp_vtks(Acc, Tks0) -> -spec take_until_ifx_op(Tokens) -> Result when - Tokens :: [sfc_token()], + Tokens :: [tk()], Result :: {Taken, NewTokens}, Taken :: Tokens, NewTokens :: Tokens. @@ -200,7 +200,7 @@ take_until_ifx_op(Tks) -> take_until_ifx_op(Stack, []) -> {lists:reverse(Stack), []}; take_until_ifx_op(Stack, Tokens = [Token | NewTokens]) -> - TokStr = Token#sfc_token.string, + TokStr = Token#tk.string, Continue = case TokStr of % exit cases diff --git a/src/gsc_strmatch.erl b/src/gsc_strmatch.erl index 68eb42f..02992a8 100644 --- a/src/gsc_strmatch.erl +++ b/src/gsc_strmatch.erl @@ -65,12 +65,12 @@ % KW = string:join(Keywords, "|"), % % There is a lot going on in that code. This is purely the part that matches -% strings specifically, . The *tokenizer* (sfc_tokenizer) knows the hierarchy +% strings specifically, . The *tokenizer* (gsc_tokenizer) knows the hierarchy % of sophia tokens (e.g. it knows to match keywords before identifiers, so that % `contract` gets tokenized as a keyword and not a variable name), and then % calls into this module in order to match the string shape it's looking for. % @end --module(sfc_strmatch). +-module(gsc_strmatch). %-compile([export_all, nowarn_export_all]). diff --git a/src/gsc_tokens.erl b/src/gsc_tokens.erl index af8140c..cab30e1 100644 --- a/src/gsc_tokens.erl +++ b/src/gsc_tokens.erl @@ -7,16 +7,16 @@ % For MVP it mimics the behavior of so_scan exactly, in terms of like what its % definition of a token is and so on. % -% sfc_so_scan.erl contains a compatibility layer that should agree with so_scan +% gsc_so_scan.erl contains a compatibility layer that should agree with so_scan % exactly. It converts the data types here to the shapes that so_scan outputs. % % This is for two reasons: % % 1. in order to enable testing the two modules against each other, and -% 2. to future-proof in case we decide to incrementally incorporate the sfc +% 2. to future-proof in case we decide to incrementally incorporate the gsc % code into the legacy sophia compiler % @end --module(sfc_tokens). +-module(gsc_tokens). % meta -export([ @@ -46,7 +46,7 @@ new_pos/2 ]). --include("$sfc_include/sfc.hrl"). +-include("$gsc_include/gsc.hrl"). %======================================================= @@ -55,11 +55,11 @@ -spec strings(N, Tokens) -> AtMostNStrings when N :: non_neg_integer(), - Tokens :: [sfc_token()], + Tokens :: [tk()], AtMostNStrings :: [string()]. % @doc return the strings of the first N tokens -strings(N, [#sfc_token{string = S} | Rest]) when is_integer(N), N >= 1 -> +strings(N, [#tk{string = S} | Rest]) when is_integer(N), N >= 1 -> [S | strings(N-1, Rest)]; strings(_, []) -> []; @@ -84,16 +84,16 @@ strings(0, _) -> % ... -spec take_block(Tokens) -> {BlockTokens, Rest} - when Tokens :: [sfc_token()], + when Tokens :: [tk()], BlockTokens :: Tokens, Rest :: Tokens. % @doc % takes all tokens whose column position is >= the column position of % the head token -take_block([H = #sfc_token{pos = {_, BlkCol}} | T]) -> +take_block([H = #tk{pos = {_, BlkCol}} | T]) -> TokenInBlock = - fun(#sfc_token{pos = {_, TkCol}}) -> + fun(#tk{pos = {_, TkCol}}) -> BlkCol =< TkCol end, take_while(TokenInBlock, [H], T); @@ -103,16 +103,16 @@ take_block([]) -> -spec take_block_item(Tokens) -> {ItemTokens, Rest} - when Tokens :: [sfc_token()], + when Tokens :: [tk()], ItemTokens :: Tokens, Rest :: Tokens. % @doc % takes all tokens whose column position is > the column position of % the head token -take_block_item([H = #sfc_token{pos = {_, ItemCol}} | T]) -> +take_block_item([H = #tk{pos = {_, ItemCol}} | T]) -> TokenInItem = - fun(#sfc_token{pos = {_, TkCol}}) -> + fun(#tk{pos = {_, TkCol}}) -> ItemCol < TkCol end, take_while(TokenInItem, [H], T); @@ -128,7 +128,7 @@ take_block_item([]) -> Mismatch :: {fixme, mismatch, OpenStack, ClosedBy}, OpenStack :: Tokens, ClosedBy :: none | {value, Token}, - Token :: sfc_token(). + Token :: tk(). % @doc % the verbiage here is `slurp' rather than `take' because we insist on @@ -159,7 +159,7 @@ take_block_item([]) -> % counterintuitive to end-users (who are programmers, entirely % unfamiliar with notions like stacks and open/close delimiters) -slurp_plist([Hd = #sfc_token{string = "("} | Tl]) -> +slurp_plist([Hd = #tk{string = "("} | Tl]) -> slurp_dlist([Hd], [Hd], Tl); slurp_plist(Tks) -> {slurp, [], Tks}. @@ -170,30 +170,30 @@ slurp_dlist(All, [], NewTokens) -> {slurp, lists:reverse(All), NewTokens}; % WMA stack is nonempty % happy cases of opens getting popped -slurp_dlist(All, [#sfc_token{string = "("} | NewOpen], - [#sfc_token{string = ")"} = Tk | NewTks]) -> +slurp_dlist(All, [#tk{string = "("} | NewOpen], + [#tk{string = ")"} = Tk | NewTks]) -> slurp_dlist([Tk | All], NewOpen, NewTks); -slurp_dlist(All, [#sfc_token{string = "["} | NewOpen], - [#sfc_token{string = "]"} = Tk | NewTks]) -> +slurp_dlist(All, [#tk{string = "["} | NewOpen], + [#tk{string = "]"} = Tk | NewTks]) -> slurp_dlist([Tk | All], NewOpen, NewTks); -slurp_dlist(All, [#sfc_token{string = "{"} | NewOpen], - [#sfc_token{string = "}"} = Tk | NewTks]) -> +slurp_dlist(All, [#tk{string = "{"} | NewOpen], + [#tk{string = "}"} = Tk | NewTks]) -> slurp_dlist([Tk | All], NewOpen, NewTks); % happy: open delimiters getting pushed -slurp_dlist(All, Opens, [#sfc_token{string = "("} = Tk | NewTks]) -> +slurp_dlist(All, Opens, [#tk{string = "("} = Tk | NewTks]) -> slurp_dlist([Tk | All], [Tk | Opens], NewTks); -slurp_dlist(All, Opens, [#sfc_token{string = "["} = Tk | NewTks]) -> +slurp_dlist(All, Opens, [#tk{string = "["} = Tk | NewTks]) -> slurp_dlist([Tk | All], [Tk | Opens], NewTks); -slurp_dlist(All, Opens, [#sfc_token{string = "{"} = Tk | NewTks]) -> +slurp_dlist(All, Opens, [#tk{string = "{"} = Tk | NewTks]) -> slurp_dlist([Tk | All], [Tk | Opens], NewTks); % sad: mismatch cases slurp_dlist(All, Opens, []) -> {error, {fixme, mismatch, Opens, none}}; -slurp_dlist(All, Opens, [#sfc_token{string = "}"} = BadClose | _]) -> +slurp_dlist(All, Opens, [#tk{string = "}"} = BadClose | _]) -> {error, {fixme, mismatch, Opens, {value, BadClose}}}; -slurp_dlist(All, Opens, [#sfc_token{string = "]"} = BadClose | _]) -> +slurp_dlist(All, Opens, [#tk{string = "]"} = BadClose | _]) -> {error, {fixme, mismatch, Opens, {value, BadClose}}}; -slurp_dlist(All, Opens, [#sfc_token{string = ")"} = BadClose | _]) -> +slurp_dlist(All, Opens, [#tk{string = ")"} = BadClose | _]) -> {error, {fixme, mismatch, Opens, {value, BadClose}}}; % general case: non-terminal token gets pushed slurp_dlist(All, Opens, [Tk | NewTks]) -> @@ -211,7 +211,7 @@ slurp_dlist(All, Opens, [Tk | NewTks]) -> % ]). %------------------------------------------------------- --spec token_types_parse_order() -> [sfc_token_type()]. +-spec token_types_parse_order() -> [gsc_token_type()]. % @doc % list of sophia tokens in parse order (if an earlier type matches, the later % type isn't even checked) @@ -288,9 +288,9 @@ kwds() -> %------------------------------------------------------- % Token accessors --spec indent_level(sfc_token()) -> pos_integer(). +-spec indent_level(tk()) -> pos_integer(). -indent_level(#sfc_token{pos = {_, IndentLevel}}) -> +indent_level(#tk{pos = {_, IndentLevel}}) -> IndentLevel. @@ -298,8 +298,8 @@ indent_level(#sfc_token{pos = {_, IndentLevel}}) -> -spec significant_tokens(SrcStr) -> Result when SrcStr :: iolist(), Result :: {ok, Tokens} - | {error, sfc_err()}, - Tokens :: [sfc_token()]. + | {error, gsc_err()}, + Tokens :: [tk()]. significant_tokens(SrcStr) -> case tokens(SrcStr) of @@ -312,7 +312,7 @@ significant_tokens(SrcStr) -> -spec filter_significant(Tokens) -> SignificantTokens - when Tokens :: [sfc_token()], + when Tokens :: [tk()], SignificantTokens :: Tokens. filter_significant(Tokens) -> @@ -321,19 +321,19 @@ filter_significant(Tokens) -> -spec is_significant(Token) -> boolean() - when Token :: sfc_token(). + when Token :: tk(). -is_significant(#sfc_token{type = bcom}) -> false; -is_significant(#sfc_token{type = lcom}) -> false; -is_significant(#sfc_token{type = ws}) -> false; +is_significant(#tk{type = bcom}) -> false; +is_significant(#tk{type = lcom}) -> false; +is_significant(#tk{type = ws}) -> false; is_significant(_) -> true. -spec tokens(SrcStr) -> Result when SrcStr :: iolist(), Result :: {ok, Tokens} - | {error, sfc_err()}, - Tokens :: [sfc_token()]. + | {error, gsc_err()}, + Tokens :: [tk()]. % @doc % Recursively parse all tokens off the front end of the string. `Rest' is % the first tail of the string for which no token parser succeeds. @@ -349,13 +349,13 @@ tokens(Stack, _FinalPos, "") -> {ok, lists:reverse(Stack)}; tokens(Stack, Pos, SrcStr) -> case slurp_token(Pos, SrcStr) of - {tokmatch, NewToken = #sfc_token{string = TokStr}, + {tokmatch, NewToken = #tk{string = TokStr}, NewSrcStr} -> NewPos = new_pos(Pos, TokStr), tokens([NewToken | Stack], NewPos, NewSrcStr); no_tokmatch -> PrevTokens = lists:reverse(Stack), - Err = #sfc_err_no_tokmatch{prev_tokens = PrevTokens, + Err = #gsc_err_no_tokmatch{prev_tokens = PrevTokens, break_pos = Pos, rest = SrcStr}, {error, Err}; @@ -367,7 +367,7 @@ tokens(Stack, Pos, SrcStr) -> % for now we're just going to agree with so_scan {ierr, unterminated_block_comment} -> PrevTokens = lists:reverse(Stack), - Err = #sfc_err_bcom_unterminated{prev_tokens = PrevTokens, + Err = #gsc_err_bcom_unterminated{prev_tokens = PrevTokens, break_pos = Pos, rest = SrcStr}, {error, Err}; @@ -455,13 +455,13 @@ next_tabstop8(Col0) when Col0 >= 0 -> -spec slurp_token(Pos, SrcStr) -> Result - when Pos :: sfc_pos(), + when Pos :: gsc_pos(), SrcStr :: string(), Result :: {tokmatch, Token, Rest} | no_tokmatch - | {error, sfc_err()} + | {error, gsc_err()} | {ierr, unterminated_block_comment}, - Token :: sfc_token(), + Token :: tk(), Rest :: string(). % @doc % grab a single token off the front of the string according to @@ -474,14 +474,14 @@ slurp_token(Pos, SrcStr) -> -spec slurp_token_types(ParseOrder, Pos, SrcStr) -> Result - when ParseOrder :: [sfc_token_type()], - Pos :: sfc_pos(), + when ParseOrder :: [gsc_token_type()], + Pos :: gsc_pos(), SrcStr :: string(), Result :: {tokmatch, Token, Rest} | no_tokmatch - | {error, sfc_err()} + | {error, gsc_err()} | {ierr, unterminated_block_comment}, - Token :: sfc_token(), + Token :: tk(), Rest :: string(). % @doc % grab a single token off the front of the string according to @@ -499,14 +499,14 @@ slurp_token_types([], _Pos, _SrcStr) -> -spec slurp_token_of_type(TokenType, Pos, SrcStr) -> MaybeToken - when TokenType :: sfc_token_type(), - Pos :: sfc_pos(), + when TokenType :: gsc_token_type(), + Pos :: gsc_pos(), SrcStr :: string(), MaybeToken :: {tokmatch, Token, Rest} | no_tokmatch - | {error, sfc_err()} + | {error, gsc_err()} | {ierr, unterminated_block_comment}, - Token :: sfc_token(), + Token :: tk(), Rest :: string(). % @doc % match a sophia token of a given type off the front of the string @@ -522,7 +522,7 @@ slurp_token_of_type(lcom, Pos, SrcStr) -> case SrcStr of "//" ++ _ -> {Line, Rest} = takeline("", SrcStr), - Token = #sfc_token{type = lcom, + Token = #tk{type = lcom, pos = Pos, string = Line}, {tokmatch, Token, Rest}; @@ -536,7 +536,7 @@ slurp_token_of_type(bcom, Pos, SrcStr0) -> "/*" ++ SrcStr1 -> case bcom("/*", 1, SrcStr1) of {ok, CommentStr, SrcStr2} -> - Token = #sfc_token{type = bcom, + Token = #tk{type = bcom, pos = Pos, string = CommentStr}, {tokmatch, Token, SrcStr2}; @@ -547,12 +547,12 @@ slurp_token_of_type(bcom, Pos, SrcStr0) -> no_tokmatch end; slurp_token_of_type(ws, Pos, SrcStr) -> - WhitespaceMatcher = sfc_strmatch:smr_sf_ws(), - case sfc_strmatch:match(WhitespaceMatcher, SrcStr) of + WhitespaceMatcher = gsc_strmatch:smr_sf_ws(), + case gsc_strmatch:match(WhitespaceMatcher, SrcStr) of no_strmatch -> no_tokmatch; {strmatch, WS, Rest} -> - Token = #sfc_token{type = ws, + Token = #tk{type = ws, pos = Pos, string = WS}, {tokmatch, Token, Rest} @@ -570,86 +570,86 @@ slurp_token_of_type(ws, Pos, SrcStr) -> % of the kwds slurp_token_of_type(kwd, Pos, SrcStr) -> case slurp_token_of_type(id, Pos, SrcStr) of - {tokmatch, IdTok = #sfc_token{string = IdStr}, Rest} -> + {tokmatch, IdTok = #tk{string = IdStr}, Rest} -> case lists:member(IdStr, kwds()) of false -> no_tokmatch; true -> - KwTok = IdTok#sfc_token{type = kwd}, + KwTok = IdTok#tk{type = kwd}, {tokmatch, KwTok, Rest} end; no_tokmatch -> no_tokmatch end; slurp_token_of_type(op, Pos, SrcStr) -> - case sfc_strmatch:match(sfc_strmatch:smr_sf_op(), SrcStr) of + case gsc_strmatch:match(gsc_strmatch:smr_sf_op(), SrcStr) of {strmatch, Str, Rest} -> - Token = #sfc_token{type = op, pos = Pos, string = Str}, + Token = #tk{type = op, pos = Pos, string = Str}, {tokmatch, Token, Rest}; no_strmatch -> no_tokmatch end; slurp_token_of_type(punct, Pos, SrcStr) -> - case sfc_strmatch:match(sfc_strmatch:smr_sf_punct(), SrcStr) of + case gsc_strmatch:match(gsc_strmatch:smr_sf_punct(), SrcStr) of {strmatch, Str, Rest} -> - Token = #sfc_token{type = punct, pos = Pos, string = Str}, + Token = #tk{type = punct, pos = Pos, string = Str}, {tokmatch, Token, Rest}; no_strmatch -> no_tokmatch end; % SOPHIA VARIABLE NAMES: id, con, qid, qcon, tvar slurp_token_of_type(id, Pos, SrcStr) -> - case sfc_strmatch:match(sfc_strmatch:smr_sf_id(), SrcStr) of + case gsc_strmatch:match(gsc_strmatch:smr_sf_id(), SrcStr) of {strmatch, IdStr, Rest} -> - Token = #sfc_token{type = id, pos = Pos, string = IdStr}, + Token = #tk{type = id, pos = Pos, string = IdStr}, {tokmatch, Token, Rest}; no_strmatch -> no_tokmatch end; slurp_token_of_type(con, Pos, SrcStr) -> - case sfc_strmatch:match(sfc_strmatch:smr_sf_con(), SrcStr) of + case gsc_strmatch:match(gsc_strmatch:smr_sf_con(), SrcStr) of {strmatch, Str, Rest} -> - Token = #sfc_token{type = con, pos = Pos, string = Str}, + Token = #tk{type = con, pos = Pos, string = Str}, {tokmatch, Token, Rest}; no_strmatch -> no_tokmatch end; slurp_token_of_type(qid, Pos, SrcStr) -> - case sfc_strmatch:match(sfc_strmatch:smr_sf_qid(), SrcStr) of + case gsc_strmatch:match(gsc_strmatch:smr_sf_qid(), SrcStr) of {strmatch, Str, Rest} -> - Token = #sfc_token{type = qid, pos = Pos, string = Str}, + Token = #tk{type = qid, pos = Pos, string = Str}, {tokmatch, Token, Rest}; no_strmatch -> no_tokmatch end; slurp_token_of_type(qcon, Pos, SrcStr) -> - case sfc_strmatch:match(sfc_strmatch:smr_sf_qcon(), SrcStr) of + case gsc_strmatch:match(gsc_strmatch:smr_sf_qcon(), SrcStr) of {strmatch, Str, Rest} -> - Token = #sfc_token{type = qcon, pos = Pos, string = Str}, + Token = #tk{type = qcon, pos = Pos, string = Str}, {tokmatch, Token, Rest}; no_strmatch -> no_tokmatch end; slurp_token_of_type(tvar, Pos, SrcStr) -> - case sfc_strmatch:match(sfc_strmatch:smr_sf_tvar(), SrcStr) of + case gsc_strmatch:match(gsc_strmatch:smr_sf_tvar(), SrcStr) of {strmatch, Str, Rest} -> - Token = #sfc_token{type = tvar, pos = Pos, string = Str}, + Token = #tk{type = tvar, pos = Pos, string = Str}, {tokmatch, Token, Rest}; no_strmatch -> no_tokmatch end; slurp_token_of_type(int16, Pos, SrcStr) -> - case sfc_strmatch:match(sfc_strmatch:smr_sf_int16(), SrcStr) of + case gsc_strmatch:match(gsc_strmatch:smr_sf_int16(), SrcStr) of {strmatch, Str, Rest} -> - Token = #sfc_token{type = int16, pos = Pos, string = Str}, + Token = #tk{type = int16, pos = Pos, string = Str}, {tokmatch, Token, Rest}; no_strmatch -> no_tokmatch end; slurp_token_of_type(int10, Pos, SrcStr) -> - case sfc_strmatch:match(sfc_strmatch:smr_sf_int10(), SrcStr) of + case gsc_strmatch:match(gsc_strmatch:smr_sf_int10(), SrcStr) of {strmatch, Str, Rest} -> - Token = #sfc_token{type = int10, pos = Pos, string = Str}, + Token = #tk{type = int10, pos = Pos, string = Str}, {tokmatch, Token, Rest}; no_strmatch -> no_tokmatch @@ -659,60 +659,60 @@ slurp_token_of_type(int10, Pos, SrcStr) -> % % char: sophia char literal slurp_token_of_type(ak, Pos, SrcStr) -> - StringMatcher = sfc_strmatch:smr_sf_ak(), - case sfc_strmatch:match(StringMatcher, SrcStr) of + StringMatcher = gsc_strmatch:smr_sf_ak(), + case gsc_strmatch:match(StringMatcher, SrcStr) of no_strmatch -> no_tokmatch; {strmatch, TokenStr, Rest} -> - Token = #sfc_token{type = ak, pos = Pos, string = TokenStr}, + Token = #tk{type = ak, pos = Pos, string = TokenStr}, {tokmatch, Token, Rest} end; slurp_token_of_type(ct, Pos, SrcStr) -> - StringMatcher = sfc_strmatch:smr_sf_ct(), - case sfc_strmatch:match(StringMatcher, SrcStr) of + StringMatcher = gsc_strmatch:smr_sf_ct(), + case gsc_strmatch:match(StringMatcher, SrcStr) of no_strmatch -> no_tokmatch; {strmatch, TokenStr, Rest} -> - Token = #sfc_token{type = ct, pos = Pos, string = TokenStr}, + Token = #tk{type = ct, pos = Pos, string = TokenStr}, {tokmatch, Token, Rest} end; slurp_token_of_type(sg, Pos, SrcStr) -> - StringMatcher = sfc_strmatch:smr_sf_sg(), - case sfc_strmatch:match(StringMatcher, SrcStr) of + StringMatcher = gsc_strmatch:smr_sf_sg(), + case gsc_strmatch:match(StringMatcher, SrcStr) of no_strmatch -> no_tokmatch; {strmatch, TokenStr, Rest} -> - Token = #sfc_token{type = sg, pos = Pos, string = TokenStr}, + Token = #tk{type = sg, pos = Pos, string = TokenStr}, {tokmatch, Token, Rest} end; slurp_token_of_type(char, Pos, SrcStr) -> - StringMatcher = sfc_strmatch:smr_sf_char(), - case sfc_strmatch:match(StringMatcher, SrcStr) of + StringMatcher = gsc_strmatch:smr_sf_char(), + case gsc_strmatch:match(StringMatcher, SrcStr) of no_strmatch -> no_tokmatch; {strmatch, TokenStr, Rest} -> - Token = #sfc_token{type = char, pos = Pos, string = TokenStr}, + Token = #tk{type = char, pos = Pos, string = TokenStr}, {tokmatch, Token, Rest} end; slurp_token_of_type(string, Pos, SrcStr) -> - case sfc_strmatch:match(sfc_strmatch:smr_sf_str(), SrcStr) of + case gsc_strmatch:match(gsc_strmatch:smr_sf_str(), SrcStr) of no_strmatch -> no_tokmatch; {strmatch, TokenStr, Rest} -> - Token = #sfc_token{type = string, pos = Pos, string = TokenStr}, + Token = #tk{type = string, pos = Pos, string = TokenStr}, {tokmatch, Token, Rest} end; slurp_token_of_type(bytes, Pos, SrcStr) -> - case sfc_strmatch:match(sfc_strmatch:smr_sf_bytes(), SrcStr) of + case gsc_strmatch:match(gsc_strmatch:smr_sf_bytes(), SrcStr) of no_strmatch -> no_tokmatch; {strmatch, TokenStr, Rest} -> - Token = #sfc_token{type = bytes, pos = Pos, string = TokenStr}, + Token = #tk{type = bytes, pos = Pos, string = TokenStr}, {tokmatch, Token, Rest} end; slurp_token_of_type(NyiType, Pos, SrcStr) -> Message = io_lib:format("cannot slurp token of type: ~p", [NyiType]), - error(#sfc_err{atom = nyi, + error(#gsc_err{atom = nyi, string = Message, extra = [{token_type, NyiType}, {pos, Pos}, diff --git a/src/gsc_so_scan.erl b/src/gso_scan.erl similarity index 91% rename from src/gsc_so_scan.erl rename to src/gso_scan.erl index 7ea6475..71aa185 100644 --- a/src/gsc_so_scan.erl +++ b/src/gso_scan.erl @@ -1,9 +1,9 @@ % @doc compatibility layer to test against so_scan % -% converts sfc_tokens data to so_scan tokens +% converts gsc_tokens data to so_scan tokens % % Ref: so_scan.erl --module(sfc_so_scan). +-module(gso_scan). -export_type([ so_kwd/0, @@ -19,7 +19,7 @@ ken_barson_rises/2 ]). --include("$sfc_include/sfc.hrl"). +-include("$gsc_include/gsc.hrl"). %================================ % API: types @@ -76,14 +76,14 @@ -type so_symbol() :: so_kwd() | so_special_char() | atom(). -type so_token2() :: {Symbol :: so_symbol(), - Location :: sfc_pos()}. + Location :: gsc_pos()}. % FIXME % this is 'id', 'con', qid -type so_tk3type() :: char | string | hex | int | bytes | qid | qcon | tvar | id | con. -type so_token3() :: {TokenType :: so_tk3type(), - Location :: sfc_pos(), + Location :: gsc_pos(), TokenValue :: term()}. -type so_token() :: so_token2() | so_token3(). @@ -93,23 +93,23 @@ % API: functions %================================ --spec scan(SrcStr) -> {ok, SoTokens} | {error, sfc_err()} +-spec scan(SrcStr) -> {ok, SoTokens} | {error, gsc_err()} when SrcStr :: iolist(), SoTokens :: [so_token()]. % @doc % this is meant to agree with so_scan:scan/1 in all cases % -% this converts sfc's internal representation of tokens into the format that +% this converts gsc's internal representation of tokens into the format that % so_scan outputs % @end scan(SrcStr) -> - case sfc_tokens:tokens(SrcStr) of + case gsc_tokens:tokens(SrcStr) of {ok, SfLTokens} -> SoTokens = to_so_tokens(SfLTokens), {ok, SoTokens}; % fucking stupid - {error, #sfc_err_bcom_unterminated{prev_tokens = SfcTokens}} -> + {error, #gsc_err_bcom_unterminated{prev_tokens = SfcTokens}} -> {ok, to_so_tokens(SfcTokens)}; Error -> Error @@ -118,11 +118,11 @@ scan(SrcStr) -> -spec to_so_tokens(SfcTokens) -> SoTokens - when SfcTokens :: [sfc_token()], + when SfcTokens :: [tk()], SoTokens :: [so_token()]. % @doc -% most sfc tokens map 1-to-1 with so_tokens. the +% most gsc tokens map 1-to-1 with so_tokens. the % exception is ak/ct/sg literals. this is a % many-to-one-mapping, and therefore ak, sg, ct need to % be handled at the list level. @@ -133,7 +133,7 @@ scan(SrcStr) -> % stage computes the pubkey that corresponds to. % % as a result, if we have ak_GHI, I is not a valid -% base58 char, so WE (sfc) end up lexing that as +% base58 char, so WE (gsc) end up lexing that as % % [{ak, "ak_GH"}, {con, "I"}] % @@ -151,12 +151,12 @@ scan(SrcStr) -> % % so if we see an ak/ct/sg token, we summon evil ben % carson to reconjoin the unconjoined twins -to_so_tokens([ AkTok = #sfc_token{type = AkCtSg, pos = Pos} +to_so_tokens([ AkTok = #tk{type = AkCtSg, pos = Pos} | Sheeit]) when ak =:= AkCtSg; ct =:= AkCtSg; sg =:= AkCtSg -> - {#sfc_token{string = FinalAkStr}, NewSheeit} + {#tk{string = FinalAkStr}, NewSheeit} = ken_barson_rises(AkTok, Sheeit), [{id, Pos, FinalAkStr}| to_so_tokens(NewSheeit)]; % this part is just lists:filtermap @@ -171,8 +171,8 @@ to_so_tokens([]) -> -spec ken_barson_rises(InitApiToken, SfToks) -> {FinalApiToken, NewSfToks} - when InitApiToken :: sfc_token(), - SfToks :: [sfc_token()], + when InitApiToken :: tk(), + SfToks :: [tk()], FinalApiToken :: InitApiToken, NewSfToks :: SfToks. % @doc @@ -252,15 +252,15 @@ to_so_tokens([]) -> % % When any of these appear AFTER at least one valid % base58 char in a `ak_`/`ct_`/`sg_` prefixed -% identifier, `sfc` splits what `so_scan` sees as one -% `id` token into 2+ sfc tokens. +% identifier, `gsc` splits what `so_scan` sees as one +% `id` token into 2+ gsc tokens. % % **No split if non-base58 char is immediately after % `_`**: `smr_plus` requires >=1 base58 char to % match; `ak_I`, `ak_0`, `ak__bar` all fall % through to `id` and both tokenizers agree. -ken_barson_rises(AkTokAcc = #sfc_token{string = AkStr}, - SrcTokens = [#sfc_token{type = CandidateType, +ken_barson_rises(AkTokAcc = #tk{string = AkStr}, + SrcTokens = [#tk{type = CandidateType, string = CandidateString} | Rest]) -> % candidate: @@ -273,7 +273,7 @@ ken_barson_rises(AkTokAcc = #sfc_token{string = AkStr}, Smash -> % dig out the token from LcTokApi NewAkStr = AkStr ++ CandidateString, - NewAkTokAcc = AkTokAcc#sfc_token{string = NewAkStr}, + NewAkTokAcc = AkTokAcc#tk{string = NewAkStr}, ken_barson_rises(NewAkTokAcc, Rest); Pass -> {AkTokAcc, SrcTokens} @@ -310,7 +310,7 @@ pass_types() -> -spec to_so_token(SfcToken) -> MaybeSoToken - when SfcToken :: sfc_token(), + when SfcToken :: tk(), MaybeSoToken :: {true, SoToken} | false, SoToken :: so_token(). @@ -320,7 +320,7 @@ pass_types() -> % follow-on tokens % @end -to_so_token(#sfc_token{type = SfTokenType, +to_so_token(#tk{type = SfTokenType, pos = Pos, string = SfTokenStr}) -> case SfTokenType of @@ -369,8 +369,8 @@ to_so_token(#sfc_token{type = SfTokenType, int10 -> {true, {int, Pos, so_parse_int(SfTokenStr)}}; bytes -> {true, {bytes, Pos, so_parse_bytes(SfTokenStr)}}; NYI -> - Msg = io_lib:format("sfc_so_scan:to_so_token/1: unhandled token shape: ~p", [NYI]), - error(#sfc_err{atom = nyi, + Msg = io_lib:format("gsc_so_scan:to_so_token/1: unhandled token shape: ~p", [NYI]), + error(#gsc_err{atom = nyi, string = Msg}) end. @@ -392,7 +392,7 @@ so_parse_char([$' | Chars]) -> case unicode:characters_to_nfc_list(unescape($', Chars, [])) of [Char] -> Char; _Bad -> - error(#sfc_err{atom = bad_token, + error(#gsc_err{atom = bad_token, string = "Bad character literal: '" ++ Chars}) end. @@ -434,7 +434,7 @@ unescape(Delim, [$\\, Code | Chars], Acc) -> $r -> Ok($\r); $t -> Ok($\t); $v -> Ok($\v); - _ -> error(#sfc_err{atom = bad_escape_char, + _ -> error(#gsc_err{atom = bad_escape_char, string = "Bad control sequence: \\" ++ [Code]}) %% TODO end; unescape(Delim, [C | Chars], Acc) ->