stopping point
This commit is contained in:
+12
-7
@@ -1,10 +1,15 @@
|
||||
# TODONE
|
||||
|
||||
# TODO
|
||||
|
||||
- barf for outputs, slurp for inputs
|
||||
- architecture needs more careful thought but only after something works
|
||||
- architecture needs more careful thought but only after something
|
||||
works
|
||||
- too fuzzy right now
|
||||
- possibly:
|
||||
- rename parser layers sequentially:
|
||||
- gsc_
|
||||
- undo gs_ naming fuckery.. everything is `gsc_*`. it's just
|
||||
needlessly confusing. for now let's name new things gsc_* and then
|
||||
go back and undo the stupidity
|
||||
|
||||
# TONOTDO
|
||||
|
||||
- barf for outputs, slurp for inputs
|
||||
- rename parser layers sequentially
|
||||
|
||||
# TODONE
|
||||
|
||||
+38
-2
@@ -47,6 +47,8 @@ do(["list", "tests"]) ->
|
||||
do_tlist();
|
||||
do(["test"]) ->
|
||||
do_tests();
|
||||
do(["test" | Tests]) ->
|
||||
do_tests(Tests);
|
||||
do(["tests"]) ->
|
||||
do_tests();
|
||||
do(["run", "tests"]) ->
|
||||
@@ -106,14 +108,48 @@ less_file(Less, FilePath) ->
|
||||
end.
|
||||
|
||||
do_tests() ->
|
||||
io:format("TestModules = ~p~n", [known_modules_with_prefix("ts")]),
|
||||
io:format("TestModules = ~p~n", [test_mods()]),
|
||||
do_runall_tests().
|
||||
|
||||
% @doc Hand every module reported by `test_mods/0' to `run_mod_main/1',
% in the order they are listed.
do_runall_tests() ->
    Mods = test_mods(),
    lists:foreach(fun(Mod) -> run_mod_main(Mod) end, Mods).
|
||||
|
||||
|
||||
% @doc Run each named test in turn via `run_test/1'.
do_tests(Names) ->
    lists:foreach(fun(Name) -> run_test(Name) end, Names).
|
||||
|
||||
% n
|
||||
% @doc Run a single test module selected by name.
%
% `TestName' is a string, either the full module name or the part that
% follows the "gsc_test_" prefix; the exact name wins when both exist.
%
% The name is resolved by comparing strings against the modules reported
% by `test_mods/0', so no atom is ever created from command-line input
% (atoms are never garbage collected, and over-long names would otherwise
% raise `system_limit').
%
% Raises `error({no_such_test, TestName})' when neither candidate names a
% known test module.
run_test(TestName) ->
    % two candidate module names, tried in this order
    Candidates = [TestName, "gsc_test_" ++ TestName],
    case find_test_mod(Candidates, test_mods()) of
        {ok, Mod} -> rmm(Mod);
        error     -> error({no_such_test, TestName})
    end.

% first known module whose name equals one of the candidate strings
find_test_mod([], _KnownMods) ->
    error;
find_test_mod([Name | Names], KnownMods) ->
    case [M || M <- KnownMods, is_atom(M), atom_to_list(M) =:= Name] of
        [Mod | _] -> {ok, Mod};
        []        -> find_test_mod(Names, KnownMods)
    end.


rmm(X) -> run_mod_main(X).
|
||||
|
||||
% KnownTests = test_mods(),
|
||||
% TestMods = ensure_all_known([], List, KnownTests),
|
||||
% lists:foreach(fun run_mod_main/1, TestMods).
|
||||
|
||||
|
||||
%ensure_all_known(Acc, [], _) ->
|
||||
% lists:sort(Acc);
|
||||
%ensure_all_known(Acc, [T | Ts], Knowns) ->
|
||||
% case lists:member(T, Knowns) of
|
||||
%
|
||||
% end.
|
||||
|
||||
|
||||
test_mods() ->
|
||||
known_modules_with_prefix("gs_test").
|
||||
known_modules_with_prefix("gsc_test").
|
||||
|
||||
known_modules_with_prefix(Pfx) ->
|
||||
ModsZipBeamsZipLoaded = code:all_available(),
|
||||
|
||||
@@ -0,0 +1,109 @@
|
||||
-module(gsc_test_ntree).
|
||||
|
||||
-export([
|
||||
main/0
|
||||
]).
|
||||
|
||||
-include("$gsc_include/gsc.hrl").
|
||||
|
||||
|
||||
% @doc Exported entry point: run every example in this module.
main() ->
    x00(),
    ok.


% x00 = example00
%
% Prints the sample source string and each stage derived from it, one
% labelled line per stage. Each stage is evaluated right before its line
% is printed.
x00() ->
    io:format("Example 00:~n", []),
    Stages = [{" SrcStr = ~p~n", fun x00_src/0},
              {" Tokens = ~p~n", fun x00_tks/0},
              {" Signal = ~p~n", fun x00_sgl/0},
              {" Tree0 = ~p~n",  fun x00_tree0/0}],
    lists:foreach(fun({Fmt, Stage}) -> io:format(Fmt, [Stage()]) end, Stages),
    ok.


% sample type expression, and the stages computed from it

% source string
x00_src() ->
    "foo => bar * baz".

% all tokens of the source string
x00_tks() ->
    Src = x00_src(),
    gsc:unsafe_tokens_from_string(Src).

% tokens with the noise filtered out
x00_sgl() ->
    Tokens = x00_tks(),
    gsc:filter_signal(Tokens).

% first-pass tree built from the signal
x00_tree0() ->
    Signal = x00_sgl(),
    mktree(Signal).
|
||||
|
||||
% records copypasta for now
|
||||
-record(ns, {val :: any(), kids :: list(any())}).
|
||||
-record(nl, {val :: any()}).
|
||||
|
||||
-type ntree(X, Y) :: gsc_ntree:ntree(X, Y).
|
||||
-type ntree() :: gsc_ntree:ntree().
|
||||
|
||||
-type ast_stem_t() :: vtokens
|
||||
| {op, tk()}
|
||||
.
|
||||
|
||||
-type ast() :: ntree(ast_stem_t(), tk()).
|
||||
|
||||
|
||||
-spec mktree(Signal) -> Tree when
    Signal :: gsc:signal(),
    Tree   :: gsc_ntree:ntree().

% @doc Build a first-pass tree from a signal.
%
% Every token starts out as a leaf under a single `vtokens' stem. The
% tree is then passed through `rerootl_tkstr/2' once for "=>" and then
% once for "*", in that order.
mktree(Sig) ->
    Flat = gsc_ntree:nstem(vtokens, Sig),
    lists:foldl(fun rerootl_tkstr/2, Flat, ["=>", "*"]).
|
||||
|
||||
|
||||
% Re-root a stem on the first of its direct children that is a token
% whose string is `S'.
%
% Only the direct children of the given stem are searched; children that
% are themselves stems never match and are carried along untouched.
% The children after the match are processed again recursively, so
% repeated occurrences of `S' nest to the right.
%
%   input:
%       *s Root0
%       |
%       +-- .l Foo
%       +-- .l "=>"
%       +-- .l Bar
%   output:
%       *s {op, "=>"}
%       |
%       +-- *s Root0 -- .l Foo
%       +-- *s Root0 -- .l Bar
%
% When no direct child matches, the tree is returned unchanged.
rerootl_tkstr(S, Tree0 = #ns{val = Root0}) ->
    Kids0 = gsc_ntree:deleaf0(Tree0),
    case lists:splitwith(fun(Kid) -> isnt_str(S, Kid) end, Kids0) of
        % not found, nothing to do
        {_, []} ->
            Tree0;
        % found: children before the match go left, the rest go right
        {Before, [OpTk | After]} ->
            Left  = gsc_ntree:releaf0(Root0, Before),
            Right = rerootl_tkstr(S, gsc_ntree:releaf0(Root0, After)),
            gsc_ntree:releaf0({op, OpTk}, [Left, Right])
    end.
|
||||
|
||||
|
||||
%reroot_mapsto(Tree0 = #ns{val = Root0}) ->
|
||||
% Kids0 = gsc_ntree:deleaf0(Tree0),
|
||||
% IsntMapsto = fun(DL) -> isnt_str("=>", Tk) end,
|
||||
% case lists:splitwith(IsntMapsto, Kids0) of
|
||||
% % found
|
||||
% {LHS1, [Tk0 | RHS1]} ->
|
||||
% Root1 = Root0,
|
||||
% LTree1 = gsc_ntree:releaf0(Root1, LHS1),
|
||||
% RTree1 = reroot_mapsto(gsc_ntree:releaf0(Root1, RHS1)),
|
||||
% NewRoot0 = {op, Tk0},
|
||||
% NewKids0 = [LTree1, RTree1],
|
||||
% NewTree = gsc_ntree:releaf0(NewRoot0, NewKids0),
|
||||
% NewTree;
|
||||
% % nothing to do
|
||||
% {Kids0, []} ->
|
||||
% Tree0
|
||||
% end.
|
||||
|
||||
|
||||
|
||||
% negation of is_str/2
isnt_str(X, Y) ->
    case is_str(X, Y) of
        true  -> false;
        false -> true
    end.

% true only when the second argument is a #tk{} whose `str' field is
% exactly `S'; anything else (including stems) is false
is_str(S, #tk{str = Str}) -> Str =:= S;
is_str(_, _)              -> false.
|
||||
@@ -1,5 +1,5 @@
|
||||
% gsc tokenizer tests
|
||||
-module(gs_test_tokens).
|
||||
-module(gsc_test_tokens).
|
||||
|
||||
-export([
|
||||
main/0, ct_dir/0
|
||||
@@ -0,0 +1,25 @@
|
||||
% testing utilities
|
||||
-module(ts_utils).
|
||||
|
||||
-export([
|
||||
ct_dir/0,
|
||||
ct_file/1
|
||||
]).
|
||||
|
||||
|
||||
-spec ct_dir() -> string().

% directory containing the tests for the tokenizer: the "ct" directory
% under whatever `zx_daemon:get_home/0' returns
ct_dir() ->
    Home = zx_daemon:get_home(),
    Home ++ "/ct".
|
||||
|
||||
|
||||
|
||||
-spec ct_file(Name) -> AbsPath when
    Name    :: string(),
    AbsPath :: string().
% @doc
% Path of the file called `Name' inside `ct_dir/0'.
%
% ct_file("foo.aes") -> "/path/to/ct/foo.aes"

ct_file(Name) ->
    lists:append([ct_dir(), "/", Name]).
|
||||
+9
-9
@@ -196,13 +196,13 @@
|
||||
%gulp_file([]) ->
|
||||
% {error, empty_file};
|
||||
%gulp_file(Tokens) ->
|
||||
% case gs_tokens:take_block(Tokens) of
|
||||
% case gsc_tokens:take_block(Tokens) of
|
||||
% {Tokens, []} ->
|
||||
% gulp_block(fun gulp_top_decl/1, Tokens);
|
||||
% %gulp_file2([], [], Tokens);
|
||||
% {A, B} ->
|
||||
% StartPos = gs_tokens:start_pos(A),
|
||||
% ErrPos = gs_tokens:start_pos(B),
|
||||
% StartPos = gsc_tokens:start_pos(A),
|
||||
% ErrPos = gsc_tokens:start_pos(B),
|
||||
% Msg = efmt("gulp_file: block starting at ~p ends at ~p instead of EOF",
|
||||
% [StartPos, ErrPos]),
|
||||
% {error, #parse_error{pos = ErrPos, msg = Msg}}
|
||||
@@ -212,7 +212,7 @@
|
||||
%
|
||||
%%gulp_file2(AccOks, AccErrs, Tokens = [_ | _]) ->
|
||||
%% % ItemTokens will be nonempty
|
||||
%% {ItemTokens, NewTokens} = gs_tokens:take_block_item(Tokens),
|
||||
%% {ItemTokens, NewTokens} = gsc_tokens:take_block_item(Tokens),
|
||||
%% case gulp_top_decl(ItemTokens) of
|
||||
%% {gulp, Ok} -> gulp_file2([Ok | AccOks], AccErrs, NewTokens);
|
||||
%% Err -> gulp_file2(AccOks, [Err | AccErrs], NewTokens)
|
||||
@@ -258,7 +258,7 @@
|
||||
%
|
||||
%gulp_block(GulpItem, AccOks, AccErrs, Tokens = [_ | _]) ->
|
||||
% % ItemTokens will be nonempty
|
||||
% {ItemTokens, NewTokens} = gs_tokens:take_block_item(Tokens),
|
||||
% {ItemTokens, NewTokens} = gsc_tokens:take_block_item(Tokens),
|
||||
% case GulpItem(ItemTokens) of
|
||||
% {gulp, Ok} -> gulp_block(GulpItem, [Ok | AccOks], AccErrs, NewTokens);
|
||||
% Err -> gulp_block(GulpItem, AccOks, [Err | AccErrs], NewTokens)
|
||||
@@ -284,7 +284,7 @@
|
||||
%% | Using
|
||||
%% @end
|
||||
%gulp_top_decl(DeclTokens) ->
|
||||
% case gs_tokens:strings(3, DeclTokens) of
|
||||
% case gsc_tokens:strings(3, DeclTokens) of
|
||||
% ["payable", "contract", "interface"] ->
|
||||
% gulp_nyi(DeclTokens);
|
||||
% ["contract", "interface" | _] ->
|
||||
@@ -410,7 +410,7 @@
|
||||
%% | (EModifier* 'entrypoint' | FModifier* 'function') Block(FunDecl)
|
||||
%% | Using
|
||||
%gulp_decl(Tokens) ->
|
||||
% case gs_tokens:strings(1, Tokens) of
|
||||
% case gsc_tokens:strings(1, Tokens) of
|
||||
% ["type"] -> gulp_type_alias(Tokens);
|
||||
% _ -> gulp_nyi(Tokens)
|
||||
% end.
|
||||
@@ -611,7 +611,7 @@
|
||||
%% Type1 = {plist, Types} () (foo) (foo, bar)
|
||||
%% | {token, #tk{}} foo Bar.baz 'quux
|
||||
%slurp_type1(Tks) ->
|
||||
% case gs_tokens:slurp_plist(Tks) of
|
||||
% case gsc_tokens:slurp_plist(Tks) of
|
||||
% % head token is NOT open paren -> must be id/qid/tvar
|
||||
% {slurp, [], [Tk | NewTks]} ->
|
||||
% TkType = Tk#tk.type,
|
||||
@@ -633,7 +633,7 @@
|
||||
%
|
||||
%
|
||||
%%slurp_type_expr_plist(Tks) ->
|
||||
%% case gs_tokens:slurp_plist(Tks) of
|
||||
%% case gsc_tokens:slurp_plist(Tks) of
|
||||
%% % head token is NOT open paren -> must be id/qid/tvar
|
||||
%% {slurp, [], [Tk | NewTks]} ->
|
||||
%% TkType = Tk#tk.type,
|
||||
|
||||
@@ -43,7 +43,7 @@
|
||||
%
|
||||
%% @doc for testing
|
||||
%unsafe_vtks_from_string(S) ->
|
||||
% {ok, SigTks} = gs_tokens:significant_tokens(S),
|
||||
% {ok, SigTks} = gsc_tokens:significant_tokens(S),
|
||||
% {gulp, Vtks} = gulp_vtks(SigTks),
|
||||
% Vtks.
|
||||
%
|
||||
@@ -110,7 +110,7 @@
|
||||
% end.
|
||||
%
|
||||
%slurp_plist_rec(Tokens = [#tk{string = "(" | _]) ->
|
||||
% case gs_tokens:slurp_plist(Tokens) of
|
||||
% case gsc_tokens:slurp_plist(Tokens) of
|
||||
% {slurp, [], _} ->
|
||||
% barf;
|
||||
% {slurp, PTokens, NewTokens} ->
|
||||
@@ -156,7 +156,7 @@
|
||||
% {_Pfx = Tks1_BeforeOpen,
|
||||
% _Sfx = Tks2_OpenNAfter
|
||||
% = [#tk{string = "("} | _]} ->
|
||||
% case gs_tokens:slurp_plist(Tks2_OpenNAfter) of
|
||||
% case gsc_tokens:slurp_plist(Tks2_OpenNAfter) of
|
||||
% {slurp, Tks2A_OpenToClose, Tks2B_AfterClose} ->
|
||||
% NewAcc = [Acc,
|
||||
% Tks1_BeforeOpen,
|
||||
|
||||
@@ -63,7 +63,7 @@
|
||||
%-spec end_pos([gsc_token()]) -> {value, tk_pos()} | none.
|
||||
%
|
||||
%end_pos([#gsc_token{pos = Pos, string = Str}]) ->
|
||||
% {value, gs_tokens:new_pos(Pos, Str)};
|
||||
% {value, gsc_tokens:new_pos(Pos, Str)};
|
||||
%end_pos([_ | T]) ->
|
||||
% end_pos(T);
|
||||
%end_pos([]) ->
|
||||
|
||||
+124
-9
@@ -4,12 +4,12 @@
|
||||
% based on original sophia compiler
|
||||
%
|
||||
% parse layers:
|
||||
% 1. gs_tokens: SrcStr -> (Tokens | SigTokens)
|
||||
% 1. gsc_tokens: SrcStr -> (Tokens | SigTokens)
|
||||
%
|
||||
% SigTokens = not comment/whitespace
|
||||
%
|
||||
% layers:
|
||||
% a. gs_strmatch : matches string shapes
|
||||
% a. gsc_strmatch : matches string shapes
|
||||
% b. gso_scan : converts to so_scan shapes
|
||||
%
|
||||
%
|
||||
@@ -32,14 +32,29 @@
|
||||
-module(gsc).
|
||||
|
||||
-export_type([
|
||||
token/0
|
||||
token/0,
|
||||
signal/0
|
||||
]).
|
||||
|
||||
-export([
|
||||
unsafe_tokens_from_file/1,
|
||||
unsafe_tokens_from_string/1,
|
||||
unsafe_signal_from_file/1,
|
||||
unsafe_signal_from_string/1,
|
||||
filter_signal/1,
|
||||
signal_from_string/1,
|
||||
signal_from_file/1,
|
||||
sigtokens_from_file/1,
|
||||
sigtokens_from_string/1,
|
||||
tokens_from_file/1,
|
||||
tokens_from_string/1
|
||||
tokens_from_string/1,
|
||||
% sophia compatibility
|
||||
gso_tokens_from_file/1,
|
||||
gso_tokens_from_string/1,
|
||||
% unicode normalization
|
||||
very_stable_codepoints/1,
|
||||
very_stable_string/1,
|
||||
very_stable_file/1
|
||||
]).
|
||||
|
||||
-include("$gsc_include/gsc.hrl").
|
||||
@@ -50,19 +65,52 @@
|
||||
|
||||
-type token() :: tk().
|
||||
|
||||
% @doc signal means non-noise (whitespace/comment)
|
||||
% tokens; legacy name still around is "sigtokens"
|
||||
-type signal() :: [tk()].
|
||||
|
||||
%-----------------------------------------
|
||||
% functions
|
||||
% API: FUNCTIONS
|
||||
%-----------------------------------------
|
||||
|
||||
%-----------------------------------------
|
||||
% aint nobody got time for case shit
|
||||
%-----------------------------------------
|
||||
% tokens
|
||||
% Crashing variants of the `{ok, _}'-returning readers: each returns the
% bare list and raises `error({badmatch, Other})' for any other result.

unsafe_tokens_from_file(F) ->
    case tokens_from_file(F) of
        {ok, Tks} -> Tks;
        Other     -> error({badmatch, Other})
    end.

unsafe_tokens_from_string(S) ->
    case tokens_from_string(S) of
        {ok, Tks} -> Tks;
        Other     -> error({badmatch, Other})
    end.


% signal
unsafe_signal_from_file(F) ->
    case signal_from_file(F) of
        {ok, Tks} -> Tks;
        Other     -> error({badmatch, Other})
    end.

unsafe_signal_from_string(S) ->
    case signal_from_string(S) of
        {ok, Tks} -> Tks;
        Other     -> error({badmatch, Other})
    end.
|
||||
|
||||
|
||||
%
|
||||
% @doc drop the noise tokens from an already-tokenized list
filter_signal(Tokens) ->
    gsc_tokens:filter_significant(Tokens).

% @doc preferred name for `sigtokens_from_file/1'
signal_from_file(FilePath) ->
    sigtokens_from_file(FilePath).

% @doc preferred name for `sigtokens_from_string/1'
signal_from_string(SrcStr) ->
    sigtokens_from_string(SrcStr).
|
||||
|
||||
% @doc legacy name for signal
|
||||
sigtokens_from_file(X) ->
|
||||
case tokens_from_file(X) of
|
||||
{ok, Y} -> {ok, gs_tokens:filter_significant(Y)};
|
||||
{ok, Y} -> {ok, gsc_tokens:filter_significant(Y)};
|
||||
Err -> Err
|
||||
end.
|
||||
|
||||
sigtokens_from_string(X) ->
|
||||
case tokens_from_string(X) of
|
||||
{ok, Y} -> {ok, gs_tokens:filter_significant(Y)};
|
||||
{ok, Y} -> {ok, gsc_tokens:filter_significant(Y)};
|
||||
Err -> Err
|
||||
end.
|
||||
|
||||
@@ -81,7 +129,6 @@ tokens_from_file(FilePath) ->
|
||||
|
||||
|
||||
|
||||
|
||||
-spec tokens_from_string(SrcStr) -> Result
|
||||
when SrcStr :: string(),
|
||||
Result :: {ok, Tokens}
|
||||
@@ -89,4 +136,72 @@ tokens_from_file(FilePath) ->
|
||||
Tokens :: [tk()].
|
||||
|
||||
tokens_from_string(SrcStr) ->
|
||||
gs_tokens:tokens(SrcStr).
|
||||
gsc_tokens:tokens(SrcStr).
|
||||
|
||||
|
||||
|
||||
-spec gso_tokens_from_file(FilePath) -> Result when
    FilePath :: string(),
    Result   :: {ok, GsoTks} | {error, Reason},
    GsoTks   :: [gso_scan:so_token()],
    Reason   :: gsc_err() | any().

%% @doc Read a file and scan its contents with
%% `gso_tokens_from_string/1'. Whatever `file:read_file/1' returns
%% other than `{ok, Bytes}' is handed back to the caller unchanged.
gso_tokens_from_file(FilePath) ->
    gso_scan_read_result(file:read_file(FilePath)).

% dispatch on the result of file:read_file/1
gso_scan_read_result({ok, Bytes}) -> gso_tokens_from_string(Bytes);
gso_scan_read_result(Error)       -> Error.
|
||||
|
||||
|
||||
|
||||
-spec gso_tokens_from_string(Str) -> Result when
    Str    :: iolist(),
    Result :: {ok, GsoTks} | {error, Reason},
    GsoTks :: [gso_scan:so_token()],
    Reason :: gsc_err() | any().

%% @doc Normalize the input to an NFC code point list, then scan it
%% with `gso_scan:scan/1'.
%%
%% `unicode:characters_to_nfc_list/1' (which the normalizer wraps)
%% returns `{error, Converted, Rest}' instead of a list when the input
%% is not valid character data. That tuple used to be passed straight
%% to the scanner; it is now reported to the caller as
%% `{error, {invalid_unicode, Converted, Rest}}'.
gso_tokens_from_string(Evil) ->
    case gsc_tokens:very_stable_codepoints(Evil) of
        {error, Converted, Rest} ->
            {error, {invalid_unicode, Converted, Rest}};
        Str ->
            gso_scan:scan(Str)
    end.
|
||||
|
||||
|
||||
|
||||
|
||||
-spec very_stable_codepoints(String) -> Normalized when
    String     :: iolist(),
    Normalized :: string().

%% @doc normalize to NFC, returned as a list of code points;
%% delegates to `gsc_tokens:very_stable_codepoints/1'
very_stable_codepoints(Chars) ->
    gsc_tokens:very_stable_codepoints(Chars).



-spec very_stable_string(String) -> Normalized when
    String     :: iolist(),
    Normalized :: string().

%% @doc alias for `very_stable_codepoints/1'
very_stable_string(Chars) ->
    very_stable_codepoints(Chars).
|
||||
|
||||
|
||||
|
||||
-spec very_stable_file(FilePath) -> Contents when
    FilePath :: string(),
    Contents :: string().

%% @doc Read a file and return its contents normalized by
%% `very_stable_codepoints/1'.
%%
%% Despite the name this function is NOT very stable: if
%% `file:read_file/1' returns anything other than `{ok, Bytes}'
%% (e.g. file not found), that result is raised with `error/1'.
%%
%% Intended mostly for scripting/shell convenience.
very_stable_file(X) ->
    ReadResult = file:read_file(X),
    case ReadResult of
        {ok, Bytes} -> very_stable_codepoints(Bytes);
        _           -> error(ReadResult)
    end.
|
||||
|
||||
@@ -0,0 +1,127 @@
|
||||
-module(gsc_ntree).
|
||||
|
||||
-export_type([
|
||||
ntree/2,
|
||||
ntree/0
|
||||
]).
|
||||
|
||||
-export([
|
||||
nstem/2,
|
||||
flatten/1,
|
||||
deleaf0/1,
|
||||
releaf0/2
|
||||
]).
|
||||
|
||||
|
||||
-include("$gsc_include/gsc.hrl").
|
||||
|
||||
%%=====================================================
|
||||
%% API: types
|
||||
%%=====================================================
|
||||
|
||||
-record(ns, {val :: any(), kids :: list(any())}).
|
||||
-record(nl, {val :: any()}).
|
||||
|
||||
%% @doc ntree(S, L) is a "node tree" (meaning stems
|
||||
%% have values and children)
|
||||
-type ntree(S, L)
|
||||
:: #ns{val :: S, kids :: [ntree(S, L)]}
|
||||
| #nl{val :: L}.
|
||||
|
||||
-type ntree() :: ntree(any(), any()).
|
||||
|
||||
|
||||
%%=====================================================
|
||||
%% API: functions
|
||||
%%=====================================================
|
||||
|
||||
|
||||
-spec nstem(Root, List) -> Tree when
|
||||
Root :: X,
|
||||
List :: list(Y),
|
||||
Tree :: ntree(X, Y),
|
||||
X :: any(),
|
||||
Y :: any().
|
||||
% @doc
|
||||
% You *probably* want `releaf0/2' instead.
|
||||
%
|
||||
% This function naively wraps each element in the list
|
||||
% in a leaf type, even if it's already wrapped.
|
||||
%
|
||||
% nstem(root, [Foo, Bar, Baz]) ~>
|
||||
% *s root
|
||||
% |
|
||||
% +--- .l Foo
|
||||
% |
|
||||
% +--- .l Bar
|
||||
% |
|
||||
% +--- .l Baz
|
||||
%
|
||||
% Much more common use case is to releaf only the input
|
||||
% nodes which are not already wrapped, which is what
|
||||
% `releaf0/2' does.
|
||||
% @end
|
||||
nstem(Root, List) ->
    % every element is wrapped unconditionally, even if it is already
    % a leaf or a stem
    Leaves = lists:map(fun(Elem) -> {nl, Elem} end, List),
    {ns, Root, Leaves}.
|
||||
|
||||
|
||||
|
||||
-spec flatten(Tree) -> LeafVals when
|
||||
Tree :: ntree(any(), LeafType),
|
||||
LeafVals :: [LeafType],
|
||||
LeafType :: any().
|
||||
|
||||
% @doc Collect the leaf values of a tree, depth first, left to right.
%
% Leaf values are returned exactly as stored: a leaf holding a list
% (for example a string) contributes that list as ONE element. Only the
% tree structure is flattened, which is why this uses `lists:flatmap/2'
% rather than `lists:flatten/1' (the latter would also splice list-valued
% leaves into the result).
flatten({nl, X}) ->
    [X];
flatten({ns, _, Keeids}) ->
    lists:flatmap(fun flatten/1, Keeids).
|
||||
|
||||
|
||||
|
||||
-spec deleaf0(Tree) -> Result when
|
||||
Tree :: ntree(S, L),
|
||||
Result :: [L | Tree],
|
||||
S :: any(),
|
||||
L :: any().
|
||||
|
||||
% @doc unwrap the leaf children, and leave the stem
|
||||
% children intact
|
||||
%
|
||||
% ex. 1:
|
||||
% (+ 1 2 (* 3 4) 5)
|
||||
% ~> '(1 2 (* 3 4) 5)
|
||||
%
|
||||
% ex. 2:
|
||||
% {ns, '+', [{nl, 1},
|
||||
% {nl, 2},
|
||||
% {ns, '*', [{nl, 3}, {nl, 4}]},
|
||||
% {nl, 5}]}
|
||||
% ~> [1, 2, {ns, '*', [{nl, 3}, {nl, 4}]}, 5]
|
||||
% @end
|
||||
deleaf0({nl, Val}) ->
    [Val];
deleaf0({ns, _, Kids}) ->
    [unleaf0(Kid) || Kid <- Kids].

% a leaf child gives up its value; any other child is kept as it is
unleaf0({nl, Val}) -> Val;
unleaf0(Other)     -> Other.
|
||||
|
||||
|
||||
|
||||
-spec releaf0(Root, Keeids) -> Rooted when
|
||||
Root :: S,
|
||||
Keeids :: [L | ntree(S, L)],
|
||||
Rooted :: ntree(S, L),
|
||||
S :: any(),
|
||||
L :: any().
|
||||
|
||||
% @doc notional inverse of `deleaf0/1'
|
||||
%
|
||||
% Note that this does **NOT** double-wrap leafs in the
|
||||
% input
|
||||
releaf0(Root, Ks) ->
    Kids = [rl0(K) || K <- Ks],
    #ns{val = Root, kids = Kids}.

% stems and leaves pass through untouched; anything else becomes a leaf
rl0(#ns{} = Stem) -> Stem;
rl0(#nl{} = Leaf) -> Leaf;
rl0(Bare)         -> #nl{val = Bare}.
|
||||
@@ -70,7 +70,7 @@
|
||||
% `contract` gets tokenized as a keyword and not a variable name), and then
|
||||
% calls into this module in order to match the string shape it's looking for.
|
||||
% @end
|
||||
-module(gs_strmatch).
|
||||
-module(gsc_strmatch).
|
||||
|
||||
%-compile([export_all, nowarn_export_all]).
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
% 2. to future-proof in case we decide to incrementally incorporate the gsc
|
||||
% code into the legacy sophia compiler
|
||||
% @end
|
||||
-module(gs_tokens).
|
||||
-module(gsc_tokens).
|
||||
|
||||
% meta
|
||||
-export([
|
||||
@@ -39,6 +39,9 @@
|
||||
is_significant/1,
|
||||
filter_significant/1,
|
||||
significant_tokens/1,
|
||||
very_stable_codepoints/1,
|
||||
very_stable_string/1,
|
||||
very_stable_characters/1,
|
||||
tokens_from_iolist/1,
|
||||
tokens/1,
|
||||
slurp_token/2,
|
||||
@@ -188,13 +191,13 @@ slurp_dlist(All, Opens, [#tk{str = "["} = Tk | NewTks]) ->
|
||||
slurp_dlist(All, Opens, [#tk{str = "{"} = Tk | NewTks]) ->
|
||||
slurp_dlist([Tk | All], [Tk | Opens], NewTks);
|
||||
% sad: mismatch cases
|
||||
slurp_dlist(All, Opens, []) ->
|
||||
slurp_dlist(_, Opens, []) ->
|
||||
{error, {fixme, mismatch, Opens, none}};
|
||||
slurp_dlist(All, Opens, [#tk{str = "}"} = BadClose | _]) ->
|
||||
slurp_dlist(_, Opens, [#tk{str = "}"} = BadClose | _]) ->
|
||||
{error, {fixme, mismatch, Opens, {value, BadClose}}};
|
||||
slurp_dlist(All, Opens, [#tk{str = "]"} = BadClose | _]) ->
|
||||
slurp_dlist(_, Opens, [#tk{str = "]"} = BadClose | _]) ->
|
||||
{error, {fixme, mismatch, Opens, {value, BadClose}}};
|
||||
slurp_dlist(All, Opens, [#tk{str = ")"} = BadClose | _]) ->
|
||||
slurp_dlist(_, Opens, [#tk{str = ")"} = BadClose | _]) ->
|
||||
{error, {fixme, mismatch, Opens, {value, BadClose}}};
|
||||
% general case: non-terminal token gets pushed
|
||||
slurp_dlist(All, Opens, [Tk | NewTks]) ->
|
||||
@@ -330,6 +333,29 @@ is_significant(#tk{shape = ws}) -> false;
|
||||
is_significant(_) -> true.
|
||||
|
||||
|
||||
|
||||
% aliases
|
||||
% @doc alias of `very_stable_codepoints/1'
very_stable_string(Chardata) ->
    very_stable_codepoints(Chardata).

% @doc alias of `very_stable_codepoints/1'
very_stable_characters(Chardata) ->
    very_stable_codepoints(Chardata).
|
||||
|
||||
|
||||
|
||||
-spec very_stable_codepoints(IoList) -> NfcList when
|
||||
IoList :: iolist(),
|
||||
NfcList :: string().
|
||||
|
||||
%% @doc Normalize character data to NFC form, returned as
|
||||
%% a list of Unicode code points. Thin wrapper around
|
||||
%% `unicode:characters_to_nfc_list/1'.
|
||||
%%
|
||||
%% `tokens/1' calls this as a defensive normalization
|
||||
%% step before tokenizing.
|
||||
%% NOTE(review): the wrapped call can return an error
|
||||
%% tuple on invalid input, which the spec omits; confirm.
|
||||
very_stable_codepoints(S) ->
|
||||
unicode:characters_to_nfc_list(S).
|
||||
|
||||
|
||||
-spec tokens_from_iolist(SrcStr) -> Result when
|
||||
SrcStr :: iolist(),
|
||||
Result :: {ok, Tokens}
|
||||
@@ -341,6 +367,7 @@ tokens_from_iolist(S) -> tokens(S).
|
||||
|
||||
|
||||
|
||||
|
||||
-spec tokens(SrcStr) -> Result
|
||||
when SrcStr :: iolist(),
|
||||
Result :: {ok, Tokens}
|
||||
@@ -355,7 +382,8 @@ tokens_from_iolist(S) -> tokens(S).
|
||||
|
||||
tokens(S) ->
|
||||
% defensive normalization
|
||||
tokens([], {1, 1}, unicode:characters_to_nfc_list(S)).
|
||||
tokens([], {1, 1}, very_stable_codepoints(S)).
|
||||
|
||||
|
||||
tokens(Stack, _FinalPos, "") ->
|
||||
{ok, lists:reverse(Stack)};
|
||||
@@ -559,8 +587,8 @@ slurp_token_of_shape(bcom, Pos, SrcStr0) ->
|
||||
no_tokmatch
|
||||
end;
|
||||
slurp_token_of_shape(ws, Pos, SrcStr) ->
|
||||
WhitespaceMatcher = gs_strmatch:smr_sf_ws(),
|
||||
case gs_strmatch:match(WhitespaceMatcher, SrcStr) of
|
||||
WhitespaceMatcher = gsc_strmatch:smr_sf_ws(),
|
||||
case gsc_strmatch:match(WhitespaceMatcher, SrcStr) of
|
||||
no_strmatch ->
|
||||
no_tokmatch;
|
||||
{strmatch, WS, Rest} ->
|
||||
@@ -594,7 +622,7 @@ slurp_token_of_shape(kwd, Pos, SrcStr) ->
|
||||
no_tokmatch
|
||||
end;
|
||||
slurp_token_of_shape(op, Pos, SrcStr) ->
|
||||
case gs_strmatch:match(gs_strmatch:smr_sf_op(), SrcStr) of
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_op(), SrcStr) of
|
||||
{strmatch, Str, Rest} ->
|
||||
Token = #tk{shape = op, pos = Pos, str = Str},
|
||||
{tokmatch, Token, Rest};
|
||||
@@ -602,7 +630,7 @@ slurp_token_of_shape(op, Pos, SrcStr) ->
|
||||
no_tokmatch
|
||||
end;
|
||||
slurp_token_of_shape(punct, Pos, SrcStr) ->
|
||||
case gs_strmatch:match(gs_strmatch:smr_sf_punct(), SrcStr) of
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_punct(), SrcStr) of
|
||||
{strmatch, Str, Rest} ->
|
||||
Token = #tk{shape = punct, pos = Pos, str = Str},
|
||||
{tokmatch, Token, Rest};
|
||||
@@ -611,7 +639,7 @@ slurp_token_of_shape(punct, Pos, SrcStr) ->
|
||||
end;
|
||||
% SOPHIA VARIABLE NAMES: id, con, qid, qcon, tvar
|
||||
slurp_token_of_shape(id, Pos, SrcStr) ->
|
||||
case gs_strmatch:match(gs_strmatch:smr_sf_id(), SrcStr) of
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_id(), SrcStr) of
|
||||
{strmatch, IdStr, Rest} ->
|
||||
Token = #tk{shape = id, pos = Pos, str = IdStr},
|
||||
{tokmatch, Token, Rest};
|
||||
@@ -619,7 +647,7 @@ slurp_token_of_shape(id, Pos, SrcStr) ->
|
||||
no_tokmatch
|
||||
end;
|
||||
slurp_token_of_shape(con, Pos, SrcStr) ->
|
||||
case gs_strmatch:match(gs_strmatch:smr_sf_con(), SrcStr) of
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_con(), SrcStr) of
|
||||
{strmatch, Str, Rest} ->
|
||||
Token = #tk{shape = con, pos = Pos, str = Str},
|
||||
{tokmatch, Token, Rest};
|
||||
@@ -627,7 +655,7 @@ slurp_token_of_shape(con, Pos, SrcStr) ->
|
||||
no_tokmatch
|
||||
end;
|
||||
slurp_token_of_shape(qid, Pos, SrcStr) ->
|
||||
case gs_strmatch:match(gs_strmatch:smr_sf_qid(), SrcStr) of
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_qid(), SrcStr) of
|
||||
{strmatch, Str, Rest} ->
|
||||
Token = #tk{shape = qid, pos = Pos, str = Str},
|
||||
{tokmatch, Token, Rest};
|
||||
@@ -635,7 +663,7 @@ slurp_token_of_shape(qid, Pos, SrcStr) ->
|
||||
no_tokmatch
|
||||
end;
|
||||
slurp_token_of_shape(qcon, Pos, SrcStr) ->
|
||||
case gs_strmatch:match(gs_strmatch:smr_sf_qcon(), SrcStr) of
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_qcon(), SrcStr) of
|
||||
{strmatch, Str, Rest} ->
|
||||
Token = #tk{shape = qcon, pos = Pos, str = Str},
|
||||
{tokmatch, Token, Rest};
|
||||
@@ -643,7 +671,7 @@ slurp_token_of_shape(qcon, Pos, SrcStr) ->
|
||||
no_tokmatch
|
||||
end;
|
||||
slurp_token_of_shape(tvar, Pos, SrcStr) ->
|
||||
case gs_strmatch:match(gs_strmatch:smr_sf_tvar(), SrcStr) of
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_tvar(), SrcStr) of
|
||||
{strmatch, Str, Rest} ->
|
||||
Token = #tk{shape = tvar, pos = Pos, str = Str},
|
||||
{tokmatch, Token, Rest};
|
||||
@@ -651,7 +679,7 @@ slurp_token_of_shape(tvar, Pos, SrcStr) ->
|
||||
no_tokmatch
|
||||
end;
|
||||
slurp_token_of_shape(int16, Pos, SrcStr) ->
|
||||
case gs_strmatch:match(gs_strmatch:smr_sf_int16(), SrcStr) of
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_int16(), SrcStr) of
|
||||
{strmatch, Str, Rest} ->
|
||||
Token = #tk{shape = int16, pos = Pos, str = Str},
|
||||
{tokmatch, Token, Rest};
|
||||
@@ -659,7 +687,7 @@ slurp_token_of_shape(int16, Pos, SrcStr) ->
|
||||
no_tokmatch
|
||||
end;
|
||||
slurp_token_of_shape(int10, Pos, SrcStr) ->
|
||||
case gs_strmatch:match(gs_strmatch:smr_sf_int10(), SrcStr) of
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_int10(), SrcStr) of
|
||||
{strmatch, Str, Rest} ->
|
||||
Token = #tk{shape = int10, pos = Pos, str = Str},
|
||||
{tokmatch, Token, Rest};
|
||||
@@ -671,8 +699,8 @@ slurp_token_of_shape(int10, Pos, SrcStr) ->
|
||||
%
|
||||
% char: sophia char literal
|
||||
slurp_token_of_shape(ak, Pos, SrcStr) ->
|
||||
StringMatcher = gs_strmatch:smr_sf_ak(),
|
||||
case gs_strmatch:match(StringMatcher, SrcStr) of
|
||||
StringMatcher = gsc_strmatch:smr_sf_ak(),
|
||||
case gsc_strmatch:match(StringMatcher, SrcStr) of
|
||||
no_strmatch ->
|
||||
no_tokmatch;
|
||||
{strmatch, TokenStr, Rest} ->
|
||||
@@ -680,8 +708,8 @@ slurp_token_of_shape(ak, Pos, SrcStr) ->
|
||||
{tokmatch, Token, Rest}
|
||||
end;
|
||||
slurp_token_of_shape(ct, Pos, SrcStr) ->
|
||||
StringMatcher = gs_strmatch:smr_sf_ct(),
|
||||
case gs_strmatch:match(StringMatcher, SrcStr) of
|
||||
StringMatcher = gsc_strmatch:smr_sf_ct(),
|
||||
case gsc_strmatch:match(StringMatcher, SrcStr) of
|
||||
no_strmatch ->
|
||||
no_tokmatch;
|
||||
{strmatch, TokenStr, Rest} ->
|
||||
@@ -689,8 +717,8 @@ slurp_token_of_shape(ct, Pos, SrcStr) ->
|
||||
{tokmatch, Token, Rest}
|
||||
end;
|
||||
slurp_token_of_shape(sg, Pos, SrcStr) ->
|
||||
StringMatcher = gs_strmatch:smr_sf_sg(),
|
||||
case gs_strmatch:match(StringMatcher, SrcStr) of
|
||||
StringMatcher = gsc_strmatch:smr_sf_sg(),
|
||||
case gsc_strmatch:match(StringMatcher, SrcStr) of
|
||||
no_strmatch ->
|
||||
no_tokmatch;
|
||||
{strmatch, TokenStr, Rest} ->
|
||||
@@ -698,8 +726,8 @@ slurp_token_of_shape(sg, Pos, SrcStr) ->
|
||||
{tokmatch, Token, Rest}
|
||||
end;
|
||||
slurp_token_of_shape(char, Pos, SrcStr) ->
|
||||
StringMatcher = gs_strmatch:smr_sf_char(),
|
||||
case gs_strmatch:match(StringMatcher, SrcStr) of
|
||||
StringMatcher = gsc_strmatch:smr_sf_char(),
|
||||
case gsc_strmatch:match(StringMatcher, SrcStr) of
|
||||
no_strmatch ->
|
||||
no_tokmatch;
|
||||
{strmatch, TokenStr, Rest} ->
|
||||
@@ -707,7 +735,7 @@ slurp_token_of_shape(char, Pos, SrcStr) ->
|
||||
{tokmatch, Token, Rest}
|
||||
end;
|
||||
slurp_token_of_shape(string, Pos, SrcStr) ->
|
||||
case gs_strmatch:match(gs_strmatch:smr_sf_str(), SrcStr) of
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_str(), SrcStr) of
|
||||
no_strmatch ->
|
||||
no_tokmatch;
|
||||
{strmatch, TokenStr, Rest} ->
|
||||
@@ -715,7 +743,7 @@ slurp_token_of_shape(string, Pos, SrcStr) ->
|
||||
{tokmatch, Token, Rest}
|
||||
end;
|
||||
slurp_token_of_shape(bytes, Pos, SrcStr) ->
|
||||
case gs_strmatch:match(gs_strmatch:smr_sf_bytes(), SrcStr) of
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_bytes(), SrcStr) of
|
||||
no_strmatch ->
|
||||
no_tokmatch;
|
||||
{strmatch, TokenStr, Rest} ->
|
||||
+2
-2
@@ -1,6 +1,6 @@
|
||||
% @doc compatibility layer to test against so_scan
|
||||
%
|
||||
% converts gs_tokens data to so_scan tokens
|
||||
% converts gsc_tokens data to so_scan tokens
|
||||
%
|
||||
% Ref: so_scan.erl
|
||||
-module(gso_scan).
|
||||
@@ -104,7 +104,7 @@
|
||||
% @end
|
||||
|
||||
scan(SrcStr) ->
|
||||
case gs_tokens:tokens(SrcStr) of
|
||||
case gsc_tokens:tokens(SrcStr) of
|
||||
{ok, SfLTokens} ->
|
||||
SoTokens = to_so_tokens(SfLTokens),
|
||||
{ok, SoTokens};
|
||||
|
||||
Reference in New Issue
Block a user