more mass renaming

This commit is contained in:
2026-06-02 01:48:05 -07:00
parent eff77fff6b
commit 270f192f0c
53 changed files with 1264 additions and 431 deletions
+185 -8
View File
@@ -1,11 +1,5 @@
%%% @doc
%%% GSC CLI: gsc_cli
%%%
%%% This module is currently named `gsc_cli', but you may want to change that.
%%% Remember that changing the name in `-module()' below requires renaming
%%% this file, and it is recommended to run `zx update .app` in the main
%%% project directory to make sure the ebin/gsc_cli.app file stays in
%%% sync with the project whenever you add, remove or rename a module.
%%% GSC CLI: explorer/harness for sfc iteration
%%% @end
-module(gsc_cli).
@@ -16,10 +10,193 @@
-export([start/1]).
-include("$gsc_include/gsc.hrl").
-include("ansi.hrl").
%% Print the help screen to standard output.
do_help() ->
    Screen = help_screen(),
    io:format("~ts", [Screen]).
%% Return the help text as chardata (a list of strings) ready for io:format/2.
help_screen() ->
    ["you can't help people who refuse to help themselves\n"].
%% Entry point. With no arguments, or with "shell" / "eshell", an Erlang
%% shell is started. Any other argument list is echoed, dispatched through
%% do/1, and then zx:silent_stop/0 is called.
-spec start(ArgV) -> ok
    when ArgV :: [string()].
start(ArgV) when ArgV =:= []; ArgV =:= ["shell"]; ArgV =:= ["eshell"] ->
    do_eshell(),
    ok;
start(ArgV) ->
    ok = io:format("Hello, World! Args: ~tp~n", [ArgV]),
    do(ArgV),
    zx:silent_stop().
%% Dispatch a command line (list of argument strings) to its handler.
%% Argument lists that match no command are echoed back, followed by the
%% help screen.
do(["list"]) ->
    do_tlist();
do(["list", "tests"]) ->
    do_tlist();
do(["run", "tests"]) ->
    % report the same module set that do_runall_tests/0 iterates over
    io:format("TestModules = ~p~n", [test_mods()]),
    do_runall_tests();
do(["tokenizers_agree", Foo]) ->
    io:format("~p~n", [tokenizers_agree(Foo)]);
% slowly phasing out old names like lctokens
% tokens = native sfc token representation
do(["tokens", Foo])         -> do_tokens(Foo);
% print source file to screen with token boundaries highlighted
do(["color_tokens", Foo])   -> do_color_tokens(Foo);
do(["ctokens", Foo])        -> do_color_tokens(Foo);
do(["colour_tokens" | _])   -> do_doi();
% so_tokens = so_scan tokens
do(["so", "tokens", Foo])   -> do_so_tokens(Foo);
do(["so_tokens", Foo])      -> do_so_tokens(Foo);
% gso_tokens = our mockery
do(["gso", "tokens", Foo])  -> do_gso_tokens(Foo);
do(["gso_tokens", Foo])     -> do_gso_tokens(Foo);
% script utility: compile a file and run its main/0
do(["rmm", Foo]) ->
    do_rmm(Foo);
do(Args) ->
    io:format("bad args: ~p~n", [Args]),
    do_help().
%% Print (but do not execute) the `less` command line for priv/doi.txt
%% under the directory returned by zx:get_home/0.
do_doi() ->
    DoiPath = zx:get_home() ++ "/priv/doi.txt",
    io:format("~s~n", ["less " ++ DoiPath]).
%% Run main/0 of every module returned by test_mods/0, in order.
do_runall_tests() ->
    Mods = test_mods(),
    lists:foreach(fun(Mod) -> run_mod_main(Mod) end, Mods).
%% Sorted list of available modules whose name starts with "gt_".
test_mods() ->
    Prefix = "gt_",
    known_modules_with_prefix(Prefix).
%% Sorted atoms for every module reported by code:all_available/0 whose
%% name (a string) begins with Pfx.
known_modules_with_prefix(Pfx) ->
    kmp(Pfx, code:all_available(), []).
%% Worker for known_modules_with_prefix/1.
%% Walks {ModStr, BeamPath, Loaded} triples, prepends the atom for each
%% ModStr that starts with Pfx onto Acc, and returns the sorted result.
kmp(Pfx, Available, Acc) ->
    Keep =
        fun({ModStr, _BeamPath, _Loaded}, Found) ->
            case lists:prefix(Pfx, ModStr) of
                true  -> [list_to_atom(ModStr) | Found];
                false -> Found
            end
        end,
    lists:sort(lists:foldl(Keep, Acc, Available)).
%% Print a banner for Mod, then call Mod:main().
%% Any exception is caught and printed (class, reason, stack trace), so one
%% failing module does not stop the caller's loop.
run_mod_main(Mod) ->
    Rule = "========================================\n",
    io:format(Rule ++ "~p:main()\n" ++ Rule, [Mod]),
    try
        Mod:main()
    catch
        Class:Reason:Stack ->
            io:format("~p: ~p~n", [Class, Reason]),
            io:format("Trace:~n~p~n", [Stack])
    end.
%% Print the name of each module from test_mods/0, one per line.
do_tlist() ->
    PrintName = fun(ModName) -> io:format("~s~n", [ModName]) end,
    lists:foreach(PrintName, test_mods()).
%% @doc Greet the user and start an interactive Erlang shell.
%% An already-running shell counts as success; any other error reason is
%% raised with error/1.
-spec do_eshell() -> ok.
do_eshell() ->
    io:format("Welcome to the GSC shell!~n", []),
    case shell:start_interactive() of
        {error, Reason} when Reason =/= already_started ->
            error(Reason);
        Started when Started =:= ok; Started =:= {error, already_started} ->
            ok
    end.
%% True when so_scan and the so-compatible gso_scan produce exactly the same
%% token list for File.
%% The native tokens/1 output is a different representation (#tk{} records,
%% see colorize_token_str/2), so comparing it to so_scan output with =:=
%% cannot succeed; the like-for-like comparison is against gso_tokens/1.
%% Crashes (badmatch) if the file cannot be read or either scanner fails.
tokenizers_agree(File) ->
    so_tokens(File) =:= gso_tokens(File).
%% Print each native token of FilePath on its own line.
do_tokens(FilePath) ->
    Tokens = tokens(FilePath),
    [io:format("~p~n", [Token]) || Token <- Tokens].
%% Print each so_scan token of FilePath on its own line.
do_so_tokens(FilePath) ->
    Tokens = so_tokens(FilePath),
    [io:format("~p~n", [Token]) || Token <- Tokens].
%% Print each gso_scan token of FilePath on its own line.
do_gso_tokens(FilePath) ->
    Tokens = gso_tokens(FilePath),
    [io:format("~p~n", [Token]) || Token <- Tokens].
% rmm = run module:main() with our context loaded
% useful for prototyping
%
% Compiles FilePath with compile:file/1 and calls main/0 on the resulting
% module. Any non-{ok, _} compile result is raised with error/1.
do_rmm(FilePath) ->
    Compiled = compile:file(FilePath),
    case Compiled of
        {ok, Module} -> Module:main();
        Failure      -> error(Failure)
    end.
%% Read FilePath, NFC-normalise its contents to a character list, and return
%% the token list from so_scan:scan/1.
%% Crashes with badmatch if the read or the scan does not return {ok, _}.
so_tokens(FilePath) ->
    {ok, Bytes} = file:read_file(FilePath),
    {ok, Scanned} = so_scan:scan(unicode:characters_to_nfc_list(Bytes)),
    Scanned.
%% Read FilePath, NFC-normalise its contents to a character list, and return
%% the token list from gso_scan:scan/1.
%% Crashes with badmatch if the read or the scan does not return {ok, _}.
gso_tokens(FilePath) ->
    {ok, Bytes} = file:read_file(FilePath),
    {ok, Scanned} = gso_scan:scan(unicode:characters_to_nfc_list(Bytes)),
    Scanned.
%% Return the token list gsc:tokens_from_file/1 produces for FilePath.
%% Crashes with badmatch on anything other than {ok, Tokens}.
tokens(FilePath) ->
    {ok, NativeTokens} = gsc:tokens_from_file(FilePath),
    NativeTokens.
%% Print File with every token wrapped in a foreground colour taken in turn
%% from chunk_color_wheel/0, so token boundaries are visible. The whole
%% output is wrapped in the ANSI invert / uninvert sequences.
%% If tokenizing does not return {ok, Tokens}, the result is printed instead.
do_color_tokens(File) ->
    case gsc:tokens_from_file(File) of
        {ok, Tokens} ->
            ColorizedSrcStr = colorize_tokens(chunk_color_wheel(), Tokens, ""),
            Full = [?ANSI_INVERT, ColorizedSrcStr, ?ANSI_UNINVERT],
            % ~ts, not ~s: plain ~s raises badarg as soon as the source
            % contains a code point above 255
            io:format("~ts", [Full]);
        Error ->
            io:format("~p~n", [Error])
    end.
%% Colours cycled through, in order, when highlighting consecutive tokens.
%% Each entry must be a colour that color_fixes/1 accepts.
chunk_color_wheel() ->
    [
        red,
        green,
        yellow,
        blue,
        magenta,
        cyan
    ].
%% Append the colourised text of every token to Acc (as nested chardata).
%% The wheel is rotated once per token, so consecutive tokens get
%% consecutive colours. An empty token list returns Acc unchanged.
colorize_tokens(Wheel, Tokens, Acc) ->
    Step =
        fun(Token, {WheelIn, Out}) ->
            {Color, WheelOut} = rotate(WheelIn),
            {WheelOut, [Out, colorize_token_str(Color, Token)]}
        end,
    {_FinalWheel, Colorized} = lists:foldl(Step, {Wheel, Acc}, Tokens),
    Colorized.
%% Take the head of a non-empty list and return it together with the list
%% rotated left by one position (the head moved to the end).
rotate([Head | Tail]) ->
    {Head, lists:append(Tail, [Head])}.
%% Wrap the str field of a #tk{} token in the escape sequences for Color.
%% Returns chardata: [Prefix, Str, Suffix].
colorize_token_str(Color, #tk{} = Token) ->
    {Open, Close} = color_fixes(Color),
    [Open, Token#tk.str, Close].
%% Map a colour atom to {Prefix, Suffix} escape sequences. The suffix is
%% always the foreground reset sequence. Unknown colours crash.
color_fixes(Color) ->
    Prefix =
        case Color of
            red     -> ?ANSI_FG_RED;
            green   -> ?ANSI_FG_GREEN;
            yellow  -> ?ANSI_FG_YELLOW;
            blue    -> ?ANSI_FG_BLUE;
            magenta -> ?ANSI_FG_MAGENTA;
            cyan    -> ?ANSI_FG_CYAN
        end,
    {Prefix, ?ANSI_FG_RESET}.
+157
View File
@@ -0,0 +1,157 @@
% gsc tokenizer tests
-module(tsfp_gsc_tokenizer).
-export([
main/0, ct_dir/0
%tokens_match/1
]).
-include("$gsc_include/gsc.hrl").
-include_lib("eunit/include/eunit.hrl").
%% Run every eunit test in this module with verbose output.
main() ->
    Options = [verbose],
    eunit:test(?MODULE, Options).
% directory containing the tests for the tokenizer
% (located under the directory returned by zx_daemon:get_home/0)
ct_dir() ->
    Home = zx_daemon:get_home(),
    Home ++ "/test-data/gt_tokens".
% sub-directory of ct_dir/0 holding the tokenizer-agreement contracts
agreement_tests_dir() ->
    Base = ct_dir(),
    Base ++ "/tokenizers_agree".
% the divergences claude found between gsc tokenizer and so tokenizer
%
% mostly stupid corner cases like a string crossing a line boundary
% or unterminated block comment
%
% divergence files: "divergence" means so_scan disagrees with
% gsc_so_scan in one of the following ways:
%
% - one succeeds when the other errors
% - disagree on success case
%
% making errors agree on two programs that work differently is a
% fool's errand
% Returns a sorted list of {FileName, FilePath} pairs, one per ".aes" file
% in agreement_tests_dir/0.
%
% FileName is the bare name (foo.aes), used for readable test titles;
% FilePath is the full path (/path/to/foo.aes), used to open the file.
%
% Only ".aes" files are kept. This deliberately skips editor artefacts such
% as vim swap files, which are not valid unicode and do not lex.
%
% Crashes with badmatch if the directory cannot be listed.
div_files() ->
    ContractsDir = agreement_tests_dir(),
    % file:list_dir/1 yields bare file names, without a directory prefix
    {ok, Files} = file:list_dir(ContractsDir),
    Contracts =
        [{FileName, ContractsDir ++ "/" ++ FileName}
         || FileName <- Files,
            filename:extension(FileName) =:= ".aes"],
    lists:sort(Contracts).
%div_file_names() -> [N || {N, _} <- div_files()].
%div_file_paths() -> [P || {_, P} <- div_files()].
% Test generator: for each usable contract file, check that concatenating
% the token strings reproduces the file contents.
tokstr_concat_test_() ->
    % wrapped in a list + flatten so more file sources can be added later
    Candidates = lists:flatten([div_files()]),
    % contracts that do not tokenize properly (e.g. unterminated block
    % comments) are left out of this property
    Skipped =
        ["div05_bcom_eof.aes",
         "div06_bcom_in_expr.aes",
         "div07_bcom_nested.aes",
         "div08_bcom_simple.aes"],
    IsUsable = fun({Name, _Path}) -> not lists:member(Name, Skipped) end,
    Usable = lists:filter(IsUsable, Candidates),
    {"file = sum(tokens)",
     [concat_property(Name, Path) || {Name, Path} <- Usable]}.
% Build one titled eunit test for a contract file: if the file tokenizes,
% the concatenation of its token strings must equal the (NFC-normalised)
% file contents. Files that fail to tokenize pass trivially.
%
% The file is read inside the test fun rather than while the generator
% runs, so an unreadable file fails only its own test instead of crashing
% the whole generator.
concat_property(FileName, FilePath) ->
    {FileName ++ ": file = sum(tokens)",
     fun() ->
         {ok, FileBytes} = file:read_file(FilePath),
         FileChars = unicode:characters_to_nfc_list(FileBytes),
         case gsc_tokenizer:tokens(FileChars) of
             {ok, SfcTokens} ->
                 ConcatStr = concat_token_strs(SfcTokens, []),
                 ?assertEqual(FileChars, ConcatStr);
             _Error ->
                 ok
         end
     end}.
% Append the string field of every #gsc_token{} (in order) to Acc and
% return the result as an NFC-normalised character list.
concat_token_strs(Tokens, Acc) ->
    Append = fun(#gsc_token{string = S}, SoFar) -> [SoFar, S] end,
    unicode:characters_to_nfc_list(lists:foldl(Append, Acc, Tokens)).
% underscore marks this as a test *generator*
%
% One tokens_match/2 test per divergence contract from div_files/0.
div_test_() ->
    Title = "claude tokenizer divergences fixed",
    Cases = [tokens_match(Name, Path) || {Name, Path} <- div_files()],
    {Title, Cases}.
% Build one titled eunit test asserting that so_scan and gsc_so_scan agree
% on a contract file: either both succeed with equal tokens, or both fail.
% Error values themselves are not compared.
%
% Both tokenizers run inside the test fun rather than while the generator
% runs, so a crash in either one fails only this file's test instead of
% aborting the whole generator.
tokens_match(FileName, FilePath) ->
    {FileName ++ ": tokenizers_agree",
     fun() ->
         SoTokens = sfp:so_tokens(FilePath),
         SfTokens = sfp:gsc_so_tokens(FilePath),
         case {SoTokens, SfTokens} of
             {{ok, So}, {ok, Sf}}     -> ?assertEqual(So, Sf);
             {{error, _}, {error, _}} -> ok;
             {{ok, _}, {error, _}}    -> error("so_scan succeeded and gsc_so_scan failed");
             {{error, _}, {ok, _}}    -> error("so_scan failed and gsc_so_scan succeded")
         end
     end}.