more mass renaming

2026-06-02 01:48:05 -07:00
parent eff77fff6b
commit 270f192f0c
53 changed files with 1264 additions and 431 deletions
@@ -1,11 +1,5 @@
 %%% @doc
-%%% GSC CLI: gsc_cli
-%%%
-%%% This module is currently named `gsc_cli', but you may want to change that.
-%%% Remember that changing the name in `-module()' below requires renaming
-%%% this file, and it is recommended to run `zx update .app` in the main
-%%% project directory to make sure the ebin/gsc_cli.app file stays in
-%%% sync with the project whenever you add, remove or rename a module.
+%%% GSC CLI: explorer/harness for sfc iteration
 %%% @end

 -module(gsc_cli).
@@ -16,10 +10,193 @@

 -export([start/1]).

+-include("$gsc_include/gsc.hrl").
+-include("ansi.hrl").
+
+%% Write the help screen to standard output.
+do_help() ->
+    Screen = help_screen(),
+    io:format("~ts", [Screen]).
+
+%% Text shown by do_help/0, returned as chardata (a list of strings).
+help_screen() ->
+    ["you can't help people who refuse to help themselves\n"].

 -spec start(ArgV) -> ok
    when ArgV :: [string()].

+% No arguments, "shell" and "eshell" all open the interactive shell.
+% This clause does not call zx:silent_stop().
+start(ArgV) when ArgV =:= []; ArgV =:= ["shell"]; ArgV =:= ["eshell"] ->
+    do_eshell(),
+    ok;
 start(ArgV) ->
-    ok = io:format("Hello, World! Args: ~tp~n", [ArgV]),
+    %io:format("ArgV: ~p~n", [ArgV]),
+    do(ArgV),
    zx:silent_stop().
+
+% Dispatch a (non-shell) command line to its handler.
+% Argument lists that match no clause are echoed back, followed by the
+% help screen.
+do(["list"]) ->
+    do_tlist();
+do(["list", "tests"]) ->
+    do_tlist();
+do(["run", "tests"]) ->
+    % report exactly the modules do_runall_tests/0 is about to run
+    io:format("TestModules = ~p~n", [test_mods()]),
+    do_runall_tests();
+do(["tokenizers_agree", Foo]) ->
+    io:format("~p~n", [tokenizers_agree(Foo)]);
+% slowly phasing out poor names like lctokens
+% tokens = native sfc token representation
+do(["tokens", Foo]) -> do_tokens(Foo);
+% print source file to screen with token boundaries highlighted
+do(["color_tokens", Foo]) -> do_color_tokens(Foo);
+do(["ctokens", Foo]) -> do_color_tokens(Foo);
+do(["colour_tokens" | _]) -> do_doi();
+% so_tokens = so_scan tokens
+do(["so", "tokens", Foo]) -> do_so_tokens(Foo);
+do(["so_tokens", Foo]) -> do_so_tokens(Foo);
+% gso_tokens = our mockery
+do(["gso", "tokens", Foo]) -> do_gso_tokens(Foo);
+do(["gso_tokens", Foo]) -> do_gso_tokens(Foo);
+% script utility
+do(["rmm", Foo]) ->
+    do_rmm(Foo);
+do(Args) ->
+    io:format("bad args: ~p~n", [Args]),
+    do_help().
+
+% Print (not execute) a `less' command line for priv/doi.txt under the
+% zx home directory.
+do_doi() ->
+    FP = zx:get_home() ++ "/priv/doi.txt",
+    Cmd = "less " ++ FP,
+    % ~ts rather than ~s: ~s raises badarg if the path holds any code
+    % point above 255
+    io:format("~ts~n", [Cmd]).
+
+
+% Run main/0 of every module returned by test_mods/0, in order.
+do_runall_tests() ->
+    Mods = test_mods(),
+    lists:foreach(fun(Mod) -> run_mod_main(Mod) end, Mods).
+
+% Test modules are the known modules whose name starts with "gt_".
+test_mods() ->
+    Prefix = "gt_",
+    known_modules_with_prefix(Prefix).
+
+% Sorted list of module atoms, taken from code:all_available/0, whose
+% name starts with the string Pfx.
+known_modules_with_prefix(Pfx) ->
+    Available = code:all_available(),
+    kmp(Pfx, Available, []).
+
+% Keep the {ModStr, BeamPath, Loaded} entries whose ModStr starts with
+% Pfx, turn those names into atoms, add them to Acc and sort the result.
+kmp(Pfx, Available, Acc) ->
+    Keep =
+        fun({ModStr, _BeamPath, _Loaded}) ->
+            case lists:prefix(Pfx, ModStr) of
+                true  -> {true, list_to_atom(ModStr)};
+                false -> false
+            end
+        end,
+    lists:sort(lists:filtermap(Keep, Available) ++ Acc).
+
+% Print a banner naming Mod, then call Mod:main().
+% Any exception is caught and printed (class, reason, then stack trace)
+% instead of propagating to the caller.
+run_mod_main(Mod) ->
+    Rule = "========================================\n",
+    io:format(Rule ++ "~p:main()\n" ++ Rule, [Mod]),
+    try
+        Mod:main()
+    catch
+        Class:Reason:Stack ->
+            io:format("~p: ~p~n", [Class, Reason]),
+            io:format("Trace:~n~p~n", [Stack])
+    end.
+
+% Print the name of every test module, one per line.
+do_tlist() ->
+    PrintName = fun(ModName) -> io:format("~s~n", [ModName]) end,
+    lists:foreach(PrintName, test_mods()).
+
+
+-spec do_eshell() -> ok.
+% @doc Print a welcome line and start an interactive Erlang shell.
+% A shell that is already running counts as success; any other error
+% reason is raised with error/1.
+
+do_eshell() ->
+    io:format("Welcome to the GSC shell!~n"),
+    case shell:start_interactive() of
+        {error, already_started} -> ok;
+        {error, Reason}          -> error(Reason);
+        ok                       -> ok
+    end.
+
+% true when so_tokens/1 and tokens/1 return exactly equal results for File.
+tokenizers_agree(File) ->
+    SoTokens = so_tokens(File),
+    NativeTokens = tokens(File),
+    SoTokens =:= NativeTokens.
+
+
+% Each do_*_tokens/1 prints the tokens of FilePath, one term per line,
+% using the matching tokenizer wrapper.
+do_tokens(FilePath) ->
+    print_each_token(tokens(FilePath)).
+
+do_so_tokens(FilePath) ->
+    print_each_token(so_tokens(FilePath)).
+
+do_gso_tokens(FilePath) ->
+    print_each_token(gso_tokens(FilePath)).
+
+% Print every token with ~p; returns the list of io:format/2 results.
+print_each_token(Tokens) ->
+    lists:map(fun(Tk) -> io:format("~p~n", [Tk]) end, Tokens).
+
+
+% rmm = run module:main() with our context loaded
+% useful for prototyping
+%
+% The file is compiled to an in-memory binary and loaded explicitly, so
+% the code that runs is the code that was just compiled.  compile:file/1
+% on its own only writes a .beam file and never loads it, which leaves
+% Mod:main() to whatever version the code server happens to find.
+do_rmm(FilePath) ->
+    % the options compile:file/1 implies, plus `binary'
+    Opts = [binary, verbose, report_errors, report_warnings],
+    case compile:file(FilePath, Opts) of
+        {ok, Mod, Beam} ->
+            {module, Mod} = code:load_binary(Mod, FilePath, Beam),
+            Mod:main();
+        Error ->
+            error(Error)
+    end.
+
+
+% Read FilePath, convert it to an NFC character list and return the
+% tokens so_scan:scan/1 produces.  Crashes (badmatch) if the read or the
+% scan does not return {ok, _}.
+so_tokens(FilePath) ->
+    {ok, Bytes} = file:read_file(FilePath),
+    {ok, Tokens} = so_scan:scan(unicode:characters_to_nfc_list(Bytes)),
+    Tokens.
+
+% Read FilePath, convert it to an NFC character list and return the
+% tokens gso_scan:scan/1 produces.  Crashes (badmatch) if the read or the
+% scan does not return {ok, _}.
+gso_tokens(FilePath) ->
+    {ok, Bytes} = file:read_file(FilePath),
+    {ok, Tokens} = gso_scan:scan(unicode:characters_to_nfc_list(Bytes)),
+    Tokens.
+
+
+% Tokens of FilePath from gsc:tokens_from_file/1; crashes (badmatch) on
+% anything other than {ok, Tokens}.
+tokens(FilePath) ->
+    Result = gsc:tokens_from_file(FilePath),
+    {ok, Tokens} = Result,
+    Tokens.
+
+
+% Tokenize File and print it with each token wrapped in the next color
+% of the color wheel, the whole output between ?ANSI_INVERT and
+% ?ANSI_UNINVERT.  A tokenizer result other than {ok, _} is printed as a
+% term instead.
+do_color_tokens(File) ->
+    case gsc:tokens_from_file(File) of
+        {ok, Tokens} ->
+            ColorizedSrcStr = colorize_tokens(chunk_color_wheel(), Tokens, ""),
+            Full = [?ANSI_INVERT, ColorizedSrcStr, ?ANSI_UNINVERT],
+            % ~ts rather than ~s: ~s raises badarg as soon as the text
+            % holds a code point above 255
+            io:format("~ts", [Full]);
+        Error ->
+            io:format("~p~n", [Error])
+    end.
+
+% Colors handed out, in this order, to consecutive tokens.
+chunk_color_wheel() ->
+    Wheel = [red, green, yellow, blue, magenta, cyan],
+    Wheel.
+
+
+
+% Append each token's colorized text to Acc, taking the next color from
+% the wheel for every token.  The result is nested chardata of the form
+% [[[Acc, T1], T2], T3].
+colorize_tokens(Wheel, Tokens, Acc) ->
+    Step =
+        fun(Token, {WheelIn, Out}) ->
+            {Color, WheelOut} = rotate(WheelIn),
+            {WheelOut, [Out, colorize_token_str(Color, Token)]}
+        end,
+    {_FinalWheel, Colorized} = lists:foldl(Step, {Wheel, Acc}, Tokens),
+    Colorized.
+
+% Take the head of a non-empty list and return it together with the list
+% rotated one step (head moved to the end).
+rotate([Head | Tail]) ->
+    Rotated = lists:append(Tail, [Head]),
+    {Head, Rotated}.
+
+% Wrap the token's str field in the prefix/suffix pair for Color.
+colorize_token_str(Color, #tk{str = TokenText}) ->
+    {Open, Close} = color_fixes(Color),
+    [Open, TokenText, Close].
+
+% {Prefix, Suffix} escape pair for a wheel color.  The suffix is
+% ?ANSI_FG_RESET for every color.
+color_fixes(Color) ->
+    {color_prefix(Color), ?ANSI_FG_RESET}.
+
+% Foreground escape for each color in the wheel.
+color_prefix(red)     -> ?ANSI_FG_RED;
+color_prefix(green)   -> ?ANSI_FG_GREEN;
+color_prefix(yellow)  -> ?ANSI_FG_YELLOW;
+color_prefix(blue)    -> ?ANSI_FG_BLUE;
+color_prefix(magenta) -> ?ANSI_FG_MAGENTA;
+color_prefix(cyan)    -> ?ANSI_FG_CYAN.
+
+
@@ -0,0 +1,157 @@
+% gsc tokenizer tests
+-module(tsfp_gsc_tokenizer).
+
+-export([
+    main/0, ct_dir/0
+    %tokens_match/1
+]).
+-include("$gsc_include/gsc.hrl").
+-include_lib("eunit/include/eunit.hrl").
+
+% Run this module's eunit tests with verbose output.
+main() ->
+    Options = [verbose],
+    eunit:test(?MODULE, Options).
+
+
+% directory containing the tests for the tokenizer, located under the
+% home directory reported by zx_daemon:get_home/0
+ct_dir() ->
+    Home = zx_daemon:get_home(),
+    Home ++ "/test-data/gt_tokens".
+
+% subdirectory of ct_dir/0 holding the tokenizer-agreement contracts
+agreement_tests_dir() ->
+    Base = ct_dir(),
+    Base ++ "/tokenizers_agree".
+
+
+% the divergences claude found between gsc tokenizer and so tokenizer
+%
+% mostly stupid corner cases like a string crossing a line boundary
+% or unterminated block comment
+%
+% divergence files: "divergence" means so_scan disagrees with
+% gsc_so_scan in one of the following ways:
+%
+%   - one succeeds when the other errors
+%   - disagree on success case
+%
+% making errors agree on two programs that work differently is a
+% fool's errand
+div_files() ->
+    Dir = agreement_tests_dir(),
+    % file:list_dir/1 is the equivalent of ls: bare file names with no
+    % /path/to/ prefix.  Each entry therefore carries both the FileName
+    % (foo.bar, used in test titles) and the FilePath (/path/to/foo.bar).
+    {ok, Names} = file:list_dir(Dir),
+    % History: an earlier version only let through names matching
+    % "div0" ++ [Digit | _] so that one broken contract could be fixed at
+    % a time, because a single failure used to crash the whole suite.
+    % With eunit test generators only the failing test fails, so that
+    % hack is gone.
+    %
+    % Only ".aes" files are kept.  This filters out vim swap files, which
+    % did not lex properly because they are not unicode.
+    Contracts = [{Name, Dir ++ "/" ++ Name}
+                 || Name <- Names, filename:extension(Name) =:= ".aes"],
+    lists:sort(Contracts).
+
+
+%div_file_names() -> [N || {N, _} <- div_files()].
+%div_file_paths() -> [P || {_, P} <- div_files()].
+
+% Test generator: one "file = sum(tokens)" test per contract file.
+tokstr_concat_test_() ->
+    % contracts with things like unterminated block comments, which
+    % don't tokenize properly, are left out
+    Skipped = [
+        "div05_bcom_eof.aes",
+        "div06_bcom_in_expr.aes",
+        "div07_bcom_nested.aes",
+        "div08_bcom_simple.aes"
+    ],
+    % future proofing: more file sources can be added to this list
+    Candidates = lists:flatten([div_files()]),
+    Tests = [concat_property(Name, Path)
+             || {Name, Path} <- Candidates,
+                not lists:member(Name, Skipped)],
+    {"file = sum(tokens)", Tests}.
+
+% Build a titled eunit test asserting that concatenating the token
+% strings of the file reproduces the file's NFC character list.
+% The file is read when the test is built.  A tokenizer result other
+% than {ok, _} makes the test pass without asserting anything.
+concat_property(FileName, FilePath) ->
+    {ok, Bytes} = file:read_file(FilePath),
+    Expected = unicode:characters_to_nfc_list(Bytes),
+    Check =
+        fun() ->
+            case gsc_tokenizer:tokens(Expected) of
+                {ok, Tokens} ->
+                    Joined = concat_token_strs(Tokens, []),
+                    ?assertEqual(Expected, Joined);
+                _NotTokenized ->
+                    ok
+            end
+        end,
+    {FileName ++ ": file = sum(tokens)", Check}.
+
+% Append every token's string field to Acc, in order, and return the
+% result as one NFC character list.
+concat_token_strs(Tokens, Acc) ->
+    Append = fun(#gsc_token{string = S}, Sofar) -> [Sofar, S] end,
+    unicode:characters_to_nfc_list(lists:foldl(Append, Acc, Tokens)).
+
+% underscore marks this as a test *generator*:
+% one tokens_match/2 test per divergence file
+div_test_() ->
+    Tests = lists:map(fun({Name, Path}) -> tokens_match(Name, Path) end,
+                      div_files()),
+    {"claude tokenizer divergences fixed", Tests}.
+
+% Build a titled eunit test checking that the two tokenizers agree on
+% one file: both succeed with equal tokens, or both return an error.
+% Error terms themselves are not compared.
+tokens_match(FileName, FilePath) ->
+    % both tokenizers run here, while the test is being built; the fun
+    % below only compares the captured results
+    SoTokens = sfp:so_tokens(FilePath),
+    SfTokens = sfp:gsc_so_tokens(FilePath),
+    {FileName ++ ": tokenizers_agree",
+     fun() ->
+          case {SoTokens, SfTokens} of
+              {{ok, So},    {ok, Sf}}  -> ?assertEqual(So, Sf);
+              {{error, _}, {error, _}} -> ok;
+              {{ok, _},    {error, _}} -> error("so_scan succeeded and gsc_so_scan failed");
+              {{error, _}, {ok, _}}    -> error("so_scan failed and gsc_so_scan succeeded")
+          end
+     end}.