From 2c36a02331941b24fcc65664b44d7966cf09571e Mon Sep 17 00:00:00 2001 From: Peter Harpending Date: Tue, 2 Jun 2026 12:48:41 -0700 Subject: [PATCH] all the old tests pass... moving on --- cli/src/gs_test_tokens.erl | 20 ++++++++++++++------ cli/src/gsc_cli.erl | 33 ++++++++++++++++++++++++++++++--- 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/cli/src/gs_test_tokens.erl b/cli/src/gs_test_tokens.erl index 82342ff..eabdeaa 100644 --- a/cli/src/gs_test_tokens.erl +++ b/cli/src/gs_test_tokens.erl @@ -116,11 +116,10 @@ tokstr_concat_test_() -> concat_property(FileName, FilePath) -> %?debugFmt("concat_property(~p, _)", [FileName]), - {ok, FileBytes} = file:read_file(FilePath), - FileChars = unicode:characters_to_nfc_list(FileBytes), + FileChars = gsc:very_stable_file(FilePath), {FileName ++ ": file = sum(tokens)", fun() -> - case gsc_tokenizer:tokens(FileChars) of + case gsc:tokens_from_file(FileChars) of {ok, SfcTokens} -> ConcatStr = concat_token_strs(SfcTokens, []), ?assertEqual(FileChars, ConcatStr); @@ -139,13 +138,15 @@ div_test_() -> % divergence DivFiles = div_files(), %?debugFmt("DivFiles=~p", [DivFiles]), - {"claude tokenizer divergences fixed", [tokens_match(N, P) || {N, P} <- DivFiles]}. + {"claude tokenizer divergences fixed", + [tokens_match(N, P) || {N, P} <- DivFiles]}. tokens_match(FileName, FilePath) -> %?debugFmt("tokens_match(~p, _)", [FileName]), % extracting data to be tested - SoTokens = gsc_cli:so_tokens(FilePath), - SfTokens = gsc_cli:gso_tokens(FilePath), + % i hate this so much but lazy and this is test code so who really cares. + SoTokens = so_tokens_from_file(FilePath), + SfTokens = gsc:gso_tokens_from_file(FilePath), {FileName ++ ": tokenizers_agree", fun() -> case {SoTokens, SfTokens} of @@ -155,3 +156,10 @@ tokens_match(FileName, FilePath) -> {{error, _}, {ok, _}} -> error("so_scan failed and gso_scan succeded") end end}. + +% that's right, we have to enter via converting the +% bytes in the file to a list... lol +so_tokens_from_file(F) -> + {ok, Bytes} = file:read_file(F), + S = binary_to_list(Bytes), + so_scan:scan(S). diff --git a/cli/src/gsc_cli.erl b/cli/src/gsc_cli.erl index 2e99fce..69f8b3d 100644 --- a/cli/src/gsc_cli.erl +++ b/cli/src/gsc_cli.erl @@ -75,8 +75,35 @@ do(Args) -> do_doi() -> FP = zx:get_home() ++ "/priv/doi.txt", - Cmd = "less " ++ FP, - io:format("~s~n", [Cmd]). + page_file(FP). + +% thank you chatgpt +% os:cmd didnt do nuffin because that's for running +% stuff in the background and capturing the output, not +% for taking over the screen +page_file(FilePath) -> + Less = os:find_executable("less"), + case Less of + false -> cat_file(FilePath); + _ -> less_file(Less, FilePath) + end. + +cat_file(FilePath) -> + {ok, Bytes} = file:read_file(FilePath), + io:format("~ts", [Bytes]). + +less_file(Less, FilePath) -> + Port = open_port({spawn_executable, Less}, + [{args, [FilePath]}, + nouse_stdio, exit_status]), + receive + {Port, {exit_status, 0}} -> + ok; + {Port, {exit_status, N}} -> + error({less_exit_status, N}); + {'EXIT', Port, Reason} -> + error(Reason) + end. do_tests() -> io:format("TestModules = ~p~n", [known_modules_with_prefix("ts")]), @@ -134,7 +161,7 @@ do_eshell() -> end. tokenizers_agree(File) -> - so_tokens(File) =:= tokens(File). + gso_tokens(File) =:= so_tokens(File). do_tokens(FilePath) ->