all the old tests pass... moving on

This commit is contained in:
Peter Harpending
2026-06-02 12:48:41 -07:00
parent 5cae022b8b
commit 2c36a02331
2 changed files with 44 additions and 9 deletions
+14 -6
View File
@@ -116,11 +116,10 @@ tokstr_concat_test_() ->
concat_property(FileName, FilePath) ->
%?debugFmt("concat_property(~p, _)", [FileName]),
{ok, FileBytes} = file:read_file(FilePath),
FileChars = unicode:characters_to_nfc_list(FileBytes),
FileChars = gsc:very_stable_file(FilePath),
{FileName ++ ": file = sum(tokens)",
fun() ->
case gsc_tokenizer:tokens(FileChars) of
case gsc:tokens_from_file(FileChars) of
{ok, SfcTokens} ->
ConcatStr = concat_token_strs(SfcTokens, []),
?assertEqual(FileChars, ConcatStr);
@@ -139,13 +138,15 @@ div_test_() ->
% divergence
DivFiles = div_files(),
%?debugFmt("DivFiles=~p", [DivFiles]),
{"claude tokenizer divergences fixed", [tokens_match(N, P) || {N, P} <- DivFiles]}.
{"claude tokenizer divergences fixed",
[tokens_match(N, P) || {N, P} <- DivFiles]}.
tokens_match(FileName, FilePath) ->
%?debugFmt("tokens_match(~p, _)", [FileName]),
% extracting data to be tested
SoTokens = gsc_cli:so_tokens(FilePath),
SfTokens = gsc_cli:gso_tokens(FilePath),
% i hate this so much but lazy and this is test code so who really cares.
SoTokens = so_tokens_from_file(FilePath),
SfTokens = gsc:gso_tokens_from_file(FilePath),
{FileName ++ ": tokenizers_agree",
fun() ->
case {SoTokens, SfTokens} of
@@ -155,3 +156,10 @@ tokens_match(FileName, FilePath) ->
{{error, _}, {ok, _}} -> error("so_scan failed and gso_scan succeded")
end
end}.
% Tokenize the file at Path with so_scan.
% so_scan:scan/1 is entered with a list, so the file is read as a binary
% and handed over as a list of its raw bytes (binary_to_list/1 does no
% Unicode decoding).
% Crashes with badmatch if the file cannot be read.
so_tokens_from_file(Path) ->
    {ok, Contents} = file:read_file(Path),
    so_scan:scan(binary_to_list(Contents)).
+30 -3
View File
@@ -75,8 +75,35 @@ do(Args) ->
do_doi() ->
FP = zx:get_home() ++ "/priv/doi.txt",
Cmd = "less " ++ FP,
io:format("~s~n", [Cmd]).
page_file(FP).
% Show FilePath to the user through a pager.
% os:cmd/1 is not usable for this: it is meant for running a command in
% the background and capturing its output, not for letting the command
% take over the screen. The pager is therefore started by less_file/2.
% When no "less" executable can be found, the file is printed directly
% by cat_file/1 instead.
page_file(FilePath) ->
    page_file_with(os:find_executable("less"), FilePath).

% Dispatch on the result of os:find_executable/1 (false = not found).
page_file_with(false, FilePath) ->
    cat_file(FilePath);
page_file_with(LessPath, FilePath) ->
    less_file(LessPath, FilePath).
% Print the whole contents of FilePath on the default I/O device.
% The file is read in one go and written with the ~ts directive.
% Crashes with badmatch if the file cannot be read.
cat_file(FilePath) ->
    {ok, Contents} = file:read_file(FilePath),
    io:format("~ts", [Contents]).
% Run the pager executable Less on FilePath and block until it is done.
%
% The port is opened with nouse_stdio so that the port's own channel to
% Erlang does not occupy the program's standard input/output
% (NOTE(review): per the open_port/2 docs this moves the channel to file
% descriptors 3 and 4 -- confirm on the target platform).
%
% Returns ok when the pager exits with status 0. Raises
% error({less_exit_status, N}) for any other exit status N, and
% error(Reason) if an 'EXIT' message for the port is received first
% (presumably only possible when the caller traps exits).
% There is deliberately no timeout: the user decides when to quit.
less_file(Less, FilePath) ->
    PortOpts = [{args, [FilePath]}, nouse_stdio, exit_status],
    Pager = open_port({spawn_executable, Less}, PortOpts),
    Outcome =
        receive
            {Pager, {exit_status, Status}} -> {exited, Status};
            {'EXIT', Pager, Reason} -> {port_died, Reason}
        end,
    case Outcome of
        {exited, 0} -> ok;
        {exited, Code} -> error({less_exit_status, Code});
        {port_died, Why} -> error(Why)
    end.
do_tests() ->
io:format("TestModules = ~p~n", [known_modules_with_prefix("ts")]),
@@ -134,7 +161,7 @@ do_eshell() ->
end.
tokenizers_agree(File) ->
so_tokens(File) =:= tokens(File).
gso_tokens(File) =:= so_tokens(File).
do_tokens(FilePath) ->