all the old tests pass... moving on
This commit is contained in:
@@ -116,11 +116,10 @@ tokstr_concat_test_() ->
|
||||
|
||||
concat_property(FileName, FilePath) ->
|
||||
%?debugFmt("concat_property(~p, _)", [FileName]),
|
||||
{ok, FileBytes} = file:read_file(FilePath),
|
||||
FileChars = unicode:characters_to_nfc_list(FileBytes),
|
||||
FileChars = gsc:very_stable_file(FilePath),
|
||||
{FileName ++ ": file = sum(tokens)",
|
||||
fun() ->
|
||||
case gsc_tokenizer:tokens(FileChars) of
|
||||
case gsc:tokens_from_file(FileChars) of
|
||||
{ok, SfcTokens} ->
|
||||
ConcatStr = concat_token_strs(SfcTokens, []),
|
||||
?assertEqual(FileChars, ConcatStr);
|
||||
@@ -139,13 +138,15 @@ div_test_() ->
|
||||
% divergence
|
||||
DivFiles = div_files(),
|
||||
%?debugFmt("DivFiles=~p", [DivFiles]),
|
||||
{"claude tokenizer divergences fixed", [tokens_match(N, P) || {N, P} <- DivFiles]}.
|
||||
{"claude tokenizer divergences fixed",
|
||||
[tokens_match(N, P) || {N, P} <- DivFiles]}.
|
||||
|
||||
tokens_match(FileName, FilePath) ->
|
||||
%?debugFmt("tokens_match(~p, _)", [FileName]),
|
||||
% extracting data to be tested
|
||||
SoTokens = gsc_cli:so_tokens(FilePath),
|
||||
SfTokens = gsc_cli:gso_tokens(FilePath),
|
||||
% i hate this so much but lazy and this is test code so who really cares.
|
||||
SoTokens = so_tokens_from_file(FilePath),
|
||||
SfTokens = gsc:gso_tokens_from_file(FilePath),
|
||||
{FileName ++ ": tokenizers_agree",
|
||||
fun() ->
|
||||
case {SoTokens, SfTokens} of
|
||||
@@ -155,3 +156,10 @@ tokens_match(FileName, FilePath) ->
|
||||
{{error, _}, {ok, _}} -> error("so_scan failed and gso_scan succeded")
|
||||
end
|
||||
end}.
|
||||
|
||||
% so_scan:scan/1 is entered with a plain list, so the raw bytes of the
% file are turned into a list with binary_to_list/1 before scanning.
% Crashes with a badmatch if the file cannot be read.
so_tokens_from_file(F) ->
    {ok, Contents} = file:read_file(F),
    so_scan:scan(binary_to_list(Contents)).
|
||||
|
||||
+30
-3
@@ -75,8 +75,35 @@ do(Args) ->
|
||||
|
||||
do_doi() ->
|
||||
FP = zx:get_home() ++ "/priv/doi.txt",
|
||||
Cmd = "less " ++ FP,
|
||||
io:format("~s~n", [Cmd]).
|
||||
page_file(FP).
|
||||
|
||||
% Show a file on the terminal: through the `less` pager when
% os:find_executable/1 locates one, otherwise by printing the contents.
% os:cmd/1 is not used here because it runs a command and captures its
% output instead of letting the program take over the screen.
page_file(FilePath) ->
    case os:find_executable("less") of
        false -> cat_file(FilePath);
        Less -> less_file(Less, FilePath)
    end.
|
||||
|
||||
% Read the whole file and print it with io:format/2 using ~ts.
% Crashes with a badmatch if the file cannot be read.
cat_file(FilePath) ->
    {ok, Contents} = file:read_file(FilePath),
    io:format("~ts", [Contents]).
|
||||
|
||||
% Run the pager executable `Less` on FilePath and block until it exits.
% Returns ok on exit status 0; raises error({less_exit_status, N}) for any
% other status, or error(Reason) if an 'EXIT' message for the port arrives.
less_file(Less, FilePath) ->
    % nouse_stdio keeps the port's own communication off stdin/stdout
    % (presumably so the pager can own the terminal -- confirm on target OS).
    PortOpts = [{args, [FilePath]}, nouse_stdio, exit_status],
    Port = open_port({spawn_executable, Less}, PortOpts),
    % No timeout on purpose: the pager runs until the user quits it.
    receive
        {Port, {exit_status, Status}} ->
            case Status of
                0 -> ok;
                _ -> error({less_exit_status, Status})
            end;
        {'EXIT', Port, Reason} ->
            error(Reason)
    end.
|
||||
|
||||
do_tests() ->
|
||||
io:format("TestModules = ~p~n", [known_modules_with_prefix("ts")]),
|
||||
@@ -134,7 +161,7 @@ do_eshell() ->
|
||||
end.
|
||||
|
||||
tokenizers_agree(File) ->
|
||||
so_tokens(File) =:= tokens(File).
|
||||
gso_tokens(File) =:= so_tokens(File).
|
||||
|
||||
|
||||
do_tokens(FilePath) ->
|
||||
|
||||
Reference in New Issue
Block a user