all the old tests pass... moving on
This commit is contained in:
@@ -116,11 +116,10 @@ tokstr_concat_test_() ->
|
|||||||
|
|
||||||
concat_property(FileName, FilePath) ->
|
concat_property(FileName, FilePath) ->
|
||||||
%?debugFmt("concat_property(~p, _)", [FileName]),
|
%?debugFmt("concat_property(~p, _)", [FileName]),
|
||||||
{ok, FileBytes} = file:read_file(FilePath),
|
FileChars = gsc:very_stable_file(FilePath),
|
||||||
FileChars = unicode:characters_to_nfc_list(FileBytes),
|
|
||||||
{FileName ++ ": file = sum(tokens)",
|
{FileName ++ ": file = sum(tokens)",
|
||||||
fun() ->
|
fun() ->
|
||||||
case gsc_tokenizer:tokens(FileChars) of
|
case gsc:tokens_from_file(FileChars) of
|
||||||
{ok, SfcTokens} ->
|
{ok, SfcTokens} ->
|
||||||
ConcatStr = concat_token_strs(SfcTokens, []),
|
ConcatStr = concat_token_strs(SfcTokens, []),
|
||||||
?assertEqual(FileChars, ConcatStr);
|
?assertEqual(FileChars, ConcatStr);
|
||||||
@@ -139,13 +138,15 @@ div_test_() ->
|
|||||||
% divergence
|
% divergence
|
||||||
DivFiles = div_files(),
|
DivFiles = div_files(),
|
||||||
%?debugFmt("DivFiles=~p", [DivFiles]),
|
%?debugFmt("DivFiles=~p", [DivFiles]),
|
||||||
{"claude tokenizer divergences fixed", [tokens_match(N, P) || {N, P} <- DivFiles]}.
|
{"claude tokenizer divergences fixed",
|
||||||
|
[tokens_match(N, P) || {N, P} <- DivFiles]}.
|
||||||
|
|
||||||
tokens_match(FileName, FilePath) ->
|
tokens_match(FileName, FilePath) ->
|
||||||
%?debugFmt("tokens_match(~p, _)", [FileName]),
|
%?debugFmt("tokens_match(~p, _)", [FileName]),
|
||||||
% extracting data to be tested
|
% extracting data to be tested
|
||||||
SoTokens = gsc_cli:so_tokens(FilePath),
|
% i hate this so much but lazy and this is test code so who really cares.
|
||||||
SfTokens = gsc_cli:gso_tokens(FilePath),
|
SoTokens = so_tokens_from_file(FilePath),
|
||||||
|
SfTokens = gsc:gso_tokens_from_file(FilePath),
|
||||||
{FileName ++ ": tokenizers_agree",
|
{FileName ++ ": tokenizers_agree",
|
||||||
fun() ->
|
fun() ->
|
||||||
case {SoTokens, SfTokens} of
|
case {SoTokens, SfTokens} of
|
||||||
@@ -155,3 +156,10 @@ tokens_match(FileName, FilePath) ->
|
|||||||
{{error, _}, {ok, _}} -> error("so_scan failed and gso_scan succeded")
|
{{error, _}, {ok, _}} -> error("so_scan failed and gso_scan succeded")
|
||||||
end
|
end
|
||||||
end}.
|
end}.
|
||||||
|
|
||||||
|
% Tokenizes the file at path F with so_scan.
%
% so_scan:scan/1 is fed a plain list, so the file is read as a binary and
% then turned into a list of byte values with binary_to_list/1 (one integer
% per byte; no Unicode decoding is done here).
%
% Crashes with badmatch if the file cannot be read. Returns whatever
% so_scan:scan/1 returns.
% NOTE(review): callers appear to expect {ok, _} | {error, _} -- confirm
% against so_scan.
so_tokens_from_file(F) ->
    {ok, RawBytes} = file:read_file(F),
    so_scan:scan(binary_to_list(RawBytes)).
|
||||||
|
|||||||
+30
-3
@@ -75,8 +75,35 @@ do(Args) ->
|
|||||||
|
|
||||||
do_doi() ->
|
do_doi() ->
|
||||||
FP = zx:get_home() ++ "/priv/doi.txt",
|
FP = zx:get_home() ++ "/priv/doi.txt",
|
||||||
Cmd = "less " ++ FP,
|
page_file(FP).
|
||||||
io:format("~s~n", [Cmd]).
|
|
||||||
|
% Shows the file at FilePath to the user, through `less` when available.
% (Approach suggested by ChatGPT.)
%
% os:cmd/1 did not work for this: it is meant for running a command in the
% background and capturing its output, not for letting a program take over
% the screen.
%
% When os:find_executable/1 cannot locate `less` (it returns false), the
% file is simply printed via cat_file/1 instead.
page_file(FilePath) ->
    case os:find_executable("less") of
        false ->
            cat_file(FilePath);
        LessPath ->
            less_file(LessPath, FilePath)
    end.
|
||||||
|
|
||||||
|
% Prints the whole file at FilePath to standard output.
%
% The contents are read in one go and written with the ~ts directive, which
% treats the binary as Unicode text. Crashes with badmatch if the file cannot
% be read. Returns ok (the result of io:format/2).
cat_file(FilePath) ->
    {ok, Contents} = file:read_file(FilePath),
    io:format("~ts", [Contents]).
|
||||||
|
|
||||||
|
% Runs the pager executable Less on FilePath and blocks until it exits.
%
% Less is the full path to the executable (as returned by
% os:find_executable/1); FilePath is passed as its only argument.
%
% Returns ok when the pager exits with status 0. Raises
% error({less_exit_status, N}) for any other exit status N, and
% error(Reason) if an 'EXIT' message for the port arrives first (such a
% message is only delivered when the calling process traps exits).
less_file(Less, FilePath) ->
    % nouse_stdio: the port talks to the VM over file descriptors 3 and 4
    % instead of stdin/stdout -- presumably so the pager can use the terminal.
    % exit_status: ask for an {exit_status, N} message when the program ends.
    PortSettings = [{args, [FilePath]}, nouse_stdio, exit_status],
    Pager = open_port({spawn_executable, Less}, PortSettings),
    % No `after` clause on purpose: the user decides when the pager closes.
    receive
        {Pager, {exit_status, 0}} -> ok;
        {Pager, {exit_status, Status}} -> error({less_exit_status, Status});
        {'EXIT', Pager, Why} -> error(Why)
    end.
|
||||||
|
|
||||||
do_tests() ->
|
do_tests() ->
|
||||||
io:format("TestModules = ~p~n", [known_modules_with_prefix("ts")]),
|
io:format("TestModules = ~p~n", [known_modules_with_prefix("ts")]),
|
||||||
@@ -134,7 +161,7 @@ do_eshell() ->
|
|||||||
end.
|
end.
|
||||||
|
|
||||||
tokenizers_agree(File) ->
|
tokenizers_agree(File) ->
|
||||||
so_tokens(File) =:= tokens(File).
|
gso_tokens(File) =:= so_tokens(File).
|
||||||
|
|
||||||
|
|
||||||
do_tokens(FilePath) ->
|
do_tokens(FilePath) ->
|
||||||
|
|||||||
Reference in New Issue
Block a user