#!/usr/bin/env escript

% Takes an input file, shows where non-ascii characters are
%
% Written by Peter Harpending, 2026-03-05
%
% Copyright (c) 2026 QPQ AG
%
% Permission is hereby granted, free of charge, to any person obtaining a copy
% of this software and associated documentation files (the "Software"), to deal
% in the Software without restriction, including without limitation the rights
% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
% copies of the Software, and to permit persons to whom the Software is
% furnished to do so, subject to the following conditions:
%
% The above copyright notice and this permission notice shall be included in
% all copies or substantial portions of the Software.
%
% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
% SOFTWARE.

-mode(compile).

% Print the usage message to standard output.
help() ->
    io:format("USAGE: ununicode Filename~n"
              "~n"
              "Example:~n"
              " ununicode foo.md | less -RS~n",
              []).


% Script entry point.
%
% With exactly one argument, reads that file, splits it into lines and
% reports every line containing a non-ascii byte.  With any other argument
% list, or when the file cannot be read, prints the usage message.
main([Filename]) ->
    case file:read_file(Filename) of
        {ok, Bytes} ->
            % Split on the raw newline byte.  The input is arbitrary bytes
            % that may not be valid UTF-8, so use the byte-oriented
            % binary:split/3 rather than the text-oriented string:split/3.
            Lines = binary:split(Bytes, <<"\n">>, [global]),
            go(Lines, 1, []);
        Error ->
            io:format("ERROR reading file ~p: ~tp~n", [Filename, Error]),
            help()
    end;
main(_) ->
    help().


% Walk the lines, numbering them from LineNum, and collect {LineNum, Line}
% for every line that bad/1 flags.  The accumulator is built newest-first,
% so its head is the highest flagged line number.  When the lines run out
% the accumulated list is handed to punish/1.
go([Line | Rest], LineNum, Badnesses) ->
    NewAcc =
        case bad(Line) of
            true  -> [{LineNum, Line} | Badnesses];
            false -> Badnesses
        end,
    go(Rest, LineNum + 1, NewAcc);
go([], _, Badness) ->
    punish(Badness).


% true if the binary contains at least one byte with the high bit set
% (i.e. a byte outside the 7-bit ascii range), false otherwise.
bad(<<0:1, _:7, Rest/bytes>>) -> bad(Rest);
bad(<<1:1, _:7, _/bytes>>)    -> true;
bad(<<>>)                     -> false.


% Tabulate the flagged lines.
%
% Expects the newest-first list built by go/3: the head carries the largest
% line number, which determines the width of the line-number column.
punish([]) ->
    io:format("no badness found~n");
punish(Bads = [{MaxLineNum, _} | _]) ->
    punish(ndigits(MaxLineNum), lists:reverse(Bads)).


% Print each {Linum, Line} as "<right-aligned line number>\t<line>", with
% the non-ascii parts of the line highlighted.
punish(NDigits, [{Linum, Line} | Rest]) ->
    Print = [format_digits(NDigits, Linum), "\t", highlight_bad(Line)],
    io:format("~ts~n", [Print]),
    punish(NDigits, Rest);
punish(_, []) ->
    ok.


% Return the line as chardata with every non-ascii character wrapped in
% the escape sequences produced by red/1.
highlight_bad(Line) ->
    hlb(Line, []).

% Ascii bytes pass through untouched.
hlb(<<0:1, A:7, Rest/bytes>>, Acc) ->
    hlb(Rest, [Acc, <<0:1, A:7>>]);
% A well-formed multi-byte UTF-8 code point is highlighted as a unit.  The
% /utf8 segment only matches valid encodings, so malformed input falls
% through to the next clause instead of swallowing neighbouring bytes.
hlb(<<C/utf8, Rest/bytes>>, Acc) ->
    hlb(Rest, [Acc, red(<<C/utf8>>)]);
% Any other byte (stray continuation byte, invalid lead byte, truncated
% sequence) is not valid UTF-8 on its own; show it as a hex escape rather
% than crashing or emitting a malformed byte sequence.
hlb(<<Byte, Rest/bytes>>, Acc) ->
    hlb(Rest, [Acc, red(hex_escape(Byte))]);
hlb(<<>>, Acc) ->
    Acc.

% Render a single byte as "<XX>" with two upper-case hex digits.
hex_escape(Byte) ->
    io_lib:format("<~2.16.0B>", [Byte]).

% Wrap String in the SGR sequence "7;31" and reset it afterwards.
red(String) ->
    ["\e[7;31m", String, "\e[0m"].

% Number of decimal digits in the integer N.
%
% Counted on the integer's textual form rather than via floating-point
% log10, which can be off by one near powers of ten and loses precision for
% very large integers.
ndigits(N) ->
    length(integer_to_list(N)).

% N as decimal text, left-padded with spaces to at least Width characters.
format_digits(Width, N) ->
    string:pad(integer_to_list(N), Width, leading).