#!/usr/bin/env escript
% Takes an input file, shows where non-ascii characters are
%
% Written by Peter Harpending, 2026-03-05
%
% Copyright (c) 2026 QPQ AG
%
% Permission is hereby granted, free of charge, to any person obtaining a copy
% of this software and associated documentation files (the "Software"), to deal
% in the Software without restriction, including without limitation the rights
% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
% copies of the Software, and to permit persons to whom the Software is
% furnished to do so, subject to the following conditions:
%
% The above copyright notice and this permission notice shall be included in
% all copies or substantial portions of the Software.
%
% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
% SOFTWARE.

-mode(compile).


% Prints the usage text to standard output.
help() ->
    io:format("USAGE: ununicode Filename~n"
              "~n"
              "Example:~n"
              " ununicode foo.md | less -RS~n").


% Script entry point.
%
% With exactly one argument, reads that file, splits its contents on "\n" and
% passes the resulting lines to go/3, numbering them from 1. If the read
% returns anything other than {ok, Bytes}, that result is printed together
% with the file name, followed by the usage text. Any other argument list
% prints only the usage text.
main([Filename]) ->
    case file:read_file(Filename) of
        {ok, Contents} ->
            go(string:split(Contents, "\n", all), 1, []);
        ReadFailure ->
            io:format("ERROR reading file ~p: ~tp~n", [Filename, ReadFailure]),
            help()
    end;
main(_) ->
    help().


% Walks Lines, numbering them upwards from FirstLineNum, and prepends a
% {LineNum, Line} pair to Badnesses for every line that bad/1 flags. Because
% pairs are prepended, the most recently flagged line ends up at the head.
% The collected list is then handed to punish/1.
go(Lines, FirstLineNum, Badnesses) ->
    Collect =
        fun(Line, {LineNum, Found}) ->
            case bad(Line) of
                true  -> {LineNum + 1, [{LineNum, Line} | Found]};
                false -> {LineNum + 1, Found}
            end
        end,
    {_NextLineNum, AllFound} =
        lists:foldl(Collect, {FirstLineNum, Badnesses}, Lines),
    punish(AllFound).


% Returns true if the binary contains at least one byte with its high bit set
% (value 16#80 or above), false if every byte is below 16#80 or the binary is
% empty. Scanning stops at the first such byte.
bad(<<>>) ->
    false;
bad(<<Byte, Rest/bytes>>) when Byte < 16#80 ->
    bad(Rest);
bad(<<_HighByte, _/bytes>>) ->
    true.
% tabulate

% Prints the report for the flagged lines.
%
% Takes a list of {LineNumber, Line} pairs. An empty list prints
% "no badness found". Otherwise the line number of the head pair determines
% the width of the line-number column (the head is taken to carry the largest
% line number), and the list is reversed before printing.
punish([]) ->
    io:format("no badness found~n");
punish(Bads = [{MaxLineNum, _} | _]) ->
    punish(ndigits(MaxLineNum), lists:reverse(Bads)).


% Prints one row per {LineNumber, Line} pair, in list order: the line number
% padded to NDigits columns, a tab, then the line with its non-ASCII parts
% highlighted. Returns ok.
punish(NDigits, Bads) ->
    lists:foreach(
        fun({Linum, Line}) ->
            Row = [format_digits(NDigits, Linum), "\t", highlight_bad(Line)],
            io:format("~ts~n", [Row])
        end,
        Bads).


% Returns iodata for Line in which every non-ASCII byte sequence is wrapped in
% the escape codes produced by red/1; ASCII bytes are passed through as-is.
highlight_bad(Line) ->
    hlb(Line, []).


% Worker for highlight_bad/1; Acc is the iodata built so far.
%
% Bytes below 16#80 are copied through. A 2-, 3- or 4-byte sequence with a
% UTF-8 lead byte followed by the right number of 10xxxxxx continuation bytes
% is highlighted as one unit. Only the bit layout is checked; overlong forms
% and out-of-range code points are not rejected.
%
% Any other byte (a stray continuation byte, a lead byte whose sequence is
% truncated or broken, or a byte that is never a UTF-8 lead) is highlighted on
% its own, so input that is not valid UTF-8 is still reported rather than
% failing with function_clause.
hlb(<<>>, Acc) ->
    Acc;
hlb(<<0:1, A:7, Rest/bytes>>, Acc) ->
    hlb(Rest, [Acc, <<0:1, A:7>>]);
hlb(<<(2#110):3, A:5, (2#10):2, B:6, Rest/bytes>>, Acc) ->
    Seq = <<(2#110):3, A:5, (2#10):2, B:6>>,
    hlb(Rest, [Acc, red(Seq)]);
hlb(<<(2#1110):4, A:4, (2#10):2, B:6, (2#10):2, C:6, Rest/bytes>>, Acc) ->
    Seq = <<(2#1110):4, A:4, (2#10):2, B:6, (2#10):2, C:6>>,
    hlb(Rest, [Acc, red(Seq)]);
hlb(<<(2#11110):5, A:3, (2#10):2, B:6, (2#10):2, C:6, (2#10):2, D:6,
      Rest/bytes>>, Acc) ->
    Seq = <<(2#11110):5, A:3, (2#10):2, B:6, (2#10):2, C:6, (2#10):2, D:6>>,
    hlb(Rest, [Acc, red(Seq)]);
hlb(<<Stray:8, Rest/bytes>>, Acc) ->
    % Malformed byte: highlight it alone and resume with the next byte.
    hlb(Rest, [Acc, red(<<Stray:8>>)]).


% Wraps String in the escape sequence "\e[7;31m" and the reset "\e[0m".
red(String) ->
    ["\e[7;31m", String, "\e[0m"].


% Number of decimal digits in the non-negative integer N.
% Counted on the integer's text form so no floating-point rounding is involved.
ndigits(N) when is_integer(N), N >= 0 ->
    length(integer_to_list(N)).


% Returns iodata for N preceded by enough spaces to fill Width columns.
% If N already needs Width columns or more, no padding is added.
format_digits(Width, N) ->
    [spaces(Width - ndigits(N)), integer_to_list(N)].


% Returns a list of N single-space strings; an empty list when N is below 1.
spaces(N) ->
    lists:duplicate(max(N, 0), " ").