Compare commits
7 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
56e63051bc | ||
|
|
3f1c9bd626 | ||
|
|
97e32574c4 | ||
|
|
6f5525afcf | ||
|
|
4f1958b210 | ||
|
|
3da9bd570b | ||
|
|
d2163c1ff8 |
1092
src/hz.erl
1092
src/hz.erl
File diff suppressed because it is too large
Load Diff
1186
src/hz_aaci.erl
Normal file
1186
src/hz_aaci.erl
Normal file
File diff suppressed because it is too large
Load Diff
267
src/hz_sophia.erl
Normal file
267
src/hz_sophia.erl
Normal file
@ -0,0 +1,267 @@
|
|||||||
|
-module(hz_sophia).
|
||||||
|
-vsn("0.8.2").
|
||||||
|
-author("Jarvis Carroll <spiveehere@gmail.com>").
|
||||||
|
-copyright("Jarvis Carroll <spiveehere@gmail.com>").
|
||||||
|
-license("GPL-3.0-or-later").
|
||||||
|
|
||||||
|
-include_lib("eunit/include/eunit.hrl").
|
||||||
|
|
||||||
|
% Parse a complete Sophia literal of the expected Type from String.
%
% Tokenizing starts at cursor {tk, 1, 1}. Once an expression has been parsed,
% parse_literal2/3 checks that nothing but end-of-input follows it. Returns
% the result of parse_literal2/3, or the {error, Reason} reported by
% parse_expression/3.
parse_literal(Type, String) ->
    Origin = {tk, 1, 1},
    case parse_expression(Type, Origin, String) of
        {error, _} = Failure ->
            Failure;
        {ok, {Value, AfterTk, Remaining}} ->
            parse_literal2(Value, AfterTk, Remaining)
    end.
|
||||||
|
|
||||||
|
% Finish parse_literal/2: Result is an already parsed expression, and the only
% token allowed after it is end-of-input. Returns {ok, Result} in that case,
% {error, {unexpected_token, Text, Row, Start, End}} for any other token, and
% passes through any {error, Reason} from the tokenizer.
parse_literal2(Result, Tk, String) ->
    case next_token(Tk, String) of
        {error, _} = Failure ->
            Failure;
        {ok, {{eof, _, _, _, _}, _, _}} ->
            {ok, Result};
        {ok, {{_, Text, Row, Start, End}, _, _}} ->
            {error, {unexpected_token, Text, Row, Start, End}}
    end.
|
||||||
|
|
||||||
|
%%% Tokenizer
|
||||||
|
|
||||||
|
% Read the next token from String, skipping leading spaces and tabs.
%
% The first argument is the cursor {tk, Row, Col} of the first character of
% String. The result is {ok, {Token, NewTk, Rest}}, where Token has the shape
% {Kind, Text, Row, StartCol, EndCol} and Kind is one of:
%   eof       - the input is exhausted (Text is "")
%   integer   - a run of decimal digits (see num_token/4)
%   alphanum  - a run of letters, digits and underscores that does not start
%               with a digit (see alphanum_token/4)
%   character - any other single character
%
% Every character consumed moves the cursor one COLUMN to the right. Line
% breaks get no special treatment here: a newline is returned as a character
% token like any other symbol, so the row never changes.
next_token({tk, Row, Col}, []) ->
    {ok, {{eof, "", Row, Col, Col}, {tk, Row, Col}, []}};
next_token({tk, Row, Col}, [Blank | Rest]) when Blank =:= $\s; Blank =:= $\t ->
    % Whitespace produces no token, but it still occupies a column.
    next_token({tk, Row, Col + 1}, Rest);
next_token(Tk, [N | _] = String) when N >= $0, N =< $9 ->
    num_token(Tk, Tk, String, []);
next_token(Tk, [C | _] = String)
        when C >= $A, C =< $Z; C >= $a, C =< $z; C =:= $_ ->
    alphanum_token(Tk, Tk, String, []);
next_token({tk, Row, Col}, [Char | Rest]) ->
    % A one-character token starts and ends on the same column.
    Token = {character, [Char], Row, Col, Col},
    {ok, {Token, {tk, Row, Col + 1}, Rest}}.
|
||||||
|
|
||||||
|
% Scan a run of decimal digits into an integer token.
%
% The first argument is the cursor at the first digit, the second is the
% cursor at the next unread character, and Acc holds the digits read so far
% in reverse order. Returns {ok, {Token, NextTk, Rest}} with
% Token = {integer, Digits, Row, StartCol, EndCol}.
%
% Each digit advances the cursor by one column. EndCol is the column of the
% last digit (inclusive), matching the single-character tokens produced by
% next_token/2, where StartCol and EndCol are equal. The caller is expected to
% enter with a digit at the head of the input, as next_token/2 does.
num_token(Start, {tk, Row, Col}, [N | Rest], Acc) when N >= $0, N =< $9 ->
    num_token(Start, {tk, Row, Col + 1}, Rest, [N | Acc]);
num_token({tk, _, StartCol}, {tk, Row, NextCol}, String, Acc) ->
    NumString = lists:reverse(Acc),
    % NextCol is one past the last digit, so step back for the inclusive end.
    Token = {integer, NumString, Row, StartCol, NextCol - 1},
    {ok, {Token, {tk, Row, NextCol}, String}}.
|
||||||
|
|
||||||
|
% Scan a run of letters, digits and underscores into an alphanum token.
%
% The first argument is the cursor at the first character, the second is the
% cursor at the next unread character, and Acc holds the characters read so
% far in reverse order. Returns {ok, {Token, NextTk, Rest}} with
% Token = {alphanum, Text, Row, StartCol, EndCol}.
%
% Each character advances the cursor by one column. EndCol is the column of
% the last character (inclusive), matching the single-character tokens
% produced by next_token/2. The caller is expected to enter with an accepted
% character at the head of the input, as next_token/2 does.
alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc)
        when C >= $A, C =< $Z; C >= $a, C =< $z; C >= $0, C =< $9; C =:= $_ ->
    alphanum_token(Start, {tk, Row, Col + 1}, Rest, [C | Acc]);
alphanum_token({tk, _, StartCol}, {tk, Row, NextCol}, String, Acc) ->
    AlphaString = lists:reverse(Acc),
    % NextCol is one past the last character, so step back for the inclusive end.
    Token = {alphanum, AlphaString, Row, StartCol, NextCol - 1},
    {ok, {Token, {tk, Row, NextCol}, String}}.
|
||||||
|
|
||||||
|
|
||||||
|
%%% Sophia Literal Parser
|
||||||
|
|
||||||
|
%%% This parser is a simple recursive descent parser, written explicitly in
|
||||||
|
%%% erlang.
|
||||||
|
%%%
|
||||||
|
%%% There are no infix operators in the subset we want to parse, so recursive
|
||||||
|
%%% descent is fine with no special tricks, no shunting yard algorithm, no
|
||||||
|
%%% parser generators, etc.
|
||||||
|
%%%
|
||||||
|
%%% If we were writing this in C then we might want to work iteratively with an
|
||||||
|
%%% array of finite state machines, i.e. with a pushdown automaton, instead of
|
||||||
|
%%% using recursion. This is a tried and true method of making fast parsers.
|
||||||
|
%%% Recall, however, that the BEAM *is* a stack machine, written in C, so
|
||||||
|
%%% rather than writing confusing iterative code in Erlang, to simulate a
|
||||||
|
%%% pushdown automaton inside another simulated stack machine... we should just
|
||||||
|
%%% write the recursive code, thus programming the BEAM to implement the
|
||||||
|
%%% pushdown automaton that we want.
|
||||||
|
|
||||||
|
% Parse one expression of the expected Type, starting at cursor Tk.
%
% Reads the first token and lets parse_expression2/4 dispatch on it. The
% match on {ok, _} is assertive: any other result from next_token/2 raises
% badmatch.
parse_expression(Type, Tk, String) ->
    {ok, {First, AfterTk, AfterString}} = next_token(Tk, String),
    parse_expression2(Type, AfterTk, AfterString, First).
|
||||||
|
|
||||||
|
% Parse an expression whose first token has already been read.
%
% Tk and String describe the input that follows that token. Dispatch is on
% the token:
%   integer token - accepted when the third element of Type is integer or
%                   unknown_type, otherwise a wrong_type error at the token
%   "["           - a list, handled by parse_list/5
%   "{"           - a record or map, handled by parse_record_or_map/5
%   anything else - an unexpected_token error
parse_expression2(Type, Tk, String, {integer, Digits, Row, Start, End}) ->
    Value = list_to_integer(Digits),
    case Type of
        {_, _, Expected} when Expected =:= integer; Expected =:= unknown_type ->
            {ok, {Value, Tk, String}};
        {O, N, _} ->
            {error, {wrong_type, O, N, integer, Row, Start, End}}
    end;
parse_expression2(Type, Tk, String, {character, "[", Row, Start, _}) ->
    parse_list(Type, Tk, String, Row, Start);
parse_expression2(Type, Tk, String, {character, "{", Row, Start, _}) ->
    parse_record_or_map(Type, Tk, String, Row, Start);
parse_expression2(_, _, _, {_, Text, Row, Start, End}) ->
    {error, {unexpected_token, Text, Row, Start, End}}.
|
||||||
|
|
||||||
|
% The placeholder type triple passed down when the expected type of a literal
% is not known. Its third element, unknown_type, is what the parser clauses
% match on.
unknown_type() ->
    Placeholder = unknown_type,
    {Placeholder, already_normalized, Placeholder}.
|
||||||
|
|
||||||
|
% Consume a fixed sequence of tokens, compared by their text.
%
% The first argument is a list of token strings, e.g. ["]", "="]. Each one
% must be the text of the next token in the input, whatever its kind. Returns
% {ok, {Tk, String}} positioned after the last expected token, or
% {error, {unexpected_token, Actual, Row, Start, End}} for the first token
% whose text differs.
expect_tokens([], Tk, String) ->
    {ok, {Tk, String}};
expect_tokens([Str | Rest], Tk, String) ->
    case next_token(Tk, String) of
        % Str is already bound, so this clause only matches the expected text.
        {ok, {{_, Str, _, _, _}, NewTk, NewString}} ->
            expect_tokens(Rest, NewTk, NewString);
        % next_token/2 returns {Token, Tk, Rest}; the cursor and the remaining
        % input are not needed for the error report.
        {ok, {{_, Actual, Row, Start, End}, _, _}} ->
            {error, {unexpected_token, Actual, Row, Start, End}}
    end.
|
||||||
|
|
||||||
|
%%% List Parsing
|
||||||
|
|
||||||
|
% Parse the elements of a list whose opening "[" has already been consumed.
%
% Row and Start are the position of that "[". The element type comes from the
% third element of the expected type: {list, [Inner]} gives Inner,
% unknown_type gives unknown_type(), and anything else is a wrong_type error
% reported at the opening bracket.
parse_list({O, N, Definition}, Tk, String, Row, Start) ->
    case Definition of
        {list, [Inner]} ->
            parse_list_loop(Inner, Tk, String, Row, Start, []);
        unknown_type ->
            parse_list_loop(unknown_type(), Tk, String, Row, Start, []);
        _ ->
            {error, {wrong_type, O, N, list, Row, Start, Start}}
    end.
|
||||||
|
|
||||||
|
% List loop, step 1: at the start of the list or just after a comma.
%
% A "]" here closes the list and yields the accumulated elements in source
% order. Since this step is also entered after a comma, a "]" directly after
% a comma is accepted as well. Any other token is passed to
% parse_list_loop2/7 as the first token of the next element.
parse_list_loop(Inner, Tk, String, Row, Start, Acc) ->
    case next_token(Tk, String) of
        {ok, {{character, "]", _, _, _}, AfterTk, AfterString}} ->
            Elements = lists:reverse(Acc),
            {ok, {Elements, AfterTk, AfterString}};
        {ok, {First, AfterTk, AfterString}} ->
            parse_list_loop2(Inner, AfterTk, AfterString, Row, Start, Acc, First)
    end.
|
||||||
|
|
||||||
|
% List loop, step 2: parse one element whose first token is Token.
%
% On success the value is pushed onto Acc and parse_list_loop3/6 looks for a
% separator. On failure the reason is passed through wrap_error/2 together
% with {list_element, Index}, where Index is the number of elements parsed
% so far.
parse_list_loop2(Inner, Tk, String, Row, Start, Acc, Token) ->
    case parse_expression2(Inner, Tk, String, Token) of
        {error, Reason} ->
            Index = length(Acc),
            {error, wrap_error(Reason, {list_element, Index})};
        {ok, {Element, AfterTk, AfterString}} ->
            parse_list_loop3(Inner, AfterTk, AfterString, Row, Start, [Element | Acc])
    end.
|
||||||
|
|
||||||
|
% List loop, step 3: an element has just been parsed; expect "]" or ",".
%
% "]" closes the list and yields the accumulated elements in source order.
% "," continues with parse_list_loop/6. Any other token, such as the second
% element of "[1 2]", is reported as
% {error, {unexpected_token, Text, Row, Start, End}} at that token's position
% rather than crashing with case_clause.
parse_list_loop3(Inner, Tk, String, Row, Start, Acc) ->
    case next_token(Tk, String) of
        {ok, {{character, "]", _, _, _}, NewTk, NewString}} ->
            {ok, {lists:reverse(Acc), NewTk, NewString}};
        {ok, {{character, ",", _, _, _}, NewTk, NewString}} ->
            parse_list_loop(Inner, NewTk, NewString, Row, Start, Acc);
        % Row and Start are already bound to the position of the opening
        % bracket, so the offending token's position needs its own names.
        {ok, {{_, Text, TokRow, TokStart, TokEnd}, _, _}} ->
            {error, {unexpected_token, Text, TokRow, TokStart, TokEnd}};
        {error, Reason} ->
            {error, Reason}
    end.
|
||||||
|
|
||||||
|
%%% Record parsing
|
||||||
|
|
||||||
|
% Parse what follows an already consumed "{", which may be a record or a map.
%
% The third element of the expected type decides:
%   {map, [KeyType, ValueType]} - parse a map with parse_map/5
%   {record, Fields}            - parse a record with parse_record/3
%   unknown_type                - look at the next token: "}" is an empty
%                                 map, "[" starts a map of unknown key and
%                                 value types, an alphanum token is reported
%                                 as unresolved_record, and anything else is
%                                 an unexpected_token
%   anything else               - wrong_type, reported at the position of the
%                                 opening brace (the last two arguments)
parse_record_or_map({_, _, {map, [KeyType, ValueType]}}, Tk, String, _, _) ->
    EmptyMap = #{},
    parse_map(KeyType, ValueType, Tk, String, EmptyMap);
parse_record_or_map({_, _, {record, Fields}}, Tk, String, _, _) ->
    parse_record(Fields, Tk, String);
parse_record_or_map({_, _, unknown_type}, Tk, String, _, _) ->
    case next_token(Tk, String) of
        {ok, {{alphanum, _, Row, Start, End}, _, _}} ->
            {error, {unresolved_record, Row, Start, End}};
        {ok, {{character, "}", _, _, _}, AfterTk, AfterString}} ->
            {ok, {#{}, AfterTk, AfterString}};
        {ok, {{character, "[", _, _, _}, AfterTk, AfterString}} ->
            Unknown = unknown_type(),
            parse_map2(Unknown, Unknown, AfterTk, AfterString, #{});
        {ok, {{_, Text, Row, Start, End}, _, _}} ->
            {error, {unexpected_token, Text, Row, Start, End}}
    end;
parse_record_or_map({O, N, _}, _, _, Row, Start) ->
    {error, {wrong_type, O, N, map, Row, Start, Start}}.
|
||||||
|
|
||||||
|
% Record literals are not supported yet: always returns
% {error, not_yet_implemented}. The arguments (record fields, cursor and
% remaining input) are accepted but ignored, hence the underscore prefixes
% that keep the compiler from warning about unused variables.
parse_record(_Fields, _Tk, _String) ->
    {error, not_yet_implemented}.
|
||||||
|
|
||||||
|
%%% Map Parsing
|
||||||
|
|
||||||
|
% Map loop, step 1: at the start of the map body or just after a comma.
%
% "[" opens the next key and continues with parse_map2/5. "}" closes the map
% and yields the accumulated entries. Any other token is reported as
% {error, {unexpected_token, Text, Row, Start, End}}.
parse_map(KeyType, ValueType, Tk, String, Acc) ->
    case next_token(Tk, String) of
        {ok, {{character, "[", _, _, _}, NewTk, NewString}} ->
            parse_map2(KeyType, ValueType, NewTk, NewString, Acc);
        {ok, {{character, "}", _, _, _}, NewTk, NewString}} ->
            {ok, {Acc, NewTk, NewString}};
        % next_token/2 returns {Token, Tk, Rest}; all three positions must be
        % present in the pattern for this clause to match.
        {ok, {{_, S, Row, Start, End}, _, _}} ->
            {error, {unexpected_token, S, Row, Start, End}}
    end.
|
||||||
|
|
||||||
|
% Map loop, step 2: parse the key expression that follows "[".
%
% On success continues with parse_map3/6. On failure returns
% {error, Reason}, with the reason passed through wrap_error/2 together with
% {map_key, Index}, where Index is the number of entries accumulated so far.
parse_map2(KeyType, ValueType, Tk, String, Acc) ->
    case parse_expression(KeyType, Tk, String) of
        {ok, {Result, NewTk, NewString}} ->
            parse_map3(KeyType, ValueType, NewTk, NewString, Acc, Result);
        {error, Reason} ->
            % Keep the {error, _} tag so callers matching on ok/error tuples
            % still recognise the failure.
            Wrapped = wrap_error(Reason, {map_key, maps:size(Acc)}),
            {error, Wrapped}
    end.
|
||||||
|
|
||||||
|
% Map loop, step 3: the key has been parsed; require "]" followed by "="
% before the value. Continues with parse_map4/6, or passes through the error
% from expect_tokens/3.
parse_map3(KeyType, ValueType, Tk, String, Acc, Key) ->
    Separators = ["]", "="],
    case expect_tokens(Separators, Tk, String) of
        {error, _} = Failure ->
            Failure;
        {ok, {AfterTk, AfterString}} ->
            parse_map4(KeyType, ValueType, AfterTk, AfterString, Acc, Key)
    end.
|
||||||
|
|
||||||
|
% Map loop, step 4: parse the value expression for Key and store the pair.
%
% A later entry with an equal key replaces the earlier one. Continues with
% parse_map5/5, or passes through the error from parse_expression/3.
parse_map4(KeyType, ValueType, Tk, String, Acc, Key) ->
    case parse_expression(ValueType, Tk, String) of
        {error, _} = Failure ->
            Failure;
        {ok, {Value, AfterTk, AfterString}} ->
            parse_map5(KeyType, ValueType, AfterTk, AfterString, Acc#{Key => Value})
    end.
|
||||||
|
|
||||||
|
% Map loop, step 5: an entry has just been stored; expect "," or "}".
%
% "," continues with parse_map/5 for the next entry. "}" closes the map and
% yields the accumulated entries. Any other token is reported as
% {error, {unexpected_token, Text, Row, Start, End}}.
parse_map5(KeyType, ValueType, Tk, String, Acc) ->
    case next_token(Tk, String) of
        {ok, {{character, ",", _, _, _}, NewTk, NewString}} ->
            parse_map(KeyType, ValueType, NewTk, NewString, Acc);
        {ok, {{character, "}", _, _, _}, NewTk, NewString}} ->
            {ok, {Acc, NewTk, NewString}};
        % next_token/2 returns {Token, Tk, Rest}; all three positions must be
        % present in the pattern for this clause to match.
        {ok, {{_, S, Row, Start, End}, _, _}} ->
            {error, {unexpected_token, S, Row, Start, End}}
    end.
|
||||||
|
|
||||||
|
% TODO: not implemented yet. Callers pass a context term such as
% {list_element, Index} or {map_key, Index} as the second argument; for now
% it is ignored and the reason is returned unchanged.
wrap_error(Reason, _Context) ->
    Reason.
|
||||||
|
|
||||||
|
%%% Tests
|
||||||
|
|
||||||
|
% Test helper: parse Sophia as a literal of Type and require that the result
% is exactly Fate. Returns ok on a match and raises
% {to_fate_failed, Expected, Actual} otherwise. A parse failure raises
% badmatch.
check_sophia_to_fate(Type, Sophia, Fate) ->
    {ok, FateActual} = parse_literal(Type, Sophia),
    if
        FateActual =:= Fate ->
            ok;
        true ->
            erlang:error({to_fate_failed, Fate, FateActual})
    end.
|
||||||
|
|
||||||
|
% Test helper: check that Sophia parses to Fate both with the given Type and
% with no type information (unknown_type()), then pass Fate to
% gmb_fate_encoding:serialize/1 so that a value it rejects fails the test.
check_parser(Type, Sophia, Fate) ->
    Types = [Type, unknown_type()],
    lists:foreach(fun(T) -> check_sophia_to_fate(T, Sophia, Fate) end, Types),

    % Finally, make sure gmb can serialize the FATE result.
    gmb_fate_encoding:serialize(Fate),

    ok.
|
||||||
|
|
||||||
|
% Test helper: like check_parser/3, but obtains the type from hz_aaci.
%
% The literal is embedded as the body of entrypoint f in a one-line contract.
% The contract is turned into an AACI, and the second element of f's
% signature is used as the expected type.
check_parser(Sophia, Fate) ->
    Prefix = "contract C = entrypoint f() = ",
    {ok, AACI} = hz_aaci:aaci_from_string(Prefix ++ Sophia),
    {ok, {_, ResultType}} = hz_aaci:get_function_signature(AACI, "f"),
    check_parser(ResultType, Sophia, Fate).
|
||||||
|
|
||||||
|
% An integer literal parses to the corresponding Erlang integer.
int_test() ->
    Sophia = "123",
    check_parser(Sophia, 123).
|
||||||
|
|
||||||
|
% A flat list of integers parses to an Erlang list in the same order.
list_test() ->
    Sophia = "[1, 2, 3]",
    Expected = [1, 2, 3],
    check_parser(Sophia, Expected).
|
||||||
|
|
||||||
|
% Nested lists, including an empty inner list, keep their structure.
list_of_lists_test() ->
    Sophia = "[[], [1], [2, 3]]",
    Expected = [[], [1], [2, 3]],
    check_parser(Sophia, Expected).
|
||||||
|
|
||||||
|
% A map literal with integer keys and values parses to an Erlang map.
maps_test() ->
    Sophia = "{[1] = 2, [3] = 4}",
    Expected = #{1 => 2, 3 => 4},
    check_parser(Sophia, Expected).
|
||||||
|
|
||||||
Loading…
x
Reference in New Issue
Block a user