Any error reasons or paths are still just term(), and ACI doesn't have a defined spec in the compiler, so whatever — but the AACI types, the Erlang representation of terms, and the four different kinds of coerce function are all spec'd now. Some internal type-substitution functions were also given types, in the hope of catching some errors, but Dialyzer doesn't seem to complain at all no matter how badly I break my code. A strange approach to making a type system, but oh well.
1460 lines
60 KiB
Erlang
1460 lines
60 KiB
Erlang
-module(hz_sophia).
|
|
-vsn("0.8.2").
|
|
-author("Jarvis Carroll <spiveehere@gmail.com>").
|
|
-copyright("Jarvis Carroll <spiveehere@gmail.com>").
|
|
-license("GPL-3.0-or-later").
|
|
|
|
-export([parse_literal/1, parse_literal/2]).
|
|
-export([fate_to_list/1, fate_to_list/2, fate_to_iolist/1, fate_to_iolist/2]).
|
|
|
|
-include_lib("eunit/include/eunit.hrl").
|
|
|
|
|
|
-spec parse_literal(Sophia) -> {ok, FATE} | {error, Reason}
    when Sophia :: string(),
         FATE   :: gmb_fate_data:fate_type(),
         Reason :: term().
%% Parse a Sophia literal with no expected type; the term's type is taken
%% from its syntax alone.

parse_literal(String) ->
    Unconstrained = unknown_type(),
    parse_literal(Unconstrained, String).
|
|
|
|
-spec parse_literal(Type, Sophia) -> {ok, FATE} | {error, Reason}
    when Type   :: hz_aaci:annotated_type(),
         Sophia :: string(),
         FATE   :: gmb_fate_data:fate_type(),
         Reason :: term().
%% Parse a Sophia literal, checking it against the given annotated type as
%% we go. Parsing starts at row 1, column 1.

parse_literal(Type, String) ->
    StartPos = {1, 1},
    case parse_expression(Type, StartPos, String) of
        {ok, {Result, RestPos, Rest}} ->
            parse_literal2(Result, RestPos, Rest);
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%% A valid expression has been parsed; it is only a valid literal if nothing
%% but end-of-input follows it.
parse_literal2(Result, Pos, String) ->
    case next_token(Pos, String) of
        {ok, {{eof, _, _, _, _, _}, _, _}} -> {ok, Result};
        {ok, {Trailing, _, _}}             -> unexpected_token(Trailing);
        {error, _} = Error                 -> Error
    end.
|
|
|
|
%%% Tokenizer
|
|
|
|
% Character-class predicates for the tokenizer. All are guard-safe, since
% they expand to comparisons and boolean connectives only.
-define(IS_LATIN_UPPER(C), (((C) >= $A) and ((C) =< $Z))).
-define(IS_LATIN_LOWER(C), (((C) >= $a) and ((C) =< $z))).
% Identifiers may contain underscores.
-define(IS_ALPHA(C), (?IS_LATIN_UPPER(C) or ?IS_LATIN_LOWER(C) or ((C) == $_))).
-define(IS_NUM(C), (((C) >= $0) and ((C) =< $9))).
% "." is included so that qualified names like Namespace.Constructor tokenize
% as a single alphanum token; the path is split apart later.
-define(IS_ALPHANUM(C), (?IS_ALPHA(C) or ?IS_NUM(C) or ((C) == $.))).
% Hex digits in either case, for bytes literals and \x escapes.
-define(IS_HEX(C), (?IS_NUM(C) or (((C) >= $A) and ((C) =< $F)) or (((C) >= $a) and ((C) =< $f)))).
|
|
|
|
% Produce the next token from the input, skipping leading whitespace and
% tracking the {Row, Col} position (1-based). Returns
% {ok, {Token, NewPos, Rest}} or {error, Reason}. Tokens are 6-tuples of
% {Kind, SourceText, Value, Row, StartCol, EndCol}.
next_token({Row, Col}, []) ->
    {ok, {{eof, "", [], Row, Col, Col}, {Row, Col}, []}};
next_token({Row, Col}, " " ++ Rest) ->
    next_token({Row, Col + 1}, Rest);
next_token({Row, Col}, "\t" ++ Rest) ->
    next_token({Row, Col + 1}, Rest);
% All three newline conventions (\r\n, \r, \n) advance the row and reset the
% column. \r\n must come first so it is consumed as one line break, not two.
next_token({Row, _}, "\r\n" ++ Rest) ->
    next_token({Row + 1, 1}, Rest);
next_token({Row, _}, "\r" ++ Rest) ->
    next_token({Row + 1, 1}, Rest);
next_token({Row, _}, "\n" ++ Rest) ->
    next_token({Row + 1, 1}, Rest);
% Identifiers/keywords: start with a letter or underscore.
next_token(Pos, [C | _] = String) when ?IS_ALPHA(C) ->
    alphanum_token(Pos, Pos, String, []);
next_token(Pos, [C | _] = String) when ?IS_NUM(C) ->
    num_token(Pos, Pos, String, [], 0);
% Bytes literal: "#" followed by at least one hex digit.
next_token({Row, Col}, [$#, C | Rest]) when ?IS_HEX(C) ->
    bytes_token({Row, Col}, {Row, Col + 1}, [C | Rest], "#", []);
next_token({Row, Col}, "\"" ++ Rest) ->
    string_token({Row, Col}, {Row, Col + 1}, Rest, "\"", <<>>);
next_token({Row, Col}, "'" ++ Rest) ->
    character_token({Row, Col}, {Row, Col + 1}, Rest, "'");
% Any other single character becomes its own punctuation token.
next_token({Row, Col}, [Char | Rest]) ->
    Token = {character, [Char], Char, Row, Col, Col},
    {ok, {Token, {Row, Col + 1}, Rest}}.
|
|
|
|
% Accumulate an identifier token. The accumulated text is also split on "."
% to give the dotted path (e.g. "Chain.GAMetaTx" -> ["Chain", "GAMetaTx"]).
alphanum_token(Start, {Row, Col}, [Char | Rest], Seen) when ?IS_ALPHANUM(Char) ->
    alphanum_token(Start, {Row, Col + 1}, Rest, [Char | Seen]);
alphanum_token({_, StartCol}, {Row, After}, Remaining, Seen) ->
    Text = lists:reverse(Seen),
    Token = {alphanum, Text, string:split(Text, ".", all), Row, StartCol, After - 1},
    {ok, {Token, {Row, After}, Remaining}}.
|
|
|
|
% Accumulate a decimal integer token, folding the numeric value as we go.
% Underscores are permitted between digits (e.g. 1_000_000) and are kept in
% the source text but ignored in the value.
num_token(Start, {Row, Col}, [D | Rest], Seen, Acc) when ?IS_NUM(D) ->
    num_token(Start, {Row, Col + 1}, Rest, [D | Seen], Acc * 10 + (D - $0));
num_token(Start, {Row, Col}, [$_, D | Rest], Seen, Acc) when ?IS_NUM(D) ->
    num_token(Start, {Row, Col + 2}, Rest, [D, $_ | Seen], Acc * 10 + (D - $0));
num_token({_, StartCol}, {Row, After}, Remaining, Seen, Acc) ->
    Token = {integer, lists:reverse(Seen), Acc, Row, StartCol, After - 1},
    {ok, {Token, {Row, After}, Remaining}}.
|
|
|
|
% Accumulate a bytes literal token (#ab_cd...). Hex nibbles are collected in
% reverse and combined into a binary at the end. Underscores are permitted
% between digits, kept in the source text but ignored in the value.
bytes_token(Start, {Row, Col}, [C | Rest], Chars, Digits) when ?IS_HEX(C) ->
    Digit = convert_digit(C),
    bytes_token(Start, {Row, Col + 1}, Rest, [C | Chars], [Digit | Digits]);
bytes_token(Start, {Row, Col}, [$_, C | Rest], Chars, Digits) when ?IS_HEX(C) ->
    Digit = convert_digit(C),
    % Two characters were consumed ($_ and the digit), so advance the column
    % by two, matching the equivalent clause of num_token/5.
    bytes_token(Start, {Row, Col + 2}, Rest, [C, $_ | Chars], [Digit | Digits]);
bytes_token({_, Start}, {Row, End}, String, Chars, Digits) ->
    BytesString = lists:reverse(Chars),
    Value = reverse_combine_nibbles(Digits, <<>>),
    Token = {bytes, BytesString, Value, Row, Start, End - 1},
    {ok, {Token, {Row, End}, String}}.
|
|
|
|
% Convert one hexadecimal digit character to its integer value (0..15).
% The guards are restricted to the hex range; previously they accepted any
% Latin letter and silently returned values up to 35 for $G..$Z / $g..$z.
% All call sites guard with ?IS_HEX, so reachable behavior is unchanged, but
% misuse now fails loudly with function_clause instead of corrupting data.
convert_digit(C) when C >= $0, C =< $9 ->
    C - $0;
convert_digit(C) when C >= $A, C =< $F ->
    C - $A + 10;
convert_digit(C) when C >= $a, C =< $f ->
    C - $a + 10.
|
|
|
|
% Combine a reversed list of hex nibbles into a binary. The list head is the
% LAST digit scanned, so pairs come off as [Low, High]. An odd leading digit
% gets a zero high nibble in the first byte.
reverse_combine_nibbles([Low, High | Rest], Acc) ->
    reverse_combine_nibbles(Rest, <<High:4, Low:4, Acc/binary>>);
reverse_combine_nibbles([Low], Acc) ->
    <<0:4, Low:4, Acc/binary>>;
reverse_combine_nibbles([], Acc) ->
    Acc.
|
|
|
|
% Accumulate a string literal token. SourceChars holds the raw source text in
% reverse (including the opening quote); Value is the decoded UTF-8 binary.
% Strings may not span lines: a raw \r or \n (or end of input) is an
% unclosed-literal error.
string_token({_, Start}, {Row, Col}, [$" | Rest], SourceChars, Value) ->
    SourceStr = lists:reverse([$" | SourceChars]),
    Token = {string, SourceStr, Value, Row, Start, Col},
    {ok, {Token, {Row, Col + 1}, Rest}};
string_token({_, Start}, {Row, Col}, [], SourceChars, _) ->
    SourceStr = lists:reverse(SourceChars),
    {error, {unclosed_string_literal, SourceStr, Start, Row, Col - 1}};
string_token({_, Start}, {Row, Col}, [$\r | _], SourceChars, _) ->
    SourceStr = lists:reverse(SourceChars),
    {error, {unclosed_string_literal, SourceStr, Start, Row, Col - 1}};
string_token({_, Start}, {Row, Col}, [$\n | _], SourceChars, _) ->
    SourceStr = lists:reverse(SourceChars),
    {error, {unclosed_string_literal, SourceStr, Start, Row, Col - 1}};
% Any other character (possibly an escape sequence) is decoded by parse_char
% and appended to the value as UTF-8.
string_token(Start, Pos, String, SourceChars, Value) ->
    case parse_char(Start, Pos, String, SourceChars) of
        {ok, {Char, NewSourceChars, NewPos, NewString}} ->
            % TODO: ERTS probably had to convert this FROM utf8 at some point,
            % so why bother, if we need to convert it back? I guess we could
            % accept iolists if we really wanted to waste time on this point...
            NewValue = <<Value/binary, Char/utf8>>,
            string_token(Start, NewPos, NewString, NewSourceChars, NewValue);
        {error, Reason} ->
            {error, Reason}
    end.
|
|
|
|
% Begin a character literal ('x'). Like strings, a character literal may not
% span lines, and end of input / raw newlines are unclosed-literal errors.
character_token({_, StartCol}, {Row, Col}, [], Seen) ->
    {error, {unclosed_character_literal, lists:reverse(Seen), StartCol, Row, Col - 1}};
character_token({_, StartCol}, {Row, Col}, [$\r | _], Seen) ->
    {error, {unclosed_character_literal, lists:reverse(Seen), StartCol, Row, Col - 1}};
character_token({_, StartCol}, {Row, Col}, [$\n | _], Seen) ->
    {error, {unclosed_character_literal, lists:reverse(Seen), StartCol, Row, Col - 1}};
character_token(Start, Pos, Remaining, Seen) ->
    % Decode one (possibly escaped) character, then demand the closing quote.
    case parse_char(Start, Pos, Remaining, Seen) of
        {ok, {Char, Seen2, Pos2, Remaining2}} ->
            character_token2(Start, Pos2, Remaining2, Seen2, Char);
        {error, _} = Error ->
            Error
    end.
|
|
|
|
% Finish a character literal: exactly one character has been decoded, so the
% next input character must be the closing quote.
character_token2({_, StartCol}, {Row, Col}, [$' | Rest], Seen, Value) ->
    Source = lists:reverse([$' | Seen]),
    Token = {char_literal, Source, Value, Row, StartCol, Col},
    {ok, {Token, {Row, Col + 1}, Rest}};
character_token2({_, StartCol}, {Row, Col}, _, Seen, _) ->
    {error, {unclosed_character_literal, lists:reverse(Seen), StartCol, Row, Col - 1}}.
|
|
|
|
% Decode one character from a string/character literal, handling escape
% sequences. Returns {ok, {CharValue, NewSourceChars, NewPos, Rest}} where
% NewSourceChars is the raw source accumulator (reversed) with the consumed
% characters pushed on, or {error, Reason} for an invalid escape.
parse_char(Start, {Row, Col}, "\\x{" ++ String, SourceChars) ->
    % Long hex escape: \x{...} with any number of hex digits.
    escape_long_hex_code(Start, {Row, Col + 3}, String, "{x\\" ++ SourceChars, 0);
parse_char(_, {Row, Col}, [$\\, $x, A, B | String], SourceChars) when ?IS_HEX(A), ?IS_HEX(B) ->
    % Short hex escape: \xAB, exactly two hex digits.
    Byte = convert_digit(A) * 16 + convert_digit(B),
    {ok, {Byte, [B, A, $x, $\\ | SourceChars], {Row, Col + 4}, String}};
parse_char({Row, Start}, {Row, Col}, [$\\, C | Rest], SourceChars) ->
    case unescape_char(C) of
        {ok, ByteVal} ->
            % Record both consumed source characters: the backslash and C.
            % (Previously a space was accumulated instead of the backslash,
            % corrupting the reconstructed source text in error reports.)
            {ok, {ByteVal, [C, $\\ | SourceChars], {Row, Col + 2}, Rest}};
        error ->
            {error, {invalid_escape_code, [$\\, C], Row, Start, Col + 1}}
    end;
parse_char(_, {Row, Col}, [C | Rest], SourceChars) ->
    % Any other character stands for itself.
    {ok, {C, [C | SourceChars], {Row, Col + 1}, Rest}}.
|
|
|
|
% Parse the digits of a long hex escape, \x{...}, folding the code point
% value as we go. The opening "\x{" has already been consumed.
escape_long_hex_code(_, {Row, Col}, "}" ++ String, SourceChars, Value) ->
    {ok, {Value, "}" ++ SourceChars, {Row, Col + 1}, String}};
escape_long_hex_code(Start, {Row, Col}, [C | String], SourceChars, Value) when ?IS_HEX(C) ->
    NewSourceChars = [C | SourceChars],
    NewValue = 16 * Value + convert_digit(C),
    escape_long_hex_code(Start, {Row, Col + 1}, String, NewSourceChars, NewValue);
% A non-hex, non-brace character inside the escape is an error.
escape_long_hex_code(_, {Row, Col}, [C | _], _, _) ->
    {error, {invalid_hexadecimal, [C], Row, Col}};
escape_long_hex_code(_, Pos, [], SourceChars, Value) ->
    % Just return as if the escape code were closed, and let the string parser
    % produce an unclosed string error instead.
    {ok, {Value, SourceChars, Pos, []}}.
|
|
|
|
% Map an escape-sequence character (the character after a backslash) to the
% character it denotes. Returns {ok, Char} or error for unknown escapes.
% Technically \" and \' are only valid inside their own quote characters, not
% each other, but whatever, we will just be permissive here.
unescape_char(C) ->
    Escapes = #{$b => $\b, $e => $\e, $f => $\f, $n => $\n, $r => $\r,
                $t => $\t, $v => $\v, $" => $\", $' => $\', $\\ => $\\},
    maps:find(C, Escapes).
|
|
|
|
% Not needed until later, but we'll put it here for symmetry.
|
|
% Inverse of unescape_char/1, for rendering: map a character to its escaped
% source form. Escapable characters become two-character strings; everything
% else is returned unchanged (as a bare character).
escape_char(C) ->
    Table = #{$\b => "\\b", $\e => "\\e", $\f => "\\f", $\n => "\\n",
              $\r => "\\r", $\t => "\\t", $\v => "\\v", $\" => "\\\"",
              $\\ => "\\\\"},
    maps:get(C, Table, C).
|
|
|
|
%%% Sophia Literal Parser
|
|
|
|
%%% This parser is a simple recursive descent parser, written explicitly in
|
|
%%% erlang.
|
|
%%%
|
|
%%% There are no infix operators in the subset we want to parse, so recursive
|
|
%%% descent is fine with no special tricks, no shunting yard algorithm, no
|
|
%%% parser generators, etc.
|
|
%%%
|
|
%%% If we were writing this in C then we might want to work iteratively with an
|
|
%%% array of finite state machines, i.e. with a pushdown automaton, instead of
|
|
%%% using recursion. This is a tried and true method of making fast parsers.
|
|
%%% Recall, however, that the BEAM *is* a stack machine, written in C, so
|
|
%%% rather than writing confusing iterative code in Erlang, to simulate a
|
|
%%% pushdown automaton inside another simulated stack machine... we should just
|
|
%%% write the recursive code, thus programming the BEAM to implement the
|
|
%%% pushdown automaton that we want.
|
|
|
|
% Parse one expression of the given annotated type: fetch the next token and
% dispatch on its kind.
parse_expression(Type, Pos, String) ->
    case next_token(Pos, String) of
        {ok, {Token, Pos2, Rest}} ->
            parse_expression2(Type, Pos2, Rest, Token);
        {error, _} = Error ->
            Error
    end.
|
|
|
|
% Dispatch on the first token of an expression, typechecking the parsed value
% against the expected annotated type {OrigType, NormType, Concrete}.
parse_expression2(Type, Pos, String, {integer, _, Value, Row, Start, End}) ->
    typecheck_integer(Type, Pos, String, Value, Row, Start, End);
% A leading "-" must be followed by an integer token; negate its value.
parse_expression2(Type, Pos, String, {character, "-", _, _, _, _}) ->
    case next_token(Pos, String) of
        {ok, {{integer, _, Value, Row, Start, End}, NewPos, NewString}} ->
            typecheck_integer(Type, NewPos, NewString, -Value, Row, Start, End);
        {error, Reason} ->
            {error, Reason}
    end;
% Bytes literals check length against the expected bytes(N) type; a bits type
% reinterprets the bytes as an integer bitfield.
parse_expression2(Type, Pos, String, {bytes, _, Value, Row, Start, End}) ->
    Len = byte_size(Value),
    Result = {bytes, Value},
    case Type of
        {_, _, {bytes, [any]}} ->
            {ok, {Result, Pos, String}};
        {_, _, {bytes, [Len]}} ->
            {ok, {Result, Pos, String}};
        {_, _, {bytes, [ExpectedLen]}} ->
            {error, {bytes_wrong_size, ExpectedLen, Len, Row, Start, End}};
        {_, _, bits} ->
            Size = bit_size(Value),
            <<IntValue:Size>> = Value,
            {ok, {{bits, IntValue}, Pos, String}};
        {_, _, unknown_type} ->
            {ok, {Result, Pos, String}};
        {O, N, _} ->
            {error, {wrong_type, O, N, {bytes, [Len]}, Row, Start, End}}
    end;
parse_expression2(Type, Pos, String, {string, _, Value, Row, Start, End}) ->
    case Type of
        {_, _, string} ->
            {ok, {Value, Pos, String}};
        {_, _, unknown_type} ->
            {ok, {Value, Pos, String}};
        {O, N, _} ->
            {error, {wrong_type, O, N, string, Row, Start, End}}
    end;
parse_expression2(Type, Pos, String, {char_literal, _, Value, Row, Start, End}) ->
    case Type of
        {_, _, char} ->
            {ok, {Value, Pos, String}};
        {_, _, unknown_type} ->
            {ok, {Value, Pos, String}};
        {O, N, _} ->
            {error, {wrong_type, O, N, char, Row, Start, End}}
    end;
% Bracketed forms recurse into the dedicated sub-parsers.
parse_expression2(Type, Pos, String, {character, "[", _, Row, Start, _}) ->
    parse_list(Type, Pos, String, Row, Start);
parse_expression2(Type, Pos, String, {character, "(", _, _, _, _}) ->
    parse_tuple(Type, Pos, String);
parse_expression2(Type, Pos, String, {character, "{", _, Row, Start, _}) ->
    parse_record_or_map(Type, Pos, String, Row, Start);
% Identifiers may be booleans, Bits constants, chain objects, or variant
% constructors; parse_alphanum disambiguates.
parse_expression2(Type, Pos, String, {alphanum, _, Path, Row, Start, End}) ->
    parse_alphanum(Type, Pos, String, Path, Row, Start, End);
parse_expression2(_, _, _, {eof, _, _, _, _, _}) ->
    {error, unexpected_end_of_file};
parse_expression2(_, _, _, Token) ->
    unexpected_token(Token).
|
|
|
|
% The annotated-type triple used when no type information is available; the
% typecheck_* functions accept values of any shape against it.
unknown_type() ->
    {unknown_type, already_normalized, unknown_type}.
|
|
|
|
% Consume a fixed sequence of expected token texts, failing on the first
% token whose source text does not match. Note that Expected is bound in the
% head pattern below, so the match only succeeds on the exact text.
expect_tokens([], Pos, String) ->
    {ok, {Pos, String}};
expect_tokens([Expected | Rest], Pos, String) ->
    case next_token(Pos, String) of
        {ok, {{_, Expected, _, _, _, _}, Pos2, String2}} ->
            expect_tokens(Rest, Pos2, String2);
        {ok, {Other, _, _}} ->
            unexpected_token(Other, Expected);
        {error, _} = Error ->
            Error
    end.
|
|
|
|
% Report an unexpected token when a specific token was expected.
% The expected token is currently unused: sometimes there are only one or two
% tokens that could have worked, which might make for simple non-technical
% error messages, but it is not clear how to format that yet.
unexpected_token(Token, _Expected) ->
    unexpected_token(Token).

% Report an unexpected token. End-of-input gets its own error term, since it
% usually means the user simply stopped typing too soon.
unexpected_token({eof, _, _, _, _, _}) ->
    {error, expression_incomplete};
unexpected_token({_, Text, _, Row, Start, End}) ->
    {error, {unexpected_token, Text, Row, Start, End}}.
|
|
|
|
%%% Ambiguous Chain Object vs Identifier Parsing
|
|
|
|
% Disambiguate an identifier token: boolean literals, Bits constants, chain
% objects (ak_.../ct_.../sg_...), or variant constructors.
parse_alphanum(Type, Pos, String, ["true"], Row, Start, End) ->
    typecheck_bool(Type, Pos, String, true, Row, Start, End);
parse_alphanum(Type, Pos, String, ["false"], Row, Start, End) ->
    typecheck_bool(Type, Pos, String, false, Row, Start, End);
% Bits.all is all-ones (-1 as an integer bitfield), Bits.none is all-zeros.
parse_alphanum(Type, Pos, String, ["Bits", "all"], Row, Start, End) ->
    typecheck_bits(Type, Pos, String, -1, Row, Start, End);
parse_alphanum(Type, Pos, String, ["Bits", "none"], Row, Start, End) ->
    typecheck_bits(Type, Pos, String, 0, Row, Start, End);
parse_alphanum(Type, Pos, String, [[C | _] = S], Row, Start, End) when ?IS_LATIN_LOWER(C) ->
    % From a programming perspective, we are trying to parse a constant, so
    % an alphanum token can really only be a constructor, or a chain object.
    % Constructors start with uppercase characters, so lowercase can only be a
    % chain object.
    try
        case gmser_api_encoder:decode(unicode:characters_to_binary(S)) of
            {account_pubkey, Data} ->
                typecheck_address(Type, Pos, String, Data, Row, Start, End);
            {contract_pubkey, Data} ->
                typecheck_contract(Type, Pos, String, Data, Row, Start, End);
            {signature, Data} ->
                typecheck_signature(Type, Pos, String, Data, Row, Start, End);
            {_, _} ->
                % Only a few chain objects are recognized by Sophia. The rest
                % are interpreted as identifiers, so we might as well give the
                % same sort of error that the compiler would give.
                {error, {unexpected_identifier, S, Row, Start, End}}
        end
    catch
        % decode throws on strings that are not chain objects at all; treat
        % those as unknown identifiers too.
        _:_ -> {error, {unexpected_identifier, S, Row, Start, End}}
    end;
parse_alphanum(Type, Pos, String, Path, Row, Start, End) ->
    % Inversely, chain object prefixes are always lowercase, so any other path
    % must be a variant constructor, or invalid.
    parse_variant(Type, Pos, String, Path, Row, Start, End).
|
|
|
|
% Check a parsed integer against the expected type. An integer literal is
% also acceptable where bits are expected, becoming a bitfield value.
typecheck_integer({O, N, Expected}, Pos, String, Value, Row, Start, End) ->
    case Expected of
        integer      -> {ok, {Value, Pos, String}};
        unknown_type -> {ok, {Value, Pos, String}};
        bits         -> {ok, {{bits, Value}, Pos, String}};
        _            -> {error, {wrong_type, O, N, integer, Row, Start, End}}
    end.
|
|
|
|
% Check a parsed boolean literal against the expected type.
typecheck_bool({O, N, Expected}, Pos, String, Value, Row, Start, End) ->
    case Expected of
        unknown_type -> {ok, {Value, Pos, String}};
        boolean      -> {ok, {Value, Pos, String}};
        _            -> {error, {wrong_type, O, N, boolean, Row, Start, End}}
    end.
|
|
|
|
% Check a Bits constant against the expected type, wrapping the integer
% bitfield value in a {bits, _} tag.
typecheck_bits({O, N, Expected}, Pos, String, Value, Row, Start, End) ->
    case Expected of
        unknown_type -> {ok, {{bits, Value}, Pos, String}};
        bits         -> {ok, {{bits, Value}, Pos, String}};
        _            -> {error, {wrong_type, O, N, bits, Row, Start, End}}
    end.
|
|
|
|
% Check an account pubkey (ak_...) against the expected type. A contract type
% is also accepted leniently: the compiler would type-error, but we should be
% lenient here.
typecheck_address({O, N, Expected}, Pos, String, Data, Row, Start, End) ->
    case Expected of
        address      -> {ok, {{address, Data}, Pos, String}};
        contract     -> {ok, {{contract, Data}, Pos, String}};
        unknown_type -> {ok, {{address, Data}, Pos, String}};
        _            -> {error, {wrong_type, O, N, address, Row, Start, End}}
    end.
|
|
|
|
% Check a contract pubkey (ct_...) against the expected type. An address type
% is also accepted leniently: the compiler would type-error, but we should be
% lenient here.
typecheck_contract({O, N, Expected}, Pos, String, Data, Row, Start, End) ->
    case Expected of
        contract     -> {ok, {{contract, Data}, Pos, String}};
        address      -> {ok, {{address, Data}, Pos, String}};
        unknown_type -> {ok, {{contract, Data}, Pos, String}};
        _            -> {error, {wrong_type, O, N, contract, Row, Start, End}}
    end.
|
|
|
|
% Check a signature (sg_...) against the expected type. Signatures are
% represented as bytes; bytes(64) and bytes(any) are also accepted even
% though the compiler would probably type-error.
typecheck_signature({O, N, Expected}, Pos, String, Data, Row, Start, End) ->
    case Expected of
        signature     -> {ok, {{bytes, Data}, Pos, String}};
        {bytes, [64]} -> {ok, {{bytes, Data}, Pos, String}};
        {bytes, [any]} -> {ok, {{bytes, Data}, Pos, String}};
        unknown_type  -> {ok, {{bytes, Data}, Pos, String}};
        _             -> {error, {wrong_type, O, N, signature, Row, Start, End}}
    end.
|
|
|
|
|
|
%%% List Parsing
|
|
|
|
% A "[" was consumed; the expected type must be a list (or unknown). The
% element type constrains every element.
parse_list({_, _, {list, [ElemType]}}, Pos, String, _, _) ->
    parse_list2(ElemType, Pos, String);
parse_list({_, _, unknown_type}, Pos, String, _, _) ->
    parse_list2(unknown_type(), Pos, String);
parse_list({O, N, _}, _, _, Row, Start) ->
    {error, {wrong_type, O, N, list, Row, Start, Start}}.
|
|
|
|
% Run the comma-separated element loop up to the closing "]", discarding the
% extra bookkeeping (trailing-comma flag and close position) that the tuple
% parser needs but lists do not.
parse_list2(ElemType, Pos, String) ->
    case parse_list_loop(ElemType, Pos, String, "]", []) of
        {ok, {Elements, _, _, Pos2, Rest}} ->
            {ok, {Elements, Pos2, Rest}};
        {error, _} = Error ->
            Error
    end.
|
|
|
|
% Parse zero or more comma-separated elements up to CloseChar. Note that
% CloseChar is already bound, so the first head only matches the expected
% closing character. On success returns
% {ok, {Elements, ClosedImmediately, ClosePos, NewPos, Rest}} — the boolean
% is true when the close character came with no preceding element (used by
% the tuple parser to detect trailing commas / empty tuples).
parse_list_loop(Inner, Pos, String, CloseChar, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, CloseChar, _, Row, Col, _}, NewPos, NewString}} ->
            {ok, {lists:reverse(Acc), true, {Row, Col}, NewPos, NewString}};
        {ok, {Token, NewPos, NewString}} ->
            parse_list_loop2(Inner, NewPos, NewString, CloseChar, Acc, Token);
        {error, Reason} ->
            {error, Reason}
    end.
|
|
|
|
% Parse one element of the list/tuple. Errors are wrapped with the element's
% zero-based index so nested failures can be located.
parse_list_loop2(Inner, Pos, String, CloseChar, Acc, Token) ->
    case parse_expression2(Inner, Pos, String, Token) of
        {ok, {Value, NewPos, NewString}} ->
            parse_list_loop3(Inner, NewPos, NewString, CloseChar, [Value | Acc]);
        {error, Reason} ->
            Wrapper = choose_list_error_wrapper(CloseChar),
            % TODO: Are tuple indices off by one from list indices?
            Wrapped = wrap_error(Reason, {Wrapper, length(Acc)}),
            {error, Wrapped}
    end.
|
|
|
|
% After an element: either the closing character ends the collection, or a
% comma continues it. Anything else is a parse error. As above, CloseChar is
% bound, so the first head matches only the expected closer.
parse_list_loop3(Inner, Pos, String, CloseChar, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, CloseChar, _, Row, Col, _}, NewPos, NewString}} ->
            {ok, {lists:reverse(Acc), false, {Row, Col}, NewPos, NewString}};
        {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} ->
            parse_list_loop(Inner, NewPos, NewString, CloseChar, Acc);
        {ok, {Token, _, _}} ->
            unexpected_token(Token, CloseChar);
        {error, Reason} ->
            {error, Reason}
    end.
|
|
|
|
% Pick the error-wrapper tag matching the collection being parsed, based on
% its closing character.
choose_list_error_wrapper(Closer) ->
    case Closer of
        "]" -> list_element;
        ")" -> tuple_element
    end.
|
|
|
|
%%% Ambiguous Parenthesis Parsing
|
|
|
|
% A "(" was consumed; parse either a tuple or a parenthesized expression.
parse_tuple({_, _, unknown_type}, Pos, String) ->
    % An untyped tuple is a list of untyped terms, and weirdly our list parser
    % works perfectly for that, as long as we change the closing character to
    % be ")" instead of "]".
    case parse_list_loop(unknown_type(), Pos, String, ")", []) of
        {ok, {[Inner], false, _, NewPos, NewString}} ->
            % In Sophia, trailing commas are invalid, and so all singleton
            % tuples are unwrapped, and translated into the inner type.
            {ok, {Inner, NewPos, NewString}};
        {ok, {TermList, _, _, NewPos, NewString}} ->
            Result = {tuple, list_to_tuple(TermList)},
            {ok, {Result, NewPos, NewString}};
        {error, Reason} ->
            {error, Reason}
    end;
parse_tuple(Type, Pos, String) ->
    % Typed tuple parsing is quite complex, because we also want to support
    % normal parentheses for grouping. It's not strictly necessary for
    % inputting data, since we don't have any infix operators in simple
    % data/term notation, but the alternatives are to generate singleton tuples
    % naively, (which are impossible to generate from Sophia,) or to hard error
    % on singleton tuples! Being faithful to Sophia is clearly nice!

    % Count how many ambiguous parens there are, including the one we already
    % saw.
    case count_open_parens(Pos, String, 1) of
        {ok, {Count, Token, NewPos, NewString}} ->
            % Compare that to the amount of nesting tuple connectives are in
            % the type we are expected to produce.
            {ExcessCount, HeadType, Tails} = extract_tuple_type_info(Count, Type, []),
            % Now work out what to do with all this information.
            parse_tuple2(ExcessCount, HeadType, Tails, NewPos, NewString, Token);
        {error, Reason} ->
            {error, Reason}
    end.
|
|
|
|
% Count a run of consecutive "(" tokens. Returns the total count together
% with the first non-paren token, already consumed.
count_open_parens(Pos, String, Count) ->
    case next_token(Pos, String) of
        {ok, {{character, "(", _, _, _, _}, Pos2, Rest}} ->
            count_open_parens(Pos2, Rest, Count + 1);
        {ok, {Other, Pos2, Rest}} ->
            {ok, {Count, Other, Pos2, Rest}};
        {error, _} = Error ->
            Error
    end.
|
|
|
|
% Peel tuple type layers, one per open paren. Each layer's remaining element
% types are pushed onto Tails (innermost first). Stops when the parens run
% out, or the type is no longer a non-empty tuple; whatever remains of the
% paren count is "excess" grouping parens.
extract_tuple_type_info(Remaining, {_, _, {tuple, [First | Others]}}, Tails) when Remaining > 0 ->
    extract_tuple_type_info(Remaining - 1, First, [Others | Tails]);
extract_tuple_type_info(Remaining, InnerType, Tails) ->
    {Remaining, InnerType, Tails}.
|
|
|
|
% Continue typed tuple parsing once the open parens are counted and the type
% layers peeled.
parse_tuple2(_, {_, _, unknown_type}, [_ | _], _, _, _) ->
    {error, "Parsing of tuples with known lengths but unknown contents is not yet implemented."};
% An immediate ")" means an empty tuple (or a type error).
parse_tuple2(ExcessCount, HeadType, Tails, Pos, String, {character, ")", _, Row, Col, _}) ->
    parse_empty_tuple(ExcessCount, HeadType, Tails, Pos, String, Row, Col);
parse_tuple2(ExcessCount, HeadType, Tails, Pos, String, Token) ->
    % Finished with parentheses for now, try and parse an expression out, to
    % get our head term.
    case parse_expression2(HeadType, Pos, String, Token) of
        {ok, {Result, NewPos, NewString}} ->
            % Got a head term. Now try to build all the other tuple layers.
            parse_tuple_tails(ExcessCount, Result, Tails, NewPos, NewString);
        {error, Reason} ->
            % TODO: Wrap errors here too.
            {error, Reason}
    end.
|
|
|
|
% Handle "()" against the expected type layers.
parse_empty_tuple(0, _, Tails, _, _, Row, Col) ->
    % There are zero excess parens, meaning all our parens are tuples. Get the
    % top one.
    [Tail | _] = Tails,
    % We expected some nonzero number of elements before the close paren, but
    % got zero.
    ExpectCount = 1 + length(Tail),
    {error, {not_enough_elements, ExpectCount, 0, Row, Col}};
parse_empty_tuple(ExcessCount, {_, _, {tuple, []}}, Tails, Pos, String, _, _) ->
    % If we have some ambiguous parentheses left, we now know one of them is
    % this empty tuple.
    HeadTerm = {tuple, {}},
    NewExcessCount = ExcessCount - 1,
    % Now continue the loop as if it were an integer or something, in the head
    % position.
    parse_tuple_tails(NewExcessCount, HeadTerm, Tails, Pos, String);
parse_empty_tuple(_, {HeadO, HeadN, _}, _, _, _, Row, Col) ->
    % We were expecting a head term of a different type!
    {error, {wrong_type, HeadO, HeadN, unit, Row, Col, Col}}.
|
|
|
|
% With a head term in hand, close out the remaining grouping parens and/or
% build the remaining tuple layers.
parse_tuple_tails(0, HeadTerm, [], Pos, String) ->
    % No open parens left, no tuples left to build, we are done!
    {ok, {HeadTerm, Pos, String}};
parse_tuple_tails(ExcessCount, HeadTerm, Tails, Pos, String) ->
    % The ambiguous case, where we have a mix of tuple parens, and grouping
    % parens. We want to peek at the next token, to see if it closes a grouping
    % paren.
    case next_token(Pos, String) of
        {ok, {{character, ")", _, Row, Col, _}, NewPos, NewString}} ->
            % It is grouping! Try closing a grouping paren.
            parse_tuple_tails_paren(ExcessCount, HeadTerm, Tails, NewPos, NewString, Row, Col);
        {ok, {{character, ",", _, Row, Col, _}, NewPos, NewString}} ->
            % It is a real tuple! Try parsing a tuple.
            parse_tuple_tails_comma(ExcessCount, HeadTerm, Tails, NewPos, NewString, Row, Col);
        {ok, {Token, _, _}} ->
            % Anything else is just a boring parse error we can complain about.
            unexpected_token(Token, ")");
        {error, Reason} ->
            {error, Reason}
    end.
|
|
|
|
% A ")" followed the head term: it can only close a grouping paren if any
% grouping parens remain.
parse_tuple_tails_paren(0, _, [[] | _], _, _, Row, Col) ->
    % A singleton tuple was expected, but a grouping paren was given. In theory
    % we could be permissive here, but we were asked to do type checking, and
    % this is a type error. The type error itself is a bit hard to reproduce,
    % but we do know exactly what the fix is, so let's report that instead.
    {error, {expected_trailing_comma, Row, Col}};
parse_tuple_tails_paren(0, _, [Tail | _], _, _, Row, Col) ->
    % A tuple (of more than one elements) was expected, but a grouping paren
    % was given. Again, the type error is hard to produce, but the actual
    % solution is simple; add more elements.
    ExpectCount = length(Tail) + 1,
    GotCount = 1,
    {error, {not_enough_elements, ExpectCount, GotCount, Row, Col}};
parse_tuple_tails_paren(ExcessCount, HeadTerm, Tails, Pos, String, _, _) ->
    % We were expecting some grouping parens, and now we know that one of them
    % was in fact grouping. Good.
    parse_tuple_tails(ExcessCount - 1, HeadTerm, Tails, Pos, String).
|
|
|
|
% A "," followed the head term: it can only continue a tuple if any tuple
% layers remain.
parse_tuple_tails_comma(_, _, [], _, _, Row, Col) ->
    % No more tuples, so commas are invalid. It's hard to describe the type
    % error that a comma would actually produce, so instead let's just give
    % the user the actual solution to their problems, which is to remove the
    % comma.
    {error, {expected_close_paren, Row, Col}};
parse_tuple_tails_comma(ExcessCount, HeadTerm, Tails, Pos, String, _, _) ->
    % If there are no tails then we would have exited into the "grouping parens
    % only" case, so we know this works:
    [TailTypes | ParentTails] = Tails,
    % Now we can parse this tuple as a tuple.
    case parse_multivalue(TailTypes, Pos, String, [HeadTerm]) of
        {ok, {Terms, NewPos, NewString}} ->
            NewHead = {tuple, list_to_tuple(Terms)},
            % Then continue the loop, with whatever parent tuple types this
            % tuple is meant to be a part of.
            parse_tuple_tails(ExcessCount, NewHead, ParentTails, NewPos, NewString);
        {error, Reason} ->
            % TODO: wrap errors?
            {error, Reason}
    end.
|
|
|
|
%%% Unambiguous Tuple/Variant Parsing
|
|
|
|
% Parse the remaining comma-separated, individually-typed elements of a tuple
% (or variant arguments) up to the closing ")". Acc already holds earlier
% elements in reverse.
parse_multivalue(ElemTypes, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, ")", _, Row2, Start2, _}, NewPos, NewString}} ->
            % Close paren: succeed only if no element types remain unmatched.
            check_multivalue_long_enough(ElemTypes, NewPos, NewString, Row2, Start2, Acc);
        {ok, {Token, NewPos, NewString}} ->
            parse_multivalue2(ElemTypes, NewPos, NewString, Acc, Token);
        {error, Reason} ->
            {error, Reason}
    end.
|
|
|
|
% Parse one element against its expected type. If the type list is already
% exhausted, keep parsing untyped elements just to report how many extra
% elements the user supplied.
parse_multivalue2([Next | Rest], Pos, String, Acc, Token) ->
    case parse_expression2(Next, Pos, String, Token) of
        {ok, {Value, NewPos, NewString}} ->
            parse_multivalue3(Rest, NewPos, NewString, [Value | Acc]);
        {error, Reason} ->
            Wrapper = choose_list_error_wrapper(")"),
            % TODO: Are tuple indices off by one from list indices?
            Wrapped = wrap_error(Reason, {Wrapper, length(Acc)}),
            {error, Wrapped}
    end;
parse_multivalue2([], Pos, String, Acc, Token) ->
    count_multivalue_excess(Pos, String, Acc, Token).
|
|
|
|
% After an element: ")" ends the multivalue (checking no types remain),
% "," continues it, anything else is a parse error.
parse_multivalue3(ElemTypes, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, ")", _, Row2, Start2, _}, NewPos, NewString}} ->
            check_multivalue_long_enough(ElemTypes, NewPos, NewString, Row2, Start2, Acc);
        {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} ->
            parse_multivalue(ElemTypes, NewPos, NewString, Acc);
        {ok, {Token, _, _}} ->
            unexpected_token(Token, ")");
        {error, Reason} ->
            {error, Reason}
    end.
|
|
|
|
% All expected element types were consumed but more input follows. Parse the
% rest untyped, purely to report the total element count in the error.
count_multivalue_excess(Pos, String, TypedAcc, Token) ->
    ExpectedLen = length(TypedAcc),
    case parse_list_loop2(unknown_type(), Pos, String, ")", TypedAcc, Token) of
        {ok, {TermList, _, {Row, Col}, _, _}} ->
            ActualLen = length(TermList),
            {error, {too_many_elements, ExpectedLen, ActualLen, Row, Col}};
        {error, Reason} ->
            {error, Reason}
    end.
|
|
|
|
% A ")" was reached. If every expected element type was matched, return the
% accumulated elements (in order); otherwise report how many were expected
% versus supplied.
check_multivalue_long_enough([], Pos, String, _, _, Acc) ->
    {ok, {lists:reverse(Acc), Pos, String}};
check_multivalue_long_enough(Missing, _, _, Row, Col, Got) ->
    Supplied = length(Got),
    Wanted = length(Missing) + Supplied,
    {error, {not_enough_elements, Wanted, Supplied, Row, Col}}.
|
|
|
|
%%% Variant parsing
|
|
|
|
% Parse a variant constructor, optionally namespace-qualified, against the
% expected variant type.
parse_variant({O, N, {variant, Variants}}, Pos, String, [Ident], Row, Start, End) ->
    parse_variant2(O, N, Variants, Pos, String, "", Ident, Row, Start, End);
% Qualified constructor: the namespace must match the type's own name.
parse_variant({O, N, {variant, Variants}}, Pos, String, [Namespace, Constructor], Row, Start, End) ->
    case get_typename(O, N) of
        [Namespace, _] ->
            parse_variant2(O, N, Variants, Pos, String, Namespace ++ ".", Constructor, Row, Start, End);
        _ ->
            {error, {invalid_constructor, O, N, Namespace ++ "." ++ Constructor, Row, Start, End}}
    end;
% Without a known variant type, there is no constructor table to look up.
parse_variant({_, _, unknown_type}, _, _, _, Row, Start, End) ->
    {error, {unresolved_variant, Row, Start, End}};
parse_variant({O, N, _}, _, _, _, Row, Start, End) ->
    % In normal code, identifiers can have many meanings, which can result in
    % lots of different errors. In constant/immediate/normalized Sophia terms
    % we know identifiers are always variants, so we can type error if any
    % other type was expected.
    {error, {wrong_type, O, N, variant, Row, Start, End}}.
|
|
|
|
%% Pick whichever of the original/normalized type pair still carries the
%% type's name, and split it into its namespace components.
get_typename(Original, Normalized) ->
    case Normalized of
        already_normalized -> get_typename(Original);
        _ -> get_typename(Normalized)
    end.
|
|
|
|
%% Split a (possibly wrapped) type name such as "Ns.t" into its dot-separated
%% components. The name may arrive bare, or tagged as the first element of a
%% 2-tuple.
get_typename(Type) ->
    Name = case Type of
               {Inner, _} -> Inner;
               Bare -> Bare
           end,
    string:split(Name, ".", all).
|
|
|
|
%% Resolve the constructor name to its tag and argument types, then parse the
%% arguments. The arity of every constructor is needed to build the FATE
%% variant value, so compute the full arity list up front.
parse_variant2(O, N, Variants, Pos, String, Prefix, Constructor, Row, Start, End) ->
    case lookup_variant(Constructor, Variants, 0) of
        error ->
            {error, {invalid_constructor, O, N, Prefix ++ Constructor, Row, Start, End}};
        {ok, {Tag, ElemTypes}} ->
            Arities = [length(Types) || {_, Types} <- Variants],
            parse_variant3(Arities, Tag, ElemTypes, Pos, String)
    end.
|
|
|
|
%% A nullary constructor is complete on its own; any other constructor must
%% be followed by a parenthesized argument list.
parse_variant3(Arities, Tag, [], Pos, String) ->
    % Parsing of 0-arity variants is different.
    {ok, {{variant, Arities, Tag, {}}, Pos, String}};
parse_variant3(Arities, Tag, ElemTypes, Pos, String) ->
    case next_token(Pos, String) of
        {ok, {{character, "(", _, _, _, _}, Pos2, Rest2}} ->
            parse_variant4(Arities, Tag, ElemTypes, Pos2, Rest2);
        {ok, {Other, _, _}} ->
            unexpected_token(Other, "(");
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%% Parse the constructor's arguments as a multivalue and pack them into the
%% FATE variant representation.
parse_variant4(Arities, Tag, ElemTypes, Pos, String) ->
    case parse_multivalue(ElemTypes, Pos, String, []) of
        {ok, {Terms, Pos2, Rest2}} ->
            {ok, {{variant, Arities, Tag, list_to_tuple(Terms)}, Pos2, Rest2}};
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%% Find a constructor by name, returning its zero-based tag (its position in
%% the constructor list, offset by the starting Tag) and its argument types.
lookup_variant(Ident, [{Ident, ElemTypes} | _], Tag) ->
    {ok, {Tag, ElemTypes}};
lookup_variant(Ident, [_ | Remaining], Tag) ->
    lookup_variant(Ident, Remaining, Tag + 1);
lookup_variant(_, [], _) ->
    error.
|
|
|
|
%%% Record parsing
|
|
|
|
%% Dispatch after an opening "{": a map type parses as a map, a record type
%% as a record, and with no type information we peek at the next token to
%% distinguish the possibilities.
parse_record_or_map({_, _, {map, [KeyType, ValueType]}}, Pos, String, _, _) ->
    parse_map(KeyType, ValueType, Pos, String, #{});
parse_record_or_map({_, _, {record, Fields}}, Pos, String, _, _) ->
    parse_record(Fields, Pos, String, #{});
parse_record_or_map({_, _, unknown_type}, Pos, String, _, _) ->
    case next_token(Pos, String) of
        % "{}" is taken to be an empty map.
        {ok, {{character, "}", _, _, _, _}, NewPos, NewString}} ->
            {ok, {#{}, NewPos, NewString}};
        % "{[" starts a map entry; keys and values stay untyped.
        {ok, {{character, "[", _, _, _, _}, NewPos, NewString}} ->
            parse_map2(unknown_type(), unknown_type(), NewPos, NewString, #{});
        % An identifier would start a record field, but without type info the
        % record's shape is unknowable.
        {ok, {{alphanum, _, _, Row, Start, End}, _, _}} ->
            {error, {unresolved_record, Row, Start, End}};
        {ok, {Token, _, _}} ->
            unexpected_token(Token, "}");
        {error, Reason} ->
            {error, Reason}
    end;
parse_record_or_map({O, N, _}, _, _, Row, Start) ->
    % Any other known type cannot begin with "{".
    {error, {wrong_type, O, N, map, Row, Start, Start}}.
|
|
|
|
%% Parse one record field ("name = value"), or accept "}" for the end of the
%% record (which also covers records closed after a trailing comma).
parse_record(Fields, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, "}", _, Row, Start, End}, Pos2, Rest2}} ->
            parse_record_end(Fields, Pos2, Rest2, Acc, Row, Start, End);
        {ok, {{alphanum, Ident, _, Row, Start, End}, Pos2, Rest2}} ->
            parse_record2(Fields, Pos2, Rest2, Acc, Ident, Row, Start, End);
        {ok, {Other, _, _}} ->
            unexpected_token(Other, "}");
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%% Check that the parsed identifier names a real field of the record type,
%% and recover that field's annotated type.
parse_record2(Fields, Pos, String, Acc, Ident, Row, Start, End) ->
    case lists:keyfind(Ident, 1, Fields) of
        false ->
            {error, {invalid_field, Ident, Row, Start, End}};
        {_, FieldType} ->
            parse_record3(Fields, Pos, String, Acc, Ident, Row, Start, End, FieldType)
    end.
|
|
|
|
%% Reject duplicate assignments to the same field.
parse_record3(Fields, Pos, String, Acc, Ident, Row, Start, End, Type) ->
    case maps:is_key(Ident, Acc) of
        true ->
            {error, {field_already_present, Ident, Row, Start, End}};
        false ->
            parse_record4(Fields, Pos, String, Acc, Ident, Type)
    end.
|
|
|
|
%% Consume the "=" between the field name and its value.
parse_record4(Fields, Pos, String, Acc, Ident, Type) ->
    case expect_tokens(["="], Pos, String) of
        {ok, {Pos2, Rest2}} ->
            parse_record5(Fields, Pos2, Rest2, Acc, Ident, Type);
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%% Parse the field's value expression and store it in the accumulator map.
%%
%% On failure, wrap the inner reason so the error names the offending field,
%% keeping the {error, _} tag intact. (Previously the tag was dropped — the
%% bare reason was returned — so callers that match on {ok, _} | {error, _},
%% such as parse_literal/2, would mis-handle a failing field value. Compare
%% parse_multivalue2, which wraps and re-tags correctly.)
parse_record5(Fields, Pos, String, Acc, Ident, Type) ->
    case parse_expression(Type, Pos, String) of
        {ok, {Result, NewPos, NewString}} ->
            NewAcc = maps:put(Ident, Result, Acc),
            parse_record6(Fields, NewPos, NewString, NewAcc);
        {error, Reason} ->
            {error, wrap_error(Reason, {record_field, Ident})}
    end.
|
|
|
|
%% After a field, expect "," (more fields follow) or "}" (end of record).
parse_record6(Fields, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, "}", _, Row, Start, End}, Pos2, Rest2}} ->
            parse_record_end(Fields, Pos2, Rest2, Acc, Row, Start, End);
        {ok, {{character, ",", _, _, _, _}, Pos2, Rest2}} ->
            parse_record(Fields, Pos2, Rest2, Acc);
        {ok, {Other, _, _}} ->
            unexpected_token(Other, "}");
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%% The "}" has been consumed; order the collected field values according to
%% the record type and convert them to the final FATE representation. A
%% missing-field error is annotated with the closing brace's position.
parse_record_end(Fields, Pos, String, FieldValues, Row, Start, End) ->
    case parse_record_final_loop(Fields, FieldValues, []) of
        {error, {missing_field, Name}} ->
            {error, {missing_field, Name, Row, Start, End}};
        {ok, Result} ->
            {ok, {Result, Pos, String}}
    end.
|
|
|
|
%% Walk the record's field list in declaration order, pulling each value out
%% of the parsed-field map, and build the final FATE term.
parse_record_final_loop([{Name, _} | Remaining], FieldValues, Acc) ->
    case FieldValues of
        #{Name := Value} ->
            parse_record_final_loop(Remaining, FieldValues, [Value | Acc]);
        _ ->
            {error, {missing_field, Name}}
    end;
parse_record_final_loop([], _, [OnlyField]) ->
    % Singleton records are type-checked in Sophia, but unwrapped in the
    % resulting FATE.
    {ok, OnlyField};
parse_record_final_loop([], _, Reversed) ->
    {ok, {tuple, list_to_tuple(lists:reverse(Reversed))}}.
|
|
|
|
|
|
%%% Map Parsing
|
|
|
|
%% Parse one map entry (starting at its "["), or accept "}" ending the map
%% (which also covers the empty map).
parse_map(KeyType, ValueType, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, "}", _, _, _, _}, Pos2, Rest2}} ->
            {ok, {Acc, Pos2, Rest2}};
        {ok, {{character, "[", _, _, _, _}, Pos2, Rest2}} ->
            parse_map2(KeyType, ValueType, Pos2, Rest2, Acc);
        {ok, {Other, _, _}} ->
            unexpected_token(Other, "}");
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%% Parse the key expression of a map entry, after its opening "[".
%%
%% On failure, wrap the inner reason with the index of the failing entry,
%% keeping the {error, _} tag intact. (Previously the tag was dropped — the
%% bare reason was returned — so callers that match on {ok, _} | {error, _},
%% such as parse_literal/2, would mis-handle a failing key. Compare
%% parse_multivalue2, which wraps and re-tags correctly.)
parse_map2(KeyType, ValueType, Pos, String, Acc) ->
    case parse_expression(KeyType, Pos, String) of
        {ok, {Result, NewPos, NewString}} ->
            parse_map3(KeyType, ValueType, NewPos, NewString, Acc, Result);
        {error, Reason} ->
            {error, wrap_error(Reason, {map_key, maps:size(Acc)})}
    end.
|
|
|
|
%% Consume the "] =" between a map key and its value.
parse_map3(KeyType, ValueType, Pos, String, Acc, Key) ->
    case expect_tokens(["]", "="], Pos, String) of
        {ok, {Pos2, Rest2}} ->
            parse_map4(KeyType, ValueType, Pos2, Rest2, Acc, Key);
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%% Parse the value of a map entry and record the key/value pair.
parse_map4(KeyType, ValueType, Pos, String, Acc, Key) ->
    case parse_expression(ValueType, Pos, String) of
        {ok, {Value, Pos2, Rest2}} ->
            parse_map5(KeyType, ValueType, Pos2, Rest2, Acc#{Key => Value});
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%% After a map entry, expect "," (more entries follow) or "}" (end of map).
parse_map5(KeyType, ValueType, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, "}", _, _, _, _}, Pos2, Rest2}} ->
            {ok, {Acc, Pos2, Rest2}};
        {ok, {{character, ",", _, _, _, _}, Pos2, Rest2}} ->
            parse_map(KeyType, ValueType, Pos2, Rest2, Acc);
        {ok, {Other, _, _}} ->
            unexpected_token(Other, "}");
        {error, _} = Error ->
            Error
    end.
|
|
|
|
% TODO
%% Attach contextual information (which field, element, or key an inner error
%% occurred inside) to an error reason. Currently a stub that discards the
%% context and returns the reason unchanged; callers already pass the
%% intended context tag as the second argument, so implementing this later
%% only requires changing this one function.
wrap_error(Reason, _) -> Reason.
|
|
|
|
%%% Pretty Printing
|
|
|
|
-spec fate_to_list(FATE) -> Sophia
    when FATE :: gmb_fate_data:fate_type(),
         Sophia :: string().

%% Pretty-print a FATE term as a flat Sophia string, with no type
%% information to guide the syntax.
fate_to_list(Term) ->
    fate_to_list(unknown_type(), Term).
|
|
|
|
-spec fate_to_list(Type, FATE) -> Sophia
    when Type :: hz_aaci:annotated_type(),
         FATE :: gmb_fate_data:fate_type(),
         Sophia :: string().

%% Pretty-print a FATE term as a flat Sophia string, using the annotated
%% type to choose syntax where it matters.
fate_to_list(Type, Term) ->
    unicode:characters_to_list(fate_to_iolist(Type, Term)).
|
|
|
|
-spec fate_to_iolist(FATE) -> Sophia
    when FATE :: gmb_fate_data:fate_type(),
         Sophia :: iolist().

%% Pretty-print a FATE term as a Sophia iolist, with no type information to
%% guide the syntax.
fate_to_iolist(Term) ->
    fate_to_iolist(unknown_type(), Term).
|
|
|
|
-spec fate_to_iolist(Type, FATE) -> Sophia
    when Type :: hz_aaci:annotated_type(),
         FATE :: gmb_fate_data:fate_type(),
         Sophia :: iolist().

%% Pretty-print a FATE term as Sophia source. Clause order matters: typed
%% special cases must come before the structural fallbacks they overlap with.
% Special case for singleton records, since they are erased during compilation.
fate_to_iolist({_, _, {record, [{FieldName, FieldType}]}}, Term) ->
    singleton_record_to_iolist(FieldName, FieldType, Term);
% Aggregate types, where we should check if there is useful type information to
% act on. Case logic is made explicit so that the default cases stand out.
fate_to_iolist(Type, {tuple, Tuple}) ->
    case Type of
        {_, _, {record, FieldTypes}} ->
            record_to_iolist(FieldTypes, Tuple);
        {_, _, {tuple, ElemTypes}} ->
            tuple_to_iolist(ElemTypes, Tuple);
        _ ->
            % No/mismatched type info: render as an untyped tuple.
            tuple_to_iolist([], Tuple)
    end;
fate_to_iolist(Type, {variant, _, Tag, Tuple}) ->
    case Type of
        {O, N, {variant, VariantTypes}} when Tag < length(VariantTypes) ->
            variant_to_iolist(O, N, VariantTypes, Tag, Tuple);
        {O, N, _} ->
            % TODO: Make up a special syntax for anonymous variant terms.
            erlang:exit({untyped_variant, O, N});
        _ ->
            erlang:exit({untyped_variant, unknown_type, already_normalized})
    end;
fate_to_iolist(Type, List) when is_list(List) ->
    case Type of
        {_, _, {list, [InnerType]}} ->
            list_to_iolist(InnerType, List);
        _ ->
            list_to_iolist(unknown_type(), List)
    end;
fate_to_iolist(Type, Map) when is_map(Map) ->
    case Type of
        {_, _, {map, [K, V]}} ->
            map_to_iolist(K, V, Map);
        _ ->
            map_to_iolist(unknown_type(), unknown_type(), Map)
    end;
% Other FATE types, where no recursion is needed, but type information could
% influence the format that is used.
fate_to_iolist(_, true) ->
    "true";
fate_to_iolist(_, false) ->
    "false";
fate_to_iolist(_, {bits, 0}) ->
    "Bits.none";
fate_to_iolist(_, {bits, -1}) ->
    "Bits.all";
fate_to_iolist(_, {bits, I}) when I > 0 ->
    % Positive bit patterns print as hex literals.
    ["#", integer_to_list(I, 16)];
fate_to_iolist(_, {bits, I}) when I < 0 ->
    % Other negative bit patterns print as plain decimal integers.
    integer_to_list(I, 10);
fate_to_iolist({_, _, char}, $') ->
    % Special case since it needs to be escaped in char literals.
    "'\\''";
fate_to_iolist({_, _, char}, $") ->
    % Special case since it does NOT need to be escaped in char literals.
    "'\"'";
fate_to_iolist({_, _, char}, I) when is_integer(I) ->
    [$', escape_char(I), $'];
fate_to_iolist(_, I) when is_integer(I) ->
    integer_to_list(I);
fate_to_iolist(_, {address, Addr}) ->
    gmser_api_encoder:encode(account_pubkey, Addr);
fate_to_iolist(_, {contract, Addr}) ->
    gmser_api_encoder:encode(contract_pubkey, Addr);
fate_to_iolist(_, {bytes, Bytes}) ->
    % A bytes value prints as one big hex literal.
    Size = bit_size(Bytes),
    <<IntValue:Size>> = Bytes,
    ["#", integer_to_list(IntValue, 16)];
fate_to_iolist(_, Bytes) when is_binary(Bytes) ->
    % Any other binary is a string; quote and escape it.
    escape_string(Bytes).
|
|
|
|
%% Render a binary as a double-quoted, escaped Sophia string literal.
escape_string(Binary) ->
    escape_string(Binary, []).
|
|
|
|
%% Escape each UTF-8 codepoint in turn, then wrap the accumulated iolist in
%% double quotes. A binary that is not valid UTF-8 will fail to match either
%% clause and crash with function_clause.
escape_string(<<>>, Acc) ->
    [$", Acc, $"];
escape_string(<<Char/utf8, Rest/binary>>, Acc) ->
    escape_string(Rest, [Acc, escape_char(Char)]).
|
|
|
|
%% Render a FATE tuple. 1-tuples get Sophia's trailing-comma syntax; type
%% info is used per-element when available, otherwise rendering is untyped.
tuple_to_iolist(Types, {Single}) ->
    ElemType = case Types of
                   [Only] -> Only;
                   _ -> unknown_type()
               end,
    ["(", fate_to_iolist(ElemType, Single), ",)"];
tuple_to_iolist(ElemTypes, Tuple) ->
    Body = multivalue_to_iolist(ElemTypes, tuple_to_list(Tuple)),
    ["(", Body, ")"].
|
|
|
|
%% Render a FATE list in square brackets.
list_to_iolist(InnerType, Elems) ->
    ["[", list_elems_to_iolist(InnerType, Elems), "]"].
|
|
|
|
%% Render a variant value as its (possibly namespace-qualified) constructor
%% name, followed by a parenthesized argument list when it has arguments.
variant_to_iolist(O, N, Variants, Tag, Tuple) ->
    Prefix = choose_variant_prefix(O, N),
    {Name, ElemTypes} = lists:nth(Tag + 1, Variants),
    if
        tuple_size(Tuple) =:= 0 ->
            [Prefix, Name];
        true ->
            Args = multivalue_to_iolist(ElemTypes, tuple_to_list(Tuple)),
            [Prefix, Name, "(", Args, ")"]
    end.
|
|
|
|
%% A variant whose type name has a namespace component is printed with a
%% "Namespace." prefix; otherwise no prefix at all.
choose_variant_prefix(O, N) ->
    case get_typename(O, N) of
        [Ns, _] -> [Ns, "."];
        _ -> []
    end.
|
|
|
|
%% Render a comma-separated sequence of terms, consuming one type per term
%% for as long as both lists last, then falling back to untyped rendering.
multivalue_to_iolist([HeadType | TailTypes], [Head | Tail]) ->
    multivalue_to_iolist(TailTypes, Tail, fate_to_iolist(HeadType, Head));
multivalue_to_iolist(_, Terms) ->
    list_elems_to_iolist(unknown_type(), Terms).
|
|
|
|
%% Accumulator variant of multivalue_to_iolist/2; switches to untyped
%% rendering once the type list is exhausted.
multivalue_to_iolist([HeadType | TailTypes], [Head | Tail], Acc) ->
    Rendered = fate_to_iolist(HeadType, Head),
    multivalue_to_iolist(TailTypes, Tail, [Acc, ", ", Rendered]);
multivalue_to_iolist(_, Terms, Acc) ->
    list_elems_to_iolist(unknown_type(), Terms, Acc).
|
|
|
|
%% Render list elements separated by ", ", all with the same element type.
list_elems_to_iolist(_, []) ->
    "";
list_elems_to_iolist(Type, [Head | Tail]) ->
    list_elems_to_iolist(Type, Tail, fate_to_iolist(Type, Head)).
|
|
|
|
%% Accumulator variant of list_elems_to_iolist/2.
list_elems_to_iolist(_, [], Acc) ->
    Acc;
list_elems_to_iolist(Type, [Head | Tail], Acc) ->
    Rendered = fate_to_iolist(Type, Head),
    list_elems_to_iolist(Type, Tail, [Acc, ", ", Rendered]).
|
|
|
|
%% Re-wrap an erased singleton record in its "{field = value}" syntax.
singleton_record_to_iolist(FieldName, FieldType, Term) ->
    ["{", FieldName, " = ", fate_to_iolist(FieldType, Term), "}"].
|
|
|
|
%% Render a tuple in record syntax when the field count matches the type;
%% otherwise fall back to plain (untyped) tuple syntax.
record_to_iolist(FieldTypes, Tuple) ->
    case tuple_size(Tuple) =:= length(FieldTypes) of
        true ->
            Body = record_fields_to_iolist(FieldTypes, tuple_to_list(Tuple)),
            ["{", Body, "}"];
        false ->
            tuple_to_iolist([], Tuple)
    end.
|
|
|
|
%% Render "name = value" pairs separated by ", "; both input lists must have
%% the same length.
record_fields_to_iolist([{Name, Type} | RestTypes], [Value | RestValues]) ->
    First = [Name, " = ", fate_to_iolist(Type, Value)],
    record_fields_to_iolist(RestTypes, RestValues, First);
record_fields_to_iolist(_, []) ->
    "".
|
|
|
|
%% Accumulator variant of record_fields_to_iolist/2.
record_fields_to_iolist(_, [], Acc) ->
    Acc;
record_fields_to_iolist([{Name, Type} | RestTypes], [Value | RestValues], Acc) ->
    Rendered = fate_to_iolist(Type, Value),
    record_fields_to_iolist(RestTypes, RestValues, [Acc, ", ", Name, " = ", Rendered]).
|
|
|
|
%% Render a FATE map as "{[k] = v, ...}", or "{}" when empty. Entries appear
%% in maps-iterator order.
map_to_iolist(KeyType, ValType, Map) ->
    case maps:next(maps:iterator(Map)) of
        none ->
            "{}";
        {Key, Val, RestIter} ->
            First = ["[", fate_to_iolist(KeyType, Key), "] = ",
                     fate_to_iolist(ValType, Val)],
            ["{", map_to_iolist_inner(KeyType, ValType, RestIter, First), "}"]
    end.
|
|
|
|
%% Append the remaining map entries, comma-separated, onto the accumulator
%% started by map_to_iolist/3.
map_to_iolist_inner(KeyType, ValType, Iter, Acc) ->
    case maps:next(Iter) of
        none ->
            Acc;
        {Key, Val, RestIter} ->
            KeyChars = fate_to_iolist(KeyType, Key),
            ValChars = fate_to_iolist(ValType, Val),
            map_to_iolist_inner(KeyType, ValType, RestIter,
                                [Acc, ", [", KeyChars, "] = ", ValChars])
    end.
|
|
|
|
%%% Tests
|
|
|
|
%% Test helper: assert that parsing the Sophia string yields exactly the
%% given FATE term, raising a descriptive error otherwise.
check_sophia_to_fate(Type, Sophia, Fate) ->
    Expected = {ok, Fate},
    case parse_literal(Type, Sophia) of
        Expected -> ok;
        Unexpected -> erlang:error({to_fate_failed, Sophia, Fate, Unexpected})
    end.
|
|
|
|
%% Test helper: assert that pretty-printing the FATE term yields exactly the
%% given Sophia string, raising a descriptive error otherwise.
check_fate_to_sophia(Type, Fate, Sophia) ->
    Printed = fate_to_list(Type, Fate),
    case Printed of
        Sophia -> ok;
        _ -> erlang:error({to_sophia_failed, Fate, Sophia, Printed})
    end.
|
|
|
|
%% Test helper: require that Sophia parses to Fate AND that Fate prints back
%% to exactly the same Sophia.
roundtrip_parser(Type, Sophia, Fate) ->
    ok = check_sophia_to_fate(Type, Sophia, Fate),
    ok = check_fate_to_sophia(Type, Fate, Sophia),
    ok.
|
|
|
|
% These test function names are getting ridiculous... I might want to optarg
% them or something, but, whatever, it's test code.
%% Like roundtrip_parser/3, but when pretty-printing produces a different
%% string, only require that the different string re-parses to the same FATE
%% term.
roundtrip_parser_lenient(Type, Sophia, Fate) ->
    check_sophia_to_fate(Type, Sophia, Fate),
    Printed = fate_to_list(Type, Fate),
    case Printed of
        Sophia -> ok;
        _ -> check_sophia_to_fate(Type, Printed, Fate)
    end.
|
|
|
|
%% Compile a contract and return both the FATE value the named entrypoint
%% returns and that entrypoint's AACI type annotation.
compile_entrypoint_value_and_type(Source, Entrypoint) ->
    {ok, #{fate_code := FateCode, aci := ACI}} = so_compiler:from_string(Source, [{aci, json}]),

    % Find the fcode for the correct entrypoint.
    {fcode, Bodies, NamesMap, _} = FateCode,
    Names = maps:to_list(NamesMap),
    Name = unicode:characters_to_binary(Entrypoint),
    {Hash, Name} = lists:keyfind(Name, 2, Names),
    {_, _, Code} = maps:get(Hash, Bodies),
    FATE = extract_return_value(Code),

    % Generate the AACI, and get the AACI type info for the correct entrypoint.
    % Look up the requested entrypoint here rather than the hard-coded "f"
    % this previously used; every existing caller passes "f", so behavior for
    % them is unchanged.
    AACI = hz_aaci:prepare(ACI),
    {ok, {_, Type}} = hz_aaci:get_function_signature(AACI, Entrypoint),

    {FATE, Type}.
|
|
|
|
%% A compiled literal entrypoint consists of a single RETURNR instruction in
%% basic block 0; pull the immediate value out of it, or bail loudly on any
%% other code shape.
extract_return_value(Code) ->
    case Code of
        #{0 := [{'RETURNR', {immediate, FATE}}]} ->
            FATE;
        _ ->
            erlang:exit({invalid_literal_fcode, Code})
    end.
|
|
|
|
%% Test helper: validate a literal with the real Sophia compiler, then check
%% that our parser agrees with the compiler's value, both with and without
%% type information.
check_parser(Sophia) ->
    % Compile the literal using the compiler, to check that it is valid Sophia
    % syntax, and to get an AACI object to pass to the parser.
    Source = "contract C = entrypoint f() = " ++ Sophia,
    {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"),

    % Check that when we parse the term we get the same value as the Sophia
    % compiler. Also check that the pretty printer gives the same string back.
    check_sophia_to_fate(unknown_type(), Sophia, Fate),

    % Then, once we know that the term is correct, make sure that it is still
    % accepted *with* type info. Don't bother roundtripping this, since the
    % pretty printer doesn't enforce types anyway.
    check_sophia_to_fate(Type, Sophia, Fate).
|
|
|
|
%% Test helper: like check_parser/1, but also require the pretty printer to
%% reproduce the input — strictly when typed, leniently when untyped.
check_parser_roundtrip(Sophia) ->
    Source = "contract C = entrypoint f() = " ++ Sophia,
    {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"),
    ok = roundtrip_parser(Type, Sophia, Fate),
    % Without type information we might get a more generic result in Sophia
    % syntax. Let's do a lenient test.
    roundtrip_parser_lenient(unknown_type(), Sophia, Fate).
|
|
|
|
%% Test helper: compile custom type definitions alongside the literal, then
%% do a lenient typed roundtrip.
check_parser_with_typedef(Typedef, Sophia) ->
    % Compile the type definitions alongside the usual literal expression.
    Source = "contract C =\n " ++ Typedef ++ "\n entrypoint f() = " ++ Sophia,
    {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"),

    % Do a typed parse, as usual, but there are probably record/variant
    % definitions in the AACI, so untyped parses probably don't work, and
    % variants often have optional namespaces, so the sophia result might not
    % match exactly, but should still be equivalent.
    roundtrip_parser_lenient(Type, Sophia, Fate).
|
|
|
|
%% Literals whose types can be handled without any typedefs in the ACI.
anon_types_test() ->
    % Integers.
    check_parser_roundtrip("123"),
    check_parser("1_2_3"),
    check_parser_roundtrip("-123"),
    % Booleans.
    check_parser_roundtrip("true"),
    check_parser_roundtrip("false"),
    check_parser_roundtrip("[true, false]"),
    % Bytes.
    check_parser_roundtrip("#DEAD000BEEF"),
    check_parser("#DE_AD0_00B_EEF"),
    % Strings.
    check_parser_roundtrip("\"hello world\""),
    % The Sophia compiler doesn't handle this right, but we should still.
    %check_parser_roundtrip("\"ÿ\""),
    %check_parser_roundtrip("\"♣\""),
    % Characters.
    check_parser_roundtrip("'A'"),
    check_parser_roundtrip("['a', ' ', '[']"),
    %check_parser_roundtrip("'ÿ'"),
    %check_parser_roundtrip("'♣'"),
    % List of integers.
    check_parser_roundtrip("[1, 2, 3]"),
    % List of lists.
    check_parser_roundtrip("[[], [1], [2, 3]]"),
    % Tuple.
    check_parser_roundtrip("(1, [2, 3], (4, 5))"),
    % Map.
    check_parser_roundtrip("{[1] = 2, [3] = 4}"),

    ok.
|
|
|
|
%% Escape sequences in string and char literals: the simple named escapes,
%% \xNN hex escapes, and \x{...} escapes with varying digit counts.
string_escape_codes_test() ->
    check_parser_roundtrip("\" \\b\\e\\f\\n\\r\\t\\v\\\"\\\\ \""),
    check_parser("\"\\x00\\x11\\x77\\x4a\\x4A\""),
    check_parser("\"\\x{0}\\x{7}\\x{7F}\\x{07F}\\x{007F}\\x{0007F}\\x{0000007F}\""),
    check_parser_roundtrip("\"'\""),

    check_parser_roundtrip("['\\b', '\\e', '\\f', '\\n', '\\r', '\\t', '\\v', '\"', '\\'', '\\\\']"),
    check_parser("['\\x00', '\\x11', '\\x77', '\\x4a', '\\x4A']"),
    check_parser("['\\x{0}', '\\x{7}', '\\x{7F}', '\\x{07F}', '\\x{007F}', '\\x{0007F}', '\\x{0000007F}']"),
    check_parser_roundtrip("'\"'"),

    ok.
|
|
|
|
%% Records require type information; an untyped parse must report the record
%% as unresolved.
records_test() ->
    TypeDef = "record pair = {x: int, y: int}",
    Sophia = "{x = 1, y = 2}",
    check_parser_with_typedef(TypeDef, Sophia),
    % The above won't run an untyped parse on the expression, but we can. It
    % will error, though.
    {error, {unresolved_record, _, _, _}} = parse_literal(unknown_type(), Sophia).
|
|
|
|
%% Variant constructors of arity 0, 1, and 2, bare and namespace-qualified.
variant_test() ->
    TypeDef = "datatype multi('a) = Zero | One('a) | Two('a, 'a)",

    check_parser_with_typedef(TypeDef, "Zero"),
    check_parser_with_typedef(TypeDef, "One(0)"),
    check_parser_with_typedef(TypeDef, "Two(0, 1)"),
    check_parser_with_typedef(TypeDef, "Two([], [1, 2, 3])"),
    check_parser_with_typedef(TypeDef, "C.Zero"),

    % Constructors can't be resolved without type information.
    {error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), "Zero"),

    ok.
|
|
|
|
%% Constructors that collide with the contract's own name ("C") must still
%% parse, both bare and qualified.
ambiguous_variant_test() ->
    TypeDef = "datatype mytype = C | D",
    check_parser_with_typedef(TypeDef, "C"),
    check_parser_with_typedef(TypeDef, "D"),
    check_parser_with_typedef(TypeDef, "C.C"),
    check_parser_with_typedef(TypeDef, "C.D"),

    ok.
|
|
|
|
%% Variants declared in a separate namespace roundtrip with their "N."
%% qualified constructor names.
namespace_variant_test() ->
    Term = "[N.A, N.B]",
    Source = "namespace N = datatype mytype = A | B\ncontract C = entrypoint f() = " ++ Term,
    {Fate, VariantType} = compile_entrypoint_value_and_type(Source, "f"),
    roundtrip_parser(VariantType, Term, Fate),

    ok.
|
|
|
|
%% Chain object literals: account addresses, signatures, and contract
%% handles.
chain_objects_test() ->
    % Address,
    check_parser_roundtrip("ak_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx"),
    % Two different forms of signature,
    check_parser("sg_XDyF8LJC4tpMyAySvpaG1f5V9F2XxAbRx9iuVjvvdNMwVracLhzAuXhRM5kXAFtpwW1DCHuz5jGehUayCah4jub32Ti2n"),
    check_parser("#00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF"),
    check_parser_roundtrip("#112233445566778899AABBCCDDEEFF00112233445566778899AABBCCDDEEFF00112233445566778899AABBCCDDEEFF00112233445566778899AABBCCDDEEFF"),

    % We have to build a totally custom contract example in order to get an
    % AACI and return value for parsing contract addresses. This is because the
    % compiler demands that contract addresses be type checked according to the
    % logic of "contract oriented programming", including covariance, etc. and
    % "contract oriented programming" is not very compatible with ML style type
    % inference.
    Contract = "ct_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx",
    Source = "contract C = entrypoint f(): C = " ++ Contract,
    {Fate, ContractType} = compile_entrypoint_value_and_type(Source, "f"),
    roundtrip_parser(ContractType, Contract, Fate),
    roundtrip_parser(unknown_type(), Contract, Fate),

    ok.
|
|
|
|
%% Bits literals: the named constants, plus the decimal and hex numeric
%% forms. Positive bit patterns print back in hex, so "5" only roundtrips
%% leniently.
bits_test() ->
    check_parser_roundtrip("Bits.all"),
    check_parser_roundtrip("Bits.none"),
    {_, Type} = compile_entrypoint_value_and_type("contract C = entrypoint f() = Bits.all", "f"),
    roundtrip_parser_lenient(Type, "5", {bits, 5}),
    roundtrip_parser(Type, "-5", {bits, -5}),
    roundtrip_parser(Type, "#123", {bits, 256 + 32 + 3}),
    ok.
|
|
|
|
%% Singleton records are unwrapped in the FATE representation; exercise the
%% special case, including nested and list-element uses.
singleton_records_test() ->
    TypeDef = "record singleton('a) = {it: 'a}",
    check_parser_with_typedef(TypeDef, "{it = 123}"),
    check_parser_with_typedef(TypeDef, "{it = {it = {it = 5}}}"),
    check_parser_with_typedef(TypeDef, "[{it = 1}, {it = 2}, {it = 3}]"),

    ok.
|
|
|
|
singleton_variants_test() ->
    % Similar tests to the singleton records, but this time there isn't
    % actually a special case; singleton variants are in fact wrapped in the
    % FATE too.
    TypeDef = "datatype wrapped('a) = Wrap('a)",
    check_parser_with_typedef(TypeDef, "Wrap(123)"),
    check_parser_with_typedef(TypeDef, "Wrap(Wrap(123))"),
    check_parser_with_typedef(TypeDef, "[Wrap(1), Wrap(2), Wrap(3)]"),

    ok.
|
|
|
|
%% Grouping parens versus tuple parens, including the empty tuple.
excess_parens_test() ->
    % 'singleton' parens are another special case, but unlike singleton
    % records, which exist in the type system, singleton parens aren't tuples
    % at all! They are just grouping, for arithmetic. For example.
    check_parser("(123)"),
    check_parser("[1, (2), ((3))]"),
    % Where this gets tricky, though, is when grouping parens are mixed with
    % tuple parens. E.g. this list of three tuples should all parse to the same
    % result.
    check_parser("[((1, 2)), ((1), 2), (((1), 2))]"),
    % Including multiple nestings of tuples and grouping, interleaved.
    check_parser("((((1), ((2, 3)))), 4)"),
    % Also empty tuples exist!
    check_parser_roundtrip("()"),
    check_parser_roundtrip("(((), ()), ((), ()))"),
    check_parser("(((((), ())), ()))"),

    ok.
|
|
|
|
%% The tokenizer must report accurate {row, start_col, end_col} positions in
%% its error terms, before and after each kind of token.
lexer_offset_test() ->
    % Test that various tokens report their position correctly.
    {error, {unexpected_token, "456", 1, 5, 7}} = parse_literal("123 456"),
    {error, {unexpected_token, "[", 1, 5, 5}} = parse_literal("123 [0]"),
    {error, {unexpected_token, "ABC", 1, 5, 7}} = parse_literal("123 ABC"),
    {error, {unexpected_token, "#AA", 1, 5, 7}} = parse_literal("123 #AA"),
    {error, {unexpected_token, "\"x\"", 1, 5, 7}} = parse_literal("123 \"x\""),
    {error, {unexpected_token, "\"\\x{123}\"", 1, 5, 13}} = parse_literal("123 \"\\x{123}\""),

    % Check that the tokenizer knows its position correctly *after* various
    % tokens.
    {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("[0] 123"),
    ABCType = {"mytype", already_normalized, {variant, [{"ABC", []}]}},
    {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal(ABCType, "ABC 123"),
    {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("#AA 123"),
    {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("\"x\" 123"),
    {error, {unexpected_token, "123", 1, 11, 13}} = parse_literal("\"\\x{123}\" 123"),

    % Check that the tokenizer accounts for various line separators correctly.
    {error, {unexpected_token, "ABC", 2, 1, 3}} = parse_literal("123\nABC"),
    {error, {unexpected_token, "ABC", 2, 1, 3}} = parse_literal("123\r\nABC"),
    {error, {unexpected_token, "ABC", 2, 1, 3}} = parse_literal("123\rABC"),

    ok.
|
|
|
|
%% Position reporting in the tuple-arity errors produced by the multivalue
%% parser.
parser_offset_test() ->
    {_, Type} = compile_entrypoint_value_and_type("contract C = entrypoint f() = ((1, 2), (3, 4))", "f"),

    {error, {not_enough_elements, 2, 1, 1, 8}} = parse_literal(Type, "((1, 2))"),
    {error, {not_enough_elements, 2, 1, 1, 10}} = parse_literal(Type, "(((1, 2)))"),
    {error, {too_many_elements, 2, 3, 1, 24}} = parse_literal(Type, "((1, 2), (3, 4), (5, 6))"),
    {error, {too_many_elements, 2, 3, 1, 10}} = parse_literal(Type, "((1, 2, 3), (4, 5))"),

    ok.
|
|
|
|
%% 1-tuples built from a hand-crafted ACI, and their trailing-comma syntax.
singleton_test() ->
    % The Sophia compiler would never generate this, but it is a valid type
    % within the FATE virtual machine, and it is possible to represent within
    % the ACI itself.
    SingletonACI = #{tuple => [<<"int">>]},

    % Build an AACI around this, and run it through the AACI machinery.
    Function = #{name => <<"f">>,
                 arguments => [],
                 stateful => false,
                 payable => false,
                 returns => SingletonACI},
    ACI = [#{contract => #{functions => [Function],
                           name => <<"C">>,
                           kind => contract_main,
                           payable => false,
                           typedefs => []}}],
    {aaci, "C", #{"f" := {[], SingletonType}}, _} = hz_aaci:prepare(ACI),

    % Now let's do some testing with this weird type, to see if we handle it
    % correctly.
    {ok, {tuple, {1}}} = parse_literal(SingletonType, "(1,)"),
    "(1,)" = fate_to_list(SingletonType, {tuple, {1}}),
    % Some ambiguous nesting parens, for fun.
    {ok, {tuple, {1}}} = parse_literal(SingletonType, "(((1),))"),
    % No trailing comma should give an error.
    {error, {expected_trailing_comma, 1, 3}} = parse_literal(SingletonType, "(1)"),
    % All of the above should behave the same in untyped contexts:
    {ok, {tuple, {1}}} = parse_literal(unknown_type(), "(1,)"),
    "(1,)" = fate_to_list(unknown_type(), {tuple, {1}}),
    {ok, {tuple, {1}}} = parse_literal(unknown_type(), "(((1),))"),
    {ok, 1} = parse_literal(unknown_type(), "(1)"),

    % Also if we wanted an integer, the singleton is NOT dropped, so is also an
    % error.
    {error, {expected_close_paren, 1, 3}} = parse_literal({integer, already_normalized, integer}, "(1,)"),

    ok.
|