2 Commits

Author SHA1 Message Date
Jarvis Carroll d323fb0f52 Add special anonymous variant syntax
This is outside of the scope of the sophia parser, but is a simple generalization to
'sophia terms' to make them able to represent any FATE term anonymously.

We also parse these anonymous variant expressions without type info, since it is convenient
for users to copy the output of one call into another call.

Anonymous parsing of None and Some was also added, since new users would be shocked if this
doesn't work, and advanced users will greatly appreciate that it does. The resulting FATE
terms are still rendered as variant([0, 1], ...), since user defined types can also have [0, 1]
as their arity list, and since automation and tooling programmers hate special case exceptions like that.

Anonymous parsing of other Chain and AENS terms are not added, since anonymous variants already cover those types,
so very little is gained by hard-coding such complex types into the term parser. Complex, version-specific compiler
types are already supported by hakuzaru, in the form of the ACI/AACI; parsing without AACI, on the other hand, is
intended to support language-agnostic communication using the primitives of FATE, and in general, variants
in FATE are anonymous.
2026-06-05 03:08:38 +00:00
Jarvis Carroll ea3a5453f2 fix bytes coerce logic 2026-05-28 00:41:51 +00:00
2 changed files with 190 additions and 45 deletions
+17 -11
View File
@@ -926,7 +926,10 @@ erlang_to_fate({O, N, char}, Str) ->
single_error({invalid, O, N, Str})
end;
erlang_to_fate({O, N, {bytes, [Count]}}, Bytes) when is_bitstring(Bytes) ->
coerce_bytes(O, N, Count, Bytes);
case check_bytes(O, N, Count, Bytes) of
ok -> {ok, {bytes, Bytes}};
{error, Reason} -> {error, Reason}
end;
erlang_to_fate({_, _, bits}, Num) when is_integer(Num) ->
{ok, {bits, Num}};
erlang_to_fate({_, _, bits}, Bits) when is_bitstring(Bits) ->
@@ -988,14 +991,14 @@ decode_chain_object(Tag, S) ->
error:incorrect_size -> {error, incorrect_size}
end.
coerce_bytes(O, N, _, Bytes) when bit_size(Bytes) rem 8 /= 0 ->
check_bytes(O, N, _, Bytes) when bit_size(Bytes) rem 8 /= 0 ->
single_error({partial_bytes, O, N, bit_size(Bytes)});
coerce_bytes(_, _, any, Bytes) ->
{ok, Bytes};
coerce_bytes(O, N, Count, Bytes) when byte_size(Bytes) /= Count ->
check_bytes(_, _, any, _) ->
ok;
check_bytes(O, N, Count, Bytes) when byte_size(Bytes) /= Count ->
single_error({incorrect_size, O, N, Bytes});
coerce_bytes(_, _, _, Bytes) ->
{ok, Bytes}.
check_bytes(_, _, _, _) ->
ok.
coerce_zipped_bindings(Bindings, Direction, Tag) ->
coerce_zipped_bindings(Bindings, Direction, Tag, [], []).
@@ -1261,8 +1264,11 @@ fate_to_erlang({_, _, string}, Bin) ->
{ok, Str};
fate_to_erlang({_, _, char}, Val) ->
{ok, Val};
fate_to_erlang({O, N, {bytes, [Count]}}, Bytes) when is_bitstring(Bytes) ->
coerce_bytes(O, N, Count, Bytes);
fate_to_erlang({O, N, {bytes, [Count]}}, {bytes, Bytes}) when is_bitstring(Bytes) ->
case check_bytes(O, N, Count, Bytes) of
ok -> {ok, Bytes};
{error, Reason} -> {error, Reason}
end;
fate_to_erlang({_, _, bits}, {bits, Num}) ->
{ok, Num};
fate_to_erlang({_, _, {list, [Type]}}, Data) when is_list(Data) ->
@@ -1452,7 +1458,7 @@ coerce_record_test() ->
coerce_bytes_test() ->
{ok, Type} = annotate_type({tuple, [{bytes, [4]}, {bytes, [any]}]}, #{}),
check_roundtrip(Type, {<<"abcd">>, <<"efghi">>}, {tuple, {<<"abcd">>, <<"efghi">>}}).
check_roundtrip(Type, {<<"abcd">>, <<"efghi">>}, {tuple, {{bytes, <<"abcd">>}, {bytes, <<"efghi">>}}}).
coerce_bits_test() ->
{ok, Type} = annotate_type(bits, #{}),
@@ -1471,7 +1477,7 @@ coerce_unicode_test() ->
coerce_hash_test() ->
{ok, Type} = annotate_type("hash", builtin_typedefs()),
Hash = list_to_binary(lists:seq(1,32)),
check_roundtrip(Type, Hash, Hash),
check_roundtrip(Type, Hash, {bytes, Hash}),
ok.
+173 -34
View File
@@ -343,6 +343,12 @@ parse_expression2(_, _, _, Token) ->
unknown_type() ->
{unknown_type, already_normalized, unknown_type}.
int_type() ->
{integer, already_normalized, integer}.
int_list_type() ->
{{list, [integer]}, alread_normalized, {list, [int_type()]}}.
expect_tokens([], Pos, String) ->
{ok, {Pos, String}};
expect_tokens([Str | Rest], Pos, String) ->
@@ -377,11 +383,14 @@ parse_alphanum(Type, Pos, String, ["Bits", "all"], Row, Start, End) ->
typecheck_bits(Type, Pos, String, -1, Row, Start, End);
parse_alphanum(Type, Pos, String, ["Bits", "none"], Row, Start, End) ->
typecheck_bits(Type, Pos, String, 0, Row, Start, End);
parse_alphanum(Type, Pos, String, ["variant"], Row, Start, End) ->
parse_anonymous_variant(Type, Pos, String, Row, Start, End);
parse_alphanum(Type, Pos, String, [[C | _] = S], Row, Start, End) when ?IS_LATIN_LOWER(C) ->
% From a programming perspective, we are trying to parse a constant, so
% an alphanum token can really only be a constructor, or a chain object.
% Constructors start with uppercase characters, so lowercase can only be a
% chain object.
% Constructors start with uppercase characters, and we have handled our
% made-up 'variant' case explicitly, so the only other lowercase constants
% are serialized chain objects.
try
case gmser_api_encoder:decode(unicode:characters_to_binary(S)) of
{account_pubkey, Data} ->
@@ -400,8 +409,8 @@ parse_alphanum(Type, Pos, String, [[C | _] = S], Row, Start, End) when ?IS_LATIN
_:_ -> {error, {unexpected_identifier, S, Row, Start, End}}
end;
parse_alphanum(Type, Pos, String, Path, Row, Start, End) ->
% Inversely, chain object prefixes are always lowercase, so any other path
% must be a variant constructor, or invalid.
% Now having handled all lowercase terms, anything else must be uppercase,
% which is either a variant constructor, or totally invalid.
parse_variant(Type, Pos, String, Path, Row, Start, End).
typecheck_integer({_, _, integer}, Pos, String, Value, _, _, _) ->
@@ -731,6 +740,12 @@ parse_variant({O, N, {variant, Variants}}, Pos, String, [Namespace, Constructor]
_ ->
{error, {invalid_constructor, O, N, Namespace ++ "." ++ Constructor, Row, Start, End}}
end;
parse_variant({_, _, unknown_type}, Pos, String, ["None"], _, _, _) ->
% Special case for None without type info.
parse_variant3([0, 1], 0, [], Pos, String);
parse_variant({_, _, unknown_type}, Pos, String, ["Some"], _, _, _) ->
% Also a special case for Some.
parse_variant3([0, 1], 1, [unknown_type()], Pos, String);
parse_variant({_, _, unknown_type}, _, _, _, Row, Start, End) ->
{error, {unresolved_variant, Row, Start, End}};
parse_variant({O, N, _}, _, _, _, Row, Start, End) ->
@@ -753,8 +768,7 @@ get_typename(Name) ->
parse_variant2(O, N, Variants, Pos, String, Prefix, Constructor, Row, Start, End) ->
case lookup_variant(Constructor, Variants, 0) of
{ok, {Tag, ElemTypes}} ->
GetArity = fun({_, OtherElemTypes}) -> length(OtherElemTypes) end,
Arities = lists:map(GetArity, Variants),
Arities = get_arities(Variants),
parse_variant3(Arities, Tag, ElemTypes, Pos, String);
error ->
{error, {invalid_constructor, O, N, Prefix ++ Constructor, Row, Start, End}}
@@ -790,6 +804,112 @@ lookup_variant(Ident, [{Ident, ElemTypes} | _], Tag) ->
lookup_variant(Ident, [_ | Rest], Tag) ->
lookup_variant(Ident, Rest, Tag + 1).
get_arities(Variants) ->
GetArity = fun({_, OtherElemTypes}) -> length(OtherElemTypes) end,
lists:map(GetArity, Variants).
parse_anonymous_variant({O, N, {variant, Variants}}, Pos, String, _, _, _) ->
parse_anonymous_variant2({O, N, {variant, Variants}}, Pos, String);
parse_anonymous_variant({O, N, unknown_type}, Pos, String, _, _, _) ->
parse_anonymous_variant2({O, N, unknown_type}, Pos, String);
parse_anonymous_variant({O, N, _}, _, _, Row, Start, End) ->
{error, {wrong_type, O, N, variant, Row, Start, End}}.
parse_anonymous_variant2(Type, Pos, String) ->
case expect_tokens(["("], Pos, String) of
{ok, {NewPos, NewString}} ->
parse_anonymous_variant3(Type, NewPos, NewString);
{error, Reason} ->
{error, Reason}
end.
parse_anonymous_variant3(Type, Pos, String) ->
case parse_arities(Type, Pos, String) of
{ok, {Arities, NewPos, NewString}} ->
parse_anonymous_variant4(Type, NewPos, NewString, Arities);
{error, Reason} ->
{error, Reason}
end.
parse_anonymous_variant4(Type, Pos, String, Arities) ->
case expect_tokens([","], Pos, String) of
{ok, {NewPos, NewString}} ->
parse_anonymous_variant5(Type, NewPos, NewString, Arities);
{error, Reason} ->
{error, Reason}
end.
parse_anonymous_variant5(Type, Pos, String, Arities) ->
case parse_anonymous_tag(Pos, String, Arities) of
{ok, {Tag, NewPos, NewString}} ->
parse_anonymous_variant6(Type, NewPos, NewString, Arities, Tag);
{error, Reason} ->
{error, Reason}
end.
parse_anonymous_variant6(Type, Pos, String, Arities, Tag) ->
ElemTypes = infer_anonymous_variant_elem_types(Type, Arities, Tag),
case parse_multivalue3(ElemTypes, Pos, String, []) of
{ok, {Terms, NewPos, NewString}} ->
Result = {variant, Arities, Tag, list_to_tuple(Terms)},
{ok, {Result, NewPos, NewString}};
{error, Reason} ->
{error, Reason}
end.
parse_arities(Type, Pos, String) ->
case next_token(Pos, String) of
{ok, {Token, NewPos, NewString}} ->
parse_arities2(Type, NewPos, NewString, Token);
{error, Reason} ->
{error, Reason}
end.
parse_arities2(Type, Pos, String, Token = {_, _, _, Row, Start, _}) ->
case parse_expression2(int_list_type(), Pos, String, Token) of
{ok, {Arities, NewPos, NewString}} ->
parse_arities3(Type, NewPos, NewString, Arities, Row, Start);
{error, Reason} ->
{error, Reason}
end.
parse_arities3({O, N, {variant, Variants}}, Pos, String, Arities, Row, Start) ->
ExpectedArities = get_arities(Variants),
case Arities == ExpectedArities of
true ->
{ok, {Arities, Pos, String}};
false ->
{error, {wrong_arities, O, N, Arities, Row, Start}}
end;
parse_arities3(_, Pos, String, Arities, _, _) ->
{ok, {Arities, Pos, String}}.
parse_anonymous_tag(Pos, String, Arities) ->
case next_token(Pos, String) of
{ok, {Token, NewPos, NewString}} ->
parse_anonymous_tag2(NewPos, NewString, Arities, Token);
{error, Reason} ->
{error, Reason}
end.
parse_anonymous_tag2(Pos, String, Arities, Token = {_, _, _, Row, Start, End}) ->
TagCount = length(Arities),
case parse_expression2(int_type(), Pos, String, Token) of
{ok, {Tag, _, _}} when Tag < 0 ->
{error, {negative_tag, Tag, Row, Start, End}};
{ok, {Tag, _, _}} when Tag >= TagCount ->
{error, {invalid_tag, Tag, TagCount, Row, Start, End}};
Result ->
Result
end.
infer_anonymous_variant_elem_types({_, _, {variant, Variants}}, _, Tag) ->
{_Name, ElemTypes} = lists:nth(Tag + 1, Variants),
ElemTypes;
infer_anonymous_variant_elem_types({_, _, unknown_type}, Arities, Tag) ->
Arity = lists:nth(Tag + 1, Arities),
lists:duplicate(Arity, unknown_type()).
%%% Record parsing
parse_record_or_map({_, _, {map, [KeyType, ValueType]}}, Pos, String, _, _) ->
@@ -1027,15 +1147,12 @@ fate_to_iolist(Type, {tuple, Tuple}) ->
_ ->
tuple_to_iolist([], Tuple)
end;
fate_to_iolist(Type, {variant, _, Tag, Tuple}) ->
fate_to_iolist(Type, {variant, Arities, Tag, Tuple}) ->
case Type of
{O, N, {variant, VariantTypes}} when Tag < length(VariantTypes) ->
variant_to_iolist(O, N, VariantTypes, Tag, Tuple);
{O, N, _} ->
% TODO: Make up a special syntax for anonymous variant terms.
erlang:exit({untyped_variant, O, N});
_ ->
erlang:exit({untyped_variant, unknown_type, already_normalized})
{_, _, _} ->
anonymous_variant_to_iolist(Arities, Tag, Tuple)
end;
fate_to_iolist(Type, List) when is_list(List) ->
case Type of
@@ -1130,6 +1247,22 @@ choose_variant_prefix(O, N) ->
[]
end.
% We don't have type information, but the Sophia programming language doesn't
% have syntax for anonymous variants, so we have to make a syntax up. This
% syntax is also supported when parsing terms, so that the output of one
% contract call can be fed easily into another contract call.
anonymous_variant_to_iolist(Arities, Tag, Tuple) ->
% Extract the elements of the tuple.
Elems = tuple_to_list(Tuple),
% Turn the arities, tag, and elements into an iolist.
AritiesStr = list_to_iolist(int_type(), Arities),
TagStr = integer_to_list(Tag),
FullTermsStr = list_elems_to_iolist(unknown_type(), Elems, [AritiesStr, ", ", TagStr]),
% Wrap that iolist in the anonymous 'variant' constructor.
["variant(", FullTermsStr, ")"].
multivalue_to_iolist([FirstType | ElemTypes], [FirstTerm | Elems]) ->
FirstTermChars = fate_to_iolist(FirstType, FirstTerm),
multivalue_to_iolist(ElemTypes, Elems, FirstTermChars);
@@ -1282,16 +1415,18 @@ check_parser_roundtrip(Sophia) ->
% syntax. Let's do a lenient test.
roundtrip_parser_lenient(unknown_type(), Sophia, Fate).
check_parser_with_typedef(Typedef, Sophia) ->
check_parser_with_typedef(Typedef, Sophia, UntypedSophia) ->
% Compile the type definitions alongside the usual literal expression.
Source = "contract C =\n " ++ Typedef ++ "\n entrypoint f() = " ++ Sophia,
{Fate, Type} = compile_entrypoint_value_and_type(Source, "f"),
% Do a typed parse, as usual, but there are probably record/variant
% definitions in the AACI, so untyped parses probably don't work, and
% variants often have optional namespaces, so the sophia result might not
% match exactly, but should still be equivalent.
roundtrip_parser_lenient(Type, Sophia, Fate).
% Do a typed parse, as usual. Variant namespaces can make pretty printing
% ambiguous, so make the roundtrip lenient.
roundtrip_parser_lenient(Type, Sophia, Fate),
% Do an untyped parse, but using a second special Sophia expression that
% doesn't require type info to parse. This one *doesn't* need to be
% lenient, since we are specifying a distinct sophia expression.
roundtrip_parser(unknown_type(), UntypedSophia, Fate).
anon_types_test() ->
% Integers.
@@ -1323,6 +1458,10 @@ anon_types_test() ->
check_parser_roundtrip("(1, [2, 3], (4, 5))"),
% Map.
check_parser_roundtrip("{[1] = 2, [3] = 4}"),
% Option.
check_parser_roundtrip("None"),
check_parser_roundtrip("Some(1)"),
check_parser_roundtrip("Some([1, 2, 3])"),
ok.
@@ -1342,7 +1481,7 @@ string_escape_codes_test() ->
records_test() ->
TypeDef = "record pair = {x: int, y: int}",
Sophia = "{x = 1, y = 2}",
check_parser_with_typedef(TypeDef, Sophia),
check_parser_with_typedef(TypeDef, Sophia, "(1, 2)"),
% The above won't run an untyped parse on the expression, but we can. It
% will error, though.
{error, {unresolved_record, _, _, _}} = parse_literal(unknown_type(), Sophia).
@@ -1350,11 +1489,11 @@ records_test() ->
variant_test() ->
TypeDef = "datatype multi('a) = Zero | One('a) | Two('a, 'a)",
check_parser_with_typedef(TypeDef, "Zero"),
check_parser_with_typedef(TypeDef, "One(0)"),
check_parser_with_typedef(TypeDef, "Two(0, 1)"),
check_parser_with_typedef(TypeDef, "Two([], [1, 2, 3])"),
check_parser_with_typedef(TypeDef, "C.Zero"),
check_parser_with_typedef(TypeDef, "Zero", "variant([0, 1, 2], 0)"),
check_parser_with_typedef(TypeDef, "One(0)", "variant([0, 1, 2], 1, 0)"),
check_parser_with_typedef(TypeDef, "Two(0, 1)", "variant([0, 1, 2], 2, 0, 1)"),
check_parser_with_typedef(TypeDef, "Two([], [1, 2, 3])", "variant([0, 1, 2], 2, [], [1, 2, 3])"),
check_parser_with_typedef(TypeDef, "C.Zero", "variant([0, 1, 2], 0)"),
{error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), "Zero"),
@@ -1362,10 +1501,10 @@ variant_test() ->
ambiguous_variant_test() ->
TypeDef = "datatype mytype = C | D",
check_parser_with_typedef(TypeDef, "C"),
check_parser_with_typedef(TypeDef, "D"),
check_parser_with_typedef(TypeDef, "C.C"),
check_parser_with_typedef(TypeDef, "C.D"),
check_parser_with_typedef(TypeDef, "C", "variant([0, 0], 0)"),
check_parser_with_typedef(TypeDef, "D", "variant([0, 0], 1)"),
check_parser_with_typedef(TypeDef, "C.C", "variant([0, 0], 0)"),
check_parser_with_typedef(TypeDef, "C.D", "variant([0, 0], 1)"),
ok.
@@ -1410,9 +1549,9 @@ bits_test() ->
singleton_records_test() ->
TypeDef = "record singleton('a) = {it: 'a}",
check_parser_with_typedef(TypeDef, "{it = 123}"),
check_parser_with_typedef(TypeDef, "{it = {it = {it = 5}}}"),
check_parser_with_typedef(TypeDef, "[{it = 1}, {it = 2}, {it = 3}]"),
check_parser_with_typedef(TypeDef, "{it = 123}", "123"),
check_parser_with_typedef(TypeDef, "{it = {it = {it = 5}}}", "5"),
check_parser_with_typedef(TypeDef, "[{it = 1}, {it = 2}, {it = 3}]", "[1, 2, 3]"),
ok.
@@ -1421,9 +1560,9 @@ singleton_variants_test() ->
% actually a special case; singleton variants are in fact wrapped in the
% FATE too.
TypeDef = "datatype wrapped('a) = Wrap('a)",
check_parser_with_typedef(TypeDef, "Wrap(123)"),
check_parser_with_typedef(TypeDef, "Wrap(Wrap(123))"),
check_parser_with_typedef(TypeDef, "[Wrap(1), Wrap(2), Wrap(3)]"),
check_parser_with_typedef(TypeDef, "Wrap(123)", "variant([1], 0, 123)"),
check_parser_with_typedef(TypeDef, "Wrap(Wrap(123))", "variant([1], 0, variant([1], 0, 123))"),
check_parser_with_typedef(TypeDef, "[Wrap(1), Wrap(2), Wrap(3)]", "[variant([1], 0, 1), variant([1], 0, 2), variant([1], 0, 3)]"),
ok.