Add special anonymous variant syntax

This is outside of the scope of the sophia parser, but is a simple generalization to
'sophia terms' to make them able to represent any FATE term anonymously.

We also parse these anonymous variant expressions without type info, since it is convenient
for users to copy the output of one call into another call.

Anonymous parsing of None and Some was also added, since new users would be shocked if this
doesn't work, and advanced users will greatly appreciate that it does. The resulting FATE
terms are still rendered as variant([0, 1], ...), since user defined types can also have [0, 1]
as their arity list, and since automation and tooling programmers hate special case exceptions like that.

Anonymous parsing of other Chain and AENS terms are not added, since anonymous variants already cover those types,
so very little is gained by hard-coding such complex types into the term parser. Complex, version-specific compiler
types are already supported by hakuzaru, in the form of the ACI/AACI; parsing without AACI, on the other hand, is
intended to support language-agnostic communication using the primitives of FATE, and in general, variants
in FATE are anonymous.
This commit is contained in:
Jarvis Carroll
2026-06-05 03:08:38 +00:00
parent ea3a5453f2
commit d323fb0f52
+173 -34
View File
@@ -343,6 +343,12 @@ parse_expression2(_, _, _, Token) ->
unknown_type() -> unknown_type() ->
{unknown_type, already_normalized, unknown_type}. {unknown_type, already_normalized, unknown_type}.
int_type() ->
{integer, already_normalized, integer}.
int_list_type() ->
{{list, [integer]}, alread_normalized, {list, [int_type()]}}.
expect_tokens([], Pos, String) -> expect_tokens([], Pos, String) ->
{ok, {Pos, String}}; {ok, {Pos, String}};
expect_tokens([Str | Rest], Pos, String) -> expect_tokens([Str | Rest], Pos, String) ->
@@ -377,11 +383,14 @@ parse_alphanum(Type, Pos, String, ["Bits", "all"], Row, Start, End) ->
typecheck_bits(Type, Pos, String, -1, Row, Start, End); typecheck_bits(Type, Pos, String, -1, Row, Start, End);
parse_alphanum(Type, Pos, String, ["Bits", "none"], Row, Start, End) -> parse_alphanum(Type, Pos, String, ["Bits", "none"], Row, Start, End) ->
typecheck_bits(Type, Pos, String, 0, Row, Start, End); typecheck_bits(Type, Pos, String, 0, Row, Start, End);
parse_alphanum(Type, Pos, String, ["variant"], Row, Start, End) ->
parse_anonymous_variant(Type, Pos, String, Row, Start, End);
parse_alphanum(Type, Pos, String, [[C | _] = S], Row, Start, End) when ?IS_LATIN_LOWER(C) -> parse_alphanum(Type, Pos, String, [[C | _] = S], Row, Start, End) when ?IS_LATIN_LOWER(C) ->
% From a programming perspective, we are trying to parse a constant, so % From a programming perspective, we are trying to parse a constant, so
% an alphanum token can really only be a constructor, or a chain object. % an alphanum token can really only be a constructor, or a chain object.
% Constructors start with uppercase characters, so lowercase can only be a % Constructors start with uppercase characters, and we have handled our
% chain object. % made-up 'variant' case explicitly, so the only other lowercase constants
% are serialized chain objects.
try try
case gmser_api_encoder:decode(unicode:characters_to_binary(S)) of case gmser_api_encoder:decode(unicode:characters_to_binary(S)) of
{account_pubkey, Data} -> {account_pubkey, Data} ->
@@ -400,8 +409,8 @@ parse_alphanum(Type, Pos, String, [[C | _] = S], Row, Start, End) when ?IS_LATIN
_:_ -> {error, {unexpected_identifier, S, Row, Start, End}} _:_ -> {error, {unexpected_identifier, S, Row, Start, End}}
end; end;
parse_alphanum(Type, Pos, String, Path, Row, Start, End) -> parse_alphanum(Type, Pos, String, Path, Row, Start, End) ->
% Inversely, chain object prefixes are always lowercase, so any other path % Now having handled all lowercase terms, anything else must be uppercase,
% must be a variant constructor, or invalid. % which is either a variant constructor, or totally invalid.
parse_variant(Type, Pos, String, Path, Row, Start, End). parse_variant(Type, Pos, String, Path, Row, Start, End).
typecheck_integer({_, _, integer}, Pos, String, Value, _, _, _) -> typecheck_integer({_, _, integer}, Pos, String, Value, _, _, _) ->
@@ -731,6 +740,12 @@ parse_variant({O, N, {variant, Variants}}, Pos, String, [Namespace, Constructor]
_ -> _ ->
{error, {invalid_constructor, O, N, Namespace ++ "." ++ Constructor, Row, Start, End}} {error, {invalid_constructor, O, N, Namespace ++ "." ++ Constructor, Row, Start, End}}
end; end;
parse_variant({_, _, unknown_type}, Pos, String, ["None"], _, _, _) ->
% Special case for None without type info.
parse_variant3([0, 1], 0, [], Pos, String);
parse_variant({_, _, unknown_type}, Pos, String, ["Some"], _, _, _) ->
% Also a special case for Some.
parse_variant3([0, 1], 1, [unknown_type()], Pos, String);
parse_variant({_, _, unknown_type}, _, _, _, Row, Start, End) -> parse_variant({_, _, unknown_type}, _, _, _, Row, Start, End) ->
{error, {unresolved_variant, Row, Start, End}}; {error, {unresolved_variant, Row, Start, End}};
parse_variant({O, N, _}, _, _, _, Row, Start, End) -> parse_variant({O, N, _}, _, _, _, Row, Start, End) ->
@@ -753,8 +768,7 @@ get_typename(Name) ->
parse_variant2(O, N, Variants, Pos, String, Prefix, Constructor, Row, Start, End) -> parse_variant2(O, N, Variants, Pos, String, Prefix, Constructor, Row, Start, End) ->
case lookup_variant(Constructor, Variants, 0) of case lookup_variant(Constructor, Variants, 0) of
{ok, {Tag, ElemTypes}} -> {ok, {Tag, ElemTypes}} ->
GetArity = fun({_, OtherElemTypes}) -> length(OtherElemTypes) end, Arities = get_arities(Variants),
Arities = lists:map(GetArity, Variants),
parse_variant3(Arities, Tag, ElemTypes, Pos, String); parse_variant3(Arities, Tag, ElemTypes, Pos, String);
error -> error ->
{error, {invalid_constructor, O, N, Prefix ++ Constructor, Row, Start, End}} {error, {invalid_constructor, O, N, Prefix ++ Constructor, Row, Start, End}}
@@ -790,6 +804,112 @@ lookup_variant(Ident, [{Ident, ElemTypes} | _], Tag) ->
lookup_variant(Ident, [_ | Rest], Tag) -> lookup_variant(Ident, [_ | Rest], Tag) ->
lookup_variant(Ident, Rest, Tag + 1). lookup_variant(Ident, Rest, Tag + 1).
get_arities(Variants) ->
GetArity = fun({_, OtherElemTypes}) -> length(OtherElemTypes) end,
lists:map(GetArity, Variants).
parse_anonymous_variant({O, N, {variant, Variants}}, Pos, String, _, _, _) ->
parse_anonymous_variant2({O, N, {variant, Variants}}, Pos, String);
parse_anonymous_variant({O, N, unknown_type}, Pos, String, _, _, _) ->
parse_anonymous_variant2({O, N, unknown_type}, Pos, String);
parse_anonymous_variant({O, N, _}, _, _, Row, Start, End) ->
{error, {wrong_type, O, N, variant, Row, Start, End}}.
parse_anonymous_variant2(Type, Pos, String) ->
case expect_tokens(["("], Pos, String) of
{ok, {NewPos, NewString}} ->
parse_anonymous_variant3(Type, NewPos, NewString);
{error, Reason} ->
{error, Reason}
end.
parse_anonymous_variant3(Type, Pos, String) ->
case parse_arities(Type, Pos, String) of
{ok, {Arities, NewPos, NewString}} ->
parse_anonymous_variant4(Type, NewPos, NewString, Arities);
{error, Reason} ->
{error, Reason}
end.
parse_anonymous_variant4(Type, Pos, String, Arities) ->
case expect_tokens([","], Pos, String) of
{ok, {NewPos, NewString}} ->
parse_anonymous_variant5(Type, NewPos, NewString, Arities);
{error, Reason} ->
{error, Reason}
end.
parse_anonymous_variant5(Type, Pos, String, Arities) ->
case parse_anonymous_tag(Pos, String, Arities) of
{ok, {Tag, NewPos, NewString}} ->
parse_anonymous_variant6(Type, NewPos, NewString, Arities, Tag);
{error, Reason} ->
{error, Reason}
end.
parse_anonymous_variant6(Type, Pos, String, Arities, Tag) ->
ElemTypes = infer_anonymous_variant_elem_types(Type, Arities, Tag),
case parse_multivalue3(ElemTypes, Pos, String, []) of
{ok, {Terms, NewPos, NewString}} ->
Result = {variant, Arities, Tag, list_to_tuple(Terms)},
{ok, {Result, NewPos, NewString}};
{error, Reason} ->
{error, Reason}
end.
parse_arities(Type, Pos, String) ->
case next_token(Pos, String) of
{ok, {Token, NewPos, NewString}} ->
parse_arities2(Type, NewPos, NewString, Token);
{error, Reason} ->
{error, Reason}
end.
parse_arities2(Type, Pos, String, Token = {_, _, _, Row, Start, _}) ->
case parse_expression2(int_list_type(), Pos, String, Token) of
{ok, {Arities, NewPos, NewString}} ->
parse_arities3(Type, NewPos, NewString, Arities, Row, Start);
{error, Reason} ->
{error, Reason}
end.
parse_arities3({O, N, {variant, Variants}}, Pos, String, Arities, Row, Start) ->
ExpectedArities = get_arities(Variants),
case Arities == ExpectedArities of
true ->
{ok, {Arities, Pos, String}};
false ->
{error, {wrong_arities, O, N, Arities, Row, Start}}
end;
parse_arities3(_, Pos, String, Arities, _, _) ->
{ok, {Arities, Pos, String}}.
parse_anonymous_tag(Pos, String, Arities) ->
case next_token(Pos, String) of
{ok, {Token, NewPos, NewString}} ->
parse_anonymous_tag2(NewPos, NewString, Arities, Token);
{error, Reason} ->
{error, Reason}
end.
parse_anonymous_tag2(Pos, String, Arities, Token = {_, _, _, Row, Start, End}) ->
TagCount = length(Arities),
case parse_expression2(int_type(), Pos, String, Token) of
{ok, {Tag, _, _}} when Tag < 0 ->
{error, {negative_tag, Tag, Row, Start, End}};
{ok, {Tag, _, _}} when Tag >= TagCount ->
{error, {invalid_tag, Tag, TagCount, Row, Start, End}};
Result ->
Result
end.
infer_anonymous_variant_elem_types({_, _, {variant, Variants}}, _, Tag) ->
{_Name, ElemTypes} = lists:nth(Tag + 1, Variants),
ElemTypes;
infer_anonymous_variant_elem_types({_, _, unknown_type}, Arities, Tag) ->
Arity = lists:nth(Tag + 1, Arities),
lists:duplicate(Arity, unknown_type()).
%%% Record parsing %%% Record parsing
parse_record_or_map({_, _, {map, [KeyType, ValueType]}}, Pos, String, _, _) -> parse_record_or_map({_, _, {map, [KeyType, ValueType]}}, Pos, String, _, _) ->
@@ -1027,15 +1147,12 @@ fate_to_iolist(Type, {tuple, Tuple}) ->
_ -> _ ->
tuple_to_iolist([], Tuple) tuple_to_iolist([], Tuple)
end; end;
fate_to_iolist(Type, {variant, _, Tag, Tuple}) -> fate_to_iolist(Type, {variant, Arities, Tag, Tuple}) ->
case Type of case Type of
{O, N, {variant, VariantTypes}} when Tag < length(VariantTypes) -> {O, N, {variant, VariantTypes}} when Tag < length(VariantTypes) ->
variant_to_iolist(O, N, VariantTypes, Tag, Tuple); variant_to_iolist(O, N, VariantTypes, Tag, Tuple);
{O, N, _} -> {_, _, _} ->
% TODO: Make up a special syntax for anonymous variant terms. anonymous_variant_to_iolist(Arities, Tag, Tuple)
erlang:exit({untyped_variant, O, N});
_ ->
erlang:exit({untyped_variant, unknown_type, already_normalized})
end; end;
fate_to_iolist(Type, List) when is_list(List) -> fate_to_iolist(Type, List) when is_list(List) ->
case Type of case Type of
@@ -1130,6 +1247,22 @@ choose_variant_prefix(O, N) ->
[] []
end. end.
% We don't have type information, but the Sophia programming language doesn't
% have syntax for anonymous variants, so we have to make a syntax up. This
% syntax is also supported when parsing terms, so that the output of one
% contract call can be fed easily into another contract call.
anonymous_variant_to_iolist(Arities, Tag, Tuple) ->
% Extract the elements of the tuple.
Elems = tuple_to_list(Tuple),
% Turn the arities, tag, and elements into an iolist.
AritiesStr = list_to_iolist(int_type(), Arities),
TagStr = integer_to_list(Tag),
FullTermsStr = list_elems_to_iolist(unknown_type(), Elems, [AritiesStr, ", ", TagStr]),
% Wrap that iolist in the anonymous 'variant' constructor.
["variant(", FullTermsStr, ")"].
multivalue_to_iolist([FirstType | ElemTypes], [FirstTerm | Elems]) -> multivalue_to_iolist([FirstType | ElemTypes], [FirstTerm | Elems]) ->
FirstTermChars = fate_to_iolist(FirstType, FirstTerm), FirstTermChars = fate_to_iolist(FirstType, FirstTerm),
multivalue_to_iolist(ElemTypes, Elems, FirstTermChars); multivalue_to_iolist(ElemTypes, Elems, FirstTermChars);
@@ -1282,16 +1415,18 @@ check_parser_roundtrip(Sophia) ->
% syntax. Let's do a lenient test. % syntax. Let's do a lenient test.
roundtrip_parser_lenient(unknown_type(), Sophia, Fate). roundtrip_parser_lenient(unknown_type(), Sophia, Fate).
check_parser_with_typedef(Typedef, Sophia) -> check_parser_with_typedef(Typedef, Sophia, UntypedSophia) ->
% Compile the type definitions alongside the usual literal expression. % Compile the type definitions alongside the usual literal expression.
Source = "contract C =\n " ++ Typedef ++ "\n entrypoint f() = " ++ Sophia, Source = "contract C =\n " ++ Typedef ++ "\n entrypoint f() = " ++ Sophia,
{Fate, Type} = compile_entrypoint_value_and_type(Source, "f"), {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"),
% Do a typed parse, as usual, but there are probably record/variant % Do a typed parse, as usual. Variant namespaces can make pretty printing
% definitions in the AACI, so untyped parses probably don't work, and % ambiguous, so make the roundtrip lenient.
% variants often have optional namespaces, so the sophia result might not roundtrip_parser_lenient(Type, Sophia, Fate),
% match exactly, but should still be equivalent. % Do an untyped parse, but using a second special Sophia expression that
roundtrip_parser_lenient(Type, Sophia, Fate). % doesn't require type info to parse. This one *doesn't* need to be
% lenient, since we are specifying a distinct sophia expression.
roundtrip_parser(unknown_type(), UntypedSophia, Fate).
anon_types_test() -> anon_types_test() ->
% Integers. % Integers.
@@ -1323,6 +1458,10 @@ anon_types_test() ->
check_parser_roundtrip("(1, [2, 3], (4, 5))"), check_parser_roundtrip("(1, [2, 3], (4, 5))"),
% Map. % Map.
check_parser_roundtrip("{[1] = 2, [3] = 4}"), check_parser_roundtrip("{[1] = 2, [3] = 4}"),
% Option.
check_parser_roundtrip("None"),
check_parser_roundtrip("Some(1)"),
check_parser_roundtrip("Some([1, 2, 3])"),
ok. ok.
@@ -1342,7 +1481,7 @@ string_escape_codes_test() ->
records_test() -> records_test() ->
TypeDef = "record pair = {x: int, y: int}", TypeDef = "record pair = {x: int, y: int}",
Sophia = "{x = 1, y = 2}", Sophia = "{x = 1, y = 2}",
check_parser_with_typedef(TypeDef, Sophia), check_parser_with_typedef(TypeDef, Sophia, "(1, 2)"),
% The above won't run an untyped parse on the expression, but we can. It % The above won't run an untyped parse on the expression, but we can. It
% will error, though. % will error, though.
{error, {unresolved_record, _, _, _}} = parse_literal(unknown_type(), Sophia). {error, {unresolved_record, _, _, _}} = parse_literal(unknown_type(), Sophia).
@@ -1350,11 +1489,11 @@ records_test() ->
variant_test() -> variant_test() ->
TypeDef = "datatype multi('a) = Zero | One('a) | Two('a, 'a)", TypeDef = "datatype multi('a) = Zero | One('a) | Two('a, 'a)",
check_parser_with_typedef(TypeDef, "Zero"), check_parser_with_typedef(TypeDef, "Zero", "variant([0, 1, 2], 0)"),
check_parser_with_typedef(TypeDef, "One(0)"), check_parser_with_typedef(TypeDef, "One(0)", "variant([0, 1, 2], 1, 0)"),
check_parser_with_typedef(TypeDef, "Two(0, 1)"), check_parser_with_typedef(TypeDef, "Two(0, 1)", "variant([0, 1, 2], 2, 0, 1)"),
check_parser_with_typedef(TypeDef, "Two([], [1, 2, 3])"), check_parser_with_typedef(TypeDef, "Two([], [1, 2, 3])", "variant([0, 1, 2], 2, [], [1, 2, 3])"),
check_parser_with_typedef(TypeDef, "C.Zero"), check_parser_with_typedef(TypeDef, "C.Zero", "variant([0, 1, 2], 0)"),
{error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), "Zero"), {error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), "Zero"),
@@ -1362,10 +1501,10 @@ variant_test() ->
ambiguous_variant_test() -> ambiguous_variant_test() ->
TypeDef = "datatype mytype = C | D", TypeDef = "datatype mytype = C | D",
check_parser_with_typedef(TypeDef, "C"), check_parser_with_typedef(TypeDef, "C", "variant([0, 0], 0)"),
check_parser_with_typedef(TypeDef, "D"), check_parser_with_typedef(TypeDef, "D", "variant([0, 0], 1)"),
check_parser_with_typedef(TypeDef, "C.C"), check_parser_with_typedef(TypeDef, "C.C", "variant([0, 0], 0)"),
check_parser_with_typedef(TypeDef, "C.D"), check_parser_with_typedef(TypeDef, "C.D", "variant([0, 0], 1)"),
ok. ok.
@@ -1410,9 +1549,9 @@ bits_test() ->
singleton_records_test() -> singleton_records_test() ->
TypeDef = "record singleton('a) = {it: 'a}", TypeDef = "record singleton('a) = {it: 'a}",
check_parser_with_typedef(TypeDef, "{it = 123}"), check_parser_with_typedef(TypeDef, "{it = 123}", "123"),
check_parser_with_typedef(TypeDef, "{it = {it = {it = 5}}}"), check_parser_with_typedef(TypeDef, "{it = {it = {it = 5}}}", "5"),
check_parser_with_typedef(TypeDef, "[{it = 1}, {it = 2}, {it = 3}]"), check_parser_with_typedef(TypeDef, "[{it = 1}, {it = 2}, {it = 3}]", "[1, 2, 3]"),
ok. ok.
@@ -1421,9 +1560,9 @@ singleton_variants_test() ->
% actually a special case; singleton variants are in fact wrapped in the % actually a special case; singleton variants are in fact wrapped in the
% FATE too. % FATE too.
TypeDef = "datatype wrapped('a) = Wrap('a)", TypeDef = "datatype wrapped('a) = Wrap('a)",
check_parser_with_typedef(TypeDef, "Wrap(123)"), check_parser_with_typedef(TypeDef, "Wrap(123)", "variant([1], 0, 123)"),
check_parser_with_typedef(TypeDef, "Wrap(Wrap(123))"), check_parser_with_typedef(TypeDef, "Wrap(Wrap(123))", "variant([1], 0, variant([1], 0, 123))"),
check_parser_with_typedef(TypeDef, "[Wrap(1), Wrap(2), Wrap(3)]"), check_parser_with_typedef(TypeDef, "[Wrap(1), Wrap(2), Wrap(3)]", "[variant([1], 0, 1), variant([1], 0, 2), variant([1], 0, 3)]"),
ok. ok.