diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index 2a9892e..da7dd5f 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -343,6 +343,12 @@ parse_expression2(_, _, _, Token) -> unknown_type() -> {unknown_type, already_normalized, unknown_type}. +int_type() -> + {integer, already_normalized, integer}. + +int_list_type() -> + {{list, [integer]}, alread_normalized, {list, [int_type()]}}. + expect_tokens([], Pos, String) -> {ok, {Pos, String}}; expect_tokens([Str | Rest], Pos, String) -> @@ -377,11 +383,14 @@ parse_alphanum(Type, Pos, String, ["Bits", "all"], Row, Start, End) -> typecheck_bits(Type, Pos, String, -1, Row, Start, End); parse_alphanum(Type, Pos, String, ["Bits", "none"], Row, Start, End) -> typecheck_bits(Type, Pos, String, 0, Row, Start, End); +parse_alphanum(Type, Pos, String, ["variant"], Row, Start, End) -> + parse_anonymous_variant(Type, Pos, String, Row, Start, End); parse_alphanum(Type, Pos, String, [[C | _] = S], Row, Start, End) when ?IS_LATIN_LOWER(C) -> % From a programming perspective, we are trying to parse a constant, so % an alphanum token can really only be a constructor, or a chain object. - % Constructors start with uppercase characters, so lowercase can only be a - % chain object. + % Constructors start with uppercase characters, and we have handled our + % made-up 'variant' case explicitly, so the only other lowercase constants + % are serialized chain objects. try case gmser_api_encoder:decode(unicode:characters_to_binary(S)) of {account_pubkey, Data} -> @@ -400,8 +409,8 @@ parse_alphanum(Type, Pos, String, [[C | _] = S], Row, Start, End) when ?IS_LATIN _:_ -> {error, {unexpected_identifier, S, Row, Start, End}} end; parse_alphanum(Type, Pos, String, Path, Row, Start, End) -> - % Inversely, chain object prefixes are always lowercase, so any other path - % must be a variant constructor, or invalid. + % Now having handled all lowercase terms, anything else must be uppercase, + % which is either a variant constructor, or totally invalid. parse_variant(Type, Pos, String, Path, Row, Start, End). typecheck_integer({_, _, integer}, Pos, String, Value, _, _, _) -> @@ -731,6 +740,12 @@ parse_variant({O, N, {variant, Variants}}, Pos, String, [Namespace, Constructor] _ -> {error, {invalid_constructor, O, N, Namespace ++ "." ++ Constructor, Row, Start, End}} end; +parse_variant({_, _, unknown_type}, Pos, String, ["None"], _, _, _) -> + % Special case for None without type info. + parse_variant3([0, 1], 0, [], Pos, String); +parse_variant({_, _, unknown_type}, Pos, String, ["Some"], _, _, _) -> + % Also a special case for Some. + parse_variant3([0, 1], 1, [unknown_type()], Pos, String); parse_variant({_, _, unknown_type}, _, _, _, Row, Start, End) -> {error, {unresolved_variant, Row, Start, End}}; parse_variant({O, N, _}, _, _, _, Row, Start, End) -> @@ -753,8 +768,7 @@ get_typename(Name) -> parse_variant2(O, N, Variants, Pos, String, Prefix, Constructor, Row, Start, End) -> case lookup_variant(Constructor, Variants, 0) of {ok, {Tag, ElemTypes}} -> - GetArity = fun({_, OtherElemTypes}) -> length(OtherElemTypes) end, - Arities = lists:map(GetArity, Variants), + Arities = get_arities(Variants), parse_variant3(Arities, Tag, ElemTypes, Pos, String); error -> {error, {invalid_constructor, O, N, Prefix ++ Constructor, Row, Start, End}} @@ -790,6 +804,112 @@ lookup_variant(Ident, [{Ident, ElemTypes} | _], Tag) -> lookup_variant(Ident, [_ | Rest], Tag) -> lookup_variant(Ident, Rest, Tag + 1). +get_arities(Variants) -> + GetArity = fun({_, OtherElemTypes}) -> length(OtherElemTypes) end, + lists:map(GetArity, Variants). + +parse_anonymous_variant({O, N, {variant, Variants}}, Pos, String, _, _, _) -> + parse_anonymous_variant2({O, N, {variant, Variants}}, Pos, String); +parse_anonymous_variant({O, N, unknown_type}, Pos, String, _, _, _) -> + parse_anonymous_variant2({O, N, unknown_type}, Pos, String); +parse_anonymous_variant({O, N, _}, _, _, Row, Start, End) -> + {error, {wrong_type, O, N, variant, Row, Start, End}}. + +parse_anonymous_variant2(Type, Pos, String) -> + case expect_tokens(["("], Pos, String) of + {ok, {NewPos, NewString}} -> + parse_anonymous_variant3(Type, NewPos, NewString); + {error, Reason} -> + {error, Reason} + end. + +parse_anonymous_variant3(Type, Pos, String) -> + case parse_arities(Type, Pos, String) of + {ok, {Arities, NewPos, NewString}} -> + parse_anonymous_variant4(Type, NewPos, NewString, Arities); + {error, Reason} -> + {error, Reason} + end. + +parse_anonymous_variant4(Type, Pos, String, Arities) -> + case expect_tokens([","], Pos, String) of + {ok, {NewPos, NewString}} -> + parse_anonymous_variant5(Type, NewPos, NewString, Arities); + {error, Reason} -> + {error, Reason} + end. + +parse_anonymous_variant5(Type, Pos, String, Arities) -> + case parse_anonymous_tag(Pos, String, Arities) of + {ok, {Tag, NewPos, NewString}} -> + parse_anonymous_variant6(Type, NewPos, NewString, Arities, Tag); + {error, Reason} -> + {error, Reason} + end. + +parse_anonymous_variant6(Type, Pos, String, Arities, Tag) -> + ElemTypes = infer_anonymous_variant_elem_types(Type, Arities, Tag), + case parse_multivalue3(ElemTypes, Pos, String, []) of + {ok, {Terms, NewPos, NewString}} -> + Result = {variant, Arities, Tag, list_to_tuple(Terms)}, + {ok, {Result, NewPos, NewString}}; + {error, Reason} -> + {error, Reason} + end. + +parse_arities(Type, Pos, String) -> + case next_token(Pos, String) of + {ok, {Token, NewPos, NewString}} -> + parse_arities2(Type, NewPos, NewString, Token); + {error, Reason} -> + {error, Reason} + end. + +parse_arities2(Type, Pos, String, Token = {_, _, _, Row, Start, _}) -> + case parse_expression2(int_list_type(), Pos, String, Token) of + {ok, {Arities, NewPos, NewString}} -> + parse_arities3(Type, NewPos, NewString, Arities, Row, Start); + {error, Reason} -> + {error, Reason} + end. + +parse_arities3({O, N, {variant, Variants}}, Pos, String, Arities, Row, Start) -> + ExpectedArities = get_arities(Variants), + case Arities == ExpectedArities of + true -> + {ok, {Arities, Pos, String}}; + false -> + {error, {wrong_arities, O, N, Arities, Row, Start}} + end; +parse_arities3(_, Pos, String, Arities, _, _) -> + {ok, {Arities, Pos, String}}. + +parse_anonymous_tag(Pos, String, Arities) -> + case next_token(Pos, String) of + {ok, {Token, NewPos, NewString}} -> + parse_anonymous_tag2(NewPos, NewString, Arities, Token); + {error, Reason} -> + {error, Reason} + end. + +parse_anonymous_tag2(Pos, String, Arities, Token = {_, _, _, Row, Start, End}) -> + TagCount = length(Arities), + case parse_expression2(int_type(), Pos, String, Token) of + {ok, {Tag, _, _}} when Tag < 0 -> + {error, {negative_tag, Tag, Row, Start, End}}; + {ok, {Tag, _, _}} when Tag >= TagCount -> + {error, {invalid_tag, Tag, TagCount, Row, Start, End}}; + Result -> + Result + end. + +infer_anonymous_variant_elem_types({_, _, {variant, Variants}}, _, Tag) -> + {_Name, ElemTypes} = lists:nth(Tag + 1, Variants), + ElemTypes; +infer_anonymous_variant_elem_types({_, _, unknown_type}, Arities, Tag) -> + Arity = lists:nth(Tag + 1, Arities), + lists:duplicate(Arity, unknown_type()). + %%% Record parsing parse_record_or_map({_, _, {map, [KeyType, ValueType]}}, Pos, String, _, _) -> @@ -1027,15 +1147,12 @@ fate_to_iolist(Type, {tuple, Tuple}) -> _ -> tuple_to_iolist([], Tuple) end; -fate_to_iolist(Type, {variant, _, Tag, Tuple}) -> +fate_to_iolist(Type, {variant, Arities, Tag, Tuple}) -> case Type of {O, N, {variant, VariantTypes}} when Tag < length(VariantTypes) -> variant_to_iolist(O, N, VariantTypes, Tag, Tuple); - {O, N, _} -> - % TODO: Make up a special syntax for anonymous variant terms. - erlang:exit({untyped_variant, O, N}); - _ -> - erlang:exit({untyped_variant, unknown_type, already_normalized}) + {_, _, _} -> + anonymous_variant_to_iolist(Arities, Tag, Tuple) end; fate_to_iolist(Type, List) when is_list(List) -> case Type of @@ -1130,6 +1247,22 @@ choose_variant_prefix(O, N) -> [] end. +% We don't have type information, but the Sophia programming language doesn't +% have syntax for anonymous variants, so we have to make a syntax up. This +% syntax is also supported when parsing terms, so that the output of one +% contract call can be fed easily into another contract call. +anonymous_variant_to_iolist(Arities, Tag, Tuple) -> + % Extract the elements of the tuple. + Elems = tuple_to_list(Tuple), + + % Turn the arities, tag, and elements into an iolist. + AritiesStr = list_to_iolist(int_type(), Arities), + TagStr = integer_to_list(Tag), + FullTermsStr = list_elems_to_iolist(unknown_type(), Elems, [AritiesStr, ", ", TagStr]), + + % Wrap that iolist in the anonymous 'variant' constructor. + ["variant(", FullTermsStr, ")"]. + multivalue_to_iolist([FirstType | ElemTypes], [FirstTerm | Elems]) -> FirstTermChars = fate_to_iolist(FirstType, FirstTerm), multivalue_to_iolist(ElemTypes, Elems, FirstTermChars); @@ -1282,16 +1415,18 @@ check_parser_roundtrip(Sophia) -> % syntax. Let's do a lenient test. roundtrip_parser_lenient(unknown_type(), Sophia, Fate). -check_parser_with_typedef(Typedef, Sophia) -> +check_parser_with_typedef(Typedef, Sophia, UntypedSophia) -> % Compile the type definitions alongside the usual literal expression. Source = "contract C =\n " ++ Typedef ++ "\n entrypoint f() = " ++ Sophia, {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"), - % Do a typed parse, as usual, but there are probably record/variant - % definitions in the AACI, so untyped parses probably don't work, and - % variants often have optional namespaces, so the sophia result might not - % match exactly, but should still be equivalent. - roundtrip_parser_lenient(Type, Sophia, Fate). + % Do a typed parse, as usual. Variant namespaces can make pretty printing + % ambiguous, so make the roundtrip lenient. + roundtrip_parser_lenient(Type, Sophia, Fate), + % Do an untyped parse, but using a second special Sophia expression that + % doesn't require type info to parse. This one *doesn't* need to be + % lenient, since we are specifying a distinct sophia expression. + roundtrip_parser(unknown_type(), UntypedSophia, Fate). anon_types_test() -> % Integers. @@ -1323,6 +1458,10 @@ anon_types_test() -> check_parser_roundtrip("(1, [2, 3], (4, 5))"), % Map. check_parser_roundtrip("{[1] = 2, [3] = 4}"), + % Option. + check_parser_roundtrip("None"), + check_parser_roundtrip("Some(1)"), + check_parser_roundtrip("Some([1, 2, 3])"), ok. @@ -1342,7 +1481,7 @@ string_escape_codes_test() -> records_test() -> TypeDef = "record pair = {x: int, y: int}", Sophia = "{x = 1, y = 2}", - check_parser_with_typedef(TypeDef, Sophia), + check_parser_with_typedef(TypeDef, Sophia, "(1, 2)"), % The above won't run an untyped parse on the expression, but we can. It % will error, though. {error, {unresolved_record, _, _, _}} = parse_literal(unknown_type(), Sophia). @@ -1350,11 +1489,11 @@ records_test() -> variant_test() -> TypeDef = "datatype multi('a) = Zero | One('a) | Two('a, 'a)", - check_parser_with_typedef(TypeDef, "Zero"), - check_parser_with_typedef(TypeDef, "One(0)"), - check_parser_with_typedef(TypeDef, "Two(0, 1)"), - check_parser_with_typedef(TypeDef, "Two([], [1, 2, 3])"), - check_parser_with_typedef(TypeDef, "C.Zero"), + check_parser_with_typedef(TypeDef, "Zero", "variant([0, 1, 2], 0)"), + check_parser_with_typedef(TypeDef, "One(0)", "variant([0, 1, 2], 1, 0)"), + check_parser_with_typedef(TypeDef, "Two(0, 1)", "variant([0, 1, 2], 2, 0, 1)"), + check_parser_with_typedef(TypeDef, "Two([], [1, 2, 3])", "variant([0, 1, 2], 2, [], [1, 2, 3])"), + check_parser_with_typedef(TypeDef, "C.Zero", "variant([0, 1, 2], 0)"), {error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), "Zero"), @@ -1362,10 +1501,10 @@ variant_test() -> ambiguous_variant_test() -> TypeDef = "datatype mytype = C | D", - check_parser_with_typedef(TypeDef, "C"), - check_parser_with_typedef(TypeDef, "D"), - check_parser_with_typedef(TypeDef, "C.C"), - check_parser_with_typedef(TypeDef, "C.D"), + check_parser_with_typedef(TypeDef, "C", "variant([0, 0], 0)"), + check_parser_with_typedef(TypeDef, "D", "variant([0, 0], 1)"), + check_parser_with_typedef(TypeDef, "C.C", "variant([0, 0], 0)"), + check_parser_with_typedef(TypeDef, "C.D", "variant([0, 0], 1)"), ok. @@ -1410,9 +1549,9 @@ bits_test() -> singleton_records_test() -> TypeDef = "record singleton('a) = {it: 'a}", - check_parser_with_typedef(TypeDef, "{it = 123}"), - check_parser_with_typedef(TypeDef, "{it = {it = {it = 5}}}"), - check_parser_with_typedef(TypeDef, "[{it = 1}, {it = 2}, {it = 3}]"), + check_parser_with_typedef(TypeDef, "{it = 123}", "123"), + check_parser_with_typedef(TypeDef, "{it = {it = {it = 5}}}", "5"), + check_parser_with_typedef(TypeDef, "[{it = 1}, {it = 2}, {it = 3}]", "[1, 2, 3]"), ok. @@ -1421,9 +1560,9 @@ singleton_variants_test() -> % actually a special case; singleton variants are in fact wrapped in the % FATE too. TypeDef = "datatype wrapped('a) = Wrap('a)", - check_parser_with_typedef(TypeDef, "Wrap(123)"), - check_parser_with_typedef(TypeDef, "Wrap(Wrap(123))"), - check_parser_with_typedef(TypeDef, "[Wrap(1), Wrap(2), Wrap(3)]"), + check_parser_with_typedef(TypeDef, "Wrap(123)", "variant([1], 0, 123)"), + check_parser_with_typedef(TypeDef, "Wrap(Wrap(123))", "variant([1], 0, variant([1], 0, 123))"), + check_parser_with_typedef(TypeDef, "[Wrap(1), Wrap(2), Wrap(3)]", "[variant([1], 0, 1), variant([1], 0, 2), variant([1], 0, 3)]"), ok.