2 Commits

Author SHA1 Message Date
Jarvis Carroll d323fb0f52 Add special anonymous variant syntax
This is outside of the scope of the sophia parser, but is a simple generalization to
'sophia terms' to make them able to represent any FATE term anonymously.

We also parse these anonymous variant expressions without type info, since it is convenient
for users to copy the output of one call into another call.

Anonymous parsing of None and Some was also added, since new users would be shocked if this
doesn't work, and advanced users will greatly appreciate that it does. The resulting FATE
terms are still rendered as variant([0, 1], ...), since user defined types can also have [0, 1]
as their arity list, and since automation and tooling programmers hate special case exceptions like that.

Anonymous parsing of other Chain and AENS terms is not added, since anonymous variants already cover those types,
so very little is gained by hard-coding such complex types into the term parser. Complex, version-specific compiler
types are already supported by hakuzaru, in the form of the ACI/AACI; parsing without AACI, on the other hand, is
intended to support language-agnostic communication using the primitives of FATE, and in general, variants
in FATE are anonymous.
2026-06-05 03:08:38 +00:00
Jarvis Carroll ea3a5453f2 fix bytes coerce logic 2026-05-28 00:41:51 +00:00
3 changed files with 218 additions and 283 deletions
+17 -11
View File
@@ -926,7 +926,10 @@ erlang_to_fate({O, N, char}, Str) ->
single_error({invalid, O, N, Str})
end;
erlang_to_fate({O, N, {bytes, [Count]}}, Bytes) when is_bitstring(Bytes) ->
coerce_bytes(O, N, Count, Bytes);
case check_bytes(O, N, Count, Bytes) of
ok -> {ok, {bytes, Bytes}};
{error, Reason} -> {error, Reason}
end;
erlang_to_fate({_, _, bits}, Num) when is_integer(Num) ->
{ok, {bits, Num}};
erlang_to_fate({_, _, bits}, Bits) when is_bitstring(Bits) ->
@@ -988,14 +991,14 @@ decode_chain_object(Tag, S) ->
error:incorrect_size -> {error, incorrect_size}
end.
coerce_bytes(O, N, _, Bytes) when bit_size(Bytes) rem 8 /= 0 ->
check_bytes(O, N, _, Bytes) when bit_size(Bytes) rem 8 /= 0 ->
single_error({partial_bytes, O, N, bit_size(Bytes)});
coerce_bytes(_, _, any, Bytes) ->
{ok, Bytes};
coerce_bytes(O, N, Count, Bytes) when byte_size(Bytes) /= Count ->
check_bytes(_, _, any, _) ->
ok;
check_bytes(O, N, Count, Bytes) when byte_size(Bytes) /= Count ->
single_error({incorrect_size, O, N, Bytes});
coerce_bytes(_, _, _, Bytes) ->
{ok, Bytes}.
check_bytes(_, _, _, _) ->
ok.
coerce_zipped_bindings(Bindings, Direction, Tag) ->
coerce_zipped_bindings(Bindings, Direction, Tag, [], []).
@@ -1261,8 +1264,11 @@ fate_to_erlang({_, _, string}, Bin) ->
{ok, Str};
fate_to_erlang({_, _, char}, Val) ->
{ok, Val};
fate_to_erlang({O, N, {bytes, [Count]}}, Bytes) when is_bitstring(Bytes) ->
coerce_bytes(O, N, Count, Bytes);
fate_to_erlang({O, N, {bytes, [Count]}}, {bytes, Bytes}) when is_bitstring(Bytes) ->
case check_bytes(O, N, Count, Bytes) of
ok -> {ok, Bytes};
{error, Reason} -> {error, Reason}
end;
fate_to_erlang({_, _, bits}, {bits, Num}) ->
{ok, Num};
fate_to_erlang({_, _, {list, [Type]}}, Data) when is_list(Data) ->
@@ -1452,7 +1458,7 @@ coerce_record_test() ->
coerce_bytes_test() ->
{ok, Type} = annotate_type({tuple, [{bytes, [4]}, {bytes, [any]}]}, #{}),
check_roundtrip(Type, {<<"abcd">>, <<"efghi">>}, {tuple, {<<"abcd">>, <<"efghi">>}}).
check_roundtrip(Type, {<<"abcd">>, <<"efghi">>}, {tuple, {{bytes, <<"abcd">>}, {bytes, <<"efghi">>}}}).
coerce_bits_test() ->
{ok, Type} = annotate_type(bits, #{}),
@@ -1471,7 +1477,7 @@ coerce_unicode_test() ->
coerce_hash_test() ->
{ok, Type} = annotate_type("hash", builtin_typedefs()),
Hash = list_to_binary(lists:seq(1,32)),
check_roundtrip(Type, Hash, Hash),
check_roundtrip(Type, Hash, {bytes, Hash}),
ok.
+28 -238
View File
@@ -1,43 +1,17 @@
%%% @doc
%%% Hakuzaru Key Functions
%%% Key functions
%%%
%%% The Gajumaru's default key type is based on Elliptical Curve Cryptography (ECC).
%%% The specific curve used is 25519, and the typical key representation is Ed25519.
%%%
%%% The "Ed" in "Ed25519" stands for Harold Edwards. This form represents
%%% a coordinate on a "Twisted Edwards Curve".
%%%
%%% The "X" in "X25519" stands for the X-coordinate, also known as the
%%% "Montgomery u-coordinate" on a "Montgomery Curve".
%%%
%%% The two are equivalent, but have meaningfully different properties.
%%% The main reason this is a module of its own is that in the original architecture
%%% it was a process rather than just a library of functions. Now that it exists, though,
%%% there is little motivation to cram everything here into the controller process's
%%% code.
%%% @end
-module(hz_key_master).
-vsn("0.9.2").
-export([make_key/0, make_key/1, encode/1, decode/1]).
-export([shared_secret_a/6, shared_secret_b/6,
ed25519_pk_to_x25519/1, ed25519_sk_to_x25519/1,
hkdf/4, hkdf/5]).
-spec make_key() -> {ID, KeyPair}
when ID :: string(),
KeyPair :: #{secret => binary(), public => binary()}.
%% @doc
%% @equiv make_key(<<>>)
make_key() ->
make_key(<<>>).
-spec make_key(Secret) -> {ID, KeyPair}
when Secret :: <<>> | <<_:32*8>>,
ID :: string(),
KeyPair :: #{secret => binary(), public => binary()}.
%% @doc
%% Generate a Ed25519 keypair tagged with the corresponding Gajumaru ID.
-export([make_key/1, encode/1, decode/1]).
-export([lcg/1]).
make_key(<<>>) ->
Pair = #{public := Public} = ecu_eddsa:sign_keypair(),
@@ -151,212 +125,28 @@ sumcheck(Width, Bits) ->
end.
-spec shared_secret_a(A_E_E_SK, B_P_E_PK, B_E_E_PK, Protocol, Version, Salt) -> SS
when A_E_E_SK :: binary(),
B_P_E_PK :: <<_:32*8>>,
B_E_E_PK :: <<_:32*8>>,
Protocol :: binary(),
Version :: binary(),
Salt :: binary(),
SS :: <<_:32*8>>.
%% @doc
%% Alice's side of a shared key derivation based on ed25519 keys as generated by this module.
-spec lcg(integer()) -> integer().
%% A simple PRNG that fits into 32 bits and is easy to implement anywhere (Kotlin).
%% Specifically, it is a "linear congruential generator" of the Lehmer variety.
%% The constants used are based on recommendations from Park, Miller and Stockmeyer:
%% https://www.firstpr.com.au/dsp/rand31/p105-crawford.pdf#page=4
%%
%% Typically Alice would be providing an ephemeral key to establish
%% a shared secret while remaining (at least initially) anonymous from Bob. Bob,
%% on the other hand, is providing a permanent key and also an ephemeral key,
%% proving identity without exposing the shared secret in the future were one of
%% the secrets to be compromised.
%% <ul>
%% <li>`A_E_E_SK' Alice's Ephemeral Ed25519 Secret Key.</li>
%% <li>`B_P_E_PK' Bob's Permanent Ed25519 Public Key.</li>
%% <li>`B_E_E_PK' Bob's Ephemeral Ed25519 Public Key.</li>
%% <li>`Protocol' is an arbitrary binary string, typically a protocol name in UTF-8.</li>
%% <li>`Version' is another arbitrary binary string, typically a protocol version in UTF-8.</li>
%% <li>`Salt' is a binary salt, which if empty will be replaced by a binary string of zeroes.</li>
%% <li>`SS' is the resulting 32-byte shared secret.</li>
%% </ul>
shared_secret_a(A_E_E_SK, B_P_E_PK, B_E_E_PK, Protocol, Version, Salt) ->
A_E_X_SK = ed25519_sk_to_x25519(A_E_E_SK),
B_P_X_PK = ed25519_pk_to_x25519(B_P_E_PK),
B_E_X_PK = ed25519_pk_to_x25519(B_E_E_PK),
DH_Permanent = crypto:compute_key(ecdh, B_P_X_PK, A_E_X_SK, x25519),
DH_Ephemeral = crypto:compute_key(ecdh, B_E_X_PK, A_E_X_SK, x25519),
finalize_hkdf(DH_Permanent, DH_Ephemeral, Protocol, Version, Salt).
-spec shared_secret_b(B_P_E_SK, B_E_E_SK, A_E_E_PK, Protocol, Version, Salt) -> SS
when B_P_E_SK :: binary(),
B_E_E_SK :: binary(),
A_E_E_PK :: <<_:32*8>>,
Protocol :: binary(),
Version :: binary(),
Salt :: binary(),
SS :: <<_:32*8>>.
%% @doc
%% Bobs's side of a shared key derivation based on ed25519 keys as generated by this module.
%% The input value should be between 1 and 2^31-2. (Both 0 and 2^31-1 map to 0, and 0 maps to itself forever.)
%%
%% Typically Alice would be providing an ephemeral key to establish
%% a shared secret while remaining (at least initially) anonymous from Bob. Bob,
%% on the other hand, is providing a permanent key and also an ephemeral key,
%% proving identity without exposing the shared secret in the future were one of
%% the secrets to be compromised.
%% <ul>
%% <li>`B_P_E_SK' Bob's Permanent Ed25519 Secret Key.</li>
%% <li>`B_E_E_SK' Bob's Ephemeral Ed25519 Secret Key.</li>
%% <li>`A_E_E_PK' Alice's Ephemeral Ed25519 Public Key.</li>
%% <li>`Protocol' is an arbitrary binary string, typically a protocol name in UTF-8.</li>
%% <li>`Version' is another arbitrary binary string, typically a protocol version in UTF-8.</li>
%% <li>`Salt' is a binary salt, which if empty will be replaced by a binary string of zeroes.</li>
%% <li>`SS' is the resulting 32-byte shared secret.</li>
%% </ul>
%% The purpose of this PRNG is for password-based dictionary shuffling.
shared_secret_b(B_P_E_SK, B_E_E_SK, A_E_E_PK, Protocol, Version, Salt) ->
B_P_X_SK = ed25519_sk_to_x25519(B_P_E_SK),
B_E_X_SK = ed25519_sk_to_x25519(B_E_E_SK),
A_E_X_PK = ed25519_pk_to_x25519(A_E_E_PK),
DH_Permanent = crypto:compute_key(ecdh, A_E_X_PK, B_P_X_SK, x25519),
DH_Ephemeral = crypto:compute_key(ecdh, A_E_X_PK, B_E_X_SK, x25519),
finalize_hkdf(DH_Permanent, DH_Ephemeral, Protocol, Version, Salt).
finalize_hkdf(DH_Permanent, DH_Ephemeral, Protocol, Version, Salt) ->
MixedInput = <<DH_Permanent/binary, DH_Ephemeral/binary>>,
Info = <<Protocol/binary, ":", Version/binary, ":">>,
hkdf(sha256, MixedInput, Salt, Info).
%% Curve25519 Prime Field Constant: 2^255 - 19
%% Yes, in hex it reads kind of like "lucky fed"
p() -> 16#7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFED.
-spec ed25519_pk_to_x25519(ED25519_PubKey) -> X25519_PubKey
when ED25519_PubKey :: <<_:32*8>>,
X25519_PubKey :: <<_:32*8>>.
%% @doc
%% Convert a curve 25519 public key from Edwards representation to X-coordinate
%% representation.
ed25519_pk_to_x25519(<<ED25519_PK:32/binary>>) ->
<<CompressedInt:256/little-integer>> = ED25519_PK,
% Clear the sign bit (MSB) to get the raw y-coordinate
Y = CompressedInt band ((1 bsl 255) - 1),
% Compute u = (1 + y) / (1 - y) mod P
Num = (1 + Y) rem p(),
Den = (1 - Y + p()) rem p(),
case Den =:= 0 of
true ->
% If y == 1, the point maps to the point at infinity.
% On X25519, this translates to u = 0.
<<0:256/little-integer>>;
false ->
U = (Num * mod_inv(Den, p())) rem p(),
<<U:256/little-integer>>
lcg(N) ->
    % Computes (Multiplier * N) rem Modulus without ever forming the full
    % product: N is split by Quotient, and the two partial products are
    % subtracted. A negative difference is wrapped back by adding Modulus.
    Modulus = 16#7FFFFFFF,
    Multiplier = 48271,
    Quotient = 44488,  % Modulus div Multiplier
    Remainder = 3399,  % Modulus rem Multiplier
    case Multiplier * (N rem Quotient) - Remainder * (N div Quotient) of
        Negative when Negative < 0 -> Negative + Modulus;
        NonNegative -> NonNegative
    end.
-spec ed25519_sk_to_x25519(ED25519_SecKey) -> X25519_SecKey
when ED25519_SecKey :: binary(),
X25519_SecKey :: <<_:32*8>>.
%% @doc
%% Convert a curve 25519 secret key from Edwards representation to X-coordinate
%% representation.
ed25519_sk_to_x25519(<<ED25519_SK_Secret:32/binary, _/binary>>) ->
<<X25519_SK:32/binary, _/binary>> = crypto:hash(sha512, ED25519_SK_Secret),
X25519_SK.
mod_inv(A, M) ->
{1, X, _} = ext_gcd(A, M),
(X + M) rem M.
ext_gcd(A, 0) ->
{A, 1, 0};
ext_gcd(A, B) ->
{G, X1, Y1} = ext_gcd(B, A rem B),
{G, Y1, X1 - (A div B) * Y1}.
-spec hkdf(Hash, IKM, Salt, Info) -> DerivedKey
when Hash :: md5 | sha | sha224 | sha256 | sha384 | sha512,
IKM :: binary(),
Salt :: binary(),
Info :: binary(),
DerivedKey :: <<_:32*8>>.
%% @doc
%% 32-byte HMAC-Based Extract-and-Expand Key Derivation
%% @equiv hkdf(Hash, IKM, Salt, Info, 32)
hkdf(Hash, IKM, Salt, Info) ->
hkdf(Hash, IKM, Salt, Info, 32).
-spec hkdf(Hash, IKM, Salt, Info, Length) -> DerivedKey
when Hash :: md5 | sha | sha224 | sha256 | sha384 | sha512,
IKM :: binary(),
Salt :: binary(),
Info :: binary(),
Length :: 16 | 20 | 28 | 32 | 48 | 64,
DerivedKey :: binary().
%% @doc
%% RFC-5869 compliant HMAC-Based Extract-and-Expand Key Derivation
%%
%% RFC-5869:
%% <a href="https://datatracker.ietf.org/doc/html/rfc5869">https://datatracker.ietf.org/doc/html/rfc5869</a>
%%
%% The purpose of HKDF is to take an initial, raw secret input that might
%% be mathematically strong but structurally "clumpy" and transform it into one
%% or more uniform, high-entropy keys suitable for use in cryptography.
%%
%% The problem is that when Alice and Bob compute a Diffie-Hellman shared secret
%% over X25519, the resulting bytes are mathematically secure, but they are not
%% evenly distributed as random noise. Cryptographic ciphers expect keys where
%% every single bit has an exactly 50% chance of being a 0 or a 1. Passing raw
%% DH outputs straight into a cipher can introduce subtle, exploitable patterns.
%%
%% HKDF "smooths out" the entropy.
%%
%% HMAC stands for "Keyed-Hash Message Authentication Code", but without the
%% leading "K" just to keep us on our toes. The problem it solves is that simply
%% concatenating a secret and some target data and hashing them together to produce
%% a message authentication hash leaves the resulting hash vulnerable to a "length
%% extension attack". An attacker can append additional data to the end of the
%% message and arrive at a valid new hash without ever knowing the secret.
%%
%% RFC-2104 provides good background information on the technique:
%% <a href="https://datatracker.ietf.org/doc/html/rfc2104">https://datatracker.ietf.org/doc/html/rfc2104</a>
hkdf(Hash, IKM, Salt, Info, Length) ->
PRK = extract(Hash, Salt, IKM),
expand(Hash, PRK, Info, Length).
extract(Hash, <<>>, IKM) ->
%% If salt is empty RFC 5869 requires a string of zeros equal to hash size
Salt = binary:copy(<<0>>, hash_size(Hash)),
extract(Hash, Salt, IKM);
extract(Hash, Salt, IKM) ->
crypto:mac(hmac, Hash, Salt, IKM).
expand(Hash, PRK, Info, OutLen) ->
HashLen = hash_size(Hash),
BlockCount = (OutLen + HashLen - 1) div HashLen,
true = BlockCount =< 255,
FullBlocks = expand_loop(Hash, PRK, Info, BlockCount, 1, <<>>, <<>>),
<<Output:OutLen/binary, _/binary>> = FullBlocks,
Output.
expand_loop(Hash, PRK, Info, N, Counter, PrevT, Acc) when Counter =< N ->
Payload = <<PrevT/binary, Info/binary, Counter:8>>,
T = crypto:mac(hmac, Hash, PRK, Payload),
expand_loop(Hash, PRK, Info, N, Counter + 1, T, <<Acc/binary, T/binary>>);
expand_loop(_, _, _, _, _, _, Acc) ->
Acc.
hash_size(md5) -> 16;
hash_size(sha) -> 20;
hash_size(sha224) -> 28;
hash_size(sha256) -> 32;
hash_size(sha384) -> 48;
hash_size(sha512) -> 64.
+173 -34
View File
@@ -343,6 +343,12 @@ parse_expression2(_, _, _, Token) ->
unknown_type() ->
{unknown_type, already_normalized, unknown_type}.
% Annotated type for a plain integer, in the same {Original, Normalized, Type}
% triple shape that unknown_type/0 uses.
int_type() ->
    {integer, already_normalized, integer}.

% Annotated type for a list of integers, used to parse the arity list of an
% anonymous variant. The middle element must be the atom 'already_normalized',
% matching unknown_type/0 and int_type/0; it was previously misspelled.
int_list_type() ->
    {{list, [integer]}, already_normalized, {list, [int_type()]}}.
expect_tokens([], Pos, String) ->
{ok, {Pos, String}};
expect_tokens([Str | Rest], Pos, String) ->
@@ -377,11 +383,14 @@ parse_alphanum(Type, Pos, String, ["Bits", "all"], Row, Start, End) ->
typecheck_bits(Type, Pos, String, -1, Row, Start, End);
parse_alphanum(Type, Pos, String, ["Bits", "none"], Row, Start, End) ->
typecheck_bits(Type, Pos, String, 0, Row, Start, End);
parse_alphanum(Type, Pos, String, ["variant"], Row, Start, End) ->
parse_anonymous_variant(Type, Pos, String, Row, Start, End);
parse_alphanum(Type, Pos, String, [[C | _] = S], Row, Start, End) when ?IS_LATIN_LOWER(C) ->
% From a programming perspective, we are trying to parse a constant, so
% an alphanum token can really only be a constructor, or a chain object.
% Constructors start with uppercase characters, so lowercase can only be a
% chain object.
% Constructors start with uppercase characters, and we have handled our
% made-up 'variant' case explicitly, so the only other lowercase constants
% are serialized chain objects.
try
case gmser_api_encoder:decode(unicode:characters_to_binary(S)) of
{account_pubkey, Data} ->
@@ -400,8 +409,8 @@ parse_alphanum(Type, Pos, String, [[C | _] = S], Row, Start, End) when ?IS_LATIN
_:_ -> {error, {unexpected_identifier, S, Row, Start, End}}
end;
parse_alphanum(Type, Pos, String, Path, Row, Start, End) ->
% Inversely, chain object prefixes are always lowercase, so any other path
% must be a variant constructor, or invalid.
% Now having handled all lowercase terms, anything else must be uppercase,
% which is either a variant constructor, or totally invalid.
parse_variant(Type, Pos, String, Path, Row, Start, End).
typecheck_integer({_, _, integer}, Pos, String, Value, _, _, _) ->
@@ -731,6 +740,12 @@ parse_variant({O, N, {variant, Variants}}, Pos, String, [Namespace, Constructor]
_ ->
{error, {invalid_constructor, O, N, Namespace ++ "." ++ Constructor, Row, Start, End}}
end;
parse_variant({_, _, unknown_type}, Pos, String, ["None"], _, _, _) ->
% Special case for None without type info.
parse_variant3([0, 1], 0, [], Pos, String);
parse_variant({_, _, unknown_type}, Pos, String, ["Some"], _, _, _) ->
% Also a special case for Some.
parse_variant3([0, 1], 1, [unknown_type()], Pos, String);
parse_variant({_, _, unknown_type}, _, _, _, Row, Start, End) ->
{error, {unresolved_variant, Row, Start, End}};
parse_variant({O, N, _}, _, _, _, Row, Start, End) ->
@@ -753,8 +768,7 @@ get_typename(Name) ->
parse_variant2(O, N, Variants, Pos, String, Prefix, Constructor, Row, Start, End) ->
case lookup_variant(Constructor, Variants, 0) of
{ok, {Tag, ElemTypes}} ->
GetArity = fun({_, OtherElemTypes}) -> length(OtherElemTypes) end,
Arities = lists:map(GetArity, Variants),
Arities = get_arities(Variants),
parse_variant3(Arities, Tag, ElemTypes, Pos, String);
error ->
{error, {invalid_constructor, O, N, Prefix ++ Constructor, Row, Start, End}}
@@ -790,6 +804,112 @@ lookup_variant(Ident, [{Ident, ElemTypes} | _], Tag) ->
lookup_variant(Ident, [_ | Rest], Tag) ->
lookup_variant(Ident, Rest, Tag + 1).
% Map a list of {Constructor, ElemTypes} pairs to the list of their element
% counts, preserving order.
get_arities(Variants) ->
    lists:map(fun({_Constructor, Types}) -> length(Types) end, Variants).
% Entry point for the made-up 'variant(...)' syntax. Only a variant type or
% the unknown type may be parsed this way; any other expected type is
% reported as a wrong_type error at the position of the 'variant' token.
parse_anonymous_variant({_, _, {variant, _}} = Type, Pos, String, _, _, _) ->
    parse_anonymous_variant2(Type, Pos, String);
parse_anonymous_variant({_, _, unknown_type} = Type, Pos, String, _, _, _) ->
    parse_anonymous_variant2(Type, Pos, String);
parse_anonymous_variant({O, N, _}, _, _, Row, Start, End) ->
    {error, {wrong_type, O, N, variant, Row, Start, End}}.
% Step 2: require the "(" token that follows 'variant', then continue with
% the arity list.
parse_anonymous_variant2(Type, Pos, String) ->
    case expect_tokens(["("], Pos, String) of
        {error, _} = Error ->
            Error;
        {ok, {AfterParen, Rest}} ->
            parse_anonymous_variant3(Type, AfterParen, Rest)
    end.
% Step 3: parse the arity list, then continue with the separator before the
% tag.
parse_anonymous_variant3(Type, Pos, String) ->
    case parse_arities(Type, Pos, String) of
        {error, _} = Error ->
            Error;
        {ok, {Arities, AfterArities, Rest}} ->
            parse_anonymous_variant4(Type, AfterArities, Rest, Arities)
    end.
% Step 4: require the "," token between the arity list and the tag.
parse_anonymous_variant4(Type, Pos, String, Arities) ->
    case expect_tokens([","], Pos, String) of
        {error, _} = Error ->
            Error;
        {ok, {AfterComma, Rest}} ->
            parse_anonymous_variant5(Type, AfterComma, Rest, Arities)
    end.
% Step 5: parse the tag (validated against the arity list), then continue
% with the variant's element terms.
parse_anonymous_variant5(Type, Pos, String, Arities) ->
    case parse_anonymous_tag(Pos, String, Arities) of
        {error, _} = Error ->
            Error;
        {ok, {Tag, AfterTag, Rest}} ->
            parse_anonymous_variant6(Type, AfterTag, Rest, Arities, Tag)
    end.
% Step 6: work out the element types for the chosen tag, parse the remaining
% terms with parse_multivalue3/4, and assemble the FATE variant term
% {variant, Arities, Tag, ElementTuple}.
parse_anonymous_variant6(Type, Pos, String, Arities, Tag) ->
    ElemTypes = infer_anonymous_variant_elem_types(Type, Arities, Tag),
    case parse_multivalue3(ElemTypes, Pos, String, []) of
        {error, _} = Error ->
            Error;
        {ok, {Terms, AfterTerms, Rest}} ->
            {ok, {{variant, Arities, Tag, list_to_tuple(Terms)}, AfterTerms, Rest}}
    end.
% Read the first token of the arity list and hand it to parse_arities2/4.
parse_arities(Type, Pos, String) ->
    case next_token(Pos, String) of
        {error, _} = Error ->
            Error;
        {ok, {Token, AfterToken, Rest}} ->
            parse_arities2(Type, AfterToken, Rest, Token)
    end.
% Parse the arity list as an expression of type list(int), remembering the
% row and start column of its first token for error reporting.
parse_arities2(Type, Pos, String, {_, _, _, Row, Start, _} = Token) ->
    case parse_expression2(int_list_type(), Pos, String, Token) of
        {error, _} = Error ->
            Error;
        {ok, {Arities, AfterArities, Rest}} ->
            parse_arities3(Type, AfterArities, Rest, Arities, Row, Start)
    end.
% Validate a parsed arity list.
%
% With a known variant type, the arities must equal the arities of the
% type's constructors, otherwise {error, {wrong_arities, ...}} is returned.
%
% Without type information there is nothing to compare against, but the list
% is still user input: every arity must be a non-negative integer. A negative
% arity would otherwise reach lists:duplicate/2 in
% infer_anonymous_variant_elem_types/3 and crash the parser, or end up inside
% the resulting variant term. (The negative_tag check in
% parse_anonymous_tag2/4 suggests the integer parser can yield negative
% values.)
parse_arities3({O, N, {variant, Variants}}, Pos, String, Arities, Row, Start) ->
    ExpectedArities = get_arities(Variants),
    case Arities == ExpectedArities of
        true ->
            {ok, {Arities, Pos, String}};
        false ->
            {error, {wrong_arities, O, N, Arities, Row, Start}}
    end;
parse_arities3(_, Pos, String, Arities, Row, Start) ->
    IsValidArity = fun(Arity) -> is_integer(Arity) andalso Arity >= 0 end,
    case lists:all(IsValidArity, Arities) of
        true ->
            {ok, {Arities, Pos, String}};
        false ->
            {error, {negative_arity, Arities, Row, Start}}
    end.
% Read the token that should hold the variant tag and hand it to
% parse_anonymous_tag2/4.
parse_anonymous_tag(Pos, String, Arities) ->
    case next_token(Pos, String) of
        {error, _} = Error ->
            Error;
        {ok, {Token, AfterToken, Rest}} ->
            parse_anonymous_tag2(AfterToken, Rest, Arities, Token)
    end.
% Parse the tag as an integer and check that it indexes into the arity list:
% it must be at least 0 and less than the number of arities. Any other parse
% result (success or error) is returned unchanged.
parse_anonymous_tag2(Pos, String, Arities, {_, _, _, Row, Start, End} = Token) ->
    Limit = length(Arities),
    case parse_expression2(int_type(), Pos, String, Token) of
        {ok, {Tag, _, _}} when Tag < 0 ->
            {error, {negative_tag, Tag, Row, Start, End}};
        {ok, {Tag, _, _}} when Tag >= Limit ->
            {error, {invalid_tag, Tag, Limit, Row, Start, End}};
        Other ->
            Other
    end.
% Choose the element types for the constructor selected by Tag (0-based).
% With a known variant type, they are taken from the type definition. With
% no type information, every element is parsed as unknown_type(), and the
% count comes from the arity list.
infer_anonymous_variant_elem_types({_, _, {variant, Variants}}, _, Tag) ->
    Index = Tag + 1,
    {_Constructor, Types} = lists:nth(Index, Variants),
    Types;
infer_anonymous_variant_elem_types({_, _, unknown_type}, Arities, Tag) ->
    lists:duplicate(lists:nth(Tag + 1, Arities), unknown_type()).
%%% Record parsing
parse_record_or_map({_, _, {map, [KeyType, ValueType]}}, Pos, String, _, _) ->
@@ -1027,15 +1147,12 @@ fate_to_iolist(Type, {tuple, Tuple}) ->
_ ->
tuple_to_iolist([], Tuple)
end;
fate_to_iolist(Type, {variant, _, Tag, Tuple}) ->
fate_to_iolist(Type, {variant, Arities, Tag, Tuple}) ->
case Type of
{O, N, {variant, VariantTypes}} when Tag < length(VariantTypes) ->
variant_to_iolist(O, N, VariantTypes, Tag, Tuple);
{O, N, _} ->
% TODO: Make up a special syntax for anonymous variant terms.
erlang:exit({untyped_variant, O, N});
_ ->
erlang:exit({untyped_variant, unknown_type, already_normalized})
{_, _, _} ->
anonymous_variant_to_iolist(Arities, Tag, Tuple)
end;
fate_to_iolist(Type, List) when is_list(List) ->
case Type of
@@ -1130,6 +1247,22 @@ choose_variant_prefix(O, N) ->
[]
end.
% We don't have type information, but the Sophia programming language doesn't
% have syntax for anonymous variants, so we have to make a syntax up. This
% syntax is also supported when parsing terms, so that the output of one
% contract call can be fed easily into another contract call.
anonymous_variant_to_iolist(Arities, Tag, Tuple) ->
    % The variant's payload, as a list of untyped terms.
    Elems = tuple_to_list(Tuple),
    % The leading "Arities, Tag" part; the elements are rendered after it by
    % list_elems_to_iolist/3, which receives it as its third argument.
    Header = [list_to_iolist(int_type(), Arities), ", ", integer_to_list(Tag)],
    Inner = list_elems_to_iolist(unknown_type(), Elems, Header),
    % Wrap everything in the anonymous 'variant' constructor.
    ["variant(", Inner, ")"].
multivalue_to_iolist([FirstType | ElemTypes], [FirstTerm | Elems]) ->
FirstTermChars = fate_to_iolist(FirstType, FirstTerm),
multivalue_to_iolist(ElemTypes, Elems, FirstTermChars);
@@ -1282,16 +1415,18 @@ check_parser_roundtrip(Sophia) ->
% syntax. Let's do a lenient test.
roundtrip_parser_lenient(unknown_type(), Sophia, Fate).
check_parser_with_typedef(Typedef, Sophia) ->
check_parser_with_typedef(Typedef, Sophia, UntypedSophia) ->
% Compile the type definitions alongside the usual literal expression.
Source = "contract C =\n " ++ Typedef ++ "\n entrypoint f() = " ++ Sophia,
{Fate, Type} = compile_entrypoint_value_and_type(Source, "f"),
% Do a typed parse, as usual, but there are probably record/variant
% definitions in the AACI, so untyped parses probably don't work, and
% variants often have optional namespaces, so the sophia result might not
% match exactly, but should still be equivalent.
roundtrip_parser_lenient(Type, Sophia, Fate).
% Do a typed parse, as usual. Variant namespaces can make pretty printing
% ambiguous, so make the roundtrip lenient.
roundtrip_parser_lenient(Type, Sophia, Fate),
% Do an untyped parse, but using a second special Sophia expression that
% doesn't require type info to parse. This one *doesn't* need to be
% lenient, since we are specifying a distinct sophia expression.
roundtrip_parser(unknown_type(), UntypedSophia, Fate).
anon_types_test() ->
% Integers.
@@ -1323,6 +1458,10 @@ anon_types_test() ->
check_parser_roundtrip("(1, [2, 3], (4, 5))"),
% Map.
check_parser_roundtrip("{[1] = 2, [3] = 4}"),
% Option.
check_parser_roundtrip("None"),
check_parser_roundtrip("Some(1)"),
check_parser_roundtrip("Some([1, 2, 3])"),
ok.
@@ -1342,7 +1481,7 @@ string_escape_codes_test() ->
records_test() ->
TypeDef = "record pair = {x: int, y: int}",
Sophia = "{x = 1, y = 2}",
check_parser_with_typedef(TypeDef, Sophia),
check_parser_with_typedef(TypeDef, Sophia, "(1, 2)"),
% The above won't run an untyped parse on the expression, but we can. It
% will error, though.
{error, {unresolved_record, _, _, _}} = parse_literal(unknown_type(), Sophia).
@@ -1350,11 +1489,11 @@ records_test() ->
variant_test() ->
TypeDef = "datatype multi('a) = Zero | One('a) | Two('a, 'a)",
check_parser_with_typedef(TypeDef, "Zero"),
check_parser_with_typedef(TypeDef, "One(0)"),
check_parser_with_typedef(TypeDef, "Two(0, 1)"),
check_parser_with_typedef(TypeDef, "Two([], [1, 2, 3])"),
check_parser_with_typedef(TypeDef, "C.Zero"),
check_parser_with_typedef(TypeDef, "Zero", "variant([0, 1, 2], 0)"),
check_parser_with_typedef(TypeDef, "One(0)", "variant([0, 1, 2], 1, 0)"),
check_parser_with_typedef(TypeDef, "Two(0, 1)", "variant([0, 1, 2], 2, 0, 1)"),
check_parser_with_typedef(TypeDef, "Two([], [1, 2, 3])", "variant([0, 1, 2], 2, [], [1, 2, 3])"),
check_parser_with_typedef(TypeDef, "C.Zero", "variant([0, 1, 2], 0)"),
{error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), "Zero"),
@@ -1362,10 +1501,10 @@ variant_test() ->
ambiguous_variant_test() ->
TypeDef = "datatype mytype = C | D",
check_parser_with_typedef(TypeDef, "C"),
check_parser_with_typedef(TypeDef, "D"),
check_parser_with_typedef(TypeDef, "C.C"),
check_parser_with_typedef(TypeDef, "C.D"),
check_parser_with_typedef(TypeDef, "C", "variant([0, 0], 0)"),
check_parser_with_typedef(TypeDef, "D", "variant([0, 0], 1)"),
check_parser_with_typedef(TypeDef, "C.C", "variant([0, 0], 0)"),
check_parser_with_typedef(TypeDef, "C.D", "variant([0, 0], 1)"),
ok.
@@ -1410,9 +1549,9 @@ bits_test() ->
singleton_records_test() ->
TypeDef = "record singleton('a) = {it: 'a}",
check_parser_with_typedef(TypeDef, "{it = 123}"),
check_parser_with_typedef(TypeDef, "{it = {it = {it = 5}}}"),
check_parser_with_typedef(TypeDef, "[{it = 1}, {it = 2}, {it = 3}]"),
check_parser_with_typedef(TypeDef, "{it = 123}", "123"),
check_parser_with_typedef(TypeDef, "{it = {it = {it = 5}}}", "5"),
check_parser_with_typedef(TypeDef, "[{it = 1}, {it = 2}, {it = 3}]", "[1, 2, 3]"),
ok.
@@ -1421,9 +1560,9 @@ singleton_variants_test() ->
% actually a special case; singleton variants are in fact wrapped in the
% FATE too.
TypeDef = "datatype wrapped('a) = Wrap('a)",
check_parser_with_typedef(TypeDef, "Wrap(123)"),
check_parser_with_typedef(TypeDef, "Wrap(Wrap(123))"),
check_parser_with_typedef(TypeDef, "[Wrap(1), Wrap(2), Wrap(3)]"),
check_parser_with_typedef(TypeDef, "Wrap(123)", "variant([1], 0, 123)"),
check_parser_with_typedef(TypeDef, "Wrap(Wrap(123))", "variant([1], 0, variant([1], 0, 123))"),
check_parser_with_typedef(TypeDef, "[Wrap(1), Wrap(2), Wrap(3)]", "[variant([1], 0, 1), variant([1], 0, 2), variant([1], 0, 3)]"),
ok.