stuff
+1
-1
@@ -26,7 +26,7 @@ Title | Brief Description
|
||||
[[Serializations]] | Conventions for field order in Gajumaru data structures
|
||||
[[Smart Contracts]] | Terminology
|
||||
[[Sophia]] | Introduction to Sophia, the Gajumaru smart contract language
|
||||
[[Sophia FAQ]] | what it says
|
||||
[[Sophia FQA]] | what it says
|
||||
[[State Channels]] | Overview and characteristics
|
||||
[[Testnet Node Setup]] | Tech support
|
||||
[[Transaction]] | Terminology
|
||||
|
||||
-259
@@ -1,259 +0,0 @@
|
||||
# Sophia FAQ
|
||||
|
||||
- Created: 2026-03-30
|
||||
- Authors: Peter Harpending `<peterharpending@qpq.swiss>`
|
||||
- Last Modified: 2026-04-07
|
||||
|
||||
# References
|
||||
|
||||
- [Sophia docs](https://git.qpq.swiss/QPQ-AG/sophia/src/branch/master/docs)
|
||||
- [Protocol docs](https://git.qpq.swiss/QPQ-AG/protocol)
|
||||
|
||||
# Defining Events in interfaces
|
||||
|
||||
apparently this is legal syntax but the point of this is unclear.
|
||||
|
||||
# Can there be the same function name with different arities?
|
||||
|
||||
# What happens if you delete a non-existent key from a map?
|
||||
|
||||
# How does sophia compilation work
|
||||
|
||||
|
||||
From commit `dbab49936daad7d82bae7cf7336b1ce82e7ab779`
|
||||
|
||||
```erlang
|
||||
% so_compiler.erl:84
|
||||
-spec file(string()) -> {ok, map()} | {error, [so_errors:error()]}.
|
||||
file(Filename) ->
|
||||
file(Filename, []).
|
||||
|
||||
-spec file(string(), options()) -> {ok, map()} | {error, [so_errors:error()]}.
|
||||
file(File, Options0) ->
|
||||
Options = add_include_path(File, Options0),
|
||||
case read_contract(File) of
|
||||
{ok, Bin} ->
|
||||
SrcDir = so_utils:canonical_dir(filename:dirname(File)),
|
||||
from_string(Bin, [{src_file, File}, {src_dir, SrcDir} | Options]);
|
||||
{error, Error} ->
|
||||
Msg = lists:flatten([File,": ",file:format_error(Error)]),
|
||||
{error, [so_errors:new(file_error, Msg)]}
|
||||
end.
|
||||
|
||||
-spec from_string(binary() | string(), options()) -> {ok, map()} | {error, [so_errors:error()]}.
|
||||
from_string(ContractBin, Options) when is_binary(ContractBin) ->
|
||||
from_string(binary_to_list(ContractBin), Options);
|
||||
from_string(ContractString, Options) ->
|
||||
try
|
||||
from_string1(ContractString, Options)
|
||||
catch
|
||||
throw:{error, Errors} -> {error, Errors}
|
||||
end.
|
||||
|
||||
from_string1(ContractString, Options) ->
|
||||
#{ fcode := FCode
|
||||
, fcode_env := FCodeEnv
|
||||
, folded_typed_ast := FoldedTypedAst
|
||||
, warnings := Warnings } = string_to_code(ContractString, Options),
|
||||
#{ child_con_env := ChildContracts } = FCodeEnv,
|
||||
SavedFreshNames = maps:get(saved_fresh_names, FCodeEnv, #{}),
|
||||
FateCode = so_fcode_to_fate:compile(ChildContracts, FCode, SavedFreshNames, Options),
|
||||
pp_assembler(FateCode, Options),
|
||||
ByteCode = gmb_fate_code:serialize(FateCode, []),
|
||||
{ok, Version} = version(),
|
||||
Res = #{byte_code => ByteCode,
|
||||
compiler_version => Version,
|
||||
contract_source => ContractString,
|
||||
type_info => [],
|
||||
fate_code => FateCode,
|
||||
abi_version => gmb_fate_abi:abi_version(),
|
||||
payable => maps:get(payable, FCode),
|
||||
warnings => Warnings
|
||||
},
|
||||
{ok, maybe_generate_aci(Res, FoldedTypedAst, Options)}.
|
||||
|
||||
```
|
||||
|
||||
So a lot is going on in `string_to_code/2`
|
||||
|
||||
```erlang
|
||||
-spec string_to_code(string(), options()) -> map().
|
||||
string_to_code(ContractString, Options) ->
|
||||
Ast = parse(ContractString, Options),
|
||||
pp_sophia_code(Ast, Options),
|
||||
pp_ast(Ast, Options),
|
||||
{TypeEnv, FoldedTypedAst, UnfoldedTypedAst, Warnings} = so_ast_infer_types:infer(Ast, [return_env | Options]),
|
||||
pp_typed_ast(UnfoldedTypedAst, Options),
|
||||
{Env, Fcode} = so_ast_to_fcode:ast_to_fcode(UnfoldedTypedAst, [{original_src, ContractString}|Options]),
|
||||
#{ fcode => Fcode
|
||||
, fcode_env => Env
|
||||
, unfolded_typed_ast => UnfoldedTypedAst
|
||||
, folded_typed_ast => FoldedTypedAst
|
||||
, type_env => TypeEnv
|
||||
, ast => Ast
|
||||
, warnings => Warnings }.
|
||||
|
||||
|
||||
-spec parse(string(), so_compiler:options()) -> none() | so_syntax:ast().
|
||||
parse(Text, Options) ->
|
||||
parse(Text, sets:new(), Options).
|
||||
|
||||
-spec parse(string(), sets:set(), so_compiler:options()) -> none() | so_syntax:ast().
|
||||
parse(Text, Included, Options) ->
|
||||
so_parser:string(Text, Included, Options).
|
||||
```
|
||||
|
||||
So we get an AST from `so_parser:string/3`
|
||||
|
||||
```
|
||||
%% so_parser.erl
|
||||
-spec string(string(), sets:set(include_hash()), so_compiler:options()) -> parse_result().
|
||||
string(String, Included, Opts) ->
|
||||
AST = run_parser(file(), String, Opts),
|
||||
case expand_includes(AST, Included, Opts) of
|
||||
{ok, AST1} -> AST1;
|
||||
{error, Err} -> parse_error(Err)
|
||||
end.
|
||||
|
||||
|
||||
run_parser(P, Inp) ->
|
||||
escape_errors(parse_and_scan(P, Inp, [])).
|
||||
run_parser(P, Inp, Opts) ->
|
||||
escape_errors(parse_and_scan(P, Inp, Opts)).
|
||||
|
||||
parse_and_scan(P, S, Opts) ->
|
||||
set_current_file(proplists:get_value(src_file, Opts, no_file)),
|
||||
set_current_dir(proplists:get_value(src_dir, Opts, no_file)),
|
||||
set_current_include_type(proplists:get_value(include_type, Opts, none)),
|
||||
case so_scan:scan(S) of
|
||||
{ok, Tokens} -> so_parse_lib:parse(P, Tokens);
|
||||
{error, {{Input, Pos}, _}} ->
|
||||
{error, {Pos, scan_error, Input}}
|
||||
end.
|
||||
|
||||
```
|
||||
|
||||
So there's a lot of metadata being kept, but the key part is the call to
|
||||
`so_scan:scan/1`
|
||||
|
||||
```erl
|
||||
lexer() ->
|
||||
Number = fun(Digit) -> [Digit, "+(_", Digit, "+)*"] end,
|
||||
DIGIT = "[0-9]",
|
||||
HEXDIGIT = "[0-9a-fA-F]",
|
||||
LOWER = "[a-z_]",
|
||||
UPPER = "[A-Z]",
|
||||
CON = [UPPER, "[a-zA-Z0-9_]*"],
|
||||
INT = Number(DIGIT),
|
||||
HEX = ["0x", Number(HEXDIGIT)],
|
||||
BYTES = ["#", Number(HEXDIGIT)],
|
||||
WS = "[\\000-\\ ]+",
|
||||
ID = [LOWER, "[a-zA-Z0-9_']*"],
|
||||
TVAR = ["'", ID],
|
||||
QID = ["(", CON, "\\.)+", ID],
|
||||
QCON = ["(", CON, "\\.)+", CON],
|
||||
OP = "[=!<>+\\-*/:&|?~@^]+",
|
||||
%% Five cases for a character
|
||||
%% * 1 7-bit ascii, not \ or '
|
||||
%% * 2-4 8-bit values (UTF8)
|
||||
%% * \ followed by a known modifier [aernrtv]
|
||||
%% * \xhh
|
||||
%% * \x{hhh...}
|
||||
CHAR = "'(([\\x00-\\x26\\x28-\\x5b\\x5d-\\x7f])|([\\x00-\\xff][\\x80-\\xff]{1,3})|(\\\\[befnrtv'\\\\])|(\\\\x[0-9a-fA-F]{2,2})|(\\\\x\\{[0-9a-fA-F]*\\}))'",
|
||||
STRING = "\"([^\"\\\\]|(\\\\.))*\"",
|
||||
|
||||
CommentStart = {"/\\*", push(comment, skip())},
|
||||
CommentRules =
|
||||
[ CommentStart
|
||||
, {"\\*/", pop(skip())}
|
||||
, {"[^/*]+|[/*]", skip()} ],
|
||||
|
||||
Keywords = ["contract", "include", "let", "switch", "type", "record", "datatype", "if", "elif", "else", "function",
|
||||
"stateful", "payable", "true", "false", "mod", "public", "entrypoint", "private", "indexed", "namespace",
|
||||
"interface", "main", "using", "as", "for", "hiding", "band", "bor", "bxor", "bnot"
|
||||
],
|
||||
KW = string:join(Keywords, "|"),
|
||||
|
||||
Rules =
|
||||
%% Comments and whitespace
|
||||
[ CommentStart
|
||||
, {"//.*", skip()}
|
||||
, {WS, skip()}
|
||||
|
||||
%% Special characters
|
||||
, {"\\.\\.|[,.;()\\[\\]{}]", symbol()}
|
||||
|
||||
%% Literals
|
||||
, {CHAR, token(char, fun parse_char/1)}
|
||||
, {STRING, token(string, fun parse_string/1)}
|
||||
, {HEX, token(hex, fun parse_hex/1)}
|
||||
, {INT, token(int, fun parse_int/1)}
|
||||
, {BYTES, token(bytes, fun parse_bytes/1)}
|
||||
|
||||
%% Identifiers (qualified first!)
|
||||
, {QID, token(qid, fun(S) -> string:tokens(S, ".") end)}
|
||||
, {QCON, token(qcon, fun(S) -> string:tokens(S, ".") end)}
|
||||
, {TVAR, token(tvar)}
|
||||
, override({ID, token(id)}, {KW, symbol()}) %% Keywords override identifiers. Need to
|
||||
, {CON, token(con)} %% use override to avoid lexing "lettuce"
|
||||
%% as ['let', {id, "tuce"}].
|
||||
%% Operators
|
||||
, {OP, symbol()}
|
||||
],
|
||||
|
||||
[{code, Rules}, {comment, CommentRules}].
|
||||
|
||||
scan(String) ->
|
||||
Lexer = so_scan_lib:compile(lexer()),
|
||||
so_scan_lib:string(Lexer, code, String).
|
||||
```
|
||||
|
||||
OK. let's look at `so_scan_lib`
|
||||
|
||||
```erl
|
||||
-type regex() :: iodata() | unicode:charlist().
|
||||
-type pos() :: {integer(), integer()}.
|
||||
-type lex_state() :: atom().
|
||||
-type token() :: {atom(), pos(), term()} | {atom(), pos()}.
|
||||
|
||||
-type token_spec() :: {regex(), token_action()}.
|
||||
-opaque token_action() :: fun((string(), pos()) -> {tok_result(), state_change()}).
|
||||
|
||||
-opaque lexer() :: [{lex_state(),
|
||||
fun((string(), pos()) -> {ok, tok_result(), string(), pos()}
|
||||
| end_of_file | error)}].
|
||||
|
||||
%% -- Internal types --
|
||||
-type tok_result() :: {token, token()} | skip.
|
||||
-type state_change() :: none | pop | {push, lex_state()}.
|
||||
|
||||
%% @doc Compile a lexer specification. Takes the regexps for each state and
|
||||
%% combines them into a single big regexp that is then compiled with re:compile/1.
|
||||
%% Note: contrary to lexer generators like leex, we don't have longest match
|
||||
%% semantics (since this isn't supported by re). Use override/2 instead.
|
||||
-spec compile([{lex_state(), [token_spec()]}]) -> lexer().
|
||||
compile(TokenSpecs) ->
|
||||
[{S, compile_spec(Spec)} || {S, Spec} <- TokenSpecs].
|
||||
|
||||
compile_spec(TokenSpecs) ->
|
||||
WithIxs = lists:zip(lists:seq(1, length(TokenSpecs)), TokenSpecs),
|
||||
{ok, Regex} = re:compile(["^(", name(0), string:join([ ["(", name(I), R, ")"] || {I, {R, _}} <- WithIxs ], "|"),")"]),
|
||||
Actions = [ Fun || {_, Fun} <- TokenSpecs ],
|
||||
fun ("", _Pos) -> end_of_file;
|
||||
(S, Pos) ->
|
||||
case re:run(S, Regex, [{capture, all_names}]) of
|
||||
{match, [{0, N} | Capture]} ->
|
||||
Index = 1 + length(lists:takewhile(fun({P, _}) -> P == -1 end, Capture)),
|
||||
Action = lists:nth(Index, Actions),
|
||||
{TokS, Rest} = lists:split(N, S),
|
||||
Tok = Action(TokS, Pos),
|
||||
{ok, Tok, Rest, next_pos(TokS, Pos)};
|
||||
nomatch ->
|
||||
error
|
||||
end
|
||||
end.
|
||||
```
|
||||
|
||||
|
||||
|
||||
# How does sophia compilation work
|
||||
+569
@@ -0,0 +1,569 @@
|
||||
# Sophia Frequently Questioned Answers
|
||||
|
||||
- Created: 2026-03-30
|
||||
- Authors: Peter Harpending `<peterharpending@qpq.swiss>`
|
||||
- Last Modified: 2026-06-04
|
||||
|
||||
# References
|
||||
|
||||
- [Sophia Compiler][so]
|
||||
- [Sophia docs][so-docs]
|
||||
- [Protocol docs](https://git.qpq.swiss/QPQ-AG/protocol)
|
||||
- [GSC][gsc]
|
||||
- [GSC Token definition](https://git.qpq.swiss/QPQ-AG/gsc/src/commit/ba70aace96ed73138496744f7d90c2666428eafc/include/gsc.hrl#L45-L50)
|
||||
|
||||
[gsc]: https://git.qpq.swiss/QPQ-AG/gsc
|
||||
[so]: https://git.qpq.swiss/QPQ-AG/sophia
|
||||
[so-docs]: https://git.qpq.swiss/QPQ-AG/sophia/src/branch/master/docs
|
||||
|
||||
# GSC
|
||||
|
||||
[GSC (= "gajumaru sophia compiler")][gsc] is an experimental
|
||||
work-in-progress maybe-will-be-finished-maybe-won't Sophia compiler
|
||||
that I (PRH) wrote in an effort to experiment with and document the
|
||||
Sophia language.
|
||||
|
||||
It is used a lot to illustrate things in this document, so you might
|
||||
want to download it and get it to work on your machine. The goal in
|
||||
version 0.1 is to match the exact behavior of the [legacy Sophia
|
||||
compiler][so].
|
||||
|
||||
I wrote gsc mostly because I got nerdsniped by the problems that gsc
|
||||
must solve in order to work. In the interest of retroactively
|
||||
justifying being nerdsniped, I will note that the legacy compiler
|
||||
|
||||
1. is *extremely* central to Gajumaru's trust model
|
||||
2. has many serious-to-semi-serious bugs/warts/edge-cases which
|
||||
(prior to this work) were either unknown or
|
||||
known-but-not-documented; e.g., unterminated block comments at
|
||||
the end of files are admissible provided what precedes is a valid
|
||||
Sophia contract:
|
||||
|
||||
```sophia
|
||||
contract Test =
|
||||
type state = unit
|
||||
entrypoint init() : state =
|
||||
()
|
||||
/*
|
||||
according to the legacy sophia compiler, this is a totally 100%
|
||||
legal sophia contract that ends with an unterminated block
|
||||
comment
|
||||
```
|
||||
|
||||
|
||||
# Architecture of the Sophia Compiler
|
||||
|
||||
First some disclaimers:
|
||||
|
||||
1. **Compilers are _NOT_ magic incomprehensible black boxes** that
|
||||
are totally inaccessible to ordinary programmers. (If you
|
||||
encounter one that is, that says more about the compiler and its
|
||||
authors than it does about you...). Compilers simply translate a
|
||||
well-specified input format into a well-specified output format.
|
||||
|
||||
**Compilers are just ordinary pieces of software that work the
|
||||
same way every other piece of software does.**
|
||||
|
||||
2. Like all other types of software, **compilers have bugs and
|
||||
strange unexpected corner cases**. A (the?) purpose of this
|
||||
document is to write down all such cases that I have encountered
|
||||
thus far in this nerdsnipe adventure.
|
||||
|
||||
Most compilers have some variation of the following architecture:
|
||||
|
||||
1. **Tokenization** (also called **lexical analysis**); this step
|
||||
takes the flat array of input characters found in the source code
|
||||
and discovers the "chunk boundaries" in the file:
|
||||
|
||||

|
||||
|
||||
Each chunk is called a "token".
|
||||
|
||||
2. **Parsing** (also called **syntax analysis**); this step takes
|
||||
the flat sequence of tokens, and arranges it into a hierarchy
|
||||
(usually called an "abstract syntax tree" or "AST").
|
||||
|
||||
The set of rules for transforming the token sequence ("signal") into the
|
||||
abstract syntax tree is called the **grammar** of the language.
|
||||
|
||||
```
|
||||
source:
|
||||
the quick brown fox jumps over the lazy dog
|
||||
signal:
|
||||
["the", "quick", "brown", "fox", "jumps",
|
||||
"over", "the", "lazy", "dog"]
|
||||
ast:
|
||||
(Sentence
|
||||
(NounPhrase
|
||||
(determiner "the")
|
||||
(adjective "quick")
|
||||
(adjective "brown")
|
||||
(noun "fox"))
|
||||
(VerbPhrase
|
||||
(verb "jumps")
|
||||
(PrepositionalPhrase
|
||||
(preposition "over")
|
||||
(NounPhrase
|
||||
(determiner "the")
|
||||
(adjective "lazy")
|
||||
(noun "dog")))))
|
||||
```
|
||||
|
||||
This is the first step in which we think of a language in terms
|
||||
of its **structure** rather than simply being a sequence of
|
||||
words/tokens.
|
||||
|
||||
3. **Semantic analysis**: the compiler transforms the abstract
|
||||
syntax tree through a sequence of **intermediate
|
||||
representations** (**IR**s).
|
||||
|
||||
This is where compiler engineering gets interesting, and factors
|
||||
like artistic choice and taste start to dominate. Different
|
||||
optimizations occur at different levels of intermediate
|
||||
representation. The structure of this meta-step depends heavily on
|
||||
the source and target languages, problem domains, goals of the
|
||||
specific compiler, etc.
|
||||
|
||||
This is the step in which we think of phrases in the language in
|
||||
terms of their **meaning** rather than in some strict notion of
|
||||
valid vs. invalid.
|
||||
|
||||
4. **Code generation**: once the compiler has completed its analysis
|
||||
of the input data, and figured out in some precise way what the
|
||||
author of the input was attempting to express notionally, it's
|
||||
finally time to express said notion in the target language.
|
||||
|
||||
At the time of writing (June 2026), only GSC's tokenizer has been
|
||||
fully ironed out and thoroughly tested, the discussion of which will
|
||||
constitute the remainder of this document.
|
||||
|
||||
# PITFALL WARNING! TERMINOLOGY COLLISION re "tokens" vs. gsc "signal"
|
||||
|
||||
What most compilers call "tokens", gsc calls "signal".
|
||||
|
||||
GSC classifies tokens into "signal" and "noise"; "noise" means
|
||||
comments and whitespace, and "signal" is everything else.
|
||||
|
||||
Most compilers discard "noise" tokens (comments and whitespace). GSC
|
||||
retains them for two reasons:
|
||||
|
||||
1. sanity-checking to make sure information isn't lost by accident;
|
||||
e.g. one of gsc's tests
|
||||
2. future-proofing in case we want to add Python/Lisp
|
||||
style doc comments as a language feature down the line.
|
||||
|
||||
```python
|
||||
def foo():
|
||||
"this is a doc comment for foo"
|
||||
print("hi from foo")
|
||||
```
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
However for non-bikeshed compiler tasks (figuring out what the code
|
||||
is supposed to *do* and then expressing that in the target language),
|
||||
noise tokens are entirely irrelevant.
|
||||
|
||||
# What is a token?
|
||||
|
||||
Tokens are the chunks delimited by the "chunk boundaries" of source files.
|
||||
|
||||

|
||||
|
||||
This is roughly analogous to "word boundaries" in natural language;
|
||||
we can hack together a string-splitting function in the Erlang shell
|
||||
to illustrate the notion:
|
||||
|
||||
```erlang
|
||||
11> Intersperse = fun I([], _Sep) -> []; I([Last], _Sep) -> [Last]; I([One | More], Sep) -> [One, Sep | I(More, Sep)] end.
|
||||
#Fun<erl_eval.18.113135111>
|
||||
12> Intersperse(["foo", "bar", "baz"], " ").
|
||||
["foo"," ","bar"," ","baz"]
|
||||
13> string:tokens("foo bar baz", " ").
|
||||
["foo","bar","baz"]
|
||||
14> TokensEn = fun(SrcStr) -> Sep = " ", Signal = string:tokens(SrcStr, Sep), Tokens = Intersperse(Signal, Sep), Tokens end.
|
||||
#Fun<erl_eval.42.113135111>
|
||||
15> TokensEn("foo bar baz").
|
||||
["foo"," ","bar"," ","baz"]
|
||||
16> TokensEn("The quick brown fox jumped over the lazy dog").
|
||||
["The"," ","quick"," ","brown"," ","fox"," ","jumped"," ",
|
||||
"over"," ","the"," ","lazy"," ","dog"]
|
||||
```
|
||||
|
||||
You can see the pitfall regarding terminology collision present in
|
||||
the behavior of the Erlang standard library `string:tokens/2`
|
||||
function, which discards the separator characters:
|
||||
|
||||
```erlang
|
||||
17> string:tokens("foo.bar.baz", ".").
|
||||
["foo","bar","baz"]
|
||||
18> string:tokens("foo.bar,baz", ",").
|
||||
["foo.bar","baz"]
|
||||
```
|
||||
|
||||
# Sophia Tokens
|
||||
|
||||
```erlang
|
||||
-type tk_shape()
|
||||
:: bcom % /* ... */
|
||||
| lcom % //
|
||||
| ws % whitespace
|
||||
% literals
|
||||
| char % 'a'
|
||||
| string % "foo"
|
||||
| int10 % 69_420
|
||||
| int16 % 0xDEAD_BEEF
|
||||
| bytes % #DEAD_BEEF
|
||||
| ak % ak_ABC
|
||||
| ct % ct_ABC
|
||||
| sg % sg_ABC
|
||||
% kwds/variables/etc
|
||||
| id % foo, foo_bar, foo_bar'baz' _'foo'
|
||||
| con % Foo, Foo_Bar, FooBar
|
||||
| qid % Foo.Bar.baz
|
||||
| qcon % Foo.Bar.Baz
|
||||
| tvar % 'foo, 'foo_bar, '_'foo'_'bar'''
|
||||
% kwds ops and sep are all collapsed by
|
||||
% so_scan:scan down to eg {'contract', {420, 69}}
|
||||
% where {420, 69} is the source location
|
||||
% these are three different parsers
|
||||
| kwd % contract, interface, payable, etc
|
||||
| op % "=!<>+-*/:&|?~@^"
|
||||
| sep % ".." | oneof(",.;()[]{}")
|
||||
% kwds and sep are kind of the same thing
|
||||
% but i'll keep them separate now for my own sanity. ok
|
||||
% i guess op or symbol or whatever is fine.
|
||||
%
|
||||
% not going to overthink. if having them separate
|
||||
% becomes an issue it's easy enough to collapse. harder
|
||||
% to separate afterward if collapsing is wrong.
|
||||
.
|
||||
|
||||
-type tk_pos() :: {Line :: pos_integer(), Col :: pos_integer()}.
|
||||
|
||||
-record(tk,
|
||||
{shape :: tk_shape(),
|
||||
pos :: tk_pos(),
|
||||
str :: string()}).
|
||||
|
||||
-type tk() :: #tk{}.
|
||||
```
|
||||
|
||||
Concretely:
|
||||
|
||||
```sophia
|
||||
// Hello World Contract
|
||||
// Copyright (c) 2025 QPQ AG
|
||||
|
||||
contract Hello =
|
||||
type state = unit
|
||||
entrypoint init(): state =
|
||||
()
|
||||
|
||||
entrypoint hello(): string =
|
||||
"hello, world"
|
||||
```
|
||||
|
||||

|
||||
|
||||
```erlang
|
||||
[pharpend@desktop ioecs/gsc master] % gsc tokens test/ct/hello.aes
|
||||
{tk,lcom,{1,1},"// Hello World Contract"}
|
||||
{tk,ws,{1,24},"\n"}
|
||||
{tk,lcom,{2,1},"// Copyright (c) 2025 QPQ AG"}
|
||||
{tk,ws,{2,29},"\n\n"}
|
||||
{tk,kwd,{4,1},"contract"}
|
||||
{tk,ws,{4,9}," "}
|
||||
{tk,con,{4,10},"Hello"}
|
||||
{tk,ws,{4,15}," "}
|
||||
{tk,op,{4,16},"="}
|
||||
{tk,ws,{4,17},"\n "}
|
||||
{tk,kwd,{5,5},"type"}
|
||||
{tk,ws,{5,9}," "}
|
||||
{tk,id,{5,10},"state"}
|
||||
{tk,ws,{5,15}," "}
|
||||
{tk,op,{5,16},"="}
|
||||
{tk,ws,{5,17}," "}
|
||||
{tk,id,{5,18},"unit"}
|
||||
{tk,ws,{5,22},"\n "}
|
||||
{tk,kwd,{6,5},"entrypoint"}
|
||||
{tk,ws,{6,15}," "}
|
||||
{tk,id,{6,16},"init"}
|
||||
{tk,sep,{6,20},"("}
|
||||
{tk,sep,{6,21},")"}
|
||||
{tk,op,{6,22},":"}
|
||||
{tk,ws,{6,23}," "}
|
||||
{tk,id,{6,24},"state"}
|
||||
{tk,ws,{6,29}," "}
|
||||
{tk,op,{6,30},"="}
|
||||
{tk,ws,{6,31},"\n "}
|
||||
{tk,sep,{7,9},"("}
|
||||
{tk,sep,{7,10},")"}
|
||||
{tk,ws,{7,11},"\n\n "}
|
||||
{tk,kwd,{9,5},"entrypoint"}
|
||||
{tk,ws,{9,15}," "}
|
||||
{tk,id,{9,16},"hello"}
|
||||
{tk,sep,{9,21},"("}
|
||||
{tk,sep,{9,22},")"}
|
||||
{tk,op,{9,23},":"}
|
||||
{tk,ws,{9,24}," "}
|
||||
{tk,id,{9,25},"string"}
|
||||
{tk,ws,{9,31}," "}
|
||||
{tk,op,{9,32},"="}
|
||||
{tk,ws,{9,33},"\n "}
|
||||
{tk,string,{10,9},"\"hello, world\""}
|
||||
{tk,ws,{10,23},"\n"}
|
||||
```
|
||||
|
||||
|
||||
|
||||
# Defining Events in interfaces
|
||||
|
||||
apparently this is legal syntax but the point of this is unclear.
|
||||
|
||||
# Can there be the same function name with different arities?
|
||||
|
||||
# What happens if you delete a non-existent key from a map?
|
||||
|
||||
# How does sophia compilation work
|
||||
|
||||
|
||||
From commit `dbab49936daad7d82bae7cf7336b1ce82e7ab779`
|
||||
|
||||
```erlang
|
||||
% so_compiler.erl:84
|
||||
-spec file(string()) -> {ok, map()} | {error, [so_errors:error()]}.
|
||||
file(Filename) ->
|
||||
file(Filename, []).
|
||||
|
||||
-spec file(string(), options()) -> {ok, map()} | {error, [so_errors:error()]}.
|
||||
file(File, Options0) ->
|
||||
Options = add_include_path(File, Options0),
|
||||
case read_contract(File) of
|
||||
{ok, Bin} ->
|
||||
SrcDir = so_utils:canonical_dir(filename:dirname(File)),
|
||||
from_string(Bin, [{src_file, File}, {src_dir, SrcDir} | Options]);
|
||||
{error, Error} ->
|
||||
Msg = lists:flatten([File,": ",file:format_error(Error)]),
|
||||
{error, [so_errors:new(file_error, Msg)]}
|
||||
end.
|
||||
|
||||
-spec from_string(binary() | string(), options()) -> {ok, map()} | {error, [so_errors:error()]}.
|
||||
from_string(ContractBin, Options) when is_binary(ContractBin) ->
|
||||
from_string(binary_to_list(ContractBin), Options);
|
||||
from_string(ContractString, Options) ->
|
||||
try
|
||||
from_string1(ContractString, Options)
|
||||
catch
|
||||
throw:{error, Errors} -> {error, Errors}
|
||||
end.
|
||||
|
||||
from_string1(ContractString, Options) ->
|
||||
#{ fcode := FCode
|
||||
, fcode_env := FCodeEnv
|
||||
, folded_typed_ast := FoldedTypedAst
|
||||
, warnings := Warnings } = string_to_code(ContractString, Options),
|
||||
#{ child_con_env := ChildContracts } = FCodeEnv,
|
||||
SavedFreshNames = maps:get(saved_fresh_names, FCodeEnv, #{}),
|
||||
FateCode = so_fcode_to_fate:compile(ChildContracts, FCode, SavedFreshNames, Options),
|
||||
pp_assembler(FateCode, Options),
|
||||
ByteCode = gmb_fate_code:serialize(FateCode, []),
|
||||
{ok, Version} = version(),
|
||||
Res = #{byte_code => ByteCode,
|
||||
compiler_version => Version,
|
||||
contract_source => ContractString,
|
||||
type_info => [],
|
||||
fate_code => FateCode,
|
||||
abi_version => gmb_fate_abi:abi_version(),
|
||||
payable => maps:get(payable, FCode),
|
||||
warnings => Warnings
|
||||
},
|
||||
{ok, maybe_generate_aci(Res, FoldedTypedAst, Options)}.
|
||||
|
||||
```
|
||||
|
||||
So a lot is going on in `string_to_code/2`
|
||||
|
||||
```erlang
|
||||
-spec string_to_code(string(), options()) -> map().
|
||||
string_to_code(ContractString, Options) ->
|
||||
Ast = parse(ContractString, Options),
|
||||
pp_sophia_code(Ast, Options),
|
||||
pp_ast(Ast, Options),
|
||||
{TypeEnv, FoldedTypedAst, UnfoldedTypedAst, Warnings} = so_ast_infer_types:infer(Ast, [return_env | Options]),
|
||||
pp_typed_ast(UnfoldedTypedAst, Options),
|
||||
{Env, Fcode} = so_ast_to_fcode:ast_to_fcode(UnfoldedTypedAst, [{original_src, ContractString}|Options]),
|
||||
#{ fcode => Fcode
|
||||
, fcode_env => Env
|
||||
, unfolded_typed_ast => UnfoldedTypedAst
|
||||
, folded_typed_ast => FoldedTypedAst
|
||||
, type_env => TypeEnv
|
||||
, ast => Ast
|
||||
, warnings => Warnings }.
|
||||
|
||||
|
||||
-spec parse(string(), so_compiler:options()) -> none() | so_syntax:ast().
|
||||
parse(Text, Options) ->
|
||||
parse(Text, sets:new(), Options).
|
||||
|
||||
-spec parse(string(), sets:set(), so_compiler:options()) -> none() | so_syntax:ast().
|
||||
parse(Text, Included, Options) ->
|
||||
so_parser:string(Text, Included, Options).
|
||||
```
|
||||
|
||||
So we get an AST from `so_parser:string/3`
|
||||
|
||||
```erlang
|
||||
%% so_parser.erl
|
||||
-spec string(string(), sets:set(include_hash()), so_compiler:options()) -> parse_result().
|
||||
string(String, Included, Opts) ->
|
||||
AST = run_parser(file(), String, Opts),
|
||||
case expand_includes(AST, Included, Opts) of
|
||||
{ok, AST1} -> AST1;
|
||||
{error, Err} -> parse_error(Err)
|
||||
end.
|
||||
|
||||
|
||||
run_parser(P, Inp) ->
|
||||
escape_errors(parse_and_scan(P, Inp, [])).
|
||||
run_parser(P, Inp, Opts) ->
|
||||
escape_errors(parse_and_scan(P, Inp, Opts)).
|
||||
|
||||
parse_and_scan(P, S, Opts) ->
|
||||
set_current_file(proplists:get_value(src_file, Opts, no_file)),
|
||||
set_current_dir(proplists:get_value(src_dir, Opts, no_file)),
|
||||
set_current_include_type(proplists:get_value(include_type, Opts, none)),
|
||||
case so_scan:scan(S) of
|
||||
{ok, Tokens} -> so_parse_lib:parse(P, Tokens);
|
||||
{error, {{Input, Pos}, _}} ->
|
||||
{error, {Pos, scan_error, Input}}
|
||||
end.
|
||||
|
||||
```
|
||||
|
||||
So there's a lot of metadata being kept, but the key part is the call to
|
||||
`so_scan:scan/1`
|
||||
|
||||
```erl
|
||||
lexer() ->
|
||||
Number = fun(Digit) -> [Digit, "+(_", Digit, "+)*"] end,
|
||||
DIGIT = "[0-9]",
|
||||
HEXDIGIT = "[0-9a-fA-F]",
|
||||
LOWER = "[a-z_]",
|
||||
UPPER = "[A-Z]",
|
||||
CON = [UPPER, "[a-zA-Z0-9_]*"],
|
||||
INT = Number(DIGIT),
|
||||
HEX = ["0x", Number(HEXDIGIT)],
|
||||
BYTES = ["#", Number(HEXDIGIT)],
|
||||
WS = "[\\000-\\ ]+",
|
||||
ID = [LOWER, "[a-zA-Z0-9_']*"],
|
||||
TVAR = ["'", ID],
|
||||
QID = ["(", CON, "\\.)+", ID],
|
||||
QCON = ["(", CON, "\\.)+", CON],
|
||||
OP = "[=!<>+\\-*/:&|?~@^]+",
|
||||
%% Five cases for a character
|
||||
%% * 1 7-bit ascii, not \ or '
|
||||
%% * 2-4 8-bit values (UTF8)
|
||||
%% * \ followed by a known modifier [aernrtv]
|
||||
%% * \xhh
|
||||
%% * \x{hhh...}
|
||||
CHAR = "'(([\\x00-\\x26\\x28-\\x5b\\x5d-\\x7f])|([\\x00-\\xff][\\x80-\\xff]{1,3})|(\\\\[befnrtv'\\\\])|(\\\\x[0-9a-fA-F]{2,2})|(\\\\x\\{[0-9a-fA-F]*\\}))'",
|
||||
STRING = "\"([^\"\\\\]|(\\\\.))*\"",
|
||||
|
||||
CommentStart = {"/\\*", push(comment, skip())},
|
||||
CommentRules =
|
||||
[ CommentStart
|
||||
, {"\\*/", pop(skip())}
|
||||
, {"[^/*]+|[/*]", skip()} ],
|
||||
|
||||
Keywords = ["contract", "include", "let", "switch", "type", "record", "datatype", "if", "elif", "else", "function",
|
||||
"stateful", "payable", "true", "false", "mod", "public", "entrypoint", "private", "indexed", "namespace",
|
||||
"interface", "main", "using", "as", "for", "hiding", "band", "bor", "bxor", "bnot"
|
||||
],
|
||||
KW = string:join(Keywords, "|"),
|
||||
|
||||
Rules =
|
||||
%% Comments and whitespace
|
||||
[ CommentStart
|
||||
, {"//.*", skip()}
|
||||
, {WS, skip()}
|
||||
|
||||
%% Special characters
|
||||
, {"\\.\\.|[,.;()\\[\\]{}]", symbol()}
|
||||
|
||||
%% Literals
|
||||
, {CHAR, token(char, fun parse_char/1)}
|
||||
, {STRING, token(string, fun parse_string/1)}
|
||||
, {HEX, token(hex, fun parse_hex/1)}
|
||||
, {INT, token(int, fun parse_int/1)}
|
||||
, {BYTES, token(bytes, fun parse_bytes/1)}
|
||||
|
||||
%% Identifiers (qualified first!)
|
||||
, {QID, token(qid, fun(S) -> string:tokens(S, ".") end)}
|
||||
, {QCON, token(qcon, fun(S) -> string:tokens(S, ".") end)}
|
||||
, {TVAR, token(tvar)}
|
||||
, override({ID, token(id)}, {KW, symbol()}) %% Keywords override identifiers. Need to
|
||||
, {CON, token(con)} %% use override to avoid lexing "lettuce"
|
||||
%% as ['let', {id, "tuce"}].
|
||||
%% Operators
|
||||
, {OP, symbol()}
|
||||
],
|
||||
|
||||
[{code, Rules}, {comment, CommentRules}].
|
||||
|
||||
scan(String) ->
|
||||
Lexer = so_scan_lib:compile(lexer()),
|
||||
so_scan_lib:string(Lexer, code, String).
|
||||
```
|
||||
|
||||
OK. let's look at `so_scan_lib`
|
||||
|
||||
```erl
|
||||
-type regex() :: iodata() | unicode:charlist().
|
||||
-type pos() :: {integer(), integer()}.
|
||||
-type lex_state() :: atom().
|
||||
-type token() :: {atom(), pos(), term()} | {atom(), pos()}.
|
||||
|
||||
-type token_spec() :: {regex(), token_action()}.
|
||||
-opaque token_action() :: fun((string(), pos()) -> {tok_result(), state_change()}).
|
||||
|
||||
-opaque lexer() :: [{lex_state(),
|
||||
fun((string(), pos()) -> {ok, tok_result(), string(), pos()}
|
||||
| end_of_file | error)}].
|
||||
|
||||
%% -- Internal types --
|
||||
-type tok_result() :: {token, token()} | skip.
|
||||
-type state_change() :: none | pop | {push, lex_state()}.
|
||||
|
||||
%% @doc Compile a lexer specification. Takes the regexps for each state and
|
||||
%% combines them into a single big regexp that is then compiled with re:compile/1.
|
||||
%% Note: contrary to lexer generators like leex, we don't have longest match
|
||||
%% semantics (since this isn't supported by re). Use override/2 instead.
|
||||
-spec compile([{lex_state(), [token_spec()]}]) -> lexer().
|
||||
compile(TokenSpecs) ->
|
||||
[{S, compile_spec(Spec)} || {S, Spec} <- TokenSpecs].
|
||||
|
||||
compile_spec(TokenSpecs) ->
|
||||
WithIxs = lists:zip(lists:seq(1, length(TokenSpecs)), TokenSpecs),
|
||||
{ok, Regex} = re:compile(["^(", name(0), string:join([ ["(", name(I), R, ")"] || {I, {R, _}} <- WithIxs ], "|"),")"]),
|
||||
Actions = [ Fun || {_, Fun} <- TokenSpecs ],
|
||||
fun ("", _Pos) -> end_of_file;
|
||||
(S, Pos) ->
|
||||
case re:run(S, Regex, [{capture, all_names}]) of
|
||||
{match, [{0, N} | Capture]} ->
|
||||
Index = 1 + length(lists:takewhile(fun({P, _}) -> P == -1 end, Capture)),
|
||||
Action = lists:nth(Index, Actions),
|
||||
{TokS, Rest} = lists:split(N, S),
|
||||
Tok = Action(TokS, Pos),
|
||||
{ok, Tok, Rest, next_pos(TokS, Pos)};
|
||||
nomatch ->
|
||||
error
|
||||
end
|
||||
end.
|
||||
```
|
||||
|
||||
|
||||
|
||||
# How does sophia compilation work
|
||||
+2
-84
@@ -4,88 +4,6 @@ documenting for myself
|
||||
|
||||
## Sophia syntax highlighting
|
||||
|
||||
todo. it's on github somewhere, not hard to find
|
||||
See: <https://github.com/yinkaenoch/sophia-vim-syntax>
|
||||
|
||||
## fuzzy finding plugin
|
||||
|
||||
this is annoying and requires like 10 minutes of setup.
|
||||
|
||||
BUT this is super helpful in huge repositories such as the node codebase
|
||||
|
||||
```
|
||||
sudo apt install bat fd-find fzf ripgrep
|
||||
```
|
||||
|
||||
(devuan excalibur)
|
||||
|
||||
ripgrep is optional, craig, but the vim plugin needs it if you want to search
|
||||
for regexes *inside* files
|
||||
|
||||
say you're trying to quickly remember what the fuck `gmser_id:id()` is. Like is
|
||||
that the record or is that the 33-byte tagged public key? I can't remember and
|
||||
neither can you
|
||||
|
||||

|
||||
|
||||
this saves you like 15 seconds and a bunch of context switching. each time
|
||||
|
||||
the plugin is super annoying to install but basically don't follow any of the
|
||||
instructions in the repo. just clone the `fzf.vim` repo on github (google) to
|
||||
`~/.vim/bundle/fzf.vim`.
|
||||
|
||||
you also need to tell vim to load the `.vim` file that ships with the package
|
||||
|
||||
```
|
||||
[pharpend@picklet ioecs/GajuDesk master] % dpkg -L fzf
|
||||
/.
|
||||
/usr
|
||||
/usr/bin
|
||||
/usr/bin/fzf
|
||||
/usr/bin/fzf-tmux
|
||||
/usr/share
|
||||
/usr/share/doc
|
||||
/usr/share/doc/fzf
|
||||
/usr/share/doc/fzf/README-VIM.md.gz
|
||||
/usr/share/doc/fzf/README.Debian
|
||||
/usr/share/doc/fzf/README.md.gz
|
||||
/usr/share/doc/fzf/changelog.Debian.amd64.gz
|
||||
/usr/share/doc/fzf/changelog.Debian.gz
|
||||
/usr/share/doc/fzf/changelog.gz
|
||||
/usr/share/doc/fzf/copyright
|
||||
/usr/share/doc/fzf/examples
|
||||
/usr/share/doc/fzf/examples/completion.bash
|
||||
/usr/share/doc/fzf/examples/completion.zsh
|
||||
/usr/share/doc/fzf/examples/fzf.vim
|
||||
/usr/share/doc/fzf/examples/key-bindings.bash
|
||||
/usr/share/doc/fzf/examples/key-bindings.fish
|
||||
/usr/share/doc/fzf/examples/key-bindings.zsh
|
||||
/usr/share/doc/fzf/examples/plugin
|
||||
/usr/share/fish
|
||||
/usr/share/fish/vendor_functions.d
|
||||
/usr/share/fish/vendor_functions.d/fzf_key_bindings.fish
|
||||
/usr/share/man
|
||||
/usr/share/man/man1
|
||||
/usr/share/man/man1/fzf-tmux.1.gz
|
||||
/usr/share/man/man1/fzf.1.gz
|
||||
/usr/share/doc/fzf/examples/plugin/fzf.vim
|
||||
```
|
||||
|
||||
last file there. put that file at `~/.vim/autoload/fzf.vim`
|
||||
|
||||
should just work.
|
||||
|
||||
- `:Files` opens the fuzzy file finder
|
||||
- `:Rg` is the interactive grep thing shown above
|
||||
|
||||
i have this vimrc:
|
||||
|
||||
```vim
|
||||
let $FZF_DEFAULT_COMMAND = 'fdfind --type f'
|
||||
noremap <C-e> :Files<CR>
|
||||
noremap <C-r> :Rg<CR>
|
||||
```
|
||||
|
||||
the fdfind thing means fuzzy find doesn't surface files in your .gitignore
|
||||
(e.g. beam files, `_build` insanity)
|
||||
|
||||
will try and see
|
||||
Read the link there and do the needful
|
||||
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 43 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 14 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 6.0 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 262 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 19 KiB |
Reference in New Issue
Block a user