moving sfc over here
This commit is contained in:
@@ -0,0 +1,45 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
# Bash harness for GSC command line tool
|
||||||
|
#
|
||||||
|
# gsc foo bar baz
|
||||||
|
# -> zx rundir \
|
||||||
|
# /home/pharpend/src/ioecs/gsc/cli \
|
||||||
|
# --libs=gsc:/home/pharpend/src/ioecs/gsc \
|
||||||
|
# foo bar baz
|
||||||
|
|
||||||
|
zx_exists() {
|
||||||
|
command -v zx > /dev/null
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if ! zx_exists ; then
|
||||||
|
echo "zx not found in \$PATH"
|
||||||
|
echo ""
|
||||||
|
echo "Please install Erlang and zx:"
|
||||||
|
echo "https://git.qpq.swiss/QPQ-AG/public-wiki/wiki/Install-Erlang-and-zx"
|
||||||
|
echo ""
|
||||||
|
echo "PATH=${PATH}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# script location and parent dir
|
||||||
|
FP_THIS_FILE=$(readlink -f "${BASH_SOURCE[0]}")
|
||||||
|
FP_THIS_DIR=$(dirname -- "${FP_THIS_FILE}")
|
||||||
|
FP_PRJ_DIR=$(dirname -- "${FP_THIS_DIR}")
|
||||||
|
FP_CLI_DIR="${FP_PRJ_DIR}/cli"
|
||||||
|
|
||||||
|
# # compute libs string
|
||||||
|
# LIB_PARTS=()
|
||||||
|
# for depname in "${LOCAL_DEPS[@]}"; do
|
||||||
|
# LIB_PARTS+=("${depname}:${PRJ_DIR}/${depname}")
|
||||||
|
# done
|
||||||
|
# IFS=,
|
||||||
|
# LOCAL_LIBS="${LIB_PARTS[*]}"
|
||||||
|
# unset IFS
|
||||||
|
|
||||||
|
LOCAL_LIBS="gsc:${FP_PRJ_DIR}"
|
||||||
|
|
||||||
|
#echo "zx rundir $FP_CLI_DIR --libs=$LOCAL_LIBS $@"
|
||||||
|
|
||||||
|
zx rundir $FP_CLI_DIR --libs=$LOCAL_LIBS $@
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
.eunit
|
||||||
|
deps
|
||||||
|
*.o
|
||||||
|
*.beam
|
||||||
|
*.plt
|
||||||
|
*.swp
|
||||||
|
erl_crash.dump
|
||||||
|
ebin/*.beam
|
||||||
|
doc/*.html
|
||||||
|
doc/*.css
|
||||||
|
doc/edoc-info
|
||||||
|
doc/erlang.png
|
||||||
|
rel/example_project
|
||||||
|
.concrete/DEV_MODE
|
||||||
|
.rebar
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
{"src/*", [debug_info, {i, "include/"}, {outdir, "ebin/"}]}.
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{application,gsc_cli,
|
||||||
|
[{description,"GSC CLI and test suite"},
|
||||||
|
{registered,[]},
|
||||||
|
{included_applications,[]},
|
||||||
|
{applications,[stdlib,kernel]},
|
||||||
|
{vsn,"0.1.0"},
|
||||||
|
{modules,[gsc_cli]}]}.
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
%%% @doc
|
||||||
|
%%% GSC CLI: gsc_cli
|
||||||
|
%%%
|
||||||
|
%%% This module is currently named `gsc_cli', but you may want to change that.
|
||||||
|
%%% Remember that changing the name in `-module()' below requires renaming
|
||||||
|
%%% this file, and it is recommended to run `zx update .app` in the main
|
||||||
|
%%% project directory to make sure the ebin/gsc_cli.app file stays in
|
||||||
|
%%% sync with the project whenever you add, remove or rename a module.
|
||||||
|
%%% @end
|
||||||
|
|
||||||
|
-module(gsc_cli).
|
||||||
|
-vsn("0.1.0").
|
||||||
|
-author("Peter Harpending <peterharpending@qpq.swiss>").
|
||||||
|
-copyright("Peter Harpending <peterharpending@qpq.swiss>").
|
||||||
|
-license("GPL-3.0-only").
|
||||||
|
|
||||||
|
-export([start/1]).
|
||||||
|
|
||||||
|
|
||||||
|
-spec start(ArgV) -> ok
|
||||||
|
when ArgV :: [string()].
|
||||||
|
|
||||||
|
start(ArgV) ->
|
||||||
|
ok = io:format("Hello, World! Args: ~tp~n", [ArgV]),
|
||||||
|
zx:silent_stop().
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
{name,"GSC CLI"}.
|
||||||
|
{type,cli}.
|
||||||
|
{modules,[]}.
|
||||||
|
{mod,"gsc_cli"}.
|
||||||
|
{prefix,none}.
|
||||||
|
{author,"Peter Harpending"}.
|
||||||
|
{desc,"GSC CLI and test suite"}.
|
||||||
|
{package_id,{"otpr","gsc_cli",{0,1,0}}}.
|
||||||
|
{deps,[{"otpr","gsc",{0,1,0}}]}.
|
||||||
|
{key_name,none}.
|
||||||
|
{a_email,"peterharpending@qpq.swiss"}.
|
||||||
|
{c_email,"peterharpending@qpq.swiss"}.
|
||||||
|
{copyright,"Peter Harpending"}.
|
||||||
|
{file_exts,[]}.
|
||||||
|
{license,"GPL-3.0-only"}.
|
||||||
|
{repo_url,[]}.
|
||||||
|
{tags,[]}.
|
||||||
|
{ws_url,[]}.
|
||||||
+161
@@ -0,0 +1,161 @@
|
|||||||
|
% This is a header file that contains sfc's record types
|
||||||
|
%
|
||||||
|
% This is in order to
|
||||||
|
% 1. share records across modules; and,
|
||||||
|
% 2. allow external modules to just use the sfc records
|
||||||
|
|
||||||
|
%-------------------------------------------------------
|
||||||
|
% API Types: sfc internal token representation
|
||||||
|
%
|
||||||
|
% -export_type([
|
||||||
|
% sf_token_type/0
|
||||||
|
% sf_token/0
|
||||||
|
% ]).
|
||||||
|
%-------------------------------------------------------
|
||||||
|
|
||||||
|
%
|
||||||
|
-type sfc_token_type()
|
||||||
|
:: bcom % /* ... */
|
||||||
|
| lcom % //
|
||||||
|
| ws % whitespace
|
||||||
|
% literals
|
||||||
|
| char % 'a'
|
||||||
|
| string % "foo"
|
||||||
|
| int10 % 69_420
|
||||||
|
| int16 % 0xDEAD_BEEF
|
||||||
|
| bytes % #DEAD_BEEF
|
||||||
|
| ak % ak_ABC
|
||||||
|
| ct % ct_ABC
|
||||||
|
| sg % sg_ABC
|
||||||
|
% kwds/variables/etc
|
||||||
|
| id % foo, foo_bar, foo_bar'baz' _'foo'
|
||||||
|
| con % Foo, Foo_Bar, FooBar
|
||||||
|
| qid % Foo.Bar.baz
|
||||||
|
| qcon % Foo.Bar.Baz
|
||||||
|
| tvar % 'foo, 'foo_bar, '_'foo'_'bar'''
|
||||||
|
% kwds ops and punct are all collapsed by
|
||||||
|
% so_scan:scan down to eg {'contract', {420, 69}}
|
||||||
|
% where {420, 69} is the source location
|
||||||
|
% these are three different parsers
|
||||||
|
| kwd % contract, interface, payable, etc
|
||||||
|
| op % "=!<>+-*/:&|?~@^"
|
||||||
|
| punct % ".." | oneof(",.;()[]{}")
|
||||||
|
% kwds and punct are kind of the same thing
|
||||||
|
% but i'll keep them separate now for my own sanity. ok
|
||||||
|
% i guess op or symbol or whatever is fine.
|
||||||
|
%
|
||||||
|
% not going to overthink. if having them separate
|
||||||
|
% becomes an issue it's easy enough to collapse. harder
|
||||||
|
% to separate afterward if collapsing is wrong.
|
||||||
|
.
|
||||||
|
|
||||||
|
|
||||||
|
-type sfc_pos() :: {Line :: pos_integer(), Col :: pos_integer()}.
|
||||||
|
|
||||||
|
-record(sfc_token,
|
||||||
|
{type :: sfc_token_type(),
|
||||||
|
pos :: sfc_pos(),
|
||||||
|
string :: string()}).
|
||||||
|
|
||||||
|
-type sfc_token() :: #sfc_token{}.
|
||||||
|
|
||||||
|
|
||||||
|
% tokens are in essence the "chunk boundaries" of
|
||||||
|
% the file
|
||||||
|
%
|
||||||
|
% because we have semantic whitespace, we have to be
|
||||||
|
% careful about block declarations, because we don't
|
||||||
|
% have an explicit open/close block token. blocks can
|
||||||
|
% be closed in one of two ways:
|
||||||
|
%
|
||||||
|
% 1. a new block at a previous indent level:
|
||||||
|
% switch(foo)
|
||||||
|
% // block starts here
|
||||||
|
% Bar => bar()
|
||||||
|
% Baz => baz()
|
||||||
|
% quux() // ends because indent level
|
||||||
|
% 2. it's part of some type of list:
|
||||||
|
%
|
||||||
|
% [switch(foo)
|
||||||
|
% // block starts here
|
||||||
|
% Bar => bar()
|
||||||
|
% Baz => baz(), // ends here
|
||||||
|
% switch(bizz)
|
||||||
|
% // block starts here
|
||||||
|
% Bar => bar()
|
||||||
|
% Baz => baz()]
|
||||||
|
%
|
||||||
|
% in order to avoid speculatively inserting virtual
|
||||||
|
% close tokens, at least on first write-out, we're
|
||||||
|
% going to disambiguate list notions right away
|
||||||
|
|
||||||
|
% token groups
|
||||||
|
% lists = (_, _, _)
|
||||||
|
% | [_, _, _]
|
||||||
|
% | {_, _, _}
|
||||||
|
%-record(sfc_ast1_block,
|
||||||
|
% {indent = none :: none | pos_integer(),
|
||||||
|
% decls = none :: [sfc_ast1_decl()]}).
|
||||||
|
%
|
||||||
|
%-type sfc_ast() ::
|
||||||
|
%
|
||||||
|
%-type sfc_list_group() :: {'(', [sfc_token()], ')'}
|
||||||
|
% | {'[', [sfc_token()], ']'}
|
||||||
|
% | {'{', [sfc_token()], '}'}
|
||||||
|
% | {proof,
|
||||||
|
% .
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
% @doc
|
||||||
|
% this one is very specific so it deserves its own
|
||||||
|
% record type: unterminated block comments at the end
|
||||||
|
% of files. these are ok in legacy sophia, so we have to
|
||||||
|
% specifically account for this error
|
||||||
|
-record(sfc_err_bcom_unterminated,
|
||||||
|
{prev_tokens :: [sfc_token()],
|
||||||
|
break_pos :: sfc_pos(),
|
||||||
|
rest :: string()}).
|
||||||
|
|
||||||
|
-record(sfc_err_no_tokmatch,
|
||||||
|
{prev_tokens :: [sfc_token()],
|
||||||
|
break_pos :: sfc_pos(),
|
||||||
|
rest :: string()}).
|
||||||
|
|
||||||
|
|
||||||
|
-record(sfc_err_delims,
|
||||||
|
{past :: [sfc_token()],
|
||||||
|
open_stack :: [sfc_token()],
|
||||||
|
bad_close :: sfc_token(),
|
||||||
|
future :: [sfc_token()]}).
|
||||||
|
% FIXME
|
||||||
|
-record(sfc_err_nyi, {}).
|
||||||
|
-record(sfc_err_empty_file, {}).
|
||||||
|
|
||||||
|
%-record(src_parse_error,
|
||||||
|
% {atom = none :: none | atom(),
|
||||||
|
% string =
|
||||||
|
|
||||||
|
%j-record(sfc_err_gulp_ct,
|
||||||
|
%j {gulped ::
|
||||||
|
|
||||||
|
% @doc
|
||||||
|
% generic placeholder error for now
|
||||||
|
-record(sfc_err,
|
||||||
|
{atom :: atom(),
|
||||||
|
string = none :: none | iolist(),
|
||||||
|
extra = none :: none | any()}).
|
||||||
|
|
||||||
|
% @doc all errors SFC can return conveniently listed in
|
||||||
|
% one place
|
||||||
|
-type sfc_err() :: #sfc_err_bcom_unterminated{}
|
||||||
|
| #sfc_err_no_tokmatch{}
|
||||||
|
| #sfc_err_nyi{}
|
||||||
|
| #sfc_err_empty_file{}
|
||||||
|
| #sfc_err{}.
|
||||||
|
|
||||||
|
|
||||||
|
%% FIXME
|
||||||
|
-type sfc_ast() :: any().
|
||||||
|
|
||||||
|
|
||||||
+140
-19
@@ -1,23 +1,144 @@
|
|||||||
%%% @doc
|
% @doc bikeshed proctrastination head into vim warmup thing
|
||||||
%%% Gajumaru Sophia Compiler: gsc
|
% sophia compiler from scratch by PRH
|
||||||
%%%
|
%
|
||||||
%%% This module is currently named `gsc', but you may want to change that.
|
% based on original sophia compiler
|
||||||
%%% Remember that changing the name in `-module()' below requires renaming
|
%
|
||||||
%%% this file, and it is recommended to run `zx update .app` in the main
|
% parse layers:
|
||||||
%%% project directory to make sure the ebin/gsc.app file stays in
|
% 1. sfc_tokenizer: SrcStr -> (Tokens | SigTokens)
|
||||||
%%% sync with the project whenever you add, remove or rename a module.
|
%
|
||||||
%%% @end
|
% SigTokens = not comment/whitespace
|
||||||
|
%
|
||||||
|
% layers:
|
||||||
|
% a. sfc_strmatch : matches string shapes
|
||||||
|
% b. sfc_so_scan : converts to so_scan shapes
|
||||||
|
%
|
||||||
|
% 2. sfc_ast: SigTokens -> AST
|
||||||
|
%
|
||||||
|
% terminology:
|
||||||
|
%
|
||||||
|
% - `slurp`/`barf` borrowed from emacs paredit mode:
|
||||||
|
%
|
||||||
|
% slurp : (a b) c -> (a b c)
|
||||||
|
% barf : (a b c) -> a (b c)
|
||||||
|
%
|
||||||
|
% * `slurp` usually involves *transforming* input
|
||||||
|
% into a new type (e.g. slurp a token from src
|
||||||
|
% string); think of slurp as a verb meaning to
|
||||||
|
% consume and then digest
|
||||||
|
% * `barf` basically means blindly splitting off
|
||||||
|
% input
|
||||||
|
%
|
||||||
|
% @end
|
||||||
|
|
||||||
-module(gsc).
|
% TODO:
|
||||||
-vsn("0.1.0").
|
% - barf for outputs, slurp for inputs
|
||||||
-author("Peter Harpending <peterharpending@qpq.swiss>").
|
% - architecture needs more careful thought but only after something works
|
||||||
-copyright("Peter Harpending <peterharpending@qpq.swiss>").
|
% - too fuzzy right now
|
||||||
-license("GPL-3.0-only").
|
% - possibly:
|
||||||
|
% - rename parser layers sequentially:
|
||||||
-export([hello/0]).
|
% - sfc_
|
||||||
|
-module(sfc).
|
||||||
|
|
||||||
|
|
||||||
-spec hello() -> ok.
|
-export_type([
|
||||||
|
token/0
|
||||||
|
]).
|
||||||
|
|
||||||
hello() ->
|
-export([
|
||||||
io:format("~p (~p) says \"Hello!\"~n", [self(), ?MODULE]).
|
sigtokens_from_file/1,
|
||||||
|
sigtokens_from_string/1,
|
||||||
|
tokens_from_file/1,
|
||||||
|
tokens_from_string/1,
|
||||||
|
ast_from_file/1,
|
||||||
|
ast_from_string/1,
|
||||||
|
ast_from_tokens/1
|
||||||
|
]).
|
||||||
|
|
||||||
|
-include("$sfc_include/sfc.hrl").
|
||||||
|
|
||||||
|
%-----------------------------------------
|
||||||
|
% types
|
||||||
|
%-----------------------------------------
|
||||||
|
|
||||||
|
-type token() :: sfc_token().
|
||||||
|
|
||||||
|
%-----------------------------------------
|
||||||
|
% functions
|
||||||
|
%-----------------------------------------
|
||||||
|
|
||||||
|
sigtokens_from_file(X) ->
|
||||||
|
case tokens_from_file(X) of
|
||||||
|
{ok, Y} -> {ok, sfc_tokens:filter_significant(Y)};
|
||||||
|
Err -> Err
|
||||||
|
end.
|
||||||
|
|
||||||
|
sigtokens_from_string(X) ->
|
||||||
|
case tokens_from_string(X) of
|
||||||
|
{ok, Y} -> {ok, sfc_tokens:filter_significant(Y)};
|
||||||
|
Err -> Err
|
||||||
|
end.
|
||||||
|
|
||||||
|
|
||||||
|
-spec tokens_from_file(FilePath) -> Perhaps
|
||||||
|
when FilePath :: string(),
|
||||||
|
Perhaps :: {ok, Tokens}
|
||||||
|
| {error, sfc_err() | any()},
|
||||||
|
Tokens :: [sfc_token()].
|
||||||
|
|
||||||
|
tokens_from_file(FilePath) ->
|
||||||
|
case file:read_file(FilePath) of
|
||||||
|
{ok, FBytes} -> tokens_from_string(FBytes);
|
||||||
|
Error -> Error
|
||||||
|
end.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec tokens_from_string(SrcStr) -> Result
|
||||||
|
when SrcStr :: string(),
|
||||||
|
Result :: {ok, Tokens}
|
||||||
|
| {error, sfc_err()},
|
||||||
|
Tokens :: [sfc_token()].
|
||||||
|
|
||||||
|
tokens_from_string(SrcStr) ->
|
||||||
|
sfc_tokens:tokens(SrcStr).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec ast_from_file(FilePath) -> Perhaps
|
||||||
|
when FilePath :: string(),
|
||||||
|
Perhaps :: {ok, AST} | {error, sfc_err()},
|
||||||
|
AST :: sfc_ast().
|
||||||
|
|
||||||
|
ast_from_file(FilePath) ->
|
||||||
|
case file:read_file(FilePath) of
|
||||||
|
{ok, FileBytes} -> ast_from_string(FileBytes);
|
||||||
|
Error -> Error
|
||||||
|
end.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec ast_from_string(SrcStr) -> Perhaps
|
||||||
|
when SrcStr :: string(),
|
||||||
|
Perhaps :: {ok, AST} | {error, sfc_err()},
|
||||||
|
AST :: sfc_ast().
|
||||||
|
|
||||||
|
ast_from_string(SrcStr) ->
|
||||||
|
case sfc_tokens:significant_tokens(SrcStr) of
|
||||||
|
{ok, SigTks} -> ast_from_tokens(SigTks);
|
||||||
|
Error -> Error
|
||||||
|
end.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec ast_from_tokens(SrcTokens) -> Perhaps
|
||||||
|
when SrcTokens :: [sfc_token()],
|
||||||
|
Perhaps :: {ok, AST} | {error, sfc_err()},
|
||||||
|
AST :: sfc_ast().
|
||||||
|
|
||||||
|
ast_from_tokens(Tks) ->
|
||||||
|
SigTks = sfc_tokens:filter_significant(Tks),
|
||||||
|
case sfc_ast:gulp_file(SigTks) of
|
||||||
|
{gulp, AST} -> {ok, AST};
|
||||||
|
Error -> Error
|
||||||
|
end.
|
||||||
|
|||||||
+693
@@ -0,0 +1,693 @@
|
|||||||
|
% @doc
|
||||||
|
% type for an SFC AST
|
||||||
|
%
|
||||||
|
% based on so_syntax.erl
|
||||||
|
%
|
||||||
|
% this is the second layer in the parsing pipeline after tokens. From
|
||||||
|
% so_syntax:
|
||||||
|
%
|
||||||
|
% ## Declarations
|
||||||
|
%
|
||||||
|
% A Sophia file consists of a sequence of *declarations* in a layout block.
|
||||||
|
%
|
||||||
|
% ```c
|
||||||
|
% File ::= Block(TopDecl)
|
||||||
|
%
|
||||||
|
% TopDecl ::= ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl)
|
||||||
|
% | 'contract' 'interface' Con [Implement] '=' Block(Decl)
|
||||||
|
% | 'namespace' Con '=' Block(Decl)
|
||||||
|
% | '@compiler' PragmaOp Version
|
||||||
|
% | 'include' String
|
||||||
|
% | Using
|
||||||
|
%
|
||||||
|
% Implement ::= ':' Sep1(Con, ',')
|
||||||
|
%
|
||||||
|
% Decl ::= 'type' Id ['(' TVar* ')'] '=' TypeAlias
|
||||||
|
% | 'record' Id ['(' TVar* ')'] '=' RecordType
|
||||||
|
% | 'datatype' Id ['(' TVar* ')'] '=' DataType
|
||||||
|
% | 'let' Id [':' Type] '=' Expr
|
||||||
|
% | (EModifier* 'entrypoint' | FModifier* 'function') Block(FunDecl)
|
||||||
|
% | Using
|
||||||
|
%
|
||||||
|
% FunDecl ::= Id ':' Type // Type signature
|
||||||
|
% | Id Args [':' Type] '=' Block(Stmt) // Definition
|
||||||
|
% | Id Args [':' Type] Block(GuardedDef) // Guarded definitions
|
||||||
|
%
|
||||||
|
% GuardedDef ::= '|' Sep1(Expr, ',') '=' Block(Stmt)
|
||||||
|
%
|
||||||
|
% Using ::= 'using' Con ['as' Con] [UsingParts]
|
||||||
|
% UsingParts ::= 'for' '[' Sep1(Id, ',') ']'
|
||||||
|
% | 'hiding' '[' Sep1(Id, ',') ']'
|
||||||
|
%
|
||||||
|
% PragmaOp ::= '<' | '=<' | '==' | '>=' | '>'
|
||||||
|
% Version ::= Sep1(Int, '.')
|
||||||
|
%
|
||||||
|
% EModifier ::= 'payable' | 'stateful'
|
||||||
|
% FModifier ::= 'stateful' | 'private'
|
||||||
|
%
|
||||||
|
% Args ::= '(' Sep(Pattern, ',') ')'
|
||||||
|
%
|
||||||
|
% Type ::= Domain '=>' Type // Function type
|
||||||
|
% | Type '(' Sep(Type, ',') ')' // Type application
|
||||||
|
% | '(' Type ')' // Parens
|
||||||
|
% | 'unit' | Sep(Type, '*') // Tuples
|
||||||
|
% | Id | QId | TVar
|
||||||
|
%
|
||||||
|
% Domain ::= Type // Single argument
|
||||||
|
% | '(' Sep(Type, ',') ')' // Multiple arguments
|
||||||
|
% ```
|
||||||
|
-module(sfc_ast).
|
||||||
|
|
||||||
|
%-compile([export_all,nowarn_export_all]).
|
||||||
|
%
|
||||||
|
%%-export_type([
|
||||||
|
%%]).
|
||||||
|
%%
|
||||||
|
%%-export([
|
||||||
|
%% slurp/2
|
||||||
|
%%]).
|
||||||
|
%
|
||||||
|
%-export([
|
||||||
|
% gulp_file/1
|
||||||
|
%]).
|
||||||
|
%
|
||||||
|
%-include("$sfc_include/sfc.hrl").
|
||||||
|
%
|
||||||
|
%%%-----------------------------
|
||||||
|
%%% TYPES: sfc_ast
|
||||||
|
%%%-----------------------------
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%% % placeholders
|
||||||
|
%-type ast_() :: any().
|
||||||
|
%-record(ast_nyi, {tokens = none :: [sfc_token()]}).
|
||||||
|
%-type ast_nyi() :: #ast_nyi{}.
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%% product type: foo * bar * baz
|
||||||
|
%%
|
||||||
|
%% stupid weird implication from bad syntax foresight trying to be
|
||||||
|
%% fancy and overload what parens do is products must always have at
|
||||||
|
%% least two operands; probably this is because `(foo)` is always the
|
||||||
|
%% same as `foo`
|
||||||
|
%%
|
||||||
|
%% - 0-tuple -> `unit`
|
||||||
|
%% - 1-tuple -> type itself
|
||||||
|
%% - 2+ -> here
|
||||||
|
%-record(ast_te_pi,
|
||||||
|
% {types = none :: none | [ast_te_()]}).
|
||||||
|
%
|
||||||
|
%-type ast_type_expr()
|
||||||
|
% :: #ast_te_fn{} % function (string, string) => string
|
||||||
|
% | #ast_te_ap{} % application map(string, int)
|
||||||
|
% | #ast_te_pi{} % product foo * bar * baz
|
||||||
|
% | #ast_te_tk{} % token string int 'a Foo.Bar.baz
|
||||||
|
% | ast_nyi().
|
||||||
|
%-type ast_te() :: ast_type_expr().
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%% ta = type alias
|
||||||
|
%% 'type' Id ['(' TVar* ')'] '=' TypeAlias
|
||||||
|
%-record(ast_ta,
|
||||||
|
% {alias = none :: none | sfc_token(),
|
||||||
|
% tvars = none :: none | [sfc_token()],
|
||||||
|
% points_to = none :: none | ast_type_expr()}).
|
||||||
|
%
|
||||||
|
%% Decl ::= 'type' Id ['(' TVar* ')'] '=' TypeAlias
|
||||||
|
%% | 'record' Id ['(' TVar* ')'] '=' RecordType
|
||||||
|
%% | 'datatype' Id ['(' TVar* ')'] '=' DataType
|
||||||
|
%% | 'let' Id [':' Type] '=' Expr
|
||||||
|
%% | (EModifier* 'entrypoint' | FModifier* 'function') Block(FunDecl)
|
||||||
|
%% | Using
|
||||||
|
%-type decl() :: #ast_ta{}
|
||||||
|
% | #ast_nyi{}.
|
||||||
|
%
|
||||||
|
%% ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl)
|
||||||
|
%-record(ast_ct,
|
||||||
|
% {payable = none :: none | boolean(),
|
||||||
|
% main = none :: none | boolean(),
|
||||||
|
% name = none :: none | sfc_token(),
|
||||||
|
% impls = none :: none | [sfc_token()],
|
||||||
|
% decls = none :: none | [decl()]}).
|
||||||
|
%
|
||||||
|
%-record(ast_td_iface,{}).
|
||||||
|
%-record(ast_td_ns,{}).
|
||||||
|
%-record(ast_td_compiler,{}).
|
||||||
|
%-record(ast_td_include,{}).
|
||||||
|
%-record(ast_using,{}).
|
||||||
|
%
|
||||||
|
%% @doc
|
||||||
|
%% TopDecl ::= ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl)
|
||||||
|
%% | 'contract' 'interface' Con [Implement] '=' Block(Decl)
|
||||||
|
%% | 'namespace' Con '=' Block(Decl)
|
||||||
|
%% | '@compiler' PragmaOp Version
|
||||||
|
%% | 'include' String
|
||||||
|
%% | Using
|
||||||
|
%-type top_decl() :: #ast_ct{}
|
||||||
|
% | #ast_td_iface{}
|
||||||
|
% | #ast_td_ns{}
|
||||||
|
% | #ast_td_compiler{}
|
||||||
|
% | #ast_td_include{}
|
||||||
|
% | #ast_using{}
|
||||||
|
% .
|
||||||
|
%
|
||||||
|
%-record(ast_file,
|
||||||
|
% {top_decls :: [top_decl()]}).
|
||||||
|
%-type ast_file() :: #ast_file{}.
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%-type parse_error_() :: any().
|
||||||
|
%-record(parse_error,
|
||||||
|
% {pos = none :: none | sfc_pos(),
|
||||||
|
% msg = "" :: string(),
|
||||||
|
% subs = [] :: [parse_error_()],
|
||||||
|
% extra = none :: any()}).
|
||||||
|
%-type parse_error() :: parse_error().
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%-type ast()
|
||||||
|
% :: #ast_ct{}
|
||||||
|
% | #ast_file{}
|
||||||
|
% | #ast_nyi{}
|
||||||
|
% | #ast_ta{}
|
||||||
|
% | #ast_td_compiler{}
|
||||||
|
% | #ast_td_iface{}
|
||||||
|
% | #ast_td_include{}
|
||||||
|
% | #ast_td_ns{}
|
||||||
|
% | #ast_te_ap{} % application map(string, int)
|
||||||
|
% | #ast_te_fn{} % function (string, string) => string
|
||||||
|
% | #ast_te_pi{} % product foo * bar * baz
|
||||||
|
% | #ast_te_tk{} % token string int 'a Foo.Bar.baz
|
||||||
|
% | #ast_using{}.
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%%%-----------------------------
|
||||||
|
%%% FUNCTIONS
|
||||||
|
%%%-----------------------------
|
||||||
|
%
|
||||||
|
%-spec gulp_file(Tokens) -> Perhaps
|
||||||
|
% when Tokens :: [sfc_token()],
|
||||||
|
% Perhaps :: {gulp, #ast_file{}}
|
||||||
|
% | {error, #parse_error{}}.
|
||||||
|
%
|
||||||
|
%gulp_file([]) ->
|
||||||
|
% {error, empty_file};
|
||||||
|
%gulp_file(Tokens) ->
|
||||||
|
% case sfc_tokens:take_block(Tokens) of
|
||||||
|
% {Tokens, []} ->
|
||||||
|
% gulp_block(fun gulp_top_decl/1, Tokens);
|
||||||
|
% %gulp_file2([], [], Tokens);
|
||||||
|
% {A, B} ->
|
||||||
|
% StartPos = sfc_tokens:start_pos(A),
|
||||||
|
% ErrPos = sfc_tokens:start_pos(B),
|
||||||
|
% Msg = efmt("gulp_file: block starting at ~p ends at ~p instead of EOF",
|
||||||
|
% [StartPos, ErrPos]),
|
||||||
|
% {error, #parse_error{pos = ErrPos, msg = Msg}}
|
||||||
|
% end.
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%%gulp_file2(AccOks, AccErrs, Tokens = [_ | _]) ->
|
||||||
|
%% % ItemTokens will be nonempty
|
||||||
|
%% {ItemTokens, NewTokens} = sfc_tokens:take_block_item(Tokens),
|
||||||
|
%% case gulp_top_decl(ItemTokens) of
|
||||||
|
%% {gulp, Ok} -> gulp_file2([Ok | AccOks], AccErrs, NewTokens);
|
||||||
|
%% Err -> gulp_file2(AccOks, [Err | AccErrs], NewTokens)
|
||||||
|
%% end;
|
||||||
|
%%gulp_file2(AccOks, [], []) ->
|
||||||
|
%% TopDecls = lists:reverse(AccOks),
|
||||||
|
%% {gulp, #ast_file{top_decls = TopDecls}};
|
||||||
|
%%gulp_file2(_, AccErrs, []) ->
|
||||||
|
%% Errs = [#parse_error{pos = Pos} | _]
|
||||||
|
%% = lists:reverse(AccErrs),
|
||||||
|
%% {error, #parse_error{pos = Pos, subs = Errs}}.
|
||||||
|
%
|
||||||
|
%-spec gulp_block(GulpItem, Tokens) -> GulpedItems
|
||||||
|
% when GulpItem :: fun((ItemTokens) -> GulpedItem),
|
||||||
|
% Tokens :: [sfc_token()],
|
||||||
|
% ItemTokens :: Tokens,
|
||||||
|
% GulpedItem :: {gulp, Item} | {error, Reason},
|
||||||
|
% GulpedItems :: {gulp, Items} | {error, Reason},
|
||||||
|
% Items :: [Item],
|
||||||
|
% Item :: ast(),
|
||||||
|
% Reason :: any().
|
||||||
|
%% @doc
|
||||||
|
%% this almost is the familiar `traverse' idiom from Haskell
|
||||||
|
%%
|
||||||
|
%% ghci> :t traverse
|
||||||
|
%% traverse
|
||||||
|
%% :: (Traversable t, Applicative f) => (a -> f b) -> t a -> f (t b)
|
||||||
|
%% ghci> :t mapM
|
||||||
|
%% mapM :: (Traversable t, Monad m) => (a -> m b) -> t a -> m (t b)
|
||||||
|
%%
|
||||||
|
%% (or `mapM' if youre being sloppy)
|
||||||
|
%%
|
||||||
|
%% however in our case our function is actually of
|
||||||
|
%% signature
|
||||||
|
%%
|
||||||
|
%% :: (t a -> f b)
|
||||||
|
%%
|
||||||
|
%% so this is more sophisticated
|
||||||
|
%% @end
|
||||||
|
%
|
||||||
|
%gulp_block(GulpItem, Tokens) ->
|
||||||
|
% gulp_block(GulpItem, [], [], Tokens).
|
||||||
|
%
|
||||||
|
%gulp_block(GulpItem, AccOks, AccErrs, Tokens = [_ | _]) ->
|
||||||
|
% % ItemTokens will be nonempty
|
||||||
|
% {ItemTokens, NewTokens} = sfc_tokens:take_block_item(Tokens),
|
||||||
|
% case GulpItem(ItemTokens) of
|
||||||
|
% {gulp, Ok} -> gulp_block(GulpItem, [Ok | AccOks], AccErrs, NewTokens);
|
||||||
|
% Err -> gulp_block(GulpItem, AccOks, [Err | AccErrs], NewTokens)
|
||||||
|
% end;
|
||||||
|
%gulp_block(_, AccOks, [], []) ->
|
||||||
|
% Items = lists:reverse(AccOks),
|
||||||
|
% {gulp, Items};
|
||||||
|
%gulp_block(_, _, AccErrs, []) ->
|
||||||
|
% {error, #parse_error{subs = lists:reverse(AccErrs)}}.
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%-spec gulp_top_decl(DeclTokens) -> Result
|
||||||
|
% when DeclTokens :: [sfc_token()],
|
||||||
|
% Result :: {gulp, ast()} | {error, any()}.
|
||||||
|
%
|
||||||
|
%% @doc
|
||||||
|
%% TopDecl ::= ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl)
|
||||||
|
%% | 'contract' 'interface' Con [Implement] '=' Block(Decl)
|
||||||
|
%% | 'namespace' Con '=' Block(Decl)
|
||||||
|
%% | '@compiler' PragmaOp Version
|
||||||
|
%% | 'include' String
|
||||||
|
%% | Using
|
||||||
|
%% @end
|
||||||
|
%gulp_top_decl(DeclTokens) ->
|
||||||
|
% case sfc_tokens:strings(3, DeclTokens) of
|
||||||
|
% ["payable", "contract", "interface"] ->
|
||||||
|
% gulp_nyi(DeclTokens);
|
||||||
|
% ["contract", "interface" | _] ->
|
||||||
|
% gulp_nyi(DeclTokens);
|
||||||
|
% ["payable", "main", "contract"] ->
|
||||||
|
% gulp_ct2(#ast_ct{payable = true, main = true},
|
||||||
|
% drop(3, DeclTokens));
|
||||||
|
% ["main", "contract" | _] ->
|
||||||
|
% gulp_ct2(#ast_ct{payable = false, main = true},
|
||||||
|
% drop(2, DeclTokens));
|
||||||
|
% ["payable", "contract" | _] ->
|
||||||
|
% gulp_ct2(#ast_ct{payable = true, main = false},
|
||||||
|
% drop(2, DeclTokens));
|
||||||
|
% ["contract" | _] ->
|
||||||
|
% gulp_ct2(#ast_ct{payable = false, main = false},
|
||||||
|
% drop(1, DeclTokens));
|
||||||
|
% ["namespace" | _] ->
|
||||||
|
% gulp_nyi(DeclTokens);
|
||||||
|
% ["@", "compiler" | _] ->
|
||||||
|
% gulp_nyi(DeclTokens);
|
||||||
|
% ["include" | _] ->
|
||||||
|
% gulp_nyi(DeclTokens);
|
||||||
|
% ["using" | _ ] ->
|
||||||
|
% gulp_nyi(DeclTokens);
|
||||||
|
% _ ->
|
||||||
|
% % decl tokens will always be nonempty
|
||||||
|
% [H = #sfc_token{pos = ErrPos} | _] = DeclTokens,
|
||||||
|
% EMsg = efmt("gulp_top_decl: bad token: ~p; "
|
||||||
|
% "expecting one of: [payable, main, contract, namespace, @compiler, include, using]",
|
||||||
|
% [H]),
|
||||||
|
% {error, #parse_error{pos = ErrPos,
|
||||||
|
% msg = EMsg}}
|
||||||
|
% end.
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%gulp_ct2(Ast, Tokens = [#sfc_token{pos = Pos} | _]) ->
|
||||||
|
% % need to pass through pos for error messages on premature end of
|
||||||
|
% % input... for now it's a fixme thing
|
||||||
|
% gulp_ct3(Ast, Tokens, Pos).
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%% payable main contract X : Y, Z, W = <block>
|
||||||
|
%% ^
|
||||||
|
%gulp_ct3(Ast = #ast_ct{name = none}, Tokens, Pos) ->
|
||||||
|
% NewPos = Pos, %% fixme
|
||||||
|
% case Tokens of
|
||||||
|
% [Name = #sfc_token{type = con} | NewTokens] ->
|
||||||
|
% NewAst = Ast#ast_ct{name = Name},
|
||||||
|
% gulp_ct3(NewAst, NewTokens, NewPos);
|
||||||
|
% [Name | _] ->
|
||||||
|
% {error, {fixme, {bad_contract_name, Name}}};
|
||||||
|
% [] ->
|
||||||
|
% {error, {fixme, no_contract_name}}
|
||||||
|
% end;
|
||||||
|
%% contract X = <block>
|
||||||
|
%% contract X : Y, Z, W = <block>
|
||||||
|
%% ^
|
||||||
|
%gulp_ct3(Ast = #ast_ct{impls = none}, Tokens, Pos) ->
|
||||||
|
% NewPos = Pos, %% fixme
|
||||||
|
% case slurp_ct_impls(Tokens) of
|
||||||
|
% {slurp, Impls, NewTokens} ->
|
||||||
|
% NewAst = Ast#ast_ct{impls = Impls},
|
||||||
|
% gulp_ct3(NewAst, NewTokens, NewPos);
|
||||||
|
% Error ->
|
||||||
|
% Error
|
||||||
|
% end;
|
||||||
|
%% contract X = <block>
|
||||||
|
%% contract X : Y, Z, W = <block>
|
||||||
|
%% ^
|
||||||
|
%gulp_ct3(Ast = #ast_ct{decls = none},
|
||||||
|
% [#sfc_token{string = "="} | Tokens],
|
||||||
|
% _Pos) ->
|
||||||
|
% case gulp_block(fun gulp_decl/1, Tokens) of
|
||||||
|
% {gulp, Decls} ->
|
||||||
|
% Result = Ast#ast_ct{decls = Decls},
|
||||||
|
% {gulp, Result};
|
||||||
|
% Error ->
|
||||||
|
% Error
|
||||||
|
% end;
|
||||||
|
%gulp_ct3(Ast, Tokens, Pos) ->
|
||||||
|
% {error, {fixme, {nyi, gulp_ct3, [Ast, Tokens, Pos]}}}.
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%-spec slurp_ct_impls(Tokens) -> Slurped
|
||||||
|
% when Tokens :: [sfc_token()],
|
||||||
|
% Slurped :: {slurp, Impls, NewTokens}
|
||||||
|
% | {error, Reason},
|
||||||
|
% Impls :: Tokens,
|
||||||
|
% NewTokens :: Tokens,
|
||||||
|
% Reason :: any().
|
||||||
|
%% @doc ": X, Y, Z = ..." ~> {slurp, [X, Y, Z], "= ..."}
|
||||||
|
%
|
||||||
|
%% FIXME: this should be a restructured a tiny bit for better error
|
||||||
|
%% handling
|
||||||
|
%slurp_ct_impls([_ = #sfc_token{string = ":"},
|
||||||
|
% Con = #sfc_token{type = con}
|
||||||
|
% | NewTokens]) ->
|
||||||
|
% slurp_ct_impls2([Con], NewTokens);
|
||||||
|
%slurp_ct_impls(NewTokens = [#sfc_token{string = "="} | _]) ->
|
||||||
|
% {slurp, [], NewTokens};
|
||||||
|
%slurp_ct_impls([BadToken | _]) ->
|
||||||
|
% Msg = efmt("slurp_ct_impls: expecting (: Con | =), got ~p", [BadToken]),
|
||||||
|
% {error, {fixme, Msg}};
|
||||||
|
%slurp_ct_impls([]) ->
|
||||||
|
% {error, {fixme, "expecting : or =, got end-of-input"}}.
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%slurp_ct_impls2(Stk, [_ = #sfc_token{string = ","},
|
||||||
|
% Con = #sfc_token{type = con}
|
||||||
|
% | NewTokens]) ->
|
||||||
|
% slurp_ct_impls2([Con | Stk], NewTokens);
|
||||||
|
%slurp_ct_impls2(Stk, NewTokens = [#sfc_token{string = "="} | _]) ->
|
||||||
|
% {slurp, lists:reverse(Stk), NewTokens};
|
||||||
|
%slurp_ct_impls2(Stk, BadTokens) ->
|
||||||
|
% {error, {fixme, nyi, slurp_ct_impls2, [Stk, BadTokens]}}.
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%% Decl ::= 'type' Id ['(' TVar* ')'] '=' TypeAlias
|
||||||
|
%% | 'record' Id ['(' TVar* ')'] '=' RecordType
|
||||||
|
%% | 'datatype' Id ['(' TVar* ')'] '=' DataType
|
||||||
|
%% | 'let' Id [':' Type] '=' Expr
|
||||||
|
%% | (EModifier* 'entrypoint' | FModifier* 'function') Block(FunDecl)
|
||||||
|
%% | Using
|
||||||
|
%gulp_decl(Tokens) ->
|
||||||
|
% case sfc_tokens:strings(1, Tokens) of
|
||||||
|
% ["type"] -> gulp_type_alias(Tokens);
|
||||||
|
% _ -> gulp_nyi(Tokens)
|
||||||
|
% end.
|
||||||
|
%
|
||||||
|
%% 'type' Id ['(' TVar* ')'] '=' TypeAlias
|
||||||
|
%gulp_type_alias(Tokens) ->
|
||||||
|
% gulp_ta(#ast_ta{}, Tokens).
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%% 'type' Id ['(' TVar* ')'] '=' TypeAlias
|
||||||
|
%% ^
|
||||||
|
%gulp_ta(Ast = #ast_ta{alias = none},
|
||||||
|
% _ = [#sfc_token{string = "type"}, Alias = #sfc_token{type = id}
|
||||||
|
% | NewTokens]) ->
|
||||||
|
% NewAst = Ast#ast_ta{alias = Alias},
|
||||||
|
% gulp_ta(NewAst, NewTokens);
|
||||||
|
%% 'type' Id ['(' TVar* ')'] '=' TypeAlias
|
||||||
|
%% ^
|
||||||
|
%gulp_ta(Ast = #ast_ta{tvars = none},
|
||||||
|
% Tokens) ->
|
||||||
|
% case slurp_ta_tvars(Tokens) of
|
||||||
|
% {slurp, TVars, NewTokens} ->
|
||||||
|
% NewAst = Ast#ast_ta{tvars = TVars},
|
||||||
|
% gulp_ta(NewAst, NewTokens);
|
||||||
|
% Error -> Error
|
||||||
|
% end;
|
||||||
|
%% 'type' Id ['(' TVar* ')'] '=' TypeAlias
|
||||||
|
%% ^
|
||||||
|
%gulp_ta(Ast = #ast_ta{points_to = none},
|
||||||
|
% _ = [#sfc_token{string = "="} | NewTokens]) ->
|
||||||
|
% case gulp_type_expr(NewTokens) of
|
||||||
|
% {gulp, TypeExpr} ->
|
||||||
|
% Result = Ast#ast_ta{points_to = TypeExpr},
|
||||||
|
% {gulp, Result};
|
||||||
|
% Error -> Error
|
||||||
|
% end;
|
||||||
|
%gulp_ta(A, B) ->
|
||||||
|
% {error, {fixme, gulp_ta, A, B, ?MODULE, ?LINE}}.
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%% type foo(bar, baz) = quux
|
||||||
|
%% ^
|
||||||
|
%%
|
||||||
|
%% "(bar, baz) = ..." ~> {slurp, [bar, baz], "= ..."}
|
||||||
|
%slurp_ta_tvars(Tks = [#sfc_token{string = "="} | _]) ->
|
||||||
|
% {slurp, [], Tks};
|
||||||
|
%slurp_ta_tvars([#sfc_token{string = "("},
|
||||||
|
% #sfc_token{string = ")"}
|
||||||
|
% | NewTokens]) ->
|
||||||
|
% {slurp, [], NewTokens};
|
||||||
|
%slurp_ta_tvars([_ = #sfc_token{string = "("},
|
||||||
|
% TVar = #sfc_token{type = tvar}
|
||||||
|
% | NewTokens]) ->
|
||||||
|
% slurp_tavars([TVar], NewTokens).
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%slurp_tavars(Stk,
|
||||||
|
% [_ = #sfc_token{string = ","},
|
||||||
|
% TVar = #sfc_token{type = tvar}
|
||||||
|
% | NewTks]) ->
|
||||||
|
% slurp_tavars([TVar | Stk], NewTks);
|
||||||
|
%slurp_tavars(Stk,
|
||||||
|
% [#sfc_token{string = ")"}
|
||||||
|
% | NewTks]) ->
|
||||||
|
% {slurp, lists:reverse(Stk), NewTks}.
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%gulp_type_expr(Tokens) ->
|
||||||
|
% case slurp_type_expr(Tokens) of
|
||||||
|
% {slurp, T, []} -> {gulp, T};
|
||||||
|
% {slurp, _, E} -> {error, {fixme, trailing, E}};
|
||||||
|
% Error -> Error
|
||||||
|
% end.
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%% Type ::= Domain '=>' Type // Function type
|
||||||
|
%% | Type '(' Sep(Type, ',') ')' // Type application
|
||||||
|
%% | '(' Type ')' // Parens
|
||||||
|
%% | 'unit'
|
||||||
|
%% | Sep(Type, '*') // Tuples
|
||||||
|
%% | Id
|
||||||
|
%% | QId
|
||||||
|
%% | TVar
|
||||||
|
%%
|
||||||
|
%% Domain ::= Type // Single argument
|
||||||
|
%% | '(' Sep(Type, ',') ')' // Multiple arguments
|
||||||
|
%
|
||||||
|
%% terminals:
|
||||||
|
%% TkType = id | qid | tvar
|
||||||
|
%% PList = PList0 | PList1 | PList2
|
||||||
|
%% ProdType = Type [* Type]+
|
||||||
|
%% LamType = Type [=> Type]+
|
||||||
|
%%
|
||||||
|
%% PList0 = ()
|
||||||
|
%% PList1 = (Type)
|
||||||
|
%% PList2 = (Type [, Type]+)
|
||||||
|
%%
|
||||||
|
%% Ah ok:
|
||||||
|
%%
|
||||||
|
%% Type1 = TkType | PList(Type)
|
||||||
|
%%
|
||||||
|
%% Type = Type1 % string, (string), (string, string)
|
||||||
|
%% | Type1 => Type % function
|
||||||
|
%% | Type1 PList(Type) % application
|
||||||
|
%% | Type1 * Type % product
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%% each type either starts with an id/qid/tvar token or an open paren
|
||||||
|
%%
|
||||||
|
%% we're going to eliminate open paren cases first:
|
||||||
|
%%
|
||||||
|
%% legal cases:
|
||||||
|
%%
|
||||||
|
%% 1. Function types can have arbitrary #args:
|
||||||
|
%%
|
||||||
|
%% () => ... // legal
|
||||||
|
%% (_) => ... // legal
|
||||||
|
%% (_, _) => ... // legal
|
||||||
|
%%
|
||||||
|
%% 2. Otherwise it must be a plist of ONLY one type
|
||||||
|
%%
|
||||||
|
%% () // illegal, should be "unit"
|
||||||
|
%% (X) // legal, equiv to X
|
||||||
|
%% (X, Y) // illegal, should be X * Y
|
||||||
|
%%
|
||||||
|
%slurp_type_expr(Tks = [#sfc_token{pos = Pos} | _]) ->
|
||||||
|
% case slurp_type1(Tks) of
|
||||||
|
% % plist cases
|
||||||
|
% % function types _ => _
|
||||||
|
% {slurp, {plist, PList}, NewTks} ->
|
||||||
|
% case {PList, NewTks} of
|
||||||
|
% % ANY plist (including (), (foo)) followed by => is fine
|
||||||
|
% % () => _
|
||||||
|
% % (_) => _
|
||||||
|
% % (_, _) => _
|
||||||
|
% {DomainType, [#sfc_token{string = "=>"} | After]} ->
|
||||||
|
% case slurp_type_expr(After) of
|
||||||
|
% {slurp, CodomainType, NewTks} ->
|
||||||
|
% TypeExpr = #ast_te_fn{dom = DomainType,
|
||||||
|
% codom = CodomainType},
|
||||||
|
% {slurp, TypeExpr, NewTks};
|
||||||
|
% Error = {error, _} ->
|
||||||
|
% Error
|
||||||
|
% end;
|
||||||
|
% % WMA not a function type, so only plists with 1 type are
|
||||||
|
% % allowed
|
||||||
|
% {[One], After} ->
|
||||||
|
% slurp_type_expr2(One, After);
|
||||||
|
% % plists of 0 or 2+ not allowed
|
||||||
|
% {[], _} ->
|
||||||
|
% Msg = "() is called unit",
|
||||||
|
% {error, {fixme, ?MODULE, ?LINE, illegal_type, Pos, Msg}};
|
||||||
|
% {_, _} ->
|
||||||
|
% Msg = "(_, _, ...) must be followed by =>; "
|
||||||
|
% "the type for tuples (A, B) is A * B",
|
||||||
|
% {error, {fixme, ?MODULE, ?LINE, illegal_type, Pos, Msg}}
|
||||||
|
% end;
|
||||||
|
% % first token is id|qid|tvar: foo, Foo.bar, 'foo
|
||||||
|
% {slurp, {token, Tk}, NewTks} ->
|
||||||
|
% case NewTks of
|
||||||
|
% % foo => bar is fine, normalized to (foo) => bar
|
||||||
|
% [#sfc_token{string = "=>"} | After] ->
|
||||||
|
% case slurp_type_expr(After) of
|
||||||
|
% {slurp, Codom, NewAfter} ->
|
||||||
|
% ArgType = #ast_te_tk{token = ArgTypeTk},
|
||||||
|
% Dom = [ArgType],
|
||||||
|
% Result = #ast_te_fn{dom = Dom, codom = Codom},
|
||||||
|
% {slurp, Result, NewAfter};
|
||||||
|
% Error = {error, _} ->
|
||||||
|
% Error
|
||||||
|
% end;
|
||||||
|
% {_, _} ->
|
||||||
|
% One = #ast_te_tk{token = Tk},
|
||||||
|
% slurp_type_expr2(One, After)
|
||||||
|
% end;
|
||||||
|
% Error = {error, _} ->
|
||||||
|
% Error
|
||||||
|
% end.
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%% parsed the first type, now need to see if this is an application
|
||||||
|
%% or a product or if we're done
|
||||||
|
%slurp_type_expr2(FirstType, After) ->
|
||||||
|
% % fixme: just going to ignore products and applications for now
|
||||||
|
% {slurp, FirstType, After}.
|
||||||
|
% %case strings(1, After) of
|
||||||
|
% % ["("] -> error(nyi);
|
||||||
|
% % ["*"] -> error(nyi);
|
||||||
|
% %case After of
|
||||||
|
% % [#sfc_token{string = "("} | _] ->
|
||||||
|
% % case slurp_type1_II(After) of
|
||||||
|
% % {slurp, {plist, ArgTypes}, NewAfter} ->
|
||||||
|
% % {slurp, #ast_te_ap{fn = FirstType
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%% Type1 = {plist, Types} () (foo) (foo, bar)
|
||||||
|
%% | {token, #sfc_token{}} foo Bar.baz 'quux
|
||||||
|
%slurp_type1(Tks) ->
|
||||||
|
% case sfc_tokens:slurp_plist(Tks) of
|
||||||
|
% % head token is NOT open paren -> must be id/qid/tvar
|
||||||
|
% {slurp, [], [Tk | NewTks]} ->
|
||||||
|
% TkType = Tk#sfc_token.type,
|
||||||
|
% case TkType of
|
||||||
|
% id -> {slurp, {token, Tk}, NewTks};
|
||||||
|
% qid -> {slurp, {token, Tk}, NewTks};
|
||||||
|
% tvar -> {slurp, {token, Tk}, NewTks};
|
||||||
|
% _ -> {error, {fixme, illegal_type, Tk, ?MODULE, ?LINE}}
|
||||||
|
% end;
|
||||||
|
% % head token is an open paren, need to recursively consume
|
||||||
|
% % the inner types
|
||||||
|
% {slurp, PListTokens, After} ->
|
||||||
|
% case gulp_ptype1(PListTokens) of
|
||||||
|
% {gulp, Types} -> {slurp, {plist, Types}, After};
|
||||||
|
% Error = {error, _} -> Error
|
||||||
|
% end;
|
||||||
|
% Error = {error, _} -> Error
|
||||||
|
% end.
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%%slurp_type_expr_plist(Tks) ->
|
||||||
|
%% case sfc_tokens:slurp_plist(Tks) of
|
||||||
|
%% % head token is NOT open paren -> must be id/qid/tvar
|
||||||
|
%% {slurp, [], [Tk | NewTks]} ->
|
||||||
|
%% TkType = Tk#sfc_token.type,
|
||||||
|
%% case TkType of
|
||||||
|
%% id -> {slurp, {token, Tk}, NewTks};
|
||||||
|
%% qid -> {slurp, {token, Tk}, NewTks};
|
||||||
|
%% tvar -> {slurp, {token, Tk}, NewTks};
|
||||||
|
%% _ -> {error, {fixme, illegal_type, Tk}}
|
||||||
|
%% end;
|
||||||
|
%% % head token is an open paren, need to recursively consume
|
||||||
|
%% % the inner types
|
||||||
|
%% {slurp, PListTokens, After} ->
|
||||||
|
%% case gulp_ptype1(PListTokens) of
|
||||||
|
%% {gulp, Types} -> {slurp, {plist, Types}, After};
|
||||||
|
%% Error = {error, _} -> Error
|
||||||
|
%% end;
|
||||||
|
%% Error = {error, _} -> Error
|
||||||
|
%% end.
|
||||||
|
%
|
||||||
|
%gulp_ptype1([#sfc_token{string = "("}, #sfc_token{string = ")"}]) ->
|
||||||
|
% {gulp, []};
|
||||||
|
%gulp_ptype1([#sfc_token{string = "("} | Tail]) ->
|
||||||
|
% gulp_ptype1_II([], Tail).
|
||||||
|
%
|
||||||
|
%gulp_ptype1_II(Stk, Tks) ->
|
||||||
|
% case slurp_type_expr(Tks) of
|
||||||
|
% {slurp, NewType, [#sfc_token{string = ")"}]} ->
|
||||||
|
% {gulp, lists:reverse([NewType | Stk])};
|
||||||
|
% {slurp, NewType, [#sfc_token{string = ","} | NewTks]} ->
|
||||||
|
% gulp_ptype1_II([NewType | Stk], NewTks);
|
||||||
|
% Error = {error, _} ->
|
||||||
|
% Error
|
||||||
|
% end.
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%%gulp_te_tk([Tk = #sfc_token{type = TkType}])
|
||||||
|
%% when id =:= TkType;
|
||||||
|
%% qid =:= TkType;
|
||||||
|
%% tvar =:= TkType ->
|
||||||
|
%% {gulp, #ast_te_tk{token = Tk}};
|
||||||
|
%%gulp_te_tk(Tks) ->
|
||||||
|
%% {error, {fixme, nyi, gulp_te_tks, Tks}}.
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%gulp_nyi(Tokens) ->
|
||||||
|
% {gulp, #ast_nyi{tokens = Tokens}}.
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%drop(N, [_ | Xs]) when is_integer(N), N >= 1 ->
|
||||||
|
% drop(N-1, Xs);
|
||||||
|
%drop(_, []) ->
|
||||||
|
% [];
|
||||||
|
%drop(0, X) ->
|
||||||
|
% X.
|
||||||
|
%
|
||||||
|
%efmt(Fmt, Args) ->
|
||||||
|
% unicode:characters_to_list(io_lib:format(Fmt, Args)).
|
||||||
@@ -0,0 +1,85 @@
|
|||||||
|
% @doc
|
||||||
|
% sfc_bst = ast second attempt but prefix so tab complete
|
||||||
|
%
|
||||||
|
% from docs/sophia/so_syntax.md:
|
||||||
|
%
|
||||||
|
% File ::= Block(TopDecl)
|
||||||
|
%
|
||||||
|
% TopDecl ::= ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl)
|
||||||
|
% | 'contract' 'interface' Con [Implement] '=' Block(Decl)
|
||||||
|
% | 'namespace' Con '=' Block(Decl)
|
||||||
|
% | '@compiler' PragmaOp Version
|
||||||
|
% | 'include' String
|
||||||
|
% | Using
|
||||||
|
%
|
||||||
|
% Implement ::= ':' Sep1(Con, ',')
|
||||||
|
%
|
||||||
|
% Decl ::= 'type' Id ['(' TVar* ')'] '=' TypeAlias
|
||||||
|
% | 'record' Id ['(' TVar* ')'] '=' RecordType
|
||||||
|
% | 'datatype' Id ['(' TVar* ')'] '=' DataType
|
||||||
|
% | 'let' Id [':' Type] '=' Expr
|
||||||
|
% | (EModifier* 'entrypoint' | FModifier* 'function') Block(FunDecl)
|
||||||
|
% | Using
|
||||||
|
%
|
||||||
|
% FunDecl ::= Id ':' Type // Type signature
|
||||||
|
% | Id Args [':' Type] '=' Block(Stmt) // Definition
|
||||||
|
% | Id Args [':' Type] Block(GuardedDef) // Guarded definitions
|
||||||
|
%
|
||||||
|
% GuardedDef ::= '|' Sep1(Expr, ',') '=' Block(Stmt)
|
||||||
|
%
|
||||||
|
% Using ::= 'using' Con ['as' Con] [UsingParts]
|
||||||
|
% UsingParts ::= 'for' '[' Sep1(Id, ',') ']'
|
||||||
|
% | 'hiding' '[' Sep1(Id, ',') ']'
|
||||||
|
%
|
||||||
|
% PragmaOp ::= '<' | '=<' | '==' | '>=' | '>'
|
||||||
|
% Version ::= Sep1(Int, '.')
|
||||||
|
%
|
||||||
|
% EModifier ::= 'payable' | 'stateful'
|
||||||
|
% FModifier ::= 'stateful' | 'private'
|
||||||
|
%
|
||||||
|
% Args ::= '(' Sep(Pattern, ',') ')'
|
||||||
|
-module(sfc_bst).
|
||||||
|
|
||||||
|
-compile([export_all, nowarn_export_all]).
|
||||||
|
-include("$sfc_include/sfc.hrl").
|
||||||
|
|
||||||
|
%-record(bst_nyi, {tokens :: [sfc_token()]).
|
||||||
|
%
|
||||||
|
%% ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl)
|
||||||
|
%-record(bst_ct,
|
||||||
|
% {payable = none :: none | boolean(),
|
||||||
|
% main = none :: none | boolean(),
|
||||||
|
% name = none :: none | string(),
|
||||||
|
% impl = none :: none | [string()],
|
||||||
|
% eq = none :: none | '=',
|
||||||
|
% decls = none :: none | [decl()]}).
|
||||||
|
%
|
||||||
|
%% 'contract' 'interface' Con [Implement] '=' Block(Decl)
|
||||||
|
%-record(bst_iface,
|
||||||
|
% {payable = none :: none | boolean()
|
||||||
|
% name = none :: none | string(),
|
||||||
|
% impl = none :: none | [string()],
|
||||||
|
% eq = none :: none | '=',
|
||||||
|
% decls = none :: none | [decl()]}).
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%% File ::= Block(TopDecl)
|
||||||
|
%gulp(file, Tokens) ->
|
||||||
|
% gulp({block, top_decl}, Tokens);
|
||||||
|
%% TopDecl ::= ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl)
|
||||||
|
%% | 'contract' 'interface' Con [Implement] '=' Block(Decl)
|
||||||
|
%% | 'namespace' Con '=' Block(Decl)
|
||||||
|
%% | '@compiler' PragmaOp Version
|
||||||
|
%% | 'include' String
|
||||||
|
%% | Using
|
||||||
|
%gulp(top_decl, [#sfc_token{string = S} | Rest]) ->
|
||||||
|
% case strings(3, Tokens) of
|
||||||
|
% ["payable", "contract", "interface"] ->
|
||||||
|
% gulp_ct(#bst_iface{payable = true, main = true}, drop(3, Tokens));
|
||||||
|
% ["payable", "contract", _] ->
|
||||||
|
% gulp_ct(#bst_ct{payable = true, main = false}, drop(2, Tokens));
|
||||||
|
% ["contract" | _] ->
|
||||||
|
% gulp({oneof, [bst_iface, bst_ct, bst_nyi]}, Tokens);
|
||||||
|
%gulp(bst_ct, Tokens) ->
|
||||||
|
%gulp(top_decl, Tokens) ->
|
||||||
@@ -0,0 +1,216 @@
|
|||||||
|
-module(sfc_parse_type_expr).
|
||||||
|
|
||||||
|
-export_type([
|
||||||
|
]).
|
||||||
|
|
||||||
|
-export([
|
||||||
|
unsafe_vtks_from_string/1,
|
||||||
|
gulp_vtks/1,
|
||||||
|
take_until_ifx_op/1
|
||||||
|
]).
|
||||||
|
|
||||||
|
-include("$sfc_include/sfc.hrl").
|
||||||
|
|
||||||
|
|
||||||
|
%------------------------------------------------------
|
||||||
|
% TYPES
|
||||||
|
%------------------------------------------------------
|
||||||
|
|
||||||
|
-type vtk_ifx_op() :: vtk_apply_to
|
||||||
|
| {'vtk_*', sfc_token()}
|
||||||
|
| {'vtk_=>', sfc_token()}.
|
||||||
|
|
||||||
|
-type vtk() :: sfc_token()
|
||||||
|
| {vtk_plist, [sfc_token()]}
|
||||||
|
| vtk_ifx_op().
|
||||||
|
|
||||||
|
|
||||||
|
-type gulped(X) :: {gulp, X}
|
||||||
|
| {error, any()}.
|
||||||
|
|
||||||
|
-type slurped(X) :: {slurp, X, Rest :: [sfc_token()]}
|
||||||
|
| {error, any()}.
|
||||||
|
|
||||||
|
|
||||||
|
%------------------------------------------------------
|
||||||
|
% FUNCTIONS
|
||||||
|
%------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
-spec unsafe_vtks_from_string(SrcStr) -> Vtks when
|
||||||
|
SrcStr :: string(),
|
||||||
|
Vtks :: [vtk()].
|
||||||
|
|
||||||
|
% @doc for testing
|
||||||
|
unsafe_vtks_from_string(S) ->
|
||||||
|
{ok, SigTks} = sfc_tokens:significant_tokens(S),
|
||||||
|
{gulp, Vtks} = gulp_vtks(SigTks),
|
||||||
|
Vtks.
|
||||||
|
|
||||||
|
|
||||||
|
% operators in descending order of exteriority
|
||||||
|
%
|
||||||
|
% precedence verbiage confuses me
|
||||||
|
%
|
||||||
|
% 1 + 2 * 3 ^ 4
|
||||||
|
%
|
||||||
|
% (+ 1 (* 2 (^ 3 4)))
|
||||||
|
%
|
||||||
|
% precedence is thinking about the operators as like
|
||||||
|
% having arms and legs and doing something. how much
|
||||||
|
% power do they have to bind to their neighbors. it's
|
||||||
|
% thinking of your operators as verbs in some state
|
||||||
|
% machine.
|
||||||
|
%
|
||||||
|
% i don't like when i have to model the state machine
|
||||||
|
% in my head when i'm reading code. everything should
|
||||||
|
% just be there. exteriority and interiority are
|
||||||
|
% properties of nouns, not properties of verbs.
|
||||||
|
%
|
||||||
|
% functional programming is all about offloading as
|
||||||
|
% much of your reasoning into nouns as possible. verbs
|
||||||
|
% change things. and change is bad.
|
||||||
|
-type chunk_strategy()
|
||||||
|
:: ast_parens % prefix paren (x y z)
|
||||||
|
| '=>' % infixr
|
||||||
|
| '*' % infix
|
||||||
|
| 'apply' % postfix paren f (...)
|
||||||
|
.
|
||||||
|
|
||||||
|
|
||||||
|
-record(ast_parens,
|
||||||
|
{open = none :: none | sfc_token(),
|
||||||
|
inner = none :: none | [sfc_token()],
|
||||||
|
close = none :: none | sfc_token()}).
|
||||||
|
|
||||||
|
chunk_by(Strategy, Tokens) ->
|
||||||
|
chunk_by(Strategy, [], Tokens).
|
||||||
|
|
||||||
|
|
||||||
|
-spec chunk0(Strategy, Oks, Errs, Tokens) -> Result when
|
||||||
|
Strategy :: chunk_strategy(),
|
||||||
|
Oks :: [any()],
|
||||||
|
Errs :: [{error, Reason :: any()}],
|
||||||
|
Tokens :: [sfc_token()],
|
||||||
|
Result :: {ok,
|
||||||
|
|
||||||
|
gulp_chunks_by(_, Stk, [], []) ->
|
||||||
|
{gulp, lists:reverse(Stk)};
|
||||||
|
gulp_chunks_by(_, _, Errs, []) ->
|
||||||
|
{error, {fixme, {?MODULE, ?LINE}, Errs}};
|
||||||
|
gulp_chunks_by(plist, Stk, Errs, Tokens) ->
|
||||||
|
case slurp_plist_rec(Tokens) of
|
||||||
|
{slurp, Plist, NewTokens} ->
|
||||||
|
gulp_chunks_by(plist, [PList | Stk], Errs, NewTokens);
|
||||||
|
barf ->
|
||||||
|
[Token | NewTokens] = Tokens,
|
||||||
|
gulp_chunks_by(plist, [Token | Stk], Errs, NewTokens);
|
||||||
|
Error ->
|
||||||
|
gulp_chunks_by(plist, Stk, [Error | Errs], Tokens);
|
||||||
|
end.
|
||||||
|
|
||||||
|
slurp_plist_rec(Tokens = [#sfc_token{string = "(" | _]) ->
|
||||||
|
case sfc_tokens:slurp_plist(Tokens) of
|
||||||
|
{slurp, [], _} ->
|
||||||
|
barf;
|
||||||
|
{slurp, PTokens, NewTokens} ->
|
||||||
|
PTokensInner = pt_inner(PTokens),
|
||||||
|
end;
|
||||||
|
|
||||||
|
%-spec gulp_ifx_tree(Tokens) -> gulped(IfxTree) when
|
||||||
|
% Tokens :: [sfc_token()],
|
||||||
|
% IfxTree :: ifx_tree().
|
||||||
|
%
|
||||||
|
%-spec chunk_by(ChunkStrategy, Tokens) -> Result when
|
||||||
|
% ChunkStrategy :: chunk_strategy(),
|
||||||
|
% Tokens :: [sfc_token()],
|
||||||
|
% Result :: {ChunkStrategy,
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec gulp_vtks(Tokens) -> Result when
|
||||||
|
Tokens :: [sfc_token()],
|
||||||
|
Result :: gulped(VirtualTokens),
|
||||||
|
VirtualTokens :: [vtk()].
|
||||||
|
|
||||||
|
% @private
|
||||||
|
% for infix precedence/associativity resolution.
|
||||||
|
%
|
||||||
|
% better/original name (although misnomer) was
|
||||||
|
% insert_virtual_tokens. we're using the gulp verbiage
|
||||||
|
% because we're guarding against mistmatched delimiters
|
||||||
|
% @end
|
||||||
|
gulp_vtks(Tokens) ->
|
||||||
|
gulp_vtks([], Tokens).
|
||||||
|
|
||||||
|
gulp_vtks(Acc, Tks0) ->
|
||||||
|
% scan until next "(" | "*" | "=>"
|
||||||
|
case take_until_ifx_op(Tks0) of
|
||||||
|
% no infix op remaining, return arg
|
||||||
|
{_Pfx = Tks0,
|
||||||
|
_Sfx = []} ->
|
||||||
|
{gulp, lists:flatten([Acc, Tks0])};
|
||||||
|
% application
|
||||||
|
% "... foo(bar, baz) ..."
|
||||||
|
% ~> [..., foo, {plist, "(bar, baz)"}, ...]
|
||||||
|
{_Pfx = Tks1_BeforeOpen,
|
||||||
|
_Sfx = Tks2_OpenNAfter
|
||||||
|
= [#sfc_token{string = "("} | _]} ->
|
||||||
|
case sfc_tokens:slurp_plist(Tks2_OpenNAfter) of
|
||||||
|
{slurp, Tks2A_OpenToClose, Tks2B_AfterClose} ->
|
||||||
|
NewAcc = [Acc,
|
||||||
|
Tks1_BeforeOpen,
|
||||||
|
vtk_apply_to,
|
||||||
|
{vtk_plist, Tks2A_OpenToClose}],
|
||||||
|
gulp_vtks(NewAcc, Tks2B_AfterClose);
|
||||||
|
Error = {error, _} ->
|
||||||
|
Error
|
||||||
|
end;
|
||||||
|
% product
|
||||||
|
{_Pfx = Tks0_BeforeTimes,
|
||||||
|
_Sfx = [ Tk1A_Times = #sfc_token{string = "*"}
|
||||||
|
| Tks1B_AfterTimes]} ->
|
||||||
|
NewAcc = [Acc,
|
||||||
|
Tks0_BeforeTimes,
|
||||||
|
{'vtk_*', Tk1A_Times}],
|
||||||
|
gulp_vtks(NewAcc, Tks1B_AfterTimes);
|
||||||
|
% funType
|
||||||
|
{_Pfx = Tks0_BeforeOp,
|
||||||
|
_Sfx = [ Tk1A_Op = #sfc_token{string = "=>"}
|
||||||
|
| Tks1B_AfterOp]} ->
|
||||||
|
NewAcc = [Acc,
|
||||||
|
Tks0_BeforeOp,
|
||||||
|
{'vtk_=>', Tk1A_Op}],
|
||||||
|
gulp_vtks(NewAcc, Tks1B_AfterOp)
|
||||||
|
end.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec take_until_ifx_op(Tokens) -> Result when
|
||||||
|
Tokens :: [sfc_token()],
|
||||||
|
Result :: {Taken, NewTokens},
|
||||||
|
Taken :: Tokens,
|
||||||
|
NewTokens :: Tokens.
|
||||||
|
% @doc
|
||||||
|
% consume tokens until one of ["(", "*", "=>"]
|
||||||
|
|
||||||
|
take_until_ifx_op(Tks) ->
|
||||||
|
take_until_ifx_op([], Tks).
|
||||||
|
|
||||||
|
take_until_ifx_op(Stack, []) ->
|
||||||
|
{lists:reverse(Stack), []};
|
||||||
|
take_until_ifx_op(Stack, Tokens = [Token | NewTokens]) ->
|
||||||
|
TokStr = Token#sfc_token.string,
|
||||||
|
Continue =
|
||||||
|
case TokStr of
|
||||||
|
% exit cases
|
||||||
|
"(" -> false;
|
||||||
|
"*" -> false;
|
||||||
|
"=>" -> false;
|
||||||
|
_ -> true
|
||||||
|
end,
|
||||||
|
case Continue of
|
||||||
|
true -> take_until_ifx_op([Token | Stack], NewTokens);
|
||||||
|
false -> {lists:reverse(Stack), Tokens}
|
||||||
|
end.
|
||||||
|
|
||||||
@@ -0,0 +1,457 @@
|
|||||||
|
% @doc compatibility layer to test against so_scan
|
||||||
|
%
|
||||||
|
% converts sfc_tokens data to so_scan tokens
|
||||||
|
%
|
||||||
|
% Ref: so_scan.erl
|
||||||
|
-module(sfc_so_scan).
|
||||||
|
|
||||||
|
-export_type([
|
||||||
|
so_kwd/0,
|
||||||
|
so_special_char/0,
|
||||||
|
so_symbol/0,
|
||||||
|
so_token2/0,
|
||||||
|
so_token3/0,
|
||||||
|
so_token/0
|
||||||
|
]).
|
||||||
|
|
||||||
|
-export([
|
||||||
|
scan/1,
|
||||||
|
ken_barson_rises/2
|
||||||
|
]).
|
||||||
|
|
||||||
|
-include("$sfc_include/sfc.hrl").
|
||||||
|
|
||||||
|
%================================
|
||||||
|
% API: types
|
||||||
|
%================================
|
||||||
|
|
||||||
|
% FIXME: single-quote all the atoms to future-proof against the elixir retards
|
||||||
|
% adding more keywords to erlang
|
||||||
|
-type so_kwd() :: contract
|
||||||
|
| include
|
||||||
|
| 'let'
|
||||||
|
| switch
|
||||||
|
| type
|
||||||
|
| record
|
||||||
|
| datatype
|
||||||
|
| 'if'
|
||||||
|
| elif
|
||||||
|
| 'else'
|
||||||
|
| function
|
||||||
|
| stateful
|
||||||
|
| payable
|
||||||
|
| 'true'
|
||||||
|
| 'false'
|
||||||
|
| mod
|
||||||
|
| public
|
||||||
|
| entrypoint
|
||||||
|
| private
|
||||||
|
| indexed
|
||||||
|
| namespace
|
||||||
|
| interface
|
||||||
|
| main
|
||||||
|
| using
|
||||||
|
| as
|
||||||
|
| for
|
||||||
|
| hiding
|
||||||
|
| 'band'
|
||||||
|
| 'bor'
|
||||||
|
| 'bxor'
|
||||||
|
| 'bnot'.
|
||||||
|
|
||||||
|
-type so_special_char() :: '..'
|
||||||
|
| ','
|
||||||
|
| '.'
|
||||||
|
| ';'
|
||||||
|
| '('
|
||||||
|
| ')'
|
||||||
|
| '['
|
||||||
|
| ']'
|
||||||
|
| '{'
|
||||||
|
| '}'
|
||||||
|
.
|
||||||
|
|
||||||
|
% @doc bad type... essentially a string that is the outcome of a regex match is
|
||||||
|
% cast to an atom, and that's the type that goes here
|
||||||
|
-type so_symbol() :: so_kwd() | so_special_char() | atom().
|
||||||
|
|
||||||
|
-type so_token2() :: {Symbol :: so_symbol(),
|
||||||
|
Location :: sfc_pos()}.
|
||||||
|
|
||||||
|
% FIXME
|
||||||
|
% this is 'id', 'con', qid
|
||||||
|
-type so_tk3type() :: char | string | hex | int | bytes | qid | qcon | tvar | id | con.
|
||||||
|
|
||||||
|
-type so_token3() :: {TokenType :: so_tk3type(),
|
||||||
|
Location :: sfc_pos(),
|
||||||
|
TokenValue :: term()}.
|
||||||
|
|
||||||
|
-type so_token() :: so_token2() | so_token3().
|
||||||
|
|
||||||
|
|
||||||
|
%================================
|
||||||
|
% API: functions
|
||||||
|
%================================
|
||||||
|
|
||||||
|
-spec scan(SrcStr) -> {ok, SoTokens} | {error, sfc_err()}
|
||||||
|
when SrcStr :: iolist(),
|
||||||
|
SoTokens :: [so_token()].
|
||||||
|
% @doc
|
||||||
|
% this is meant to agree with so_scan:scan/1 in all cases
|
||||||
|
%
|
||||||
|
% this converts sfc's internal representation of tokens into the format that
|
||||||
|
% so_scan outputs
|
||||||
|
% @end
|
||||||
|
|
||||||
|
scan(SrcStr) ->
|
||||||
|
case sfc_tokens:tokens(SrcStr) of
|
||||||
|
{ok, SfLTokens} ->
|
||||||
|
SoTokens = to_so_tokens(SfLTokens),
|
||||||
|
{ok, SoTokens};
|
||||||
|
% fucking stupid
|
||||||
|
{error, #sfc_err_bcom_unterminated{prev_tokens = SfcTokens}} ->
|
||||||
|
{ok, to_so_tokens(SfcTokens)};
|
||||||
|
Error ->
|
||||||
|
Error
|
||||||
|
end.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec to_so_tokens(SfcTokens) -> SoTokens
|
||||||
|
when SfcTokens :: [sfc_token()],
|
||||||
|
SoTokens :: [so_token()].
|
||||||
|
|
||||||
|
% @doc
|
||||||
|
% most sfc tokens map 1-to-1 with so_tokens. the
|
||||||
|
% exception is ak/ct/sg literals. this is a
|
||||||
|
% many-to-one-mapping, and therefore ak, sg, ct need to
|
||||||
|
% be handled at the list level.
|
||||||
|
%
|
||||||
|
% the reason is as follows:
|
||||||
|
%
|
||||||
|
% so_scan lexes ak_ABCD to an id, then at the parsing
|
||||||
|
% stage computes the pubkey that corresponds to.
|
||||||
|
%
|
||||||
|
% as a result, if we have ak_GHI, I is not a valid
|
||||||
|
% base58 char, so WE (sfc) end up lexing that as
|
||||||
|
%
|
||||||
|
% [{ak, "ak_GH"}, {con, "I"}]
|
||||||
|
%
|
||||||
|
% and so_scan lexes that as {id, "ak_GHI"}].
|
||||||
|
%
|
||||||
|
% however we also don't ignore whitespace, so we can
|
||||||
|
% tell if this happens because it occurs precisely when
|
||||||
|
% an ak/sg/ct is immediately followed by a
|
||||||
|
% non-whitespace token. however there could be more
|
||||||
|
% than 1 and they can be a variety of different shapes.
|
||||||
|
% so we have to greedily consume them back into a
|
||||||
|
% single id.
|
||||||
|
%
|
||||||
|
% bugs in the happy path are trans-features
|
||||||
|
%
|
||||||
|
% so if we see an ak/ct/sg token, we summon evil ben
|
||||||
|
% carson to reconjoin the unconjoined twins
|
||||||
|
to_so_tokens([ AkTok = #sfc_token{type = AkCtSg, pos = Pos}
|
||||||
|
| Sheeit])
|
||||||
|
when ak =:= AkCtSg;
|
||||||
|
ct =:= AkCtSg;
|
||||||
|
sg =:= AkCtSg ->
|
||||||
|
{#sfc_token{string = FinalAkStr}, NewSheeit}
|
||||||
|
= ken_barson_rises(AkTok, Sheeit),
|
||||||
|
[{id, Pos, FinalAkStr}| to_so_tokens(NewSheeit)];
|
||||||
|
% this part is just lists:filtermap
|
||||||
|
to_so_tokens([X | Xs]) ->
|
||||||
|
case to_so_token(X) of
|
||||||
|
false -> to_so_tokens(Xs);
|
||||||
|
{true, SoToken} -> [SoToken | to_so_tokens(Xs)]
|
||||||
|
end;
|
||||||
|
to_so_tokens([]) ->
|
||||||
|
[].
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec ken_barson_rises(InitApiToken, SfToks) -> {FinalApiToken, NewSfToks}
|
||||||
|
when InitApiToken :: sfc_token(),
|
||||||
|
SfToks :: [sfc_token()],
|
||||||
|
FinalApiToken :: InitApiToken,
|
||||||
|
NewSfToks :: SfToks.
|
||||||
|
% @doc
|
||||||
|
%
|
||||||
|
% .-""""""""""""-.
|
||||||
|
% .-' .-======-. '-.
|
||||||
|
% .' / .----. \ '.
|
||||||
|
% / | / \ | \
|
||||||
|
% | | | @ @ | | |
|
||||||
|
% | | | __ | | |
|
||||||
|
% | | | /@@\ | | |
|
||||||
|
% \ | | \__/ | | /
|
||||||
|
% '. | \_++++_/ | .'
|
||||||
|
% '-._| |\/\/| |_.-'
|
||||||
|
% | |/\/\| |
|
||||||
|
% | \____/ |
|
||||||
|
% ___| BEN CARSON |___
|
||||||
|
% .-' | HAS BECOME | '-.
|
||||||
|
% / | TOO POWERFUL| \
|
||||||
|
% / |______________| \
|
||||||
|
% / .-'''-. .-'''-. \
|
||||||
|
% | / .-. \ / .-. \ |
|
||||||
|
% | | ( ) | | ( ) | |
|
||||||
|
% | \ '-' / \ '-' / |
|
||||||
|
% \ '-...-' /\ '-...-' /
|
||||||
|
% '._ / \ _.'
|
||||||
|
% '-._____.-' '-._____.-'
|
||||||
|
%
|
||||||
|
% THE SOFT-SPOKEN DOOM DOCTOR
|
||||||
|
% “I prescribed… CHAOS.”
|
||||||
|
%
|
||||||
|
% BUGS IN THE HAPPY PATH ARE features.
|
||||||
|
% BUGS IN THE HAPPY PATH ARE features.
|
||||||
|
%
|
||||||
|
% WE LIKE features.
|
||||||
|
%
|
||||||
|
% features MAKE US MONEY.
|
||||||
|
%
|
||||||
|
% features ARE NOT FOOD.
|
||||||
|
% features ARE friends.
|
||||||
|
% @end
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
% This function takes the unconjoined twins (e.g.
|
||||||
|
% `ak_GHI` lexed to `ak_GH` followed by `I`) and
|
||||||
|
% recursively reconjoins them so they can all live
|
||||||
|
% happily together as a single so_scan token which will
|
||||||
|
% fail in the parsing step.
|
||||||
|
%
|
||||||
|
% on account of the property that the concatenation of
|
||||||
|
% all the token strings equals the original source file
|
||||||
|
% (FIXME: should test this in test suite)
|
||||||
|
%
|
||||||
|
% basically this looks at the next token, and if it's a
|
||||||
|
% type that so_scan is going to consume as part of an
|
||||||
|
% `id` token, then we add it to the stack.
|
||||||
|
%
|
||||||
|
% quoth claude:
|
||||||
|
% so_scan lexes identifiers with
|
||||||
|
% /[a-z_][a-zA-Z0-9_']*/. The base58 alphabet used by
|
||||||
|
% `smr_apistr58` is:
|
||||||
|
%
|
||||||
|
% 123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz
|
||||||
|
%
|
||||||
|
% Characters **valid in a so_scan id tail** but
|
||||||
|
% **absent from base58**:
|
||||||
|
%
|
||||||
|
% | char | why excluded from base58 |
|
||||||
|
% |------|---------------------------------------|
|
||||||
|
% | `0` | looks like `O` |
|
||||||
|
% | `I` | looks like `1` or `l` |
|
||||||
|
% | `O` | looks like `0` |
|
||||||
|
% | `l` | looks like `1` or `I` |
|
||||||
|
% | `_` | not alphanumeric (structural, not b58)|
|
||||||
|
% | `'` | not alphanumeric (sophia id quirk) |
|
||||||
|
%
|
||||||
|
% When any of these appear AFTER at least one valid
|
||||||
|
% base58 char in a `ak_`/`ct_`/`sg_` prefixed
|
||||||
|
% identifier, `sfc` splits what `so_scan` sees as one
|
||||||
|
% `id` token into 2+ sfc tokens.
|
||||||
|
%
|
||||||
|
% **No split if non-base58 char is immediately after
|
||||||
|
% `_`**: `smr_plus` requires >=1 base58 char to
|
||||||
|
% match; `ak_I`, `ak_0`, `ak__bar` all fall
|
||||||
|
% through to `id` and both tokenizers agree.
|
||||||
|
ken_barson_rises(AkTokAcc = #sfc_token{string = AkStr},
|
||||||
|
SrcTokens = [#sfc_token{type = CandidateType,
|
||||||
|
string = CandidateString}
|
||||||
|
| Rest]) ->
|
||||||
|
% candidate:
|
||||||
|
% dig out the token type and the string
|
||||||
|
Smash = lists:member(CandidateType, smash_types()),
|
||||||
|
Pass = lists:member(CandidateType, pass_types()),
|
||||||
|
% sanity check
|
||||||
|
true = Smash or Pass,
|
||||||
|
if
|
||||||
|
Smash ->
|
||||||
|
% dig out the token from LcTokApi
|
||||||
|
NewAkStr = AkStr ++ CandidateString,
|
||||||
|
NewAkTokAcc = AkTokAcc#sfc_token{string = NewAkStr},
|
||||||
|
ken_barson_rises(NewAkTokAcc, Rest);
|
||||||
|
Pass ->
|
||||||
|
{AkTokAcc, SrcTokens}
|
||||||
|
end;
|
||||||
|
ken_barson_rises(Done, []) ->
|
||||||
|
{Done, []}.
|
||||||
|
|
||||||
|
smash_types() ->
|
||||||
|
[char, % ak_GH'a' -> {char, "'a'"}
|
||||||
|
int16, % ak_GH0xAB -> {int16, "0xAB"}
|
||||||
|
int10, % ak_GH0123 -> {int10, "0123"}
|
||||||
|
tvar, % ak_GH'a -> {tvar, "'a"}
|
||||||
|
kwd, % ak_GHlet -> {kwd, "let"}
|
||||||
|
id, % ak_GH_AB -> {id, "_AB"}
|
||||||
|
con]. % ak_GHI -> {con, "I"}
|
||||||
|
|
||||||
|
pass_types() ->
|
||||||
|
% why each of these are impossible
|
||||||
|
% meaning the prefix for each of these will cause
|
||||||
|
% so_scan to break out of consuming an id, or will
|
||||||
|
% never be a disjoined
|
||||||
|
% neighbor
|
||||||
|
[lcom, % ak_AB// breaks out of id
|
||||||
|
bcom, % ak_AB/* breaks out of id
|
||||||
|
ws, % ak_AB\t breaks out of id
|
||||||
|
punct, % ak_AB{ breaks out of id
|
||||||
|
string, % ak_AB" breaks out of id
|
||||||
|
bytes, % ak_AB# breaks out of id
|
||||||
|
ak,ct,sg, % ak_ABak [akctsg] all in base58 alphabet
|
||||||
|
qid, % ak_ABI.Am.A.qid ??? maybe sophia lexes this to [{id, _}, '.']?
|
||||||
|
qcon, % ak_ABI.Am.A.QCon ??? same
|
||||||
|
op]. % ak_AB=< [=!<>+-*/:&|?~@^] break out of id
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec to_so_token(SfcToken) -> MaybeSoToken
|
||||||
|
when SfcToken :: sfc_token(),
|
||||||
|
MaybeSoToken :: {true, SoToken}
|
||||||
|
| false,
|
||||||
|
SoToken :: so_token().
|
||||||
|
|
||||||
|
% @private
|
||||||
|
% does NOT handle ak/ct/sg because these may consume
|
||||||
|
% follow-on tokens
|
||||||
|
% @end
|
||||||
|
|
||||||
|
to_so_token(#sfc_token{type = SfTokenType,
|
||||||
|
pos = Pos,
|
||||||
|
string = SfTokenStr}) ->
|
||||||
|
case SfTokenType of
|
||||||
|
%-----------------
|
||||||
|
% Ignored
|
||||||
|
%-----------------
|
||||||
|
bcom -> false;
|
||||||
|
lcom -> false;
|
||||||
|
ws -> false;
|
||||||
|
%-----------------------
|
||||||
|
% {_, _}
|
||||||
|
%
|
||||||
|
% {contract, {420, 69}}
|
||||||
|
%-----------------------
|
||||||
|
% kwds ops and punct are all collapsed by
|
||||||
|
% so_scan:scan down to eg {'contract', {420, 69}}
|
||||||
|
% where {420, 69} is the source location
|
||||||
|
% these are three different parsers
|
||||||
|
Sym when Sym =:= kwd;
|
||||||
|
Sym =:= op;
|
||||||
|
Sym =:= punct ->
|
||||||
|
Symbol = list_to_atom(SfTokenStr),
|
||||||
|
{true, {Symbol, Pos}};
|
||||||
|
%------------------------------------
|
||||||
|
% {_, _, _}
|
||||||
|
%
|
||||||
|
% {id, {420, 69}, "foo"}
|
||||||
|
%--------------------------------
|
||||||
|
QVar when QVar =:= qid; QVar =:= qcon ->
|
||||||
|
% qualifieds tokenize to
|
||||||
|
% {qid, {420, 69}, ["Foo", "Bar", "baz"]}
|
||||||
|
{true, {QVar, Pos, string:tokens(SfTokenStr, ".")}};
|
||||||
|
SfVar when SfVar =:= id; SfVar =:= con; SfVar =:= tvar ->
|
||||||
|
{true, {SfVar, Pos, SfTokenStr}};
|
||||||
|
% literals
|
||||||
|
% from so_scan:
|
||||||
|
% {CHAR, token(char, fun parse_char/1)}
|
||||||
|
% {STRING, token(string, fun parse_string/1)}
|
||||||
|
% {HEX, token(hex, fun parse_hex/1)}
|
||||||
|
% {INT, token(int, fun parse_int/1)}
|
||||||
|
% {BYTES, token(bytes, fun parse_bytes/1)}
|
||||||
|
% so_scan casts strings to binary
|
||||||
|
char -> {true, {char, Pos, so_parse_char(SfTokenStr)}};
|
||||||
|
string -> {true, {string, Pos, so_parse_string(SfTokenStr)}};
|
||||||
|
int16 -> {true, {hex, Pos, so_parse_hex(SfTokenStr)}};
|
||||||
|
int10 -> {true, {int, Pos, so_parse_int(SfTokenStr)}};
|
||||||
|
bytes -> {true, {bytes, Pos, so_parse_bytes(SfTokenStr)}};
|
||||||
|
NYI ->
|
||||||
|
Msg = io_lib:format("sfc_so_scan:to_so_token/1: unhandled token shape: ~p", [NYI]),
|
||||||
|
error(#sfc_err{atom = nyi,
|
||||||
|
string = Msg})
|
||||||
|
end.
|
||||||
|
|
||||||
|
%% ak/ct/sg all tokenize to id
|
||||||
|
%% FIXEDME: implement? it seems like so_scan just parses these as
|
||||||
|
%% identifiers, so not clear what the advantage is here?
|
||||||
|
%%
|
||||||
|
%% i suppose we'll find out when we write the syntax parser
|
||||||
|
%%
|
||||||
|
%% so_scan lexes ak/ct/sg as ids and then parses them as addresses/sigs
|
||||||
|
%% in the parsing step
|
||||||
|
%API when API =:= ak;
|
||||||
|
% API =:= ct;
|
||||||
|
% API =:= sg ->
|
||||||
|
% {true, {id, Pos, SfTokenStr}};
|
||||||
|
|
||||||
|
% copied from so_scan.erl
|
||||||
|
so_parse_char([$' | Chars]) ->
|
||||||
|
case unicode:characters_to_nfc_list(unescape($', Chars, [])) of
|
||||||
|
[Char] -> Char;
|
||||||
|
_Bad ->
|
||||||
|
error(#sfc_err{atom = bad_token,
|
||||||
|
string = "Bad character literal: '" ++ Chars})
|
||||||
|
end.
|
||||||
|
|
||||||
|
so_parse_string([$" | Chars]) ->
|
||||||
|
unicode:characters_to_nfc_binary(unescape(Chars)).
|
||||||
|
|
||||||
|
% FIXME: unfuck this shit
|
||||||
|
%
|
||||||
|
% this all works in some stupid fucking way because
|
||||||
|
% so_scan operates on lists of bytes, rather than on
|
||||||
|
% character-lists. So single codepoints have to be
|
||||||
|
% converted to multi-byte sequences or some shit. We're
|
||||||
|
% always working on lists, so this can probably be
|
||||||
|
% simplified. I don't care enough at the moment to fix
|
||||||
|
% this, but this function has been the source of
|
||||||
|
% several annoying bugs
|
||||||
|
unescape(Str) -> unescape($", Str, []).
|
||||||
|
|
||||||
|
unescape(Delim, [Delim], Acc) ->
|
||||||
|
unicode:characters_to_binary(lists:reverse(Acc));
|
||||||
|
unescape(Delim, [$\\, $x, ${ | Chars ], Acc) ->
|
||||||
|
{Ds, [_ | Cs]} = lists:splitwith(fun($}) -> false ; (_) -> true end, Chars),
|
||||||
|
C = list_to_integer(Ds, 16),
|
||||||
|
Utf8Cs = unicode:characters_to_nfc_list([C]),
|
||||||
|
unescape(Delim, Cs, [Utf8Cs | Acc]);
|
||||||
|
unescape(Delim, [$\\, $x, D1, D2 | Chars ], Acc) ->
|
||||||
|
C = list_to_integer([D1, D2], 16),
|
||||||
|
Utf8Cs = unicode:characters_to_nfc_list([C]),
|
||||||
|
unescape(Delim, Chars, [Utf8Cs | Acc]);
|
||||||
|
unescape(Delim, [$\\, Code | Chars], Acc) ->
|
||||||
|
Ok = fun(C) -> unescape(Delim, Chars, [C | Acc]) end,
|
||||||
|
case Code of
|
||||||
|
Delim -> Ok(Delim);
|
||||||
|
$\\ -> Ok($\\);
|
||||||
|
$b -> Ok($\b);
|
||||||
|
$e -> Ok($\e);
|
||||||
|
$f -> Ok($\f);
|
||||||
|
$n -> Ok($\n);
|
||||||
|
$r -> Ok($\r);
|
||||||
|
$t -> Ok($\t);
|
||||||
|
$v -> Ok($\v);
|
||||||
|
_ -> error(#sfc_err{atom = bad_escape_char,
|
||||||
|
string = "Bad control sequence: \\" ++ [Code]}) %% TODO
|
||||||
|
end;
|
||||||
|
unescape(Delim, [C | Chars], Acc) ->
|
||||||
|
unescape(Delim, Chars, [C | Acc]).
|
||||||
|
|
||||||
|
|
||||||
|
so_parse_hex("0x" ++ S) ->
|
||||||
|
list_to_integer(strip_underscores(S), 16).
|
||||||
|
|
||||||
|
so_parse_int(S) ->
|
||||||
|
list_to_integer(strip_underscores(S)).
|
||||||
|
|
||||||
|
so_parse_bytes("#" ++ S0) ->
|
||||||
|
S = strip_underscores(S0),
|
||||||
|
N = list_to_integer(S, 16),
|
||||||
|
Digits = (length(S) + 1) div 2,
|
||||||
|
<<N:Digits/unit:8>>.
|
||||||
|
|
||||||
|
strip_underscores(S) ->
|
||||||
|
lists:filter(fun(C) -> C /= $_ end, S).
|
||||||
@@ -0,0 +1,881 @@
|
|||||||
|
% @doc
|
||||||
|
% A string matcher is roughly analogous to a regex. It describes a pattern,
|
||||||
|
% which a string may or may not match.
|
||||||
|
%
|
||||||
|
% This module is essentially a pure erlang implementation of the subset of
|
||||||
|
% regular expressions that are needed to tokenize sophia.
|
||||||
|
%
|
||||||
|
% The intent for now (May 2026) is simply to perfectly mimic the so_scan library
|
||||||
|
%
|
||||||
|
% Reference is `docs/sophia_syntax.md` as well as `src/so_scan_lib.erl` in
|
||||||
|
% original sophia lib
|
||||||
|
%
|
||||||
|
% From docs/sophia_syntax.md:
|
||||||
|
%
|
||||||
|
% - Id = [a-z_][A-Za-z0-9_']* identifiers start with a lower case letter.
|
||||||
|
% - Con = [A-Z][A-Za-z0-9_]* constructors start with an upper case letter.
|
||||||
|
% - QId = (Con\.)+Id qualified identifiers (e.g. `Map.member`)
|
||||||
|
% - QCon = (Con\.)+Con qualified constructor
|
||||||
|
% - TVar = 'Id type variable (e.g `'a`, `'b`)
|
||||||
|
% - Int = [0-9]+(_[0-9]+)*|0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)* integer literal with optional `_` separators
|
||||||
|
% - Bytes = #[0-9A-Fa-f]+(_[0-9A-Fa-f]+)* byte array literal with optional `_` separators
|
||||||
|
% - String` string literal enclosed in " with escape character `\`
|
||||||
|
% - Char character literal enclosed in ' with escape character `\`
|
||||||
|
% - AccountAddress base58-encoded 32 byte account pubkey with `ak_` prefix
|
||||||
|
% - ContractAddress base58-encoded 32 byte contract address with `ct_` prefix
|
||||||
|
% - Signature base58-encoded 64 byte cryptographic signature with `sg_` prefix
|
||||||
|
%
|
||||||
|
% Sophia's notion of tokens also includes keywords, parens, whitespace, etc.
|
||||||
|
% Real reference is of course the code:
|
||||||
|
%
|
||||||
|
% Number = fun(Digit) -> [Digit, "+(_", Digit, "+)*"] end,
|
||||||
|
% DIGIT = "[0-9]",
|
||||||
|
% HEXDIGIT = "[0-9a-fA-F]",
|
||||||
|
% LOWER = "[a-z_]",
|
||||||
|
% UPPER = "[A-Z]",
|
||||||
|
% CON = [UPPER, "[a-zA-Z0-9_]*"],
|
||||||
|
% INT = Number(DIGIT),
|
||||||
|
% HEX = ["0x", Number(HEXDIGIT)],
|
||||||
|
% BYTES = ["#", Number(HEXDIGIT)],
|
||||||
|
% WS = "[\\000-\\ ]+",
|
||||||
|
% ID = [LOWER, "[a-zA-Z0-9_']*"],
|
||||||
|
% TVAR = ["'", ID],
|
||||||
|
% QID = ["(", CON, "\\.)+", ID],
|
||||||
|
% QCON = ["(", CON, "\\.)+", CON],
|
||||||
|
% OP = "[=!<>+\\-*/:&|?~@^]+",
|
||||||
|
% %% Five cases for a character
|
||||||
|
% %% * 1 7-bit ascii, not \ or '
|
||||||
|
% %% * 2-4 8-bit values (UTF8)
|
||||||
|
% %% * \ followed by a known modifier [aernrtv]
|
||||||
|
% %% * \xhh
|
||||||
|
% %% * \x{hhh...}
|
||||||
|
% CHAR = "'(([\\x00-\\x26\\x28-\\x5b\\x5d-\\x7f])|([\\x00-\\xff][\\x80-\\xff]{1,3})|(\\\\[befnrtv'\\\\])|(\\\\x[0-9a-fA-F]{2,2})|(\\\\x\\{[0-9a-fA-F]*\\}))'",
|
||||||
|
% STRING = "\"([^\"\\\\]|(\\\\.))*\"",
|
||||||
|
%
|
||||||
|
% CommentStart = {"/\\*", push(comment, skip())},
|
||||||
|
% CommentRules =
|
||||||
|
% [ CommentStart
|
||||||
|
% , {"\\*/", pop(skip())}
|
||||||
|
% , {"[^/*]+|[/*]", skip()} ],
|
||||||
|
%
|
||||||
|
% Keywords = ["contract", "include", "let", "switch", "type", "record", "datatype", "if", "elif", "else", "function",
|
||||||
|
% "stateful", "payable", "true", "false", "mod", "public", "entrypoint", "private", "indexed", "namespace",
|
||||||
|
% "interface", "main", "using", "as", "for", "hiding", "band", "bor", "bxor", "bnot"
|
||||||
|
% ],
|
||||||
|
% KW = string:join(Keywords, "|"),
|
||||||
|
%
|
||||||
|
% There is a lot going on in that code. This is purely the part that matches
|
||||||
|
% strings specifically, . The *tokenizer* (sfc_tokenizer) knows the hierarchy
|
||||||
|
% of sophia tokens (e.g. it knows to match keywords before identifiers, so that
|
||||||
|
% `contract` gets tokenized as a keyword and not a variable name), and then
|
||||||
|
% calls into this module in order to match the string shape it's looking for.
|
||||||
|
% @end
|
||||||
|
-module(sfc_strmatch).
|
||||||
|
|
||||||
|
%-compile([export_all, nowarn_export_all]).
|
||||||
|
|
||||||
|
|
||||||
|
-export_type([
|
||||||
|
string_matcher/0
|
||||||
|
]).
|
||||||
|
|
||||||
|
% given a string matcher and a string, determine match or no
|
||||||
|
-export([
|
||||||
|
match/2
|
||||||
|
]).
|
||||||
|
|
||||||
|
% string matchers for sophia token shapes
|
||||||
|
-export([
|
||||||
|
smr_sf_ws/0,
|
||||||
|
smr_sf_op/0,
|
||||||
|
smr_sf_punct/0,
|
||||||
|
smr_sf_id/0,
|
||||||
|
smr_sf_con/0,
|
||||||
|
smr_sf_qid/0,
|
||||||
|
smr_sf_qcon/0,
|
||||||
|
smr_sf_tvar/0,
|
||||||
|
smr_sf_int16/0,
|
||||||
|
smr_sf_int10/0,
|
||||||
|
smr_sf_bytes/0,
|
||||||
|
smr_sf_str/0,
|
||||||
|
smr_sf_char/0,
|
||||||
|
smr_sf_ak/0,
|
||||||
|
smr_sf_ct/0,
|
||||||
|
smr_sf_sg/0
|
||||||
|
]).
|
||||||
|
|
||||||
|
|
||||||
|
% regex primitives/combinators
|
||||||
|
-export([
|
||||||
|
% plumbing
|
||||||
|
smr_char/1,
|
||||||
|
smr_char_range/2,
|
||||||
|
smr_union/1,
|
||||||
|
smr_seq/1,
|
||||||
|
smr_plus/1,
|
||||||
|
smr_star/1,
|
||||||
|
smr_dot/0,
|
||||||
|
smr_ncmatch/2,
|
||||||
|
% porcelain
|
||||||
|
smr_string/1,
|
||||||
|
smr_oneofchars/1
|
||||||
|
]).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
%%=======================================================================
|
||||||
|
%% API: Types
|
||||||
|
%%=======================================================================
|
||||||
|
|
||||||
|
-type string_matcher()
|
||||||
|
:: {smr_char, integer()} % /a/, /b/, /cd/
|
||||||
|
| {smr_char_range, integer(), integer()} % /[a-z]/
|
||||||
|
| {smr_union, [string_matcher()]} % /[abc]/
|
||||||
|
| {smr_seq, [string_matcher()]} % /abc/
|
||||||
|
| {smr_plus, string_matcher()} % /(abc)+/
|
||||||
|
| {smr_star, string_matcher()} % /(abc)*/
|
||||||
|
| smr_dot % /./
|
||||||
|
% negative conditional match
|
||||||
|
% /[^a-z]/, but more general
|
||||||
|
% /[^a-z]/ <~> smr_ncmatch(smr_char_range($a, $z), smr_dot()).
|
||||||
|
| {smr_ncmatch, MustNotMatch :: string_matcher(),
|
||||||
|
Match :: string_matcher()}.
|
||||||
|
|
||||||
|
|
||||||
|
%=========================================================
|
||||||
|
% API: Functions
|
||||||
|
%=========================================================
|
||||||
|
|
||||||
|
%---------------------------------------------------------
|
||||||
|
% API: string matching logic
|
||||||
|
%
|
||||||
|
% -export([
|
||||||
|
% match/2
|
||||||
|
% ]).
|
||||||
|
%---------------------------------------------------------
|
||||||
|
|
||||||
|
-spec match(Matcher, Source) -> MaybeMatch
|
||||||
|
when Matcher :: string_matcher(),
|
||||||
|
Source :: iolist(),
|
||||||
|
MaybeMatch :: {strmatch, Matched :: string(), Rest :: string()}
|
||||||
|
| no_strmatch.
|
||||||
|
% @doc
|
||||||
|
% normalize input to an nfc list before parsing
|
||||||
|
%
|
||||||
|
% match(Matcher, Source) ->
|
||||||
|
% string_match(Matcher, unicode:characters_to_nfc_list(Source)).
|
||||||
|
% @end
|
||||||
|
match(Matcher, Source) ->
|
||||||
|
string_match(Matcher, unicode:characters_to_nfc_list(Source)).
|
||||||
|
|
||||||
|
|
||||||
|
%---------------------------------------------------------
|
||||||
|
% API: string matchers for sophia tokens
|
||||||
|
%
|
||||||
|
% -export([
|
||||||
|
% smr_sf_ws/0,
|
||||||
|
% smr_sf_op/0,
|
||||||
|
% smr_sf_punct/0,
|
||||||
|
% smr_sf_id/0,
|
||||||
|
% smr_sf_con/0,
|
||||||
|
% smr_sf_qid/0,
|
||||||
|
% smr_sf_qcon/0,
|
||||||
|
% smr_sf_tvar/0,
|
||||||
|
% smr_sf_int16/0,
|
||||||
|
% smr_sf_int10/0,
|
||||||
|
% smr_sf_bytes/0,
|
||||||
|
% smr_sf_str/0,
|
||||||
|
% smr_sf_char/0,
|
||||||
|
% smr_sf_ak/0,
|
||||||
|
% smr_sf_ct/0,
|
||||||
|
% smr_sf_sg/0
|
||||||
|
% ]).
|
||||||
|
%---------------------------------------------------------
|
||||||
|
|
||||||
|
-spec smr_sf_ws() -> string_matcher().
|
||||||
|
% @doc
|
||||||
|
% String matcher for whitespace
|
||||||
|
%
|
||||||
|
% from so_scan.erl (9.0.0)
|
||||||
|
%
|
||||||
|
% WS = "[\\000-\\ ]+",
|
||||||
|
%
|
||||||
|
% turns out all the ascii codepoints which are 32 or lower are control chars or
|
||||||
|
% whitespace: https://www.asciitable.com/
|
||||||
|
% @end
|
||||||
|
smr_sf_ws() ->
|
||||||
|
WhitespaceChars = lists:seq(0, 32),
|
||||||
|
smr_plus(smr_oneofchars(WhitespaceChars)).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec smr_sf_op() -> string_matcher().
|
||||||
|
% @doc
|
||||||
|
% String matcher for a sophia operator
|
||||||
|
%
|
||||||
|
% from so_scan.erl (9.0.0)
|
||||||
|
%
|
||||||
|
% OP = "[=!<>+\\-*/:&|?~@^]+",
|
||||||
|
% @end
|
||||||
|
smr_sf_op() ->
|
||||||
|
SfOpChars = "=!<>+-*/:&|?~@^",
|
||||||
|
SfOpChar = smr_union([smr_char(C) || C <- SfOpChars]),
|
||||||
|
smr_plus(SfOpChar).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec smr_sf_punct() -> string_matcher().
|
||||||
|
% @doc
|
||||||
|
% String matcher for parens/braces
|
||||||
|
%
|
||||||
|
% from so_scan.erl (9.0.0)
|
||||||
|
%
|
||||||
|
% , {"\\.\\.|[,.;()\\[\\]{}]", symbol()}
|
||||||
|
% @end
|
||||||
|
|
||||||
|
smr_sf_punct() ->
|
||||||
|
M_DotDotOp = smr_string(".."),
|
||||||
|
M_PunctChars = smr_oneofchars(",.;()[]{}"),
|
||||||
|
smr_union([M_DotDotOp, M_PunctChars]).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec smr_sf_id() -> string_matcher().
|
||||||
|
% @doc
|
||||||
|
% String matcher for a sophia identifier
|
||||||
|
%
|
||||||
|
% foo
|
||||||
|
% _foo
|
||||||
|
% fooBar'
|
||||||
|
%
|
||||||
|
% - Id = [a-z_][A-Za-z0-9_']* identifiers start with a lower case letter.
|
||||||
|
% @end
|
||||||
|
|
||||||
|
smr_sf_id() ->
|
||||||
|
% upper lower digit under quote
|
||||||
|
ULDUQ =
|
||||||
|
{smr_union, [{smr_char_range, $A, $Z},
|
||||||
|
{smr_char_range, $a, $z},
|
||||||
|
{smr_char_range, $0, $9},
|
||||||
|
{smr_char, $_},
|
||||||
|
{smr_char, $'}]},
|
||||||
|
smr_seq([smr_union([smr_char_range($a, $z), smr_char($_)]),
|
||||||
|
smr_star(ULDUQ)]).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec smr_sf_con() -> string_matcher().
|
||||||
|
% @doc
|
||||||
|
% String matcher for a sophia constructor name
|
||||||
|
%
|
||||||
|
% Foo
|
||||||
|
% Foo_Bar
|
||||||
|
% Foo_Bar3_'
|
||||||
|
%
|
||||||
|
% - Con = [A-Z][A-Za-z0-9_']* constructors start with an upper case letter.
|
||||||
|
% @end
|
||||||
|
|
||||||
|
smr_sf_con() ->
|
||||||
|
ULDU =
|
||||||
|
{smr_union, [{smr_char_range, $A, $Z},
|
||||||
|
{smr_char_range, $a, $z},
|
||||||
|
{smr_char_range, $0, $9},
|
||||||
|
{smr_char, $_}]},
|
||||||
|
smr_seq([smr_char_range($A, $Z),
|
||||||
|
smr_star(ULDU)]).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec smr_sf_qid() -> string_matcher().
|
||||||
|
|
||||||
|
% @doc
|
||||||
|
% String matcher for a Sophia qualified identifier
|
||||||
|
%
|
||||||
|
% Foo.Bar.Baz.quux
|
||||||
|
%
|
||||||
|
% - QId = (Con\.)+Id qualified identifiers (e.g. `Map.member`)
|
||||||
|
% @end
|
||||||
|
smr_sf_qid() ->
|
||||||
|
Qualifier = smr_seq([smr_sf_con(), smr_char($.)]),
|
||||||
|
Qualifiers = smr_plus(Qualifier),
|
||||||
|
Identifier = smr_sf_id(),
|
||||||
|
smr_seq([Qualifiers, Identifier]).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec smr_sf_qcon() -> string_matcher().
|
||||||
|
|
||||||
|
% @doc
|
||||||
|
%
|
||||||
|
% String matcher for a sophia qualified constructor
|
||||||
|
%
|
||||||
|
% Foo.Bar.Baz
|
||||||
|
%
|
||||||
|
% - QCon = (Con\.)+Con qualified constructor
|
||||||
|
% @end
|
||||||
|
smr_sf_qcon() ->
|
||||||
|
Qualifier = smr_seq([smr_sf_con(), smr_char($.)]),
|
||||||
|
Qualifiers = smr_plus(Qualifier),
|
||||||
|
Constructor = smr_sf_con(),
|
||||||
|
smr_seq([Qualifiers, Constructor]).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec smr_sf_tvar() -> string_matcher().
|
||||||
|
|
||||||
|
% @doc
|
||||||
|
% String matcher for a sophia type variable; e.g.
|
||||||
|
%
|
||||||
|
% 'a
|
||||||
|
% 'foo_bar
|
||||||
|
%
|
||||||
|
% - TVar = 'Id type variable (e.g `'a`, `'b`)
|
||||||
|
% @end
|
||||||
|
smr_sf_tvar() ->
|
||||||
|
smr_seq([smr_char($'), smr_sf_id()]).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec smr_sf_int16() -> string_matcher().
|
||||||
|
|
||||||
|
% @doc
|
||||||
|
% String matcher for a sophia base16 integer 0xDEAD_BEEF
|
||||||
|
%
|
||||||
|
% so_scan parses base10/base16 in one go, but i think it's clearer if they're
|
||||||
|
% different
|
||||||
|
%
|
||||||
|
% - Int = [0-9]+(_[0-9]+)*|0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)* integer literal with optional `_` separators
|
||||||
|
% @end
|
||||||
|
smr_sf_int16() ->
|
||||||
|
HexDigit = smr_union([smr_char_range($0, $9),
|
||||||
|
smr_char_range($A, $F),
|
||||||
|
smr_char_range($a, $f)]),
|
||||||
|
HexDigits = smr_plus(HexDigit),
|
||||||
|
UHexDigits = smr_seq([smr_char($_), HexDigits]),
|
||||||
|
smr_seq([smr_string("0x"), HexDigits, smr_star(UHexDigits)]).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec smr_sf_int10() -> string_matcher().
|
||||||
|
|
||||||
|
% @doc
|
||||||
|
% string matcher for a sophia base 10 int 012_345_6_7
|
||||||
|
%
|
||||||
|
% so_scan parses base10/base16 in one go, but i think it's clearer if they're
|
||||||
|
% different
|
||||||
|
%
|
||||||
|
% - Int = [0-9]+(_[0-9]+)*|0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)* integer literal with optional `_` separators
|
||||||
|
% @end
|
||||||
|
smr_sf_int10() ->
|
||||||
|
DecDigit = smr_char_range($0, $9),
|
||||||
|
DecDigits = smr_plus(DecDigit),
|
||||||
|
UDecDigits = smr_seq([smr_char($_), DecDigits]),
|
||||||
|
smr_seq([DecDigits, smr_star(UDecDigits)]).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec smr_sf_bytes() -> string_matcher().
|
||||||
|
|
||||||
|
% @doc
|
||||||
|
% String matcher for a sophia bytestring
|
||||||
|
%
|
||||||
|
% #DEAD_BEEF
|
||||||
|
%
|
||||||
|
% - Bytes = #[0-9A-Fa-f]+(_[0-9A-Fa-f]+)* byte array literal with optional `_` separators
|
||||||
|
% @end
|
||||||
|
smr_sf_bytes() ->
|
||||||
|
HexDigit = smr_union([smr_char_range($0, $9),
|
||||||
|
smr_char_range($A, $F),
|
||||||
|
smr_char_range($a, $f)]),
|
||||||
|
HexDigits = smr_plus(HexDigit),
|
||||||
|
UHexDigits = smr_seq([smr_char($_), HexDigits]),
|
||||||
|
smr_seq([smr_char($#), HexDigits, smr_star(UHexDigits)]).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec smr_sf_str() -> string_matcher().
|
||||||
|
|
||||||
|
% @doc
|
||||||
|
% String matcher for sophia string literal
|
||||||
|
%
|
||||||
|
% String string literal enclosed in " with escape character `\`
|
||||||
|
%
|
||||||
|
% STRING = "\"([^\"\\\\]|(\\\\.))*\"",
|
||||||
|
% @end
|
||||||
|
smr_sf_str() ->
|
||||||
|
smr_seq([smr_char($"), smr_star(smr_sf_strchar()), smr_char($")]).
|
||||||
|
|
||||||
|
|
||||||
|
-spec smr_sf_strchar() -> string_matcher().
|
||||||
|
% @private
|
||||||
|
% string matcher for a character in a sophia string
|
||||||
|
%
|
||||||
|
% STRING = "\"([^\"\\\\]|(\\\\.))*\"",
|
||||||
|
%
|
||||||
|
% this is for
|
||||||
|
%
|
||||||
|
% ([^\"\\\\]|(\\\\.))
|
||||||
|
%
|
||||||
|
% cleaned up:
|
||||||
|
%
|
||||||
|
% ([^"\\]|(\\.))
|
||||||
|
% @end
|
||||||
|
smr_sf_strchar() ->
|
||||||
|
% cannot have a literal newline in string
|
||||||
|
%
|
||||||
|
% "foo
|
||||||
|
% bar"
|
||||||
|
%
|
||||||
|
% is not a valid sophia string
|
||||||
|
AnythingButNewline = smr_ncmatch(smr_char($\n), smr_dot()),
|
||||||
|
IsASpecialChar = smr_union([smr_char($"), smr_char($\\)]),
|
||||||
|
NotEscSeq = smr_ncmatch(IsASpecialChar, AnythingButNewline),
|
||||||
|
% FIXME: maybe we should enfore escape sequence rules here?
|
||||||
|
%
|
||||||
|
% especially to be consistent with char rules
|
||||||
|
EscSeq = smr_seq([smr_char($\\), AnythingButNewline]),
|
||||||
|
smr_union([NotEscSeq, EscSeq]).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec smr_sf_char() -> string_matcher().
|
||||||
|
% @doc
|
||||||
|
% String matcher for a Sophia char literal
|
||||||
|
%
|
||||||
|
% From so_scan.erl:
|
||||||
|
%
|
||||||
|
% %% Five cases for a character
|
||||||
|
% %% * 1 7-bit ascii, not \ or '
|
||||||
|
% %% * 2-4 8-bit values (UTF8)
|
||||||
|
% %% * \ followed by a known modifier [aernrtv]
|
||||||
|
% %% * \xhh
|
||||||
|
% %% * \x{hhh...}
|
||||||
|
% CHAR = "'(([\\x00-\\x26\\x28-\\x5b\\x5d-\\x7f])|([\\x00-\\xff][\\x80-\\xff]{1,3})|(\\\\[befnrtv'\\\\])|(\\\\x[0-9a-fA-F]{2,2})|(\\\\x\\{[0-9a-fA-F]*\\}))'",
|
||||||
|
%
|
||||||
|
% > Char character literal enclosed in ' with escape character `\`
|
||||||
|
% @end
|
||||||
|
|
||||||
|
% ok we get this monstrosity
|
||||||
|
%
|
||||||
|
% "'(([\\x00-\\x26\\x28-\\x5b\\x5d-\\x7f])|([\\x00-\\xff][\\x80-\\xff]{1,3})|(\\\\[befnrtv'\\\\])|(\\\\x[0-9a-fA-F]{2,2})|(\\\\x\\{[0-9a-fA-F]*\\}))'"
|
||||||
|
%
|
||||||
|
% there's like 4 levels of escaping and shit, so let's break it down. First
|
||||||
|
% let's notice this pattern:
|
||||||
|
%
|
||||||
|
% '(...)'.
|
||||||
|
%
|
||||||
|
% So let's make a hole
|
||||||
|
smr_sf_char() ->
|
||||||
|
smr_seq([smr_char($'), smr_sf_char_inner(), smr_char($')]).
|
||||||
|
|
||||||
|
% smr_sf_char_inner() will deal with the stuff in the monstrosity
|
||||||
|
%
|
||||||
|
% we had this before
|
||||||
|
% "'(([\\x00-\\x26\\x28-\\x5b\\x5d-\\x7f])|([\\x00-\\xff][\\x80-\\xff]{1,3})|(\\\\[befnrtv'\\\\])|(\\\\x[0-9a-fA-F]{2,2})|(\\\\x\\{[0-9a-fA-F]*\\}))'"
|
||||||
|
%
|
||||||
|
% let's trim
|
||||||
|
% ([\\x00-\\x26\\x28-\\x5b\\x5d-\\x7f])|([\\x00-\\xff][\\x80-\\xff]{1,3})|(\\\\[befnrtv'\\\\])|(\\\\x[0-9a-fA-F]{2,2})|(\\\\x\\{[0-9a-fA-F]*\\})
|
||||||
|
%
|
||||||
|
% and reorg
|
||||||
|
% ([\\x00-\\x26\\x28-\\x5b\\x5d-\\x7f])
|
||||||
|
% | ([\\x00-\\xff][\\x80-\\xff]{1,3})
|
||||||
|
% | (\\\\[befnrtv'\\\\])
|
||||||
|
% | (\\\\x[0-9a-fA-F]{2,2})
|
||||||
|
% | (\\\\x\\{[0-9a-fA-F]*\\})
|
||||||
|
%
|
||||||
|
% trim some more
|
||||||
|
% [\\x00-\\x26\\x28-\\x5b\\x5d-\\x7f]
|
||||||
|
% | [\\x00-\\xff][\\x80-\\xff]{1,3}
|
||||||
|
% | \\\\[befnrtv'\\\\]
|
||||||
|
% | \\\\x[0-9a-fA-F]{2,2}
|
||||||
|
% | \\\\x\\{[0-9a-fA-F]*\\}
|
||||||
|
%
|
||||||
|
% undo some escapes
|
||||||
|
% [\x00-\x26\x28-\x5b\x5d-\x7f]
|
||||||
|
% | [\x00-\xff][\x80-\xff]{1,3}
|
||||||
|
% | \\[befnrtv'\\]
|
||||||
|
% | \\x[0-9a-fA-F]{2,2}
|
||||||
|
% | \\x\{[0-9a-fA-F]*\}
|
||||||
|
%
|
||||||
|
% rewrite
|
||||||
|
% [^'\] <~> (16#00..16#26 | 16#28..16#5b | 16#5d..16#7f)
|
||||||
|
% <<_:8, (_ >= 128){1,3}>> <~> [\x00-\xff][\x80-\xff]{1,3}
|
||||||
|
% <<$\\, X>> <~> \\[befnrtv'\\]
|
||||||
|
% \xAB <~> \\x[0-9a-fA-F]{2,2}
|
||||||
|
% \x{DEADBEEF} <~> \\x\{[0-9a-fA-F]*\}
|
||||||
|
|
||||||
|
smr_sf_char_inner() ->
|
||||||
|
Escapable = smr_oneofchars("befnrtv'\\"),
|
||||||
|
EscSeq = smr_seq([smr_char($\\), Escapable]),
|
||||||
|
HexChar = smr_oneofchars("0123456789ABCDEFabcdef"),
|
||||||
|
HexEsc2 = smr_seq([smr_string("\\x"), HexChar, HexChar]),
|
||||||
|
HexEsc = smr_seq([smr_string("\\x{"), smr_star(HexChar), smr_char($})]),
|
||||||
|
% FIXME: possible erroneous oversimplification here
|
||||||
|
QuoteOrBackslash = smr_oneofchars([$', $\\]),
|
||||||
|
Utf8Char = smr_ncmatch(QuoteOrBackslash, smr_dot()),
|
||||||
|
smr_union([EscSeq, HexEsc2, HexEsc, Utf8Char]).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec smr_sf_ak() -> string_matcher().
|
||||||
|
% @doc
|
||||||
|
% string matcher for
|
||||||
|
%
|
||||||
|
% ak_....
|
||||||
|
%
|
||||||
|
% sophia's tokenizer tokenizes ak_.../sg_... etc as identifiers and then in the
|
||||||
|
% parsing stage disambiguates them
|
||||||
|
%
|
||||||
|
% i don't like that, but for version 0.1 we're going to match the behavior of
|
||||||
|
% `so_scan` exactly, just for clarity
|
||||||
|
%
|
||||||
|
% however, note that is the token step, we can still write a string matcher to
|
||||||
|
% be useful later
|
||||||
|
%
|
||||||
|
% > AccountAddress base58-encoded 32 byte account pubkey with `ak_` prefix
|
||||||
|
% @end
|
||||||
|
smr_sf_ak() ->
|
||||||
|
smr_apistr58("ak").
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec smr_sf_ct() -> string_matcher().
|
||||||
|
% @doc
|
||||||
|
% string matcher for
|
||||||
|
%
|
||||||
|
% ct_....
|
||||||
|
%
|
||||||
|
% sophia's tokenizer tokenizes ak_.../sg_... etc as identifiers and then in the
|
||||||
|
% parsing stage disambiguates them
|
||||||
|
%
|
||||||
|
% i don't like that, but for version 0.1 we're going to match the behavior of
|
||||||
|
% `so_scan` exactly, just for clarity
|
||||||
|
%
|
||||||
|
% however, note that is the token step, we can still write a string matcher to
|
||||||
|
% be useful later
|
||||||
|
%
|
||||||
|
% > ContractAddress base58-encoded 32 byte contract address with `ct_` prefix
|
||||||
|
% @end
|
||||||
|
smr_sf_ct() ->
|
||||||
|
smr_apistr58("ct").
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec smr_sf_sg() -> string_matcher().
|
||||||
|
% @doc
|
||||||
|
% string matcher for
|
||||||
|
%
|
||||||
|
% sg_....
|
||||||
|
%
|
||||||
|
% sophia's tokenizer tokenizes ak_.../sg_... etc as identifiers and then in the
|
||||||
|
% parsing stage disambiguates them
|
||||||
|
%
|
||||||
|
% i don't like that, but for version 0.1 we're going to match the behavior of
|
||||||
|
% `so_scan` exactly, just for clarity
|
||||||
|
%
|
||||||
|
% however, note that is the token step, we can still write a string matcher to
|
||||||
|
% be useful later
|
||||||
|
%
|
||||||
|
% > Signature base58-encoded 64 byte cryptographic signature with `sg_` prefix
|
||||||
|
% @end
|
||||||
|
smr_sf_sg() ->
|
||||||
|
smr_apistr58("sg").
|
||||||
|
|
||||||
|
|
||||||
|
-spec smr_apistr58(Prefix) -> string_matcher()
|
||||||
|
when Prefix :: string().
|
||||||
|
% @private
|
||||||
|
% string matcher for
|
||||||
|
%
|
||||||
|
% ak_...
|
||||||
|
% ct_...
|
||||||
|
% sg_...
|
||||||
|
%
|
||||||
|
% prefix is given as arg
|
||||||
|
%
|
||||||
|
% ... are base58 chars
|
||||||
|
% @end
|
||||||
|
smr_apistr58(Prefix) ->
|
||||||
|
smr_seq([smr_string(Prefix), smr_char($_), smr_plus(smr_base58char())]).
|
||||||
|
|
||||||
|
|
||||||
|
smr_base58char() ->
|
||||||
|
smr_oneofchars("123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz").
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
%---------------------------------------------------------
|
||||||
|
% API: string matcher primitive constructors
|
||||||
|
%---------------------------------------------------------
|
||||||
|
|
||||||
|
-spec smr_char(Char) -> string_matcher()
|
||||||
|
when Char :: integer().
|
||||||
|
% @doc
|
||||||
|
% string matcher for a specific char
|
||||||
|
%
|
||||||
|
% /[abc]/ <~> smr_union([smr_char($a), smr_char($b), smr_char($c)])
|
||||||
|
%
|
||||||
|
% @end
|
||||||
|
smr_char(X) when is_integer(X) ->
|
||||||
|
{smr_char, X}.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec smr_char_range(LowerBound, UpperBound) -> string_matcher()
|
||||||
|
when LowerBound :: integer(),
|
||||||
|
UpperBound :: integer().
|
||||||
|
% @doc
|
||||||
|
% string matcher for a range of characters
|
||||||
|
%
|
||||||
|
% /[a-z]/ <~> smr_char_range($a, $z)
|
||||||
|
% /[0-9]/ <~> smr_char_range($0, $9)
|
||||||
|
% @end
|
||||||
|
smr_char_range(X, Y) when is_integer(X), is_integer(Y) ->
|
||||||
|
{smr_char_range, X, Y}.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec smr_union(StringMatchers) -> string_matcher()
|
||||||
|
when StringMatchers :: [StringMatcher],
|
||||||
|
StringMatcher :: string_matcher().
|
||||||
|
% @doc
|
||||||
|
% String matcher that matches on the first matcher given that matches
|
||||||
|
%
|
||||||
|
% /[abc]/ <~> smr_union([smr_char($a), smr_char($b), smr_char($c)])
|
||||||
|
% /(foo|bar)/ <~> smr_union([smr_string("foo"), smr_string("bar")])
|
||||||
|
% @end
|
||||||
|
smr_union(List) when is_list(List) ->
|
||||||
|
{smr_union, List}.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec smr_seq(StringMatchers) -> string_matcher()
|
||||||
|
when StringMatchers :: [string_matcher()].
|
||||||
|
% @doc
|
||||||
|
% Match a sequence of matchers
|
||||||
|
%
|
||||||
|
% /abc/ <~> smr_seq([smr_char($a), smr_char($b), smr_char($c)])
|
||||||
|
%
|
||||||
|
% smr_string/1 just maps to a sequence of chars
|
||||||
|
% @end
|
||||||
|
smr_seq(List) when is_list(List) ->
|
||||||
|
{smr_seq, List}.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec smr_plus(Matcher) -> string_matcher()
|
||||||
|
when Matcher :: string_matcher().
|
||||||
|
% @doc
|
||||||
|
% "one or more of"; like the `+` operator in regexes.
|
||||||
|
%
|
||||||
|
% sm_plus(SMR, Src0) ->
|
||||||
|
% case string_match(SMR, Src0) of
|
||||||
|
% {strmatch, Str, Src1} -> sm_star(SMR, Str, Src1);
|
||||||
|
% no_strmatch -> no_strmatch
|
||||||
|
% end.
|
||||||
|
% @end
|
||||||
|
smr_plus(SMR) ->
|
||||||
|
{smr_plus, SMR}.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec smr_star(Matcher) -> string_matcher()
|
||||||
|
when Matcher :: string_matcher().
|
||||||
|
% @doc
|
||||||
|
% "zero or more of"; like the `*` operator in regexes.
|
||||||
|
%
|
||||||
|
% sm_star(SMR, Acc, Src0) ->
|
||||||
|
% case string_match(SMR, Src0) of
|
||||||
|
% % 0
|
||||||
|
% no_strmatch ->
|
||||||
|
% {strmatch, unicode:characters_to_list(Acc), Src0};
|
||||||
|
% % or more
|
||||||
|
% {strmatch, Str, Src1} ->
|
||||||
|
% sm_star(SMR, [Acc, Str], Src1)
|
||||||
|
% end.
|
||||||
|
% @end
|
||||||
|
smr_star(SMR) ->
|
||||||
|
{smr_star, SMR}.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec smr_dot() -> string_matcher().
|
||||||
|
% @doc
|
||||||
|
% matches every character; analogous to /./
|
||||||
|
%
|
||||||
|
% string_match(smr_dot, SrcStr) ->
|
||||||
|
% case SrcStr of
|
||||||
|
% [C | Rest] -> {strmatch, [C], Rest};
|
||||||
|
% [] -> no_strmatch
|
||||||
|
% end;
|
||||||
|
% @end
|
||||||
|
smr_dot() ->
|
||||||
|
smr_dot.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec smr_ncmatch(MustNotMatch, Match) -> string_matcher()
|
||||||
|
when MustNotMatch :: string_matcher(),
|
||||||
|
Match :: string_matcher().
|
||||||
|
% @doc
|
||||||
|
% Negative conditional match; analogous to `[^abc]` but more flexible
|
||||||
|
%
|
||||||
|
%
|
||||||
|
% /[^abc]/ <-> smr_ncmatch(smr_union([smr_char($a), smr_char($b), smr_char($c)]),
|
||||||
|
% smr_dot()).
|
||||||
|
%
|
||||||
|
%
|
||||||
|
% string_match({smr_ncmatch, MustNotMatch, Match}, SrcStr) ->
|
||||||
|
% case string_match(MustNotMatch, SrcStr) of
|
||||||
|
% no_strmatch -> string_match(Match, SrcStr);
|
||||||
|
% _ -> no_strmatch
|
||||||
|
% end.
|
||||||
|
%
|
||||||
|
% @end
|
||||||
|
smr_ncmatch(A, B) ->
|
||||||
|
{smr_ncmatch, A, B}.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
%---------------------------------------------------------
|
||||||
|
% string matcher helpers
|
||||||
|
%---------------------------------------------------------
|
||||||
|
|
||||||
|
-spec smr_string(Chars) -> string_matcher()
|
||||||
|
when Chars :: string().
|
||||||
|
% @doc
|
||||||
|
% matches chars given in sequence; basically like putting the string in raw in
|
||||||
|
% a regex
|
||||||
|
%
|
||||||
|
% /foo/ <~> smr_string("foo")
|
||||||
|
% <~> smr_seq([smr_char($f), smr_char($o), smr_char($o)])
|
||||||
|
%
|
||||||
|
% rewrite over smr_seq/1 and smr_char/1
|
||||||
|
%
|
||||||
|
% smr_string(String) when is_list(String) ->
|
||||||
|
% smr_seq([smr_char(C) || C <- String]).
|
||||||
|
% @end
|
||||||
|
smr_string(String) when is_list(String) ->
|
||||||
|
smr_seq([smr_char(C) || C <- String]).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec smr_oneofchars(Chars) -> UnionMatcher
|
||||||
|
when Chars :: string(),
|
||||||
|
UnionMatcher :: string_matcher().
|
||||||
|
% @doc
|
||||||
|
% String matcher for one of chars
|
||||||
|
%
|
||||||
|
% /[abc]/ <~> smr_costring("abc")
|
||||||
|
% <~> smr_union([smr_char($f), smr_char($o), smr_char($o)])
|
||||||
|
%
|
||||||
|
% this is the dual of smr_string/1. string puts chars in sequence, this puts
|
||||||
|
% chars in parallel.
|
||||||
|
%
|
||||||
|
% "costring" nomenclature is chosen specifically to annoy craig
|
||||||
|
%
|
||||||
|
% if you fix your stupid url schema i will consider changing this name
|
||||||
|
%
|
||||||
|
% the thing is though this is actually a good name, your url schema is just...
|
||||||
|
% well you know it's compact, so you have amazon beat. no page-long urls for
|
||||||
|
% gajumarket
|
||||||
|
%
|
||||||
|
% you know what, we're keeping both names
|
||||||
|
%
|
||||||
|
% i'm confusing myself, renaming to "oneofchars"
|
||||||
|
% @end
|
||||||
|
smr_oneofchars(Chars) ->
|
||||||
|
smr_union([smr_char(C) || C <- Chars]).
|
||||||
|
|
||||||
|
|
||||||
|
%%=======================================================================
|
||||||
|
%% INTERNALS: string matching logic
|
||||||
|
%%=======================================================================
|
||||||
|
|
||||||
|
|
||||||
|
-spec string_match(Matcher, Source) -> MaybeMatch
|
||||||
|
when Matcher :: string_matcher(),
|
||||||
|
Source :: string(),
|
||||||
|
MaybeMatch :: {strmatch, Matched :: string(), Rest :: string()}
|
||||||
|
| no_strmatch.
|
||||||
|
% @private
|
||||||
|
% See if the source matches the given matcher; returns
|
||||||
|
%
|
||||||
|
% %% NOTIONAL code
|
||||||
|
% string_match(/[abc]/, "abc") ->
|
||||||
|
% {strmatch, "a", "bc"}
|
||||||
|
% string_match(/[abc]/, "def") ->
|
||||||
|
% no_strmatch
|
||||||
|
% @end
|
||||||
|
string_match({smr_char, C}, SrcStr) ->
|
||||||
|
case SrcStr of
|
||||||
|
[X | Rest] when X =:= C -> {strmatch, [C], Rest};
|
||||||
|
_ -> no_strmatch
|
||||||
|
end;
|
||||||
|
string_match({smr_char_range, X, Y}, Src0) ->
|
||||||
|
case Src0 of
|
||||||
|
[C | Src1] when X =< C, C =< Y -> {strmatch, [C], Src1};
|
||||||
|
_ -> no_strmatch
|
||||||
|
end;
|
||||||
|
string_match({smr_union, SMRs}, Src0) ->
|
||||||
|
sm_union(SMRs, Src0);
|
||||||
|
string_match({smr_seq, SMRs}, Src0) ->
|
||||||
|
sm_seq(SMRs, [], Src0);
|
||||||
|
string_match({smr_plus, SMR}, Src0) ->
|
||||||
|
sm_plus(SMR, Src0);
|
||||||
|
string_match({smr_star, SMR}, Src0) ->
|
||||||
|
sm_star(SMR, [], Src0);
|
||||||
|
string_match(smr_dot, SrcStr) ->
|
||||||
|
case SrcStr of
|
||||||
|
[C | Rest] -> {strmatch, [C], Rest};
|
||||||
|
[] -> no_strmatch
|
||||||
|
end;
|
||||||
|
string_match({smr_ncmatch, MustNotMatch, Match}, SrcStr) ->
|
||||||
|
case string_match(MustNotMatch, SrcStr) of
|
||||||
|
no_strmatch -> string_match(Match, SrcStr);
|
||||||
|
_ -> no_strmatch
|
||||||
|
end.
|
||||||
|
|
||||||
|
|
||||||
|
% @private union must match *one* thing
|
||||||
|
sm_union([SMR | SMRs], Src0) ->
|
||||||
|
case string_match(SMR, Src0) of
|
||||||
|
no_strmatch -> sm_union(SMRs, Src0);
|
||||||
|
Match -> Match
|
||||||
|
end;
|
||||||
|
sm_union([], _) ->
|
||||||
|
no_strmatch.
|
||||||
|
|
||||||
|
|
||||||
|
% @private sequence must match *EACH* thing
|
||||||
|
sm_seq([SMR | SMRs], Acc, Src0) ->
|
||||||
|
case string_match(SMR, Src0) of
|
||||||
|
{strmatch, Str, Src1} -> sm_seq(SMRs, [Acc, Str], Src1);
|
||||||
|
no_strmatch -> no_strmatch
|
||||||
|
end;
|
||||||
|
sm_seq([], Acc, Src) ->
|
||||||
|
{strmatch, unicode:characters_to_list(Acc), Src}.
|
||||||
|
|
||||||
|
|
||||||
|
% @private plus matches at least one
|
||||||
|
sm_plus(SMR, Src0) ->
|
||||||
|
case string_match(SMR, Src0) of
|
||||||
|
{strmatch, Str, Src1} -> sm_star(SMR, Str, Src1);
|
||||||
|
no_strmatch -> no_strmatch
|
||||||
|
end.
|
||||||
|
|
||||||
|
|
||||||
|
% @private star matches 0 or more
|
||||||
|
sm_star(SMR, Acc, Src0) ->
|
||||||
|
case string_match(SMR, Src0) of
|
||||||
|
% 0
|
||||||
|
no_strmatch ->
|
||||||
|
{strmatch, unicode:characters_to_list(Acc), Src0};
|
||||||
|
% or more
|
||||||
|
{strmatch, Str, Src1} ->
|
||||||
|
sm_star(SMR, [Acc, Str], Src1)
|
||||||
|
end.
|
||||||
|
|
||||||
@@ -0,0 +1,790 @@
|
|||||||
|
% @doc
|
||||||
|
% Ref: so_scan.erl
|
||||||
|
%
|
||||||
|
% This file contains a sophia tokenizer written in straightforward erlang with data
|
||||||
|
% types that are sane.
|
||||||
|
%
|
||||||
|
% For MVP it mimics the behavior of so_scan exactly, in terms of like what its
|
||||||
|
% definition of a token is and so on.
|
||||||
|
%
|
||||||
|
% sfc_so_scan.erl contains a compatibility layer that should agree with so_scan
|
||||||
|
% exactly. It converts the data types here to the shapes that so_scan outputs.
|
||||||
|
%
|
||||||
|
% This is for two reasons:
|
||||||
|
%
|
||||||
|
% 1. in order to enable testing the two modules against each other, and
|
||||||
|
% 2. to future-proof in case we decide to incrementally incorporate the sfc
|
||||||
|
% code into the legacy sophia compiler
|
||||||
|
% @end
|
||||||
|
-module(sfc_tokens).
|
||||||
|
|
||||||
|
% meta
|
||||||
|
-export([
|
||||||
|
token_types_parse_order/0,
|
||||||
|
kwds/0
|
||||||
|
]).
|
||||||
|
|
||||||
|
-export([
|
||||||
|
take_while/2,
|
||||||
|
take_while/3,
|
||||||
|
take_block/1,
|
||||||
|
take_block_item/1,
|
||||||
|
strings/2,
|
||||||
|
slurp_plist/1
|
||||||
|
]).
|
||||||
|
|
||||||
|
% token slurping
|
||||||
|
-export([
|
||||||
|
indent_level/1,
|
||||||
|
is_significant/1,
|
||||||
|
filter_significant/1,
|
||||||
|
significant_tokens/1,
|
||||||
|
tokens/1,
|
||||||
|
slurp_token/2,
|
||||||
|
slurp_token_types/3,
|
||||||
|
slurp_token_of_type/3,
|
||||||
|
new_pos/2
|
||||||
|
]).
|
||||||
|
|
||||||
|
-include("$sfc_include/sfc.hrl").
|
||||||
|
|
||||||
|
|
||||||
|
%=======================================================
|
||||||
|
% API: functions
|
||||||
|
%=======================================================
|
||||||
|
|
||||||
|
-spec strings(N, Tokens) -> AtMostNStrings
|
||||||
|
when N :: non_neg_integer(),
|
||||||
|
Tokens :: [sfc_token()],
|
||||||
|
AtMostNStrings :: [string()].
|
||||||
|
% @doc return the strings of the first N tokens
|
||||||
|
|
||||||
|
strings(N, [#sfc_token{string = S} | Rest]) when is_integer(N), N >= 1 ->
|
||||||
|
[S | strings(N-1, Rest)];
|
||||||
|
strings(_, []) ->
|
||||||
|
[];
|
||||||
|
strings(0, _) ->
|
||||||
|
[].
|
||||||
|
|
||||||
|
|
||||||
|
% used by parser
|
||||||
|
%
|
||||||
|
% a block is a column-delimited list of block items
|
||||||
|
%
|
||||||
|
% BLOCK =
|
||||||
|
% foo
|
||||||
|
% ...
|
||||||
|
% bar
|
||||||
|
% ...
|
||||||
|
% baz
|
||||||
|
% ...
|
||||||
|
%
|
||||||
|
% BLOCK_ITEM =
|
||||||
|
% foo
|
||||||
|
% ...
|
||||||
|
|
||||||
|
-spec take_block(Tokens) -> {BlockTokens, Rest}
|
||||||
|
when Tokens :: [sfc_token()],
|
||||||
|
BlockTokens :: Tokens,
|
||||||
|
Rest :: Tokens.
|
||||||
|
% @doc
|
||||||
|
% takes all tokens whose column position is >= the column position of
|
||||||
|
% the head token
|
||||||
|
|
||||||
|
take_block([H = #sfc_token{pos = {_, BlkCol}} | T]) ->
|
||||||
|
TokenInBlock =
|
||||||
|
fun(#sfc_token{pos = {_, TkCol}}) ->
|
||||||
|
BlkCol =< TkCol
|
||||||
|
end,
|
||||||
|
take_while(TokenInBlock, [H], T);
|
||||||
|
take_block([]) ->
|
||||||
|
{[], []}.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec take_block_item(Tokens) -> {ItemTokens, Rest}
|
||||||
|
when Tokens :: [sfc_token()],
|
||||||
|
ItemTokens :: Tokens,
|
||||||
|
Rest :: Tokens.
|
||||||
|
% @doc
|
||||||
|
% takes all tokens whose column position is > the column position of
|
||||||
|
% the head token
|
||||||
|
|
||||||
|
take_block_item([H = #sfc_token{pos = {_, ItemCol}} | T]) ->
|
||||||
|
TokenInItem =
|
||||||
|
fun(#sfc_token{pos = {_, TkCol}}) ->
|
||||||
|
ItemCol < TkCol
|
||||||
|
end,
|
||||||
|
take_while(TokenInItem, [H], T);
|
||||||
|
take_block_item([]) ->
|
||||||
|
{[], []}.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec slurp_plist(Tokens) -> Result
|
||||||
|
when Tokens :: [Token],
|
||||||
|
Result :: {slurp, PList :: Tokens, After :: Tokens}
|
||||||
|
| {error, Mismatch},
|
||||||
|
Mismatch :: {fixme, mismatch, OpenStack, ClosedBy},
|
||||||
|
OpenStack :: Tokens,
|
||||||
|
ClosedBy :: none | {value, Token},
|
||||||
|
Token :: sfc_token().
|
||||||
|
|
||||||
|
% @doc
|
||||||
|
% the verbiage here is `slurp' rather than `take' because we insist on
|
||||||
|
% delimiter matching.
|
||||||
|
%
|
||||||
|
% typical happy path:
|
||||||
|
% "(foo, bar) baz" ~> {slurp, "(foo, bar)", "baz"}
|
||||||
|
% "() baz" ~> {slurp, "()", "baz"}
|
||||||
|
% "foo () baz" ~> {slurp, "", "foo () baz"}
|
||||||
|
% "(foo, bar) baz" ~> {slurp, "(foo, bar)", "baz"}
|
||||||
|
%
|
||||||
|
% typical sad path:
|
||||||
|
% "(foo, bar]" ~> {mismatch, ["("], {value, "]"}}
|
||||||
|
% "(foo, bar" ~> {mismatch, ["("], none}
|
||||||
|
% "([foo, bar)" ~> {mismatch, ["[", "("], {value, ")"}}
|
||||||
|
%
|
||||||
|
% counterintuitive:
|
||||||
|
% "[foo, bar) baz" ~> {slurp, "", "[foo, bar) baz"}
|
||||||
|
% "~!!\inv4l1d syntax" ~> {slurp, "", "~!!\inv4l1d syntax"}
|
||||||
|
% "(foo, bar)(baz)" ~> {slurp, "(foo bar)", "(baz)"}
|
||||||
|
%
|
||||||
|
% the only "syntax checking" occurring is making sure the delimiter
|
||||||
|
% stack pushes and pops properly
|
||||||
|
%
|
||||||
|
% please note that on mismatch, the list of open delimiters is
|
||||||
|
% returned in STACK order, meaning the most recent open delimiters
|
||||||
|
% first. this is more convenient for programs, but might be
|
||||||
|
% counterintuitive to end-users (who are programmers, entirely
|
||||||
|
% unfamiliar with notions like stacks and open/close delimiters)
|
||||||
|
|
||||||
|
slurp_plist([Hd = #sfc_token{string = "("} | Tl]) ->
|
||||||
|
slurp_dlist([Hd], [Hd], Tl);
|
||||||
|
slurp_plist(Tks) ->
|
||||||
|
{slurp, [], Tks}.
|
||||||
|
|
||||||
|
|
||||||
|
% happy terminal case: stack popped entirely
|
||||||
|
slurp_dlist(All, [], NewTokens) ->
|
||||||
|
{slurp, lists:reverse(All), NewTokens};
|
||||||
|
% WMA stack is nonempty
|
||||||
|
% happy cases of opens getting popped
|
||||||
|
slurp_dlist(All, [#sfc_token{string = "("} | NewOpen],
|
||||||
|
[#sfc_token{string = ")"} = Tk | NewTks]) ->
|
||||||
|
slurp_dlist([Tk | All], NewOpen, NewTks);
|
||||||
|
slurp_dlist(All, [#sfc_token{string = "["} | NewOpen],
|
||||||
|
[#sfc_token{string = "]"} = Tk | NewTks]) ->
|
||||||
|
slurp_dlist([Tk | All], NewOpen, NewTks);
|
||||||
|
slurp_dlist(All, [#sfc_token{string = "{"} | NewOpen],
|
||||||
|
[#sfc_token{string = "}"} = Tk | NewTks]) ->
|
||||||
|
slurp_dlist([Tk | All], NewOpen, NewTks);
|
||||||
|
% happy: open delimiters getting pushed
|
||||||
|
slurp_dlist(All, Opens, [#sfc_token{string = "("} = Tk | NewTks]) ->
|
||||||
|
slurp_dlist([Tk | All], [Tk | Opens], NewTks);
|
||||||
|
slurp_dlist(All, Opens, [#sfc_token{string = "["} = Tk | NewTks]) ->
|
||||||
|
slurp_dlist([Tk | All], [Tk | Opens], NewTks);
|
||||||
|
slurp_dlist(All, Opens, [#sfc_token{string = "{"} = Tk | NewTks]) ->
|
||||||
|
slurp_dlist([Tk | All], [Tk | Opens], NewTks);
|
||||||
|
% sad: mismatch cases
|
||||||
|
slurp_dlist(All, Opens, []) ->
|
||||||
|
{error, {fixme, mismatch, Opens, none}};
|
||||||
|
slurp_dlist(All, Opens, [#sfc_token{string = "}"} = BadClose | _]) ->
|
||||||
|
{error, {fixme, mismatch, Opens, {value, BadClose}}};
|
||||||
|
slurp_dlist(All, Opens, [#sfc_token{string = "]"} = BadClose | _]) ->
|
||||||
|
{error, {fixme, mismatch, Opens, {value, BadClose}}};
|
||||||
|
slurp_dlist(All, Opens, [#sfc_token{string = ")"} = BadClose | _]) ->
|
||||||
|
{error, {fixme, mismatch, Opens, {value, BadClose}}};
|
||||||
|
% general case: non-terminal token gets pushed
|
||||||
|
slurp_dlist(All, Opens, [Tk | NewTks]) ->
|
||||||
|
slurp_dlist([Tk | All], Opens, NewTks).
|
||||||
|
|
||||||
|
|
||||||
|
%-------------------------------------------------------
|
||||||
|
% API: meta info
|
||||||
|
%
|
||||||
|
% This is parse order definition, list of keywords, etc
|
||||||
|
%
|
||||||
|
% -export([
|
||||||
|
% token_types_parse_order/0,
|
||||||
|
% kwds/0
|
||||||
|
% ]).
|
||||||
|
%-------------------------------------------------------
|
||||||
|
|
||||||
|
-spec token_types_parse_order() -> [sfc_token_type()].
|
||||||
|
% @doc
|
||||||
|
% list of sophia tokens in parse order (if an earlier type matches, the later
|
||||||
|
% type isn't even checked)
|
||||||
|
%
|
||||||
|
%
|
||||||
|
% Rules =
|
||||||
|
% %% Comments and whitespace
|
||||||
|
% [ CommentStart
|
||||||
|
% , {"//.*", skip()}
|
||||||
|
% , {WS, skip()}
|
||||||
|
%
|
||||||
|
% %% Special characters
|
||||||
|
% , {"\\.\\.|[,.;()\\[\\]{}]", symbol()}
|
||||||
|
%
|
||||||
|
% %% Literals
|
||||||
|
% , {CHAR, token(char, fun parse_char/1)}
|
||||||
|
% , {STRING, token(string, fun parse_string/1)}
|
||||||
|
% , {HEX, token(hex, fun parse_hex/1)}
|
||||||
|
% , {INT, token(int, fun parse_int/1)}
|
||||||
|
% , {BYTES, token(bytes, fun parse_bytes/1)}
|
||||||
|
%
|
||||||
|
% %% Identifiers (qualified first!)
|
||||||
|
% , {QID, token(qid, fun(S) -> string:tokens(S, ".") end)}
|
||||||
|
% , {QCON, token(qcon, fun(S) -> string:tokens(S, ".") end)}
|
||||||
|
% , {TVAR, token(tvar)}
|
||||||
|
% , override({ID, token(id)}, {KW, symbol()}) %% Keywords override identifiers. Need to
|
||||||
|
% , {CON, token(con)} %% use override to avoid lexing "lettuce"
|
||||||
|
% %% as ['let', {id, "tuce"}].
|
||||||
|
% %% Operators
|
||||||
|
% , {OP, symbol()}
|
||||||
|
% ],
|
||||||
|
% @end
|
||||||
|
|
||||||
|
token_types_parse_order() ->
|
||||||
|
% written in this style to be maximally editable
|
||||||
|
lists:flatten([
|
||||||
|
% comments and whitespace
|
||||||
|
lcom, bcom, ws,
|
||||||
|
punct,
|
||||||
|
% literals
|
||||||
|
char, string, int16, int10, bytes,
|
||||||
|
ak, ct, sg,
|
||||||
|
% qualified names need to go ahead of unqualifieds
|
||||||
|
qid, qcon,
|
||||||
|
tvar,
|
||||||
|
% keywords need to be parsed ahead of ids
|
||||||
|
kwd, id,
|
||||||
|
con,
|
||||||
|
% ops [=, =>, >>], punctuation (parens/braces)
|
||||||
|
op
|
||||||
|
]).
|
||||||
|
|
||||||
|
|
||||||
|
-spec kwds() -> list(string()).
|
||||||
|
% @doc list of sophia kwds
|
||||||
|
|
||||||
|
kwds() ->
|
||||||
|
["contract", "include", "let", "switch", "type", "record", "datatype",
|
||||||
|
"if", "elif", "else", "function", "stateful", "payable", "true", "false",
|
||||||
|
"mod", "public", "entrypoint", "private", "indexed", "namespace",
|
||||||
|
"interface", "main", "using", "as", "for", "hiding", "band", "bor",
|
||||||
|
"bxor", "bnot"].
|
||||||
|
|
||||||
|
|
||||||
|
%-------------------------------------------------------
|
||||||
|
% API: token slurping
|
||||||
|
%
|
||||||
|
% -export([
|
||||||
|
% tokens/1,
|
||||||
|
% slurp_token/1,
|
||||||
|
% slurp_token_types/2,
|
||||||
|
% slurp_token_of_type/2
|
||||||
|
% ]).
|
||||||
|
%-------------------------------------------------------
|
||||||
|
|
||||||
|
% Token accessors
|
||||||
|
-spec indent_level(sfc_token()) -> pos_integer().
|
||||||
|
|
||||||
|
indent_level(#sfc_token{pos = {_, IndentLevel}}) ->
|
||||||
|
IndentLevel.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec significant_tokens(SrcStr) -> Result
|
||||||
|
when SrcStr :: iolist(),
|
||||||
|
Result :: {ok, Tokens}
|
||||||
|
| {error, sfc_err()},
|
||||||
|
Tokens :: [sfc_token()].
|
||||||
|
|
||||||
|
significant_tokens(SrcStr) ->
|
||||||
|
case tokens(SrcStr) of
|
||||||
|
{ok, Tokens} ->
|
||||||
|
{ok, filter_significant(Tokens)};
|
||||||
|
Error ->
|
||||||
|
Error
|
||||||
|
end.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec filter_significant(Tokens) -> SignificantTokens
|
||||||
|
when Tokens :: [sfc_token()],
|
||||||
|
SignificantTokens :: Tokens.
|
||||||
|
|
||||||
|
filter_significant(Tokens) ->
|
||||||
|
lists:filter(fun is_significant/1, Tokens).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec is_significant(Token) -> boolean()
|
||||||
|
when Token :: sfc_token().
|
||||||
|
|
||||||
|
is_significant(#sfc_token{type = bcom}) -> false;
|
||||||
|
is_significant(#sfc_token{type = lcom}) -> false;
|
||||||
|
is_significant(#sfc_token{type = ws}) -> false;
|
||||||
|
is_significant(_) -> true.
|
||||||
|
|
||||||
|
|
||||||
|
-spec tokens(SrcStr) -> Result
|
||||||
|
when SrcStr :: iolist(),
|
||||||
|
Result :: {ok, Tokens}
|
||||||
|
| {error, sfc_err()},
|
||||||
|
Tokens :: [sfc_token()].
|
||||||
|
% @doc
|
||||||
|
% Recursively parse all tokens off the front end of the string. `Rest' is
|
||||||
|
% the first tail of the string for which no token parser succeeds.
|
||||||
|
%
|
||||||
|
% Semantically, `Rest'` being nonempty amounts to the presence of an illegal
|
||||||
|
% character.
|
||||||
|
|
||||||
|
tokens(S) ->
|
||||||
|
% defensive normalization
|
||||||
|
tokens([], {1, 1}, unicode:characters_to_nfc_list(S)).
|
||||||
|
|
||||||
|
tokens(Stack, _FinalPos, "") ->
|
||||||
|
{ok, lists:reverse(Stack)};
|
||||||
|
tokens(Stack, Pos, SrcStr) ->
|
||||||
|
case slurp_token(Pos, SrcStr) of
|
||||||
|
{tokmatch, NewToken = #sfc_token{string = TokStr},
|
||||||
|
NewSrcStr} ->
|
||||||
|
NewPos = new_pos(Pos, TokStr),
|
||||||
|
tokens([NewToken | Stack], NewPos, NewSrcStr);
|
||||||
|
no_tokmatch ->
|
||||||
|
PrevTokens = lists:reverse(Stack),
|
||||||
|
Err = #sfc_err_no_tokmatch{prev_tokens = PrevTokens,
|
||||||
|
break_pos = Pos,
|
||||||
|
rest = SrcStr},
|
||||||
|
{error, Err};
|
||||||
|
% FIXME so_scan bad
|
||||||
|
% this is so fucking stupid
|
||||||
|
% so_scan for some reason allows unterminated block comments at
|
||||||
|
% the end of files
|
||||||
|
%
|
||||||
|
% for now we're just going to agree with so_scan
|
||||||
|
{ierr, unterminated_block_comment} ->
|
||||||
|
PrevTokens = lists:reverse(Stack),
|
||||||
|
Err = #sfc_err_bcom_unterminated{prev_tokens = PrevTokens,
|
||||||
|
break_pos = Pos,
|
||||||
|
rest = SrcStr},
|
||||||
|
{error, Err};
|
||||||
|
Error = {error, _} ->
|
||||||
|
Error
|
||||||
|
end.
|
||||||
|
|
||||||
|
% alright some bullshit here
|
||||||
|
%
|
||||||
|
% we're computing the line/column position of each string
|
||||||
|
%
|
||||||
|
% however this is meant to be compatible with so_scan, so it's a bit wonky
|
||||||
|
% because regex list bullshit.
|
||||||
|
%
|
||||||
|
% recall that so_scan operates on the list representation of the utf-8 encoded
|
||||||
|
% bytes; this is different than on a list of bignum codepoints (e.g.
|
||||||
|
% unicode:characters_to_nfc_list(Bytes)); let's suppose some stupid complicated
|
||||||
|
% foreign character which a sane language would simply criminalize has list
|
||||||
|
% representation [ABC], but byte representation <<A,B,C>>
|
||||||
|
%
|
||||||
|
% as far as so_scan is concerned, this means the character ABC consumes 3
|
||||||
|
% columns. the only exception is tab characters, which always fast-forward to
|
||||||
|
% the next tab stop, which is 1-indexed because god hates all of us
|
||||||
|
%
|
||||||
|
% so the tab-stops are
|
||||||
|
% 1 9 17 25 33 ...
|
||||||
|
%
|
||||||
|
% column position is determined in all cases by byte order, EXCEPT for $\t
|
||||||
|
% which goes to the next tab stop
|
||||||
|
%
|
||||||
|
% so in general, for the token string, we need to convert to bytes first,
|
||||||
|
% then handle `\t` bytes as a special case
|
||||||
|
%
|
||||||
|
% again in the tokenizer context, we're assuming that the input to our
|
||||||
|
% tokenizer is an nfc-list which has a flat list of each unicode character in
|
||||||
|
% codepoint form
|
||||||
|
%
|
||||||
|
% here we're just converting it to byte form, then computing columns based on
|
||||||
|
% bytes
|
||||||
|
new_pos(OldPos, TokStr) ->
|
||||||
|
new_pos_bytes(unicode:characters_to_binary(TokStr), OldPos).
|
||||||
|
|
||||||
|
% newline just goes to {L+1, 1}
|
||||||
|
new_pos_bytes(<<$\n:8, Rest/bytes>>, _Pos = {L, _}) ->
|
||||||
|
NewPos = {L+1, 1},
|
||||||
|
new_pos_bytes(Rest, NewPos);
|
||||||
|
new_pos_bytes(<<$\t:8, Rest/bytes>>, _Pos = {Linum, Colnum1}) ->
|
||||||
|
% stinky wet sweaty robots need 1-based indexing
|
||||||
|
% so tab stops are at
|
||||||
|
% 1 9 17 25
|
||||||
|
% super awesome dry silicon robots use 0-based indexing
|
||||||
|
% so tab stops are at
|
||||||
|
% 0 8 16 25
|
||||||
|
Colnum0 = Colnum1 - 1,
|
||||||
|
% 0 based is based
|
||||||
|
NextTabstop0 = next_tabstop8(Colnum0),
|
||||||
|
NextTabstop1 = NextTabstop0 + 1,
|
||||||
|
NextPos = {Linum, NextTabstop1},
|
||||||
|
new_pos_bytes(Rest, NextPos);
|
||||||
|
new_pos_bytes(<<_:8, Rest/bytes>>, _Pos = {Linum, Colnum1}) ->
|
||||||
|
% in general advance by 1
|
||||||
|
new_pos_bytes(Rest, {Linum, Colnum1 + 1});
|
||||||
|
new_pos_bytes(<<>>, FinalPos) ->
|
||||||
|
FinalPos.
|
||||||
|
|
||||||
|
% 0 8 16 24 etc
|
||||||
|
% 0*8 1*8 2*8 3*8 etc
|
||||||
|
next_tabstop8(Col0) when Col0 >= 0 ->
|
||||||
|
% Col0 = PrevTabQ*8 + PrevTabR
|
||||||
|
PrevTabQ = Col0 div 8,
|
||||||
|
PrevTabR = Col0 rem 8,
|
||||||
|
Col0 = PrevTabQ*8 + PrevTabR,
|
||||||
|
NextTabQ = PrevTabQ + 1,
|
||||||
|
NextTabCol0 = NextTabQ*8,
|
||||||
|
NextTabCol0.
|
||||||
|
|
||||||
|
%% copied from so_scan_lib.erl just to match behavior
|
||||||
|
%-define(TAB_SIZE, 8).
|
||||||
|
%
|
||||||
|
%next_pos([], P) -> P;
|
||||||
|
%next_pos([$\n | S], {L, _}) -> next_pos(S, {L + 1, 1});
|
||||||
|
%next_pos([$\t | S], {L, C}) -> next_pos(S, {L, (C + ?TAB_SIZE - 1) div ?TAB_SIZE * ?TAB_SIZE + 1});
|
||||||
|
%next_pos([_ | S], {L, C}) -> next_pos(S, {L, C + 1}).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec slurp_token(Pos, SrcStr) -> Result
|
||||||
|
when Pos :: sfc_pos(),
|
||||||
|
SrcStr :: string(),
|
||||||
|
Result :: {tokmatch, Token, Rest}
|
||||||
|
| no_tokmatch
|
||||||
|
| {error, sfc_err()}
|
||||||
|
| {ierr, unterminated_block_comment},
|
||||||
|
Token :: sfc_token(),
|
||||||
|
Rest :: string().
|
||||||
|
% @doc
|
||||||
|
% grab a single token off the front of the string according to
|
||||||
|
% `token_types_parse_order/0'
|
||||||
|
|
||||||
|
slurp_token(Pos, SrcStr) ->
|
||||||
|
% this is the easiest format if i need to fuck with it
|
||||||
|
slurp_token_types(token_types_parse_order(), Pos, SrcStr).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-spec slurp_token_types(ParseOrder, Pos, SrcStr) -> Result
|
||||||
|
when ParseOrder :: [sfc_token_type()],
|
||||||
|
Pos :: sfc_pos(),
|
||||||
|
SrcStr :: string(),
|
||||||
|
Result :: {tokmatch, Token, Rest}
|
||||||
|
| no_tokmatch
|
||||||
|
| {error, sfc_err()}
|
||||||
|
| {ierr, unterminated_block_comment},
|
||||||
|
Token :: sfc_token(),
|
||||||
|
Rest :: string().
|
||||||
|
% @doc
|
||||||
|
% grab a single token off the front of the string according to
|
||||||
|
% `token_types_parse_order/0'
|
||||||
|
|
||||||
|
slurp_token_types([TokenType | TTs], Pos, SrcStr) ->
|
||||||
|
case slurp_token_of_type(TokenType, Pos, SrcStr) of
|
||||||
|
Match = {tokmatch, _, _} -> Match;
|
||||||
|
no_tokmatch -> slurp_token_types(TTs, Pos, SrcStr);
|
||||||
|
IErr = {ierr, _} -> IErr;
|
||||||
|
Error = {error, _} -> Error
|
||||||
|
end;
|
||||||
|
slurp_token_types([], _Pos, _SrcStr) ->
|
||||||
|
no_tokmatch.
|
||||||
|
|
||||||
|
|
||||||
|
-spec slurp_token_of_type(TokenType, Pos, SrcStr) -> MaybeToken
|
||||||
|
when TokenType :: sfc_token_type(),
|
||||||
|
Pos :: sfc_pos(),
|
||||||
|
SrcStr :: string(),
|
||||||
|
MaybeToken :: {tokmatch, Token, Rest}
|
||||||
|
| no_tokmatch
|
||||||
|
| {error, sfc_err()}
|
||||||
|
| {ierr, unterminated_block_comment},
|
||||||
|
Token :: sfc_token(),
|
||||||
|
Rest :: string().
|
||||||
|
% @doc
|
||||||
|
% match a sophia token of a given type off the front of the string
|
||||||
|
% @end
|
||||||
|
|
||||||
|
% COMMENTS AND WHITESPACE: lcom, bcom, ws
|
||||||
|
%
|
||||||
|
% sophia line comment
|
||||||
|
%
|
||||||
|
% i am not going to bother writing a string matcher thing for this
|
||||||
|
% FIXME: make a string matcher for line comments
|
||||||
|
slurp_token_of_type(lcom, Pos, SrcStr) ->
|
||||||
|
case SrcStr of
|
||||||
|
"//" ++ _ ->
|
||||||
|
{Line, Rest} = takeline("", SrcStr),
|
||||||
|
Token = #sfc_token{type = lcom,
|
||||||
|
pos = Pos,
|
||||||
|
string = Line},
|
||||||
|
{tokmatch, Token, Rest};
|
||||||
|
_ ->
|
||||||
|
no_tokmatch
|
||||||
|
end;
|
||||||
|
% Block comments cannot have a string matcher because they have a whole stack
|
||||||
|
% thing keeping track of depth because of nested block comments
|
||||||
|
slurp_token_of_type(bcom, Pos, SrcStr0) ->
|
||||||
|
case SrcStr0 of
|
||||||
|
"/*" ++ SrcStr1 ->
|
||||||
|
case bcom("/*", 1, SrcStr1) of
|
||||||
|
{ok, CommentStr, SrcStr2} ->
|
||||||
|
Token = #sfc_token{type = bcom,
|
||||||
|
pos = Pos,
|
||||||
|
string = CommentStr},
|
||||||
|
{tokmatch, Token, SrcStr2};
|
||||||
|
Error ->
|
||||||
|
Error
|
||||||
|
end;
|
||||||
|
_ ->
|
||||||
|
no_tokmatch
|
||||||
|
end;
|
||||||
|
slurp_token_of_type(ws, Pos, SrcStr) ->
|
||||||
|
WhitespaceMatcher = sfc_strmatch:smr_sf_ws(),
|
||||||
|
case sfc_strmatch:match(WhitespaceMatcher, SrcStr) of
|
||||||
|
no_strmatch ->
|
||||||
|
no_tokmatch;
|
||||||
|
{strmatch, WS, Rest} ->
|
||||||
|
Token = #sfc_token{type = ws,
|
||||||
|
pos = Pos,
|
||||||
|
string = WS},
|
||||||
|
{tokmatch, Token, Rest}
|
||||||
|
end;
|
||||||
|
% KEYWORDS, OPERATORS, PUNCTUATION: kwd, op, punct
|
||||||
|
%
|
||||||
|
% all the kwds are valid ids, so we match as an id and then check if it's a
|
||||||
|
% kwd
|
||||||
|
%
|
||||||
|
% kwds are allowed to be prefixes for user-defined variable names; e.g.
|
||||||
|
% "lettuce" should be parsed as an id, not as ["let", "tuce"]; for this reason
|
||||||
|
% we need to be careful with greedily parsing kwds
|
||||||
|
%
|
||||||
|
% we know kwds are always ids, so we parse it as an id and see if it's one
|
||||||
|
% of the kwds
|
||||||
|
slurp_token_of_type(kwd, Pos, SrcStr) ->
|
||||||
|
case slurp_token_of_type(id, Pos, SrcStr) of
|
||||||
|
{tokmatch, IdTok = #sfc_token{string = IdStr}, Rest} ->
|
||||||
|
case lists:member(IdStr, kwds()) of
|
||||||
|
false ->
|
||||||
|
no_tokmatch;
|
||||||
|
true ->
|
||||||
|
KwTok = IdTok#sfc_token{type = kwd},
|
||||||
|
{tokmatch, KwTok, Rest}
|
||||||
|
end;
|
||||||
|
no_tokmatch ->
|
||||||
|
no_tokmatch
|
||||||
|
end;
|
||||||
|
slurp_token_of_type(op, Pos, SrcStr) ->
|
||||||
|
case sfc_strmatch:match(sfc_strmatch:smr_sf_op(), SrcStr) of
|
||||||
|
{strmatch, Str, Rest} ->
|
||||||
|
Token = #sfc_token{type = op, pos = Pos, string = Str},
|
||||||
|
{tokmatch, Token, Rest};
|
||||||
|
no_strmatch ->
|
||||||
|
no_tokmatch
|
||||||
|
end;
|
||||||
|
slurp_token_of_type(punct, Pos, SrcStr) ->
|
||||||
|
case sfc_strmatch:match(sfc_strmatch:smr_sf_punct(), SrcStr) of
|
||||||
|
{strmatch, Str, Rest} ->
|
||||||
|
Token = #sfc_token{type = punct, pos = Pos, string = Str},
|
||||||
|
{tokmatch, Token, Rest};
|
||||||
|
no_strmatch ->
|
||||||
|
no_tokmatch
|
||||||
|
end;
|
||||||
|
% SOPHIA VARIABLE NAMES: id, con, qid, qcon, tvar
|
||||||
|
slurp_token_of_type(id, Pos, SrcStr) ->
|
||||||
|
case sfc_strmatch:match(sfc_strmatch:smr_sf_id(), SrcStr) of
|
||||||
|
{strmatch, IdStr, Rest} ->
|
||||||
|
Token = #sfc_token{type = id, pos = Pos, string = IdStr},
|
||||||
|
{tokmatch, Token, Rest};
|
||||||
|
no_strmatch ->
|
||||||
|
no_tokmatch
|
||||||
|
end;
|
||||||
|
slurp_token_of_type(con, Pos, SrcStr) ->
|
||||||
|
case sfc_strmatch:match(sfc_strmatch:smr_sf_con(), SrcStr) of
|
||||||
|
{strmatch, Str, Rest} ->
|
||||||
|
Token = #sfc_token{type = con, pos = Pos, string = Str},
|
||||||
|
{tokmatch, Token, Rest};
|
||||||
|
no_strmatch ->
|
||||||
|
no_tokmatch
|
||||||
|
end;
|
||||||
|
slurp_token_of_type(qid, Pos, SrcStr) ->
|
||||||
|
case sfc_strmatch:match(sfc_strmatch:smr_sf_qid(), SrcStr) of
|
||||||
|
{strmatch, Str, Rest} ->
|
||||||
|
Token = #sfc_token{type = qid, pos = Pos, string = Str},
|
||||||
|
{tokmatch, Token, Rest};
|
||||||
|
no_strmatch ->
|
||||||
|
no_tokmatch
|
||||||
|
end;
|
||||||
|
slurp_token_of_type(qcon, Pos, SrcStr) ->
|
||||||
|
case sfc_strmatch:match(sfc_strmatch:smr_sf_qcon(), SrcStr) of
|
||||||
|
{strmatch, Str, Rest} ->
|
||||||
|
Token = #sfc_token{type = qcon, pos = Pos, string = Str},
|
||||||
|
{tokmatch, Token, Rest};
|
||||||
|
no_strmatch ->
|
||||||
|
no_tokmatch
|
||||||
|
end;
|
||||||
|
slurp_token_of_type(tvar, Pos, SrcStr) ->
|
||||||
|
case sfc_strmatch:match(sfc_strmatch:smr_sf_tvar(), SrcStr) of
|
||||||
|
{strmatch, Str, Rest} ->
|
||||||
|
Token = #sfc_token{type = tvar, pos = Pos, string = Str},
|
||||||
|
{tokmatch, Token, Rest};
|
||||||
|
no_strmatch ->
|
||||||
|
no_tokmatch
|
||||||
|
end;
|
||||||
|
slurp_token_of_type(int16, Pos, SrcStr) ->
|
||||||
|
case sfc_strmatch:match(sfc_strmatch:smr_sf_int16(), SrcStr) of
|
||||||
|
{strmatch, Str, Rest} ->
|
||||||
|
Token = #sfc_token{type = int16, pos = Pos, string = Str},
|
||||||
|
{tokmatch, Token, Rest};
|
||||||
|
no_strmatch ->
|
||||||
|
no_tokmatch
|
||||||
|
end;
|
||||||
|
slurp_token_of_type(int10, Pos, SrcStr) ->
|
||||||
|
case sfc_strmatch:match(sfc_strmatch:smr_sf_int10(), SrcStr) of
|
||||||
|
{strmatch, Str, Rest} ->
|
||||||
|
Token = #sfc_token{type = int10, pos = Pos, string = Str},
|
||||||
|
{tokmatch, Token, Rest};
|
||||||
|
no_strmatch ->
|
||||||
|
no_tokmatch
|
||||||
|
end;
|
||||||
|
% LITERAL PARSERS: char, string, hex, int, bytes10, bytes16,
|
||||||
|
% ak, ct, sg
|
||||||
|
%
|
||||||
|
% char: sophia char literal
|
||||||
|
slurp_token_of_type(ak, Pos, SrcStr) ->
|
||||||
|
StringMatcher = sfc_strmatch:smr_sf_ak(),
|
||||||
|
case sfc_strmatch:match(StringMatcher, SrcStr) of
|
||||||
|
no_strmatch ->
|
||||||
|
no_tokmatch;
|
||||||
|
{strmatch, TokenStr, Rest} ->
|
||||||
|
Token = #sfc_token{type = ak, pos = Pos, string = TokenStr},
|
||||||
|
{tokmatch, Token, Rest}
|
||||||
|
end;
|
||||||
|
slurp_token_of_type(ct, Pos, SrcStr) ->
|
||||||
|
StringMatcher = sfc_strmatch:smr_sf_ct(),
|
||||||
|
case sfc_strmatch:match(StringMatcher, SrcStr) of
|
||||||
|
no_strmatch ->
|
||||||
|
no_tokmatch;
|
||||||
|
{strmatch, TokenStr, Rest} ->
|
||||||
|
Token = #sfc_token{type = ct, pos = Pos, string = TokenStr},
|
||||||
|
{tokmatch, Token, Rest}
|
||||||
|
end;
|
||||||
|
slurp_token_of_type(sg, Pos, SrcStr) ->
|
||||||
|
StringMatcher = sfc_strmatch:smr_sf_sg(),
|
||||||
|
case sfc_strmatch:match(StringMatcher, SrcStr) of
|
||||||
|
no_strmatch ->
|
||||||
|
no_tokmatch;
|
||||||
|
{strmatch, TokenStr, Rest} ->
|
||||||
|
Token = #sfc_token{type = sg, pos = Pos, string = TokenStr},
|
||||||
|
{tokmatch, Token, Rest}
|
||||||
|
end;
|
||||||
|
slurp_token_of_type(char, Pos, SrcStr) ->
|
||||||
|
StringMatcher = sfc_strmatch:smr_sf_char(),
|
||||||
|
case sfc_strmatch:match(StringMatcher, SrcStr) of
|
||||||
|
no_strmatch ->
|
||||||
|
no_tokmatch;
|
||||||
|
{strmatch, TokenStr, Rest} ->
|
||||||
|
Token = #sfc_token{type = char, pos = Pos, string = TokenStr},
|
||||||
|
{tokmatch, Token, Rest}
|
||||||
|
end;
|
||||||
|
slurp_token_of_type(string, Pos, SrcStr) ->
|
||||||
|
case sfc_strmatch:match(sfc_strmatch:smr_sf_str(), SrcStr) of
|
||||||
|
no_strmatch ->
|
||||||
|
no_tokmatch;
|
||||||
|
{strmatch, TokenStr, Rest} ->
|
||||||
|
Token = #sfc_token{type = string, pos = Pos, string = TokenStr},
|
||||||
|
{tokmatch, Token, Rest}
|
||||||
|
end;
|
||||||
|
slurp_token_of_type(bytes, Pos, SrcStr) ->
|
||||||
|
case sfc_strmatch:match(sfc_strmatch:smr_sf_bytes(), SrcStr) of
|
||||||
|
no_strmatch ->
|
||||||
|
no_tokmatch;
|
||||||
|
{strmatch, TokenStr, Rest} ->
|
||||||
|
Token = #sfc_token{type = bytes, pos = Pos, string = TokenStr},
|
||||||
|
{tokmatch, Token, Rest}
|
||||||
|
end;
|
||||||
|
slurp_token_of_type(NyiType, Pos, SrcStr) ->
|
||||||
|
Message = io_lib:format("cannot slurp token of type: ~p", [NyiType]),
|
||||||
|
error(#sfc_err{atom = nyi,
|
||||||
|
string = Message,
|
||||||
|
extra = [{token_type, NyiType},
|
||||||
|
{pos, Pos},
|
||||||
|
{rest, SrcStr}]}).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
takeline(Acc, "") -> {lists:reverse(Acc), ""};
|
||||||
|
takeline(Acc, Rest = "\n" ++ _) -> {lists:reverse(Acc), Rest};
|
||||||
|
takeline(Acc, [C | Rest]) -> takeline([C | Acc], Rest).
|
||||||
|
|
||||||
|
|
||||||
|
bcom(CommentStr, Depth, SrcStr0) when Depth > 0 ->
|
||||||
|
case SrcStr0 of
|
||||||
|
% premature end
|
||||||
|
"" ->
|
||||||
|
{ierr, unterminated_block_comment};
|
||||||
|
% decrease depth
|
||||||
|
"*/" ++ SrcStr1 ->
|
||||||
|
NewCommentStr = [CommentStr, "*/"],
|
||||||
|
NewDepth = Depth - 1,
|
||||||
|
bcom(NewCommentStr, NewDepth, SrcStr1);
|
||||||
|
% increase depth
|
||||||
|
"/*" ++ SrcStr1 ->
|
||||||
|
NewCommentStr = [CommentStr, "/*"],
|
||||||
|
NewDepth = Depth + 1,
|
||||||
|
bcom(NewCommentStr, NewDepth, SrcStr1);
|
||||||
|
% same depth, add to list
|
||||||
|
[C | SrcStr1] ->
|
||||||
|
NewCommentStr = [CommentStr, C],
|
||||||
|
bcom(NewCommentStr, Depth, SrcStr1)
|
||||||
|
end;
|
||||||
|
bcom(CommentStr, 0, SrcStr) ->
|
||||||
|
{ok, unicode:characters_to_nfc_list(CommentStr), SrcStr}.
|
||||||
|
|
||||||
|
|
||||||
|
%------------------------------------------
|
||||||
|
% INTERNAL UTILITIES
|
||||||
|
%------------------------------------------
|
||||||
|
|
||||||
|
-spec take_while(Pred, List) -> {Taken, Rest}
|
||||||
|
when Pred :: fun((Item) -> boolean()),
|
||||||
|
List :: [Item],
|
||||||
|
Taken :: List,
|
||||||
|
Rest :: List.
|
||||||
|
% @doc similar to lists:takewhile but returns {Taken, Rest}. Name is
|
||||||
|
% to remind you it returns 2 things.
|
||||||
|
|
||||||
|
take_while(Pred, List) ->
|
||||||
|
take_while(Pred, [], List).
|
||||||
|
|
||||||
|
|
||||||
|
-spec take_while(Pred, Prefix, List) -> {Taken, Rest}
|
||||||
|
when Pred :: fun((Item) -> boolean()),
|
||||||
|
Prefix :: List,
|
||||||
|
List :: [Item],
|
||||||
|
Taken :: List,
|
||||||
|
Rest :: List.
|
||||||
|
% @doc
|
||||||
|
% similar to takewhile_ii/2, but returns {Prefix ++ Taken, Rest}
|
||||||
|
%
|
||||||
|
% where Prefix
|
||||||
|
%
|
||||||
|
% middle argument is just the accum
|
||||||
|
take_while(Pred, Pfx, List) ->
|
||||||
|
tw3(Pred, lists:reverse(Pfx), List).
|
||||||
|
|
||||||
|
|
||||||
|
tw3(Pred, Stk, [X | Xs]) ->
|
||||||
|
case Pred(X) of
|
||||||
|
true -> tw3(Pred, [X | Stk], Xs);
|
||||||
|
false -> {lists:reverse(Stk), [X | Xs]}
|
||||||
|
end;
|
||||||
|
tw3(_, Stk, []) ->
|
||||||
|
{lists:reverse(Stk), []}.
|
||||||
Reference in New Issue
Block a user