Compare commits
12 Commits
eff77fff6b
..
master
| Author | SHA1 | Date | |
|---|---|---|---|
| f04b7311f5 | |||
| 10424927b1 | |||
| fdb40dcb92 | |||
| e180dc955d | |||
| 4e54bebeba | |||
| 4f4adaa284 | |||
| 2c36a02331 | |||
| 5cae022b8b | |||
| dfb158e593 | |||
| ef69016294 | |||
| 55a56753c1 | |||
| 270f192f0c |
@@ -0,0 +1,15 @@
|
||||
# TODO
|
||||
|
||||
- architecture needs more careful thought but only after something
|
||||
works
|
||||
- too fuzzy right now
|
||||
- undo gs_ naming fuckery.. everything is `gsc_*`. it's just
|
||||
needlessly confusing. for now let's name new things gsc_* and then
|
||||
go back and undo the stupidity
|
||||
|
||||
# TONOTDO
|
||||
|
||||
- barf for outputs, slurp for inputs
|
||||
- rename parser layers sequentially
|
||||
|
||||
# TODONE
|
||||
@@ -0,0 +1,30 @@
|
||||
# gsc = gajumaru sophia compiler
|
||||
|
||||
**This is _NOT_ the official Sophia compiler.** If you're looking for
|
||||
that see https://git.qpq.swiss/QPQ-AG/sophia
|
||||
|
||||
This is an incomplete prototype rewrite of the legacy (official)
|
||||
sophia compiler in straightforward Erlang. It grew out of my (Peter
|
||||
Harpending) own efforts to document the language and its relationship
|
||||
to FATE (the gajumaru virtual machine).
|
||||
|
||||
The goal for version 0.1 is to mirror the success behavior of the
|
||||
legacy sophia compiler.
|
||||
|
||||
# Setup
|
||||
|
||||
```
|
||||
git clone https://git.qpq.swiss/QPQ-AG/gsc.git
|
||||
```
|
||||
|
||||
Add the following to `~/.bashrc` or wheremstever:
|
||||
|
||||
```
|
||||
export PATH=$PATH:/path/to/gsc/bin
|
||||
```
|
||||
|
||||
To test run
|
||||
|
||||
```
|
||||
gsc --help
|
||||
```
|
||||
|
||||
@@ -0,0 +1,199 @@
|
||||
% ANSI screen drawing macros in erlang
|
||||
%
|
||||
% Author: Peter Harpending <peterharpending@qpq.swiss>
|
||||
% Date: 2026-04-10
|
||||
%
|
||||
% Copyright (C) 2026, QPQ AG
|
||||
|
||||
% Not exhaustive, just what I need for the moment
|
||||
% ref: https://gist.github.com/ConnerWill/d4b6c776b509add763e17f9f113fd25b
|
||||
-define(ANSI_ESC, [27]).
|
||||
-define(ANSI_CRLF, "\r\n").
|
||||
-define(ANSI_FF, [12]).
|
||||
-define(ANSI_CLEAR, [12]).
|
||||
|
||||
-define(ANSI_LINE(X), [X, ?ANSI_CRLF]).
|
||||
|
||||
% MARKDOWN TIER TEXT FORMATTING
|
||||
|
||||
% resets all formatting
|
||||
-define(ANSI_RESET, [?ANSI_ESC, "[0m"]).
|
||||
|
||||
-define(ANSI_BOLD, [?ANSI_ESC, "[1m"]).
|
||||
-define(ANSI_DIM, [?ANSI_ESC, "[2m"]).
|
||||
-define(ANSI_ITALIC, [?ANSI_ESC, "[3m"]).
|
||||
-define(ANSI_ULINE, [?ANSI_ESC, "[4m"]).
|
||||
-define(ANSI_BLINK, [?ANSI_ESC, "[5m"]).
|
||||
-define(ANSI_INVERT, [?ANSI_ESC, "[7m"]).
|
||||
-define(ANSI_INVIS, [?ANSI_ESC, "[8m"]).
|
||||
-define(ANSI_STRIKE, [?ANSI_ESC, "[9m"]).
|
||||
|
||||
% > Note: Both dim and bold modes are reset with the ESC[22m sequence. The
|
||||
% > ESC[21m sequence is a non-specified sequence for double underline mode and
|
||||
% > only work in some terminals and is reset with ESC[24m.
|
||||
-define(ANSI_UNBOLD, [?ANSI_ESC, "[22m"]).
|
||||
-define(ANSI_UNDIM, [?ANSI_ESC, "[22m"]).
|
||||
-define(ANSI_UNITALIC, [?ANSI_ESC, "[23m"]).
|
||||
-define(ANSI_UNULINE, [?ANSI_ESC, "[24m"]).
|
||||
-define(ANSI_UNBLINK, [?ANSI_ESC, "[25m"]).
|
||||
-define(ANSI_UNINVERT, [?ANSI_ESC, "[27m"]).
|
||||
-define(ANSI_UNINVIS, [?ANSI_ESC, "[28m"]).
|
||||
-define(ANSI_UNSTRIKE, [?ANSI_ESC, "[29m"]).
|
||||
|
||||
-define(ANSI_BOLD(X), [?ANSI_BOLD, X, ?ANSI_UNBOLD]).
|
||||
-define(ANSI_DIM(X), [?ANSI_DIM, X, ?ANSI_UNDIM]).
|
||||
-define(ANSI_ITALIC(X), [?ANSI_ITALIC, X, ?ANSI_UNITALIC]).
|
||||
-define(ANSI_ULINE(X), [?ANSI_ULINE, X, ?ANSI_UNULINE]).
|
||||
-define(ANSI_BLINK(X), [?ANSI_BLINK, X, ?ANSI_UNBLINK]).
|
||||
-define(ANSI_INVERT(X), [?ANSI_INVERT, X, ?ANSI_UNINVERT]).
|
||||
-define(ANSI_INVIS(X), [?ANSI_INVIS, X, ?ANSI_UNINVIS]).
|
||||
-define(ANSI_STRIKE(X), [?ANSI_STRIKE, X, ?ANSI_UNSTRIKE]).
|
||||
|
||||
|
||||
% COLORS
|
||||
%
|
||||
% COLOR SetFG SetBG
|
||||
% -----------------------------
|
||||
% Black 30 40
|
||||
% Red 31 41
|
||||
% Green 32 42
|
||||
% Yellow 33 43
|
||||
% Blue 34 44
|
||||
% Magenta 35 45
|
||||
% Cyan 36 46
|
||||
% White 37 47
|
||||
% Default 39 49
|
||||
|
||||
-define(ANSI_FG_RESET, [?ANSI_ESC, "[39m"]).
|
||||
-define(ANSI_BG_RESET, [?ANSI_ESC, "[49m"]).
|
||||
|
||||
-define(ANSI_FG_BLACK, [?ANSI_ESC, "[30m"]).
|
||||
-define(ANSI_FG_RED, [?ANSI_ESC, "[31m"]).
|
||||
-define(ANSI_FG_GREEN, [?ANSI_ESC, "[32m"]).
|
||||
-define(ANSI_FG_YELLOW, [?ANSI_ESC, "[33m"]).
|
||||
-define(ANSI_FG_BLUE, [?ANSI_ESC, "[34m"]).
|
||||
-define(ANSI_FG_MAGENTA, [?ANSI_ESC, "[35m"]).
|
||||
-define(ANSI_FG_CYAN, [?ANSI_ESC, "[36m"]).
|
||||
-define(ANSI_FG_WHITE, [?ANSI_ESC, "[37m"]).
|
||||
|
||||
-define(ANSI_BG_BLACK, [?ANSI_ESC, "[40m"]).
|
||||
-define(ANSI_BG_RED, [?ANSI_ESC, "[41m"]).
|
||||
-define(ANSI_BG_GREEN, [?ANSI_ESC, "[42m"]).
|
||||
-define(ANSI_BG_YELLOW, [?ANSI_ESC, "[43m"]).
|
||||
-define(ANSI_BG_BLUE, [?ANSI_ESC, "[44m"]).
|
||||
-define(ANSI_BG_MAGENTA, [?ANSI_ESC, "[45m"]).
|
||||
-define(ANSI_BG_CYAN, [?ANSI_ESC, "[46m"]).
|
||||
-define(ANSI_BG_WHITE, [?ANSI_ESC, "[47m"]).
|
||||
|
||||
|
||||
-define(ANSI_FG_BLACK(X), [?ANSI_FG_BLACK, X, ?ANSI_FG_RESET]).
|
||||
-define(ANSI_FG_RED(X), [?ANSI_FG_RED, X, ?ANSI_FG_RESET]).
|
||||
-define(ANSI_FG_GREEN(X), [?ANSI_FG_GREEN, X, ?ANSI_FG_RESET]).
|
||||
-define(ANSI_FG_YELLOW(X), [?ANSI_FG_YELLOW, X, ?ANSI_FG_RESET]).
|
||||
-define(ANSI_FG_BLUE(X), [?ANSI_FG_BLUE, X, ?ANSI_FG_RESET]).
|
||||
-define(ANSI_FG_MAGENTA(X), [?ANSI_FG_MAGENTA, X, ?ANSI_FG_RESET]).
|
||||
-define(ANSI_FG_CYAN(X), [?ANSI_FG_CYAN, X, ?ANSI_FG_RESET]).
|
||||
-define(ANSI_FG_WHITE(X), [?ANSI_FG_WHITE, X, ?ANSI_FG_RESET]).
|
||||
|
||||
-define(ANSI_BG_BLACK(X), [?ANSI_BG_BLACK, X, ?ANSI_BG_RESET]).
|
||||
-define(ANSI_BG_RED(X), [?ANSI_BG_RED, X, ?ANSI_BG_RESET]).
|
||||
-define(ANSI_BG_GREEN(X), [?ANSI_BG_GREEN, X, ?ANSI_BG_RESET]).
|
||||
-define(ANSI_BG_YELLOW(X), [?ANSI_BG_YELLOW, X, ?ANSI_BG_RESET]).
|
||||
-define(ANSI_BG_BLUE(X), [?ANSI_BG_BLUE, X, ?ANSI_BG_RESET]).
|
||||
-define(ANSI_BG_MAGENTA(X), [?ANSI_BG_MAGENTA, X, ?ANSI_BG_RESET]).
|
||||
-define(ANSI_BG_CYAN(X), [?ANSI_BG_CYAN, X, ?ANSI_BG_RESET]).
|
||||
-define(ANSI_BG_WHITE(X), [?ANSI_BG_WHITE, X, ?ANSI_BG_RESET]).
|
||||
|
||||
% bright colors
|
||||
-define(ANSI_FG_BBLACK, [?ANSI_ESC, "[90m"]).
|
||||
-define(ANSI_FG_BRED, [?ANSI_ESC, "[91m"]).
|
||||
-define(ANSI_FG_BGREEN, [?ANSI_ESC, "[92m"]).
|
||||
-define(ANSI_FG_BYELLOW, [?ANSI_ESC, "[93m"]).
|
||||
-define(ANSI_FG_BBLUE, [?ANSI_ESC, "[94m"]).
|
||||
-define(ANSI_FG_BMAGENTA, [?ANSI_ESC, "[95m"]).
|
||||
-define(ANSI_FG_BCYAN, [?ANSI_ESC, "[96m"]).
|
||||
-define(ANSI_FG_BWHITE, [?ANSI_ESC, "[97m"]).
|
||||
|
||||
-define(ANSI_BG_BBLACK, [?ANSI_ESC, "[100m"]).
|
||||
-define(ANSI_BG_BRED, [?ANSI_ESC, "[101m"]).
|
||||
-define(ANSI_BG_BGREEN, [?ANSI_ESC, "[102m"]).
|
||||
-define(ANSI_BG_BYELLOW, [?ANSI_ESC, "[103m"]).
|
||||
-define(ANSI_BG_BBLUE, [?ANSI_ESC, "[104m"]).
|
||||
-define(ANSI_BG_BMAGENTA, [?ANSI_ESC, "[105m"]).
|
||||
-define(ANSI_BG_BCYAN, [?ANSI_ESC, "[106m"]).
|
||||
-define(ANSI_BG_BWHITE, [?ANSI_ESC, "[107m"]).
|
||||
|
||||
-define(ANSI_FG_BBLACK(X), [?ANSI_FG_BBLACK, X, ?ANSI_FG_RESET]).
|
||||
-define(ANSI_FG_BRED(X), [?ANSI_FG_BRED, X, ?ANSI_FG_RESET]).
|
||||
-define(ANSI_FG_BGREEN(X), [?ANSI_FG_BGREEN, X, ?ANSI_FG_RESET]).
|
||||
-define(ANSI_FG_BYELLOW(X), [?ANSI_FG_BYELLOW, X, ?ANSI_FG_RESET]).
|
||||
-define(ANSI_FG_BBLUE(X), [?ANSI_FG_BBLUE, X, ?ANSI_FG_RESET]).
|
||||
-define(ANSI_FG_BMAGENTA(X), [?ANSI_FG_BMAGENTA, X, ?ANSI_FG_RESET]).
|
||||
-define(ANSI_FG_BCYAN(X), [?ANSI_FG_BCYAN, X, ?ANSI_FG_RESET]).
|
||||
-define(ANSI_FG_BWHITE(X), [?ANSI_FG_BWHITE, X, ?ANSI_FG_RESET]).
|
||||
|
||||
-define(ANSI_BG_BBLACK(X), [?ANSI_BG_BBLACK, X, ?ANSI_BG_RESET]).
|
||||
-define(ANSI_BG_BRED(X), [?ANSI_BG_BRED, X, ?ANSI_BG_RESET]).
|
||||
-define(ANSI_BG_BGREEN(X), [?ANSI_BG_BGREEN, X, ?ANSI_BG_RESET]).
|
||||
-define(ANSI_BG_BYELLOW(X), [?ANSI_BG_BYELLOW, X, ?ANSI_BG_RESET]).
|
||||
-define(ANSI_BG_BBLUE(X), [?ANSI_BG_BBLUE, X, ?ANSI_BG_RESET]).
|
||||
-define(ANSI_BG_BMAGENTA(X), [?ANSI_BG_BMAGENTA, X, ?ANSI_BG_RESET]).
|
||||
-define(ANSI_BG_BCYAN(X), [?ANSI_BG_BCYAN, X, ?ANSI_BG_RESET]).
|
||||
-define(ANSI_BG_BWHITE(X), [?ANSI_BG_BWHITE, X, ?ANSI_BG_RESET]).
|
||||
|
||||
-define(ANSI_FG_RGB(R,G,B),
|
||||
[?ANSI_ESC,
|
||||
"[38;2;",
|
||||
integer_to_list(R),";",
|
||||
integer_to_list(G),";",
|
||||
integer_to_list(B),"m"]
|
||||
).
|
||||
-define(ANSI_BG_RGB(R,G,B),
|
||||
[?ANSI_ESC,
|
||||
"[48;2;",
|
||||
integer_to_list(R),";",
|
||||
integer_to_list(G),";",
|
||||
integer_to_list(B),"m"]
|
||||
).
|
||||
|
||||
-define(ANSI_FG_RGB(R,G,B,Chars), [?ANSI_FG_RGB(R,G,B), Chars, ?ANSI_FG_RESET]).
|
||||
-define(ANSI_BG_RGB(R,G,B,Chars), [?ANSI_BG_RGB(R,G,B), Chars, ?ANSI_BG_RESET]).
|
||||
|
||||
|
||||
% cursor controls
|
||||
-define(ANSI_CUR_HOME, [?ANSI_ESC, "[H"]).
|
||||
|
||||
-define(ANSI_CUR_XY(X, Y), [?ANSI_ESC, "[", integer_to_list(Y), ";", integer_to_list(X), "H"]).
|
||||
-define(ANSI_CUR_UP(N), [?ANSI_ESC, "[", integer_to_list(N), "A"]).
|
||||
-define(ANSI_CUR_DOWN(N), [?ANSI_ESC, "[", integer_to_list(N), "B"]).
|
||||
-define(ANSI_CUR_RIGHT(N), [?ANSI_ESC, "[", integer_to_list(N), "C"]).
|
||||
-define(ANSI_CUR_LEFT(N), [?ANSI_ESC, "[", integer_to_list(N), "D"]).
|
||||
-define(ANSI_CUR_SAVE, [?ANSI_ESC, "7"]).
|
||||
-define(ANSI_CUR_RESTORE, [?ANSI_ESC, "8"]).
|
||||
-define(ANSI_CUR_QUERY, [?ANSI_ESC, "[6n"]).
|
||||
|
||||
-define(ANSI_CUR_UP, ?ANSI_CUR_UP(1)).
|
||||
-define(ANSI_CUR_DOWN, ?ANSI_CUR_DOWN(1)).
|
||||
-define(ANSI_CUR_RIGHT, ?ANSI_CUR_RIGHT(1)).
|
||||
-define(ANSI_CUR_LEFT, ?ANSI_CUR_LEFT(1)).
|
||||
|
||||
|
||||
% relative movement "forward" +X=right, +Y=down
|
||||
-define(ANSI_CUR_VECT(X, Y),
|
||||
if X =< 0, Y =< 0 -> [?ANSI_CUR_LEFT(-1*X), ?ANSI_CUR_UP(-1*Y)];
|
||||
X =< 0, 0 < Y -> [?ANSI_CUR_LEFT(-1*X), ?ANSI_CUR_DOWN(Y)];
|
||||
0 < X, Y =< 0 -> [?ANSI_CUR_RIGHT(X), ?ANSI_CUR_UP(-1*Y)];
|
||||
0 < X, 0 < Y -> [?ANSI_CUR_RIGHT(X), ?ANSI_CUR_DOWN(Y)]
|
||||
end
|
||||
).
|
||||
|
||||
|
||||
|
||||
-define(ANSI_ALTBUF, [?ANSI_ESC, "[?1049h"]).
|
||||
-define(ANSI_UNALTBUF, [?ANSI_ESC, "[?1049l"]).
|
||||
|
||||
-define(ANSI_CUR_INVIS, [?ANSI_ESC, "[?25l"]).
|
||||
-define(ANSI_CUR_VIS, [?ANSI_ESC, "[?25h"]).
|
||||
|
||||
-define(ANSI_WRAP, [?ANSI_ESC, "[=7h"]).
|
||||
-define(ANSI_NOWRAP, [?ANSI_ESC, "[=7l"]).
|
||||
@@ -0,0 +1,256 @@
|
||||
In Congress, July 4, 1776
|
||||
|
||||
The unanimous Declaration of the thirteen united States of America,
|
||||
|
||||
When in the Course of human events, it becomes necessary for one
|
||||
people to dissolve the political bands which have connected them with
|
||||
another, and to assume among the powers of the earth, the separate
|
||||
and equal station to which the Laws of Nature and of Nature's God
|
||||
entitle them, a decent respect to the opinions of mankind requires
|
||||
that they should declare the causes which impel them to the
|
||||
separation.
|
||||
|
||||
We hold these truths to be self-evident, that all men are created
|
||||
equal, that they are endowed by their Creator with certain
|
||||
unalienable Rights, that among these are Life, Liberty and the
|
||||
pursuit of Happiness.--That to secure these rights, Governments are
|
||||
instituted among Men, deriving their just powers from the consent of
|
||||
the governed, --That whenever any Form of Government becomes
|
||||
destructive of these ends, it is the Right of the People to alter or
|
||||
to abolish it, and to institute new Government, laying its foundation
|
||||
on such principles and organizing its powers in such form, as to them
|
||||
shall seem most likely to effect their Safety and Happiness.
|
||||
Prudence, indeed, will dictate that Governments long established
|
||||
should not be changed for light and transient causes; and accordingly
|
||||
all experience hath shewn, that mankind are more disposed to suffer,
|
||||
while evils are sufferable, than to right themselves by abolishing
|
||||
the forms to which they are accustomed. But when a long train of
|
||||
abuses and usurpations, pursuing invariably the same Object evinces a
|
||||
design to reduce them under absolute Despotism, it is their right, it
|
||||
is their duty, to throw off such Government, and to provide new
|
||||
Guards for their future security.--Such has been the patient
|
||||
sufferance of these Colonies; and such is now the necessity which
|
||||
constrains them to alter their former Systems of Government. The
|
||||
history of the present King of Great Britain is a history of repeated
|
||||
injuries and usurpations, all having in direct object the
|
||||
establishment of an absolute Tyranny over these States. To prove
|
||||
this, let Facts be submitted to a candid world.
|
||||
|
||||
He has refused his Assent to Laws, the most wholesome and
|
||||
necessary for the public good.
|
||||
|
||||
He has forbidden his Governors to pass Laws of immediate and
|
||||
pressing importance, unless suspended in their operation till his
|
||||
Assent should be obtained; and when so suspended, he has utterly
|
||||
neglected to attend to them.
|
||||
|
||||
He has refused to pass other Laws for the accommodation of large
|
||||
districts of people, unless those people would relinquish the
|
||||
right of Representation in the Legislature, a right inestimable
|
||||
to them and formidable to tyrants only.
|
||||
|
||||
He has called together legislative bodies at places unusual,
|
||||
uncomfortable, and distant from the depository of their public
|
||||
Records, for the sole purpose of fatiguing them into compliance
|
||||
with his measures.
|
||||
|
||||
He has dissolved Representative Houses repeatedly, for opposing
|
||||
with manly firmness his invasions on the rights of the people.
|
||||
|
||||
He has refused for a long time, after such dissolutions, to cause
|
||||
others to be elected; whereby the Legislative powers, incapable
|
||||
of Annihilation, have returned to the People at large for their
|
||||
exercise; the State remaining in the mean time exposed to all the
|
||||
dangers of invasion from without, and convulsions within.
|
||||
|
||||
He has endeavoured to prevent the population of these States; for
|
||||
that purpose obstructing the Laws for Naturalization of
|
||||
Foreigners; refusing to pass others to encourage their migrations
|
||||
hither, and raising the conditions of new Appropriations of
|
||||
Lands.
|
||||
|
||||
He has obstructed the Administration of Justice, by refusing his
|
||||
Assent to Laws for establishing Judiciary powers.
|
||||
|
||||
He has made Judges dependent on his Will alone, for the tenure of
|
||||
their offices, and the amount and payment of their salaries.
|
||||
|
||||
He has erected a multitude of New Offices, and sent hither swarms
|
||||
of Officers to harrass our people, and eat out their substance.
|
||||
|
||||
He has kept among us, in times of peace, Standing Armies without
|
||||
the Consent of our legislatures.
|
||||
|
||||
He has affected to render the Military independent of and
|
||||
superior to the Civil power.
|
||||
|
||||
He has combined with others to subject us to a jurisdiction
|
||||
foreign to our constitution, and unacknowledged by our laws;
|
||||
giving his Assent to their Acts of pretended Legislation:
|
||||
|
||||
For Quartering large bodies of armed troops among us:
|
||||
|
||||
For protecting them, by a mock Trial, from punishment for any
|
||||
Murders which they should commit on the Inhabitants of these
|
||||
States:
|
||||
|
||||
For cutting off our Trade with all parts of the world:
|
||||
|
||||
For imposing Taxes on us without our Consent:
|
||||
|
||||
For depriving us in many cases, of the benefits of Trial by Jury:
|
||||
|
||||
For transporting us beyond Seas to be tried for pretended
|
||||
offences:
|
||||
|
||||
For abolishing the free System of English Laws in a neighbouring
|
||||
Province, establishing therein an Arbitrary government, and
|
||||
enlarging its Boundaries so as to render it at once an example
|
||||
and fit instrument for introducing the same absolute rule into
|
||||
these Colonies:
|
||||
|
||||
For taking away our Charters, abolishing our most valuable Laws,
|
||||
and altering fundamentally the Forms of our Governments:
|
||||
|
||||
For suspending our own Legislatures, and declaring themselves
|
||||
invested with power to legislate for us in all cases whatsoever.
|
||||
|
||||
He has abdicated Government here, by declaring us out of his
|
||||
Protection and waging War against us.
|
||||
|
||||
He has plundered our seas, ravaged our Coasts, burnt our towns,
|
||||
and destroyed the lives of our people.
|
||||
|
||||
He is at this time transporting large Armies of foreign
|
||||
Mercenaries to compleat the works of death, desolation and
|
||||
tyranny, already begun with circumstances of Cruelty & perfidy
|
||||
scarcely paralleled in the most barbarous ages, and totally
|
||||
unworthy the Head of a civilized nation.
|
||||
|
||||
He has constrained our fellow Citizens taken Captive on the high
|
||||
Seas to bear Arms against their Country, to become the
|
||||
executioners of their friends and Brethren, or to fall themselves
|
||||
by their Hands.
|
||||
|
||||
He has excited domestic insurrections amongst us, and has
|
||||
endeavoured to bring on the inhabitants of our frontiers, the
|
||||
merciless Indian Savages, whose known rule of warfare, is an
|
||||
undistinguished destruction of all ages, sexes and conditions.
|
||||
|
||||
In every stage of these Oppressions We have Petitioned for Redress in
|
||||
the most humble terms: Our repeated Petitions have been answered only
|
||||
by repeated injury. A Prince, whose character is thus marked by every
|
||||
act which may define a Tyrant, is unfit to be the ruler of a free
|
||||
people.
|
||||
|
||||
Nor have We been wanting in attentions to our Brittish brethren. We
|
||||
have warned them from time to time of attempts by their legislature
|
||||
to extend an unwarrantable jurisdiction over us. We have reminded
|
||||
them of the circumstances of our emigration and settlement here. We
|
||||
have appealed to their native justice and magnanimity, and we have
|
||||
conjured them by the ties of our common kindred to disavow these
|
||||
usurpations, which, would inevitably interrupt our connections and
|
||||
correspondence. They too have been deaf to the voice of justice and
|
||||
of consanguinity. We must, therefore, acquiesce in the necessity,
|
||||
which denounces our Separation, and hold them, as we hold the rest of
|
||||
mankind, Enemies in War, in Peace Friends.
|
||||
|
||||
We, therefore, the Representatives of the united States of America,
|
||||
in General Congress, Assembled, appealing to the Supreme Judge of the
|
||||
world for the rectitude of our intentions, do, in the Name, and by
|
||||
Authority of the good People of these Colonies, solemnly publish and
|
||||
declare, That these United Colonies are, and of Right ought to be
|
||||
Free and Independent States; that they are Absolved from all
|
||||
Allegiance to the British Crown, and that all political connection
|
||||
between them and the State of Great Britain, is and ought to be
|
||||
totally dissolved; and that as Free and Independent States, they have
|
||||
full Power to levy War, conclude Peace, contract Alliances, establish
|
||||
Commerce, and to do all other Acts and Things which Independent
|
||||
States may of right do. And for the support of this Declaration, with
|
||||
a firm reliance on the protection of divine Providence, we mutually
|
||||
pledge to each other our Lives, our Fortunes and our sacred Honor.
|
||||
|
||||
Georgia
|
||||
Button Gwinnett
|
||||
Lyman Hall
|
||||
George Walton
|
||||
North Carolina
|
||||
William Hooper
|
||||
Joseph Hewes
|
||||
John Penn
|
||||
|
||||
South Carolina
|
||||
Edward Rutledge
|
||||
Thomas Heyward, Jr.
|
||||
Thomas Lynch, Jr.
|
||||
Arthur Middleton
|
||||
|
||||
Massachusetts
|
||||
John Hancock
|
||||
|
||||
Maryland
|
||||
Samuel Chase
|
||||
William Paca
|
||||
Thomas Stone
|
||||
Charles Carroll of Carrollton
|
||||
|
||||
Virginia
|
||||
George Wythe
|
||||
Richard Henry Lee
|
||||
Thomas Jefferson
|
||||
Benjamin Harrison
|
||||
Thomas Nelson, Jr.
|
||||
Francis Lightfoot Lee
|
||||
Carter Braxton
|
||||
|
||||
Pennsylvania
|
||||
Robert Morris
|
||||
Benjamin Rush
|
||||
Benjamin Franklin
|
||||
John Morton
|
||||
George Clymer
|
||||
James Smith
|
||||
George Taylor
|
||||
James Wilson
|
||||
George Ross
|
||||
|
||||
Delaware
|
||||
Caesar Rodney
|
||||
George Read
|
||||
Thomas McKean
|
||||
|
||||
New York
|
||||
William Floyd
|
||||
Philip Livingston
|
||||
Francis Lewis
|
||||
Lewis Morris
|
||||
|
||||
New Jersey
|
||||
Richard Stockton
|
||||
John Witherspoon
|
||||
Francis Hopkinson
|
||||
John Hart
|
||||
Abraham Clark
|
||||
|
||||
New Hampshire
|
||||
Josiah Bartlett
|
||||
William Whipple
|
||||
|
||||
Massachusetts
|
||||
Samuel Adams
|
||||
John Adams
|
||||
Robert Treat Paine
|
||||
Elbridge Gerry
|
||||
|
||||
Rhode Island
|
||||
Stephen Hopkins
|
||||
William Ellery
|
||||
|
||||
Connecticut
|
||||
Roger Sherman
|
||||
Samuel Huntington
|
||||
William Williams
|
||||
Oliver Wolcott
|
||||
|
||||
New Hampshire
|
||||
Matthew Thornton
|
||||
@@ -0,0 +1,44 @@
|
||||
|
||||
|
||||
%%=====================================================
|
||||
%% ARG PARSING
|
||||
%%=====================================================
|
||||
|
||||
|
||||
%%-----------------------------------------------------
|
||||
%% TOKENIZING
|
||||
%%-----------------------------------------------------
|
||||
|
||||
-record{ctk,
|
||||
{shape = none :: none | '-' | '--' | str,
|
||||
val = none :: none | [char()] | string() | string(),
|
||||
str = none :: none | string()}).
|
||||
-type ctk() :: #ctk{}.
|
||||
|
||||
|
||||
|
||||
-spec tokenize(Args) -> CliTokens when
|
||||
Args :: [string()],
|
||||
CliTokens :: [ctk()].
|
||||
%% @private tokenize cli args
|
||||
|
||||
tokenize(Args) ->
|
||||
[tokenize_arg(S) || S <- Args].
|
||||
|
||||
tokenize_arg(Str = "--" ++ Val) ->
|
||||
#ctk{shape = '--',
|
||||
val = Val,
|
||||
str = Str};
|
||||
tokenize_arg(Str = "-" ++ Val) ->
|
||||
#ctk{shape = '-',
|
||||
val = Val,
|
||||
str = Str};
|
||||
tokenize_arg(Str) ->
|
||||
#ctk{shape = str,
|
||||
val = none,
|
||||
str = Str}.
|
||||
|
||||
|
||||
%%-----------------------------------------------------
|
||||
%% PARSING
|
||||
%%-----------------------------------------------------
|
||||
@@ -0,0 +1,77 @@
|
||||
|
||||
-spec s2t_file(Signal) -> AstFile when
|
||||
Signal :: [tk()],
|
||||
AstFile :: #ns{meta :: file, kids :: asf()}.
|
||||
|
||||
s2t_file([]) ->
|
||||
error(empty_file);
|
||||
s2t_file(S0 = [#tk{pos = {_, FileCol}} | _]) ->
|
||||
Blk0 = s2t_gulp_block(FileCol, S0),
|
||||
Blk1 = t2t_parse_tds_in_block(Blk0),
|
||||
#ns{meta = file, kids = [Blk1]}.
|
||||
|
||||
-spec s2t_gulp_block(BlkCol, Signal) -> Block when
|
||||
BlkCol :: pos_integer(),
|
||||
Signal :: [tk()],
|
||||
Block :: #ns{meta :: block}.
|
||||
|
||||
s2t_gulp_block(BCol, Tks) ->
|
||||
% sanity check
|
||||
InBlock = fun(#tk{pos = {_, TCol}}) -> BCol =< TCol end,
|
||||
true = lists:all(InBlock, Tks),
|
||||
BlockItems = s2f_block_items(BCol, Tks),
|
||||
#ns{meta = block, kids = BlockItems}.
|
||||
|
||||
-spec s2f_block_items(BCol, Signal) -> BlkItems when
|
||||
BCol :: pos_integer(),
|
||||
Signal :: [tk()],
|
||||
BlkItems :: [BlkItem],
|
||||
BlkItem :: #ns{meta :: block_item,
|
||||
kids :: asf()}.
|
||||
|
||||
s2f_block_items(BCol, Signal) ->
|
||||
s2f_block_items(BCol, [], Signal).
|
||||
|
||||
|
||||
s2f_block_items(_BCol, Stk, []) ->
|
||||
lists:reverse(Stk);
|
||||
s2f_block_items(BCol, Stk, [#tk{pos = {_, BCol}} = T0 | F0]) ->
|
||||
{slurp, BlkItem, F1} = s2t_slurp_block_item(BCol, T0, F0),
|
||||
s2f_block_items(BCol, [BlkItem | Stk], F1).
|
||||
|
||||
|
||||
s2t_slurp_block_item(BCol, T0, F0) ->
|
||||
{ItemTokens, F1} = s2s_sw_block_item(BCol, T0, F0),
|
||||
Item = #ns{meta = block_item, kids = ItemTokens},
|
||||
{slurp, Item, F1}.
|
||||
|
||||
% sw = splitwith; kind of take/drop
|
||||
s2s_sw_block_item(BCol, T0, F0) ->
|
||||
InItem = fun(#tk{pos = {_, TCol}}) -> BCol < TCol end,
|
||||
{F0_II, F1} = lists:splitwith(InItem, F0),
|
||||
{[T0 | F0_II], F1}.
|
||||
|
||||
-spec t2t_parse_tds_in_block(Block0) -> Block1 when
|
||||
Block0 :: ast(),
|
||||
Block1 :: ast().
|
||||
|
||||
% go through and convert the block_item nodes to top
|
||||
% decls
|
||||
t2t_parse_tds_in_block(B0 = #ns{meta = block, kids = F0}) ->
|
||||
F1 = lists:map(fun t2t_parse_td_from_item/1, F0),
|
||||
B0#ns{kids = F1}.
|
||||
|
||||
|
||||
-spec t2t_parse_td_from_item(BlockItem) -> TopDecl when
|
||||
BlockItem :: #ns{meta :: block_item},
|
||||
TopDecl :: #ns{meta :: td_meta()}.
|
||||
|
||||
t2t_parse_td_from_item(#ns{meta = block_item, kids = Signal}) ->
|
||||
s2t_top_decl(Signal).
|
||||
|
||||
|
||||
-spec s2t_top_decl(Signal) -> TdTree when
|
||||
Signal :: [tk()],
|
||||
TdTree :: ast().
|
||||
|
||||
s2t_top_decl(S0) ->
|
||||
@@ -0,0 +1,292 @@
|
||||
# Syntax
|
||||
|
||||
## Lexical syntax
|
||||
|
||||
### Comments
|
||||
|
||||
Single line comments start with `//` and block comments are enclosed in `/*`
|
||||
and `*/` and can be nested.
|
||||
|
||||
### Keywords
|
||||
|
||||
```
|
||||
contract include let switch type record datatype if elif else function
|
||||
stateful payable true false mod public entrypoint private indexed namespace
|
||||
interface main using as for hiding
|
||||
```
|
||||
|
||||
### Tokens
|
||||
|
||||
- `Id = [a-z_][A-Za-z0-9_']*` identifiers start with a lower case letter.
|
||||
- `Con = [A-Z][A-Za-z0-9_']*` constructors start with an upper case letter.
|
||||
- `QId = (Con\.)+Id` qualified identifiers (e.g. `Map.member`)
|
||||
- `QCon = (Con\.)+Con` qualified constructor
|
||||
- `TVar = 'Id` type variable (e.g `'a`, `'b`)
|
||||
- `Int = [0-9]+(_[0-9]+)*|0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*` integer literal with optional `_` separators
|
||||
- `Bytes = #[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*` byte array literal with optional `_` separators
|
||||
- `String` string literal enclosed in `"` with escape character `\`
|
||||
- `Char` character literal enclosed in `'` with escape character `\`
|
||||
- `AccountAddress` base58-encoded 32 byte account pubkey with `ak_` prefix
|
||||
- `ContractAddress` base58-encoded 32 byte contract address with `ct_` prefix
|
||||
- `Signature` base58-encoded 64 byte cryptographic signature with `sg_` prefix
|
||||
|
||||
Valid string escape codes are
|
||||
|
||||
| Escape | ASCII | |
|
||||
|---------------|-------------|---|
|
||||
| `\b` | 8 | |
|
||||
| `\t` | 9 | |
|
||||
| `\n` | 10 | |
|
||||
| `\v` | 11 | |
|
||||
| `\f` | 12 | |
|
||||
| `\r` | 13 | |
|
||||
| `\e` | 27 | |
|
||||
| `\xHexDigits` | *HexDigits* | |
|
||||
|
||||
|
||||
See the [identifier encoding scheme](https://git.qpq.swiss/QPQ-AG/protocol/src/branch/master/node/api/api_encoding.md) for the
|
||||
details on the base58 literals.
|
||||
|
||||
## Layout blocks
|
||||
|
||||
Sophia uses Python-style layout rules to group declarations and statements. A
|
||||
layout block with more than one element must start on a separate line and be
|
||||
indented more than the currently enclosing layout block. Blocks with a single
|
||||
element can be written on the same line as the previous token.
|
||||
|
||||
Each element of the block must share the same indentation and no part of an
|
||||
element may be indented less than the indentation of the block. For instance
|
||||
|
||||
```sophia
|
||||
contract Layout =
|
||||
function foo() = 0 // no layout
|
||||
function bar() = // layout block starts on next line
|
||||
let x = foo() // indented more than 2 spaces
|
||||
x
|
||||
+ 1 // the '+' is indented more than the 'x'
|
||||
```
|
||||
|
||||
## Notation
|
||||
|
||||
In describing the syntax below, we use the following conventions:
|
||||
|
||||
- Upper-case identifiers denote non-terminals (like `Expr`) or terminals with
|
||||
some associated value (like `Id`).
|
||||
- Keywords and symbols are enclosed in single quotes: `'let'` or `'='`.
|
||||
- Choices are separated by vertical bars: `|`.
|
||||
- Optional elements are enclosed in `[` square brackets `]`.
|
||||
- `(` Parentheses `)` are used for grouping.
|
||||
- Zero or more repetitions are denoted by a postfix `*`, and one or more
|
||||
repetitions by a `+`.
|
||||
- `Block(X)` denotes a layout block of `X`s.
|
||||
- `Sep(X, S)` is short for `[X (S X)*]`, i.e. a possibly empty sequence of `X`s
|
||||
separated by `S`s.
|
||||
- `Sep1(X, S)` is short for `X (S X)*`, i.e. same as `Sep`, but must not be empty.
|
||||
|
||||
|
||||
## Declarations
|
||||
|
||||
A Sophia file consists of a sequence of *declarations* in a layout block.
|
||||
|
||||
```c
|
||||
File ::= Block(TopDecl)
|
||||
|
||||
TopDecl ::= ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl)
|
||||
| 'contract' 'interface' Con [Implement] '=' Block(Decl)
|
||||
| 'namespace' Con '=' Block(Decl)
|
||||
| '@compiler' PragmaOp Version
|
||||
| 'include' String
|
||||
| Using
|
||||
|
||||
Implement ::= ':' Sep1(Con, ',')
|
||||
|
||||
Decl ::= 'type' Id ['(' TVar* ')'] '=' TypeAlias
|
||||
| 'record' Id ['(' TVar* ')'] '=' RecordType
|
||||
| 'datatype' Id ['(' TVar* ')'] '=' DataType
|
||||
| 'let' Id [':' Type] '=' Expr
|
||||
| (EModifier* 'entrypoint' | FModifier* 'function') Block(FunDecl)
|
||||
| Using
|
||||
|
||||
FunDecl ::= Id ':' Type // Type signature
|
||||
| Id Args [':' Type] '=' Block(Stmt) // Definition
|
||||
| Id Args [':' Type] Block(GuardedDef) // Guarded definitions
|
||||
|
||||
GuardedDef ::= '|' Sep1(Expr, ',') '=' Block(Stmt)
|
||||
|
||||
Using ::= 'using' Con ['as' Con] [UsingParts]
|
||||
UsingParts ::= 'for' '[' Sep1(Id, ',') ']'
|
||||
| 'hiding' '[' Sep1(Id, ',') ']'
|
||||
|
||||
PragmaOp ::= '<' | '=<' | '==' | '>=' | '>'
|
||||
Version ::= Sep1(Int, '.')
|
||||
|
||||
EModifier ::= 'payable' | 'stateful'
|
||||
FModifier ::= 'stateful' | 'private'
|
||||
|
||||
Args ::= '(' Sep(Pattern, ',') ')'
|
||||
```
|
||||
|
||||
Contract declarations must appear at the top-level.
|
||||
|
||||
For example,
|
||||
```sophia
|
||||
contract Test =
|
||||
type t = int
|
||||
entrypoint add (x : t, y : t) = x + y
|
||||
```
|
||||
|
||||
There are three forms of type declarations: type aliases (declared with the
|
||||
`type` keyword), record type definitions (`record`) and data type definitions
|
||||
(`datatype`):
|
||||
|
||||
```c
|
||||
TypeAlias ::= Type
|
||||
RecordType ::= '{' Sep(FieldType, ',') '}'
|
||||
DataType ::= Sep1(ConDecl, '|')
|
||||
|
||||
FieldType ::= Id ':' Type
|
||||
ConDecl ::= Con ['(' Sep1(Type, ',') ')']
|
||||
```
|
||||
|
||||
For example,
|
||||
```sophia
|
||||
record point('a) = {x : 'a, y : 'a}
|
||||
datatype shape('a) = Circle(point('a), 'a) | Rect(point('a), point('a))
|
||||
type int_shape = shape(int)
|
||||
```
|
||||
|
||||
## Types
|
||||
|
||||
```c
|
||||
Type ::= Domain '=>' Type // Function type
|
||||
| Type '(' Sep(Type, ',') ')' // Type application
|
||||
| '(' Type ')' // Parens
|
||||
| 'unit' | Sep(Type, '*') // Tuples
|
||||
| Id | QId | TVar
|
||||
|
||||
Domain ::= Type // Single argument
|
||||
| '(' Sep(Type, ',') ')' // Multiple arguments
|
||||
```
|
||||
|
||||
The function type arrow associates to the right.
|
||||
|
||||
Example,
|
||||
```sophia
|
||||
'a => list('a) => (int * list('a))
|
||||
```
|
||||
|
||||
## Statements
|
||||
|
||||
Function bodies are blocks of *statements*, where a statement is one of the following
|
||||
|
||||
```c
|
||||
Stmt ::= 'switch' '(' Expr ')' Block(Case)
|
||||
| 'if' '(' Expr ')' Block(Stmt)
|
||||
| 'elif' '(' Expr ')' Block(Stmt)
|
||||
| 'else' Block(Stmt)
|
||||
| 'let' LetDef
|
||||
| Using
|
||||
| Expr
|
||||
|
||||
LetDef ::= Id Args [':' Type] '=' Block(Stmt) // Function definition
|
||||
| Pattern '=' Block(Stmt) // Value definition
|
||||
|
||||
Case ::= Pattern '=>' Block(Stmt)
|
||||
| Pattern Block(GuardedCase)
|
||||
|
||||
GuardedCase ::= '|' Sep1(Expr, ',') '=>' Block(Stmt)
|
||||
|
||||
Pattern ::= Expr
|
||||
```
|
||||
|
||||
`if` statements can be followed by zero or more `elif` statements and an optional final `else` statement. For example,
|
||||
|
||||
```sophia
|
||||
let x : int = 4
|
||||
switch(f(x))
|
||||
None => 0
|
||||
Some(y) =>
|
||||
if(y > 10)
|
||||
"too big"
|
||||
elif(y < 3)
|
||||
"too small"
|
||||
else
|
||||
"just right"
|
||||
```
|
||||
|
||||
## Expressions
|
||||
|
||||
```c
|
||||
Expr ::= '(' LamArgs ')' '=>' Block(Stmt) // Anonymous function (x) => x + 1
|
||||
| '(' BinOp ')' // Operator lambda (+)
|
||||
| 'if' '(' Expr ')' Expr 'else' Expr // If expression if(x < y) y else x
|
||||
| Expr ':' Type // Type annotation 5 : int
|
||||
| Expr BinOp Expr // Binary operator x + y
|
||||
| UnOp Expr // Unary operator ! b
|
||||
| Expr '(' Sep(Expr, ',') ')' // Application f(x, y)
|
||||
| Expr '.' Id // Projection state.x
|
||||
| Expr '[' Expr ']' // Map lookup map[key]
|
||||
| Expr '{' Sep(FieldUpdate, ',') '}' // Record or map update r{ fld[key].x = y }
|
||||
| '[' Sep(Expr, ',') ']' // List [1, 2, 3]
|
||||
| '[' Expr '|' Sep(Generator, ',') ']'
|
||||
// List comprehension [k | x <- [1], if (f(x)), let k = x+1]
|
||||
| '[' Expr '..' Expr ']' // List range [1..n]
|
||||
| '{' Sep(FieldUpdate, ',') '}' // Record or map value {x = 0, y = 1}, {[key] = val}
|
||||
| '(' Expr ')' // Parens (1 + 2) * 3
|
||||
| '(' Expr '=' Expr ')' // Assign pattern (y = x::_)
|
||||
| Id | Con | QId | QCon // Identifiers x, None, Map.member, AELib.Token
|
||||
| Int | Bytes | String | Char // Literals 123, 0xff, #00abc123, "foo", '%'
|
||||
| AccountAddress | ContractAddress // Chain identifiers
|
||||
| Signature // Signature
|
||||
| '???' // Hole expression 1 + ???
|
||||
|
||||
Generator ::= Pattern '<-' Expr // Generator
|
||||
| 'if' '(' Expr ')' // Guard
|
||||
| LetDef // Definition
|
||||
|
||||
LamArgs ::= '(' Sep(LamArg, ',') ')'
|
||||
LamArg ::= Id [':' Type]
|
||||
|
||||
FieldUpdate ::= Path '=' Expr
|
||||
Path ::= Id // Record field
|
||||
| '[' Expr ']' // Map key
|
||||
| Path '.' Id // Nested record field
|
||||
| Path '[' Expr ']' // Nested map key
|
||||
|
||||
BinOp ::= '||' | '&&' | '<' | '>' | '=<' | '>=' | '==' | '!='
|
||||
| '::' | '++' | '+' | '-' | '*' | '/' | 'mod' | '^'
|
||||
| 'band' | 'bor' | 'bxor' | '<<' | '>>' | '|>'
|
||||
UnOp ::= '-' | '!' | 'bnot'
|
||||
```
|
||||
|
||||
## Operators types
|
||||
|
||||
| Operators | Type
|
||||
| --- | ---
|
||||
| `-` `+` `*` `/` `mod` `^` | arithmetic operators
|
||||
| `!` `&&` `\|\|` | logical operators
|
||||
| `band` `bor` `bxor` `bnot` `<<` `>>` | bitwise operators
|
||||
| `==` `!=` `<` `>` `=<` `>=` | comparison operators
|
||||
| `::` `++` | list operators
|
||||
| `\|>` | functional operators
|
||||
|
||||
## Operator precedence
|
||||
|
||||
In order of highest to lowest precedence.
|
||||
|
||||
| Operators | Associativity
|
||||
| --- | ---
|
||||
| `!` `bnot`| right
|
||||
| `^` | left
|
||||
| `*` `/` `mod` | left
|
||||
| `-` (unary) | right
|
||||
| `+` `-` | left
|
||||
| `<<` `>>` | left
|
||||
| `::` `++` | right
|
||||
| `<` `>` `=<` `>=` `==` `!=` | none
|
||||
| `band` | left
|
||||
| `bxor` | left
|
||||
| `bor` | left
|
||||
| `&&` | right
|
||||
| `\|\|` | right
|
||||
| `\|>` | left
|
||||
@@ -0,0 +1,67 @@
|
||||
-spec mktree(Signal) -> Tree when
|
||||
Signal :: gsc:signal(),
|
||||
Tree :: gsc_ntree:ntree().
|
||||
|
||||
% @doc make into a tree
|
||||
mktree(Sig) ->
|
||||
Tree0 = gsc_ntree:nstem(vtokens, Sig),
|
||||
Tree1 = rerootl_tkstr("=>", Tree0),
|
||||
Tree2 = rerootl_tkstr("*", Tree1),
|
||||
Tree2.
|
||||
|
||||
|
||||
rerootl_tkstr(S, Tree0 = #ns{val = Root0}) ->
|
||||
Kids0 = gsc_ntree:deleaf0(Tree0),
|
||||
IsntS = fun(Tk) -> isnt_str(S, Tk) end,
|
||||
case lists:splitwith(IsntS, Kids0) of
|
||||
% found
|
||||
% input:
|
||||
% *s Root0
|
||||
% |
|
||||
% +-- .l Foo
|
||||
% +-- .l "=>"
|
||||
% +-- .l Bar
|
||||
% output:
|
||||
% *s "=>"
|
||||
% |
|
||||
% +-- *s Root0 -- .l Foo
|
||||
% +-- *s Root0 -- .l Bar
|
||||
{LHS1, [Tk0 | RHS1]} ->
|
||||
Root1 = Root0,
|
||||
LTree1 = gsc_ntree:releaf0(Root1, LHS1),
|
||||
RTree1 = rerootl_tkstr(S, gsc_ntree:releaf0(Root1, RHS1)),
|
||||
NewRoot0 = {op, Tk0},
|
||||
NewKids0 = [LTree1, RTree1],
|
||||
NewTree = gsc_ntree:releaf0(NewRoot0, NewKids0),
|
||||
NewTree;
|
||||
% not found, nothing to do
|
||||
{Kids0, []} ->
|
||||
Tree0
|
||||
end.
|
||||
|
||||
|
||||
%reroot_mapsto(Tree0 = #ns{val = Root0}) ->
|
||||
% Kids0 = gsc_ntree:deleaf0(Tree0),
|
||||
% IsntMapsto = fun(DL) -> isnt_str("=>", Tk) end,
|
||||
% case lists:splitwith(IsntMapsto, Kids0) of
|
||||
% % found
|
||||
% {LHS1, [Tk0 | RHS1]} ->
|
||||
% Root1 = Root0,
|
||||
% LTree1 = gsc_ntree:releaf0(Root1, LHS1),
|
||||
% RTree1 = reroot_mapsto(gsc_ntree:releaf0(Root1, RHS1)),
|
||||
% NewRoot0 = {op, Tk0},
|
||||
% NewKids0 = [LTree1, RTree1],
|
||||
% NewTree = gsc_ntree:releaf0(NewRoot0, NewKids0),
|
||||
% NewTree;
|
||||
% % nothing to do
|
||||
% {Kids0, []} ->
|
||||
% Tree0
|
||||
% end.
|
||||
|
||||
|
||||
|
||||
isnt_str(X, Y) ->
|
||||
not is_str(X, Y).
|
||||
|
||||
is_str(S, #tk{str = S}) -> true;
|
||||
is_str(_, _) -> false.
|
||||
+259
-8
@@ -1,11 +1,5 @@
|
||||
%%% @doc
|
||||
%%% GSC CLI: gsc_cli
|
||||
%%%
|
||||
%%% This module is currently named `gsc_cli', but you may want to change that.
|
||||
%%% Remember that changing the name in `-module()' below requires renaming
|
||||
%%% this file, and it is recommended to run `zx update .app` in the main
|
||||
%%% project directory to make sure the ebin/gsc_cli.app file stays in
|
||||
%%% sync with the project whenever you add, remove or rename a module.
|
||||
%%% GSC CLI: explorer/harness for sfc iteration
|
||||
%%% @end
|
||||
|
||||
-module(gsc_cli).
|
||||
@@ -14,12 +8,269 @@
|
||||
-copyright("Peter Harpending <peterharpending@qpq.swiss>").
|
||||
-license("GPL-3.0-only").
|
||||
|
||||
-export([
|
||||
tokens/1,
|
||||
so_tokens/1,
|
||||
gso_tokens/1
|
||||
]).
|
||||
-export([start/1]).
|
||||
|
||||
-include("$gsc_include/gsc.hrl").
|
||||
-include("ansi.hrl").
|
||||
|
||||
do_help() ->
|
||||
io:format("~ts", [help_screen()]).
|
||||
|
||||
help_screen() ->
|
||||
["you can't help people who refuse to help themsleves\n"].
|
||||
|
||||
-spec start(ArgV) -> ok
|
||||
when ArgV :: [string()].
|
||||
|
||||
start([]) ->
|
||||
do_eshell(),
|
||||
ok;
|
||||
start(["shell"]) ->
|
||||
do_eshell(),
|
||||
ok;
|
||||
start(["eshell"]) ->
|
||||
do_eshell(),
|
||||
ok;
|
||||
start(ArgV) ->
|
||||
ok = io:format("Hello, World! Args: ~tp~n", [ArgV]),
|
||||
%io:format("ArgV: ~p~n", [ArgV]),
|
||||
do(ArgV),
|
||||
zx:silent_stop().
|
||||
|
||||
do(["list"]) ->
|
||||
do_tlist();
|
||||
do(["list", "tests"]) ->
|
||||
do_tlist();
|
||||
do(["test"]) ->
|
||||
do_tests();
|
||||
do(["test" | Tests]) ->
|
||||
do_tests(Tests);
|
||||
do(["tests"]) ->
|
||||
do_tests();
|
||||
do(["run", "tests"]) ->
|
||||
do_tests();
|
||||
do(["tokenizers_agree", Foo]) ->
|
||||
io:format("~p~n", [tokenizers_agree(Foo)]);
|
||||
% slowly phasing out shitty names like lctokens
|
||||
% tokens = native sfc token representation
|
||||
do(["tokens", Foo]) -> do_tokens(Foo);
|
||||
do(["color_tokens", Foo]) -> do_color_tokens(Foo);
|
||||
do(["ctokens", Foo]) -> do_color_tokens(Foo);
|
||||
do(["colour_tokens" | _]) -> do_doi();
|
||||
% so_tokens = so_scan tokens
|
||||
do(["so", "tokens", Foo]) -> do_so_tokens(Foo);
|
||||
do(["so_tokens", Foo]) -> do_so_tokens(Foo);
|
||||
% gso_tokens = our mockery
|
||||
do(["gso", "tokens", Foo]) -> do_gso_tokens(Foo);
|
||||
do(["gso_tokens", Foo]) -> do_gso_tokens(Foo);
|
||||
% print source file to screen with token boundaries highlighted
|
||||
% script utility
|
||||
do(["rmm", Foo]) ->
|
||||
do_rmm(Foo);
|
||||
do(Args) ->
|
||||
io:format("bad args: ~p~n", [Args]),
|
||||
do_help().
|
||||
|
||||
do_doi() ->
|
||||
FP = zx:get_home() ++ "/priv/doi.txt",
|
||||
page_file(FP).
|
||||
|
||||
% thank you chatgpt
|
||||
% os:cmd didnt do nuffin because that's for running
|
||||
% stuff in the background and capturing the output, not
|
||||
% for taking over the screen
|
||||
page_file(FilePath) ->
|
||||
Less = os:find_executable("less"),
|
||||
case Less of
|
||||
false -> cat_file(FilePath);
|
||||
_ -> less_file(Less, FilePath)
|
||||
end.
|
||||
|
||||
cat_file(FilePath) ->
|
||||
{ok, Bytes} = file:read_file(FilePath),
|
||||
io:format("~ts", [Bytes]).
|
||||
|
||||
less_file(Less, FilePath) ->
|
||||
Port = open_port({spawn_executable, Less},
|
||||
[{args, [FilePath]},
|
||||
nouse_stdio, exit_status]),
|
||||
receive
|
||||
{Port, {exit_status, 0}} ->
|
||||
ok;
|
||||
{Port, {exit_status, N}} ->
|
||||
error({less_exit_status, N});
|
||||
{'EXIT', Port, Reason} ->
|
||||
error(Reason)
|
||||
end.
|
||||
|
||||
do_tests() ->
|
||||
io:format("TestModules = ~p~n", [test_mods()]),
|
||||
do_runall_tests().
|
||||
|
||||
do_runall_tests() ->
|
||||
lists:foreach(fun run_mod_main/1, test_mods()).
|
||||
|
||||
|
||||
do_tests(List) ->
|
||||
lists:foreach(fun run_test/1, List).
|
||||
|
||||
% n
|
||||
run_test(TestName) ->
|
||||
% we have two candidate atoms
|
||||
C1 = list_to_atom(TestName),
|
||||
C2 = list_to_atom("gsc_test_" ++ TestName),
|
||||
KnownMods = test_mods(),
|
||||
IsC1 = lists:member(C1, KnownMods),
|
||||
IsC2 = lists:member(C2, KnownMods),
|
||||
if
|
||||
IsC1 -> rmm(C1);
|
||||
IsC2 -> rmm(C2);
|
||||
true -> error({no_such_test, TestName})
|
||||
end.
|
||||
|
||||
|
||||
rmm(X) -> run_mod_main(X).
|
||||
|
||||
% KnownTests = test_mods(),
|
||||
% TestMods = ensure_all_known([], List, KnownTests),
|
||||
% lists:foreach(fun run_mod_main/1, TestMods).
|
||||
|
||||
|
||||
%ensure_all_known(Acc, [], _) ->
|
||||
% lists:sort(Acc);
|
||||
%ensure_all_known(Acc, [T | Ts], Knowns) ->
|
||||
% case lists:member(T, Knowns) of
|
||||
%
|
||||
% end.
|
||||
|
||||
|
||||
test_mods() ->
|
||||
known_modules_with_prefix("gsc_test").
|
||||
|
||||
known_modules_with_prefix(Pfx) ->
|
||||
ModsZipBeamsZipLoaded = code:all_available(),
|
||||
kmp(Pfx, ModsZipBeamsZipLoaded, []).
|
||||
|
||||
kmp(_Pfx, [], Acc) ->
|
||||
lists:sort(Acc);
|
||||
kmp(Pfx, [{ModStr, _BeamPath, _Loaded} | Rest], Acc) ->
|
||||
case lists:prefix(Pfx, ModStr) of
|
||||
false -> kmp(Pfx, Rest, Acc);
|
||||
true -> kmp(Pfx, Rest, [list_to_atom(ModStr) | Acc])
|
||||
end.
|
||||
|
||||
run_mod_main(Mod) ->
|
||||
io:format("========================================\n"
|
||||
"~p:main()\n"
|
||||
"========================================\n",
|
||||
[Mod]),
|
||||
try
|
||||
Mod:main()
|
||||
catch
|
||||
Err:ErrType:Trace ->
|
||||
io:format("~p: ~p~n", [Err, ErrType]),
|
||||
io:format("Trace:~n~p~n", [Trace])
|
||||
end.
|
||||
|
||||
do_tlist() ->
|
||||
lists:foreach(
|
||||
fun(ModName) ->
|
||||
io:format("~s~n", [ModName])
|
||||
end,
|
||||
test_mods()
|
||||
).
|
||||
|
||||
|
||||
-spec do_eshell() -> ok.
|
||||
% @doc start an erlang shell
|
||||
|
||||
do_eshell() ->
|
||||
io:format("Welcome to the GSC shell!~n", []),
|
||||
case shell:start_interactive() of
|
||||
ok -> ok;
|
||||
{error, already_started} -> ok;
|
||||
{error, Reason} -> error(Reason)
|
||||
end.
|
||||
|
||||
tokenizers_agree(File) ->
|
||||
gso_tokens(File) =:= so_tokens(File).
|
||||
|
||||
|
||||
do_tokens(FilePath) ->
|
||||
[io:format("~p~n", [Tk]) || Tk <- tokens(FilePath)].
|
||||
|
||||
do_so_tokens(FilePath) ->
|
||||
[io:format("~p~n", [Tk]) || Tk <- so_tokens(FilePath)].
|
||||
|
||||
do_gso_tokens(FilePath) ->
|
||||
[io:format("~p~n", [Tk]) || Tk <- gso_tokens(FilePath)].
|
||||
|
||||
|
||||
% rmm = run module:main() with our context loaded
|
||||
% useful for prototyping
|
||||
do_rmm(FilePath) ->
|
||||
case compile:file(FilePath) of
|
||||
{ok, Mod} -> Mod:main();
|
||||
Error -> error(Error)
|
||||
end.
|
||||
|
||||
|
||||
so_tokens(FilePath) ->
|
||||
{ok, FileBytes} = file:read_file(FilePath),
|
||||
FileStr = unicode:characters_to_nfc_list(FileBytes),
|
||||
{ok, Tokens} = so_scan:scan(FileStr),
|
||||
Tokens.
|
||||
|
||||
gso_tokens(FilePath) ->
|
||||
{ok, FileBytes} = file:read_file(FilePath),
|
||||
FileStr = unicode:characters_to_nfc_list(FileBytes),
|
||||
{ok, Tokens} = gso_scan:scan(FileStr),
|
||||
Tokens.
|
||||
|
||||
|
||||
tokens(FilePath) ->
|
||||
{ok, Tokens} = gsc:tokens_from_file(FilePath),
|
||||
Tokens.
|
||||
|
||||
|
||||
do_color_tokens(File) ->
|
||||
case gsc:tokens_from_file(File) of
|
||||
{ok, Tokens} ->
|
||||
ColorizedSrcStr = colorize_tokens(chunk_color_wheel(), Tokens, ""),
|
||||
Full = [?ANSI_INVERT, ColorizedSrcStr, ?ANSI_UNINVERT],
|
||||
io:format("~s", [Full]);
|
||||
Error ->
|
||||
io:format("~p~n", [Error])
|
||||
end.
|
||||
|
||||
chunk_color_wheel() ->
|
||||
%[yellow, blue].
|
||||
[red, green, yellow, blue, magenta, cyan].
|
||||
|
||||
|
||||
|
||||
colorize_tokens(Wheel, [T | Ts], Acc) ->
|
||||
{Color, NewWheel} = rotate(Wheel),
|
||||
NewAcc = [Acc, colorize_token_str(Color, T)],
|
||||
colorize_tokens(NewWheel, Ts, NewAcc);
|
||||
colorize_tokens(_, [], Acc) ->
|
||||
Acc.
|
||||
|
||||
rotate([A | Rest]) ->
|
||||
{A, Rest ++ [A]}.
|
||||
|
||||
colorize_token_str(Color, #tk{str = Str}) ->
|
||||
{Pfx, Sfx} = color_fixes(Color),
|
||||
[Pfx, Str, Sfx].
|
||||
|
||||
color_fixes(red) -> {?ANSI_FG_RED, ?ANSI_FG_RESET};
|
||||
color_fixes(green) -> {?ANSI_FG_GREEN, ?ANSI_FG_RESET};
|
||||
color_fixes(yellow) -> {?ANSI_FG_YELLOW, ?ANSI_FG_RESET};
|
||||
color_fixes(blue) -> {?ANSI_FG_BLUE, ?ANSI_FG_RESET};
|
||||
color_fixes(magenta) -> {?ANSI_FG_MAGENTA, ?ANSI_FG_RESET};
|
||||
color_fixes(cyan) -> {?ANSI_FG_CYAN, ?ANSI_FG_RESET}.
|
||||
|
||||
|
||||
@@ -0,0 +1,273 @@
|
||||
% @doc experiment centering around the file syntax node using ntree approach
|
||||
-module(gsc_test_file).
|
||||
|
||||
-export([
|
||||
main/0
|
||||
]).
|
||||
|
||||
-include("$gsc_include/gsc.hrl").
|
||||
|
||||
|
||||
-record(ct,
|
||||
{payable = none :: none | false | {true, tk()},
|
||||
main = none :: none | false | {true, tk()},
|
||||
contract = none :: none | tk(),
|
||||
con = none :: none | tk(),
|
||||
impls = none :: none | [tk()],
|
||||
eq = none :: none | tk()}).
|
||||
|
||||
-type meta() :: #ct{}.
|
||||
|
||||
-record(decl_type,
|
||||
{type = none :: none | tk(),
|
||||
id = none :: none | tk(),
|
||||
params = none :: none | [tk()],
|
||||
eq = none :: none | tk()}).
|
||||
|
||||
-type decl_meta() :: #decl_type{}.
|
||||
|
||||
|
||||
-type ast_meta() :: file
|
||||
| meta()
|
||||
| decl_meta()
|
||||
| nyi
|
||||
| {nyi, any()}
|
||||
.
|
||||
|
||||
|
||||
-type target()
|
||||
:: ct
|
||||
| iface
|
||||
| ns
|
||||
| pragma
|
||||
| include
|
||||
| using
|
||||
.
|
||||
|
||||
-type s2t_target()
|
||||
:: file
|
||||
| top_decl
|
||||
| target()
|
||||
| nyi
|
||||
| {nyi, any()}
|
||||
.
|
||||
|
||||
-type s2f_target()
|
||||
:: {block_of, s2t_target()}
|
||||
.
|
||||
|
||||
|
||||
-type ast() :: ntree(ast_meta(), tk()).
|
||||
-type asf() :: nforest(ast_meta(), tk()).
|
||||
|
||||
main() ->
|
||||
HelloN = "hello.aes",
|
||||
HelloP = ts_utils:ct_file_abspath(HelloN),
|
||||
{ok, HelloS} = file:read_file(HelloP),
|
||||
S0 = gsc:unsafe_signal_from_file(HelloP),
|
||||
T1 = s2t(file, S0),
|
||||
io:format("hello.aes:~n", []),
|
||||
io:format("```~n", []),
|
||||
io:format("~ts", [HelloS]),
|
||||
io:format("```~n~n", []),
|
||||
io:format("AST: ~tp~n", [T1]),
|
||||
ok.
|
||||
|
||||
% // Hello World Contract
|
||||
% // Copyright (c) 2025 QPQ AG
|
||||
%
|
||||
% contract Hello =
|
||||
% type state = unit
|
||||
% entrypoint init(): state =
|
||||
% ()
|
||||
%
|
||||
% entrypoint hello(): string =
|
||||
% "hello, world"
|
||||
|
||||
-spec s2t(ParseTarget, Signal) -> AST when
|
||||
ParseTarget :: file,
|
||||
Signal :: [tk()],
|
||||
AST :: ast().
|
||||
|
||||
% File ::= Block(TopDecl)
|
||||
s2t(file, Signal) ->
|
||||
case Signal of
|
||||
[] -> error(empty_file);
|
||||
_ -> {ns, file, s2f({block_of, top_decl}, Signal)}
|
||||
end;
|
||||
% TopDecl ::= ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl)
|
||||
% | ['payable'] 'contract' 'interface' Con [Implement] '=' Block(Decl)
|
||||
% | 'namespace' Con '=' Block(Decl)
|
||||
% | '@compiler' PragmaOp Version
|
||||
% | 'include' String
|
||||
% | Using
|
||||
s2t(top_decl, Signal) ->
|
||||
NewTarget =
|
||||
case gsc_tokens:strings(3, Signal) of
|
||||
["payable", "contract", "interface"] -> iface;
|
||||
["contract", "interface" | _] -> iface;
|
||||
["payable", "main", "contract"] -> ct;
|
||||
["payable", "contract" | _] -> ct;
|
||||
["contract" | _] -> ct;
|
||||
["namespace" | _] -> namespace;
|
||||
["@compiler" | _] -> pragma;
|
||||
["include" | _] -> include;
|
||||
["using" | _] -> using
|
||||
end,
|
||||
s2t(NewTarget, Signal);
|
||||
% ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl)
|
||||
s2t(ct, S0) ->
|
||||
{slurp, CtMeta, S1} = s2s_slurp_meta(#ct{}, S0),
|
||||
{ns, CtMeta, s2f({block_of, decl}, S1)};
|
||||
% Decl ::= 'type' Id ['(' TVar* ')'] '=' TypeAlias
|
||||
% | 'record' Id ['(' TVar* ')'] '=' RecordType
|
||||
% | 'datatype' Id ['(' TVar* ')'] '=' DataType
|
||||
% | 'let' Id [':' Type] '=' Expr
|
||||
% | (EModifier* 'entrypoint' | FModifier* 'function') Block(FunDecl)
|
||||
% | Using
|
||||
s2t(decl, S0) ->
|
||||
NewTarget =
|
||||
case gsc_tokens:strings(3, S0) of
|
||||
["type" | _] -> decl_type;
|
||||
["record" | _] -> decl_record;
|
||||
["datatype" | _] -> decl_datatype;
|
||||
["let" | _] -> decl_let;
|
||||
Pfx3 ->
|
||||
IsEp = lists:member("entrypoint", Pfx3),
|
||||
IsFn = lists:member("function", Pfx3),
|
||||
if
|
||||
IsEp -> decl_entrypoint;
|
||||
IsFn -> decl_function;
|
||||
true -> error({bad_decl, S0})
|
||||
end
|
||||
end,
|
||||
s2t(NewTarget, S0);
|
||||
% 'type' Id ['(' TVar* ')'] '=' TypeAlias
|
||||
s2t(decl_type, S0) ->
|
||||
{slurp, Meta, S1} = s2s_slurp_meta(#decl_type{}, S0),
|
||||
{ns, Meta, s2t(type, S1)};
|
||||
s2t(nyi, Signal) ->
|
||||
{ns, nyi, Signal};
|
||||
s2t(NYI = {nyi, _}, Signal) ->
|
||||
{ns, NYI, Signal};
|
||||
s2t(NYI, Signal) ->
|
||||
{ns, {nyi, NYI}, Signal}.
|
||||
|
||||
|
||||
|
||||
-spec s2f(ForestTarget, Signal) -> Forest when
|
||||
ForestTarget :: s2f_target(),
|
||||
Signal :: [tk()],
|
||||
Forest :: asf().
|
||||
|
||||
s2f({block_of, TreeTarget}, S0) ->
|
||||
{gulp, Items} = gsc_signal:gulp_block_items(S0),
|
||||
[s2t(TreeTarget, I) || I <- Items].
|
||||
|
||||
|
||||
-spec s2s_slurp_meta(InitMeta, Signal) -> Result when
|
||||
InitMeta :: Meta,
|
||||
Signal :: [tk()],
|
||||
Result :: {slurp, Meta, NewSignal},
|
||||
Meta :: ast_meta(),
|
||||
NewSignal :: Signal.
|
||||
|
||||
s2s_slurp_meta(M = #ct{}, S) ->
|
||||
s2s_sm_ct(M, S);
|
||||
s2s_slurp_meta(M = #decl_type{}, S) ->
|
||||
s2s_sm_decl_type(M, S);
|
||||
s2s_slurp_meta(M, S) ->
|
||||
error({s2s_slurp_meta, M, S}).
|
||||
|
||||
|
||||
s2s_sm_ct(Ct = #ct{payable = none}, S0) ->
|
||||
case S0 of
|
||||
[#tk{str = "payable"} = T0 | S1] ->
|
||||
s2s_sm_ct(Ct#ct{payable = {true, T0}}, S1);
|
||||
_ ->
|
||||
s2s_sm_ct(Ct#ct{payable = false}, S0)
|
||||
end;
|
||||
s2s_sm_ct(Ct = #ct{main = none}, S0) ->
|
||||
case S0 of
|
||||
[#tk{str = "main"} = T0 | S1] ->
|
||||
s2s_sm_ct(Ct#ct{main = {true, T0}}, S1);
|
||||
_ ->
|
||||
s2s_sm_ct(Ct#ct{main = false}, S0)
|
||||
end;
|
||||
s2s_sm_ct(Ct = #ct{contract = none}, S0) ->
|
||||
case S0 of
|
||||
[#tk{str = "contract"} = T0 | S1] ->
|
||||
s2s_sm_ct(Ct#ct{contract = T0}, S1);
|
||||
_ ->
|
||||
error({no_kwd_contract, Ct, S0})
|
||||
end;
|
||||
s2s_sm_ct(Ct = #ct{con = none}, S0) ->
|
||||
case S0 of
|
||||
[#tk{shape = con} = T0 | S1] ->
|
||||
s2s_sm_ct(Ct#ct{con = T0}, S1);
|
||||
_ ->
|
||||
error({no_contract_name, Ct, S0})
|
||||
end;
|
||||
s2s_sm_ct(Ct = #ct{impls = none}, S0) ->
|
||||
case gsc_tokens:strings(1, S0) of
|
||||
[":"] ->
|
||||
{slurp, Impls, S1} = s2f_slurp_impls(S0),
|
||||
s2s_sm_ct(Ct#ct{impls = Impls}, S1);
|
||||
_ ->
|
||||
s2s_sm_ct(Ct#ct{impls = []}, S0)
|
||||
end;
|
||||
s2s_sm_ct(Ct = #ct{eq = none}, S0) ->
|
||||
case S0 of
|
||||
[#tk{str = "="} = T0 | S1] ->
|
||||
s2s_sm_ct(Ct#ct{eq = T0}, S1);
|
||||
_ ->
|
||||
error({no_equal_sign, Ct, S0})
|
||||
end;
|
||||
s2s_sm_ct(Ct, S0) ->
|
||||
{slurp, Ct, S0}.
|
||||
|
||||
s2f_slurp_impls([#tk{str = ":"}, #tk{shape = con} = I0 | S0]) ->
|
||||
s2f_slurp_impls([I0], S0).
|
||||
|
||||
s2f_slurp_impls(Stk, [#tk{str = ","}, #tk{shape = con} = I0 | S0]) ->
|
||||
s2f_slurp_impls([I0 | Stk], S0);
|
||||
s2f_slurp_impls(Stk, S0) ->
|
||||
{slurp, lists:reverse(Stk), S0}.
|
||||
|
||||
|
||||
%-record(decl_type,
|
||||
% {type = none :: none | tk(),
|
||||
% id = none :: none | tk(),
|
||||
% params = none :: none | [tk()],
|
||||
% eq = none :: none | tk()}).
|
||||
|
||||
s2s_sm_decl_type(M = #decl_type{type = none}, S0) ->
|
||||
case S0 of
|
||||
[#tk{str = "type"} = T0 | S1] ->
|
||||
s2s_sm_decl_type(M#decl_type{type = T0}, S1);
|
||||
_ ->
|
||||
error({no_kwd_type, S0})
|
||||
end;
|
||||
s2s_sm_decl_type(M = #decl_type{id = none}, S0) ->
|
||||
case S0 of
|
||||
[#tk{shape = id} = T0 | S1] ->
|
||||
s2s_sm_decl_type(M#decl_type{id = T0}, S1);
|
||||
_ ->
|
||||
error({no_type_id, S0})
|
||||
end;
|
||||
s2s_sm_decl_type(M = #decl_type{params = none}, S0) ->
|
||||
case S0 of
|
||||
[#tk{str = "("} = T0 | _] ->
|
||||
error({fixme, parens_bad});
|
||||
_ ->
|
||||
s2s_sm_decl_type(M#decl_type{params = []}, S0)
|
||||
end;
|
||||
s2s_sm_decl_type(M = #decl_type{eq = none}, S0) ->
|
||||
case S0 of
|
||||
[#tk{str = "="} = T0 | S1] ->
|
||||
s2s_sm_decl_type(M#decl_type{eq = T0}, S1);
|
||||
_ ->
|
||||
error({no_equal_sign, S0})
|
||||
end;
|
||||
s2s_sm_decl_type(M, S0) ->
|
||||
{slurp, M, S0}.
|
||||
@@ -0,0 +1,144 @@
|
||||
-module(gsc_test_ntree).
|
||||
|
||||
-export([
|
||||
main/0
|
||||
]).
|
||||
|
||||
-include("$gsc_include/gsc.hrl").
|
||||
|
||||
% just parsing type expressions right now, so only need
|
||||
% to worry about round parens
|
||||
%
|
||||
% none is to indicate general-purpose grouping, for
|
||||
% e.g. LHS/RHS of an op
|
||||
-type syntax_meta()
|
||||
:: {op, tk()}
|
||||
| op_arg
|
||||
| {parens, Open :: tk(), Close :: tk()}
|
||||
.
|
||||
|
||||
-type ast() :: ntree(syntax_meta(), tk()).
|
||||
-type asf() :: nforest(syntax_meta(), tk()).
|
||||
-type asts() :: asf().
|
||||
|
||||
|
||||
main() ->
|
||||
x00(),
|
||||
ok.
|
||||
|
||||
% x00 = example00
|
||||
x00() ->
|
||||
io:format("Example 00:~n", []),
|
||||
io:format(" SrcStr = ~p~n", [x00_src()]),
|
||||
io:format(" Tokens = ~p~n", [x00_tks()]),
|
||||
io:format(" Signal = ~p~n", [x00_sgl()]),
|
||||
io:format(" Forest = ~p~n", [x00_fst()]),
|
||||
ok.
|
||||
|
||||
% sample type expr, tokens, signal
|
||||
x00_src() -> "(foo => (bar) * baz)".
|
||||
x00_tks() -> gsc:unsafe_tokens_from_string(x00_src()).
|
||||
x00_sgl() -> gsc:filter_signal(x00_tks()).
|
||||
x00_fst() -> parse(x00_sgl()).
|
||||
|
||||
|
||||
-spec parse(Signal) -> ASF when
|
||||
Signal :: [tk()],
|
||||
ASF :: asf().
|
||||
|
||||
parse(Signal) ->
|
||||
% key insight here is our signal is already a
|
||||
% forest, assuming the leaf type is `tk()`.
|
||||
%
|
||||
% our parser is a sequence of forest-to-forest
|
||||
% transformers.
|
||||
%
|
||||
% at the end we should end up with just one tree (i
|
||||
% think)?
|
||||
F0 = Signal,
|
||||
F1 = f2f_parens(F0),
|
||||
F2 = f2f_op("=>", F1),
|
||||
F3 = f2f_op("*", F2),
|
||||
Result = F2,
|
||||
Result.
|
||||
|
||||
|
||||
f2f_op(OpStr, Fst) ->
|
||||
f2f_op(OpStr, [], Fst).
|
||||
|
||||
|
||||
% never saw the op
|
||||
f2f_op(_opstr, Stk, []) ->
|
||||
lists:reverse(Stk);
|
||||
% see op
|
||||
f2f_op(OpStr, LhsStk, [#tk{str = OpStr} = OpTk | Rest]) ->
|
||||
Lhf = lists:reverse(LhsStk),
|
||||
Rhf = f2f_op(OpStr, Rest),
|
||||
Lht = #ns{meta = op_arg, kids = Lhf},
|
||||
Rht = #ns{meta = op_arg, kids = Rhf},
|
||||
ResultT = #ns{meta = {op, OpTk},
|
||||
kids = [Lht, Rht]},
|
||||
ResultF = [ResultT],
|
||||
ResultF;
|
||||
% see stem, descend
|
||||
f2f_op(OpStr, LhsStk, [Ns = #ns{kids = NsKids} | Rest]) ->
|
||||
NewNsKids = f2f_op(OpStr, NsKids),
|
||||
NewNs = Ns#ns{kids = NewNsKids},
|
||||
NewStk = [NewNs | LhsStk],
|
||||
f2f_op(OpStr, NewStk, Rest);
|
||||
% see leaf, just add
|
||||
f2f_op(OpStr, Stk, [L | Rest]) ->
|
||||
f2f_op(OpStr, [L | Stk], Rest).
|
||||
|
||||
|
||||
-spec f2f_parens(Forest) -> NewForest when
|
||||
Forest :: asts(),
|
||||
NewForest :: Forest.
|
||||
% @doc
|
||||
% recursive parens decomposition
|
||||
%
|
||||
% the input here is the flat list of tokens. here we
|
||||
% basically replace the string of tokens between `(`
|
||||
% and `)` with a single tree
|
||||
%
|
||||
% interesting quirk is that this doesn't error on too
|
||||
% many close parens, only too many open parens
|
||||
|
||||
f2f_parens(Fst) ->
|
||||
f2f_parens([], Fst).
|
||||
|
||||
% done
|
||||
f2f_parens(Stk, []) ->
|
||||
lists:reverse(Stk);
|
||||
% crawl down the forest and scan for open parens
|
||||
% open paren, we descend
|
||||
f2f_parens(Stk, [#tk{str = "("} = TkOpen | Rest0]) ->
|
||||
InitMeta = {parens, TkOpen, none},
|
||||
{slurp, PStem, Rest1} = slurp_pstem(InitMeta, [], Rest0),
|
||||
NewStk = [PStem | Stk],
|
||||
f2f_parens(NewStk, Rest1);
|
||||
% something else, we continue
|
||||
f2f_parens(Stk, [Tree | Rest]) ->
|
||||
f2f_parens([Tree | Stk], Rest).
|
||||
|
||||
|
||||
|
||||
% ran out of tokens before close paren
|
||||
slurp_pstem({parens, TkOpen, none}, Stk, []) ->
|
||||
error({no_close_for, TkOpen, Stk});
|
||||
% hit close paren, we done
|
||||
slurp_pstem({parens, TkOpen, none}, Stk, [TkClose = #tk{str = ")"} | Rest]) ->
|
||||
FinalMeta = {parens, TkOpen, TkClose},
|
||||
Midsection = lists:reverse(Stk),
|
||||
FinalTree = #ns{meta = FinalMeta,
|
||||
kids = Midsection},
|
||||
{slurp, FinalTree, Rest};
|
||||
% hit open paren, we recurse
|
||||
slurp_pstem(AccMeta, Stk, [TkOpen_II = #tk{str = "("} | Rest0]) ->
|
||||
InitMeta_II = {parens, TkOpen_II, none},
|
||||
{slurp, PStem_II, Rest1} = slurp_pstem(InitMeta_II, [], Rest0),
|
||||
NewStk = [PStem_II | Stk],
|
||||
slurp_pstem(AccMeta, NewStk, Rest1);
|
||||
% hit something else, we move along
|
||||
slurp_pstem(AccMeta, Stk, [Tree | Rest]) ->
|
||||
slurp_pstem(AccMeta, [Tree | Stk], Rest).
|
||||
@@ -0,0 +1,165 @@
|
||||
% gsc tokenizer tests
|
||||
-module(gsc_test_tokens).
|
||||
|
||||
-export([
|
||||
main/0, ct_dir/0
|
||||
%tokens_match/1
|
||||
]).
|
||||
-include("$gsc_include/gsc.hrl").
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
|
||||
main() ->
|
||||
%io:format("~p~n", [div_files()]),
|
||||
%io:format("MAINNNNN!~n", []),
|
||||
eunit:test(?MODULE, [verbose]).
|
||||
%eunit:test(?MODULE).
|
||||
|
||||
|
||||
% directory containing the tests for the tokenizer
|
||||
ct_dir() ->
|
||||
zx_daemon:get_home() ++ "/ct".
|
||||
|
||||
agreement_tests_dir() ->
|
||||
ct_dir() ++ "/tokenizers_agree".
|
||||
|
||||
|
||||
% the divergences claude found between gsc tokenizer and so tokenizer
|
||||
%
|
||||
% mostly stupid corner cases like a string crossing a line boundary
|
||||
% or unterminated block comment
|
||||
%
|
||||
% divergence files: "divergence" means so_scan disagrees with
|
||||
% gsc_so_scan in one of the following ways:
|
||||
%
|
||||
% - one succeeds when the other errors
|
||||
% - disagree on success case
|
||||
%
|
||||
% making errors agree on two programs that work differently is a
|
||||
% fool's errand
|
||||
div_files() ->
|
||||
ContractsDir = agreement_tests_dir(),
|
||||
% this is the equivalent of ls
|
||||
% just has filenames, no /path/to/ prefix
|
||||
{ok, Files} = file:list_dir(ContractsDir),
|
||||
% originally i was a retard and didn't read the eunit
|
||||
% documentation, so if any one test failed, the entire test suite
|
||||
% would crash with no information regarding what happened
|
||||
%
|
||||
% so this was a hack to only run div01-div05 but not div06:
|
||||
%
|
||||
% % hack to fix one broken test at a time
|
||||
% (FileName = "div0" ++ [Digit | _]) when Digit =< $9 ->
|
||||
% FilePath = ct_dir() ++ "/" ++ FileName,
|
||||
% {true, {FileName, FilePath}}
|
||||
% (_) -> false
|
||||
%
|
||||
% Once i read the eunit docs and learned about test generators, I
|
||||
% realized I could have only the failed test chimp out. what a
|
||||
% concept.
|
||||
%
|
||||
% i also realized that printing the full filepath was a waste, so
|
||||
% instead the test should know about the FileName (foo.bar) and the
|
||||
% FilePath (/path/to/foo.bar).
|
||||
%
|
||||
% then i decided to start writing my own test contracts instead
|
||||
% of having claude do it and i rean into the issue of vim swap
|
||||
% files not lexing properly because they're not unicode
|
||||
IsDivCt =
|
||||
fun(FileName) ->
|
||||
% need to filter out vim swap files
|
||||
% originally was false-matching on ([$. | _])
|
||||
% like a man
|
||||
%
|
||||
% god this feels like putting my balls in a little tiny
|
||||
% guillotine (even the guillotine is emasculating) but
|
||||
% claude suggested this and i mean it's kind of the
|
||||
% most idiomatic and like straightforward. most
|
||||
% importantly it's declarative
|
||||
%
|
||||
% god i feel so defeated
|
||||
case filename:extension(FileName) of
|
||||
".aes" ->
|
||||
FilePath = ContractsDir ++ "/" ++ FileName,
|
||||
{true, {FileName, FilePath}};
|
||||
_ ->
|
||||
false
|
||||
end
|
||||
end,
|
||||
lists:sort(lists:filtermap(IsDivCt, Files)).
|
||||
|
||||
|
||||
%div_file_names() -> [N || {N, _} <- div_files()].
|
||||
%div_file_paths() -> [P || {_, P} <- div_files()].
|
||||
|
||||
tokstr_concat_test_() ->
|
||||
% future proofing
|
||||
ConcatTestFiles
|
||||
= lists:flatten([
|
||||
div_files()
|
||||
]),
|
||||
% exclude the contracts with like unterminated block comments
|
||||
% where they don't tokenize properly
|
||||
NonStupidFiles =
|
||||
lists:filter(
|
||||
fun
|
||||
({"div05_bcom_eof.aes", _}) -> false;
|
||||
({"div06_bcom_in_expr.aes", _}) -> false;
|
||||
({"div07_bcom_nested.aes", _}) -> false;
|
||||
({"div08_bcom_simple.aes", _}) -> false;
|
||||
({_, _}) -> true
|
||||
end,
|
||||
ConcatTestFiles
|
||||
),
|
||||
%?debugFmt("ConcatTestFiles=~p", [ConcatTestFiles]),
|
||||
{"file = sum(tokens)",
|
||||
[concat_property(Name, Path) || {Name, Path} <- NonStupidFiles]}.
|
||||
|
||||
concat_property(FileName, FilePath) ->
|
||||
%?debugFmt("concat_property(~p, _)", [FileName]),
|
||||
FileChars = gsc:very_stable_file(FilePath),
|
||||
{FileName ++ ": file = sum(tokens)",
|
||||
fun() ->
|
||||
case gsc:tokens_from_file(FileChars) of
|
||||
{ok, SfcTokens} ->
|
||||
ConcatStr = concat_token_strs(SfcTokens, []),
|
||||
?assertEqual(FileChars, ConcatStr);
|
||||
_Error ->
|
||||
ok
|
||||
end
|
||||
end}.
|
||||
|
||||
concat_token_strs([#tk{str = S} | Rest], Acc) ->
|
||||
concat_token_strs(Rest, [Acc, S]);
|
||||
concat_token_strs([], Acc) ->
|
||||
unicode:characters_to_nfc_list(Acc).
|
||||
|
||||
% underscore marks this as a test *generator*
|
||||
div_test_() ->
|
||||
% divergence
|
||||
DivFiles = div_files(),
|
||||
%?debugFmt("DivFiles=~p", [DivFiles]),
|
||||
{"claude tokenizer divergences fixed",
|
||||
[tokens_match(N, P) || {N, P} <- DivFiles]}.
|
||||
|
||||
tokens_match(FileName, FilePath) ->
|
||||
%?debugFmt("tokens_match(~p, _)", [FileName]),
|
||||
% extracting data to be tested
|
||||
% i hate this so much but lazy and this is test code so who really cares.
|
||||
SoTokens = so_tokens_from_file(FilePath),
|
||||
SfTokens = gsc:gso_tokens_from_file(FilePath),
|
||||
{FileName ++ ": tokenizers_agree",
|
||||
fun() ->
|
||||
case {SoTokens, SfTokens} of
|
||||
{{ok, So}, {ok, Sf}} -> ?assertEqual(So, Sf);
|
||||
{{error, _}, {error, _}} -> ok;
|
||||
{{ok, _}, {error, _}} -> error("so_scan succeeded and gso_scan failed");
|
||||
{{error, _}, {ok, _}} -> error("so_scan failed and gso_scan succeded")
|
||||
end
|
||||
end}.
|
||||
|
||||
% that's right, we have to enter via converting the
|
||||
% bytes in the file to a list... lol
|
||||
so_tokens_from_file(F) ->
|
||||
{ok, Bytes} = file:read_file(F),
|
||||
S = binary_to_list(Bytes),
|
||||
so_scan:scan(S).
|
||||
@@ -0,0 +1,27 @@
|
||||
% testing utilities
|
||||
-module(ts_utils).
|
||||
|
||||
-export([
|
||||
ct_dir/0,
|
||||
ct_file/1, ct_file_abspath/1
|
||||
]).
|
||||
|
||||
|
||||
-spec ct_dir() -> string().
|
||||
|
||||
% directory containing the tests for the tokenizer
|
||||
ct_dir() ->
|
||||
zx_daemon:get_home() ++ "/ct".
|
||||
|
||||
|
||||
ct_file_abspath(Name) ->
|
||||
ct_file(Name).
|
||||
|
||||
-spec ct_file(Name) -> AbsPath when
|
||||
Name :: string(),
|
||||
AbsPath :: string().
|
||||
% @doc
|
||||
% ct_file("foo.aes") -> "/path/to/ct/foo.aes"
|
||||
|
||||
ct_file(Name) ->
|
||||
ct_dir() ++ "/" ++ Name.
|
||||
+2
-2
@@ -2,11 +2,11 @@
|
||||
{type,cli}.
|
||||
{modules,[]}.
|
||||
{mod,"gsc_cli"}.
|
||||
{prefix,none}.
|
||||
{author,"Peter Harpending"}.
|
||||
{prefix,none}.
|
||||
{desc,"GSC CLI and test suite"}.
|
||||
{package_id,{"otpr","gsc_cli",{0,1,0}}}.
|
||||
{deps,[{"otpr","gsc",{0,1,0}}]}.
|
||||
{deps,[{"otpr","sophia",{9,0,0}},{"otpr","gsc",{0,1,0}}]}.
|
||||
{key_name,none}.
|
||||
{a_email,"peterharpending@qpq.swiss"}.
|
||||
{c_email,"peterharpending@qpq.swiss"}.
|
||||
|
||||
+35
-3
@@ -104,12 +104,12 @@
|
||||
% specifically account for this error
|
||||
-record(gsc_err_bcom_unterminated,
|
||||
{prev_tokens :: [tk()],
|
||||
break_pos :: gsc_pos(),
|
||||
break_pos :: tk_pos(),
|
||||
rest :: string()}).
|
||||
|
||||
-record(gsc_err_no_tokmatch,
|
||||
{prev_tokens :: [tk()],
|
||||
break_pos :: gsc_pos(),
|
||||
break_pos :: tk_pos(),
|
||||
rest :: string()}).
|
||||
|
||||
|
||||
@@ -133,7 +133,7 @@
|
||||
% generic placeholder error for now
|
||||
-record(gsc_err,
|
||||
{atom :: atom(),
|
||||
string = none :: none | iolist(),
|
||||
str = none :: none | iolist(),
|
||||
extra = none :: none | any()}).
|
||||
|
||||
% @doc all errors GSC can return conveniently listed in
|
||||
@@ -143,3 +143,35 @@
|
||||
| #gsc_err_nyi{}
|
||||
| #gsc_err_empty_file{}
|
||||
| #gsc_err{}.
|
||||
|
||||
|
||||
%----------------------------
|
||||
% tree type for parsing
|
||||
%----------------------------
|
||||
|
||||
% @doc stem record
|
||||
-record(ns, {meta :: any(),
|
||||
kids :: list(any())}).
|
||||
|
||||
% @doc `ntree(S, L)' is a "node tree" (meaning stems
|
||||
% have values and children)
|
||||
%
|
||||
% for the purposes of the compiler, the key observation
|
||||
% is that a flat list of tokens is already a forest
|
||||
-type ntree(S, L) :: #ns{meta :: S, kids :: [ntree(S, L)]}
|
||||
| L.
|
||||
|
||||
% @doc forest is just a list of trees
|
||||
-type nforest(S, L) :: [ntree(S, L)].
|
||||
|
||||
|
||||
% aliases
|
||||
|
||||
-type nt(S, L) :: ntree(S, L).
|
||||
-type nf(S, L) :: nforest(S, L).
|
||||
|
||||
-type ntree() :: ntree(any(), any()).
|
||||
-type nforest() :: [ntree()].
|
||||
|
||||
-type nt() :: ntree().
|
||||
-type nf() :: nforest().
|
||||
|
||||
@@ -158,7 +158,7 @@
|
||||
%
|
||||
%-type parse_error_() :: any().
|
||||
%-record(parse_error,
|
||||
% {pos = none :: none | gsc_pos(),
|
||||
% {pos = none :: none | tk_pos(),
|
||||
% msg = "" :: string(),
|
||||
% subs = [] :: [parse_error_()],
|
||||
% extra = none :: any()}).
|
||||
@@ -0,0 +1,216 @@
|
||||
-module(gsc_parse_type_expr).
|
||||
%
|
||||
%-export_type([
|
||||
%]).
|
||||
%
|
||||
%-export([
|
||||
% unsafe_vtks_from_string/1,
|
||||
% gulp_vtks/1,
|
||||
% take_until_ifx_op/1
|
||||
%]).
|
||||
%
|
||||
%-include("$gsc_include/gsc.hrl").
|
||||
%
|
||||
%
|
||||
%%------------------------------------------------------
|
||||
%% TYPES
|
||||
%%------------------------------------------------------
|
||||
%
|
||||
%-type vtk_ifx_op() :: vtk_apply_to
|
||||
% | {'vtk_*', tk()}
|
||||
% | {'vtk_=>', tk()}.
|
||||
%
|
||||
%-type vtk() :: tk()
|
||||
% | {vtk_plist, [tk()]}
|
||||
% | vtk_ifx_op().
|
||||
%
|
||||
%
|
||||
%-type gulped(X) :: {gulp, X}
|
||||
% | {error, any()}.
|
||||
%
|
||||
%-type slurped(X) :: {slurp, X, Rest :: [tk()]}
|
||||
% | {error, any()}.
|
||||
%
|
||||
%
|
||||
%%------------------------------------------------------
|
||||
%% FUNCTIONS
|
||||
%%------------------------------------------------------
|
||||
%
|
||||
%
|
||||
%-spec unsafe_vtks_from_string(SrcStr) -> Vtks when
|
||||
% SrcStr :: string(),
|
||||
% Vtks :: [vtk()].
|
||||
%
|
||||
%% @doc for testing
|
||||
%unsafe_vtks_from_string(S) ->
|
||||
% {ok, SigTks} = gsc_tokens:significant_tokens(S),
|
||||
% {gulp, Vtks} = gulp_vtks(SigTks),
|
||||
% Vtks.
|
||||
%
|
||||
%
|
||||
%% operators in descending order of exteriority
|
||||
%%
|
||||
%% precedence verbiage confuses me
|
||||
%%
|
||||
%% 1 + 2 * 3 ^ 4
|
||||
%%
|
||||
%% (+ 1 (* 2 (^ 3 4)))
|
||||
%%
|
||||
%% precedence is thinking about the operators as like
|
||||
%% having arms and legs and doing something. how much
|
||||
%% power do they have to bind to their neighbors. it's
|
||||
%% thinking of your operators as verbs in some state
|
||||
%% machine.
|
||||
%%
|
||||
%% i don't like when i have to model the state machine
|
||||
%% in my head when i'm reading code. everything should
|
||||
%% just be there. exteriority and interiority are
|
||||
%% properties of nouns, not properties of verbs.
|
||||
%%
|
||||
%% functional programming is all about offloading as
|
||||
%% much of your reasoning into nouns as possible. verbs
|
||||
%% change things. and change is bad.
|
||||
%-type chunk_strategy()
|
||||
% :: ast_parens % prefix paren (x y z)
|
||||
% | '=>' % infixr
|
||||
% | '*' % infix
|
||||
% | 'apply' % postfix paren f (...)
|
||||
% .
|
||||
%
|
||||
%
|
||||
%-record(ast_parens,
|
||||
% {open = none :: none | tk(),
|
||||
% inner = none :: none | [tk()],
|
||||
% close = none :: none | tk()}).
|
||||
%
|
||||
%chunk_by(Strategy, Tokens) ->
|
||||
% chunk_by(Strategy, [], Tokens).
|
||||
%
|
||||
%
|
||||
%-spec chunk0(Strategy, Oks, Errs, Tokens) -> Result when
|
||||
% Strategy :: chunk_strategy(),
|
||||
% Oks :: [any()],
|
||||
% Errs :: [{error, Reason :: any()}],
|
||||
% Tokens :: [tk()],
|
||||
% Result :: {ok,
|
||||
%
|
||||
%gulp_chunks_by(_, Stk, [], []) ->
|
||||
% {gulp, lists:reverse(Stk)};
|
||||
%gulp_chunks_by(_, _, Errs, []) ->
|
||||
% {error, {fixme, {?MODULE, ?LINE}, Errs}};
|
||||
%gulp_chunks_by(plist, Stk, Errs, Tokens) ->
|
||||
% case slurp_plist_rec(Tokens) of
|
||||
% {slurp, Plist, NewTokens} ->
|
||||
% gulp_chunks_by(plist, [PList | Stk], Errs, NewTokens);
|
||||
% barf ->
|
||||
% [Token | NewTokens] = Tokens,
|
||||
% gulp_chunks_by(plist, [Token | Stk], Errs, NewTokens);
|
||||
% Error ->
|
||||
% gulp_chunks_by(plist, Stk, [Error | Errs], Tokens);
|
||||
% end.
|
||||
%
|
||||
%slurp_plist_rec(Tokens = [#tk{string = "(" | _]) ->
|
||||
% case gsc_tokens:slurp_plist(Tokens) of
|
||||
% {slurp, [], _} ->
|
||||
% barf;
|
||||
% {slurp, PTokens, NewTokens} ->
|
||||
% PTokensInner = pt_inner(PTokens),
|
||||
% end;
|
||||
%
|
||||
%%-spec gulp_ifx_tree(Tokens) -> gulped(IfxTree) when
|
||||
%% Tokens :: [tk()],
|
||||
%% IfxTree :: ifx_tree().
|
||||
%%
|
||||
%%-spec chunk_by(ChunkStrategy, Tokens) -> Result when
|
||||
%% ChunkStrategy :: chunk_strategy(),
|
||||
%% Tokens :: [tk()],
|
||||
%% Result :: {ChunkStrategy,
|
||||
%
|
||||
%
|
||||
%
|
||||
%-spec gulp_vtks(Tokens) -> Result when
|
||||
% Tokens :: [tk()],
|
||||
% Result :: gulped(VirtualTokens),
|
||||
% VirtualTokens :: [vtk()].
|
||||
%
|
||||
%% @private
|
||||
%% for infix precedence/associativity resolution.
|
||||
%%
|
||||
%% better/original name (although misnomer) was
|
||||
%% insert_virtual_tokens. we're using the gulp verbiage
|
||||
%% because we're guarding against mistmatched delimiters
|
||||
%% @end
|
||||
%gulp_vtks(Tokens) ->
|
||||
% gulp_vtks([], Tokens).
|
||||
%
|
||||
%gulp_vtks(Acc, Tks0) ->
|
||||
% % scan until next "(" | "*" | "=>"
|
||||
% case take_until_ifx_op(Tks0) of
|
||||
% % no infix op remaining, return arg
|
||||
% {_Pfx = Tks0,
|
||||
% _Sfx = []} ->
|
||||
% {gulp, lists:flatten([Acc, Tks0])};
|
||||
% % application
|
||||
% % "... foo(bar, baz) ..."
|
||||
% % ~> [..., foo, {plist, "(bar, baz)"}, ...]
|
||||
% {_Pfx = Tks1_BeforeOpen,
|
||||
% _Sfx = Tks2_OpenNAfter
|
||||
% = [#tk{string = "("} | _]} ->
|
||||
% case gsc_tokens:slurp_plist(Tks2_OpenNAfter) of
|
||||
% {slurp, Tks2A_OpenToClose, Tks2B_AfterClose} ->
|
||||
% NewAcc = [Acc,
|
||||
% Tks1_BeforeOpen,
|
||||
% vtk_apply_to,
|
||||
% {vtk_plist, Tks2A_OpenToClose}],
|
||||
% gulp_vtks(NewAcc, Tks2B_AfterClose);
|
||||
% Error = {error, _} ->
|
||||
% Error
|
||||
% end;
|
||||
% % product
|
||||
% {_Pfx = Tks0_BeforeTimes,
|
||||
% _Sfx = [ Tk1A_Times = #tk{string = "*"}
|
||||
% | Tks1B_AfterTimes]} ->
|
||||
% NewAcc = [Acc,
|
||||
% Tks0_BeforeTimes,
|
||||
% {'vtk_*', Tk1A_Times}],
|
||||
% gulp_vtks(NewAcc, Tks1B_AfterTimes);
|
||||
% % funType
|
||||
% {_Pfx = Tks0_BeforeOp,
|
||||
% _Sfx = [ Tk1A_Op = #tk{string = "=>"}
|
||||
% | Tks1B_AfterOp]} ->
|
||||
% NewAcc = [Acc,
|
||||
% Tks0_BeforeOp,
|
||||
% {'vtk_=>', Tk1A_Op}],
|
||||
% gulp_vtks(NewAcc, Tks1B_AfterOp)
|
||||
% end.
|
||||
%
|
||||
%
|
||||
%
|
||||
%-spec take_until_ifx_op(Tokens) -> Result when
|
||||
% Tokens :: [tk()],
|
||||
% Result :: {Taken, NewTokens},
|
||||
% Taken :: Tokens,
|
||||
% NewTokens :: Tokens.
|
||||
%% @doc
|
||||
%% consume tokens until one of ["(", "*", "=>"]
|
||||
%
|
||||
%take_until_ifx_op(Tks) ->
|
||||
% take_until_ifx_op([], Tks).
|
||||
%
|
||||
%take_until_ifx_op(Stack, []) ->
|
||||
% {lists:reverse(Stack), []};
|
||||
%take_until_ifx_op(Stack, Tokens = [Token | NewTokens]) ->
|
||||
% TokStr = Token#tk.string,
|
||||
% Continue =
|
||||
% case TokStr of
|
||||
% % exit cases
|
||||
% "(" -> false;
|
||||
% "*" -> false;
|
||||
% "=>" -> false;
|
||||
% _ -> true
|
||||
% end,
|
||||
% case Continue of
|
||||
% true -> take_until_ifx_op([Token | Stack], NewTokens);
|
||||
% false -> {lists:reverse(Stack), Tokens}
|
||||
% end.
|
||||
%
|
||||
@@ -54,13 +54,13 @@
|
||||
%
|
||||
%
|
||||
%
|
||||
%-spec start_pos([gsc_token()]) -> {value, gsc_pos()} | none.
|
||||
%-spec start_pos([gsc_token()]) -> {value, tk_pos()} | none.
|
||||
%
|
||||
%start_pos([#gsc_token{pos = P}]) -> {value, P};
|
||||
%start_pos([]) -> none.
|
||||
%
|
||||
%
|
||||
%-spec end_pos([gsc_token()]) -> {value, gsc_pos()} | none.
|
||||
%-spec end_pos([gsc_token()]) -> {value, tk_pos()} | none.
|
||||
%
|
||||
%end_pos([#gsc_token{pos = Pos, string = Str}]) ->
|
||||
% {value, gsc_tokens:new_pos(Pos, Str)};
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
% @doc
|
||||
% <pre>
|
||||
% T R O N A L D D U M P
|
||||
%
|
||||
% .-""""""""""""-.
|
||||
% .-' _..------.._ '-.
|
||||
% .' .' GOLDEN NFC '. '.
|
||||
% / / COMB-OVER MAP \ \
|
||||
% ; ; .-^^^^^^^^^^-. ; ;
|
||||
% | | / THEY'RE \ | |
|
||||
% | | | NOT SENDING | | |
|
||||
% | | | ASCII | | |
|
||||
% ; ; \_.--. .--._./ ; ;
|
||||
% \ \ (o)(o) / /
|
||||
% '. '. __ .' .'
|
||||
% '-._ '._==_.' _.-'
|
||||
% '-._____.-'
|
||||
% /|||\
|
||||
% / ||| \
|
||||
% / ||| \
|
||||
% .-------' ||| '-------.
|
||||
% / THE BEST NORMALIZER \
|
||||
% / VERY STABLE CODEPOINTS \
|
||||
% /_________________________________\
|
||||
% </pre>
|
||||
%
|
||||
% When unicode sends its codepoints, they're not
|
||||
% sending their best. They're not sending ASCII.
|
||||
% They're not sending ASCII. They're sending integers
|
||||
% that have lots of problems, and they're bringing
|
||||
% those problems with us. They're bringing diacritics.
|
||||
% They're bringing non-idempotent lowercasing. They're
|
||||
% bringing graphemes that don't correspond bijectively
|
||||
% with printable characters. They're bringing RTL.
|
||||
% They're bringing invisible characters. They're
|
||||
% bringing characters that draw outside the character
|
||||
% boundary. They're bringing variable-width
|
||||
% whitespace. They're bringing control characters.
|
||||
% They're bringing emojis.
|
||||
%
|
||||
% And some, I assume, are good characters.
|
||||
%
|
||||
% `SrcStr' is a unicode NFC list, not an ordinary
|
||||
% string. you think a string is a list of codepoints.
|
||||
%
|
||||
% NOOOOO.
|
||||
%
|
||||
% See it's different, because that's why.
|
||||
%
|
||||
% This is the cost of diversity, folks.
|
||||
% @end
|
||||
|
||||
+126
-75
@@ -1,57 +1,48 @@
|
||||
% @doc bikeshed proctrastination head into vim warmup thing
|
||||
% @doc bikeshed proctrastination head into vim warmup
|
||||
% thing
|
||||
%
|
||||
% sophia compiler from scratch by PRH
|
||||
%
|
||||
% based on original sophia compiler
|
||||
%
|
||||
% parse layers:
|
||||
% 1. gsc_tokenizer: SrcStr -> (Tokens | SigTokens)
|
||||
%
|
||||
% SigTokens = not comment/whitespace
|
||||
%
|
||||
% layers:
|
||||
% a. gsc_strmatch : matches string shapes
|
||||
% b. gsc_so_scan : converts to so_scan shapes
|
||||
%
|
||||
% 2. gsc_ast: SigTokens -> AST
|
||||
%
|
||||
% terminology:
|
||||
%
|
||||
% - `slurp`/`barf` borrowed from emacs paredit mode:
|
||||
%
|
||||
% slurp : (a b) c -> (a b c)
|
||||
% barf : (a b c) -> a (b c)
|
||||
%
|
||||
% * `slurp` usually involves *transforming* input
|
||||
% into a new type (e.g. slurp a token from src
|
||||
% string); think of slurp as a verb meaning to
|
||||
% consume and then digest
|
||||
% * `barf` basically means blindly splitting off
|
||||
% input
|
||||
%
|
||||
% based on original sophia compiler; target for version
|
||||
% 0.1 is to match behavior exactly
|
||||
% @end
|
||||
|
||||
% TODO:
|
||||
% - barf for outputs, slurp for inputs
|
||||
% - architecture needs more careful thought but only after something works
|
||||
% - too fuzzy right now
|
||||
% - possibly:
|
||||
% - rename parser layers sequentially:
|
||||
% - gsc_
|
||||
-module(gsc).
|
||||
|
||||
|
||||
% token and tokens
|
||||
-export_type([
|
||||
token/0
|
||||
token/0,
|
||||
signal/0
|
||||
]).
|
||||
|
||||
% syntax tree/forest wrapper type
|
||||
-export_type([
|
||||
ntree/2, ntree/0,
|
||||
nforest/2, nforest/0,
|
||||
nt/2, nt/0,
|
||||
nf/2, nf/0
|
||||
]).
|
||||
|
||||
|
||||
-export([
|
||||
unsafe_tokens_from_file/1,
|
||||
unsafe_tokens_from_string/1,
|
||||
unsafe_signal_from_file/1,
|
||||
unsafe_signal_from_string/1,
|
||||
filter_signal/1,
|
||||
signal_from_string/1,
|
||||
signal_from_file/1,
|
||||
sigtokens_from_file/1,
|
||||
sigtokens_from_string/1,
|
||||
tokens_from_file/1,
|
||||
tokens_from_string/1,
|
||||
ast_from_file/1,
|
||||
ast_from_string/1,
|
||||
ast_from_tokens/1
|
||||
% sophia compatibility
|
||||
gso_tokens_from_file/1,
|
||||
gso_tokens_from_string/1,
|
||||
% unicode normalization
|
||||
very_stable_codepoints/1,
|
||||
very_stable_string/1,
|
||||
very_stable_file/1
|
||||
]).
|
||||
|
||||
-include("$gsc_include/gsc.hrl").
|
||||
@@ -62,10 +53,43 @@
|
||||
|
||||
-type token() :: tk().
|
||||
|
||||
% @doc signal means non-noise (whitespace/comment)
|
||||
% tokens; legacy name still around is "sigtokens"
|
||||
-type signal() :: [tk()].
|
||||
|
||||
%-----------------------------------------
|
||||
% functions
|
||||
% API: FUNCTIONS
|
||||
%-----------------------------------------
|
||||
|
||||
%-----------------------------------------
|
||||
% aint nobody got time for case shit
|
||||
%-----------------------------------------
|
||||
% tokens
|
||||
unsafe_tokens_from_file(F) ->
|
||||
{ok, Tks} = tokens_from_file(F),
|
||||
Tks.
|
||||
|
||||
unsafe_tokens_from_string(S) ->
|
||||
{ok, Tks} = tokens_from_string(S),
|
||||
Tks.
|
||||
|
||||
|
||||
% signal
|
||||
unsafe_signal_from_file(F) ->
|
||||
{ok, Tks} = signal_from_file(F),
|
||||
Tks.
|
||||
|
||||
unsafe_signal_from_string(S) ->
|
||||
{ok, Tks} = signal_from_string(S),
|
||||
Tks.
|
||||
|
||||
|
||||
%
|
||||
filter_signal(X) -> gsc_tokens:filter_significant(X).
|
||||
signal_from_file(X) -> sigtokens_from_file(X).
|
||||
signal_from_string(X) -> sigtokens_from_string(X).
|
||||
|
||||
% @doc legacy name for signal
|
||||
sigtokens_from_file(X) ->
|
||||
case tokens_from_file(X) of
|
||||
{ok, Y} -> {ok, gsc_tokens:filter_significant(Y)};
|
||||
@@ -93,7 +117,6 @@ tokens_from_file(FilePath) ->
|
||||
|
||||
|
||||
|
||||
|
||||
-spec tokens_from_string(SrcStr) -> Result
|
||||
when SrcStr :: string(),
|
||||
Result :: {ok, Tokens}
|
||||
@@ -105,40 +128,68 @@ tokens_from_string(SrcStr) ->
|
||||
|
||||
|
||||
|
||||
-spec ast_from_file(FilePath) -> Perhaps
|
||||
when FilePath :: string(),
|
||||
Perhaps :: {ok, AST} | {error, gsc_err()},
|
||||
AST :: gsc_ast().
|
||||
-spec gso_tokens_from_file(FilePath) -> Result when
|
||||
FilePath :: string(),
|
||||
Result :: {ok, GsoTks} | {error, Reason},
|
||||
GsoTks :: [gso_scan:so_token()],
|
||||
Reason :: gsc_err() | any().
|
||||
|
||||
ast_from_file(FilePath) ->
|
||||
gso_tokens_from_file(FilePath) ->
|
||||
case file:read_file(FilePath) of
|
||||
{ok, FileBytes} -> ast_from_string(FileBytes);
|
||||
Error -> Error
|
||||
end.
|
||||
|
||||
|
||||
|
||||
-spec ast_from_string(SrcStr) -> Perhaps
|
||||
when SrcStr :: string(),
|
||||
Perhaps :: {ok, AST} | {error, gsc_err()},
|
||||
AST :: gsc_ast().
|
||||
|
||||
ast_from_string(SrcStr) ->
|
||||
case gsc_tokens:significant_tokens(SrcStr) of
|
||||
{ok, SigTks} -> ast_from_tokens(SigTks);
|
||||
Error -> Error
|
||||
end.
|
||||
|
||||
|
||||
|
||||
-spec ast_from_tokens(SrcTokens) -> Perhaps
|
||||
when SrcTokens :: [tk()],
|
||||
Perhaps :: {ok, AST} | {error, gsc_err()},
|
||||
AST :: gsc_ast().
|
||||
|
||||
ast_from_tokens(Tks) ->
|
||||
SigTks = gsc_tokens:filter_significant(Tks),
|
||||
case gsc_ast:gulp_file(SigTks) of
|
||||
{gulp, AST} -> {ok, AST};
|
||||
{ok, Bytes} -> gso_tokens_from_string(Bytes);
|
||||
Error -> Error
|
||||
end.
|
||||
|
||||
|
||||
|
||||
-spec gso_tokens_from_string(Str) -> Result when
|
||||
Str :: iolist(),
|
||||
Result :: {ok, GsoTks} | {error, Reason},
|
||||
GsoTks :: [gso_scan:so_token()],
|
||||
Reason :: gsc_err() | any().
|
||||
|
||||
gso_tokens_from_string(Evil) ->
|
||||
Str = gsc_tokens:very_stable_codepoints(Evil),
|
||||
gso_scan:scan(Str).
|
||||
|
||||
|
||||
|
||||
|
||||
-spec very_stable_codepoints(String) -> Normalized when
|
||||
String :: iolist(),
|
||||
Normalized :: string().
|
||||
|
||||
%% @doc normalize string to utf8 NFC list form
|
||||
very_stable_codepoints(X) ->
|
||||
gsc_tokens:very_stable_codepoints(X).
|
||||
|
||||
|
||||
|
||||
-spec very_stable_string(String) -> Normalized when
|
||||
String :: iolist(),
|
||||
Normalized :: string().
|
||||
|
||||
%% @doc alias for `very_stable_codepoints/1'
|
||||
very_stable_string(X) ->
|
||||
gsc_tokens:very_stable_codepoints(X).
|
||||
|
||||
|
||||
|
||||
-spec very_stable_file(FilePath) -> Contents when
|
||||
FilePath :: string(),
|
||||
Contents :: string().
|
||||
|
||||
%% @doc Read file, return contents as
|
||||
%% `unicode:characters_to_nfc_list/1' list.
|
||||
%%
|
||||
%% Please note that this function is NOT in fact very
|
||||
%% stable, as it throws an error if there's some error
|
||||
%% reading the file (e.g. not found).
|
||||
%%
|
||||
%% this function exists mostly for scripting/shell
|
||||
%% convenience
|
||||
very_stable_file(X) ->
|
||||
case file:read_file(X) of
|
||||
{ok, B} -> very_stable_codepoints(B);
|
||||
Error -> error(Error)
|
||||
end.
|
||||
|
||||
@@ -0,0 +1,55 @@
|
||||
-module(gsc_ntree).
|
||||
|
||||
-export([
|
||||
nstem/2, meta/1, kids/1,
|
||||
flatten_tree/1, flatten_forest/1
|
||||
]).
|
||||
|
||||
|
||||
-include("$gsc_include/gsc.hrl").
|
||||
|
||||
|
||||
%%=====================================================
|
||||
%% API: functions
|
||||
%%=====================================================
|
||||
|
||||
-spec nstem(Root, Forest) -> Tree when
|
||||
Root :: S,
|
||||
Forest :: nforest(S, L),
|
||||
Tree :: ntree(S, L),
|
||||
S :: any(),
|
||||
L :: any().
|
||||
|
||||
nstem(Root, List) ->
|
||||
{ns, Root, List}.
|
||||
|
||||
|
||||
meta(#ns{meta = M}) -> M.
|
||||
kids(#ns{kids = K}) -> K.
|
||||
|
||||
|
||||
|
||||
-spec flatten_tree(Tree) -> Leafs when
|
||||
Tree :: ntree(_, L),
|
||||
Leafs :: [L],
|
||||
L :: any().
|
||||
|
||||
flatten_tree(T) ->
|
||||
lists:flatten(ft(T)).
|
||||
|
||||
|
||||
|
||||
-spec flatten_forest(Forest) -> Leafs when
|
||||
Forest :: nforest(_, L),
|
||||
Leafs :: [L],
|
||||
L :: any().
|
||||
|
||||
flatten_forest(F) ->
|
||||
lists:flatten(ff(F)).
|
||||
|
||||
|
||||
ft(#ns{kids = F}) -> ff(F);
|
||||
ft(Leaf) -> [Leaf].
|
||||
|
||||
ff(F) ->
|
||||
[ft(T) || T <- F].
|
||||
@@ -1,216 +0,0 @@
|
||||
-module(gsc_parse_type_expr).
|
||||
|
||||
-export_type([
|
||||
]).
|
||||
|
||||
-export([
|
||||
unsafe_vtks_from_string/1,
|
||||
gulp_vtks/1,
|
||||
take_until_ifx_op/1
|
||||
]).
|
||||
|
||||
-include("$gsc_include/gsc.hrl").
|
||||
|
||||
|
||||
%------------------------------------------------------
|
||||
% TYPES
|
||||
%------------------------------------------------------
|
||||
|
||||
-type vtk_ifx_op() :: vtk_apply_to
|
||||
| {'vtk_*', tk()}
|
||||
| {'vtk_=>', tk()}.
|
||||
|
||||
-type vtk() :: tk()
|
||||
| {vtk_plist, [tk()]}
|
||||
| vtk_ifx_op().
|
||||
|
||||
|
||||
-type gulped(X) :: {gulp, X}
|
||||
| {error, any()}.
|
||||
|
||||
-type slurped(X) :: {slurp, X, Rest :: [tk()]}
|
||||
| {error, any()}.
|
||||
|
||||
|
||||
%------------------------------------------------------
|
||||
% FUNCTIONS
|
||||
%------------------------------------------------------
|
||||
|
||||
|
||||
-spec unsafe_vtks_from_string(SrcStr) -> Vtks when
|
||||
SrcStr :: string(),
|
||||
Vtks :: [vtk()].
|
||||
|
||||
% @doc for testing
|
||||
unsafe_vtks_from_string(S) ->
|
||||
{ok, SigTks} = gsc_tokens:significant_tokens(S),
|
||||
{gulp, Vtks} = gulp_vtks(SigTks),
|
||||
Vtks.
|
||||
|
||||
|
||||
% operators in descending order of exteriority
|
||||
%
|
||||
% precedence verbiage confuses me
|
||||
%
|
||||
% 1 + 2 * 3 ^ 4
|
||||
%
|
||||
% (+ 1 (* 2 (^ 3 4)))
|
||||
%
|
||||
% precedence is thinking about the operators as like
|
||||
% having arms and legs and doing something. how much
|
||||
% power do they have to bind to their neighbors. it's
|
||||
% thinking of your operators as verbs in some state
|
||||
% machine.
|
||||
%
|
||||
% i don't like when i have to model the state machine
|
||||
% in my head when i'm reading code. everything should
|
||||
% just be there. exteriority and interiority are
|
||||
% properties of nouns, not properties of verbs.
|
||||
%
|
||||
% functional programming is all about offloading as
|
||||
% much of your reasoning into nouns as possible. verbs
|
||||
% change things. and change is bad.
|
||||
-type chunk_strategy()
|
||||
:: ast_parens % prefix paren (x y z)
|
||||
| '=>' % infixr
|
||||
| '*' % infix
|
||||
| 'apply' % postfix paren f (...)
|
||||
.
|
||||
|
||||
|
||||
-record(ast_parens,
|
||||
{open = none :: none | tk(),
|
||||
inner = none :: none | [tk()],
|
||||
close = none :: none | tk()}).
|
||||
|
||||
chunk_by(Strategy, Tokens) ->
|
||||
chunk_by(Strategy, [], Tokens).
|
||||
|
||||
|
||||
-spec chunk0(Strategy, Oks, Errs, Tokens) -> Result when
|
||||
Strategy :: chunk_strategy(),
|
||||
Oks :: [any()],
|
||||
Errs :: [{error, Reason :: any()}],
|
||||
Tokens :: [tk()],
|
||||
Result :: {ok,
|
||||
|
||||
gulp_chunks_by(_, Stk, [], []) ->
|
||||
{gulp, lists:reverse(Stk)};
|
||||
gulp_chunks_by(_, _, Errs, []) ->
|
||||
{error, {fixme, {?MODULE, ?LINE}, Errs}};
|
||||
gulp_chunks_by(plist, Stk, Errs, Tokens) ->
|
||||
case slurp_plist_rec(Tokens) of
|
||||
{slurp, Plist, NewTokens} ->
|
||||
gulp_chunks_by(plist, [PList | Stk], Errs, NewTokens);
|
||||
barf ->
|
||||
[Token | NewTokens] = Tokens,
|
||||
gulp_chunks_by(plist, [Token | Stk], Errs, NewTokens);
|
||||
Error ->
|
||||
gulp_chunks_by(plist, Stk, [Error | Errs], Tokens);
|
||||
end.
|
||||
|
||||
slurp_plist_rec(Tokens = [#tk{string = "(" | _]) ->
|
||||
case gsc_tokens:slurp_plist(Tokens) of
|
||||
{slurp, [], _} ->
|
||||
barf;
|
||||
{slurp, PTokens, NewTokens} ->
|
||||
PTokensInner = pt_inner(PTokens),
|
||||
end;
|
||||
|
||||
%-spec gulp_ifx_tree(Tokens) -> gulped(IfxTree) when
|
||||
% Tokens :: [tk()],
|
||||
% IfxTree :: ifx_tree().
|
||||
%
|
||||
%-spec chunk_by(ChunkStrategy, Tokens) -> Result when
|
||||
% ChunkStrategy :: chunk_strategy(),
|
||||
% Tokens :: [tk()],
|
||||
% Result :: {ChunkStrategy,
|
||||
|
||||
|
||||
|
||||
-spec gulp_vtks(Tokens) -> Result when
|
||||
Tokens :: [tk()],
|
||||
Result :: gulped(VirtualTokens),
|
||||
VirtualTokens :: [vtk()].
|
||||
|
||||
% @private
|
||||
% for infix precedence/associativity resolution.
|
||||
%
|
||||
% better/original name (although misnomer) was
|
||||
% insert_virtual_tokens. we're using the gulp verbiage
|
||||
% because we're guarding against mistmatched delimiters
|
||||
% @end
|
||||
gulp_vtks(Tokens) ->
|
||||
gulp_vtks([], Tokens).
|
||||
|
||||
gulp_vtks(Acc, Tks0) ->
|
||||
% scan until next "(" | "*" | "=>"
|
||||
case take_until_ifx_op(Tks0) of
|
||||
% no infix op remaining, return arg
|
||||
{_Pfx = Tks0,
|
||||
_Sfx = []} ->
|
||||
{gulp, lists:flatten([Acc, Tks0])};
|
||||
% application
|
||||
% "... foo(bar, baz) ..."
|
||||
% ~> [..., foo, {plist, "(bar, baz)"}, ...]
|
||||
{_Pfx = Tks1_BeforeOpen,
|
||||
_Sfx = Tks2_OpenNAfter
|
||||
= [#tk{string = "("} | _]} ->
|
||||
case gsc_tokens:slurp_plist(Tks2_OpenNAfter) of
|
||||
{slurp, Tks2A_OpenToClose, Tks2B_AfterClose} ->
|
||||
NewAcc = [Acc,
|
||||
Tks1_BeforeOpen,
|
||||
vtk_apply_to,
|
||||
{vtk_plist, Tks2A_OpenToClose}],
|
||||
gulp_vtks(NewAcc, Tks2B_AfterClose);
|
||||
Error = {error, _} ->
|
||||
Error
|
||||
end;
|
||||
% product
|
||||
{_Pfx = Tks0_BeforeTimes,
|
||||
_Sfx = [ Tk1A_Times = #tk{string = "*"}
|
||||
| Tks1B_AfterTimes]} ->
|
||||
NewAcc = [Acc,
|
||||
Tks0_BeforeTimes,
|
||||
{'vtk_*', Tk1A_Times}],
|
||||
gulp_vtks(NewAcc, Tks1B_AfterTimes);
|
||||
% funType
|
||||
{_Pfx = Tks0_BeforeOp,
|
||||
_Sfx = [ Tk1A_Op = #tk{string = "=>"}
|
||||
| Tks1B_AfterOp]} ->
|
||||
NewAcc = [Acc,
|
||||
Tks0_BeforeOp,
|
||||
{'vtk_=>', Tk1A_Op}],
|
||||
gulp_vtks(NewAcc, Tks1B_AfterOp)
|
||||
end.
|
||||
|
||||
|
||||
|
||||
-spec take_until_ifx_op(Tokens) -> Result when
|
||||
Tokens :: [tk()],
|
||||
Result :: {Taken, NewTokens},
|
||||
Taken :: Tokens,
|
||||
NewTokens :: Tokens.
|
||||
% @doc
|
||||
% consume tokens until one of ["(", "*", "=>"]
|
||||
|
||||
take_until_ifx_op(Tks) ->
|
||||
take_until_ifx_op([], Tks).
|
||||
|
||||
take_until_ifx_op(Stack, []) ->
|
||||
{lists:reverse(Stack), []};
|
||||
take_until_ifx_op(Stack, Tokens = [Token | NewTokens]) ->
|
||||
TokStr = Token#tk.string,
|
||||
Continue =
|
||||
case TokStr of
|
||||
% exit cases
|
||||
"(" -> false;
|
||||
"*" -> false;
|
||||
"=>" -> false;
|
||||
_ -> true
|
||||
end,
|
||||
case Continue of
|
||||
true -> take_until_ifx_op([Token | Stack], NewTokens);
|
||||
false -> {lists:reverse(Stack), Tokens}
|
||||
end.
|
||||
|
||||
@@ -0,0 +1,111 @@
|
||||
% signal = non-noisy tokens
|
||||
-module(gsc_signal).
|
||||
|
||||
-export([
|
||||
from_tokens/1,
|
||||
is_block/1,
|
||||
gulp_block_items/1,
|
||||
block_to_items/1,
|
||||
take_block_item/1
|
||||
]).
|
||||
|
||||
-include("$gsc_include/gsc.hrl").
|
||||
|
||||
|
||||
-spec from_tokens(Tokens) -> Signal when
|
||||
Tokens :: [tk()],
|
||||
Signal :: [tk()].
|
||||
% @doc filter out comments/whitespace
|
||||
|
||||
from_tokens(Tokens) ->
|
||||
gsc_tokens:filter_significant(Tokens).
|
||||
|
||||
|
||||
|
||||
-spec is_block(Signal) -> Result when
|
||||
Signal :: [tk()],
|
||||
Result :: boolean().
|
||||
|
||||
is_block([]) ->
|
||||
true;
|
||||
is_block([#tk{pos = {_, BCol}} | Rest]) ->
|
||||
InBlock =
|
||||
fun(#tk{pos = {_, TCol}}) ->
|
||||
BCol =< TCol
|
||||
end,
|
||||
lists:all(InBlock, Rest).
|
||||
|
||||
|
||||
|
||||
-spec gulp_block_items(Signal) -> Result when
|
||||
Signal :: [tk()],
|
||||
Result :: {slurp, Items, NewSignal}
|
||||
| {error, any()},
|
||||
Items :: [Signal],
|
||||
NewSignal :: Signal.
|
||||
|
||||
gulp_block_items(S) ->
|
||||
case is_block(S) of
|
||||
true -> {gulp, block_to_items(S)};
|
||||
false -> find_badness(S)
|
||||
end.
|
||||
|
||||
find_badness([#tk{pos = {_, StartCol}} = StartTk | Rest]) ->
|
||||
find_badness(StartCol, StartTk, Rest).
|
||||
|
||||
find_badness(StartCol, StartTk, [#tk{pos = {_, TkCol}} = Tk | Rest]) ->
|
||||
Bad = TkCol < StartCol,
|
||||
case Bad of
|
||||
false -> find_badness(StartCol, StartTk, Rest);
|
||||
true -> {error, {bad_block, [{start_col, StartCol},
|
||||
{end_col, TkCol},
|
||||
{start_tk, StartTk},
|
||||
{end_tk, Tk}]}}
|
||||
end.
|
||||
|
||||
|
||||
|
||||
-spec block_to_items(Signal) -> BlockItems when
|
||||
Signal :: [tk()],
|
||||
BlockItems :: [Signal].
|
||||
% @doc
|
||||
% naive algorithm, so doesn't ensure all block items
|
||||
% are same indent level
|
||||
%
|
||||
% Input:
|
||||
% foo = ...
|
||||
% bar = ...
|
||||
% baz = ...
|
||||
%
|
||||
% Output:
|
||||
% [foo = ...,
|
||||
% bar = ...,
|
||||
% baz = ...]
|
||||
block_to_items([]) ->
|
||||
[];
|
||||
block_to_items(S) ->
|
||||
b2is([], S).
|
||||
|
||||
b2is(Acc, []) ->
|
||||
lists:reverse(Acc);
|
||||
b2is(Acc, S) ->
|
||||
{Item, S1} = take_block_item(S),
|
||||
b2is([Item | Acc], S1).
|
||||
|
||||
|
||||
|
||||
-spec take_block_item(Signal) -> Result when
|
||||
Signal :: [tk()],
|
||||
Result :: {Item, NewSignal},
|
||||
Item :: Signal,
|
||||
NewSignal :: Signal.
|
||||
|
||||
take_block_item([]) ->
|
||||
{[], []};
|
||||
take_block_item([#tk{pos = {_, ICol}} = T0 | S0]) ->
|
||||
InItem =
|
||||
fun(#tk{pos = {_, TCol}}) ->
|
||||
ICol < TCol
|
||||
end,
|
||||
{S0_II, S1} = lists:splitwith(InItem, S0),
|
||||
{[T0 | S0_II], S1}.
|
||||
+132
-92
@@ -20,7 +20,7 @@
|
||||
|
||||
% meta
|
||||
-export([
|
||||
token_types_parse_order/0,
|
||||
token_shapes_parse_order/0,
|
||||
kwds/0
|
||||
]).
|
||||
|
||||
@@ -39,10 +39,14 @@
|
||||
is_significant/1,
|
||||
filter_significant/1,
|
||||
significant_tokens/1,
|
||||
very_stable_codepoints/1,
|
||||
very_stable_string/1,
|
||||
very_stable_characters/1,
|
||||
tokens_from_iolist/1,
|
||||
tokens/1,
|
||||
slurp_token/2,
|
||||
slurp_token_types/3,
|
||||
slurp_token_of_type/3,
|
||||
slurp_token_shapes/3,
|
||||
slurp_token_of_shape/3,
|
||||
new_pos/2
|
||||
]).
|
||||
|
||||
@@ -59,7 +63,7 @@
|
||||
AtMostNStrings :: [string()].
|
||||
% @doc return the strings of the first N tokens
|
||||
|
||||
strings(N, [#tk{string = S} | Rest]) when is_integer(N), N >= 1 ->
|
||||
strings(N, [#tk{str = S} | Rest]) when is_integer(N), N >= 1 ->
|
||||
[S | strings(N-1, Rest)];
|
||||
strings(_, []) ->
|
||||
[];
|
||||
@@ -159,7 +163,7 @@ take_block_item([]) ->
|
||||
% counterintuitive to end-users (who are programmers, entirely
|
||||
% unfamiliar with notions like stacks and open/close delimiters)
|
||||
|
||||
slurp_plist([Hd = #tk{string = "("} | Tl]) ->
|
||||
slurp_plist([Hd = #tk{str = "("} | Tl]) ->
|
||||
slurp_dlist([Hd], [Hd], Tl);
|
||||
slurp_plist(Tks) ->
|
||||
{slurp, [], Tks}.
|
||||
@@ -170,30 +174,30 @@ slurp_dlist(All, [], NewTokens) ->
|
||||
{slurp, lists:reverse(All), NewTokens};
|
||||
% WMA stack is nonempty
|
||||
% happy cases of opens getting popped
|
||||
slurp_dlist(All, [#tk{string = "("} | NewOpen],
|
||||
[#tk{string = ")"} = Tk | NewTks]) ->
|
||||
slurp_dlist(All, [#tk{str = "("} | NewOpen],
|
||||
[#tk{str = ")"} = Tk | NewTks]) ->
|
||||
slurp_dlist([Tk | All], NewOpen, NewTks);
|
||||
slurp_dlist(All, [#tk{string = "["} | NewOpen],
|
||||
[#tk{string = "]"} = Tk | NewTks]) ->
|
||||
slurp_dlist(All, [#tk{str = "["} | NewOpen],
|
||||
[#tk{str = "]"} = Tk | NewTks]) ->
|
||||
slurp_dlist([Tk | All], NewOpen, NewTks);
|
||||
slurp_dlist(All, [#tk{string = "{"} | NewOpen],
|
||||
[#tk{string = "}"} = Tk | NewTks]) ->
|
||||
slurp_dlist(All, [#tk{str = "{"} | NewOpen],
|
||||
[#tk{str = "}"} = Tk | NewTks]) ->
|
||||
slurp_dlist([Tk | All], NewOpen, NewTks);
|
||||
% happy: open delimiters getting pushed
|
||||
slurp_dlist(All, Opens, [#tk{string = "("} = Tk | NewTks]) ->
|
||||
slurp_dlist(All, Opens, [#tk{str = "("} = Tk | NewTks]) ->
|
||||
slurp_dlist([Tk | All], [Tk | Opens], NewTks);
|
||||
slurp_dlist(All, Opens, [#tk{string = "["} = Tk | NewTks]) ->
|
||||
slurp_dlist(All, Opens, [#tk{str = "["} = Tk | NewTks]) ->
|
||||
slurp_dlist([Tk | All], [Tk | Opens], NewTks);
|
||||
slurp_dlist(All, Opens, [#tk{string = "{"} = Tk | NewTks]) ->
|
||||
slurp_dlist(All, Opens, [#tk{str = "{"} = Tk | NewTks]) ->
|
||||
slurp_dlist([Tk | All], [Tk | Opens], NewTks);
|
||||
% sad: mismatch cases
|
||||
slurp_dlist(All, Opens, []) ->
|
||||
slurp_dlist(_, Opens, []) ->
|
||||
{error, {fixme, mismatch, Opens, none}};
|
||||
slurp_dlist(All, Opens, [#tk{string = "}"} = BadClose | _]) ->
|
||||
slurp_dlist(_, Opens, [#tk{str = "}"} = BadClose | _]) ->
|
||||
{error, {fixme, mismatch, Opens, {value, BadClose}}};
|
||||
slurp_dlist(All, Opens, [#tk{string = "]"} = BadClose | _]) ->
|
||||
slurp_dlist(_, Opens, [#tk{str = "]"} = BadClose | _]) ->
|
||||
{error, {fixme, mismatch, Opens, {value, BadClose}}};
|
||||
slurp_dlist(All, Opens, [#tk{string = ")"} = BadClose | _]) ->
|
||||
slurp_dlist(_, Opens, [#tk{str = ")"} = BadClose | _]) ->
|
||||
{error, {fixme, mismatch, Opens, {value, BadClose}}};
|
||||
% general case: non-terminal token gets pushed
|
||||
slurp_dlist(All, Opens, [Tk | NewTks]) ->
|
||||
@@ -206,15 +210,15 @@ slurp_dlist(All, Opens, [Tk | NewTks]) ->
|
||||
% This is parse order definition, list of keywords, etc
|
||||
%
|
||||
% -export([
|
||||
% token_types_parse_order/0,
|
||||
% token_shapes_parse_order/0,
|
||||
% kwds/0
|
||||
% ]).
|
||||
%-------------------------------------------------------
|
||||
|
||||
-spec token_types_parse_order() -> [gsc_token_type()].
|
||||
-spec token_shapes_parse_order() -> [tk_shape()].
|
||||
% @doc
|
||||
% list of sophia tokens in parse order (if an earlier type matches, the later
|
||||
% type isn't even checked)
|
||||
% list of sophia token shapes in parse order (if an earlier shape matches, the later
|
||||
% shape isn't even checked)
|
||||
%
|
||||
%
|
||||
% Rules =
|
||||
@@ -245,7 +249,7 @@ slurp_dlist(All, Opens, [Tk | NewTks]) ->
|
||||
% ],
|
||||
% @end
|
||||
|
||||
token_types_parse_order() ->
|
||||
token_shapes_parse_order() ->
|
||||
% written in this style to be maximally editable
|
||||
lists:flatten([
|
||||
% comments and whitespace
|
||||
@@ -282,8 +286,8 @@ kwds() ->
|
||||
% -export([
|
||||
% tokens/1,
|
||||
% slurp_token/1,
|
||||
% slurp_token_types/2,
|
||||
% slurp_token_of_type/2
|
||||
% slurp_token_shapes/2,
|
||||
% slurp_token_of_shape/2
|
||||
% ]).
|
||||
%-------------------------------------------------------
|
||||
|
||||
@@ -323,12 +327,47 @@ filter_significant(Tokens) ->
|
||||
-spec is_significant(Token) -> boolean()
|
||||
when Token :: tk().
|
||||
|
||||
is_significant(#tk{type = bcom}) -> false;
|
||||
is_significant(#tk{type = lcom}) -> false;
|
||||
is_significant(#tk{type = ws}) -> false;
|
||||
is_significant(#tk{shape = bcom}) -> false;
|
||||
is_significant(#tk{shape = lcom}) -> false;
|
||||
is_significant(#tk{shape = ws}) -> false;
|
||||
is_significant(_) -> true.
|
||||
|
||||
|
||||
|
||||
% aliases
|
||||
very_stable_string(X) -> very_stable_codepoints(X).
|
||||
very_stable_characters(X) -> very_stable_codepoints(X).
|
||||
|
||||
|
||||
|
||||
-spec very_stable_codepoints(IoList) -> NfcList when
|
||||
IoList :: iolist(),
|
||||
NfcList :: string().
|
||||
|
||||
%% @doc When Unicode sends its characters, they're not
|
||||
%% sending their best. They're not sending ASCII.
|
||||
%% They're not sending ASCII. They're sending
|
||||
%% characters that have lots of problems, and they're
|
||||
%% bringing those problems with us. They're bringing
|
||||
%% diacritics. They're bringing homoglyphs. They're
|
||||
%% bringing RTL. They're rapists. And some, we assume,
|
||||
%% are good characters.
|
||||
very_stable_codepoints(S) ->
|
||||
unicode:characters_to_nfc_list(S).
|
||||
|
||||
|
||||
-spec tokens_from_iolist(SrcStr) -> Result when
|
||||
SrcStr :: iolist(),
|
||||
Result :: {ok, Tokens}
|
||||
| {error, gsc_err()},
|
||||
Tokens :: [tk()].
|
||||
|
||||
% @doc alias for tokens/1
|
||||
tokens_from_iolist(S) -> tokens(S).
|
||||
|
||||
|
||||
|
||||
|
||||
-spec tokens(SrcStr) -> Result
|
||||
when SrcStr :: iolist(),
|
||||
Result :: {ok, Tokens}
|
||||
@@ -343,13 +382,14 @@ is_significant(_) -> true.
|
||||
|
||||
tokens(S) ->
|
||||
% defensive normalization
|
||||
tokens([], {1, 1}, unicode:characters_to_nfc_list(S)).
|
||||
tokens([], {1, 1}, very_stable_codepoints(S)).
|
||||
|
||||
|
||||
tokens(Stack, _FinalPos, "") ->
|
||||
{ok, lists:reverse(Stack)};
|
||||
tokens(Stack, Pos, SrcStr) ->
|
||||
case slurp_token(Pos, SrcStr) of
|
||||
{tokmatch, NewToken = #tk{string = TokStr},
|
||||
{tokmatch, NewToken = #tk{str = TokStr},
|
||||
NewSrcStr} ->
|
||||
NewPos = new_pos(Pos, TokStr),
|
||||
tokens([NewToken | Stack], NewPos, NewSrcStr);
|
||||
@@ -455,7 +495,7 @@ next_tabstop8(Col0) when Col0 >= 0 ->
|
||||
|
||||
|
||||
-spec slurp_token(Pos, SrcStr) -> Result
|
||||
when Pos :: gsc_pos(),
|
||||
when Pos :: tk_pos(),
|
||||
SrcStr :: string(),
|
||||
Result :: {tokmatch, Token, Rest}
|
||||
| no_tokmatch
|
||||
@@ -465,17 +505,17 @@ next_tabstop8(Col0) when Col0 >= 0 ->
|
||||
Rest :: string().
|
||||
% @doc
|
||||
% grab a single token off the front of the string according to
|
||||
% `token_types_parse_order/0'
|
||||
% `token_shapes_parse_order/0'
|
||||
|
||||
slurp_token(Pos, SrcStr) ->
|
||||
% this is the easiest format if i need to fuck with it
|
||||
slurp_token_types(token_types_parse_order(), Pos, SrcStr).
|
||||
slurp_token_shapes(token_shapes_parse_order(), Pos, SrcStr).
|
||||
|
||||
|
||||
|
||||
-spec slurp_token_types(ParseOrder, Pos, SrcStr) -> Result
|
||||
when ParseOrder :: [gsc_token_type()],
|
||||
Pos :: gsc_pos(),
|
||||
-spec slurp_token_shapes(ParseOrder, Pos, SrcStr) -> Result
|
||||
when ParseOrder :: [tk_shape()],
|
||||
Pos :: tk_pos(),
|
||||
SrcStr :: string(),
|
||||
Result :: {tokmatch, Token, Rest}
|
||||
| no_tokmatch
|
||||
@@ -485,22 +525,22 @@ slurp_token(Pos, SrcStr) ->
|
||||
Rest :: string().
|
||||
% @doc
|
||||
% grab a single token off the front of the string according to
|
||||
% `token_types_parse_order/0'
|
||||
% `token_shapes_parse_order/0'
|
||||
|
||||
slurp_token_types([TokenType | TTs], Pos, SrcStr) ->
|
||||
case slurp_token_of_type(TokenType, Pos, SrcStr) of
|
||||
slurp_token_shapes([TokenType | TTs], Pos, SrcStr) ->
|
||||
case slurp_token_of_shape(TokenType, Pos, SrcStr) of
|
||||
Match = {tokmatch, _, _} -> Match;
|
||||
no_tokmatch -> slurp_token_types(TTs, Pos, SrcStr);
|
||||
no_tokmatch -> slurp_token_shapes(TTs, Pos, SrcStr);
|
||||
IErr = {ierr, _} -> IErr;
|
||||
Error = {error, _} -> Error
|
||||
end;
|
||||
slurp_token_types([], _Pos, _SrcStr) ->
|
||||
slurp_token_shapes([], _Pos, _SrcStr) ->
|
||||
no_tokmatch.
|
||||
|
||||
|
||||
-spec slurp_token_of_type(TokenType, Pos, SrcStr) -> MaybeToken
|
||||
when TokenType :: gsc_token_type(),
|
||||
Pos :: gsc_pos(),
|
||||
-spec slurp_token_of_shape(TokenType, Pos, SrcStr) -> MaybeToken
|
||||
when TokenType :: tk_shape(),
|
||||
Pos :: tk_pos(),
|
||||
SrcStr :: string(),
|
||||
MaybeToken :: {tokmatch, Token, Rest}
|
||||
| no_tokmatch
|
||||
@@ -509,7 +549,7 @@ slurp_token_types([], _Pos, _SrcStr) ->
|
||||
Token :: tk(),
|
||||
Rest :: string().
|
||||
% @doc
|
||||
% match a sophia token of a given type off the front of the string
|
||||
% match a sophia token of a given shape off the front of the string
|
||||
% @end
|
||||
|
||||
% COMMENTS AND WHITESPACE: lcom, bcom, ws
|
||||
@@ -518,27 +558,27 @@ slurp_token_types([], _Pos, _SrcStr) ->
|
||||
%
|
||||
% i am not going to bother writing a string matcher thing for this
|
||||
% FIXME: make a string matcher for line comments
|
||||
slurp_token_of_type(lcom, Pos, SrcStr) ->
|
||||
slurp_token_of_shape(lcom, Pos, SrcStr) ->
|
||||
case SrcStr of
|
||||
"//" ++ _ ->
|
||||
{Line, Rest} = takeline("", SrcStr),
|
||||
Token = #tk{type = lcom,
|
||||
Token = #tk{shape = lcom,
|
||||
pos = Pos,
|
||||
string = Line},
|
||||
str = Line},
|
||||
{tokmatch, Token, Rest};
|
||||
_ ->
|
||||
no_tokmatch
|
||||
end;
|
||||
% Block comments cannot have a string matcher because they have a whole stack
|
||||
% thing keeping track of depth because of nested block comments
|
||||
slurp_token_of_type(bcom, Pos, SrcStr0) ->
|
||||
slurp_token_of_shape(bcom, Pos, SrcStr0) ->
|
||||
case SrcStr0 of
|
||||
"/*" ++ SrcStr1 ->
|
||||
case bcom("/*", 1, SrcStr1) of
|
||||
{ok, CommentStr, SrcStr2} ->
|
||||
Token = #tk{type = bcom,
|
||||
Token = #tk{shape = bcom,
|
||||
pos = Pos,
|
||||
string = CommentStr},
|
||||
str = CommentStr},
|
||||
{tokmatch, Token, SrcStr2};
|
||||
Error ->
|
||||
Error
|
||||
@@ -546,15 +586,15 @@ slurp_token_of_type(bcom, Pos, SrcStr0) ->
|
||||
_ ->
|
||||
no_tokmatch
|
||||
end;
|
||||
slurp_token_of_type(ws, Pos, SrcStr) ->
|
||||
slurp_token_of_shape(ws, Pos, SrcStr) ->
|
||||
WhitespaceMatcher = gsc_strmatch:smr_sf_ws(),
|
||||
case gsc_strmatch:match(WhitespaceMatcher, SrcStr) of
|
||||
no_strmatch ->
|
||||
no_tokmatch;
|
||||
{strmatch, WS, Rest} ->
|
||||
Token = #tk{type = ws,
|
||||
Token = #tk{shape = ws,
|
||||
pos = Pos,
|
||||
string = WS},
|
||||
str = WS},
|
||||
{tokmatch, Token, Rest}
|
||||
end;
|
||||
% KEYWORDS, OPERATORS, PUNCTUATION: kwd, op, punct
|
||||
@@ -568,88 +608,88 @@ slurp_token_of_type(ws, Pos, SrcStr) ->
|
||||
%
|
||||
% we know kwds are always ids, so we parse it as an id and see if it's one
|
||||
% of the kwds
|
||||
slurp_token_of_type(kwd, Pos, SrcStr) ->
|
||||
case slurp_token_of_type(id, Pos, SrcStr) of
|
||||
{tokmatch, IdTok = #tk{string = IdStr}, Rest} ->
|
||||
slurp_token_of_shape(kwd, Pos, SrcStr) ->
|
||||
case slurp_token_of_shape(id, Pos, SrcStr) of
|
||||
{tokmatch, IdTok = #tk{str = IdStr}, Rest} ->
|
||||
case lists:member(IdStr, kwds()) of
|
||||
false ->
|
||||
no_tokmatch;
|
||||
true ->
|
||||
KwTok = IdTok#tk{type = kwd},
|
||||
KwTok = IdTok#tk{shape = kwd},
|
||||
{tokmatch, KwTok, Rest}
|
||||
end;
|
||||
no_tokmatch ->
|
||||
no_tokmatch
|
||||
end;
|
||||
slurp_token_of_type(op, Pos, SrcStr) ->
|
||||
slurp_token_of_shape(op, Pos, SrcStr) ->
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_op(), SrcStr) of
|
||||
{strmatch, Str, Rest} ->
|
||||
Token = #tk{type = op, pos = Pos, string = Str},
|
||||
Token = #tk{shape = op, pos = Pos, str = Str},
|
||||
{tokmatch, Token, Rest};
|
||||
no_strmatch ->
|
||||
no_tokmatch
|
||||
end;
|
||||
slurp_token_of_type(punct, Pos, SrcStr) ->
|
||||
slurp_token_of_shape(punct, Pos, SrcStr) ->
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_punct(), SrcStr) of
|
||||
{strmatch, Str, Rest} ->
|
||||
Token = #tk{type = punct, pos = Pos, string = Str},
|
||||
Token = #tk{shape = punct, pos = Pos, str = Str},
|
||||
{tokmatch, Token, Rest};
|
||||
no_strmatch ->
|
||||
no_tokmatch
|
||||
end;
|
||||
% SOPHIA VARIABLE NAMES: id, con, qid, qcon, tvar
|
||||
slurp_token_of_type(id, Pos, SrcStr) ->
|
||||
slurp_token_of_shape(id, Pos, SrcStr) ->
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_id(), SrcStr) of
|
||||
{strmatch, IdStr, Rest} ->
|
||||
Token = #tk{type = id, pos = Pos, string = IdStr},
|
||||
Token = #tk{shape = id, pos = Pos, str = IdStr},
|
||||
{tokmatch, Token, Rest};
|
||||
no_strmatch ->
|
||||
no_tokmatch
|
||||
end;
|
||||
slurp_token_of_type(con, Pos, SrcStr) ->
|
||||
slurp_token_of_shape(con, Pos, SrcStr) ->
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_con(), SrcStr) of
|
||||
{strmatch, Str, Rest} ->
|
||||
Token = #tk{type = con, pos = Pos, string = Str},
|
||||
Token = #tk{shape = con, pos = Pos, str = Str},
|
||||
{tokmatch, Token, Rest};
|
||||
no_strmatch ->
|
||||
no_tokmatch
|
||||
end;
|
||||
slurp_token_of_type(qid, Pos, SrcStr) ->
|
||||
slurp_token_of_shape(qid, Pos, SrcStr) ->
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_qid(), SrcStr) of
|
||||
{strmatch, Str, Rest} ->
|
||||
Token = #tk{type = qid, pos = Pos, string = Str},
|
||||
Token = #tk{shape = qid, pos = Pos, str = Str},
|
||||
{tokmatch, Token, Rest};
|
||||
no_strmatch ->
|
||||
no_tokmatch
|
||||
end;
|
||||
slurp_token_of_type(qcon, Pos, SrcStr) ->
|
||||
slurp_token_of_shape(qcon, Pos, SrcStr) ->
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_qcon(), SrcStr) of
|
||||
{strmatch, Str, Rest} ->
|
||||
Token = #tk{type = qcon, pos = Pos, string = Str},
|
||||
Token = #tk{shape = qcon, pos = Pos, str = Str},
|
||||
{tokmatch, Token, Rest};
|
||||
no_strmatch ->
|
||||
no_tokmatch
|
||||
end;
|
||||
slurp_token_of_type(tvar, Pos, SrcStr) ->
|
||||
slurp_token_of_shape(tvar, Pos, SrcStr) ->
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_tvar(), SrcStr) of
|
||||
{strmatch, Str, Rest} ->
|
||||
Token = #tk{type = tvar, pos = Pos, string = Str},
|
||||
Token = #tk{shape = tvar, pos = Pos, str = Str},
|
||||
{tokmatch, Token, Rest};
|
||||
no_strmatch ->
|
||||
no_tokmatch
|
||||
end;
|
||||
slurp_token_of_type(int16, Pos, SrcStr) ->
|
||||
slurp_token_of_shape(int16, Pos, SrcStr) ->
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_int16(), SrcStr) of
|
||||
{strmatch, Str, Rest} ->
|
||||
Token = #tk{type = int16, pos = Pos, string = Str},
|
||||
Token = #tk{shape = int16, pos = Pos, str = Str},
|
||||
{tokmatch, Token, Rest};
|
||||
no_strmatch ->
|
||||
no_tokmatch
|
||||
end;
|
||||
slurp_token_of_type(int10, Pos, SrcStr) ->
|
||||
slurp_token_of_shape(int10, Pos, SrcStr) ->
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_int10(), SrcStr) of
|
||||
{strmatch, Str, Rest} ->
|
||||
Token = #tk{type = int10, pos = Pos, string = Str},
|
||||
Token = #tk{shape = int10, pos = Pos, str = Str},
|
||||
{tokmatch, Token, Rest};
|
||||
no_strmatch ->
|
||||
no_tokmatch
|
||||
@@ -658,63 +698,63 @@ slurp_token_of_type(int10, Pos, SrcStr) ->
|
||||
% ak, ct, sg
|
||||
%
|
||||
% char: sophia char literal
|
||||
slurp_token_of_type(ak, Pos, SrcStr) ->
|
||||
slurp_token_of_shape(ak, Pos, SrcStr) ->
|
||||
StringMatcher = gsc_strmatch:smr_sf_ak(),
|
||||
case gsc_strmatch:match(StringMatcher, SrcStr) of
|
||||
no_strmatch ->
|
||||
no_tokmatch;
|
||||
{strmatch, TokenStr, Rest} ->
|
||||
Token = #tk{type = ak, pos = Pos, string = TokenStr},
|
||||
Token = #tk{shape = ak, pos = Pos, str = TokenStr},
|
||||
{tokmatch, Token, Rest}
|
||||
end;
|
||||
slurp_token_of_type(ct, Pos, SrcStr) ->
|
||||
slurp_token_of_shape(ct, Pos, SrcStr) ->
|
||||
StringMatcher = gsc_strmatch:smr_sf_ct(),
|
||||
case gsc_strmatch:match(StringMatcher, SrcStr) of
|
||||
no_strmatch ->
|
||||
no_tokmatch;
|
||||
{strmatch, TokenStr, Rest} ->
|
||||
Token = #tk{type = ct, pos = Pos, string = TokenStr},
|
||||
Token = #tk{shape = ct, pos = Pos, str = TokenStr},
|
||||
{tokmatch, Token, Rest}
|
||||
end;
|
||||
slurp_token_of_type(sg, Pos, SrcStr) ->
|
||||
slurp_token_of_shape(sg, Pos, SrcStr) ->
|
||||
StringMatcher = gsc_strmatch:smr_sf_sg(),
|
||||
case gsc_strmatch:match(StringMatcher, SrcStr) of
|
||||
no_strmatch ->
|
||||
no_tokmatch;
|
||||
{strmatch, TokenStr, Rest} ->
|
||||
Token = #tk{type = sg, pos = Pos, string = TokenStr},
|
||||
Token = #tk{shape = sg, pos = Pos, str = TokenStr},
|
||||
{tokmatch, Token, Rest}
|
||||
end;
|
||||
slurp_token_of_type(char, Pos, SrcStr) ->
|
||||
slurp_token_of_shape(char, Pos, SrcStr) ->
|
||||
StringMatcher = gsc_strmatch:smr_sf_char(),
|
||||
case gsc_strmatch:match(StringMatcher, SrcStr) of
|
||||
no_strmatch ->
|
||||
no_tokmatch;
|
||||
{strmatch, TokenStr, Rest} ->
|
||||
Token = #tk{type = char, pos = Pos, string = TokenStr},
|
||||
Token = #tk{shape = char, pos = Pos, str = TokenStr},
|
||||
{tokmatch, Token, Rest}
|
||||
end;
|
||||
slurp_token_of_type(string, Pos, SrcStr) ->
|
||||
slurp_token_of_shape(string, Pos, SrcStr) ->
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_str(), SrcStr) of
|
||||
no_strmatch ->
|
||||
no_tokmatch;
|
||||
{strmatch, TokenStr, Rest} ->
|
||||
Token = #tk{type = string, pos = Pos, string = TokenStr},
|
||||
Token = #tk{shape = string, pos = Pos, str = TokenStr},
|
||||
{tokmatch, Token, Rest}
|
||||
end;
|
||||
slurp_token_of_type(bytes, Pos, SrcStr) ->
|
||||
slurp_token_of_shape(bytes, Pos, SrcStr) ->
|
||||
case gsc_strmatch:match(gsc_strmatch:smr_sf_bytes(), SrcStr) of
|
||||
no_strmatch ->
|
||||
no_tokmatch;
|
||||
{strmatch, TokenStr, Rest} ->
|
||||
Token = #tk{type = bytes, pos = Pos, string = TokenStr},
|
||||
Token = #tk{shape = bytes, pos = Pos, str = TokenStr},
|
||||
{tokmatch, Token, Rest}
|
||||
end;
|
||||
slurp_token_of_type(NyiType, Pos, SrcStr) ->
|
||||
Message = io_lib:format("cannot slurp token of type: ~p", [NyiType]),
|
||||
slurp_token_of_shape(NyiType, Pos, SrcStr) ->
|
||||
Message = io_lib:format("cannot slurp token of shape: ~p", [NyiType]),
|
||||
error(#gsc_err{atom = nyi,
|
||||
string = Message,
|
||||
extra = [{token_type, NyiType},
|
||||
str = Message,
|
||||
extra = [{token_shape, NyiType},
|
||||
{pos, Pos},
|
||||
{rest, SrcStr}]}).
|
||||
|
||||
|
||||
+14
-14
@@ -76,14 +76,14 @@
|
||||
-type so_symbol() :: so_kwd() | so_special_char() | atom().
|
||||
|
||||
-type so_token2() :: {Symbol :: so_symbol(),
|
||||
Location :: gsc_pos()}.
|
||||
Location :: tk_pos()}.
|
||||
|
||||
% FIXME
|
||||
% this is 'id', 'con', qid
|
||||
-type so_tk3type() :: char | string | hex | int | bytes | qid | qcon | tvar | id | con.
|
||||
|
||||
-type so_token3() :: {TokenType :: so_tk3type(),
|
||||
Location :: gsc_pos(),
|
||||
Location :: tk_pos(),
|
||||
TokenValue :: term()}.
|
||||
|
||||
-type so_token() :: so_token2() | so_token3().
|
||||
@@ -151,12 +151,12 @@ scan(SrcStr) ->
|
||||
%
|
||||
% so if we see an ak/ct/sg token, we summon evil ben
|
||||
% carson to reconjoin the unconjoined twins
|
||||
to_so_tokens([ AkTok = #tk{type = AkCtSg, pos = Pos}
|
||||
to_so_tokens([ AkTok = #tk{shape = AkCtSg, pos = Pos}
|
||||
| Sheeit])
|
||||
when ak =:= AkCtSg;
|
||||
ct =:= AkCtSg;
|
||||
sg =:= AkCtSg ->
|
||||
{#tk{string = FinalAkStr}, NewSheeit}
|
||||
{#tk{str = FinalAkStr}, NewSheeit}
|
||||
= ken_barson_rises(AkTok, Sheeit),
|
||||
[{id, Pos, FinalAkStr}| to_so_tokens(NewSheeit)];
|
||||
% this part is just lists:filtermap
|
||||
@@ -259,9 +259,9 @@ to_so_tokens([]) ->
|
||||
% `_`**: `smr_plus` requires >=1 base58 char to
|
||||
% match; `ak_I`, `ak_0`, `ak__bar` all fall
|
||||
% through to `id` and both tokenizers agree.
|
||||
ken_barson_rises(AkTokAcc = #tk{string = AkStr},
|
||||
SrcTokens = [#tk{type = CandidateType,
|
||||
string = CandidateString}
|
||||
ken_barson_rises(AkTokAcc = #tk{str = AkStr},
|
||||
SrcTokens = [#tk{shape = CandidateType,
|
||||
str = CandidateString}
|
||||
| Rest]) ->
|
||||
% candidate:
|
||||
% dig out the token type and the string
|
||||
@@ -273,7 +273,7 @@ ken_barson_rises(AkTokAcc = #tk{string = AkStr},
|
||||
Smash ->
|
||||
% dig out the token from LcTokApi
|
||||
NewAkStr = AkStr ++ CandidateString,
|
||||
NewAkTokAcc = AkTokAcc#tk{string = NewAkStr},
|
||||
NewAkTokAcc = AkTokAcc#tk{str = NewAkStr},
|
||||
ken_barson_rises(NewAkTokAcc, Rest);
|
||||
Pass ->
|
||||
{AkTokAcc, SrcTokens}
|
||||
@@ -320,9 +320,9 @@ pass_types() ->
|
||||
% follow-on tokens
|
||||
% @end
|
||||
|
||||
to_so_token(#tk{type = SfTokenType,
|
||||
pos = Pos,
|
||||
string = SfTokenStr}) ->
|
||||
to_so_token(#tk{shape = SfTokenType,
|
||||
pos = Pos,
|
||||
str = SfTokenStr}) ->
|
||||
case SfTokenType of
|
||||
%-----------------
|
||||
% Ignored
|
||||
@@ -371,7 +371,7 @@ to_so_token(#tk{type = SfTokenType,
|
||||
NYI ->
|
||||
Msg = io_lib:format("gsc_so_scan:to_so_token/1: unhandled token shape: ~p", [NYI]),
|
||||
error(#gsc_err{atom = nyi,
|
||||
string = Msg})
|
||||
str = Msg})
|
||||
end.
|
||||
|
||||
%% ak/ct/sg all tokenize to id
|
||||
@@ -393,7 +393,7 @@ so_parse_char([$' | Chars]) ->
|
||||
[Char] -> Char;
|
||||
_Bad ->
|
||||
error(#gsc_err{atom = bad_token,
|
||||
string = "Bad character literal: '" ++ Chars})
|
||||
str = "Bad character literal: '" ++ Chars})
|
||||
end.
|
||||
|
||||
so_parse_string([$" | Chars]) ->
|
||||
@@ -435,7 +435,7 @@ unescape(Delim, [$\\, Code | Chars], Acc) ->
|
||||
$t -> Ok($\t);
|
||||
$v -> Ok($\v);
|
||||
_ -> error(#gsc_err{atom = bad_escape_char,
|
||||
string = "Bad control sequence: \\" ++ [Code]}) %% TODO
|
||||
str = "Bad control sequence: \\" ++ [Code]}) %% TODO
|
||||
end;
|
||||
unescape(Delim, [C | Chars], Acc) ->
|
||||
unescape(Delim, Chars, [C | Acc]).
|
||||
|
||||
Reference in New Issue
Block a user