Compare commits

..

12 Commits

Author SHA1 Message Date
Peter Harpending f04b7311f5 stuff 2026-06-05 00:58:53 -07:00
pharpend 10424927b1 stuff 2026-06-04 14:01:46 -07:00
pharpend fdb40dcb92 stuff 2026-06-04 11:42:48 -07:00
Peter Harpending e180dc955d stuff 2026-06-03 19:28:55 -07:00
Peter Harpending 4e54bebeba parens work... moving on to documenting work 2026-06-03 15:17:55 -07:00
Peter Harpending 4f4adaa284 stopping point 2026-06-02 16:51:05 -07:00
Peter Harpending 2c36a02331 all the old tests pass... moving on 2026-06-02 12:48:41 -07:00
Peter Harpending 5cae022b8b Merge remote-tracking branch 'refs/remotes/origin/master' 2026-06-02 11:04:54 -07:00
Peter Harpending dfb158e593 unicode 2026-06-02 11:04:34 -07:00
pharpend ef69016294 stuff 2026-06-02 10:32:53 -07:00
pharpend 55a56753c1 more renaming 2026-06-02 01:58:40 -07:00
pharpend 270f192f0c more mass renaming 2026-06-02 01:48:05 -07:00
61 changed files with 2595 additions and 414 deletions
+15
View File
@@ -0,0 +1,15 @@
# TODO
- architecture needs more careful thought but only after something
works
- too fuzzy right now
- undo gs_ naming fuckery.. everything is `gsc_*`. it's just
needlessly confusing. for now let's name new things gsc_* and then
go back and undo the stupidity
# TONOTDO
- barf for outputs, slurp for inputs
- rename parser layers sequentially
# TODONE
+30
View File
@@ -0,0 +1,30 @@
# gsc = gajumaru sophia compiler
**This is _NOT_ the official Sophia compiler.** If you're looking for
that see https://git.qpq.swiss/QPQ-AG/sophia
This is an incomplete prototype rewrite of the legacy (official)
sophia compiler in straightforward Erlang. It grew out of my (Peter
Harpending) own efforts to document the language and its relationship
to FATE (the gajumaru virtual machine).
The goal for version 0.1 is to mirror the success behavior of the
legacy sophia compiler.
# Setup
```
git clone https://git.qpq.swiss/QPQ-AG/gsc.git
```
Add the following to `~/.bashrc` or wheremstever:
```
export PATH=$PATH:/path/to/gsc/bin
```
To test run
```
gsc --help
```
+199
View File
@@ -0,0 +1,199 @@
% ANSI screen drawing macros in erlang
%
% Author: Peter Harpending <peterharpending@qpq.swiss>
% Date: 2026-04-10
%
% Copyright (C) 2026, QPQ AG
% Not exhaustive, just what I need for the moment
% ref: https://gist.github.com/ConnerWill/d4b6c776b509add763e17f9f113fd25b
-define(ANSI_ESC, [27]).
-define(ANSI_CRLF, "\r\n").
-define(ANSI_FF, [12]).
-define(ANSI_CLEAR, [12]).
-define(ANSI_LINE(X), [X, ?ANSI_CRLF]).
% MARKDOWN TIER TEXT FORMATTING
% resets all formatting
-define(ANSI_RESET, [?ANSI_ESC, "[0m"]).
-define(ANSI_BOLD, [?ANSI_ESC, "[1m"]).
-define(ANSI_DIM, [?ANSI_ESC, "[2m"]).
-define(ANSI_ITALIC, [?ANSI_ESC, "[3m"]).
-define(ANSI_ULINE, [?ANSI_ESC, "[4m"]).
-define(ANSI_BLINK, [?ANSI_ESC, "[5m"]).
-define(ANSI_INVERT, [?ANSI_ESC, "[7m"]).
-define(ANSI_INVIS, [?ANSI_ESC, "[8m"]).
-define(ANSI_STRIKE, [?ANSI_ESC, "[9m"]).
% > Note: Both dim and bold modes are reset with the ESC[22m sequence. The
% > ESC[21m sequence is a non-specified sequence for double underline mode and
% > only work in some terminals and is reset with ESC[24m.
-define(ANSI_UNBOLD, [?ANSI_ESC, "[22m"]).
-define(ANSI_UNDIM, [?ANSI_ESC, "[22m"]).
-define(ANSI_UNITALIC, [?ANSI_ESC, "[23m"]).
-define(ANSI_UNULINE, [?ANSI_ESC, "[24m"]).
-define(ANSI_UNBLINK, [?ANSI_ESC, "[25m"]).
-define(ANSI_UNINVERT, [?ANSI_ESC, "[27m"]).
-define(ANSI_UNINVIS, [?ANSI_ESC, "[28m"]).
-define(ANSI_UNSTRIKE, [?ANSI_ESC, "[29m"]).
-define(ANSI_BOLD(X), [?ANSI_BOLD, X, ?ANSI_UNBOLD]).
-define(ANSI_DIM(X), [?ANSI_DIM, X, ?ANSI_UNDIM]).
-define(ANSI_ITALIC(X), [?ANSI_ITALIC, X, ?ANSI_UNITALIC]).
-define(ANSI_ULINE(X), [?ANSI_ULINE, X, ?ANSI_UNULINE]).
-define(ANSI_BLINK(X), [?ANSI_BLINK, X, ?ANSI_UNBLINK]).
-define(ANSI_INVERT(X), [?ANSI_INVERT, X, ?ANSI_UNINVERT]).
-define(ANSI_INVIS(X), [?ANSI_INVIS, X, ?ANSI_UNINVIS]).
-define(ANSI_STRIKE(X), [?ANSI_STRIKE, X, ?ANSI_UNSTRIKE]).
% COLORS
%
% COLOR SetFG SetBG
% -----------------------------
% Black 30 40
% Red 31 41
% Green 32 42
% Yellow 33 43
% Blue 34 44
% Magenta 35 45
% Cyan 36 46
% White 37 47
% Default 39 49
-define(ANSI_FG_RESET, [?ANSI_ESC, "[39m"]).
-define(ANSI_BG_RESET, [?ANSI_ESC, "[49m"]).
-define(ANSI_FG_BLACK, [?ANSI_ESC, "[30m"]).
-define(ANSI_FG_RED, [?ANSI_ESC, "[31m"]).
-define(ANSI_FG_GREEN, [?ANSI_ESC, "[32m"]).
-define(ANSI_FG_YELLOW, [?ANSI_ESC, "[33m"]).
-define(ANSI_FG_BLUE, [?ANSI_ESC, "[34m"]).
-define(ANSI_FG_MAGENTA, [?ANSI_ESC, "[35m"]).
-define(ANSI_FG_CYAN, [?ANSI_ESC, "[36m"]).
-define(ANSI_FG_WHITE, [?ANSI_ESC, "[37m"]).
-define(ANSI_BG_BLACK, [?ANSI_ESC, "[40m"]).
-define(ANSI_BG_RED, [?ANSI_ESC, "[41m"]).
-define(ANSI_BG_GREEN, [?ANSI_ESC, "[42m"]).
-define(ANSI_BG_YELLOW, [?ANSI_ESC, "[43m"]).
-define(ANSI_BG_BLUE, [?ANSI_ESC, "[44m"]).
-define(ANSI_BG_MAGENTA, [?ANSI_ESC, "[45m"]).
-define(ANSI_BG_CYAN, [?ANSI_ESC, "[46m"]).
-define(ANSI_BG_WHITE, [?ANSI_ESC, "[47m"]).
-define(ANSI_FG_BLACK(X), [?ANSI_FG_BLACK, X, ?ANSI_FG_RESET]).
-define(ANSI_FG_RED(X), [?ANSI_FG_RED, X, ?ANSI_FG_RESET]).
-define(ANSI_FG_GREEN(X), [?ANSI_FG_GREEN, X, ?ANSI_FG_RESET]).
-define(ANSI_FG_YELLOW(X), [?ANSI_FG_YELLOW, X, ?ANSI_FG_RESET]).
-define(ANSI_FG_BLUE(X), [?ANSI_FG_BLUE, X, ?ANSI_FG_RESET]).
-define(ANSI_FG_MAGENTA(X), [?ANSI_FG_MAGENTA, X, ?ANSI_FG_RESET]).
-define(ANSI_FG_CYAN(X), [?ANSI_FG_CYAN, X, ?ANSI_FG_RESET]).
-define(ANSI_FG_WHITE(X), [?ANSI_FG_WHITE, X, ?ANSI_FG_RESET]).
-define(ANSI_BG_BLACK(X), [?ANSI_BG_BLACK, X, ?ANSI_BG_RESET]).
-define(ANSI_BG_RED(X), [?ANSI_BG_RED, X, ?ANSI_BG_RESET]).
-define(ANSI_BG_GREEN(X), [?ANSI_BG_GREEN, X, ?ANSI_BG_RESET]).
-define(ANSI_BG_YELLOW(X), [?ANSI_BG_YELLOW, X, ?ANSI_BG_RESET]).
-define(ANSI_BG_BLUE(X), [?ANSI_BG_BLUE, X, ?ANSI_BG_RESET]).
-define(ANSI_BG_MAGENTA(X), [?ANSI_BG_MAGENTA, X, ?ANSI_BG_RESET]).
-define(ANSI_BG_CYAN(X), [?ANSI_BG_CYAN, X, ?ANSI_BG_RESET]).
-define(ANSI_BG_WHITE(X), [?ANSI_BG_WHITE, X, ?ANSI_BG_RESET]).
% bright colors
-define(ANSI_FG_BBLACK, [?ANSI_ESC, "[90m"]).
-define(ANSI_FG_BRED, [?ANSI_ESC, "[91m"]).
-define(ANSI_FG_BGREEN, [?ANSI_ESC, "[92m"]).
-define(ANSI_FG_BYELLOW, [?ANSI_ESC, "[93m"]).
-define(ANSI_FG_BBLUE, [?ANSI_ESC, "[94m"]).
-define(ANSI_FG_BMAGENTA, [?ANSI_ESC, "[95m"]).
-define(ANSI_FG_BCYAN, [?ANSI_ESC, "[96m"]).
-define(ANSI_FG_BWHITE, [?ANSI_ESC, "[97m"]).
-define(ANSI_BG_BBLACK, [?ANSI_ESC, "[100m"]).
-define(ANSI_BG_BRED, [?ANSI_ESC, "[101m"]).
-define(ANSI_BG_BGREEN, [?ANSI_ESC, "[102m"]).
-define(ANSI_BG_BYELLOW, [?ANSI_ESC, "[103m"]).
-define(ANSI_BG_BBLUE, [?ANSI_ESC, "[104m"]).
-define(ANSI_BG_BMAGENTA, [?ANSI_ESC, "[105m"]).
-define(ANSI_BG_BCYAN, [?ANSI_ESC, "[106m"]).
-define(ANSI_BG_BWHITE, [?ANSI_ESC, "[107m"]).
-define(ANSI_FG_BBLACK(X), [?ANSI_FG_BBLACK, X, ?ANSI_FG_RESET]).
-define(ANSI_FG_BRED(X), [?ANSI_FG_BRED, X, ?ANSI_FG_RESET]).
-define(ANSI_FG_BGREEN(X), [?ANSI_FG_BGREEN, X, ?ANSI_FG_RESET]).
-define(ANSI_FG_BYELLOW(X), [?ANSI_FG_BYELLOW, X, ?ANSI_FG_RESET]).
-define(ANSI_FG_BBLUE(X), [?ANSI_FG_BBLUE, X, ?ANSI_FG_RESET]).
-define(ANSI_FG_BMAGENTA(X), [?ANSI_FG_BMAGENTA, X, ?ANSI_FG_RESET]).
-define(ANSI_FG_BCYAN(X), [?ANSI_FG_BCYAN, X, ?ANSI_FG_RESET]).
-define(ANSI_FG_BWHITE(X), [?ANSI_FG_BWHITE, X, ?ANSI_FG_RESET]).
-define(ANSI_BG_BBLACK(X), [?ANSI_BG_BBLACK, X, ?ANSI_BG_RESET]).
-define(ANSI_BG_BRED(X), [?ANSI_BG_BRED, X, ?ANSI_BG_RESET]).
-define(ANSI_BG_BGREEN(X), [?ANSI_BG_BGREEN, X, ?ANSI_BG_RESET]).
-define(ANSI_BG_BYELLOW(X), [?ANSI_BG_BYELLOW, X, ?ANSI_BG_RESET]).
-define(ANSI_BG_BBLUE(X), [?ANSI_BG_BBLUE, X, ?ANSI_BG_RESET]).
-define(ANSI_BG_BMAGENTA(X), [?ANSI_BG_BMAGENTA, X, ?ANSI_BG_RESET]).
-define(ANSI_BG_BCYAN(X), [?ANSI_BG_BCYAN, X, ?ANSI_BG_RESET]).
-define(ANSI_BG_BWHITE(X), [?ANSI_BG_BWHITE, X, ?ANSI_BG_RESET]).
-define(ANSI_FG_RGB(R,G,B),
[?ANSI_ESC,
"[38;2;",
integer_to_list(R),";",
integer_to_list(G),";",
integer_to_list(B),"m"]
).
-define(ANSI_BG_RGB(R,G,B),
[?ANSI_ESC,
"[48;2;",
integer_to_list(R),";",
integer_to_list(G),";",
integer_to_list(B),"m"]
).
-define(ANSI_FG_RGB(R,G,B,Chars), [?ANSI_FG_RGB(R,G,B), Chars, ?ANSI_FG_RESET]).
-define(ANSI_BG_RGB(R,G,B,Chars), [?ANSI_BG_RGB(R,G,B), Chars, ?ANSI_BG_RESET]).
% cursor controls
-define(ANSI_CUR_HOME, [?ANSI_ESC, "[H"]).
-define(ANSI_CUR_XY(X, Y), [?ANSI_ESC, "[", integer_to_list(Y), ";", integer_to_list(X), "H"]).
-define(ANSI_CUR_UP(N), [?ANSI_ESC, "[", integer_to_list(N), "A"]).
-define(ANSI_CUR_DOWN(N), [?ANSI_ESC, "[", integer_to_list(N), "B"]).
-define(ANSI_CUR_RIGHT(N), [?ANSI_ESC, "[", integer_to_list(N), "C"]).
-define(ANSI_CUR_LEFT(N), [?ANSI_ESC, "[", integer_to_list(N), "D"]).
-define(ANSI_CUR_SAVE, [?ANSI_ESC, "7"]).
-define(ANSI_CUR_RESTORE, [?ANSI_ESC, "8"]).
-define(ANSI_CUR_QUERY, [?ANSI_ESC, "[6n"]).
-define(ANSI_CUR_UP, ?ANSI_CUR_UP(1)).
-define(ANSI_CUR_DOWN, ?ANSI_CUR_DOWN(1)).
-define(ANSI_CUR_RIGHT, ?ANSI_CUR_RIGHT(1)).
-define(ANSI_CUR_LEFT, ?ANSI_CUR_LEFT(1)).
% relative movement "forward" +X=right, +Y=down
-define(ANSI_CUR_VECT(X, Y),
if X =< 0, Y =< 0 -> [?ANSI_CUR_LEFT(-1*X), ?ANSI_CUR_UP(-1*Y)];
X =< 0, 0 < Y -> [?ANSI_CUR_LEFT(-1*X), ?ANSI_CUR_DOWN(Y)];
0 < X, Y =< 0 -> [?ANSI_CUR_RIGHT(X), ?ANSI_CUR_UP(-1*Y)];
0 < X, 0 < Y -> [?ANSI_CUR_RIGHT(X), ?ANSI_CUR_DOWN(Y)]
end
).
-define(ANSI_ALTBUF, [?ANSI_ESC, "[?1049h"]).
-define(ANSI_UNALTBUF, [?ANSI_ESC, "[?1049l"]).
-define(ANSI_CUR_INVIS, [?ANSI_ESC, "[?25l"]).
-define(ANSI_CUR_VIS, [?ANSI_ESC, "[?25h"]).
-define(ANSI_WRAP, [?ANSI_ESC, "[=7h"]).
-define(ANSI_NOWRAP, [?ANSI_ESC, "[=7l"]).
+256
View File
@@ -0,0 +1,256 @@
In Congress, July 4, 1776
The unanimous Declaration of the thirteen united States of America,
When in the Course of human events, it becomes necessary for one
people to dissolve the political bands which have connected them with
another, and to assume among the powers of the earth, the separate
and equal station to which the Laws of Nature and of Nature's God
entitle them, a decent respect to the opinions of mankind requires
that they should declare the causes which impel them to the
separation.
We hold these truths to be self-evident, that all men are created
equal, that they are endowed by their Creator with certain
unalienable Rights, that among these are Life, Liberty and the
pursuit of Happiness.--That to secure these rights, Governments are
instituted among Men, deriving their just powers from the consent of
the governed, --That whenever any Form of Government becomes
destructive of these ends, it is the Right of the People to alter or
to abolish it, and to institute new Government, laying its foundation
on such principles and organizing its powers in such form, as to them
shall seem most likely to effect their Safety and Happiness.
Prudence, indeed, will dictate that Governments long established
should not be changed for light and transient causes; and accordingly
all experience hath shewn, that mankind are more disposed to suffer,
while evils are sufferable, than to right themselves by abolishing
the forms to which they are accustomed. But when a long train of
abuses and usurpations, pursuing invariably the same Object evinces a
design to reduce them under absolute Despotism, it is their right, it
is their duty, to throw off such Government, and to provide new
Guards for their future security.--Such has been the patient
sufferance of these Colonies; and such is now the necessity which
constrains them to alter their former Systems of Government. The
history of the present King of Great Britain is a history of repeated
injuries and usurpations, all having in direct object the
establishment of an absolute Tyranny over these States. To prove
this, let Facts be submitted to a candid world.
He has refused his Assent to Laws, the most wholesome and
necessary for the public good.
He has forbidden his Governors to pass Laws of immediate and
pressing importance, unless suspended in their operation till his
Assent should be obtained; and when so suspended, he has utterly
neglected to attend to them.
He has refused to pass other Laws for the accommodation of large
districts of people, unless those people would relinquish the
right of Representation in the Legislature, a right inestimable
to them and formidable to tyrants only.
He has called together legislative bodies at places unusual,
uncomfortable, and distant from the depository of their public
Records, for the sole purpose of fatiguing them into compliance
with his measures.
He has dissolved Representative Houses repeatedly, for opposing
with manly firmness his invasions on the rights of the people.
He has refused for a long time, after such dissolutions, to cause
others to be elected; whereby the Legislative powers, incapable
of Annihilation, have returned to the People at large for their
exercise; the State remaining in the mean time exposed to all the
dangers of invasion from without, and convulsions within.
He has endeavoured to prevent the population of these States; for
that purpose obstructing the Laws for Naturalization of
Foreigners; refusing to pass others to encourage their migrations
hither, and raising the conditions of new Appropriations of
Lands.
He has obstructed the Administration of Justice, by refusing his
Assent to Laws for establishing Judiciary powers.
He has made Judges dependent on his Will alone, for the tenure of
their offices, and the amount and payment of their salaries.
He has erected a multitude of New Offices, and sent hither swarms
of Officers to harrass our people, and eat out their substance.
He has kept among us, in times of peace, Standing Armies without
the Consent of our legislatures.
He has affected to render the Military independent of and
superior to the Civil power.
He has combined with others to subject us to a jurisdiction
foreign to our constitution, and unacknowledged by our laws;
giving his Assent to their Acts of pretended Legislation:
For Quartering large bodies of armed troops among us:
For protecting them, by a mock Trial, from punishment for any
Murders which they should commit on the Inhabitants of these
States:
For cutting off our Trade with all parts of the world:
For imposing Taxes on us without our Consent:
For depriving us in many cases, of the benefits of Trial by Jury:
For transporting us beyond Seas to be tried for pretended
offences:
For abolishing the free System of English Laws in a neighbouring
Province, establishing therein an Arbitrary government, and
enlarging its Boundaries so as to render it at once an example
and fit instrument for introducing the same absolute rule into
these Colonies:
For taking away our Charters, abolishing our most valuable Laws,
and altering fundamentally the Forms of our Governments:
For suspending our own Legislatures, and declaring themselves
invested with power to legislate for us in all cases whatsoever.
He has abdicated Government here, by declaring us out of his
Protection and waging War against us.
He has plundered our seas, ravaged our Coasts, burnt our towns,
and destroyed the lives of our people.
He is at this time transporting large Armies of foreign
Mercenaries to compleat the works of death, desolation and
tyranny, already begun with circumstances of Cruelty & perfidy
scarcely paralleled in the most barbarous ages, and totally
unworthy the Head of a civilized nation.
He has constrained our fellow Citizens taken Captive on the high
Seas to bear Arms against their Country, to become the
executioners of their friends and Brethren, or to fall themselves
by their Hands.
He has excited domestic insurrections amongst us, and has
endeavoured to bring on the inhabitants of our frontiers, the
merciless Indian Savages, whose known rule of warfare, is an
undistinguished destruction of all ages, sexes and conditions.
In every stage of these Oppressions We have Petitioned for Redress in
the most humble terms: Our repeated Petitions have been answered only
by repeated injury. A Prince, whose character is thus marked by every
act which may define a Tyrant, is unfit to be the ruler of a free
people.
Nor have We been wanting in attentions to our Brittish brethren. We
have warned them from time to time of attempts by their legislature
to extend an unwarrantable jurisdiction over us. We have reminded
them of the circumstances of our emigration and settlement here. We
have appealed to their native justice and magnanimity, and we have
conjured them by the ties of our common kindred to disavow these
usurpations, which, would inevitably interrupt our connections and
correspondence. They too have been deaf to the voice of justice and
of consanguinity. We must, therefore, acquiesce in the necessity,
which denounces our Separation, and hold them, as we hold the rest of
mankind, Enemies in War, in Peace Friends.
We, therefore, the Representatives of the united States of America,
in General Congress, Assembled, appealing to the Supreme Judge of the
world for the rectitude of our intentions, do, in the Name, and by
Authority of the good People of these Colonies, solemnly publish and
declare, That these United Colonies are, and of Right ought to be
Free and Independent States; that they are Absolved from all
Allegiance to the British Crown, and that all political connection
between them and the State of Great Britain, is and ought to be
totally dissolved; and that as Free and Independent States, they have
full Power to levy War, conclude Peace, contract Alliances, establish
Commerce, and to do all other Acts and Things which Independent
States may of right do. And for the support of this Declaration, with
a firm reliance on the protection of divine Providence, we mutually
pledge to each other our Lives, our Fortunes and our sacred Honor.
Georgia
Button Gwinnett
Lyman Hall
George Walton
North Carolina
William Hooper
Joseph Hewes
John Penn
South Carolina
Edward Rutledge
Thomas Heyward, Jr.
Thomas Lynch, Jr.
Arthur Middleton
Massachusetts
John Hancock
Maryland
Samuel Chase
William Paca
Thomas Stone
Charles Carroll of Carrollton
Virginia
George Wythe
Richard Henry Lee
Thomas Jefferson
Benjamin Harrison
Thomas Nelson, Jr.
Francis Lightfoot Lee
Carter Braxton
Pennsylvania
Robert Morris
Benjamin Rush
Benjamin Franklin
John Morton
George Clymer
James Smith
George Taylor
James Wilson
George Ross
Delaware
Caesar Rodney
George Read
Thomas McKean
New York
William Floyd
Philip Livingston
Francis Lewis
Lewis Morris
New Jersey
Richard Stockton
John Witherspoon
Francis Hopkinson
John Hart
Abraham Clark
New Hampshire
Josiah Bartlett
William Whipple
Massachusetts
Samuel Adams
John Adams
Robert Treat Paine
Elbridge Gerry
Rhode Island
Stephen Hopkins
William Ellery
Connecticut
Roger Sherman
Samuel Huntington
William Williams
Oliver Wolcott
New Hampshire
Matthew Thornton
+44
View File
@@ -0,0 +1,44 @@
%%=====================================================
%% ARG PARSING
%%=====================================================
%%-----------------------------------------------------
%% TOKENIZING
%%-----------------------------------------------------
-record{ctk,
{shape = none :: none | '-' | '--' | str,
val = none :: none | [char()] | string() | string(),
str = none :: none | string()}).
-type ctk() :: #ctk{}.
-spec tokenize(Args) -> CliTokens when
Args :: [string()],
CliTokens :: [ctk()].
%% @private tokenize cli args
tokenize(Args) ->
[tokenize_arg(S) || S <- Args].
tokenize_arg(Str = "--" ++ Val) ->
#ctk{shape = '--',
val = Val,
str = Str};
tokenize_arg(Str = "-" ++ Val) ->
#ctk{shape = '-',
val = Val,
str = Str};
tokenize_arg(Str) ->
#ctk{shape = str,
val = none,
str = Str}.
%%-----------------------------------------------------
%% PARSING
%%-----------------------------------------------------
+77
View File
@@ -0,0 +1,77 @@
-spec s2t_file(Signal) -> AstFile when
Signal :: [tk()],
AstFile :: #ns{meta :: file, kids :: asf()}.
s2t_file([]) ->
error(empty_file);
s2t_file(S0 = [#tk{pos = {_, FileCol}} | _]) ->
Blk0 = s2t_gulp_block(FileCol, S0),
Blk1 = t2t_parse_tds_in_block(Blk0),
#ns{meta = file, kids = [Blk1]}.
-spec s2t_gulp_block(BlkCol, Signal) -> Block when
BlkCol :: pos_integer(),
Signal :: [tk()],
Block :: #ns{meta :: block}.
s2t_gulp_block(BCol, Tks) ->
% sanity check
InBlock = fun(#tk{pos = {_, TCol}}) -> BCol =< TCol end,
true = lists:all(InBlock, Tks),
BlockItems = s2f_block_items(BCol, Tks),
#ns{meta = block, kids = BlockItems}.
-spec s2f_block_items(BCol, Signal) -> BlkItems when
BCol :: pos_integer(),
Signal :: [tk()],
BlkItems :: [BlkItem],
BlkItem :: #ns{meta :: block_item,
kids :: asf()}.
s2f_block_items(BCol, Signal) ->
s2f_block_items(BCol, [], Signal).
s2f_block_items(_BCol, Stk, []) ->
lists:reverse(Stk);
s2f_block_items(BCol, Stk, [#tk{pos = {_, BCol}} = T0 | F0]) ->
{slurp, BlkItem, F1} = s2t_slurp_block_item(BCol, T0, F0),
s2f_block_items(BCol, [BlkItem | Stk], F1).
s2t_slurp_block_item(BCol, T0, F0) ->
{ItemTokens, F1} = s2s_sw_block_item(BCol, T0, F0),
Item = #ns{meta = block_item, kids = ItemTokens},
{slurp, Item, F1}.
% sw = splitwith; kind of take/drop
s2s_sw_block_item(BCol, T0, F0) ->
InItem = fun(#tk{pos = {_, TCol}}) -> BCol < TCol end,
{F0_II, F1} = lists:splitwith(InItem, F0),
{[T0 | F0_II], F1}.
-spec t2t_parse_tds_in_block(Block0) -> Block1 when
Block0 :: ast(),
Block1 :: ast().
% go through and convert the block_item nodes to top
% decls
t2t_parse_tds_in_block(B0 = #ns{meta = block, kids = F0}) ->
F1 = lists:map(fun t2t_parse_td_from_item/1, F0),
B0#ns{kids = F1}.
-spec t2t_parse_td_from_item(BlockItem) -> TopDecl when
BlockItem :: #ns{meta :: block_item},
TopDecl :: #ns{meta :: td_meta()}.
t2t_parse_td_from_item(#ns{meta = block_item, kids = Signal}) ->
s2t_top_decl(Signal).
-spec s2t_top_decl(Signal) -> TdTree when
Signal :: [tk()],
TdTree :: ast().
s2t_top_decl(S0) ->
+292
View File
@@ -0,0 +1,292 @@
# Syntax
## Lexical syntax
### Comments
Single line comments start with `//` and block comments are enclosed in `/*`
and `*/` and can be nested.
### Keywords
```
contract include let switch type record datatype if elif else function
stateful payable true false mod public entrypoint private indexed namespace
interface main using as for hiding
```
### Tokens
- `Id = [a-z_][A-Za-z0-9_']*` identifiers start with a lower case letter.
- `Con = [A-Z][A-Za-z0-9_']*` constructors start with an upper case letter.
- `QId = (Con\.)+Id` qualified identifiers (e.g. `Map.member`)
- `QCon = (Con\.)+Con` qualified constructor
- `TVar = 'Id` type variable (e.g `'a`, `'b`)
- `Int = [0-9]+(_[0-9]+)*|0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*` integer literal with optional `_` separators
- `Bytes = #[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*` byte array literal with optional `_` separators
- `String` string literal enclosed in `"` with escape character `\`
- `Char` character literal enclosed in `'` with escape character `\`
- `AccountAddress` base58-encoded 32 byte account pubkey with `ak_` prefix
- `ContractAddress` base58-encoded 32 byte contract address with `ct_` prefix
- `Signature` base58-encoded 64 byte cryptographic signature with `sg_` prefix
Valid string escape codes are
| Escape | ASCII | |
|---------------|-------------|---|
| `\b` | 8 | |
| `\t` | 9 | |
| `\n` | 10 | |
| `\v` | 11 | |
| `\f` | 12 | |
| `\r` | 13 | |
| `\e` | 27 | |
| `\xHexDigits` | *HexDigits* | |
See the [identifier encoding scheme](https://git.qpq.swiss/QPQ-AG/protocol/src/branch/master/node/api/api_encoding.md) for the
details on the base58 literals.
## Layout blocks
Sophia uses Python-style layout rules to group declarations and statements. A
layout block with more than one element must start on a separate line and be
indented more than the currently enclosing layout block. Blocks with a single
element can be written on the same line as the previous token.
Each element of the block must share the same indentation and no part of an
element may be indented less than the indentation of the block. For instance
```sophia
contract Layout =
function foo() = 0 // no layout
function bar() = // layout block starts on next line
let x = foo() // indented more than 2 spaces
x
+ 1 // the '+' is indented more than the 'x'
```
## Notation
In describing the syntax below, we use the following conventions:
- Upper-case identifiers denote non-terminals (like `Expr`) or terminals with
some associated value (like `Id`).
- Keywords and symbols are enclosed in single quotes: `'let'` or `'='`.
- Choices are separated by vertical bars: `|`.
- Optional elements are enclosed in `[` square brackets `]`.
- `(` Parentheses `)` are used for grouping.
- Zero or more repetitions are denoted by a postfix `*`, and one or more
repetitions by a `+`.
- `Block(X)` denotes a layout block of `X`s.
- `Sep(X, S)` is short for `[X (S X)*]`, i.e. a possibly empty sequence of `X`s
separated by `S`s.
- `Sep1(X, S)` is short for `X (S X)*`, i.e. same as `Sep`, but must not be empty.
## Declarations
A Sophia file consists of a sequence of *declarations* in a layout block.
```c
File ::= Block(TopDecl)
TopDecl ::= ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl)
| 'contract' 'interface' Con [Implement] '=' Block(Decl)
| 'namespace' Con '=' Block(Decl)
| '@compiler' PragmaOp Version
| 'include' String
| Using
Implement ::= ':' Sep1(Con, ',')
Decl ::= 'type' Id ['(' TVar* ')'] '=' TypeAlias
| 'record' Id ['(' TVar* ')'] '=' RecordType
| 'datatype' Id ['(' TVar* ')'] '=' DataType
| 'let' Id [':' Type] '=' Expr
| (EModifier* 'entrypoint' | FModifier* 'function') Block(FunDecl)
| Using
FunDecl ::= Id ':' Type // Type signature
| Id Args [':' Type] '=' Block(Stmt) // Definition
| Id Args [':' Type] Block(GuardedDef) // Guarded definitions
GuardedDef ::= '|' Sep1(Expr, ',') '=' Block(Stmt)
Using ::= 'using' Con ['as' Con] [UsingParts]
UsingParts ::= 'for' '[' Sep1(Id, ',') ']'
| 'hiding' '[' Sep1(Id, ',') ']'
PragmaOp ::= '<' | '=<' | '==' | '>=' | '>'
Version ::= Sep1(Int, '.')
EModifier ::= 'payable' | 'stateful'
FModifier ::= 'stateful' | 'private'
Args ::= '(' Sep(Pattern, ',') ')'
```
Contract declarations must appear at the top-level.
For example,
```sophia
contract Test =
type t = int
entrypoint add (x : t, y : t) = x + y
```
There are three forms of type declarations: type aliases (declared with the
`type` keyword), record type definitions (`record`) and data type definitions
(`datatype`):
```c
TypeAlias ::= Type
RecordType ::= '{' Sep(FieldType, ',') '}'
DataType ::= Sep1(ConDecl, '|')
FieldType ::= Id ':' Type
ConDecl ::= Con ['(' Sep1(Type, ',') ')']
```
For example,
```sophia
record point('a) = {x : 'a, y : 'a}
datatype shape('a) = Circle(point('a), 'a) | Rect(point('a), point('a))
type int_shape = shape(int)
```
## Types
```c
Type ::= Domain '=>' Type // Function type
| Type '(' Sep(Type, ',') ')' // Type application
| '(' Type ')' // Parens
| 'unit' | Sep(Type, '*') // Tuples
| Id | QId | TVar
Domain ::= Type // Single argument
| '(' Sep(Type, ',') ')' // Multiple arguments
```
The function type arrow associates to the right.
Example,
```sophia
'a => list('a) => (int * list('a))
```
## Statements
Function bodies are blocks of *statements*, where a statement is one of the following
```c
Stmt ::= 'switch' '(' Expr ')' Block(Case)
| 'if' '(' Expr ')' Block(Stmt)
| 'elif' '(' Expr ')' Block(Stmt)
| 'else' Block(Stmt)
| 'let' LetDef
| Using
| Expr
LetDef ::= Id Args [':' Type] '=' Block(Stmt) // Function definition
| Pattern '=' Block(Stmt) // Value definition
Case ::= Pattern '=>' Block(Stmt)
| Pattern Block(GuardedCase)
GuardedCase ::= '|' Sep1(Expr, ',') '=>' Block(Stmt)
Pattern ::= Expr
```
`if` statements can be followed by zero or more `elif` statements and an optional final `else` statement. For example,
```sophia
let x : int = 4
switch(f(x))
None => 0
Some(y) =>
if(y > 10)
"too big"
elif(y < 3)
"too small"
else
"just right"
```
## Expressions
```c
Expr ::= '(' LamArgs ')' '=>' Block(Stmt) // Anonymous function (x) => x + 1
| '(' BinOp ')' // Operator lambda (+)
| 'if' '(' Expr ')' Expr 'else' Expr // If expression if(x < y) y else x
| Expr ':' Type // Type annotation 5 : int
| Expr BinOp Expr // Binary operator x + y
| UnOp Expr // Unary operator ! b
| Expr '(' Sep(Expr, ',') ')' // Application f(x, y)
| Expr '.' Id // Projection state.x
| Expr '[' Expr ']' // Map lookup map[key]
| Expr '{' Sep(FieldUpdate, ',') '}' // Record or map update r{ fld[key].x = y }
| '[' Sep(Expr, ',') ']' // List [1, 2, 3]
| '[' Expr '|' Sep(Generator, ',') ']'
// List comprehension [k | x <- [1], if (f(x)), let k = x+1]
| '[' Expr '..' Expr ']' // List range [1..n]
| '{' Sep(FieldUpdate, ',') '}' // Record or map value {x = 0, y = 1}, {[key] = val}
| '(' Expr ')' // Parens (1 + 2) * 3
| '(' Expr '=' Expr ')' // Assign pattern (y = x::_)
| Id | Con | QId | QCon // Identifiers x, None, Map.member, AELib.Token
| Int | Bytes | String | Char // Literals 123, 0xff, #00abc123, "foo", '%'
| AccountAddress | ContractAddress // Chain identifiers
| Signature // Signature
| '???' // Hole expression 1 + ???
Generator ::= Pattern '<-' Expr // Generator
| 'if' '(' Expr ')' // Guard
| LetDef // Definition
LamArgs ::= '(' Sep(LamArg, ',') ')'
LamArg ::= Id [':' Type]
FieldUpdate ::= Path '=' Expr
Path ::= Id // Record field
| '[' Expr ']' // Map key
| Path '.' Id // Nested record field
| Path '[' Expr ']' // Nested map key
BinOp ::= '||' | '&&' | '<' | '>' | '=<' | '>=' | '==' | '!='
| '::' | '++' | '+' | '-' | '*' | '/' | 'mod' | '^'
| 'band' | 'bor' | 'bxor' | '<<' | '>>' | '|>'
UnOp ::= '-' | '!' | 'bnot'
```
## Operators types
| Operators | Type
| --- | ---
| `-` `+` `*` `/` `mod` `^` | arithmetic operators
| `!` `&&` `\|\|` | logical operators
| `band` `bor` `bxor` `bnot` `<<` `>>` | bitwise operators
| `==` `!=` `<` `>` `=<` `>=` | comparison operators
| `::` `++` | list operators
| `\|>` | functional operators
## Operator precedence
In order of highest to lowest precedence.
| Operators | Associativity
| --- | ---
| `!` `bnot`| right
| `^` | left
| `*` `/` `mod` | left
| `-` (unary) | right
| `+` `-` | left
| `<<` `>>` | left
| `::` `++` | right
| `<` `>` `=<` `>=` `==` `!=` | none
| `band` | left
| `bxor` | left
| `bor` | left
| `&&` | right
| `\|\|` | right
| `\|>` | left
+67
View File
@@ -0,0 +1,67 @@
-spec mktree(Signal) -> Tree when
Signal :: gsc:signal(),
Tree :: gsc_ntree:ntree().
% @doc make into a tree
mktree(Sig) ->
Tree0 = gsc_ntree:nstem(vtokens, Sig),
Tree1 = rerootl_tkstr("=>", Tree0),
Tree2 = rerootl_tkstr("*", Tree1),
Tree2.
rerootl_tkstr(S, Tree0 = #ns{val = Root0}) ->
Kids0 = gsc_ntree:deleaf0(Tree0),
IsntS = fun(Tk) -> isnt_str(S, Tk) end,
case lists:splitwith(IsntS, Kids0) of
% found
% input:
% *s Root0
% |
% +-- .l Foo
% +-- .l "=>"
% +-- .l Bar
% output:
% *s "=>"
% |
% +-- *s Root0 -- .l Foo
% +-- *s Root0 -- .l Bar
{LHS1, [Tk0 | RHS1]} ->
Root1 = Root0,
LTree1 = gsc_ntree:releaf0(Root1, LHS1),
RTree1 = rerootl_tkstr(S, gsc_ntree:releaf0(Root1, RHS1)),
NewRoot0 = {op, Tk0},
NewKids0 = [LTree1, RTree1],
NewTree = gsc_ntree:releaf0(NewRoot0, NewKids0),
NewTree;
% not found, nothing to do
{Kids0, []} ->
Tree0
end.
%reroot_mapsto(Tree0 = #ns{val = Root0}) ->
% Kids0 = gsc_ntree:deleaf0(Tree0),
% IsntMapsto = fun(DL) -> isnt_str("=>", Tk) end,
% case lists:splitwith(IsntMapsto, Kids0) of
% % found
% {LHS1, [Tk0 | RHS1]} ->
% Root1 = Root0,
% LTree1 = gsc_ntree:releaf0(Root1, LHS1),
% RTree1 = reroot_mapsto(gsc_ntree:releaf0(Root1, RHS1)),
% NewRoot0 = {op, Tk0},
% NewKids0 = [LTree1, RTree1],
% NewTree = gsc_ntree:releaf0(NewRoot0, NewKids0),
% NewTree;
% % nothing to do
% {Kids0, []} ->
% Tree0
% end.
isnt_str(X, Y) ->
not is_str(X, Y).
is_str(S, #tk{str = S}) -> true;
is_str(_, _) -> false.
+259 -8
View File
@@ -1,11 +1,5 @@
%%% @doc %%% @doc
%%% GSC CLI: gsc_cli %%% GSC CLI: explorer/harness for sfc iteration
%%%
%%% This module is currently named `gsc_cli', but you may want to change that.
%%% Remember that changing the name in `-module()' below requires renaming
%%% this file, and it is recommended to run `zx update .app` in the main
%%% project directory to make sure the ebin/gsc_cli.app file stays in
%%% sync with the project whenever you add, remove or rename a module.
%%% @end %%% @end
-module(gsc_cli). -module(gsc_cli).
@@ -14,12 +8,269 @@
-copyright("Peter Harpending <peterharpending@qpq.swiss>"). -copyright("Peter Harpending <peterharpending@qpq.swiss>").
-license("GPL-3.0-only"). -license("GPL-3.0-only").
-export([
tokens/1,
so_tokens/1,
gso_tokens/1
]).
-export([start/1]). -export([start/1]).
-include("$gsc_include/gsc.hrl").
-include("ansi.hrl").
do_help() ->
io:format("~ts", [help_screen()]).
help_screen() ->
["you can't help people who refuse to help themsleves\n"].
-spec start(ArgV) -> ok -spec start(ArgV) -> ok
when ArgV :: [string()]. when ArgV :: [string()].
start([]) ->
do_eshell(),
ok;
start(["shell"]) ->
do_eshell(),
ok;
start(["eshell"]) ->
do_eshell(),
ok;
start(ArgV) -> start(ArgV) ->
ok = io:format("Hello, World! Args: ~tp~n", [ArgV]), %io:format("ArgV: ~p~n", [ArgV]),
do(ArgV),
zx:silent_stop(). zx:silent_stop().
do(["list"]) ->
do_tlist();
do(["list", "tests"]) ->
do_tlist();
do(["test"]) ->
do_tests();
do(["test" | Tests]) ->
do_tests(Tests);
do(["tests"]) ->
do_tests();
do(["run", "tests"]) ->
do_tests();
do(["tokenizers_agree", Foo]) ->
io:format("~p~n", [tokenizers_agree(Foo)]);
% slowly phasing out shitty names like lctokens
% tokens = native sfc token representation
do(["tokens", Foo]) -> do_tokens(Foo);
do(["color_tokens", Foo]) -> do_color_tokens(Foo);
do(["ctokens", Foo]) -> do_color_tokens(Foo);
do(["colour_tokens" | _]) -> do_doi();
% so_tokens = so_scan tokens
do(["so", "tokens", Foo]) -> do_so_tokens(Foo);
do(["so_tokens", Foo]) -> do_so_tokens(Foo);
% gso_tokens = our mockery
do(["gso", "tokens", Foo]) -> do_gso_tokens(Foo);
do(["gso_tokens", Foo]) -> do_gso_tokens(Foo);
% print source file to screen with token boundaries highlighted
% script utility
do(["rmm", Foo]) ->
do_rmm(Foo);
do(Args) ->
io:format("bad args: ~p~n", [Args]),
do_help().
do_doi() ->
FP = zx:get_home() ++ "/priv/doi.txt",
page_file(FP).
% thank you chatgpt
% os:cmd didnt do nuffin because that's for running
% stuff in the background and capturing the output, not
% for taking over the screen
page_file(FilePath) ->
Less = os:find_executable("less"),
case Less of
false -> cat_file(FilePath);
_ -> less_file(Less, FilePath)
end.
cat_file(FilePath) ->
{ok, Bytes} = file:read_file(FilePath),
io:format("~ts", [Bytes]).
less_file(Less, FilePath) ->
Port = open_port({spawn_executable, Less},
[{args, [FilePath]},
nouse_stdio, exit_status]),
receive
{Port, {exit_status, 0}} ->
ok;
{Port, {exit_status, N}} ->
error({less_exit_status, N});
{'EXIT', Port, Reason} ->
error(Reason)
end.
do_tests() ->
io:format("TestModules = ~p~n", [test_mods()]),
do_runall_tests().
do_runall_tests() ->
lists:foreach(fun run_mod_main/1, test_mods()).
do_tests(List) ->
lists:foreach(fun run_test/1, List).
% n
run_test(TestName) ->
% we have two candidate atoms
C1 = list_to_atom(TestName),
C2 = list_to_atom("gsc_test_" ++ TestName),
KnownMods = test_mods(),
IsC1 = lists:member(C1, KnownMods),
IsC2 = lists:member(C2, KnownMods),
if
IsC1 -> rmm(C1);
IsC2 -> rmm(C2);
true -> error({no_such_test, TestName})
end.
rmm(X) -> run_mod_main(X).
% KnownTests = test_mods(),
% TestMods = ensure_all_known([], List, KnownTests),
% lists:foreach(fun run_mod_main/1, TestMods).
%ensure_all_known(Acc, [], _) ->
% lists:sort(Acc);
%ensure_all_known(Acc, [T | Ts], Knowns) ->
% case lists:member(T, Knowns) of
%
% end.
test_mods() ->
known_modules_with_prefix("gsc_test").
known_modules_with_prefix(Pfx) ->
ModsZipBeamsZipLoaded = code:all_available(),
kmp(Pfx, ModsZipBeamsZipLoaded, []).
kmp(_Pfx, [], Acc) ->
lists:sort(Acc);
kmp(Pfx, [{ModStr, _BeamPath, _Loaded} | Rest], Acc) ->
case lists:prefix(Pfx, ModStr) of
false -> kmp(Pfx, Rest, Acc);
true -> kmp(Pfx, Rest, [list_to_atom(ModStr) | Acc])
end.
run_mod_main(Mod) ->
io:format("========================================\n"
"~p:main()\n"
"========================================\n",
[Mod]),
try
Mod:main()
catch
Err:ErrType:Trace ->
io:format("~p: ~p~n", [Err, ErrType]),
io:format("Trace:~n~p~n", [Trace])
end.
do_tlist() ->
lists:foreach(
fun(ModName) ->
io:format("~s~n", [ModName])
end,
test_mods()
).
-spec do_eshell() -> ok.
% @doc start an erlang shell
do_eshell() ->
io:format("Welcome to the GSC shell!~n", []),
case shell:start_interactive() of
ok -> ok;
{error, already_started} -> ok;
{error, Reason} -> error(Reason)
end.
tokenizers_agree(File) ->
gso_tokens(File) =:= so_tokens(File).
do_tokens(FilePath) ->
[io:format("~p~n", [Tk]) || Tk <- tokens(FilePath)].
do_so_tokens(FilePath) ->
[io:format("~p~n", [Tk]) || Tk <- so_tokens(FilePath)].
do_gso_tokens(FilePath) ->
[io:format("~p~n", [Tk]) || Tk <- gso_tokens(FilePath)].
% rmm = run module:main() with our context loaded
% useful for prototyping
do_rmm(FilePath) ->
case compile:file(FilePath) of
{ok, Mod} -> Mod:main();
Error -> error(Error)
end.
so_tokens(FilePath) ->
{ok, FileBytes} = file:read_file(FilePath),
FileStr = unicode:characters_to_nfc_list(FileBytes),
{ok, Tokens} = so_scan:scan(FileStr),
Tokens.
gso_tokens(FilePath) ->
{ok, FileBytes} = file:read_file(FilePath),
FileStr = unicode:characters_to_nfc_list(FileBytes),
{ok, Tokens} = gso_scan:scan(FileStr),
Tokens.
tokens(FilePath) ->
{ok, Tokens} = gsc:tokens_from_file(FilePath),
Tokens.
do_color_tokens(File) ->
case gsc:tokens_from_file(File) of
{ok, Tokens} ->
ColorizedSrcStr = colorize_tokens(chunk_color_wheel(), Tokens, ""),
Full = [?ANSI_INVERT, ColorizedSrcStr, ?ANSI_UNINVERT],
io:format("~s", [Full]);
Error ->
io:format("~p~n", [Error])
end.
chunk_color_wheel() ->
%[yellow, blue].
[red, green, yellow, blue, magenta, cyan].
colorize_tokens(Wheel, [T | Ts], Acc) ->
{Color, NewWheel} = rotate(Wheel),
NewAcc = [Acc, colorize_token_str(Color, T)],
colorize_tokens(NewWheel, Ts, NewAcc);
colorize_tokens(_, [], Acc) ->
Acc.
rotate([A | Rest]) ->
{A, Rest ++ [A]}.
colorize_token_str(Color, #tk{str = Str}) ->
{Pfx, Sfx} = color_fixes(Color),
[Pfx, Str, Sfx].
color_fixes(red) -> {?ANSI_FG_RED, ?ANSI_FG_RESET};
color_fixes(green) -> {?ANSI_FG_GREEN, ?ANSI_FG_RESET};
color_fixes(yellow) -> {?ANSI_FG_YELLOW, ?ANSI_FG_RESET};
color_fixes(blue) -> {?ANSI_FG_BLUE, ?ANSI_FG_RESET};
color_fixes(magenta) -> {?ANSI_FG_MAGENTA, ?ANSI_FG_RESET};
color_fixes(cyan) -> {?ANSI_FG_CYAN, ?ANSI_FG_RESET}.
+273
View File
@@ -0,0 +1,273 @@
% @doc experiment centering around the file syntax node using ntree approach
-module(gsc_test_file).
-export([
main/0
]).
-include("$gsc_include/gsc.hrl").
-record(ct,
{payable = none :: none | false | {true, tk()},
main = none :: none | false | {true, tk()},
contract = none :: none | tk(),
con = none :: none | tk(),
impls = none :: none | [tk()],
eq = none :: none | tk()}).
-type meta() :: #ct{}.
-record(decl_type,
{type = none :: none | tk(),
id = none :: none | tk(),
params = none :: none | [tk()],
eq = none :: none | tk()}).
-type decl_meta() :: #decl_type{}.
-type ast_meta() :: file
| meta()
| decl_meta()
| nyi
| {nyi, any()}
.
-type target()
:: ct
| iface
| ns
| pragma
| include
| using
.
-type s2t_target()
:: file
| top_decl
| target()
| nyi
| {nyi, any()}
.
-type s2f_target()
:: {block_of, s2t_target()}
.
-type ast() :: ntree(ast_meta(), tk()).
-type asf() :: nforest(ast_meta(), tk()).
main() ->
HelloN = "hello.aes",
HelloP = ts_utils:ct_file_abspath(HelloN),
{ok, HelloS} = file:read_file(HelloP),
S0 = gsc:unsafe_signal_from_file(HelloP),
T1 = s2t(file, S0),
io:format("hello.aes:~n", []),
io:format("```~n", []),
io:format("~ts", [HelloS]),
io:format("```~n~n", []),
io:format("AST: ~tp~n", [T1]),
ok.
% // Hello World Contract
% // Copyright (c) 2025 QPQ AG
%
% contract Hello =
% type state = unit
% entrypoint init(): state =
% ()
%
% entrypoint hello(): string =
% "hello, world"
-spec s2t(ParseTarget, Signal) -> AST when
ParseTarget :: file,
Signal :: [tk()],
AST :: ast().
% File ::= Block(TopDecl)
s2t(file, Signal) ->
case Signal of
[] -> error(empty_file);
_ -> {ns, file, s2f({block_of, top_decl}, Signal)}
end;
% TopDecl ::= ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl)
% | ['payable'] 'contract' 'interface' Con [Implement] '=' Block(Decl)
% | 'namespace' Con '=' Block(Decl)
% | '@compiler' PragmaOp Version
% | 'include' String
% | Using
s2t(top_decl, Signal) ->
NewTarget =
case gsc_tokens:strings(3, Signal) of
["payable", "contract", "interface"] -> iface;
["contract", "interface" | _] -> iface;
["payable", "main", "contract"] -> ct;
["payable", "contract" | _] -> ct;
["contract" | _] -> ct;
["namespace" | _] -> namespace;
["@compiler" | _] -> pragma;
["include" | _] -> include;
["using" | _] -> using
end,
s2t(NewTarget, Signal);
% ['payable'] ['main'] 'contract' Con [Implement] '=' Block(Decl)
s2t(ct, S0) ->
{slurp, CtMeta, S1} = s2s_slurp_meta(#ct{}, S0),
{ns, CtMeta, s2f({block_of, decl}, S1)};
% Decl ::= 'type' Id ['(' TVar* ')'] '=' TypeAlias
% | 'record' Id ['(' TVar* ')'] '=' RecordType
% | 'datatype' Id ['(' TVar* ')'] '=' DataType
% | 'let' Id [':' Type] '=' Expr
% | (EModifier* 'entrypoint' | FModifier* 'function') Block(FunDecl)
% | Using
s2t(decl, S0) ->
NewTarget =
case gsc_tokens:strings(3, S0) of
["type" | _] -> decl_type;
["record" | _] -> decl_record;
["datatype" | _] -> decl_datatype;
["let" | _] -> decl_let;
Pfx3 ->
IsEp = lists:member("entrypoint", Pfx3),
IsFn = lists:member("function", Pfx3),
if
IsEp -> decl_entrypoint;
IsFn -> decl_function;
true -> error({bad_decl, S0})
end
end,
s2t(NewTarget, S0);
% 'type' Id ['(' TVar* ')'] '=' TypeAlias
s2t(decl_type, S0) ->
{slurp, Meta, S1} = s2s_slurp_meta(#decl_type{}, S0),
{ns, Meta, s2t(type, S1)};
s2t(nyi, Signal) ->
{ns, nyi, Signal};
s2t(NYI = {nyi, _}, Signal) ->
{ns, NYI, Signal};
s2t(NYI, Signal) ->
{ns, {nyi, NYI}, Signal}.
-spec s2f(ForestTarget, Signal) -> Forest when
ForestTarget :: s2f_target(),
Signal :: [tk()],
Forest :: asf().
s2f({block_of, TreeTarget}, S0) ->
{gulp, Items} = gsc_signal:gulp_block_items(S0),
[s2t(TreeTarget, I) || I <- Items].
-spec s2s_slurp_meta(InitMeta, Signal) -> Result when
InitMeta :: Meta,
Signal :: [tk()],
Result :: {slurp, Meta, NewSignal},
Meta :: ast_meta(),
NewSignal :: Signal.
s2s_slurp_meta(M = #ct{}, S) ->
s2s_sm_ct(M, S);
s2s_slurp_meta(M = #decl_type{}, S) ->
s2s_sm_decl_type(M, S);
s2s_slurp_meta(M, S) ->
error({s2s_slurp_meta, M, S}).
s2s_sm_ct(Ct = #ct{payable = none}, S0) ->
case S0 of
[#tk{str = "payable"} = T0 | S1] ->
s2s_sm_ct(Ct#ct{payable = {true, T0}}, S1);
_ ->
s2s_sm_ct(Ct#ct{payable = false}, S0)
end;
s2s_sm_ct(Ct = #ct{main = none}, S0) ->
case S0 of
[#tk{str = "main"} = T0 | S1] ->
s2s_sm_ct(Ct#ct{main = {true, T0}}, S1);
_ ->
s2s_sm_ct(Ct#ct{main = false}, S0)
end;
s2s_sm_ct(Ct = #ct{contract = none}, S0) ->
case S0 of
[#tk{str = "contract"} = T0 | S1] ->
s2s_sm_ct(Ct#ct{contract = T0}, S1);
_ ->
error({no_kwd_contract, Ct, S0})
end;
s2s_sm_ct(Ct = #ct{con = none}, S0) ->
case S0 of
[#tk{shape = con} = T0 | S1] ->
s2s_sm_ct(Ct#ct{con = T0}, S1);
_ ->
error({no_contract_name, Ct, S0})
end;
s2s_sm_ct(Ct = #ct{impls = none}, S0) ->
case gsc_tokens:strings(1, S0) of
[":"] ->
{slurp, Impls, S1} = s2f_slurp_impls(S0),
s2s_sm_ct(Ct#ct{impls = Impls}, S1);
_ ->
s2s_sm_ct(Ct#ct{impls = []}, S0)
end;
s2s_sm_ct(Ct = #ct{eq = none}, S0) ->
case S0 of
[#tk{str = "="} = T0 | S1] ->
s2s_sm_ct(Ct#ct{eq = T0}, S1);
_ ->
error({no_equal_sign, Ct, S0})
end;
s2s_sm_ct(Ct, S0) ->
{slurp, Ct, S0}.
s2f_slurp_impls([#tk{str = ":"}, #tk{shape = con} = I0 | S0]) ->
s2f_slurp_impls([I0], S0).
s2f_slurp_impls(Stk, [#tk{str = ","}, #tk{shape = con} = I0 | S0]) ->
s2f_slurp_impls([I0 | Stk], S0);
s2f_slurp_impls(Stk, S0) ->
{slurp, lists:reverse(Stk), S0}.
%-record(decl_type,
% {type = none :: none | tk(),
% id = none :: none | tk(),
% params = none :: none | [tk()],
% eq = none :: none | tk()}).
s2s_sm_decl_type(M = #decl_type{type = none}, S0) ->
case S0 of
[#tk{str = "type"} = T0 | S1] ->
s2s_sm_decl_type(M#decl_type{type = T0}, S1);
_ ->
error({no_kwd_type, S0})
end;
s2s_sm_decl_type(M = #decl_type{id = none}, S0) ->
case S0 of
[#tk{shape = id} = T0 | S1] ->
s2s_sm_decl_type(M#decl_type{id = T0}, S1);
_ ->
error({no_type_id, S0})
end;
s2s_sm_decl_type(M = #decl_type{params = none}, S0) ->
case S0 of
[#tk{str = "("} = T0 | _] ->
error({fixme, parens_bad});
_ ->
s2s_sm_decl_type(M#decl_type{params = []}, S0)
end;
s2s_sm_decl_type(M = #decl_type{eq = none}, S0) ->
case S0 of
[#tk{str = "="} = T0 | S1] ->
s2s_sm_decl_type(M#decl_type{eq = T0}, S1);
_ ->
error({no_equal_sign, S0})
end;
s2s_sm_decl_type(M, S0) ->
{slurp, M, S0}.
+144
View File
@@ -0,0 +1,144 @@
-module(gsc_test_ntree).
-export([
main/0
]).
-include("$gsc_include/gsc.hrl").
% just parsing type expressions right now, so only need
% to worry about round parens
%
% none is to indicate general-purpose grouping, for
% e.g. LHS/RHS of an op
-type syntax_meta()
:: {op, tk()}
| op_arg
| {parens, Open :: tk(), Close :: tk()}
.
-type ast() :: ntree(syntax_meta(), tk()).
-type asf() :: nforest(syntax_meta(), tk()).
-type asts() :: asf().
main() ->
x00(),
ok.
% x00 = example00
x00() ->
io:format("Example 00:~n", []),
io:format(" SrcStr = ~p~n", [x00_src()]),
io:format(" Tokens = ~p~n", [x00_tks()]),
io:format(" Signal = ~p~n", [x00_sgl()]),
io:format(" Forest = ~p~n", [x00_fst()]),
ok.
% sample type expr, tokens, signal
x00_src() -> "(foo => (bar) * baz)".
x00_tks() -> gsc:unsafe_tokens_from_string(x00_src()).
x00_sgl() -> gsc:filter_signal(x00_tks()).
x00_fst() -> parse(x00_sgl()).
-spec parse(Signal) -> ASF when
Signal :: [tk()],
ASF :: asf().
parse(Signal) ->
% key insight here is our signal is already a
% forest, assuming the leaf type is `tk()`.
%
% our parser is a sequence of forest-to-forest
% transformers.
%
% at the end we should end up with just one tree (i
% think)?
F0 = Signal,
F1 = f2f_parens(F0),
F2 = f2f_op("=>", F1),
F3 = f2f_op("*", F2),
Result = F2,
Result.
f2f_op(OpStr, Fst) ->
f2f_op(OpStr, [], Fst).
% never saw the op
f2f_op(_opstr, Stk, []) ->
lists:reverse(Stk);
% see op
f2f_op(OpStr, LhsStk, [#tk{str = OpStr} = OpTk | Rest]) ->
Lhf = lists:reverse(LhsStk),
Rhf = f2f_op(OpStr, Rest),
Lht = #ns{meta = op_arg, kids = Lhf},
Rht = #ns{meta = op_arg, kids = Rhf},
ResultT = #ns{meta = {op, OpTk},
kids = [Lht, Rht]},
ResultF = [ResultT],
ResultF;
% see stem, descend
f2f_op(OpStr, LhsStk, [Ns = #ns{kids = NsKids} | Rest]) ->
NewNsKids = f2f_op(OpStr, NsKids),
NewNs = Ns#ns{kids = NewNsKids},
NewStk = [NewNs | LhsStk],
f2f_op(OpStr, NewStk, Rest);
% see leaf, just add
f2f_op(OpStr, Stk, [L | Rest]) ->
f2f_op(OpStr, [L | Stk], Rest).
-spec f2f_parens(Forest) -> NewForest when
Forest :: asts(),
NewForest :: Forest.
% @doc
% recursive parens decomposition
%
% the input here is the flat list of tokens. here we
% basically replace the string of tokens between `(`
% and `)` with a single tree
%
% interesting quirk is that this doesn't error on too
% many close parens, only too many open parens
f2f_parens(Fst) ->
f2f_parens([], Fst).
% done
f2f_parens(Stk, []) ->
lists:reverse(Stk);
% crawl down the forest and scan for open parens
% open paren, we descend
f2f_parens(Stk, [#tk{str = "("} = TkOpen | Rest0]) ->
InitMeta = {parens, TkOpen, none},
{slurp, PStem, Rest1} = slurp_pstem(InitMeta, [], Rest0),
NewStk = [PStem | Stk],
f2f_parens(NewStk, Rest1);
% something else, we continue
f2f_parens(Stk, [Tree | Rest]) ->
f2f_parens([Tree | Stk], Rest).
% ran out of tokens before close paren
slurp_pstem({parens, TkOpen, none}, Stk, []) ->
error({no_close_for, TkOpen, Stk});
% hit close paren, we done
slurp_pstem({parens, TkOpen, none}, Stk, [TkClose = #tk{str = ")"} | Rest]) ->
FinalMeta = {parens, TkOpen, TkClose},
Midsection = lists:reverse(Stk),
FinalTree = #ns{meta = FinalMeta,
kids = Midsection},
{slurp, FinalTree, Rest};
% hit open paren, we recurse
slurp_pstem(AccMeta, Stk, [TkOpen_II = #tk{str = "("} | Rest0]) ->
InitMeta_II = {parens, TkOpen_II, none},
{slurp, PStem_II, Rest1} = slurp_pstem(InitMeta_II, [], Rest0),
NewStk = [PStem_II | Stk],
slurp_pstem(AccMeta, NewStk, Rest1);
% hit something else, we move along
slurp_pstem(AccMeta, Stk, [Tree | Rest]) ->
slurp_pstem(AccMeta, [Tree | Stk], Rest).
+165
View File
@@ -0,0 +1,165 @@
% gsc tokenizer tests
-module(gsc_test_tokens).
-export([
main/0, ct_dir/0
%tokens_match/1
]).
-include("$gsc_include/gsc.hrl").
-include_lib("eunit/include/eunit.hrl").
main() ->
%io:format("~p~n", [div_files()]),
%io:format("MAINNNNN!~n", []),
eunit:test(?MODULE, [verbose]).
%eunit:test(?MODULE).
% directory containing the tests for the tokenizer
ct_dir() ->
zx_daemon:get_home() ++ "/ct".
agreement_tests_dir() ->
ct_dir() ++ "/tokenizers_agree".
% the divergences claude found between gsc tokenizer and so tokenizer
%
% mostly stupid corner cases like a string crossing a line boundary
% or unterminated block comment
%
% divergence files: "divergence" means so_scan disagrees with
% gsc_so_scan in one of the following ways:
%
% - one succeeds when the other errors
% - disagree on success case
%
% making errors agree on two programs that work differently is a
% fool's errand
div_files() ->
ContractsDir = agreement_tests_dir(),
% this is the equivalent of ls
% just has filenames, no /path/to/ prefix
{ok, Files} = file:list_dir(ContractsDir),
% originally i was a retard and didn't read the eunit
% documentation, so if any one test failed, the entire test suite
% would crash with no information regarding what happened
%
% so this was a hack to only run div01-div05 but not div06:
%
% % hack to fix one broken test at a time
% (FileName = "div0" ++ [Digit | _]) when Digit =< $9 ->
% FilePath = ct_dir() ++ "/" ++ FileName,
% {true, {FileName, FilePath}}
% (_) -> false
%
% Once i read the eunit docs and learned about test generators, I
% realized I could have only the failed test chimp out. what a
% concept.
%
% i also realized that printing the full filepath was a waste, so
% instead the test should know about the FileName (foo.bar) and the
% FilePath (/path/to/foo.bar).
%
% then i decided to start writing my own test contracts instead
% of having claude do it and i rean into the issue of vim swap
% files not lexing properly because they're not unicode
IsDivCt =
fun(FileName) ->
% need to filter out vim swap files
% originally was false-matching on ([$. | _])
% like a man
%
% god this feels like putting my balls in a little tiny
% guillotine (even the guillotine is emasculating) but
% claude suggested this and i mean it's kind of the
% most idiomatic and like straightforward. most
% importantly it's declarative
%
% god i feel so defeated
case filename:extension(FileName) of
".aes" ->
FilePath = ContractsDir ++ "/" ++ FileName,
{true, {FileName, FilePath}};
_ ->
false
end
end,
lists:sort(lists:filtermap(IsDivCt, Files)).
%div_file_names() -> [N || {N, _} <- div_files()].
%div_file_paths() -> [P || {_, P} <- div_files()].
tokstr_concat_test_() ->
% future proofing
ConcatTestFiles
= lists:flatten([
div_files()
]),
% exclude the contracts with like unterminated block comments
% where they don't tokenize properly
NonStupidFiles =
lists:filter(
fun
({"div05_bcom_eof.aes", _}) -> false;
({"div06_bcom_in_expr.aes", _}) -> false;
({"div07_bcom_nested.aes", _}) -> false;
({"div08_bcom_simple.aes", _}) -> false;
({_, _}) -> true
end,
ConcatTestFiles
),
%?debugFmt("ConcatTestFiles=~p", [ConcatTestFiles]),
{"file = sum(tokens)",
[concat_property(Name, Path) || {Name, Path} <- NonStupidFiles]}.
concat_property(FileName, FilePath) ->
%?debugFmt("concat_property(~p, _)", [FileName]),
FileChars = gsc:very_stable_file(FilePath),
{FileName ++ ": file = sum(tokens)",
fun() ->
case gsc:tokens_from_file(FileChars) of
{ok, SfcTokens} ->
ConcatStr = concat_token_strs(SfcTokens, []),
?assertEqual(FileChars, ConcatStr);
_Error ->
ok
end
end}.
concat_token_strs([#tk{str = S} | Rest], Acc) ->
concat_token_strs(Rest, [Acc, S]);
concat_token_strs([], Acc) ->
unicode:characters_to_nfc_list(Acc).
% underscore marks this as a test *generator*
div_test_() ->
% divergence
DivFiles = div_files(),
%?debugFmt("DivFiles=~p", [DivFiles]),
{"claude tokenizer divergences fixed",
[tokens_match(N, P) || {N, P} <- DivFiles]}.
tokens_match(FileName, FilePath) ->
%?debugFmt("tokens_match(~p, _)", [FileName]),
% extracting data to be tested
% i hate this so much but lazy and this is test code so who really cares.
SoTokens = so_tokens_from_file(FilePath),
SfTokens = gsc:gso_tokens_from_file(FilePath),
{FileName ++ ": tokenizers_agree",
fun() ->
case {SoTokens, SfTokens} of
{{ok, So}, {ok, Sf}} -> ?assertEqual(So, Sf);
{{error, _}, {error, _}} -> ok;
{{ok, _}, {error, _}} -> error("so_scan succeeded and gso_scan failed");
{{error, _}, {ok, _}} -> error("so_scan failed and gso_scan succeded")
end
end}.
% that's right, we have to enter via converting the
% bytes in the file to a list... lol
so_tokens_from_file(F) ->
{ok, Bytes} = file:read_file(F),
S = binary_to_list(Bytes),
so_scan:scan(S).
+27
View File
@@ -0,0 +1,27 @@
% testing utilities
-module(ts_utils).
-export([
ct_dir/0,
ct_file/1, ct_file_abspath/1
]).
-spec ct_dir() -> string().
% directory containing the tests for the tokenizer
ct_dir() ->
zx_daemon:get_home() ++ "/ct".
ct_file_abspath(Name) ->
ct_file(Name).
-spec ct_file(Name) -> AbsPath when
Name :: string(),
AbsPath :: string().
% @doc
% ct_file("foo.aes") -> "/path/to/ct/foo.aes"
ct_file(Name) ->
ct_dir() ++ "/" ++ Name.
+2 -2
View File
@@ -2,11 +2,11 @@
{type,cli}. {type,cli}.
{modules,[]}. {modules,[]}.
{mod,"gsc_cli"}. {mod,"gsc_cli"}.
{prefix,none}.
{author,"Peter Harpending"}. {author,"Peter Harpending"}.
{prefix,none}.
{desc,"GSC CLI and test suite"}. {desc,"GSC CLI and test suite"}.
{package_id,{"otpr","gsc_cli",{0,1,0}}}. {package_id,{"otpr","gsc_cli",{0,1,0}}}.
{deps,[{"otpr","gsc",{0,1,0}}]}. {deps,[{"otpr","sophia",{9,0,0}},{"otpr","gsc",{0,1,0}}]}.
{key_name,none}. {key_name,none}.
{a_email,"peterharpending@qpq.swiss"}. {a_email,"peterharpending@qpq.swiss"}.
{c_email,"peterharpending@qpq.swiss"}. {c_email,"peterharpending@qpq.swiss"}.
+35 -3
View File
@@ -104,12 +104,12 @@
% specifically account for this error % specifically account for this error
-record(gsc_err_bcom_unterminated, -record(gsc_err_bcom_unterminated,
{prev_tokens :: [tk()], {prev_tokens :: [tk()],
break_pos :: gsc_pos(), break_pos :: tk_pos(),
rest :: string()}). rest :: string()}).
-record(gsc_err_no_tokmatch, -record(gsc_err_no_tokmatch,
{prev_tokens :: [tk()], {prev_tokens :: [tk()],
break_pos :: gsc_pos(), break_pos :: tk_pos(),
rest :: string()}). rest :: string()}).
@@ -133,7 +133,7 @@
% generic placeholder error for now % generic placeholder error for now
-record(gsc_err, -record(gsc_err,
{atom :: atom(), {atom :: atom(),
string = none :: none | iolist(), str = none :: none | iolist(),
extra = none :: none | any()}). extra = none :: none | any()}).
% @doc all errors GSC can return conveniently listed in % @doc all errors GSC can return conveniently listed in
@@ -143,3 +143,35 @@
| #gsc_err_nyi{} | #gsc_err_nyi{}
| #gsc_err_empty_file{} | #gsc_err_empty_file{}
| #gsc_err{}. | #gsc_err{}.
%----------------------------
% tree type for parsing
%----------------------------
% @doc stem record
-record(ns, {meta :: any(),
kids :: list(any())}).
% @doc `ntree(S, L)' is a "node tree" (meaning stems
% have values and children)
%
% for the purposes of the compiler, the key observation
% is that a flat list of tokens is already a forest
-type ntree(S, L) :: #ns{meta :: S, kids :: [ntree(S, L)]}
| L.
% @doc forest is just a list of trees
-type nforest(S, L) :: [ntree(S, L)].
% aliases
-type nt(S, L) :: ntree(S, L).
-type nf(S, L) :: nforest(S, L).
-type ntree() :: ntree(any(), any()).
-type nforest() :: [ntree()].
-type nt() :: ntree().
-type nf() :: nforest().
+1 -1
View File
@@ -158,7 +158,7 @@
% %
%-type parse_error_() :: any(). %-type parse_error_() :: any().
%-record(parse_error, %-record(parse_error,
% {pos = none :: none | gsc_pos(), % {pos = none :: none | tk_pos(),
% msg = "" :: string(), % msg = "" :: string(),
% subs = [] :: [parse_error_()], % subs = [] :: [parse_error_()],
% extra = none :: any()}). % extra = none :: any()}).
+216
View File
@@ -0,0 +1,216 @@
-module(gsc_parse_type_expr).
%
%-export_type([
%]).
%
%-export([
% unsafe_vtks_from_string/1,
% gulp_vtks/1,
% take_until_ifx_op/1
%]).
%
%-include("$gsc_include/gsc.hrl").
%
%
%%------------------------------------------------------
%% TYPES
%%------------------------------------------------------
%
%-type vtk_ifx_op() :: vtk_apply_to
% | {'vtk_*', tk()}
% | {'vtk_=>', tk()}.
%
%-type vtk() :: tk()
% | {vtk_plist, [tk()]}
% | vtk_ifx_op().
%
%
%-type gulped(X) :: {gulp, X}
% | {error, any()}.
%
%-type slurped(X) :: {slurp, X, Rest :: [tk()]}
% | {error, any()}.
%
%
%%------------------------------------------------------
%% FUNCTIONS
%%------------------------------------------------------
%
%
%-spec unsafe_vtks_from_string(SrcStr) -> Vtks when
% SrcStr :: string(),
% Vtks :: [vtk()].
%
%% @doc for testing
%unsafe_vtks_from_string(S) ->
% {ok, SigTks} = gsc_tokens:significant_tokens(S),
% {gulp, Vtks} = gulp_vtks(SigTks),
% Vtks.
%
%
%% operators in descending order of exteriority
%%
%% precedence verbiage confuses me
%%
%% 1 + 2 * 3 ^ 4
%%
%% (+ 1 (* 2 (^ 3 4)))
%%
%% precedence is thinking about the operators as like
%% having arms and legs and doing something. how much
%% power do they have to bind to their neighbors. it's
%% thinking of your operators as verbs in some state
%% machine.
%%
%% i don't like when i have to model the state machine
%% in my head when i'm reading code. everything should
%% just be there. exteriority and interiority are
%% properties of nouns, not properties of verbs.
%%
%% functional programming is all about offloading as
%% much of your reasoning into nouns as possible. verbs
%% change things. and change is bad.
%-type chunk_strategy()
% :: ast_parens % prefix paren (x y z)
% | '=>' % infixr
% | '*' % infix
% | 'apply' % postfix paren f (...)
% .
%
%
%-record(ast_parens,
% {open = none :: none | tk(),
% inner = none :: none | [tk()],
% close = none :: none | tk()}).
%
%chunk_by(Strategy, Tokens) ->
% chunk_by(Strategy, [], Tokens).
%
%
%-spec chunk0(Strategy, Oks, Errs, Tokens) -> Result when
% Strategy :: chunk_strategy(),
% Oks :: [any()],
% Errs :: [{error, Reason :: any()}],
% Tokens :: [tk()],
% Result :: {ok,
%
%gulp_chunks_by(_, Stk, [], []) ->
% {gulp, lists:reverse(Stk)};
%gulp_chunks_by(_, _, Errs, []) ->
% {error, {fixme, {?MODULE, ?LINE}, Errs}};
%gulp_chunks_by(plist, Stk, Errs, Tokens) ->
% case slurp_plist_rec(Tokens) of
% {slurp, Plist, NewTokens} ->
% gulp_chunks_by(plist, [PList | Stk], Errs, NewTokens);
% barf ->
% [Token | NewTokens] = Tokens,
% gulp_chunks_by(plist, [Token | Stk], Errs, NewTokens);
% Error ->
% gulp_chunks_by(plist, Stk, [Error | Errs], Tokens);
% end.
%
%slurp_plist_rec(Tokens = [#tk{string = "(" | _]) ->
% case gsc_tokens:slurp_plist(Tokens) of
% {slurp, [], _} ->
% barf;
% {slurp, PTokens, NewTokens} ->
% PTokensInner = pt_inner(PTokens),
% end;
%
%%-spec gulp_ifx_tree(Tokens) -> gulped(IfxTree) when
%% Tokens :: [tk()],
%% IfxTree :: ifx_tree().
%%
%%-spec chunk_by(ChunkStrategy, Tokens) -> Result when
%% ChunkStrategy :: chunk_strategy(),
%% Tokens :: [tk()],
%% Result :: {ChunkStrategy,
%
%
%
%-spec gulp_vtks(Tokens) -> Result when
% Tokens :: [tk()],
% Result :: gulped(VirtualTokens),
% VirtualTokens :: [vtk()].
%
%% @private
%% for infix precedence/associativity resolution.
%%
%% better/original name (although misnomer) was
%% insert_virtual_tokens. we're using the gulp verbiage
%% because we're guarding against mistmatched delimiters
%% @end
%gulp_vtks(Tokens) ->
% gulp_vtks([], Tokens).
%
%gulp_vtks(Acc, Tks0) ->
% % scan until next "(" | "*" | "=>"
% case take_until_ifx_op(Tks0) of
% % no infix op remaining, return arg
% {_Pfx = Tks0,
% _Sfx = []} ->
% {gulp, lists:flatten([Acc, Tks0])};
% % application
% % "... foo(bar, baz) ..."
% % ~> [..., foo, {plist, "(bar, baz)"}, ...]
% {_Pfx = Tks1_BeforeOpen,
% _Sfx = Tks2_OpenNAfter
% = [#tk{string = "("} | _]} ->
% case gsc_tokens:slurp_plist(Tks2_OpenNAfter) of
% {slurp, Tks2A_OpenToClose, Tks2B_AfterClose} ->
% NewAcc = [Acc,
% Tks1_BeforeOpen,
% vtk_apply_to,
% {vtk_plist, Tks2A_OpenToClose}],
% gulp_vtks(NewAcc, Tks2B_AfterClose);
% Error = {error, _} ->
% Error
% end;
% % product
% {_Pfx = Tks0_BeforeTimes,
% _Sfx = [ Tk1A_Times = #tk{string = "*"}
% | Tks1B_AfterTimes]} ->
% NewAcc = [Acc,
% Tks0_BeforeTimes,
% {'vtk_*', Tk1A_Times}],
% gulp_vtks(NewAcc, Tks1B_AfterTimes);
% % funType
% {_Pfx = Tks0_BeforeOp,
% _Sfx = [ Tk1A_Op = #tk{string = "=>"}
% | Tks1B_AfterOp]} ->
% NewAcc = [Acc,
% Tks0_BeforeOp,
% {'vtk_=>', Tk1A_Op}],
% gulp_vtks(NewAcc, Tks1B_AfterOp)
% end.
%
%
%
%-spec take_until_ifx_op(Tokens) -> Result when
% Tokens :: [tk()],
% Result :: {Taken, NewTokens},
% Taken :: Tokens,
% NewTokens :: Tokens.
%% @doc
%% consume tokens until one of ["(", "*", "=>"]
%
%take_until_ifx_op(Tks) ->
% take_until_ifx_op([], Tks).
%
%take_until_ifx_op(Stack, []) ->
% {lists:reverse(Stack), []};
%take_until_ifx_op(Stack, Tokens = [Token | NewTokens]) ->
% TokStr = Token#tk.string,
% Continue =
% case TokStr of
% % exit cases
% "(" -> false;
% "*" -> false;
% "=>" -> false;
% _ -> true
% end,
% case Continue of
% true -> take_until_ifx_op([Token | Stack], NewTokens);
% false -> {lists:reverse(Stack), Tokens}
% end.
%
+2 -2
View File
@@ -54,13 +54,13 @@
% %
% %
% %
%-spec start_pos([gsc_token()]) -> {value, gsc_pos()} | none. %-spec start_pos([gsc_token()]) -> {value, tk_pos()} | none.
% %
%start_pos([#gsc_token{pos = P}]) -> {value, P}; %start_pos([#gsc_token{pos = P}]) -> {value, P};
%start_pos([]) -> none. %start_pos([]) -> none.
% %
% %
%-spec end_pos([gsc_token()]) -> {value, gsc_pos()} | none. %-spec end_pos([gsc_token()]) -> {value, tk_pos()} | none.
% %
%end_pos([#gsc_token{pos = Pos, string = Str}]) -> %end_pos([#gsc_token{pos = Pos, string = Str}]) ->
% {value, gsc_tokens:new_pos(Pos, Str)}; % {value, gsc_tokens:new_pos(Pos, Str)};
+52
View File
@@ -0,0 +1,52 @@
% @doc
% <pre>
% T R O N A L D D U M P
%
% .-""""""""""""-.
% .-' _..------.._ '-.
% .' .' GOLDEN NFC '. '.
% / / COMB-OVER MAP \ \
% ; ; .-^^^^^^^^^^-. ; ;
% | | / THEY'RE \ | |
% | | | NOT SENDING | | |
% | | | ASCII | | |
% ; ; \_.--. .--._./ ; ;
% \ \ (o)(o) / /
% '. '. __ .' .'
% '-._ '._==_.' _.-'
% '-._____.-'
% /|||\
% / ||| \
% / ||| \
% .-------' ||| '-------.
% / THE BEST NORMALIZER \
% / VERY STABLE CODEPOINTS \
% /_________________________________\
% </pre>
%
% When unicode sends its codepoints, they're not
% sending their best. They're not sending ASCII.
% They're not sending ASCII. They're sending integers
% that have lots of problems, and they're bringing
% those problems with us. They're bringing diacritics.
% They're bringing non-idempotent lowercasing. They're
% bringing graphemes that don't correspond bijectively
% with printable characters. They're bringing RTL.
% They're bringing invisible characters. They're
% bringing characters that draw outside the character
% boundary. They're bringing variable-width
% whitespace. They're bringing control characters.
% They're bringing emojis.
%
% And some, I assume, are good characters.
%
% `SrcStr' is a unicode NFC list, not an ordinary
% string. you think a string is a list of codepoints.
%
% NOOOOO.
%
% See it's different, because that's why.
%
% This is the cost of diversity, folks.
% @end
+126 -75
View File
@@ -1,57 +1,48 @@
% @doc bikeshed proctrastination head into vim warmup thing % @doc bikeshed proctrastination head into vim warmup
% thing
%
% sophia compiler from scratch by PRH % sophia compiler from scratch by PRH
% %
% based on original sophia compiler % based on original sophia compiler; target for version
% % 0.1 is to match behavior exactly
% parse layers:
% 1. gsc_tokenizer: SrcStr -> (Tokens | SigTokens)
%
% SigTokens = not comment/whitespace
%
% layers:
% a. gsc_strmatch : matches string shapes
% b. gsc_so_scan : converts to so_scan shapes
%
% 2. gsc_ast: SigTokens -> AST
%
% terminology:
%
% - `slurp`/`barf` borrowed from emacs paredit mode:
%
% slurp : (a b) c -> (a b c)
% barf : (a b c) -> a (b c)
%
% * `slurp` usually involves *transforming* input
% into a new type (e.g. slurp a token from src
% string); think of slurp as a verb meaning to
% consume and then digest
% * `barf` basically means blindly splitting off
% input
%
% @end % @end
% TODO:
% - barf for outputs, slurp for inputs
% - architecture needs more careful thought but only after something works
% - too fuzzy right now
% - possibly:
% - rename parser layers sequentially:
% - gsc_
-module(gsc). -module(gsc).
% token and tokens
-export_type([ -export_type([
token/0 token/0,
signal/0
]). ]).
% syntax tree/forest wrapper type
-export_type([
ntree/2, ntree/0,
nforest/2, nforest/0,
nt/2, nt/0,
nf/2, nf/0
]).
-export([ -export([
unsafe_tokens_from_file/1,
unsafe_tokens_from_string/1,
unsafe_signal_from_file/1,
unsafe_signal_from_string/1,
filter_signal/1,
signal_from_string/1,
signal_from_file/1,
sigtokens_from_file/1, sigtokens_from_file/1,
sigtokens_from_string/1, sigtokens_from_string/1,
tokens_from_file/1, tokens_from_file/1,
tokens_from_string/1, tokens_from_string/1,
ast_from_file/1, % sophia compatibility
ast_from_string/1, gso_tokens_from_file/1,
ast_from_tokens/1 gso_tokens_from_string/1,
% unicode normalization
very_stable_codepoints/1,
very_stable_string/1,
very_stable_file/1
]). ]).
-include("$gsc_include/gsc.hrl"). -include("$gsc_include/gsc.hrl").
@@ -62,10 +53,43 @@
-type token() :: tk(). -type token() :: tk().
% @doc signal means non-noise (whitespace/comment)
% tokens; legacy name still around is "sigtokens"
-type signal() :: [tk()].
%----------------------------------------- %-----------------------------------------
% functions % API: FUNCTIONS
%----------------------------------------- %-----------------------------------------
%-----------------------------------------
% aint nobody got time for case shit
%-----------------------------------------
% tokens
unsafe_tokens_from_file(F) ->
{ok, Tks} = tokens_from_file(F),
Tks.
unsafe_tokens_from_string(S) ->
{ok, Tks} = tokens_from_string(S),
Tks.
% signal
unsafe_signal_from_file(F) ->
{ok, Tks} = signal_from_file(F),
Tks.
unsafe_signal_from_string(S) ->
{ok, Tks} = signal_from_string(S),
Tks.
%
filter_signal(X) -> gsc_tokens:filter_significant(X).
signal_from_file(X) -> sigtokens_from_file(X).
signal_from_string(X) -> sigtokens_from_string(X).
% @doc legacy name for signal
sigtokens_from_file(X) -> sigtokens_from_file(X) ->
case tokens_from_file(X) of case tokens_from_file(X) of
{ok, Y} -> {ok, gsc_tokens:filter_significant(Y)}; {ok, Y} -> {ok, gsc_tokens:filter_significant(Y)};
@@ -93,7 +117,6 @@ tokens_from_file(FilePath) ->
-spec tokens_from_string(SrcStr) -> Result -spec tokens_from_string(SrcStr) -> Result
when SrcStr :: string(), when SrcStr :: string(),
Result :: {ok, Tokens} Result :: {ok, Tokens}
@@ -105,40 +128,68 @@ tokens_from_string(SrcStr) ->
-spec ast_from_file(FilePath) -> Perhaps -spec gso_tokens_from_file(FilePath) -> Result when
when FilePath :: string(), FilePath :: string(),
Perhaps :: {ok, AST} | {error, gsc_err()}, Result :: {ok, GsoTks} | {error, Reason},
AST :: gsc_ast(). GsoTks :: [gso_scan:so_token()],
Reason :: gsc_err() | any().
ast_from_file(FilePath) -> gso_tokens_from_file(FilePath) ->
case file:read_file(FilePath) of case file:read_file(FilePath) of
{ok, FileBytes} -> ast_from_string(FileBytes); {ok, Bytes} -> gso_tokens_from_string(Bytes);
Error -> Error
end.
-spec ast_from_string(SrcStr) -> Perhaps
when SrcStr :: string(),
Perhaps :: {ok, AST} | {error, gsc_err()},
AST :: gsc_ast().
ast_from_string(SrcStr) ->
case gsc_tokens:significant_tokens(SrcStr) of
{ok, SigTks} -> ast_from_tokens(SigTks);
Error -> Error
end.
-spec ast_from_tokens(SrcTokens) -> Perhaps
when SrcTokens :: [tk()],
Perhaps :: {ok, AST} | {error, gsc_err()},
AST :: gsc_ast().
ast_from_tokens(Tks) ->
SigTks = gsc_tokens:filter_significant(Tks),
case gsc_ast:gulp_file(SigTks) of
{gulp, AST} -> {ok, AST};
Error -> Error Error -> Error
end. end.
-spec gso_tokens_from_string(Str) -> Result when
Str :: iolist(),
Result :: {ok, GsoTks} | {error, Reason},
GsoTks :: [gso_scan:so_token()],
Reason :: gsc_err() | any().
gso_tokens_from_string(Evil) ->
Str = gsc_tokens:very_stable_codepoints(Evil),
gso_scan:scan(Str).
-spec very_stable_codepoints(String) -> Normalized when
String :: iolist(),
Normalized :: string().
%% @doc normalize string to utf8 NFC list form
very_stable_codepoints(X) ->
gsc_tokens:very_stable_codepoints(X).
-spec very_stable_string(String) -> Normalized when
String :: iolist(),
Normalized :: string().
%% @doc alias for `very_stable_codepoints/1'
very_stable_string(X) ->
gsc_tokens:very_stable_codepoints(X).
-spec very_stable_file(FilePath) -> Contents when
FilePath :: string(),
Contents :: string().
%% @doc Read file, return contents as
%% `unicode:characters_to_nfc_list/1' list.
%%
%% Please note that this function is NOT in fact very
%% stable, as it throws an error if there's some error
%% reading the file (e.g. not found).
%%
%% this function exists mostly for scripting/shell
%% convenience
very_stable_file(X) ->
case file:read_file(X) of
{ok, B} -> very_stable_codepoints(B);
Error -> error(Error)
end.
+55
View File
@@ -0,0 +1,55 @@
-module(gsc_ntree).
-export([
nstem/2, meta/1, kids/1,
flatten_tree/1, flatten_forest/1
]).
-include("$gsc_include/gsc.hrl").
%%=====================================================
%% API: functions
%%=====================================================
-spec nstem(Root, Forest) -> Tree when
Root :: S,
Forest :: nforest(S, L),
Tree :: ntree(S, L),
S :: any(),
L :: any().
nstem(Root, List) ->
{ns, Root, List}.
meta(#ns{meta = M}) -> M.
kids(#ns{kids = K}) -> K.
-spec flatten_tree(Tree) -> Leafs when
Tree :: ntree(_, L),
Leafs :: [L],
L :: any().
flatten_tree(T) ->
lists:flatten(ft(T)).
-spec flatten_forest(Forest) -> Leafs when
Forest :: nforest(_, L),
Leafs :: [L],
L :: any().
flatten_forest(F) ->
lists:flatten(ff(F)).
ft(#ns{kids = F}) -> ff(F);
ft(Leaf) -> [Leaf].
ff(F) ->
[ft(T) || T <- F].
-216
View File
@@ -1,216 +0,0 @@
-module(gsc_parse_type_expr).
-export_type([
]).
-export([
unsafe_vtks_from_string/1,
gulp_vtks/1,
take_until_ifx_op/1
]).
-include("$gsc_include/gsc.hrl").
%------------------------------------------------------
% TYPES
%------------------------------------------------------
-type vtk_ifx_op() :: vtk_apply_to
| {'vtk_*', tk()}
| {'vtk_=>', tk()}.
-type vtk() :: tk()
| {vtk_plist, [tk()]}
| vtk_ifx_op().
-type gulped(X) :: {gulp, X}
| {error, any()}.
-type slurped(X) :: {slurp, X, Rest :: [tk()]}
| {error, any()}.
%------------------------------------------------------
% FUNCTIONS
%------------------------------------------------------
-spec unsafe_vtks_from_string(SrcStr) -> Vtks when
SrcStr :: string(),
Vtks :: [vtk()].
% @doc for testing
unsafe_vtks_from_string(S) ->
{ok, SigTks} = gsc_tokens:significant_tokens(S),
{gulp, Vtks} = gulp_vtks(SigTks),
Vtks.
% operators in descending order of exteriority
%
% precedence verbiage confuses me
%
% 1 + 2 * 3 ^ 4
%
% (+ 1 (* 2 (^ 3 4)))
%
% precedence is thinking about the operators as like
% having arms and legs and doing something. how much
% power do they have to bind to their neighbors. it's
% thinking of your operators as verbs in some state
% machine.
%
% i don't like when i have to model the state machine
% in my head when i'm reading code. everything should
% just be there. exteriority and interiority are
% properties of nouns, not properties of verbs.
%
% functional programming is all about offloading as
% much of your reasoning into nouns as possible. verbs
% change things. and change is bad.
-type chunk_strategy()
:: ast_parens % prefix paren (x y z)
| '=>' % infixr
| '*' % infix
| 'apply' % postfix paren f (...)
.
-record(ast_parens,
{open = none :: none | tk(),
inner = none :: none | [tk()],
close = none :: none | tk()}).
chunk_by(Strategy, Tokens) ->
chunk_by(Strategy, [], Tokens).
-spec chunk0(Strategy, Oks, Errs, Tokens) -> Result when
Strategy :: chunk_strategy(),
Oks :: [any()],
Errs :: [{error, Reason :: any()}],
Tokens :: [tk()],
Result :: {ok,
gulp_chunks_by(_, Stk, [], []) ->
{gulp, lists:reverse(Stk)};
gulp_chunks_by(_, _, Errs, []) ->
{error, {fixme, {?MODULE, ?LINE}, Errs}};
gulp_chunks_by(plist, Stk, Errs, Tokens) ->
case slurp_plist_rec(Tokens) of
{slurp, Plist, NewTokens} ->
gulp_chunks_by(plist, [PList | Stk], Errs, NewTokens);
barf ->
[Token | NewTokens] = Tokens,
gulp_chunks_by(plist, [Token | Stk], Errs, NewTokens);
Error ->
gulp_chunks_by(plist, Stk, [Error | Errs], Tokens);
end.
slurp_plist_rec(Tokens = [#tk{string = "(" | _]) ->
case gsc_tokens:slurp_plist(Tokens) of
{slurp, [], _} ->
barf;
{slurp, PTokens, NewTokens} ->
PTokensInner = pt_inner(PTokens),
end;
%-spec gulp_ifx_tree(Tokens) -> gulped(IfxTree) when
% Tokens :: [tk()],
% IfxTree :: ifx_tree().
%
%-spec chunk_by(ChunkStrategy, Tokens) -> Result when
% ChunkStrategy :: chunk_strategy(),
% Tokens :: [tk()],
% Result :: {ChunkStrategy,
-spec gulp_vtks(Tokens) -> Result when
Tokens :: [tk()],
Result :: gulped(VirtualTokens),
VirtualTokens :: [vtk()].
% @private
% for infix precedence/associativity resolution.
%
% better/original name (although misnomer) was
% insert_virtual_tokens. we're using the gulp verbiage
% because we're guarding against mistmatched delimiters
% @end
gulp_vtks(Tokens) ->
gulp_vtks([], Tokens).
gulp_vtks(Acc, Tks0) ->
% scan until next "(" | "*" | "=>"
case take_until_ifx_op(Tks0) of
% no infix op remaining, return arg
{_Pfx = Tks0,
_Sfx = []} ->
{gulp, lists:flatten([Acc, Tks0])};
% application
% "... foo(bar, baz) ..."
% ~> [..., foo, {plist, "(bar, baz)"}, ...]
{_Pfx = Tks1_BeforeOpen,
_Sfx = Tks2_OpenNAfter
= [#tk{string = "("} | _]} ->
case gsc_tokens:slurp_plist(Tks2_OpenNAfter) of
{slurp, Tks2A_OpenToClose, Tks2B_AfterClose} ->
NewAcc = [Acc,
Tks1_BeforeOpen,
vtk_apply_to,
{vtk_plist, Tks2A_OpenToClose}],
gulp_vtks(NewAcc, Tks2B_AfterClose);
Error = {error, _} ->
Error
end;
% product
{_Pfx = Tks0_BeforeTimes,
_Sfx = [ Tk1A_Times = #tk{string = "*"}
| Tks1B_AfterTimes]} ->
NewAcc = [Acc,
Tks0_BeforeTimes,
{'vtk_*', Tk1A_Times}],
gulp_vtks(NewAcc, Tks1B_AfterTimes);
% funType
{_Pfx = Tks0_BeforeOp,
_Sfx = [ Tk1A_Op = #tk{string = "=>"}
| Tks1B_AfterOp]} ->
NewAcc = [Acc,
Tks0_BeforeOp,
{'vtk_=>', Tk1A_Op}],
gulp_vtks(NewAcc, Tks1B_AfterOp)
end.
-spec take_until_ifx_op(Tokens) -> Result when
Tokens :: [tk()],
Result :: {Taken, NewTokens},
Taken :: Tokens,
NewTokens :: Tokens.
% @doc
% consume tokens until one of ["(", "*", "=>"]
take_until_ifx_op(Tks) ->
take_until_ifx_op([], Tks).
take_until_ifx_op(Stack, []) ->
{lists:reverse(Stack), []};
take_until_ifx_op(Stack, Tokens = [Token | NewTokens]) ->
TokStr = Token#tk.string,
Continue =
case TokStr of
% exit cases
"(" -> false;
"*" -> false;
"=>" -> false;
_ -> true
end,
case Continue of
true -> take_until_ifx_op([Token | Stack], NewTokens);
false -> {lists:reverse(Stack), Tokens}
end.
+111
View File
@@ -0,0 +1,111 @@
% signal = non-noisy tokens
-module(gsc_signal).
-export([
from_tokens/1,
is_block/1,
gulp_block_items/1,
block_to_items/1,
take_block_item/1
]).
-include("$gsc_include/gsc.hrl").
-spec from_tokens(Tokens) -> Signal when
Tokens :: [tk()],
Signal :: [tk()].
% @doc filter out comments/whitespace
from_tokens(Tokens) ->
gsc_tokens:filter_significant(Tokens).
-spec is_block(Signal) -> Result when
Signal :: [tk()],
Result :: boolean().
is_block([]) ->
true;
is_block([#tk{pos = {_, BCol}} | Rest]) ->
InBlock =
fun(#tk{pos = {_, TCol}}) ->
BCol =< TCol
end,
lists:all(InBlock, Rest).
-spec gulp_block_items(Signal) -> Result when
Signal :: [tk()],
Result :: {slurp, Items, NewSignal}
| {error, any()},
Items :: [Signal],
NewSignal :: Signal.
gulp_block_items(S) ->
case is_block(S) of
true -> {gulp, block_to_items(S)};
false -> find_badness(S)
end.
find_badness([#tk{pos = {_, StartCol}} = StartTk | Rest]) ->
find_badness(StartCol, StartTk, Rest).
find_badness(StartCol, StartTk, [#tk{pos = {_, TkCol}} = Tk | Rest]) ->
Bad = TkCol < StartCol,
case Bad of
false -> find_badness(StartCol, StartTk, Rest);
true -> {error, {bad_block, [{start_col, StartCol},
{end_col, TkCol},
{start_tk, StartTk},
{end_tk, Tk}]}}
end.
-spec block_to_items(Signal) -> BlockItems when
Signal :: [tk()],
BlockItems :: [Signal].
% @doc
% naive algorithm, so doesn't ensure all block items
% are same indent level
%
% Input:
% foo = ...
% bar = ...
% baz = ...
%
% Output:
% [foo = ...,
% bar = ...,
% baz = ...]
block_to_items([]) ->
[];
block_to_items(S) ->
b2is([], S).
b2is(Acc, []) ->
lists:reverse(Acc);
b2is(Acc, S) ->
{Item, S1} = take_block_item(S),
b2is([Item | Acc], S1).
-spec take_block_item(Signal) -> Result when
Signal :: [tk()],
Result :: {Item, NewSignal},
Item :: Signal,
NewSignal :: Signal.
take_block_item([]) ->
{[], []};
take_block_item([#tk{pos = {_, ICol}} = T0 | S0]) ->
InItem =
fun(#tk{pos = {_, TCol}}) ->
ICol < TCol
end,
{S0_II, S1} = lists:splitwith(InItem, S0),
{[T0 | S0_II], S1}.
+132 -92
View File
@@ -20,7 +20,7 @@
% meta % meta
-export([ -export([
token_types_parse_order/0, token_shapes_parse_order/0,
kwds/0 kwds/0
]). ]).
@@ -39,10 +39,14 @@
is_significant/1, is_significant/1,
filter_significant/1, filter_significant/1,
significant_tokens/1, significant_tokens/1,
very_stable_codepoints/1,
very_stable_string/1,
very_stable_characters/1,
tokens_from_iolist/1,
tokens/1, tokens/1,
slurp_token/2, slurp_token/2,
slurp_token_types/3, slurp_token_shapes/3,
slurp_token_of_type/3, slurp_token_of_shape/3,
new_pos/2 new_pos/2
]). ]).
@@ -59,7 +63,7 @@
AtMostNStrings :: [string()]. AtMostNStrings :: [string()].
% @doc return the strings of the first N tokens % @doc return the strings of the first N tokens
strings(N, [#tk{string = S} | Rest]) when is_integer(N), N >= 1 -> strings(N, [#tk{str = S} | Rest]) when is_integer(N), N >= 1 ->
[S | strings(N-1, Rest)]; [S | strings(N-1, Rest)];
strings(_, []) -> strings(_, []) ->
[]; [];
@@ -159,7 +163,7 @@ take_block_item([]) ->
% counterintuitive to end-users (who are programmers, entirely % counterintuitive to end-users (who are programmers, entirely
% unfamiliar with notions like stacks and open/close delimiters) % unfamiliar with notions like stacks and open/close delimiters)
slurp_plist([Hd = #tk{string = "("} | Tl]) -> slurp_plist([Hd = #tk{str = "("} | Tl]) ->
slurp_dlist([Hd], [Hd], Tl); slurp_dlist([Hd], [Hd], Tl);
slurp_plist(Tks) -> slurp_plist(Tks) ->
{slurp, [], Tks}. {slurp, [], Tks}.
@@ -170,30 +174,30 @@ slurp_dlist(All, [], NewTokens) ->
{slurp, lists:reverse(All), NewTokens}; {slurp, lists:reverse(All), NewTokens};
% WMA stack is nonempty % WMA stack is nonempty
% happy cases of opens getting popped % happy cases of opens getting popped
slurp_dlist(All, [#tk{string = "("} | NewOpen], slurp_dlist(All, [#tk{str = "("} | NewOpen],
[#tk{string = ")"} = Tk | NewTks]) -> [#tk{str = ")"} = Tk | NewTks]) ->
slurp_dlist([Tk | All], NewOpen, NewTks); slurp_dlist([Tk | All], NewOpen, NewTks);
slurp_dlist(All, [#tk{string = "["} | NewOpen], slurp_dlist(All, [#tk{str = "["} | NewOpen],
[#tk{string = "]"} = Tk | NewTks]) -> [#tk{str = "]"} = Tk | NewTks]) ->
slurp_dlist([Tk | All], NewOpen, NewTks); slurp_dlist([Tk | All], NewOpen, NewTks);
slurp_dlist(All, [#tk{string = "{"} | NewOpen], slurp_dlist(All, [#tk{str = "{"} | NewOpen],
[#tk{string = "}"} = Tk | NewTks]) -> [#tk{str = "}"} = Tk | NewTks]) ->
slurp_dlist([Tk | All], NewOpen, NewTks); slurp_dlist([Tk | All], NewOpen, NewTks);
% happy: open delimiters getting pushed % happy: open delimiters getting pushed
slurp_dlist(All, Opens, [#tk{string = "("} = Tk | NewTks]) -> slurp_dlist(All, Opens, [#tk{str = "("} = Tk | NewTks]) ->
slurp_dlist([Tk | All], [Tk | Opens], NewTks); slurp_dlist([Tk | All], [Tk | Opens], NewTks);
slurp_dlist(All, Opens, [#tk{string = "["} = Tk | NewTks]) -> slurp_dlist(All, Opens, [#tk{str = "["} = Tk | NewTks]) ->
slurp_dlist([Tk | All], [Tk | Opens], NewTks); slurp_dlist([Tk | All], [Tk | Opens], NewTks);
slurp_dlist(All, Opens, [#tk{string = "{"} = Tk | NewTks]) -> slurp_dlist(All, Opens, [#tk{str = "{"} = Tk | NewTks]) ->
slurp_dlist([Tk | All], [Tk | Opens], NewTks); slurp_dlist([Tk | All], [Tk | Opens], NewTks);
% sad: mismatch cases % sad: mismatch cases
slurp_dlist(All, Opens, []) -> slurp_dlist(_, Opens, []) ->
{error, {fixme, mismatch, Opens, none}}; {error, {fixme, mismatch, Opens, none}};
slurp_dlist(All, Opens, [#tk{string = "}"} = BadClose | _]) -> slurp_dlist(_, Opens, [#tk{str = "}"} = BadClose | _]) ->
{error, {fixme, mismatch, Opens, {value, BadClose}}}; {error, {fixme, mismatch, Opens, {value, BadClose}}};
slurp_dlist(All, Opens, [#tk{string = "]"} = BadClose | _]) -> slurp_dlist(_, Opens, [#tk{str = "]"} = BadClose | _]) ->
{error, {fixme, mismatch, Opens, {value, BadClose}}}; {error, {fixme, mismatch, Opens, {value, BadClose}}};
slurp_dlist(All, Opens, [#tk{string = ")"} = BadClose | _]) -> slurp_dlist(_, Opens, [#tk{str = ")"} = BadClose | _]) ->
{error, {fixme, mismatch, Opens, {value, BadClose}}}; {error, {fixme, mismatch, Opens, {value, BadClose}}};
% general case: non-terminal token gets pushed % general case: non-terminal token gets pushed
slurp_dlist(All, Opens, [Tk | NewTks]) -> slurp_dlist(All, Opens, [Tk | NewTks]) ->
@@ -206,15 +210,15 @@ slurp_dlist(All, Opens, [Tk | NewTks]) ->
% This is parse order definition, list of keywords, etc % This is parse order definition, list of keywords, etc
% %
% -export([ % -export([
% token_types_parse_order/0, % token_shapes_parse_order/0,
% kwds/0 % kwds/0
% ]). % ]).
%------------------------------------------------------- %-------------------------------------------------------
-spec token_types_parse_order() -> [gsc_token_type()]. -spec token_shapes_parse_order() -> [tk_shape()].
% @doc % @doc
% list of sophia tokens in parse order (if an earlier type matches, the later % list of sophia token shapes in parse order (if an earlier shape matches, the later
% type isn't even checked) % shape isn't even checked)
% %
% %
% Rules = % Rules =
@@ -245,7 +249,7 @@ slurp_dlist(All, Opens, [Tk | NewTks]) ->
% ], % ],
% @end % @end
token_types_parse_order() -> token_shapes_parse_order() ->
% written in this style to be maximally editable % written in this style to be maximally editable
lists:flatten([ lists:flatten([
% comments and whitespace % comments and whitespace
@@ -282,8 +286,8 @@ kwds() ->
% -export([ % -export([
% tokens/1, % tokens/1,
% slurp_token/1, % slurp_token/1,
% slurp_token_types/2, % slurp_token_shapes/2,
% slurp_token_of_type/2 % slurp_token_of_shape/2
% ]). % ]).
%------------------------------------------------------- %-------------------------------------------------------
@@ -323,12 +327,47 @@ filter_significant(Tokens) ->
-spec is_significant(Token) -> boolean() -spec is_significant(Token) -> boolean()
when Token :: tk(). when Token :: tk().
is_significant(#tk{type = bcom}) -> false; is_significant(#tk{shape = bcom}) -> false;
is_significant(#tk{type = lcom}) -> false; is_significant(#tk{shape = lcom}) -> false;
is_significant(#tk{type = ws}) -> false; is_significant(#tk{shape = ws}) -> false;
is_significant(_) -> true. is_significant(_) -> true.
% aliases
very_stable_string(X) -> very_stable_codepoints(X).
very_stable_characters(X) -> very_stable_codepoints(X).
-spec very_stable_codepoints(IoList) -> NfcList when
IoList :: iolist(),
NfcList :: string().
%% @doc When Unicode sends its characters, they're not
%% sending their best. They're not sending ASCII.
%% They're not sending ASCII. They're sending
%% characters that have lots of problems, and they're
%% bringing those problems with us. They're bringing
%% diacritics. They're bringing homoglyphs. They're
%% bringing RTL. They're rapists. And some, we assume,
%% are good characters.
very_stable_codepoints(S) ->
unicode:characters_to_nfc_list(S).
-spec tokens_from_iolist(SrcStr) -> Result when
SrcStr :: iolist(),
Result :: {ok, Tokens}
| {error, gsc_err()},
Tokens :: [tk()].
% @doc alias for tokens/1
tokens_from_iolist(S) -> tokens(S).
-spec tokens(SrcStr) -> Result -spec tokens(SrcStr) -> Result
when SrcStr :: iolist(), when SrcStr :: iolist(),
Result :: {ok, Tokens} Result :: {ok, Tokens}
@@ -343,13 +382,14 @@ is_significant(_) -> true.
tokens(S) -> tokens(S) ->
% defensive normalization % defensive normalization
tokens([], {1, 1}, unicode:characters_to_nfc_list(S)). tokens([], {1, 1}, very_stable_codepoints(S)).
tokens(Stack, _FinalPos, "") -> tokens(Stack, _FinalPos, "") ->
{ok, lists:reverse(Stack)}; {ok, lists:reverse(Stack)};
tokens(Stack, Pos, SrcStr) -> tokens(Stack, Pos, SrcStr) ->
case slurp_token(Pos, SrcStr) of case slurp_token(Pos, SrcStr) of
{tokmatch, NewToken = #tk{string = TokStr}, {tokmatch, NewToken = #tk{str = TokStr},
NewSrcStr} -> NewSrcStr} ->
NewPos = new_pos(Pos, TokStr), NewPos = new_pos(Pos, TokStr),
tokens([NewToken | Stack], NewPos, NewSrcStr); tokens([NewToken | Stack], NewPos, NewSrcStr);
@@ -455,7 +495,7 @@ next_tabstop8(Col0) when Col0 >= 0 ->
-spec slurp_token(Pos, SrcStr) -> Result -spec slurp_token(Pos, SrcStr) -> Result
when Pos :: gsc_pos(), when Pos :: tk_pos(),
SrcStr :: string(), SrcStr :: string(),
Result :: {tokmatch, Token, Rest} Result :: {tokmatch, Token, Rest}
| no_tokmatch | no_tokmatch
@@ -465,17 +505,17 @@ next_tabstop8(Col0) when Col0 >= 0 ->
Rest :: string(). Rest :: string().
% @doc % @doc
% grab a single token off the front of the string according to % grab a single token off the front of the string according to
% `token_types_parse_order/0' % `token_shapes_parse_order/0'
slurp_token(Pos, SrcStr) -> slurp_token(Pos, SrcStr) ->
% this is the easiest format if i need to fuck with it % this is the easiest format if i need to fuck with it
slurp_token_types(token_types_parse_order(), Pos, SrcStr). slurp_token_shapes(token_shapes_parse_order(), Pos, SrcStr).
-spec slurp_token_types(ParseOrder, Pos, SrcStr) -> Result -spec slurp_token_shapes(ParseOrder, Pos, SrcStr) -> Result
when ParseOrder :: [gsc_token_type()], when ParseOrder :: [tk_shape()],
Pos :: gsc_pos(), Pos :: tk_pos(),
SrcStr :: string(), SrcStr :: string(),
Result :: {tokmatch, Token, Rest} Result :: {tokmatch, Token, Rest}
| no_tokmatch | no_tokmatch
@@ -485,22 +525,22 @@ slurp_token(Pos, SrcStr) ->
Rest :: string(). Rest :: string().
% @doc % @doc
% grab a single token off the front of the string according to % grab a single token off the front of the string according to
% `token_types_parse_order/0' % `token_shapes_parse_order/0'
slurp_token_types([TokenType | TTs], Pos, SrcStr) -> slurp_token_shapes([TokenType | TTs], Pos, SrcStr) ->
case slurp_token_of_type(TokenType, Pos, SrcStr) of case slurp_token_of_shape(TokenType, Pos, SrcStr) of
Match = {tokmatch, _, _} -> Match; Match = {tokmatch, _, _} -> Match;
no_tokmatch -> slurp_token_types(TTs, Pos, SrcStr); no_tokmatch -> slurp_token_shapes(TTs, Pos, SrcStr);
IErr = {ierr, _} -> IErr; IErr = {ierr, _} -> IErr;
Error = {error, _} -> Error Error = {error, _} -> Error
end; end;
slurp_token_types([], _Pos, _SrcStr) -> slurp_token_shapes([], _Pos, _SrcStr) ->
no_tokmatch. no_tokmatch.
-spec slurp_token_of_type(TokenType, Pos, SrcStr) -> MaybeToken -spec slurp_token_of_shape(TokenType, Pos, SrcStr) -> MaybeToken
when TokenType :: gsc_token_type(), when TokenType :: tk_shape(),
Pos :: gsc_pos(), Pos :: tk_pos(),
SrcStr :: string(), SrcStr :: string(),
MaybeToken :: {tokmatch, Token, Rest} MaybeToken :: {tokmatch, Token, Rest}
| no_tokmatch | no_tokmatch
@@ -509,7 +549,7 @@ slurp_token_types([], _Pos, _SrcStr) ->
Token :: tk(), Token :: tk(),
Rest :: string(). Rest :: string().
% @doc % @doc
% match a sophia token of a given type off the front of the string % match a sophia token of a given shape off the front of the string
% @end % @end
% COMMENTS AND WHITESPACE: lcom, bcom, ws % COMMENTS AND WHITESPACE: lcom, bcom, ws
@@ -518,27 +558,27 @@ slurp_token_types([], _Pos, _SrcStr) ->
% %
% i am not going to bother writing a string matcher thing for this % i am not going to bother writing a string matcher thing for this
% FIXME: make a string matcher for line comments % FIXME: make a string matcher for line comments
slurp_token_of_type(lcom, Pos, SrcStr) -> slurp_token_of_shape(lcom, Pos, SrcStr) ->
case SrcStr of case SrcStr of
"//" ++ _ -> "//" ++ _ ->
{Line, Rest} = takeline("", SrcStr), {Line, Rest} = takeline("", SrcStr),
Token = #tk{type = lcom, Token = #tk{shape = lcom,
pos = Pos, pos = Pos,
string = Line}, str = Line},
{tokmatch, Token, Rest}; {tokmatch, Token, Rest};
_ -> _ ->
no_tokmatch no_tokmatch
end; end;
% Block comments cannot have a string matcher because they have a whole stack % Block comments cannot have a string matcher because they have a whole stack
% thing keeping track of depth because of nested block comments % thing keeping track of depth because of nested block comments
slurp_token_of_type(bcom, Pos, SrcStr0) -> slurp_token_of_shape(bcom, Pos, SrcStr0) ->
case SrcStr0 of case SrcStr0 of
"/*" ++ SrcStr1 -> "/*" ++ SrcStr1 ->
case bcom("/*", 1, SrcStr1) of case bcom("/*", 1, SrcStr1) of
{ok, CommentStr, SrcStr2} -> {ok, CommentStr, SrcStr2} ->
Token = #tk{type = bcom, Token = #tk{shape = bcom,
pos = Pos, pos = Pos,
string = CommentStr}, str = CommentStr},
{tokmatch, Token, SrcStr2}; {tokmatch, Token, SrcStr2};
Error -> Error ->
Error Error
@@ -546,15 +586,15 @@ slurp_token_of_type(bcom, Pos, SrcStr0) ->
_ -> _ ->
no_tokmatch no_tokmatch
end; end;
slurp_token_of_type(ws, Pos, SrcStr) -> slurp_token_of_shape(ws, Pos, SrcStr) ->
WhitespaceMatcher = gsc_strmatch:smr_sf_ws(), WhitespaceMatcher = gsc_strmatch:smr_sf_ws(),
case gsc_strmatch:match(WhitespaceMatcher, SrcStr) of case gsc_strmatch:match(WhitespaceMatcher, SrcStr) of
no_strmatch -> no_strmatch ->
no_tokmatch; no_tokmatch;
{strmatch, WS, Rest} -> {strmatch, WS, Rest} ->
Token = #tk{type = ws, Token = #tk{shape = ws,
pos = Pos, pos = Pos,
string = WS}, str = WS},
{tokmatch, Token, Rest} {tokmatch, Token, Rest}
end; end;
% KEYWORDS, OPERATORS, PUNCTUATION: kwd, op, punct % KEYWORDS, OPERATORS, PUNCTUATION: kwd, op, punct
@@ -568,88 +608,88 @@ slurp_token_of_type(ws, Pos, SrcStr) ->
% %
% we know kwds are always ids, so we parse it as an id and see if it's one % we know kwds are always ids, so we parse it as an id and see if it's one
% of the kwds % of the kwds
slurp_token_of_type(kwd, Pos, SrcStr) -> slurp_token_of_shape(kwd, Pos, SrcStr) ->
case slurp_token_of_type(id, Pos, SrcStr) of case slurp_token_of_shape(id, Pos, SrcStr) of
{tokmatch, IdTok = #tk{string = IdStr}, Rest} -> {tokmatch, IdTok = #tk{str = IdStr}, Rest} ->
case lists:member(IdStr, kwds()) of case lists:member(IdStr, kwds()) of
false -> false ->
no_tokmatch; no_tokmatch;
true -> true ->
KwTok = IdTok#tk{type = kwd}, KwTok = IdTok#tk{shape = kwd},
{tokmatch, KwTok, Rest} {tokmatch, KwTok, Rest}
end; end;
no_tokmatch -> no_tokmatch ->
no_tokmatch no_tokmatch
end; end;
slurp_token_of_type(op, Pos, SrcStr) -> slurp_token_of_shape(op, Pos, SrcStr) ->
case gsc_strmatch:match(gsc_strmatch:smr_sf_op(), SrcStr) of case gsc_strmatch:match(gsc_strmatch:smr_sf_op(), SrcStr) of
{strmatch, Str, Rest} -> {strmatch, Str, Rest} ->
Token = #tk{type = op, pos = Pos, string = Str}, Token = #tk{shape = op, pos = Pos, str = Str},
{tokmatch, Token, Rest}; {tokmatch, Token, Rest};
no_strmatch -> no_strmatch ->
no_tokmatch no_tokmatch
end; end;
slurp_token_of_type(punct, Pos, SrcStr) -> slurp_token_of_shape(punct, Pos, SrcStr) ->
case gsc_strmatch:match(gsc_strmatch:smr_sf_punct(), SrcStr) of case gsc_strmatch:match(gsc_strmatch:smr_sf_punct(), SrcStr) of
{strmatch, Str, Rest} -> {strmatch, Str, Rest} ->
Token = #tk{type = punct, pos = Pos, string = Str}, Token = #tk{shape = punct, pos = Pos, str = Str},
{tokmatch, Token, Rest}; {tokmatch, Token, Rest};
no_strmatch -> no_strmatch ->
no_tokmatch no_tokmatch
end; end;
% SOPHIA VARIABLE NAMES: id, con, qid, qcon, tvar % SOPHIA VARIABLE NAMES: id, con, qid, qcon, tvar
slurp_token_of_type(id, Pos, SrcStr) -> slurp_token_of_shape(id, Pos, SrcStr) ->
case gsc_strmatch:match(gsc_strmatch:smr_sf_id(), SrcStr) of case gsc_strmatch:match(gsc_strmatch:smr_sf_id(), SrcStr) of
{strmatch, IdStr, Rest} -> {strmatch, IdStr, Rest} ->
Token = #tk{type = id, pos = Pos, string = IdStr}, Token = #tk{shape = id, pos = Pos, str = IdStr},
{tokmatch, Token, Rest}; {tokmatch, Token, Rest};
no_strmatch -> no_strmatch ->
no_tokmatch no_tokmatch
end; end;
slurp_token_of_type(con, Pos, SrcStr) -> slurp_token_of_shape(con, Pos, SrcStr) ->
case gsc_strmatch:match(gsc_strmatch:smr_sf_con(), SrcStr) of case gsc_strmatch:match(gsc_strmatch:smr_sf_con(), SrcStr) of
{strmatch, Str, Rest} -> {strmatch, Str, Rest} ->
Token = #tk{type = con, pos = Pos, string = Str}, Token = #tk{shape = con, pos = Pos, str = Str},
{tokmatch, Token, Rest}; {tokmatch, Token, Rest};
no_strmatch -> no_strmatch ->
no_tokmatch no_tokmatch
end; end;
slurp_token_of_type(qid, Pos, SrcStr) -> slurp_token_of_shape(qid, Pos, SrcStr) ->
case gsc_strmatch:match(gsc_strmatch:smr_sf_qid(), SrcStr) of case gsc_strmatch:match(gsc_strmatch:smr_sf_qid(), SrcStr) of
{strmatch, Str, Rest} -> {strmatch, Str, Rest} ->
Token = #tk{type = qid, pos = Pos, string = Str}, Token = #tk{shape = qid, pos = Pos, str = Str},
{tokmatch, Token, Rest}; {tokmatch, Token, Rest};
no_strmatch -> no_strmatch ->
no_tokmatch no_tokmatch
end; end;
slurp_token_of_type(qcon, Pos, SrcStr) -> slurp_token_of_shape(qcon, Pos, SrcStr) ->
case gsc_strmatch:match(gsc_strmatch:smr_sf_qcon(), SrcStr) of case gsc_strmatch:match(gsc_strmatch:smr_sf_qcon(), SrcStr) of
{strmatch, Str, Rest} -> {strmatch, Str, Rest} ->
Token = #tk{type = qcon, pos = Pos, string = Str}, Token = #tk{shape = qcon, pos = Pos, str = Str},
{tokmatch, Token, Rest}; {tokmatch, Token, Rest};
no_strmatch -> no_strmatch ->
no_tokmatch no_tokmatch
end; end;
slurp_token_of_type(tvar, Pos, SrcStr) -> slurp_token_of_shape(tvar, Pos, SrcStr) ->
case gsc_strmatch:match(gsc_strmatch:smr_sf_tvar(), SrcStr) of case gsc_strmatch:match(gsc_strmatch:smr_sf_tvar(), SrcStr) of
{strmatch, Str, Rest} -> {strmatch, Str, Rest} ->
Token = #tk{type = tvar, pos = Pos, string = Str}, Token = #tk{shape = tvar, pos = Pos, str = Str},
{tokmatch, Token, Rest}; {tokmatch, Token, Rest};
no_strmatch -> no_strmatch ->
no_tokmatch no_tokmatch
end; end;
slurp_token_of_type(int16, Pos, SrcStr) -> slurp_token_of_shape(int16, Pos, SrcStr) ->
case gsc_strmatch:match(gsc_strmatch:smr_sf_int16(), SrcStr) of case gsc_strmatch:match(gsc_strmatch:smr_sf_int16(), SrcStr) of
{strmatch, Str, Rest} -> {strmatch, Str, Rest} ->
Token = #tk{type = int16, pos = Pos, string = Str}, Token = #tk{shape = int16, pos = Pos, str = Str},
{tokmatch, Token, Rest}; {tokmatch, Token, Rest};
no_strmatch -> no_strmatch ->
no_tokmatch no_tokmatch
end; end;
slurp_token_of_type(int10, Pos, SrcStr) -> slurp_token_of_shape(int10, Pos, SrcStr) ->
case gsc_strmatch:match(gsc_strmatch:smr_sf_int10(), SrcStr) of case gsc_strmatch:match(gsc_strmatch:smr_sf_int10(), SrcStr) of
{strmatch, Str, Rest} -> {strmatch, Str, Rest} ->
Token = #tk{type = int10, pos = Pos, string = Str}, Token = #tk{shape = int10, pos = Pos, str = Str},
{tokmatch, Token, Rest}; {tokmatch, Token, Rest};
no_strmatch -> no_strmatch ->
no_tokmatch no_tokmatch
@@ -658,63 +698,63 @@ slurp_token_of_type(int10, Pos, SrcStr) ->
% ak, ct, sg % ak, ct, sg
% %
% char: sophia char literal % char: sophia char literal
slurp_token_of_type(ak, Pos, SrcStr) -> slurp_token_of_shape(ak, Pos, SrcStr) ->
StringMatcher = gsc_strmatch:smr_sf_ak(), StringMatcher = gsc_strmatch:smr_sf_ak(),
case gsc_strmatch:match(StringMatcher, SrcStr) of case gsc_strmatch:match(StringMatcher, SrcStr) of
no_strmatch -> no_strmatch ->
no_tokmatch; no_tokmatch;
{strmatch, TokenStr, Rest} -> {strmatch, TokenStr, Rest} ->
Token = #tk{type = ak, pos = Pos, string = TokenStr}, Token = #tk{shape = ak, pos = Pos, str = TokenStr},
{tokmatch, Token, Rest} {tokmatch, Token, Rest}
end; end;
slurp_token_of_type(ct, Pos, SrcStr) -> slurp_token_of_shape(ct, Pos, SrcStr) ->
StringMatcher = gsc_strmatch:smr_sf_ct(), StringMatcher = gsc_strmatch:smr_sf_ct(),
case gsc_strmatch:match(StringMatcher, SrcStr) of case gsc_strmatch:match(StringMatcher, SrcStr) of
no_strmatch -> no_strmatch ->
no_tokmatch; no_tokmatch;
{strmatch, TokenStr, Rest} -> {strmatch, TokenStr, Rest} ->
Token = #tk{type = ct, pos = Pos, string = TokenStr}, Token = #tk{shape = ct, pos = Pos, str = TokenStr},
{tokmatch, Token, Rest} {tokmatch, Token, Rest}
end; end;
slurp_token_of_type(sg, Pos, SrcStr) -> slurp_token_of_shape(sg, Pos, SrcStr) ->
StringMatcher = gsc_strmatch:smr_sf_sg(), StringMatcher = gsc_strmatch:smr_sf_sg(),
case gsc_strmatch:match(StringMatcher, SrcStr) of case gsc_strmatch:match(StringMatcher, SrcStr) of
no_strmatch -> no_strmatch ->
no_tokmatch; no_tokmatch;
{strmatch, TokenStr, Rest} -> {strmatch, TokenStr, Rest} ->
Token = #tk{type = sg, pos = Pos, string = TokenStr}, Token = #tk{shape = sg, pos = Pos, str = TokenStr},
{tokmatch, Token, Rest} {tokmatch, Token, Rest}
end; end;
slurp_token_of_type(char, Pos, SrcStr) -> slurp_token_of_shape(char, Pos, SrcStr) ->
StringMatcher = gsc_strmatch:smr_sf_char(), StringMatcher = gsc_strmatch:smr_sf_char(),
case gsc_strmatch:match(StringMatcher, SrcStr) of case gsc_strmatch:match(StringMatcher, SrcStr) of
no_strmatch -> no_strmatch ->
no_tokmatch; no_tokmatch;
{strmatch, TokenStr, Rest} -> {strmatch, TokenStr, Rest} ->
Token = #tk{type = char, pos = Pos, string = TokenStr}, Token = #tk{shape = char, pos = Pos, str = TokenStr},
{tokmatch, Token, Rest} {tokmatch, Token, Rest}
end; end;
slurp_token_of_type(string, Pos, SrcStr) -> slurp_token_of_shape(string, Pos, SrcStr) ->
case gsc_strmatch:match(gsc_strmatch:smr_sf_str(), SrcStr) of case gsc_strmatch:match(gsc_strmatch:smr_sf_str(), SrcStr) of
no_strmatch -> no_strmatch ->
no_tokmatch; no_tokmatch;
{strmatch, TokenStr, Rest} -> {strmatch, TokenStr, Rest} ->
Token = #tk{type = string, pos = Pos, string = TokenStr}, Token = #tk{shape = string, pos = Pos, str = TokenStr},
{tokmatch, Token, Rest} {tokmatch, Token, Rest}
end; end;
slurp_token_of_type(bytes, Pos, SrcStr) -> slurp_token_of_shape(bytes, Pos, SrcStr) ->
case gsc_strmatch:match(gsc_strmatch:smr_sf_bytes(), SrcStr) of case gsc_strmatch:match(gsc_strmatch:smr_sf_bytes(), SrcStr) of
no_strmatch -> no_strmatch ->
no_tokmatch; no_tokmatch;
{strmatch, TokenStr, Rest} -> {strmatch, TokenStr, Rest} ->
Token = #tk{type = bytes, pos = Pos, string = TokenStr}, Token = #tk{shape = bytes, pos = Pos, str = TokenStr},
{tokmatch, Token, Rest} {tokmatch, Token, Rest}
end; end;
slurp_token_of_type(NyiType, Pos, SrcStr) -> slurp_token_of_shape(NyiType, Pos, SrcStr) ->
Message = io_lib:format("cannot slurp token of type: ~p", [NyiType]), Message = io_lib:format("cannot slurp token of shape: ~p", [NyiType]),
error(#gsc_err{atom = nyi, error(#gsc_err{atom = nyi,
string = Message, str = Message,
extra = [{token_type, NyiType}, extra = [{token_shape, NyiType},
{pos, Pos}, {pos, Pos},
{rest, SrcStr}]}). {rest, SrcStr}]}).
+14 -14
View File
@@ -76,14 +76,14 @@
-type so_symbol() :: so_kwd() | so_special_char() | atom(). -type so_symbol() :: so_kwd() | so_special_char() | atom().
-type so_token2() :: {Symbol :: so_symbol(), -type so_token2() :: {Symbol :: so_symbol(),
Location :: gsc_pos()}. Location :: tk_pos()}.
% FIXME % FIXME
% this is 'id', 'con', qid % this is 'id', 'con', qid
-type so_tk3type() :: char | string | hex | int | bytes | qid | qcon | tvar | id | con. -type so_tk3type() :: char | string | hex | int | bytes | qid | qcon | tvar | id | con.
-type so_token3() :: {TokenType :: so_tk3type(), -type so_token3() :: {TokenType :: so_tk3type(),
Location :: gsc_pos(), Location :: tk_pos(),
TokenValue :: term()}. TokenValue :: term()}.
-type so_token() :: so_token2() | so_token3(). -type so_token() :: so_token2() | so_token3().
@@ -151,12 +151,12 @@ scan(SrcStr) ->
% %
% so if we see an ak/ct/sg token, we summon evil ben % so if we see an ak/ct/sg token, we summon evil ben
% carson to reconjoin the unconjoined twins % carson to reconjoin the unconjoined twins
to_so_tokens([ AkTok = #tk{type = AkCtSg, pos = Pos} to_so_tokens([ AkTok = #tk{shape = AkCtSg, pos = Pos}
| Sheeit]) | Sheeit])
when ak =:= AkCtSg; when ak =:= AkCtSg;
ct =:= AkCtSg; ct =:= AkCtSg;
sg =:= AkCtSg -> sg =:= AkCtSg ->
{#tk{string = FinalAkStr}, NewSheeit} {#tk{str = FinalAkStr}, NewSheeit}
= ken_barson_rises(AkTok, Sheeit), = ken_barson_rises(AkTok, Sheeit),
[{id, Pos, FinalAkStr}| to_so_tokens(NewSheeit)]; [{id, Pos, FinalAkStr}| to_so_tokens(NewSheeit)];
% this part is just lists:filtermap % this part is just lists:filtermap
@@ -259,9 +259,9 @@ to_so_tokens([]) ->
% `_`**: `smr_plus` requires >=1 base58 char to % `_`**: `smr_plus` requires >=1 base58 char to
% match; `ak_I`, `ak_0`, `ak__bar` all fall % match; `ak_I`, `ak_0`, `ak__bar` all fall
% through to `id` and both tokenizers agree. % through to `id` and both tokenizers agree.
ken_barson_rises(AkTokAcc = #tk{string = AkStr}, ken_barson_rises(AkTokAcc = #tk{str = AkStr},
SrcTokens = [#tk{type = CandidateType, SrcTokens = [#tk{shape = CandidateType,
string = CandidateString} str = CandidateString}
| Rest]) -> | Rest]) ->
% candidate: % candidate:
% dig out the token type and the string % dig out the token type and the string
@@ -273,7 +273,7 @@ ken_barson_rises(AkTokAcc = #tk{string = AkStr},
Smash -> Smash ->
% dig out the token from LcTokApi % dig out the token from LcTokApi
NewAkStr = AkStr ++ CandidateString, NewAkStr = AkStr ++ CandidateString,
NewAkTokAcc = AkTokAcc#tk{string = NewAkStr}, NewAkTokAcc = AkTokAcc#tk{str = NewAkStr},
ken_barson_rises(NewAkTokAcc, Rest); ken_barson_rises(NewAkTokAcc, Rest);
Pass -> Pass ->
{AkTokAcc, SrcTokens} {AkTokAcc, SrcTokens}
@@ -320,9 +320,9 @@ pass_types() ->
% follow-on tokens % follow-on tokens
% @end % @end
to_so_token(#tk{type = SfTokenType, to_so_token(#tk{shape = SfTokenType,
pos = Pos, pos = Pos,
string = SfTokenStr}) -> str = SfTokenStr}) ->
case SfTokenType of case SfTokenType of
%----------------- %-----------------
% Ignored % Ignored
@@ -371,7 +371,7 @@ to_so_token(#tk{type = SfTokenType,
NYI -> NYI ->
Msg = io_lib:format("gsc_so_scan:to_so_token/1: unhandled token shape: ~p", [NYI]), Msg = io_lib:format("gsc_so_scan:to_so_token/1: unhandled token shape: ~p", [NYI]),
error(#gsc_err{atom = nyi, error(#gsc_err{atom = nyi,
string = Msg}) str = Msg})
end. end.
%% ak/ct/sg all tokenize to id %% ak/ct/sg all tokenize to id
@@ -393,7 +393,7 @@ so_parse_char([$' | Chars]) ->
[Char] -> Char; [Char] -> Char;
_Bad -> _Bad ->
error(#gsc_err{atom = bad_token, error(#gsc_err{atom = bad_token,
string = "Bad character literal: '" ++ Chars}) str = "Bad character literal: '" ++ Chars})
end. end.
so_parse_string([$" | Chars]) -> so_parse_string([$" | Chars]) ->
@@ -435,7 +435,7 @@ unescape(Delim, [$\\, Code | Chars], Acc) ->
$t -> Ok($\t); $t -> Ok($\t);
$v -> Ok($\v); $v -> Ok($\v);
_ -> error(#gsc_err{atom = bad_escape_char, _ -> error(#gsc_err{atom = bad_escape_char,
string = "Bad control sequence: \\" ++ [Code]}) %% TODO str = "Bad control sequence: \\" ++ [Code]}) %% TODO
end; end;
unescape(Delim, [C | Chars], Acc) -> unescape(Delim, [C | Chars], Acc) ->
unescape(Delim, Chars, [C | Acc]). unescape(Delim, Chars, [C | Acc]).
+1 -1
View File
@@ -2,7 +2,7 @@
{type,lib}. {type,lib}.
{modules,[]}. {modules,[]}.
{author,"Peter Harpending"}. {author,"Peter Harpending"}.
{prefix,none}. {prefix,"gs"}.
{desc,"Exploratory sophia compiler rewrite"}. {desc,"Exploratory sophia compiler rewrite"}.
{package_id,{"otpr","gsc",{0,1,0}}}. {package_id,{"otpr","gsc",{0,1,0}}}.
{deps,[]}. {deps,[]}.