more mass renaming

This commit is contained in:
2026-06-02 01:48:05 -07:00
parent eff77fff6b
commit 270f192f0c
53 changed files with 1264 additions and 431 deletions
+10
View File
@@ -0,0 +1,10 @@
# TODONE
# TODO
- barf for outputs, slurp for inputs
- architecture needs more careful thought but only after something works
- too fuzzy right now
- possibly:
- rename parser layers sequentially:
- gsc_
+30
View File
@@ -0,0 +1,30 @@
# gsc = gajumaru sophia compiler
**This is _NOT_ the official Sophia compiler.** If you're looking for
that see https://git.qpq.swiss/QPQ-AG/sophia
This is an incomplete prototype rewrite of the legacy (official)
sophia compiler in straightforward Erlang. It grew out of my (Peter
Harpending) own efforts to document the language and its relationship
to FATE (the gajumaru virtual machine).
The goal for version 0.1 is to mirror the success behavior of the
legacy sophia compiler.
# Setup
```
git clone https://git.qpq.swiss/QPQ-AG/gsc.git
```
Add the following to `~/.bashrc` or wherever you set your `PATH`:
```
export PATH=$PATH:/path/to/gsc/bin
```
To test run
```
gsc --help
```
+199
View File
@@ -0,0 +1,199 @@
% ANSI screen drawing macros in erlang
%
% Author: Peter Harpending <peterharpending@qpq.swiss>
% Date: 2026-04-10
%
% Copyright (C) 2026, QPQ AG
% Not exhaustive, just what I need for the moment
% ref: https://gist.github.com/ConnerWill/d4b6c776b509add763e17f9f113fd25b
% Basic control characters and line helpers.
% ESC (0x1B) introduces every escape sequence in this file.
-define(ANSI_ESC, [27]).
-define(ANSI_CRLF, "\r\n").
% form feed (0x0C)
-define(ANSI_FF, [12]).
% Erase the whole display (ESC[2J) and move the cursor home (ESC[H).
% A bare form feed is handled like a line feed by xterm-style terminals,
% so it cannot be used to clear the screen.
-define(ANSI_CLEAR, [[27], "[2J", [27], "[H"]).
% X followed by CRLF
-define(ANSI_LINE(X), [X, ?ANSI_CRLF]).
% MARKDOWN TIER TEXT FORMATTING
% resets all formatting
-define(ANSI_RESET, [?ANSI_ESC, "[0m"]).
-define(ANSI_BOLD, [?ANSI_ESC, "[1m"]).
-define(ANSI_DIM, [?ANSI_ESC, "[2m"]).
-define(ANSI_ITALIC, [?ANSI_ESC, "[3m"]).
-define(ANSI_ULINE, [?ANSI_ESC, "[4m"]).
-define(ANSI_BLINK, [?ANSI_ESC, "[5m"]).
-define(ANSI_INVERT, [?ANSI_ESC, "[7m"]).
-define(ANSI_INVIS, [?ANSI_ESC, "[8m"]).
-define(ANSI_STRIKE, [?ANSI_ESC, "[9m"]).
% > Note: Both dim and bold modes are reset with the ESC[22m sequence. The
% > ESC[21m sequence is a non-specified sequence for double underline mode and
% > only work in some terminals and is reset with ESC[24m.
-define(ANSI_UNBOLD, [?ANSI_ESC, "[22m"]).
-define(ANSI_UNDIM, [?ANSI_ESC, "[22m"]).
-define(ANSI_UNITALIC, [?ANSI_ESC, "[23m"]).
-define(ANSI_UNULINE, [?ANSI_ESC, "[24m"]).
-define(ANSI_UNBLINK, [?ANSI_ESC, "[25m"]).
-define(ANSI_UNINVERT, [?ANSI_ESC, "[27m"]).
-define(ANSI_UNINVIS, [?ANSI_ESC, "[28m"]).
-define(ANSI_UNSTRIKE, [?ANSI_ESC, "[29m"]).
-define(ANSI_BOLD(X), [?ANSI_BOLD, X, ?ANSI_UNBOLD]).
-define(ANSI_DIM(X), [?ANSI_DIM, X, ?ANSI_UNDIM]).
-define(ANSI_ITALIC(X), [?ANSI_ITALIC, X, ?ANSI_UNITALIC]).
-define(ANSI_ULINE(X), [?ANSI_ULINE, X, ?ANSI_UNULINE]).
-define(ANSI_BLINK(X), [?ANSI_BLINK, X, ?ANSI_UNBLINK]).
-define(ANSI_INVERT(X), [?ANSI_INVERT, X, ?ANSI_UNINVERT]).
-define(ANSI_INVIS(X), [?ANSI_INVIS, X, ?ANSI_UNINVIS]).
-define(ANSI_STRIKE(X), [?ANSI_STRIKE, X, ?ANSI_UNSTRIKE]).
% COLORS
%
% COLOR SetFG SetBG
% -----------------------------
% Black 30 40
% Red 31 41
% Green 32 42
% Yellow 33 43
% Blue 34 44
% Magenta 35 45
% Cyan 36 46
% White 37 47
% Default 39 49
-define(ANSI_FG_RESET, [?ANSI_ESC, "[39m"]).
-define(ANSI_BG_RESET, [?ANSI_ESC, "[49m"]).
-define(ANSI_FG_BLACK, [?ANSI_ESC, "[30m"]).
-define(ANSI_FG_RED, [?ANSI_ESC, "[31m"]).
-define(ANSI_FG_GREEN, [?ANSI_ESC, "[32m"]).
-define(ANSI_FG_YELLOW, [?ANSI_ESC, "[33m"]).
-define(ANSI_FG_BLUE, [?ANSI_ESC, "[34m"]).
-define(ANSI_FG_MAGENTA, [?ANSI_ESC, "[35m"]).
-define(ANSI_FG_CYAN, [?ANSI_ESC, "[36m"]).
-define(ANSI_FG_WHITE, [?ANSI_ESC, "[37m"]).
-define(ANSI_BG_BLACK, [?ANSI_ESC, "[40m"]).
-define(ANSI_BG_RED, [?ANSI_ESC, "[41m"]).
-define(ANSI_BG_GREEN, [?ANSI_ESC, "[42m"]).
-define(ANSI_BG_YELLOW, [?ANSI_ESC, "[43m"]).
-define(ANSI_BG_BLUE, [?ANSI_ESC, "[44m"]).
-define(ANSI_BG_MAGENTA, [?ANSI_ESC, "[45m"]).
-define(ANSI_BG_CYAN, [?ANSI_ESC, "[46m"]).
-define(ANSI_BG_WHITE, [?ANSI_ESC, "[47m"]).
-define(ANSI_FG_BLACK(X), [?ANSI_FG_BLACK, X, ?ANSI_FG_RESET]).
-define(ANSI_FG_RED(X), [?ANSI_FG_RED, X, ?ANSI_FG_RESET]).
-define(ANSI_FG_GREEN(X), [?ANSI_FG_GREEN, X, ?ANSI_FG_RESET]).
-define(ANSI_FG_YELLOW(X), [?ANSI_FG_YELLOW, X, ?ANSI_FG_RESET]).
-define(ANSI_FG_BLUE(X), [?ANSI_FG_BLUE, X, ?ANSI_FG_RESET]).
-define(ANSI_FG_MAGENTA(X), [?ANSI_FG_MAGENTA, X, ?ANSI_FG_RESET]).
-define(ANSI_FG_CYAN(X), [?ANSI_FG_CYAN, X, ?ANSI_FG_RESET]).
-define(ANSI_FG_WHITE(X), [?ANSI_FG_WHITE, X, ?ANSI_FG_RESET]).
-define(ANSI_BG_BLACK(X), [?ANSI_BG_BLACK, X, ?ANSI_BG_RESET]).
-define(ANSI_BG_RED(X), [?ANSI_BG_RED, X, ?ANSI_BG_RESET]).
-define(ANSI_BG_GREEN(X), [?ANSI_BG_GREEN, X, ?ANSI_BG_RESET]).
-define(ANSI_BG_YELLOW(X), [?ANSI_BG_YELLOW, X, ?ANSI_BG_RESET]).
-define(ANSI_BG_BLUE(X), [?ANSI_BG_BLUE, X, ?ANSI_BG_RESET]).
-define(ANSI_BG_MAGENTA(X), [?ANSI_BG_MAGENTA, X, ?ANSI_BG_RESET]).
-define(ANSI_BG_CYAN(X), [?ANSI_BG_CYAN, X, ?ANSI_BG_RESET]).
-define(ANSI_BG_WHITE(X), [?ANSI_BG_WHITE, X, ?ANSI_BG_RESET]).
% bright colors
-define(ANSI_FG_BBLACK, [?ANSI_ESC, "[90m"]).
-define(ANSI_FG_BRED, [?ANSI_ESC, "[91m"]).
-define(ANSI_FG_BGREEN, [?ANSI_ESC, "[92m"]).
-define(ANSI_FG_BYELLOW, [?ANSI_ESC, "[93m"]).
-define(ANSI_FG_BBLUE, [?ANSI_ESC, "[94m"]).
-define(ANSI_FG_BMAGENTA, [?ANSI_ESC, "[95m"]).
-define(ANSI_FG_BCYAN, [?ANSI_ESC, "[96m"]).
-define(ANSI_FG_BWHITE, [?ANSI_ESC, "[97m"]).
-define(ANSI_BG_BBLACK, [?ANSI_ESC, "[100m"]).
-define(ANSI_BG_BRED, [?ANSI_ESC, "[101m"]).
-define(ANSI_BG_BGREEN, [?ANSI_ESC, "[102m"]).
-define(ANSI_BG_BYELLOW, [?ANSI_ESC, "[103m"]).
-define(ANSI_BG_BBLUE, [?ANSI_ESC, "[104m"]).
-define(ANSI_BG_BMAGENTA, [?ANSI_ESC, "[105m"]).
-define(ANSI_BG_BCYAN, [?ANSI_ESC, "[106m"]).
-define(ANSI_BG_BWHITE, [?ANSI_ESC, "[107m"]).
-define(ANSI_FG_BBLACK(X), [?ANSI_FG_BBLACK, X, ?ANSI_FG_RESET]).
-define(ANSI_FG_BRED(X), [?ANSI_FG_BRED, X, ?ANSI_FG_RESET]).
-define(ANSI_FG_BGREEN(X), [?ANSI_FG_BGREEN, X, ?ANSI_FG_RESET]).
-define(ANSI_FG_BYELLOW(X), [?ANSI_FG_BYELLOW, X, ?ANSI_FG_RESET]).
-define(ANSI_FG_BBLUE(X), [?ANSI_FG_BBLUE, X, ?ANSI_FG_RESET]).
-define(ANSI_FG_BMAGENTA(X), [?ANSI_FG_BMAGENTA, X, ?ANSI_FG_RESET]).
-define(ANSI_FG_BCYAN(X), [?ANSI_FG_BCYAN, X, ?ANSI_FG_RESET]).
-define(ANSI_FG_BWHITE(X), [?ANSI_FG_BWHITE, X, ?ANSI_FG_RESET]).
-define(ANSI_BG_BBLACK(X), [?ANSI_BG_BBLACK, X, ?ANSI_BG_RESET]).
-define(ANSI_BG_BRED(X), [?ANSI_BG_BRED, X, ?ANSI_BG_RESET]).
-define(ANSI_BG_BGREEN(X), [?ANSI_BG_BGREEN, X, ?ANSI_BG_RESET]).
-define(ANSI_BG_BYELLOW(X), [?ANSI_BG_BYELLOW, X, ?ANSI_BG_RESET]).
-define(ANSI_BG_BBLUE(X), [?ANSI_BG_BBLUE, X, ?ANSI_BG_RESET]).
-define(ANSI_BG_BMAGENTA(X), [?ANSI_BG_BMAGENTA, X, ?ANSI_BG_RESET]).
-define(ANSI_BG_BCYAN(X), [?ANSI_BG_BCYAN, X, ?ANSI_BG_RESET]).
-define(ANSI_BG_BWHITE(X), [?ANSI_BG_BWHITE, X, ?ANSI_BG_RESET]).
% 24-bit ("truecolor") foreground: ESC[38;2;<R>;<G>;<B>m
% R, G and B are passed to integer_to_list/1, so they must be integers.
-define(ANSI_FG_RGB(R,G,B),
[?ANSI_ESC,
"[38;2;",
integer_to_list(R),";",
integer_to_list(G),";",
integer_to_list(B),"m"]
).
% 24-bit background: ESC[48;2;<R>;<G>;<B>m
-define(ANSI_BG_RGB(R,G,B),
[?ANSI_ESC,
"[48;2;",
integer_to_list(R),";",
integer_to_list(G),";",
integer_to_list(B),"m"]
).
% 4-argument forms: wrap Chars in the color and then reset only the
% foreground (resp. background) to its default.
-define(ANSI_FG_RGB(R,G,B,Chars), [?ANSI_FG_RGB(R,G,B), Chars, ?ANSI_FG_RESET]).
-define(ANSI_BG_RGB(R,G,B,Chars), [?ANSI_BG_RGB(R,G,B), Chars, ?ANSI_BG_RESET]).
% cursor controls
%
% All counts/coordinates go through integer_to_list/1, so they must be
% integers.
-define(ANSI_CUR_HOME, [?ANSI_ESC, "[H"]).
% note the argument order: the macro takes (X, Y) but the sequence is
% emitted as ESC[<Y>;<X>H
-define(ANSI_CUR_XY(X, Y), [?ANSI_ESC, "[", integer_to_list(Y), ";", integer_to_list(X), "H"]).
% relative moves by N: A = up, B = down, C = right, D = left
-define(ANSI_CUR_UP(N), [?ANSI_ESC, "[", integer_to_list(N), "A"]).
-define(ANSI_CUR_DOWN(N), [?ANSI_ESC, "[", integer_to_list(N), "B"]).
-define(ANSI_CUR_RIGHT(N), [?ANSI_ESC, "[", integer_to_list(N), "C"]).
-define(ANSI_CUR_LEFT(N), [?ANSI_ESC, "[", integer_to_list(N), "D"]).
% ESC 7 / ESC 8 pair
-define(ANSI_CUR_SAVE, [?ANSI_ESC, "7"]).
-define(ANSI_CUR_RESTORE, [?ANSI_ESC, "8"]).
% ESC[6n; NOTE(review): presumably the terminal answers with the cursor
% position on its input stream -- confirm against the terminal docs
-define(ANSI_CUR_QUERY, [?ANSI_ESC, "[6n"]).
% zero-argument forms move by one
-define(ANSI_CUR_UP, ?ANSI_CUR_UP(1)).
-define(ANSI_CUR_DOWN, ?ANSI_CUR_DOWN(1)).
-define(ANSI_CUR_RIGHT, ?ANSI_CUR_RIGHT(1)).
-define(ANSI_CUR_LEFT, ?ANSI_CUR_LEFT(1)).
% relative movement "forward" +X=right, +Y=down
%
% Expands to a two-element iolist [Horizontal, Vertical].
%
% - A zero component emits nothing. Terminals treat a count of 0 as 1, so
%   sending ESC[0D / ESC[0A would move the cursor by one cell.
% - The arguments are evaluated exactly once, by applying a fun. They may
%   therefore be arbitrary integer expressions (e.g. `A - B' or a function
%   call), not just guard-safe variables.
% - The fun's parameter names are deliberately odd so they are unlikely to
%   shadow variables at the use site.
-define(ANSI_CUR_VECT(X, Y),
    (fun(AnsiCurVectDx__, AnsiCurVectDy__) ->
        [if AnsiCurVectDx__ < 0 -> ?ANSI_CUR_LEFT(-AnsiCurVectDx__);
            AnsiCurVectDx__ > 0 -> ?ANSI_CUR_RIGHT(AnsiCurVectDx__);
            true                -> []
         end,
         if AnsiCurVectDy__ < 0 -> ?ANSI_CUR_UP(-AnsiCurVectDy__);
            AnsiCurVectDy__ > 0 -> ?ANSI_CUR_DOWN(AnsiCurVectDy__);
            true                -> []
         end]
     end)((X), (Y))
).
% DEC private modes: ESC[?<n>h sets, ESC[?<n>l resets.
% 1049 = alternate screen buffer
-define(ANSI_ALTBUF, [?ANSI_ESC, "[?1049h"]).
-define(ANSI_UNALTBUF, [?ANSI_ESC, "[?1049l"]).
% 25 = cursor visibility
-define(ANSI_CUR_INVIS, [?ANSI_ESC, "[?25l"]).
-define(ANSI_CUR_VIS, [?ANSI_ESC, "[?25h"]).
% 7 = auto-wrap (DECAWM). The "ESC[=7h" spelling is ANSI.SYS screen-mode
% syntax; the other macros in this group target xterm-style terminals, which
% expect the "?"-prefixed private-mode form.
-define(ANSI_WRAP, [?ANSI_ESC, "[?7h"]).
-define(ANSI_NOWRAP, [?ANSI_ESC, "[?7l"]).
+256
View File
@@ -0,0 +1,256 @@
In Congress, July 4, 1776
The unanimous Declaration of the thirteen united States of America,
When in the Course of human events, it becomes necessary for one
people to dissolve the political bands which have connected them with
another, and to assume among the powers of the earth, the separate
and equal station to which the Laws of Nature and of Nature's God
entitle them, a decent respect to the opinions of mankind requires
that they should declare the causes which impel them to the
separation.
We hold these truths to be self-evident, that all men are created
equal, that they are endowed by their Creator with certain
unalienable Rights, that among these are Life, Liberty and the
pursuit of Happiness.--That to secure these rights, Governments are
instituted among Men, deriving their just powers from the consent of
the governed, --That whenever any Form of Government becomes
destructive of these ends, it is the Right of the People to alter or
to abolish it, and to institute new Government, laying its foundation
on such principles and organizing its powers in such form, as to them
shall seem most likely to effect their Safety and Happiness.
Prudence, indeed, will dictate that Governments long established
should not be changed for light and transient causes; and accordingly
all experience hath shewn, that mankind are more disposed to suffer,
while evils are sufferable, than to right themselves by abolishing
the forms to which they are accustomed. But when a long train of
abuses and usurpations, pursuing invariably the same Object evinces a
design to reduce them under absolute Despotism, it is their right, it
is their duty, to throw off such Government, and to provide new
Guards for their future security.--Such has been the patient
sufferance of these Colonies; and such is now the necessity which
constrains them to alter their former Systems of Government. The
history of the present King of Great Britain is a history of repeated
injuries and usurpations, all having in direct object the
establishment of an absolute Tyranny over these States. To prove
this, let Facts be submitted to a candid world.
He has refused his Assent to Laws, the most wholesome and
necessary for the public good.
He has forbidden his Governors to pass Laws of immediate and
pressing importance, unless suspended in their operation till his
Assent should be obtained; and when so suspended, he has utterly
neglected to attend to them.
He has refused to pass other Laws for the accommodation of large
districts of people, unless those people would relinquish the
right of Representation in the Legislature, a right inestimable
to them and formidable to tyrants only.
He has called together legislative bodies at places unusual,
uncomfortable, and distant from the depository of their public
Records, for the sole purpose of fatiguing them into compliance
with his measures.
He has dissolved Representative Houses repeatedly, for opposing
with manly firmness his invasions on the rights of the people.
He has refused for a long time, after such dissolutions, to cause
others to be elected; whereby the Legislative powers, incapable
of Annihilation, have returned to the People at large for their
exercise; the State remaining in the mean time exposed to all the
dangers of invasion from without, and convulsions within.
He has endeavoured to prevent the population of these States; for
that purpose obstructing the Laws for Naturalization of
Foreigners; refusing to pass others to encourage their migrations
hither, and raising the conditions of new Appropriations of
Lands.
He has obstructed the Administration of Justice, by refusing his
Assent to Laws for establishing Judiciary powers.
He has made Judges dependent on his Will alone, for the tenure of
their offices, and the amount and payment of their salaries.
He has erected a multitude of New Offices, and sent hither swarms
of Officers to harrass our people, and eat out their substance.
He has kept among us, in times of peace, Standing Armies without
the Consent of our legislatures.
He has affected to render the Military independent of and
superior to the Civil power.
He has combined with others to subject us to a jurisdiction
foreign to our constitution, and unacknowledged by our laws;
giving his Assent to their Acts of pretended Legislation:
For Quartering large bodies of armed troops among us:
For protecting them, by a mock Trial, from punishment for any
Murders which they should commit on the Inhabitants of these
States:
For cutting off our Trade with all parts of the world:
For imposing Taxes on us without our Consent:
For depriving us in many cases, of the benefits of Trial by Jury:
For transporting us beyond Seas to be tried for pretended
offences:
For abolishing the free System of English Laws in a neighbouring
Province, establishing therein an Arbitrary government, and
enlarging its Boundaries so as to render it at once an example
and fit instrument for introducing the same absolute rule into
these Colonies:
For taking away our Charters, abolishing our most valuable Laws,
and altering fundamentally the Forms of our Governments:
For suspending our own Legislatures, and declaring themselves
invested with power to legislate for us in all cases whatsoever.
He has abdicated Government here, by declaring us out of his
Protection and waging War against us.
He has plundered our seas, ravaged our Coasts, burnt our towns,
and destroyed the lives of our people.
He is at this time transporting large Armies of foreign
Mercenaries to compleat the works of death, desolation and
tyranny, already begun with circumstances of Cruelty & perfidy
scarcely paralleled in the most barbarous ages, and totally
unworthy the Head of a civilized nation.
He has constrained our fellow Citizens taken Captive on the high
Seas to bear Arms against their Country, to become the
executioners of their friends and Brethren, or to fall themselves
by their Hands.
He has excited domestic insurrections amongst us, and has
endeavoured to bring on the inhabitants of our frontiers, the
merciless Indian Savages, whose known rule of warfare, is an
undistinguished destruction of all ages, sexes and conditions.
In every stage of these Oppressions We have Petitioned for Redress in
the most humble terms: Our repeated Petitions have been answered only
by repeated injury. A Prince, whose character is thus marked by every
act which may define a Tyrant, is unfit to be the ruler of a free
people.
Nor have We been wanting in attentions to our Brittish brethren. We
have warned them from time to time of attempts by their legislature
to extend an unwarrantable jurisdiction over us. We have reminded
them of the circumstances of our emigration and settlement here. We
have appealed to their native justice and magnanimity, and we have
conjured them by the ties of our common kindred to disavow these
usurpations, which, would inevitably interrupt our connections and
correspondence. They too have been deaf to the voice of justice and
of consanguinity. We must, therefore, acquiesce in the necessity,
which denounces our Separation, and hold them, as we hold the rest of
mankind, Enemies in War, in Peace Friends.
We, therefore, the Representatives of the united States of America,
in General Congress, Assembled, appealing to the Supreme Judge of the
world for the rectitude of our intentions, do, in the Name, and by
Authority of the good People of these Colonies, solemnly publish and
declare, That these United Colonies are, and of Right ought to be
Free and Independent States; that they are Absolved from all
Allegiance to the British Crown, and that all political connection
between them and the State of Great Britain, is and ought to be
totally dissolved; and that as Free and Independent States, they have
full Power to levy War, conclude Peace, contract Alliances, establish
Commerce, and to do all other Acts and Things which Independent
States may of right do. And for the support of this Declaration, with
a firm reliance on the protection of divine Providence, we mutually
pledge to each other our Lives, our Fortunes and our sacred Honor.
Georgia
Button Gwinnett
Lyman Hall
George Walton
North Carolina
William Hooper
Joseph Hewes
John Penn
South Carolina
Edward Rutledge
Thomas Heyward, Jr.
Thomas Lynch, Jr.
Arthur Middleton
Massachusetts
John Hancock
Maryland
Samuel Chase
William Paca
Thomas Stone
Charles Carroll of Carrollton
Virginia
George Wythe
Richard Henry Lee
Thomas Jefferson
Benjamin Harrison
Thomas Nelson, Jr.
Francis Lightfoot Lee
Carter Braxton
Pennsylvania
Robert Morris
Benjamin Rush
Benjamin Franklin
John Morton
George Clymer
James Smith
George Taylor
James Wilson
George Ross
Delaware
Caesar Rodney
George Read
Thomas McKean
New York
William Floyd
Philip Livingston
Francis Lewis
Lewis Morris
New Jersey
Richard Stockton
John Witherspoon
Francis Hopkinson
John Hart
Abraham Clark
New Hampshire
Josiah Bartlett
William Whipple
Massachusetts
Samuel Adams
John Adams
Robert Treat Paine
Elbridge Gerry
Rhode Island
Stephen Hopkins
William Ellery
Connecticut
Roger Sherman
Samuel Huntington
William Williams
Oliver Wolcott
New Hampshire
Matthew Thornton
+44
View File
@@ -0,0 +1,44 @@
%%=====================================================
%% ARG PARSING
%%=====================================================
%%-----------------------------------------------------
%% TOKENIZING
%%-----------------------------------------------------
%% One tokenized command-line argument.
%%   shape - '--' or '-' for dash-prefixed arguments, str for anything else,
%%           none when unset
%%   val   - the text after the dash prefix; none for plain arguments
%%   str   - the argument exactly as it was given
-record(ctk,
        {shape = none :: none | '-' | '--' | str,
         val   = none :: none | string(),
         str   = none :: none | string()}).
-type ctk() :: #ctk{}.
-spec tokenize(Args) -> CliTokens when
      Args      :: [string()],
      CliTokens :: [ctk()].
%% @private Turn every raw command-line argument into a #ctk{} token,
%% preserving argument order.
tokenize(Args) ->
    lists:map(fun tokenize_arg/1, Args).
%% @private Classify a single argument by its dash prefix.
%% "--x" -> shape '--', val "x"; "-x" -> shape '-', val "x";
%% anything else -> shape str, val none. The original text is always kept
%% in the str field.
tokenize_arg([$-, $- | Long] = Arg) ->
    #ctk{str = Arg, shape = '--', val = Long};
tokenize_arg([$- | Short] = Arg) ->
    #ctk{str = Arg, shape = '-', val = Short};
tokenize_arg(Arg) ->
    #ctk{str = Arg, shape = str, val = none}.
%%-----------------------------------------------------
%% PARSING
%%-----------------------------------------------------
+185 -8
View File
@@ -1,11 +1,5 @@
%%% @doc
%%% GSC CLI: gsc_cli
%%%
%%% This module is currently named `gsc_cli', but you may want to change that.
%%% Remember that changing the name in `-module()' below requires renaming
%%% this file, and it is recommended to run `zx update .app` in the main
%%% project directory to make sure the ebin/gsc_cli.app file stays in
%%% sync with the project whenever you add, remove or rename a module.
%%% GSC CLI: explorer/harness for sfc iteration
%%% @end
-module(gsc_cli).
@@ -16,10 +10,193 @@
-export([start/1]).
-include("$gsc_include/gsc.hrl").
-include("ansi.hrl").
% Print the help screen to standard output.
do_help() ->
    Screen = help_screen(),
    io:format("~ts", [Screen]).
% The help text as chardata (a list of lines).
help_screen() ->
    ["you can't help people who refuse to help themselves\n"].
-spec start(ArgV) -> ok
    when ArgV :: [string()].
% Entry point.
%
% No arguments, "shell" or "eshell" open an interactive Erlang shell.
% Any other argument list is echoed, handed to do/1, and then the runtime is
% stopped through zx:silent_stop/0.
start(ArgV) when ArgV =:= [];
                 ArgV =:= ["shell"];
                 ArgV =:= ["eshell"] ->
    do_eshell(),
    ok;
start(ArgV) ->
    ok = io:format("Hello, World! Args: ~tp~n", [ArgV]),
    do(ArgV),
    zx:silent_stop().
% Dispatch a (non-shell) command line to its handler.
% Unrecognised argument lists are reported and followed by the help screen.
do(["list"]) ->
    do_tlist();
do(["list", "tests"]) ->
    do_tlist();
do(["run", "tests"]) ->
    % report exactly the modules do_runall_tests/0 is about to run
    io:format("TestModules = ~p~n", [test_mods()]),
    do_runall_tests();
do(["tokenizers_agree", Foo]) ->
    io:format("~p~n", [tokenizers_agree(Foo)]);
% older command names (e.g. lctokens) are being phased out
% tokens = native sfc token representation
do(["tokens", Foo])         -> do_tokens(Foo);
do(["color_tokens", Foo])   -> do_color_tokens(Foo);
do(["ctokens", Foo])        -> do_color_tokens(Foo);
do(["colour_tokens" | _])   -> do_doi();
% so_tokens = so_scan tokens
do(["so", "tokens", Foo])   -> do_so_tokens(Foo);
do(["so_tokens", Foo])      -> do_so_tokens(Foo);
% gso_tokens = our imitation of the so_scan output
do(["gso", "tokens", Foo])  -> do_gso_tokens(Foo);
do(["gso_tokens", Foo])     -> do_gso_tokens(Foo);
% script utility: compile a file and run its main/0
do(["rmm", Foo]) ->
    do_rmm(Foo);
do(Args) ->
    io:format("bad args: ~p~n", [Args]),
    do_help().
% Print (not run) the `less' command line for priv/doi.txt under the zx home
% directory.
do_doi() ->
    DoiPath = zx:get_home() ++ "/priv/doi.txt",
    io:format("~s~n", ["less " ++ DoiPath]).
% Run main/0 of every test module, in the order given by test_mods/0.
do_runall_tests() ->
    _ = [run_mod_main(Mod) || Mod <- test_mods()],
    ok.
% Test modules are the available modules whose name starts with "gt_".
test_mods() ->
known_modules_with_prefix("gt_").
% Sorted list of module names (as atoms) among code:all_available/0 whose
% name starts with the string Pfx.
known_modules_with_prefix(Pfx) ->
    Matching = [list_to_atom(Name)
                || {Name, _BeamPath, _Loaded} <- code:all_available(),
                   lists:prefix(Pfx, Name)],
    lists:sort(Matching).
% Print a banner for Mod, then call Mod:main().
% Any exception is caught and reported (class, reason, stack trace) so that
% one failing module does not stop the remaining ones.
run_mod_main(Mod) ->
    Rule = "========================================\n",
    io:format(Rule ++ "~p:main()\n" ++ Rule, [Mod]),
    try
        Mod:main()
    catch
        Class:Reason:Stack ->
            io:format("~p: ~p~n", [Class, Reason]),
            io:format("Trace:~n~p~n", [Stack])
    end.
% Print the name of every test module, one per line.
do_tlist() ->
    _ = [io:format("~s~n", [ModName]) || ModName <- test_mods()],
    ok.
-spec do_eshell() -> ok.
% @doc Greet the user and start an interactive Erlang shell.
% An already running shell counts as success; any other error reason is
% raised with error/1.
do_eshell() ->
    io:format("Welcome to the GSC shell!~n", []),
    Started = shell:start_interactive(),
    case Started of
        {error, Reason} when Reason =/= already_started -> error(Reason);
        {error, already_started} -> ok;
        ok -> ok
    end.
% true when so_tokens/1 and tokens/1 produce exactly equal results for File.
% NOTE(review): the command comments in do/1 describe tokens/1 as the native
% representation and gso_tokens/1 as the so_scan imitation -- confirm that
% tokens/1 is the intended right-hand side of this comparison.
tokenizers_agree(File) ->
    Legacy = so_tokens(File),
    Native = tokens(File),
    Legacy =:= Native.
% Print every token from tokens/1, one term per line.
do_tokens(FilePath) ->
    lists:map(fun(Tk) -> io:format("~p~n", [Tk]) end, tokens(FilePath)).
% Print every token from so_tokens/1, one term per line.
do_so_tokens(FilePath) ->
    lists:map(fun(Tk) -> io:format("~p~n", [Tk]) end, so_tokens(FilePath)).
% Print every token from gso_tokens/1, one term per line.
do_gso_tokens(FilePath) ->
    lists:map(fun(Tk) -> io:format("~p~n", [Tk]) end, gso_tokens(FilePath)).
% rmm = run module:main() with our context loaded
% useful for prototyping
%
% Compiles FilePath with compile:file/1 and calls main/0 of the resulting
% module; any non-{ok, _} compile result is raised with error/1.
% NOTE(review): compile:file/1 is called without options, so this presumably
% relies on the produced .beam being findable on the code path -- confirm.
do_rmm(FilePath) ->
    Compiled = compile:file(FilePath),
    case Compiled of
        {ok, Module} -> Module:main();
        Failure      -> error(Failure)
    end.
% Read FilePath, NFC-normalise it to a character list and scan it with
% so_scan. Crashes (badmatch) if reading or scanning does not return {ok, _}.
so_tokens(FilePath) ->
    {ok, Bytes} = file:read_file(FilePath),
    {ok, Scanned} = so_scan:scan(unicode:characters_to_nfc_list(Bytes)),
    Scanned.
% Read FilePath, NFC-normalise it to a character list and scan it with
% gso_scan. Crashes (badmatch) if reading or scanning does not return {ok, _}.
gso_tokens(FilePath) ->
    {ok, Bytes} = file:read_file(FilePath),
    {ok, Scanned} = gso_scan:scan(unicode:characters_to_nfc_list(Bytes)),
    Scanned.
% Tokens of FilePath as produced by gsc:tokens_from_file/1.
% Crashes (badmatch) on anything other than {ok, Tokens}.
tokens(FilePath) ->
{ok, Tokens} = gsc:tokens_from_file(FilePath),
Tokens.
% Print the source of File with consecutive tokens in alternating colors, the
% whole output in inverted video. If tokenizing fails the returned error term
% is printed instead.
do_color_tokens(File) ->
    case gsc:tokens_from_file(File) of
        {ok, Tokens} ->
            Colored = colorize_tokens(chunk_color_wheel(), Tokens, ""),
            % ~ts rather than ~s: ~s raises badarg as soon as the chardata
            % holds a code point above 255, which source text may well do
            io:format("~ts", [[?ANSI_INVERT, Colored, ?ANSI_UNINVERT]]);
        Error ->
            io:format("~p~n", [Error])
    end.
% Colors cycled through, in this order, for consecutive tokens.
chunk_color_wheel() ->
% two-color alternative: [yellow, blue].
[red, green, yellow, blue, magenta, cyan].
% Append each token's colored text to the iolist Acc, taking the next color
% from the front of Wheel for every token and rotating the wheel as it goes.
colorize_tokens(Wheel, Tokens, Acc) ->
    Paint =
        fun(Token, {WheelIn, Out}) ->
            {Color, WheelOut} = rotate(WheelIn),
            {WheelOut, [Out, colorize_token_str(Color, Token)]}
        end,
    {_FinalWheel, Painted} = lists:foldl(Paint, {Wheel, Acc}, Tokens),
    Painted.
% Take the head of a non-empty list and return it together with the list
% rotated left by one (head moved to the end).
rotate([Head | Tail]) ->
    Rotated = lists:append(Tail, [Head]),
    {Head, Rotated}.
% The token's source text wrapped in the escape sequences for Color.
colorize_token_str(Color, #tk{str = Text}) ->
    {Open, Close} = color_fixes(Color),
    [Open, Text, Close].
% {Prefix, Suffix} escape sequences for a wheel color: the prefix selects the
% foreground color, the suffix resets the foreground to its default.
% Only the six colors listed here are accepted.
color_fixes(red) -> {?ANSI_FG_RED, ?ANSI_FG_RESET};
color_fixes(green) -> {?ANSI_FG_GREEN, ?ANSI_FG_RESET};
color_fixes(yellow) -> {?ANSI_FG_YELLOW, ?ANSI_FG_RESET};
color_fixes(blue) -> {?ANSI_FG_BLUE, ?ANSI_FG_RESET};
color_fixes(magenta) -> {?ANSI_FG_MAGENTA, ?ANSI_FG_RESET};
color_fixes(cyan) -> {?ANSI_FG_CYAN, ?ANSI_FG_RESET}.
+157
View File
@@ -0,0 +1,157 @@
% gsc tokenizer tests
-module(tsfp_gsc_tokenizer).
-export([
main/0, ct_dir/0
%tokens_match/1
]).
-include("$gsc_include/gsc.hrl").
-include_lib("eunit/include/eunit.hrl").
% Run this module's EUnit tests with the `verbose' option.
main() ->
eunit:test(?MODULE, [verbose]).
% non-verbose alternative: eunit:test(?MODULE).
% directory containing the tests for the tokenizer:
% "test-data/gt_tokens" under the home directory reported by zx_daemon
ct_dir() ->
zx_daemon:get_home() ++ "/test-data/gt_tokens".
% "tokenizers_agree" subdirectory of ct_dir/0
agreement_tests_dir() ->
ct_dir() ++ "/tokenizers_agree".
% Divergence contracts: inputs on which the gsc tokenizer and the so
% tokenizer were found to disagree.
%
% These are mostly corner cases such as a string crossing a line boundary or
% an unterminated block comment. "Divergence" means so_scan disagrees with
% gsc_so_scan in one of the following ways:
%
% - one succeeds when the other errors
% - they disagree on the success case
%
% Making the *errors* of two differently built programs agree is not a goal.
%
% Returns a sorted list of {FileName, FilePath} pairs for every `.aes' entry
% in agreement_tests_dir/0. FileName is the bare directory entry (foo.aes),
% FilePath is that entry prefixed with the directory (/path/to/foo.aes), so a
% test can use the short name in its title. Entries with any other extension
% are skipped; this keeps out editor swap files, which do not lex because
% they are not Unicode text. Crashes (badmatch) if the directory cannot be
% listed.
div_files() ->
    Dir = agreement_tests_dir(),
    % file:list_dir/1 returns bare names, without the directory prefix
    {ok, Entries} = file:list_dir(Dir),
    lists:sort([{Entry, Dir ++ "/" ++ Entry}
                || Entry <- Entries,
                   filename:extension(Entry) =:= ".aes"]).
%div_file_names() -> [N || {N, _} <- div_files()].
%div_file_paths() -> [P || {_, P} <- div_files()].
% Test generator: for every divergence contract that is expected to tokenize,
% the concatenation of the token strings must reproduce the file.
tokstr_concat_test_() ->
    % contracts (unterminated block comments and similar) that do not
    % tokenize properly and are therefore left out
    Excluded = ["div05_bcom_eof.aes",
                "div06_bcom_in_expr.aes",
                "div07_bcom_nested.aes",
                "div08_bcom_simple.aes"],
    % a list of sources, so that more can be added later
    Sources = lists:flatten([div_files()]),
    Cases = [concat_property(Name, Path)
             || {Name, Path} <- Sources,
                not lists:member(Name, Excluded)],
    {"file = sum(tokens)", Cases}.
% Build one titled EUnit test for FileName.
% The file is read and NFC-normalised when the test is generated; the test
% itself tokenizes those characters and asserts that joining the token
% strings gives the same characters back.
% NOTE(review): a tokenizer result other than {ok, _} makes the test pass
% without checking anything -- confirm that this is intended.
concat_property(FileName, FilePath) ->
    {ok, Bytes} = file:read_file(FilePath),
    Expected = unicode:characters_to_nfc_list(Bytes),
    Check =
        fun() ->
            case gsc_tokenizer:tokens(Expected) of
                {ok, Tokens} ->
                    Joined = concat_token_strs(Tokens, []),
                    ?assertEqual(Expected, Joined);
                _NotOk ->
                    ok
            end
        end,
    {FileName ++ ": file = sum(tokens)", Check}.
% Join Acc and the `string' field of every token, in order, and return the
% result as an NFC-normalised character list.
% NOTE(review): other code in this change matches tokens as #tk{str = _};
% confirm that #gsc_token{string = _} is still what gsc.hrl defines.
concat_token_strs(Tokens, Acc) ->
    Strings = lists:map(fun(#gsc_token{string = S}) -> S end, Tokens),
    unicode:characters_to_nfc_list([Acc | Strings]).
% The trailing underscore marks this as a test *generator*: it returns a
% titled group with one tokens_match/2 test per divergence contract.
div_test_() ->
    Cases = [tokens_match(Name, Path) || {Name, Path} <- div_files()],
    {"claude tokenizer divergences fixed", Cases}.
% Build one titled EUnit test for FileName.
% Both tokenizers are run on FilePath when the test is generated. The test
% passes when both succeed with equal tokens or both return an error, and
% fails when exactly one of them succeeds.
tokens_match(FileName, FilePath) ->
    Legacy = sfp:so_tokens(FilePath),
    Ours = sfp:gsc_so_tokens(FilePath),
    Check =
        fun() ->
            case {Legacy, Ours} of
                {{error, _}, {error, _}} ->
                    ok;
                {{ok, LegacyTokens}, {ok, OurTokens}} ->
                    ?assertEqual(LegacyTokens, OurTokens);
                {{ok, _}, {error, _}} ->
                    error("so_scan succeeded and gsc_so_scan failed");
                {{error, _}, {ok, _}} ->
                    error("so_scan failed and gsc_so_scan succeded")
            end
        end,
    {FileName ++ ": tokenizers_agree", Check}.
+2 -2
View File
@@ -2,11 +2,11 @@
{type,cli}.
{modules,[]}.
{mod,"gsc_cli"}.
{prefix,none}.
{author,"Peter Harpending"}.
{prefix,none}.
{desc,"GSC CLI and test suite"}.
{package_id,{"otpr","gsc_cli",{0,1,0}}}.
{deps,[{"otpr","gsc",{0,1,0}}]}.
{deps,[{"otpr","sophia",{9,0,0}},{"otpr","gsc",{0,1,0}}]}.
{key_name,none}.
{a_email,"peterharpending@qpq.swiss"}.
{c_email,"peterharpending@qpq.swiss"}.
+3 -3
View File
@@ -104,12 +104,12 @@
% specifically account for this error
-record(gsc_err_bcom_unterminated,
{prev_tokens :: [tk()],
break_pos :: gsc_pos(),
break_pos :: tk_pos(),
rest :: string()}).
-record(gsc_err_no_tokmatch,
{prev_tokens :: [tk()],
break_pos :: gsc_pos(),
break_pos :: tk_pos(),
rest :: string()}).
@@ -133,7 +133,7 @@
% generic placeholder error for now
-record(gsc_err,
{atom :: atom(),
string = none :: none | iolist(),
str = none :: none | iolist(),
extra = none :: none | any()}).
% @doc all errors GSC can return conveniently listed in
+10 -10
View File
@@ -158,7 +158,7 @@
%
%-type parse_error_() :: any().
%-record(parse_error,
% {pos = none :: none | gsc_pos(),
% {pos = none :: none | tk_pos(),
% msg = "" :: string(),
% subs = [] :: [parse_error_()],
% extra = none :: any()}).
@@ -196,13 +196,13 @@
%gulp_file([]) ->
% {error, empty_file};
%gulp_file(Tokens) ->
% case gsc_tokens:take_block(Tokens) of
% case gs_tokens:take_block(Tokens) of
% {Tokens, []} ->
% gulp_block(fun gulp_top_decl/1, Tokens);
% %gulp_file2([], [], Tokens);
% {A, B} ->
% StartPos = gsc_tokens:start_pos(A),
% ErrPos = gsc_tokens:start_pos(B),
% StartPos = gs_tokens:start_pos(A),
% ErrPos = gs_tokens:start_pos(B),
% Msg = efmt("gulp_file: block starting at ~p ends at ~p instead of EOF",
% [StartPos, ErrPos]),
% {error, #parse_error{pos = ErrPos, msg = Msg}}
@@ -212,7 +212,7 @@
%
%%gulp_file2(AccOks, AccErrs, Tokens = [_ | _]) ->
%% % ItemTokens will be nonempty
%% {ItemTokens, NewTokens} = gsc_tokens:take_block_item(Tokens),
%% {ItemTokens, NewTokens} = gs_tokens:take_block_item(Tokens),
%% case gulp_top_decl(ItemTokens) of
%% {gulp, Ok} -> gulp_file2([Ok | AccOks], AccErrs, NewTokens);
%% Err -> gulp_file2(AccOks, [Err | AccErrs], NewTokens)
@@ -258,7 +258,7 @@
%
%gulp_block(GulpItem, AccOks, AccErrs, Tokens = [_ | _]) ->
% % ItemTokens will be nonempty
% {ItemTokens, NewTokens} = gsc_tokens:take_block_item(Tokens),
% {ItemTokens, NewTokens} = gs_tokens:take_block_item(Tokens),
% case GulpItem(ItemTokens) of
% {gulp, Ok} -> gulp_block(GulpItem, [Ok | AccOks], AccErrs, NewTokens);
% Err -> gulp_block(GulpItem, AccOks, [Err | AccErrs], NewTokens)
@@ -284,7 +284,7 @@
%% | Using
%% @end
%gulp_top_decl(DeclTokens) ->
% case gsc_tokens:strings(3, DeclTokens) of
% case gs_tokens:strings(3, DeclTokens) of
% ["payable", "contract", "interface"] ->
% gulp_nyi(DeclTokens);
% ["contract", "interface" | _] ->
@@ -410,7 +410,7 @@
%% | (EModifier* 'entrypoint' | FModifier* 'function') Block(FunDecl)
%% | Using
%gulp_decl(Tokens) ->
% case gsc_tokens:strings(1, Tokens) of
% case gs_tokens:strings(1, Tokens) of
% ["type"] -> gulp_type_alias(Tokens);
% _ -> gulp_nyi(Tokens)
% end.
@@ -611,7 +611,7 @@
%% Type1 = {plist, Types} () (foo) (foo, bar)
%% | {token, #tk{}} foo Bar.baz 'quux
%slurp_type1(Tks) ->
% case gsc_tokens:slurp_plist(Tks) of
% case gs_tokens:slurp_plist(Tks) of
% % head token is NOT open paren -> must be id/qid/tvar
% {slurp, [], [Tk | NewTks]} ->
% TkType = Tk#tk.type,
@@ -633,7 +633,7 @@
%
%
%%slurp_type_expr_plist(Tks) ->
%% case gsc_tokens:slurp_plist(Tks) of
%% case gs_tokens:slurp_plist(Tks) of
%% % head token is NOT open paren -> must be id/qid/tvar
%% {slurp, [], [Tk | NewTks]} ->
%% TkType = Tk#tk.type,
+216
View File
@@ -0,0 +1,216 @@
-module(gsc_parse_type_expr).
%
%-export_type([
%]).
%
%-export([
% unsafe_vtks_from_string/1,
% gulp_vtks/1,
% take_until_ifx_op/1
%]).
%
%-include("$gsc_include/gsc.hrl").
%
%
%%------------------------------------------------------
%% TYPES
%%------------------------------------------------------
%
%-type vtk_ifx_op() :: vtk_apply_to
% | {'vtk_*', tk()}
% | {'vtk_=>', tk()}.
%
%-type vtk() :: tk()
% | {vtk_plist, [tk()]}
% | vtk_ifx_op().
%
%
%-type gulped(X) :: {gulp, X}
% | {error, any()}.
%
%-type slurped(X) :: {slurp, X, Rest :: [tk()]}
% | {error, any()}.
%
%
%%------------------------------------------------------
%% FUNCTIONS
%%------------------------------------------------------
%
%
%-spec unsafe_vtks_from_string(SrcStr) -> Vtks when
% SrcStr :: string(),
% Vtks :: [vtk()].
%
%% @doc for testing
%unsafe_vtks_from_string(S) ->
% {ok, SigTks} = gs_tokens:significant_tokens(S),
% {gulp, Vtks} = gulp_vtks(SigTks),
% Vtks.
%
%
%% operators in descending order of exteriority
%%
%% precedence verbiage confuses me
%%
%% 1 + 2 * 3 ^ 4
%%
%% (+ 1 (* 2 (^ 3 4)))
%%
%% precedence is thinking about the operators as like
%% having arms and legs and doing something. how much
%% power do they have to bind to their neighbors. it's
%% thinking of your operators as verbs in some state
%% machine.
%%
%% i don't like when i have to model the state machine
%% in my head when i'm reading code. everything should
%% just be there. exteriority and interiority are
%% properties of nouns, not properties of verbs.
%%
%% functional programming is all about offloading as
%% much of your reasoning into nouns as possible. verbs
%% change things. and change is bad.
%-type chunk_strategy()
% :: ast_parens % prefix paren (x y z)
% | '=>' % infixr
% | '*' % infix
% | 'apply' % postfix paren f (...)
% .
%
%
%-record(ast_parens,
% {open = none :: none | tk(),
% inner = none :: none | [tk()],
% close = none :: none | tk()}).
%
%chunk_by(Strategy, Tokens) ->
% chunk_by(Strategy, [], Tokens).
%
%
%-spec chunk0(Strategy, Oks, Errs, Tokens) -> Result when
% Strategy :: chunk_strategy(),
% Oks :: [any()],
% Errs :: [{error, Reason :: any()}],
% Tokens :: [tk()],
% Result :: {ok,
%
%gulp_chunks_by(_, Stk, [], []) ->
% {gulp, lists:reverse(Stk)};
%gulp_chunks_by(_, _, Errs, []) ->
% {error, {fixme, {?MODULE, ?LINE}, Errs}};
%gulp_chunks_by(plist, Stk, Errs, Tokens) ->
% case slurp_plist_rec(Tokens) of
% {slurp, Plist, NewTokens} ->
% gulp_chunks_by(plist, [PList | Stk], Errs, NewTokens);
% barf ->
% [Token | NewTokens] = Tokens,
% gulp_chunks_by(plist, [Token | Stk], Errs, NewTokens);
% Error ->
% gulp_chunks_by(plist, Stk, [Error | Errs], Tokens);
% end.
%
%slurp_plist_rec(Tokens = [#tk{string = "(" | _]) ->
% case gs_tokens:slurp_plist(Tokens) of
% {slurp, [], _} ->
% barf;
% {slurp, PTokens, NewTokens} ->
% PTokensInner = pt_inner(PTokens),
% end;
%
%%-spec gulp_ifx_tree(Tokens) -> gulped(IfxTree) when
%% Tokens :: [tk()],
%% IfxTree :: ifx_tree().
%%
%%-spec chunk_by(ChunkStrategy, Tokens) -> Result when
%% ChunkStrategy :: chunk_strategy(),
%% Tokens :: [tk()],
%% Result :: {ChunkStrategy,
%
%
%
%-spec gulp_vtks(Tokens) -> Result when
% Tokens :: [tk()],
% Result :: gulped(VirtualTokens),
% VirtualTokens :: [vtk()].
%
%% @private
%% for infix precedence/associativity resolution.
%%
%% better/original name (although misnomer) was
%% insert_virtual_tokens. we're using the gulp verbiage
%% because we're guarding against mismatched delimiters
%% @end
%gulp_vtks(Tokens) ->
% gulp_vtks([], Tokens).
%
%gulp_vtks(Acc, Tks0) ->
% % scan until next "(" | "*" | "=>"
% case take_until_ifx_op(Tks0) of
% % no infix op remaining, return arg
% {_Pfx = Tks0,
% _Sfx = []} ->
% {gulp, lists:flatten([Acc, Tks0])};
% % application
% % "... foo(bar, baz) ..."
% % ~> [..., foo, {plist, "(bar, baz)"}, ...]
% {_Pfx = Tks1_BeforeOpen,
% _Sfx = Tks2_OpenNAfter
% = [#tk{string = "("} | _]} ->
% case gs_tokens:slurp_plist(Tks2_OpenNAfter) of
% {slurp, Tks2A_OpenToClose, Tks2B_AfterClose} ->
% NewAcc = [Acc,
% Tks1_BeforeOpen,
% vtk_apply_to,
% {vtk_plist, Tks2A_OpenToClose}],
% gulp_vtks(NewAcc, Tks2B_AfterClose);
% Error = {error, _} ->
% Error
% end;
% % product
% {_Pfx = Tks0_BeforeTimes,
% _Sfx = [ Tk1A_Times = #tk{string = "*"}
% | Tks1B_AfterTimes]} ->
% NewAcc = [Acc,
% Tks0_BeforeTimes,
% {'vtk_*', Tk1A_Times}],
% gulp_vtks(NewAcc, Tks1B_AfterTimes);
% % funType
% {_Pfx = Tks0_BeforeOp,
% _Sfx = [ Tk1A_Op = #tk{string = "=>"}
% | Tks1B_AfterOp]} ->
% NewAcc = [Acc,
% Tks0_BeforeOp,
% {'vtk_=>', Tk1A_Op}],
% gulp_vtks(NewAcc, Tks1B_AfterOp)
% end.
%
%
%
%-spec take_until_ifx_op(Tokens) -> Result when
% Tokens :: [tk()],
% Result :: {Taken, NewTokens},
% Taken :: Tokens,
% NewTokens :: Tokens.
%% @doc
%% consume tokens until one of ["(", "*", "=>"]
%
%take_until_ifx_op(Tks) ->
% take_until_ifx_op([], Tks).
%
%take_until_ifx_op(Stack, []) ->
% {lists:reverse(Stack), []};
%take_until_ifx_op(Stack, Tokens = [Token | NewTokens]) ->
% TokStr = Token#tk.string,
% Continue =
% case TokStr of
% % exit cases
% "(" -> false;
% "*" -> false;
% "=>" -> false;
% _ -> true
% end,
% case Continue of
% true -> take_until_ifx_op([Token | Stack], NewTokens);
% false -> {lists:reverse(Stack), Tokens}
% end.
%
+3 -3
View File
@@ -54,16 +54,16 @@
%
%
%
%-spec start_pos([gsc_token()]) -> {value, gsc_pos()} | none.
%-spec start_pos([gsc_token()]) -> {value, tk_pos()} | none.
%
%start_pos([#gsc_token{pos = P}]) -> {value, P};
%start_pos([]) -> none.
%
%
%-spec end_pos([gsc_token()]) -> {value, gsc_pos()} | none.
%-spec end_pos([gsc_token()]) -> {value, tk_pos()} | none.
%
%end_pos([#gsc_token{pos = Pos, string = Str}]) ->
% {value, gsc_tokens:new_pos(Pos, Str)};
% {value, gs_tokens:new_pos(Pos, Str)};
%end_pos([_ | T]) ->
% end_pos(T);
%end_pos([]) ->
+1 -1
View File
@@ -70,7 +70,7 @@
% `contract` gets tokenized as a keyword and not a variable name), and then
% calls into this module in order to match the string shape it's looking for.
% @end
-module(gsc_strmatch).
-module(gs_strmatch).
%-compile([export_all, nowarn_export_all]).
+124 -112
View File
@@ -16,11 +16,11 @@
% 2. to future-proof in case we decide to incrementally incorporate the gsc
% code into the legacy sophia compiler
% @end
-module(gsc_tokens).
-module(gs_tokens).
% meta
-export([
token_types_parse_order/0,
token_shapes_parse_order/0,
kwds/0
]).
@@ -39,10 +39,11 @@
is_significant/1,
filter_significant/1,
significant_tokens/1,
tokens_from_iolist/1,
tokens/1,
slurp_token/2,
slurp_token_types/3,
slurp_token_of_type/3,
slurp_token_shapes/3,
slurp_token_of_shape/3,
new_pos/2
]).
@@ -59,7 +60,7 @@
AtMostNStrings :: [string()].
% @doc return the strings of the first N tokens
strings(N, [#tk{string = S} | Rest]) when is_integer(N), N >= 1 ->
strings(N, [#tk{str = S} | Rest]) when is_integer(N), N >= 1 ->
[S | strings(N-1, Rest)];
strings(_, []) ->
[];
@@ -159,7 +160,7 @@ take_block_item([]) ->
% counterintuitive to end-users (who are programmers, entirely
% unfamiliar with notions like stacks and open/close delimiters)
slurp_plist([Hd = #tk{string = "("} | Tl]) ->
slurp_plist([Hd = #tk{str = "("} | Tl]) ->
slurp_dlist([Hd], [Hd], Tl);
slurp_plist(Tks) ->
{slurp, [], Tks}.
@@ -170,30 +171,30 @@ slurp_dlist(All, [], NewTokens) ->
{slurp, lists:reverse(All), NewTokens};
% WMA stack is nonempty
% happy cases of opens getting popped
slurp_dlist(All, [#tk{string = "("} | NewOpen],
[#tk{string = ")"} = Tk | NewTks]) ->
slurp_dlist(All, [#tk{str = "("} | NewOpen],
[#tk{str = ")"} = Tk | NewTks]) ->
slurp_dlist([Tk | All], NewOpen, NewTks);
slurp_dlist(All, [#tk{string = "["} | NewOpen],
[#tk{string = "]"} = Tk | NewTks]) ->
slurp_dlist(All, [#tk{str = "["} | NewOpen],
[#tk{str = "]"} = Tk | NewTks]) ->
slurp_dlist([Tk | All], NewOpen, NewTks);
slurp_dlist(All, [#tk{string = "{"} | NewOpen],
[#tk{string = "}"} = Tk | NewTks]) ->
slurp_dlist(All, [#tk{str = "{"} | NewOpen],
[#tk{str = "}"} = Tk | NewTks]) ->
slurp_dlist([Tk | All], NewOpen, NewTks);
% happy: open delimiters getting pushed
slurp_dlist(All, Opens, [#tk{string = "("} = Tk | NewTks]) ->
slurp_dlist(All, Opens, [#tk{str = "("} = Tk | NewTks]) ->
slurp_dlist([Tk | All], [Tk | Opens], NewTks);
slurp_dlist(All, Opens, [#tk{string = "["} = Tk | NewTks]) ->
slurp_dlist(All, Opens, [#tk{str = "["} = Tk | NewTks]) ->
slurp_dlist([Tk | All], [Tk | Opens], NewTks);
slurp_dlist(All, Opens, [#tk{string = "{"} = Tk | NewTks]) ->
slurp_dlist(All, Opens, [#tk{str = "{"} = Tk | NewTks]) ->
slurp_dlist([Tk | All], [Tk | Opens], NewTks);
% sad: mismatch cases
slurp_dlist(All, Opens, []) ->
{error, {fixme, mismatch, Opens, none}};
slurp_dlist(All, Opens, [#tk{string = "}"} = BadClose | _]) ->
slurp_dlist(All, Opens, [#tk{str = "}"} = BadClose | _]) ->
{error, {fixme, mismatch, Opens, {value, BadClose}}};
slurp_dlist(All, Opens, [#tk{string = "]"} = BadClose | _]) ->
slurp_dlist(All, Opens, [#tk{str = "]"} = BadClose | _]) ->
{error, {fixme, mismatch, Opens, {value, BadClose}}};
slurp_dlist(All, Opens, [#tk{string = ")"} = BadClose | _]) ->
slurp_dlist(All, Opens, [#tk{str = ")"} = BadClose | _]) ->
{error, {fixme, mismatch, Opens, {value, BadClose}}};
% general case: non-terminal token gets pushed
slurp_dlist(All, Opens, [Tk | NewTks]) ->
@@ -206,15 +207,15 @@ slurp_dlist(All, Opens, [Tk | NewTks]) ->
% This is parse order definition, list of keywords, etc
%
% -export([
% token_types_parse_order/0,
% token_shapes_parse_order/0,
% kwds/0
% ]).
%-------------------------------------------------------
-spec token_types_parse_order() -> [gsc_token_type()].
-spec token_shapes_parse_order() -> [tk_shape()].
% @doc
% list of sophia tokens in parse order (if an earlier type matches, the later
% type isn't even checked)
% list of sophia token shapes in parse order (if an earlier shape matches, the later
% shape isn't even checked)
%
%
% Rules =
@@ -245,7 +246,7 @@ slurp_dlist(All, Opens, [Tk | NewTks]) ->
% ],
% @end
token_types_parse_order() ->
token_shapes_parse_order() ->
% written in this style to be maximally editable
lists:flatten([
% comments and whitespace
@@ -282,8 +283,8 @@ kwds() ->
% -export([
% tokens/1,
% slurp_token/1,
% slurp_token_types/2,
% slurp_token_of_type/2
% slurp_token_shapes/2,
% slurp_token_of_shape/2
% ]).
%-------------------------------------------------------
@@ -323,12 +324,23 @@ filter_significant(Tokens) ->
-spec is_significant(Token) -> boolean()
when Token :: tk().
is_significant(#tk{type = bcom}) -> false;
is_significant(#tk{type = lcom}) -> false;
is_significant(#tk{type = ws}) -> false;
is_significant(#tk{shape = bcom}) -> false;
is_significant(#tk{shape = lcom}) -> false;
is_significant(#tk{shape = ws}) -> false;
is_significant(_) -> true.
-spec tokens_from_iolist(SrcStr) -> Result when
    SrcStr :: iolist(),
    Result :: {ok, Tokens}
            | {error, gsc_err()},
    Tokens :: [tk()].
% @doc
% Alias for `tokens/1': hands `SrcStr' straight to `tokens/1' and returns
% whatever it returns, unchanged.
tokens_from_iolist(SrcStr) ->
    tokens(SrcStr).
-spec tokens(SrcStr) -> Result
when SrcStr :: iolist(),
Result :: {ok, Tokens}
@@ -349,7 +361,7 @@ tokens(Stack, _FinalPos, "") ->
{ok, lists:reverse(Stack)};
tokens(Stack, Pos, SrcStr) ->
case slurp_token(Pos, SrcStr) of
{tokmatch, NewToken = #tk{string = TokStr},
{tokmatch, NewToken = #tk{str = TokStr},
NewSrcStr} ->
NewPos = new_pos(Pos, TokStr),
tokens([NewToken | Stack], NewPos, NewSrcStr);
@@ -455,7 +467,7 @@ next_tabstop8(Col0) when Col0 >= 0 ->
-spec slurp_token(Pos, SrcStr) -> Result
when Pos :: gsc_pos(),
when Pos :: tk_pos(),
SrcStr :: string(),
Result :: {tokmatch, Token, Rest}
| no_tokmatch
@@ -465,17 +477,17 @@ next_tabstop8(Col0) when Col0 >= 0 ->
Rest :: string().
% @doc
% grab a single token off the front of the string according to
% `token_types_parse_order/0'
% `token_shapes_parse_order/0'
slurp_token(Pos, SrcStr) ->
% this is the easiest format if i need to fuck with it
slurp_token_types(token_types_parse_order(), Pos, SrcStr).
slurp_token_shapes(token_shapes_parse_order(), Pos, SrcStr).
-spec slurp_token_types(ParseOrder, Pos, SrcStr) -> Result
when ParseOrder :: [gsc_token_type()],
Pos :: gsc_pos(),
-spec slurp_token_shapes(ParseOrder, Pos, SrcStr) -> Result
when ParseOrder :: [tk_shape()],
Pos :: tk_pos(),
SrcStr :: string(),
Result :: {tokmatch, Token, Rest}
| no_tokmatch
@@ -485,22 +497,22 @@ slurp_token(Pos, SrcStr) ->
Rest :: string().
% @doc
% grab a single token off the front of the string according to
% `token_types_parse_order/0'
% `token_shapes_parse_order/0'
slurp_token_types([TokenType | TTs], Pos, SrcStr) ->
case slurp_token_of_type(TokenType, Pos, SrcStr) of
slurp_token_shapes([TokenType | TTs], Pos, SrcStr) ->
case slurp_token_of_shape(TokenType, Pos, SrcStr) of
Match = {tokmatch, _, _} -> Match;
no_tokmatch -> slurp_token_types(TTs, Pos, SrcStr);
no_tokmatch -> slurp_token_shapes(TTs, Pos, SrcStr);
IErr = {ierr, _} -> IErr;
Error = {error, _} -> Error
end;
slurp_token_types([], _Pos, _SrcStr) ->
slurp_token_shapes([], _Pos, _SrcStr) ->
no_tokmatch.
-spec slurp_token_of_type(TokenType, Pos, SrcStr) -> MaybeToken
when TokenType :: gsc_token_type(),
Pos :: gsc_pos(),
-spec slurp_token_of_shape(TokenType, Pos, SrcStr) -> MaybeToken
when TokenType :: tk_shape(),
Pos :: tk_pos(),
SrcStr :: string(),
MaybeToken :: {tokmatch, Token, Rest}
| no_tokmatch
@@ -509,7 +521,7 @@ slurp_token_types([], _Pos, _SrcStr) ->
Token :: tk(),
Rest :: string().
% @doc
% match a sophia token of a given type off the front of the string
% match a sophia token of a given shape off the front of the string
% @end
% COMMENTS AND WHITESPACE: lcom, bcom, ws
@@ -518,27 +530,27 @@ slurp_token_types([], _Pos, _SrcStr) ->
%
% i am not going to bother writing a string matcher thing for this
% FIXME: make a string matcher for line comments
slurp_token_of_type(lcom, Pos, SrcStr) ->
slurp_token_of_shape(lcom, Pos, SrcStr) ->
case SrcStr of
"//" ++ _ ->
{Line, Rest} = takeline("", SrcStr),
Token = #tk{type = lcom,
Token = #tk{shape = lcom,
pos = Pos,
string = Line},
str = Line},
{tokmatch, Token, Rest};
_ ->
no_tokmatch
end;
% Block comments cannot have a string matcher because they have a whole stack
% thing keeping track of depth because of nested block comments
slurp_token_of_type(bcom, Pos, SrcStr0) ->
slurp_token_of_shape(bcom, Pos, SrcStr0) ->
case SrcStr0 of
"/*" ++ SrcStr1 ->
case bcom("/*", 1, SrcStr1) of
{ok, CommentStr, SrcStr2} ->
Token = #tk{type = bcom,
Token = #tk{shape = bcom,
pos = Pos,
string = CommentStr},
str = CommentStr},
{tokmatch, Token, SrcStr2};
Error ->
Error
@@ -546,15 +558,15 @@ slurp_token_of_type(bcom, Pos, SrcStr0) ->
_ ->
no_tokmatch
end;
slurp_token_of_type(ws, Pos, SrcStr) ->
WhitespaceMatcher = gsc_strmatch:smr_sf_ws(),
case gsc_strmatch:match(WhitespaceMatcher, SrcStr) of
slurp_token_of_shape(ws, Pos, SrcStr) ->
WhitespaceMatcher = gs_strmatch:smr_sf_ws(),
case gs_strmatch:match(WhitespaceMatcher, SrcStr) of
no_strmatch ->
no_tokmatch;
{strmatch, WS, Rest} ->
Token = #tk{type = ws,
Token = #tk{shape = ws,
pos = Pos,
string = WS},
str = WS},
{tokmatch, Token, Rest}
end;
% KEYWORDS, OPERATORS, PUNCTUATION: kwd, op, punct
@@ -568,88 +580,88 @@ slurp_token_of_type(ws, Pos, SrcStr) ->
%
% we know kwds are always ids, so we parse it as an id and see if it's one
% of the kwds
slurp_token_of_type(kwd, Pos, SrcStr) ->
case slurp_token_of_type(id, Pos, SrcStr) of
{tokmatch, IdTok = #tk{string = IdStr}, Rest} ->
slurp_token_of_shape(kwd, Pos, SrcStr) ->
case slurp_token_of_shape(id, Pos, SrcStr) of
{tokmatch, IdTok = #tk{str = IdStr}, Rest} ->
case lists:member(IdStr, kwds()) of
false ->
no_tokmatch;
true ->
KwTok = IdTok#tk{type = kwd},
KwTok = IdTok#tk{shape = kwd},
{tokmatch, KwTok, Rest}
end;
no_tokmatch ->
no_tokmatch
end;
slurp_token_of_type(op, Pos, SrcStr) ->
case gsc_strmatch:match(gsc_strmatch:smr_sf_op(), SrcStr) of
slurp_token_of_shape(op, Pos, SrcStr) ->
case gs_strmatch:match(gs_strmatch:smr_sf_op(), SrcStr) of
{strmatch, Str, Rest} ->
Token = #tk{type = op, pos = Pos, string = Str},
Token = #tk{shape = op, pos = Pos, str = Str},
{tokmatch, Token, Rest};
no_strmatch ->
no_tokmatch
end;
slurp_token_of_type(punct, Pos, SrcStr) ->
case gsc_strmatch:match(gsc_strmatch:smr_sf_punct(), SrcStr) of
slurp_token_of_shape(punct, Pos, SrcStr) ->
case gs_strmatch:match(gs_strmatch:smr_sf_punct(), SrcStr) of
{strmatch, Str, Rest} ->
Token = #tk{type = punct, pos = Pos, string = Str},
Token = #tk{shape = punct, pos = Pos, str = Str},
{tokmatch, Token, Rest};
no_strmatch ->
no_tokmatch
end;
% SOPHIA VARIABLE NAMES: id, con, qid, qcon, tvar
slurp_token_of_type(id, Pos, SrcStr) ->
case gsc_strmatch:match(gsc_strmatch:smr_sf_id(), SrcStr) of
slurp_token_of_shape(id, Pos, SrcStr) ->
case gs_strmatch:match(gs_strmatch:smr_sf_id(), SrcStr) of
{strmatch, IdStr, Rest} ->
Token = #tk{type = id, pos = Pos, string = IdStr},
Token = #tk{shape = id, pos = Pos, str = IdStr},
{tokmatch, Token, Rest};
no_strmatch ->
no_tokmatch
end;
slurp_token_of_type(con, Pos, SrcStr) ->
case gsc_strmatch:match(gsc_strmatch:smr_sf_con(), SrcStr) of
slurp_token_of_shape(con, Pos, SrcStr) ->
case gs_strmatch:match(gs_strmatch:smr_sf_con(), SrcStr) of
{strmatch, Str, Rest} ->
Token = #tk{type = con, pos = Pos, string = Str},
Token = #tk{shape = con, pos = Pos, str = Str},
{tokmatch, Token, Rest};
no_strmatch ->
no_tokmatch
end;
slurp_token_of_type(qid, Pos, SrcStr) ->
case gsc_strmatch:match(gsc_strmatch:smr_sf_qid(), SrcStr) of
slurp_token_of_shape(qid, Pos, SrcStr) ->
case gs_strmatch:match(gs_strmatch:smr_sf_qid(), SrcStr) of
{strmatch, Str, Rest} ->
Token = #tk{type = qid, pos = Pos, string = Str},
Token = #tk{shape = qid, pos = Pos, str = Str},
{tokmatch, Token, Rest};
no_strmatch ->
no_tokmatch
end;
slurp_token_of_type(qcon, Pos, SrcStr) ->
case gsc_strmatch:match(gsc_strmatch:smr_sf_qcon(), SrcStr) of
slurp_token_of_shape(qcon, Pos, SrcStr) ->
case gs_strmatch:match(gs_strmatch:smr_sf_qcon(), SrcStr) of
{strmatch, Str, Rest} ->
Token = #tk{type = qcon, pos = Pos, string = Str},
Token = #tk{shape = qcon, pos = Pos, str = Str},
{tokmatch, Token, Rest};
no_strmatch ->
no_tokmatch
end;
slurp_token_of_type(tvar, Pos, SrcStr) ->
case gsc_strmatch:match(gsc_strmatch:smr_sf_tvar(), SrcStr) of
slurp_token_of_shape(tvar, Pos, SrcStr) ->
case gs_strmatch:match(gs_strmatch:smr_sf_tvar(), SrcStr) of
{strmatch, Str, Rest} ->
Token = #tk{type = tvar, pos = Pos, string = Str},
Token = #tk{shape = tvar, pos = Pos, str = Str},
{tokmatch, Token, Rest};
no_strmatch ->
no_tokmatch
end;
slurp_token_of_type(int16, Pos, SrcStr) ->
case gsc_strmatch:match(gsc_strmatch:smr_sf_int16(), SrcStr) of
slurp_token_of_shape(int16, Pos, SrcStr) ->
case gs_strmatch:match(gs_strmatch:smr_sf_int16(), SrcStr) of
{strmatch, Str, Rest} ->
Token = #tk{type = int16, pos = Pos, string = Str},
Token = #tk{shape = int16, pos = Pos, str = Str},
{tokmatch, Token, Rest};
no_strmatch ->
no_tokmatch
end;
slurp_token_of_type(int10, Pos, SrcStr) ->
case gsc_strmatch:match(gsc_strmatch:smr_sf_int10(), SrcStr) of
slurp_token_of_shape(int10, Pos, SrcStr) ->
case gs_strmatch:match(gs_strmatch:smr_sf_int10(), SrcStr) of
{strmatch, Str, Rest} ->
Token = #tk{type = int10, pos = Pos, string = Str},
Token = #tk{shape = int10, pos = Pos, str = Str},
{tokmatch, Token, Rest};
no_strmatch ->
no_tokmatch
@@ -658,63 +670,63 @@ slurp_token_of_type(int10, Pos, SrcStr) ->
% ak, ct, sg
%
% char: sophia char literal
slurp_token_of_type(ak, Pos, SrcStr) ->
StringMatcher = gsc_strmatch:smr_sf_ak(),
case gsc_strmatch:match(StringMatcher, SrcStr) of
slurp_token_of_shape(ak, Pos, SrcStr) ->
StringMatcher = gs_strmatch:smr_sf_ak(),
case gs_strmatch:match(StringMatcher, SrcStr) of
no_strmatch ->
no_tokmatch;
{strmatch, TokenStr, Rest} ->
Token = #tk{type = ak, pos = Pos, string = TokenStr},
Token = #tk{shape = ak, pos = Pos, str = TokenStr},
{tokmatch, Token, Rest}
end;
slurp_token_of_type(ct, Pos, SrcStr) ->
StringMatcher = gsc_strmatch:smr_sf_ct(),
case gsc_strmatch:match(StringMatcher, SrcStr) of
slurp_token_of_shape(ct, Pos, SrcStr) ->
StringMatcher = gs_strmatch:smr_sf_ct(),
case gs_strmatch:match(StringMatcher, SrcStr) of
no_strmatch ->
no_tokmatch;
{strmatch, TokenStr, Rest} ->
Token = #tk{type = ct, pos = Pos, string = TokenStr},
Token = #tk{shape = ct, pos = Pos, str = TokenStr},
{tokmatch, Token, Rest}
end;
slurp_token_of_type(sg, Pos, SrcStr) ->
StringMatcher = gsc_strmatch:smr_sf_sg(),
case gsc_strmatch:match(StringMatcher, SrcStr) of
slurp_token_of_shape(sg, Pos, SrcStr) ->
StringMatcher = gs_strmatch:smr_sf_sg(),
case gs_strmatch:match(StringMatcher, SrcStr) of
no_strmatch ->
no_tokmatch;
{strmatch, TokenStr, Rest} ->
Token = #tk{type = sg, pos = Pos, string = TokenStr},
Token = #tk{shape = sg, pos = Pos, str = TokenStr},
{tokmatch, Token, Rest}
end;
slurp_token_of_type(char, Pos, SrcStr) ->
StringMatcher = gsc_strmatch:smr_sf_char(),
case gsc_strmatch:match(StringMatcher, SrcStr) of
slurp_token_of_shape(char, Pos, SrcStr) ->
StringMatcher = gs_strmatch:smr_sf_char(),
case gs_strmatch:match(StringMatcher, SrcStr) of
no_strmatch ->
no_tokmatch;
{strmatch, TokenStr, Rest} ->
Token = #tk{type = char, pos = Pos, string = TokenStr},
Token = #tk{shape = char, pos = Pos, str = TokenStr},
{tokmatch, Token, Rest}
end;
slurp_token_of_type(string, Pos, SrcStr) ->
case gsc_strmatch:match(gsc_strmatch:smr_sf_str(), SrcStr) of
slurp_token_of_shape(string, Pos, SrcStr) ->
case gs_strmatch:match(gs_strmatch:smr_sf_str(), SrcStr) of
no_strmatch ->
no_tokmatch;
{strmatch, TokenStr, Rest} ->
Token = #tk{type = string, pos = Pos, string = TokenStr},
Token = #tk{shape = string, pos = Pos, str = TokenStr},
{tokmatch, Token, Rest}
end;
slurp_token_of_type(bytes, Pos, SrcStr) ->
case gsc_strmatch:match(gsc_strmatch:smr_sf_bytes(), SrcStr) of
slurp_token_of_shape(bytes, Pos, SrcStr) ->
case gs_strmatch:match(gs_strmatch:smr_sf_bytes(), SrcStr) of
no_strmatch ->
no_tokmatch;
{strmatch, TokenStr, Rest} ->
Token = #tk{type = bytes, pos = Pos, string = TokenStr},
Token = #tk{shape = bytes, pos = Pos, str = TokenStr},
{tokmatch, Token, Rest}
end;
slurp_token_of_type(NyiType, Pos, SrcStr) ->
Message = io_lib:format("cannot slurp token of type: ~p", [NyiType]),
slurp_token_of_shape(NyiType, Pos, SrcStr) ->
Message = io_lib:format("cannot slurp token of shape: ~p", [NyiType]),
error(#gsc_err{atom = nyi,
string = Message,
extra = [{token_type, NyiType},
str = Message,
extra = [{token_shape, NyiType},
{pos, Pos},
{rest, SrcStr}]}).
+7 -59
View File
@@ -4,15 +4,14 @@
% based on original sophia compiler
%
% parse layers:
% 1. gsc_tokenizer: SrcStr -> (Tokens | SigTokens)
% 1. gs_tokens: SrcStr -> (Tokens | SigTokens)
%
% SigTokens = not comment/whitespace
%
% layers:
% a. gsc_strmatch : matches string shapes
% b. gsc_so_scan : converts to so_scan shapes
% a. gs_strmatch : matches string shapes
% b. gso_scan : converts to so_scan shapes
%
% 2. gsc_ast: SigTokens -> AST
%
% terminology:
%
@@ -30,16 +29,8 @@
%
% @end
% TODO:
% - barf for outputs, slurp for inputs
% - architecture needs more careful thought but only after something works
% - too fuzzy right now
% - possibly:
% - rename parser layers sequentially:
% - gsc_
-module(gsc).
-export_type([
token/0
]).
@@ -48,10 +39,7 @@
sigtokens_from_file/1,
sigtokens_from_string/1,
tokens_from_file/1,
tokens_from_string/1,
ast_from_file/1,
ast_from_string/1,
ast_from_tokens/1
tokens_from_string/1
]).
-include("$gsc_include/gsc.hrl").
@@ -68,13 +56,13 @@
sigtokens_from_file(X) ->
case tokens_from_file(X) of
{ok, Y} -> {ok, gsc_tokens:filter_significant(Y)};
{ok, Y} -> {ok, gs_tokens:filter_significant(Y)};
Err -> Err
end.
sigtokens_from_string(X) ->
case tokens_from_string(X) of
{ok, Y} -> {ok, gsc_tokens:filter_significant(Y)};
{ok, Y} -> {ok, gs_tokens:filter_significant(Y)};
Err -> Err
end.
@@ -101,44 +89,4 @@ tokens_from_file(FilePath) ->
Tokens :: [tk()].
tokens_from_string(SrcStr) ->
gsc_tokens:tokens(SrcStr).
-spec ast_from_file(FilePath) -> Perhaps
    when FilePath :: string(),
         Perhaps  :: {ok, AST} | {error, gsc_err()},
         AST      :: gsc_ast().
% @doc
% Read the file at `FilePath' and parse its contents with
% `ast_from_string/1'. If `file:read_file/1' returns anything other than
% `{ok, _}', that result is handed back to the caller untouched.
ast_from_file(FilePath) ->
    ReadResult = file:read_file(FilePath),
    case ReadResult of
        {ok, Contents} -> ast_from_string(Contents);
        _NotOk         -> ReadResult
    end.
-spec ast_from_string(SrcStr) -> Perhaps
    when SrcStr  :: string(),
         Perhaps :: {ok, AST} | {error, gsc_err()},
         AST     :: gsc_ast().
% @doc
% Tokenize `SrcStr' with `gsc_tokens:significant_tokens/1' and, on
% `{ok, _}', parse the resulting tokens with `ast_from_tokens/1'. Any other
% tokenizer result is returned as-is.
ast_from_string(SrcStr) ->
    Tokenized = gsc_tokens:significant_tokens(SrcStr),
    case Tokenized of
        {ok, Significant} -> ast_from_tokens(Significant);
        _NotOk            -> Tokenized
    end.
-spec ast_from_tokens(SrcTokens) -> Perhaps
    when SrcTokens :: [tk()],
         Perhaps   :: {ok, AST} | {error, gsc_err()},
         AST       :: gsc_ast().
% @doc
% Pass `Tks' through `gsc_tokens:filter_significant/1', then give the
% result to `gsc_ast:gulp_file/1'. A `{gulp, AST}' answer is re-tagged as
% `{ok, AST}'; anything else is returned unchanged.
ast_from_tokens(Tks) ->
    case gsc_ast:gulp_file(gsc_tokens:filter_significant(Tks)) of
        {gulp, AST} -> {ok, AST};
        NotGulped   -> NotGulped
    end.
gs_tokens:tokens(SrcStr).
-216
View File
@@ -1,216 +0,0 @@
-module(gsc_parse_type_expr).
-export_type([
]).
-export([
unsafe_vtks_from_string/1,
gulp_vtks/1,
take_until_ifx_op/1
]).
-include("$gsc_include/gsc.hrl").
%------------------------------------------------------
% TYPES
%------------------------------------------------------
-type vtk_ifx_op() :: vtk_apply_to
| {'vtk_*', tk()}
| {'vtk_=>', tk()}.
-type vtk() :: tk()
| {vtk_plist, [tk()]}
| vtk_ifx_op().
-type gulped(X) :: {gulp, X}
| {error, any()}.
-type slurped(X) :: {slurp, X, Rest :: [tk()]}
| {error, any()}.
%------------------------------------------------------
% FUNCTIONS
%------------------------------------------------------
-spec unsafe_vtks_from_string(SrcStr) -> Vtks when
    SrcStr :: string(),
    Vtks   :: [vtk()].
% @doc
% Testing helper: obtains tokens from `gsc_tokens:significant_tokens/1' and
% runs them through `gulp_vtks/1'. "Unsafe" because both steps are asserted
% with a match, so any non-success result crashes with `badmatch'.
unsafe_vtks_from_string(SrcStr) ->
    {ok, Significant} = gsc_tokens:significant_tokens(SrcStr),
    {gulp, VirtualTokens} = gulp_vtks(Significant),
    VirtualTokens.
% operators in descending order of exteriority
%
% precedence verbiage confuses me
%
% 1 + 2 * 3 ^ 4
%
% (+ 1 (* 2 (^ 3 4)))
%
% precedence is thinking about the operators as like
% having arms and legs and doing something. how much
% power do they have to bind to their neighbors. it's
% thinking of your operators as verbs in some state
% machine.
%
% i don't like when i have to model the state machine
% in my head when i'm reading code. everything should
% just be there. exteriority and interiority are
% properties of nouns, not properties of verbs.
%
% functional programming is all about offloading as
% much of your reasoning into nouns as possible. verbs
% change things. and change is bad.
-type chunk_strategy()
:: ast_parens % prefix paren (x y z)
| '=>' % infixr
| '*' % infix
| 'apply' % postfix paren f (...)
.
-record(ast_parens,
{open = none :: none | tk(),
inner = none :: none | [tk()],
close = none :: none | tk()}).
% @doc
% Chunk `Tokens' according to `Strategy'.
% Thin entry point: starts gulp_chunks_by/4 with empty accumulators.
chunk_by(Strategy, Tokens) ->
    gulp_chunks_by(Strategy, [], [], Tokens).


-spec gulp_chunks_by(Strategy, Oks, Errs, Tokens) -> Result when
    Strategy :: plist | chunk_strategy(),
    Oks      :: [any()],
    Errs     :: [{error, Reason :: any()}],
    Tokens   :: [tk()],
    Result   :: {gulp, [any()]}
              | {error, any()}.
% @doc
% Walk `Tokens' left to right, pushing chunks onto `Oks' and failures onto
% `Errs'.
%
% When the input is exhausted:
%   - no errors collected -> {gulp, Chunks}, chunks in source order
%   - otherwise           -> {error, {fixme, {Module, Line}, Errs}}
%
% Only the `plist' strategy is implemented here. For each position,
% slurp_plist_rec/1 decides what happens:
%   {slurp, PList, Rest} -> PList becomes one chunk, continue at Rest
%   barf                 -> the head token becomes a chunk by itself
%   {error, _}           -> the error is recorded and the head token skipped
%
% NOTE(review): `plist' is not a member of chunk_strategy() (which lists
% `ast_parens'); the spec accepts both until the naming is settled.
gulp_chunks_by(_, Oks, [], []) ->
    {gulp, lists:reverse(Oks)};
gulp_chunks_by(_, _, Errs, []) ->
    {error, {fixme, {?MODULE, ?LINE}, Errs}};
gulp_chunks_by(plist, Oks, Errs, Tokens = [Token | Rest]) ->
    case slurp_plist_rec(Tokens) of
        {slurp, PList, NewTokens} ->
            gulp_chunks_by(plist, [PList | Oks], Errs, NewTokens);
        barf ->
            gulp_chunks_by(plist, [Token | Oks], Errs, Rest);
        Error = {error, _} ->
            % Drop the offending head token before continuing. Recursing on
            % the unchanged token list would hit the same error forever.
            gulp_chunks_by(plist, Oks, [Error | Errs], Rest)
    end.
% NOTE(review): unfinished work in progress -- this definition does not
% parse as written:
%   * the record pattern in the head is missing its closing `}'
%   * the second `slurp' branch ends in `,' with no final expression
%   * the clause ends in `;' but no further clause follows
% pt_inner/1 is not defined anywhere in this module.
%
% gulp_chunks_by/4 matches this function's result against
% {slurp, Chunk, Rest}, `barf', and a catch-all error.
%
% Visible intent: for input starting with "(", delegate to
% gsc_tokens:slurp_plist/1 and answer `barf' when it slurps nothing.
slurp_plist_rec(Tokens = [#tk{string = "(" | _]) ->
case gsc_tokens:slurp_plist(Tokens) of
{slurp, [], _} ->
barf;
{slurp, PTokens, NewTokens} ->
PTokensInner = pt_inner(PTokens),
end;
%-spec gulp_ifx_tree(Tokens) -> gulped(IfxTree) when
% Tokens :: [tk()],
% IfxTree :: ifx_tree().
%
%-spec chunk_by(ChunkStrategy, Tokens) -> Result when
% ChunkStrategy :: chunk_strategy(),
% Tokens :: [tk()],
% Result :: {ChunkStrategy,
-spec gulp_vtks(Tokens) -> Result when
    Tokens        :: [tk()],
    Result        :: gulped(VirtualTokens),
    VirtualTokens :: [vtk()].
% @private
% Prepares a token list for infix precedence/associativity resolution by
% inserting virtual tokens at every "(", "*" and "=>".
%
% The original name was insert_virtual_tokens (a better name, although a
% misnomer). It carries the gulp prefix because it also guards against
% mismatched delimiters: an {error, _} from gsc_tokens:slurp_plist/1 is
% returned to the caller as-is.
% @end
gulp_vtks(Tokens) ->
    gulp_vtks([], Tokens).

% Acc is a deep list of everything emitted so far. It is flattened exactly
% once, when no operator remains in the input.
gulp_vtks(Acc, Tokens) ->
    % split at the next "(" | "*" | "=>"
    {Before, OpAndAfter} = take_until_ifx_op(Tokens),
    case OpAndAfter of
        % nothing left to rewrite: emit the remaining plain tokens
        [] ->
            {gulp, lists:flatten([Acc, Before])};
        % application
        %   "... foo(bar, baz) ..."
        %   ~> [..., foo, vtk_apply_to, {vtk_plist, <tokens "(" to ")">}, ...]
        [#tk{string = "("} | _] ->
            case gsc_tokens:slurp_plist(OpAndAfter) of
                {slurp, OpenToClose, AfterClose} ->
                    Emitted = [Acc,
                               Before,
                               vtk_apply_to,
                               {vtk_plist, OpenToClose}],
                    gulp_vtks(Emitted, AfterClose);
                {error, _} = Error ->
                    Error
            end;
        % product
        [Times = #tk{string = "*"} | AfterTimes] ->
            gulp_vtks([Acc, Before, {'vtk_*', Times}], AfterTimes);
        % function type
        [Arrow = #tk{string = "=>"} | AfterArrow] ->
            gulp_vtks([Acc, Before, {'vtk_=>', Arrow}], AfterArrow)
    end.
-spec take_until_ifx_op(Tokens) -> Result when
    Tokens    :: [tk()],
    Result    :: {Taken, NewTokens},
    Taken     :: Tokens,
    NewTokens :: Tokens.
% @doc
% Split `Tokens' at the first token whose string is "(", "*" or "=>".
% `Taken' is everything before that token; `NewTokens' begins with it.
% When no such token exists the result is `{Tokens, []}'.
take_until_ifx_op(Tks) ->
    take_until_ifx_op([], Tks).

% Seen holds the tokens passed over so far, most recent first.
take_until_ifx_op(Seen, []) ->
    {lists:reverse(Seen), []};
take_until_ifx_op(Seen, Remaining = [Tk | Rest]) ->
    case lists:member(Tk#tk.string, ["(", "*", "=>"]) of
        % stop: leave the operator at the head of the remainder
        true  -> {lists:reverse(Seen), Remaining};
        false -> take_until_ifx_op([Tk | Seen], Rest)
    end.
+15 -15
View File
@@ -1,6 +1,6 @@
% @doc compatibility layer to test against so_scan
%
% converts gsc_tokens data to so_scan tokens
% converts gs_tokens data to so_scan tokens
%
% Ref: so_scan.erl
-module(gso_scan).
@@ -76,14 +76,14 @@
-type so_symbol() :: so_kwd() | so_special_char() | atom().
-type so_token2() :: {Symbol :: so_symbol(),
Location :: gsc_pos()}.
Location :: tk_pos()}.
% FIXME
% this is 'id', 'con', qid
-type so_tk3type() :: char | string | hex | int | bytes | qid | qcon | tvar | id | con.
-type so_token3() :: {TokenType :: so_tk3type(),
Location :: gsc_pos(),
Location :: tk_pos(),
TokenValue :: term()}.
-type so_token() :: so_token2() | so_token3().
@@ -104,7 +104,7 @@
% @end
scan(SrcStr) ->
case gsc_tokens:tokens(SrcStr) of
case gs_tokens:tokens(SrcStr) of
{ok, SfLTokens} ->
SoTokens = to_so_tokens(SfLTokens),
{ok, SoTokens};
@@ -151,12 +151,12 @@ scan(SrcStr) ->
%
% so if we see an ak/ct/sg token, we summon evil ben
% carson to reconjoin the unconjoined twins
to_so_tokens([ AkTok = #tk{type = AkCtSg, pos = Pos}
to_so_tokens([ AkTok = #tk{shape = AkCtSg, pos = Pos}
| Sheeit])
when ak =:= AkCtSg;
ct =:= AkCtSg;
sg =:= AkCtSg ->
{#tk{string = FinalAkStr}, NewSheeit}
{#tk{str = FinalAkStr}, NewSheeit}
= ken_barson_rises(AkTok, Sheeit),
[{id, Pos, FinalAkStr}| to_so_tokens(NewSheeit)];
% this part is just lists:filtermap
@@ -259,9 +259,9 @@ to_so_tokens([]) ->
% `_`**: `smr_plus` requires >=1 base58 char to
% match; `ak_I`, `ak_0`, `ak__bar` all fall
% through to `id` and both tokenizers agree.
ken_barson_rises(AkTokAcc = #tk{string = AkStr},
SrcTokens = [#tk{type = CandidateType,
string = CandidateString}
ken_barson_rises(AkTokAcc = #tk{str = AkStr},
SrcTokens = [#tk{shape = CandidateType,
str = CandidateString}
| Rest]) ->
% candidate:
% dig out the token type and the string
@@ -273,7 +273,7 @@ ken_barson_rises(AkTokAcc = #tk{string = AkStr},
Smash ->
% dig out the token from LcTokApi
NewAkStr = AkStr ++ CandidateString,
NewAkTokAcc = AkTokAcc#tk{string = NewAkStr},
NewAkTokAcc = AkTokAcc#tk{str = NewAkStr},
ken_barson_rises(NewAkTokAcc, Rest);
Pass ->
{AkTokAcc, SrcTokens}
@@ -320,9 +320,9 @@ pass_types() ->
% follow-on tokens
% @end
to_so_token(#tk{type = SfTokenType,
to_so_token(#tk{shape = SfTokenType,
pos = Pos,
string = SfTokenStr}) ->
str = SfTokenStr}) ->
case SfTokenType of
%-----------------
% Ignored
@@ -371,7 +371,7 @@ to_so_token(#tk{type = SfTokenType,
NYI ->
Msg = io_lib:format("gsc_so_scan:to_so_token/1: unhandled token shape: ~p", [NYI]),
error(#gsc_err{atom = nyi,
string = Msg})
str = Msg})
end.
%% ak/ct/sg all tokenize to id
@@ -393,7 +393,7 @@ so_parse_char([$' | Chars]) ->
[Char] -> Char;
_Bad ->
error(#gsc_err{atom = bad_token,
string = "Bad character literal: '" ++ Chars})
str = "Bad character literal: '" ++ Chars})
end.
so_parse_string([$" | Chars]) ->
@@ -435,7 +435,7 @@ unescape(Delim, [$\\, Code | Chars], Acc) ->
$t -> Ok($\t);
$v -> Ok($\v);
_ -> error(#gsc_err{atom = bad_escape_char,
string = "Bad control sequence: \\" ++ [Code]}) %% TODO
str = "Bad control sequence: \\" ++ [Code]}) %% TODO
end;
unescape(Delim, [C | Chars], Acc) ->
unescape(Delim, Chars, [C | Acc]).
+1 -1
View File
@@ -2,7 +2,7 @@
{type,lib}.
{modules,[]}.
{author,"Peter Harpending"}.
{prefix,none}.
{prefix,"gs"}.
{desc,"Exploratory sophia compiler rewrite"}.
{package_id,{"otpr","gsc",{0,1,0}}}.
{deps,[]}.