diff --git a/DONT_README.md b/DONT_README.md new file mode 100644 index 0000000..4e374a9 --- /dev/null +++ b/DONT_README.md @@ -0,0 +1,10 @@ +# TODONE + +# TODO + +- barf for outputs, slurp for inputs +- architecture needs more careful thought but only after something works +- too fuzzy right now +- possibly: + - rename parser layers sequentially: + - gsc_ diff --git a/README.md b/README.md index e69de29..d6233de 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,30 @@ +# gsc = gajumaru sophia compiler + +**This is _NOT_ the official Sophia compiler.** If you're looking for +that see https://git.qpq.swiss/QPQ-AG/sophia + +This is an incomplete prototype rewrite of the legacy (official) +sophia compiler in straightforward Erlang. It grew out of my (Peter +Harpending) own efforts to document the language and its relationship +to FATE (the gajumaru virtual machine). + +The goal for version 0.1 is to mirror the success behavior of the +legacy sophia compiler. + +# Setup + +``` +git clone https://git.qpq.swiss/QPQ-AG/gsc.git +``` + +Add the following to `~/.bashrc` or wheremstever: + +``` +export PATH=$PATH:/path/to/gsc/bin +``` + +To test run + +``` +gsc --help +``` diff --git a/cli/include/ansi.hrl b/cli/include/ansi.hrl new file mode 100644 index 0000000..934eed3 --- /dev/null +++ b/cli/include/ansi.hrl @@ -0,0 +1,199 @@ +% ANSI screen drawing macros in erlang +% +% Author: Peter Harpending +% Date: 2026-04-10 +% +% Copyright (C) 2026, QPQ AG + +% Not exhaustive, just what I need for the moment +% ref: https://gist.github.com/ConnerWill/d4b6c776b509add763e17f9f113fd25b +-define(ANSI_ESC, [27]). +-define(ANSI_CRLF, "\r\n"). +-define(ANSI_FF, [12]). +-define(ANSI_CLEAR, [12]). + +-define(ANSI_LINE(X), [X, ?ANSI_CRLF]). + +% MARKDOWN TIER TEXT FORMATTING + +% resets all formatting +-define(ANSI_RESET, [?ANSI_ESC, "[0m"]). + +-define(ANSI_BOLD, [?ANSI_ESC, "[1m"]). +-define(ANSI_DIM, [?ANSI_ESC, "[2m"]). +-define(ANSI_ITALIC, [?ANSI_ESC, "[3m"]). 
+-define(ANSI_ULINE, [?ANSI_ESC, "[4m"]). +-define(ANSI_BLINK, [?ANSI_ESC, "[5m"]). +-define(ANSI_INVERT, [?ANSI_ESC, "[7m"]). +-define(ANSI_INVIS, [?ANSI_ESC, "[8m"]). +-define(ANSI_STRIKE, [?ANSI_ESC, "[9m"]). + +% > Note: Both dim and bold modes are reset with the ESC[22m sequence. The +% > ESC[21m sequence is a non-specified sequence for double underline mode and +% > only work in some terminals and is reset with ESC[24m. +-define(ANSI_UNBOLD, [?ANSI_ESC, "[22m"]). +-define(ANSI_UNDIM, [?ANSI_ESC, "[22m"]). +-define(ANSI_UNITALIC, [?ANSI_ESC, "[23m"]). +-define(ANSI_UNULINE, [?ANSI_ESC, "[24m"]). +-define(ANSI_UNBLINK, [?ANSI_ESC, "[25m"]). +-define(ANSI_UNINVERT, [?ANSI_ESC, "[27m"]). +-define(ANSI_UNINVIS, [?ANSI_ESC, "[28m"]). +-define(ANSI_UNSTRIKE, [?ANSI_ESC, "[29m"]). + +-define(ANSI_BOLD(X), [?ANSI_BOLD, X, ?ANSI_UNBOLD]). +-define(ANSI_DIM(X), [?ANSI_DIM, X, ?ANSI_UNDIM]). +-define(ANSI_ITALIC(X), [?ANSI_ITALIC, X, ?ANSI_UNITALIC]). +-define(ANSI_ULINE(X), [?ANSI_ULINE, X, ?ANSI_UNULINE]). +-define(ANSI_BLINK(X), [?ANSI_BLINK, X, ?ANSI_UNBLINK]). +-define(ANSI_INVERT(X), [?ANSI_INVERT, X, ?ANSI_UNINVERT]). +-define(ANSI_INVIS(X), [?ANSI_INVIS, X, ?ANSI_UNINVIS]). +-define(ANSI_STRIKE(X), [?ANSI_STRIKE, X, ?ANSI_UNSTRIKE]). + + +% COLORS +% +% COLOR SetFG SetBG +% ----------------------------- +% Black 30 40 +% Red 31 41 +% Green 32 42 +% Yellow 33 43 +% Blue 34 44 +% Magenta 35 45 +% Cyan 36 46 +% White 37 47 +% Default 39 49 + +-define(ANSI_FG_RESET, [?ANSI_ESC, "[39m"]). +-define(ANSI_BG_RESET, [?ANSI_ESC, "[49m"]). + +-define(ANSI_FG_BLACK, [?ANSI_ESC, "[30m"]). +-define(ANSI_FG_RED, [?ANSI_ESC, "[31m"]). +-define(ANSI_FG_GREEN, [?ANSI_ESC, "[32m"]). +-define(ANSI_FG_YELLOW, [?ANSI_ESC, "[33m"]). +-define(ANSI_FG_BLUE, [?ANSI_ESC, "[34m"]). +-define(ANSI_FG_MAGENTA, [?ANSI_ESC, "[35m"]). +-define(ANSI_FG_CYAN, [?ANSI_ESC, "[36m"]). +-define(ANSI_FG_WHITE, [?ANSI_ESC, "[37m"]). + +-define(ANSI_BG_BLACK, [?ANSI_ESC, "[40m"]). 
+-define(ANSI_BG_RED, [?ANSI_ESC, "[41m"]). +-define(ANSI_BG_GREEN, [?ANSI_ESC, "[42m"]). +-define(ANSI_BG_YELLOW, [?ANSI_ESC, "[43m"]). +-define(ANSI_BG_BLUE, [?ANSI_ESC, "[44m"]). +-define(ANSI_BG_MAGENTA, [?ANSI_ESC, "[45m"]). +-define(ANSI_BG_CYAN, [?ANSI_ESC, "[46m"]). +-define(ANSI_BG_WHITE, [?ANSI_ESC, "[47m"]). + + +-define(ANSI_FG_BLACK(X), [?ANSI_FG_BLACK, X, ?ANSI_FG_RESET]). +-define(ANSI_FG_RED(X), [?ANSI_FG_RED, X, ?ANSI_FG_RESET]). +-define(ANSI_FG_GREEN(X), [?ANSI_FG_GREEN, X, ?ANSI_FG_RESET]). +-define(ANSI_FG_YELLOW(X), [?ANSI_FG_YELLOW, X, ?ANSI_FG_RESET]). +-define(ANSI_FG_BLUE(X), [?ANSI_FG_BLUE, X, ?ANSI_FG_RESET]). +-define(ANSI_FG_MAGENTA(X), [?ANSI_FG_MAGENTA, X, ?ANSI_FG_RESET]). +-define(ANSI_FG_CYAN(X), [?ANSI_FG_CYAN, X, ?ANSI_FG_RESET]). +-define(ANSI_FG_WHITE(X), [?ANSI_FG_WHITE, X, ?ANSI_FG_RESET]). + +-define(ANSI_BG_BLACK(X), [?ANSI_BG_BLACK, X, ?ANSI_BG_RESET]). +-define(ANSI_BG_RED(X), [?ANSI_BG_RED, X, ?ANSI_BG_RESET]). +-define(ANSI_BG_GREEN(X), [?ANSI_BG_GREEN, X, ?ANSI_BG_RESET]). +-define(ANSI_BG_YELLOW(X), [?ANSI_BG_YELLOW, X, ?ANSI_BG_RESET]). +-define(ANSI_BG_BLUE(X), [?ANSI_BG_BLUE, X, ?ANSI_BG_RESET]). +-define(ANSI_BG_MAGENTA(X), [?ANSI_BG_MAGENTA, X, ?ANSI_BG_RESET]). +-define(ANSI_BG_CYAN(X), [?ANSI_BG_CYAN, X, ?ANSI_BG_RESET]). +-define(ANSI_BG_WHITE(X), [?ANSI_BG_WHITE, X, ?ANSI_BG_RESET]). + +% bright colors +-define(ANSI_FG_BBLACK, [?ANSI_ESC, "[90m"]). +-define(ANSI_FG_BRED, [?ANSI_ESC, "[91m"]). +-define(ANSI_FG_BGREEN, [?ANSI_ESC, "[92m"]). +-define(ANSI_FG_BYELLOW, [?ANSI_ESC, "[93m"]). +-define(ANSI_FG_BBLUE, [?ANSI_ESC, "[94m"]). +-define(ANSI_FG_BMAGENTA, [?ANSI_ESC, "[95m"]). +-define(ANSI_FG_BCYAN, [?ANSI_ESC, "[96m"]). +-define(ANSI_FG_BWHITE, [?ANSI_ESC, "[97m"]). + +-define(ANSI_BG_BBLACK, [?ANSI_ESC, "[100m"]). +-define(ANSI_BG_BRED, [?ANSI_ESC, "[101m"]). +-define(ANSI_BG_BGREEN, [?ANSI_ESC, "[102m"]). +-define(ANSI_BG_BYELLOW, [?ANSI_ESC, "[103m"]). +-define(ANSI_BG_BBLUE, [?ANSI_ESC, "[104m"]). 
+-define(ANSI_BG_BMAGENTA, [?ANSI_ESC, "[105m"]). +-define(ANSI_BG_BCYAN, [?ANSI_ESC, "[106m"]). +-define(ANSI_BG_BWHITE, [?ANSI_ESC, "[107m"]). + +-define(ANSI_FG_BBLACK(X), [?ANSI_FG_BBLACK, X, ?ANSI_FG_RESET]). +-define(ANSI_FG_BRED(X), [?ANSI_FG_BRED, X, ?ANSI_FG_RESET]). +-define(ANSI_FG_BGREEN(X), [?ANSI_FG_BGREEN, X, ?ANSI_FG_RESET]). +-define(ANSI_FG_BYELLOW(X), [?ANSI_FG_BYELLOW, X, ?ANSI_FG_RESET]). +-define(ANSI_FG_BBLUE(X), [?ANSI_FG_BBLUE, X, ?ANSI_FG_RESET]). +-define(ANSI_FG_BMAGENTA(X), [?ANSI_FG_BMAGENTA, X, ?ANSI_FG_RESET]). +-define(ANSI_FG_BCYAN(X), [?ANSI_FG_BCYAN, X, ?ANSI_FG_RESET]). +-define(ANSI_FG_BWHITE(X), [?ANSI_FG_BWHITE, X, ?ANSI_FG_RESET]). + +-define(ANSI_BG_BBLACK(X), [?ANSI_BG_BBLACK, X, ?ANSI_BG_RESET]). +-define(ANSI_BG_BRED(X), [?ANSI_BG_BRED, X, ?ANSI_BG_RESET]). +-define(ANSI_BG_BGREEN(X), [?ANSI_BG_BGREEN, X, ?ANSI_BG_RESET]). +-define(ANSI_BG_BYELLOW(X), [?ANSI_BG_BYELLOW, X, ?ANSI_BG_RESET]). +-define(ANSI_BG_BBLUE(X), [?ANSI_BG_BBLUE, X, ?ANSI_BG_RESET]). +-define(ANSI_BG_BMAGENTA(X), [?ANSI_BG_BMAGENTA, X, ?ANSI_BG_RESET]). +-define(ANSI_BG_BCYAN(X), [?ANSI_BG_BCYAN, X, ?ANSI_BG_RESET]). +-define(ANSI_BG_BWHITE(X), [?ANSI_BG_BWHITE, X, ?ANSI_BG_RESET]). + +-define(ANSI_FG_RGB(R,G,B), + [?ANSI_ESC, + "[38;2;", + integer_to_list(R),";", + integer_to_list(G),";", + integer_to_list(B),"m"] +). +-define(ANSI_BG_RGB(R,G,B), + [?ANSI_ESC, + "[48;2;", + integer_to_list(R),";", + integer_to_list(G),";", + integer_to_list(B),"m"] +). + +-define(ANSI_FG_RGB(R,G,B,Chars), [?ANSI_FG_RGB(R,G,B), Chars, ?ANSI_FG_RESET]). +-define(ANSI_BG_RGB(R,G,B,Chars), [?ANSI_BG_RGB(R,G,B), Chars, ?ANSI_BG_RESET]). + + +% cursor controls +-define(ANSI_CUR_HOME, [?ANSI_ESC, "[H"]). + +-define(ANSI_CUR_XY(X, Y), [?ANSI_ESC, "[", integer_to_list(Y), ";", integer_to_list(X), "H"]). +-define(ANSI_CUR_UP(N), [?ANSI_ESC, "[", integer_to_list(N), "A"]). +-define(ANSI_CUR_DOWN(N), [?ANSI_ESC, "[", integer_to_list(N), "B"]). 
+-define(ANSI_CUR_RIGHT(N), [?ANSI_ESC, "[", integer_to_list(N), "C"]). +-define(ANSI_CUR_LEFT(N), [?ANSI_ESC, "[", integer_to_list(N), "D"]). +-define(ANSI_CUR_SAVE, [?ANSI_ESC, "7"]). +-define(ANSI_CUR_RESTORE, [?ANSI_ESC, "8"]). +-define(ANSI_CUR_QUERY, [?ANSI_ESC, "[6n"]). + +-define(ANSI_CUR_UP, ?ANSI_CUR_UP(1)). +-define(ANSI_CUR_DOWN, ?ANSI_CUR_DOWN(1)). +-define(ANSI_CUR_RIGHT, ?ANSI_CUR_RIGHT(1)). +-define(ANSI_CUR_LEFT, ?ANSI_CUR_LEFT(1)). + + +% relative movement "forward" +X=right, +Y=down +-define(ANSI_CUR_VECT(X, Y), + if X =< 0, Y =< 0 -> [?ANSI_CUR_LEFT(-1*X), ?ANSI_CUR_UP(-1*Y)]; + X =< 0, 0 < Y -> [?ANSI_CUR_LEFT(-1*X), ?ANSI_CUR_DOWN(Y)]; + 0 < X, Y =< 0 -> [?ANSI_CUR_RIGHT(X), ?ANSI_CUR_UP(-1*Y)]; + 0 < X, 0 < Y -> [?ANSI_CUR_RIGHT(X), ?ANSI_CUR_DOWN(Y)] + end +). + + + +-define(ANSI_ALTBUF, [?ANSI_ESC, "[?1049h"]). +-define(ANSI_UNALTBUF, [?ANSI_ESC, "[?1049l"]). + +-define(ANSI_CUR_INVIS, [?ANSI_ESC, "[?25l"]). +-define(ANSI_CUR_VIS, [?ANSI_ESC, "[?25h"]). + +-define(ANSI_WRAP, [?ANSI_ESC, "[=7h"]). +-define(ANSI_NOWRAP, [?ANSI_ESC, "[=7l"]). diff --git a/cli/priv/doi.txt b/cli/priv/doi.txt new file mode 100644 index 0000000..c2669e2 --- /dev/null +++ b/cli/priv/doi.txt @@ -0,0 +1,256 @@ +In Congress, July 4, 1776 + +The unanimous Declaration of the thirteen united States of America, + +When in the Course of human events, it becomes necessary for one +people to dissolve the political bands which have connected them with +another, and to assume among the powers of the earth, the separate +and equal station to which the Laws of Nature and of Nature's God +entitle them, a decent respect to the opinions of mankind requires +that they should declare the causes which impel them to the +separation. 
+ +We hold these truths to be self-evident, that all men are created +equal, that they are endowed by their Creator with certain +unalienable Rights, that among these are Life, Liberty and the +pursuit of Happiness.--That to secure these rights, Governments are +instituted among Men, deriving their just powers from the consent of +the governed, --That whenever any Form of Government becomes +destructive of these ends, it is the Right of the People to alter or +to abolish it, and to institute new Government, laying its foundation +on such principles and organizing its powers in such form, as to them +shall seem most likely to effect their Safety and Happiness. +Prudence, indeed, will dictate that Governments long established +should not be changed for light and transient causes; and accordingly +all experience hath shewn, that mankind are more disposed to suffer, +while evils are sufferable, than to right themselves by abolishing +the forms to which they are accustomed. But when a long train of +abuses and usurpations, pursuing invariably the same Object evinces a +design to reduce them under absolute Despotism, it is their right, it +is their duty, to throw off such Government, and to provide new +Guards for their future security.--Such has been the patient +sufferance of these Colonies; and such is now the necessity which +constrains them to alter their former Systems of Government. The +history of the present King of Great Britain is a history of repeated +injuries and usurpations, all having in direct object the +establishment of an absolute Tyranny over these States. To prove +this, let Facts be submitted to a candid world. + + He has refused his Assent to Laws, the most wholesome and + necessary for the public good. + + He has forbidden his Governors to pass Laws of immediate and + pressing importance, unless suspended in their operation till his + Assent should be obtained; and when so suspended, he has utterly + neglected to attend to them. 
+ + He has refused to pass other Laws for the accommodation of large + districts of people, unless those people would relinquish the + right of Representation in the Legislature, a right inestimable + to them and formidable to tyrants only. + + He has called together legislative bodies at places unusual, + uncomfortable, and distant from the depository of their public + Records, for the sole purpose of fatiguing them into compliance + with his measures. + + He has dissolved Representative Houses repeatedly, for opposing + with manly firmness his invasions on the rights of the people. + + He has refused for a long time, after such dissolutions, to cause + others to be elected; whereby the Legislative powers, incapable + of Annihilation, have returned to the People at large for their + exercise; the State remaining in the mean time exposed to all the + dangers of invasion from without, and convulsions within. + + He has endeavoured to prevent the population of these States; for + that purpose obstructing the Laws for Naturalization of + Foreigners; refusing to pass others to encourage their migrations + hither, and raising the conditions of new Appropriations of + Lands. + + He has obstructed the Administration of Justice, by refusing his + Assent to Laws for establishing Judiciary powers. + + He has made Judges dependent on his Will alone, for the tenure of + their offices, and the amount and payment of their salaries. + + He has erected a multitude of New Offices, and sent hither swarms + of Officers to harrass our people, and eat out their substance. + + He has kept among us, in times of peace, Standing Armies without + the Consent of our legislatures. + + He has affected to render the Military independent of and + superior to the Civil power. 
+ + He has combined with others to subject us to a jurisdiction + foreign to our constitution, and unacknowledged by our laws; + giving his Assent to their Acts of pretended Legislation: + + For Quartering large bodies of armed troops among us: + + For protecting them, by a mock Trial, from punishment for any + Murders which they should commit on the Inhabitants of these + States: + + For cutting off our Trade with all parts of the world: + + For imposing Taxes on us without our Consent: + + For depriving us in many cases, of the benefits of Trial by Jury: + + For transporting us beyond Seas to be tried for pretended + offences: + + For abolishing the free System of English Laws in a neighbouring + Province, establishing therein an Arbitrary government, and + enlarging its Boundaries so as to render it at once an example + and fit instrument for introducing the same absolute rule into + these Colonies: + + For taking away our Charters, abolishing our most valuable Laws, + and altering fundamentally the Forms of our Governments: + + For suspending our own Legislatures, and declaring themselves + invested with power to legislate for us in all cases whatsoever. + + He has abdicated Government here, by declaring us out of his + Protection and waging War against us. + + He has plundered our seas, ravaged our Coasts, burnt our towns, + and destroyed the lives of our people. + + He is at this time transporting large Armies of foreign + Mercenaries to compleat the works of death, desolation and + tyranny, already begun with circumstances of Cruelty & perfidy + scarcely paralleled in the most barbarous ages, and totally + unworthy the Head of a civilized nation. + + He has constrained our fellow Citizens taken Captive on the high + Seas to bear Arms against their Country, to become the + executioners of their friends and Brethren, or to fall themselves + by their Hands. 
+ + He has excited domestic insurrections amongst us, and has + endeavoured to bring on the inhabitants of our frontiers, the + merciless Indian Savages, whose known rule of warfare, is an + undistinguished destruction of all ages, sexes and conditions. + +In every stage of these Oppressions We have Petitioned for Redress in +the most humble terms: Our repeated Petitions have been answered only +by repeated injury. A Prince, whose character is thus marked by every +act which may define a Tyrant, is unfit to be the ruler of a free +people. + +Nor have We been wanting in attentions to our Brittish brethren. We +have warned them from time to time of attempts by their legislature +to extend an unwarrantable jurisdiction over us. We have reminded +them of the circumstances of our emigration and settlement here. We +have appealed to their native justice and magnanimity, and we have +conjured them by the ties of our common kindred to disavow these +usurpations, which, would inevitably interrupt our connections and +correspondence. They too have been deaf to the voice of justice and +of consanguinity. We must, therefore, acquiesce in the necessity, +which denounces our Separation, and hold them, as we hold the rest of +mankind, Enemies in War, in Peace Friends. 
+ +We, therefore, the Representatives of the united States of America, +in General Congress, Assembled, appealing to the Supreme Judge of the +world for the rectitude of our intentions, do, in the Name, and by +Authority of the good People of these Colonies, solemnly publish and +declare, That these United Colonies are, and of Right ought to be +Free and Independent States; that they are Absolved from all +Allegiance to the British Crown, and that all political connection +between them and the State of Great Britain, is and ought to be +totally dissolved; and that as Free and Independent States, they have +full Power to levy War, conclude Peace, contract Alliances, establish +Commerce, and to do all other Acts and Things which Independent +States may of right do. And for the support of this Declaration, with +a firm reliance on the protection of divine Providence, we mutually +pledge to each other our Lives, our Fortunes and our sacred Honor. + +Georgia + Button Gwinnett + Lyman Hall + George Walton +North Carolina + William Hooper + Joseph Hewes + John Penn + +South Carolina + Edward Rutledge + Thomas Heyward, Jr. + Thomas Lynch, Jr. + Arthur Middleton + +Massachusetts + John Hancock + +Maryland + Samuel Chase + William Paca + Thomas Stone + Charles Carroll of Carrollton + +Virginia + George Wythe + Richard Henry Lee + Thomas Jefferson + Benjamin Harrison + Thomas Nelson, Jr. 
+ Francis Lightfoot Lee + Carter Braxton + +Pennsylvania + Robert Morris + Benjamin Rush + Benjamin Franklin + John Morton + George Clymer + James Smith + George Taylor + James Wilson + George Ross + +Delaware + Caesar Rodney + George Read + Thomas McKean + +New York + William Floyd + Philip Livingston + Francis Lewis + Lewis Morris + +New Jersey + Richard Stockton + John Witherspoon + Francis Hopkinson + John Hart + Abraham Clark + +New Hampshire + Josiah Bartlett + William Whipple + +Massachusetts + Samuel Adams + John Adams + Robert Treat Paine + Elbridge Gerry + +Rhode Island + Stephen Hopkins + William Ellery + +Connecticut + Roger Sherman + Samuel Huntington + William Williams + Oliver Wolcott + +New Hampshire + Matthew Thornton diff --git a/cli/scratch/argparse b/cli/scratch/argparse new file mode 100644 index 0000000..499ab72 --- /dev/null +++ b/cli/scratch/argparse @@ -0,0 +1,44 @@ + + +%%===================================================== +%% ARG PARSING +%%===================================================== + + +%%----------------------------------------------------- +%% TOKENIZING +%%----------------------------------------------------- + +-record(ctk, + {shape = none :: none | '-' | '--' | str, + val = none :: none | [char()] | string() | string(), + str = none :: none | string()}). +-type ctk() :: #ctk{}. + + + +-spec tokenize(Args) -> CliTokens when + Args :: [string()], + CliTokens :: [ctk()]. +%% @private tokenize cli args + +tokenize(Args) -> + [tokenize_arg(S) || S <- Args]. + +tokenize_arg(Str = "--" ++ Val) -> + #ctk{shape = '--', + val = Val, + str = Str}; +tokenize_arg(Str = "-" ++ Val) -> + #ctk{shape = '-', + val = Val, + str = Str}; +tokenize_arg(Str) -> + #ctk{shape = str, + val = none, + str = Str}. 
+ + +%%----------------------------------------------------- +%% PARSING +%%----------------------------------------------------- diff --git a/cli/src/gsc_cli.erl b/cli/src/gsc_cli.erl index 51aaae7..d1cadc3 100644 --- a/cli/src/gsc_cli.erl +++ b/cli/src/gsc_cli.erl @@ -1,11 +1,5 @@ %%% @doc -%%% GSC CLI: gsc_cli -%%% -%%% This module is currently named `gsc_cli', but you may want to change that. -%%% Remember that changing the name in `-module()' below requires renaming -%%% this file, and it is recommended to run `zx update .app` in the main -%%% project directory to make sure the ebin/gsc_cli.app file stays in -%%% sync with the project whenever you add, remove or rename a module. +%%% GSC CLI: explorer/harness for sfc iteration %%% @end -module(gsc_cli). @@ -16,10 +10,193 @@ -export([start/1]). +-include("$gsc_include/gsc.hrl"). +-include("ansi.hrl"). + +do_help() -> + io:format("~ts", [help_screen()]). + +help_screen() -> + ["you can't help people who refuse to help themsleves\n"]. -spec start(ArgV) -> ok when ArgV :: [string()]. +start([]) -> + do_eshell(), + ok; +start(["shell"]) -> + do_eshell(), + ok; +start(["eshell"]) -> + do_eshell(), + ok; start(ArgV) -> - ok = io:format("Hello, World! Args: ~tp~n", [ArgV]), + %io:format("ArgV: ~p~n", [ArgV]), + do(ArgV), zx:silent_stop(). 
+ +do(["list"]) -> + do_tlist(); +do(["list", "tests"]) -> + do_tlist(); +do(["run", "tests"]) -> + io:format("TestModules = ~p~n", [test_mods()]), + do_runall_tests(); +do(["tokenizers_agree", Foo]) -> + io:format("~p~n", [tokenizers_agree(Foo)]); +% slowly phasing out shitty names like lctokens +% tokens = native sfc token representation +do(["tokens", Foo]) -> do_tokens(Foo); +do(["color_tokens", Foo]) -> do_color_tokens(Foo); +do(["ctokens", Foo]) -> do_color_tokens(Foo); +do(["colour_tokens" | _]) -> do_doi(); +% so_tokens = so_scan tokens +do(["so", "tokens", Foo]) -> do_so_tokens(Foo); +do(["so_tokens", Foo]) -> do_so_tokens(Foo); +% gso_tokens = our mockery +do(["gso", "tokens", Foo]) -> do_gso_tokens(Foo); +do(["gso_tokens", Foo]) -> do_gso_tokens(Foo); +% print source file to screen with token boundaries highlighted +% script utility +do(["rmm", Foo]) -> + do_rmm(Foo); +do(Args) -> + io:format("bad args: ~p~n", [Args]), + do_help(). + +do_doi() -> + FP = zx:get_home() ++ "/priv/doi.txt", + Cmd = "less " ++ FP, + io:format("~s~n", [Cmd]). + + +do_runall_tests() -> + lists:foreach(fun run_mod_main/1, test_mods()). + +test_mods() -> + known_modules_with_prefix("gt_"). + +known_modules_with_prefix(Pfx) -> + ModsZipBeamsZipLoaded = code:all_available(), + kmp(Pfx, ModsZipBeamsZipLoaded, []). + +kmp(_Pfx, [], Acc) -> + lists:sort(Acc); +kmp(Pfx, [{ModStr, _BeamPath, _Loaded} | Rest], Acc) -> + case lists:prefix(Pfx, ModStr) of + false -> kmp(Pfx, Rest, Acc); + true -> kmp(Pfx, Rest, [list_to_atom(ModStr) | Acc]) + end. + +run_mod_main(Mod) -> + io:format("========================================\n" + "~p:main()\n" + "========================================\n", + [Mod]), + try + Mod:main() + catch + Err:ErrType:Trace -> + io:format("~p: ~p~n", [Err, ErrType]), + io:format("Trace:~n~p~n", [Trace]) + end. + +do_tlist() -> + lists:foreach( + fun(ModName) -> + io:format("~s~n", [ModName]) + end, + test_mods() + ). + + +-spec do_eshell() -> ok. 
+% @doc start an erlang shell + +do_eshell() -> + io:format("Welcome to the GSC shell!~n", []), + case shell:start_interactive() of + ok -> ok; + {error, already_started} -> ok; + {error, Reason} -> error(Reason) + end. + +tokenizers_agree(File) -> + so_tokens(File) =:= tokens(File). + + +do_tokens(FilePath) -> + [io:format("~p~n", [Tk]) || Tk <- tokens(FilePath)]. + +do_so_tokens(FilePath) -> + [io:format("~p~n", [Tk]) || Tk <- so_tokens(FilePath)]. + +do_gso_tokens(FilePath) -> + [io:format("~p~n", [Tk]) || Tk <- gso_tokens(FilePath)]. + + +% rmm = run module:main() with our context loaded +% useful for prototyping +do_rmm(FilePath) -> + case compile:file(FilePath) of + {ok, Mod} -> Mod:main(); + Error -> error(Error) + end. + + +so_tokens(FilePath) -> + {ok, FileBytes} = file:read_file(FilePath), + FileStr = unicode:characters_to_nfc_list(FileBytes), + {ok, Tokens} = so_scan:scan(FileStr), + Tokens. + +gso_tokens(FilePath) -> + {ok, FileBytes} = file:read_file(FilePath), + FileStr = unicode:characters_to_nfc_list(FileBytes), + {ok, Tokens} = gso_scan:scan(FileStr), + Tokens. + + +tokens(FilePath) -> + {ok, Tokens} = gsc:tokens_from_file(FilePath), + Tokens. + + +do_color_tokens(File) -> + case gsc:tokens_from_file(File) of + {ok, Tokens} -> + ColorizedSrcStr = colorize_tokens(chunk_color_wheel(), Tokens, ""), + Full = [?ANSI_INVERT, ColorizedSrcStr, ?ANSI_UNINVERT], + io:format("~ts", [Full]); + Error -> + io:format("~p~n", [Error]) + end. + +chunk_color_wheel() -> + %[yellow, blue]. + [red, green, yellow, blue, magenta, cyan]. + + + +colorize_tokens(Wheel, [T | Ts], Acc) -> + {Color, NewWheel} = rotate(Wheel), + NewAcc = [Acc, colorize_token_str(Color, T)], + colorize_tokens(NewWheel, Ts, NewAcc); +colorize_tokens(_, [], Acc) -> + Acc. + +rotate([A | Rest]) -> + {A, Rest ++ [A]}. + +colorize_token_str(Color, #tk{str = Str}) -> + {Pfx, Sfx} = color_fixes(Color), + [Pfx, Str, Sfx]. 
+ +color_fixes(red) -> {?ANSI_FG_RED, ?ANSI_FG_RESET}; +color_fixes(green) -> {?ANSI_FG_GREEN, ?ANSI_FG_RESET}; +color_fixes(yellow) -> {?ANSI_FG_YELLOW, ?ANSI_FG_RESET}; +color_fixes(blue) -> {?ANSI_FG_BLUE, ?ANSI_FG_RESET}; +color_fixes(magenta) -> {?ANSI_FG_MAGENTA, ?ANSI_FG_RESET}; +color_fixes(cyan) -> {?ANSI_FG_CYAN, ?ANSI_FG_RESET}. + diff --git a/cli/src/gt_tokens.erl b/cli/src/gt_tokens.erl new file mode 100644 index 0000000..9a5878f --- /dev/null +++ b/cli/src/gt_tokens.erl @@ -0,0 +1,157 @@ +% gsc tokenizer tests +-module(gt_tokens). + +-export([ + main/0, ct_dir/0 + %tokens_match/1 +]). +-include("$gsc_include/gsc.hrl"). +-include_lib("eunit/include/eunit.hrl"). + +main() -> + %io:format("~p~n", [div_files()]), + %io:format("MAINNNNN!~n", []), + eunit:test(?MODULE, [verbose]). + %eunit:test(?MODULE). + + +% directory containing the tests for the tokenizer +ct_dir() -> + zx_daemon:get_home() ++ "/test-data/gt_tokens". + +agreement_tests_dir() -> + ct_dir() ++ "/tokenizers_agree". 
+ + +% the divergences claude found between gsc tokenizer and so tokenizer +% +% mostly stupid corner cases like a string crossing a line boundary +% or unterminated block comment +% +% divergence files: "divergence" means so_scan disagrees with +% gsc_so_scan in one of the following ways: +% +% - one succeeds when the other errors +% - disagree on success case +% +% making errors agree on two programs that work differently is a +% fool's errand +div_files() -> + ContractsDir = agreement_tests_dir(), + % this is the equivalent of ls + % just has filenames, no /path/to/ prefix + {ok, Files} = file:list_dir(ContractsDir), + % originally i was a retard and didn't read the eunit + % documentation, so if any one test failed, the entire test suite + % would crash with no information regarding what happened + % + % so this was a hack to only run div01-div05 but not div06: + % + % % hack to fix one broken test at a time + % (FileName = "div0" ++ [Digit | _]) when Digit =< $9 -> + % FilePath = ct_dir() ++ "/" ++ FileName, + % {true, {FileName, FilePath}} + % (_) -> false + % + % Once i read the eunit docs and learned about test generators, I + % realized I could have only the failed test chimp out. what a + % concept. + % + % i also realized that printing the full filepath was a waste, so + % instead the test should know about the FileName (foo.bar) and the + % FilePath (/path/to/foo.bar). + % + % then i decided to start writing my own test contracts instead + % of having claude do it and i rean into the issue of vim swap + % files not lexing properly because they're not unicode + IsDivCt = + fun(FileName) -> + % need to filter out vim swap files + % originally was false-matching on ([$. | _]) + % like a man + % + % god this feels like putting my balls in a little tiny + % guillotine (even the guillotine is emasculating) but + % claude suggested this and i mean it's kind of the + % most idiomatic and like straightforward. 
most + % importantly it's declarative + % + % god i feel so defeated + case filename:extension(FileName) of + ".aes" -> + FilePath = ContractsDir ++ "/" ++ FileName, + {true, {FileName, FilePath}}; + _ -> + false + end + end, + lists:sort(lists:filtermap(IsDivCt, Files)). + + +%div_file_names() -> [N || {N, _} <- div_files()]. +%div_file_paths() -> [P || {_, P} <- div_files()]. + +tokstr_concat_test_() -> + % future proofing + ConcatTestFiles + = lists:flatten([ + div_files() + ]), + % exclude the contracts with like unterminated block comments + % where they don't tokenize properly + NonStupidFiles = + lists:filter( + fun + ({"div05_bcom_eof.aes", _}) -> false; + ({"div06_bcom_in_expr.aes", _}) -> false; + ({"div07_bcom_nested.aes", _}) -> false; + ({"div08_bcom_simple.aes", _}) -> false; + ({_, _}) -> true + end, + ConcatTestFiles + ), + %?debugFmt("ConcatTestFiles=~p", [ConcatTestFiles]), + {"file = sum(tokens)", + [concat_property(Name, Path) || {Name, Path} <- NonStupidFiles]}. + +concat_property(FileName, FilePath) -> + %?debugFmt("concat_property(~p, _)", [FileName]), + {ok, FileBytes} = file:read_file(FilePath), + FileChars = unicode:characters_to_nfc_list(FileBytes), + {FileName ++ ": file = sum(tokens)", + fun() -> + case gsc_tokenizer:tokens(FileChars) of + {ok, SfcTokens} -> + ConcatStr = concat_token_strs(SfcTokens, []), + ?assertEqual(FileChars, ConcatStr); + _Error -> + ok + end + end}. + +concat_token_strs([#gsc_token{string = S} | Rest], Acc) -> + concat_token_strs(Rest, [Acc, S]); +concat_token_strs([], Acc) -> + unicode:characters_to_nfc_list(Acc). + +% underscore marks this as a test *generator* +div_test_() -> + % divergence + DivFiles = div_files(), + %?debugFmt("DivFiles=~p", [DivFiles]), + {"claude tokenizer divergences fixed", [tokens_match(N, P) || {N, P} <- DivFiles]}. 
+ +tokens_match(FileName, FilePath) -> + %?debugFmt("tokens_match(~p, _)", [FileName]), + % extracting data to be tested + SoTokens = sfp:so_tokens(FilePath), + SfTokens = sfp:gsc_so_tokens(FilePath), + {FileName ++ ": tokenizers_agree", + fun() -> + case {SoTokens, SfTokens} of + {{ok, So}, {ok, Sf}} -> ?assertEqual(So, Sf); + {{error, _}, {error, _}} -> ok; + {{ok, _}, {error, _}} -> error("so_scan succeeded and gsc_so_scan failed"); + {{error, _}, {ok, _}} -> error("so_scan failed and gsc_so_scan succeded") + end + end}. diff --git a/cli/test-data/gsc2_tokens_to_ast/aegora_base_contract-1.aes b/cli/test-data/gt_syntax/aegora_base_contract-1.aes similarity index 100% rename from cli/test-data/gsc2_tokens_to_ast/aegora_base_contract-1.aes rename to cli/test-data/gt_syntax/aegora_base_contract-1.aes diff --git a/cli/test-data/gsc2_tokens_to_ast/aegora_sales_contract-1.aes b/cli/test-data/gt_syntax/aegora_sales_contract-1.aes similarity index 100% rename from cli/test-data/gsc2_tokens_to_ast/aegora_sales_contract-1.aes rename to cli/test-data/gt_syntax/aegora_sales_contract-1.aes diff --git a/cli/test-data/gsc2_tokens_to_ast/evil.aes b/cli/test-data/gt_syntax/evil.aes similarity index 100% rename from cli/test-data/gsc2_tokens_to_ast/evil.aes rename to cli/test-data/gt_syntax/evil.aes diff --git a/cli/test-data/gsc2_tokens_to_ast/gym.aes b/cli/test-data/gt_syntax/gym.aes similarity index 100% rename from cli/test-data/gsc2_tokens_to_ast/gym.aes rename to cli/test-data/gt_syntax/gym.aes diff --git a/cli/test-data/gsc2_tokens_to_ast/hello.aes b/cli/test-data/gt_syntax/hello.aes similarity index 100% rename from cli/test-data/gsc2_tokens_to_ast/hello.aes rename to cli/test-data/gt_syntax/hello.aes diff --git a/cli/test-data/gsc2_tokens_to_ast/types.aes b/cli/test-data/gt_syntax/types.aes similarity index 100% rename from cli/test-data/gsc2_tokens_to_ast/types.aes rename to cli/test-data/gt_syntax/types.aes diff --git 
a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/ak_base58_char.aes b/cli/test-data/gt_tokens/tokenizers_agree/ak_base58_char.aes similarity index 100% rename from cli/test-data/gsc1_string_to_tokens/tokenizers_agree/ak_base58_char.aes rename to cli/test-data/gt_tokens/tokenizers_agree/ak_base58_char.aes diff --git a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/ak_base58_digit_0.aes b/cli/test-data/gt_tokens/tokenizers_agree/ak_base58_digit_0.aes similarity index 100% rename from cli/test-data/gsc1_string_to_tokens/tokenizers_agree/ak_base58_digit_0.aes rename to cli/test-data/gt_tokens/tokenizers_agree/ak_base58_digit_0.aes diff --git a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/ak_base58_five_split.aes b/cli/test-data/gt_tokens/tokenizers_agree/ak_base58_five_split.aes similarity index 100% rename from cli/test-data/gsc1_string_to_tokens/tokenizers_agree/ak_base58_five_split.aes rename to cli/test-data/gt_tokens/tokenizers_agree/ak_base58_five_split.aes diff --git a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/ak_base58_hex.aes b/cli/test-data/gt_tokens/tokenizers_agree/ak_base58_hex.aes similarity index 100% rename from cli/test-data/gsc1_string_to_tokens/tokenizers_agree/ak_base58_hex.aes rename to cli/test-data/gt_tokens/tokenizers_agree/ak_base58_hex.aes diff --git a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/ak_base58_kwd_let.aes b/cli/test-data/gt_tokens/tokenizers_agree/ak_base58_kwd_let.aes similarity index 100% rename from cli/test-data/gsc1_string_to_tokens/tokenizers_agree/ak_base58_kwd_let.aes rename to cli/test-data/gt_tokens/tokenizers_agree/ak_base58_kwd_let.aes diff --git a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/ak_base58_lower_l.aes b/cli/test-data/gt_tokens/tokenizers_agree/ak_base58_lower_l.aes similarity index 100% rename from cli/test-data/gsc1_string_to_tokens/tokenizers_agree/ak_base58_lower_l.aes rename to cli/test-data/gt_tokens/tokenizers_agree/ak_base58_lower_l.aes diff --git 
a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/ak_base58_tvar.aes b/cli/test-data/gt_tokens/tokenizers_agree/ak_base58_tvar.aes similarity index 100% rename from cli/test-data/gsc1_string_to_tokens/tokenizers_agree/ak_base58_tvar.aes rename to cli/test-data/gt_tokens/tokenizers_agree/ak_base58_tvar.aes diff --git a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/ct_base58_upper_O.aes b/cli/test-data/gt_tokens/tokenizers_agree/ct_base58_upper_O.aes similarity index 100% rename from cli/test-data/gsc1_string_to_tokens/tokenizers_agree/ct_base58_upper_O.aes rename to cli/test-data/gt_tokens/tokenizers_agree/ct_base58_upper_O.aes diff --git a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div01_bytes.aes b/cli/test-data/gt_tokens/tokenizers_agree/div01_bytes.aes similarity index 100% rename from cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div01_bytes.aes rename to cli/test-data/gt_tokens/tokenizers_agree/div01_bytes.aes diff --git a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div02_char_hex_lower.aes b/cli/test-data/gt_tokens/tokenizers_agree/div02_char_hex_lower.aes similarity index 100% rename from cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div02_char_hex_lower.aes rename to cli/test-data/gt_tokens/tokenizers_agree/div02_char_hex_lower.aes diff --git a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div03_char_hexbrace_lower.aes b/cli/test-data/gt_tokens/tokenizers_agree/div03_char_hexbrace_lower.aes similarity index 100% rename from cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div03_char_hexbrace_lower.aes rename to cli/test-data/gt_tokens/tokenizers_agree/div03_char_hexbrace_lower.aes diff --git a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div04_multiline_string.aes b/cli/test-data/gt_tokens/tokenizers_agree/div04_multiline_string.aes similarity index 100% rename from cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div04_multiline_string.aes rename to 
cli/test-data/gt_tokens/tokenizers_agree/div04_multiline_string.aes diff --git a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div05_bcom_eof.aes b/cli/test-data/gt_tokens/tokenizers_agree/div05_bcom_eof.aes similarity index 100% rename from cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div05_bcom_eof.aes rename to cli/test-data/gt_tokens/tokenizers_agree/div05_bcom_eof.aes diff --git a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div06_bcom_in_expr.aes b/cli/test-data/gt_tokens/tokenizers_agree/div06_bcom_in_expr.aes similarity index 100% rename from cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div06_bcom_in_expr.aes rename to cli/test-data/gt_tokens/tokenizers_agree/div06_bcom_in_expr.aes diff --git a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div07_bcom_nested.aes b/cli/test-data/gt_tokens/tokenizers_agree/div07_bcom_nested.aes similarity index 100% rename from cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div07_bcom_nested.aes rename to cli/test-data/gt_tokens/tokenizers_agree/div07_bcom_nested.aes diff --git a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div08_bcom_simple.aes b/cli/test-data/gt_tokens/tokenizers_agree/div08_bcom_simple.aes similarity index 100% rename from cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div08_bcom_simple.aes rename to cli/test-data/gt_tokens/tokenizers_agree/div08_bcom_simple.aes diff --git a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div09_str_bslash_nl.aes b/cli/test-data/gt_tokens/tokenizers_agree/div09_str_bslash_nl.aes similarity index 100% rename from cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div09_str_bslash_nl.aes rename to cli/test-data/gt_tokens/tokenizers_agree/div09_str_bslash_nl.aes diff --git a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div10_char_nonascii.aes b/cli/test-data/gt_tokens/tokenizers_agree/div10_char_nonascii.aes similarity index 100% rename from 
cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div10_char_nonascii.aes rename to cli/test-data/gt_tokens/tokenizers_agree/div10_char_nonascii.aes diff --git a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div11_char_high_codepoint.aes b/cli/test-data/gt_tokens/tokenizers_agree/div11_char_high_codepoint.aes similarity index 100% rename from cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div11_char_high_codepoint.aes rename to cli/test-data/gt_tokens/tokenizers_agree/div11_char_high_codepoint.aes diff --git a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div12_string_high_codepoint.aes b/cli/test-data/gt_tokens/tokenizers_agree/div12_string_high_codepoint.aes similarity index 100% rename from cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div12_string_high_codepoint.aes rename to cli/test-data/gt_tokens/tokenizers_agree/div12_string_high_codepoint.aes diff --git a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div13_col_drift_2byte_str.aes b/cli/test-data/gt_tokens/tokenizers_agree/div13_col_drift_2byte_str.aes similarity index 100% rename from cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div13_col_drift_2byte_str.aes rename to cli/test-data/gt_tokens/tokenizers_agree/div13_col_drift_2byte_str.aes diff --git a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div14_col_drift_3byte_str.aes b/cli/test-data/gt_tokens/tokenizers_agree/div14_col_drift_3byte_str.aes similarity index 100% rename from cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div14_col_drift_3byte_str.aes rename to cli/test-data/gt_tokens/tokenizers_agree/div14_col_drift_3byte_str.aes diff --git a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div15_col_drift_4byte_str.aes b/cli/test-data/gt_tokens/tokenizers_agree/div15_col_drift_4byte_str.aes similarity index 100% rename from cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div15_col_drift_4byte_str.aes rename to cli/test-data/gt_tokens/tokenizers_agree/div15_col_drift_4byte_str.aes diff 
--git a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div16_col_drift_bcom.aes b/cli/test-data/gt_tokens/tokenizers_agree/div16_col_drift_bcom.aes similarity index 100% rename from cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div16_col_drift_bcom.aes rename to cli/test-data/gt_tokens/tokenizers_agree/div16_col_drift_bcom.aes diff --git a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div17_col_drift_multi.aes b/cli/test-data/gt_tokens/tokenizers_agree/div17_col_drift_multi.aes similarity index 100% rename from cli/test-data/gsc1_string_to_tokens/tokenizers_agree/div17_col_drift_multi.aes rename to cli/test-data/gt_tokens/tokenizers_agree/div17_col_drift_multi.aes diff --git a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/id_quotes.aes b/cli/test-data/gt_tokens/tokenizers_agree/id_quotes.aes similarity index 100% rename from cli/test-data/gsc1_string_to_tokens/tokenizers_agree/id_quotes.aes rename to cli/test-data/gt_tokens/tokenizers_agree/id_quotes.aes diff --git a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/naked_ak.aes b/cli/test-data/gt_tokens/tokenizers_agree/naked_ak.aes similarity index 100% rename from cli/test-data/gsc1_string_to_tokens/tokenizers_agree/naked_ak.aes rename to cli/test-data/gt_tokens/tokenizers_agree/naked_ak.aes diff --git a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/sg_base58_underscore.aes b/cli/test-data/gt_tokens/tokenizers_agree/sg_base58_underscore.aes similarity index 100% rename from cli/test-data/gsc1_string_to_tokens/tokenizers_agree/sg_base58_underscore.aes rename to cli/test-data/gt_tokens/tokenizers_agree/sg_base58_underscore.aes diff --git a/cli/test-data/gsc1_string_to_tokens/tokenizers_agree/sg_base58_upper_I.aes b/cli/test-data/gt_tokens/tokenizers_agree/sg_base58_upper_I.aes similarity index 100% rename from cli/test-data/gsc1_string_to_tokens/tokenizers_agree/sg_base58_upper_I.aes rename to cli/test-data/gt_tokens/tokenizers_agree/sg_base58_upper_I.aes diff --git 
a/cli/zomp.meta b/cli/zomp.meta index 5e0917a..cf2cd57 100644 --- a/cli/zomp.meta +++ b/cli/zomp.meta @@ -2,11 +2,11 @@ {type,cli}. {modules,[]}. {mod,"gsc_cli"}. -{prefix,none}. {author,"Peter Harpending"}. +{prefix,none}. {desc,"GSC CLI and test suite"}. {package_id,{"otpr","gsc_cli",{0,1,0}}}. -{deps,[{"otpr","gsc",{0,1,0}}]}. +{deps,[{"otpr","sophia",{9,0,0}},{"otpr","gsc",{0,1,0}}]}. {key_name,none}. {a_email,"peterharpending@qpq.swiss"}. {c_email,"peterharpending@qpq.swiss"}. diff --git a/include/gsc.hrl b/include/gsc.hrl index 34fee08..d601c3e 100644 --- a/include/gsc.hrl +++ b/include/gsc.hrl @@ -104,12 +104,12 @@ % specifically account for this error -record(gsc_err_bcom_unterminated, {prev_tokens :: [tk()], - break_pos :: gsc_pos(), + break_pos :: tk_pos(), rest :: string()}). -record(gsc_err_no_tokmatch, {prev_tokens :: [tk()], - break_pos :: gsc_pos(), + break_pos :: tk_pos(), rest :: string()}). @@ -133,7 +133,7 @@ % generic placeholder error for now -record(gsc_err, {atom :: atom(), - string = none :: none | iolist(), + str = none :: none | iolist(), extra = none :: none | any()}). % @doc all errors GSC can return conveniently listed in diff --git a/src/gsc_ast.erl b/scratch/gsc_ast.erl similarity index 97% rename from src/gsc_ast.erl rename to scratch/gsc_ast.erl index d14b5ad..3fd6b45 100644 --- a/src/gsc_ast.erl +++ b/scratch/gsc_ast.erl @@ -158,7 +158,7 @@ % %-type parse_error_() :: any(). %-record(parse_error, -% {pos = none :: none | gsc_pos(), +% {pos = none :: none | tk_pos(), % msg = "" :: string(), % subs = [] :: [parse_error_()], % extra = none :: any()}). 
@@ -196,13 +196,13 @@ %gulp_file([]) -> % {error, empty_file}; %gulp_file(Tokens) -> -% case gsc_tokens:take_block(Tokens) of +% case gs_tokens:take_block(Tokens) of % {Tokens, []} -> % gulp_block(fun gulp_top_decl/1, Tokens); % %gulp_file2([], [], Tokens); % {A, B} -> -% StartPos = gsc_tokens:start_pos(A), -% ErrPos = gsc_tokens:start_pos(B), +% StartPos = gs_tokens:start_pos(A), +% ErrPos = gs_tokens:start_pos(B), % Msg = efmt("gulp_file: block starting at ~p ends at ~p instead of EOF", % [StartPos, ErrPos]), % {error, #parse_error{pos = ErrPos, msg = Msg}} @@ -212,7 +212,7 @@ % %%gulp_file2(AccOks, AccErrs, Tokens = [_ | _]) -> %% % ItemTokens will be nonempty -%% {ItemTokens, NewTokens} = gsc_tokens:take_block_item(Tokens), +%% {ItemTokens, NewTokens} = gs_tokens:take_block_item(Tokens), %% case gulp_top_decl(ItemTokens) of %% {gulp, Ok} -> gulp_file2([Ok | AccOks], AccErrs, NewTokens); %% Err -> gulp_file2(AccOks, [Err | AccErrs], NewTokens) @@ -258,7 +258,7 @@ % %gulp_block(GulpItem, AccOks, AccErrs, Tokens = [_ | _]) -> % % ItemTokens will be nonempty -% {ItemTokens, NewTokens} = gsc_tokens:take_block_item(Tokens), +% {ItemTokens, NewTokens} = gs_tokens:take_block_item(Tokens), % case GulpItem(ItemTokens) of % {gulp, Ok} -> gulp_block(GulpItem, [Ok | AccOks], AccErrs, NewTokens); % Err -> gulp_block(GulpItem, AccOks, [Err | AccErrs], NewTokens) @@ -284,7 +284,7 @@ %% | Using %% @end %gulp_top_decl(DeclTokens) -> -% case gsc_tokens:strings(3, DeclTokens) of +% case gs_tokens:strings(3, DeclTokens) of % ["payable", "contract", "interface"] -> % gulp_nyi(DeclTokens); % ["contract", "interface" | _] -> @@ -410,7 +410,7 @@ %% | (EModifier* 'entrypoint' | FModifier* 'function') Block(FunDecl) %% | Using %gulp_decl(Tokens) -> -% case gsc_tokens:strings(1, Tokens) of +% case gs_tokens:strings(1, Tokens) of % ["type"] -> gulp_type_alias(Tokens); % _ -> gulp_nyi(Tokens) % end. 
@@ -611,7 +611,7 @@ %% Type1 = {plist, Types} () (foo) (foo, bar) %% | {token, #tk{}} foo Bar.baz 'quux %slurp_type1(Tks) -> -% case gsc_tokens:slurp_plist(Tks) of +% case gs_tokens:slurp_plist(Tks) of % % head token is NOT open paren -> must be id/qid/tvar % {slurp, [], [Tk | NewTks]} -> % TkType = Tk#tk.type, @@ -633,7 +633,7 @@ % % %%slurp_type_expr_plist(Tks) -> -%% case gsc_tokens:slurp_plist(Tks) of +%% case gs_tokens:slurp_plist(Tks) of %% % head token is NOT open paren -> must be id/qid/tvar %% {slurp, [], [Tk | NewTks]} -> %% TkType = Tk#tk.type, diff --git a/scratch/gsc_parse_type_expr.erl b/scratch/gsc_parse_type_expr.erl new file mode 100644 index 0000000..52d39a6 --- /dev/null +++ b/scratch/gsc_parse_type_expr.erl @@ -0,0 +1,216 @@ +-module(gsc_parse_type_expr). +% +%-export_type([ +%]). +% +%-export([ +% unsafe_vtks_from_string/1, +% gulp_vtks/1, +% take_until_ifx_op/1 +%]). +% +%-include("$gsc_include/gsc.hrl"). +% +% +%%------------------------------------------------------ +%% TYPES +%%------------------------------------------------------ +% +%-type vtk_ifx_op() :: vtk_apply_to +% | {'vtk_*', tk()} +% | {'vtk_=>', tk()}. +% +%-type vtk() :: tk() +% | {vtk_plist, [tk()]} +% | vtk_ifx_op(). +% +% +%-type gulped(X) :: {gulp, X} +% | {error, any()}. +% +%-type slurped(X) :: {slurp, X, Rest :: [tk()]} +% | {error, any()}. +% +% +%%------------------------------------------------------ +%% FUNCTIONS +%%------------------------------------------------------ +% +% +%-spec unsafe_vtks_from_string(SrcStr) -> Vtks when +% SrcStr :: string(), +% Vtks :: [vtk()]. +% +%% @doc for testing +%unsafe_vtks_from_string(S) -> +% {ok, SigTks} = gs_tokens:significant_tokens(S), +% {gulp, Vtks} = gulp_vtks(SigTks), +% Vtks. +% +% +%% operators in descending order of exteriority +%% +%% precedence verbiage confuses me +%% +%% 1 + 2 * 3 ^ 4 +%% +%% (+ 1 (* 2 (^ 3 4))) +%% +%% precedence is thinking about the operators as like +%% having arms and legs and doing something. 
how much +%% power do they have to bind to their neighbors. it's +%% thinking of your operators as verbs in some state +%% machine. +%% +%% i don't like when i have to model the state machine +%% in my head when i'm reading code. everything should +%% just be there. exteriority and interiority are +%% properties of nouns, not properties of verbs. +%% +%% functional programming is all about offloading as +%% much of your reasoning into nouns as possible. verbs +%% change things. and change is bad. +%-type chunk_strategy() +% :: ast_parens % prefix paren (x y z) +% | '=>' % infixr +% | '*' % infix +% | 'apply' % postfix paren f (...) +% . +% +% +%-record(ast_parens, +% {open = none :: none | tk(), +% inner = none :: none | [tk()], +% close = none :: none | tk()}). +% +%chunk_by(Strategy, Tokens) -> +% chunk_by(Strategy, [], Tokens). +% +% +%-spec chunk0(Strategy, Oks, Errs, Tokens) -> Result when +% Strategy :: chunk_strategy(), +% Oks :: [any()], +% Errs :: [{error, Reason :: any()}], +% Tokens :: [tk()], +% Result :: {ok, +% +%gulp_chunks_by(_, Stk, [], []) -> +% {gulp, lists:reverse(Stk)}; +%gulp_chunks_by(_, _, Errs, []) -> +% {error, {fixme, {?MODULE, ?LINE}, Errs}}; +%gulp_chunks_by(plist, Stk, Errs, Tokens) -> +% case slurp_plist_rec(Tokens) of +% {slurp, Plist, NewTokens} -> +% gulp_chunks_by(plist, [PList | Stk], Errs, NewTokens); +% barf -> +% [Token | NewTokens] = Tokens, +% gulp_chunks_by(plist, [Token | Stk], Errs, NewTokens); +% Error -> +% gulp_chunks_by(plist, Stk, [Error | Errs], Tokens); +% end. +% +%slurp_plist_rec(Tokens = [#tk{string = "(" | _]) -> +% case gs_tokens:slurp_plist(Tokens) of +% {slurp, [], _} -> +% barf; +% {slurp, PTokens, NewTokens} -> +% PTokensInner = pt_inner(PTokens), +% end; +% +%%-spec gulp_ifx_tree(Tokens) -> gulped(IfxTree) when +%% Tokens :: [tk()], +%% IfxTree :: ifx_tree(). 
+%% +%%-spec chunk_by(ChunkStrategy, Tokens) -> Result when +%% ChunkStrategy :: chunk_strategy(), +%% Tokens :: [tk()], +%% Result :: {ChunkStrategy, +% +% +% +%-spec gulp_vtks(Tokens) -> Result when +% Tokens :: [tk()], +% Result :: gulped(VirtualTokens), +% VirtualTokens :: [vtk()]. +% +%% @private +%% for infix precedence/associativity resolution. +%% +%% better/original name (although misnomer) was +%% insert_virtual_tokens. we're using the gulp verbiage +%% because we're guarding against mistmatched delimiters +%% @end +%gulp_vtks(Tokens) -> +% gulp_vtks([], Tokens). +% +%gulp_vtks(Acc, Tks0) -> +% % scan until next "(" | "*" | "=>" +% case take_until_ifx_op(Tks0) of +% % no infix op remaining, return arg +% {_Pfx = Tks0, +% _Sfx = []} -> +% {gulp, lists:flatten([Acc, Tks0])}; +% % application +% % "... foo(bar, baz) ..." +% % ~> [..., foo, {plist, "(bar, baz)"}, ...] +% {_Pfx = Tks1_BeforeOpen, +% _Sfx = Tks2_OpenNAfter +% = [#tk{string = "("} | _]} -> +% case gs_tokens:slurp_plist(Tks2_OpenNAfter) of +% {slurp, Tks2A_OpenToClose, Tks2B_AfterClose} -> +% NewAcc = [Acc, +% Tks1_BeforeOpen, +% vtk_apply_to, +% {vtk_plist, Tks2A_OpenToClose}], +% gulp_vtks(NewAcc, Tks2B_AfterClose); +% Error = {error, _} -> +% Error +% end; +% % product +% {_Pfx = Tks0_BeforeTimes, +% _Sfx = [ Tk1A_Times = #tk{string = "*"} +% | Tks1B_AfterTimes]} -> +% NewAcc = [Acc, +% Tks0_BeforeTimes, +% {'vtk_*', Tk1A_Times}], +% gulp_vtks(NewAcc, Tks1B_AfterTimes); +% % funType +% {_Pfx = Tks0_BeforeOp, +% _Sfx = [ Tk1A_Op = #tk{string = "=>"} +% | Tks1B_AfterOp]} -> +% NewAcc = [Acc, +% Tks0_BeforeOp, +% {'vtk_=>', Tk1A_Op}], +% gulp_vtks(NewAcc, Tks1B_AfterOp) +% end. +% +% +% +%-spec take_until_ifx_op(Tokens) -> Result when +% Tokens :: [tk()], +% Result :: {Taken, NewTokens}, +% Taken :: Tokens, +% NewTokens :: Tokens. +%% @doc +%% consume tokens until one of ["(", "*", "=>"] +% +%take_until_ifx_op(Tks) -> +% take_until_ifx_op([], Tks). 
+% +%take_until_ifx_op(Stack, []) -> +% {lists:reverse(Stack), []}; +%take_until_ifx_op(Stack, Tokens = [Token | NewTokens]) -> +% TokStr = Token#tk.string, +% Continue = +% case TokStr of +% % exit cases +% "(" -> false; +% "*" -> false; +% "=>" -> false; +% _ -> true +% end, +% case Continue of +% true -> take_until_ifx_op([Token | Stack], NewTokens); +% false -> {lists:reverse(Stack), Tokens} +% end. +% diff --git a/scratch/gsc_token_chunks.erl b/scratch/gsc_token_chunks.erl index f6b1899..2634676 100644 --- a/scratch/gsc_token_chunks.erl +++ b/scratch/gsc_token_chunks.erl @@ -54,16 +54,16 @@ % % % -%-spec start_pos([gsc_token()]) -> {value, gsc_pos()} | none. +%-spec start_pos([gsc_token()]) -> {value, tk_pos()} | none. % %start_pos([#gsc_token{pos = P}]) -> {value, P}; %start_pos([]) -> none. % % -%-spec end_pos([gsc_token()]) -> {value, gsc_pos()} | none. +%-spec end_pos([gsc_token()]) -> {value, tk_pos()} | none. % %end_pos([#gsc_token{pos = Pos, string = Str}]) -> -% {value, gsc_tokens:new_pos(Pos, Str)}; +% {value, gs_tokens:new_pos(Pos, Str)}; %end_pos([_ | T]) -> % end_pos(T); %end_pos([]) -> diff --git a/src/gsc_strmatch.erl b/src/gs_strmatch.erl similarity index 99% rename from src/gsc_strmatch.erl rename to src/gs_strmatch.erl index 02992a8..9fd6231 100644 --- a/src/gsc_strmatch.erl +++ b/src/gs_strmatch.erl @@ -70,7 +70,7 @@ % `contract` gets tokenized as a keyword and not a variable name), and then % calls into this module in order to match the string shape it's looking for. % @end --module(gsc_strmatch). +-module(gs_strmatch). %-compile([export_all, nowarn_export_all]). diff --git a/src/gsc_tokens.erl b/src/gs_tokens.erl similarity index 77% rename from src/gsc_tokens.erl rename to src/gs_tokens.erl index cab30e1..946b7c7 100644 --- a/src/gsc_tokens.erl +++ b/src/gs_tokens.erl @@ -16,11 +16,11 @@ % 2. to future-proof in case we decide to incrementally incorporate the gsc % code into the legacy sophia compiler % @end --module(gsc_tokens). 
+-module(gs_tokens). % meta -export([ - token_types_parse_order/0, + token_shapes_parse_order/0, kwds/0 ]). @@ -39,10 +39,11 @@ is_significant/1, filter_significant/1, significant_tokens/1, + tokens_from_iolist/1, tokens/1, slurp_token/2, - slurp_token_types/3, - slurp_token_of_type/3, + slurp_token_shapes/3, + slurp_token_of_shape/3, new_pos/2 ]). @@ -59,7 +60,7 @@ AtMostNStrings :: [string()]. % @doc return the strings of the first N tokens -strings(N, [#tk{string = S} | Rest]) when is_integer(N), N >= 1 -> +strings(N, [#tk{str = S} | Rest]) when is_integer(N), N >= 1 -> [S | strings(N-1, Rest)]; strings(_, []) -> []; @@ -159,7 +160,7 @@ take_block_item([]) -> % counterintuitive to end-users (who are programmers, entirely % unfamiliar with notions like stacks and open/close delimiters) -slurp_plist([Hd = #tk{string = "("} | Tl]) -> +slurp_plist([Hd = #tk{str = "("} | Tl]) -> slurp_dlist([Hd], [Hd], Tl); slurp_plist(Tks) -> {slurp, [], Tks}. @@ -170,30 +171,30 @@ slurp_dlist(All, [], NewTokens) -> {slurp, lists:reverse(All), NewTokens}; % WMA stack is nonempty % happy cases of opens getting popped -slurp_dlist(All, [#tk{string = "("} | NewOpen], - [#tk{string = ")"} = Tk | NewTks]) -> +slurp_dlist(All, [#tk{str = "("} | NewOpen], + [#tk{str = ")"} = Tk | NewTks]) -> slurp_dlist([Tk | All], NewOpen, NewTks); -slurp_dlist(All, [#tk{string = "["} | NewOpen], - [#tk{string = "]"} = Tk | NewTks]) -> +slurp_dlist(All, [#tk{str = "["} | NewOpen], + [#tk{str = "]"} = Tk | NewTks]) -> slurp_dlist([Tk | All], NewOpen, NewTks); -slurp_dlist(All, [#tk{string = "{"} | NewOpen], - [#tk{string = "}"} = Tk | NewTks]) -> +slurp_dlist(All, [#tk{str = "{"} | NewOpen], + [#tk{str = "}"} = Tk | NewTks]) -> slurp_dlist([Tk | All], NewOpen, NewTks); % happy: open delimiters getting pushed -slurp_dlist(All, Opens, [#tk{string = "("} = Tk | NewTks]) -> +slurp_dlist(All, Opens, [#tk{str = "("} = Tk | NewTks]) -> slurp_dlist([Tk | All], [Tk | Opens], NewTks); -slurp_dlist(All, Opens, 
[#tk{string = "["} = Tk | NewTks]) -> +slurp_dlist(All, Opens, [#tk{str = "["} = Tk | NewTks]) -> slurp_dlist([Tk | All], [Tk | Opens], NewTks); -slurp_dlist(All, Opens, [#tk{string = "{"} = Tk | NewTks]) -> +slurp_dlist(All, Opens, [#tk{str = "{"} = Tk | NewTks]) -> slurp_dlist([Tk | All], [Tk | Opens], NewTks); % sad: mismatch cases slurp_dlist(All, Opens, []) -> {error, {fixme, mismatch, Opens, none}}; -slurp_dlist(All, Opens, [#tk{string = "}"} = BadClose | _]) -> +slurp_dlist(All, Opens, [#tk{str = "}"} = BadClose | _]) -> {error, {fixme, mismatch, Opens, {value, BadClose}}}; -slurp_dlist(All, Opens, [#tk{string = "]"} = BadClose | _]) -> +slurp_dlist(All, Opens, [#tk{str = "]"} = BadClose | _]) -> {error, {fixme, mismatch, Opens, {value, BadClose}}}; -slurp_dlist(All, Opens, [#tk{string = ")"} = BadClose | _]) -> +slurp_dlist(All, Opens, [#tk{str = ")"} = BadClose | _]) -> {error, {fixme, mismatch, Opens, {value, BadClose}}}; % general case: non-terminal token gets pushed slurp_dlist(All, Opens, [Tk | NewTks]) -> @@ -206,15 +207,15 @@ slurp_dlist(All, Opens, [Tk | NewTks]) -> % This is parse order definition, list of keywords, etc % % -export([ -% token_types_parse_order/0, +% token_shapes_parse_order/0, % kwds/0 % ]). %------------------------------------------------------- --spec token_types_parse_order() -> [gsc_token_type()]. +-spec token_shapes_parse_order() -> [tk_shape()]. 
% @doc -% list of sophia tokens in parse order (if an earlier type matches, the later -% type isn't even checked) +% list of sophia token shapes in parse order (if an earlier shape matches, the later +% shape isn't even checked) % % % Rules = @@ -245,7 +246,7 @@ slurp_dlist(All, Opens, [Tk | NewTks]) -> % ], % @end -token_types_parse_order() -> +token_shapes_parse_order() -> % written in this style to be maximally editable lists:flatten([ % comments and whitespace @@ -282,8 +283,8 @@ kwds() -> % -export([ % tokens/1, % slurp_token/1, -% slurp_token_types/2, -% slurp_token_of_type/2 +% slurp_token_shapes/2, +% slurp_token_of_shape/2 % ]). %------------------------------------------------------- @@ -323,12 +324,23 @@ filter_significant(Tokens) -> -spec is_significant(Token) -> boolean() when Token :: tk(). -is_significant(#tk{type = bcom}) -> false; -is_significant(#tk{type = lcom}) -> false; -is_significant(#tk{type = ws}) -> false; +is_significant(#tk{shape = bcom}) -> false; +is_significant(#tk{shape = lcom}) -> false; +is_significant(#tk{shape = ws}) -> false; is_significant(_) -> true. +-spec tokens_from_iolist(SrcStr) -> Result when + SrcStr :: iolist(), + Result :: {ok, Tokens} + | {error, gsc_err()}, + Tokens :: [tk()]. + +% @doc alias for tokens/1 +tokens_from_iolist(S) -> tokens(S). 
+ + + -spec tokens(SrcStr) -> Result when SrcStr :: iolist(), Result :: {ok, Tokens} @@ -349,7 +361,7 @@ tokens(Stack, _FinalPos, "") -> {ok, lists:reverse(Stack)}; tokens(Stack, Pos, SrcStr) -> case slurp_token(Pos, SrcStr) of - {tokmatch, NewToken = #tk{string = TokStr}, + {tokmatch, NewToken = #tk{str = TokStr}, NewSrcStr} -> NewPos = new_pos(Pos, TokStr), tokens([NewToken | Stack], NewPos, NewSrcStr); @@ -455,7 +467,7 @@ next_tabstop8(Col0) when Col0 >= 0 -> -spec slurp_token(Pos, SrcStr) -> Result - when Pos :: gsc_pos(), + when Pos :: tk_pos(), SrcStr :: string(), Result :: {tokmatch, Token, Rest} | no_tokmatch @@ -465,17 +477,17 @@ next_tabstop8(Col0) when Col0 >= 0 -> Rest :: string(). % @doc % grab a single token off the front of the string according to -% `token_types_parse_order/0' +% `token_shapes_parse_order/0' slurp_token(Pos, SrcStr) -> % this is the easiest format if i need to fuck with it - slurp_token_types(token_types_parse_order(), Pos, SrcStr). + slurp_token_shapes(token_shapes_parse_order(), Pos, SrcStr). --spec slurp_token_types(ParseOrder, Pos, SrcStr) -> Result - when ParseOrder :: [gsc_token_type()], - Pos :: gsc_pos(), +-spec slurp_token_shapes(ParseOrder, Pos, SrcStr) -> Result + when ParseOrder :: [tk_shape()], + Pos :: tk_pos(), SrcStr :: string(), Result :: {tokmatch, Token, Rest} | no_tokmatch @@ -485,22 +497,22 @@ slurp_token(Pos, SrcStr) -> Rest :: string(). 
% @doc % grab a single token off the front of the string according to -% `token_types_parse_order/0' +% `token_shapes_parse_order/0' -slurp_token_types([TokenType | TTs], Pos, SrcStr) -> - case slurp_token_of_type(TokenType, Pos, SrcStr) of +slurp_token_shapes([TokenType | TTs], Pos, SrcStr) -> + case slurp_token_of_shape(TokenType, Pos, SrcStr) of Match = {tokmatch, _, _} -> Match; - no_tokmatch -> slurp_token_types(TTs, Pos, SrcStr); + no_tokmatch -> slurp_token_shapes(TTs, Pos, SrcStr); IErr = {ierr, _} -> IErr; Error = {error, _} -> Error end; -slurp_token_types([], _Pos, _SrcStr) -> +slurp_token_shapes([], _Pos, _SrcStr) -> no_tokmatch. --spec slurp_token_of_type(TokenType, Pos, SrcStr) -> MaybeToken - when TokenType :: gsc_token_type(), - Pos :: gsc_pos(), +-spec slurp_token_of_shape(TokenType, Pos, SrcStr) -> MaybeToken + when TokenType :: tk_shape(), + Pos :: tk_pos(), SrcStr :: string(), MaybeToken :: {tokmatch, Token, Rest} | no_tokmatch @@ -509,7 +521,7 @@ slurp_token_types([], _Pos, _SrcStr) -> Token :: tk(), Rest :: string(). 
% @doc -% match a sophia token of a given type off the front of the string +% match a sophia token of a given shape off the front of the string % @end % COMMENTS AND WHITESPACE: lcom, bcom, ws @@ -518,27 +530,27 @@ slurp_token_types([], _Pos, _SrcStr) -> % % i am not going to bother writing a string matcher thing for this % FIXME: make a string matcher for line comments -slurp_token_of_type(lcom, Pos, SrcStr) -> +slurp_token_of_shape(lcom, Pos, SrcStr) -> case SrcStr of "//" ++ _ -> {Line, Rest} = takeline("", SrcStr), - Token = #tk{type = lcom, + Token = #tk{shape = lcom, pos = Pos, - string = Line}, + str = Line}, {tokmatch, Token, Rest}; _ -> no_tokmatch end; % Block comments cannot have a string matcher because they have a whole stack % thing keeping track of depth because of nested block comments -slurp_token_of_type(bcom, Pos, SrcStr0) -> +slurp_token_of_shape(bcom, Pos, SrcStr0) -> case SrcStr0 of "/*" ++ SrcStr1 -> case bcom("/*", 1, SrcStr1) of {ok, CommentStr, SrcStr2} -> - Token = #tk{type = bcom, + Token = #tk{shape = bcom, pos = Pos, - string = CommentStr}, + str = CommentStr}, {tokmatch, Token, SrcStr2}; Error -> Error @@ -546,15 +558,15 @@ slurp_token_of_type(bcom, Pos, SrcStr0) -> _ -> no_tokmatch end; -slurp_token_of_type(ws, Pos, SrcStr) -> - WhitespaceMatcher = gsc_strmatch:smr_sf_ws(), - case gsc_strmatch:match(WhitespaceMatcher, SrcStr) of +slurp_token_of_shape(ws, Pos, SrcStr) -> + WhitespaceMatcher = gs_strmatch:smr_sf_ws(), + case gs_strmatch:match(WhitespaceMatcher, SrcStr) of no_strmatch -> no_tokmatch; {strmatch, WS, Rest} -> - Token = #tk{type = ws, + Token = #tk{shape = ws, pos = Pos, - string = WS}, + str = WS}, {tokmatch, Token, Rest} end; % KEYWORDS, OPERATORS, PUNCTUATION: kwd, op, punct @@ -568,88 +580,88 @@ slurp_token_of_type(ws, Pos, SrcStr) -> % % we know kwds are always ids, so we parse it as an id and see if it's one % of the kwds -slurp_token_of_type(kwd, Pos, SrcStr) -> - case slurp_token_of_type(id, Pos, SrcStr) of - 
{tokmatch, IdTok = #tk{string = IdStr}, Rest} -> +slurp_token_of_shape(kwd, Pos, SrcStr) -> + case slurp_token_of_shape(id, Pos, SrcStr) of + {tokmatch, IdTok = #tk{str = IdStr}, Rest} -> case lists:member(IdStr, kwds()) of false -> no_tokmatch; true -> - KwTok = IdTok#tk{type = kwd}, + KwTok = IdTok#tk{shape = kwd}, {tokmatch, KwTok, Rest} end; no_tokmatch -> no_tokmatch end; -slurp_token_of_type(op, Pos, SrcStr) -> - case gsc_strmatch:match(gsc_strmatch:smr_sf_op(), SrcStr) of +slurp_token_of_shape(op, Pos, SrcStr) -> + case gs_strmatch:match(gs_strmatch:smr_sf_op(), SrcStr) of {strmatch, Str, Rest} -> - Token = #tk{type = op, pos = Pos, string = Str}, + Token = #tk{shape = op, pos = Pos, str = Str}, {tokmatch, Token, Rest}; no_strmatch -> no_tokmatch end; -slurp_token_of_type(punct, Pos, SrcStr) -> - case gsc_strmatch:match(gsc_strmatch:smr_sf_punct(), SrcStr) of +slurp_token_of_shape(punct, Pos, SrcStr) -> + case gs_strmatch:match(gs_strmatch:smr_sf_punct(), SrcStr) of {strmatch, Str, Rest} -> - Token = #tk{type = punct, pos = Pos, string = Str}, + Token = #tk{shape = punct, pos = Pos, str = Str}, {tokmatch, Token, Rest}; no_strmatch -> no_tokmatch end; % SOPHIA VARIABLE NAMES: id, con, qid, qcon, tvar -slurp_token_of_type(id, Pos, SrcStr) -> - case gsc_strmatch:match(gsc_strmatch:smr_sf_id(), SrcStr) of +slurp_token_of_shape(id, Pos, SrcStr) -> + case gs_strmatch:match(gs_strmatch:smr_sf_id(), SrcStr) of {strmatch, IdStr, Rest} -> - Token = #tk{type = id, pos = Pos, string = IdStr}, + Token = #tk{shape = id, pos = Pos, str = IdStr}, {tokmatch, Token, Rest}; no_strmatch -> no_tokmatch end; -slurp_token_of_type(con, Pos, SrcStr) -> - case gsc_strmatch:match(gsc_strmatch:smr_sf_con(), SrcStr) of +slurp_token_of_shape(con, Pos, SrcStr) -> + case gs_strmatch:match(gs_strmatch:smr_sf_con(), SrcStr) of {strmatch, Str, Rest} -> - Token = #tk{type = con, pos = Pos, string = Str}, + Token = #tk{shape = con, pos = Pos, str = Str}, {tokmatch, Token, Rest}; no_strmatch -> 
no_tokmatch end; -slurp_token_of_type(qid, Pos, SrcStr) -> - case gsc_strmatch:match(gsc_strmatch:smr_sf_qid(), SrcStr) of +slurp_token_of_shape(qid, Pos, SrcStr) -> + case gs_strmatch:match(gs_strmatch:smr_sf_qid(), SrcStr) of {strmatch, Str, Rest} -> - Token = #tk{type = qid, pos = Pos, string = Str}, + Token = #tk{shape = qid, pos = Pos, str = Str}, {tokmatch, Token, Rest}; no_strmatch -> no_tokmatch end; -slurp_token_of_type(qcon, Pos, SrcStr) -> - case gsc_strmatch:match(gsc_strmatch:smr_sf_qcon(), SrcStr) of +slurp_token_of_shape(qcon, Pos, SrcStr) -> + case gs_strmatch:match(gs_strmatch:smr_sf_qcon(), SrcStr) of {strmatch, Str, Rest} -> - Token = #tk{type = qcon, pos = Pos, string = Str}, + Token = #tk{shape = qcon, pos = Pos, str = Str}, {tokmatch, Token, Rest}; no_strmatch -> no_tokmatch end; -slurp_token_of_type(tvar, Pos, SrcStr) -> - case gsc_strmatch:match(gsc_strmatch:smr_sf_tvar(), SrcStr) of +slurp_token_of_shape(tvar, Pos, SrcStr) -> + case gs_strmatch:match(gs_strmatch:smr_sf_tvar(), SrcStr) of {strmatch, Str, Rest} -> - Token = #tk{type = tvar, pos = Pos, string = Str}, + Token = #tk{shape = tvar, pos = Pos, str = Str}, {tokmatch, Token, Rest}; no_strmatch -> no_tokmatch end; -slurp_token_of_type(int16, Pos, SrcStr) -> - case gsc_strmatch:match(gsc_strmatch:smr_sf_int16(), SrcStr) of +slurp_token_of_shape(int16, Pos, SrcStr) -> + case gs_strmatch:match(gs_strmatch:smr_sf_int16(), SrcStr) of {strmatch, Str, Rest} -> - Token = #tk{type = int16, pos = Pos, string = Str}, + Token = #tk{shape = int16, pos = Pos, str = Str}, {tokmatch, Token, Rest}; no_strmatch -> no_tokmatch end; -slurp_token_of_type(int10, Pos, SrcStr) -> - case gsc_strmatch:match(gsc_strmatch:smr_sf_int10(), SrcStr) of +slurp_token_of_shape(int10, Pos, SrcStr) -> + case gs_strmatch:match(gs_strmatch:smr_sf_int10(), SrcStr) of {strmatch, Str, Rest} -> - Token = #tk{type = int10, pos = Pos, string = Str}, + Token = #tk{shape = int10, pos = Pos, str = Str}, {tokmatch, Token, Rest}; 
no_strmatch -> no_tokmatch @@ -658,63 +670,63 @@ slurp_token_of_type(int10, Pos, SrcStr) -> % ak, ct, sg % % char: sophia char literal -slurp_token_of_type(ak, Pos, SrcStr) -> - StringMatcher = gsc_strmatch:smr_sf_ak(), - case gsc_strmatch:match(StringMatcher, SrcStr) of +slurp_token_of_shape(ak, Pos, SrcStr) -> + StringMatcher = gs_strmatch:smr_sf_ak(), + case gs_strmatch:match(StringMatcher, SrcStr) of no_strmatch -> no_tokmatch; {strmatch, TokenStr, Rest} -> - Token = #tk{type = ak, pos = Pos, string = TokenStr}, + Token = #tk{shape = ak, pos = Pos, str = TokenStr}, {tokmatch, Token, Rest} end; -slurp_token_of_type(ct, Pos, SrcStr) -> - StringMatcher = gsc_strmatch:smr_sf_ct(), - case gsc_strmatch:match(StringMatcher, SrcStr) of +slurp_token_of_shape(ct, Pos, SrcStr) -> + StringMatcher = gs_strmatch:smr_sf_ct(), + case gs_strmatch:match(StringMatcher, SrcStr) of no_strmatch -> no_tokmatch; {strmatch, TokenStr, Rest} -> - Token = #tk{type = ct, pos = Pos, string = TokenStr}, + Token = #tk{shape = ct, pos = Pos, str = TokenStr}, {tokmatch, Token, Rest} end; -slurp_token_of_type(sg, Pos, SrcStr) -> - StringMatcher = gsc_strmatch:smr_sf_sg(), - case gsc_strmatch:match(StringMatcher, SrcStr) of +slurp_token_of_shape(sg, Pos, SrcStr) -> + StringMatcher = gs_strmatch:smr_sf_sg(), + case gs_strmatch:match(StringMatcher, SrcStr) of no_strmatch -> no_tokmatch; {strmatch, TokenStr, Rest} -> - Token = #tk{type = sg, pos = Pos, string = TokenStr}, + Token = #tk{shape = sg, pos = Pos, str = TokenStr}, {tokmatch, Token, Rest} end; -slurp_token_of_type(char, Pos, SrcStr) -> - StringMatcher = gsc_strmatch:smr_sf_char(), - case gsc_strmatch:match(StringMatcher, SrcStr) of +slurp_token_of_shape(char, Pos, SrcStr) -> + StringMatcher = gs_strmatch:smr_sf_char(), + case gs_strmatch:match(StringMatcher, SrcStr) of no_strmatch -> no_tokmatch; {strmatch, TokenStr, Rest} -> - Token = #tk{type = char, pos = Pos, string = TokenStr}, + Token = #tk{shape = char, pos = Pos, str = TokenStr}, 
{tokmatch, Token, Rest} end; -slurp_token_of_type(string, Pos, SrcStr) -> - case gsc_strmatch:match(gsc_strmatch:smr_sf_str(), SrcStr) of +slurp_token_of_shape(string, Pos, SrcStr) -> + case gs_strmatch:match(gs_strmatch:smr_sf_str(), SrcStr) of no_strmatch -> no_tokmatch; {strmatch, TokenStr, Rest} -> - Token = #tk{type = string, pos = Pos, string = TokenStr}, + Token = #tk{shape = string, pos = Pos, str = TokenStr}, {tokmatch, Token, Rest} end; -slurp_token_of_type(bytes, Pos, SrcStr) -> - case gsc_strmatch:match(gsc_strmatch:smr_sf_bytes(), SrcStr) of +slurp_token_of_shape(bytes, Pos, SrcStr) -> + case gs_strmatch:match(gs_strmatch:smr_sf_bytes(), SrcStr) of no_strmatch -> no_tokmatch; {strmatch, TokenStr, Rest} -> - Token = #tk{type = bytes, pos = Pos, string = TokenStr}, + Token = #tk{shape = bytes, pos = Pos, str = TokenStr}, {tokmatch, Token, Rest} end; -slurp_token_of_type(NyiType, Pos, SrcStr) -> - Message = io_lib:format("cannot slurp token of type: ~p", [NyiType]), +slurp_token_of_shape(NyiType, Pos, SrcStr) -> + Message = io_lib:format("cannot slurp token of shape: ~p", [NyiType]), error(#gsc_err{atom = nyi, - string = Message, - extra = [{token_type, NyiType}, + str = Message, + extra = [{token_shape, NyiType}, {pos, Pos}, {rest, SrcStr}]}). diff --git a/src/gsc.erl b/src/gsc.erl index a5948e2..9bcb283 100644 --- a/src/gsc.erl +++ b/src/gsc.erl @@ -4,15 +4,14 @@ % based on original sophia compiler % % parse layers: -% 1. gsc_tokenizer: SrcStr -> (Tokens | SigTokens) +% 1. gs_tokens: SrcStr -> (Tokens | SigTokens) % % SigTokens = not comment/whitespace % % layers: -% a. gsc_strmatch : matches string shapes -% b. gsc_so_scan : converts to so_scan shapes +% a. gs_strmatch : matches string shapes +% b. gso_scan : converts to so_scan shapes % -% 2. 
gsc_ast: SigTokens -> AST % % terminology: % @@ -30,16 +29,8 @@ % % @end -% TODO: -% - barf for outputs, slurp for inputs -% - architecture needs more careful thought but only after something works -% - too fuzzy right now -% - possibly: -% - rename parser layers sequentially: -% - gsc_ -module(gsc). - -export_type([ token/0 ]). @@ -48,10 +39,7 @@ sigtokens_from_file/1, sigtokens_from_string/1, tokens_from_file/1, - tokens_from_string/1, - ast_from_file/1, - ast_from_string/1, - ast_from_tokens/1 + tokens_from_string/1 ]). -include("$gsc_include/gsc.hrl"). @@ -68,13 +56,13 @@ sigtokens_from_file(X) -> case tokens_from_file(X) of - {ok, Y} -> {ok, gsc_tokens:filter_significant(Y)}; + {ok, Y} -> {ok, gs_tokens:filter_significant(Y)}; Err -> Err end. sigtokens_from_string(X) -> case tokens_from_string(X) of - {ok, Y} -> {ok, gsc_tokens:filter_significant(Y)}; + {ok, Y} -> {ok, gs_tokens:filter_significant(Y)}; Err -> Err end. @@ -101,44 +89,4 @@ tokens_from_file(FilePath) -> Tokens :: [tk()]. tokens_from_string(SrcStr) -> - gsc_tokens:tokens(SrcStr). - - - --spec ast_from_file(FilePath) -> Perhaps - when FilePath :: string(), - Perhaps :: {ok, AST} | {error, gsc_err()}, - AST :: gsc_ast(). - -ast_from_file(FilePath) -> - case file:read_file(FilePath) of - {ok, FileBytes} -> ast_from_string(FileBytes); - Error -> Error - end. - - - --spec ast_from_string(SrcStr) -> Perhaps - when SrcStr :: string(), - Perhaps :: {ok, AST} | {error, gsc_err()}, - AST :: gsc_ast(). - -ast_from_string(SrcStr) -> - case gsc_tokens:significant_tokens(SrcStr) of - {ok, SigTks} -> ast_from_tokens(SigTks); - Error -> Error - end. - - - --spec ast_from_tokens(SrcTokens) -> Perhaps - when SrcTokens :: [tk()], - Perhaps :: {ok, AST} | {error, gsc_err()}, - AST :: gsc_ast(). - -ast_from_tokens(Tks) -> - SigTks = gsc_tokens:filter_significant(Tks), - case gsc_ast:gulp_file(SigTks) of - {gulp, AST} -> {ok, AST}; - Error -> Error - end. + gs_tokens:tokens(SrcStr). 
diff --git a/src/gsc_parse_type_expr.erl b/src/gsc_parse_type_expr.erl deleted file mode 100644 index 917477f..0000000 --- a/src/gsc_parse_type_expr.erl +++ /dev/null @@ -1,216 +0,0 @@ --module(gsc_parse_type_expr). - --export_type([ -]). - --export([ - unsafe_vtks_from_string/1, - gulp_vtks/1, - take_until_ifx_op/1 -]). - --include("$gsc_include/gsc.hrl"). - - -%------------------------------------------------------ -% TYPES -%------------------------------------------------------ - --type vtk_ifx_op() :: vtk_apply_to - | {'vtk_*', tk()} - | {'vtk_=>', tk()}. - --type vtk() :: tk() - | {vtk_plist, [tk()]} - | vtk_ifx_op(). - - --type gulped(X) :: {gulp, X} - | {error, any()}. - --type slurped(X) :: {slurp, X, Rest :: [tk()]} - | {error, any()}. - - -%------------------------------------------------------ -% FUNCTIONS -%------------------------------------------------------ - - --spec unsafe_vtks_from_string(SrcStr) -> Vtks when - SrcStr :: string(), - Vtks :: [vtk()]. - -% @doc for testing -unsafe_vtks_from_string(S) -> - {ok, SigTks} = gsc_tokens:significant_tokens(S), - {gulp, Vtks} = gulp_vtks(SigTks), - Vtks. - - -% operators in descending order of exteriority -% -% precedence verbiage confuses me -% -% 1 + 2 * 3 ^ 4 -% -% (+ 1 (* 2 (^ 3 4))) -% -% precedence is thinking about the operators as like -% having arms and legs and doing something. how much -% power do they have to bind to their neighbors. it's -% thinking of your operators as verbs in some state -% machine. -% -% i don't like when i have to model the state machine -% in my head when i'm reading code. everything should -% just be there. exteriority and interiority are -% properties of nouns, not properties of verbs. -% -% functional programming is all about offloading as -% much of your reasoning into nouns as possible. verbs -% change things. and change is bad. --type chunk_strategy() - :: ast_parens % prefix paren (x y z) - | '=>' % infixr - | '*' % infix - | 'apply' % postfix paren f (...) - . 
- - --record(ast_parens, - {open = none :: none | tk(), - inner = none :: none | [tk()], - close = none :: none | tk()}). - -chunk_by(Strategy, Tokens) -> - chunk_by(Strategy, [], Tokens). - - --spec chunk0(Strategy, Oks, Errs, Tokens) -> Result when - Strategy :: chunk_strategy(), - Oks :: [any()], - Errs :: [{error, Reason :: any()}], - Tokens :: [tk()], - Result :: {ok, - -gulp_chunks_by(_, Stk, [], []) -> - {gulp, lists:reverse(Stk)}; -gulp_chunks_by(_, _, Errs, []) -> - {error, {fixme, {?MODULE, ?LINE}, Errs}}; -gulp_chunks_by(plist, Stk, Errs, Tokens) -> - case slurp_plist_rec(Tokens) of - {slurp, Plist, NewTokens} -> - gulp_chunks_by(plist, [PList | Stk], Errs, NewTokens); - barf -> - [Token | NewTokens] = Tokens, - gulp_chunks_by(plist, [Token | Stk], Errs, NewTokens); - Error -> - gulp_chunks_by(plist, Stk, [Error | Errs], Tokens); - end. - -slurp_plist_rec(Tokens = [#tk{string = "(" | _]) -> - case gsc_tokens:slurp_plist(Tokens) of - {slurp, [], _} -> - barf; - {slurp, PTokens, NewTokens} -> - PTokensInner = pt_inner(PTokens), - end; - -%-spec gulp_ifx_tree(Tokens) -> gulped(IfxTree) when -% Tokens :: [tk()], -% IfxTree :: ifx_tree(). -% -%-spec chunk_by(ChunkStrategy, Tokens) -> Result when -% ChunkStrategy :: chunk_strategy(), -% Tokens :: [tk()], -% Result :: {ChunkStrategy, - - - --spec gulp_vtks(Tokens) -> Result when - Tokens :: [tk()], - Result :: gulped(VirtualTokens), - VirtualTokens :: [vtk()]. - -% @private -% for infix precedence/associativity resolution. -% -% better/original name (although misnomer) was -% insert_virtual_tokens. we're using the gulp verbiage -% because we're guarding against mistmatched delimiters -% @end -gulp_vtks(Tokens) -> - gulp_vtks([], Tokens). - -gulp_vtks(Acc, Tks0) -> - % scan until next "(" | "*" | "=>" - case take_until_ifx_op(Tks0) of - % no infix op remaining, return arg - {_Pfx = Tks0, - _Sfx = []} -> - {gulp, lists:flatten([Acc, Tks0])}; - % application - % "... foo(bar, baz) ..." 
- % ~> [..., foo, {plist, "(bar, baz)"}, ...] - {_Pfx = Tks1_BeforeOpen, - _Sfx = Tks2_OpenNAfter - = [#tk{string = "("} | _]} -> - case gsc_tokens:slurp_plist(Tks2_OpenNAfter) of - {slurp, Tks2A_OpenToClose, Tks2B_AfterClose} -> - NewAcc = [Acc, - Tks1_BeforeOpen, - vtk_apply_to, - {vtk_plist, Tks2A_OpenToClose}], - gulp_vtks(NewAcc, Tks2B_AfterClose); - Error = {error, _} -> - Error - end; - % product - {_Pfx = Tks0_BeforeTimes, - _Sfx = [ Tk1A_Times = #tk{string = "*"} - | Tks1B_AfterTimes]} -> - NewAcc = [Acc, - Tks0_BeforeTimes, - {'vtk_*', Tk1A_Times}], - gulp_vtks(NewAcc, Tks1B_AfterTimes); - % funType - {_Pfx = Tks0_BeforeOp, - _Sfx = [ Tk1A_Op = #tk{string = "=>"} - | Tks1B_AfterOp]} -> - NewAcc = [Acc, - Tks0_BeforeOp, - {'vtk_=>', Tk1A_Op}], - gulp_vtks(NewAcc, Tks1B_AfterOp) - end. - - - --spec take_until_ifx_op(Tokens) -> Result when - Tokens :: [tk()], - Result :: {Taken, NewTokens}, - Taken :: Tokens, - NewTokens :: Tokens. -% @doc -% consume tokens until one of ["(", "*", "=>"] - -take_until_ifx_op(Tks) -> - take_until_ifx_op([], Tks). - -take_until_ifx_op(Stack, []) -> - {lists:reverse(Stack), []}; -take_until_ifx_op(Stack, Tokens = [Token | NewTokens]) -> - TokStr = Token#tk.string, - Continue = - case TokStr of - % exit cases - "(" -> false; - "*" -> false; - "=>" -> false; - _ -> true - end, - case Continue of - true -> take_until_ifx_op([Token | Stack], NewTokens); - false -> {lists:reverse(Stack), Tokens} - end. - diff --git a/src/gso_scan.erl b/src/gso_scan.erl index 1bc341e..9b7fb2d 100644 --- a/src/gso_scan.erl +++ b/src/gso_scan.erl @@ -1,6 +1,6 @@ % @doc compatibility layer to test against so_scan % -% converts gsc_tokens data to so_scan tokens +% converts gs_tokens data to so_scan tokens % % Ref: so_scan.erl -module(gso_scan). @@ -76,14 +76,14 @@ -type so_symbol() :: so_kwd() | so_special_char() | atom(). -type so_token2() :: {Symbol :: so_symbol(), - Location :: gsc_pos()}. + Location :: tk_pos()}. 
% FIXME % this is 'id', 'con', qid -type so_tk3type() :: char | string | hex | int | bytes | qid | qcon | tvar | id | con. -type so_token3() :: {TokenType :: so_tk3type(), - Location :: gsc_pos(), + Location :: tk_pos(), TokenValue :: term()}. -type so_token() :: so_token2() | so_token3(). @@ -104,7 +104,7 @@ % @end scan(SrcStr) -> - case gsc_tokens:tokens(SrcStr) of + case gs_tokens:tokens(SrcStr) of {ok, SfLTokens} -> SoTokens = to_so_tokens(SfLTokens), {ok, SoTokens}; @@ -151,12 +151,12 @@ scan(SrcStr) -> % % so if we see an ak/ct/sg token, we summon evil ben % carson to reconjoin the unconjoined twins -to_so_tokens([ AkTok = #tk{type = AkCtSg, pos = Pos} +to_so_tokens([ AkTok = #tk{shape = AkCtSg, pos = Pos} | Sheeit]) when ak =:= AkCtSg; ct =:= AkCtSg; sg =:= AkCtSg -> - {#tk{string = FinalAkStr}, NewSheeit} + {#tk{str = FinalAkStr}, NewSheeit} = ken_barson_rises(AkTok, Sheeit), [{id, Pos, FinalAkStr}| to_so_tokens(NewSheeit)]; % this part is just lists:filtermap @@ -259,9 +259,9 @@ to_so_tokens([]) -> % `_`**: `smr_plus` requires >=1 base58 char to % match; `ak_I`, `ak_0`, `ak__bar` all fall % through to `id` and both tokenizers agree. 
-ken_barson_rises(AkTokAcc = #tk{string = AkStr}, - SrcTokens = [#tk{type = CandidateType, - string = CandidateString} +ken_barson_rises(AkTokAcc = #tk{str = AkStr}, + SrcTokens = [#tk{shape = CandidateType, + str = CandidateString} | Rest]) -> % candidate: % dig out the token type and the string @@ -273,7 +273,7 @@ ken_barson_rises(AkTokAcc = #tk{string = AkStr}, Smash -> % dig out the token from LcTokApi NewAkStr = AkStr ++ CandidateString, - NewAkTokAcc = AkTokAcc#tk{string = NewAkStr}, + NewAkTokAcc = AkTokAcc#tk{str = NewAkStr}, ken_barson_rises(NewAkTokAcc, Rest); Pass -> {AkTokAcc, SrcTokens} @@ -320,9 +320,9 @@ pass_types() -> % follow-on tokens % @end -to_so_token(#tk{type = SfTokenType, - pos = Pos, - string = SfTokenStr}) -> +to_so_token(#tk{shape = SfTokenType, + pos = Pos, + str = SfTokenStr}) -> case SfTokenType of %----------------- % Ignored @@ -371,7 +371,7 @@ to_so_token(#tk{type = SfTokenType, NYI -> Msg = io_lib:format("gsc_so_scan:to_so_token/1: unhandled token shape: ~p", [NYI]), error(#gsc_err{atom = nyi, - string = Msg}) + str = Msg}) end. %% ak/ct/sg all tokenize to id @@ -393,7 +393,7 @@ so_parse_char([$' | Chars]) -> [Char] -> Char; _Bad -> error(#gsc_err{atom = bad_token, - string = "Bad character literal: '" ++ Chars}) + str = "Bad character literal: '" ++ Chars}) end. so_parse_string([$" | Chars]) -> @@ -435,7 +435,7 @@ unescape(Delim, [$\\, Code | Chars], Acc) -> $t -> Ok($\t); $v -> Ok($\v); _ -> error(#gsc_err{atom = bad_escape_char, - string = "Bad control sequence: \\" ++ [Code]}) %% TODO + str = "Bad control sequence: \\" ++ [Code]}) %% TODO end; unescape(Delim, [C | Chars], Acc) -> unescape(Delim, Chars, [C | Acc]). diff --git a/zomp.meta b/zomp.meta index 631a48b..b37ac30 100644 --- a/zomp.meta +++ b/zomp.meta @@ -2,7 +2,7 @@ {type,lib}. {modules,[]}. {author,"Peter Harpending"}. -{prefix,none}. +{prefix,"gs"}. {desc,"Exploratory sophia compiler rewrite"}. {package_id,{"otpr","gsc",{0,1,0}}}. {deps,[]}.