194 lines
5.0 KiB
Erlang
194 lines
5.0 KiB
Erlang
% @doc
|
|
% helper functions for grabbing collections of tokens
|
|
% off the token stream
|
|
%
|
|
% generally assume no whitespace/comment tokens in
|
|
% input stream
|
|
-module(sfc_token_chunks).
|
|
|
|
%-export_type([
|
|
% chunk_shape/0,
|
|
% choke_reason/0
|
|
%]).
|
|
%
|
|
%-export([
|
|
% take/2,
|
|
% unsafe_block_to_items/1,
|
|
% barf/2,
|
|
% start_pos/1,
|
|
% end_pos/1
|
|
%]).
|
|
%
|
|
%% $sfc_include is so c() works from sfp eshell
|
|
%-include("$sfc_include/sfc.hrl").
|
|
%
|
|
%%------------------------------------------
|
|
%% Types
|
|
%%------------------------------------------
|
|
%
|
|
%-type chunk_shape()
|
|
% :: block
|
|
% | block_item
|
|
% | {block_item, Level :: pos_integer()}
|
|
% | block_as_items
|
|
% .
|
|
%
|
|
%% FIXME
|
|
%-type choke_reason() :: any().
|
|
%
|
|
%
|
|
%%------------------------------------------
|
|
%% functions
|
|
%%------------------------------------------
|
|
%
|
|
%% take = just split
|
|
%
|
|
%take(block, []) ->
|
|
% {[], []};
|
|
%take(block, [Hd = #sfc_token{pos = {_, BCol}} | Tl]) ->
|
|
% tw(fun(#sfc_token{pos = {_, TkCol}}) -> BCol =< TkCol end, [Hd], Tl);
|
|
%take(block_item, []) ->
|
|
% {[], []};
|
|
%take(block_item, [Hd = #sfc_token{pos = {_, ICol}} | Tl]) ->
|
|
% tw(fun(#sfc_token{pos = {_, TkCol}}) -> ICol < TkCol end, Tl).
|
|
%
|
|
%
|
|
%
|
|
%-spec start_pos([sfc_token()]) -> {value, sfc_pos()} | none.
|
|
%
|
|
%start_pos([#sfc_token{pos = P} | _]) -> {value, P};
|
|
%start_pos([]) -> none.
|
|
%
|
|
%
|
|
%-spec end_pos([sfc_token()]) -> {value, sfc_pos()} | none.
|
|
%
|
|
%end_pos([#sfc_token{pos = Pos, string = Str}]) ->
|
|
% {value, sfc_tokens:new_pos(Pos, Str)};
|
|
%end_pos([_ | T]) ->
|
|
% end_pos(T);
|
|
%end_pos([]) ->
|
|
% none.
|
|
%
|
|
%
|
|
%-spec barf(ChunkShape, SigTokens) -> Perhaps
|
|
% when ChunkShape :: chunk_shape(),
|
|
% SigTokens :: [Token],
|
|
% Perhaps :: {barf, Chunk, Rest}
|
|
% | {choke, Reason},
|
|
% Chunk :: [Token] % most
|
|
% | [[Token]], % block_as_items
|
|
% Rest :: [Token],
|
|
% Reason :: choke_reason(),
|
|
% Token :: sfc_token().
|
|
%
|
|
%% @doc
|
|
%% slurp/barf terminology comes from paredit mode in
|
|
%% emacs
|
|
%%
|
|
%% slurp ~= accepting input
|
|
%% barf ~= separating input
|
|
%%
|
|
%% slurp: (foo bar) baz ~> (foo bar baz)
|
|
%% barf : (foo bar baz) ~> foo (bar baz)
|
|
%
|
|
%barf(_, []) ->
|
|
% {barf, [], []};
|
|
%barf(block, [H = #sfc_token{pos = {_, BlkCol}} | T]) ->
|
|
% Take =
|
|
% fun(#sfc_token{pos = {_, TkCol}}) ->
|
|
% BlkCol =< TkCol
|
|
% end,
|
|
% {A, B} = tw(Take, T),
|
|
% {barf, [H | A], B};
|
|
%barf(block_item, [H = #sfc_token{pos = {_, BlkCol}} | T]) ->
|
|
% Take =
|
|
% fun(#sfc_token{pos = {_, TkCol}}) ->
|
|
% BlkCol < TkCol
|
|
% end,
|
|
% {A, B} = tw(Take, T),
|
|
% {barf, [H | A], B};
|
|
%% not needed for our case, future-proofing. see unsafe_block_to_items
|
|
%% for details
|
|
%barf({block_item, Level}, Tokens = [#sfc_token{pos = {_, StartLevel}} | _]) ->
|
|
% case Level =:= StartLevel of
|
|
% false -> {barf, [], Tokens};
|
|
% true -> barf(block_item, Tokens)
|
|
% end;
|
|
%% this has a fancy name in Haskell like Lens . lift ^. mapM_
|
|
%%
|
|
%% i think it's `sequence` actually, but not looking it up
|
|
%%
|
|
%% this barfs a block, and then uses unsafe_block_to_items/1 to split
|
|
%% the block tokens into individual items
|
|
%barf(block_as_items, Tokens) ->
|
|
% {barf, BlockTokens, Rest} = barf(block, Tokens),
|
|
% {barf, unsafe_block_to_items(BlockTokens), Rest};
|
|
%barf(_, _) ->
|
|
% {choke, #sfc_err_nyi{}}.
|
|
%
|
|
%
|
|
%
|
|
%
|
|
%
|
|
%
|
|
%-spec unsafe_block_to_items([Token]) -> [[Token]]
|
|
% when Token :: sfc_token().
|
|
%
|
|
%% @doc
|
|
%% PITFALL: this ASSUMES that the given list of tokens has the
|
|
%% property that all indent levels are >= that of the head... i.e. the
|
|
%% input to this is assumed to be the output of (e.g.) barf(block, _)
|
|
%%
|
|
%% the danger case is something my intuition is pointing to as a
|
|
%% possibility perhaps if you're doing some incremental parallel
|
|
%% stream parsing voodoo, naively parsing a block by greedily pulling
|
|
%% block items off the head of the list
|
|
%%
|
|
%% with the current way things work, we actually do not need to check
|
|
%% the indent level of each block item and make sure they're all the
|
|
%% same
|
|
%%
|
|
%% BLOCK =
|
|
%%     foo
|
|
%%         ...
|
|
%%     bar
|
|
%%         ...
|
|
%%     baz
|
|
%%         ...
|
|
%%
|
|
%% BLOCK_ITEM =
|
|
%%     foo
|
|
%%         ...
|
|
%%
|
|
%%
|
|
%% very important property of blocks is that each list item starts at
|
|
%% the same indent level.
|
|
%%
|
|
%% a concern would be that when we go to grab the bar item that
|
|
%% BarIndentLevel is somehow different from FooIndentLevel.
|
|
%%
|
|
%% let us reason through why it must be the case that FooIndentLevel
|
|
%% =:= BarIndentLevel
|
|
%%
|
|
%% 1. not (BarIndentLevel < FooIndentLevel); i.e.
|
|
%%
|
|
%% // impossible by call path:
|
|
%%         foo ...
|
|
%% bar ...
|
|
%%
|
|
%% This is impossible because the call path ensures that all tokens
|
|
%% in BlkItems have indent level >= FooIndentLevel
|
|
%%
|
|
%% 2. not (FooIndentLevel < BarIndentLevel),
|
|
%%
|
|
%% // impossible because bar would get
|
|
%% // consumed by the foo block
|
|
%% foo ...
|
|
%%         bar ...
|
|
%
|
|
%unsafe_block_to_items([]) ->
|
|
% [];
|
|
%unsafe_block_to_items(BlockTks) ->
|
|
% {barf, ItemTks, NewBlockTks} = barf(block_item, BlockTks),
|
|
% [ItemTks | unsafe_block_to_items(NewBlockTks)].
|