% gsc/scratch/sfc_token_chunks.erl

% @doc
% Helper functions for grabbing collections ("chunks") of tokens
% off the front of the token stream.
%
% These functions generally assume that the input stream contains
% no whitespace or comment tokens.
-module(sfc_token_chunks).

%-export_type([
%    chunk_shape/0,
%    choke_reason/0
%]).
%
%-export([
%    take/2,
%    unsafe_block_to_items/1,
%    barf/2,
%    start_pos/1,
%    end_pos/1
%]).
%
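%% the include path starts with $sfc_include (expanded by the
%% preprocessor from the sfc_include environment variable) so that
%% c() works from the sfp eshell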
%-include("$sfc_include/sfc.hrl").
%
%%------------------------------------------
%% Types
%%------------------------------------------
%
%-type chunk_shape()
%    :: block
%     | block_item
%     | {block_item, Level :: pos_integer()}
%     | block_as_items
%     .
%
%% FIXME
%-type choke_reason() :: any().
%
%
%%------------------------------------------
%% functions
%%------------------------------------------
%
%% take = just split
%
%take(block, []) ->
%    {[], []};
%take(block, [Hd = #sfc_token{pos = {_, BCol}} | Tl]) ->
%    tw(fun(#sfc_token{pos = {_, TkCol}}) -> BCol =< TkCol end, [Hd], Tl);
%take(block_item, []) ->
%    {[], []};
%take(block_item, [Hd = #sfc_token{pos = {_, ICol}} | Tl]) ->
%    tw(fun(#sfc_token{pos = {_, TkCol}}) -> ICol < TkCol end, Tl).
%
%
%
%-spec start_pos([sfc_token()]) -> {value, sfc_pos()} | none.
%
%start_pos([#sfc_token{pos = P}]) -> {value, P};
%start_pos([])                    -> none.
%
%
%-spec end_pos([sfc_token()]) -> {value, sfc_pos()} | none.
%
%end_pos([#sfc_token{pos = Pos, string = Str}]) ->
%    {value, sfc_tokens:new_pos(Pos, Str)};
%end_pos([_ | T]) ->
%    end_pos(T);
%end_pos([]) ->
%    none.
%
%
%-spec barf(ChunkShape, SigTokens) -> Perhaps
%    when ChunkShape :: chunk_shape(),
%         SigTokens  :: [Token],
%         Perhaps :: {barf, Chunk, Rest}
%                  | {choke, Reason},
%         Chunk  :: [Token]      % most
%                 | [[Token]],   % block_as_items
%         Rest   :: [Token],
%         Reason :: choke_reason(),
%         Token :: sfc_token().
%
%% @doc
%% slurp/barf terminology comes from paredit mode in
%% emacs
%%
%% slurp ~= accepting input
%% barf ~= separating input
%%
%%   slurp: (foo bar) baz ~> (foo bar baz)
%%   barf : (foo bar baz) ~> foo (bar baz)
%
%barf(_, []) ->
%    {barf, [], []};
%barf(block, [H = #sfc_token{pos = {_, BlkCol}} | T]) ->
%    Take =
%        fun(#sfc_token{pos = {_, TkCol}}) ->
%            BlkCol =< TkCol
%        end,
%    {A, B} = tw(Take, T),
%    {barf, [H | A], B};
%barf(block_item, [H = #sfc_token{pos = {_, BlkCol}} | T]) ->
%    Take =
%        fun(#sfc_token{pos = {_, TkCol}}) ->
%            BlkCol < TkCol
%        end,
%    {A, B} = tw(Take, T),
%    {barf, [H | A], B};
%% not needed for our case, future-proofing. see unsafe_block_to_items
%% for details
%barf({block_item, Level}, Tokens = [#sfc_token{pos = {_, StartLevel}} | _]) ->
%    case Level =:= StartLevel of
%        false -> {barf, [], Tokens};
%        true  -> barf(block_item, Tokens)
%    end;
%% this has a fancy name in Haskell like Lens . lift ^. mapM_
%%
%% i think it's `sequence` actually, but not looking it up
%%
%% this barfs a block, and then uses unsafe_block_to_items/1 to split
%% the block tokens into individual items
%barf(block_as_items, Tokens) ->
%    {barf, BlockTokens, Rest} = barf(block, Tokens),
%    {barf, unsafe_block_to_items(BlockTokens), Rest};
%barf(_, _) ->
%    {choke, #sfc_err_nyi{}}.
%
%
%
%
%
%
%-spec unsafe_block_to_items([Token]) -> [[Token]]
%    when Token :: sfc_token().
%
%% @doc
%% PITFALL: this ASSUMES that the given list of tokens has the
%% property that all indent levels are >= that of the head... i.e. the
%% input to this is assumed to be the output of (e.g.) barf(block, _)
%%
%% the danger case -- which my intuition says might come up if you're
%% doing some incremental parallel stream parsing voodoo -- is naively
%% parsing a block by greedily pulling block items off the head of a
%% token list that has not first been cut down to a single block
%%
%% with the current way things work, we actually do not need to check
%% the indent level of each block item and make sure they're all the
%% same
%%
%%   BLOCK =
%%       foo
%%           ...
%%       bar
%%           ...
%%       baz
%%           ...
%%
%%   BLOCK_ITEM =
%%       foo
%%           ...
%%
%%
%% a very important property of blocks is that each block item starts
%% at the same indent level.
%%
%% a concern would be that when we go to grab the bar item that
%% BarIndentLevel is somehow different from FooIndentLevel.
%%
%% let us reason through why it must be the case that FooIndentLevel
%% =:= BarIndentLevel
%%
%% 1. not (BarIndentLevel < FooIndentLevel); i.e.
%%
%%       // impossible by call path:
%%          foo ...
%%       bar ...
%%
%%    This is impossible because the call path ensures that all tokens
%%    in the input list (BlockTks) have indent level >= FooIndentLevel
%%
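%% 2. not (FooIndentLevel < BarIndentLevel); i.e.
%%
%%       // impossible because bar would get
%%       // consumed by the foo block item
%%       foo ...
%%           bar ...
%%
%%    This is impossible because barf(block_item, _) takes the tokens
%%    after foo that are indented deeper than foo, so bar would have
%%    ended up inside the foo item rather than starting a new one
%%
%% since neither level can be less than the other, it must be that
%% FooIndentLevel =:= BarIndentLevel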
%
%unsafe_block_to_items([]) ->
%    [];
%unsafe_block_to_items(BlockTks) ->
%    {barf, ItemTks, NewBlockTks} = barf(block_item, BlockTks),
%    [ItemTks | unsafe_block_to_items(NewBlockTks)].