%% @doc
%% Helper functions for grabbing collections of tokens
%% off the token stream.
%%
%% Generally assumes there are no whitespace/comment tokens in the
%% input stream.
%%
%% NOTE(review): everything below the -module attribute is currently
%% commented out, so this module exports nothing. Disabled code lines
%% carry a single leading `%'; explanatory comments carry `%%' so they
%% remain comments if the code is ever re-enabled by stripping one `%'.
-module(sfc_token_chunks).

%-export_type([
%    chunk_shape/0,
%    choke_reason/0
%]).
%
%-export([
%    take/2,
%    unsafe_block_to_items/1,
%    barf/2,
%    start_pos/1,
%    end_pos/1
%]).
%
%% $sfc_include is so c() works from sfp eshell
%-include("$sfc_include/sfc.hrl").
%
%%------------------------------------------
%% Types
%%------------------------------------------
%
%-type chunk_shape()
%    :: block
%     | block_item
%     | {block_item, Level :: pos_integer()}
%     | block_as_items
%     .
%
%% FIXME
%-type choke_reason() :: any().
%
%
%%------------------------------------------
%% functions
%%------------------------------------------
%
%% take = just split
%%
%% NOTE(review): take/2 has no -spec, and `tw' is not defined in the
%% visible part of this file. It is called here with three arguments
%% in the `block' clause and two in the `block_item' clause (barf/2
%% below uses the two-argument form only). The `block_item' clause
%% binds Hd but never uses it. Confirm the intended tw arity and
%% whether Hd should be part of the result before re-enabling.
%
%take(block, []) ->
%    {[], []};
%take(block, [Hd = #sfc_token{pos = {_, BCol}} | Tl]) ->
%    tw(fun(#sfc_token{pos = {_, TkCol}}) -> BCol =< TkCol end, [Hd], Tl);
%take(block_item, []) ->
%    {[], []};
%take(block_item, [Hd = #sfc_token{pos = {_, ICol}} | Tl]) ->
%    tw(fun(#sfc_token{pos = {_, TkCol}}) -> ICol < TkCol end, Tl).
%
%
%
%% Position of the first token, or `none' for an empty list.
%%
%% NOTE(review): as written, the first clause only matches a list of
%% exactly ONE token; a list of two or more tokens matches no clause.
%% Presumably the pattern was meant to be [#sfc_token{pos = P} | _] --
%% confirm before re-enabling.
%
%-spec start_pos([sfc_token()]) -> {value, sfc_pos()} | none.
%
%start_pos([#sfc_token{pos = P}]) -> {value, P};
%start_pos([])                    -> none.
%
%
%% Walks to the last token and returns
%% sfc_tokens:new_pos(Pos, Str) for it, or `none' for an empty list.
%
%-spec end_pos([sfc_token()]) -> {value, sfc_pos()} | none.
%
%end_pos([#sfc_token{pos = Pos, string = Str}]) ->
%    {value, sfc_tokens:new_pos(Pos, Str)};
%end_pos([_ | T]) ->
%    end_pos(T);
%end_pos([]) ->
%    none.
%
%
%-spec barf(ChunkShape, SigTokens) -> Perhaps
%    when ChunkShape :: chunk_shape(),
%         SigTokens  :: [Token],
%         Perhaps    :: {barf, Chunk, Rest}
%                     | {choke, Reason},
%         Chunk      :: [Token]       % most
%                     | [[Token]],    % block_as_items
%         Rest       :: [Token],
%         Reason     :: choke_reason(),
%         Token      :: sfc_token().
%
%% @doc
%% slurp/barf terminology comes from paredit mode in
%% emacs
%%
%% slurp ~= accepting input
%% barf  ~= separating input
%%
%% slurp: (foo bar) baz ~> (foo bar baz)
%% barf : (foo bar baz) ~> foo (bar baz)
%
%barf(_, []) ->
%    {barf, [], []};
%barf(block, [H = #sfc_token{pos = {_, BlkCol}} | T]) ->
%    Take =
%        fun(#sfc_token{pos = {_, TkCol}}) ->
%            BlkCol =< TkCol
%        end,
%    {A, B} = tw(Take, T),
%    {barf, [H | A], B};
%barf(block_item, [H = #sfc_token{pos = {_, BlkCol}} | T]) ->
%    Take =
%        fun(#sfc_token{pos = {_, TkCol}}) ->
%            BlkCol < TkCol
%        end,
%    {A, B} = tw(Take, T),
%    {barf, [H | A], B};
%% not needed for our case, future-proofing. see unsafe_block_to_items
%% for details
%barf({block_item, Level}, Tokens = [#sfc_token{pos = {_, StartLevel}} | _]) ->
%    case Level =:= StartLevel of
%        false -> {barf, [], Tokens};
%        true  -> barf(block_item, Tokens)
%    end;
%% this has a fancy name in Haskell like Lens . lift ^. mapM_
%%
%% i think it's `sequence` actually, but not looking it up
%%
%% this barfs a block, and then uses unsafe_block_to_items/1 to split
%% the block tokens into individual items
%barf(block_as_items, Tokens) ->
%    {barf, BlockTokens, Rest} = barf(block, Tokens),
%    {barf, unsafe_block_to_items(BlockTokens), Rest};
%barf(_, _) ->
%    {choke, #sfc_err_nyi{}}.
%
%
%
%
%
%
%-spec unsafe_block_to_items([Token]) -> [[Token]]
%    when Token :: sfc_token().
%
%% @doc
%% PITFALL: this ASSUMES that the given list of tokens has the
%% property that all indent levels are >= that of the head... i.e. the
%% input to this is assumed to be the output of (e.g.) barf(block, _)
%%
%% the danger case is something my intuition is pointing to as a
%% possibility perhaps if you're doing some incremental parallel
%% stream parsing voodoo, naively parsing a block by greedily pulling
%% block items off the head of the list
%%
%% with the current way things work, we actually do not need to check
%% the indent level of each block item and make sure they're all the
%% same
%%
%%      BLOCK =
%%          foo
%%              ...
%%          bar
%%              ...
%%          baz
%%              ...
%%
%%      BLOCK_ITEM =
%%          foo
%%              ...
%%
%%
%% very important property of blocks is that each list item starts at
%% the same indent level.
%%
%% a concern would be that when we go to grab the bar item that
%% BarIndentLevel is somehow different from FooIndentLevel.
%%
%% let us reason through why it must be the case that FooIndentLevel
%% =:= BarIndentLevel
%%
%% 1. not (BarIndentLevel < FooIndentLevel); i.e.
%%
%%          // impossible by call path:
%%              foo ...
%%          bar ...
%%
%%    This is impossible because the call path ensures that all tokens
%%    in BlkItems have indent level >= FooIndentLevel
%%
%% 2. not (FooIndentLevel < BarIndentLevel),
%%
%%          // impossible because bar would get
%%          // consumed by the foo block
%%          foo ...
%%              bar ...
%
%unsafe_block_to_items([]) ->
%    [];
%unsafe_block_to_items(BlockTks) ->
%    {barf, ItemTks, NewBlockTks} = barf(block_item, BlockTks),
%    [ItemTks | unsafe_block_to_items(NewBlockTks)].