Created
March 13, 2017 03:05
-
-
Save ZzZombo/54a216744ec2acda384fdbc094269a17 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Random text generator: feed() collects per-word statistics from sample
# text and generate() rolls new text out of them.
unit class Text::Generator:auth<ZzZombo>:ver<0.0.1>;

# word => %( count, children, whitespace, prefix, postfix ), filled by feed().
has %!word-chain;

# NOTE(review): the two attributes below are not referenced by any code
# visible in this file.
has @!delimeters;
has @!whitespace;
# Tokenizer used by feed(): splits a text into whitespace runs, words and
# single non-space symbols.
grammar BasicText
{
    # The positional capture is required: the actions class walks $/[0].
    token TOP { ( <whitespace> || <word> || <symbol> )* }

    token whitespace { \s+ }

    # One or more alphanumeric runs glued together by a single punctuation
    # character ("it's", "1,000", "mother-in-law", "www.example.com").
    # Each glue character must be followed by at least one alphanumeric, so
    # trailing punctuation ("end.") is left for <symbol>.
    # The run after a glue character is <.alnum>+; the former
    # `[ glue <.alnum> ]* <.alnum>*` only accepted a multi-character run
    # after the LAST glue character and cut "mother-in-law" into
    # "mother-in", "-", "law".
    token word
    {
        <.alnum>+
        [ <[' & . , -]+:Dash_Punctuation+:Connector_Punctuation> <.alnum>+ ]*
    }

    # Everything else: any single non-whitespace character.
    token symbol { \S }

    # Not referenced by TOP; kept for callers and subclasses.
    token control { <.cntrl>+ }
    token eof { $ }
}
# Actions for BasicText: flattens the parse into a list of token hashes.
class BasicText::Actions
{
    # Makes an Array of %( value => Str, type => Str ) hashes, one per item
    # captured in $/[0], followed by a final %( value => '', type => 'eof' ).
    # The type is the first of the known sub-rule names present in the item.
    method TOP($/)
    {
        my @kinds = <whitespace word control symbol eof>;
        my @tokens = $/[0].list.map: -> $piece
        {
            my Str:D $type = @kinds.first: { $piece{$_}:exists };
            %( value => ~$piece{$type}, :$type )
        };
        @tokens.push: %( value => '', type => 'eof' );
        make @tokens;
        Nil;
    }
}
# Learn from a text: tokenize it and merge its word statistics into
# %!word-chain.
#
#   $input      - the text itself, or a file path when :from-file is set.
#   :$from-file - treat $input as a path and slurp that file.
#   :$grammar   - grammar used to tokenize the text.
#   :$actions   - actions whose TOP makes the token list: hashes with <value>
#                 and <type> ('whitespace', 'word', 'symbol', 'eof').
#   :%paired    - opening => closing bracket table; filled with the built-in
#                 table below when empty. NOTE(review): nothing in this method
#                 reads it yet.
#
# For every word the chain keeps <count> plus BagHashes of the whitespace seen
# in front of it (<whitespace>), the symbol runs attached before and after it
# (<prefix>, <postfix>) and the words that followed it (<children>). A symbol
# run with no word attached is stored under the empty word ''.
# Returns Nil.
method feed(Str:D $input,Bool:D :$from-file=False,:$grammar=BasicText,:$actions=BasicText::Actions,:%paired)
{
    %paired ||= %('(' => ')','[' => ']','{' => '}','༺' => '༻','༼' => '༽',
        '᚛' => '᚜','⁅' => '⁆','⁽' => '⁾','₍' => '₎','⌈' => '⌉',
        '⌊' => '⌋','〈' => '〉','❨' => '❩','❪' => '❫','❬' => '❭',
        '❮' => '❯','❰' => '❱','❲' => '❳','❴' => '❵','⟅' => '⟆',
        '⟦' => '⟧','⟨' => '⟩','⟪' => '⟫','⟬' => '⟭','⟮' => '⟯',
        '⦃' => '⦄','⦅' => '⦆','⦇' => '⦈','⦉' => '⦊','⦋' => '⦌',
        '⦍' => '⦐','⦏' => '⦎','⦑' => '⦒','⦓' => '⦔','⦕' => '⦖',
        '⦗' => '⦘','⧘' => '⧙','⧚' => '⧛','⧼' => '⧽','⸢' => '⸣',
        '⸤' => '⸥','⸦' => '⸧','⸨' => '⸩','〈' => '〉','《' => '》',
        '「' => '」','『' => '』','【' => '】','〔' => '〕',
        '〖' => '〗','〘' => '〙','〚' => '〛','﹙' => '﹚',
        '﹛' => '﹜','﹝' => '﹞','(' => ')','[' => ']','{' => '}',
        '⦅' => '⦆','「' => '」');

    # The text is parsed with one space prepended, so the token stream always
    # starts with a whitespace token.
    my @input=$grammar.parse(' '~($from-file ?? $input.IO.slurp !! $input),:$actions).made;

    # State of the entry currently being assembled; all undefined when idle.
    my Str ($whitespace,$prefix,$word,$postfix,$prev-word);
    for @input -> $elem
    {
        my $type=$elem<type>;
        # Whitespace and end of input close the pending entry. So does a word
        # arriving while another word is still pending, e.g. "bar" in "foo(bar".
        if $type ~~ <whitespace eof>.any || $word.defined && $type eq 'word'
        {
            if $prefix.defined || $word.defined
            {
                $word //= '';   # symbols only: file them under the empty word
                %!word-chain{$word} //= %(<children whitespace prefix postfix>.map({$_=>BagHash.new}));
                ++%!word-chain{$word}<count>;
                ++%!word-chain{$word}<whitespace>{$whitespace // ''};
                ++%!word-chain{$word}<prefix>{$prefix // ''};
                ++%!word-chain{$word}<postfix>{$postfix // ''};
                ++%!word-chain{$prev-word}<children>{$word} if $prev-word.defined;
                $prev-word=$word;
                # Assigning Nil resets a typed scalar to its (undefined) default;
                # this replaces the deprecated undefine().
                $prefix=Nil;
                $word=Nil;
                $postfix=Nil;
            }
            if $type eq 'word'
            {
                # The word that closed the previous entry opens the next one
                # (it used to be dropped); nothing separated the two.
                $whitespace=Nil;
                $word=$elem<value>;
            }
            elsif $type eq 'whitespace'
            {
                # Always remember the separator for the NEXT entry. It used to
                # be stored only when nothing was pending, which left every
                # word with the first whitespace run of the input.
                $whitespace=$elem<value>;
            }
        }
        elsif $type eq 'symbol'
        {
            # Symbols before the word extend its prefix, symbols after it the postfix.
            (!$word.defined ?? $prefix !! $postfix) ~= $elem<value>;
        }
        elsif $type eq 'word'
        {
            $word=$elem<value>;
        }
    }
    Nil;
}
# Produce random text from the statistics collected by feed().
#
#   $words  - number of word entries to emit (0 emits none).
#   :$start - marker put in front of the text and after every restart.
#   :$end   - marker put before every restart and at the very end.
#
# The walk starts at a word rolled by overall frequency and then keeps rolling
# one of the recorded followers of the current word. Each emitted entry is a
# rolled whitespace + prefix, the word, and a rolled postfix. When a word has
# no followers, the text is closed with $end, reopened with $start and the walk
# restarts at a fresh frequency-weighted word.
#
# Returns the joined text with leading whitespace removed. When nothing has
# been fed yet, only the markers are returned.
method generate(Num(Cool) $words where * ~~ 0..^Inf=1024,Cool:D :$start='',Cool:D :$end='')
{
    my Str:D @res;
    # Markers are stringified on use: they may be any Cool, @res holds Str:D only.
    @res.push: ~$start if $start;

    # Nothing to roll from an empty chain, and nothing to emit for 0 words.
    if %!word-chain && $words > 0
    {
        my $word-bag=BagHash.new-from-pairs: |%!word-chain.map({.key=>.value<count>});
        my Str $word=$word-bag.roll;
        # Count emitted words separately: the markers also live in @res and
        # must not eat into the requested amount.
        my Int:D $emitted=0;
        while $emitted < $words
        {
            my $entry=%!word-chain{$word};
            @res.push: ($entry<whitespace>.roll // '')
                     ~ ($entry<prefix>.roll // '')
                     ~ $word
                     ~ ($entry<postfix>.roll // '');
            ++$emitted;

            # roll() on an empty bag yields Nil, which leaves $word undefined.
            $word=$entry<children>.roll;
            if !$word.defined
            {
                # Dead end. Restart only if more words are wanted, so the text
                # never ends in an empty start/end pair.
                last if $emitted >= $words;
                @res.push: ~$end if $end;
                @res.push: ~$start if $start;
                $word=$word-bag.roll;
            }
        }
    }
    @res.push: ~$end if $end;
    @res.join.subst(/^\s+/,'');
}
#=====
# Example driver: learn from test.txt and print one generated text.
use Text::Generator;

my $gen = Text::Generator.new;
$gen.feed: 'test.txt', :from-file;
#$gen.feed('The establishment of dedicated intelligence organizations was directly linked to the colonial rivalries between the major European powers and the accelerating development of military technology.');
say $gen.generate(start => "", end => "\n");
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment