better sourcing of dataset while still being fast

2020-04-17 16:05:27 -04:00 · 2020-04-17 16:05:27 -04:00 · 23944bd256
commit 23944bd256
parent 8666124274
1 changed files with 1 additions and 1 deletions
--- a/bin/markov
+++ b/bin/markov
@@ -1,6 +1,6 @@
 #!/bin/bash
-cat ~/irclogs/tc/* | grep '>' | awk '{$1=""; $2=""; $3=""}1' | shuf -n 500000 > /tmp/markylol
+echo ~/irclogs/tc/* | tr " " "\n" | shuf | tr "\n" " " | xargs cat | grep '>' | awk '{$1=""; $2=""; $3=""}1' | tail -n 80000  > /tmp/markylol
 # nextword WORD
 # Print one randomly chosen successor of WORD from the corpus /tmp/markylol.
 # Every whitespace-separated field equal to WORD contributes the field that
 # follows it on the same line as a candidate (an empty candidate when WORD is
 # the last field); shuf then picks a single candidate from that list.
 # Prints nothing when WORD never occurs.
 nextword(){
   # Hand WORD to awk through the environment rather than splicing it into the
   # program text: a word containing quotes, spaces or awk syntax can no longer
   # break or alter the program, and unlike -v no backslash escapes are
   # interpreted. Appending "" forces a string comparison, so numeric-looking
   # words ("1" vs "1.0") are still matched literally.
   MARKOV_WORD="$1" awk '{for (I=1;I<=NF;I++) if ($I == (ENVIRON["MARKOV_WORD"] "")) {print $(I+1)};}' /tmp/markylol | shuf -n 1
 }