better sourcing of dataset while still being fast

This commit is contained in:
lickthecheese 2020-04-17 16:05:27 -04:00
parent 8666124274
commit 23944bd256

View file

@@ -1,6 +1,6 @@
#!/bin/bash
# Build the sampled corpus in /tmp/markylol that nextword reads.
#
# The log files under ~/irclogs/tc are concatenated in random order, only
# lines containing '>' are kept, the first three whitespace-separated fields
# of each line are blanked, and the last 80000 resulting lines are stored.
# Shuffling whole files and taking the tail avoids shuffling every individual
# line of every log.
#
# File names travel NUL-delimited so that paths containing spaces, quotes or
# backslashes reach cat intact.
printf '%s\0' ~/irclogs/tc/* \
  | shuf -z \
  | xargs -0 cat \
  | grep '>' \
  | awk '{$1=""; $2=""; $3=""}1' \
  | tail -n 80000 > /tmp/markylol
# nextword WORD
#
# Print one randomly chosen token that directly follows WORD somewhere in
# /tmp/markylol. Every occurrence of WORD as a whitespace-separated field
# contributes the field after it as a candidate; an occurrence at the end of
# a line contributes an empty line. Prints nothing when WORD never occurs.
nextword() {
  # WORD is handed to awk through the environment instead of being pasted
  # into the program text, so quotes, backslashes and spaces in WORD can
  # neither break the awk syntax nor be reinterpreted as escape sequences.
  # Concatenating "" to the field forces a string comparison, so numeric
  # looking tokens such as 1 and 1.0 stay distinct.
  NEXTWORD_KEY="$1" awk '
    {
      for (I = 1; I <= NF; I++)
        if (($I "") == ENVIRON["NEXTWORD_KEY"])
          print $(I + 1)
    }' /tmp/markylol | shuf -n 1
}