Better dataset sourcing while still being fast
This commit is contained in:
parent
8666124274
commit
23944bd256
1 changed file with 1 addition and 1 deletion
|
@ -1,6 +1,6 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
# Build the corpus file /tmp/markylol from every file under ~/irclogs/tc:
# keep only lines containing '>', blank out the first three
# whitespace-separated fields, and retain a random sample of at most
# 500000 of the resulting lines.
cat ~/irclogs/tc/* \
  | grep '>' \
  | awk '{ $1 = ""; $2 = ""; $3 = ""; print }' \
  | shuf -n 500000 \
  > /tmp/markylol
|
# Build the corpus file /tmp/markylol: concatenate the files under
# ~/irclogs/tc in a random order, keep only lines containing '>', blank out
# the first three whitespace-separated fields, and retain the last 80000 of
# the resulting lines.
#
# File names are passed NUL-delimited (printf '%s\0' | shuf -z | xargs -0)
# so that paths containing spaces, quotes or backslashes reach cat intact;
# splitting the glob output on spaces and letting xargs re-parse it would
# break such names. printf is a shell builtin, so a large glob expansion is
# not limited by the exec argument-size limit.
printf '%s\0' ~/irclogs/tc/* \
  | shuf -z \
  | xargs -0 cat \
  | grep '>' \
  | awk '{$1=""; $2=""; $3=""}1' \
  | tail -n 80000 > /tmp/markylol
|
||||||
|
|
||||||
# nextword WORD
#
# Print one randomly chosen word that immediately follows an occurrence of
# WORD in /tmp/markylol (fields are awk's default whitespace-separated
# fields). Prints nothing when WORD never occurs. When WORD is the last
# field of a line, an empty line is among the candidates, so the result may
# be an empty string.
nextword() {
  # Pass WORD through the environment instead of splicing it into the awk
  # program text: a word containing a double quote, a backslash or
  # whitespace would otherwise corrupt the program or be split into extra
  # arguments.
  NEXTWORD_KEY="${1-}" awk '
    {
      for (i = 1; i <= NF; i++)
        # Appending "" forces a string comparison, so numeric-looking
        # words such as "1" and "1.0" are not treated as equal.
        if (($i "") == ENVIRON["NEXTWORD_KEY"])
          print $(i + 1)
    }
  ' /tmp/markylol | shuf -n 1
}
|
# nextword WORD
#
# Print one randomly chosen word that immediately follows an occurrence of
# WORD in /tmp/markylol (fields are awk's default whitespace-separated
# fields). Prints nothing when WORD never occurs. When WORD is the last
# field of a line, an empty line is among the candidates, so the result may
# be an empty string.
nextword() {
  # Pass WORD through the environment instead of splicing it into the awk
  # program text: a word containing a double quote, a backslash or
  # whitespace would otherwise corrupt the program or be split into extra
  # arguments.
  NEXTWORD_KEY="${1-}" awk '
    {
      for (i = 1; i <= NF; i++)
        # Appending "" forces a string comparison, so numeric-looking
        # words such as "1" and "1.0" are not treated as equal.
        if (($i "") == ENVIRON["NEXTWORD_KEY"])
          print $(i + 1)
    }
  ' /tmp/markylol | shuf -n 1
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue