min/modules/nlp.py

144 lines
3.9 KiB
Python
Raw Normal View History

2021-01-31 02:05:34 +00:00
from bot import *
2020-04-19 23:52:41 +00:00
2021-07-26 16:51:02 +00:00
import dataset, random, time, re
2020-04-19 23:52:41 +00:00
def get(l, i):
    """Return ``l[i]``, or ``''`` when ``i`` is not a valid forward index.

    Unlike plain indexing, a negative ``i`` is not treated as an offset
    from the end of the sequence: it is out of range and yields ``''``.
    """
    # Strict upper bound: i == len(l) is already one past the last item.
    if 0 <= i < len(l):
        return l[i]
    return ''
2020-04-23 01:43:07 +00:00
async def rec(self, m):
    """Learn from one message by recording its words in ``shared.db``.

    The message is split on single spaces. Nothing is stored when the
    first word is ``'admin'`` or the message has fewer than two words.

    Otherwise:
      * the first word is inserted into the ``beg`` table,
      * the last word is inserted into the ``end`` table,
      * for every word except the first, a row holding that word
        (``pro``), up to three preceding words (``pre``..``pre3``) and up
        to two following words (``pro2``, ``pro3``) goes into ``prew``;
        positions outside the message are stored as ``''``,
      * every word is inserted into the ``noun`` table, one row per
        occurrence.

    ``self`` is not used.
    """
    prew = shared.db['prew']
    noch = shared.db['noun']
    beg = shared.db['beg']
    end = shared.db['end']

    words = m.split(' ')
    if words[0] == 'admin' or len(words) < 2:
        return

    beg.insert(dict(word=words[0]))
    end.insert(dict(word=words[-1]))

    for w, word in enumerate(words):
        if w > 0:
            prew.insert_ignore(
                dict(
                    pre3=get(words, w - 3),
                    pre2=get(words, w - 2),
                    pre=get(words, w - 1),
                    pro=get(words, w),
                    pro2=get(words, w + 1),
                    pro3=get(words, w + 2),
                ),
                ['id'],
            )
        # Store the word itself, not its position: the noun table is
        # queried by word string elsewhere (count(word=<string>)).
        noch.insert(dict(word=word))
2020-07-12 12:21:53 +00:00
async def getNoun(self, words, c):
    """Pick the word that the next reply for ``c`` is built around.

    Steps:
      1. Count, for each entry of ``words``, its rows in the ``noun``
         table and take the entry with the smallest count (the first
         one wins on ties).
      2. If a word was remembered for ``c`` in ``shared.cstate``, record
         the pair (remembered word -> chosen word) in the ``conver``
         table.
      3. If ``conver`` holds rows whose ``pre`` is the chosen word,
         replace the choice with a random ``pro`` from those rows.
      4. Remember the final word in ``shared.cstate[c]`` and return it.

    ``self`` is not used. Raises ``ValueError`` if ``words`` is empty.
    """
    previous = shared.cstate[c] if c in shared.cstate else None

    noun_table = shared.db['noun']
    frequency = {word: noun_table.count(word=word) for word in words}
    noun = min(frequency, key=frequency.get)

    conversation = shared.db['conver']
    if previous is not None:
        print("adding", [previous, noun])
        conversation.insert_ignore(dict(pre=previous, pro=noun), ['id'])

    followers = [row['pro'] for row in conversation.find(pre=noun)]
    print("nextnoun:", followers)
    if followers:
        noun = random.choice(followers)

    shared.cstate[c] = noun
    return noun
2020-04-23 01:43:07 +00:00
async def genOut(self, noun):
    """Build a list of words around ``noun`` using the ``prew`` table.

    Starting from ``[noun]``, the list is first extended to the left and
    then to the right, one word per loop iteration, by picking a random
    matching ``prew`` row. The list of words is returned.

    Side effect: ``shared.enmul`` is decremented when both loops together
    ran at most 3 iterations, and incremented when they ran at least
    ``shared.maxiter`` iterations.

    ``self`` is not used.
    """
    prew = shared.db['prew']
    beg = shared.db['beg']
    end = shared.db['end']
    nouns = shared.db['noun']

    # iter: iteration budget of the current loop (note: shadows the builtin).
    # coun: total iterations over both loops, used for the enmul adjustment.
    iter=0
    coun=0

    out = [noun]

    # Grow to the left. Keep going while the current first word is not
    # recorded in `beg`, or its noun count (minus one) still exceeds
    # iter * enmul -- but never beyond maxiter.
    while (beg.find_one(word=out[0]) is None or nouns.count(word=out[0])-1 > iter * shared.enmul) and iter < shared.maxiter:
        # Try the most specific match first (three known following words),
        # then two, then one. IndexError comes either from out[1]/out[2]
        # when `out` is still too short, or from random.choice() on an
        # empty result list.
        try:
            out = [ random.choice(list(prew.find(pro=out[0],pro2=out[1],pro3=out[2])))['pre'] ] + out
        except IndexError:
            try:
                out = [ random.choice(list(prew.find(pro=out[0],pro2=out[1])))['pre'] ] + out
            except IndexError:
                try:
                    out = [ random.choice(list(prew.find(pro=out[0])))['pre'] ] + out
                except IndexError:
                    # Nothing can precede this word: jump the counter so
                    # the `iter < shared.maxiter` test fails (this only
                    # ends the loop while maxiter is at most 70).
                    iter += 69
        iter += 1
        coun += 1

    # Fresh budget for the second loop; coun keeps accumulating.
    iter = 0

    # Grow to the right, mirroring the loop above with the `end` table and
    # the last word of `out`.
    while (end.find_one(word=out[-1]) is None or nouns.count(word=out[-1])-1 > iter * shared.enmul) and iter < shared.maxiter:
        # Same three-level fallback, matching on the trailing words.
        # out[-3]/out[-2] raise IndexError when `out` is too short.
        try:
            out.append(random.choice(list(prew.find(pre3=out[-3],pre2=out[-2],pre=out[-1])))['pro'])
        except IndexError:
            try:
                out.append(random.choice(list(prew.find(pre2=out[-2],pre=out[-1])))['pro'])
            except IndexError:
                try:
                    out.append(random.choice(list(prew.find(pre=out[-1])))['pro'])
                except IndexError:
                    # No continuation found: exhaust the budget (see above).
                    iter += 69
        iter += 1
        coun += 1

    # Adjust the shared ending weight based on how many iterations ran.
    if coun <= 3:
        shared.enmul -= 1
    elif coun >= shared.maxiter:
        shared.enmul += 1

    return out
async def filter(self, c, n, m):
    """Decide whether message ``m`` gets a reply or is only learned from.

    Arguments, as used here: ``c`` is the target the message arrived on,
    ``n`` is compared against the bot's own nickname, ``m`` is the text.

    * Nothing happens while ``shared.qtime[c]`` lies in the future.
    * A message starting with ``shared.prefix`` or with
      ``"<nickname> "`` is answered via ``go`` with that lead stripped.
    * A message on a target without ``'#'`` (presumably a private query)
      whose ``n`` is not the bot's nickname is answered via ``go``
      unchanged.
    * Anything else containing a space is passed to ``rec``, but only
      when more than ``shared.learndelay`` seconds have passed since
      ``shared.learntime``, which is then reset to the current time.
    """
    if c in shared.qtime and shared.qtime[c] > time.time():
        return

    if m.startswith(shared.prefix):
        await go(self, c, n, m[len(shared.prefix):])
        return

    nick_lead = self.nickname + ' '
    if m.startswith(nick_lead):
        await go(self, c, n, m[len(nick_lead):])
        return

    if '#' not in c and n != self.nickname:
        await go(self, c, n, m)
        return

    # Passive learning: multi-word messages only, rate limited.
    if ' ' not in m:
        return
    if shared.learntime + shared.learndelay < time.time():
        await rec(self, m)
        shared.learntime = time.time()
async def go(self, c, n, m):
    """Learn from ``m``, then generate a reply and send it to ``c``.

    The message is first handed to ``rec``. It is then tokenised with
    ``.``, ``,``, ``?`` and ``!`` split off as separate words. No reply
    is produced when there are no words at all (empty or whitespace-only
    message) or when the first word is ``'admin'``.

    The reply is the word list from ``genOut`` (seeded by ``getNoun``)
    joined with spaces, with the space before punctuation removed again.
    A trailing ``\\x01`` without a matching leading one is stripped
    before sending through ``self.message``.

    ``n`` is not used.
    """
    await rec(self, m)

    words = re.sub(r'([\.,\?!])', r' \1', m).split()
    # An empty message (e.g. just the prefix) has no first word to inspect.
    if not words or words[0] == 'admin':
        return

    noun = await getNoun(self, words, c)
    generated = await genOut(self, noun)
    msg = re.sub(r' ([\.,\?!])', r'\1', ' '.join(generated))

    # startswith/endswith are safe on an empty string, unlike msg[-1].
    if msg.endswith("\x01") and not msg.startswith("\x01"):
        msg = msg[:-1]

    await self.message(c, msg)
2020-04-19 23:52:41 +00:00
async def init(self):
    """Initialise this module's state on ``shared`` and register ``filter``.

    ``self`` is not used.
    """
    # Per-target dictionaries, both starting out empty.
    shared.cstate = {}
    shared.qtime = {}

    # Passive learning: time of the last learned message, and the delay
    # between grabbing random messages and passively learning.
    shared.learntime = 0
    shared.learndelay = 1

    # Sentence ending weight: lower means longer sentences, higher means
    # shorter sentences. This will need to slowly get larger as the
    # database grows.
    shared.enmul = 9

    # Upper bound on iterations per generation loop.
    shared.maxiter = 14

    # Hook the message handler in under the name 'nlp'.
    shared.rawm['nlp'] = filter