From ff16079e49ae585281e4ff0f2aed9620a7273644 Mon Sep 17 00:00:00 2001 From: cinap_lenrek Date: Thu, 3 Oct 2019 15:49:53 +0200 Subject: [PATCH] upas/fs: speedup mtree and henter() move digest pointer into Mtree structure and embed it into Idx struct (which is embedded in Message) to avoid one level of indirection during mtreecmp(). get rid of mtreeisdup(). instead we have mtreeadd() return the old message in case of a collision. this avoids double lookup. increase the hash table size for henter() and make it a prime. --- sys/src/cmd/upas/fs/cache.c | 9 +++-- sys/src/cmd/upas/fs/dat.h | 19 ++++----- sys/src/cmd/upas/fs/fs.c | 2 +- sys/src/cmd/upas/fs/mbox.c | 4 +- sys/src/cmd/upas/fs/mtree.c | 79 +++++++++++++++---------------------- 5 files changed, 50 insertions(+), 63 deletions(-) diff --git a/sys/src/cmd/upas/fs/cache.c b/sys/src/cmd/upas/fs/cache.c index 5312886b3..52ed93c79 100644 --- a/sys/src/cmd/upas/fs/cache.c +++ b/sys/src/cmd/upas/fs/cache.c @@ -319,14 +319,17 @@ found: void digestmessage(Mailbox *mb, Message *m) { + Message *old; + assert(m->digest == nil); m->digest = emalloc(SHA1dlen); sha1((uchar*)m->start, m->end - m->start, m->digest, nil); - if(mtreeisdup(mb, m)){ + old = mtreeadd(mb, m); + if(old != nil && old != m){ + m = mtreeadd(mb, old); logmsg(m, "dup detected"); m->deleted = Dup; /* no dups allowed */ - }else - mtreeadd(mb, m); + } dprint("%lud %#A\n", m->id, m->digest); } diff --git a/sys/src/cmd/upas/fs/dat.h b/sys/src/cmd/upas/fs/dat.h index d25656b0c..4aee557e6 100644 --- a/sys/src/cmd/upas/fs/dat.h +++ b/sys/src/cmd/upas/fs/dat.h @@ -42,10 +42,16 @@ enum { Nref = 10, }; +typedef struct { + Avl; + uchar *digest; +} Mtree; + typedef struct Idx Idx; struct Idx { + Mtree; + char *str; /* as read from idx file */ - uchar *digest; uchar flags; uvlong fileid; ulong lines; @@ -136,11 +142,6 @@ struct Message { }; }; -typedef struct { - Avl; - Message *m; -} Mtree; - typedef struct Mcache Mcache; struct Mcache { uvlong cached; @@ -256,10 
+257,10 @@ void rmidx(char*, int); int vremove(char*); int rename(char *, char*, int); -int mtreecmp(Avl*, Avl*); -int mtreeisdup(Mailbox *, Message *); +void mtreeinit(Mailbox *); +void mtreefree(Mailbox *); Message* mtreefind(Mailbox*, uchar*); -void mtreeadd(Mailbox*, Message*); +Message* mtreeadd(Mailbox*, Message*); void mtreedelete(Mailbox*, Message*); enum { diff --git a/sys/src/cmd/upas/fs/fs.c b/sys/src/cmd/upas/fs/fs.c index e706e7eaa..fbb515928 100644 --- a/sys/src/cmd/upas/fs/fs.c +++ b/sys/src/cmd/upas/fs/fs.c @@ -122,7 +122,7 @@ static char hbuf[32*1024]; static uchar mbuf[16*1024 + IOHDRSZ]; static uchar mdata[16*1024 + IOHDRSZ]; static ulong path; /* incremented for each new file */ -static Hash *htab[1024]; +static Hash *htab[2053]; static Fcall rhdr; static Fcall thdr; static Fid *fids; diff --git a/sys/src/cmd/upas/fs/mbox.c b/sys/src/cmd/upas/fs/mbox.c index 5a23419db..705134abb 100644 --- a/sys/src/cmd/upas/fs/mbox.c +++ b/sys/src/cmd/upas/fs/mbox.c @@ -267,7 +267,7 @@ newmbox(char *path, char *name, int flags, Mailbox **r) mb->next = nil; mb->id = newid(); mb->root = newmessage(nil); - mb->mtree = avlcreate(mtreecmp); + mtreeinit(mb); *l = mb; @@ -1187,7 +1187,7 @@ mboxdecref(Mailbox *mb) if(mb->flags & ORCLOSE && mb->remove) if(mb->remove(mb, mb->rmflags)) rmidx(mb->path, mb->rmflags); - free(mb->mtree); + mtreefree(mb); free(mb->d); free(mb); } diff --git a/sys/src/cmd/upas/fs/mtree.c b/sys/src/cmd/upas/fs/mtree.c index fd490d952..b08a9dd51 100644 --- a/sys/src/cmd/upas/fs/mtree.c +++ b/sys/src/cmd/upas/fs/mtree.c @@ -2,79 +2,62 @@ #include #include "dat.h" -int +#define messageof(p) ((Message*)(((uchar*)&(p)->digest) - offsetof(Message, digest))) + +static int mtreecmp(Avl *va, Avl *vb) { - Mtree *a, *b; - - a = (Mtree*)va; - b = (Mtree*)vb; - return memcmp(a->m->digest, b->m->digest, SHA1dlen); + return memcmp(((Mtree*)va)->digest, ((Mtree*)vb)->digest, SHA1dlen); } -int -mtreeisdup(Mailbox *mb, Message *m) +void +mtreeinit(Mailbox *mb) { 
- Mtree t; + mb->mtree = avlcreate(mtreecmp); +} - assert(Topmsg(mb, m) && m->digest); - if(m->digest == nil) - return 0; - memset(&t, 0, sizeof t); - t.m = m; - if(avllookup(mb->mtree, &t, 0)) - return 1; - return 0; +void +mtreefree(Mailbox *mb) +{ + free(mb->mtree); + mb->mtree = nil; } Message* mtreefind(Mailbox *mb, uchar *digest) { - Message m0; Mtree t, *p; - m0.digest = digest; - memset(&t, 0, sizeof t); - t.m = &m0; - if(p = (Mtree*)avllookup(mb->mtree, &t, 0)) - return p->m; - return nil; + t.digest = digest; + if((p = (Mtree*)avllookup(mb->mtree, &t, 0)) == nil) + return nil; + return messageof(p); } -void +Message* mtreeadd(Mailbox *mb, Message *m) { - Avl *old; - Mtree *p; + Mtree *old; - assert(Topmsg(mb, m) && m->digest); - p = emalloc(sizeof *p); - p->m = m; - old = avlinsert(mb->mtree, p); - assert(old == 0); + assert(Topmsg(mb, m) && m->digest != nil); + if((old = (Mtree*)avlinsert(mb->mtree, m)) == nil) + return nil; + return messageof(old); } void mtreedelete(Mailbox *mb, Message *m) { - Mtree t, *p; + Mtree *old; assert(Topmsg(mb, m)); - memset(&t, 0, sizeof t); - t.m = m; - if(m->deleted & ~Deleted){ - if(m->digest == nil) - return; - p = (Mtree*)avllookup(mb->mtree, &t, 0); - if(p == nil || p->m != m) - return; - p = (Mtree*)avldelete(mb->mtree, &t); - free(p); + if(m->digest == nil) return; + if(m->deleted & ~Deleted){ + old = (Mtree*)avllookup(mb->mtree, m, 0); + if(old == nil || messageof(old) != m) + return; } - assert(m->digest); - p = (Mtree*)avldelete(mb->mtree, &t); - if(p == nil) - _assert("mtree delete fails"); - free(p); + old = (Mtree*)avldelete(mb->mtree, m); + assert(messageof(old) == m); }