upas/fs: speedup mtree and henter()

move digest pointer into Mtree structrue and embed it into Idx struct
(which is embedded in Message) to avoid one level of indirection
during mtreecmp().

get rid of mtreeisdup(). instead we have mtreeadd() return the old
message in case of a collision. this avoids double lookup.

increase the hash table size for henter() and make it a prime.
This commit is contained in:
cinap_lenrek 2019-10-03 15:49:53 +02:00
parent c0f464b98f
commit ff16079e49
5 changed files with 50 additions and 63 deletions

View file

@ -319,14 +319,17 @@ found:
void
digestmessage(Mailbox *mb, Message *m)
{
Message *old;
assert(m->digest == nil);
m->digest = emalloc(SHA1dlen);
sha1((uchar*)m->start, m->end - m->start, m->digest, nil);
if(mtreeisdup(mb, m)){
old = mtreeadd(mb, m);
if(old != nil && old != m){
m = mtreeadd(mb, old);
logmsg(m, "dup detected");
m->deleted = Dup; /* no dups allowed */
}else
mtreeadd(mb, m);
}
dprint("%lud %#A\n", m->id, m->digest);
}

View file

@ -42,10 +42,16 @@ enum {
Nref = 10,
};
typedef struct {
Avl;
uchar *digest;
} Mtree;
typedef struct Idx Idx;
struct Idx {
Mtree;
char *str; /* as read from idx file */
uchar *digest;
uchar flags;
uvlong fileid;
ulong lines;
@ -136,11 +142,6 @@ struct Message {
};
};
typedef struct {
Avl;
Message *m;
} Mtree;
typedef struct Mcache Mcache;
struct Mcache {
uvlong cached;
@ -256,10 +257,10 @@ void rmidx(char*, int);
int vremove(char*);
int rename(char *, char*, int);
int mtreecmp(Avl*, Avl*);
int mtreeisdup(Mailbox *, Message *);
void mtreeinit(Mailbox *);
void mtreefree(Mailbox *);
Message* mtreefind(Mailbox*, uchar*);
void mtreeadd(Mailbox*, Message*);
Message* mtreeadd(Mailbox*, Message*);
void mtreedelete(Mailbox*, Message*);
enum {

View file

@ -122,7 +122,7 @@ static char hbuf[32*1024];
static uchar mbuf[16*1024 + IOHDRSZ];
static uchar mdata[16*1024 + IOHDRSZ];
static ulong path; /* incremented for each new file */
static Hash *htab[1024];
static Hash *htab[2053];
static Fcall rhdr;
static Fcall thdr;
static Fid *fids;

View file

@ -267,7 +267,7 @@ newmbox(char *path, char *name, int flags, Mailbox **r)
mb->next = nil;
mb->id = newid();
mb->root = newmessage(nil);
mb->mtree = avlcreate(mtreecmp);
mtreeinit(mb);
*l = mb;
@ -1187,7 +1187,7 @@ mboxdecref(Mailbox *mb)
if(mb->flags & ORCLOSE && mb->remove)
if(mb->remove(mb, mb->rmflags))
rmidx(mb->path, mb->rmflags);
free(mb->mtree);
mtreefree(mb);
free(mb->d);
free(mb);
}

View file

@ -2,79 +2,62 @@
#include <libsec.h>
#include "dat.h"
int
#define messageof(p) ((Message*)(((uchar*)&(p)->digest) - offsetof(Message, digest)))
static int
mtreecmp(Avl *va, Avl *vb)
{
Mtree *a, *b;
a = (Mtree*)va;
b = (Mtree*)vb;
return memcmp(a->m->digest, b->m->digest, SHA1dlen);
return memcmp(((Mtree*)va)->digest, ((Mtree*)vb)->digest, SHA1dlen);
}
int
mtreeisdup(Mailbox *mb, Message *m)
void
mtreeinit(Mailbox *mb)
{
Mtree t;
mb->mtree = avlcreate(mtreecmp);
}
assert(Topmsg(mb, m) && m->digest);
if(m->digest == nil)
return 0;
memset(&t, 0, sizeof t);
t.m = m;
if(avllookup(mb->mtree, &t, 0))
return 1;
return 0;
void
mtreefree(Mailbox *mb)
{
free(mb->mtree);
mb->mtree = nil;
}
Message*
mtreefind(Mailbox *mb, uchar *digest)
{
Message m0;
Mtree t, *p;
m0.digest = digest;
memset(&t, 0, sizeof t);
t.m = &m0;
if(p = (Mtree*)avllookup(mb->mtree, &t, 0))
return p->m;
t.digest = digest;
if((p = (Mtree*)avllookup(mb->mtree, &t, 0)) == nil)
return nil;
return messageof(p);
}
void
Message*
mtreeadd(Mailbox *mb, Message *m)
{
Avl *old;
Mtree *p;
Mtree *old;
assert(Topmsg(mb, m) && m->digest);
p = emalloc(sizeof *p);
p->m = m;
old = avlinsert(mb->mtree, p);
assert(old == 0);
assert(Topmsg(mb, m) && m->digest != nil);
if((old = (Mtree*)avlinsert(mb->mtree, m)) == nil)
return nil;
return messageof(old);
}
void
mtreedelete(Mailbox *mb, Message *m)
{
Mtree t, *p;
Mtree *old;
assert(Topmsg(mb, m));
memset(&t, 0, sizeof t);
t.m = m;
if(m->deleted & ~Deleted){
if(m->digest == nil)
return;
p = (Mtree*)avllookup(mb->mtree, &t, 0);
if(p == nil || p->m != m)
return;
p = (Mtree*)avldelete(mb->mtree, &t);
free(p);
if(m->deleted & ~Deleted){
old = (Mtree*)avllookup(mb->mtree, m, 0);
if(old == nil || messageof(old) != m)
return;
}
assert(m->digest);
p = (Mtree*)avldelete(mb->mtree, &t);
if(p == nil)
_assert("mtree delete fails");
free(p);
old = (Mtree*)avldelete(mb->mtree, m);
assert(messageof(old) == m);
}