diff --git a/sys/src/cmd/git/delta.c b/sys/src/cmd/git/delta.c
index 32af58b64..1841393b6 100644
--- a/sys/src/cmd/git/delta.c
+++ b/sys/src/cmd/git/delta.c
@@ -7,7 +7,6 @@ enum {
 	Minchunk	= 128,
 	Maxchunk	= 8192,
 	Splitmask	= (1<<8)-1,
-
 };
 
 static u32int geartab[] = {
@@ -48,9 +47,7 @@ static u32int geartab[] = {
 static u64int
 hash(void *p, int n)
 {
-	uchar buf[SHA1dlen];
-	sha1((uchar*)p, n, buf, nil);
-	return GETBE64(buf);
+	return murmurhash2(p, n);
 }
 
 static void
@@ -172,23 +169,29 @@ emitdelta(Delta **pd, int *nd, int cpy, int off, int len)
 static int
 stretch(Dtab *dt, Dblock *b, uchar *s, uchar *e, int n)
 {
-	uchar *p, *q, *eb;
+	uchar *p0, *p, *q, *eb;
 
 	if(b == nil)
 		return n;
 	p = s + n;
 	q = dt->base + b->off + n;
 	eb = dt->base + dt->nbase;
-	while(n < (1<<24)-1){
-		if(p == e || q == eb)
-			break;
-		if(*p != *q)
-			break;
+	/*
+	 * Grow the n-byte match between s and the base block b
+	 * for as long as the bytes agree.  The (1<<24)-1 limit is
+	 * on the length of the match, so it is applied to the end
+	 * of the target; the base is bounded by its real end.
+	 * The ordered comparisons stop the loop even if a pointer
+	 * starts out at or beyond its bound.
+	 */
+	if(e - s > (1<<24)-1)
+		e = s + (1<<24)-1;
+	p0 = p;
+	while(p < e && q < eb && *p == *q){
 		p++;
 		q++;
-		n++;
 	}
-	return n;
+	return n + (p - p0);
 }
 
 Delta*
diff --git a/sys/src/cmd/git/git.h b/sys/src/cmd/git/git.h
index 4e74333c5..88afcc1d7 100644
--- a/sys/src/cmd/git/git.h
+++ b/sys/src/cmd/git/git.h
@@ -303,6 +303,7 @@ int swapsuffix(char *, int, char *, char *, char *);
 char	*strip(char *);
 int	findrepo(char *, int);
 int	showprogress(int, int);
+u64int	murmurhash2(void*, usize);
 
 /* packing */
 void	dtinit(Dtab *, Object*);
diff --git a/sys/src/cmd/git/pack.c b/sys/src/cmd/git/pack.c
index 34431730f..41eac2262 100644
--- a/sys/src/cmd/git/pack.c
+++ b/sys/src/cmd/git/pack.c
@@ -20,7 +20,7 @@ struct Metavec {
 
 struct Meta {
 	Object	*obj;
-	char	*path;
+	vlong	path;
 	vlong	mtime;
 
 	/* The best delta we picked */
@@ -1284,17 +1284,21 @@ static int
 deltaordercmp(void *pa, void *pb)
 {
 	Meta *a, *b;
-	int cmp;
 
 	a = *(Meta**)pa;
 	b = *(Meta**)pb;
 	if(a->obj->type != b->obj->type)
 		return a->obj->type - b->obj->type;
-	cmp = strcmp(a->path, b->path);
-	if(cmp != 0)
-		return cmp;
+	/*
+	 * path is a hash of the pathname, so only equality
+	 * matters for grouping.  Compare the values directly:
+	 * the sign of a difference is only meaningful while
+	 * the subtraction cannot overflow.
+	 */
+	if(a->path != b->path)
+		return (a->path > b->path) ? -1 : 1;
 	if(a->mtime != b->mtime)
-		return a->mtime - b->mtime;
+		return (a->mtime < b->mtime) ? -1 : 1;
 	return memcmp(a->obj->hash.h, b->obj->hash.h, sizeof(a->obj->hash.h));
 }
 
@@ -1317,7 +1321,7 @@ writeordercmp(void *pa, void *pb)
 }
 
 static void
-addmeta(Metavec *v, Objset *has, Object *o, char *path, vlong mtime)
+addmeta(Metavec *v, Objset *has, Object *o, vlong pathid, vlong mtime)
 {
 	Meta *m;
 
@@ -1328,7 +1332,7 @@ addmeta(Metavec *v, Objset *has, Object *o, char *path, vlong mtime)
 		return;
 	m = emalloc(sizeof(Meta));
 	m->obj = o;
-	m->path = estrdup(path);
+	m->path = pathid;
 	m->mtime = mtime;
 
 	if(v->nmeta == v->metasz){
@@ -1342,7 +1346,6 @@ static void
 freemeta(Meta *m)
 {
 	free(m->delta);
-	free(m->path);
 	free(m);
 }
 
@@ -1351,8 +1354,9 @@ loadtree(Metavec *v, Objset *has, Hash tree, char *dpath, vlong mtime)
 {
 	Object *t, *o;
 	Dirent *e;
+	vlong dh, eh;
+	int i, k, r;
 	char *p;
-	int i, k;
 
 	if(oshas(has, tree))
 		return 0;
@@ -1363,7 +1367,9 @@ loadtree(Metavec *v, Objset *has, Hash tree, char *dpath, vlong mtime)
 		unref(t);
 		return 0;
 	}
-	addmeta(v, has, t, dpath, mtime);
+	/* a tree is keyed by the hash of its path */
+	dh = murmurhash2(dpath, strlen(dpath));
+	addmeta(v, has, t, dh, mtime);
 	for(i = 0; i < t->tree->nent; i++){
 		e = &t->tree->ent[i];
 		if(oshas(has, e->h))
@@ -1372,14 +1378,20 @@ loadtree(Metavec *v, Objset *has, Hash tree, char *dpath, vlong mtime)
 			continue;
 		k = (e->mode & DMDIR) ? GTree : GBlob;
 		o = clearedobject(e->h, k);
-		p = smprint("%s/%s", dpath, e->name);
-		if(k == GBlob)
-			addmeta(v, has, o, p, mtime);
-		else if(loadtree(v, has, e->h, p, mtime) == -1){
+		if(k == GTree){
+			p = smprint("%s/%s", dpath, e->name);
+			r = loadtree(v, has, e->h, p, mtime);
 			free(p);
-			return -1;
+			if(r == -1){
+				/* release t here, as the other returns below do */
+				unref(t);
+				return -1;
+			}
+		}else{
+			/* a blob's path id: hash of its directory ^ hash of its name */
+			eh = murmurhash2(e->name, strlen(e->name));
+			addmeta(v, has, o, dh^eh, mtime);
 		}
-		free(p);
 	}
 	unref(t);
 	return 0;
@@ -1400,7 +1412,7 @@ loadcommit(Metavec *v, Objset *has, Hash h)
 		unref(c);
 		return 0;
 	}
-	addmeta(v, has, c, "", c->commit->ctime);
+	addmeta(v, has, c, 0, c->commit->ctime);
 	r = loadtree(v, has, c->commit->tree, "", c->commit->ctime);
 	unref(c);
 	return r;
diff --git a/sys/src/cmd/git/util.c b/sys/src/cmd/git/util.c
index 1d2398a7e..99b1f8eea 100644
--- a/sys/src/cmd/git/util.c
+++ b/sys/src/cmd/git/util.c
@@ -10,6 +10,10 @@ Hash Zhash;
 int chattygit;
 int interactive = 1;
 
+enum {
+	Seed	= 2928213749ULL
+};
+
 Object*
 emptydir(void)
 {
@@ -391,3 +395,51 @@ qpop(Objq *q, Qelt *e)
 	}
 	return 1;
 }
+
+/*
+ * MurmurHash2 over the n bytes at pp, seeded with Seed.
+ * All mixing is done in 32 bits, so the upper half of the
+ * u64int result is always zero.
+ *
+ * Each word is assembled a byte at a time, least
+ * significant byte first.  Nothing here guarantees that
+ * pp is aligned for a u32int, so loading through a u32int*
+ * is not safe on every machine; this form also makes the
+ * result independent of byte order, and on little-endian
+ * machines it equals what a direct word load produces.
+ */
+u64int
+murmurhash2(void *pp, usize n)
+{
+	u32int m = 0x5bd1e995;
+	u32int r = 24;
+	u32int h, k;
+	uchar *p, *e;
+
+	h = Seed ^ n;
+	p = pp;
+	e = p + (n & ~(usize)3);
+	for(; p != e; p += 4){
+		k = p[0] | p[1]<<8 | p[2]<<16 | (u32int)p[3]<<24;
+		k *= m;
+		k ^= k >> r;
+		k *= m;
+
+		h *= m;
+		h ^= k;
+	}
+
+	/* mix in the 1 to 3 bytes left over; the cases fall through */
+	switch(n & 0x3){
+	case 3: h ^= p[2] << 16;
+	case 2: h ^= p[1] << 8;
+	case 1: h ^= p[0] << 0;
+		h *= m;
+	}
+
+	h ^= h >> 13;
+	h *= m;
+	h ^= h >> 15;
+
+	return h;
+}