git: performance enhancements
Inspired by some changes made in Game of Trees, I've implemented a number of speedups in git9. First, hashing the chunks during deltification with murmurhash instead of sha1 speeds up the delta search significantly. The stretch function was micro-optimized a bit as well, since that was taking a large portion of the time when chunking. Finally, the full path is no longer stored. We only care about grouping files with the same name and path; we don't care about the ordering. Therefore, only the hash of the file name XORed with the hash of the directory path is kept, which saves a bunch of mallocs and string munging. This reduces the time spent repacking some test repos significantly. 9front: % time git/repack deltifying 97473 objects: 100% writing 97473 objects: 100% indexing 97473 objects: 100% 58.85u 1.39s 61.82r git/repack % time /sys/src/cmd/git/6.repack deltifying 97473 objects: 100% writing 97473 objects: 100% indexing 97473 objects: 100% 43.86u 1.29s 47.51r /sys/src/cmd/git/6.repack openbsd: % time git/repack deltifying 2092325 objects: 100% writing 2092325 objects: 100% indexing 2092325 objects: 100% 1589.48u 45.03s 1729.18r git/repack % time /sys/src/cmd/git/6.repack deltifying 2092325 objects: 100% writing 2092325 objects: 100% indexing 2092325 objects: 100% 1238.68u 41.49s 1373.15r /sys/src/cmd/git/6.repack go: % time git/repack deltifying 529507 objects: 100% writing 529507 objects: 100% indexing 529507 objects: 100% 345.32u 7.71s 369.25r git/repack % time /sys/src/cmd/git/6.repack deltifying 529507 objects: 100% writing 529507 objects: 100% indexing 529507 objects: 100% 248.07u 4.47s 257.59r /sys/src/cmd/git/6.repack
This commit is contained in:
parent
57fa781188
commit
01a6de812c
4 changed files with 83 additions and 27 deletions
|
@ -7,7 +7,6 @@ enum {
|
||||||
Minchunk = 128,
|
Minchunk = 128,
|
||||||
Maxchunk = 8192,
|
Maxchunk = 8192,
|
||||||
Splitmask = (1<<8)-1,
|
Splitmask = (1<<8)-1,
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static u32int geartab[] = {
|
static u32int geartab[] = {
|
||||||
|
@ -48,9 +47,7 @@ static u32int geartab[] = {
|
||||||
static u64int
|
static u64int
|
||||||
hash(void *p, int n)
|
hash(void *p, int n)
|
||||||
{
|
{
|
||||||
uchar buf[SHA1dlen];
|
return murmurhash2(p, n);
|
||||||
sha1((uchar*)p, n, buf, nil);
|
|
||||||
return GETBE64(buf);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -172,23 +169,26 @@ emitdelta(Delta **pd, int *nd, int cpy, int off, int len)
|
||||||
static int
|
static int
|
||||||
stretch(Dtab *dt, Dblock *b, uchar *s, uchar *e, int n)
|
stretch(Dtab *dt, Dblock *b, uchar *s, uchar *e, int n)
|
||||||
{
|
{
|
||||||
uchar *p, *q, *eb;
|
uchar *p0, *p, *q, *eb;
|
||||||
|
|
||||||
if(b == nil)
|
if(b == nil)
|
||||||
return n;
|
return n;
|
||||||
p = s + n;
|
p = s + n;
|
||||||
q = dt->base + b->off + n;
|
q = dt->base + b->off + n;
|
||||||
|
p0 = p;
|
||||||
|
if(dt->nbase < (1<<24)-1)
|
||||||
eb = dt->base + dt->nbase;
|
eb = dt->base + dt->nbase;
|
||||||
while(n < (1<<24)-1){
|
else
|
||||||
|
eb = dt->base + (1<<24)-1;
|
||||||
|
while(1){
|
||||||
if(p == e || q == eb)
|
if(p == e || q == eb)
|
||||||
break;
|
break;
|
||||||
if(*p != *q)
|
if(*p != *q)
|
||||||
break;
|
break;
|
||||||
p++;
|
p++;
|
||||||
q++;
|
q++;
|
||||||
n++;
|
|
||||||
}
|
}
|
||||||
return n;
|
return n + (p - p0);
|
||||||
}
|
}
|
||||||
|
|
||||||
Delta*
|
Delta*
|
||||||
|
|
|
@ -303,6 +303,7 @@ int swapsuffix(char *, int, char *, char *, char *);
|
||||||
char *strip(char *);
|
char *strip(char *);
|
||||||
int findrepo(char *, int);
|
int findrepo(char *, int);
|
||||||
int showprogress(int, int);
|
int showprogress(int, int);
|
||||||
|
u64int murmurhash2(void*, usize);
|
||||||
|
|
||||||
/* packing */
|
/* packing */
|
||||||
void dtinit(Dtab *, Object*);
|
void dtinit(Dtab *, Object*);
|
||||||
|
|
|
@ -20,7 +20,7 @@ struct Metavec {
|
||||||
|
|
||||||
struct Meta {
|
struct Meta {
|
||||||
Object *obj;
|
Object *obj;
|
||||||
char *path;
|
vlong path;
|
||||||
vlong mtime;
|
vlong mtime;
|
||||||
|
|
||||||
/* The best delta we picked */
|
/* The best delta we picked */
|
||||||
|
@ -1284,17 +1284,18 @@ static int
|
||||||
deltaordercmp(void *pa, void *pb)
|
deltaordercmp(void *pa, void *pb)
|
||||||
{
|
{
|
||||||
Meta *a, *b;
|
Meta *a, *b;
|
||||||
int cmp;
|
vlong cmp;
|
||||||
|
|
||||||
a = *(Meta**)pa;
|
a = *(Meta**)pa;
|
||||||
b = *(Meta**)pb;
|
b = *(Meta**)pb;
|
||||||
if(a->obj->type != b->obj->type)
|
if(a->obj->type != b->obj->type)
|
||||||
return a->obj->type - b->obj->type;
|
return a->obj->type - b->obj->type;
|
||||||
cmp = strcmp(a->path, b->path);
|
cmp = (b->path - a->path);
|
||||||
if(cmp != 0)
|
if(cmp != 0)
|
||||||
return cmp;
|
return (cmp < 0) ? -1 : 1;
|
||||||
if(a->mtime != b->mtime)
|
cmp = a->mtime - b->mtime;
|
||||||
return a->mtime - b->mtime;
|
if(cmp != 0)
|
||||||
|
return (cmp < 0) ? -1 : 1;
|
||||||
return memcmp(a->obj->hash.h, b->obj->hash.h, sizeof(a->obj->hash.h));
|
return memcmp(a->obj->hash.h, b->obj->hash.h, sizeof(a->obj->hash.h));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1317,7 +1318,7 @@ writeordercmp(void *pa, void *pb)
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
addmeta(Metavec *v, Objset *has, Object *o, char *path, vlong mtime)
|
addmeta(Metavec *v, Objset *has, Object *o, vlong pathid, vlong mtime)
|
||||||
{
|
{
|
||||||
Meta *m;
|
Meta *m;
|
||||||
|
|
||||||
|
@ -1328,7 +1329,7 @@ addmeta(Metavec *v, Objset *has, Object *o, char *path, vlong mtime)
|
||||||
return;
|
return;
|
||||||
m = emalloc(sizeof(Meta));
|
m = emalloc(sizeof(Meta));
|
||||||
m->obj = o;
|
m->obj = o;
|
||||||
m->path = estrdup(path);
|
m->path = pathid;
|
||||||
m->mtime = mtime;
|
m->mtime = mtime;
|
||||||
|
|
||||||
if(v->nmeta == v->metasz){
|
if(v->nmeta == v->metasz){
|
||||||
|
@ -1342,7 +1343,6 @@ static void
|
||||||
freemeta(Meta *m)
|
freemeta(Meta *m)
|
||||||
{
|
{
|
||||||
free(m->delta);
|
free(m->delta);
|
||||||
free(m->path);
|
|
||||||
free(m);
|
free(m);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1351,8 +1351,9 @@ loadtree(Metavec *v, Objset *has, Hash tree, char *dpath, vlong mtime)
|
||||||
{
|
{
|
||||||
Object *t, *o;
|
Object *t, *o;
|
||||||
Dirent *e;
|
Dirent *e;
|
||||||
|
vlong dh, eh;
|
||||||
|
int i, k, r;
|
||||||
char *p;
|
char *p;
|
||||||
int i, k;
|
|
||||||
|
|
||||||
if(oshas(has, tree))
|
if(oshas(has, tree))
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -1363,7 +1364,8 @@ loadtree(Metavec *v, Objset *has, Hash tree, char *dpath, vlong mtime)
|
||||||
unref(t);
|
unref(t);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
addmeta(v, has, t, dpath, mtime);
|
dh = murmurhash2(dpath, strlen(dpath));
|
||||||
|
addmeta(v, has, t, dh, mtime);
|
||||||
for(i = 0; i < t->tree->nent; i++){
|
for(i = 0; i < t->tree->nent; i++){
|
||||||
e = &t->tree->ent[i];
|
e = &t->tree->ent[i];
|
||||||
if(oshas(has, e->h))
|
if(oshas(has, e->h))
|
||||||
|
@ -1372,14 +1374,16 @@ loadtree(Metavec *v, Objset *has, Hash tree, char *dpath, vlong mtime)
|
||||||
continue;
|
continue;
|
||||||
k = (e->mode & DMDIR) ? GTree : GBlob;
|
k = (e->mode & DMDIR) ? GTree : GBlob;
|
||||||
o = clearedobject(e->h, k);
|
o = clearedobject(e->h, k);
|
||||||
|
if(k == GTree){
|
||||||
p = smprint("%s/%s", dpath, e->name);
|
p = smprint("%s/%s", dpath, e->name);
|
||||||
if(k == GBlob)
|
r = loadtree(v, has, e->h, p, mtime);
|
||||||
addmeta(v, has, o, p, mtime);
|
|
||||||
else if(loadtree(v, has, e->h, p, mtime) == -1){
|
|
||||||
free(p);
|
free(p);
|
||||||
|
if(r == -1)
|
||||||
return -1;
|
return -1;
|
||||||
|
}else{
|
||||||
|
eh = murmurhash2(e->name, strlen(e->name));
|
||||||
|
addmeta(v, has, o, dh^eh, mtime);
|
||||||
}
|
}
|
||||||
free(p);
|
|
||||||
}
|
}
|
||||||
unref(t);
|
unref(t);
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -1400,7 +1404,7 @@ loadcommit(Metavec *v, Objset *has, Hash h)
|
||||||
unref(c);
|
unref(c);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
addmeta(v, has, c, "", c->commit->ctime);
|
addmeta(v, has, c, 0, c->commit->ctime);
|
||||||
r = loadtree(v, has, c->commit->tree, "", c->commit->ctime);
|
r = loadtree(v, has, c->commit->tree, "", c->commit->ctime);
|
||||||
unref(c);
|
unref(c);
|
||||||
return r;
|
return r;
|
||||||
|
|
|
@ -10,6 +10,10 @@ Hash Zhash;
|
||||||
int chattygit;
|
int chattygit;
|
||||||
int interactive = 1;
|
int interactive = 1;
|
||||||
|
|
||||||
|
enum {
|
||||||
|
Seed = 2928213749ULL
|
||||||
|
};
|
||||||
|
|
||||||
Object*
|
Object*
|
||||||
emptydir(void)
|
emptydir(void)
|
||||||
{
|
{
|
||||||
|
@ -391,3 +395,50 @@ qpop(Objq *q, Qelt *e)
|
||||||
}
|
}
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u64int
|
||||||
|
murmurhash2(void *pp, usize n)
|
||||||
|
{
|
||||||
|
u32int m = 0x5bd1e995;
|
||||||
|
u32int r = 24;
|
||||||
|
u32int h, k;
|
||||||
|
u32int *w, *e;
|
||||||
|
uchar *p;
|
||||||
|
|
||||||
|
h = Seed ^ n;
|
||||||
|
e = pp;
|
||||||
|
e += (n / 4);
|
||||||
|
for (w = pp; w != e; w++) {
|
||||||
|
/*
|
||||||
|
* NB: this is endian dependent.
|
||||||
|
* This is fine for use in git, since the
|
||||||
|
* hashes computed here are only ever used
|
||||||
|
* for in memory data structures.
|
||||||
|
*
|
||||||
|
* Pack files will differ when packed on
|
||||||
|
* machines with different endianness,
|
||||||
|
* but the results will still be correct.
|
||||||
|
*/
|
||||||
|
k = *w;
|
||||||
|
k *= m;
|
||||||
|
k ^= k >> r;
|
||||||
|
k *= m;
|
||||||
|
|
||||||
|
h *= m;
|
||||||
|
h ^= k;
|
||||||
|
}
|
||||||
|
|
||||||
|
p = (uchar*)w;
|
||||||
|
switch (n & 0x3) {
|
||||||
|
case 3: h ^= p[2] << 16;
|
||||||
|
case 2: h ^= p[1] << 8;
|
||||||
|
case 1: h ^= p[0] << 0;
|
||||||
|
h *= m;
|
||||||
|
}
|
||||||
|
|
||||||
|
h ^= h >> 13;
|
||||||
|
h *= m;
|
||||||
|
h ^= h >> 15;
|
||||||
|
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue