plan9fox/sys/src/cmd/git/util.c
Ori Bernstein 01a6de812c git: performance enhancements
Inspired by some changes made in game of trees, I've
implemented a number of speedups in git9.

First, hashing the chunks during deltification with
murmurhash instead of sha1 speeds up the delta search
significantly.

The stretch function was micro-optimized a bit as well,
since that was taking a large portion of the time when
chunking.

Finally, the full path is not stored. We only care about
grouping files with the same name and path. We don't care
about the ordering. Therefore, only the hash of the path
xored with the hash of the directory is kept, which saves
a bunch of mallocs and string munging.

This reduces the time spent repacking some test repos
significantly.

9front:
	% time git/repack
	deltifying 97473 objects: 100%
	writing 97473 objects: 100%
	indexing 97473 objects: 100%
	58.85u 1.39s 61.82r 	 git/repack

	% time /sys/src/cmd/git/6.repack
	deltifying 97473 objects: 100%
	writing 97473 objects: 100%
	indexing 97473 objects: 100%
	43.86u 1.29s 47.51r 	 /sys/src/cmd/git/6.repack

openbsd:

	% time git/repack
	deltifying 2092325 objects: 100%
	writing 2092325 objects: 100%
	indexing 2092325 objects: 100%
	1589.48u 45.03s 1729.18r 	 git/repack

	% time /sys/src/cmd/git/6.repack
	deltifying 2092325 objects: 100%
	writing 2092325 objects: 100%
	indexing 2092325 objects: 100%
	1238.68u 41.49s 1373.15r 	 /sys/src/cmd/git/6.repack

go:
	% time git/repack
	deltifying 529507 objects: 100%
	writing 529507 objects: 100%
	indexing 529507 objects: 100%
	345.32u 7.71s 369.25r     git/repack

	% time /sys/src/cmd/git/6.repack
	deltifying 529507 objects: 100%
	writing 529507 objects: 100%
	indexing 529507 objects: 100%
	248.07u 4.47s 257.59r 	 /sys/src/cmd/git/6.repack
2022-05-28 16:38:07 +00:00

445 lines
6.8 KiB
C

#include <u.h>
#include <libc.h>
#include <ctype.h>
#include "git.h"
/*
 * Compiled once by gitinit().  The pattern has three capture groups:
 * free text, a run of digits, and an optionally signed run of digits,
 * separated by blanks or tabs.
 * NOTE(review): presumably author, timestamp and timezone -- confirm
 * against the code that executes this pattern.
 */
Reprog *authorpat;
/* No initializer, so it starts out all zero; emptydir() uses it as the hash of the empty tree. */
Hash Zhash;
/* Not read anywhere in the visible code; presumably a debug verbosity switch -- TODO confirm. */
int chattygit;
/* When zero, showprogress() prints nothing and returns 0. */
int interactive = 1;
enum {
/* Starting value that murmurhash2() xors with the input length. */
Seed = 2928213749ULL
};
/*
 * Returns a tree object with no entries whose hash is Zhash.
 *
 * The object is built on the first call, kept in a function-local
 * static, and handed to cache().  The first call returns the object
 * itself after calling ref() on it; later calls return whatever
 * ref() returns for it.
 */
Object*
emptydir(void)
{
	static Object *empty;

	if(empty == nil){
		empty = emalloc(sizeof(Object));
		empty->hash = Zhash;
		empty->type = GTree;
		empty->tree = emalloc(sizeof(Tinfo));
		empty->tree->ent = nil;
		empty->tree->nent = 0;
		/* flagged as loaded and parsed, with an offset of -1 */
		empty->flag |= Cloaded|Cparsed;
		empty->off = -1;
		ref(empty);
		cache(empty);
		return empty;
	}
	return ref(empty);
}
/*
 * Compares the raw bytes of two hashes.
 * Returns 1 when they are identical and 0 otherwise.
 */
int
hasheq(Hash *a, Hash *b)
{
	if(memcmp(a->h, b->h, sizeof(a->h)) != 0)
		return 0;
	return 1;
}
/*
 * Converts one hexadecimal digit (0-9, a-f, A-F) to its value 0..15.
 *
 * For any other character, stores 1 in *err and returns -1.
 * *err is left untouched when the digit is valid, so a caller can
 * convert several digits and test the flag afterwards.
 */
static int
charval(int c, int *err)
{
	int v;

	if('0' <= c && c <= '9')
		v = c - '0';
	else if('a' <= c && c <= 'f')
		v = 10 + (c - 'a');
	else if('A' <= c && c <= 'F')
		v = 10 + (c - 'A');
	else{
		*err = 1;
		v = -1;
	}
	return v;
}
/*
 * Allocates n bytes with mallocz(n, 1) and never returns nil:
 * a failed allocation ends the program through sysfatal().
 * The block is tagged with the caller's pc via setmalloctag().
 */
void *
emalloc(ulong n)
{
	void *mem;

	if((mem = mallocz(n, 1)) == nil)
		sysfatal("malloc: %r");
	setmalloctag(mem, getcallerpc(&n));
	return mem;
}
/*
 * Array variant of emalloc: allocates n elements of sz bytes each
 * with mallocz(..., 1).
 *
 * The product is computed in 64 bits; a total of 1<<30 bytes or more
 * is treated as an overflow and is fatal, as is allocation failure.
 * The block is tagged with the caller's pc.
 */
void *
eamalloc(ulong n, ulong sz)
{
	uvlong total;
	void *mem;

	total = (uvlong)n * (uvlong)sz;
	if(total >= (1ULL<<30))
		sysfatal("alloc: overflow");
	if((mem = mallocz(total, 1)) == nil)
		sysfatal("malloc: %r");
	setmalloctag(mem, getcallerpc(&n));
	return mem;
}
/*
 * Resizes p to n bytes with realloc().  A nil result is fatal, so the
 * returned pointer is never nil.  The block is tagged with the
 * caller's pc.
 */
void *
erealloc(void *p, ulong n)
{
	void *mem;

	if((mem = realloc(p, n)) == nil)
		sysfatal("realloc: %r");
	setmalloctag(mem, getcallerpc(&p));
	return mem;
}
/*
 * Array variant of erealloc: resizes p to hold n elements of sz bytes.
 *
 * The product is computed in 64 bits; a total of 1<<30 bytes or more
 * is treated as an overflow and is fatal, as is a nil result from
 * realloc().  The block is tagged with the caller's pc.
 */
void *
earealloc(void *p, ulong n, ulong sz)
{
	uvlong total;
	void *mem;

	total = (uvlong)n * (uvlong)sz;
	if(total >= (1ULL<<30))
		sysfatal("alloc: overflow");
	if((mem = realloc(p, total)) == nil)
		sysfatal("realloc: %r");
	setmalloctag(mem, getcallerpc(&p));
	return mem;
}
/*
 * Duplicates s with strdup().  A nil result is fatal, so the returned
 * copy is never nil.  The copy is tagged with the caller's pc.
 */
char*
estrdup(char *s)
{
	char *copy;

	copy = strdup(s);
	if(copy == nil)
		sysfatal("strdup: %r");
	setmalloctag(copy, getcallerpc(&s));
	return copy;
}
/*
 * Format routine installed for the 'H' verb by gitinit().
 *
 * Takes a Hash by value from the argument list and prints every byte
 * as two lowercase hexadecimal digits.  Returns the sum of the
 * fmtprint() results.
 */
int
Hfmt(Fmt *fmt)
{
	static char hex[] = "0123456789abcdef";
	Hash h;
	int i, b, total;

	h = va_arg(fmt->args, Hash);
	total = 0;
	for(i = 0; i < sizeof h.h; i++){
		b = h.h[i];
		/* high nibble first, then low nibble */
		total += fmtprint(fmt, "%c%c", hex[(b >> 4) & 0xf], hex[b & 0xf]);
	}
	return total;
}
/*
 * Format routine installed for the 'T' verb by gitinit().
 *
 * Takes an int object type from the argument list and prints its
 * name; values outside the known set print as "?<number>?".
 * Returns the fmtprint() result.
 */
int
Tfmt(Fmt *fmt)
{
	char *name;
	int t;

	t = va_arg(fmt->args, int);
	switch(t){
	case GNone:
		name = "none";
		break;
	case GCommit:
		name = "commit";
		break;
	case GTree:
		name = "tree";
		break;
	case GBlob:
		name = "blob";
		break;
	case GTag:
		name = "tag";
		break;
	case GOdelta:
		name = "odelta";
		break;
	case GRdelta:
		/* NOTE(review): "gdelta" may be a typo for "rdelta"; kept as is -- confirm before changing. */
		name = "gdelta";
		break;
	default:
		return fmtprint(fmt, "?%d?", t);
	}
	return fmtprint(fmt, "%s", name);
}
/*
 * Format routine installed for the 'O' verb by gitinit().
 *
 * Takes an Object pointer from the argument list.  A header line with
 * the object's hash and type is written with print(), not through
 * fmt; a one-line description (for blobs, including the data as a
 * string) is then formatted into fmt.  Returns the fmtprint() result.
 */
int
Ofmt(Fmt *fmt)
{
	Object *obj;

	obj = va_arg(fmt->args, Object *);
	print("== %H (%T) ==\n", obj->hash, obj->type);
	if(obj->type == GTree)
		return fmtprint(fmt, "tree\n");
	if(obj->type == GBlob)
		return fmtprint(fmt, "blob %s\n", obj->data);
	if(obj->type == GCommit)
		return fmtprint(fmt, "commit\n");
	if(obj->type == GTag)
		return fmtprint(fmt, "tag\n");
	return fmtprint(fmt, "invalid: %d\n", obj->type);
}
/*
 * Format routine installed for the 'Q' verb by gitinit().
 *
 * Takes a Qid by value from the argument list and prints its path in
 * hex, the result of QDIR() on it, the path shifted right by 8 bits,
 * and its vers and type fields.  Returns the fmtprint() result.
 */
int
Qfmt(Fmt *fmt)
{
	Qid q;
	int n;

	q = va_arg(fmt->args, Qid);
	n = fmtprint(fmt,
		"Qid{path=0x%llx(dir:%d,obj:%lld), vers=%ld, type=%d}",
		q.path, QDIR(&q), (q.path >> 8), q.vers, q.type);
	return n;
}
void
gitinit(void)
{
fmtinstall('H', Hfmt);
fmtinstall('T', Tfmt);
fmtinstall('O', Ofmt);
fmtinstall('Q', Qfmt);
inflateinit();
deflateinit();
authorpat = regcomp("[\t ]*(.*)[\t ]+([0-9]+)[\t ]+([\\-+]?[0-9]+)");
osinit(&objcache);
}
/*
 * Parses 2*sizeof(h->h) hexadecimal digits from b into h.
 *
 * Returns 0 on success.  On the first character that is not a hex
 * digit (including the terminating NUL of a string that is too
 * short), sets the error string to "invalid hash" and returns -1;
 * the contents of h are then unspecified.
 *
 * Each digit is validated before the next one is read, so a short
 * string is never read beyond its terminator.
 */
int
hparse(Hash *h, char *b)
{
	int i, hi, lo, err;

	err = 0;
	for(i = 0; i < sizeof(h->h); i++){
		hi = charval(b[2*i], &err);
		if(err)
			goto Bad;
		/* only reached when b[2*i] was a digit, so b[2*i+1] is still inside the string */
		lo = charval(b[2*i+1], &err);
		if(err)
			goto Bad;
		h->h[i] = (hi << 4) | lo;
	}
	return 0;
Bad:
	werrstr("invalid hash");
	return -1;
}
/*
 * Opens p for reading and reads all of its directory entries into *d
 * with dirreadall().
 *
 * Returns -1 if the open fails, otherwise whatever dirreadall()
 * returned.  The file descriptor is closed before returning.
 */
int
slurpdir(char *p, Dir **d)
{
	int fd, n;

	fd = open(p, OREAD);
	if(fd == -1)
		return -1;
	n = dirreadall(fd, d);
	close(fd);
	return n;
}
/*
 * Returns 1 when the string base ends with the string suf, else 0.
 * An empty suf matches any base.
 */
int
hassuffix(char *base, char *suf)
{
	int blen, slen;

	blen = strlen(base);
	slen = strlen(suf);
	if(slen > blen)
		return 0;
	return strcmp(base + blen - slen, suf) == 0;
}
/*
 * Writes base into dst with its last strlen(oldsuf) characters
 * replaced by suf, and NUL-terminates the result.
 *
 * Only lengths are considered: base is not checked to actually end
 * in oldsuf.  Returns the length of the result, or -1 when oldsuf is
 * longer than base or the result (plus terminator) does not fit in
 * dstsz bytes; dst is not written in that case.
 */
int
swapsuffix(char *dst, int dstsz, char *base, char *oldsuf, char *suf)
{
	int baselen, oldlen, newlen, stem, total;

	baselen = strlen(base);
	oldlen = strlen(oldsuf);
	newlen = strlen(suf);
	if(oldlen > baselen)
		return -1;
	/* stem is the part of base that survives */
	stem = baselen - oldlen;
	total = stem + newlen;
	if(total + 1 > dstsz)
		return -1;
	memmove(dst, base, stem);
	memmove(dst + stem, suf, newlen);
	dst[total] = 0;
	return total;
}
/*
 * Trims whitespace from both ends of s in place.
 *
 * Trailing whitespace is overwritten with NUL bytes; leading
 * whitespace is skipped by returning a pointer into s past it.
 */
char *
strip(char *s)
{
	char *end;

	for(; isspace(*s); s++)
		;
	for(end = s + strlen(s); end > s; ){
		end--;
		if(!isspace(*end))
			break;
		*end = 0;
	}
	return s;
}
/*
 * printf-style helper that formats its arguments to file
 * descriptor 2 using vfprint().
 */
void
_dprint(char *fmt, ...)
{
	va_list args;
	int fd;

	fd = 2;
	va_start(args, fmt);
	vfprint(fd, fmt, args);
	va_end(args);
}
/*
 * Finds the directory containing the git repo.
 *
 * Starts from the working directory returned by getwd() and tests,
 * for that directory and then for each shorter prefix of its path,
 * whether "<dir>/.git/HEAD" exists.
 *
 * On success, buf holds the matching directory and 0 is returned.
 * Returns -1 if getwd() fails, or if no candidate exists (in which
 * case the error string is set to "not a git repository").
 *
 * NOTE(review): nbuf is not checked against strlen(suff)+1 before the
 * subtraction below -- callers presumably pass a large buffer; confirm.
 */
int
findrepo(char *buf, int nbuf)
{
char *p, *suff;
suff = "/.git/HEAD";
/* reserve room in buf so suff can be appended to whatever getwd() returns */
if(getwd(buf, nbuf - strlen(suff) - 1) == nil)
return -1;
/* p marks where suff is appended; each pass moves it back to the last '/' in buf */
for(p = buf + strlen(buf); p != nil; p = strrchr(buf, '/')){
strcpy(p, suff);
if(access(buf, AEXIST) == 0){
/* cut the suffix off again; when p == buf the index is 1, so the leading '/' is kept */
p[p == buf] = '\0';
return 0;
}
/* not here: truncate back to the candidate before looking for the next '/' */
*p = '\0';
}
werrstr("not a git repository");
return -1;
}
/*
 * Updates a percentage display on file descriptor 2.
 *
 * x is the new value and pct the value last shown.  When x is greater
 * than pct, four backspaces are written followed by x right-aligned
 * in three columns and a '%' sign, and x is returned; otherwise
 * nothing is printed and pct is returned.  When the global
 * interactive is zero, nothing is printed and 0 is returned.
 */
int
showprogress(int x, int pct)
{
	if(!interactive)
		return 0;
	if(x <= pct)
		return pct;
	fprint(2, "\b\b\b\b%3d%%", x);
	return x;
}
/*
 * Initializes q as an empty queue: the structure is zeroed and a
 * heap array with room for 8 Qelt entries is allocated.
 */
void
qinit(Objq *q)
{
	memset(q, 0, sizeof(*q));
	q->heapsz = 8;
	q->nheap = 0;
	q->heap = eamalloc(q->heapsz, sizeof(Qelt));
}
/*
 * Frees the heap array of q.  No field of q is modified, so q must
 * be passed to qinit() again before it is reused.
 */
void
qclear(Objq *q)
{
	void *mem;

	mem = q->heap;
	free(mem);
}
/*
 * Inserts object o with the given color into q, keyed by
 * o->commit->ctime.
 *
 * The heap array is doubled when it is full.  The new element is
 * appended and then moved towards the root while its ctime is not
 * smaller than its parent's, so the largest ctime ends up at index 0.
 */
void
qput(Objq *q, Object *o, int color)
{
	Qelt *slot, tmp;
	int child, parent;

	if(q->nheap == q->heapsz){
		q->heapsz *= 2;
		q->heap = earealloc(q->heap, q->heapsz, sizeof(Qelt));
	}
	slot = &q->heap[q->nheap];
	slot->o = o;
	slot->color = color;
	slot->ctime = o->commit->ctime;

	child = q->nheap;
	while(child > 0){
		parent = (child-1)/2;
		/* stop only when strictly smaller; equal keys still swap */
		if(q->heap[child].ctime < q->heap[parent].ctime)
			break;
		tmp = q->heap[child];
		q->heap[child] = q->heap[parent];
		q->heap[parent] = tmp;
		child = parent;
	}
	q->nheap++;
}
/*
 * Removes the element at the root of q (the one with the largest
 * ctime) and copies it to *e.
 *
 * Returns 0 without touching *e when the queue is empty, 1 otherwise.
 * After removal the last element is moved to the root and pushed
 * down, swapping with the child that has the larger ctime, until
 * neither child has a strictly larger ctime.
 */
int
qpop(Objq *q, Qelt *e)
{
	int cur, best, kid;
	Qelt tmp;

	if(q->nheap == 0)
		return 0;
	*e = q->heap[0];
	q->nheap--;
	if(q->nheap == 0)
		return 1;
	q->heap[0] = q->heap[q->nheap];
	for(cur = 0; ; cur = best){
		best = cur;
		/* left child */
		kid = 2*cur + 1;
		if(kid < q->nheap && q->heap[best].ctime < q->heap[kid].ctime)
			best = kid;
		/* right child, compared against the better of parent and left */
		kid = 2*cur + 2;
		if(kid < q->nheap && q->heap[best].ctime < q->heap[kid].ctime)
			best = kid;
		if(best == cur)
			break;
		tmp = q->heap[best];
		q->heap[best] = q->heap[cur];
		q->heap[cur] = tmp;
	}
	return 1;
}
/*
 * Hashes the n bytes at pp and returns a 32-bit value widened to
 * u64int.  The state starts as Seed xor n, consumes the input four
 * bytes at a time, then folds in the 1-3 leftover bytes and applies
 * a final mix.
 *
 * The input is read through a u32int pointer, so the result is
 * endian dependent.  That is fine for use in git: the hashes
 * computed here are only ever used for in-memory data structures.
 * Pack files will differ when packed on machines with different
 * endianness, but the results will still be correct.
 */
u64int
murmurhash2(void *pp, usize n)
{
	u32int mul = 0x5bd1e995;
	u32int shift = 24;
	u32int hash, word;
	u32int *wp, *wend;
	uchar *tail;
	usize rem;

	hash = Seed ^ n;
	wp = pp;
	wend = wp + n/4;
	while(wp != wend){
		word = *wp++;
		word *= mul;
		word ^= word >> shift;
		word *= mul;
		hash *= mul;
		hash ^= word;
	}

	/* fold in the bytes that did not fill a whole word */
	tail = (uchar*)wp;
	rem = n & 0x3;
	if(rem == 3)
		hash ^= tail[2] << 16;
	if(rem >= 2)
		hash ^= tail[1] << 8;
	if(rem >= 1){
		hash ^= tail[0] << 0;
		hash *= mul;
	}

	hash ^= hash >> 13;
	hash *= mul;
	hash ^= hash >> 15;
	return hash;
}