git/query: fix spurious merge requests

Due to the way LCA is defined, a using a strict LCA
on a graph like this:

 <--a--b--c--d--e--f--g
     \               /
       +-----h-------

can lead to spurious requests to merge. This happens
because 'lca(b, g)' would return 'a', since it can be
reached in one step from 'b', and 2 steps from 'g', while
reaching 'b' from 'a' would be a longer path.

As a result, we need to implement an lca variant that
returns the starting node if one is reachable from the
other, even if it's already found the technically correct
least common ancestor.

This replaces our LCA algorithm with one based on the
painting we do while finding a twixt, making it give
the resutls we want.
git/query: fix spurious merge requests

Due to the way LCA is defined, a using a strict LCA
on a graph like this:

 <--a--b--c--d--e--f--g
     \               /
       +-----h-------

can lead to spurious requests to merge. This happens
because 'lca(b, g)' would return 'a', since it can be
reached in one step from 'b', and 2 steps from 'g', while
reaching 'b' from 'a' would be a longer path.

As a result, we need to implement an lca variant that
returns the starting node if one is reachable from the
other, even if it's already found the technically correct
least common ancestor.

This replaces our LCA algorithm with one based on the
painting we do while finding a twixt.
This commit is contained in:
Ori Bernstein 2021-09-11 17:46:26 +00:00
parent 546f8cfeca
commit c7dcc82b0b
5 changed files with 278 additions and 251 deletions

View file

@ -18,6 +18,8 @@ typedef struct Idxent Idxent;
typedef struct Objlist Objlist; typedef struct Objlist Objlist;
typedef struct Dtab Dtab; typedef struct Dtab Dtab;
typedef struct Dblock Dblock; typedef struct Dblock Dblock;
typedef struct Objq Objq;
typedef struct Qelt Qelt;
enum { enum {
Pathmax = 512, Pathmax = 512,
@ -151,6 +153,20 @@ struct Objset {
int sz; int sz;
}; };
struct Qelt {
Object *o;
vlong mtime;
int color;
int dist;
};
struct Objq {
Qelt *heap;
int nheap;
int heapsz;
int nkeep;
};
struct Dtab { struct Dtab {
Object *o; Object *o;
uchar *base; uchar *base;
@ -301,3 +317,9 @@ int gitconnect(Conn *, char *, char *);
int readphase(Conn *); int readphase(Conn *);
int writephase(Conn *); int writephase(Conn *);
void closeconn(Conn *); void closeconn(Conn *);
/* queues */
void qinit(Objq*);
void qclear(Objq*);
void qput(Objq*, Object*, int, int);
int qpop(Objq*, Qelt*);

View file

@ -15,10 +15,8 @@ char *queryexpr;
char *commitid; char *commitid;
int shortlog; int shortlog;
Object **heap;
int nheap;
int heapsz;
Objset done; Objset done;
Objq objq;
Pfilt *pathfilt; Pfilt *pathfilt;
void void
@ -191,65 +189,11 @@ showquery(char *q)
exits(nil); exits(nil);
} }
static void
qput(Object *o)
{
Object *p;
int i;
if(oshas(&done, o->hash))
return;
osadd(&done, o);
if(nheap == heapsz){
heapsz *= 2;
heap = earealloc(heap, heapsz, sizeof(Object*));
}
heap[nheap++] = o;
for(i = nheap - 1; i > 0; i = (i-1)/2){
o = heap[i];
p = heap[(i-1)/2];
if(o->commit->mtime < p->commit->mtime)
break;
heap[i] = p;
heap[(i-1)/2] = o;
}
}
static Object*
qpop(void)
{
Object *o, *t;
int i, l, r, m;
if(nheap == 0)
return nil;
i = 0;
o = heap[0];
t = heap[--nheap];
heap[0] = t;
while(1){
m = i;
l = 2*i+1;
r = 2*i+2;
if(l < nheap && heap[m]->commit->mtime < heap[l]->commit->mtime)
m = l;
if(r < nheap && heap[m]->commit->mtime < heap[r]->commit->mtime)
m = r;
else
break;
t = heap[m];
heap[m] = heap[i];
heap[i] = t;
i = m;
}
return o;
}
static void static void
showcommits(char *c) showcommits(char *c)
{ {
Object *o, *p; Object *o, *p;
Qelt e;
int i; int i;
Hash h; Hash h;
@ -259,18 +203,20 @@ showcommits(char *c)
sysfatal("resolve %s: %r", c); sysfatal("resolve %s: %r", c);
if((o = readobject(h)) == nil) if((o = readobject(h)) == nil)
sysfatal("load %H: %r", h); sysfatal("load %H: %r", h);
heapsz = 8; qinit(&objq);
heap = eamalloc(heapsz, sizeof(Object*));
osinit(&done); osinit(&done);
qput(o); qput(&objq, o, 0, 0);
while((o = qpop()) != nil){ while(qpop(&objq, &e)){
show(o); show(e.o);
for(i = 0; i < o->commit->nparent; i++){ for(i = 0; i < e.o->commit->nparent; i++){
if((p = readobject(o->commit->parent[i])) == nil) if(oshas(&done, e.o->commit->parent[i]))
continue;
if((p = readobject(e.o->commit->parent[i])) == nil)
sysfatal("load %H: %r", o->commit->parent[i]); sysfatal("load %H: %r", o->commit->parent[i]);
qput(p); osadd(&done, p);
qput(&objq, p, 0, 0);
} }
unref(o); unref(e.o);
} }
} }

View file

@ -158,6 +158,7 @@ main(int argc, char **argv)
char query[2048], repo[512]; char query[2048], repo[512];
ARGBEGIN{ ARGBEGIN{
case 'd': chattygit++; break;
case 'p': fullpath++; break; case 'p': fullpath++; break;
case 'c': changes++; break; case 'c': changes++; break;
case 'r': reverse ^= 1; break; case 'r': reverse ^= 1; break;

View file

@ -5,8 +5,20 @@
#include "git.h" #include "git.h"
typedef struct Eval Eval; typedef struct Eval Eval;
typedef struct XObject XObject; typedef struct Lcaq Lcaq;
typedef struct Objq Objq;
struct Lcaq {
Objq;
Hash *head;
Hash *tail;
int nhead;
int ntail;
Object *best;
int dist;
};
enum { enum {
Blank, Blank,
@ -22,17 +34,10 @@ struct Eval {
int stksz; int stksz;
}; };
struct XObject { static char *colors[] = {
Object *obj; [Keep] "keep",
Object *mark; [Drop] "drop",
XObject *queue; [Blank] "blank",
XObject *next;
};
struct Objq {
Objq *next;
Object *o;
int color;
}; };
static Object zcommit = { static Object zcommit = {
@ -128,150 +133,98 @@ take(Eval *ev, char *m)
return 1; return 1;
} }
XObject* static int
hnode(XObject *ht[], Object *o) pickbest(Lcaq *q, Qelt *e, int color)
{ {
XObject *h; int i, best, exact;
int hh;
hh = o->hash.h[0] & 0xff; best = 0;
for(h = ht[hh]; h; h = h->next) exact = 0;
if(hasheq(&o->hash, &h->obj->hash)) if(color == Blank || e->color == color)
return h; return 0;
if(e->dist < q->dist){
h = emalloc(sizeof(*h)); dprint(1, "found best (dist %d < %d): %H\n", e->dist, q->dist, e->o->hash);
h->obj = o; best = 1;
h->mark = nil; }
h->queue = nil; for(i = 0; i < q->nhead; i++)
h->next = ht[hh]; if(hasheq(&q->head[i], &e->o->hash)){
ht[hh] = h; dprint(1, "found best (exact head): %H\n", e->o->hash);
return h; best = 1;
exact = 1;
}
for(i = 0; i < q->ntail; i++)
if(hasheq(&q->tail[i], &e->o->hash)){
dprint(1, "found best (exact tail): %H\n", e->o->hash);
best = 1;
exact = 1;
}
if(best){
q->best = e->o;
q->dist = e->dist;
}
return exact;
} }
Object* static int
ancestor(Object *a, Object *b) repaint(Lcaq *lcaq, Objset *keep, Objset *drop, Object *o, int dist, int ancestor)
{ {
Object *o, *p, *r; Lcaq objq;
XObject *ht[256]; Qelt e;
XObject *h, *q, *q1, *q2; Object *p;
int i; int i;
if(a == b) qinit(&objq);
return a; if((o = readobject(o->hash)) == nil)
if(a == nil || b == nil) return -1;
return nil; qput(&objq, o, Drop, dist);
r = nil; while(qpop(&objq, &e)){
memset(ht, 0, sizeof(ht)); o = e.o;
q1 = nil; if(oshas(drop, o->hash))
continue;
h = hnode(ht, a); if(ancestor && pickbest(lcaq, &e, Keep))
h->mark = a; goto out;
h->queue = q1; if(!oshas(keep, o->hash)){
q1 = h; dprint(2, "repaint: blank => drop %H\n", o->hash);
osadd(drop, o);
h = hnode(ht, b); continue;
h->mark = b; }
h->queue = q1; for(i = 0; i < o->commit->nparent; i++){
q1 = h; if(oshas(drop, o->commit->parent[i]))
continue;
while(1){ if((p = readobject(o->commit->parent[i])) == nil)
q2 = nil; goto out;
while(q = q1){ if(p->type != GCommit){
q1 = q->queue; fprint(2, "hash %H not commit\n", p->hash);
q->queue = nil; unref(p);
o = q->obj;
for(i = 0; i < o->commit->nparent; i++){
p = readobject(o->commit->parent[i]);
if(p == nil)
goto err;
h = hnode(ht, p);
if(h->mark != nil){
if(h->mark != q->mark){
r = h->obj;
goto done;
}
} else {
h->mark = q->mark;
h->queue = q2;
q2 = h;
}
} }
qput(&objq, p, Drop, e.dist+1);
} }
if(q2 == nil){ unref(e.o);
err:
werrstr("no common ancestor");
break;
}
q1 = q2;
} }
done: out:
for(i=0; i<nelem(ht); i++){ qclear(&objq);
while(h = ht[i]){
ht[i] = h->next;
free(h);
}
}
return r;
}
int
lca(Eval *ev)
{
Object *a, *b, *o;
if(ev->nstk < 2){
werrstr("ancestor needs 2 objects");
return -1;
}
a = pop(ev);
b = pop(ev);
o = ancestor(a, b);
if(o == nil)
return -1;
push(ev, o);
return 0; return 0;
} }
static int static int
repaint(Objset *keep, Objset *drop, Object *o) paint(Hash *head, int nhead, Hash *tail, int ntail, Object ***res, int *nres, int ancestor)
{ {
Object *p; Qelt e;
int i; Lcaq objq;
if(!oshas(keep, o->hash) && !oshas(drop, o->hash)){
dprint(2, "repaint: blank => drop %H\n", o->hash);
osadd(drop, o);
return 0;
}
if(oshas(keep, o->hash))
dprint(2, "repaint: keep => drop %H\n", o->hash);
osadd(drop, o);
for(i = 0; i < o->commit->nparent; i++){
if((p = readobject(o->commit->parent[i])) == nil)
return -1;
if(repaint(keep, drop, p) == -1)
return -1;
unref(p);
}
return 0;
}
int
findtwixt(Hash *head, int nhead, Hash *tail, int ntail, Object ***res, int *nres)
{
Objq *q, *e, *n, **p;
Objset keep, drop; Objset keep, drop;
Object *o, *c; Object *o, *c;
int i, ncolor; int i, ncolor;
e = nil;
q = nil;
p = &q;
osinit(&keep); osinit(&keep);
osinit(&drop); osinit(&drop);
qinit(&objq);
objq.head = head;
objq.nhead = nhead;
objq.tail = tail;
objq.ntail = ntail;
objq.dist = 1<<30;
for(i = 0; i < nhead; i++){ for(i = 0; i < nhead; i++){
if(hasheq(&head[i], &Zhash))
continue;
if((o = readobject(head[i])) == nil){ if((o = readobject(head[i])) == nil){
fprint(2, "warning: %H does not point at commit\n", o->hash); fprint(2, "warning: %H does not point at commit\n", o->hash);
werrstr("read head %H: %r", head[i]); werrstr("read head %H: %r", head[i]);
@ -282,17 +235,11 @@ findtwixt(Hash *head, int nhead, Hash *tail, int ntail, Object ***res, int *nres
unref(o); unref(o);
continue; continue;
} }
dprint(1, "twixt init: keep %H\n", o->hash); dprint(1, "init: keep %H\n", o->hash);
e = emalloc(sizeof(Objq)); qput(&objq, o, Keep, 0);
e->o = o;
e->color = Keep;
*p = e;
p = &e->next;
unref(o); unref(o);
} }
for(i = 0; i < ntail; i++){ for(i = 0; i < ntail; i++){
if(hasheq(&tail[i], &Zhash))
continue;
if((o = readobject(tail[i])) == nil){ if((o = readobject(tail[i])) == nil){
werrstr("read tail %H: %r", tail[i]); werrstr("read tail %H: %r", tail[i]);
return -1; return -1;
@ -303,77 +250,117 @@ findtwixt(Hash *head, int nhead, Hash *tail, int ntail, Object ***res, int *nres
continue; continue;
} }
dprint(1, "init: drop %H\n", o->hash); dprint(1, "init: drop %H\n", o->hash);
e = emalloc(sizeof(Objq)); qput(&objq, o, Drop, 0);
e->o = o;
e->color = Drop;
*p = e;
p = &e->next;
unref(o); unref(o);
} }
dprint(1, "finding twixt commits\n"); dprint(1, "finding twixt commits\n");
while(q != nil){ while(qpop(&objq, &e)){
if(oshas(&drop, q->o->hash)) if(oshas(&drop, e.o->hash))
ncolor = Drop; ncolor = Drop;
else if(oshas(&keep, q->o->hash)) else if(oshas(&keep, e.o->hash))
ncolor = Keep; ncolor = Keep;
else else
ncolor = Blank; ncolor = Blank;
if(ncolor == Drop || ncolor == Keep && q->color == Keep) if(ancestor && pickbest(&objq, &e, ncolor))
goto next; goto exactlca;
if(ncolor == Keep && q->color == Drop){ if(ncolor == Keep && e.color == Keep || ncolor == Drop)
if(repaint(&keep, &drop, q->o) == -1) continue;
if(ncolor == Keep && e.color == Drop){
if(repaint(&objq, &keep, &drop, e.o, e.dist, ancestor) == -1)
goto error; goto error;
}else if (ncolor == Blank) { }else if (ncolor == Blank) {
dprint(2, "visit: %s %H\n", q->color == Keep ? "keep" : "drop", q->o->hash); if(e.color == Keep)
if(q->color == Keep) osadd(&keep, e.o);
osadd(&keep, q->o);
else else
osadd(&drop, q->o); osadd(&drop, e.o);
for(i = 0; i < q->o->commit->nparent; i++){ o = readobject(e.o->hash);
if((c = readobject(q->o->commit->parent[i])) == nil) for(i = 0; i < o->commit->nparent; i++){
if((c = readobject(e.o->commit->parent[i])) == nil)
goto error; goto error;
if(c->type != GCommit){ if(c->type != GCommit){
fprint(2, "warning: %H does not point at commit\n", c->hash); fprint(2, "warning: %H does not point at commit\n", c->hash);
unref(c); unref(c);
continue; continue;
} }
dprint(2, "enqueue: %s %H\n", q->color == Keep ? "keep" : "drop", c->hash); dprint(2, "\tenqueue: %s %H\n", colors[e.color], c->hash);
n = emalloc(sizeof(Objq)); qput(&objq, c, e.color, e.dist+1);
n->color = q->color;
n->next = nil;
n->o = c;
e->next = n;
e = n;
unref(c); unref(c);
} }
}else{ unref(o);
sysfatal("oops");
} }
next:
n = q->next;
free(q);
q = n;
} }
*res = eamalloc(keep.nobj, sizeof(Object*)); exactlca:
*nres = 0; if(ancestor){
for(i = 0; i < keep.sz; i++){ dprint(1, "found ancestor\n");
if(keep.obj[i] != nil && !oshas(&drop, keep.obj[i]->hash)){ if(objq.best == nil){
(*res)[*nres] = keep.obj[i]; *nres = 0;
(*nres)++; *res = nil;
}else{
*nres = 1;
*res = eamalloc(1, sizeof(Object*));
(*res)[0] = objq.best;
}
}else{
dprint(1, "found twixt\n");
*res = eamalloc(keep.nobj, sizeof(Object*));
*nres = 0;
for(i = 0; i < keep.sz; i++){
if(keep.obj[i] != nil && !oshas(&drop, keep.obj[i]->hash)){
(*res)[*nres] = keep.obj[i];
(*nres)++;
}
} }
} }
osclear(&keep); osclear(&keep);
osclear(&drop); osclear(&drop);
return 0; return 0;
error: error:
for(; q != nil; q = n) { dprint(1, "twixt error: %r\n");
n = q->next; free(objq.heap);
free(q);
}
return -1; return -1;
} }
int
findtwixt(Hash *head, int nhead, Hash *tail, int ntail, Object ***res, int *nres)
{
return paint(head, nhead, tail, ntail, res, nres, 0);
}
Object*
ancestor(Object *a, Object *b)
{
Object **o, *r;
int n;
if(paint(&a->hash, 1, &b->hash, 1, &o, &n, 1) == -1 || n == 0)
return nil;
r = o[0];
free(o);
return ref(r);
}
int
lca(Eval *ev)
{
Object *a, *b, **o;
int n;
if(ev->nstk < 2){
werrstr("ancestor needs 2 objects");
return -1;
}
n = 0;
b = pop(ev);
a = pop(ev);
paint(&a->hash, 1, &b->hash, 1, &o, &n, 1);
if(n == 0)
return -1;
push(ev, *o);
free(o);
return 0;
}
static int static int
parent(Eval *ev) parent(Eval *ev)
{ {

View file

@ -321,3 +321,74 @@ showprogress(int x, int pct)
} }
return pct; return pct;
} }
void
qinit(Objq *q)
{
memset(q, 0, sizeof(Objq));
q->nheap = 0;
q->heapsz = 8;
q->heap = eamalloc(q->heapsz, sizeof(Qelt));
}
void
qclear(Objq *q)
{
free(q->heap);
}
void
qput(Objq *q, Object *o, int color, int dist)
{
Qelt t;
int i;
if(q->nheap == q->heapsz){
q->heapsz *= 2;
q->heap = earealloc(q->heap, q->heapsz, sizeof(Qelt));
}
q->heap[q->nheap].o = o;
q->heap[q->nheap].color = color;
q->heap[q->nheap].dist = dist;
q->heap[q->nheap].mtime = o->commit->mtime;
for(i = q->nheap; i > 0; i = (i-1)/2){
if(q->heap[i].mtime < q->heap[(i-1)/2].mtime)
break;
t = q->heap[i];
q->heap[i] = q->heap[(i-1)/2];
q->heap[(i-1)/2] = t;
}
q->nheap++;
}
int
qpop(Objq *q, Qelt *e)
{
int i, l, r, m;
Qelt t;
if(q->nheap == 0)
return 0;
*e = q->heap[0];
if(--q->nheap == 0)
return 1;
i = 0;
q->heap[0] = q->heap[q->nheap];
while(1){
m = i;
l = 2*i+1;
r = 2*i+2;
if(l < q->nheap && q->heap[m].mtime < q->heap[l].mtime)
m = l;
if(r < q->nheap && q->heap[m].mtime < q->heap[r].mtime)
m = r;
if(m == i)
break;
t = q->heap[m];
q->heap[m] = q->heap[i];
q->heap[i] = t;
i = m;
}
return 1;
}