git/query: refactor graph painting algorithm (findtwixt, lca)

We now keep track of 3 sets during traversal:
- keep: commits we've reached from head commits
- drop: commits we've reached from tail commits
- skip: ancestors of commits in both 'keep' and 'drop'

Commits in 'keep' and/or 'drop' may later be added to the 'skip' set
if we discover that they are part of a common subgraph of the head
and tail commits.

From these sets we can calculate the commits we are interested in:
LCA commits are those in both 'keep' and 'drop', but not in 'skip'.
findtwixt commits are those in 'keep', but in neither 'drop' nor 'skip'.

The "LCA" commit returned is a common ancestor such that there are no
other common ancestors that can reach that commit.  Although there can
be multiple commits that meet this criterion, where one is technically
lower on the commit-graph than the other, these cases only happen in
complex merge arrangements and any choice is likely a decent merge
base.

Repainting is now done in paint() directly.  When we find a boundary
commit, we switch our paint color to 'skip'.  'skip' painting does
not stop when it hits another color; we continue until we are left
with only 'skip' commits on the queue.

This fixes several mishandled cases in the current algorithm:
1. If we hit the common subgraph from tail commits first (if the tail
   commit was newer than the head commit), we ended up traversing the
   entire commit graph.  This is because we couldn't distinguish
   between 'drop' commits that were part of the common subgraph, and
   those that were still looking for it.
2. If we traversed through an initial part of the common subgraph from
   head commits before reaching it from tail commits, these commits
   were returned from findtwixt even though they were also reachable
   from tail commits.
3. In the same case as 2, we might end up choosing as the LCA an
   incorrect commit that is an ancestor of the real LCA.
This commit is contained in:
Michael Forney 2022-03-16 21:41:59 +00:00
parent 873f381e29
commit 2e47badb88
4 changed files with 63 additions and 142 deletions

View file

@ -160,14 +160,12 @@ struct Qelt {
Object *o; Object *o;
vlong mtime; vlong mtime;
int color; int color;
int dist;
}; };
struct Objq { struct Objq {
Qelt *heap; Qelt *heap;
int nheap; int nheap;
int heapsz; int heapsz;
int nkeep;
}; };
struct Dtab { struct Dtab {
@ -324,5 +322,5 @@ void closeconn(Conn *);
/* queues */ /* queues */
void qinit(Objq*); void qinit(Objq*);
void qclear(Objq*); void qclear(Objq*);
void qput(Objq*, Object*, int, int); void qput(Objq*, Object*, int);
int qpop(Objq*, Qelt*); int qpop(Objq*, Qelt*);

View file

@ -205,7 +205,7 @@ showcommits(char *c)
sysfatal("load %H: %r", h); sysfatal("load %H: %r", h);
qinit(&objq); qinit(&objq);
osinit(&done); osinit(&done);
qput(&objq, o, 0, 0); qput(&objq, o, 0);
while(qpop(&objq, &e)){ while(qpop(&objq, &e)){
show(e.o); show(e.o);
for(i = 0; i < e.o->commit->nparent; i++){ for(i = 0; i < e.o->commit->nparent; i++){
@ -214,7 +214,7 @@ showcommits(char *c)
if((p = readobject(e.o->commit->parent[i])) == nil) if((p = readobject(e.o->commit->parent[i])) == nil)
sysfatal("load %H: %r", o->commit->parent[i]); sysfatal("load %H: %r", o->commit->parent[i]);
osadd(&done, p); osadd(&done, p);
qput(&objq, p, 0, 0); qput(&objq, p, 0);
} }
unref(e.o); unref(e.o);
} }

View file

@ -5,25 +5,12 @@
#include "git.h" #include "git.h"
typedef struct Eval Eval; typedef struct Eval Eval;
typedef struct Lcaq Lcaq;
struct Lcaq {
Objq;
Hash *head;
Hash *tail;
int nhead;
int ntail;
Object *best;
int dist;
};
enum { enum {
Blank, Blank,
Keep, Keep,
Drop, Drop,
Skip,
}; };
struct Eval { struct Eval {
@ -38,6 +25,7 @@ static char *colors[] = {
[Keep] "keep", [Keep] "keep",
[Drop] "drop", [Drop] "drop",
[Blank] "blank", [Blank] "blank",
[Skip] "skip",
}; };
static Object zcommit = { static Object zcommit = {
@ -113,97 +101,20 @@ take(Eval *ev, char *m)
return 1; return 1;
} }
static int
pickbest(Lcaq *q, Qelt *e, int color)
{
int i, best, exact;
best = 0;
exact = 0;
if(color == Blank || e->color == color)
return 0;
if(e->dist < q->dist){
dprint(1, "found best (dist %d < %d): %H\n", e->dist, q->dist, e->o->hash);
best = 1;
}
for(i = 0; i < q->nhead; i++)
if(hasheq(&q->head[i], &e->o->hash)){
dprint(1, "found best (exact head): %H\n", e->o->hash);
best = 1;
exact = 1;
}
for(i = 0; i < q->ntail; i++)
if(hasheq(&q->tail[i], &e->o->hash)){
dprint(1, "found best (exact tail): %H\n", e->o->hash);
best = 1;
exact = 1;
}
if(best){
q->best = e->o;
q->dist = e->dist;
}
return exact;
}
static int
repaint(Lcaq *lcaq, Objset *keep, Objset *drop, Object *o, int dist, int ancestor)
{
Lcaq objq;
Qelt e;
Object *p;
int i;
qinit(&objq);
if((o = readobject(o->hash)) == nil)
return -1;
qput(&objq, o, Drop, dist);
while(qpop(&objq, &e)){
o = e.o;
if(oshas(drop, o->hash))
continue;
if(ancestor && pickbest(lcaq, &e, Keep))
goto out;
if(!oshas(keep, o->hash)){
dprint(2, "repaint: blank => drop %H\n", o->hash);
osadd(drop, o);
continue;
}
for(i = 0; i < o->commit->nparent; i++){
if(oshas(drop, o->commit->parent[i]))
continue;
if((p = readobject(o->commit->parent[i])) == nil)
goto out;
if(p->type != GCommit){
fprint(2, "hash %H not commit\n", p->hash);
unref(p);
}
qput(&objq, p, Drop, e.dist+1);
}
unref(e.o);
}
out:
qclear(&objq);
return 0;
}
static int static int
paint(Hash *head, int nhead, Hash *tail, int ntail, Object ***res, int *nres, int ancestor) paint(Hash *head, int nhead, Hash *tail, int ntail, Object ***res, int *nres, int ancestor)
{ {
Qelt e; Qelt e;
Lcaq objq; Objq objq;
Objset keep, drop; Objset keep, drop, skip;
Object *o, *c; Object *o, *c;
int i, ncolor; int i, nskip;
osinit(&keep); osinit(&keep);
osinit(&drop); osinit(&drop);
memset(&objq, 0, sizeof(objq)); osinit(&skip);
qinit(&objq); qinit(&objq);
objq.head = head; nskip = 0;
objq.nhead = nhead;
objq.tail = tail;
objq.ntail = ntail;
objq.dist = 1<<30;
for(i = 0; i < nhead; i++){ for(i = 0; i < nhead; i++){
if((o = readobject(head[i])) == nil){ if((o = readobject(head[i])) == nil){
@ -217,7 +128,7 @@ paint(Hash *head, int nhead, Hash *tail, int ntail, Object ***res, int *nres, in
continue; continue;
} }
dprint(1, "init: keep %H\n", o->hash); dprint(1, "init: keep %H\n", o->hash);
qput(&objq, o, Keep, 0); qput(&objq, o, Keep);
unref(o); unref(o);
} }
for(i = 0; i < ntail; i++){ for(i = 0; i < ntail; i++){
@ -231,70 +142,83 @@ paint(Hash *head, int nhead, Hash *tail, int ntail, Object ***res, int *nres, in
continue; continue;
} }
dprint(1, "init: drop %H\n", o->hash); dprint(1, "init: drop %H\n", o->hash);
qput(&objq, o, Drop, 0); qput(&objq, o, Drop);
unref(o); unref(o);
} }
dprint(1, "finding twixt commits\n"); dprint(1, "finding twixt commits\n");
while(qpop(&objq, &e)){ while(nskip != objq.nheap && qpop(&objq, &e)){
if(oshas(&drop, e.o->hash)) if(e.color == Skip)
ncolor = Drop; nskip--;
else if(oshas(&keep, e.o->hash)) if(oshas(&skip, e.o->hash))
ncolor = Keep;
else
ncolor = Blank;
if(ancestor && pickbest(&objq, &e, ncolor))
goto exactlca;
if(ncolor == Keep && e.color == Keep || ncolor == Drop)
continue; continue;
if(ncolor == Keep && e.color == Drop){ switch(e.color){
if(repaint(&objq, &keep, &drop, e.o, e.dist, ancestor) == -1) case Keep:
goto error; if(oshas(&keep, e.o->hash))
}else if (ncolor == Blank) { continue;
if(e.color == Keep) if(oshas(&drop, e.o->hash))
osadd(&keep, e.o); e.color = Skip;
else osadd(&keep, e.o);
osadd(&drop, e.o); break;
o = readobject(e.o->hash); case Drop:
for(i = 0; i < o->commit->nparent; i++){ if(oshas(&drop, e.o->hash))
if((c = readobject(e.o->commit->parent[i])) == nil) continue;
goto error; if(oshas(&keep, e.o->hash))
if(c->type != GCommit){ e.color = Skip;
fprint(2, "warning: %H does not point at commit\n", c->hash); osadd(&drop, e.o);
unref(c); break;
continue; case Skip:
} osadd(&skip, e.o);
dprint(2, "\tenqueue: %s %H\n", colors[e.color], c->hash); break;
qput(&objq, c, e.color, e.dist+1);
unref(c);
}
unref(o);
} }
o = readobject(e.o->hash);
for(i = 0; i < o->commit->nparent; i++){
if((c = readobject(e.o->commit->parent[i])) == nil)
goto error;
if(c->type != GCommit){
fprint(2, "warning: %H does not point at commit\n", c->hash);
unref(c);
continue;
}
dprint(2, "\tenqueue: %s %H\n", colors[e.color], c->hash);
qput(&objq, c, e.color);
unref(c);
if(e.color == Skip)
nskip++;
}
unref(o);
} }
exactlca:
if(ancestor){ if(ancestor){
dprint(1, "found ancestor\n"); dprint(1, "found ancestor\n");
if(objq.best == nil){ o = nil;
for(i = 0; i < keep.sz; i++){
o = keep.obj[i];
if(o != nil && oshas(&drop, o->hash) && !oshas(&skip, o->hash))
break;
}
if(i == keep.sz){
*nres = 0; *nres = 0;
*res = nil; *res = nil;
}else{ }else{
*nres = 1; *nres = 1;
*res = eamalloc(1, sizeof(Object*)); *res = eamalloc(1, sizeof(Object*));
(*res)[0] = objq.best; (*res)[0] = o;
} }
}else{ }else{
dprint(1, "found twixt\n"); dprint(1, "found twixt\n");
*res = eamalloc(keep.nobj, sizeof(Object*)); *res = eamalloc(keep.nobj, sizeof(Object*));
*nres = 0; *nres = 0;
for(i = 0; i < keep.sz; i++){ for(i = 0; i < keep.sz; i++){
if(keep.obj[i] != nil && !oshas(&drop, keep.obj[i]->hash)){ o = keep.obj[i];
(*res)[*nres] = keep.obj[i]; if(o != nil && !oshas(&drop, o->hash) && !oshas(&skip, o->hash)){
(*res)[*nres] = o;
(*nres)++; (*nres)++;
} }
} }
} }
osclear(&keep); osclear(&keep);
osclear(&drop); osclear(&drop);
osclear(&skip);
return 0; return 0;
error: error:
dprint(1, "twixt error: %r\n"); dprint(1, "twixt error: %r\n");

View file

@ -338,7 +338,7 @@ qclear(Objq *q)
} }
void void
qput(Objq *q, Object *o, int color, int dist) qput(Objq *q, Object *o, int color)
{ {
Qelt t; Qelt t;
int i; int i;
@ -349,7 +349,6 @@ qput(Objq *q, Object *o, int color, int dist)
} }
q->heap[q->nheap].o = o; q->heap[q->nheap].o = o;
q->heap[q->nheap].color = color; q->heap[q->nheap].color = color;
q->heap[q->nheap].dist = dist;
q->heap[q->nheap].mtime = o->commit->mtime; q->heap[q->nheap].mtime = o->commit->mtime;
for(i = q->nheap; i > 0; i = (i-1)/2){ for(i = q->nheap; i > 0; i = (i-1)/2){
if(q->heap[i].mtime < q->heap[(i-1)/2].mtime) if(q->heap[i].mtime < q->heap[(i-1)/2].mtime)