2021-05-17 01:49:45 +00:00
|
|
|
#include <bio.h>
|
|
|
|
#include <mp.h>
|
|
|
|
#include <libsec.h>
|
|
|
|
#include <flate.h>
|
|
|
|
#include <regexp.h>
|
|
|
|
|
2022-01-02 03:37:23 +00:00
|
|
|
/*
 * Forward typedefs so the struct tags below can be used without
 * the `struct` keyword and can refer to one another by pointer.
 */
typedef struct Capset	Capset;
typedef struct Conn	Conn;
typedef struct Hash	Hash;
typedef struct Delta	Delta;
typedef struct Cinfo	Cinfo;
typedef struct Tinfo	Tinfo;
typedef struct Object	Object;
typedef struct Objset	Objset;
typedef struct Pack	Pack;
typedef struct Buf	Buf;
typedef struct Dirent	Dirent;
typedef struct Idxent	Idxent;
typedef struct Objlist	Objlist;
typedef struct Dtab	Dtab;
typedef struct Dblock	Dblock;
|
git/query: fix spurious merge requests
Due to the way LCA is defined, using a strict LCA
on a graph like this:
<--a--b--c--d--e--f--g
    \               /
     +-----h-------
can lead to spurious requests to merge. This happens
because 'lca(b, g)' would return 'a', since it can be
reached in one step from 'b', and 2 steps from 'g', while
reaching 'b' from 'a' would be a longer path.
As a result, we need to implement an lca variant that
returns the starting node if one is reachable from the
other, even if it's already found the technically correct
least common ancestor.
This replaces our LCA algorithm with one based on the
painting we do while finding a twixt, making it give
the results we want.
git/query: fix spurious merge requests
Due to the way LCA is defined, using a strict LCA
on a graph like this:
<--a--b--c--d--e--f--g
    \               /
     +-----h-------
can lead to spurious requests to merge. This happens
because 'lca(b, g)' would return 'a', since it can be
reached in one step from 'b', and 2 steps from 'g', while
reaching 'b' from 'a' would be a longer path.
As a result, we need to implement an lca variant that
returns the starting node if one is reachable from the
other, even if it's already found the technically correct
least common ancestor.
This replaces our LCA algorithm with one based on the
painting we do while finding a twixt.
2021-09-11 17:46:26 +00:00
|
|
|
/* Forward typedefs for the object queue types defined below. */
typedef struct Objq	Objq;
typedef struct Qelt	Qelt;
|
2021-05-17 01:49:45 +00:00
|
|
|
|
|
|
|
/* Size limits and byte-unit constants shared across the git tools. */
enum {
	Pathmax		= 512,
	Npackcache	= 32,
	Hashsz		= 20,	/* same value as the length of Hash.h */
	Pktmax		= 65536,

	KiB		= 1024,
	MiB		= 1024*KiB,
};
|
|
|
|
|
|
|
|
/*
 * Object type codes.  The value 5 is deliberately absent.
 * NOTE(review): the numbering looks like the git pack-file entry
 * types (6 = offset delta, 7 = ref delta) -- confirm against the
 * pack reader before relying on it.
 */
enum {
	GNone	= 0,
	GCommit	= 1,
	GTree	= 2,
	GBlob	= 3,
	GTag	= 4,
	GOdelta	= 6,
	GRdelta	= 7,
};
|
|
|
|
|
|
|
|
/*
 * Single-bit flags; bit 4 is not assigned.
 * NOTE(review): presumably or'ed into Object.flag -- confirm
 * against the object cache code.
 */
enum {
	Cloaded	= 1 << 0,
	Cidx	= 1 << 1,
	Ccache	= 1 << 2,
	Cexist	= 1 << 3,
	Cparsed	= 1 << 5,
	Cthin	= 1 << 6,
};
|
|
|
|
|
|
|
|
/*
 * Connection kinds, numbered from 0 in declaration order.
 * NOTE(review): presumably the values stored in Conn.type -- confirm.
 */
enum {
	ConnGit,
	ConnGit9,
	ConnSsh,
	ConnHttp,
};
|
|
|
|
|
|
|
|
struct Objlist {
|
|
|
|
int idx;
|
|
|
|
|
|
|
|
int fd;
|
|
|
|
int state;
|
|
|
|
int stage;
|
|
|
|
|
|
|
|
Dir *top;
|
|
|
|
int ntop;
|
|
|
|
int topidx;
|
|
|
|
Dir *loose;
|
|
|
|
int nloose;
|
|
|
|
int looseidx;
|
|
|
|
Dir *pack;
|
|
|
|
int npack;
|
|
|
|
int packidx;
|
|
|
|
int nent;
|
|
|
|
int entidx;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct Hash {
|
|
|
|
uchar h[20];
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Per-connection state used by the protocol routines declared in
 * this header (readpkt, writepkt, gitconnect, closeconn, ...).
 * NOTE(review): rfd/wfd are presumably the read and write file
 * descriptors, and type one of the Conn* enum values -- confirm.
 */
struct Conn {
	int	type;
	int	rfd;
	int	wfd;

	/* only used by http */
	int	cfd;
	char	*url;	/* note, first GET uses a different url */
	char	*dir;
	char	*direction;
};
|
|
|
|
|
|
|
|
struct Dirent {
|
|
|
|
char *name;
|
|
|
|
int mode;
|
|
|
|
Hash h;
|
|
|
|
char ismod;
|
|
|
|
char islink;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct Object {
|
|
|
|
/* Git data */
|
|
|
|
Hash hash;
|
|
|
|
int type;
|
|
|
|
|
|
|
|
/* Cache */
|
|
|
|
int id;
|
|
|
|
int flag;
|
|
|
|
int refs;
|
|
|
|
Object *next;
|
|
|
|
Object *prev;
|
|
|
|
|
|
|
|
/* For indexing */
|
|
|
|
vlong off;
|
|
|
|
vlong len;
|
|
|
|
u32int crc;
|
|
|
|
|
|
|
|
/* Everything below here gets cleared */
|
|
|
|
char *all;
|
|
|
|
char *data;
|
|
|
|
/* size excludes header */
|
|
|
|
vlong size;
|
|
|
|
|
|
|
|
/* Significant win on memory use */
|
|
|
|
union {
|
|
|
|
Cinfo *commit;
|
|
|
|
Tinfo *tree;
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
|
|
struct Tinfo {
|
|
|
|
/* Tree */
|
|
|
|
Dirent *ent;
|
|
|
|
int nent;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct Cinfo {
|
|
|
|
/* Commit */
|
|
|
|
Hash *parent;
|
|
|
|
int nparent;
|
|
|
|
Hash tree;
|
|
|
|
char *author;
|
|
|
|
char *committer;
|
|
|
|
char *msg;
|
|
|
|
int nmsg;
|
|
|
|
vlong ctime;
|
|
|
|
vlong mtime;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct Objset {
|
|
|
|
Object **obj;
|
|
|
|
int nobj;
|
|
|
|
int sz;
|
|
|
|
};
|
|
|
|
|
git/query: fix spurious merge requests
Due to the way LCA is defined, using a strict LCA
on a graph like this:
<--a--b--c--d--e--f--g
    \               /
     +-----h-------
can lead to spurious requests to merge. This happens
because 'lca(b, g)' would return 'a', since it can be
reached in one step from 'b', and 2 steps from 'g', while
reaching 'b' from 'a' would be a longer path.
As a result, we need to implement an lca variant that
returns the starting node if one is reachable from the
other, even if it's already found the technically correct
least common ancestor.
This replaces our LCA algorithm with one based on the
painting we do while finding a twixt, making it give
the results we want.
git/query: fix spurious merge requests
Due to the way LCA is defined, using a strict LCA
on a graph like this:
<--a--b--c--d--e--f--g
    \               /
     +-----h-------
can lead to spurious requests to merge. This happens
because 'lca(b, g)' would return 'a', since it can be
reached in one step from 'b', and 2 steps from 'g', while
reaching 'b' from 'a' would be a longer path.
As a result, we need to implement an lca variant that
returns the starting node if one is reachable from the
other, even if it's already found the technically correct
least common ancestor.
This replaces our LCA algorithm with one based on the
painting we do while finding a twixt.
2021-09-11 17:46:26 +00:00
|
|
|
struct Qelt {
|
|
|
|
Object *o;
|
|
|
|
vlong mtime;
|
|
|
|
int color;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct Objq {
|
|
|
|
Qelt *heap;
|
|
|
|
int nheap;
|
|
|
|
int heapsz;
|
|
|
|
};
|
|
|
|
|
2021-05-17 01:49:45 +00:00
|
|
|
struct Dtab {
|
|
|
|
Object *o;
|
|
|
|
uchar *base;
|
|
|
|
int nbase;
|
|
|
|
Dblock *b;
|
|
|
|
int nb;
|
|
|
|
int sz;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct Dblock {
|
|
|
|
uchar *buf;
|
|
|
|
int len;
|
|
|
|
int off;
|
|
|
|
u64int hash;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * One delta instruction; deltify() returns a pointer to these.
 * NOTE(review): cpy presumably distinguishes "copy len bytes from
 * the base at off" from "insert len literal bytes" -- confirm.
 */
struct Delta {
	int	cpy;
	int	off;
	int	len;
};
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Read a 16-bit big-endian value from b[0..1].
 * Each byte is masked with 0xFFul, so the result is an unsigned
 * long and a signed char buffer cannot sign-extend into it.
 * b is evaluated more than once; pass an expression without
 * side effects.
 */
#define GETBE16(b)\
		((((b)[0] & 0xFFul) << 8) | \
		 (((b)[1] & 0xFFul) << 0))
|
|
|
|
|
|
|
|
/*
 * Read a 32-bit big-endian value from b[0..3].
 * Each byte is masked with 0xFFul before shifting, so the result
 * is an unsigned long.  b is evaluated more than once; pass an
 * expression without side effects.
 */
#define GETBE32(b)\
		((((b)[0] & 0xFFul) << 24) | \
		 (((b)[1] & 0xFFul) << 16) | \
		 (((b)[2] & 0xFFul) <<  8) | \
		 (((b)[3] & 0xFFul) <<  0))
|
|
|
|
/*
 * Read a 64-bit big-endian value from b[0..7].
 * Each byte is masked with 0xFFull before shifting, so the result
 * is an unsigned long long.  b is evaluated more than once; pass
 * an expression without side effects.
 */
#define GETBE64(b)\
		((((b)[0] & 0xFFull) << 56) | \
		 (((b)[1] & 0xFFull) << 48) | \
		 (((b)[2] & 0xFFull) << 40) | \
		 (((b)[3] & 0xFFull) << 32) | \
		 (((b)[4] & 0xFFull) << 24) | \
		 (((b)[5] & 0xFFull) << 16) | \
		 (((b)[6] & 0xFFull) <<  8) | \
		 (((b)[7] & 0xFFull) <<  0))
|
|
|
|
|
|
|
|
/*
 * Store the low 16 bits of n into b[0..1], most significant byte
 * first.  Truncation to a byte relies on the element type of b.
 * Both b and n are evaluated more than once.
 */
#define PUTBE16(b, n)\
	do{ \
		(b)[0] = (n) >> 8; \
		(b)[1] = (n) >> 0; \
	} while(0)
|
|
|
|
|
|
|
|
/*
 * Store the low 32 bits of n into b[0..3], most significant byte
 * first.  Truncation to a byte relies on the element type of b.
 * Both b and n are evaluated more than once.
 */
#define PUTBE32(b, n)\
	do{ \
		(b)[0] = (n) >> 24; \
		(b)[1] = (n) >> 16; \
		(b)[2] = (n) >> 8; \
		(b)[3] = (n) >> 0; \
	} while(0)
|
|
|
|
|
|
|
|
/*
 * Store the 64 bits of n into b[0..7], most significant byte
 * first.  n must be at least 64 bits wide for the larger shifts
 * to be defined.  Both b and n are evaluated more than once.
 */
#define PUTBE64(b, n)\
	do{ \
		(b)[0] = (n) >> 56; \
		(b)[1] = (n) >> 48; \
		(b)[2] = (n) >> 40; \
		(b)[3] = (n) >> 32; \
		(b)[4] = (n) >> 24; \
		(b)[5] = (n) >> 16; \
		(b)[6] = (n) >> 8; \
		(b)[7] = (n) >> 0; \
	} while(0)
|
|
|
|
|
|
|
|
/* Low 8 bits of the path field of the structure qid points to, taken after conversion to int. */
#define QDIR(qid)	(((int)((qid)->path)) & 0xff)
|
|
|
|
/*
 * True for any isspace() character other than newline.  This is
 * broader than the ISO C isblank(), which accepts only space and
 * tab.  c is evaluated twice; pass an expression without side
 * effects.
 */
#define isblank(c) \
	(((c) != '\n') && isspace(c))
|
|
|
|
|
|
|
|
extern Reprog *authorpat;
|
|
|
|
extern Objset objcache;
|
2022-01-02 03:37:23 +00:00
|
|
|
extern vlong cachemax;
|
2021-05-17 01:49:45 +00:00
|
|
|
extern Hash Zhash;
|
|
|
|
extern int chattygit;
|
|
|
|
extern int interactive;
|
|
|
|
|
|
|
|
#pragma varargck type "H" Hash
|
|
|
|
#pragma varargck type "T" int
|
|
|
|
#pragma varargck type "O" Object*
|
|
|
|
#pragma varargck type "Q" Qid
|
|
|
|
int Hfmt(Fmt*);
|
|
|
|
int Tfmt(Fmt*);
|
|
|
|
int Ofmt(Fmt*);
|
|
|
|
int Qfmt(Fmt*);
|
|
|
|
|
|
|
|
void gitinit(void);
|
|
|
|
|
|
|
|
/* object io */
|
|
|
|
int resolverefs(Hash **, char *);
|
|
|
|
int resolveref(Hash *, char *);
|
|
|
|
int listrefs(Hash **, char ***);
|
|
|
|
Object *ancestor(Object *, Object *);
|
|
|
|
int findtwixt(Hash *, int, Hash *, int, Object ***, int *);
|
|
|
|
Object *readobject(Hash);
|
|
|
|
Object *clearedobject(Hash, int);
|
|
|
|
void parseobject(Object *);
|
|
|
|
int indexpack(char *, char *, Hash);
|
|
|
|
int writepack(int, Hash*, int, Hash*, int, Hash*);
|
|
|
|
int hasheq(Hash *, Hash *);
|
|
|
|
Object *ref(Object *);
|
|
|
|
void unref(Object *);
|
|
|
|
void cache(Object *);
|
|
|
|
Object *emptydir(void);
|
|
|
|
|
|
|
|
/* object sets */
|
|
|
|
void osinit(Objset *);
|
|
|
|
void osclear(Objset *);
|
|
|
|
void osadd(Objset *, Object *);
|
|
|
|
int oshas(Objset *, Hash);
|
|
|
|
Object *osfind(Objset *, Hash);
|
|
|
|
|
|
|
|
/* object listing */
|
|
|
|
Objlist *mkols(void);
|
|
|
|
int olsnext(Objlist *, Hash *);
|
|
|
|
void olsfree(Objlist *);
|
|
|
|
|
|
|
|
/* util functions */
|
|
|
|
/*
 * Call _dprint(...) when the global chattygit is at least lvl.
 * The body is wrapped in do{}while(0) so the macro acts as one
 * statement: a following `else` binds to the caller's `if`, not
 * to the test hidden inside the macro.  lvl is parenthesized so
 * compound expressions compare as intended.
 */
#define dprint(lvl, ...) \
	do{ \
		if(chattygit >= (lvl)) \
			_dprint(__VA_ARGS__); \
	} while(0)
|
|
|
|
void _dprint(char *, ...);
|
|
|
|
void *eamalloc(ulong, ulong);
|
|
|
|
void *emalloc(ulong);
|
|
|
|
void *earealloc(void *, ulong, ulong);
|
|
|
|
void *erealloc(void *, ulong);
|
|
|
|
char *estrdup(char *);
|
|
|
|
int slurpdir(char *, Dir **);
|
|
|
|
int hparse(Hash *, char *);
|
|
|
|
int hassuffix(char *, char *);
|
|
|
|
int swapsuffix(char *, int, char *, char *, char *);
|
|
|
|
char *strip(char *);
|
|
|
|
int findrepo(char *, int);
|
|
|
|
int showprogress(int, int);
|
|
|
|
|
|
|
|
/* packing */
|
|
|
|
void dtinit(Dtab *, Object*);
|
|
|
|
void dtclear(Dtab*);
|
|
|
|
Delta* deltify(Object*, Dtab*, int*);
|
|
|
|
|
|
|
|
/* proto handling */
|
|
|
|
int readpkt(Conn*, char*, int);
|
|
|
|
int writepkt(Conn*, char*, int);
|
|
|
|
int flushpkt(Conn*);
|
|
|
|
void initconn(Conn*, int, int);
|
|
|
|
int gitconnect(Conn *, char *, char *);
|
|
|
|
int readphase(Conn *);
|
|
|
|
int writephase(Conn *);
|
|
|
|
void closeconn(Conn *);
|
git/query: fix spurious merge requests
Due to the way LCA is defined, using a strict LCA
on a graph like this:
<--a--b--c--d--e--f--g
    \               /
     +-----h-------
can lead to spurious requests to merge. This happens
because 'lca(b, g)' would return 'a', since it can be
reached in one step from 'b', and 2 steps from 'g', while
reaching 'b' from 'a' would be a longer path.
As a result, we need to implement an lca variant that
returns the starting node if one is reachable from the
other, even if it's already found the technically correct
least common ancestor.
This replaces our LCA algorithm with one based on the
painting we do while finding a twixt, making it give
the results we want.
git/query: fix spurious merge requests
Due to the way LCA is defined, using a strict LCA
on a graph like this:
<--a--b--c--d--e--f--g
    \               /
     +-----h-------
can lead to spurious requests to merge. This happens
because 'lca(b, g)' would return 'a', since it can be
reached in one step from 'b', and 2 steps from 'g', while
reaching 'b' from 'a' would be a longer path.
As a result, we need to implement an lca variant that
returns the starting node if one is reachable from the
other, even if it's already found the technically correct
least common ancestor.
This replaces our LCA algorithm with one based on the
painting we do while finding a twixt.
2021-09-11 17:46:26 +00:00
|
|
|
|
|
|
|
/* queues */
|
|
|
|
void qinit(Objq*);
|
|
|
|
void qclear(Objq*);
|
git/query: refactor graph painting algorithm (findtwixt, lca)
We now keep track of 3 sets during traversal:
- keep: commits we've reached from head commits
- drop: commits we've reached from tail commits
- skip: ancestors of commits in both 'keep' and 'drop'
Commits in 'keep' and/or 'drop' may be added later to the 'skip' set
if we discover later that they are part of a common subgraph of the
head and tail commits.
From these sets we can calculate the commits we are interested in:
lca commits are those in 'keep' and 'drop', but not in 'skip'.
findtwixt commits are those in 'keep', but not in 'drop' or 'skip'.
The "LCA" commit returned is a common ancestor such that there are no
other common ancestors that can reach that commit. Although there can
be multiple commits that meet these criteria, where one is technically
lower on the commit-graph than the other, these cases only happen in
complex merge arrangements and any choice is likely a decent merge
base.
Repainting is now done in paint() directly. When we find a boundary
commit, we switch our paint color to 'skip'. 'skip' painting does
not stop when it hits another color; we continue until we are left
with only 'skip' commits on the queue.
This fixes several mishandled cases in the current algorithm:
1. If we hit the common subgraph from tail commits first (if the tail
commit was newer than the head commit), we ended up traversing the
entire commit graph. This is because we couldn't distinguish
between 'drop' commits that were part of the common subgraph, and
those that were still looking for it.
2. If we traversed through an initial part of the common subgraph from
head commits before reaching it from tail commits, these commits
were returned from findtwixt even though they were also reachable
from tail commits.
3. In the same case as 2, we might end up choosing an incorrect
commit as the LCA, which is an ancestor of the real LCA.
2022-03-16 21:41:59 +00:00
|
|
|
/* Add an object to a queue; the int is presumably the color kept in Qelt.color -- TODO confirm. */
void qput(Objq*, Object*, int);
|
git/query: fix spurious merge requests
Due to the way LCA is defined, using a strict LCA
on a graph like this:
<--a--b--c--d--e--f--g
    \               /
     +-----h-------
can lead to spurious requests to merge. This happens
because 'lca(b, g)' would return 'a', since it can be
reached in one step from 'b', and 2 steps from 'g', while
reaching 'b' from 'a' would be a longer path.
As a result, we need to implement an lca variant that
returns the starting node if one is reachable from the
other, even if it's already found the technically correct
least common ancestor.
This replaces our LCA algorithm with one based on the
painting we do while finding a twixt, making it give
the results we want.
git/query: fix spurious merge requests
Due to the way LCA is defined, using a strict LCA
on a graph like this:
<--a--b--c--d--e--f--g
    \               /
     +-----h-------
can lead to spurious requests to merge. This happens
because 'lca(b, g)' would return 'a', since it can be
reached in one step from 'b', and 2 steps from 'g', while
reaching 'b' from 'a' would be a longer path.
As a result, we need to implement an lca variant that
returns the starting node if one is reachable from the
other, even if it's already found the technically correct
least common ancestor.
This replaces our LCA algorithm with one based on the
painting we do while finding a twixt.
2021-09-11 17:46:26 +00:00
|
|
|
/* Take an element off a queue into the caller's Qelt; the meaning of the int result is not visible here -- presumably nonzero on success, TODO confirm. */
int qpop(Objq*, Qelt*);
|