plan9fox/sys/src/cmd/git/git.h

#include <bio.h>
#include <mp.h>
#include <libsec.h>
#include <flate.h>
#include <regexp.h>

/*
 * Forward typedefs so the structure tags can be used as plain type
 * names (and as pointer targets) before their definitions appear.
 * Capset, Pack, Buf and Idxent are only named here; this header
 * does not define them.
 */
typedef struct Capset	Capset;
typedef struct Conn	Conn;
typedef struct Hash	Hash;
typedef struct Delta	Delta;
typedef struct Cinfo	Cinfo;
typedef struct Tinfo	Tinfo;
typedef struct Object	Object;
typedef struct Objset	Objset;
typedef struct Pack	Pack;
typedef struct Buf	Buf;
typedef struct Dirent	Dirent;
typedef struct Idxent	Idxent;
typedef struct Objlist	Objlist;
typedef struct Dtab	Dtab;
typedef struct Dblock	Dblock;
typedef struct Objq	Objq;
typedef struct Qelt	Qelt;

/*
 * Size units and fixed limits.
 * Hashsz has the same value as the length of the h[] array in
 * struct Hash.
 */
enum {
	/* units */
	KiB		= 1 << 10,
	MiB		= KiB << 10,

	/* limits */
	Pathmax		= 512,
	Npackcache	= 32,
	Hashsz		= 20,
	Pktmax		= 64*KiB,
};

/*
 * Object type codes.  The numeric values matter: 5 is not assigned
 * here, so the delta kinds restart the numbering at 6.  Do not
 * reorder or insert entries without checking every value.
 * NOTE(review): the values look like git's pack-format type codes;
 * confirm against the pack format documentation.
 */
enum {
	GNone,			/* 0 */
	GCommit,		/* 1 */
	GTree,			/* 2 */
	GBlob,			/* 3 */
	GTag,			/* 4 */
	GOdelta	= 6,
	GRdelta,		/* 7 */
};

/*
 * Single-bit flags, written as masks so they can be or'ed together.
 * The 0x10 bit is not assigned in this enum.
 * NOTE(review): presumably stored in Object.flag; confirm at the
 * use sites.
 */
enum {
	Cloaded	= 0x01,
	Cidx	= 0x02,
	Ccache	= 0x04,
	Cexist	= 0x08,
	Cparsed	= 0x20,
	Cthin	= 0x40,
};

/*
 * Connection kinds, numbered consecutively from zero.
 * NOTE(review): presumably the values stored in Conn.type; confirm
 * at the use sites.
 */
enum {
	ConnGit		= 0,
	ConnGit9	= 1,
	ConnSsh		= 2,
	ConnHttp	= 3,
};

/*
 * State for the object lister: mkols returns an Objlist, olsnext
 * and olsfree take one.
 * NOTE(review): the field roles below are inferred from the names;
 * confirm against the lister implementation.
 */
struct Objlist {
	int idx;

	int fd;
	int state;
	int stage;

	/* each Dir pointer is paired with a count (n*) and a position (*idx) */
	Dir *top;
	int ntop;
	int topidx;
	Dir *loose;
	int nloose;
	int looseidx;
	Dir *pack;
	int npack;
	int packidx;
	int nent;
	int entidx;
};

/*
 * Object id: a fixed array of 20 raw bytes (the same value as
 * Hashsz).  Several functions declared in this header pass Hash
 * by value.
 */
struct Hash {
	uchar h[20];
};

/*
 * A connection to a remote, handed to the protocol functions
 * (readpkt, writepkt, flushpkt, ...).
 * NOTE(review): type is presumably one of the Conn* constants and
 * rfd/wfd the read and write descriptors; confirm in the
 * implementation.
 */
struct Conn {
	int type;
	int rfd;
	int wfd;

	/* only used by http */
	int cfd;
	char *url;	/* note, first GET uses a different url */
	char *dir;
	char *direction;
};

/*
 * One entry of a tree; Tinfo holds an array of these.
 * NOTE(review): ismod and islink look like boolean flags, and mode
 * like the entry's file mode; confirm in the tree parser.
 */
struct Dirent {
	char *name;
	int mode;
	Hash h;
	char ismod;
	char islink;
};

/*
 * An object as held in memory.
 *
 * Field order is significant: per the marker comment below, the
 * members after it are the part that gets cleared, so new fields
 * must be placed on the correct side of that marker.
 * NOTE(review): type is presumably a G* code and flag a mask of
 * C* bits; confirm at the use sites.
 */
struct Object {
	/* Git data */
	Hash	hash;
	int	type;

	/* Cache */
	int	id;
	int	flag;
	int	refs;
	Object	*next;
	Object	*prev;

	/* For indexing */
	vlong	off;
	vlong	len;
	u32int	crc;

	/* Everything below here gets cleared */
	char	*all;
	char	*data;
	/* size excludes header */
	vlong	size;

	/* Significant win on memory use */
	/* commit and tree share storage, so only one of them is valid at a time */
	union {
		Cinfo	*commit;
		Tinfo	*tree;
	};
};

/*
 * Tree details, reached through Object.tree: an array of Dirent
 * together with a count.
 */
struct Tinfo {
	/* Tree */
	Dirent	*ent;
	int	nent;
};

/*
 * Commit details, reached through Object.commit.
 * NOTE(review): parent is presumably an array of nparent hashes
 * and nmsg the length of msg; confirm in the commit parser.
 */
struct Cinfo {
	/* Commit */
	Hash	*parent;
	int	nparent;
	Hash	tree;
	char	*author;
	char	*committer;
	char	*msg;
	int	nmsg;
	vlong	ctime;
	vlong	mtime;
};

/*
 * A set of objects, managed through osinit, osclear, osadd, oshas
 * and osfind.
 * NOTE(review): nobj is presumably the number of stored objects
 * and sz the capacity of obj; confirm in the set implementation.
 */
struct Objset {
	Object	**obj;
	int	nobj;
	int	sz;
};

/*
 * One queue element; Objq keeps an array of these and qpop fills
 * one in.
 * NOTE(review): mtime is presumably the ordering key and color the
 * value passed as the last argument of qput; confirm in the queue
 * implementation.
 */
struct Qelt {
	Object	*o;
	vlong	mtime;
	int	color;
};

/*
 * Queue of Qelt stored in an array named heap; used through qinit,
 * qclear, qput and qpop.
 * NOTE(review): nheap is presumably the number of queued elements
 * and heapsz the allocated capacity; confirm in the queue
 * implementation.
 */
struct Objq {
	Qelt	*heap;
	int	nheap;
	int	heapsz;
};

/*
 * Table used by the packing functions (dtinit, dtclear, deltify),
 * tied to one Object.
 * NOTE(review): base/nbase look like a buffer and its length, and
 * b/nb/sz like a block array with used and allocated counts;
 * confirm in the delta code.
 */
struct Dtab {
	Object	*o;
	uchar	*base;
	int	nbase;
	Dblock	*b;
	int	nb;
	int	sz;
};

/*
 * One block entry of a Dtab: a byte pointer with a length, an
 * offset and a 64-bit hash.
 * NOTE(review): hash is presumably computed over the len bytes at
 * buf, and off is presumably the block's offset in the Dtab base;
 * confirm in the delta code.
 */
struct Dblock {
	uchar	*buf;
	int	len;
	int	off;
	u64int	hash;
};

/*
 * One delta instruction; deltify returns a pointer to Delta.
 * NOTE(review): cpy presumably selects between copying from the
 * base and inserting new data, with off/len describing the span;
 * confirm in the delta code.
 */
struct Delta {
	int	cpy;
	int	off;
	int	len;
};


/*
 * Big-endian decoders: b[0] is the most significant byte.
 *
 * Every byte is masked with 0xFF before it is shifted, so a signed
 * char element cannot sign-extend into the result.  The wider
 * decoders are built from the narrower ones.  b is evaluated more
 * than once and must not have side effects.
 *
 * GETBE16 and GETBE32 yield an unsigned long; GETBE64 yields an
 * unsigned long long.
 */
#define GETBE16(b)\
		((((b)[0] & 0xFFul) << 8) | ((b)[1] & 0xFFul))

#define GETBE32(b)\
		((GETBE16(b) << 16) | GETBE16((b) + 2))

/* the mask widens the high word to unsigned long long before the shift */
#define GETBE64(b)\
		(((GETBE32(b) & 0xFFFFFFFFull) << 32) | GETBE32((b) + 4))

/*
 * Big-endian encoders: store n into b[0..], most significant byte
 * first.  Each store relies on the assignment to the element type
 * of b to drop the higher bits.  The wider encoders are built from
 * the narrower ones, and bytes are still written in ascending
 * index order.  Both b and n are evaluated more than once and must
 * not have side effects.
 */
#define PUTBE16(b, n)\
	do{ \
		(b)[0] = (n) >> 8; \
		(b)[1] = (n); \
	}while(0)

#define PUTBE32(b, n)\
	do{ \
		PUTBE16((b), (n) >> 16); \
		PUTBE16((b) + 2, (n)); \
	}while(0)

#define PUTBE64(b, n)\
	do{ \
		PUTBE32((b), (n) >> 32); \
		PUTBE32((b) + 4, (n)); \
	}while(0)

/* low 8 bits of qid->path, after the path is converted to int; qid is a pointer */
#define QDIR(qid)	(((int)((qid)->path)) & 0xff)
/*
 * 1 for any isspace() character other than newline, else 0.
 * c is evaluated twice; isspace is only consulted when c is not
 * a newline.
 */
#define isblank(c) \
	((c) == '\n' ? 0 : isspace(c) != 0)

/*
 * Globals shared by the git programs; they are only declared here
 * and must be defined in exactly one source file.
 * chattygit is the verbosity level tested by the dprint macro.
 * NOTE(review): cachemax is presumably the object cache limit in
 * bytes and Zhash an all-zero hash; confirm at the definitions.
 */
extern Reprog	*authorpat;
extern Objset	objcache;
extern vlong	cachemax;
extern Hash	Zhash;
extern int	chattygit;
extern int	interactive;

/*
 * Custom print verbs.  The varargck pragmas give the compiler the
 * argument type to expect for each verb: %H takes a Hash, %T an
 * int, %O an Object* and %Q a Qid.
 * NOTE(review): the functions below are presumably the matching
 * formatters, installed elsewhere; confirm at the definitions.
 */
#pragma varargck type "H" Hash
#pragma varargck type "T" int
#pragma varargck type "O" Object*
#pragma varargck type "Q" Qid
int Hfmt(Fmt*);
int Tfmt(Fmt*);
int Ofmt(Fmt*);
int Qfmt(Fmt*);

/* NOTE(review): presumably one-time setup that each program calls before using the rest of this API; confirm at the definition. */
void gitinit(void);

/*
 * object io
 *
 * Hash arguments declared as plain Hash are passed by value.
 * NOTE(review): the behavior of these functions is defined in
 * other files.  From the names, ref/unref presumably adjust
 * Object.refs; confirm there.
 */
int	resolverefs(Hash **, char *);
int	resolveref(Hash *, char *);
int	listrefs(Hash **, char ***);
Object	*ancestor(Object *, Object *);
int	findtwixt(Hash *, int, Hash *, int, Object ***, int *);
Object	*readobject(Hash);
Object	*clearedobject(Hash, int);
void	parseobject(Object *);
int	indexpack(char *, char *, Hash);
int	writepack(int, Hash*, int, Hash*, int, Hash*);
int	hasheq(Hash *, Hash *);
Object	*ref(Object *);
void	unref(Object *);
void	cache(Object *);
Object	*emptydir(void);

/*
 * object sets
 *
 * Operations on an Objset; lookups take the Hash by value.
 * NOTE(review): oshas presumably reports membership and osfind
 * returns the stored Object; confirm in the implementation.
 */
void	osinit(Objset *);
void	osclear(Objset *);
void	osadd(Objset *, Object *);
int	oshas(Objset *, Hash);
Object	*osfind(Objset *, Hash);

/*
 * object listing
 *
 * mkols returns an Objlist; olsnext takes it together with a Hash
 * to fill in; olsfree takes the Objlist.
 * NOTE(review): the return convention of olsnext is not visible
 * here; confirm in the implementation.
 */
Objlist	*mkols(void);
int	olsnext(Objlist *, Hash *);
void	olsfree(Objlist *);

/* util functions */
/*
 * dprint(lvl, fmt, ...) passes fmt and its arguments to _dprint
 * when chattygit is at least lvl.
 *
 * The body is wrapped in do{}while(0) so the macro behaves as a
 * single statement: with a bare `if' expansion, an `else' written
 * after a dprint call would attach to the macro's hidden `if'.
 * lvl is parenthesized so an expression such as `a ? 1 : 2' is
 * compared as a whole.
 */
#define dprint(lvl, ...) \
	do{ \
		if(chattygit >= (lvl)) \
			_dprint(__VA_ARGS__); \
	}while(0)
/*
 * _dprint is the function behind the dprint macro; call dprint
 * rather than _dprint so the chattygit level is checked.
 * NOTE(review): the e* allocators presumably do not return on
 * failure, and eamalloc/earealloc presumably take a count and an
 * element size; confirm in the implementation.
 */
void	_dprint(char *, ...);
void	*eamalloc(ulong, ulong);
void	*emalloc(ulong);
void	*earealloc(void *, ulong, ulong);
void	*erealloc(void *, ulong);
char	*estrdup(char *);
int	slurpdir(char *, Dir **);
int	hparse(Hash *, char *);
int	hassuffix(char *, char *);
int	swapsuffix(char *, int, char *, char *, char *);
char	*strip(char *);
int	findrepo(char *, int);
int	showprogress(int, int);

/*
 * packing
 *
 * dtinit and dtclear take a Dtab; deltify takes an Object, a Dtab
 * and an int pointer, and returns a pointer to Delta.
 * NOTE(review): the int pointer is presumably where the number of
 * returned Delta entries is stored; confirm in the delta code.
 */
void	dtinit(Dtab *, Object*);
void	dtclear(Dtab*);
Delta*	deltify(Object*, Dtab*, int*);

/*
 * proto handling
 *
 * All of these operate on a Conn.
 * NOTE(review): readpkt/writepkt presumably take a buffer and its
 * size, and initconn presumably takes the read and write
 * descriptors; confirm in the implementation.
 */
int	readpkt(Conn*, char*, int);
int	writepkt(Conn*, char*, int);
int	flushpkt(Conn*);
void	initconn(Conn*, int, int);
int	gitconnect(Conn *, char *, char *);
int	readphase(Conn *);
int	writephase(Conn *);
void	closeconn(Conn *);

/*
 * queues
 *
 * Operations on an Objq; qput takes an Object and an int, qpop
 * takes a Qelt to fill in.
 * NOTE(review): the int passed to qput is presumably stored as
 * Qelt.color, and the return convention of qpop is not visible
 * here; confirm in the implementation.
 */
void	qinit(Objq*);
void	qclear(Objq*);
void	qput(Objq*, Object*, int);
int	qpop(Objq*, Qelt*);
git: got git? Add a snapshot of git9 to 9front. 2021-05-17 01:49:45 +00:00			`#include <bio.h>`
			`#include <mp.h>`
			`#include <libsec.h>`
			`#include <flate.h>`
			`#include <regexp.h>`

git: size cache in bytes, not objects git used to track cache size in object count, rather than bytes. This had the unfortunate effect of making memory use depend on the size of objects -- repos with lots of large objects could cause out of memory deaths. now, we track sizes in bytes, which should keep our memory usage flatter. 2022-01-02 03:37:23 +00:00			`typedef struct Capset Capset;`
git: got git? Add a snapshot of git9 to 9front. 2021-05-17 01:49:45 +00:00			`typedef struct Conn Conn;`
			`typedef struct Hash Hash;`
			`typedef struct Delta Delta;`
			`typedef struct Cinfo Cinfo;`
			`typedef struct Tinfo Tinfo;`
			`typedef struct Object Object;`
			`typedef struct Objset Objset;`
			`typedef struct Pack Pack;`
			`typedef struct Buf Buf;`
			`typedef struct Dirent Dirent;`
			`typedef struct Idxent Idxent;`
			`typedef struct Objlist Objlist;`
			`typedef struct Dtab Dtab;`
			`typedef struct Dblock Dblock;`
git/query: fix spurious merge requests Due to the way LCA is defined, a using a strict LCA on a graph like this: <--a--b--c--d--e--f--g \ / +-----h------- can lead to spurious requests to merge. This happens because 'lca(b, g)' would return 'a', since it can be reached in one step from 'b', and 2 steps from 'g', while reaching 'b' from 'a' would be a longer path. As a result, we need to implement an lca variant that returns the starting node if one is reachable from the other, even if it's already found the technically correct least common ancestor. This replaces our LCA algorithm with one based on the painting we do while finding a twixt, making it give the resutls we want. git/query: fix spurious merge requests Due to the way LCA is defined, a using a strict LCA on a graph like this: <--a--b--c--d--e--f--g \ / +-----h------- can lead to spurious requests to merge. This happens because 'lca(b, g)' would return 'a', since it can be reached in one step from 'b', and 2 steps from 'g', while reaching 'b' from 'a' would be a longer path. As a result, we need to implement an lca variant that returns the starting node if one is reachable from the other, even if it's already found the technically correct least common ancestor. This replaces our LCA algorithm with one based on the painting we do while finding a twixt. 2021-09-11 17:46:26 +00:00			`typedef struct Objq Objq;`
			`typedef struct Qelt Qelt;`
git: got git? Add a snapshot of git9 to 9front. 2021-05-17 01:49:45 +00:00
			`enum {`
			`Pathmax = 512,`
			`Npackcache = 32,`
			`Hashsz = 20,`
			`Pktmax = 65536,`
git: size cache in bytes, not objects git used to track cache size in object count, rather than bytes. This had the unfortunate effect of making memory use depend on the size of objects -- repos with lots of large objects could cause out of memory deaths. now, we track sizes in bytes, which should keep our memory usage flatter. 2022-01-02 03:37:23 +00:00			`KiB = 1024,`
			`MiB = 1024*KiB,`
git: got git? Add a snapshot of git9 to 9front. 2021-05-17 01:49:45 +00:00			`};`

			`enum {`
			`GNone = 0,`
			`GCommit = 1,`
			`GTree = 2,`
			`GBlob = 3,`
			`GTag = 4,`
			`GOdelta = 6,`
			`GRdelta = 7,`
			`};`

			`enum {`
			`Cloaded = 1 << 0,`
			`Cidx = 1 << 1,`
			`Ccache = 1 << 2,`
			`Cexist = 1 << 3,`
			`Cparsed = 1 << 5,`
			`Cthin = 1 << 6,`
			`};`

			`enum {`
			`ConnGit,`
			`ConnGit9,`
			`ConnSsh,`
			`ConnHttp,`
			`};`

			`struct Objlist {`
			`int idx;`

			`int fd;`
			`int state;`
			`int stage;`

			`Dir *top;`
			`int ntop;`
			`int topidx;`
			`Dir *loose;`
			`int nloose;`
			`int looseidx;`
			`Dir *pack;`
			`int npack;`
			`int packidx;`
			`int nent;`
			`int entidx;`
			`};`

			`struct Hash {`
			`uchar h[20];`
			`};`

			`struct Conn {`
			`int type;`
			`int rfd;`
			`int wfd;`

			`/* only used by http */`
			`int cfd;`
			`char *url; /* note, first GET uses a different url */`
			`char *dir;`
			`char *direction;`
			`};`

			`struct Dirent {`
			`char *name;`
			`int mode;`
			`Hash h;`
			`char ismod;`
			`char islink;`
			`};`

			`struct Object {`
			`/* Git data */`
			`Hash hash;`
			`int type;`

			`/* Cache */`
			`int id;`
			`int flag;`
			`int refs;`
			`Object *next;`
			`Object *prev;`

			`/* For indexing */`
			`vlong off;`
			`vlong len;`
			`u32int crc;`

			`/* Everything below here gets cleared */`
			`char *all;`
			`char *data;`
			`/* size excludes header */`
			`vlong size;`

			`/* Significant win on memory use */`
			`union {`
			`Cinfo *commit;`
			`Tinfo *tree;`
			`};`
			`};`

			`struct Tinfo {`
			`/* Tree */`
			`Dirent *ent;`
			`int nent;`
			`};`

			`struct Cinfo {`
			`/* Commit */`
			`Hash *parent;`
			`int nparent;`
			`Hash tree;`
			`char *author;`
			`char *committer;`
			`char *msg;`
			`int nmsg;`
			`vlong ctime;`
			`vlong mtime;`
			`};`

			`struct Objset {`
			`Object **obj;`
			`int nobj;`
			`int sz;`
			`};`

git/query: fix spurious merge requests Due to the way LCA is defined, a using a strict LCA on a graph like this: <--a--b--c--d--e--f--g \ / +-----h------- can lead to spurious requests to merge. This happens because 'lca(b, g)' would return 'a', since it can be reached in one step from 'b', and 2 steps from 'g', while reaching 'b' from 'a' would be a longer path. As a result, we need to implement an lca variant that returns the starting node if one is reachable from the other, even if it's already found the technically correct least common ancestor. This replaces our LCA algorithm with one based on the painting we do while finding a twixt, making it give the resutls we want. git/query: fix spurious merge requests Due to the way LCA is defined, a using a strict LCA on a graph like this: <--a--b--c--d--e--f--g \ / +-----h------- can lead to spurious requests to merge. This happens because 'lca(b, g)' would return 'a', since it can be reached in one step from 'b', and 2 steps from 'g', while reaching 'b' from 'a' would be a longer path. As a result, we need to implement an lca variant that returns the starting node if one is reachable from the other, even if it's already found the technically correct least common ancestor. This replaces our LCA algorithm with one based on the painting we do while finding a twixt. 2021-09-11 17:46:26 +00:00			`struct Qelt {`
			`Object *o;`
			`vlong mtime;`
			`int color;`
			`};`

			`struct Objq {`
			`Qelt *heap;`
			`int nheap;`
			`int heapsz;`
			`};`

git: got git? Add a snapshot of git9 to 9front. 2021-05-17 01:49:45 +00:00			`struct Dtab {`
			`Object *o;`
			`uchar *base;`
			`int nbase;`
			`Dblock *b;`
			`int nb;`
			`int sz;`
			`};`

			`struct Dblock {`
			`uchar *buf;`
			`int len;`
			`int off;`
			`u64int hash;`
			`};`

			`struct Delta {`
			`int cpy;`
			`int off;`
			`int len;`
			`};`


			`#define GETBE16(b)\`
			`((((b)[0] & 0xFFul) << 8) | \`
			`(((b)[1] & 0xFFul) << 0))`

			`#define GETBE32(b)\`
			`((((b)[0] & 0xFFul) << 24) | \`
			`(((b)[1] & 0xFFul) << 16) | \`
			`(((b)[2] & 0xFFul) << 8) | \`
			`(((b)[3] & 0xFFul) << 0))`
			`#define GETBE64(b)\`
			`((((b)[0] & 0xFFull) << 56) | \`
			`(((b)[1] & 0xFFull) << 48) | \`
			`(((b)[2] & 0xFFull) << 40) | \`
			`(((b)[3] & 0xFFull) << 32) | \`
			`(((b)[4] & 0xFFull) << 24) | \`
			`(((b)[5] & 0xFFull) << 16) | \`
			`(((b)[6] & 0xFFull) << 8) | \`
			`(((b)[7] & 0xFFull) << 0))`

			`#define PUTBE16(b, n)\`
			`do{ \`
			`(b)[0] = (n) >> 8; \`
			`(b)[1] = (n) >> 0; \`
			`} while(0)`

			`#define PUTBE32(b, n)\`
			`do{ \`
			`(b)[0] = (n) >> 24; \`
			`(b)[1] = (n) >> 16; \`
			`(b)[2] = (n) >> 8; \`
			`(b)[3] = (n) >> 0; \`
			`} while(0)`

			`#define PUTBE64(b, n)\`
			`do{ \`
			`(b)[0] = (n) >> 56; \`
			`(b)[1] = (n) >> 48; \`
			`(b)[2] = (n) >> 40; \`
			`(b)[3] = (n) >> 32; \`
			`(b)[4] = (n) >> 24; \`
			`(b)[5] = (n) >> 16; \`
			`(b)[6] = (n) >> 8; \`
			`(b)[7] = (n) >> 0; \`
			`} while(0)`

			`#define QDIR(qid) ((int)(qid)->path & (0xff))`
			`#define isblank(c) \`
			`(((c) != '\n') && isspace(c))`

			`extern Reprog *authorpat;`
			`extern Objset objcache;`
git: size cache in bytes, not objects git used to track cache size in object count, rather than bytes. This had the unfortunate effect of making memory use depend on the size of objects -- repos with lots of large objects could cause out of memory deaths. now, we track sizes in bytes, which should keep our memory usage flatter. 2022-01-02 03:37:23 +00:00			`extern vlong cachemax;`
git: got git? Add a snapshot of git9 to 9front. 2021-05-17 01:49:45 +00:00			`extern Hash Zhash;`
			`extern int chattygit;`
			`extern int interactive;`

			`#pragma varargck type "H" Hash`
			`#pragma varargck type "T" int`
			`#pragma varargck type "O" Object*`
			`#pragma varargck type "Q" Qid`
			`int Hfmt(Fmt*);`
			`int Tfmt(Fmt*);`
			`int Ofmt(Fmt*);`
			`int Qfmt(Fmt*);`

			`void gitinit(void);`

			`/* object io */`
			`int resolverefs(Hash **, char *);`
			`int resolveref(Hash *, char *);`
			`int listrefs(Hash **, char ***);`
			`Object *ancestor(Object *, Object *);`
			`int findtwixt(Hash *, int, Hash *, int, Object ***, int *);`
			`Object *readobject(Hash);`
			`Object *clearedobject(Hash, int);`
			`void parseobject(Object *);`
			`int indexpack(char *, char *, Hash);`
			`int writepack(int, Hash*, int, Hash*, int, Hash*);`
			`int hasheq(Hash *, Hash *);`
			`Object *ref(Object *);`
			`void unref(Object *);`
			`void cache(Object *);`
			`Object *emptydir(void);`

			`/* object sets */`
			`void osinit(Objset *);`
			`void osclear(Objset *);`
			`void osadd(Objset *, Object *);`
			`int oshas(Objset *, Hash);`
			`Object *osfind(Objset *, Hash);`

			`/* object listing */`
			`Objlist *mkols(void);`
			`int olsnext(Objlist *, Hash *);`
			`void olsfree(Objlist *);`

			`/* util functions */`
			`#define dprint(lvl, ...) \`
			`if(chattygit >= lvl) _dprint(__VA_ARGS__)`
			`void _dprint(char *, ...);`
			`void *eamalloc(ulong, ulong);`
			`void *emalloc(ulong);`
			`void *earealloc(void *, ulong, ulong);`
			`void *erealloc(void *, ulong);`
			`char *estrdup(char *);`
			`int slurpdir(char *, Dir **);`
			`int hparse(Hash *, char *);`
			`int hassuffix(char *, char *);`
			`int swapsuffix(char *, int, char *, char *, char *);`
			`char *strip(char *);`
			`int findrepo(char *, int);`
			`int showprogress(int, int);`

			`/* packing */`
			`void dtinit(Dtab *, Object*);`
			`void dtclear(Dtab*);`
			`Delta* deltify(Object*, Dtab*, int*);`

			`/* proto handling */`
			`int readpkt(Conn*, char*, int);`
			`int writepkt(Conn*, char*, int);`
			`int flushpkt(Conn*);`
			`void initconn(Conn*, int, int);`
			`int gitconnect(Conn *, char *, char *);`
			`int readphase(Conn *);`
			`int writephase(Conn *);`
			`void closeconn(Conn *);`
git/query: fix spurious merge requests Due to the way LCA is defined, a using a strict LCA on a graph like this: <--a--b--c--d--e--f--g \ / +-----h------- can lead to spurious requests to merge. This happens because 'lca(b, g)' would return 'a', since it can be reached in one step from 'b', and 2 steps from 'g', while reaching 'b' from 'a' would be a longer path. As a result, we need to implement an lca variant that returns the starting node if one is reachable from the other, even if it's already found the technically correct least common ancestor. This replaces our LCA algorithm with one based on the painting we do while finding a twixt, making it give the resutls we want. git/query: fix spurious merge requests Due to the way LCA is defined, a using a strict LCA on a graph like this: <--a--b--c--d--e--f--g \ / +-----h------- can lead to spurious requests to merge. This happens because 'lca(b, g)' would return 'a', since it can be reached in one step from 'b', and 2 steps from 'g', while reaching 'b' from 'a' would be a longer path. As a result, we need to implement an lca variant that returns the starting node if one is reachable from the other, even if it's already found the technically correct least common ancestor. This replaces our LCA algorithm with one based on the painting we do while finding a twixt. 2021-09-11 17:46:26 +00:00
			`/* queues */`
			`void qinit(Objq*);`
			`void qclear(Objq*);`
git/query: refactor graph painting algorithm (findtwixt, lca) We now keep track of 3 sets during traversal: - keep: commits we've reached from head commits - drop: commits we've reached from tail commits - skip: ancestors of commits in both 'keep' and 'drop' Commits in 'keep' and/or 'drop' may be added later to the 'skip' set if we discover later that they are part of a common subgraph of the head and tail commits. From these sets we can calculate the commits we are interested in: lca commits are those in 'keep' and 'drop', but not in 'skip'. findtwixt commits are those in 'keep', but not in 'drop' or 'skip'. The "LCA" commit returned is a common ancestor such that there are no other common ancestors that can reach that commit. Although there can be multiple commits that meet this criteria, where one is technically lower on the commit-graph than the other, these cases only happen in complex merge arrangements and any choice is likely a decent merge base. Repainting is now done in paint() directly. When we find a boundary commit, we switch our paint color to 'skip'. 'skip' painting does not stop when it hits another color; we continue until we are left with only 'skip' commits on the queue. This fixes several mishandled cases in the current algorithm: 1. If we hit the common subgraph from tail commits first (if the tail commit was newer than the head commit), we ended up traversing the entire commit graph. This is because we couldn't distinguish between 'drop' commits that were part of the common subgraph, and those that were still looking for it. 2. If we traversed through an initial part of the common subgraph from head commits before reaching it from tail commits, these commits were returned from findtwixt even though they were also reachable from tail commits. 3. In the same case as 2, we might end up choosing an incorrect commit as the LCA, which is an ancestor of the real LCA. 2022-03-16 21:41:59 +00:00			`void qput(Objq, Object, int);`
git/query: fix spurious merge requests Due to the way LCA is defined, a using a strict LCA on a graph like this: <--a--b--c--d--e--f--g \ / +-----h------- can lead to spurious requests to merge. This happens because 'lca(b, g)' would return 'a', since it can be reached in one step from 'b', and 2 steps from 'g', while reaching 'b' from 'a' would be a longer path. As a result, we need to implement an lca variant that returns the starting node if one is reachable from the other, even if it's already found the technically correct least common ancestor. This replaces our LCA algorithm with one based on the painting we do while finding a twixt, making it give the resutls we want. git/query: fix spurious merge requests Due to the way LCA is defined, a using a strict LCA on a graph like this: <--a--b--c--d--e--f--g \ / +-----h------- can lead to spurious requests to merge. This happens because 'lca(b, g)' would return 'a', since it can be reached in one step from 'b', and 2 steps from 'g', while reaching 'b' from 'a' would be a longer path. As a result, we need to implement an lca variant that returns the starting node if one is reachable from the other, even if it's already found the technically correct least common ancestor. This replaces our LCA algorithm with one based on the painting we do while finding a twixt. 2021-09-11 17:46:26 +00:00			`int qpop(Objq, Qelt);`