plan9fox/sys/src/9/port/devswap.c
cinap_lenrek 1180631421 devswap: improve setswapchan()
- check for unusable file types like directories and append-only files.
- we should either error without any side effects or succeed.
2021-10-23 15:12:27 +00:00

614 lines
11 KiB
C

#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "../port/error.h"
#include <libsec.h>
#include <pool.h>
/* forward declarations of the file-local paging routines defined below */
static int canflush(Proc*, Segment*);
static void executeio(void);
static void pageout(Proc*, Segment*);
static void pagepte(int, Page**);
static void pager(void*);
/*
 * Image under which pages on their way to swap are cached: pagepte
 * enters them with cachepage() and executeio asserts on it.  Its c
 * field holds the channel opened on the swapfile by setswapchan.
 */
Image swapimage = {
.notext = 1,
};
/* channel installed by setswapchan; swapread/swapwrite do the Qswapfile i/o on it */
static Chan *swapchan;
/* BY2PG-sized, BY2PG-aligned buffer from swapopen; receives the encrypted page in swapwrite */
static uchar *swapbuf;
/* two AES states set up in swapopen with random keys, passed to aes_xts_encrypt/decrypt */
static AESstate *swapkey;
/* pages queued by pagepte for executeio to write; conf.nswppo entries, allocated in swapinit */
static Page **iolist;
/* number of entries currently queued in iolist */
static ulong ioptr;
/* clock stamped into Page.refage by pageout; advanced by pager on each wrap of the process table */
static ushort ageclock;
/*
 * Device init hook: allocate the swap map (one byte per swap page)
 * and the pageout i/o list, reset the swap allocator state and start
 * the pager kproc.  The two allocations are all-or-nothing; if either
 * is missing, swapping is disabled by zeroing conf.nswap and
 * conf.nswppo.
 */
static void
swapinit(void)
{
	if(conf.nswap != 0 && conf.nswppo != 0){
		swapalloc.swmap = xalloc(conf.nswap);
		if(swapalloc.swmap != nil){
			iolist = xalloc(conf.nswppo*sizeof(Page*));
			if(iolist == nil){
				/* no i/o list: give the map back as well */
				xfree(swapalloc.swmap);
				swapalloc.swmap = nil;
			}
		}
	}

	if(swapalloc.swmap == nil || iolist == nil){
		conf.nswppo = 0;
		conf.nswap = 0;
	}

	/* empty map: every slot free, search starts at the bottom */
	swapalloc.alloc = swapalloc.swmap;
	swapalloc.last = swapalloc.swmap;
	swapalloc.top = &swapalloc.swmap[conf.nswap];
	swapalloc.free = conf.nswap;
	swapalloc.xref = 0;

	kproc("pager", pager, 0);
}
/*
 * Allocate one swap slot and return its byte address on the swap
 * device, or ~0 when no slot is free.  The slot starts with a
 * reference count of 2: one for the pte and one for the pending
 * i/o transaction.
 */
static uintptr
newswap(void)
{
	uchar *slot;
	uintptr daddr;

	lock(&swapalloc);
	if(swapalloc.free == 0){
		unlock(&swapalloc);
		return ~0;
	}

	/* look from the previous allocation up to the top, then wrap around */
	slot = memchr(swapalloc.last, 0, swapalloc.top-swapalloc.last);
	if(slot == nil)
		slot = memchr(swapalloc.swmap, 0, swapalloc.last-swapalloc.swmap);

	*slot = 2;	/* ref for pte + io transaction */
	swapalloc.free--;
	swapalloc.last = slot;
	daddr = (slot-swapalloc.swmap) * BY2PG;
	unlock(&swapalloc);

	return daddr;
}
/*
 * Drop one reference on a swap slot.  p is not a real Page pointer
 * but a swap address carried in a Page* (see the call in executeio);
 * the slot index is that address divided by BY2PG.
 *
 * A count of 255 means the slot's references are tracked collectively
 * in swapalloc.xref; when xref reaches zero every slot still marked
 * 255 is released at once.
 */
void
putswap(Page *p)
{
	uchar *ref, *scan;

	lock(&swapalloc);
	ref = &swapalloc.swmap[((uintptr)p)/BY2PG];
	if(*ref == 0)
		panic("putswap %#p ref == 0", p);

	if(*ref != 255){
		/* ordinary per-slot count */
		if(--(*ref) == 0)
			swapalloc.free++;
		unlock(&swapalloc);
		return;
	}

	/* saturated slot: account in the shared overflow counter */
	if(swapalloc.xref == 0)
		panic("putswap %#p xref == 0", p);
	if(--swapalloc.xref == 0){
		for(scan = swapalloc.swmap; scan < swapalloc.top; scan++){
			if(*scan != 255)
				continue;
			*scan = 0;
			swapalloc.free++;
		}
	}
	unlock(&swapalloc);
}
/*
 * Add one reference to a swap slot.  p is a swap address carried in
 * a Page*.  Once a slot's byte count reaches 255 its references move
 * to the shared swapalloc.xref counter, which putswap drains.
 */
void
dupswap(Page *p)
{
	uchar *ref;

	lock(&swapalloc);
	ref = &swapalloc.swmap[((uintptr)p)/BY2PG];
	if(*ref != 255){
		(*ref)++;
		/* just saturated: carry the 255 references over to xref */
		if(*ref == 255)
			swapalloc.xref += 255;
	}else
		swapalloc.xref++;
	unlock(&swapalloc);
}
/*
 * Return the reference count byte of the swap slot that holds
 * device address daddr.  The map is read without taking the lock.
 */
int
swapcount(uintptr daddr)
{
	uchar *ref;

	ref = &swapalloc.swmap[daddr/BY2PG];
	return *ref;
}
/*
 * Wake whoever sleeps on swapalloc.r; in this file that is the
 * pager kproc, which sleeps there while it has nothing to do.
 */
void
kickpager(void)
{
wakeup(&swapalloc.r);
}
/*
 * Try to obtain free pages without swapping.  Each round takes pages
 * from the first source that yields any, in this order: fscache plus
 * inactive images, the swap image cache, then active images.
 *
 * Returns 1 once needpages() no longer holds, 0 when a round
 * reclaimed nothing (the caller has to swap); otherwise it yields
 * the processor and tries again.
 */
static int
reclaim(void)
{
ulong np;
for(;;){
if((np = pagereclaim(&fscache) + imagereclaim(0)) > 0){
if(0) print("reclaim: %lud fscache + inactive image\n", np);
} else if((np = pagereclaim(&swapimage)) > 0) {
if(0) print("reclaim: %lud swap\n", np);
} else if((np = imagereclaim(1)) > 0) {
if(0) print("reclaim: %lud active image\n", np);
}
if(!needpages(nil))
return 1; /* have pages, done */
if(np == 0)
return 0; /* didnt reclaim, need to swap */
sched();
}
}
/*
 * Pager kernel process, started from swapinit.  Each round it first
 * calls reclaim(); if that was enough it wakes both palloc.pwait
 * queues and sleeps on swapalloc.r with needpages as the condition.
 * Otherwise it selects the next eligible process, queues pages of its
 * segments through pageout() and writes them with executeio().  With
 * no swap file open or no free swap slots it resorts to freebroken()
 * and killbig().
 */
static void
pager(void*)
{
Proc *p;
Segment *s;
int x, i;
/* re-arm the error label each time an error unwinds back to here */
while(waserror())
;
/* x is the process table slot examined last; it persists across rounds */
x = -1;
for(;;){
up->psstate = "Reclaim";
if(reclaim()){
up->psstate = "Idle";
wakeup(&palloc.pwait[0]);
wakeup(&palloc.pwait[1]);
sleep(&swapalloc.r, needpages, nil);
continue;
}
/* swapping is impossible: free memory by other means */
if(swapimage.c == nil || swapalloc.free == 0){
Killbig:
if(!freebroken())
killbig("out of memory");
sched();
continue;
}
/*
 * advance to the next process that is not Dead, not marked
 * noswap and whose seglock can be taken without blocking.
 * ageclock ticks on every wrap of the process table; if it
 * comes all the way round to its starting value, give up.
 */
i = ageclock;
do {
if(++x >= conf.nproc){
if(++ageclock == i)
goto Killbig;
x = 0;
}
p = proctab(x);
} while(p->state == Dead || p->noswap || !canqlock(&p->seglock));
up->psstate = "Pageout";
/* only these segment types are paged; pagepte treats text differently from the rest */
for(i = 0; i < NSEG; i++) {
if((s = p->seg[i]) != nil) {
switch(s->type&SG_TYPE) {
default:
break;
case SG_TEXT:
pageout(p, s);
break;
case SG_DATA:
case SG_BSS:
case SG_STACK:
case SG_SHARED:
pageout(p, s);
break;
}
}
}
qunlock(&p->seglock);
/* write out whatever pageout queued on iolist */
if(ioptr) {
up->psstate = "I/O";
executeio();
}
}
}
/*
 * Scan segment s of process p and hand pages that have not been
 * referenced recently to pagepte().  Does nothing if the segment
 * lock cannot be taken immediately or canflush() refuses.
 *
 * canflush() takes a reference on s (incref), which is why both
 * exits after that call release it with putseg().
 */
static void
pageout(Proc *p, Segment *s)
{
int type, i, size;
short age;
Pte *l;
Page **pg, *entry;
if(!canqlock(s)) /* We cannot afford to wait, we will surely deadlock */
return;
if(!canflush(p, s) /* Able to invalidate all tlbs with references */
|| waserror()) {
qunlock(s);
putseg(s);
return;
}
/* Pass through the pte tables looking for memory pages to swap out */
type = s->type&SG_TYPE;
size = s->mapsize;
for(i = 0; i < size; i++) {
l = s->map[i];
if(l == nil)
continue;
for(pg = l->first; pg <= l->last; pg++) {
entry = *pg;
if(pagedout(entry))
continue;
/* referenced since the last scan: clear the bit and restamp its age */
if(entry->modref & PG_REF) {
entry->modref &= ~PG_REF;
entry->refage = ageclock;
continue;
}
/* keep pages stamped fewer than 16 ageclock ticks ago */
age = (short)(ageclock - entry->refage);
if(age < 16)
continue;
pagepte(type, pg);
}
}
poperror();
qunlock(s);
putseg(s);
}
/*
 * Decide whether segment s may be paged out on behalf of process p.
 * Takes a reference on s in every case; the caller drops it.
 * Returns non-zero when every process that has s attached accepted
 * canpage(), zero as soon as one refuses.
 */
static int
canflush(Proc *p, Segment *s)
{
	int slot, n;

	if(incref(s) == 2)		/* Easy if we are the only user */
		return canpage(p);

	/*
	 * The segment is shared: every process that may hold tlb
	 * entries for it has to agree before it can be paged out.
	 */
	for(slot = 0; slot < conf.nproc; slot++){
		p = proctab(slot);
		if(p->state == Dead)
			continue;
		for(n = 0; n < NSEG; n++){
			if(p->seg[n] != s)
				continue;
			if(!canpage(p))
				return 0;
		}
	}
	return 1;
}
/*
 * Evict the page referenced by pte slot pg, for a segment of the
 * given type.  Text pages are released and the slot cleared.  Data,
 * bss, stack and shared pages get a swap address, are entered in the
 * swapimage cache, have their pte rewritten to the swap address and
 * are queued on iolist for executeio.  Other types are left alone,
 * as is the page when iolist is full or no swap slot is available.
 */
static void
pagepte(int type, Page **pg)
{
	uintptr swapaddr;
	Page *page;

	page = *pg;
	switch(type){
	case SG_TEXT:
		/* Revert to demand load */
		putpage(page);
		*pg = nil;
		return;
	case SG_DATA:
	case SG_BSS:
	case SG_STACK:
	case SG_SHARED:
		break;
	default:
		return;
	}

	/* no room left in this i/o transaction */
	if(ioptr >= conf.nswppo)
		return;

	/*
	 * the new swap address comes with a count of 2: one for the
	 * pte and one held by us until the page is written to disk
	 */
	swapaddr = newswap();
	if(swapaddr == ~0)
		return;

	/* drop stale cache entries for the address and for the page */
	cachedel(&swapimage, swapaddr);
	uncachepage(page);

	/*
	 * cache the page under its swap address so that a fault during
	 * the write finds it in the cache rather than a partially
	 * written copy on disk
	 */
	page->daddr = swapaddr;
	cachepage(page, &swapimage);
	*pg = (Page*)(swapaddr|PG_ONSWAP);

	/* queue for executeio */
	iolist[ioptr++] = page;
}
/*
 * Write the pages queued on iolist to the swap file.  Pages whose
 * write raises an error are compacted to the front of iolist (index
 * j) and stay queued; for all others the i/o reference on the swap
 * slot and the page itself are released.  On return ioptr is the
 * number of pages still queued.
 */
static void
executeio(void)
{
Page *outp;
ulong i, j;
for(i = j = 0; i < ioptr; i++) {
outp = iolist[i];
assert(outp->ref > 0);
assert(outp->image == &swapimage);
assert(outp->daddr != ~0);
/* only write when swap address still in use */
if(swapcount(outp->daddr) > 1){
Chan *c = swapimage.c;
KMap *k = kmap(outp);
/* write failed: unmap, keep the page queued, go on with the next */
if(waserror()){
kunmap(k);
iolist[j++] = outp;
continue;
}
if(devtab[c->type]->write(c, (char*)VA(k), BY2PG, outp->daddr) != BY2PG)
error(Eshort);
kunmap(k);
poperror();
}
/* drop our extra swap reference */
putswap((Page*)outp->daddr);
/* Free up the page after I/O */
putpage(outp);
}
ioptr = j;
/* report how many of the i pages failed, with the current error string */
if(j) print("executeio (%lud/%lud): %s\n", j, i, up->errstr);
}
/*
 * Condition function: non-zero while the number of free pages is
 * below swapalloc.headroom.  The argument is unused; the pager
 * passes this function to sleep() and reclaim() calls it with nil.
 */
int
needpages(void*)
{
return palloc.freecount < swapalloc.headroom;
}
/*
 * Install c as the backing store for swap.
 *
 * c is closed and the error re-raised when it is a directory,
 * append-only or auth file, when the device is smaller than
 * conf.nswppo pages, or when a swap file is already installed and
 * still has slots in use.  On success the swap size is clamped to
 * the device size, c becomes swapchan and swapimage.c is opened on
 * the swapfile of this device.
 */
static void
setswapchan(Chan *c)
{
	uvlong s;

	if(waserror()){
		cclose(c);
		nexterror();
	}
	if(c->qid.type & (QTDIR|QTAPPEND|QTAUTH))
		error(Ebadarg);

	/*
	 * if this isn't a file, set the swap space
	 * to be at most the size of the partition
	 */
	if(devtab[c->type]->dc != L'M'){
		Dir *d = dirchanstat(c);
		s = d->length / BY2PG;
		free(d);
	} else {
		s = conf.nswap;
	}
	if(s < conf.nswppo)
		error("swap device too small");

	/* replacing an existing swap file is only allowed while it is unused */
	if(swapimage.c != nil) {
		if(swapalloc.free != conf.nswap)
			error(Einuse);
		cclose(swapimage.c);
		swapimage.c = nil;
	}

	if(s < conf.nswap){
		conf.nswap = s;
		swapalloc.top = &swapalloc.swmap[conf.nswap];
		swapalloc.free = conf.nswap;
		/*
		 * a previously used swap file may have left last at or
		 * beyond the new top; newswap searches from last up to
		 * top, so restart the search at the bottom of the map.
		 */
		swapalloc.last = swapalloc.swmap;
	}

	c->flag &= ~CCACHE;
	cclunk(c);
	poperror();

	/* swapopen requires swapchan to be set before the swapfile is opened */
	swapchan = c;
	swapimage.c = namec("#¶/swapfile", Aopen, ORDWR, 0);
}
/* qid paths of the files served by this device; they match the swapdir entries */
enum {
Qdir,
Qswap,
Qswapfile,
};
/*
 * Directory table passed to the generic dev* helpers:
 * the directory itself, the "swap" status/control file and
 * the "swapfile" file that does the encrypted page i/o.
 */
static Dirtab swapdir[]={
".", {Qdir, 0, QTDIR}, 0, DMDIR|0555,
"swap", {Qswap}, 0, 0664,
"swapfile", {Qswapfile}, 0, 0600,
};
/*
 * Attach to the swap device.  The device character is '¶', the
 * same one setswapchan uses in the path "#¶/swapfile".
 */
static Chan*
swapattach(char *spec)
{
	return devattach(L'¶', spec);
}
/* Walk within the device; delegated to devwalk over the swapdir table. */
static Walkqid*
swapwalk(Chan *c, Chan *nc, char **name, int nname)
{
return devwalk(c, nc, name, nname, swapdir, nelem(swapdir), devgen);
}
/* Stat a file of the device; delegated to devstat over the swapdir table. */
static int
swapstat(Chan *c, uchar *dp, int n)
{
return devstat(c, dp, n, swapdir, nelem(swapdir), devgen);
}
/*
 * Open a file of the swap device.  Everything except the swapfile
 * goes through devopen.  The swapfile may only be opened ORDWR by
 * eve, only while swapimage.c is unset and only after a backing
 * channel has been installed in swapchan.  Opening it allocates the
 * page buffer and sets up two AES states with fresh random keys.
 */
static Chan*
swapopen(Chan *c, int omode)
{
	uchar key[128/8];

	if((ulong)c->qid.path != Qswapfile)
		return devopen(c, omode, swapdir, nelem(swapdir), devgen);

	if(!iseve())
		error(Eperm);
	if(omode != ORDWR)
		error(Eperm);
	if(swapimage.c != nil)
		error(Einuse);
	if(swapchan == nil)
		error(Egreg);

	/* marked open before allocating; swapclose only cleans up COPEN channels */
	c->mode = openmode(omode);
	c->flag |= COPEN;
	c->offset = 0;

	swapbuf = mallocalign(BY2PG, BY2PG, 0, 0);
	swapkey = secalloc(sizeof(AESstate)*2);
	if(swapbuf == nil || swapkey == nil)
		error(Enomem);

	/* one independent random key per AES state */
	genrandom(key, sizeof(key));
	setupAESstate(&swapkey[0], key, sizeof(key), nil);
	genrandom(key, sizeof(key));
	setupAESstate(&swapkey[1], key, sizeof(key), nil);

	/* do not leave key material on the stack */
	memset(key, 0, sizeof(key));

	return c;
}
/*
 * Close a file of the swap device.  Only an opened swapfile needs
 * work: the backing channel is closed and the key state and page
 * buffer set up by swapopen are released.
 */
static void
swapclose(Chan *c)
{
	if(!(c->flag & COPEN))
		return;
	if((ulong)c->qid.path != Qswapfile)
		return;

	cclose(swapchan);
	swapchan = nil;

	secfree(swapkey);
	swapkey = nil;

	free(swapbuf);
	swapbuf = nil;
}
/*
 * Read from a file of the swap device.
 *  Qdir:      directory listing via devdirread.
 *  Qswap:     textual memory statistics (memory, page size, kernel,
 *             user, swap, reclaimable pages and the three kernel
 *             pools), returned through readstr at offset off.
 *  Qswapfile: exactly one page (n must be BY2PG) is read from the
 *             backing channel at off and decrypted in place in va.
 * Any other qid raises Egreg.
 */
static long
swapread(Chan *c, void *va, long n, vlong off)
{
char tmp[256]; /* must be >= 18*NUMSIZE (Qswap) */
ulong reclaim;
switch((ulong)c->qid.path){
case Qdir:
return devdirread(c, va, n, swapdir, nelem(swapdir), devgen);
case Qswap:
/* pages that are cached but could be reclaimed on demand */
reclaim = imagecached() + fscache.pgref + swapimage.pgref;
snprint(tmp, sizeof tmp,
"%llud memory\n"
"%llud pagesize\n"
"%lud kernel\n"
"%lud/%lud user\n"
"%lud/%lud swap\n"
"%lud/%lud reclaim\n"
"%llud/%llud/%llud kernel malloc\n"
"%llud/%llud/%llud kernel draw\n"
"%llud/%llud/%llud kernel secret\n",
(uvlong)conf.npage*BY2PG,
(uvlong)BY2PG,
conf.npage-conf.upages,
palloc.user-palloc.freecount-reclaim, palloc.user,
conf.nswap-swapalloc.free, conf.nswap,
reclaim, palloc.user,
(uvlong)mainmem->curalloc,
(uvlong)mainmem->cursize,
(uvlong)mainmem->maxsize,
(uvlong)imagmem->curalloc,
(uvlong)imagmem->cursize,
(uvlong)imagmem->maxsize,
(uvlong)secrmem->curalloc,
(uvlong)secrmem->cursize,
(uvlong)secrmem->maxsize);
return readstr((ulong)off, va, n, tmp);
case Qswapfile:
if(n != BY2PG)
error(Ebadarg);
if(devtab[swapchan->type]->read(swapchan, va, n, off) != n)
error(Eio);
/* the file offset is passed to the XTS routine along with both key states */
aes_xts_decrypt(&swapkey[0], &swapkey[1], off, va, va, n);
return n;
}
error(Egreg);
return 0;
}
/*
 * Write to a file of the swap device.
 *  Qswap:     control messages, eve only.  A message beginning with
 *             "start" kicks the pager; one beginning with a digit is
 *             parsed as a file descriptor number whose channel is
 *             handed to setswapchan; anything else raises Ebadctl.
 *  Qswapfile: exactly one page (n must be BY2PG) is encrypted into
 *             swapbuf and written to the backing channel at off.
 * Any other qid raises Egreg.
 */
static long
swapwrite(Chan *c, void *va, long n, vlong off)
{
char buf[256];
switch((ulong)c->qid.path){
case Qswap:
if(!iseve())
error(Eperm);
/* leave room for the terminating NUL */
if(n >= sizeof buf)
error(Egreg);
memmove(buf, va, n); /* so we can NUL-terminate */
buf[n] = 0;
/* start a pager if not already started */
if(strncmp(buf, "start", 5) == 0)
kickpager();
else if(buf[0]>='0' && buf[0]<='9')
setswapchan(fdtochan(strtoul(buf, nil, 0), ORDWR, 1, 1));
else
error(Ebadctl);
return n;
case Qswapfile:
if(n != BY2PG)
error(Ebadarg);
/* encrypt into swapbuf so the caller's page in va is left untouched */
aes_xts_encrypt(&swapkey[0], &swapkey[1], off, va, swapbuf, n);
if(devtab[swapchan->type]->write(swapchan, swapbuf, n, off) != n)
error(Eio);
return n;
}
error(Egreg);
return 0;
}
/*
 * Device table entry for the swap device.  The device character is
 * '¶', matching swapattach and the "#¶/swapfile" path opened by
 * setswapchan.  Operations without a swap-specific implementation
 * use the generic dev* routines.
 */
Dev swapdevtab = {
	L'¶',
	"swap",

	devreset,
	swapinit,
	devshutdown,
	swapattach,
	swapwalk,
	swapstat,
	swapopen,
	devcreate,
	swapclose,
	swapread,
	devbread,
	swapwrite,
	devbwrite,
	devremove,
	devwstat,
};