plan9fox/sys/src/9/port/devfs.c
cinap_lenrek db971a6189 kernel: fix stat bugs
In a few places, we were using a fixed buffer of sizeof(Dir)+100
size for stat. This is not correct and fails if the name returned
in stat is long.

This results in being unable to seek to the end of file with a
long filename.

The kernel should do the same thing as dirfstat() from libc;
handling the conversion and buffer allocation and returning a
freeable Dir* pointer.

For this, a new dirchanstat() function was added.

The fstat syscall was not rewriting the name to the last path
element; fix it.

In addition, gracefully handle the mountfix case, reallocating
the buffer to accommodate the required stat length plus
size of the new name so dirsetname() does not fail.
2021-10-23 13:40:06 +00:00

1379 lines
26 KiB
C

/*
* File system devices.
* Follows device config in Ken's file server.
* Builds mirrors, concatenations, interleavings, and partitions
* of devices out of other (inner) devices.
* It is ok if inner devices are provided by this driver.
*
* Built files are grouped on different directories
* (called trees, and used to represent disks).
* The "#k/fs" tree is always available and never goes away.
* Configuration changes happen only while no I/O is in progress.
*
* Default sector size is one byte unless changed by the "disk" ctl.
*/
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
#include "ureg.h"
#include "../port/error.h"
#include "../port/sd.h"
#include <libsec.h>
enum
{
Fnone,
Fmirror, /* mirror of others */
Fcat, /* catenation of others */
Finter, /* interleaving of others */
Fpart, /* part of other */
Fclear, /* start over */
Fdel, /* delete a configure device */
Fdisk, /* set default tree and sector sz*/
Fcrypt, /* encrypted device */
Sectorsz = 1,
Blksize = 8*1024, /* for Finter only */
Cryptsectsz = 512, /* for Fcrypt only */
Incr = 5, /* Increments for the dev array */
/*
* All qids are decorated with the tree number.
* #k/fs is tree number 0, is automatically added and
* its first qid is for the ctl file. It never goes away.
*/
Qtop = 0, /* #k */
Qdir, /* directory (#k/fs) */
Qctl, /* ctl, only for #k/fs/ctl */
Qfirst, /* first qid assigned for device */
Iswrite = 0,
Isread,
Optional = 0,
Mustexist,
/* tunable parameters */
Maxconf = 4*1024, /* max length for config */
Ndevs = 32, /* max. inner devs per command */
Ntrees = 128, /* max. number of trees */
Maxretries = 3, /* max. retries of i/o errors */
Retrypause = 5000, /* ms. to pause between retries */
};
typedef struct Inner Inner;
typedef struct Fsdev Fsdev;
typedef struct Tree Tree;
typedef struct Key Key;
/*
 * One underlying device that a configured Fsdev is built from.
 */
struct Inner
{
char *iname; /* name the inner device was opened by */
vlong isize; /* size of inner device in bytes (set by setdsize) */
Chan *idev; /* open channel to the inner device; io() raises Egone if nil */
};
/*
 * A configured device: one file inside a tree, built out of
 * up to Ndevs inner devices.
 */
struct Fsdev
{
Ref; /* one per Chan doing I/O */
int gone; /* true if removed */
int vers; /* qid version for this device; taken from qidvers */
int type; /* Fnone, Fmirror, ... */
char *name; /* name for this fsdev */
Tree* tree; /* where the device is kept */
vlong size; /* size in bytes; computed by setdsize according to type */
vlong start; /* start address (for Fpart and Fcrypt) */
uint ndevs; /* number of inner devices */
Inner *inner[Ndevs]; /* inner devices */
Key *key; /* crypt key (Fcrypt only) */
};
/*
 * A directory of configured devices, seen as #k/<name>.
 */
struct Tree
{
char *name; /* name for #k/<name> */
Fsdev **devs; /* devices in dir.; slots may be nil after a delete */
uint ndevs; /* number of devices (non-nil slots) */
uint nadevs; /* number of slots in use in devs, including nil holes */
};
/*
 * Key material of an Fcrypt device: the two AES states handed to
 * aes_xts_encrypt()/aes_xts_decrypt() by cryptio().
 */
struct Key {
AESstate tweak, ecb;
};
/* debugging print; only active while the debug flag below is non-zero */
#define dprint if(debug)print
extern Dev fsdevtab; /* forward */
static RWlock lck; /* r: use devices; w: change config */
static Tree fstree; /* The main "fs" tree. Never goes away */
static Tree *trees[Ntrees]; /* internal representation of config */
static int ntrees; /* slots of trees[] in use; entries below it may be nil */
static int qidvers; /* source of Fsdev.vers; bumped on create and delete */
static char *disk; /* default tree name used */
static char *source; /* default inner device used */
static int sectorsz = Sectorsz; /* default sector size */
static char *confstr; /* textual configuration */
static int debug;
static Qid tqid = {Qtop, 0, QTDIR};
static Qid cqid = {Qctl, 0, 0};
/* keyword printed in the textual configuration for each device type */
static char* tnames[] = {
[Fmirror] "mirror",
[Fcat] "cat",
[Finter] "inter",
[Fpart] "part",
[Fcrypt] "crypt",
};
/*
 * ctl commands understood by mconfig().
 * NOTE(review): the third field is presumably the exact argument count
 * enforced by lookupcmd (0 meaning "checked by parseconfig instead") -
 * confirm against the Cmdtab definition.
 */
static Cmdtab configs[] = {
Fmirror,"mirror", 0,
Fcat, "cat", 0,
Finter, "inter", 0,
Fpart, "part", 0,
Fclear, "clear", 1,
Fdel, "del", 2,
Fdisk, "disk", 0,
Fcrypt, "crypt", 0,
};
static char Egone[] = "device is gone"; /* file has been removed */
/*
 * Append the configuration line describing mp to the buffer [s, e)
 * and return the new end of the text.  The line has the form
 * "type [tree/]name inner...", followed by start and size for
 * partitions.  A nil or malformed device yields a diagnostic instead.
 */
static char*
seprintdev(char *s, char *e, Fsdev *mp)
{
	int n;

	if(mp == nil)
		return seprint(s, e, "<null Fsdev>");
	if(mp->type < 0 || mp->type >= nelem(tnames) || tnames[mp->type] == nil)
		return seprint(s, e, "bad device type %d\n", mp->type);

	s = strecpy(s, e, tnames[mp->type]);

	/* devices of the main tree are named without the tree prefix */
	if(mp->tree == &fstree)
		s = seprint(s, e, " %s", mp->name);
	else
		s = seprint(s, e, " %s/%s", mp->tree->name, mp->name);

	for(n = 0; n < mp->ndevs; n++)
		s = seprint(s, e, " %s", mp->inner[n]->iname);

	if(mp->type == Fpart)
		return seprint(s, e, " %ulld %ulld\n", mp->start, mp->size);
	if(mp->type == Fmirror || mp->type == Fcat
	|| mp->type == Finter || mp->type == Fcrypt)
		return strecpy(s, e, "\n");

	panic("#k: seprintdev bug");
	return s;
}
/*
 * Write the textual configuration of every device of every tree
 * into [s, e) and return the end of the text written.
 */
static char*
seprintconf(char *s, char *e)
{
	int ti, di;
	Tree *t;

	*s = 0;
	for(ti = 0; ti < ntrees; ti++){
		t = trees[ti];
		if(t == nil)
			continue;
		for(di = 0; di < t->nadevs; di++){
			if(t->devs[di] == nil)
				continue;
			s = seprintdev(s, e, t->devs[di]);
		}
	}
	return s;
}
/*
 * Regenerate confstr from the current trees, allocating the
 * Maxconf byte buffer on first use.  Called with lck w.
 * confstr is only published once the text has been written.
 */
static void
setconfstr(void)
{
	char *buf;

	buf = (confstr != nil) ? confstr : smalloc(Maxconf);
	seprintconf(buf, buf+Maxconf);
	confstr = buf;
}
/*
 * Build a qid path: tree number in bits 16-31, device number
 * (Qdir, Qctl, Qfirst+n) in bits 0-15.
 */
static vlong
mkpath(int tree, int devno)
{
	int hi, lo;

	hi = (tree & 0xFFFF) << 16;
	lo = devno & 0xFFFF;
	return hi | lo;
}
/*
 * Extract the tree number (bits 16-31) from a qid path.
 */
static int
path2treeno(int q)
{
	int tree;

	tree = (q >> 16) & 0xFFFF;
	return tree;
}
/*
 * Extract the device number (bits 0-15) from a qid path.
 */
static int
path2devno(int q)
{
	int dev;

	dev = q & 0xFFFF;
	return dev;
}
/*
 * Return tree number i.  A missing tree raises Enonexist when
 * mustexist is set and yields nil otherwise.  A negative index
 * is a driver bug and panics.
 */
static Tree*
gettree(int i, int mustexist)
{
	Tree *t;

	dprint("gettree %d\n", i);
	if(i < 0)
		panic("#k: bug: bad tree index %d in gettree", i);
	t = nil;
	if(i < ntrees)
		t = trees[i];
	if(t == nil && mustexist)
		error(Enonexist);
	return t;
}
/*
 * Return the device in slot i of tree t.  An empty or out of range
 * slot raises Enonexist when mustexist is set and yields nil
 * otherwise.  A negative index is a driver bug and panics.
 */
static Fsdev*
getdev(Tree *t, int i, int mustexist)
{
	Fsdev *d;

	dprint("getdev %d\n", i);
	if(i < 0)
		panic("#k: bug: bad dev index %d in getdev", i);
	d = nil;
	if(i < t->nadevs)
		d = t->devs[i];
	if(d == nil && mustexist)
		error(Enonexist);
	return d;
}
/*
 * Map a qid path to its device; raises Enonexist if either the
 * tree or the device no longer exists.
 */
static Fsdev*
path2dev(int q)
{
	int treeno, slot;
	Tree *t;

	dprint("path2dev %ux\n", q);
	treeno = path2treeno(q);
	t = gettree(treeno, Mustexist);
	slot = path2devno(q) - Qfirst;
	return getdev(t, slot, Mustexist);
}
/*
 * Create an empty tree called name in the first free slot of trees[].
 * Returns nil when all Ntrees slots are taken; raises Enomem if the
 * Tree itself cannot be allocated.
 */
static Tree*
treealloc(char *name)
{
	int slot;
	Tree *nt;

	dprint("treealloc %s\n", name);
	slot = 0;
	while(slot < nelem(trees) && trees[slot] != nil)
		slot++;
	if(slot == nelem(trees))
		return nil;

	nt = mallocz(sizeof(Tree), 1);
	trees[slot] = nt;
	if(nt == nil)
		error(Enomem);
	/* taking the slot just past the used ones extends the used range */
	if(slot == ntrees)
		ntrees++;
	kstrdup(&nt->name, name);
	return nt;
}
static Tree*
lookuptree(char *name)
{
int i;
dprint("lookuptree %s\n", name);
for(i = 0; i < ntrees; i++)
if(trees[i] != nil && strcmp(trees[i]->name, name) == 0)
return trees[i];
return nil;
}
/*
 * Allocate a zeroed device called name and enter it in tree t,
 * reusing the first empty slot or appending a new one.
 * Returns nil (leaving t untouched) if memory runs out.
 */
static Fsdev*
devalloc(Tree *t, char *name)
{
	int slot, cap;
	Fsdev *dev, **grown;

	dprint("devalloc %s %s\n", t->name, name);
	dev = mallocz(sizeof(Fsdev), 1);
	if(dev == nil)
		return nil;

	/* look for a hole left by a deleted device */
	slot = 0;
	while(slot < t->nadevs && t->devs[slot] != nil)
		slot++;

	if(slot >= t->nadevs){
		/* none: append, growing the array Incr slots at a time */
		if(t->nadevs % Incr == 0){
			cap = t->nadevs + Incr;
			grown = realloc(t->devs, cap * sizeof(Fsdev*));
			if(grown == nil){
				free(dev);
				return nil;
			}
			t->devs = grown;
		}
		t->devs[t->nadevs++] = nil;
	}

	kstrdup(&dev->name, name);
	dev->vers = ++qidvers;
	dev->tree = t;
	t->devs[slot] = dev;
	t->ndevs++;
	return dev;
}
/*
 * Free tree t and empty its slot in trees[].  The tree in slot 0
 * ("fs") is left alone.  Panics if t is not in trees[].
 */
static void
deltree(Tree *t)
{
	int slot;

	dprint("deltree %s\n", t->name);
	for(slot = 0; slot < ntrees && trees[slot] != t; slot++)
		;
	if(slot >= ntrees){
		panic("#k: deltree: bug: tree not found");
		return;
	}
	if(slot == 0)
		return;		/* "fs" never goes away */
	free(t->name);
	free(t->devs);
	free(t);
	trees[slot] = nil;
}
/*
 * Dispose of a device whose users are all gone.
 * The device is unlinked from its tree under the write lock; the
 * tree itself is deleted when this was its last device ("fs" is
 * never deleted).  The inner channels are closed only after the
 * lock is released, so that inner devices provided by this very
 * driver can be closed without deadlock.
 */
static void
mdeldev(Fsdev *mp)
{
	int n;
	Inner *inner;
	Tree *owner;

	dprint("deldev %s gone %d ref %uld\n", mp->name, mp->gone, mp->ref);
	mp->gone = 1;
	mp->vers = ++qidvers;

	wlock(&lck);
	owner = mp->tree;
	for(n = 0; n < owner->nadevs; n++){
		if(owner->devs[n] != mp)
			continue;
		owner->devs[n] = nil;
		if(--owner->ndevs == 0)
			deltree(owner);
		break;
	}
	wunlock(&lck);

	secfree(mp->key);
	free(mp->name);
	for(n = 0; n < mp->ndevs; n++){
		inner = mp->inner[n];
		if(inner->idev != nil)
			cclose(inner->idev);
		free(inner->iname);
		free(inner);
	}
	free(mp);
}
/*
 * Delete one or all devices in one or all trees.
 * tname and dname select the tree and the device; "*" matches all.
 * Every matching device is marked gone; those without users are
 * destroyed right away, the others when their last user closes them.
 * Raises Enonexist if nothing matched and a specific tree was named.
 *
 * The tree scan and the device scan use separate indices: sharing one
 * made the outer loop resume at t->nadevs after each tree, which could
 * skip trees or rescan the same tree forever with the lock held.
 */
static void
mdelctl(char *tname, char *dname)
{
	int i, j, alldevs, alltrees, some;
	Fsdev *mp;
	Tree *t;

	dprint("delctl %s\n", dname);
	alldevs = strcmp(dname, "*") == 0;
	alltrees = strcmp(tname, "*") == 0;
	some = 0;
Again:
	wlock(&lck);
	for(i = 0; i < ntrees; i++){
		t = trees[i];
		if(t == nil)
			continue;
		if(alltrees == 0 && strcmp(t->name, tname) != 0)
			continue;
		for(j = 0; j < t->nadevs; j++){
			mp = t->devs[j];
			if(mp == nil)
				continue;
			if(alldevs == 0 && strcmp(mp->name, dname) != 0)
				continue;
			/*
			 * Careful: must close outside locks and that
			 * may change the file tree we are looking at.
			 */
			some++;
			mp->gone = 1;
			if(mp->ref == 0){
				incref(mp);	/* keep it there */
				wunlock(&lck);
				mdeldev(mp);
				goto Again;	/* tree can change */
			}
		}
	}
	if(some)
		setconfstr();
	wunlock(&lck);
	if(some == 0 && alltrees == 0)
		error(Enonexist);
}
/*
 * Record the sizes ilen[] of the inner devices of mp and derive
 * the size of mp from them according to its type:
 *	mirror	size of the smallest inner device
 *	inter	smallest inner device, truncated to Blksize, times ndevs
 *	cat	sum of the inner devices
 *	part	as configured, truncated to what the inner device holds
 *	crypt	what follows start, truncated to Cryptsectsz
 * Raises an error if a partition or crypt device starts past the end
 * of its inner device.
 */
static void
setdsize(Fsdev* mp, vlong *ilen)
{
	int n;
	vlong len;
	Inner *inner;

	dprint("setdsize %s\n", mp->name);
	for(n = 0; n < mp->ndevs; n++){
		inner = mp->inner[n];
		len = ilen[n];
		inner->isize = len;

		if(mp->type == Finter){
			/* truncate to multiple of Blksize */
			len &= ~(Blksize-1);
			inner->isize = len;
		}
		if(mp->type == Finter || mp->type == Fmirror){
			/* use size of smallest inner device */
			if(mp->size == 0 || mp->size > len)
				mp->size = len;
		}else if(mp->type == Fcat){
			mp->size += len;
		}else if(mp->type == Fpart){
			if(mp->start > len)
				error("partition starts after device end");
			if(len < mp->start + mp->size){
				print("#k: %s: partition truncated from "
					"%lld to %lld bytes\n", mp->name,
					mp->size, len - mp->start);
				mp->size = len - mp->start;
			}
		}else if(mp->type == Fcrypt){
			if(mp->start > len)
				error("crypt starts after device end");
			mp->size = (len - mp->start) & ~((vlong)Cryptsectsz-1);
		}
	}
	if(mp->type == Finter)
		mp->size *= mp->ndevs;
}
/*
 * Raise Eexist if tree t already holds a device called dname.
 */
static void
validdevname(Tree *t, char *dname)
{
	int n;
	Fsdev *d;

	for(n = 0; n < t->nadevs; n++){
		d = t->devs[n];
		if(d == nil)
			continue;
		if(strcmp(d->name, dname) == 0)
			error(Eexist);
	}
}
/*
 * Parse one ctl line into *cbp and look its command up into *ctp.
 * On return cb->f/cb->nf describe the arguments only (the command
 * word is skipped).  Raises an error on a bad argument count.
 * *cbp is set before anything else can fail, so that the caller
 * can free it from its error handler.
 */
static void
parseconfig(char *a, long n, Cmdbuf **cbp, Cmdtab **ctp)
{
	int cmd;
	Cmdbuf *buf;
	Cmdtab *ent;

	buf = parsecmd(a, n);
	*cbp = buf;
	ent = lookupcmd(buf, configs, nelem(configs));
	*ctp = ent;

	/* skip command */
	buf->f++;
	buf->nf--;

	cmd = ent->index;
	if(cmd == Fmirror || cmd == Fcat || cmd == Finter){
		if(buf->nf < 2)
			error("too few arguments for ctl");
		if(buf->nf - 1 > Ndevs)
			error("too many devices in ctl");
	}else if(cmd == Fdisk){
		if(buf->nf < 1 || buf->nf > 3)
			error("ctl usage: disk name [sz dev]");
	}else if(cmd == Fpart){
		/* the three argument form needs a default source device */
		if(buf->nf != 4 && (buf->nf != 3 || source == nil))
			error("ctl usage: part new [file] off len");
	}else if(cmd == Fcrypt){
		if(buf->nf != 3)
			error("ctl usage: crypt newname device keyhex");
	}
}
/*
 * Split "tree/dev" (in place) into *tree and *dev.  Without a slash
 * the tree is the default disk if there is one, else "fs".
 * Both parts are checked with validname().
 */
static void
parsename(char *name, char *disk, char **tree, char **dev)
{
	char *sep;

	sep = strchr(name, '/');
	if(sep != nil){
		*sep = 0;
		*tree = name;
		*dev = sep + 1;
	}else{
		*tree = (disk != nil) ? disk : "fs";
		*dev = name;
	}
	validname(*tree, 0);
	validname(*dev, 0);
}
/*
* Process a single line of configuration,
* often of the form "cmd newname idev0 idev1".
* locking is tricky, because we need a write lock to
* add/remove devices yet adding/removing them may lead
* to calls to this driver that require a read lock (when
* inner devices are also provided by us).
*
* a, n: the text of the line and its length.
* Comments, empty lines and the historical "fsdev:" signature are
* ignored.  Any failure is reported with error(); on that path the
* parsed command, the channels opened so far and a half-built device
* are all released before the error is passed on.
*/
static void
mconfig(char* a, long n)
{
int i;
vlong size, start;
vlong *ilen;
char *tname, *dname, *fakef[4];
uchar key[2*256/8];
int keylen;
Chan **idev;
Cmdbuf *cb;
Cmdtab *ct;
Fsdev *mp;
Inner *inprv;
Tree *t;
/* ignore comments & empty lines */
if (n < 1 || *a == '\0' || *a == '#' || *a == '\n')
return;
/* ignore historical config signature */
if (n >= 6 && memcmp(a, "fsdev:", 6) == 0)
return;
dprint("mconfig\n");
size = 0;
start = 0;
mp = nil;
cb = nil;
keylen = 0;
/* until the inner devices are opened, only cb needs releasing */
if(waserror()){
free(cb);
nexterror();
}
parseconfig(a, n, &cb, &ct);
switch (ct->index) {
case Fdisk:
/* set the defaults: tree name, sector size and source device */
kstrdup(&disk, cb->f[0]);
if(cb->nf >= 2)
sectorsz = strtoul(cb->f[1], 0, 0);
else
sectorsz = Sectorsz;
if(cb->nf == 3)
kstrdup(&source, cb->f[2]);
else{
free(source);
source = nil;
}
poperror();
free(cb);
return;
case Fclear:
poperror();
free(cb);
mdelctl("*", "*"); /* del everything */
return;
case Fcrypt:
/*
* NOTE(review): parseconfig() rejects crypt lines unless
* cb->nf == 3, so the nf >= 4 branch looks unreachable - confirm.
*/
if(cb->nf >= 4) {
start = strtoul(cb->f[3], 0, 0);
cb->nf = 3;
} else
start = 64*1024; /* cryptsetup header */
keylen = dec16(key, sizeof(key), cb->f[2], strlen(cb->f[2]));
/* the key holds two AES keys of equal length, 128 or 256 bits each */
switch(keylen){
default:
error("bad hexkey");
case 2*128/8:
case 2*256/8:
break;
}
/* drop the key argument: what remains is the name and the device */
cb->nf -= 1;
break;
case Fpart:
if(cb->nf == 3){
/*
* got a request in the format of sd(3),
* pretend we got one in our format.
* later we change end to be len.
*/
fakef[0] = cb->f[0];
fakef[1] = source;
fakef[2] = cb->f[1];
fakef[3] = cb->f[2];
cb->f = fakef;
cb->nf = 4;
}
start = strtoll(cb->f[2], nil, 10);
size = strtoll(cb->f[3], nil, 10);
if(cb->f == fakef)
size -= start; /* it was end */
/* drop offset and length: what remains is the name and the device */
cb->nf -= 2;
break;
}
/* from here on f[0] is the new name and f[1..nf-1] the inner devices */
parsename(cb->f[0], disk, &tname, &dname);
for(i = 1; i < cb->nf; i++)
validname(cb->f[i], 1);
if(ct->index == Fdel){
mdelctl(tname, dname);
poperror();
free(cb);
return;
}
/*
* Open all inner devices while we have only a read lock.
*/
poperror();
rlock(&lck);
idev = smalloc(sizeof(Chan*) * Ndevs);
ilen = smalloc(sizeof(vlong) * Ndevs);
if(waserror()){
runlock(&lck);
/*
* Common cleanup; also entered with goto from the error handler
* of the write-locked section below, after it dropped its lock.
*/
Fail:
for(i = 1; i < cb->nf; i++)
if(idev != nil && idev[i-1] != nil)
cclose(idev[i-1]);
if(mp != nil)
mdeldev(mp);
free(idev);
free(ilen);
free(cb);
nexterror();
}
for(i = 1; i < cb->nf; i++){
Dir *dir;
idev[i-1] = namec(cb->f[i], Aopen, ORDWR, 0);
/* the stat length of the channel is taken as the inner device size */
dir = dirchanstat(idev[i-1]);
ilen[i-1] = dir->length;
free(dir);
}
poperror();
runlock(&lck);
/*
* Get a write lock and add the device if we can.
*/
wlock(&lck);
if(waserror()){
wunlock(&lck);
goto Fail;
}
t = lookuptree(tname);
if(t != nil)
validdevname(t, dname);
else{
t = treealloc(tname);
if(t == nil)
error("no more trees");
}
mp = devalloc(t, dname);
if(mp == nil){
if(t->ndevs == 0) /* it was created for us */
deltree(t); /* but we will not mdeldev() */
error(Enomem);
}
mp->type = ct->index;
if(mp->type == Fpart){
/* partition offsets and lengths are given in sectors */
mp->start = start * sectorsz;
mp->size = size * sectorsz;
}
if(mp->type == Fcrypt) {
Key *k = secalloc(sizeof(Key));
/* first half of the key material goes to tweak, second half to ecb */
setupAESstate(&k->tweak, &key[0], keylen/2, nil);
setupAESstate(&k->ecb, &key[keylen/2], keylen/2, nil);
/* wipe the stack copy of the key */
memset(key, 0, sizeof(key));
mp->key = k;
mp->start = start;
}
for(i = 1; i < cb->nf; i++){
inprv = mp->inner[i-1] = mallocz(sizeof(Inner), 1);
if(inprv == nil)
error(Enomem);
mp->ndevs++;
kstrdup(&inprv->iname, cb->f[i]);
/* the channel now belongs to mp; Fail must not close it again */
inprv->idev = idev[i-1];
idev[i-1] = nil;
}
setdsize(mp, ilen);
setconfstr();
wunlock(&lck);
poperror();
free(idev);
free(ilen);
free(cb);
}
/*
 * Load the initial configuration, once.
 * The first call registers the "fs" tree, creates confstr and then
 * feeds the configuration file to mconfig() one line at a time.
 * The file is the one named by the "fsconfig" configuration variable,
 * in which case failures are passed on to the caller; otherwise
 * /dev/sdC0/fscfg is tried and any failure is silently ignored.
 */
static void
rdconf(void)
{
char *c, *e, *p, *s;
Chan *cc;
int mustrd;
/* only read config file once */
if (confstr != nil)
return;
wlock(&lck);
/* check again now that we hold the lock */
if (confstr != nil) {
wunlock(&lck);
return; /* already done */
}
/* add the std "fs" tree */
if(ntrees == 0){
fstree.name = "fs";
trees[ntrees++] = &fstree;
}
/* this makes confstr non-nil, which marks the config as read */
setconfstr();
wunlock(&lck);
dprint("rdconf\n");
/* identify the config file */
s = getconf("fsconfig");
if (s == nil){
mustrd = 0;
s = "/dev/sdC0/fscfg";
} else
mustrd = 1;
/* smalloc'ed with one byte more than is read below */
c = smalloc(Maxconf+1);
if(waserror()){
free(c);
/* the default config file is optional */
if(!mustrd)
return;
nexterror();
}
/* read it */
cc = namec(s, Aopen, OREAD, 0);
if(waserror()){
cclose(cc);
nexterror();
}
devtab[cc->type]->read(cc, c, Maxconf, 0);
cclose(cc);
poperror();
/* process config copy one line at a time */
for (p = c; *p != '\0'; p = e){
e = strchr(p, '\n');
if (e == nil)
e = p + strlen(p);
else
e++;
mconfig(p, e - p);
}
free(c);
poperror(); /* c */
}
/*
 * Directory generator handed to devwalk() and devdirread().
 * Fills dp with entry i of directory c and returns 1, or returns -1
 * when there is no such entry:
 *	#k		one directory per tree
 *	#k/fs		ctl followed by the devices of tree 0
 *	#k/<tree>	the devices of that tree
 * For a file, only ".." (its tree directory) can be generated.
 */
static int
mgen(Chan *c, char*, Dirtab*, int, int i, Dir *dp)
{
	int tno;
	Fsdev *dev;
	Qid q;
	Tree *tree;

	dprint("mgen %#ullx %d\n", c->qid.path, i);
	q.type = QTDIR;
	q.vers = 0;

	if(c->qid.path == Qtop){
		if(i == DEVDOTDOT)
			goto Top;
		tree = gettree(i, Optional);
		if(tree == nil)
			goto None;
		q.path = mkpath(i, Qdir);
		devdir(c, q, tree->name, 0, eve, 0775, dp);
		return 1;
	}

	tno = path2treeno(c->qid.path);
	tree = gettree(tno, Optional);
	if(tree == nil)
		goto None;

	if((c->qid.type & QTDIR) == 0){
		/* a file: its parent is the tree directory */
		if(i != DEVDOTDOT)
			goto None;
		q.path = mkpath(tno, Qdir);
		devdir(c, q, tree->name, 0, eve, 0775, dp);
		return 1;
	}

	if(i == DEVDOTDOT)
		goto Top;
	if(tno == 0){
		/* take care of #k/fs/ctl */
		if(i == 0){
			devdir(c, cqid, "ctl", 0, eve, 0664, dp);
			return 1;
		}
		i--;
	}
	dev = getdev(tree, i, Optional);
	if(dev == nil)
		goto None;
	q.type = QTFILE;
	q.vers = dev->vers;
	q.path = mkpath(tno, Qfirst+i);
	devdir(c, q, dev->name, dev->size, eve, 0664, dp);
	return 1;

Top:
	devdir(c, tqid, "#k", 0, eve, 0775, dp);
	return 1;
None:
	dprint("no\n");
	return -1;
}
/*
 * Attach to the device: the generic attach, using our device character.
 */
static Chan*
mattach(char *spec)
{
	Chan *c;

	dprint("mattach\n");
	c = devattach(fsdevtab.dc, spec);
	return c;
}
/*
 * Walk, using mgen under the read lock.  The initial configuration
 * is loaded first, before the lock is taken.
 */
static Walkqid*
mwalk(Chan *c, Chan *nc, char **name, int nname)
{
	Walkqid *result;

	rdconf();
	dprint("mwalk %llux\n", c->qid.path);

	rlock(&lck);
	if(waserror()){
		runlock(&lck);
		nexterror();
	}
	result = devwalk(c, nc, name, nname, 0, 0, mgen);
	poperror();
	runlock(&lck);

	return result;
}
/*
 * Stat c into the n byte buffer db and return the number of bytes
 * used.  Raises Ebadarg if the entry does not fit and Enonexist if
 * the tree or device is no longer there.
 */
static int
mstat(Chan *c, uchar *db, int n)
{
	int path;
	Dir dir;
	Fsdev *dev;
	Qid qid;
	Tree *tree;

	dprint("mstat %llux\n", c->qid.path);
	rlock(&lck);
	if(waserror()){
		runlock(&lck);
		nexterror();
	}

	path = c->qid.path;
	memset(&dir, 0, sizeof dir);
	if(path == Qtop)
		devdir(c, tqid, "#k", 0, eve, 0775, &dir);
	else if(path == Qctl)
		devdir(c, cqid, "ctl", 0, eve, 0664, &dir);
	else{
		tree = gettree(path2treeno(path), Mustexist);
		if((c->qid.type & QTDIR) == 0){
			dev = getdev(tree, path2devno(path) - Qfirst, Mustexist);
			/* report the current version of the device */
			qid = c->qid;
			qid.vers = dev->vers;
			devdir(c, qid, dev->name, dev->size, eve, 0664, &dir);
		}else
			devdir(c, c->qid, tree->name, 0, eve, 0775, &dir);
	}

	n = convD2M(&dir, db, n);
	if(n == 0)
		error(Ebadarg);
	poperror();
	runlock(&lck);
	return n;
}
/*
 * Open c.  Directories can only be opened for reading.
 * Opening a device file takes a reference on the device, which
 * fails with Egone if the device has been removed.
 */
static Chan*
mopen(Chan *c, int omode)
{
	int isdir;
	Fsdev *dev;

	dprint("mopen %llux\n", c->qid.path);
	isdir = (c->qid.type & QTDIR) != 0;
	if(isdir && omode != OREAD)
		error(Eperm);

	if(!isdir && c->qid.path != Qctl){
		rlock(&lck);
		if(waserror()){
			runlock(&lck);
			nexterror();
		}
		dev = path2dev(c->qid.path);
		if(dev->gone)
			error(Egone);
		incref(dev);
		poperror();
		runlock(&lck);
	}

	/*
	 * Our mgen does not return the info for the qid
	 * but only for its children. Don't use devopen here.
	 */
	c->offset = 0;
	c->mode = openmode(omode);
	c->flag |= COPEN;
	return c;
}
/*
 * Close c.  Nothing is done for directories and unopened channels.
 * Closing ctl restores the "disk" defaults.  Closing a device file
 * drops its reference; the last user of a removed device destroys
 * it, after the lock has been released.
 */
static void
mclose(Chan *c)
{
	int path, lastuser;
	Fsdev *dev;

	dprint("mclose %llux\n", c->qid.path);
	if((c->qid.type & QTDIR) || !(c->flag & COPEN))
		return;

	rlock(&lck);
	if(waserror()){
		runlock(&lck);
		nexterror();
	}
	lastuser = 0;
	dev = nil;
	path = c->qid.path;
	if(path == Qctl){
		/* restore defaults */
		free(disk);
		disk = nil;
		free(source);
		source = nil;
		sectorsz = Sectorsz;
	}else{
		dev = path2dev(path);
		if(dev->gone == 0 || dev->ref != 1)
			decref(dev);
		else
			lastuser = 1;
	}
	poperror();
	runlock(&lck);

	if(lastuser)
		mdeldev(dev);
}
/*
 * Read or write l bytes at offset off of inner device in, on behalf
 * of mp, and return the count reported by the inner device.
 * Raises Egone if the inner device has no channel.  Errors of the
 * inner device are logged and passed on.
 */
static long
io(Fsdev *mp, Inner *in, int isread, void *a, long l, vlong off)
{
	long done;
	Chan *ch;

	ch = in->idev;
	if(ch == nil)
		error(Egone);

	if(waserror()){
		print("#k: %s: byte %,lld count %ld (of #k/%s): %s error: %s\n",
			in->iname, off, l, mp->name, (isread? "read": "write"),
			(up && up->errstr? up->errstr: ""));
		nexterror();
	}
	if(!isread)
		done = devtab[ch->type]->write(ch, a, l, off);
	else
		done = devtab[ch->type]->read(ch, a, l, off);
	poperror();

	return done;
}
/*
 * I/O on an Fcrypt device: n bytes at offset off of the inner device
 * (the caller has already added mp->start).
 * Both off and n must be multiples of Cryptsectsz, else Ebadarg.
 * Reads are decrypted in place in a; writes are encrypted into a
 * temporary buffer so that the caller's data is left untouched.
 * Returns the number of bytes transferred.
 */
static long
cryptio(Fsdev *mp, int isread, uchar *a, long n, vlong off)
{
long l, m, o, nb;
uchar *b;
/* only the low bits matter here, so truncating off to ulong is harmless */
if((((ulong)off|n) & (Cryptsectsz-1)))
error(Ebadarg);
if(isread){
l = io(mp, mp->inner[0], Isread, a, n, off);
if(l > 0){
/* only whole sectors can be decrypted; drop a partial tail */
l &= ~(Cryptsectsz-1);
/*
* NOTE(review): the value passed per sector is the byte offset off+o,
* not off/Cryptsectsz - confirm against the aes_xts interface.
*/
for(o=0; o<l; o+=Cryptsectsz)
aes_xts_decrypt(&mp->key->tweak, &mp->key->ecb,
off+o, a+o, a+o, Cryptsectsz);
}
return l;
}
/* encrypt and write in pieces of at most SDmaxio bytes */
nb = n < SDmaxio ? n : SDmaxio;
/* keep trying until the buffer can be allocated */
while((b = sdmalloc(nb)) == nil){
if(!waserror()){
resrcwait("no memory for cryptio");
poperror();
}
}
if(waserror()) {
sdfree(b);
nexterror();
}
for(l = 0; (m = n - l) > 0; l += m){
if(m > nb) m = nb;
for(o=0; o<m; o+=Cryptsectsz)
aes_xts_encrypt(&mp->key->tweak, &mp->key->ecb,
off+o, a+o, b+o, Cryptsectsz);
/* a short write of a piece is an error */
if(io(mp, mp->inner[0], Iswrite, b, m, off) != m)
error(Eio);
off += m;
a += m;
}
sdfree(b);
poperror();
return l;
}
/*
 * I/O on an Fcat device: the inner devices are laid out one after
 * the other.  NB: a transfer could span multiple inner devices.
 * Returns the number of bytes transferred, which is less than n
 * when the transfer runs past the last inner device.
 * A short transfer on an inner device raises Eio.
 */
static long
catio(Fsdev *mp, int isread, void *a, long n, vlong off)
{
	int k;
	long chunk, total;
	Inner *in;

	if(debug)
		print("catio %d %p %ld %lld\n", isread, a, n, off);
	total = n;
	for(k = 0; n > 0 && k < mp->ndevs; k++){
		in = mp->inner[k];
		if(off >= in->isize){
			/* not there yet */
			off -= in->isize;
			continue;
		}
		/* stop at the end of this inner device */
		chunk = n;
		if(off + n > in->isize)
			chunk = in->isize - off;
		if(debug)
			print("\tdev %d %p %ld %lld\n", k, a, chunk, off);
		if(io(mp, in, isread, a, chunk, off) != chunk)
			error(Eio);
		/* whatever is left starts at the beginning of the next one */
		a = (char*)a + chunk;
		off = 0;
		n -= chunk;
	}
	if(debug)
		print("\tres %ld\n", total - n);
	return total - n;
}
/*
 * I/O on an Finter device: consecutive blocks of Blksize bytes are
 * spread round-robin over the inner devices.  The transfer is done
 * one block (or part of a block) at a time; a short transfer on an
 * inner device raises Eio.  Returns n.
 */
static long
interio(Fsdev *mp, int isread, void *a, long n, vlong off)
{
	int dev;
	long boff, left, l, room;
	vlong woff, blk;

	blk = off / Blksize;
	boff = off % Blksize;
	room = Blksize - boff;		/* what is left of the first block */
	left = n;
	while(left > 0){
		dev = blk % mp->ndevs;
		woff = (blk / mp->ndevs)*Blksize + boff;
		l = left > room ? room : left;
		if(io(mp, mp->inner[dev], isread, a, l, woff) != l)
			error(Eio);
		/* the following blocks are used from their start */
		blk++;
		boff = 0;
		room = Blksize;
		a = (char*)a + l;
		left -= l;
	}
	return n;
}
/*
 * Read n bytes at offset off of c into a, under the read lock.
 * Directories are generated by mgen and ctl returns the textual
 * configuration.  Reads of a device are truncated at its end and
 * dispatched on its type.  Returns the number of bytes read.
 */
static long
mread(Chan *c, void *a, long n, vlong off)
{
int i, retry;
long l, res;
Fsdev *mp;
Tree *t;
dprint("mread %llux\n", c->qid.path);
rlock(&lck);
if(waserror()){
runlock(&lck);
nexterror();
}
res = -1;
if(c->qid.type & QTDIR){
res = devdirread(c, a, n, 0, 0, mgen);
goto Done;
}
if(c->qid.path == Qctl){
res = readstr((long)off, a, n, confstr);
goto Done;
}
t = gettree(path2treeno(c->qid.path), Mustexist);
mp = getdev(t, path2devno(c->qid.path) - Qfirst, Mustexist);
/* reading at or past the end returns 0 bytes */
if(off >= mp->size){
res = 0;
goto Done;
}
if(off + n > mp->size)
n = mp->size - off;
if(n == 0){
res = 0;
goto Done;
}
switch(mp->type){
case Fcat:
res = catio(mp, Isread, a, n, off);
break;
case Finter:
res = interio(mp, Isread, a, n, off);
break;
case Fpart:
res = io(mp, mp->inner[0], Isread, a, n, mp->start + off);
break;
case Fmirror:
/*
* Try each inner device in turn until one of them can be read;
* if none can, pause and try them all again, up to Maxretries times.
*/
retry = 0;
do {
if (retry > 0) {
print("#k/%s: retry %d read for byte %,lld "
"count %ld: %s\n", mp->name, retry, off,
n, (up && up->errstr? up->errstr: ""));
/*
* pause before retrying in case it's due to
* a transient bus or controller problem.
*/
tsleep(&up->sleep, return0, 0, Retrypause);
}
for (i = 0; i < mp->ndevs; i++){
/* an error from io() resumes here: move on to the next copy */
if (waserror())
continue;
l = io(mp, mp->inner[i], Isread, a, n, off);
poperror();
if (l >= 0){
res = l;
break; /* read a good copy */
}
}
/* i == mp->ndevs means that no copy could be read */
} while (i == mp->ndevs && ++retry <= Maxretries);
if (retry > Maxretries) {
/* no mirror had a good copy of the block */
print("#k/%s: byte %,lld count %ld: CAN'T READ "
"from mirror: %s\n", mp->name, off, n,
(up && up->errstr? up->errstr: ""));
error(Eio);
} else if (retry > 0)
print("#k/%s: byte %,lld count %ld: retry read OK "
"from mirror: %s\n", mp->name, off, n,
(up && up->errstr? up->errstr: ""));
break;
case Fcrypt:
res = cryptio(mp, Isread, a, n, mp->start + off);
break;
}
Done:
poperror();
runlock(&lck);
return res;
}
/*
 * Write n bytes from a at offset off of c.
 * Directories cannot be written (Eisdir).  A write to ctl is a
 * configuration request handed to mconfig(), which does its own
 * locking.  Writes to a device are done under the read lock, are
 * truncated at the end of the device and dispatched on its type.
 * Returns the number of bytes written.
 */
static long
mwrite(Chan *c, void *a, long n, vlong off)
{
int i, allbad, anybad, retry;
long l, res;
Fsdev *mp;
Tree *t;
dprint("mwrite %llux\n", c->qid.path);
if (c->qid.type & QTDIR)
error(Eisdir);
if (c->qid.path == Qctl){
mconfig(a, n);
return n;
}
rlock(&lck);
if(waserror()){
runlock(&lck);
nexterror();
}
t = gettree(path2treeno(c->qid.path), Mustexist);
mp = getdev(t, path2devno(c->qid.path) - Qfirst, Mustexist);
/* writing at or past the end writes nothing */
if(off >= mp->size){
res = 0;
goto Done;
}
if(off + n > mp->size)
n = mp->size - off;
if(n == 0){
res = 0;
goto Done;
}
res = n;
switch(mp->type){
case Fcat:
res = catio(mp, Iswrite, a, n, off);
break;
case Finter:
res = interio(mp, Iswrite, a, n, off);
break;
case Fpart:
res = io(mp, mp->inner[0], Iswrite, a, n, mp->start + off);
if (res != n)
error(Eio);
break;
case Fmirror:
/*
* Write every copy, last inner device first.  While any copy fails
* the whole round is repeated, up to Maxretries times; the write
* only fails if the last round wrote no copy at all.
*/
retry = 0;
do {
if (retry > 0) {
print("#k/%s: retry %d write for byte %,lld "
"count %ld: %s\n", mp->name, retry, off,
n, (up && up->errstr? up->errstr: ""));
/*
* pause before retrying in case it's due to
* a transient bus or controller problem.
*/
tsleep(&up->sleep, return0, 0, Retrypause);
}
allbad = 1;
anybad = 0;
for (i = mp->ndevs - 1; i >= 0; i--){
/* an error from io() resumes here: note it and go on */
if (waserror()) {
anybad = 1;
continue;
}
l = io(mp, mp->inner[i], Iswrite, a, n, off);
poperror();
if (l == n)
allbad = 0; /* wrote a good copy */
else
anybad = 1;
}
} while (anybad && ++retry <= Maxretries);
if (allbad) {
/* no mirror took a good copy of the block */
print("#k/%s: byte %,lld count %ld: CAN'T WRITE "
"to mirror: %s\n", mp->name, off, n,
(up && up->errstr? up->errstr: ""));
error(Eio);
} else if (retry > 0)
print("#k/%s: byte %,lld count %ld: retry wrote OK "
"to mirror: %s\n", mp->name, off, n,
(up && up->errstr? up->errstr: ""));
break;
case Fcrypt:
res = cryptio(mp, Iswrite, a, n, mp->start + off);
break;
}
Done:
poperror();
runlock(&lck);
return res;
}
/*
 * Device switch entry for this driver.
 * The m* entries are the routines defined in this file; the dev*
 * entries are not defined here (presumably the kernel's generic
 * device routines - confirm against the Dev declaration, which also
 * fixes the meaning of each position in this initializer).
 */
Dev fsdevtab = {
'k',
"fs",
devreset,
devinit,
devshutdown,
mattach,
mwalk,
mstat,
mopen,
devcreate,
mclose,
mread,
devbread,
mwrite,
devbwrite,
devremove,
devwstat,
devpower,
devconfig,
};