reactos/rosapps/smartpdf/fitz/mupdf/pdf_open.c
Daniel Reimer a7fddf9c07 Delete all Trailing spaces in code.
svn path=/trunk/; revision=29689
2007-10-19 23:05:02 +00:00

596 lines
12 KiB
C

#include <fitz.h>
#include <mupdf.h>
/*
 * Tell whether ch is one of the six byte values this file treats as
 * white space: NUL, TAB, LF, FF, CR and SPACE.
 * Returns 1 for those values and 0 for everything else.
 */
static inline int iswhite(int ch)
{
	switch (ch)
	{
	case '\000':	/* NUL */
	case '\011':	/* horizontal tab */
	case '\012':	/* line feed */
	case '\014':	/* form feed */
	case '\015':	/* carriage return */
	case '\040':	/* space */
		return 1;
	default:
		return 0;
	}
}
/*
* magic version tag and startxref
*/
/*
 * Read the "%PDF-x.y" marker from the start of the file and store the
 * numeric version in xref->version.
 *
 * Returns nil on success, an io error when seeking or reading fails, or
 * a syntax error when the first line does not begin with "%PDF-".
 */
static fz_error *
loadversion(pdf_xref *xref)
{
	/* zero-filled so a short read can never expose stale stack bytes */
	char buf[20] = { 0 };
	int n;

	n = fz_seek(xref->file, 0, 0);
	if (n < 0)
		return fz_ioerror(xref->file);

	/* a negative result is treated as failure, as at the other
	 * fz_readline call sites in this file */
	n = fz_readline(xref->file, buf, sizeof buf);
	if (n < 0)
		return fz_ioerror(xref->file);

	/* NOTE(review): defensive terminator for atof below; confirm whether
	 * fz_readline already guarantees NUL termination */
	buf[sizeof buf - 1] = '\0';

	if (memcmp(buf, "%PDF-", 5) != 0)
		return fz_throw("syntaxerror: corrupt version marker");

	xref->version = atof(buf + 5);

	pdf_logxref("version %g\n", xref->version);

	return nil;
}
/*
 * Find the last "startxref" keyword in the final 1024 bytes of the file
 * and store the integer that follows it in xref->startxref.
 *
 * Returns nil on success, an io error when seeking or reading fails, or
 * a syntax error when the keyword is not present in that window.
 */
static fz_error *
readstartxref(pdf_xref *xref)
{
	/* one spare byte so the data can always be NUL-terminated for atoi */
	char buf[1024 + 1];
	const int window = (int)sizeof buf - 1;
	int t, n;
	int i;

	t = fz_seek(xref->file, 0, 2);
	if (t < 0)
		return fz_ioerror(xref->file);

	t = fz_seek(xref->file, MAX(0, t - window), 0);
	if (t < 0)
		return fz_ioerror(xref->file);

	n = fz_read(xref->file, buf, window);
	if (n < 0)
		return fz_ioerror(xref->file);
	buf[n] = '\0';

	/* scan backwards so the last occurrence in the window wins */
	for (i = n - 9; i >= 0; i--)
	{
		if (memcmp(buf + i, "startxref", 9) == 0)
		{
			i += 9;
			/* bound check first: buf[i] must not be read past the data */
			while (i < n && iswhite(buf[i]))
				i ++;
			xref->startxref = atoi(buf + i);
			return nil;
		}
	}

	return fz_throw("syntaxerror: could not find startxref");
}
#define WHITE_SPACE_CHARS " \n\t\r"

/*
 * Locate the first occurrence of c in the NUL-terminated string txt.
 * Returns a pointer to that character, or NULL when the terminator is
 * reached first. Searching for '\0' yields a pointer to the terminator.
 */
static const char *str_find_char(const char *txt, char c)
{
	const char *cursor;

	for (cursor = txt; *cursor != c; ++cursor)
	{
		if (*cursor == 0)
			return NULL;
	}
	return cursor;
}
/*
 * Return 1 when str_find_char finds c in str, 0 otherwise.
 */
static int str_contains(const char *str, char c)
{
	return str_find_char(str, c) != NULL ? 1 : 0;
}
/*
 * Remove, in place, every trailing character of txt that appears in
 * to_strip. A NULL argument or an empty txt leaves everything untouched.
 * A string made up only of strippable characters becomes empty.
 */
static void str_strip_right(char *txt, const char *to_strip)
{
	char *last;

	if (!txt || !to_strip)
		return;
	if (*txt == 0)
		return;

	/* start on the final character and walk left, never past txt[0] */
	last = txt + strlen(txt) - 1;
	while (str_contains(to_strip, *last) && last != txt)
		--last;

	/*
	 * last is now either the rightmost character to keep, or txt[0]
	 * when the walk ran out of string; cut accordingly.
	 */
	if (str_contains(to_strip, *last))
		*last = 0;
	else
		*(last + 1) = 0;
}
/*
 * Strip trailing characters listed in WHITE_SPACE_CHARS from txt, in place.
 */
static void str_strip_ws_right(char *txt)
{
	const char *strip_set = WHITE_SPACE_CHARS;

	str_strip_right(txt, strip_set);
}
/*
* trailer dictionary
*/
/*
 * Skip over a classic ("xref" keyword) cross-reference table without
 * loading its entries, then parse the trailer dictionary that follows
 * into xref->trailer.
 *
 * buf/cap is scratch space for line reading and lexing.
 * Returns nil on success, otherwise an io or syntax error.
 */
static fz_error *
readoldtrailer(pdf_xref *xref, char *buf, int cap)
{
	int len;
	char *s;
	char *tok;
	int n;
	int t;
	int c;

	pdf_logxref("load old xref format trailer\n");

	n = fz_readline(xref->file, buf, cap);
	if (n < 0)
		return fz_ioerror(xref->file);
	str_strip_ws_right(buf);
	if (strcmp(buf, "xref") != 0)
		return fz_throw("ioerror: missing xref");

	/* each subsection starts with a "<first> <count>" header line */
	while (1)
	{
		c = fz_peekbyte(xref->file);
		if (!(c >= '0' && c <= '9'))
			break;

		n = fz_readline(xref->file, buf, cap);
		if (n < 0)
			return fz_ioerror(xref->file);

		s = buf;
		(void) strsep(&s, " ");		/* first object number: not needed here */
		tok = strsep(&s, " ");
		/* strsep yields NULL when the header has no second field */
		if (!tok)
			return fz_throw("syntaxerror: malformed xref subsection header");
		len = atoi(tok);
		if (len < 0)
			return fz_throw("syntaxerror: malformed xref subsection header");

		/* broken pdfs where the section is not on a separate line */
		if (s && *s != '\0')
		{
			n = fz_seek(xref->file, -(n + buf - s + 2), 1);
			if (n < 0)
				return fz_ioerror(xref->file);
		}

		t = fz_tell(xref->file);
		if (t < 0)
			return fz_ioerror(xref->file);

		/* the entries are skipped as fixed 20-byte records */
		n = fz_seek(xref->file, t + 20 * len, 0);
		if (n < 0)
			return fz_ioerror(xref->file);
	}

	t = pdf_lex(xref->file, buf, cap, &n);
	if (t != PDF_TTRAILER)
		return fz_throw("syntaxerror: expected trailer");

	t = pdf_lex(xref->file, buf, cap, &n);
	if (t != PDF_TODICT)
		return fz_throw("syntaxerror: expected trailer dictionary");

	return pdf_parsedict(&xref->trailer, xref->file, buf, cap);
}
/*
 * Parse the indirect object at the current file position and store it
 * in xref->trailer. The object/generation numbers and stream offset
 * are not requested (nil is passed for them).
 */
static fz_error *
readnewtrailer(pdf_xref *xref, char *buf, int cap)
{
	fz_error *error;

	pdf_logxref("load new xref format trailer\n");

	error = pdf_parseindobj(&xref->trailer, xref->file, buf, cap, nil, nil, nil);
	return error;
}
/*
 * Seek to xref->startxref and dispatch on the first byte found there:
 * 'x' selects the old-format trailer reader, a decimal digit selects
 * the new-format one. Anything else is a syntax error.
 */
static fz_error *
readtrailer(pdf_xref *xref, char *buf, int cap)
{
	int pos;
	int first;

	pos = fz_seek(xref->file, xref->startxref, 0);
	if (pos < 0)
		return fz_ioerror(xref->file);

	first = fz_peekbyte(xref->file);

	if (first >= '0' && first <= '9')
		return readnewtrailer(xref, buf, cap);
	if (first == 'x')
		return readoldtrailer(xref, buf, cap);

	return fz_throw("syntaxerror: could not find xref");
}
/*
* xref tables
*/
/*
 * Load a classic ("xref" keyword) cross-reference table into
 * xref->table and parse the trailer dictionary that follows it.
 *
 * Table slots whose type is already set are left untouched, so entries
 * loaded earlier take precedence over the ones read here.
 *
 * trailerp receives the parsed trailer dictionary; buf/cap is scratch
 * space. Returns nil on success, otherwise an io, syntax or range error.
 */
static fz_error *
readoldxref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
{
	int ofs, len;
	char *s;
	char *tok;
	int n;
	int t;
	int i;
	int c;

	pdf_logxref("load old xref format\n");

	n = fz_readline(xref->file, buf, cap);
	if (n < 0)
		return fz_ioerror(xref->file);
	str_strip_ws_right(buf);
	if (strcmp(buf, "xref") != 0)
		return fz_throw("syntaxerror: expected xref");

	/* each subsection starts with a "<first> <count>" header line */
	while (1)
	{
		c = fz_peekbyte(xref->file);
		if (!(c >= '0' && c <= '9'))
			break;

		n = fz_readline(xref->file, buf, cap);
		if (n < 0)
			return fz_ioerror(xref->file);

		s = buf;
		tok = strsep(&s, " ");		/* never NULL: s was non-NULL */
		ofs = atoi(tok);
		tok = strsep(&s, " ");
		/* strsep yields NULL when the header has no second field */
		if (!tok)
			return fz_throw("syntaxerror: malformed xref subsection header");
		len = atoi(tok);

		/*
		 * The table holds xref->len slots; refuse subsections that
		 * would index outside it (written to avoid int overflow).
		 */
		if (ofs < 0 || len < 0 || ofs > xref->len || len > xref->len - ofs)
			return fz_throw("rangecheck: xref subsection out of range");

		/* broken pdfs where the section is not on a separate line */
		if (s && *s != '\0')
		{
			fz_warn("syntaxerror: broken xref section");
			n = fz_seek(xref->file, -(n + buf - s + 2), 1);
			if (n < 0)
				return fz_ioerror(xref->file);
		}

		for (i = 0; i < len; i++)
		{
			/* entries are fixed 20-byte records */
			n = fz_read(xref->file, buf, 20);
			if (n < 0)
				return fz_ioerror(xref->file);
			if (n != 20)
				return fz_throw("syntaxerror: truncated xref table");

			/* stop atoi from running past the record into old data */
			if (cap > 20)
				buf[20] = '\0';

			if (!xref->table[ofs + i].type)
			{
				s = buf;
				xref->table[ofs + i].ofs = atoi(s);
				xref->table[ofs + i].gen = atoi(s + 11);
				xref->table[ofs + i].type = s[17];
			}
		}
	}

	t = pdf_lex(xref->file, buf, cap, &n);
	if (t != PDF_TTRAILER)
		return fz_throw("syntaxerror: expected trailer");

	t = pdf_lex(xref->file, buf, cap, &n);
	if (t != PDF_TODICT)
		return fz_throw("syntaxerror: expected trailer dictionary");

	return pdf_parsedict(trailerp, xref->file, buf, cap);
}
/*
 * Load a cross-reference stream: parse the indirect object at the
 * current file position, register it in xref->table under its own
 * object number, then decode the stream into table entries.
 *
 * Each entry is three big-endian fields whose byte widths come from the
 * "W" array; the entry range comes from the first pair of "Index", or
 * defaults to [0, Size). Table slots whose type is already set are left
 * untouched.
 * NOTE(review): only the first Index pair is read; further pairs, if
 * present, are ignored.
 *
 * On success *trailerp receives the stream dictionary (the reference is
 * handed to the caller) and nil is returned. On failure the local
 * reference is dropped and the error is returned.
 */
static fz_error *
readnewxref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
{
	fz_error *error;
	fz_stream *stm;
	fz_obj *trailer;
	fz_obj *obj;
	int oid, gen, stmofs;
	int size, w0, w1, w2, i0, i1;
	int i, n;

	pdf_logxref("load new xref format\n");

	error = pdf_parseindobj(&trailer, xref->file, buf, cap, &oid, &gen, &stmofs);
	if (error)
		return error;

	if (oid < 0 || oid >= xref->len) {
		error = fz_throw("rangecheck: object id out of range");
		goto cleanup;
	}

	/* release any object already cached in this slot before replacing it */
	if (xref->table[oid].obj)
		fz_dropobj(xref->table[oid].obj);

	xref->table[oid].type = 'n';
	xref->table[oid].gen = gen;
	xref->table[oid].obj = fz_keepobj(trailer);
	xref->table[oid].stmofs = stmofs;

	obj = fz_dictgets(trailer, "Size");
	if (!obj) {
		error = fz_throw("syntaxerror: xref stream missing Size entry");
		goto cleanup;
	}
	size = fz_toint(obj);

	obj = fz_dictgets(trailer, "W");
	if (!obj) {
		error = fz_throw("syntaxerror: xref stream missing W entry");
		goto cleanup;
	}
	w0 = fz_toint(fz_arrayget(obj, 0));
	w1 = fz_toint(fz_arrayget(obj, 1));
	w2 = fz_toint(fz_arrayget(obj, 2));

	obj = fz_dictgets(trailer, "Index");
	if (obj) {
		i0 = fz_toint(fz_arrayget(obj, 0));
		i1 = fz_toint(fz_arrayget(obj, 1));
	}
	else {
		i0 = 0;
		i1 = size;
	}

	/*
	 * Entries i0 .. i0+i1-1 are written below, so the whole range must
	 * fit in the table (written to avoid int overflow in i0 + i1).
	 */
	if (i0 < 0 || i1 < 0 || i0 > xref->len || i1 > xref->len - i0) {
		error = fz_throw("syntaxerror: xref stream has too many entries");
		goto cleanup;
	}

	error = pdf_openstream(&stm, xref, oid, gen);
	if (error)
		goto cleanup;

	for (i = i0; i < i0 + i1; i++)
	{
		int a = 0;
		int b = 0;
		int c = 0;

		if (fz_peekbyte(stm) == EOF)
		{
			error = fz_throw("syntaxerror: truncated xref stream");
			fz_dropstream(stm);
			goto cleanup;
		}

		for (n = 0; n < w0; n++)
			a = (a << 8) + fz_readbyte(stm);
		for (n = 0; n < w1; n++)
			b = (b << 8) + fz_readbyte(stm);
		for (n = 0; n < w2; n++)
			c = (c << 8) + fz_readbyte(stm);

		if (!xref->table[i].type)
		{
			/* a zero-width type field means the default type, 1 */
			int t = w0 ? a : 1;
			xref->table[i].type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0;
			/* b was read with width w1 and c with width w2, so each
			 * field is guarded by its own width */
			xref->table[i].ofs = w1 ? b : 0;
			xref->table[i].gen = w2 ? c : 0;
		}
	}

	fz_dropstream(stm);

	*trailerp = trailer;

	return nil;

cleanup:
	fz_dropobj(trailer);
	return error;
}
/*
 * Seek to ofs and load the cross-reference section found there. The
 * first byte selects the reader: 'x' for an old-format table, a decimal
 * digit for a new-format xref stream. Anything else is a syntax error.
 */
static fz_error *
readxref(fz_obj **trailerp, pdf_xref *xref, int ofs, char *buf, int cap)
{
	int pos;
	int first;

	pos = fz_seek(xref->file, ofs, 0);
	if (pos < 0)
		return fz_ioerror(xref->file);

	first = fz_peekbyte(xref->file);

	if (first >= '0' && first <= '9')
		return readnewxref(trailerp, xref, buf, cap);
	if (first == 'x')
		return readoldxref(trailerp, xref, buf, cap);

	return fz_throw("syntaxerror: expected xref");
}
/*
 * Load the xref section at ofs, then recurse into the sections named by
 * its trailer: first "XrefStm", then "Prev". The first error stops the
 * walk. The trailer read here is dropped before returning.
 *
 * The recursion follows whatever offsets the file supplies; nothing in
 * this function detects a chain that loops back on itself.
 */
static fz_error *
readxrefsections(pdf_xref *xref, int ofs, char *buf, int cap)
{
	fz_error *error;
	fz_obj *trailer;
	fz_obj *link;

	error = readxref(&trailer, xref, ofs, buf, cap);
	if (error)
		return error;

	/* FIXME: do we overwrite free entries properly? */

	link = fz_dictgets(trailer, "XrefStm");
	if (link)
	{
		pdf_logxref("load xrefstm\n");
		error = readxrefsections(xref, fz_toint(link), buf, cap);
	}

	if (!error)
	{
		link = fz_dictgets(trailer, "Prev");
		if (link)
		{
			pdf_logxref("load prev\n");
			error = readxrefsections(xref, fz_toint(link), buf, cap);
		}
	}

	/* single exit: error is still nil here when everything succeeded */
	fz_dropobj(trailer);
	return error;
}
/*
* compressed object streams
*/
/*
 * Load every object stored in the compressed object stream (oid, gen)
 * and cache each one in xref->table under its own object number,
 * replacing any object already cached there.
 *
 * The stream begins with "N" pairs of integers (object number, offset),
 * followed from byte "First" onwards by the objects themselves, which
 * are parsed back to back in that order.
 *
 * buf/cap is scratch space for the lexer and parser.
 * Returns nil on success, otherwise the first error encountered.
 */
fz_error *
pdf_loadobjstm(pdf_xref *xref, int oid, int gen, char *buf, int cap)
{
	fz_error *error;
	fz_stream *stm;
	fz_obj *objstm;
	int *oidbuf;
	int *ofsbuf;

	fz_obj *obj;
	int first;
	int count;
	int i, n, t;

	pdf_logxref("loadobjstm %d %d\n", oid, gen);

	error = pdf_loadobject(&objstm, xref, oid, gen);
	if (error)
		return error;

	count = fz_toint(fz_dictgets(objstm, "N"));
	first = fz_toint(fz_dictgets(objstm, "First"));

	pdf_logxref(" count %d\n", count);

	/* the count comes from the file: keep it out of the allocation
	 * size when it is negative or would overflow the multiplication */
	if (count < 0 || (size_t)count > (size_t)-1 / sizeof(int))
	{
		error = fz_throw("rangecheck: object stream count out of range");
		goto cleanupobj;
	}

	oidbuf = fz_malloc(count * sizeof(int));
	if (!oidbuf) { error = fz_outofmem; goto cleanupobj; }

	ofsbuf = fz_malloc(count * sizeof(int));
	if (!ofsbuf) { error = fz_outofmem; goto cleanupoid; }

	error = pdf_openstream(&stm, xref, oid, gen);
	if (error)
		goto cleanupofs;

	/* header: one (object number, offset) pair per object */
	for (i = 0; i < count; i++)
	{
		t = pdf_lex(stm, buf, cap, &n);
		if (t != PDF_TINT)
		{
			error = fz_throw("syntaxerror: corrupt object stream");
			goto cleanupstm;
		}
		oidbuf[i] = atoi(buf);

		t = pdf_lex(stm, buf, cap, &n);
		if (t != PDF_TINT)
		{
			error = fz_throw("syntaxerror: corrupt object stream");
			goto cleanupstm;
		}
		ofsbuf[i] = atoi(buf);
	}

	n = fz_seek(stm, first, 0);
	if (n < 0)
	{
		error = fz_ioerror(stm);
		goto cleanupstm;
	}

	for (i = 0; i < count; i++)
	{
		/* FIXME: seek to first + ofsbuf[i] */

		error = pdf_parsestmobj(&obj, stm, buf, cap);
		if (error)
			goto cleanupstm;

		if (oidbuf[i] < 1 || oidbuf[i] >= xref->len)
		{
			/* the parsed object has no owner yet: release it */
			fz_dropobj(obj);
			error = fz_throw("rangecheck: object number out of range");
			goto cleanupstm;
		}

		if (xref->table[oidbuf[i]].obj)
			fz_dropobj(xref->table[oidbuf[i]].obj);
		xref->table[oidbuf[i]].obj = obj;
	}

	fz_dropstream(stm);
	fz_free(ofsbuf);
	fz_free(oidbuf);
	fz_dropobj(objstm);
	return nil;

cleanupstm:
	fz_dropstream(stm);
cleanupofs:
	fz_free(ofsbuf);
cleanupoid:
	fz_free(oidbuf);
cleanupobj:
	fz_dropobj(objstm);
	return error;
}
/*
* open and load xref tables from pdf
*/
/*
 * Open the PDF file `filename` and populate xref from it: read the
 * version marker, locate startxref, parse the newest trailer, allocate
 * a table with "Size" cleared entries, then load every cross-reference
 * section starting at startxref.
 *
 * xref->table must be nil on entry (asserted).
 * Returns nil on success, otherwise the first error encountered.
 * NOTE(review): error paths return without closing xref->file or
 * freeing xref->table; presumably the caller tears down xref on
 * failure -- confirm.
 */
fz_error *
pdf_loadxref(pdf_xref *xref, char *filename)
{
	fz_error *error;
	fz_obj *size;
	int len;
	int i;

	char buf[65536];	/* yeowch! */

	pdf_logxref("loadxref '%s' %p\n", filename, xref);

	error = fz_openrfile(&xref->file, filename);
	if (error)
		return error;

	error = loadversion(xref);
	if (error)
		return error;

	error = readstartxref(xref);
	if (error)
		return error;

	error = readtrailer(xref, buf, sizeof buf);
	if (error)
		return error;

	size = fz_dictgets(xref->trailer, "Size");
	if (!size)
		return fz_throw("syntaxerror: trailer missing Size entry");

	len = fz_toint(size);

	pdf_logxref(" size %d\n", len);

	/* Size comes from the file: keep it out of the allocation size when
	 * it is negative or would overflow the multiplication */
	if (len < 0 || (size_t)len > (size_t)-1 / sizeof(pdf_xrefentry))
		return fz_throw("rangecheck: trailer Size out of range");

	assert(xref->table == nil);

	xref->cap = len;
	xref->len = len;
	xref->table = fz_malloc(xref->cap * sizeof(pdf_xrefentry));
	if (!xref->table)
		return fz_outofmem;

	/* a zero type marks a slot as not yet filled by any xref section */
	for (i = 0; i < xref->len; i++)
	{
		xref->table[i].ofs = 0;
		xref->table[i].gen = 0;
		xref->table[i].type = 0;
		xref->table[i].mark = 0;
		xref->table[i].stmbuf = nil;
		xref->table[i].stmofs = 0;
		xref->table[i].obj = nil;
	}

	return readxrefsections(xref, xref->startxref, buf, sizeof buf);
}