reactos/rosapps/smartpdf/fitz/mupdf/pdf_repair.c
Klemens Friedl 435a566751 SmartPDF - lightweight pdf viewer app for rosapps
* sumatrapdf - vendor import
* everything compiles (libjpeg, poppler, fitz, sumatrapdf)
* does NOT link

(remove the comment tags in the parent directory.rbuild file (rosapps dir) to build it)

svn path=/trunk/; revision=29295
2007-09-29 08:39:35 +00:00

305 lines
5.6 KiB
C

#include <fitz.h>
#include <mupdf.h>
/*
* open pdf and scan objects to reconstruct xref table
*/
/*
 * One object discovered while scanning the file. pdf_repairxref collects
 * these in a growable array and later copies them into the xref table.
 */
struct entry
{
    int oid;    /* object number taken from the "N G obj" header */
    int gen;    /* generation number taken from the same header */
    int ofs;    /* file offset of the object-number token */
    int stmofs; /* stream offset reported by parseobj */
    int stmlen; /* measured stream length; negative means no correction */
};
/*
 * Parse the body of an object while scanning a damaged file.
 *
 * file     stream to lex from; the caller invokes this right after it has
 *          seen an "obj" token
 * buf,cap  scratch buffer and its capacity, used for lexing
 * stmofs   out: offset of the first stream data byte, or 0 when the object
 *          has no stream
 * stmlen   out: stream length measured by searching for "endstream", or -1
 *          when no stream was found or the dictionary's Length entry
 *          already leads to an "endstream" token
 * isroot   out: 1 when the dictionary has Type = Catalog, else 0
 * isinfo   out: 1 when the dictionary has Producer, Creator and Title
 *          entries, else 0
 *
 * Returns nil on success. Returns an error when the dictionary cannot be
 * parsed or when it carries Filter = Standard (treated as encryption).
 */
static fz_error *
parseobj(fz_stream *file, char *buf, int cap, int *stmofs, int *stmlen,
    int *isroot, int *isinfo)
{
    fz_error *error;
    fz_obj *dict = nil;
    fz_obj *length;
    fz_obj *filter;
    fz_obj *type;
    int tok, len;

    /*
     * Give every output a defined value up front. The caller copies
     * *stmofs into its object list even for objects without a stream.
     */
    *stmofs = 0;
    *stmlen = -1;
    *isroot = 0;
    *isinfo = 0;

    tok = pdf_lex(file, buf, cap, &len);
    if (tok == PDF_TODICT)
    {
        error = pdf_parsedict(&dict, file, buf, cap);
        if (error)
            return error;
    }

    if (fz_isdict(dict))
    {
        type = fz_dictgets(dict, "Type");
        if (fz_isname(type) && !strcmp(fz_toname(type), "Catalog"))
            *isroot = 1;

        filter = fz_dictgets(dict, "Filter");
        if (fz_isname(filter) && !strcmp(fz_toname(filter), "Standard"))
        {
            /* Release the parsed dictionary before bailing out. */
            fz_dropobj(dict);
            return fz_throw("cannot repair encrypted files");
        }

        /* Heuristic: a dictionary with all three keys is the info dict. */
        if (fz_dictgets(dict, "Producer"))
            if (fz_dictgets(dict, "Creator"))
                if (fz_dictgets(dict, "Title"))
                    *isinfo = 1;
    }

    /* Skip ahead to the stream, the end of the object, or a lexer stop. */
    while (tok != PDF_TSTREAM &&
        tok != PDF_TENDOBJ &&
        tok != PDF_TERROR &&
        tok != PDF_TEOF)
    {
        tok = pdf_lex(file, buf, cap, &len);
    }

    if (tok == PDF_TSTREAM)
    {
        /* Consume the end-of-line after "stream": one byte, or CR LF. */
        int c = fz_readbyte(file);
        if (c == '\r')
        {
            c = fz_peekbyte(file);
            if (c == '\n')
                fz_readbyte(file);
        }

        *stmofs = fz_tell(file);

        /* A stream may follow something that was not a dictionary. */
        length = dict ? fz_dictgets(dict, "Length") : nil;
        if (fz_isint(length))
        {
            /* Trust Length only if "endstream" really follows the data. */
            fz_seek(file, *stmofs + fz_toint(length), 0);
            tok = pdf_lex(file, buf, cap, &len);
            if (tok == PDF_TENDSTREAM)
                goto atobjend;
            fz_seek(file, *stmofs, 0);
        }

        /*
         * Slide a 9-byte window over the data until it holds "endstream"
         * or the file ends.
         */
        fz_read(file, buf, 9);
        while (memcmp(buf, "endstream", 9) != 0)
        {
            c = fz_readbyte(file);
            if (c == EOF)
                break;
            memmove(buf, buf + 1, 8);
            buf[8] = c;
        }

        /* The window itself (9 bytes) is not part of the stream data. */
        *stmlen = fz_tell(file) - *stmofs - 9;

atobjend:
        /* Read one more token (normally "endobj"); its value is unused. */
        (void) pdf_lex(file, buf, cap, &len);
    }

    if (dict)
        fz_dropobj(dict);

    return nil;
}
/*
 * Rebuild the cross-reference table of a PDF by scanning the whole file
 * for "N G obj" sequences.
 *
 * xref      receives the opened stream (xref->file), a synthesized trailer
 *           holding Size and Root, and a freshly allocated table
 *           (xref->table, xref->len, xref->cap)
 * filename  path of the file to open for reading
 *
 * Returns nil on success. On failure the stream is dropped, xref->file is
 * reset to NULL and the error is returned.
 * NOTE(review): a trailer or table stored in xref before a late failure is
 * not released here - confirm that the caller tears down xref.
 */
fz_error *
pdf_repairxref(pdf_xref *xref, char *filename)
{
    fz_error *error;
    fz_stream *file;

    struct entry *list = nil;
    int listlen;
    int listcap;
    int maxoid = 0;

    char buf[65536];

    int oid = 0;
    int gen = 0;
    int tmpofs, oidofs = 0, genofs = 0;
    int isroot, rootoid = 0, rootgen = 0;
    int isinfo;
    int stmofs, stmlen;
    int tok, len;
    int next;
    int i;

    error = fz_openrfile(&file, filename);
    if (error)
        return error;

    pdf_logxref("repairxref '%s' %p\n", filename, xref);

    xref->file = file;

    /* TODO: extract version */

    listlen = 0;
    listcap = 1024;
    list = fz_malloc(listcap * sizeof(struct entry));
    if (!list)
    {
        /* Without this the function would report success after cleanup. */
        error = fz_outofmem;
        goto cleanup;
    }

    while (1)
    {
        tmpofs = fz_tell(file);

        tok = pdf_lex(file, buf, sizeof buf, &len);

        if (tok == PDF_TINT)
        {
            /*
             * Remember the last two integers and their offsets: when
             * "obj" shows up they are the object and generation numbers.
             */
            oidofs = genofs;
            oid = gen;
            genofs = tmpofs;
            gen = atoi(buf);
        }

        if (tok == PDF_TOBJ)
        {
            /* Never record a stale stream offset from an earlier object. */
            stmofs = 0;
            stmlen = -1;

            error = parseobj(file, buf, sizeof buf, &stmofs, &stmlen, &isroot, &isinfo);
            if (error)
                goto cleanup;

            /*
             * The object number later indexes xref->table, so a negative
             * one (e.g. "-5 0 obj" in a hostile file) must be ignored.
             * NOTE(review): very large object numbers still drive the
             * table size (maxoid + 1) - consider an upper bound.
             */
            if (oid < 0)
                continue;

            if (isroot) {
                pdf_logxref("found catalog: %d %d\n", oid, gen);
                rootoid = oid;
                rootgen = gen;
            }

            if (isinfo) {
                pdf_logxref("found info: %d %d\n", oid, gen);
            }

            if (listlen + 1 == listcap)
            {
                struct entry *newlist;
                listcap = listcap * 2;
                newlist = fz_realloc(list, listcap * sizeof(struct entry));
                if (!newlist) {
                    error = fz_outofmem;
                    goto cleanup;
                }
                list = newlist;
            }

            list[listlen].oid = oid;
            list[listlen].gen = gen;
            list[listlen].ofs = oidofs;
            list[listlen].stmofs = stmofs;
            list[listlen].stmlen = stmlen;
            listlen ++;

            if (oid > maxoid)
                maxoid = oid;
        }

        /* Step over the offending byte so the scan keeps advancing. */
        if (tok == PDF_TERROR)
            fz_readbyte(file);

        if (tok == PDF_TEOF)
            break;
    }

    if (rootoid == 0)
    {
        error = fz_throw("syntaxerror: could not find catalog");
        goto cleanup;
    }

    error = fz_packobj(&xref->trailer,
        "<< /Size %i /Root %r >>",
        maxoid + 1, rootoid, rootgen);
    if (error)
        goto cleanup;

    xref->len = maxoid + 1;
    xref->cap = xref->len;
    xref->table = fz_malloc(xref->cap * sizeof(pdf_xrefentry));
    if (!xref->table)
    {
        error = fz_outofmem;
        goto cleanup;
    }

    /* Start with every slot free; entry 0 gets generation 65535. */
    xref->table[0].type = 'f';
    xref->table[0].mark = 0;
    xref->table[0].ofs = 0;
    xref->table[0].gen = 65535;
    xref->table[0].stmbuf = nil;
    xref->table[0].stmofs = 0;
    xref->table[0].obj = nil;

    for (i = 1; i < xref->len; i++)
    {
        xref->table[i].type = 'f';
        xref->table[i].mark = 0;
        xref->table[i].ofs = 0;
        xref->table[i].gen = 0;
        xref->table[i].stmbuf = nil;
        xref->table[i].stmofs = 0;
        xref->table[i].obj = nil;
    }

    /* Mark every object found by the scan as in use. */
    for (i = 0; i < listlen; i++)
    {
        xref->table[list[i].oid].type = 'n';
        xref->table[list[i].oid].ofs = list[i].ofs;
        xref->table[list[i].oid].gen = list[i].gen;
        xref->table[list[i].oid].mark = 0;

        xref->table[list[i].oid].stmofs = list[i].stmofs;

        /* corrected stream length */
        if (list[i].stmlen >= 0)
        {
            fz_obj *dict, *length;

            pdf_logxref("correct stream length %d %d = %d\n",
                list[i].oid, list[i].gen, list[i].stmlen);

            error = pdf_loadobject(&dict, xref, list[i].oid, list[i].gen);
            if (error)
                goto cleanup;

            error = fz_newint(&length, list[i].stmlen);
            if (error)
            {
                fz_dropobj(dict);
                goto cleanup;
            }

            error = fz_dictputs(dict, "Length", length);
            /*
             * Release our reference to the new integer.
             * NOTE(review): assumes fz_dictputs keeps its own reference
             * to the value on success - confirm against the fitz object
             * implementation.
             */
            fz_dropobj(length);
            if (error)
            {
                fz_dropobj(dict);
                goto cleanup;
            }

            pdf_updateobject(xref, list[i].oid, list[i].gen, dict);

            fz_dropobj(dict);
        }
    }

    /*
     * Chain the free entries: walking from the end, each free slot's ofs
     * is set to the next higher free slot (0 for the last one).
     */
    next = 0;
    for (i = xref->len - 1; i >= 0; i--)
    {
        if (xref->table[i].type == 'f')
        {
            xref->table[i].ofs = next;
            if (xref->table[i].gen < 65535)
                xref->table[i].gen ++;
            next = i;
        }
    }

    fz_free(list);
    return nil;

cleanup:
    assert(1 == file->refs);
    fz_dropstream(file);
    xref->file = NULL;
    fz_free(list);
    return error;
}