Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/mupdf/pdf/document.h
Original file line number Diff line number Diff line change
Expand Up @@ -763,6 +763,7 @@ typedef struct
int do_use_objstms; /* Use objstms if possible */
int compression_effort; /* 0 for default. 100 = max, 1 = min. */
int do_labels; /* Add labels to each object showing how it can be reached from the Root. */
int do_strip_invisible_text; /* Strip invisible text (text render mode 3). */
} pdf_write_options;

FZ_DATA extern const pdf_write_options pdf_default_write_options;
Expand Down
1 change: 1 addition & 0 deletions include/mupdf/pdf/interpret.h
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,7 @@ typedef struct
int (*text_filter)(fz_context *ctx, void *opaque, int *ucsbuf, int ucslen, fz_matrix trm, fz_matrix ctm, fz_rect bbox);
void (*after_text_object)(fz_context *ctx, void *opaque, pdf_document *doc, pdf_processor *chain, fz_matrix ctm);
int (*culler)(fz_context *ctx, void *opaque, fz_rect bbox, fz_cull_type type);
int strip_invisible_text;
}
pdf_sanitize_filter_options;

Expand Down
6 changes: 5 additions & 1 deletion source/pdf/pdf-op-filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -635,7 +635,11 @@ filter_show_char(fz_context *ctx, pdf_sanitize_processor *p, int cid, int *unico
}
*unicode = ucsbuf[0];

if (p->options->text_filter || p->options->culler)
if (p->options->strip_invisible_text && gstate->pending.text.render == 3)
{
remove = 1;
}
else if (p->options->text_filter || p->options->culler)
{
fz_matrix ctm;
fz_rect bbox;
Expand Down
15 changes: 11 additions & 4 deletions source/pdf/pdf-write.c
Original file line number Diff line number Diff line change
Expand Up @@ -1721,7 +1721,7 @@ static void complete_signatures(fz_context *ctx, pdf_document *doc, pdf_write_st
}
}

static void clean_content_streams(fz_context *ctx, pdf_document *doc, int sanitize, int ascii, int newlines)
static void clean_content_streams(fz_context *ctx, pdf_document *doc, int sanitize, int ascii, int newlines, int strip_invisible_text)
{
int n = pdf_count_pages(ctx, doc);
int i;
Expand All @@ -1733,7 +1733,9 @@ static void clean_content_streams(fz_context *ctx, pdf_document *doc, int saniti
options.recurse = 1;
options.ascii = ascii;
options.newlines = newlines;
options.filters = sanitize ? list : NULL;
options.filters = sanitize || strip_invisible_text ? list : NULL;
if (strip_invisible_text)
sopts.strip_invisible_text = 1;
list[0].filter = pdf_new_sanitize_filter;
list[0].options = &sopts;

Expand Down Expand Up @@ -1916,6 +1918,8 @@ pdf_parse_write_options(fz_context *ctx, pdf_write_options *opts, const char *ar
opts->do_clean = fz_option_eq(val, "yes");
if (fz_has_option(ctx, args, "sanitize", &val))
opts->do_sanitize = fz_option_eq(val, "yes");
if (fz_has_option(ctx, args, "strip-invisible-text", &val))
opts->do_strip_invisible_text = fz_option_eq(val, "yes");
if (fz_has_option(ctx, args, "incremental", &val))
opts->do_incremental = fz_option_eq(val, "yes");
if (fz_has_option(ctx, args, "objstms", &val))
Expand Down Expand Up @@ -1990,12 +1994,12 @@ prepare_for_save(fz_context *ctx, pdf_document *doc, const pdf_write_options *in
fz_throw(ctx, FZ_ERROR_ARGUMENT, "annotations need resynthesis before saving");

/* Rewrite (and possibly sanitize) the operator streams */
if (in_opts->do_clean || in_opts->do_sanitize)
if (in_opts->do_clean || in_opts->do_sanitize || in_opts->do_strip_invisible_text)
{
pdf_begin_operation(ctx, doc, "Clean content streams");
fz_try(ctx)
{
clean_content_streams(ctx, doc, in_opts->do_sanitize, in_opts->do_ascii, in_opts->do_pretty);
clean_content_streams(ctx, doc, in_opts->do_sanitize, in_opts->do_ascii, in_opts->do_pretty, in_opts->do_strip_invisible_text);
pdf_end_operation(ctx, doc);
}
fz_catch(ctx)
Expand Down Expand Up @@ -2722,6 +2726,7 @@ void pdf_write_document(fz_context *ctx, pdf_document *doc, fz_output *out, cons
in_opts->do_linear ||
in_opts->do_clean ||
in_opts->do_sanitize ||
in_opts->do_strip_invisible_text ||
in_opts->do_appearance ||
in_opts->do_encrypt != PDF_ENCRYPT_KEEP)
fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't use these options when snapshotting!");
Expand Down Expand Up @@ -2864,6 +2869,8 @@ pdf_format_write_options(fz_context *ctx, char *buffer, size_t buffer_len, const
ADD_OPT("linearize=yes");
if (opts->do_clean)
ADD_OPT("clean=yes");
if (opts->do_strip_invisible_text)
ADD_OPT("strip-invisible-text=yes");
if (opts->do_sanitize)
ADD_OPT("sanitize=yes");
if (opts->do_incremental)
Expand Down
4 changes: 3 additions & 1 deletion source/tools/pdfclean.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ static int usage(void)
"\t-i\tcompress image streams\n"
"\t-c\tclean content streams\n"
"\t-s\tsanitize content streams\n"
"\t-I\tstrip invisible text\n"
"\t-t\tcompact object syntax\n"
"\t-tt\tindented object syntax\n"
"\t-L\twrite object labels\n"
Expand Down Expand Up @@ -133,7 +134,7 @@ int pdfclean_main(int argc, char **argv)
opts.write = pdf_default_write_options;
opts.write.dont_regenerate_id = 1;

while ((c = fz_getopt_long(argc, argv, "ade:fgilmp:stczDAE:LO:U:P:SZ", longopts)) != -1)
while ((c = fz_getopt_long(argc, argv, "ade:fgilmp:stczDAE:ILO:U:P:SZ", longopts)) != -1)
{
switch (c)
{
Expand All @@ -149,6 +150,7 @@ int pdfclean_main(int argc, char **argv)
case 'l': opts.write.do_linear += 1; break;
case 'c': opts.write.do_clean += 1; break;
case 's': opts.write.do_sanitize += 1; break;
case 'I': opts.write.do_strip_invisible_text += 1; break;
case 't': pretty = (pretty < 0) ? 0 : 1; break;
case 'A': opts.write.do_appearance += 1; break;
case 'L': opts.write.do_labels = 1; break;
Expand Down