Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ jobs:
# The GitHub runner user needs explicit permissions to access /dev/kvm
sudo chmod 666 /dev/kvm
- name: Run E2E tests
run: python3 tests/run_e2e_tests.py
run: ./tests/run_e2e_tests.py
- name: Upload artifacts on failure
if: failure()
uses: actions/upload-artifact@v7
Expand Down
2 changes: 1 addition & 1 deletion src/fitz_document.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ const Document::OutlineItem* FitzDocument::GetOutline() {
if (outline_ptr.get() == nullptr) {
return nullptr;
}
FitzOutlineItem* root = FitzOutlineItem::Build(_fz_ctx, outline_ptr.get());
FitzOutlineItem* root = FitzOutlineItem::Build(_fz_ctx, _fz_doc, outline_ptr.get());
return root;
}

Expand Down
22 changes: 13 additions & 9 deletions src/fitz_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,41 +45,41 @@ const char* const DEFAULT_ROOT_OUTLINE_ITEM_TITLE = "TABLE OF CONTENTS";

// Destructor with an empty body: nothing is released explicitly here.
FitzOutlineItem::~FitzOutlineItem() {}

// Builds a single outline item from a MuPDF outline node.
//   src == nullptr: only _dest_page is set, to -1 (Build() constructs its
//                   synthetic root this way and fills in the title itself).
//   otherwise:      the title and destination page are taken from src.
// NOTE(review): this span is rendered diff output. Where two near-identical
// lines are adjacent, the first appears to be the pre-change form and the
// second the post-change form.
FitzOutlineItem::FitzOutlineItem(fz_outline* src) {
FitzOutlineItem::FitzOutlineItem(fz_context* ctx, fz_document* doc, fz_outline* src) {
if (src == nullptr) {
// Presumably a "no destination" sentinel -- confirm with callers of
// GetDestPage().
_dest_page = -1;
} else {
// NOTE(review): src->title is assigned without a null check. Confirm whether
// MuPDF can return an outline node whose title is NULL.
_title = src->title;
// Form that stores src->page directly.
_dest_page = src->page;
// Form that passes src->page through fz_page_number_from_location() together
// with ctx and doc and stores the returned page number.
_dest_page = fz_page_number_from_location(ctx, doc, src->page);
}
}

// Returns the destination page stored in _dest_page by the constructor
// (-1 when the item was constructed from a null outline node).
int FitzOutlineItem::GetDestPage() const { return _dest_page; }

// Factory: converts the fz_outline list starting at src into a tree of
// outline items and returns the root as a raw pointer.
//   - no items collected:      returns nullptr.
//   - exactly one top-level:   that item itself becomes the root.
//   - several top-level items: a new root titled
//                              DEFAULT_ROOT_OUTLINE_ITEM_TITLE takes them as
//                              its children.
// The result comes from release()/new, so the caller presumably owns it --
// confirm at the call site.
// NOTE(review): this span is rendered diff output. Where two near-identical
// lines are adjacent, the first appears to be the pre-change form and the
// second the post-change form (which threads ctx and doc through).
FitzOutlineItem* FitzOutlineItem::Build(fz_context* ctx, fz_outline* src) {
FitzOutlineItem* FitzOutlineItem::Build(fz_context* ctx, fz_document* doc, fz_outline* src) {
FitzOutlineItem* root = nullptr;
// Collects the top-level items; each item carries its own children.
std::vector<std::unique_ptr<OutlineItem>> items;
BuildRecursive(src, &items);
BuildRecursive(ctx, doc, src, &items);
if (items.empty()) {
return nullptr;
} else if (items.size() == 1) {
// release() gives up the unique_ptr's ownership; the dynamic_cast narrows
// the OutlineItem* back to FitzOutlineItem*.
root = dynamic_cast<FitzOutlineItem*>(items[0].release());
} else {
// A null source node yields an item with _dest_page == -1.
root = new FitzOutlineItem(nullptr);
root = new FitzOutlineItem(ctx, doc, nullptr);
root->_title = DEFAULT_ROOT_OUTLINE_ITEM_TITLE;
// Exchanges the collected items with the new root's child list.
root->_children.swap(items);
}
return root;
}

void FitzOutlineItem::BuildRecursive(
fz_outline* src,
fz_context* ctx, fz_document* doc, fz_outline* src,
std::vector<std::unique_ptr<Document::OutlineItem>>* output) {
assert(output != nullptr);
for (fz_outline* i = src; i != nullptr; i = i->next) {
FitzOutlineItem* item = new FitzOutlineItem(i);
FitzOutlineItem* item = new FitzOutlineItem(ctx, doc, i);
if (i->down != nullptr) {
BuildRecursive(i->down, &(item->_children));
BuildRecursive(ctx, doc, i->down, &(item->_children));
}
output->push_back(std::unique_ptr<Document::OutlineItem>(item));
}
Expand All @@ -89,7 +89,11 @@ std::string GetPageText(fz_context* ctx, fz_page* page_struct, int line_sep) {
// 1. Render page.
fz_stext_options stext_options = {0};
FitzStextPageScopedPtr text_page(
ctx, fz_new_stext_page_from_page(ctx, page_struct, &stext_options));
ctx, fz_new_stext_page(ctx, fz_bound_page(ctx, page_struct)));
FitzDeviceScopedPtr dev(
ctx, fz_new_stext_device(ctx, text_page.get(), &stext_options));
fz_run_page(ctx, page_struct, dev.get(), fz_identity, nullptr);
fz_close_device(ctx, dev.get());

// 2. Build text.
std::string r;
Expand Down
7 changes: 4 additions & 3 deletions src/fitz_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,16 +46,17 @@ class FitzOutlineItem : public Document::OutlineItem {
int GetDestPage() const;
// Factory method to create outline items from a fz_outline. This constructs
// the entire outline hierarchy. Does NOT take ownership. NOT thread-safe.
// The declaration that also takes a fz_document* matches the definition that
// forwards ctx and doc while building the items.
// NOTE(review): rendered diff output -- the two declarations below appear to
// be the pre-change and post-change form of the same line.
static FitzOutlineItem* Build(fz_context* ctx, fz_outline* src);
static FitzOutlineItem* Build(fz_context* ctx, fz_document* doc, fz_outline* src);

private:
// Destination page number.
int _dest_page;
// We disallow constructors; use the factory method Build() instead.
// A null src is accepted; the definition then sets the destination page to -1.
// NOTE(review): rendered diff output -- the two declarations below appear to
// be the pre-change and post-change form of the same line.
explicit FitzOutlineItem(fz_outline* src);
explicit FitzOutlineItem(fz_context* ctx, fz_document* doc, fz_outline* src);
// Recursive construction, called by Build(). Appends one item per node of the
// sibling list starting at src to *output, descending into each node's child
// list to populate that item's children. output must not be null (the
// definition asserts this).
// NOTE(review): rendered diff output -- the parameter lines below appear to
// be the pre-change form followed by the post-change form.
static void BuildRecursive(
fz_outline* src, std::vector<std::unique_ptr<OutlineItem>>* output);
fz_context* ctx, fz_document* doc, fz_outline* src,
std::vector<std::unique_ptr<OutlineItem>>* output);
};

// Returns the text content of a page, using line_sep to separate lines. NOT
Expand Down
109 changes: 25 additions & 84 deletions src/pdf_document.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ void PDFDocument::Render(

// Loads the document outline with pdf_load_outline() and converts it into an
// outline item tree via PDFOutlineItem::Build(). Returns nullptr when
// pdf_load_outline() returns nullptr.
// NOTE(review): rendered diff output -- the two return statements below
// appear to be the pre-change and post-change form of the same line.
const Document::OutlineItem* PDFDocument::GetOutline() {
fz_outline* src = pdf_load_outline(_fz_context, _pdf_document);
return (src == nullptr) ? nullptr : PDFOutlineItem::Build(_fz_context, src);
// This form additionally passes the embedded fz_document
// (&_pdf_document->super) to Build().
return (src == nullptr) ? nullptr : PDFOutlineItem::Build(_fz_context, &_pdf_document->super, src);
}

int PDFDocument::Lookup(const OutlineItem* item) {
Expand Down Expand Up @@ -267,41 +267,17 @@ std::string PDFDocument::GetPageText(int page, int line_sep) {
// 1. Init MuPDF structures.
pdf_page* page_struct = GetPage(page);

#if MUPDF_VERSION < 10012
fz_stext_sheet* text_sheet = fz_new_stext_sheet(_fz_context);
#endif

// 2. Render page.
#if MUPDF_VERSION >= 10012
// 2. Render page into stext.
fz_stext_options stext_options = {0};
// See #elif MUPDF_VERSION >= 10009 block below.
fz_stext_page* text_page = fz_new_stext_page_from_page(
_fz_context, &(page_struct->super), &stext_options);
#elif MUPDF_VERSION >= 10010
fz_stext_options stext_options = {0};
// See #elif MUPDF_VERSION >= 10009 block below.
fz_stext_page* text_page = fz_new_stext_page_from_page(
_fz_context, &(page_struct->super), text_sheet, &stext_options);
#elif MUPDF_VERSION >= 10009
// The function below is a wrapper around fz_run_page that uses a fresh
// device. We can't use pdf_run_page to gather the text for us.
// These notes are also left in here in case MuPDF's API changes again.
fz_stext_page* text_page = fz_new_stext_page_from_page(
_fz_context, &(page_struct->super), text_sheet);
#else
fz_stext_page* text_page = fz_new_text_page(_fz_context);
fz_device* dev = fz_new_stext_device(_fz_context, text_sheet, text_page);
// I've no idea what fz_{begin,end}_page do, but without them pdf_run_page
// segfaults :-/
fz_begin_page(_fz_context, dev, &fz_infinite_rect, &fz_identity);
pdf_run_page(
_fz_context, _pdf_document, page_struct, dev, &fz_identity, nullptr);
fz_end_page(_fz_context, dev);
#endif
fz_stext_page* text_page = fz_new_stext_page(
_fz_context, fz_bound_page(_fz_context, &(page_struct->super)));
fz_device* dev = fz_new_stext_device(_fz_context, text_page, &stext_options);
fz_run_page(_fz_context, &(page_struct->super), dev, fz_identity, nullptr);
fz_close_device(_fz_context, dev);
fz_drop_device(_fz_context, dev);

// 3. Build text.
std::string r;
#if MUPDF_VERSION >= 10012
for (fz_stext_block* text_block = text_page->first_block;
text_block != nullptr; text_block = text_block->next) {
if (text_block->type != FZ_STEXT_BLOCK_TEXT) {
Expand All @@ -311,33 +287,14 @@ std::string PDFDocument::GetPageText(int page, int line_sep) {
text_line != nullptr; text_line = text_line->next) {
for (fz_stext_char* text_char = text_line->first_char;
text_char != nullptr; text_char = text_char->next) {
{
const int c = text_char->c;
#else
for (fz_page_block* page_block = text_page->blocks;
page_block < text_page->blocks + text_page->len; ++page_block) {
assert(page_block != nullptr);
if (page_block->type != FZ_PAGE_BLOCK_TEXT) {
continue;
}
fz_stext_block* const text_block = page_block->u.text;
assert(text_block != nullptr);
for (fz_stext_line* text_line = text_block->lines;
text_line < text_block->lines + text_block->len; ++text_line) {
assert(text_line != nullptr);
for (fz_stext_span* text_span = text_line->first_span;
text_span != nullptr; text_span = text_span->next) {
for (int i = 0; i < text_span->len; ++i) {
const int c = text_span->text[i].c;
#endif
// A single UTF-8 character cannot take more than 4 bytes, but let's
// go for 8.
char buffer[8];
const int num_bytes = fz_runetochar(buffer, c);
assert(num_bytes <= static_cast<int>(sizeof(buffer)));
buffer[num_bytes] = '\0';
r += buffer;
}
const int c = text_char->c;
// A single UTF-8 character cannot take more than 4 bytes, but let's
// go for 8.
char buffer[8];
const int num_bytes = fz_runetochar(buffer, c);
assert(num_bytes <= static_cast<int>(sizeof(buffer)));
buffer[num_bytes] = '\0';
r += buffer;
}
if (!isspace(r.back())) {
r += line_sep;
Expand All @@ -347,56 +304,49 @@ std::string PDFDocument::GetPageText(int page, int line_sep) {

// 4. Clean up.
fz_drop_stext_page(_fz_context, text_page);
#if MUPDF_VERSION < 10012
fz_drop_stext_sheet(_fz_context, text_sheet);
#endif

return r;
}

// Destructor with an empty body: nothing is released explicitly here.
PDFDocument::PDFOutlineItem::~PDFOutlineItem() {}

// Builds a single outline item from a MuPDF outline node.
//   src == nullptr: only _dest_page is set, to -1 (Build() constructs its
//                   synthetic root this way and fills in the title itself).
//   otherwise:      the title and destination page are taken from src.
// NOTE(review): this span is rendered diff output. The MUPDF_VERSION
// conditional and the unconditional fz_page_number_from_location() line appear
// to be the pre-change and post-change ways of setting _dest_page; likewise
// the two signature lines.
PDFDocument::PDFOutlineItem::PDFOutlineItem(fz_outline* src) {
PDFDocument::PDFOutlineItem::PDFOutlineItem(fz_context* ctx, fz_document* doc, fz_outline* src) {
if (src == nullptr) {
// Presumably a "no destination" sentinel -- confirm with callers of
// GetDestPage().
_dest_page = -1;
} else {
// NOTE(review): src->title is assigned without a null check. Confirm whether
// MuPDF can return an outline node whose title is NULL.
_title = src->title;
// Version-gated form: reads the page from a different fz_outline field
// depending on MUPDF_VERSION.
#if MUPDF_VERSION >= 10010
_dest_page = src->page;
#else
_dest_page = src->dest.ld.gotor.page;
#endif
// Unconditional form: passes src->page through
// fz_page_number_from_location() together with ctx and doc and stores the
// returned page number.
_dest_page = fz_page_number_from_location(ctx, doc, src->page);
}
}

// Returns the destination page stored in _dest_page by the constructor
// (-1 when the item was constructed from a null outline node).
int PDFDocument::PDFOutlineItem::GetDestPage() const { return _dest_page; }

// Factory: converts the fz_outline list starting at src into a tree of
// outline items, drops src, and returns the root as a raw pointer.
//   - no items collected:      returns nullptr.
//   - exactly one top-level:   that item itself becomes the root.
//   - several top-level items: a new root titled
//                              DEFAULT_ROOT_OUTLINE_ITEM_TITLE takes them as
//                              its children.
// The result comes from release()/new, so the caller presumably owns it --
// confirm at the call site.
// NOTE(review): this span is rendered diff output. Where two near-identical
// lines are adjacent, the first appears to be the pre-change form and the
// second the post-change form (which threads ctx and doc through).
PDFDocument::PDFOutlineItem* PDFDocument::PDFOutlineItem::Build(
fz_context* ctx, fz_outline* src) {
fz_context* ctx, fz_document* doc, fz_outline* src) {
PDFOutlineItem* root = nullptr;
// Collects the top-level items; each item carries its own children.
std::vector<std::unique_ptr<OutlineItem>> items;
BuildRecursive(src, &items);
BuildRecursive(ctx, doc, src, &items);
// The items have been built from src by this point, so the MuPDF outline is
// dropped before any of the return paths below.
fz_drop_outline(ctx, src);
if (items.empty()) {
return nullptr;
} else if (items.size() == 1) {
// release() gives up the unique_ptr's ownership; the dynamic_cast narrows
// the OutlineItem* back to PDFOutlineItem*.
root = dynamic_cast<PDFOutlineItem*>(items[0].release());
} else {
// A null source node yields an item with _dest_page == -1.
root = new PDFOutlineItem(nullptr);
root = new PDFOutlineItem(ctx, doc, nullptr);
root->_title = DEFAULT_ROOT_OUTLINE_ITEM_TITLE;
// Exchanges the collected items with the new root's child list.
root->_children.swap(items);
}
return root;
}

void PDFDocument::PDFOutlineItem::BuildRecursive(
fz_outline* src,
fz_context* ctx, fz_document* doc, fz_outline* src,
std::vector<std::unique_ptr<Document::OutlineItem>>* output) {
assert(output != nullptr);
for (fz_outline* i = src; i != nullptr; i = i->next) {
PDFOutlineItem* item = new PDFOutlineItem(i);
PDFOutlineItem* item = new PDFOutlineItem(ctx, doc, i);
if (i->down != nullptr) {
BuildRecursive(i->down, &(item->_children));
BuildRecursive(ctx, doc, i->down, &(item->_children));
}
output->push_back(std::unique_ptr<Document::OutlineItem>(item));
}
Expand Down Expand Up @@ -441,17 +391,8 @@ fz_matrix PDFDocument::Transform(float zoom, int rotation) {
// Returns the bounds of page_struct after applying matrix m, as an fz_irect:
// pdf_bound_page() -> fz_transform_rect() -> fz_round_rect().
// page_struct must not be null (asserted).
// NOTE(review): this span is rendered diff output. The MUPDF_VERSION
// conditional appears to be the pre-change code; the final fz_transform_rect
// line appears to be its post-change replacement, continuing the
// `return fz_round_rect(` line.
fz_irect PDFDocument::GetBoundingBox(
pdf_page* page_struct, const fz_matrix& m) {
assert(page_struct != nullptr);
#if MUPDF_VERSION >= 10014
return fz_round_rect(
fz_transform_rect(pdf_bound_page(_fz_context, page_struct), m));
#else
// Older-API branch: results are written through out-parameters and the
// rounded rect is returned by dereferencing the pointer fz_round_rect yields.
fz_rect bbox;
fz_irect ibbox;
return *fz_round_rect(
&ibbox,
fz_transform_rect(
pdf_bound_page(_fz_context, _pdf_document, page_struct, &bbox), &m));
#endif
// This form names the page box explicitly (FZ_CROP_BOX) when asking for the
// page bounds.
fz_transform_rect(pdf_bound_page(_fz_context, page_struct, FZ_CROP_BOX), m));
}

#endif
Expand Down
7 changes: 4 additions & 3 deletions src/pdf_document.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,16 +94,17 @@ class PDFDocument : public Document {
int GetDestPage() const;
// Factory method to create outline items from a fz_outline. This constructs
// the entire outline hierarchy. Takes ownership of src.
// The declaration that also takes a fz_document* matches the definition that
// forwards ctx and doc while building the items.
// NOTE(review): rendered diff output -- the two declarations below appear to
// be the pre-change and post-change form of the same line.
static PDFOutlineItem* Build(fz_context* ctx, fz_outline* src);
static PDFOutlineItem* Build(fz_context* ctx, fz_document* doc, fz_outline* src);

private:
// Destination page number.
int _dest_page;
// We disallow constructors; use the factory method Build() instead.
// A null src is accepted; the definition then sets the destination page to -1.
// NOTE(review): rendered diff output -- the two declarations below appear to
// be the pre-change and post-change form of the same line.
explicit PDFOutlineItem(fz_outline* src);
explicit PDFOutlineItem(fz_context* ctx, fz_document* doc, fz_outline* src);
// Recursive construction, called by Build(). Appends one item per node of the
// sibling list starting at src to *output, descending into each node's child
// list to populate that item's children. output must not be null (the
// definition asserts this).
// NOTE(review): rendered diff output -- the parameter lines below appear to
// be the pre-change form followed by the post-change form.
static void BuildRecursive(
fz_outline* src, std::vector<std::unique_ptr<OutlineItem>>* output);
fz_context* ctx, fz_document* doc, fz_outline* src,
std::vector<std::unique_ptr<OutlineItem>>* output);
};

// Cache for pdf_page.
Expand Down
Binary file modified tests/qemu/testdata/outline_exit.ppm
Binary file not shown.
Binary file modified tests/qemu/testdata/outline_jump.ppm
Binary file not shown.
177 changes: 30 additions & 147 deletions tests/qemu/testdata/page1.ppm

Large diffs are not rendered by default.

513 changes: 252 additions & 261 deletions tests/qemu/testdata/page170.ppm

Large diffs are not rendered by default.

Binary file modified tests/qemu/testdata/rotate_left.ppm
Binary file not shown.
Binary file modified tests/qemu/testdata/rotate_right.ppm
Binary file not shown.
363 changes: 207 additions & 156 deletions tests/qemu/testdata/scroll_down.ppm

Large diffs are not rendered by default.

288 changes: 105 additions & 183 deletions tests/qemu/testdata/scroll_up.ppm

Large diffs are not rendered by default.

Binary file modified tests/qemu/testdata/search_jump.ppm
Binary file not shown.
Binary file modified tests/qemu/testdata/zoom_fit.ppm
Binary file not shown.
Loading