From f43b536d0c64057a031c63e6f5ea1bbdba65025a Mon Sep 17 00:00:00 2001
From: Guillaume Dott
Date: Wed, 23 Feb 2022 17:24:13 +0100
Subject: [PATCH] Remove spaces from searched text

PDF::Reader::PageTextReceiver removes spaces from runs.
https://github.com/yob/pdf-reader/blob/v2.9.1/lib/pdf/reader/page_text_receiver.rb#L141

The easy solution for now is to remove spaces from searched strings.
Another solution may be to create a custom PageTextReceiver to keep spaces.
---
 lib/pdf/reader/find_text.rb | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/pdf/reader/find_text.rb b/lib/pdf/reader/find_text.rb
index 00ebc0e..ea928f9 100644
--- a/lib/pdf/reader/find_text.rb
+++ b/lib/pdf/reader/find_text.rb
@@ -6,6 +6,8 @@ require_relative "find_text/version"
 
 module PDF::Reader::FindText
   def find_text(text)
+    text = text.tr(' ', '')
+
     runs(merge: false).each_cons(text.size).select do |r|
       r.map(&:text).join == text
     end.map do |r|