From 355813462f1cf6c76f246034ed33e3148816e5e6 Mon Sep 17 00:00:00 2001 From: Guillaume Dott Date: Thu, 24 Feb 2022 10:00:10 +0100 Subject: [PATCH] Handle strings with spaces --- lib/pdf/reader/find_text.rb | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/lib/pdf/reader/find_text.rb b/lib/pdf/reader/find_text.rb index ea928f9..1876f2b 100644 --- a/lib/pdf/reader/find_text.rb +++ b/lib/pdf/reader/find_text.rb @@ -5,14 +5,22 @@ require "pdf-reader" require_relative "find_text/version" module PDF::Reader::FindText - def find_text(text) - text = text.tr(' ', '') + def find_text(value) + runs(merge: false).each_cons(value.tr(' ', '').size).map do |chars| + string = merge_runs_with_max_length(chars, value.size) + string if string.text[0, value.size] == value + end.compact + end - runs(merge: false).each_cons(text.size).select do |r| - r.map(&:text).join == text - end.map do |r| - PDF::Reader::TextRun.new r.first.x, r.first.y, - r.sum(&:width), r.map(&:font_size).max, r.map(&:text).join + private + + def merge_runs_with_max_length(chars, length) + chars.inject do |string, char| + if string.mergable?(char) && string.text.size < length + string + char + else + string + end end end end