2022-02-23 16:55:46 +01:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2022-02-23 17:12:31 +01:00
|
|
|
require "pdf-reader"
|
|
|
|
|
2022-02-23 16:55:46 +01:00
|
|
|
require_relative "find_text/version"
|
|
|
|
|
2022-02-23 17:12:31 +01:00
|
|
|
module PDF
  class Reader
    # Mixin that searches a page's individual text runs for a string.
    #
    # The including object must respond to +runs(merge: false)+ and return an
    # ordered collection of run objects that implement +text+, +mergable?+
    # and <tt>+</tt>.
    module FindText
      # Finds every occurrence of +value+ among the unmerged runs.
      #
      # Slides a window over the runs. The window holds as many runs as
      # +value+ has non-space characters. Each window is merged into a single
      # run, which is kept when its text starts with +value+.
      #
      # NOTE(review): the window ignores spaces, presumably because merging
      # runs with <tt>+</tt> can insert a separator between distant runs.
      # Confirm against the run class in use.
      #
      # @param value [String] the text to look for
      # @return [Array] merged runs whose text begins with +value+; empty when
      #   nothing matches or when +value+ has no non-space characters
      def find_text(value)
        window_size = value.delete(' ').size
        # each_cons(0) raises ArgumentError ("invalid size"). A blank search
        # string cannot match anything, so report no hits instead.
        return [] if window_size.zero?

        runs(merge: false).each_cons(window_size).map do |chars|
          string = merge_runs_with_max_length(chars, value.size)
          # Merging may overshoot the requested length, so compare the prefix.
          string if string.text[0, value.size] == value
        end.compact
      end

      private

      # Folds +chars+ into one run, left to right.
      #
      # A run is appended only while the accumulated run reports it as
      # mergable and the accumulated text is still shorter than +length+.
      # Otherwise the accumulated run is carried forward unchanged.
      #
      # @param chars [Array] runs to combine, in reading order
      # @param length [Integer] text length at which merging stops
      # @return [Object] the accumulated run (the first run if nothing merged)
      def merge_runs_with_max_length(chars, length)
        chars.inject do |string, char|
          if string.mergable?(char) && string.text.size < length
            string + char
          else
            string
          end
        end
      end
    end
  end
end
|
2022-02-23 17:12:31 +01:00
|
|
|
|
|
|
|
# Mix FindText into PDF::Reader::Page so page instances gain #find_text.
PDF::Reader::Page.include(PDF::Reader::FindText)
|