pdf-reader-find_text/lib/pdf/reader/find_text.rb

29 lines
631 B
Ruby
Raw Normal View History

2022-02-23 16:55:46 +01:00
# frozen_string_literal: true
2022-02-23 17:12:31 +01:00
require "pdf-reader"
2022-02-23 16:55:46 +01:00
require_relative "find_text/version"
2022-02-23 17:12:31 +01:00
module PDF::Reader::FindText
2022-02-24 10:00:10 +01:00
# Finds every occurrence of `value` among the runs returned by
# `runs(merge: false)`.
#
# A window as wide as the number of non-space characters in `value` slides
# over those runs. Each window is merged into a single run via
# `merge_runs_with_max_length` and kept when the merged text begins with
# `value`.
#
# NOTE(review): this presumes the unmerged runs are single glyphs that carry
# no space characters of their own -- confirm against pdf-reader.
#
# @param value [String] the text to look for
# @return [Array] one merged run per match; empty when nothing matches or
#   when `value` is empty or consists only of spaces
def find_text(value)
  window = value.delete(' ').size
  # each_cons raises ArgumentError for a window of 0, so a blank query is
  # answered with "no matches" instead of an obscure "invalid size" error.
  return [] if window.zero?

  max_length = value.size
  matches = runs(merge: false).each_cons(window).map do |chars|
    candidate = merge_runs_with_max_length(chars, max_length)
    candidate if candidate.text.start_with?(value)
  end
  matches.compact
end
private
2022-02-24 10:00:10 +01:00
# Folds `chars` into one run, left to right, starting from the first element.
#
# A run is appended to the accumulated run only while the accumulator reports
# it as mergable AND the accumulated text is still shorter than `length`.
# Runs that fail either check are skipped, and the fold continues with the
# remaining runs.
#
# The length check happens before appending, so the result may exceed
# `length` if a single `+` adds more than one character.
# NOTE(review): whether `+` can do that depends on the run class -- confirm.
#
# @param chars [Enumerable] runs responding to `mergable?`, `text` and `+`
# @param length [Integer] text length at which merging stops
# @return [Object, nil] the merged run; the sole element when `chars` has
#   one; nil when `chars` is empty
def merge_runs_with_max_length(chars, length)
  chars.reduce do |merged, run|
    can_extend = merged.mergable?(run) && merged.text.size < length
    can_extend ? merged + run : merged
  end
end
end
2022-02-23 17:12:31 +01:00
# Mix FindText into PDF::Reader::Page so its public methods become instance
# methods of Page.
PDF::Reader::Page.include(PDF::Reader::FindText)