pdf-reader-find_text/lib/pdf/reader/find_text.rb

21 lines
468 B
Ruby
Raw Normal View History

2022-02-23 16:55:46 +01:00
# frozen_string_literal: true
2022-02-23 17:12:31 +01:00
require "pdf-reader"
2022-02-23 16:55:46 +01:00
require_relative "find_text/version"
2022-02-23 17:12:31 +01:00
# Mixin that adds an exact-text search over the text runs of its host.
#
# The host must respond to +runs(merge:)+ and yield objects exposing
# +x+, +y+, +width+, +font_size+ and +text+.
module PDF::Reader::FindText
  # Finds every occurrence of +text+ among the unmerged text runs.
  #
  # Spaces are removed from +text+ before matching. A match is a window of
  # consecutive runs whose concatenated text equals the stripped query.
  #
  # @param text [String] text to look for; spaces in it are ignored
  # @return [Array<PDF::Reader::TextRun>] one run per match, positioned at the
  #   first matched run, as wide as the sum of the matched runs' widths and
  #   using the largest font size among them; empty when nothing matches or
  #   when +text+ contains nothing but spaces
  def find_text(text)
    needle = text.delete(" ")
    # each_cons(0) raises ArgumentError, so a blank query simply has no matches.
    return [] if needle.empty?

    # NOTE(review): the window is needle.size runs wide, so this presumably
    # relies on each unmerged run holding exactly one character — confirm.
    matches = runs(merge: false).each_cons(needle.size).select do |window|
      window.map(&:text).join == needle
    end

    matches.map do |window|
      first = window.first
      PDF::Reader::TextRun.new(
        first.x,
        first.y,
        window.sum(&:width),
        window.map(&:font_size).max,
        window.map(&:text).join
      )
    end
  end
end
2022-02-23 17:12:31 +01:00
# Mix the search helper into PDF::Reader::Page so pages respond to #find_text.
PDF::Reader::Page.include(PDF::Reader::FindText)