From 64576a1c7e52221e815a634f620630030a9a6de1 Mon Sep 17 00:00:00 2001 From: Guillaume Dott Date: Wed, 23 Feb 2022 17:12:31 +0100 Subject: [PATCH] Add PDF::Reader::Page#find_text method --- .gitignore | 1 + README.md | 2 +- lib/pdf/reader/find_text.rb | 16 +++++++++++----- lib/pdf/reader/find_text/version.rb | 4 ++-- pdf-reader-find_text.gemspec | 4 ++-- test/pdf/reader/find_text_test.rb | 4 ++-- 6 files changed, 19 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index 9106b2a..dc96cb9 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ /pkg/ /spec/reports/ /tmp/ +/Gemfile.lock diff --git a/README.md b/README.md index 673179d..7a5b65e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Pdf::Reader::FindText +# PDF::Reader::FindText Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/pdf/reader/find_text`. To experiment with that code, run `bin/console` for an interactive prompt. diff --git a/lib/pdf/reader/find_text.rb b/lib/pdf/reader/find_text.rb index 3e52fba..00ebc0e 100644 --- a/lib/pdf/reader/find_text.rb +++ b/lib/pdf/reader/find_text.rb @@ -1,12 +1,18 @@ # frozen_string_literal: true +require "pdf-reader" + require_relative "find_text/version" -module Pdf - module Reader - module FindText - class Error < StandardError; end - # Your code goes here... +module PDF::Reader::FindText + def find_text(text) + runs(merge: false).each_cons(text.size).select do |r| + r.map(&:text).join == text + end.map do |r| + PDF::Reader::TextRun.new r.first.x, r.first.y, + r.sum(&:width), r.map(&:font_size).max, r.map(&:text).join end end end + +PDF::Reader::Page.include PDF::Reader::FindText diff --git a/lib/pdf/reader/find_text/version.rb b/lib/pdf/reader/find_text/version.rb index e34990e..80f21bf 100644 --- a/lib/pdf/reader/find_text/version.rb +++ b/lib/pdf/reader/find_text/version.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true -module Pdf - module Reader +module PDF + class Reader module FindText VERSION = "0.1.0" end diff --git a/pdf-reader-find_text.gemspec b/pdf-reader-find_text.gemspec index c327931..6f48e92 100644 --- a/pdf-reader-find_text.gemspec +++ b/pdf-reader-find_text.gemspec @@ -4,7 +4,7 @@ require_relative "lib/pdf/reader/find_text/version" Gem::Specification.new do |spec| spec.name = "pdf-reader-find_text" - spec.version = Pdf::Reader::FindText::VERSION + spec.version = PDF::Reader::FindText::VERSION spec.authors = ["Guillaume Dott"] spec.email = ["guillaume+github@dott.fr"] @@ -31,7 +31,7 @@ Gem::Specification.new do |spec| spec.require_paths = ["lib"] # Uncomment to register a new dependency of your gem - # spec.add_dependency "example-gem", "~> 1.0" + spec.add_dependency "pdf-reader", "~> 2.9.2" # For more information and examples about making a new gem, checkout our # guide at: https://bundler.io/guides/creating_gem.html diff --git a/test/pdf/reader/find_text_test.rb b/test/pdf/reader/find_text_test.rb index 14f143a..2d1ca5b 100644 --- a/test/pdf/reader/find_text_test.rb +++ b/test/pdf/reader/find_text_test.rb @@ -2,9 +2,9 @@ require "test_helper" -class Pdf::Reader::FindTextTest < Minitest::Test +class PDF::Reader::FindTextTest < Minitest::Test def test_that_it_has_a_version_number - refute_nil ::Pdf::Reader::FindText::VERSION + refute_nil ::PDF::Reader::FindText::VERSION end def test_it_does_something_useful