diff --git a/docs/code_samples/default_v2.txt b/docs/code_samples/default_v2.txt index cb9873ba..366c53cf 100644 --- a/docs/code_samples/default_v2.txt +++ b/docs/code_samples/default_v2.txt @@ -33,7 +33,7 @@ input_source = Mindee::Input::Source::PathInputSource.new(input_path) # Send for processing response = mindee_client.enqueue_and_get_inference( input_source, - inference_params # Note: this parameter can also be provided as a Hash. + inference_params # This parameter can also be provided as a Hash. ) # Print a brief summary of the parsed data diff --git a/lib/mindee/parsing/v2/raw_text.rb b/lib/mindee/parsing/v2/raw_text.rb index 08cfec56..9fa829f8 100644 --- a/lib/mindee/parsing/v2/raw_text.rb +++ b/lib/mindee/parsing/v2/raw_text.rb @@ -15,6 +15,10 @@ def initialize(server_response) @pages.push RawTextPage.new(page) end end + + def to_s + "#{@pages.map(&:to_s).join("\n\n")}\n" + end end end end diff --git a/lib/mindee/parsing/v2/raw_text_page.rb b/lib/mindee/parsing/v2/raw_text_page.rb index ad004b81..3b952b62 100644 --- a/lib/mindee/parsing/v2/raw_text_page.rb +++ b/lib/mindee/parsing/v2/raw_text_page.rb @@ -5,13 +5,17 @@ module Parsing module V2 # Raw text extracted from a single page. class RawTextPage - # @return [Boolean] Text content of the page as a single string. '\n' is used to separate lines. + # @return [String] Text content of the page as a single string. '\n' is used to separate lines. attr_reader :content # @param server_response [Hash] Raw JSON parsed into a Hash. def initialize(server_response) @content = server_response['content'] end + + def to_s + @content + end end end end diff --git a/sig/mindee/parsing/v2/raw_text_page.rbs b/sig/mindee/parsing/v2/raw_text_page.rbs index a3579370..536d2c48 100644 --- a/sig/mindee/parsing/v2/raw_text_page.rbs +++ b/sig/mindee/parsing/v2/raw_text_page.rbs @@ -2,7 +2,7 @@ module Mindee module Parsing module V2 class RawTextPage - attr_reader content: string + attr_reader content: String def initialize: (Hash[String | Symbol, untyped]) -> void end diff --git a/spec/parsing/v2/inference_spec.rb b/spec/parsing/v2/inference_spec.rb index 9738ce1c..79f656bc 100644 --- a/spec/parsing/v2/inference_spec.rb +++ b/spec/parsing/v2/inference_spec.rb @@ -10,7 +10,8 @@ let(:standard_field_path) { File.join(inference_path, 'standard_field_types.json') } let(:standard_field_rst_path) { File.join(inference_path, 'standard_field_types.rst') } let(:location_field_path) { File.join(findoc_path, 'complete_with_coordinates.json') } - let(:raw_text_path) { File.join(inference_path, 'raw_texts.json') } + let(:raw_text_json_path) { File.join(inference_path, 'raw_texts.json') } + let(:raw_text_str_path) { File.join(inference_path, 'raw_texts.txt') } let(:blank_path) { File.join(findoc_path, 'blank.json') } let(:complete_path) { File.join(findoc_path, 'complete.json') } @@ -268,7 +269,7 @@ def load_standard_fields describe 'raw_text' do it 'exposes raw texts' do - response = load_v2_inference(raw_text_path) + response = load_v2_inference(raw_text_json_path) active_options = response.inference.active_options expect(active_options).not_to be_nil @@ -278,10 +279,16 @@ def load_standard_fields expect(raw_text).not_to be_nil expect(raw_text).to be_a(Mindee::Parsing::V2::RawText) + expect(raw_text.to_s).to eq(File.read(raw_text_str_path, encoding: 'UTF-8')) + expect(raw_text.pages.length).to eq(2) first = raw_text.pages.first expect(first).to be_a(Mindee::Parsing::V2::RawTextPage) expect(first.content).to eq('This is the raw text of the first page...') + + raw_text.pages.each do |page| + expect(page.content).to be_a(String) + end end end