Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions lib/mindee/dependencies.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
module Mindee
# Centralized check for optional heavy dependencies
module Dependencies
# Checks the presence of dependencies.
def self.check_all_dependencies
require 'origami'
require 'mini_magick'
Expand All @@ -12,16 +13,20 @@ def self.check_all_dependencies
false
end

# Memoized check.
@all_deps_available = check_all_dependencies

# Checks whether all dependencies are available.
#
# Returns the value cached in +@all_deps_available+ (assigned when this module is loaded)
# rather than re-running the +require+ probes on every call. The probe is only executed
# here if the cached value has not been set yet.
# @return [Boolean] +true+ when every optional dependency could be loaded.
def self.all_deps_available?
  @all_deps_available = check_all_dependencies if @all_deps_available.nil?
  @all_deps_available
end

# Guards code paths that need the optional PDF/image dependencies.
# Does nothing when they are available.
# @raise [LoadError] with MINDEE_DEPENDENCIES_LOAD_ERROR when `all_deps_available?` is false.
# @return [void]
def self.require_all_deps!
  return if all_deps_available?

  raise LoadError, MINDEE_DEPENDENCIES_LOAD_ERROR
end

# Error message to display when dependencies are not available.
MINDEE_DEPENDENCIES_LOAD_ERROR = 'Attempted to load Mindee PDF/Image tools without required dependencies. ' \
"If you need to process local files, please replace the 'mindee-lite' gem " \
"with the standard 'mindee' gem in your Gemfile."
Expand Down
2 changes: 1 addition & 1 deletion lib/mindee/image/image_compressor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ module ImageCompressor
# @param image [MiniMagick::Image, StringIO] Input image.
# @param quality [Integer, nil] Quality of the final file.
# @param max_width [Integer, nil] Maximum width. If not specified, the horizontal ratio will remain the same.
# @param max_height [Integer] Maximum height. If not specified, the vertical ratio will remain the same.
# @param max_height [Integer, nil] Maximum height. If not specified, the vertical ratio will remain the same.
# @return [StringIO]
def self.compress_image(image, quality: 85, max_width: nil, max_height: nil)
processed_image = ImageUtils.to_image(image)
Expand Down
9 changes: 8 additions & 1 deletion lib/mindee/input/base_parameters.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class BaseParameters
# @param [String] model_id ID of the model
# @param [String, nil] file_alias File alias, if applicable.
# @param [Array<String>, nil] webhook_ids List of webhook IDs to propagate the API response to.
# @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues.
# @param [Hash, PollingOptions, nil] polling_options Options for polling. Set only if having timeout issues.
# @param [Boolean, nil] close_file Whether to close the file after parsing.
def initialize(
model_id,
Expand All @@ -40,6 +40,13 @@ def initialize(
@close_file = close_file.nil? || close_file
end

# Writer for the polling options.
# The given value is first passed through `get_clean_polling_options`, and the
# result of that call is what gets stored in `@polling_options`.
# @param [Hash, PollingOptions, nil] polling_options
# @return [PollingOptions]
def polling_options=(polling_options)
  normalized = get_clean_polling_options(polling_options)
  @polling_options = normalized
end

# @return [String] Slug for the endpoint.
def self.slug
if self == BaseParameters
Expand Down
6 changes: 4 additions & 2 deletions lib/mindee/input/sources/local_input_source.rb
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,14 @@ def fix_pdf!(maximum_offset: 500)
# @return [StringIO] The fixed stream.
# @raise [Mindee::Error::MindeePDFError]
def self.fix_pdf(stream, maximum_offset: 500)
  # Consume the stream up to and including the first '%PDF-' marker.
  stream.gets('%PDF-')
  # Reject when reading hit the end of the stream, or when the position just
  # past the marker lies beyond the allowed offset.
  raise Error::MindeePDFError if stream.eof? || stream.pos > maximum_offset

  # Step back over the 5-byte marker so the copy starts at '%PDF-'.
  stream.pos = stream.pos - 5

  # Copy the remainder once into a binary buffer; `to_s` covers a nil read result.
  out_stream = StringIO.new(''.b)
  out_stream.write(stream.read.to_s.b)
  out_stream.rewind
  out_stream
end

# Cuts a PDF file according to provided options.
Expand Down
14 changes: 11 additions & 3 deletions lib/mindee/v2/client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,16 @@ def enqueue(
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
# The source of the input document (local file or URL).
# @param params [Hash, Input::BaseParameters] Parameters for the inference.
# @param polling_options [Hash, PollingOptions, nil] Parameters for polling.
# @return [Parsing::BaseResponse]
def enqueue_and_get_result(
product,
input_source,
params
params,
polling_options = nil
)
enqueue_response = enqueue(product, input_source, params)
normalized_params = normalize_parameters(product.params_type, params)
normalized_params = normalize_parameters(product.params_type, params, polling_options: polling_options)
normalized_params.validate_async_params

if enqueue_response.job.id.nil? || enqueue_response.job.id.empty?
Expand Down Expand Up @@ -121,8 +123,14 @@ def search_models(model_name, model_type)

# If needed, converts the parsing options provided as a hash into a proper BaseParameters subclass object.
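# @param param_class [Class<BaseParameters>] Parameters class used to build the object when a hash is given.
# @param params [Hash, Input::BaseParameters] Params.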
# @param polling_options [Hash, PollingOptions, nil] Polling options.
# @return [BaseParameters]
def normalize_parameters(param_class, params)
def normalize_parameters(param_class, params, polling_options: nil)
if params.is_a?(Hash)
params[:polling_options] = polling_options if polling_options
elsif params.is_a?(Mindee::Input::BaseParameters) && !polling_options.nil?
params.polling_options = polling_options
end
return param_class.from_hash(params: params) if params.is_a?(Hash)

params
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def self.slug
# @param [String] model_id ID of the model
# @param [String, nil] file_alias File alias, if applicable.
# @param [Array<String>, nil] webhook_ids List of webhook IDs to propagate the API response to.
# @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues.
# @param [Hash, PollingOptions, nil] polling_options Options for polling. Set only if having timeout issues.
# @param [Boolean, nil] close_file Whether to close the file after parsing.
def initialize(
model_id,
Expand Down
2 changes: 1 addition & 1 deletion lib/mindee/v2/product/crop/params/crop_parameters.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def self.slug
# @param [String] model_id ID of the model
# @param [String, nil] file_alias File alias, if applicable.
# @param [Array<String>, nil] webhook_ids List of webhook IDs to propagate the API response to.
# @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues.
# @param [Hash, PollingOptions, nil] polling_options Options for polling. Set only if having timeout issues.
# @param [Boolean, nil] close_file Whether to close the file after parsing.
def initialize(
model_id,
Expand Down
2 changes: 1 addition & 1 deletion lib/mindee/v2/product/extraction/params/data_schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class DataSchema
# @return [Mindee::V2::Product::Extraction::Params::DataSchemaReplace]
attr_reader :replace

# @param data_schema [Hash, String]
# @param data_schema [Hash, DataSchema, String]
def initialize(data_schema)
case data_schema
when String
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def self.slug
# @param [String, nil] file_alias File alias, if applicable.
# @param [Array<String>, nil] webhook_ids
# @param [String, nil] text_context
# @param [Hash, nil] polling_options
# @param [Hash, PollingOptions, nil] polling_options
# @param [Boolean, nil] close_file
# @param [DataSchemaField, String, Hash, nil] data_schema
def initialize(
Expand Down
2 changes: 1 addition & 1 deletion lib/mindee/v2/product/ocr/params/ocr_parameters.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def self.slug
# @param [String] model_id ID of the model
# @param [String, nil] file_alias File alias, if applicable.
# @param [Array<String>, nil] webhook_ids List of webhook IDs to propagate the API response to.
# @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues.
# @param [Hash, PollingOptions, nil] polling_options Options for polling. Set only if having timeout issues.
# @param [Boolean, nil] close_file Whether to close the file after parsing.
def initialize(
model_id,
Expand Down
2 changes: 1 addition & 1 deletion lib/mindee/v2/product/split/params/split_parameters.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def self.slug
# @param [String] model_id ID of the model
# @param [String, nil] file_alias File alias, if applicable.
# @param [Array<String>, nil] webhook_ids List of webhook IDs to propagate the API response to.
# @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues.
# @param [Hash, PollingOptions, nil] polling_options Options for polling. Set only if having timeout issues.
# @param [Boolean, nil] close_file Whether to close the file after parsing.
def initialize(
model_id,
Expand Down
1 change: 1 addition & 0 deletions sig/mindee/input/base_parameters.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ module Mindee

def append_form_data: (Array[Array[untyped]]) -> Array[Array[untyped]]
def validate_async_params: () -> void
def polling_options=: (Hash[Symbol | String, untyped] | PollingOptions?) -> PollingOptions

private

Expand Down
4 changes: 2 additions & 2 deletions sig/mindee/v2/client.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ module Mindee

def enqueue: [T] (HTTP::_ProductClass[T] product, Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::BaseParameters params) -> V2::Parsing::JobResponse

def enqueue_and_get_result: [T] (HTTP::_ProductClass[T] product, Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::BaseParameters params) -> T
def enqueue_and_get_result: [T] (HTTP::_ProductClass[T] product, Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::BaseParameters params, ?Hash[String | Symbol, untyped] | Input::PollingOptions?) -> T

def search_models: (String?, String?) -> Mindee::V2::Parsing::Search::SearchResponse

def validate_async_params: (Integer | Float, Integer | Float, Integer) -> void

def normalize_parameters: (singleton(Input::BaseParameters) param_class, Hash[String | Symbol, untyped] | Input::BaseParameters params) -> Input::BaseParameters
def normalize_parameters: (singleton(Input::BaseParameters) param_class, Hash[String | Symbol, untyped] | Input::BaseParameters params, ?polling_options: Hash[String | Symbol, untyped] | Input::PollingOptions?) -> Input::BaseParameters
end
end
end
33 changes: 31 additions & 2 deletions spec/v2/client_v2_integration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,15 @@
polygon: false,
confidence: false,
file_alias: 'rb_integration_test',
polling_options: polling,
text_context: 'this is a test'
)

response = client.enqueue_and_get_result(Mindee::V2::Product::Extraction::Extraction, input, inference_params)
response = client.enqueue_and_get_result(
Mindee::V2::Product::Extraction::Extraction,
input,
inference_params,
polling
)

expect(response).not_to be_nil
expect(response.inference).not_to be_nil
Expand Down Expand Up @@ -68,6 +72,31 @@
expect(result.fields).not_to be_nil
end

# Covers the "legacy" way of supplying polling options: they are set on the
# parameters object (polling_options: polling) and enqueue_and_get_result is
# called with three arguments only, i.e. without a separate polling argument.
it 'parses with legacy polling options successfully' do
# NOTE(review): the file opened is default_sample.jpg but the filename passed to
# FileInputSource is 'multipage_cut-2.pdf' — confirm this mismatch is intentional.
src_path = File.join(V1_PRODUCT_DATA_DIR, 'financial_document', 'default_sample.jpg')
input = Mindee::Input::Source::FileInputSource.new(File.open(src_path, 'rb'), 'multipage_cut-2.pdf')

# Explicit polling configuration built as a PollingOptions object (not a Hash).
polling = Mindee::Input::PollingOptions.new(
initial_delay_sec: 3.0,
delay_sec: 1.5,
max_retries: 80
)

inference_params = Mindee::V2::Product::Extraction::Params::ExtractionParameters.new(
model_id,
rag: false,
raw_text: true,
polygon: false,
confidence: false,
file_alias: 'rb_integration_test',
polling_options: polling,
text_context: 'this is a test'
)
# Polling options travel inside inference_params here; no fourth argument is given.
response = client.enqueue_and_get_result(Mindee::V2::Product::Extraction::Extraction, input, inference_params)
# Only checks that a response with an inference came back; field contents are not asserted.
expect(response).not_to be_nil
expect(response.inference).not_to be_nil
end

it 'parses a filled single-page image successfully' do
src_path = File.join(V1_PRODUCT_DATA_DIR, 'financial_document', 'default_sample.jpg')
input = Mindee::Input::Source::FileInputSource.new(File.open(src_path, 'rb'), 'default_sample.jpg')
Expand Down
Loading