diff --git a/mindee/error/mindee_http_error.py b/mindee/error/mindee_http_error.py index 40c4fbf5..89936a11 100644 --- a/mindee/error/mindee_http_error.py +++ b/mindee/error/mindee_http_error.py @@ -1,5 +1,5 @@ from mindee.error.mindee_error import MindeeError -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict class MindeeHTTPError(RuntimeError): diff --git a/mindee/input/url_input_source.py b/mindee/input/url_input_source.py index 1f32baea..972cb75b 100644 --- a/mindee/input/url_input_source.py +++ b/mindee/input/url_input_source.py @@ -10,6 +10,7 @@ from mindee.error.mindee_error import MindeeSourceError from mindee.input.bytes_input import BytesInput from mindee.logger import logger +from mindee.parsing.common.string_dict import StringDict class URLInputSource: @@ -173,7 +174,14 @@ def __fill_filename(self, filename=None) -> str: return filename @staticmethod - def __make_request(url, auth, headers, redirects, max_redirects) -> bytes: + def __make_request( + url, + auth, + headers, + redirects, + max_redirects, + http_client: httpx.Client | None = None, + ) -> bytes: """ Makes an HTTP request to the given URL, while following redirections. @@ -185,20 +193,36 @@ def __make_request(url, auth, headers, redirects, max_redirects) -> bytes: :return: The content of the response. :raises MindeeSourceError: If max redirects are exceeded or the request fails. """ - result = httpx.get(url, headers=headers, timeout=120, auth=auth) + get_kwargs: StringDict = { + "headers": headers, + "timeout": 120, + "auth": auth, + "follow_redirects": True, + } + if http_client is None: + result = httpx.get(url, **get_kwargs) + else: + result = http_client.get(url, **get_kwargs) if 299 < result.status_code < 400: if redirects == max_redirects: raise MindeeSourceError( - f"Can't reach URL after {redirects} out of {max_redirects} redirects, " + f"Can't reach URL after {redirects} out of {max_redirects} " + f"redirects, " f"aborting operation." ) return URLInputSource.__make_request( - result.headers["Location"], auth, headers, redirects + 1, max_redirects + result.headers["Location"], + auth, + headers, + redirects + 1, + max_redirects, + http_client, ) if result.status_code >= 400 or result.status_code < 200: raise MindeeSourceError( f"Couldn't retrieve file from server, error code {result.status_code}." ) - + if http_client is not None and not http_client.is_closed: + http_client.close() return result.content diff --git a/mindee/mindee_http/response_validation.py b/mindee/mindee_http/response_validation.py index fbeb423b..bcf0c771 100644 --- a/mindee/mindee_http/response_validation.py +++ b/mindee/mindee_http/response_validation.py @@ -2,7 +2,7 @@ import httpx -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict def is_valid_sync_response(response: httpx.Response) -> bool: diff --git a/mindee/mindee_http/settings_mixin.py b/mindee/mindee_http/settings_mixin.py index a5452869..59bc7f48 100644 --- a/mindee/mindee_http/settings_mixin.py +++ b/mindee/mindee_http/settings_mixin.py @@ -3,7 +3,7 @@ class SettingsMixin: base_url: str """Base URL for all V2 requests.""" - request_timeout: int + request_timeout: float """Timeout for all requests.""" def set_timeout(self, value: str | int) -> None: diff --git a/mindee/v1/client.py b/mindee/v1/client.py index cc500a85..a6ff7b51 100644 --- a/mindee/v1/client.py +++ b/mindee/v1/client.py @@ -1,5 +1,7 @@ from time import sleep +import httpx + from mindee.client_mixin import ClientMixin from mindee.error.mindee_error import MindeeClientError, MindeeError from mindee.error.mindee_http_error import handle_error @@ -59,14 +61,21 @@ class Client(ClientMixin): """ api_key: str + """API key for all endpoints.""" + http_client: httpx.Client | None + """HTTP client for making requests.""" - def __init__(self, api_key: str = "") -> None: + def __init__( + self, api_key: str = "", http_client: httpx.Client | None = None + ) -> None: """ Mindee API Client. :param api_key: Your API key for all endpoints + :param http_client: HTTP client for making requests. """ self.api_key = api_key + self.http_client = http_client def parse( self, @@ -522,7 +531,8 @@ def _send_to_workflow( raise MindeeClientError("No input document provided") workflow_endpoint = WorkflowEndpoint( - WorkflowSettings(api_key=self.api_key, workflow_id=workflow_id) + WorkflowSettings(api_key=self.api_key, workflow_id=workflow_id), + self.http_client, ) response = workflow_endpoint.workflow_execution_post(input_source, options) @@ -555,8 +565,12 @@ def _build_endpoint( version=version, ) if account_name and len(account_name) > 0 and account_name != "mindee": - return CustomEndpoint(endpoint_name, account_name, version, api_settings) - return Endpoint(endpoint_name, account_name, version, api_settings) + return CustomEndpoint( + endpoint_name, account_name, version, api_settings, self.http_client + ) + return Endpoint( + endpoint_name, account_name, version, api_settings, self.http_client + ) def create_endpoint( self, @@ -583,3 +597,20 @@ def create_endpoint( ) version = "1" return self._build_endpoint(endpoint_name, account_name, version) + + def close(self): + """Close the HTTP client.""" + if self.http_client and not self.http_client.is_closed: + self.http_client.close() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + + def __del__(self): + """Ensure the HTTP client is closed when the object is garbage collected.""" + if self.http_client and self.http_client and not self.http_client.is_closed: + logger.info("Force-closing unclosed Mindee Client (V1) %s.", str(self)) + self.close() diff --git a/mindee/v1/mindee_http/base_endpoint.py b/mindee/v1/mindee_http/base_endpoint.py index 6b252d8a..8bb06404 100644 --- a/mindee/v1/mindee_http/base_endpoint.py +++ b/mindee/v1/mindee_http/base_endpoint.py @@ -1,13 +1,24 @@ +import httpx + from mindee.v1.mindee_http.base_settings import BaseSettings class BaseEndpoint: """Base endpoint class for the Mindee API.""" - def __init__(self, settings: BaseSettings) -> None: + settings: BaseSettings + """Settings relating to all endpoints.""" + http_client: httpx.Client | None + """HTTP client for making requests.""" + + def __init__( + self, settings: BaseSettings, http_client: httpx.Client | None = None + ) -> None: """ Base API endpoint class for all endpoints. :param settings: Settings relating to all endpoints. + :param http_client: HTTP client for making requests. """ self.settings = settings + self.http_client = http_client diff --git a/mindee/v1/mindee_http/endpoint.py b/mindee/v1/mindee_http/endpoint.py index 4d6e0a5d..e03fa216 100644 --- a/mindee/v1/mindee_http/endpoint.py +++ b/mindee/v1/mindee_http/endpoint.py @@ -1,8 +1,10 @@ +from collections.abc import Callable + import httpx from mindee.input.local_input_source import LocalInputSource from mindee.input.url_input_source import URLInputSource -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v1.mindee_http.base_endpoint import BaseEndpoint from mindee.v1.mindee_http.mindee_api import MindeeAPI @@ -13,7 +15,12 @@ class Endpoint(BaseEndpoint): settings: MindeeAPI def __init__( - self, url_name: str, owner: str, version: str, settings: MindeeAPI + self, + url_name: str, + owner: str, + version: str, + settings: MindeeAPI, + http_client: httpx.Client | None = None, ) -> None: """ Generic API endpoint for a product. @@ -21,8 +28,10 @@ def __init__( :param owner: owner of the product :param url_name: name of the product as it appears in the URL :param version: interface version + :param settings: settings for the API + :param http_client: HTTP client for making requests. """ - super().__init__(settings) + super().__init__(settings, http_client) self.owner = owner self.url_name = url_name self.version = version @@ -42,7 +51,8 @@ def predict_req_post( :param include_words: Include raw OCR words in the response :param close_file: Whether to `close()` the file after parsing it. :param cropper: Including Mindee cropping results. - :param full_text: Whether to include the full OCR text response in compatible APIs. + :param full_text: Whether to include the full OCR text response in compatible + APIs. :return: httpx response """ return self._custom_request( @@ -66,7 +76,8 @@ def predict_async_req_post( :param include_words: Include raw OCR words in the response :param close_file: Whether to `close()` the file after parsing it. :param cropper: Including Mindee cropping results. - :param full_text: Whether to include the full OCR text response in compatible APIs. + :param full_text: Whether to include the full OCR text response in compatible + APIs. :param workflow_id: Workflow ID. :param rag: If set, will enable Retrieval-Augmented Generation. :return: httpx response @@ -105,6 +116,8 @@ def _custom_request( if rag: params["rag"] = "true" + post_kwargs: StringDict = {} + if workflow_id: url = f"{self.settings.base_url}/v1/workflows/{workflow_id}/{route}" else: @@ -112,25 +125,21 @@ def _custom_request( if isinstance(input_source, URLInputSource): data["document"] = input_source.url - response = httpx.post( - url=url, - headers=self.settings.base_headers, - data=data, - params=params, - timeout=self.settings.request_timeout, - ) else: - files = {"document": input_source.read_contents(close_file)} - response = httpx.post( - url=url, - files=files, - headers=self.settings.base_headers, - data=data, - params=params, - timeout=self.settings.request_timeout, - ) - - return response + post_kwargs["files"] = {"document": input_source.read_contents(close_file)} + post_caller: Callable + if self.http_client is None or self.http_client.is_closed: + post_caller = httpx.post + post_kwargs["timeout"] = self.settings.request_timeout + else: + post_caller = self.http_client.post + return post_caller( + url, + headers=self.settings.base_headers, + data=data, + params=params, + **post_kwargs, + ) def document_queue_req_get(self, queue_id: str) -> httpx.Response: """ @@ -138,21 +147,30 @@ def document_queue_req_get(self, queue_id: str) -> httpx.Response: :param queue_id: queue_id received from the API """ - return httpx.get( - f"{self.settings.url_root}/documents/queue/{queue_id}", + get_kwargs: StringDict = {"follow_redirects": True} + get_caller: Callable + if self.http_client is None or self.http_client.is_closed: + get_caller = httpx.get + get_kwargs["timeout"] = self.settings.request_timeout + else: + get_caller = self.http_client.get + return get_caller( + url=f"{self.settings.url_root}/documents/queue/{queue_id}", headers=self.settings.base_headers, - timeout=self.settings.request_timeout, - follow_redirects=True, + **get_kwargs, ) def openapi_get_req(self) -> httpx.Response: """Get the OpenAPI specification of the product.""" - return httpx.get( - f"{self.settings.url_root}/openapi.json", - headers=self.settings.base_headers, - timeout=self.settings.request_timeout, - follow_redirects=True, - ) + url = f"{self.settings.url_root}/openapi.json" + get_kwargs: StringDict = {} + get_caller: Callable + if self.http_client is None or self.http_client.is_closed: + get_caller = httpx.get + get_kwargs["timeout"] = self.settings.request_timeout + else: + get_caller = self.http_client.get + return get_caller(url, headers=self.settings.base_headers, **get_kwargs) def document_feedback_req_put( self, document_id: str, feedback: StringDict @@ -163,11 +181,18 @@ def document_feedback_req_put( :param document_id: ID of the document to send feedback to. :param feedback: Feedback object to send. """ - return httpx.put( - f"{self.settings.base_url}/v1/documents/{document_id}/feedback", + put_kwargs: StringDict = {"follow_redirects": True} + put_caller: Callable + if self.http_client is None or self.http_client.is_closed: + put_caller = httpx.put + put_kwargs["timeout"] = self.settings.request_timeout + else: + put_caller = self.http_client.put + return put_caller( + url=f"{self.settings.url_root}/documents/{document_id}/feedback", headers=self.settings.base_headers, data=feedback, - timeout=self.settings.request_timeout, + **put_kwargs, ) @@ -184,17 +209,20 @@ def training_req_post( :return: httpx response :param close_file: Whether to `close()` the file after parsing it. """ - files = {"document": input_source.read_contents(close_file)} - params = {"training": True, "with_candidates": True} - - response = httpx.post( - f"{self.settings.url_root}/predict", - files=files, + post_kwargs: StringDict = {"follow_redirects": True} + post_caller: Callable + if self.http_client is None or self.http_client.is_closed: + post_caller = httpx.post + post_kwargs["timeout"] = self.settings.request_timeout + else: + post_caller = self.http_client.post + return post_caller( + url=f"{self.settings.url_root}/predict", headers=self.settings.base_headers, - params=params, - timeout=self.settings.request_timeout, + files={"document": input_source.read_contents(close_file)}, + params={"training": True, "with_candidates": True}, + **post_kwargs, ) - return response def training_async_req_post( self, input_source: LocalInputSource, close_file: bool = True @@ -206,17 +234,20 @@ def training_async_req_post( :return: httpx response :param close_file: Whether to `close()` the file after parsing it. """ - files = {"document": input_source.read_contents(close_file)} - params = {"training": True, "async": True} - - response = httpx.post( - f"{self.settings.url_root}/predict", - files=files, + post_kwargs: StringDict = {"follow_redirects": True} + post_caller: Callable + if self.http_client is None or self.http_client.is_closed: + post_caller = httpx.post + post_kwargs["timeout"] = self.settings.request_timeout + else: + post_caller = self.http_client.post + return post_caller( + url=f"{self.settings.url_root}/predict", headers=self.settings.base_headers, - params=params, - timeout=self.settings.request_timeout, + files={"document": input_source.read_contents(close_file)}, + params={"training": True, "async": True}, + **post_kwargs, ) - return response def document_req_del(self, document_id: str) -> httpx.Response: """ @@ -224,12 +255,19 @@ def document_req_del(self, document_id: str) -> httpx.Response: :param document_id: ID of the document """ - response = httpx.delete( - f"{self.settings.url_root}/documents/{document_id}", + + delete_kwargs: StringDict = {"follow_redirects": True} + delete_caller: Callable + if self.http_client is None or self.http_client.is_closed: + delete_caller = httpx.delete + delete_kwargs["timeout"] = self.settings.request_timeout + else: + delete_caller = self.http_client.delete + return delete_caller( + url=f"{self.settings.url_root}/documents/{document_id}", headers=self.settings.base_headers, - timeout=self.settings.request_timeout, + **delete_kwargs, ) - return response def documents_req_get(self, page_id: int = 1) -> httpx.Response: """ @@ -237,17 +275,21 @@ def documents_req_get(self, page_id: int = 1) -> httpx.Response: :param page_id: Page number """ - params = { - "page": page_id, - } - response = httpx.get( - f"{self.settings.url_root}/documents", + get_kwargs: StringDict = {"follow_redirects": True} + get_caller: Callable + if self.http_client is None or self.http_client.is_closed: + get_caller = httpx.get + get_kwargs["timeout"] = self.settings.request_timeout + else: + get_caller = self.http_client.get + return get_caller( + url=f"{self.settings.url_root}/documents", headers=self.settings.base_headers, - params=params, - timeout=self.settings.request_timeout, - follow_redirects=True, + params={ + "page": page_id, + }, + **get_kwargs, ) - return response def document_req_get(self, document_id: str) -> httpx.Response: """ @@ -255,19 +297,25 @@ def document_req_get(self, document_id: str) -> httpx.Response: :param document_id: ID of the document """ - params = { - "include_annotations": True, - "include_candidates": True, - "global_orientation": True, + get_kwargs: StringDict = { + "follow_redirects": True, } - response = httpx.get( - f"{self.settings.url_root}/documents/{document_id}", + get_caller: Callable + if self.http_client is None or self.http_client.is_closed: + get_caller = httpx.get + get_kwargs["timeout"] = self.settings.request_timeout + else: + get_caller = self.http_client.get + return get_caller( + url=f"{self.settings.url_root}/documents/{document_id}", headers=self.settings.base_headers, - params=params, - timeout=self.settings.request_timeout, - follow_redirects=True, + params={ + "include_annotations": True, + "include_candidates": True, + "global_orientation": True, + }, + **get_kwargs, ) - return response def annotations_req_post( self, document_id: str, annotations: dict @@ -279,13 +327,21 @@ def annotations_req_post( :param annotations: Annotations object :return: httpx response """ - response = httpx.post( - f"{self.settings.url_root}/documents/{document_id}/annotations", + post_kwargs: StringDict = { + "follow_redirects": True, + } + post_caller: Callable + if self.http_client is None or self.http_client.is_closed: + post_caller = httpx.post + post_kwargs["timeout"] = self.settings.request_timeout + else: + post_caller = self.http_client.post + return post_caller( + url=f"{self.settings.url_root}/documents/{document_id}/annotations", headers=self.settings.base_headers, json=annotations, - timeout=self.settings.request_timeout, + **post_kwargs, ) - return response def annotations_req_put( self, document_id: str, annotations: dict @@ -297,13 +353,19 @@ def annotations_req_put( :param annotations: Annotations object :return: httpx response """ - response = httpx.put( - f"{self.settings.url_root}/documents/{document_id}/annotations", + put_kwargs: StringDict = {"follow_redirects": True} + put_caller: Callable + if self.http_client is None or self.http_client.is_closed: + put_caller = httpx.put + put_kwargs["timeout"] = self.settings.request_timeout + else: + put_caller = self.http_client.put + return put_caller( + url=f"{self.settings.url_root}/documents/{document_id}/annotations", headers=self.settings.base_headers, json=annotations, - timeout=self.settings.request_timeout, + **put_kwargs, ) - return response def annotations_req_del(self, document_id: str) -> httpx.Response: """ @@ -312,9 +374,15 @@ def annotations_req_del(self, document_id: str) -> httpx.Response: :param document_id: ID of the document to annotate :return: httpx response """ - response = httpx.delete( - f"{self.settings.url_root}/documents/{document_id}/annotations", + delete_kwargs: StringDict = {"follow_redirects": True} + delete_caller: Callable + if self.http_client is None or self.http_client.is_closed: + delete_caller = httpx.delete + delete_kwargs["timeout"] = self.settings.request_timeout + else: + delete_caller = self.http_client.delete + return delete_caller( + url=f"{self.settings.url_root}/documents/{document_id}/annotations", headers=self.settings.base_headers, - timeout=self.settings.request_timeout, + **delete_kwargs, ) - return response diff --git a/mindee/v1/mindee_http/workflow_endpoint.py b/mindee/v1/mindee_http/workflow_endpoint.py index 7217013c..d2e098fa 100644 --- a/mindee/v1/mindee_http/workflow_endpoint.py +++ b/mindee/v1/mindee_http/workflow_endpoint.py @@ -1,7 +1,10 @@ +from collections.abc import Callable + import httpx from mindee.input.local_input_source import LocalInputSource from mindee.input.url_input_source import URLInputSource +from mindee.parsing.common.string_dict import StringDict from mindee.v1.client_options.workflow_options import WorkflowOptions from mindee.v1.mindee_http.base_endpoint import BaseEndpoint from mindee.v1.mindee_http.workflow_settings import WorkflowSettings @@ -11,14 +14,17 @@ class WorkflowEndpoint(BaseEndpoint): """Workflow endpoint.""" settings: WorkflowSettings + """Settings object.""" - def __init__(self, settings: WorkflowSettings) -> None: + def __init__( + self, settings: WorkflowSettings, http_client: httpx.Client | None = None + ) -> None: """ Workflow Endpoint. :param settings: Settings object. """ - super().__init__(settings) + super().__init__(settings, http_client) def workflow_execution_post( self, @@ -47,25 +53,23 @@ def workflow_execution_post( params["full_text_ocr"] = "true" if options.rag: params["rag"] = "true" - + post_kwargs: StringDict = {} + files = None if isinstance(input_source, URLInputSource): data["document"] = input_source.url - response = httpx.post( - self.settings.url_root, - headers=self.settings.base_headers, - data=data, - params=params, - timeout=self.settings.request_timeout, - ) else: files = {"document": input_source.read_contents(True)} - response = httpx.post( - self.settings.url_root, - files=files, - headers=self.settings.base_headers, - data=data, - params=params, - timeout=self.settings.request_timeout, - ) - - return response + post_caller: Callable + if self.http_client is None or self.http_client.is_closed: + post_caller = httpx.post + post_kwargs["timeout"] = self.settings.request_timeout + else: + post_caller = self.http_client.post + return post_caller( + self.settings.url_root, + headers=self.settings.base_headers, + data=data, + params=params, + files=files, + **post_kwargs, + ) diff --git a/mindee/v1/parsing/custom/classification.py b/mindee/v1/parsing/custom/classification.py index c11099ff..bc5bd4a3 100644 --- a/mindee/v1/parsing/custom/classification.py +++ b/mindee/v1/parsing/custom/classification.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict class ClassificationField: diff --git a/mindee/v1/parsing/custom/list.py b/mindee/v1/parsing/custom/list.py index 91982d8f..61ca7368 100644 --- a/mindee/v1/parsing/custom/list.py +++ b/mindee/v1/parsing/custom/list.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v1.parsing.standard.base import FieldPositionMixin diff --git a/mindee/v1/parsing/generated/generated_list.py b/mindee/v1/parsing/generated/generated_list.py index 5712a5fc..d0fe6d3b 100644 --- a/mindee/v1/parsing/generated/generated_list.py +++ b/mindee/v1/parsing/generated/generated_list.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v1.parsing.generated.generated_object import ( GeneratedObjectField, is_generated_object, diff --git a/mindee/v1/parsing/generated/generated_object.py b/mindee/v1/parsing/generated/generated_object.py index f29e15e0..ab4736e4 100644 --- a/mindee/v1/parsing/generated/generated_object.py +++ b/mindee/v1/parsing/generated/generated_object.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v1.parsing.standard.position import PositionField diff --git a/mindee/v1/parsing/standard/address.py b/mindee/v1/parsing/standard/address.py index 7ee280bd..7b0bfc25 100644 --- a/mindee/v1/parsing/standard/address.py +++ b/mindee/v1/parsing/standard/address.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v1.parsing.standard.text import StringField diff --git a/mindee/v1/parsing/standard/amount.py b/mindee/v1/parsing/standard/amount.py index 84b68063..92f8d972 100644 --- a/mindee/v1/parsing/standard/amount.py +++ b/mindee/v1/parsing/standard/amount.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v1.parsing.standard.base import ( BaseField, FieldPositionMixin, diff --git a/mindee/v1/parsing/standard/base.py b/mindee/v1/parsing/standard/base.py index 0362728d..40e21651 100644 --- a/mindee/v1/parsing/standard/base.py +++ b/mindee/v1/parsing/standard/base.py @@ -3,7 +3,7 @@ from mindee.geometry.polygon import Polygon from mindee.geometry.quadrilateral import Quadrilateral, get_bounding_box -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict class FieldPositionMixin: diff --git a/mindee/v1/parsing/standard/boolean.py b/mindee/v1/parsing/standard/boolean.py index e69804d9..add3701c 100644 --- a/mindee/v1/parsing/standard/boolean.py +++ b/mindee/v1/parsing/standard/boolean.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v1.parsing.standard.base import BaseField, FieldPositionMixin diff --git a/mindee/v1/parsing/standard/classification.py b/mindee/v1/parsing/standard/classification.py index 8921d5a3..21a4dc02 100644 --- a/mindee/v1/parsing/standard/classification.py +++ b/mindee/v1/parsing/standard/classification.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v1.parsing.standard.base import BaseField diff --git a/mindee/v1/parsing/standard/company_registration.py b/mindee/v1/parsing/standard/company_registration.py index 10a913ff..c309c670 100644 --- a/mindee/v1/parsing/standard/company_registration.py +++ b/mindee/v1/parsing/standard/company_registration.py @@ -1,4 +1,5 @@ -from mindee.parsing.common import StringDict, format_for_display +from mindee.parsing.common.string_dict import StringDict +from mindee.parsing.common.summary_helper import format_for_display from mindee.v1.parsing.standard.base import BaseField, FieldPositionMixin diff --git a/mindee/v1/parsing/standard/date.py b/mindee/v1/parsing/standard/date.py index b3c9ea3b..f157cbb8 100644 --- a/mindee/v1/parsing/standard/date.py +++ b/mindee/v1/parsing/standard/date.py @@ -1,6 +1,6 @@ from datetime import date, datetime -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v1.parsing.standard.base import BaseField, FieldPositionMixin ISO8601_DATE_FORMAT = "%Y-%m-%d" diff --git a/mindee/v1/parsing/standard/locale.py b/mindee/v1/parsing/standard/locale.py index c9f097af..57790535 100644 --- a/mindee/v1/parsing/standard/locale.py +++ b/mindee/v1/parsing/standard/locale.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v1.parsing.standard.base import BaseField diff --git a/mindee/v1/parsing/standard/payment_details.py b/mindee/v1/parsing/standard/payment_details.py index bc9b7b7f..470a8262 100644 --- a/mindee/v1/parsing/standard/payment_details.py +++ b/mindee/v1/parsing/standard/payment_details.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v1.parsing.standard.base import BaseField, FieldPositionMixin diff --git a/mindee/v1/parsing/standard/position.py b/mindee/v1/parsing/standard/position.py index 502322f3..af7d4f77 100644 --- a/mindee/v1/parsing/standard/position.py +++ b/mindee/v1/parsing/standard/position.py @@ -1,7 +1,7 @@ from mindee.error.geometry_error import MindeeGeometryError from mindee.geometry.polygon import Polygon from mindee.geometry.quadrilateral import Quadrilateral, quadrilateral_from_prediction -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v1.parsing.standard.base import BaseField diff --git a/mindee/v1/parsing/standard/tax.py b/mindee/v1/parsing/standard/tax.py index 3da6e431..410d0b85 100644 --- a/mindee/v1/parsing/standard/tax.py +++ b/mindee/v1/parsing/standard/tax.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v1.parsing.standard.base import ( BaseField, FieldPositionMixin, diff --git a/mindee/v1/parsing/standard/text.py b/mindee/v1/parsing/standard/text.py index 8cf58c28..2f862ec6 100644 --- a/mindee/v1/parsing/standard/text.py +++ b/mindee/v1/parsing/standard/text.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v1.parsing.standard.base import BaseField, FieldPositionMixin diff --git a/mindee/v2/client.py b/mindee/v2/client.py index 22d9c5ab..1d54c9ca 100644 --- a/mindee/v2/client.py +++ b/mindee/v2/client.py @@ -1,6 +1,8 @@ from time import sleep from typing import TypeVar +import httpx + from mindee.client_mixin import ClientMixin from mindee.client_options.polling_options import PollingOptions from mindee.error.mindee_error import MindeeError @@ -27,14 +29,16 @@ class Client(ClientMixin): api_key: str | None mindee_api: MindeeAPIV2 - def __init__(self, api_key: str | None = None) -> None: + def __init__( + self, api_key: str | None = None, http_client: httpx.Client | None = None + ) -> None: """ Mindee API Client. :param api_key: Your API key for all endpoints """ self.api_key = api_key - self.mindee_api = MindeeAPIV2(api_key) + self.mindee_api = MindeeAPIV2(api_key, http_client) def enqueue( self, @@ -166,3 +170,21 @@ def search_models( :return: A list of models matching the provided criteria. """ return self.mindee_api.get_models(name, model_type) + + def close(self) -> None: + """Closes the underlying HTTP client.""" + self.mindee_api.close() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + + def __del__(self): + """Ensure the HTTP client is closed when the object is garbage collected.""" + mindee_api = getattr(self, "mindee_api", None) + if mindee_api: + httpx_client = getattr(self.mindee_api, "http_client", None) + if httpx_client and self.mindee_api: + self.mindee_api.delete_http_client() diff --git a/mindee/v2/error/mindee_http_error_v2.py b/mindee/v2/error/mindee_http_error_v2.py index 4bc97c0f..ac34c6ef 100644 --- a/mindee/v2/error/mindee_http_error_v2.py +++ b/mindee/v2/error/mindee_http_error_v2.py @@ -1,6 +1,6 @@ import json -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing import ErrorItem, ErrorResponse diff --git a/mindee/v2/mindee_http/mindee_api_v2.py b/mindee/v2/mindee_http/mindee_api_v2.py index d2e384b0..0e75baae 100644 --- a/mindee/v2/mindee_http/mindee_api_v2.py +++ b/mindee/v2/mindee_http/mindee_api_v2.py @@ -1,4 +1,6 @@ import os +from collections.abc import Callable +from typing import TypeVar import httpx @@ -18,6 +20,7 @@ is_valid_get_response, is_valid_post_response, ) +from mindee.v2.parsing import BaseResponse from mindee.v2.parsing.job.job_response import JobResponse from mindee.v2.parsing.search.search_response import SearchResponse @@ -30,6 +33,8 @@ REQUEST_TIMEOUT_ENV_NAME = "MINDEE_REQUEST_TIMEOUT" TIMEOUT_DEFAULT = 120 +ResponseT = TypeVar("ResponseT", bound=BaseResponse) + class MindeeAPIV2(SettingsMixin): """Settings class relating to API V2 requests.""" @@ -38,14 +43,16 @@ class MindeeAPIV2(SettingsMixin): """Root of the URL to use for polling.""" api_key: str | None """API Key for the client.""" + http_client: httpx.Client | None + """HTTP client for making requests.""" + request_timeout: float - def __init__(self, api_key: str | None): + def __init__(self, api_key: str | None, http_client: httpx.Client | None = None): self.api_key = ( api_key if api_key else os.environ.get(API_KEY_V2_ENV_NAME, API_KEY_V2_DEFAULT) ) - self.request_timeout = TIMEOUT_DEFAULT self.set_base_url(BASE_URL_DEFAULT) self.set_from_env() if not self.api_key: @@ -56,6 +63,10 @@ def __init__(self, api_key: str | None): f"'{API_KEY_V2_ENV_NAME}' environment variable." ) self.url_root = f"{self.base_url.rstrip('/')}" + self.http_client = http_client + self.request_timeout = float( + os.environ.get(REQUEST_TIMEOUT_ENV_NAME, TIMEOUT_DEFAULT) + ) @property def base_headers(self) -> dict[str, str]: @@ -84,7 +95,7 @@ def req_post_inference_enqueue( slug: str, ) -> httpx.Response: """ - Make an asynchronous request to POST a document for prediction on the V2 API. + Make a request to POST a document for enqueue on the V2 API. :param input_source: Input object. :param params: Options for the enqueueing of the document. @@ -93,27 +104,26 @@ def req_post_inference_enqueue( """ data = params.get_form_data() url = f"{self.url_root}/v2/{slug}/enqueue" - + post_kwargs: StringDict = {} if isinstance(input_source, LocalInputSource): - files = {"file": input_source.read_contents(params.close_file)} - response = httpx.post( - url=url, - files=files, - headers=self.base_headers, - data=data, - timeout=self.request_timeout, - ) + post_kwargs["files"] = { + "file": input_source.read_contents(params.close_file) + } elif isinstance(input_source, URLInputSource): data["url"] = input_source.url - response = httpx.post( - url=url, - headers=self.base_headers, - data=data, - timeout=self.request_timeout, - ) + + post_caller: Callable + if self.http_client is None or self.http_client.is_closed: + post_caller = httpx.post + post_kwargs["timeout"] = self.request_timeout else: - raise MindeeAPIV2Error("Invalid input source.") - return response + post_caller = self.http_client.post + return post_caller( + url, + headers=self.base_headers, + data=data, + **post_kwargs, + ) def req_get_job(self, job_id: str) -> httpx.Response: """ @@ -121,25 +131,40 @@ def req_get_job(self, job_id: str) -> httpx.Response: :param job_id: Job ID, returned by the enqueue request. """ - return httpx.get( - f"{self.url_root}/v2/jobs/{job_id}", + get_caller: Callable + get_kwargs: StringDict = {} + if self.http_client is None or self.http_client.is_closed: + get_caller = httpx.get + get_kwargs["timeout"] = self.request_timeout + else: + get_caller = self.http_client.get + return get_caller( + url=f"{self.url_root}/v2/jobs/{job_id}", headers=self.base_headers, - timeout=self.request_timeout, follow_redirects=False, + **get_kwargs, ) - def req_get_inference_by_url(self, url) -> httpx.Response: + def req_get_inference_by_url(self, url: str) -> httpx.Response: """ - Sends a request matching a given inference_id. Returns either a Job or a Document. + Sends a request matching a given inference_id. Returns either a Job or a + Document. :param url: URL to use for the request. :return: Response object from the request. """ - return httpx.get( - url, + get_caller: Callable + get_kwargs: StringDict = {} + if self.http_client is None or self.http_client.is_closed: + get_caller = httpx.get + get_kwargs["timeout"] = self.request_timeout + else: + get_caller = self.http_client.get + return get_caller( + url=url, headers=self.base_headers, - timeout=self.request_timeout, follow_redirects=False, + **get_kwargs, ) def req_get_inference(self, inference_id: str, slug: str) -> httpx.Response: @@ -149,30 +174,42 @@ def req_get_inference(self, inference_id: str, slug: str) -> httpx.Response: :param inference_id: Inference ID, returned by the job request. :param slug: Slug of the inference, defaults to nothing. """ - - url = f"{self.url_root}/v2/{slug}/{inference_id}" - return httpx.get( - url, + get_caller: Callable + get_kwargs: StringDict = {} + if self.http_client is None or self.http_client.is_closed: + get_caller = httpx.get + get_kwargs["timeout"] = self.request_timeout + else: + get_caller = self.http_client.get + return get_caller( + url=f"{self.url_root}/v2/{slug}/{inference_id}", headers=self.base_headers, - timeout=self.request_timeout, follow_redirects=False, + **get_kwargs, ) def req_get_search_models( - self, model_name: str | None, model_type: str | None + self, name: str | None, model_type: str | None ) -> httpx.Response: """ Searches for a list of models matching criteria. - :param model_name: Name pattern to search for. + :param name: Name pattern to search for. :param model_type: Type of model to search for (exact match). :return: Response object containing search results. """ - url = f"{self.url_root}/v2/search/models" - return httpx.get( - url, + get_caller: Callable + get_kwargs: StringDict = {} + if self.http_client is None or self.http_client.is_closed: + get_caller = httpx.get + get_kwargs["timeout"] = self.request_timeout + else: + get_caller = self.http_client.get + return get_caller( + url=f"{self.url_root}/v2/search/models", headers=self.base_headers, - params={"name": model_name, "model_type": model_type}, - timeout=self.request_timeout, + params={"name": name, "model_type": model_type}, + follow_redirects=False, + **get_kwargs, ) def enqueue( @@ -184,7 +221,6 @@ def enqueue( :param params: Parameters :return: A valid inference Response. """ - response = self.req_post_inference_enqueue( input_source=input_source, params=params, slug=params.get_enqueue_slug() ) @@ -209,7 +245,7 @@ def get_job(self, job_id: str) -> JobResponse: handle_error_v2(dict_response) return JobResponse(dict_response) - def get_result(self, response_type, inference_id: str): + def get_result(self, response_type: type[ResponseT], inference_id: str): """ Get the result of an inference that was previously enqueued. @@ -223,7 +259,7 @@ def get_result(self, response_type, inference_id: str): handle_error_v2(dict_response) return response_type(dict_response) - def get_result_by_url(self, response_type, url: str): + def get_result_by_url(self, response_type: type[ResponseT], url: str): """ Get the result of an inference that was previously enqueued by its URL. @@ -258,5 +294,29 @@ def _response_json(response: httpx.Response) -> StringDict: return response.json() except httpx.DecodingError as e: raise MindeeHTTPUnknownErrorV2( - f"HTTP {response.status_code} response is not valid JSON: {response.text}" + f"HTTP {response.status_code} response is not valid JSON: " + f"{response.text}" ) from e + + def close(self) -> None: + """Closes the underlying HTTP client.""" + if self.http_client and not self.http_client.is_closed: + self.http_client.close() + + def __enter__(self): + self.http_client = httpx.Client() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + + def delete_http_client(self): + """Delete the underlying HTTP client.""" + httpx_client = getattr(self, "http_client", None) + if httpx_client and not self.http_client.is_closed: + logger.info("Force-closing unclosed Mindee Client (V2) %s.", str(self)) + self.close() + + def __del__(self): + """Ensure the HTTP client is closed when the object is garbage collected.""" + self.delete_http_client() diff --git a/mindee/v2/parsing/error/error_item.py b/mindee/v2/parsing/error/error_item.py index 50ec5bb1..c997585a 100644 --- a/mindee/v2/parsing/error/error_item.py +++ b/mindee/v2/parsing/error/error_item.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict class ErrorItem: diff --git a/mindee/v2/parsing/error/error_response.py b/mindee/v2/parsing/error/error_response.py index eb9a62d0..351f1339 100644 --- a/mindee/v2/parsing/error/error_response.py +++ b/mindee/v2/parsing/error/error_response.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.error.error_item import ErrorItem diff --git a/mindee/v2/parsing/inference/base_inference.py b/mindee/v2/parsing/inference/base_inference.py index 80f129da..659c6a85 100644 --- a/mindee/v2/parsing/inference/base_inference.py +++ b/mindee/v2/parsing/inference/base_inference.py @@ -1,7 +1,7 @@ from abc import ABC from typing import TypeVar -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.inference_file import InferenceFile from mindee.v2.parsing.inference.inference_job import InferenceJob from mindee.v2.parsing.inference.inference_model import InferenceModel diff --git a/mindee/v2/parsing/inference/field/base_field.py b/mindee/v2/parsing/inference/field/base_field.py index 73f8ae19..b8a12450 100644 --- a/mindee/v2/parsing/inference/field/base_field.py +++ b/mindee/v2/parsing/inference/field/base_field.py @@ -1,6 +1,6 @@ from enum import Enum -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.field.field_confidence import FieldConfidence from mindee.v2.parsing.inference.field.field_location import FieldLocation diff --git a/mindee/v2/parsing/inference/field/factory.py b/mindee/v2/parsing/inference/field/factory.py index eebe6890..7cd76738 100644 --- a/mindee/v2/parsing/inference/field/factory.py +++ b/mindee/v2/parsing/inference/field/factory.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.error.mindee_api_v2_error import MindeeAPIV2Error from mindee.v2.parsing.inference.field.list_field import ListField from mindee.v2.parsing.inference.field.object_field import ObjectField diff --git a/mindee/v2/parsing/inference/field/field_location.py b/mindee/v2/parsing/inference/field/field_location.py index 8dae3c5f..31ca7056 100644 --- a/mindee/v2/parsing/inference/field/field_location.py +++ b/mindee/v2/parsing/inference/field/field_location.py @@ -1,5 +1,5 @@ from mindee.geometry.polygon import Polygon -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict class FieldLocation: diff --git a/mindee/v2/parsing/inference/field/inference_fields.py b/mindee/v2/parsing/inference/field/inference_fields.py index 9890688d..9b73043c 100644 --- a/mindee/v2/parsing/inference/field/inference_fields.py +++ b/mindee/v2/parsing/inference/field/inference_fields.py @@ -1,7 +1,7 @@ from collections.abc import Callable from typing import TYPE_CHECKING, cast -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.field.base_field import BaseField, FieldType if TYPE_CHECKING: diff --git a/mindee/v2/parsing/inference/field/list_field.py b/mindee/v2/parsing/inference/field/list_field.py index 1701c961..af844aae 100644 --- a/mindee/v2/parsing/inference/field/list_field.py +++ b/mindee/v2/parsing/inference/field/list_field.py @@ -1,6 +1,6 @@ from collections.abc import Callable -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.field.base_field import BaseField, FieldType from mindee.v2.parsing.inference.field.object_field import ObjectField from mindee.v2.parsing.inference.field.simple_field import SimpleField diff --git a/mindee/v2/parsing/inference/field/simple_field.py b/mindee/v2/parsing/inference/field/simple_field.py index a1149036..574c2289 100644 --- a/mindee/v2/parsing/inference/field/simple_field.py +++ b/mindee/v2/parsing/inference/field/simple_field.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.field.base_field import BaseField, FieldType diff --git a/mindee/v2/parsing/inference/inference_active_options.py b/mindee/v2/parsing/inference/inference_active_options.py index 730eb8b7..4c894aef 100644 --- a/mindee/v2/parsing/inference/inference_active_options.py +++ b/mindee/v2/parsing/inference/inference_active_options.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict class DataSchemaActiveOptions: diff --git a/mindee/v2/parsing/inference/inference_file.py b/mindee/v2/parsing/inference/inference_file.py index 98151ec9..5cd950f5 100644 --- a/mindee/v2/parsing/inference/inference_file.py +++ b/mindee/v2/parsing/inference/inference_file.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict class InferenceFile: diff --git a/mindee/v2/parsing/inference/inference_job.py b/mindee/v2/parsing/inference/inference_job.py index 19519260..ac877545 100644 --- a/mindee/v2/parsing/inference/inference_job.py +++ b/mindee/v2/parsing/inference/inference_job.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict class InferenceJob: diff --git a/mindee/v2/parsing/inference/inference_model.py b/mindee/v2/parsing/inference/inference_model.py index a93f65da..c44c4d0a 100644 --- a/mindee/v2/parsing/inference/inference_model.py +++ b/mindee/v2/parsing/inference/inference_model.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict class InferenceModel: diff --git a/mindee/v2/parsing/inference/rag_metadata.py b/mindee/v2/parsing/inference/rag_metadata.py index a0c88c98..c9af29b5 100644 --- a/mindee/v2/parsing/inference/rag_metadata.py +++ b/mindee/v2/parsing/inference/rag_metadata.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict class RAGMetadata: diff --git a/mindee/v2/parsing/inference/raw_text.py b/mindee/v2/parsing/inference/raw_text.py index eadb09dd..dae61af3 100644 --- a/mindee/v2/parsing/inference/raw_text.py +++ b/mindee/v2/parsing/inference/raw_text.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.raw_text_page import RawTextPage diff --git a/mindee/v2/parsing/inference/raw_text_page.py b/mindee/v2/parsing/inference/raw_text_page.py index e3eaf041..b10c68c7 100644 --- a/mindee/v2/parsing/inference/raw_text_page.py +++ b/mindee/v2/parsing/inference/raw_text_page.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict class RawTextPage: diff --git a/mindee/v2/parsing/job/job.py b/mindee/v2/parsing/job/job.py index 9871b0b8..0952eeff 100644 --- a/mindee/v2/parsing/job/job.py +++ b/mindee/v2/parsing/job/job.py @@ -1,6 +1,6 @@ from datetime import datetime -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.error.error_response import ErrorResponse from mindee.v2.parsing.job.job_webhook import JobWebhook @@ -35,7 +35,7 @@ def __init__(self, raw_response: StringDict) -> None: self.id = raw_response["id"] self.status = raw_response["status"] self.error = ( - ErrorResponse(raw_response["error"]) if raw_response["error"] else None + ErrorResponse(raw_response["error"]) if raw_response.get("error") else None ) self.created_at = datetime.fromisoformat( raw_response["created_at"].replace("Z", "+00:00") diff --git a/mindee/v2/parsing/job/job_response.py b/mindee/v2/parsing/job/job_response.py index 948564c4..da9f64ca 100644 --- a/mindee/v2/parsing/job/job_response.py +++ b/mindee/v2/parsing/job/job_response.py @@ -1,5 +1,5 @@ -from mindee.parsing.common import StringDict from mindee.parsing.common.common_response import CommonResponse +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.job.job import Job diff --git a/mindee/v2/parsing/job/job_webhook.py b/mindee/v2/parsing/job/job_webhook.py index c060ed93..b9a7fb69 100644 --- a/mindee/v2/parsing/job/job_webhook.py +++ b/mindee/v2/parsing/job/job_webhook.py @@ -1,6 +1,6 @@ from datetime import datetime -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.error.error_response import ErrorResponse diff --git a/mindee/v2/parsing/search/pagination.py b/mindee/v2/parsing/search/paginationmetadata.py similarity index 97% rename from mindee/v2/parsing/search/pagination.py rename to mindee/v2/parsing/search/paginationmetadata.py index 34b63a3d..14e628e3 100644 --- a/mindee/v2/parsing/search/pagination.py +++ b/mindee/v2/parsing/search/paginationmetadata.py @@ -1,4 +1,4 @@ -class Pagination: +class PaginationMetadata: """Pagination metadata.""" per_page: int diff --git a/mindee/v2/parsing/search/search_response.py b/mindee/v2/parsing/search/search_response.py index 233be58d..5dea6b22 100644 --- a/mindee/v2/parsing/search/search_response.py +++ b/mindee/v2/parsing/search/search_response.py @@ -1,5 +1,5 @@ from mindee.parsing.common.string_dict import StringDict -from mindee.v2.parsing.search.pagination import Pagination +from mindee.v2.parsing.search.paginationmetadata import PaginationMetadata from mindee.v2.parsing.search.search_models import SearchModels @@ -8,12 +8,12 @@ class SearchResponse: models: SearchModels """Parsed search payload.""" - pagination: Pagination + pagination: PaginationMetadata """Pagination metadata for the search results.""" def __init__(self, raw_response: StringDict) -> None: self.models = SearchModels(raw_response["models"]) - self.pagination = Pagination(raw_response["pagination"]) + self.pagination = PaginationMetadata(raw_response["pagination"]) def __str__(self) -> str: """ diff --git a/mindee/v2/product/classification/classification_classifier.py b/mindee/v2/product/classification/classification_classifier.py index 7edfe975..98c16232 100644 --- a/mindee/v2/product/classification/classification_classifier.py +++ b/mindee/v2/product/classification/classification_classifier.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.product.extraction.extraction_response import ExtractionResponse diff --git a/mindee/v2/product/classification/classification_inference.py b/mindee/v2/product/classification/classification_inference.py index 7953059e..9e00ad60 100644 --- a/mindee/v2/product/classification/classification_inference.py +++ b/mindee/v2/product/classification/classification_inference.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.base_inference import BaseInference from mindee.v2.product.classification.classification_result import ClassificationResult diff --git a/mindee/v2/product/classification/classification_response.py b/mindee/v2/product/classification/classification_response.py index 2e5380ed..c5e82c72 100644 --- a/mindee/v2/product/classification/classification_response.py +++ b/mindee/v2/product/classification/classification_response.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.base_response import BaseResponse from mindee.v2.product.classification.classification_inference import ( ClassificationInference, diff --git a/mindee/v2/product/classification/classification_result.py b/mindee/v2/product/classification/classification_result.py index 9bad332c..2d314ffb 100644 --- a/mindee/v2/product/classification/classification_result.py +++ b/mindee/v2/product/classification/classification_result.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.product.classification.classification_classifier import ( ClassificationClassifier, ) diff --git a/mindee/v2/product/crop/crop_inference.py b/mindee/v2/product/crop/crop_inference.py index 9ac140e0..6839e534 100644 --- a/mindee/v2/product/crop/crop_inference.py +++ b/mindee/v2/product/crop/crop_inference.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.base_inference import BaseInference from mindee.v2.product.crop.crop_result import CropResult diff --git a/mindee/v2/product/crop/crop_item.py b/mindee/v2/product/crop/crop_item.py index 5ee0d6ea..10c61756 100644 --- a/mindee/v2/product/crop/crop_item.py +++ b/mindee/v2/product/crop/crop_item.py @@ -1,7 +1,7 @@ from mindee.image.extracted_image import ExtractedImage from mindee.image.image_extractor import extract_multiple_images_from_source from mindee.input.local_input_source import LocalInputSource -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.field import FieldLocation from mindee.v2.product.extraction.extraction_response import ExtractionResponse diff --git a/mindee/v2/product/crop/crop_response.py b/mindee/v2/product/crop/crop_response.py index db9c273b..8b70cc19 100644 --- a/mindee/v2/product/crop/crop_response.py +++ b/mindee/v2/product/crop/crop_response.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.base_response import BaseResponse from mindee.v2.product.crop.crop_inference import CropInference diff --git a/mindee/v2/product/crop/crop_result.py b/mindee/v2/product/crop/crop_result.py index 59d11509..47561e90 100644 --- a/mindee/v2/product/crop/crop_result.py +++ b/mindee/v2/product/crop/crop_result.py @@ -1,5 +1,5 @@ from mindee.input.local_input_source import LocalInputSource -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.file_operations.crop import extract_multiple_crops from mindee.v2.file_operations.crop_files import CropFiles from mindee.v2.product.crop.crop_item import CropItem diff --git a/mindee/v2/product/extraction/extraction_inference.py b/mindee/v2/product/extraction/extraction_inference.py index c7a65aab..defdd454 100644 --- a/mindee/v2/product/extraction/extraction_inference.py +++ b/mindee/v2/product/extraction/extraction_inference.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.base_inference import BaseInference from mindee.v2.parsing.inference.inference_active_options import InferenceActiveOptions from mindee.v2.product.extraction.extraction_result import ExtractionResult diff --git a/mindee/v2/product/extraction/extraction_response.py b/mindee/v2/product/extraction/extraction_response.py index 39c396d8..3dac7e7a 100644 --- a/mindee/v2/product/extraction/extraction_response.py +++ b/mindee/v2/product/extraction/extraction_response.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.base_response import BaseResponse from mindee.v2.product.extraction.extraction_inference import ExtractionInference diff --git a/mindee/v2/product/extraction/extraction_result.py b/mindee/v2/product/extraction/extraction_result.py index 42eb9160..9d97a0b6 100644 --- a/mindee/v2/product/extraction/extraction_result.py +++ b/mindee/v2/product/extraction/extraction_result.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.field import InferenceFields from mindee.v2.parsing.inference.field.factory import parse_field from mindee.v2.parsing.inference.rag_metadata import RAGMetadata diff --git a/mindee/v2/product/ocr/ocr_inference.py b/mindee/v2/product/ocr/ocr_inference.py index acb7d330..60eda267 100644 --- a/mindee/v2/product/ocr/ocr_inference.py +++ b/mindee/v2/product/ocr/ocr_inference.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.base_inference import BaseInference from mindee.v2.product.ocr.ocr_result import OCRResult diff --git a/mindee/v2/product/ocr/ocr_page.py b/mindee/v2/product/ocr/ocr_page.py index fa1243a6..5673b127 100644 --- a/mindee/v2/product/ocr/ocr_page.py +++ b/mindee/v2/product/ocr/ocr_page.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.product.ocr.ocr_word import OCRWord diff --git a/mindee/v2/product/ocr/ocr_response.py b/mindee/v2/product/ocr/ocr_response.py index 32b7d052..da21bfea 100644 --- a/mindee/v2/product/ocr/ocr_response.py +++ b/mindee/v2/product/ocr/ocr_response.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.base_response import BaseResponse from mindee.v2.product.ocr.ocr_inference import OCRInference diff --git a/mindee/v2/product/ocr/ocr_result.py b/mindee/v2/product/ocr/ocr_result.py index 6d01c60b..d91f4c6e 100644 --- a/mindee/v2/product/ocr/ocr_result.py +++ b/mindee/v2/product/ocr/ocr_result.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.product.ocr.ocr_page import OCRPage diff --git a/mindee/v2/product/split/split_inference.py b/mindee/v2/product/split/split_inference.py index 7540f1fc..6e68bdc2 100644 --- a/mindee/v2/product/split/split_inference.py +++ b/mindee/v2/product/split/split_inference.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.base_inference import BaseInference from mindee.v2.product.split.split_result import SplitResult diff --git a/mindee/v2/product/split/split_range.py b/mindee/v2/product/split/split_range.py index f742b2cd..92140657 100644 --- a/mindee/v2/product/split/split_range.py +++ b/mindee/v2/product/split/split_range.py @@ -1,5 +1,5 @@ from mindee.input.local_input_source import LocalInputSource -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.pdf.extracted_pdf import ExtractedPDF from mindee.v2.file_operations.split import extract_single_split from mindee.v2.product.extraction.extraction_response import ExtractionResponse diff --git a/mindee/v2/product/split/split_response.py b/mindee/v2/product/split/split_response.py index be6e0673..ff8ace92 100644 --- a/mindee/v2/product/split/split_response.py +++ b/mindee/v2/product/split/split_response.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.base_response import BaseResponse from mindee.v2.product.split.split_inference import SplitInference diff --git a/mindee/v2/product/split/split_result.py b/mindee/v2/product/split/split_result.py index 04025322..ab3921bf 100644 --- a/mindee/v2/product/split/split_result.py +++ b/mindee/v2/product/split/split_result.py @@ -1,5 +1,5 @@ from mindee.input.local_input_source import LocalInputSource -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.file_operations.split import extract_multiple_splits from mindee.v2.file_operations.split_files import SplitFiles from mindee.v2.product.split.split_range import SplitRange diff --git a/pyproject.toml b/pyproject.toml index 795a1390..14b2296d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ requires-python = ">=3.10" dependencies = [ "pypdfium2>=4.0,<6.0", "Pillow>=12.2.0", - "httpx>=0.28.1,<1.0", + "httpx[http2]>=0.28.1,<1.0", ] [project.urls] @@ -50,6 +50,7 @@ test = [ "toml~=0.10.2", "pytest~=9.0.3", "pytest-cov~=7.1.0", + "respx~=0.23.1" ] docs = [ "sphinx~=9.1.0", diff --git a/tests/conftest.py b/tests/conftest.py index c130f51b..b5cc6b4a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,5 @@ import gc +import os import pytest @@ -7,3 +8,9 @@ def force_gc(): yield gc.collect() + + +@pytest.fixture(scope="session") +def findoc_model_id() -> str: + """Identifier of the Financial Document model, supplied through an env var.""" + return os.getenv("MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID", "") diff --git a/tests/v1/input/test_url_input_source_integration.py b/tests/v1/input/test_url_input_source_integration.py index 029e61a7..3973aa88 100644 --- a/tests/v1/input/test_url_input_source_integration.py +++ b/tests/v1/input/test_url_input_source_integration.py @@ -3,7 +3,7 @@ import pytest -from mindee import URLInputSource +from mindee.input.url_input_source import URLInputSource from mindee.v1.client import Client from mindee.v1.product.invoice import InvoiceV4 from tests.utils import cleanup_output_files diff --git a/tests/v2/test_client.py b/tests/v2/test_client.py index dc4ef3c2..ad4b9de2 100644 --- a/tests/v2/test_client.py +++ b/tests/v2/test_client.py @@ -1,8 +1,12 @@ +import concurrent.futures import json import os +import re +import time import httpx import pytest +import respx from mindee import ExtractionParameters, ExtractionResponse, LocalResponse from mindee.error.mindee_error import MindeeError @@ -30,11 +34,10 @@ def env_client(monkeypatch) -> Client: @pytest.fixture def custom_base_url_client(monkeypatch) -> Client: class _FakePostRespError: - status_code = 400 # any non-2xx will do + status_code = 400 is_error = True def json(self): - # Shape must match what handle_error_v2 expects return { "status": 0, "code": "000-000", @@ -157,8 +160,6 @@ def test_enqueue_and_parse_path_with_env_token(custom_base_url_client): def _assert_findoc_inference(response: ExtractionResponse): - # There are already detailed tests of the inference object. - # Here we are just testing whether the client can load OK. assert isinstance(response, ExtractionResponse) assert isinstance(response.inference, ExtractionInference) assert response.inference.id @@ -261,3 +262,79 @@ def test_queue_get(custom_base_url_client): assert not response.job.result_url assert len(response.job.webhooks) == 0 assert not response.job.error + + +@pytest.mark.v2 +def test_client_closes_httpx_connections() -> None: + client = Client(api_key="dummy_key") + client.close() + with pytest.raises( + AttributeError, match=r"NoneType' object has no attribute 'get'" + ): + client.mindee_api.http_client.get("https://google.com") + + +@pytest.mark.v2 +@respx.mock +def test_httpx_multiple_calls_thread_safety() -> None: + client = Client(api_key="dummy_key") + input_path = FILE_TYPES_DIR / "pdf" / "blank_1.pdf" + + def delayed_response(_: httpx.Request) -> httpx.Response: + job_json = json.loads((V2_DATA_DIR / "job" / "ok_processing.json").read_text()) + time.sleep(0.1) + return httpx.Response(201, json=job_json) + + url_pattern = re.compile(r"https://api-v2\.mindee\.net/v2/.+/enqueue") + respx.post(url_pattern).mock(side_effect=delayed_response) + + def make_request(): + input_source = PathInput(input_path) + params = ExtractionParameters(model_id="dummy-model-id") + return client.enqueue(input_source, params) + + thread_count = 20 + successful_responses = 0 + with concurrent.futures.ThreadPoolExecutor(max_workers=thread_count) as executor: + futures = [executor.submit(make_request) for _ in range(thread_count)] + + for future in concurrent.futures.as_completed(futures): + response = future.result() + if ( + response.job + and response.job.id == "12345678-1234-1234-1234-123456789ABC" + ): + successful_responses += 1 + + assert successful_responses == thread_count + + +@pytest.mark.v2 +@respx.mock +def test_explicit_timeout_failure(findoc_model_id) -> None: + respx.post("https://api-v2.mindee.net/v2/inferences/enqueue").mock( + side_effect=httpx.ReadTimeout("Simulated Read Timeout") + ) + + client = Client(api_key="dummy") + input_source = PathInput(FILE_TYPES_DIR / "pdf" / "blank_1.pdf") + params = ExtractionParameters(model_id=findoc_model_id) + + with pytest.raises(httpx.ReadTimeout): + client.enqueue(input_source, params) + + +@pytest.mark.v2 +@respx.mock +def test_explicit_500_server_error(findoc_model_id: str) -> None: + respx.post(re.compile(r"https://api-v2\.mindee\.net/v2/.+/enqueue")).mock( + return_value=httpx.Response(500, json={"message": "Internal Server Error"}) + ) + + client = Client(api_key="dummy") + input_source = PathInput(FILE_TYPES_DIR / "pdf" / "blank_1.pdf") + params = ExtractionParameters(model_id=findoc_model_id) + with pytest.raises(MindeeHTTPUnknownErrorV2) as exc_info: + client.enqueue(input_source, params) + + assert "Couldn't deserialize server error" in str(exc_info.value) diff --git a/tests/v2/test_client_integration.py b/tests/v2/test_client_integration.py index ebf88ff0..1d9cd18b 100644 --- a/tests/v2/test_client_integration.py +++ b/tests/v2/test_client_integration.py @@ -1,24 +1,21 @@ import os from pathlib import Path +import httpx import pytest from mindee import ExtractionParameters from mindee.input.path_input import PathInput from mindee.input.url_input_source import URLInputSource from mindee.v2.client import Client -from mindee.v2.error.mindee_http_error_v2 import MindeeHTTPErrorV2 +from mindee.v2.error.mindee_http_error_v2 import ( + MindeeHTTPErrorV2, +) from mindee.v2.parsing import InferenceActiveOptions from mindee.v2.product.extraction.extraction_response import ExtractionResponse from tests.utils import FILE_TYPES_DIR, V2_PRODUCT_DATA_DIR -@pytest.fixture(scope="session") -def findoc_model_id() -> str: - """Identifier of the Financial Document model, supplied through an env var.""" - return os.getenv("MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID") - - @pytest.fixture(scope="session") def v2_client() -> Client: return Client() @@ -306,3 +303,53 @@ def test_data_schema_must_succeed( assert response.inference.active_options.data_schema.replace is True assert response.inference.result.fields["test_replace"] is not None assert response.inference.result.fields["test_replace"].value == "a test value" + + +@pytest.mark.integration +@pytest.mark.v2 +def test_custom_httpx_client_event_hook( + findoc_model_id: str, +) -> None: + request_urls = [] + + def log_request(request: httpx.Request): + request_urls.append(str(request.url)) + + httpx_client = httpx.Client(event_hooks={"request": [log_request]}) + client = Client(http_client=httpx_client) + + input_path = FILE_TYPES_DIR / "pdf" / "blank_1.pdf" + input_source = PathInput(input_path) + + params = ExtractionParameters( + model_id=findoc_model_id, + rag=False, + raw_text=False, + polygon=False, + confidence=False, + webhook_ids=[], + alias="py_integration_custom_httpx_client", + ) + + client.enqueue(input_source, params) + + assert len(request_urls) > 0 + assert any("enqueue" in url for url in request_urls) + + +@pytest.mark.v2 +@pytest.mark.integration +def test_http2_client(findoc_model_id) -> None: + httpx_client = httpx.Client(http2=True) + with Client(http_client=httpx_client) as client: + input_source = PathInput( + V2_PRODUCT_DATA_DIR + / "extraction" + / "financial_document" + / "default_sample.jpg" + ) + params = ExtractionParameters(model_id=findoc_model_id) + response = client.enqueue_and_get_result( + ExtractionResponse, input_source, params + ) + _basic_assert_success(response, page_count=1, model_id=findoc_model_id)