From a7ed13bef09e7dea34304c0fb63d6e43c8f22d70 Mon Sep 17 00:00:00 2001 From: Mukund Ananthu Date: Sat, 15 Jun 2024 00:36:43 +0000 Subject: [PATCH 01/30] feat: Add Publish Create Span for OTel --- google/cloud/pubsub_v1/publisher/client.py | 37 ++++++++++++++ google/cloud/pubsub_v1/types.py | 3 ++ setup.py | 2 + .../publisher/test_publisher_client.py | 51 +++++++++++++++++++ 4 files changed, 93 insertions(+) diff --git a/google/cloud/pubsub_v1/publisher/client.py b/google/cloud/pubsub_v1/publisher/client.py index 54b353276..e4784f029 100644 --- a/google/cloud/pubsub_v1/publisher/client.py +++ b/google/cloud/pubsub_v1/publisher/client.py @@ -22,6 +22,9 @@ import typing from typing import Any, Dict, Optional, Sequence, Tuple, Type, Union import warnings +import sys +from datetime import datetime +from opentelemetry import trace from google.api_core import gapic_v1 from google.auth.credentials import AnonymousCredentials # type: ignore @@ -56,6 +59,9 @@ ordered_sequencer.OrderedSequencer, unordered_sequencer.UnorderedSequencer ] +_OPEN_TELEMETRY_TRACER_NAME = "com.google.cloud.pubsub.v1" +_OPEN_TELEMETRY_MESSAGING_SYSTEM = "gcp_pubsub" + class Client(publisher_client.PublisherClient): """A publisher client for Google Cloud Pub/Sub. @@ -153,6 +159,11 @@ def __init__( # The object controlling the message publishing flow self._flow_controller = FlowController(self.publisher_options.flow_control) + # Option indicating whether open telemetry is enabled or not. + self._open_telemetry_enabled = ( + self.publisher_options.enable_open_telemetry_tracing + ) + @classmethod def from_service_account_file( # type: ignore[override] cls, @@ -335,6 +346,7 @@ def publish( # type: ignore[override] pubsub_v1.publisher.exceptions.MessageTooLargeError: If publishing the ``message`` would exceed the max size limit on the backend. """ + # Sanity check: Is the data being sent as a bytestring? # If it is literally anything else, complain loudly about it. if not isinstance(data, bytes): @@ -368,6 +380,31 @@ def publish( # type: ignore[override] ) message = gapic_types.PubsubMessage.wrap(vanilla_pb) + if self._open_telemetry_enabled: + tracer = trace.get_tracer(_OPEN_TELEMETRY_TRACER_NAME) + with tracer.start_as_current_span( + name=f"{topic} create", + attributes={ + "messaging.system": _OPEN_TELEMETRY_MESSAGING_SYSTEM, + "messaging.destination.name": topic, + "code.function": "google.cloud.pubsub.PublisherClient.publish", + "messaging.gcp_pubsub.message.ordering_key": ordering_key, + "messaging.operation": "create", + "gcp.project_id": topic.split("/")[1], + "messaging.message.envelope.size": sys.getsizeof(message), + }, + kind=trace.SpanKind.PRODUCER, + # TODO(mk): set end_on_exit=False. So that span + # ends only when publich RPC call is made. + # end_on_exit=False, + ) as span: + span.add_event( + name="publish start", + attributes={ + "timestamp": str(datetime.now()), + }, + ) + # Messages should go through flow control to prevent excessive # queuing on the client side (depending on the settings). try: diff --git a/google/cloud/pubsub_v1/types.py b/google/cloud/pubsub_v1/types.py index 3d071a189..b4adff1d2 100644 --- a/google/cloud/pubsub_v1/types.py +++ b/google/cloud/pubsub_v1/types.py @@ -174,6 +174,9 @@ class PublisherOptions(NamedTuple): "compatible with :class:`~.pubsub_v1.types.TimeoutType`." ) + enable_open_telemetry_tracing: bool = False + """Whether to enable OpenTelemetry tracing.""" + # Define the type class and default values for flow control settings. # diff --git a/setup.py b/setup.py index a6af31207..d3b584d5e 100644 --- a/setup.py +++ b/setup.py @@ -45,6 +45,8 @@ "protobuf>=3.19.5,<5.0.0dev,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", "grpc-google-iam-v1 >= 0.12.4, < 1.0.0dev", "grpcio-status >= 1.33.2", + "opentelemetry-api", + "opentelemetry-sdk", ] extras = {"libcst": "libcst >= 0.3.10"} url = "https://github.com/googleapis/python-pubsub" diff --git a/tests/unit/pubsub_v1/publisher/test_publisher_client.py b/tests/unit/pubsub_v1/publisher/test_publisher_client.py index 9db5e0ef8..bba4d4507 100644 --- a/tests/unit/pubsub_v1/publisher/test_publisher_client.py +++ b/tests/unit/pubsub_v1/publisher/test_publisher_client.py @@ -43,6 +43,12 @@ from google.pubsub_v1.services.publisher.transports.grpc import PublisherGrpcTransport +from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry import trace + + def _assert_retries_equal(retry, retry2): # Retry instances cannot be directly compared, because their predicates are # different instances of the same function. We thus manually compare their other @@ -248,6 +254,51 @@ def test_publish(creds): ) +def test_publish_otel(creds): + TOPIC = "projects/projectID/topics/topicID" + client = publisher.Client( + credentials=creds, + publisher_options=types.PublisherOptions( + enable_open_telemetry_tracing=True, + ), + ) + + # Trace Provider setup. + provider = TracerProvider() + memory_exporter = InMemorySpanExporter() + processor = SimpleSpanProcessor(memory_exporter) + provider.add_span_processor(processor) + trace.set_tracer_provider(provider) + + client.publish(TOPIC, b"message") + spans = memory_exporter.get_finished_spans() + + assert len(spans) == 1 + + assert spans[0].name == f"{TOPIC} create" + + # Verify attribute values + attributes = spans[0].attributes + assert attributes["messaging.system"] == "gcp_pubsub" + assert attributes["messaging.destination.name"] == TOPIC + assert attributes["code.function"] == "google.cloud.pubsub.PublisherClient.publish" + assert attributes["messaging.gcp_pubsub.message.ordering_key"] == "" + assert attributes["messaging.operation"] == "create" + assert attributes["gcp.project_id"] == "projectID" + assert "messaging.message.envelope.size" in attributes + + # Verify span kind + assert spans[0].kind == trace.SpanKind.PRODUCER + + # Verify events + assert len(spans[0].events) == 1 + + # Verify start event + start_event = spans[0].events[0] + assert start_event.name == "publish start" + assert "timestamp" in start_event.attributes + + def test_publish_error_exceeding_flow_control_limits(creds): publisher_options = types.PublisherOptions( flow_control=types.PublishFlowControl( From 524a5e185ef0e110df46f85e082aea76aa15d178 Mon Sep 17 00:00:00 2001 From: Mukund Ananthu Date: Wed, 19 Jun 2024 15:00:16 +0000 Subject: [PATCH 02/30] Add Publisher flow control span --- google/cloud/pubsub_v1/publisher/client.py | 18 ++++++++++++++---- .../publisher/test_publisher_client.py | 12 +++++++++++- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/google/cloud/pubsub_v1/publisher/client.py b/google/cloud/pubsub_v1/publisher/client.py index e4784f029..22f43ec2c 100644 --- a/google/cloud/pubsub_v1/publisher/client.py +++ b/google/cloud/pubsub_v1/publisher/client.py @@ -25,6 +25,7 @@ import sys from datetime import datetime from opentelemetry import trace +from opentelemetry.trace.propagation import set_span_in_context from google.api_core import gapic_v1 from google.auth.credentials import AnonymousCredentials # type: ignore @@ -394,11 +395,10 @@ def publish( # type: ignore[override] "messaging.message.envelope.size": sys.getsizeof(message), }, kind=trace.SpanKind.PRODUCER, - # TODO(mk): set end_on_exit=False. So that span - # ends only when publich RPC call is made. + # TODO(mk): end_on_exit=False # end_on_exit=False, - ) as span: - span.add_event( + ) as publish_create_span: + publish_create_span.add_event( name="publish start", attributes={ "timestamp": str(datetime.now()), @@ -408,7 +408,17 @@ def publish( # type: ignore[override] # Messages should go through flow control to prevent excessive # queuing on the client side (depending on the settings). try: + if self._open_telemetry_enabled: + with tracer.start_as_current_span( + name="publisher flow control", + kind=trace.SpanKind.INTERNAL, + context=set_span_in_context(publish_create_span), + end_on_exit=False, + ) as publish_flow_control_span: + self._publish_flow_control_span = publish_flow_control_span self._flow_controller.add(message) + if self._open_telemetry_enabled: + self._publish_flow_control_span.end() except exceptions.FlowControlLimitError as exc: future = futures.Future() future.set_exception(exc) diff --git a/tests/unit/pubsub_v1/publisher/test_publisher_client.py b/tests/unit/pubsub_v1/publisher/test_publisher_client.py index bba4d4507..48e98f1e8 100644 --- a/tests/unit/pubsub_v1/publisher/test_publisher_client.py +++ b/tests/unit/pubsub_v1/publisher/test_publisher_client.py @@ -271,10 +271,12 @@ def test_publish_otel(creds): trace.set_tracer_provider(provider) client.publish(TOPIC, b"message") + spans = memory_exporter.get_finished_spans() - assert len(spans) == 1 + assert len(spans) == 2 + # Verify create span assert spans[0].name == f"{TOPIC} create" # Verify attribute values @@ -298,6 +300,14 @@ def test_publish_otel(creds): assert start_event.name == "publish start" assert "timestamp" in start_event.attributes + # Verify flow control span. + flow_control_span = spans[1] + assert flow_control_span.name == "publisher flow control" + assert flow_control_span.kind == trace.SpanKind.INTERNAL + + # Verify that flow control span is a child of the publish create span. + assert flow_control_span._parent[1] == spans[0]._context[1] + def test_publish_error_exceeding_flow_control_limits(creds): publisher_options = types.PublisherOptions( From 8bfa0a9af069023e57f7a999b8872f42020c126a Mon Sep 17 00:00:00 2001 From: Mukund Ananthu Date: Wed, 19 Jun 2024 15:42:40 +0000 Subject: [PATCH 03/30] Add publisher batching span --- google/cloud/pubsub_v1/publisher/client.py | 29 +++++++++++++++++-- .../publisher/test_publisher_client.py | 12 +++++++- 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/google/cloud/pubsub_v1/publisher/client.py b/google/cloud/pubsub_v1/publisher/client.py index 22f43ec2c..6ef249a2c 100644 --- a/google/cloud/pubsub_v1/publisher/client.py +++ b/google/cloud/pubsub_v1/publisher/client.py @@ -62,6 +62,7 @@ _OPEN_TELEMETRY_TRACER_NAME = "com.google.cloud.pubsub.v1" _OPEN_TELEMETRY_MESSAGING_SYSTEM = "gcp_pubsub" +_OPEN_TELEMETRY_PUBLISHER_BATCHING = "publisher batching" class Client(publisher_client.PublisherClient): @@ -456,8 +457,32 @@ def on_publish_done(future): # Delegate the publishing to the sequencer. sequencer = self._get_or_create_sequencer(topic, ordering_key) - future = sequencer.publish(message, retry=retry, timeout=timeout) - future.add_done_callback(on_publish_done) + if self._open_telemetry_enabled: + with tracer.start_as_current_span( + name=_OPEN_TELEMETRY_PUBLISHER_BATCHING, + kind=trace.SpanKind.INTERNAL, + context=set_span_in_context(publish_create_span), + end_on_exit=False, + ) as publisher_batching_span: + self._publisher_batching_span = publisher_batching_span + try: + future = sequencer.publish(message, retry=retry, timeout=timeout) + future.add_done_callback(on_publish_done) + except BaseException as be: + # sequencer.publish() can throw Exceptions. If it does, + # record it in the publisher batching span before + # allowing it to bubble up. + if self._open_telemetry_enabled: + self._publisher_batching_span.end() + self._publisher_batching_span.record_exception( + exception=be, + ) + self._publisher_batching_span.set_status( + trace.Status(status_code=trace.StatusCode.ERROR)) + raise be + + if self._open_telemetry_enabled: + self._publisher_batching_span.end() # Create a timer thread if necessary to enforce the batching # timeout. diff --git a/tests/unit/pubsub_v1/publisher/test_publisher_client.py b/tests/unit/pubsub_v1/publisher/test_publisher_client.py index 48e98f1e8..52d43ad45 100644 --- a/tests/unit/pubsub_v1/publisher/test_publisher_client.py +++ b/tests/unit/pubsub_v1/publisher/test_publisher_client.py @@ -274,7 +274,10 @@ def test_publish_otel(creds): spans = memory_exporter.get_finished_spans() - assert len(spans) == 2 + # span1 = Publish Create Span + # span2 = Publisher Flow Control Span + # span3 = Publisher Batching Span + assert len(spans) == 3 # Verify create span assert spans[0].name == f"{TOPIC} create" @@ -308,6 +311,13 @@ def test_publish_otel(creds): # Verify that flow control span is a child of the publish create span. assert flow_control_span._parent[1] == spans[0]._context[1] + # Verify Publisher Batching Span + publisher_batching_span = spans[2] + assert publisher_batching_span.name == "publisher batching" + assert publisher_batching_span.kind == trace.SpanKind.INTERNAL + # Verify Publisher Batching Span is child of Publish Create Span + assert publisher_batching_span._parent[1] == spans[0]._context[1] + def test_publish_error_exceeding_flow_control_limits(creds): publisher_options = types.PublisherOptions( From 88890e084fe9466e27284cd878e331e4356f4359 Mon Sep 17 00:00:00 2001 From: Mukund Ananthu Date: Wed, 19 Jun 2024 22:12:21 +0000 Subject: [PATCH 04/30] Add publish batching span exception recording --- google/cloud/pubsub_v1/publisher/client.py | 13 ++- tests/unit/pubsub_v1/conftest.py | 10 ++ .../publisher/test_publisher_client.py | 92 ++++++++++++++++++- 3 files changed, 109 insertions(+), 6 deletions(-) diff --git a/google/cloud/pubsub_v1/publisher/client.py b/google/cloud/pubsub_v1/publisher/client.py index 6ef249a2c..827294d48 100644 --- a/google/cloud/pubsub_v1/publisher/client.py +++ b/google/cloud/pubsub_v1/publisher/client.py @@ -423,6 +423,14 @@ def publish( # type: ignore[override] except exceptions.FlowControlLimitError as exc: future = futures.Future() future.set_exception(exc) + if self._open_telemetry_enabled: + self._publish_flow_control_span.record_exception( + exception=exc, + ) + self._publish_flow_control_span.set_status( + trace.Status(status_code=trace.StatusCode.ERROR) + ) + self._publish_flow_control_span.end() return future def on_publish_done(future): @@ -473,12 +481,13 @@ def on_publish_done(future): # record it in the publisher batching span before # allowing it to bubble up. if self._open_telemetry_enabled: - self._publisher_batching_span.end() self._publisher_batching_span.record_exception( exception=be, ) self._publisher_batching_span.set_status( - trace.Status(status_code=trace.StatusCode.ERROR)) + trace.Status(status_code=trace.StatusCode.ERROR) + ) + self._publisher_batching_span.end() raise be if self._open_telemetry_enabled: diff --git a/tests/unit/pubsub_v1/conftest.py b/tests/unit/pubsub_v1/conftest.py index dc4192931..897301343 100644 --- a/tests/unit/pubsub_v1/conftest.py +++ b/tests/unit/pubsub_v1/conftest.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from opentelemetry.sdk.trace import TracerProvider + import google.auth.credentials import pytest @@ -23,3 +25,11 @@ def creds(): GOOGLE_APPLICATION_CREDENTIALS set. """ yield google.auth.credentials.AnonymousCredentials() + + +@pytest.fixture(scope="session", autouse=True) +def provider(): + """ + Provide an Open Telemetry Tracer that can be re-used across tests. + """ + yield TracerProvider() diff --git a/tests/unit/pubsub_v1/publisher/test_publisher_client.py b/tests/unit/pubsub_v1/publisher/test_publisher_client.py index 52d43ad45..57f98027a 100644 --- a/tests/unit/pubsub_v1/publisher/test_publisher_client.py +++ b/tests/unit/pubsub_v1/publisher/test_publisher_client.py @@ -44,7 +44,6 @@ from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter -from opentelemetry.sdk.trace import TracerProvider from opentelemetry.sdk.trace.export import SimpleSpanProcessor from opentelemetry import trace @@ -219,6 +218,47 @@ def test_message_ordering_enabled(creds): assert client._enable_message_ordering +def test_publish_otel_batching_exception(creds, provider): + client = publisher.Client( + credentials=creds, + publisher_options=types.PublisherOptions( + enable_open_telemetry_tracing=True, + ), + ) + + # Setup Open Telemetry tracing + memory_exporter = InMemorySpanExporter() + processor = SimpleSpanProcessor(memory_exporter) + provider.add_span_processor(processor) + trace.set_tracer_provider(provider) + + # Throw an exception when sequencer.publish() is called + sequencer = mock.Mock(spec=ordered_sequencer.OrderedSequencer) + sequencer.publish = mock.Mock(side_effect=RuntimeError("some error")) + client._get_or_create_sequencer = mock.Mock(return_value=sequencer) + + TOPIC = "projects/projectID/topics/topicID" + with pytest.raises(RuntimeError): + client.publish(TOPIC, b"message") + + spans = memory_exporter.get_finished_spans() + + # Span 1: Create Publish span + # Span 2: Publisher Flow Control span + # Span 3: Publisher Batching span + assert len(spans) == 3 + + publish_batching_span = spans[2] + assert publish_batching_span.name == "publisher batching" + assert publish_batching_span.kind == trace.SpanKind.INTERNAL + assert publish_batching_span._parent[1] == spans[0]._context[1] + + # Verify exception recorded by the Publisher Batching span. + assert publish_batching_span.status.status_code == trace.StatusCode.ERROR + assert len(publish_batching_span.events) == 1 + assert publish_batching_span.events[0].name == "exception" + + def test_publish(creds): client = publisher.Client(credentials=creds) @@ -254,7 +294,7 @@ def test_publish(creds): ) -def test_publish_otel(creds): +def test_publish_otel(creds, provider): TOPIC = "projects/projectID/topics/topicID" client = publisher.Client( credentials=creds, @@ -263,8 +303,6 @@ def test_publish_otel(creds): ), ) - # Trace Provider setup. - provider = TracerProvider() memory_exporter = InMemorySpanExporter() processor = SimpleSpanProcessor(memory_exporter) provider.add_span_processor(processor) @@ -341,6 +379,52 @@ def test_publish_error_exceeding_flow_control_limits(creds): future2.result() +def test_publish_otel_flow_control_exception(creds, provider): + publisher_options = types.PublisherOptions( + flow_control=types.PublishFlowControl( + message_limit=10, + byte_limit=150, + limit_exceeded_behavior=types.LimitExceededBehavior.ERROR, + ), + enable_open_telemetry_tracing=True, + ) + client = publisher.Client(credentials=creds, publisher_options=publisher_options) + + mock_batch = mock.Mock(spec=client._batch_class) + topic = "topic/path" + client._set_batch(topic, mock_batch) + + # Trace Provider setup. + memory_exporter = InMemorySpanExporter() + processor = SimpleSpanProcessor(memory_exporter) + provider.add_span_processor(processor) + trace.set_tracer_provider(provider) + + future1 = client.publish(topic, b"a" * 100) + future2 = client.publish(topic, b"b" * 100) + + future1.result() # no error, still within flow control limits + with pytest.raises(exceptions.FlowControlLimitError): + future2.result() + + spans = memory_exporter.get_finished_spans() + # Span 1 = Publish Create Span of first publish + # Span 2 = Publisher Flow Control Span of first publish + # Span 3 = Publisher Batching Span of first publish + # Span 4 = Publish Create Span of second publish + # Span 5 = Publisher Flow Control Span of second publish + assert len(spans) == 5 + + failed_flow_control_span = spans[4] + assert failed_flow_control_span.name == "publisher flow control" + assert failed_flow_control_span.kind == trace.SpanKind.INTERNAL + assert failed_flow_control_span._parent[1] == spans[3]._context[1] + assert failed_flow_control_span.status.status_code == trace.StatusCode.ERROR + + assert len(failed_flow_control_span.events) == 1 + assert failed_flow_control_span.events[0].name == "exception" + + def test_publish_data_not_bytestring_error(creds): client = publisher.Client(credentials=creds) topic = "topic/path" From 792346d548b5ac6bef1fdeca3213372063d48628 Mon Sep 17 00:00:00 2001 From: Mukund Ananthu Date: Thu, 20 Jun 2024 22:29:59 +0000 Subject: [PATCH 05/30] Use local vars for spans instead of member vars Avoids race conditions when multiple messages are published using the same publisher client instance. --- google/cloud/pubsub_v1/publisher/client.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/google/cloud/pubsub_v1/publisher/client.py b/google/cloud/pubsub_v1/publisher/client.py index 827294d48..a55116177 100644 --- a/google/cloud/pubsub_v1/publisher/client.py +++ b/google/cloud/pubsub_v1/publisher/client.py @@ -416,21 +416,21 @@ def publish( # type: ignore[override] context=set_span_in_context(publish_create_span), end_on_exit=False, ) as publish_flow_control_span: - self._publish_flow_control_span = publish_flow_control_span + pass self._flow_controller.add(message) if self._open_telemetry_enabled: - self._publish_flow_control_span.end() + publish_flow_control_span.end() except exceptions.FlowControlLimitError as exc: future = futures.Future() future.set_exception(exc) if self._open_telemetry_enabled: - self._publish_flow_control_span.record_exception( + publish_flow_control_span.record_exception( exception=exc, ) - self._publish_flow_control_span.set_status( + publish_flow_control_span.set_status( trace.Status(status_code=trace.StatusCode.ERROR) ) - self._publish_flow_control_span.end() + publish_flow_control_span.end() return future def on_publish_done(future): @@ -472,7 +472,7 @@ def on_publish_done(future): context=set_span_in_context(publish_create_span), end_on_exit=False, ) as publisher_batching_span: - self._publisher_batching_span = publisher_batching_span + pass try: future = sequencer.publish(message, retry=retry, timeout=timeout) future.add_done_callback(on_publish_done) @@ -481,17 +481,17 @@ def on_publish_done(future): # record it in the publisher batching span before # allowing it to bubble up. if self._open_telemetry_enabled: - self._publisher_batching_span.record_exception( + publisher_batching_span.record_exception( exception=be, ) - self._publisher_batching_span.set_status( + publisher_batching_span.set_status( trace.Status(status_code=trace.StatusCode.ERROR) ) - self._publisher_batching_span.end() + publisher_batching_span.end() raise be if self._open_telemetry_enabled: - self._publisher_batching_span.end() + publisher_batching_span.end() # Create a timer thread if necessary to enforce the batching # timeout. From becf0ce33744f43b2025d6e5a7f19547234f8280 Mon Sep 17 00:00:00 2001 From: Mukund Ananthu Date: Thu, 20 Jun 2024 22:38:36 +0000 Subject: [PATCH 06/30] Start publisher batching span from time batch lock is acquired We currently start the span after the lock is acquired. --- google/cloud/pubsub_v1/publisher/client.py | 64 +++++++++++----------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/google/cloud/pubsub_v1/publisher/client.py b/google/cloud/pubsub_v1/publisher/client.py index a55116177..a9df8992a 100644 --- a/google/cloud/pubsub_v1/publisher/client.py +++ b/google/cloud/pubsub_v1/publisher/client.py @@ -442,43 +442,43 @@ def on_publish_done(future): if timeout is gapic_v1.method.DEFAULT: # if custom timeout not passed in timeout = self.publisher_options.timeout + if self._open_telemetry_enabled: + with tracer.start_as_current_span( + name=_OPEN_TELEMETRY_PUBLISHER_BATCHING, + kind=trace.SpanKind.INTERNAL, + context=set_span_in_context(publish_create_span), + end_on_exit=False, + ) as publisher_batching_span: + pass with self._batch_lock: - if self._is_stopped: - raise RuntimeError("Cannot publish on a stopped publisher.") - - # Set retry timeout to "infinite" when message ordering is enabled. - # Note that this then also impacts messages added with an empty - # ordering key. - if self._enable_message_ordering: - if retry is gapic_v1.method.DEFAULT: - # use the default retry for the publish GRPC method as a base - transport = self._transport - base_retry = transport._wrapped_methods[transport.publish]._retry - retry = base_retry.with_deadline(2.0**32) - # timeout needs to be overridden and set to infinite in - # addition to the retry deadline since both determine - # the duration for which retries are attempted. - timeout = 2.0**32 - elif retry is not None: - retry = retry.with_deadline(2.0**32) - timeout = 2.0**32 - - # Delegate the publishing to the sequencer. - sequencer = self._get_or_create_sequencer(topic, ordering_key) - if self._open_telemetry_enabled: - with tracer.start_as_current_span( - name=_OPEN_TELEMETRY_PUBLISHER_BATCHING, - kind=trace.SpanKind.INTERNAL, - context=set_span_in_context(publish_create_span), - end_on_exit=False, - ) as publisher_batching_span: - pass try: + if self._is_stopped: + raise RuntimeError("Cannot publish on a stopped publisher.") + + # Set retry timeout to "infinite" when message ordering is enabled. + # Note that this then also impacts messages added with an empty + # ordering key. + if self._enable_message_ordering: + if retry is gapic_v1.method.DEFAULT: + # use the default retry for the publish GRPC method as a base + transport = self._transport + base_retry = transport._wrapped_methods[transport.publish]._retry + retry = base_retry.with_deadline(2.0**32) + # timeout needs to be overridden and set to infinite in + # addition to the retry deadline since both determine + # the duration for which retries are attempted. + timeout = 2.0**32 + elif retry is not None: + retry = retry.with_deadline(2.0**32) + timeout = 2.0**32 + + # Delegate the publishing to the sequencer. + sequencer = self._get_or_create_sequencer(topic, ordering_key) future = sequencer.publish(message, retry=retry, timeout=timeout) future.add_done_callback(on_publish_done) except BaseException as be: - # sequencer.publish() can throw Exceptions. If it does, - # record it in the publisher batching span before + # Exceptions can be thrown when attempting to add messages to the batch. + # If they're thrown, record it in the publisher batching span before # allowing it to bubble up. if self._open_telemetry_enabled: publisher_batching_span.record_exception( From fa4f506190c318a04a8db00ad608595c4ee19ed5 Mon Sep 17 00:00:00 2001 From: Mukund Ananthu Date: Fri, 21 Jun 2024 18:25:39 +0000 Subject: [PATCH 07/30] Add trace context propagation --- google/cloud/pubsub_v1/publisher/client.py | 17 +++++++++++++ .../publisher/test_publisher_client.py | 25 +++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/google/cloud/pubsub_v1/publisher/client.py b/google/cloud/pubsub_v1/publisher/client.py index a9df8992a..dc6dcf666 100644 --- a/google/cloud/pubsub_v1/publisher/client.py +++ b/google/cloud/pubsub_v1/publisher/client.py @@ -26,6 +26,7 @@ from datetime import datetime from opentelemetry import trace from opentelemetry.trace.propagation import set_span_in_context +from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator from google.api_core import gapic_v1 from google.auth.credentials import AnonymousCredentials # type: ignore @@ -382,6 +383,18 @@ def publish( # type: ignore[override] ) message = gapic_types.PubsubMessage.wrap(vanilla_pb) + class OTelContextSetter(): + """ + Used by Open Telemetry for context propagation. + """ + + def set(self, carrier: gapic_types.PubsubMessage, key: str, value: str): + """ + Injects trace context into Pub/Sub message attributes with + "googclient_" prefix. + """ + carrier.attributes["googclient_" + key] = value + if self._open_telemetry_enabled: tracer = trace.get_tracer(_OPEN_TELEMETRY_TRACER_NAME) with tracer.start_as_current_span( @@ -405,6 +418,10 @@ def publish( # type: ignore[override] "timestamp": str(datetime.now()), }, ) + TraceContextTextMapPropagator().inject( + carrier=message, + setter=OTelContextSetter(), + ) # Messages should go through flow control to prevent excessive # queuing on the client side (depending on the settings). diff --git a/tests/unit/pubsub_v1/publisher/test_publisher_client.py b/tests/unit/pubsub_v1/publisher/test_publisher_client.py index 57f98027a..135fa807b 100644 --- a/tests/unit/pubsub_v1/publisher/test_publisher_client.py +++ b/tests/unit/pubsub_v1/publisher/test_publisher_client.py @@ -294,6 +294,31 @@ def test_publish(creds): ) +def test_publish_otel_context_propagation(creds, provider): + TOPIC = "projects/projectID/topics/topicID" + client = publisher.Client( + credentials=creds, + publisher_options=types.PublisherOptions( + enable_open_telemetry_tracing=True, + ), + ) + + # Set up Open Telemetry tracing. + memory_exporter = InMemorySpanExporter() + processor = SimpleSpanProcessor(memory_exporter) + provider.add_span_processor(processor) + trace.set_tracer_provider(provider) + + flow_controller_add_mock = mock.Mock(spec=publisher.flow_controller.FlowController.add) + client._flow_controller.add = flow_controller_add_mock + client.publish(TOPIC, b"message") + + flow_controller_add_mock.assert_called_once() + args = flow_controller_add_mock.call_args.args + assert len(args) == 1 + assert "googclient_traceparent" in args[0].attributes + + def test_publish_otel(creds, provider): TOPIC = "projects/projectID/topics/topicID" client = publisher.Client( From 2fafbc9a76348af02585acee18f60e169e47e12a Mon Sep 17 00:00:00 2001 From: Mukund Date: Sat, 22 Jun 2024 21:34:27 +0000 Subject: [PATCH 08/30] Add publish RPC span * Add MessageWrappers to carry span context * Modify Unordered Sequencers to support MessageWrappers * Fix all tests that break with the changes. --- .../pubsub_v1/publisher/_batch/thread.py | 75 ++++++-- .../_sequencer/unordered_sequencer.py | 5 +- google/cloud/pubsub_v1/publisher/client.py | 30 ++- .../pubsub_v1/publisher/message_wrapper.py | 28 +++ .../sequencer/test_unordered_sequencer.py | 2 +- .../publisher/test_publisher_client.py | 177 ++++++++++-------- 6 files changed, 217 insertions(+), 100 deletions(-) create mode 100644 google/cloud/pubsub_v1/publisher/message_wrapper.py diff --git a/google/cloud/pubsub_v1/publisher/_batch/thread.py b/google/cloud/pubsub_v1/publisher/_batch/thread.py index 1617f8c90..42c1bdefa 100644 --- a/google/cloud/pubsub_v1/publisher/_batch/thread.py +++ b/google/cloud/pubsub_v1/publisher/_batch/thread.py @@ -20,10 +20,13 @@ import typing from typing import Any, Callable, List, Optional, Sequence +from opentelemetry import trace + import google.api_core.exceptions from google.api_core import gapic_v1 from google.cloud.pubsub_v1.publisher import exceptions from google.cloud.pubsub_v1.publisher import futures +from google.cloud.pubsub_v1.publisher.message_wrapper import MessageWrapper from google.cloud.pubsub_v1.publisher._batch import base from google.pubsub_v1 import types as gapic_types @@ -108,7 +111,7 @@ def __init__( # status changed from ACCEPTING_MESSAGES to any other # in order to avoid race conditions self._futures: List[futures.Future] = [] - self._messages: List[gapic_types.PubsubMessage] = [] + self._message_wrappers: List[MessageWrapper] = [] self._status = base.BatchStatus.ACCEPTING_MESSAGES # The initial size is not zero, we need to account for the size overhead @@ -134,9 +137,9 @@ def client(self) -> "PublisherClient": return self._client @property - def messages(self) -> Sequence[gapic_types.PubsubMessage]: + def message_wrappers(self) -> Sequence[gapic_types.PubsubMessage]: """The messages currently in the batch.""" - return self._messages + return self._message_wrappers @property def settings(self) -> "types.BatchSettings": @@ -259,7 +262,7 @@ def _commit(self) -> None: # https://github.com/googleapis/google-cloud-python/issues/8036 # Sanity check: If there are no messages, no-op. - if not self._messages: + if not self._message_wrappers: _LOGGER.debug("No messages to publish, exiting commit") self._status = base.BatchStatus.SUCCESS return @@ -270,18 +273,63 @@ def _commit(self) -> None: batch_transport_succeeded = True try: + if self._client._open_telemetry_enabled: + tracer = trace.get_tracer("com.google.cloud.pubsub.v1") + links = [] + for wrapper in self._message_wrappers: + span = wrapper.get_span() + if span.get_span_context().trace_flags.sampled: + links.append(trace.Link(span.get_span_context())) + with tracer.start_as_current_span( + name=f"{self._topic} publish", + attributes={ + "messaging.system": "com.google.cloud.pubsub.v1", + "messaging.destination.name": self._topic, + "gcp.project_id": self._topic.split("/")[1], + "messaging.batch.message_count": len(self._message_wrappers), + "messaging.operation": "publish", + "code.function": "_commit", + }, + links=links if len(links) > 0 else None, + kind=trace.SpanKind.CLIENT, + ) as publish_rpc_span: + ctx = publish_rpc_span.get_span_context() + for wrapper in self._message_wrappers: + wrapper.get_span().add_link( + ctx + ) # Performs retries for errors defined by the retry configuration. response = self._client._gapic_publish( topic=self._topic, - messages=self._messages, + messages=[wrapper.get_message() for wrapper in self._message_wrappers], retry=self._commit_retry, timeout=self._commit_timeout, ) + if self._client._open_telemetry_enabled: + publish_rpc_span.end() + for wrapper in self._message_wrappers: + wrapper.get_span().end() except google.api_core.exceptions.GoogleAPIError as exc: # We failed to publish, even after retries, so set the exception on # all futures and exit. self._status = base.BatchStatus.ERROR + if self._client._open_telemetry_enabled: + publish_rpc_span.record_exception( + exception=exc, + ) + publish_rpc_span.set_status( + trace.Status(status_code=trace.StatusCode.ERROR) + ) + publish_rpc_span.end() + for wrapper in self._message_wrappers: + span = wrapper.get_span() + span.record_exception(exception=exc) + span.set_status( + trace.Status(status_code=trace.StatusCode.ERROR), + ) + span.end() + batch_transport_succeeded = False if self._batch_done_callback is not None: # Failed to publish batch. @@ -326,7 +374,7 @@ def _commit(self) -> None: self._batch_done_callback(batch_transport_succeeded) def publish( - self, message: gapic_types.PubsubMessage + self, message_wrapper: MessageWrapper, ) -> Optional["pubsub_v1.publisher.futures.Future"]: """Publish a single message. @@ -351,12 +399,12 @@ def publish( """ # Coerce the type, just in case. - if not isinstance(message, gapic_types.PubsubMessage): + if not isinstance(message_wrapper.get_message(), gapic_types.PubsubMessage): # For performance reasons, the message should be constructed by directly # using the raw protobuf class, and only then wrapping it into the # higher-level PubsubMessage class. - vanilla_pb = _raw_proto_pubbsub_message(**message) - message = gapic_types.PubsubMessage.wrap(vanilla_pb) + vanilla_pb = _raw_proto_pubbsub_message(**message_wrapper.get_message()) + message_wrapper.set_message(vanilla_pb.gapic_types.PubsubMessage.wrap(vanilla_pb)) future = None @@ -369,7 +417,7 @@ def publish( return None size_increase = gapic_types.PublishRequest( - messages=[message] + messages=[message_wrapper.get_message()] )._pb.ByteSize() if (self._base_request_size + size_increase) > _SERVER_PUBLISH_MAX_BYTES: @@ -381,16 +429,15 @@ def publish( raise exceptions.MessageTooLargeError(err_msg) new_size = self._size + size_increase - new_count = len(self._messages) + 1 + new_count = len(self._message_wrappers) + 1 size_limit = min(self.settings.max_bytes, _SERVER_PUBLISH_MAX_BYTES) overflow = new_size > size_limit or new_count >= self.settings.max_messages - if not self._messages or not overflow: + if not self._message_wrappers or not overflow: # Store the actual message in the batch's message queue. - self._messages.append(message) + self._message_wrappers.append(message_wrapper) self._size = new_size - # Track the future on this batch (so that the result of the # future can be set). future = futures.Future() diff --git a/google/cloud/pubsub_v1/publisher/_sequencer/unordered_sequencer.py b/google/cloud/pubsub_v1/publisher/_sequencer/unordered_sequencer.py index 7d57aa821..a2522f5e1 100644 --- a/google/cloud/pubsub_v1/publisher/_sequencer/unordered_sequencer.py +++ b/google/cloud/pubsub_v1/publisher/_sequencer/unordered_sequencer.py @@ -18,7 +18,8 @@ from google.api_core import gapic_v1 from google.cloud.pubsub_v1.publisher._sequencer import base -from google.pubsub_v1 import types as gapic_types +from google.cloud.pubsub_v1.publisher.message_wrapper import MessageWrapper + if typing.TYPE_CHECKING: # pragma: NO COVER from google.cloud.pubsub_v1.publisher import _batch @@ -115,7 +116,7 @@ def _create_batch( def publish( self, - message: gapic_types.PubsubMessage, + message: MessageWrapper, retry: "OptionalRetry" = gapic_v1.method.DEFAULT, timeout: "types.OptionalTimeout" = gapic_v1.method.DEFAULT, ) -> "futures.Future": diff --git a/google/cloud/pubsub_v1/publisher/client.py b/google/cloud/pubsub_v1/publisher/client.py index dc6dcf666..73c70d246 100644 --- a/google/cloud/pubsub_v1/publisher/client.py +++ b/google/cloud/pubsub_v1/publisher/client.py @@ -39,10 +39,12 @@ from google.cloud.pubsub_v1.publisher._sequencer import ordered_sequencer from google.cloud.pubsub_v1.publisher._sequencer import unordered_sequencer from google.cloud.pubsub_v1.publisher.flow_controller import FlowController +from google.cloud.pubsub_v1.publisher.message_wrapper import MessageWrapper from google.pubsub_v1 import gapic_version as package_version from google.pubsub_v1 import types as gapic_types from google.pubsub_v1.services.publisher import client as publisher_client + __version__ = package_version.__version__ if typing.TYPE_CHECKING: # pragma: NO COVER @@ -409,8 +411,7 @@ def set(self, carrier: gapic_types.PubsubMessage, key: str, value: str): "messaging.message.envelope.size": sys.getsizeof(message), }, kind=trace.SpanKind.PRODUCER, - # TODO(mk): end_on_exit=False - # end_on_exit=False, + end_on_exit=False, ) as publish_create_span: publish_create_span.add_event( name="publish start", @@ -448,6 +449,12 @@ def set(self, carrier: gapic_types.PubsubMessage, key: str, value: str): trace.Status(status_code=trace.StatusCode.ERROR) ) publish_flow_control_span.end() + + publish_create_span.record_exception(exc) + publish_create_span.set_status( + trace.Status(status_code=trace.StatusCode.ERROR) + ) + publish_create_span.end() return future def on_publish_done(future): @@ -491,7 +498,16 @@ def on_publish_done(future): # Delegate the publishing to the sequencer. sequencer = self._get_or_create_sequencer(topic, ordering_key) - future = sequencer.publish(message, retry=retry, timeout=timeout) + + message_wrapper = MessageWrapper( + message=message, + create_span=publish_create_span, + ) + future = sequencer.publish( + message=message_wrapper, + retry=retry, + timeout=timeout, + ) future.add_done_callback(on_publish_done) except BaseException as be: # Exceptions can be thrown when attempting to add messages to the batch. @@ -505,6 +521,14 @@ def on_publish_done(future): trace.Status(status_code=trace.StatusCode.ERROR) ) publisher_batching_span.end() + + publish_create_span.record_exception( + exception=be, + ) + publish_create_span.set_status( + trace.Status(status_code=trace.StatusCode.ERROR) + ) + publish_create_span.end() raise be if self._open_telemetry_enabled: diff --git a/google/cloud/pubsub_v1/publisher/message_wrapper.py b/google/cloud/pubsub_v1/publisher/message_wrapper.py new file mode 100644 index 000000000..f5389cfe7 --- /dev/null +++ b/google/cloud/pubsub_v1/publisher/message_wrapper.py @@ -0,0 +1,28 @@ +from google.pubsub_v1 import types as gapic_types +from opentelemetry import trace +from typing import Optional + + +class MessageWrapper(): + """ + Wraps Pub/Sub message with additional metadata required for + Open Telemetry tracing. + """ + + def __init__(self, message: gapic_types.PubsubMessage, create_span: trace.Span): + self._message = message + self._create_span = create_span + + def get_message(self) -> gapic_types.PubsubMessage: + return self._message + + def set_message(self, message: gapic_types.PubsubMessage) -> None: + self._message = message + + def get_span(self) -> Optional["trace.Span"]: + return self._create_span + + def __eq__(self, other: "MessageWrapper") -> bool: + if not isinstance(other, MessageWrapper): + return False + return self._message == other._message and self._create_span == other._create_span diff --git a/tests/unit/pubsub_v1/publisher/sequencer/test_unordered_sequencer.py b/tests/unit/pubsub_v1/publisher/sequencer/test_unordered_sequencer.py index 01d9d6ca4..f7716694f 100644 --- a/tests/unit/pubsub_v1/publisher/sequencer/test_unordered_sequencer.py +++ b/tests/unit/pubsub_v1/publisher/sequencer/test_unordered_sequencer.py @@ -140,7 +140,7 @@ def test_publish_after_batch_error(): batch = client._batch_class( client, "topic_name", types.BatchSettings(max_latency=float("inf")) ) - batch._messages.append(mock.Mock(name="message")) # Make batch truthy (non-empty). + batch._message_wrappers.append(mock.Mock(name="message")) # Make batch truthy (non-empty). sequencer = unordered_sequencer.UnorderedSequencer(client, "topic_name") sequencer._set_batch(batch) diff --git a/tests/unit/pubsub_v1/publisher/test_publisher_client.py b/tests/unit/pubsub_v1/publisher/test_publisher_client.py index 135fa807b..59317ed75 100644 --- a/tests/unit/pubsub_v1/publisher/test_publisher_client.py +++ b/tests/unit/pubsub_v1/publisher/test_publisher_client.py @@ -37,6 +37,7 @@ from google.cloud.pubsub_v1.publisher import exceptions from google.cloud.pubsub_v1.publisher._sequencer import ordered_sequencer +from google.cloud.pubsub_v1.publisher.message_wrapper import MessageWrapper from google.pubsub_v1 import types as gapic_types from google.pubsub_v1.services.publisher import client as publisher_client @@ -243,15 +244,16 @@ def test_publish_otel_batching_exception(creds, provider): spans = memory_exporter.get_finished_spans() - # Span 1: Create Publish span - # Span 2: Publisher Flow Control span - # Span 3: Publisher Batching span + # Span 1: Publisher Flow Control span + # Span 2: Publisher Batching span(ended after exception) + # Span 3: Create Publish span(ended after exception) assert len(spans) == 3 - publish_batching_span = spans[2] + publish_batching_span = spans[1] + publish_create_span = spans[2] assert publish_batching_span.name == "publisher batching" assert publish_batching_span.kind == trace.SpanKind.INTERNAL - assert publish_batching_span._parent[1] == spans[0]._context[1] + assert publish_batching_span._parent[1] == publish_create_span._context[1] # Verify exception recorded by the Publisher Batching span. assert publish_batching_span.status.status_code == trace.StatusCode.ERROR @@ -286,9 +288,17 @@ def test_publish(creds): # Check mock. batch.publish.assert_has_calls( [ - mock.call(gapic_types.PubsubMessage(data=b"spam")), mock.call( - gapic_types.PubsubMessage(data=b"foo", attributes={"bar": "baz"}) + MessageWrapper( + message=gapic_types.PubsubMessage(data=b"spam"), + create_span=None, + ) + ), + mock.call( + MessageWrapper( + message=gapic_types.PubsubMessage(data=b"foo", attributes={"bar": "baz"}), + create_span=None, + ) ), ] ) @@ -337,49 +347,26 @@ def test_publish_otel(creds, provider): spans = memory_exporter.get_finished_spans() - # span1 = Publish Create Span - # span2 = Publisher Flow Control Span - # span3 = Publisher Batching Span - assert len(spans) == 3 - - # Verify create span - assert spans[0].name == f"{TOPIC} create" - - # Verify attribute values - attributes = spans[0].attributes - assert attributes["messaging.system"] == "gcp_pubsub" - assert attributes["messaging.destination.name"] == TOPIC - assert attributes["code.function"] == "google.cloud.pubsub.PublisherClient.publish" - assert attributes["messaging.gcp_pubsub.message.ordering_key"] == "" - assert attributes["messaging.operation"] == "create" - assert attributes["gcp.project_id"] == "projectID" - assert "messaging.message.envelope.size" in attributes - - # Verify span kind - assert spans[0].kind == trace.SpanKind.PRODUCER + # Publish Create Span is still active, hence note returned + # by the exporter. + # span1 = Publisher Flow Control Span + # span2 = Publisher Batching Span + assert len(spans) == 2 - # Verify events - assert len(spans[0].events) == 1 - - # Verify start event - start_event = spans[0].events[0] - assert start_event.name == "publish start" - assert "timestamp" in start_event.attributes + flow_control_span, batching_span = spans # Verify flow control span. - flow_control_span = spans[1] assert flow_control_span.name == "publisher flow control" assert flow_control_span.kind == trace.SpanKind.INTERNAL # Verify that flow control span is a child of the publish create span. - assert flow_control_span._parent[1] == spans[0]._context[1] + # assert flow_control_span._parent[1] == spans[0]._context[1] # Verify Publisher Batching Span - publisher_batching_span = spans[2] - assert publisher_batching_span.name == "publisher batching" - assert publisher_batching_span.kind == trace.SpanKind.INTERNAL + assert batching_span.name == "publisher batching" + assert batching_span.kind == trace.SpanKind.INTERNAL # Verify Publisher Batching Span is child of Publish Create Span - assert publisher_batching_span._parent[1] == spans[0]._context[1] + # assert batching_span._parent[1] == spans[0]._context[1] def test_publish_error_exceeding_flow_control_limits(creds): @@ -405,49 +392,58 @@ def test_publish_error_exceeding_flow_control_limits(creds): def test_publish_otel_flow_control_exception(creds, provider): - publisher_options = types.PublisherOptions( - flow_control=types.PublishFlowControl( - message_limit=10, - byte_limit=150, - limit_exceeded_behavior=types.LimitExceededBehavior.ERROR, + client = publisher.Client( + credentials=creds, + publisher_options=types.PublisherOptions( + enable_open_telemetry_tracing=True, ), - enable_open_telemetry_tracing=True, ) - client = publisher.Client(credentials=creds, publisher_options=publisher_options) - mock_batch = mock.Mock(spec=client._batch_class) - topic = "topic/path" - client._set_batch(topic, mock_batch) - - # Trace Provider setup. + # Setup Open Telemetry tracing memory_exporter = InMemorySpanExporter() processor = SimpleSpanProcessor(memory_exporter) provider.add_span_processor(processor) trace.set_tracer_provider(provider) - future1 = client.publish(topic, b"a" * 100) - future2 = client.publish(topic, b"b" * 100) + client._flow_controller = mock.Mock(spec=publisher.flow_controller.FlowController) + client._flow_controller.add = mock.Mock(side_effect=exceptions.FlowControlLimitError) - future1.result() # no error, still within flow control limits - with pytest.raises(exceptions.FlowControlLimitError): - future2.result() + TOPIC = "projects/projectID/topics/topicID" + client.publish(TOPIC, b"message") spans = memory_exporter.get_finished_spans() - # Span 1 = Publish Create Span of first publish - # Span 2 = Publisher Flow Control Span of first publish - # Span 3 = Publisher Batching Span of first publish - # Span 4 = Publish Create Span of second publish - # Span 5 = Publisher Flow Control Span of second publish - assert len(spans) == 5 - failed_flow_control_span = spans[4] - assert failed_flow_control_span.name == "publisher flow control" - assert failed_flow_control_span.kind == trace.SpanKind.INTERNAL - assert failed_flow_control_span._parent[1] == spans[3]._context[1] - assert failed_flow_control_span.status.status_code == trace.StatusCode.ERROR + # Span 1: Publisher Flow Control Span(closed after exception) + # Span 2: Publisher Create Span(closed after exception) + assert len(spans) == 2 + + flow_control_span, create_span = spans + + assert create_span.name == f"{TOPIC} create" + assert create_span.status.status_code == trace.StatusCode.ERROR + attributes = create_span.attributes + assert attributes["messaging.system"] == "gcp_pubsub" + assert attributes["messaging.destination.name"] == TOPIC + assert attributes["code.function"] == "google.cloud.pubsub.PublisherClient.publish" + assert attributes["messaging.gcp_pubsub.message.ordering_key"] == "" + assert attributes["messaging.operation"] == "create" + assert attributes["gcp.project_id"] == "projectID" + assert "messaging.message.envelope.size" in attributes + assert create_span.kind == trace.SpanKind.PRODUCER + + assert len(create_span.events) == 2 + # Verify start event + start_event = create_span.events[0] + assert start_event.name == "publish start" + assert "timestamp" in start_event.attributes + assert create_span.events[1].name == "exception" - assert len(failed_flow_control_span.events) == 1 - assert failed_flow_control_span.events[0].name == "exception" + assert flow_control_span.name == "publisher flow control" + assert flow_control_span.status.status_code == trace.StatusCode.ERROR + assert flow_control_span.kind == trace.SpanKind.INTERNAL + assert len(flow_control_span.events) == 1 + assert flow_control_span.events[0].name == "exception" + assert flow_control_span._parent[1] == create_span._context[1] def test_publish_data_not_bytestring_error(creds): @@ -561,7 +557,14 @@ def test_publish_attrs_bytestring(creds): # The attributes should have been sent as text. batch.publish.assert_called_once_with( - gapic_types.PubsubMessage(data=b"foo", attributes={"bar": "baz"}) + # gapic_types.PubsubMessage(data=b"foo", attributes={"bar": "baz"}) + MessageWrapper( + message=gapic_types.PubsubMessage( + data=b"foo", + attributes={"bar": "baz"}, + ), + create_span=None, + ) ) @@ -601,8 +604,12 @@ def test_publish_new_batch_needed(creds): commit_timeout=gapic_v1.method.DEFAULT, ) message_pb = gapic_types.PubsubMessage(data=b"foo", attributes={"bar": "baz"}) - batch1.publish.assert_called_once_with(message_pb) - batch2.publish.assert_called_once_with(message_pb) + wrapper = MessageWrapper( + message=message_pb, + create_span=None, + ) + batch1.publish.assert_called_once_with(wrapper) + batch2.publish.assert_called_once_with(wrapper) def test_publish_attrs_type_error(creds): @@ -625,9 +632,9 @@ def test_publish_custom_retry_overrides_configured_retry(creds): client.publish(topic, b"hello!", retry=mock.sentinel.custom_retry) fake_sequencer.publish.assert_called_once_with( - mock.ANY, retry=mock.sentinel.custom_retry, timeout=mock.ANY + message=mock.ANY, retry=mock.sentinel.custom_retry, timeout=mock.ANY ) - message = fake_sequencer.publish.call_args.args[0] + message = fake_sequencer.publish.call_args.kwargs["message"].get_message() assert message.data == b"hello!" @@ -644,9 +651,9 @@ def test_publish_custom_timeout_overrides_configured_timeout(creds): client.publish(topic, b"hello!", timeout=mock.sentinel.custom_timeout) fake_sequencer.publish.assert_called_once_with( - mock.ANY, retry=mock.ANY, timeout=mock.sentinel.custom_timeout + message=mock.ANY, retry=mock.ANY, timeout=mock.sentinel.custom_timeout ) - message = fake_sequencer.publish.call_args.args[0] + message = fake_sequencer.publish.call_args.kwargs["message"].get_message() assert message.data == b"hello!" @@ -806,12 +813,22 @@ def test_publish_with_ordering_key(creds): # Check mock. batch.publish.assert_has_calls( [ - mock.call(gapic_types.PubsubMessage(data=b"spam", ordering_key="k1")), mock.call( - gapic_types.PubsubMessage( - data=b"foo", attributes={"bar": "baz"}, ordering_key="k1" + MessageWrapper( + message=gapic_types.PubsubMessage(data=b"spam", ordering_key="k1"), + create_span=None, ) ), + mock.call( + MessageWrapper( + message=gapic_types.PubsubMessage( + data=b"foo", + attributes={"bar": "baz"}, + ordering_key="k1", + ), + create_span=None, + ), + ), ] ) From ba252cc64e016d559828a9ce8d9adf0ff50b8aeb Mon Sep 17 00:00:00 2001 From: Mukund Date: Sat, 22 Jun 2024 21:51:50 +0000 Subject: [PATCH 09/30] Fix lint --- google/cloud/pubsub_v1/publisher/_batch/thread.py | 13 +++++++------ google/cloud/pubsub_v1/publisher/client.py | 6 ++++-- google/cloud/pubsub_v1/publisher/message_wrapper.py | 6 ++++-- .../publisher/sequencer/test_unordered_sequencer.py | 4 +++- .../pubsub_v1/publisher/test_publisher_client.py | 12 +++++++++--- 5 files changed, 27 insertions(+), 14 deletions(-) diff --git a/google/cloud/pubsub_v1/publisher/_batch/thread.py b/google/cloud/pubsub_v1/publisher/_batch/thread.py index 42c1bdefa..7a773d703 100644 --- a/google/cloud/pubsub_v1/publisher/_batch/thread.py +++ b/google/cloud/pubsub_v1/publisher/_batch/thread.py @@ -137,7 +137,7 @@ def client(self) -> "PublisherClient": return self._client @property - def message_wrappers(self) -> Sequence[gapic_types.PubsubMessage]: + def message_wrappers(self) -> Sequence[MessageWrapper]: """The messages currently in the batch.""" return self._message_wrappers @@ -295,9 +295,7 @@ def _commit(self) -> None: ) as publish_rpc_span: ctx = publish_rpc_span.get_span_context() for wrapper in self._message_wrappers: - wrapper.get_span().add_link( - ctx - ) + wrapper.get_span().add_link(ctx) # Performs retries for errors defined by the retry configuration. response = self._client._gapic_publish( topic=self._topic, @@ -374,7 +372,8 @@ def _commit(self) -> None: self._batch_done_callback(batch_transport_succeeded) def publish( - self, message_wrapper: MessageWrapper, + self, + message_wrapper: MessageWrapper, ) -> Optional["pubsub_v1.publisher.futures.Future"]: """Publish a single message. @@ -404,7 +403,9 @@ def publish( # using the raw protobuf class, and only then wrapping it into the # higher-level PubsubMessage class. vanilla_pb = _raw_proto_pubbsub_message(**message_wrapper.get_message()) - message_wrapper.set_message(vanilla_pb.gapic_types.PubsubMessage.wrap(vanilla_pb)) + message_wrapper.set_message( + vanilla_pb.gapic_types.PubsubMessage.wrap(vanilla_pb) + ) future = None diff --git a/google/cloud/pubsub_v1/publisher/client.py b/google/cloud/pubsub_v1/publisher/client.py index 73c70d246..57ce692b2 100644 --- a/google/cloud/pubsub_v1/publisher/client.py +++ b/google/cloud/pubsub_v1/publisher/client.py @@ -385,7 +385,7 @@ def publish( # type: ignore[override] ) message = gapic_types.PubsubMessage.wrap(vanilla_pb) - class OTelContextSetter(): + class OTelContextSetter: """ Used by Open Telemetry for context propagation. """ @@ -486,7 +486,9 @@ def on_publish_done(future): if retry is gapic_v1.method.DEFAULT: # use the default retry for the publish GRPC method as a base transport = self._transport - base_retry = transport._wrapped_methods[transport.publish]._retry + base_retry = transport._wrapped_methods[ + transport.publish + ]._retry retry = base_retry.with_deadline(2.0**32) # timeout needs to be overridden and set to infinite in # addition to the retry deadline since both determine diff --git a/google/cloud/pubsub_v1/publisher/message_wrapper.py b/google/cloud/pubsub_v1/publisher/message_wrapper.py index f5389cfe7..1433150e7 100644 --- a/google/cloud/pubsub_v1/publisher/message_wrapper.py +++ b/google/cloud/pubsub_v1/publisher/message_wrapper.py @@ -3,7 +3,7 @@ from typing import Optional -class MessageWrapper(): +class MessageWrapper: """ Wraps Pub/Sub message with additional metadata required for Open Telemetry tracing. @@ -25,4 +25,6 @@ def get_span(self) -> Optional["trace.Span"]: def __eq__(self, other: "MessageWrapper") -> bool: if not isinstance(other, MessageWrapper): return False - return self._message == other._message and self._create_span == other._create_span + return ( + self._message == other._message and self._create_span == other._create_span + ) diff --git a/tests/unit/pubsub_v1/publisher/sequencer/test_unordered_sequencer.py b/tests/unit/pubsub_v1/publisher/sequencer/test_unordered_sequencer.py index f7716694f..2fbd93487 100644 --- a/tests/unit/pubsub_v1/publisher/sequencer/test_unordered_sequencer.py +++ b/tests/unit/pubsub_v1/publisher/sequencer/test_unordered_sequencer.py @@ -140,7 +140,9 @@ def test_publish_after_batch_error(): batch = client._batch_class( client, "topic_name", types.BatchSettings(max_latency=float("inf")) ) - batch._message_wrappers.append(mock.Mock(name="message")) # Make batch truthy (non-empty). + batch._message_wrappers.append( + mock.Mock(name="message") + ) # Make batch truthy (non-empty). sequencer = unordered_sequencer.UnorderedSequencer(client, "topic_name") sequencer._set_batch(batch) diff --git a/tests/unit/pubsub_v1/publisher/test_publisher_client.py b/tests/unit/pubsub_v1/publisher/test_publisher_client.py index 59317ed75..67d961749 100644 --- a/tests/unit/pubsub_v1/publisher/test_publisher_client.py +++ b/tests/unit/pubsub_v1/publisher/test_publisher_client.py @@ -296,7 +296,9 @@ def test_publish(creds): ), mock.call( MessageWrapper( - message=gapic_types.PubsubMessage(data=b"foo", attributes={"bar": "baz"}), + message=gapic_types.PubsubMessage( + data=b"foo", attributes={"bar": "baz"} + ), create_span=None, ) ), @@ -319,7 +321,9 @@ def test_publish_otel_context_propagation(creds, provider): provider.add_span_processor(processor) trace.set_tracer_provider(provider) - flow_controller_add_mock = mock.Mock(spec=publisher.flow_controller.FlowController.add) + flow_controller_add_mock = mock.Mock( + spec=publisher.flow_controller.FlowController.add + ) client._flow_controller.add = flow_controller_add_mock client.publish(TOPIC, b"message") @@ -406,7 +410,9 @@ def test_publish_otel_flow_control_exception(creds, provider): trace.set_tracer_provider(provider) client._flow_controller = mock.Mock(spec=publisher.flow_controller.FlowController) - client._flow_controller.add = mock.Mock(side_effect=exceptions.FlowControlLimitError) + client._flow_controller.add = mock.Mock( + side_effect=exceptions.FlowControlLimitError + ) TOPIC = "projects/projectID/topics/topicID" client.publish(TOPIC, b"message") From f6df5a6becff693a2661af03b6356cf3c3b0aa2e Mon Sep 17 00:00:00 2001 From: Mukund Date: Sat, 22 Jun 2024 22:46:33 +0000 Subject: [PATCH 10/30] Fix broken tests --- .../cloud/pubsub_v1/publisher/_batch/base.py | 5 +- .../pubsub_v1/publisher/_batch/thread.py | 20 +- .../publisher/_sequencer/ordered_sequencer.py | 4 +- google/cloud/pubsub_v1/publisher/client.py | 5 +- .../pubsub_v1/publisher/message_wrapper.py | 24 +-- tests/unit/pubsub_v1/conftest.py | 22 ++- .../pubsub_v1/publisher/batch/test_base.py | 8 +- .../pubsub_v1/publisher/batch/test_thread.py | 179 +++++++++++++----- .../sequencer/test_ordered_sequencer.py | 5 +- .../sequencer/test_unordered_sequencer.py | 5 +- .../publisher/test_publisher_client.py | 52 +++-- 11 files changed, 206 insertions(+), 123 deletions(-) diff --git a/google/cloud/pubsub_v1/publisher/_batch/base.py b/google/cloud/pubsub_v1/publisher/_batch/base.py index 52505996b..e21297b9e 100644 --- a/google/cloud/pubsub_v1/publisher/_batch/base.py +++ b/google/cloud/pubsub_v1/publisher/_batch/base.py @@ -24,6 +24,7 @@ from google.cloud import pubsub_v1 from google.cloud.pubsub_v1 import types from google.pubsub_v1 import types as gapic_types + from google.cloud.pubsub_v1.publisher.message_wrapper import MessageWrapper class Batch(metaclass=abc.ABCMeta): @@ -54,7 +55,7 @@ class Batch(metaclass=abc.ABCMeta): def __len__(self): """Return the number of messages currently in the batch.""" - return len(self.messages) + return len(self.message_wrappers) @staticmethod @abc.abstractmethod @@ -68,7 +69,7 @@ def make_lock(): # pragma: NO COVER @property @abc.abstractmethod - def messages(self) -> Sequence["gapic_types.PubsubMessage"]: # pragma: NO COVER + def message_wrappers(self) -> Sequence["MessageWrapper"]: # pragma: NO COVER """Return the messages currently in the batch. Returns: diff --git a/google/cloud/pubsub_v1/publisher/_batch/thread.py b/google/cloud/pubsub_v1/publisher/_batch/thread.py index 7a773d703..065a28712 100644 --- a/google/cloud/pubsub_v1/publisher/_batch/thread.py +++ b/google/cloud/pubsub_v1/publisher/_batch/thread.py @@ -277,7 +277,7 @@ def _commit(self) -> None: tracer = trace.get_tracer("com.google.cloud.pubsub.v1") links = [] for wrapper in self._message_wrappers: - span = wrapper.get_span() + span = wrapper.create_span if span.get_span_context().trace_flags.sampled: links.append(trace.Link(span.get_span_context())) with tracer.start_as_current_span( @@ -295,18 +295,18 @@ def _commit(self) -> None: ) as publish_rpc_span: ctx = publish_rpc_span.get_span_context() for wrapper in self._message_wrappers: - wrapper.get_span().add_link(ctx) + wrapper.create_span.add_link(ctx) # Performs retries for errors defined by the retry configuration. response = self._client._gapic_publish( topic=self._topic, - messages=[wrapper.get_message() for wrapper in self._message_wrappers], + messages=[wrapper.message for wrapper in self._message_wrappers], retry=self._commit_retry, timeout=self._commit_timeout, ) if self._client._open_telemetry_enabled: publish_rpc_span.end() for wrapper in self._message_wrappers: - wrapper.get_span().end() + wrapper.create_span.end() except google.api_core.exceptions.GoogleAPIError as exc: # We failed to publish, even after retries, so set the exception on # all futures and exit. @@ -321,7 +321,7 @@ def _commit(self) -> None: ) publish_rpc_span.end() for wrapper in self._message_wrappers: - span = wrapper.get_span() + span = wrapper.create_span span.record_exception(exception=exc) span.set_status( trace.Status(status_code=trace.StatusCode.ERROR), @@ -398,13 +398,13 @@ def publish( """ # Coerce the type, just in case. - if not isinstance(message_wrapper.get_message(), gapic_types.PubsubMessage): + if not isinstance(message_wrapper.message, gapic_types.PubsubMessage): # For performance reasons, the message should be constructed by directly # using the raw protobuf class, and only then wrapping it into the # higher-level PubsubMessage class. - vanilla_pb = _raw_proto_pubbsub_message(**message_wrapper.get_message()) - message_wrapper.set_message( - vanilla_pb.gapic_types.PubsubMessage.wrap(vanilla_pb) + vanilla_pb = _raw_proto_pubbsub_message(**message_wrapper.message) + message_wrapper.message = vanilla_pb.gapic_types.PubsubMessage.wrap( + vanilla_pb ) future = None @@ -418,7 +418,7 @@ def publish( return None size_increase = gapic_types.PublishRequest( - messages=[message_wrapper.get_message()] + messages=[message_wrapper.message] )._pb.ByteSize() if (self._base_request_size + size_increase) > _SERVER_PUBLISH_MAX_BYTES: diff --git a/google/cloud/pubsub_v1/publisher/_sequencer/ordered_sequencer.py b/google/cloud/pubsub_v1/publisher/_sequencer/ordered_sequencer.py index 30c76a44f..c0d421a7b 100644 --- a/google/cloud/pubsub_v1/publisher/_sequencer/ordered_sequencer.py +++ b/google/cloud/pubsub_v1/publisher/_sequencer/ordered_sequencer.py @@ -23,7 +23,7 @@ from google.cloud.pubsub_v1.publisher import exceptions from google.cloud.pubsub_v1.publisher._sequencer import base as sequencer_base from google.cloud.pubsub_v1.publisher._batch import base as batch_base -from google.pubsub_v1 import types as gapic_types +from google.cloud.pubsub_v1.publisher.message_wrapper import MessageWrapper if typing.TYPE_CHECKING: # pragma: NO COVER from google.cloud.pubsub_v1 import types @@ -262,7 +262,7 @@ def _create_batch( def publish( self, - message: gapic_types.PubsubMessage, + message: MessageWrapper, retry: "OptionalRetry" = gapic_v1.method.DEFAULT, timeout: "types.OptionalTimeout" = gapic_v1.method.DEFAULT, ) -> futures.Future: diff --git a/google/cloud/pubsub_v1/publisher/client.py b/google/cloud/pubsub_v1/publisher/client.py index 57ce692b2..8d9e6a113 100644 --- a/google/cloud/pubsub_v1/publisher/client.py +++ b/google/cloud/pubsub_v1/publisher/client.py @@ -501,9 +501,12 @@ def on_publish_done(future): # Delegate the publishing to the sequencer. sequencer = self._get_or_create_sequencer(topic, ordering_key) + create_span = None + if self._open_telemetry_enabled: + create_span = publish_create_span message_wrapper = MessageWrapper( message=message, - create_span=publish_create_span, + create_span=create_span, ) future = sequencer.publish( message=message_wrapper, diff --git a/google/cloud/pubsub_v1/publisher/message_wrapper.py b/google/cloud/pubsub_v1/publisher/message_wrapper.py index 1433150e7..f59d3c0ee 100644 --- a/google/cloud/pubsub_v1/publisher/message_wrapper.py +++ b/google/cloud/pubsub_v1/publisher/message_wrapper.py @@ -1,30 +1,14 @@ from google.pubsub_v1 import types as gapic_types from opentelemetry import trace -from typing import Optional +from dataclasses import dataclass, field +@dataclass class MessageWrapper: """ Wraps Pub/Sub message with additional metadata required for Open Telemetry tracing. """ - def __init__(self, message: gapic_types.PubsubMessage, create_span: trace.Span): - self._message = message - self._create_span = create_span - - def get_message(self) -> gapic_types.PubsubMessage: - return self._message - - def set_message(self, message: gapic_types.PubsubMessage) -> None: - self._message = message - - def get_span(self) -> Optional["trace.Span"]: - return self._create_span - - def __eq__(self, other: "MessageWrapper") -> bool: - if not isinstance(other, MessageWrapper): - return False - return ( - self._message == other._message and self._create_span == other._create_span - ) + message: gapic_types.PubsubMessage + create_span: trace.Span = field(default=None) diff --git a/tests/unit/pubsub_v1/conftest.py b/tests/unit/pubsub_v1/conftest.py index 897301343..92023d42b 100644 --- a/tests/unit/pubsub_v1/conftest.py +++ b/tests/unit/pubsub_v1/conftest.py @@ -13,6 +13,9 @@ # limitations under the License. from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry import trace import google.auth.credentials import pytest @@ -27,9 +30,16 @@ def creds(): yield google.auth.credentials.AnonymousCredentials() -@pytest.fixture(scope="session", autouse=True) -def provider(): - """ - Provide an Open Telemetry Tracer that can be re-used across tests. - """ - yield TracerProvider() +@pytest.fixture(scope="function", autouse=True) +def set_trace_provider(): + provider = TracerProvider() + trace.set_tracer_provider(provider) + + +@pytest.fixture(scope="function") +def span_exporter(): + exporter = InMemorySpanExporter() + processor = SimpleSpanProcessor(exporter) + provider = trace.get_tracer_provider() + provider.add_span_processor(processor) + yield exporter diff --git a/tests/unit/pubsub_v1/publisher/batch/test_base.py b/tests/unit/pubsub_v1/publisher/batch/test_base.py index a95d72c12..eca6e08e2 100644 --- a/tests/unit/pubsub_v1/publisher/batch/test_base.py +++ b/tests/unit/pubsub_v1/publisher/batch/test_base.py @@ -20,6 +20,7 @@ from google.cloud.pubsub_v1 import types from google.cloud.pubsub_v1.publisher._batch.base import BatchStatus from google.cloud.pubsub_v1.publisher._batch.thread import Batch +from google.cloud.pubsub_v1.publisher.message_wrapper import MessageWrapper from google.pubsub_v1 import types as gapic_types @@ -41,5 +42,10 @@ def create_batch(status, settings=types.BatchSettings()): def test_len(): batch = create_batch(status=BatchStatus.ACCEPTING_MESSAGES) assert len(batch) == 0 - batch.publish(gapic_types.PubsubMessage(data=b"foo")) + batch.publish( + MessageWrapper( + message=gapic_types.PubsubMessage(data=b"foo"), + create_span=None, + ), + ) assert len(batch) == 1 diff --git a/tests/unit/pubsub_v1/publisher/batch/test_thread.py b/tests/unit/pubsub_v1/publisher/batch/test_thread.py index 2752d62a2..86ba9c2ef 100644 --- a/tests/unit/pubsub_v1/publisher/batch/test_thread.py +++ b/tests/unit/pubsub_v1/publisher/batch/test_thread.py @@ -36,6 +36,7 @@ from google.cloud.pubsub_v1.publisher._batch import thread from google.cloud.pubsub_v1.publisher._batch.thread import Batch from google.pubsub_v1 import types as gapic_types +from google.cloud.pubsub_v1.publisher.message_wrapper import MessageWrapper def create_client(): @@ -126,8 +127,18 @@ def test_commit_no_op(): def test_blocking__commit(): batch = create_batch() futures = ( - batch.publish({"data": b"This is my message."}), - batch.publish({"data": b"This is another message."}), + batch.publish( + message_wrapper=MessageWrapper( + message=gapic_types.PubsubMessage(data=b"This is my message."), + create_span=None, + ) + ), + batch.publish( + message_wrapper=MessageWrapper( + message=gapic_types.PubsubMessage(data=b"This is another message."), + create_span=None, + ) + ), ) # Set up the underlying API publish method to return a PublishResponse. @@ -160,7 +171,12 @@ def test_blocking__commit(): def test_blocking__commit_custom_retry(): batch = create_batch(commit_retry=mock.sentinel.custom_retry) - batch.publish({"data": b"This is my message."}) + batch.publish( + message_wrapper=MessageWrapper( + message=gapic_types.PubsubMessage(data=b"This is my message."), + create_span=None, + ) + ) # Set up the underlying API publish method to return a PublishResponse. publish_response = gapic_types.PublishResponse(message_ids=["a"]) @@ -182,7 +198,12 @@ def test_blocking__commit_custom_retry(): def test_blocking__commit_custom_timeout(): batch = create_batch(commit_timeout=mock.sentinel.custom_timeout) - batch.publish({"data": b"This is my message."}) + batch.publish( + message_wrapper=MessageWrapper( + message=gapic_types.PubsubMessage(data=b"This is my message."), + create_span=None, + ) + ) # Set up the underlying API publish method to return a PublishResponse. publish_response = gapic_types.PublishResponse(message_ids=["a"]) @@ -217,13 +238,23 @@ def api_publish_delay(topic="", messages=(), retry=None, timeout=None): ) with api_publish_patch: - batch.publish({"data": b"first message"}) + batch.publish( + message_wrapper=MessageWrapper( + message=gapic_types.PubsubMessage(data=b"first message"), + create_span=None, + ) + ) start = datetime.datetime.now() event_set = api_publish_called.wait(timeout=1.0) if not event_set: # pragma: NO COVER pytest.fail("API publish was not called in time") - batch.publish({"data": b"second message"}) + batch.publish( + message_wrapper=MessageWrapper( + message=gapic_types.PubsubMessage(data=b"second message"), + create_span=None, + ) + ) end = datetime.datetime.now() # While a batch commit in progress, waiting for the API publish call to @@ -266,8 +297,18 @@ def test_blocking__commit_no_messages(): def test_blocking__commit_wrong_messageid_length(): batch = create_batch() futures = ( - batch.publish({"data": b"blah blah blah"}), - batch.publish({"data": b"blah blah blah blah"}), + batch.publish( + message_wrapper=MessageWrapper( + message=gapic_types.PubsubMessage(data=b"blah blah blah"), + create_span=None, + ) + ), + batch.publish( + message_wrapper=MessageWrapper( + message=gapic_types.PubsubMessage(data=b"blah blah blah blah"), + create_span=None, + ) + ), ) # Set up a PublishResponse that only returns one message ID. @@ -287,8 +328,18 @@ def test_blocking__commit_wrong_messageid_length(): def test_block__commmit_api_error(): batch = create_batch() futures = ( - batch.publish({"data": b"blah blah blah"}), - batch.publish({"data": b"blah blah blah blah"}), + batch.publish( + message_wrapper=MessageWrapper( + message=gapic_types.PubsubMessage(data=b"blah blah blah"), + create_span=None, + ) + ), + batch.publish( + message_wrapper=MessageWrapper( + message=gapic_types.PubsubMessage(data=b"blah blah blah blah"), + create_span=None, + ) + ), ) # Make the API throw an error when publishing. @@ -306,8 +357,18 @@ def test_block__commmit_api_error(): def test_block__commmit_retry_error(): batch = create_batch() futures = ( - batch.publish({"data": b"blah blah blah"}), - batch.publish({"data": b"blah blah blah blah"}), + batch.publish( + message_wrapper=MessageWrapper( + message=gapic_types.PubsubMessage(data=b"blah blah blah"), + create_span=None, + ) + ), + batch.publish( + message_wrapper=MessageWrapper( + message=gapic_types.PubsubMessage(data=b"blah blah blah blah"), + create_span=None, + ) + ), ) # Make the API throw an error when publishing. @@ -325,23 +386,30 @@ def test_block__commmit_retry_error(): def test_publish_updating_batch_size(): batch = create_batch(topic="topic_foo") messages = ( - gapic_types.PubsubMessage(data=b"foobarbaz"), - gapic_types.PubsubMessage(data=b"spameggs"), - gapic_types.PubsubMessage(data=b"1335020400"), + MessageWrapper( + message=gapic_types.PubsubMessage(data=b"foobarbaz"), create_span=None + ), + MessageWrapper( + message=gapic_types.PubsubMessage(data=b"spameggs"), create_span=None + ), + MessageWrapper( + message=gapic_types.PubsubMessage(data=b"1335020400"), create_span=None + ), ) # Publish each of the messages, which should save them to the batch. futures = [batch.publish(message) for message in messages] # There should be three messages on the batch, and three futures. - assert len(batch.messages) == 3 + assert len(batch.message_wrappers) == 3 assert batch._futures == futures # The size should have been incremented by the sum of the size # contributions of each message to the PublishRequest. base_request_size = gapic_types.PublishRequest(topic="topic_foo")._pb.ByteSize() expected_request_size = base_request_size + sum( - gapic_types.PublishRequest(messages=[msg])._pb.ByteSize() for msg in messages + gapic_types.PublishRequest(messages=[msg.message])._pb.ByteSize() + for msg in messages ) assert batch.size == expected_request_size @@ -350,22 +418,22 @@ def test_publish_updating_batch_size(): def test_publish(): batch = create_batch() - message = gapic_types.PubsubMessage() - future = batch.publish(message) + wrapper = MessageWrapper(gapic_types.PubsubMessage(), None) + future = batch.publish(wrapper) - assert len(batch.messages) == 1 + assert len(batch.message_wrappers) == 1 assert batch._futures == [future] def test_publish_max_messages_zero(): batch = create_batch(topic="topic_foo", max_messages=0) - message = gapic_types.PubsubMessage(data=b"foobarbaz") + wrapper = MessageWrapper(gapic_types.PubsubMessage(data=b"foobarbaz"), None) with mock.patch.object(batch, "commit") as commit: - future = batch.publish(message) + future = batch.publish(wrapper) assert future is not None - assert len(batch.messages) == 1 + assert len(batch.message_wrappers) == 1 assert batch._futures == [future] commit.assert_called_once() @@ -373,30 +441,30 @@ def test_publish_max_messages_zero(): def test_publish_max_messages_enforced(): batch = create_batch(topic="topic_foo", max_messages=1) - message = gapic_types.PubsubMessage(data=b"foobarbaz") - message2 = gapic_types.PubsubMessage(data=b"foobarbaz2") + message = MessageWrapper(gapic_types.PubsubMessage(data=b"foobarbaz"), None) + message2 = MessageWrapper(gapic_types.PubsubMessage(data=b"foobarbaz2"), None) future = batch.publish(message) future2 = batch.publish(message2) assert future is not None assert future2 is None - assert len(batch.messages) == 1 + assert len(batch.message_wrappers) == 1 assert len(batch._futures) == 1 def test_publish_max_bytes_enforced(): batch = create_batch(topic="topic_foo", max_bytes=15) - message = gapic_types.PubsubMessage(data=b"foobarbaz") - message2 = gapic_types.PubsubMessage(data=b"foobarbaz2") + message = MessageWrapper(gapic_types.PubsubMessage(data=b"foobarbaz"), None) + message2 = MessageWrapper(gapic_types.PubsubMessage(data=b"foobarbaz2"), None) future = batch.publish(message) future2 = batch.publish(message2) assert future is not None assert future2 is None - assert len(batch.messages) == 1 + assert len(batch.message_wrappers) == 1 assert len(batch._futures) == 1 @@ -404,9 +472,9 @@ def test_publish_exceed_max_messages(): max_messages = 4 batch = create_batch(max_messages=max_messages) messages = ( - gapic_types.PubsubMessage(data=b"foobarbaz"), - gapic_types.PubsubMessage(data=b"spameggs"), - gapic_types.PubsubMessage(data=b"1335020400"), + MessageWrapper(gapic_types.PubsubMessage(data=b"foobarbaz"), None), + MessageWrapper(gapic_types.PubsubMessage(data=b"spameggs"), None), + MessageWrapper(gapic_types.PubsubMessage(data=b"1335020400"), None), ) # Publish each of the messages, which should save them to the batch. @@ -420,7 +488,9 @@ def test_publish_exceed_max_messages(): # When a fourth message is published, commit should be called. # No future will be returned in this case. - future = batch.publish(gapic_types.PubsubMessage(data=b"last one")) + future = batch.publish( + MessageWrapper(gapic_types.PubsubMessage(data=b"last one"), None) + ) commit.assert_called_once_with() assert future is None @@ -443,7 +513,7 @@ def test_publish_single_message_size_exceeds_server_size_limit(): assert request_size == 1001 # sanity check, just above the (mocked) server limit with pytest.raises(exceptions.MessageTooLargeError): - batch.publish(big_message) + batch.publish(MessageWrapper(big_message, None)) @mock.patch.object(thread, "_SERVER_PUBLISH_MAX_BYTES", 1000) @@ -463,8 +533,8 @@ def test_publish_total_messages_size_exceeds_server_size_limit(): assert 1000 < request_size < 1500 with mock.patch.object(batch, "commit") as fake_commit: - batch.publish(messages[0]) - batch.publish(messages[1]) + batch.publish(MessageWrapper(messages[0], None)) + batch.publish(MessageWrapper(messages[1], None)) # The server side limit should kick in and cause a commit. fake_commit.assert_called_once() @@ -472,21 +542,30 @@ def test_publish_total_messages_size_exceeds_server_size_limit(): def test_publish_dict(): batch = create_batch() - future = batch.publish({"data": b"foobarbaz", "attributes": {"spam": "eggs"}}) + message = MessageWrapper( + gapic_types.PubsubMessage(data=b"foobarbaz", attributes={"spam": "eggs"}), None + ) + future = batch.publish(message) # There should be one message on the batch. - expected_message = gapic_types.PubsubMessage( - data=b"foobarbaz", attributes={"spam": "eggs"} + expected_message = MessageWrapper( + gapic_types.PubsubMessage(data=b"foobarbaz", attributes={"spam": "eggs"}), None ) - assert batch.messages == [expected_message] + assert batch.message_wrappers == [expected_message] assert batch._futures == [future] def test_cancel(): batch = create_batch() futures = ( - batch.publish({"data": b"This is my message."}), - batch.publish({"data": b"This is another message."}), + batch.publish( + MessageWrapper(gapic_types.PubsubMessage(data=b"This is my message."), None) + ), + batch.publish( + MessageWrapper( + gapic_types.PubsubMessage(data=b"This is another message."), None + ) + ), ) batch.cancel(BatchCancellationReason.PRIOR_ORDERED_MESSAGE_FAILED) @@ -503,9 +582,9 @@ def test_do_not_commit_when_full_when_flag_is_off(): # Set commit_when_full flag to False batch = create_batch(max_messages=max_messages, commit_when_full=False) messages = ( - gapic_types.PubsubMessage(data=b"foobarbaz"), - gapic_types.PubsubMessage(data=b"spameggs"), - gapic_types.PubsubMessage(data=b"1335020400"), + MessageWrapper(gapic_types.PubsubMessage(data=b"foobarbaz"), None), + MessageWrapper(gapic_types.PubsubMessage(data=b"spameggs"), None), + MessageWrapper(gapic_types.PubsubMessage(data=b"1335020400"), None), ) with mock.patch.object(batch, "commit") as commit: @@ -514,7 +593,9 @@ def test_do_not_commit_when_full_when_flag_is_off(): assert len(futures) == 3 # When a fourth message is published, commit should not be called. - future = batch.publish(gapic_types.PubsubMessage(data=b"last one")) + future = batch.publish( + MessageWrapper(gapic_types.PubsubMessage(data=b"last one"), None) + ) assert commit.call_count == 0 assert future is None @@ -534,7 +615,7 @@ def test_batch_done_callback_called_on_success(): batch = create_batch(batch_done_callback=batch_done_callback_tracker) # Ensure messages exist. - message = gapic_types.PubsubMessage(data=b"foobarbaz") + message = MessageWrapper(gapic_types.PubsubMessage(data=b"foobarbaz"), None) batch.publish(message) # One response for one published message. @@ -554,7 +635,7 @@ def test_batch_done_callback_called_on_publish_failure(): batch = create_batch(batch_done_callback=batch_done_callback_tracker) # Ensure messages exist. - message = gapic_types.PubsubMessage(data=b"foobarbaz") + message = MessageWrapper(gapic_types.PubsubMessage(data=b"foobarbaz"), None) batch.publish(message) # One response for one published message. @@ -580,7 +661,7 @@ def test_batch_done_callback_called_on_publish_response_invalid(): batch = create_batch(batch_done_callback=batch_done_callback_tracker) # Ensure messages exist. - message = gapic_types.PubsubMessage(data=b"foobarbaz") + message = MessageWrapper(gapic_types.PubsubMessage(data=b"foobarbaz"), None) batch.publish(message) # No message ids returned in successful publish response -> invalid. diff --git a/tests/unit/pubsub_v1/publisher/sequencer/test_ordered_sequencer.py b/tests/unit/pubsub_v1/publisher/sequencer/test_ordered_sequencer.py index 7570c2970..c9cd0b97e 100644 --- a/tests/unit/pubsub_v1/publisher/sequencer/test_ordered_sequencer.py +++ b/tests/unit/pubsub_v1/publisher/sequencer/test_ordered_sequencer.py @@ -27,12 +27,15 @@ from google.cloud.pubsub_v1 import publisher from google.cloud.pubsub_v1.publisher._sequencer import ordered_sequencer from google.pubsub_v1 import types as gapic_types +from google.cloud.pubsub_v1.publisher.message_wrapper import MessageWrapper _ORDERING_KEY = "ordering_key_1" def create_message(): - return gapic_types.PubsubMessage(data=b"foo", attributes={"bar": "baz"}) + return MessageWrapper( + gapic_types.PubsubMessage(data=b"foo", attributes={"bar": "baz"}), None + ) def create_client(): diff --git a/tests/unit/pubsub_v1/publisher/sequencer/test_unordered_sequencer.py b/tests/unit/pubsub_v1/publisher/sequencer/test_unordered_sequencer.py index 2fbd93487..e25d99e8a 100644 --- a/tests/unit/pubsub_v1/publisher/sequencer/test_unordered_sequencer.py +++ b/tests/unit/pubsub_v1/publisher/sequencer/test_unordered_sequencer.py @@ -27,10 +27,13 @@ from google.cloud.pubsub_v1.publisher._batch import base from google.cloud.pubsub_v1.publisher._sequencer import unordered_sequencer from google.pubsub_v1 import types as gapic_types +from google.cloud.pubsub_v1.publisher.message_wrapper import MessageWrapper def create_message(): - return gapic_types.PubsubMessage(data=b"foo", attributes={"bar": "baz"}) + return MessageWrapper( + gapic_types.PubsubMessage(data=b"foo", attributes={"bar": "baz"}), None + ) def create_client(): diff --git a/tests/unit/pubsub_v1/publisher/test_publisher_client.py b/tests/unit/pubsub_v1/publisher/test_publisher_client.py index 67d961749..d6adc4420 100644 --- a/tests/unit/pubsub_v1/publisher/test_publisher_client.py +++ b/tests/unit/pubsub_v1/publisher/test_publisher_client.py @@ -43,9 +43,6 @@ from google.pubsub_v1.services.publisher import client as publisher_client from google.pubsub_v1.services.publisher.transports.grpc import PublisherGrpcTransport - -from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter -from opentelemetry.sdk.trace.export import SimpleSpanProcessor from opentelemetry import trace @@ -219,7 +216,7 @@ def test_message_ordering_enabled(creds): assert client._enable_message_ordering -def test_publish_otel_batching_exception(creds, provider): +def test_publish_otel_batching_exception(creds, span_exporter): client = publisher.Client( credentials=creds, publisher_options=types.PublisherOptions( @@ -228,10 +225,10 @@ def test_publish_otel_batching_exception(creds, provider): ) # Setup Open Telemetry tracing - memory_exporter = InMemorySpanExporter() - processor = SimpleSpanProcessor(memory_exporter) - provider.add_span_processor(processor) - trace.set_tracer_provider(provider) + # memory_exporter = InMemorySpanExporter() + # processor = SimpleSpanProcessor(memory_exporter) + # provider.add_span_processor(processor) + # trace.set_tracer_provider(provider) # Throw an exception when sequencer.publish() is called sequencer = mock.Mock(spec=ordered_sequencer.OrderedSequencer) @@ -242,7 +239,7 @@ def test_publish_otel_batching_exception(creds, provider): with pytest.raises(RuntimeError): client.publish(TOPIC, b"message") - spans = memory_exporter.get_finished_spans() + spans = span_exporter.get_finished_spans() # Span 1: Publisher Flow Control span # Span 2: Publisher Batching span(ended after exception) @@ -306,7 +303,7 @@ def test_publish(creds): ) -def test_publish_otel_context_propagation(creds, provider): +def test_publish_otel_context_propagation(creds): TOPIC = "projects/projectID/topics/topicID" client = publisher.Client( credentials=creds, @@ -315,12 +312,6 @@ def test_publish_otel_context_propagation(creds, provider): ), ) - # Set up Open Telemetry tracing. - memory_exporter = InMemorySpanExporter() - processor = SimpleSpanProcessor(memory_exporter) - provider.add_span_processor(processor) - trace.set_tracer_provider(provider) - flow_controller_add_mock = mock.Mock( spec=publisher.flow_controller.FlowController.add ) @@ -333,7 +324,7 @@ def test_publish_otel_context_propagation(creds, provider): assert "googclient_traceparent" in args[0].attributes -def test_publish_otel(creds, provider): +def test_publish_otel(creds, span_exporter): TOPIC = "projects/projectID/topics/topicID" client = publisher.Client( credentials=creds, @@ -342,14 +333,15 @@ def test_publish_otel(creds, provider): ), ) - memory_exporter = InMemorySpanExporter() - processor = SimpleSpanProcessor(memory_exporter) - provider.add_span_processor(processor) - trace.set_tracer_provider(provider) + # # Setup Open Telemetry tracing + # memory_exporter = InMemorySpanExporter() + # processor = SimpleSpanProcessor(memory_exporter) + # provider.add_span_processor(processor) + # trace.set_tracer_provider(provider) client.publish(TOPIC, b"message") - spans = memory_exporter.get_finished_spans() + spans = span_exporter.get_finished_spans() # Publish Create Span is still active, hence note returned # by the exporter. @@ -395,7 +387,7 @@ def test_publish_error_exceeding_flow_control_limits(creds): future2.result() -def test_publish_otel_flow_control_exception(creds, provider): +def test_publish_otel_flow_control_exception(creds, span_exporter): client = publisher.Client( credentials=creds, publisher_options=types.PublisherOptions( @@ -404,10 +396,10 @@ def test_publish_otel_flow_control_exception(creds, provider): ) # Setup Open Telemetry tracing - memory_exporter = InMemorySpanExporter() - processor = SimpleSpanProcessor(memory_exporter) - provider.add_span_processor(processor) - trace.set_tracer_provider(provider) + # memory_exporter = InMemorySpanExporter() + # processor = SimpleSpanProcessor(memory_exporter) + # provider.add_span_processor(processor) + # trace.set_tracer_provider(provider) client._flow_controller = mock.Mock(spec=publisher.flow_controller.FlowController) client._flow_controller.add = mock.Mock( @@ -417,7 +409,7 @@ def test_publish_otel_flow_control_exception(creds, provider): TOPIC = "projects/projectID/topics/topicID" client.publish(TOPIC, b"message") - spans = memory_exporter.get_finished_spans() + spans = span_exporter.get_finished_spans() # Span 1: Publisher Flow Control Span(closed after exception) # Span 2: Publisher Create Span(closed after exception) @@ -640,7 +632,7 @@ def test_publish_custom_retry_overrides_configured_retry(creds): fake_sequencer.publish.assert_called_once_with( message=mock.ANY, retry=mock.sentinel.custom_retry, timeout=mock.ANY ) - message = fake_sequencer.publish.call_args.kwargs["message"].get_message() + message = fake_sequencer.publish.call_args.kwargs["message"].message assert message.data == b"hello!" @@ -659,7 +651,7 @@ def test_publish_custom_timeout_overrides_configured_timeout(creds): fake_sequencer.publish.assert_called_once_with( message=mock.ANY, retry=mock.ANY, timeout=mock.sentinel.custom_timeout ) - message = fake_sequencer.publish.call_args.kwargs["message"].get_message() + message = fake_sequencer.publish.call_args.kwargs["message"].message assert message.data == b"hello!" From 8f1f812b665d5d4e8e81371757e920aca907d793 Mon Sep 17 00:00:00 2001 From: Mukund Date: Mon, 24 Jun 2024 00:01:31 +0000 Subject: [PATCH 11/30] Update publish RPC span logic * Add tests for publish RPC green path --- .../pubsub_v1/publisher/_batch/thread.py | 33 ++- google/cloud/pubsub_v1/publisher/client.py | 3 +- .../pubsub_v1/publisher/message_wrapper.py | 4 +- tests/unit/pubsub_v1/conftest.py | 2 +- .../pubsub_v1/publisher/batch/test_thread.py | 209 +++++++++++++++++- 5 files changed, 235 insertions(+), 16 deletions(-) diff --git a/google/cloud/pubsub_v1/publisher/_batch/thread.py b/google/cloud/pubsub_v1/publisher/_batch/thread.py index 065a28712..77baf16de 100644 --- a/google/cloud/pubsub_v1/publisher/_batch/thread.py +++ b/google/cloud/pubsub_v1/publisher/_batch/thread.py @@ -17,6 +17,7 @@ import logging import threading import time +from datetime import datetime import typing from typing import Any, Callable, List, Optional, Sequence @@ -278,7 +279,7 @@ def _commit(self) -> None: links = [] for wrapper in self._message_wrappers: span = wrapper.create_span - if span.get_span_context().trace_flags.sampled: + if span and span.is_recording(): links.append(trace.Link(span.get_span_context())) with tracer.start_as_current_span( name=f"{self._topic} publish", @@ -292,10 +293,13 @@ def _commit(self) -> None: }, links=links if len(links) > 0 else None, kind=trace.SpanKind.CLIENT, + end_on_exit=False, ) as publish_rpc_span: ctx = publish_rpc_span.get_span_context() for wrapper in self._message_wrappers: - wrapper.create_span.add_link(ctx) + span = wrapper.create_span + if span and span.is_recording(): + span.add_link(ctx) # Performs retries for errors defined by the retry configuration. response = self._client._gapic_publish( topic=self._topic, @@ -306,7 +310,15 @@ def _commit(self) -> None: if self._client._open_telemetry_enabled: publish_rpc_span.end() for wrapper in self._message_wrappers: - wrapper.create_span.end() + span = wrapper.create_span + if span: + span.add_event( + name="publish end", + attributes={ + "timestamp": str(datetime.now()), + }, + ) + span.end() except google.api_core.exceptions.GoogleAPIError as exc: # We failed to publish, even after retries, so set the exception on # all futures and exit. @@ -322,11 +334,12 @@ def _commit(self) -> None: publish_rpc_span.end() for wrapper in self._message_wrappers: span = wrapper.create_span - span.record_exception(exception=exc) - span.set_status( - trace.Status(status_code=trace.StatusCode.ERROR), - ) - span.end() + if span: + span.record_exception(exception=exc) + span.set_status( + trace.Status(status_code=trace.StatusCode.ERROR), + ) + span.end() batch_transport_succeeded = False if self._batch_done_callback is not None: @@ -398,7 +411,9 @@ def publish( """ # Coerce the type, just in case. - if not isinstance(message_wrapper.message, gapic_types.PubsubMessage): + if not isinstance( + message_wrapper.message, gapic_types.PubsubMessage + ): # pragma: NO COVER # For performance reasons, the message should be constructed by directly # using the raw protobuf class, and only then wrapping it into the # higher-level PubsubMessage class. diff --git a/google/cloud/pubsub_v1/publisher/client.py b/google/cloud/pubsub_v1/publisher/client.py index 8d9e6a113..46b49dc48 100644 --- a/google/cloud/pubsub_v1/publisher/client.py +++ b/google/cloud/pubsub_v1/publisher/client.py @@ -27,6 +27,7 @@ from opentelemetry import trace from opentelemetry.trace.propagation import set_span_in_context from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator +from opentelemetry.propagators.textmap import Setter from google.api_core import gapic_v1 from google.auth.credentials import AnonymousCredentials # type: ignore @@ -385,7 +386,7 @@ def publish( # type: ignore[override] ) message = gapic_types.PubsubMessage.wrap(vanilla_pb) - class OTelContextSetter: + class OTelContextSetter(Setter): """ Used by Open Telemetry for context propagation. """ diff --git a/google/cloud/pubsub_v1/publisher/message_wrapper.py b/google/cloud/pubsub_v1/publisher/message_wrapper.py index f59d3c0ee..d40ae2ebc 100644 --- a/google/cloud/pubsub_v1/publisher/message_wrapper.py +++ b/google/cloud/pubsub_v1/publisher/message_wrapper.py @@ -2,6 +2,8 @@ from opentelemetry import trace from dataclasses import dataclass, field +from typing import Optional + @dataclass class MessageWrapper: @@ -11,4 +13,4 @@ class MessageWrapper: """ message: gapic_types.PubsubMessage - create_span: trace.Span = field(default=None) + create_span: Optional[trace.Span] = field(default=None) diff --git a/tests/unit/pubsub_v1/conftest.py b/tests/unit/pubsub_v1/conftest.py index 92023d42b..f3cd421e3 100644 --- a/tests/unit/pubsub_v1/conftest.py +++ b/tests/unit/pubsub_v1/conftest.py @@ -30,7 +30,7 @@ def creds(): yield google.auth.credentials.AnonymousCredentials() -@pytest.fixture(scope="function", autouse=True) +@pytest.fixture(scope="session", autouse=True) def set_trace_provider(): provider = TracerProvider() trace.set_tracer_provider(provider) diff --git a/tests/unit/pubsub_v1/publisher/batch/test_thread.py b/tests/unit/pubsub_v1/publisher/batch/test_thread.py index 86ba9c2ef..b0014ab8f 100644 --- a/tests/unit/pubsub_v1/publisher/batch/test_thread.py +++ b/tests/unit/pubsub_v1/publisher/batch/test_thread.py @@ -25,6 +25,8 @@ import pytest +from opentelemetry import trace + import google.api_core.exceptions from google.api_core import gapic_v1 from google.auth import credentials @@ -39,8 +41,13 @@ from google.cloud.pubsub_v1.publisher.message_wrapper import MessageWrapper -def create_client(): - return publisher.Client(credentials=credentials.AnonymousCredentials()) +def create_client(enable_open_telemetry=False): + return publisher.Client( + credentials=credentials.AnonymousCredentials(), + publisher_options=types.PublisherOptions( + enable_open_telemetry_tracing=enable_open_telemetry, + ), + ) def create_batch( @@ -49,7 +56,8 @@ def create_batch( commit_when_full=True, commit_retry=gapic_v1.method.DEFAULT, commit_timeout: gapic_types.TimeoutType = gapic_v1.method.DEFAULT, - **batch_settings + enable_open_telemetry: bool = False, + **batch_settings, ): """Return a batch object suitable for testing. @@ -69,7 +77,7 @@ def create_batch( Returns: ~.pubsub_v1.publisher.batch.thread.Batch: A batch object. """ - client = create_client() + client = create_client(enable_open_telemetry=enable_open_telemetry) settings = types.BatchSettings(**batch_settings) return Batch( client, @@ -610,6 +618,199 @@ def __call__(self, success): self.success = success +# Refer https://opentelemetry.io/docs/languages/python/#version-support +@pytest.mark.skipif( + sys.version_info < (3, 8), reason="Open Telemetry requires python3.8 or higher" +) +def test_commit_otel_publish_rpc_span_exception(span_exporter): + TOPIC = "projects/projectID/topics/topicID" + batch = create_batch(topic=TOPIC, enable_open_telemetry=True) + + tracer = trace.get_tracer_provider().get_tracer("com.google.cloud.pubsub.v1") + with tracer.start_as_current_span(name="foo", end_on_exit=False) as create_span: + message = MessageWrapper( + message=gapic_types.PubsubMessage(data=b"foo"), create_span=create_span + ) + batch.publish(message) + + # Mock publish error. + error = google.api_core.exceptions.InternalServerError("error") + + with mock.patch.object( + type(batch.client), + "_gapic_publish", + side_effect=error, + ): + batch._commit() + + spans = span_exporter.get_finished_spans() + # Span 1: publish RPC Span + # Span 2: create span of message. + assert len(spans) == 2 + + # Verify status of both spans recorded error and exception event. + for span in spans: + assert span.status.status_code == trace.status.StatusCode.ERROR + assert len(span.events) == 1 + assert span.events[0].name == "exception" + assert span.end_time is not None + + +def test_commit_otel_null_span(span_exporter): + """ + Test case checks the null case check scenario and appeases code coverage. + This scenario where open telemetry is enabled, yet the message added to the + batch does not contain a span should not arise in the library. + """ + TOPIC = "projects/projectID/topics/topic" + batch = create_batch( + topic=TOPIC, + enable_open_telemetry=True, + ) + + msg = MessageWrapper( + message=gapic_types.PubsubMessage(data=b"foo"), + ) + + batch.publish(msg) + + publish_response = gapic_types.PublishResponse(message_ids=["a"]) + with mock.patch.object( + type(batch.client), "_gapic_publish", return_value=publish_response + ): + batch._commit() + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + assert spans[0].name == f"{TOPIC} publish" + assert spans[0].end_time is not None + + error = google.api_core.exceptions.InternalServerError("error") + batch = create_batch( + topic=TOPIC, + enable_open_telemetry=True, + ) + batch.publish(msg) + + with mock.patch.object( + type(batch.client), + "_gapic_publish", + side_effect=error, + ): + batch._commit() + + assert len(spans) == 1 + assert spans[0].name == f"{TOPIC} publish" + assert spans[0].end_time is not None + + +def test_commit_otel_publish_non_sampled(span_exporter): + TOPIC = "projects/projectID/topics/topic" + batch = create_batch( + topic=TOPIC, + enable_open_telemetry=True, + ) + + tracer = trace.get_tracer_provider().get_tracer("com.google.cloud.pubsub.v1") + with tracer.start_as_current_span(name="foo", end_on_exit=False) as span: + span.is_recording = mock.Mock(return_value=False) + msg = MessageWrapper( + message=gapic_types.PubsubMessage(data=b"foo"), create_span=span + ) + + batch.publish(msg) + + publish_response = gapic_types.PublishResponse(message_ids=["a"]) + with mock.patch.object( + type(batch.client), "_gapic_publish", return_value=publish_response + ): + batch._commit() + + spans = span_exporter.get_finished_spans() + + # Span 1: publish RPC Span + # Span 2: create span of message. + assert len(spans) == 2 + publish_rpc_span, create_span = spans + assert len(publish_rpc_span.links) == 0 + assert len(create_span.links) == 0 + + +@pytest.mark.skipif( + sys.version_info < (3, 8), reason="Open Telemetry requires python3.8 or higher" +) +def test_commit_otel_publish_rpc_span(span_exporter): + TOPIC = "projects/projectID/topics/topic" + batch = create_batch( + topic=TOPIC, + enable_open_telemetry=True, + ) + + # Simulate message 1 published with its own publisher create span. + tracer = trace.get_tracer_provider().get_tracer("com.google.cloud.pubsub.v1") + with tracer.start_as_current_span(name="foo", end_on_exit=False) as create_span1: + msg1 = MessageWrapper( + message=gapic_types.PubsubMessage(data=b"foo"), create_span=create_span1 + ) + + # Simulate message 2 published with its own publisher create span. + with tracer.start_as_current_span(name="bar", end_on_exit=False) as create_span2: + msg2 = MessageWrapper( + message=gapic_types.PubsubMessage(data=b"bar"), + create_span=create_span2, + ) + + # Add both messages to the batch. + batch.publish(msg1) + batch.publish(msg2) + + publish_response = gapic_types.PublishResponse(message_ids=["a", "b"]) + with mock.patch.object( + type(batch.client), "_gapic_publish", return_value=publish_response + ): + batch._commit() + + spans = span_exporter.get_finished_spans() + + # Span 1: publish RPC span - closed after publish RPC success. + # Span 2: publisher create span of message 1 - closed after publish RPC success. + # Span 3: publisher create span of message 2 - closed after publish RPC success. + assert len(spans) == 3 + # publish_rpc_span, create_span_1, create_span2, non_sampled_span = spans + publish_rpc_span, create_span_1, create_span2 = spans + + # Verify publish_rpc_span + assert publish_rpc_span.name == f"{TOPIC} publish" + assert publish_rpc_span.kind == trace.SpanKind.CLIENT + assert publish_rpc_span.end_time is not None + attributes = publish_rpc_span.attributes + assert attributes["messaging.system"] == "com.google.cloud.pubsub.v1" + assert attributes["messaging.destination.name"] == TOPIC + assert attributes["gcp.project_id"] == "projectID" + assert attributes["messaging.batch.message_count"] == 2 + assert attributes["messaging.operation"] == "publish" + assert attributes["code.function"] == "_commit" + assert publish_rpc_span.parent is None + # Verify the links correspond to the spans of the published messages. + assert len(publish_rpc_span.links) == 2 + assert publish_rpc_span.links[0].context[1] == create_span_1.context[1] + assert publish_rpc_span.links[1].context[1] == create_span2.context[1] + + # Verify spans of the published messages. + assert create_span_1.name == "foo" + assert create_span2.name == "bar" + + # Verify the publish create spans have been closed after publish success. + assert create_span1.end_time is not None + assert create_span2.end_time is not None + + # Verify publish end event added to the span + assert len(create_span1.events) == 1 + assert len(create_span2.events) == 1 + assert create_span1.events[0].name == "publish end" + assert create_span2.events[0].name == "publish end" + + def test_batch_done_callback_called_on_success(): batch_done_callback_tracker = BatchDoneCallbackTracker() batch = create_batch(batch_done_callback=batch_done_callback_tracker) From 2ab151820c0fca31c33e7bba3692f186050cef79 Mon Sep 17 00:00:00 2001 From: Mukund Date: Mon, 24 Jun 2024 21:58:53 +0000 Subject: [PATCH 12/30] Add subscriber option to enable open telemetry --- google/cloud/pubsub_v1/subscriber/client.py | 16 +++++++++++++++- google/cloud/pubsub_v1/types.py | 15 +++++++++++++++ .../subscriber/test_subscriber_client.py | 11 +++++++++++ 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/google/cloud/pubsub_v1/subscriber/client.py b/google/cloud/pubsub_v1/subscriber/client.py index 0d0d36a0c..dcf91a5f0 100644 --- a/google/cloud/pubsub_v1/subscriber/client.py +++ b/google/cloud/pubsub_v1/subscriber/client.py @@ -67,7 +67,15 @@ class Client(subscriber_client.SubscriberClient): ) """ - def __init__(self, **kwargs: Any): + def __init__( + self, + subscriber_options: Union[types.SubscriberOptions, Sequence] = (), + **kwargs: Any + ): + assert ( + isinstance(subscriber_options, types.SubscriberOptions) + or len(subscriber_options) == 0 + ), "subscriber_options must be of type SubscriberOptions or an empty sequence." # Sanity check: Is our goal to use the emulator? # If so, create a grpc insecure channel with the emulator host # as the target. @@ -82,6 +90,12 @@ def __init__(self, **kwargs: Any): self._target = self._transport._host self._closed = False + self.subscriber_options = types.SubscriberOptions(*subscriber_options) + # Option indicating whether open telemetry is enabled or not. + self._open_telemetry_enabled = ( + self.subscriber_options.enable_open_telemetry_tracing + ) + @classmethod def from_service_account_file( # type: ignore[override] cls, filename: str, **kwargs: Any diff --git a/google/cloud/pubsub_v1/types.py b/google/cloud/pubsub_v1/types.py index b4adff1d2..a5309c9a7 100644 --- a/google/cloud/pubsub_v1/types.py +++ b/google/cloud/pubsub_v1/types.py @@ -131,6 +131,19 @@ class PublishFlowControl(NamedTuple): """The action to take when publish flow control limits are exceeded.""" +class SubscriberOptions(NamedTuple): + """ + Options for the subscriber client. + + Attributes: + enable_open_telemetry_tracing (bool): + Whether to enable OpenTelemetry tracing. Defaults to false. + """ + + enable_open_telemetry_tracing: bool = False + """Whether to enable OpenTelemetry tracing.""" + + # Define the default publisher options. # # This class is used when creating a publisher client to pass in options @@ -151,6 +164,8 @@ class PublisherOptions(NamedTuple): timeout (OptionalTimeout): Timeout settings for message publishing by the client. It should be compatible with :class:`~.pubsub_v1.types.TimeoutType`. + enable_open_telemetry_tracing (bool): + Whether to enable OpenTelemetry tracing. Defaults to false. """ enable_message_ordering: bool = False diff --git a/tests/unit/pubsub_v1/subscriber/test_subscriber_client.py b/tests/unit/pubsub_v1/subscriber/test_subscriber_client.py index a09d85b00..45cfbe8c1 100644 --- a/tests/unit/pubsub_v1/subscriber/test_subscriber_client.py +++ b/tests/unit/pubsub_v1/subscriber/test_subscriber_client.py @@ -135,6 +135,17 @@ def test_init_emulator(monkeypatch): assert channel.target().decode("utf8") == _EXPECTED_TARGET +def test_otel_subscriber_option(creds): + client = subscriber.Client( + credentials=creds, + subscriber_options=types.SubscriberOptions(enable_open_telemetry_tracing=True), + ) + assert client._open_telemetry_enabled is True + + client = subscriber.Client(credentials=creds) + assert client._open_telemetry_enabled is False + + def test_class_method_factory(): patch = mock.patch( "google.oauth2.service_account.Credentials.from_service_account_file" From 7b97217e682f3ae75ea76e7c8a33c8e7185b7280 Mon Sep 17 00:00:00 2001 From: Mukund Date: Tue, 25 Jun 2024 22:26:48 +0000 Subject: [PATCH 13/30] Add messaging.message.id attribute in the publisher create spans --- google/cloud/pubsub_v1/publisher/_batch/thread.py | 5 ++++- google/cloud/pubsub_v1/publisher/client.py | 2 +- tests/unit/pubsub_v1/publisher/batch/test_thread.py | 7 +++++++ tests/unit/pubsub_v1/publisher/test_publisher_client.py | 8 +------- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/google/cloud/pubsub_v1/publisher/_batch/thread.py b/google/cloud/pubsub_v1/publisher/_batch/thread.py index 77baf16de..ad30194b5 100644 --- a/google/cloud/pubsub_v1/publisher/_batch/thread.py +++ b/google/cloud/pubsub_v1/publisher/_batch/thread.py @@ -309,7 +309,9 @@ def _commit(self) -> None: ) if self._client._open_telemetry_enabled: publish_rpc_span.end() - for wrapper in self._message_wrappers: + for message_id, wrapper in zip( + response.message_ids, self._message_wrappers + ): span = wrapper.create_span if span: span.add_event( @@ -318,6 +320,7 @@ def _commit(self) -> None: "timestamp": str(datetime.now()), }, ) + span.set_attribute(key="messaging.message.id", value=message_id) span.end() except google.api_core.exceptions.GoogleAPIError as exc: # We failed to publish, even after retries, so set the exception on diff --git a/google/cloud/pubsub_v1/publisher/client.py b/google/cloud/pubsub_v1/publisher/client.py index 46b49dc48..27bf76362 100644 --- a/google/cloud/pubsub_v1/publisher/client.py +++ b/google/cloud/pubsub_v1/publisher/client.py @@ -409,7 +409,7 @@ def set(self, carrier: gapic_types.PubsubMessage, key: str, value: str): "messaging.gcp_pubsub.message.ordering_key": ordering_key, "messaging.operation": "create", "gcp.project_id": topic.split("/")[1], - "messaging.message.envelope.size": sys.getsizeof(message), + "messaging.message.body.size": sys.getsizeof(message.data), }, kind=trace.SpanKind.PRODUCER, end_on_exit=False, diff --git a/tests/unit/pubsub_v1/publisher/batch/test_thread.py b/tests/unit/pubsub_v1/publisher/batch/test_thread.py index b0014ab8f..07d8ed76b 100644 --- a/tests/unit/pubsub_v1/publisher/batch/test_thread.py +++ b/tests/unit/pubsub_v1/publisher/batch/test_thread.py @@ -804,6 +804,13 @@ def test_commit_otel_publish_rpc_span(span_exporter): assert create_span1.end_time is not None assert create_span2.end_time is not None + # Verify message_ids returned from gapic publish are added as attributes + # to the publisher create spans of the messages. + assert "messaging.message.id" in create_span1.attributes + assert create_span1.attributes["messaging.message.id"] == "a" + assert "messaging.message.id" in create_span2.attributes + assert create_span2.attributes["messaging.message.id"] == "b" + # Verify publish end event added to the span assert len(create_span1.events) == 1 assert len(create_span2.events) == 1 diff --git a/tests/unit/pubsub_v1/publisher/test_publisher_client.py b/tests/unit/pubsub_v1/publisher/test_publisher_client.py index d6adc4420..f21ab1efa 100644 --- a/tests/unit/pubsub_v1/publisher/test_publisher_client.py +++ b/tests/unit/pubsub_v1/publisher/test_publisher_client.py @@ -395,12 +395,6 @@ def test_publish_otel_flow_control_exception(creds, span_exporter): ), ) - # Setup Open Telemetry tracing - # memory_exporter = InMemorySpanExporter() - # processor = SimpleSpanProcessor(memory_exporter) - # provider.add_span_processor(processor) - # trace.set_tracer_provider(provider) - client._flow_controller = mock.Mock(spec=publisher.flow_controller.FlowController) client._flow_controller.add = mock.Mock( side_effect=exceptions.FlowControlLimitError @@ -426,7 +420,7 @@ def test_publish_otel_flow_control_exception(creds, span_exporter): assert attributes["messaging.gcp_pubsub.message.ordering_key"] == "" assert attributes["messaging.operation"] == "create" assert attributes["gcp.project_id"] == "projectID" - assert "messaging.message.envelope.size" in attributes + assert "messaging.message.body.size" in attributes assert create_span.kind == trace.SpanKind.PRODUCER assert len(create_span.events) == 2 From 1f9644346b7d3265c3a69db2ed1d93ad6357a5a4 Mon Sep 17 00:00:00 2001 From: Mukund Date: Wed, 26 Jun 2024 16:03:24 +0000 Subject: [PATCH 14/30] Add subscribe span and record modack events --- .../opentelemetry/context_propagation.py | 16 +++++ .../_protocol/streaming_pull_manager.py | 43 +++++++++++++ .../publisher/test_publisher_client.py | 12 ---- .../subscriber/test_streaming_pull_manager.py | 61 +++++++++++++++++-- 4 files changed, 115 insertions(+), 17 deletions(-) create mode 100644 google/cloud/pubsub_v1/opentelemetry/context_propagation.py diff --git a/google/cloud/pubsub_v1/opentelemetry/context_propagation.py b/google/cloud/pubsub_v1/opentelemetry/context_propagation.py new file mode 100644 index 000000000..edbbc01c0 --- /dev/null +++ b/google/cloud/pubsub_v1/opentelemetry/context_propagation.py @@ -0,0 +1,16 @@ +from opentelemetry.propagators.textmap import Getter +from google.pubsub_v1 import types as gapic_types +import typing + + +class OTelContextGetter(Getter): + """TODO:Add docstring later""" + + def get( + self, carrier: gapic_types.PubsubMessage, key: str + ) -> typing.Optional[typing.List[str]]: + print(type(carrier)) + return carrier.attributes[key] + + def keys(self, carrier: gapic_types.PubsubMessage) -> typing.List[str]: + return carrier.attributes.keys() diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py index b8531db17..1c398c847 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py @@ -22,9 +22,14 @@ import typing from typing import Any, Dict, Callable, Iterable, List, Optional, Set, Tuple import uuid +import sys import grpc # type: ignore +from opentelemetry import trace +from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator +from opentelemetry.trace.propagation import set_span_in_context + from google.api_core import bidi from google.api_core import exceptions from google.cloud.pubsub_v1 import types @@ -34,6 +39,7 @@ from google.cloud.pubsub_v1.subscriber._protocol import leaser from google.cloud.pubsub_v1.subscriber._protocol import messages_on_hold from google.cloud.pubsub_v1.subscriber._protocol import requests +from google.cloud.pubsub_v1.opentelemetry.context_propagation import OTelContextGetter from google.cloud.pubsub_v1.subscriber.exceptions import ( AcknowledgeError, AcknowledgeStatus, @@ -91,6 +97,10 @@ code_pb2.UNAVAILABLE, } +_OPEN_TELEMETRY_TRACER_NAME = "com.google.cloud.pubsub.v1" +"""Open Telemetry Instrumenting module name.""" +_OPEN_TELEMETRY_MESSAGING_SYSTEM = "gcp_pubsub" + def _wrap_as_exception(maybe_exception: Any) -> BaseException: """Wrap an object as a Python exception, if needed. @@ -1075,6 +1085,35 @@ def _on_response(self, response: gapic_types.StreamingPullResponse) -> None: # protobuf message to significantly gain on attribute access performance. received_messages = response._pb.received_messages + if self._client._open_telemetry_enabled: + tracer = trace.get_tracer(_OPEN_TELEMETRY_TRACER_NAME) + for message in response.received_messages: + parent_span = TraceContextTextMapPropagator().extract( + carrier=message.message, + getter=OTelContextGetter(), + ) + with tracer.start_as_current_span( + name=f"{self._subscription} subscribe", + kind=trace.SpanKind.CONSUMER, + context=set_span_in_context(parent_span) if parent_span else None, + attributes={ + "messaging.system": _OPEN_TELEMETRY_MESSAGING_SYSTEM, + "messaging.destination.name": self._subscription, + "gcp.project_id": self._subscription.split("/")[1], + "messaging.message.id": message.message.message_id, + "messaging.message.body.size": sys.getsizeof( + message.message.data + ), + "messaging.gcp_pubsub.message.ack_id": message.ack_id, + "messaging.gcp_pubsub.message.ordering_key": message.message.ordering_key, + "messaging.gcp_pubsub.message.exactly_once_delivery": response.subscription_properties.exactly_once_delivery_enabled, + "code.function": "_on_response", + "messaging.gcp_pubsub.message.delivery_attempt": message.delivery_attempt, + }, + end_on_exit=False, + ) as subscribe_span: + pass + _LOGGER.debug( "Processing %s received message(s), currently on hold %s (bytes %s).", len(received_messages), @@ -1098,10 +1137,14 @@ def _on_response(self, response: gapic_types.StreamingPullResponse) -> None: # Immediately (i.e. without waiting for the auto lease management) # modack the messages we received, as this tells the server that we've # received them. + if self._client._open_telemetry_enabled: + subscribe_span.add_event(name="modack start") ack_id_gen = (message.ack_id for message in received_messages) expired_ack_ids = self._send_lease_modacks( ack_id_gen, self.ack_deadline, warn_on_invalid=False ) + if self._client._open_telemetry_enabled: + subscribe_span.add_event(name="modack end") with self._pause_resume_lock: assert self._scheduler is not None diff --git a/tests/unit/pubsub_v1/publisher/test_publisher_client.py b/tests/unit/pubsub_v1/publisher/test_publisher_client.py index f21ab1efa..3b344f18d 100644 --- a/tests/unit/pubsub_v1/publisher/test_publisher_client.py +++ b/tests/unit/pubsub_v1/publisher/test_publisher_client.py @@ -224,12 +224,6 @@ def test_publish_otel_batching_exception(creds, span_exporter): ), ) - # Setup Open Telemetry tracing - # memory_exporter = InMemorySpanExporter() - # processor = SimpleSpanProcessor(memory_exporter) - # provider.add_span_processor(processor) - # trace.set_tracer_provider(provider) - # Throw an exception when sequencer.publish() is called sequencer = mock.Mock(spec=ordered_sequencer.OrderedSequencer) sequencer.publish = mock.Mock(side_effect=RuntimeError("some error")) @@ -333,12 +327,6 @@ def test_publish_otel(creds, span_exporter): ), ) - # # Setup Open Telemetry tracing - # memory_exporter = InMemorySpanExporter() - # processor = SimpleSpanProcessor(memory_exporter) - # provider.add_span_processor(processor) - # trace.set_tracer_provider(provider) - client.publish(TOPIC, b"message") spans = span_exporter.get_finished_spans() diff --git a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py index 278f3e88e..e9afca613 100644 --- a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py +++ b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py @@ -183,7 +183,10 @@ def make_manager(**kwargs): client_ = mock.create_autospec(client.Client, instance=True) scheduler_ = mock.create_autospec(scheduler.Scheduler, instance=True) return streaming_pull_manager.StreamingPullManager( - client_, "subscription-name", scheduler=scheduler_, **kwargs + client_, + "projects/projectID/subscriptions/subscriptionID", + scheduler=scheduler_, + **kwargs ) @@ -1224,14 +1227,14 @@ def test_open_has_been_closed(): manager.open(mock.sentinel.callback, mock.sentinel.on_callback_error) -def make_running_manager(**kwargs): +def make_running_manager(enable_open_telemetry=False, **kwargs): manager = make_manager(**kwargs) manager._consumer = mock.create_autospec(bidi.BackgroundConsumer, instance=True) manager._consumer.is_active = True manager._dispatcher = mock.create_autospec(dispatcher.Dispatcher, instance=True) manager._leaser = mock.create_autospec(leaser.Leaser, instance=True) manager._heartbeater = mock.create_autospec(heartbeater.Heartbeater, instance=True) - + manager._client._open_telemetry_enabled = enable_open_telemetry return ( manager, manager._consumer, @@ -1415,7 +1418,10 @@ def test__get_initial_request(): initial_request = manager._get_initial_request(123) assert isinstance(initial_request, gapic_types.StreamingPullRequest) - assert initial_request.subscription == "subscription-name" + assert ( + initial_request.subscription + == "projects/projectID/subscriptions/subscriptionID" + ) assert initial_request.stream_ack_deadline_seconds == 123 assert initial_request.modify_deadline_ack_ids == [] assert initial_request.modify_deadline_seconds == [] @@ -1428,7 +1434,10 @@ def test__get_initial_request_wo_leaser(): initial_request = manager._get_initial_request(123) assert isinstance(initial_request, gapic_types.StreamingPullRequest) - assert initial_request.subscription == "subscription-name" + assert ( + initial_request.subscription + == "projects/projectID/subscriptions/subscriptionID" + ) assert initial_request.stream_ack_deadline_seconds == 123 assert initial_request.modify_deadline_ack_ids == [] assert initial_request.modify_deadline_seconds == [] @@ -1466,6 +1475,48 @@ def test__on_response_delivery_attempt(): assert msg2.delivery_attempt == 6 +def test__on_response_mod_ack_otel(span_exporter): + manager, _, dispatcher, leaser, _, scheduler = make_running_manager( + enable_open_telemetry=True + ) + manager._callback = mock.sentinel.callback + + # Set up the messages. + response = gapic_types.StreamingPullResponse( + received_messages=[ + gapic_types.ReceivedMessage( + ack_id="ack_1", + message=gapic_types.PubsubMessage(data=b"foo", message_id="1"), + ), + gapic_types.ReceivedMessage( + ack_id="ack_2", + message=gapic_types.PubsubMessage(data=b"bar", message_id="2"), + ), + ] + ) + + # adjust message bookkeeping in leaser + fake_leaser_add(leaser, init_msg_count=0, assumed_msg_size=80) + + # Actually run the method and chack that correct MODACK value is used. + with mock.patch.object( + type(manager), "ack_deadline", new=mock.PropertyMock(return_value=18) + ): + manager._on_response(response) + + dispatcher.modify_ack_deadline.assert_called_once_with( + [ + requests.ModAckRequest("ack_1", 18, None), + requests.ModAckRequest("ack_2", 18, None), + ], + 18, + ) + + # Subscribe span would still be active, hence would not be exported. + spans = span_exporter.get_finished_spans() + assert len(spans) == 0 + + def test__on_response_modifies_ack_deadline(): manager, _, dispatcher, leaser, _, scheduler = make_running_manager() manager._callback = mock.sentinel.callback From 0b3362a467c6af5769e2766772570a58f7cd7b59 Mon Sep 17 00:00:00 2001 From: Mukund Date: Wed, 26 Jun 2024 18:21:15 +0000 Subject: [PATCH 15/30] Add tests for OtelContextGetter --- .../cloud/pubsub_v1/opentelemetry/__init__.py | 0 .../opentelemetry/context_propagation.py | 7 ++-- .../_protocol/streaming_pull_manager.py | 19 ++++++----- .../subscriber/test_context_getter.py | 32 +++++++++++++++++++ 4 files changed, 44 insertions(+), 14 deletions(-) create mode 100644 google/cloud/pubsub_v1/opentelemetry/__init__.py create mode 100644 tests/unit/pubsub_v1/subscriber/test_context_getter.py diff --git a/google/cloud/pubsub_v1/opentelemetry/__init__.py b/google/cloud/pubsub_v1/opentelemetry/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/google/cloud/pubsub_v1/opentelemetry/context_propagation.py b/google/cloud/pubsub_v1/opentelemetry/context_propagation.py index edbbc01c0..fd22a2d38 100644 --- a/google/cloud/pubsub_v1/opentelemetry/context_propagation.py +++ b/google/cloud/pubsub_v1/opentelemetry/context_propagation.py @@ -4,13 +4,12 @@ class OTelContextGetter(Getter): - """TODO:Add docstring later""" + """Used to extract Open Telemetry Context from message attributes.""" def get( self, carrier: gapic_types.PubsubMessage, key: str ) -> typing.Optional[typing.List[str]]: - print(type(carrier)) - return carrier.attributes[key] + return [carrier.attributes["googclient_" + key]] def keys(self, carrier: gapic_types.PubsubMessage) -> typing.List[str]: - return carrier.attributes.keys() + return list(carrier.attributes.keys()) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py index 1c398c847..c36e27189 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py @@ -28,7 +28,6 @@ from opentelemetry import trace from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator -from opentelemetry.trace.propagation import set_span_in_context from google.api_core import bidi from google.api_core import exceptions @@ -1087,28 +1086,28 @@ def _on_response(self, response: gapic_types.StreamingPullResponse) -> None: if self._client._open_telemetry_enabled: tracer = trace.get_tracer(_OPEN_TELEMETRY_TRACER_NAME) - for message in response.received_messages: - parent_span = TraceContextTextMapPropagator().extract( - carrier=message.message, + for received_message in response.received_messages: + parent_span_context = TraceContextTextMapPropagator().extract( + carrier=received_message.message, getter=OTelContextGetter(), ) with tracer.start_as_current_span( name=f"{self._subscription} subscribe", kind=trace.SpanKind.CONSUMER, - context=set_span_in_context(parent_span) if parent_span else None, + context=parent_span_context if parent_span_context else None, attributes={ "messaging.system": _OPEN_TELEMETRY_MESSAGING_SYSTEM, "messaging.destination.name": self._subscription, "gcp.project_id": self._subscription.split("/")[1], - "messaging.message.id": message.message.message_id, + "messaging.message.id": received_message.message.message_id, "messaging.message.body.size": sys.getsizeof( - message.message.data + received_message.message.data ), - "messaging.gcp_pubsub.message.ack_id": message.ack_id, - "messaging.gcp_pubsub.message.ordering_key": message.message.ordering_key, + "messaging.gcp_pubsub.message.ack_id": received_message.ack_id, + "messaging.gcp_pubsub.message.ordering_key": received_message.message.ordering_key, "messaging.gcp_pubsub.message.exactly_once_delivery": response.subscription_properties.exactly_once_delivery_enabled, "code.function": "_on_response", - "messaging.gcp_pubsub.message.delivery_attempt": message.delivery_attempt, + "messaging.gcp_pubsub.message.delivery_attempt": received_message.delivery_attempt, }, end_on_exit=False, ) as subscribe_span: diff --git a/tests/unit/pubsub_v1/subscriber/test_context_getter.py b/tests/unit/pubsub_v1/subscriber/test_context_getter.py new file mode 100644 index 000000000..aea21db49 --- /dev/null +++ b/tests/unit/pubsub_v1/subscriber/test_context_getter.py @@ -0,0 +1,32 @@ +from google.pubsub_v1 import types as gapic_types +from google.cloud.pubsub_v1.opentelemetry.context_propagation import OTelContextGetter + + +def test_get(): + message = gapic_types.PubsubMessage( + data=b"data", + attributes={ + "non_otel_key": "non_otel_key_value", + "googclient_traceparent": "traceparent_value", + }, + ) + val = OTelContextGetter().get(message, "traceparent") + assert val == ["traceparent_value"] + + val = OTelContextGetter().get(message, "non_existent_key") + assert val == [""] + + +def test_keys(): + message = gapic_types.PubsubMessage( + data=b"data", + attributes={ + "non_otel_key": "non_otel_key_value", + "googclient_traceparent": "traceparent_value", + "googclient_tracestate": "tracestate_value", + }, + ) + keys = OTelContextGetter().keys(message) + assert "non_otel_key" in keys + assert "googclient_traceparent" in keys + assert "googclient_tracestate" in keys From 81a96b4cea7bff3967755ee18bafa78dca176a40 Mon Sep 17 00:00:00 2001 From: Mukund Date: Thu, 27 Jun 2024 14:43:35 +0000 Subject: [PATCH 16/30] Update the trace names to be the name of the Pub/Sub package --- .../publish_message_wrapper.py} | 4 +- .../cloud/pubsub_v1/publisher/_batch/base.py | 6 +- .../pubsub_v1/publisher/_batch/thread.py | 18 +-- .../publisher/_sequencer/ordered_sequencer.py | 6 +- .../_sequencer/unordered_sequencer.py | 6 +- google/cloud/pubsub_v1/publisher/client.py | 10 +- .../_protocol/streaming_pull_manager.py | 2 +- .../pubsub_v1/publisher/batch/test_base.py | 8 +- .../pubsub_v1/publisher/batch/test_thread.py | 136 +++++++++--------- .../sequencer/test_ordered_sequencer.py | 6 +- .../sequencer/test_unordered_sequencer.py | 6 +- .../publisher/test_publisher_client.py | 28 ++-- 12 files changed, 131 insertions(+), 105 deletions(-) rename google/cloud/pubsub_v1/{publisher/message_wrapper.py => opentelemetry/publish_message_wrapper.py} (79%) diff --git a/google/cloud/pubsub_v1/publisher/message_wrapper.py b/google/cloud/pubsub_v1/opentelemetry/publish_message_wrapper.py similarity index 79% rename from google/cloud/pubsub_v1/publisher/message_wrapper.py rename to google/cloud/pubsub_v1/opentelemetry/publish_message_wrapper.py index d40ae2ebc..db3c04cca 100644 --- a/google/cloud/pubsub_v1/publisher/message_wrapper.py +++ b/google/cloud/pubsub_v1/opentelemetry/publish_message_wrapper.py @@ -6,11 +6,11 @@ @dataclass -class MessageWrapper: +class PublishMessageWrapper: """ Wraps Pub/Sub message with additional metadata required for Open Telemetry tracing. """ message: gapic_types.PubsubMessage - create_span: Optional[trace.Span] = field(default=None) + span: Optional[trace.Span] = field(default=None) diff --git a/google/cloud/pubsub_v1/publisher/_batch/base.py b/google/cloud/pubsub_v1/publisher/_batch/base.py index e21297b9e..56585b416 100644 --- a/google/cloud/pubsub_v1/publisher/_batch/base.py +++ b/google/cloud/pubsub_v1/publisher/_batch/base.py @@ -24,7 +24,9 @@ from google.cloud import pubsub_v1 from google.cloud.pubsub_v1 import types from google.pubsub_v1 import types as gapic_types - from google.cloud.pubsub_v1.publisher.message_wrapper import MessageWrapper + from google.cloud.pubsub_v1.opentelemetry.publish_message_wrapper import ( + PublishMessageWrapper, + ) class Batch(metaclass=abc.ABCMeta): @@ -69,7 +71,7 @@ def make_lock(): # pragma: NO COVER @property @abc.abstractmethod - def message_wrappers(self) -> Sequence["MessageWrapper"]: # pragma: NO COVER + def message_wrappers(self) -> Sequence["PublishMessageWrapper"]: # pragma: NO COVER """Return the messages currently in the batch. Returns: diff --git a/google/cloud/pubsub_v1/publisher/_batch/thread.py b/google/cloud/pubsub_v1/publisher/_batch/thread.py index ad30194b5..178b2a293 100644 --- a/google/cloud/pubsub_v1/publisher/_batch/thread.py +++ b/google/cloud/pubsub_v1/publisher/_batch/thread.py @@ -27,7 +27,9 @@ from google.api_core import gapic_v1 from google.cloud.pubsub_v1.publisher import exceptions from google.cloud.pubsub_v1.publisher import futures -from google.cloud.pubsub_v1.publisher.message_wrapper import MessageWrapper +from google.cloud.pubsub_v1.opentelemetry.publish_message_wrapper import ( + PublishMessageWrapper, +) from google.cloud.pubsub_v1.publisher._batch import base from google.pubsub_v1 import types as gapic_types @@ -112,7 +114,7 @@ def __init__( # status changed from ACCEPTING_MESSAGES to any other # in order to avoid race conditions self._futures: List[futures.Future] = [] - self._message_wrappers: List[MessageWrapper] = [] + self._message_wrappers: List[PublishMessageWrapper] = [] self._status = base.BatchStatus.ACCEPTING_MESSAGES # The initial size is not zero, we need to account for the size overhead @@ -138,7 +140,7 @@ def client(self) -> "PublisherClient": return self._client @property - def message_wrappers(self) -> Sequence[MessageWrapper]: + def message_wrappers(self) -> Sequence[PublishMessageWrapper]: """The messages currently in the batch.""" return self._message_wrappers @@ -278,7 +280,7 @@ def _commit(self) -> None: tracer = trace.get_tracer("com.google.cloud.pubsub.v1") links = [] for wrapper in self._message_wrappers: - span = wrapper.create_span + span = wrapper.span if span and span.is_recording(): links.append(trace.Link(span.get_span_context())) with tracer.start_as_current_span( @@ -297,7 +299,7 @@ def _commit(self) -> None: ) as publish_rpc_span: ctx = publish_rpc_span.get_span_context() for wrapper in self._message_wrappers: - span = wrapper.create_span + span = wrapper.span if span and span.is_recording(): span.add_link(ctx) # Performs retries for errors defined by the retry configuration. @@ -312,7 +314,7 @@ def _commit(self) -> None: for message_id, wrapper in zip( response.message_ids, self._message_wrappers ): - span = wrapper.create_span + span = wrapper.span if span: span.add_event( name="publish end", @@ -336,7 +338,7 @@ def _commit(self) -> None: ) publish_rpc_span.end() for wrapper in self._message_wrappers: - span = wrapper.create_span + span = wrapper.span if span: span.record_exception(exception=exc) span.set_status( @@ -389,7 +391,7 @@ def _commit(self) -> None: def publish( self, - message_wrapper: MessageWrapper, + message_wrapper: PublishMessageWrapper, ) -> Optional["pubsub_v1.publisher.futures.Future"]: """Publish a single message. diff --git a/google/cloud/pubsub_v1/publisher/_sequencer/ordered_sequencer.py b/google/cloud/pubsub_v1/publisher/_sequencer/ordered_sequencer.py index c0d421a7b..c623c2cef 100644 --- a/google/cloud/pubsub_v1/publisher/_sequencer/ordered_sequencer.py +++ b/google/cloud/pubsub_v1/publisher/_sequencer/ordered_sequencer.py @@ -23,7 +23,9 @@ from google.cloud.pubsub_v1.publisher import exceptions from google.cloud.pubsub_v1.publisher._sequencer import base as sequencer_base from google.cloud.pubsub_v1.publisher._batch import base as batch_base -from google.cloud.pubsub_v1.publisher.message_wrapper import MessageWrapper +from google.cloud.pubsub_v1.opentelemetry.publish_message_wrapper import ( + PublishMessageWrapper, +) if typing.TYPE_CHECKING: # pragma: NO COVER from google.cloud.pubsub_v1 import types @@ -262,7 +264,7 @@ def _create_batch( def publish( self, - message: MessageWrapper, + message: PublishMessageWrapper, retry: "OptionalRetry" = gapic_v1.method.DEFAULT, timeout: "types.OptionalTimeout" = gapic_v1.method.DEFAULT, ) -> futures.Future: diff --git a/google/cloud/pubsub_v1/publisher/_sequencer/unordered_sequencer.py b/google/cloud/pubsub_v1/publisher/_sequencer/unordered_sequencer.py index a2522f5e1..1bf143b22 100644 --- a/google/cloud/pubsub_v1/publisher/_sequencer/unordered_sequencer.py +++ b/google/cloud/pubsub_v1/publisher/_sequencer/unordered_sequencer.py @@ -18,7 +18,9 @@ from google.api_core import gapic_v1 from google.cloud.pubsub_v1.publisher._sequencer import base -from google.cloud.pubsub_v1.publisher.message_wrapper import MessageWrapper +from google.cloud.pubsub_v1.opentelemetry.publish_message_wrapper import ( + PublishMessageWrapper, +) if typing.TYPE_CHECKING: # pragma: NO COVER @@ -116,7 +118,7 @@ def _create_batch( def publish( self, - message: MessageWrapper, + message: PublishMessageWrapper, retry: "OptionalRetry" = gapic_v1.method.DEFAULT, timeout: "types.OptionalTimeout" = gapic_v1.method.DEFAULT, ) -> "futures.Future": diff --git a/google/cloud/pubsub_v1/publisher/client.py b/google/cloud/pubsub_v1/publisher/client.py index 27bf76362..022207301 100644 --- a/google/cloud/pubsub_v1/publisher/client.py +++ b/google/cloud/pubsub_v1/publisher/client.py @@ -40,7 +40,9 @@ from google.cloud.pubsub_v1.publisher._sequencer import ordered_sequencer from google.cloud.pubsub_v1.publisher._sequencer import unordered_sequencer from google.cloud.pubsub_v1.publisher.flow_controller import FlowController -from google.cloud.pubsub_v1.publisher.message_wrapper import MessageWrapper +from google.cloud.pubsub_v1.opentelemetry.publish_message_wrapper import ( + PublishMessageWrapper, +) from google.pubsub_v1 import gapic_version as package_version from google.pubsub_v1 import types as gapic_types from google.pubsub_v1.services.publisher import client as publisher_client @@ -64,7 +66,7 @@ ordered_sequencer.OrderedSequencer, unordered_sequencer.UnorderedSequencer ] -_OPEN_TELEMETRY_TRACER_NAME = "com.google.cloud.pubsub.v1" +_OPEN_TELEMETRY_TRACER_NAME = "google.cloud.pubsub_v1.publisher" _OPEN_TELEMETRY_MESSAGING_SYSTEM = "gcp_pubsub" _OPEN_TELEMETRY_PUBLISHER_BATCHING = "publisher batching" @@ -505,9 +507,9 @@ def on_publish_done(future): create_span = None if self._open_telemetry_enabled: create_span = publish_create_span - message_wrapper = MessageWrapper( + message_wrapper = PublishMessageWrapper( message=message, - create_span=create_span, + span=create_span, ) future = sequencer.publish( message=message_wrapper, diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py index c36e27189..b9a70077a 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py @@ -96,7 +96,7 @@ code_pb2.UNAVAILABLE, } -_OPEN_TELEMETRY_TRACER_NAME = "com.google.cloud.pubsub.v1" +_OPEN_TELEMETRY_TRACER_NAME = "google.cloud.pubsub_v1.subscriber" """Open Telemetry Instrumenting module name.""" _OPEN_TELEMETRY_MESSAGING_SYSTEM = "gcp_pubsub" diff --git a/tests/unit/pubsub_v1/publisher/batch/test_base.py b/tests/unit/pubsub_v1/publisher/batch/test_base.py index eca6e08e2..8da1a1aa6 100644 --- a/tests/unit/pubsub_v1/publisher/batch/test_base.py +++ b/tests/unit/pubsub_v1/publisher/batch/test_base.py @@ -20,7 +20,9 @@ from google.cloud.pubsub_v1 import types from google.cloud.pubsub_v1.publisher._batch.base import BatchStatus from google.cloud.pubsub_v1.publisher._batch.thread import Batch -from google.cloud.pubsub_v1.publisher.message_wrapper import MessageWrapper +from google.cloud.pubsub_v1.opentelemetry.publish_message_wrapper import ( + PublishMessageWrapper, +) from google.pubsub_v1 import types as gapic_types @@ -43,9 +45,9 @@ def test_len(): batch = create_batch(status=BatchStatus.ACCEPTING_MESSAGES) assert len(batch) == 0 batch.publish( - MessageWrapper( + PublishMessageWrapper( message=gapic_types.PubsubMessage(data=b"foo"), - create_span=None, + span=None, ), ) assert len(batch) == 1 diff --git a/tests/unit/pubsub_v1/publisher/batch/test_thread.py b/tests/unit/pubsub_v1/publisher/batch/test_thread.py index 07d8ed76b..ad8919721 100644 --- a/tests/unit/pubsub_v1/publisher/batch/test_thread.py +++ b/tests/unit/pubsub_v1/publisher/batch/test_thread.py @@ -38,7 +38,9 @@ from google.cloud.pubsub_v1.publisher._batch import thread from google.cloud.pubsub_v1.publisher._batch.thread import Batch from google.pubsub_v1 import types as gapic_types -from google.cloud.pubsub_v1.publisher.message_wrapper import MessageWrapper +from google.cloud.pubsub_v1.opentelemetry.publish_message_wrapper import ( + PublishMessageWrapper, +) def create_client(enable_open_telemetry=False): @@ -136,15 +138,15 @@ def test_blocking__commit(): batch = create_batch() futures = ( batch.publish( - message_wrapper=MessageWrapper( + message_wrapper=PublishMessageWrapper( message=gapic_types.PubsubMessage(data=b"This is my message."), - create_span=None, + span=None, ) ), batch.publish( - message_wrapper=MessageWrapper( + message_wrapper=PublishMessageWrapper( message=gapic_types.PubsubMessage(data=b"This is another message."), - create_span=None, + span=None, ) ), ) @@ -180,9 +182,9 @@ def test_blocking__commit(): def test_blocking__commit_custom_retry(): batch = create_batch(commit_retry=mock.sentinel.custom_retry) batch.publish( - message_wrapper=MessageWrapper( + message_wrapper=PublishMessageWrapper( message=gapic_types.PubsubMessage(data=b"This is my message."), - create_span=None, + span=None, ) ) @@ -207,9 +209,9 @@ def test_blocking__commit_custom_retry(): def test_blocking__commit_custom_timeout(): batch = create_batch(commit_timeout=mock.sentinel.custom_timeout) batch.publish( - message_wrapper=MessageWrapper( + message_wrapper=PublishMessageWrapper( message=gapic_types.PubsubMessage(data=b"This is my message."), - create_span=None, + span=None, ) ) @@ -247,9 +249,9 @@ def api_publish_delay(topic="", messages=(), retry=None, timeout=None): with api_publish_patch: batch.publish( - message_wrapper=MessageWrapper( + message_wrapper=PublishMessageWrapper( message=gapic_types.PubsubMessage(data=b"first message"), - create_span=None, + span=None, ) ) @@ -258,9 +260,9 @@ def api_publish_delay(topic="", messages=(), retry=None, timeout=None): if not event_set: # pragma: NO COVER pytest.fail("API publish was not called in time") batch.publish( - message_wrapper=MessageWrapper( + message_wrapper=PublishMessageWrapper( message=gapic_types.PubsubMessage(data=b"second message"), - create_span=None, + span=None, ) ) end = datetime.datetime.now() @@ -306,15 +308,15 @@ def test_blocking__commit_wrong_messageid_length(): batch = create_batch() futures = ( batch.publish( - message_wrapper=MessageWrapper( + message_wrapper=PublishMessageWrapper( message=gapic_types.PubsubMessage(data=b"blah blah blah"), - create_span=None, + span=None, ) ), batch.publish( - message_wrapper=MessageWrapper( + message_wrapper=PublishMessageWrapper( message=gapic_types.PubsubMessage(data=b"blah blah blah blah"), - create_span=None, + span=None, ) ), ) @@ -337,15 +339,15 @@ def test_block__commmit_api_error(): batch = create_batch() futures = ( batch.publish( - message_wrapper=MessageWrapper( + message_wrapper=PublishMessageWrapper( message=gapic_types.PubsubMessage(data=b"blah blah blah"), - create_span=None, + span=None, ) ), batch.publish( - message_wrapper=MessageWrapper( + message_wrapper=PublishMessageWrapper( message=gapic_types.PubsubMessage(data=b"blah blah blah blah"), - create_span=None, + span=None, ) ), ) @@ -366,15 +368,15 @@ def test_block__commmit_retry_error(): batch = create_batch() futures = ( batch.publish( - message_wrapper=MessageWrapper( + message_wrapper=PublishMessageWrapper( message=gapic_types.PubsubMessage(data=b"blah blah blah"), - create_span=None, + span=None, ) ), batch.publish( - message_wrapper=MessageWrapper( + message_wrapper=PublishMessageWrapper( message=gapic_types.PubsubMessage(data=b"blah blah blah blah"), - create_span=None, + span=None, ) ), ) @@ -394,14 +396,14 @@ def test_block__commmit_retry_error(): def test_publish_updating_batch_size(): batch = create_batch(topic="topic_foo") messages = ( - MessageWrapper( - message=gapic_types.PubsubMessage(data=b"foobarbaz"), create_span=None + PublishMessageWrapper( + message=gapic_types.PubsubMessage(data=b"foobarbaz"), span=None ), - MessageWrapper( - message=gapic_types.PubsubMessage(data=b"spameggs"), create_span=None + PublishMessageWrapper( + message=gapic_types.PubsubMessage(data=b"spameggs"), span=None ), - MessageWrapper( - message=gapic_types.PubsubMessage(data=b"1335020400"), create_span=None + PublishMessageWrapper( + message=gapic_types.PubsubMessage(data=b"1335020400"), span=None ), ) @@ -426,7 +428,7 @@ def test_publish_updating_batch_size(): def test_publish(): batch = create_batch() - wrapper = MessageWrapper(gapic_types.PubsubMessage(), None) + wrapper = PublishMessageWrapper(gapic_types.PubsubMessage(), None) future = batch.publish(wrapper) assert len(batch.message_wrappers) == 1 @@ -436,7 +438,7 @@ def test_publish(): def test_publish_max_messages_zero(): batch = create_batch(topic="topic_foo", max_messages=0) - wrapper = MessageWrapper(gapic_types.PubsubMessage(data=b"foobarbaz"), None) + wrapper = PublishMessageWrapper(gapic_types.PubsubMessage(data=b"foobarbaz"), None) with mock.patch.object(batch, "commit") as commit: future = batch.publish(wrapper) @@ -449,8 +451,10 @@ def test_publish_max_messages_zero(): def test_publish_max_messages_enforced(): batch = create_batch(topic="topic_foo", max_messages=1) - message = MessageWrapper(gapic_types.PubsubMessage(data=b"foobarbaz"), None) - message2 = MessageWrapper(gapic_types.PubsubMessage(data=b"foobarbaz2"), None) + message = PublishMessageWrapper(gapic_types.PubsubMessage(data=b"foobarbaz"), None) + message2 = PublishMessageWrapper( + gapic_types.PubsubMessage(data=b"foobarbaz2"), None + ) future = batch.publish(message) future2 = batch.publish(message2) @@ -464,8 +468,10 @@ def test_publish_max_messages_enforced(): def test_publish_max_bytes_enforced(): batch = create_batch(topic="topic_foo", max_bytes=15) - message = MessageWrapper(gapic_types.PubsubMessage(data=b"foobarbaz"), None) - message2 = MessageWrapper(gapic_types.PubsubMessage(data=b"foobarbaz2"), None) + message = PublishMessageWrapper(gapic_types.PubsubMessage(data=b"foobarbaz"), None) + message2 = PublishMessageWrapper( + gapic_types.PubsubMessage(data=b"foobarbaz2"), None + ) future = batch.publish(message) future2 = batch.publish(message2) @@ -480,9 +486,9 @@ def test_publish_exceed_max_messages(): max_messages = 4 batch = create_batch(max_messages=max_messages) messages = ( - MessageWrapper(gapic_types.PubsubMessage(data=b"foobarbaz"), None), - MessageWrapper(gapic_types.PubsubMessage(data=b"spameggs"), None), - MessageWrapper(gapic_types.PubsubMessage(data=b"1335020400"), None), + PublishMessageWrapper(gapic_types.PubsubMessage(data=b"foobarbaz"), None), + PublishMessageWrapper(gapic_types.PubsubMessage(data=b"spameggs"), None), + PublishMessageWrapper(gapic_types.PubsubMessage(data=b"1335020400"), None), ) # Publish each of the messages, which should save them to the batch. @@ -497,7 +503,7 @@ def test_publish_exceed_max_messages(): # When a fourth message is published, commit should be called. # No future will be returned in this case. future = batch.publish( - MessageWrapper(gapic_types.PubsubMessage(data=b"last one"), None) + PublishMessageWrapper(gapic_types.PubsubMessage(data=b"last one"), None) ) commit.assert_called_once_with() @@ -521,7 +527,7 @@ def test_publish_single_message_size_exceeds_server_size_limit(): assert request_size == 1001 # sanity check, just above the (mocked) server limit with pytest.raises(exceptions.MessageTooLargeError): - batch.publish(MessageWrapper(big_message, None)) + batch.publish(PublishMessageWrapper(big_message, None)) @mock.patch.object(thread, "_SERVER_PUBLISH_MAX_BYTES", 1000) @@ -541,8 +547,8 @@ def test_publish_total_messages_size_exceeds_server_size_limit(): assert 1000 < request_size < 1500 with mock.patch.object(batch, "commit") as fake_commit: - batch.publish(MessageWrapper(messages[0], None)) - batch.publish(MessageWrapper(messages[1], None)) + batch.publish(PublishMessageWrapper(messages[0], None)) + batch.publish(PublishMessageWrapper(messages[1], None)) # The server side limit should kick in and cause a commit. fake_commit.assert_called_once() @@ -550,13 +556,13 @@ def test_publish_total_messages_size_exceeds_server_size_limit(): def test_publish_dict(): batch = create_batch() - message = MessageWrapper( + message = PublishMessageWrapper( gapic_types.PubsubMessage(data=b"foobarbaz", attributes={"spam": "eggs"}), None ) future = batch.publish(message) # There should be one message on the batch. - expected_message = MessageWrapper( + expected_message = PublishMessageWrapper( gapic_types.PubsubMessage(data=b"foobarbaz", attributes={"spam": "eggs"}), None ) assert batch.message_wrappers == [expected_message] @@ -567,10 +573,12 @@ def test_cancel(): batch = create_batch() futures = ( batch.publish( - MessageWrapper(gapic_types.PubsubMessage(data=b"This is my message."), None) + PublishMessageWrapper( + gapic_types.PubsubMessage(data=b"This is my message."), None + ) ), batch.publish( - MessageWrapper( + PublishMessageWrapper( gapic_types.PubsubMessage(data=b"This is another message."), None ) ), @@ -590,9 +598,9 @@ def test_do_not_commit_when_full_when_flag_is_off(): # Set commit_when_full flag to False batch = create_batch(max_messages=max_messages, commit_when_full=False) messages = ( - MessageWrapper(gapic_types.PubsubMessage(data=b"foobarbaz"), None), - MessageWrapper(gapic_types.PubsubMessage(data=b"spameggs"), None), - MessageWrapper(gapic_types.PubsubMessage(data=b"1335020400"), None), + PublishMessageWrapper(gapic_types.PubsubMessage(data=b"foobarbaz"), None), + PublishMessageWrapper(gapic_types.PubsubMessage(data=b"spameggs"), None), + PublishMessageWrapper(gapic_types.PubsubMessage(data=b"1335020400"), None), ) with mock.patch.object(batch, "commit") as commit: @@ -602,7 +610,7 @@ def test_do_not_commit_when_full_when_flag_is_off(): # When a fourth message is published, commit should not be called. future = batch.publish( - MessageWrapper(gapic_types.PubsubMessage(data=b"last one"), None) + PublishMessageWrapper(gapic_types.PubsubMessage(data=b"last one"), None) ) assert commit.call_count == 0 assert future is None @@ -628,8 +636,8 @@ def test_commit_otel_publish_rpc_span_exception(span_exporter): tracer = trace.get_tracer_provider().get_tracer("com.google.cloud.pubsub.v1") with tracer.start_as_current_span(name="foo", end_on_exit=False) as create_span: - message = MessageWrapper( - message=gapic_types.PubsubMessage(data=b"foo"), create_span=create_span + message = PublishMessageWrapper( + message=gapic_types.PubsubMessage(data=b"foo"), span=create_span ) batch.publish(message) @@ -668,7 +676,7 @@ def test_commit_otel_null_span(span_exporter): enable_open_telemetry=True, ) - msg = MessageWrapper( + msg = PublishMessageWrapper( message=gapic_types.PubsubMessage(data=b"foo"), ) @@ -714,8 +722,8 @@ def test_commit_otel_publish_non_sampled(span_exporter): tracer = trace.get_tracer_provider().get_tracer("com.google.cloud.pubsub.v1") with tracer.start_as_current_span(name="foo", end_on_exit=False) as span: span.is_recording = mock.Mock(return_value=False) - msg = MessageWrapper( - message=gapic_types.PubsubMessage(data=b"foo"), create_span=span + msg = PublishMessageWrapper( + message=gapic_types.PubsubMessage(data=b"foo"), span=span ) batch.publish(msg) @@ -749,15 +757,15 @@ def test_commit_otel_publish_rpc_span(span_exporter): # Simulate message 1 published with its own publisher create span. tracer = trace.get_tracer_provider().get_tracer("com.google.cloud.pubsub.v1") with tracer.start_as_current_span(name="foo", end_on_exit=False) as create_span1: - msg1 = MessageWrapper( - message=gapic_types.PubsubMessage(data=b"foo"), create_span=create_span1 + msg1 = PublishMessageWrapper( + message=gapic_types.PubsubMessage(data=b"foo"), span=create_span1 ) # Simulate message 2 published with its own publisher create span. with tracer.start_as_current_span(name="bar", end_on_exit=False) as create_span2: - msg2 = MessageWrapper( + msg2 = PublishMessageWrapper( message=gapic_types.PubsubMessage(data=b"bar"), - create_span=create_span2, + span=create_span2, ) # Add both messages to the batch. @@ -823,7 +831,7 @@ def test_batch_done_callback_called_on_success(): batch = create_batch(batch_done_callback=batch_done_callback_tracker) # Ensure messages exist. - message = MessageWrapper(gapic_types.PubsubMessage(data=b"foobarbaz"), None) + message = PublishMessageWrapper(gapic_types.PubsubMessage(data=b"foobarbaz"), None) batch.publish(message) # One response for one published message. @@ -843,7 +851,7 @@ def test_batch_done_callback_called_on_publish_failure(): batch = create_batch(batch_done_callback=batch_done_callback_tracker) # Ensure messages exist. - message = MessageWrapper(gapic_types.PubsubMessage(data=b"foobarbaz"), None) + message = PublishMessageWrapper(gapic_types.PubsubMessage(data=b"foobarbaz"), None) batch.publish(message) # One response for one published message. @@ -869,7 +877,7 @@ def test_batch_done_callback_called_on_publish_response_invalid(): batch = create_batch(batch_done_callback=batch_done_callback_tracker) # Ensure messages exist. - message = MessageWrapper(gapic_types.PubsubMessage(data=b"foobarbaz"), None) + message = PublishMessageWrapper(gapic_types.PubsubMessage(data=b"foobarbaz"), None) batch.publish(message) # No message ids returned in successful publish response -> invalid. diff --git a/tests/unit/pubsub_v1/publisher/sequencer/test_ordered_sequencer.py b/tests/unit/pubsub_v1/publisher/sequencer/test_ordered_sequencer.py index c9cd0b97e..231c803f5 100644 --- a/tests/unit/pubsub_v1/publisher/sequencer/test_ordered_sequencer.py +++ b/tests/unit/pubsub_v1/publisher/sequencer/test_ordered_sequencer.py @@ -27,13 +27,15 @@ from google.cloud.pubsub_v1 import publisher from google.cloud.pubsub_v1.publisher._sequencer import ordered_sequencer from google.pubsub_v1 import types as gapic_types -from google.cloud.pubsub_v1.publisher.message_wrapper import MessageWrapper +from google.cloud.pubsub_v1.opentelemetry.publish_message_wrapper import ( + PublishMessageWrapper, +) _ORDERING_KEY = "ordering_key_1" def create_message(): - return MessageWrapper( + return PublishMessageWrapper( gapic_types.PubsubMessage(data=b"foo", attributes={"bar": "baz"}), None ) diff --git a/tests/unit/pubsub_v1/publisher/sequencer/test_unordered_sequencer.py b/tests/unit/pubsub_v1/publisher/sequencer/test_unordered_sequencer.py index e25d99e8a..2caeb9afe 100644 --- a/tests/unit/pubsub_v1/publisher/sequencer/test_unordered_sequencer.py +++ b/tests/unit/pubsub_v1/publisher/sequencer/test_unordered_sequencer.py @@ -27,11 +27,13 @@ from google.cloud.pubsub_v1.publisher._batch import base from google.cloud.pubsub_v1.publisher._sequencer import unordered_sequencer from google.pubsub_v1 import types as gapic_types -from google.cloud.pubsub_v1.publisher.message_wrapper import MessageWrapper +from google.cloud.pubsub_v1.opentelemetry.publish_message_wrapper import ( + PublishMessageWrapper, +) def create_message(): - return MessageWrapper( + return PublishMessageWrapper( gapic_types.PubsubMessage(data=b"foo", attributes={"bar": "baz"}), None ) diff --git a/tests/unit/pubsub_v1/publisher/test_publisher_client.py b/tests/unit/pubsub_v1/publisher/test_publisher_client.py index 3b344f18d..fb2f876ca 100644 --- a/tests/unit/pubsub_v1/publisher/test_publisher_client.py +++ b/tests/unit/pubsub_v1/publisher/test_publisher_client.py @@ -37,7 +37,9 @@ from google.cloud.pubsub_v1.publisher import exceptions from google.cloud.pubsub_v1.publisher._sequencer import ordered_sequencer -from google.cloud.pubsub_v1.publisher.message_wrapper import MessageWrapper +from google.cloud.pubsub_v1.opentelemetry.publish_message_wrapper import ( + PublishMessageWrapper, +) from google.pubsub_v1 import types as gapic_types from google.pubsub_v1.services.publisher import client as publisher_client @@ -280,17 +282,17 @@ def test_publish(creds): batch.publish.assert_has_calls( [ mock.call( - MessageWrapper( + PublishMessageWrapper( message=gapic_types.PubsubMessage(data=b"spam"), - create_span=None, + span=None, ) ), mock.call( - MessageWrapper( + PublishMessageWrapper( message=gapic_types.PubsubMessage( data=b"foo", attributes={"bar": "baz"} ), - create_span=None, + span=None, ) ), ] @@ -538,12 +540,12 @@ def test_publish_attrs_bytestring(creds): # The attributes should have been sent as text. batch.publish.assert_called_once_with( # gapic_types.PubsubMessage(data=b"foo", attributes={"bar": "baz"}) - MessageWrapper( + PublishMessageWrapper( message=gapic_types.PubsubMessage( data=b"foo", attributes={"bar": "baz"}, ), - create_span=None, + span=None, ) ) @@ -584,9 +586,9 @@ def test_publish_new_batch_needed(creds): commit_timeout=gapic_v1.method.DEFAULT, ) message_pb = gapic_types.PubsubMessage(data=b"foo", attributes={"bar": "baz"}) - wrapper = MessageWrapper( + wrapper = PublishMessageWrapper( message=message_pb, - create_span=None, + span=None, ) batch1.publish.assert_called_once_with(wrapper) batch2.publish.assert_called_once_with(wrapper) @@ -794,19 +796,19 @@ def test_publish_with_ordering_key(creds): batch.publish.assert_has_calls( [ mock.call( - MessageWrapper( + PublishMessageWrapper( message=gapic_types.PubsubMessage(data=b"spam", ordering_key="k1"), - create_span=None, + span=None, ) ), mock.call( - MessageWrapper( + PublishMessageWrapper( message=gapic_types.PubsubMessage( data=b"foo", attributes={"bar": "baz"}, ordering_key="k1", ), - create_span=None, + span=None, ), ), ] From 9c0ac126724b3219c4659ec9e44ac9b4f1cd417f Mon Sep 17 00:00:00 2001 From: Mukund Date: Fri, 28 Jun 2024 19:38:01 +0000 Subject: [PATCH 17/30] Modify subscriber Message to contain OpenTelemetryData * Add the subscribe span to OpenTelemetryData --- .../opentelemetry/subscribe_spans_data.py | 22 ++++ .../subscriber/_protocol/dispatcher.py | 72 +++++++++- .../subscriber/_protocol/requests.py | 5 + .../_protocol/streaming_pull_manager.py | 31 +++-- google/cloud/pubsub_v1/subscriber/client.py | 5 + google/cloud/pubsub_v1/subscriber/message.py | 26 ++++ publisher_trace_provider.py | 59 +++++++++ .../pubsub_v1/subscriber/test_dispatcher.py | 110 +++++++++++++++- .../unit/pubsub_v1/subscriber/test_message.py | 124 ++++++++++++++++-- .../subscriber/test_streaming_pull_manager.py | 61 ++++++++- 10 files changed, 481 insertions(+), 34 deletions(-) create mode 100644 google/cloud/pubsub_v1/opentelemetry/subscribe_spans_data.py create mode 100644 publisher_trace_provider.py diff --git a/google/cloud/pubsub_v1/opentelemetry/subscribe_spans_data.py b/google/cloud/pubsub_v1/opentelemetry/subscribe_spans_data.py new file mode 100644 index 000000000..e7b1a367b --- /dev/null +++ b/google/cloud/pubsub_v1/opentelemetry/subscribe_spans_data.py @@ -0,0 +1,22 @@ +from dataclasses import dataclass +from typing import Optional + +from opentelemetry.trace.span import Span + + +@dataclass +class OpenTelemetryData: + """ + This class is for internal use by the library only. + Contains Open Telmetry data associated with a + google.cloud.pubsub_v1.subscriber.message.Message. Specifically it contains + the subscriber side spans associated with the message. + + This is so that the subsriber side spans can be ended by the library + after receiving the message back via an ack(), ack_with_response(), nack(). + """ + + subscribe_span: Optional[Span] = None + concurrrency_control_span: Optional[Span] = None + scheduler_span: Optional[Span] = None + process_span: Optional[Span] = None diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py index 15ad4abb3..c4a90023f 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py @@ -79,6 +79,13 @@ def __init__(self, manager: "StreamingPullManager", queue: "queue.Queue"): self._thread: Optional[threading.Thread] = None self._operational_lock = threading.Lock() + @property + def manager(self) -> "StreamingPullManager": + """Returns the Streaming Pull Manager associated with this dispatcher + instance. + """ + return self._manager + def start(self) -> None: """Start a thread to dispatch requests queued up by callbacks. @@ -179,7 +186,21 @@ def dispatch_callback(self, items: Sequence[RequestItem]) -> None: self.lease(lease_requests) if modack_requests: + for req in modack_requests: + if ( + req + and req.open_telemetry_data + and req.open_telemetry_data.subscribe_span + ): + req.open_telemetry_data.subscribe_span.add_event("modack start") self.modify_ack_deadline(modack_requests) + for req in modack_requests: + if ( + req + and req.open_telemetry_data + and req.open_telemetry_data.subscribe_span + ): + req.open_telemetry_data.subscribe_span.add_event("modack end") # Note: Drop and ack *must* be after lease. It's possible to get both # the lease and the ack/drop request in the same batch. @@ -243,6 +264,20 @@ def ack(self, items: Sequence[requests.AckRequest]) -> None: ack_reqs_dict=ack_reqs_dict, ) + if self._manager.open_telemetry_enabled: + for completed_ack in requests_completed: + if completed_ack.open_telemetry_data: + subscribe_span = ( + completed_ack.open_telemetry_data.subscribe_span + ) + if subscribe_span: + subscribe_span.set_attribute( + key="messaging.gcp_pubsub.result", + value="ack", + ) + subscribe_span.add_event("ack end") + subscribe_span.end() + # Remove the completed messages from lease management. self.drop(requests_completed) @@ -289,6 +324,20 @@ def _retry_acks(self, requests_to_retry): assert ( len(requests_to_retry) <= _ACK_IDS_BATCH_SIZE ), "Too many requests to be retried." + + if self._manager.open_telemetry_enabled: + for completed_ack in requests_completed: + if completed_ack.open_telemetry_data: + subscribe_span = ( + completed_ack.open_telemetry_data.subscribe_span + ) + if subscribe_span: + subscribe_span.set_attribute( + key="messaging.gcp_pubsub.result", + value="ack", + ) + subscribe_span.add_event("ack end") + subscribe_span.end() # Remove the completed messages from lease management. self.drop(requests_completed) @@ -342,9 +391,10 @@ def modify_ack_deadline( for req in itertools.islice(items_gen, _ACK_IDS_BATCH_SIZE) } requests_to_retry: List[requests.ModAckRequest] + requests_completed: List[requests.ModAckRequest] if default_deadline is None: # no further work needs to be done for `requests_to_retry` - _, requests_to_retry = self._manager.send_unary_modack( + requests_completed, requests_to_retry = self._manager.send_unary_modack( modify_deadline_ack_ids=list( itertools.islice(ack_ids_gen, _ACK_IDS_BATCH_SIZE) ), @@ -355,7 +405,7 @@ def modify_ack_deadline( default_deadline=None, ) else: - _, requests_to_retry = self._manager.send_unary_modack( + requests_completed, requests_to_retry = self._manager.send_unary_modack( modify_deadline_ack_ids=itertools.islice( ack_ids_gen, _ACK_IDS_BATCH_SIZE ), @@ -375,7 +425,7 @@ def modify_ack_deadline( functools.partial(self._retry_modacks, requests_to_retry), ) - def _retry_modacks(self, requests_to_retry): + def _retry_modacks(self, requests_to_retry: List[requests.ModAckRequest]): retry_delay_gen = exponential_sleep_generator( initial=_MIN_EXACTLY_ONCE_DELIVERY_ACK_MODACK_RETRY_DURATION_SECS, maximum=_MAX_EXACTLY_ONCE_DELIVERY_ACK_MODACK_RETRY_DURATION_SECS, @@ -405,11 +455,25 @@ def nack(self, items: Sequence[requests.NackRequest]) -> None: self.modify_ack_deadline( [ requests.ModAckRequest( - ack_id=item.ack_id, seconds=0, future=item.future + ack_id=item.ack_id, + seconds=0, + future=item.future, + open_telemetry_data=item.open_telemetry_data, ) for item in items ] ) + if self._manager.open_telemetry_enabled: + for item in items: + if item.open_telemetry_data: + subscribe_span = item.open_telemetry_data.subscribe_span + if subscribe_span: + subscribe_span.set_attribute( + key="messaging.gcp_pubsub.result", + value="nack", + ) + subscribe_span.add_event("nack end") + self.drop( [ requests.DropRequest( diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/requests.py b/google/cloud/pubsub_v1/subscriber/_protocol/requests.py index 9cd387545..da3235800 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/requests.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/requests.py @@ -15,6 +15,8 @@ import typing from typing import NamedTuple, Optional +from google.cloud.pubsub_v1.opentelemetry.subscribe_spans_data import OpenTelemetryData + if typing.TYPE_CHECKING: # pragma: NO COVER from google.cloud.pubsub_v1.subscriber import futures @@ -27,6 +29,7 @@ class AckRequest(NamedTuple): time_to_ack: float ordering_key: Optional[str] future: Optional["futures.Future"] + open_telemetry_data: Optional[OpenTelemetryData] = None class DropRequest(NamedTuple): @@ -45,6 +48,7 @@ class ModAckRequest(NamedTuple): ack_id: str seconds: float future: Optional["futures.Future"] + open_telemetry_data: Optional[OpenTelemetryData] = None class NackRequest(NamedTuple): @@ -52,3 +56,4 @@ class NackRequest(NamedTuple): byte_size: int ordering_key: Optional[str] future: Optional["futures.Future"] + open_telemetry_data: Optional[OpenTelemetryData] = None diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py index b9a70077a..694d1bdfb 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py @@ -22,12 +22,12 @@ import typing from typing import Any, Dict, Callable, Iterable, List, Optional, Set, Tuple import uuid -import sys import grpc # type: ignore from opentelemetry import trace from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator +from google.cloud.pubsub_v1.opentelemetry.subscribe_spans_data import OpenTelemetryData from google.api_core import bidi from google.api_core import exceptions @@ -281,6 +281,7 @@ def __init__( await_callbacks_on_shutdown: bool = False, ): self._client = client + self._open_telemetry_enabled = client.open_telemetry_enabled self._subscription = subscription self._exactly_once_enabled = False self._flow_control = flow_control @@ -375,6 +376,11 @@ def __init__( self._consumer: Optional[bidi.BackgroundConsumer] = None self._heartbeater: Optional[heartbeater.Heartbeater] = None + @property + def open_telemetry_enabled(self) -> bool: + """Whether open telemetry is enabled.""" + return self._open_telemetry_enabled + @property def is_active(self) -> bool: """``True`` if this manager is actively streaming. @@ -1084,7 +1090,8 @@ def _on_response(self, response: gapic_types.StreamingPullResponse) -> None: # protobuf message to significantly gain on attribute access performance. received_messages = response._pb.received_messages - if self._client._open_telemetry_enabled: + subscribe_spans = [] + if self.open_telemetry_enabled(): tracer = trace.get_tracer(_OPEN_TELEMETRY_TRACER_NAME) for received_message in response.received_messages: parent_span_context = TraceContextTextMapPropagator().extract( @@ -1100,7 +1107,7 @@ def _on_response(self, response: gapic_types.StreamingPullResponse) -> None: "messaging.destination.name": self._subscription, "gcp.project_id": self._subscription.split("/")[1], "messaging.message.id": received_message.message.message_id, - "messaging.message.body.size": sys.getsizeof( + "messaging.message.body.size": len( received_message.message.data ), "messaging.gcp_pubsub.message.ack_id": received_message.ack_id, @@ -1111,7 +1118,7 @@ def _on_response(self, response: gapic_types.StreamingPullResponse) -> None: }, end_on_exit=False, ) as subscribe_span: - pass + subscribe_spans.append(subscribe_span) _LOGGER.debug( "Processing %s received message(s), currently on hold %s (bytes %s).", @@ -1136,19 +1143,22 @@ def _on_response(self, response: gapic_types.StreamingPullResponse) -> None: # Immediately (i.e. without waiting for the auto lease management) # modack the messages we received, as this tells the server that we've # received them. - if self._client._open_telemetry_enabled: - subscribe_span.add_event(name="modack start") + if self.open_telemetry_enabled(): + for subscribe_span in subscribe_spans: + subscribe_span.add_event("modack start") ack_id_gen = (message.ack_id for message in received_messages) expired_ack_ids = self._send_lease_modacks( ack_id_gen, self.ack_deadline, warn_on_invalid=False ) - if self._client._open_telemetry_enabled: - subscribe_span.add_event(name="modack end") + if self.open_telemetry_enabled(): + for subscribe_span in subscribe_spans: + subscribe_span.add_event("modack end") with self._pause_resume_lock: assert self._scheduler is not None assert self._leaser is not None + i = 0 for received_message in received_messages: if ( not self._exactly_once_delivery_enabled() @@ -1161,6 +1171,10 @@ def _on_response(self, response: gapic_types.StreamingPullResponse) -> None: self._scheduler.queue, self._exactly_once_delivery_enabled, ) + if self.open_telemetry_enabled(): + message._open_telemetry_data = OpenTelemetryData( + subscribe_span=subscribe_spans[i], + ) self._messages_on_hold.put(message) self._on_hold_bytes += message.size req = requests.LeaseRequest( @@ -1169,6 +1183,7 @@ def _on_response(self, response: gapic_types.StreamingPullResponse) -> None: ordering_key=message.ordering_key, ) self._leaser.add([req]) + i = i + 1 self._maybe_release_messages() diff --git a/google/cloud/pubsub_v1/subscriber/client.py b/google/cloud/pubsub_v1/subscriber/client.py index dcf91a5f0..160bb0a13 100644 --- a/google/cloud/pubsub_v1/subscriber/client.py +++ b/google/cloud/pubsub_v1/subscriber/client.py @@ -117,6 +117,11 @@ def from_service_account_file( # type: ignore[override] from_service_account_json = from_service_account_file # type: ignore[assignment] + @property + def open_telemetry_enabled(self) -> bool: + """Returns whether Open Telemetry is enabled for the subscriber client.""" + return self._open_telemetry_enabled + @property def target(self) -> str: """Return the target (where the API is). diff --git a/google/cloud/pubsub_v1/subscriber/message.py b/google/cloud/pubsub_v1/subscriber/message.py index f744966a2..9c8a62a4b 100644 --- a/google/cloud/pubsub_v1/subscriber/message.py +++ b/google/cloud/pubsub_v1/subscriber/message.py @@ -25,6 +25,8 @@ from google.cloud.pubsub_v1.subscriber import futures from google.cloud.pubsub_v1.subscriber.exceptions import AcknowledgeStatus +from google.cloud.pubsub_v1.opentelemetry.subscribe_spans_data import OpenTelemetryData + if typing.TYPE_CHECKING: # pragma: NO COVER import datetime @@ -94,6 +96,7 @@ def __init__( delivery_attempt: int, request_queue: "queue.Queue", exactly_once_delivery_enabled_func: Callable[[], bool] = lambda: False, + open_telemetry_data: Optional[OpenTelemetryData] = None, ): """Construct the Message. @@ -119,6 +122,8 @@ def __init__( responsible for handling those requests. exactly_once_delivery_enabled_func (Callable[[], bool]): A Callable that returns whether exactly-once delivery is currently-enabled. Defaults to a lambda that always returns False. + open_telemetry_data (Optional[OpenTelemetryData]): + Internal only. Contains Open Telemetry data associated with the mesage. """ self._message = message self._ack_id = ack_id @@ -126,6 +131,7 @@ def __init__( self._request_queue = request_queue self._exactly_once_delivery_enabled_func = exactly_once_delivery_enabled_func self.message_id = message.message_id + self._open_telemetry_data = open_telemetry_data # The instantiation time is the time that this message # was received. Tracking this provides us a way to be smart about @@ -253,6 +259,10 @@ def ack(self) -> None: """ time_to_ack = math.ceil(time.time() - self._received_timestamp) + if self._open_telemetry_data: + subscriber_span = self._open_telemetry_data.subscribe_span + if subscriber_span: + subscriber_span.add_event(name="ack start") self._request_queue.put( requests.AckRequest( ack_id=self._ack_id, @@ -260,6 +270,7 @@ def ack(self) -> None: time_to_ack=time_to_ack, ordering_key=self.ordering_key, future=None, + open_telemetry_data=self._open_telemetry_data, ) ) @@ -302,6 +313,10 @@ def ack_with_response(self) -> "futures.Future": pubsub_v1.subscriber.exceptions.AcknowledgeError exception will be thrown. """ + if self._open_telemetry_data: + suscriber_span = self._open_telemetry_data.subscribe_span + if suscriber_span: + suscriber_span.add_event(name="ack start") req_future: Optional[futures.Future] if self._exactly_once_delivery_enabled_func(): future = futures.Future() @@ -317,6 +332,7 @@ def ack_with_response(self) -> "futures.Future": time_to_ack=time_to_ack, ordering_key=self.ordering_key, future=req_future, + open_telemetry_data=self._open_telemetry_data, ) ) return future @@ -429,12 +445,17 @@ def nack(self) -> None: may take place immediately or after a delay, and may arrive at this subscriber or another. """ + if self._open_telemetry_data: + subscriber_span = self._open_telemetry_data.subscribe_span + if subscriber_span: + subscriber_span.add_event("nack start") self._request_queue.put( requests.NackRequest( ack_id=self._ack_id, byte_size=self.size, ordering_key=self.ordering_key, future=None, + open_telemetry_data=self._open_telemetry_data, ) ) @@ -472,6 +493,10 @@ def nack_with_response(self) -> "futures.Future": will be thrown. """ + if self._open_telemetry_data: + subscriber_span = self._open_telemetry_data.subscribe_span + if subscriber_span: + subscriber_span.add_event("nack start") req_future: Optional[futures.Future] if self._exactly_once_delivery_enabled_func(): future = futures.Future() @@ -486,6 +511,7 @@ def nack_with_response(self) -> "futures.Future": byte_size=self.size, ordering_key=self.ordering_key, future=req_future, + open_telemetry_data=self._open_telemetry_data, ) ) diff --git a/publisher_trace_provider.py b/publisher_trace_provider.py new file mode 100644 index 000000000..a6f109387 --- /dev/null +++ b/publisher_trace_provider.py @@ -0,0 +1,59 @@ +# Utility class that implements tracerProvider and can be used to test the spans created during development +from typing import Callable +from google.cloud import pubsub_v1 +from concurrent import futures +from opentelemetry import trace +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import ( + BatchSpanProcessor, + ConsoleSpanExporter, +) + +provider = TracerProvider() +processor = BatchSpanProcessor(ConsoleSpanExporter()) +provider.add_span_processor(processor) + +# Sets the global default tracer provider +trace.set_tracer_provider(provider) + +# Creates a tracer from the global tracer provider +# tracer = trace.get_tracer("my.tracer.name") + +project_id = "cloud-pubsub-load-tests" +topic_id = "mukund-topic" + +publisher = pubsub_v1.PublisherClient(publisher_options=pubsub_v1.types.PublisherOptions( + enable_open_telemetry_tracing=True, +)) +topic_path = publisher.topic_path(project_id, topic_id) +publish_futures = [] + +# with tracer.start_as_current_span("tracer provider"): + + +def get_callback( + publish_future: pubsub_v1.publisher.futures.Future, data: str +) -> Callable[[pubsub_v1.publisher.futures.Future], None]: + def callback(publish_future: pubsub_v1.publisher.futures.Future) -> None: + try: + # Wait 60 seconds for the publish call to succeed. + print(publish_future.result(timeout=60)) + except futures.TimeoutError: + print(f"Publishing {data} timed out.") + + return callback + + +NUM_MESSAGES_TO_PUBLISH = 1 +for i in range(NUM_MESSAGES_TO_PUBLISH): + data = str(i) + # When you publish a message, the client returns a future. + publish_future = publisher.publish(topic_path, data.encode("utf-8")) + # Non-blocking. Publish failures are handled in the callback function. + publish_future.add_done_callback(get_callback(publish_future, data)) + publish_futures.append(publish_future) + +# Wait for all the publish futures to resolve before exiting. +futures.wait(publish_futures, return_when=futures.ALL_COMPLETED) + +print(f"Published messages with error handler to {topic_path}.") diff --git a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py index 89d72c61d..965e2c1d2 100644 --- a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py +++ b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py @@ -22,6 +22,9 @@ from google.cloud.pubsub_v1.subscriber._protocol import requests from google.cloud.pubsub_v1.subscriber._protocol import streaming_pull_manager from google.cloud.pubsub_v1.subscriber import futures +from google.cloud.pubsub_v1.opentelemetry.subscribe_spans_data import OpenTelemetryData + +from opentelemetry import trace # special case python < 3.8 if sys.version_info.major == 3 and sys.version_info.minor < 8: @@ -35,6 +38,34 @@ ) +def test_modack_otel(): + manager = mock.create_autospec( + streaming_pull_manager.StreamingPullManager, instance=True + ) + dispatcher_ = dispatcher.Dispatcher(manager, mock.sentinel.queue) + subscribe_span = mock.Mock(spec=trace.Span) + items = [ + requests.ModAckRequest( + "0", + 0, + None, + open_telemetry_data=OpenTelemetryData(subscribe_span=subscribe_span), + ), + requests.ModAckRequest("1", 0, None, open_telemetry_data=OpenTelemetryData()), + ] + + with mock.patch.object(dispatcher_, "modify_ack_deadline"): + dispatcher_.dispatch_callback(items) + + # Assert that modack start and end events were added to the subscribe span. + subscribe_span.add_event.assert_has_calls( + [ + mock.call("modack start"), + mock.call("modack end"), + ] + ) + + @pytest.mark.parametrize( "item,method_name", [ @@ -60,6 +91,14 @@ def test_dispatch_callback_active_manager(item, method_name): manager._exactly_once_delivery_enabled.assert_called() +def test_manager_property(): + manager = mock.create_autospec( + streaming_pull_manager.StreamingPullManager, instance=True + ) + dispatcher_ = dispatcher.Dispatcher(manager=manager, queue=mock.sentinel.queue) + assert dispatcher_.manager is manager + + @pytest.mark.parametrize( "item,method_name", [ @@ -365,10 +404,20 @@ def test_unknown_request_type(): dispatcher_.dispatch_callback(items) -def test_ack(): +@pytest.mark.parametrize( + "otel_enabled, otel_data", + [ + (False, None), + (True, OpenTelemetryData(subscribe_span=mock.Mock(spec=trace.Span))), + (True, None), + (True, OpenTelemetryData()), + ], +) +def test_ack(otel_enabled, otel_data): manager = mock.create_autospec( streaming_pull_manager.StreamingPullManager, instance=True ) + manager.open_telemetry_enabled = otel_enabled dispatcher_ = dispatcher.Dispatcher(manager, mock.sentinel.queue) items = [ @@ -378,7 +427,8 @@ def test_ack(): time_to_ack=20, ordering_key="", future=None, - ) + open_telemetry_data=otel_data, + ), ] manager.send_unary_ack.return_value = (items, []) dispatcher_.ack(items) @@ -390,6 +440,12 @@ def test_ack(): manager.leaser.remove.assert_called_once_with(items) manager.maybe_resume_consumer.assert_called_once() manager.ack_histogram.add.assert_called_once_with(20) + if otel_data and otel_data.subscribe_span: + otel_data.subscribe_span.set_attribute.assert_called_with( + key="messaging.gcp_pubsub.result", + value="ack", + ) + otel_data.subscribe_span.add_event.assert_called_with("ack end") def test_ack_no_time(): @@ -481,10 +537,20 @@ def test_retry_acks_in_new_thread(): assert ctor_call.kwargs["daemon"] -def test_retry_acks(): +@pytest.mark.parametrize( + "otel_enabled,otel_data", + [ + (False, None), + (True, OpenTelemetryData(subscribe_span=mock.Mock(spec=trace.Span))), + (True, OpenTelemetryData()), + (True, None), + ], +) +def test_retry_acks(otel_enabled, otel_data): manager = mock.create_autospec( streaming_pull_manager.StreamingPullManager, instance=True ) + manager.open_telemetry_enabled = otel_enabled dispatcher_ = dispatcher.Dispatcher(manager, mock.sentinel.queue) f = futures.Future() @@ -495,6 +561,7 @@ def test_retry_acks(): time_to_ack=20, ordering_key="", future=f, + open_telemetry_data=otel_data, ) ] # first and second `send_unary_ack` calls fail, third one succeeds @@ -516,6 +583,13 @@ def test_retry_acks(): ] ) + if otel_data and otel_data.subscribe_span: + otel_data.subscribe_span.set_attribute.assert_called_with( + key="messaging.gcp_pubsub.result", + value="ack", + ) + otel_data.subscribe_span.add_event.assert_called_with("ack end") + def test_retry_modacks_in_new_thread(): manager = mock.create_autospec( @@ -633,15 +707,29 @@ def test_drop_ordered_messages(): manager.maybe_resume_consumer.assert_called_once() -def test_nack(): +@pytest.mark.parametrize( + "otel_enabled,otel_data", + [ + (False, None), + (True, OpenTelemetryData(subscribe_span=mock.Mock(spec=trace.Span))), + (True, None), + (True, OpenTelemetryData()), + ], +) +def test_nack(otel_enabled, otel_data): manager = mock.create_autospec( streaming_pull_manager.StreamingPullManager, instance=True ) + manager.open_telemetry_enabled = otel_enabled dispatcher_ = dispatcher.Dispatcher(manager, mock.sentinel.queue) items = [ requests.NackRequest( - ack_id="ack_id_string", byte_size=10, ordering_key="", future=None + ack_id="ack_id_string", + byte_size=10, + ordering_key="", + future=None, + open_telemetry_data=otel_data, ) ] manager.send_unary_modack.return_value = (items, []) @@ -657,10 +745,20 @@ def test_nack(): ack_reqs_dict = call[1]["ack_reqs_dict"] assert ack_reqs_dict == { "ack_id_string": requests.ModAckRequest( - ack_id="ack_id_string", seconds=0, future=None + ack_id="ack_id_string", + seconds=0, + future=None, + open_telemetry_data=otel_data, ) } + if otel_data and otel_data.subscribe_span: + otel_data.subscribe_span.set_attribute.assert_called_with( + key="messaging.gcp_pubsub.result", + value="nack", + ) + otel_data.subscribe_span.add_event.assert_called_with("nack end") + def test_modify_ack_deadline(): manager = mock.create_autospec( diff --git a/tests/unit/pubsub_v1/subscriber/test_message.py b/tests/unit/pubsub_v1/subscriber/test_message.py index 49b07b7fd..4502fdc29 100644 --- a/tests/unit/pubsub_v1/subscriber/test_message.py +++ b/tests/unit/pubsub_v1/subscriber/test_message.py @@ -16,6 +16,7 @@ import queue import sys import time +import pytest # special case python < 3.8 if sys.version_info.major == 3 and sys.version_info.minor < 8: @@ -29,6 +30,8 @@ from google.protobuf import timestamp_pb2 from google.pubsub_v1 import types as gapic_types from google.cloud.pubsub_v1.subscriber.exceptions import AcknowledgeStatus +from google.cloud.pubsub_v1.opentelemetry.subscribe_spans_data import OpenTelemetryData +from opentelemetry import trace RECEIVED = datetime.datetime(2012, 4, 21, 15, 0, tzinfo=datetime.timezone.utc) @@ -44,6 +47,7 @@ def create_message( delivery_attempt=0, ordering_key="", exactly_once_delivery_enabled=False, + open_telemetry_data=None, **attrs ): with mock.patch.object(time, "time") as time_: @@ -65,6 +69,7 @@ def create_message( delivery_attempt=delivery_attempt, request_queue=queue.Queue(), exactly_once_delivery_enabled_func=lambda: exactly_once_delivery_enabled, + open_telemetry_data=open_telemetry_data, ) return msg @@ -131,8 +136,44 @@ def check_call_types(mock, *args, **kwargs): assert isinstance(call_args[n], argtype) -def test_ack(): - msg = create_message(b"foo", ack_id="bogus_ack_id") +# def test_ack_otel(): +# otel_data = OpenTelemetryData( +# subscribe_span=mock.Mock(spec=trace.Span), +# ) +# msg = create_message( +# data=b"foo", +# ack_id="ack_id", +# open_telemetry_data=otel_data, +# ) +# with mock.patch.object(msg._request_queue, "put") as put: +# msg.ack() +# put.assert_called_once_with( +# requests.AckRequest( +# ack_id="ack_id", +# byte_size=30, +# time_to_ack=mock.ANY, +# ordering_key="", +# future=None, +# open_telemetry_data=otel_data, +# ) +# ) +# check_call_types(put, requests.AckRequest) + + +@pytest.mark.parametrize( + "otel_data,", + [ + None, + OpenTelemetryData(subscribe_span=mock.Mock(spec=trace.Span)), + OpenTelemetryData(), + ], +) +def test_ack(otel_data): + msg = create_message( + data=b"foo", + ack_id="bogus_ack_id", + open_telemetry_data=otel_data, + ) with mock.patch.object(msg._request_queue, "put") as put: msg.ack() put.assert_called_once_with( @@ -142,13 +183,28 @@ def test_ack(): time_to_ack=mock.ANY, ordering_key="", future=None, + open_telemetry_data=otel_data, ) ) check_call_types(put, requests.AckRequest) - - -def test_ack_with_response_exactly_once_delivery_disabled(): - msg = create_message(b"foo", ack_id="bogus_ack_id") + if otel_data and otel_data.subscribe_span: + otel_data.subscribe_span.add_event.assert_called_with(name="ack start") + + +@pytest.mark.parametrize( + "otel_data", + [ + None, + OpenTelemetryData(subscribe_span=mock.Mock(spec=trace.Span)), + OpenTelemetryData(), + ], +) +def test_ack_with_response_exactly_once_delivery_disabled(otel_data): + msg = create_message( + b"foo", + ack_id="bogus_ack_id", + open_telemetry_data=otel_data, + ) with mock.patch.object(msg._request_queue, "put") as put: future = msg.ack_with_response() put.assert_called_once_with( @@ -158,12 +214,16 @@ def test_ack_with_response_exactly_once_delivery_disabled(): time_to_ack=mock.ANY, ordering_key="", future=None, + open_telemetry_data=otel_data, ) ) assert future.result() == AcknowledgeStatus.SUCCESS assert future == message._SUCCESS_FUTURE check_call_types(put, requests.AckRequest) + if otel_data and otel_data.subscribe_span: + otel_data.subscribe_span.add_event.assert_called_with(name="ack start") + def test_ack_with_response_exactly_once_delivery_enabled(): msg = create_message( @@ -227,30 +287,66 @@ def test_modify_ack_deadline_with_response_exactly_once_delivery_enabled(): check_call_types(put, requests.ModAckRequest) -def test_nack(): - msg = create_message(b"foo", ack_id="bogus_ack_id") +@pytest.mark.parametrize( + "otel_data,", + [ + None, + OpenTelemetryData(subscribe_span=mock.Mock(spec=trace.Span)), + OpenTelemetryData(), + ], +) +def test_nack(otel_data): + msg = create_message( + data=b"foo", + ack_id="bogus_ack_id", + open_telemetry_data=otel_data, + ) with mock.patch.object(msg._request_queue, "put") as put: msg.nack() put.assert_called_once_with( requests.NackRequest( - ack_id="bogus_ack_id", byte_size=30, ordering_key="", future=None + ack_id="bogus_ack_id", + byte_size=30, + ordering_key="", + future=None, + open_telemetry_data=otel_data, ) ) check_call_types(put, requests.NackRequest) - - -def test_nack_with_response_exactly_once_delivery_disabled(): - msg = create_message(b"foo", ack_id="bogus_ack_id") + if otel_data and otel_data.subscribe_span: + otel_data.subscribe_span.add_event.assert_called_with("nack start") + + +@pytest.mark.parametrize( + "otel_data", + [ + None, + OpenTelemetryData(subscribe_span=mock.Mock(spec=trace.Span)), + OpenTelemetryData(), + ], +) +def test_nack_with_response_exactly_once_delivery_disabled(otel_data): + msg = create_message( + data=b"foo", + ack_id="bogus_ack_id", + open_telemetry_data=otel_data, + ) with mock.patch.object(msg._request_queue, "put") as put: future = msg.nack_with_response() put.assert_called_once_with( requests.NackRequest( - ack_id="bogus_ack_id", byte_size=30, ordering_key="", future=None + ack_id="bogus_ack_id", + byte_size=30, + ordering_key="", + future=None, + open_telemetry_data=otel_data, ) ) assert future.result() == AcknowledgeStatus.SUCCESS assert future == message._SUCCESS_FUTURE check_call_types(put, requests.NackRequest) + if otel_data and otel_data.subscribe_span: + otel_data.subscribe_span.add_event.assert_called_with("nack start") def test_nack_with_response_exactly_once_delivery_enabled(): diff --git a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py index e9afca613..b5f86aa3c 100644 --- a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py +++ b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py @@ -46,6 +46,8 @@ from google.rpc import code_pb2 from google.rpc import error_details_pb2 +from opentelemetry import trace + @pytest.mark.parametrize( "exception,expected_cls", @@ -97,6 +99,7 @@ def test__wrap_callback_errors_error(callback_error): def test_constructor_and_default_state(): mock.sentinel.subscription = str() + mock.sentinel.client.open_telemetry_enabled = False manager = streaming_pull_manager.StreamingPullManager( mock.sentinel.client, mock.sentinel.subscription ) @@ -1234,7 +1237,7 @@ def make_running_manager(enable_open_telemetry=False, **kwargs): manager._dispatcher = mock.create_autospec(dispatcher.Dispatcher, instance=True) manager._leaser = mock.create_autospec(leaser.Leaser, instance=True) manager._heartbeater = mock.create_autospec(heartbeater.Heartbeater, instance=True) - manager._client._open_telemetry_enabled = enable_open_telemetry + manager._open_telemetry_enabled = mock.Mock(return_value=enable_open_telemetry) return ( manager, manager._consumer, @@ -1487,10 +1490,12 @@ def test__on_response_mod_ack_otel(span_exporter): gapic_types.ReceivedMessage( ack_id="ack_1", message=gapic_types.PubsubMessage(data=b"foo", message_id="1"), + delivery_attempt=1, ), gapic_types.ReceivedMessage( ack_id="ack_2", message=gapic_types.PubsubMessage(data=b"bar", message_id="2"), + delivery_attempt=1, ), ] ) @@ -1501,7 +1506,13 @@ def test__on_response_mod_ack_otel(span_exporter): # Actually run the method and chack that correct MODACK value is used. with mock.patch.object( type(manager), "ack_deadline", new=mock.PropertyMock(return_value=18) - ): + ), mock.patch("opentelemetry.trace.get_tracer") as mock_get_tracer: + mock_tracer = mock.MagicMock() + mock_get_tracer.return_value = mock_tracer + mock_span = mock.MagicMock() + mock_tracer.start_as_current_span.return_value = mock_span + mock_span.__enter__.return_value = mock_span + mock_span.__exit__.return_value = None manager._on_response(response) dispatcher.modify_ack_deadline.assert_called_once_with( @@ -1515,6 +1526,52 @@ def test__on_response_mod_ack_otel(span_exporter): # Subscribe span would still be active, hence would not be exported. spans = span_exporter.get_finished_spans() assert len(spans) == 0 + mock_get_tracer.assert_called_once_with("google.cloud.pubsub_v1.subscriber") + + mock_tracer.start_as_current_span.assert_has_calls( + [ + mock.call( + name="projects/projectID/subscriptions/subscriptionID subscribe", + kind=trace.SpanKind.CONSUMER, + context=None, + attributes={ + "messaging.system": "gcp_pubsub", + "messaging.destination.name": "projects/projectID/subscriptions/subscriptionID", + "gcp.project_id": "projectID", + "messaging.message.id": "1", + "messaging.message.body.size": 3, + "messaging.gcp_pubsub.message.ack_id": "ack_1", + "messaging.gcp_pubsub.message.ordering_key": "", + "messaging.gcp_pubsub.message.exactly_once_delivery": False, + "code.function": "_on_response", + "messaging.gcp_pubsub.message.delivery_attempt": 1, + }, + end_on_exit=False, + ), + mock.call().__enter__(), + mock.call().__exit__(None, None, None), + mock.call( + name="projects/projectID/subscriptions/subscriptionID subscribe", + kind=trace.SpanKind.CONSUMER, + context=None, + attributes={ + "messaging.system": "gcp_pubsub", + "messaging.destination.name": "projects/projectID/subscriptions/subscriptionID", + "gcp.project_id": "projectID", + "messaging.message.id": "2", + "messaging.message.body.size": 3, + "messaging.gcp_pubsub.message.ack_id": "ack_2", + "messaging.gcp_pubsub.message.ordering_key": "", + "messaging.gcp_pubsub.message.exactly_once_delivery": False, + "code.function": "_on_response", + "messaging.gcp_pubsub.message.delivery_attempt": 1, + }, + end_on_exit=False, + ), + mock.call().__enter__(), + mock.call().__exit__(None, None, None), + ], + ) def test__on_response_modifies_ack_deadline(): From 0ee007fc0fe8cf6a112a42a419ab93b96d550e12 Mon Sep 17 00:00:00 2001 From: Mukund Date: Mon, 1 Jul 2024 21:20:04 +0000 Subject: [PATCH 18/30] Add subscriber concurrency control span --- .../opentelemetry/subscribe_spans_data.py | 2 +- .../_protocol/streaming_pull_manager.py | 21 +++++++ google/cloud/pubsub_v1/subscriber/message.py | 4 ++ .../subscriber/test_streaming_pull_manager.py | 61 ++++--------------- 4 files changed, 39 insertions(+), 49 deletions(-) diff --git a/google/cloud/pubsub_v1/opentelemetry/subscribe_spans_data.py b/google/cloud/pubsub_v1/opentelemetry/subscribe_spans_data.py index e7b1a367b..67f0af3d9 100644 --- a/google/cloud/pubsub_v1/opentelemetry/subscribe_spans_data.py +++ b/google/cloud/pubsub_v1/opentelemetry/subscribe_spans_data.py @@ -17,6 +17,6 @@ class OpenTelemetryData: """ subscribe_span: Optional[Span] = None - concurrrency_control_span: Optional[Span] = None + concurrency_control_span: Optional[Span] = None scheduler_span: Optional[Span] = None process_span: Optional[Span] = None diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py index 694d1bdfb..70e16c2e8 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py @@ -28,6 +28,7 @@ from opentelemetry import trace from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator from google.cloud.pubsub_v1.opentelemetry.subscribe_spans_data import OpenTelemetryData +from opentelemetry.trace.propagation import set_span_in_context from google.api_core import bidi from google.api_core import exceptions @@ -132,6 +133,11 @@ def _wrap_callback_errors( message: The Pub/Sub message. """ try: + if ( + message.open_telemetry_data() + and message.open_telemetry_data().concurrrency_control_span + ): + message.open_telemetry_data().concurrrency_control_span.end() callback(message) except BaseException as exc: # Note: the likelihood of this failing is extremely low. This just adds @@ -633,6 +639,21 @@ def _schedule_message_on_hold( ) assert self._scheduler is not None assert self._callback is not None + if ( + self.open_telemetry_enabled() + and msg.open_telemetry_data + and msg.open_telemetry_data.subscribe_span + ): + tracer = trace.get_tracer(_OPEN_TELEMETRY_TRACER_NAME) + with tracer.start_as_current_span( + name="subscriber concurrency control", + kind=trace.SpanKind.INTERNAL, + context=set_span_in_context(msg.open_telemetry_data.subscribe_span), + end_on_exit=False, + ) as concurrency_control_span: + msg.open_telemetry_data.concurrency_control_span = ( + concurrency_control_span + ) self._scheduler.schedule(self._callback, msg) def send_unary_ack( diff --git a/google/cloud/pubsub_v1/subscriber/message.py b/google/cloud/pubsub_v1/subscriber/message.py index 9c8a62a4b..c5e098974 100644 --- a/google/cloud/pubsub_v1/subscriber/message.py +++ b/google/cloud/pubsub_v1/subscriber/message.py @@ -238,6 +238,10 @@ def delivery_attempt(self) -> Optional[int]: """ return self._delivery_attempt + @property + def open_telemetry_data(self) -> Optional[OpenTelemetryData]: + return self._open_telemetry_data + def ack(self) -> None: """Acknowledge the given message. diff --git a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py index b5f86aa3c..e820c1c88 100644 --- a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py +++ b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py @@ -46,8 +46,6 @@ from google.rpc import code_pb2 from google.rpc import error_details_pb2 -from opentelemetry import trace - @pytest.mark.parametrize( "exception,expected_cls", @@ -67,6 +65,19 @@ def test__wrap_as_exception(exception, expected_cls): ) +def test__wrap_callback_errors_no_error_otel(): + msg = mock.Mock() + msg.open_telemetry_data = mock.Mock(return_value=None) + callback = mock.Mock() + on_callback_error = mock.Mock() + + streaming_pull_manager._wrap_callback_errors(callback, on_callback_error, msg) + + callback.assert_called_once_with(msg) + msg.nack.assert_not_called() + on_callback_error.assert_not_called() + + def test__wrap_callback_errors_no_error(): msg = mock.create_autospec(message.Message, instance=True) callback = mock.Mock() @@ -1526,52 +1537,6 @@ def test__on_response_mod_ack_otel(span_exporter): # Subscribe span would still be active, hence would not be exported. spans = span_exporter.get_finished_spans() assert len(spans) == 0 - mock_get_tracer.assert_called_once_with("google.cloud.pubsub_v1.subscriber") - - mock_tracer.start_as_current_span.assert_has_calls( - [ - mock.call( - name="projects/projectID/subscriptions/subscriptionID subscribe", - kind=trace.SpanKind.CONSUMER, - context=None, - attributes={ - "messaging.system": "gcp_pubsub", - "messaging.destination.name": "projects/projectID/subscriptions/subscriptionID", - "gcp.project_id": "projectID", - "messaging.message.id": "1", - "messaging.message.body.size": 3, - "messaging.gcp_pubsub.message.ack_id": "ack_1", - "messaging.gcp_pubsub.message.ordering_key": "", - "messaging.gcp_pubsub.message.exactly_once_delivery": False, - "code.function": "_on_response", - "messaging.gcp_pubsub.message.delivery_attempt": 1, - }, - end_on_exit=False, - ), - mock.call().__enter__(), - mock.call().__exit__(None, None, None), - mock.call( - name="projects/projectID/subscriptions/subscriptionID subscribe", - kind=trace.SpanKind.CONSUMER, - context=None, - attributes={ - "messaging.system": "gcp_pubsub", - "messaging.destination.name": "projects/projectID/subscriptions/subscriptionID", - "gcp.project_id": "projectID", - "messaging.message.id": "2", - "messaging.message.body.size": 3, - "messaging.gcp_pubsub.message.ack_id": "ack_2", - "messaging.gcp_pubsub.message.ordering_key": "", - "messaging.gcp_pubsub.message.exactly_once_delivery": False, - "code.function": "_on_response", - "messaging.gcp_pubsub.message.delivery_attempt": 1, - }, - end_on_exit=False, - ), - mock.call().__enter__(), - mock.call().__exit__(None, None, None), - ], - ) def test__on_response_modifies_ack_deadline(): From ddab02324939474885f28bab498745a3012446ff Mon Sep 17 00:00:00 2001 From: Mukund Date: Tue, 2 Jul 2024 15:01:34 +0000 Subject: [PATCH 19/30] Add scheduler span --- .../subscriber/_protocol/messages_on_hold.py | 20 +++++++++++ .../_protocol/streaming_pull_manager.py | 17 +++++----- .../subscriber/test_messages_on_hold.py | 33 ++++++++++++++++++- .../subscriber/test_streaming_pull_manager.py | 29 ++++++++++++++-- 4 files changed, 87 insertions(+), 12 deletions(-) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/messages_on_hold.py b/google/cloud/pubsub_v1/subscriber/_protocol/messages_on_hold.py index 63c2edbfa..9a3a13ad7 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/messages_on_hold.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/messages_on_hold.py @@ -17,12 +17,18 @@ import typing from typing import Any, Callable, Iterable, Optional +from opentelemetry import trace +from opentelemetry.trace.propagation import set_span_in_context + if typing.TYPE_CHECKING: # pragma: NO COVER from google.cloud.pubsub_v1 import subscriber _LOGGER = logging.getLogger(__name__) +_OPEN_TELEMETRY_TRACER_NAME = "google.cloud.pubsub_v1.subscriber" +"""Open Telemetry Instrumenting module name.""" + class MessagesOnHold(object): """Tracks messages on hold by ordering key. Not thread-safe.""" @@ -48,6 +54,8 @@ def __init__(self): # flight, but there are no pending messages. self._pending_ordered_messages = {} + self._tracer = trace.get_tracer(_OPEN_TELEMETRY_TRACER_NAME) + @property def size(self) -> int: """Return the number of messages on hold across ordered and unordered messages. @@ -100,6 +108,18 @@ def put(self, message: "subscriber.message.Message") -> None: Args: message: The message to put on hold. """ + if message.open_telemetry_data: + with self._tracer.start_as_current_span( + name="subscriber scheduler", + kind=trace.SpanKind.INTERNAL, + context=set_span_in_context( + message.open_telemetry_data.subscribe_span + if message.open_telemetry_data.subscribe_span + else None + ), + end_on_exit=False, + ) as scheduler_span: + message.open_telemetry_data.scheduler_span = scheduler_span self._messages_on_hold.append(message) self._size = self._size + 1 diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py index 70e16c2e8..c6386527c 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py @@ -133,11 +133,11 @@ def _wrap_callback_errors( message: The Pub/Sub message. """ try: - if ( - message.open_telemetry_data() - and message.open_telemetry_data().concurrrency_control_span - ): - message.open_telemetry_data().concurrrency_control_span.end() + if message.open_telemetry_data: + if message.open_telemetry_data.concurrency_control_span: + message.open_telemetry_data.concurrency_control_span.end() + if message.open_telemetry_data.scheduler_span: + message.open_telemetry_data.scheduler_span.end() callback(message) except BaseException as exc: # Note: the likelihood of this failing is extremely low. This just adds @@ -381,6 +381,7 @@ def __init__( self._leaser: Optional[leaser.Leaser] = None self._consumer: Optional[bidi.BackgroundConsumer] = None self._heartbeater: Optional[heartbeater.Heartbeater] = None + self._tracer = trace.get_tracer(_OPEN_TELEMETRY_TRACER_NAME) @property def open_telemetry_enabled(self) -> bool: @@ -644,8 +645,7 @@ def _schedule_message_on_hold( and msg.open_telemetry_data and msg.open_telemetry_data.subscribe_span ): - tracer = trace.get_tracer(_OPEN_TELEMETRY_TRACER_NAME) - with tracer.start_as_current_span( + with self._tracer.start_as_current_span( name="subscriber concurrency control", kind=trace.SpanKind.INTERNAL, context=set_span_in_context(msg.open_telemetry_data.subscribe_span), @@ -1113,13 +1113,12 @@ def _on_response(self, response: gapic_types.StreamingPullResponse) -> None: subscribe_spans = [] if self.open_telemetry_enabled(): - tracer = trace.get_tracer(_OPEN_TELEMETRY_TRACER_NAME) for received_message in response.received_messages: parent_span_context = TraceContextTextMapPropagator().extract( carrier=received_message.message, getter=OTelContextGetter(), ) - with tracer.start_as_current_span( + with self._tracer.start_as_current_span( name=f"{self._subscription} subscribe", kind=trace.SpanKind.CONSUMER, context=parent_span_context if parent_span_context else None, diff --git a/tests/unit/pubsub_v1/subscriber/test_messages_on_hold.py b/tests/unit/pubsub_v1/subscriber/test_messages_on_hold.py index 5e1dcf91b..df08bba66 100644 --- a/tests/unit/pubsub_v1/subscriber/test_messages_on_hold.py +++ b/tests/unit/pubsub_v1/subscriber/test_messages_on_hold.py @@ -13,13 +13,24 @@ # limitations under the License. import queue +import sys from google.cloud.pubsub_v1.subscriber import message from google.cloud.pubsub_v1.subscriber._protocol import messages_on_hold from google.pubsub_v1 import types as gapic_types +from google.cloud.pubsub_v1.opentelemetry.subscribe_spans_data import OpenTelemetryData +from opentelemetry.trace.span import SpanContext +from opentelemetry import trace -def make_message(ack_id, ordering_key): +# special case python < 3.8 +if sys.version_info.major == 3 and sys.version_info.minor < 8: + import mock +else: + from unittest import mock + + +def make_message(ack_id, ordering_key, open_telemetry_data=None): proto_msg = gapic_types.PubsubMessage(data=b"Q", ordering_key=ordering_key) return message.Message( proto_msg._pb, @@ -27,6 +38,7 @@ def make_message(ack_id, ordering_key): 0, queue.Queue(), exactly_once_delivery_enabled_func=lambda: False, # pragma: NO COVER + open_telemetry_data=open_telemetry_data, ) @@ -37,6 +49,25 @@ def test_init(): assert moh.get() is None +def test_put_otel(): + moh = messages_on_hold.MessagesOnHold() + subscribe_span = mock.Mock(spec=SpanContext) + msg = make_message( + ack_id="ack_id1", + ordering_key="key1", + open_telemetry_data=OpenTelemetryData( + subscribe_span=subscribe_span, + ), + ) + moh.put(msg) + + scheduler_span = msg.open_telemetry_data.scheduler_span + assert scheduler_span is not None + assert scheduler_span.name == "subscriber scheduler" + assert scheduler_span.kind == trace.SpanKind.INTERNAL + assert scheduler_span.is_recording() + + def test_put_and_get_unordered_messages(): moh = messages_on_hold.MessagesOnHold() diff --git a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py index e820c1c88..62a6b4246 100644 --- a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py +++ b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py @@ -41,6 +41,8 @@ from google.cloud.pubsub_v1.subscriber import exceptions as subscriber_exceptions from google.cloud.pubsub_v1.subscriber import futures from google.pubsub_v1 import types as gapic_types +from google.cloud.pubsub_v1.opentelemetry.subscribe_spans_data import OpenTelemetryData +from opentelemetry.trace import Span import grpc from google.rpc import status_pb2 from google.rpc import code_pb2 @@ -65,9 +67,27 @@ def test__wrap_as_exception(exception, expected_cls): ) -def test__wrap_callback_errors_no_error_otel(): +@pytest.mark.parametrize( + "otel_data", + [ + ( + OpenTelemetryData( + concurrency_control_span=mock.Mock(spec=Span), + scheduler_span=mock.Mock(spec=Span), + ) + ), + ( + OpenTelemetryData( + concurrency_control_span=None, + scheduler_span=None, + ) + ), + (None), + ], +) +def test__wrap_callback_errors_no_error_otel(otel_data): msg = mock.Mock() - msg.open_telemetry_data = mock.Mock(return_value=None) + msg.open_telemetry_data = otel_data callback = mock.Mock() on_callback_error = mock.Mock() @@ -77,6 +97,11 @@ def test__wrap_callback_errors_no_error_otel(): msg.nack.assert_not_called() on_callback_error.assert_not_called() + if otel_data and otel_data.concurrency_control_span: + otel_data.concurrency_control_span.end.assert_called_once() + if otel_data and otel_data.scheduler_span: + otel_data.scheduler_span.end.assert_called_once() + def test__wrap_callback_errors_no_error(): msg = mock.create_autospec(message.Message, instance=True) From a7665b6d6c8ce685dc11b94201074f600893afa1 Mon Sep 17 00:00:00 2001 From: Mukund Date: Tue, 2 Jul 2024 22:44:19 +0000 Subject: [PATCH 20/30] Refactor _wrap_callback_errors to contain subscription This is required when instantiating process spans right before invoking user provided callbacks --- .../_protocol/streaming_pull_manager.py | 7 ++++++- .../subscriber/test_streaming_pull_manager.py | 21 ++++++++++++++++--- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py index c6386527c..e2849b810 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py @@ -123,6 +123,7 @@ def _wrap_as_exception(maybe_exception: Any) -> BaseException: def _wrap_callback_errors( callback: Callable[["google.cloud.pubsub_v1.subscriber.message.Message"], Any], on_callback_error: Callable[[BaseException], Any], + subscription: str, message: "google.cloud.pubsub_v1.subscriber.message.Message", ): """Wraps a user callback so that if an exception occurs the message is @@ -138,6 +139,7 @@ def _wrap_callback_errors( message.open_telemetry_data.concurrency_control_span.end() if message.open_telemetry_data.scheduler_span: message.open_telemetry_data.scheduler_span.end() + callback(message) except BaseException as exc: # Note: the likelihood of this failing is extremely low. This just adds @@ -870,7 +872,10 @@ def open( raise ValueError("This manager has been closed and can not be re-used.") self._callback = functools.partial( - _wrap_callback_errors, callback, on_callback_error + _wrap_callback_errors, + callback, + on_callback_error, + self._subscription, ) # Create the RPC diff --git a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py index 62a6b4246..a65b273dc 100644 --- a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py +++ b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py @@ -91,7 +91,12 @@ def test__wrap_callback_errors_no_error_otel(otel_data): callback = mock.Mock() on_callback_error = mock.Mock() - streaming_pull_manager._wrap_callback_errors(callback, on_callback_error, msg) + streaming_pull_manager._wrap_callback_errors( + callback, + on_callback_error, + "projects/projectID/subscriptions/subscriptionID", + msg, + ) callback.assert_called_once_with(msg) msg.nack.assert_not_called() @@ -108,7 +113,12 @@ def test__wrap_callback_errors_no_error(): callback = mock.Mock() on_callback_error = mock.Mock() - streaming_pull_manager._wrap_callback_errors(callback, on_callback_error, msg) + streaming_pull_manager._wrap_callback_errors( + callback, + on_callback_error, + "projects/projectID/subscriptions/subscriptionID", + msg, + ) callback.assert_called_once_with(msg) msg.nack.assert_not_called() @@ -127,7 +137,12 @@ def test__wrap_callback_errors_error(callback_error): callback = mock.Mock(side_effect=callback_error) on_callback_error = mock.Mock() - streaming_pull_manager._wrap_callback_errors(callback, on_callback_error, msg) + streaming_pull_manager._wrap_callback_errors( + callback, + on_callback_error, + "projects/projectID/subscriptions/subscriptionID", + msg, + ) msg.nack.assert_called_once() on_callback_error.assert_called_once_with(callback_error) From cf6e4864067b808b32397104c1532f2f2d722175 Mon Sep 17 00:00:00 2001 From: Mukund Date: Thu, 4 Jul 2024 00:32:19 +0000 Subject: [PATCH 21/30] Add process span --- .../_protocol/streaming_pull_manager.py | 19 ++++++++++++- .../subscriber/test_streaming_pull_manager.py | 28 +++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py index e2849b810..eb2ff6fa9 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py @@ -139,7 +139,24 @@ def _wrap_callback_errors( message.open_telemetry_data.concurrency_control_span.end() if message.open_telemetry_data.scheduler_span: message.open_telemetry_data.scheduler_span.end() - + tracer = trace.get_tracer(_OPEN_TELEMETRY_TRACER_NAME) + subscribe_span = message.open_telemetry_data.subscribe_span + publisher_create_span_link = None + if subscribe_span and subscribe_span.parent: + publisher_create_span_link = trace.Link(subscribe_span.parent) + with tracer.start_as_current_span( + name=f"{subscription.split('/')[3]} process", + attributes={ + "messaging.system": _OPEN_TELEMETRY_MESSAGING_SYSTEM, + }, + kind=trace.SpanKind.INTERNAL, + context=set_span_in_context(subscribe_span) if subscribe_span else None, + links=[publisher_create_span_link] + if publisher_create_span_link + else None, + end_on_exit=False, + ) as process_span: + message.open_telemetry_data.process_span = process_span callback(message) except BaseException as exc: # Note: the likelihood of this failing is extremely low. This just adds diff --git a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py index a65b273dc..b595b7056 100644 --- a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py +++ b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py @@ -43,6 +43,7 @@ from google.pubsub_v1 import types as gapic_types from google.cloud.pubsub_v1.opentelemetry.subscribe_spans_data import OpenTelemetryData from opentelemetry.trace import Span +from opentelemetry import trace import grpc from google.rpc import status_pb2 from google.rpc import code_pb2 @@ -67,6 +68,33 @@ def test__wrap_as_exception(exception, expected_cls): ) +def test__wrap_callback_errors_otel_process_span(): + msg = mock.Mock() + callback = mock.Mock() + tracer = trace.get_tracer("google.cloud.pubsub_v1.subscriber") + with tracer.start_as_current_span("publisher_create_span") as publisher_create_span: + with tracer.start_as_current_span("subscriber_span") as subscribe_span: + msg.open_telemetry_data.subscribe_span = subscribe_span + streaming_pull_manager._wrap_callback_errors( + callback, + mock.Mock(), + "projects/projectID/subscriptions/subscriptionID", + msg, + ) + + callback.assert_called_once() + process_span = msg.open_telemetry_data.process_span + assert process_span is not None + assert process_span.name == "subscriptionID process" + assert process_span.kind == trace.SpanKind.INTERNAL + assert process_span.parent.span_id == subscribe_span.context.span_id + # Verify that the link to create span of the associated message is present. + assert len(process_span.links) == 1 + assert ( + process_span.links[0].context.span_id == publisher_create_span.context.span_id + ) + + @pytest.mark.parametrize( "otel_data", [ From 9d7dfddb8bc5ab2583d6a09b12730cc23d92fdbe Mon Sep 17 00:00:00 2001 From: Mukund Date: Sat, 6 Jul 2024 06:46:08 +0000 Subject: [PATCH 22/30] Add ack called event to process span --- google/cloud/pubsub_v1/subscriber/message.py | 3 ++ .../unit/pubsub_v1/subscriber/test_message.py | 31 ++++--------------- 2 files changed, 9 insertions(+), 25 deletions(-) diff --git a/google/cloud/pubsub_v1/subscriber/message.py b/google/cloud/pubsub_v1/subscriber/message.py index c5e098974..d7da85d66 100644 --- a/google/cloud/pubsub_v1/subscriber/message.py +++ b/google/cloud/pubsub_v1/subscriber/message.py @@ -267,6 +267,9 @@ def ack(self) -> None: subscriber_span = self._open_telemetry_data.subscribe_span if subscriber_span: subscriber_span.add_event(name="ack start") + process_span = self._open_telemetry_data.process_span + if process_span: + process_span.add_event(name="ack called") self._request_queue.put( requests.AckRequest( ack_id=self._ack_id, diff --git a/tests/unit/pubsub_v1/subscriber/test_message.py b/tests/unit/pubsub_v1/subscriber/test_message.py index 4502fdc29..f3d188421 100644 --- a/tests/unit/pubsub_v1/subscriber/test_message.py +++ b/tests/unit/pubsub_v1/subscriber/test_message.py @@ -136,35 +136,14 @@ def check_call_types(mock, *args, **kwargs): assert isinstance(call_args[n], argtype) -# def test_ack_otel(): -# otel_data = OpenTelemetryData( -# subscribe_span=mock.Mock(spec=trace.Span), -# ) -# msg = create_message( -# data=b"foo", -# ack_id="ack_id", -# open_telemetry_data=otel_data, -# ) -# with mock.patch.object(msg._request_queue, "put") as put: -# msg.ack() -# put.assert_called_once_with( -# requests.AckRequest( -# ack_id="ack_id", -# byte_size=30, -# time_to_ack=mock.ANY, -# ordering_key="", -# future=None, -# open_telemetry_data=otel_data, -# ) -# ) -# check_call_types(put, requests.AckRequest) - - @pytest.mark.parametrize( "otel_data,", [ None, - OpenTelemetryData(subscribe_span=mock.Mock(spec=trace.Span)), + OpenTelemetryData( + subscribe_span=mock.Mock(spec=trace.Span), + process_span=mock.Mock(spec=trace.Span), + ), OpenTelemetryData(), ], ) @@ -189,6 +168,8 @@ def test_ack(otel_data): check_call_types(put, requests.AckRequest) if otel_data and otel_data.subscribe_span: otel_data.subscribe_span.add_event.assert_called_with(name="ack start") + if otel_data and otel_data.process_span: + otel_data.process_span.add_event.assert_called_with(name="ack called") @pytest.mark.parametrize( From 0801c4d7cf23223351826f49f62730a5b040e1f1 Mon Sep 17 00:00:00 2001 From: Mukund Date: Sat, 6 Jul 2024 06:49:11 +0000 Subject: [PATCH 23/30] Add ack called event for ack_with_response in process span --- google/cloud/pubsub_v1/subscriber/message.py | 3 +++ tests/unit/pubsub_v1/subscriber/test_message.py | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/google/cloud/pubsub_v1/subscriber/message.py b/google/cloud/pubsub_v1/subscriber/message.py index d7da85d66..3bed90edb 100644 --- a/google/cloud/pubsub_v1/subscriber/message.py +++ b/google/cloud/pubsub_v1/subscriber/message.py @@ -324,6 +324,9 @@ def ack_with_response(self) -> "futures.Future": suscriber_span = self._open_telemetry_data.subscribe_span if suscriber_span: suscriber_span.add_event(name="ack start") + process_span = self._open_telemetry_data.process_span + if process_span: + process_span.add_event(name="ack called") req_future: Optional[futures.Future] if self._exactly_once_delivery_enabled_func(): future = futures.Future() diff --git a/tests/unit/pubsub_v1/subscriber/test_message.py b/tests/unit/pubsub_v1/subscriber/test_message.py index f3d188421..7278ce1ee 100644 --- a/tests/unit/pubsub_v1/subscriber/test_message.py +++ b/tests/unit/pubsub_v1/subscriber/test_message.py @@ -176,7 +176,10 @@ def test_ack(otel_data): "otel_data", [ None, - OpenTelemetryData(subscribe_span=mock.Mock(spec=trace.Span)), + OpenTelemetryData( + subscribe_span=mock.Mock(spec=trace.Span), + process_span=mock.Mock(spec=trace.Span), + ), OpenTelemetryData(), ], ) @@ -204,6 +207,8 @@ def test_ack_with_response_exactly_once_delivery_disabled(otel_data): if otel_data and otel_data.subscribe_span: otel_data.subscribe_span.add_event.assert_called_with(name="ack start") + if otel_data and otel_data.process_span: + otel_data.process_span.add_event.assert_called_with(name="ack called") def test_ack_with_response_exactly_once_delivery_enabled(): From 857d163546a2f75b954c108c7811ef27e4ec9682 Mon Sep 17 00:00:00 2001 From: Mukund Date: Sat, 6 Jul 2024 06:54:06 +0000 Subject: [PATCH 24/30] Add nack called event for process span --- google/cloud/pubsub_v1/subscriber/message.py | 6 ++++++ tests/unit/pubsub_v1/subscriber/test_message.py | 14 ++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/google/cloud/pubsub_v1/subscriber/message.py b/google/cloud/pubsub_v1/subscriber/message.py index 3bed90edb..2a5e222aa 100644 --- a/google/cloud/pubsub_v1/subscriber/message.py +++ b/google/cloud/pubsub_v1/subscriber/message.py @@ -459,6 +459,9 @@ def nack(self) -> None: subscriber_span = self._open_telemetry_data.subscribe_span if subscriber_span: subscriber_span.add_event("nack start") + process_span = self._open_telemetry_data.process_span + if process_span: + process_span.add_event("nack called") self._request_queue.put( requests.NackRequest( ack_id=self._ack_id, @@ -507,6 +510,9 @@ def nack_with_response(self) -> "futures.Future": subscriber_span = self._open_telemetry_data.subscribe_span if subscriber_span: subscriber_span.add_event("nack start") + process_span = self._open_telemetry_data.process_span + if process_span: + process_span.add_event("nack called") req_future: Optional[futures.Future] if self._exactly_once_delivery_enabled_func(): future = futures.Future() diff --git a/tests/unit/pubsub_v1/subscriber/test_message.py b/tests/unit/pubsub_v1/subscriber/test_message.py index 7278ce1ee..7389acecc 100644 --- a/tests/unit/pubsub_v1/subscriber/test_message.py +++ b/tests/unit/pubsub_v1/subscriber/test_message.py @@ -277,7 +277,10 @@ def test_modify_ack_deadline_with_response_exactly_once_delivery_enabled(): "otel_data,", [ None, - OpenTelemetryData(subscribe_span=mock.Mock(spec=trace.Span)), + OpenTelemetryData( + subscribe_span=mock.Mock(spec=trace.Span), + process_span=mock.Mock(spec=trace.Span), + ), OpenTelemetryData(), ], ) @@ -301,13 +304,18 @@ def test_nack(otel_data): check_call_types(put, requests.NackRequest) if otel_data and otel_data.subscribe_span: otel_data.subscribe_span.add_event.assert_called_with("nack start") + if otel_data and otel_data.process_span: + otel_data.process_span.add_event.assert_called_with("nack called") @pytest.mark.parametrize( "otel_data", [ None, - OpenTelemetryData(subscribe_span=mock.Mock(spec=trace.Span)), + OpenTelemetryData( + subscribe_span=mock.Mock(spec=trace.Span), + process_span=mock.Mock(spec=trace.Span), + ), OpenTelemetryData(), ], ) @@ -333,6 +341,8 @@ def test_nack_with_response_exactly_once_delivery_disabled(otel_data): check_call_types(put, requests.NackRequest) if otel_data and otel_data.subscribe_span: otel_data.subscribe_span.add_event.assert_called_with("nack start") + if otel_data and otel_data.process_span: + otel_data.process_span.add_event.assert_called_with("nack called") def test_nack_with_response_exactly_once_delivery_enabled(): From c9625ff42ea10c13bd23d5ee952e4aa84c9bb2f8 Mon Sep 17 00:00:00 2001 From: Mukund Date: Sat, 6 Jul 2024 07:26:58 +0000 Subject: [PATCH 25/30] Fix build failures --- .../_protocol/streaming_pull_manager.py | 10 +++++----- .../subscriber/test_streaming_pull_manager.py | 19 ++++++++++++++++--- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py index eb2ff6fa9..953c16f72 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py @@ -660,7 +660,7 @@ def _schedule_message_on_hold( assert self._scheduler is not None assert self._callback is not None if ( - self.open_telemetry_enabled() + self.open_telemetry_enabled and msg.open_telemetry_data and msg.open_telemetry_data.subscribe_span ): @@ -1134,7 +1134,7 @@ def _on_response(self, response: gapic_types.StreamingPullResponse) -> None: received_messages = response._pb.received_messages subscribe_spans = [] - if self.open_telemetry_enabled(): + if self._open_telemetry_enabled: for received_message in response.received_messages: parent_span_context = TraceContextTextMapPropagator().extract( carrier=received_message.message, @@ -1185,14 +1185,14 @@ def _on_response(self, response: gapic_types.StreamingPullResponse) -> None: # Immediately (i.e. without waiting for the auto lease management) # modack the messages we received, as this tells the server that we've # received them. - if self.open_telemetry_enabled(): + if self._open_telemetry_enabled: for subscribe_span in subscribe_spans: subscribe_span.add_event("modack start") ack_id_gen = (message.ack_id for message in received_messages) expired_ack_ids = self._send_lease_modacks( ack_id_gen, self.ack_deadline, warn_on_invalid=False ) - if self.open_telemetry_enabled(): + if self._open_telemetry_enabled: for subscribe_span in subscribe_spans: subscribe_span.add_event("modack end") @@ -1213,7 +1213,7 @@ def _on_response(self, response: gapic_types.StreamingPullResponse) -> None: self._scheduler.queue, self._exactly_once_delivery_enabled, ) - if self.open_telemetry_enabled(): + if self._open_telemetry_enabled: message._open_telemetry_data = OpenTelemetryData( subscribe_span=subscribe_spans[i], ) diff --git a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py index b595b7056..3e5520f85 100644 --- a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py +++ b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py @@ -1316,7 +1316,7 @@ def make_running_manager(enable_open_telemetry=False, **kwargs): manager._dispatcher = mock.create_autospec(dispatcher.Dispatcher, instance=True) manager._leaser = mock.create_autospec(leaser.Leaser, instance=True) manager._heartbeater = mock.create_autospec(heartbeater.Heartbeater, instance=True) - manager._open_telemetry_enabled = mock.Mock(return_value=enable_open_telemetry) + manager._open_telemetry_enabled = enable_open_telemetry return ( manager, manager._consumer, @@ -1557,9 +1557,10 @@ def test__on_response_delivery_attempt(): assert msg2.delivery_attempt == 6 -def test__on_response_mod_ack_otel(span_exporter): +@pytest.mark.parametrize("otel_enabled", [(True), (False)]) +def test__on_response_mod_ack_otel(span_exporter, otel_enabled): manager, _, dispatcher, leaser, _, scheduler = make_running_manager( - enable_open_telemetry=True + enable_open_telemetry=otel_enabled, ) manager._callback = mock.sentinel.callback @@ -1605,6 +1606,18 @@ def test__on_response_mod_ack_otel(span_exporter): # Subscribe span would still be active, hence would not be exported. spans = span_exporter.get_finished_spans() assert len(spans) == 0 + if otel_enabled: + call_args = scheduler.schedule.call_args_list + assert len(call_args) == 2 + for call_arg in call_args: + args, _ = call_arg + otel_data = args[1].open_telemetry_data + assert otel_data.concurrency_control_span is not None + assert otel_data.concurrency_control_span.kind == trace.SpanKind.INTERNAL + assert ( + otel_data.concurrency_control_span.name + == "subscriber concurrency control" + ) def test__on_response_modifies_ack_deadline(): From 6d5d40cbd45c69318a433618823d4f6d152a6eab Mon Sep 17 00:00:00 2001 From: Mukund Date: Sun, 7 Jul 2024 20:04:01 +0000 Subject: [PATCH 26/30] Fix parent setting --- .../pubsub_v1/subscriber/_protocol/messages_on_hold.py | 8 +++----- .../subscriber/_protocol/streaming_pull_manager.py | 8 ++++---- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/messages_on_hold.py b/google/cloud/pubsub_v1/subscriber/_protocol/messages_on_hold.py index 9a3a13ad7..958bcdde7 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/messages_on_hold.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/messages_on_hold.py @@ -112,11 +112,9 @@ def put(self, message: "subscriber.message.Message") -> None: with self._tracer.start_as_current_span( name="subscriber scheduler", kind=trace.SpanKind.INTERNAL, - context=set_span_in_context( - message.open_telemetry_data.subscribe_span - if message.open_telemetry_data.subscribe_span - else None - ), + context=set_span_in_context(message.open_telemetry_data.subscribe_span) + if message.open_telemetry_data.subscribe_span + else None, end_on_exit=False, ) as scheduler_span: message.open_telemetry_data.scheduler_span = scheduler_span diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py index 953c16f72..b780762a7 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py @@ -1134,7 +1134,7 @@ def _on_response(self, response: gapic_types.StreamingPullResponse) -> None: received_messages = response._pb.received_messages subscribe_spans = [] - if self._open_telemetry_enabled: + if self.open_telemetry_enabled: for received_message in response.received_messages: parent_span_context = TraceContextTextMapPropagator().extract( carrier=received_message.message, @@ -1185,14 +1185,14 @@ def _on_response(self, response: gapic_types.StreamingPullResponse) -> None: # Immediately (i.e. without waiting for the auto lease management) # modack the messages we received, as this tells the server that we've # received them. - if self._open_telemetry_enabled: + if self.open_telemetry_enabled: for subscribe_span in subscribe_spans: subscribe_span.add_event("modack start") ack_id_gen = (message.ack_id for message in received_messages) expired_ack_ids = self._send_lease_modacks( ack_id_gen, self.ack_deadline, warn_on_invalid=False ) - if self._open_telemetry_enabled: + if self.open_telemetry_enabled: for subscribe_span in subscribe_spans: subscribe_span.add_event("modack end") @@ -1213,7 +1213,7 @@ def _on_response(self, response: gapic_types.StreamingPullResponse) -> None: self._scheduler.queue, self._exactly_once_delivery_enabled, ) - if self._open_telemetry_enabled: + if self.open_telemetry_enabled: message._open_telemetry_data = OpenTelemetryData( subscribe_span=subscribe_spans[i], ) From f5d13df4bd12ec59ce3736b1ee68b5a749b027fd Mon Sep 17 00:00:00 2001 From: Mukund Date: Sun, 7 Jul 2024 20:52:17 +0000 Subject: [PATCH 27/30] Add modack span for receipt modack --- .../_protocol/streaming_pull_manager.py | 22 +++++++++++++++++++ .../subscriber/test_streaming_pull_manager.py | 18 ++++++++++++++- 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py index b780762a7..59e83faa9 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py @@ -1186,8 +1186,29 @@ def _on_response(self, response: gapic_types.StreamingPullResponse) -> None: # modack the messages we received, as this tells the server that we've # received them. if self.open_telemetry_enabled: + subscriber_span_links = [] for subscribe_span in subscribe_spans: subscribe_span.add_event("modack start") + subscriber_span_links.append( + trace.Link(subscribe_span.get_span_context()) + ) + with self._tracer.start_as_current_span( + name=f"{self._subscription.split('/')[3]} modack", + attributes={ + "messaging.system": _OPEN_TELEMETRY_MESSAGING_SYSTEM, + "messaging.batch.message_count": len(received_messages), + "messaging.gcp_pubsub.message.ack_deadline": self.ack_deadline, + "messaging.gcp_pubsub.is_receipt_modack": True, + "messaging.destination.name": self._subscription.split("/")[3], + "gcp.project_id": self._subscription.split("/")[1], + "messaging.operation.name": "modack", + "code.function": "_on_response", + }, + links=subscriber_span_links if subscriber_span_links else None, + kind=trace.SpanKind.CLIENT, + end_on_exit=False, + ) as receipt_modack_span: + pass ack_id_gen = (message.ack_id for message in received_messages) expired_ack_ids = self._send_lease_modacks( ack_id_gen, self.ack_deadline, warn_on_invalid=False @@ -1195,6 +1216,7 @@ def _on_response(self, response: gapic_types.StreamingPullResponse) -> None: if self.open_telemetry_enabled: for subscribe_span in subscribe_spans: subscribe_span.add_event("modack end") + receipt_modack_span.end() with self._pause_resume_lock: assert self._scheduler is not None diff --git a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py index 3e5520f85..382367800 100644 --- a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py +++ b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py @@ -1604,8 +1604,9 @@ def test__on_response_mod_ack_otel(span_exporter, otel_enabled): ) # Subscribe span would still be active, hence would not be exported. + # Receipt modack span would be exported since it would be ended after receipt modacks are complete. spans = span_exporter.get_finished_spans() - assert len(spans) == 0 + assert len(spans) == (1 if otel_enabled else 0) if otel_enabled: call_args = scheduler.schedule.call_args_list assert len(call_args) == 2 @@ -1619,6 +1620,21 @@ def test__on_response_mod_ack_otel(span_exporter, otel_enabled): == "subscriber concurrency control" ) + receipt_modack_span = spans[0] + assert receipt_modack_span.name == "subscriptionID modack" + assert receipt_modack_span.kind == trace.SpanKind.CLIENT + assert len(receipt_modack_span.links) == 2 + + attributes = receipt_modack_span.attributes + assert attributes["messaging.system"] == "gcp_pubsub" + assert attributes["messaging.batch.message_count"] == 2 + assert attributes["messaging.gcp_pubsub.message.ack_deadline"] == 18 + assert attributes["messaging.gcp_pubsub.is_receipt_modack"] is True + assert attributes["messaging.destination.name"] == "subscriptionID" + assert attributes["gcp.project_id"] == "projectID" + assert attributes["messaging.operation.name"] == "modack" + assert attributes["code.function"] == "_on_response" + def test__on_response_modifies_ack_deadline(): manager, _, dispatcher, leaser, _, scheduler = make_running_manager() From 6eb054a20e8928a5dfb2d4e6219c0b5121342b25 Mon Sep 17 00:00:00 2001 From: Mukund Date: Sun, 7 Jul 2024 23:30:09 +0000 Subject: [PATCH 28/30] Plumb subscriber span information required for modack spans --- google/cloud/pubsub_v1/subscriber/_protocol/leaser.py | 6 ++++++ google/cloud/pubsub_v1/subscriber/_protocol/requests.py | 3 +++ .../subscriber/_protocol/streaming_pull_manager.py | 3 +++ 3 files changed, 12 insertions(+) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py b/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py index 16018e384..0fc8ce962 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py @@ -22,6 +22,8 @@ import typing from typing import Dict, Iterable, Optional, Union +from opentelemetry import trace + from google.cloud.pubsub_v1.subscriber._protocol.dispatcher import _MAX_BATCH_LATENCY try: @@ -50,6 +52,7 @@ class _LeasedMessage(typing.NamedTuple): size: int ordering_key: Optional[str] + subscribe_span: Optional[trace.Span] class Leaser(object): @@ -98,6 +101,9 @@ def add(self, items: Iterable[requests.LeaseRequest]) -> None: sent_time=float("inf"), size=item.byte_size, ordering_key=item.ordering_key, + subscribe_span=item.subscribe_span + if item.subscribe_span + else None, ) self._bytes += item.byte_size else: diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/requests.py b/google/cloud/pubsub_v1/subscriber/_protocol/requests.py index da3235800..ec688a646 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/requests.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/requests.py @@ -15,6 +15,8 @@ import typing from typing import NamedTuple, Optional +from opentelemetry import trace + from google.cloud.pubsub_v1.opentelemetry.subscribe_spans_data import OpenTelemetryData if typing.TYPE_CHECKING: # pragma: NO COVER @@ -42,6 +44,7 @@ class LeaseRequest(NamedTuple): ack_id: str byte_size: int ordering_key: Optional[str] + subscribe_span: Optional[trace.Span] = None class ModAckRequest(NamedTuple): diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py index 59e83faa9..1c87bc5fc 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py @@ -1245,6 +1245,9 @@ def _on_response(self, response: gapic_types.StreamingPullResponse) -> None: ack_id=message.ack_id, byte_size=message.size, ordering_key=message.ordering_key, + subscribe_span=subscribe_spans[i] + if self.open_telemetry_enabled + else None, ) self._leaser.add([req]) i = i + 1 From 5882f5fd2720c19b4016018cd15bc00fdeceac61 Mon Sep 17 00:00:00 2001 From: Mukund Date: Mon, 8 Jul 2024 15:09:22 +0000 Subject: [PATCH 29/30] Add non-receipt modack spans(lease management) --- .../pubsub_v1/subscriber/_protocol/leaser.py | 39 ++++++++++++++++++- .../unit/pubsub_v1/subscriber/test_leaser.py | 1 + 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py b/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py index 0fc8ce962..68b82adb9 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py @@ -44,6 +44,9 @@ _LOGGER = logging.getLogger(__name__) _LEASE_WORKER_NAME = "Thread-LeaseMaintainer" +_OPEN_TELEMETRY_TRACER_NAME = "google.cloud.pubsub_v1.subscriber" +"""Open Telemetry Instrumenting module name.""" +_OPEN_TELEMETRY_MESSAGING_SYSTEM = "gcp_pubsub" class _LeasedMessage(typing.NamedTuple): @@ -196,7 +199,38 @@ def maintain_leases(self) -> None: expired_ack_ids = set() if ack_ids: _LOGGER.debug("Renewing lease for %d ack IDs.", len(ack_ids)) - + if self._manager.open_telemetry_enabled: + subscriber_span_links = [] + lease_requests = leased_messages.values() + for lease_request in lease_requests: + if lease_request.subscribe_span is not None: + subscriber_span_links.append( + trace.Link( + lease_request.subscribe_span.get_span_context() + ) + ) + tracer = trace.get_tracer(_OPEN_TELEMETRY_TRACER_NAME) + with tracer.start_as_current_span( + name=f"{self._manager._subscription} modack", + attributes={ + "messaging.system": _OPEN_TELEMETRY_MESSAGING_SYSTEM, + "messaging.batch.message_count": len(ack_ids), + "messaging.gcp_pubsub.message.ack_deadline": deadline, + "messaging.gcp_pubsub.is_receipt_modack": False, + "messaging.destination.name": self._manager._subscription.split( + "/" + )[ + 3 + ], + "gcp.project_id": self._manager._subscription.split("/")[1], + "messaging.operation.name": "modack", + "code.function": "maintain_leases", + }, + links=subscriber_span_links if subscriber_span_links else None, + kind=trace.SpanKind.CLIENT, + end_on_exit=False, + ) as modack_span: + pass # NOTE: This may not work as expected if ``consumer.active`` # has changed since we checked it. An implementation # without any sort of race condition would require a @@ -208,6 +242,9 @@ def maintain_leases(self) -> None: ack_id_gen, deadline ) + if self._manager.open_telemetry_enabled: + modack_span.end() + start_time = time.time() # If exactly once delivery is enabled, we should drop all expired ack_ids from lease management. if self._manager._exactly_once_delivery_enabled() and len(expired_ack_ids): diff --git a/tests/unit/pubsub_v1/subscriber/test_leaser.py b/tests/unit/pubsub_v1/subscriber/test_leaser.py index f38717c6f..fbc7786b7 100644 --- a/tests/unit/pubsub_v1/subscriber/test_leaser.py +++ b/tests/unit/pubsub_v1/subscriber/test_leaser.py @@ -91,6 +91,7 @@ def create_manager(flow_control=types.FlowControl()): manager.flow_control = flow_control manager.ack_histogram = histogram.Histogram() manager._obtain_ack_deadline.return_value = 10 + manager._subscription = "projects/projectID/subscriptions/subscriptionID" return manager From 37c16a113f0edbdb55c2d0cc30b6bdee0be9d677 Mon Sep 17 00:00:00 2001 From: Mukund Date: Mon, 8 Jul 2024 19:48:15 +0000 Subject: [PATCH 30/30] Disable OpenTelemetry Tracing for Python versions <= 3.7 --- google/cloud/pubsub_v1/publisher/client.py | 19 +++++-- .../pubsub_v1/publisher/batch/test_thread.py | 8 +++ .../publisher/test_publisher_client.py | 52 +++++++++++++++++++ .../unit/pubsub_v1/subscriber/test_leaser.py | 48 +++++++++++++++-- 4 files changed, 120 insertions(+), 7 deletions(-) diff --git a/google/cloud/pubsub_v1/publisher/client.py b/google/cloud/pubsub_v1/publisher/client.py index 022207301..bd8450a27 100644 --- a/google/cloud/pubsub_v1/publisher/client.py +++ b/google/cloud/pubsub_v1/publisher/client.py @@ -167,10 +167,23 @@ def __init__( # The object controlling the message publishing flow self._flow_controller = FlowController(self.publisher_options.flow_control) - # Option indicating whether open telemetry is enabled or not. - self._open_telemetry_enabled = ( + # OpenTelemetry features used by the library are not supported in Python versions <= 3.7. + # Refer https://github.com/open-telemetry/opentelemetry-python/issues/3993#issuecomment-2211976389 + if ( self.publisher_options.enable_open_telemetry_tracing - ) + and sys.version_info.major == 3 + and sys.version_info.minor < 8 + ): + warnings.warn( + message="OpenTelemetry for Python version 3.7 or lower is not supported. Disabling open telemetry tracing.", + category=RuntimeWarning, + ) + self._open_telemetry_enabled = False + else: + # Option indicating whether open telemetry is enabled or not. + self._open_telemetry_enabled = ( + self.publisher_options.enable_open_telemetry_tracing + ) @classmethod def from_service_account_file( # type: ignore[override] diff --git a/tests/unit/pubsub_v1/publisher/batch/test_thread.py b/tests/unit/pubsub_v1/publisher/batch/test_thread.py index ad8919721..57a775f6f 100644 --- a/tests/unit/pubsub_v1/publisher/batch/test_thread.py +++ b/tests/unit/pubsub_v1/publisher/batch/test_thread.py @@ -664,6 +664,10 @@ def test_commit_otel_publish_rpc_span_exception(span_exporter): assert span.end_time is not None +# Refer https://opentelemetry.io/docs/languages/python/#version-support +@pytest.mark.skipif( + sys.version_info < (3, 8), reason="Open Telemetry requires python3.8 or higher" +) def test_commit_otel_null_span(span_exporter): """ Test case checks the null case check scenario and appeases code coverage. @@ -712,6 +716,10 @@ def test_commit_otel_null_span(span_exporter): assert spans[0].end_time is not None +# Refer https://opentelemetry.io/docs/languages/python/#version-support +@pytest.mark.skipif( + sys.version_info < (3, 8), reason="Open Telemetry requires python3.8 or higher" +) def test_commit_otel_publish_non_sampled(span_exporter): TOPIC = "projects/projectID/topics/topic" batch = create_batch( diff --git a/tests/unit/pubsub_v1/publisher/test_publisher_client.py b/tests/unit/pubsub_v1/publisher/test_publisher_client.py index fb2f876ca..3e1ce8a63 100644 --- a/tests/unit/pubsub_v1/publisher/test_publisher_client.py +++ b/tests/unit/pubsub_v1/publisher/test_publisher_client.py @@ -219,6 +219,19 @@ def test_message_ordering_enabled(creds): def test_publish_otel_batching_exception(creds, span_exporter): + if sys.version_info.major == 3 and sys.version_info.minor < 8: + with pytest.warns( + RuntimeWarning, + match="OpenTelemetry for Python version 3.7 or lower is not supported. Disabling open telemetry tracing.", + ): + client = publisher.Client( + credentials=creds, + publisher_options=types.PublisherOptions( + enable_open_telemetry_tracing=True, + ), + ) + assert client._open_telemetry_enabled is False + return client = publisher.Client( credentials=creds, publisher_options=types.PublisherOptions( @@ -301,6 +314,19 @@ def test_publish(creds): def test_publish_otel_context_propagation(creds): TOPIC = "projects/projectID/topics/topicID" + if sys.version_info.major == 3 and sys.version_info.minor < 8: + with pytest.warns( + RuntimeWarning, + match="OpenTelemetry for Python version 3.7 or lower is not supported. Disabling open telemetry tracing.", + ): + client = publisher.Client( + credentials=creds, + publisher_options=types.PublisherOptions( + enable_open_telemetry_tracing=True, + ), + ) + assert client._open_telemetry_enabled is False + return client = publisher.Client( credentials=creds, publisher_options=types.PublisherOptions( @@ -322,6 +348,19 @@ def test_publish_otel_context_propagation(creds): def test_publish_otel(creds, span_exporter): TOPIC = "projects/projectID/topics/topicID" + if sys.version_info.major == 3 and sys.version_info.minor < 8: + with pytest.warns( + RuntimeWarning, + match="OpenTelemetry for Python version 3.7 or lower is not supported. Disabling open telemetry tracing.", + ): + client = publisher.Client( + credentials=creds, + publisher_options=types.PublisherOptions( + enable_open_telemetry_tracing=True, + ), + ) + assert client._open_telemetry_enabled is False + return client = publisher.Client( credentials=creds, publisher_options=types.PublisherOptions( @@ -378,6 +417,19 @@ def test_publish_error_exceeding_flow_control_limits(creds): def test_publish_otel_flow_control_exception(creds, span_exporter): + if sys.version_info.major == 3 and sys.version_info.minor < 8: + with pytest.warns( + RuntimeWarning, + match="OpenTelemetry for Python version 3.7 or lower is not supported. Disabling open telemetry tracing.", + ): + client = publisher.Client( + credentials=creds, + publisher_options=types.PublisherOptions( + enable_open_telemetry_tracing=True, + ), + ) + assert client._open_telemetry_enabled is False + return client = publisher.Client( credentials=creds, publisher_options=types.PublisherOptions( diff --git a/tests/unit/pubsub_v1/subscriber/test_leaser.py b/tests/unit/pubsub_v1/subscriber/test_leaser.py index fbc7786b7..e6d6e1b75 100644 --- a/tests/unit/pubsub_v1/subscriber/test_leaser.py +++ b/tests/unit/pubsub_v1/subscriber/test_leaser.py @@ -23,6 +23,8 @@ from google.cloud.pubsub_v1.subscriber._protocol import requests from google.cloud.pubsub_v1.subscriber._protocol import streaming_pull_manager +from opentelemetry import trace + # special case python < 3.8 if sys.version_info.major == 3 and sys.version_info.minor < 8: import mock @@ -82,7 +84,7 @@ def test_remove_negative_bytes(caplog): assert "unexpectedly negative" in caplog.text -def create_manager(flow_control=types.FlowControl()): +def create_manager(flow_control=types.FlowControl(), open_telemetry_enabled=False): manager = mock.create_autospec( streaming_pull_manager.StreamingPullManager, instance=True ) @@ -92,6 +94,7 @@ def create_manager(flow_control=types.FlowControl()): manager.ack_histogram = histogram.Histogram() manager._obtain_ack_deadline.return_value = 10 manager._subscription = "projects/projectID/subscriptions/subscriptionID" + manager.open_telemetry_enabled = open_telemetry_enabled return manager @@ -137,23 +140,60 @@ def trigger_done(timeout): leaser._stop_event.wait = trigger_done -def test_maintain_leases_ack_ids(): - manager = create_manager() +@pytest.mark.parametrize( + "otel_enabled", + [ + (False), + (True), + ], +) +def test_maintain_leases_ack_ids(otel_enabled, span_exporter): + manager = create_manager(open_telemetry_enabled=otel_enabled) leaser_ = leaser.Leaser(manager) make_sleep_mark_event_as_done(leaser_) + subscribe_span_mock = mock.Mock() leaser_.add( [requests.LeaseRequest(ack_id="my ack id", byte_size=50, ordering_key="")] ) + leaser_.add( + [ + requests.LeaseRequest( + ack_id="my ack id2", + byte_size=50, + ordering_key="", + subscribe_span=subscribe_span_mock, + ) + ] + ) + manager._send_lease_modacks.return_value = set() leaser_.maintain_leases() assert len(manager._send_lease_modacks.mock_calls) == 1 call = manager._send_lease_modacks.mock_calls[0] ack_ids = list(call.args[0]) - assert ack_ids == ["my ack id"] + assert ack_ids == ["my ack id", "my ack id2"] assert call.args[1] == 10 + if otel_enabled: + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + modack_span = spans[0] + assert ( + modack_span.name == "projects/projectID/subscriptions/subscriptionID modack" + ) + assert modack_span.kind == trace.SpanKind.CLIENT + attributes = modack_span.attributes + assert attributes["messaging.system"] == "gcp_pubsub" + assert attributes["messaging.batch.message_count"] == 2 + assert attributes["messaging.gcp_pubsub.message.ack_deadline"] == 10 + assert attributes["messaging.gcp_pubsub.is_receipt_modack"] is False + assert attributes["messaging.destination.name"] == "subscriptionID" + assert attributes["messaging.operation.name"] == "modack" + assert attributes["code.function"] == "maintain_leases" + assert len(modack_span.links) == 1 + def test_maintain_leases_expired_ack_ids_ignored(): manager = create_manager()