Source code for opentelemetry.instrumentation.urllib

# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
This library allows tracing HTTP requests made by the
`urllib <https://docs.python.org/3/library/urllib>`_ library.

Usage
-----
.. code-block:: python

    from urllib import request
    from opentelemetry.instrumentation.urllib import URLLibInstrumentor

    # You can optionally pass a custom TracerProvider to
    # URLLibInstrumentor().instrument()

    URLLibInstrumentor().instrument()
    req = request.Request('https://postman-echo.com/post', method="POST")
    r = request.urlopen(req)

Configuration
-------------

Request/Response hooks
**********************

The urllib instrumentation supports extending tracing behavior with the help of
request and response hooks. These are functions that are called back by the instrumentation
right after a Span is created for a request and right before the span is finished processing a response respectively.
The hooks can be configured as follows:

.. code:: python

    # `request_obj` is an instance of urllib.request.Request
    def request_hook(span, request_obj):
        pass

    # `request_obj` is an instance of urllib.request.Request
    # `response` is an instance of http.client.HTTPResponse
    def response_hook(span, request_obj, response)
        pass

    URLLibInstrumentor.instrument(
        request_hook=request_hook, response_hook=response_hook)
    )

Exclude lists
*************

To exclude certain URLs from being tracked, set the environment variable ``OTEL_PYTHON_URLLIB_EXCLUDED_URLS``
(or ``OTEL_PYTHON_EXCLUDED_URLS`` as fallback) with comma delimited regexes representing which URLs to exclude.

For example,

::

    export OTEL_PYTHON_URLLIB_EXCLUDED_URLS="client/.*/info,healthcheck"

will exclude requests such as ``https://site/client/123/info`` and ``https://site/xyz/healthcheck``.

API
---
"""

import functools
import types
import typing
from http import client
from timeit import default_timer
from typing import Collection, Dict
from urllib.request import (  # pylint: disable=no-name-in-module,import-error
    OpenerDirector,
    Request,
)

from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
from opentelemetry.instrumentation.urllib.package import _instruments
from opentelemetry.instrumentation.urllib.version import __version__
from opentelemetry.instrumentation.utils import (
    http_status_to_status_code,
    is_http_instrumentation_enabled,
    suppress_http_instrumentation,
)
from opentelemetry.metrics import Histogram, get_meter
from opentelemetry.propagate import inject
from opentelemetry.semconv.metrics import MetricInstruments
from opentelemetry.semconv.trace import SpanAttributes
from opentelemetry.trace import Span, SpanKind, get_tracer
from opentelemetry.trace.status import Status
from opentelemetry.util.http import (
    ExcludeList,
    get_excluded_urls,
    parse_excluded_urls,
    remove_url_credentials,
)

_excluded_urls_from_env = get_excluded_urls("URLLIB")

_RequestHookT = typing.Optional[typing.Callable[[Span, Request], None]]
_ResponseHookT = typing.Optional[
    typing.Callable[[Span, Request, client.HTTPResponse], None]
]


[docs]class URLLibInstrumentor(BaseInstrumentor): """An instrumentor for urllib See `BaseInstrumentor` """
[docs] def instrumentation_dependencies(self) -> Collection[str]: return _instruments
def _instrument(self, **kwargs): """Instruments urllib module Args: **kwargs: Optional arguments ``tracer_provider``: a TracerProvider, defaults to global ``request_hook``: An optional callback invoked that is invoked right after a span is created. ``response_hook``: An optional callback which is invoked right before the span is finished processing a response ``excluded_urls``: A string containing a comma-delimited list of regexes used to exclude URLs from tracking """ tracer_provider = kwargs.get("tracer_provider") tracer = get_tracer( __name__, __version__, tracer_provider, schema_url="https://opentelemetry.io/schemas/1.11.0", ) excluded_urls = kwargs.get("excluded_urls") meter_provider = kwargs.get("meter_provider") meter = get_meter( __name__, __version__, meter_provider, schema_url="https://opentelemetry.io/schemas/1.11.0", ) histograms = _create_client_histograms(meter) _instrument( tracer, histograms, request_hook=kwargs.get("request_hook"), response_hook=kwargs.get("response_hook"), excluded_urls=_excluded_urls_from_env if excluded_urls is None else parse_excluded_urls(excluded_urls), ) def _uninstrument(self, **kwargs): _uninstrument()
[docs] def uninstrument_opener( self, opener: OpenerDirector ): # pylint: disable=no-self-use """uninstrument_opener a specific instance of urllib.request.OpenerDirector""" _uninstrument_from(opener, restore_as_bound_func=True)
def _instrument( tracer, histograms: Dict[str, Histogram], request_hook: _RequestHookT = None, response_hook: _ResponseHookT = None, excluded_urls: ExcludeList = None, ): """Enables tracing of all requests calls that go through :code:`urllib.Client._make_request`""" opener_open = OpenerDirector.open @functools.wraps(opener_open) def instrumented_open(opener, fullurl, data=None, timeout=None): if isinstance(fullurl, str): request_ = Request(fullurl, data) else: request_ = fullurl def get_or_create_headers(): return getattr(request_, "headers", {}) def call_wrapped(): return opener_open(opener, request_, data=data, timeout=timeout) return _instrumented_open_call( opener, request_, call_wrapped, get_or_create_headers ) def _instrumented_open_call( _, request, call_wrapped, get_or_create_headers ): # pylint: disable=too-many-locals if not is_http_instrumentation_enabled(): return call_wrapped() url = request.full_url if excluded_urls and excluded_urls.url_disabled(url): return call_wrapped() method = request.get_method().upper() span_name = method.strip() url = remove_url_credentials(url) labels = { SpanAttributes.HTTP_METHOD: method, SpanAttributes.HTTP_URL: url, } with tracer.start_as_current_span( span_name, kind=SpanKind.CLIENT, attributes=labels ) as span: exception = None if callable(request_hook): request_hook(span, request) headers = get_or_create_headers() inject(headers) with suppress_http_instrumentation(): start_time = default_timer() try: result = call_wrapped() # *** PROCEED except Exception as exc: # pylint: disable=W0703 exception = exc result = getattr(exc, "file", None) finally: elapsed_time = round((default_timer() - start_time) * 1000) if result is not None: code_ = result.getcode() labels[SpanAttributes.HTTP_STATUS_CODE] = str(code_) if span.is_recording() and code_ is not None: span.set_attribute(SpanAttributes.HTTP_STATUS_CODE, code_) span.set_status(Status(http_status_to_status_code(code_))) ver_ = str(getattr(result, "version", "")) if ver_: labels[ SpanAttributes.HTTP_FLAVOR ] = f"{ver_[:1]}.{ver_[:-1]}" _record_histograms( histograms, labels, request, result, elapsed_time ) if callable(response_hook): response_hook(span, request, result) if exception is not None: raise exception.with_traceback(exception.__traceback__) return result instrumented_open.opentelemetry_instrumentation_urllib_applied = True OpenerDirector.open = instrumented_open def _uninstrument(): """Disables instrumentation of :code:`urllib` through this module. Note that this only works if no other module also patches urllib.""" _uninstrument_from(OpenerDirector) def _uninstrument_from(instr_root, restore_as_bound_func=False): instr_func_name = "open" instr_func = getattr(instr_root, instr_func_name) if not getattr( instr_func, "opentelemetry_instrumentation_urllib_applied", False, ): return original = instr_func.__wrapped__ # pylint:disable=no-member if restore_as_bound_func: original = types.MethodType(original, instr_root) setattr(instr_root, instr_func_name, original) def _create_client_histograms(meter) -> Dict[str, Histogram]: histograms = { MetricInstruments.HTTP_CLIENT_DURATION: meter.create_histogram( name=MetricInstruments.HTTP_CLIENT_DURATION, unit="ms", description="Measures the duration of outbound HTTP requests.", ), MetricInstruments.HTTP_CLIENT_REQUEST_SIZE: meter.create_histogram( name=MetricInstruments.HTTP_CLIENT_REQUEST_SIZE, unit="By", description="Measures the size of HTTP request messages.", ), MetricInstruments.HTTP_CLIENT_RESPONSE_SIZE: meter.create_histogram( name=MetricInstruments.HTTP_CLIENT_RESPONSE_SIZE, unit="By", description="Measures the size of HTTP response messages.", ), } return histograms def _record_histograms( histograms, metric_attributes, request, response, elapsed_time ): histograms[MetricInstruments.HTTP_CLIENT_DURATION].record( elapsed_time, attributes=metric_attributes ) data = getattr(request, "data", None) request_size = 0 if data is None else len(data) histograms[MetricInstruments.HTTP_CLIENT_REQUEST_SIZE].record( request_size, attributes=metric_attributes ) if response is not None: response_size = int(response.headers.get("Content-Length", 0)) histograms[MetricInstruments.HTTP_CLIENT_RESPONSE_SIZE].record( response_size, attributes=metric_attributes )