diff --git a/src/lumigo_opentelemetry/libs/sampling.py b/src/lumigo_opentelemetry/libs/sampling.py index cf6aaee9..25f37d18 100644 --- a/src/lumigo_opentelemetry/libs/sampling.py +++ b/src/lumigo_opentelemetry/libs/sampling.py @@ -68,15 +68,20 @@ def get_description(self) -> str: def _extract_url(attributes: Attributes) -> Optional[str]: if attributes is not None: # if the url is already in the attributes, return it - if "http.url" in attributes: + if "url.full" in attributes: + return str(attributes["url.full"]) + elif "http.url" in attributes: return str(attributes["http.url"]) + # generate as much of the url as possible from the attributes # if we have the host and port, use them. it's even better if # we have the scheme as well host = "" if "http.host" in attributes: - if "http.scheme" in attributes: + if "url.scheme" in attributes: + host = f"{attributes['url.scheme']}://" + elif "http.scheme" in attributes: host = f"{attributes['http.scheme']}://" if ":" in attributes["http.host"]: @@ -90,7 +95,13 @@ def _extract_url(attributes: Attributes) -> Optional[str]: # if we have the target, use it. otherwise, fallback to route # or path - if "http.target" in attributes: + if "url.path" in attributes: + path += attributes["url.path"] + if "url.query" in attributes: + path += f"?{attributes['url.query']}" + if "url.fragment" in attributes: + path += f"#{attributes['url.fragment']}" + elif "http.target" in attributes: path += attributes["http.target"] elif "http.route" in attributes: path += attributes["http.route"] @@ -126,7 +137,6 @@ def _should_skip_span_on_route_match(url: Optional[str]) -> bool: """ if not url: return False - url_without_query_params = url.split("?")[0] filter_regex_string = os.environ.get(AUTO_FILTER_HTTP_ENDPOINTS_REGEX, "") if not filter_regex_string: return False @@ -138,4 +148,4 @@ def _should_skip_span_on_route_match(url: Optional[str]) -> bool: exc_info=True, ) return False - return filter_regex.search(url_without_query_params) is not None + return filter_regex.search(url) is not None diff --git a/src/test/unit/libs/test_sampling.py b/src/test/unit/libs/test_sampling.py index b1abbce1..04486cc0 100644 --- a/src/test/unit/libs/test_sampling.py +++ b/src/test/unit/libs/test_sampling.py @@ -1,86 +1,124 @@ +import pytest + from lumigo_opentelemetry.libs.sampling import ( _extract_url, _should_skip_span_on_route_match, ) -def test_extract_url(): - assert _extract_url({"http.url": "http://test.com"}) == "http://test.com" - assert ( - _extract_url({"http.url": "http://test.com/test", "http.route": "/test"}) - == "http://test.com/test" - ) - assert ( - _extract_url({"http.url": "http://test.com:8080/test", "http.route": "/test"}) - == "http://test.com:8080/test" - ) - assert ( - _extract_url({"http.scheme": "http", "http.host": "test.com"}) - == "http://test.com/" - ) - assert ( - _extract_url( - {"http.scheme": "http", "http.host": "test.com", "http.route": "/test"} - ) - == "http://test.com/test" - ) - assert ( - _extract_url( - {"http.scheme": "http", "http.host": "test.com:8080", "http.route": "/test"} - ) - == "http://test.com:8080/test" - ) - assert ( - _extract_url( +@pytest.mark.parametrize( + "attributes, expected_url", + [ + ({"url.full": "http://test.com"}, "http://test.com"), + ({"http.url": "http://test.com"}, "http://test.com"), + ( + {"url.full": "http://test.com/test", "http.route": "/test"}, + "http://test.com/test", + ), + ( + {"http.url": "http://test.com/test", "http.route": "/test"}, + "http://test.com/test", + ), + ( + {"url.full": "http://test.com:8080/test", "http.route": "/test"}, + "http://test.com:8080/test", + ), + ( + {"http.url": "http://test.com:8080/test", "http.route": "/test"}, + "http://test.com:8080/test", + ), + ({"url.scheme": "http", "http.host": "test.com"}, "http://test.com/"), + ({"http.scheme": "http", "http.host": "test.com"}, "http://test.com/"), + ( + {"url.scheme": "http", "http.host": "test.com", "http.route": "/test"}, + "http://test.com/test", + ), + ( + {"http.scheme": "http", "http.host": "test.com", "http.route": "/test"}, + "http://test.com/test", + ), + ( + { + "http.scheme": "http", + "http.host": "test.com:8080", + "http.route": "/test", + }, + "http://test.com:8080/test", + ), + ( { "http.scheme": "http", "http.host": "test.com:8080", "net.host.port": 8080, "http.route": "/test", - } - ) - == "http://test.com:8080/test" - ) - assert ( - _extract_url( + }, + "http://test.com:8080/test", + ), + ( { "http.scheme": "http", "http.host": "test.com", "net.host.port": 8080, "http.route": "/test", - } - ) - == "http://test.com:8080/test" - ) - assert _extract_url({"http.host": "test.com"}) == "test.com/" - assert ( - _extract_url({"http.host": "test.com", "http.route": "/test"}) - == "test.com/test" - ) - assert ( - _extract_url( - {"http.host": "test.com", "net.host.port": 8080, "http.route": "/test"} - ) - == "test.com:8080/test" - ) - assert _extract_url({"http.target": "/test"}) == "/test" - assert _extract_url({"http.target": "/test", "http.route": "/test"}) == "/test" - assert _extract_url({"http.target": "/test", "http.path": "/test"}) == "/test" - assert _extract_url({"http.route": "/test"}) == "/test" - assert _extract_url({"http.route": "/test", "http.path": "/test"}) == "/test" - assert _extract_url({"http.path": "/test"}) == "/test" - assert _extract_url({}) is None + }, + "http://test.com:8080/test", + ), + ({"http.host": "test.com"}, "test.com/"), + ({"http.host": "test.com", "http.route": "/test"}, "test.com/test"), + ( + {"http.host": "test.com", "net.host.port": 8080, "http.route": "/test"}, + "test.com:8080/test", + ), + ({"http.target": "/test"}, "/test"), + ({"http.target": "/test?a=b#hello"}, "/test?a=b#hello"), + ( + {"url.path": "/test", "url.query": "a=b", "url.fragment": "hello"}, + "/test?a=b#hello", + ), + ( + { + "url.path": "/test", + "url.query": "a=b", + "url.fragment": "hello", + "http.target": "/test?a=b#hello", + "http.route": "/test", + }, + "/test?a=b#hello", + ), + ({"http.target": "/test?a=b#hello", "http.route": "/test"}, "/test?a=b#hello"), + ({"http.target": "/test?a=b#hello", "http.path": "/test"}, "/test?a=b#hello"), + ({"http.route": "/test"}, "/test"), + ({"http.route": "/test", "http.path": "/test"}, "/test"), + ({"http.path": "/test"}, "/test"), + ({}, None), + ], +) +def test_extract_url(attributes, expected_url): + assert _extract_url(attributes) == expected_url def test_should_skip_span_on_route_match(monkeypatch): monkeypatch.setenv("LUMIGO_AUTO_FILTER_HTTP_ENDPOINTS_REGEX", ".*(localhost).*/$") assert _should_skip_span_on_route_match("http://localhost:5000/") is True + assert _should_skip_span_on_route_match("http://localhost:5000/?a=b") is False assert _should_skip_span_on_route_match("http://localhost:5000/unmatched") is False assert _should_skip_span_on_route_match("http://example.com:5000/") is False + + monkeypatch.setenv("LUMIGO_AUTO_FILTER_HTTP_ENDPOINTS_REGEX", ".*(localhost).*a=b") + assert _should_skip_span_on_route_match("http://localhost:5000/") is False + assert _should_skip_span_on_route_match("http://localhost:5000/?a=b") is True + assert ( + _should_skip_span_on_route_match("http://localhost:5000/unmatched?c=d&a=b") + is True + ) + assert _should_skip_span_on_route_match("http://example.com:5000/") is False + assert _should_skip_span_on_route_match("http://example.com:5000/?a=b&c=d") is False + monkeypatch.setenv( "LUMIGO_AUTO_FILTER_HTTP_ENDPOINTS_REGEX", ".*(localhost).*/matched" ) assert _should_skip_span_on_route_match("http://localhost:5000/matched") is True + assert _should_skip_span_on_route_match("http://localhost:5000/matched?a=b") is True assert _should_skip_span_on_route_match("http://localhost:5000/unmatched") is False assert ( _should_skip_span_on_route_match("http://example.com:5000/matched-incorrectly")