Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Another attempt at span monitoring #602

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion apps/opentelemetry/include/otel_span.hrl
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,18 @@
%% trace flags lowest bit is 1 but simply not propagated.
is_recording :: boolean() | undefined | '_',

instrumentation_scope :: opentelemetry:instrumentation_scope() | undefined | '_'
instrumentation_scope :: opentelemetry:instrumentation_scope() | undefined | '_',


%% this is the Erlang process the span is, or was last, active in.
%% It is used by the optional process monitoring feature, where a process
%% can be monitored so that all spans still active in that process
%% are ended if the process exits for any reason.
pid :: pid() | undefined,

%% the span processors to run on start and end
%% mainly here so the span monitor can end the span
on_end_processors :: fun()
}).

-record(span_limits, {
Expand Down
6 changes: 3 additions & 3 deletions apps/opentelemetry/src/otel_attributes.erl
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ new(List, CountLimit, ValueLengthLimit) when is_list(List) ->
new(maps:from_list(List), CountLimit, ValueLengthLimit);
new(Map, CountLimit, ValueLengthLimit) when is_map(Map) ->
update_attributes(Map, #attributes{count_limit=CountLimit,
value_length_limit=ValueLengthLimit,
dropped=0,
map=#{}});
value_length_limit=ValueLengthLimit,
dropped=0,
map=#{}});
new(_, CountLimit, ValueLengthLimit) ->
#attributes{count_limit=CountLimit,
value_length_limit=ValueLengthLimit,
Expand Down
10 changes: 5 additions & 5 deletions apps/opentelemetry/src/otel_span_ets.erl
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
handle_call/3,
handle_cast/2]).

-export([start_span/7,
-export([start_span/8,
end_span/1,
end_span/2,
get_ctx/1,
Expand Down Expand Up @@ -55,13 +55,13 @@ start_link(Opts) ->

%% @doc Start a span and insert into the active span ets table.
-spec start_span(otel_ctx:t(), opentelemetry:span_name(), otel_sampler:t(), otel_id_generator:t(),
otel_span:start_opts(), fun(), otel_tracer_server:instrumentation_scope() | undefined)
otel_span:start_opts(), fun(), fun(), otel_tracer_server:instrumentation_scope() | undefined)
-> opentelemetry:span_ctx().
start_span(Ctx, Name, Sampler, IdGeneratorModule, Opts, Processors, InstrumentationScope) ->
case otel_span_utils:start_span(Ctx, Name, Sampler, IdGeneratorModule, Opts) of
start_span(Ctx, Name, Sampler, IdGeneratorModule, Opts, OnStartProcessors, OnEndProcessors, InstrumentationScope) ->
case otel_span_utils:start_span(Ctx, Name, Sampler, IdGeneratorModule, OnEndProcessors, Opts) of
{SpanCtx=#span_ctx{is_recording=true}, Span=#span{}} ->
Span1 = Span#span{instrumentation_scope=InstrumentationScope},
Span2 = Processors(Ctx, Span1),
Span2 = OnStartProcessors(Ctx, Span1),
case storage_insert(Span2) of
true ->
SpanCtx;
Expand Down
110 changes: 110 additions & 0 deletions apps/opentelemetry/src/otel_span_monitor.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
%%%------------------------------------------------------------------------
%% Copyright 2020, OpenTelemetry Authors
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% @doc
%% Process that can optionally monitor the process a span is in and end the
%% span if the process stops for any reason with the span still unfinished.
%% @end
%%%-------------------------------------------------------------------------
-module(otel_span_monitor).

-behaviour(gen_server).

-export([start_link/0,
add/1]).

-export([init/1,
handle_call/3,
handle_cast/2,
handle_info/2]).

-include("otel_span_ets.hrl").
-include("otel_span.hrl").
-include("otel_tracer.hrl").

-define(SERVER, ?MODULE).

%% gen_server state: `monitors' maps each monitored pid to the reference
%% returned by erlang:monitor/2 when the monitor was set up.
-record(state, {monitors :: #{pid() => reference()}}).

%% @doc Start the span monitor server and register it locally as ?SERVER.
%% No options are passed to the callback module or to gen_server.
-spec start_link() -> {ok, pid()} | ignore | {error, term()}.
start_link() ->
    ServerName = {local, ?SERVER},
    gen_server:start_link(ServerName, ?MODULE, [], []).

%% @doc Monitor a process so that all spans that have been active in it
%% and are still unfinished are ended when the process stops.
%%
%% This is a synchronous call to the locally registered monitor server.
%% Only pids are accepted: the server stores its monitors keyed on the
%% value given here and looks them up by the pid carried in the 'DOWN'
%% message, so anything that is not a pid is rejected in the caller
%% (function_clause) instead of being handed to erlang:monitor/2 inside
%% the shared server.
-spec add(pid()) -> ok.
add(Pid) when is_pid(Pid) ->
    gen_server:call(?SERVER, {monitor, Pid}).

%% gen_server callback: the server starts with no monitored processes.
%% The start argument is ignored.
init(_Opts) ->
    NoMonitors = maps:new(),
    {ok, #state{monitors=NoMonitors}}.

%% gen_server callback for `{monitor, Pid}' requests.
%% A pid is monitored at most once: if it is already a key of the monitors
%% map the state is returned untouched, otherwise a process monitor is
%% created and its reference is stored under the pid. The reply is always
%% `ok'. Any other request is not matched and crashes with function_clause.
handle_call({monitor, Pid}, _From, State=#state{monitors=Monitors}) ->
    case Monitors of
        #{Pid := _} ->
            %% already being monitored
            %% NOTE(review): Codecov reports this branch as not covered by tests.
            {reply, ok, State};
        _ ->
            Ref = erlang:monitor(process, Pid),
            {reply, ok, State#state{monitors=Monitors#{Pid => Ref}}}
    end.


%% gen_server callback: this server defines no casts, so whatever arrives
%% is dropped and the state is left unchanged.
handle_cast(_Request, State) ->
    {noreply, State}.

Check warning on line 62 in apps/opentelemetry/src/otel_span_monitor.erl

View check run for this annotation

Codecov / codecov/patch

apps/opentelemetry/src/otel_span_monitor.erl#L62

Added line #L62 was not covered by tests

%% gen_server callback for raw messages.
%%
%% 'DOWN' for a monitored pid: the pid's entry is removed from the monitors
%% map and every span still stored for that pid is ended via end_spans/2,
%% with the exit reason recorded on the span. The stored reference must be
%% the one carried by the message; a different reference for a known pid is
%% not matched and crashes with case_clause.
%%
%% 'DOWN' for a pid that is not in the map is ignored.
%% NOTE(review): Codecov reports that `error' branch as not covered by tests.
%%
%% Any other message is ignored as well. Without this clause an unexpected
%% message would crash the server with function_clause and drop all of the
%% monitors it holds; ignoring matches what handle_cast/2 does.
handle_info({'DOWN', Ref, process, Pid, Reason}, State=#state{monitors=Monitors}) ->
    case maps:take(Pid, Monitors) of
        {Ref, Monitors1} ->
            end_spans(Pid, Reason),
            {noreply, State#state{monitors=Monitors1}};
        error ->
            {noreply, State}
    end;
handle_info(_Info, State) ->
    {noreply, State}.

%%

%% ignore these functions because dialyzer doesn't like match spec use of '_'
-dialyzer({nowarn_function, end_spans/2}).
-dialyzer({nowarn_function, match_spec/2}).
-dialyzer({nowarn_function, end_span/3}).
-dialyzer({nowarn_function, select/1}).

%% End every span in the span table whose `pid' field is Pid.
%%
%% Each span gets the attribute `finished_by_monitor => true' and a
%% 'process died' event whose `reason' is the exit reason formatted with ~p.
%% Span ids are selected first and then taken from the table one at a time;
%% a span that is gone by the time it is taken is skipped.
%% NOTE(review): Codecov reports that skip (`[]') branch as not covered by tests.
%%
%% TODO: need a `select_take' or `match_take' in ets
end_spans(Pid, Reason) ->
    ReasonString = otel_utils:assert_to_binary(io_lib:format("~p", [Reason])),
    DownAttributes = otel_span:process_attributes(#{finished_by_monitor => true}),
    DownEvent = opentelemetry:event('process died', #{reason => ReasonString}),
    EndIfStillStored =
        fun(SpanId) ->
                case ets:take(?SPAN_TAB, SpanId) of
                    [Span] ->
                        end_span(Span, DownEvent, DownAttributes);
                    [] ->
                        ok
                end
        end,
    lists:foreach(EndIfStillStored, select(Pid)).

%% Return the span ids ('$1' in the match spec) of all spans in the span
%% table whose `pid' field equals Pid.
select(Pid) ->
    SpanIdsForPid = match_spec(Pid, '$1'),
    ets:select(?SPAN_TAB, SpanIdsForPid).

%% Build an ets match spec over #span{} records: bind the span id to '$1'
%% and the owning pid to '$2', keep only records whose pid is exactly Pid,
%% and produce Return as the result for each match.
match_spec(Pid, Return) ->
    Head = #span{span_id='$1', pid='$2', _='_'},
    Guards = [{'=:=', '$2', Pid}],
    Body = [Return],
    [{Head, Guards, Body}].

%% Finish a single span on behalf of its dead process: merge DownAttributes
%% into the span's attributes, append DownEvent to its events, and pass the
%% updated span to the on-end processors fun stored in the span itself.
%% NOTE(review): nothing here stamps an end time or goes through
%% otel_span_utils:end_span/1,2 -- confirm the processors do not rely on that.
end_span(Span=#span{attributes=Attributes,
                    events=Events,
                    on_end_processors=OnEnd}, DownEvent, DownAttributes) ->
    MarkedAttributes = otel_attributes:set(DownAttributes, Attributes),
    EventsWithDown = otel_events:add([DownEvent], Events),
    OnEnd(Span#span{attributes=MarkedAttributes,
                    events=EventsWithDown}).
9 changes: 8 additions & 1 deletion apps/opentelemetry/src/otel_span_sup.erl
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,21 @@ init([Config]) ->
type => worker,
modules => [otel_span_sweeper]},

Monitor = #{id => otel_span_monitor,
start => {otel_span_monitor, start_link, []},
restart => permanent,
shutdown => 5000,
type => worker,
modules => [otel_span_monitor]},

SpanHandler = #{id => otel_span_ets,
start => {otel_span_ets, start_link, [[]]},
restart => permanent,
shutdown => 5000,
type => worker,
modules => [otel_span_ets]},

ChildSpecs = [SpanHandler, Sweeper],
ChildSpecs = [SpanHandler, Monitor, Sweeper],
{ok, {SupFlags, ChildSpecs}}.

%% internal functions
17 changes: 11 additions & 6 deletions apps/opentelemetry/src/otel_span_utils.erl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
%%%-------------------------------------------------------------------------
-module(otel_span_utils).

-export([start_span/5,
-export([start_span/6,
end_span/1,
end_span/2]).

Expand All @@ -27,8 +27,8 @@
-include("otel_span.hrl").

-spec start_span(otel_ctx:t(), opentelemetry:span_name(), otel_sampler:t(), otel_id_generator:t(),
otel_span:start_opts()) -> {opentelemetry:span_ctx(), opentelemetry:span() | undefined}.
start_span(Ctx, Name, Sampler, IdGenerator, Opts) ->
fun(), otel_span:start_opts()) -> {opentelemetry:span_ctx(), opentelemetry:span() | undefined}.
start_span(Ctx, Name, Sampler, IdGenerator, OnEndProcessors, Opts) ->
SpanAttributeCountLimit = otel_span_limits:attribute_count_limit(),
SpanAttributeValueLengthLimit= otel_span_limits:attribute_value_length_limit(),
EventCountLimit = otel_span_limits:event_count_limit(),
Expand All @@ -48,9 +48,12 @@ start_span(Ctx, Name, Sampler, IdGenerator, Opts) ->

Kind = maps:get(kind, Opts, ?SPAN_KIND_INTERNAL),
StartTime = maps:get(start_time, Opts, opentelemetry:timestamp()),
new_span(Ctx, Name, Sampler, IdGenerator, StartTime, Kind, Attributes, Events, Links).

new_span(Ctx, Name, Sampler, IdGeneratorModule, StartTime, Kind, Attributes, Events, Links) ->
%% `monitor' is an optional start option. Default to `false' so that
%% callers which omit it (e.g. start opts of `#{}') do not crash with
%% {badkey, monitor}; only an explicit `true' registers this process
%% with the span monitor.
case maps:get(monitor, Opts, false) of
    true -> otel_span_monitor:add(self());
    false -> ok
end,

new_span(Ctx, Name, Sampler, IdGenerator, StartTime, Kind, Attributes, Events, Links, OnEndProcessors).

new_span(Ctx, Name, Sampler, IdGeneratorModule, StartTime, Kind, Attributes, Events, Links, OnEndProcessors) ->
{NewSpanCtx, ParentSpanId} = new_span_ctx(Ctx, IdGeneratorModule),

TraceId = NewSpanCtx#span_ctx.trace_id,
Expand All @@ -70,7 +73,9 @@ new_span(Ctx, Name, Sampler, IdGeneratorModule, StartTime, Kind, Attributes, Eve
events=Events,
links=Links,
trace_flags=TraceFlags,
is_recording=IsRecording},
is_recording=IsRecording,
pid=self(),
on_end_processors=OnEndProcessors},

{NewSpanCtx#span_ctx{trace_flags=TraceFlags,
tracestate=TraceState,
Expand Down
4 changes: 2 additions & 2 deletions apps/opentelemetry/src/otel_tracer_default.erl
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,12 @@
%% @doc Starts an inactive Span and returns its SpanCtx.
-spec start_span(otel_ctx:t(), opentelemetry:tracer(), opentelemetry:span_name(),
otel_span:start_opts()) -> opentelemetry:span_ctx().
start_span(Ctx, {_, #tracer{on_start_processors=Processors,
start_span(Ctx, {_, #tracer{on_start_processors=OnStartProcessors,
on_end_processors=OnEndProcessors,
sampler=Sampler,
id_generator=IdGeneratorModule,
instrumentation_scope=InstrumentationScope}}, Name, Opts) ->
SpanCtx = otel_span_ets:start_span(Ctx, Name, Sampler, IdGeneratorModule, Opts, Processors, InstrumentationScope),
SpanCtx = otel_span_ets:start_span(Ctx, Name, Sampler, IdGeneratorModule, Opts, OnStartProcessors, OnEndProcessors, InstrumentationScope),
SpanCtx#span_ctx{span_sdk={otel_span_ets, OnEndProcessors}}.

-spec with_span(otel_ctx:t(), opentelemetry:tracer(), opentelemetry:span_name(),
Expand Down
40 changes: 38 additions & 2 deletions apps/opentelemetry/test/opentelemetry_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ all() ->
all_cases() ->
[with_span, macros, child_spans, disabled_sdk,
update_span_data, tracer_instrumentation_scope, tracer_previous_ctx, stop_temporary_app,
reset_after, attach_ctx, default_sampler, non_recording_ets_table,
reset_after, attach_ctx, parent_ctx, default_sampler, non_recording_ets_table,
root_span_sampling_always_on, root_span_sampling_always_off,
record_but_not_sample, record_exception_works, record_exception_with_message_works,
propagator_configuration, propagator_configuration_with_os_env, force_flush,
Expand Down Expand Up @@ -756,6 +756,40 @@ attach_ctx(Config) ->

ok.

%% A span started in another process with an explicitly passed parent
%% context must record the span that is current in that context as its
%% parent.
parent_ctx(Config) ->
    Tid = ?config(tid, Config),

    Tracer = opentelemetry:get_tracer(),

    SpanCtx1 = otel_tracer:start_span(Tracer, <<"span-1">>, #{}),
    ?set_current_span(SpanCtx1),
    ?assertMatch(SpanCtx1, ?current_span_ctx),

    %% capture the context holding span-1 so it can be handed to the child
    ParentCtx = otel_ctx:get_current(),

    Pid = self(),

    %% Linked rather than bare spawn: if starting, ending or exporting
    %% span-2 fails in the child, the exit reaches the test process with
    %% the real reason instead of surfacing only as the generic timeout
    %% below.
    erlang:spawn_link(fun() ->
                              SpanCtx2 = otel_tracer:start_span(ParentCtx, Tracer, <<"span-2">>, #{}),
                              otel_span:end_span(SpanCtx2),

                              [Span2] = assert_all_exported(Tid, [SpanCtx2]),

                              Pid ! {span2, Span2}
                      end),

    otel_span:end_span(SpanCtx1),

    [Span1] = assert_all_exported(Tid, [SpanCtx1]),

    receive
        {span2, Span2} ->
            ?assertEqual(Span1#span.span_id, Span2#span.parent_span_id)
    after
        1000 ->
            ct:fail(timeout)
    end.

reset_after(Config) ->
Tid = ?config(tid, Config),

Expand Down Expand Up @@ -1101,7 +1135,9 @@ no_exporter(_Config) ->
%%

assert_all_exported(Tid, SpanCtxs) ->
[assert_exported(Tid, SpanCtx) || SpanCtx <- SpanCtxs].
lists:flatmap(fun(SpanCtx) ->
assert_exported(Tid, SpanCtx)
end, SpanCtxs).

assert_exported(Tid, #span_ctx{trace_id=TraceId,
span_id=SpanId}) ->
Expand Down
Loading
Loading