-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplot_lambda_cdf.py
executable file
·64 lines (52 loc) · 2.73 KB
/
plot_lambda_cdf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Input trace: one CSV row is treated as one invocation, and the 'func' column
# names the invoked function. Dataset description:
# https://github.com/Azure/AzurePublicDataset/blob/master/AzureFunctionsInvocationTrace2021.md
TRACE_PATH = '~/Downloads/AzureFunctionsInvocationTraceForTwoWeeksJan2021/AzureFunctionsInvocationTraceForTwoWeeksJan2021.txt'
TRACE_DAYS = 14      # length of the trace in days, used for the per-hour average
HOURS_PER_DAY = 24
FUNCTION_SHARE = 10  # percent of functions whose share of requests is printed
QUANTILE = 0.7       # fraction of functions covered by the calls-per-hour figure


def load_function_names(path=TRACE_PATH):
    """Return the ``func`` column of the CSV trace at *path* as a Series.

    Only that column is parsed, since nothing else in the trace is used here.
    """
    return pd.read_csv(path, usecols=['func'])['func']


def pareto_curve(event_names):
    """Build the Pareto data for a sequence of function names.

    Functions are ranked by call count (most called first) and each rank is
    normalised to a percentage of the number of distinct functions, so the
    last function always sits at 100.

    Args:
        event_names: iterable with one function name per invocation.

    Returns:
        A ``(calls, cumulative)`` pair of Series sharing the same strictly
        increasing percent-rank index: ``calls`` holds the call count of each
        function, ``cumulative`` the running share of all calls in percent.

    Raises:
        ValueError: if *event_names* contains no (non-missing) names.
    """
    # value_counts() already returns the counts in descending order.
    counts = pd.Series(event_names).value_counts()
    if counts.empty:
        raise ValueError("no function invocations to analyse")

    n_functions = len(counts)
    # Attach the ranks to the per-function counts directly instead of mapping
    # every trace row and counting again: cheaper, and the index is guaranteed
    # to be increasing even when several functions have the same call count.
    rank_percent = np.arange(1, n_functions + 1) / n_functions * 100
    calls = pd.Series(counts.to_numpy(), index=rank_percent)
    cumulative = calls.cumsum() / calls.sum() * 100
    return calls, cumulative


def closest_point(cumulative, function_percent):
    """Return the curve point whose percent rank is nearest *function_percent*.

    Returns:
        ``(percent_of_functions, percent_of_requests)`` taken from
        *cumulative* (as produced by :func:`pareto_curve`).
    """
    position = np.abs(cumulative.index.to_numpy() - function_percent).argmin()
    return cumulative.index[position], cumulative.iloc[position]


def calls_per_hour(calls, quantile, trace_days=TRACE_DAYS):
    """Return the *quantile* of per-function call counts as calls per hour.

    The total count over the whole trace is averaged over
    ``trace_days * 24`` hours.
    """
    return calls.quantile(quantile) / HOURS_PER_DAY / trace_days


def plot_pareto(calls, cumulative):
    """Show the Pareto chart: call counts as bars, cumulative share as a line."""
    fig, ax = plt.subplots()
    # The x axis is in percent, so neighbouring bars are 100 / n apart; scale
    # the default relative width (0.8) to that spacing so bars never overlap.
    bar_width = 0.8 * 100 / len(calls)
    ax.bar(calls.index, calls.values, width=bar_width, color='blue', alpha=0.7, label='Event Count')
    ax.set_ylabel('Function Call Count', color='blue')

    # Second y-axis for the cumulative percentage.
    ax2 = ax.twinx()
    ax2.plot(cumulative.index, cumulative.values, color='red', marker='x', label='Cumulative %')
    ax2.set_ylabel('Cumulative %', color='red')
    ax2.set_ylim(0, 110)  # headroom above 100 so the last marker is not clipped

    plt.title('Pareto Chart of Function Calls')
    plt.grid()
    ax.set_xlabel('Functions (normed %, sorted by call count)')
    fig.tight_layout()
    plt.show()


def main():
    """Load the trace, print the headline numbers and show the chart."""
    calls, cumulative = pareto_curve(load_function_names())

    function_percent, requests_percent = closest_point(cumulative, FUNCTION_SHARE)
    print(f"{function_percent:.2f}% of functions account for {requests_percent:.2f}% of requests")

    per_hour = calls_per_hour(calls, QUANTILE)
    print(f"{QUANTILE*100}% of functions are called {per_hour} times per hour or less on average")

    plot_pareto(calls, cumulative)


if __name__ == "__main__":
    main()