From ed9d9200ecb1597f55f72d9076c166f1627942e3 Mon Sep 17 00:00:00 2001 From: Michalius Date: Mon, 8 Apr 2024 16:00:57 +0200 Subject: [PATCH 1/4] Completed module flow_age_stats with a script for graph making --- .gitignore | 2 + Makefile.am | 1 + configure.ac | 1 + flow_age_stats/Makefile.am | 4 + flow_age_stats/README.md | 21 ++ flow_age_stats/example.png | Bin 0 -> 7778 bytes flow_age_stats/flow_age_stats.c | 369 ++++++++++++++++++++++++++++++++ flow_age_stats/plot.gp | 54 +++++ 8 files changed, 452 insertions(+) create mode 100644 flow_age_stats/Makefile.am create mode 100644 flow_age_stats/README.md create mode 100644 flow_age_stats/example.png create mode 100644 flow_age_stats/flow_age_stats.c create mode 100644 flow_age_stats/plot.gp diff --git a/.gitignore b/.gitignore index be26e0e3..adf18a47 100644 --- a/.gitignore +++ b/.gitignore @@ -37,3 +37,5 @@ unirecfilter/lib/lex.yy.c unirecfilter/lib/liburfilter.pc unirecfilter/lib/parser.tab.c unirecfilter/lib/parser.tab.h +.gitignore +.vscode/ diff --git a/Makefile.am b/Makefile.am index 6fe09bab..2b95a9fd 100644 --- a/Makefile.am +++ b/Makefile.am @@ -8,6 +8,7 @@ biflow_aggregator \ debug_sender \ device_classifier \ email_reporter \ +flow_age_stats \ flowcounter \ flow_meter \ ipv6stats \ diff --git a/configure.ac b/configure.ac index a642ebab..e22d03d2 100644 --- a/configure.ac +++ b/configure.ac @@ -227,6 +227,7 @@ AC_CONFIG_FILES([Makefile aggregator/Makefile anonymizer/Makefile backscatter/Makefile + flow_age_stats/Makefile biflow_aggregator/Makefile bloom_history/Makefile debug_sender/Makefile diff --git a/flow_age_stats/Makefile.am b/flow_age_stats/Makefile.am new file mode 100644 index 00000000..aae381ab --- /dev/null +++ b/flow_age_stats/Makefile.am @@ -0,0 +1,4 @@ +bin_PROGRAMS=flow_age_stats +flow_age_stats_SOURCES=flow_age_stats.c fields.c fields.h +flow_age_stats_LDADD=-ltrap -lunirec +include ../aminclude.am \ No newline at end of file diff --git a/flow_age_stats/README.md 
b/flow_age_stats/README.md new file mode 100644 index 00000000..29173752 --- /dev/null +++ b/flow_age_stats/README.md @@ -0,0 +1,21 @@ +--- +# Flow Age Stats module - README + +## Description +This module is used for making statistics about the age of incoming flow data. The statistics produced are minimal, maximal and average values for both first time the flow is encountered and the last time the flow is encountered. + +Additionally the module can output two text files (time_first.txt, time_last.txt) that each have a table of three columns. First is the max age of the flow. Second is the percentage of flows that are in that age group. Third is the flow count. + +## Interfaces +- Input: One UniRec interface + - Template MUST contain fields TIME_FIRST and TIME_LAST +- Output: None + +## Parameters +- '-t' If specified the module creates or opens a file where the tables will be outputed. (Caution - the module will overwrite files labeled time_first.txt, time_last.txt) + +## Graphs +This module also comes with a script that makes use of GNUplot to make graphs from the data that is outputed into files. You can see how the graph looks like below. + +![ExampleGraph](example.png) + diff --git a/flow_age_stats/example.png b/flow_age_stats/example.png new file mode 100644 index 0000000000000000000000000000000000000000..540890e1a7f2f5159a920f6997a76fbc59b86cc6 GIT binary patch literal 7778 zcmbVxXIN8R*JctD2mvV)xq4!w~Q^$W`z;N;pYaD#`NC=OkWUUr-eY zq$j?Y456t=h4cQHy(9yOzZ38g#dv=;h~*$bQY8%XU>=XDWNwCd6tz zYWD-U`hAivDs!B>lkgU5tS4FcQtGMr`%4J3%e8VFM_2&UV3_}WMistYaX2879^L)y ziULG$l<}G9BZthS1<|YLWW)+H9Y%f#yj!sUb48(3zS#WuVX@GS70xMyEProgj;*LU z8r*3EhT8tVk#ShYkK1!5Gd$z92~B`WVHPLx2_|K0i?Vp>wb~;ChX?&cO zoOk@`2ITxHAtV<8{rDVSvNZkhlc>q9ReL`PBT9P^0@_N)Z^4&lab5N3!JnPNjP?u? 
zX4oiW{bY=7uK?P3n<}DT5QpCLLZ0h8$Yt3N5L1YFyXH2ZWJpi%9P;_g&k6VMpa;mj zGWy}5sq$lyjgNdUu>X97YkbpY-O+LgeU+`n|EyTs6J?CPUEx)0ci#r6`?dhPUw&nL#hfgrOz58sOGY>B!HuX_C<)l31 z!m5IvXEy=wuc-<>k-L64q2zL!5Xlm5J74dLK(G2Uo%0ccM}#pfyyW(6X$W@gV!e@5 zlRi*B>yPLiIt_6S>l82OSr5GPKFiBb1iOMW9iNXMWn6f8y8g70p~SbLraVCjNwR=q z=D9TBUc5tM)#C({!Sd@dUk1bk^ovM~7B2x}_4erpeq4!^im!|3%e;i4OHJS4%w66G z`x3LSUD+rS_;RKSL)hZB2|lU|GqDr9YHojw^i}k*%&}kF%nJFe)9}3v7~cPTBkrZD zeulYA0w&L==u>6>CtWGJJxYYzRTNraJjgQWW#ZK*@D zZ-cbR<#)zp!lC7+If)bY6iXDc>*5AoLpaB5*_ii4Opj zw9~aGlbHEiZ)mEO2-G9TgtZoJ_v%rucGLJt3CqgJ?coh~m5nae^bqOF$TQND z?p7*JP-!fx#YLIUUo>4XO1zvs;7#>ihlxAQ{`P%0`BK-LKmD$e+mmCX~c@rGY-&LX5I=$bTnHZuB zV+lDfpJTQgNizN7Y-YBZ!XvE>j0bm8rwn7*PzHK4Sz zcy3MG@Qp}^ENR?#?rHjl2& z?{&NP6#ZgA-@Gwb{H;<*5{Hik3WW|{RCXr%Ai$XUIqo#U_7*0L7-rZ#PL{0?rWrI7 zv(DfI<-j2B)4}i;gd{Ph6eKZ@gTes2htjbkz!{wM?KO{7D2 z)P|y-RvHji!?zZHdq@|qd^xkKea2|QKkB_{>-=K}8JfDmF!3lC*)xmbA3gUJ^|yQ$ z5(E^jzK6Lhjb23+N4;J=VcA{}C^z@Y3`!nyoFEk`Jn0TrxEEO~mt&k}=R)#{63uK)l#by|$E z$$eoPjLdLm;^dU`)k#LZQSbY+P(&p|Lo&suEtj94ieFUE&M?kW7$& z{3nC78e}HsFo;OvU`06pWKmL|8d4Ya7AAU`auNHg{>P>Nx5Fsnh{npLe+&_;=Jm0f{OI4|U>6JEvyNfw4~?Rfa4gHe;o=-(I`cj4Q%96L&a`Wdlw=Y=(e79;yUF(VLusk|Y`= z|I;VR$Pe>2enTNabz?@(HQ!V5i7(Z%;uw1-@}J6uX$0Ayl7+&LbsXR-L<%w=os72( z=1+-6s~A_k8+{jYAkd)^WOe!|>@J(6(7k-A{Vc|OUvkXIs?Mn40RjZuPc>_tSJbPH z$1eVHp1f*9t}=#%&0`UDbsqa^^9SUkh_h+dD^|}~F3Z4+1iClZmmI&YS0-GSG>exP zqQMJlzk8q=$K^Rdd z9E5XoTaNL-%iQ8{Ju&QDGU~J}UV;Y>>b^Ga`40A2hR4}}cS8y8C&zv5R6K};KKA2& ztl~lR4}V%u?tur&W|v5>M zdn}?{Zu#mcfs~F&{96-5G|3s z!VNwLwi>U~z4!ONYzZW4+YE<*$PxKa3r5l0=Z@R#QoGjG-24%xw37HF8!%LLNQ;V8 zBn<~n@0s#KJ(w&svF@+Q37K*`^~#PvxcSr13b&OOg0M4;lDqG{tiE+(8ffJmhw>$D#xhGGXpf zhs$;t3*x1x8m;be!dSE#PY7jx3DWjhmf~w~zfU?st1}*($BGse6H0YmlgX~VbKlO= z9DeF@)eBym*&PQBhUAK)ZDGxQM71KE#%qq$-O03fniFS3&I7!lrLP<5uiEK{2({Du zyPV?iiToUdGDPa0laIVu6+8^QjpArOA$NQ-UxR^`UH-Z))1!()V9B{2LeG3OtYMA$ zFPs)9x1cFm!EYEf19&D)k>53dSU)0wKsUO@4U+2V*-Qld!S5x zxT=K&DL3WpBpk-_ZUx8ZW8sBjp@vPDJ)1P)77I5(q>=r>{tTx@GS_hJ5GcTNvRiO;)&4L8(`KtaAs=z7RQYhg9)#?L-0 
zZH9DnOsALqOo{`*s2O74*D0)@wyExV>PJhPXM(9c&Hd&_2P{y<>8Tc-FIJFX1^SA7 zR_EkW^8KlXqC#nUIbYf>T1^aYM@?-%TFlN9Kq?qlbqp~iUspZCB$maJQqH>2uj)D` zAAjXI0L+AXMiL%KA9fyHYbDQPzGap)fgM_=9@kgS$}?nz8!Z1&4CK{ce=0OWbGcCk z>%s*s*P#T9ZUv~sB0>alfBNM*)bZQJgl^`}H+df$!ia3CN5C_-P_A+w=vLq0u!&}K za4cA170WWL3JxLGkm(xkUT@VSwHxz<%mu(KJ~PDuYk0Yz3=x%<+pkvp4Y+$zp`ynz z3%Muauk`3WpRupE&9^)B>eJtYy3$r;S=<*T%T@`q0D7@3fm2Vf4OJ*iHW?p+7j@j{ z7@TVM)S^|}k2eWlIXk8rJ!1h)M7Y0KR)`e;(LW^>E7y0s)VJyeZMPH`7+>d8ba!cH z&Yt}vKH3Cx6p?4i{fc~S>ap{!%FyNszOP&_qCvkeMGk!Gh3zzyzj$p&mT>TE%45?q z+tYF`5J|=V02`i06CNN3jW(R=zi}mnQH>~JN>TapN{K;}5Up^uyBQ!Cjj!q?^M4X9 z{`P)o+@w3P$-aKbobciAC}WTOkbJGTILI2mswgR?-f-|0z2M5A6niY z@zsvoMbRgH>FV7Z+Hcer7_?YK+in2@z3y-`CF6rpJJg%UV}f>c8rU;4@Vr-EPNc~72^x!^`*@!gG zb3S|@F13Hs`_skHF16h|?db23VCfUGYGWVxA}`bm%I);rbYM%hs|pU#2;J8v<=QO; zn1Y?xz_tC$YOb~42@FR4>m7!OEBSY(dTwjdWE*Ek7l%i;X3G@*6p06}0h^y%vp7Rw zVp*7!OgH1!9PmBt@XWwER6E$>$OFk_Damr~WVF7FRioTvSMlF&H!bqgG?FITvJkZ7 z29qjaAUh}b+jyD6tWj76&exANci~?f5;K}lhfFh+mp|iXhBAJ#A-u5mlfUj&RPpTZ zb{^1=^9b?}yvbEtD@VOLF!kZbc*-Vlu|QL0D`&sH%?){B<4c!Kq zp|hG*b3~B*(%@iYQ$aaF)9@FN^dPYnvRcK(B2=Q$`fPuPxO()S&7ouebMT^Q(cq_J zR8R!TvAPvuy8iqr%;@;qapFH*;X*223?n-r0Y?z58cU7-M=it5RcX z$K8qk;hrrB8iV$fE@TU6CI#8gb-%wtqb3cwpc3O9^Y%4X7?eTy6sh6lRhQ{DWxij| zTXD4U^EE(zRrj~W(t0jv3S2BpIQBAy9h|df!tMonYovk}Wper2K2QXi5Y02;&@bsu z^}U;FA%pCZw6E{RxV4?>yZ!D-od{d1@KIKsXMaNQG{_zJM9)?}2Lnoso@_}V$M@07 zPbDBA^^uIw#NG~+DwLb0u?g_9EmpPD(oUKOX39KN1LP&@6Bk8viRd5J*y&=43fdN* zUIGEbm;7iEbC3idZ;cNlnip&nTdiARw`cP#ZKQdI0d&l|MB&+h?Qf-b7ygJ_V0(J| zid{kD0wr+twC?#2KTE+NF2n!f#sBT||IN>)kqhZDzzm+9i$b^s6#8H=q3E|Tm&+9Q z{{)Lc3gl9B|Hka-bcx5J|ApC+phdWQSS;maECMXrLjMfq6o{$J#lI&BgYCwX8JZuU zMis_jGQhY>{(wa8h?-0Emg|0XO01(%T?Xj>i=r2O!r2m?=%!V$gX&i`1CJC)66Pn! 
zW-??&8oZxi!PX}uk7wg6K{cz@vbwDtg@ej_{3b)4Pq2cat&j)xI+@mBaYD%_B7(J# z7UYs?mlerc*tWNv#7Nrkea=dQAU3vF^<3|m(};-3q6Ys7{kpiC%m zhJN*vwM6-&LAJDbO3>uiOt0d)6qAJr&5RR;&N{ofsfJ3An}OqqQX$MJZdbOjcG!p{ z5Q?5hNIvI|SOb-3wqKJ3d3MLsDCkF1N-=OXVDJH)>`1tW|KbXz?%s%O+47&@Mp-mg z9w3?|t&W0umU$ClW+UrhRmBIpSL_H~vM0LR(Sf6xBeW7^wFrDehHLHIE` zC+IXf;=huOU6*+EFG^WP#H8pyr~bpdbguqibY+y@2<~g5L^=K`0z7H#Y-@-9jhKP$ zv6Gg<|3&xj+>Zhe{7d-j^2Y*T_h=*GKU>y;W^w-;2lUQE1n$Av)rXTYq&^9;LRm`M zJ{>0;m+$#Ej*JStx(c+3{}EzuQ*-M>#6^%aR$N8EBV}s=nW}IklT|97+I-WlR4|@8 ze|&$oM4C}YozSS=B?~j0CyX}dI%FT*?S24PMs8u%>ulfIXUCNRX}1?Lex&XbRgRJc z>dLw8P8R;kfaZfu|LK(pXrigTPH7cSvb6TTZ!ri?MEkPEDA5+M~w?;p*UW~}|YO0(z?l-d*0K5z7c z{2_K?yskC$77(Q)jM%-pv-%K7ie(H_?DpszaEmXPtX4w~MWI^2#zij_RTfS%ljdQ* z^q`~RWA8=EiM1PL;Snly2lPigXg{}Kl}XcdBFJGtccUkLHRVzjJu`L96#4p{Z_$v-;PU`CMYk z%f0azAGJqT6@OW(?8=thYP@B!jT>5E3<9~Vxi1@1_?-kOKA?OMR31>Ci5&&Vx!V9a zX%t8H7{zrJNF;YNjM+@=zqnm*5fb&L-kexjzgum0kW)E^i(P3AZ<&tR7MZc_aXTg` zvvX;l&foe~NQJq2RlVkhXqHD1clYlb9j}OU;F+a1qd1S!S25VsRKwb5JCaOj-<`=s6TOrYKMQNQ5&=WJ~z;P-=D z5_;%uZ8!zs_;ln#uTGvEnj(df_7i5J`vcrp_S<0uTnCf1OZ#Vi`RJm6C zInD&dV!&5IiojO4++*Uh$@rO%oFB|}406w46)g&zcuJ}CgvMJ%;M{%~L_}1LIvFaN0QFY9R2*(`$=4r=G z`Mi1yWoTtioct{44y6eE6H%E{5usq|tG>t?dN$g(_m|U?QT_ zv+BoV!@l!BvT1sNSWpT(ilZG>Z`;DI(RusK!Q5-lengDf%x)THZwuK5_YGNWw!W|= zZ~S`yOR(Kdxi#xhZ7tub$&vr~a|naCj$<#e)-kQ`Q5Tqtd1x%Dtb0`;oii(bkFK9Z zDCF+Zw3h*+nUJQoK#SFB?R0+m@2jU$=>E7s zd1GVdIBQHILyjeV1CCeZJpVkxu7l4g)~qb^qejNBSJ1-4S0+3$Jz5d;YqGI#cge!s zU48TKc-hWgQ+#60Z%=ponR*@l+ED$l-yvkqE4xL5sW*){v2=$W+qb_>2P01}BGM3U^5+uZj(C7rha c + * \date 2024 + */ +/* + * Copyright (C) 2013,2014,2015,2016 CESNET + * + * LICENSE TERMS + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of the Company nor the names of its contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * ALTERNATIVELY, provided that this notice is retained in full, this + * product may be distributed under the terms of the GNU General Public + * License (GPL) version 2 or later, in which case the provisions + * of the GPL apply INSTEAD OF those given above. + * + * This software is provided ``as is'', and any express or implied + * warranties, including, but not limited to, the implied warranties of + * merchantability and fitness for a particular purpose are disclaimed. + * In no event shall the company or contributors be liable for any + * direct, indirect, incidental, special, exemplary, or consequential + * damages (including, but not limited to, procurement of substitute + * goods or services; loss of use, data, or profits; or business + * interruption) however caused and on any theory of liability, whether + * in contract, strict liability, or tort (including negligence or + * otherwise) arising in any way out of the use of this software, even + * if advised of the possibility of such damage. 
+ * + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include "fields.h" +#include + +/** + * Linked list structure for storing flows in a certain interval + */ +typedef struct bins_t { + uint64_t max_age; //maximal duration of the bin TO DO + size_t count_first; + size_t count_last; + struct bins_t *next; +} bin; + + +/** + * Structure for storing statistics about flow ages +*/ +typedef struct stats_t { + uint64_t max; + uint64_t min; + uint64_t avg; +} stat; + +/** + * Definition of fields used in unirec templates (for both input and output interfaces) + */ +UR_FIELDS ( + time TIME_FIRST, + time TIME_LAST, +) + +trap_module_info_t *module_info = NULL; + + +/** + * Definition of basic module information - module name, module description, number of input and output interfaces + */ +#define MODULE_BASIC_INFO(BASIC) \ + BASIC("Flow Age Stats module", \ + "This module finds min, max and avg of ages of flow data from input.\n" \ + "The second function is making percentual histograms of flow ages and outputs them into a file when -t is specified.\n", 1, 0) + + +/** + * Definition of module parameter + */ +#define MODULE_PARAMS(PARAM)\ + PARAM('t', "table", "store data about the flows in files", no_argument, "none") + + +/** + * Function for creating the bins +*/ +bin* createNode(uint64_t max, uint64_t count){ + bin* new_node = (bin*)malloc(sizeof(bin)); + if (new_node == NULL) { + fprintf(stderr, "Error: Memory allocation failed\n"); + return NULL; + } + new_node->max_age = max; + new_node->count_first = count; + new_node->count_last = count; + new_node->next = NULL; + return new_node; +} + +static int stop = 0; + +/** + * Function to handle SIGTERM and SIGINT signals (used to stop the module) + */ +TRAP_DEFAULT_SIGNAL_HANDLER(stop = 1) + +int main(int argc, char **argv) +{ + int ret; + signed char opt; + + /* **** TRAP initialization **** */ + + /* + * Macro allocates and initializes module_info 
structure according to MODULE_BASIC_INFO and MODULE_PARAMS + * definitions on the lines 71 and 84 of this file. It also creates a string with short_opt letters for getopt + * function called "module_getopt_string" and long_options field for getopt_long function in variable "long_options" + */ + INIT_MODULE_INFO_STRUCT(MODULE_BASIC_INFO, MODULE_PARAMS) + + /* + * Let TRAP library parse program arguments, extract its parameters and initialize module interfaces + */ + TRAP_DEFAULT_INITIALIZATION(argc, argv, *module_info); + + /* + * Register signal handler. + */ + TRAP_REGISTER_DEFAULT_SIGNAL_HANDLER(); + + FILE* out = NULL; + int file = NULL; + /** + * Handling of arguments + */ + while ((opt = TRAP_GETOPT(argc, argv, module_getopt_string, long_options)) != -1) { + switch (opt) { + case 't': + file = 1; + break; + default: + fprintf(stderr, "Invalid arguments.\n"); + FREE_MODULE_INFO_STRUCT(MODULE_BASIC_INFO, MODULE_PARAMS); + TRAP_DEFAULT_FINALIZATION(); + return -1; + } + } + + /* **** Create UniRec templates **** */ + ur_template_t *in_tmplt = ur_create_input_template(0, "TIME_FIRST,TIME_LAST", NULL); + if (in_tmplt == NULL){ + fprintf(stderr, "Error: Input template could not be created.\n"); + return -1; + } + + //initialization of the structs for statistics like max, min, avg + stat first = {0, UINT64_MAX, 0}; + + stat last = {0, UINT64_MAX, 0}; + + //initialization of age bins + bin *head = createNode(1, 0); + bin *current = head; + for (uint64_t i = 10; i <= 600; i+=10) { + current->next = createNode(i, 0); + current = current->next; + } + current->next = createNode(0, 0); + + //initialization of time + time_t rawTime; + + + /* **** Main processing loop **** */ + size_t flow_count = 0; + time_t start_time; + time(&start_time); + + // Read data from input, process them and output them into file if specified + while (!stop) { + const void *in_rec; + uint16_t in_rec_size; + + // Receive data from input interface 0. 
+ // Block if data are not available immediately (unless a timeout is set using trap_ifcctl) + ret = TRAP_RECEIVE(0, in_rec, in_rec_size, in_tmplt); + + // Handle possible errors + TRAP_DEFAULT_RECV_ERROR_HANDLING(ret, continue, break); + + // Check size of received data + if (in_rec_size < ur_rec_fixlen_size(in_tmplt)) { + if (in_rec_size <= 1) { + break; // End of data (used for testing purposes) + } else { + fprintf(stderr, "Error: data with wrong size received (expected size: >= %hu, received size: %hu)\n", + ur_rec_fixlen_size(in_tmplt), in_rec_size); + break; + } + } + + // PROCESS THE DATA + time(&rawTime); + struct tm* utc_timeinfo; + #ifdef _WIN32 + gmtime_s(&rawTime, &utc_timeinfo); + #else + utc_timeinfo = gmtime(&rawTime); + #endif + char time_received[20]; + strftime(time_received, 20, "%Y-%m-%dT%H:%M:%S", utc_timeinfo); + + ur_time_t* received = malloc(sizeof(ur_time_t)); + if(received == NULL){ + fprintf(stderr, "Error: Malloc for ur_time_t failed.\n"); + break; + } + uint8_t check = ur_time_from_string(received, time_received); + if(check == 1){ + fprintf(stderr, "Error: could not convert string to ur_time_t\n"); + break; + } + + ur_time_t time_first = ur_get(in_tmplt, in_rec, F_TIME_FIRST); + ur_time_t time_last = ur_get(in_tmplt, in_rec, F_TIME_LAST); + //time difference between time at which the flow was received vs the time in the record itself + uint64_t first_diff = ur_timediff(*received, time_first); + uint64_t last_diff = ur_timediff(*received, time_last); + //time will be in milliseconds + + flow_count++; + + //categorization into bins + bin* curr = head; + int first_inc = 0;// to make sure it only increments once + int last_inc = 0; + //loop for putting the flows into correct bins + while (curr != NULL){ + if (first_inc == 0){ + if(curr->max_age >= (first_diff/1000)){ + curr->count_first++; + first_inc++; + } + } + if (last_inc == 0){ + if (curr->max_age >= (last_diff/1000)){ + curr->count_last++; + last_inc++; + } + } + if(last_inc == 1 
&& first_inc == 1){ + break; + } + if(curr->next == NULL){ + if (first_inc == 0){ + curr->count_first++; + } + if(last_inc == 0){ + curr->count_last++; + } + break; + } + curr = curr->next; + } + + first.avg += first_diff; + last.avg += last_diff; + + //setting new max or min if needed for first + if(first.max < first_diff){ + first.max = first_diff; + } + else if (first.min > first_diff){ + first.min = first_diff; + } + + //setting new max or min if needed for last + if(last.max < last_diff){ + last.max = last_diff; + } + else if (last.min > last_diff){ + last.min = last_diff; + } + free(received); + } + + time_t end_time; + time(&end_time); + double runtime = difftime(end_time, start_time);//calculating runtimes + + printf("\nRuntime: %0.2lfs\n", runtime); + printf("Number of flows processed: %zu\n \n", flow_count); + printf("Minimal age of time_first: %0.2lf s\n", (double)first.min/1000);//from milliseconds to seconds + printf("Maximal age of time_first: %0.2lf s\n", (double)first.max/1000); + printf("Average age of time_first: %0.2lf s\n", (double)(first.avg/flow_count)/1000); + printf("Minimal age of time_last: %0.2lf s\n", (double)last.min/1000); + printf("Maximal age of time_last: %0.2lf s\n", (double)last.max/1000); + printf("Average age of time_last: %0.2lf s\n", (double)(last.avg/flow_count)/1000); + + //should be outputed to file if specified + if(file == 1){ + out = fopen("time_first.txt", "w"); + if (out == NULL){ + fprintf(stderr, "Error: Could not open file.\n"); + goto skip_output; + } + current = head; + while(current != NULL){ + fprintf(out, "%" PRIu64 "\t%0.2lf%%\t%zu\n", current->max_age, ((double)(current->count_first * 100)/flow_count), current->count_first); + current = current->next; + } + fclose(out); + + out = fopen("time_last.txt", "w"); + if (out == NULL){ + fprintf(stderr, "Error: Could not open file.\n"); + goto skip_output; + } + current = head; + while(current != NULL){ + fprintf(out, "%" PRIu64 "\t%0.2lf\t%zu\n", current->max_age, 
((double)(current->count_last * 100)/flow_count), current->count_last); + current = current->next; + } + fclose(out); + } + + /* **** Cleanup **** */ + skip_output: + //cleanup of bins + current = head; + while(current != NULL){ + bin* next = current->next; + free(current); + current = next; + } + + // Do all necessary cleanup in libtrap before exiting + TRAP_DEFAULT_FINALIZATION(); + + // Release allocated memory for module_info structure + FREE_MODULE_INFO_STRUCT(MODULE_BASIC_INFO, MODULE_PARAMS) + + // Free unirec template + ur_free_template(in_tmplt); + ur_finalize(); + + return 0; +} diff --git a/flow_age_stats/plot.gp b/flow_age_stats/plot.gp new file mode 100644 index 00000000..1ba77365 --- /dev/null +++ b/flow_age_stats/plot.gp @@ -0,0 +1,54 @@ +# Set the output terminal +set terminal png enhanced font "Arial,12" +set output "time_first.png" + +# Set the title and axis labels +set title "TIME FIRST" +set xlabel "Age (s)" +set ylabel "Percentage (%)" +set y2label "Number of Flows" + +# Set the axis ranges +set xrange [1:600] +set yrange [0:*] +set y2range [0:*] + +# Set the tics and grid +set ytics nomirror +set y2tics nomirror +set grid +set xtics 10, 50 # Set x-axis tick marks at every 10th value, with minor ticks every 50th value + +# Set the style for solid bars +set style fill solid 1.0 + +# Plot the data +plot "time_first.txt" using 1:3 with boxes lc rgb "#4daf4a" title "Flow Counts" axes x1y2, \ + "time_first.txt" using 1:2 with lines lc rgb "#e41a1c" title "Percentage" axes x1y1 + +set terminal png enhanced font "Arial,12" +set output "time_last.png" + +# Set the title and axis labels +set title "TIME LAST" +set xlabel "Age (s)" +set ylabel "Percentage (%)" +set y2label "Number of Flows" + +# Set the axis ranges +set xrange [1:600] +set yrange [0:*] +set y2range [0:*] + +# Set the tics and grid +set ytics nomirror +set y2tics nomirror +set grid +set xtics 10, 50 # Set x-axis tick marks at every 10th value, with minor ticks every 50th value + +# Set 
the style for solid bars +set style fill solid 1.0 + +# Plot the data +plot "time_last.txt" using 1:3 with boxes lc rgb "#4daf4a" title "Flow Counts" axes x1y2, \ + "time_last.txt" using 1:2 with lines lc rgb "#e41a1c" title "Percentage" axes x1y1 \ No newline at end of file From 4c207be20ac153261ec37f6510de4a54faa66dfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=A1clav=20Barto=C5=A1?= Date: Wed, 12 Jun 2024 15:51:12 +0200 Subject: [PATCH 2/4] flow_age_stats: improved readme and some texts in source code --- flow_age_stats/README.md | 9 ++++----- flow_age_stats/flow_age_stats.c | 13 +++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/flow_age_stats/README.md b/flow_age_stats/README.md index 29173752..e599ba85 100644 --- a/flow_age_stats/README.md +++ b/flow_age_stats/README.md @@ -1,10 +1,9 @@ ---- # Flow Age Stats module - README ## Description -This module is used for making statistics about the age of incoming flow data. The statistics produced are minimal, maximal and average values for both first time the flow is encountered and the last time the flow is encountered. +This module is used for making statistics about the age of incoming flow data. The statistics produced are minimal, maximal and average values of the differences between the time a flow was received and its TIME_FIRST and TIME_LAST timestamps. -Additionally the module can output two text files (time_first.txt, time_last.txt) that each have a table of three columns. First is the max age of the flow. Second is the percentage of flows that are in that age group. Third is the flow count. +Additionally, the module can output histograms of flow age distribution. These are written as two text files (time_first.txt, time_last.txt) that each have a table of three columns. First is the max age of the flow (the end of bin range). Second is the percentage of flows that are in that age group. Third is the flow count. By default, the bins are 0-1s, 1s-10s, 10s-20s, ... 
590s-600s, >600s. ## Interfaces - Input: One UniRec interface @@ -12,10 +11,10 @@ Additionally the module can output two text files (time_first.txt, time_last.txt - Output: None ## Parameters -- '-t' If specified the module creates or opens a file where the tables will be outputed. (Caution - the module will overwrite files labeled time_first.txt, time_last.txt) +- '-t' If specified, the module writes the histogram tables to two files, time_first.txt and time_last.txt. (Caution - the module will overwrite existing files named time_first.txt and time_last.txt) ## Graphs -This module also comes with a script that makes use of GNUplot to make graphs from the data that is outputed into files. You can see how the graph looks like below. +This module also comes with a script that makes use of gnuplot to make graphs from the data that is written to the output files. You can see what the graph looks like below. ![ExampleGraph](example.png) diff --git a/flow_age_stats/flow_age_stats.c b/flow_age_stats/flow_age_stats.c index fda55986..18c62dcb 100644 --- a/flow_age_stats/flow_age_stats.c +++ b/flow_age_stats/flow_age_stats.c @@ -1,11 +1,11 @@ /** * \file flow_age_stats.c - * \brief Module for flow data statistics outputed into a file or stdout. + * \brief Module computes statistics about flow data age. 
* \author Michal Matejka * \date 2024 */ /* - * Copyright (C) 2013,2014,2015,2016 CESNET + * Copyright (C) 2024 CESNET * * LICENSE TERMS * @@ -56,7 +56,7 @@ #include /** - * Linked list structure for storing flows in a certain interval + * Linked list structure for storing a histogram of flow ages */ typedef struct bins_t { uint64_t max_age; //maximal duration of the bin TO DO @@ -92,7 +92,7 @@ trap_module_info_t *module_info = NULL; #define MODULE_BASIC_INFO(BASIC) \ BASIC("Flow Age Stats module", \ "This module finds min, max and avg of ages of flow data from input.\n" \ - "The second function is making percentual histograms of flow ages and outputs them into a file when -t is specified.\n", 1, 0) + "It can also make histograms of flow ages and output them into a file when -t is specified.\n", 1, 0) /** @@ -221,6 +221,7 @@ int main(int argc, char **argv) } // PROCESS THE DATA + // TODO: there is probably a faster method to get current time in ur_time_t than by conversion from string time(&rawTime); struct tm* utc_timeinfo; #ifdef _WIN32 @@ -322,7 +323,7 @@ int main(int argc, char **argv) if(file == 1){ out = fopen("time_first.txt", "w"); if (out == NULL){ - fprintf(stderr, "Error: Could not open file.\n"); + fprintf(stderr, "Error: Could not open file 'time_first.txt'.\n"); goto skip_output; } current = head; @@ -334,7 +335,7 @@ int main(int argc, char **argv) out = fopen("time_last.txt", "w"); if (out == NULL){ - fprintf(stderr, "Error: Could not open file.\n"); + fprintf(stderr, "Error: Could not open file 'time_last.txt'.\n"); goto skip_output; } current = head; From c1032d16b46d2e064d89324133993e75d9962252 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=A1clav=20Barto=C5=A1?= Date: Wed, 12 Jun 2024 15:52:09 +0200 Subject: [PATCH 3/4] flow_age_stats: fixed label of the last bin in histograms The max_age of the last bin is internally set to 0, which is confusing to the user when printed out as is. Now it's labeled as "600+" (if the previous bin ends at 600). 
--- flow_age_stats/flow_age_stats.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/flow_age_stats/flow_age_stats.c b/flow_age_stats/flow_age_stats.c index 18c62dcb..bf710cdf 100644 --- a/flow_age_stats/flow_age_stats.c +++ b/flow_age_stats/flow_age_stats.c @@ -328,7 +328,15 @@ int main(int argc, char **argv) } current = head; while(current != NULL){ + if (current->next == NULL){ // last bin - print label as "+" instead of "0" + fprintf(out, "%" PRIu64 "+\t%0.2lf%%\t%zu\n", current->max_age, ((double)(current->count_first * 100)/flow_count), current->count_first); + break; + } fprintf(out, "%" PRIu64 "\t%0.2lf%%\t%zu\n", current->max_age, ((double)(current->count_first * 100)/flow_count), current->count_first); + if (current->next->next == NULL) { + // second-to-last bin - store the end of this bin so we can use it in the last one (to print "+" after it) + current->next->max_age = current->max_age; + } current = current->next; } fclose(out); @@ -340,7 +348,15 @@ int main(int argc, char **argv) } current = head; while(current != NULL){ - fprintf(out, "%" PRIu64 "\t%0.2lf\t%zu\n", current->max_age, ((double)(current->count_last * 100)/flow_count), current->count_last); + if (current->next == NULL){ // last bin - print label as "+" instead of "0" + fprintf(out, "%" PRIu64 "+\t%0.2lf%%\t%zu\n", current->max_age, ((double)(current->count_last * 100)/flow_count), current->count_last); + break; + } + fprintf(out, "%" PRIu64 "\t%0.2lf%%\t%zu\n", current->max_age, ((double)(current->count_last * 100)/flow_count), current->count_last); + if (current->next->next == NULL) { + // second-to-last bin - store the end of this bin so we can use it in the last one (to print "+" after it) + current->next->max_age = current->max_age; + } current = current->next; } fclose(out); From 2e14b30d4af6e4b0ec5840ff566778575c6d3d16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=A1clav=20Barto=C5=A1?= Date: Wed, 12 Jun 2024 16:07:34 +0200 Subject: [PATCH 4/4] 
Reverted update of .gitignore --- .gitignore | 2 -- 1 file changed, 2 deletions(-) diff --git a/.gitignore b/.gitignore index adf18a47..be26e0e3 100644 --- a/.gitignore +++ b/.gitignore @@ -37,5 +37,3 @@ unirecfilter/lib/lex.yy.c unirecfilter/lib/liburfilter.pc unirecfilter/lib/parser.tab.c unirecfilter/lib/parser.tab.h -.gitignore -.vscode/