\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " MIN_OUTERR | \n",
+ " MAX_OUTERR | \n",
+ " AVG_OUTERR | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 23195123 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 23195124 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 23195125 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 23195126 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 23195127 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
23195128 rows × 3 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " MIN_OUTERR MAX_OUTERR AVG_OUTERR\n",
+ "0 0 0 0\n",
+ "1 0 0 0\n",
+ "2 0 0 0\n",
+ "3 0 0 0\n",
+ "4 0 0 0\n",
+ "... ... ... ...\n",
+ "23195123 0 0 0\n",
+ "23195124 0 0 0\n",
+ "23195125 0 0 0\n",
+ "23195126 0 0 0\n",
+ "23195127 0 0 0\n",
+ "\n",
+ "[23195128 rows x 3 columns]"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df[['YYYYMMDD','MIN_OUTERR','MAX_OUTERR','AVG_OUTERR']].iloc[:,1:]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "856b12e8",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 0\n",
+ "1 0\n",
+ "2 0\n",
+ "3 0\n",
+ "4 0\n",
+ " ..\n",
+ "23195123 0\n",
+ "23195124 0\n",
+ "23195125 0\n",
+ "23195126 0\n",
+ "23195127 0\n",
+ "Name: MIN_OUTERR, Length: 23195128, dtype: int64"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Sanity check of positional selection: column 0 of this three-column slice is MIN_OUTERR.\n",
+ "df.loc[:, ['MIN_OUTERR','MAX_OUTERR','AVG_OUTERR']].iloc[:, 0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "98f07a03",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "\n",
+ "# Directory that receives the PNG files written by the plotting cell below.\n",
+ "save_path = '/repo/projects/timeseries_anomaly/EDA/results'\n",
+ "\n",
+ "# savefig does not create missing directories, so make sure the target exists first.\n",
+ "os.makedirs(save_path, exist_ok=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "15a93a5d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Metric name fragments iterated by the plotting and statistics cells below.\n",
+ "# Must be defined here: those cells read vis_cols and fail on a fresh kernel without it.\n",
+ "vis_cols = ['INERR', 'OUTDROP', 'INNUPPS', 'OUTNUPPS', 'INDISCARD', 'INBCASTPPS']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "078c6ecb",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "vis_col: INERR\n",
+ ">>> plotting 20210620\n",
+ ">>> plotting 20210621\n",
+ ">>> plotting 20210622\n",
+ ">>> plotting 20210623\n",
+ ">>> plotting 20210624\n",
+ ">>> plotting 20210625\n",
+ ">>> plotting 20210626\n",
+ "vis_col: OUTDROP\n",
+ ">>> plotting 20210620\n",
+ ">>> plotting 20210621\n",
+ ">>> plotting 20210622\n",
+ ">>> plotting 20210623\n",
+ ">>> plotting 20210624\n",
+ ">>> plotting 20210625\n",
+ ">>> plotting 20210626\n",
+ "vis_col: INNUPPS\n",
+ ">>> plotting 20210620\n",
+ ">>> plotting 20210621\n",
+ ">>> plotting 20210622\n",
+ ">>> plotting 20210623\n",
+ ">>> plotting 20210624\n",
+ ">>> plotting 20210625\n",
+ ">>> plotting 20210626\n",
+ "vis_col: OUTNUPPS\n",
+ ">>> plotting 20210620\n",
+ ">>> plotting 20210621\n",
+ ">>> plotting 20210622\n",
+ ">>> plotting 20210623\n",
+ ">>> plotting 20210624\n",
+ ">>> plotting 20210625\n",
+ ">>> plotting 20210626\n",
+ "vis_col: INDISCARD\n",
+ ">>> plotting 20210620\n",
+ ">>> plotting 20210621\n",
+ ">>> plotting 20210622\n",
+ ">>> plotting 20210623\n",
+ ">>> plotting 20210624\n",
+ ">>> plotting 20210625\n",
+ ">>> plotting 20210626\n",
+ "vis_col: INBCASTPPS\n",
+ ">>> plotting 20210620\n",
+ ">>> plotting 20210621\n",
+ ">>> plotting 20210622\n",
+ ">>> plotting 20210623\n",
+ ">>> plotting 20210624\n",
+ ">>> plotting 20210625\n",
+ ">>> plotting 20210626\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Save one line plot per (metric, date, statistic) as a PNG under save_path.\n",
+ "#\n",
+ "# (label, column position) pairs, in the order the plots are produced.\n",
+ "# The positions index date_df after its first column has been dropped and\n",
+ "# presumably rely on cols listing AVG_, MIN_, MAX_ in that order -- TODO confirm.\n",
+ "criteria = [('MAX', 2), ('MIN', 1), ('AVG', 0)]\n",
+ "\n",
+ "for vis_col in vis_cols:\n",
+ "    print(f'vis_col: {vis_col}')\n",
+ "\n",
+ "    # every column whose name contains the metric fragment\n",
+ "    tmp_cols = [col for col in cols if vis_col in col]\n",
+ "\n",
+ "    # put the date column(s) in front so rows can be filtered per day\n",
+ "    # NOTE(review): assumes dates_df holds only the YYYYMMDD column -- confirm.\n",
+ "    tmp_df = pd.concat([dates_df, df[tmp_cols]], axis=1)\n",
+ "\n",
+ "    for date in dates:\n",
+ "        print(f'>>> plotting {date}')\n",
+ "\n",
+ "        # rows of this date only, with the leading column removed\n",
+ "        date_df = tmp_df[tmp_df['YYYYMMDD'] == date].iloc[:, 1:]\n",
+ "\n",
+ "        for c, pos in criteria:\n",
+ "            plot_df = date_df.iloc[:, pos]\n",
+ "\n",
+ "            # explicit figure/axes instead of the implicit pyplot state\n",
+ "            fig, ax = plt.subplots(figsize=(12, 6))\n",
+ "            plot_df.plot(ax=ax)\n",
+ "            ax.set_title(f'{c}_{vis_col}({date}) ')\n",
+ "            fig.savefig(save_path + f'/{c}_{vis_col}_{date}.png', dpi=200)\n",
+ "\n",
+ "            # close this figure right away so figures do not pile up across the loops\n",
+ "            plt.close(fig)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "ee353892",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "vis_col: INERR\n",
+ "> plotting 20210620\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 0.0009929763219236904\n",
+ ">> MAX(MAX): 103\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 0.0009929763219236904\n",
+ ">> MIN(MAX): 103\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 0.0009929763219236904\n",
+ ">> AVG(MAX): 103\n",
+ "> plotting 20210621\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 0.1070327038198467\n",
+ ">> MAX(MAX): 9360\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 0.1070327038198467\n",
+ ">> MIN(MAX): 9360\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 0.1070327038198467\n",
+ ">> AVG(MAX): 9360\n",
+ "> plotting 20210622\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 0.21545382251356252\n",
+ ">> MAX(MAX): 12166\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 0.21545382251356252\n",
+ ">> MIN(MAX): 12166\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 0.21545382251356252\n",
+ ">> AVG(MAX): 12166\n",
+ "> plotting 20210623\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 0.1899605151255562\n",
+ ">> MAX(MAX): 20995\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 0.1899605151255562\n",
+ ">> MIN(MAX): 20995\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 0.1899605151255562\n",
+ ">> AVG(MAX): 20995\n",
+ "> plotting 20210624\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 0.3504950319532711\n",
+ ">> MAX(MAX): 87505\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 0.3504950319532711\n",
+ ">> MIN(MAX): 87505\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 0.3504950319532711\n",
+ ">> AVG(MAX): 87505\n",
+ "> plotting 20210625\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 0.09246079843196818\n",
+ ">> MAX(MAX): 16707\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 0.09246079843196818\n",
+ ">> MIN(MAX): 16707\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 0.09246079843196818\n",
+ ">> AVG(MAX): 16707\n",
+ "> plotting 20210626\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 0.013125300391402939\n",
+ ">> MAX(MAX): 3539\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 0.013125300391402939\n",
+ ">> MIN(MAX): 3539\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 0.013125300391402939\n",
+ ">> AVG(MAX): 3539\n",
+ "vis_col: OUTDROP\n",
+ "> plotting 20210620\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 0.0\n",
+ ">> MAX(MAX): 0\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 0.0\n",
+ ">> MIN(MAX): 0\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 0.0\n",
+ ">> AVG(MAX): 0\n",
+ "> plotting 20210621\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 0.0\n",
+ ">> MAX(MAX): 0\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 0.0\n",
+ ">> MIN(MAX): 0\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 0.0\n",
+ ">> AVG(MAX): 0\n",
+ "> plotting 20210622\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 0.0\n",
+ ">> MAX(MAX): 0\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 0.0\n",
+ ">> MIN(MAX): 0\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 0.0\n",
+ ">> AVG(MAX): 0\n",
+ "> plotting 20210623\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 0.0\n",
+ ">> MAX(MAX): 0\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 0.0\n",
+ ">> MIN(MAX): 0\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 0.0\n",
+ ">> AVG(MAX): 0\n",
+ "> plotting 20210624\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 0.0\n",
+ ">> MAX(MAX): 0\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 0.0\n",
+ ">> MIN(MAX): 0\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 0.0\n",
+ ">> AVG(MAX): 0\n",
+ "> plotting 20210625\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 0.0\n",
+ ">> MAX(MAX): 0\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 0.0\n",
+ ">> MIN(MAX): 0\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 0.0\n",
+ ">> AVG(MAX): 0\n",
+ "> plotting 20210626\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 0.0\n",
+ ">> MAX(MAX): 0\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 0.0\n",
+ ">> MIN(MAX): 0\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 0.0\n",
+ ">> AVG(MAX): 0\n",
+ "vis_col: INNUPPS\n",
+ "> plotting 20210620\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 1.4232476784349255\n",
+ ">> MAX(MAX): 230423\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 1.4232476784349255\n",
+ ">> MIN(MAX): 230423\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 1.4232476784349255\n",
+ ">> AVG(MAX): 230423\n",
+ "> plotting 20210621\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 1.3255765156290704\n",
+ ">> MAX(MAX): 4693\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 1.3255765156290704\n",
+ ">> MIN(MAX): 4693\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 1.3255765156290704\n",
+ ">> AVG(MAX): 4693\n",
+ "> plotting 20210622\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 1.300698597051876\n",
+ ">> MAX(MAX): 3564\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 1.300698597051876\n",
+ ">> MIN(MAX): 3564\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 1.300698597051876\n",
+ ">> AVG(MAX): 3564\n",
+ "> plotting 20210623\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 1.3138820131438875\n",
+ ">> MAX(MAX): 6472\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 1.3138820131438875\n",
+ ">> MIN(MAX): 6472\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 1.3138820131438875\n",
+ ">> AVG(MAX): 6472\n",
+ "> plotting 20210624\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 1.414565846192937\n",
+ ">> MAX(MAX): 8387\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 1.414565846192937\n",
+ ">> MIN(MAX): 8387\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 1.414565846192937\n",
+ ">> AVG(MAX): 8387\n",
+ "> plotting 20210625\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 1.3606745082676048\n",
+ ">> MAX(MAX): 3692\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 1.3606745082676048\n",
+ ">> MIN(MAX): 3692\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 1.3606745082676048\n",
+ ">> AVG(MAX): 3692\n",
+ "> plotting 20210626\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 1.4019500500077176\n",
+ ">> MAX(MAX): 4121\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 1.4019500500077176\n",
+ ">> MIN(MAX): 4121\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 1.4019500500077176\n",
+ ">> AVG(MAX): 4121\n",
+ "vis_col: OUTNUPPS\n",
+ "> plotting 20210620\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 2065.437395895265\n",
+ ">> MAX(MAX): 33784710\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 2065.437395895265\n",
+ ">> MIN(MAX): 33784710\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 2065.437395895265\n",
+ ">> AVG(MAX): 33784710\n",
+ "> plotting 20210621\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 10884.217727320281\n",
+ ">> MAX(MAX): 45275418\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 10884.217727320281\n",
+ ">> MIN(MAX): 45275418\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 10884.217727320281\n",
+ ">> AVG(MAX): 45275418\n",
+ "> plotting 20210622\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 10274.470403238776\n",
+ ">> MAX(MAX): 34360111\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 10274.470403238776\n",
+ ">> MIN(MAX): 34360111\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 10274.470403238776\n",
+ ">> AVG(MAX): 34360111\n",
+ "> plotting 20210623\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 10379.628168063316\n",
+ ">> MAX(MAX): 49227501\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 10379.628168063316\n",
+ ">> MIN(MAX): 49227501\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 10379.628168063316\n",
+ ">> AVG(MAX): 49227501\n",
+ "> plotting 20210624\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 10318.102998850687\n",
+ ">> MAX(MAX): 46340093\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 10318.102998850687\n",
+ ">> MIN(MAX): 46340093\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 10318.102998850687\n",
+ ">> AVG(MAX): 46340093\n",
+ "> plotting 20210625\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 9422.836515565848\n",
+ ">> MAX(MAX): 32470807\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 9422.836515565848\n",
+ ">> MIN(MAX): 32470807\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 9422.836515565848\n",
+ ">> AVG(MAX): 32470807\n",
+ "> plotting 20210626\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 2315.964770284498\n",
+ ">> MAX(MAX): 26240727\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 2315.964770284498\n",
+ ">> MIN(MAX): 26240727\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 2315.964770284498\n",
+ ">> AVG(MAX): 26240727\n",
+ "vis_col: INDISCARD\n",
+ "> plotting 20210620\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 11.453699935354793\n",
+ ">> MAX(MAX): 8403755\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 11.453699935354793\n",
+ ">> MIN(MAX): 8403755\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 11.453699935354793\n",
+ ">> AVG(MAX): 8403755\n",
+ "> plotting 20210621\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 102.17135060146234\n",
+ ">> MAX(MAX): 3169587\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 102.17135060146234\n",
+ ">> MIN(MAX): 3169587\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 102.17135060146234\n",
+ ">> AVG(MAX): 3169587\n",
+ "> plotting 20210622\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 109.02586151696624\n",
+ ">> MAX(MAX): 3894820\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 109.02586151696624\n",
+ ">> MIN(MAX): 3894820\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 109.02586151696624\n",
+ ">> AVG(MAX): 3894820\n",
+ "> plotting 20210623\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 124.03730567319344\n",
+ ">> MAX(MAX): 3365716\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 124.03730567319344\n",
+ ">> MIN(MAX): 3365716\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 124.03730567319344\n",
+ ">> AVG(MAX): 3365716\n",
+ "> plotting 20210624\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 129.86653843994765\n",
+ ">> MAX(MAX): 9742979\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 129.86653843994765\n",
+ ">> MIN(MAX): 9742979\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 129.86653843994765\n",
+ ">> AVG(MAX): 9742979\n",
+ "> plotting 20210625\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 101.42783862050769\n",
+ ">> MAX(MAX): 3252166\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 101.42783862050769\n",
+ ">> MIN(MAX): 3252166\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 101.42783862050769\n",
+ ">> AVG(MAX): 3252166\n",
+ "> plotting 20210626\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 16.78371724870965\n",
+ ">> MAX(MAX): 877271\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 16.78371724870965\n",
+ ">> MIN(MAX): 877271\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 16.78371724870965\n",
+ ">> AVG(MAX): 877271\n",
+ "vis_col: INBCASTPPS\n",
+ "> plotting 20210620\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 0.0\n",
+ ">> MAX(MAX): 0\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 0.0\n",
+ ">> MIN(MAX): 0\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 0.0\n",
+ ">> AVG(MAX): 0\n",
+ "> plotting 20210621\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 0.0\n",
+ ">> MAX(MAX): 0\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 0.0\n",
+ ">> MIN(MAX): 0\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 0.0\n",
+ ">> AVG(MAX): 0\n",
+ "> plotting 20210622\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 0.0\n",
+ ">> MAX(MAX): 0\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 0.0\n",
+ ">> MIN(MAX): 0\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 0.0\n",
+ ">> AVG(MAX): 0\n",
+ "> plotting 20210623\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 0.0\n",
+ ">> MAX(MAX): 0\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 0.0\n",
+ ">> MIN(MAX): 0\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 0.0\n",
+ ">> AVG(MAX): 0\n",
+ "> plotting 20210624\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 0.0\n",
+ ">> MAX(MAX): 0\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 0.0\n",
+ ">> MIN(MAX): 0\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 0.0\n",
+ ">> AVG(MAX): 0\n",
+ "> plotting 20210625\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 0.0\n",
+ ">> MAX(MAX): 0\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 0.0\n",
+ ">> MIN(MAX): 0\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 0.0\n",
+ ">> AVG(MAX): 0\n",
+ "> plotting 20210626\n",
+ ">> MAX(MIN): 0\n",
+ ">> MAX(AVG): 0.0\n",
+ ">> MAX(MAX): 0\n",
+ ">> MIN(MIN): 0\n",
+ ">> MIN(AVG): 0.0\n",
+ ">> MIN(MAX): 0\n",
+ ">> AVG(MIN): 0\n",
+ ">> AVG(AVG): 0.0\n",
+ ">> AVG(MAX): 0\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Print min / mean / max of every (metric, date, statistic) column.\n",
+ "#\n",
+ "# (label, column position) pairs, in the order the results are printed.\n",
+ "# The positions index date_df after its first column has been dropped and\n",
+ "# presumably rely on cols listing AVG_, MIN_, MAX_ in that order -- TODO confirm.\n",
+ "criteria = [('MAX', 2), ('MIN', 1), ('AVG', 0)]\n",
+ "\n",
+ "for vis_col in vis_cols:\n",
+ "    print(f'vis_col: {vis_col}')\n",
+ "\n",
+ "    # every column whose name contains the metric fragment\n",
+ "    tmp_cols = [col for col in cols if vis_col in col]\n",
+ "\n",
+ "    # put the date column(s) in front so rows can be filtered per day\n",
+ "    # NOTE(review): assumes dates_df holds only the YYYYMMDD column -- confirm.\n",
+ "    tmp_df = pd.concat([dates_df, df[tmp_cols]], axis=1)\n",
+ "\n",
+ "    for date in dates:\n",
+ "        print(f'> plotting {date}')\n",
+ "\n",
+ "        # rows of this date only, with the leading column removed\n",
+ "        date_df = tmp_df[tmp_df['YYYYMMDD'] == date].iloc[:, 1:]\n",
+ "\n",
+ "        for c, pos in criteria:\n",
+ "            plot_df = date_df.iloc[:, pos]\n",
+ "\n",
+ "            # same three summary lines for every statistic, labelled with c\n",
+ "            print(f'>> {c}(MIN): {plot_df.min()}')\n",
+ "            print(f'>> {c}(AVG): {plot_df.mean()}')\n",
+ "            print(f'>> {c}(MAX): {plot_df.max()}')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "31923da3",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/etc/Hamon_EDA_ver2.ipynb b/etc/Hamon_EDA_ver2.ipynb
new file mode 100644
index 0000000..fcc0099
--- /dev/null
+++ b/etc/Hamon_EDA_ver2.ipynb
@@ -0,0 +1,2562 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "hHGnodIzL8ux",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "hHGnodIzL8ux",
+ "outputId": "845d8e3b-d218-48c9-def2-50578ea340bf"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Mounted at /gdrive\n"
+ ]
+ }
+ ],
+ "source": [
+ "from google.colab import drive\n",
+ "drive.mount('/gdrive')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "617dbb54",
+ "metadata": {
+ "id": "617dbb54"
+ },
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import pandas as pd\n",
+ "import glob"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "69d77230",
+ "metadata": {
+ "id": "69d77230"
+ },
+ "outputs": [],
+ "source": [
+ "df = pd.read_csv('/gdrive/MyDrive/Colab Notebooks/total_df.csv')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "31cb8a56",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "31cb8a56",
+ "outputId": "4c439845-84ab-40a8-a1b0-f6a181eeeb25"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['YYYYMMDD', 'HHMMSS', 'MNG_NO', 'IF_IDX', 'AVG_INBPS', 'MIN_INBPS',\n",
+ " 'MAX_INBPS', 'AVG_OUTBPS', 'MIN_OUTBPS', 'MAX_OUTBPS', 'AVG_INPPS',\n",
+ " 'MIN_INPPS', 'MAX_INPPS', 'AVG_OUTPPS', 'MIN_OUTPPS', 'MAX_OUTPPS',\n",
+ " 'AVG_INERR', 'MIN_INERR', 'MAX_INERR', 'AVG_OUTERR', 'MIN_OUTERR',\n",
+ " 'MAX_OUTERR', 'AVG_CRC', 'MIN_CRC', 'MAX_CRC', 'AVG_COLLISION',\n",
+ " 'MIN_COLLISION', 'MAX_COLLISION', 'AVG_INDROP', 'MIN_INDROP',\n",
+ " 'MAX_INDROP', 'AVG_OUTDROP', 'MIN_OUTDROP', 'MAX_OUTDROP',\n",
+ " 'AVG_INNUPPS', 'MIN_INNUPPS', 'MAX_INNUPPS', 'AVG_OUTNUPPS',\n",
+ " 'MIN_OUTNUPPS', 'MAX_OUTNUPPS', 'AVG_INDISCARD', 'MIN_INDISCARD',\n",
+ " 'MAX_INDISCARD', 'AVG_OUTDISCARD', 'MIN_OUTDISCARD', 'MAX_OUTDISCARD',\n",
+ " 'AVG_INMCASTPPS', 'MIN_INMCASTPPS', 'MAX_INMCASTPPS', 'AVG_OUTMCASTPPS',\n",
+ " 'MIN_OUTMCASTPPS', 'MAX_OUTMCASTPPS', 'AVG_INBCASTPPS',\n",
+ " 'MIN_INBCASTPPS', 'MAX_INBCASTPPS', 'AVG_OUTBCASTPPS',\n",
+ " 'MIN_OUTBCASTPPS', 'MAX_OUTBCASTPPS'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "3a4033a2",
+ "metadata": {
+ "id": "3a4033a2"
+ },
+ "outputs": [],
+ "source": [
+ "import matplotlib.pyplot as plt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "1c98808a",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "1c98808a",
+ "outputId": "821e069f-6ca8-41fb-fa25-f6c78b3f2bda"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "YYYYMMDD : 7\n",
+ "HHMMSS : 288\n",
+ "MNG_NO : 10686\n",
+ "IF_IDX : 26\n",
+ "AVG_INBPS : 4413650\n",
+ "MIN_INBPS : 4413650\n",
+ "MAX_INBPS : 4413650\n",
+ "AVG_OUTBPS : 1567959\n",
+ "MIN_OUTBPS : 1567959\n",
+ "MAX_OUTBPS : 1567959\n",
+ "AVG_INPPS : 44142\n",
+ "MIN_INPPS : 44142\n",
+ "MAX_INPPS : 44142\n",
+ "AVG_OUTPPS : 287257\n",
+ "MIN_OUTPPS : 287257\n",
+ "MAX_OUTPPS : 287257\n",
+ "AVG_INERR : 1302\n",
+ "MIN_INERR : 1302\n",
+ "MAX_INERR : 1302\n",
+ "AVG_OUTERR : 1065\n",
+ "MIN_OUTERR : 1065\n",
+ "MAX_OUTERR : 1065\n",
+ "AVG_CRC : 1\n",
+ "MIN_CRC : 1\n",
+ "MAX_CRC : 1\n",
+ "AVG_COLLISION : 1\n",
+ "MIN_COLLISION : 1\n",
+ "MAX_COLLISION : 1\n",
+ "AVG_INDROP : 1\n",
+ "MIN_INDROP : 1\n",
+ "MAX_INDROP : 1\n",
+ "AVG_OUTDROP : 1\n",
+ "MIN_OUTDROP : 1\n",
+ "MAX_OUTDROP : 1\n",
+ "AVG_INNUPPS : 897\n",
+ "MIN_INNUPPS : 897\n",
+ "MAX_INNUPPS : 897\n",
+ "AVG_OUTNUPPS : 287257\n",
+ "MIN_OUTNUPPS : 287257\n",
+ "MAX_OUTNUPPS : 287257\n",
+ "AVG_INDISCARD : 28382\n",
+ "MIN_INDISCARD : 28382\n",
+ "MAX_INDISCARD : 28382\n",
+ "AVG_OUTDISCARD : 709\n",
+ "MIN_OUTDISCARD : 709\n",
+ "MAX_OUTDISCARD : 709\n",
+ "AVG_INMCASTPPS : 1\n",
+ "MIN_INMCASTPPS : 1\n",
+ "MAX_INMCASTPPS : 1\n",
+ "AVG_OUTMCASTPPS : 1\n",
+ "MIN_OUTMCASTPPS : 1\n",
+ "MAX_OUTMCASTPPS : 1\n",
+ "AVG_INBCASTPPS : 1\n",
+ "MIN_INBCASTPPS : 1\n",
+ "MAX_INBCASTPPS : 1\n",
+ "AVG_OUTBCASTPPS : 1\n",
+ "MIN_OUTBCASTPPS : 1\n",
+ "MAX_OUTBCASTPPS : 1\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Print the number of distinct values (a missing value counts once) for every column.\n",
+ "for col_name in df.columns:\n",
+ "    n_distinct = df[col_name].nunique(dropna=False)\n",
+ "    print(f'{col_name} : {n_distinct}')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "d8ad2720",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "d8ad2720",
+ "outputId": "bfef1b51-f0f1-4b0c-dbc4-3a45c36c8efd"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(23195128, 58)"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "43e80a22",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 226
+ },
+ "id": "43e80a22",
+ "outputId": "8ff31d12-e303-4c63-bbcc-2df2dad55c02"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "