Skip to content

Commit

Permalink
Post-processor module (#24)
Browse files Browse the repository at this point in the history
* starting point for post-processor

* organize output dir structure

* associate regex with equipment. prefix output directory

* add readme

* regex for vacuum pressure

* more info about usage

* add tkinter GUI

* add launch button for post-processor

* clean up x-axis tic labels for longer plots

* fix window geometry

* reordering buttons in config tab
  • Loading branch information
mslaffin authored Nov 16, 2024
1 parent 7bbd6ce commit ff2935a
Show file tree
Hide file tree
Showing 5 changed files with 651 additions and 1 deletion.
49 changes: 48 additions & 1 deletion dashboard.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
import subprocess
import sys
import os
import subsystem
import tkinter as tk
from tkinter import ttk
from tkinter import messagebox
from utils import MessagesFrame, SetupScripts, LogLevel
from usr.panel_config import save_pane_states, load_pane_states, saveFileExists
import serial.tools.list_ports


frames_config = [
("Oil System", 0, 50, 150),
("Visualization Gas Control", 0, 50, 150),
Expand Down Expand Up @@ -87,12 +92,54 @@ def create_main_control_notebook(self, frame):

# TODO: add main control buttons to main tab here

# Add stuff to Config tab
# Add Config tab elements
self.create_com_port_frame(config_tab)
self.create_post_processor_button(config_tab)
self.create_log_level_dropdown(config_tab)

save_layout_button = tk.Button(config_tab, text="Save Layout", command=self.save_current_pane_state)
save_layout_button.pack(side=tk.BOTTOM, anchor='se', padx=5, pady=5)

def create_post_processor_button(self, parent_frame):
    """Add a button to the Config tab that launches the standalone
    log post-processor application in a separate process."""
    container = ttk.Frame(parent_frame)
    container.pack(side=tk.TOP, anchor='nw', padx=5, pady=5)

    launch_button = ttk.Button(
        container,
        text="Launch Log Post-processor",
        command=self.launch_post_processor,
    )
    launch_button.pack(side=tk.LEFT, padx=5)

def launch_post_processor(self):
    """Launch the post-processor GUI as a separate process.

    Resolves the script path relative to the application root (which
    differs between a PyInstaller bundle and a source checkout), then
    spawns it with the current Python interpreter. Failures are logged
    and reported to the user via a message box.
    """
    try:
        # Resolve the application base directory.
        if getattr(sys, 'frozen', False):
            # Running as a bundled (PyInstaller) executable.
            base_path = sys._MEIPASS  # type: ignore
        else:
            # Running from source.
            base_path = os.path.dirname(os.path.abspath(__file__))

        # Join each path component separately so the platform's
        # separator is used (the original embedded '/' in one component).
        post_processor_path = os.path.join(
            base_path, 'scripts', 'post-process', 'post_process_gui.py')

        if sys.platform.startswith('win'):
            # Suppress the extra console window on Windows.
            # (Note: still sys.executable, not pythonw — the flag alone
            # hides the console.)
            subprocess.Popen([sys.executable, post_processor_path],
                             creationflags=subprocess.CREATE_NO_WINDOW)
        else:
            # On other platforms a plain spawn is sufficient.
            subprocess.Popen([sys.executable, post_processor_path])

        self.logger.info("Log post-processor launched successfully")
    except Exception as e:
        self.logger.error(f"Failed to launch log post-processor: {str(e)}")
        messagebox.showerror("Error",
                             f"Failed to launch log post-processor:\n{str(e)}")

def add_title(self, frame, title):
"""Add a title label to a frame."""
label = tk.Label(frame, text=title, font=("Helvetica", 10, "bold"))
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ pymodbus==3.7.3
pyserial==3.5
scipy==1.14.1
tkdial==0.0.7
pandas==2.1.3
48 changes: 48 additions & 0 deletions scripts/post-process/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
## Experimental Log Post-Processor

A script for analyzing EBEAM Dashboard log files.

### Features
- Extracts measurement values from log files
- Generates various output formats (csv, txt, graphical plots)
- Provides statistical analysis of extracted data

### Requirements
- Python 3.6+
- required packages:
```
pip install pandas matplotlib
```

### Usage
```
python post_process.py -f <log_files> [-d <data_types>] [-o <output_formats>] [--outdir <output_directory>]
```

Arguments
- `-f, --files`: Path(s) to the log file(s) (required)
- `-d, --data`: Data types to extract (default: voltage, current, temperature)
- Choices: voltage, current, temperature, pressure
- `-o, --output`: Output formats (default: csv, plot)
- Choices: csv, xlsx, plot
- `--outdir`: Output directory path (default: 'output')


### Output Directory Structure
For a log file named log_2024-11-04_experiment.log, the following directory structure is created:

```
log_2024-11-04_output/
├── csv/
│ ├── log_2024-11-04_voltage.csv
│ ├── log_2024-11-04_current.csv
│ └── log_2024-11-04_temperature.csv
├── plots/
│ ├── log_2024-11-04_voltage.png # "Voltage Over Time"
│ ├── log_2024-11-04_current.png # "Current Over Time"
│ └── log_2024-11-04_temperature.png # "Cathode Temperatures Over Time"
└── statistics/
├── log_2024-11-04_voltage_stats.txt
├── log_2024-11-04_current_stats.txt
└── log_2024-11-04_temperature_stats.txt
```
251 changes: 251 additions & 0 deletions scripts/post-process/post_process.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
import re
import argparse
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import os

def parse_arguments():
    """Parse and return the command-line options for the post-processor."""
    parser = argparse.ArgumentParser(description='Post-process experimental log files.')
    parser.add_argument('-f', '--files', nargs='+', required=True,
                        help='Path(s) to the log file(s).')
    parser.add_argument('-d', '--data', nargs='+',
                        choices=['voltage', 'current', 'temperature', 'pressure'],
                        default=['voltage', 'current', 'temperature'],
                        help='Data types to extract.')
    parser.add_argument('-o', '--output', nargs='+',
                        choices=['csv', 'xlsx', 'plot'],
                        default=['csv', 'plot'],
                        help='Output formats.')
    parser.add_argument('--outdir', default='output', help='Output directory path')
    return parser.parse_args()

def get_patterns(data_types):
    """Build the regex table for the requested data types.

    Each entry maps an equipment key to:
        'pattern'      - regex with a leading HH:MM:SS timestamp group
        'display_name' - human-readable equipment name
        'measurements' - data types this equipment provides
    """
    requested = set(data_types)
    patterns = {}

    # Voltage and current both come from the power-supply log lines.
    if requested & {'voltage', 'current'}:
        patterns['power_supply'] = {
            'pattern': r'\[(\d{2}:\d{2}:\d{2})\] - DEBUG: Power supply (\d) readings - Voltage: ([\d.]+)V, Current: ([\d.]+)A, Mode: (.+)',
            'display_name': 'Power Supply',
            'measurements': ['voltage', 'current'],
        }

    if 'temperature' in requested:
        patterns['cathode_temp'] = {
            'pattern': r'\[(\d{2}:\d{2}:\d{2})\] - INFO: Unit (\d) Temperature: ([\d.]+) °C',
            'display_name': 'Cathode',
            'measurements': ['temperature'],
        }

    if 'pressure' in requested:
        patterns['vacuum_pressure'] = {
            'pattern': r'\[(\d{2}:\d{2}:\d{2})\] - INFO: Chamber pressure: ([\d.]+(?:E[+-]\d+)?)\s*mbar\s*\(([\d.]+(?:E[+-]\d+)?)\)',
            'display_name': 'Vacuum Chamber',
            'measurements': ['pressure'],
        }

    return patterns

def parse_log_file(filename, patterns):
    """Extract timestamped readings from a dashboard log file.

    Args:
        filename: path to the log file to parse.
        patterns: mapping of equipment type -> pattern info, as produced
            by get_patterns().

    Returns:
        dict mapping each equipment type to a list of reading dicts,
        or None if the file is missing or unreadable.
    """
    data = {key: [] for key in patterns.keys()}
    try:
        with open(filename, 'r') as file:
            for line in file:
                for equip_type, equip_info in patterns.items():
                    match = re.search(equip_info['pattern'], line)
                    if not match:
                        continue
                    time_str = match.group(1)

                    if equip_type == 'power_supply':
                        data[equip_type].append({
                            'timestamp': time_str,
                            'ps_number': int(match.group(2)),
                            'voltage': float(match.group(3)),
                            'current': float(match.group(4)),
                            'mode': match.group(5)
                        })
                    elif equip_type == 'cathode_temp':
                        data[equip_type].append({
                            'timestamp': time_str,
                            'sensor': int(match.group(2)),
                            'temperature': float(match.group(3)),
                            'equipment': 'cathode'  # Tag the equipment type
                        })
                    elif equip_type == 'vacuum_pressure':
                        data[equip_type].append({
                            'timestamp': time_str,
                            'pressure': float(match.group(2)),
                            'raw_pressure': float(match.group(3))
                        })

        for equip_type, readings in data.items():
            print(f"Found {len(readings)} {patterns[equip_type]['display_name']} readings")
        return data
    except FileNotFoundError:
        # Bug fix: the message previously never named the offending file.
        print(f"Error: File {filename} not found.")
        return None
    except Exception as e:
        print(f"An error occurred while parsing {filename}: {e}")
        return None

def get_output_dir(filename):
    """Derive the output directory name from a log file's date prefix.

    A name matching log_YYYY-MM-DD_* yields 'log_YYYY-MM-DD_output';
    anything else falls back to the plain 'output' directory.
    """
    base_name = os.path.basename(filename)
    date_match = re.search(r'log_(\d{4}-\d{2}-\d{2})', base_name)
    return f"log_{date_match.group(1)}_output" if date_match else "output"

def ensure_output_dir(base_dir, subdir):
    """Return base_dir/subdir, creating the directory tree if needed."""
    target = os.path.join(base_dir, subdir)
    os.makedirs(target, exist_ok=True)
    return target

def save_to_csv(df, output_path):
    """Write the DataFrame to output_path as CSV (index omitted)."""
    df.to_csv(output_path, index=False)
    print(f"CSV saved to {output_path}")

def save_to_excel(df, output_path):
    """Write the DataFrame to output_path as an Excel workbook (index omitted)."""
    df.to_excel(output_path, index=False)
    print(f"Excel file saved to {output_path}")

def plot_data(df, data_type, output_path):
    """
    Create plots with improved time handling and consistent styling.
    Args:
        df (pd.DataFrame): DataFrame with timestamp column and data
        data_type (str): Type of data to plot ('voltage', 'current', 'temperature', 'pressure')
        output_path (str): Path to save the plot

    NOTE(review): mutates the caller's DataFrame in place — the 'timestamp'
    column is converted to datetime below. Confirm callers do not rely on
    the original string timestamps afterwards.
    """
    # Convert string timestamps to datetime objects.
    # With a time-only format pandas anchors all values to the default date,
    # which is fine here because only time-of-day is plotted.
    df['timestamp'] = pd.to_datetime(df['timestamp'], format='%H:%M:%S')

    plt.figure(figsize=(12, 6))

    if data_type in ['voltage', 'current']:
        # One line per power supply unit.
        for ps_number, group in df.groupby('ps_number'):
            plt.plot(group['timestamp'], group[data_type], label=f'Power Supply {ps_number}', marker='o')
        ylabel = 'Voltage (V)' if data_type == 'voltage' else 'Current (A)'
        plt.ylabel(ylabel)

    elif data_type == 'temperature':
        # One line per cathode temperature sensor.
        for sensor, group in df.groupby('sensor'):
            plt.plot(group['timestamp'], group['temperature'], label=f'Sensor {sensor}', marker='o')
        plt.ylabel('Temperature (°C)')

    elif data_type == 'pressure':
        # Chamber pressure spans orders of magnitude, hence the log scale.
        plt.plot(df['timestamp'], df['pressure'], label='Chamber Pressure', marker='o')
        plt.yscale('log')
        plt.ylabel('Pressure (mbar)')

    # Improve x-axis formatting
    plt.gcf().autofmt_xdate()  # Rotate and align the tick labels

    # Format time axis with appropriate intervals
    locator = mdates.AutoDateLocator()
    formatter = mdates.DateFormatter('%H:%M:%S')
    plt.gca().xaxis.set_major_locator(locator)
    plt.gca().xaxis.set_major_formatter(formatter)

    # Ensure reasonable number of ticks
    # NOTE(review): a fixed 1-minute interval can still over-crowd the axis
    # for logs spanning hours — consider keeping AutoDateLocator instead.
    if len(df) > 20:
        plt.gca().xaxis.set_major_locator(mdates.MinuteLocator(interval=1))

    plt.title(f'{data_type.capitalize()} Over Time')
    plt.xlabel('Time')
    plt.legend()
    plt.grid(True)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig(output_path, dpi=300)
    plt.close()
    print(f"Plot saved to {output_path}")

def process_files(file_list, data_types, output_formats, output_dir):
    """Parse each log file and emit the requested outputs.

    Args:
        file_list: paths of log files to process.
        data_types: subset of {'voltage', 'current', 'temperature', 'pressure'}.
        output_formats: subset of {'csv', 'xlsx', 'plot'}.
        output_dir: requested output directory.
            NOTE(review): superseded per-file by get_output_dir(), so the
            --outdir CLI option currently has no effect — confirm intent.

    Fixes over the previous version: the 'xlsx' output format was accepted
    by the CLI but never produced (save_to_excel was never called); the
    three per-data-type branches are deduplicated; get_patterns() is built
    once instead of per file; the output_dir parameter is no longer
    clobbered by a loop-local value.
    """
    # Map each data type to the parsed-data key holding its readings.
    sources = {
        'voltage': 'power_supply',
        'current': 'power_supply',
        'temperature': 'cathode_temp',
        'pressure': 'vacuum_pressure',
    }
    # The pattern table depends only on data_types: build it once.
    patterns = get_patterns(data_types)

    for file in file_list:
        print(f"\nProcessing file: {file}")
        parsed_data = parse_log_file(file, patterns)
        if not parsed_data:
            continue

        # Date-prefixed directory derived from the log file name.
        file_out_dir = get_output_dir(file)
        base_filename = os.path.splitext(os.path.basename(file))[0]

        # Create subdirectories only for the requested output formats.
        csv_dir = ensure_output_dir(file_out_dir, 'csv') if 'csv' in output_formats else None
        xlsx_dir = ensure_output_dir(file_out_dir, 'xlsx') if 'xlsx' in output_formats else None
        plot_dir = ensure_output_dir(file_out_dir, 'plots') if 'plot' in output_formats else None
        stats_dir = ensure_output_dir(file_out_dir, 'statistics')

        for data_type in data_types:
            source = sources[data_type]
            if not parsed_data.get(source):
                continue
            df_sorted = pd.DataFrame(parsed_data[source]).sort_values('timestamp')

            if csv_dir:
                save_to_csv(df_sorted, os.path.join(csv_dir, f"{base_filename}_{data_type}.csv"))
            if xlsx_dir:
                save_to_excel(df_sorted, os.path.join(xlsx_dir, f"{base_filename}_{data_type}.xlsx"))
            if plot_dir:
                plot_data(df_sorted, data_type, os.path.join(plot_dir, f"{base_filename}_{data_type}.png"))
            save_statistics(df_sorted, data_type, os.path.join(stats_dir, f"{base_filename}_{data_type}_stats.txt"))

def save_statistics(df, data_type, output_path):
    """Write pandas describe() summaries for the given data type to a text file.

    Fixes a crash for data_type == 'pressure': the previous version always
    grouped by 'sensor' and read a 'temperature' column, neither of which
    exists in the vacuum-pressure DataFrame, so pressure statistics raised
    KeyError even though process_files requests them.

    Args:
        df: DataFrame of parsed readings for one equipment type.
        data_type: 'voltage', 'current', 'temperature', or 'pressure'.
        output_path: destination text file path.
    """
    with open(output_path, 'w') as f:
        f.write(f"{data_type.capitalize()} Summary Statistics\n")

        if data_type == 'pressure':
            # Single chamber gauge: there is no per-unit grouping column.
            f.write("\nPressure:\n")
            f.write(str(df['pressure'].describe()))
            f.write('\n')
        else:
            group_by = 'ps_number' if data_type in ['voltage', 'current'] else 'sensor'
            value_col = data_type if data_type in ['voltage', 'current'] else 'temperature'

            for group, group_df in df.groupby(group_by):
                f.write(f"\n{data_type.capitalize()} {group_by.replace('_', ' ').title()} {group}:\n")
                stats = group_df[value_col].describe()
                f.write(str(stats))
                f.write('\n')
    print(f"Statistics saved to {output_path}")


def main():
    """CLI entry point: parse arguments and run the processing pipeline."""
    options = parse_arguments()
    process_files(options.files, options.data, options.output, options.outdir)


if __name__ == "__main__":
    main()
Loading

0 comments on commit ff2935a

Please sign in to comment.