diff --git a/README.md b/README.md index 70423e6..bd24079 100644 --- a/README.md +++ b/README.md @@ -12,9 +12,9 @@ pip install pagurus ### Options ``` -usage: pagurus [-h] [-t TAG] [-o OUTFILE] [-p PATH] [-d] [-r RATE] [-u USER] [-noh] [-mv] +usage: pagurus [-h] [-o OUTFILE] [-p PATH] [-d] [-r RATE] [-u USER] [-noh] [-mv] [-l ROLLING] [--json] [--envvar ENVVAR] -optional arguments: +options: -h, --help show this help message and exit -o OUTFILE, --outfile OUTFILE File name for csv. @@ -23,14 +23,18 @@ optional arguments: -r RATE, --rate RATE Polling rate for process. -u USER, --user USER Username to get stats for. -noh, --no-header Turn off writting the header. - -mv, --move Moves file from 'running' to 'complete' + -mv, --move Moves file from 'running' to 'done' directories + -l ROLLING, --rolling ROLLING + Time to roll file over to number to file name in ~minutes. + --json Output JSON strings instead of CSV lines + --envvar ENVVAR add environment var to output (can be specified multiple times) ``` ### Running pagurus as a wrapper for a single user ```bash # Start running wrapper in the background for username -pagurus -u username -mv -p /path/to/output/dir -o test.csv +pagurus -u $USER -mv -p /path/to/output/dir -o test.csv # Get the previous running PID of pagurus export PID=$! 
# Sleep for a few seconds to let everything start running diff --git a/bin/pagurus b/bin/pagurus index a249ae3..e82d4f7 100644 --- a/bin/pagurus +++ b/bin/pagurus @@ -10,6 +10,8 @@ import os import logging import sys import signal +import json + from typing import Dict, List from pathlib import Path @@ -27,26 +29,47 @@ class FileWriter: def __init__(self, outfile, header: List[str] = [""], write_header: bool = True, - rolling: bool = False) -> None: + rolling: bool = False, + jsonout: bool = False, + env: Dict = {}) -> None: self.extensions = { 'gz': 'csv.gz', 'bz2': 'csv.bz2', 'csv': 'csv' } + self.header: List[str] = header self.number: int = 0 self.write_header: bool = write_header self.rolling: bool = rolling + self.env: Dict = env + + # Create an appropriate formatting function + if jsonout: + # Formatter function that outputs a dictionary in JSON + if header == [""]: + raise Exception("header cannot be blank for JSON output") + if write_header: + logging.debug("forcing write_header to false due to --json flag") + self.write_header = False + + # Adds envs to the end of the dict for fmt_writer + def fmt(*args): + temp = dict(zip(self.header, args)) + temp.update(env) + return "{}\n".format(json.dumps(temp)) + self.fmt_func = lambda *args: fmt(*args) - # Make formater based on number of metrics in header - fmt = ",".join(["{}" for _ in range(len(header))]) - self.fmt_writer = fmt + "\n" - + else: + # Make formatter function based on number of metrics in header + fmt = ",".join(["{}" for _ in range(len(self.header))]) + fmt_writer = fmt + "\n" + self.fmt_func = lambda *args: fmt_writer.format(*args) self.outfile: Path = outfile self.next_file() def write(self, *args): - self.output_file.write(self.fmt_writer.format(*args)) + self.output_file.write(self.fmt_func(*args)) def flush(self): self.output_file.flush() @@ -70,7 +93,7 @@ class FileWriter: self.output_file = open(self.outfile, "w") def _write_header(self): - 
self.output_file.write(self.fmt_writer.format(*self.header)) + self.output_file.write(self.fmt_func(*self.header)) self.output_file.flush() def _renamer(self): @@ -231,10 +254,12 @@ def runner( path: str = ".", filename: str = "stats.csv", pole_rate: float = 0.1, username: str = "", write_header: bool = True, - move: bool = False, rolling: int = 0): + move: bool = False, rolling: int = 0, + json: bool = False, env: Dict = {}): """ Runs while your executable is still running and logs info - about running process to a csv file. + about running process to the output file, defaulting to CSV format + unless the --json flag is set Args: outfile (str, optional): output filename. Defaults to "stats.csv". @@ -270,8 +295,10 @@ def runner( "num_fds", "read_count", "write_count", "read_chars", "write_chars", "cmdline", "current_dir"] - stats_file = FileWriter( - outfile=outfile, header=header, write_header=write_header, rolling=True if rolling > 0 else False) + stats_file = FileWriter(outfile=outfile, header=header, + write_header=write_header, + rolling=True if rolling > 0 else False, + jsonout=json, env=env) itteration = 0 # Keep pulling data from the process while it's running while not killer.kill_now: @@ -282,26 +309,29 @@ def runner( pData = proc.as_dict() # Add new line to the file with relevant data - stats_file.write(datetime.now().strftime("%m-%d-%Y %H:%M:%S.%f"), - proc_num, - pData['ppid'], - pData['name'], - pData['num_threads'], - *get_cputimes(pData), - *get_meminfo(pData), - pData['memory_percent'], - pData['num_fds'], - *get_iocounters(pData), - cmd_data(pData), - pData['cwd'] - ) + stats = [datetime.now().strftime("%m-%d-%Y %H:%M:%S.%f"), + proc_num, + pData['ppid'], + pData['name'], + pData['num_threads'], + *get_cputimes(pData), + *get_meminfo(pData), + pData['memory_percent'], + pData['num_fds'], + *get_iocounters(pData), + cmd_data(pData), + pData['cwd']] + + stats_file.write(*stats) except psutil.NoSuchProcess as e: # Comes when a process is killed 
between getting the number and getting the data pass except AttributeError as e: + # logging.debug(f'Error ({type(e).__name__}): {e}') pass except TypeError as e: + # logging.debug(f'Error ({type(e).__name__}): {e}') pass except Exception as e: logging.error(f'Error ({type(e).__name__}): {e}') @@ -342,6 +372,9 @@ if __name__ == '__main__': help="Moves file from 'running' to 'done' directories", default=False, action='store_true') parser.add_argument("-l", "--rolling", type=int, help="Time to roll file over to number to file name in ~minutes.", default=0) + parser.add_argument("--json", default=False, action="store_true", help="Output JSON strings instead of CSV lines") + parser.add_argument("--envvar", action="append", default=[], + help="add environment var to output (can be specified multiple times)") args = parser.parse_args() @@ -358,6 +391,10 @@ if __name__ == '__main__': # so rolling*10 should be okay for minutes (on average) rolling = args.rolling * 10 + # Gets the environment variables once and places them into a dict + env = {ev: os.getenv(ev) for ev in args.envvar} + # Start the recorder runner(path=args.path, filename=args.outfile, pole_rate=args.rate, - username=args.user, write_header=args.no_header, move=args.move, rolling=rolling) + username=args.user, write_header=args.no_header, move=args.move, + rolling=rolling, json=args.json, env=env) diff --git a/bin/test_FileWriter.py b/bin/test_FileWriter.py new file mode 100644 index 0000000..b666547 --- /dev/null +++ b/bin/test_FileWriter.py @@ -0,0 +1,117 @@ +import pytest +import json +import os +from importlib.util import spec_from_loader, module_from_spec +from importlib.machinery import SourceFileLoader + +# Do some funny business to load the modules from a file without the .py extension +# see https://stackoverflow.com/a/43602645 +from importlib.util import spec_from_loader, module_from_spec +from importlib.machinery import SourceFileLoader + +spec = spec_from_loader("pagurus",
SourceFileLoader("pagurus", "./pagurus")) +pagurus=module_from_spec(spec) +spec.loader.exec_module(pagurus) + +def test_FileWriter_csv(tmp_path): + print("Output file path", tmp_path) + header = ['name1',"name2","name3"] + outfile = tmp_path/f"test.csv" + fw = pagurus.FileWriter(outfile=outfile, header=header,write_header=True) + assert( fw.header == header) + assert( fw.write_header == True) + assert( fw.outfile == outfile) + assert( outfile.is_file()) + fw.write(0,1,2) + fw.close() + with open(outfile) as f: + contents = f.read() + print(contents) + lines = contents.splitlines() + assert( lines[0] == ",".join(header)) + assert( lines[1] == "0,1,2") + fw.outfile.unlink() + +def test_FileWriter_csv_envvar(tmp_path): + print("Output file path", tmp_path) + header = ['name1',"name2","name3"] + outfile = tmp_path/f"test.csv" + + # Clear out environment variables we will be testing with + try: + os.environ.pop("testytesty") + os.environ.pop("testytoasty") + except: + pass + os.environ['testytesty'] = 'test' + os.environ['testytoasty'] = 'test2' + fw = pagurus.FileWriter(outfile=outfile, header=header,write_header=True,env=["testytesty","testytoasty"]) + header2 = header+["testytesty","testytoasty"] + + assert( fw.header == header2) + assert( fw.write_header == True) + assert( fw.outfile == outfile) + assert( outfile.is_file()) + fw.write(0,1,2,"test","test2") + fw.close() + with open(outfile) as f: + contents = f.read() + print(contents) + lines = contents.splitlines() + assert( lines[0] == ",".join(header2)) + assert( lines[1] == '0,1,2,test,test2') + fw.outfile.unlink() + + +def test_FileWriter_json(tmp_path): + print("Output file path", tmp_path) + header = ['name1',"name2","name3"] + outfile = tmp_path/f"test.json" + fw = pagurus.FileWriter(outfile=outfile, header=header,write_header=True,jsonout=True) + assert( fw.header == header) + assert( fw.write_header == False) + assert( fw.outfile == outfile) + assert( outfile.is_file()) + fw.write(0,1,2) + fw.close() + 
with open(outfile) as f: + contents = f.read() + print(contents) + lines = contents.splitlines() + jsonout = json.dumps(dict(zip(header,[0,1,2]))) + assert( lines[0] == jsonout) + fw.outfile.unlink() + +def test_FileWriter_json_envvar(tmp_path): + print("Output file path", tmp_path) + header = ['name1',"name2","name3"] + outfile = tmp_path/f"test.json" + + # Clear out environment variables we will be testing with + try: + os.environ.pop("testytesty") + os.environ.pop("testytoasty") + except: + pass + with pytest.raises(KeyError) as e: + fw = pagurus.FileWriter(outfile=outfile, header=header,write_header=True,jsonout=True,env=["testytesty","testytoasty"]) + os.environ['testytesty'] = 'test' + os.environ['testytoasty'] = 'test2' + fw = pagurus.FileWriter(outfile=outfile, header=header,write_header=True,jsonout=True,env=["testytesty","testytoasty"]) + header2 = header+["testytesty","testytoasty"] + + assert( fw.header == header2) + assert( fw.write_header == False) + assert( fw.outfile == outfile) + assert( outfile.is_file()) + fw.write(0,1,2,"test","test2") + fw.close() + with open(outfile) as f: + contents = f.read() + print(contents) + lines = contents.splitlines() + jsonout = json.dumps(dict(zip(header2,[0,1,2,"test","test2"]))) + assert( lines[0] == jsonout) + fw.outfile.unlink() + + diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..1fd4893 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,2 @@ +[flake8] +max-line-length = 160 diff --git a/setup.py b/setup.py index a142f18..91afaeb 100644 --- a/setup.py +++ b/setup.py @@ -22,8 +22,9 @@ url="https://github.com/tylern4/pagurus", author="Nick Tyler", author_email="tylern@lbl.gov", - version='1.1', + version='1.2', scripts=glob('bin/*'), + py_modules=[], install_requires=[ 'psutil==5.8.0', ],