Updated instruction form attributes #102
Annotations
100 errors and 8 warnings
examples/machine-files/plot_machine_file.py#L12
Expected 2 blank lines, found 1 (E302)
|
examples/machine-files/plot_machine_file.py#L62
Block comment should start with '# ' (E265)
|
kerncraft/cacheprediction.py#L5
'copy.copy' imported but unused (F401)
|
kerncraft/cacheprediction.py#L11
'numpy as np' imported but unused (F401)
|
kerncraft/cacheprediction.py#L14
'collections.defaultdict' imported but unused (F401)
|
kerncraft/cacheprediction.py#L68
'...'.format(...) has unused arguments at position(s): 1 (F523)
|
kerncraft/cacheprediction.py#L297
Ambiguous variable name 'l' (E741)
|
kerncraft/cacheprediction.py#L304
Ambiguous variable name 'l' (E741)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/__init__.py#L1
"""Kerncraft static analytical performance modeling framework and tool."""
-__version__ = '0.8.15'
+
+__version__ = "0.8.15"
from .kerncraft import main
+
__main__ = main
# To trigger travis deployment to pypi, do the following:
# 1. Increment __version___
# 2. commit to RRZE-HPC/kerncraft's master branch
|
kerncraft/cacheprediction.py#L360
Local variable 'indices' is assigned to but never used (F841)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/__init__.py#L12
def get_header_path() -> str:
"""Return local folder path of header files."""
import os
- return os.path.abspath(os.path.dirname(os.path.realpath(__file__))) + '/headers/'
+
+ return os.path.abspath(os.path.dirname(os.path.realpath(__file__))) + "/headers/"
|
kerncraft/cacheprediction.py#L360
Ambiguous variable name 'l' (E741)
|
/home/runner/work/kerncraft/kerncraft/examples/machine-files/plot_machine_file.py#L5
import matplotlib.pyplot as plt
from matplotlib.ticker import EngFormatter
from kerncraft import machinemodel
-kernel_colors = 'bgrcmyk'
+kernel_colors = "bgrcmyk"
+
def main():
mm = machinemodel.MachineModel(sys.argv[1])
- kernels = sorted(mm['benchmarks']['kernels'])
- cache_levels = sorted(mm['benchmarks']['measurements'])
+ kernels = sorted(mm["benchmarks"]["kernels"])
+ cache_levels = sorted(mm["benchmarks"]["measurements"])
fig, axs = plt.subplots(len(cache_levels), 1, figsize=(7, 14), tight_layout=True)
lines = {}
for i, cache_level in enumerate(cache_levels):
max_bw = 0
max_bw_core = 0
axs[i].set_title(cache_level)
formatter1 = EngFormatter(places=0) # , sep="\N{THIN SPACE}") # U+2009
axs[i].yaxis.set_major_formatter(formatter1)
- if cache_level == 'L1':
+ if cache_level == "L1":
axs[i].set_ylabel("Bandwidth [B/s]")
else:
axs[i].set_ylabel("Bandwidth [B/s]\n(incl. write-allocate)")
- axs[i].set_xlabel('cores')
+ axs[i].set_xlabel("cores")
# axs[i].set_xscale('log')
for ki, kernel in enumerate(kernels):
- if cache_level == 'L1':
+ if cache_level == "L1":
# L1 does not have write-allocate, so everything is measured correctly
factor = 1.0
else:
- measurement_kernel_info = mm['benchmarks']['kernels'][kernel]
- factor = (float(measurement_kernel_info['read streams']['bytes']) +
- 2.0 * float(measurement_kernel_info['write streams']['bytes']) -
- float(measurement_kernel_info['read+write streams']['bytes'])) / \
- (float(measurement_kernel_info['read streams']['bytes']) +
- float(measurement_kernel_info['write streams']['bytes']))
+ measurement_kernel_info = mm["benchmarks"]["kernels"][kernel]
+ factor = (
+ float(measurement_kernel_info["read streams"]["bytes"])
+ + 2.0 * float(measurement_kernel_info["write streams"]["bytes"])
+ - float(measurement_kernel_info["read+write streams"]["bytes"])
+ ) / (
+ float(measurement_kernel_info["read streams"]["bytes"])
+ + float(measurement_kernel_info["write streams"]["bytes"])
+ )
- for SMT in mm['benchmarks']['measurements'][cache_level]:
+ for SMT in mm["benchmarks"]["measurements"][cache_level]:
measurements = [
- bw*factor
- for bw in mm['benchmarks']['measurements'][cache_level][SMT]['results'][kernel]]
- max_bw = max(measurements+[max_bw])
+ bw * factor
+ for bw in mm["benchmarks"]["measurements"][cache_level][SMT][
+ "results"
+ ][kernel]
+ ]
+ max_bw = max(measurements + [max_bw])
max_bw_core = max(max_bw_core, measurements[0])
- lines[kernel], = axs[i].plot(
+ (lines[kernel],) = axs[i].plot(
range(1, 1 + len(measurements)),
measurements,
- linestyle=['-', '--', '..', '-.'][SMT-1],
- color=kernel_colors[ki])
+ linestyle=["-", "--", "..", "-."][SMT - 1],
+ color=kernel_colors[ki],
+ )
axs[i].set_xlim(1)
- axs[i].axhline(max_bw, color='black')
- axs[i].axhline(max_bw_core, color='black')
- axs[i].set_yticks(np.append(axs[i].get_yticks(), [float(max_bw), float(max_bw_core)]))
- axs[i].set_xticks(range(1, 1+len(measurements)))
- fig.legend(lines.values(), lines.keys(), 'lower center', ncol=10)
- fig.savefig(sys.argv[1]+'.pdf')
- #plt.show()
+ axs[i].axhline(max_bw, color="black")
+ axs[i].axhline(max_bw_core, color="black")
+ axs[i].set_yticks(
+ np.append(axs[i].get_yticks(), [float(max_bw), float(max_bw_core)])
+ )
+ axs[i].set_xticks(range(1, 1 + len(measurements)))
+ fig.legend(lines.values(), lines.keys(), "lower center", ncol=10)
+ fig.savefig(sys.argv[1] + ".pdf")
+ # plt.show()
-if __name__ == '__main__':
+if __name__ == "__main__":
main()
|
kerncraft/cacheprediction.py#L411
Missing whitespace after ',' (E231)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/cachetile.py#L10
from .machinemodel import MachineModel
from .prefixedunit import PrefixedUnit
def create_parser():
- parser = argparse.ArgumentParser(description='Find optimal tiling sizes using the ECMData '
- 'model.')
- parser.add_argument('--machine', '-m', type=argparse.FileType('r'), required=True,
- help='Path to machine description yaml file.')
- parser.add_argument('--define', '-D', nargs=2, metavar=('KEY', 'VALUE'), default=[],
- action='append',
- help='Define fixed constants. Values must be integer.')
- parser.add_argument('--min-block-length', '-b', type=int, metavar='MIN', default=100)
- parser.add_argument('--verbose', '-v', action='count', default=0,
- help='Increases verbosity level.')
- parser.add_argument('--cores', '-c', metavar='CORES', type=int, default=1,
- help='Number of cores to be used in parallel. (default: 1)')
- parser.add_argument('description_file', metavar='FILE', type=argparse.FileType(),
- help='File with loop kernel description in YAML')
+ parser = argparse.ArgumentParser(
+ description="Find optimal tiling sizes using the ECMData " "model."
+ )
+ parser.add_argument(
+ "--machine",
+ "-m",
+ type=argparse.FileType("r"),
+ required=True,
+ help="Path to machine description yaml file.",
+ )
+ parser.add_argument(
+ "--define",
+ "-D",
+ nargs=2,
+ metavar=("KEY", "VALUE"),
+ default=[],
+ action="append",
+ help="Define fixed constants. Values must be integer.",
+ )
+ parser.add_argument(
+ "--min-block-length", "-b", type=int, metavar="MIN", default=100
+ )
+ parser.add_argument(
+ "--verbose", "-v", action="count", default=0, help="Increases verbosity level."
+ )
+ parser.add_argument(
+ "--cores",
+ "-c",
+ metavar="CORES",
+ type=int,
+ default=1,
+ help="Number of cores to be used in parallel. (default: 1)",
+ )
+ parser.add_argument(
+ "description_file",
+ metavar="FILE",
+ type=argparse.FileType(),
+ help="File with loop kernel description in YAML",
+ )
return parser
def simulate(kernel, model, define_dict, blocking_constant, blocking_length):
"""Setup and execute model with given blocking length"""
|
kerncraft/cacheprediction.py#L445
Continuation line over-indented for visual indent (E127)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/cachetile.py#L38
kernel.set_constant(k, v)
kernel.set_constant(blocking_constant, blocking_length)
model.analyze()
- return sum([cy for dscr, cy in model.results['cycles']])
+ return sum([cy for dscr, cy in model.results["cycles"]])
def run(parser, args):
# machine information
# Read machine description
machine = MachineModel(args.machine.name)
# process kernel description
description = str(args.description_file.read())
- yaml = ruamel.yaml.YAML(typ='unsafe')
+ yaml = ruamel.yaml.YAML(typ="unsafe")
yaml.register_class(PrefixedUnit)
kernel = KernelDescription(yaml.load(description))
# Add constants from define arguments
define_dict = {}
|
kerncraft/cacheprediction.py#L533
Line too long (120 > 100 characters) (E501)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/cachetile.py#L68
var_type, var_size = var_info
for size in var_size:
for s in size.atoms(sympy.Symbol):
if s.name not in define_dict:
undefined_constants.add(s)
- assert len(undefined_constants) == 1, "There are multiple or none undefined constants {!r}. " \
+ assert len(undefined_constants) == 1, (
+ "There are multiple or none undefined constants {!r}. "
"Exactly one must be undefined.".format(undefined_constants)
+ )
blocking_constant = undefined_constants.pop()
if args.verbose >= 1:
print("blocking constant:", blocking_constant)
|
kerncraft/cacheprediction.py#L548
Local variable 'elements_per_cacheline' is assigned to but never used (F841)
|
kerncraft/cacheprediction.py#L554
Local variable 'inner_index' is assigned to but never used (F841)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/cachetile.py#L81
min_length = args.min_block_length
min_runtime = simulate(kernel, model, define_dict, blocking_constant, min_length)
# determain max search length
# upper bound: number of floats that fit into the last level cache
- max_length = int(machine['memory hierarchy'][-2]['size per group'])//4
+ max_length = int(machine["memory hierarchy"][-2]["size per group"]) // 4
if args.verbose >= 1:
print("upper search bound:", max_length)
- length = min_length*3
+ length = min_length * 3
while length < max_length:
runtime = simulate(kernel, model, define_dict, blocking_constant, length)
if args.verbose >= 1:
- print("min", min_length, min_runtime, "current", length, runtime, "max", max_length)
+ print(
+ "min",
+ min_length,
+ min_runtime,
+ "current",
+ length,
+ runtime,
+ "max",
+ max_length,
+ )
# Increase search window
if runtime > min_runtime:
max_length = length # and break
else:
|
kerncraft/cacheprediction.py#L555
Local variable 'inner_increment' is assigned to but never used (F841)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/cachetile.py#L104
length = (max_length - min_length) // 2 + min_length
# Execute simulation
runtime = simulate(kernel, model, define_dict, blocking_constant, length)
if args.verbose >= 1:
- print("min", min_length, min_runtime, "current", length, runtime, "max", max_length)
+ print(
+ "min",
+ min_length,
+ min_runtime,
+ "current",
+ length,
+ runtime,
+ "max",
+ max_length,
+ )
# Narrow search area
if runtime <= min_runtime:
min_runtime = runtime
min_length = length
|
kerncraft/cacheprediction.py#L561
Too many blank lines (2) (E303)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/cachetile.py#L120
print("found for {}:".format(blocking_constant))
print(length)
sys.exit(0)
else:
if args.verbose:
- print("nothing found. exceeded search window and not change in performance found.")
+ print(
+ "nothing found. exceeded search window and not change in performance found."
+ )
sys.exit(1)
def main():
# Create and populate parser
|
kerncraft/cacheprediction.py#L650
Too many blank lines (2) (E303)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/cachetile.py#L135
# BUSINESS LOGIC IS FOLLOWING
run(parser, args)
-if __name__ == '__main__':
+if __name__ == "__main__":
main()
|
kerncraft/cacheprediction.py#L672
Continuation line over-indented for visual indent (E127)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/intervals.py#L6
"""Very simple interval implementation for integers (might also work on floats)."""
def __init__(self, *args, **kwargs):
"""If keywords *sane* is True (default: False), checks will not be done on given data."""
self.data = list(args)
- if not kwargs.get('sane', False):
+ if not kwargs.get("sane", False):
self.data = [d for d in self.data if d[1] > d[0]]
self._enforce_order()
self._enforce_no_overlap()
def _enforce_order(self):
|
kerncraft/cacheprediction.py#L695
Blank line contains whitespace (W293)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/intervals.py#L18
self.data.sort(key=lambda d: d[0])
def _enforce_no_overlap(self, start_at=0):
"""Enforce that no ranges overlap in internal storage."""
i = start_at
- while i+1 < len(self.data):
- if self.data[i][1] >= self.data[i+1][0]:
+ while i + 1 < len(self.data):
+ if self.data[i][1] >= self.data[i + 1][0]:
# beginning of i+1-th range is contained in i-th range
- if self.data[i][1] < self.data[i+1][1]:
+ if self.data[i][1] < self.data[i + 1][1]:
# i+1-th range is longer, thus enlarge i-th range
- self.data[i][1] = self.data[i+1][1]
+ self.data[i][1] = self.data[i + 1][1]
# removed contained range
- del self.data[i+1]
+ del self.data[i + 1]
i += 1
def __and__(self, other):
"""Combine two intervals, under the assumption that they are sane."""
- return Intervals(*(self.data+other.data))
+ return Intervals(*(self.data + other.data))
def __len__(self):
"""Return sum of range lengths."""
- return int(sum(upper-lower for (lower, upper) in self.data))
+ return int(sum(upper - lower for (lower, upper) in self.data))
def __contains__(self, needle):
"""Return True if needle is contained in intervals."""
return any(lower <= needle < upper for (lower, upper) in self.data)
def __repr__(self):
"""Return string representation of object."""
- return str(self.__class__) + '(' + ', '.join([list.__repr__(d) for d in self.data]) + ')'
+ return (
+ str(self.__class__)
+ + "("
+ + ", ".join([list.__repr__(d) for d in self.data])
+ + ")"
+ )
def __eq__(self, other):
"""Return True if other contains exactly the same interval regions."""
return self.data == other.data
|
kerncraft/cacheprediction.py#L711
Continuation line under-indented for visual indent (E128)
|
kerncraft/iaca_get.py#L6
're' imported but unused (F401)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/iaca_get.py#L10
import platform
from urllib.request import urlopen
url_dict = {
- 'v3.0': {
- 'mac': 'https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-v3.0-mac.zip',
- 'lin64': 'https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-v3.0-lin64.zip',
+ "v3.0": {
+ "mac": "https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-v3.0-mac.zip",
+ "lin64": "https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-v3.0-lin64.zip",
},
- 'v2.3': {
- 'mac': 'https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.3-mac.zip',
- 'lin64': 'https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.3-lin64.zip',
+ "v2.3": {
+ "mac": "https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.3-mac.zip",
+ "lin64": "https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.3-lin64.zip",
},
- 'v2.2': {
- 'mac': 'https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.2-mac.zip',
- 'lin64': 'https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.2-lin64.zip',
+ "v2.2": {
+ "mac": "https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.2-mac.zip",
+ "lin64": "https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.2-lin64.zip",
},
- 'v2.1': {
- 'mac': 'https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.1-mac64.zip',
- 'lin64': 'https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.1-lin64.zip',
- }
+ "v2.1": {
+ "mac": "https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.1-mac64.zip",
+ "lin64": "https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.1-lin64.zip",
+ },
}
+
class TemporaryDirectory:
def __enter__(self):
self.tempdir = tempfile.mkdtemp()
return self.tempdir
|
kerncraft/iaca_get.py#L16
Line too long (115 > 100 characters) (E501)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/iaca_get.py#L38
def __exit__(self, type_, value, traceback):
shutil.rmtree(self.tempdir)
def get_os():
- os_map = {'Darwin': 'mac', 'Linux': 'lin64'}
+ os_map = {"Darwin": "mac", "Linux": "lin64"}
system = platform.system()
- assert system in os_map, "Unsupported operating system (platform.system() should return " \
- "Linux or Darwin)."
+ assert system in os_map, (
+ "Unsupported operating system (platform.system() should return "
+ "Linux or Darwin)."
+ )
return os_map[system]
def search_path():
"""Return potential locations of IACA installation."""
operating_system = get_os()
# 1st choice: in ~/.kerncraft/iaca-{}
# 2nd choice: in package directory / iaca-{}
- return [os.path.expanduser("~/.kerncraft/iaca/{}/".format(operating_system)),
- os.path.abspath(os.path.dirname(os.path.realpath(__file__))) + '/iaca/{}/'.format(
- operating_system)]
+ return [
+ os.path.expanduser("~/.kerncraft/iaca/{}/".format(operating_system)),
+ os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
+ + "/iaca/{}/".format(operating_system),
+ ]
def find_iaca():
"""Return (hopefully) valid installation of IACA."""
- requires = ['iaca2.2', 'iaca2.3', 'iaca3.0']
+ requires = ["iaca2.2", "iaca2.3", "iaca3.0"]
for path in search_path():
- path += 'bin/'
+ path += "bin/"
valid = True
for r in requires:
if not os.path.exists(path + r):
valid = False
break
if valid:
return path
- raise RuntimeError("No IACA installation found in {}. Run iaca_get command to fix this issue."
- "".format(search_path()))
+ raise RuntimeError(
+ "No IACA installation found in {}. Run iaca_get command to fix this issue."
+ "".format(search_path())
+ )
def main():
try:
path = find_iaca()
- print('IACA already installed at', path)
- if '--force' in sys.argv:
- sys.argv.remove('--force')
+ print("IACA already installed at", path)
+ if "--force" in sys.argv:
+ sys.argv.remove("--force")
else:
- print('For forced installation add --force')
+ print("For forced installation add --force")
sys.exit()
except RuntimeError:
pass
- if len(sys.argv) < 2 or sys.argv[1] != \
- "--I-accept-the-Intel-What-If-Pre-Release-License-Agreement-and-please-take-my-soul":
- print("Go to https://software.intel.com/protected-download/267266/157552 and read the"
- "Intel Pre-Release License Agreement.")
+ if (
+ len(sys.argv) < 2
+ or sys.argv[1]
+ != "--I-accept-the-Intel-What-If-Pre-Release-License-Agreement-and-please-take-my-soul"
+ ):
+ print(
+ "Go to https://software.intel.com/protected-download/267266/157552 and read the"
+ "Intel Pre-Release License Agreement."
+ )
print("")
- print("Add "
- "--I-accept-the-Intel-What-If-Pre-Release-License-Agreement-and-please-take-my-soul"
- " for installation of IACA.")
+ print(
+ "Add "
+ "--I-accept-the-Intel-What-If-Pre-Release-License-Agreement-and-please-take-my-soul"
+ " for installation of IACA."
+ )
sys.exit(1)
if len(sys.argv) >= 3:
- assert sys.argv[2] in ['lin64', 'mac']
+ assert sys.argv[2] in ["lin64", "mac"]
operating_system = sys.argv[2]
else:
operating_system = get_os()
# Locate and create IACA base directory, in reverse server order
base_dir = None
for path in reversed(search_path()):
- print("Trying " + path + ": ", end='', file=sys.stderr)
+ print("Trying " + path + ": ", end="", file=sys.stderr)
try:
os.makedirs(path)
base_dir = path
break
except PermissionError:
|
kerncraft/iaca_get.py#L17
Line too long (119 > 100 characters) (E501)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/iaca_get.py#L115
except OSError:
# Directory already exists
print("already exists.", file=sys.stderr)
continue
if base_dir is None:
- print('Aborted.', file=sys.stderr)
+ print("Aborted.", file=sys.stderr)
sys.exit(1)
else:
print("selected.", file=sys.stderr)
- print("IACA v2.1 (for manual use - only version analyzing latency):", file=sys.stderr)
- if operating_system == 'mac':
- operating_system_temp = 'mac64'
+ print(
+ "IACA v2.1 (for manual use - only version analyzing latency):", file=sys.stderr
+ )
+ if operating_system == "mac":
+ operating_system_temp = "mac64"
else:
operating_system_temp = operating_system
- url = url_dict['v2.1'][operating_system]
+ url = url_dict["v2.1"][operating_system]
print("Downloading", url, "...", file=sys.stderr)
zfile = zipfile.ZipFile(BytesIO(urlopen(url).read()))
- members = [n
- for n in zfile.namelist()
- if '/.' not in n and n.startswith('iaca-{:}/'.format(operating_system_temp))]
+ members = [
+ n
+ for n in zfile.namelist()
+ if "/." not in n and n.startswith("iaca-{:}/".format(operating_system_temp))
+ ]
# Exctract to temp folder and copy to correct directory
print("Extracting...", file=sys.stderr)
with TemporaryDirectory() as tempdir:
zfile.extractall(tempdir, members=members)
- shutil.copytree(tempdir + '/iaca-{}'.format(operating_system_temp), base_dir + 'v2.1')
+ shutil.copytree(
+ tempdir + "/iaca-{}".format(operating_system_temp), base_dir + "v2.1"
+ )
# Correct permissions of executables
print("Correcting permissions of binary...")
- st = os.stat(base_dir + 'v2.1/bin/iaca')
- os.chmod(
- base_dir + 'v2.1/bin/iaca',
- st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH
- )
- st = os.stat(base_dir + 'v2.1/bin/iaca.sh')
- os.chmod(
- base_dir + 'v2.1/bin/iaca.sh',
- st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH
+ st = os.stat(base_dir + "v2.1/bin/iaca")
+ os.chmod(
+ base_dir + "v2.1/bin/iaca",
+ st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH,
+ )
+ st = os.stat(base_dir + "v2.1/bin/iaca.sh")
+ os.chmod(
+ base_dir + "v2.1/bin/iaca.sh",
+ st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH,
)
# Fix iaca.sh
print("Fixing iaca.sh...", file=sys.stderr)
- iaca_sh = open(base_dir + 'v2.1/bin/iaca.sh').read()
- iaca_sh = iaca_sh.replace('realpath', 'readlink -f', 1)
- iaca_sh = iaca_sh.replace('mypath=`pwd`', 'mypath=`dirname $0`', 1)
- iaca_sh = iaca_sh.replace('path=$(cd "$(dirname "$0")"; pwd)',
- 'script=`readlink -f $0`\n\tpath=`dirname "$script"`', 1)
- open(base_dir + 'v2.1/bin/iaca.sh', 'w').write(iaca_sh)
- print("IACA v2.1 installed to", os.getcwd() + '/' + base_dir + 'v2.1', file=sys.stderr)
+ iaca_sh = open(base_dir + "v2.1/bin/iaca.sh").read()
+ iaca_sh = iaca_sh.replace("realpath", "readlink -f", 1)
+ iaca_sh = iaca_sh.replace("mypath=`pwd`", "mypath=`dirname $0`", 1)
+ iaca_sh = iaca_sh.replace(
+ 'path=$(cd "$(dirname "$0")"; pwd)',
+ 'script=`readlink -f $0`\n\tpath=`dirname "$script"`',
+ 1,
+ )
+ open(base_dir + "v2.1/bin/iaca.sh", "w").write(iaca_sh)
+ print(
+ "IACA v2.1 installed to", os.getcwd() + "/" + base_dir + "v2.1", file=sys.stderr
+ )
print("IACA v2.2 (for NHM and WSM support):", file=sys.stderr)
- url = url_dict['v2.2'][operating_system]
+ url = url_dict["v2.2"][operating_system]
print("Downloading", url, "...", file=sys.stderr)
zfile = zipfile.ZipFile(BytesIO(urlopen(url).read()))
- members = [n
- for n in zfile.namelist()
- if '/.' not in n and n.startswith('iaca-{:}/'.format(operating_system))]
+ members = [
+ n
+ for n in zfile.namelist()
+ if "/." not in n and n.startswith("iaca-{:}/".format(operating_system))
+ ]
# Exctract to temp folder and copy to correct directory
print("Extracting...", file=sys.stderr)
with TemporaryDirectory() as tempdir:
zfile.extractall(tempdir, members=members)
- shutil.copytree(tempdir + '/iaca-{}'.format(operating_system), base_dir + 'v2.2')
+ shutil.copytree(
+ tempdir + "/iaca-{}".format(operating_system), base_dir + "v2.2"
+ )
# Correct permissions of executables
print("Correcting permissions of binary...")
- st = os.stat(base_dir + 'v2.2/bin/iaca')
- os.chmod(
- base_dir + 'v2.2/bin/iaca',
- st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH
- )
- st = os.stat(base_dir + 'v2.2/bin/iaca.sh')
- os.chmod(
- base_dir + 'v2.2/bin/iaca.sh',
- st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH
+ st = os.stat(base_dir + "v2.2/bin/iaca")
+ os.chmod(
+ base_dir + "v2.2/bin/iaca",
+ st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH,
+ )
+ st = os.stat(base_dir + "v2.2/bin/iaca.sh")
+ os.chmod(
+ base_dir + "v2.2/bin/iaca.sh",
+ st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH,
)
# Fix iaca.sh
print("Fixing iaca.sh...", file=sys.stderr)
- iaca_sh = open(base_dir + 'v2.2/bin/iaca.sh').read()
- iaca_sh = iaca_sh.replace('realpath', 'readlink -f', 1)
- iaca_sh = iaca_sh.replace('mypath=`pwd`', 'mypath=`dirname $0`', 1)
- iaca_sh = iaca_sh.replace('path=$(cd "$(dirname "$0")"; pwd)',
- 'script=`readlink -f $0`\n\tpath=`dirname "$script"`', 1)
- open(base_dir + 'v2.2/bin/iaca.sh', 'w').write(iaca_sh)
- print("IACA v2.2 installed to", os.getcwd() + '/' + base_dir + 'v2.2', file=sys.stderr)
+ iaca_sh = open(base_dir + "v2.2/bin/iaca.sh").read()
+ iaca_sh = iaca_sh.replace("realpath", "readlink -f", 1)
+ iaca_sh = iaca_sh.replace("mypath=`pwd`", "mypath=`dirname $0`", 1)
+ iaca_sh = iaca_sh.replace(
+ 'path=$(cd "$(dirname "$0")"; pwd)',
+ 'script=`readlink -f $0`\n\tpath=`dirname "$script"`',
+ 1,
+ )
+ open(base_dir + "v2.2/bin/iaca.sh", "w").write(iaca_sh)
+ print(
+ "IACA v2.2 installed to", os.getcwd() + "/" + base_dir + "v2.2", file=sys.stderr
+ )
print("IACA v2.3 (for SNB and IVY support):", file=sys.stderr)
- url = url_dict['v2.3'][operating_system]
+ url = url_dict["v2.3"][operating_system]
print("Downloading", url, "...", file=sys.stderr)
zfile = zipfile.ZipFile(BytesIO(urlopen(url).read()))
- members = [n
- for n in zfile.namelist()
- if '/.' not in n and n.startswith('iaca-{:}/'.format(operating_system))]
+ members = [
+ n
+ for n in zfile.namelist()
+ if "/." not in n and n.startswith("iaca-{:}/".format(operating_system))
+ ]
# Exctract to temp folder and copy to correct directory
print("Extracting...", file=sys.stderr)
with TemporaryDirectory() as tempdir:
zfile.extractall(tempdir, members=members)
- shutil.copytree(tempdir + '/iaca-{}'.format(operating_system), base_dir + 'v2.3')
+ shutil.copytree(
+ tempdir + "/iaca-{}".format(operating_system), base_dir + "v2.3"
+ )
# Correct permissions of executables
print("Correcting permissions of binary...")
- st = os.stat(base_dir + 'v2.3/bin/iaca')
- os.chmod(
- base_dir + 'v2.3/bin/iaca',
- st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH
- )
- st = os.stat(base_dir + 'v2.3/bin/iaca.sh')
- os.chmod(
- base_dir + 'v2.3/bin/iaca.sh',
- st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH
+ st = os.stat(base_dir + "v2.3/bin/iaca")
+ os.chmod(
+ base_dir + "v2.3/bin/iaca",
+ st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH,
+ )
+ st = os.stat(base_dir + "v2.3/bin/iaca.sh")
+ os.chmod(
+ base_dir + "v2.3/bin/iaca.sh",
+ st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH,
)
# Fix iaca.sh
print("Fixing iaca.sh...", file=sys.stderr)
- iaca_sh = open(base_dir + 'v2.3/bin/iaca.sh').read()
- iaca_sh = iaca_sh.replace('realpath', 'readlink -f', 1)
- iaca_sh = iaca_sh.replace('mypath=`pwd`', 'mypath=`dirname $0`', 1)
- iaca_sh = iaca_sh.replace('path=$(cd "$(dirname "$0")"; pwd)',
- 'script=`readlink -f $0`\n\tpath=`dirname "$script"`', 1)
- open(base_dir + 'v2.3/bin/iaca.sh', 'w').write(iaca_sh)
- print("IACA v2.3 installed to", os.getcwd() + '/' + base_dir + 'v2.3', file=sys.stderr)
+ iaca_sh = open(base_dir + "v2.3/bin/iaca.sh").read()
+ iaca_sh = iaca_sh.replace("realpath", "readlink -f", 1)
+ iaca_sh = iaca_sh.replace("mypath=`pwd`", "mypath=`dirname $0`", 1)
+ iaca_sh = iaca_sh.replace(
+ 'path=$(cd "$(dirname "$0")"; pwd)',
+ 'script=`readlink -f $0`\n\tpath=`dirname "$script"`',
+ 1,
+ )
+ open(base_dir + "v2.3/bin/iaca.sh", "w").write(iaca_sh)
+ print(
+ "IACA v2.3 installed to", os.getcwd() + "/" + base_dir + "v2.3", file=sys.stderr
+ )
print("IACA v3.0 (for HSW, BDW, SKL and SKX support):", file=sys.stderr)
- url = url_dict['v3.0'][operating_system]
+ url = url_dict["v3.0"][operating_system]
print("Downloading", url, "...", file=sys.stderr)
zfile = zipfile.ZipFile(BytesIO(urlopen(url).read()))
- members = [n
- for n in zfile.namelist()
- if '/.' not in n and n.startswith('iaca-{:}/'.format(operating_system))]
+ members = [
+ n
+ for n in zfile.namelist()
+ if "/." not in n and n.startswith("iaca-{:}/".format(operating_system))
+ ]
# Exctract to temp folder and copy to correct directory
print("Extracting...", file=sys.stderr)
with TemporaryDirectory() as tempdir:
zfile.extractall(tempdir, members=members)
- shutil.copytree(tempdir + '/iaca-{}'.format(operating_system), base_dir + 'v3.0')
+ shutil.copytree(
+ tempdir + "/iaca-{}".format(operating_system), base_dir + "v3.0"
+ )
print("Correcting permissions of binary...", file=sys.stderr)
- st = os.stat(base_dir + 'v3.0/iaca')
- os.chmod(
- base_dir + 'v3.0/iaca',
- st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH
- )
- print("IACA v3.0 installed to", os.getcwd() + '/' + base_dir + 'v3.0', file=sys.stderr)
+ st = os.stat(base_dir + "v3.0/iaca")
+ os.chmod(
+ base_dir + "v3.0/iaca", st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH
+ )
+ print(
+ "IACA v3.0 installed to", os.getcwd() + "/" + base_dir + "v3.0", file=sys.stderr
+ )
# Create unified bin directory to access both operating_systems
- os.mkdir(base_dir + 'bin')
- os.symlink('../v2.1/bin/iaca.sh', base_dir + 'bin/iaca2.1')
- os.symlink('../v2.2/bin/iaca.sh', base_dir + 'bin/iaca2.2')
- os.symlink('../v2.3/bin/iaca.sh', base_dir + 'bin/iaca2.3')
- os.symlink('../v3.0/iaca', base_dir + 'bin/iaca3.0')
- print('export PATH=' + base_dir + 'bin/:$PATH')
-
-
-if __name__ == '__main__':
+ os.mkdir(base_dir + "bin")
+ os.symlink("../v2.1/bin/iaca.sh", base_dir + "bin/iaca2.1")
+ os.symlink("../v2.2/bin/iaca.sh", base_dir + "bin/iaca2.2")
+ os.symlink("../v2.3/bin/iaca.sh", base_dir + "bin/iaca2.3")
+ os.symlink("../v3.0/iaca", base_dir + "bin/iaca3.0")
+ print("export PATH=" + base_dir + "bin/:$PATH")
+
+
+if __name__ == "__main__":
main()
|
kerncraft/iaca_get.py#L20
Line too long (114 > 100 characters) (E501)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/kerncraft.py#L33
Numbers can either be evenly distributed in a linear space (if *log* is False) or in a log
space (if *log* is True). If *log* is True, base is used to define the log space basis.
If *endpoint* is True, *stop* will be the last retruned value, as long as *num* >= 2.
"""
- assert type(start) is int and type(stop) is int and type(num) is int, \
- "start, stop and num need to be intergers"
+ assert (
+ type(start) is int and type(stop) is int and type(num) is int
+ ), "start, stop and num need to be intergers"
assert num >= 2, "num has to be atleast 2"
if log:
start = math.log(start, base)
stop = math.log(stop, base)
|
kerncraft/iaca_get.py#L21
Line too long (118 > 100 characters) (E501)
|
kerncraft/iaca_get.py#L24
Line too long (114 > 100 characters) (E501)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/kerncraft.py#L96
if base is given, the integers are evenly spaced on that base (default: 10)
"""
def __call__(self, parser, namespace, values, option_string=None):
"""Execute action."""
- message = ''
+ message = ""
if len(values) != 2:
- message = 'requires 2 arguments'
+ message = "requires 2 arguments"
else:
- m = re.match(r'(?P<start>\d+)(?:-(?P<stop>\d+)(?::(?P<num>\d+)'
- r'(:?(?P<log>log)(:?(?P<base>\d+))?)?)?)?',
- values[1])
+ m = re.match(
+ r"(?P<start>\d+)(?:-(?P<stop>\d+)(?::(?P<num>\d+)"
+ r"(:?(?P<log>log)(:?(?P<base>\d+))?)?)?)?",
+ values[1],
+ )
if m:
gd = m.groupdict()
- if gd['stop'] is None:
- values[1] = [int(gd['start'])]
- elif gd['num'] is None:
- values[1] = list(range(int(gd['start']), int(gd['stop']) + 1))
+ if gd["stop"] is None:
+ values[1] = [int(gd["start"])]
+ elif gd["num"] is None:
+ values[1] = list(range(int(gd["start"]), int(gd["stop"]) + 1))
else:
- log = gd['log'] is not None
- base = int(gd['base']) if gd['base'] is not None else 10
- values[1] = list(space(
- int(gd['start']), int(gd['stop']), int(gd['num']), log=log, base=base))
+ log = gd["log"] is not None
+ base = int(gd["base"]) if gd["base"] is not None else 10
+ values[1] = list(
+ space(
+ int(gd["start"]),
+ int(gd["stop"]),
+ int(gd["num"]),
+ log=log,
+ base=base,
+ )
+ )
else:
- message = 'second argument must match: start[-stop[:num[log[base]]]]'
+ message = "second argument must match: start[-stop[:num[log[base]]]]"
if message:
raise argparse.ArgumentError(self, message)
if hasattr(namespace, self.dest):
|
kerncraft/iaca_get.py#L25
Line too long (118 > 100 characters) (E501)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/kerncraft.py#L128
setattr(namespace, self.dest, [values])
class VersionAction(argparse.Action):
"""Reimplementation of the version action, because argparse's version outputs to stderr."""
- def __init__(self, option_strings, version, dest=argparse.SUPPRESS,
- default=argparse.SUPPRESS,
- help="show program's version number and exit"):
+
+ def __init__(
+ self,
+ option_strings,
+ version,
+ dest=argparse.SUPPRESS,
+ default=argparse.SUPPRESS,
+ help="show program's version number and exit",
+ ):
super(VersionAction, self).__init__(
option_strings=option_strings,
dest=dest,
default=default,
nargs=0,
- help=help)
+ help=help,
+ )
self.version = version
def __call__(self, parser, namespace, values, option_string=None):
print(parser.prog, self.version)
parser.exit()
def create_parser():
"""Return argparse parser."""
parser = argparse.ArgumentParser(
- description='Analytical performance modelling and benchmarking toolkit.',
- epilog='For help, examples, documentation and bug reports go to:\nhttps://github.com'
- '/RRZE-HPC/kerncraft\nLicense: AGPLv3',)
- parser.add_argument('--version', action=VersionAction, version='{}'.format(__version__))
- parser.add_argument('--machine', '-m', type=argparse.FileType('r'), required=True,
- help='Path to machine description yaml file.')
- parser.add_argument('--pmodel', '-p', choices=models.__all__, required=True, action='append',
- default=[], help='Performance model to apply')
- parser.add_argument('-D', '--define', nargs=2, metavar=('KEY', 'VALUE'), default=[],
- action=AppendStringRange,
- help='Define constant to be used in C code. Values must be integer or '
- 'match start-stop[:num[log[base]]]. If range is given, all '
- 'permutation s will be tested. Overwrites constants from testcase '
- 'file. Key can be . for default value for all used constants.')
- parser.add_argument('--verbose', '-v', action='count', default=0,
- help='Increases verbosity level.')
- parser.add_argument('code_file', metavar='FILE', type=argparse.FileType(),
- help='File with loop kernel C code')
- parser.add_argument('--asm-block', metavar='BLOCK', default='auto',
- help='Number of ASM block to mark for IACA, "auto" for automatic '
- 'selection or "manual" for interactiv selection.')
- parser.add_argument('--pointer-increment', metavar='INCR', default='auto', type=int_or_str,
- help='Increment of store pointer within one ASM block in bytes. If "auto": '
- 'automatic detection, error on failure to detect, if '
- '"auto_with_manual_fallback": fallback to manual input, or if '
- '"manual": always prompt user.')
- parser.add_argument('--store', metavar='PICKLE', type=argparse.FileType('a+b'),
- help='Addes results to PICKLE file for later processing.')
- parser.add_argument('--json', metavar='JSON', type=argparse.FileType('a+'),
- help='Stores result as JSON file for later processing')
- parser.add_argument('--unit', '-u', choices=['cy/CL', 'cy/It', 'It/s', 'FLOP/s'],
- help='Select the output unit, defaults to model specific if not given.')
- parser.add_argument('--cores', '-c', metavar='CORES', type=int, default=1,
- help='Number of cores to be used in parallel. (default: 1) '
- 'ECM model will consider the scaling of the last level cache and '
- 'predict the overall performance in addition to single-core behavior. '
- 'The benchmark mode will run the code with OpenMP on as many physical '
- 'cores.')
- parser.add_argument('--kernel-description', action='store_true',
- help='Use kernel description instead of analyzing the kernel code.')
- parser.add_argument('--clean-intermediates', action='store_true',
- help='If set, will delete all intermediate files after completion.')
+ description="Analytical performance modelling and benchmarking toolkit.",
+ epilog="For help, examples, documentation and bug reports go to:\nhttps://github.com"
+ "/RRZE-HPC/kerncraft\nLicense: AGPLv3",
+ )
+ parser.add_argument(
+ "--version", action=VersionAction, version="{}".format(__version__)
+ )
+ parser.add_argument(
+ "--machine",
+ "-m",
+ type=argparse.FileType("r"),
+ required=True,
+ help="Path to machine description yaml file.",
+ )
+ parser.add_argument(
+ "--pmodel",
+ "-p",
+ choices=models.__all__,
+ required=True,
+ action="append",
+ default=[],
+ help="Performance model to apply",
+ )
+ parser.add_argument(
+ "-D",
+ "--define",
+ nargs=2,
+ metavar=("KEY", "VALUE"),
+ default=[],
+ action=AppendStringRange,
+ help="Define constant to be used in C code. Values must be integer or "
+ "match start-stop[:num[log[base]]]. If range is given, all "
+ "permutation s will be tested. Overwrites constants from testcase "
+ "file. Key can be . for default value for all used constants.",
+ )
+ parser.add_argument(
+ "--verbose", "-v", action="count", default=0, help="Increases verbosity level."
+ )
+ parser.add_argument(
+ "code_file",
+ metavar="FILE",
+ type=argparse.FileType(),
+ help="File with loop kernel C code",
+ )
+ parser.add_argument(
+ "--asm-block",
+ metavar="BLOCK",
+ default="auto",
+ help='Number of ASM block to mark for IACA, "auto" for automatic '
+ 'selection or "manual" for interactiv selection.',
+ )
+ parser.add_argument(
+ "--pointer-increment",
+ metavar="INCR",
+ default="auto",
+ type=int_or_str,
+ help='Increment of store pointer within one ASM block in bytes. If "auto": '
+ "automatic detection, error on failure to detect, if "
+ '"auto_with_manual_fallback": fallback to manual input, or if '
+ '"manual": always prompt user.',
+ )
+ parser.add_argument(
+ "--store",
+ metavar="PICKLE",
+ type=argparse.FileType("a+b"),
+ help="Addes results to PICKLE file for later processing.",
+ )
+ parser.add_argument(
+ "--json",
+ metavar="JSON",
+ type=argparse.FileType("a+"),
+ help="Stores result as JSON file for later processing",
+ )
+ parser.add_argument(
+ "--unit",
+ "-u",
+ choices=["cy/CL", "cy/It", "It/s", "FLOP/s"],
+ help="Select the output unit, defaults to model specific if not given.",
+ )
+ parser.add_argument(
+ "--cores",
+ "-c",
+ metavar="CORES",
+ type=int,
+ default=1,
+ help="Number of cores to be used in parallel. (default: 1) "
+ "ECM model will consider the scaling of the last level cache and "
+ "predict the overall performance in addition to single-core behavior. "
+ "The benchmark mode will run the code with OpenMP on as many physical "
+ "cores.",
+ )
+ parser.add_argument(
+ "--kernel-description",
+ action="store_true",
+ help="Use kernel description instead of analyzing the kernel code.",
+ )
+ parser.add_argument(
+ "--clean-intermediates",
+ action="store_true",
+ help="If set, will delete all intermediate files after completion.",
+ )
# Needed for ECM, ECMData and RooflineFLOP models:
- parser.add_argument('--cache-predictor', '-P', choices=['LC', 'SIM'], default='SIM',
- help='Change cache predictor to use, options are LC (layer conditions) and '
- 'SIM (cache simulation with pycachesim), default is SIM.')
+ parser.add_argument(
+ "--cache-predictor",
+ "-P",
+ choices=["LC", "SIM"],
+ default="SIM",
+ help="Change cache predictor to use, options are LC (layer conditions) and "
+ "SIM (cache simulation with pycachesim), default is SIM.",
+ )
# Needed for ECM, RooflineASM and Benchmark models:
- parser.add_argument('--compiler', '-C', type=str, default=None,
- help='Compiler to use, default is first in machine description file.')
- parser.add_argument('--compiler-flags', type=str, default=None,
- help='Compiler flags to use. If not set, flags are taken from machine '
- 'description file (-std=c99 is always added).')
+ parser.add_argument(
+ "--compiler",
+ "-C",
+ type=str,
+ default=None,
+ help="Compiler to use, default is first in machine description file.",
+ )
+ parser.add_argument(
+ "--compiler-flags",
+ type=str,
+ default=None,
+ help="Compiler flags to use. If not set, flags are taken from machine "
+ "description file (-std=c99 is always added).",
+ )
# Needed for ECM and RooflineASM models:
- parser.add_argument('--incore-model', '-i', type=str, default="OSACA",
- help='In-core model to use, default is first in machine description file.')
+ parser.add_argument(
+ "--incore-model",
+ "-i",
+ type=str,
+ default="OSACA",
+ help="In-core model to use, default is first in machine description file.",
+ )
for m in models.__all__:
- ag = parser.add_argument_group('arguments for ' + m + ' model', getattr(models, m).name)
+ ag = parser.add_argument_group(
+ "arguments for " + m + " model", getattr(models, m).name
+ )
getattr(models, m).configure_arggroup(ag)
return parser
def check_arguments(args, parser):
"""
Check arguments passed by user that are not checked by argparse itself.
Also register files for closing.
"""
- if args.asm_block not in ['auto', 'manual']:
+ if args.asm_block not in ["auto", "manual"]:
try:
args.asm_block = int(args.asm_block)
except ValueError:
parser.error('--asm-block can only be "auto", "manual" or an integer')
# Set default unit depending on performance model requested
if not args.unit:
- if 'RooflineFLOP' in args.pmodel or 'RooflineASM' in args.pmodel or 'RooflineIACA' in args.pmodel:
- args.unit = 'FLOP/s'
+ if (
+ "RooflineFLOP" in args.pmodel
+ or "RooflineASM" in args.pmodel
+ or "RooflineIACA" in args.pmodel
+ ):
+ args.unit = "FLOP/s"
else:
- args.unit = 'cy/CL'
+ args.unit = "cy/CL"
# Register all opened files for closing at exit.
if args.store:
atexit.register(args.store.close)
if args.json:
|
kerncraft/iaca_get.py#L28
Line too long (116 > 100 characters) (E501)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/kerncraft.py#L243
if args.machine:
atexit.register(args.machine.close)
def to_tuple(x):
- '''Transform nested lists (and tuple) in purely nested tuples.'''
+ """Transform nested lists (and tuple) in purely nested tuples."""
if isinstance(x, (list, tuple)):
if len(x) >= 2:
return tuple(to_tuple(x[:1]) + to_tuple(x[1:]))
elif len(x) == 1:
return (to_tuple(x[0]),)
|
kerncraft/iaca_get.py#L29
Line too long (118 > 100 characters) (E501)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/kerncraft.py#L261
identifier = []
for k in sorted(args.__dict__):
if k in kwargs:
identifier.append((k, kwargs[k]))
continue
- if k in ['verbose', 'store', 'unit', 'clean_intermediates']:
+ if k in ["verbose", "store", "unit", "clean_intermediates"]:
# Ignore these, as they do not change the outcome
continue
v = args.__dict__[k]
if isinstance(v, list):
v = to_tuple(v)
|
kerncraft/iaca_get.py#L33
Expected 2 blank lines, found 1 (E302)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/kerncraft.py#L274
if isinstance(v, io.IOBase):
v = v.name
identifier.append((k, v))
return tuple(identifier)
+
def jsonify_obj(obj):
- #print("jsonify {}".format(str(obj) if len(str(obj)) < 15 else str(obj)[:12] + "..."))
+ # print("jsonify {}".format(str(obj) if len(str(obj)) < 15 else str(obj)[:12] + "..."))
# if obj is str, int, or float, keep it this way
if isinstance(obj, str) or isinstance(obj, int) or isinstance(obj, float):
return obj
# if obj is list, use recursion
elif isinstance(obj, list) or isinstance(obj, tuple):
|
kerncraft/incore_model.py#L8
'copy.copy' imported but unused (F401)
|
kerncraft/incore_model.py#L10
'pprint.pformat' imported but unused (F401)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/kerncraft.py#L290
return tuple(new_list)
return new_list
# if obj is dict, use recursion
elif isinstance(obj, dict):
new_dict = {}
- for k,v in obj.items():
+ for k, v in obj.items():
# key must be one element
- k = str(k) if not (isinstance(k, str) or isinstance(k, int) or isinstance(k, float)) else k
+ k = (
+ str(k)
+ if not (
+ isinstance(k, str) or isinstance(k, int) or isinstance(k, float)
+ )
+ else k
+ )
new_dict[k] = jsonify_obj(v)
return new_dict
else:
return str(obj)
|
kerncraft/incore_model.py#L10
'pprint.pprint' imported but unused (F401)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/kerncraft.py#L321
# process kernel
if not args.kernel_description:
code = str(args.code_file.read())
args.code_file.close()
code = clean_code(code)
- kernel = KernelCode(code, filename=args.code_file.name, machine=machine,
- keep_intermediates=not args.clean_intermediates)
+ kernel = KernelCode(
+ code,
+ filename=args.code_file.name,
+ machine=machine,
+ keep_intermediates=not args.clean_intermediates,
+ )
else:
description = str(args.code_file.read())
args.code_file.close()
- yaml = ruamel.yaml.YAML(typ='unsafe')
+ yaml = ruamel.yaml.YAML(typ="unsafe")
yaml.register_class(PrefixedUnit)
kernel = KernelDescription(yaml.load(description), machine=machine)
- loop_indices = set([symbol_pos_int(l['index']) for l in kernel.get_loop_stack()])
+ loop_indices = set([symbol_pos_int(l["index"]) for l in kernel.get_loop_stack()])
# define constants
required_consts = [v[1] for v in kernel.variables.values() if v[1] is not None]
- required_consts += [[l['start'], l['stop']] for l in kernel.get_loop_stack()]
+ required_consts += [[l["start"], l["stop"]] for l in kernel.get_loop_stack()]
required_consts += [i for a in kernel.sources.values() for i in a if i is not None]
- required_consts += [i for a in kernel.destinations.values() for i in a if i is not None]
+ required_consts += [
+ i for a in kernel.destinations.values() for i in a if i is not None
+ ]
# split into individual consts
required_consts = [i for l in required_consts for i in l]
required_consts = set([i for l in required_consts for i in l.free_symbols])
# remove loop indices
required_consts -= loop_indices
|
kerncraft/incore_model.py#L12
'textwrap' imported but unused (F401)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/kerncraft.py#L347
if len(required_consts) > 0:
# build defines permutations
define_dict = OrderedDict()
args.define.sort()
# Prefill with default value, if any is given
- if '.' in [n for n,v in args.define]:
- default_const_values = dict(args.define)['.']
+ if "." in [n for n, v in args.define]:
+ default_const_values = dict(args.define)["."]
for name in required_consts:
name = str(name)
define_dict[str(name)] = [[str(name), v] for v in default_const_values]
for name, values in args.define:
if name not in [str(n) for n in required_consts]:
|
kerncraft/incore_model.py#L14
'io' imported but unused (F401)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/kerncraft.py#L364
for v in values:
if v not in define_dict[name]:
define_dict[name].append([name, v])
define_product = list(itertools.product(*list(define_dict.values())))
# Check that all consts have been defined
- if set(required_consts).difference(set([symbol_pos_int(k) for k in define_dict.keys()])):
- raise ValueError("Not all constants have been defined. Required are: {}".format(
- required_consts))
+ if set(required_consts).difference(
+ set([symbol_pos_int(k) for k in define_dict.keys()])
+ ):
+ raise ValueError(
+ "Not all constants have been defined. Required are: {}".format(
+ required_consts
+ )
+ )
else:
define_product = [{}]
for define in define_product:
# Reset state of kernel
|
kerncraft/incore_model.py#L22
'osaca.semantics.MachineModel' imported but unused (F401)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/kerncraft.py#L380
for k, v in define:
kernel.set_constant(k, v)
for model_name in uniquify(args.pmodel):
# print header
- print('{:^80}'.format(' kerncraft '), file=output_file)
- print('{:<40}{:>40}'.format(args.code_file.name, '-m ' + args.machine.name),
- file=output_file)
- print(' '.join(['-D {} {}'.format(k, v) for k, v in define]), file=output_file)
- print('{:-^80}'.format(' ' + model_name + ' '), file=output_file)
+ print("{:^80}".format(" kerncraft "), file=output_file)
+ print(
+ "{:<40}{:>40}".format(args.code_file.name, "-m " + args.machine.name),
+ file=output_file,
+ )
+ print(
+ " ".join(["-D {} {}".format(k, v) for k, v in define]), file=output_file
+ )
+ print("{:-^80}".format(" " + model_name + " "), file=output_file)
if args.verbose > 1:
if not args.kernel_description:
kernel.print_kernel_code(output_file=output_file)
- print('', file=output_file)
+ print("", file=output_file)
kernel.print_variables_info(output_file=output_file)
kernel.print_kernel_info(output_file=output_file)
if args.verbose > 0:
kernel.print_constants_info(output_file=output_file)
|
kerncraft/incore_model.py#L283
Line too long (121 > 100 characters) (E501)
|
kerncraft/incore_model.py#L309
Ambiguous variable name 'l' (E741)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/kerncraft.py#L402
model.analyze()
model.report(output_file=output_file)
# Add results to storage
result_identifier = identifier_from_arguments(
- args, define=to_tuple(define), pmodel=model_name)
+ args, define=to_tuple(define), pmodel=model_name
+ )
result_storage[result_identifier] = model.results
- print('', file=output_file)
+ print("", file=output_file)
# Save storage to file (if requested)
if args.store:
- temp_name = args.store.name + '.tmp'
- with open(temp_name, 'wb+') as f:
+ temp_name = args.store.name + ".tmp"
+ with open(temp_name, "wb+") as f:
pickle.dump(result_storage, f)
shutil.move(temp_name, args.store.name)
if args.json:
- temp_name = args.json.name + '.tmp'
+ temp_name = args.json.name + ".tmp"
json_dict = jsonify_obj(result_storage)
- with open(temp_name, 'w+') as f:
+ with open(temp_name, "w+") as f:
json.dump(json_dict, f, indent=4)
shutil.move(temp_name, args.json.name)
return result_storage
|
kerncraft/incore_model.py#L313
Line too long (104 > 100 characters) (E501)
|
kerncraft/incore_model.py#L316
Ambiguous variable name 'l' (E741)
|
kerncraft/incore_model.py#L482
Local variable 'increment' is assigned to but never used (F841)
|
kerncraft/incore_model.py#L566
Do not use bare 'except' (E722)
|
kerncraft/incore_model.py#L689
Ambiguous variable name 'l' (E741)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/kerncraft.py#L437
# BUSINESS LOGIC IS FOLLOWING
run(parser, args)
-if __name__ == '__main__':
+if __name__ == "__main__":
main()
|
kerncraft/incore_model.py#L775
Ambiguous variable name 'l' (E741)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L22
def uneven_tuple_cmp(a, b):
length_diff = max(len(a), len(b)) - min(len(a), len(b))
if len(a) < len(b):
- a = (0,)*length_diff + a
+ a = (0,) * length_diff + a
elif len(b) < len(a):
- b = (0,)*length_diff + b
+ b = (0,) * length_diff + b
if a > b:
return 1
elif a < b:
return -1
else:
|
kerncraft/incore_model.py#L816
Ambiguous variable name 'l' (E741)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L62
e = e.expand()
key = []
# split into terms
terms, gens = e.as_terms()
- assert gens == [first_s] or first_s is None and gens == [], \
- "Expression was split into unusable terms: {}, expected.".format(gens, first_s)
+ assert (
+ gens == [first_s] or first_s is None and gens == []
+ ), "Expression was split into unusable terms: {}, expected.".format(gens, first_s)
# extract exponent and coefficient
for term, (coeff, cpart, ncpart) in terms:
coeff_real, coeff_imag = coeff
assert coeff_imag == 0, "Not supporting imaginary coefficients."
# Sort order: exponent (cpart), factor
|
kerncraft/incore_model.py#L819
Ambiguous variable name 'l' (E741)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L75
key[0] = (key[0][0], key[0][1])
# build key
key.sort(reverse=True)
# add missing exponent, coefficient tuples
i = 0
- for exponent in reversed(range(key[0][0]+1)):
+ for exponent in reversed(range(key[0][0] + 1)):
if len(key) > i and key[i][0] == exponent:
i += 1
continue
else:
key[i:i] = [(exponent, 0.0)]
|
kerncraft/incore_model.py#L893
Ambiguous variable name 'l' (E741)
|
kerncraft/incore_model.py#L894
Ambiguous variable name 'l' (E741)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L110
return 0
if isinstance(dimension_factor, sympy.Symbol):
return 1
# Replace all free symbols with one:
if not dimension_factor.free_symbols:
- raise ValueError("dimension_factor is neither a number, a symbol nor an expression based "
- "on symbols.")
+ raise ValueError(
+ "dimension_factor is neither a number, a symbol nor an expression based "
+ "on symbols."
+ )
free_symbols = list(dimension_factor.free_symbols)
for s in free_symbols[1:]:
dimension_factor = dimension_factor.subs(s, free_symbols[0])
if isinstance(dimension_factor, sympy.Pow):
return dimension_factor.as_base_exp()[1]
|
kerncraft/incore_model.py#L951
Local variable 'pointer_increment' is assigned to but never used (F841)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L172
if c:
coefficients[factor_idx] += c
break
# Test: reassemble original expression
- if expr != reduce(operator.add, [c*f for c, f in zip(coefficients, dimension_factors)], 0):
- raise ValueError("Unable to split expression and reproduce from coefficients and factors: "
- "{!r} with {!r}".format(terms, dimension_factors))
+ if expr != reduce(
+ operator.add, [c * f for c, f in zip(coefficients, dimension_factors)], 0
+ ):
+ raise ValueError(
+ "Unable to split expression and reproduce from coefficients and factors: "
+ "{!r} with {!r}".format(terms, dimension_factors)
+ )
return tuple(coefficients)
def canonical_relational(rel):
|
/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L260
def __init__(self, kernel, machine, cores=1, symbolic=False):
"""Initialize layer condition based predictor from kernel and machine object."""
CachePredictor.__init__(self, kernel, machine, cores=cores)
if isinstance(kernel, KernelCode):
# Make use of caching for symbolic LC representation:
- file_name = 'LC_analysis.pickle.lzma'
+ file_name = "LC_analysis.pickle.lzma"
file_path = kernel.get_intermediate_location(
- file_name, machine_and_compiler_dependent=False, other_dependencies=[str(cores)])
+ file_name,
+ machine_and_compiler_dependent=False,
+ other_dependencies=[str(cores)],
+ )
lock_mode, lock_fp = kernel.lock_intermediate(file_path)
if lock_mode == fcntl.LOCK_SH:
# use cache
self.results = compress_pickle.load(file_path)
lock_fp.close() # release lock
|
/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L282
if not symbolic:
self.desymbolize()
def desymbolize(self):
"""Evaluate LCs and remove symbols"""
- for i, options in enumerate(self.results['cache']):
+ for i, options in enumerate(self.results["cache"]):
for o in options:
- if self.kernel.subs_consts(o['condition']):
- self.results['cache'][i] = o
+ if self.kernel.subs_consts(o["condition"]):
+ self.results["cache"][i] = o
break
def build_symbolic_LCs(self):
# check that layer conditions can be applied on this kernel:
# 1. All iterations may only have a step width of 1
loop_stack = list(self.kernel.get_loop_stack())
- if any([l['increment'] != 1 for l in loop_stack]):
- raise ValueError("Can not apply layer condition, since not all loops are of step "
- "length 1.")
+ if any([l["increment"] != 1 for l in loop_stack]):
+ raise ValueError(
+ "Can not apply layer condition, since not all loops are of step "
+ "length 1."
+ )
# 2. The order of iterations must be reflected in the order of indices in all array
# references containing the inner loop index. If the inner loop index is not part of the
# reference, the reference is simply ignored
- index_order = [symbol_pos_int(l['index']) for l in loop_stack]
- for var_name, arefs in chain(self.kernel.sources.items(), self.kernel.destinations.items()):
+ index_order = [symbol_pos_int(l["index"]) for l in loop_stack]
+ for var_name, arefs in chain(
+ self.kernel.sources.items(), self.kernel.destinations.items()
+ ):
try:
if next(iter(arefs)) is None:
# Anything that is a scalar may be ignored
continue
except StopIteration:
|
/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L319
# Terms without any indices can be treat as constant offsets and are acceptable
if not idx:
continue
if len(idx) != 1:
- raise ValueError("Only one loop counter may appear per term. "
- "Problematic term: {}.".format(t))
+ raise ValueError(
+ "Only one loop counter may appear per term. "
+ "Problematic term: {}.".format(t)
+ )
else: # len(idx) == 1
idx = idx.pop()
# Check that number of multiplication match access order of iterator
- pow_dict = {k: v for k, v in t.as_powers_dict().items()
- if k != idx}
+ pow_dict = {
+ k: v for k, v in t.as_powers_dict().items() if k != idx
+ }
stride_dim = sum(pow_dict.values())
error = False
try:
- if loop_stack[-stride_dim-1]['index'] != idx.name:
+ if loop_stack[-stride_dim - 1]["index"] != idx.name:
error = True
except IndexError:
error = True
if error:
- raise ValueError("Number of multiplications in index term does not "
- "match loop counter order. "
- "Problematic term: {}.".format(t))
+ raise ValueError(
+ "Number of multiplications in index term does not "
+ "match loop counter order. "
+ "Problematic term: {}.".format(t)
+ )
# 3. Indices may only increase with one
- inner_index = symbol_pos_int(loop_stack[-1]['index'])
- inner_increment = loop_stack[-1]['increment']
- for aref in chain(chain(*self.kernel.sources.values()),
- chain(*self.kernel.destinations.values())):
+ inner_index = symbol_pos_int(loop_stack[-1]["index"])
+ inner_increment = loop_stack[-1]["increment"]
+ for aref in chain(
+ chain(*self.kernel.sources.values()),
+ chain(*self.kernel.destinations.values()),
+ ):
if aref is None:
continue
for expr in aref:
- diff = expr.subs(inner_index, 1+inner_increment) - expr.subs(inner_index, 1)
+ diff = expr.subs(inner_index, 1 + inner_increment) - expr.subs(
+ inner_index, 1
+ )
if diff != 0 and diff != 1:
# TODO support -1 aswell
- raise ValueError("Can not apply layer condition, array references may not "
- "increment more then one per iteration.")
+ raise ValueError(
+ "Can not apply layer condition, array references may not "
+ "increment more then one per iteration."
+ )
# FIXME handle multiple datatypes
element_size = self.kernel.datatypes_size[self.kernel.datatype]
indices = list([symbol_pos_int(l[0]) for l in self.kernel._loop_stack])
sympy_accesses = self.kernel.compile_sympy_accesses()
|
/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L369
array_dimensions = self.kernel.variables[var_name][1]
# Skipping scalars
if array_dimensions is None:
continue
for dimension in range(len(array_dimensions)):
- dimension_factors.append(reduce(operator.mul, array_dimensions[dimension+1:],
- sympy.Integer(1)))
+ dimension_factors.append(
+ reduce(
+ operator.mul,
+ array_dimensions[dimension + 1 :],
+ sympy.Integer(1),
+ )
+ )
for a in sympy_accesses[var_name]:
o = split_sympy_access_in_dim_offset(a, dimension_factors)
accesses[var_name].append(o)
# Skip non-variable offsets, where acs is [None, None, None] (or similar) or only made
# up from constant offsets
if not any(accesses[var_name]) or not any(
- [a == inner_index or a.coeff(inner_index) != 0
- for a in chain.from_iterable(accesses[var_name])]):
+ [
+ a == inner_index or a.coeff(inner_index) != 0
+ for a in chain.from_iterable(accesses[var_name])
+ ]
+ ):
continue
destinations.update(
- [(var_name, tuple(r)) for r in self.kernel.destinations.get(var_name, [])])
+ [
+ (var_name, tuple(r))
+ for r in self.kernel.destinations.get(var_name, [])
+ ]
+ )
acs = list(accesses[var_name])
# If accesses are of unequal length, pad with leading zero elements
max_dims = max(map(len, acs))
for i in range(len(acs)):
if len(acs[i]) < max_dims:
- acs[i] = (sympy.Integer(0),)*(max_dims-len(acs[i])) + acs[i]
+ acs[i] = (sympy.Integer(0),) * (max_dims - len(acs[i])) + acs[i]
# Sort accesses by decreasing order
acs.sort(reverse=True)
# Transform back into sympy expressions
for i in range(len(acs)):
- acs[i] = reduce(sympy.Add, [f*df for f, df in zip(acs[i], dimension_factors)])
+ acs[i] = reduce(
+ sympy.Add, [f * df for f, df in zip(acs[i], dimension_factors)]
+ )
# Create reuse distances by substracting accesses pairwise in decreasing order
- distances += [(acs[i-1]-acs[i]).simplify() for i in range(1, len(acs))]
+ distances += [(acs[i - 1] - acs[i]).simplify() for i in range(1, len(acs))]
# Add infinity for each array
distances.append(sympy.oo)
# Sort distances by decreasing order
distances.sort(reverse=True, key=sympy_expr_abs_distance_key)
# Create copy of distances in bytes:
- distances_bytes = [d*element_size for d in distances]
+ distances_bytes = [d * element_size for d in distances]
# CAREFUL! From here on we are working in byte offsets and not in indices anymore.
# converting access sets to lists, otherwise pprint will fail during obligatory sorting step
- results = {'accesses': {k: sorted(list(v), key=cmp_to_key(uneven_tuple_cmp))
- for k,v in accesses.items()},
- 'distances': distances,
- 'destinations': destinations,
- 'distances_bytes': distances_bytes,
- 'cache': []}
-
- sum_array_sizes = sum(self.kernel.array_sizes(in_bytes=True, subs_consts=False).values())
+ results = {
+ "accesses": {
+ k: sorted(list(v), key=cmp_to_key(uneven_tuple_cmp))
+ for k, v in accesses.items()
+ },
+ "distances": distances,
+ "destinations": destinations,
+ "distances_bytes": distances_bytes,
+ "cache": [],
+ }
+
+ sum_array_sizes = sum(
+ self.kernel.array_sizes(in_bytes=True, subs_consts=False).values()
+ )
for c in self.machine.get_cachesim(self.cores).levels(with_mem=False):
# Assuming increasing order of cache sizes
options = []
# Full caching
- options.append({
- 'condition': canonical_relational(c.size() > sum_array_sizes),
- 'hits': len(distances),
- 'misses': 0,
- 'evicts': 0,
- 'tail': sympy.oo,
- })
-
- for tail in sorted(set([d.simplify().expand() for d in distances_bytes]), reverse=True,
- key=sympy_expr_abs_distance_key):
+ options.append(
+ {
+ "condition": canonical_relational(c.size() > sum_array_sizes),
+ "hits": len(distances),
+ "misses": 0,
+ "evicts": 0,
+ "tail": sympy.oo,
+ }
+ )
+
+ for tail in sorted(
+ set([d.simplify().expand() for d in distances_bytes]),
+ reverse=True,
+ key=sympy_expr_abs_distance_key,
+ ):
# Assuming decreasing order of tails
# Ignoring infinity tail:
if tail is sympy.oo:
continue
cache_requirement = (
# Sum of inter-access caches
- sum([d for d in distances_bytes
- if sympy_expr_abs_distance_key(d) <= sympy_expr_abs_distance_key(tail)]
- ) +
+ sum(
+ [
+ d
+ for d in distances_bytes
+ if sympy_expr_abs_distance_key(d)
+ <= sympy_expr_abs_distance_key(tail)
+ ]
+ )
+ +
# Tails
- tail*len([d for d in distances_bytes
- if sympy_expr_abs_distance_key(d) >
- sympy_expr_abs_distance_key(tail)]))
+ tail
+ * len(
+ [
+ d
+ for d in distances_bytes
+ if sympy_expr_abs_distance_key(d)
+ > sympy_expr_abs_distance_key(tail)
+ ]
+ )
+ )
condition = canonical_relational(cache_requirement <= c.size())
hits = len(
- [d for d in distances_bytes
- if sympy_expr_abs_distance_key(d) <= sympy_expr_abs_distance_key(tail)])
+ [
+ d
+ for d in distances_bytes
+ if sympy_expr_abs_distance_key(d)
+ <= sympy_expr_abs_distance_key(tail)
+ ]
+ )
misses = len(
- [d for d in distances_bytes
- if sympy_expr_abs_distance_key(d) > sympy_expr_abs_distance_key(tail)])
+ [
+ d
+ for d in distances_bytes
+ if sympy_expr_abs_distance_key(d)
+ > sympy_expr_abs_distance_key(tail)
+ ]
+ )
# Resulting analysis
- options.append({
- 'condition': condition,
- 'hits': hits,
- 'misses': misses,
- 'evicts': len(destinations),
- 'tail': tail})
+ options.append(
+ {
+ "condition": condition,
+ "hits": hits,
+ "misses": misses,
+ "evicts": len(destinations),
+ "tail": tail,
+ }
+ )
# If we encountered a True condition, break to not include multiple such.
if isinstance(condition, BooleanTrue):
break
- if not isinstance(options[-1]['condition'], BooleanTrue):
+ if not isinstance(options[-1]["condition"], BooleanTrue):
# Fallback: no condition matched
- options.append({
- 'condition': True,
- 'hits': 0,
- 'misses': len(distances),
- 'evicts': len(destinations),
- 'tail': 0
- })
-
- results['cache'].append(options)
+ options.append(
+ {
+ "condition": True,
+ "hits": 0,
+ "misses": len(distances),
+ "evicts": len(destinations),
+ "tail": 0,
+ }
+ )
+
+ results["cache"].append(options)
self.results = results
def get_loads(self):
"""Return a list with number of loaded cache lines per memory hierarchy level."""
# TODO FIXME L1 loads need to be derived from accesses
- return [float('nan')]+[c['misses'] for c in self.results['cache']]
+ return [float("nan")] + [c["misses"] for c in self.results["cache"]]
def get_hits(self):
"""Return a list with number of hit cache lines per memory hierarchy level."""
# At last level, all previous misses are hits
- return [c['hits'] for c in self.results['cache']]+[self.results['cache'][-1]['misses']]
+ return [c["hits"] for c in self.results["cache"]] + [
+ self.results["cache"][-1]["misses"]
+ ]
def get_misses(self):
"""Return a list with number of missed cache lines per memory hierarchy level."""
# At last level, there are no misses
- return [c['misses'] for c in self.results['cache']]+[0]
+ return [c["misses"] for c in self.results["cache"]] + [0]
def get_stores(self):
"""Return a list with number of stored cache lines per memory hierarchy level."""
# TODO FIXME L1 stores need to be derived from accesses
- return [float('nan')]+[c['evicts'] for c in self.results['cache']]
+ return [float("nan")] + [c["evicts"] for c in self.results["cache"]]
def get_evicts(self):
"""Return a list with number of evicted cache lines per memory hierarchy level."""
# At last level, there are no evicts
- return [c['evicts'] for c in self.results['cache']]+[0]
+ return [c["evicts"] for c in self.results["cache"]] + [0]
def get_infos(self):
"""Return verbose information about the predictor."""
return self.results
class CacheSimulationPredictor(CachePredictor):
"""Predictor class based on cache simulation."""
+
def __init__(self, kernel, machine, cores=1):
"""Initialize cache simulation based predictor from kernel and machine object."""
CachePredictor.__init__(self, kernel, machine, cores)
if isinstance(kernel, KernelCode):
# Make use of caching for symbolic LC representation:
- file_name = 'CSIM_analysis.pickle.lzma'
+ file_name = "CSIM_analysis.pickle.lzma"
file_path = kernel.get_intermediate_location(
- file_name, machine_and_compiler_dependent=False,
- other_dependencies=[str(cores)]+[str(t) for t in self.kernel.constants.items()])
+ file_name,
+ machine_and_compiler_dependent=False,
+ other_dependencies=[str(cores)]
+ + [str(t) for t in self.kernel.constants.items()],
+ )
lock_mode, lock_fp = kernel.lock_intermediate(file_path)
if lock_mode == fcntl.LOCK_SH:
# use cache
cache = compress_pickle.load(file_path)
lock_fp.close() # release lock
- self.first_dim_factor = cache['first_dim_factor']
- self.stats = cache['stats']
- self.pretty_stats = cache['pretty_stats']
+ self.first_dim_factor = cache["first_dim_factor"]
+ self.stats = cache["stats"]
+ self.pretty_stats = cache["pretty_stats"]
else: # lock_mode == fcntl.LOCK_EX
# needs update
self.simulate()
compress_pickle.dump(
- {'first_dim_factor': self.first_dim_factor, 'stats': self.stats, 'pretty_stats': self.pretty_stats},
- file_path)
+ {
+ "first_dim_factor": self.first_dim_factor,
+ "stats": self.stats,
+ "pretty_stats": self.pretty_stats,
+ },
+ file_path,
+ )
lock_fp.close() # release lock
else:
# No caching support without filename for kernel code
self.simulate()
|
/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L542
# Get the machine's cache model and simulator
self.csim = self.machine.get_cachesim(self.cores)
# FIXME handle multiple datatypes
element_size = self.kernel.datatypes_size[self.kernel.datatype]
- cacheline_size = self.machine['cacheline size']
+ cacheline_size = self.machine["cacheline size"]
elements_per_cacheline = int(cacheline_size // element_size)
- iterations_per_cacheline = (sympy.Integer(self.machine['cacheline size']) /
- sympy.Integer(self.kernel.bytes_per_iteration))
+ iterations_per_cacheline = sympy.Integer(
+ self.machine["cacheline size"]
+ ) / sympy.Integer(self.kernel.bytes_per_iteration)
# Gathering some loop information:
inner_loop = list(self.kernel.get_loop_stack(subs_consts=True))[-1]
- inner_index = symbol_pos_int(inner_loop['index'])
- inner_increment = inner_loop['increment'] # Calculate the number of iterations for warm-up
+ inner_index = symbol_pos_int(inner_loop["index"])
+ inner_increment = inner_loop[
+ "increment"
+ ] # Calculate the number of iterations for warm-up
total_length = self.kernel.iteration_length()
max_iterations = self.kernel.subs_consts(total_length)
max_cache_size = sum([c.size() for c in self.csim.levels(with_mem=False)])
-
# Warmup
# Phase 1:
# define warmup interval boundaries
max_steps = 100
|
/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L574
warmup_iteration = max_iterations
complete_sweep = True
# print("warmup_iteration1", warmup_iteration)
offsets = self.kernel.compile_global_offsets(
- iteration=range(prev_warmup_iteration, warmup_iteration))
+ iteration=range(prev_warmup_iteration, warmup_iteration)
+ )
self.csim.loadstore(offsets, length=element_size)
invalid_entries = self.csim.count_invalid_entries()
# TODO more intelligent break criteria based on change of invalid entries might be
# useful for early termination.
# print("invalid_entries", invalid_entries)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L597
complete_sweep = True
prev_warmup_iteration = warmup_iteration
warmup_iteration = max_iterations
# print("warmup_iteration2", warmup_iteration, end="; ")
offsets = self.kernel.compile_global_offsets(
- iteration=range(prev_warmup_iteration, warmup_iteration))
+ iteration=range(prev_warmup_iteration, warmup_iteration)
+ )
self.csim.loadstore(offsets, length=element_size)
warmup_iteration = 0
if not complete_sweep and invalid_entries > 0:
- print("Warning: Unable to perform complete sweep nor initialize cache completely. "
- "This might introduce inaccuracies (additional cache misses) in the cache "
- "prediction.")
+ print(
+ "Warning: Unable to perform complete sweep nor initialize cache completely. "
+ "This might introduce inaccuracies (additional cache misses) in the cache "
+ "prediction."
+ )
# Phase 3:
# Iterate to safe handover point
prev_warmup_iteration = warmup_iteration
- warmup_iteration = self._align_iteration_with_cl_boundary(warmup_iteration, subtract=False)
+ warmup_iteration = self._align_iteration_with_cl_boundary(
+ warmup_iteration, subtract=False
+ )
if warmup_iteration != prev_warmup_iteration:
# print("warmup_iteration3", warmup_iteration)
offsets = self.kernel.compile_global_offsets(
- iteration=range(prev_warmup_iteration, warmup_iteration))
+ iteration=range(prev_warmup_iteration, warmup_iteration)
+ )
self.csim.loadstore(offsets, length=element_size)
# Reset stats to conclude warm-up phase
self.csim.reset_stats()
# Benchmark
- bench_iteration = self._align_iteration_with_cl_boundary(min(
- warmup_iteration + 100000, max_iterations - 1))
+ bench_iteration = self._align_iteration_with_cl_boundary(
+ min(warmup_iteration + 100000, max_iterations - 1)
+ )
# print("bench_iteration", bench_iteration)
- first_dim_factor = float((bench_iteration - warmup_iteration) / iterations_per_cacheline)
+ first_dim_factor = float(
+ (bench_iteration - warmup_iteration) / iterations_per_cacheline
+ )
# If end point is fewer than 1000 cachelines away, warn user of inaccuracy
if not complete_sweep and first_dim_factor < 1000:
- print("Warning: benchmark iterations are very low ({} CL). This may lead to inaccurate "
- "cache predictions.".format(first_dim_factor))
+ print(
+ "Warning: benchmark iterations are very low ({} CL). This may lead to inaccurate "
+ "cache predictions.".format(first_dim_factor)
+ )
# Compile access needed for one cache-line
offsets = self.kernel.compile_global_offsets(
- iteration=range(warmup_iteration, bench_iteration))
+ iteration=range(warmup_iteration, bench_iteration)
+ )
# Run cache simulation
self.csim.loadstore(offsets, length=element_size)
# FIXME compile_global_offsets should already expand to element_size
# use stats to build results
|
/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L644
self.csim.print_stats(file=sio)
pretty_stats = sio.getvalue()
sio.close()
self.pretty_stats = pretty_stats
-
def _align_iteration_with_cl_boundary(self, iteration, subtract=True):
"""Align iteration with cacheline boundary."""
# FIXME handle multiple datatypes
element_size = self.kernel.datatypes_size[self.kernel.datatype]
- cacheline_size = self.machine['cacheline size']
+ cacheline_size = self.machine["cacheline size"]
elements_per_cacheline = int(cacheline_size // element_size)
# Gathering some loop information:
inner_loop = list(self.kernel.get_loop_stack(subs_consts=True))[-1]
- inner_increment = inner_loop['increment']
+ inner_increment = inner_loop["increment"]
# do this by aligning either writes (preferred) or reads
# Assumption: writes (and reads) increase linearly
o = self.kernel.compile_global_offsets(iteration=iteration)[0]
if len(o[1]):
|
/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L666
first_offset = min(o[1])
else:
# we use reads
first_offset = min(o[0])
- diff = first_offset - \
- (int(first_offset) >> self.csim.first_level.cl_bits << self.csim.first_level.cl_bits)
+ diff = first_offset - (
+ int(first_offset)
+ >> self.csim.first_level.cl_bits
+ << self.csim.first_level.cl_bits
+ )
if diff == 0:
return int(iteration)
elif subtract:
return int(iteration - (diff // element_size) // inner_increment)
else:
- return int(iteration + (elements_per_cacheline - diff // element_size)
- // inner_increment)
+ return int(
+ iteration
+ + (elements_per_cacheline - diff // element_size) // inner_increment
+ )
def get_loads(self):
"""Return a list with number of loaded cache lines per memory hierarchy level."""
- return [self.stats[cache_level]['LOAD_count'] / self.first_dim_factor
- for cache_level in range(len(self.machine['memory hierarchy']))]
+ return [
+ self.stats[cache_level]["LOAD_count"] / self.first_dim_factor
+ for cache_level in range(len(self.machine["memory hierarchy"]))
+ ]
def get_hits(self):
"""Return a list with number of hit cache lines per memory hierarchy level."""
- return [self.stats[cache_level]['HIT_count']/self.first_dim_factor
- for cache_level in range(len(self.machine['memory hierarchy']))]
+ return [
+ self.stats[cache_level]["HIT_count"] / self.first_dim_factor
+ for cache_level in range(len(self.machine["memory hierarchy"]))
+ ]
def get_misses(self):
"""Return a list with number of missed cache lines per memory hierarchy level."""
- return [self.stats[cache_level]['MISS_count']/self.first_dim_factor
- for cache_level in range(len(self.machine['memory hierarchy']))]
-
+ return [
+ self.stats[cache_level]["MISS_count"] / self.first_dim_factor
+ for cache_level in range(len(self.machine["memory hierarchy"]))
+ ]
+
def get_stores(self):
"""Return a list with number of stored cache lines per memory hierarchy level."""
- return [self.stats[cache_level]['STORE_count']/self.first_dim_factor
- for cache_level in range(len(self.machine['memory hierarchy']))]
+ return [
+ self.stats[cache_level]["STORE_count"] / self.first_dim_factor
+ for cache_level in range(len(self.machine["memory hierarchy"]))
+ ]
def get_evicts(self):
"""Return a list with number of evicted cache lines per memory hierarchy level."""
- return [self.stats[cache_level]['EVICT_count']/self.first_dim_factor
- for cache_level in range(len(self.machine['memory hierarchy']))]
+ return [
+ self.stats[cache_level]["EVICT_count"] / self.first_dim_factor
+ for cache_level in range(len(self.machine["memory hierarchy"]))
+ ]
def get_infos(self):
"""Return verbose information about the predictor."""
first_dim_factor = self.first_dim_factor
- infos = {'memory hierarchy': [], 'cache stats': self.stats,
- 'cachelines in stats': first_dim_factor,
- 'cache pretty output': self.pretty_stats}
- for cache_level, cache_info in list(enumerate(self.machine['memory hierarchy'])):
- infos['memory hierarchy'].append({
- 'index': len(infos['memory hierarchy']),
- 'level': '{}'.format(cache_info['level']),
- 'total loads': self.stats[cache_level]['LOAD_byte']/first_dim_factor,
- 'total misses': self.stats[cache_level]['MISS_byte']/first_dim_factor,
- 'total hits': self.stats[cache_level]['HIT_byte']/first_dim_factor,
- 'total stores': self.stats[cache_level]['STORE_byte']/first_dim_factor,
- 'total evicts': self.stats[cache_level]['EVICT_byte']/first_dim_factor,
- 'total lines load': self.stats[cache_level]['LOAD_count']/first_dim_factor,
- 'total lines misses': self.stats[cache_level]['MISS_count']/first_dim_factor,
- 'total lines hits': self.stats[cache_level]['HIT_count']/first_dim_factor,
- 'total lines stores': self.stats[cache_level]['STORE_count']/first_dim_factor,
- 'total lines evicts': self.stats[cache_level]['EVICT_count']/first_dim_factor,
- 'cycles': None,
- })
+ infos = {
+ "memory hierarchy": [],
+ "cache stats": self.stats,
+ "cachelines in stats": first_dim_factor,
+ "cache pretty output": self.pretty_stats,
+ }
+ for cache_level, cache_info in list(
+ enumerate(self.machine["memory hierarchy"])
+ ):
+ infos["memory hierarchy"].append(
+ {
+ "index": len(infos["memory hierarchy"]),
+ "level": "{}".format(cache_info["level"]),
+ "total loads": self.stats[cache_level]["LOAD_byte"]
+ / first_dim_factor,
+ "total misses": self.stats[cache_level]["MISS_byte"]
+ / first_dim_factor,
+ "total hits": self.stats[cache_level]["HIT_byte"]
+ / first_dim_factor,
+ "total stores": self.stats[cache_level]["STORE_byte"]
+ / first_dim_factor,
+ "total evicts": self.stats[cache_level]["EVICT_byte"]
+ / first_dim_factor,
+ "total lines load": self.stats[cache_level]["LOAD_count"]
+ / first_dim_factor,
+ "total lines misses": self.stats[cache_level]["MISS_count"]
+ / first_dim_factor,
+ "total lines hits": self.stats[cache_level]["HIT_count"]
+ / first_dim_factor,
+ "total lines stores": self.stats[cache_level]["STORE_count"]
+ / first_dim_factor,
+ "total lines evicts": self.stats[cache_level]["EVICT_count"]
+ / first_dim_factor,
+ "cycles": None,
+ }
+ )
return infos
|
/home/runner/work/kerncraft/kerncraft/kerncraft/models/__init__.py#L2
Collection of performance models.
This model combines all performance models currently supported by kerncraft. Only the performance
model class is exported, so please add new models to __all__.
"""
+
from .ecm import ECM, ECMData, ECMCPU
from .roofline import RooflineFLOP, RooflineASM
from .benchmark import Benchmark
from .layer_condition import LC
from .base import PerformanceModel
RooflineIACA = RooflineASM # for downward compatibility
-__all__ = ['ECM', 'ECMData', 'ECMCPU', 'RooflineFLOP', 'RooflineASM', 'Benchmark', 'LC',
- 'PerformanceModel', 'RooflineIACA']
+__all__ = [
+ "ECM",
+ "ECMData",
+ "ECMCPU",
+ "RooflineFLOP",
+ "RooflineASM",
+ "Benchmark",
+ "LC",
+ "PerformanceModel",
+ "RooflineIACA",
+]
|
/home/runner/work/kerncraft/kerncraft/kerncraft/models/base.py#L1
#!/usr/bin/env python3
"""Performance model base class."""
+
class PerformanceModel:
"""Base class for performance models"""
+
# The name of the performance model (no abbreviation)
name = "performance-model name"
@classmethod
def configure_arggroup(cls, parser):
|
/home/runner/work/kerncraft/kerncraft/kerncraft/incore_model.py#L112
if op.name.startswith(prefix):
register_class_usage[prefix].append(op.name)
# Identify and count packed and avx instructions
if re.match(
- r"^[v]?(movu|mul|add|sub|div|fmadd(132|213|231)?)[h]?p[ds]", line.mnemonic
+ r"^[v]?(movu|mul|add|sub|div|fmadd(132|213|231)?)[h]?p[ds]",
+ line.mnemonic,
):
if line.mnemonic.startswith("v"):
avx_instruction_ctr += 1
packed_instruction_ctr += 1
|
/home/runner/work/kerncraft/kerncraft/kerncraft/incore_model.py#L145
# Extract destination references, ignoring var(%rip)
dst_mem_references = [
op
for op in line.semantic_operands["destination"]
- if isinstance(op, MemoryOperand) and (op.base is None or op.base.name != "rip")
+ if isinstance(op, MemoryOperand)
+ and (op.base is None or op.base.name != "rip")
]
if dst_mem_references:
if not stores_only:
stores_only = True
mem_references = []
mem_references += dst_mem_references
# If no destination references were found so far, include source references
if not stores_only:
mem_references += [
- op for op in line.semantic_operands["source"] if isinstance(op, MemoryOperand)
+ op
+ for op in line.semantic_operands["source"]
+ if isinstance(op, MemoryOperand)
]
if re.match(r"^inc[bwlq]?$", line.mnemonic):
reg = line.operands[0].name
modified_registers.append(reg)
increments[reg] = 1
|
/home/runner/work/kerncraft/kerncraft/kerncraft/incore_model.py#L210
if mem_references:
# we found memory references to work with
possible_idx_regs = list(
set(increments.keys()).intersection(
set(
- [mref.base.name for mref in mem_references if mref.base is not None]
- + [mref.index.name for mref in mem_references if mref.index is not None]
+ [
+ mref.base.name
+ for mref in mem_references
+ if mref.base is not None
+ ]
+ + [
+ mref.index.name
+ for mref in mem_references
+ if mref.index is not None
+ ]
)
)
)
for mref in mem_references:
for reg in list(possible_idx_regs):
|
/home/runner/work/kerncraft/kerncraft/kerncraft/incore_model.py#L276
farithmetic_ctr += 1
elif line.mnemonic in ["add", "sub", "mul"]:
iarithmetic_ctr += 1
# Counting use of vector registers
for op in line.operands:
- if isinstance(op, RegisterOperand) and op.prefix is not None and op.prefix in "zv":
+ if (
+ isinstance(op, RegisterOperand)
+ and op.prefix is not None
+ and op.prefix in "zv"
+ ):
vector_ctr += 1
# if isinstance(op, RegisterOperand) and 'range' in op.register and op.register.range[0].prefix in 'zv':
# vector_ctr += 1
# Count all instructions
instruction_ctr += 1
|
/home/runner/work/kerncraft/kerncraft/kerncraft/incore_model.py#L315
modified_registers[AArch64.normalize_to_register_str(d)] += 1
for l in block:
for d in l.operands:
if isinstance(d, MemoryOperand):
if d.post_indexed is not False or d.pre_indexed:
- modified_registers[AArch64.normalize_to_register_str(d.base)] += 1
+ modified_registers[
+ AArch64.normalize_to_register_str(d.base)
+ ] += 1
inc = 1
if isinstance(d.post_indexed, dict):
inc = int(d.post_indexed["value"])
if d.pre_indexed:
inc = int(d.offset.value)
|
/home/runner/work/kerncraft/kerncraft/kerncraft/incore_model.py#L333
# Extract and filter destination references (stores)
dst_mem_references = []
for dst in [
op
for op in chain(
- line.semantic_operands["destination"], line.semantic_operands["src_dst"]
+ line.semantic_operands["destination"],
+ line.semantic_operands["src_dst"],
)
if isinstance(op, MemoryOperand)
]:
# base or index must be a modified (i.e., changing) register
if (
- AArch64.normalize_to_register_str(dst.base) not in modified_registers
- and AArch64.normalize_to_register_str(dst.index) not in modified_registers
+ AArch64.normalize_to_register_str(dst.base)
+ not in modified_registers
+ and AArch64.normalize_to_register_str(dst.index)
+ not in modified_registers
):
continue
# offset operands with identifiers (e.g. `:lo12:gosa`) are ignored
if dst.offset is not None and isinstance(dst.offset, IdentifierOperand):
|
test-n-publish (3.9)
Node.js 16 actions are deprecated. Please update the following actions to use Node.js 20: actions/checkout@v2, actions/setup-python@v2, codecov/codecov-action@v1. For more information see: https://github.blog/changelog/2023-09-22-github-actions-transitioning-from-node-16-to-node-20/.
|
test-n-publish (3.9)
The following actions uses node12 which is deprecated and will be forced to run on node16: actions/checkout@v2, actions/setup-python@v2, codecov/codecov-action@v1. For more info: https://github.blog/changelog/2023-06-13-github-actions-all-actions-will-run-on-node16-instead-of-node12-by-default/
|
test-n-publish (3.8)
Node.js 16 actions are deprecated. Please update the following actions to use Node.js 20: actions/checkout@v2, actions/setup-python@v2, codecov/codecov-action@v1. For more information see: https://github.blog/changelog/2023-09-22-github-actions-transitioning-from-node-16-to-node-20/.
|
test-n-publish (3.8)
The following actions uses node12 which is deprecated and will be forced to run on node16: actions/checkout@v2, actions/setup-python@v2, codecov/codecov-action@v1. For more info: https://github.blog/changelog/2023-06-13-github-actions-all-actions-will-run-on-node16-instead-of-node12-by-default/
|
test-n-publish (3.10)
Node.js 16 actions are deprecated. Please update the following actions to use Node.js 20: actions/checkout@v2, actions/setup-python@v2, codecov/codecov-action@v1. For more information see: https://github.blog/changelog/2023-09-22-github-actions-transitioning-from-node-16-to-node-20/.
|
test-n-publish (3.10)
The following actions uses node12 which is deprecated and will be forced to run on node16: actions/checkout@v2, actions/setup-python@v2, codecov/codecov-action@v1. For more info: https://github.blog/changelog/2023-06-13-github-actions-all-actions-will-run-on-node16-instead-of-node12-by-default/
|
test-n-publish (3.7)
Node.js 16 actions are deprecated. Please update the following actions to use Node.js 20: actions/checkout@v2, actions/setup-python@v2, codecov/codecov-action@v1. For more information see: https://github.blog/changelog/2023-09-22-github-actions-transitioning-from-node-16-to-node-20/.
|
test-n-publish (3.7)
The following actions uses node12 which is deprecated and will be forced to run on node16: actions/checkout@v2, actions/setup-python@v2, codecov/codecov-action@v1. For more info: https://github.blog/changelog/2023-06-13-github-actions-all-actions-will-run-on-node16-instead-of-node12-by-default/
|