Update README.rst #95
195 errors
Black found 195 errors
Annotations
Check failure on line 9 in /home/runner/work/kerncraft/kerncraft/kerncraft/__init__.py
github-actions / Black
/home/runner/work/kerncraft/kerncraft/kerncraft/__init__.py#L1-L9
"""Kerncraft static analytical performance modeling framework and tool."""
-__version__ = '0.8.15'
+__version__ = "0.8.15"
from .kerncraft import main
+
__main__ = main
# To trigger travis deployment to pypi, do the following:
# 1. Increment __version___
# 2. commit to RRZE-HPC/kerncraft's master branch
Check failure on line 18 in /home/runner/work/kerncraft/kerncraft/kerncraft/__init__.py
github-actions / Black
/home/runner/work/kerncraft/kerncraft/kerncraft/__init__.py#L12-L18
def get_header_path() -> str:
"""Return local folder path of header files."""
import os
- return os.path.abspath(os.path.dirname(os.path.realpath(__file__))) + '/headers/'
+
+ return os.path.abspath(os.path.dirname(os.path.realpath(__file__))) + "/headers/"
github-actions / Black
/home/runner/work/kerncraft/kerncraft/examples/machine-files/plot_machine_file.py#L5-L67
import matplotlib.pyplot as plt
from matplotlib.ticker import EngFormatter
from kerncraft import machinemodel
-kernel_colors = 'bgrcmyk'
+kernel_colors = "bgrcmyk"
+
def main():
mm = machinemodel.MachineModel(sys.argv[1])
- kernels = sorted(mm['benchmarks']['kernels'])
- cache_levels = sorted(mm['benchmarks']['measurements'])
+ kernels = sorted(mm["benchmarks"]["kernels"])
+ cache_levels = sorted(mm["benchmarks"]["measurements"])
fig, axs = plt.subplots(len(cache_levels), 1, figsize=(7, 14), tight_layout=True)
lines = {}
for i, cache_level in enumerate(cache_levels):
max_bw = 0
max_bw_core = 0
axs[i].set_title(cache_level)
formatter1 = EngFormatter(places=0) # , sep="\N{THIN SPACE}") # U+2009
axs[i].yaxis.set_major_formatter(formatter1)
- if cache_level == 'L1':
+ if cache_level == "L1":
axs[i].set_ylabel("Bandwidth [B/s]")
else:
axs[i].set_ylabel("Bandwidth [B/s]\n(incl. write-allocate)")
- axs[i].set_xlabel('cores')
+ axs[i].set_xlabel("cores")
# axs[i].set_xscale('log')
for ki, kernel in enumerate(kernels):
- if cache_level == 'L1':
+ if cache_level == "L1":
# L1 does not have write-allocate, so everything is measured correctly
factor = 1.0
else:
- measurement_kernel_info = mm['benchmarks']['kernels'][kernel]
- factor = (float(measurement_kernel_info['read streams']['bytes']) +
- 2.0 * float(measurement_kernel_info['write streams']['bytes']) -
- float(measurement_kernel_info['read+write streams']['bytes'])) / \
- (float(measurement_kernel_info['read streams']['bytes']) +
- float(measurement_kernel_info['write streams']['bytes']))
+ measurement_kernel_info = mm["benchmarks"]["kernels"][kernel]
+ factor = (
+ float(measurement_kernel_info["read streams"]["bytes"])
+ + 2.0 * float(measurement_kernel_info["write streams"]["bytes"])
+ - float(measurement_kernel_info["read+write streams"]["bytes"])
+ ) / (
+ float(measurement_kernel_info["read streams"]["bytes"])
+ + float(measurement_kernel_info["write streams"]["bytes"])
+ )
- for SMT in mm['benchmarks']['measurements'][cache_level]:
+ for SMT in mm["benchmarks"]["measurements"][cache_level]:
measurements = [
- bw*factor
- for bw in mm['benchmarks']['measurements'][cache_level][SMT]['results'][kernel]]
- max_bw = max(measurements+[max_bw])
+ bw * factor
+ for bw in mm["benchmarks"]["measurements"][cache_level][SMT][
+ "results"
+ ][kernel]
+ ]
+ max_bw = max(measurements + [max_bw])
max_bw_core = max(max_bw_core, measurements[0])
- lines[kernel], = axs[i].plot(
+ (lines[kernel],) = axs[i].plot(
range(1, 1 + len(measurements)),
measurements,
- linestyle=['-', '--', '..', '-.'][SMT-1],
- color=kernel_colors[ki])
+ linestyle=["-", "--", "..", "-."][SMT - 1],
+ color=kernel_colors[ki],
+ )
axs[i].set_xlim(1)
- axs[i].axhline(max_bw, color='black')
- axs[i].axhline(max_bw_core, color='black')
- axs[i].set_yticks(np.append(axs[i].get_yticks(), [float(max_bw), float(max_bw_core)]))
- axs[i].set_xticks(range(1, 1+len(measurements)))
- fig.legend(lines.values(), lines.keys(), 'lower center', ncol=10)
- fig.savefig(sys.argv[1]+'.pdf')
- #plt.show()
+ axs[i].axhline(max_bw, color="black")
+ axs[i].axhline(max_bw_core, color="black")
+ axs[i].set_yticks(
+ np.append(axs[i].get_yticks(), [float(max_bw), float(max_bw_core)])
+ )
+ axs[i].set_xticks(range(1, 1 + len(measurements)))
+ fig.legend(lines.values(), lines.keys(), "lower center", ncol=10)
+ fig.savefig(sys.argv[1] + ".pdf")
+ # plt.show()
-if __name__ == '__main__':
+if __name__ == "__main__":
main()
Check failure on line 33 in /home/runner/work/kerncraft/kerncraft/kerncraft/cachetile.py
github-actions / Black
/home/runner/work/kerncraft/kerncraft/kerncraft/cachetile.py#L9-L33
from .kernel import KernelDescription
from .machinemodel import MachineModel
def create_parser():
- parser = argparse.ArgumentParser(description='Find optimal tiling sizes using the ECMData '
- 'model.')
- parser.add_argument('--machine', '-m', type=argparse.FileType('r'), required=True,
- help='Path to machine description yaml file.')
- parser.add_argument('--define', '-D', nargs=2, metavar=('KEY', 'VALUE'), default=[],
- action='append',
- help='Define fixed constants. Values must be integer.')
- parser.add_argument('--min-block-length', '-b', type=int, metavar='MIN', default=100)
- parser.add_argument('--verbose', '-v', action='count', default=0,
- help='Increases verbosity level.')
- parser.add_argument('--cores', '-c', metavar='CORES', type=int, default=1,
- help='Number of cores to be used in parallel. (default: 1)')
- parser.add_argument('description_file', metavar='FILE', type=argparse.FileType(),
- help='File with loop kernel description in YAML')
+ parser = argparse.ArgumentParser(
+ description="Find optimal tiling sizes using the ECMData " "model."
+ )
+ parser.add_argument(
+ "--machine",
+ "-m",
+ type=argparse.FileType("r"),
+ required=True,
+ help="Path to machine description yaml file.",
+ )
+ parser.add_argument(
+ "--define",
+ "-D",
+ nargs=2,
+ metavar=("KEY", "VALUE"),
+ default=[],
+ action="append",
+ help="Define fixed constants. Values must be integer.",
+ )
+ parser.add_argument(
+ "--min-block-length", "-b", type=int, metavar="MIN", default=100
+ )
+ parser.add_argument(
+ "--verbose", "-v", action="count", default=0, help="Increases verbosity level."
+ )
+ parser.add_argument(
+ "--cores",
+ "-c",
+ metavar="CORES",
+ type=int,
+ default=1,
+ help="Number of cores to be used in parallel. (default: 1)",
+ )
+ parser.add_argument(
+ "description_file",
+ metavar="FILE",
+ type=argparse.FileType(),
+ help="File with loop kernel description in YAML",
+ )
return parser
def simulate(kernel, model, define_dict, blocking_constant, blocking_length):
"""Setup and execute model with given blocking length"""
Check failure on line 48 in /home/runner/work/kerncraft/kerncraft/kerncraft/cachetile.py
github-actions / Black
/home/runner/work/kerncraft/kerncraft/kerncraft/cachetile.py#L37-L48
kernel.set_constant(k, v)
kernel.set_constant(blocking_constant, blocking_length)
model.analyze()
- return sum([cy for dscr, cy in model.results['cycles']])
+ return sum([cy for dscr, cy in model.results["cycles"]])
def run(parser, args):
# machine information
# Read machine description
Check failure on line 77 in /home/runner/work/kerncraft/kerncraft/kerncraft/cachetile.py
github-actions / Black
/home/runner/work/kerncraft/kerncraft/kerncraft/cachetile.py#L65-L77
var_type, var_size = var_info
for size in var_size:
for s in size.atoms(sympy.Symbol):
if s.name not in define_dict:
undefined_constants.add(s)
- assert len(undefined_constants) == 1, "There are multiple or none undefined constants {!r}. " \
+ assert len(undefined_constants) == 1, (
+ "There are multiple or none undefined constants {!r}. "
"Exactly one must be undefined.".format(undefined_constants)
+ )
blocking_constant = undefined_constants.pop()
if args.verbose >= 1:
print("blocking constant:", blocking_constant)
Check failure on line 96 in /home/runner/work/kerncraft/kerncraft/kerncraft/cachetile.py
github-actions / Black
/home/runner/work/kerncraft/kerncraft/kerncraft/cachetile.py#L78-L96
min_length = args.min_block_length
min_runtime = simulate(kernel, model, define_dict, blocking_constant, min_length)
# determain max search length
# upper bound: number of floats that fit into the last level cache
- max_length = int(machine['memory hierarchy'][-2]['size per group'])//4
+ max_length = int(machine["memory hierarchy"][-2]["size per group"]) // 4
if args.verbose >= 1:
print("upper search bound:", max_length)
- length = min_length*3
+ length = min_length * 3
while length < max_length:
runtime = simulate(kernel, model, define_dict, blocking_constant, length)
if args.verbose >= 1:
- print("min", min_length, min_runtime, "current", length, runtime, "max", max_length)
+ print(
+ "min",
+ min_length,
+ min_runtime,
+ "current",
+ length,
+ runtime,
+ "max",
+ max_length,
+ )
# Increase search window
if runtime > min_runtime:
max_length = length # and break
else:
Check failure on line 112 in /home/runner/work/kerncraft/kerncraft/kerncraft/cachetile.py
github-actions / Black
/home/runner/work/kerncraft/kerncraft/kerncraft/cachetile.py#L101-L112
length = (max_length - min_length) // 2 + min_length
# Execute simulation
runtime = simulate(kernel, model, define_dict, blocking_constant, length)
if args.verbose >= 1:
- print("min", min_length, min_runtime, "current", length, runtime, "max", max_length)
+ print(
+ "min",
+ min_length,
+ min_runtime,
+ "current",
+ length,
+ runtime,
+ "max",
+ max_length,
+ )
# Narrow search area
if runtime <= min_runtime:
min_runtime = runtime
min_length = length
Check failure on line 128 in /home/runner/work/kerncraft/kerncraft/kerncraft/cachetile.py
github-actions / Black
/home/runner/work/kerncraft/kerncraft/kerncraft/cachetile.py#L117-L128
print("found for {}:".format(blocking_constant))
print(length)
sys.exit(0)
else:
if args.verbose:
- print("nothing found. exceeded search window and not change in performance found.")
+ print(
+ "nothing found. exceeded search window and not change in performance found."
+ )
sys.exit(1)
def main():
# Create and populate parser
Check failure on line 139 in /home/runner/work/kerncraft/kerncraft/kerncraft/cachetile.py
github-actions / Black
/home/runner/work/kerncraft/kerncraft/kerncraft/cachetile.py#L132-L139
# BUSINESS LOGIC IS FOLLOWING
run(parser, args)
-if __name__ == '__main__':
+if __name__ == "__main__":
main()
Check failure on line 17 in /home/runner/work/kerncraft/kerncraft/kerncraft/intervals.py
github-actions / Black
/home/runner/work/kerncraft/kerncraft/kerncraft/intervals.py#L6-L17
"""Very simple interval implementation for integers (might also work on floats)."""
def __init__(self, *args, **kwargs):
"""If keywords *sane* is True (default: False), checks will not be done on given data."""
self.data = list(args)
- if not kwargs.get('sane', False):
+ if not kwargs.get("sane", False):
self.data = [d for d in self.data if d[1] > d[0]]
self._enforce_order()
self._enforce_no_overlap()
def _enforce_order(self):
Check failure on line 53 in /home/runner/work/kerncraft/kerncraft/kerncraft/intervals.py
github-actions / Black
/home/runner/work/kerncraft/kerncraft/kerncraft/intervals.py#L18-L53
self.data.sort(key=lambda d: d[0])
def _enforce_no_overlap(self, start_at=0):
"""Enforce that no ranges overlap in internal storage."""
i = start_at
- while i+1 < len(self.data):
- if self.data[i][1] >= self.data[i+1][0]:
+ while i + 1 < len(self.data):
+ if self.data[i][1] >= self.data[i + 1][0]:
# beginning of i+1-th range is contained in i-th range
- if self.data[i][1] < self.data[i+1][1]:
+ if self.data[i][1] < self.data[i + 1][1]:
# i+1-th range is longer, thus enlarge i-th range
- self.data[i][1] = self.data[i+1][1]
+ self.data[i][1] = self.data[i + 1][1]
# removed contained range
- del self.data[i+1]
+ del self.data[i + 1]
i += 1
def __and__(self, other):
"""Combine two intervals, under the assumption that they are sane."""
- return Intervals(*(self.data+other.data))
+ return Intervals(*(self.data + other.data))
def __len__(self):
"""Return sum of range lengths."""
- return int(sum(upper-lower for (lower, upper) in self.data))
+ return int(sum(upper - lower for (lower, upper) in self.data))
def __contains__(self, needle):
"""Return True if needle is contained in intervals."""
return any(lower <= needle < upper for (lower, upper) in self.data)
def __repr__(self):
"""Return string representation of object."""
- return str(self.__class__) + '(' + ', '.join([list.__repr__(d) for d in self.data]) + ')'
+ return (
+ str(self.__class__)
+ + "("
+ + ", ".join([list.__repr__(d) for d in self.data])
+ + ")"
+ )
def __eq__(self, other):
"""Return True if other contains exactly the same interval regions."""
return self.data == other.data
Check failure on line 37 in /home/runner/work/kerncraft/kerncraft/kerncraft/iaca_get.py
github-actions / Black
/home/runner/work/kerncraft/kerncraft/kerncraft/iaca_get.py#L10-L37
import platform
from urllib.request import urlopen
url_dict = {
- 'v3.0': {
- 'mac': 'https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-v3.0-mac.zip',
- 'lin64': 'https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-v3.0-lin64.zip',
+ "v3.0": {
+ "mac": "https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-v3.0-mac.zip",
+ "lin64": "https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-v3.0-lin64.zip",
},
- 'v2.3': {
- 'mac': 'https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.3-mac.zip',
- 'lin64': 'https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.3-lin64.zip',
+ "v2.3": {
+ "mac": "https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.3-mac.zip",
+ "lin64": "https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.3-lin64.zip",
},
- 'v2.2': {
- 'mac': 'https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.2-mac.zip',
- 'lin64': 'https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.2-lin64.zip',
+ "v2.2": {
+ "mac": "https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.2-mac.zip",
+ "lin64": "https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.2-lin64.zip",
},
- 'v2.1': {
- 'mac': 'https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.1-mac64.zip',
- 'lin64': 'https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.1-lin64.zip',
- }
+ "v2.1": {
+ "mac": "https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.1-mac64.zip",
+ "lin64": "https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.1-lin64.zip",
+ },
}
+
class TemporaryDirectory:
def __enter__(self):
self.tempdir = tempfile.mkdtemp()
return self.tempdir
Check failure on line 112 in /home/runner/work/kerncraft/kerncraft/kerncraft/iaca_get.py
github-actions / Black
/home/runner/work/kerncraft/kerncraft/kerncraft/iaca_get.py#L38-L112
def __exit__(self, type_, value, traceback):
shutil.rmtree(self.tempdir)
def get_os():
- os_map = {'Darwin': 'mac', 'Linux': 'lin64'}
+ os_map = {"Darwin": "mac", "Linux": "lin64"}
system = platform.system()
- assert system in os_map, "Unsupported operating system (platform.system() should return " \
- "Linux or Darwin)."
+ assert system in os_map, (
+ "Unsupported operating system (platform.system() should return "
+ "Linux or Darwin)."
+ )
return os_map[system]
def search_path():
"""Return potential locations of IACA installation."""
operating_system = get_os()
# 1st choice: in ~/.kerncraft/iaca-{}
# 2nd choice: in package directory / iaca-{}
- return [os.path.expanduser("~/.kerncraft/iaca/{}/".format(operating_system)),
- os.path.abspath(os.path.dirname(os.path.realpath(__file__))) + '/iaca/{}/'.format(
- operating_system)]
+ return [
+ os.path.expanduser("~/.kerncraft/iaca/{}/".format(operating_system)),
+ os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
+ + "/iaca/{}/".format(operating_system),
+ ]
def find_iaca():
"""Return (hopefully) valid installation of IACA."""
- requires = ['iaca2.2', 'iaca2.3', 'iaca3.0']
+ requires = ["iaca2.2", "iaca2.3", "iaca3.0"]
for path in search_path():
- path += 'bin/'
+ path += "bin/"
valid = True
for r in requires:
if not os.path.exists(path + r):
valid = False
break
if valid:
return path
- raise RuntimeError("No IACA installation found in {}. Run iaca_get command to fix this issue."
- "".format(search_path()))
+ raise RuntimeError(
+ "No IACA installation found in {}. Run iaca_get command to fix this issue."
+ "".format(search_path())
+ )
def main():
try:
path = find_iaca()
- print('IACA already installed at', path)
- if '--force' in sys.argv:
- sys.argv.remove('--force')
+ print("IACA already installed at", path)
+ if "--force" in sys.argv:
+ sys.argv.remove("--force")
else:
- print('For forced installation add --force')
+ print("For forced installation add --force")
sys.exit()
except RuntimeError:
pass
- if len(sys.argv) < 2 or sys.argv[1] != \
- "--I-accept-the-Intel-What-If-Pre-Release-License-Agreement-and-please-take-my-soul":
- print("Go to https://software.intel.com/protected-download/267266/157552 and read the"
- "Intel Pre-Release License Agreement.")
+ if (
+ len(sys.argv) < 2
+ or sys.argv[1]
+ != "--I-accept-the-Intel-What-If-Pre-Release-License-Agreement-and-please-take-my-soul"
+ ):
+ print(
+ "Go to https://software.intel.com/protected-download/267266/157552 and read the"
+ "Intel Pre-Release License Agreement."
+ )
print("")
- print("Add "
- "--I-accept-the-Intel-What-If-Pre-Release-License-Agreement-and-please-take-my-soul"
- " for installation of IACA.")
+ print(
+ "Add "
+ "--I-accept-the-Intel-What-If-Pre-Release-License-Agreement-and-please-take-my-soul"
+ " for installation of IACA."
+ )
sys.exit(1)
if len(sys.argv) >= 3:
- assert sys.argv[2] in ['lin64', 'mac']
+ assert sys.argv[2] in ["lin64", "mac"]
operating_system = sys.argv[2]
else:
operating_system = get_os()
# Locate and create IACA base directory, in reverse server order
base_dir = None
for path in reversed(search_path()):
- print("Trying " + path + ": ", end='', file=sys.stderr)
+ print("Trying " + path + ": ", end="", file=sys.stderr)
try:
os.makedirs(path)
base_dir = path
break
except PermissionError:
Check failure on line 263 in /home/runner/work/kerncraft/kerncraft/kerncraft/iaca_get.py
github-actions / Black
/home/runner/work/kerncraft/kerncraft/kerncraft/iaca_get.py#L115-L263
except OSError:
# Directory already exists
print("already exists.", file=sys.stderr)
continue
if base_dir is None:
- print('Aborted.', file=sys.stderr)
+ print("Aborted.", file=sys.stderr)
sys.exit(1)
else:
print("selected.", file=sys.stderr)
- print("IACA v2.1 (for manual use - only version analyzing latency):", file=sys.stderr)
- if operating_system == 'mac':
- operating_system_temp = 'mac64'
+ print(
+ "IACA v2.1 (for manual use - only version analyzing latency):", file=sys.stderr
+ )
+ if operating_system == "mac":
+ operating_system_temp = "mac64"
else:
operating_system_temp = operating_system
- url = url_dict['v2.1'][operating_system]
+ url = url_dict["v2.1"][operating_system]
print("Downloading", url, "...", file=sys.stderr)
zfile = zipfile.ZipFile(BytesIO(urlopen(url).read()))
- members = [n
- for n in zfile.namelist()
- if '/.' not in n and n.startswith('iaca-{:}/'.format(operating_system_temp))]
+ members = [
+ n
+ for n in zfile.namelist()
+ if "/." not in n and n.startswith("iaca-{:}/".format(operating_system_temp))
+ ]
# Exctract to temp folder and copy to correct directory
print("Extracting...", file=sys.stderr)
with TemporaryDirectory() as tempdir:
zfile.extractall(tempdir, members=members)
- shutil.copytree(tempdir + '/iaca-{}'.format(operating_system_temp), base_dir + 'v2.1')
+ shutil.copytree(
+ tempdir + "/iaca-{}".format(operating_system_temp), base_dir + "v2.1"
+ )
# Correct permissions of executables
print("Correcting permissions of binary...")
- st = os.stat(base_dir + 'v2.1/bin/iaca')
- os.chmod(
- base_dir + 'v2.1/bin/iaca',
- st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH
- )
- st = os.stat(base_dir + 'v2.1/bin/iaca.sh')
- os.chmod(
- base_dir + 'v2.1/bin/iaca.sh',
- st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH
+ st = os.stat(base_dir + "v2.1/bin/iaca")
+ os.chmod(
+ base_dir + "v2.1/bin/iaca",
+ st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH,
+ )
+ st = os.stat(base_dir + "v2.1/bin/iaca.sh")
+ os.chmod(
+ base_dir + "v2.1/bin/iaca.sh",
+ st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH,
)
# Fix iaca.sh
print("Fixing iaca.sh...", file=sys.stderr)
- iaca_sh = open(base_dir + 'v2.1/bin/iaca.sh').read()
- iaca_sh = iaca_sh.replace('realpath', 'readlink -f', 1)
- iaca_sh = iaca_sh.replace('mypath=`pwd`', 'mypath=`dirname $0`', 1)
- iaca_sh = iaca_sh.replace('path=$(cd "$(dirname "$0")"; pwd)',
- 'script=`readlink -f $0`\n\tpath=`dirname "$script"`', 1)
- open(base_dir + 'v2.1/bin/iaca.sh', 'w').write(iaca_sh)
- print("IACA v2.1 installed to", os.getcwd() + '/' + base_dir + 'v2.1', file=sys.stderr)
+ iaca_sh = open(base_dir + "v2.1/bin/iaca.sh").read()
+ iaca_sh = iaca_sh.replace("realpath", "readlink -f", 1)
+ iaca_sh = iaca_sh.replace("mypath=`pwd`", "mypath=`dirname $0`", 1)
+ iaca_sh = iaca_sh.replace(
+ 'path=$(cd "$(dirname "$0")"; pwd)',
+ 'script=`readlink -f $0`\n\tpath=`dirname "$script"`',
+ 1,
+ )
+ open(base_dir + "v2.1/bin/iaca.sh", "w").write(iaca_sh)
+ print(
+ "IACA v2.1 installed to", os.getcwd() + "/" + base_dir + "v2.1", file=sys.stderr
+ )
print("IACA v2.2 (for NHM and WSM support):", file=sys.stderr)
- url = url_dict['v2.2'][operating_system]
+ url = url_dict["v2.2"][operating_system]
print("Downloading", url, "...", file=sys.stderr)
zfile = zipfile.ZipFile(BytesIO(urlopen(url).read()))
- members = [n
- for n in zfile.namelist()
- if '/.' not in n and n.startswith('iaca-{:}/'.format(operating_system))]
+ members = [
+ n
+ for n in zfile.namelist()
+ if "/." not in n and n.startswith("iaca-{:}/".format(operating_system))
+ ]
# Exctract to temp folder and copy to correct directory
print("Extracting...", file=sys.stderr)
with TemporaryDirectory() as tempdir:
zfile.extractall(tempdir, members=members)
- shutil.copytree(tempdir + '/iaca-{}'.format(operating_system), base_dir + 'v2.2')
+ shutil.copytree(
+ tempdir + "/iaca-{}".format(operating_system), base_dir + "v2.2"
+ )
# Correct permissions of executables
print("Correcting permissions of binary...")
- st = os.stat(base_dir + 'v2.2/bin/iaca')
- os.chmod(
- base_dir + 'v2.2/bin/iaca',
- st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH
- )
- st = os.stat(base_dir + 'v2.2/bin/iaca.sh')
- os.chmod(
- base_dir + 'v2.2/bin/iaca.sh',
- st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH
+ st = os.stat(base_dir + "v2.2/bin/iaca")
+ os.chmod(
+ base_dir + "v2.2/bin/iaca",
+ st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH,
+ )
+ st = os.stat(base_dir + "v2.2/bin/iaca.sh")
+ os.chmod(
+ base_dir + "v2.2/bin/iaca.sh",
+ st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH,
)
# Fix iaca.sh
print("Fixing iaca.sh...", file=sys.stderr)
- iaca_sh = open(base_dir + 'v2.2/bin/iaca.sh').read()
- iaca_sh = iaca_sh.replace('realpath', 'readlink -f', 1)
- iaca_sh = iaca_sh.replace('mypath=`pwd`', 'mypath=`dirname $0`', 1)
- iaca_sh = iaca_sh.replace('path=$(cd "$(dirname "$0")"; pwd)',
- 'script=`readlink -f $0`\n\tpath=`dirname "$script"`', 1)
- open(base_dir + 'v2.2/bin/iaca.sh', 'w').write(iaca_sh)
- print("IACA v2.2 installed to", os.getcwd() + '/' + base_dir + 'v2.2', file=sys.stderr)
+ iaca_sh = open(base_dir + "v2.2/bin/iaca.sh").read()
+ iaca_sh = iaca_sh.replace("realpath", "readlink -f", 1)
+ iaca_sh = iaca_sh.replace("mypath=`pwd`", "mypath=`dirname $0`", 1)
+ iaca_sh = iaca_sh.replace(
+ 'path=$(cd "$(dirname "$0")"; pwd)',
+ 'script=`readlink -f $0`\n\tpath=`dirname "$script"`',
+ 1,
+ )
+ open(base_dir + "v2.2/bin/iaca.sh", "w").write(iaca_sh)
+ print(
+ "IACA v2.2 installed to", os.getcwd() + "/" + base_dir + "v2.2", file=sys.stderr
+ )
print("IACA v2.3 (for SNB and IVY support):", file=sys.stderr)
- url = url_dict['v2.3'][operating_system]
+ url = url_dict["v2.3"][operating_system]
print("Downloading", url, "...", file=sys.stderr)
zfile = zipfile.ZipFile(BytesIO(urlopen(url).read()))
- members = [n
- for n in zfile.namelist()
- if '/.' not in n and n.startswith('iaca-{:}/'.format(operating_system))]
+ members = [
+ n
+ for n in zfile.namelist()
+ if "/." not in n and n.startswith("iaca-{:}/".format(operating_system))
+ ]
# Exctract to temp folder and copy to correct directory
print("Extracting...", file=sys.stderr)
with TemporaryDirectory() as tempdir:
zfile.extractall(tempdir, members=members)
- shutil.copytree(tempdir + '/iaca-{}'.format(operating_system), base_dir + 'v2.3')
+ shutil.copytree(
+ tempdir + "/iaca-{}".format(operating_system), base_dir + "v2.3"
+ )
# Correct permissions of executables
print("Correcting permissions of binary...")
- st = os.stat(base_dir + 'v2.3/bin/iaca')
- os.chmod(
- base_dir + 'v2.3/bin/iaca',
- st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH
- )
- st = os.stat(base_dir + 'v2.3/bin/iaca.sh')
- os.chmod(
- base_dir + 'v2.3/bin/iaca.sh',
- st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH
+ st = os.stat(base_dir + "v2.3/bin/iaca")
+ os.chmod(
+ base_dir + "v2.3/bin/iaca",
+ st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH,
+ )
+ st = os.stat(base_dir + "v2.3/bin/iaca.sh")
+ os.chmod(
+ base_dir + "v2.3/bin/iaca.sh",
+ st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH,
)
# Fix iaca.sh
print("Fixing iaca.sh...", file=sys.stderr)
- iaca_sh = open(base_dir + 'v2.3/bin/iaca.sh').read()
- iaca_sh = iaca_sh.replace('realpath', 'readlink -f', 1)
- iaca_sh = iaca_sh.replace('mypath=`pwd`', 'mypath=`dirname $0`', 1)
- iaca_sh = iaca_sh.replace('path=$(cd "$(dirname "$0")"; pwd)',
- 'script=`readlink -f $0`\n\tpath=`dirname "$script"`', 1)
- open(base_dir + 'v2.3/bin/iaca.sh', 'w').write(iaca_sh)
- print("IACA v2.3 installed to", os.getcwd() + '/' + base_dir + 'v2.3', file=sys.stderr)
+ iaca_sh = open(base_dir + "v2.3/bin/iaca.sh").read()
+ iaca_sh = iaca_sh.replace("realpath", "readlink -f", 1)
+ iaca_sh = iaca_sh.replace("mypath=`pwd`", "mypath=`dirname $0`", 1)
+ iaca_sh = iaca_sh.replace(
+ 'path=$(cd "$(dirname "$0")"; pwd)',
+ 'script=`readlink -f $0`\n\tpath=`dirname "$script"`',
+ 1,
+ )
+ open(base_dir + "v2.3/bin/iaca.sh", "w").write(iaca_sh)
+ print(
+ "IACA v2.3 installed to", os.getcwd() + "/" + base_dir + "v2.3", file=sys.stderr
+ )
print("IACA v3.0 (for HSW, BDW, SKL and SKX support):", file=sys.stderr)
- url = url_dict['v3.0'][operating_system]
+ url = url_dict["v3.0"][operating_system]
print("Downloading", url, "...", file=sys.stderr)
zfile = zipfile.ZipFile(BytesIO(urlopen(url).read()))
- members = [n
- for n in zfile.namelist()
- if '/.' not in n and n.startswith('iaca-{:}/'.format(operating_system))]
+ members = [
+ n
+ for n in zfile.namelist()
+ if "/." not in n and n.startswith("iaca-{:}/".format(operating_system))
+ ]
# Exctract to temp folder and copy to correct directory
print("Extracting...", file=sys.stderr)
with TemporaryDirectory() as tempdir:
zfile.extractall(tempdir, members=members)
- shutil.copytree(tempdir + '/iaca-{}'.format(operating_system), base_dir + 'v3.0')
+ shutil.copytree(
+ tempdir + "/iaca-{}".format(operating_system), base_dir + "v3.0"
+ )
print("Correcting permissions of binary...", file=sys.stderr)
- st = os.stat(base_dir + 'v3.0/iaca')
- os.chmod(
- base_dir + 'v3.0/iaca',
- st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH
- )
- print("IACA v3.0 installed to", os.getcwd() + '/' + base_dir + 'v3.0', file=sys.stderr)
+ st = os.stat(base_dir + "v3.0/iaca")
+ os.chmod(
+ base_dir + "v3.0/iaca", st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH
+ )
+ print(
+ "IACA v3.0 installed to", os.getcwd() + "/" + base_dir + "v3.0", file=sys.stderr
+ )
# Create unified bin directory to access both operating_systems
- os.mkdir(base_dir + 'bin')
- os.symlink('../v2.1/bin/iaca.sh', base_dir + 'bin/iaca2.1')
- os.symlink('../v2.2/bin/iaca.sh', base_dir + 'bin/iaca2.2')
- os.symlink('../v2.3/bin/iaca.sh', base_dir + 'bin/iaca2.3')
- os.symlink('../v3.0/iaca', base_dir + 'bin/iaca3.0')
- print('export PATH=' + base_dir + 'bin/:$PATH')
-
-
-if __name__ == '__main__':
+ os.mkdir(base_dir + "bin")
+ os.symlink("../v2.1/bin/iaca.sh", base_dir + "bin/iaca2.1")
+ os.symlink("../v2.2/bin/iaca.sh", base_dir + "bin/iaca2.2")
+ os.symlink("../v2.3/bin/iaca.sh", base_dir + "bin/iaca2.3")
+ os.symlink("../v3.0/iaca", base_dir + "bin/iaca3.0")
+ print("export PATH=" + base_dir + "bin/:$PATH")
+
+
+if __name__ == "__main__":
main()
Check failure on line 35 in /home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py
github-actions / Black
/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L22-L35
def uneven_tuple_cmp(a, b):
length_diff = max(len(a), len(b)) - min(len(a), len(b))
if len(a) < len(b):
- a = (0,)*length_diff + a
+ a = (0,) * length_diff + a
elif len(b) < len(a):
- b = (0,)*length_diff + b
+ b = (0,) * length_diff + b
if a > b:
return 1
elif a < b:
return -1
else:
Check failure on line 74 in /home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py
github-actions / Black
/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L62-L74
e = e.expand()
key = []
# split into terms
terms, gens = e.as_terms()
- assert gens == [first_s] or first_s is None and gens == [], \
- "Expression was split into unusable terms: {}, expected.".format(gens, first_s)
+ assert (
+ gens == [first_s] or first_s is None and gens == []
+ ), "Expression was split into unusable terms: {}, expected.".format(gens, first_s)
# extract exponent and coefficient
for term, (coeff, cpart, ncpart) in terms:
coeff_real, coeff_imag = coeff
assert coeff_imag == 0, "Not supporting imaginary coefficients."
# Sort order: exponent (cpart), factor
Check failure on line 86 in /home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py
github-actions / Black
/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L75-L86
key[0] = (key[0][0], key[0][1])
# build key
key.sort(reverse=True)
# add missing exponent, coefficient tuples
i = 0
- for exponent in reversed(range(key[0][0]+1)):
+ for exponent in reversed(range(key[0][0] + 1)):
if len(key) > i and key[i][0] == exponent:
i += 1
continue
else:
key[i:i] = [(exponent, 0.0)]
Check failure on line 122 in /home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py
github-actions / Black
/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L110-L122
return 0
if isinstance(dimension_factor, sympy.Symbol):
return 1
# Replace all free symbols with one:
if not dimension_factor.free_symbols:
- raise ValueError("dimension_factor is neither a number, a symbol nor an expression based "
- "on symbols.")
+ raise ValueError(
+ "dimension_factor is neither a number, a symbol nor an expression based "
+ "on symbols."
+ )
free_symbols = list(dimension_factor.free_symbols)
for s in free_symbols[1:]:
dimension_factor = dimension_factor.subs(s, free_symbols[0])
if isinstance(dimension_factor, sympy.Pow):
return dimension_factor.as_base_exp()[1]
Check failure on line 185 in /home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py
github-actions / Black
/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L172-L185
if c:
coefficients[factor_idx] += c
break
# Test: reassemble original expression
- if expr != reduce(operator.add, [c*f for c, f in zip(coefficients, dimension_factors)], 0):
- raise ValueError("Unable to split expression and reproduce from coefficients and factors: "
- "{!r} with {!r}".format(terms, dimension_factors))
+ if expr != reduce(
+ operator.add, [c * f for c, f in zip(coefficients, dimension_factors)], 0
+ ):
+ raise ValueError(
+ "Unable to split expression and reproduce from coefficients and factors: "
+ "{!r} with {!r}".format(terms, dimension_factors)
+ )
return tuple(coefficients)
def canonical_relational(rel):
Check failure on line 273 in /home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py
github-actions / Black
/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L260-L273
def __init__(self, kernel, machine, cores=1, symbolic=False):
"""Initialize layer condition based predictor from kernel and machine object."""
CachePredictor.__init__(self, kernel, machine, cores=cores)
if isinstance(kernel, KernelCode):
# Make use of caching for symbolic LC representation:
- file_name = 'LC_analysis.pickle.lzma'
+ file_name = "LC_analysis.pickle.lzma"
file_path = kernel.get_intermediate_location(
- file_name, machine_and_compiler_dependent=False, other_dependencies=[str(cores)])
+ file_name,
+ machine_and_compiler_dependent=False,
+ other_dependencies=[str(cores)],
+ )
lock_mode, lock_fp = kernel.lock_intermediate(file_path)
if lock_mode == fcntl.LOCK_SH:
# use cache
self.results = compress_pickle.load(file_path)
lock_fp.close() # release lock
Check failure on line 311 in /home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py
github-actions / Black
/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L282-L311
if not symbolic:
self.desymbolize()
def desymbolize(self):
"""Evaluate LCs and remove symbols"""
- for i, options in enumerate(self.results['cache']):
+ for i, options in enumerate(self.results["cache"]):
for o in options:
- if self.kernel.subs_consts(o['condition']):
- self.results['cache'][i] = o
+ if self.kernel.subs_consts(o["condition"]):
+ self.results["cache"][i] = o
break
def build_symbolic_LCs(self):
# check that layer conditions can be applied on this kernel:
# 1. All iterations may only have a step width of 1
loop_stack = list(self.kernel.get_loop_stack())
- if any([l['increment'] != 1 for l in loop_stack]):
- raise ValueError("Can not apply layer condition, since not all loops are of step "
- "length 1.")
+ if any([l["increment"] != 1 for l in loop_stack]):
+ raise ValueError(
+ "Can not apply layer condition, since not all loops are of step "
+ "length 1."
+ )
# 2. The order of iterations must be reflected in the order of indices in all array
# references containing the inner loop index. If the inner loop index is not part of the
# reference, the reference is simply ignored
- index_order = [symbol_pos_int(l['index']) for l in loop_stack]
- for var_name, arefs in chain(self.kernel.sources.items(), self.kernel.destinations.items()):
+ index_order = [symbol_pos_int(l["index"]) for l in loop_stack]
+ for var_name, arefs in chain(
+ self.kernel.sources.items(), self.kernel.destinations.items()
+ ):
try:
if next(iter(arefs)) is None:
# Anything that is a scalar may be ignored
continue
except StopIteration:
Check failure on line 362 in /home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py
github-actions / Black
/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L319-L362
# Terms without any indices can be treat as constant offsets and are acceptable
if not idx:
continue
if len(idx) != 1:
- raise ValueError("Only one loop counter may appear per term. "
- "Problematic term: {}.".format(t))
+ raise ValueError(
+ "Only one loop counter may appear per term. "
+ "Problematic term: {}.".format(t)
+ )
else: # len(idx) == 1
idx = idx.pop()
# Check that number of multiplication match access order of iterator
- pow_dict = {k: v for k, v in t.as_powers_dict().items()
- if k != idx}
+ pow_dict = {
+ k: v for k, v in t.as_powers_dict().items() if k != idx
+ }
stride_dim = sum(pow_dict.values())
error = False
try:
- if loop_stack[-stride_dim-1]['index'] != idx.name:
+ if loop_stack[-stride_dim - 1]["index"] != idx.name:
error = True
except IndexError:
error = True
if error:
- raise ValueError("Number of multiplications in index term does not "
- "match loop counter order. "
- "Problematic term: {}.".format(t))
+ raise ValueError(
+ "Number of multiplications in index term does not "
+ "match loop counter order. "
+ "Problematic term: {}.".format(t)
+ )
# 3. Indices may only increase with one
- inner_index = symbol_pos_int(loop_stack[-1]['index'])
- inner_increment = loop_stack[-1]['increment']
- for aref in chain(chain(*self.kernel.sources.values()),
- chain(*self.kernel.destinations.values())):
+ inner_index = symbol_pos_int(loop_stack[-1]["index"])
+ inner_increment = loop_stack[-1]["increment"]
+ for aref in chain(
+ chain(*self.kernel.sources.values()),
+ chain(*self.kernel.destinations.values()),
+ ):
if aref is None:
continue
for expr in aref:
- diff = expr.subs(inner_index, 1+inner_increment) - expr.subs(inner_index, 1)
+ diff = expr.subs(inner_index, 1 + inner_increment) - expr.subs(
+ inner_index, 1
+ )
if diff != 0 and diff != 1:
# TODO support -1 aswell
- raise ValueError("Can not apply layer condition, array references may not "
- "increment more then one per iteration.")
+ raise ValueError(
+ "Can not apply layer condition, array references may not "
+ "increment more then one per iteration."
+ )
# FIXME handle multiple datatypes
element_size = self.kernel.datatypes_size[self.kernel.datatype]
indices = list([symbol_pos_int(l[0]) for l in self.kernel._loop_stack])
sympy_accesses = self.kernel.compile_sympy_accesses()
Check failure on line 540 in /home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py
github-actions / Black
/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L369-L540
array_dimensions = self.kernel.variables[var_name][1]
# Skipping scalars
if array_dimensions is None:
continue
for dimension in range(len(array_dimensions)):
- dimension_factors.append(reduce(operator.mul, array_dimensions[dimension+1:],
- sympy.Integer(1)))
+ dimension_factors.append(
+ reduce(
+ operator.mul,
+ array_dimensions[dimension + 1 :],
+ sympy.Integer(1),
+ )
+ )
for a in sympy_accesses[var_name]:
o = split_sympy_access_in_dim_offset(a, dimension_factors)
accesses[var_name].append(o)
# Skip non-variable offsets, where acs is [None, None, None] (or similar) or only made
# up from constant offsets
if not any(accesses[var_name]) or not any(
- [a == inner_index or a.coeff(inner_index) != 0
- for a in chain.from_iterable(accesses[var_name])]):
+ [
+ a == inner_index or a.coeff(inner_index) != 0
+ for a in chain.from_iterable(accesses[var_name])
+ ]
+ ):
continue
destinations.update(
- [(var_name, tuple(r)) for r in self.kernel.destinations.get(var_name, [])])
+ [
+ (var_name, tuple(r))
+ for r in self.kernel.destinations.get(var_name, [])
+ ]
+ )
acs = list(accesses[var_name])
# If accesses are of unequal length, pad with leading zero elements
max_dims = max(map(len, acs))
for i in range(len(acs)):
if len(acs[i]) < max_dims:
- acs[i] = (sympy.Integer(0),)*(max_dims-len(acs[i])) + acs[i]
+ acs[i] = (sympy.Integer(0),) * (max_dims - len(acs[i])) + acs[i]
# Sort accesses by decreasing order
acs.sort(reverse=True)
# Transform back into sympy expressions
for i in range(len(acs)):
- acs[i] = reduce(sympy.Add, [f*df for f, df in zip(acs[i], dimension_factors)])
+ acs[i] = reduce(
+ sympy.Add, [f * df for f, df in zip(acs[i], dimension_factors)]
+ )
# Create reuse distances by substracting accesses pairwise in decreasing order
- distances += [(acs[i-1]-acs[i]).simplify() for i in range(1, len(acs))]
+ distances += [(acs[i - 1] - acs[i]).simplify() for i in range(1, len(acs))]
# Add infinity for each array
distances.append(sympy.oo)
# Sort distances by decreasing order
distances.sort(reverse=True, key=sympy_expr_abs_distance_key)
# Create copy of distances in bytes:
- distances_bytes = [d*element_size for d in distances]
+ distances_bytes = [d * element_size for d in distances]
# CAREFUL! From here on we are working in byte offsets and not in indices anymore.
# converting access sets to lists, otherwise pprint will fail during obligatory sorting step
- results = {'accesses': {k: sorted(list(v), key=cmp_to_key(uneven_tuple_cmp))
- for k,v in accesses.items()},
- 'distances': distances,
- 'destinations': destinations,
- 'distances_bytes': distances_bytes,
- 'cache': []}
-
- sum_array_sizes = sum(self.kernel.array_sizes(in_bytes=True, subs_consts=False).values())
+ results = {
+ "accesses": {
+ k: sorted(list(v), key=cmp_to_key(uneven_tuple_cmp))
+ for k, v in accesses.items()
+ },
+ "distances": distances,
+ "destinations": destinations,
+ "distances_bytes": distances_bytes,
+ "cache": [],
+ }
+
+ sum_array_sizes = sum(
+ self.kernel.array_sizes(in_bytes=True, subs_consts=False).values()
+ )
for c in self.machine.get_cachesim(self.cores).levels(with_mem=False):
# Assuming increasing order of cache sizes
options = []
# Full caching
- options.append({
- 'condition': canonical_relational(c.size() > sum_array_sizes),
- 'hits': len(distances),
- 'misses': 0,
- 'evicts': 0,
- 'tail': sympy.oo,
- })
-
- for tail in sorted(set([d.simplify().expand() for d in distances_bytes]), reverse=True,
- key=sympy_expr_abs_distance_key):
+ options.append(
+ {
+ "condition": canonical_relational(c.size() > sum_array_sizes),
+ "hits": len(distances),
+ "misses": 0,
+ "evicts": 0,
+ "tail": sympy.oo,
+ }
+ )
+
+ for tail in sorted(
+ set([d.simplify().expand() for d in distances_bytes]),
+ reverse=True,
+ key=sympy_expr_abs_distance_key,
+ ):
# Assuming decreasing order of tails
# Ignoring infinity tail:
if tail is sympy.oo:
continue
cache_requirement = (
# Sum of inter-access caches
- sum([d for d in distances_bytes
- if sympy_expr_abs_distance_key(d) <= sympy_expr_abs_distance_key(tail)]
- ) +
+ sum(
+ [
+ d
+ for d in distances_bytes
+ if sympy_expr_abs_distance_key(d)
+ <= sympy_expr_abs_distance_key(tail)
+ ]
+ )
+ +
# Tails
- tail*len([d for d in distances_bytes
- if sympy_expr_abs_distance_key(d) >
- sympy_expr_abs_distance_key(tail)]))
+ tail
+ * len(
+ [
+ d
+ for d in distances_bytes
+ if sympy_expr_abs_distance_key(d)
+ > sympy_expr_abs_distance_key(tail)
+ ]
+ )
+ )
condition = canonical_relational(cache_requirement <= c.size())
hits = len(
- [d for d in distances_bytes
- if sympy_expr_abs_distance_key(d) <= sympy_expr_abs_distance_key(tail)])
+ [
+ d
+ for d in distances_bytes
+ if sympy_expr_abs_distance_key(d)
+ <= sympy_expr_abs_distance_key(tail)
+ ]
+ )
misses = len(
- [d for d in distances_bytes
- if sympy_expr_abs_distance_key(d) > sympy_expr_abs_distance_key(tail)])
+ [
+ d
+ for d in distances_bytes
+ if sympy_expr_abs_distance_key(d)
+ > sympy_expr_abs_distance_key(tail)
+ ]
+ )
# Resulting analysis
- options.append({
- 'condition': condition,
- 'hits': hits,
- 'misses': misses,
- 'evicts': len(destinations),
- 'tail': tail})
+ options.append(
+ {
+ "condition": condition,
+ "hits": hits,
+ "misses": misses,
+ "evicts": len(destinations),
+ "tail": tail,
+ }
+ )
# If we encountered a True condition, break to not include multiple such.
if isinstance(condition, BooleanTrue):
break
- if not isinstance(options[-1]['condition'], BooleanTrue):
+ if not isinstance(options[-1]["condition"], BooleanTrue):
# Fallback: no condition matched
- options.append({
- 'condition': True,
- 'hits': 0,
- 'misses': len(distances),
- 'evicts': len(destinations),
- 'tail': 0
- })
-
- results['cache'].append(options)
+ options.append(
+ {
+ "condition": True,
+ "hits": 0,
+ "misses": len(distances),
+ "evicts": len(destinations),
+ "tail": 0,
+ }
+ )
+
+ results["cache"].append(options)
self.results = results
def get_loads(self):
"""Return a list with number of loaded cache lines per memory hierarchy level."""
# TODO FIXME L1 loads need to be derived from accesses
- return [float('nan')]+[c['misses'] for c in self.results['cache']]
+ return [float("nan")] + [c["misses"] for c in self.results["cache"]]
def get_hits(self):
"""Return a list with number of hit cache lines per memory hierarchy level."""
# At last level, all previous misses are hits
- return [c['hits'] for c in self.results['cache']]+[self.results['cache'][-1]['misses']]
+ return [c["hits"] for c in self.results["cache"]] + [
+ self.results["cache"][-1]["misses"]
+ ]
def get_misses(self):
"""Return a list with number of missed cache lines per memory hierarchy level."""
# At last level, there are no misses
- return [c['misses'] for c in self.results['cache']]+[0]
+ return [c["misses"] for c in self.results["cache"]] + [0]
def get_stores(self):
"""Return a list with number of stored cache lines per memory hierarchy level."""
# TODO FIXME L1 stores need to be derived from accesses
- return [float('nan')]+[c['evicts'] for c in self.results['cache']]
+ return [float("nan")] + [c["evicts"] for c in self.results["cache"]]
def get_evicts(self):
"""Return a list with number of evicted cache lines per memory hierarchy level."""
# At last level, there are no evicts
- return [c['evicts'] for c in self.results['cache']]+[0]
+ return [c["evicts"] for c in self.results["cache"]] + [0]
def get_infos(self):
"""Return verbose information about the predictor."""
return self.results
class CacheSimulationPredictor(CachePredictor):
"""Predictor class based on layer condition analysis."""
+
def __init__(self, kernel, machine, cores=1):
"""Initialize cache simulation based predictor from kernel and machine object."""
CachePredictor.__init__(self, kernel, machine, cores)
if isinstance(kernel, KernelCode):
# Make use of caching for symbolic LC representation:
- file_name = 'CSIM_analysis.pickle.lzma'
+ file_name = "CSIM_analysis.pickle.lzma"
file_path = kernel.get_intermediate_location(
- file_name, machine_and_compiler_dependent=False,
- other_dependencies=[str(cores)]+[str(t) for t in self.kernel.constants.items()])
+ file_name,
+ machine_and_compiler_dependent=False,
+ other_dependencies=[str(cores)]
+ + [str(t) for t in self.kernel.constants.items()],
+ )
lock_mode, lock_fp = kernel.lock_intermediate(file_path)
if lock_mode == fcntl.LOCK_SH:
# use cache
cache = compress_pickle.load(file_path)
lock_fp.close() # release lock
- self.first_dim_factor = cache['first_dim_factor']
- self.stats = cache['stats']
- self.pretty_stats = cache['pretty_stats']
+ self.first_dim_factor = cache["first_dim_factor"]
+ self.stats = cache["stats"]
+ self.pretty_stats = cache["pretty_stats"]
else: # lock_mode == fcntl.LOCK_EX
# needs update
self.simulate()
compress_pickle.dump(
- {'first_dim_factor': self.first_dim_factor, 'stats': self.stats, 'pretty_stats': self.pretty_stats},
- file_path)
+ {
+ "first_dim_factor": self.first_dim_factor,
+ "stats": self.stats,
+ "pretty_stats": self.pretty_stats,
+ },
+ file_path,
+ )
lock_fp.close() # release lock
else:
# No caching support without filename for kernel code
self.simulate()
Check failure on line 565 in /home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py
github-actions / Black
/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L542-L565
# Get the machine's cache model and simulator
self.csim = self.machine.get_cachesim(self.cores)
# FIXME handle multiple datatypes
element_size = self.kernel.datatypes_size[self.kernel.datatype]
- cacheline_size = self.machine['cacheline size']
+ cacheline_size = self.machine["cacheline size"]
elements_per_cacheline = int(cacheline_size // element_size)
- iterations_per_cacheline = (sympy.Integer(self.machine['cacheline size']) /
- sympy.Integer(self.kernel.bytes_per_iteration))
+ iterations_per_cacheline = sympy.Integer(
+ self.machine["cacheline size"]
+ ) / sympy.Integer(self.kernel.bytes_per_iteration)
# Gathering some loop information:
inner_loop = list(self.kernel.get_loop_stack(subs_consts=True))[-1]
- inner_index = symbol_pos_int(inner_loop['index'])
- inner_increment = inner_loop['increment'] # Calculate the number of iterations for warm-up
+ inner_index = symbol_pos_int(inner_loop["index"])
+ inner_increment = inner_loop[
+ "increment"
+ ] # Calculate the number of iterations for warm-up
total_length = self.kernel.iteration_length()
max_iterations = self.kernel.subs_consts(total_length)
max_cache_size = sum([c.size() for c in self.csim.levels(with_mem=False)])
-
# Warmup
# Phase 1:
# define warmup interval boundaries
max_steps = 100