-
Notifications
You must be signed in to change notification settings - Fork 0
/
searchindex.js
1 lines (1 loc) · 20.7 KB
/
searchindex.js
1
Search.setIndex({"docnames": ["DSPs", "DomainSpecificArchitectures", "about", "cpp", "fpgas", "gpus", "heterogenous", "index", "python"], "filenames": ["DSPs.rst", "DomainSpecificArchitectures.rst", "about.rst", "cpp.rst", "fpgas.rst", "gpus.rst", "heterogenous.rst", "index.rst", "python.rst"], "titles": ["Digital Signal Processors (DSPs)", "Domain Specific Architectures (DSAs)", "About Pashmina Cameron", "Optimizing C++", "Field Programmable Gate Arrays (FPGAs)", "Graphics Processing Units (GPUs)", "Heterogenous Compute", "Pashmina\u2019s Blog", "Optimizing Python"], "terms": {"whilst": [0, 4], "ar": [0, 1, 3, 4, 6, 8], "mostli": [0, 4], "found": [0, 6, 7, 8], "mobil": [0, 1, 6], "platform": 0, "thei": [0, 1, 3, 4, 6, 8], "an": [0, 1, 2, 3, 6, 8], "import": [0, 3, 8], "form": [0, 4, 5, 6], "acceler": [0, 1, 5, 6, 7], "A": [0, 1, 2, 3, 4, 6, 8], "i": [0, 1, 2, 3, 4, 5, 7, 8], "typic": [0, 1], "made": [0, 1, 3], "up": [0, 3, 4, 8], "number": [0, 2, 3, 4, 8], "parallel": [0, 4, 5, 8], "special": [0, 5, 6], "purpos": [0, 1, 4, 5, 6], "block": [0, 1, 3], "most": [0, 1, 4, 6, 8], "us": [0, 1, 2, 3, 4, 5, 6, 8], "applic": [0, 1, 4, 5, 6], "where": [0, 3, 5], "larg": [0, 3, 4, 5, 6, 8], "mathemat": 0, "oper": [0, 3, 5, 8], "need": [0, 3, 8], "perform": [0, 3, 5, 6], "quickli": 0, "repeatedli": 0, "data": [0, 2, 3, 5, 8], "seri": 0, "were": [0, 3, 5, 8], "origin": 0, "design": [0, 1, 6], "handl": [0, 6, 8], "analog": [0, 1], "flow": [0, 6], "activ": [0, 8], "wireless": 0, "transmiss": 0, "audio": 0, "veri": [0, 3, 4, 5, 6, 8], "effici": [0, 3, 4, 8], "build": 0, "ai": [0, 1, 2, 5], "process": [0, 1, 3, 6], "pipelin": [0, 4, 5], "convolut": 0, "There": [0, 3, 4, 6, 8], "specialist": [0, 5, 6], "individu": 0, "market": [0, 1, 5], "video": [0, 2, 6], "neural": [0, 1, 5], "network": [0, 1, 5], "howev": [0, 1, 3], "element": [0, 4, 8], "frequent": 0, "insid": 0, "soc": [0, 1], "hard": [0, 1, 3, 4, 8], "ip": [0, 4], "fpga": [0, 6], "see": [0, 1, 3, 4, 5, 8], "also": [0, 2, 3, 4, 8], "qualcomm": 0, "": [0, 1, 3, 6, 8], "snapdragon": 0, "provid": [0, 3, 4, 8], "servic": 0, "sdk": [0, 4], "visual": [0, 1], "automot": 0, "cadenc": 0, "visionc5": 0, "certain": 1, "have": [1, 3, 4, 5, 6, 8], "suffici": [1, 3, 4, 5], "demand": [1, 4, 6], "lead": [1, 3, 4, 5, 8], "ultim": [1, 2], "optmiz": 1, "techniqu": 1, "hardwar": [1, 4, 5, 6, 7, 8], "target": [1, 3, 5, 8], "one": [1, 3, 5, 6, 8], "exampl": [1, 3, 5, 6, 7, 8], "thi": [1, 3, 4, 5, 6, 8], "includ": [1, 2, 4, 6, 8], "cryptographi": 1, "compress": 1, "gngine": 1, "chain": [1, 3, 8], "hasher": 1, "In": [1, 2, 3, 5, 7, 8], "realiti": [1, 2], "mani": [1, 3], "fast": [1, 2, 4, 5, 7], "move": [1, 3, 5, 6, 8], "area": [1, 2, 6, 8], "trade": 1, "off": [1, 3, 8], "between": [1, 3], "fix": [1, 3], "some": [1, 3, 4, 5, 6, 8], "level": [1, 3, 8], "programablil": 1, "devic": [1, 2, 4, 6], "still": [1, 2, 5, 6, 8], "suit": [1, 4, 5, 6, 8], "total": [1, 8], "gener": [1, 2, 4, 5, 8], "usag": [1, 3, 4], "like": [1, 8], "cpu": [1, 3, 6], "categori": 1, "switch": 1, "processor": [1, 3, 5, 6, 8], "big": [1, 5, 8], "lot": [1, 3, 4, 8], "interest": [1, 2, 4], "case": [1, 4], "suitabl": [1, 5], "workload": 1, "close": [1, 8], "coupl": 1, "rout": [1, 3, 8], "filter": 1, "intel": [1, 3, 4, 8], "exascal": 1, "dataflow": 1, "engin": [1, 2, 4, 6, 7], "tell": 1, "how": [1, 3, 8], "yet": 1, "vector": [1, 3, 4, 6, 8], "nec": 1, "sx": 1, "aurora": 1, "particularli": 1, "growth": 1, "around": [1, 4, 8], "graphcor": 1, "googl": [1, 2], "tpu": 1, "detail": 1, "descript": [1, 4, 6], "hennesi": [1, 6], "patterson": [1, 3, 6], "arm": [1, 3, 7], "ml": 1, "mythic": 1, "unsual": 1, "approach": [1, 4, 6], "do": [1, 3, 5, 6, 8], "infer": 1, "other": [1, 3], "nn": 1, "under": [1, 3, 5, 8], "develop": [1, 2, 4, 5], "alreadi": [1, 4], "wikipedia": 1, "anoth": [1, 8], "imag": [1, 6], "which": [1, 2, 3, 4, 5, 8], "been": [1, 4, 5, 8], "long": 1, "time": [1, 3, 8], "recent": [1, 5, 6], "progress": [1, 5], "ha": [1, 3, 4, 5, 6, 8], "toward": [1, 6], "make": [1, 3, 4, 5, 6, 8], "them": [1, 3, 5, 6, 8], "more": [1, 3, 5, 6, 8], "flexibl": [1, 4, 6], "core": [1, 3, 6, 8], "programm": [1, 5, 8], "The": [1, 3, 4, 5, 6, 8], "wide": [1, 5, 6, 8], "varieti": [1, 8], "differ": [1, 3, 4, 6], "mean": [1, 4], "method": 1, "work": [1, 2, 3], "each": [1, 3, 4, 6], "through": [1, 8], "librari": [1, 2, 5], "tensorflow": 1, "halid": 1, "am": 2, "research": [2, 4, 7, 8], "appli": 2, "latest": [2, 6], "machin": [2, 3, 8], "learn": [2, 4, 5, 6, 8], "solv": 2, "real": 2, "world": 2, "challeng": 2, "goal": 2, "improv": [2, 3], "inclus": 2, "access": [2, 3, 8], "opportun": 2, "everyon": 2, "my": 2, "current": [2, 3, 4, 8], "model": 2, "comput": [2, 4, 7, 8], "vision": [2, 6], "both": [2, 3, 4, 8], "stochast": 2, "code": [2, 3, 4, 7, 8], "optim": [2, 4, 6, 7], "previous": 2, "microsoft": [2, 4], "cambridg": 2, "project": [2, 4], "hsd": 2, "optic": [2, 6], "storag": 2, "system": 2, "store": [2, 8], "holograph": 2, "crystal": 2, "high": [2, 5, 6], "power": [2, 4, 6, 8], "laser": 2, "silica": 2, "sustain": 2, "glass": 2, "speed": [2, 3, 4, 8], "chemistri": 2, "workflow": [2, 4], "contain": [2, 3, 7], "predict": [2, 8], "new": [2, 4, 6, 8], "drug": 2, "molecul": 2, "multi": [2, 3, 8], "properti": 2, "now": [2, 4, 8], "hous": 2, "novarti": 2, "human": 2, "motion": 2, "avatar": 2, "from": [2, 3, 6, 8], "spars": 2, "head": 2, "mount": 2, "signal": [2, 6], "web": 2, "co": [2, 5], "ran": 2, "msr": 2, "resid": 2, "program": [2, 6], "three": 2, "year": [2, 5, 6], "manag": [2, 3, 4, 8], "team": 2, "variou": [2, 5], "size": [2, 3, 7, 8], "befor": [2, 8], "join": 2, "2018": [2, 7], "led": [2, 5, 8], "r": 2, "d": 2, "part": [2, 3, 4, 6], "microfocu": 2, "hewlett": 2, "packard": 2, "enterpris": 2, "span": 2, "slam": 2, "3d": [2, 4], "reconstruct": 2, "chang": [2, 3, 4, 8], "detect": 2, "augment": 2, "industri": [2, 6], "solut": 2, "face": [2, 8], "recognit": 2, "object": 2, "licens": 2, "plate": 2, "intellig": 2, "scene": 2, "analysi": [2, 7], "did": [2, 3, 8], "phd": 2, "univers": 2, "gate": 2, "scholarship": 2, "previou": 2, "life": 2, "built": 2, "commerci": 2, "foreign": 2, "exchang": 2, "softwar": [2, 4, 5, 7], "integr": 2, "being": [2, 8], "full": [2, 4], "steam": 2, "todai": 2, "scholar": 2, "linkedin": 2, "email": 2, "pashabhi": 2, "yahoo": 2, "com": [2, 6], "introduct": [2, 3, 6], "autograd": 2, "confer": [2, 7], "uk": [2, 7], "2019": 2, "just": [2, 3, 7, 8], "enough": [2, 4, 7], "effort": [2, 3, 7, 8], "python": [2, 7], "root": [2, 8], "util": [2, 3, 8], "moler": 2, "tensorflow2": 2, "gnn": 2, "discuss": 3, "choleski": 3, "decomposit": 3, "we": [3, 4, 6, 7, 8], "window": [3, 8], "subsystem": 3, "linux": [3, 4, 8], "wsl": [3, 8], "benchmark": [3, 8], "respect": 3, "pure": [3, 8], "outlin": 3, "exist": [3, 4], "all": [3, 4, 5, 8], "avail": [3, 4, 8], "optimization_exampl": [3, 7, 8], "github": [3, 7, 8], "repo": [3, 7, 8], "For": [3, 8], "cuda": [3, 5, 8], "ship": 3, "nvidia": [3, 5, 6], "modifi": 3, "singl": 3, "precis": [3, 4, 5], "float": 3, "consist": 3, "two": [3, 4, 8], "variant": [3, 4, 8], "ll": 3, "t": [3, 8], "ldl": [3, 8], "here": [3, 4, 8], "except": [3, 8], "compar": 3, "review": [3, 8], "follow": [3, 4, 6, 8], "same": [3, 5, 8], "o3": 3, "ffast": 3, "math": 3, "bla": [3, 5, 8], "thread": [3, 8], "avx": [3, 7], "eigen": 3, "without": [3, 8], "mkl": [3, 8], "lapack": [3, 8], "implement": [3, 4, 6, 8], "set": [3, 8], "ourselv": [3, 8], "rather": [3, 6, 8], "than": [3, 8], "evalu": [3, 8], "aim": [3, 8], "far": [3, 8], "can": [3, 5, 6, 7, 8], "go": [3, 8], "basic": 3, "e": [3, 6, 8], "wise": 3, "recurs": 3, "instruct": [3, 8], "higher": 3, "after": [3, 8], "point": [3, 6, 8], "gain": [3, 5, 8], "onli": [3, 8], "better": [3, 4, 8], "similar": [3, 8], "perfom": 3, "mai": [3, 6, 7, 8], "achiev": 3, "write": [3, 8], "intrins": 3, "instead": [3, 8], "cost": [3, 4, 8], "involv": [3, 4, 5], "down": 3, "routin": [3, 8], "plug": 3, "place": [3, 6, 8], "hand": 3, "written": 3, "show": [3, 8], "suggest": 3, "handwritten": 3, "great": [3, 5], "small": [3, 8], "matric": [3, 8], "linear": [3, 8], "algebra": [3, 8], "you": [3, 6, 8], "expect": [3, 8], "sytem": 3, "combin": [3, 4], "layout": [3, 8], "multipl": [3, 8], "possibl": [3, 4, 5, 8], "further": [3, 7], "If": [3, 8], "gpu": [3, 6, 8], "give": [3, 8], "matrix": [3, 8], "fraction": [3, 8], "spent": [3, 8], "transfer": [3, 8], "so": [3, 6, 8], "worth": [3, 8], "order": [3, 5, 8], "minim": [3, 8], "trip": [3, 8], "wa": [3, 5], "disabl": [3, 8], "With": 3, "enabl": [3, 4, 6, 8], "default": 3, "becaus": [3, 8], "clock": 3, "frequenc": 3, "per": [3, 8], "temperatur": 3, "anaconda": [3, 8], "test": [3, 8], "taken": [3, 4, 8], "ubuntu": 3, "18": 3, "04": 3, "To": [3, 8], "verifi": 3, "find": 3, "download": 3, "extrem": [3, 4, 5, 6], "tune": 3, "check": 3, "max": [3, 8], "stai": 3, "roughli": 3, "constant": [3, 6], "when": [3, 4, 8], "run": [3, 4, 8], "physic": 3, "behaviour": 3, "control": [3, 4], "mkl_num_thread": 3, "omp_num_thread": 3, "read": 3, "ty": 3, "specif": [3, 4, 5, 6, 8], "allow": [3, 4, 5, 8], "latter": 3, "0": [3, 4, 8], "testcholeski": 3, "top": 3, "happen": 3, "appar": 3, "200": 3, "ani": [3, 4, 6, 8], "abov": 3, "measur": 3, "2": [3, 4, 5, 6, 8], "quit": 3, "well": [3, 5, 6, 8], "o2": 3, "highest": [3, 6], "request": 3, "sacrif": [3, 8], "safeti": 3, "littl": [3, 8], "ad": [3, 5], "turn": [3, 4], "doe": [3, 8], "notic": 3, "come": 3, "essenti": 3, "unsaf": 3, "due": 3, "propag": 3, "link": 3, "against": 3, "your": [3, 4, 8], "futur": [3, 7], "note": [3, 8], "while": [3, 8], "faster": 3, "understand": [3, 8], "implic": 3, "mitig": 3, "safer": 3, "either": 3, "function": [3, 8], "bottleneck": [3, 8], "let": [3, 6], "plai": 3, "wreak": 3, "havoc": 3, "downstream": 3, "depend": 3, "graph": 3, "below": 3, "definit": 3, "could": [3, 5, 8], "get": [3, 4, 5, 6, 8], "speedup": 3, "unit": [3, 4, 6, 8], "know": [3, 8], "accept": 3, "neg": 3, "gcc": 3, "bug": 3, "report": 3, "moment": 3, "ccflag": 3, "automat": [3, 8], "pick": 3, "linker": 3, "ld": 3, "circumv": 3, "issu": 3, "late": [3, 8], "got": 3, "vectoris": 3, "comparison": [3, 7, 8], "4": [3, 4, 8], "8": [3, 6, 8], "5": 3, "6": [3, 8], "7": 3, "3": [3, 4, 8], "put": [3, 5, 6, 8], "togeth": [3, 8], "takeawai": 3, "especi": [3, 6, 8], "64": 3, "256": 3, "closer": [3, 4], "onward": 3, "line": [3, 8], "flag": 3, "result": [3, 5, 8], "wors": 3, "inform": [3, 7, 8], "latenc": 3, "throughput": [3, 8], "screenshot": 3, "guid": 3, "look": [3, 7, 8], "good": [3, 4, 6, 8], "wai": [3, 5, 8], "choos": [3, 8], "determin": 3, "whether": [3, 8], "done": [3, 5], "actual": [3, 4], "redesign": 3, "sometim": [3, 5], "don": 3, "hope": 3, "disassembli": 3, "out": [3, 8], "add_avx": 3, "9": 3, "unrol": 3, "loop": [3, 8], "wherea": [3, 8], "didn": 3, "henc": [3, 4], "slower": [3, 8], "Of": 3, "cours": 3, "alwai": 3, "upgrad": 3, "newer": 3, "freedom": 3, "want": [3, 6, 8], "yourself": 3, "knowledg": 3, "awai": 3, "copi": 3, "correct": 3, "regist": 3, "would": 3, "bypass": [3, 8], "asm": 3, "wrong": 3, "convers": 3, "produc": [3, 8], "much": [3, 4, 5], "compact": 3, "op": 3, "jump": 3, "pairwis": [3, 8], "product": [3, 4], "40": 3, "220": 3, "harder": 3, "pragma": 3, "string": [3, 8], "those": [3, 4, 8], "print": 3, "file": 3, "basi": 3, "call": [3, 8], "cmake": 3, "These": [3, 4, 6], "therefor": 3, "invis": 3, "danger": 3, "should": 3, "separ": 3, "That": 3, "inspect": 3, "benefit": 3, "pollut": 3, "push_opt": 3, "void": 3, "functiontooptim": 3, "const": 3, "pop_opt": 3, "avx2": 3, "few": [3, 8], "via": [3, 4], "cat": 3, "proc": 3, "cpuinfo": 3, "particular": [3, 6], "pack": 3, "align": 3, "tutrial": 3, "mark": [3, 8], "harri": [3, 8], "2017": [3, 6], "m": 3, "ture": [3, 6], "award": [3, 6], "talk": 3, "dr": 3, "david": [3, 6], "john": [3, 6], "hennessi": 3, "neon": 3, "term": [4, 6], "cover": [4, 6], "class": [4, 5, 6, 8], "almost": [4, 6], "digit": 4, "circuit": 4, "runtim": 4, "architectur": [4, 5, 6, 8], "variabl": 4, "width": [4, 8], "across": [4, 8], "massiv": 4, "amount": [4, 5], "major": 4, "manufactur": 4, "who": [4, 8], "focuss": 4, "heavili": 4, "xilinx": 4, "altera": 4, "As": [4, 5, 6], "divid": 4, "player": 4, "modern": [4, 6], "mixtur": [4, 6], "reprogramm": 4, "somewhat": 4, "compon": 4, "memori": [4, 5], "arithmet": 4, "soft": 4, "logic": 4, "space": 4, "main": 4, "advantag": [4, 8], "over": [4, 8], "domain": [4, 6], "share": [4, 5], "rapid": 4, "scale": [4, 6], "cloud": 4, "instanc": 4, "requir": [4, 8], "agil": 4, "timescal": 4, "chip": [4, 5], "simpli": 4, "reprogram": 4, "tradition": 4, "languag": [4, 8], "verilog": 4, "vhdl": 4, "describ": 4, "becom": 4, "repres": [4, 8], "steep": [4, 5], "curv": [4, 5], "experienc": 4, "gone": 4, "tool": [4, 8], "compil": [4, 8], "familiar": 4, "take": [4, 6, 8], "hpc": [4, 5, 6], "studi": 4, "fft": 4, "ahm": 4, "sanaullah": 4, "martin": 4, "herbordt": 4, "sdaccel": 4, "simultan": 4, "multiprocess": 4, "defin": 4, "heterogen": [4, 7], "jose": 4, "nunez": 4, "yanez": 4, "et": 4, "l": [4, 8], "potenti": 4, "algorithm": [4, 5, 6, 8], "convent": 4, "respons": 4, "etc": [4, 8], "focu": [4, 6], "annouc": 4, "xeon": [4, 8], "packag": [4, 8], "hybrid": 4, "cip": 4, "rang": [4, 6], "directli": 4, "within": 4, "support": [4, 5, 8], "ultrascal": 4, "mpsoc": 4, "attach": 4, "normal": 4, "server": 4, "pcie": 4, "gen": 4, "factor": [4, 8], "start": [4, 6], "emerg": 4, "interconnect": [4, 5], "qpi": 4, "opencapi": 4, "ccix": 4, "coher": 4, "cach": [4, 7, 8], "greatli": 4, "type": [4, 6, 8], "catapult": 4, "brainwav": 4, "list": 4, "base": [4, 6], "offer": 4, "alibaba": 4, "amazon": [4, 6], "baidu": 4, "huawei": 4, "nimbix": 4, "tencent": 4, "earliest": 5, "initi": 5, "entir": [5, 8], "missus": 5, "limit": [5, 8], "what": [5, 8], "creativ": 5, "incredibli": 5, "capabl": 5, "becam": 5, "vertex": 5, "pixel": 5, "shader": 5, "wider": 5, "field": [5, 8], "huge": 5, "gem": 5, "predat": 5, "trend": [5, 6], "primarili": 5, "environ": [5, 8], "accompani": [5, 7], "kit": 5, "opencl": 5, "expos": 5, "friendli": [5, 8], "easier": 5, "leverag": 5, "doubl": [5, 6, 8], "driven": 5, "simul": 5, "custom": 5, "end": [5, 6], "nvlink": 5, "half": 5, "fp16": 5, "lower": [5, 8], "tensor": 5, "user": [5, 8], "help": [5, 8], "bandwidth": 5, "hbm": 5, "ram": 5, "ddr5": 5, "virtual": 5, "addit": [5, 7], "cubla": 5, "cudnn": 5, "deep": [5, 6], "ecosystem": 5, "evolv": [5, 6], "best": [5, 6, 8], "reach": 6, "moor": 6, "law": 6, "everi": [6, 8], "dennard": 6, "transistor": 6, "smaller": 6, "densiti": 6, "remain": [6, 8], "less": [6, 8], "overview": 6, "topic": 6, "quant": 6, "acm": 6, "lectur": 6, "youtub": 6, "tradit": 6, "meant": 6, "fairli": 6, "task": 6, "It": [6, 8], "seem": [6, 8], "obviou": 6, "thing": [6, 8], "conjunct": 6, "graphic": 6, "featur": 6, "enhanc": 6, "non": [6, 8], "dsp": 6, "characterist": 6, "stream": 6, "often": [6, 8], "path": 6, "cheap": 6, "dsa": 6, "degre": 6, "problem": 6, "own": 6, "section": 6, "intend": 6, "complet": 6, "brief": 6, "right": 6, "direct": 6, "increasingli": 6, "desktop": 6, "announc": 6, "xavier": 6, "sm": 6, "volta": 6, "stereo": 6, "onlin": 6, "http": 6, "www": 6, "nextplatform": 6, "focus": 6, "heteregen": 6, "upcom": 6, "hotchip": 6, "mainli": 6, "present": 6, "isca": 6, "academ": 6, "pdf": 7, "c": [7, 8], "effect": 7, "miss": 7, "assembli": [7, 8], "starter": 7, "512": 7, "julia": [7, 8], "e5": 8, "10": 8, "intal": 8, "pretti": 8, "distribut": 8, "scikit": 8, "document": 8, "standard": 8, "slow": 8, "idea": 8, "line_profil": 8, "pip": 8, "instal": 8, "add": 8, "decor": 8, "Then": 8, "kernprof": 8, "v": 8, "script": 8, "py": 8, "output": 8, "hit": 8, "content": 8, "19": 8, "20": 8, "def": 8, "approxmagnitud": 8, "x": 8, "y": 8, "1": 8, "13": 8, "65": 8, "min": 8, "22": 8, "23": 8, "15": 8, "return": 8, "414": 8, "think": 8, "about": 8, "heart": 8, "simpl": 8, "refactor": 8, "spy": 8, "intrus": 8, "modif": 8, "subprocess": 8, "nativ": 8, "specialis": 8, "llvm": 8, "byte": 8, "option": 8, "jit": 8, "float64": 8, "nopython": 8, "true": 8, "nogil": 8, "caus": 8, "fail": 8, "avoid": 8, "fall": 8, "back": 8, "silent": 8, "sure": 8, "slightli": 8, "dread": 8, "global": 8, "interpret": 8, "lock": 8, "sinc": 8, "deal": 8, "pbject": 8, "gil": 8, "safe": 8, "releas": 8, "usual": 8, "caveat": 8, "synchron": 8, "race": 8, "condit": 8, "disk": 8, "gave": 8, "creat": 8, "arrai": 8, "extend": 8, "version": 8, "guvector": 8, "subset": 8, "cannot": 8, "mix": 8, "try": 8, "stick": 8, "jitclass": 8, "member": 8, "format": 8, "raw": 8, "background": 8, "debug": 8, "mistak": 8, "mode": 8, "boon": 8, "recommend": 8, "stage": 8, "sake": 8, "readabl": 8, "cfunc": 8, "compat": 8, "callback": 8, "relat": 8, "hpat": 8, "simd": 8, "eas": 8, "save": 8, "modul": 8, "boundari": 8, "doesn": 8, "manipul": 8, "llvmlite": 8, "bind": 8, "set_opt": 8, "pointer": 8, "lv": 8, "14": 8, "estim": 8, "vf": 8, "sum": 8, "221": 8, "phi": 8, "000000e": 8, "00": 8, "b58": 8, "lr": 8, "ph": 8, "607": 8, "378": 8, "020": 8, "i64": 8, "239": 8, "024": 8, "436": 8, "376": 8, "019": 8, "440": 8, "nsw": 8, "nuw": 8, "522": 8, "mul": 8, "arg": 8, "assum": 8, "load": 8, "branch": 8, "statement": 8, "Not": 8, "meet": 8, "fastest": 8, "fastmath": 8, "forc": 8, "chose": 8, "understood": 8, "highli": 8, "reason": 8, "ever": 8, "baselin": 8, "squar": 8, "extra": 8, "o": 8, "n": 8, "occur": 8, "numpi": 8, "np": 8, "linalg": 8, "scipi": 8, "readbl": 8, "lend": 8, "itself": 8, "low": 8, "har": 8, "tradeoff": 8, "footnot": 8, "paper": 8, "sacrific": 8, "extent": 8, "immedi": 8, "comprehend": 8, "might": 8, "maximis": 8, "cython": 8, "lu": 8, "hood": 8, "blog": 8, "explan": 8}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"digit": 0, "signal": 0, "processor": 0, "dsp": 0, "domain": 1, "specif": 1, "architectur": 1, "dsa": 1, "program": [1, 4], "about": 2, "pashmina": [2, 7], "cameron": 2, "optim": [3, 8], "c": [3, 4], "algorithm": 3, "choic": 3, "conclus": [3, 8], "turbo": 3, "boost": 3, "numactl": 3, "compil": 3, "option": [3, 4], "version": 3, "best": 3, "case": 3, "limit": 3, "simd": 3, "debug": 3, "auto": 3, "gener": [3, 6], "assembli": 3, "select": 3, "refer": [3, 8], "field": 4, "programm": 4, "gate": 4, "arrai": 4, "fpga": 4, "what": [4, 6], "can": 4, "you": 4, "do": 4, "an": 4, "opencl": 4, "connect": 4, "cpu": 4, "integr": 4, "expans": 4, "bu": 4, "acceler": 4, "exampl": 4, "system": [4, 6], "usecas": 4, "graphic": 5, "process": 5, "unit": 5, "gpu": 5, "gpgpu": 5, "focus": 5, "featur": 5, "heterogen": 6, "comput": 6, "i": 6, "chip": 6, "soc": 6, "resourc": 6, "websit": 6, "confer": 6, "": 7, "blog": 7, "python": 8, "profil": 8, "updat": 8, "21": 8, "juli": 8, "2019": 8, "numba": 8, "analyz": 8, "perform": 8, "choleski": 8, "decomposit": 8}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx": 57}, "alltitles": {"Digital Signal Processors (DSPs)": [[0, "digital-signal-processors-dsps"]], "Domain Specific Architectures (DSAs)": [[1, "domain-specific-architectures-dsas"]], "Programming DSAs": [[1, "programming-dsas"]], "About Pashmina Cameron": [[2, "about-pashmina-cameron"]], "Optimizing C++": [[3, "optimizing-c"]], "Algorithm choice": [[3, "algorithm-choice"]], "Conclusion": [[3, "conclusion"], [8, "conclusion"]], "Turbo Boost": [[3, "turbo-boost"]], "numactl": [[3, "numactl"]], "Compiler options": [[3, "compiler-options"]], "Compiler version": [[3, "compiler-version"]], "Best case limits for SIMD": [[3, "best-case-limits-for-simd"]], "Debugging auto generated assembly": [[3, "debugging-auto-generated-assembly"]], "Selective optimizations": [[3, "selective-optimizations"]], "References": [[3, "references"], [8, "references"]], "Field Programmable Gate Arrays (FPGAs)": [[4, "field-programmable-gate-arrays-fpgas"]], "What can you do with an FPGA?": [[4, "what-can-you-do-with-an-fpga"]], "Programming an FPGA": [[4, "programming-an-fpga"]], "C++ / OpenCL": [[4, "c-opencl"]], "Connectivity options": [[4, "connectivity-options"]], "CPU integrated FPGAs": [[4, "cpu-integrated-fpgas"]], "Expansion bus connected accelerators": [[4, "expansion-bus-connected-accelerators"]], "Example Systems / Usecases": [[4, "example-systems-usecases"]], "Graphics Processing Units (GPUs)": [[5, "graphics-processing-units-gpus"]], "GPGPU focused features": [[5, "gpgpu-focused-features"]], "Heterogenous Compute": [[6, "heterogenous-compute"]], "What is heterogenous compute?": [[6, "what-is-heterogenous-compute"]], "System on Chip (SoC)": [[6, "system-on-chip-soc"]], "General Resources": [[6, "general-resources"]], "Websites": [[6, "websites"]], "Conferences": [[6, "conferences"]], "Pashmina\u2019s Blog": [[7, "pashmina-s-blog"]], "Optimizing Python": [[8, "optimizing-python"]], "Profiling": [[8, "profiling"]], "Updated on 21 July 2019": [[8, null]], "Numba": [[8, "numba"]], "Analyzing performance of Cholesky decomposition": [[8, "analyzing-performance-of-cholesky-decomposition"]]}, "indexentries": {}})