Run the benchmark suite with dynamic batch only (pytorch#97912)
Symbolic shapes compile time on full CI with inductor is horribly long (even though our aot_eager local runs suggested the added latency was only about 10s per model). To patch over the problem for now, run the benchmark suite with dynamic batch only. This should absolve a lot of sins.

Signed-off-by: Edward Z. Yang <[email protected]>

Pull Request resolved: pytorch#97912
Approved by: https://github.com/janeyx99, https://github.com/desertfire
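
For context, the new --dynamic-batch-only mode boils down to three config flips plus marking the batch dimension on each example input, as the hunks below show. A minimal sketch of the same idea outside the benchmark harness (the batch size and tensor shape here are illustrative, not from the commit):

    import torch
    import torch._dynamo

    # Mirror what --dynamic-batch-only sets in benchmarks/dynamo/common.py below.
    torch._dynamo.config.dynamic_shapes = True
    torch._dynamo.config.assume_static_by_default = True   # everything static unless marked
    torch._dynamo.config.allow_ignore_mark_dynamic = True  # a wrong mark is ignored, not an error

    batch_size = 32                                         # illustrative value
    x = torch.randn(batch_size, 3, 224, 224)                # illustrative input
    torch._dynamo.mark_dynamic(x, 0)                        # only the batch dimension is dynamic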
ezyang authored and pytorchmergebot committed Mar 30, 2023
1 parent 4cce607 commit 97fc8ea
Showing 6 changed files with 42 additions and 6 deletions.
4 changes: 2 additions & 2 deletions .ci/pytorch/test.sh
@@ -302,7 +302,7 @@ test_perf_for_dashboard() {
         --accuracy --"$dtype" --backend "$backend" "$@" \
         --output "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_${suite}_${dtype}_training_cuda_accuracy.csv"
     python "benchmarks/dynamo/$suite.py" \
-        --accuracy --"$dtype" --backend "$backend" --dynamic-shapes --disable-cudagraphs "$@" \
+        --accuracy --"$dtype" --backend "$backend" --dynamic-shapes --dynamic-batch-only --disable-cudagraphs "$@" \
         --output "$TEST_REPORTS_DIR/${backend}_dynamic_${suite}_${dtype}_training_cuda_accuracy.csv"

     # Run performance test
@@ -316,7 +316,7 @@ test_perf_for_dashboard() {
         --performance --cold-start-latency --"$dtype" --backend "$backend" "$@" \
         --output "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_${suite}_${dtype}_training_cuda_performance.csv"
     python "benchmarks/dynamo/$suite.py" \
-        --performance --cold-start-latency --"$dtype" --backend "$backend" --dynamic-shapes --disable-cudagraphs "$@" \
+        --performance --cold-start-latency --"$dtype" --backend "$backend" --dynamic-shapes --dynamic-batch-only --disable-cudagraphs "$@" \
         --output "$TEST_REPORTS_DIR/${backend}_dynamic_${suite}_${dtype}_training_cuda_performance.csv"
   done
 }
24 changes: 24 additions & 0 deletions benchmarks/dynamo/common.py
@@ -1693,6 +1693,11 @@ def get_example_inputs(self):
         action="store_true",
         help="Runs a dynamic shapes version of the benchmark, if available.",
     )
+    parser.add_argument(
+        "--dynamic-batch-only",
+        action="store_true",
+        help="Only assume batch dimension is dynamic. Implies --dynamic-shapes",
+    )
     parser.add_argument(
         "--specialize-int", action="store_true", help="Run with specialize_int=True."
     )
@@ -1956,6 +1961,10 @@ def run(runner, args, original_dir=None):
     if args.dynamic_ci_skips_only:
         args.dynamic_shapes = True
         args.ci = True
+    if args.dynamic_batch_only:
+        args.dynamic_shapes = True
+        torch._dynamo.config.assume_static_by_default = True
+        torch._dynamo.config.allow_ignore_mark_dynamic = True
     if args.dynamic_shapes:
         torch._dynamo.config.dynamic_shapes = True
     if args.specialize_int:
@@ -2329,6 +2338,21 @@ def run(runner, args, original_dir=None):
         elif args.bfloat16:
             model, example_inputs = cast_to_bf16(model, example_inputs)

+        # Look for stuff that looks like batch size, and mark it dynamic.
+        # Better integration would integrate directly with benchmark suite
+        # but cannot conveniently do this
+        # NB: This must be done late enough so that we don't do more
+        # conversions on the inputs
+        # NB: Assumes only the first batch-y like dimension is the batch
+        def detect_and_mark_batch(t):
+            for i, s in enumerate(t.size()):
+                if s == batch_size:
+                    torch._dynamo.mark_dynamic(t, i)
+                    break
+
+        if args.dynamic_batch_only:
+            tree_map(detect_and_mark_batch, example_inputs)
+
         if args.log_operator_inputs:
             log_operator_inputs(
                 model, example_inputs, runner.model_iter_fn, name, args
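A self-contained sketch of how the heuristic above behaves on a nested input structure. The isinstance check and the example inputs are additions for illustration; mark_dynamic records the marked dimensions on the tensor as _dynamo_dynamic_indices, which is the attribute the guards.py and builder.py hunks below consult:

    import torch
    import torch._dynamo
    from torch.utils._pytree import tree_map

    batch_size = 8  # illustrative

    def detect_and_mark_batch(t):
        # Mark only the first dimension whose size equals the batch size.
        if isinstance(t, torch.Tensor):
            for i, s in enumerate(t.size()):
                if s == batch_size:
                    torch._dynamo.mark_dynamic(t, i)
                    break

    # tree_map walks arbitrarily nested tuples/lists/dicts of example inputs.
    example_inputs = (torch.randn(8, 3, 224, 224), {"mask": torch.ones(8, 16)})
    tree_map(detect_and_mark_batch, example_inputs)

Because the size match is purely heuristic, a non-batch dimension that happens to equal the batch size can be marked by mistake; that is why the commit also adds allow_ignore_mark_dynamic instead of erroring in that case.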
7 changes: 7 additions & 0 deletions torch/_dynamo/config.py
@@ -68,6 +68,13 @@
 # see [Note - on the state of mark_dynamic]
 assume_static_by_default = False

+# Typically, if you mark_dynamic a dimension, we will error if the dimension
+# actually ended up getting specialized. This knob changes the behavior so
+# that we don't error at all. This is helpful for our CI where I'm using a
+# heuristic to mark batch dimensions as dynamic and the heuristic may get it
+# wrong.
+allow_ignore_mark_dynamic = False
+
 # Set this to False to assume nn.Modules() contents are immutable (similar assumption as freezing)
 guard_nn_modules = False

7 changes: 4 additions & 3 deletions torch/_dynamo/guards.py
@@ -536,9 +536,10 @@ def TENSOR_MATCH(self, guard: Guard):
                     f"hasattr({tensor_name}, '_dynamo_dynamic_indices') == False"
                 )
             else:
-                assert not hasattr(
-                    value, "_dynamo_dynamic_indices"
-                ), f"Illegal Unreachable state, guard accumulation for dynamic tensor that should have been static. Initial static message: {tensor_static_reason_to_message(reason)}"  # noqa: B950
+                if not config.allow_ignore_mark_dynamic:
+                    assert not hasattr(
+                        value, "_dynamo_dynamic_indices"
+                    ), f"Illegal Unreachable state, guard accumulation for dynamic tensor that should have been static. Initial static message: {tensor_static_reason_to_message(reason)}"  # noqa: B950

             if len(code) > 0:
                 self._produce_guard_code(guard, code)
5 changes: 4 additions & 1 deletion torch/_dynamo/variables/builder.py
@@ -1165,7 +1165,10 @@ def wrap_to_fake_tensor_and_record(
             # Precedence: export constraints > eager constraints
             constraint = dim2constraint.get(i)
             if constraint is None:
-                if i in getattr(e, "_dynamo_dynamic_indices", set()):
+                if (
+                    i in getattr(e, "_dynamo_dynamic_indices", set())
+                    and not config.allow_ignore_mark_dynamic
+                ):
                     constraint = RelaxedUnspecConstraint()
             constraint_dims.append(constraint)

1 change: 1 addition & 0 deletions torch/fx/experimental/symbolic_shapes.py
@@ -1579,6 +1579,7 @@ def create_symbol(
             # Even if we're duck shaping, if we haven't seen this particular
             # value before, we also create a new symbol
             sympy_expr = sympy.Symbol(f"s{len(self.var_to_val)}", positive=True, integer=True)
+            log.info("create_symbol %s = %s", sympy_expr, val)
             # We always associate vars to vals
             self.var_to_val[sympy_expr] = sympy.Integer(val)
             # Do the appending later, because we always want to populate this
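One way to surface the new create_symbol log line from a small repro, assuming the module's logger follows the usual logging.getLogger(__name__) pattern (the logger name and the torch.compile repro are assumptions, not shown in this diff):

    import logging
    import torch

    logging.basicConfig(level=logging.INFO)
    logging.getLogger("torch.fx.experimental.symbolic_shapes").setLevel(logging.INFO)

    def f(x):
        return x * 2

    # With dynamic shapes enabled, each fresh size that is not duck-shaped to an
    # existing value should print a "create_symbol s0 = 8"-style line.
    compiled = torch.compile(f, dynamic=True)
    compiled(torch.randn(8, 4))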
