diff --git a/.ci/pytorch/test.sh b/.ci/pytorch/test.sh
index 5fe1f916dc3598..45a8bfb511ad89 100755
--- a/.ci/pytorch/test.sh
+++ b/.ci/pytorch/test.sh
@@ -302,7 +302,7 @@ test_perf_for_dashboard() {
           --accuracy --"$dtype" --backend "$backend" "$@" \
           --output "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_${suite}_${dtype}_training_cuda_accuracy.csv"
       python "benchmarks/dynamo/$suite.py" \
-          --accuracy --"$dtype" --backend "$backend" --dynamic-shapes --disable-cudagraphs "$@" \
+          --accuracy --"$dtype" --backend "$backend" --dynamic-shapes --dynamic-batch-only --disable-cudagraphs "$@" \
           --output "$TEST_REPORTS_DIR/${backend}_dynamic_${suite}_${dtype}_training_cuda_accuracy.csv"

       # Run performance test
@@ -316,7 +316,7 @@ test_perf_for_dashboard() {
           --performance --cold-start-latency --"$dtype" --backend "$backend" "$@" \
           --output "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_${suite}_${dtype}_training_cuda_performance.csv"
       python "benchmarks/dynamo/$suite.py" \
-          --performance --cold-start-latency --"$dtype" --backend "$backend" --dynamic-shapes --disable-cudagraphs "$@" \
+          --performance --cold-start-latency --"$dtype" --backend "$backend" --dynamic-shapes --dynamic-batch-only --disable-cudagraphs "$@" \
           --output "$TEST_REPORTS_DIR/${backend}_dynamic_${suite}_${dtype}_training_cuda_performance.csv"
   done
}
diff --git a/benchmarks/dynamo/common.py b/benchmarks/dynamo/common.py
index 12365561ceda54..19b8c0d5db0dc6 100644
--- a/benchmarks/dynamo/common.py
+++ b/benchmarks/dynamo/common.py
@@ -1693,6 +1693,11 @@ def get_example_inputs(self):
         action="store_true",
         help="Runs a dynamic shapes version of the benchmark, if available.",
     )
+    parser.add_argument(
+        "--dynamic-batch-only",
+        action="store_true",
+        help="Only assume batch dimension is dynamic. Implies --dynamic-shapes",
+    )
     parser.add_argument(
         "--specialize-int", action="store_true", help="Run with specialize_int=True."
     )
@@ -1956,6 +1961,10 @@ def run(runner, args, original_dir=None):
     if args.dynamic_ci_skips_only:
         args.dynamic_shapes = True
         args.ci = True
+    if args.dynamic_batch_only:
+        args.dynamic_shapes = True
+        torch._dynamo.config.assume_static_by_default = True
+        torch._dynamo.config.allow_ignore_mark_dynamic = True
     if args.dynamic_shapes:
         torch._dynamo.config.dynamic_shapes = True
     if args.specialize_int:
@@ -2329,6 +2338,21 @@ def run(runner, args, original_dir=None):
         elif args.bfloat16:
             model, example_inputs = cast_to_bf16(model, example_inputs)

+        # Look for stuff that looks like batch size, and mark it dynamic.
+        # Better integration would integrate directly with benchmark suite
+        # but cannot conveniently do this
+        # NB: This must be done late enough so that we don't do more
+        # conversions on the inputs
+        # NB: Assumes only the first batch-y like dimension is the batch
+        def detect_and_mark_batch(t):
+            for i, s in enumerate(t.size()):
+                if s == batch_size:
+                    torch._dynamo.mark_dynamic(t, i)
+                    break
+
+        if args.dynamic_batch_only:
+            tree_map(detect_and_mark_batch, example_inputs)
+
         if args.log_operator_inputs:
             log_operator_inputs(
                 model, example_inputs, runner.model_iter_fn, name, args
diff --git a/torch/_dynamo/config.py b/torch/_dynamo/config.py
index 4a962920ff0dd5..c8433901301350 100644
--- a/torch/_dynamo/config.py
+++ b/torch/_dynamo/config.py
@@ -68,6 +68,13 @@
 # see [Note - on the state of mark_dynamic]
 assume_static_by_default = False

+# Typically, if you mark_dynamic a dimension, we will error if the dimension
+# actually ended up getting specialized. This knob changes the behavior so
+# that we don't error at all. This is helpful for our CI where I'm using a
+# heuristic to mark batch dimensions as dynamic and the heuristic may get it
+# wrong.
+allow_ignore_mark_dynamic = False
+
 # Set this to False to assume nn.Modules() contents are immutable (similar assumption as freezing)
 guard_nn_modules = False

diff --git a/torch/_dynamo/guards.py b/torch/_dynamo/guards.py
index 42b653e88cc10e..4a94cc6fa11fe8 100644
--- a/torch/_dynamo/guards.py
+++ b/torch/_dynamo/guards.py
@@ -536,9 +536,10 @@ def TENSOR_MATCH(self, guard: Guard):
                     f"hasattr({tensor_name}, '_dynamo_dynamic_indices') == False"
                 )
             else:
-                assert not hasattr(
-                    value, "_dynamo_dynamic_indices"
-                ), f"Illegal Unreachable state, guard accumulation for dynamic tensor that should have been static. Initial static message: {tensor_static_reason_to_message(reason)}"  # noqa: B950
+                if not config.allow_ignore_mark_dynamic:
+                    assert not hasattr(
+                        value, "_dynamo_dynamic_indices"
+                    ), f"Illegal Unreachable state, guard accumulation for dynamic tensor that should have been static. Initial static message: {tensor_static_reason_to_message(reason)}"  # noqa: B950

         if len(code) > 0:
             self._produce_guard_code(guard, code)
diff --git a/torch/_dynamo/variables/builder.py b/torch/_dynamo/variables/builder.py
index 001eec9eddf8c5..897c597d5ffcbd 100644
--- a/torch/_dynamo/variables/builder.py
+++ b/torch/_dynamo/variables/builder.py
@@ -1165,7 +1165,10 @@ def wrap_to_fake_tensor_and_record(
             # Precedence: export constraints > eager constraints
             constraint = dim2constraint.get(i)
             if constraint is None:
-                if i in getattr(e, "_dynamo_dynamic_indices", set()):
+                if (
+                    i in getattr(e, "_dynamo_dynamic_indices", set())
+                    and not config.allow_ignore_mark_dynamic
+                ):
                     constraint = RelaxedUnspecConstraint()
             constraint_dims.append(constraint)

diff --git a/torch/fx/experimental/symbolic_shapes.py b/torch/fx/experimental/symbolic_shapes.py
index 0c0b9e6a4db604..960ec2c92aea66 100644
--- a/torch/fx/experimental/symbolic_shapes.py
+++ b/torch/fx/experimental/symbolic_shapes.py
@@ -1579,6 +1579,7 @@ def create_symbol(
         # Even if we're duck shaping, if we haven't seen this particular
         # value before, we also create a new symbol
         sympy_expr = sympy.Symbol(f"s{len(self.var_to_val)}", positive=True, integer=True)
+        log.info("create_symbol %s = %s", sympy_expr, val)
         # We always associate vars to vals
         self.var_to_val[sympy_expr] = sympy.Integer(val)
         # Do the appending later, because we always want to populate this
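For context, a minimal sketch (not part of the patch) of how the new flag and knob fit together. It mirrors the detect_and_mark_batch heuristic added to benchmarks/dynamo/common.py above; the batch_size value, the Conv2d stand-in model, and the input shape are made up for illustration.

import torch
import torch._dynamo
from torch.utils._pytree import tree_map

# With --dynamic-batch-only, the harness flips these three knobs:
torch._dynamo.config.dynamic_shapes = True
torch._dynamo.config.assume_static_by_default = True
torch._dynamo.config.allow_ignore_mark_dynamic = True

batch_size = 8  # hypothetical; the real harness gets this from the suite

def detect_and_mark_batch(t):
    # Same heuristic as the patch: the first dimension that matches
    # batch_size is assumed to be the batch dimension.
    if isinstance(t, torch.Tensor):
        for i, s in enumerate(t.size()):
            if s == batch_size:
                torch._dynamo.mark_dynamic(t, i)
                break

example_inputs = (torch.randn(batch_size, 3, 224, 224),)
tree_map(detect_and_mark_batch, example_inputs)

model = torch.nn.Conv2d(3, 16, 3)  # stand-in for a benchmark model
compiled = torch.compile(model)
# Only the marked dimension is treated as dynamic. If the heuristic marked
# the wrong dimension and it ends up specialized anyway,
# allow_ignore_mark_dynamic keeps Dynamo from raising, so one bad guess
# does not fail the whole dashboard run.
compiled(*example_inputs)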