pytorch · ManfeiBai · Jan 17, 2024 · Jan 17, 2024 · Jan 17, 2024 · Jan 17, 2024
diff --git a/codegen/xla_native_functions.yaml b/codegen/xla_native_functions.yaml
@@ -292,6 +292,7 @@ supported:
   - rrelu_with_noise_backward
   - rsub.Scalar
   - rsub.Tensor
+  - scan
   - scatter.reduce
   - scatter.src
   - scatter.value

diff --git a/torch_xla/csrc/aten_xla_type.cpp b/torch_xla/csrc/aten_xla_type.cpp
@@ -2648,6 +2648,14 @@ at::Tensor scatter_reduce_helper(const at::Tensor& self, int64_t dim,
   }
 }
 
+// Lowers the `scan` op to XLA: unwraps `init` and `xs` into XLA tensors,
+// forwards them together with `f` to tensor_methods::scan, and wraps the
+// resulting XLA tensor back into an at::Tensor.
+// NOTE(review): `self` is not used by this lowering -- confirm that this
+// matches the registered op schema.
+at::Tensor XLANativeFunctions::scan(const at::Tensor& self,
+                                    const Callable f,
+                                    const at::Tensor& init,
+                                    const at::Tensor& xs) {
+  TORCH_LAZY_FN_COUNTER_TIMED_TRACING("xla::");
+  // tensor_methods::scan takes XLATensorPtr operands, so the ATen tensors
+  // have to be unwrapped through the bridge before the call.
+  return bridge::AtenFromXlaTensor(tensor_methods::scan(
+      f, bridge::GetXlaTensor(init), bridge::GetXlaTensor(xs)));
+}
+
 at::Tensor XLANativeFunctions::scatter(const at::Tensor& self, int64_t dim,
                                        const at::Tensor& index,
                                        const at::Tensor& src) {

diff --git a/torch_xla/csrc/ops/scan.cpp b/torch_xla/csrc/ops/scan.cpp
@@ -0,0 +1,32 @@
+#include "torch_xla/csrc/ops/scan.h"
+
+#include "torch_xla/csrc/helpers.h"
+#include "torch_xla/csrc/lowering_context.h"
+#include "torch_xla/csrc/xla_lower_util.h"
+
+namespace torch_xla {
+namespace {
+
+// Shape function for the Scan node: the output takes the XLA shape of
+// `input` unchanged.
+xla::Shape NodeOutputShape(const torch::lazy::Value& input) {
+  return GetXlaShape(input);
+}
+
+}  // namespace
+
+// Builds the IR node with `f`, `init` and `xs` as its three operands. The
+// output shape is taken from `init`.
+// The operands are IR values because that is what both Clone() below and
+// tensor_methods::scan pass to this constructor.
+// NOTE(review): how a callable is represented as an IR value is not settled
+// by this file -- confirm.
+Scan::Scan(const torch::lazy::Value& f, const torch::lazy::Value& init,
+           const torch::lazy::Value& xs)
+    : XlaNode(torch::lazy::OpKind(at::aten::scan), {f, init, xs},
+              [&]() { return NodeOutputShape(init); },
+              // Lower() emits a single op, so the node has one output.
+              /*num_outputs=*/1) {}
+
+// Re-creates this node on top of a new operand list, in (f, init, xs) order.
+torch::lazy::NodePtr Scan::Clone(torch::lazy::OpList operands) const {
+  return torch::lazy::MakeNode<Scan>(operands.at(0), operands.at(1),
+                                     operands.at(2));
+}
+
+// Fetches the lowered XLA op of every operand and hands them to BuildScan.
+XlaOpVector Scan::Lower(LoweringContext* loctx) const {
+  xla::XlaOp f = loctx->GetOutputOp(operand(0));
+  xla::XlaOp init = loctx->GetOutputOp(operand(1));
+  xla::XlaOp xs = loctx->GetOutputOp(operand(2));
+  // BuildScan yields one xla::XlaOp, so the single-op ReturnOp is used rather
+  // than ReturnOps, which expects a list of ops.
+  return ReturnOp(BuildScan(f, init, xs), loctx);
+}
+
+}  // namespace torch_xla
diff --git a/torch_xla/csrc/ops/scan.h b/torch_xla/csrc/ops/scan.h
@@ -0,0 +1,22 @@
+#ifndef XLA_TORCH_XLA_CSRC_OPS_SCAN_H_
+#define XLA_TORCH_XLA_CSRC_OPS_SCAN_H_
+
+#include "torch_xla/csrc/ir.h"
+
+namespace torch_xla {
+
+// IR node for the `scan` op, with operands (f, init, xs).
+// This node has no metadata, so it could have been implemented as generic-op in
+// ops.cpp, but since this might require special handling from upper IR layers,
+// it gets its own IR node class.
+class Scan : public XlaNode {
+ public:
+  // Operands are IR values, matching what Clone() and tensor_methods::scan
+  // pass to the constructor.
+  Scan(const torch::lazy::Value& f, const torch::lazy::Value& init,
+       const torch::lazy::Value& xs);
+
+  // Rebuilds the node from a replacement operand list.
+  torch::lazy::NodePtr Clone(torch::lazy::OpList operands) const override;
+
+  // Lowers the node to XLA ops through the given lowering context.
+  XlaOpVector Lower(LoweringContext* loctx) const override;
+};
+
+}  // namespace torch_xla
+
+#endif  // XLA_TORCH_XLA_CSRC_OPS_SCAN_H_
diff --git a/torch_xla/csrc/tensor_methods.cpp b/torch_xla/csrc/tensor_methods.cpp
@@ -105,6 +105,7 @@
 #include "torch_xla/csrc/ops/rrelu_with_noise.h"
 #include "torch_xla/csrc/ops/rrelu_with_noise_backward.h"
 #include "torch_xla/csrc/ops/scalar.h"
+#include "torch_xla/csrc/ops/scan.h"
 #include "torch_xla/csrc/ops/scatter.h"
 #include "torch_xla/csrc/ops/scatter_add.h"
 #include "torch_xla/csrc/ops/scatter_reduce.h"
@@ -2392,6 +2393,12 @@ void copy_(XLATensorPtr& input, XLATensorPtr& src) {
   }
 }
 
+// Creates a Scan IR node from the IR values of `f`, `init` and `xs`, and
+// returns a new tensor, created from `init`, that wraps it.
+XLATensorPtr scan(const Callable f, const XLATensorPtr& init,
+                  const XLATensorPtr& xs) {
+  torch::lazy::NodePtr scan_node = torch::lazy::MakeNode<Scan>(
+      f->GetIrValue(), init->GetIrValue(), xs->GetIrValue());
+  return init->CreateFrom(scan_node);
+}
+
 XLATensorPtr scatter(const XLATensorPtr& input, int64_t dim,
                      const XLATensorPtr& index, const XLATensorPtr& src) {
   return input->CreateFrom(torch::lazy::MakeNode<Scatter>(

diff --git a/torch_xla/csrc/tensor_methods.h b/torch_xla/csrc/tensor_methods.h
@@ -752,6 +752,9 @@ XLATensorPtr rsub(
 
 void copy_(XLATensorPtr& input, XLATensorPtr& src);
 
+// Builds a Scan IR node over `f`, `init` and `xs`; the returned tensor is
+// created from `init`.
+XLATensorPtr scan(const Callable f, const XLATensorPtr& init,
+                  const XLATensorPtr& xs);
+
 XLATensorPtr scatter(const XLATensorPtr& input, int64_t dim,
                      const XLATensorPtr& index, const XLATensorPtr& src);
 XLATensorPtr scatter(const XLATensorPtr& input, int64_t dim,

diff --git a/torch_xla/csrc/xla_lower_util.cpp b/torch_xla/csrc/xla_lower_util.cpp
@@ -68,6 +68,20 @@ xla::XlaOp GetPromotedR1Mask(xla::XlaOp mask, const xla::Shape& input_shape) {
   return XlaHelpers::Flatten(GetPromotedMask(mask, input_shape));
 }
 
+// Builds the scan result by applying `f` repeatedly, starting from `init`:
+// once for `init`, then once more for every further element of `xs`. Returns
+// the result of the last application.
+// NOTE(review): the elements of `xs` are never passed to `f`; only the length
+// of `xs` drives the loop -- confirm this is the intended scan semantics.
+// NOTE(review): the length is read from the leading dimension of `xs`, which
+// is assumed to be the scan axis -- confirm.
+// TODO: the draft converted `f` through generateXLACallable() but never used
+// the result; that conversion is not wired in here.
+xla::XlaOp BuildScan(const Callable f, const at::Tensor& init,
+                     const at::Tensor& xs) {
+  const int64_t num_steps = xs.size(0);
+  xla::XlaOp result = f(init);
+  // The first application happened above, so counting starts from 1.
+  for (int64_t step = 1; step < num_steps; ++step) {
+    result = f(result);
+  }
+  return result;
+}
+
 bool ShouldUseDenseScatter(const torch::lazy::BackendDevice& device,
                            const xla::Shape& input_shape,
                            const xla::Shape& index_shape) {

diff --git a/torch_xla/csrc/xla_lower_util.h b/torch_xla/csrc/xla_lower_util.h
@@ -119,6 +119,9 @@ std::vector<xla::XlaOp> BuildAmpUpdateScale(const xla::XlaOp& current_scale,
                                             double scale_backoff_factor,
                                             int scale_growth_interval);
 
+// Applies `f` starting from `init`, with the number of applications driven by
+// the length of `xs`, and returns the final result.
+xla::XlaOp BuildScan(const Callable f, const at::Tensor& init,
+                     const at::Tensor& xs);
+
 std::vector<xla::XlaOp> BuildSgdOptimizerStep(
     const xla::XlaOp& found_inf, const xla::XlaOp& step,
     const xla::XlaOp& param, const xla::XlaOp& buf, const xla::XlaOp& d_p,