diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9f49efa69..50e67f970 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -29,10 +29,10 @@ LIST(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Thirdparty)
#string(REGEX REPLACE "^[0-9]+\\.[0-9]+\\.([0-9]+).*" "\\1" VERSION_PATCH "${VERSION}")
-SET(PACKAGE_VERSION 4.0.0) # ${VERSION})
-SET(VERSION 4.0.0)
+SET(PACKAGE_VERSION 4.1.0) # ${VERSION})
+SET(VERSION 4.1.0)
SET(SINGA_MAJOR_VERSION 4)
-SET(SINGA_MINOR_VERSION 0)
+SET(SINGA_MINOR_VERSION 1)
SET(SINGA_PATCH_VERSION 0)
#SET(SINGA_MAJOR_VERSION ${VERSION_MAJOR}) # 0 -
#SET(SINGA_MINOR_VERSION ${VERSION_MINOR}) # 0 - 9
diff --git a/LICENSE b/LICENSE
index f448e8b2b..9c7ffb475 100644
--- a/LICENSE
+++ b/LICENSE
@@ -559,3 +559,59 @@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+===============================================================================
+SINGA bundles the following under MIT License:
+examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/*
+
+MIT License
+
+Portions Copyright 2019-2021 ZomboDB, LLC.
+Portions Copyright 2021-2023 Technology Concepts & Design, Inc.
+Portions Copyright 2023 PgCentral Foundation, Inc.
+
+All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+===============================================================================
+SINGA bundles the following under The PostgreSQL License:
+examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/*
+
+The PostgreSQL License
+
+Portions Copyright (c) 1996-2023, The PostgreSQL Global Development Group
+
+Portions Copyright (c) 1994, The Regents of the University of California
+
+Permission to use, copy, modify, and distribute this software and its documentation for any
+purpose, without fee, and without a written agreement is hereby granted, provided that the above
+copyright notice and this paragraph and the following two paragraphs appear in all copies.
+
+IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
+SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING
+OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
+THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS,
+AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT,
+UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+
diff --git a/NOTICE b/NOTICE
index 4c824f1a6..a94d16e65 100644
--- a/NOTICE
+++ b/NOTICE
@@ -30,3 +30,16 @@ developers of Apache SINGA under Apache License, Version 2.0.
./doc/_static/images/sgd.png
./doc/_static/images/singa.png
./doc/_static/images/singav1-sw.png
+./examples/model_selection/TRAILS-Database-Native-Model-Selection/documents/image-20231020174425377.png
+./examples/model_selection/TRAILS-Database-Native-Model-Selection/documents/image-20231020174945226.png
+./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230421214835152.png
+./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230421220338391.png
+./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230421220443231.png
+./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035554579.png
+./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035622198.png
+./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035639502.png
+./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035806963.png
+./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230722202555763.png
+./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230722205244718.png
+./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230724111325368.png
+./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230724111659545.png
\ No newline at end of file
diff --git a/RELEASE_NOTES b/RELEASE_NOTES
index bbe67726a..7a1f0eab4 100644
--- a/RELEASE_NOTES
+++ b/RELEASE_NOTES
@@ -1,3 +1,40 @@
+Release Notes - SINGA - Version singa-4.1.0
+
+SINGA is a distributed deep learning library.
+
+This release includes the following changes:
+
+ * New examples
+ * Add an example for malaria detection using cell images.
+ * Add an example for structured data learning.
+
+ * Add support for models running on top of RDBMS
+ * Add support for in-database model definition and selection in RDBMS.
+ * Implement training-free model evaluation metrics for in-database model selection.
+ * Implement a coordinator to balance between training-free and training-based model evaluations
+ for in-database model selection.
+
+ * Enhance distributed training
+ * Add implementations for the sum error loss.
+ * Improve the optimizer to return model gradients.
+ * Improve the iterative checking for tensors and strings in the ModelMeta class.
+
+ * Enhance example code
+ * Add support for flexible setting of training configurations for models, e.g., learning rates,
+ weight decay, momentum, etc.
+ * Add implementations for dynamic models with varying layer sizes.
+
+ * Update the website
+ * Add illustrations for database integration.
+    * Update the list of users of Apache SINGA.
+
+ * Fix bugs
+ * Update the NVIDIA_GPGKEY in the Dockerfile for building wheel files.
+ * Update the versions of dependencies in the wheel file.
+ * Fix the collections module in the model.py file.
+
+----------------------------------------------------------------------------------------------
+
Release Notes - SINGA - Version singa-4.0.0
SINGA is a distributed deep learning library.
diff --git a/doap/doap_SINGA.rdf b/doap/doap_SINGA.rdf
new file mode 100644
index 000000000..9203d5009
--- /dev/null
+++ b/doap/doap_SINGA.rdf
@@ -0,0 +1,63 @@
+
+
+
+
+
+ 2023-09-06
+
+ Apache SINGA
+
+
+ A Distributed Deep Learning Library
+ Apache SINGA is an Apache top-level project for developing an open-source machine learning library. It provides a flexible architecture for scalable distributed training, is extensible to run over a wide range of hardware, and has a focus on healthcare applications.
+
+
+ C++
+
+
+
+ Apache SINGA 4.0.0
+ 2023-04-07
+ 4.0.0
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ LUO ZHAOJING
+
+
+
+
+
+
diff --git a/examples/armnet/README.md b/examples/armnet/README.md
new file mode 100644
index 000000000..698161fe1
--- /dev/null
+++ b/examples/armnet/README.md
@@ -0,0 +1,24 @@
+
+
+## ARM-Net: Adaptive Relation Modeling Network for Structured Data
+
+![version](https://img.shields.io/badge/version-v3.5-green)
+![python](https://img.shields.io/badge/python-3.8.3-blue)
+![singa](https://img.shields.io/badge/singa-3.1.0-orange)
+
+This folder contains our Singa implementation of [ARM-Net: Adaptive Relation Modeling Network for Structured Data](https://dl.acm.org/doi/10.1145/3448016.3457321).
diff --git a/examples/cifar_distributed_cnn/run-rtx.sh b/examples/cifar_distributed_cnn/run-rtx.sh
index ff00642ef..3f39694bb 100755
--- a/examples/cifar_distributed_cnn/run-rtx.sh
+++ b/examples/cifar_distributed_cnn/run-rtx.sh
@@ -38,3 +38,7 @@ mpiexec -np 8 python train_mpi.py mlp cifar100 -l 0.015 -b 32
mpiexec -np 8 python train_mpi.py alexnet mnist -l 0.015 -b 32
mpiexec -np 8 python train_mpi.py alexnet cifar10 -l 0.015 -b 32
mpiexec -np 8 python train_mpi.py alexnet cifar100 -l 0.015 -b 32
+
+# xceptionnet
+mpiexec -np 8 python train_mpi.py xceptionnet mnist -l 0.015 -b 32
+mpiexec -np 8 python train_mpi.py xceptionnet cifar10 -l 0.015 -b 32
\ No newline at end of file
diff --git a/examples/cnn/run.sh b/examples/cnn/run.sh
index 9f1c4aa68..a536a1e81 100644
--- a/examples/cnn/run.sh
+++ b/examples/cnn/run.sh
@@ -17,6 +17,8 @@
# under the License.
#
+# Suppress Python DeprecationWarning output from the training scripts below
+export PYTHONWARNINGS="ignore::DeprecationWarning"
+
### mnist
python train_cnn.py mlp mnist
python train_cnn.py cnn mnist
diff --git a/examples/malaria_cnn/README.md b/examples/malaria_cnn/README.md
new file mode 100644
index 000000000..b9dcbf239
--- /dev/null
+++ b/examples/malaria_cnn/README.md
@@ -0,0 +1,44 @@
+
+
+# Singa for Malaria Detection Task
+
+## Malaria
+
+Malaria is caused by parasites and is transmitted through the bites of infected mosquitoes. With about 200 million cases and about 400,000 deaths worldwide per year, malaria remains a serious threat to global health.
+
+Although malaria is a curable disease, inadequate diagnostics make it harder to reduce mortality. A fast and reliable diagnostic test is therefore a promising and effective way to fight malaria.
+
+To help address this problem, we use Singa to implement a machine learning model to assist with malaria diagnosis. The dataset is from Kaggle: https://www.kaggle.com/datasets/miracle9to9/files1?resource=download. Please download the dataset before running the scripts.
+
+## Structure
+
+* `data` includes the scripts for preprocessing Malaria image datasets.
+
+* `model` includes the CNN model construction code, which wraps the neural
+  network operations of each model in a subclass of `model.Model`.
+
+* `train_cnn.py` is the training script, which controls the training flow by
+  running back-propagation and SGD updates. A minimal usage sketch is shown below.
+
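+For reference, here is a minimal, hypothetical Python sketch of how the bundled
+data loader and model can be combined outside of `train_cnn.py`. It assumes the
+Kaggle data has been extracted to `/tmp/malaria` and that the script is run from
+`examples/malaria_cnn/` so that `data/` and `model/` are importable; the training
+step mirrors the `train_one_batch` interface defined in `model/cnn.py`.
+
+```python
+from singa import device, opt, tensor
+from data import malaria   # bundled loader (data/malaria.py)
+from model import cnn      # bundled model (model/cnn.py)
+
+dev = device.get_default_device()                       # CPU device
+train_x, train_y, _, _ = malaria.load("/tmp/malaria")   # 3x128x128 images, labels 0/1
+
+net = cnn.create_model(num_classes=2, num_channels=3)
+net.set_optimizer(opt.SGD(lr=0.005, momentum=0.9))
+
+batch = 32
+tx = tensor.Tensor((batch, 3, 128, 128), dev, tensor.float32)
+ty = tensor.Tensor((batch,), dev, tensor.int32)
+net.compile([tx], is_train=True, use_graph=False, sequential=False)
+net.train()
+
+tx.copy_from_numpy(train_x[:batch])
+ty.copy_from_numpy(train_y[:batch])
+out, loss = net.train_one_batch(tx, ty, dist_option='plain', spars=None)
+print("loss:", tensor.to_numpy(loss)[0])
+```
+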
+## Command
+```bash
+python train_cnn.py cnn malaria -dir pathToDataset
+```
\ No newline at end of file
diff --git a/examples/malaria_cnn/data/malaria.py b/examples/malaria_cnn/data/malaria.py
new file mode 100644
index 000000000..46422b739
--- /dev/null
+++ b/examples/malaria_cnn/data/malaria.py
@@ -0,0 +1,122 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+try:
+ import pickle
+except ImportError:
+ import cPickle as pickle
+
+import numpy as np
+import os
+import sys
+from PIL import Image
+
+
+# need to save to specific local directories
+def load_train_data(dir_path="/tmp/malaria", resize_size=(128, 128)):
+ dir_path = check_dataset_exist(dirpath=dir_path)
+ path_train_label_1 = os.path.join(dir_path, "training_set/Parasitized")
+ path_train_label_0 = os.path.join(dir_path, "training_set/Uninfected")
+ train_label_1 = load_image_path(os.listdir(path_train_label_1))
+ train_label_0 = load_image_path(os.listdir(path_train_label_0))
+ labels = []
+ Images = np.empty((len(train_label_1) + len(train_label_0),
+ 3, resize_size[0], resize_size[1]), dtype=np.uint8)
+ for i in range(len(train_label_0)):
+ image_path = os.path.join(path_train_label_0, train_label_0[i])
+ temp_image = np.array(Image.open(image_path).resize(
+ resize_size).convert("RGB")).transpose(2, 0, 1)
+ Images[i] = temp_image
+ labels.append(0)
+ for i in range(len(train_label_1)):
+ image_path = os.path.join(path_train_label_1, train_label_1[i])
+ temp_image = np.array(Image.open(image_path).resize(
+ resize_size).convert("RGB")).transpose(2, 0, 1)
+ Images[i + len(train_label_0)] = temp_image
+ labels.append(1)
+
+ Images = np.array(Images, dtype=np.float32)
+ labels = np.array(labels, dtype=np.int32)
+ return Images, labels
+
+
+# need to save to specific local directories
+def load_test_data(dir_path='/tmp/malaria', resize_size=(128, 128)):
+ dir_path = check_dataset_exist(dirpath=dir_path)
+ path_test_label_1 = os.path.join(dir_path, "testing_set/Parasitized")
+ path_test_label_0 = os.path.join(dir_path, "testing_set/Uninfected")
+ test_label_1 = load_image_path(os.listdir(path_test_label_1))
+ test_label_0 = load_image_path(os.listdir(path_test_label_0))
+ labels = []
+ Images = np.empty((len(test_label_1) + len(test_label_0),
+ 3, resize_size[0], resize_size[1]), dtype=np.uint8)
+ for i in range(len(test_label_0)):
+ image_path = os.path.join(path_test_label_0, test_label_0[i])
+ temp_image = np.array(Image.open(image_path).resize(
+ resize_size).convert("RGB")).transpose(2, 0, 1)
+ Images[i] = temp_image
+ labels.append(0)
+ for i in range(len(test_label_1)):
+ image_path = os.path.join(path_test_label_1, test_label_1[i])
+ temp_image = np.array(Image.open(image_path).resize(
+ resize_size).convert("RGB")).transpose(2, 0, 1)
+ Images[i + len(test_label_0)] = temp_image
+ labels.append(1)
+
+ Images = np.array(Images, dtype=np.float32)
+ labels = np.array(labels, dtype=np.int32)
+ return Images, labels
+
+
+def load_image_path(image_paths):
+    # keep only image files (.png / .jpg), skipping any other entries
+    new_list = []
+    for image_path in image_paths:
+        if (image_path.endswith(".png") or image_path.endswith(".jpg")):
+            new_list.append(image_path)
+    return new_list
+
+
+def check_dataset_exist(dirpath):
+ if not os.path.exists(dirpath):
+ print(
+ 'Please download the malaria dataset first'
+ )
+ sys.exit(0)
+ return dirpath
+
+
+def normalize(train_x, val_x):
+ mean = [0.5339, 0.4180, 0.4460] # mean for malaria dataset
+ std = [0.3329, 0.2637, 0.2761] # std for malaria dataset
+ train_x /= 255
+ val_x /= 255
+    for ch in range(0, 3):  # normalize each of the three RGB channels
+ train_x[:, ch, :, :] -= mean[ch]
+ train_x[:, ch, :, :] /= std[ch]
+ val_x[:, ch, :, :] -= mean[ch]
+ val_x[:, ch, :, :] /= std[ch]
+ return train_x, val_x
+
+
+def load(dir_path):
+ train_x, train_y = load_train_data(dir_path=dir_path)
+ val_x, val_y = load_test_data(dir_path=dir_path)
+ train_x, val_x = normalize(train_x, val_x)
+ train_y = train_y.flatten()
+ val_y = val_y.flatten()
+ return train_x, train_y, val_x, val_y
diff --git a/examples/malaria_cnn/model/cnn.py b/examples/malaria_cnn/model/cnn.py
new file mode 100644
index 000000000..856adb7e7
--- /dev/null
+++ b/examples/malaria_cnn/model/cnn.py
@@ -0,0 +1,94 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from singa import layer
+from singa import model
+
+
+class CNN(model.Model):
+
+ def __init__(self, num_classes=10, num_channels=1):
+ super(CNN, self).__init__()
+ self.num_classes = num_classes
+ self.input_size = 128
+ self.dimension = 4
+ self.conv1 = layer.Conv2d(num_channels, 32, 3, padding=0, activation="RELU")
+ self.conv2 = layer.Conv2d(32, 64, 3, padding=0, activation="RELU")
+ self.conv3 = layer.Conv2d(64, 64, 3, padding=0, activation="RELU")
+ self.linear1 = layer.Linear(128)
+ self.linear2 = layer.Linear(num_classes)
+ self.pooling1 = layer.MaxPool2d(2, 2, padding=0)
+ self.pooling2 = layer.MaxPool2d(2, 2, padding=0)
+ self.pooling3 = layer.MaxPool2d(2, 2, padding=0)
+ self.relu = layer.ReLU()
+ self.flatten = layer.Flatten()
+ self.softmax_cross_entropy = layer.SoftMaxCrossEntropy()
+
+ def forward(self, x):
+ y = self.conv1(x)
+ y = self.pooling1(y)
+ y = self.conv2(y)
+ y = self.pooling2(y)
+ y = self.conv3(y)
+ y = self.pooling3(y)
+ y = self.flatten(y)
+ y = self.linear1(y)
+ y = self.relu(y)
+ y = self.linear2(y)
+ return y
+
+ def train_one_batch(self, x, y, dist_option, spars):
+ out = self.forward(x)
+ loss = self.softmax_cross_entropy(out, y)
+
+ if dist_option == 'plain':
+ self.optimizer(loss)
+ elif dist_option == 'half':
+ self.optimizer.backward_and_update_half(loss)
+ elif dist_option == 'partialUpdate':
+ self.optimizer.backward_and_partial_update(loss)
+ elif dist_option == 'sparseTopK':
+ self.optimizer.backward_and_sparse_update(loss,
+ topK=True,
+ spars=spars)
+ elif dist_option == 'sparseThreshold':
+ self.optimizer.backward_and_sparse_update(loss,
+ topK=False,
+ spars=spars)
+ return out, loss
+
+ def set_optimizer(self, optimizer):
+ self.optimizer = optimizer
+
+
+def create_model(**kwargs):
+ """Constructs a CNN model.
+
+    Args:
+        kwargs: keyword arguments forwarded to ``CNN`` (e.g. ``num_classes``,
+            ``num_channels``).
+
+ Returns:
+ The created CNN model.
+ """
+ model = CNN(**kwargs)
+
+ return model
+
+
+__all__ = ['CNN', 'create_model']
diff --git a/examples/malaria_cnn/model/mlp.py b/examples/malaria_cnn/model/mlp.py
new file mode 100644
index 000000000..5f46bc321
--- /dev/null
+++ b/examples/malaria_cnn/model/mlp.py
@@ -0,0 +1,85 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from singa import layer
+from singa import model
+from singa import tensor
+from singa import opt
+from singa import device
+import argparse
+import numpy as np
+
+np_dtype = {"float16": np.float16, "float32": np.float32}
+
+singa_dtype = {"float16": tensor.float16, "float32": tensor.float32}
+
+
+class MLP(model.Model):
+
+ def __init__(self, perceptron_size=100, num_classes=10):
+ super(MLP, self).__init__()
+ self.num_classes = num_classes
+ self.dimension = 2
+
+ self.relu = layer.ReLU()
+ self.linear1 = layer.Linear(perceptron_size)
+ self.linear2 = layer.Linear(num_classes)
+ self.softmax_cross_entropy = layer.SoftMaxCrossEntropy()
+
+ def forward(self, inputs):
+ y = self.linear1(inputs)
+ y = self.relu(y)
+ y = self.linear2(y)
+ return y
+
+ def train_one_batch(self, x, y, dist_option, spars):
+ out = self.forward(x)
+ loss = self.softmax_cross_entropy(out, y)
+
+ if dist_option == 'plain':
+ self.optimizer(loss)
+ elif dist_option == 'half':
+ self.optimizer.backward_and_update_half(loss)
+ elif dist_option == 'partialUpdate':
+ self.optimizer.backward_and_partial_update(loss)
+ elif dist_option == 'sparseTopK':
+ self.optimizer.backward_and_sparse_update(loss,
+ topK=True,
+ spars=spars)
+ elif dist_option == 'sparseThreshold':
+ self.optimizer.backward_and_sparse_update(loss,
+ topK=False,
+ spars=spars)
+ return out, loss
+
+ def set_optimizer(self, optimizer):
+ self.optimizer = optimizer
+
+
+def create_model(**kwargs):
+ """Constructs a CNN model.
+
+ Returns:
+ The created CNN model.
+ """
+ model = MLP(**kwargs)
+
+ return model
+
+
+__all__ = ['MLP', 'create_model']
diff --git a/examples/malaria_cnn/run.sh b/examples/malaria_cnn/run.sh
new file mode 100644
index 000000000..14718208b
--- /dev/null
+++ b/examples/malaria_cnn/run.sh
@@ -0,0 +1,20 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+### malaria dataset
+python train_cnn.py cnn malaria -dir pathToDataset
diff --git a/examples/mlp_postgresql/README.md b/examples/mlp_postgresql/README.md
new file mode 100644
index 000000000..e34f58197
--- /dev/null
+++ b/examples/mlp_postgresql/README.md
@@ -0,0 +1,23 @@
+
+
+# Multi-layer Perceptron (MLP) on top of PostgreSQL
+
+Examples inside this folder show how to run MLP models
+on top of PostgreSQL.
\ No newline at end of file
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/Dockerfile b/examples/model_selection/TRAILS-Database-Native-Model-Selection/Dockerfile
new file mode 100644
index 000000000..9a14cff85
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/Dockerfile
@@ -0,0 +1,76 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+FROM ubuntu:20.04
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install Python, Vim, and necessary libraries
+RUN apt-get update && \
+ apt-get install -y software-properties-common wget gnupg2 lsb-release git && \
+ add-apt-repository ppa:deadsnakes/ppa && \
+ apt-get install -y python3.6 python3-pip vim && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
+# Install necessary dependencies for PostgreSQL and Rust
+RUN apt-get update && \
+ apt-get install -y pkg-config libssl-dev libpq-dev libclang-dev curl && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
+# Install necessary dependencies for pgrx
+RUN apt-get update && \
+ apt-get install -y bison flex libreadline-dev && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
+# Create the postgres user
+USER root
+RUN adduser --disabled-password --gecos "" postgres && \
+ mkdir /project && \
+ adduser postgres sudo && \
+ chown -R postgres:postgres /project
+
+# Switch to the postgres user, install Rust and cargo-pgrx, and initialize pgrx
+USER postgres
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y && \
+ echo 'source $HOME/.cargo/env' >> $HOME/.bashrc && \
+ /bin/bash -c "source $HOME/.cargo/env && cargo install cargo-pgrx --version '0.9.7' --locked" && \
+ /bin/bash -c "source $HOME/.cargo/env && cargo pgrx init"
+
+# Set environment variables for Rust and Python
+ENV PATH="/root/.cargo/bin:${PATH}"
+ENV PYTHONPATH="${PYTHONPATH}:/project/TRAILS/internal/ml/model_selection"
+
+WORKDIR /project
+COPY ./internal/ml/model_selection/requirement.txt ./requirement.txt
+RUN pip install -r requirement.txt
+
+RUN pip install https://www.comp.nus.edu.sg/~zhaojing/files/singa-3.1.0-cp38-cp38-manylinux2014_x86_64.whl
+
+# appendix
+USER root
+RUN apt-get update && apt-get install -y \
+ postgresql-client && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
+USER postgres
+
+CMD ["tail", "-f", "/dev/null"]
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/README.md b/examples/model_selection/TRAILS-Database-Native-Model-Selection/README.md
new file mode 100644
index 000000000..31cbf703d
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/README.md
@@ -0,0 +1,167 @@
+
+
+# Database-Native Model Selection
+
+ -- based on Singa
+
+
+
+![image-20231020174425377](documents/image-20231020174425377.png)
+
+## Build Docker Image
+
+```bash
+git clone https://github.com/apache/singa.git
+cd singa/examples/model_selection/TRAILS-Database-Native-Model-Selection/
+docker build -t trails-singa .
+```
+
+## Run Docker Image
+Download `exp_data.zip` from https://www.dropbox.com/scl/fi/xz4teosklwmfc5j4x2ug6/exp_data.zip?rlkey=5fk2ttib0zt49suyppcjhsrn2&dl=0
+and unzip the `exp_data/` folder to a directory of your choice (referred to as `path_to_exp_data_folder` below).
+```bash
+docker run -d --name trails-singa \
+ --network="host" \
+ -v path_to_exp_data_folder:/project/exp_data \
+ trails-singa
+```
+
+## Start PostgreSQL Instance
+
+```bash
+# 1. Run docker container
+docker exec -it trails-singa bash
+# 2. Clone the code
+cd ~
+git clone https://github.com/apache/singa.git
+cd singa/examples/model_selection/TRAILS-Database-Native-Model-Selection/
+# 3. Export PYTHONPATH
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+# 4. Start the RDBMS and then exit
+cd internal/pg_extension
+cargo pgrx run
+exit
+cd ../..
+# 5. Load data into RDBMS
+bash internal/ml/model_selection/scripts/database/load_data_to_db.sh /project/exp_data/data/structure_data/frappe frappe
+# 6. Run database server
+cd internal/pg_extension
+cargo pgrx run
+
+```
+
+
+## Register Stored Procedure
+
+```sql
+CREATE OR REPLACE
+PROCEDURE model_selection_sp(
+ dataset TEXT, --dataset name
+ selected_columns TEXT[], --used columns
+ N INTEGER, --number of models to evaluate
+ batch_size INTEGER, --batch size, for profiling, filtering
+ config_file TEXT --config file path
+)
+LANGUAGE plpgsql
+AS $$
+DECLARE
+ -- global inputs/outputs
+ result_status TEXT;
+ column_list TEXT;
+BEGIN
+ -- combine the columns into a string
+ column_list := array_to_string(selected_columns, ', ');
+
+ -- 4. Run filtering phase to get top K models.
+ EXECUTE format('
+ WITH batch_rows AS (
+ SELECT %s
+ FROM %I
+ ORDER BY RANDOM()
+ LIMIT %s OFFSET 0
+ )
+ SELECT filtering_phase(
+ json_agg(row_to_json(t))::text, %s, %s, %L
+ )
+ FROM batch_rows AS t', column_list, dataset, batch_size, N, 1, config_file) INTO result_status;
+ RAISE NOTICE '4. run filtering phase, k models = %', result_status;
+
+END; $$;
+```
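+
+For intuition, the `json_agg(row_to_json(t))::text` argument built by the procedure is simply
+a JSON array of the randomly sampled rows. The sketch below (with hypothetical column values)
+shows the payload shape handed to the `filtering_phase` UDF, which evaluates the sampled
+candidate models on this mini-batch and returns the top model identifiers as TEXT (the
+procedure above passes `N` and `K = 1`):
+
+```python
+import json
+
+# hypothetical mini-batch, as produced by json_agg(row_to_json(t))::text
+mini_batch = json.dumps([
+    {"col1": "3:1", "col2": "120:1", "col3": "2048:1", "label": "1"},
+    {"col1": "7:1", "col2": "95:1", "col3": "1024:1", "label": "0"},
+])
+# filtering_phase(mini_batch, N, K, config_file) is then evaluated inside the database.
+print(mini_batch)
+```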
+
+## Compile the UDF
+
+```sql
+-- Try to compile the UDF
+DROP EXTENSION IF EXISTS pg_extension;
+CREATE EXTENSION pg_extension;
+```
+
+If the above fails, open another terminal, enter the container via `docker exec -it trails-singa bash`,
+and then run the following:
+```bash
+rm /home/postgres/.pgrx/14.9/pgrx-install/share/extension/pg_extension--0.1.0.sql
+vi /home/postgres/.pgrx/14.9/pgrx-install/share/extension/pg_extension--0.1.0.sql
+```
+
+Copy the following into `/home/postgres/.pgrx/14.9/pgrx-install/share/extension/pg_extension--0.1.0.sql`:
+
+```sql
+-- src/lib.rs:66
+-- pg_extension::filtering_phase
+CREATE FUNCTION "filtering_phase"(
+    "mini_batch" TEXT, /* alloc::string::String */
+    "n" INT, /* i32 */
+    "k" INT, /* i32 */
+    "config_file" TEXT /* alloc::string::String */
+) RETURNS TEXT /* alloc::string::String */
+    IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'filtering_phase_wrapper';
+```
+
+Go back to the first terminal and run the following in the database server again:
+```sql
+-- Try to compile the UDF
+DROP EXTENSION IF EXISTS pg_extension;
+CREATE EXTENSION pg_extension;
+```
+
+## Run Model Selection
+
+```sql
+-- Template for calling 'model_selection_sp' stored procedure
+CALL model_selection_sp(
+    <table_name>,        -- The name of the table or dataset from which data should be retrieved.
+    <selected_columns>,  -- An array of column names to be considered in the model selection process.
+    <N>,                 -- Number of models to explore
+    <batch_size>,        -- Batch size
+    <config_file>        -- The file path to a configuration file needed for the process.
+);
+
+
+-- For example
+CALL model_selection_sp(
+ 'frappe_train',
+ ARRAY['col1', 'col2', 'col3', 'label'],
+ 10,
+ 32,
+ '/home/postgres/singa/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/config.ini');
+```
+
+## Example Result
+
+![image-20231020174945226](documents/image-20231020174945226.png)
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/documents/dev_guide.md b/examples/model_selection/TRAILS-Database-Native-Model-Selection/documents/dev_guide.md
new file mode 100644
index 000000000..3b0927a83
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/documents/dev_guide.md
@@ -0,0 +1,251 @@
+
+
+# Change the permission
+
+```bash
+chmod -R 777 internal/pg_extension
+chmod -R 777 TRAILS
+```
+
+# PSQL CMD
+
+```sql
+psql -h localhost -p 28814 -U postgres
+\c frappe
+\dt
+\d frappe_train
+DROP TABLE frappe_train;
+SELECT * FROM frappe_train LIMIT 10;
+SELECT * FROM frappe_test LIMIT 10;
+SELECT * FROM frappe_valid LIMIT 10;
+DROP DATABASE frappe;
+psql -U postgres
+```
+
+# Build and run the container
+
+```bash
+docker build -t trails .
+
+docker run -d --name trails \
+ --network="host" \
+ -v $(pwd)/TRAILS:/project/TRAILS \
+ -v /hdd1/xingnaili/exp_data/:/project/exp_data \
+ trails
+
+docker exec -it trails bash
+```
+
+# These steps are already done in the Docker image
+
+```bash
+# if those are already on docker, skip them.
+cargo install --locked cargo-pgrx
+# run after package update
+cargo pgrx init
+cargo pgrx new my_extension
+# just run this after code updates.
+cargo pgrx run
+```
+
+# Develop
+
+## Load data into the database
+
+```bash
+bash /project/TRAILS/internal/ml/model_selection/scripts/database/load_data_to_db.sh /project/exp_data/data/structure_data/frappe frappe
+bash /project/TRAILS/internal/ml/model_selection/scripts/database/load_data_to_db.sh /project/exp_data/data/structure_data/uci_diabetes uci_diabetes
+bash /project/TRAILS/internal/ml/model_selection/scripts/database/load_data_to_db.sh /project/exp_data/data/structure_data/criteo_full criteo
+```
+
+## 1. Compile
+
+In shell
+
+```bash
+cd ./internal/pg_extension/
+cargo clean
+rm -r /home/postgres/.pgrx/14.9/pgrx-install/lib/pg_extension.so
+cargo pgrx run
+rm /home/postgres/.pgrx/14.9/pgrx-install/share/extension/pg_extension--0.1.0.sql
+vi /home/postgres/.pgrx/14.9/pgrx-install/share/extension/pg_extension--0.1.0.sql
+# paste the latest SQL statements into the opened file
+# generate schema
+cargo pgrx schema >> /home/postgres/.pgrx/14.9/pgrx-install/share/extension/pg_extension--0.1.0.sql
+```
+
+In SQL
+
+```sql
+DROP EXTENSION IF EXISTS pg_extension;
+CREATE EXTENSION pg_extension;
+```
+
+## 2. Edit the config file
+
+Update `nfield` in the `config.ini` file; it must equal the number of feature columns used (the label column is not counted). E.g., `ARRAY['col1', 'col2', 'col3', 'label']` => `nfield` = 3, as the sketch below illustrates.
+
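+As a sanity check, a small sketch of the relationship (with a hypothetical column list):
+
+```python
+selected_columns = ['col1', 'col2', 'col3', 'label']  # hypothetical selection
+nfield = len(selected_columns) - 1                    # the label column is excluded
+assert nfield == 3                                    # value to set in config.ini
+```
+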
+## 3. Run it
+
+```sql
+CREATE EXTENSION pg_extension;
+
+-- Test if the UDF is there or not
+SELECT * FROM pg_proc WHERE proname = 'model_selection_workloads';
+
+-- micro benchmarks
+select benchmark_filtering_phase_latency(4, '/project/TRAILS/internal/ml/model_selection/config.ini');
+
+select benchmark_filtering_latency_in_db(5000, 'frappe', '/project/TRAILS/internal/ml/model_selection/config.ini');
+
+select benchmark_filtering_latency_in_db(5000, 'uci_diabetes', '/project/TRAILS/internal/ml/model_selection/config.ini');
+
+select benchmark_filtering_latency_in_db(4, 'criteo', '/project/TRAILS/internal/ml/model_selection/config.ini');
+
+-- Test the coordinator
+SELECT coordinator('0.08244', '168.830156', '800', false, '/project/TRAILS/internal/ml/model_selection/config.ini');
+
+-- arguments: database name, columns used, time budget, batch size, and config file
+CALL model_selection_sp('dummy', ARRAY['col1', 'col2', 'col3', 'label'], '30', 32, '/project/TRAILS/internal/ml/model_selection/config.ini');
+
+-- end-to-end model selection
+CALL model_selection_end2end('dummy', ARRAY['col1', 'col2', 'col3', 'label'], '15', '/project/TRAILS/internal/ml/model_selection/config.ini');
+
+-- filtering & refinement with workloads
+CALL model_selection_workloads('dummy', ARRAY['col1', 'col2', 'col3', 'label'], 300, 3, '/project/TRAILS/internal/ml/model_selection/config.ini');
+
+-- (Python client side) the refinement server is queried with:
+--   response = requests.post(args.refinement_url, json=data).json()
+
+```
+
+# Test that the pg_extension works using PL/Python (plpython3u)
+
+```sql
+-- first switch to the postgres user in a shell: su postgres
+
+CREATE EXTENSION plpython3u;
+
+CREATE FUNCTION py_version() RETURNS text AS $$
+import sys
+return sys.version
+$$ LANGUAGE plpython3u;
+
+SELECT py_version();
+
+CREATE OR REPLACE FUNCTION test_numpy()
+ RETURNS text
+LANGUAGE plpython3u
+AS $$
+import numpy
+import torch
+import sklearn
+import torchvision
+import tqdm
+print("asdf")
+return str(numpy.__version__) + " torch: " + str(torch.__version__)
+$$;
+
+SELECT test_numpy();
+
+CREATE EXTENSION my_extension;
+SELECT hello_my_extension();
+```
+
+# Container log
+
+Each line in the `ps aux` output represents a different process currently running on the PostgreSQL server. Here is what each one is doing:
+
+1. `/bin/sh -c service postgresql start && tail -F /var/log/postgresql/postgresq` : This is the command that was used to start your PostgreSQL server. It also includes a command to continuously display new entries from the PostgreSQL log file.
+
+
+2. `/usr/lib/postgresql/14/bin/postgres -D /var/lib/postgresql/14/main -c config` : This is the main PostgreSQL process. All other PostgreSQL processes are children of this process.
+
+
+3. `postgres: 14/main: checkpointer` : The checkpointer process is responsible for making sure data changes get saved to disk regularly. This is important for database recovery in case of a crash.
+
+
+4. `postgres: 14/main: background writer` : The background writer process is responsible for writing buffers to disk when they become dirty. This reduces the amount of work that needs to be done when a buffer is reused.
+
+
+5. `postgres: 14/main: walwriter` : The walwriter process writes transaction logs (Write-Ahead Logs or WAL) to disk. This is also important for database recovery and replication.
+
+
+6. `postgres: 14/main: autovacuum launcher` : The autovacuum launcher process starts autovacuum worker processes as needed. These processes automatically clean up and optimize the database.
+
+
+7. `postgres: 14/main: stats collector` : The stats collector process collects statistics about the server's activity. This information can be viewed using the `pg_stat` family of system views.
+
+
+8. `postgres: 14/main: logical replication launcher` : The logical replication launcher manages the worker processes that perform logical replication, copying data changes to other databases.
+
+
+9. `tail -F /var/log/postgresql/postgresql-14-main.log` : This process is displaying the end of the PostgreSQL log file and updating as more entries are added.
+
+
+10. `bash` : These are shell sessions, likely interactive ones you've started.
+
+
+11. `/usr/lib/postgresql/14/bin/psql -h localhost -p 28814 pg_extension` : These are instances of the psql command line interface, connected to your database.
+
+
+12. `postgres: postgres pg_extension 127.0.0.1(52236) CALL` : This is your currently running stored procedure.
+
+
+13. `ps aux` : This is the command you ran to display the list of processes.
+
+Each process is part of the PostgreSQL database system and helps it to run efficiently and robustly.
+
+# MAC locally
+
+```bash
+conda activate firmest38
+export PYTHON_SYS_EXECUTABLE=/Users/kevin/opt/anaconda3/envs/firmest38/bin/python
+export DYLD_LIBRARY_PATH=/Users/kevin/opt/anaconda3/envs/firmest38/lib/:$DYLD_LIBRARY_PATH
+cargo run --features python
+```
+
+# What does cargo run do?
+
+Before:
+
+```
+postgres 1 0.1 0.0 2612 588 ? Ss 14:30 0:00 /bin/sh -c service postgresql start && tail -F /var/log/postgresql/postgresql-14-main.log
+postgres 20 0.1 0.0 214688 29332 ? Ss 14:30 0:00 /usr/lib/postgresql/14/bin/postgres -D /var/lib/postgresql/14/main -c config_file=/etc/postgresql/14/main/postgresql.conf
+postgres 22 0.0 0.0 214688 6120 ? Ss 14:30 0:00 postgres: 14/main: checkpointer
+postgres 23 0.0 0.0 214688 6084 ? Ss 14:30 0:00 postgres: 14/main: background writer
+postgres 24 0.0 0.0 214688 10352 ? Ss 14:30 0:00 postgres: 14/main: walwriter
+postgres 25 0.0 0.0 215224 8864 ? Ss 14:30 0:00 postgres: 14/main: autovacuum launcher
+postgres 26 0.0 0.0 69280 5184 ? Ss 14:30 0:00 postgres: 14/main: stats collector
+postgres 27 0.0 0.0 215236 6972 ? Ss 14:30 0:00 postgres: 14/main: logical replication launcher
+postgres 38 0.0 0.0 2548 512 ? S 14:30 0:00 tail -F /var/log/postgresql/postgresql-14-main.log
+postgres 39 0.1 0.0 4112 3424 pts/0 Ss+ 14:30 0:00 bash
+postgres 48 0.1 0.0 4112 3424 pts/1 Ss 14:30 0:00 bash
+postgres 59 0.0 0.0 5896 2860 pts/1 R+ 14:30 0:00 ps aux
+```
+
+After:
+
+
+
+
+
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/documents/image-20231020174425377.png b/examples/model_selection/TRAILS-Database-Native-Model-Selection/documents/image-20231020174425377.png
new file mode 100644
index 000000000..9e73b270d
Binary files /dev/null and b/examples/model_selection/TRAILS-Database-Native-Model-Selection/documents/image-20231020174425377.png differ
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/documents/image-20231020174945226.png b/examples/model_selection/TRAILS-Database-Native-Model-Selection/documents/image-20231020174945226.png
new file mode 100644
index 000000000..d7b686d2b
Binary files /dev/null and b/examples/model_selection/TRAILS-Database-Native-Model-Selection/documents/image-20231020174945226.png differ
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/cache-service/cache_service.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/cache-service/cache_service.py
new file mode 100644
index 000000000..87479a704
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/cache-service/cache_service.py
@@ -0,0 +1,186 @@
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import time
+import threading
+import queue
+import psycopg2
+from typing import Any, List, Dict, Tuple
+from sanic import Sanic
+from sanic.response import json
+import calendar
+import os
+import logging
+
+log_logger_folder_name = "log_cache_service"
+if not os.path.exists(f"./{log_logger_folder_name}"):
+ os.makedirs(f"./{log_logger_folder_name}")
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.DEBUG,
+ format='%(asctime)s %(levelname)-8s %(message)s',
+ datefmt='%d %b %Y %H:%M:%S',
+ filename=f"./{log_logger_folder_name}/log_{str(calendar.timegm(time.gmtime()))}", filemode='w')
+
+USER = "postgres"
+HOST = "127.0.0.1"
+PORT = "28814"
+DB_NAME = "pg_extension"
+CACHE_SIZE = 10
+
+
+class CacheService:
+ def __init__(self, name_space: str, database: str, table: str, columns: List, batch_size: int, max_size: int = CACHE_SIZE):
+ """
+ name_space: train, valid, test
+ database: database to use
+ table: which table
+ columns: selected cols
+ max_size: max batches to cache
+ """
+ self.name_space = name_space
+ self.batch_size = batch_size
+ self.last_id = -1
+ self.database = database
+ self.table = table
+ self.columns = columns
+ self.queue = queue.Queue(maxsize=max_size)
+ self.thread = threading.Thread(target=self.fetch_data, daemon=True)
+ self.thread.start()
+
+ def decode_libsvm(self, columns):
+ map_func = lambda pair: (int(pair[0]), float(pair[1]))
+ # 0 is id, 1 is label
+ id, value = zip(*map(lambda col: map_func(col.split(':')), columns[2:]))
+ sample = {'id': list(id),
+ 'value': list(value),
+ 'y': int(columns[1])}
+ return sample
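+
+    # Example (hypothetical row fetched from a libsvm-style table such as frappe_train):
+    #   columns = ('42', '1', '3:1', '120:1', '2048:1')   # (id, label, feat:val, ...)
+    #   decode_libsvm(columns) -> {'id': [3, 120, 2048], 'value': [1.0, 1.0, 1.0], 'y': 1}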
+
+ def pre_processing(self, mini_batch_data: List[Tuple]):
+ """
+        mini_batch_data: e.g. [('0', '0', '123:123', '123:123', '123:123'), ...]
+ """
+ sample_lines = len(mini_batch_data)
+ feat_id = []
+ feat_value = []
+ y = []
+
+ for i in range(sample_lines):
+ row_value = mini_batch_data[i]
+ sample = self.decode_libsvm(row_value)
+ feat_id.append(sample['id'])
+ feat_value.append(sample['value'])
+ y.append(sample['y'])
+ return {'id': feat_id, 'value': feat_value, 'y': y}
+
+ def fetch_data(self):
+ with psycopg2.connect(database=self.database, user=USER, host=HOST, port=PORT) as conn:
+ while True:
+ try:
+ # fetch and preprocess data from PostgreSQL
+ batch, time_usg = self.fetch_and_preprocess(conn)
+ self.queue.put(batch)
+ print(f"Data is fetched, {self.name_space} queue_size={self.queue.qsize()}, time_usg={time_usg}")
+ logger.info(f"Data is fetched, queue_size={self.queue.qsize()}, time_usg={time_usg}")
+                    # queue.put() above blocks while the cache is full; sleep briefly between fetches
+ time.sleep(0.1)
+ except psycopg2.OperationalError:
+ logger.exception("Lost connection to the database, trying to reconnect...")
+ time.sleep(5) # wait before trying to establish a new connection
+ conn = psycopg2.connect(database=self.database, user=USER, host=HOST, port=PORT)
+
+ def fetch_and_preprocess(self, conn):
+ begin_time = time.time()
+ cur = conn.cursor()
+        # Fetch the next batch_size rows with id greater than last_id
+ columns_str = ', '.join(self.columns)
+ # Select rows greater than last_id
+ cur.execute(f"SELECT id, {columns_str} FROM {self.table} "
+ f"WHERE id > {self.last_id} ORDER BY id ASC LIMIT {self.batch_size}")
+ rows = cur.fetchall()
+
+ if rows:
+ # Update last_id with max id of fetched rows
+ self.last_id = max(row[0] for row in rows) # assuming 'id' is at index 0
+ else:
+ # If no more new rows, reset last_id to start over scan and return 'end_position'
+ self.last_id = -1
+ return "end_position", time.time() - begin_time
+
+ batch = self.pre_processing(rows)
+ return batch, time.time() - begin_time
+
+ def get(self):
+ return self.queue.get()
+
+ def is_empty(self):
+ return self.queue.empty()
+
+
+app = Sanic("CacheServiceApp")
+
+
+# start the cache service; this POST endpoint is triggered from the pg interface side
+@app.route("/", methods=["POST"])
+async def start_service(request):
+ try:
+ columns = request.json.get('columns')
+ # can only be train or valid
+ name_space = request.json.get('name_space')
+ table_name = request.json.get('table_name')
+ batch_size = request.json.get('batch_size')
+
+ if columns is None:
+ return json({"error": "No columns specified"}, status=400)
+ if name_space not in ["train", "valid", "test"]:
+ return json({"error": name_space + " is not correct"}, status=400)
+
+ print(f"columns are {columns}, name_space = {name_space}")
+
+ if not hasattr(app.ctx, f'{table_name}_{name_space}_cache'):
+ setattr(app.ctx, f'{table_name}_{name_space}_cache',
+ CacheService(name_space, DB_NAME, table_name, columns, batch_size, CACHE_SIZE))
+
+ return json("OK")
+ except Exception as e:
+ return json({"error": str(e)}, status=500)
+
+
+# serve the data retrieve request from eva_service.py
+@app.route("/", methods=["GET"])
+async def serve_get_request(request):
+ name_space = request.args.get('name_space')
+ table_name = request.args.get('table_name')
+
+ # check if exist
+ if not hasattr(app.ctx, f'{table_name}_{name_space}_cache'):
+ return json({"error": f"{table_name}_{name_space}_cache not start yet"}, status=404)
+
+ # get data
+ data = getattr(app.ctx, f'{table_name}_{name_space}_cache').get()
+
+ # return
+ if data is None:
+ return json({"error": "No data available"}, status=404)
+ else:
+ return json(data)
+
+
+if __name__ == "__main__":
+ app.run(host="0.0.0.0", port=8093)
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/cache-service/trigger_cache_svc.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/cache-service/trigger_cache_svc.py
new file mode 100644
index 000000000..2631abeab
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/cache-service/trigger_cache_svc.py
@@ -0,0 +1,36 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import requests
+
+url = 'http://localhost:8093/'
+columns = ['label', 'col1', 'col2', 'col3', 'col4', 'col5', 'col6', 'col7', 'col8', 'col9', 'col10']
+response = requests.post(
+ url, json={'columns': columns,
+ 'name_space': "train",
+ 'table_name': "frappe_train",
+ "batch_size": 32})
+print(response.json())
+
+response = requests.post(
+ url, json={'columns': columns,
+ 'name_space': "valid",
+ 'table_name': "frappe_valid",
+ "batch_size": 1024})
+print(response.json())
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/README.md b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/README.md
new file mode 100644
index 000000000..3025139f1
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/README.md
@@ -0,0 +1,290 @@
+
+
+# TRAILS: A Database Native Model Selection System
+
+![image-20230702035806963](documents/imgs/image-20230702035806963.png)
+
+[TOC]
+
+# Config Environments
+
+```bash
+# Create virtual env
+conda config --set ssl_verify false
+conda create -n "trails" python=3.8.10
+conda activate trails
+pip install -r requirement.txt
+
+cd TRAILS
+
+# make a dir to store all results.
+mkdir ../exp_data
+```
+
+# Reproduce the results
+
+## NAS-Bench-Tabular
+
+NAS-Bench-Tabular can either be **downloaded** or built from scratch.
+
+### Download NAS-Bench-Tabular
+
+1. **Download** the dataset using the following link, and extract it to `exp_data`
+
+```bash
+https://drive.google.com/file/d/1TGii9ymbmX81c9-GKWXbe_4Z64R8Btz1/view?usp=sharing
+```
+
+### Build NAS-Bench-Tabular
+
+2. Build the **NAS-Bench-Tabular** from scratch
+
+```bash
+# Construct NAS-Bench-Tabular:
+## 1. Training all models.
+bash internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_frappe.sh
+bash internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_diabetes.sh
+bash internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_criteo.sh
+
+## 2. Scoring all models using all TFMEMs.
+bash internal/ml/model_selection/scripts/nas-bench-tabular/score_all_modesl_frappe.sh
+bash internal/ml/model_selection/scripts/nas-bench-tabular/score_all_modesl_uci.sh
+bash internal/ml/model_selection/scripts/nas-bench-tabular/score_all_modesl_criteo.sh
+```
+
+3. Build the **NAS-Bench-Img** from scratch
+
+   To speed up the experiments and queries (the NAS-Bench API is slow):
+
+   1. We retrieve all results from the NAS-Bench API and store them as a JSON file.
+   2. We score all models in NB201 and 28K models in NB101.
+   3. We search with EA + Score and record the search process, in terms of
+      `run_id, current_explored_model, top_400 highest scored model, time_usage`,
+      in SQLite.
+
+```bash
+# 1. Record NASBENCH API data into json file
+## This requires to install nats_bench: pip install nats_bench
+bash ./internal/ml/model_selection/scripts/nas-bench-img/convert_api_2_json.sh
+
+# 2. Scoring all models using all TFMEMs.
+nohup bash ./internal/ml/model_selection/scripts/nas-bench-img/score_all_models.sh &
+
+# 3. Explore with EA and the score results, and store the exploration process in SQLite
+bash ./internal/ml/model_selection/scripts/nas-bench-img/explore_all_models.sh
+
+# 4. Generate the baseline.
+bash ./internal/ml/model_selection/scripts/baseline_system_img.sh
+```
+
+The following experiments can then query the filtering-phase results based on `run_id`.
+
+## SLO-Aware 2Phase-MS
+
+With the above **NAS-Bench-Tabular**, we can run various experiments.
+
+```bash
+# 1. Generate the results for drawing the figure
+## tabular data: training-base-ms
+bash internal/ml/model_selection/scripts/baseline_system_tab.sh
+## tabular data: training-free-ms, 2phase-ms
+nohup bash internal/ml/model_selection/scripts/anytime_tab.sh &
+## image data: training-base-ms, training-free-ms, 2phase-ms
+nohup bash internal/ml/model_selection/scripts/anytime_img_w_baseline.sh &
+
+# 2. Draw figure
+python internal/ml/model_selection/exps/macro/anytime_tab_draw.py
+python internal/ml/model_selection/exps/macro/anytime_img_draw.py
+```
+
+![image-20230702035554579](documents/imgs/image-20230702035554579.png)
+
+## Micro: Benchmark TFMEMs
+
+```bash
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+conda activate trails
+python ./internal/ml/model_selection/exps/micro/benchmark_correlation.py
+```
+
+![image-20230421214835152](./documents/imgs/image-20230421214835152.png)
+
+## Micro: Benchmark Budget-Aware Algorithm
+
+```bash
+bash internal/ml/model_selection/scripts/micro_budget_aware_alg.sh
+```
+
+![image-20230724111659545](./documents/imgs/image-20230724111659545.png)
+
+## Micro: Benchmark N, K, U
+
+After ranking the models by their TFMEM score in the filtering phase, we aim to determine:
+
+1. Is it easier to find a good model by examining more models (**K**), each trained for fewer epochs (**U**),
+   or by examining fewer models but training each for more epochs?
+2. How many models should we explore (**N**) and how many should we keep (**K**)?
+
+```bash
+bash internal/ml/model_selection/scripts/micro_nku_tradeoff.sh
+```
+
+This is the experimental result on the UCI Diabetes dataset.
+Clearly, exploring more models in the refinement phase (large **K**) is more helpful for finding a better model.
+Although increasing **U** can identify a better model more accurately, it runs more training epochs and thus incurs a higher training cost.
+
+![image-20230722202555763](./documents/imgs/image-20230722202555763.png)
+
+We then fix **U=1** for cost efficiency and determine N/K for higher search effectiveness.
+Clearly, once N/K reaches 100, it yields better scheduling results on both the image and tabular datasets; thus, we set **N/K=100** in the coordinator.
+
+![image-20230724111325368](./documents/imgs/image-20230724111325368.png)
+
+![image-20230722205244718](./documents/imgs/image-20230722205244718.png)
+
+## Micro: Device Placement & Embedding Cache
+
+1. To measure the time usage of the filtering phase on various hardware, run the following:
+
+ ```bash
+ # Without embedding cache at the filtering phase
+ nohup bash internal/ml/model_selection/scripts/latency_phase1_cpu_gpu.sh &
+ # With embedding cache at the filtering phase (faster)
+ nohup bash internal/ml/model_selection/scripts/latency_embedding_cache.sh &
+ # Draw graph
+ python ./internal/ml/model_selection/exps/micro/draw_filtering_latency.py
+ python ./internal/ml/model_selection/exps/micro/draw_filtering_memory_bar.py
+ python ./internal/ml/model_selection/exps/micro/draw_filtering_memory_line.py
+ python ./internal/ml/model_selection/exps/micro/draw_filtering_memory_cache_CPU.py
+ ```
+
+2. Further, we measure the end-to-end latency under CPU-only, GPU-only, and hybrid placement.
+
+ ```bash
+ nohup bash internal/ml/model_selection/scripts/latency_phase1_cpu_gpu.sh &
+ ```
+
+## Micro: In-DB vs Out-DB filtering phase
+
+```bash
+# run out-of-db; read data via psycopg2
+bash ./internal/ml/model_selection/scripts/latency_phase1_in_db.sh
+
+# run in-db query, read data via SPI
+select benchmark_filtering_latency_in_db(5000, 'frappe', '/project/TRAILS/internal/ml/model_selection/config.ini');
+
+select benchmark_filtering_latency_in_db(5000, 'uci_diabetes', '/project/TRAILS/internal/ml/model_selection/config.ini');
+
+select benchmark_filtering_latency_in_db(5000, 'criteo', '/project/TRAILS/internal/ml/model_selection/config.ini');
+```
+
+## Micro: On-the-Fly Data transmission, Refinement
+
+```bash
+# start cache service
+python ./internal/cache-service/cache_service.py
+python ./internal/cache-service/trigger_cache_svc.py
+# consume from the cache-svc
+
+
+```
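+
+A hypothetical consumer (for example, the `eva_service.py` referenced in the code comments of
+`internal/cache-service/cache_service.py`) then pulls pre-processed batches from the cache
+service with a plain GET request, mirroring its `serve_get_request` handler:
+
+```python
+import requests
+
+# hypothetical consumer of the cache service started above
+batch = requests.get(
+    "http://localhost:8093/",
+    params={"name_space": "train", "table_name": "frappe_train"}).json()
+# batch is {'id': [...], 'value': [...], 'y': [...]}, or the string "end_position"
+# once a full scan of the table has been served.
+print(len(batch["y"]) if isinstance(batch, dict) else batch)
+```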
+
+## Reproduce Figure 7
+
+```bash
+python exps/main_v2/analysis/2.\ cost_draw.py
+python exps/main_v2/analysis/3.\ cost_train_based.py
+```
+
+![image-20230702035622198](documents/imgs/image-20230702035622198.png)
+
+## Reproduce Figure 8
+
+```bash
+# draw figure 8(a)
+python exps/main_v2/analysis/5.draw_IDMS_var_workloads.py
+# draw figure 8(b)
+python exps/main_v2/analysis/6.draw_IDMS_dataloading.py
+```
+
+![image-20230702035639502](documents/imgs/image-20230702035639502.png)
+# Baselines
+
+We compare with training-based MS, TabNAS, training-free MS, etc.
+
+For image data, the baselines are already generated in the NAS-Bench-Img part; see above.
+
+# Appendix
+
+Here, all experiments are conducted on the Frappe dataset.
+
+1. Computational Costs
+
+ ```bash
+ bash ./internal/ml/model_selection/exps/micro/resp/benchmark_cost.sh
+ ```
+
+2. Search cost for multiple training-free or training-based combinations (warm-up / model proposal)
+
+ ```bash
+ # get RL, RE, RS + training-based model evaluation
+ bash ./internal/ml/model_selection/scripts/micro_search_strategy.sh
+ # this will read previous file, and run warm-up/move proposal, and draw all together
+ bash ./internal/ml/model_selection/exps/micro/resp/benchmark_search_cost.sh
+ ```
+
+3. How does **K** influence the result?
+
+ ```bash
+ python ./internal/ml/model_selection/exps/micro/resp/benchmark_k_fix_time.py
+ ```
+
+4. Noise in selecting the top-K models
+
+ ```bash
+ python ./internal/ml/model_selection/exps/micro/resp/benchmark_noisy_influence.py
+ ```
+
+5. Weight-sharing result
+
+ ```bash
+ nohup bash internal/ml/model_selection/scripts/benchmark_weight_sharing.sh &
+ ```
+
+
+
+
+
+
+
+# Run end2end model selection
+
+Download the dataset and put it under `exp_data/data/structure_data`.
+
+```
+python main.py --budget=100 --dataset=frappe
+```
+
+Check the logs under `logs_default`.
+
+![image-20230421220338391](./documents/imgs/image-20230421220338391.png)
+
+![image-20230421220443231](./documents/imgs/image-20230421220443231.png)
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/config.ini b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/config.ini
new file mode 100644
index 000000000..e7235b1d4
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/config.ini
@@ -0,0 +1,104 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+[DEFAULT]
+log_name = in_db_ms
+budget = 100
+device = cpu
+log_folder = ./internal/debug_singa_logger
+;log_folder = /project/TRAILS/log_score_time_frappe
+result_dir = ./internal/ml/model_selection/exp_result_singa/
+;result_dir = /project/TRAILS/internal/ml/model_selection/exp_result_sever_cache_sql_indb/
+num_points = 12
+max_load = -1
+
+[SAMPLER]
+search_space = mlp_sp
+population_size = 10
+sample_size = 3
+simple_score_sum = True
+
+[NB101]
+api_loc = nasbench_only108.pkl
+init_channels = 16
+bn = 1
+num_stacks = 3
+num_modules_per_stack = 3
+
+[NB201]
+init_w_type = none
+init_b_type = none
+arch_size = 1
+
+[MLP]
+num_layers = 4
+hidden_choice_len = 20
+
+[MLP_TRAINER]
+epoch = 20
+batch_size = 32
+lr = 0.002
+patience = 1
+iter_per_epoch = 200
+nfeat = 5500
+nfield = 10
+nemb = 10
+report_freq = 30
+workers = 0
+
+[DATASET]
+;base_dir = ../exp_data/
+base_dir = /hdd1/xingnaili/exp_data/
+dataset = frappe
+num_labels = 2
+
+[SEQ_TRAIN]
+worker_id = 0
+total_workers = 120
+total_models_per_worker = -1
+pre_partitioned_file = ./internal/ml/model_selection/exps/sampled_data/sampled_models_all.json
+
+[DIS_TRAIN]
+worker_each_gpu = 6
+gpu_num = 8
+
+[TUNE_INTERVAL]
+kn_rate = -1
+
+[ANYTIME]
+only_phase1 = False
+is_simulate = False
+
+
+[SERVER]
+refinement_url = http://localhost:8095/
+cache_svc_url = http://localhost:8093/
+
+[DB_CONFIG]
+db_name = pg_extension
+db_user = postgres
+db_host = 127.0.0.1
+db_port = 28814
+
+
+[SYS_PERFORMANCE]
+models_explore = -1
+# tfmem = express_flow
+tfmem = synflow
+embedding_cache_filtering = True
+concurrency = 1
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230421214835152.png b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230421214835152.png
new file mode 100644
index 000000000..06a86f953
Binary files /dev/null and b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230421214835152.png differ
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230421220338391.png b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230421220338391.png
new file mode 100644
index 000000000..dde2a761b
Binary files /dev/null and b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230421220338391.png differ
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230421220443231.png b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230421220443231.png
new file mode 100644
index 000000000..c94d59cfd
Binary files /dev/null and b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230421220443231.png differ
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035554579.png b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035554579.png
new file mode 100644
index 000000000..387f3ce49
Binary files /dev/null and b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035554579.png differ
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035622198.png b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035622198.png
new file mode 100644
index 000000000..c63e58598
Binary files /dev/null and b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035622198.png differ
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035639502.png b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035639502.png
new file mode 100644
index 000000000..0422e108b
Binary files /dev/null and b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035639502.png differ
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035806963.png b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035806963.png
new file mode 100644
index 000000000..c33bee185
Binary files /dev/null and b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035806963.png differ
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230722202555763.png b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230722202555763.png
new file mode 100644
index 000000000..527d1eb84
Binary files /dev/null and b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230722202555763.png differ
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230722205244718.png b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230722205244718.png
new file mode 100644
index 000000000..bc1ae3af5
Binary files /dev/null and b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230722205244718.png differ
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230724111325368.png b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230724111325368.png
new file mode 100644
index 000000000..8637aaee7
Binary files /dev/null and b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230724111325368.png differ
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230724111659545.png b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230724111659545.png
new file mode 100644
index 000000000..2fb081d4e
Binary files /dev/null and b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230724111659545.png differ
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/eva_service.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/eva_service.py
new file mode 100644
index 000000000..f3f5b4575
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/eva_service.py
@@ -0,0 +1,96 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import calendar
+import os
+import time
+import argparse
+import configparser
+from sanic import Sanic
+from sanic.exceptions import InvalidUsage
+from sanic.response import json
+
+ts = calendar.timegm(time.gmtime())
+os.environ.setdefault("log_logger_folder_name", "log_eval_service")
+os.environ.setdefault("log_file_name", "eval_service_" + str(ts) + ".log")
+from src.logger import logger
+from src.eva_engine.run_ms import RunModelSelection
+from src.dataset_utils.stream_dataloader import StreamingDataLoader
+from shared_config import parse_config_arguments
+from typing import Any, List, Dict, Tuple
+
+
+def refinement_phase(u: int, k_models: List, dataset_name: str, config_file: str):
+ """
+ U: training-epoches
+ K-Models: k models to train
+ config_file: config file path
+ """
+ args = parse_config_arguments(config_file)
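+    # NOTE: overrides the device from the config file with a hardcoded GPU; adjust for your machine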
+ args.device = "cuda:7"
+ train_dataloader = StreamingDataLoader(
+ cache_svc_url=args.cache_svc_url, table_name=f"{dataset_name}_train", name_space="train")
+ eval_dataloader = StreamingDataLoader(
+ cache_svc_url=args.cache_svc_url, table_name=f"{dataset_name}_valid", name_space="valid")
+
+ try:
+ rms = RunModelSelection(args.search_space, args, is_simulate=args.is_simulate)
+ best_arch, best_arch_performance, _ = rms.refinement_phase(
+ U=u,
+ k_models=k_models,
+ train_loader=train_dataloader,
+ valid_loader=eval_dataloader)
+ finally:
+ train_dataloader.stop()
+ eval_dataloader.stop()
+ return {"best_arch": best_arch, "best_arch_performance": best_arch_performance}
+
+
+app = Sanic("evaApp")
+
+
+@app.route("/", methods=["POST"])
+async def start_refinement_phase(request):
+ # Check if request is JSON
+ if not request.json:
+ logger.info("Expecting JSON payload")
+ raise InvalidUsage("Expecting JSON payload")
+
+ u = request.json.get('u')
+ k_models = request.json.get('k_models')
+ dataset_name = request.json.get('dataset_name')
+ config_file = request.json.get('config_file')
+
+    if u is None or k_models is None or config_file is None:
+        logger.info(f"Missing 'u', 'k_models', or 'config_file' in JSON payload, {request.json}")
+        raise InvalidUsage("Missing 'u', 'k_models', or 'config_file' in JSON payload")
+
+ result = refinement_phase(u, k_models, dataset_name, config_file)
+
+ return json(result)
+
+
+if __name__ == "__main__":
+
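+    # Local smoke test with a hardcoded config path; for deployment, remove this call and start the Sanic server via app.run() below.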
+ result = refinement_phase(
+ u=1,
+ k_models=["8-8-8-8", "16-16-16-16"],
+ dataset_name="frappe",
+ config_file="/home/xingnaili/firmest_docker/TRAILS/internal/ml/model_selection/config.ini")
+
+ # app.run(host="0.0.0.0", port=8095)
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/README.md b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/README.md
new file mode 100644
index 000000000..2adc07d97
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/README.md
@@ -0,0 +1,40 @@
+
+
+# Folder description
+
+## baseline
+
+We store the baseline algorithms here
+
+## benchmark_tfmem
+
+We benchmark TFMEM here
+
+## macro/micro
+
+We benchmark the system at the macro level and analyze individual components at the micro level
+
+## nas_bench_tabular
+
+We build a nas-bench-tabular dataset here
+
+## system
+
+We run the experiments here
\ No newline at end of file
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/draw_img_lib.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/draw_img_lib.py
new file mode 100644
index 000000000..7d4acb3e2
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/draw_img_lib.py
@@ -0,0 +1,724 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+
+from matplotlib import pyplot as plt
+import seaborn as sns
+import numpy as np
+import palettable
+from matplotlib.ticker import MaxNLocator
+import numpy
+from src.common.constant import Config
+import matplotlib
+
+# lines' mark size
+set_marker_size = 15
+# points' mark size
+set_marker_point = 14
+# points' mark size
+set_font_size = 40
+set_lgend_size = 15
+set_tick_size = 20
+
+frontinsidebox = 23
+
+# update tick size
+matplotlib.rc('xtick', labelsize=set_tick_size)
+matplotlib.rc('ytick', labelsize=set_tick_size)
+
+plt.rcParams['axes.labelsize'] = set_tick_size
+
+mark_list = ["o", "*", "<", "^", "s", "d", "D", ">", "h"]
+mark_size_list = [set_marker_size, set_marker_size + 1, set_marker_size + 1, set_marker_size,
+ set_marker_size, set_marker_size, set_marker_size, set_marker_size + 1, set_marker_size + 2]
+line_shape_list = ['-.', '--', '-', ':']
+
+
+# this is for draw figure3 only
+def get_plot_compare_with_base_line_cfg(search_space, dataset, if_with_phase1=False):
+ if search_space == Config.NB201:
+ run_range_ = range(0, 100, 1)
+ if if_with_phase1:
+ draw_graph = draw_anytime_result_with_p1
+ else:
+ draw_graph = draw_anytime_result
+ # min, this is for plot only
+ if dataset == Config.c10:
+ # C10 array
+ budget_array = [0.017, 0.083] + list(range(1, 350, 4))
+ sub_graph_y1 = [91, 94.5]
+ sub_graph_y2 = [53.5, 55]
+ sub_graph_split = 60
+ elif dataset == Config.c100:
+ # C10 array
+ budget_array = [0.017, 0.083] + list(range(1, 350, 4))
+
+ sub_graph_y1 = [64, 73.5]
+ sub_graph_y2 = [15, 16]
+ sub_graph_split = 20
+ else:
+ # ImgNet X array
+ budget_array = [0.017, 0.083] + list(range(1, 350, 4))
+ sub_graph_y1 = [33, 48]
+ sub_graph_y2 = [15.5, 17]
+ sub_graph_split = 34
+ else:
+ # this is NB101 + C10, because only 101 has 20 run. others have 100 run.
+ run_range_ = range(0, 20, 1)
+ if if_with_phase1:
+ draw_graph = draw_anytime_result_one_graph_with_p1
+ # budget_array = list(range(1, 16, 1))
+ budget_array = numpy.arange(0.02, 15, 0.02).tolist()
+ else:
+ draw_graph = draw_anytime_result_one_graph
+ budget_array = [0.017, 0.083] + list(range(1, 2000, 8))
+
+ if dataset == Config.c10:
+ # C10 array
+ # budget_array = list(range(0, 2000, 1))
+ sub_graph_y1 = [90, 94.5]
+ sub_graph_y2 = [52, 55]
+ sub_graph_split = 60
+ else:
+ raise Exception
+
+ return run_range_, budget_array, sub_graph_y1, sub_graph_y2, sub_graph_split, draw_graph
+
+
+def draw_anytime_result(result_dir, y_acc_list_arr, x_T_list,
+ x_acc_train, y_acc_train_l, y_acc_train_m, y_acc_train_h,
+ annotations, lv,
+ name_img, dataset,
+ x1_lim=[], x2_lim=[],
+ ):
+ fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, dpi=100, gridspec_kw={'height_ratios': [4, 1]})
+ exp = np.array(y_acc_list_arr)
+ sys_acc_h = np.quantile(exp, .75, axis=0)
+ sys_acc_m = np.quantile(exp, .5, axis=0)
+ sys_acc_l = np.quantile(exp, .25, axis=0)
+
+ # plot simulate result of system
+ ax1.fill_between(x_T_list, sys_acc_l, sys_acc_h, alpha=0.1)
+ ax1.plot(x_T_list, sys_acc_m, mark_list[-1], label="TRAILS")
+ ax2.fill_between(x_T_list, sys_acc_l, sys_acc_h, alpha=0.1)
+
+ # plot simulate result of train-based line
+ ax1.fill_between(x_acc_train, y_acc_train_l, y_acc_train_h, alpha=0.3)
+ ax1.plot(x_acc_train, y_acc_train_m, mark_list[-2], label="Training-based MS")
+ ax2.fill_between(x_acc_train, y_acc_train_l, y_acc_train_h, alpha=0.3)
+
+ for i in range(len(annotations)):
+ ele = annotations[i]
+ if ele[1] < lv:
+ # convert to mins
+            ax2.plot(ele[2] / 60, ele[1], mark_list[i], label=ele[0], markersize=set_marker_size)
+ else:
+            ax1.plot(ele[2] / 60, ele[1], mark_list[i], label=ele[0], markersize=set_marker_size)
+ # ax2.scatter(ele[2]/60, ele[1]* 0.01, s=100, color="red")
+ # ax2.annotate(ele[0], (ele[2]/60, ele[1] * 0.01))
+
+ if len(x1_lim) > 0 and len(x2_lim) > 0:
+        ax1.set_ylim(x1_lim[0], x1_lim[1])  # set the y-axis range of subplot 1 (show only part of the curve)
+        ax2.set_ylim(x2_lim[0], x2_lim[1])  # set the y-axis range of subplot 2 (show only part of the curve)
+
+    ax1.spines['bottom'].set_visible(False)  # hide the bottom spine of subplot 1
+    ax2.spines['top'].set_visible(False)  # hide the top spine of subplot 2
+    ax2.set_xticks(range(0, 31, 1))
+
+    d = .85  # slope of the axis-break markers
+    # draw the markers at the axis break
+ kwargs = dict(marker=[(-1, -d), (1, d)], markersize=set_marker_size,
+ linestyle='none', color='r', mec='r', mew=1, clip_on=False)
+ ax1.plot([0, 1], [0, 0], transform=ax1.transAxes, **kwargs)
+ ax2.plot([0, 1], [1, 1], transform=ax2.transAxes, **kwargs)
+
+ plt.tight_layout()
+ plt.xscale("symlog")
+ ax1.grid()
+ ax2.grid()
+ plt.xlabel("Time Budget given by user (min)", fontsize=set_font_size)
+ ax1.set_ylabel(f"Test accuracy on {dataset}", fontsize=set_font_size)
+ ax1.legend(ncol=1, fontsize=set_lgend_size)
+ ax2.legend(fontsize=set_lgend_size)
+ # plt.show()
+ plt.savefig(f"{result_dir}/any_time_{name_img}.pdf", bbox_inches='tight')
+
+
+def draw_anytime_result_one_graph(y_acc_list_arr, x_T_list,
+ x_acc_train, y_acc_train_l, y_acc_train_m, y_acc_train_h,
+ annotations, lv,
+ name_img, dataset,
+ x1_lim=[], x2_lim=[],
+ ):
+ # fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, dpi=100, gridspec_kw={'height_ratios': [5, 1]})
+ exp = np.array(y_acc_list_arr) * 100
+ sys_acc_h = np.quantile(exp, .75, axis=0)
+ sys_acc_m = np.quantile(exp, .5, axis=0)
+ sys_acc_l = np.quantile(exp, .25, axis=0)
+
+ # exp_time = np.array(real_time_used_arr)
+ # time_mean = np.quantile(exp_time, .5, axis=0)
+ time_mean = x_T_list
+
+ # plot simulate result of system
+ plt.fill_between(time_mean, sys_acc_l, sys_acc_h, alpha=0.1)
+ plt.plot(time_mean, sys_acc_m, "o-", label="TRAILS")
+ # plt.plot(time_mean, sys_acc_m, label="TRAILS")
+
+ # plot simulate result of train-based line
+ plt.fill_between(x_acc_train, y_acc_train_l, y_acc_train_h, alpha=0.3)
+ plt.plot(x_acc_train, y_acc_train_m, "o-", label="Training-based MS")
+ # plt.plot(x_acc_train, y_acc_train_m, label="Training-based MS")
+
+ if len(x1_lim) > 0:
+        plt.ylim(x1_lim[0], x1_lim[1])  # set the y-axis range to show only part of the curve
+
+    d = .85  # slope of the axis-break markers
+    # draw the markers at the axis break
+ kwargs = dict(marker=[(-1, -d), (1, d)], markersize=set_marker_size,
+ linestyle='none', color='r', mec='r', mew=1, clip_on=False)
+ # plt.plot([0, 1], [0, 0], transform=ax1.transAxes, **kwargs)
+ # plt.plot([0, 1], [1, 1], transform=ax2.transAxes, **kwargs)
+
+ plt.tight_layout()
+ # plt.xscale("symlog")
+ plt.grid()
+ plt.xlabel("Time Budget given by user (min)", fontsize=set_font_size)
+ plt.ylabel(f"Test accuracy on {dataset}", fontsize=set_font_size)
+ plt.legend(ncol=1, fontsize=set_lgend_size)
+ plt.show()
+ # plt.savefig(f"amy_time_{name_img}.pdf", bbox_inches='tight')
+
+
+# those two function will plot phase 1 and phase 2
+def draw_anytime_result_with_p1(result_dir, y_acc_list_arr, x_T_list, y_acc_list_arr_p1, x_T_list_p1,
+ x_acc_train, y_acc_train_l, y_acc_train_m, y_acc_train_h,
+ annotations, lv,
+ name_img, dataset, max_value,
+ x1_lim=[], x2_lim=[],
+ ):
+ fig, (ax1, ax2) = plt.subplots(
+ 2, 1,
+ sharex=True,
+ dpi=100,
+ gridspec_kw={'height_ratios': [6, 1]})
+
+ shade_degree = 0.2
+
+ # plot simulate result of train-based line
+ ax1.plot(x_acc_train, y_acc_train_m, mark_list[-3] + line_shape_list[0], label="Training-Based MS",
+ markersize=mark_size_list[-3])
+ ax1.fill_between(x_acc_train, y_acc_train_l, y_acc_train_h, alpha=shade_degree)
+ ax2.fill_between(x_acc_train, y_acc_train_l, y_acc_train_h, alpha=shade_degree)
+
+ # plot simulate result of system
+ exp = np.array(y_acc_list_arr_p1)
+ sys_acc_p1_h = np.quantile(exp, .75, axis=0)
+ sys_acc_p1_m = np.quantile(exp, .5, axis=0)
+ sys_acc_p1_l = np.quantile(exp, .25, axis=0)
+ ax1.plot(x_T_list_p1, sys_acc_p1_m, mark_list[-2] + line_shape_list[1], label="Training-Free MS",
+ markersize=mark_size_list[-2])
+ ax1.fill_between(x_T_list_p1, sys_acc_p1_l, sys_acc_p1_h, alpha=shade_degree)
+ ax2.fill_between(x_T_list_p1, sys_acc_p1_l, sys_acc_p1_h, alpha=shade_degree)
+
+ # plot simulate result of system
+ exp = np.array(y_acc_list_arr)
+ sys_acc_h = np.quantile(exp, .75, axis=0)
+ sys_acc_m = np.quantile(exp, .5, axis=0)
+ sys_acc_l = np.quantile(exp, .25, axis=0)
+ ax1.plot(x_T_list, sys_acc_m, mark_list[-1] + line_shape_list[2], label="2Phase-MS", markersize=mark_size_list[-1])
+ ax1.fill_between(x_T_list, sys_acc_l, sys_acc_h, alpha=shade_degree)
+ ax2.fill_between(x_T_list, sys_acc_l, sys_acc_h, alpha=shade_degree)
+
+ print(f"speed-up on {dataset} = {x_acc_train[-1] / x_T_list[-2]}, "
+ f"t_train = {x_acc_train[-1]}, t_f = {x_T_list[-2]}")
+
+ for i in range(len(annotations)):
+ ele = annotations[i]
+ if ele[1] < lv:
+ # convert to mins
+ ax2.plot(ele[2] / 60, ele[1], mark_list[i], label=ele[0], markersize=set_marker_point)
+ else:
+ ax1.plot(ele[2] / 60, ele[1], mark_list[i], label=ele[0], markersize=set_marker_point)
+ # ax2.scatter(ele[2]/60, ele[1]* 0.01, s=100, color="red")
+ # ax2.annotate(ele[0], (ele[2]/60, ele[1] * 0.01))
+
+ if len(x1_lim) > 0 and len(x2_lim) > 0:
+        ax1.set_ylim(x1_lim[0], x1_lim[1])  # set the y-axis range of subplot 1 (show only part of the curve)
+        ax2.set_ylim(x2_lim[0], x2_lim[1])  # set the y-axis range of subplot 2 (show only part of the curve)
+
+    ax1.spines['bottom'].set_visible(False)  # hide the bottom spine of subplot 1
+    ax2.spines['top'].set_visible(False)  # hide the top spine of subplot 2
+    ax2.set_xticks(range(0, 31, 1))
+
+    d = .85  # slope of the axis-break markers
+    # draw the markers at the axis break
+ kwargs = dict(marker=[(-1, -d), (1, d)], markersize=set_marker_size,
+ linestyle='none', color='r', mec='r', mew=1, clip_on=False)
+ ax1.plot([0, 1], [0, 0], transform=ax1.transAxes, **kwargs)
+ ax2.plot([0, 1], [1, 1], transform=ax2.transAxes, **kwargs)
+
+ plt.xscale("log")
+ ax1.grid()
+ ax2.grid()
+ plt.xlabel(r"Response Time Threshold $T_{max}$ (min)", fontsize=set_font_size)
+ ax1.set_ylabel(f"Test Acc on {'In-16'}", fontsize=set_font_size)
+ # ax1.legend(ncol=1, fontsize=set_lgend_size)
+ # ax2.legend(fontsize=set_lgend_size)
+
+ ax1.xaxis.label.set_size(set_tick_size)
+ ax1.yaxis.label.set_size(set_tick_size)
+ # ax1.set_xticks([])
+
+ ax2.xaxis.label.set_size(set_tick_size)
+ ax2.yaxis.label.set_size(set_tick_size)
+
+ ax1.yaxis.set_major_locator(MaxNLocator(nbins=4, integer=True))
+
+ ax1.axhline(max_value, color='r', linestyle='-', label='Global Best Accuracy')
+
+ tick_values = [0.01, 0.1, 1, 10, 100, 1000]
+ ax2.set_xticks(tick_values)
+ ax2.set_xticklabels([f'$10^{{{int(np.log10(val))}}}$' for val in tick_values])
+
+ # this is for unique hash
+ export_legend(
+ fig,
+ colnum=3,
+ unique_labels=['TE-NAS (Training-Free)', 'ENAS (Weight sharing)',
+ 'KNAS (Training-Free)', 'DARTS-V1 (Weight sharing)', 'DARTS-V2 (Weight sharing)',
+ 'Training-Based MS', 'Training-Free MS', '2Phase-MS', 'Global Best Accuracy'])
+ plt.tight_layout()
+ fig.savefig(f"{result_dir}/any_time_{name_img}_p1_from_0.1_sec.pdf", bbox_inches='tight')
+
+
+def export_legend(ori_fig, filename="any_time_legend", colnum=9, unique_labels=[]):
+ fig2 = plt.figure(figsize=(5, 0.3))
+ lines_labels = [ax.get_legend_handles_labels() for ax in ori_fig.axes]
+ lines, labels = [sum(lol, []) for lol in zip(*lines_labels)]
+ # grab unique labels
+ if len(unique_labels) == 0:
+ unique_labels = set(labels)
+ # assign labels and legends in dict
+ legend_dict = dict(zip(labels, lines))
+ # query dict based on unique labels
+ unique_lines = [legend_dict[x] for x in unique_labels]
+ fig2.legend(unique_lines, unique_labels, loc='center',
+ ncol=colnum,
+ fancybox=True,
+ shadow=True, scatterpoints=1, fontsize=set_lgend_size)
+ fig2.tight_layout()
+ fig2.savefig(f"{filename}.pdf", bbox_inches='tight')
+
+
+def draw_anytime_result_one_graph_with_p1(y_acc_list_arr, x_T_list, y_acc_list_arr_p1, x_T_list_p1,
+ x_acc_train, y_acc_train_l, y_acc_train_m, y_acc_train_h,
+ annotations, lv,
+ name_img, dataset,
+ x1_lim=[], x2_lim=[],
+ ):
+ # fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, dpi=100, gridspec_kw={'height_ratios': [5, 1]})
+
+ # plot simulate result of system
+ exp = np.array(y_acc_list_arr_p1) * 100
+ sys_acc_p1_h = np.quantile(exp, .75, axis=0)
+ sys_acc_p1_m = np.quantile(exp, .5, axis=0)
+ sys_acc_p1_l = np.quantile(exp, .25, axis=0)
+
+ plt.fill_between(x_T_list_p1, sys_acc_p1_l, sys_acc_p1_h, alpha=0.1)
+ plt.plot(x_T_list_p1, sys_acc_p1_m, "o-", label="TRAILS-P1")
+ # plt.fill_between(x_T_list_p1, sys_acc_p1_l, sys_acc_p1_h, alpha=0.1)
+
+ exp = np.array(y_acc_list_arr) * 100
+ sys_acc_h = np.quantile(exp, .75, axis=0)
+ sys_acc_m = np.quantile(exp, .5, axis=0)
+ sys_acc_l = np.quantile(exp, .25, axis=0)
+
+ # exp_time = np.array(real_time_used_arr)
+ # time_mean = np.quantile(exp_time, .5, axis=0)
+ time_mean = x_T_list
+
+ # plot simulate result of system
+ plt.fill_between(time_mean, sys_acc_l, sys_acc_h, alpha=0.1)
+ plt.plot(time_mean, sys_acc_m, "o-", label="TRAILS")
+ # plt.plot(time_mean, sys_acc_m, label="TRAILS")
+
+ # plot simulate result of train-based line
+ plt.fill_between(x_acc_train, y_acc_train_l, y_acc_train_h, alpha=0.3)
+ plt.plot(x_acc_train, y_acc_train_m, "o-", label="Training-based MS")
+ # plt.plot(x_acc_train, y_acc_train_m, label="Training-based MS")
+
+ if len(x1_lim) > 0:
+        plt.ylim(x1_lim[0], x1_lim[1])  # set the y-axis range to show only part of the curve
+
+    d = .85  # slope of the axis-break markers
+    # draw the markers at the axis break
+ kwargs = dict(marker=[(-1, -d), (1, d)], markersize=set_marker_size,
+ linestyle='none', color='r', mec='r', mew=1, clip_on=False)
+ # plt.plot([0, 1], [0, 0], transform=ax1.transAxes, **kwargs)
+ # plt.plot([0, 1], [1, 1], transform=ax2.transAxes, **kwargs)
+
+ plt.tight_layout()
+ plt.xscale("symlog")
+ plt.grid()
+ plt.xlabel("Time Budget given by user (min)", fontsize=set_font_size)
+ plt.ylabel(f"Test accuracy on {dataset}", fontsize=set_font_size)
+ plt.legend(ncol=1, fontsize=set_lgend_size)
+ # plt.show()
+ plt.savefig(f"amy_time_{name_img}.pdf", bbox_inches='tight')
+
+
+# for K, U N trade-off
+def draw_grid_graph_with_budget(
+ acc, bt, b1, b2,
+ img_name: str, y_array: list, x_array: list):
+ """
+ :param acc: Two array list
+ :param bt: Two array list
+ :param img_name: img name string
+ :return:
+ """
+
+ acc_new = np.array(acc)
+ acc = acc_new.tolist()
+
+ mask = np.array(acc)
+ mask[mask > 0] = 0
+ mask[mask < 0] = 1
+
+ bt = np.round(np.array(bt), 2).tolist()
+ mask2 = np.array(bt)
+ mask2[mask2 > 0] = 0
+ mask2[mask2 < 0] = 1
+
+ mask3 = np.array(b1)
+ mask3[mask3 > 0] = 0
+ mask3[mask3 < 0] = 1
+
+ mask4 = np.array(b2)
+ mask4[mask4 > 0] = 0
+ mask4[mask4 < 0] = 1
+
+ fig, ax = plt.subplots(2, 2, figsize=(15, 14))
+
+ linewidths = 0.5
+ sns.set(font_scale=3)
+ sns.heatmap(
+ data=acc,
+ vmax=99,
+ vmin=93,
+ cmap=palettable.cmocean.diverging.Curl_10.mpl_colors,
+ annot=True,
+ fmt=".2f",
+ annot_kws={'size': frontinsidebox, 'weight': 'normal', 'color': 'w', 'va': 'bottom'},
+ mask=mask,
+        square=True, linewidths=linewidths,  # draw a border around each cell with the given line width
+ cbar_kws={"shrink": .5},
+ ax=ax[0, 0]
+ )
+
+ sns.heatmap(
+ data=bt,
+ # vmax=,
+ vmin=-9,
+ cmap=palettable.cmocean.diverging.Curl_10.mpl_colors,
+ annot=True,
+ fmt=".2f",
+ annot_kws={'size': frontinsidebox, 'weight': 'normal', 'color': 'w', 'va': 'top'},
+ mask=mask2,
+        square=True, linewidths=linewidths,  # draw a border around each cell with the given line width
+ cbar_kws={"shrink": .5},
+ ax=ax[0, 1]
+ )
+
+ sns.heatmap(
+ data=b1,
+ vmax=17000,
+ vmin=15000,
+ cmap=palettable.cmocean.diverging.Curl_10.mpl_colors,
+ annot=True,
+ fmt=".0f",
+ annot_kws={'size': frontinsidebox, 'weight': 'normal', 'color': 'w', 'va': 'top'},
+        mask=mask3,
+        square=True, linewidths=linewidths,  # draw a border around each cell with the given line width
+ cbar_kws={"shrink": .5},
+ ax=ax[1, 0]
+ )
+
+ sns.heatmap(
+ data=b2,
+ # vmax=,
+ # vmin=-9,
+ cmap=palettable.cmocean.diverging.Curl_10.mpl_colors,
+ annot=True,
+ fmt=".0f",
+ annot_kws={'size': frontinsidebox, 'weight': 'normal', 'color': 'w', 'va': 'top'},
+ mask=mask4,
+        square=True, linewidths=linewidths,  # draw a border around each cell with the given line width
+ cbar_kws={"shrink": .5},
+ ax=ax[1, 1]
+ )
+
+ plt.tight_layout()
+ plt.xlabel("U (epoch)", fontsize=set_font_size)
+ plt.ylabel("K (# models)", fontsize=set_font_size)
+
+ for i in [0, 1]:
+ for j in [0, 1]:
+ ax[i, j].set_xticklabels(x_array, fontsize=set_font_size)
+ ax[i, j].set_yticklabels(y_array, fontsize=set_font_size)
+ ax[i, j].set_xlabel("U (# epoch)", fontsize=set_font_size)
+ ax[i, j].set_ylabel("K (# models)", fontsize=set_font_size)
+
+ ax[0, 0].set_title('Test Accuracy (%)', fontsize=set_font_size)
+ ax[0, 1].set_title(r'Time Budget $T$ (min)', fontsize=set_font_size)
+ ax[1, 0].set_title(r'$N$', fontsize=set_font_size)
+ ax[1, 1].set_title(r"$K \cdot U \cdot \log_{\eta}K$", fontsize=set_font_size)
+
+ plt.tight_layout()
+ fig.subplots_adjust(wspace=0.001, hspace=0.3)
+
+ # plt.show()
+ base_dr = os.getcwd()
+ path_gra = os.path.join(base_dr, f"{img_name}.pdf")
+ fig.savefig(path_gra, bbox_inches='tight')
+
+
+def draw_grid_graph_with_budget_only_Acc_and_T(
+ acc, bt, b1, b2,
+ img_name: str, y_array: list, x_array: list):
+ """
+ :param acc: Two array list
+ :param bt: Two array list
+ :param img_name: img name string
+ :return:
+ """
+
+ acc_new = np.array(acc)
+ acc = acc_new.tolist()
+
+ mask = np.array(acc)
+ mask[mask > 0] = 0
+ mask[mask < 0] = 1
+
+ bt = np.round(np.array(bt), 2).tolist()
+ mask2 = np.array(bt)
+ mask2[mask2 > 0] = 0
+ mask2[mask2 < 0] = 1
+
+ mask3 = np.array(b1)
+ mask3[mask3 > 0] = 0
+ mask3[mask3 < 0] = 1
+
+ mask4 = np.array(b2)
+ mask4[mask4 > 0] = 0
+ mask4[mask4 < 0] = 1
+
+ fig, ax = plt.subplots(1, 2, figsize=(15, 14))
+
+ linewidths = 0.5
+ sns.set(font_scale=2)
+ sns.heatmap(
+ data=acc,
+ vmax=99,
+ vmin=93,
+ cmap=palettable.cmocean.diverging.Curl_10.mpl_colors,
+ annot=True,
+ fmt=".2f",
+ annot_kws={'size': frontinsidebox, 'weight': 'normal', 'color': 'w', 'va': 'bottom'},
+ mask=mask,
+ square=True,
+        linewidths=linewidths,  # draw a border around each cell with the given line width
+ cbar_kws={"shrink": .4},
+ ax=ax[0]
+ )
+
+ sns.heatmap(
+ data=bt,
+ vmax=600,
+ # vmin=-9,
+ cmap=palettable.cmocean.diverging.Curl_10.mpl_colors,
+ annot=True,
+ fmt=".2f",
+ annot_kws={'size': frontinsidebox, 'weight': 'normal', 'color': 'w', 'va': 'top'},
+ mask=mask2,
+ square=True,
+        linewidths=linewidths,  # draw a border around each cell with the given line width
+ cbar_kws={"shrink": .4},
+ ax=ax[1]
+ )
+
+ plt.tight_layout()
+ plt.xlabel("U (epoch)", fontsize=set_font_size)
+ plt.ylabel("K (# models)", fontsize=set_font_size)
+
+ for j in [0, 1]:
+ ax[j].set_xticklabels(x_array, fontsize=set_font_size)
+ ax[j].set_yticklabels(y_array, fontsize=set_font_size)
+ ax[j].set_xlabel("U (# epoch)", fontsize=set_font_size)
+ ax[j].set_ylabel("K (# models)", fontsize=set_font_size)
+
+ ax[0].set_title('Test Accuracy (%)', fontsize=set_font_size)
+ ax[1].set_title(r'Time Budget $T$ (min)', fontsize=set_font_size)
+
+ plt.tight_layout()
+ fig.subplots_adjust(wspace=0.3, hspace=0.3)
+
+ # plt.show()
+ base_dr = os.getcwd()
+ path_gra = os.path.join(base_dr, f"{img_name}.pdf")
+ fig.savefig(path_gra, bbox_inches='tight')
+
+
+def draw_grid_graph_with_budget_only_Acc(
+ acc, bt, b1, b2,
+ img_name: str, y_array: list, x_array: list):
+ """
+ :param acc: Two array list
+ :param bt: Two array list
+ :param img_name: img name string
+ :return:
+ """
+
+ acc_new = np.array(acc)
+ acc = acc_new.tolist()
+
+ mask = np.array(acc)
+ mask[mask > 0] = 0
+ mask[mask < 0] = 1
+
+    fig, ax = plt.subplots(figsize=(7, 14))
+
+ linewidths = 0.5
+ sns.set(font_scale=2)
+ sns.heatmap(
+ data=acc,
+ vmax=99,
+ vmin=93,
+ cmap=palettable.cmocean.diverging.Curl_10.mpl_colors,
+ annot=True,
+ fmt=".2f",
+ annot_kws={'size': frontinsidebox, 'weight': 'normal', 'color': 'w', 'va': 'bottom'},
+ mask=mask,
+ square=True,
+        linewidths=linewidths,  # draw a border around each cell with the given line width
+        cbar_kws={"shrink": .4},
+        ax=ax
+ )
+
+ plt.tight_layout()
+ plt.xlabel("U (epoch)", fontsize=set_font_size)
+ plt.ylabel("K (# models)", fontsize=set_font_size)
+
+ plt.xticks(x_array, fontsize=set_font_size)
+ plt.yticks(y_array, fontsize=set_font_size)
+
+ plt.title('Test Accuracy (%)', fontsize=set_font_size)
+ plt.tight_layout()
+ # fig.subplots_adjust(wspace=0.3, hspace=0.3)
+ # plt.show()
+ base_dr = os.getcwd()
+ path_gra = os.path.join(base_dr, f"{img_name}.pdf")
+ fig.savefig(path_gra, bbox_inches='tight')
+
+
+def draw_grid_graph_with_budget_only_T(
+ acc, bt, b1, b2,
+ img_name: str, y_array: list, x_array: list):
+ """
+ :param acc: Two array list
+ :param bt: Two array list
+ :param img_name: img name string
+ :return:
+ """
+
+ acc_new = np.array(acc)
+ acc = acc_new.tolist()
+
+ mask = np.array(acc)
+ mask[mask > 0] = 0
+ mask[mask < 0] = 1
+
+ bt = np.round(np.array(bt), 2).tolist()
+ mask2 = np.array(bt)
+ mask2[mask2 > 0] = 0
+ mask2[mask2 < 0] = 1
+
+ mask3 = np.array(b1)
+ mask3[mask3 > 0] = 0
+ mask3[mask3 < 0] = 1
+
+ mask4 = np.array(b2)
+ mask4[mask4 > 0] = 0
+ mask4[mask4 < 0] = 1
+
+ fig, ax = plt.subplots(1, 2, figsize=(15, 14))
+
+ linewidths = 0.5
+ sns.set(font_scale=2)
+ sns.heatmap(
+ data=acc,
+ vmax=99,
+ vmin=93,
+ cmap=palettable.cmocean.diverging.Curl_10.mpl_colors,
+ annot=True,
+ fmt=".2f",
+ annot_kws={'size': frontinsidebox, 'weight': 'normal', 'color': 'w', 'va': 'bottom'},
+ mask=mask,
+ square=True,
+        linewidths=linewidths,  # draw a border around each cell with the given line width
+ cbar_kws={"shrink": .4},
+ ax=ax[0]
+ )
+
+ sns.heatmap(
+ data=bt,
+ vmax=600,
+ # vmin=-9,
+ cmap=palettable.cmocean.diverging.Curl_10.mpl_colors,
+ annot=True,
+ fmt=".2f",
+ annot_kws={'size': frontinsidebox, 'weight': 'normal', 'color': 'w', 'va': 'top'},
+ mask=mask2,
+ square=True,
+        linewidths=linewidths,  # draw a border around each cell with the given line width
+ cbar_kws={"shrink": .4},
+ ax=ax[1]
+ )
+
+ plt.tight_layout()
+ plt.xlabel("U (epoch)", fontsize=set_font_size)
+ plt.ylabel("K (# models)", fontsize=set_font_size)
+
+ for j in [0, 1]:
+ ax[j].set_xticklabels(x_array, fontsize=set_font_size)
+ ax[j].set_yticklabels(y_array, fontsize=set_font_size)
+ ax[j].set_xlabel("U (# epoch)", fontsize=set_font_size)
+ ax[j].set_ylabel("K (# models)", fontsize=set_font_size)
+
+ ax[0].set_title('Test Accuracy (%)', fontsize=set_font_size)
+ ax[1].set_title(r'Time Budget $T$ (min)', fontsize=set_font_size)
+
+ plt.tight_layout()
+ fig.subplots_adjust(wspace=0.3, hspace=0.3)
+
+ # plt.show()
+ base_dr = os.getcwd()
+ path_gra = os.path.join(base_dr, f"{img_name}.pdf")
+ fig.savefig(path_gra, bbox_inches='tight')
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/draw_tab_lib.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/draw_tab_lib.py
new file mode 100644
index 000000000..6c30cc06b
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/draw_tab_lib.py
@@ -0,0 +1,215 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import List
+
+import numpy as np
+from matplotlib import pyplot as plt
+from matplotlib.ticker import MaxNLocator
+import warnings
+import matplotlib.cbook
+
+warnings.filterwarnings("ignore", category=matplotlib.cbook.mplDeprecation)
+
+# lines' mark size
+set_marker_size = 1
+# points' mark size
+set_marker_point = 14
+# points' mark size
+set_font_size = 20
+set_lgend_size = 15
+set_tick_size = 20
+
+frontinsidebox = 23
+
+# update tick size
+matplotlib.rc('xtick', labelsize=set_tick_size)
+matplotlib.rc('ytick', labelsize=set_tick_size)
+
+plt.rcParams['axes.labelsize'] = set_tick_size
+
+mark_list = ["o", "*", "<", "^", "s", "d", "D", ">", "h"]
+mark_size_list = [set_marker_size, set_marker_size + 1, set_marker_size + 1, set_marker_size,
+ set_marker_size, set_marker_size, set_marker_size, set_marker_size + 1, set_marker_size + 2]
+line_shape_list = ['-.', '--', '-', ':']
+shade_degree = 0.2
+
+
+def Add_one_line(x_time_array: list, y_twod_budget: List[List], namespace: str, index, ax):
+ # training-based
+ x_ = x_time_array
+ y_ = y_twod_budget
+
+ if all(isinstance(item, list) for item in x_):
+ expx = np.array(x_)
+ x_m = np.quantile(expx, .5, axis=0)
+ else:
+ x_m = x_
+
+ exp = np.array(y_)
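+    # values at or below 10 are treated as fractions and scaled to percentages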
+ exp = np.where(exp > 10, exp, exp * 100)
+
+ y_h = np.quantile(exp, .75, axis=0)
+ y_m = np.quantile(exp, .5, axis=0)
+ y_l = np.quantile(exp, .25, axis=0)
+
+ ax.plot(x_m, y_m,
+ mark_list[int(index % len(mark_list))] + line_shape_list[int(index % len(line_shape_list))],
+ label=namespace,
+ markersize=mark_size_list[int(index % len(mark_list))],
+ linewidth=3
+ )
+
+ ax.fill_between(x_m, y_l, y_h, alpha=shade_degree)
+ return x_m
+
+
+def draw_structure_data_anytime(
+ all_lines: List,
+ dataset: str, name_img: str, max_value,
+ figure_size=(6.4, 4.5),
+ annotations=[],
+ x_ticks=None, y_ticks=None, unique_labels=None):
+ fig, ax = plt.subplots(figsize=figure_size)
+
+ # draw all lines
+ time_usage = []
+ for i, each_line_info in enumerate(all_lines):
+ _x_array = each_line_info[0]
+ _y_2d_array = each_line_info[1]
+ _name_space = each_line_info[2]
+ time_arr = Add_one_line(_x_array, _y_2d_array, _name_space, i, ax)
+ time_usage.append(time_arr)
+
+ # print(f"speed-up on {dataset} = {time_usage[0][-1] / time_usage[2][-2]}, "
+ # f"t_train = {time_usage[0][-1]}, t_f = {time_usage[2][-2]}")
+
+ # plt.xscale("log")
+ # plt.grid()
+ # plt.xlabel(r"Time Budget $T$ (min)", fontsize=set_font_size)
+ # plt.ylabel(f"AUC on {dataset.upper()}", fontsize=set_font_size)
+
+ plt.xscale("log")
+ ax.grid()
+ ax.set_xlabel(r"Response Time Threshold $T_{max}$ (min)", fontsize=set_font_size)
+ ax.set_ylabel(f"AUC on {dataset.upper()}", fontsize=set_font_size)
+ # ax.set_xscale("log")
+ # ax.set_xlim(0.001, 10e4)
+ # ax.set_ylim(x1_lim[0], x1_lim[1])
+
+ if y_ticks is not None:
+ if y_ticks[0] is not None:
+ ax.set_ylim(bottom=y_ticks[0])
+ if y_ticks[1] is not None:
+ ax.set_ylim(top=y_ticks[1])
+ # ax.set_ylim(y_ticks[0], y_ticks[1])
+ # ax.set_yticks(y_ticks)
+ # ax.set_yticklabels(y_ticks)
+ if x_ticks is not None:
+ if x_ticks[0] is not None:
+ ax.set_xlim(left=x_ticks[0])
+ if x_ticks[1] is not None:
+ ax.set_xlim(right=x_ticks[1])
+
+ ax.yaxis.set_major_locator(MaxNLocator(nbins=6, integer=False))
+
+ if max_value > 0:
+ plt.axhline(max_value, color='r', linestyle='-', label='Global Best AUC')
+
+ for i in range(len(annotations)):
+ ele = annotations[i]
+ ax.plot(ele[2], ele[1], mark_list[i], label=ele[0], markersize=set_marker_point)
+
+ # export_legend(fig, filename="any_time_legend", unique_labels=["Training-Based MS", "Training-Free MS", "2Phase-MS", 'Global Best AUC'])
+ export_legend(ori_fig=fig, colnum=5, unique_labels=unique_labels)
+ plt.tight_layout()
+
+ fig.savefig(f"{name_img}.pdf", bbox_inches='tight')
+
+
+def export_legend(ori_fig, filename="any_time_legend", colnum=9, unique_labels=None):
+ if unique_labels is None:
+ unique_labels = []
+ fig2 = plt.figure(figsize=(5, 0.3))
+ lines_labels = [ax.get_legend_handles_labels() for ax in ori_fig.axes]
+ lines, labels = [sum(lol, []) for lol in zip(*lines_labels)]
+ # grab unique labels
+ if len(unique_labels) == 0:
+ unique_labels = set(labels)
+ # assign labels and legends in dict
+ legend_dict = dict(zip(labels, lines))
+ # query dict based on unique labels
+ unique_lines = [legend_dict[x] for x in unique_labels]
+ fig2.legend(unique_lines, unique_labels, loc='center',
+ ncol=colnum,
+ fancybox=True,
+ shadow=True, scatterpoints=1, fontsize=set_lgend_size)
+ fig2.tight_layout()
+ fig2.savefig(f"{filename}.pdf", bbox_inches='tight')
+
+
+import seaborn as sns
+import matplotlib.pyplot as plt
+
+
+def plot_heatmap(data: List, fontsize: int,
+ x_array_name: str, y_array_name: str,
+ title: str, output_file: str,
+ decimal_places: int,
+ u_ticks, k_ticks,
+ ):
+ labelsize = fontsize
+ # Convert the data to a NumPy array
+ data_array = np.array(data)
+
+ # Custom annotation function
+ def custom_annot(val):
+ return "{:.{}f}".format(val, decimal_places) if val > 0 else ""
+
+ # Convert the custom annotations to a 2D array
+ annot_array = np.vectorize(custom_annot)(data_array)
+
+ # Create a masked array to hide the cells with values less than or equal to 0
+ masked_data = np.ma.masked_array(data_array, data_array <= 0)
+
+ # Set the figure size (width, height) in inches
+ fig, ax = plt.subplots(figsize=(8, 4))
+
+ # Use the "viridis" colormap
+ cmap = "viridis"
+
+ # Create a heatmap
+ sns.heatmap(masked_data, annot=annot_array, fmt='', cmap=cmap, mask=masked_data.mask, ax=ax,
+ annot_kws={"size": fontsize, "ha": "center", "va": "center"},
+ xticklabels=u_ticks, yticklabels=k_ticks)
+
+ # Set axis labels
+ ax.set_xlabel(x_array_name, fontsize=fontsize)
+ ax.set_ylabel(y_array_name, fontsize=fontsize)
+
+ # Set x/y-axis tick size
+ ax.tick_params(axis='both', which='major', labelsize=labelsize)
+
+ # Set the title
+ # ax.set_title(title, fontsize=fontsize)
+
+ # Set tight layout
+ plt.tight_layout()
+
+ # Save the plot to a PDF file
+ plt.savefig(output_file)
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/0.train_one_model.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/0.train_one_model.py
new file mode 100644
index 000000000..9a19f007d
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/0.train_one_model.py
@@ -0,0 +1,96 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import calendar
+import json
+import os
+import time
+import traceback
+from singa import device as singa_device
+import numpy as np
+
+from exps.shared_args import parse_arguments
+
+if __name__ == "__main__":
+
+ args = parse_arguments()
+
+ # set the log name
+ gmt = time.gmtime()
+ ts = calendar.timegm(gmt)
+
+ os.environ.setdefault("log_logger_folder_name", f"{args.log_folder}")
+ os.environ.setdefault("log_file_name", f"{args.log_name}_{args.dataset}_ep{args.epoch}_{ts}.log")
+ os.environ.setdefault("base_dir", args.base_dir)
+
+ from src.logger import logger
+ from src.eva_engine.phase2.algo.trainer import ModelTrainer
+ from src.search_space.init_search_space import init_search_space
+ from src.dataset_utils.structure_data_loader import libsvm_dataloader
+
+ search_space_ins = init_search_space(args)
+ search_space_ins.load()
+
+ try:
+ # read the checkpoint
+ checkpoint_file_name = f"{args.result_dir}/train_config_tune_{args.dataset}_epo_{args.epoch}.json"
+
+ # 1. data loader
+ train_loader, val_loader, test_loader = libsvm_dataloader(
+ args=args,
+ data_dir=os.path.join(args.base_dir, "data", "structure_data", args.dataset),
+ nfield=args.nfield,
+ batch_size=args.batch_size)
+
+ # arch_id = "256-256-256-256"
+ arch_id = "128-128-128-128"
+ print(f"begin to train the {arch_id}")
+
+ model = search_space_ins.new_architecture(arch_id)
+ # model.init_embedding(requires_grad=True)
+ if args.device == 'cpu':
+ dev = singa_device.get_default_device()
+ else: # GPU
+ dev = singa_device.create_cuda_gpu_on(args.local_rank) # need to change to CPU device for CPU-only machines
+ dev.SetRandSeed(0)
+ np.random.seed(0)
+ # model.to(args.device)
+
+ valid_auc, total_run_time, train_log = ModelTrainer.fully_train_arch(
+ model=model,
+ use_test_acc=False,
+ epoch_num=args.epoch,
+ train_loader=train_loader,
+ val_loader=val_loader,
+ test_loader=test_loader,
+ args=args)
+
+ logger.info(f' ----- model id: {arch_id}, Val_AUC : {valid_auc} Total running time: '
+ f'{total_run_time}-----')
+ print(f' ----- model id: {arch_id}, Val_AUC : {valid_auc} Total running time: '
+ f'{total_run_time}-----')
+
+ # update the shared model eval res
+ logger.info(f" ---- info: {json.dumps({arch_id: train_log})}")
+
+ print(f" ---- info: {json.dumps({arch_id: train_log})}")
+
+ logger.info(f" Saving result to: {checkpoint_file_name}")
+    except Exception:
+ print(traceback.format_exc())
+ logger.info(traceback.format_exc())
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/2.seq_train_dist_online.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/2.seq_train_dist_online.py
new file mode 100644
index 000000000..e515647ec
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/2.seq_train_dist_online.py
@@ -0,0 +1,163 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import argparse
+import calendar
+import json
+import logging
+import os
+import time
+
+from exps.shared_args import parse_arguments
+
+
+def partition_list_by_worker_id(lst, num_workers=15):
+ partitions = []
+ for i in range(num_workers):
+ partitions.append([])
+ for idx, item in enumerate(lst):
+ worker_id = idx % num_workers
+ partitions[worker_id].append(item)
+ return partitions
+
+
+def start_one_worker(queue, args, worker_id, my_partition, search_space_ins, res):
+ from src.tools.io_tools import write_json, read_json
+ gmt = time.gmtime()
+ ts = calendar.timegm(gmt)
+
+ os.environ.setdefault("log_file_name", f"{args.log_name}_{args.dataset}_wkid_{worker_id}_{ts}.log")
+ # import logging
+ logger = logging.getLogger(f"{args.dataset}_wkid_{worker_id}_{ts}")
+ if not os.path.exists(f"./{args.log_folder}"):
+ os.makedirs(f"./{args.log_folder}")
+ handler = logging.FileHandler(f"./{args.log_folder}/{args.log_name}_{args.dataset}_wkid_{worker_id}_{ts}.log")
+ formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
+ handler.setFormatter(formatter)
+ logger.addHandler(handler)
+ from src.eva_engine.phase2.algo.trainer import ModelTrainer
+
+ if args.total_models_per_worker is None:
+ logger.info(
+ f" ---- begin exploring, current worker have "
+ f"{len(my_partition)} models. explore all those models ")
+ else:
+ logger.info(f" ---- begin exploring, current worker have "
+ f"{len(my_partition)} models. but explore {args.total_models_per_worker} models ")
+
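+    # blocks until the main process has put the shared dataloaders into the queue (step 4 in __main__)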
+ train_loader, val_loader, test_loader = queue.get()
+
+ checkpoint_file_name = f"./base_line_res_{args.dataset}/train_baseline_{args.dataset}_wkid_{worker_id}.json"
+ visited = read_json(checkpoint_file_name)
+ if visited == {}:
+ visited = {args.dataset: {}}
+ logger.info(f" ---- initialize checkpointing with {visited} . ")
+ else:
+ logger.info(f" ---- recovery from checkpointing with {len(visited[args.dataset])} model. ")
+
+ explored_arch_num = 0
+ for arch_index in my_partition:
+ print(f"begin to train the {arch_index}")
+ model = search_space_ins.new_architecture(res[arch_index]).to(args.device)
+ valid_auc, total_run_time, train_log = ModelTrainer.fully_train_arch(
+ model=model,
+ use_test_acc=False,
+ epoch_num=args.epoch,
+ train_loader=train_loader,
+ val_loader=val_loader,
+ test_loader=test_loader,
+ args=args, logger=logger)
+
+ logger.info(f' ----- model id: {res[arch_index]}, Val_AUC : {valid_auc} Total running time: '
+ f'{total_run_time}-----')
+
+ # update the shared model eval res
+ logger.info(f" ---- exploring {explored_arch_num} model. ")
+ logger.info(f" ---- info: {json.dumps({res[arch_index]: train_log})}")
+ visited[args.dataset][res[arch_index]] = train_log
+ explored_arch_num += 1
+
+ if args.total_models_per_worker is not None and explored_arch_num > args.total_models_per_worker:
+ break
+
+ logger.info(f" Saving result to: {checkpoint_file_name}")
+ write_json(checkpoint_file_name, visited)
+
+
+if __name__ == "__main__":
+    import torch.multiprocessing as mp
+    mp.set_start_method('spawn', force=True)
+    args = parse_arguments()
+
+ # set the log name
+ gmt = time.gmtime()
+ ts = calendar.timegm(gmt)
+
+ os.environ.setdefault("log_file_name", f"{args.log_name}_{args.dataset}_main_{ts}.log")
+ os.environ.setdefault("base_dir", args.base_dir)
+
+ from src.search_space.init_search_space import init_search_space
+ from src.dataset_utils.structure_data_loader import libsvm_dataloader
+ from src.tools.io_tools import write_json, read_json
+ import torch.multiprocessing as mp
+
+ search_space_ins = init_search_space(args)
+ search_space_ins.load()
+
+ # 1. main process partition data and group results,
+ res = read_json(args.pre_partitioned_file)
+
+ total_workers = args.worker_each_gpu * args.gpu_num
+ all_partition = partition_list_by_worker_id(list(res.keys()), total_workers)
+
+ train_loader, val_loader, test_loader = libsvm_dataloader(
+ args=args,
+ data_dir=os.path.join(args.base_dir, "data", "structure_data", args.dataset),
+ nfield=args.nfield,
+ batch_size=args.batch_size)
+
+ # 2. put the shared dataloader into the queue,
+ queue = mp.Queue()
+
+ # 3. Create a list of processes to train the models
+ processes = []
+ worker_id = 0
+ for gpu_id in range(args.gpu_num):
+ for _ in range(args.worker_each_gpu):
+ if args.device != "cpu":
+ args.device = f"cuda:{gpu_id}"
+ print(f"running process {[args.device, worker_id, len(all_partition[worker_id])]}")
+ p = mp.Process(
+ target=start_one_worker,
+ args=(queue, args, worker_id, all_partition[worker_id], search_space_ins, res,
+ )
+ )
+ p.start()
+ processes.append(p)
+ worker_id += 1
+
+ # 4. send to the queue
+ for gpu_id in range(args.gpu_num):
+ for _ in range(args.worker_each_gpu):
+ print("putting to queue ....")
+ queue.put((train_loader, val_loader, test_loader))
+
+ print("All processing are running, waiting all to finish....")
+ for p in processes:
+ p.join()
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/2.seq_train_online.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/2.seq_train_online.py
new file mode 100644
index 000000000..cc1b44481
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/2.seq_train_online.py
@@ -0,0 +1,118 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import calendar
+import json
+import os
+import time
+
+from exps.shared_args import parse_arguments
+
+
+def partition_list_by_worker_id(lst, num_workers=15):
+ partitions = []
+ for i in range(num_workers):
+ partitions.append([])
+ for idx, item in enumerate(lst):
+ worker_id = idx % num_workers
+ partitions[worker_id].append(item)
+ return partitions
+
+
+if __name__ == "__main__":
+
+ args = parse_arguments()
+
+ # set the log name
+ gmt = time.gmtime()
+ ts = calendar.timegm(gmt)
+
+ os.environ.setdefault("log_logger_folder_name", f"{args.log_folder}")
+ os.environ.setdefault("log_file_name", f"{args.log_name}_{args.dataset}_wkid_{args.worker_id}_{ts}.log")
+ os.environ.setdefault("base_dir", args.base_dir)
+
+ from src.logger import logger
+ from src.eva_engine.phase2.algo.trainer import ModelTrainer
+ from src.search_space.init_search_space import init_search_space
+ from src.dataset_utils.structure_data_loader import libsvm_dataloader
+ from src.tools.io_tools import write_json, read_json
+
+ search_space_ins = init_search_space(args)
+ search_space_ins.load()
+
+ # 1. data loader
+ logger.info(f" Loading data....")
+ train_loader, val_loader, test_loader = libsvm_dataloader(
+ args=args,
+ data_dir=os.path.join(args.base_dir, "data", "structure_data", args.dataset),
+ nfield=args.nfield,
+ batch_size=args.batch_size)
+
+ res = read_json(args.pre_partitioned_file)
+
+ all_partition = partition_list_by_worker_id(list(res.keys()), args.total_workers)
+
+ if args.total_models_per_worker == -1:
+ logger.info(
+ f" ---- begin exploring, current worker have "
+ f"{len(all_partition[args.worker_id])} models. explore all those models ")
+ else:
+ logger.info(f" ---- begin exploring, current worker have "
+ f"{len(all_partition[args.worker_id])} models. but explore {args.total_models_per_worker} models ")
+
+ # read the checkpoint
+ checkpoint_file_name = f"{args.result_dir}/train_baseline_{args.dataset}_wkid_{args.worker_id}.json"
+ visited = read_json(checkpoint_file_name)
+ if visited == {}:
+ visited = {args.dataset: {}}
+ logger.info(f" ---- initialize checkpointing with {visited} . ")
+ else:
+ logger.info(f" ---- recovery from checkpointing with {len(visited[args.dataset])} model. ")
+
+ explored_arch_num = 0
+ for arch_index in all_partition[args.worker_id]:
+ print(f"begin to train the {arch_index}")
+ if res[arch_index] in visited[args.dataset]:
+ logger.info(f" ---- model {res[arch_index]} already visited")
+ continue
+ model = search_space_ins.new_architecture(res[arch_index])
+ model.init_embedding(requires_grad=True)
+ model.to(args.device)
+ valid_auc, total_run_time, train_log = ModelTrainer.fully_train_arch(
+ model=model,
+ use_test_acc=False,
+ epoch_num=args.epoch,
+ train_loader=train_loader,
+ val_loader=val_loader,
+ test_loader=test_loader,
+ args=args)
+
+ logger.info(f' ----- model id: {res[arch_index]}, Val_AUC : {valid_auc} Total running time: '
+ f'{total_run_time}-----')
+
+ # update the shared model eval res
+ logger.info(f" ---- exploring {explored_arch_num} model. ")
+ logger.info(f" ---- info: {json.dumps({res[arch_index]: train_log})}")
+ visited[args.dataset][res[arch_index]] = train_log
+ explored_arch_num += 1
+
+ if args.total_models_per_worker != -1 and explored_arch_num > args.total_models_per_worker:
+ break
+
+ logger.info(f" Saving result to: {checkpoint_file_name}")
+ write_json(checkpoint_file_name, visited)
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/4.seq_score_online.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/4.seq_score_online.py
new file mode 100644
index 000000000..5d6c16ec0
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/4.seq_score_online.py
@@ -0,0 +1,134 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import calendar
+import json
+import os
+import random
+import time
+from exps.shared_args import parse_arguments
+from datetime import datetime
+import gc
+
+# import tracemalloc
+# tracemalloc.start()
+#
+#
+# def print_memory_usg():
+# snapshot = tracemalloc.take_snapshot()
+# top_stats = snapshot.statistics('lineno')
+# for stat in top_stats[:10]: # top 10 memory-consuming lines
+# print(stat)
+
+
+def generate_data_loader():
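+    """Build train/valid/test loaders; for image datasets the validation loader is
+    reused as the test loader."""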
+ if args.dataset in [Config.c10, Config.c100, Config.imgNet]:
+ train_loader, val_loader, class_num = dataset.get_dataloader(
+ train_batch_size=args.batch_size,
+ test_batch_size=args.batch_size,
+ dataset=args.dataset,
+ num_workers=1,
+ datadir=os.path.join(args.base_dir, "data"))
+ test_loader = val_loader
+ else:
+ train_loader, val_loader, test_loader = libsvm_dataloader(
+ args=args,
+ data_dir=os.path.join(args.base_dir, "data", "structure_data", args.dataset),
+ nfield=args.nfield,
+ batch_size=args.batch_size)
+ class_num = args.num_labels
+
+ return train_loader, val_loader, test_loader, class_num
+
+
+if __name__ == "__main__":
+ args = parse_arguments()
+
+ # set the log name
+ gmt = time.gmtime()
+ ts = calendar.timegm(gmt)
+ os.environ.setdefault("log_logger_folder_name", f"{args.log_folder}")
+ os.environ.setdefault("log_file_name", args.log_name + "_" + str(ts) + ".log")
+ os.environ.setdefault("base_dir", args.base_dir)
+
+    from src.common.constant import Config, CommonVars
+ from src.common.structure import ModelAcquireData
+ from src.controller.sampler_all.seq_sampler import SequenceSampler
+ from src.eva_engine.phase1.evaluator import P1Evaluator
+ from src.logger import logger
+ from src.search_space.init_search_space import init_search_space
+ from src.dataset_utils.structure_data_loader import libsvm_dataloader
+ from src.tools.io_tools import write_json, read_json
+ from src.dataset_utils import dataset
+
+ search_space_ins = init_search_space(args)
+
+ train_loader, val_loader, test_loader, class_num = generate_data_loader()
+
+ _evaluator = P1Evaluator(device=args.device,
+ num_label=args.num_labels,
+ dataset_name=args.dataset,
+ search_space_ins=search_space_ins,
+ train_loader=train_loader,
+ is_simulate=False,
+ metrics=args.tfmem,
+ enable_cache=args.embedding_cache_filtering)
+
+ sampler = SequenceSampler(search_space_ins)
+
+ explored_n = 0
+ output_file = f"{args.result_dir}/score_{args.search_space}_{args.dataset}_batch_size_{args.batch_size}_{args.device}.json"
+ result = read_json(output_file)
+ print(f"begin to score all, currently we already explored {len(result.keys())}")
+ logger.info(f"begin to score all, currently we already explored {len(result.keys())}")
+ while True:
+ arch_id, arch_micro = sampler.sample_next_arch()
+ if arch_id is None:
+ logger.info("Stop exploring, meet None arch id")
+ break
+ if arch_id in result:
+ continue
+ if args.models_explore != -1 and explored_n > args.models_explore:
+ logger.info(f"Stop exploring, {explored_n} > {args.models_explore}")
+ break
+ # run the model selection
+ model_encoding = search_space_ins.serialize_model_encoding(arch_micro)
+ model_acquire_data = ModelAcquireData(model_id=arch_id,
+ model_encoding=model_encoding,
+ is_last=False)
+ data_str = model_acquire_data.serialize_model()
+ model_score = _evaluator.p1_evaluate(data_str)
+ explored_n += 1
+ result[arch_id] = model_score
+ # print(f" {datetime.now()} finish arch = {arch_id}, model_score = {model_score}")
+        if explored_n % 1000 == 0:
+            # print_memory_usg()
+            # _evaluator.force_gc()
+            print("3. [trails] Phase 1: filter phase explored " + str(explored_n)
+                  + " models in this run, total explored " + str(len(result)) +
+                  " models, model_id = " + str(arch_id) +
+                  " model_scores = " + json.dumps(model_score))
+            logger.info("3. [trails] Phase 1: filter phase explored " + str(explored_n) +
+                        " models, model_id = " + str(arch_id) +
+                        " model_scores = " + json.dumps(model_score))
+            write_json(output_file, result)
+ write_json(output_file, result)
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/measure_ecdf.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/measure_ecdf.py
new file mode 100644
index 000000000..645f72f43
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/measure_ecdf.py
@@ -0,0 +1,136 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import numpy as np
+import matplotlib
+import matplotlib.pyplot as plt
+import os
+from src.tools.io_tools import read_json
+
+# lines' mark size
+set_marker_size = 15
+# points' mark size
+set_marker_point = 14
+# font size
+set_font_size = 25
+set_lgend_size = 15
+set_tick_size = 20
+
+frontinsidebox = 23
+
+# update tick size
+matplotlib.rc('xtick', labelsize=set_tick_size)
+matplotlib.rc('ytick', labelsize=set_tick_size)
+
+plt.rcParams['axes.labelsize'] = set_tick_size
+
+mark_list = ["o", "*", "<", "^", "s", "d", "D", ">", "h"]
+mark_size_list = [set_marker_size, set_marker_size + 1, set_marker_size + 1, set_marker_size,
+ set_marker_size, set_marker_size, set_marker_size, set_marker_size + 1, set_marker_size + 2]
+line_shape_list = ['-.', '--', '-', ':']
+shade_degree = 0.2
+base_dir = "../exp_data/"
+
+
+def export_legend(ori_fig, filename="any_time_legend", colnum=9, unique_labels=None):
+ if unique_labels is None:
+ unique_labels = []
+ fig2 = plt.figure(figsize=(5, 0.3))
+ lines_labels = [ax.get_legend_handles_labels() for ax in ori_fig.axes]
+ lines, labels = [sum(lol, []) for lol in zip(*lines_labels)]
+ # grab unique labels
+ if len(unique_labels) == 0:
+ unique_labels = set(labels)
+ # assign labels and legends in dict
+ legend_dict = dict(zip(labels, lines))
+ # query dict based on unique labels
+ unique_lines = [legend_dict[x] for x in unique_labels]
+ fig2.legend(unique_lines, unique_labels, loc='center',
+ ncol=colnum,
+ fancybox=True,
+ shadow=True, scatterpoints=1, fontsize=set_lgend_size)
+ fig2.tight_layout()
+ fig2.savefig(f"{filename}.pdf", bbox_inches='tight')
+
+
+def draw_ecdf():
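+    """Plot the empirical CDF (ECDF) of training and validation AUC at the sampled epoch for each dataset."""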
+ # extract train_auc and valid_auc into separate lists
+ for dataset, architectures in data_dict.items():
+
+ fig, ax = plt.subplots(figsize=(6.4, 3.5))
+ print(dataset)
+ train_auc = []
+ valid_auc = []
+ for architecture, epochs in architectures.items():
+ for epoch, metrics in epochs.items():
+ if str(epoch_sampled[dataset]) == epoch:
+ train_auc.append(metrics["train_auc"])
+ valid_auc.append(metrics["valid_auc"])
+ break
+
+ # calculate and plot ECDF for train_auc
+ sorted_train_auc = np.sort(train_auc)
+ y_train = np.arange(1, len(sorted_train_auc) + 1) / len(sorted_train_auc)
+ plt.plot(sorted_train_auc, y_train, label='Training AUC', linewidth=3, linestyle='--')
+
+ # calculate and plot ECDF for valid_auc
+ sorted_valid_auc = np.sort(valid_auc)
+ y_valid = np.arange(1, len(sorted_valid_auc) + 1) / len(sorted_valid_auc)
+ plt.plot(sorted_valid_auc, y_valid, label='Validation AUC', linewidth=3, linestyle='-')
+
+ y_m = np.quantile(sorted_valid_auc, .5, axis=0)
+ print("medium", y_m, "best", max(sorted_valid_auc))
+ # plt.xlim(left=0.45)
+
+ plt.grid()
+        plt.xlabel('AUC')
+ plt.ylabel('ECDF')
+ # plt.legend(loc='upper left', fontsize=set_lgend_size)
+ plt.tight_layout()
+ export_legend(ori_fig=fig, colnum=5)
+ fig.savefig(f"space_{dataset}.pdf", bbox_inches='tight')
+
+
+# dataset_used = "frappe"
+dataset_used = "uci_diabetes"
+# dataset_used = "criteo"
+
+
+epoch_sampled = {"frappe": 19, "uci_diabetes": 35, "criteo": 9}
+
+if dataset_used == "frappe":
+ mlp_train_frappe = os.path.join(
+ base_dir,
+ "tab_data/frappe/all_train_baseline_frappe.json")
+ data_dict = read_json(mlp_train_frappe)
+elif dataset_used == "uci_diabetes":
+ mlp_train_uci_diabetes = os.path.join(
+ base_dir,
+ "tab_data/uci_diabetes/all_train_baseline_uci_160k_40epoch.json")
+
+ data_dict = read_json(mlp_train_uci_diabetes)
+elif dataset_used == "criteo":
+ mlp_train_criteo = os.path.join(
+ base_dir,
+ "tab_data/criteo/all_train_baseline_criteo.json")
+
+ data_dict = read_json(mlp_train_criteo)
+else:
+    print(f"Unknown dataset: {dataset_used}")
+
+draw_ecdf()
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/measure_param_auc.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/measure_param_auc.py
new file mode 100644
index 000000000..0ff674823
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/measure_param_auc.py
@@ -0,0 +1,144 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import numpy as np
+import matplotlib
+import matplotlib.pyplot as plt
+import matplotlib.ticker as ticker
+import os
+from src.tools.io_tools import read_json
+
+# lines' mark size
+set_marker_size = 15
+# points' mark size
+set_marker_point = 14
+# font size
+set_font_size = 25
+set_lgend_size = 15
+set_tick_size = 20
+
+frontinsidebox = 23
+
+# update tick size
+matplotlib.rc('xtick', labelsize=set_tick_size)
+matplotlib.rc('ytick', labelsize=set_tick_size)
+
+plt.rcParams['axes.labelsize'] = set_tick_size
+
+mark_list = ["o", "*", "<", "^", "s", "d", "D", ">", "h"]
+mark_size_list = [set_marker_size, set_marker_size + 1, set_marker_size + 1, set_marker_size,
+ set_marker_size, set_marker_size, set_marker_size, set_marker_size + 1, set_marker_size + 2]
+line_shape_list = ['-.', '--', '-', ':']
+shade_degree = 0.2
+base_dir = "../exp_data/"
+
+
+def export_legend(ori_fig, filename="any_time_legend", colnum=9, unique_labels=None):
+ if unique_labels is None:
+ unique_labels = []
+ fig2 = plt.figure(figsize=(5, 0.3))
+ lines_labels = [ax.get_legend_handles_labels() for ax in ori_fig.axes]
+ lines, labels = [sum(lol, []) for lol in zip(*lines_labels)]
+ # grab unique labels
+ if len(unique_labels) == 0:
+ unique_labels = set(labels)
+ # assign labels and legends in dict
+ legend_dict = dict(zip(labels, lines))
+ # query dict based on unique labels
+ unique_lines = [legend_dict[x] for x in unique_labels]
+ fig2.legend(unique_lines, unique_labels, loc='center',
+ ncol=colnum,
+ fancybox=True,
+ shadow=True, scatterpoints=1, fontsize=set_lgend_size)
+ fig2.tight_layout()
+ fig2.savefig(f"{filename}.pdf", bbox_inches='tight')
+
+
+# Function to compute number of parameters for an architecture
+def compute_params(architecture):
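+    """Parameter count of an MLP encoded as 'h1-h2-...-hk': weights between consecutive layers plus bias terms."""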
+ layers = [int(layer) for layer in architecture.split('-')]
+ params = 0
+ for i in range(len(layers) - 1):
+ params += layers[i] * layers[i + 1]
+ # Add bias terms
+ params += sum(layers[1:])
+ return params
+
+
+# Function to convert a large tick value into a string with 'M' for millions
+def func(x, pos):  # formatter function takes tick value and tick position
+    if x == 0:
+        return "0"
+    else:
+        return f'{x / 1e6:g}M'
+
+
+def draw_parameter_performance():
+ # extract train_auc and valid_auc into separate lists
+ for dataset, architectures in data_dict.items():
+ fig, ax = plt.subplots(figsize=(6.4, 4))
+ print(dataset)
+ param_sizes = []
+ valid_auc = []
+ for architecture, epochs in architectures.items():
+ for epoch, metrics in epochs.items():
+ if str(epoch_sampled[dataset]) == epoch:
+ param_sizes.append(compute_params(architecture))
+ valid_auc.append(metrics["valid_auc"])
+ break
+
+ plt.scatter(param_sizes, valid_auc)
+ y_format = ticker.FuncFormatter(func)
+ ax.xaxis.set_major_formatter(y_format)
+ plt.grid()
+ plt.xlabel('Parameter Size')
+ plt.ylabel('Validation AUC')
+ # plt.legend(loc='upper left', fontsize=set_lgend_size)
+ plt.tight_layout()
+ export_legend(ori_fig=fig, colnum=5)
+ fig.savefig(f"para_{dataset}.jpg", bbox_inches='tight')
+
+
+dataset_used = "frappe"
+# dataset_used = "uci_diabetes"
+# dataset_used = "criteo"
+
+epoch_sampled = {"frappe": 19, "uci_diabetes": 35, "criteo": 9}
+
+if dataset_used == "frappe":
+ mlp_train_frappe = os.path.join(
+ base_dir,
+ "tab_data/frappe/all_train_baseline_frappe.json")
+ data_dict = read_json(mlp_train_frappe)
+elif dataset_used == "uci_diabetes":
+ mlp_train_uci_diabetes = os.path.join(
+ base_dir,
+ "tab_data/uci_diabetes/all_train_baseline_uci_160k_40epoch.json")
+
+ data_dict = read_json(mlp_train_uci_diabetes)
+elif dataset_used == "criteo":
+ mlp_train_criteo = os.path.join(
+ base_dir,
+ "tab_data/criteo/all_train_baseline_criteo.json")
+
+ data_dict = read_json(mlp_train_criteo)
+else:
+    print(f"Unknown dataset: {dataset_used}")
+
+draw_parameter_performance()
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/init_env b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/init_env
new file mode 100644
index 000000000..b3204ea06
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/init_env
@@ -0,0 +1,12 @@
+# Environment setup for the TRAILS model-selection example.
+
+# When the project is mounted at /project/TRAILS (e.g. inside the container):
+export PYTHONPATH=$PYTHONPATH:/project/TRAILS/internal/ml/model_selection
+conda activate trails
+
+# When running from the repository root:
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+conda activate trails
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/main.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/main.py
new file mode 100644
index 000000000..38357f304
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/main.py
@@ -0,0 +1,78 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Entry point: run online model selection under a given time budget.
+
+import calendar
+import os
+import time
+from src.common.constant import Config
+from src.dataset_utils.structure_data_loader import libsvm_dataloader
+from exps.shared_args import parse_arguments
+
+
+def generate_data_loader():
+ if args.dataset in [Config.c10, Config.c100, Config.imgNet]:
+ train_loader, val_loader, class_num = dataset.get_dataloader(
+ train_batch_size=args.batch_size,
+ test_batch_size=args.batch_size,
+ dataset=args.dataset,
+ num_workers=1,
+ datadir=os.path.join(args.base_dir, "data"))
+ test_loader = val_loader
+ else:
+ train_loader, val_loader, test_loader = libsvm_dataloader(
+ args=args,
+ data_dir=os.path.join(args.base_dir, "data", "structure_data", args.dataset),
+ nfield=args.nfield,
+ batch_size=args.batch_size)
+ class_num = args.num_labels
+
+ return train_loader, val_loader, test_loader, class_num
+
+
+def run_with_time_budget(time_budget: float):
+ """
+    :param time_budget: the given time budget, in seconds
+    :return: the best architecture found within the budget
+ """
+
+ # define dataLoader, and sample a mini-batch
+ train_loader, val_loader, test_loader, class_num = generate_data_loader()
+ args.num_labels = class_num
+ data_loader = [train_loader, val_loader, test_loader]
+
+ rms = RunModelSelection(args.search_space, args, is_simulate=False)
+ best_arch, _, _, _, _, _, _, _ = rms.select_model_online(time_budget, data_loader)
+
+ return best_arch
+
+
+if __name__ == "__main__":
+ args = parse_arguments()
+
+ # set the log name
+ gmt = time.gmtime()
+ ts = calendar.timegm(gmt)
+ os.environ.setdefault("log_file_name", args.log_name + "_" + str(ts) + ".log")
+ os.environ.setdefault("base_dir", args.base_dir)
+
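+    # note: these imports are deferred until after the environment variables above are
+    # set, so that modules such as src.logger can pick them up at import time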
+ from src.eva_engine.run_ms import RunModelSelection
+ from src.dataset_utils import dataset
+
+ run_with_time_budget(args.budget)
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/pg_interface.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/pg_interface.py
new file mode 100644
index 000000000..98317c08a
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/pg_interface.py
@@ -0,0 +1,635 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import calendar
+import os
+import time
+import requests
+import json
+from typing import List, Dict
+import traceback
+import orjson
+from argparse import Namespace
+from shared_config import parse_config_arguments
+
+
+def exception_catcher(func):
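+    """Decorator for the UDF entry points: parse the JSON-encoded request string,
+    load the config file, set the logging environment variables, and serialize any
+    exception into a JSON error payload instead of raising."""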
+ def wrapper(encoded_str: str):
+ try:
+            # each wrapped function accepts a JSON string
+ params = json.loads(encoded_str)
+ config_file = params.get("config_file")
+
+ # Parse the config file
+ args = parse_config_arguments(config_file)
+
+ # Set the environment variables
+ ts = calendar.timegm(time.gmtime())
+ os.environ.setdefault("base_dir", args.base_dir)
+ os.environ.setdefault("log_logger_folder_name", args.log_folder)
+ os.environ.setdefault("log_file_name", args.log_name + "_" + str(ts) + ".log")
+
+ # Call the original function with the parsed parameters
+ return func(params, args)
+ except Exception as e:
+ return orjson.dumps(
+ {"Errored": traceback.format_exc()}).decode('utf-8')
+
+ return wrapper
+
+from torch.utils.data import Dataset
+import torch
+
+
+class LibsvmDataset(Dataset):
+ """ Dataset loader for Libsvm data format """
+
+ @staticmethod
+ def decode_libsvm(columns):
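+        """Parse one libsvm-format row: 'index:value' feature columns followed by the label."""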
+ map_func = lambda pair: (int(pair[0]), float(pair[1]))
+ id, value = zip(*map(lambda col: map_func(col.split(':')), columns[:-1]))
+ sample = {'id': torch.LongTensor(id),
+ 'value': torch.FloatTensor(value),
+ 'y': float(columns[-1])}
+ return sample
+
+ @staticmethod
+ def pre_processing(mini_batch_data: List[Dict]):
+ sample_lines = len(mini_batch_data)
+ nfields = len(mini_batch_data[0].keys()) - 1
+ feat_id = torch.LongTensor(sample_lines, nfields)
+ feat_value = torch.FloatTensor(sample_lines, nfields)
+ y = torch.FloatTensor(sample_lines)
+
+ for i in range(sample_lines):
+ row_value = mini_batch_data[i].values()
+ sample = LibsvmDataset.decode_libsvm(list(row_value))
+ feat_id[i] = sample['id']
+ feat_value[i] = sample['value']
+ y[i] = sample['y']
+ return feat_id, feat_value, y, sample_lines
+
+ def __init__(self, mini_batch_data: List[Dict]):
+ self.feat_id, self.feat_value, self.y, self.nsamples = \
+ LibsvmDataset.pre_processing(mini_batch_data)
+
+ def __len__(self):
+ return self.nsamples
+
+ def __getitem__(self, idx):
+ return {'id': self.feat_id[idx],
+ 'value': self.feat_value[idx],
+ 'y': self.y[idx]}
+
+
+def generate_dataloader(mini_batch_data, args):
+ from src.logger import logger
+ from torch.utils.data import DataLoader
+ logger.info(f"Begin to preprocessing dataset")
+ begin_time = time.time()
+ dataloader = DataLoader(LibsvmDataset(mini_batch_data),
+ batch_size=args.batch_size,
+ shuffle=True)
+ logger.info(f"Preprocessing dataset Done ! time_usage = {time.time() - begin_time}")
+ return dataloader
+
+
+@exception_catcher
+def model_selection(params: dict, args: Namespace):
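+    """Run the full two-phase model selection (filtering + refinement) on the given
+    mini-batch within the specified time budget."""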
+ from src.logger import logger
+ logger.info(f"begin run model_selection on UDF runtime with CPU only")
+
+ mini_batch_data = json.loads(params["mini_batch"])
+ budget = float(params["budget"])
+
+ from src.eva_engine.run_ms import RunModelSelection
+
+ dataloader = generate_dataloader(mini_batch_data=mini_batch_data, args=args)
+
+ data_loader = [dataloader, dataloader, dataloader]
+
+ rms = RunModelSelection(args.search_space, args, is_simulate=args.is_simulate)
+ best_arch, best_arch_performance, time_usage, _, p1_trace_highest_score, p1_trace_highest_scored_models_id = \
+ rms.select_model_online_clean(
+ budget=budget,
+ data_loader=data_loader,
+ only_phase1=False,
+ run_workers=1)
+
+ return orjson.dumps(
+ {"best_arch": best_arch,
+ "best_arch_performance": best_arch_performance,
+ "time_usage": time_usage}).decode('utf-8')
+
+
+@exception_catcher
+def profiling_filtering_phase(params: dict, args: Namespace):
+ from src.logger import logger
+ logger.info(f"begin run profiling_filtering_phase CPU only")
+
+ mini_batch_m = params["mini_batch"]
+
+ from src.eva_engine.run_ms import RunModelSelection
+
+ logger.info(f"begin run filtering phase at {os.getcwd()}, with {mini_batch_m}")
+
+ mini_batch_data = json.loads(mini_batch_m)
+ dataloader = generate_dataloader(mini_batch_data=mini_batch_data, args=args)
+ data_loader = [dataloader, dataloader, dataloader]
+
+ rms = RunModelSelection(args.search_space, args, is_simulate=args.is_simulate)
+ score_time_per_model = rms.profile_filtering(data_loader=data_loader)
+
+ return orjson.dumps({"time": score_time_per_model}).decode('utf-8')
+
+
+@exception_catcher
+def profiling_refinement_phase(params: dict, args: Namespace):
+ from src.logger import logger
+ logger.info(f"begin run profiling_refinement_phase CPU only")
+
+ mini_batch_m = params["mini_batch"]
+
+ from src.eva_engine.run_ms import RunModelSelection
+
+ mini_batch_data = json.loads(mini_batch_m)
+
+ dataloader = generate_dataloader(mini_batch_data=mini_batch_data, args=args)
+ data_loader = [dataloader, dataloader, dataloader]
+
+ rms = RunModelSelection(args.search_space, args, is_simulate=args.is_simulate)
+ train_time_per_epoch = rms.profile_refinement(data_loader=data_loader)
+
+ return orjson.dumps({"time": train_time_per_epoch}).decode('utf-8')
+
+
+@exception_catcher
+def coordinator(params: dict, args: Namespace):
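+    """Compute the two-phase schedule (K, U, N) from the time budget and the profiled
+    per-model scoring time and per-epoch training time."""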
+ from src.logger import logger
+ logger.info(f"begin run coordinator")
+ # print (f"begin run coordinator")
+
+ budget = float(params["budget"])
+ score_time_per_model = float(params["score_time_per_model"])
+ train_time_per_epoch = float(params["train_time_per_epoch"])
+ only_phase1 = True if params["only_phase1"].lower() == "true" else False
+
+ from src.eva_engine.run_ms import RunModelSelection
+
+ logger.info(f"coordinator params: budget={budget}, "
+ f"score_time_per_model={score_time_per_model}, "
+ f"train_time_per_epoch={train_time_per_epoch}, "
+ f"only_phase1={only_phase1}")
+
+ rms = RunModelSelection(args.search_space, args, is_simulate=args.is_simulate)
+ K, U, N = rms.coordination(
+ budget=budget,
+ score_time_per_model=score_time_per_model,
+ train_time_per_epoch=train_time_per_epoch,
+ only_phase1=only_phase1)
+
+ return orjson.dumps(
+ {"k": K, "u": U, "n": N}).decode('utf-8')
+
+
+@exception_catcher
+def filtering_phase(params: dict, args: Namespace):
+ from src.logger import logger
+ logger.info(f"begin run filtering_phase CPU only")
+
+ # mini_batch_m = params["mini_batch"]
+ n = int(params["n"])
+ k = int(params["k"])
+
+ from src.eva_engine.run_ms import RunModelSelection
+
+ # mini_batch_data = json.loads(mini_batch_m)
+ # dataloader = generate_dataloader(mini_batch_data=mini_batch_data, args=args)
+
+ rms = RunModelSelection(args.search_space, args, is_simulate=args.is_simulate)
+ k_models, _, _, _ = rms.filtering_phase(N=n, K=k)
+
+ return orjson.dumps({"k_models": k_models}).decode('utf-8')
+
+
+@exception_catcher
+def filtering_phase_dataLoader(params: dict, args: Namespace):
+ from src.logger import logger
+ logger.info(f"begin run filtering_phase CPU only")
+ # print (f"begin run filtering_phase CPU only")
+
+ mini_batch_m = params["mini_batch"]
+ # print ("mini_batch_m: ", mini_batch_m)
+
+ n = int(params["n"])
+ k = int(params["k"])
+
+ from src.eva_engine.run_ms import RunModelSelection
+
+ mini_batch_data = json.loads(mini_batch_m)
+ dataloader = generate_dataloader(mini_batch_data=mini_batch_data, args=args)
+
+ rms = RunModelSelection(args.search_space, args, is_simulate=args.is_simulate)
+ k_models, _, _, _ = rms.filtering_phase(N=n, K=k, train_loader=dataloader)
+
+ return orjson.dumps({"k_models": k_models}).decode('utf-8')
+
+
+@exception_catcher
+def refinement_phase(params: dict, args: Namespace):
+ mini_batch_m = params["mini_batch"]
+ return orjson.dumps(
+ {"k_models": "k_models"}).decode('utf-8')
+
+
+@exception_catcher
+def model_selection_workloads(params: dict, args: Namespace):
+ """
+ Run filtering (explore N models) and refinement phase (refine K models) for benchmarking latency.
+ """
+
+ mini_batch_m = params["mini_batch"]
+ n = int(params["n"])
+ k = int(params["k"])
+
+ from src.logger import logger
+ logger.info(f"begin run model_selection_workloads on CPU only, explore N={n} and K={k}")
+
+ from src.eva_engine.run_ms import RunModelSelection
+
+ mini_batch_data = json.loads(mini_batch_m)
+ dataloader = generate_dataloader(mini_batch_data=mini_batch_data, args=args)
+ rms = RunModelSelection(args.search_space, args, is_simulate=args.is_simulate)
+ k_models, _, _, _ = rms.filtering_phase(N=n, K=k, train_loader=dataloader)
+ best_arch, best_arch_performance, _ = rms.refinement_phase(
+ U=1,
+ k_models=k_models,
+ train_loader=dataloader,
+ valid_loader=dataloader)
+
+ return orjson.dumps(
+ {"best_arch": best_arch,
+ "best_arch_performance": best_arch_performance,
+ }).decode('utf-8')
+
+
+@exception_catcher
+def test_io(params: dict, args: Namespace):
+ return orjson.dumps({"inputs are": json.dumps(params)}).decode('utf-8')
+
+
+@exception_catcher
+def model_selection_trails(params: dict, args: Namespace):
+ from src.logger import logger
+ logger.info(f"begin run model_selection_trails CPU + GPU")
+
+ mini_batch_data = json.loads(params["mini_batch"])
+ budget = float(params["budget"])
+
+ # 1. launch cache service
+ columns = list(mini_batch_data[0].keys())
+ requests.post(args.cache_svc_url,
+ json={'columns': columns, 'name_space': "train", 'table_name': "dummy",
+ "batch_size": len(mini_batch_data)})
+ requests.post(args.cache_svc_url,
+ json={'columns': columns, 'name_space': "valid", 'table_name': "dummy",
+ "batch_size": len(mini_batch_data)})
+
+ from src.eva_engine.run_ms import RunModelSelection
+
+ # 2. profiling & coordination
+ dataloader = generate_dataloader(mini_batch_data=mini_batch_data, args=args)
+ data_loader = [dataloader, dataloader, dataloader]
+ rms = RunModelSelection(args.search_space, args, is_simulate=args.is_simulate)
+
+ begin_time = time.time()
+ score_time_per_model = rms.profile_filtering(data_loader)
+ train_time_per_epoch = rms.profile_refinement(data_loader)
+ K, U, N = rms.coordination(budget, score_time_per_model, train_time_per_epoch, False)
+
+ # 3. filtering
+ k_models, all_models, p1_trace_highest_score, p1_trace_highest_scored_models_id = rms.filtering_phase(
+ N, K, train_loader=data_loader[0])
+
+    # 4. Run refinement phase
+ data = {'u': 1, 'k_models': k_models, "table_name": "dummy", "config_file": args.config_file}
+ response = requests.post(args.refinement_url, json=data).json()
+
+ best_arch, best_arch_performance = response["best_arch"], response["best_arch_performance"]
+
+ end_time = time.time()
+ real_time_usage = end_time - begin_time
+
+ return orjson.dumps(
+ {"best_arch": best_arch,
+ "best_arch_performance": best_arch_performance,
+ "time_usage": real_time_usage}).decode('utf-8')
+
+
+@exception_catcher
+def model_selection_trails_workloads(params: dict, args: Namespace):
+ """
+ Run filtering (explore N models) and refinement phase (refine K models) for benchmarking latency.
+ """
+
+ begin_time = time.time()
+ mini_batch_data = json.loads(params["mini_batch"])
+ n = int(params["n"])
+ k = int(params["k"])
+
+ # 1. launch cache service, for both train and valid.
+ # todo: use real data table or others
+ columns = list(mini_batch_data[0].keys())
+ requests.post(args.cache_svc_url,
+ json={'columns': columns, 'name_space': "train", 'table_name': "dummy",
+ "batch_size": len(mini_batch_data)})
+ requests.post(args.cache_svc_url,
+ json={'columns': columns, 'name_space': "valid", 'table_name': "dummy",
+ "batch_size": len(mini_batch_data)})
+
+ from src.logger import logger
+ logger.info(f"begin run model_selection_trails_workloads CPU + GPU, explore N={n} and K={k}")
+
+ from src.eva_engine.run_ms import RunModelSelection
+
+ # 2. filtering
+ dataloader = generate_dataloader(mini_batch_data=mini_batch_data, args=args)
+ rms = RunModelSelection(args.search_space, args, is_simulate=args.is_simulate)
+ k_models, _, _, _ = rms.filtering_phase(N=n, K=k, train_loader=dataloader)
+
+    # 3. Run refinement phase
+ data = {'u': 1, 'k_models': k_models, "table_name": "dummy", "config_file": args.config_file}
+ response = requests.post(args.refinement_url, json=data).json()
+ best_arch, best_arch_performance = response["best_arch"], response["best_arch_performance"]
+ real_time_usage = time.time() - begin_time
+
+ return orjson.dumps(
+ {"best_arch": best_arch,
+ "best_arch_performance": best_arch_performance,
+ "time_usage": real_time_usage
+ }).decode('utf-8')
+
+
+# benchmarking code here
+@exception_catcher
+def benchmark_filtering_phase_latency(params: dict, args: Namespace):
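+    """Benchmark the filtering phase: score architectures sequentially while a
+    background thread records CPU/GPU usage, then write the scores and the measured
+    time breakdown to JSON files."""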
+ from src.logger import logger
+ from src.common.structure import ModelAcquireData
+ from src.controller.sampler_all.seq_sampler import SequenceSampler
+ from src.eva_engine.phase1.evaluator import P1Evaluator
+ from src.search_space.init_search_space import init_search_space
+ from src.tools.io_tools import write_json, read_json
+ from src.tools.res_measure import print_cpu_gpu_usage
+ import torch
+ logger.info(f"begin run filtering_phase CPU only")
+
+ args.models_explore = int(params["explore_models"])
+
+ output_file = f"{args.result_dir}/score_{args.search_space}_{args.dataset}_batch_size_{args.batch_size}_{args.device}_{args.tfmem}.json"
+ time_output_file = f"{args.result_dir}/time_score_{args.search_space}_{args.dataset}_batch_size_{args.batch_size}_{args.device}_{args.tfmem}.json"
+ res_output_file = f"{args.result_dir}/resource_score_{args.search_space}_{args.dataset}_batch_size_{args.batch_size}_{args.device}_{args.tfmem}.json"
+
+ # start the resource monitor
+ stop_event, thread = print_cpu_gpu_usage(interval=0.5, output_file=res_output_file)
+
+ db_config = {
+ "db_name": args.db_name,
+ "db_user": args.db_user,
+ "db_host": args.db_host,
+ "db_port": args.db_port,
+ }
+
+ search_space_ins = init_search_space(args)
+ _evaluator = P1Evaluator(device=args.device,
+ num_label=args.num_labels,
+ dataset_name=args.dataset,
+ search_space_ins=search_space_ins,
+ train_loader=None,
+ is_simulate=False,
+ metrics=args.tfmem,
+ enable_cache=args.embedding_cache_filtering,
+ db_config=db_config)
+
+ sampler = SequenceSampler(search_space_ins)
+ explored_n = 0
+ result = read_json(output_file)
+ print(f"begin to score all, currently we already explored {len(result.keys())}")
+ logger.info(f"begin to score all, currently we already explored {len(result.keys())}")
+
+ while True:
+ arch_id, arch_micro = sampler.sample_next_arch()
+ if arch_id is None:
+ break
+ if arch_id in result:
+ continue
+ if explored_n > args.models_explore:
+ break
+ # run the model selection
+ model_encoding = search_space_ins.serialize_model_encoding(arch_micro)
+ model_acquire_data = ModelAcquireData(model_id=arch_id,
+ model_encoding=model_encoding,
+ is_last=False)
+ data_str = model_acquire_data.serialize_model()
+ model_score = _evaluator.p1_evaluate(data_str)
+ explored_n += 1
+ result[arch_id] = model_score
+ if explored_n % 50 == 0:
+ logger.info(f"Evaluate {explored_n} models")
+ print(f"Evaluate {explored_n} models")
+
+ if _evaluator.if_cuda_avaiable():
+ torch.cuda.synchronize()
+
+    # the first two measurements are treated as warm-up and excluded
+ _evaluator.time_usage["io_latency"] = \
+ sum(_evaluator.time_usage["track_io_model_load"][2:]) + \
+ sum(_evaluator.time_usage["track_io_model_release_each_50"]) + \
+ sum(_evaluator.time_usage["track_io_model_init"][2:]) + \
+ sum(_evaluator.time_usage["track_io_res_load"][2:]) + \
+ sum(_evaluator.time_usage["track_io_data_retrievel"][2:]) + \
+ sum(_evaluator.time_usage["track_io_data_preprocess"][2:])
+
+ _evaluator.time_usage["compute_latency"] = sum(_evaluator.time_usage["track_compute"][2:])
+ _evaluator.time_usage["latency"] = _evaluator.time_usage["io_latency"] + _evaluator.time_usage["compute_latency"]
+
+ _evaluator.time_usage["avg_compute_latency"] = \
+ _evaluator.time_usage["compute_latency"] \
+ / len(_evaluator.time_usage["track_compute"][2:])
+
+ write_json(output_file, result)
+ # compute time
+ write_json(time_output_file, _evaluator.time_usage)
+
+    # stop the resource-monitor thread at the end of the run
+    print("Done, sleeping 10 seconds so the resource monitor can flush")
+    # wait for the resource monitor to flush its output
+ time.sleep(10)
+ stop_event.set()
+ thread.join()
+
+ return orjson.dumps({"Write to": time_output_file}).decode('utf-8')
+
+
+# Micro-benchmarking the filtering phase
+search_space_ins = None
+_evaluator = None
+sampler = None
+
+
+@exception_catcher
+def in_db_filtering_state_init(params: dict, args: Namespace):
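+    """Initialize the global search space, evaluator and sampler once per job, then
+    sample and return the next architecture id and its encoding."""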
+ global search_space_ins, _evaluator, sampler
+ from src.logger import logger
+ from src.controller.sampler_all.seq_sampler import SequenceSampler
+ from src.eva_engine.phase1.evaluator import P1Evaluator
+ from src.search_space.init_search_space import init_search_space
+
+ db_config = {
+ "db_name": args.db_name,
+ "db_user": args.db_user,
+ "db_host": args.db_host,
+ "db_port": args.db_port,
+ }
+
+ # init once
+ # params["eva_results"] == "null" means it a new job
+ if params["eva_results"] == "null" or (search_space_ins is None and _evaluator is None and sampler is None):
+ logger.info(f'New job = {params["eva_results"]}, search_space_ins = {search_space_ins}')
+ search_space_ins = init_search_space(args)
+ _evaluator = P1Evaluator(device=args.device,
+ num_label=args.num_labels,
+ dataset_name=params["dataset"],
+ search_space_ins=search_space_ins,
+ train_loader=None,
+ is_simulate=False,
+ metrics=args.tfmem,
+ enable_cache=args.embedding_cache_filtering,
+ db_config=db_config,
+ data_retrievel="spi")
+ sampler = SequenceSampler(search_space_ins)
+
+ arch_id, arch_micro = sampler.sample_next_arch()
+ model_encoding = search_space_ins.serialize_model_encoding(arch_micro)
+
+ return orjson.dumps({"model_encoding": model_encoding, "arch_id": arch_id}).decode('utf-8')
+
+
+@exception_catcher
+def in_db_filtering_evaluate(params: dict, args: Namespace):
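+    """Score one sampled architecture with the mini-batch retrieved in-database (via SPI)
+    and return its index and score as JSON."""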
+ global search_space_ins, _evaluator, sampler
+ from src.common.structure import ModelAcquireData
+ from src.logger import logger
+ try:
+        if search_space_ins is None and _evaluator is None and sampler is None:
+            logger.info("search_space_ins, _evaluator and sampler are None")
+            return orjson.dumps({"error": "errored, please call init first"}).decode('utf-8')
+
+ sampled_result = json.loads(params["sample_result"])
+ arch_id, model_encoding = str(sampled_result["arch_id"]), str(sampled_result["model_encoding"])
+
+ mini_batch = json.loads(params["mini_batch"])
+ if mini_batch["status"] == "error":
+ return orjson.dumps({"error": mini_batch["message"]}).decode('utf-8')
+ logger.info(f"Begin evaluate {params['model_index']}, "
+ f"with size of batch = {len(mini_batch['data'])}, "
+ f"size of columns = {len(mini_batch['data'][0])}")
+ model_acquire_data = ModelAcquireData(model_id=arch_id,
+ model_encoding=model_encoding,
+ is_last=False,
+ spi_seconds=float(params["spi_seconds"]),
+ spi_mini_batch=mini_batch["data"],
+ )
+
+ model_score = _evaluator._p1_evaluate_online(model_acquire_data)
+ logger.info(f'Done evaluate {params["model_index"]}, '
+ f'with {orjson.dumps({"index": params["model_index"], "score": model_score}).decode("utf-8")}')
+    except Exception:
+ logger.info(orjson.dumps(
+ {"Errored": traceback.format_exc()}).decode('utf-8'))
+
+ return orjson.dumps(
+ {"Errored": traceback.format_exc()}).decode('utf-8')
+
+ return orjson.dumps({"index": params["model_index"], "score": model_score}).decode('utf-8')
+
+
+@exception_catcher
+def records_results(params: dict, args: Namespace):
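+    """Aggregate the evaluator's tracked time usage (skipping the first two warm-up
+    measurements) and write it to a JSON file under args.result_dir."""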
+ global search_space_ins, _evaluator, sampler
+ from src.tools.io_tools import write_json
+ from src.logger import logger
+
+ try:
+ time_output_file = f"{args.result_dir}/time_score_{args.search_space}_{params['dataset']}_batch_size_{args.batch_size}_{args.device}_{args.tfmem}.json"
+ _evaluator.time_usage["io_latency"] = \
+ sum(_evaluator.time_usage["track_io_model_load"][2:]) + \
+ sum(_evaluator.time_usage["track_io_model_release_each_50"]) + \
+ sum(_evaluator.time_usage["track_io_model_init"][2:]) + \
+ sum(_evaluator.time_usage["track_io_res_load"][2:]) + \
+ sum(_evaluator.time_usage["track_io_data_retrievel"][2:]) + \
+ sum(_evaluator.time_usage["track_io_data_preprocess"][2:])
+
+ _evaluator.time_usage["compute_latency"] = sum(_evaluator.time_usage["track_compute"][2:])
+ _evaluator.time_usage["latency"] = _evaluator.time_usage["io_latency"] + _evaluator.time_usage[
+ "compute_latency"]
+
+ _evaluator.time_usage["avg_compute_latency"] = \
+ _evaluator.time_usage["compute_latency"] \
+ / len(_evaluator.time_usage["track_compute"][2:])
+
+ logger.info(f"Saving time usag to {time_output_file}")
+ # compute time
+ write_json(time_output_file, _evaluator.time_usage)
+    except Exception:
+ logger.info(orjson.dumps(
+ {"Errored": traceback.format_exc()}).decode('utf-8'))
+
+ return orjson.dumps(
+ {"Errored": traceback.format_exc()}).decode('utf-8')
+
+ return orjson.dumps({"Done": 1}).decode('utf-8')
+
+
+if __name__ == "__main__":
+ params = {}
+ params["budget"] = 10
+ params["score_time_per_model"] = 0.0211558125
+ params["train_time_per_epoch"] = 5.122203075885773
+ params["only_phase1"] = 'true'
+ params["config_file"] = './internal/ml/model_selection/config.ini'
+ print(coordinator(json.dumps(params)))
+
+ params = {}
+ params[
+ "mini_batch"] = '[{"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}]'
+ params["n"] = 10
+ params["k"] = 1
+ params["config_file"] = './internal/ml/model_selection/config.ini'
+ print(filtering_phase_dataLoader(json.dumps(params)))
+
+ # params = {}
+ # params[
+ # "mini_batch"] = '[{"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}]'
+ # params["config_file"] = './internal/ml/model_selection/config.ini'
+ # print(profiling_refinement_phase(json.dumps(params)))
+ #
+ # params = {}
+ # params["budget"] = 10
+ # params[
+ # "mini_batch"] = '[{"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}]'
+ # params["config_file"] = './internal/ml/model_selection/config.ini'
+ # print(model_selection(json.dumps(params)))
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/requirement.txt b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/requirement.txt
new file mode 100644
index 000000000..591daefa5
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/requirement.txt
@@ -0,0 +1,54 @@
+aiofiles==23.1.0
+blessed==1.20.0
+certifi==2023.7.22
+charset-normalizer==3.2.0
+ConfigSpace==0.7.1
+contourpy==1.1.0
+cycler==0.11.0
+fonttools==4.41.0
+gpustat==1.1
+html5tagger==1.3.0
+httptools==0.6.0
+idna==3.4
+importlib-resources==6.0.0
+joblib==1.3.1
+kiwisolver==1.4.4
+matplotlib==3.7.2
+more-itertools==9.1.0
+multidict==6.0.4
+numpy==1.24.4
+nvidia-ml-py==12.535.77
+orjson==3.9.2
+packaging==23.1
+palettable==3.3.3
+pandas==2.0.3
+Pillow==10.0.0
+psutil==5.9.5
+psycopg2-binary==2.9.6
+pyparsing==3.0.9
+python-dateutil==2.8.2
+pytz==2023.3
+requests==2.31.0
+sanic==23.6.0
+sanic-routing==23.6.0
+scikit-learn==1.3.0
+scipy==1.10.1
+seaborn==0.12.2
+six==1.16.0
+sklearn==0.0
+thop @ git+https://github.com/Lyken17/pytorch-OpCounter.git@43c064afb71383501e41eaef9e8c8407265cf77f
+threadpoolctl==3.1.0
+torch==1.8.1
+torchaudio==0.8.1
+torchinfo==1.8.0
+torchvision==0.9.1
+tqdm==4.47.0
+tracerite==1.1.0
+typing_extensions==4.7.1
+tzdata==2023.3
+ujson==5.8.0
+urllib3==2.0.4
+uvloop==0.17.0
+wcwidth==0.2.6
+websockets==11.0.3
+zipp==3.16.2
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/anytime_img_w_baseline.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/anytime_img_w_baseline.sh
new file mode 100644
index 000000000..aef381053
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/anytime_img_w_baseline.sh
@@ -0,0 +1,58 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+conda activate trails
+
+
+
+############## c10 dataset ##############
+# run both 2phase-MS and training-free MS
+python internal/ml/model_selection/exps/macro/anytime_img.py \
+ --search_space nasbench201 \
+ --api_loc NAS-Bench-201-v1_1-096897.pth \
+ --epoch 200 \
+ --dataset cifar10 \
+ --num_labels 10 \
+ --base_dir ../exp_data/ \
+ --result_dir ./internal/ml/model_selection/exp_result/
+
+
+############## c100 dataset ##############
+python internal/ml/model_selection/exps/macro/anytime_img.py \
+ --search_space nasbench201 \
+ --api_loc NAS-Bench-201-v1_1-096897.pth \
+ --epoch 200 \
+ --dataset cifar100 \
+ --num_labels 100 \
+ --base_dir ../exp_data/ \
+ --result_dir ./internal/ml/model_selection/exp_result/
+
+
+############## imageNet dataset ##############
+python internal/ml/model_selection/exps/macro/anytime_img.py \
+ --search_space nasbench201 \
+ --api_loc NAS-Bench-201-v1_1-096897.pth \
+ --epoch 200 \
+ --dataset ImageNet16-120 \
+ --num_labels 120 \
+ --base_dir ../exp_data/ \
+ --result_dir ./internal/ml/model_selection/exp_result/
+
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/anytime_tab.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/anytime_tab.sh
new file mode 100644
index 000000000..3bfb947d5
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/anytime_tab.sh
@@ -0,0 +1,142 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+
+
+############## frappe dataset ##############
+
+# run the 2phase-MS
+python internal/ml/model_selection/exps/macro/anytime_simulate.py \
+ --search_space mlp_sp \
+ --num_layers 4 \
+ --hidden_choice_len 20 \
+ --batch_size 128 \
+ --nfeat 5500 \
+ --nfield 10 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --dataset frappe \
+ --num_labels 2 \
+ --only_phase1 False \
+ --is_simulate True \
+ --device cpu \
+ --log_folder any_time_frappe \
+ --result_dir ./internal/ml/model_selection/exp_result/ \
+ --num_points 5
+
+
+# run the training-free MS
+python internal/ml/model_selection/exps/macro/anytime_simulate.py \
+ --search_space mlp_sp \
+ --num_layers 4 \
+ --hidden_choice_len 20 \
+ --batch_size 128 \
+ --nfeat 5500 \
+ --nfield 10 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --dataset frappe \
+ --num_labels 2 \
+ --only_phase1 True \
+ --is_simulate True \
+ --device cpu \
+ --log_folder any_time_frappe \
+ --result_dir ./internal/ml/model_selection/exp_result/ \
+ --num_points 5
+
+
+############## uci dataset ##############
+
+# run the 2phase-MS
+python internal/ml/model_selection/exps/macro/anytime_simulate.py \
+ --search_space mlp_sp \
+ --num_layers 4 \
+ --hidden_choice_len 20 \
+ --batch_size 128 \
+ --nfeat 369 \
+ --nfield 43 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --dataset uci_diabetes \
+ --num_labels 2 \
+ --only_phase1 False \
+ --is_simulate True \
+ --device cpu \
+ --log_folder any_time_uci_diabetes \
+ --result_dir ./internal/ml/model_selection/exp_result/ \
+ --num_points 5
+
+
+# run the training-free MS
+python internal/ml/model_selection/exps/macro/anytime_simulate.py \
+ --search_space mlp_sp \
+ --num_layers 4 \
+ --hidden_choice_len 20 \
+ --batch_size 128 \
+ --nfeat 369 \
+ --nfield 43 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --dataset uci_diabetes \
+ --num_labels 2 \
+ --only_phase1 True \
+ --is_simulate True \
+ --device cpu \
+ --log_folder any_time_uci_diabetes \
+ --result_dir ./internal/ml/model_selection/exp_result/ \
+ --num_points 5
+
+
+############## criteo dataset ##############
+
+# run the 2phase-MS
+python internal/ml/model_selection/exps/macro/anytime_simulate.py \
+ --search_space mlp_sp \
+ --num_layers 4 \
+ --hidden_choice_len 10 \
+ --batch_size 128 \
+ --nfeat 2100000 \
+ --nfield 39 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --dataset criteo \
+ --num_labels 2 \
+ --only_phase1 False \
+ --is_simulate True \
+ --device cpu \
+ --log_folder any_time_criteo \
+ --result_dir ./internal/ml/model_selection/exp_result/ \
+ --num_points 5
+
+
+# run the training-free MS
+python internal/ml/model_selection/exps/macro/anytime_simulate.py \
+ --search_space mlp_sp \
+ --num_layers 4 \
+ --hidden_choice_len 10 \
+ --batch_size 128 \
+ --nfeat 2100000 \
+ --nfield 39 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --dataset criteo \
+ --num_labels 2 \
+ --only_phase1 True \
+ --is_simulate True \
+ --device cpu \
+ --log_folder any_time_criteo \
+ --result_dir ./internal/ml/model_selection/exp_result/ \
+ --num_points 5
+
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/baseline_system_img.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/baseline_system_img.sh
new file mode 100644
index 000000000..20d667917
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/baseline_system_img.sh
@@ -0,0 +1,61 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+conda activate trails
+
+
+
+# run both training-based MS
+############## c10 dataset ##############
+python internal/ml/model_selection/exps/baseline/train_with_ea.py \
+ --search_space nasbench201 \
+ --api_loc NAS-Bench-201-v1_1-096897.pth \
+ --epoch 200 \
+ --dataset cifar10 \
+ --num_labels 10 \
+ --base_dir ../exp_data/ \
+ --log_folder log_baseline_c10 \
+ --result_dir ./internal/ml/model_selection/exp_result/
+
+
+############## c100 dataset ##############
+python internal/ml/model_selection/exps/baseline/train_with_ea.py \
+ --search_space nasbench201 \
+ --api_loc NAS-Bench-201-v1_1-096897.pth \
+ --epoch 200 \
+ --dataset cifar100 \
+ --num_labels 100 \
+ --base_dir ../exp_data/ \
+ --log_folder log_baseline_c100 \
+ --result_dir ./internal/ml/model_selection/exp_result/
+
+
+############## ImgNet dataset ##############
+python internal/ml/model_selection/exps/baseline/train_with_ea.py \
+ --search_space nasbench201 \
+ --api_loc NAS-Bench-201-v1_1-096897.pth \
+ --epoch 200 \
+ --dataset ImageNet16-120 \
+ --num_labels 120 \
+ --base_dir ../exp_data/ \
+ --log_folder log_baseline_imgnet \
+ --result_dir ./internal/ml/model_selection/exp_result/
+
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/baseline_system_tab.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/baseline_system_tab.sh
new file mode 100644
index 000000000..41a2d0056
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/baseline_system_tab.sh
@@ -0,0 +1,83 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+conda activate trails
+
+
+
+# run both training-based MS
+############## frappe dataset ##############
+python internal/ml/model_selection/exps/baseline/train_with_ea.py \
+ --search_space mlp_sp \
+ --num_layers 4 \
+ --hidden_choice_len 20 \
+ --epoch 19 \
+ --batch_size=512 \
+ --lr=0.001 \
+ --iter_per_epoch=200 \
+ --nfeat=5500 \
+ --nfield=10 \
+ --nemb=10 \
+ --base_dir ../exp_data/ \
+ --dataset frappe \
+ --num_labels 2 \
+ --device=cpu \
+ --log_folder baseline_frappe \
+ --result_dir ./internal/ml/model_selection/exp_result/
+
+
+############## uci dataset ##############
+python internal/ml/model_selection/exps/baseline/train_with_ea.py \
+ --search_space mlp_sp \
+ --num_layers 4 \
+ --hidden_choice_len 20 \
+ --epoch 0 \
+ --batch_size=1024 \
+ --lr=0.001 \
+ --iter_per_epoch=200 \
+ --nfeat=369 \
+ --nfield=43 \
+ --nemb=10 \
+ --base_dir ../exp_data/ \
+ --dataset uci_diabetes \
+ --num_labels 2 \
+ --device=cpu \
+ --log_folder baseline_uci_diabetes \
+ --result_dir ./internal/ml/model_selection/exp_result/
+
+
+############## criteo dataset ##############
+python internal/ml/model_selection/exps/baseline/train_with_ea.py \
+ --search_space mlp_sp \
+ --num_layers 4 \
+ --hidden_choice_len 10 \
+ --epoch 9 \
+ --batch_size=1024 \
+ --lr=0.001 \
+ --iter_per_epoch=2000 \
+ --nfeat=2100000 \
+ --nfield=39 \
+ --nemb=10 \
+ --base_dir ../exp_data/ \
+ --dataset criteo \
+ --num_labels 2 \
+ --device=cpu \
+ --log_folder baseline_criteo \
+ --result_dir ./internal/ml/model_selection/exp_result/
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/benchmark_weight_sharing.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/benchmark_weight_sharing.sh
new file mode 100644
index 000000000..d1c2db5b2
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/benchmark_weight_sharing.sh
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+conda activate trails
+
+
+python ./internal/ml/model_selection/exps/micro/resp/benchmark_weight_sharing.py \
+ --log_name=baseline_train_based \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=20 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cuda:0 \
+ --batch_size=512 \
+ --lr=0.001 \
+ --epoch=20 \
+ --iter_per_epoch=200 \
+ --dataset=frappe \
+ --nfeat=5500 \
+ --nfield=10 \
+ --nemb=10 \
+ --log_folder=log_frappe \
+ --total_models_per_worker=-1 \
+ --result_dir=./internal/ml/model_selection/exp_result/
\ No newline at end of file
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/database/load_data_to_db.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/database/load_data_to_db.sh
new file mode 100644
index 000000000..dc7e0172f
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/database/load_data_to_db.sh
@@ -0,0 +1,85 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#!/bin/bash
+
+# Check for proper number of command line args
+if [[ $# -ne 2 ]]; then
+ echo "Usage: $0 "
+ exit 1
+fi
+
+# Configurations
+DATA_PATH="$1"
+DB_NAME="$2"
+
+# Connection details
+HOST="localhost"
+PORT="28814"
+USERNAME="postgres"
+DBNAME="pg_extension"
+
+# Create the database
+echo "Creating database..."
+createdb -h $HOST -p $PORT -U $USERNAME $DBNAME
+
+# Define datasets to process
+datasets=("train" "valid" "test")
+
+# Loop over each dataset
+for dataset in "${datasets[@]}"; do
+ rm "${DATA_PATH}/${dataset}.csv"
+
+ # 1. Identify the number of columns
+ num_columns=$(awk 'NF > max { max = NF } END { print max }' "${DATA_PATH}/${dataset}.libsvm")
+
+ # 2. Create the table dynamically
+ create_table_cmd="CREATE TABLE ${DB_NAME}_${dataset} (id SERIAL PRIMARY KEY, label INTEGER"
+
+ for (( i=2; i<=$num_columns; i++ )); do
+ create_table_cmd+=", col$(($i-1)) TEXT"
+ done
+ create_table_cmd+=");"
+
+ echo "Creating ${dataset} table..."
+ echo $create_table_cmd | psql -h $HOST -p $PORT -U $USERNAME -d $DBNAME
+
+ # 3. Transform the libsvm format to CSV
+ echo "Transforming ${dataset} to CSV format..."
+
+ awk '{
+ for (i = 1; i <= NF; i++) {
+ printf "%s", $i; # print each field as-is
+ if (i < NF) {
+ printf " "; # if its not the last field, print a space
+ }
+ }
+ printf "\n"; # end of line
+ }' "${DATA_PATH}/${dataset}.libsvm" > "${DATA_PATH}/${dataset}.csv"
+
+ # 4. Import into PostgreSQL
+ columns="label"
+ for (( i=2; i<=$num_columns; i++ )); do
+ columns+=", col$(($i-1))"
+ done
+
+ echo "Loading ${dataset} into PostgreSQL..."
+ psql -h $HOST -p $PORT -U $USERNAME -d $DBNAME -c "\COPY ${DB_NAME}_${dataset}($columns) FROM '${DATA_PATH}/${dataset}.csv' DELIMITER ' '"
+done
+
+echo "Data load complete."
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_embedding_cache.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_embedding_cache.sh
new file mode 100644
index 000000000..e9068a2bd
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_embedding_cache.sh
@@ -0,0 +1,138 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+
+# frappe
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
+ --embedding_cache_filtering=True \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=20 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cuda:0 \
+ --batch_size=32 \
+ --dataset=frappe \
+ --nfeat=5500 \
+ --nfield=10 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result/
+
+#criteo
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
+ --embedding_cache_filtering=True \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=10 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cuda:0 \
+ --batch_size=32 \
+ --dataset=criteo \
+ --nfeat=2100000 \
+ --nfield=39 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result/
+
+# uci
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
+ --embedding_cache_filtering=True \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=20 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cuda:0 \
+ --batch_size=32 \
+ --dataset=uci_diabetes \
+ --nfeat=369 \
+ --nfield=43 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result/
+
+########################## CPU ##############################
+# Same runs as above, but on CPU (--device=cpu instead of cuda:0)
+
+# frappe
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
+ --embedding_cache_filtering=True \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=20 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cpu \
+ --batch_size=32 \
+ --dataset=frappe \
+ --nfeat=5500 \
+ --nfield=10 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result/
+
+#criteo
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
+ --embedding_cache_filtering=True \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=10 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cpu \
+ --batch_size=32 \
+ --dataset=criteo \
+ --nfeat=2100000 \
+ --nfield=39 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result/
+
+# uci
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
+ --embedding_cache_filtering=True \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=20 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cpu \
+ --batch_size=32 \
+ --dataset=uci_diabetes \
+ --nfeat=369 \
+ --nfield=43 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result/
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_embedding_cache_concurrent.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_embedding_cache_concurrent.sh
new file mode 100644
index 000000000..f7eb18954
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_embedding_cache_concurrent.sh
@@ -0,0 +1,155 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+conda activate trails
+
+
+
+########################## CPU ##############################
+# All runs in this script use CPU (--device=cpu)
+
+# frappe
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency_concurrent.py \
+ --concurrency=8 \
+ --embedding_cache_filtering=True \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --log_name=score_based \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=20 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cpu \
+ --batch_size=32 \
+ --dataset=frappe \
+ --nfeat=5500 \
+ --nfield=10 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_current_filter_cache/ \
+ --log_folder=log_score_time_frappe_cache
+
+#criteo
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency_concurrent.py \
+ --embedding_cache_filtering=True \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --log_name=score_based \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=10 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cpu \
+ --batch_size=32 \
+ --dataset=criteo \
+ --nfeat=2100000 \
+ --nfield=39 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_current_filter_cache/ \
+ --log_folder=log_score_time_frappe_cache
+
+# uci
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency_concurrent.py \
+ --embedding_cache_filtering=True \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --log_name=score_based \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=20 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cpu \
+ --batch_size=32 \
+ --dataset=uci_diabetes \
+ --nfeat=369 \
+ --nfield=43 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_current_filter_cache/ \
+ --log_folder=log_score_time_frappe_cache
+
+
+# Concurrent runs, but without the embedding cache (no --embedding_cache_filtering flag)
+#######################################################################################
+
+# frappe
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency_concurrent.py \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --log_name=score_based \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=20 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cpu \
+ --batch_size=32 \
+ --dataset=frappe \
+ --nfeat=5500 \
+ --nfield=10 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_current_filter_no_cache/ \
+ --log_folder=log_score_time_frappe_cache
+
+#criteo
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency_concurrent.py \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --log_name=score_based \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=10 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cpu \
+ --batch_size=32 \
+ --dataset=criteo \
+ --nfeat=2100000 \
+ --nfield=39 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_current_filter_no_cache/ \
+ --log_folder=log_score_time_frappe_cache
+
+# uci
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency_concurrent.py \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --log_name=score_based \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=20 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cpu \
+ --batch_size=32 \
+ --dataset=uci_diabetes \
+ --nfeat=369 \
+ --nfield=43 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_current_filter_no_cache/ \
+ --log_folder=log_score_time_frappe_cache
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_phase1_cpu_gpu.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_phase1_cpu_gpu.sh
new file mode 100644
index 000000000..65aabd3c8
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_phase1_cpu_gpu.sh
@@ -0,0 +1,227 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+
+
+# frappe
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
+ --embedding_cache_filtering=False \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=20 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cuda:0 \
+ --batch_size=32 \
+ --dataset=frappe \
+ --nfeat=5500 \
+ --nfield=10 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result_sever_wo_cache/
+
+#criteo
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
+ --embedding_cache_filtering=False \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=10 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cuda:0 \
+ --batch_size=32 \
+ --dataset=criteo \
+ --nfeat=2100000 \
+ --nfield=39 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result_sever_wo_cache/
+
+# uci
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
+ --embedding_cache_filtering=False \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=20 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cuda:0 \
+ --batch_size=32 \
+ --dataset=uci_diabetes \
+ --nfeat=369 \
+ --nfield=43 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result_sever_wo_cache/
+
+
+# cifar 10
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
+ --embedding_cache_filtering=False \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --search_space=nasbench201 \
+ --api_loc=NAS-Bench-201-v1_1-096897.pth \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=10 \
+ --device=cuda:0 \
+ --batch_size=32 \
+ --dataset=cifar10 \
+ --result_dir=./internal/ml/model_selection/exp_result_sever_wo_cache/
+
+
+# cifar 100
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
+ --embedding_cache_filtering=False \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --search_space=nasbench201 \
+ --api_loc=NAS-Bench-201-v1_1-096897.pth \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=100 \
+ --device=cuda:0 \
+ --batch_size=32 \
+ --dataset=cifar100 \
+ --result_dir=./internal/ml/model_selection/exp_result_sever_wo_cache/
+
+
+# imageNet
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
+ --embedding_cache_filtering=False \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --search_space=nasbench201 \
+ --api_loc=NAS-Bench-201-v1_1-096897.pth \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=120 \
+ --device=cuda:0 \
+ --batch_size=32 \
+ --dataset=ImageNet16-120 \
+ --result_dir=./internal/ml/model_selection/exp_result_sever_wo_cache/
+
+########################## CPU ##############################
+# Same runs as above, but on CPU (--device=cpu instead of cuda:0)
+
+# frappe
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
+ --embedding_cache_filtering=False \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=20 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cpu \
+ --batch_size=32 \
+ --dataset=frappe \
+ --nfeat=5500 \
+ --nfield=10 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result_sever_wo_cache/
+
+# criteo
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
+ --embedding_cache_filtering=False \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=10 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cpu \
+ --batch_size=32 \
+ --dataset=criteo \
+ --nfeat=2100000 \
+ --nfield=39 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result_sever_wo_cache/
+
+# uci
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
+ --embedding_cache_filtering=False \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=20 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cpu \
+ --batch_size=32 \
+ --dataset=uci_diabetes \
+ --nfeat=369 \
+ --nfield=43 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result_sever_wo_cache/
+
+
+# cifar 10
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
+ --embedding_cache_filtering=False \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --search_space=nasbench201 \
+ --api_loc=NAS-Bench-201-v1_1-096897.pth \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=10 \
+ --device=cpu \
+ --batch_size=32 \
+ --dataset=cifar10 \
+ --result_dir=./internal/ml/model_selection/exp_result_sever_wo_cache/
+
+
+# cifar 100
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
+ --embedding_cache_filtering=False \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --search_space=nasbench201 \
+ --api_loc=NAS-Bench-201-v1_1-096897.pth \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=100 \
+ --device=cpu \
+ --batch_size=32 \
+ --dataset=cifar100 \
+ --result_dir=./internal/ml/model_selection/exp_result_sever_wo_cache/
+
+
+# imageNet
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
+ --embedding_cache_filtering=False \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --search_space=nasbench201 \
+ --api_loc=NAS-Bench-201-v1_1-096897.pth \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=120 \
+ --device=cpu \
+ --batch_size=32 \
+ --dataset=ImageNet16-120 \
+ --result_dir=./internal/ml/model_selection/exp_result_sever_wo_cache/
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_phase1_in_db.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_phase1_in_db.sh
new file mode 100644
index 000000000..84406263d
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_phase1_in_db.sh
@@ -0,0 +1,77 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+
+
+# frappe
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency_sql.py \
+ --embedding_cache_filtering=True \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=20 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cpu \
+ --batch_size=32 \
+ --dataset=frappe \
+ --nfeat=5500 \
+ --nfield=10 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result_sever_cache_sql/
+
+#criteo
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency_sql.py \
+ --embedding_cache_filtering=True \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=10 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cpu \
+ --batch_size=32 \
+ --dataset=criteo \
+ --nfeat=2100000 \
+ --nfield=39 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result_sever_cache_sql/
+
+# uci
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency_sql.py \
+ --embedding_cache_filtering=True \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=20 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cpu \
+ --batch_size=32 \
+ --dataset=uci_diabetes \
+ --nfeat=369 \
+ --nfield=43 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result_sever_cache_sql/
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_phase2.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_phase2.sh
new file mode 100644
index 000000000..84406263d
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_phase2.sh
@@ -0,0 +1,77 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+
+
+# frappe
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency_sql.py \
+ --embedding_cache_filtering=True \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=20 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cpu \
+ --batch_size=32 \
+ --dataset=frappe \
+ --nfeat=5500 \
+ --nfield=10 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result_sever_cache_sql/
+
+#criteo
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency_sql.py \
+ --embedding_cache_filtering=True \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=10 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cpu \
+ --batch_size=32 \
+ --dataset=criteo \
+ --nfeat=2100000 \
+ --nfield=39 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result_sever_cache_sql/
+
+# uci
+python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency_sql.py \
+ --embedding_cache_filtering=True \
+ --tfmem=express_flow \
+ --models_explore=5000 \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=20 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cpu \
+ --batch_size=32 \
+ --dataset=uci_diabetes \
+ --nfeat=369 \
+ --nfield=43 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result_sever_cache_sql/
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/micro_budget_aware_alg.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/micro_budget_aware_alg.sh
new file mode 100644
index 000000000..f91ae3ce1
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/micro_budget_aware_alg.sh
@@ -0,0 +1,60 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+conda activate trails
+
+
+
+############## c10 dataset ##############
+# run both 2phase-MS and training-free MS
+python internal/ml/model_selection/exps/micro/benchmark_budget_aware_alg.py \
+ --search_space nasbench201 \
+ --api_loc NAS-Bench-201-v1_1-096897.pth \
+ --dataset cifar10 \
+ --epoch 200 \
+ --base_dir ../exp_data/ \
+ --log_name logs_default \
+ --result_dir ./internal/ml/model_selection/exp_result/
+
+
+############## c100 dataset ##############
+python internal/ml/model_selection/exps/micro/benchmark_budget_aware_alg.py \
+ --search_space nasbench201 \
+ --api_loc NAS-Bench-201-v1_1-096897.pth \
+ --dataset cifar100 \
+ --epoch 200 \
+ --base_dir ../exp_data/ \
+ --log_name logs_default \
+ --result_dir ./internal/ml/model_selection/exp_result/
+
+
+############## imageNet dataset ##############
+python internal/ml/model_selection/exps/micro/benchmark_budget_aware_alg.py \
+ --search_space nasbench201 \
+ --api_loc NAS-Bench-201-v1_1-096897.pth \
+ --dataset ImageNet16-120 \
+ --epoch 200 \
+ --base_dir ../exp_data/ \
+ --log_name logs_default \
+ --result_dir ./internal/ml/model_selection/exp_result/
+
+
+
+############## draw graphs ##############
+python internal/ml/model_selection/exps/micro/draw_budget_aware_alg.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/micro_nku_tradeoff.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/micro_nku_tradeoff.sh
new file mode 100644
index 000000000..6aec7c195
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/micro_nku_tradeoff.sh
@@ -0,0 +1,179 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+conda activate trails
+
+
+
+
+# ====================================
+# ====================================
+# determine the K and U tradeoff
+# ====================================
+# ====================================
+# frappe
+python internal/ml/model_selection/exps/micro/benchmark_ku.py \
+ --search_space mlp_sp \
+ --epoch 20 \
+ --hidden_choice_len 20 \
+ --dataset frappe \
+ --base_dir ../exp_data/ \
+ --only_phase1 True \
+ --is_simulate True \
+ --log_folder log_ku_tradeoff
+
+
+# uci
+python internal/ml/model_selection/exps/micro/benchmark_ku.py \
+ --search_space mlp_sp \
+ --hidden_choice_len 20 \
+ --epoch 5 \
+ --dataset uci_diabetes \
+ --base_dir ../exp_data/ \
+ --only_phase1 True \
+ --is_simulate True \
+ --log_folder log_ku_tradeoff
+
+# criteo
+python internal/ml/model_selection/exps/micro/benchmark_ku.py \
+ --search_space mlp_sp \
+ --hidden_choice_len 10 \
+ --epoch 10 \
+ --dataset criteo \
+ --base_dir ../exp_data/ \
+ --only_phase1 True \
+ --is_simulate True \
+ --log_folder log_ku_tradeoff
+
+
+# c10
+python internal/ml/model_selection/exps/micro/benchmark_ku.py \
+ --search_space nasbench201 \
+ --api_loc NAS-Bench-201-v1_1-096897.pth \
+ --epoch 200 \
+ --dataset cifar10 \
+ --base_dir ../exp_data/ \
+ --only_phase1 True \
+ --is_simulate True \
+ --log_folder log_ku_tradeoff
+
+
+# c100
+python internal/ml/model_selection/exps/micro/benchmark_ku.py \
+ --search_space nasbench201 \
+ --api_loc NAS-Bench-201-v1_1-096897.pth \
+ --epoch 200 \
+ --dataset cifar100 \
+ --base_dir ../exp_data/ \
+ --only_phase1 True \
+ --is_simulate True \
+ --log_folder log_ku_tradeoff
+
+
+# imageNet
+python internal/ml/model_selection/exps/micro/benchmark_ku.py \
+ --search_space nasbench201 \
+ --api_loc NAS-Bench-201-v1_1-096897.pth \
+ --epoch 200 \
+ --dataset ImageNet16-120 \
+ --base_dir ../exp_data/ \
+ --only_phase1 True \
+ --is_simulate True \
+ --log_folder log_ku_tradeoff
+
+
+
+# ====================================
+# ====================================
+# determine the N and K tradeoff
+# ====================================
+# ====================================
+
+
+# frappe
+python internal/ml/model_selection/exps/micro/benchmark_nk.py \
+ --search_space mlp_sp \
+ --epoch 20 \
+ --hidden_choice_len 20 \
+ --dataset frappe \
+ --base_dir ../exp_data/ \
+ --only_phase1 True \
+ --is_simulate True \
+ --log_folder log_ku_tradeoff
+
+
+#uci
+python internal/ml/model_selection/exps/micro/benchmark_nk.py \
+ --search_space mlp_sp \
+ --hidden_choice_len 20 \
+ --epoch 5 \
+ --dataset uci_diabetes \
+ --base_dir ../exp_data/ \
+ --only_phase1 True \
+ --is_simulate True \
+ --log_folder log_ku_tradeoff
+
+
+# criteo
+python internal/ml/model_selection/exps/micro/benchmark_nk.py \
+ --search_space mlp_sp \
+ --hidden_choice_len 10 \
+ --epoch 10 \
+ --dataset criteo \
+ --base_dir ../exp_data/ \
+ --only_phase1 True \
+ --is_simulate True \
+ --log_folder log_ku_tradeoff
+
+
+
+# c10
+python internal/ml/model_selection/exps/micro/benchmark_nk.py \
+ --search_space nasbench201 \
+ --api_loc NAS-Bench-201-v1_1-096897.pth \
+ --epoch 200 \
+ --dataset cifar10 \
+ --base_dir ../exp_data/ \
+ --only_phase1 True \
+ --is_simulate True \
+ --log_folder log_ku_tradeoff
+
+
+# c100
+python internal/ml/model_selection/exps/micro/benchmark_nk.py \
+ --search_space nasbench201 \
+ --api_loc NAS-Bench-201-v1_1-096897.pth \
+ --epoch 200 \
+ --dataset cifar100 \
+ --base_dir ../exp_data/ \
+ --only_phase1 True \
+ --is_simulate True \
+ --log_folder log_ku_tradeoff
+
+
+# imageNet
+python internal/ml/model_selection/exps/micro/benchmark_nk.py \
+ --search_space nasbench201 \
+ --api_loc NAS-Bench-201-v1_1-096897.pth \
+ --epoch 200 \
+ --dataset ImageNet16-120 \
+ --base_dir ../exp_data/ \
+ --only_phase1 True \
+ --is_simulate True \
+ --log_folder log_ku_tradeoff
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/micro_score_metrics_relation.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/micro_score_metrics_relation.sh
new file mode 100644
index 000000000..3e55e9e3d
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/micro_score_metrics_relation.sh
@@ -0,0 +1,54 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+
+
+############## Frappe ##############
+# run both 2phase-MS and training-free MS
+python ./internal/ml/model_selection/exps/micro/benchmark_score_metrics.py \
+ --tfmem=express_flow \
+ --search_space mlp_sp \
+ --dataset frappe \
+ --base_dir ../exp_data/ \
+ --log_name logs_default \
+ --result_dir ./internal/ml/model_selection/exp_result/
+
+
+############## criteo dataset ##############
+python ./internal/ml/model_selection/exps/micro/benchmark_score_metrics.py \
+ --tfmem=express_flow \
+ --search_space mlp_sp \
+ --dataset criteo \
+ --base_dir ../exp_data/ \
+ --log_name logs_default \
+ --result_dir ./internal/ml/model_selection/exp_result/
+
+
+############## Uci dataset ##############
+python ./internal/ml/model_selection/exps/micro/benchmark_score_metrics.py \
+ --tfmem=express_flow \
+ --search_space=mlp_sp \
+ --dataset uci_diabetes \
+ --base_dir ../exp_data/ \
+ --log_name logs_default \
+ --result_dir ./internal/ml/model_selection/exp_result/
+
+
+############## draw graphs ##############
+python ./internal/ml/model_selection/exps/micro/draw_score_metric_relation.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/micro_search_strategy.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/micro_search_strategy.sh
new file mode 100644
index 000000000..4b9f37566
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/micro_search_strategy.sh
@@ -0,0 +1,70 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+
+# rs (random search)
+python internal/ml/model_selection/exps/baseline/train_with_random.py \
+ --search_space mlp_sp \
+ --num_layers 4 \
+ --hidden_choice_len 20 \
+ --epoch 19 \
+ --batch_size=512 \
+ --lr=0.001 \
+ --iter_per_epoch=200 \
+ --nfeat=5500 \
+ --nfield=10 \
+ --nemb=10 \
+ --base_dir ../exp_data/ \
+ --dataset frappe \
+ --num_labels 2 \
+ --device=cpu \
+ --log_folder baseline_frappe \
+ --result_dir ./internal/ml/model_selection/exp_result/
+
+
+# rl (reinforcement learning)
+python internal/ml/model_selection/exps/baseline/train_with_rl.py
+
+
+# re (regularized evolution)
+python internal/ml/model_selection/exps/baseline/train_with_ea.py \
+ --search_space mlp_sp \
+ --num_layers 4 \
+ --hidden_choice_len 20 \
+ --epoch 19 \
+ --batch_size=512 \
+ --lr=0.001 \
+ --iter_per_epoch=200 \
+ --nfeat=5500 \
+ --nfield=10 \
+ --nemb=10 \
+ --base_dir ../exp_data/ \
+ --dataset frappe \
+ --num_labels 2 \
+ --device=cpu \
+ --log_folder baseline_frappe \
+ --result_dir ./internal/ml/model_selection/exp_result/
+
+# bohb (Bayesian optimization + Hyperband)
+python internal/ml/model_selection/exps/baseline/train_bohb.py
+
+############## draw the graph ##############
+python internal/ml/model_selection/exps/baseline/draw_benchmark_train_based.py --dataset frappe
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-img/convert_api_2_json.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-img/convert_api_2_json.sh
new file mode 100644
index 000000000..8d71ff283
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-img/convert_api_2_json.sh
@@ -0,0 +1,29 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+conda activate trails
+
+
+# pip install nats_bench
+
+python internal/ml/model_selection/exps/nas_bench_img/0_characterize_gt.py
+python internal/ml/model_selection/exps/nas_bench_img/0_parse_testacc_101.py
+python internal/ml/model_selection/exps/nas_bench_img/0_parse_testacc_201.py
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-img/explore_all_models.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-img/explore_all_models.sh
new file mode 100644
index 000000000..aea5ff9e7
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-img/explore_all_models.sh
@@ -0,0 +1,77 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+conda activate trails
+
+
+# cifar10 + nb101
+python ./internal/ml/model_selection/exps/nas_bench_img/1_explore_models_100_run.py \
+ --search_space=nasbench101 \
+ --api_loc=nasbench_only108.pkl \
+ --base_dir=../exp_data/ \
+ --dataset=cifar10 \
+ --num_labels=10 \
+ --device=cpu \
+ --log_folder=log_img_explore_ea \
+ --result_dir=./internal/ml/model_selection/exp_result/
+
+
+# cifar10 + nb201
+python ./internal/ml/model_selection/exps/nas_bench_img/1_explore_models_100_run.py \
+ --search_space=nasbench201 \
+ --api_loc=NAS-Bench-201-v1_1-096897.pth \
+ --base_dir=../exp_data/ \
+ --dataset=cifar10 \
+ --init_channels=16 \
+ --num_stacks=3 \
+ --num_modules_per_stack=3 \
+ --num_labels=10 \
+ --device=cpu \
+ --log_folder=log_img_explore_ea \
+ --result_dir=./internal/ml/model_selection/exp_result/
+
+
+# cifar100 + nb201
+python ./internal/ml/model_selection/exps/nas_bench_img/1_explore_models_100_run.py \
+ --search_space=nasbench201 \
+ --api_loc=NAS-Bench-201-v1_1-096897.pth \
+ --base_dir=../exp_data/ \
+ --dataset=cifar100 \
+ --init_channels=16 \
+ --num_stacks=3 \
+ --num_modules_per_stack=3 \
+ --num_labels=100 \
+ --device=cpu \
+ --log_folder=log_img_explore_ea \
+ --result_dir=./internal/ml/model_selection/exp_result/
+
+
+# imgnet + nb201
+python ./internal/ml/model_selection/exps/nas_bench_img/1_explore_models_100_run.py \
+ --search_space=nasbench201 \
+ --api_loc=NAS-Bench-201-v1_1-096897.pth \
+ --base_dir=../exp_data/ \
+ --dataset=ImageNet16-120 \
+ --init_channels=16 \
+ --num_stacks=3 \
+ --num_modules_per_stack=3 \
+ --num_labels=120 \
+ --device=cpu \
+ --log_folder=log_img_explore_ea \
+ --result_dir=./internal/ml/model_selection/exp_result/
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-img/score_all_models.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-img/score_all_models.sh
new file mode 100644
index 000000000..1e7e9bf3b
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-img/score_all_models.sh
@@ -0,0 +1,75 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+
+
+for i in {1..4}
+do
+ # cifar10 + nb101
+# /home/xingnaili/miniconda3/envs/trails/bin/python ./internal/ml/model_selection/exps/nas_bench_tabular/4.seq_score_online.py \
+# --models_explore=1200 \
+# --search_space=nasbench101 \
+# --api_loc=nasbench_only108.pkl \
+# --base_dir=/hdd1/xingnaili/exp_data/ \
+# --dataset=cifar10 \
+# --batch_size=32 \
+# --num_labels=10 \
+# --device=cuda:0 \
+# --log_folder=log_score_all_img10_101 \
+# --result_dir=./internal/ml/model_selection/exp_result/
+
+ # cifar10 + nb201
+ /home/xingnaili/miniconda3/envs/trails/bin/python ./internal/ml/model_selection/exps/nas_bench_tabular/4.seq_score_online.py \
+ --models_explore=1200 \
+ --search_space=nasbench201 \
+ --api_loc=NAS-Bench-201-v1_1-096897.pth \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --dataset=cifar10 \
+ --batch_size=32 \
+ --num_labels=10 \
+ --device=cpu \
+ --log_folder=log_score_all_img10 \
+ --result_dir=./internal/ml/model_selection/exp_result/
+
+ # cifar100 + nb201
+ /home/xingnaili/miniconda3/envs/trails/bin/python ./internal/ml/model_selection/exps/nas_bench_tabular/4.seq_score_online.py \
+ --models_explore=1200 \
+ --search_space=nasbench201 \
+ --api_loc=NAS-Bench-201-v1_1-096897.pth \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --dataset=cifar100 \
+ --batch_size=32 \
+ --num_labels=100 \
+ --device=cpu \
+ --log_folder=log_score_all_img100 \
+ --result_dir=./internal/ml/model_selection/exp_result/
+
+ # imgnet + nb201
+ /home/xingnaili/miniconda3/envs/trails/bin/python ./internal/ml/model_selection/exps/nas_bench_tabular/4.seq_score_online.py \
+ --models_explore=1200 \
+ --search_space=nasbench201 \
+ --api_loc=NAS-Bench-201-v1_1-096897.pth \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --dataset=ImageNet16-120 \
+ --batch_size=32 \
+ --num_labels=120 \
+ --device=cpu \
+ --log_folder=log_score_all_img_imgnet \
+ --result_dir=./internal/ml/model_selection/exp_result/
+done
\ No newline at end of file
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/score_all_modesl_criteo.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/score_all_modesl_criteo.sh
new file mode 100644
index 000000000..004ecb1a6
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/score_all_modesl_criteo.sh
@@ -0,0 +1,43 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+
+
+nohup python ./internal/ml/model_selection/exps/nas_bench_tabular/4.seq_score_online.py \
+ --embedding_cache_filtering=True \
+ --models_explore=9999 \
+ --tfmem=express_flow \
+ --log_name=score_based \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=10 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cpu \
+ --batch_size=32 \
+ --dataset=criteo \
+ --nfeat=2100000 \
+ --nfield=39 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result/ \
+ --log_folder=log_score_time_criteo > outputCriScorAll.log&
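+
+# The scoring job runs in the background via nohup; progress can be followed with:
+#   tail -f outputCriScorAll.log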
+
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/score_all_modesl_frappe.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/score_all_modesl_frappe.sh
new file mode 100644
index 000000000..81d4ff12a
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/score_all_modesl_frappe.sh
@@ -0,0 +1,44 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+conda activate trails
+
+
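+# Same scoring run for the Frappe dataset (nfeat=5500, nfield=10); stdout goes to output.log.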
+nohup python ./internal/ml/model_selection/exps/nas_bench_tabular/4.seq_score_online.py \
+ --embedding_cache_filtering=True \
+ --models_explore=159999 \
+ --tfmem=express_flow \
+ --log_name=score_based \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=20 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cpu \
+ --batch_size=32 \
+ --dataset=frappe \
+ --nfeat=5500 \
+ --nfield=10 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result/ \
+ --log_folder=log_score_time_frappe > output.log&
+
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/score_all_modesl_uci.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/score_all_modesl_uci.sh
new file mode 100644
index 000000000..99dfe0e4d
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/score_all_modesl_uci.sh
@@ -0,0 +1,44 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+conda activate trails
+
+
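+# Same scoring run for the UCI Diabetes dataset (nfeat=369, nfield=43); stdout goes to outputUciScoreALl.log.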
+nohup python ./internal/ml/model_selection/exps/nas_bench_tabular/4.seq_score_online.py \
+ --embedding_cache_filtering=True \
+ --models_explore=159999 \
+ --tfmem=express_flow \
+ --log_name=score_based \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=20 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cpu \
+ --batch_size=32 \
+ --dataset=uci_diabetes \
+ --nfeat=369 \
+ --nfield=43 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result/ \
+ --log_folder=log_score_time_uci > outputUciScoreALl.log&
+
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_criteo.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_criteo.sh
new file mode 100644
index 000000000..3d11671c8
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_criteo.sh
@@ -0,0 +1,63 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+conda activate trails
+
+
+worker_id=0
+GPU_NUM=9
+worker_each_gpu=6
+total_workers=$((worker_each_gpu*GPU_NUM))
+
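+# Emit one training command per worker into train_all_models_criteo_seq.sh:
+# GPU_NUM GPUs x worker_each_gpu workers, each worker training its own slice of the
+# pre-partitioned model file (selected via --worker_id / --total_workers).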
+for((gpu_id=0; gpu_id < GPU_NUM; ++gpu_id)); do
+# echo "GPU id is $gpu_id"
+ for((i=0; i < worker_each_gpu; ++i)); do
+    echo "Assigning task to worker $worker_id"
+ echo "nohup python ./internal/ml/model_selection/exps/nas_bench_tabular/2.seq_train_online.py \
+ --log_name=baseline_train_based \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=10 \
+ --base_dir=../exp_data/ \
+ --num_labels=2 \
+ --device=cuda:$gpu_id \
+ --batch_size=1024 \
+ --lr=0.001 \
+ --epoch=10 \
+ --iter_per_epoch=2000 \
+ --dataset=criteo \
+ --nfeat=2100000 \
+ --nfield=39 \
+ --nemb=10 \
+ --worker_id=$worker_id \
+ --total_workers=$total_workers \
+ --workers=0 \
+ --log_folder=log_train_criteo \
+ --total_models_per_worker=-1 \
+ --result_dir=./internal/ml/model_selection/exp_result/ \
+ --pre_partitioned_file=./internal/ml/model_selection/exps/nas_bench_tabular/sampled_models_10000_models.json & ">> train_all_models_criteo_seq.sh
+
+# sleep 1
+ worker_id=$((worker_id+1))
+ done
+done
+
+
+# pkill -9 -f 2.seq_train_online.py
+# run with bash internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_criteo.sh >criteobash &
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_criteo_distirbuted.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_criteo_distirbuted.sh
new file mode 100644
index 000000000..39e770124
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_criteo_distirbuted.sh
@@ -0,0 +1,64 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
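+# Launch distributed sequential training in the background, one process per dataset;
+# parallelism is controlled by --worker_each_gpu and --gpu_num.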
+# frappe
+python exps/main_v2/ground_truth/2.seq_train_dist_online.py \
+ --log_name=baseline_train_based \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=20 \
+ --base_dir=../exp_data/ \
+ --num_labels=1 \
+ --device=gpu \
+ --batch_size=1024 \
+ --lr=0.001 \
+ --epoch=10 \
+ --iter_per_epoch=100 \
+ --dataset=frappe \
+ --nfeat=5500 \
+ --nfield=10 \
+ --nemb=10 \
+ --total_models_per_worker=10 \
+ --workers=0 \
+ --worker_each_gpu=1 \
+ --gpu_num=8 \
+ --log_folder=LogFrappee \
+ --pre_partitioned_file=./exps/main_v2/ground_truth/sampled_models_10000_models.json &
+
+# criteo
+python exps/main_v2/ground_truth/2.seq_train_dist_online.py \
+ --log_name=baseline_train_based \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=10 \
+ --base_dir=../exp_data/ \
+ --num_labels=1 \
+ --device=gpu \
+ --batch_size=1024 \
+ --lr=0.001 \
+ --epoch=10 \
+ --iter_per_epoch=2000 \
+ --dataset=criteo \
+ --nfeat=2100000 \
+ --nfield=39 \
+ --nemb=10 \
+ --workers=0 \
+ --worker_each_gpu=9 \
+ --gpu_num=8 \
+ --log_folder=LogCriteo \
+ --pre_partitioned_file=./exps/main_v2/ground_truth/sampled_models_10000_models.json &
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_diabetes.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_diabetes.sh
new file mode 100644
index 000000000..397836405
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_diabetes.sh
@@ -0,0 +1,63 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+conda activate trails
+
+
+worker_id=0
+GPU_NUM=8
+worker_each_gpu=4
+total_workers=$((worker_each_gpu*GPU_NUM))
+
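+# Emit one training command per worker for UCI Diabetes into train_all_models_diabetes_seq.sh.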
+for((gpu_id=0; gpu_id < GPU_NUM; ++gpu_id)); do
+ for((i=0; i < worker_each_gpu; ++i)); do
+
+ echo "nohup python ./internal/ml/model_selection/exps/nas_bench_tabular/2.seq_train_online.py \
+ --log_name=baseline_train_based \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=20 \
+ --base_dir=../exp_data/ \
+ --num_labels=2 \
+ --device=cuda:$gpu_id \
+ --batch_size=1024 \
+ --lr=0.001 \
+ --epoch=40 \
+ --iter_per_epoch=200 \
+ --dataset=uci_diabetes \
+ --nfeat=369 \
+ --nfield=43 \
+ --nemb=10 \
+ --worker_id=$worker_id \
+ --total_workers=$total_workers \
+ --workers=0 \
+ --log_folder=log_train_uci \
+ --total_models_per_worker=-1 \
+ --result_dir=./internal/ml/model_selection/exp_result/ \
+ --pre_partitioned_file=./internal/ml/model_selection/exps/nas_bench_tabular/uci_left_8k_models.json > outputuci.log& ">> train_all_models_diabetes_seq.sh
+
+ worker_id=$((worker_id+1))
+ done
+done
+
+
+# pkill -9 -f ./internal/ml/model_selection/exps/nas_bench_tabular//2.seq_train_online.py
+# pkill -9 -f /home/naili/miniconda3/envs/firmest_torch11/bin/python
+
+# run with bash internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_diabetes.sh >ucibash &
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_frappe.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_frappe.sh
new file mode 100644
index 000000000..8d4af9eac
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_frappe.sh
@@ -0,0 +1,61 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+conda activate trails
+
+
+worker_id=0
+GPU_NUM=8
+worker_each_gpu=16
+total_workers=$((worker_each_gpu*GPU_NUM))
+
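+# Emit one training command per worker for Frappe into train_all_models_frappe_seq.sh.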
+for((gpu_id=0; gpu_id < GPU_NUM; ++gpu_id)); do
+# echo "GPU id is $gpu_id"
+ for((i=0; i < worker_each_gpu; ++i)); do
+ echo "nohup python ./internal/ml/model_selection/exps/nas_bench_tabular/2.seq_train_online.py \
+ --log_name=baseline_train_based \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=20 \
+ --base_dir=/home/shaofeng/naili/firmest_data/ \
+ --num_labels=2 \
+ --device=cuda:$gpu_id \
+ --batch_size=512 \
+ --lr=0.001 \
+ --epoch=20 \
+ --iter_per_epoch=200 \
+ --dataset=frappe \
+ --nfeat=5500 \
+ --nfield=10 \
+ --nemb=10 \
+ --worker_id=$worker_id \
+ --total_workers=$total_workers \
+ --workers=0 \
+ --log_folder=log_frappe \
+ --total_models_per_worker=-1 \
+ --result_dir=./internal/ml/model_selection/exp_result/ \
+ --pre_partitioned_file=./internal/ml/model_selection/exps/nas_bench_tabular/sampled_models_all.json & ">> train_all_models_frappe_seq.sh
+
+ sleep 1
+ worker_id=$((worker_id+1))
+ done
+done
+
+
+# pkill -9 -f internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_frappe.sh
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_one_model_dev.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_one_model_dev.sh
new file mode 100644
index 000000000..86e36c2f5
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_one_model_dev.sh
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+
+python ./internal/ml/model_selection/exps/nas_bench_tabular/0.train_one_model.py \
+ --log_name=baseline_train_based \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=20 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cuda:0 \
+ --batch_size=512 \
+ --lr=0.001 \
+ --epoch=20 \
+ --iter_per_epoch=200 \
+ --dataset=frappe \
+ --nfeat=5500 \
+ --nfield=10 \
+ --nemb=10 \
+ --worker_id=0 \
+ --total_workers=1 \
+ --workers=1 \
+ --result_dir=./internal/ml/model_selection/exp_result/ \
+ --log_folder=log_frappe
\ No newline at end of file
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_params_tune_criteo.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_params_tune_criteo.sh
new file mode 100644
index 000000000..a3ea08790
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_params_tune_criteo.sh
@@ -0,0 +1,162 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+conda activate trails
+
+
+
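+# Epoch-count sweep for Criteo (5, 10, 20, 40, 60, 80, 100 epochs); each run is assigned
+# a GPU via --device and logs to criteo_<epoch>.log.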
+# default setting.
+python ./internal/ml/model_selection/exps/nas_bench_tabular/0.train_one_model.py \
+ --log_name=baseline_train_based \
+ --search_space=mlp_sp \
+ --base_dir=../exp_data/ \
+ --num_labels=2 \
+ --device=cuda:0 \
+ --batch_size=1024 \
+ --lr=0.001 \
+ --epoch=5 \
+ --iter_per_epoch=2000 \
+ --dataset=criteo \
+ --nfeat=2100000 \
+ --nfield=39 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result/ \
+ --log_folder=log_criteo_train_tune >criteo_5.log &
+
+
+python ./internal/ml/model_selection/exps/nas_bench_tabular/0.train_one_model.py \
+ --log_name=baseline_train_based \
+ --search_space=mlp_sp \
+ --base_dir=../exp_data/ \
+ --num_labels=2 \
+ --device=cuda:0 \
+ --batch_size=1024 \
+ --lr=0.001 \
+ --epoch=10 \
+ --iter_per_epoch=2000 \
+ --dataset=criteo \
+ --nfeat=2100000 \
+ --nfield=39 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result/ \
+ --log_folder=log_criteo_train_tune >criteo_10.log &
+
+
+
+python ./internal/ml/model_selection/exps/nas_bench_tabular/0.train_one_model.py \
+ --log_name=baseline_train_based \
+ --search_space=mlp_sp \
+ --base_dir=../exp_data/ \
+ --num_labels=2 \
+ --device=cuda:1 \
+ --batch_size=1024 \
+ --lr=0.001 \
+ --epoch=20 \
+ --iter_per_epoch=2000 \
+ --dataset=criteo \
+ --nfeat=2100000 \
+ --nfield=39 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result/ \
+ --log_folder=log_criteo_train_tune >criteo_20.log &
+
+
+
+
+python ./internal/ml/model_selection/exps/nas_bench_tabular/0.train_one_model.py \
+ --log_name=baseline_train_based \
+ --search_space=mlp_sp \
+ --base_dir=../exp_data/ \
+ --num_labels=2 \
+ --device=cuda:2 \
+ --batch_size=1024 \
+ --lr=0.001 \
+ --epoch=40 \
+ --iter_per_epoch=2000 \
+ --dataset=criteo \
+ --nfeat=2100000 \
+ --nfield=39 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result/ \
+ --log_folder=log_criteo_train_tune >criteo_40.log &
+
+
+
+python ./internal/ml/model_selection/exps/nas_bench_tabular/0.train_one_model.py \
+ --log_name=baseline_train_based \
+ --search_space=mlp_sp \
+ --base_dir=../exp_data/ \
+ --num_labels=2 \
+ --device=cuda:3 \
+ --batch_size=1024 \
+ --lr=0.001 \
+ --epoch=60 \
+ --iter_per_epoch=2000 \
+ --dataset=criteo \
+ --nfeat=2100000 \
+ --nfield=39 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result/ \
+ --log_folder=log_criteo_train_tune >criteo_60.log &
+
+
+
+python ./internal/ml/model_selection/exps/nas_bench_tabular/0.train_one_model.py \
+ --log_name=baseline_train_based \
+ --search_space=mlp_sp \
+ --base_dir=../exp_data/ \
+ --num_labels=2 \
+ --device=cuda:4 \
+ --batch_size=1024 \
+ --lr=0.001 \
+ --epoch=80 \
+ --iter_per_epoch=2000 \
+ --dataset=criteo \
+ --nfeat=2100000 \
+ --nfield=39 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result/ \
+ --log_folder=log_criteo_train_tune >criteo_80.log &
+
+
+
+python ./internal/ml/model_selection/exps/nas_bench_tabular/0.train_one_model.py \
+ --log_name=baseline_train_based \
+ --search_space=mlp_sp \
+ --base_dir=../exp_data/ \
+ --num_labels=2 \
+ --device=cuda:5 \
+ --batch_size=1024 \
+ --lr=0.001 \
+ --epoch=100 \
+ --iter_per_epoch=2000 \
+ --dataset=criteo \
+ --nfeat=2100000 \
+ --nfield=39 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result/ \
+ --log_folder=log_criteo_train_tune >criteo_100.log &
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_params_tune_diabetes.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_params_tune_diabetes.sh
new file mode 100644
index 000000000..697816e24
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_params_tune_diabetes.sh
@@ -0,0 +1,86 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+conda activate trails
+
+
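+# Epoch-count sweep for UCI Diabetes (3, 5, 7 epochs); each run logs to uci_<epoch>.log.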
+nohup python ./internal/ml/model_selection/exps/nas_bench_tabular/0.train_one_model.py \
+ --log_name=baseline_train_based \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=20 \
+ --base_dir=../exp_data/ \
+ --num_labels=2 \
+ --device=cuda:0 \
+ --batch_size=1024 \
+ --lr=0.001 \
+ --epoch=3 \
+ --iter_per_epoch=200 \
+ --dataset=uci_diabetes \
+ --nfeat=369 \
+ --nfield=43 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result/ \
+ --log_folder=log_uci_train_tune >uci_3.log &
+
+
+
+nohup python ./internal/ml/model_selection/exps/nas_bench_tabular/0.train_one_model.py \
+ --log_name=baseline_train_based \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=20 \
+ --base_dir=../exp_data/ \
+ --num_labels=2 \
+ --device=cuda:1 \
+ --batch_size=1024 \
+ --lr=0.001 \
+ --epoch=5 \
+ --iter_per_epoch=200 \
+ --dataset=uci_diabetes \
+ --nfeat=369 \
+ --nfield=43 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result/ \
+ --log_folder=log_uci_train_tune >uci_5.log &
+
+
+# default setting.
+nohup python ./internal/ml/model_selection/exps/nas_bench_tabular/0.train_one_model.py \
+ --log_name=baseline_train_based \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=20 \
+ --base_dir=../exp_data/ \
+ --num_labels=2 \
+ --device=cuda:2 \
+ --batch_size=1024 \
+ --lr=0.001 \
+ --epoch=7 \
+ --iter_per_epoch=200 \
+ --dataset=uci_diabetes \
+ --nfeat=369 \
+ --nfield=43 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result/ \
+ --log_folder=log_uci_train_tune >uci_7.log &
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/pre_processing/pre_processing_data.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/pre_processing/pre_processing_data.sh
new file mode 100644
index 000000000..a54caf042
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/pre_processing/pre_processing_data.sh
@@ -0,0 +1,63 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
+conda activate trails
+
+
+
+
+python ./internal/ml/model_selection/exps/nas_bench_tabular/4.seq_score_online.py \
+ --models_explore=1000 \
+ --log_name=score_based \
+ --search_space=mlp_sp \
+ --num_layers=4 \
+ --hidden_choice_len=10 \
+ --base_dir=/hdd1/xingnaili/exp_data/ \
+ --num_labels=2 \
+ --device=cuda:6 \
+ --batch_size=32 \
+ --dataset=criteo \
+ --nfeat=2100000 \
+ --nfield=39 \
+ --nemb=10 \
+ --workers=0 \
+ --result_dir=./internal/ml/model_selection/exp_result/ \
+ --log_folder=log_score_time_criteo > outputCriScorAll.log&
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/shared_config.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/shared_config.py
new file mode 100644
index 000000000..f40ac15d6
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/shared_config.py
@@ -0,0 +1,112 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import calendar
+import os
+import time
+import argparse
+import configparser
+
+
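+# Illustrative sketch of the expected .ini layout. Section and key names are taken from
+# the parser calls below; the values shown here are placeholders, not defaults:
+#
+#   [DEFAULT]
+#   log_name = main
+#   budget = 100
+#   device = cpu
+#   log_folder = ./logs
+#   result_dir = ./exp_result/
+#   num_points = 12
+#   max_load = -1
+#
+#   [SAMPLER]
+#   search_space = mlp_sp
+#   population_size = 10
+#   sample_size = 3
+#   simple_score_sum = True
+#
+#   (remaining sections: NB101, NB201, MLP, MLP_TRAINER, DATASET, SEQ_TRAIN,
+#    DIS_TRAIN, TUNE_INTERVAL, ANYTIME, SYS_PERFORMANCE, SERVER, DB_CONFIG)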
+def parse_config_arguments(config_path: str):
+ parser = configparser.ConfigParser()
+ parser.read(config_path)
+
+ args = argparse.Namespace()
+
+ # job config under DEFAULT
+ args.log_name = parser.get('DEFAULT', 'log_name')
+ args.budget = parser.getint('DEFAULT', 'budget')
+ args.device = parser.get('DEFAULT', 'device')
+ args.log_folder = parser.get('DEFAULT', 'log_folder')
+ args.result_dir = parser.get('DEFAULT', 'result_dir')
+ args.num_points = parser.getint('DEFAULT', 'num_points')
+ args.max_load = parser.getint('DEFAULT', 'max_load')
+
+ # sampler args
+ args.search_space = parser.get('SAMPLER', 'search_space')
+ args.population_size = parser.getint('SAMPLER', 'population_size')
+ args.sample_size = parser.getint('SAMPLER', 'sample_size')
+ args.simple_score_sum = parser.getboolean('SAMPLER', 'simple_score_sum')
+
+ # nb101 args
+ args.api_loc = parser.get('NB101', 'api_loc')
+ args.init_channels = parser.getint('NB101', 'init_channels')
+ args.bn = parser.getint('NB101', 'bn')
+ args.num_stacks = parser.getint('NB101', 'num_stacks')
+ args.num_modules_per_stack = parser.getint('NB101', 'num_modules_per_stack')
+
+ # nb201 args
+ args.init_w_type = parser.get('NB201', 'init_w_type')
+ args.init_b_type = parser.get('NB201', 'init_b_type')
+ args.arch_size = parser.getint('NB201', 'arch_size')
+
+ # mlp args
+ args.num_layers = parser.getint('MLP', 'num_layers')
+ args.hidden_choice_len = parser.getint('MLP', 'hidden_choice_len')
+
+ # mlp_trainer args
+ args.epoch = parser.getint('MLP_TRAINER', 'epoch')
+ args.batch_size = parser.getint('MLP_TRAINER', 'batch_size')
+ args.lr = parser.getfloat('MLP_TRAINER', 'lr')
+ args.patience = parser.getint('MLP_TRAINER', 'patience')
+ args.iter_per_epoch = parser.getint('MLP_TRAINER', 'iter_per_epoch')
+ args.nfeat = parser.getint('MLP_TRAINER', 'nfeat')
+ args.nfield = parser.getint('MLP_TRAINER', 'nfield')
+ args.nemb = parser.getint('MLP_TRAINER', 'nemb')
+ args.report_freq = parser.getint('MLP_TRAINER', 'report_freq')
+ args.workers = parser.getint('MLP_TRAINER', 'workers')
+
+ # dataset args
+ args.base_dir = parser.get('DATASET', 'base_dir')
+ args.dataset = parser.get('DATASET', 'dataset')
+ args.num_labels = parser.getint('DATASET', 'num_labels')
+
+ # seq_train args
+ args.worker_id = parser.getint('SEQ_TRAIN', 'worker_id')
+ args.total_workers = parser.getint('SEQ_TRAIN', 'total_workers')
+ args.total_models_per_worker = parser.getint('SEQ_TRAIN', 'total_models_per_worker')
+ args.pre_partitioned_file = parser.get('SEQ_TRAIN', 'pre_partitioned_file')
+
+ # dis_train args
+ args.worker_each_gpu = parser.getint('DIS_TRAIN', 'worker_each_gpu')
+ args.gpu_num = parser.getint('DIS_TRAIN', 'gpu_num')
+
+ # tune_interval args
+ args.kn_rate = parser.getint('TUNE_INTERVAL', 'kn_rate')
+
+ # anytime args
+ args.only_phase1 = parser.getboolean('ANYTIME', 'only_phase1')
+ args.is_simulate = parser.getboolean('ANYTIME', 'is_simulate')
+
+ # system performance exps
+ args.models_explore = parser.getint('SYS_PERFORMANCE', 'models_explore')
+ args.tfmem = parser.get('SYS_PERFORMANCE', 'tfmem')
+ args.embedding_cache_filtering = parser.getboolean('SYS_PERFORMANCE', 'embedding_cache_filtering')
+ args.concurrency = parser.getint('SYS_PERFORMANCE', 'concurrency')
+
+ args.refinement_url = parser.get('SERVER', 'refinement_url')
+ args.cache_svc_url = parser.get('SERVER', 'cache_svc_url')
+
+ # db config
+ args.db_name = parser.get('DB_CONFIG', 'db_name')
+ args.db_user = parser.get('DB_CONFIG', 'db_user')
+ args.db_host = parser.get('DB_CONFIG', 'db_host')
+ args.db_port = parser.get('DB_CONFIG', 'db_port')
+
+ return args
\ No newline at end of file
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/__init__.py
new file mode 100644
index 000000000..3df60b02f
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/__init__.py
@@ -0,0 +1,17 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/common/constant.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/common/constant.py
new file mode 100644
index 000000000..36227ec38
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/common/constant.py
@@ -0,0 +1,80 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+class CommonVars:
+
+ # SAMPLER
+ TEST_SAMPLER = "sequence"
+ RANDOM_SAMPLER = "random"
+ RL_SAMPLER = "rl"
+ EA_SAMPLER = "ea"
+ BOHB_SAMPLER = "bohb"
+
+ # EVALUATOR
+ ExpressFlow = "express_flow"
+
+ GRAD_NORM = "grad_norm"
+ GRAD_PLAIN = "grad_plain"
+
+ JACOB_CONV = "jacob_conv"
+ NAS_WOT = "nas_wot"
+
+ NTK_CONDNUM = "ntk_cond_num"
+ NTK_TRACE = "ntk_trace"
+ NTK_TRACE_APPROX = "ntk_trace_approx"
+
+ PRUNE_FISHER = "fisher"
+ PRUNE_GRASP = "grasp"
+ PRUNE_SNIP = "snip"
+ PRUNE_SYNFLOW = "synflow"
+
+ WEIGHT_NORM = "weight_norm"
+
+ ALL_EVALUATOR = "all_matrix"
+
+ # SEARCH SPACE
+ NASBENCH101 = "nas-bench-101"
+ NASBENCH201 = "nas-bench-201"
+
+ # correlation coefficient metrics
+ KendallTau = "KendallTau"
+ Spearman = "Spearman"
+ Pearson = "Pearson"
+ AvgCorrelation = "average_correlation"
+ AllCorrelation = "all_correlation"
+
+
+class Config:
+
+ MLPSP = "mlp_sp"
+ NB101 = "nasbench101"
+ NB201 = "nasbench201"
+ DARTS = "darts"
+ NDS = "NDS"
+
+ # vision dataset
+ c10_valid = "cifar10-valid"
+ c10 = "cifar10"
+ c100 = "cifar100"
+ imgNet = "ImageNet16-120"
+
+ # struct dataset
+ Frappe = "frappe"
+ Criteo = "criteo"
+ UCIDataset = "uci_diabetes"
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/common/structure.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/common/structure.py
new file mode 100644
index 000000000..521f45f1e
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/common/structure.py
@@ -0,0 +1,107 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import json
+
+
+class ModelEvaData:
+ """
+    Evaluation worker sends model scores to the search strategy.
+ """
+
+ def __init__(self, model_id: str = None, model_score: dict = None):
+ if model_score is None:
+ model_score = {}
+ self.model_id = model_id
+ self.model_score = model_score
+
+ def serialize_model(self) -> str:
+ data = {"model_id": self.model_id,
+ "model_score": self.model_score}
+ return json.dumps(data)
+
+ @classmethod
+ def deserialize(cls, data_str: str):
+ data = json.loads(data_str)
+ res = cls(
+ data["model_id"],
+ data["model_score"])
+ return res
+
+
+class ModelAcquireData:
+ """
+    Evaluation worker acquires a model from the search strategy.
+    Serialization/deserialization keeps the design scalable: the project can be decoupled into multiple services.
+ """
+
+ def __init__(self, model_id: str, model_encoding: str, is_last: bool = False,
+ spi_seconds=None, spi_mini_batch=None):
+ self.is_last = is_last
+ self.model_id = model_id
+ self.model_encoding = model_encoding
+
+        # these fields are only used when running via SPI
+ self.spi_seconds = spi_seconds
+ self.spi_mini_batch = spi_mini_batch
+
+ def serialize_model(self) -> str:
+ data = {"is_last": self.is_last,
+ "model_id": self.model_id,
+ "model_encoding": self.model_encoding,
+ "spi_seconds": self.spi_seconds,
+ "spi_mini_batch": self.spi_mini_batch}
+
+ return json.dumps(data)
+
+ @classmethod
+ def deserialize(cls, data_str: str):
+ data = json.loads(data_str)
+        # use keyword arguments so spi_seconds and spi_mini_batch cannot be swapped
+        res = cls(
+            data["model_id"],
+            data["model_encoding"],
+            is_last=data["is_last"],
+            spi_seconds=data["spi_seconds"],
+            spi_mini_batch=data["spi_mini_batch"])
+ return res
+
+
+class ClientStruct:
+ """
+    Client-side request data (budget and dataset).
+ """
+
+ def __init__(self, budget: float, dataset: str):
+ self.budget = budget
+ self.dataset = dataset
+
+ @classmethod
+ def deserialize(cls, data_str: str):
+ data = json.loads(data_str)
+ res = cls(
+ data["budget"],
+ data["dataset"]
+ )
+ return res
+
+
+if __name__ == "__main__":
+ data = ModelEvaData("1", {"a": 1, "b": 2})
+ data_str = data.serialize_model()
+ res = ModelEvaData.deserialize(data_str)
+ print(res)
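+
+    # Illustrative round-trip for ModelAcquireData as well (values are placeholders).
+    acquire = ModelAcquireData("1", "encoding-str", is_last=False,
+                               spi_seconds=1.0, spi_mini_batch=32)
+    print(ModelAcquireData.deserialize(acquire.serialize_model()).model_encoding)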
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/__init__.py
new file mode 100644
index 000000000..222757523
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/__init__.py
@@ -0,0 +1,33 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from src.common.constant import CommonVars
+from src.controller.sampler_ea.regularized_ea import RegularizedEASampler
+from src.controller.sampler_all.seq_sampler import SequenceSampler
+from src.controller.sampler_rl.reinforcement_learning import RLSampler
+from src.controller.sampler_rand.random_sample import RandomSampler
+
+sampler_register = {
+ CommonVars.TEST_SAMPLER: SequenceSampler,
+ # CommonVars.RANDOM_SAMPLER: RandomSampler,
+ CommonVars.RANDOM_SAMPLER: SequenceSampler,
+ CommonVars.RL_SAMPLER: RLSampler,
+ CommonVars.EA_SAMPLER: RegularizedEASampler,
+}
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/controler.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/controler.py
new file mode 100644
index 000000000..2770a72ee
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/controler.py
@@ -0,0 +1,191 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import time
+
+from src.controller.core.sample import Sampler
+from src.third_pkg.models import CellStructure
+
+
+class ModelScore:
+ def __init__(self, model_id, score):
+ self.model_id = model_id
+ self.score = score
+
+ def __repr__(self):
+ return "m_{}_s_{}".format(self.model_id, self.score)
+
+
+# for binary insert
+def binary_insert_get_rank(rank_list: list, new_item: ModelScore) -> int:
+ """
+    Insert new_item into rank_list (kept in ascending score order) and return its insertion index (rank).
+ :param rank_list:
+ :param new_item:
+ :return:
+ """
+ index = search_position(rank_list, new_item)
+ # search the position to insert into
+ rank_list.insert(index, new_item)
+ return index
+
+
+# O(logN) search the position to insert into
+def search_position(rank_list_m: list, new_item: ModelScore):
+ if len(rank_list_m) == 0:
+ return 0
+ left = 0
+ right = len(rank_list_m) - 1
+ while left + 1 < right:
+ mid = int((left + right) / 2)
+ if rank_list_m[mid].score <= new_item.score:
+ left = mid
+ else:
+ right = mid
+
+    # handle ties: the new item is inserted after entries with an equal score
+ if rank_list_m[right].score <= new_item.score:
+ return right + 1
+ elif rank_list_m[left].score <= new_item.score:
+ return left + 1
+ else:
+ return left
+
+
+class SampleController(object):
+ """
+    The controller drives the sample-and-score flow in the first phase
+    and records the results in its history.
+ """
+
+ def __init__(self, search_strategy: Sampler):
+        # Currently the EA strategy performs better than the other samplers.
+ self.search_strategy = search_strategy
+
+        # the larger the index, the better the model
+ self.ranked_models = []
+
+        # when simple_score_sum=False, history records each model's score per algorithm;
+        # when simple_score_sum=True, it records each model's summed score
+ self.history = {}
+
+ def sample_next_arch(self) -> (str, CellStructure):
+ """
+        Sample the next architecture from the search strategy.
+        :return: (arch_id, CellStructure)
+ """
+ return self.search_strategy.sample_next_arch(self.ranked_models)
+
+ def fit_sampler(self, arch_id: str, alg_score: dict, simple_score_sum: bool = False) -> float:
+ """
+ :param arch_id:
+ :param alg_score: {alg_name1: score1, alg_name2: score2}
+        :param simple_score_sum: if True, simply sum the scores (performs well);
+            if False, sum over their ranks (performs worse)
+ :return:
+ """
+ if simple_score_sum or len(alg_score.keys()) == 1:
+ score = self._use_pure_score_as_final_res(arch_id, alg_score)
+ else:
+ score = self._use_vote_rank_as_final_res(arch_id, alg_score)
+ self.search_strategy.fit_sampler(score)
+ return score
+
+ def _use_vote_rank_as_final_res(self, model_id: str, alg_score: dict):
+ """
+ :param model_id:
+ :param alg_score: {alg_name1: score1, alg_name2: score2}
+ """
+        # TODO (known limitation): a model is only treated as better when all of its scores, under every algorithm, exceed those of the previous one.
+ for alg in alg_score:
+ if alg not in self.history:
+ self.history[alg] = []
+
+ # add model and score to local list
+ for alg, score in alg_score.items():
+ binary_insert_get_rank(self.history[alg], ModelScore(model_id, score))
+
+ new_rank_score = self._re_rank_model_id(model_id, alg_score)
+ return new_rank_score
+
+ def _use_pure_score_as_final_res(self, model_id: str, alg_score: dict):
+ # get the key and sum the score of various alg
+ score_sum_key = "_".join(list(alg_score.keys()))
+ if score_sum_key not in self.history:
+ self.history[score_sum_key] = []
+ final_score = 0
+ for alg in alg_score:
+ final_score += float(alg_score[alg])
+ # insert and get rank
+ index = binary_insert_get_rank(self.history[score_sum_key], ModelScore(model_id, final_score))
+ self.ranked_models.insert(index, model_id)
+ return final_score
+
+ def _re_rank_model_id(self, model_id: str, alg_score: dict):
+        # TODO: re-rank everything to make self.ranked_models more accurate.
+ model_new_rank_score = {}
+ current_explored_models = 0
+ for alg, score in alg_score.items():
+ for rank_index in range(len(self.history[alg])):
+ current_explored_models = len(self.history[alg])
+ ms_ins = self.history[alg][rank_index]
+ # rank = index + 1, since index can be 0
+ if ms_ins.model_id in model_new_rank_score:
+ model_new_rank_score[ms_ins.model_id] += rank_index + 1
+ else:
+ model_new_rank_score[ms_ins.model_id] = rank_index + 1
+
+ for ele in model_new_rank_score.keys():
+ model_new_rank_score[ele] = model_new_rank_score[ele] / current_explored_models
+
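+        # sort by average rank ascending: the best models (highest average rank) end up
+        # at the tail of self.ranked_models, which is what get_current_top_k_models() returns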
+ self.ranked_models = [k for k, v in sorted(model_new_rank_score.items(), key=lambda item: item[1])]
+ new_rank_score = model_new_rank_score[model_id]
+ return new_rank_score
+
+ def get_current_top_k_models(self, k=-1):
+ """
+        Models are already ranked from low to high score.
+ :param k:
+ :return:
+ """
+ if k == -1:
+            # return all models
+ return self.ranked_models
+ else:
+ return self.ranked_models[-k:]
+
+
+if __name__ == "__main__":
+
+ rank_list = []
+ begin = time.time()
+ score_list = [1, 2, 3, 1, 2]
+ for i in range(5):
+ ms = ModelScore(i, score_list[i])
+ binary_insert_get_rank(rank_list, ms)
+ print(rank_list)
+ print(time.time() - begin)
+
+ rank_list = []
+ begin = time.time()
+ score_list = [1, 1, 1, 1, 1]
+ for i in range(5):
+ ms = ModelScore(i, score_list[i])
+ binary_insert_get_rank(rank_list, ms)
+ print(rank_list)
+ print(time.time() - begin)
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/core/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/core/__init__.py
new file mode 100644
index 000000000..4e04c2b3b
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/core/__init__.py
@@ -0,0 +1,17 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
\ No newline at end of file
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/core/metrics.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/core/metrics.py
new file mode 100644
index 000000000..77eeea32d
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/core/metrics.py
@@ -0,0 +1,46 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from enum import Enum, auto
+
+
+class Metric(Enum):
+ RAW = auto()
+ ALL = auto()
+
+ TRAIN_ACCURACY = auto()
+ VAL_ACCURACY = auto()
+ TEST_ACCURACY = auto()
+
+ TRAIN_LOSS = auto()
+ VAL_LOSS = auto()
+ TEST_LOSS = auto()
+
+ TRAIN_TIME = auto()
+ VAL_TIME = auto()
+ TEST_TIME = auto()
+
+ FLOPS = auto()
+ LATENCY = auto()
+ PARAMETERS = auto()
+ EPOCH = auto()
+ HP = auto()
+
+
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/core/sample.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/core/sample.py
new file mode 100644
index 000000000..b48066925
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/core/sample.py
@@ -0,0 +1,46 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from abc import abstractmethod
+
+from src.search_space.core.model_params import ModelMicroCfg
+from src.search_space.core.space import SpaceWrapper
+
+
+class Sampler:
+
+ def __init__(self, space: SpaceWrapper):
+ self.space = space
+
+ @abstractmethod
+ def sample_next_arch(self, sorted_model: list) -> (str, ModelMicroCfg):
+ """
+        Sample the next architecture.
+        :param sorted_model: the models sorted by score
+ :return:
+ """
+ raise NotImplementedError
+
+ @abstractmethod
+ def fit_sampler(self, score: float):
+ """
+ Fit the sampler with architecture's score.
+ :param score:
+ :return:
+ """
+ raise NotImplementedError
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_EA/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_EA/__init__.py
new file mode 100644
index 000000000..3df60b02f
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_EA/__init__.py
@@ -0,0 +1,17 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_RL/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_RL/__init__.py
new file mode 100644
index 000000000..01d705720
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_RL/__init__.py
@@ -0,0 +1,18 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_all/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_all/__init__.py
new file mode 100644
index 000000000..3df60b02f
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_all/__init__.py
@@ -0,0 +1,17 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_all/seq_sampler.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_all/seq_sampler.py
new file mode 100644
index 000000000..4eaf04ff3
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_all/seq_sampler.py
@@ -0,0 +1,50 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import random
+
+from src.controller.core.sample import Sampler
+from src.search_space.core.model_params import ModelMicroCfg
+from src.search_space.core.space import SpaceWrapper
+
+
+class SequenceSampler(Sampler):
+
+ def __init__(self, space: SpaceWrapper):
+ super().__init__(space)
+
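+        # generator yielding (arch_id, arch_micro) for every model in the search space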
+ self.arch_gene = self.space.sample_all_models()
+
+ def sample_next_arch(self, sorted_model: list = None) -> (str, ModelMicroCfg):
+ """
+        Sample the next architecture in sequence; at most 10k architectures can be sampled.
+ :return: arch_id, architecture
+ """
+
+ try:
+ arch_id, arch_micro = self.arch_gene.__next__()
+ return arch_id, arch_micro
+ except Exception as e:
+ if "StopIteration" in str(e):
+ print("the end")
+ return None, None
+ else:
+ raise e
+
+ def fit_sampler(self, score: float):
+ pass
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_ea/regularized_ea.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_ea/regularized_ea.py
new file mode 100644
index 000000000..62126bef6
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_ea/regularized_ea.py
@@ -0,0 +1,148 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import collections
+from src.search_space.core.model_params import ModelMicroCfg
+from src.controller.core.sample import Sampler
+import random
+from src.search_space.core.space import SpaceWrapper
+
+
+class Model(object):
+ def __init__(self):
+ self.arch = None
+ self.score = None
+
+ def __str__(self):
+ """Prints a readable version of this bitstring."""
+ return "{:}".format(self.arch)
+
+
+class RegularizedEASampler(Sampler):
+
+ def __init__(self, space: SpaceWrapper, population_size: int, sample_size: int):
+ super().__init__(space)
+
+ self.population_size = population_size
+ # list of object,
+ self.population = collections.deque()
+ # list of str, for duplicate checking
+ self.population_model_ids = collections.deque()
+
+ self.space = space
+ self.sample_size = sample_size
+ self.current_sampled = 0
+
+ # id here is to match the outside value.
+ self.current_arch_id = None
+ self.current_arch_micro = None
+
+ # use the visited to reduce the collapse
+ self.visited = {}
+ self.max_mutate_time = 2
+ self.max_mutate_sampler_time = 2
+
+ def sample_next_arch(self, sorted_model_ids: list) -> (str, ModelMicroCfg):
+ """
+ This function performs one evolution cycle. It produces a model and removes another.
+ Models are sampled randomly from the current population. If the population size is less than the
+ desired population size, a random architecture is added to the population.
+
+        :param sorted_model_ids: List of model ids sorted by some ranking criterion; used to select the best parent from the sampled candidates.
+ :return: Tuple of the architecture id and the architecture configuration (micro).
+ """
+ # Case 1: If population hasn't reached desired size, add random architectures
+ if len(self.population) < self.population_size:
+ while True:
+ arch_id, arch_micro = self.space.random_architecture_id()
+ # Ensure that EA population has no repeated value
+ if str(arch_id) not in self.population_model_ids:
+ break
+ self.current_arch_micro = arch_micro
+ self.current_arch_id = arch_id
+ return arch_id, arch_micro
+
+ # Case 2: If population has reached desired size, evolve population
+ else:
+ cur_mutate_sampler_time = 0
+ is_found_new = False
+
+ # Keep attempting mutations for a maximum of 'max_mutate_sampler_time' times
+ while cur_mutate_sampler_time < self.max_mutate_sampler_time:
+ cur_mutate_time = 0
+
+ # Randomly select a sample of models from the population
+ sample = []
+ sample_ids = []
+ while len(sample) < self.sample_size:
+ candidate = random.choice(list(self.population))
+ candidate_id = self.population_model_ids[self.population.index(candidate)]
+ sample.append(candidate)
+ sample_ids.append(candidate_id)
+
+ # Select the best parent from the sample (based on the order in sorted_model_ids)
+ parent_id = max(sample_ids, key=lambda _id: sorted_model_ids.index(str(_id)))
+ parent = sample[sample_ids.index(parent_id)]
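+                # Note: this assumes sorted_model_ids is ordered from worst to best, so a larger index means a better model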
+
+ # Try to mutate the parent up to 'max_mutate_time' times
+ while cur_mutate_time < self.max_mutate_time:
+ arch_id, arch_micro = self.space.mutate_architecture(parent.arch)
+
+ # If the mutated architecture hasn't been visited or we've visited all possible architectures, stop
+ if arch_id not in self.visited or len(self.space) == len(self.visited):
+ self.visited[arch_id] = True
+ is_found_new = True
+ break
+ cur_mutate_time += 1
+
+ # If we've found a new architecture, stop sampling
+ if is_found_new:
+ break
+
+ cur_mutate_sampler_time += 1
+
+ # If we've hit the maximum number of mutation attempts, do nothing
+ if cur_mutate_time * cur_mutate_sampler_time == self.max_mutate_time * self.max_mutate_sampler_time:
+ pass
+
+ # Update current architecture details
+ self.current_arch_micro = arch_micro
+ self.current_arch_id = arch_id
+
+ return arch_id, arch_micro
+
+ def fit_sampler(self, score: float):
+        # If still in the initialization stage, add the newly scored model to the population.
+ if len(self.population) < self.population_size:
+ model = Model()
+ model.arch = self.current_arch_micro
+ model.score = score
+ self.population.append(model)
+ self.population_model_ids.append(self.current_arch_id)
+
+        # Otherwise, we are in the mutation stage.
+ else:
+ child = Model()
+ child.arch = self.current_arch_micro
+ child.score = score
+
+ self.population.append(child)
+ self.population_model_ids.append(self.current_arch_id)
+ # Remove the oldest model.
+ self.population.popleft()
+ self.population_model_ids.popleft()
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_rand/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_rand/__init__.py
new file mode 100644
index 000000000..3df60b02f
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_rand/__init__.py
@@ -0,0 +1,17 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_rand/random_sample.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_rand/random_sample.py
new file mode 100644
index 000000000..8c3125446
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_rand/random_sample.py
@@ -0,0 +1,40 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from src.controller.core.sample import Sampler
+from src.search_space.core.space import SpaceWrapper
+from src.search_space.core.model_params import ModelMicroCfg
+
+
+class RandomSampler(Sampler):
+
+ def __init__(self, space: SpaceWrapper):
+ super().__init__(space)
+ self.visited = []
+
+ def sample_next_arch(self, sorted_model: list = None) -> (str, ModelMicroCfg):
+ while True:
+ arch_id, model_micro = self.space.random_architecture_id()
+
+ if arch_id not in self.visited:
+ self.visited.append(arch_id)
+ return str(arch_id), model_micro
+
+ def fit_sampler(self, score: float):
+ # random sampler can skip this.
+ pass
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_rl/reinforcement_learning.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_rl/reinforcement_learning.py
new file mode 100644
index 000000000..a65eed36e
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_rl/reinforcement_learning.py
@@ -0,0 +1,65 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from src.controller.core.sample import Sampler
+from src.search_space.core.space import SpaceWrapper
+from src.search_space.core.model_params import ModelMicroCfg
+from src.third_pkg.models import CellStructure
+
+
+class ExponentialMovingAverage(object):
+ """Class that maintains an exponential moving average."""
+
+ def __init__(self, momentum):
+ self._numerator = 0
+ self._denominator = 0
+ self._momentum = momentum
+
+ def update(self, value):
+ self._numerator = (
+ self._momentum * self._denominator + (1 - self._momentum) * value
+ )
+ self._denominator = self._momentum * self._denominator + (1 - self._momentum)
+
+ def value(self):
+ """Return the current value of the moving average"""
+ return self._numerator / self._denominator
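+    # Usage sketch (illustrative, not part of the original code): with momentum=0.9,
+    # calling update(1.0) and then update(0.0) gives value() = 0.09 / 0.19 ≈ 0.474,
+    # i.e. a bias-corrected exponential moving average of the observed rewards.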
+
+
+class RLSampler(Sampler):
+
+ def __init__(self, space: SpaceWrapper, args):
+
+ super().__init__(space)
+
+ self.policy = self.space.get_reinforcement_learning_policy(args.rl_learning_rate)
+ # update policy's parameters
+ self.baseline = ExponentialMovingAverage(args.rl_EMA_momentum)
+ self.log_prob = 0
+
+ def sample_next_arch(self, max_nodes: int) -> (str, ModelMicroCfg):
+ while True:
+ self.log_prob, action = self.policy.select_action()
+ arch_struct = self.policy.generate_arch(action)
+ arch_id = self.space.arch_to_id(arch_struct)
+ yield arch_id, arch_struct
+
+ def fit_sampler(self, score: float):
+ reward = score
+ self.baseline.update(reward)
+ self.policy.update_policy(reward, self.baseline.value(), self.log_prob)
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/dataset_utils/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/dataset_utils/__init__.py
new file mode 100644
index 000000000..3df60b02f
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/dataset_utils/__init__.py
@@ -0,0 +1,17 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/dataset_utils/download_critero_and_avazu.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/dataset_utils/download_critero_and_avazu.py
new file mode 100644
index 000000000..19989db39
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/dataset_utils/download_critero_and_avazu.py
@@ -0,0 +1,59 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import zipfile
+import urllib.request
+from tqdm import tqdm
+
+
+class DownloadProgressBar(tqdm):
+ def update_to(self, b=1, bsize=1, tsize=None):
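+        # urllib.request.urlretrieve invokes this hook as (block_number, block_size, total_size)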
+ if tsize is not None:
+ self.total = tsize
+ self.update(b * bsize - self.n)
+
+
+def download(url, output_path):
+ with DownloadProgressBar(unit='B', unit_scale=True,
+ miniters=1, desc=url.split('/')[-1]) as t:
+ urllib.request.urlretrieve(url, filename=output_path, reporthook=t.update_to)
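+
+# Example usage (illustrative only): download('https://example.com/data.zip', './data.zip')
+# displays a tqdm progress bar while the file is downloaded.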
+
+
+if __name__ == "__main__":
+ # if not os.path.exists('../data/avazu/'):
+ # os.mkdir('../data/avazu/')
+ # print("Begin to download avazu data, the total size is 683MB...")
+ # download('https://worksheets.codalab.org/rest/bundles/0xf5ab597052744680b1a55986557472c7/contents/blob/', '../data/avazu/avazu.zip')
+ # print("Unzipping avazu dataset...")
+ # with zipfile.ZipFile('../data/avazu/avazu.zip', 'r') as zip_ref:
+ # zip_ref.extractall('../data/avazu/')
+ # print("Done.")
+
+ if not os.path.exists('../exp_data/data/structure_data/criteo/'):
+        os.makedirs('../exp_data/data/structure_data/criteo/')
+ print("Begin to download criteo data, the total size is 3GB...")
+
+ output_path = '../exp_data/data/structure_data/criteo/criteo.zip'
+ if not os.path.exists(output_path):
+ download('https://worksheets.codalab.org/rest/bundles/0x8dca5e7bac42470aa445f9a205d177c6/contents/blob/',
+ output_path)
+ print("Unzipping criteo dataset...")
+ with zipfile.ZipFile('../exp_data/data/structure_data/criteo/criteo.zip', 'r') as zip_ref:
+ zip_ref.extractall('../exp_data/data/structure_data/criteo/')
+ print("Done.")
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/dataset_utils/sequence_dataloader.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/dataset_utils/sequence_dataloader.py
new file mode 100644
index 000000000..9a6587e9e
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/dataset_utils/sequence_dataloader.py
@@ -0,0 +1,96 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import queue
+import threading
+import requests
+import time
+from src.logger import logger
+
+
+class SquenceDataLoader:
+ """
+    This loader continuously pre-fetches batches of data from the cache service.
+ """
+
+ def __init__(self, cache_svc_url, table_name, name_space):
+ self.last_fetch_time = 0
+ self.table_name = table_name
+ # train, valid, test
+ self.name_space = name_space
+ self.end_signal = "end_position"
+ self.cache_svc_url = cache_svc_url
+ self.data_queue = queue.Queue(maxsize=10)
+ self.stop_event = threading.Event()
+ self.thread = threading.Thread(target=self.fetch_data, daemon=True)
+ self.thread.start()
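+        # the daemon thread keeps the bounded queue (maxsize=10) pre-filled so that
+        # __next__ rarely blocks on HTTP latency when the training loop asks for data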
+
+ def fetch_data(self):
+ while not self.stop_event.is_set():
+ response = requests.get(
+ f'{self.cache_svc_url}/',
+ params={
+ 'table_name': self.table_name,
+ 'name_space': self.name_space})
+
+ if response.status_code == 200:
+ batch = response.json()
+
+                # in training, we use iterations-per-epoch to control the end
+ if batch == self.end_signal:
+ if self.name_space == "valid":
+                        # end_signal during inference: stop!
+                        logger.info("[SequenceDataLoader]: reached the last iteration of the validation set")
+ self.data_queue.put({self.end_signal: True})
+ else:
+                        # end_signal during training: keep fetching batches
+ continue
+ else:
+ import torch
+ # convert to tensor again
+ id_tensor = torch.LongTensor(batch['id'])
+ value_tensor = torch.FloatTensor(batch['value'])
+ y_tensor = torch.FloatTensor(batch['y'])
+ data_tensor = {'id': id_tensor, 'value': value_tensor, 'y': y_tensor}
+ self.data_queue.put(data_tensor)
+ else:
+ print(response.json())
+ time.sleep(5)
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ print("compute time = ", time.time() - self.last_fetch_time)
+ self.last_fetch_time = time.time()
+ if self.data_queue.empty() and not self.thread.is_alive():
+ raise StopIteration
+ else:
+ data = self.data_queue.get(block=True)
+ if self.end_signal in data:
+ raise StopIteration
+ else:
+ return data
+
+ def __len__(self):
+ return self.data_queue.qsize()
+
+ def stop(self):
+ self.stop_event.set()
+ self.thread.join()
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/dataset_utils/stream_dataloader.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/dataset_utils/stream_dataloader.py
new file mode 100644
index 000000000..f39499f33
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/dataset_utils/stream_dataloader.py
@@ -0,0 +1,96 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import queue
+import threading
+import requests
+import time
+from src.logger import logger
+
+
+class StreamingDataLoader:
+ """
+    This loader continuously pre-fetches batches of data from the cache service.
+ """
+
+ def __init__(self, cache_svc_url, table_name, name_space):
+ self.last_fetch_time = 0
+ self.table_name = table_name
+ # train, valid, test
+ self.name_space = name_space
+ self.end_signal = "end_position"
+ self.cache_svc_url = cache_svc_url
+ self.data_queue = queue.Queue(maxsize=10)
+ self.stop_event = threading.Event()
+ self.thread = threading.Thread(target=self.fetch_data, daemon=True)
+ self.thread.start()
+
+ def fetch_data(self):
+ while not self.stop_event.is_set():
+ response = requests.get(
+ f'{self.cache_svc_url}/',
+ params={
+ 'table_name': self.table_name,
+ 'name_space': self.name_space})
+
+ if response.status_code == 200:
+ batch = response.json()
+
+                # in training, we use iterations-per-epoch to control the end
+ if batch == self.end_signal:
+ if self.name_space == "valid":
+                        # end_signal during inference: stop!
+                        logger.info("[StreamingDataLoader]: reached the last iteration of the validation set")
+ self.data_queue.put({self.end_signal: True})
+ else:
+                        # end_signal during training: keep fetching batches
+ continue
+ else:
+ # convert to tensor again
+ import torch
+ id_tensor = torch.LongTensor(batch['id'])
+ value_tensor = torch.FloatTensor(batch['value'])
+ y_tensor = torch.FloatTensor(batch['y'])
+ data_tensor = {'id': id_tensor, 'value': value_tensor, 'y': y_tensor}
+ self.data_queue.put(data_tensor)
+ else:
+ print(response.json())
+ time.sleep(5)
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ print("compute time = ", time.time() - self.last_fetch_time)
+ self.last_fetch_time = time.time()
+ if self.data_queue.empty() and not self.thread.is_alive():
+ raise StopIteration
+ else:
+ data = self.data_queue.get(block=True)
+ if self.end_signal in data:
+ raise StopIteration
+ else:
+ return data
+
+ def __len__(self):
+ return self.data_queue.qsize()
+
+ def stop(self):
+ self.stop_event.set()
+ self.thread.join()
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/__init__.py
new file mode 100644
index 000000000..e5ddb1e19
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/__init__.py
@@ -0,0 +1,28 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from src.common.constant import *
+from src.eva_engine.phase1.algo.prune_synflow import SynFlowEvaluator
+
+# evaluator mapper that registers the available evaluation algorithms
+evaluator_register = {
+
+ # prune based
+ CommonVars.PRUNE_SYNFLOW: SynFlowEvaluator(),
+
+}
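+
+# Typical lookup (illustrative): pick the registered SynFlow evaluator via
+#   evaluator = evaluator_register[CommonVars.PRUNE_SYNFLOW]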
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/coordinator.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/coordinator.py
new file mode 100644
index 000000000..8142a5cd6
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/coordinator.py
@@ -0,0 +1,116 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from src.common.constant import Config
+from src.eva_engine.phase2.run_sh import BudgetAwareControllerSH
+from src.logger import logger
+from src.search_space.core.space import SpaceWrapper
+
+eta = 3
+
+
+def min_budget_calculation(search_space_ins: SpaceWrapper, dataset: str,
+ N_K_ratio: int, sh: BudgetAwareControllerSH, t1_: float):
+ # Calculate the minimum budget requirements for both phases
+ K_max = int(len(search_space_ins) / N_K_ratio)
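+    # K_max caps K so that N = K * N_K_ratio does not exceed the size of the search space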
+
+ if search_space_ins.name == Config.NB101:
+ U_options = [4, 12, 16, 108]
+ elif search_space_ins.name == Config.NB201:
+ U_options = list(range(1, 200))
+ elif search_space_ins.name == Config.MLPSP:
+ # TODO: This is for benchmark only
+ if dataset == Config.Frappe:
+ MaxEpochTrained = 20
+ elif dataset == Config.Criteo:
+ MaxEpochTrained = 10
+ elif dataset == Config.UCIDataset:
+ MaxEpochTrained = 40
+ else:
+ raise NotImplementedError
+ U_options = list(range(1, MaxEpochTrained))
+ else:
+ raise NotImplementedError
+
+ U_min = U_options[0]
+ min_budget_required_both_phase = sh.pre_calculate_time_required(K=1, U=U_min)[1] + N_K_ratio * t1_
+
+ return K_max, U_options, U_min, min_budget_required_both_phase
+
+
+def schedule(dataset: str, sh: BudgetAwareControllerSH, T_: float, t1_: float, t2_: float, w_: int,
+ search_space_ins: SpaceWrapper, N_K_ratio: int,
+ only_phase1: bool = False):
+ """
+    :param dataset: dataset name
+    :param sh: BudgetAwareControllerSH instance
+    :param T_: user-given time budget
+    :param t1_: time to score one model
+    :param t2_: time to train one model
+    :param w_: number of workers for parallel execution.
+    :param search_space_ins: search space instance
+    :param N_K_ratio: N/K = N_K_ratio
+    :param only_phase1: only use the filtering phase.
+ """
+ if T_ < 1:
+        raise ValueError('Total time budget must be at least 1 second')
+
+ K_max, U_options, U_min, min_budget_required_both_phase = min_budget_calculation(
+ search_space_ins, dataset, N_K_ratio, sh, t1_)
+
+ # collection of (best_K, best_U, best_N)
+ history = []
+
+ # Calculate phase 1
+ time_used = t1_
+ enable_phase2_at_least = sh.pre_calculate_time_required(K=2, U=U_min)[1] + 2 * N_K_ratio * t1_
+
+ if only_phase1 or enable_phase2_at_least > T_:
+        # all time is given to phase 1: explore N models
+ N_only = min(int(T_ / t1_), len(search_space_ins))
+ history.extend([(1, U_min, i) for i in range(1, N_only + 1) if i * t1_ <= T_])
+ if not history:
+ raise ValueError(
+                f' [trails] Only phase 1: budget {T_} is too small, it must be at least {time_used} '
+                f'with the current worker settings (t1_={t1_}, t2_={t2_}, eta={eta})')
+
+    # Calculate phase 2: start from the minimum U; if the user-given budget is large enough, evaluate each model with more epochs
+ else:
+        # record all possible (K, U) pairs meeting the SLO (time used < T)
+ for K_ in range(2, min(int(T_ / t1_), K_max) + 1):
+ N_ = K_ * N_K_ratio
+ for U in U_options:
+ time_used = sh.pre_calculate_time_required(K=K_, U=U)[1] + N_ * t1_
+ if time_used > T_:
+ break
+ else:
+ history.append((K_, U, N_))
+ if not history:
+ raise ValueError(
+                f' [trails] Budget {T_} is too small, it must be at least {min_budget_required_both_phase}'
+                f' with the current worker settings (t1_={t1_}, t2_={t2_}, eta={eta})')
+
+ best_K, best_U, best_N = history[-1]
+ N_scored = best_N
+ B1_time_used = N_scored * t1_
+ B2_all_epoch, B2_time_used = sh.pre_calculate_time_required(K=best_K, U=best_U)
+
+ logger.info(
+ f' [trails] The schedule result: when T = {T_} second, N = {N_scored}, K = {best_K}, best_U = {best_U}, '
+ f'time_used = {B1_time_used + B2_time_used}')
+ return best_K, best_U, N_scored, B1_time_used, B2_time_used, B2_all_epoch
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/__init__.py
new file mode 100644
index 000000000..3df60b02f
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/__init__.py
@@ -0,0 +1,17 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/__init__.py
new file mode 100644
index 000000000..01d705720
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/__init__.py
@@ -0,0 +1,18 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/prune_synflow.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/prune_synflow.py
new file mode 100644
index 000000000..1c671febd
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/prune_synflow.py
@@ -0,0 +1,425 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from src.eva_engine.phase1.algo.alg_base import Evaluator
+from src.common.constant import Config
+
+from singa import singa_wrap as singa
+from singa import device as singa_device
+from singa import tensor
+from singa import opt
+from singa import autograd
+from singa.opt import Optimizer
+from singa.opt import DecayScheduler
+from singa.opt import Constant
+import numpy as np
+import time
+import argparse
+from PIL import Image
+from numpy import linalg as LA
+
+np_dtype = {"float16": np.float16, "float32": np.float32}
+
+# singa_dtype = {"float16": tensor.float16, "float32": tensor.float32}
+singa_dtype = {"float32": tensor.float32}
+
+### MSOptimizer
+class MSOptimizer(Optimizer):
+ def __call__(self, loss):
+ pn_p_g_list = self.call_with_returns(loss)
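+        # pn_p_g_list: a list of [param_name, param_tensor, grad_tensor] triples collected during backward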
+ # print ("optimizer1 before self.step()")
+ # print ("optimizer1 before print len(pn_p_g_list): \n", len(pn_p_g_list))
+ self.step()
+ # print ("optimizer1 after print len(pn_p_g_list): \n", len(pn_p_g_list))
+ # print ("optimizer1 after self.step()")
+ return pn_p_g_list
+
+ def call_with_returns(self, loss):
+ # print ("call_with_returns before apply loss.data: \n", loss.data)
+ pn_p_g_list = []
+ for p, g in autograd.backward(loss):
+ if p.name is None:
+ p.name = id(p)
+ self.apply(p.name, p, g)
+ # print ("call with returns")
+ # print ("p.name: \n", p.name)
+ # print ("p.data: \n", p.data)
+ # print ("g.data: \n", g.data)
+ pn_p_g_list.append([p.name, p, g]) # need iterables
+ # print ("call_with_returns after apply loss.data: \n", loss.data)
+ return pn_p_g_list
+
+# MSSGD -- essentially unchanged from the standard singa.opt.SGD implementation
+class MSSGD(MSOptimizer):
+ """Implements stochastic gradient descent (optionally with momentum).
+
+ Nesterov momentum is based on the formula from `On the importance of initialization and momentum in deep learning`__.
+
+ Args:
+ lr(float): learning rate
+ momentum(float, optional): momentum factor(default: 0)
+ weight_decay(float, optional): weight decay(L2 penalty)(default: 0)
+ dampening(float, optional): dampening for momentum(default: 0)
+ nesterov(bool, optional): enables Nesterov momentum(default: False)
+
+ Typical usage example:
+        >>> from singa import opt
+        >>> optimizer = opt.SGD(lr=0.1, momentum=0.9)
+        >>> optimizer.update()
+
+    __ http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf
+
+ .. note::
+ The implementation of SGD with Momentum / Nesterov subtly differs from
+ Sutskever et. al. and implementations in some other frameworks.
+
+ Considering the specific case of Momentum, the update can be written as
+
+ .. math::
+ v = \rho * v + g \\
+ p = p - lr * v
+
+    where p, g, v and :math:`\rho` denote the parameters, gradient,
+ velocity, and momentum respectively.
+
+ This is in contrast to Sutskever et. al. and
+ other frameworks which employ an update of the form
+
+ .. math::
+ v = \rho * v + lr * g \\
+ p = p - v
+
+ The Nesterov version is analogously modified.
+ """
+
+ def __init__(self,
+ lr=0.1,
+ momentum=0,
+ dampening=0,
+ weight_decay=0,
+ nesterov=False,
+ dtype=tensor.float32):
+ super(MSSGD, self).__init__(lr)
+
+ # init momentum
+ if type(momentum) == float or type(momentum) == int:
+ if momentum < 0.0:
+ raise ValueError("Invalid momentum value: {}".format(momentum))
+ self.momentum = Constant(momentum)
+ elif isinstance(momentum, DecayScheduler):
+ self.momentum = momentum
+ momentum = momentum.init_value
+ else:
+ raise TypeError("Wrong momentum type")
+ # self.dtype = dtype
+ # self.mom_value = self.momentum(self.step_counter).as_type(self.dtype)
+ self.mom_value = self.momentum(self.step_counter)
+
+ # init dampening
+ if type(dampening) == float or type(dampening) == int:
+ self.dampening = Constant(dampening)
+ elif isinstance(dampening, DecayScheduler):
+ self.dampening = dampening
+ dampening = dampening.init_value
+ else:
+ raise TypeError("Wrong dampening type")
+ # self.dam_value = self.dampening(self.step_counter).as_type(self.dtype)
+ self.dam_value = self.dampening(self.step_counter)
+
+ # init weight_decay
+ if type(weight_decay) == float or type(weight_decay) == int:
+ if weight_decay < 0.0:
+ raise ValueError(
+ "Invalid weight_decay value: {}".format(weight_decay))
+ self.weight_decay = Constant(weight_decay)
+ elif isinstance(weight_decay, DecayScheduler):
+ self.weight_decay = weight_decay
+ else:
+ raise TypeError("Wrong weight_decay type")
+ # self.decay_value = self.weight_decay(self.step_counter).as_type(self.dtype)
+ self.decay_value = self.weight_decay(self.step_counter)
+
+ # init other params
+ self.nesterov = nesterov
+ self.moments = dict()
+
+ # check value
+ if nesterov and (momentum <= 0 or dampening != 0):
+ raise ValueError(
+ "Nesterov momentum requires a momentum and zero dampening")
+
+ def apply(self, param_name, param_value, param_grad):
+ """Performs a single optimization step.
+
+ Args:
+ param_name(String): the name of the param
+ param_value(Tensor): param values to be update in-place
+ grad(Tensor): param gradients; the values may be updated
+ in this function; cannot use it anymore
+ """
+ assert param_value.shape == param_grad.shape, ("shape mismatch",
+ param_value.shape,
+ param_grad.shape)
+ self.device_check(param_value, self.step_counter, self.lr_value,
+ self.mom_value, self.dam_value, self.decay_value)
+
+ # derive dtype from input
+ # assert param_value.dtype == self.dtype
+
+ # TODO add branch operator
+ # if self.decay_value != 0:
+ if self.weight_decay.init_value != 0:
+ singa.Axpy(self.decay_value.data, param_value.data, param_grad.data)
+
+ if self.momentum.init_value != 0:
+ if param_name not in self.moments:
+ flag = param_value.device.graph_enabled()
+ param_value.device.EnableGraph(False)
+ self.moments[param_name] = tensor.zeros_like(param_value)
+ param_value.device.EnableGraph(flag)
+
+ buf = self.moments[param_name]
+ buf *= self.mom_value
+ alpha = 1.0 - self.dam_value
+ singa.Axpy(alpha.data, param_grad.data, buf.data)
+
+ if self.nesterov:
+ singa.Axpy(self.mom_value.data, buf.data, param_grad.data)
+ else:
+ param_grad = buf
+
+ minus_lr = 0.0 - self.lr_value
+ singa.Axpy(minus_lr.data, param_grad.data, param_value.data)
+
+ def step(self):
+ # increment step counter, lr and moment
+ # print ("before super step")
+ super().step()
+ # print ("after super step")
+ # print ("before custiomized step")
+ # mom_value = self.momentum(self.step_counter).as_type(self.dtype)
+ # dam_value = self.dampening(self.step_counter).as_type(self.dtype)
+ # decay_value = self.weight_decay(self.step_counter).as_type(self.dtype)
+ mom_value = self.momentum(self.step_counter)
+ dam_value = self.dampening(self.step_counter)
+ decay_value = self.weight_decay(self.step_counter)
+ self.mom_value.copy_from(mom_value)
+ self.dam_value.copy_from(dam_value)
+ self.decay_value.copy_from(decay_value)
+ # print ("after customized step")
+
+ def get_states(self):
+ states = super().get_states()
+ if self.mom_value > 0:
+            states['moments'] = self.moments  # a dict of the 1st-order moment tensors
+ return states
+
+ def set_states(self, states):
+ super().set_states(states)
+ if 'moments' in states:
+ self.moments = states['moments']
+ self.mom_value = self.momentum(self.step_counter)
+
+# Data augmentation
+def augmentation(x, batch_size):
+ xpad = np.pad(x, [[0, 0], [0, 0], [4, 4], [4, 4]], 'symmetric')
+ for data_num in range(0, batch_size):
+ offset = np.random.randint(8, size=2)
+ x[data_num, :, :, :] = xpad[data_num, :,
+ offset[0]:offset[0] + x.shape[2],
+ offset[1]:offset[1] + x.shape[2]]
+ if_flip = np.random.randint(2)
+ if (if_flip):
+ x[data_num, :, :, :] = x[data_num, :, :, ::-1]
+ return x
+
+
+# Calculate accuracy
+def accuracy(pred, target):
+ # y is network output to be compared with ground truth (int)
+ y = np.argmax(pred, axis=1)
+ a = y == target
+ correct = np.array(a, "int").sum()
+ return correct
+
+
+# Data partition according to the rank
+def partition(global_rank, world_size, train_x, train_y, val_x, val_y):
+ # Partition training data
+ data_per_rank = train_x.shape[0] // world_size
+ idx_start = global_rank * data_per_rank
+ idx_end = (global_rank + 1) * data_per_rank
+ train_x = train_x[idx_start:idx_end]
+ train_y = train_y[idx_start:idx_end]
+
+ # Partition evaluation data
+ data_per_rank = val_x.shape[0] // world_size
+ idx_start = global_rank * data_per_rank
+ idx_end = (global_rank + 1) * data_per_rank
+ val_x = val_x[idx_start:idx_end]
+ val_y = val_y[idx_start:idx_end]
+ return train_x, train_y, val_x, val_y
+
+
+# Function to all reduce NUMPY accuracy and loss from multiple devices
+def reduce_variable(variable, dist_opt, reducer):
+ reducer.copy_from_numpy(variable)
+ dist_opt.all_reduce(reducer.data)
+ dist_opt.wait()
+ output = tensor.to_numpy(reducer)
+ return output
+
+
+def resize_dataset(x, image_size):
+ num_data = x.shape[0]
+ dim = x.shape[1]
+ X = np.zeros(shape=(num_data, dim, image_size, image_size),
+ dtype=np.float32)
+ for n in range(0, num_data):
+ for d in range(0, dim):
+ X[n, d, :, :] = np.array(Image.fromarray(x[n, d, :, :]).resize(
+ (image_size, image_size), Image.BILINEAR),
+ dtype=np.float32)
+ return X
+
+import torch
+class SynFlowEvaluator(Evaluator):
+
+ def __init__(self):
+ super().__init__()
+
+ def evaluate(self, arch, device, batch_data: object, batch_labels: torch.Tensor, space_name: str) -> float:
+ """
+        This is an implementation of the paper
+        "Pruning neural networks without any data by iteratively conserving synaptic flow".
+        The score is computed in 5 steps:
+        1. For each layer and each parameter, take the absolute value |theta|.
+        2. Run a forward pass with a single all-ones input of dim = [1, c, h, w];
+           since only Linear and Conv2d operations are considered, the output is the product of [ |theta_l| for l in L ].
+        3. Define a new loss R = sum(output) and run a backward pass.
+        4. For each layer, compute S_l = Hadamard product(dR/dw, w), i.e. S_ij = a_ij * b_ij.
+        5. score = sum( [ S_l for l in layers ] )
+        Comments:
+        1. this is data-agnostic
+        2. it is computed on a single example only
+ """
+
+ ### singa configs
+ mssgd = MSSGD(lr=0.005, momentum=0.9, weight_decay=1e-5, dtype=singa_dtype['float32'])
+ device_id = 0
+ max_epoch = 1
+ model = arch
+ graph = True
+ verbosity = 0
+        dist_option = 'plain'
+        spars = None
+ precision = 'float32'
+ global_rank = 0
+ world_size = 1
+
+ ### singa setups
+ # print ("device: \n", device)
+ if device == 'cpu':
+ dev = singa_device.get_default_device()
+ else: # GPU
+            dev = singa_device.create_cuda_gpu_on(device_id)  # need to change to CPU device for CPU-only machines
+ dev.SetRandSeed(0)
+ np.random.seed(0)
+
+ # For distributed training, sequential has better performance
+ if hasattr(mssgd, "communicator"):
+ DIST = True
+ sequential = True
+ else:
+ DIST = False
+ sequential = False
+
+ model.train()
+
+ ### process batch_data
+ x = batch_data.cpu().numpy() # Size([1, 100]) and all ones
+ x = x.astype(np_dtype[precision])
+ y = np.ones(x.shape[0], dtype=np.int32)
+ if model.dimension == 2: # input data dimension
+ tx = tensor.Tensor(x.shape, dev, singa_dtype[precision])
+ ty = tensor.Tensor((x.shape[0],), dev, tensor.int32)
+
+ model.set_optimizer(mssgd)
+ model.compile([tx], is_train=True, use_graph=graph, sequential=sequential)
+ dev.SetVerbosity(verbosity)
+
+
+ # 1. Convert params to their abs.
+ synflow_flag = True ### just change the model to the absolute value
+ tx.copy_from_numpy(x) # dtype=np.float32
+ ty.copy_from_numpy(y)
+ # print ("before model forward ...")
+ pn_p_g_list, out, loss = model(tx, ty, dist_option, spars, synflow_flag)
+ # print ("---------------------------------------")
+ # print ("before absolute prune_synflow !!!nemb input vector!!! tensor.to_numpy(loss)[0]: ", tensor.to_numpy(loss)[0])
+ # print ("before absolute prune_synflow !!!nemb input vector!!! tensor.to_numpy(loss): ", tensor.to_numpy(loss))
+ # train_correct += accuracy(tensor.to_numpy(out), y)
+ # train_loss += tensor.to_numpy(loss)[0]
+ # all params turned to positive
+ for pn_p_g_item in pn_p_g_list:
+ # print ("absolute value parameter name: \n", pn_p_g_item[0])
+ param_np = tensor.to_numpy(pn_p_g_item[1])
+ # print ("param_np shape: \n", param_np.shape)
+ # print ("param_np sqrt norm: \n", np.sqrt(LA.norm(param_np)/param_np.size))
+ # print ("before abs np.min(tensor.to_numpy(pn_p_g_item[1])): \n", np.min(tensor.to_numpy(pn_p_g_item[1])))
+ pn_p_g_item[1] = tensor.abs(pn_p_g_item[1]) # tensor actually ..
+ # print ("after abs np.min(tensor.to_numpy(pn_p_g_item[1])): \n", np.min(tensor.to_numpy(pn_p_g_item[1])))
+ # print ("after abs pn_p_g_item[1][0]: \n", pn_p_g_item[1][0])
+
+ # 2. Compute gradients with input of one dummy example ( 1-vector with dimension [1, c, h, w] )
+ # 3.R = sum(output)
+ # 4. Select the gradients that we want to use for search/prune
+ # 5. Sum over all parameter's results to get the final score.
+ # score = sum([grad.sum() for grad in grads_abs])
+
+ # print ("calculate synflow")
+ synflow_flag = True
+ ### step 1: all one input
+ # Copy the patch data into input tensors
+ # tx.copy_from_numpy(np.ones(x.shape, dtype=np.float32))
+ tx.copy_from_numpy(x) # dtype=np.float32 # actually it is all ones ... --> np.ones(x.shape, dtype=np.float32)
+ ty.copy_from_numpy(y)
+ ### step 2: all weights turned to positive (done)
+ ### step 3: new loss (done)
+ # print ("before model forward ...")
+ pn_p_g_list, out, loss = model(tx, ty, dist_option, spars, synflow_flag)
+ # print ("prune_synflow !!!nemb input vector!!! synflow step tensor.to_numpy(loss)[0]: ", tensor.to_numpy(loss)[0])
+ ### step 4: calculate the multiplication of weights
+ score = 0.0
+ for pn_p_g_item in pn_p_g_list:
+ # print ("calculate weight param * grad parameter name: \n", pn_p_g_item[0])
+ if len(pn_p_g_item[1].shape) == 2: # param_value.data is "weight"
+ # print ("pn_p_g_item[1].shape: \n", pn_p_g_item[1].shape)
+ # print ("tensor.to_numpy(pn_p_g_item[1][0]): ", tensor.to_numpy(pn_p_g_item[1][0]))
+ # print ("calculate synflow parameter name: \n", pn_p_g_item[0])
+ # print ("should be positive np.min(tensor.to_numpy(pn_p_g_item[1])): ", np.min(tensor.to_numpy(pn_p_g_item[1])))
+ # print ("weight should be positive tensor.to_numpy(pn_p_g_item[1][0])[0, :10]: ", tensor.to_numpy(pn_p_g_item[1][0])[0, :10])
+ # print ("gradients tensor.to_numpy(pn_p_g_item[2][0])[0, :10]: ", tensor.to_numpy(pn_p_g_item[2][0])[0, :10])
+ # print ()
+ score += np.sum(np.absolute(tensor.to_numpy(pn_p_g_item[1]) * tensor.to_numpy(pn_p_g_item[2])))
+ # print ("layer_hidden_list: \n", layer_hidden_list)
+ # print ("prune_synflow !!!one-hot input vector!!! absolute step tensor.to_numpy(loss)[0]: ", tensor.to_numpy(loss)[0])
+ print ("score: \n", score)
+
+ return score
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/README.md b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/README.md
new file mode 100644
index 000000000..b7c96e884
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/README.md
@@ -0,0 +1,18 @@
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/README.md b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/README.md
new file mode 100644
index 000000000..b081affa7
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/README.md
@@ -0,0 +1,46 @@
+
+
+# Image Classification using Convolutional Neural Networks
+
+Examples inside this folder show how to train CNN models using
+SINGA for image classification.
+
+* `data` includes the scripts for preprocessing image datasets.
+ Currently, MNIST, CIFAR10 and CIFAR100 are included.
+
+* `model` includes the CNN model construction code, created by
+  subclassing `Module` to wrap the neural network operations
+  of each model. The computational graph is then enabled to optimize
+  memory usage and efficiency.
+
+* `autograd` includes the code to train CNN models by calling the
+ [neural network operations](../../python/singa/autograd.py) imperatively.
+ The computational graph is not created.
+
+* `train_cnn.py` is the training script, which controls the training flow by
+ doing BackPropagation and SGD update.
+
+* `train_multiprocess.py` is the script for distributed training on a single
+ node with multiple GPUs; it uses Python's multiprocessing module and NCCL.
+
+* `train_mpi.py` is the script for distributed training (among multiple nodes)
+ using MPI and NCCL for communication.
+
+* `benchmark.py` tests the training throughput using `ResNet50` as the workload.
\ No newline at end of file
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/cifar10_multiprocess.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/cifar10_multiprocess.py
new file mode 100644
index 000000000..4b3cb0f43
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/cifar10_multiprocess.py
@@ -0,0 +1,43 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from resnet_cifar10 import *
+import multiprocessing
+import sys
+
+if __name__ == '__main__':
+
+ # Generate a NCCL ID to be used for collective communication
+ nccl_id = singa.NcclIdHolder()
+
+ # Configure the number of GPUs to be used
+ world_size = int(sys.argv[1])
+
+ # Testing the experimental partial-parameter update asynchronous training
+ partial_update = True
+
+ process = []
+ for local_rank in range(0, world_size):
+ process.append(
+ multiprocessing.Process(target=train_cifar10,
+ args=(True, local_rank, world_size, nccl_id,
+ partial_update)))
+
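+    # Each child process runs train_cifar10 with DIST=True on its own GPU (local_rank),
+    # sharing the same NCCL id so that all processes join a single communication group.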
+ for p in process:
+ p.start()
\ No newline at end of file
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/mnist_cnn.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/mnist_cnn.py
new file mode 100644
index 000000000..16752ceab
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/mnist_cnn.py
@@ -0,0 +1,304 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from singa import singa_wrap as singa
+from singa import autograd
+from singa import layer
+from singa import tensor
+from singa import device
+from singa import opt
+import numpy as np
+import os
+import sys
+import gzip
+import codecs
+import time
+
+
+class CNN:
+
+ def __init__(self):
+ self.conv1 = layer.Conv2d(1, 20, 5, padding=0)
+ self.conv2 = layer.Conv2d(20, 50, 5, padding=0)
+ self.linear1 = layer.Linear(4 * 4 * 50, 500)
+ self.linear2 = layer.Linear(500, 10)
+ self.pooling1 = layer.MaxPool2d(2, 2, padding=0)
+ self.pooling2 = layer.MaxPool2d(2, 2, padding=0)
+ self.relu1 = layer.ReLU()
+ self.relu2 = layer.ReLU()
+ self.relu3 = layer.ReLU()
+ self.flatten = layer.Flatten()
+
+ def forward(self, x):
+ y = self.conv1(x)
+ y = self.relu1(y)
+ y = self.pooling1(y)
+ y = self.conv2(y)
+ y = self.relu2(y)
+ y = self.pooling2(y)
+ y = self.flatten(y)
+ y = self.linear1(y)
+ y = self.relu3(y)
+ y = self.linear2(y)
+ return y
+
+
+def check_dataset_exist(dirpath):
+ if not os.path.exists(dirpath):
+ print(
+ 'The MNIST dataset does not exist. Please download the mnist dataset using download_mnist.py (e.g. python3 download_mnist.py)'
+ )
+ sys.exit(0)
+ return dirpath
+
+
+def load_dataset():
+ train_x_path = '/tmp/train-images-idx3-ubyte.gz'
+ train_y_path = '/tmp/train-labels-idx1-ubyte.gz'
+ valid_x_path = '/tmp/t10k-images-idx3-ubyte.gz'
+ valid_y_path = '/tmp/t10k-labels-idx1-ubyte.gz'
+
+ train_x = read_image_file(check_dataset_exist(train_x_path)).astype(
+ np.float32)
+ train_y = read_label_file(check_dataset_exist(train_y_path)).astype(
+ np.float32)
+ valid_x = read_image_file(check_dataset_exist(valid_x_path)).astype(
+ np.float32)
+ valid_y = read_label_file(check_dataset_exist(valid_y_path)).astype(
+ np.float32)
+ return train_x, train_y, valid_x, valid_y
+
+
+def read_label_file(path):
+ with gzip.open(path, 'rb') as f:
+ data = f.read()
+ assert get_int(data[:4]) == 2049
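+        # 2049 is the magic number of the MNIST/IDX label file format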
+ length = get_int(data[4:8])
+ parsed = np.frombuffer(data, dtype=np.uint8, offset=8).reshape((length))
+ return parsed
+
+
+def get_int(b):
+ return int(codecs.encode(b, 'hex'), 16)
+
+
+def read_image_file(path):
+ with gzip.open(path, 'rb') as f:
+ data = f.read()
+ assert get_int(data[:4]) == 2051
+ length = get_int(data[4:8])
+ num_rows = get_int(data[8:12])
+ num_cols = get_int(data[12:16])
+ parsed = np.frombuffer(data, dtype=np.uint8, offset=16).reshape(
+ (length, 1, num_rows, num_cols))
+ return parsed
+
+
+def to_categorical(y, num_classes):
+ y = np.array(y, dtype="int")
+ n = y.shape[0]
+ categorical = np.zeros((n, num_classes))
+ categorical[np.arange(n), y] = 1
+ categorical = categorical.astype(np.float32)
+ return categorical
+
+
+def accuracy(pred, target):
+ y = np.argmax(pred, axis=1)
+ t = np.argmax(target, axis=1)
+ a = y == t
+ return np.array(a, "int").sum()
+
+
+# Function to all reduce NUMPY accuracy and loss from multiple devices
+def reduce_variable(variable, dist_opt, reducer):
+ reducer.copy_from_numpy(variable)
+ dist_opt.all_reduce(reducer.data)
+ dist_opt.wait()
+ output = tensor.to_numpy(reducer)
+ return output
+
+
+# Function to synchronize the initial SINGA tensor model parameters
+def synchronize(tensor, dist_opt):
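+    # all-reduce sums the tensor across all ranks; dividing by world_size then yields the average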
+ dist_opt.all_reduce(tensor.data)
+ dist_opt.wait()
+ tensor /= dist_opt.world_size
+
+
+# Data augmentation
+def augmentation(x, batch_size):
+ xpad = np.pad(x, [[0, 0], [0, 0], [4, 4], [4, 4]], 'symmetric')
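+    # pad each 28x28 image to 36x36, then take a random 28x28 crop and optionally flip horizontally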
+ for data_num in range(0, batch_size):
+ offset = np.random.randint(8, size=2)
+ x[data_num, :, :, :] = xpad[data_num, :, offset[0]:offset[0] + 28,
+ offset[1]:offset[1] + 28]
+ if_flip = np.random.randint(2)
+ if (if_flip):
+ x[data_num, :, :, :] = x[data_num, :, :, ::-1]
+ return x
+
+
+# Data partition
+def data_partition(dataset_x, dataset_y, global_rank, world_size):
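+    # Give each rank a contiguous, equally sized shard of the dataset; any remainder samples are dropped.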
+ data_per_rank = dataset_x.shape[0] // world_size
+ idx_start = global_rank * data_per_rank
+ idx_end = (global_rank + 1) * data_per_rank
+ return dataset_x[idx_start:idx_end], dataset_y[idx_start:idx_end]
+
+
+def train_mnist_cnn(DIST=False,
+ local_rank=None,
+ world_size=None,
+ nccl_id=None,
+ spars=0,
+ topK=False,
+ corr=True):
+
+    # Define the hyperparameters for training the MNIST CNN
+ max_epoch = 10
+ batch_size = 64
+ sgd = opt.SGD(lr=0.005, momentum=0.9, weight_decay=1e-5)
+
+    # Prepare the training and validation data
+ train_x, train_y, test_x, test_y = load_dataset()
+ IMG_SIZE = 28
+ num_classes = 10
+ train_y = to_categorical(train_y, num_classes)
+ test_y = to_categorical(test_y, num_classes)
+
+ # Normalization
+ train_x = train_x / 255
+ test_x = test_x / 255
+
+ if DIST:
+ # For distributed GPU training
+ sgd = opt.DistOpt(sgd,
+ nccl_id=nccl_id,
+ local_rank=local_rank,
+ world_size=world_size)
+ dev = device.create_cuda_gpu_on(sgd.local_rank)
+
+ # Dataset partition for distributed training
+ train_x, train_y = data_partition(train_x, train_y, sgd.global_rank,
+ sgd.world_size)
+ test_x, test_y = data_partition(test_x, test_y, sgd.global_rank,
+ sgd.world_size)
+ world_size = sgd.world_size
+ else:
+ # For single GPU
+ dev = device.create_cuda_gpu()
+ world_size = 1
+
+ # Create model
+ model = CNN()
+
+ tx = tensor.Tensor((batch_size, 1, IMG_SIZE, IMG_SIZE), dev, tensor.float32)
+ ty = tensor.Tensor((batch_size, num_classes), dev, tensor.int32)
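+    # tx and ty are device-side buffers that are reused for every mini-batch.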
+ num_train_batch = train_x.shape[0] // batch_size
+ num_test_batch = test_x.shape[0] // batch_size
+ idx = np.arange(train_x.shape[0], dtype=np.int32)
+
+ if DIST:
+        # Synchronize the initial parameters
+ autograd.training = True
+ x = np.random.randn(batch_size, 1, IMG_SIZE,
+ IMG_SIZE).astype(np.float32)
+ y = np.zeros(shape=(batch_size, num_classes), dtype=np.int32)
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+ out = model.forward(tx)
+ loss = autograd.softmax_cross_entropy(out, ty)
+ for p, g in autograd.backward(loss):
+ synchronize(p, sgd)
+
+    # Training and evaluation loop
+ for epoch in range(max_epoch):
+ start_time = time.time()
+ np.random.shuffle(idx)
+
+ if ((DIST == False) or (sgd.global_rank == 0)):
+ print('Starting Epoch %d:' % (epoch))
+
+ # Training phase
+ autograd.training = True
+ train_correct = np.zeros(shape=[1], dtype=np.float32)
+ test_correct = np.zeros(shape=[1], dtype=np.float32)
+ train_loss = np.zeros(shape=[1], dtype=np.float32)
+
+ for b in range(num_train_batch):
+ x = train_x[idx[b * batch_size:(b + 1) * batch_size]]
+ x = augmentation(x, batch_size)
+ y = train_y[idx[b * batch_size:(b + 1) * batch_size]]
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+ out = model.forward(tx)
+ loss = autograd.softmax_cross_entropy(out, ty)
+ train_correct += accuracy(tensor.to_numpy(out), y)
+ train_loss += tensor.to_numpy(loss)[0]
+ if DIST:
+ if (spars == 0):
+ sgd.backward_and_update(loss, threshold=50000)
+ else:
+ sgd.backward_and_sparse_update(loss,
+ spars=spars,
+ topK=topK,
+ corr=corr)
+ else:
+ sgd(loss)
+
+ if DIST:
+ # Reduce the evaluation accuracy and loss from multiple devices
+ reducer = tensor.Tensor((1,), dev, tensor.float32)
+ train_correct = reduce_variable(train_correct, sgd, reducer)
+ train_loss = reduce_variable(train_loss, sgd, reducer)
+
+ # Output the training loss and accuracy
+ if ((DIST == False) or (sgd.global_rank == 0)):
+ print('Training loss = %f, training accuracy = %f' %
+ (train_loss, train_correct /
+ (num_train_batch * batch_size * world_size)),
+ flush=True)
+
+ # Evaluation phase
+ autograd.training = False
+ for b in range(num_test_batch):
+ x = test_x[b * batch_size:(b + 1) * batch_size]
+ y = test_y[b * batch_size:(b + 1) * batch_size]
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+ out_test = model.forward(tx)
+ test_correct += accuracy(tensor.to_numpy(out_test), y)
+
+ if DIST:
+            # Reduce the evaluation accuracy from multiple devices
+ test_correct = reduce_variable(test_correct, sgd, reducer)
+
+ # Output the evaluation accuracy
+ if ((DIST == False) or (sgd.global_rank == 0)):
+ print('Evaluation accuracy = %f, Elapsed Time = %fs' %
+ (test_correct / (num_test_batch * batch_size * world_size),
+ time.time() - start_time),
+ flush=True)
+
+
+if __name__ == '__main__':
+
+ DIST = False
+ train_mnist_cnn(DIST=DIST)
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/mnist_dist.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/mnist_dist.py
new file mode 100644
index 000000000..3586127c4
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/mnist_dist.py
@@ -0,0 +1,25 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from mnist_cnn import *
+
+if __name__ == '__main__':
+
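+    # Typically launched through MPI, e.g. mpiexec -np <num_gpus> python mnist_dist.py (assumes an MPI-enabled SINGA build).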
+ DIST = True
+ train_mnist_cnn(DIST=DIST)
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/mnist_multiprocess.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/mnist_multiprocess.py
new file mode 100644
index 000000000..f51344ff0
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/mnist_multiprocess.py
@@ -0,0 +1,39 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from mnist_cnn import *
+import multiprocessing
+import sys
+
+if __name__ == '__main__':
+
+ # Generate a NCCL ID to be used for collective communication
+ nccl_id = singa.NcclIdHolder()
+
+ # Number of GPUs to be used
+ world_size = int(sys.argv[1])
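+    # Example launch (assuming two GPUs are available): python mnist_multiprocess.py 2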
+
+ process = []
+ for local_rank in range(0, world_size):
+ process.append(
+ multiprocessing.Process(target=train_mnist_cnn,
+ args=(True, local_rank, world_size, nccl_id)))
+
+ for p in process:
+ p.start()
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/resnet_cifar10.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/resnet_cifar10.py
new file mode 100644
index 000000000..754173699
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/resnet_cifar10.py
@@ -0,0 +1,292 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+try:
+ import pickle
+except ImportError:
+ import cPickle as pickle
+
+from singa import singa_wrap as singa
+from singa import autograd
+from singa import tensor
+from singa import device
+from singa import opt
+from PIL import Image
+import numpy as np
+import os
+import sys
+import time
+
+
+def load_dataset(filepath):
+ with open(filepath, 'rb') as fd:
+ try:
+ cifar10 = pickle.load(fd, encoding='latin1')
+ except TypeError:
+ cifar10 = pickle.load(fd)
+ image = cifar10['data'].astype(dtype=np.uint8)
+ image = image.reshape((-1, 3, 32, 32))
+ label = np.asarray(cifar10['labels'], dtype=np.uint8)
+ label = label.reshape(label.size, 1)
+ return image, label
+
+
+def load_train_data(dir_path='cifar-10-batches-py', num_batches=5):
+ labels = []
+ batchsize = 10000
+ images = np.empty((num_batches * batchsize, 3, 32, 32), dtype=np.uint8)
+ for did in range(1, num_batches + 1):
+ fname_train_data = dir_path + "/data_batch_{}".format(did)
+ image, label = load_dataset(check_dataset_exist(fname_train_data))
+ images[(did - 1) * batchsize:did * batchsize] = image
+ labels.extend(label)
+ images = np.array(images, dtype=np.float32)
+ labels = np.array(labels, dtype=np.int32)
+ return images, labels
+
+
+def load_test_data(dir_path='cifar-10-batches-py'):
+ images, labels = load_dataset(check_dataset_exist(dir_path + "/test_batch"))
+ return np.array(images, dtype=np.float32), np.array(labels, dtype=np.int32)
+
+
+def check_dataset_exist(dirpath):
+ if not os.path.exists(dirpath):
+ print(
+ 'Please download the cifar10 dataset using download_data.py (e.g. python ~/singa/examples/cifar10/download_data.py py)'
+ )
+ sys.exit(0)
+ return dirpath
+
+
+def normalize_for_resnet(train_x, test_x):
+ mean = [0.4914, 0.4822, 0.4465]
+ std = [0.2023, 0.1994, 0.2010]
+ train_x /= 255
+ test_x /= 255
+    for ch in range(0, 3):  # normalize all three RGB channels
+ train_x[:, ch, :, :] -= mean[ch]
+ train_x[:, ch, :, :] /= std[ch]
+ test_x[:, ch, :, :] -= mean[ch]
+ test_x[:, ch, :, :] /= std[ch]
+ return train_x, test_x
+
+
+def resize_dataset(x, IMG_SIZE):
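+    # Upscale every 32x32 CIFAR image to IMG_SIZE x IMG_SIZE with bilinear interpolation to match the ResNet-50 input size.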
+ num_data = x.shape[0]
+ dim = x.shape[1]
+ X = np.zeros(shape=(num_data, dim, IMG_SIZE, IMG_SIZE), dtype=np.float32)
+ for n in range(0, num_data):
+ for d in range(0, dim):
+ X[n, d, :, :] = np.array(Image.fromarray(x[n, d, :, :]).resize(
+ (IMG_SIZE, IMG_SIZE), Image.BILINEAR),
+ dtype=np.float32)
+ return X
+
+
+def augmentation(x, batch_size):
+ xpad = np.pad(x, [[0, 0], [0, 0], [4, 4], [4, 4]], 'symmetric')
+ for data_num in range(0, batch_size):
+ offset = np.random.randint(8, size=2)
+ x[data_num, :, :, :] = xpad[data_num, :, offset[0]:offset[0] + 32,
+ offset[1]:offset[1] + 32]
+ if_flip = np.random.randint(2)
+ if (if_flip):
+ x[data_num, :, :, :] = x[data_num, :, :, ::-1]
+ return x
+
+
+def accuracy(pred, target):
+ y = np.argmax(pred, axis=1)
+ t = np.argmax(target, axis=1)
+ a = y == t
+ return np.array(a, "int").sum()
+
+
+def to_categorical(y, num_classes):
+ y = np.array(y, dtype="int")
+ n = y.shape[0]
+ categorical = np.zeros((n, num_classes))
+ for i in range(0, n):
+ categorical[i, y[i]] = 1
+ categorical = categorical.astype(np.float32)
+ return categorical
+
+
+# All-reduce a NumPy accuracy or loss value across multiple devices and return it as a NumPy array
+def reduce_variable(variable, dist_opt, reducer):
+ reducer.copy_from_numpy(variable)
+ dist_opt.all_reduce(reducer.data)
+ dist_opt.wait()
+ output = tensor.to_numpy(reducer)
+ return output
+
+
+# Synchronize the initial model parameters (SINGA tensors) across all devices
+def synchronize(tensor, dist_opt):
+ dist_opt.all_reduce(tensor.data)
+ dist_opt.wait()
+ tensor /= dist_opt.world_size
+
+
+# Data partition
+def data_partition(dataset_x, dataset_y, global_rank, world_size):
+ data_per_rank = dataset_x.shape[0] // world_size
+ idx_start = global_rank * data_per_rank
+ idx_end = (global_rank + 1) * data_per_rank
+ return dataset_x[idx_start:idx_end], dataset_y[idx_start:idx_end]
+
+
+def train_cifar10(DIST=False,
+ local_rank=None,
+ world_size=None,
+ nccl_id=None,
+ partial_update=False):
+
+    # Define the hyperparameters for training ResNet-50 on CIFAR-10
+ sgd = opt.SGD(lr=0.005, momentum=0.9, weight_decay=1e-5)
+ max_epoch = 5
+ batch_size = 32
+
+ train_x, train_y = load_train_data()
+ test_x, test_y = load_test_data()
+ train_x, test_x = normalize_for_resnet(train_x, test_x)
+ IMG_SIZE = 224
+ num_classes = 10
+
+ if DIST:
+ # For distributed GPU training
+ sgd = opt.DistOpt(sgd,
+ nccl_id=nccl_id,
+ local_rank=local_rank,
+ world_size=world_size)
+ dev = device.create_cuda_gpu_on(sgd.local_rank)
+
+ # Dataset partition for distributed training
+ train_x, train_y = data_partition(train_x, train_y, sgd.global_rank,
+ sgd.world_size)
+ test_x, test_y = data_partition(test_x, test_y, sgd.global_rank,
+ sgd.world_size)
+ world_size = sgd.world_size
+ else:
+ # For single GPU
+ dev = device.create_cuda_gpu()
+ world_size = 1
+
+ from resnet import resnet50
+ model = resnet50(num_classes=num_classes)
+
+ tx = tensor.Tensor((batch_size, 3, IMG_SIZE, IMG_SIZE), dev, tensor.float32)
+ ty = tensor.Tensor((batch_size,), dev, tensor.int32)
+ num_train_batch = train_x.shape[0] // batch_size
+ num_test_batch = test_x.shape[0] // batch_size
+ idx = np.arange(train_x.shape[0], dtype=np.int32)
+
+ if DIST:
+        # Synchronize the initial parameters
+ autograd.training = True
+ x = np.random.randn(batch_size, 3, IMG_SIZE,
+ IMG_SIZE).astype(np.float32)
+ y = np.zeros(shape=(batch_size,), dtype=np.int32)
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+ out = model(tx)
+ loss = autograd.softmax_cross_entropy(out, ty)
+ param = []
+ for p, _ in autograd.backward(loss):
+ synchronize(p, sgd)
+ param.append(p)
+
+ for epoch in range(max_epoch):
+ start_time = time.time()
+ np.random.shuffle(idx)
+
+ if ((DIST == False) or (sgd.global_rank == 0)):
+ print('Starting Epoch %d:' % (epoch))
+
+ # Training phase
+ autograd.training = True
+ train_correct = np.zeros(shape=[1], dtype=np.float32)
+ test_correct = np.zeros(shape=[1], dtype=np.float32)
+ train_loss = np.zeros(shape=[1], dtype=np.float32)
+
+ for b in range(num_train_batch):
+ x = train_x[idx[b * batch_size:(b + 1) * batch_size]]
+ x = augmentation(x, batch_size)
+ x = resize_dataset(x, IMG_SIZE)
+ y = train_y[idx[b * batch_size:(b + 1) * batch_size]]
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+ out = model(tx)
+ loss = autograd.softmax_cross_entropy(out, ty)
+ train_correct += accuracy(tensor.to_numpy(out),
+ to_categorical(y, num_classes)).astype(
+ np.float32)
+ train_loss += tensor.to_numpy(loss)[0]
+ if not partial_update:
+ sgd.backward_and_update(loss)
+ else:
+ sgd.backward_and_partial_update(loss)
+
+ if DIST:
+ # Reduce the evaluation accuracy and loss from multiple devices
+ reducer = tensor.Tensor((1,), dev, tensor.float32)
+ train_correct = reduce_variable(train_correct, sgd, reducer)
+ train_loss = reduce_variable(train_loss, sgd, reducer)
+
+ # Output the training loss and accuracy
+ if ((DIST == False) or (sgd.global_rank == 0)):
+ print('Training loss = %f, training accuracy = %f' %
+ (train_loss, train_correct /
+ (num_train_batch * batch_size * world_size)),
+ flush=True)
+
+ if partial_update:
+            # Synchronize the parameters before the evaluation phase
+ for p in param:
+ synchronize(p, sgd)
+
+        # Evaluation phase
+ autograd.training = False
+ for b in range(num_test_batch):
+ x = test_x[b * batch_size:(b + 1) * batch_size]
+ x = resize_dataset(x, IMG_SIZE)
+ y = test_y[b * batch_size:(b + 1) * batch_size]
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+ out_test = model(tx)
+ test_correct += accuracy(tensor.to_numpy(out_test),
+ to_categorical(y, num_classes))
+
+ if DIST:
+            # Reduce the evaluation accuracy from multiple devices
+ test_correct = reduce_variable(test_correct, sgd, reducer)
+
+ # Output the evaluation accuracy
+ if ((DIST == False) or (sgd.global_rank == 0)):
+ print('Evaluation accuracy = %f, Elapsed Time = %fs' %
+ (test_correct / (num_test_batch * batch_size * world_size),
+ time.time() - start_time),
+ flush=True)
+
+
+if __name__ == '__main__':
+
+ DIST = False
+ train_cifar10(DIST=DIST)
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/resnet_dist.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/resnet_dist.py
new file mode 100644
index 000000000..6f9b56cee
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/resnet_dist.py
@@ -0,0 +1,87 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# the code is modified from
+# https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
+
+from singa import autograd
+from singa import tensor
+from singa import device
+from singa import opt
+
+import numpy as np
+from tqdm import trange
+
+if __name__ == "__main__":
+ sgd = opt.SGD(lr=0.1, momentum=0.9, weight_decay=1e-5)
+ sgd = opt.DistOpt(sgd)
+
+ if (sgd.global_rank == 0):
+ print("Start intialization...........", flush=True)
+
+ dev = device.create_cuda_gpu_on(sgd.local_rank)
+
+ from resnet import resnet50
+ model = resnet50()
+
+ niters = 100
+ batch_size = 32
+ IMG_SIZE = 224
+
+ tx = tensor.Tensor((batch_size, 3, IMG_SIZE, IMG_SIZE), dev)
+ ty = tensor.Tensor((batch_size,), dev, tensor.int32)
+ autograd.training = True
+ x = np.random.randn(batch_size, 3, IMG_SIZE, IMG_SIZE).astype(np.float32)
+ y = np.random.randint(0, 1000, batch_size, dtype=np.int32)
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+
+ import time
+
+ dev.Sync()
+ start = time.time()
+ fd = 0
+ softmax = 0
+ update = 0
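+    # Time the forward pass and the loss computation separately; the remaining per-iteration time is attributed to backward and update.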
+ with trange(niters) as t:
+ for _ in t:
+ dev.Sync()
+ tick = time.time()
+ x = model(tx)
+ dev.Sync()
+ fd += time.time() - tick
+ tick = time.time()
+ loss = autograd.softmax_cross_entropy(x, ty)
+ dev.Sync()
+ softmax += time.time() - tick
+ sgd.backward_and_update(loss)
+
+ dev.Sync()
+ end = time.time()
+ throughput = float(sgd.world_size * niters * batch_size) / (end - start)
+ titer = (end - start) / float(niters)
+ tforward = float(fd) / float(niters)
+ tsoftmax = float(softmax) / float(niters)
+ tbackward = titer - tforward - tsoftmax
+
+ if (sgd.global_rank == 0):
+ print("\nThroughput = {} per second".format(throughput), flush=True)
+ print("Total={}, forward={}, softmax={}, backward={}".format(
+ titer, tforward, tsoftmax, tbackward),
+ flush=True)
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/sparsification_mnist.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/sparsification_mnist.py
new file mode 100644
index 000000000..315605acd
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/sparsification_mnist.py
@@ -0,0 +1,45 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from mnist_cnn import *
+import multiprocessing
+import sys
+
+if __name__ == '__main__':
+
+ # Generate a NCCL ID to be used for collective communication
+ nccl_id = singa.NcclIdHolder()
+
+ # Number of GPUs to be used
+ world_size = int(sys.argv[1])
+
+    # Gradient sparsification settings
+    topK = False  # When topK is False, sparsify using a constant absolute threshold
+    corr = True  # If True, use the locally accumulated gradient for error correction
+    sparsThreshold = 0.05  # The constant absolute threshold for sparsification
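+    # These values map onto the (DIST, local_rank, world_size, nccl_id, spars, topK, corr) parameters of train_mnist_cnn.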
+
+ process = []
+ for local_rank in range(0, world_size):
+ process.append(
+ multiprocessing.Process(target=train_mnist_cnn,
+ args=(True, local_rank, world_size, nccl_id,
+ sparsThreshold, topK, corr)))
+
+ for p in process:
+ p.start()
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/xceptionnet.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/xceptionnet.py
new file mode 100644
index 000000000..8fb23d8cb
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/xceptionnet.py
@@ -0,0 +1,303 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+from singa import autograd
+from singa import tensor
+from singa import device
+from singa import layer
+from singa import opt
+
+import numpy as np
+from tqdm import trange
+
+# the code is modified from
+# https://github.com/Cadene/pretrained-models.pytorch/blob/master/pretrainedmodels/models/xception.py
+
+
+class Block(layer.Layer):
+
+ def __init__(self,
+ in_filters,
+ out_filters,
+ reps,
+ strides=1,
+ padding=0,
+ start_with_relu=True,
+ grow_first=True):
+ super(Block, self).__init__()
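+        # A residual block made of a stack of `reps` ReLU -> SeparableConv2d -> BatchNorm stages, with a strided
+        # 1x1 convolution (plus BatchNorm) on the skip path whenever the number of filters or the stride changes.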
+
+ if out_filters != in_filters or strides != 1:
+ self.skip = layer.Conv2d(in_filters,
+ out_filters,
+ 1,
+ stride=strides,
+ padding=padding,
+ bias=False)
+ self.skipbn = layer.BatchNorm2d(out_filters)
+ else:
+ self.skip = None
+
+ self.layers = []
+
+ filters = in_filters
+ if grow_first:
+ self.layers.append(layer.ReLU())
+ self.layers.append(
+ layer.SeparableConv2d(in_filters,
+ out_filters,
+ 3,
+ stride=1,
+ padding=1,
+ bias=False))
+ self.layers.append(layer.BatchNorm2d(out_filters))
+ filters = out_filters
+
+ for i in range(reps - 1):
+ self.layers.append(layer.ReLU())
+ self.layers.append(
+ layer.SeparableConv2d(filters,
+ filters,
+ 3,
+ stride=1,
+ padding=1,
+ bias=False))
+ self.layers.append(layer.BatchNorm2d(filters))
+
+ if not grow_first:
+ self.layers.append(layer.ReLU())
+ self.layers.append(
+ layer.SeparableConv2d(in_filters,
+ out_filters,
+ 3,
+ stride=1,
+ padding=1,
+ bias=False))
+ self.layers.append(layer.BatchNorm2d(out_filters))
+
+ if not start_with_relu:
+ self.layers = self.layers[1:]
+ else:
+ self.layers[0] = layer.ReLU()
+
+ if strides != 1:
+ self.layers.append(layer.MaxPool2d(3, strides, padding + 1))
+
+ self.register_layers(*self.layers)
+
+ self.add = layer.Add()
+
+ def forward(self, x):
+ y = self.layers[0](x)
+        for lyr in self.layers[1:]:  # avoid shadowing the imported `layer` module
+            if isinstance(y, tuple):
+                y = y[0]
+            y = lyr(y)
+
+ if self.skip is not None:
+ skip = self.skip(x)
+ skip = self.skipbn(skip)
+ else:
+ skip = x
+ y = self.add(y, skip)
+ return y
+
+
+__all__ = ['Xception']
+
+
+class Xception(layer.Layer):
+ """
+ Xception optimized for the ImageNet dataset, as specified in
+ https://arxiv.org/pdf/1610.02357.pdf
+ """
+
+ def __init__(self, num_classes=1000):
+ """ Constructor
+ Args:
+ num_classes: number of classes
+ """
+ super(Xception, self).__init__()
+ self.num_classes = num_classes
+
+ self.conv1 = layer.Conv2d(3, 32, 3, 2, 0, bias=False)
+ self.bn1 = layer.BatchNorm2d(32)
+ self.relu1 = layer.ReLU()
+
+ self.conv2 = layer.Conv2d(32, 64, 3, 1, 1, bias=False)
+ self.bn2 = layer.BatchNorm2d(64)
+ self.relu2 = layer.ReLU()
+ # do relu here
+
+ self.block1 = Block(64,
+ 128,
+ 2,
+ 2,
+ padding=0,
+ start_with_relu=False,
+ grow_first=True)
+ self.block2 = Block(128,
+ 256,
+ 2,
+ 2,
+ padding=0,
+ start_with_relu=True,
+ grow_first=True)
+ self.block3 = Block(256,
+ 728,
+ 2,
+ 2,
+ padding=0,
+ start_with_relu=True,
+ grow_first=True)
+
+ self.block4 = Block(728,
+ 728,
+ 3,
+ 1,
+ start_with_relu=True,
+ grow_first=True)
+ self.block5 = Block(728,
+ 728,
+ 3,
+ 1,
+ start_with_relu=True,
+ grow_first=True)
+ self.block6 = Block(728,
+ 728,
+ 3,
+ 1,
+ start_with_relu=True,
+ grow_first=True)
+ self.block7 = Block(728,
+ 728,
+ 3,
+ 1,
+ start_with_relu=True,
+ grow_first=True)
+
+ self.block8 = Block(728,
+ 728,
+ 3,
+ 1,
+ start_with_relu=True,
+ grow_first=True)
+ self.block9 = Block(728,
+ 728,
+ 3,
+ 1,
+ start_with_relu=True,
+ grow_first=True)
+ self.block10 = Block(728,
+ 728,
+ 3,
+ 1,
+ start_with_relu=True,
+ grow_first=True)
+ self.block11 = Block(728,
+ 728,
+ 3,
+ 1,
+ start_with_relu=True,
+ grow_first=True)
+
+ self.block12 = Block(728,
+ 1024,
+ 2,
+ 2,
+ start_with_relu=True,
+ grow_first=False)
+
+ self.conv3 = layer.SeparableConv2d(1024, 1536, 3, 1, 1)
+ self.bn3 = layer.BatchNorm2d(1536)
+ self.relu3 = layer.ReLU()
+
+ # Relu Layer
+ self.conv4 = layer.SeparableConv2d(1536, 2048, 3, 1, 1)
+ self.bn4 = layer.BatchNorm2d(2048)
+
+ self.relu4 = layer.ReLU()
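+        # With the default 299x299 input the final feature map is 10x10, so this 10x10 max pool acts as global pooling.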
+ self.globalpooling = layer.MaxPool2d(10, 1)
+ self.flatten = layer.Flatten()
+ self.fc = layer.Linear(2048, num_classes)
+
+ def features(self, input):
+ x = self.conv1(input)
+ x = self.bn1(x)
+ x = self.relu1(x)
+
+ x = self.conv2(x)
+ x = self.bn2(x)
+ x = self.relu2(x)
+
+ x = self.block1(x)
+ x = self.block2(x)
+ x = self.block3(x)
+ x = self.block4(x)
+ x = self.block5(x)
+ x = self.block6(x)
+ x = self.block7(x)
+ x = self.block8(x)
+ x = self.block9(x)
+ x = self.block10(x)
+ x = self.block11(x)
+ x = self.block12(x)
+
+ x = self.conv3(x)
+ x = self.bn3(x)
+ x = self.relu3(x)
+
+ x = self.conv4(x)
+ x = self.bn4(x)
+ return x
+
+ def logits(self, features):
+ x = self.relu4(features)
+ x = self.globalpooling(x)
+ x = self.flatten(x)
+ x = self.fc(x)
+ return x
+
+ def forward(self, input):
+ x = self.features(input)
+ x = self.logits(x)
+ return x
+
+
+if __name__ == '__main__':
+ model = Xception(num_classes=1000)
+    print('Start initialization...')
+ dev = device.create_cuda_gpu_on(0)
+ #dev = device.create_cuda_gpu()
+
+ niters = 20
+ batch_size = 16
+ IMG_SIZE = 299
+ sgd = opt.SGD(lr=0.1, momentum=0.9, weight_decay=1e-5)
+
+ tx = tensor.Tensor((batch_size, 3, IMG_SIZE, IMG_SIZE), dev)
+ ty = tensor.Tensor((batch_size,), dev, tensor.int32)
+ autograd.training = True
+ x = np.random.randn(batch_size, 3, IMG_SIZE, IMG_SIZE).astype(np.float32)
+ y = np.random.randint(0, 1000, batch_size, dtype=np.int32)
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+
+ with trange(niters) as t:
+ for _ in t:
+ x = model(tx)
+ loss = autograd.softmax_cross_entropy(x, ty)
+ sgd(loss)
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/benchmark.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/benchmark.py
new file mode 100644
index 000000000..9f69feee0
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/benchmark.py
@@ -0,0 +1,121 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# the code is modified from
+# https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
+
+from singa import opt
+from singa import device
+from singa import tensor
+
+import argparse
+import time
+import numpy as np
+from tqdm import trange
+
+
+def train_resnet(DIST=True, graph=True, sequential=False, verbosity=0):
+
+    # Define the hyperparameters for the ResNet-50 throughput benchmark
+ niters = 100
+ batch_size = 32
+ sgd = opt.SGD(lr=0.1, momentum=0.9, weight_decay=1e-5)
+
+ IMG_SIZE = 224
+
+ # For distributed training, sequential has better throughput in the current version
+ if DIST == True:
+ sgd = opt.DistOpt(sgd)
+ world_size = sgd.world_size
+ local_rank = sgd.local_rank
+ global_rank = sgd.global_rank
+ sequential = True
+ else:
+ local_rank = 0
+ world_size = 1
+ global_rank = 0
+ sequential = False
+
+ dev = device.create_cuda_gpu_on(local_rank)
+
+ tx = tensor.Tensor((batch_size, 3, IMG_SIZE, IMG_SIZE), dev)
+ ty = tensor.Tensor((batch_size,), dev, tensor.int32)
+ x = np.random.randn(batch_size, 3, IMG_SIZE, IMG_SIZE).astype(np.float32)
+ y = np.random.randint(0, 1000, batch_size, dtype=np.int32)
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+
+ dev.SetVerbosity(verbosity)
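+    # Presumably skip the first 5 iterations in the device time profiler so warm-up does not skew the reported timings.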
+ dev.SetSkipIteration(5)
+
+ # Construct the model
+ from model import resnet
+ model = resnet.resnet50(num_channels=3, num_classes=1000)
+
+ model.train()
+ model.set_optimizer(sgd)
+ model.compile([tx], is_train=True, use_graph=graph, sequential=sequential)
+
+ # Train model
+ dev.Sync()
+ start = time.time()
+ with trange(niters) as t:
+ for _ in t:
+ model(tx, ty, dist_option='fp32', spars=None)
+
+ dev.Sync()
+ end = time.time()
+ titer = (end - start) / float(niters)
+ throughput = float(niters * batch_size * world_size) / (end - start)
+ if global_rank == 0:
+ print("\nThroughput = {} per second".format(throughput), flush=True)
+ print("TotalTime={}".format(end - start), flush=True)
+ print("Total={}".format(titer), flush=True)
+ dev.PrintTimeProfiling()
+
+
+if __name__ == "__main__":
+
+ parser = argparse.ArgumentParser(
+ description='Throughput test using Resnet 50')
+    parser.add_argument('--dist',
+                        '--enable-dist',
+                        default=False,
+                        action='store_true',
+                        help='enable distributed training',
+                        dest='DIST')
+    parser.add_argument('--no-graph',
+                        '--disable-graph',
+                        default=True,
+                        action='store_false',
+                        help='disable graph',
+                        dest='graph')
+ parser.add_argument('--verbosity',
+ '--log-verbosity',
+ default=0,
+ type=int,
+ help='logging verbosity',
+ dest='verbosity')
+
+ args = parser.parse_args()
+
+ train_resnet(DIST=args.DIST,
+ graph=args.graph,
+ sequential=False,
+ verbosity=args.verbosity)
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/cifar10.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/cifar10.py
new file mode 100644
index 000000000..5caaf30f4
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/cifar10.py
@@ -0,0 +1,89 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+try:
+ import pickle
+except ImportError:
+ import cPickle as pickle
+
+import numpy as np
+import os
+import sys
+
+
+def load_dataset(filepath):
+ with open(filepath, 'rb') as fd:
+ try:
+ cifar10 = pickle.load(fd, encoding='latin1')
+ except TypeError:
+ cifar10 = pickle.load(fd)
+ image = cifar10['data'].astype(dtype=np.uint8)
+ image = image.reshape((-1, 3, 32, 32))
+ label = np.asarray(cifar10['labels'], dtype=np.uint8)
+ label = label.reshape(label.size, 1)
+ return image, label
+
+
+def load_train_data(dir_path='/tmp/cifar-10-batches-py', num_batches=5):  # the dataset must be saved under this local directory
+ labels = []
+ batchsize = 10000
+ images = np.empty((num_batches * batchsize, 3, 32, 32), dtype=np.uint8)
+ for did in range(1, num_batches + 1):
+ fname_train_data = dir_path + "/data_batch_{}".format(did)
+ image, label = load_dataset(check_dataset_exist(fname_train_data))
+ images[(did - 1) * batchsize:did * batchsize] = image
+ labels.extend(label)
+ images = np.array(images, dtype=np.float32)
+ labels = np.array(labels, dtype=np.int32)
+ return images, labels
+
+
+def load_test_data(dir_path='/tmp/cifar-10-batches-py'):  # the dataset must be saved under this local directory
+ images, labels = load_dataset(check_dataset_exist(dir_path + "/test_batch"))
+ return np.array(images, dtype=np.float32), np.array(labels, dtype=np.int32)
+
+
+def check_dataset_exist(dirpath):
+ if not os.path.exists(dirpath):
+ print(
+ 'Please download the cifar10 dataset using python data/download_cifar10.py'
+ )
+ sys.exit(0)
+ return dirpath
+
+
+def normalize(train_x, val_x):
+ mean = [0.4914, 0.4822, 0.4465]
+ std = [0.2023, 0.1994, 0.2010]
+ train_x /= 255
+ val_x /= 255
+    for ch in range(0, 3):  # normalize all three RGB channels
+ train_x[:, ch, :, :] -= mean[ch]
+ train_x[:, ch, :, :] /= std[ch]
+ val_x[:, ch, :, :] -= mean[ch]
+ val_x[:, ch, :, :] /= std[ch]
+ return train_x, val_x
+
+def load():
+ train_x, train_y = load_train_data()
+ val_x, val_y = load_test_data()
+ train_x, val_x = normalize(train_x, val_x)
+ train_y = train_y.flatten()
+ val_y = val_y.flatten()
+ return train_x, train_y, val_x, val_y
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/cifar100.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/cifar100.py
new file mode 100644
index 000000000..88b943f07
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/cifar100.py
@@ -0,0 +1,81 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+try:
+ import pickle
+except ImportError:
+ import cPickle as pickle
+
+import numpy as np
+import os
+import sys
+
+
+def load_dataset(filepath):
+ with open(filepath, 'rb') as fd:
+ try:
+ cifar100 = pickle.load(fd, encoding='latin1')
+ except TypeError:
+ cifar100 = pickle.load(fd)
+ image = cifar100['data'].astype(dtype=np.uint8)
+ image = image.reshape((-1, 3, 32, 32))
+ label = np.asarray(cifar100['fine_labels'], dtype=np.uint8)
+ label = label.reshape(label.size, 1)
+ return image, label
+
+
+def load_train_data(dir_path='/tmp/cifar-100-python'):
+ images, labels = load_dataset(check_dataset_exist(dir_path + "/train"))
+ return np.array(images, dtype=np.float32), np.array(labels, dtype=np.int32)
+
+
+def load_test_data(dir_path='/tmp/cifar-100-python'):
+ images, labels = load_dataset(check_dataset_exist(dir_path + "/test"))
+ return np.array(images, dtype=np.float32), np.array(labels, dtype=np.int32)
+
+
+def check_dataset_exist(dirpath):
+ if not os.path.exists(dirpath):
+ print(
+ 'Please download the cifar100 dataset using python data/download_cifar100.py'
+ )
+ sys.exit(0)
+ return dirpath
+
+
+def normalize(train_x, val_x):
+ mean = [0.4914, 0.4822, 0.4465]
+ std = [0.2023, 0.1994, 0.2010]
+ train_x /= 255
+ val_x /= 255
+    for ch in range(0, 3):  # normalize all three RGB channels
+ train_x[:, ch, :, :] -= mean[ch]
+ train_x[:, ch, :, :] /= std[ch]
+ val_x[:, ch, :, :] -= mean[ch]
+ val_x[:, ch, :, :] /= std[ch]
+ return train_x, val_x
+
+
+def load():
+ train_x, train_y = load_train_data()
+ val_x, val_y = load_test_data()
+ train_x, val_x = normalize(train_x, val_x)
+ train_y = train_y.flatten()
+ val_y = val_y.flatten()
+ return train_x, train_y, val_x, val_y
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/download_cifar10.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/download_cifar10.py
new file mode 100755
index 000000000..8e4467921
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/download_cifar10.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+from future import standard_library
+standard_library.install_aliases()
+import urllib.request, urllib.parse, urllib.error
+import tarfile
+import os
+import sys
+
+
+def extract_tarfile(filepath):
+ if os.path.exists(filepath):
+        print('The tar file already exists. Extracting it now...')
+ with tarfile.open(filepath, 'r') as f:
+            f.extractall('/tmp/')  # adjust this local directory as needed
+ print('Finished!')
+ sys.exit(0)
+
+
+def do_download(dirpath, gzfile, url):
+ print('Downloading CIFAR from %s' % (url))
+ urllib.request.urlretrieve(url, gzfile)
+ extract_tarfile(gzfile)
+ print('Finished!')
+
+
+if __name__ == '__main__':
+    dirpath = '/tmp/'  # adjust this local directory as needed
+ gzfile = dirpath + 'cifar-10-python.tar.gz'
+ url = 'http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
+ do_download(dirpath, gzfile, url)
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/download_cifar100.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/download_cifar100.py
new file mode 100755
index 000000000..5f1e21b78
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/download_cifar100.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from download_cifar10 import do_download
+
+if __name__ == '__main__':
+    dirpath = '/tmp/'  # adjust this local directory as needed
+ gzfile = dirpath + 'cifar-100-python.tar.gz'
+ url = 'http://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz'
+ do_download(dirpath, gzfile, url)
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/download_mnist.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/download_mnist.py
new file mode 100644
index 000000000..65acb0e28
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/download_mnist.py
@@ -0,0 +1,49 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+import os
+import urllib.request
+
+
+def check_exist_or_download(url):
+
+ download_dir = '/tmp/'
+ name = url.rsplit('/', 1)[-1]
+ filename = os.path.join(download_dir, name)
+
+ if not os.path.isfile(filename):
+ print("Downloading %s" % url)
+ urllib.request.urlretrieve(url, filename)
+ else:
+ print("Already Downloaded: %s" % url)
+
+
+if __name__ == '__main__':
+
+    # URLs of the MNIST dataset files
+ train_x_url = 'http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz'
+ train_y_url = 'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz'
+ valid_x_url = 'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz'
+ valid_y_url = 'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz'
+
+    # Download the MNIST dataset files if they are not already present
+ check_exist_or_download(train_x_url)
+ check_exist_or_download(train_y_url)
+ check_exist_or_download(valid_x_url)
+ check_exist_or_download(valid_y_url)
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/mnist.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/mnist.py
new file mode 100644
index 000000000..b25bf5e67
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/mnist.py
@@ -0,0 +1,91 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+import numpy as np
+import os
+import sys
+import gzip
+import codecs
+
+
+def check_dataset_exist(dirpath):
+ if not os.path.exists(dirpath):
+ print(
+ 'The MNIST dataset does not exist. Please download the mnist dataset using python data/download_mnist.py'
+ )
+ sys.exit(0)
+ return dirpath
+
+
+def load_dataset():
+    train_x_path = '/tmp/train-images-idx3-ubyte.gz'  # adjust to your local data directory
+    train_y_path = '/tmp/train-labels-idx1-ubyte.gz'  # adjust to your local data directory
+    valid_x_path = '/tmp/t10k-images-idx3-ubyte.gz'  # adjust to your local data directory
+    valid_y_path = '/tmp/t10k-labels-idx1-ubyte.gz'  # adjust to your local data directory
+
+ train_x = read_image_file(check_dataset_exist(train_x_path)).astype(
+ np.float32)
+ train_y = read_label_file(check_dataset_exist(train_y_path)).astype(
+ np.float32)
+ valid_x = read_image_file(check_dataset_exist(valid_x_path)).astype(
+ np.float32)
+ valid_y = read_label_file(check_dataset_exist(valid_y_path)).astype(
+ np.float32)
+ return train_x, train_y, valid_x, valid_y
+
+
+def read_label_file(path):
+ with gzip.open(path, 'rb') as f:
+ data = f.read()
+ assert get_int(data[:4]) == 2049
+ length = get_int(data[4:8])
+ parsed = np.frombuffer(data, dtype=np.uint8, offset=8).reshape((length))
+ return parsed
+
+
+def get_int(b):
+ return int(codecs.encode(b, 'hex'), 16)
+
+
+def read_image_file(path):
+ with gzip.open(path, 'rb') as f:
+ data = f.read()
+ assert get_int(data[:4]) == 2051
+ length = get_int(data[4:8])
+ num_rows = get_int(data[8:12])
+ num_cols = get_int(data[12:16])
+ parsed = np.frombuffer(data, dtype=np.uint8, offset=16).reshape(
+ (length, 1, num_rows, num_cols))
+ return parsed
+
+
+def normalize(train_x, val_x):
+ train_x /= 255
+ val_x /= 255
+ return train_x, val_x
+
+
+def load():
+ train_x, train_y, val_x, val_y = load_dataset()
+ train_x, val_x = normalize(train_x, val_x)
+ train_x = train_x.astype(np.float32)
+ val_x = val_x.astype(np.float32)
+ train_y = train_y.astype(np.int32)
+ val_y = val_y.astype(np.int32)
+ return train_x, train_y, val_x, val_y
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/model/alexnet.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/model/alexnet.py
new file mode 100644
index 000000000..cad7b1e3f
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/model/alexnet.py
@@ -0,0 +1,119 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from singa import layer
+from singa import model
+
+
+class AlexNet(model.Model):
+
+ def __init__(self, num_classes=10, num_channels=1):
+ super(AlexNet, self).__init__()
+ self.num_classes = num_classes
+ self.input_size = 224
+ self.dimension = 4
+ self.conv1 = layer.Conv2d(num_channels, 64, 11, stride=4, padding=2)
+ self.conv2 = layer.Conv2d(64, 192, 5, padding=2)
+ self.conv3 = layer.Conv2d(192, 384, 3, padding=1)
+ self.conv4 = layer.Conv2d(384, 256, 3, padding=1)
+ self.conv5 = layer.Conv2d(256, 256, 3, padding=1)
+ self.linear1 = layer.Linear(4096)
+ self.linear2 = layer.Linear(4096)
+ self.linear3 = layer.Linear(num_classes)
+ self.pooling1 = layer.MaxPool2d(2, 2, padding=0)
+ self.pooling2 = layer.MaxPool2d(2, 2, padding=0)
+ self.pooling3 = layer.MaxPool2d(2, 2, padding=0)
+ self.avg_pooling1 = layer.AvgPool2d(3, 2, padding=0)
+ self.relu1 = layer.ReLU()
+ self.relu2 = layer.ReLU()
+ self.relu3 = layer.ReLU()
+ self.relu4 = layer.ReLU()
+ self.relu5 = layer.ReLU()
+ self.relu6 = layer.ReLU()
+ self.relu7 = layer.ReLU()
+ self.flatten = layer.Flatten()
+ self.dropout1 = layer.Dropout()
+ self.dropout2 = layer.Dropout()
+ self.softmax_cross_entropy = layer.SoftMaxCrossEntropy()
+
+ def forward(self, x):
+ y = self.conv1(x)
+ y = self.relu1(y)
+ y = self.pooling1(y)
+ y = self.conv2(y)
+ y = self.relu2(y)
+ y = self.pooling2(y)
+ y = self.conv3(y)
+ y = self.relu3(y)
+ y = self.conv4(y)
+ y = self.relu4(y)
+ y = self.conv5(y)
+ y = self.relu5(y)
+ y = self.pooling3(y)
+ y = self.avg_pooling1(y)
+ y = self.flatten(y)
+ y = self.dropout1(y)
+ y = self.linear1(y)
+ y = self.relu6(y)
+ y = self.dropout2(y)
+ y = self.linear2(y)
+ y = self.relu7(y)
+ y = self.linear3(y)
+ return y
+
+ def train_one_batch(self, x, y, dist_option, spars):
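+        # dist_option picks the update scheme: 'plain' calls the optimizer directly, 'half' uses the half-precision
+        # update, 'partialUpdate' updates a subset of parameters, and the two sparse options transmit sparsified
+        # gradients controlled by spars.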
+ out = self.forward(x)
+ loss = self.softmax_cross_entropy(out, y)
+
+ if dist_option == 'plain':
+ self.optimizer(loss)
+ elif dist_option == 'half':
+ self.optimizer.backward_and_update_half(loss)
+ elif dist_option == 'partialUpdate':
+ self.optimizer.backward_and_partial_update(loss)
+ elif dist_option == 'sparseTopK':
+ self.optimizer.backward_and_sparse_update(loss,
+ topK=True,
+ spars=spars)
+ elif dist_option == 'sparseThreshold':
+ self.optimizer.backward_and_sparse_update(loss,
+ topK=False,
+ spars=spars)
+ return out, loss
+
+ def set_optimizer(self, optimizer):
+ self.optimizer = optimizer
+
+
+def create_model(pretrained=False, **kwargs):
+ """Constructs a AlexNet model.
+
+ Args:
+ pretrained (bool): If True, returns a pre-trained model.
+
+ Returns:
+ The created AlexNet model.
+
+ """
+ model = AlexNet(**kwargs)
+
+ return model
+
+
+__all__ = ['AlexNet', 'create_model']
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/model/cnn.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/model/cnn.py
new file mode 100644
index 000000000..3877e83af
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/model/cnn.py
@@ -0,0 +1,90 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from singa import layer
+from singa import model
+
+
+class CNN(model.Model):
+
+ def __init__(self, num_classes=10, num_channels=1):
+ super(CNN, self).__init__()
+ self.num_classes = num_classes
+ self.input_size = 28
+ self.dimension = 4
+ self.conv1 = layer.Conv2d(num_channels, 20, 5, padding=0, activation="RELU")
+ self.conv2 = layer.Conv2d(20, 50, 5, padding=0, activation="RELU")
+ self.linear1 = layer.Linear(500)
+ self.linear2 = layer.Linear(num_classes)
+ self.pooling1 = layer.MaxPool2d(2, 2, padding=0)
+ self.pooling2 = layer.MaxPool2d(2, 2, padding=0)
+ self.relu = layer.ReLU()
+ self.flatten = layer.Flatten()
+ self.softmax_cross_entropy = layer.SoftMaxCrossEntropy()
+
+ def forward(self, x):
+ y = self.conv1(x)
+ y = self.pooling1(y)
+ y = self.conv2(y)
+ y = self.pooling2(y)
+ y = self.flatten(y)
+ y = self.linear1(y)
+ y = self.relu(y)
+ y = self.linear2(y)
+ return y
+
+ def train_one_batch(self, x, y, dist_option, spars):
+ out = self.forward(x)
+ loss = self.softmax_cross_entropy(out, y)
+
+ if dist_option == 'plain':
+ self.optimizer(loss)
+ elif dist_option == 'half':
+ self.optimizer.backward_and_update_half(loss)
+ elif dist_option == 'partialUpdate':
+ self.optimizer.backward_and_partial_update(loss)
+ elif dist_option == 'sparseTopK':
+ self.optimizer.backward_and_sparse_update(loss,
+ topK=True,
+ spars=spars)
+ elif dist_option == 'sparseThreshold':
+ self.optimizer.backward_and_sparse_update(loss,
+ topK=False,
+ spars=spars)
+ return out, loss
+
+ def set_optimizer(self, optimizer):
+ self.optimizer = optimizer
+
+
+def create_model(pretrained=False, **kwargs):
+ """Constructs a CNN model.
+
+ Args:
+ pretrained (bool): If True, returns a pre-trained model.
+
+ Returns:
+ The created CNN model.
+ """
+ model = CNN(**kwargs)
+
+ return model
+
+
+__all__ = ['CNN', 'create_model']
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/model/resnet.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/model/resnet.py
new file mode 100644
index 000000000..28b5f9949
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/model/resnet.py
@@ -0,0 +1,300 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# the code is modified from
+# https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
+
+from singa import layer
+from singa import model
+
+
+def conv3x3(in_planes, out_planes, stride=1):
+ """3x3 convolution with padding"""
+ return layer.Conv2d(
+ in_planes,
+ out_planes,
+ 3,
+ stride=stride,
+ padding=1,
+ bias=False,
+ )
+
+
+class BasicBlock(layer.Layer):
+ expansion = 1
+
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
+ super(BasicBlock, self).__init__()
+ self.conv1 = conv3x3(inplanes, planes, stride)
+ self.bn1 = layer.BatchNorm2d(planes)
+ self.conv2 = conv3x3(planes, planes)
+ self.bn2 = layer.BatchNorm2d(planes)
+ self.relu1 = layer.ReLU()
+ self.add = layer.Add()
+ self.relu2 = layer.ReLU()
+ self.downsample = downsample
+ self.stride = stride
+
+ def forward(self, x):
+ residual = x
+
+ out = self.conv1(x)
+ out = self.bn1(out)
+ out = self.relu1(out)
+
+ out = self.conv2(out)
+ out = self.bn2(out)
+
+ if self.downsample is not None:
+ residual = self.downsample(x)
+
+ out = self.add(out, residual)
+ out = self.relu2(out)
+
+ return out
+
+
+class Bottleneck(layer.Layer):
+ expansion = 4
+
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
+ super(Bottleneck, self).__init__()
+ self.conv1 = layer.Conv2d(inplanes, planes, 1, bias=False)
+ self.bn1 = layer.BatchNorm2d(planes)
+ self.relu1 = layer.ReLU()
+ self.conv2 = layer.Conv2d(planes,
+ planes,
+ 3,
+ stride=stride,
+ padding=1,
+ bias=False)
+ self.bn2 = layer.BatchNorm2d(planes)
+ self.relu2 = layer.ReLU()
+ self.conv3 = layer.Conv2d(planes,
+ planes * self.expansion,
+ 1,
+ bias=False)
+ self.bn3 = layer.BatchNorm2d(planes * self.expansion)
+
+ self.add = layer.Add()
+ self.relu3 = layer.ReLU()
+
+ self.downsample = downsample
+ self.stride = stride
+
+ def forward(self, x):
+ residual = x
+
+ out = self.conv1(x)
+ out = self.bn1(out)
+ out = self.relu1(out)
+
+ out = self.conv2(out)
+ out = self.bn2(out)
+ out = self.relu2(out)
+
+ out = self.conv3(out)
+ out = self.bn3(out)
+
+ if self.downsample is not None:
+ residual = self.downsample(x)
+
+ out = self.add(out, residual)
+ out = self.relu3(out)
+
+ return out
+
+
+__all__ = [
+ 'ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152'
+]
+
+
+class ResNet(model.Model):
+
+ def __init__(self, block, layers, num_classes=10, num_channels=3):
+ self.inplanes = 64
+ super(ResNet, self).__init__()
+ self.num_classes = num_classes
+ self.input_size = 224
+ self.dimension = 4
+ self.conv1 = layer.Conv2d(num_channels,
+ 64,
+ 7,
+ stride=2,
+ padding=3,
+ bias=False)
+ self.bn1 = layer.BatchNorm2d(64)
+ self.relu = layer.ReLU()
+ self.maxpool = layer.MaxPool2d(kernel_size=3, stride=2, padding=1)
+ self.layer1, layers1 = self._make_layer(block, 64, layers[0])
+ self.layer2, layers2 = self._make_layer(block, 128, layers[1], stride=2)
+ self.layer3, layers3 = self._make_layer(block, 256, layers[2], stride=2)
+ self.layer4, layers4 = self._make_layer(block, 512, layers[3], stride=2)
+ self.avgpool = layer.AvgPool2d(7, stride=1)
+ self.flatten = layer.Flatten()
+ self.fc = layer.Linear(num_classes)
+ self.softmax_cross_entropy = layer.SoftMaxCrossEntropy()
+
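+        # _make_layer returns a plain Python closure plus the list of blocks it
+        # wraps, so the blocks are registered explicitly below to ensure their
+        # parameters are tracked; layers held only inside a closure are not
+        # discovered automatically.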
+ self.register_layers(*layers1, *layers2, *layers3, *layers4)
+
+ def _make_layer(self, block, planes, blocks, stride=1):
+ downsample = None
+ if stride != 1 or self.inplanes != planes * block.expansion:
+ conv = layer.Conv2d(
+ self.inplanes,
+ planes * block.expansion,
+ 1,
+ stride=stride,
+ bias=False,
+ )
+ bn = layer.BatchNorm2d(planes * block.expansion)
+
+ def _downsample(x):
+ return bn(conv(x))
+
+ downsample = _downsample
+
+ layers = []
+ layers.append(block(self.inplanes, planes, stride, downsample))
+ self.inplanes = planes * block.expansion
+ for i in range(1, blocks):
+ layers.append(block(self.inplanes, planes))
+
+ def forward(x):
+ for layer in layers:
+ x = layer(x)
+ return x
+
+ return forward, layers
+
+ def forward(self, x):
+ x = self.conv1(x)
+ x = self.bn1(x)
+ x = self.relu(x)
+ x = self.maxpool(x)
+
+ x = self.layer1(x)
+ x = self.layer2(x)
+ x = self.layer3(x)
+ x = self.layer4(x)
+
+ x = self.avgpool(x)
+ x = self.flatten(x)
+ x = self.fc(x)
+
+ return x
+
+ def train_one_batch(self, x, y, dist_option, spars):
+ out = self.forward(x)
+ loss = self.softmax_cross_entropy(out, y)
+
+ if dist_option == 'plain':
+ self.optimizer(loss)
+ elif dist_option == 'half':
+ self.optimizer.backward_and_update_half(loss)
+ elif dist_option == 'partialUpdate':
+ self.optimizer.backward_and_partial_update(loss)
+ elif dist_option == 'sparseTopK':
+ self.optimizer.backward_and_sparse_update(loss,
+ topK=True,
+ spars=spars)
+ elif dist_option == 'sparseThreshold':
+ self.optimizer.backward_and_sparse_update(loss,
+ topK=False,
+ spars=spars)
+ return out, loss
+
+ def set_optimizer(self, optimizer):
+ self.optimizer = optimizer
+
+
+def resnet18(pretrained=False, **kwargs):
+ """Constructs a ResNet-18 model.
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet.
+
+ Returns:
+ The created ResNet-18 model.
+ """
+ model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
+
+ return model
+
+
+def resnet34(pretrained=False, **kwargs):
+ """Constructs a ResNet-34 model.
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet.
+
+ Returns:
+ The created ResNet-34 model.
+ """
+ model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
+
+ return model
+
+
+def resnet50(pretrained=False, **kwargs):
+ """Constructs a ResNet-50 model.
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet.
+
+ Returns:
+ The created ResNet-50 model.
+ """
+ model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
+
+ return model
+
+
+def resnet101(pretrained=False, **kwargs):
+ """Constructs a ResNet-101 model.
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet.
+
+ Returns:
+ The created ResNet-101 model.
+ """
+ model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
+
+ return model
+
+
+def resnet152(pretrained=False, **kwargs):
+ """Constructs a ResNet-152 model.
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet.
+
+ Returns:
+ The created ResNet-152 model.
+ """
+ model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
+
+ return model
+
+
+__all__ = [
+ 'ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152'
+]
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/model/xceptionnet.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/model/xceptionnet.py
new file mode 100644
index 000000000..34440ab9d
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/model/xceptionnet.py
@@ -0,0 +1,311 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+# the code is modified from
+# https://github.com/Cadene/pretrained-models.pytorch/blob/master/pretrainedmodels/models/xception.py
+
+from singa import layer
+from singa import model
+
+
+class Block(layer.Layer):
+
+ def __init__(self,
+ in_filters,
+ out_filters,
+ reps,
+ strides=1,
+ padding=0,
+ start_with_relu=True,
+ grow_first=True):
+ super(Block, self).__init__()
+
+ if out_filters != in_filters or strides != 1:
+ self.skip = layer.Conv2d(in_filters,
+ out_filters,
+ 1,
+ stride=strides,
+ padding=padding,
+ bias=False)
+ self.skipbn = layer.BatchNorm2d(out_filters)
+ else:
+ self.skip = None
+
+ self.layers = []
+
+ filters = in_filters
+ if grow_first:
+ self.layers.append(layer.ReLU())
+ self.layers.append(
+ layer.SeparableConv2d(in_filters,
+ out_filters,
+ 3,
+ stride=1,
+ padding=1,
+ bias=False))
+ self.layers.append(layer.BatchNorm2d(out_filters))
+ filters = out_filters
+
+ for i in range(reps - 1):
+ self.layers.append(layer.ReLU())
+ self.layers.append(
+ layer.SeparableConv2d(filters,
+ filters,
+ 3,
+ stride=1,
+ padding=1,
+ bias=False))
+ self.layers.append(layer.BatchNorm2d(filters))
+
+ if not grow_first:
+ self.layers.append(layer.ReLU())
+ self.layers.append(
+ layer.SeparableConv2d(in_filters,
+ out_filters,
+ 3,
+ stride=1,
+ padding=1,
+ bias=False))
+ self.layers.append(layer.BatchNorm2d(out_filters))
+
+ if not start_with_relu:
+ self.layers = self.layers[1:]
+ else:
+ self.layers[0] = layer.ReLU()
+
+ if strides != 1:
+ self.layers.append(layer.MaxPool2d(3, strides, padding + 1))
+
+ self.register_layers(*self.layers)
+
+ self.add = layer.Add()
+
+ def forward(self, x):
+ y = self.layers[0](x)
+ for layer in self.layers[1:]:
+ if isinstance(y, tuple):
+ y = y[0]
+ y = layer(y)
+
+ if self.skip is not None:
+ skip = self.skip(x)
+ skip = self.skipbn(skip)
+ else:
+ skip = x
+ y = self.add(y, skip)
+ return y
+
+
+class Xception(model.Model):
+ """
+ Xception optimized for the ImageNet dataset, as specified in
+ https://arxiv.org/pdf/1610.02357.pdf
+ """
+
+ def __init__(self, num_classes=10, num_channels=3):
+ """ Constructor
+ Args:
+ num_classes: number of classes
+ """
+ super(Xception, self).__init__()
+ self.num_classes = num_classes
+ self.input_size = 299
+ self.dimension = 4
+
+ self.conv1 = layer.Conv2d(num_channels, 32, 3, 2, 0, bias=False)
+ self.bn1 = layer.BatchNorm2d(32)
+ self.relu1 = layer.ReLU()
+
+ self.conv2 = layer.Conv2d(32, 64, 3, 1, 1, bias=False)
+ self.bn2 = layer.BatchNorm2d(64)
+ self.relu2 = layer.ReLU()
+ # do relu here
+
+ self.block1 = Block(64,
+ 128,
+ 2,
+ 2,
+ padding=0,
+ start_with_relu=False,
+ grow_first=True)
+ self.block2 = Block(128,
+ 256,
+ 2,
+ 2,
+ padding=0,
+ start_with_relu=True,
+ grow_first=True)
+ self.block3 = Block(256,
+ 728,
+ 2,
+ 2,
+ padding=0,
+ start_with_relu=True,
+ grow_first=True)
+
+ self.block4 = Block(728,
+ 728,
+ 3,
+ 1,
+ start_with_relu=True,
+ grow_first=True)
+ self.block5 = Block(728,
+ 728,
+ 3,
+ 1,
+ start_with_relu=True,
+ grow_first=True)
+ self.block6 = Block(728,
+ 728,
+ 3,
+ 1,
+ start_with_relu=True,
+ grow_first=True)
+ self.block7 = Block(728,
+ 728,
+ 3,
+ 1,
+ start_with_relu=True,
+ grow_first=True)
+
+ self.block8 = Block(728,
+ 728,
+ 3,
+ 1,
+ start_with_relu=True,
+ grow_first=True)
+ self.block9 = Block(728,
+ 728,
+ 3,
+ 1,
+ start_with_relu=True,
+ grow_first=True)
+ self.block10 = Block(728,
+ 728,
+ 3,
+ 1,
+ start_with_relu=True,
+ grow_first=True)
+ self.block11 = Block(728,
+ 728,
+ 3,
+ 1,
+ start_with_relu=True,
+ grow_first=True)
+
+ self.block12 = Block(728,
+ 1024,
+ 2,
+ 2,
+ start_with_relu=True,
+ grow_first=False)
+
+ self.conv3 = layer.SeparableConv2d(1024, 1536, 3, 1, 1)
+ self.bn3 = layer.BatchNorm2d(1536)
+ self.relu3 = layer.ReLU()
+
+ # do relu here
+ self.conv4 = layer.SeparableConv2d(1536, 2048, 3, 1, 1)
+ self.bn4 = layer.BatchNorm2d(2048)
+
+ self.relu4 = layer.ReLU()
+ self.globalpooling = layer.MaxPool2d(10, 1)
+ self.flatten = layer.Flatten()
+ self.fc = layer.Linear(num_classes)
+
+ self.softmax_cross_entropy = layer.SoftMaxCrossEntropy()
+
+ def features(self, input):
+ x = self.conv1(input)
+ x = self.bn1(x)
+ x = self.relu1(x)
+
+ x = self.conv2(x)
+ x = self.bn2(x)
+ x = self.relu2(x)
+
+ x = self.block1(x)
+ x = self.block2(x)
+ x = self.block3(x)
+ x = self.block4(x)
+ x = self.block5(x)
+ x = self.block6(x)
+ x = self.block7(x)
+ x = self.block8(x)
+ x = self.block9(x)
+ x = self.block10(x)
+ x = self.block11(x)
+ x = self.block12(x)
+
+ x = self.conv3(x)
+ x = self.bn3(x)
+ x = self.relu3(x)
+
+ x = self.conv4(x)
+ x = self.bn4(x)
+ return x
+
+ def logits(self, features):
+ x = self.relu4(features)
+ x = self.globalpooling(x)
+ x = self.flatten(x)
+ x = self.fc(x)
+ return x
+
+ def forward(self, x):
+ x = self.features(x)
+ x = self.logits(x)
+ return x
+
+ def train_one_batch(self, x, y, dist_option, spars):
+ out = self.forward(x)
+ loss = self.softmax_cross_entropy(out, y)
+ if dist_option == 'plain':
+ self.optimizer(loss)
+ elif dist_option == 'half':
+ self.optimizer.backward_and_update_half(loss)
+ elif dist_option == 'partialUpdate':
+ self.optimizer.backward_and_partial_update(loss)
+ elif dist_option == 'sparseTopK':
+ self.optimizer.backward_and_sparse_update(loss,
+ topK=True,
+ spars=spars)
+ elif dist_option == 'sparseThreshold':
+ self.optimizer.backward_and_sparse_update(loss,
+ topK=False,
+ spars=spars)
+ return out, loss
+
+ def set_optimizer(self, optimizer):
+ self.optimizer = optimizer
+
+
+def create_model(pretrained=False, **kwargs):
+ """Constructs a Xceptionnet model.
+
+ Args:
+ pretrained (bool): If True, returns a pre-trained model.
+
+ Returns:
+        The created Xception model.
+ """
+ model = Xception(**kwargs)
+
+ return model
+
+
+__all__ = ['Xception', 'create_model']
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/pkg_model_code/model.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/pkg_model_code/model.py
new file mode 100644
index 000000000..98884584f
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/pkg_model_code/model.py
@@ -0,0 +1,357 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# =============================================================================
+'''
+This script includes the Model class, which lets Python users
+run their models with the computational graph.
+'''
+
+import os
+import gc
+import time
+import json
+import zipfile
+import numpy as np
+from functools import wraps
+from collections.abc import Iterable
+
+from singa import tensor
+from singa import autograd
+from singa import layer
+from .tensor import Tensor
+from . import singa_wrap as singa
+
+
+class ModelMeta(layer.LayerMeta):
+
+ def buffer_operation(func):
+
+ def remove_creator(tensors):
+ if not tensors:
+ return
+
+ if isinstance(tensors, Iterable):
+ if isinstance(tensors, str):
+ return
+ else:
+ for item in tensors:
+ if isinstance(item, Iterable):
+ remove_creator(item)
+ elif isinstance(item, tensor.Tensor):
+ item.creator = None
+ elif isinstance(tensors, tensor.Tensor):
+ tensors.creator = None
+
+ @wraps(func)
+ def wrapper(self, *args, **kwargs):
+ if self.graph_mode and self.training:
+ if len(args) == 0:
+ raise ValueError('expect at least one input tensor')
+
+ if isinstance(args[0], list):
+ assert isinstance(
+ args[0][0],
+ Tensor), ('function expects PlaceHolders or Tensors')
+ dev = args[0][0].device
+ else:
+ assert isinstance(
+ args[0],
+ Tensor), ('function expects PlaceHolders or Tensors')
+ dev = args[0].device
+
+ if not self._buffered:
+ # buffer operations
+ dev.EnableGraph(True)
+ self._results = func(self, *args, **kwargs)
+ dev.Sync()
+ dev.EnableGraph(False)
+ self._buffered = True
+
+ # deconstruct Operations before running the entire graph
+ remove_creator(self._results)
+
+ # make sure all Operations are deallocated
+ gc.collect()
+
+ # run graph
+ dev.RunGraph(self.sequential)
+ return self._results
+ else:
+ return func(self, *args, **kwargs)
+
+ return wrapper
+
+ def __new__(cls, name, bases, attr):
+ if 'train_one_batch' in attr:
+ attr['train_one_batch'] = ModelMeta.buffer_operation(
+ attr['train_one_batch'])
+
+ return super(ModelMeta, cls).__new__(cls, name, bases, attr)
+
+
+class Model(layer.Layer, metaclass=ModelMeta):
+ """ Base class for your neural network models.
+
+ Example usage::
+
+ import numpy as np
+ from singa import opt
+ from singa import tensor
+ from singa import device
+ from singa import autograd
+ from singa import layer
+ from singa import model
+
+ class MyModel(model.Model):
+ def __init__(self):
+ super(MyModel, self).__init__()
+
+ self.softmax_cross_entropy = layer.SoftMaxCrossEntropy()
+ self.conv1 = layer.Conv2d(1, 20, 5, padding=0)
+ self.conv2 = layer.Conv2d(20, 50, 5, padding=0)
+ self.sgd = opt.SGD(lr=0.01)
+
+ def forward(self, x):
+ y = self.conv1(x)
+ y = self.conv2(y)
+ return y
+
+ def train_one_batch(self, x, y):
+ out = self.forward(x)
+ loss = self.softmax_cross_entropy(out, y)
+ self.sgd(loss)
+ return out, loss
+
+ """
+
+ # save load states constant
+ TENSOR_DICT_FILENAME = '/tensor_dict.npz'
+ STATES_ATTR_FILENAME = '/states_attr.json'
+ MODEL_STATE_TYPE = 0
+ AUX_STATE_TYPE = 1
+
+ def __init__(self):
+ """
+ Initializes internal Model state
+ """
+ super(Model, self).__init__()
+
+ self.training = True
+ self.graph_mode = True
+ self.sequential = False
+ self._buffered = False
+ self._results = None
+
+ def compile(self, inputs, is_train=True, use_graph=False, sequential=False):
+ """ Compile and initialize the model
+
+ This function will automatically derive the shape of parameters
+        in each sublayer based on the shape of the input placeholders. It also
+        sets the training mode and the graph-execution flags.
+
+ Args:
+            inputs(list): the list of input tensors (placeholders)
+            is_train(bool): when is_train is True, the model enters
+                training mode; otherwise it enters evaluation mode
+            use_graph(bool): when use_graph is True, the computational graph
+                will be used to train this model
+            sequential(bool): when sequential is True, the model executes ops
+                in the graph in the order in which they joined the graph
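+
+        Example usage (a minimal sketch mirroring train_cnn.py; assumes the
+        MyModel class from the class docstring and the default device)::
+
+            from singa import device, tensor
+
+            dev = device.get_default_device()
+            tx = tensor.Tensor((64, 1, 28, 28), dev, tensor.float32)
+            m = MyModel()
+            m.compile([tx], is_train=True, use_graph=True, sequential=False)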
+ """
+ assert len(inputs) > 0 and isinstance(inputs[0], Tensor), (
+ 'compile function expects PlaceHolders or Tensors')
+
+ dev = inputs[0].device
+ dev.EnableGraph(True)
+ self.forward(*inputs)
+ dev.EnableGraph(False)
+ dev.ResetGraph()
+
+ autograd.training = is_train
+ self.training = is_train
+ self.graph_mode = use_graph
+ self.sequential = sequential
+
+ def forward(self, *input):
+ """Defines the computation performed in every forward propagation.
+
+ Should be overridden by all subclasses.
+
+ Args:
+ *input: the input training data for the model
+
+ Returns:
+ out: the outputs of the forward propagation.
+ """
+ raise NotImplementedError
+
+ def train_one_batch(self, *input, **kwargs):
+ """Defines the computation performed in every training iteration
+
+ Should be overridden by all subclasses.
+
+ Args:
+ *input: the arguments of train_one_batch
+ **kwargs: the keyword arguments of train_one_batch
+ """
+ raise NotImplementedError
+
+ def train(self, mode=True):
+ """Set the model in evaluation mode.
+
+ Args:
+ mode(bool): when mode is True, this model will enter training mode
+ """
+ self.training = mode
+ autograd.training = mode
+
+ def eval(self):
+ """Sets the model in evaluation mode.
+ """
+ self.train(mode=False)
+
+ def graph(self, mode=True, sequential=False):
+ """ Turn on the computational graph. Specify execution mode.
+
+ Args:
+ mode(bool): when mode is True, model will use computational graph
+            sequential(bool): when sequential is True, the model executes ops
+                in the graph in the order in which they joined the graph
+ """
+ self.graph_mode = mode
+ self.sequential = sequential
+
+ def __get_name__(self):
+ return self.__class__.__name__
+
+ def __call__(self, *input, **kwargs):
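+        # In training mode a call dispatches to train_one_batch (which
+        # ModelMeta buffers into the computational graph when graph mode is
+        # on); in evaluation mode it runs a plain forward pass.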
+ if self.training:
+ return self.train_one_batch(*input, **kwargs)
+ else:
+ return self.forward(*input, **kwargs)
+
+ def save_states(self, fpath, aux_states={}):
+ """Save states.
+
+ Args:
+ fpath: output file path (without the extension)
+            aux_states(dict): values must be singa Tensor instances, e.g.,
+                the epoch ID, learning rate, or optimizer states stored as tensors
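+
+        Example (a sketch; 'mymodel.zip' is a hypothetical output path)::
+
+            m.save_states('mymodel.zip')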
+ """
+ assert not os.path.isfile(fpath), (
+ "Failed to save states, %s is already existed." % fpath)
+
+ states = self.get_states()
+
+ # save states data and attr
+ tensor_dict = {}
+ states_attr = {}
+ for k, v in states.items():
+ assert isinstance(v, tensor.Tensor), "Only tensor state is allowed"
+ tensor_dict[k] = tensor.to_numpy(v)
+ states_attr[k] = {
+ 'state_type': self.MODEL_STATE_TYPE,
+ 'shape': v.shape,
+ 'dtype': v.dtype
+ }
+
+ for k, v in aux_states.items():
+ assert isinstance(v,
+ tensor.Tensor), "Only tensor aux state is allowed"
+ tensor_dict[k] = tensor.to_numpy(v)
+ states_attr[k] = {
+ 'state_type': self.AUX_STATE_TYPE,
+ 'shape': v.shape,
+ 'dtype': v.dtype
+ }
+
+ # save to files
+ timestamp = time.time()
+ tmp_dir = '/tmp/singa_save_states_%s' % timestamp
+ os.mkdir(tmp_dir)
+ tensor_dict_fp = tmp_dir + self.TENSOR_DICT_FILENAME
+ states_attr_fp = tmp_dir + self.STATES_ATTR_FILENAME
+
+ np.savez(tensor_dict_fp, **tensor_dict)
+
+ with open(states_attr_fp, 'w') as fp:
+ json.dump(states_attr, fp)
+
+ compression = zipfile.ZIP_DEFLATED
+ with zipfile.ZipFile(fpath, mode="w") as zf:
+ zf.write(tensor_dict_fp,
+ os.path.basename(tensor_dict_fp),
+ compress_type=compression)
+ zf.write(states_attr_fp,
+ os.path.basename(states_attr_fp),
+ compress_type=compression)
+
+ # clean up tmp files
+ os.remove(tensor_dict_fp)
+ os.remove(states_attr_fp)
+ os.rmdir(tmp_dir)
+
+ def load_states(self, fpath):
+ """Load the model states and auxiliary states from disk.
+
+ Usage:
+ m = MyModel()
+ m.compile(...)
+ aux_states = m.load_states('mymodel.zip')
+
+ Args:
+            fpath: input file path (without the extension)
+        Returns:
+            a dict of the auxiliary states saved by save_states
+ """
+
+ assert os.path.isfile(fpath), (
+ "Failed to load states, %s is not exist." % fpath)
+
+ timestamp = time.time()
+ tmp_dir = '/tmp/singa_load_states_%s' % timestamp
+ os.mkdir(tmp_dir)
+
+ with zipfile.ZipFile(fpath, 'r') as zf:
+ zf.extractall(tmp_dir)
+
+ tensor_dict_fp = tmp_dir + self.TENSOR_DICT_FILENAME
+ states_attr_fp = tmp_dir + self.STATES_ATTR_FILENAME
+
+ with open(states_attr_fp) as f:
+ states_attr = json.load(f)
+
+ tensor_dict = np.load(tensor_dict_fp)
+
+ # restore singa tensor from numpy
+ model_states = dict()
+ aux_states = dict()
+
+ for k in tensor_dict.files:
+ if states_attr[k]['state_type'] == self.MODEL_STATE_TYPE:
+ model_states[k] = tensor.from_numpy(tensor_dict[k])
+ elif states_attr[k]['state_type'] == self.AUX_STATE_TYPE:
+ aux_states[k] = tensor.from_numpy(tensor_dict[k])
+
+ # restore model_states
+ self.set_states(model_states)
+
+ # clean up tmp files
+ os.remove(tensor_dict_fp)
+ os.remove(states_attr_fp)
+ os.rmdir(tmp_dir)
+ return aux_states
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/run.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/run.sh
new file mode 100644
index 000000000..a536a1e81
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/run.sh
@@ -0,0 +1,38 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+#!/usr/bin/env bash
+
+### mnist
+python train_cnn.py mlp mnist
+python train_cnn.py cnn mnist
+python train_cnn.py resnet mnist
+python train_cnn.py alexnet mnist
+
+### cifar10
+python train_cnn.py mlp cifar10
+python train_cnn.py cnn cifar10
+python train_cnn.py resnet cifar10
+python train_cnn.py alexnet cifar10
+
+### cifar100
+python train_cnn.py mlp cifar100
+python train_cnn.py cnn cifar100
+python train_cnn.py resnet cifar100
+python train_cnn.py alexnet cifar100
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/train_cnn.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/train_cnn.py
new file mode 100644
index 000000000..c17e1b6c4
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/train_cnn.py
@@ -0,0 +1,564 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from singa import singa_wrap as singa
+from singa import device
+from singa import tensor
+from singa import opt
+from singa import autograd
+from singa.opt import Optimizer
+from singa.opt import DecayScheduler
+from singa.opt import Constant
+import numpy as np
+import time
+import argparse
+from PIL import Image
+
+np_dtype = {"float16": np.float16, "float32": np.float32}
+
+singa_dtype = {"float16": tensor.float16, "float32": tensor.float32}
+
+### MSOptimizer
+class MSOptimizer(Optimizer):
+ def __call__(self, loss):
+ pn_p_g_list = self.call_with_returns(loss)
+ self.step()
+ return pn_p_g_list
+
+ def call_with_returns(self, loss):
+ # print ("call_with_returns loss.data: \n", loss.data)
+ pn_p_g_list = []
+ for p, g in autograd.backward(loss):
+ if p.name is None:
+ p.name = id(p)
+ self.apply(p.name, p, g)
+ # print ("call with returns")
+ # print ("p.name: \n", p.name)
+ # print ("p.data: \n", p.data)
+ # print ("g.data: \n", g.data)
+ pn_p_g_list.append([p.name, p, g]) # need iterables
+ return pn_p_g_list
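+
+# The [name, param, grad] triples collected above are what the SynFlow scoring
+# pass in run() sums over (|param * grad| across weight matrices of the last
+# batch) when the msmlp model returns them from train_one_batch.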
+
+# MSSGD -- actually no change of code
+class MSSGD(MSOptimizer):
+ """Implements stochastic gradient descent (optionally with momentum).
+
+ Nesterov momentum is based on the formula from `On the importance of initialization and momentum in deep learning`__.
+
+ Args:
+ lr(float): learning rate
+ momentum(float, optional): momentum factor(default: 0)
+ weight_decay(float, optional): weight decay(L2 penalty)(default: 0)
+ dampening(float, optional): dampening for momentum(default: 0)
+ nesterov(bool, optional): enables Nesterov momentum(default: False)
+
+ Typical usage example:
+ >> > from singa import opt
+ >> > optimizer = opt.SGD(lr=0.1, momentum=0.9)
+ >> > optimizer.update()
+
+    __ http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf
+
+ .. note::
+ The implementation of SGD with Momentum / Nesterov subtly differs from
+        Sutskever et al. and implementations in some other frameworks.
+
+ Considering the specific case of Momentum, the update can be written as
+
+ .. math::
+ v = \rho * v + g \\
+ p = p - lr * v
+
+        where p, g, v and :math:`\rho` denote the parameters, gradient,
+ velocity, and momentum respectively.
+
+        This is in contrast to Sutskever et al. and
+ other frameworks which employ an update of the form
+
+ .. math::
+ v = \rho * v + lr * g \\
+ p = p - v
+
+ The Nesterov version is analogously modified.
+ """
+
+ def __init__(self,
+ lr=0.1,
+ momentum=0,
+ dampening=0,
+ weight_decay=0,
+ nesterov=False,
+ dtype=tensor.float32):
+ super(MSSGD, self).__init__(lr, dtype)
+
+ # init momentum
+ if type(momentum) == float or type(momentum) == int:
+ if momentum < 0.0:
+ raise ValueError("Invalid momentum value: {}".format(momentum))
+ self.momentum = Constant(momentum)
+ elif isinstance(momentum, DecayScheduler):
+ self.momentum = momentum
+ momentum = momentum.init_value
+ else:
+ raise TypeError("Wrong momentum type")
+ self.mom_value = self.momentum(self.step_counter).as_type(self.dtype)
+
+ # init dampening
+ if type(dampening) == float or type(dampening) == int:
+ self.dampening = Constant(dampening)
+ elif isinstance(dampening, DecayScheduler):
+ self.dampening = dampening
+ dampening = dampening.init_value
+ else:
+ raise TypeError("Wrong dampening type")
+ self.dam_value = self.dampening(self.step_counter).as_type(self.dtype)
+
+ # init weight_decay
+ if type(weight_decay) == float or type(weight_decay) == int:
+ if weight_decay < 0.0:
+ raise ValueError(
+ "Invalid weight_decay value: {}".format(weight_decay))
+ self.weight_decay = Constant(weight_decay)
+ elif isinstance(weight_decay, DecayScheduler):
+ self.weight_decay = weight_decay
+ else:
+ raise TypeError("Wrong weight_decay type")
+ self.decay_value = self.weight_decay(self.step_counter).as_type(
+ self.dtype)
+
+ # init other params
+ self.nesterov = nesterov
+ self.moments = dict()
+
+ # check value
+ if nesterov and (momentum <= 0 or dampening != 0):
+ raise ValueError(
+ "Nesterov momentum requires a momentum and zero dampening")
+
+ def apply(self, param_name, param_value, param_grad):
+ """Performs a single optimization step.
+
+ Args:
+ param_name(String): the name of the param
+            param_value(Tensor): param values to be updated in-place
+            param_grad(Tensor): param gradients; the values may be modified
+                in this function and must not be reused afterwards
+ """
+ assert param_value.shape == param_grad.shape, ("shape mismatch",
+ param_value.shape,
+ param_grad.shape)
+ self.device_check(param_value, self.step_counter, self.lr_value,
+ self.mom_value, self.dam_value, self.decay_value)
+
+ # derive dtype from input
+ assert param_value.dtype == self.dtype
+
+ # TODO add branch operator
+ # if self.decay_value != 0:
+ if self.weight_decay.init_value != 0:
+ singa.Axpy(self.decay_value.data, param_value.data, param_grad.data)
+
+ if self.momentum.init_value != 0:
+ if param_name not in self.moments:
+ flag = param_value.device.graph_enabled()
+ param_value.device.EnableGraph(False)
+ self.moments[param_name] = tensor.zeros_like(param_value)
+ param_value.device.EnableGraph(flag)
+
+ buf = self.moments[param_name]
+ buf *= self.mom_value
+ alpha = 1.0 - self.dam_value
+ singa.Axpy(alpha.data, param_grad.data, buf.data)
+
+ if self.nesterov:
+ singa.Axpy(self.mom_value.data, buf.data, param_grad.data)
+ else:
+ param_grad = buf
+
+ minus_lr = 0.0 - self.lr_value
+ singa.Axpy(minus_lr.data, param_grad.data, param_value.data)
+
+ def step(self):
+ # increment step counter, lr and moment
+ super().step()
+ mom_value = self.momentum(self.step_counter).as_type(self.dtype)
+ dam_value = self.dampening(self.step_counter).as_type(self.dtype)
+ decay_value = self.weight_decay(self.step_counter).as_type(self.dtype)
+ self.mom_value.copy_from(mom_value)
+ self.dam_value.copy_from(dam_value)
+ self.decay_value.copy_from(decay_value)
+
+ def get_states(self):
+ states = super().get_states()
+ if self.mom_value > 0:
+ states[
+ 'moments'] = self.moments # a dict for 1st order moments tensors
+ return states
+
+ def set_states(self, states):
+ super().set_states(states)
+ if 'moments' in states:
+ self.moments = states['moments']
+ self.mom_value = self.momentum(self.step_counter)
+
+
+# Data augmentation
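+# (pad each side by 4 pixels, take a random crop back to the original size,
+# and apply a random horizontal flip, per sample, in place)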
+def augmentation(x, batch_size):
+ xpad = np.pad(x, [[0, 0], [0, 0], [4, 4], [4, 4]], 'symmetric')
+ for data_num in range(0, batch_size):
+ offset = np.random.randint(8, size=2)
+ x[data_num, :, :, :] = xpad[data_num, :,
+ offset[0]:offset[0] + x.shape[2],
+ offset[1]:offset[1] + x.shape[2]]
+ if_flip = np.random.randint(2)
+ if (if_flip):
+ x[data_num, :, :, :] = x[data_num, :, :, ::-1]
+ return x
+
+
+# Calculate accuracy
+def accuracy(pred, target):
+ # y is network output to be compared with ground truth (int)
+ y = np.argmax(pred, axis=1)
+ a = y == target
+ correct = np.array(a, "int").sum()
+ return correct
+
+
+# Data partition according to the rank
+def partition(global_rank, world_size, train_x, train_y, val_x, val_y):
+ # Partition training data
+ data_per_rank = train_x.shape[0] // world_size
+ idx_start = global_rank * data_per_rank
+ idx_end = (global_rank + 1) * data_per_rank
+ train_x = train_x[idx_start:idx_end]
+ train_y = train_y[idx_start:idx_end]
+
+ # Partition evaluation data
+ data_per_rank = val_x.shape[0] // world_size
+ idx_start = global_rank * data_per_rank
+ idx_end = (global_rank + 1) * data_per_rank
+ val_x = val_x[idx_start:idx_end]
+ val_y = val_y[idx_start:idx_end]
+ return train_x, train_y, val_x, val_y
+
+
+# Function to all reduce NUMPY accuracy and loss from multiple devices
+def reduce_variable(variable, dist_opt, reducer):
+ reducer.copy_from_numpy(variable)
+ dist_opt.all_reduce(reducer.data)
+ dist_opt.wait()
+ output = tensor.to_numpy(reducer)
+ return output
+
+
+def resize_dataset(x, image_size):
+ num_data = x.shape[0]
+ dim = x.shape[1]
+ X = np.zeros(shape=(num_data, dim, image_size, image_size),
+ dtype=np.float32)
+ for n in range(0, num_data):
+ for d in range(0, dim):
+ X[n, d, :, :] = np.array(Image.fromarray(x[n, d, :, :]).resize(
+ (image_size, image_size), Image.BILINEAR),
+ dtype=np.float32)
+ return X
+
+
+def run(global_rank,
+ world_size,
+ local_rank,
+ max_epoch,
+ batch_size,
+ model,
+ data,
+ mssgd,
+ graph,
+ verbosity,
+ dist_option='plain',
+ spars=None,
+ precision='float32'):
+ # dev = device.create_cuda_gpu_on(local_rank) # need to change to CPU device for CPU-only machines
+ dev = device.get_default_device()
+ dev.SetRandSeed(0)
+ np.random.seed(0)
+
+ if data == 'cifar10':
+ from data import cifar10
+ train_x, train_y, val_x, val_y = cifar10.load()
+ elif data == 'cifar100':
+ from data import cifar100
+ train_x, train_y, val_x, val_y = cifar100.load()
+ elif data == 'mnist':
+ from data import mnist
+ train_x, train_y, val_x, val_y = mnist.load()
+
+
+ num_channels = train_x.shape[1]
+ image_size = train_x.shape[2]
+ data_size = np.prod(train_x.shape[1:train_x.ndim]).item()
+ num_classes = (np.max(train_y) + 1).item()
+
+ if model == 'resnet':
+ from model import resnet
+ model = resnet.resnet50(num_channels=num_channels,
+ num_classes=num_classes)
+ elif model == 'xceptionnet':
+ from model import xceptionnet
+ model = xceptionnet.create_model(num_channels=num_channels,
+ num_classes=num_classes)
+ elif model == 'cnn':
+ from model import cnn
+ model = cnn.create_model(num_channels=num_channels,
+ num_classes=num_classes)
+ elif model == 'alexnet':
+ from model import alexnet
+ model = alexnet.create_model(num_channels=num_channels,
+ num_classes=num_classes)
+ elif model == 'mlp':
+ import os, sys, inspect
+ current = os.path.dirname(
+ os.path.abspath(inspect.getfile(inspect.currentframe())))
+ parent = os.path.dirname(current)
+ sys.path.insert(0, parent)
+ from mlp import model
+ model = model.create_model(data_size=data_size,
+ num_classes=num_classes)
+
+ elif model == 'msmlp':
+ import os, sys, inspect
+ current = os.path.dirname(
+ os.path.abspath(inspect.getfile(inspect.currentframe())))
+ parent = os.path.dirname(current)
+ sys.path.insert(0, parent)
+ from msmlp import model
+ model = model.create_model(data_size=data_size,
+ num_classes=num_classes)
+
+ # For distributed training, sequential has better performance
+ if hasattr(mssgd, "communicator"):
+ DIST = True
+ sequential = True
+ else:
+ DIST = False
+ sequential = False
+
+ if DIST:
+ train_x, train_y, val_x, val_y = partition(global_rank, world_size,
+ train_x, train_y, val_x,
+ val_y)
+
+ if model.dimension == 4:
+ tx = tensor.Tensor(
+ (batch_size, num_channels, model.input_size, model.input_size), dev,
+ singa_dtype[precision])
+ elif model.dimension == 2:
+ tx = tensor.Tensor((batch_size, data_size), dev, singa_dtype[precision])
+        train_x = np.reshape(train_x, (train_x.shape[0], -1))
+        val_x = np.reshape(val_x, (val_x.shape[0], -1))
+
+ ty = tensor.Tensor((batch_size,), dev, tensor.int32)
+ num_train_batch = train_x.shape[0] // batch_size
+ num_val_batch = val_x.shape[0] // batch_size
+ idx = np.arange(train_x.shape[0], dtype=np.int32)
+
+ # Attach model to graph
+ model.set_optimizer(mssgd)
+ model.compile([tx], is_train=True, use_graph=graph, sequential=sequential)
+ dev.SetVerbosity(verbosity)
+
+ # Training and evaluation loop
+ for epoch in range(max_epoch):
+ start_time = time.time()
+ np.random.shuffle(idx)
+
+ if global_rank == 0:
+ print('Starting Epoch %d:' % (epoch))
+
+ # Training phase
+ train_correct = np.zeros(shape=[1], dtype=np.float32)
+ test_correct = np.zeros(shape=[1], dtype=np.float32)
+ train_loss = np.zeros(shape=[1], dtype=np.float32)
+
+ model.train()
+ print ("num_train_batch: \n", num_train_batch)
+ print ()
+ for b in range(num_train_batch):
+ if b % 200 == 0:
+ print ("b: \n", b)
+ # Generate the patch data in this iteration
+ x = train_x[idx[b * batch_size:(b + 1) * batch_size]]
+ if model.dimension == 4:
+ x = augmentation(x, batch_size)
+ if (image_size != model.input_size):
+ x = resize_dataset(x, model.input_size)
+ x = x.astype(np_dtype[precision])
+ y = train_y[idx[b * batch_size:(b + 1) * batch_size]]
+
+
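+            # SynFlow proxy, computed on the last two batches of the final
+            # epoch: the second-to-last batch takes the absolute value of all
+            # weights, then the last batch feeds an all-ones input and sums
+            # |param * grad| over the weight matrices into synflow_score.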
+ synflow_flag = False
+ # Train the model
+            if epoch == (max_epoch - 1) and b == (num_train_batch - 1): ### synflow calculation for the last batch
+ print ("last epoch calculate synflow")
+ synflow_flag = True
+ ### step 1: all one input
+ # Copy the patch data into input tensors
+ tx.copy_from_numpy(np.ones(x.shape, dtype=np.float32))
+ ty.copy_from_numpy(y)
+ ### step 2: all weights turned to positive (done)
+ ### step 3: new loss (done)
+ pn_p_g_list, out, loss = model(tx, ty, dist_option, spars, synflow_flag)
+ ### step 4: calculate the multiplication of weights
+ synflow_score = 0.0
+ for pn_p_g_item in pn_p_g_list:
+ print ("calculate weight param * grad parameter name: \n", pn_p_g_item[0])
+ if len(pn_p_g_item[1].shape) == 2: # param_value.data is "weight"
+ print ("pn_p_g_item[1].shape: \n", pn_p_g_item[1].shape)
+ synflow_score += np.sum(np.absolute(tensor.to_numpy(pn_p_g_item[1]) * tensor.to_numpy(pn_p_g_item[2])))
+ print ("synflow_score: \n", synflow_score)
+ elif epoch == (max_epoch - 1) and b == (num_train_batch - 2): # all weights turned to positive
+ # Copy the patch data into input tensors
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+ pn_p_g_list, out, loss = model(tx, ty, dist_option, spars, synflow_flag)
+ train_correct += accuracy(tensor.to_numpy(out), y)
+ train_loss += tensor.to_numpy(loss)[0]
+ # all params turned to positive
+ for pn_p_g_item in pn_p_g_list:
+ print ("absolute value parameter name: \n", pn_p_g_item[0])
+ pn_p_g_item[1] = tensor.abs(pn_p_g_item[1]) # tensor actually ...
+ else: # normal train steps
+ # Copy the patch data into input tensors
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+ # print ("normal before model(tx, ty, synflow_flag, dist_option, spars)")
+ # print ("train_cnn tx: \n", tx)
+ # print ("train_cnn ty: \n", ty)
+ pn_p_g_list, out, loss = model(tx, ty, dist_option, spars, synflow_flag)
+ # print ("normal after model(tx, ty, synflow_flag, dist_option, spars)")
+ train_correct += accuracy(tensor.to_numpy(out), y)
+ train_loss += tensor.to_numpy(loss)[0]
+
+ if DIST:
+ # Reduce the evaluation accuracy and loss from multiple devices
+ reducer = tensor.Tensor((1,), dev, tensor.float32)
+ train_correct = reduce_variable(train_correct, mssgd, reducer)
+ train_loss = reduce_variable(train_loss, mssgd, reducer)
+
+ if global_rank == 0:
+ print('Training loss = %f, training accuracy = %f' %
+ (train_loss, train_correct /
+ (num_train_batch * batch_size * world_size)),
+ flush=True)
+
+ # Evaluation phase
+ model.eval()
+ for b in range(num_val_batch):
+ x = val_x[b * batch_size:(b + 1) * batch_size]
+ if model.dimension == 4:
+ if (image_size != model.input_size):
+ x = resize_dataset(x, model.input_size)
+ x = x.astype(np_dtype[precision])
+ y = val_y[b * batch_size:(b + 1) * batch_size]
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+ out_test = model(tx)
+ test_correct += accuracy(tensor.to_numpy(out_test), y)
+
+ if DIST:
+            # Reduce the evaluation accuracy from multiple devices
+ test_correct = reduce_variable(test_correct, mssgd, reducer)
+
+ # Output the evaluation accuracy
+ if global_rank == 0:
+ print('Evaluation accuracy = %f, Elapsed Time = %fs' %
+ (test_correct / (num_val_batch * batch_size * world_size),
+ time.time() - start_time),
+ flush=True)
+
+ dev.PrintTimeProfiling()
+
+
+if __name__ == '__main__':
+ # Use argparse to get command config: max_epoch, model, data, etc., for single gpu training
+ parser = argparse.ArgumentParser(
+ description='Training using the autograd and graph.')
+ parser.add_argument(
+ 'model',
+ choices=['cnn', 'resnet', 'xceptionnet', 'mlp', 'msmlp', 'alexnet'],
+ default='cnn')
+ parser.add_argument('data',
+ choices=['mnist', 'cifar10', 'cifar100'],
+ default='mnist')
+ parser.add_argument('-p',
+ choices=['float32', 'float16'],
+ default='float32',
+ dest='precision')
+ parser.add_argument('-m',
+ '--max-epoch',
+ default=3,
+ type=int,
+ help='maximum epochs',
+ dest='max_epoch')
+ parser.add_argument('-b',
+ '--batch-size',
+ default=64,
+ type=int,
+ help='batch size',
+ dest='batch_size')
+ parser.add_argument('-l',
+ '--learning-rate',
+ default=0.005,
+ type=float,
+ help='initial learning rate',
+ dest='lr')
+ # Determine which gpu to use
+ parser.add_argument('-i',
+ '--device-id',
+ default=0,
+ type=int,
+ help='which GPU to use',
+ dest='device_id')
+ parser.add_argument('-g',
+ '--disable-graph',
+                        default=True,
+ action='store_false',
+ help='disable graph',
+ dest='graph')
+ parser.add_argument('-v',
+ '--log-verbosity',
+ default=0,
+ type=int,
+ help='logging verbosity',
+ dest='verbosity')
+
+ args = parser.parse_args()
+
+ mssgd = MSSGD(lr=args.lr, momentum=0.9, weight_decay=1e-5, dtype=singa_dtype[args.precision])
+ run(0,
+ 1,
+ args.device_id,
+ args.max_epoch,
+ args.batch_size,
+ args.model,
+ args.data,
+ mssgd,
+ args.graph,
+ args.verbosity,
+ precision=args.precision)
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/train_mpi.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/train_mpi.py
new file mode 100644
index 000000000..563d4b2c5
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/train_mpi.py
@@ -0,0 +1,91 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+
+from singa import singa_wrap as singa
+from singa import opt
+from singa import tensor
+import argparse
+import train_cnn
+
+singa_dtype = {"float16": tensor.float16, "float32": tensor.float32}
+
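+# A typical multi-process launch (a sketch; the exact command depends on the
+# local MPI setup): mpiexec -np 2 python train_mpi.py cnn mnist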
+if __name__ == '__main__':
+ # Use argparse to get command config: max_epoch, model, data, etc., for single gpu training
+ parser = argparse.ArgumentParser(
+ description='Training using the autograd and graph.')
+ parser.add_argument('model',
+ choices=['cnn', 'resnet', 'xceptionnet', 'mlp'],
+ default='cnn')
+ parser.add_argument('data', choices=['mnist', 'cifar10', 'cifar100'], default='mnist')
+ parser.add_argument('-p',
+ choices=['float32', 'float16'],
+ default='float32',
+ dest='precision')
+ parser.add_argument('-m',
+ '--max-epoch',
+ default=10,
+ type=int,
+ help='maximum epochs',
+ dest='max_epoch')
+ parser.add_argument('-b',
+ '--batch-size',
+ default=64,
+ type=int,
+ help='batch size',
+ dest='batch_size')
+ parser.add_argument('-l',
+ '--learning-rate',
+ default=0.005,
+ type=float,
+ help='initial learning rate',
+ dest='lr')
+ parser.add_argument('-d',
+ '--dist-option',
+ default='plain',
+ choices=['plain','half','partialUpdate','sparseTopK','sparseThreshold'],
+                        help='distributed training options',
+ dest='dist_option') # currently partialUpdate support graph=False only
+ parser.add_argument('-s',
+ '--sparsification',
+                        default=0.05,
+                        type=float,
+                        help='the sparsity parameter used for sparsification, between 0 and 1',
+ dest='spars')
+ parser.add_argument('-g',
+ '--disable-graph',
+                        default=True,
+ action='store_false',
+ help='disable graph',
+ dest='graph')
+ parser.add_argument('-v',
+ '--log-verbosity',
+ default=0,
+ type=int,
+ help='logging verbosity',
+ dest='verbosity')
+
+ args = parser.parse_args()
+
+ sgd = opt.SGD(lr=args.lr, momentum=0.9, weight_decay=1e-5, dtype=singa_dtype[args.precision])
+ sgd = opt.DistOpt(sgd)
+
+ train_cnn.run(sgd.global_rank, sgd.world_size, sgd.local_rank, args.max_epoch,
+ args.batch_size, args.model, args.data, sgd, args.graph,
+ args.verbosity, args.dist_option, args.spars, args.precision)
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/train_ms_model.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/train_ms_model.py
new file mode 100644
index 000000000..8cdda8fe1
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/train_ms_model.py
@@ -0,0 +1,592 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from singa import singa_wrap as singa
+from singa import device
+from singa import tensor
+from singa import opt
+from singa import autograd
+from singa.opt import Optimizer
+from singa.opt import DecayScheduler
+from singa.opt import Constant
+import numpy as np
+import time
+import argparse
+from PIL import Image
+
+np_dtype = {"float16": np.float16, "float32": np.float32}
+
+singa_dtype = {"float16": tensor.float16, "float32": tensor.float32}
+
+### MSOptimizer
+class MSOptimizer(Optimizer):
+ def __call__(self, loss):
+ pn_p_g_list = self.call_with_returns(loss)
+ self.step()
+ return pn_p_g_list
+
+ def call_with_returns(self, loss):
+ # print ("call_with_returns loss.data: \n", loss.data)
+ pn_p_g_list = []
+ for p, g in autograd.backward(loss):
+ if p.name is None:
+ p.name = id(p)
+ self.apply(p.name, p, g)
+ # print ("call with returns")
+ # print ("p.name: \n", p.name)
+ # print ("p.data: \n", p.data)
+ # print ("g.data: \n", g.data)
+ pn_p_g_list.append([p.name, p, g]) # need iterables
+ return pn_p_g_list
+
+# MSSGD -- actually no change of code
+class MSSGD(MSOptimizer):
+ """Implements stochastic gradient descent (optionally with momentum).
+
+ Nesterov momentum is based on the formula from `On the importance of initialization and momentum in deep learning`__.
+
+ Args:
+ lr(float): learning rate
+ momentum(float, optional): momentum factor(default: 0)
+ weight_decay(float, optional): weight decay(L2 penalty)(default: 0)
+ dampening(float, optional): dampening for momentum(default: 0)
+ nesterov(bool, optional): enables Nesterov momentum(default: False)
+
+ Typical usage example:
+ >> > from singa import opt
+ >> > optimizer = opt.SGD(lr=0.1, momentum=0.9)
+ >> > optimizer.update()
+
+    __ http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf
+
+ .. note::
+ The implementation of SGD with Momentum / Nesterov subtly differs from
+        Sutskever et al. and implementations in some other frameworks.
+
+ Considering the specific case of Momentum, the update can be written as
+
+ .. math::
+ v = \rho * v + g \\
+ p = p - lr * v
+
+        where p, g, v and :math:`\rho` denote the parameters, gradient,
+ velocity, and momentum respectively.
+
+        This is in contrast to Sutskever et al. and
+ other frameworks which employ an update of the form
+
+ .. math::
+ v = \rho * v + lr * g \\
+ p = p - v
+
+ The Nesterov version is analogously modified.
+ """
+
+ def __init__(self,
+ lr=0.1,
+ momentum=0,
+ dampening=0,
+ weight_decay=0,
+ nesterov=False,
+ dtype=tensor.float32):
+ super(MSSGD, self).__init__(lr, dtype)
+
+ # init momentum
+ if type(momentum) == float or type(momentum) == int:
+ if momentum < 0.0:
+ raise ValueError("Invalid momentum value: {}".format(momentum))
+ self.momentum = Constant(momentum)
+ elif isinstance(momentum, DecayScheduler):
+ self.momentum = momentum
+ momentum = momentum.init_value
+ else:
+ raise TypeError("Wrong momentum type")
+ self.mom_value = self.momentum(self.step_counter).as_type(self.dtype)
+
+ # init dampening
+ if type(dampening) == float or type(dampening) == int:
+ self.dampening = Constant(dampening)
+ elif isinstance(dampening, DecayScheduler):
+ self.dampening = dampening
+ dampening = dampening.init_value
+ else:
+ raise TypeError("Wrong dampening type")
+ self.dam_value = self.dampening(self.step_counter).as_type(self.dtype)
+
+ # init weight_decay
+ if type(weight_decay) == float or type(weight_decay) == int:
+ if weight_decay < 0.0:
+ raise ValueError(
+ "Invalid weight_decay value: {}".format(weight_decay))
+ self.weight_decay = Constant(weight_decay)
+ elif isinstance(weight_decay, DecayScheduler):
+ self.weight_decay = weight_decay
+ else:
+ raise TypeError("Wrong weight_decay type")
+ self.decay_value = self.weight_decay(self.step_counter).as_type(
+ self.dtype)
+
+ # init other params
+ self.nesterov = nesterov
+ self.moments = dict()
+
+ # check value
+ if nesterov and (momentum <= 0 or dampening != 0):
+ raise ValueError(
+ "Nesterov momentum requires a momentum and zero dampening")
+
+ def apply(self, param_name, param_value, param_grad):
+ """Performs a single optimization step.
+
+ Args:
+ param_name(String): the name of the param
+            param_value(Tensor): param values to be updated in-place
+            param_grad(Tensor): param gradients; the values may be modified
+                in this function and must not be reused afterwards
+ """
+ assert param_value.shape == param_grad.shape, ("shape mismatch",
+ param_value.shape,
+ param_grad.shape)
+ self.device_check(param_value, self.step_counter, self.lr_value,
+ self.mom_value, self.dam_value, self.decay_value)
+
+ # derive dtype from input
+ assert param_value.dtype == self.dtype
+
+ # TODO add branch operator
+ # if self.decay_value != 0:
+ if self.weight_decay.init_value != 0:
+ singa.Axpy(self.decay_value.data, param_value.data, param_grad.data)
+
+ if self.momentum.init_value != 0:
+ if param_name not in self.moments:
+ flag = param_value.device.graph_enabled()
+ param_value.device.EnableGraph(False)
+ self.moments[param_name] = tensor.zeros_like(param_value)
+ param_value.device.EnableGraph(flag)
+
+ buf = self.moments[param_name]
+ buf *= self.mom_value
+ alpha = 1.0 - self.dam_value
+ singa.Axpy(alpha.data, param_grad.data, buf.data)
+
+ if self.nesterov:
+ singa.Axpy(self.mom_value.data, buf.data, param_grad.data)
+ else:
+ param_grad = buf
+
+ minus_lr = 0.0 - self.lr_value
+ singa.Axpy(minus_lr.data, param_grad.data, param_value.data)
+
+ def step(self):
+ # increment step counter, lr and moment
+ super().step()
+ mom_value = self.momentum(self.step_counter).as_type(self.dtype)
+ dam_value = self.dampening(self.step_counter).as_type(self.dtype)
+ decay_value = self.weight_decay(self.step_counter).as_type(self.dtype)
+ self.mom_value.copy_from(mom_value)
+ self.dam_value.copy_from(dam_value)
+ self.decay_value.copy_from(decay_value)
+
+ def get_states(self):
+ states = super().get_states()
+ if self.mom_value > 0:
+ states[
+ 'moments'] = self.moments # a dict for 1st order moments tensors
+ return states
+
+ def set_states(self, states):
+ super().set_states(states)
+ if 'moments' in states:
+ self.moments = states['moments']
+ self.mom_value = self.momentum(self.step_counter)
+
+
+# Data augmentation
+def augmentation(x, batch_size):
+ xpad = np.pad(x, [[0, 0], [0, 0], [4, 4], [4, 4]], 'symmetric')
+ for data_num in range(0, batch_size):
+ offset = np.random.randint(8, size=2)
+ x[data_num, :, :, :] = xpad[data_num, :,
+ offset[0]:offset[0] + x.shape[2],
+ offset[1]:offset[1] + x.shape[2]]
+ if_flip = np.random.randint(2)
+ if (if_flip):
+ x[data_num, :, :, :] = x[data_num, :, :, ::-1]
+ return x
+
+
+# Calculate accuracy
+def accuracy(pred, target):
+ # y is network output to be compared with ground truth (int)
+ y = np.argmax(pred, axis=1)
+ a = y == target
+ correct = np.array(a, "int").sum()
+ return correct
+
+
+# Data partition according to the rank
+def partition(global_rank, world_size, train_x, train_y, val_x, val_y):
+ # Partition training data
+ data_per_rank = train_x.shape[0] // world_size
+ idx_start = global_rank * data_per_rank
+ idx_end = (global_rank + 1) * data_per_rank
+ train_x = train_x[idx_start:idx_end]
+ train_y = train_y[idx_start:idx_end]
+
+ # Partition evaluation data
+ data_per_rank = val_x.shape[0] // world_size
+ idx_start = global_rank * data_per_rank
+ idx_end = (global_rank + 1) * data_per_rank
+ val_x = val_x[idx_start:idx_end]
+ val_y = val_y[idx_start:idx_end]
+ return train_x, train_y, val_x, val_y
+
+
+# Function to all reduce NUMPY accuracy and loss from multiple devices
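+# (copies the numpy value into the given device tensor, all-reduces it across
+# ranks, and returns the reduced result as a numpy array)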
+def reduce_variable(variable, dist_opt, reducer):
+ reducer.copy_from_numpy(variable)
+ dist_opt.all_reduce(reducer.data)
+ dist_opt.wait()
+ output = tensor.to_numpy(reducer)
+ return output
+
+
+def resize_dataset(x, image_size):
+ num_data = x.shape[0]
+ dim = x.shape[1]
+ X = np.zeros(shape=(num_data, dim, image_size, image_size),
+ dtype=np.float32)
+ for n in range(0, num_data):
+ for d in range(0, dim):
+ X[n, d, :, :] = np.array(Image.fromarray(x[n, d, :, :]).resize(
+ (image_size, image_size), Image.BILINEAR),
+ dtype=np.float32)
+ return X
+
+
+def run(global_rank,
+ world_size,
+ local_rank,
+ layer_hidden_list,
+ max_epoch,
+ batch_size,
+ model,
+ data,
+ mssgd,
+ graph,
+ verbosity,
+ dist_option='plain',
+ spars=None,
+ precision='float32'):
+ # dev = device.create_cuda_gpu_on(local_rank) # need to change to CPU device for CPU-only machines
+ dev = device.get_default_device()
+ dev.SetRandSeed(0)
+ np.random.seed(0)
+
+ if data == 'cifar10':
+ from data import cifar10
+ train_x, train_y, val_x, val_y = cifar10.load()
+ elif data == 'cifar100':
+ from data import cifar100
+ train_x, train_y, val_x, val_y = cifar100.load()
+ elif data == 'mnist':
+ from data import mnist
+ train_x, train_y, val_x, val_y = mnist.load()
+
+
+ num_channels = train_x.shape[1]
+ image_size = train_x.shape[2]
+ data_size = np.prod(train_x.shape[1:train_x.ndim]).item()
+ num_classes = (np.max(train_y) + 1).item()
+
+ if model == 'resnet':
+ from model import resnet
+ model = resnet.resnet50(num_channels=num_channels,
+ num_classes=num_classes)
+ elif model == 'xceptionnet':
+ from model import xceptionnet
+ model = xceptionnet.create_model(num_channels=num_channels,
+ num_classes=num_classes)
+ elif model == 'cnn':
+ from model import cnn
+ model = cnn.create_model(num_channels=num_channels,
+ num_classes=num_classes)
+ elif model == 'alexnet':
+ from model import alexnet
+ model = alexnet.create_model(num_channels=num_channels,
+ num_classes=num_classes)
+ elif model == 'mlp':
+ import os, sys, inspect
+ current = os.path.dirname(
+ os.path.abspath(inspect.getfile(inspect.currentframe())))
+ parent = os.path.dirname(current)
+ sys.path.insert(0, parent)
+ from mlp import model
+ model = model.create_model(data_size=data_size,
+ num_classes=num_classes)
+
+ elif model == 'msmlp':
+ import os, sys, inspect
+ current = os.path.dirname(
+ os.path.abspath(inspect.getfile(inspect.currentframe())))
+ parent = os.path.dirname(current)
+ sys.path.insert(0, parent)
+ from msmlp import model
+ model = model.create_model(data_size=data_size,
+ num_classes=num_classes)
+
+ elif model == 'ms_model_mlp':
+ import os, sys, inspect
+ current = os.path.dirname(
+ os.path.abspath(inspect.getfile(inspect.currentframe())))
+ parent = os.path.dirname(current)
+ sys.path.insert(0, parent)
+ from ms_model_mlp import model
+ model = model.create_model(data_size=data_size,
+ num_classes=num_classes,
+ layer_hidden_list=layer_hidden_list)
+ # print ("model: \n", model)
+
+ # For distributed training, sequential has better performance
+ if hasattr(mssgd, "communicator"):
+ DIST = True
+ sequential = True
+ else:
+ DIST = False
+ sequential = False
+
+ if DIST:
+ train_x, train_y, val_x, val_y = partition(global_rank, world_size,
+ train_x, train_y, val_x,
+ val_y)
+
+ if model.dimension == 4:
+ tx = tensor.Tensor(
+ (batch_size, num_channels, model.input_size, model.input_size), dev,
+ singa_dtype[precision])
+ elif model.dimension == 2:
+ tx = tensor.Tensor((batch_size, data_size), dev, singa_dtype[precision])
+        train_x = np.reshape(train_x, (train_x.shape[0], -1))
+        val_x = np.reshape(val_x, (val_x.shape[0], -1))
+
+ ty = tensor.Tensor((batch_size,), dev, tensor.int32)
+ num_train_batch = train_x.shape[0] // batch_size
+ num_val_batch = val_x.shape[0] // batch_size
+ idx = np.arange(train_x.shape[0], dtype=np.int32)
+
+ # Attach model to graph
+ model.set_optimizer(mssgd)
+ model.compile([tx], is_train=True, use_graph=graph, sequential=sequential)
+ dev.SetVerbosity(verbosity)
+
+ # Training and evaluation loop
+ for epoch in range(max_epoch):
+ start_time = time.time()
+ np.random.shuffle(idx)
+
+ if global_rank == 0:
+ print('Starting Epoch %d:' % (epoch))
+
+ # Training phase
+ train_correct = np.zeros(shape=[1], dtype=np.float32)
+ test_correct = np.zeros(shape=[1], dtype=np.float32)
+ train_loss = np.zeros(shape=[1], dtype=np.float32)
+
+ model.train()
+ print ("num_train_batch: \n", num_train_batch)
+ print ()
+ for b in range(num_train_batch):
+ # if b % 200 == 0:
+ # print ("b: \n", b)
+ # Generate the patch data in this iteration
+ x = train_x[idx[b * batch_size:(b + 1) * batch_size]]
+ if model.dimension == 4:
+ x = augmentation(x, batch_size)
+ if (image_size != model.input_size):
+ x = resize_dataset(x, model.input_size)
+ x = x.astype(np_dtype[precision])
+ y = train_y[idx[b * batch_size:(b + 1) * batch_size]]
+
+
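+            # SynFlow-style scoring over the last two batches: the second-to-last
+            # batch turns the weights positive, then the last batch feeds an
+            # all-ones input, uses the sum of the outputs as the loss, and scores
+            # each 2-D weight tensor by sum(|w * dL/dw|).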
+ synflow_flag = False
+ # Train the model
+            if epoch == (max_epoch - 1) and b == (num_train_batch - 1): ### synflow calculation for the last batch
+ print ("last epoch calculate synflow")
+ synflow_flag = True
+ ### step 1: all one input
+ # Copy the patch data into input tensors
+ tx.copy_from_numpy(np.ones(x.shape, dtype=np.float32))
+ ty.copy_from_numpy(y)
+ ### step 2: all weights turned to positive (done)
+ ### step 3: new loss (done)
+ # print ("before model forward ...")
+ pn_p_g_list, out, loss = model(tx, ty, dist_option, spars, synflow_flag)
+ ### step 4: calculate the multiplication of weights
+ synflow_score = 0.0
+ for pn_p_g_item in pn_p_g_list:
+ print ("calculate weight param * grad parameter name: \n", pn_p_g_item[0])
+ if len(pn_p_g_item[1].shape) == 2: # param_value.data is "weight"
+ print ("pn_p_g_item[1].shape: \n", pn_p_g_item[1].shape)
+ synflow_score += np.sum(np.absolute(tensor.to_numpy(pn_p_g_item[1]) * tensor.to_numpy(pn_p_g_item[2])))
+ print ("layer_hidden_list: \n", layer_hidden_list)
+ print ("synflow_score: \n", synflow_score)
+ elif epoch == (max_epoch - 1) and b == (num_train_batch - 2): # all weights turned to positive
+ # Copy the patch data into input tensors
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+ # print ("before model forward ...")
+ pn_p_g_list, out, loss = model(tx, ty, dist_option, spars, synflow_flag)
+ train_correct += accuracy(tensor.to_numpy(out), y)
+ train_loss += tensor.to_numpy(loss)[0]
+ # all params turned to positive
+ for pn_p_g_item in pn_p_g_list:
+ print ("absolute value parameter name: \n", pn_p_g_item[0])
+ pn_p_g_item[1] = tensor.abs(pn_p_g_item[1]) # tensor actually ...
+ else: # normal train steps
+ # Copy the patch data into input tensors
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+ # print ("normal before model(tx, ty, synflow_flag, dist_option, spars)")
+ # print ("train_cnn tx: \n", tx)
+ # print ("train_cnn ty: \n", ty)
+ # print ("before model forward ...")
+ pn_p_g_list, out, loss = model(tx, ty, dist_option, spars, synflow_flag)
+ # print ("normal after model(tx, ty, synflow_flag, dist_option, spars)")
+ train_correct += accuracy(tensor.to_numpy(out), y)
+ train_loss += tensor.to_numpy(loss)[0]
+
+ if DIST:
+ # Reduce the evaluation accuracy and loss from multiple devices
+ reducer = tensor.Tensor((1,), dev, tensor.float32)
+ train_correct = reduce_variable(train_correct, mssgd, reducer)
+ train_loss = reduce_variable(train_loss, mssgd, reducer)
+
+ if global_rank == 0:
+ print('Training loss = %f, training accuracy = %f' %
+ (train_loss, train_correct /
+ (num_train_batch * batch_size * world_size)),
+ flush=True)
+
+ # Evaluation phase
+ model.eval()
+ for b in range(num_val_batch):
+ x = val_x[b * batch_size:(b + 1) * batch_size]
+ if model.dimension == 4:
+ if (image_size != model.input_size):
+ x = resize_dataset(x, model.input_size)
+ x = x.astype(np_dtype[precision])
+ y = val_y[b * batch_size:(b + 1) * batch_size]
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+ out_test = model(tx)
+ test_correct += accuracy(tensor.to_numpy(out_test), y)
+
+ if DIST:
+            # Reduce the evaluation accuracy from multiple devices
+ test_correct = reduce_variable(test_correct, mssgd, reducer)
+
+ # Output the evaluation accuracy
+ if global_rank == 0:
+ print('Evaluation accuracy = %f, Elapsed Time = %fs' %
+ (test_correct / (num_val_batch * batch_size * world_size),
+ time.time() - start_time),
+ flush=True)
+
+ dev.PrintTimeProfiling()
+
+
+if __name__ == '__main__':
+ # Use argparse to get command config: max_epoch, model, data, etc., for single gpu training
+ parser = argparse.ArgumentParser(
+ description='Training using the autograd and graph.')
+ parser.add_argument(
+ 'model',
+ choices=['cnn', 'resnet', 'xceptionnet', 'mlp', 'msmlp', 'alexnet', 'ms_model_mlp'],
+ default='cnn')
+ parser.add_argument('data',
+ choices=['mnist', 'cifar10', 'cifar100'],
+ default='mnist')
+ parser.add_argument('-p',
+ choices=['float32', 'float16'],
+ default='float32',
+ dest='precision')
+ parser.add_argument('-m',
+ '--max-epoch',
+ default=2,
+ type=int,
+ help='maximum epochs',
+ dest='max_epoch')
+ parser.add_argument('-b',
+ '--batch-size',
+ default=64,
+ type=int,
+ help='batch size',
+ dest='batch_size')
+ parser.add_argument('-l',
+ '--learning-rate',
+ default=0.005,
+ type=float,
+ help='initial learning rate',
+ dest='lr')
+ # Determine which gpu to use
+ parser.add_argument('-i',
+ '--device-id',
+ default=0,
+ type=int,
+ help='which GPU to use',
+ dest='device_id')
+ parser.add_argument('-g',
+ '--disable-graph',
+                        default=True,
+ action='store_false',
+ help='disable graph',
+ dest='graph')
+ parser.add_argument('-v',
+ '--log-verbosity',
+ default=0,
+ type=int,
+ help='logging verbosity',
+ dest='verbosity')
+
+ args = parser.parse_args()
+
+ # mssgd = MSSGD(lr=args.lr, momentum=0.9, weight_decay=1e-5, dtype=singa_dtype[args.precision])
+
+ DEFAULT_LAYER_CHOICES_4 = [8, 16, 24, 32]
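+    # Grid search over all 4^4 = 256 hidden-size combinations; a fresh MSSGD
+    # optimizer is created for every candidate so runs do not share state.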
+ for layer1 in DEFAULT_LAYER_CHOICES_4:
+ for layer2 in DEFAULT_LAYER_CHOICES_4:
+ for layer3 in DEFAULT_LAYER_CHOICES_4:
+ for layer4 in DEFAULT_LAYER_CHOICES_4:
+ layer_hidden_list = [layer1, layer2+1, layer3+2, layer4+3]
+ # print ("layer_hidden_list: \n", layer_hidden_list)
+ mssgd = MSSGD(lr=args.lr, momentum=0.9, weight_decay=1e-5, dtype=singa_dtype[args.precision])
+ run(0,
+ 1,
+ args.device_id,
+ layer_hidden_list,
+ args.max_epoch,
+ args.batch_size,
+ args.model,
+ args.data,
+ mssgd,
+ args.graph,
+ args.verbosity,
+ precision=args.precision)
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/train_multiprocess.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/train_multiprocess.py
new file mode 100644
index 000000000..182dd35ee
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/train_multiprocess.py
@@ -0,0 +1,111 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+
+from singa import singa_wrap as singa
+from singa import opt
+from singa import tensor
+import argparse
+import train_cnn
+import multiprocessing
+
+singa_dtype = {"float16": tensor.float16, "float32": tensor.float32}
+
+def run(args, local_rank, world_size, nccl_id):
+ sgd = opt.SGD(lr=args.lr, momentum=0.9, weight_decay=1e-5, dtype=singa_dtype[args.precision])
+ sgd = opt.DistOpt(sgd, nccl_id=nccl_id, local_rank=local_rank, world_size=world_size)
+ train_cnn.run(sgd.global_rank, sgd.world_size, sgd.local_rank, args.max_epoch,
+ args.batch_size, args.model, args.data, sgd, args.graph,
+ args.verbosity, args.dist_option, args.spars, args.precision)
+
+
+if __name__ == '__main__':
+ # Use argparse to get command config: max_epoch, model, data, etc., for single gpu training
+ parser = argparse.ArgumentParser(
+ description='Training using the autograd and graph.')
+ parser.add_argument('model',
+ choices=['resnet', 'xceptionnet', 'cnn', 'mlp'],
+ default='cnn')
+ parser.add_argument('data', choices=['cifar10', 'cifar100', 'mnist'], default='mnist')
+ parser.add_argument('-p',
+ choices=['float32', 'float16'],
+ default='float32',
+ dest='precision')
+ parser.add_argument('-m',
+ '--max-epoch',
+ default=10,
+ type=int,
+ help='maximum epochs',
+ dest='max_epoch')
+ parser.add_argument('-b',
+ '--batch-size',
+ default=64,
+ type=int,
+ help='batch size',
+ dest='batch_size')
+ parser.add_argument('-l',
+ '--learning-rate',
+ default=0.005,
+ type=float,
+ help='initial learning rate',
+ dest='lr')
+ parser.add_argument('-w',
+ '--world-size',
+ default=2,
+ type=int,
+ help='number of gpus to be used',
+ dest='world_size')
+ parser.add_argument('-d',
+ '--dist-option',
+ default='plain',
+ choices=['plain','half','partialUpdate','sparseTopK','sparseThreshold'],
+                        help='distributed training options',
+ dest='dist_option') # currently partialUpdate support graph=False only
+ parser.add_argument('-s',
+ '--sparsification',
+ default='0.05',
+ type=float,
+                        help='the sparsity parameter used for sparsification, between 0 and 1',
+ dest='spars')
+ parser.add_argument('-g',
+ '--disable-graph',
+                        default=True,
+ action='store_false',
+ help='disable graph',
+ dest='graph')
+ parser.add_argument('-v',
+ '--log-verbosity',
+ default=0,
+ type=int,
+ help='logging verbosity',
+ dest='verbosity')
+
+ args = parser.parse_args()
+
+ # Generate a NCCL ID to be used for collective communication
+ nccl_id = singa.NcclIdHolder()
+
+ process = []
+ for local_rank in range(0, args.world_size):
+ process.append(
+ multiprocessing.Process(target=run,
+ args=(args, local_rank, args.world_size, nccl_id)))
+
+ for p in process:
+ p.start()
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/ms_model_mlp/model.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/ms_model_mlp/model.py
new file mode 100644
index 000000000..454b382d5
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/ms_model_mlp/model.py
@@ -0,0 +1,226 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from singa import layer
+from singa import model
+from singa import tensor
+from singa import opt
+from singa import device
+from singa.autograd import Operator
+from singa.layer import Layer
+from singa import singa_wrap as singa
+import argparse
+import numpy as np
+
+np_dtype = {"float16": np.float16, "float32": np.float32}
+
+singa_dtype = {"float16": tensor.float16, "float32": tensor.float32}
+
+#### self-defined loss begin
+
+### from autograd.py
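+# SumError implements the SynFlow loss: forward() returns the sum of all input
+# elements, and backward() returns an all-ones tensor scaled by dy, i.e. the
+# gradient of that sum with respect to every input element.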
+class SumError(Operator):
+
+ def __init__(self):
+ super(SumError, self).__init__()
+ # self.t = t.data
+
+ def forward(self, x):
+ # self.err = singa.__sub__(x, self.t)
+ self.data_x = x
+ # sqr = singa.Square(self.err)
+ # loss = singa.SumAll(sqr)
+ loss = singa.SumAll(x)
+ # self.n = 1
+ # for s in x.shape():
+ # self.n *= s
+ # loss /= self.n
+ return loss
+
+ def backward(self, dy=1.0):
+ # dx = self.err
+ dev = device.get_default_device()
+ dx = tensor.Tensor(self.data_x.shape, dev, singa_dtype['float32'])
+ dx.copy_from_numpy(np.ones(self.data_x.shape))
+ # dx *= float(2 / self.n)
+ dx *= dy
+ return dx
+
+def se_loss(x):
+ # assert x.shape == t.shape, "input and target shape different: %s, %s" % (
+ # x.shape, t.shape)
+ return SumError()(x)[0]
+
+### from layer.py
+class SumErrorLayer(Layer):
+ """
+    Layer wrapper around the SumError operator
+ """
+
+ def __init__(self):
+ super(SumErrorLayer, self).__init__()
+
+ def forward(self, x):
+ return se_loss(x)
+
+#### self-defined loss end
+
+class MSMLP(model.Model):
+
+ def __init__(self, data_size=10, perceptron_size=100, num_classes=10, layer_hidden_list=[10,10,10,10]):
+ super(MSMLP, self).__init__()
+ self.num_classes = num_classes
+ self.dimension = 2
+
+ self.relu = layer.ReLU()
+ self.linear1 = layer.Linear(layer_hidden_list[0])
+ self.linear2 = layer.Linear(layer_hidden_list[1])
+ self.linear3 = layer.Linear(layer_hidden_list[2])
+ self.linear4 = layer.Linear(layer_hidden_list[3])
+ self.linear5 = layer.Linear(num_classes)
+ self.softmax_cross_entropy = layer.SoftMaxCrossEntropy()
+ self.sum_error = SumErrorLayer()
+
+ def forward(self, inputs):
+ y = self.linear1(inputs)
+ y = self.relu(y)
+ y = self.linear2(y)
+ y = self.relu(y)
+ y = self.linear3(y)
+ y = self.relu(y)
+ y = self.linear4(y)
+ y = self.relu(y)
+ y = self.linear5(y)
+ return y
+
+ def train_one_batch(self, x, y, dist_option, spars, synflow_flag):
+ # print ("in train_one_batch")
+ out = self.forward(x)
+ # print ("train_one_batch x.data: \n", x.data)
+ # print ("train_one_batch y.data: \n", y.data)
+ # print ("train_one_batch out.data: \n", out.data)
+ if synflow_flag:
+ # print ("sum_error")
+ loss = self.sum_error(out)
+ else: # normal training
+ # print ("softmax_cross_entropy")
+ loss = self.softmax_cross_entropy(out, y)
+ # print ("train_one_batch loss.data: \n", loss.data)
+
+ if dist_option == 'plain':
+ # print ("before pn_p_g_list = self.optimizer(loss)")
+ pn_p_g_list = self.optimizer(loss)
+ # print ("after pn_p_g_list = self.optimizer(loss)")
+ elif dist_option == 'half':
+ self.optimizer.backward_and_update_half(loss)
+ elif dist_option == 'partialUpdate':
+ self.optimizer.backward_and_partial_update(loss)
+ elif dist_option == 'sparseTopK':
+ self.optimizer.backward_and_sparse_update(loss,
+ topK=True,
+ spars=spars)
+ elif dist_option == 'sparseThreshold':
+ self.optimizer.backward_and_sparse_update(loss,
+ topK=False,
+ spars=spars)
+ # print ("len(pn_p_g_list): \n", len(pn_p_g_list))
+ # print ("len(pn_p_g_list[0]): \n", len(pn_p_g_list[0]))
+ # print ("pn_p_g_list[0][0]: \n", pn_p_g_list[0][0])
+ # print ("pn_p_g_list[0][1].data: \n", pn_p_g_list[0][1].data)
+ # print ("pn_p_g_list[0][2].data: \n", pn_p_g_list[0][2].data)
+ return pn_p_g_list, out, loss
+ # return pn_p_g_list[0], pn_p_g_list[1], pn_p_g_list[2], out, loss
+
+ def set_optimizer(self, optimizer):
+ self.optimizer = optimizer
+
+
+def create_model(pretrained=False, **kwargs):
+ """Constructs a CNN model.
+
+ Args:
+ pretrained (bool): If True, returns a pre-trained model.
+
+ Returns:
+ The created CNN model.
+ """
+ model = MSMLP(**kwargs)
+
+ return model
+
+
+__all__ = ['MSMLP', 'create_model']
+
+if __name__ == "__main__":
+ np.random.seed(0)
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-p',
+ choices=['float32', 'float16'],
+ default='float32',
+ dest='precision')
+ parser.add_argument('-g',
+ '--disable-graph',
+                        default=True,
+ action='store_false',
+ help='disable graph',
+ dest='graph')
+ parser.add_argument('-m',
+ '--max-epoch',
+ default=1001,
+ type=int,
+ help='maximum epochs',
+ dest='max_epoch')
+ args = parser.parse_args()
+
+ # generate the boundary
+ f = lambda x: (5 * x + 1)
+ bd_x = np.linspace(-1.0, 1, 200)
+ bd_y = f(bd_x)
+
+ # generate the training data
+ x = np.random.uniform(-1, 1, 400)
+ y = f(x) + 2 * np.random.randn(len(x))
+
+ # choose one precision
+ precision = singa_dtype[args.precision]
+ np_precision = np_dtype[args.precision]
+
+ # convert training data to 2d space
+ label = np.asarray([5 * a + 1 > b for (a, b) in zip(x, y)]).astype(np.int32)
+ data = np.array([[a, b] for (a, b) in zip(x, y)], dtype=np_precision)
+
+ dev = device.create_cuda_gpu_on(0)
+ sgd = opt.SGD(0.1, 0.9, 1e-5, dtype=singa_dtype[args.precision])
+ tx = tensor.Tensor((400, 2), dev, precision)
+ ty = tensor.Tensor((400,), dev, tensor.int32)
+    model = MSMLP(data_size=2, perceptron_size=3, num_classes=2)
+
+ # attach model to graph
+ model.set_optimizer(sgd)
+ model.compile([tx], is_train=True, use_graph=args.graph, sequential=True)
+ model.train()
+
+ for i in range(args.max_epoch):
+ tx.copy_from_numpy(data)
+ ty.copy_from_numpy(label)
+        pn_p_g_list, out, loss = model(tx, ty, 'plain', None, False)  # dist_option='plain', synflow_flag=False
+
+ if i % 100 == 0:
+ print("training loss = ", tensor.to_numpy(loss)[0])
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/ms_model_mlp/native.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/ms_model_mlp/native.py
new file mode 100644
index 000000000..a82ec3b24
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/ms_model_mlp/native.py
@@ -0,0 +1,137 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from singa import tensor
+from singa.tensor import Tensor
+from singa import autograd
+from singa import opt
+import numpy as np
+from singa import device
+import argparse
+
+np_dtype = {"float16": np.float16, "float32": np.float32}
+
+singa_dtype = {"float16": tensor.float16, "float32": tensor.float32}
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-p',
+ choices=['float32', 'float16'],
+ default='float32',
+ dest='precision')
+ parser.add_argument('-m',
+ '--max-epoch',
+ default=1001,
+ type=int,
+ help='maximum epochs',
+ dest='max_epoch')
+ args = parser.parse_args()
+
+ np.random.seed(0)
+
+ autograd.training = True
+
+ # prepare training data in numpy array
+
+ # generate the boundary
+ f = lambda x: (5 * x + 1)
+ bd_x = np.linspace(-1.0, 1, 200)
+ bd_y = f(bd_x)
+
+ # generate the training data
+ x = np.random.uniform(-1, 1, 400)
+ y = f(x) + 2 * np.random.randn(len(x))
+
+ # convert training data to 2d space
+ label = np.asarray([5 * a + 1 > b for (a, b) in zip(x, y)])
+ data = np.array([[a, b] for (a, b) in zip(x, y)], dtype=np.float32)
+
+ def to_categorical(y, num_classes):
+ """
+ Converts a class vector (integers) to binary class matrix.
+
+ Args:
+ y: class vector to be converted into a matrix
+ (integers from 0 to num_classes).
+ num_classes: total number of classes.
+
+ Returns:
+ A binary matrix representation of the input.
+ """
+ y = np.array(y, dtype="int")
+ n = y.shape[0]
+ categorical = np.zeros((n, num_classes))
+ categorical[np.arange(n), y] = 1
+ return categorical
+
+ label = to_categorical(label, 2).astype(np.float32)
+ print("train_data_shape:", data.shape)
+ print("train_label_shape:", label.shape)
+
+ precision = singa_dtype[args.precision]
+ np_precision = np_dtype[args.precision]
+
+ dev = device.create_cuda_gpu()
+
+ inputs = Tensor(data=data, device=dev)
+ target = Tensor(data=label, device=dev)
+
+ inputs = inputs.as_type(precision)
+ target = target.as_type(tensor.int32)
+
+ w0_np = np.random.normal(0, 0.1, (2, 3)).astype(np_precision)
+ w0 = Tensor(data=w0_np,
+ device=dev,
+ dtype=precision,
+ requires_grad=True,
+ stores_grad=True)
+ b0 = Tensor(shape=(3,),
+ device=dev,
+ dtype=precision,
+ requires_grad=True,
+ stores_grad=True)
+ b0.set_value(0.0)
+
+ w1_np = np.random.normal(0, 0.1, (3, 2)).astype(np_precision)
+ w1 = Tensor(data=w1_np,
+ device=dev,
+ dtype=precision,
+ requires_grad=True,
+ stores_grad=True)
+ b1 = Tensor(shape=(2,),
+ device=dev,
+ dtype=precision,
+ requires_grad=True,
+ stores_grad=True)
+ b1.set_value(0.0)
+
+ sgd = opt.SGD(0.05, 0.8)
+
+ # training process
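+    # Every iteration pushes the full 400-sample batch through a 2-3-2 MLP built
+    # from raw autograd ops (matmul + bias + ReLU, matmul + bias) and applies one
+    # SGD step on the softmax cross-entropy loss.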
+ for i in range(args.max_epoch):
+ x = autograd.matmul(inputs, w0)
+ x = autograd.add_bias(x, b0)
+ x = autograd.relu(x)
+ x = autograd.matmul(x, w1)
+ x = autograd.add_bias(x, b1)
+ loss = autograd.softmax_cross_entropy(x, target)
+ sgd(loss)
+
+ if i % 100 == 0:
+ print("%d, training loss = " % i, tensor.to_numpy(loss)[0])
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/msmlp/model.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/msmlp/model.py
new file mode 100644
index 000000000..c0f0b7b4e
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/msmlp/model.py
@@ -0,0 +1,217 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from singa import layer
+from singa import model
+from singa import tensor
+from singa import opt
+from singa import device
+from singa.autograd import Operator
+from singa.layer import Layer
+from singa import singa_wrap as singa
+import argparse
+import numpy as np
+
+np_dtype = {"float16": np.float16, "float32": np.float32}
+
+singa_dtype = {"float16": tensor.float16, "float32": tensor.float32}
+
+#### self-defined loss begin
+
+### from autograd.py
+class SumError(Operator):
+
+ def __init__(self):
+ super(SumError, self).__init__()
+ # self.t = t.data
+
+ def forward(self, x):
+ # self.err = singa.__sub__(x, self.t)
+ self.data_x = x
+ # sqr = singa.Square(self.err)
+ # loss = singa.SumAll(sqr)
+ loss = singa.SumAll(x)
+ # self.n = 1
+ # for s in x.shape():
+ # self.n *= s
+ # loss /= self.n
+ return loss
+
+ def backward(self, dy=1.0):
+ # dx = self.err
+ dev = device.get_default_device()
+ dx = tensor.Tensor(self.data_x.shape, dev, singa_dtype['float32'])
+ dx.copy_from_numpy(np.ones(self.data_x.shape))
+ # dx *= float(2 / self.n)
+ dx *= dy
+ return dx
+
+def se_loss(x):
+ # assert x.shape == t.shape, "input and target shape different: %s, %s" % (
+ # x.shape, t.shape)
+ return SumError()(x)[0]
+
+### from layer.py
+class SumErrorLayer(Layer):
+ """
+    Layer wrapper around the SumError operator
+ """
+
+ def __init__(self):
+ super(SumErrorLayer, self).__init__()
+
+ def forward(self, x):
+ return se_loss(x)
+
+#### self-defined loss end
+
+class MSMLP(model.Model):
+
+ def __init__(self, data_size=10, perceptron_size=100, num_classes=10):
+ super(MSMLP, self).__init__()
+ self.num_classes = num_classes
+ self.dimension = 2
+
+ self.relu = layer.ReLU()
+ self.linear1 = layer.Linear(perceptron_size)
+ self.linear2 = layer.Linear(num_classes)
+ self.softmax_cross_entropy = layer.SoftMaxCrossEntropy()
+ self.sum_error = SumErrorLayer()
+
+ def forward(self, inputs):
+ y = self.linear1(inputs)
+ y = self.relu(y)
+ y = self.linear2(y)
+ return y
+
+ def train_one_batch(self, x, y, dist_option, spars, synflow_flag):
+ # print ("in train_one_batch")
+ out = self.forward(x)
+ # print ("train_one_batch x.data: \n", x.data)
+ # print ("train_one_batch y.data: \n", y.data)
+ # print ("train_one_batch out.data: \n", out.data)
+ if synflow_flag:
+ # print ("sum_error")
+ loss = self.sum_error(out)
+ else: # normal training
+ # print ("softmax_cross_entropy")
+ loss = self.softmax_cross_entropy(out, y)
+ # print ("train_one_batch loss.data: \n", loss.data)
+
+ if dist_option == 'plain':
+ # print ("before pn_p_g_list = self.optimizer(loss)")
+ pn_p_g_list = self.optimizer(loss)
+ # print ("after pn_p_g_list = self.optimizer(loss)")
+ elif dist_option == 'half':
+ self.optimizer.backward_and_update_half(loss)
+ elif dist_option == 'partialUpdate':
+ self.optimizer.backward_and_partial_update(loss)
+ elif dist_option == 'sparseTopK':
+ self.optimizer.backward_and_sparse_update(loss,
+ topK=True,
+ spars=spars)
+ elif dist_option == 'sparseThreshold':
+ self.optimizer.backward_and_sparse_update(loss,
+ topK=False,
+ spars=spars)
+ # print ("len(pn_p_g_list): \n", len(pn_p_g_list))
+ # print ("len(pn_p_g_list[0]): \n", len(pn_p_g_list[0]))
+ # print ("pn_p_g_list[0][0]: \n", pn_p_g_list[0][0])
+ # print ("pn_p_g_list[0][1].data: \n", pn_p_g_list[0][1].data)
+ # print ("pn_p_g_list[0][2].data: \n", pn_p_g_list[0][2].data)
+ return pn_p_g_list, out, loss
+ # return pn_p_g_list[0], pn_p_g_list[1], pn_p_g_list[2], out, loss
+
+ def set_optimizer(self, optimizer):
+ self.optimizer = optimizer
+
+
+def create_model(pretrained=False, **kwargs):
+ """Constructs a CNN model.
+
+ Args:
+ pretrained (bool): If True, returns a pre-trained model.
+
+ Returns:
+ The created CNN model.
+ """
+ model = MSMLP(**kwargs)
+
+ return model
+
+
+__all__ = ['MSMLP', 'create_model']
+
+if __name__ == "__main__":
+ np.random.seed(0)
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-p',
+ choices=['float32', 'float16'],
+ default='float32',
+ dest='precision')
+ parser.add_argument('-g',
+ '--disable-graph',
+                        default=True,
+ action='store_false',
+ help='disable graph',
+ dest='graph')
+ parser.add_argument('-m',
+ '--max-epoch',
+ default=1001,
+ type=int,
+ help='maximum epochs',
+ dest='max_epoch')
+ args = parser.parse_args()
+
+ # generate the boundary
+ f = lambda x: (5 * x + 1)
+ bd_x = np.linspace(-1.0, 1, 200)
+ bd_y = f(bd_x)
+
+ # generate the training data
+ x = np.random.uniform(-1, 1, 400)
+ y = f(x) + 2 * np.random.randn(len(x))
+
+ # choose one precision
+ precision = singa_dtype[args.precision]
+ np_precision = np_dtype[args.precision]
+
+ # convert training data to 2d space
+ label = np.asarray([5 * a + 1 > b for (a, b) in zip(x, y)]).astype(np.int32)
+ data = np.array([[a, b] for (a, b) in zip(x, y)], dtype=np_precision)
+
+ dev = device.create_cuda_gpu_on(0)
+ sgd = opt.SGD(0.1, 0.9, 1e-5, dtype=singa_dtype[args.precision])
+ tx = tensor.Tensor((400, 2), dev, precision)
+ ty = tensor.Tensor((400,), dev, tensor.int32)
+    model = MSMLP(data_size=2, perceptron_size=3, num_classes=2)
+
+ # attach model to graph
+ model.set_optimizer(sgd)
+ model.compile([tx], is_train=True, use_graph=args.graph, sequential=True)
+ model.train()
+
+ for i in range(args.max_epoch):
+ tx.copy_from_numpy(data)
+ ty.copy_from_numpy(label)
+        pn_p_g_list, out, loss = model(tx, ty, 'plain', None, False)  # dist_option='plain', synflow_flag=False
+
+ if i % 100 == 0:
+ print("training loss = ", tensor.to_numpy(loss)[0])
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/msmlp/native.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/msmlp/native.py
new file mode 100644
index 000000000..a82ec3b24
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/msmlp/native.py
@@ -0,0 +1,137 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from singa import tensor
+from singa.tensor import Tensor
+from singa import autograd
+from singa import opt
+import numpy as np
+from singa import device
+import argparse
+
+np_dtype = {"float16": np.float16, "float32": np.float32}
+
+singa_dtype = {"float16": tensor.float16, "float32": tensor.float32}
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-p',
+ choices=['float32', 'float16'],
+ default='float32',
+ dest='precision')
+ parser.add_argument('-m',
+ '--max-epoch',
+ default=1001,
+ type=int,
+ help='maximum epochs',
+ dest='max_epoch')
+ args = parser.parse_args()
+
+ np.random.seed(0)
+
+ autograd.training = True
+
+ # prepare training data in numpy array
+
+ # generate the boundary
+ f = lambda x: (5 * x + 1)
+ bd_x = np.linspace(-1.0, 1, 200)
+ bd_y = f(bd_x)
+
+ # generate the training data
+ x = np.random.uniform(-1, 1, 400)
+ y = f(x) + 2 * np.random.randn(len(x))
+
+ # convert training data to 2d space
+ label = np.asarray([5 * a + 1 > b for (a, b) in zip(x, y)])
+ data = np.array([[a, b] for (a, b) in zip(x, y)], dtype=np.float32)
+
+ def to_categorical(y, num_classes):
+ """
+ Converts a class vector (integers) to binary class matrix.
+
+ Args:
+ y: class vector to be converted into a matrix
+ (integers from 0 to num_classes).
+ num_classes: total number of classes.
+
+ Returns:
+ A binary matrix representation of the input.
+ """
+ y = np.array(y, dtype="int")
+ n = y.shape[0]
+ categorical = np.zeros((n, num_classes))
+ categorical[np.arange(n), y] = 1
+ return categorical
+
+ label = to_categorical(label, 2).astype(np.float32)
+ print("train_data_shape:", data.shape)
+ print("train_label_shape:", label.shape)
+
+ precision = singa_dtype[args.precision]
+ np_precision = np_dtype[args.precision]
+
+ dev = device.create_cuda_gpu()
+
+ inputs = Tensor(data=data, device=dev)
+ target = Tensor(data=label, device=dev)
+
+ inputs = inputs.as_type(precision)
+ target = target.as_type(tensor.int32)
+
+ w0_np = np.random.normal(0, 0.1, (2, 3)).astype(np_precision)
+ w0 = Tensor(data=w0_np,
+ device=dev,
+ dtype=precision,
+ requires_grad=True,
+ stores_grad=True)
+ b0 = Tensor(shape=(3,),
+ device=dev,
+ dtype=precision,
+ requires_grad=True,
+ stores_grad=True)
+ b0.set_value(0.0)
+
+ w1_np = np.random.normal(0, 0.1, (3, 2)).astype(np_precision)
+ w1 = Tensor(data=w1_np,
+ device=dev,
+ dtype=precision,
+ requires_grad=True,
+ stores_grad=True)
+ b1 = Tensor(shape=(2,),
+ device=dev,
+ dtype=precision,
+ requires_grad=True,
+ stores_grad=True)
+ b1.set_value(0.0)
+
+ sgd = opt.SGD(0.05, 0.8)
+
+ # training process
+ for i in range(args.max_epoch):
+ x = autograd.matmul(inputs, w0)
+ x = autograd.add_bias(x, b0)
+ x = autograd.relu(x)
+ x = autograd.matmul(x, w1)
+ x = autograd.add_bias(x, b1)
+ loss = autograd.softmax_cross_entropy(x, target)
+ sgd(loss)
+
+ if i % 100 == 0:
+ print("%d, training loss = " % i, tensor.to_numpy(loss)[0])
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/vote.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/vote.py
new file mode 100644
index 000000000..1f1ee1f39
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/vote.py
@@ -0,0 +1,133 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from src.eva_engine.phase1.algo.alg_base import Evaluator
+from .utils.autograd_hacks import *
+from src.common.constant import Config
+
+class VoteEvaluator(Evaluator):
+
+ def __init__(self):
+ super().__init__()
+
+ def evaluate(self, arch: nn.Module, device, batch_data: object, batch_labels: torch.Tensor, space_name: str) -> float:
+ """
+        This simply sums all weights' norms to estimate the model's performance.
+ :param arch:
+ :param device: CPU or GPU
+ :param batch_data:
+ :param batch_labels:
+ :return:
+ """
+
+ pass
+
+
+def vote_between_two_arch(arch1_info: dict, arch2_info: dict, metric: list, space: str):
+ """
+ Return which architecture is better,
+ :param arch1_info:
+ :param arch2_info:
+ :param metric:
+ :param space:
+ :return:
+ """
+ left_vote = 0
+ right_vote = 0
+ for m_name in metric:
+ # if this metrics vote to left
+ if vote_to_left[space](m_name,
+ float(arch1_info["scores"][m_name]["score"]),
+ float(arch2_info["scores"][m_name]["score"])):
+ left_vote += 1
+ else:
+ right_vote += 1
+
+ if left_vote > right_vote:
+ return arch1_info["architecture_id"]
+ else:
+ return arch2_info["architecture_id"]
+
+
+def compare_score_201(m_name: str, s1: float, s2: float) -> bool:
+ """
+ Return if s1 is better than s2,
+ :param m_name:
+ :param s1:
+ :param s2:
+ :return: if s1 is better than s2
+ """
+ if m_name == "grad_norm":
+ return s1 > s2
+ if m_name == "grad_plain":
+ return s1 < s2
+ if m_name == "ntk_cond_num":
+ return s1 < s2
+ if m_name == "ntk_trace":
+ return s1 > s2
+ if m_name == "ntk_trace_approx":
+ return s1 > s2
+ if m_name == "fisher":
+ return s1 > s2
+ if m_name == "grasp":
+ return s1 > s2
+ if m_name == "snip":
+ return s1 > s2
+ if m_name == "synflow":
+ return s1 > s2
+ if m_name == "weight_norm":
+ return s1 > s2
+ if m_name == "nas_wot":
+ return s1 > s2
+
+
+def compare_score_101(m_name: str, s1: float, s2: float) -> bool:
+ """
+ Return if s1 is better than s2,
+ :param m_name:
+ :param s1:
+ :param s2:
+ :return: if s1 is better than s2
+ """
+ if m_name == "grad_norm":
+ return s1 < s2
+ if m_name == "grad_plain":
+ return s1 < s2
+ if m_name == "ntk_cond_num":
+ return s1 < s2
+ if m_name == "ntk_trace":
+ return s1 < s2
+ if m_name == "ntk_trace_approx":
+ return s1 < s2
+ if m_name == "fisher":
+ return s1 < s2
+ if m_name == "grasp":
+ return s1 > s2
+ if m_name == "snip":
+ return s1 < s2
+ if m_name == "synflow":
+ return s1 > s2
+ if m_name == "weight_norm":
+ return s1 > s2
+ if m_name == "nas_wot":
+ return s1 > s2
+
+
+vote_to_left = {}
+vote_to_left["101"] = compare_score_101
+vote_to_left["201"] = compare_score_201
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/__init__.py
new file mode 100644
index 000000000..3df60b02f
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/__init__.py
@@ -0,0 +1,17 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/algo/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/algo/__init__.py
new file mode 100644
index 000000000..3df60b02f
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/algo/__init__.py
@@ -0,0 +1,17 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/algo/trainer.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/algo/trainer.py
new file mode 100644
index 000000000..259ebea6d
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/algo/trainer.py
@@ -0,0 +1,553 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import time
+
+from src.tools import utils
+
+from singa import singa_wrap as singa
+from singa import device as singa_device
+from singa import tensor
+from singa import opt
+from singa import autograd
+from singa.opt import Optimizer
+from singa.opt import DecayScheduler
+from singa.opt import Constant
+import numpy as np
+import time
+import argparse
+from PIL import Image
+
+np_dtype = {"float16": np.float16, "float32": np.float32}
+
+# singa_dtype = {"float16": tensor.float16, "float32": tensor.float32}
+singa_dtype = {"float32": tensor.float32}
+
+### MSOptimizer
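+# MSOptimizer extends the stock Optimizer call path: besides applying the update,
+# it returns the [param_name, param, grad] triples collected during
+# autograd.backward(), which the SynFlow scoring code consumes.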
+class MSOptimizer(Optimizer):
+ def __call__(self, loss):
+ pn_p_g_list = self.call_with_returns(loss)
+ # print ("optimizer1 before self.step()")
+ # print ("optimizer1 before print len(pn_p_g_list): \n", len(pn_p_g_list))
+ self.step()
+ # print ("optimizer1 after print len(pn_p_g_list): \n", len(pn_p_g_list))
+ # print ("optimizer1 after self.step()")
+ return pn_p_g_list
+
+ def call_with_returns(self, loss):
+ # print ("call_with_returns before apply loss.data: \n", loss.data)
+ pn_p_g_list = []
+ for p, g in autograd.backward(loss):
+ if p.name is None:
+ p.name = id(p)
+ self.apply(p.name, p, g)
+ # print ("call with returns")
+ # print ("p.name: \n", p.name)
+ # print ("p.data: \n", p.data)
+ # print ("g.data: \n", g.data)
+ pn_p_g_list.append([p.name, p, g]) # need iterables
+ # print ("call_with_returns after apply loss.data: \n", loss.data)
+ return pn_p_g_list
+
+class MSSGD(MSOptimizer):
+ """Implements stochastic gradient descent (optionally with momentum).
+
+ Nesterov momentum is based on the formula from `On the importance of initialization and momentum in deep learning`__.
+
+ Args:
+ lr(float): learning rate
+ momentum(float, optional): momentum factor(default: 0)
+ weight_decay(float, optional): weight decay(L2 penalty)(default: 0)
+ dampening(float, optional): dampening for momentum(default: 0)
+ nesterov(bool, optional): enables Nesterov momentum(default: False)
+
+ Typical usage example:
+        >>> from singa import opt
+        >>> optimizer = opt.SGD(lr=0.1, momentum=0.9)
+        >>> optimizer.update()
+
+    __ http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf
+
+ .. note::
+ The implementation of SGD with Momentum / Nesterov subtly differs from
+ Sutskever et. al. and implementations in some other frameworks.
+
+ Considering the specific case of Momentum, the update can be written as
+
+ .. math::
+ v = \rho * v + g \\
+ p = p - lr * v
+
+ where p, g, v and: math: `\rho` denote the parameters, gradient,
+ velocity, and momentum respectively.
+
+ This is in contrast to Sutskever et. al. and
+ other frameworks which employ an update of the form
+
+ .. math::
+ v = \rho * v + lr * g \\
+ p = p - v
+
+ The Nesterov version is analogously modified.
+ """
+
+ def __init__(self,
+ lr=0.1,
+ momentum=0,
+ dampening=0,
+ weight_decay=0,
+ nesterov=False,
+ dtype=tensor.float32):
+ super(MSSGD, self).__init__(lr)
+
+ # init momentum
+ if type(momentum) == float or type(momentum) == int:
+ if momentum < 0.0:
+ raise ValueError("Invalid momentum value: {}".format(momentum))
+ self.momentum = Constant(momentum)
+ elif isinstance(momentum, DecayScheduler):
+ self.momentum = momentum
+ momentum = momentum.init_value
+ else:
+ raise TypeError("Wrong momentum type")
+ # self.dtype = dtype
+ # self.mom_value = self.momentum(self.step_counter).as_type(self.dtype)
+ self.mom_value = self.momentum(self.step_counter)
+
+ # init dampening
+ if type(dampening) == float or type(dampening) == int:
+ self.dampening = Constant(dampening)
+ elif isinstance(dampening, DecayScheduler):
+ self.dampening = dampening
+ dampening = dampening.init_value
+ else:
+ raise TypeError("Wrong dampening type")
+ # self.dam_value = self.dampening(self.step_counter).as_type(self.dtype)
+ self.dam_value = self.dampening(self.step_counter)
+
+ # init weight_decay
+ if type(weight_decay) == float or type(weight_decay) == int:
+ if weight_decay < 0.0:
+ raise ValueError(
+ "Invalid weight_decay value: {}".format(weight_decay))
+ self.weight_decay = Constant(weight_decay)
+ elif isinstance(weight_decay, DecayScheduler):
+ self.weight_decay = weight_decay
+ else:
+ raise TypeError("Wrong weight_decay type")
+ # self.decay_value = self.weight_decay(self.step_counter).as_type(self.dtype)
+ self.decay_value = self.weight_decay(self.step_counter)
+
+ # init other params
+ self.nesterov = nesterov
+ self.moments = dict()
+
+ # check value
+ if nesterov and (momentum <= 0 or dampening != 0):
+ raise ValueError(
+ "Nesterov momentum requires a momentum and zero dampening")
+
+ def apply(self, param_name, param_value, param_grad):
+ """Performs a single optimization step.
+
+ Args:
+ param_name(String): the name of the param
+            param_value(Tensor): param values to be updated in place
+            param_grad(Tensor): param gradients; the values may be modified
+                          in place by this function and should not be reused
+ """
+ assert param_value.shape == param_grad.shape, ("shape mismatch",
+ param_value.shape,
+ param_grad.shape)
+ self.device_check(param_value, self.step_counter, self.lr_value,
+ self.mom_value, self.dam_value, self.decay_value)
+
+ # derive dtype from input
+ # assert param_value.dtype == self.dtype
+
+ # TODO add branch operator
+ # if self.decay_value != 0:
+ if self.weight_decay.init_value != 0:
+ singa.Axpy(self.decay_value.data, param_value.data, param_grad.data)
+
+ if self.momentum.init_value != 0:
+ if param_name not in self.moments:
+ flag = param_value.device.graph_enabled()
+ param_value.device.EnableGraph(False)
+ self.moments[param_name] = tensor.zeros_like(param_value)
+ param_value.device.EnableGraph(flag)
+
+ buf = self.moments[param_name]
+ buf *= self.mom_value
+ alpha = 1.0 - self.dam_value
+ singa.Axpy(alpha.data, param_grad.data, buf.data)
+
+ if self.nesterov:
+ singa.Axpy(self.mom_value.data, buf.data, param_grad.data)
+ else:
+ param_grad = buf
+
+ minus_lr = 0.0 - self.lr_value
+ singa.Axpy(minus_lr.data, param_grad.data, param_value.data)
+
+ def step(self):
+ # increment step counter, lr and moment
+ # print ("before super step")
+ super().step()
+ # print ("after super step")
+ # print ("before custiomized step")
+ # mom_value = self.momentum(self.step_counter).as_type(self.dtype)
+ # dam_value = self.dampening(self.step_counter).as_type(self.dtype)
+ # decay_value = self.weight_decay(self.step_counter).as_type(self.dtype)
+ mom_value = self.momentum(self.step_counter)
+ dam_value = self.dampening(self.step_counter)
+ decay_value = self.weight_decay(self.step_counter)
+ self.mom_value.copy_from(mom_value)
+ self.dam_value.copy_from(dam_value)
+ self.decay_value.copy_from(decay_value)
+ # print ("after customized step")
+
+ def get_states(self):
+ states = super().get_states()
+ if self.mom_value > 0:
+ states[
+ 'moments'] = self.moments # a dict for 1st order moments tensors
+ return states
+
+ def set_states(self, states):
+ super().set_states(states)
+ if 'moments' in states:
+ self.moments = states['moments']
+ self.mom_value = self.momentum(self.step_counter)
+
+# Data augmentation
+def augmentation(x, batch_size):
+ xpad = np.pad(x, [[0, 0], [0, 0], [4, 4], [4, 4]], 'symmetric')
+ for data_num in range(0, batch_size):
+ offset = np.random.randint(8, size=2)
+ x[data_num, :, :, :] = xpad[data_num, :,
+ offset[0]:offset[0] + x.shape[2],
+ offset[1]:offset[1] + x.shape[2]]
+ if_flip = np.random.randint(2)
+ if (if_flip):
+ x[data_num, :, :, :] = x[data_num, :, :, ::-1]
+ return x
+
+
+# Calculate accuracy
+def accuracy(pred, target):
+ # y is network output to be compared with ground truth (int)
+ y = np.argmax(pred, axis=1)
+ # print ("in accuracy y shape: ", y.shape)
+ # print ("in accuracy target shape: ", target.shape)
+ a = y == target
+ correct = np.array(a, "int").sum()
+ return correct
+
+
+# Data partition according to the rank
+def partition(global_rank, world_size, train_x, train_y, val_x, val_y):
+ # Partition training data
+ data_per_rank = train_x.shape[0] // world_size
+ idx_start = global_rank * data_per_rank
+ idx_end = (global_rank + 1) * data_per_rank
+ train_x = train_x[idx_start:idx_end]
+ train_y = train_y[idx_start:idx_end]
+
+ # Partition evaluation data
+ data_per_rank = val_x.shape[0] // world_size
+ idx_start = global_rank * data_per_rank
+ idx_end = (global_rank + 1) * data_per_rank
+ val_x = val_x[idx_start:idx_end]
+ val_y = val_y[idx_start:idx_end]
+ return train_x, train_y, val_x, val_y
+
+
+# Function to all reduce NUMPY accuracy and loss from multiple devices
+def reduce_variable(variable, dist_opt, reducer):
+ reducer.copy_from_numpy(variable)
+ dist_opt.all_reduce(reducer.data)
+ dist_opt.wait()
+ output = tensor.to_numpy(reducer)
+ return output
+
+
+def resize_dataset(x, image_size):
+ num_data = x.shape[0]
+ dim = x.shape[1]
+ X = np.zeros(shape=(num_data, dim, image_size, image_size),
+ dtype=np.float32)
+ for n in range(0, num_data):
+ for d in range(0, dim):
+ X[n, d, :, :] = np.array(Image.fromarray(x[n, d, :, :]).resize(
+ (image_size, image_size), Image.BILINEAR),
+ dtype=np.float32)
+ return X
+
+from torch.utils.data import DataLoader
+class ModelTrainer:
+
+ @classmethod
+ def fully_train_arch(cls,
+ model,
+ use_test_acc: bool,
+ epoch_num,
+ train_loader: DataLoader,
+ val_loader: DataLoader,
+ test_loader: DataLoader,
+ args,
+ logger=None
+ ) -> (float, float, dict):
+ """
+ Args:
+ model:
+ use_test_acc:
+ epoch_num: how many epoch, set by scheduler
+ train_loader:
+ val_loader:
+ test_loader:
+ args:
+ Returns:
+ """
+
+ if logger is None:
+ from src.logger import logger
+ logger = logger
+
+ start_time, best_valid_auc = time.time(), 0.
+
+ num_labels = args.num_labels
+ lr = args.lr
+ iter_per_epoch = args.iter_per_epoch
+ # report_freq = args.report_freq
+ # given_patience = args.patience
+
+ # assign new values
+ args.epoch_num = epoch_num
+
+ # for multiple classification
+
+ # optimizer
+ precision = 'float32'
+ mssgd = MSSGD(lr=args.lr, momentum=0.9, weight_decay=1e-4, dtype=singa_dtype[precision])
+ device_id = 0
+ max_epoch = epoch_num
+ graph = True
+ verbosity = 0
+ dist_option='plain'
+ spars=None
+ global_rank = 0
+ world_size = 1
+
+ # training params
+ if args.device == 'cpu':
+ dev = singa_device.get_default_device()
+ else: # GPU
+ dev = singa_device.create_cuda_gpu_on(args.local_rank) # need to change to CPU device for CPU-only machines
+ dev.SetRandSeed(0)
+
+ # For distributed training, sequential has better performance
+ if hasattr(mssgd, "communicator"):
+ DIST = True
+ sequential = True
+ else:
+ DIST = False
+ sequential = False
+
+ info_dic = {}
+ valid_auc = -1
+ valid_loss = 0
+
+ ### singa data
+ tx = tensor.Tensor((args.batch_size, args.nfeat), dev, singa_dtype[precision])
+ ty = tensor.Tensor((args.batch_size,), dev, tensor.int32)
+ ### singa data
+
+ model.set_optimizer(mssgd)
+ model.compile([tx], is_train=True, use_graph=graph, sequential=sequential)
+ dev.SetVerbosity(verbosity)
+
+ # Training and evaluation loop
+ for epoch in range(max_epoch):
+ start_time = time.time()
+ logger.info(f'Epoch [{epoch:3d}/{epoch_num:3d}]')
+ # np.random.shuffle(idx)
+
+ if global_rank == 0:
+ print('Starting Epoch %d:' % (epoch))
+
+ # Training phase
+ train_correct = np.zeros(shape=[1], dtype=np.float32)
+ test_correct = np.zeros(shape=[1], dtype=np.float32)
+ train_loss = np.zeros(shape=[1], dtype=np.float32)
+
+ model.train()
+ # print ("num_train_batch: \n", num_train_batch)
+ # print ()
+ batch_idx = 0
+ # for b in range(num_train_batch):
+ for batch_idx, batch in enumerate(train_loader):
+ if batch_idx % 50 == 0:
+ print ("trainer.py train batch_idx: \n", batch_idx)
+ # Generate the batch data in this iteration
+ # x = train_x[idx[b * batch_size:(b + 1) * batch_size]]
+ # if model.dimension == 4:
+ # x = augmentation(x, batch_size)
+ # if (image_size != model.input_size):
+ # x = resize_dataset(x, model.input_size)
+ # x = x.astype(np_dtype[precision])
+ # y = train_y[idx[b * batch_size:(b + 1) * batch_size]]
+
+ y = batch['y'].cpu().numpy()
+ batch['id'] = batch['id'].cpu().numpy().astype(int)
+ # batch['value'] = batch['value'].to(args.device)
+ x = np.zeros((batch['id'].shape[0], args.nfeat), dtype=np.float32)
+ # print ("target shape: ", target.shape)
+ # print ("target: ", target)
+ # print ("batch['id'] shape: ", batch['id'].shape)
+ # print ("batch['id']: ", batch['id'])
+ # print ("batch['value'] shape: ", batch['value'].shape)
+ # print ("batch['value']: ", batch['value'])
+ # print ("batch['id'].cpu().numpy().astype(int): \n", batch['id'].cpu().numpy().astype(int))
+ for i in range(batch['id'].shape[0]):
+ x[i][batch['id'][i]] = (np.float32)(1.0)
+ x = x.astype(dtype=np.float32)
+ y = y.astype(dtype=np.int32)
+
+ if x.shape[0] != args.batch_size: # skip the incomplete last batch
+ continue
+
+ synflow_flag = False
+ # Train the model
+ # if True: # normal train steps
+ # Copy the patch data into input tensors
+ # print ("normal train steps\n")
+ # print ("x.astype(np.float32): \n", x.astype(np.float32))
+ # print ("y: \n", y)
+ tx = tensor.Tensor(x.shape, dev, singa_dtype[precision])
+ ty = tensor.Tensor((y.shape[0],), dev, tensor.int32)
+ tx.copy_from_numpy(x) # dtype=np.float32
+ # print ("tx: \n", tx)
+ ty.copy_from_numpy(y)
+ # print ("ty: \n", ty)
+ # print ("normal before model(tx, ty, synflow_flag, dist_option, spars)")
+ # print ("train_cnn tx: \n", tx)
+ # print ("train_cnn ty: \n", ty)
+ # print ("trainer.py train before model forward ...")
+ # print ("model: ", model)
+ pn_p_g_list, out, loss = model(tx, ty, dist_option, spars, synflow_flag)
+ # print ("trainer.py train normal after model(tx, ty, synflow_flag, dist_option, spars)")
+ # print ("trainer.py train tx shape: ", tx.shape)
+ # print ("trainer.py train ty shape: ", ty.shape)
+ # print ("trainer.py train out.shape: ", out.shape)
+ # print ("trainer.py train out: ", out)
+ # print ("trainer.py train y shape: ", y.shape)
+ train_correct += accuracy(tensor.to_numpy(out), y)
+ train_loss += tensor.to_numpy(loss)[0]
+
+ if DIST:
+ # Reduce the evaluation accuracy and loss from multiple devices
+ reducer = tensor.Tensor((1,), dev, tensor.float32)
+ train_correct = reduce_variable(train_correct, mssgd, reducer)
+ train_loss = reduce_variable(train_loss, mssgd, reducer)
+
+ if global_rank == 0:
+ print('Training loss = %f, training accuracy = %f' %
+ (train_loss, train_correct /
+ (batch_idx * args.batch_size * world_size)),
+ flush=True)
+ print ("train total batch_idx: ", batch_idx)
+ train_metric = train_correct / (batch_idx * args.batch_size * world_size)
+
+ # Evaluation phase
+ model.eval()
+ batch_idx = 0
+ # for b in range(num_val_batch):
+ # print ("evaluation begins")
+ for batch_idx, batch in enumerate(test_loader):
+ # print ("trainer.py test batch_idx: \n", batch_idx)
+ # x = val_x[b * batch_size:(b + 1) * batch_size]
+ # if model.dimension == 4:
+ # if (image_size != model.input_size):
+ # x = resize_dataset(x, model.input_size)
+ # x = x.astype(np_dtype[precision])
+ # y = val_y[b * batch_size:(b + 1) * batch_size]
+ # batch['value'] = batch['value'].cpu().numpy().astype(np_dtype[precision])
+ # x = batch['value'].cpu().numpy().astype(np_dtype[precision])
+
+ y = batch['y'].cpu().numpy()
+ batch['id'] = batch['id'].cpu().numpy().astype(int)
+ # batch['value'] = batch['value'].to(args.device)
+ x = np.zeros((batch['id'].shape[0], args.nfeat), dtype=np.float32)
+ # print ("target shape: ", target.shape)
+ # print ("target: ", target)
+ # print ("batch['id'] shape: ", batch['id'].shape)
+ # print ("batch['id']: ", batch['id'])
+ # print ("batch['value'] shape: ", batch['value'].shape)
+ # print ("batch['value']: ", batch['value'])
+ # print ("batch['id'].cpu().numpy().astype(int): \n", batch['id'].cpu().numpy().astype(int))
+ for i in range(batch['id'].shape[0]):
+ x[i][batch['id'][i]] = (np.float32)(1.0)
+ # print ("x[1]: \n", x[1])
+ x = x.astype(dtype=np.float32)
+ y = y.astype(dtype=np.int32)
+
+ if x.shape[0] != (args.batch_size * 8): # skip the incomplete last batch
+ # print ("trainer.py test batch_idx: ", batch_idx)
+ # print ("trainer.py test x.shape: ", x.shape)
+ continue
+
+ tx = tensor.Tensor(x.shape, dev, singa_dtype[precision])
+ ty = tensor.Tensor((y.shape[0],), dev, tensor.int32)
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+ # print ("trainer.py test tx shape: ", tx.shape)
+ out_test = model(tx)
+ # print ("trainer.py test out_test shape: ", out_test.shape)
+ # print ("trainer.py test y shape: ", y.shape)
+ # print ("trainer.py out_test: ", out_test)
+ # print ("trainer.py y: ", y)
+ test_correct += accuracy(tensor.to_numpy(out_test), y)
+ # print ("test_correct: ", test_correct)
+
+ if DIST:
+ # Reduce the evaluation accuracy from multiple devices
+ test_correct = reduce_variable(test_correct, mssgd, reducer)
+
+ # Output the evaluation accuracy
+ if global_rank == 0:
+ print('Evaluation accuracy = %f, Elapsed Time = %fs' %
+ (test_correct / (batch_idx * args.batch_size * 8 * world_size),
+ time.time() - start_time),
+ flush=True)
+ # print ("test all batch_idx: ", batch_idx)
+ test_metric = test_correct / (batch_idx * args.batch_size * 8 * world_size)
+
+
+ info_dic[epoch] = {
+ "train_metric": str(train_metric[0]),
+ "test_metric": str(test_metric[0]),
+ "train_loss": str(train_loss[0]),
+ # "valid_loss": valid_loss,
+ "train_test_total_time": str(time.time() - start_time)}
+
+ dev.PrintTimeProfiling()
+
+ # return valid_auc, time.time() - start_time, info_dic
+ print ("info_dic: ", info_dic)
+ return test_metric, time.time() - start_time, info_dic
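The training loop above turns each sparse batch of integer feature ids into a dense one-hot matrix before copying it into SINGA tensors. Below is a minimal, self-contained sketch of that conversion (the helper name and the sample ids are illustrative, not part of the patch):

import numpy as np

def batch_to_dense_one_hot(batch_ids, nfeat):
    # scatter each sample's active feature ids into a dense (batch_size, nfeat) row
    batch_size = batch_ids.shape[0]
    x = np.zeros((batch_size, nfeat), dtype=np.float32)
    for i in range(batch_size):
        x[i][batch_ids[i]] = np.float32(1.0)
    return x

# two samples, each with three active feature ids out of ten features
ids = np.array([[0, 3, 7], [1, 3, 9]], dtype=int)
print(batch_to_dense_one_hot(ids, nfeat=10))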
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/run_sr.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/run_sr.py
new file mode 100644
index 000000000..e5610c89c
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/run_sr.py
@@ -0,0 +1,144 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from copy import copy
+from src.common.constant import Config
+
+
+class BudgetAwareControllerSR:
+ def __init__(self, evaluator, time_per_epoch, max_unit=200):
+ """
+ :param evaluator:
+ :param max_unit: for 201, it's 200, for 101 it's 108
+ """
+ self._evaluator = evaluator
+ self.max_unit_per_model = max_unit
+ self.time_per_epoch = time_per_epoch
+ self.name = "SUCCREJCT"
+
+ def schedule_budget_per_model_based_on_T(self, space_name, fixed_time_budget, K_):
+ # for benchmarking only phase 2
+
+ # try different K and U combinations
+ # only consider 15625 arches in this paper
+ # min_budget_required: when K = 1, N = min_budget_required * 1
+ if space_name == Config.NB101:
+ U_options = [4, 12, 16, 108]
+ else:
+ U_options = list(range(1, 200))
+
+ history = []
+
+ for U in U_options:
+ expected_time_used = self.pre_calculate_epoch_required(K_, U) * self.time_per_epoch
+ if expected_time_used > fixed_time_budget:
+ break
+ else:
+ history.append(U)
+ if len(history) == 0:
+ raise f"{fixed_time_budget} is too small for current config"
+ return history[-1]
+
+ def pre_calculate_epoch_required(self, K, U):
+ """
+ :param K: number of candidate models
+ :param U: minimum resource (epochs) each candidate needs
+ :return:
+ """
+ total_epoch_each_rounds = K * U
+ min_budget_required = 0
+
+ previous_epoch = None
+ while True:
+ cur_cand_num = K
+ if cur_cand_num == 1:
+ break
+ # number of each res given to each cand, pick lower bound
+ epoch_per_model = int(total_epoch_each_rounds / cur_cand_num)
+ if previous_epoch is None:
+ previous_epoch = epoch_per_model
+ elif previous_epoch == epoch_per_model:
+ # the epoch budget per model did not increase, so skip re-evaluation and drop one candidate
+ K = cur_cand_num - 1
+ continue
+
+ if epoch_per_model >= self.max_unit_per_model:
+ epoch_per_model = self.max_unit_per_model
+ # evaluate each arch
+ min_budget_required += epoch_per_model * cur_cand_num
+ # sort from min to max
+ if epoch_per_model == self.max_unit_per_model:
+ # each model is fully evaluated, just return top 1
+ K = 1
+ else:
+ # only keep 1/eta, pick lower bound
+ K = cur_cand_num - 1
+ return min_budget_required
+
+ def run_phase2(self, U: int, candidates_m: list):
+ """
+ :param candidates_m: list of candidate models
+ :param U: minimum resource (epochs) each candidate needs
+ :return:
+ """
+ # print(f" *********** begin BudgetAwareControllerSR with U={U}, K={len(candidates_m)} ***********")
+ candidates = copy(candidates_m)
+ total_epoch_each_rounds = len(candidates) * U
+ min_budget_required = 0
+ previous_epoch = None
+ scored_cand = None
+ while True:
+ cur_cand_num = len(candidates)
+ if cur_cand_num == 1:
+ break
+ total_score = []
+ # number of each res given to each cand, pick lower bound
+ epoch_per_model = int(total_epoch_each_rounds / cur_cand_num)
+
+ if previous_epoch is None:
+ previous_epoch = epoch_per_model
+ elif previous_epoch == epoch_per_model:
+ # the epoch budget per model did not increase, so skip re-evaluation and keep the top K-1 candidates
+ num_keep = cur_cand_num - 1
+ candidates = [ele[0] for ele in scored_cand[-num_keep:]]
+ continue
+
+ if epoch_per_model >= self.max_unit_per_model:
+ epoch_per_model = self.max_unit_per_model
+
+ # print(f"[successive_reject]: {cur_cand_num} model left, "
+ # f"and evaluate each model with {epoch_per_model} epoch")
+ # evaluate each arch
+ for cand in candidates:
+ score = self._evaluator.p2_evaluate(cand, epoch_per_model)
+ total_score.append((cand, score))
+ min_budget_required += epoch_per_model
+ # sort from min to max
+ scored_cand = sorted(total_score, key=lambda x: x[1])
+
+ if epoch_per_model == self.max_unit_per_model:
+ # each model is fully evaluated, just return top 1
+ candidates = [scored_cand[-1][0]]
+ else:
+ # only keep m-1, remove the worst one
+ num_keep = cur_cand_num - 1
+ candidates = [ele[0] for ele in scored_cand[-num_keep:]]
+
+ return candidates[0], None, min_budget_required
+
+
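A minimal sketch of driving BudgetAwareControllerSR (defined above) with a mock evaluator; the evaluator only needs a p2_evaluate(candidate, epoch) method, and the scores below are made up (higher is better):

class MockEvaluator:
    def p2_evaluate(self, cand, epoch_per_model):
        # pretend candidate "3" is the best architecture
        return {"1": 0.2, "2": 0.5, "3": 0.9}[cand] + 0.001 * epoch_per_model

sr = BudgetAwareControllerSR(evaluator=MockEvaluator(), time_per_epoch=5.0, max_unit=200)
best, _, epochs_used = sr.run_phase2(U=4, candidates_m=["1", "2", "3"])
print(best, epochs_used)  # "3" survives the successive rejection; 24 epochs consumed in total

Each round gives every surviving candidate total_epochs / K epochs and drops the worst-scoring one until a single candidate remains.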
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/run_uniform.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/run_uniform.py
new file mode 100644
index 000000000..02c9b8e70
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/run_uniform.py
@@ -0,0 +1,94 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from copy import copy
+from random import randint
+from src.common.constant import Config
+
+
+# UniformAllocation
+class UniformAllocation:
+
+ def __init__(self, evaluator, time_per_epoch, max_unit=200):
+ """
+ :param evaluator:
+ :param max_unit: for 201, it's 200, for 101 it's 108
+ """
+ self._evaluator = evaluator
+ self.max_unit_per_model = max_unit
+ self.time_per_epoch = time_per_epoch
+ self.name = "UNIFORM"
+
+ def schedule_budget_per_model_based_on_T(self, space_name, fixed_time_budget, K_):
+ # for benchmarking only phase 2
+
+ # try different K and U combinations
+ # only consider 15625 arches in this paper
+ # min_budget_required: when K = 1, N = min_budget_required * 1
+ if space_name == Config.NB101:
+ U_options = [4, 12, 16, 108]
+ else:
+ U_options = list(range(1, 200))
+
+ history = []
+
+ for U in U_options:
+ expected_time_used = self.pre_calculate_epoch_required(K_, U) * self.time_per_epoch
+ if expected_time_used > fixed_time_budget:
+ break
+ else:
+ history.append(U)
+ return history[-1]
+
+ def pre_calculate_epoch_required(self, K, U):
+ """
+ :param K: number of candidate models
+ :param U: minimum unit of computation (epochs) per model
+ :return: total number of epochs required
+ """
+ return K*U
+
+ def run_phase2(self, U: int, candidates_m: list):
+ """
+ :param U: minimum unit of computation (epochs) per model
+ :param candidates_m: list of candidate models
+ :return:
+ """
+
+ # print(f" *********** begin uniformly_allocate with U={U}, K={len(candidates_m)} ***********")
+
+ candidates = copy(candidates_m)
+ min_budget_required = 0
+
+ if U >= self.max_unit_per_model:
+ U = self.max_unit_per_model
+
+ # print(f"[uniformly_allocate]: uniformly allocate {U} epoch to each model")
+
+ total_score = []
+ for cand in candidates:
+ score = self._evaluator.p2_evaluate(cand, U)
+ total_score.append((cand, score))
+ min_budget_required += U
+ # sort from min to max
+ scored_cand = sorted(total_score, key=lambda x: x[1])
+ candidate = scored_cand[-1][0]
+ return candidate, None, min_budget_required
+
+
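A minimal sketch of UniformAllocation (defined above) with the same kind of mock evaluator; every candidate gets the same U epochs and the top-scoring one is returned:

class MockEvaluator:
    def p2_evaluate(self, cand, epoch_per_model):
        return {"a": 0.1, "b": 0.7, "c": 0.4}[cand]

ua = UniformAllocation(evaluator=MockEvaluator(), time_per_epoch=5.0, max_unit=200)
best, _, epochs_used = ua.run_phase2(U=8, candidates_m=["a", "b", "c"])
print(best, epochs_used)  # "b", 24 epochs in total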
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/logger/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/logger/__init__.py
new file mode 100644
index 000000000..29d916802
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/logger/__init__.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import logging
+import os
+
+if os.environ.get("log_logger_folder_name") == None:
+ log_logger_folder_name = "logs_default"
+ if not os.path.exists(f"./{log_logger_folder_name}"):
+ os.makedirs(f"./{log_logger_folder_name}")
+else:
+ log_logger_folder_name = os.environ.get("log_logger_folder_name")
+ if not os.path.exists(log_logger_folder_name):
+ os.makedirs(log_logger_folder_name)
+
+logger = logging.getLogger(__name__)
+
+if os.environ.get("log_file_name") == None:
+ log_name = f"{log_logger_folder_name}/test.log"
+else:
+ log_name = f"{log_logger_folder_name}/" + os.environ.get("log_file_name")
+
+logging.basicConfig(level=logging.INFO,
+ format='%(asctime)s %(levelname)-8s %(message)s',
+ datefmt='%d %b %Y %H:%M:%S',
+ filename=log_name, filemode='w')
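The logger module above is configured entirely through environment variables read at import time, so they must be set before the first import. A small usage sketch (the folder and file names are illustrative only):

import os
os.environ["log_logger_folder_name"] = "logs_trails_demo"
os.environ["log_file_name"] = "run_0.log"

from src.logger import logger  # reads the env vars when the module is first imported
logger.info("model selection started")  # written to logs_trails_demo/run_0.log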
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/README.md b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/README.md
new file mode 100644
index 000000000..568528670
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/README.md
@@ -0,0 +1,20 @@
+
+
+This module parses the local JSON files that store the results of all experiments.
\ No newline at end of file
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/__init__.py
new file mode 100644
index 000000000..3df60b02f
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/__init__.py
@@ -0,0 +1,17 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/img_explore_ea.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/img_explore_ea.py
new file mode 100644
index 000000000..b03299774
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/img_explore_ea.py
@@ -0,0 +1,96 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import json
+import os
+import sqlite3
+import traceback
+
+from src.common.constant import Config
+
+base_folder_dir = os.environ.get("base_dir")
+if base_folder_dir is None: base_folder_dir = os.getcwd()
+base_dir = os.path.join(base_folder_dir, "img_data", "ground_truth")
+print("local api running at {}".format(base_dir))
+
+# sum score is better
+tf_smt_file_NB101C10 = os.path.join(base_dir, "TFMEM_101_c10_100run_8k_models_score_sum")
+tf_smt_file_NB201C10 = os.path.join(base_dir, "TFMEM_201_c10_100run_score_sum")
+tf_smt_file_NB201C100 = os.path.join(base_dir, "TFMEM_201_c100_100run_score_sum")
+tf_smt_file_NB201Img = os.path.join(base_dir, "TFMEM_201_imgNet_100run_score_sum")
+
+# rank is not as good as sum
+# tf_smt_file_NB201C10 = os.path.join(base_dir, "TFMEM_201_c10_100run_rank_bugs")
+# tf_smt_file_NB201C100 = os.path.join(base_dir, "TFMEM_201_c100_200run_rank")
+# tf_smt_file_NB201Img = os.path.join(base_dir, "TFMEM_201_imgNet_200run_rank")
+
+con = None
+cur = None
+
+
+# fetch result from simulated result
+def fetch_from_db(space_name, dataset, run_id_m, N_m):
+ """
+ :param run_id_m: run id (at most 100)
+ :param N_m: number of models explored (evaluated)
+ :return:
+ """
+ global con
+ global cur
+ if con is None:
+ if space_name == Config.NB201:
+ if dataset == Config.c10:
+ tf_smt_used = tf_smt_file_NB201C10
+ elif dataset == Config.c100:
+ tf_smt_used = tf_smt_file_NB201C100
+ elif dataset == Config.imgNet:
+ tf_smt_used = tf_smt_file_NB201Img
+ else:
+ print(f"{dataset} is Not implemented")
+ raise
+ elif space_name == Config.NB101:
+ if dataset == Config.c10:
+ tf_smt_used = tf_smt_file_NB101C10
+ else:
+ print(f"{dataset}Not implemented")
+ raise
+ else:
+ print(f"{space_name} is Not implemented")
+ raise
+
+ print(tf_smt_used)
+ con = sqlite3.connect(tf_smt_used)
+ cur = con.cursor()
+
+ res = cur.execute(
+ "SELECT * FROM simulateExp WHERE run_num = {} and model_explored = {}".format(run_id_m, N_m))
+ fetch_res = res.fetchone()
+
+ try:
+ arch_id = fetch_res[2]
+ candidates = json.loads(fetch_res[3])
+ current_time = float(fetch_res[4])
+ except Exception:
+ print(traceback.format_exc())
+ raise RuntimeError(f"res is None when using run_id = {run_id_m} and N = {N_m}")
+
+ return arch_id, candidates, current_time
+
+
+if __name__ == '__main__':
+ print(fetch_from_db(Config.NB201, Config.c10, 3, 10))
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/img_train_baseline.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/img_train_baseline.py
new file mode 100644
index 000000000..7b8150516
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/img_train_baseline.py
@@ -0,0 +1,129 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import numpy as np
+from src.common.constant import Config
+from src.tools.io_tools import read_json
+
+base_dir_folder = os.environ.get("base_dir")
+if base_dir_folder is None: base_dir_folder = os.getcwd()
+base_dir = os.path.join(base_dir_folder, "img_data")
+
+print("gt_api running at {}".format(base_dir))
+train_base201_c10 = os.path.join(base_dir, "train_based_201_c10.json")
+train_base201_c100 = os.path.join(base_dir, "train_based_201_c100.json")
+train_base201_img = os.path.join(base_dir, "train_based_201_img.json")
+
+train_base101_c10 = os.path.join(base_dir, "train_based_101_c10_100run_24k_models.json")
+
+
+def post_processing_train_base_result(search_space, dataset, x_max_value: int = None):
+
+ if search_space == Config.NB201 and dataset == Config.c10:
+ data = read_json(train_base201_c10)
+
+ elif search_space == Config.NB201 and dataset == Config.c100:
+ data = read_json(train_base201_c100)
+ elif search_space == Config.NB201 and dataset == Config.imgNet:
+ data = read_json(train_base201_img)
+
+ elif search_space == Config.NB101 and dataset == Config.c10:
+ data = read_json(train_base101_c10)
+ else:
+ print(f"Cannot read dataset {dataset} of file")
+ raise
+
+ # data is in form of
+ """
+ data[run_id] = {}
+ data[run_id]["arch_id_list"]
+ data[run_id]["current_best_acc"]
+ data[run_id]["x_axis_time"]
+ """
+
+ acc_got_row = []
+ time_used_row = []
+ min_arch_across_all_run = 15625
+ for run_id in data:
+ acc_got_row.append(data[run_id]["current_best_acc"])
+ time_used_row.append(data[run_id]["x_axis_time"])
+ if len(data[run_id]["current_best_acc"]) < min_arch_across_all_run:
+ min_arch_across_all_run = len(data[run_id]["current_best_acc"])
+
+ # for each run, only use min_arch_across_all_run
+ for i in range(len(acc_got_row)):
+ acc_got_row[i] = acc_got_row[i][:min_arch_across_all_run]
+ time_used_row[i] = time_used_row[i][:min_arch_across_all_run]
+
+ acc_got = np.array(acc_got_row)
+ time_used = np.array(time_used_row)
+
+ if data['0']["current_best_acc"][-1] < 1:
+ acc_got = acc_got * 100
+
+ acc_l = np.quantile(acc_got, 0.25, axis=0)
+ acc_m = np.quantile(acc_got, 0.5, axis=0)
+ acc_h = np.quantile(acc_got, 0.75, axis=0)
+
+ time_l = np.quantile(time_used, 0.25, axis=0)
+ time_m = np.quantile(time_used, 0.5, axis=0).tolist()
+ time_h = np.quantile(time_used, 0.75, axis=0)
+
+ x_list = [ele/60 for ele in time_m]
+ y_list_low = acc_l[:len(x_list)]
+ y_list_m = acc_m[:len(x_list)]
+ y_list_high = acc_h[:len(x_list)]
+
+ # if the x array max value is provided.
+ if x_max_value is not None:
+ final_x_list = []
+ final_x_list_low = []
+ final_x_list_m = []
+ final_x_list_high = []
+ for i in range(len(x_list)):
+ if x_list[i] <= x_max_value:
+ final_x_list.append(x_list[i])
+ final_x_list_low.append(y_list_low[i])
+ final_x_list_m.append(y_list_m[i])
+ final_x_list_high.append(y_list_high[i])
+ else:
+ break
+ return final_x_list, final_x_list_low, final_x_list_m, final_x_list_high
+ else:
+ return x_list, y_list_low.tolist(), y_list_m.tolist(), y_list_high.tolist()
+
+
+if __name__ == "__main__":
+ search_space = Config.NB201
+ dataset = Config.c100
+ x_list, y_list_low, y_list_m, y_list_high = post_processing_train_base_result(search_space, dataset)
+
+ from matplotlib import pyplot as plt
+
+ plt.fill_between(x_list, y_list_low, y_list_high, alpha=0.1)
+ plt.plot(x_list, y_list_m, "-*", label="Training-based")
+
+ plt.xscale("symlog")
+ plt.grid()
+ plt.xlabel("Time Budget given by user (mins)")
+ plt.ylabel("Test Accuracy")
+ plt.legend()
+ plt.show()
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/interface.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/interface.py
new file mode 100644
index 000000000..d2d335cee
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/interface.py
@@ -0,0 +1,141 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# query ground truth
+from src.common.constant import Config, CommonVars
+from src.query_api.query_api_img import Gt201, Gt101
+from src.query_api.query_api_mlp import GTMLP
+from src.query_api.query_api_img import ImgScoreQueryApi
+from typing import *
+
+
+def profile_NK_trade_off(dataset):
+ """
+ This value is taken from the profiling result.
+ We tried various N/K combinations and found this one works best.
+ """
+ if dataset == Config.c10:
+ return 85
+ elif dataset == Config.c100:
+ return 85
+ elif dataset == Config.imgNet:
+ return 130
+ else:
+ return 30
+
+
+class SimulateTrain:
+
+ def __init__(self, space_name: str):
+ """
+ :param space_name: NB101 or NB201, MLP
+ """
+ self.space_name = space_name
+ self.api = None
+
+ # get the test_acc and time usage to train of this arch_id
+ def get_ground_truth(self, arch_id: str, dataset: str, epoch_num: int = None, total_epoch: int = 200):
+ """
+ :param arch_id:
+ :param dataset:
+ :param epoch_num: which epoch's performance to return
+ :param total_epoch:
+ """
+ if self.space_name == Config.NB101:
+ self.api = Gt101()
+ acc, time_usage = self.api.get_c10_test_info(arch_id, dataset, epoch_num)
+ return acc, time_usage
+
+ elif self.space_name == Config.NB201:
+ self.api = Gt201()
+ if total_epoch == 200:
+ acc, time_usage = self.api.query_200_epoch(arch_id, dataset, epoch_num)
+ else: # 12
+ acc, time_usage = self.api.query_12_epoch(arch_id, dataset, epoch_num)
+ return acc, time_usage
+
+ elif self.space_name == Config.MLPSP:
+ self.api = GTMLP(dataset)
+ acc, time_usage = self.api.get_valid_auc(arch_id, epoch_num)
+ return acc, time_usage
+
+ else:
+ raise NotImplementedError
+
+ # get the high acc of k arch with highest score
+ def get_high_acc_top_10(self, top10):
+ all_top10_acc = []
+ time_usage = 0
+ for arch_id in top10:
+ score_, time_usage_ = self.get_ground_truth(arch_id)
+ all_top10_acc.append(score_)
+ time_usage += time_usage_
+ return max(all_top10_acc), time_usage
+
+ def get_best_arch_id(self, top10):
+ cur_best = 0
+ res = None
+ for arch_id in top10:
+ acc, _ = self.get_ground_truth(arch_id)
+ if acc > cur_best:
+ cur_best = acc
+ res = arch_id
+ return res
+
+ def query_all_model_ids(self, dataset):
+ if self.space_name == Config.NB101:
+ self.api = Gt101()
+ elif self.space_name == Config.NB201:
+ self.api = Gt201()
+ elif self.space_name == Config.MLPSP:
+ self.api = GTMLP(dataset)
+ return self.api.get_all_trained_model_ids()
+
+
+class SimulateScore:
+ def __init__(self, space_name: str, dataset_name: str):
+ """
+ :param space_name: NB101 or NB201, MLP
+ :param dataset_name: NB101 or NB201, MLP
+ """
+ self.space_name = space_name
+ if self.space_name == Config.MLPSP:
+ self.api = GTMLP(dataset_name)
+ else:
+ self.api = ImgScoreQueryApi(self.space_name, dataset_name)
+
+ # get the test_acc and time usage to train of this arch_id
+ def query_tfmem_rank_score(self, arch_id) -> Dict:
+ # todo: here we use the global rank rather than dynamically updating the rank,
+ # todo: so we directly return the rank_score instead of the per-algorithm scores
+ # return {"nas_wot": self.api.get_metrics_score(arch_id, dataset)["nas_wot"],
+ # "synflow": self.api.get_metrics_score(arch_id, dataset)["synflow"],
+ # }
+ return self.api.get_global_rank_score(arch_id)
+
+ def query_all_tfmem_score(self, arch_id) -> Dict:
+ """
+ return {alg_name: score}
+ """
+ return self.api.api_get_score(arch_id)
+
+ def query_all_model_ids(self, dataset) -> List:
+ """
+ return all models_ids as a list
+ """
+ return self.api.get_all_scored_model_ids()
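A minimal sketch of the two query wrappers above, assuming the pre-computed NB201 ground-truth and score files are present under the directory pointed to by the base_dir environment variable, and that arch id "1" exists in them:

from src.common.constant import Config
from src.query_api.interface import SimulateTrain, SimulateScore

fetcher = SimulateTrain(space_name=Config.NB201)
acc, train_time = fetcher.get_ground_truth(arch_id="1", dataset=Config.c10, epoch_num=199)
print(acc, train_time)

scorer = SimulateScore(space_name=Config.NB201, dataset_name=Config.c10)
print(scorer.query_tfmem_rank_score(arch_id="1"))  # global-rank score of this arch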
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/query_api_img.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/query_api_img.py
new file mode 100644
index 000000000..dd3f4ca9c
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/query_api_img.py
@@ -0,0 +1,295 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import random
+from src.common.constant import Config
+from src.tools.io_tools import read_json, write_json
+from src.query_api.singleton import Singleton
+from src.tools.io_tools import read_pickle
+from src.tools.compute import generate_global_rank
+
+
+base_dir_folder = os.environ.get("base_dir")
+if base_dir_folder is None: base_dir_folder = os.getcwd()
+base_dir = os.path.join(base_dir_folder, "img_data")
+print("local api running at {}".format(base_dir))
+
+# todo: move all those to a config file
+# score result
+pre_score_path_101C10 = os.path.join(base_dir, "score_101_15k_c10_128.json")
+pre_score_path_201C10 = os.path.join(base_dir, "score_201_15k_c10_bs32_ic16.json")
+pre_score_path_201C100 = os.path.join(base_dir, "score_201_15k_c100_bs32_ic16.json")
+pre_score_path_201IMG = os.path.join(base_dir, "score_201_15k_imgNet_bs32_ic16.json")
+
+# expreflow
+expreflow_score_path_101C10 = os.path.join(base_dir, "score_nasbench101_cifar10_batch_size_32_cpu.json")
+# expreflow_score_path_201C10 = os.path.join(base_dir, "score_nasbench201_cifar10_batch_size_32_cpu.json")
+# expreflow_score_path_201C100 = os.path.join(base_dir, "score_nasbench201_cifar100_batch_size_32_cpu.json")
+# expreflow_score_path_201IMG = os.path.join(base_dir, "score_nasbench201_ImageNet16-120_batch_size_32_cpu.json")
+
+expreflow_score_path_201C10 = os.path.join(base_dir_folder, "score_scale_traj_width/score_nasbench201_cifar10_batch_size_32_cpu.json")
+expreflow_score_path_201C100 = os.path.join(base_dir_folder, "score_scale_traj_width/score_nasbench201_cifar100_batch_size_32_cpu.json")
+expreflow_score_path_201IMG = os.path.join(base_dir_folder, "score_scale_traj_width/score_nasbench201_ImageNet16-120_batch_size_32_cpu.json")
+
+# training accuracy result.
+gt201 = os.path.join(base_dir, "ground_truth/201_allEpoch_info")
+gt101 = os.path.join(base_dir, "ground_truth/101_allEpoch_info_json")
+gt101P = os.path.join(base_dir, "ground_truth/nasbench1_accuracy.p")
+id_to_hash_path = os.path.join(base_dir, "ground_truth/nb101_id_to_hash.json")
+
+
+# We pre-compute the time usage, and get a range,
+# Then we randomly pick one value from the range each time
+def guess_score_time(search_space_m, dataset):
+ if search_space_m == Config.NB101:
+ return Gt101.guess_score_time()
+ if search_space_m == Config.NB201:
+ return Gt201.guess_score_time(dataset)
+
+
+def guess_train_one_epoch_time(search_space_m, dataset):
+ if search_space_m == Config.NB101:
+ return Gt101().guess_train_one_epoch_time()
+ if search_space_m == Config.NB201:
+ return Gt201().guess_train_one_epoch_time(dataset)
+
+
+class ImgScoreQueryApi:
+ # Multiton pattern
+ # use these algorithms => new tfmem
+ default_alg_name_list = ["nas_wot", "synflow"]
+ _instances = {}
+
+ def __new__(cls, search_space_name: str, dataset: str):
+ if (search_space_name, dataset) not in cls._instances:
+ instance = super(ImgScoreQueryApi, cls).__new__(cls)
+ instance.search_space_name, instance.dataset = search_space_name, dataset
+
+ # read pre-scored file path
+ if search_space_name == Config.NB201:
+ if dataset == Config.c10:
+ instance.pre_score_path = pre_score_path_201C10
+ instance.express_score_path = expreflow_score_path_201C10
+ elif dataset == Config.c100:
+ instance.pre_score_path = pre_score_path_201C100
+ instance.express_score_path = expreflow_score_path_201C100
+ elif dataset == Config.imgNet:
+ instance.pre_score_path = pre_score_path_201IMG
+ instance.express_score_path = expreflow_score_path_201IMG
+ if search_space_name == Config.NB101:
+ instance.pre_score_path = pre_score_path_101C10
+ instance.express_score_path = expreflow_score_path_101C10
+
+ instance.data = read_json(instance.pre_score_path)
+ express_score_data = read_json(instance.express_score_path)
+ for arch_id in express_score_data:
+ if arch_id in instance.data:
+ instance.data[arch_id].update(express_score_data[arch_id])
+ else:
+ instance.data[arch_id] = express_score_data[arch_id]
+
+ instance.global_rank = generate_global_rank(
+ instance.data, instance.default_alg_name_list)
+
+ cls._instances[(search_space_name, dataset)] = instance
+ return cls._instances[(search_space_name, dataset)]
+
+ def api_get_score(self, arch_id: str, tfmem: str = None):
+ # retrieve score from pre-scored file
+ if tfmem is None:
+ return self.data[arch_id]
+ else:
+ return {tfmem: float(self.data[arch_id][tfmem])}
+
+ def update_existing_data(self, arch_id, alg_name, score_str):
+ """
+ Add a new arch's score info into the data
+ :param arch_id:
+ :param alg_name:
+ :param score_str:
+ :return:
+ """
+ if str(arch_id) not in self.data:
+ self.data[str(arch_id)] = {}
+ self.data[str(arch_id)][alg_name] = '{:f}'.format(score_str)
+
+ def is_arch_and_alg_inside_data(self, arch_id, alg_name):
+ if arch_id in self.data and alg_name in self.data[arch_id]:
+ return True
+ else:
+ return False
+
+ def is_arch_inside_data(self, arch_id):
+ if arch_id in self.data:
+ return True
+ else:
+ return False
+
+ def get_len_data(self):
+ return len(self.data)
+
+ def save_latest_data(self):
+ """
+ update the latest score data
+ """
+ write_json(self.pre_score_path, self.data)
+
+ def get_all_scored_model_ids(self):
+ return list(self.data.keys())
+
+ def get_global_rank_score(self, arch_id):
+ return self.global_rank[arch_id]
+
+
+class Gt201(metaclass=Singleton):
+
+ @classmethod
+ def guess_score_time(cls, dataset=Config.c10):
+ return random.randint(3315, 4502) * 0.0001
+
+ def __init__(self):
+ self.data201 = read_json(gt201)
+
+ def get_c10valid_200epoch_test_info(self, arch_id: int):
+ """
+ cifar10-valid means train with train set, valid with validation dataset
+ Thus, acc is lower than train with train+valid.
+ :param arch_id:
+ :return:
+ """
+ return self.query_200_epoch(str(arch_id), Config.c10_valid)
+
+ def get_c10_200epoch_test_info(self, arch_id: int):
+ """
+ cifar10-valid means train with train set, valid with validation dataset
+ Thus, acc is lower than train with train+valid.
+ :param arch_id:
+ :return:
+ """
+ return self.query_200_epoch(str(arch_id), Config.c10)
+
+ def get_c100_200epoch_test_info(self, arch_id: int):
+ return self.query_200_epoch(str(arch_id), Config.c100)
+
+ def get_imgNet_200epoch_test_info(self, arch_id: int):
+ return self.query_200_epoch(str(arch_id), Config.imgNet)
+
+ def query_200_epoch(self, arch_id: str, dataset, epoch_num: int = 199):
+ if epoch_num is None or epoch_num > 199:
+ epoch_num = 199
+ arch_id = str(arch_id)
+ t_acc = self.data201[arch_id]["200"][dataset][str(epoch_num)]["test_accuracy"]
+ time_usage = self.data201[arch_id]["200"][dataset][str(epoch_num)]["time_usage"]
+ return t_acc, time_usage
+
+ def query_12_epoch(self, arch_id: str, dataset, epoch_num: int = 11):
+ if epoch_num is None or epoch_num > 11:
+ epoch_num = 11
+ arch_id = str(arch_id)
+ t_acc = self.data201[arch_id]["12"][dataset][str(epoch_num)]["test_accuracy"]
+ time_usage = self.data201[arch_id]["12"][dataset][str(epoch_num)]["time_usage"]
+ return t_acc, time_usage
+
+ def count_models(self):
+ return len(self.data201)
+
+ def guess_train_one_epoch_time(self, dataset):
+ if dataset == Config.c10:
+ dataset = Config.c10_valid
+ # pick the max value over 5k arch training time, it's 40
+ # res = 0
+ # for arch_id in range(15624):
+ # _, time_usage = self.query_200_epoch(str(arch_id), dataset, 1)
+ # if time_usage > res:
+ # res = time_usage
+ # return res
+ return 40
+
+ def get_all_trained_model_ids(self):
+ # 201 all data has the same model set.
+ return list(self.data201.keys())
+
+
+class Gt101(metaclass=Singleton):
+
+ @classmethod
+ def guess_score_time(cls):
+ return random.randint(1169, 1372) * 0.0001
+
+ def __init__(self):
+ self.data101_from_zerocost = read_pickle(gt101P)
+ self.id_to_hash_map = read_json(id_to_hash_path)
+ self.data101_full = read_json(gt101)
+
+ def get_c10_test_info(self, arch_id: str, dataset: str = Config.c10, epoch_num: int = 108):
+ """
+ Default use 108 epoch for c10, this is the largest epoch number.
+ :param dataset:
+ :param arch_id: architecture id
+ :param epoch_num: query the result of the specific epoch number
+ :return:
+ """
+ if dataset != Config.c10:
+ raise "NB101 only have c10 results"
+
+ if epoch_num is None or epoch_num > 108:
+ epoch_num = 108
+ elif epoch_num > 36:
+ epoch_num = 36
+ elif epoch_num > 12:
+ epoch_num = 12
+ elif epoch_num > 4:
+ epoch_num = 4
+ else:
+ epoch_num = 4
+ arch_id = str(arch_id)
+ # this is acc from zero-cost paper, which only record 108 epoch' result [test, valid, train]
+ # t_acc = self.data101_from_zerocost[self.id_to_hash_map[arch_id]][0]
+ # this is acc from parse_testacc_101.py,
+ t_acc = self.data101_full[arch_id][Config.c10][str(epoch_num)]["test-accuracy"]
+ time_usage = self.data101_full[arch_id][Config.c10][str(epoch_num)]["time_usage"]
+ # print(f"[Debug]: Acc different = {t_acc_usage - t_acc}")
+ return t_acc, time_usage
+
+ def count_models(self):
+ return len(self.data101_from_zerocost)
+
+ def guess_train_one_epoch_time(self):
+ # only the 4-epoch information is used here
+ d = dict.fromkeys(self.data101_full)
+ keys = random.sample(list(d), 15000)
+
+ # pick the max training time over the 15k sampled archs
+ res = 0
+ for rep_time in range(15000):
+ arch_id = keys[rep_time]
+ _, time_usage = self.get_c10_test_info(arch_id=arch_id, dataset=Config.c10, epoch_num=4)
+ if time_usage > res:
+ res = time_usage
+ return res
+
+ def get_all_trained_model_ids(self):
+ return list(self.data101_full.keys())
+
+
+if __name__ == "__main__":
+ lapi = ImgScoreQueryApi(Config.NB101, Config.c10)
+ lapi.get_len_data()
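ImgScoreQueryApi above follows a Multiton pattern: one cached instance per (search space, dataset) pair, so the score files are read only once per pair. A small sketch of that behaviour:

api_a = ImgScoreQueryApi(Config.NB201, Config.c10)
api_b = ImgScoreQueryApi(Config.NB201, Config.c10)
api_c = ImgScoreQueryApi(Config.NB201, Config.c100)
print(api_a is api_b)  # True, same cached instance for the same (space, dataset) key
print(api_a is api_c)  # False, a different dataset gets its own instance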
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/query_api_mlp.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/query_api_mlp.py
new file mode 100644
index 000000000..affeacaa4
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/query_api_mlp.py
@@ -0,0 +1,163 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+from src.common.constant import Config
+from src.tools.compute import generate_global_rank
+from src.tools.io_tools import read_json
+
+base_dir = os.environ.get("base_dir")
+if base_dir is None: base_dir = os.getcwd()
+print("base_dir is {}".format(base_dir))
+
+# todo: move all those to a config file
+# MLP related ground truth
+mlp_train_frappe = os.path.join(base_dir, "tab_data/frappe/all_train_baseline_frappe.json")
+mlp_train_uci_diabetes = os.path.join(base_dir, "tab_data/uci_diabetes/all_train_baseline_uci_160k_40epoch.json")
+mlp_train_criteo = os.path.join(base_dir, "tab_data/criteo/all_train_baseline_criteo.json")
+
+# score result
+mlp_score_frappe = os.path.join(base_dir, "tab_data/frappe/score_frappe_batch_size_32_local_finish_all_models.json")
+# mlp_score_frappe = os.path.join(base_dir, "tab_data/frappe/score_frappe_batch_size_32_nawot_synflow.json")
+mlp_score_uci = os.path.join(base_dir, "tab_data/uci_diabetes/score_uci_diabetes_batch_size_32_all_metrics.json")
+mlp_score_criteo = os.path.join(base_dir, "tab_data/criteo/score_criteo_batch_size_32.json")
+
+# 0.8028456677612497
+# todo: here is for debug expressFlow only
+exp_mlp_score_frappe = os.path.join(base_dir, "score_scale_traj_width/score_mlp_sp_frappe_batch_size_32_cpu.json")
+exp_mlp_score_uci = os.path.join(base_dir, "score_scale_traj_width/score_mlp_sp_uci_diabetes_batch_size_32_cpu.json")
+exp_mlp_score_criteo = os.path.join(base_dir, "score_scale_traj_width/score_mlp_sp_criteo_batch_size_32_cpu.json")
+
+# todo: here we use weight sharing.
+mlp_score_frappe_weight_share = os.path.join(base_dir, "tab_data/weight_share_nas_frappe.json")
+
+# pre computed result
+score_one_model_time_dict = {
+ "cpu": {
+ Config.Frappe: 0.0211558125,
+ Config.UCIDataset: 0.015039052631578948,
+ Config.Criteo: 0.6824370454545454
+ },
+ "gpu": {
+ Config.Frappe: 0.013744457142857143,
+ Config.UCIDataset: 0.008209692307692308,
+ Config.Criteo: 0.6095493157894737
+ }
+}
+
+train_one_epoch_time_dict = {
+ "cpu": {
+ Config.Frappe: 5.122203075885773,
+ Config.UCIDataset: 4.16297769,
+ Config.Criteo: 422
+ },
+ "gpu": {
+ Config.Frappe: 2.8,
+ Config.UCIDataset: 1.4,
+ Config.Criteo: 125
+ }
+}
+
+
+class GTMLP:
+ _instances = {}
+ # use these algorithms => new tfmem
+ default_alg_name_list = ["nas_wot", "synflow"]
+ device = "cpu"
+
+ def __new__(cls, dataset: str):
+ if dataset not in cls._instances:
+ instance = super(GTMLP, cls).__new__(cls)
+ instance.dataset = dataset
+ if dataset == Config.Frappe:
+ instance.mlp_train_path = mlp_train_frappe
+ instance.mlp_score_path = mlp_score_frappe
+ instance.mlp_score_path_expressflow = exp_mlp_score_frappe
+ instance.mlp_score_path_weight_share = mlp_score_frappe_weight_share
+ elif dataset == Config.Criteo:
+ instance.mlp_train_path = mlp_train_criteo
+ instance.mlp_score_path = mlp_score_criteo
+ instance.mlp_score_path_expressflow = exp_mlp_score_criteo
+ instance.mlp_score_path_weight_share = "./not_exist"
+ elif dataset == Config.UCIDataset:
+ instance.mlp_train_path = mlp_train_uci_diabetes
+ instance.mlp_score_path = mlp_score_uci
+ instance.mlp_score_path_expressflow = exp_mlp_score_uci
+ instance.mlp_score_path_weight_share = "./not_exist"
+ instance.mlp_train = read_json(instance.mlp_train_path)
+ instance.mlp_score = read_json(instance.mlp_score_path)
+
+ # todo: here we combine two json dict, remove later
+ mlp_score_expressflow = read_json(instance.mlp_score_path_expressflow)
+ for arch_id in mlp_score_expressflow:
+ if arch_id in instance.mlp_score:
+ instance.mlp_score[arch_id].update(mlp_score_expressflow[arch_id])
+
+ mlp_score_weight_share = read_json(instance.mlp_score_path_weight_share)
+ for arch_id in mlp_score_weight_share:
+ if arch_id in instance.mlp_score:
+ instance.mlp_score[arch_id].update({"weight_share": mlp_score_weight_share[arch_id]})
+
+ instance.mlp_global_rank = generate_global_rank(
+ instance.mlp_score, instance.default_alg_name_list)
+
+ cls._instances[dataset] = instance
+ return cls._instances[dataset]
+
+ def get_all_trained_model_ids(self):
+ return list(self.mlp_train[self.dataset].keys())
+
+ def get_all_scored_model_ids(self):
+ return list(self.mlp_score.keys())
+
+ def get_score_one_model_time(self, device: str):
+ _train_time_per_epoch = score_one_model_time_dict[device].get(self.dataset)
+ if _train_time_per_epoch is None:
+ raise NotImplementedError
+ return _train_time_per_epoch
+
+ def get_train_one_epoch_time(self, device: str):
+ _train_time_per_epoch = train_one_epoch_time_dict[device].get(self.dataset)
+ if _train_time_per_epoch is None:
+ raise NotImplementedError
+ return _train_time_per_epoch
+
+ def get_valid_auc(self, arch_id: str, epoch_num: int):
+ # todo: due to heavy job contention on the server, the recorded time usage may not be valid.
+ time_usage = (int(epoch_num) + 1) * self.get_train_one_epoch_time(self.device)
+ if self.dataset == Config.Frappe:
+ if epoch_num is None or epoch_num >= 20: epoch_num = 19
+ t_acc = self.mlp_train[self.dataset][arch_id][str(epoch_num)]["valid_auc"]
+ return t_acc, time_usage
+ elif self.dataset == Config.Criteo:
+ if epoch_num is None or epoch_num >= 10: epoch_num = 9
+ t_acc = self.mlp_train[self.dataset][arch_id][str(epoch_num)]["valid_auc"]
+ return t_acc, time_usage
+ elif self.dataset == Config.UCIDataset:
+ if epoch_num is None or epoch_num >= 40: epoch_num = 39
+ t_acc = self.mlp_train[self.dataset][arch_id][str(epoch_num)]["valid_auc"]
+ return t_acc, time_usage
+ else:
+ raise NotImplementedError
+
+ def api_get_score(self, arch_id: str) -> dict:
+ score_dic = self.mlp_score[arch_id]
+ return score_dic
+
+ def get_global_rank_score(self, arch_id):
+ return self.mlp_global_rank[arch_id]
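A minimal sketch of querying the MLP ground-truth API above, assuming the tab_data result files are available under base_dir; the arch id is taken from the file itself rather than hard-coded:

gtmlp = GTMLP(Config.Frappe)
arch_id = gtmlp.get_all_trained_model_ids()[0]
auc, time_usage = gtmlp.get_valid_auc(arch_id=arch_id, epoch_num=19)  # last recorded Frappe epoch
print(arch_id, auc, time_usage)
print(gtmlp.get_train_one_epoch_time("cpu"))  # pre-computed seconds per epoch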
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/singleton.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/singleton.py
new file mode 100644
index 000000000..24814b119
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/singleton.py
@@ -0,0 +1,31 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import threading
+
+
+class Singleton(type):
+ _instances = {}
+ _lock = threading.Lock()
+
+ def __call__(cls, *args, **kwargs):
+ if cls not in cls._instances:
+ with cls._lock:
+ if cls not in cls._instances:
+ cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
+ return cls._instances[cls]
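A minimal sketch of the thread-safe Singleton metaclass above: every call to the class returns the same instance, created at most once even under concurrent access (the GlobalCache class here is only an illustration):

class GlobalCache(metaclass=Singleton):
    def __init__(self):
        self.store = {}

a = GlobalCache()
b = GlobalCache()
a.store["k"] = 1
print(b.store["k"], a is b)  # 1 True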
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/__init__.py
new file mode 100644
index 000000000..4e04c2b3b
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/__init__.py
@@ -0,0 +1,17 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
\ No newline at end of file
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/core/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/core/__init__.py
new file mode 100644
index 000000000..3df60b02f
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/core/__init__.py
@@ -0,0 +1,17 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/core/model_params.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/core/model_params.py
new file mode 100644
index 000000000..811bf71c7
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/core/model_params.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+class ModelMacroCfg:
+ """
+ Macro search-space config.
+ Basic search-space settings: whether to use BN, number of input features, number of output labels, etc.
+ """
+
+ def __init__(self, num_labels):
+ """
+ Args:
+ num_labels: output labels.
+ """
+ self.num_labels = num_labels
+
+
+class ModelMicroCfg:
+ """
+ Micro search-space config.
+ Identifier for each model: connection pattern, operations, etc.
+ encoding = serialized(ModelMicroCfg)
+ """
+
+ def __init__(self):
+ pass
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/init_search_space.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/init_search_space.py
new file mode 100644
index 000000000..8d46ebdbd
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/init_search_space.py
@@ -0,0 +1,55 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+from src.common.constant import Config
+from src.search_space.core.space import SpaceWrapper
+from src.query_api.query_api_img import ImgScoreQueryApi
+
+def init_search_space(args) -> SpaceWrapper:
+ """
+ :param args: parsed arguments describing the search space (search_space, nfield, nfeat, nemb, num_layers, num_labels, hidden_choice_len)
+ :return:
+ """
+ # elif args.search_space == Config.MLPSP:
+ if args.search_space == Config.MLPSP:
+ from .mlp_api.space import MlpSpace
+ from .mlp_api.model_params import MlpMacroCfg
+ from .mlp_api.space import DEFAULT_LAYER_CHOICES_20, DEFAULT_LAYER_CHOICES_10
+ print ("src/search_space/init_search_space.py config.MLPSP")
+ if args.hidden_choice_len == 10:
+ model_cfg = MlpMacroCfg(
+ args.nfield,
+ args.nfeat,
+ args.nemb,
+ args.num_layers,
+ args.num_labels,
+ DEFAULT_LAYER_CHOICES_10)
+ else:
+ model_cfg = MlpMacroCfg(
+ args.nfield,
+ args.nfeat,
+ args.nemb,
+ args.num_layers,
+ args.num_labels,
+ DEFAULT_LAYER_CHOICES_20)
+
+ return MlpSpace(model_cfg)
+ else:
+        raise Exception(f"Unsupported search space: {args.search_space}")
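Editor's note: a hypothetical invocation of `init_search_space`, for reference only. The argument values are placeholders; only the fields this function reads are set.

```python
# Hypothetical usage sketch for init_search_space (all values are placeholders).
from argparse import Namespace
from src.common.constant import Config
from src.search_space.init_search_space import init_search_space

args = Namespace(
    search_space=Config.MLPSP,  # only the MLP space is supported in this example
    nfield=10, nfeat=5000, nemb=10,
    num_layers=4, num_labels=2,
    hidden_choice_len=20,       # selects DEFAULT_LAYER_CHOICES_20
)
space = init_search_space(args)
print(len(space))               # number of candidate architectures: 20 ** 4
```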
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/mlp_api/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/mlp_api/__init__.py
new file mode 100644
index 000000000..3df60b02f
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/mlp_api/__init__.py
@@ -0,0 +1,17 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/mlp_api/model_params.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/mlp_api/model_params.py
new file mode 100644
index 000000000..7edf35e1d
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/mlp_api/model_params.py
@@ -0,0 +1,34 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from src.search_space.core.model_params import ModelMacroCfg
+
+
+class MlpMacroCfg(ModelMacroCfg):
+
+ def __init__(self, nfield: int, nfeat: int, nemb: int,
+ num_layers: int,
+ num_labels: int,
+ layer_choices: list):
+ super().__init__(num_labels)
+
+ self.nfield = nfield
+ self.nfeat = nfeat
+ self.nemb = nemb
+ self.layer_choices = layer_choices
+ self.num_layers = num_layers
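Editor's note: a small illustration (not part of the patch) of how the macro config bundles the tabular-data dimensions with the per-layer width choices; the values are placeholders.

```python
# Illustrative construction of an MLP macro config (values are placeholders).
from src.search_space.mlp_api.model_params import MlpMacroCfg

cfg = MlpMacroCfg(
    nfield=10,                      # number of tabular fields
    nfeat=5000,                     # total feature (one-hot vocabulary) size
    nemb=10,                        # embedding dimension per field
    num_layers=4,                   # number of hidden layers to search over
    num_labels=2,                   # output labels
    layer_choices=[8, 16, 32, 64],  # candidate widths for each hidden layer
)
print(cfg.num_labels, cfg.layer_choices)
```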
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/mlp_api/rl_policy.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/mlp_api/rl_policy.py
new file mode 100644
index 000000000..e3372525d
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/mlp_api/rl_policy.py
@@ -0,0 +1,36 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from src.search_space.core.rl_policy import RLPolicyBase
+
+
+class RLMlpSPaceTopology(RLPolicyBase):
+ def __init__(self, search_space, rl_learning_rate, max_nodes=4):
+ super().__init__()
+
+ def generate_arch(self, config):
+ pass
+
+ def select_action(self):
+ pass
+
+ def _sample_new_cfg(self):
+ pass
+
+ def update_policy(self, reward, baseline_values, log_prob):
+ pass
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/mlp_api/space.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/mlp_api/space.py
new file mode 100644
index 000000000..8336750ae
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/mlp_api/space.py
@@ -0,0 +1,643 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import copy
+import itertools
+import random
+import time
+from copy import deepcopy
+from typing import Generator
+
+from src.common.constant import Config, CommonVars
+from src.eva_engine import evaluator_register
+from src.eva_engine.phase2.algo.trainer import ModelTrainer
+from src.logger import logger
+from src.search_space.core.model_params import ModelMicroCfg, ModelMacroCfg
+from src.search_space.core.space import SpaceWrapper
+from src.search_space.mlp_api.model_params import MlpMacroCfg
+from src.query_api.interface import profile_NK_trade_off
+from src.query_api.query_api_mlp import GTMLP
+
+from singa import layer
+from singa import model
+from singa import tensor
+from singa import opt
+from singa import device
+from singa.autograd import Operator
+from singa.layer import Layer
+from singa import singa_wrap as singa
+import argparse
+import numpy as np
+
+# Useful constants
+
+DEFAULT_LAYER_CHOICES_20 = [8, 16, 24, 32, # 8
+ 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 256, # 16
+ 384, 512]
+DEFAULT_LAYER_CHOICES_10 = [8, 16, 32,
+ 48, 96, 112, 144, 176, 240,
+ 384]
+
+
+np_dtype = {"float16": np.float16, "float32": np.float32}
+
+# singa_dtype = {"float16": tensor.float16, "float32": tensor.float32}
+singa_dtype = {"float32": tensor.float32}
+
+class MlpMicroCfg(ModelMicroCfg):
+
+ @classmethod
+ def builder(cls, encoding: str):
+ return MlpMicroCfg([int(ele) for ele in encoding.split("-")])
+
+ def __init__(self, hidden_layer_list: list):
+ super().__init__()
+ self.hidden_layer_list = hidden_layer_list
+
+ def __str__(self):
+ return "-".join(str(x) for x in self.hidden_layer_list)
+
+#### self-defined loss begin
+
+### from autograd.py
+class SumError(Operator):
+
+ def __init__(self):
+ super(SumError, self).__init__()
+ # self.t = t.data
+
+ def forward(self, x):
+ # self.err = singa.__sub__(x, self.t)
+ self.data_x = x
+ # print ("SumError forward x: ", x)
+ # print ("SumError forward x.L2(): ", x.L2())
+ # print ("SumError forward x shape(): ", x.shape())
+ # sqr = singa.Square(self.err)
+ # loss = singa.SumAll(sqr)
+ loss = singa.SumAll(x)
+ # self.n = 1
+ # for s in x.shape():
+ # self.n *= s
+ # loss /= self.n
+ return loss
+
+ def backward(self, dy=1.0):
+ # dx = self.err
+ dev = device.get_default_device()
+ # print ("backward self.data_x.shape(): ", self.data_x.shape())
+ dx = tensor.Tensor(self.data_x.shape(), dev, singa_dtype['float32'])
+ dx.copy_from_numpy(np.ones(self.data_x.shape(), dtype=np.float32))
+ # print ("SumError backward dx data: ", dx.data)
+ # dx *= float(2 / self.n)
+ dx.data *= float(dy)
+ return dx.data
+
+def se_loss(x):
+ # assert x.shape == t.shape, "input and target shape different: %s, %s" % (
+ # x.shape, t.shape)
+ return SumError()(x)[0]
+
+### from layer.py
+class SumErrorLayer(Layer):
+ """
+    Generate a SumError (sum of all elements) operator
+ """
+
+ def __init__(self):
+ super(SumErrorLayer, self).__init__()
+
+ def forward(self, x):
+ return se_loss(x)
+
+#### self-defined loss end
+
+class SINGADNNModel(model.Model):
+
+ def __init__(self, nfield: int, nfeat: int, nemb: int,
+ hidden_layer_list: list, dropout_rate: float,
+ noutput: int, use_bn: bool = True):
+ # def __init__(self, data_size=10, perceptron_size=100, num_classes=10, layer_hidden_list=[10,10,10,10]):
+ super(SINGADNNModel, self).__init__()
+ # self.num_classes = num_classes
+ self.dimension = 2 # data dimension = 2
+
+ self.mlp_ninput = nfield * nemb
+ self.nfeat = nfeat
+
+ layer_hidden_list = []
+ for index, layer_size in enumerate(hidden_layer_list):
+ layer_hidden_list.append(layer_size)
+ self.relu = layer.ReLU()
+ self.linear1 = layer.Linear(layer_hidden_list[0])
+ # print ("linear1.in_features: ", self.linear1.in_features)
+ # print ("linear1.out_features: ", self.linear1.out_features)
+ self.linear2 = layer.Linear(layer_hidden_list[1])
+ # print ("linear2.in_features: ", self.linear2.in_features)
+ # print ("linear2.out_features: ", self.linear2.out_features)
+ self.linear3 = layer.Linear(layer_hidden_list[2])
+ # print ("linear3.in_features: ", self.linear3.in_features)
+ # print ("linear3.out_features: ", self.linear3.out_features)
+ self.linear4 = layer.Linear(layer_hidden_list[3])
+ # print ("linear4.in_features: ", self.linear4.in_features)
+ # print ("linear4.out_features: ", self.linear4.out_features)
+ self.linear5 = layer.Linear(noutput)
+ # print ("linear5.in_features: ", self.linear5.in_features)
+ # print ("linear5.out_features: ", self.linear5.out_features)
+ self.softmax_cross_entropy = layer.SoftMaxCrossEntropy()
+ self.sum_error = SumErrorLayer()
+ # for weight-sharing
+ self.is_masked_subnet = False
+ self.hidden_layer_list = hidden_layer_list
+ # Initialize subnet mask with ones
+ self.subnet_mask = [np.ones(size) for size in hidden_layer_list]
+
+ def forward(self, inputs):
+ # print ("in space.py forward")
+ # print ("in space.py inputs shape: ", inputs.shape)
+ y = self.linear1(inputs)
+ y = self.relu(y)
+ y = self.linear2(y)
+ y = self.relu(y)
+ y = self.linear3(y)
+ y = self.relu(y)
+ y = self.linear4(y)
+ y = self.relu(y)
+ y = self.linear5(y)
+ return y
+
+ def generate_all_ones_embedding(self):
+ """
+        Only used for the MLP search space.
+        Returns: a (1, nfeat) all-ones tensor serving as a one-hot input for scoring.
+ """
+ import torch
+ # batch_data = torch.ones(1, self.mlp_ninput).double() # embedding
+ batch_data = torch.ones(1, self.nfeat).double() # one-hot
+ # print ("batch_data shape: ", batch_data.shape)
+ return batch_data
+
+ def sample_subnet(self, arch_id: str, device: str):
+ # arch_id e.g., '128-128-128-128'
+ sizes = list(map(int, arch_id.split('-')))
+ self.is_masked_subnet = True
+ # randomly mask neurons in the layers.
+
+ for idx, size in enumerate(sizes):
+ # Create a mask of ones and zeros with the required length
+ mask = np.concatenate([
+ np.ones(size),
+ np.zeros(self.hidden_layer_list[idx] - size)],
+                axis=0)  # numpy concatenate takes axis=, not dim=
+            # Shuffle the mask to randomize which neurons are active
+            mask = mask[np.random.permutation(mask.size)]  # numpy arrays expose .size as an int
+ self.subnet_mask[idx] = mask
+
+ def train_one_batch(self, x, y, dist_option, spars, synflow_flag):
+ # print ("space.py in train_one_batch")
+ out = self.forward(x)
+ # print ("train_one_batch out shape: ", out.shape)
+ # print ("train_one_batch tensor.to_numpy(out): ", tensor.to_numpy(out))
+ # print ("space.py train_one_batch x.shape: \n", x.shape)
+ # print ("train_one_batch y.data: \n", y.data)
+ # print ("space.py train_one_batch out.shape: \n", out.shape)
+ if synflow_flag:
+ # print ("train_one_batch sum_error")
+ loss = self.sum_error(out)
+ # print ("sum_error loss data: ", loss.data)
+ else: # normal training
+ # print ("train_one_batch softmax_cross_entropy")
+ loss = self.softmax_cross_entropy(out, y)
+ # print ("softmax_cross_entropy loss.data: ", loss.data)
+ # print ("train_one_batch loss.data: \n", loss.data)
+        pn_p_g_list = None  # ensure it is defined when dist_option != 'plain'
+ if dist_option == 'plain':
+ # print ("before pn_p_g_list = self.optimizer(loss)")
+ pn_p_g_list = self.optimizer(loss)
+ # print ("after pn_p_g_list = self.optimizer(loss)")
+ elif dist_option == 'half':
+ self.optimizer.backward_and_update_half(loss)
+ elif dist_option == 'partialUpdate':
+ self.optimizer.backward_and_partial_update(loss)
+ elif dist_option == 'sparseTopK':
+ self.optimizer.backward_and_sparse_update(loss,
+ topK=True,
+ spars=spars)
+ elif dist_option == 'sparseThreshold':
+ self.optimizer.backward_and_sparse_update(loss,
+ topK=False,
+ spars=spars)
+ # print ("len(pn_p_g_list): \n", len(pn_p_g_list))
+ # print ("len(pn_p_g_list[0]): \n", len(pn_p_g_list[0]))
+ # print ("pn_p_g_list[0][0]: \n", pn_p_g_list[0][0])
+ # print ("pn_p_g_list[0][1].data: \n", pn_p_g_list[0][1].data)
+ # print ("pn_p_g_list[0][2].data: \n", pn_p_g_list[0][2].data)
+ return pn_p_g_list, out, loss
+ # return pn_p_g_list[0], pn_p_g_list[1], pn_p_g_list[2], out, loss
+
+ def set_optimizer(self, optimizer):
+ self.optimizer = optimizer
+
+
+def create_model(pretrained=False, **kwargs):
+ """Constructs a CNN model.
+
+ Args:
+ pretrained (bool): If True, returns a pre-trained model.
+
+ Returns:
+        The created SINGADNNModel instance.
+ """
+ model = SINGADNNModel(**kwargs)
+
+ return model
+
+
+__all__ = ['SINGADNNModel', 'create_model']
+
+from torch.utils.data import DataLoader
+class MlpSpace(SpaceWrapper):
+ def __init__(self, modelCfg: MlpMacroCfg):
+ super().__init__(modelCfg, Config.MLPSP)
+
+ def load(self):
+ pass
+
+ @classmethod
+ def serialize_model_encoding(cls, arch_micro: ModelMicroCfg) -> str:
+ assert isinstance(arch_micro, MlpMicroCfg)
+ return str(arch_micro)
+
+ @classmethod
+ def deserialize_model_encoding(cls, model_encoding: str) -> ModelMicroCfg:
+ return MlpMicroCfg.builder(model_encoding)
+
+ @classmethod
+ def new_arch_scratch(cls, arch_macro: ModelMacroCfg, arch_micro: ModelMicroCfg, bn: bool = True):
+ assert isinstance(arch_micro, MlpMicroCfg)
+ assert isinstance(arch_macro, MlpMacroCfg)
+ # mlp = DNNModel(
+ mlp = SINGADNNModel(
+ nfield=arch_macro.nfield,
+ nfeat=arch_macro.nfeat,
+ nemb=arch_macro.nemb,
+ hidden_layer_list=arch_micro.hidden_layer_list,
+ dropout_rate=0,
+ noutput=arch_macro.num_labels,
+ use_bn=bn,
+ )
+ return mlp
+
+ def new_arch_scratch_with_default_setting(self, model_encoding: str, bn: bool):
+ model_micro = MlpSpace.deserialize_model_encoding(model_encoding)
+ return MlpSpace.new_arch_scratch(self.model_cfg, model_micro, bn)
+
+ def new_architecture(self, arch_id: str):
+        """
+        Args:
+            arch_id: arch id is the same as encoding.
+        Returns:
+        """
+        assert isinstance(self.model_cfg, MlpMacroCfg)
+ arch_micro = MlpSpace.deserialize_model_encoding(arch_id)
+ assert isinstance(arch_micro, MlpMicroCfg)
+ # print ("src/search_space/mlp_api/space.py new_architecture")
+ # print ("src/search_space/mlp_api/space.py arch_micro:\n", arch_micro)
+ # mlp = DNNModel(
+ mlp = SINGADNNModel(
+ nfield=self.model_cfg.nfield,
+ nfeat=self.model_cfg.nfeat,
+ nemb=self.model_cfg.nemb,
+ hidden_layer_list=arch_micro.hidden_layer_list,
+ dropout_rate=0,
+ noutput=self.model_cfg.num_labels)
+ return mlp
+
+ def new_architecture_with_micro_cfg(self, arch_micro: ModelMicroCfg):
+ assert isinstance(arch_micro, MlpMicroCfg)
+ assert isinstance(self.model_cfg, MlpMacroCfg)
+ # mlp = DNNModel(
+ mlp = SINGADNNModel(
+ nfield=self.model_cfg.nfield,
+ nfeat=self.model_cfg.nfeat,
+ nemb=self.model_cfg.nemb,
+ hidden_layer_list=arch_micro.hidden_layer_list,
+ dropout_rate=0,
+ noutput=self.model_cfg.num_labels)
+ return mlp
+
+ def profiling_score_time(
+ self, dataset: str,
+ train_loader: DataLoader = None, val_loader: DataLoader = None,
+ args=None, is_simulate: bool = False):
+ assert isinstance(self.model_cfg, MlpMacroCfg)
+
+ device = "cpu"
+ if is_simulate:
+ gtmlp = GTMLP(dataset)
+            # todo: we use a hybrid here.
+ # those are from the pre-calculator
+ _train_time_per_epoch = gtmlp.get_score_one_model_time("cpu")
+ score_time = _train_time_per_epoch
+ else:
+
+ # get a random batch.
+ import torch
+ batch = iter(train_loader).__next__()
+ target = batch['y'].type(torch.LongTensor)
+ batch['id'] = batch['id'].to(device)
+ batch['value'] = batch['value'].to(device)
+ target = target.to(device)
+ # .reshape(target.shape[0], self.model_cfg.num_labels).
+
+ # pick the largest net to train
+ # super_net = DNNModel(
+ super_net = SINGADNNModel(
+ nfield=args.nfield,
+ nfeat=args.nfeat,
+ nemb=args.nemb,
+ hidden_layer_list=[DEFAULT_LAYER_CHOICES_20[-1]] * self.model_cfg.num_layers,
+ dropout_rate=0,
+ noutput=self.model_cfg.num_labels)
+ super_net.init_embedding(requires_grad=False)
+ super_net.to(device)
+ # measure score time,
+ score_time_begin = time.time()
+ naswot_score, _ = evaluator_register[CommonVars.NAS_WOT].evaluate_wrapper(
+ arch=super_net,
+ device=device,
+ batch_data=batch,
+ batch_labels=target)
+
+            # re-init the net
+ del super_net
+ # super_net = DNNModel(
+ super_net = SINGADNNModel(
+ nfield=args.nfield,
+ nfeat=args.nfeat,
+ nemb=args.nemb,
+ hidden_layer_list=[DEFAULT_LAYER_CHOICES_20[-1]] * self.model_cfg.num_layers,
+ dropout_rate=0,
+ noutput=self.model_cfg.num_labels,
+ use_bn=False)
+ super_net.init_embedding(requires_grad=False)
+ super_net.to(device)
+ synflow_score, _ = evaluator_register[CommonVars.PRUNE_SYNFLOW].evaluate_wrapper(
+ arch=super_net,
+ device=device,
+ batch_data=batch,
+ batch_labels=target)
+
+ score_time = time.time() - score_time_begin
+
+            # re-init the net
+ del super_net
+ return score_time
+
+ def profiling_train_time(self, dataset: str,
+ train_loader: DataLoader = None, val_loader: DataLoader = None,
+ args=None, is_simulate: bool = False):
+
+ device = args.device
+
+ if is_simulate:
+ gtmlp = GTMLP(dataset)
+            # todo: find an ideal server, and use the 512-width model to profile.
+ # those are from the pre-calculator
+ _train_time_per_epoch = gtmlp.get_train_one_epoch_time(device)
+ else:
+ # super_net = DNNModel(
+ super_net = SINGADNNModel(
+ nfield=args.nfield,
+ nfeat=args.nfeat,
+ nemb=args.nemb,
+ hidden_layer_list=[DEFAULT_LAYER_CHOICES_20[-1]] * self.model_cfg.num_layers,
+ dropout_rate=0,
+ noutput=self.model_cfg.num_labels)
+ super_net.init_embedding(requires_grad=True)
+ super_net.to(device)
+            # only train for one iteration to evaluate the time usage.
+ targs = copy.deepcopy(args)
+ valid_auc, train_time_epoch, train_log = ModelTrainer.fully_train_arch(
+ model=super_net,
+ use_test_acc=False,
+ epoch_num=1,
+ train_loader=train_loader,
+ val_loader=val_loader,
+ test_loader=val_loader,
+ args=targs)
+ del super_net
+ _train_time_per_epoch = train_time_epoch
+
+ return _train_time_per_epoch
+
+ def profiling(self, dataset: str,
+ train_loader: DataLoader = None, val_loader: DataLoader = None,
+ args=None, is_simulate: bool = False) -> (float, float, int):
+
+ assert isinstance(self.model_cfg, MlpMacroCfg)
+ device = args.device
+
+ if is_simulate:
+ gtmlp = GTMLP(dataset)
+            # todo: we use a hybrid here.
+ # those are from the pre-calculator
+ _train_time_per_epoch = gtmlp.get_score_one_model_time("cpu")
+ score_time = _train_time_per_epoch
+ else:
+ import torch
+ # get a random batch.
+ batch = iter(train_loader).__next__()
+ target = batch['y'].type(torch.LongTensor)
+ batch['id'] = batch['id'].to(device)
+ batch['value'] = batch['value'].to(device)
+ target = target.to(device)
+ # .reshape(target.shape[0], self.model_cfg.num_labels).
+
+ # pick the largest net to train
+ # super_net = DNNModel(
+ super_net = SINGADNNModel(
+ nfield=args.nfield,
+ nfeat=args.nfeat,
+ nemb=args.nemb,
+ hidden_layer_list=[DEFAULT_LAYER_CHOICES_20[-1]] * self.model_cfg.num_layers,
+ dropout_rate=0,
+ noutput=self.model_cfg.num_labels)
+ super_net.init_embedding(requires_grad=False)
+ super_net.to(device)
+
+ # measure score time,
+ score_time_begin = time.time()
+ naswot_score, _ = evaluator_register[CommonVars.NAS_WOT].evaluate_wrapper(
+ arch=super_net,
+ device=device,
+ batch_data=batch,
+ batch_labels=target)
+
+            # re-init the net
+ del super_net
+ # super_net = DNNModel(
+ super_net = SINGADNNModel(
+ nfield=args.nfield,
+ nfeat=args.nfeat,
+ nemb=args.nemb,
+ hidden_layer_list=[DEFAULT_LAYER_CHOICES_20[-1]] * self.model_cfg.num_layers,
+ dropout_rate=0,
+ noutput=self.model_cfg.num_labels,
+ use_bn=False)
+ super_net.init_embedding(requires_grad=False)
+ super_net.to(device)
+
+ synflow_score, _ = evaluator_register[CommonVars.PRUNE_SYNFLOW].evaluate_wrapper(
+ arch=super_net,
+ device=device,
+ batch_data=batch,
+ batch_labels=target)
+
+ score_time = time.time() - score_time_begin
+
+            # re-init the net
+ del super_net
+
+ if is_simulate:
+ gtmlp = GTMLP(dataset)
+            # todo: find an ideal server, and use the 512-width model to profile.
+ # those are from the pre-calculator
+ _train_time_per_epoch = gtmlp.get_train_one_epoch_time(device)
+ else:
+ # super_net = DNNModel(
+ super_net = SINGADNNModel(
+ nfield=args.nfield,
+ nfeat=args.nfeat,
+ nemb=args.nemb,
+ hidden_layer_list=[DEFAULT_LAYER_CHOICES_20[-1]] * self.model_cfg.num_layers,
+ dropout_rate=0,
+ noutput=self.model_cfg.num_labels)
+ super_net.init_embedding(requires_grad=True)
+ super_net.to(device)
+
+            # only train for one iteration to evaluate the time usage.
+ targs = copy.deepcopy(args)
+ valid_auc, train_time_epoch, train_log = ModelTrainer.fully_train_arch(
+ model=super_net,
+ use_test_acc=False,
+ epoch_num=1,
+ train_loader=train_loader,
+ val_loader=val_loader,
+ test_loader=val_loader,
+ args=targs)
+ del super_net
+ _train_time_per_epoch = train_time_epoch
+
+        # todo: this is pre-defined using the image dataset; assume each epoch only trains 200 iterations
+ score_time_per_model = score_time
+ train_time_per_epoch = _train_time_per_epoch
+ if args.kn_rate != -1:
+ n_k_ratio = args.kn_rate
+ else:
+ n_k_ratio = profile_NK_trade_off(dataset)
+ print(f"Profiling results: score_time_per_model={score_time_per_model},"
+ f" train_time_per_epoch={train_time_per_epoch}")
+ logger.info(f"Profiling results: score_time_per_model={score_time_per_model},"
+ f" train_time_per_epoch={train_time_per_epoch}")
+ return score_time_per_model, train_time_per_epoch, n_k_ratio
+
+ def micro_to_id(self, arch_struct: ModelMicroCfg) -> str:
+ assert isinstance(arch_struct, MlpMicroCfg)
+ return str(arch_struct.hidden_layer_list)
+
+ def __len__(self):
+ assert isinstance(self.model_cfg, MlpMacroCfg)
+ return len(self.model_cfg.layer_choices) ** self.model_cfg.num_layers
+
+ def get_arch_size(self, arch_micro: ModelMicroCfg) -> int:
+ assert isinstance(arch_micro, MlpMicroCfg)
+ result = 1
+ for ele in arch_micro.hidden_layer_list:
+ result = result * ele
+ return result
+
+ def sample_all_models(self) -> Generator[str, ModelMicroCfg, None]:
+ assert isinstance(self.model_cfg, MlpMacroCfg)
+        # 2-dimensional matrix for the search space
+ space = []
+ for _ in range(self.model_cfg.num_layers):
+ space.append(self.model_cfg.layer_choices)
+
+ # generate all possible combinations
+ combinations = itertools.product(*space)
+
+        # encoding each of them
+        # debug only: yield "8-16-32-64", MlpMicroCfg([8, 16, 32, 64])
+        # iterate directly instead of calling __next__ inside a generator,
+        # which would raise RuntimeError on exhaustion under PEP 479
+        for ele in combinations:
+            model_micro = MlpMicroCfg(list(ele))
+            model_encoding = str(model_micro)
+            yield model_encoding, model_micro
+
+ def random_architecture_id(self) -> (str, ModelMicroCfg):
+ assert isinstance(self.model_cfg, MlpMacroCfg)
+ arch_encod = []
+ for _ in range(self.model_cfg.num_layers):
+ layer_size = random.choice(self.model_cfg.layer_choices)
+ arch_encod.append(layer_size)
+
+ model_micro = MlpMicroCfg(arch_encod)
+ # this is the model id == str(model micro)
+ model_encoding = str(model_micro)
+ return model_encoding, model_micro
+
+ '''Below is for EA'''
+
+ def mutate_architecture(self, parent_arch: ModelMicroCfg) -> (str, ModelMicroCfg):
+ assert isinstance(parent_arch, MlpMicroCfg)
+ assert isinstance(self.model_cfg, MlpMacroCfg)
+ child_layer_list = deepcopy(parent_arch.hidden_layer_list)
+
+ # 1. choose layer index
+ chosen_hidden_layer_index = random.choice(list(range(len(child_layer_list))))
+
+ # 2. choose size of the layer index, increase the randomness
+ while True:
+ cur_layer_size = child_layer_list[chosen_hidden_layer_index]
+ mutated_layer_size = random.choice(self.model_cfg.layer_choices)
+ if mutated_layer_size != cur_layer_size:
+ child_layer_list[chosen_hidden_layer_index] = mutated_layer_size
+ new_model = MlpMicroCfg(child_layer_list)
+ return str(new_model), new_model
+
+ def mutate_architecture_move_proposal(self, parent_arch: ModelMicroCfg):
+ assert isinstance(parent_arch, MlpMicroCfg)
+ assert isinstance(self.model_cfg, MlpMacroCfg)
+ child_layer_list = deepcopy(parent_arch.hidden_layer_list)
+
+ all_combs = set()
+ # 1. choose layer index
+ for chosen_hidden_layer_index in list(range(len(child_layer_list))):
+
+ # 2. choose size of the layer index, increase the randomness
+ while True:
+ cur_layer_size = child_layer_list[chosen_hidden_layer_index]
+ mutated_layer_size = random.choice(self.model_cfg.layer_choices)
+ if mutated_layer_size != cur_layer_size:
+ child_layer_list[chosen_hidden_layer_index] = mutated_layer_size
+ new_model = MlpMicroCfg(child_layer_list)
+ all_combs.add((str(new_model), new_model))
+ break
+
+ return list(all_combs)
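Editor's note: a hedged usage sketch of the `MlpSpace` API added above (random sampling, encoding round trip, and EA mutation). The macro-config values are placeholders; this is not part of the patch.

```python
# Illustrative walk-through of the MlpSpace API (values are placeholders).
from src.search_space.mlp_api.model_params import MlpMacroCfg
from src.search_space.mlp_api.space import MlpSpace, DEFAULT_LAYER_CHOICES_10

cfg = MlpMacroCfg(nfield=10, nfeat=5000, nemb=10,
                  num_layers=4, num_labels=2,
                  layer_choices=DEFAULT_LAYER_CHOICES_10)
space = MlpSpace(cfg)

# 1. sample a random architecture; the encoding doubles as the arch id
encoding, micro = space.random_architecture_id()   # e.g. "16-48-96-384"
assert MlpSpace.deserialize_model_encoding(encoding).hidden_layer_list == micro.hidden_layer_list

# 2. materialize it as a SINGA model
mlp = space.new_architecture(encoding)

# 3. mutate it for evolutionary search
child_encoding, child_micro = space.mutate_architecture(micro)
print(encoding, "->", child_encoding)
```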
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/utils/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/utils/__init__.py
new file mode 100644
index 000000000..01d705720
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/utils/__init__.py
@@ -0,0 +1,18 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/utils/weight_initializers.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/utils/weight_initializers.py
new file mode 100644
index 000000000..de1c54442
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/utils/weight_initializers.py
@@ -0,0 +1,78 @@
+# Copyright 2021 Samsung Electronics Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+def init_net(net, w_type, b_type):
+ """
+ Init network with various algorithms
+ :param net:
+ :param w_type:
+ :param b_type:
+ :return:
+ """
+ if w_type == 'none':
+ pass
+ elif w_type == 'xavier':
+ net.apply(_init_weights_vs)
+ elif w_type == 'kaiming':
+ net.apply(_init_weights_he)
+ elif w_type == 'zero':
+ net.apply(_init_weights_zero)
+ else:
+ raise NotImplementedError(f'init_type={w_type} is not supported.')
+
+ if b_type == 'none':
+ pass
+ elif b_type == 'xavier':
+ net.apply(_init_bias_vs)
+ elif b_type == 'kaiming':
+ net.apply(_init_bias_he)
+ elif b_type == 'zero':
+ net.apply(_init_bias_zero)
+ else:
+ raise NotImplementedError(f'init_type={b_type} is not supported.')
+
+import torch.nn as nn
+
+def _init_weights_vs(m):
+ if type(m) == nn.Linear or type(m) == nn.Conv2d:
+ nn.init.xavier_normal_(m.weight)
+
+
+def _init_bias_vs(m):
+ if type(m) == nn.Linear or type(m) == nn.Conv2d:
+ if m.bias is not None:
+ nn.init.xavier_normal_(m.bias)
+
+
+def _init_weights_he(m):
+ if type(m) == nn.Linear or type(m) == nn.Conv2d:
+ nn.init.kaiming_normal_(m.weight)
+
+
+def _init_bias_he(m):
+ if type(m) == nn.Linear or type(m) == nn.Conv2d:
+ if m.bias is not None:
+ nn.init.kaiming_normal_(m.bias)
+
+
+def _init_weights_zero(m):
+ if type(m) == nn.Linear or type(m) == nn.Conv2d:
+ m.weight.data.fill_(.0)
+
+
+def _init_bias_zero(m):
+ if type(m) == nn.Linear or type(m) == nn.Conv2d:
+ if m.bias is not None:
+ m.bias.data.fill_(.0)
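Editor's note: a short usage sketch of `init_net` on an arbitrary torch module; the network below is a placeholder, not part of the patch.

```python
# Illustrative usage of init_net (the network is a placeholder).
import torch.nn as nn
from src.search_space.utils.weight_initializers import init_net

net = nn.Sequential(
    nn.Linear(16, 32),
    nn.ReLU(),
    nn.Linear(32, 2),
)
# Kaiming-initialize weights, zero-initialize biases.
init_net(net, w_type='kaiming', b_type='zero')
print(net[0].bias.abs().sum().item())  # 0.0
```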
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/__init__.py
new file mode 100644
index 000000000..01d705720
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/__init__.py
@@ -0,0 +1,18 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/darts_lib/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/darts_lib/__init__.py
new file mode 100644
index 000000000..3df60b02f
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/darts_lib/__init__.py
@@ -0,0 +1,17 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/darts_lib/genotypes.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/darts_lib/genotypes.py
new file mode 100644
index 000000000..fa9400186
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/darts_lib/genotypes.py
@@ -0,0 +1,36 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from collections import namedtuple
+
+
+Genotype = namedtuple('Genotype', 'normal normal_concat reduce reduce_concat')
+
+PRIMITIVES = [
+ 'none',
+ 'max_pool_3x3',
+ 'avg_pool_3x3',
+ 'skip_connect',
+ 'sep_conv_3x3',
+ 'sep_conv_5x5',
+ 'dil_conv_3x3',
+ 'dil_conv_5x5'
+]
+
+NUM_VERTICES = 4
+NUM_OPS = 7
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/darts_lib/model.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/darts_lib/model.py
new file mode 100644
index 000000000..f8be9a9dd
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/darts_lib/model.py
@@ -0,0 +1,308 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from .operations import *
+from .utils import drop_path
+
+
+class Cell(nn.Module):
+
+ def __init__(self, genotype, C_prev_prev, C_prev, C, reduction, reduction_prev):
+ super(Cell, self).__init__()
+ # print(C_prev_prev, C_prev, C)
+
+ if reduction_prev:
+ self.preprocess0 = FactorizedReduce(C_prev_prev, C)
+ else:
+ self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0)
+ self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0)
+
+ if reduction:
+ op_names, indices = zip(*genotype.reduce)
+ concat = genotype.reduce_concat
+ else:
+ op_names, indices = zip(*genotype.normal)
+ concat = genotype.normal_concat
+ self._compile(C, op_names, indices, concat, reduction)
+
+ def _compile(self, C, op_names, indices, concat, reduction):
+ assert len(op_names) == len(indices)
+ self._steps = len(op_names) // 2
+ self._concat = concat
+ self.multiplier = len(concat)
+
+ self._ops = nn.ModuleList()
+ for name, index in zip(op_names, indices):
+ stride = 2 if reduction and index < 2 else 1
+ op = OPS[name](C, stride, True)
+ self._ops += [op]
+ self._indices = indices
+
+ def forward(self, s0, s1, drop_prob):
+ s0 = self.preprocess0(s0)
+ s1 = self.preprocess1(s1)
+
+ states = [s0, s1]
+ for i in range(self._steps):
+ h1 = states[self._indices[2 * i]]
+ h2 = states[self._indices[2 * i + 1]]
+ op1 = self._ops[2 * i]
+ op2 = self._ops[2 * i + 1]
+ h1 = op1(h1)
+ h2 = op2(h2)
+ if self.training and drop_prob > 0.:
+ if not isinstance(op1, Identity):
+ h1 = drop_path(h1, drop_prob)
+ if not isinstance(op2, Identity):
+ h2 = drop_path(h2, drop_prob)
+ s = h1 + h2
+ states += [s]
+ return torch.cat([states[i] for i in self._concat], dim=1)
+
+
+class AuxiliaryHeadCIFAR(nn.Module):
+
+ def __init__(self, C, num_classes):
+ """assuming input size 8x8"""
+ super(AuxiliaryHeadCIFAR, self).__init__()
+ self.features = nn.Sequential(
+ nn.ReLU(inplace=True),
+ # image size = 2 x 2
+ nn.AvgPool2d(5, stride=3, padding=0, count_include_pad=False),
+ nn.Conv2d(C, 128, 1, bias=False),
+ nn.BatchNorm2d(128),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(128, 768, 2, bias=False),
+ nn.BatchNorm2d(768),
+ nn.ReLU(inplace=True)
+ )
+ self.classifier = nn.Linear(768, num_classes)
+
+ def forward(self, x):
+ x = self.features(x)
+ x = self.classifier(x.view(x.size(0), -1))
+ return x
+
+
+class AuxiliaryHeadTinyImageNet(nn.Module):
+
+ def __init__(self, C, num_classes):
+ """assuming input size 8x8"""
+ super(AuxiliaryHeadTinyImageNet, self).__init__()
+ self.features = nn.Sequential(
+ nn.ReLU(inplace=False),
+ # image size = 2 x 2
+ nn.AvgPool2d(5, stride=3, padding=0, count_include_pad=False),
+ nn.Conv2d(C, 128, 1, bias=False),
+ nn.BatchNorm2d(128),
+ nn.ReLU(inplace=False),
+ nn.Conv2d(128, 768, 2, bias=False),
+ nn.BatchNorm2d(768),
+ nn.ReLU(inplace=False)
+ )
+ self.classifier = nn.Linear(768, num_classes)
+
+ def forward(self, x):
+ x = self.features(x)
+ x = self.classifier(x.view(x.size(0), -1))
+ return x
+
+
+class AuxiliaryHeadImageNet(nn.Module):
+
+ def __init__(self, C, num_classes):
+ """assuming input size 14x14"""
+ super(AuxiliaryHeadImageNet, self).__init__()
+ self.features = nn.Sequential(
+ nn.ReLU(inplace=True),
+ nn.AvgPool2d(5, stride=2, padding=0, count_include_pad=False),
+ nn.Conv2d(C, 128, 1, bias=False),
+ nn.BatchNorm2d(128),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(128, 768, 2, bias=False),
+ # NOTE: This batchnorm was omitted in my earlier implementation due to a typo.
+ # Commenting it out for consistency with the experiments in the paper.
+ # nn.BatchNorm2d(768),
+ nn.ReLU(inplace=True)
+ )
+ self.classifier = nn.Linear(768, num_classes)
+
+ def forward(self, x):
+ x = self.features(x)
+ x = self.classifier(x.view(x.size(0), -1))
+ return x
+
+
+class NetworkCIFAR(nn.Module):
+
+ def __init__(self, C, num_classes, layers, auxiliary, genotype):
+ super(NetworkCIFAR, self).__init__()
+ self._layers = layers
+ self._auxiliary = auxiliary
+
+ stem_multiplier = 3
+ C_curr = stem_multiplier * C
+ self.stem = nn.Sequential(
+ nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
+ nn.BatchNorm2d(C_curr)
+ )
+
+ C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
+ self.cells = nn.ModuleList()
+ reduction_prev = False
+ for i in range(layers):
+ if i in [layers // 3, 2 * layers // 3]:
+ C_curr *= 2
+ reduction = True
+ else:
+ reduction = False
+ cell = Cell(genotype, C_prev_prev, C_prev,
+ C_curr, reduction, reduction_prev)
+ reduction_prev = reduction
+ self.cells += [cell]
+ C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr
+ if i == 2 * layers // 3:
+ C_to_auxiliary = C_prev
+
+ if auxiliary:
+ self.auxiliary_head = AuxiliaryHeadCIFAR(
+ C_to_auxiliary, num_classes)
+ self.global_pooling = nn.AdaptiveAvgPool2d(1)
+ self.classifier = nn.Linear(C_prev, num_classes)
+
+ def forward(self, input):
+ logits_aux = None
+ s0 = s1 = self.stem(input)
+ for i, cell in enumerate(self.cells):
+ s0, s1 = s1, cell(s0, s1, self.drop_path_prob)
+ if i == 2 * self._layers // 3:
+ if self._auxiliary and self.training:
+ logits_aux = self.auxiliary_head(s1)
+ out = self.global_pooling(s1)
+ logits = self.classifier(out.view(out.size(0), -1))
+ return logits, logits_aux
+
+
+class NetworkTinyImageNet(nn.Module):
+
+ def __init__(self, C, num_classes, layers, auxiliary, genotype):
+ super(NetworkTinyImageNet, self).__init__()
+ self._layers = layers
+ self._auxiliary = auxiliary
+
+ stem_multiplier = 3
+ C_curr = stem_multiplier * C
+ self.stem = nn.Sequential(
+ nn.Conv2d(3, C_curr, 3, stride=2, padding=1, bias=False),
+ nn.BatchNorm2d(C_curr)
+ )
+
+ C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
+ self.cells = nn.ModuleList()
+ reduction_prev = False
+ for i in range(layers):
+ if i in [layers // 3, 2 * layers // 3]:
+ C_curr *= 2
+ reduction = True
+ else:
+ reduction = False
+ cell = Cell(genotype, C_prev_prev, C_prev,
+ C_curr, reduction, reduction_prev)
+ reduction_prev = reduction
+ self.cells += [cell]
+ C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr
+ if i == 2 * layers // 3:
+ C_to_auxiliary = C_prev
+
+ if auxiliary:
+            self.auxiliary_head = AuxiliaryHeadTinyImageNet(
+ C_to_auxiliary, num_classes)
+ self.global_pooling = nn.AdaptiveAvgPool2d(1)
+ self.classifier = nn.Linear(C_prev, num_classes)
+
+ def forward(self, input):
+ logits_aux = None
+ s0 = s1 = self.stem(input)
+ for i, cell in enumerate(self.cells):
+ s0, s1 = s1, cell(s0, s1, self.drop_path_prob)
+ if i == 2 * self._layers // 3:
+ if self._auxiliary and self.training:
+ logits_aux = self.auxiliary_head(s1)
+ out = self.global_pooling(s1)
+ logits = self.classifier(out.view(out.size(0), -1))
+ return logits, logits_aux
+
+
+class NetworkImageNet(nn.Module):
+
+ def __init__(self, C, num_classes, layers, auxiliary, genotype):
+ super(NetworkImageNet, self).__init__()
+ self._layers = layers
+ self._auxiliary = auxiliary
+
+ self.stem0 = nn.Sequential(
+ nn.Conv2d(3, C // 2, kernel_size=3,
+ stride=2, padding=1, bias=False),
+ nn.BatchNorm2d(C // 2),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(C // 2, C, 3, stride=2, padding=1, bias=False),
+ nn.BatchNorm2d(C),
+ )
+
+ self.stem1 = nn.Sequential(
+ nn.ReLU(inplace=True),
+ nn.Conv2d(C, C, 3, stride=2, padding=1, bias=False),
+ nn.BatchNorm2d(C),
+ )
+
+ C_prev_prev, C_prev, C_curr = C, C, C
+
+ self.cells = nn.ModuleList()
+ reduction_prev = True
+ for i in range(layers):
+ if i in [layers // 3, 2 * layers // 3]:
+ C_curr *= 2
+ reduction = True
+ else:
+ reduction = False
+ cell = Cell(genotype, C_prev_prev, C_prev,
+ C_curr, reduction, reduction_prev)
+ reduction_prev = reduction
+ self.cells += [cell]
+ C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr
+ if i == 2 * layers // 3:
+ C_to_auxiliary = C_prev
+
+ if auxiliary:
+ self.auxiliary_head = AuxiliaryHeadImageNet(
+ C_to_auxiliary, num_classes)
+ self.global_pooling = nn.AvgPool2d(7)
+ self.classifier = nn.Linear(C_prev, num_classes)
+
+ def forward(self, input):
+ logits_aux = None
+ s0 = self.stem0(input)
+ s1 = self.stem1(s0)
+ for i, cell in enumerate(self.cells):
+ s0, s1 = s1, cell(s0, s1, self.drop_path_prob)
+ if i == 2 * self._layers // 3:
+ if self._auxiliary and self.training:
+ logits_aux = self.auxiliary_head(s1)
+ out = self.global_pooling(s1)
+ logits = self.classifier(out.view(out.size(0), -1))
+ return logits, logits_aux
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/darts_lib/util_convert.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/darts_lib/util_convert.py
new file mode 100644
index 000000000..fee590530
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/darts_lib/util_convert.py
@@ -0,0 +1,126 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from scipy.special import softmax
+from .genotypes import *
+
+
+def genotype(weights, steps=4, multiplier=4):
+ def _parse(weights):
+ gene = []
+ n = 2
+ start = 0
+ for i in range(steps):
+ end = start + n
+ W = weights[start:end].copy()
+ edges = sorted(range(i + 2), key=lambda x: -max(
+ W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
+ for j in edges:
+ k_best = None
+ for k in range(len(W[j])):
+ if k != PRIMITIVES.index('none'):
+ if k_best is None or W[j][k] > W[j][k_best]:
+ k_best = k
+ gene.append((PRIMITIVES[k_best], j))
+ start = end
+ n += 1
+ return gene
+
+ gene_normal = _parse(softmax(weights[0], axis=-1))
+ gene_reduce = _parse(softmax(weights[1], axis=-1))
+
+ concat = range(2 + steps - multiplier, steps + 2)
+ genotype = Genotype(
+ normal=gene_normal, normal_concat=concat,
+ reduce=gene_reduce, reduce_concat=concat
+ )
+ return genotype
+
+
+# from naslib
+def convert_genotype_to_compact(genotype):
+ """Converts Genotype to the compact representation"""
+ OPS = [
+ "max_pool_3x3",
+ "avg_pool_3x3",
+ "skip_connect",
+ "sep_conv_3x3",
+ "sep_conv_5x5",
+ "dil_conv_3x3",
+ "dil_conv_5x5",
+ ]
+ compact = []
+
+ for i, cell_type in enumerate(["normal", "reduce"]):
+        cell = getattr(genotype, cell_type)
+ compact.append([])
+
+ for j in range(8):
+ compact[i].append((cell[j][1], OPS.index(cell[j][0])))
+
+ compact_tuple = (tuple(compact[0]), tuple(compact[1]))
+ return compact_tuple
+
+
+# from naslib
+def convert_compact_to_genotype(compact):
+ """Converts the compact representation to a Genotype"""
+ OPS = [
+ "max_pool_3x3",
+ "avg_pool_3x3",
+ "skip_connect",
+ "sep_conv_3x3",
+ "sep_conv_5x5",
+ "dil_conv_3x3",
+ "dil_conv_5x5",
+ ]
+ genotype = []
+
+ for i in range(2):
+ cell = compact[i]
+ genotype.append([])
+
+ for j in range(8):
+ genotype[i].append((OPS[cell[j][1]], cell[j][0]))
+
+ return Genotype(
+ normal=genotype[0],
+ normal_concat=[2, 3, 4, 5],
+ reduce=genotype[1],
+ reduce_concat=[2, 3, 4, 5],
+ )
+ # TODO: need to check with Colin and/or Arber
+ # return Genotype(
+ # normal = genotype[0],
+ # normal_concat = [2, 3, 4, 5, 6],
+ # reduce = genotype[1],
+ # reduce_concat = [4, 5, 6]
+ # )
+
+
+# from naslib
+def make_compact_mutable(compact):
+ # convert tuple to list so that it is mutable
+ arch_list = []
+ for cell in compact:
+ arch_list.append([])
+ for pair in cell:
+ arch_list[-1].append([])
+ for num in pair:
+ arch_list[-1][-1].append(num)
+ return arch_list
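Editor's note: a small, self-contained round-trip sketch of the converters above, using an illustrative genotype of 8 (op, input-index) pairs per cell; it is not part of the patch.

```python
# Illustrative round trip between Genotype and the naslib-style compact form.
from src.third_pkg.darts_lib.genotypes import Genotype
from src.third_pkg.darts_lib.util_convert import (
    convert_genotype_to_compact, convert_compact_to_genotype, make_compact_mutable)

g = Genotype(
    normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1),
            ('skip_connect', 0), ('sep_conv_3x3', 1),
            ('sep_conv_3x3', 1), ('skip_connect', 0),
            ('skip_connect', 0), ('dil_conv_3x3', 2)],
    normal_concat=[2, 3, 4, 5],
    reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1),
            ('skip_connect', 2), ('max_pool_3x3', 1),
            ('max_pool_3x3', 0), ('skip_connect', 2),
            ('skip_connect', 2), ('max_pool_3x3', 1)],
    reduce_concat=[2, 3, 4, 5],
)
compact = convert_genotype_to_compact(g)   # ((input_idx, op_idx), ...) per cell
mutable = make_compact_mutable(compact)    # nested lists, safe to mutate in-place
g_back = convert_compact_to_genotype(compact)
assert g_back.normal == g.normal and g_back.reduce == g.reduce
```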
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp101_lib/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp101_lib/__init__.py
new file mode 100644
index 000000000..3df60b02f
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp101_lib/__init__.py
@@ -0,0 +1,17 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp101_lib/graph_util.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp101_lib/graph_util.py
new file mode 100644
index 000000000..b3e8194f0
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp101_lib/graph_util.py
@@ -0,0 +1,168 @@
+# Copyright 2019 The Google Research Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Utility functions used by generate_graph.py."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import hashlib
+import itertools
+
+import numpy as np
+
+
+def gen_is_edge_fn(bits):
+ """Generate a boolean function for the edge connectivity.
+
+ Given a bitstring FEDCBA and a 4x4 matrix, the generated matrix is
+ [[0, A, B, D],
+ [0, 0, C, E],
+ [0, 0, 0, F],
+ [0, 0, 0, 0]]
+
+ Note that this function is agnostic to the actual matrix dimension due to
+ order in which elements are filled out (column-major, starting from least
+ significant bit). For example, the same FEDCBA bitstring (0-padded) on a 5x5
+ matrix is
+ [[0, A, B, D, 0],
+ [0, 0, C, E, 0],
+ [0, 0, 0, F, 0],
+ [0, 0, 0, 0, 0],
+ [0, 0, 0, 0, 0]]
+
+ Args:
+ bits: integer which will be interpreted as a bit mask.
+
+ Returns:
+ vectorized function that returns True when an edge is present.
+ """
+
+ def is_edge(x, y):
+ """Is there an edge from x to y (0-indexed)?"""
+ if x >= y:
+ return 0
+ # Map x, y to index into bit string
+ index = x + (y * (y - 1) // 2)
+ return (bits >> index) % 2 == 1
+
+ return np.vectorize(is_edge)
+
+
+def is_full_dag(matrix):
+ """Full DAG == all vertices on a path from vert 0 to (V-1).
+
+ i.e. no disconnected or "hanging" vertices.
+
+ It is sufficient to check for:
+ 1) no rows of 0 except for row V-1 (only output vertex has no out-edges)
+ 2) no cols of 0 except for col 0 (only input vertex has no in-edges)
+
+ Args:
+ matrix: V x V upper-triangular adjacency matrix
+
+ Returns:
+      True if there are no dangling vertices.
+ """
+ shape = np.shape(matrix)
+
+ rows = matrix[:shape[0] - 1, :] == 0
+ rows = np.all(rows, axis=1) # Any row with all 0 will be True
+ rows_bad = np.any(rows)
+
+ cols = matrix[:, 1:] == 0
+ cols = np.all(cols, axis=0) # Any col with all 0 will be True
+ cols_bad = np.any(cols)
+
+ return (not rows_bad) and (not cols_bad)
+
+
+def num_edges(matrix):
+ """Computes number of edges in adjacency matrix."""
+ return np.sum(matrix)
+
+
+def hash_module(matrix, labeling):
+ """Computes a graph-invariance MD5 hash of the matrix and label pair.
+
+ Args:
+ matrix: np.ndarray square upper-triangular adjacency matrix.
+ labeling: list of int labels of length equal to both dimensions of
+ matrix.
+
+ Returns:
+ MD5 hash of the matrix and labeling.
+ """
+ vertices = np.shape(matrix)[0]
+ in_edges = np.sum(matrix, axis=0).tolist()
+ out_edges = np.sum(matrix, axis=1).tolist()
+
+ assert len(in_edges) == len(out_edges) == len(labeling)
+ hashes = list(zip(out_edges, in_edges, labeling))
+ hashes = [hashlib.md5(str(h).encode('utf-8')).hexdigest() for h in hashes]
+ # Computing this up to the diameter is probably sufficient but since the
+ # operation is fast, it is okay to repeat more times.
+ for _ in range(vertices):
+ new_hashes = []
+ for v in range(vertices):
+ in_neighbors = [hashes[w] for w in range(vertices) if matrix[w, v]]
+ out_neighbors = [hashes[w] for w in range(vertices) if matrix[v, w]]
+ new_hashes.append(hashlib.md5(
+ (''.join(sorted(in_neighbors)) + '|' +
+ ''.join(sorted(out_neighbors)) + '|' +
+ hashes[v]).encode('utf-8')).hexdigest())
+ hashes = new_hashes
+ fingerprint = hashlib.md5(str(sorted(hashes)).encode('utf-8')).hexdigest()
+
+ return fingerprint
+
+
+def permute_graph(graph, label, permutation):
+ """Permutes the graph and labels based on permutation.
+
+ Args:
+ graph: np.ndarray adjacency matrix.
+ label: list of labels of same length as graph dimensions.
+ permutation: a permutation list of ints of same length as graph dimensions.
+
+ Returns:
+ np.ndarray where vertex permutation[v] is vertex v from the original graph
+ """
+ # vertex permutation[v] in new graph is vertex v in the old graph
+ forward_perm = zip(permutation, list(range(len(permutation))))
+ inverse_perm = [x[1] for x in sorted(forward_perm)]
+ edge_fn = lambda x, y: graph[inverse_perm[x], inverse_perm[y]] == 1
+ new_matrix = np.fromfunction(np.vectorize(edge_fn),
+ (len(label), len(label)),
+ dtype=np.int8)
+ new_label = [label[inverse_perm[i]] for i in range(len(label))]
+ return new_matrix, new_label
+
+
+def is_isomorphic(graph1, graph2):
+ """Exhaustively checks if 2 graphs are isomorphic."""
+ matrix1, label1 = np.array(graph1[0]), graph1[1]
+ matrix2, label2 = np.array(graph2[0]), graph2[1]
+ assert np.shape(matrix1) == np.shape(matrix2)
+ assert len(label1) == len(label2)
+
+ vertices = np.shape(matrix1)[0]
+ # Note: input and output in our constrained graphs always map to themselves
+ # but this script does not enforce that.
+ for perm in itertools.permutations(range(0, vertices)):
+ pmatrix1, plabel1 = permute_graph(matrix1, label1, perm)
+ if np.array_equal(pmatrix1, matrix2) and plabel1 == label2:
+ return True
+
+ return False
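Editor's note: the hash above is isomorphism-invariant, which the following sketch illustrates on a relabelled 3-vertex DAG; the matrix, labels, and permutation are made-up examples, not values used by the patch.

```python
# Illustrative check that permuted-but-equivalent graphs hash identically.
import numpy as np
from src.third_pkg.sp101_lib.graph_util import hash_module, is_isomorphic, permute_graph

# A 3-vertex chain: input -> op -> output
matrix = np.array([[0, 1, 0],
                   [0, 0, 1],
                   [0, 0, 0]])
labels = [-1, 0, -2]  # -1 = input, -2 = output, 0 = some op index

# The same graph with vertices 0 and 1 swapped.
perm_matrix, perm_labels = permute_graph(matrix, labels, [1, 0, 2])

assert hash_module(matrix, labels) == hash_module(np.array(perm_matrix), perm_labels)
assert is_isomorphic((matrix, labels), (perm_matrix, perm_labels))
```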
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp101_lib/model_spec.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp101_lib/model_spec.py
new file mode 100644
index 000000000..5d5992119
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp101_lib/model_spec.py
@@ -0,0 +1,343 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import copy
+import hashlib
+import itertools
+
+import numpy as np
+
+# Graphviz is optional and only required for visualization.
+try:
+ import graphviz # pylint: disable=g-import-not-at-top
+except ImportError:
+ pass
+
+INPUT = "input"
+OUTPUT = "output"
+CONV3X3 = "conv3x3-bn-relu"
+CONV1X1 = "conv1x1-bn-relu"
+MAXPOOL3X3 = "maxpool3x3"
+OPS = [CONV3X3, CONV1X1, MAXPOOL3X3]
+
+NUM_VERTICES = 7
+OP_SPOTS = NUM_VERTICES - 2
+MAX_EDGES = 9
+
+
+class NASBench101ModelSpec(object):
+ """Model specification given adjacency matrix and labeling."""
+
+ def __init__(self, matrix, ops, data_format='channels_last'):
+ """Initialize the module spec.
+
+ Args:
+ matrix: ndarray or nested list with shape [V, V] for the adjacency matrix.
+ ops: V-length list of labels for the base ops used. The first and last
+ elements are ignored because they are the input and output vertices
+ which have no operations. The elements are retained to keep consistent
+ indexing.
+ data_format: channels_last or channels_first.
+
+ Raises:
+ ValueError: invalid matrix or ops
+ """
+ if not isinstance(matrix, np.ndarray):
+ matrix = np.array(matrix)
+ shape = np.shape(matrix)
+ if len(shape) != 2 or shape[0] != shape[1]:
+ raise ValueError('matrix must be square')
+ if shape[0] != len(ops):
+ raise ValueError('length of ops must match matrix dimensions')
+ if not is_upper_triangular(matrix):
+ raise ValueError('matrix must be upper triangular')
+
+ # Both the original and pruned matrices are deep copies of the matrix and
+ # ops so any changes to those after initialization are not recognized by the
+ # spec.
+ self.original_matrix = copy.deepcopy(matrix)
+ self.original_ops = copy.deepcopy(ops)
+
+ self.matrix = copy.deepcopy(matrix)
+ self.ops = copy.deepcopy(ops)
+ self.valid_spec = True
+ self._prune()
+
+ self.data_format = data_format
+
+ def _prune(self):
+ """Prune the extraneous parts of the graph.
+
+ General procedure:
+ 1) Remove parts of graph not connected to input.
+ 2) Remove parts of graph not connected to output.
+ 3) Reorder the vertices so that they are consecutive after steps 1 and 2.
+
+ These 3 steps can be combined by deleting the rows and columns of the
+ vertices that are not reachable from both the input and output (in reverse).
+ """
+ num_vertices = np.shape(self.original_matrix)[0]
+
+ # DFS forward from input
+ visited_from_input = set([0])
+ frontier = [0]
+ while frontier:
+ top = frontier.pop()
+ for v in range(top + 1, num_vertices):
+ if self.original_matrix[top, v] and v not in visited_from_input:
+ visited_from_input.add(v)
+ frontier.append(v)
+
+ # DFS backward from output
+ visited_from_output = set([num_vertices - 1])
+ frontier = [num_vertices - 1]
+ while frontier:
+ top = frontier.pop()
+ for v in range(0, top):
+ if self.original_matrix[v, top] and v not in visited_from_output:
+ visited_from_output.add(v)
+ frontier.append(v)
+
+ # Any vertex that isn't connected to both input and output is extraneous to
+ # the computation graph.
+ extraneous = set(range(num_vertices)).difference(
+ visited_from_input.intersection(visited_from_output))
+
+    # If the non-extraneous graph has fewer than 2 vertices, the input is not
+ # connected to the output and the spec is invalid.
+ if len(extraneous) > num_vertices - 2:
+ self.matrix = None
+ self.ops = None
+ self.valid_spec = False
+ return
+
+ self.matrix = np.delete(self.matrix, list(extraneous), axis=0)
+ self.matrix = np.delete(self.matrix, list(extraneous), axis=1)
+ for index in sorted(extraneous, reverse=True):
+ del self.ops[index]
+
+ def hash_spec(self, canonical_ops):
+ """Computes the isomorphism-invariant graph hash of this spec.
+
+ Args:
+ canonical_ops: list of operations in the canonical ordering which they
+ were assigned (i.e. the order provided in the config['available_ops']).
+
+ Returns:
+ MD5 hash of this spec which can be used to query the dataset.
+ """
+ # Invert the operations back to integer label indices used in graph gen.
+ labeling = [-1] + [canonical_ops.index(op) for op in self.ops[1:-1]] + [-2]
+ return hash_module(self.matrix, labeling)
+
+ def visualize(self):
+ """Creates a dot graph. Can be visualized in colab directly."""
+ num_vertices = np.shape(self.matrix)[0]
+ g = graphviz.Digraph()
+ g.node(str(0), 'input')
+ for v in range(1, num_vertices - 1):
+ g.node(str(v), self.ops[v])
+ g.node(str(num_vertices - 1), 'output')
+
+ for src in range(num_vertices - 1):
+ for dst in range(src + 1, num_vertices):
+ if self.matrix[src, dst]:
+ g.edge(str(src), str(dst))
+
+ return g
+
+ @classmethod
+ def random_sample_one_architecture(cls, dataset_api: dict, min_size=7):
+ """
+        Samples a random NAS-Bench-101 architecture (adjacency matrix and ops)
+        and returns it as a NASBench101ModelSpec.
+        From the NASBench repository:
+        draw [0, 1] for each slot of the upper-triangular adjacency matrix and
+        sample one of the allowed ops for every intermediate vertex.
+ """
+ while True:
+ matrix = np.random.choice([0, 1], size=(NUM_VERTICES, NUM_VERTICES))
+ matrix = np.triu(matrix, 1)
+ ops = np.random.choice(OPS, size=min_size).tolist()
+ ops[0] = INPUT
+ ops[-1] = OUTPUT
+ spec = dataset_api["api"].ModelSpec(matrix=matrix, ops=ops)
+ if not dataset_api["nb101_data"].is_valid(spec):
+ continue
+
+ spec = NASBench101ModelSpec(matrix, ops)
+            # Only accept models that still have min_size (7) vertices after pruning.
+ if len(spec.matrix) == min_size:
+ break
+
+ return spec
+
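+# A minimal construction sketch (illustrative values, not taken from the dataset):
+# a 5-vertex cell whose middle vertices chain conv3x3 -> conv1x1 -> maxpool3x3
+# from input to output. Pruning happens automatically in __init__.
+#
+#   matrix = [[0, 1, 0, 0, 0],
+#             [0, 0, 1, 0, 0],
+#             [0, 0, 0, 1, 0],
+#             [0, 0, 0, 0, 1],
+#             [0, 0, 0, 0, 0]]
+#   ops = [INPUT, CONV3X3, CONV1X1, MAXPOOL3X3, OUTPUT]
+#   spec = NASBench101ModelSpec(matrix, ops)
+#   assert spec.valid_spec and len(spec.ops) == 5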
+
+def is_upper_triangular(matrix):
+ """True if matrix is 0 on diagonal and below."""
+ for src in range(np.shape(matrix)[0]):
+ for dst in range(0, src + 1):
+ if matrix[src, dst] != 0:
+ return False
+
+ return True
+
+
+def gen_is_edge_fn(bits):
+ """Generate a boolean function for the edge connectivity.
+
+ Given a bitstring FEDCBA and a 4x4 matrix, the generated matrix is
+ [[0, A, B, D],
+ [0, 0, C, E],
+ [0, 0, 0, F],
+ [0, 0, 0, 0]]
+
+ Note that this function is agnostic to the actual matrix dimension due to
+ order in which elements are filled out (column-major, starting from least
+ significant bit). For example, the same FEDCBA bitstring (0-padded) on a 5x5
+ matrix is
+ [[0, A, B, D, 0],
+ [0, 0, C, E, 0],
+ [0, 0, 0, F, 0],
+ [0, 0, 0, 0, 0],
+ [0, 0, 0, 0, 0]]
+
+ Args:
+ bits: integer which will be interpreted as a bit mask.
+
+ Returns:
+ vectorized function that returns True when an edge is present.
+ """
+
+ def is_edge(x, y):
+ """Is there an edge from x to y (0-indexed)?"""
+ if x >= y:
+ return 0
+ # Map x, y to index into bit string
+ index = x + (y * (y - 1) // 2)
+ return (bits >> index) % 2 == 1
+
+ return np.vectorize(is_edge)
+
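+# A minimal usage sketch: materialize the adjacency matrix encoded by a bit
+# mask. With bits=0b000101, bit A (edge 0->1) and bit C (edge 1->2) are set,
+# following the column-major layout documented above.
+#
+#   edge_fn = gen_is_edge_fn(0b000101)
+#   matrix = np.fromfunction(edge_fn, (4, 4), dtype=np.int8)
+#   # matrix[0, 1] and matrix[1, 2] are the only nonzero entries.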
+
+def is_full_dag(matrix):
+ """Full DAG == all vertices on a path from vert 0 to (V-1).
+
+ i.e. no disconnected or "hanging" vertices.
+
+ It is sufficient to check for:
+ 1) no rows of 0 except for row V-1 (only output vertex has no out-edges)
+ 2) no cols of 0 except for col 0 (only input vertex has no in-edges)
+
+ Args:
+ matrix: V x V upper-triangular adjacency matrix
+
+ Returns:
+    True if there are no dangling vertices.
+ """
+ shape = np.shape(matrix)
+
+ rows = matrix[:shape[0] - 1, :] == 0
+ rows = np.all(rows, axis=1) # Any row with all 0 will be True
+ rows_bad = np.any(rows)
+
+ cols = matrix[:, 1:] == 0
+ cols = np.all(cols, axis=0) # Any col with all 0 will be True
+ cols_bad = np.any(cols)
+
+ return (not rows_bad) and (not cols_bad)
+
+
+def num_edges(matrix):
+ """Computes number of edges in adjacency matrix."""
+ return np.sum(matrix)
+
+
+def hash_module(matrix, labeling):
+ """Computes a graph-invariance MD5 hash of the matrix and label pair.
+
+ Args:
+ matrix: np.ndarray square upper-triangular adjacency matrix.
+ labeling: list of int labels of length equal to both dimensions of
+ matrix.
+
+ Returns:
+ MD5 hash of the matrix and labeling.
+ """
+ vertices = np.shape(matrix)[0]
+ in_edges = np.sum(matrix, axis=0).tolist()
+ out_edges = np.sum(matrix, axis=1).tolist()
+
+ assert len(in_edges) == len(out_edges) == len(labeling)
+ hashes = list(zip(out_edges, in_edges, labeling))
+ hashes = [hashlib.md5(str(h).encode('utf-8')).hexdigest() for h in hashes]
+ # Computing this up to the diameter is probably sufficient but since the
+ # operation is fast, it is okay to repeat more times.
+ for _ in range(vertices):
+ new_hashes = []
+ for v in range(vertices):
+ in_neighbors = [hashes[w] for w in range(vertices) if matrix[w, v]]
+ out_neighbors = [hashes[w] for w in range(vertices) if matrix[v, w]]
+ new_hashes.append(hashlib.md5(
+ (''.join(sorted(in_neighbors)) + '|' +
+ ''.join(sorted(out_neighbors)) + '|' +
+ hashes[v]).encode('utf-8')).hexdigest())
+ hashes = new_hashes
+ fingerprint = hashlib.md5(str(sorted(hashes)).encode('utf-8')).hexdigest()
+
+ return fingerprint
+
+
+def permute_graph(graph, label, permutation):
+ """Permutes the graph and labels based on permutation.
+
+ Args:
+ graph: np.ndarray adjacency matrix.
+ label: list of labels of same length as graph dimensions.
+ permutation: a permutation list of ints of same length as graph dimensions.
+
+ Returns:
+ np.ndarray where vertex permutation[v] is vertex v from the original graph
+ """
+ # vertex permutation[v] in new graph is vertex v in the old graph
+ forward_perm = zip(permutation, list(range(len(permutation))))
+ inverse_perm = [x[1] for x in sorted(forward_perm)]
+ edge_fn = lambda x, y: graph[inverse_perm[x], inverse_perm[y]] == 1
+ new_matrix = np.fromfunction(np.vectorize(edge_fn),
+ (len(label), len(label)),
+ dtype=np.int8)
+ new_label = [label[inverse_perm[i]] for i in range(len(label))]
+ return new_matrix, new_label
+
+
+def is_isomorphic(graph1, graph2):
+ """Exhaustively checks if 2 graphs are isomorphic."""
+ matrix1, label1 = np.array(graph1[0]), graph1[1]
+ matrix2, label2 = np.array(graph2[0]), graph2[1]
+ assert np.shape(matrix1) == np.shape(matrix2)
+ assert len(label1) == len(label2)
+
+ vertices = np.shape(matrix1)[0]
+ # Note: input and output in our constrained graphs always map to themselves
+ # but this script does not enforce that.
+ for perm in itertools.permutations(range(0, vertices)):
+ pmatrix1, plabel1 = permute_graph(matrix1, label1, perm)
+ if np.array_equal(pmatrix1, matrix2) and plabel1 == label2:
+ return True
+
+ return False
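+
+
+if __name__ == "__main__":
+  # A minimal self-check sketch (illustrative values only): a graph and a
+  # vertex-permuted copy of it should be reported as isomorphic, and the
+  # isomorphism-invariant fingerprint from hash_module should not change.
+  base_matrix = np.array([[0, 1, 1, 0],
+                          [0, 0, 0, 1],
+                          [0, 0, 0, 1],
+                          [0, 0, 0, 0]])
+  base_label = [-1, 0, 1, -2]
+  # Swap the two middle vertices; input and output stay in place.
+  perm_matrix, perm_label = permute_graph(base_matrix, base_label, [0, 2, 1, 3])
+  print(is_isomorphic((base_matrix, base_label), (perm_matrix, perm_label)))  # True
+  print(hash_module(base_matrix, base_label) ==
+        hash_module(perm_matrix, perm_label))  # True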
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp101_lib/nb101_api.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp101_lib/nb101_api.py
new file mode 100644
index 000000000..0990d13ce
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp101_lib/nb101_api.py
@@ -0,0 +1,481 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+"""This is a NAS-Bench-101 version.
+
+Before using this API, download the data files from the links in the README.
+
+Usage:
+ # Load the data from file (this will take some time)
+ nasbench = api.NASBench('/path/to/pickle/or/shelve')
+
+ # Create an Inception-like module (5x5 convolution replaced with two 3x3
+ # convolutions).
+ model_spec = api.ModelSpec(
+ # Adjacency matrix of the module
+ matrix=[[0, 1, 1, 1, 0, 1, 0], # input layer
+ [0, 0, 0, 0, 0, 0, 1], # 1x1 conv
+ [0, 0, 0, 0, 0, 0, 1], # 3x3 conv
+ [0, 0, 0, 0, 1, 0, 0], # 5x5 conv (replaced by two 3x3's)
+ [0, 0, 0, 0, 0, 0, 1], # 5x5 conv (replaced by two 3x3's)
+ [0, 0, 0, 0, 0, 0, 1], # 3x3 max-pool
+ [0, 0, 0, 0, 0, 0, 0]], # output layer
+ # Operations at the vertices of the module, matches order of matrix
+ ops=[INPUT, CONV1X1, CONV3X3, CONV3X3, CONV3X3, MAXPOOL3X3, OUTPUT])
+
+
+ # Query this model from dataset
+ data = nasbench.query(model_spec)
+
+Adjacency matrices are expected to be upper-triangular 0-1 matrices within the
+defined search space (7 vertices, 9 edges, 3 allowed ops). The first and last
+operations must be 'input' and 'output'. The other operations should be from
+config['available_ops']. Currently, the available operations are:
+ CONV3X3 = "conv3x3-bn-relu"
+ CONV1X1 = "conv1x1-bn-relu"
+ MAXPOOL3X3 = "maxpool3x3"
+
+When querying a spec, the spec will first be automatically pruned (removing
+unused vertices and edges along with ops). If the pruned spec is still out of
+the search space, an OutOfDomainError will be raised, otherwise the data is
+returned.
+
+The returned data object is a dictionary with the following keys:
+ - module_adjacency: numpy array for the adjacency matrix
+ - module_operations: list of operation labels
+ - trainable_parameters: number of trainable parameters in the model
+ - training_time: the total training time in seconds up to this point
+ - train_accuracy: training accuracy
+  - validation_accuracy: validation accuracy
+ - test_accuracy: testing accuracy
+
+Instead of querying the dataset for a single run of a model, it is also possible
+to retrieve all metrics for a given spec, using:
+
+ fixed_stats, computed_stats = nasbench.get_metrics_from_spec(model_spec)
+
+The fixed_stats is a dictionary with the keys:
+ - module_adjacency
+ - module_operations
+ - trainable_parameters
+
+The computed_stats is a dictionary from epoch count to a list of metric
+dicts. For example, computed_stats[108][0] contains the metrics for the first
+repeat of the provided model trained to 108 epochs. The available keys are:
+ - halfway_training_time
+ - halfway_train_accuracy
+ - halfway_validation_accuracy
+ - halfway_test_accuracy
+ - final_training_time
+ - final_train_accuracy
+ - final_validation_accuracy
+ - final_test_accuracy
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import copy
+import random
+import time
+import shelve
+import hashlib
+import _pickle as pickle
+import numpy as np
+
+
+class OutOfDomainError(Exception):
+ """Indicates that the requested graph is outside of the search domain."""
+
+
+class NASBench(object):
+ """User-facing API for accessing the NASBench dataset."""
+
+ def __init__(self, dataset_file, seed=None, data_format='pickle'):
+ """Initialize dataset, this should only be done once per experiment.
+
+ Args:
+      dataset_file: path to the pickle or shelve file containing the dataset.
+      seed: random seed used for sampling queried models. Two NASBench objects
+        created with the same seed will return the same data points when queried
+        with the same models in the same order. By default, the seed is randomly
+        generated.
+      data_format: 'pickle' or 'shelve', matching the format of dataset_file.
+ """
+ self.config = {
+ 'module_vertices': 7,
+ 'max_edges': 9,
+ 'num_repeats': 3,
+ 'available_ops': ['conv3x3-bn-relu', 'conv1x1-bn-relu', 'maxpool3x3'],
+ }
+ random.seed(seed)
+
+ print('Loading dataset from file... This may take a few minutes...')
+ start = time.time()
+
+ # Stores the fixed statistics that are independent of evaluation (i.e.,
+ # adjacency matrix, operations, and number of parameters).
+ # hash --> metric name --> scalar
+ self.fixed_statistics = {}
+
+ # Stores the statistics that are computed via training and evaluating the
+ # model on CIFAR-10. Statistics are computed for multiple repeats of each
+ # model at each max epoch length.
+ # hash --> epochs --> repeat index --> metric name --> scalar
+ self.computed_statistics = {}
+
+    # Valid queryable epoch lengths. {4, 12, 36, 108} for the full dataset or
+ # {108} for the smaller dataset with only the 108 epochs.
+ self.valid_epochs = set()
+
+ # open the database
+ if data_format == 'shelve':
+ with shelve.open(dataset_file, 'r') as shelf:
+ for module_hash in shelf:
+ # Parse the data from the data file.
+ fixed_statistics, computed_statistics = shelf[module_hash]
+
+ self.fixed_statistics[module_hash] = fixed_statistics
+ self.computed_statistics[module_hash] = computed_statistics
+
+ self.valid_epochs.update(set(computed_statistics.keys()))
+ elif data_format == 'pickle':
+ with open(dataset_file, 'rb') as f:
+ data = pickle.load(f)
+ for module_hash, stats in data.items():
+ self.fixed_statistics[module_hash] = stats[0]
+ self.computed_statistics[module_hash] = stats[1]
+
+ self.valid_epochs.update(set(stats[1].keys()))
+ else:
+ raise Exception('Data format not supported')
+
+ elapsed = time.time() - start
+ print('Loaded dataset in %d seconds' % elapsed)
+
+ self.history = {}
+ self.training_time_spent = 0.0
+ self.total_epochs_spent = 0
+
+ def query(self, model_spec, epochs=108, stop_halfway=False):
+ """Fetch one of the evaluations for this model spec.
+
+ Each call will sample one of the config['num_repeats'] evaluations of the
+ model. This means that repeated queries of the same model (or isomorphic
+ models) may return identical metrics.
+
+ This function will increment the budget counters for benchmarking purposes.
+ See self.training_time_spent, and self.total_epochs_spent.
+
+ This function also allows querying the evaluation metrics at the halfway
+ point of training using stop_halfway. Using this option will increment the
+ budget counters only up to the halfway point.
+
+ Args:
+ model_spec: ModelSpec object.
+ epochs: number of epochs trained. Must be one of the evaluated number of
+ epochs, [4, 12, 36, 108] for the full dataset.
+ stop_halfway: if True, returned dict will only contain the training time
+ and accuracies at the halfway point of training (num_epochs/2).
+ Otherwise, returns the time and accuracies at the end of training
+ (num_epochs).
+
+ Returns:
+ dict containing the evaluated data for this object.
+
+ Raises:
+ OutOfDomainError: if model_spec or num_epochs is outside the search space.
+ """
+ if epochs not in self.valid_epochs:
+ raise OutOfDomainError('invalid number of epochs, must be one of %s'
+ % self.valid_epochs)
+
+ fixed_stat, computed_stat = self.get_metrics_from_spec(model_spec)
+ sampled_index = random.randint(0, self.config['num_repeats'] - 1)
+ computed_stat = computed_stat[epochs][sampled_index]
+
+ data = {}
+ data['module_adjacency'] = fixed_stat['module_adjacency']
+ data['module_operations'] = fixed_stat['module_operations']
+ data['trainable_parameters'] = fixed_stat['trainable_parameters']
+
+ if stop_halfway:
+ data['training_time'] = computed_stat['halfway_training_time']
+ data['train_accuracy'] = computed_stat['halfway_train_accuracy']
+ data['validation_accuracy'] = computed_stat['halfway_validation_accuracy']
+ data['test_accuracy'] = computed_stat['halfway_test_accuracy']
+ else:
+ data['training_time'] = computed_stat['final_training_time']
+ data['train_accuracy'] = computed_stat['final_train_accuracy']
+ data['validation_accuracy'] = computed_stat['final_validation_accuracy']
+ data['test_accuracy'] = computed_stat['final_test_accuracy']
+
+ self.training_time_spent += data['training_time']
+ if stop_halfway:
+ self.total_epochs_spent += epochs // 2
+ else:
+ self.total_epochs_spent += epochs
+
+ return data
+
+ def is_valid(self, model_spec):
+ """Checks the validity of the model_spec.
+
+ For the purposes of benchmarking, this does not increment the budget
+ counters.
+
+ Args:
+ model_spec: ModelSpec object.
+
+ Returns:
+ True if model is within space.
+ """
+ try:
+ self._check_spec(model_spec)
+ except OutOfDomainError:
+ return False
+
+ return True
+
+ def get_budget_counters(self):
+ """Returns the time and budget counters."""
+ return self.training_time_spent, self.total_epochs_spent
+
+ def reset_budget_counters(self):
+ """Reset the time and epoch budget counters."""
+ self.training_time_spent = 0.0
+ self.total_epochs_spent = 0
+
+ def hash_iterator(self):
+ """Returns iterator over all unique model hashes."""
+ return self.fixed_statistics.keys()
+
+ def get_metrics_from_hash(self, module_hash):
+ """Returns the metrics for all epochs and all repeats of a hash.
+
+ This method is for dataset analysis and should not be used for benchmarking.
+ As such, it does not increment any of the budget counters.
+
+ Args:
+ module_hash: MD5 hash, i.e., the values yielded by hash_iterator().
+
+ Returns:
+ fixed stats and computed stats of the model spec provided.
+ """
+ fixed_stat = copy.deepcopy(self.fixed_statistics[module_hash])
+ computed_stat = copy.deepcopy(self.computed_statistics[module_hash])
+ return fixed_stat, computed_stat
+
+ def get_metrics_from_spec(self, model_spec):
+ """Returns the metrics for all epochs and all repeats of a model.
+
+ This method is for dataset analysis and should not be used for benchmarking.
+ As such, it does not increment any of the budget counters.
+
+ Args:
+ model_spec: ModelSpec object.
+
+ Returns:
+ fixed stats and computed stats of the model spec provided.
+ """
+ self._check_spec(model_spec)
+ module_hash = self._hash_spec(model_spec)
+ return self.get_metrics_from_hash(module_hash)
+
+ def _check_spec(self, model_spec):
+ """Checks that the model spec is within the dataset."""
+ if not model_spec.valid_spec:
+ raise OutOfDomainError('invalid spec, provided graph is disconnected.')
+
+ num_vertices = len(model_spec.ops)
+ num_edges = np.sum(model_spec.matrix)
+
+ if num_vertices > self.config['module_vertices']:
+ raise OutOfDomainError('too many vertices, got %d (max vertices = %d)'
+                             % (num_vertices, self.config['module_vertices']))
+
+ if num_edges > self.config['max_edges']:
+ raise OutOfDomainError('too many edges, got %d (max edges = %d)'
+ % (num_edges, self.config['max_edges']))
+
+ if model_spec.ops[0] != 'input':
+ raise OutOfDomainError('first operation should be \'input\'')
+ if model_spec.ops[-1] != 'output':
+ raise OutOfDomainError('last operation should be \'output\'')
+ for op in model_spec.ops[1:-1]:
+ if op not in self.config['available_ops']:
+ raise OutOfDomainError('unsupported op %s (available ops = %s)'
+ % (op, self.config['available_ops']))
+
+ def _hash_spec(self, model_spec):
+ """Returns the MD5 hash for a provided model_spec."""
+ return model_spec.hash_spec(self.config['available_ops'])
+
+
+class ModelSpec(object):
+ """Model specification given adjacency matrix and labeling."""
+
+ def __init__(self, matrix, ops, data_format='channels_last'):
+ """Initialize the module spec.
+
+ Args:
+ matrix: ndarray or nested list with shape [V, V] for the adjacency matrix.
+ ops: V-length list of labels for the base ops used. The first and last
+ elements are ignored because they are the input and output vertices
+ which have no operations. The elements are retained to keep consistent
+ indexing.
+ data_format: channels_last or channels_first.
+
+ Raises:
+ ValueError: invalid matrix or ops
+ """
+ if not isinstance(matrix, np.ndarray):
+ matrix = np.array(matrix)
+ shape = np.shape(matrix)
+ if len(shape) != 2 or shape[0] != shape[1]:
+ raise ValueError('matrix must be square')
+ if shape[0] != len(ops):
+ raise ValueError('length of ops must match matrix dimensions')
+ if not is_upper_triangular(matrix):
+ raise ValueError('matrix must be upper triangular')
+
+ # Both the original and pruned matrices are deep copies of the matrix and
+ # ops so any changes to those after initialization are not recognized by the
+ # spec.
+ self.original_matrix = copy.deepcopy(matrix)
+ self.original_ops = copy.deepcopy(ops)
+
+ self.matrix = copy.deepcopy(matrix)
+ self.ops = copy.deepcopy(ops)
+ self.valid_spec = True
+ self._prune()
+
+ self.data_format = data_format
+
+ def _prune(self):
+ """Prune the extraneous parts of the graph.
+
+ General procedure:
+ 1) Remove parts of graph not connected to input.
+ 2) Remove parts of graph not connected to output.
+ 3) Reorder the vertices so that they are consecutive after steps 1 and 2.
+
+ These 3 steps can be combined by deleting the rows and columns of the
+ vertices that are not reachable from both the input and output (in reverse).
+ """
+ num_vertices = np.shape(self.original_matrix)[0]
+
+ # DFS forward from input
+ visited_from_input = set([0])
+ frontier = [0]
+ while frontier:
+ top = frontier.pop()
+ for v in range(top + 1, num_vertices):
+ if self.original_matrix[top, v] and v not in visited_from_input:
+ visited_from_input.add(v)
+ frontier.append(v)
+
+ # DFS backward from output
+ visited_from_output = set([num_vertices - 1])
+ frontier = [num_vertices - 1]
+ while frontier:
+ top = frontier.pop()
+ for v in range(0, top):
+ if self.original_matrix[v, top] and v not in visited_from_output:
+ visited_from_output.add(v)
+ frontier.append(v)
+
+ # Any vertex that isn't connected to both input and output is extraneous to
+ # the computation graph.
+ extraneous = set(range(num_vertices)).difference(
+ visited_from_input.intersection(visited_from_output))
+
+    # If the non-extraneous graph has fewer than 2 vertices, the input is not
+ # connected to the output and the spec is invalid.
+ if len(extraneous) > num_vertices - 2:
+ self.matrix = None
+ self.ops = None
+ self.valid_spec = False
+ return
+
+ self.matrix = np.delete(self.matrix, list(extraneous), axis=0)
+ self.matrix = np.delete(self.matrix, list(extraneous), axis=1)
+ for index in sorted(extraneous, reverse=True):
+ del self.ops[index]
+
+ def hash_spec(self, canonical_ops):
+ """Computes the isomorphism-invariant graph hash of this spec.
+
+ Args:
+ canonical_ops: list of operations in the canonical ordering which they
+ were assigned (i.e. the order provided in the config['available_ops']).
+
+ Returns:
+ MD5 hash of this spec which can be used to query the dataset.
+ """
+ # Invert the operations back to integer label indices used in graph gen.
+ labeling = [-1] + [canonical_ops.index(op) for op in self.ops[1:-1]] + [-2]
+ return hash_module(self.matrix, labeling)
+
+
+def is_upper_triangular(matrix):
+ """True if matrix is 0 on diagonal and below."""
+ for src in range(np.shape(matrix)[0]):
+ for dst in range(0, src + 1):
+ if matrix[src, dst] != 0:
+ return False
+
+ return True
+
+
+def hash_module(matrix, labeling):
+ """Computes a graph-invariance MD5 hash of the matrix and label pair.
+
+ Args:
+ matrix: np.ndarray square upper-triangular adjacency matrix.
+ labeling: list of int labels of length equal to both dimensions of
+ matrix.
+
+ Returns:
+ MD5 hash of the matrix and labeling.
+ """
+ vertices = np.shape(matrix)[0]
+ in_edges = np.sum(matrix, axis=0).tolist()
+ out_edges = np.sum(matrix, axis=1).tolist()
+
+ assert len(in_edges) == len(out_edges) == len(labeling)
+ hashes = list(zip(out_edges, in_edges, labeling))
+ hashes = [hashlib.md5(str(h).encode('utf-8')).hexdigest() for h in hashes]
+ # Computing this up to the diameter is probably sufficient but since the
+ # operation is fast, it is okay to repeat more times.
+ for _ in range(vertices):
+ new_hashes = []
+ for v in range(vertices):
+ in_neighbors = [hashes[w] for w in range(vertices) if matrix[w, v]]
+ out_neighbors = [hashes[w] for w in range(vertices) if matrix[v, w]]
+ new_hashes.append(hashlib.md5(
+ (''.join(sorted(in_neighbors)) + '|' +
+ ''.join(sorted(out_neighbors)) + '|' +
+ hashes[v]).encode('utf-8')).hexdigest())
+ hashes = new_hashes
+ fingerprint = hashlib.md5(str(sorted(hashes)).encode('utf-8')).hexdigest()
+
+ return fingerprint
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp201_lib/nasbench2.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp201_lib/nasbench2.py
new file mode 100644
index 000000000..e845b6442
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp201_lib/nasbench2.py
@@ -0,0 +1,117 @@
+# Copyright 2021 Samsung Electronics Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+from .nasbench2_ops import *
+
+
+def gen_searchcell_mask_from_arch_str(arch_str):
+ nodes = arch_str.split('+')
+ nodes = [node[1:-1].split('|') for node in nodes]
+ nodes = [[op_and_input.split('~') for op_and_input in node] for node in nodes]
+
+ keep_mask = []
+ for curr_node_idx in range(len(nodes)):
+ for prev_node_idx in range(curr_node_idx+1):
+ _op = [edge[0] for edge in nodes[curr_node_idx] if int(edge[1]) == prev_node_idx]
+ assert len(_op) == 1, 'The arch string does not follow the assumption of 1 connection between two nodes.'
+ for _op_name in OPS.keys():
+ keep_mask.append(_op[0] == _op_name)
+ return keep_mask
+
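+# A small worked sketch: the arch string used in the __main__ block below has
+# 1 + 2 + 3 = 6 edges; with the 5 candidate operations in OPS the returned
+# mask holds 6 * 5 = 30 booleans, exactly one True per edge.
+#
+#   mask = gen_searchcell_mask_from_arch_str(
+#       '|nor_conv_3x3~0|+|none~0|none~1|+|avg_pool_3x3~0|nor_conv_3x3~1|nor_conv_3x3~2|')
+#   assert len(mask) == 30 and sum(mask) == 6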
+
+def get_model_from_arch_str(arch_str, num_classes, bn=True, init_channels=16):
+ keep_mask = gen_searchcell_mask_from_arch_str(arch_str)
+ net = NAS201Model(arch_str=arch_str, num_classes=num_classes, use_bn=bn, keep_mask=keep_mask, stem_ch=init_channels)
+ return net
+
+
+def get_super_model(arch_str, num_classes, use_bn=True):
+    # No keep_mask is passed, so every candidate op is kept (the super network).
+    net = NAS201Model(arch_str=arch_str, num_classes=num_classes, use_bn=use_bn)
+ return net
+
+
+class NAS201Model(nn.Module):
+
+ def __init__(self, arch_str, num_classes, use_bn=True, keep_mask=None, stem_ch=16):
+ super(NAS201Model, self).__init__()
+ self.arch_str=arch_str
+ self.num_classes=num_classes
+ self.use_bn= use_bn
+ self.stem_ch = stem_ch
+
+ self.stem = stem(out_channels=stem_ch, use_bn=use_bn)
+ self.stack_cell1 = nn.Sequential(*[SearchCell(in_channels=stem_ch, out_channels=stem_ch, stride=1, affine=False, track_running_stats=False, use_bn=use_bn, keep_mask=keep_mask) for i in range(5)])
+ self.reduction1 = reduction(in_channels=stem_ch, out_channels=stem_ch*2)
+ self.stack_cell2 = nn.Sequential(*[SearchCell(in_channels=stem_ch*2, out_channels=stem_ch*2, stride=1, affine=False, track_running_stats=False, use_bn=use_bn, keep_mask=keep_mask) for i in range(5)])
+ self.reduction2 = reduction(in_channels=stem_ch*2, out_channels=stem_ch*4)
+ self.stack_cell3 = nn.Sequential(*[SearchCell(in_channels=stem_ch*4, out_channels=stem_ch*4, stride=1, affine=False, track_running_stats=False, use_bn=use_bn, keep_mask=keep_mask) for i in range(5)])
+ self.top = top(in_dims=stem_ch*4, num_classes=num_classes, use_bn=use_bn)
+
+ def forward(self, x):
+ x = self.stem(x)
+
+ x = self.stack_cell1(x)
+ x = self.reduction1(x)
+
+ x = self.stack_cell2(x)
+ x = self.reduction2(x)
+
+ x = self.stack_cell3(x)
+
+ x = self.top(x)
+ return x
+
+ def get_prunable_copy(self, bn=False):
+        model_new = get_model_from_arch_str(self.arch_str, self.num_classes, bn=bn, init_channels=self.stem_ch)
+
+ #TODO this is quite brittle and doesn't work with nn.Sequential when bn is different
+        # it is only required to maintain initialization -- maybe init after get_prunable_copy?
+ model_new.load_state_dict(self.state_dict(), strict=False)
+ model_new.train()
+
+ return model_new
+
+
+def get_arch_str_from_model(net):
+ search_cell = net.stack_cell1[0].options
+ keep_mask = net.stack_cell1[0].keep_mask
+ num_nodes = net.stack_cell1[0].num_nodes
+
+ nodes = []
+ idx = 0
+ for curr_node in range(num_nodes -1):
+ edges = []
+ for prev_node in range(curr_node+1): # n-1 prev nodes
+ for _op_name in OPS.keys():
+ if keep_mask[idx]:
+ edges.append(f'{_op_name}~{prev_node}')
+ idx += 1
+ node_str = '|'.join(edges)
+ node_str = f'|{node_str}|'
+ nodes.append(node_str)
+ arch_str = '+'.join(nodes)
+ return arch_str
+
+
+if __name__ == "__main__":
+ arch_str = '|nor_conv_3x3~0|+|none~0|none~1|+|avg_pool_3x3~0|nor_conv_3x3~1|nor_conv_3x3~2|'
+
+ n = get_model_from_arch_str(arch_str=arch_str, num_classes=10)
+ print(n.stack_cell1[0])
+
+ arch_str2 = get_arch_str_from_model(n)
+ print(arch_str)
+ print(arch_str2)
+    print(f'Are the two arch strings the same? {arch_str == arch_str2}')
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp201_lib/nasbench2_ops.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp201_lib/nasbench2_ops.py
new file mode 100644
index 000000000..efcdba322
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp201_lib/nasbench2_ops.py
@@ -0,0 +1,160 @@
+# Copyright 2021 Samsung Electronics Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+import torch.nn as nn
+
+class ReLUConvBN(nn.Module):
+
+ def __init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation, affine, track_running_stats=True, use_bn=True, name='ReLUConvBN'):
+ super(ReLUConvBN, self).__init__()
+ self.name = name
+ if use_bn:
+ self.op = nn.Sequential(
+ nn.ReLU(inplace=False),
+ nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation, bias=not affine),
+ nn.BatchNorm2d(out_channels, affine=affine, track_running_stats=track_running_stats)
+ )
+ else:
+ self.op = nn.Sequential(
+ nn.ReLU(inplace=False),
+ nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation, bias=not affine)
+ )
+
+ def forward(self, x):
+ return self.op(x)
+
+class Identity(nn.Module):
+ def __init__(self, name='Identity'):
+        super(Identity, self).__init__()
+        self.name = name
+
+ def forward(self, x):
+ return x
+
+class Zero(nn.Module):
+
+ def __init__(self, stride, name='Zero'):
+        super(Zero, self).__init__()
+        self.name = name
+ self.stride = stride
+
+ def forward(self, x):
+ if self.stride == 1:
+ return x.mul(0.)
+ return x[:,:,::self.stride,::self.stride].mul(0.)
+
+class POOLING(nn.Module):
+ def __init__(self, kernel_size, stride, padding, name='POOLING'):
+ super(POOLING, self).__init__()
+ self.name = name
+ self.avgpool = nn.AvgPool2d(kernel_size=kernel_size, stride=1, padding=1, count_include_pad=False)
+
+ def forward(self, x):
+ return self.avgpool(x)
+
+
+class reduction(nn.Module):
+ def __init__(self, in_channels, out_channels):
+ super(reduction, self).__init__()
+ self.residual = nn.Sequential(
+ nn.AvgPool2d(kernel_size=2, stride=2, padding=0),
+ nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0, bias=False))
+
+ self.conv_a = ReLUConvBN(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=2, padding=1, dilation=1, affine=True, track_running_stats=True)
+ self.conv_b = ReLUConvBN(in_channels=out_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1, dilation=1, affine=True, track_running_stats=True)
+
+ def forward(self, x):
+ basicblock = self.conv_a(x)
+ basicblock = self.conv_b(basicblock)
+ residual = self.residual(x)
+ return residual + basicblock
+
+class stem(nn.Module):
+ def __init__(self, out_channels, use_bn=True):
+ super(stem, self).__init__()
+ if use_bn:
+ self.net = nn.Sequential(
+ nn.Conv2d(in_channels=3, out_channels=out_channels, kernel_size=3, padding=1, bias=False),
+ nn.BatchNorm2d(out_channels))
+ else:
+ self.net = nn.Sequential(
+ nn.Conv2d(in_channels=3, out_channels=out_channels, kernel_size=3, padding=1, bias=False)
+ )
+
+ def forward(self, x):
+ return self.net(x)
+
+class top(nn.Module):
+ def __init__(self, in_dims, num_classes, use_bn=True):
+ super(top, self).__init__()
+ if use_bn:
+ self.lastact = nn.Sequential(nn.BatchNorm2d(in_dims), nn.ReLU(inplace=True))
+ else:
+ self.lastact = nn.ReLU(inplace=True)
+ self.global_pooling = nn.AdaptiveAvgPool2d(1)
+ self.classifier = nn.Linear(in_dims, num_classes)
+
+ def forward(self, x):
+ x = self.lastact(x)
+ x = self.global_pooling(x)
+ x = x.view(x.size(0), -1)
+ logits = self.classifier(x)
+ return logits
+
+
+class SearchCell(nn.Module):
+
+ def __init__(self, in_channels, out_channels, stride, affine, track_running_stats, use_bn=True, num_nodes=4, keep_mask=None):
+ super(SearchCell, self).__init__()
+ self.num_nodes = num_nodes
+ self.options = nn.ModuleList()
+ for curr_node in range(self.num_nodes-1):
+ for prev_node in range(curr_node+1):
+ for _op_name in OPS.keys():
+ op = OPS[_op_name](in_channels, out_channels, stride, affine, track_running_stats, use_bn)
+ self.options.append(op)
+
+ if keep_mask is not None:
+ self.keep_mask = keep_mask
+ else:
+ self.keep_mask = [True]*len(self.options)
+
+ def forward(self, x):
+ outs = [x]
+
+ idx = 0
+ for curr_node in range(self.num_nodes-1):
+ edges_in = []
+ for prev_node in range(curr_node+1): # n-1 prev nodes
+ for op_idx in range(len(OPS.keys())):
+ if self.keep_mask[idx]:
+ edges_in.append(self.options[idx](outs[prev_node]))
+ idx += 1
+ node_output = sum(edges_in)
+ outs.append(node_output)
+
+ return outs[-1]
+
+
+
+OPS = {
+ 'none' : lambda in_channels, out_channels, stride, affine, track_running_stats, use_bn: Zero(stride, name='none'),
+ 'avg_pool_3x3' : lambda in_channels, out_channels, stride, affine, track_running_stats, use_bn: POOLING(3, 1, 1, name='avg_pool_3x3'),
+ 'nor_conv_3x3' : lambda in_channels, out_channels, stride, affine, track_running_stats, use_bn: ReLUConvBN(in_channels, out_channels, 3, 1, 1, 1, affine, track_running_stats, use_bn, name='nor_conv_3x3'),
+ 'nor_conv_1x1' : lambda in_channels, out_channels, stride, affine, track_running_stats, use_bn: ReLUConvBN(in_channels, out_channels, 1, 1, 0, 1, affine, track_running_stats, use_bn, name='nor_conv_1x1'),
+ 'skip_connect' : lambda in_channels, out_channels, stride, affine, track_running_stats, use_bn: Identity(name='skip_connect'),
+}
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/__init__.py
new file mode 100644
index 000000000..3df60b02f
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/__init__.py
@@ -0,0 +1,17 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/compute.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/compute.py
new file mode 100644
index 000000000..0400ef546
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/compute.py
@@ -0,0 +1,137 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# for binary insert
+from typing import List
+import numpy as np
+
+
+def binary_insert_get_rank(rank_list: list, new_item: List) -> int:
+ """
+    Insert new_item into rank_list (kept sorted by score) and return its rank.
+    :param rank_list: list of [id, score] pairs, sorted by score in ascending order
+    :param new_item: [id, score] pair to insert
+    :return: the 0-based index (rank) at which new_item was inserted
+ """
+ index = search_position(rank_list, new_item)
+ # search the position to insert into
+ rank_list.insert(index, new_item)
+ return index
+
+
+# O(logN) search the position to insert into
+def search_position(rank_list_m: list, new_item: List):
+ if len(rank_list_m) == 0:
+ return 0
+ left = 0
+ right = len(rank_list_m) - 1
+ while left + 1 < right:
+ mid = int((left + right) / 2)
+ if rank_list_m[mid][1] <= new_item[1]:
+ left = mid
+ else:
+ right = mid
+
+    # Handle the two boundary candidates explicitly.
+ if rank_list_m[right][1] <= new_item[1]:
+ return right + 1
+ elif rank_list_m[left][1] <= new_item[1]:
+ return left + 1
+ else:
+ return left
+
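+# A minimal usage sketch: the rank list stays sorted by score (index 1) and the
+# returned value is the 0-based insertion index, i.e. the rank of the new item.
+#
+#   ranks = []
+#   binary_insert_get_rank(ranks, ["m1", 0.3])   # -> 0
+#   binary_insert_get_rank(ranks, ["m2", 0.9])   # -> 1
+#   binary_insert_get_rank(ranks, ["m3", 0.5])   # -> 1
+#   # ranks is now [["m1", 0.3], ["m3", 0.5], ["m2", 0.9]]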
+
+def generate_global_rank(ml_data_score_dic: dict, alg_name_list: List) -> dict:
+ """
+    ml_data_score_dic: { model_id: {alg1: score1, alg2: score2} }
+ return: { model_id: {alg1_alg2: rank_score} }
+ """
+
+ history = {}
+ for alg in alg_name_list:
+ history[alg] = []
+
+ for arch_id, arch_score in ml_data_score_dic.items():
+ # add model and score to local list
+ for alg, score in arch_score.items():
+ if alg in alg_name_list:
+ binary_insert_get_rank(history[alg], [str(arch_id), float(score)])
+
+ # convert multiple scores into rank value
+ model_new_rank_score = {}
+ current_explored_models = 0
+ for alg in alg_name_list:
+ current_explored_models = len(history[alg])
+ for rank_index in range(len(history[alg])):
+ ms_ins = history[alg][rank_index]
+ # rank = index + 1, since index can be 0
+ if ms_ins[0] in model_new_rank_score:
+ model_new_rank_score[ms_ins[0]] += rank_index + 1
+ else:
+ model_new_rank_score[ms_ins[0]] = rank_index + 1
+
+ for ele in model_new_rank_score.keys():
+ model_new_rank_score[ele] = \
+ {"_".join(list(alg_name_list)): model_new_rank_score[ele] / current_explored_models}
+
+ return model_new_rank_score
+
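+# A small worked sketch (hypothetical algorithm names): each score is replaced
+# by its rank within that algorithm, ranks are summed across algorithms, and
+# the sum is divided by the number of explored models.
+#
+#   scores = {"a": {"nas_wot": 1.0, "synflow": 3.0},
+#             "b": {"nas_wot": 2.0, "synflow": 1.0}}
+#   generate_global_rank(scores, ["nas_wot", "synflow"])
+#   # -> {"a": {"nas_wot_synflow": 1.5}, "b": {"nas_wot_synflow": 1.5}}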
+
+def log_scale_x_array(num_points, max_minute, base=10) -> list:
+ """
+    Return a list of time points (in minutes) spaced evenly on a log scale.
+ """
+ # Set the minimum and maximum values for the log scale
+ min_val = 1 # 1 second
+    max_val = max_minute * 60  # max_minute minutes converted to seconds
+
+ # Generate the log scale values
+ log_vals = np.logspace(np.log10(min_val), np.log10(max_val), num=num_points, base=base)
+
+ # Convert the log scale values to minutes
+ log_vals_min = log_vals / 60
+
+    # Return the log scale values in minutes
+
+ return log_vals_min.tolist()
+
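+# A minimal usage sketch: four sample points between 1 second and 60 minutes,
+# evenly spaced in log10 space, returned in minutes.
+#
+#   log_scale_x_array(4, max_minute=60)
+#   # -> roughly [0.017, 0.26, 3.9, 60.0]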
+
+def sample_in_log_scale(lst: List, num_points: int) -> List:
+ indices = np.logspace(0, np.log10(len(lst) - 1), num_points + num_points // 2, dtype=int)
+ # Remove any duplicate indices
+ indices = np.unique(indices)
+ return list(indices)
+
+
+def sample_in_log_scale_new(lstM: List, num_points: int) -> List:
+ lst = np.array(lstM)
+ # Create an evenly spaced array in the log scale domain
+ evenly_spaced_log_x = np.linspace(np.log10(lst.min()), np.log10(lst.max()), num_points)
+ # Convert the new array back to the original scale
+ evenly_spaced_x = 10 ** evenly_spaced_log_x
+ # Find the indices of the sampled points in the original x-array
+ indices = [np.abs(lst - point).argmin() for point in evenly_spaced_x]
+ return indices
+
+
+def sample_in_line_scale(lst: List, num_points: int) -> List:
+ indices = np.linspace(0, len(lst) - 1, num_points, dtype=int)
+ # Remove any duplicate indices
+ indices = np.unique(indices)
+ return list(indices)
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/correlation.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/correlation.py
new file mode 100644
index 000000000..eb227f5c3
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/correlation.py
@@ -0,0 +1,133 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from scipy import stats
+from src.common.constant import CommonVars
+import numpy as np
+from src.logger import logger
+from sklearn import metrics
+
+
+class CorCoefficient:
+
+ @staticmethod
+ def measure(x1: list, x2: list, measure_metrics: str = CommonVars.AllCorrelation) -> dict:
+ """
+        Measure the correlation coefficient(s) between x1 and x2.
+        (Pearson additionally assumes each input is approximately normally distributed.)
+ :param x1: list1
+ :param x2: list2
+ :param measure_metrics: str
+        :return: dict mapping each computed metric name to its correlation value.
+          Like other correlation coefficients, each value varies between -1 and +1,
+          with 0 implying no correlation; -1 or +1 imply an exact monotonic
+          (or, for Pearson, linear) relationship.
+ """
+
+ result = {}
+ if measure_metrics == CommonVars.KendallTau:
+ correlation, p_value = stats.kendalltau(x1, x2, nan_policy='omit')
+ result[CommonVars.KendallTau] = correlation
+ elif measure_metrics == CommonVars.Spearman:
+ correlation, p_value = stats.spearmanr(x1, x2, nan_policy='omit')
+ result[CommonVars.Spearman] = correlation
+ elif measure_metrics == CommonVars.Pearson:
+ correlation, p_value = stats.pearsonr(x1, x2)
+ result[CommonVars.Pearson] = correlation
+ elif measure_metrics == CommonVars.AvgCorrelation:
+ # calculate average over all
+ correlation1, p_value = stats.kendalltau(x1, x2, nan_policy='omit')
+ correlation2, p_value = stats.spearmanr(x1, x2, nan_policy='omit')
+ correlation3, p_value = stats.pearsonr(x1, x2)
+ correlation = (correlation1 + correlation2 + correlation3) / 3
+ result[CommonVars.AvgCorrelation] = correlation
+ elif measure_metrics == CommonVars.AllCorrelation:
+ correlation1, p_value = stats.kendalltau(x1, x2, nan_policy='omit')
+ correlation2, p_value = stats.spearmanr(x1, x2, nan_policy='omit')
+ correlation3, p_value = stats.pearsonr(x1, x2)
+ correlation4 = (correlation1 + correlation2 + correlation3) / 3
+ result[CommonVars.KendallTau] = correlation1
+ result[CommonVars.Spearman] = correlation2
+ result[CommonVars.Pearson] = correlation3
+ result[CommonVars.AvgCorrelation] = correlation4
+ else:
+ raise NotImplementedError(measure_metrics + " is not implemented")
+
+ return result
+
+ @staticmethod
+ def compare(ytest, test_pred):
+ ytest = np.array(ytest)
+ test_pred = np.array(test_pred)
+ METRICS = [
+ "mae",
+ "rmse",
+ "pearson",
+ "spearman",
+ "kendalltau",
+ "kt_2dec",
+ "kt_1dec",
+ "precision_10",
+ "precision_20",
+ "full_ytest",
+ "full_testpred",
+ ]
+ metrics_dict = {}
+
+ try:
+ metrics_dict["mae"] = np.mean(abs(test_pred - ytest))
+ metrics_dict["rmse"] = metrics.mean_squared_error(
+ ytest, test_pred, squared=False
+ )
+ metrics_dict["pearson"] = np.abs(np.corrcoef(ytest, test_pred)[1, 0])
+ metrics_dict["spearman"] = stats.spearmanr(ytest, test_pred)[0]
+ metrics_dict["kendalltau"] = stats.kendalltau(ytest, test_pred)[0]
+ metrics_dict["kt_2dec"] = stats.kendalltau(
+ ytest, np.round(test_pred, decimals=2)
+ )[0]
+ metrics_dict["kt_1dec"] = stats.kendalltau(
+ ytest, np.round(test_pred, decimals=1)
+ )[0]
+ print("ytest = ", ytest)
+ print("test_pred = ", test_pred)
+ for k in [10, 20]:
+ top_ytest = np.array(
+ [y > sorted(ytest)[max(-len(ytest), -k - 1)] for y in ytest]
+ )
+ top_test_pred = np.array(
+ [
+ y > sorted(test_pred)[max(-len(test_pred), -k - 1)]
+ for y in test_pred
+ ]
+ )
+ metrics_dict["precision_{}".format(k)] = (
+ sum(top_ytest & top_test_pred) / k
+ )
+ metrics_dict["full_ytest"] = ytest.tolist()
+ metrics_dict["full_testpred"] = test_pred.tolist()
+
+        except Exception:
+ for metric in METRICS:
+ metrics_dict[metric] = float("nan")
+ if np.isnan(metrics_dict["pearson"]) or not np.isfinite(
+ metrics_dict["pearson"]
+ ):
+ logger.info("Error when computing metrics. ytest and test_pred are:")
+ logger.info(ytest)
+ logger.info(test_pred)
+
+ return metrics_dict
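+
+
+if __name__ == "__main__":
+    # A minimal sanity-check sketch: two perfectly linearly related lists should
+    # give Kendall tau, Spearman, and Pearson correlations of 1.0.
+    x1 = [0.1, 0.2, 0.3, 0.4, 0.5]
+    x2 = [1.0, 2.0, 3.0, 4.0, 5.0]
+    print(CorCoefficient.measure(x1, x2, CommonVars.AllCorrelation))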
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/io_tools.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/io_tools.py
new file mode 100644
index 000000000..e657b9e04
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/io_tools.py
@@ -0,0 +1,60 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import json
+import pickle
+import os.path
+
+
+def read_json(file_name):
+ print(f"Loading {file_name}...")
+ is_exist = os.path.exists(file_name)
+ if is_exist:
+ with open(file_name, 'r') as readfile:
+ data = json.load(readfile)
+ return data
+ else:
+        print(f"{file_name} does not exist")
+ return {}
+
+
+def write_json(file_name, data):
+    print(f"writing {file_name}...")
+ with open(file_name, 'w') as outfile:
+ outfile.write(json.dumps(data))
+
+
+def read_pickle(file_name):
+    print(f"Loading pickle {file_name}...")
+ with open(file_name, 'rb') as f:
+ data = pickle.load(f)
+ return data
+
+
+def write_pickle(file_name, data):
+ print(f"writing pickle {file_name}...")
+    with open(file_name, 'wb') as handle:
+ pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)
+
+
+if __name__ == "__main__":
+ a = {1:1}
+ write_json("./asdf.json", a)
+ b = {2:2323}
+ write_json("./asdf.json", b)
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/res_measure.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/res_measure.py
new file mode 100644
index 000000000..93270ae31
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/res_measure.py
@@ -0,0 +1,102 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import psutil
+import gpustat
+import threading
+import time
+from src.tools.io_tools import write_json
+import sys
+
+def print_cpu_gpu_usage(interval=1, output_file="path_to_folder", stop_event=None):
+ def print_usage():
+ print("Starting to print usage") # Debugging print
+ # Get current process
+ main_process = psutil.Process(os.getpid())
+
+ # Create an empty dictionary to store metrics
+ metrics = {'cpu_usage': [], 'memory_usage': [], 'gpu_usage': []}
+
+ while not stop_event.is_set():
+ cpu_percent = 0
+ mem_usage_mb = 0
+ main_process.cpu_percent()
+ for process in main_process.children(recursive=True): # Include all child processes
+ try:
+ cpu_percent += process.cpu_percent()
+ mem_usage_mb += process.memory_info().rss / (1024 ** 2)
+ except psutil.NoSuchProcess:
+ # Process does not exist, so add 0 to cpu_percent and mem_usage_mb
+ pass
+ cpu_percent += main_process.cpu_percent()
+ mem_usage_mb += main_process.memory_info().rss / (1024 ** 2)
+
+ metrics['cpu_usage'].append(cpu_percent)
+ metrics['memory_usage'].append(mem_usage_mb)
+
+ try:
+ gpu_stats = gpustat.GPUStatCollection.new_query()
+ for gpu in gpu_stats:
+ metrics['gpu_usage'].append((gpu.index, gpu.utilization, gpu.memory_used))
+ except Exception as e:
+ pass
+ # print(f"Exception encountered when fetching GPU stats: {e}")
+
+ # If it's time to write metrics to a file, do so
+ if len(metrics['cpu_usage']) % 40 == 0:
+ write_json(output_file, metrics)
+
+ time.sleep(interval)
+
+        print("Stopped monitoring, flushing metrics to disk")
+ write_json(output_file, metrics)
+
+ stop_event = stop_event or threading.Event()
+ thread = threading.Thread(target=print_usage)
+ thread.start()
+ return stop_event, thread
+
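+# A minimal usage sketch (hypothetical output path): sample usage every 2
+# seconds in the background while the caller runs its workload, then stop the
+# monitor thread and let it flush the collected metrics to disk.
+#
+#   stop_event, monitor = print_cpu_gpu_usage(interval=2, output_file="./usage.json")
+#   ...  # run training / scoring here
+#   stop_event.set()
+#   monitor.join()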
+def get_variable_memory_size(obj):
+ # If it's a PyTorch tensor and on the GPU
+ import torch
+ if torch.is_tensor(obj) and obj.is_cuda:
+ return obj.element_size() * obj.nelement()
+ else:
+ return sys.getsizeof(obj)
+
+def print_memory_usage():
+ # Get current process
+ main_process = psutil.Process(os.getpid())
+ # Create an empty dictionary to store metrics
+ metrics = {'cpu_usage': [], 'memory_usage': []}
+ cpu_percent = 0
+ mem_usage_mb = 0
+ main_process.cpu_percent()
+ for process in main_process.children(recursive=True): # Include all child processes
+ try:
+ cpu_percent += process.cpu_percent()
+ mem_usage_mb += process.memory_info().rss / (1024 ** 2)
+ except psutil.NoSuchProcess:
+ # Process does not exist, so add 0 to cpu_percent and mem_usage_mb
+ pass
+ cpu_percent += main_process.cpu_percent()
+ mem_usage_mb += main_process.memory_info().rss / (1024 ** 2)
+ metrics['cpu_usage'].append(cpu_percent)
+ metrics['memory_usage'].append(mem_usage_mb)
+ print(metrics)
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/utils.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/utils.py
new file mode 100644
index 000000000..8e4232caa
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/utils.py
@@ -0,0 +1,472 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import math
+import os
+import random
+import sys
+import time
+import warnings
+
+import numpy
+import numpy as np
+import shutil
+import logging
+
+warnings.filterwarnings("error")
+
+
+def timeSince(since=None, s=None):
+ if s is None:
+ s = int(time.time() - since)
+ m = math.floor(s / 60)
+ s %= 60
+ h = math.floor(m / 60)
+ m %= 60
+ return '%dh %dm %ds' % (h, m, s)
+
+
+class AvgrageMeter(object):
+ """Computes and stores the average and current value"""
+
+ def __init__(self):
+ self.reset()
+
+ def reset(self):
+ self.val = 0
+ self.avg = 0
+ self.sum = 0
+ self.count = 0
+
+ def update(self, val, n=1):
+ self.val = val
+ self.sum += val * n
+ self.count += n
+ self.avg = self.sum / self.count
+
+import torch
+def get_correct_num(y, target):
+ pred_label = torch.argmax(y, dim=1)
+ return (target == pred_label).sum().item()
+
+
+def accuracy(output, target, topk=(1,)):
+ maxk = max(topk)
+ batch_size = target.size(0)
+
+ _, pred = output.topk(maxk, 1, True, True)
+ pred = pred.t()
+ correct = pred.eq(target.view(1, -1).expand_as(pred))
+
+ res = []
+ for k in topk:
+ correct_k = correct[:k].view(-1).float().sum(0)
+ res.append(correct_k.mul_(100.0 / batch_size))
+ return res
+
+
+class Cutout(object):
+ def __init__(self, length):
+ self.length = length
+
+ def __call__(self, img):
+ h, w = img.size(1), img.size(2)
+ mask = np.ones((h, w), np.float32)
+ y = np.random.randint(h)
+ x = np.random.randint(w)
+
+ y1 = np.clip(y - self.length // 2, 0, h)
+ y2 = np.clip(y + self.length // 2, 0, h)
+ x1 = np.clip(x - self.length // 2, 0, w)
+ x2 = np.clip(x + self.length // 2, 0, w)
+
+ mask[y1:y2, x1:x2] = 0.
+ mask = torch.from_numpy(mask)
+ mask = mask.expand_as(img)
+ img *= mask
+ return img
+
+import torchvision.transforms as transforms
+def _data_transforms_cifar10(args):
+ CIFAR_MEAN = [0.49139968, 0.48215827, 0.44653124]
+ CIFAR_STD = [0.24703233, 0.24348505, 0.26158768]
+
+ train_transform = transforms.Compose([
+ transforms.RandomCrop(32, padding=4),
+ transforms.RandomHorizontalFlip(),
+ transforms.ToTensor(),
+ transforms.Normalize(CIFAR_MEAN, CIFAR_STD),
+ ])
+ if args.cutout:
+ train_transform.transforms.append(Cutout(args.cutout_length))
+
+ valid_transform = transforms.Compose([
+ transforms.ToTensor(),
+ transforms.Normalize(CIFAR_MEAN, CIFAR_STD),
+ ])
+ return train_transform, valid_transform
+
+import torchvision.datasets as dset
+def _get_cifar10(args):
+ train_transform, valid_transform = _data_transforms_cifar10(args)
+ train_data = dset.CIFAR10(
+ root=args.data, train=True, download=True, transform=train_transform
+ )
+ valid_data = dset.CIFAR10(
+ root=args.data, train=False, download=True, transform=valid_transform
+ )
+
+ train_queue = torch.utils.data.DataLoader(
+ train_data,
+ batch_size=args.batch_size,
+ shuffle=True,
+ pin_memory=True,
+ num_workers=4,
+ )
+
+ valid_queue = torch.utils.data.DataLoader(
+ valid_data,
+ batch_size=args.batch_size,
+ shuffle=False,
+ pin_memory=True,
+ num_workers=4,
+ )
+ return train_queue, valid_queue
+
+
+def _get_dist_cifar10(args):
+ train_transform, valid_transform = _data_transforms_cifar10(args)
+ train_data = dset.CIFAR10(
+ root=args.data, train=True, download=True, transform=train_transform
+ )
+ valid_data = dset.CIFAR10(
+ root=args.data, train=False, download=True, transform=valid_transform
+ )
+
+ sampler = torch.utils.data.distributed.DistributedSampler(
+ train_data, num_replicas=args.gpu_num, rank=args.local_rank)
+
+ train_queue = torch.utils.data.DataLoader(
+ train_data,
+ batch_size=args.batch_size // args.gpu_num,
+ pin_memory=True,
+ num_workers=4,
+ drop_last=True,
+ sampler=sampler
+ )
+
+ valid_queue = torch.utils.data.DataLoader(
+ valid_data,
+ batch_size=args.batch_size,
+ shuffle=False,
+ pin_memory=True,
+ num_workers=4,
+ )
+ return train_queue, valid_queue, sampler
+
+
+def _get_dist_imagenet(args):
+ traindir = os.path.join(args.data_dir, 'train')
+ valdir = os.path.join(args.data_dir, 'val')
+ normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225])
+
+ train_dataset = dset.ImageFolder(
+ traindir,
+ transforms.Compose([
+ transforms.RandomResizedCrop(224),
+ transforms.RandomHorizontalFlip(),
+ transforms.ColorJitter(
+ brightness=0.4,
+ contrast=0.4,
+ saturation=0.4,
+ hue=0.2),
+ transforms.ToTensor(),
+ normalize,
+ ]))
+
+ sampler = torch.utils.data.distributed.DistributedSampler(
+ train_dataset, num_replicas=args.gpu_num, rank=args.local_rank)
+
+ train_loader = torch.utils.data.DataLoader(
+ train_dataset, batch_size=args.batch_size // args.gpu_num, num_workers=max(args.gpu_num * 2, 4),
+ pin_memory=True, drop_last=True, sampler=sampler)
+
+ val_loader = torch.utils.data.DataLoader(
+ dset.ImageFolder(valdir, transforms.Compose([
+ transforms.Resize(256),
+ transforms.CenterCrop(224),
+ transforms.ToTensor(),
+ normalize,
+ ])),
+ batch_size=args.batch_size, shuffle=False,
+ num_workers=4, pin_memory=True)
+
+ return train_loader, val_loader, sampler
+
+
+def _data_transforms_cifar100(args):
+ CIFAR_MEAN = [0.5070751592371323, 0.48654887331495095, 0.4409178433670343]
+ CIFAR_STD = [0.2673342858792401, 0.2564384629170883, 0.27615047132568404]
+
+ train_transform = transforms.Compose([
+ transforms.RandomCrop(32, padding=4),
+ transforms.RandomHorizontalFlip(),
+ transforms.ToTensor(),
+ transforms.Normalize(CIFAR_MEAN, CIFAR_STD),
+ ])
+ if args.cutout:
+ train_transform.transforms.append(Cutout(args.cutout_length))
+
+ valid_transform = transforms.Compose([
+ transforms.ToTensor(),
+ transforms.Normalize(CIFAR_MEAN, CIFAR_STD),
+ ])
+ return train_transform, valid_transform
+
+
+def _get_cifar100(args):
+ train_transform, valid_transform = _data_transforms_cifar100(args)
+ train_data = dset.CIFAR100(
+ root=args.data, train=True, download=True, transform=train_transform
+ )
+ valid_data = dset.CIFAR100(
+ root=args.data, train=False, download=True, transform=valid_transform
+ )
+
+ train_queue = torch.utils.data.DataLoader(
+ train_data,
+ batch_size=args.batch_size,
+ shuffle=True,
+ pin_memory=True,
+ num_workers=4,
+ )
+
+ valid_queue = torch.utils.data.DataLoader(
+ valid_data,
+ batch_size=args.batch_size,
+ shuffle=False,
+ pin_memory=True,
+ num_workers=4,
+ )
+ return train_queue, valid_queue
+
+
+def _get_imagenet_tiny(args):
+ traindir = os.path.join(args.data, 'train')
+ validdir = os.path.join(args.data, 'val')
+ normalize = transforms.Normalize(
+ mean=[0.4802, 0.4481, 0.3975],
+ std=[0.2302, 0.2265, 0.2262]
+ )
+ train_transform = transforms.Compose([
+ transforms.RandomCrop(64, padding=4),
+ transforms.RandomHorizontalFlip(),
+ transforms.ToTensor(),
+ normalize,
+ ])
+ if args.cutout:
+ train_transform.transforms.append(Cutout(args.cutout_length))
+
+ train_data = dset.ImageFolder(
+ traindir,
+ train_transform
+ )
+ valid_data = dset.ImageFolder(
+ validdir,
+ transforms.Compose([
+ transforms.ToTensor(),
+ normalize,
+ ])
+ )
+
+ train_queue = torch.utils.data.DataLoader(
+ train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=4)
+
+ valid_queue = torch.utils.data.DataLoader(
+ valid_data, batch_size=args.batch_size // 2, shuffle=False, pin_memory=True, num_workers=4)
+ return train_queue, valid_queue
+
+
+def count_parameters_in_MB(model):
+ return np.sum([np.prod(v.size()) for v in model.parameters()]) / 1e6
+
+
+def count_parameters(model):
+ """
+ Get element number of all parameters matrix.
+ :param model:
+ :return:
+ """
+ return sum([torch.numel(v) for v in model.parameters()])
+
+
+def save(model, model_path):
+ torch.save(model.state_dict(), model_path)
+
+
+def load(model, model_path):
+ model.load_state_dict(torch.load(model_path))
+
+
+def load_ckpt(ckpt_path):
+ print(f'=> loading checkpoint {ckpt_path}...')
+ try:
+ checkpoint = torch.load(ckpt_path)
+    except Exception:
+        print(f"=> failed to load {ckpt_path}...")
+        exit()
+ return checkpoint
+
+
+def save_ckpt(ckpt, file_dir, file_name='model.ckpt', is_best=False):
+ if not os.path.exists(file_dir): os.makedirs(file_dir)
+ ckpt_path = os.path.join(file_dir, file_name)
+ torch.save(ckpt, ckpt_path)
+ if is_best: shutil.copyfile(ckpt_path, os.path.join(file_dir, f'best_{file_name}'))
+
+
+def drop_path(x, drop_prob, dims=(0,)):
+ from torch.autograd import Variable
+ var_size = [1 for _ in range(x.dim())]
+ for i in dims:
+ var_size[i] = x.size(i)
+ if drop_prob > 0.:
+ keep_prob = 1. - drop_prob
+ mask = Variable(torch.cuda.FloatTensor(*var_size).bernoulli_(keep_prob))
+ x.div_(keep_prob)
+ x.mul_(mask)
+ return x
+
+
+def create_exp_dir(path, scripts_to_save=None):
+ if not os.path.exists(path):
+ os.makedirs(path)
+ print('Experiment dir : {}'.format(path))
+
+ if scripts_to_save is not None:
+ os.makedirs(os.path.join(path, 'tools'))
+ for script in scripts_to_save:
+ dst_file = os.path.join(path, 'tools', os.path.basename(script))
+ shutil.copyfile(script, dst_file)
+
+
+class Performance(object):
+ def __init__(self, path):
+ self.path = path
+ self.data = None
+
+ def update(self, alphas_normal, alphas_reduce, val_loss):
+ import torch.nn.functional as F
+ a_normal = F.softmax(alphas_normal, dim=-1)
+ # print("alpha normal size: ", a_normal.data.size())
+ a_reduce = F.softmax(alphas_reduce, dim=-1)
+ # print("alpha reduce size: ", a_reduce.data.size())
+ data = np.concatenate([a_normal.data.view(-1),
+ a_reduce.data.view(-1),
+ np.array([val_loss.data])]).reshape(1, -1)
+ if self.data is not None:
+ self.data = np.concatenate([self.data, data], axis=0)
+ else:
+ self.data = data
+
+ def save(self):
+ np.save(self.path, self.data)
+
+
+def logger(log_dir, need_time=True, need_stdout=False):
+ log = logging.getLogger(__name__)
+ log.setLevel(logging.DEBUG)
+ fh = logging.FileHandler(log_dir)
+ fh.setLevel(logging.DEBUG)
+ formatter = logging.Formatter(fmt='%(asctime)s %(message)s', datefmt='%m/%d/%Y-%I:%M:%S')
+ if need_stdout:
+ ch = logging.StreamHandler(sys.stdout)
+ ch.setLevel(logging.DEBUG)
+ log.addHandler(ch)
+ if need_time:
+ fh.setFormatter(formatter)
+ if need_stdout:
+ ch.setFormatter(formatter)
+ log.addHandler(fh)
+ return log
+
+import torch.nn as nn
+class CrossEntropyLabelSmooth(nn.Module):
+
+ def __init__(self, num_classes, epsilon):
+ super(CrossEntropyLabelSmooth, self).__init__()
+ self.num_classes = num_classes
+ self.epsilon = epsilon
+ self.logsoftmax = nn.LogSoftmax(dim=1)
+
+ def forward(self, inputs, targets):
+ log_probs = self.logsoftmax(inputs)
+ targets = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1)
+ targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes
+ loss = (-targets * log_probs).mean(0).sum()
+ return loss
+
+
+def roc_auc_compute_fn(y_pred, y_target):
+ """ IGNITE.CONTRIB.METRICS.ROC_AUC """
+ try:
+ from sklearn.metrics import roc_auc_score
+ except ImportError:
+ raise RuntimeError("This contrib module requires sklearn to be installed.")
+
+ if y_pred.requires_grad:
+ y_pred = y_pred.detach()
+
+ if y_target.is_cuda:
+ y_target = y_target.cpu()
+ if y_pred.is_cuda:
+ y_pred = y_pred.cpu()
+
+ y_true = y_target.numpy()
+ y_pred = y_pred.numpy()
+ try:
+ return roc_auc_score(y_true, y_pred)
+ except ValueError:
+ # print('ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.')
+ return 0.
+
+
+def load_checkpoint(args):
+ try:
+ return torch.load(args.resume)
+ except RuntimeError:
+ raise RuntimeError(f"Fail to load checkpoint at {args.resume}")
+
+
+def save_checkpoint(ckpt, is_best, file_dir, file_name='model.ckpt'):
+ if not os.path.exists(file_dir):
+ os.makedirs(file_dir)
+ ckpt_name = "{0}{1}".format(file_dir, file_name)
+ torch.save(ckpt, ckpt_name)
+ if is_best: shutil.copyfile(ckpt_name, "{0}{1}".format(file_dir, 'best_' + file_name))
+
+
+def seed_everything(seed=2022):
+ ''' [reference] https://gist.github.com/KirillVladimirov/005ec7f762293d2321385580d3dbe335 '''
+ random.seed(seed)
+ os.environ['PYTHONHASHSEED'] = str(seed)
+ np.random.seed(seed)
+ torch.manual_seed(seed)
+ torch.cuda.manual_seed(seed)
+ torch.backends.cudnn.deterministic = True
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/.cargo/config.toml b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/.cargo/config.toml
new file mode 100644
index 000000000..9dd8fcf87
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/.cargo/config.toml
@@ -0,0 +1,3 @@
+[target.'cfg(target_os="macos")']
+# Postgres symbols won't be available until runtime
+rustflags = ["-Clink-arg=-Wl,-undefined,dynamic_lookup"]
\ No newline at end of file
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/Cargo.toml b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/Cargo.toml
new file mode 100644
index 000000000..5c3e74739
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/Cargo.toml
@@ -0,0 +1,39 @@
+[package]
+name = "pg_extension"
+version = "0.1.0"
+edition = "2021"
+
+[lib]
+crate-type = ["cdylib"]
+
+[features]
+default = ["pg14", "python"]
+python = ["pyo3"]
+pg11 = ["pgrx/pg11", "pgrx-tests/pg11" ]
+pg12 = ["pgrx/pg12", "pgrx-tests/pg12" ]
+pg13 = ["pgrx/pg13", "pgrx-tests/pg13" ]
+pg14 = ["pgrx/pg14", "pgrx-tests/pg14" ]
+pg15 = ["pgrx/pg15", "pgrx-tests/pg15" ]
+pg_test = []
+
+[dependencies]
+pgrx = "=0.9.7"
+pgrx-pg-sys = "=0.9.7"
+serde_json = { version = "1.0.85", features = ["preserve_order"] }
+pyo3 = { version = "0.17", features = ["auto-initialize"], optional = true }
+once_cell = "1.8.0"
+log = "0.4.14"
+serde = "1.0"
+serde_derive = "1.0"
+
+[dev-dependencies]
+pgrx-tests = "=0.9.7"
+
+[profile.dev]
+panic = "unwind"
+
+[profile.release]
+panic = "unwind"
+opt-level = 3
+lto = "fat"
+codegen-units = 1
\ No newline at end of file
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/pg_extension.control b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/pg_extension.control
new file mode 100644
index 000000000..a28d2e9d8
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/pg_extension.control
@@ -0,0 +1,5 @@
+comment = 'pg_extension: Created by pgrx'
+default_version = '@CARGO_VERSION@'
+module_pathname = '$libdir/pg_extension'
+relocatable = false
+superuser = true
\ No newline at end of file
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/filter_phase.sql b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/filter_phase.sql
new file mode 100644
index 000000000..0fbfc05ae
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/filter_phase.sql
@@ -0,0 +1,54 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+CREATE OR REPLACE
+PROCEDURE model_selection_sp(
+ dataset TEXT, --dataset name
+ selected_columns TEXT[], --used columns
+ N INTEGER, --number of models to evaluate
+ batch_size INTEGER, --batch size, for profiling, filtering
+ config_file TEXT --config file path
+)
+LANGUAGE plpgsql
+AS $$
+DECLARE
+ -- global inputs/outputs
+ result_status TEXT;
+ column_list TEXT;
+BEGIN
+ -- combine the columns into a string
+ column_list := array_to_string(selected_columns, ', ');
+
+ -- 4. Run filtering phase to get top K models.
+ EXECUTE format('
+ WITH batch_rows AS (
+ SELECT %s
+ FROM %I
+ ORDER BY RANDOM()
+ LIMIT %s OFFSET 0
+ )
+ SELECT filtering_phase(
+ json_agg(row_to_json(t))::text, %s, %s, %L
+ )
+ FROM batch_rows AS t', column_list, dataset, batch_size, N, 1, config_file) INTO result_status;
+ RAISE NOTICE '4. run filtering phase, k models = %', result_status;
+
+END; $$;
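+
+-- A minimal usage sketch; the table name, column list and config path below are
+-- assumptions for illustration only:
+--
+--   CALL model_selection_sp('frappe_train', ARRAY['col1', 'col2', 'label'], 100, 32, '/path/to/config.ini');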
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_cpu.sql b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_cpu.sql
new file mode 100644
index 000000000..bcbf5c0fc
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_cpu.sql
@@ -0,0 +1,50 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+CREATE OR REPLACE
+PROCEDURE model_selection_end2end(
+ dataset TEXT, --dataset name
+ selected_columns TEXT[], --used columns
+ budget TEXT, --user given time budget
+ config_file TEXT --config file path
+)
+LANGUAGE plpgsql
+AS $$
+DECLARE
+ -- global inputs/outputs
+ result_status TEXT;
+ column_list TEXT;
+
+BEGIN
+ -- combine the columns into a string
+ column_list := array_to_string(selected_columns, ', ');
+ EXECUTE format('
+ WITH batch_rows AS (
+ SELECT %s
+ FROM %I
+ ORDER BY RANDOM()
+ )
+ SELECT model_selection(
+ json_agg(row_to_json(t))::text, %L, %L
+ )
+ FROM batch_rows AS t', column_list, dataset, budget, config_file) INTO result_status;
+ RAISE NOTICE '1. model_selection result: %', result_status;
+END; $$;
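+
+-- A minimal usage sketch (argument values are assumptions for illustration; the budget is passed as TEXT):
+--
+--   CALL model_selection_end2end('frappe_train', ARRAY['col1', 'col2', 'label'], '100', '/path/to/config.ini');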
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_cpu_workloads.sql b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_cpu_workloads.sql
new file mode 100644
index 000000000..882be67f4
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_cpu_workloads.sql
@@ -0,0 +1,51 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+CREATE OR REPLACE
+PROCEDURE model_selection_workloads(
+ dataset TEXT, --dataset name
+ selected_columns TEXT[], --used columns
+ N INTEGER, --explore N models
+ K INTEGER, --keep K models
+ config_file TEXT --config file path
+)
+LANGUAGE plpgsql
+AS $$
+DECLARE
+ -- global inputs/outputs
+ result_status TEXT;
+ column_list TEXT;
+
+BEGIN
+ -- combine the columns into a string
+ column_list := array_to_string(selected_columns, ', ');
+ EXECUTE format('
+ WITH batch_rows AS (
+ SELECT %s
+ FROM %I
+ ORDER BY RANDOM()
+ )
+ SELECT model_selection_workloads(
+ json_agg(row_to_json(t))::text, %s, %s, %L
+ )
+ FROM batch_rows AS t', column_list, dataset, N, K, config_file) INTO result_status;
+ RAISE NOTICE '1. model_selection result: %', result_status;
+END; $$;
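+
+-- A minimal usage sketch (argument values are assumptions for illustration): explore N = 100 models, keep K = 10.
+--
+--   CALL model_selection_workloads('frappe_train', ARRAY['col1', 'col2', 'label'], 100, 10, '/path/to/config.ini');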
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_dev.sql b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_dev.sql
new file mode 100644
index 000000000..6a7297547
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_dev.sql
@@ -0,0 +1,99 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+CREATE OR REPLACE
+PROCEDURE model_selection_sp(
+ dataset TEXT, --dataset name
+ selected_columns TEXT[], --used columns
+ budget TEXT, --user given time budget
+ batch_size INTEGER, --batch size, for profiling, filtering
+ config_file TEXT --config file path
+)
+LANGUAGE plpgsql
+AS $$
+DECLARE
+ -- global inputs/outputs
+ result_status TEXT;
+ column_list TEXT;
+
+ -- UDF outputs
+ score_time TEXT;
+ train_time TEXT;
+ coordinator_k integer;
+ coordinator_u integer;
+ coordinator_n integer;
+BEGIN
+ -- combine the columns into a string
+ column_list := array_to_string(selected_columns, ', ');
+
+ -- 1. Profiling time to score a model with TFMEM
+ EXECUTE format('
+ WITH batch_rows AS (
+ SELECT %s
+ FROM %I
+ ORDER BY RANDOM()
+ LIMIT %s OFFSET 0
+ )
+ SELECT profiling_filtering_phase(
+ json_agg(row_to_json(t))::text, %L
+ )
+ FROM batch_rows AS t', column_list, dataset, batch_size, config_file) INTO result_status;
+ score_time := json_extract_path_text(result_status::json, 'time');
+ RAISE NOTICE '1. profiling_filtering_phase, get score_time: %', score_time;
+
+ -- 2. Profiling time of training a model for one epoch
+ EXECUTE format('
+ WITH batch_rows AS (
+ SELECT %s
+ FROM %I
+ ORDER BY RANDOM()
+ LIMIT %s OFFSET 0
+ )
+ SELECT profiling_refinement_phase(
+ json_agg(row_to_json(t))::text, %L
+ )
+ FROM batch_rows AS t', column_list, dataset, batch_size, config_file) INTO result_status;
+ train_time := json_extract_path_text(result_status::json, 'time');
+ RAISE NOTICE '2. profiling_refinement_phase, get train_time: %', train_time;
+
+ -- 3. Coordinator to get N, K ,U
+ EXECUTE format('SELECT "coordinator"(%L, %L, %L, false, %L)', score_time, train_time, budget, config_file) INTO result_status;
+
+ coordinator_k := (json_extract_path_text(result_status::json, 'k'))::integer;
+ coordinator_u := (json_extract_path_text(result_status::json, 'u'))::integer;
+ coordinator_n := (json_extract_path_text(result_status::json, 'n'))::integer;
+ RAISE NOTICE '3. coordinator result: k = %, u = %, n = %', coordinator_k, coordinator_u, coordinator_n;
+
+ -- 4. Run filtering phase to get top K models.
+ EXECUTE format('
+ WITH batch_rows AS (
+ SELECT %s
+ FROM %I
+ ORDER BY RANDOM()
+ LIMIT %s OFFSET 0
+ )
+ SELECT filtering_phase(
+ json_agg(row_to_json(t))::text, %s, %s, %L
+ )
+ FROM batch_rows AS t', column_list, dataset, batch_size, coordinator_n, coordinator_k, config_file) INTO result_status;
+ RAISE NOTICE '4. run filtering phase, k models = %', result_status;
+
+END; $$;
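+
+-- A minimal usage sketch (argument values are assumptions for illustration): profile with a
+-- batch size of 32 under a time budget given as TEXT.
+--
+--   CALL model_selection_sp('frappe_train', ARRAY['col1', 'col2', 'label'], '100', 32, '/path/to/config.ini');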
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_trails.sql b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_trails.sql
new file mode 100644
index 000000000..3d91da501
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_trails.sql
@@ -0,0 +1,50 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+CREATE OR REPLACE
+PROCEDURE model_selection_end2end(
+ dataset TEXT, --dataset name
+ selected_columns TEXT[], --used columns
+ budget TEXT, --user given time budget
+ config_file TEXT --config file path
+)
+LANGUAGE plpgsql
+AS $$
+DECLARE
+ -- global inputs/outputs
+ result_status TEXT;
+ column_list TEXT;
+
+BEGIN
+ -- combine the columns into a string
+ column_list := array_to_string(selected_columns, ', ');
+ EXECUTE format('
+ WITH batch_rows AS (
+ SELECT %s
+ FROM %I
+ ORDER BY RANDOM()
+ )
+ SELECT model_selection_trails(
+ json_agg(row_to_json(t))::text, %L, %L
+ )
+ FROM batch_rows AS t', column_list, dataset, budget, config_file) INTO result_status;
+ RAISE NOTICE '1. model_selection result: %', result_status;
+END; $$;
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_trails_workloads.sql b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_trails_workloads.sql
new file mode 100644
index 000000000..aaf62c63d
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_trails_workloads.sql
@@ -0,0 +1,51 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+CREATE OR REPLACE
+PROCEDURE model_selection_workloads(
+ dataset TEXT, --dataset name
+ selected_columns TEXT[], --used columns
+ N INTEGER, --explore N models
+ K INTEGER, --keep K models
+ config_file TEXT --config file path
+)
+LANGUAGE plpgsql
+AS $$
+DECLARE
+ -- global inputs/outputs
+ result_status TEXT;
+ column_list TEXT;
+
+BEGIN
+ -- combine the columns into a string
+ column_list := array_to_string(selected_columns, ', ');
+ EXECUTE format('
+ WITH batch_rows AS (
+ SELECT %s
+ FROM %I
+ ORDER BY RANDOM()
+ )
+ SELECT model_selection_trails_workloads(
+ json_agg(row_to_json(t))::text, %s, %s, %L
+ )
+ FROM batch_rows AS t', column_list, dataset, N, K, config_file) INTO result_status;
+ RAISE NOTICE '1. model_selection result: %', result_status;
+END; $$;
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/pg_extension--0.1.0.sql b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/pg_extension--0.1.0.sql
new file mode 100644
index 000000000..434082d9d
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/pg_extension--0.1.0.sql
@@ -0,0 +1,160 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+/*
+This file is auto generated by pgrx.
+
+The ordering of items is not stable; it is driven by a dependency graph.
+*/
+
+-- src/lib.rs:80
+-- pg_extension::refinement_phase
+CREATE FUNCTION "refinement_phase"(
+ "config_file" TEXT /* alloc::string::String */
+) RETURNS TEXT /* alloc::string::String */
+ IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'refinement_phase_wrapper';
+
+-- src/lib.rs:31
+-- pg_extension::profiling_refinement_phase
+CREATE FUNCTION "profiling_refinement_phase"(
+ "mini_batch" TEXT, /* alloc::string::String */
+ "config_file" TEXT /* alloc::string::String */
+) RETURNS TEXT /* alloc::string::String */
+ IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'profiling_refinement_phase_wrapper';
+
+-- src/lib.rs:16
+-- pg_extension::profiling_filtering_phase
+CREATE FUNCTION "profiling_filtering_phase"(
+ "mini_batch" TEXT, /* alloc::string::String */
+ "config_file" TEXT /* alloc::string::String */
+) RETURNS TEXT /* alloc::string::String */
+ IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'profiling_filtering_phase_wrapper';
+
+-- src/lib.rs:66
+-- pg_extension::filtering_phase
+CREATE FUNCTION "filtering_phase"(
+ "mini_batch" TEXT, /* alloc::string::String */
+ "n" INT, /* i32 */
+ "k" INT, /* i32 */
+ "config_file" TEXT /* alloc::string::String */
+) RETURNS TEXT /* alloc::string::String */
+ IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'filtering_phase_wrapper';
+
+-- src/lib.rs:46
+-- pg_extension::coordinator
+CREATE FUNCTION "coordinator"(
+ "time_score" TEXT, /* alloc::string::String */
+ "time_train" TEXT, /* alloc::string::String */
+ "time_budget" TEXT, /* alloc::string::String */
+ "only_phase1" bool, /* bool */
+ "config_file" TEXT /* alloc::string::String */
+) RETURNS TEXT /* alloc::string::String */
+ IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'coordinator_wrapper';
+
+
+-- src/lib.rs:110
+-- pg_extension::model_selection_workloads
+CREATE FUNCTION "model_selection_workloads"(
+ "mini_batch" TEXT, /* alloc::string::String */
+ "n" INT, /* i32 */
+ "k" INT, /* i32 */
+ "config_file" TEXT /* alloc::string::String */
+) RETURNS TEXT /* alloc::string::String */
+ IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'model_selection_workloads_wrapper';
+
+-- src/lib.rs:138
+-- pg_extension::model_selection_trails_workloads
+CREATE FUNCTION "model_selection_trails_workloads"(
+ "mini_batch" TEXT, /* alloc::string::String */
+ "n" INT, /* i32 */
+ "k" INT, /* i32 */
+ "config_file" TEXT /* alloc::string::String */
+) RETURNS TEXT /* alloc::string::String */
+ IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'model_selection_trails_workloads_wrapper';
+
+-- src/lib.rs:125
+-- pg_extension::model_selection_trails
+CREATE FUNCTION "model_selection_trails"(
+ "mini_batch" TEXT, /* alloc::string::String */
+ "time_budget" TEXT, /* alloc::string::String */
+ "config_file" TEXT /* alloc::string::String */
+) RETURNS TEXT /* alloc::string::String */
+ IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'model_selection_trails_wrapper';
+
+-- src/lib.rs:94
+-- pg_extension::model_selection
+CREATE FUNCTION "model_selection"(
+ "mini_batch" TEXT, /* alloc::string::String */
+ "time_budget" TEXT, /* alloc::string::String */
+ "config_file" TEXT /* alloc::string::String */
+) RETURNS TEXT /* alloc::string::String */
+ IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'model_selection_wrapper';
+
+-- src/lib.rs:153
+-- pg_extension::benchmark_filtering_phase_latency
+CREATE FUNCTION "benchmark_filtering_phase_latency"(
+ "explore_models" INT, /* i32 */
+ "config_file" TEXT /* alloc::string::String */
+) RETURNS TEXT /* alloc::string::String */
+ IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'benchmark_filtering_phase_latency_wrapper';
+
+
+-- src/lib.rs:163
+-- pg_extension::benchmark_filtering_latency_in_db
+CREATE FUNCTION "benchmark_filtering_latency_in_db"(
+ "explore_models" INT, /* i32 */
+ "dataset" TEXT, /* alloc::string::String */
+ "config_file" TEXT /* alloc::string::String */
+) RETURNS TEXT /* alloc::string::String */
+ IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'benchmark_filtering_latency_in_db_wrapper';
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/bindings/ml_register.rs b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/bindings/ml_register.rs
new file mode 100644
index 000000000..5ca539653
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/bindings/ml_register.rs
@@ -0,0 +1,76 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+use log::error;
+use once_cell::sync::Lazy;
+use pyo3::prelude::*;
+use pyo3::types::PyTuple;
+
+
+pub fn run_python_function(
+    py_module: &Lazy<Py<PyModule>>,
+ parameters: &String,
+ function_name: &str,
+) -> serde_json::Value {
+ let parameters_str = parameters.to_string();
+ let results = Python::with_gil(|py| -> String {
+        let run_script: Py<PyAny> = py_module.getattr(py, function_name).unwrap().into();
+ let result = run_script.call1(
+ py,
+ PyTuple::new(
+ py,
+ &[parameters_str.into_py(py)],
+ ),
+ );
+ let result = match result {
+ Err(e) => {
+ let traceback = e.traceback(py).unwrap().format().unwrap();
+ error!("{traceback} {e}");
+ format!("{traceback} {e}")
+ }
+ Ok(o) => o.extract(py).unwrap(),
+ };
+ result
+ });
+
+ serde_json::from_str(&results).unwrap()
+}
+
+
+/*
+ Python Module Path for Model Selection
+ */
+pub static PY_MODULE: Lazy<Py<PyModule>> = Lazy::new(|| {
+    Python::with_gil(|py| -> Py<PyModule> {
+ let src = include_str!(concat!(
+ env!("CARGO_MANIFEST_DIR"),
+ "/../ml/model_selection/pg_interface.py"
+ ));
+ PyModule::from_code(py, src, "", "").unwrap().into()
+ })
+});
+
+
+
+
+
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/bindings/mod.rs b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/bindings/mod.rs
new file mode 100644
index 000000000..4e976d605
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/bindings/mod.rs
@@ -0,0 +1,26 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+#[cfg(feature = "python")]
+pub mod ms;
+mod ml_register;
+mod model;
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/bindings/model.rs b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/bindings/model.rs
new file mode 100644
index 000000000..61268fea4
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/bindings/model.rs
@@ -0,0 +1,39 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+use serde::{Serialize, Deserialize};
+
+
+#[derive(Debug, Serialize, Deserialize)]
+pub(crate) struct Frappe {
+ pub(crate) id: i32,
+ pub(crate) label: i32,
+ pub(crate) col1: String,
+ pub(crate) col2: String,
+ pub(crate) col3: String,
+ pub(crate) col4: String,
+ pub(crate) col5: String,
+ pub(crate) col6: String,
+ pub(crate) col7: String,
+ pub(crate) col8: String,
+ pub(crate) col9: String,
+ pub(crate) col10: String,
+}
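+
+// A minimal deserialization sketch; the values are placeholders and the fields mirror the struct above:
+//
+//   let json = r#"{"id":1,"label":0,"col1":"1","col2":"2","col3":"3","col4":"4","col5":"5","col6":"6","col7":"7","col8":"8","col9":"9","col10":"10"}"#;
+//   let row: Frappe = serde_json::from_str(json).unwrap();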
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/bindings/ms.rs b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/bindings/ms.rs
new file mode 100644
index 000000000..ca946aa09
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/bindings/ms.rs
@@ -0,0 +1,228 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+use serde_json::json;
+use std::collections::HashMap;
+use pgrx::prelude::*;
+use crate::bindings::ml_register::PY_MODULE;
+use crate::bindings::ml_register::run_python_function;
+use std::time::{Instant, Duration};
+
+
+pub fn profiling_filtering_phase(
+ task: &String
+) -> serde_json::Value {
+ run_python_function(&PY_MODULE, task, "profiling_filtering_phase")
+}
+
+
+pub fn profiling_refinement_phase(
+ task: &String
+) -> serde_json::Value {
+ run_python_function(&PY_MODULE, task, "profiling_refinement_phase")
+}
+
+
+pub fn coordinator(
+ task: &String
+) -> serde_json::Value {
+ run_python_function(&PY_MODULE, task, "coordinator")
+}
+
+
+pub fn filtering_phase(
+ task: &String
+) -> serde_json::Value {
+ run_python_function(&PY_MODULE, task, "filtering_phase_dataLoader")
+}
+
+
+pub fn refinement_phase() -> serde_json::Value {
+ let task = "refinement_phase".to_string();
+ run_python_function(&PY_MODULE, &task, "refinement_phase")
+}
+
+
+// these two are filtering + refinement in the UDF runtime
+pub fn model_selection(
+ task: &String
+) -> serde_json::Value {
+ run_python_function(&PY_MODULE, task, "model_selection")
+}
+
+
+pub fn model_selection_workloads(
+ task: &String
+) -> serde_json::Value {
+ run_python_function(&PY_MODULE, task, "model_selection_workloads")
+}
+
+
+// these two are filtering + refinement on the GPU server
+pub fn model_selection_trails(
+ task: &String
+) -> serde_json::Value {
+ run_python_function(&PY_MODULE, task, "model_selection_trails")
+}
+
+
+pub fn model_selection_trails_workloads(
+ task: &String
+) -> serde_json::Value {
+ run_python_function(&PY_MODULE, task, "model_selection_trails_workloads")
+}
+
+// micro benchmarks
+
+pub fn benchmark_filtering_phase_latency(
+ task: &String
+) -> serde_json::Value {
+ run_python_function(&PY_MODULE, task, "benchmark_filtering_phase_latency")
+}
+
+pub fn benchmark_filtering_latency_in_db(
+ explore_models: i32, dataset: &String, config_file: &String) -> serde_json::Value {
+
+ let overall_start_time = Instant::now();
+
+ let database_name = "pg_extension";
+ let mut last_id = 0;
+ let mut eva_results = serde_json::Value::Null; // Initializing the eva_results
+
+ for i in 1..explore_models {
+
+ // Step 1: Initialize State in Python
+ let mut task_map = HashMap::new();
+ task_map.insert("config_file", config_file.clone());
+ task_map.insert("dataset", dataset.clone());
+ task_map.insert("eva_results", eva_results.to_string());
+ let task_json = json!(task_map).to_string();
+
+        // this call caches state on the Python side
+ let sample_result = run_python_function(
+ &PY_MODULE,
+ &task_json,
+ "in_db_filtering_state_init");
+
+ // 2. query data via SPI
+ let start_time = Instant::now();
+    let results: Result<Vec<Vec<String>>, String> = Spi::connect(|client| {
+ let query = format!("SELECT * FROM {}_train WHERE id > {} ORDER BY id ASC LIMIT 32", dataset, last_id);
+ let mut cursor = client.open_cursor(&query, None);
+ let table = match cursor.fetch(32) {
+ Ok(table) => table,
+ Err(e) => return Err(e.to_string()), // Convert the error to a string and return
+ };
+
+ let mut mini_batch = Vec::new();
+
+ for row in table.into_iter() {
+ let mut each_row = Vec::new();
+ // add primary key
+            let col0 = match row.get::<i32>(1) {
+ Ok(Some(val)) => {
+ // Update last_id with the retrieved value
+                    if val > 100000 {
+                        last_id = 0;
+                    } else {
+                        last_id = val;
+                    }
+ val.to_string()
+ }
+ Ok(None) => "".to_string(), // Handle the case when there's no valid value
+ Err(e) => e.to_string(),
+ };
+ each_row.push(col0);
+ // add label
+            let col1 = match row.get::<i32>(2) {
+ Ok(val) => val.map(|i| i.to_string()).unwrap_or_default(),
+ Err(e) => e.to_string(),
+ };
+ each_row.push(col1);
+ // add fields
+            let texts: Vec<String> = (3..row.columns() + 1)
+ .filter_map(|i| {
+ match row.get::<&str>(i) {
+ Ok(Some(s)) => Some(s.to_string()),
+ Ok(None) => None,
+ Err(e) => Some(e.to_string()), // Convert error to string
+ }
+ }).collect();
+ each_row.extend(texts);
+ mini_batch.push(each_row)
+ }
+ // return
+ Ok(mini_batch)
+ });
+ // serialize the mini-batch data
+ let tup_table = match results {
+ Ok(data) => {
+ serde_json::json!({
+ "status": "success",
+ "data": data
+ })
+ }
+ Err(e) => {
+ serde_json::json!({
+ "status": "error",
+ "message": format!("Error while connecting: {}", e)
+ })
+ }
+ };
+
+ let end_time = Instant::now();
+ let elapsed_time = end_time.duration_since(start_time);
+ let elapsed_seconds = elapsed_time.as_secs_f64();
+
+ // Step 3: model evaluate in Python
+ let mut eva_task_map = HashMap::new();
+ eva_task_map.insert("config_file", config_file.clone());
+ eva_task_map.insert("sample_result", sample_result.to_string());
+ let mini_batch_json = tup_table.to_string();
+ eva_task_map.insert("mini_batch", mini_batch_json);
+ eva_task_map.insert("spi_seconds", elapsed_seconds.to_string());
+ eva_task_map.insert("model_index", i.to_string());
+
+        let eva_task_json = json!(eva_task_map).to_string();
+
+ eva_results = run_python_function(
+ &PY_MODULE,
+ &eva_task_json,
+ "in_db_filtering_evaluate");
+ }
+
+ let mut record_task_map = HashMap::new();
+ record_task_map.insert("config_file", config_file.clone());
+ record_task_map.insert("dataset", dataset.clone());
+ let record_task_json = json!(record_task_map).to_string();
+ run_python_function(
+ &PY_MODULE,
+ &record_task_json,
+ "records_results");
+
+ let overall_end_time = Instant::now();
+ let overall_elapsed_time = overall_end_time.duration_since(overall_start_time);
+ let overall_elapsed_seconds = overall_elapsed_time.as_secs_f64();
+
+    // Step 4: Return to PostgreSQL
+ return serde_json::json!(overall_elapsed_seconds.to_string());
+}
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/lib.rs b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/lib.rs
new file mode 100644
index 000000000..5ff49a708
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/lib.rs
@@ -0,0 +1,192 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+use pgrx::prelude::*;
+pgrx::pg_module_magic!();
+use serde_json::json;
+use std::collections::HashMap;
+
+pub mod bindings;
+extern crate serde_derive;
+
+/*
+ * @param mini_batch: mini_batch of data. Assume all columns are string type in
+ * libsvm encoding
+ */
+#[cfg(feature = "python")]
+#[pg_extern(immutable, parallel_safe, name = "profiling_filtering_phase")]
+#[allow(unused_variables)]
+pub fn profiling_filtering_phase(mini_batch: String, config_file: String) -> String {
+ let mut task_map = HashMap::new();
+ task_map.insert("mini_batch", mini_batch);
+ task_map.insert("config_file", config_file);
+ let task_json = json!(task_map).to_string();
+ crate::bindings::ms::profiling_filtering_phase(&task_json).to_string()
+}
+
+/*
+ * @param mini_batch: mini_batch of data used to profile one training iteration,
+ * in libsvm encoding
+ */
+#[cfg(feature = "python")]
+#[pg_extern(immutable, parallel_safe, name = "profiling_refinement_phase")]
+#[allow(unused_variables)]
+pub fn profiling_refinement_phase(mini_batch: String, config_file: String) -> String {
+ let mut task_map = HashMap::new();
+ task_map.insert("mini_batch", mini_batch);
+ task_map.insert("config_file", config_file);
+ let task_json = json!(task_map).to_string();
+ crate::bindings::ms::profiling_refinement_phase(&task_json).to_string()
+}
+
+/*
+ * Coordinator: computes K, U and N from the profiled per-model scoring time,
+ * per-epoch training time and the user-given time budget.
+ */
+#[cfg(feature = "python")]
+#[pg_extern(immutable, parallel_safe, name = "coordinator")]
+#[allow(unused_variables)]
+pub fn coordinator(time_score: String, time_train: String, time_budget: String, only_phase1: bool,
+ config_file: String) -> String {
+ let mut task_map = HashMap::new();
+ task_map.insert("budget", time_budget);
+ task_map.insert("score_time_per_model", time_score);
+ task_map.insert("train_time_per_epoch", time_train);
+ task_map.insert("only_phase1", only_phase1.to_string());
+ task_map.insert("config_file", config_file);
+ let task_json = json!(task_map).to_string();
+ crate::bindings::ms::coordinator(&task_json).to_string()
+}
+
+
+/*
+ * @param mini_batch: mini_batch of data. Assume all columns are string type in
+ * libsvm encoding
+ */
+#[cfg(feature = "python")]
+#[pg_extern(immutable, parallel_safe, name = "filtering_phase")]
+#[allow(unused_variables)]
+pub fn filtering_phase(mini_batch: String, n: i32, k: i32, config_file: String) -> String {
+ let mut task_map = HashMap::new();
+ task_map.insert("mini_batch", mini_batch);
+ task_map.insert("n", n.to_string());
+ task_map.insert("k", k.to_string());
+ task_map.insert("config_file", config_file);
+ let task_json = json!(task_map).to_string();
+ crate::bindings::ms::filtering_phase(&task_json).to_string()
+}
+
+
+#[cfg(feature = "python")]
+#[pg_extern(immutable, parallel_safe, name = "refinement_phase")]
+#[allow(unused_variables)]
+pub fn refinement_phase(config_file: String) -> String {
+ let mut task_map = HashMap::new();
+ task_map.insert("config_file", config_file);
+ let task_json = json!(task_map).to_string();
+ crate::bindings::ms::refinement_phase().to_string()
+}
+
+
+/*
+ End-2-End model selection, All in UDF runtime.
+ */
+#[cfg(feature = "python")]
+#[pg_extern(immutable, parallel_safe, name = "model_selection")]
+#[allow(unused_variables)]
+pub fn model_selection(mini_batch: String, time_budget: String, config_file: String) -> String {
+ let mut task_map = HashMap::new();
+ task_map.insert("mini_batch", mini_batch);
+ task_map.insert("budget", time_budget);
+ task_map.insert("config_file", config_file);
+ let task_json = json!(task_map).to_string();
+ crate::bindings::ms::model_selection(&task_json).to_string()
+}
+
+/*
+ * @param mini_batch: mini_batch of data. Assume all columns are string type in
+ * libsvm encoding
+ */
+#[cfg(feature = "python")]
+#[pg_extern(immutable, parallel_safe, name = "model_selection_workloads")]
+#[allow(unused_variables)]
+pub fn model_selection_workloads(mini_batch: String, n: i32, k: i32, config_file: String) -> String {
+ let mut task_map = HashMap::new();
+ task_map.insert("mini_batch", mini_batch);
+ task_map.insert("n", n.to_string());
+ task_map.insert("k", k.to_string());
+ task_map.insert("config_file", config_file);
+ let task_json = json!(task_map).to_string();
+ crate::bindings::ms::model_selection_workloads(&task_json).to_string()
+}
+
+
+// these two are filtering + refinement on the GPU server
+#[cfg(feature = "python")]
+#[pg_extern(immutable, parallel_safe, name = "model_selection_trails")]
+#[allow(unused_variables)]
+pub fn model_selection_trails(mini_batch: String, time_budget: String, config_file: String) -> String {
+ let mut task_map = HashMap::new();
+ task_map.insert("mini_batch", mini_batch);
+ task_map.insert("budget", time_budget);
+ task_map.insert("config_file", config_file);
+ let task_json = json!(task_map).to_string();
+ crate::bindings::ms::model_selection_trails(&task_json).to_string()
+}
+
+
+#[cfg(feature = "python")]
+#[pg_extern(immutable, parallel_safe, name = "model_selection_trails_workloads")]
+#[allow(unused_variables)]
+pub fn model_selection_trails_workloads(mini_batch: String, n: i32, k: i32, config_file: String) -> String {
+ let mut task_map = HashMap::new();
+ task_map.insert("mini_batch", mini_batch);
+ task_map.insert("n", n.to_string());
+ task_map.insert("k", k.to_string());
+ task_map.insert("config_file", config_file);
+ let task_json = json!(task_map).to_string();
+ crate::bindings::ms::model_selection_trails_workloads(&task_json).to_string()
+}
+
+// micro benchmarks
+#[cfg(feature = "python")]
+#[pg_extern(immutable, parallel_safe, name = "benchmark_filtering_phase_latency")]
+#[allow(unused_variables)]
+pub fn benchmark_filtering_phase_latency(explore_models: i32, config_file: String) -> String {
+ let mut task_map = HashMap::new();
+ task_map.insert("explore_models", explore_models.to_string());
+ task_map.insert("config_file", config_file);
+ let task_json = json!(task_map).to_string();
+ crate::bindings::ms::benchmark_filtering_phase_latency(&task_json).to_string()
+}
+
+#[cfg(feature = "python")]
+#[pg_extern(immutable, parallel_safe, name = "benchmark_filtering_latency_in_db")]
+#[allow(unused_variables)]
+pub fn benchmark_filtering_latency_in_db(
+ explore_models: i32, dataset: String, config_file: String) -> String {
+ crate::bindings::ms::benchmark_filtering_latency_in_db(explore_models, &dataset, &config_file).to_string()
+}
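+
+// A minimal usage sketch from psql once the extension is built and installed; the dataset name
+// and config path are assumptions for illustration (a table named <dataset>_train must exist):
+//
+//   CREATE EXTENSION pg_extension;
+//   SELECT benchmark_filtering_latency_in_db(10, 'frappe', '/path/to/config.ini');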
+
+
+
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/test/lib.rs b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/test/lib.rs
new file mode 100644
index 000000000..bb91c2981
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/test/lib.rs
@@ -0,0 +1,21 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/requirement.txt b/examples/model_selection/TRAILS-Database-Native-Model-Selection/requirement.txt
new file mode 100644
index 000000000..ab233d87b
--- /dev/null
+++ b/examples/model_selection/TRAILS-Database-Native-Model-Selection/requirement.txt
@@ -0,0 +1,32 @@
+ConfigSpace==0.7.1
+contourpy==1.1.0
+cycler==0.11.0
+fonttools==4.41.0
+importlib-resources==6.0.0
+joblib==1.3.1
+kiwisolver==1.4.4
+matplotlib==3.7.2
+more-itertools==9.1.0
+numpy==1.24.4
+orjson==3.9.2
+packaging==23.1
+palettable==3.3.3
+pandas==2.0.3
+Pillow==10.0.0
+pyparsing==3.0.9
+python-dateutil==2.8.2
+pytz==2023.3
+scikit-learn==1.3.0
+scipy==1.10.1
+seaborn==0.12.2
+six==1.16.0
+sklearn==0.0
+threadpoolctl==3.1.0
+torch==1.8.1
+torchaudio==0.8.1
+torchvision==0.9.1
+tqdm==4.47.0
+typing_extensions==4.7.1
+tzdata==2023.3
+zipp==3.16.2
+requests==2.31.0
diff --git a/examples/model_selection_psql/README.md b/examples/model_selection_psql/README.md
new file mode 100644
index 000000000..c78fca5f1
--- /dev/null
+++ b/examples/model_selection_psql/README.md
@@ -0,0 +1,22 @@
+
+
+# Two-Phase Model Selection on PostgreSQL
+
+Examples inside this folder show how to select a well-performing model using SINGA inside an RDBMS such as PostgreSQL.
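+
+As a minimal usage sketch, the MLP-based example under `ms_mlp` can be run with the commands below (the same commands are listed in `ms_mlp/run.sh`):
+
+```sh
+### Static Models
+python train_mlp.py ms_model_mlp mnist
+
+### Dynamic Models
+python train_ms_model.py ms_model_mlp mnist
+```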
\ No newline at end of file
diff --git a/examples/model_selection_psql/ms_mlp/run.sh b/examples/model_selection_psql/ms_mlp/run.sh
new file mode 100644
index 000000000..5e78f5f2d
--- /dev/null
+++ b/examples/model_selection_psql/ms_mlp/run.sh
@@ -0,0 +1,26 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+#!/usr/bin/env python -W ignore::DeprecationWarning
+
+### Static Models
+python train_mlp.py ms_model_mlp mnist
+
+### Dynamic Models
+python train_ms_model.py ms_model_mlp mnist
diff --git a/examples/model_selection_psql/ms_mlp/train_cnn.py b/examples/model_selection_psql/ms_mlp/train_cnn.py
new file mode 100644
index 000000000..6eab096b9
--- /dev/null
+++ b/examples/model_selection_psql/ms_mlp/train_cnn.py
@@ -0,0 +1,329 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from singa import singa_wrap as singa
+from singa import device
+from singa import tensor
+from singa import opt
+import numpy as np
+import time
+import argparse
+from PIL import Image
+
+np_dtype = {"float16": np.float16, "float32": np.float32}
+
+singa_dtype = {"float16": tensor.float16, "float32": tensor.float32}
+
+
+# Data augmentation
+def augmentation(x, batch_size):
+ xpad = np.pad(x, [[0, 0], [0, 0], [4, 4], [4, 4]], 'symmetric')
+ for data_num in range(0, batch_size):
+ offset = np.random.randint(8, size=2)
+ x[data_num, :, :, :] = xpad[data_num, :,
+ offset[0]:offset[0] + x.shape[2],
+ offset[1]:offset[1] + x.shape[2]]
+ if_flip = np.random.randint(2)
+ if (if_flip):
+ x[data_num, :, :, :] = x[data_num, :, :, ::-1]
+ return x
+
+
+# Calculate accuracy
+def accuracy(pred, target):
+ # y is network output to be compared with ground truth (int)
+ y = np.argmax(pred, axis=1)
+ a = y == target
+ correct = np.array(a, "int").sum()
+ return correct
+
+
+# Data partition according to the rank
+def partition(global_rank, world_size, train_x, train_y, val_x, val_y):
+ # Partition training data
+ data_per_rank = train_x.shape[0] // world_size
+ idx_start = global_rank * data_per_rank
+ idx_end = (global_rank + 1) * data_per_rank
+ train_x = train_x[idx_start:idx_end]
+ train_y = train_y[idx_start:idx_end]
+
+ # Partition evaluation data
+ data_per_rank = val_x.shape[0] // world_size
+ idx_start = global_rank * data_per_rank
+ idx_end = (global_rank + 1) * data_per_rank
+ val_x = val_x[idx_start:idx_end]
+ val_y = val_y[idx_start:idx_end]
+ return train_x, train_y, val_x, val_y
+
+
+# Function to all reduce NUMPY accuracy and loss from multiple devices
+def reduce_variable(variable, dist_opt, reducer):
+ reducer.copy_from_numpy(variable)
+ dist_opt.all_reduce(reducer.data)
+ dist_opt.wait()
+ output = tensor.to_numpy(reducer)
+ return output
+
+
+def resize_dataset(x, image_size):
+ num_data = x.shape[0]
+ dim = x.shape[1]
+ X = np.zeros(shape=(num_data, dim, image_size, image_size),
+ dtype=np.float32)
+ for n in range(0, num_data):
+ for d in range(0, dim):
+ X[n, d, :, :] = np.array(Image.fromarray(x[n, d, :, :]).resize(
+ (image_size, image_size), Image.BILINEAR),
+ dtype=np.float32)
+ return X
+
+
+def run(global_rank,
+ world_size,
+ local_rank,
+ max_epoch,
+ batch_size,
+ model,
+ data,
+ sgd,
+ graph,
+ verbosity,
+ dist_option='plain',
+ spars=None,
+ precision='float32'):
+ dev = device.create_cuda_gpu_on(local_rank) # need to change to CPU device for CPU-only machines
+ dev.SetRandSeed(0)
+ np.random.seed(0)
+
+ if data == 'cifar10':
+ from data import cifar10
+ train_x, train_y, val_x, val_y = cifar10.load()
+ elif data == 'cifar100':
+ from data import cifar100
+ train_x, train_y, val_x, val_y = cifar100.load()
+ elif data == 'mnist':
+ from data import mnist
+ train_x, train_y, val_x, val_y = mnist.load()
+
+
+ num_channels = train_x.shape[1]
+ image_size = train_x.shape[2]
+ data_size = np.prod(train_x.shape[1:train_x.ndim]).item()
+ num_classes = (np.max(train_y) + 1).item()
+
+ if model == 'resnet':
+ from model import resnet
+ model = resnet.resnet50(num_channels=num_channels,
+ num_classes=num_classes)
+ elif model == 'xceptionnet':
+ from model import xceptionnet
+ model = xceptionnet.create_model(num_channels=num_channels,
+ num_classes=num_classes)
+ elif model == 'cnn':
+ from model import cnn
+ model = cnn.create_model(num_channels=num_channels,
+ num_classes=num_classes)
+ elif model == 'alexnet':
+ from model import alexnet
+ model = alexnet.create_model(num_channels=num_channels,
+ num_classes=num_classes)
+ elif model == 'mlp':
+ import os, sys, inspect
+ current = os.path.dirname(
+ os.path.abspath(inspect.getfile(inspect.currentframe())))
+ parent = os.path.dirname(current)
+ sys.path.insert(0, parent)
+ from mlp import model
+ model = model.create_model(data_size=data_size,
+ num_classes=num_classes)
+
+ # For distributed training, sequential has better performance
+ if hasattr(sgd, "communicator"):
+ DIST = True
+ sequential = True
+ else:
+ DIST = False
+ sequential = False
+
+ if DIST:
+ train_x, train_y, val_x, val_y = partition(global_rank, world_size,
+ train_x, train_y, val_x,
+ val_y)
+
+ if model.dimension == 4:
+ tx = tensor.Tensor(
+ (batch_size, num_channels, model.input_size, model.input_size), dev,
+ singa_dtype[precision])
+ elif model.dimension == 2:
+ tx = tensor.Tensor((batch_size, data_size), dev, singa_dtype[precision])
+ train_x = np.reshape(train_x, (train_x.shape[0], -1)) # assign the result; np.reshape does not modify in place
+ val_x = np.reshape(val_x, (val_x.shape[0], -1))
+
+ ty = tensor.Tensor((batch_size,), dev, tensor.int32)
+ num_train_batch = train_x.shape[0] // batch_size
+ num_val_batch = val_x.shape[0] // batch_size
+ idx = np.arange(train_x.shape[0], dtype=np.int32)
+
+ # Attach model to graph
+ model.set_optimizer(sgd)
+ model.compile([tx], is_train=True, use_graph=graph, sequential=sequential)
+ dev.SetVerbosity(verbosity)
+
+ # Training and evaluation loop
+ for epoch in range(max_epoch):
+ start_time = time.time()
+ np.random.shuffle(idx)
+
+ if global_rank == 0:
+ print('Starting Epoch %d:' % (epoch))
+
+ # Training phase
+ train_correct = np.zeros(shape=[1], dtype=np.float32)
+ test_correct = np.zeros(shape=[1], dtype=np.float32)
+ train_loss = np.zeros(shape=[1], dtype=np.float32)
+
+ model.train()
+ for b in range(num_train_batch):
+ # if b % 100 == 0:
+ # print ("b: \n", b)
+ # Generate the patch data in this iteration
+ x = train_x[idx[b * batch_size:(b + 1) * batch_size]]
+ if model.dimension == 4:
+ x = augmentation(x, batch_size)
+ if (image_size != model.input_size):
+ x = resize_dataset(x, model.input_size)
+ x = x.astype(np_dtype[precision])
+ y = train_y[idx[b * batch_size:(b + 1) * batch_size]]
+
+ # Copy the patch data into input tensors
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+
+ # Train the model
+ out, loss = model(tx, ty, dist_option, spars)
+ train_correct += accuracy(tensor.to_numpy(out), y)
+ train_loss += tensor.to_numpy(loss)[0]
+
+ if DIST:
+ # Reduce the evaluation accuracy and loss from multiple devices
+ reducer = tensor.Tensor((1,), dev, tensor.float32)
+ train_correct = reduce_variable(train_correct, sgd, reducer)
+ train_loss = reduce_variable(train_loss, sgd, reducer)
+
+ if global_rank == 0:
+ print('Training loss = %f, training accuracy = %f' %
+ (train_loss, train_correct /
+ (num_train_batch * batch_size * world_size)),
+ flush=True)
+
+ # Evaluation phase
+ model.eval()
+ for b in range(num_val_batch):
+ x = val_x[b * batch_size:(b + 1) * batch_size]
+ if model.dimension == 4:
+ if (image_size != model.input_size):
+ x = resize_dataset(x, model.input_size)
+ x = x.astype(np_dtype[precision])
+ y = val_y[b * batch_size:(b + 1) * batch_size]
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+ out_test = model(tx)
+ test_correct += accuracy(tensor.to_numpy(out_test), y)
+
+ if DIST:
+ # Reduce the evaluation accuracy from multiple devices
+ test_correct = reduce_variable(test_correct, sgd, reducer)
+
+ # Output the evaluation accuracy
+ if global_rank == 0:
+ print('Evaluation accuracy = %f, Elapsed Time = %fs' %
+ (test_correct / (num_val_batch * batch_size * world_size),
+ time.time() - start_time),
+ flush=True)
+
+ dev.PrintTimeProfiling()
+
+
+if __name__ == '__main__':
+ # Use argparse to get command config: max_epoch, model, data, etc., for single gpu training
+ parser = argparse.ArgumentParser(
+ description='Training using the autograd and graph.')
+ parser.add_argument(
+ 'model',
+ choices=['cnn', 'resnet', 'xceptionnet', 'mlp', 'alexnet'],
+ default='cnn')
+ parser.add_argument('data',
+ choices=['mnist', 'cifar10', 'cifar100'],
+ default='mnist')
+ parser.add_argument('-p',
+ choices=['float32', 'float16'],
+ default='float32',
+ dest='precision')
+ parser.add_argument('-m',
+ '--max-epoch',
+ default=100,
+ type=int,
+ help='maximum epochs',
+ dest='max_epoch')
+ parser.add_argument('-b',
+ '--batch-size',
+ default=64,
+ type=int,
+ help='batch size',
+ dest='batch_size')
+ parser.add_argument('-l',
+ '--learning-rate',
+ default=0.005,
+ type=float,
+ help='initial learning rate',
+ dest='lr')
+ # Determine which gpu to use
+ parser.add_argument('-i',
+ '--device-id',
+ default=0,
+ type=int,
+ help='which GPU to use',
+ dest='device_id')
+ parser.add_argument('-g',
+ '--disable-graph',
+ default='True',
+ action='store_false',
+ help='disable graph',
+ dest='graph')
+ parser.add_argument('-v',
+ '--log-verbosity',
+ default=0,
+ type=int,
+ help='logging verbosity',
+ dest='verbosity')
+
+ args = parser.parse_args()
+
+ sgd = opt.SGD(lr=args.lr, momentum=0.9, weight_decay=1e-5, dtype=singa_dtype[args.precision])
+ run(0,
+ 1,
+ args.device_id,
+ args.max_epoch,
+ args.batch_size,
+ args.model,
+ args.data,
+ sgd,
+ args.graph,
+ args.verbosity,
+ precision=args.precision)
diff --git a/examples/model_selection_psql/ms_mlp/train_mlp.py b/examples/model_selection_psql/ms_mlp/train_mlp.py
new file mode 100644
index 000000000..3c084ab11
--- /dev/null
+++ b/examples/model_selection_psql/ms_mlp/train_mlp.py
@@ -0,0 +1,588 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from singa import singa_wrap as singa
+from singa import device
+from singa import tensor
+from singa import opt
+from singa import autograd
+from singa.opt import Optimizer
+from singa.opt import DecayScheduler
+from singa.opt import Constant
+import numpy as np
+import time
+import argparse
+from PIL import Image
+
+np_dtype = {"float16": np.float16, "float32": np.float32}
+
+singa_dtype = {"float16": tensor.float16, "float32": tensor.float32}
+
+### MSOptimizer
+class MSOptimizer(Optimizer):
+ def __call__(self, loss):
+ pn_p_g_list = self.call_with_returns(loss)
+ self.step()
+ return pn_p_g_list
+
+ def call_with_returns(self, loss):
+ # print ("call_with_returns loss.data: \n", loss.data)
+ pn_p_g_list = []
+ for p, g in autograd.backward(loss):
+ if p.name is None:
+ p.name = id(p)
+ self.apply(p.name, p, g)
+ pn_p_g_list.append([p.name, p, g]) # append the (name, param, grad) triple as a list; append() takes a single argument
+ # print ("call with returns")
+ # print ("p.name: \n", p.name)
+ # print ("p.data: \n", p.data)
+ # print ("g.data: \n", g.data)
+ return pn_p_g_list
+
+class MSSGD(MSOptimizer):
+ """Implements stochastic gradient descent (optionally with momentum).
+
+ Nesterov momentum is based on the formula from `On the importance of initialization and momentum in deep learning`__.
+
+ Args:
+ lr(float): learning rate
+ momentum(float, optional): momentum factor(default: 0)
+ weight_decay(float, optional): weight decay(L2 penalty)(default: 0)
+ dampening(float, optional): dampening for momentum(default: 0)
+ nesterov(bool, optional): enables Nesterov momentum(default: False)
+
+ Typical usage example:
+ >>> from singa import opt
+ >>> optimizer = opt.SGD(lr=0.1, momentum=0.9)
+ >>> optimizer.update()
+
+ __ http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf
+
+ .. note::
+ The implementation of SGD with Momentum / Nesterov subtly differs from
+ Sutskever et al. and implementations in some other frameworks.
+
+ Considering the specific case of Momentum, the update can be written as
+
+ .. math::
+ v = \rho * v + g \\
+ p = p - lr * v
+
+ where p, g, v and :math:`\rho` denote the parameters, gradient,
+ velocity, and momentum respectively.
+
+ This is in contrast to Sutskever et al. and
+ other frameworks which employ an update of the form
+
+ .. math::
+ v = \rho * v + lr * g \\
+ p = p - v
+
+ The Nesterov version is analogously modified.
+ """
+
+ def __init__(self,
+ lr=0.1,
+ momentum=0,
+ dampening=0,
+ weight_decay=0,
+ nesterov=False,
+ dtype=tensor.float32):
+ super(MSSGD, self).__init__(lr, dtype)
+
+ # init momentum
+ if type(momentum) == float or type(momentum) == int:
+ if momentum < 0.0:
+ raise ValueError("Invalid momentum value: {}".format(momentum))
+ self.momentum = Constant(momentum)
+ elif isinstance(momentum, DecayScheduler):
+ self.momentum = momentum
+ momentum = momentum.init_value
+ else:
+ raise TypeError("Wrong momentum type")
+ self.mom_value = self.momentum(self.step_counter).as_type(self.dtype)
+
+ # init dampening
+ if type(dampening) == float or type(dampening) == int:
+ self.dampening = Constant(dampening)
+ elif isinstance(dampening, DecayScheduler):
+ self.dampening = dampening
+ dampening = dampening.init_value
+ else:
+ raise TypeError("Wrong dampening type")
+ self.dam_value = self.dampening(self.step_counter).as_type(self.dtype)
+
+ # init weight_decay
+ if type(weight_decay) == float or type(weight_decay) == int:
+ if weight_decay < 0.0:
+ raise ValueError(
+ "Invalid weight_decay value: {}".format(weight_decay))
+ self.weight_decay = Constant(weight_decay)
+ elif isinstance(weight_decay, DecayScheduler):
+ self.weight_decay = weight_decay
+ else:
+ raise TypeError("Wrong weight_decay type")
+ self.decay_value = self.weight_decay(self.step_counter).as_type(
+ self.dtype)
+
+ # init other params
+ self.nesterov = nesterov
+ self.moments = dict()
+
+ # check value
+ if nesterov and (momentum <= 0 or dampening != 0):
+ raise ValueError(
+ "Nesterov momentum requires a momentum and zero dampening")
+
+ def apply(self, param_name, param_value, param_grad):
+ """Performs a single optimization step.
+
+ Args:
+ param_name(String): the name of the param
+ param_value(Tensor): param values to be updated in-place
+ param_grad(Tensor): param gradients; the values may be updated
+ in this function and must not be reused afterwards
+ """
+ assert param_value.shape == param_grad.shape, ("shape mismatch",
+ param_value.shape,
+ param_grad.shape)
+ self.device_check(param_value, self.step_counter, self.lr_value,
+ self.mom_value, self.dam_value, self.decay_value)
+
+ # derive dtype from input
+ assert param_value.dtype == self.dtype
+
+ # TODO add branch operator
+ # if self.decay_value != 0:
+ if self.weight_decay.init_value != 0:
+ singa.Axpy(self.decay_value.data, param_value.data, param_grad.data)
+
+ if self.momentum.init_value != 0:
+ if param_name not in self.moments:
+ flag = param_value.device.graph_enabled()
+ param_value.device.EnableGraph(False)
+ self.moments[param_name] = tensor.zeros_like(param_value)
+ param_value.device.EnableGraph(flag)
+
+ buf = self.moments[param_name]
+ buf *= self.mom_value
+ alpha = 1.0 - self.dam_value
+ singa.Axpy(alpha.data, param_grad.data, buf.data)
+
+ if self.nesterov:
+ singa.Axpy(self.mom_value.data, buf.data, param_grad.data)
+ else:
+ param_grad = buf
+
+ minus_lr = 0.0 - self.lr_value
+ singa.Axpy(minus_lr.data, param_grad.data, param_value.data)
+
+ def step(self):
+ # increment step counter, lr and moment
+ super().step()
+ mom_value = self.momentum(self.step_counter).as_type(self.dtype)
+ dam_value = self.dampening(self.step_counter).as_type(self.dtype)
+ decay_value = self.weight_decay(self.step_counter).as_type(self.dtype)
+ self.mom_value.copy_from(mom_value)
+ self.dam_value.copy_from(dam_value)
+ self.decay_value.copy_from(decay_value)
+
+ def get_states(self):
+ states = super().get_states()
+ if self.mom_value > 0:
+ states[
+ 'moments'] = self.moments # a dict for 1st order moments tensors
+ return states
+
+ def set_states(self, states):
+ super().set_states(states)
+ if 'moments' in states:
+ self.moments = states['moments']
+ self.mom_value = self.momentum(self.step_counter)
+
+
+# Data augmentation
+def augmentation(x, batch_size):
+ xpad = np.pad(x, [[0, 0], [0, 0], [4, 4], [4, 4]], 'symmetric')
+ for data_num in range(0, batch_size):
+ offset = np.random.randint(8, size=2)
+ x[data_num, :, :, :] = xpad[data_num, :,
+ offset[0]:offset[0] + x.shape[2],
+ offset[1]:offset[1] + x.shape[2]]
+ if_flip = np.random.randint(2)
+ if (if_flip):
+ x[data_num, :, :, :] = x[data_num, :, :, ::-1]
+ return x
+
+
+# Calculate accuracy
+def accuracy(pred, target):
+ # y is network output to be compared with ground truth (int)
+ y = np.argmax(pred, axis=1)
+ a = y == target
+ correct = np.array(a, "int").sum()
+ return correct
+
+
+# Data partition according to the rank
+def partition(global_rank, world_size, train_x, train_y, val_x, val_y):
+ # Partition training data
+ data_per_rank = train_x.shape[0] // world_size
+ idx_start = global_rank * data_per_rank
+ idx_end = (global_rank + 1) * data_per_rank
+ train_x = train_x[idx_start:idx_end]
+ train_y = train_y[idx_start:idx_end]
+
+ # Partition evaluation data
+ data_per_rank = val_x.shape[0] // world_size
+ idx_start = global_rank * data_per_rank
+ idx_end = (global_rank + 1) * data_per_rank
+ val_x = val_x[idx_start:idx_end]
+ val_y = val_y[idx_start:idx_end]
+ return train_x, train_y, val_x, val_y
+
+
+# Function to all reduce NUMPY accuracy and loss from multiple devices
+def reduce_variable(variable, dist_opt, reducer):
+ reducer.copy_from_numpy(variable)
+ dist_opt.all_reduce(reducer.data)
+ dist_opt.wait()
+ output = tensor.to_numpy(reducer)
+ return output
+
+
+def resize_dataset(x, image_size):
+ num_data = x.shape[0]
+ dim = x.shape[1]
+ X = np.zeros(shape=(num_data, dim, image_size, image_size),
+ dtype=np.float32)
+ for n in range(0, num_data):
+ for d in range(0, dim):
+ X[n, d, :, :] = np.array(Image.fromarray(x[n, d, :, :]).resize(
+ (image_size, image_size), Image.BILINEAR),
+ dtype=np.float32)
+ return X
+
+
+def run(global_rank,
+ world_size,
+ local_rank,
+ layer_hidden_list, # hidden-layer sizes for the ms_model_mlp variant; passed positionally from __main__
+ max_epoch,
+ batch_size,
+ model,
+ data,
+ mssgd,
+ graph,
+ verbosity,
+ dist_option='plain',
+ spars=None,
+ precision='float32'):
+ # dev = device.create_cuda_gpu_on(local_rank) # need to change to CPU device for CPU-only machines
+ dev = device.get_default_device()
+ dev.SetRandSeed(0)
+ np.random.seed(0)
+
+ if data == 'cifar10':
+ from data import cifar10
+ train_x, train_y, val_x, val_y = cifar10.load()
+ elif data == 'cifar100':
+ from data import cifar100
+ train_x, train_y, val_x, val_y = cifar100.load()
+ elif data == 'mnist':
+ from data import mnist
+ train_x, train_y, val_x, val_y = mnist.load()
+
+
+ num_channels = train_x.shape[1]
+ image_size = train_x.shape[2]
+ data_size = np.prod(train_x.shape[1:train_x.ndim]).item()
+ num_classes = (np.max(train_y) + 1).item()
+
+ if model == 'resnet':
+ from model import resnet
+ model = resnet.resnet50(num_channels=num_channels,
+ num_classes=num_classes)
+ elif model == 'xceptionnet':
+ from model import xceptionnet
+ model = xceptionnet.create_model(num_channels=num_channels,
+ num_classes=num_classes)
+ elif model == 'cnn':
+ from model import cnn
+ model = cnn.create_model(num_channels=num_channels,
+ num_classes=num_classes)
+ elif model == 'alexnet':
+ from model import alexnet
+ model = alexnet.create_model(num_channels=num_channels,
+ num_classes=num_classes)
+ elif model == 'mlp':
+ import os, sys, inspect
+ current = os.path.dirname(
+ os.path.abspath(inspect.getfile(inspect.currentframe())))
+ parent = os.path.dirname(current)
+ sys.path.insert(0, parent)
+ from mlp import model
+ model = model.create_model(data_size=data_size,
+ num_classes=num_classes)
+
+ elif model == 'msmlp':
+ import os, sys, inspect
+ current = os.path.dirname(
+ os.path.abspath(inspect.getfile(inspect.currentframe())))
+ parent = os.path.dirname(current)
+ sys.path.insert(0, parent)
+ from msmlp import model
+ model = model.create_model(data_size=data_size,
+ num_classes=num_classes)
+
+ elif model == 'ms_model_mlp':
+ import os, sys, inspect
+ current = os.path.dirname(
+ os.path.abspath(inspect.getfile(inspect.currentframe())))
+ parent = os.path.dirname(current)
+ sys.path.insert(0, parent)
+ from ms_model_mlp import model
+ model = model.create_model(data_size=data_size,
+ num_classes=num_classes,
+ layer_hidden_list=layer_hidden_list)
+ # print ("model: \n", model)
+
+
+ # For distributed training, sequential has better performance
+ if hasattr(mssgd, "communicator"):
+ DIST = True
+ sequential = True
+ else:
+ DIST = False
+ sequential = False
+
+ if DIST:
+ train_x, train_y, val_x, val_y = partition(global_rank, world_size,
+ train_x, train_y, val_x,
+ val_y)
+
+ if model.dimension == 4:
+ tx = tensor.Tensor(
+ (batch_size, num_channels, model.input_size, model.input_size), dev,
+ singa_dtype[precision])
+ elif model.dimension == 2:
+ tx = tensor.Tensor((batch_size, data_size), dev, singa_dtype[precision])
+ train_x = np.reshape(train_x, (train_x.shape[0], -1)) # assign the result; np.reshape does not modify in place
+ val_x = np.reshape(val_x, (val_x.shape[0], -1))
+
+ ty = tensor.Tensor((batch_size,), dev, tensor.int32)
+ num_train_batch = train_x.shape[0] // batch_size
+ num_val_batch = val_x.shape[0] // batch_size
+ idx = np.arange(train_x.shape[0], dtype=np.int32)
+
+ # Attach model to graph
+ model.set_optimizer(mssgd)
+ model.compile([tx], is_train=True, use_graph=graph, sequential=sequential)
+ dev.SetVerbosity(verbosity)
+
+ # Training and evaluation loop
+ for epoch in range(max_epoch):
+ start_time = time.time()
+ np.random.shuffle(idx)
+
+ if global_rank == 0:
+ print('Starting Epoch %d:' % (epoch))
+
+ # Training phase
+ train_correct = np.zeros(shape=[1], dtype=np.float32)
+ test_correct = np.zeros(shape=[1], dtype=np.float32)
+ train_loss = np.zeros(shape=[1], dtype=np.float32)
+
+ model.train()
+ print ("num_train_batch: \n", num_train_batch)
+ print ()
+ for b in range(num_train_batch):
+ # if b % 200 == 0:
+ # print ("b: \n", b)
+ # Generate the patch data in this iteration
+ x = train_x[idx[b * batch_size:(b + 1) * batch_size]]
+ if model.dimension == 4:
+ x = augmentation(x, batch_size)
+ if (image_size != model.input_size):
+ x = resize_dataset(x, model.input_size)
+ x = x.astype(np_dtype[precision])
+ y = train_y[idx[b * batch_size:(b + 1) * batch_size]]
+
+
+ synflow_flag = False
+ # Train the model
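+ # On the last batch of the last epoch, the branch below appears to compute a
+ # SynFlow-style saliency score: feed an all-ones input, take the (name, param,
+ # grad) triples returned through the optimizer, and sum |param * grad| over the
+ # 2-D weight matrices (see the "step 1"-"step 4" comments). The score is only
+ # printed; it does not change the training update.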
+ if epoch == (max_epoch - 1) and b == (num_train_batch - 1): ### synflow calculation for the last batch
+ print ("last epoch calculate synflow")
+ synflow_flag = True
+ ### step 1: all one input
+ # Copy the patch data into input tensors
+ tx.copy_from_numpy(np.ones(x.shape, dtype=np.float32))
+ ty.copy_from_numpy(y)
+ ### step 2: all weights turned to positive (done)
+ ### step 3: new loss (done)
+ ### print ("before model forward ...")
+ pn_p_g_list, out, loss = model(tx, ty, dist_option, spars, synflow_flag)
+ ### step 4: calculate the multiplication of weights
+ synflow_score = 0.0
+ for pn_p_g_item in pn_p_g_list:
+ print ("calculate weight param * grad parameter name: \n", pn_p_g_item[0])
+ if len(pn_p_g_item[1].shape) == 2: # param_value.data is "weight"
+ print ("pn_p_g_item[1].shape: \n", pn_p_g_item[1].shape)
+ synflow_score += np.sum(np.absolute(tensor.to_numpy(pn_p_g_item[1]) * tensor.to_numpy(pn_p_g_item[2])))
+ print ("layer_hidden_list: \n", layer_hidden_list)
+ print ("synflow_score: \n", synflow_score)
+ elif epoch == (max_epoch - 1) and b == (num_train_batch - 2): # all weights turned to positive
+ # Copy the patch data into input tensors
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+ # print ("before model forward ...")
+ pn_p_g_list, out, loss = model(tx, ty, dist_option, spars, synflow_flag)
+ train_correct += accuracy(tensor.to_numpy(out), y)
+ train_loss += tensor.to_numpy(loss)[0]
+ # all params turned to positive
+ for pn_p_g_item in pn_p_g_list:
+ print ("absolute value parameter name: \n", pn_p_g_item[0])
+ pn_p_g_item[1] = tensor.abs(pn_p_g_item[1]) # tensor.abs returns a new tensor
+ else: # normal train steps
+ # Copy the patch data into input tensors
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+ # print ("normal before model(tx, ty, synflow_flag, dist_option, spars)")
+ # print ("train_cnn tx: \n", tx)
+ # print ("train_cnn ty: \n", ty)
+ # print ("before model forward ...")
+ pn_p_g_list, out, loss = model(tx, ty, dist_option, spars, synflow_flag)
+ # print ("normal after model(tx, ty, synflow_flag, dist_option, spars)")
+ train_correct += accuracy(tensor.to_numpy(out), y)
+ train_loss += tensor.to_numpy(loss)[0]
+
+ if DIST:
+ # Reduce the evaluation accuracy and loss from multiple devices
+ reducer = tensor.Tensor((1,), dev, tensor.float32)
+ train_correct = reduce_variable(train_correct, mssgd, reducer)
+ train_loss = reduce_variable(train_loss, mssgd, reducer)
+
+ if global_rank == 0:
+ print('Training loss = %f, training accuracy = %f' %
+ (train_loss, train_correct /
+ (num_train_batch * batch_size * world_size)),
+ flush=True)
+
+ # Evaluation phase
+ model.eval()
+ for b in range(num_val_batch):
+ x = val_x[b * batch_size:(b + 1) * batch_size]
+ if model.dimension == 4:
+ if (image_size != model.input_size):
+ x = resize_dataset(x, model.input_size)
+ x = x.astype(np_dtype[precision])
+ y = val_y[b * batch_size:(b + 1) * batch_size]
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+ out_test = model(tx)
+ test_correct += accuracy(tensor.to_numpy(out_test), y)
+
+ if DIST:
+ # Reduce the evaluation accuracy from multiple devices
+ test_correct = reduce_variable(test_correct, mssgd, reducer)
+
+ # Output the evaluation accuracy
+ if global_rank == 0:
+ print('Evaluation accuracy = %f, Elapsed Time = %fs' %
+ (test_correct / (num_val_batch * batch_size * world_size),
+ time.time() - start_time),
+ flush=True)
+
+ dev.PrintTimeProfiling()
+
+
+if __name__ == '__main__':
+ # Use argparse to get command config: max_epoch, model, data, etc., for single gpu training
+ parser = argparse.ArgumentParser(
+ description='Training using the autograd and graph.')
+ parser.add_argument(
+ 'model',
+ choices=['cnn', 'resnet', 'xceptionnet', 'mlp', 'msmlp', 'alexnet', 'ms_model_mlp'],
+ default='cnn')
+ parser.add_argument('data',
+ choices=['mnist', 'cifar10', 'cifar100'],
+ default='mnist')
+ parser.add_argument('-p',
+ choices=['float32', 'float16'],
+ default='float32',
+ dest='precision')
+ parser.add_argument('-m',
+ '--max-epoch',
+ default=3,
+ type=int,
+ help='maximum epochs',
+ dest='max_epoch')
+ parser.add_argument('-b',
+ '--batch-size',
+ default=64,
+ type=int,
+ help='batch size',
+ dest='batch_size')
+ parser.add_argument('-l',
+ '--learning-rate',
+ default=0.005,
+ type=float,
+ help='initial learning rate',
+ dest='lr')
+ # Determine which gpu to use
+ parser.add_argument('-i',
+ '--device-id',
+ default=0,
+ type=int,
+ help='which GPU to use',
+ dest='device_id')
+ parser.add_argument('-g',
+ '--disable-graph',
+ default='True',
+ action='store_false',
+ help='disable graph',
+ dest='graph')
+ parser.add_argument('-v',
+ '--log-verbosity',
+ default=0,
+ type=int,
+ help='logging verbosity',
+ dest='verbosity')
+
+ args = parser.parse_args()
+
+ DEFAULT_LAYER_CHOICES_4 = [8, 16, 24, 32]
+ for layer1 in DEFAULT_LAYER_CHOICES_4:
+ for layer2 in DEFAULT_LAYER_CHOICES_4:
+ for layer3 in DEFAULT_LAYER_CHOICES_4:
+ for layer4 in DEFAULT_LAYER_CHOICES_4:
+ layer_hidden_list = [layer1, layer2+1, layer3+2, layer4+3]
+ # print ("layer_hidden_list: \n", layer_hidden_list)
+ mssgd = MSSGD(lr=args.lr, momentum=0.9, weight_decay=1e-5, dtype=singa_dtype[args.precision])
+ run(0,
+ 1,
+ args.device_id,
+ layer_hidden_list,
+ args.max_epoch,
+ args.batch_size,
+ args.model,
+ args.data,
+ mssgd,
+ args.graph,
+ args.verbosity,
+ precision=args.precision)
diff --git a/examples/model_selection_psql/ms_mlp/train_mpi.py b/examples/model_selection_psql/ms_mlp/train_mpi.py
new file mode 100644
index 000000000..563d4b2c5
--- /dev/null
+++ b/examples/model_selection_psql/ms_mlp/train_mpi.py
@@ -0,0 +1,91 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+
+from singa import singa_wrap as singa
+from singa import opt
+from singa import tensor
+import argparse
+import train_cnn
+
+singa_dtype = {"float16": tensor.float16, "float32": tensor.float32}
+
+if __name__ == '__main__':
+ # Use argparse to get command config: max_epoch, model, data, etc., for single gpu training
+ parser = argparse.ArgumentParser(
+ description='Training using the autograd and graph.')
+ parser.add_argument('model',
+ choices=['cnn', 'resnet', 'xceptionnet', 'mlp'],
+ default='cnn')
+ parser.add_argument('data', choices=['mnist', 'cifar10', 'cifar100'], default='mnist')
+ parser.add_argument('-p',
+ choices=['float32', 'float16'],
+ default='float32',
+ dest='precision')
+ parser.add_argument('-m',
+ '--max-epoch',
+ default=10,
+ type=int,
+ help='maximum epochs',
+ dest='max_epoch')
+ parser.add_argument('-b',
+ '--batch-size',
+ default=64,
+ type=int,
+ help='batch size',
+ dest='batch_size')
+ parser.add_argument('-l',
+ '--learning-rate',
+ default=0.005,
+ type=float,
+ help='initial learning rate',
+ dest='lr')
+ parser.add_argument('-d',
+ '--dist-option',
+ default='plain',
+ choices=['plain','half','partialUpdate','sparseTopK','sparseThreshold'],
+ help='distributed training options',
+ dest='dist_option') # currently partialUpdate support graph=False only
+ parser.add_argument('-s',
+ '--sparsification',
+ default='0.05',
+ type=float,
+ help='the sparsity parameter used for sparsification, between 0 and 1',
+ dest='spars')
+ parser.add_argument('-g',
+ '--disable-graph',
+ default='True',
+ action='store_false',
+ help='disable graph',
+ dest='graph')
+ parser.add_argument('-v',
+ '--log-verbosity',
+ default=0,
+ type=int,
+ help='logging verbosity',
+ dest='verbosity')
+
+ args = parser.parse_args()
+
+ sgd = opt.SGD(lr=args.lr, momentum=0.9, weight_decay=1e-5, dtype=singa_dtype[args.precision])
+ sgd = opt.DistOpt(sgd)
+
+ train_cnn.run(sgd.global_rank, sgd.world_size, sgd.local_rank, args.max_epoch,
+ args.batch_size, args.model, args.data, sgd, args.graph,
+ args.verbosity, args.dist_option, args.spars, args.precision)
diff --git a/examples/model_selection_psql/ms_mlp/train_ms_model.py b/examples/model_selection_psql/ms_mlp/train_ms_model.py
new file mode 100644
index 000000000..3da53b257
--- /dev/null
+++ b/examples/model_selection_psql/ms_mlp/train_ms_model.py
@@ -0,0 +1,584 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from singa import singa_wrap as singa
+from singa import device
+from singa import tensor
+from singa import opt
+from singa import autograd
+from singa.opt import Optimizer
+from singa.opt import DecayScheduler
+from singa.opt import Constant
+import numpy as np
+import time
+import argparse
+from PIL import Image
+
+np_dtype = {"float16": np.float16, "float32": np.float32}
+
+singa_dtype = {"float32": tensor.float32}
+# singa_dtype = {"float16": tensor.float16, "float32": tensor.float32}
+
+### MSOptimizer
+class MSOptimizer(Optimizer):
+ def __call__(self, loss):
+ pn_p_g_list = self.call_with_returns(loss)
+ # print ("optimizer1 before self.step()")
+ # print ("optimizer1 before print len(pn_p_g_list): \n", len(pn_p_g_list))
+ self.step()
+ # print ("optimizer1 after print len(pn_p_g_list): \n", len(pn_p_g_list))
+ # print ("optimizer1 after self.step()")
+ return pn_p_g_list
+
+ def call_with_returns(self, loss):
+ # print ("call_with_returns before apply loss.data: \n", loss.data)
+ pn_p_g_list = []
+ for p, g in autograd.backward(loss):
+ if p.name is None:
+ p.name = id(p)
+ self.apply(p.name, p, g)
+ # print ("call with returns")
+ # print ("p.name: \n", p.name)
+ # print ("p.data: \n", p.data)
+ # print ("g.data: \n", g.data)
+ pn_p_g_list.append([p.name, p, g]) # need iterables
+ # print ("call_with_returns after apply loss.data: \n", loss.data)
+ return pn_p_g_list
+
+# MSSGD -- sub class of MSOptimizer
+class MSSGD(MSOptimizer):
+ """Implements stochastic gradient descent (optionally with momentum).
+
+ Nesterov momentum is based on the formula from `On the importance of initialization and momentum in deep learning`__.
+
+ Args:
+ lr(float): learning rate
+ momentum(float, optional): momentum factor(default: 0)
+ weight_decay(float, optional): weight decay(L2 penalty)(default: 0)
+ dampening(float, optional): dampening for momentum(default: 0)
+ nesterov(bool, optional): enables Nesterov momentum(default: False)
+
+ Typical usage example:
+ >>> from singa import opt
+ >>> optimizer = opt.SGD(lr=0.1, momentum=0.9)
+ >>> optimizer.update()
+
+ __ http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf
+
+ .. note::
+ The implementation of SGD with Momentum / Nesterov subtly differs from
+ Sutskever et al. and implementations in some other frameworks.
+
+ Considering the specific case of Momentum, the update can be written as
+
+ .. math::
+ v = \rho * v + g \\
+ p = p - lr * v
+
+ where p, g, v and :math:`\rho` denote the parameters, gradient,
+ velocity, and momentum respectively.
+
+ This is in contrast to Sutskever et al. and
+ other frameworks which employ an update of the form
+
+ .. math::
+ v = \rho * v + lr * g \\
+ p = p - v
+
+ The Nesterov version is analogously modified.
+ """
+
+ def __init__(self,
+ lr=0.1,
+ momentum=0,
+ dampening=0,
+ weight_decay=0,
+ nesterov=False,
+ dtype=tensor.float32):
+ super(MSSGD, self).__init__(lr)
+
+ # init momentum
+ if type(momentum) == float or type(momentum) == int:
+ if momentum < 0.0:
+ raise ValueError("Invalid momentum value: {}".format(momentum))
+ self.momentum = Constant(momentum)
+ elif isinstance(momentum, DecayScheduler):
+ self.momentum = momentum
+ momentum = momentum.init_value
+ else:
+ raise TypeError("Wrong momentum type")
+ # self.dtype = dtype
+ # self.mom_value = self.momentum(self.step_counter).as_type(self.dtype)
+ self.mom_value = self.momentum(self.step_counter)
+
+ # init dampening
+ if type(dampening) == float or type(dampening) == int:
+ self.dampening = Constant(dampening)
+ elif isinstance(dampening, DecayScheduler):
+ self.dampening = dampening
+ dampening = dampening.init_value
+ else:
+ raise TypeError("Wrong dampening type")
+ # self.dam_value = self.dampening(self.step_counter).as_type(self.dtype)
+ self.dam_value = self.dampening(self.step_counter)
+
+ # init weight_decay
+ if type(weight_decay) == float or type(weight_decay) == int:
+ if weight_decay < 0.0:
+ raise ValueError(
+ "Invalid weight_decay value: {}".format(weight_decay))
+ self.weight_decay = Constant(weight_decay)
+ elif isinstance(weight_decay, DecayScheduler):
+ self.weight_decay = weight_decay
+ else:
+ raise TypeError("Wrong weight_decay type")
+ # self.decay_value = self.weight_decay(self.step_counter).as_type(self.dtype)
+ self.decay_value = self.weight_decay(self.step_counter)
+
+ # init other params
+ self.nesterov = nesterov
+ self.moments = dict()
+
+ # check value
+ if nesterov and (momentum <= 0 or dampening != 0):
+ raise ValueError(
+ "Nesterov momentum requires a momentum and zero dampening")
+
+ def apply(self, param_name, param_value, param_grad):
+ """Performs a single optimization step.
+
+ Args:
+ param_name(String): the name of the param
+ param_value(Tensor): param values to be updated in-place
+ param_grad(Tensor): param gradients; the values may be updated
+ in this function and must not be reused afterwards
+ """
+ assert param_value.shape == param_grad.shape, ("shape mismatch",
+ param_value.shape,
+ param_grad.shape)
+ self.device_check(param_value, self.step_counter, self.lr_value,
+ self.mom_value, self.dam_value, self.decay_value)
+
+ # derive dtype from input
+ # assert param_value.dtype == self.dtype
+
+ # TODO add branch operator
+ # if self.decay_value != 0:
+ if self.weight_decay.init_value != 0:
+ singa.Axpy(self.decay_value.data, param_value.data, param_grad.data)
+
+ if self.momentum.init_value != 0:
+ if param_name not in self.moments:
+ flag = param_value.device.graph_enabled()
+ param_value.device.EnableGraph(False)
+ self.moments[param_name] = tensor.zeros_like(param_value)
+ param_value.device.EnableGraph(flag)
+
+ buf = self.moments[param_name]
+ buf *= self.mom_value
+ alpha = 1.0 - self.dam_value
+ singa.Axpy(alpha.data, param_grad.data, buf.data)
+
+ if self.nesterov:
+ singa.Axpy(self.mom_value.data, buf.data, param_grad.data)
+ else:
+ param_grad = buf
+
+ minus_lr = 0.0 - self.lr_value
+ singa.Axpy(minus_lr.data, param_grad.data, param_value.data)
+
+# Data augmentation
+def augmentation(x, batch_size):
+ xpad = np.pad(x, [[0, 0], [0, 0], [4, 4], [4, 4]], 'symmetric')
+ for data_num in range(0, batch_size):
+ offset = np.random.randint(8, size=2)
+ x[data_num, :, :, :] = xpad[data_num, :,
+ offset[0]:offset[0] + x.shape[2],
+ offset[1]:offset[1] + x.shape[2]]
+ if_flip = np.random.randint(2)
+ if (if_flip):
+ x[data_num, :, :, :] = x[data_num, :, :, ::-1]
+ return x
+
+
+# Calculate accuracy
+def accuracy(pred, target):
+ # y is network output to be compared with ground truth (int)
+ y = np.argmax(pred, axis=1)
+ a = y == target
+ correct = np.array(a, "int").sum()
+ return correct
+
+
+# Data partition according to the rank
+def partition(global_rank, world_size, train_x, train_y, val_x, val_y):
+ # Partition training data
+ data_per_rank = train_x.shape[0] // world_size
+ idx_start = global_rank * data_per_rank
+ idx_end = (global_rank + 1) * data_per_rank
+ train_x = train_x[idx_start:idx_end]
+ train_y = train_y[idx_start:idx_end]
+
+ # Partition evaluation data
+ data_per_rank = val_x.shape[0] // world_size
+ idx_start = global_rank * data_per_rank
+ idx_end = (global_rank + 1) * data_per_rank
+ val_x = val_x[idx_start:idx_end]
+ val_y = val_y[idx_start:idx_end]
+ return train_x, train_y, val_x, val_y
+
+
+# Function to all reduce NUMPY accuracy and loss from multiple devices
+def reduce_variable(variable, dist_opt, reducer):
+ reducer.copy_from_numpy(variable)
+ dist_opt.all_reduce(reducer.data)
+ dist_opt.wait()
+ output = tensor.to_numpy(reducer)
+ return output
+
+
+def resize_dataset(x, image_size):
+ num_data = x.shape[0]
+ dim = x.shape[1]
+ X = np.zeros(shape=(num_data, dim, image_size, image_size),
+ dtype=np.float32)
+ for n in range(0, num_data):
+ for d in range(0, dim):
+ X[n, d, :, :] = np.array(Image.fromarray(x[n, d, :, :]).resize(
+ (image_size, image_size), Image.BILINEAR),
+ dtype=np.float32)
+ return X
+
+def run(global_rank,
+ world_size,
+ local_rank,
+ layer_hidden_list,
+ max_epoch,
+ batch_size,
+ model,
+ data,
+ mssgd,
+ graph,
+ verbosity,
+ dist_option='plain',
+ spars=None,
+ precision='float32'):
+ # dev = device.create_cuda_gpu_on(local_rank) # need to change to CPU device for CPU-only machines
+ dev = device.get_default_device()
+ dev.SetRandSeed(0)
+ np.random.seed(0)
+
+ if data == 'cifar10':
+ from data import cifar10
+ train_x, train_y, val_x, val_y = cifar10.load()
+ elif data == 'cifar100':
+ from data import cifar100
+ train_x, train_y, val_x, val_y = cifar100.load()
+ elif data == 'mnist':
+ from data import mnist
+ train_x, train_y, val_x, val_y = mnist.load()
+
+
+ num_channels = train_x.shape[1]
+ image_size = train_x.shape[2]
+ data_size = np.prod(train_x.shape[1:train_x.ndim]).item()
+ num_classes = (np.max(train_y) + 1).item()
+
+ if model == 'resnet':
+ from model import resnet
+ model = resnet.resnet50(num_channels=num_channels,
+ num_classes=num_classes)
+ elif model == 'xceptionnet':
+ from model import xceptionnet
+ model = xceptionnet.create_model(num_channels=num_channels,
+ num_classes=num_classes)
+ elif model == 'cnn':
+ from model import cnn
+ model = cnn.create_model(num_channels=num_channels,
+ num_classes=num_classes)
+ elif model == 'alexnet':
+ from model import alexnet
+ model = alexnet.create_model(num_channels=num_channels,
+ num_classes=num_classes)
+ elif model == 'mlp':
+ import os, sys, inspect
+ current = os.path.dirname(
+ os.path.abspath(inspect.getfile(inspect.currentframe())))
+ parent = os.path.dirname(current)
+ sys.path.insert(0, parent)
+ from mlp import model
+ model = model.create_model(data_size=data_size,
+ num_classes=num_classes)
+
+ elif model == 'msmlp':
+ import os, sys, inspect
+ current = os.path.dirname(
+ os.path.abspath(inspect.getfile(inspect.currentframe())))
+ parent = os.path.dirname(current)
+ sys.path.insert(0, parent)
+ from msmlp import model
+ model = model.create_model(data_size=data_size,
+ num_classes=num_classes)
+
+ elif model == 'ms_model_mlp':
+ import os, sys, inspect
+ current = os.path.dirname(
+ os.path.abspath(inspect.getfile(inspect.currentframe())))
+ parent = os.path.dirname(current)
+ sys.path.insert(0, parent)
+ from ms_model_mlp import model
+ model = model.create_model(data_size=data_size,
+ num_classes=num_classes,
+ layer_hidden_list=layer_hidden_list)
+ # print ("model: \n", model)
+
+ # For distributed training, sequential has better performance
+ if hasattr(mssgd, "communicator"):
+ DIST = True
+ sequential = True
+ else:
+ DIST = False
+ sequential = False
+
+ if DIST:
+ train_x, train_y, val_x, val_y = partition(global_rank, world_size,
+ train_x, train_y, val_x,
+ val_y)
+
+ if model.dimension == 4:
+ tx = tensor.Tensor(
+ (batch_size, num_channels, model.input_size, model.input_size), dev,
+ singa_dtype[precision])
+ elif model.dimension == 2:
+ tx = tensor.Tensor((batch_size, data_size), dev, singa_dtype[precision])
+ train_x = np.reshape(train_x, (train_x.shape[0], -1)) # assign the result; np.reshape does not modify in place
+ val_x = np.reshape(val_x, (val_x.shape[0], -1))
+
+ ty = tensor.Tensor((batch_size,), dev, tensor.int32)
+ num_train_batch = train_x.shape[0] // batch_size
+ num_val_batch = val_x.shape[0] // batch_size
+ idx = np.arange(train_x.shape[0], dtype=np.int32)
+
+ # Attach model to graph
+ model.set_optimizer(mssgd)
+ model.compile([tx], is_train=True, use_graph=graph, sequential=sequential)
+ dev.SetVerbosity(verbosity)
+
+ # Training and evaluation loop
+ for epoch in range(max_epoch):
+ start_time = time.time()
+ np.random.shuffle(idx)
+
+ if global_rank == 0:
+ print('Starting Epoch %d:' % (epoch))
+
+ # Training phase
+ train_correct = np.zeros(shape=[1], dtype=np.float32)
+ test_correct = np.zeros(shape=[1], dtype=np.float32)
+ train_loss = np.zeros(shape=[1], dtype=np.float32)
+
+ model.train()
+ print ("num_train_batch: \n", num_train_batch)
+ print ()
+ for b in range(num_train_batch):
+ if b % 100 == 0:
+ print ("b: \n", b)
+ # Generate the patch data in this iteration
+ x = train_x[idx[b * batch_size:(b + 1) * batch_size]]
+ if model.dimension == 4:
+ x = augmentation(x, batch_size)
+ if (image_size != model.input_size):
+ x = resize_dataset(x, model.input_size)
+ x = x.astype(np_dtype[precision])
+ y = train_y[idx[b * batch_size:(b + 1) * batch_size]]
+
+
+ synflow_flag = False
+ # Train the model
+ if epoch == (max_epoch - 1) and b == (num_train_batch - 1): ### synflow calculation for the last batch
+ # print ("last epoch calculate synflow")
+ synflow_flag = True
+ ### step 1: all one input
+ # Copy the patch data into input tensors
+ tx.copy_from_numpy(np.ones(x.shape, dtype=np.float32))
+ ty.copy_from_numpy(y)
+ ### step 2: all weights turned to positive (done)
+ ### step 3: new loss (done)
+ # print ("before model forward ...")
+ pn_p_g_list, out, loss = model(tx, ty, dist_option, spars, synflow_flag)
+ ### step 4: calculate the multiplication of weights
+ synflow_score = 0.0
+ for pn_p_g_item in pn_p_g_list:
+ # print ("calculate weight param * grad parameter name: \n", pn_p_g_item[0])
+ if len(pn_p_g_item[1].shape) == 2: # param_value.data is "weight"
+ # print ("pn_p_g_item[1].shape: \n", pn_p_g_item[1].shape)
+ synflow_score += np.sum(np.absolute(tensor.to_numpy(pn_p_g_item[1]) * tensor.to_numpy(pn_p_g_item[2])))
+ # print ("layer_hidden_list: \n", layer_hidden_list)
+ # print ("synflow_score: \n", synflow_score)
+ elif epoch == (max_epoch - 1) and b == (num_train_batch - 2): # all weights turned to positive
+ # Copy the patch data into input tensors
+ # print ("all weights turned to positive\n")
+ # print ("x: \n", x)
+ # print ("y: \n", y)
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+ # print ("before model forward ...")
+ pn_p_g_list, out, loss = model(tx, ty, dist_option, spars, synflow_flag)
+ # print ("after model forward ...")
+ train_correct += accuracy(tensor.to_numpy(out), y)
+ train_loss += tensor.to_numpy(loss)[0]
+ # all params turned to positive
+ for pn_p_g_item in pn_p_g_list:
+ # print ("absolute value parameter name: \n", pn_p_g_item[0])
+ pn_p_g_item[1] = tensor.abs(pn_p_g_item[1]) # tensor.abs returns a new tensor
+ else: # normal train steps
+ # Copy the patch data into input tensors
+ # print ("normal train steps\n")
+ # print ("x.astype(np.float32): \n", x.astype(np.float32))
+ # print ("y: \n", y)
+ tx.copy_from_numpy(x.astype(np.float32))
+ # print ("tx: \n", tx)
+ ty.copy_from_numpy(y)
+ # print ("ty: \n", ty)
+ # print ("normal before model(tx, ty, synflow_flag, dist_option, spars)")
+ # print ("train_cnn tx: \n", tx)
+ # print ("train_cnn ty: \n", ty)
+ # print ("before model forward ...")
+ pn_p_g_list, out, loss = model(tx, ty, dist_option, spars, synflow_flag)
+ # print ("normal after model(tx, ty, synflow_flag, dist_option, spars)")
+ train_correct += accuracy(tensor.to_numpy(out), y)
+ train_loss += tensor.to_numpy(loss)[0]
+
+ if DIST:
+ # Reduce the evaluation accuracy and loss from multiple devices
+ reducer = tensor.Tensor((1,), dev, tensor.float32)
+ train_correct = reduce_variable(train_correct, mssgd, reducer)
+ train_loss = reduce_variable(train_loss, mssgd, reducer)
+
+ if global_rank == 0:
+ print('Training loss = %f, training accuracy = %f' %
+ (train_loss, train_correct /
+ (num_train_batch * batch_size * world_size)),
+ flush=True)
+
+ # Evaluation phase
+ model.eval()
+ for b in range(num_val_batch):
+ x = val_x[b * batch_size:(b + 1) * batch_size]
+ if model.dimension == 4:
+ if (image_size != model.input_size):
+ x = resize_dataset(x, model.input_size)
+ x = x.astype(np_dtype[precision])
+ y = val_y[b * batch_size:(b + 1) * batch_size]
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+ out_test = model(tx)
+ test_correct += accuracy(tensor.to_numpy(out_test), y)
+
+ if DIST:
+ # Reduce the evaluation accuracy from multiple devices
+ test_correct = reduce_variable(test_correct, mssgd, reducer)
+
+ # Output the evaluation accuracy
+ if global_rank == 0:
+ print('Evaluation accuracy = %f, Elapsed Time = %fs' %
+ (test_correct / (num_val_batch * batch_size * world_size),
+ time.time() - start_time),
+ flush=True)
+
+ dev.PrintTimeProfiling()
+
+
+if __name__ == '__main__':
+ # Use argparse to get command config: max_epoch, model, data, etc., for single gpu training
+ parser = argparse.ArgumentParser(
+ description='Training using the autograd and graph.')
+ parser.add_argument(
+ 'model',
+ choices=['cnn', 'resnet', 'xceptionnet', 'mlp', 'msmlp', 'alexnet', 'ms_model_mlp'],
+ default='cnn')
+ parser.add_argument('data',
+ choices=['mnist', 'cifar10', 'cifar100'],
+ default='mnist')
+ parser.add_argument('-p',
+ choices=['float32', 'float16'],
+ default='float32',
+ dest='precision')
+ parser.add_argument('-m',
+ '--max-epoch',
+ default=2,
+ type=int,
+ help='maximum epochs',
+ dest='max_epoch')
+ parser.add_argument('-b',
+ '--batch-size',
+ default=64,
+ type=int,
+ help='batch size',
+ dest='batch_size')
+ parser.add_argument('-l',
+ '--learning-rate',
+ default=0.005,
+ type=float,
+ help='initial learning rate',
+ dest='lr')
+ # Determine which gpu to use
+ parser.add_argument('-i',
+ '--device-id',
+ default=0,
+ type=int,
+ help='which GPU to use',
+ dest='device_id')
+ parser.add_argument('-g',
+ '--disable-graph',
+ default='True',
+ action='store_false',
+ help='disable graph',
+ dest='graph')
+ parser.add_argument('-v',
+ '--log-verbosity',
+ default=0,
+ type=int,
+ help='logging verbosity',
+ dest='verbosity')
+
+ args = parser.parse_args()
+
+ # mssgd = MSSGD(lr=args.lr, momentum=0.9, weight_decay=1e-5, dtype=singa_dtype[args.precision])
+
+ DEFAULT_LAYER_CHOICES_4 = [8, 16, 24, 32]
+ for layer1 in DEFAULT_LAYER_CHOICES_4:
+ for layer2 in DEFAULT_LAYER_CHOICES_4:
+ for layer3 in DEFAULT_LAYER_CHOICES_4:
+ for layer4 in DEFAULT_LAYER_CHOICES_4:
+ layer_hidden_list = [layer1, layer2+1, layer3+2, layer4+3]
+ mssgd = MSSGD(lr=args.lr, momentum=0.9, weight_decay=1e-5, dtype=singa_dtype[args.precision])
+ run(0,
+ 1,
+ args.device_id,
+ layer_hidden_list,
+ args.max_epoch,
+ args.batch_size,
+ args.model,
+ args.data,
+ mssgd,
+ args.graph,
+ args.verbosity,
+ precision=args.precision)
+
diff --git a/examples/model_selection_psql/ms_mlp/train_multiprocess.py b/examples/model_selection_psql/ms_mlp/train_multiprocess.py
new file mode 100644
index 000000000..182dd35ee
--- /dev/null
+++ b/examples/model_selection_psql/ms_mlp/train_multiprocess.py
@@ -0,0 +1,111 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+
+from singa import singa_wrap as singa
+from singa import opt
+from singa import tensor
+import argparse
+import train_cnn
+import multiprocessing
+
+singa_dtype = {"float16": tensor.float16, "float32": tensor.float32}
+
+def run(args, local_rank, world_size, nccl_id):
+ sgd = opt.SGD(lr=args.lr, momentum=0.9, weight_decay=1e-5, dtype=singa_dtype[args.precision])
+ sgd = opt.DistOpt(sgd, nccl_id=nccl_id, local_rank=local_rank, world_size=world_size)
+ train_cnn.run(sgd.global_rank, sgd.world_size, sgd.local_rank, args.max_epoch,
+ args.batch_size, args.model, args.data, sgd, args.graph,
+ args.verbosity, args.dist_option, args.spars, args.precision)
+
+
+if __name__ == '__main__':
+ # Use argparse to get command config: max_epoch, model, data, etc., for single gpu training
+ parser = argparse.ArgumentParser(
+ description='Training using the autograd and graph.')
+ parser.add_argument('model',
+ choices=['resnet', 'xceptionnet', 'cnn', 'mlp'],
+ default='cnn')
+ parser.add_argument('data', choices=['cifar10', 'cifar100', 'mnist'], default='mnist')
+ parser.add_argument('-p',
+ choices=['float32', 'float16'],
+ default='float32',
+ dest='precision')
+ parser.add_argument('-m',
+ '--max-epoch',
+ default=10,
+ type=int,
+ help='maximum epochs',
+ dest='max_epoch')
+ parser.add_argument('-b',
+ '--batch-size',
+ default=64,
+ type=int,
+ help='batch size',
+ dest='batch_size')
+ parser.add_argument('-l',
+ '--learning-rate',
+ default=0.005,
+ type=float,
+ help='initial learning rate',
+ dest='lr')
+ parser.add_argument('-w',
+ '--world-size',
+ default=2,
+ type=int,
+ help='number of gpus to be used',
+ dest='world_size')
+ parser.add_argument('-d',
+ '--dist-option',
+ default='plain',
+ choices=['plain','half','partialUpdate','sparseTopK','sparseThreshold'],
+ help='distributed training options',
+ dest='dist_option') # currently partialUpdate support graph=False only
+ parser.add_argument('-s',
+ '--sparsification',
+ default='0.05',
+ type=float,
+ help='the sparsity parameter used for sparsification, between 0 and 1',
+ dest='spars')
+ parser.add_argument('-g',
+ '--disable-graph',
+ default='True',
+ action='store_false',
+ help='disable graph',
+ dest='graph')
+ parser.add_argument('-v',
+ '--log-verbosity',
+ default=0,
+ type=int,
+ help='logging verbosity',
+ dest='verbosity')
+
+ args = parser.parse_args()
+
+ # Generate a NCCL ID to be used for collective communication
+ nccl_id = singa.NcclIdHolder()
+
+ process = []
+ for local_rank in range(0, args.world_size):
+ process.append(
+ multiprocessing.Process(target=run,
+ args=(args, local_rank, args.world_size, nccl_id)))
+
+ for p in process:
+ p.start()
diff --git a/examples/model_selection_psql/ms_model_mlp/model.py b/examples/model_selection_psql/ms_model_mlp/model.py
new file mode 100644
index 000000000..70d1a1748
--- /dev/null
+++ b/examples/model_selection_psql/ms_model_mlp/model.py
@@ -0,0 +1,224 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from singa import layer
+from singa import model
+from singa import tensor
+from singa import opt
+from singa import device
+from singa.autograd import Operator
+from singa.layer import Layer
+from singa import singa_wrap as singa
+import argparse
+import numpy as np
+
+np_dtype = {"float16": np.float16, "float32": np.float32}
+
+singa_dtype = {"float16": tensor.float16, "float32": tensor.float32}
+
+#### self-defined loss begin
+
+### from autograd.py
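+# SumError sums all elements of its input; its gradient is a tensor of ones
+# (scaled by dy). It is used as the loss when synflow_flag is set, which
+# appears to implement a SynFlow-style saliency pass over the network.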
+class SumError(Operator):
+
+ def __init__(self):
+ super(SumError, self).__init__()
+ # self.t = t.data
+
+ def forward(self, x):
+ # self.err = singa.__sub__(x, self.t)
+ self.data_x = x
+ # sqr = singa.Square(self.err)
+ # loss = singa.SumAll(sqr)
+ loss = singa.SumAll(x)
+ # self.n = 1
+ # for s in x.shape():
+ # self.n *= s
+ # loss /= self.n
+ return loss
+
+ def backward(self, dy=1.0):
+ # dx = self.err
+ dev = device.get_default_device()
+ dx = tensor.Tensor(self.data_x.shape, dev, singa_dtype['float32'])
+ dx.copy_from_numpy(np.ones(self.data_x.shape))
+ # dx *= float(2 / self.n)
+ dx *= dy
+ return dx
+
+def se_loss(x):
+ # assert x.shape == t.shape, "input and target shape different: %s, %s" % (
+ # x.shape, t.shape)
+ return SumError()(x)[0]
+
+### from layer.py
+class SumErrorLayer(Layer):
+ """
+ Generate a SumError operator
+ """
+
+ def __init__(self):
+ super(SumErrorLayer, self).__init__()
+
+ def forward(self, x):
+ return se_loss(x)
+
+class MSMLP(model.Model):
+
+ def __init__(self, data_size=10, perceptron_size=100, num_classes=10, layer_hidden_list=[10,10,10,10]):
+ super(MSMLP, self).__init__()
+ self.num_classes = num_classes
+ self.dimension = 2
+
+ self.relu = layer.ReLU()
+ self.linear1 = layer.Linear(layer_hidden_list[0])
+ self.linear2 = layer.Linear(layer_hidden_list[1])
+ self.linear3 = layer.Linear(layer_hidden_list[2])
+ self.linear4 = layer.Linear(layer_hidden_list[3])
+ self.linear5 = layer.Linear(num_classes)
+ self.softmax_cross_entropy = layer.SoftMaxCrossEntropy()
+ self.sum_error = SumErrorLayer()
+
+ def forward(self, inputs):
+ y = self.linear1(inputs)
+ y = self.relu(y)
+ y = self.linear2(y)
+ y = self.relu(y)
+ y = self.linear3(y)
+ y = self.relu(y)
+ y = self.linear4(y)
+ y = self.relu(y)
+ y = self.linear5(y)
+ return y
+
+ def train_one_batch(self, x, y, dist_option, spars, synflow_flag):
+ # print ("in train_one_batch")
+ out = self.forward(x)
+ # print ("train_one_batch x.data: \n", x.data)
+ # print ("train_one_batch y.data: \n", y.data)
+ # print ("train_one_batch out.data: \n", out.data)
+ if synflow_flag:
+ # print ("sum_error")
+ loss = self.sum_error(out)
+ else: # normal training
+ # print ("softmax_cross_entropy")
+ loss = self.softmax_cross_entropy(out, y)
+ # print ("train_one_batch loss.data: \n", loss.data)
+
+ pn_p_g_list = None # only the 'plain' branch produces the (name, param, grad) list
+ if dist_option == 'plain':
+ # print ("before pn_p_g_list = self.optimizer(loss)")
+ pn_p_g_list = self.optimizer(loss)
+ # print ("after pn_p_g_list = self.optimizer(loss)")
+ elif dist_option == 'half':
+ self.optimizer.backward_and_update_half(loss)
+ elif dist_option == 'partialUpdate':
+ self.optimizer.backward_and_partial_update(loss)
+ elif dist_option == 'sparseTopK':
+ self.optimizer.backward_and_sparse_update(loss,
+ topK=True,
+ spars=spars)
+ elif dist_option == 'sparseThreshold':
+ self.optimizer.backward_and_sparse_update(loss,
+ topK=False,
+ spars=spars)
+ # print ("len(pn_p_g_list): \n", len(pn_p_g_list))
+ # print ("len(pn_p_g_list[0]): \n", len(pn_p_g_list[0]))
+ # print ("pn_p_g_list[0][0]: \n", pn_p_g_list[0][0])
+ # print ("pn_p_g_list[0][1].data: \n", pn_p_g_list[0][1].data)
+ # print ("pn_p_g_list[0][2].data: \n", pn_p_g_list[0][2].data)
+ return pn_p_g_list, out, loss
+ # return pn_p_g_list[0], pn_p_g_list[1], pn_p_g_list[2], out, loss
+
+ def set_optimizer(self, optimizer):
+ self.optimizer = optimizer
+
+
+def create_model(pretrained=False, **kwargs):
+ """Constructs a CNN model.
+
+ Args:
+ pretrained (bool): If True, returns a pre-trained model.
+
+ Returns:
+ The created CNN model.
+ """
+ model = MSMLP(**kwargs)
+
+ return model
+
+
+__all__ = ['MSMLP', 'create_model']
+
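+# A minimal usage sketch (hypothetical values; placeholder_x stands for an
+# input placeholder tensor on the target device):
+#   m = create_model(num_classes=2, layer_hidden_list=[8, 16, 32, 8])
+#   m.set_optimizer(opt.SGD(lr=0.05, momentum=0.9))
+#   m.compile([placeholder_x], is_train=True, use_graph=True, sequential=True)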
+if __name__ == "__main__":
+ np.random.seed(0)
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-p',
+ choices=['float32', 'float16'],
+ default='float32',
+ dest='precision')
+ parser.add_argument('-g',
+ '--disable-graph',
+ default=True,
+ action='store_false',
+ help='disable graph',
+ dest='graph')
+ parser.add_argument('-m',
+ '--max-epoch',
+ default=1001,
+ type=int,
+ help='maximum epochs',
+ dest='max_epoch')
+ args = parser.parse_args()
+
+ # generate the boundary
+ f = lambda x: (5 * x + 1)
+ bd_x = np.linspace(-1.0, 1, 200)
+ bd_y = f(bd_x)
+
+ # generate the training data
+ x = np.random.uniform(-1, 1, 400)
+ y = f(x) + 2 * np.random.randn(len(x))
+
+ # choose one precision
+ precision = singa_dtype[args.precision]
+ np_precision = np_dtype[args.precision]
+
+ # convert training data to 2d space
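+ # each sample is a 2-D point (a, b) drawn around the line b = 5a + 1; the
+ # label records whether the point falls below that boundary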
+ label = np.asarray([5 * a + 1 > b for (a, b) in zip(x, y)]).astype(np.int32)
+ data = np.array([[a, b] for (a, b) in zip(x, y)], dtype=np_precision)
+
+ dev = device.create_cuda_gpu_on(0)
+ sgd = opt.SGD(0.1, 0.9, 1e-5, dtype=singa_dtype[args.precision])
+ tx = tensor.Tensor((400, 2), dev, precision)
+ ty = tensor.Tensor((400,), dev, tensor.int32)
+ model = MSMLP(data_size=2, perceptron_size=3, num_classes=2)
+
+ # attach model to graph
+ model.set_optimizer(sgd)
+ model.compile([tx], is_train=True, use_graph=args.graph, sequential=True)
+ model.train()
+
+ for i in range(args.max_epoch):
+ tx.copy_from_numpy(data)
+ ty.copy_from_numpy(label)
+ pn_p_g_list, out, loss = model(tx, ty, dist_option='plain', spars=None, synflow_flag=False)
+
+ if i % 100 == 0:
+ print("training loss = ", tensor.to_numpy(loss)[0])
\ No newline at end of file
diff --git a/examples/model_selection_psql/ms_model_mlp/native.py b/examples/model_selection_psql/ms_model_mlp/native.py
new file mode 100644
index 000000000..a82ec3b24
--- /dev/null
+++ b/examples/model_selection_psql/ms_model_mlp/native.py
@@ -0,0 +1,137 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from singa import tensor
+from singa.tensor import Tensor
+from singa import autograd
+from singa import opt
+import numpy as np
+from singa import device
+import argparse
+
+np_dtype = {"float16": np.float16, "float32": np.float32}
+
+singa_dtype = {"float16": tensor.float16, "float32": tensor.float32}
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-p',
+ choices=['float32', 'float16'],
+ default='float32',
+ dest='precision')
+ parser.add_argument('-m',
+ '--max-epoch',
+ default=1001,
+ type=int,
+ help='maximum epochs',
+ dest='max_epoch')
+ args = parser.parse_args()
+
+ np.random.seed(0)
+
+ autograd.training = True
+
+ # prepare training data in numpy array
+
+ # generate the boundary
+ f = lambda x: (5 * x + 1)
+ bd_x = np.linspace(-1.0, 1, 200)
+ bd_y = f(bd_x)
+
+ # generate the training data
+ x = np.random.uniform(-1, 1, 400)
+ y = f(x) + 2 * np.random.randn(len(x))
+
+ # convert training data to 2d space
+ label = np.asarray([5 * a + 1 > b for (a, b) in zip(x, y)])
+ data = np.array([[a, b] for (a, b) in zip(x, y)], dtype=np.float32)
+
+ def to_categorical(y, num_classes):
+ """
+ Converts a class vector (integers) to binary class matrix.
+
+ Args:
+ y: class vector to be converted into a matrix
+ (integers from 0 to num_classes).
+ num_classes: total number of classes.
+
+ Returns:
+ A binary matrix representation of the input.
+ """
+ y = np.array(y, dtype="int")
+ n = y.shape[0]
+ categorical = np.zeros((n, num_classes))
+ categorical[np.arange(n), y] = 1
+ return categorical
+
+ label = to_categorical(label, 2).astype(np.float32)
+ print("train_data_shape:", data.shape)
+ print("train_label_shape:", label.shape)
+
+ precision = singa_dtype[args.precision]
+ np_precision = np_dtype[args.precision]
+
+ dev = device.create_cuda_gpu()
+
+ inputs = Tensor(data=data, device=dev)
+ target = Tensor(data=label, device=dev)
+
+ inputs = inputs.as_type(precision)
+ target = target.as_type(tensor.int32)
+
+ w0_np = np.random.normal(0, 0.1, (2, 3)).astype(np_precision)
+ w0 = Tensor(data=w0_np,
+ device=dev,
+ dtype=precision,
+ requires_grad=True,
+ stores_grad=True)
+ b0 = Tensor(shape=(3,),
+ device=dev,
+ dtype=precision,
+ requires_grad=True,
+ stores_grad=True)
+ b0.set_value(0.0)
+
+ w1_np = np.random.normal(0, 0.1, (3, 2)).astype(np_precision)
+ w1 = Tensor(data=w1_np,
+ device=dev,
+ dtype=precision,
+ requires_grad=True,
+ stores_grad=True)
+ b1 = Tensor(shape=(2,),
+ device=dev,
+ dtype=precision,
+ requires_grad=True,
+ stores_grad=True)
+ b1.set_value(0.0)
+
+ sgd = opt.SGD(0.05, 0.8)
+
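+ # each iteration rebuilds the two-layer MLP (matmul -> add_bias -> relu ->
+ # matmul -> add_bias) with autograd ops; sgd(loss) back-propagates and
+ # updates w0, b0, w1 and b1 in place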
+ # training process
+ for i in range(args.max_epoch):
+ x = autograd.matmul(inputs, w0)
+ x = autograd.add_bias(x, b0)
+ x = autograd.relu(x)
+ x = autograd.matmul(x, w1)
+ x = autograd.add_bias(x, b1)
+ loss = autograd.softmax_cross_entropy(x, target)
+ sgd(loss)
+
+ if i % 100 == 0:
+ print("%d, training loss = " % i, tensor.to_numpy(loss)[0])
diff --git a/examples/model_selection_psql/msmlp/model.py b/examples/model_selection_psql/msmlp/model.py
new file mode 100644
index 000000000..70bc2341d
--- /dev/null
+++ b/examples/model_selection_psql/msmlp/model.py
@@ -0,0 +1,209 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from singa import layer
+from singa import model
+from singa import tensor
+from singa import opt
+from singa import device
+from singa.autograd import Operator
+from singa.layer import Layer
+from singa import singa_wrap as singa
+import argparse
+import numpy as np
+
+np_dtype = {"float16": np.float16, "float32": np.float32}
+
+singa_dtype = {"float16": tensor.float16, "float32": tensor.float32}
+
+### refer to autograd.py
+class SumError(Operator):
+
+ def __init__(self):
+ super(SumError, self).__init__()
+ # self.t = t.data
+
+ def forward(self, x):
+ # self.err = singa.__sub__(x, self.t)
+ self.data_x = x
+ # sqr = singa.Square(self.err)
+ # loss = singa.SumAll(sqr)
+ loss = singa.SumAll(x)
+ # self.n = 1
+ # for s in x.shape():
+ # self.n *= s
+ # loss /= self.n
+ return loss
+
+ def backward(self, dy=1.0):
+ # dx = self.err
+ dev = device.get_default_device()
+ dx = tensor.Tensor(self.data_x.shape, dev, singa_dtype['float32'])
+ dx.copy_from_numpy(np.ones(self.data_x.shape))
+ # dx *= float(2 / self.n)
+ dx *= dy
+ return dx
+
+def se_loss(x):
+ return SumError()(x)[0]
+
+### refer to layer.py
+class SumErrorLayer(Layer):
+ """
+ Generate a SumError Layer
+ """
+
+ def __init__(self):
+ super(SumErrorLayer, self).__init__()
+
+ def forward(self, x):
+ return se_loss(x)
+
+class MSMLP(model.Model):
+
+ def __init__(self, data_size=10, perceptron_size=100, num_classes=10):
+ super(MSMLP, self).__init__()
+ self.num_classes = num_classes
+ self.dimension = 2
+
+ self.relu = layer.ReLU()
+ self.linear1 = layer.Linear(perceptron_size)
+ self.linear2 = layer.Linear(num_classes)
+ self.softmax_cross_entropy = layer.SoftMaxCrossEntropy()
+ self.sum_error = SumErrorLayer() # for synflow backward
+
+ def forward(self, inputs):
+ y = self.linear1(inputs)
+ y = self.relu(y)
+ y = self.linear2(y)
+ return y
+
+ def train_one_batch(self, x, y, synflow_flag, dist_option, spars):
+ # print ("in train_one_batch")
+ out = self.forward(x)
+ # print ("train_one_batch x.data: \n", x.data)
+ # print ("train_one_batch y.data: \n", y.data)
+ # print ("train_one_batch out.data: \n", out.data)
+ if synflow_flag:
+ loss = self.sum_error(out)
+ # print ("sum_error")
+ else: # normal training
+ loss = self.softmax_cross_entropy(out, y)
+
+ pn_p_g_list = None # only the 'plain' branch produces the (name, param, grad) list
+ if dist_option == 'plain':
+ # print ("before pn_p_g_list = self.optimizer(loss)")
+ pn_p_g_list = self.optimizer(loss)
+ # print ("after pn_p_g_list = self.optimizer(loss)")
+ elif dist_option == 'half':
+ self.optimizer.backward_and_update_half(loss)
+ elif dist_option == 'partialUpdate':
+ self.optimizer.backward_and_partial_update(loss)
+ elif dist_option == 'sparseTopK':
+ self.optimizer.backward_and_sparse_update(loss,
+ topK=True,
+ spars=spars)
+ elif dist_option == 'sparseThreshold':
+ self.optimizer.backward_and_sparse_update(loss,
+ topK=False,
+ spars=spars)
+ # print ("len(pn_p_g_list): \n", len(pn_p_g_list))
+ # print ("len(pn_p_g_list[0]): \n", len(pn_p_g_list[0]))
+ # print ("pn_p_g_list[0][0]: \n", pn_p_g_list[0][0])
+ # print ("pn_p_g_list[0][1].data: \n", pn_p_g_list[0][1].data)
+ # print ("pn_p_g_list[0][2].data: \n", pn_p_g_list[0][2].data)
+ return pn_p_g_list, out, loss
+ # return pn_p_g_list[0], pn_p_g_list[1], pn_p_g_list[2], out, loss
+
+ def set_optimizer(self, optimizer):
+ self.optimizer = optimizer
+
+
+def create_model(pretrained=False, **kwargs):
+ """Constructs a CNN model.
+
+ Args:
+ pretrained (bool): If True, returns a pre-trained model.
+
+ Returns:
+ The created CNN model.
+ """
+ model = MSMLP(**kwargs)
+
+ return model
+
+
+__all__ = ['MSMLP', 'create_model']
+
+if __name__ == "__main__":
+ np.random.seed(0)
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-p',
+ choices=['float32', 'float16'],
+ default='float32',
+ dest='precision')
+ parser.add_argument('-g',
+ '--disable-graph',
+ default=True,
+ action='store_false',
+ help='disable graph',
+ dest='graph')
+ parser.add_argument('-m',
+ '--max-epoch',
+ default=1001,
+ type=int,
+ help='maximum epochs',
+ dest='max_epoch')
+ args = parser.parse_args()
+
+ # generate the boundary
+ f = lambda x: (5 * x + 1)
+ bd_x = np.linspace(-1.0, 1, 200)
+ bd_y = f(bd_x)
+
+ # generate the training data
+ x = np.random.uniform(-1, 1, 400)
+ y = f(x) + 2 * np.random.randn(len(x))
+
+ # choose one precision
+ precision = singa_dtype[args.precision]
+ np_precision = np_dtype[args.precision]
+
+ # convert training data to 2d space
+ label = np.asarray([5 * a + 1 > b for (a, b) in zip(x, y)]).astype(np.int32)
+ data = np.array([[a, b] for (a, b) in zip(x, y)], dtype=np_precision)
+
+ dev = device.create_cuda_gpu_on(0)
+ sgd = opt.SGD(0.1, 0.9, 1e-5, dtype=singa_dtype[args.precision])
+ tx = tensor.Tensor((400, 2), dev, precision)
+ ty = tensor.Tensor((400,), dev, tensor.int32)
+ model = MSMLP(data_size=2, perceptron_size=3, num_classes=2)
+
+ # attach model to graph
+ model.set_optimizer(sgd)
+ model.compile([tx], is_train=True, use_graph=args.graph, sequential=True)
+ model.train()
+
+ for i in range(args.max_epoch):
+ tx.copy_from_numpy(data)
+ ty.copy_from_numpy(label)
+ pn_p_g_list, out, loss = model(tx, ty, synflow_flag=False, dist_option='plain', spars=None)
+
+ if i % 100 == 0:
+ print("training loss = ", tensor.to_numpy(loss)[0])
diff --git a/examples/model_selection_psql/msmlp/native.py b/examples/model_selection_psql/msmlp/native.py
new file mode 100644
index 000000000..a82ec3b24
--- /dev/null
+++ b/examples/model_selection_psql/msmlp/native.py
@@ -0,0 +1,137 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from singa import tensor
+from singa.tensor import Tensor
+from singa import autograd
+from singa import opt
+import numpy as np
+from singa import device
+import argparse
+
+np_dtype = {"float16": np.float16, "float32": np.float32}
+
+singa_dtype = {"float16": tensor.float16, "float32": tensor.float32}
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-p',
+ choices=['float32', 'float16'],
+ default='float32',
+ dest='precision')
+ parser.add_argument('-m',
+ '--max-epoch',
+ default=1001,
+ type=int,
+ help='maximum epochs',
+ dest='max_epoch')
+ args = parser.parse_args()
+
+ np.random.seed(0)
+
+ autograd.training = True
+
+ # prepare training data in numpy array
+
+ # generate the boundary
+ f = lambda x: (5 * x + 1)
+ bd_x = np.linspace(-1.0, 1, 200)
+ bd_y = f(bd_x)
+
+ # generate the training data
+ x = np.random.uniform(-1, 1, 400)
+ y = f(x) + 2 * np.random.randn(len(x))
+
+ # convert training data to 2d space
+ label = np.asarray([5 * a + 1 > b for (a, b) in zip(x, y)])
+ data = np.array([[a, b] for (a, b) in zip(x, y)], dtype=np.float32)
+
+ def to_categorical(y, num_classes):
+ """
+ Converts a class vector (integers) to binary class matrix.
+
+ Args:
+ y: class vector to be converted into a matrix
+ (integers from 0 to num_classes).
+ num_classes: total number of classes.
+
+ Returns:
+ A binary matrix representation of the input.
+ """
+ y = np.array(y, dtype="int")
+ n = y.shape[0]
+ categorical = np.zeros((n, num_classes))
+ categorical[np.arange(n), y] = 1
+ return categorical
+
+ label = to_categorical(label, 2).astype(np.float32)
+ print("train_data_shape:", data.shape)
+ print("train_label_shape:", label.shape)
+
+ precision = singa_dtype[args.precision]
+ np_precision = np_dtype[args.precision]
+
+ dev = device.create_cuda_gpu()
+
+ inputs = Tensor(data=data, device=dev)
+ target = Tensor(data=label, device=dev)
+
+ inputs = inputs.as_type(precision)
+ target = target.as_type(tensor.int32)
+
+ w0_np = np.random.normal(0, 0.1, (2, 3)).astype(np_precision)
+ w0 = Tensor(data=w0_np,
+ device=dev,
+ dtype=precision,
+ requires_grad=True,
+ stores_grad=True)
+ b0 = Tensor(shape=(3,),
+ device=dev,
+ dtype=precision,
+ requires_grad=True,
+ stores_grad=True)
+ b0.set_value(0.0)
+
+ w1_np = np.random.normal(0, 0.1, (3, 2)).astype(np_precision)
+ w1 = Tensor(data=w1_np,
+ device=dev,
+ dtype=precision,
+ requires_grad=True,
+ stores_grad=True)
+ b1 = Tensor(shape=(2,),
+ device=dev,
+ dtype=precision,
+ requires_grad=True,
+ stores_grad=True)
+ b1.set_value(0.0)
+
+ sgd = opt.SGD(0.05, 0.8)
+
+ # training process
+ for i in range(args.max_epoch):
+ x = autograd.matmul(inputs, w0)
+ x = autograd.add_bias(x, b0)
+ x = autograd.relu(x)
+ x = autograd.matmul(x, w1)
+ x = autograd.add_bias(x, b1)
+ loss = autograd.softmax_cross_entropy(x, target)
+ sgd(loss)
+
+ if i % 100 == 0:
+ print("%d, training loss = " % i, tensor.to_numpy(loss)[0])
diff --git a/examples/model_selection_psql/pkg_model_code/model.py b/examples/model_selection_psql/pkg_model_code/model.py
new file mode 100644
index 000000000..5e84e47ca
--- /dev/null
+++ b/examples/model_selection_psql/pkg_model_code/model.py
@@ -0,0 +1,366 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# =============================================================================
+'''
+This script includes the Model class for Python users
+to use the computational graph in their models.
+'''
+
+import os
+import gc
+import time
+import json
+import zipfile
+import numpy as np
+from functools import wraps
+# from collections import Iterable
+try:
+ from collections.abc import Iterable
+except ImportError:
+ from collections import Iterable
+
+from singa import tensor
+from singa import autograd
+from singa import layer
+from .tensor import Tensor
+from . import singa_wrap as singa
+
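+# ModelMeta wraps a subclass's train_one_batch so that, when graph mode is on,
+# the first call records its operations into the device's computational graph
+# and later calls simply replay the buffered graph via dev.RunGraph().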
+class ModelMeta(layer.LayerMeta):
+
+ def buffer_operation(func):
+
+ def remove_creator(tensors):
+ if not tensors:
+ return
+
+ # if isinstance(tensors, Iterable):
+ # if isinstance(tensors, str):
+ # return
+ # else:
+ # for item in tensors:
+ # if isinstance(item, Iterable):
+ # remove_creator(item)
+ # elif isinstance(item, tensor.Tensor):
+ # item.creator = None
+ if isinstance(tensors, Iterable):
+ for item in tensors:
+ if isinstance(item, Iterable):
+ remove_creator(item)
+ elif isinstance(item, tensor.Tensor):
+ item.creator = None
+ elif isinstance(tensors, tensor.Tensor):
+ tensors.creator = None
+
+ @wraps(func)
+ def wrapper(self, *args, **kwargs):
+ if self.graph_mode and self.training:
+ if len(args) == 0:
+ raise ValueError('expect at least one input tensor')
+
+ if isinstance(args[0], list):
+ assert isinstance(
+ args[0][0],
+ Tensor), ('function expects PlaceHolders or Tensors')
+ dev = args[0][0].device
+ else:
+ assert isinstance(
+ args[0],
+ Tensor), ('function expects PlaceHolders or Tensors')
+ dev = args[0].device
+
+ if not self._buffered:
+ # buffer operations
+ dev.EnableGraph(True)
+ self._results = func(self, *args, **kwargs)
+ dev.Sync()
+ dev.EnableGraph(False)
+ self._buffered = True
+
+ # deconstruct Operations before running the entire graph
+ remove_creator(self._results)
+
+ # make sure all Operations are deallocated
+ gc.collect()
+
+ # run graph
+ dev.RunGraph(self.sequential)
+ return self._results
+ else:
+ return func(self, *args, **kwargs)
+
+ return wrapper
+
+ def __new__(cls, name, bases, attr):
+ if 'train_one_batch' in attr:
+ attr['train_one_batch'] = ModelMeta.buffer_operation(
+ attr['train_one_batch'])
+
+ return super(ModelMeta, cls).__new__(cls, name, bases, attr)
+
+
+class Model(layer.Layer, metaclass=ModelMeta):
+ """ Base class for your neural network models.
+
+ Example usage::
+
+ import numpy as np
+ from singa import opt
+ from singa import tensor
+ from singa import device
+ from singa import autograd
+ from singa import layer
+ from singa import model
+
+ class MyModel(model.Model):
+ def __init__(self):
+ super(MyModel, self).__init__()
+
+ self.softmax_cross_entropy = layer.SoftMaxCrossEntropy()
+ self.conv1 = layer.Conv2d(1, 20, 5, padding=0)
+ self.conv2 = layer.Conv2d(20, 50, 5, padding=0)
+ self.sgd = opt.SGD(lr=0.01)
+
+ def forward(self, x):
+ y = self.conv1(x)
+ y = self.conv2(y)
+ return y
+
+ def train_one_batch(self, x, y):
+ out = self.forward(x)
+ loss = self.softmax_cross_entropy(out, y)
+ self.sgd(loss)
+ return out, loss
+
+ """
+
+ # save load states constant
+ TENSOR_DICT_FILENAME = '/tensor_dict.npz'
+ STATES_ATTR_FILENAME = '/states_attr.json'
+ MODEL_STATE_TYPE = 0
+ AUX_STATE_TYPE = 1
+
+ def __init__(self):
+ """
+ Initializes internal Model state
+ """
+ super(Model, self).__init__()
+
+ self.training = True
+ self.graph_mode = True
+ self.sequential = False
+ self._buffered = False
+ self._results = None
+
+ def compile(self, inputs, is_train=True, use_graph=False, sequential=False):
+ """ Compile and initialize the model
+
+ This function will automatically derive the shape of parameters
+ in each sublayer based on the shape of the input placeholders. It also
+ records the training mode and the graph-execution settings.
+
+ Args:
+ inputs(list): the list of input tensors(placeholders)
+ is_train(bool): when is_train is True, the model will enter
+ training mode, otherwise it will enter evaluation mode
+ use_graph(bool): when use_graph is True, computational graph
+ will be used to train this model
+ sequential(bool): when sequential is True, the model will execute ops
+ in the order in which they were added to the graph
+ """
+ assert len(inputs) > 0 and isinstance(inputs[0], Tensor), (
+ 'compile function expects PlaceHolders or Tensors')
+
+ dev = inputs[0].device
+ dev.EnableGraph(True)
+ self.forward(*inputs)
+ dev.EnableGraph(False)
+ dev.ResetGraph()
+
+ autograd.training = is_train
+ self.training = is_train
+ self.graph_mode = use_graph
+ self.sequential = sequential
+
+ def forward(self, *input):
+ """Defines the computation performed in every forward propagation.
+
+ Should be overridden by all subclasses.
+
+ Args:
+ *input: the input training data for the model
+
+ Returns:
+ out: the outputs of the forward propagation.
+ """
+ raise NotImplementedError
+
+ def train_one_batch(self, *input, **kwargs):
+ """Defines the computation performed in every training iteration
+
+ Should be overridden by all subclasses.
+
+ Args:
+ *input: the arguments of train_one_batch
+ **kwargs: the keyword arguments of train_one_batch
+ """
+ raise NotImplementedError
+
+ def train(self, mode=True):
+ """Set the model in evaluation mode.
+
+ Args:
+ mode(bool): when mode is True, this model will enter training mode
+ """
+ self.training = mode
+ autograd.training = mode
+
+ def eval(self):
+ """Sets the model in evaluation mode.
+ """
+ self.train(mode=False)
+
+ def graph(self, mode=True, sequential=False):
+ """ Turn on the computational graph. Specify execution mode.
+
+ Args:
+ mode(bool): when mode is True, model will use computational graph
+ sequential(bool): when sequential is True, the model will execute ops
+ in the order in which they were added to the graph
+ """
+ self.graph_mode = mode
+ self.sequential = sequential
+
+ def __get_name__(self):
+ return self.__class__.__name__
+
+ def __call__(self, *input, **kwargs):
+ if self.training:
+ return self.train_one_batch(*input, **kwargs)
+ else:
+ return self.forward(*input, **kwargs)
+
+ def save_states(self, fpath, aux_states={}):
+ """Save states.
+
+ Args:
+ fpath: output file path (without the extension)
+ aux_states(dict): auxiliary states to save; values must be Tensors,
+ e.g., epoch ID, learning rate, optimizer states
+ """
+ assert not os.path.isfile(fpath), (
+ "Failed to save states, %s is already existed." % fpath)
+
+ states = self.get_states()
+
+ # save states data and attr
+ tensor_dict = {}
+ states_attr = {}
+ for k, v in states.items():
+ assert isinstance(v, tensor.Tensor), "Only tensor state is allowed"
+ tensor_dict[k] = tensor.to_numpy(v)
+ states_attr[k] = {
+ 'state_type': self.MODEL_STATE_TYPE,
+ 'shape': v.shape,
+ 'dtype': v.dtype
+ }
+
+ for k, v in aux_states.items():
+ assert isinstance(v,
+ tensor.Tensor), "Only tensor aux state is allowed"
+ tensor_dict[k] = tensor.to_numpy(v)
+ states_attr[k] = {
+ 'state_type': self.AUX_STATE_TYPE,
+ 'shape': v.shape,
+ 'dtype': v.dtype
+ }
+
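+ # the archive written to fpath is a zip holding tensor_dict.npz (the tensor
+ # values) and states_attr.json (per-state type/shape/dtype metadata)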
+ # save to files
+ timestamp = time.time()
+ tmp_dir = '/tmp/singa_save_states_%s' % timestamp
+ os.mkdir(tmp_dir)
+ tensor_dict_fp = tmp_dir + self.TENSOR_DICT_FILENAME
+ states_attr_fp = tmp_dir + self.STATES_ATTR_FILENAME
+
+ np.savez(tensor_dict_fp, **tensor_dict)
+
+ with open(states_attr_fp, 'w') as fp:
+ json.dump(states_attr, fp)
+
+ compression = zipfile.ZIP_DEFLATED
+ with zipfile.ZipFile(fpath, mode="w") as zf:
+ zf.write(tensor_dict_fp,
+ os.path.basename(tensor_dict_fp),
+ compress_type=compression)
+ zf.write(states_attr_fp,
+ os.path.basename(states_attr_fp),
+ compress_type=compression)
+
+ # clean up tmp files
+ os.remove(tensor_dict_fp)
+ os.remove(states_attr_fp)
+ os.rmdir(tmp_dir)
+
+ def load_states(self, fpath):
+ """Load the model states and auxiliary states from disk.
+
+ Usage:
+ m = MyModel()
+ m.compile(...)
+ aux_states = m.load_states('mymodel.zip')
+
+ Args:
+ fpath: input file path (without the extension)
+ Returns:
+ dict: the auxiliary states that were passed to save_states
+ """
+
+ assert os.path.isfile(fpath), (
+ "Failed to load states, %s is not exist." % fpath)
+
+ timestamp = time.time()
+ tmp_dir = '/tmp/singa_load_states_%s' % timestamp
+ os.mkdir(tmp_dir)
+
+ with zipfile.ZipFile(fpath, 'r') as zf:
+ zf.extractall(tmp_dir)
+
+ tensor_dict_fp = tmp_dir + self.TENSOR_DICT_FILENAME
+ states_attr_fp = tmp_dir + self.STATES_ATTR_FILENAME
+
+ with open(states_attr_fp) as f:
+ states_attr = json.load(f)
+
+ tensor_dict = np.load(tensor_dict_fp)
+
+ # restore singa tensor from numpy
+ model_states = dict()
+ aux_states = dict()
+
+ for k in tensor_dict.files:
+ if states_attr[k]['state_type'] == self.MODEL_STATE_TYPE:
+ model_states[k] = tensor.from_numpy(tensor_dict[k])
+ elif states_attr[k]['state_type'] == self.AUX_STATE_TYPE:
+ aux_states[k] = tensor.from_numpy(tensor_dict[k])
+
+ # restore model_states
+ self.set_states(model_states)
+
+ # clean up tmp files
+ os.remove(tensor_dict_fp)
+ os.remove(states_attr_fp)
+ os.rmdir(tmp_dir)
+ return aux_states
diff --git a/setup.py b/setup.py
index b3147e57e..cfd87d611 100644
--- a/setup.py
+++ b/setup.py
@@ -83,7 +83,7 @@
from datetime import date
# stable version
-VERSION = '4.0.0'
+VERSION = '4.1.0'
# get the git hash
# git_hash = subprocess.check_output(["git", "describe"]).strip().split('-')[-1][1:]
# comment the next line to build wheel for stable version
diff --git a/tool/conda/singa/meta.yaml b/tool/conda/singa/meta.yaml
index 5a01ef3a9..5bcc36212 100644
--- a/tool/conda/singa/meta.yaml
+++ b/tool/conda/singa/meta.yaml
@@ -20,7 +20,7 @@
# https://docs.conda.io/projects/conda-build/en/latest/resources/define-metadata.html#templating-with-jinja
# {% set data = load_setup_py_data(setup_file='../../../python/singa/setup.py', from_recipe_dir=True) %}
-{% set version = "4.0.0" %}
+{% set version = "4.1.0" %}
package:
name: singa
diff --git a/tool/docker/devel/centos6/cuda10/Dockerfile.manylinux2014 b/tool/docker/devel/centos6/cuda10/Dockerfile.manylinux2014
index 1472c9fc9..107e3465c 100644
--- a/tool/docker/devel/centos6/cuda10/Dockerfile.manylinux2014
+++ b/tool/docker/devel/centos6/cuda10/Dockerfile.manylinux2014
@@ -60,8 +60,8 @@ RUN /opt/python/cp38-cp38/bin/pip install numpy
# install cuda and cudnn
# Refer to https://gitlab.com/nvidia/container-images/cuda/-/tree/master/dist for other cuda and cudnn versions
# 10.2-base-centos7
-RUN NVIDIA_GPGKEY_SUM=d1be581509378368edeec8c1eb2958702feedf3bc3d17011adbf24efacce4ab5 && \
- curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/7fa2af80.pub | sed '/^Version/d' > /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \
+RUN NVIDIA_GPGKEY_SUM=d0664fbbdb8c32356d45de36c5984617217b2d0bef41b93ccecd326ba3b80c87 && \
+ curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/D42D0685.pub | sed '/^Version/d' > /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \
echo "$NVIDIA_GPGKEY_SUM /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA" | sha256sum -c --strict -
COPY cuda.repo /etc/yum.repos.d/cuda.repo
ENV CUDA_VERSION 10.2.89