diff --git a/evals/registry/data/sql/co_sql.jsonl b/evals/registry/data/sql/co_sql.jsonl
new file mode 100644
index 0000000000..7e406a8dde
--- /dev/null
+++ b/evals/registry/data/sql/co_sql.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a039a58e30d58517f567467974551247716f03faab4f166922c67a030a68d8f3
+size 151668
diff --git a/evals/registry/evals/co-sql.yaml b/evals/registry/evals/co-sql.yaml
new file mode 100644
index 0000000000..7381eb363e
--- /dev/null
+++ b/evals/registry/evals/co-sql.yaml
@@ -0,0 +1,11 @@
+co-sql:
+  id: co-sql.dev.v0  # refers to the co-sql.dev.v0 entry defined below
+  metrics: [accuracy]
+  description: 'Evaluates performance on 100 samples of the CoSQL dataset, a conversational version of Text-to-SQL tasks. Each conversation simulates a real-world DB scenario where a user asks natural-language questions and a SQL expert retrieves answers in response. Yu, Tao, et al. "CoSQL: A Conversational Text-to-SQL Challenge Towards Cross-Domain Natural Language Interfaces to Databases" https://arxiv.org/abs/1909.05378'
+co-sql.dev.v0:  # concrete eval entry named by the `id` of the co-sql mapping above
+  class: evals.elsuite.modelgraded.classify:ModelBasedClassify
+  args:
+    modelgraded_spec: sql
+    eval_type: cot_classify
+    samples_jsonl: sql/co_sql.jsonl  # presumably relative to evals/registry/data/ (co_sql.jsonl lives in its sql/ subdirectory) - confirm
+