From ad3ccdf41afa081d8abd6372f343551cda83384c Mon Sep 17 00:00:00 2001 From: John Marshall Date: Sat, 20 Jan 2024 07:11:22 +1300 Subject: [PATCH 01/26] Correct the dates of recent releases (#14161) --- hail/python/hail/docs/change_log.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hail/python/hail/docs/change_log.md b/hail/python/hail/docs/change_log.md index c0f232b4637..05479d4eda2 100644 --- a/hail/python/hail/docs/change_log.md +++ b/hail/python/hail/docs/change_log.md @@ -55,7 +55,7 @@ critically depend on experimental functionality.** ## Version 0.2.127 -Released 2023-12-08 +Released 2024-01-12 If you have an Apple M1 laptop, verify that @@ -162,7 +162,7 @@ Released 2023-09-21 ## Version 0.2.123 -Released 2023-09-18 +Released 2023-09-19 ### New Features @@ -189,7 +189,7 @@ Released 2023-09-07 ## Version 0.2.121 -Released 2023-08-31 +Released 2023-09-06 ### New Features @@ -250,7 +250,7 @@ Released 2023-08-31 ## Version 0.2.120 -Released 2023-07-20 +Released 2023-07-27 ### New Features - (hail#13206) The VDS Combiner now works in Query-on-Batch. @@ -300,7 +300,7 @@ Released 2023-06-28 ## Version 0.2.118 -Released 2023-05-30 +Released 2023-06-13 ### New Features @@ -318,7 +318,7 @@ Released 2023-05-30 ## Version 0.2.117 -Released 2023-05-19 +Released 2023-05-22 ### New Features From 2bfa530eafdaf0955108050bb79a8d93963b96c0 Mon Sep 17 00:00:00 2001 From: Dan King Date: Mon, 22 Jan 2024 10:40:38 -0500 Subject: [PATCH 02/26] [hailtop] remove dead code in test (#14181) AFAICT, this does a copy that has no effect. We blow away dest_dir so we can't possibly verify that this copy was correct. Unless there's some side effect that I'm misunderstanding? --- hail/python/test/hailtop/inter_cloud/test_copy.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hail/python/test/hailtop/inter_cloud/test_copy.py b/hail/python/test/hailtop/inter_cloud/test_copy.py index 27c42f9eb87..f9c15e85669 100644 --- a/hail/python/test/hailtop/inter_cloud/test_copy.py +++ b/hail/python/test/hailtop/inter_cloud/test_copy.py @@ -472,10 +472,6 @@ async def test_file_and_directory_error_with_slash_empty_file( for transfer_type in (Transfer.DEST_IS_TARGET, Transfer.DEST_DIR, Transfer.INFER_DEST): dest_base = await fresh_dir(fs, bases, cloud_scheme) - await Copier.copy(fs, sema, Transfer(f'{src_base}', dest_base.rstrip('/'), treat_dest_as=transfer_type)) - - dest_base = await fresh_dir(fs, bases, cloud_scheme) - await Copier.copy(fs, sema, Transfer(f'{src_base}empty/', dest_base.rstrip('/'), treat_dest_as=transfer_type)) await collect_files(await fs.listfiles(f'{dest_base}')) From ae7b87ffd683fab3bc00a8e66388e51f93b2d42f Mon Sep 17 00:00:00 2001 From: jigold Date: Mon, 22 Jan 2024 11:43:38 -0500 Subject: [PATCH 03/26] [hailtop.utils] Add address not available as a retryable error (#14185) --- hail/python/hailtop/utils/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/hail/python/hailtop/utils/utils.py b/hail/python/hailtop/utils/utils.py index 583c32afc3d..78a3abd7c9a 100644 --- a/hail/python/hailtop/utils/utils.py +++ b/hail/python/hailtop/utils/utils.py @@ -532,6 +532,7 @@ async def bounded_gather2( RETRYABLE_ERRNOS = { # these should match (where an equivalent exists) nettyRetryableErrorNumbers in # is/hail/services/package.scala + errno.EADDRNOTAVAIL, errno.ETIMEDOUT, errno.ECONNREFUSED, errno.EHOSTUNREACH, From 28582597ca1e93c7e7fb3da2415b27e8de7fdea4 Mon Sep 17 00:00:00 2001 From: Daniel Goldstein Date: Mon, 22 Jan 2024 12:21:33 -0500 Subject: [PATCH 04/26] [batch] Add 
json parsing and severity to GCP Ops Agent config (#14187) Currently the Ops Agent does not do any parsing of the log message, so the log entry in Google Logging looks like: ``` jsonPayload: { message: "{"severity":"INFO","levelname":"INFO","asctime":"2024-01-22 16:10:45,748","filename":"worker.py","funcNameAndLine":":3461","message":"closed","hail_log":1}" } ``` The `parse_json` processor extracts the json fields from the message into fields on the `jsonPayload` so it looks like this ``` jsonPayload: { asctime: "2024-01-22 16:14:06,098" filename: "worker.py" funcNameAndLine: ":180" hail_log: 1 levelname: "INFO" message: "CLOUD gcp" } ``` and only the new `message` field is displayed in the Google Logging row instead of the whole json. This also adds a `severity` field on the log entry so filters such as `SEVERITY!=INFO` work as expected. --- batch/batch/cloud/gcp/driver/create_instance.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/batch/batch/cloud/gcp/driver/create_instance.py b/batch/batch/cloud/gcp/driver/create_instance.py index d800090356f..57d3000b286 100644 --- a/batch/batch/cloud/gcp/driver/create_instance.py +++ b/batch/batch/cloud/gcp/driver/create_instance.py @@ -230,6 +230,8 @@ def scheduling() -> dict: - /batch/jvm-container-logs/jvm-*.log record_log_file_path: true processors: + parse_message: + type: parse_json labels: type: modify_fields fields: @@ -237,11 +239,13 @@ def scheduling() -> dict: static_value: $NAMESPACE labels.instance_id: static_value: $INSTANCE_ID + severity: + move_from: jsonPayload.severity service: log_level: error pipelines: default_pipeline: - processors: [labels] + processors: [parse_message, labels] receivers: [runlog, workerlog, jvmlog] metrics: From 8ae336f6811f4d4435666b0c616693ba37cf2c71 Mon Sep 17 00:00:00 2001 From: Edmund Higham Date: Mon, 22 Jan 2024 13:12:07 -0500 Subject: [PATCH 05/26] [compiler] Emit `Let` Bindings Iteratively (#14163) Previously `Emit(?:Stream)?$` would emit let bindings recursively, regardless of if that binding was used. If a stream is not used, `Emit(?:Stream)?$` would define its missing labels, making emission recursive. This can lead to stack overflows for large numbers of let-bindings (and does so for the benchmark benchmark `matrix-multi-write-nothing`). By not emitting unused streams, we can make let-binding emission iterative. 
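For reference, a condensed sketch of the iterative approach (not the full change; names and signatures follow the `emitLet` helper and `memoizeMaybeStreamValue` added in the diff below, with the use-set passed in explicitly and stream-label cleanup omitted): the bindings are folded left-to-right, and a binding whose name is never referenced is simply skipped, so unused streams are never emitted and no recursion is needed.

```scala
// Sketch: fold over the let bindings instead of recursing per binding.
def emitLet[A](
  emitI: (IR, EmitCodeBuilder, EmitEnv) => IEmitCode,
  emitBody: (IR, EmitCodeBuilder, EmitEnv) => A,
)(let: Let, cb: EmitCodeBuilder, env: EmitEnv, uses: Set[String]): A =
  emitBody(
    let.body,
    cb,
    let.bindings.foldLeft(env) { case (newEnv, (name, ir)) =>
      if (!uses.contains(name)) newEnv // unused binding: emit nothing at all
      else {
        val value = emitI(ir, cb, newEnv)
        newEnv.bind(name, cb.memoizeMaybeStreamValue(value, s"let_$name"))
      }
    },
  )
```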
--- .../src/main/scala/is/hail/expr/ir/Emit.scala | 88 ++++++++++++------- .../is/hail/expr/ir/EmitCodeBuilder.scala | 31 ++++--- hail/src/main/scala/is/hail/expr/ir/Env.scala | 2 +- .../is/hail/expr/ir/streams/EmitStream.scala | 20 ++--- 4 files changed, 81 insertions(+), 60 deletions(-) diff --git a/hail/src/main/scala/is/hail/expr/ir/Emit.scala b/hail/src/main/scala/is/hail/expr/ir/Emit.scala index 6397686596c..2061e685536 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Emit.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Emit.scala @@ -88,6 +88,7 @@ case class EmitEnv(bindings: Env[EmitValue], inputValues: IndexedSeq[EmitValue]) } (paramTypes, params, recreateFromMB) } + } object Emit { @@ -675,11 +676,7 @@ abstract class EstimableEmitter[C] { def estimatedSize: Int } -class Emit[C]( - val ctx: EmitContext, - val cb: EmitClassBuilder[C], -) { - emitSelf => +class Emit[C](val ctx: EmitContext, val cb: EmitClassBuilder[C]) { val methods: mutable.Map[(String, Seq[Type], Seq[SType], SType), EmitMethodBuilder[C]] = mutable.Map() @@ -801,6 +798,7 @@ class Emit[C]( def emitI( ir: IR, + cb: EmitCodeBuilder = cb, region: Value[Region] = region, env: EmitEnv = env, container: Option[AggContainer] = container, @@ -840,19 +838,17 @@ class Emit[C]( emitI(cond).consume(cb, {}, m => cb.if_(m.asBoolean.value, emitVoid(cnsq), emitVoid(altr))) - case Let(bindings, body) => - def go(env: EmitEnv): IndexedSeq[(String, IR)] => Unit = { - case (name, value) +: rest => - val xVal = - if (value.typ.isInstanceOf[TStream]) emitStream(value, region, env = env) - else emit(value, env = env) - - cb.withScopedMaybeStreamValue(xVal, s"let_$name")(ev => go(env.bind(name, ev))(rest)) - case Seq() => - emitVoid(body, env = env) - } - - go(env)(bindings) + case let: Let => + emitLet( + emitI = (ir, cb, env) => + if (ir.typ.isInstanceOf[TStream]) emitStream(ir, region, env = env).toI(cb) + else emitI(ir, cb = cb, env = env), + emitBody = (ir, cb, env) => emitVoid(ir, cb, env = env), + )( + let, + cb, + env, + ) case StreamFor(a, valueName, body) => emitStream(a, region).toI(cb).consume( @@ -1448,7 +1444,7 @@ class Emit[C]( sorter.sort( cb, region, - makeDependentSortingFunction(cb, sct, lessThan, env, emitSelf, Array(left, right)), + makeDependentSortingFunction(cb, sct, lessThan, env, this, Array(left, right)), ) sorter.toRegion(cb, x.typ) } @@ -3559,22 +3555,18 @@ class Emit[C]( val result: EmitCode = (ir: @unchecked) match { - case Let(bindings, body) => + case let: Let => EmitCode.fromI(mb) { cb => - def go(env: EmitEnv): IndexedSeq[(String, IR)] => IEmitCode = { - case (name, value) +: rest => - val xVal = - if (value.typ.isInstanceOf[TStream]) emitStream(value, region, env = env) - else emit(value, env = env) - - cb.withScopedMaybeStreamValue(xVal, s"let_$name") { ev => - go(env.bind(name, ev))(rest) - } - case Seq() => - emitI(body, cb, env = env) - } - - go(env)(bindings) + emitLet( + emitI = (ir, cb, env) => + if (ir.typ.isInstanceOf[TStream]) emitStream(ir, region, env = env).toI(cb) + else emitI(ir, cb = cb, env = env), + emitBody = (ir, cb, env) => emitI(ir, cb, env = env), + )( + let, + cb, + env, + ) } case Ref(name, t) => @@ -3701,6 +3693,34 @@ class Emit[C]( (cb: EmitCodeBuilder, region: Value[Region], l: Value[_], r: Value[_]) => cb.memoize(cb.invokeCode[Boolean](sort, cb.this_, region, l, r)) } + + def emitLet[A]( + emitI: (IR, EmitCodeBuilder, EmitEnv) => IEmitCode, + emitBody: (IR, EmitCodeBuilder, EmitEnv) => A, + )( + let: Let, + cb: EmitCodeBuilder, + env: EmitEnv, + ): A = { + val uses: 
mutable.Set[String] = + ctx.usesAndDefs.uses.get(let) match { + case Some(refs) => refs.map(_.t.name) + case None => mutable.Set.empty + } + + emitBody( + let.body, + cb, + let.bindings.foldLeft(env) { case (newEnv, (name, ir)) => + if (!uses.contains(name)) newEnv + else { + val value = emitI(ir, cb, newEnv) + val memo = cb.memoizeMaybeStreamValue(value, s"let_$name") + newEnv.bind(name, memo) + } + }, + ) + } } object NDArrayEmitter { diff --git a/hail/src/main/scala/is/hail/expr/ir/EmitCodeBuilder.scala b/hail/src/main/scala/is/hail/expr/ir/EmitCodeBuilder.scala index 0d9a1f726ab..b386f374de5 100644 --- a/hail/src/main/scala/is/hail/expr/ir/EmitCodeBuilder.scala +++ b/hail/src/main/scala/is/hail/expr/ir/EmitCodeBuilder.scala @@ -160,24 +160,27 @@ class EmitCodeBuilder(val emb: EmitMethodBuilder[_], var code: Code[Unit]) exten } def withScopedMaybeStreamValue[T](ec: EmitCode, name: String)(f: EmitValue => T): T = { - if (ec.st.isRealizable) { - f(memoizeField(ec, name)) - } else { - assert(ec.st.isInstanceOf[SStream]) - val ev = if (ec.required) - EmitValue(None, ec.toI(this).get(this, "")) + val ev = memoizeMaybeStreamValue(ec.toI(this), name) + val res = f(ev) + ec.pv match { + case ss: SStreamValue => + ss.defineUnusedLabels(emb) + case _ => + } + res + } + + def memoizeMaybeStreamValue(iec: IEmitCode, name: String): EmitValue = + if (iec.st.isRealizable) memoizeField(iec, name) + else { + assert(iec.st.isInstanceOf[SStream]) + if (iec.required) EmitValue(None, iec.get(this, "")) else { val m = emb.genFieldThisRef[Boolean](name + "_missing") - ec.toI(this).consume(this, assign(m, true), _ => assign(m, false)) - EmitValue(Some(m), ec.pv) - } - val res = f(ev) - ec.pv match { - case ss: SStreamValue => ss.defineUnusedLabels(emb) + iec.consume(this, assign(m, true), _ => assign(m, false)) + EmitValue(Some(m), iec.value) } - res } - } def memoizeField(v: IEmitCode, name: String): EmitValue = { require(v.st.isRealizable) diff --git a/hail/src/main/scala/is/hail/expr/ir/Env.scala b/hail/src/main/scala/is/hail/expr/ir/Env.scala index bd2a40384cc..8a6783ec9c1 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Env.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Env.scala @@ -150,7 +150,7 @@ class Env[V] private (val m: Map[Env.K, V]) { def apply(name: String): V = m(name) def lookup(name: String): V = - m.get(name).getOrElse(throw new RuntimeException(s"Cannot find $name in $m")) + m.getOrElse(name, throw new RuntimeException(s"Cannot find $name in $m")) def lookupOption(name: String): Option[V] = m.get(name) diff --git a/hail/src/main/scala/is/hail/expr/ir/streams/EmitStream.scala b/hail/src/main/scala/is/hail/expr/ir/streams/EmitStream.scala index e64f790bda5..710584f2503 100644 --- a/hail/src/main/scala/is/hail/expr/ir/streams/EmitStream.scala +++ b/hail/src/main/scala/is/hail/expr/ir/streams/EmitStream.scala @@ -364,17 +364,15 @@ object EmitStream { SStreamValue(producer) } - case Let(bindings, body) => - def go(env: EmitEnv): IndexedSeq[(String, IR)] => IEmitCode = { - case (name, value) +: rest => - cb.withScopedMaybeStreamValue( - EmitCode.fromI(cb.emb)(cb => emit(value, cb, env = env)), - s"let_$name", - )(ev => go(env.bind(name, ev))(rest)) - case Seq() => - produce(body, cb, env = env) - } - go(env)(bindings) + case let: Let => + emitter.emitLet( + emitI = (ir, cb, env) => emit(ir, cb, env = env), + emitBody = (ir, cb, env) => produce(ir, cb, env = env), + )( + let, + cb, + env, + ) case In(n, _) => // this, Code[Region], ... 
From 42a072ec1aeb662a8dbc02d8c3b78301efa071a1 Mon Sep 17 00:00:00 2001 From: Dan King Date: Tue, 23 Jan 2024 12:16:49 -0500 Subject: [PATCH 06/26] [prometheus] 90 day retention (#14194) Open question: we're using ~20GiB on /prometheus for 15d. We request 150GiB (and get closer to 146GiB). Should we increase the storage to give ourselves more slack? Assuming linear scaling, 90d would use 120GiB (26GiB of slack). https://hail.zulipchat.com/#narrow/stream/300487-Hail-Batch-Dev/topic/Grafana.20retention.20period ``` /prometheus $ df -h Filesystem Size Used Available Use% Mounted on overlay 94.3G 28.9G 65.3G 31% / tmpfs 64.0M 0 64.0M 0% /dev tmpfs 3.6G 0 3.6G 0% /sys/fs/cgroup /dev/sdf 146.6G 18.9G 127.6G 13% /prometheus /dev/sda1 94.3G 28.9G 65.3G 31% /etc/prometheus /dev/sda1 94.3G 28.9G 65.3G 31% /etc/hosts /dev/sda1 94.3G 28.9G 65.3G 31% /dev/termination-log /dev/sda1 94.3G 28.9G 65.3G 31% /etc/hostname /dev/sda1 94.3G 28.9G 65.3G 31% /etc/resolv.conf shm 64.0M 4.0K 64.0M 0% /dev/shm tmpfs 5.5G 12.0K 5.5G 0% /var/run/secrets/kubernetes.io/serviceaccount tmpfs 3.6G 0 3.6G 0% /proc/acpi tmpfs 64.0M 0 64.0M 0% /proc/kcore tmpfs 64.0M 0 64.0M 0% /proc/keys tmpfs 64.0M 0 64.0M 0% /proc/timer_list tmpfs 3.6G 0 3.6G 0% /proc/scsi tmpfs 3.6G 0 3.6G 0% /sys/firmware ``` --- prometheus/prometheus.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prometheus/prometheus.yaml b/prometheus/prometheus.yaml index ede6863d7d7..a8150f07c01 100644 --- a/prometheus/prometheus.yaml +++ b/prometheus/prometheus.yaml @@ -296,7 +296,7 @@ spec: - "/bin/prometheus" - "--config.file=/etc/prometheus/prometheus.yml" - "--storage.tsdb.path=/prometheus" - - "--storage.tsdb.retention.time=15d" + - "--storage.tsdb.retention.time=90d" - "--web.console.libraries=/usr/share/prometheus/console_libraries" - "--web.console.templates=/usr/share/prometheus/consoles" - "--web.enable-lifecycle" From 0411c8937cba4f721ea642397ca9abce4eb493eb Mon Sep 17 00:00:00 2001 From: Daniel Goldstein Date: Wed, 24 Jan 2024 13:28:28 -0500 Subject: [PATCH 07/26] [gear] Make csrf cookie samesite=strict (#14180) Currently, the `_csrf` cookie is made available to all subdomains of `.hail.is`. This means that if I first visit `batch.hail.is` I get a `_csrf` cookie set for `.hail.is`. That cookie is then reused if I visit `ci.hail.is`. Even more awkward, the same value of the cookie will get reused if I then visit `batch.azure.hail.is`. This isn't that big of a deal, these can all be considered part of the same application that the hail team delivers and secures, but it is very little work to set stricter bounds on where this cookie is sent. By removing the `domain` attribute and using `samesite='strict'`, the cookie's domain will be set by the browser to the domain of the request whose response included the `Set-Cookie` header, e.g. `batch.hail.is` or `internal.hail.is`. `Strict` mode then ensures that the cookie will only be sent to that exact domain, meaning that each application is guaranteed to receive the `_csrf` token that it itself delivered, and a `_csrf` token from CI cannot be used to take actions against Batch. This should not have an adverse impact on existing users' browser sessions. 
In `render_template` we preserve the value of an existing `_csrf` cookie so this change should do the following: - Logged in user visits a page with an existing widely scoped (`.hail.is`) `_csrf` cookie - The server returns a `Set-Cookie` header with a new `_csrf` cookie for strictly the `batch.hail.is` domain but with the same token value as the original `_csrf` cookie - The user now has two cookies and the browser could send either one on a given request, but it does not matter because they have the same value - If the user logs out and back in, their old widely scoped cookie will be cleared and they only get the strict cookie from now on. --- devbin/dev_proxy.py | 2 +- web_common/web_common/web_common.py | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/devbin/dev_proxy.py b/devbin/dev_proxy.py index a277f7f4d34..268a2d33afb 100644 --- a/devbin/dev_proxy.py +++ b/devbin/dev_proxy.py @@ -48,7 +48,7 @@ async def render_html(request: web.Request, context: dict): # Make links point back to the local dev server and not use # the dev namespace path rewrite shenanigans. context['page_context']['base_path'] = '' - return await render_template(SERVICE, request, **context, cookie_domain='localhost:8000') + return await render_template(SERVICE, request, **context) async def on_startup(app: web.Application): diff --git a/web_common/web_common/web_common.py b/web_common/web_common/web_common.py index 511f1eb984f..3aca0472740 100644 --- a/web_common/web_common/web_common.py +++ b/web_common/web_common/web_common.py @@ -79,8 +79,6 @@ async def render_template( userdata: Optional[UserData], file: str, page_context: Dict[str, Any], - *, - cookie_domain: Optional[str] = None, ) -> web.Response: if request.headers.get('x-hail-return-jinja-context'): if userdata and userdata['is_developer']: @@ -98,6 +96,5 @@ async def render_template( context['csrf_token'] = csrf_token response = aiohttp_jinja2.render_template(file, request, context) - domain = cookie_domain or deploy_config._domain - response.set_cookie('_csrf', csrf_token, domain=domain, secure=True, httponly=True) + response.set_cookie('_csrf', csrf_token, secure=True, httponly=True, samesite='strict') return response From b7bde56d5aad1fa8d1c28b46a1f06b00c45bc8bf Mon Sep 17 00:00:00 2001 From: jigold Date: Wed, 24 Jan 2024 14:10:26 -0500 Subject: [PATCH 08/26] [batch] Stop writing to v2 billing tables (#13892) This PR modifies the billing triggers to stop writing to the v2 billing tables as well as remove the check for whether the equivalent v2 rows have been "migrated" when writing to the v3 tables. Stacked on #13891. 
--- batch/sql/estimated-current.sql | 135 +++---------------------- batch/sql/remove-v2-billing-writes.sql | 120 ++++++++++++++++++++++ build.yaml | 3 + 3 files changed, 135 insertions(+), 123 deletions(-) create mode 100644 batch/sql/remove-v2-billing-writes.sql diff --git a/batch/sql/estimated-current.sql b/batch/sql/estimated-current.sql index 74aa7ea114c..fc3d6f99707 100644 --- a/batch/sql/estimated-current.sql +++ b/batch/sql/estimated-current.sql @@ -595,16 +595,6 @@ BEGIN SET cur_billing_date = CAST(UTC_DATE() AS DATE); IF msec_diff_rollup != 0 THEN - INSERT INTO aggregated_billing_project_user_resources_v2 (billing_project, user, resource_id, token, `usage`) - SELECT billing_project, `user`, - resource_id, - rand_token, - msec_diff_rollup * quantity - FROM attempt_resources - JOIN batches ON batches.id = attempt_resources.batch_id - WHERE batch_id = NEW.batch_id AND job_id = NEW.job_id AND attempt_id = NEW.attempt_id - ON DUPLICATE KEY UPDATE `usage` = `usage` + msec_diff_rollup * quantity; - INSERT INTO aggregated_billing_project_user_resources_v3 (billing_project, user, resource_id, token, `usage`) SELECT batches.billing_project, batches.`user`, attempt_resources.deduped_resource_id, @@ -612,68 +602,26 @@ BEGIN msec_diff_rollup * quantity FROM attempt_resources JOIN batches ON batches.id = attempt_resources.batch_id - INNER JOIN aggregated_billing_project_user_resources_v2 ON - aggregated_billing_project_user_resources_v2.billing_project = batches.billing_project AND - aggregated_billing_project_user_resources_v2.user = batches.user AND - aggregated_billing_project_user_resources_v2.resource_id = attempt_resources.resource_id AND - aggregated_billing_project_user_resources_v2.token = rand_token - WHERE attempt_resources.batch_id = NEW.batch_id AND attempt_resources.job_id = NEW.job_id AND attempt_id = NEW.attempt_id AND migrated = 1 + WHERE attempt_resources.batch_id = NEW.batch_id AND attempt_resources.job_id = NEW.job_id AND attempt_id = NEW.attempt_id ON DUPLICATE KEY UPDATE `usage` = aggregated_billing_project_user_resources_v3.`usage` + msec_diff_rollup * quantity; - INSERT INTO aggregated_job_group_resources_v2 (batch_id, resource_id, token, `usage`) - SELECT batch_id, - resource_id, - rand_token, - msec_diff_rollup * quantity - FROM attempt_resources - WHERE batch_id = NEW.batch_id AND job_id = NEW.job_id AND attempt_id = NEW.attempt_id - ON DUPLICATE KEY UPDATE `usage` = `usage` + msec_diff_rollup * quantity; - INSERT INTO aggregated_job_group_resources_v3 (batch_id, resource_id, token, `usage`) SELECT attempt_resources.batch_id, attempt_resources.deduped_resource_id, rand_token, msec_diff_rollup * quantity FROM attempt_resources - JOIN aggregated_job_group_resources_v2 ON - aggregated_job_group_resources_v2.batch_id = attempt_resources.batch_id AND - aggregated_job_group_resources_v2.resource_id = attempt_resources.resource_id AND - aggregated_job_group_resources_v2.token = rand_token - WHERE attempt_resources.batch_id = NEW.batch_id AND attempt_resources.job_id = NEW.job_id AND attempt_id = NEW.attempt_id AND migrated = 1 + WHERE attempt_resources.batch_id = NEW.batch_id AND attempt_resources.job_id = NEW.job_id AND attempt_id = NEW.attempt_id ON DUPLICATE KEY UPDATE `usage` = aggregated_job_group_resources_v3.`usage` + msec_diff_rollup * quantity; - INSERT INTO aggregated_job_resources_v2 (batch_id, job_id, resource_id, `usage`) - SELECT batch_id, job_id, - resource_id, - msec_diff_rollup * quantity - FROM attempt_resources - WHERE batch_id = NEW.batch_id AND job_id = 
NEW.job_id AND attempt_id = NEW.attempt_id - ON DUPLICATE KEY UPDATE `usage` = `usage` + msec_diff_rollup * quantity; - INSERT INTO aggregated_job_resources_v3 (batch_id, job_id, resource_id, `usage`) SELECT attempt_resources.batch_id, attempt_resources.job_id, attempt_resources.deduped_resource_id, msec_diff_rollup * quantity FROM attempt_resources - JOIN aggregated_job_resources_v2 ON - aggregated_job_resources_v2.batch_id = attempt_resources.batch_id AND - aggregated_job_resources_v2.job_id = attempt_resources.job_id AND - aggregated_job_resources_v2.resource_id = attempt_resources.resource_id - WHERE attempt_resources.batch_id = NEW.batch_id AND attempt_resources.job_id = NEW.job_id AND attempt_id = NEW.attempt_id AND migrated = 1 + WHERE attempt_resources.batch_id = NEW.batch_id AND attempt_resources.job_id = NEW.job_id AND attempt_id = NEW.attempt_id ON DUPLICATE KEY UPDATE `usage` = aggregated_job_resources_v3.`usage` + msec_diff_rollup * quantity; - INSERT INTO aggregated_billing_project_user_resources_by_date_v2 (billing_date, billing_project, user, resource_id, token, `usage`) - SELECT cur_billing_date, - billing_project, - `user`, - resource_id, - rand_token, - msec_diff_rollup * quantity - FROM attempt_resources - JOIN batches ON batches.id = attempt_resources.batch_id - WHERE batch_id = NEW.batch_id AND job_id = NEW.job_id AND attempt_id = NEW.attempt_id - ON DUPLICATE KEY UPDATE `usage` = `usage` + msec_diff_rollup * quantity; - INSERT INTO aggregated_billing_project_user_resources_by_date_v3 (billing_date, billing_project, user, resource_id, token, `usage`) SELECT cur_billing_date, batches.billing_project, @@ -683,13 +631,7 @@ BEGIN msec_diff_rollup * quantity FROM attempt_resources JOIN batches ON batches.id = attempt_resources.batch_id - JOIN aggregated_billing_project_user_resources_by_date_v2 ON - aggregated_billing_project_user_resources_by_date_v2.billing_date = cur_billing_date AND - aggregated_billing_project_user_resources_by_date_v2.billing_project = batches.billing_project AND - aggregated_billing_project_user_resources_by_date_v2.user = batches.user AND - aggregated_billing_project_user_resources_by_date_v2.resource_id = attempt_resources.resource_id AND - aggregated_billing_project_user_resources_by_date_v2.token = rand_token - WHERE attempt_resources.batch_id = NEW.batch_id AND attempt_resources.job_id = NEW.job_id AND attempt_id = NEW.attempt_id AND migrated = 1 + WHERE attempt_resources.batch_id = NEW.batch_id AND attempt_resources.job_id = NEW.job_id AND attempt_id = NEW.attempt_id ON DUPLICATE KEY UPDATE `usage` = aggregated_billing_project_user_resources_by_date_v3.`usage` + msec_diff_rollup * quantity; END IF; END $$ @@ -866,10 +808,6 @@ BEGIN DECLARE cur_n_tokens INT; DECLARE rand_token INT; DECLARE cur_billing_date DATE; - DECLARE bp_user_resources_migrated BOOLEAN DEFAULT FALSE; - DECLARE bp_user_resources_by_date_migrated BOOLEAN DEFAULT FALSE; - DECLARE batch_resources_migrated BOOLEAN DEFAULT FALSE; - DECLARE job_resources_migrated BOOLEAN DEFAULT FALSE; SELECT billing_project, user INTO cur_billing_project, cur_user FROM batches WHERE id = NEW.batch_id; @@ -887,74 +825,25 @@ BEGIN SET cur_billing_date = CAST(UTC_DATE() AS DATE); IF msec_diff_rollup != 0 THEN - INSERT INTO aggregated_billing_project_user_resources_v2 (billing_project, user, resource_id, token, `usage`) - VALUES (cur_billing_project, cur_user, NEW.resource_id, rand_token, NEW.quantity * msec_diff_rollup) + INSERT INTO aggregated_billing_project_user_resources_v3 (billing_project, 
user, resource_id, token, `usage`) + VALUES (cur_billing_project, cur_user, NEW.deduped_resource_id, rand_token, NEW.quantity * msec_diff_rollup) ON DUPLICATE KEY UPDATE `usage` = `usage` + NEW.quantity * msec_diff_rollup; - SELECT migrated INTO bp_user_resources_migrated - FROM aggregated_billing_project_user_resources_v2 - WHERE billing_project = cur_billing_project AND user = cur_user AND resource_id = NEW.resource_id AND token = rand_token - FOR UPDATE; - - IF bp_user_resources_migrated THEN - INSERT INTO aggregated_billing_project_user_resources_v3 (billing_project, user, resource_id, token, `usage`) - VALUES (cur_billing_project, cur_user, NEW.deduped_resource_id, rand_token, NEW.quantity * msec_diff_rollup) - ON DUPLICATE KEY UPDATE - `usage` = `usage` + NEW.quantity * msec_diff_rollup; - END IF; - - INSERT INTO aggregated_job_group_resources_v2 (batch_id, resource_id, token, `usage`) - VALUES (NEW.batch_id, NEW.resource_id, rand_token, NEW.quantity * msec_diff_rollup) + INSERT INTO aggregated_job_group_resources_v3 (batch_id, resource_id, token, `usage`) + VALUES (NEW.batch_id, NEW.deduped_resource_id, rand_token, NEW.quantity * msec_diff_rollup) ON DUPLICATE KEY UPDATE `usage` = `usage` + NEW.quantity * msec_diff_rollup; - SELECT migrated INTO batch_resources_migrated - FROM aggregated_job_group_resources_v2 - WHERE batch_id = NEW.batch_id AND resource_id = NEW.resource_id AND token = rand_token - FOR UPDATE; - - IF batch_resources_migrated THEN - INSERT INTO aggregated_job_group_resources_v3 (batch_id, resource_id, token, `usage`) - VALUES (NEW.batch_id, NEW.deduped_resource_id, rand_token, NEW.quantity * msec_diff_rollup) - ON DUPLICATE KEY UPDATE - `usage` = `usage` + NEW.quantity * msec_diff_rollup; - END IF; - - INSERT INTO aggregated_job_resources_v2 (batch_id, job_id, resource_id, `usage`) - VALUES (NEW.batch_id, NEW.job_id, NEW.resource_id, NEW.quantity * msec_diff_rollup) + INSERT INTO aggregated_job_resources_v3 (batch_id, job_id, resource_id, `usage`) + VALUES (NEW.batch_id, NEW.job_id, NEW.deduped_resource_id, NEW.quantity * msec_diff_rollup) ON DUPLICATE KEY UPDATE `usage` = `usage` + NEW.quantity * msec_diff_rollup; - SELECT migrated INTO job_resources_migrated - FROM aggregated_job_resources_v2 - WHERE batch_id = NEW.batch_id AND job_id = NEW.job_id AND resource_id = NEW.resource_id - FOR UPDATE; - - IF job_resources_migrated THEN - INSERT INTO aggregated_job_resources_v3 (batch_id, job_id, resource_id, `usage`) - VALUES (NEW.batch_id, NEW.job_id, NEW.deduped_resource_id, NEW.quantity * msec_diff_rollup) - ON DUPLICATE KEY UPDATE - `usage` = `usage` + NEW.quantity * msec_diff_rollup; - END IF; - - INSERT INTO aggregated_billing_project_user_resources_by_date_v2 (billing_date, billing_project, user, resource_id, token, `usage`) - VALUES (cur_billing_date, cur_billing_project, cur_user, NEW.resource_id, rand_token, NEW.quantity * msec_diff_rollup) + INSERT INTO aggregated_billing_project_user_resources_by_date_v3 (billing_date, billing_project, user, resource_id, token, `usage`) + VALUES (cur_billing_date, cur_billing_project, cur_user, NEW.deduped_resource_id, rand_token, NEW.quantity * msec_diff_rollup) ON DUPLICATE KEY UPDATE `usage` = `usage` + NEW.quantity * msec_diff_rollup; - - SELECT migrated INTO bp_user_resources_by_date_migrated - FROM aggregated_billing_project_user_resources_by_date_v2 - WHERE billing_date = cur_billing_date AND billing_project = cur_billing_project AND user = cur_user - AND resource_id = NEW.resource_id AND token = rand_token - FOR 
UPDATE; - - IF bp_user_resources_by_date_migrated THEN - INSERT INTO aggregated_billing_project_user_resources_by_date_v3 (billing_date, billing_project, user, resource_id, token, `usage`) - VALUES (cur_billing_date, cur_billing_project, cur_user, NEW.deduped_resource_id, rand_token, NEW.quantity * msec_diff_rollup) - ON DUPLICATE KEY UPDATE - `usage` = `usage` + NEW.quantity * msec_diff_rollup; - END IF; END IF; END $$ diff --git a/batch/sql/remove-v2-billing-writes.sql b/batch/sql/remove-v2-billing-writes.sql new file mode 100644 index 00000000000..fedeea0facb --- /dev/null +++ b/batch/sql/remove-v2-billing-writes.sql @@ -0,0 +1,120 @@ +DELIMITER $$ + +DROP TRIGGER IF EXISTS attempts_after_update $$ +CREATE TRIGGER attempts_after_update AFTER UPDATE ON attempts +FOR EACH ROW +BEGIN + DECLARE job_cores_mcpu INT; + DECLARE cur_billing_project VARCHAR(100); + DECLARE msec_diff_rollup BIGINT; + DECLARE cur_n_tokens INT; + DECLARE rand_token INT; + DECLARE cur_billing_date DATE; + + SELECT n_tokens INTO cur_n_tokens FROM globals LOCK IN SHARE MODE; + SET rand_token = FLOOR(RAND() * cur_n_tokens); + + SELECT cores_mcpu INTO job_cores_mcpu FROM jobs + WHERE batch_id = NEW.batch_id AND job_id = NEW.job_id; + + SELECT billing_project INTO cur_billing_project FROM batches WHERE id = NEW.batch_id; + + SET msec_diff_rollup = (GREATEST(COALESCE(NEW.rollup_time - NEW.start_time, 0), 0) - + GREATEST(COALESCE(OLD.rollup_time - OLD.start_time, 0), 0)); + + SET cur_billing_date = CAST(UTC_DATE() AS DATE); + + IF msec_diff_rollup != 0 THEN + INSERT INTO aggregated_billing_project_user_resources_v3 (billing_project, user, resource_id, token, `usage`) + SELECT batches.billing_project, batches.`user`, + attempt_resources.deduped_resource_id, + rand_token, + msec_diff_rollup * quantity + FROM attempt_resources + JOIN batches ON batches.id = attempt_resources.batch_id + WHERE attempt_resources.batch_id = NEW.batch_id AND attempt_resources.job_id = NEW.job_id AND attempt_id = NEW.attempt_id + ON DUPLICATE KEY UPDATE `usage` = aggregated_billing_project_user_resources_v3.`usage` + msec_diff_rollup * quantity; + + INSERT INTO aggregated_job_group_resources_v3 (batch_id, resource_id, token, `usage`) + SELECT attempt_resources.batch_id, + attempt_resources.deduped_resource_id, + rand_token, + msec_diff_rollup * quantity + FROM attempt_resources + WHERE attempt_resources.batch_id = NEW.batch_id AND attempt_resources.job_id = NEW.job_id AND attempt_id = NEW.attempt_id + ON DUPLICATE KEY UPDATE `usage` = aggregated_job_group_resources_v3.`usage` + msec_diff_rollup * quantity; + + INSERT INTO aggregated_job_resources_v3 (batch_id, job_id, resource_id, `usage`) + SELECT attempt_resources.batch_id, attempt_resources.job_id, + attempt_resources.deduped_resource_id, + msec_diff_rollup * quantity + FROM attempt_resources + WHERE attempt_resources.batch_id = NEW.batch_id AND attempt_resources.job_id = NEW.job_id AND attempt_id = NEW.attempt_id + ON DUPLICATE KEY UPDATE `usage` = aggregated_job_resources_v3.`usage` + msec_diff_rollup * quantity; + + INSERT INTO aggregated_billing_project_user_resources_by_date_v3 (billing_date, billing_project, user, resource_id, token, `usage`) + SELECT cur_billing_date, + batches.billing_project, + batches.`user`, + attempt_resources.deduped_resource_id, + rand_token, + msec_diff_rollup * quantity + FROM attempt_resources + JOIN batches ON batches.id = attempt_resources.batch_id + WHERE attempt_resources.batch_id = NEW.batch_id AND attempt_resources.job_id = NEW.job_id AND attempt_id = 
NEW.attempt_id + ON DUPLICATE KEY UPDATE `usage` = aggregated_billing_project_user_resources_by_date_v3.`usage` + msec_diff_rollup * quantity; + END IF; +END $$ + +DROP TRIGGER IF EXISTS attempt_resources_after_insert $$ +CREATE TRIGGER attempt_resources_after_insert AFTER INSERT ON attempt_resources +FOR EACH ROW +BEGIN + DECLARE cur_start_time BIGINT; + DECLARE cur_rollup_time BIGINT; + DECLARE cur_billing_project VARCHAR(100); + DECLARE cur_user VARCHAR(100); + DECLARE msec_diff_rollup BIGINT; + DECLARE cur_n_tokens INT; + DECLARE rand_token INT; + DECLARE cur_billing_date DATE; + + SELECT billing_project, user INTO cur_billing_project, cur_user + FROM batches WHERE id = NEW.batch_id; + + SELECT n_tokens INTO cur_n_tokens FROM globals LOCK IN SHARE MODE; + SET rand_token = FLOOR(RAND() * cur_n_tokens); + + SELECT start_time, rollup_time INTO cur_start_time, cur_rollup_time + FROM attempts + WHERE batch_id = NEW.batch_id AND job_id = NEW.job_id AND attempt_id = NEW.attempt_id + LOCK IN SHARE MODE; + + SET msec_diff_rollup = GREATEST(COALESCE(cur_rollup_time - cur_start_time, 0), 0); + + SET cur_billing_date = CAST(UTC_DATE() AS DATE); + + IF msec_diff_rollup != 0 THEN + INSERT INTO aggregated_billing_project_user_resources_v3 (billing_project, user, resource_id, token, `usage`) + VALUES (cur_billing_project, cur_user, NEW.deduped_resource_id, rand_token, NEW.quantity * msec_diff_rollup) + ON DUPLICATE KEY UPDATE + `usage` = `usage` + NEW.quantity * msec_diff_rollup; + + INSERT INTO aggregated_job_group_resources_v3 (batch_id, resource_id, token, `usage`) + VALUES (NEW.batch_id, NEW.deduped_resource_id, rand_token, NEW.quantity * msec_diff_rollup) + ON DUPLICATE KEY UPDATE + `usage` = `usage` + NEW.quantity * msec_diff_rollup; + + INSERT INTO aggregated_job_resources_v3 (batch_id, job_id, resource_id, `usage`) + VALUES (NEW.batch_id, NEW.job_id, NEW.deduped_resource_id, NEW.quantity * msec_diff_rollup) + ON DUPLICATE KEY UPDATE + `usage` = `usage` + NEW.quantity * msec_diff_rollup; + + INSERT INTO aggregated_billing_project_user_resources_by_date_v3 (billing_date, billing_project, user, resource_id, token, `usage`) + VALUES (cur_billing_date, cur_billing_project, cur_user, NEW.deduped_resource_id, rand_token, NEW.quantity * msec_diff_rollup) + ON DUPLICATE KEY UPDATE + `usage` = `usage` + NEW.quantity * msec_diff_rollup; + END IF; +END $$ + +DELIMITER ; diff --git a/build.yaml b/build.yaml index 212120b24e9..e4256804708 100644 --- a/build.yaml +++ b/build.yaml @@ -2358,6 +2358,9 @@ steps: - name: rename-job-groups-tables script: /io/sql/rename-job-groups-tables.sql online: false # this must be offline + - name: remove-v2-billing-writes + script: /io/sql/remove-v2-billing-writes.sql + online: true inputs: - from: /repo/batch/sql to: /io/sql From 728f43bab4a474442b61d746e1881fa450f7ade5 Mon Sep 17 00:00:00 2001 From: Patrick Schultz Date: Fri, 26 Jan 2024 07:17:17 -0500 Subject: [PATCH 09/26] [query] fix remaining scala warnings (#14188) --- hail/build.gradle | 3 +- .../scala/is/hail/backend/BackendUtils.scala | 2 + .../is/hail/backend/ExecuteContext.scala | 2 - .../is/hail/backend/local/LocalBackend.scala | 2 +- .../hail/backend/service/ServiceBackend.scala | 18 +++------ .../is/hail/backend/service/Worker.scala | 4 +- .../scala/is/hail/experimental/package.scala | 2 +- .../expr/ir/AbstractMatrixTableSpec.scala | 1 - .../main/scala/is/hail/expr/ir/BinaryOp.scala | 2 - .../scala/is/hail/expr/ir/BinarySearch.scala | 9 ----- .../src/main/scala/is/hail/expr/ir/Emit.scala | 20 ++-------- 
.../is/hail/expr/ir/EmitClassBuilder.scala | 7 ---- .../hail/expr/ir/ExtractIntervalFilters.scala | 19 ---------- .../scala/is/hail/expr/ir/GenericLines.scala | 3 +- .../scala/is/hail/expr/ir/LowerMatrixIR.scala | 1 - .../scala/is/hail/expr/ir/MatrixWriter.scala | 1 - .../is/hail/expr/ir/NativeReaderOptions.scala | 5 +-- .../is/hail/expr/ir/NormalizeNames.scala | 3 ++ .../is/hail/expr/ir/PruneDeadFields.scala | 4 -- .../main/scala/is/hail/expr/ir/Simplify.scala | 19 ---------- .../main/scala/is/hail/expr/ir/TableIR.scala | 3 +- .../expr/ir/agg/CollectAsSetAggregator.scala | 2 - .../expr/ir/agg/StagedBlockLinkedList.scala | 3 -- .../is/hail/expr/ir/functions/Functions.scala | 3 -- .../expr/ir/functions/LocusFunctions.scala | 1 - .../expr/ir/functions/NDArrayFunctions.scala | 2 +- .../ir/lowering/LowerDistributedSort.scala | 1 - .../hail/expr/ir/ndarrays/EmitNDArray.scala | 2 +- .../expr/ir/orderings/IterableOrdering.scala | 2 - .../is/hail/expr/ir/streams/EmitStream.scala | 9 +---- .../main/scala/is/hail/io/IndexBTree.scala | 2 +- .../main/scala/is/hail/io/InputBuffers.scala | 4 +- .../avro/UnsafeAvroTableReaderOptions.scala | 1 - .../is/hail/io/bgen/BgenRDDPartitions.scala | 2 - .../is/hail/io/bgen/StagedBGENReader.scala | 1 - .../hail/io/compress/BGzipOutputStream.scala | 4 +- .../scala/is/hail/io/fs/AzureStorageFS.scala | 5 --- .../main/scala/is/hail/io/fs/HadoopFS.scala | 2 +- .../scala/is/hail/io/index/IndexReader.scala | 1 - .../scala/is/hail/io/index/IndexWriter.scala | 5 +-- .../scala/is/hail/io/plink/LoadPlink.scala | 3 +- .../scala/is/hail/io/tabix/TabixReader.scala | 2 +- .../main/scala/is/hail/io/vcf/LoadVCF.scala | 9 +---- .../scala/is/hail/linalg/BlockMatrix.scala | 8 ++-- .../is/hail/linalg/LinalgCodeUtils.scala | 2 +- hail/src/main/scala/is/hail/lir/PST.scala | 2 +- .../scala/is/hail/methods/LocalLDPrune.scala | 19 ---------- .../is/hail/methods/LogisticRegression.scala | 2 - .../is/hail/methods/PoissonRegression.scala | 2 +- .../src/main/scala/is/hail/methods/Skat.scala | 4 +- .../main/scala/is/hail/misc/BGZipBlocks.scala | 2 +- .../scala/is/hail/rvd/AbstractRVDSpec.scala | 1 - hail/src/main/scala/is/hail/rvd/RVD.scala | 4 -- .../scala/is/hail/services/BatchConfig.scala | 3 -- .../scala/is/hail/services/DeployConfig.scala | 3 -- .../services/batch_client/BatchClient.scala | 2 +- .../is/hail/sparkextras/ContextRDD.scala | 3 -- .../GeneralizedChiSquaredDistribution.scala | 6 +-- .../scala/is/hail/stats/RegressionUtils.scala | 1 - .../main/scala/is/hail/stats/package.scala | 4 +- .../hail/types/physical/PCanonicalLocus.scala | 2 +- .../stypes/interfaces/SBaseStruct.scala | 1 - .../physical/stypes/interfaces/SNDArray.scala | 4 +- .../is/hail/types/virtual/TNDArray.scala | 5 --- .../scala/is/hail/types/virtual/Type.scala | 2 +- .../scala/is/hail/utils/ErrorHandling.scala | 1 - .../is/hail/utils/FlipbookIterator.scala | 2 +- .../is/hail/utils/StringEscapeUtils.scala | 1 - .../is/hail/utils/StringSocketAppender.scala | 5 +-- .../scala/is/hail/variant/HardCallView.scala | 2 +- .../is/hail/variant/ReferenceGenome.scala | 1 - .../is/hail/annotations/UnsafeSuite.scala | 6 --- .../test/scala/is/hail/asm4s/ASM4SSuite.scala | 38 +++++++++---------- .../scala/is/hail/asm4s/{A.java => Foo.java} | 2 +- .../is/hail/expr/ir/Aggregators2Suite.scala | 3 +- .../is/hail/expr/ir/AggregatorsSuite.scala | 2 +- .../is/hail/expr/ir/EmitStreamSuite.scala | 29 -------------- .../test/scala/is/hail/expr/ir/IRSuite.scala | 10 +---- .../scala/is/hail/expr/ir/MatrixIRSuite.scala | 1 - 
.../is/hail/expr/ir/MemoryLeakSuite.scala | 2 +- .../scala/is/hail/expr/ir/OrderingSuite.scala | 8 ---- .../is/hail/expr/ir/StagedBTreeSuite.scala | 1 - .../scala/is/hail/expr/ir/TableIRSuite.scala | 4 +- .../expr/ir/analyses/SemanticHashSuite.scala | 4 +- .../is/hail/expr/ir/table/TableGenSuite.scala | 26 ++++++------- .../scala/is/hail/io/IndexBTreeSuite.scala | 1 - .../test/scala/is/hail/io/IndexSuite.scala | 10 +---- .../test/scala/is/hail/io/fs/FSSuite.scala | 8 ++-- .../src/test/scala/is/hail/io/fs/FakeFS.scala | 4 +- .../is/hail/methods/LocalLDPruneSuite.scala | 2 - .../is/hail/methods/MultiArray2Suite.scala | 20 +++++----- .../is/hail/stats/FisherExactTestSuite.scala | 4 -- .../scala/is/hail/stats/eigSymDSuite.scala | 1 - .../hail/types/physical/PNDArraySuite.scala | 3 +- .../is/hail/utils/FlipbookIteratorSuite.scala | 2 +- .../is/hail/utils/PartitionCountsSuite.scala | 2 +- .../utils/RichDenseMatrixDoubleSuite.scala | 2 +- .../is/hail/utils/TreeTraversalSuite.scala | 6 +-- .../test/scala/is/hail/utils/UtilsSuite.scala | 1 - .../hail/variant/ReferenceGenomeSuite.scala | 7 ---- 100 files changed, 126 insertions(+), 373 deletions(-) rename hail/src/test/scala/is/hail/asm4s/{A.java => Foo.java} (89%) diff --git a/hail/build.gradle b/hail/build.gradle index c111fa6e2ba..32125ba7121 100644 --- a/hail/build.gradle +++ b/hail/build.gradle @@ -70,7 +70,8 @@ tasks.withType(ScalaCompile) { "-deprecation", "-unchecked", "-Ywarn-unused:_,-explicits,-implicits", - "-Wconf:cat=unused-locals:w,cat=unused:info,any:w", +// "-Wconf:cat=unused-locals:w,cat=unused:info,any:w", + "-Wconf:any:e", "-Ypartial-unification", ] diff --git a/hail/src/main/scala/is/hail/backend/BackendUtils.scala b/hail/src/main/scala/is/hail/backend/BackendUtils.scala index fd70d0457e5..78bb30fd0f0 100644 --- a/hail/src/main/scala/is/hail/backend/BackendUtils.scala +++ b/hail/src/main/scala/is/hail/backend/BackendUtils.scala @@ -10,6 +10,7 @@ import is.hail.io.fs._ import is.hail.services._ import is.hail.utils._ +import scala.annotation.nowarn import scala.util.Try object BackendUtils { @@ -93,6 +94,7 @@ class BackendUtils( results case Some(cachedResults) => + @nowarn("cat=unused-pat-vars&msg=pattern var c") val remainingContexts = for { c @ (_, k) <- contexts.zipWithIndex diff --git a/hail/src/main/scala/is/hail/backend/ExecuteContext.scala b/hail/src/main/scala/is/hail/backend/ExecuteContext.scala index 5fbb4d197cf..07a411bb309 100644 --- a/hail/src/main/scala/is/hail/backend/ExecuteContext.scala +++ b/hail/src/main/scala/is/hail/backend/ExecuteContext.scala @@ -136,8 +136,6 @@ class ExecuteContext( private val cleanupFunctions = mutable.ArrayBuffer[() => Unit]() - private[this] val broadcasts = mutable.ArrayBuffer.empty[BroadcastValue[_]] - val memo: mutable.Map[Any, Any] = new mutable.HashMap[Any, Any]() val taskContext: HailTaskContext = new LocalTaskContext(0, 0) diff --git a/hail/src/main/scala/is/hail/backend/local/LocalBackend.scala b/hail/src/main/scala/is/hail/backend/local/LocalBackend.scala index ac4ec419b29..cdb3105b012 100644 --- a/hail/src/main/scala/is/hail/backend/local/LocalBackend.scala +++ b/hail/src/main/scala/is/hail/backend/local/LocalBackend.scala @@ -208,7 +208,7 @@ class LocalBackend( throw new LowererUnsupportedOperation(s"lowered to uncompilable IR: ${Pretty(ctx, ir)}") if (ir.typ == TVoid) { - val (pt, f) = ctx.timer.time("Compile") { + val (_, f) = ctx.timer.time("Compile") { Compile[AsmFunction1RegionUnit]( ctx, FastSeq(), diff --git 
a/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala b/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala index 48e91a7b107..f2fc277e3a2 100644 --- a/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala +++ b/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala @@ -154,7 +154,6 @@ class ServiceBackend( fs: FS, collection: Array[Array[Byte]], stageIdentifier: String, - dependency: Option[TableStageDependency] = None, f: (Array[Byte], HailTaskContext, HailClassLoader, FS) => Array[Byte], ): (String, String, Int) = { val backendContext = _backendContext.asInstanceOf[ServiceBackendContext] @@ -291,7 +290,7 @@ class ServiceBackend( f: (Array[Byte], HailTaskContext, HailClassLoader, FS) => Array[Byte] ): Array[Array[Byte]] = { val (token, root, n) = - submitAndWaitForBatch(_backendContext, fs, collection, stageIdentifier, dependency, f) + submitAndWaitForBatch(_backendContext, fs, collection, stageIdentifier, f) log.info(s"parallelizeAndComputeWithIndex: $token: reading results") val startTime = System.nanoTime() @@ -321,14 +320,8 @@ class ServiceBackend( )( f: (Array[Byte], HailTaskContext, HailClassLoader, FS) => Array[Byte] ): (Option[Throwable], IndexedSeq[(Array[Byte], Int)]) = { - val (token, root, n) = submitAndWaitForBatch( - _backendContext, - fs, - collection.map(_._1).toArray, - stageIdentifier, - dependency, - f, - ) + val (token, root, _) = + submitAndWaitForBatch(_backendContext, fs, collection.map(_._1).toArray, stageIdentifier, f) log.info(s"parallelizeAndComputeWithIndex: $token: reading results") val startTime = System.nanoTime() val r @ (_, results) = runAllKeepFirstError(executor) { @@ -372,7 +365,6 @@ class ServiceBackend( MakeTuple.ordered(FastSeq(x)), optimize = true, ) - val retPType = pt.asInstanceOf[PBaseStruct] val elementType = pt.fields(0).typ val off = ctx.scopedExecution((hcl, fs, htc, r) => f(hcl, fs, htc, r).apply(r)) val codec = TypedCodecSpec( @@ -455,7 +447,7 @@ object ServiceBackendAPI { assert(argv.length == 7, argv.toFastSeq) val scratchDir = argv(0) - val logFile = argv(1) + // val logFile = argv(1) val jarLocation = argv(2) val kind = argv(3) assert(kind == Main.DRIVER) @@ -473,7 +465,7 @@ object ServiceBackendAPI { val batchClient = new BatchClient(s"$scratchDir/secrets/gsa-key/key.json") log.info("BatchClient allocated.") - var batchId = + val batchId = BatchConfig.fromConfigFile(s"$scratchDir/batch-config/batch-config.json").map(_.batchId) log.info("BatchConfig parsed.") diff --git a/hail/src/main/scala/is/hail/backend/service/Worker.scala b/hail/src/main/scala/is/hail/backend/service/Worker.scala index b4adb68e65b..ad0b2498954 100644 --- a/hail/src/main/scala/is/hail/backend/service/Worker.scala +++ b/hail/src/main/scala/is/hail/backend/service/Worker.scala @@ -104,8 +104,8 @@ object Worker { throw new IllegalArgumentException(s"expected seven arguments, not: ${argv.length}") } val scratchDir = argv(0) - val logFile = argv(1) - var jarLocation = argv(2) + // val logFile = argv(1) + // var jarLocation = argv(2) val kind = argv(3) assert(kind == Main.WORKER) val root = argv(4) diff --git a/hail/src/main/scala/is/hail/experimental/package.scala b/hail/src/main/scala/is/hail/experimental/package.scala index 9ac4ba51d71..623c7e4cb69 100644 --- a/hail/src/main/scala/is/hail/experimental/package.scala +++ b/hail/src/main/scala/is/hail/experimental/package.scala @@ -30,7 +30,7 @@ package object experimental { if (ac <= 1 || an == 0) // FAF should not be calculated on singletons 0.0 else { - var f = (af: 
Double) => ac.toDouble - 1 - qpois(ci, an.toDouble * af) + val f = (af: Double) => ac.toDouble - 1 - qpois(ci, an.toDouble * af) val root = uniroot(f, lower, upper, tol) val rounder = 1d / (precision / 100d) var max_af = math.round(root.getOrElse(0.0) * rounder) / rounder diff --git a/hail/src/main/scala/is/hail/expr/ir/AbstractMatrixTableSpec.scala b/hail/src/main/scala/is/hail/expr/ir/AbstractMatrixTableSpec.scala index 37b584196e8..9b12157d5af 100644 --- a/hail/src/main/scala/is/hail/expr/ir/AbstractMatrixTableSpec.scala +++ b/hail/src/main/scala/is/hail/expr/ir/AbstractMatrixTableSpec.scala @@ -75,7 +75,6 @@ object RelationalSpec { def read(fs: FS, path: String): RelationalSpec = { val jv = readMetadata(fs, path) - val references = readReferences(fs, path, jv) (jv \ "name").extract[String] match { case "TableSpec" => TableSpec.fromJValue(fs, path, jv) diff --git a/hail/src/main/scala/is/hail/expr/ir/BinaryOp.scala b/hail/src/main/scala/is/hail/expr/ir/BinaryOp.scala index 17941317b04..91be6146c88 100644 --- a/hail/src/main/scala/is/hail/expr/ir/BinaryOp.scala +++ b/hail/src/main/scala/is/hail/expr/ir/BinaryOp.scala @@ -118,8 +118,6 @@ object BinaryOp { case _ => incompatible(lt, rt, op) } case (TBoolean, TBoolean) => - val ll = coerce[Boolean](l) - val rr = coerce[Boolean](r) op match { case _ => incompatible(lt, rt, op) } diff --git a/hail/src/main/scala/is/hail/expr/ir/BinarySearch.scala b/hail/src/main/scala/is/hail/expr/ir/BinarySearch.scala index dd61e95f18a..146502af3ee 100644 --- a/hail/src/main/scala/is/hail/expr/ir/BinarySearch.scala +++ b/hail/src/main/scala/is/hail/expr/ir/BinarySearch.scala @@ -286,15 +286,6 @@ object BinarySearch { } } - private def runSearchUnit( - cb: EmitCodeBuilder, - haystack: SIndexableValue, - compare: Comparator, - found: (Value[Int], Value[Int], Value[Int]) => Unit, - notFound: Value[Int] => Unit, - ): Unit = - runSearchBoundedUnit(cb, haystack, compare, 0, haystack.loadLength(), found, notFound) - private def runSearchBounded[T: TypeInfo]( cb: EmitCodeBuilder, haystack: SIndexableValue, diff --git a/hail/src/main/scala/is/hail/expr/ir/Emit.scala b/hail/src/main/scala/is/hail/expr/ir/Emit.scala index 2061e685536..0d3163a1e01 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Emit.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Emit.scala @@ -13,7 +13,7 @@ import is.hail.expr.ir.streams.{EmitStream, StreamProducer, StreamUtils} import is.hail.io.{BufferSpec, InputBuffer, OutputBuffer, TypedCodecSpec} import is.hail.io.fs.FS import is.hail.linalg.{BLAS, LAPACK, LinalgCodeUtils} -import is.hail.types.{tcoerce, TypeWithRequiredness, VirtualTypeWithReq} +import is.hail.types.{TypeWithRequiredness, VirtualTypeWithReq, tcoerce} import is.hail.types.physical._ import is.hail.types.physical.stypes._ import is.hail.types.physical.stypes.concrete._ @@ -25,8 +25,8 @@ import is.hail.variant.ReferenceGenome import scala.collection.mutable import scala.language.existentials - import java.io._ +import scala.annotation.nowarn // class for holding all information computed ahead-of-time that we need in the emitter object EmitContext { @@ -766,6 +766,7 @@ class Emit[C](val ctx: EmitContext, val cb: EmitClassBuilder[C]) { val mb: EmitMethodBuilder[C] = cb.emb.asInstanceOf[EmitMethodBuilder[C]] + @nowarn("cat=unused-locals&msg=local default argument") def emit( ir: IR, mb: EmitMethodBuilder[C] = mb, @@ -2788,7 +2789,7 @@ class Emit[C](val ctx: EmitContext, val cb: EmitClassBuilder[C]) { } case ResultOp(idx, sig) => - val AggContainer(aggs, sc, _) = container.get + val 
AggContainer(_, sc, _) = container.get val rvAgg = agg.Extract.getAgg(sig) rvAgg.result(cb, sc.states(idx), region) @@ -3530,16 +3531,6 @@ class Emit[C](val ctx: EmitContext, val cb: EmitClassBuilder[C]) { ): IEmitCode = this.emitI(ir, cb, region, env, container, loopEnv) - def emitVoid( - ir: IR, - env: EmitEnv = env, - container: Option[AggContainer] = container, - loopEnv: Option[Env[LoopRef]] = loopEnv, - ): Code[Unit] = - EmitCodeBuilder.scopedVoid(mb) { cb => - this.emitVoid(cb, ir, region, env, container, loopEnv) - } - def emitStream(ir: IR, outerRegion: Value[Region], env: EmitEnv = env): EmitCode = EmitCode.fromI(mb)(cb => EmitStream.produce(this, ir, cb, cb.emb, outerRegion, env, container) @@ -3669,7 +3660,6 @@ class Emit[C](val ctx: EmitContext, val cb: EmitClassBuilder[C]) { ) sort.emitWithBuilder[Boolean] { cb => - val region = sort.getCodeParam[Region](1) val leftEC = cb.memoize( EmitCode.present(sort, elemSCT.loadToSValue(cb, sort.getCodeParam(2)(elemSCT.ti))), "sort_leftEC", @@ -3794,8 +3784,6 @@ object NDArrayEmitter { rightShape: IndexedSeq[Value[Long]], errorID: Int, ): IndexedSeq[Value[Long]] = { - val mb = cb.emb - assert(leftShape.nonEmpty) assert(rightShape.nonEmpty) diff --git a/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala b/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala index ab84caa8fe3..78391c6e975 100644 --- a/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala +++ b/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala @@ -384,18 +384,11 @@ final class EmitClassBuilder[C](val emodb: EmitModuleBuilder, val cb: ClassBuild newPField(name, st), ) - private[this] val typMap: mutable.Map[Type, Value[_ <: Type]] = - mutable.Map() - - private[this] val pTypeMap: mutable.Map[PType, Value[_ <: PType]] = mutable.Map() - private[this] type CompareMapKey = (SType, SType) private[this] val memoizedComparisons: mutable.Map[CompareMapKey, CodeOrdering] = mutable.Map[CompareMapKey, CodeOrdering]() - def numTypes: Int = typMap.size - private[this] val decodedLiteralsField = genFieldThisRef[Array[Long]]("decoded_lits") def literalsArray(): Value[Array[Long]] = decodedLiteralsField diff --git a/hail/src/main/scala/is/hail/expr/ir/ExtractIntervalFilters.scala b/hail/src/main/scala/is/hail/expr/ir/ExtractIntervalFilters.scala index 2f22c69d3ba..77280de352c 100644 --- a/hail/src/main/scala/is/hail/expr/ir/ExtractIntervalFilters.scala +++ b/hail/src/main/scala/is/hail/expr/ir/ExtractIntervalFilters.scala @@ -168,7 +168,6 @@ class KeySetLattice(ctx: ExecuteContext, keyType: TStruct) extends Lattice { if (v.isEmpty) return top val builder = mutable.ArrayBuilder.make[Interval]() - var i = 0 if (v.head.left != IntervalEndpoint(Row(), -1)) { builder += Interval(IntervalEndpoint(Row(), -1), v.head.left) } @@ -751,24 +750,6 @@ class ExtractIntervalFilters(ctx: ExecuteContext, keyType: TStruct) { private def literalSizeOkay(lit: Any): Boolean = lit.asInstanceOf[Iterable[_]].size <= MAX_LITERAL_SIZE - private def wrapInRow(intervals: IndexedSeq[Interval]): IndexedSeq[Interval] = intervals - .map { interval => - Interval( - IntervalEndpoint(Row(interval.left.point), interval.left.sign), - IntervalEndpoint(Row(interval.right.point), interval.right.sign), - ) - } - - private def intervalFromComparison(v: Any, op: ComparisonOp[_]): Interval = { - (op: @unchecked) match { - case _: EQ => Interval(endpoint(v, -1), endpoint(v, 1)) - case GT(_, _) => Interval(negInf, endpoint(v, -1)) // value > key - case GTEQ(_, _) => Interval(negInf, endpoint(v, 1)) // value 
>= key - case LT(_, _) => Interval(endpoint(v, 1), posInf) // value < key - case LTEQ(_, _) => Interval(endpoint(v, -1), posInf) // value <= key - } - } - private def posInf: IntervalEndpoint = IntervalEndpoint(Row(), 1) private def negInf: IntervalEndpoint = IntervalEndpoint(Row(), -1) diff --git a/hail/src/main/scala/is/hail/expr/ir/GenericLines.scala b/hail/src/main/scala/is/hail/expr/ir/GenericLines.scala index cff71fffeed..8adb4fe75eb 100644 --- a/hail/src/main/scala/is/hail/expr/ir/GenericLines.scala +++ b/hail/src/main/scala/is/hail/expr/ir/GenericLines.scala @@ -67,7 +67,7 @@ object GenericLines { private var eof = false private var closed = false - private var buf = new Array[Byte](64 * 1024) + private val buf = new Array[Byte](64 * 1024) private var bufOffset = 0L private var bufMark = 0 private var bufPos = 0 @@ -339,7 +339,6 @@ object GenericLines { } val body: (FS, Any) => CloseableIterator[GenericLine] = { (fs: FS, context: Any) => val contextRow = context.asInstanceOf[Row] - val index = contextRow.getAs[Int](0) val file = contextRow.getAs[String](1) val chrom = contextRow.getAs[String](2) val start = contextRow.getAs[Int](3) diff --git a/hail/src/main/scala/is/hail/expr/ir/LowerMatrixIR.scala b/hail/src/main/scala/is/hail/expr/ir/LowerMatrixIR.scala index 8e68ff1c5ab..07b26c4bb56 100644 --- a/hail/src/main/scala/is/hail/expr/ir/LowerMatrixIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/LowerMatrixIR.scala @@ -1035,7 +1035,6 @@ object LowerMatrixIR { .aggregate(makeTuple(applyAggOp(Count(), FastSeq(), FastSeq()), 'global(colsField).len)) case MatrixAggregate(child, query) => val lc = lower(ctx, child, ab) - val idx = Symbol(genUID()) TableAggregate( lc, aggExplodeIR( diff --git a/hail/src/main/scala/is/hail/expr/ir/MatrixWriter.scala b/hail/src/main/scala/is/hail/expr/ir/MatrixWriter.scala index e801bc70985..f27faab7cf5 100644 --- a/hail/src/main/scala/is/hail/expr/ir/MatrixWriter.scala +++ b/hail/src/main/scala/is/hail/expr/ir/MatrixWriter.scala @@ -1011,7 +1011,6 @@ case class VCFPartitionWriter( _writeB(cb, v.toBytes(cb).loadBytes(cb)) case v: SCallValue => val ploidy = v.ploidy(cb) - val phased = v.isPhased(cb) cb.if_(ploidy.ceq(0), cb._fatal("VCF spec does not support 0-ploid calls.")) cb.if_(ploidy.ceq(1), cb._fatal("VCF spec does not support phased haploid calls.")) val c = v.canonicalCall(cb) diff --git a/hail/src/main/scala/is/hail/expr/ir/NativeReaderOptions.scala b/hail/src/main/scala/is/hail/expr/ir/NativeReaderOptions.scala index fdbecc51aa9..d0c31e07a69 100644 --- a/hail/src/main/scala/is/hail/expr/ir/NativeReaderOptions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/NativeReaderOptions.scala @@ -21,12 +21,11 @@ class NativeReaderOptionsSerializer() extends CustomSerializer[NativeReaderOptio NativeReaderOptions(intervals, intervalPointType, filterIntervals) }, { case opts: NativeReaderOptions => - implicit val fmt = format val ty = TArray(TInterval(opts.intervalPointType)) - (("name" -> opts.getClass.getSimpleName) ~ + ("name" -> opts.getClass.getSimpleName) ~ ("intervals" -> JSONAnnotationImpex.exportAnnotation(opts.intervals, ty)) ~ ("intervalPointType" -> opts.intervalPointType.parsableString()) ~ - ("filterIntervals" -> opts.filterIntervals)) + ("filterIntervals" -> opts.filterIntervals) }, ) ) diff --git a/hail/src/main/scala/is/hail/expr/ir/NormalizeNames.scala b/hail/src/main/scala/is/hail/expr/ir/NormalizeNames.scala index 93593109cc3..108834eb585 100644 --- a/hail/src/main/scala/is/hail/expr/ir/NormalizeNames.scala +++ 
b/hail/src/main/scala/is/hail/expr/ir/NormalizeNames.scala @@ -3,6 +3,8 @@ package is.hail.expr.ir import is.hail.backend.ExecuteContext import is.hail.utils.StackSafe._ +import scala.annotation.nowarn + class NormalizeNames(normFunction: Int => String, allowFreeVariables: Boolean = false) { var count: Int = 0 @@ -23,6 +25,7 @@ class NormalizeNames(normFunction: Int => String, allowFreeVariables: Boolean = private def normalizeIR(ir: BaseIR, env: BindingEnv[String], context: Array[String] = Array()) : StackFrame[BaseIR] = { + @nowarn("cat=unused-locals&msg=default argument") def normalizeBaseIR(next: BaseIR, env: BindingEnv[String] = env): StackFrame[BaseIR] = call(normalizeIR(next, env, context :+ ir.getClass().getName())) diff --git a/hail/src/main/scala/is/hail/expr/ir/PruneDeadFields.scala b/hail/src/main/scala/is/hail/expr/ir/PruneDeadFields.scala index 9744b0730f4..67a1a2f8d04 100644 --- a/hail/src/main/scala/is/hail/expr/ir/PruneDeadFields.scala +++ b/hail/src/main/scala/is/hail/expr/ir/PruneDeadFields.scala @@ -1109,7 +1109,6 @@ object PruneDeadFields { memoizeMatrixIR(ctx, child, dep, memo) case MatrixColsTail(child, _) => memoizeMatrixIR(ctx, child, requestedType, memo) case CastTableToMatrix(child, entriesFieldName, colsFieldName, _) => - val m = Map(MatrixType.entriesIdentifier -> entriesFieldName) val childDep = child.typ.copy( key = requestedType.rowKey, globalType = unify( @@ -1679,7 +1678,6 @@ object PruneDeadFields { memoizeValueIR(ctx, aggIR, requestedType.asInstanceOf[TDict].valueType, memo), ) case AggArrayPerElement(a, elementName, indexName, aggBody, knownLength, isScan) => - val aType = a.typ.asInstanceOf[TArray] val bodyEnv = memoizeValueIR(ctx, aggBody, TIterable.elementType(requestedType), memo) if (isScan) { val valueType = @@ -1778,7 +1776,6 @@ object PruneDeadFields { val sType = requestedType.asInstanceOf[TStruct] val insFieldNames = fields.map(_._1).toSet val rightDep = sType.filter(f => insFieldNames.contains(f.name))._1 - val rightDepFields = rightDep.fieldNames.toSet val leftDep = TStruct( old.typ.asInstanceOf[TStruct] .fields @@ -1815,7 +1812,6 @@ object PruneDeadFields { } ) case GetTupleElement(o, idx) => - val childTupleType = o.typ.asInstanceOf[TTuple] val tupleDep = TTuple(FastSeq(TupleField(idx, requestedType))) memoizeValueIR(ctx, o, tupleDep, memo) case ConsoleLog(message, result) => diff --git a/hail/src/main/scala/is/hail/expr/ir/Simplify.scala b/hail/src/main/scala/is/hail/expr/ir/Simplify.scala index 6c0da6b3cc9..1da59d653b7 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Simplify.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Simplify.scala @@ -68,24 +68,6 @@ object Simplify { private[this] def rewriteBlockMatrixNode: BlockMatrixIR => Option[BlockMatrixIR] = blockMatrixRules.lift - /** Returns true if 'x' propagates missingness, meaning if any child of 'x' evaluates to missing, - * then 'x' will evaluate to missing. - */ - private[this] def isStrict(x: IR): Boolean = { - x match { - case _: Apply | - _: ApplySeeded | - _: ApplyUnaryPrimOp | - _: ApplyBinaryPrimOp | - _: ArrayRef | - _: ArrayLen | - _: GetField | - _: GetTupleElement => true - case ApplyComparisonOp(op, _, _) => op.strict - case _ => false - } - } - /** Returns true if any strict child of 'x' is NA. A child is strict if 'x' evaluates to missing * whenever the child does. 
*/ @@ -484,7 +466,6 @@ object Simplify { allRefsCanBePassedThrough(Let(after.toFastSeq, body)) } => - val r = Ref(name, x.typ) val fieldNames = newFields.map(_._1).toArray val newFieldMap = newFields.toMap val newFieldRefs = newFieldMap.map { case (k, ir) => diff --git a/hail/src/main/scala/is/hail/expr/ir/TableIR.scala b/hail/src/main/scala/is/hail/expr/ir/TableIR.scala index fc0a39fb4b8..c54afe4635b 100644 --- a/hail/src/main/scala/is/hail/expr/ir/TableIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/TableIR.scala @@ -3511,7 +3511,7 @@ case class TableExplode(child: TableIR, path: IndexedSeq[String]) extends TableI 0, )) - val (len, l) = Compile[AsmFunction2RegionLongInt]( + val (_, l) = Compile[AsmFunction2RegionLongInt]( ctx, FastSeq(( "row", @@ -3972,7 +3972,6 @@ case class TableAggregateByKey(child: TableIR, expr: IR) extends TableIR { var current: Long = 0 val rowKey: WritableRegionValue = WritableRegionValue(sm, keyType, ctx.freshRegion()) val consumerRegion: Region = ctx.region - val newRV = RegionValue(consumerRegion) def hasNext: Boolean = { if (isEnd || (current == 0 && !it.hasNext)) { diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/CollectAsSetAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/CollectAsSetAggregator.scala index 7f7078ee609..efc71582ee7 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/CollectAsSetAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/CollectAsSetAggregator.scala @@ -161,8 +161,6 @@ class AppendOnlySetState(val kb: EmitClassBuilder[_], vt: VirtualTypeWithReq) def deserialize(codec: BufferSpec): (EmitCodeBuilder, Value[InputBuffer]) => Unit = { val kDec = et.buildDecoder(t.virtualType, kb) - val km = kb.genFieldThisRef[Boolean]("km") - val kv = kb.genFieldThisRef("kv")(typeToTypeInfo(t)) { (cb: EmitCodeBuilder, ib: Value[InputBuffer]) => init(cb) diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/StagedBlockLinkedList.scala b/hail/src/main/scala/is/hail/expr/ir/agg/StagedBlockLinkedList.scala index 84fde3a5e01..f2895416721 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/StagedBlockLinkedList.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/StagedBlockLinkedList.scala @@ -75,9 +75,6 @@ class StagedBlockLinkedList(val elemType: PType, val kb: EmitClassBuilder[_]) { private def next(n: Node): Code[Long] = Region.loadAddress(nodeType.fieldOffset(n, "next")) - private def hasNext(n: Node): Code[Boolean] = - next(n) cne nil - private def setNext(cb: EmitCodeBuilder, n: Node, nNext: Node): Unit = cb += Region.storeAddress(nodeType.fieldOffset(n, "next"), nNext) diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/Functions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/Functions.scala index 4a68c9247c5..357a84685f9 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/Functions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/Functions.scala @@ -6,7 +6,6 @@ import is.hail.backend.{ExecuteContext, HailStateManager} import is.hail.experimental.ExperimentalFunctions import is.hail.expr.ir._ import is.hail.io.bgen.BGENFunctions -import is.hail.types._ import is.hail.types.physical._ import is.hail.types.physical.stypes.{EmitType, SType, SValue} import is.hail.types.physical.stypes.concrete._ @@ -308,8 +307,6 @@ abstract class RegistryFunctions { def registerAll(): Unit - private val boxes = mutable.Map[String, Box[Type]]() - def tv(name: String): TVariable = TVariable(name) diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/LocusFunctions.scala 
b/hail/src/main/scala/is/hail/expr/ir/functions/LocusFunctions.scala index f43809bb4b1..234a33d45a1 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/LocusFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/LocusFunctions.scala @@ -701,7 +701,6 @@ object LocusFunctions extends RegistryFunctions { val iT = interval.st.asInstanceOf[SInterval] val srcRG = iT.pointType.asInstanceOf[SLocus].rg val destRG = rt.types(0).asInstanceOf[PInterval].pointType.asInstanceOf[PLocus].rg - val er = EmitRegion(cb.emb, r) val intervalObj = Code.checkcast[Interval](svalueToJavaValue(cb, r, interval)) val lifted = cb.newLocal[(Interval, Boolean)]( "liftover_locus_interval_lifted", diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/NDArrayFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/NDArrayFunctions.scala index 29d43a79b67..18adad9f0b3 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/NDArrayFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/NDArrayFunctions.scala @@ -413,7 +413,7 @@ object NDArrayFunctions extends RegistryFunctions { SNDArrayPointerValue ] val row = cb.newLocal[Long]("rowIdx") - val IndexedSeq(nRows, nCols) = newBlock.shapes + val IndexedSeq(nRows, _) = newBlock.shapes cb.for_( cb.assign(row, 0L), row < nRows.get, diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerDistributedSort.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerDistributedSort.scala index c57893459e8..8330fa7c6dc 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerDistributedSort.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerDistributedSort.scala @@ -54,7 +54,6 @@ object LowerDistributedSort { val rowsType = resultPType.fieldType("rows").asInstanceOf[PArray] val rowType = rowsType.elementType.asInstanceOf[PStruct] val rows = rowsAndGlobal.getAs[IndexedSeq[Annotation]](0) - val kType = TStruct(sortFields.map(f => (f.field, rowType.virtualType.fieldType(f.field))): _*) val sortedRows = localAnnotationSort(ctx, rows, sortFields, rowType.virtualType) diff --git a/hail/src/main/scala/is/hail/expr/ir/ndarrays/EmitNDArray.scala b/hail/src/main/scala/is/hail/expr/ir/ndarrays/EmitNDArray.scala index 2001b4de4e3..2f8165c4e7e 100644 --- a/hail/src/main/scala/is/hail/expr/ir/ndarrays/EmitNDArray.scala +++ b/hail/src/main/scala/is/hail/expr/ir/ndarrays/EmitNDArray.scala @@ -593,7 +593,7 @@ object EmitNDArray { shape.indices.map(idx => { (cb: EmitCodeBuilder, outerStep: Value[Long]) => // SlicingIndices is a map from my coordinates to my child's coordinates. 
val whichSlicingAxis = slicingIndices(idx) - val (start, stop, sliceStep) = slicingValueTriples(idx) + val (_, _, sliceStep) = slicingValueTriples(idx) val innerStep = cb.newLocal[Long]( "ndarray_producer_slice_child_step", sliceStep * outerStep, diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/IterableOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/IterableOrdering.scala index 62a71a4ade9..af4950b160e 100644 --- a/hail/src/main/scala/is/hail/expr/ir/orderings/IterableOrdering.scala +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/IterableOrdering.scala @@ -123,8 +123,6 @@ object IterableOrdering { val lhs = x.asIndexable val rhs = y.asIndexable - val gt = cb.newLocal("gt", false) - val eq = cb.newLocal("eq", true) loop(cb, lhs, rhs) { (lhs, rhs) => val gt = elemGt(cb, lhs, rhs) diff --git a/hail/src/main/scala/is/hail/expr/ir/streams/EmitStream.scala b/hail/src/main/scala/is/hail/expr/ir/streams/EmitStream.scala index 710584f2503..45734a73949 100644 --- a/hail/src/main/scala/is/hail/expr/ir/streams/EmitStream.scala +++ b/hail/src/main/scala/is/hail/expr/ir/streams/EmitStream.scala @@ -149,7 +149,7 @@ object EmitStream { cb: EmitCodeBuilder, region: Value[Region] = outerRegion, env: EmitEnv = env, - container: Option[AggContainer] = container, + container: Option[AggContainer], ): Unit = emitter.emitVoid(cb, ir, region, env, container, None) @@ -170,8 +170,7 @@ object EmitStream { streamIR: IR, elementPType: PType, cb: EmitCodeBuilder, - outerRegion: Value[Region] = outerRegion, - env: EmitEnv = env, + env: EmitEnv, ): IEmitCode = { val ecb = cb.emb.genEmitClass[NoBoxLongIterator]("stream_to_iter") ecb.cb.addInterface(typeInfo[MissingnessAsMethod].iname) @@ -2917,8 +2916,6 @@ object EmitStream { producers.flatMap(_.length) match { case Seq() => None case ls => - val len = mb.genFieldThisRef[Int]("zip_asl_len") - val lenTemp = mb.genFieldThisRef[Int]("zip_asl_len_temp") Some({ cb: EmitCodeBuilder => val len = cb.newLocal[Int]("zip_len", ls.head(cb)) ls.tail.foreach { compL => @@ -3370,7 +3367,6 @@ object EmitStream { makeProducer, eltType, cb, - outerRegion, env.bind(ctxName, cb.memoize(contextsArray.loadElement(cb, idx))), ) .get(cb, "streams in zipJoinProducers cannot be missing") @@ -3640,7 +3636,6 @@ object EmitStream { .storageType .asInstanceOf[PCanonicalStruct] - val region = mb.genFieldThisRef[Region]("smm_region") val regionArray = mb.genFieldThisRef[Array[Region]]("smm_region_array") val staticMemManagementArray = diff --git a/hail/src/main/scala/is/hail/io/IndexBTree.scala b/hail/src/main/scala/is/hail/io/IndexBTree.scala index a6c2be81e13..11cf0d4fece 100644 --- a/hail/src/main/scala/is/hail/io/IndexBTree.scala +++ b/hail/src/main/scala/is/hail/io/IndexBTree.scala @@ -182,7 +182,7 @@ class IndexBTree(indexFileName: String, fs: FS, branchingFactor: Int = 1024) ext def queryIndex(query: Long): Option[Long] = { require(query >= 0) - val (index, result) = traverseTree(query, 0L, 1) + val (_, result) = traverseTree(query, 0L, 1) if (result != -1L) Option(result) diff --git a/hail/src/main/scala/is/hail/io/InputBuffers.scala b/hail/src/main/scala/is/hail/io/InputBuffers.scala index 25d3c77cc51..97ad9bfc1d3 100644 --- a/hail/src/main/scala/is/hail/io/InputBuffers.scala +++ b/hail/src/main/scala/is/hail/io/InputBuffers.scala @@ -185,7 +185,7 @@ final class MemoryInputBuffer(mb: MemoryBuffer) extends InputBuffer { def readBytes(toRegion: Region, toOff: Long, n: Int): Unit = mb.readBytes(toOff, n) def readBytesArray(n: Int): Array[Byte] = { - var arr = new 
Array[Byte](n) + val arr = new Array[Byte](n) mb.readBytesArray(arr, n) arr } @@ -457,7 +457,7 @@ final class BlockingInputBuffer(blockSize: Int, in: InputBlockBuffer) extends In } def readBytesArray(n: Int): Array[Byte] = { - var arr = new Array[Byte](n) + val arr = new Array[Byte](n) read(arr, 0, n) arr } diff --git a/hail/src/main/scala/is/hail/io/avro/UnsafeAvroTableReaderOptions.scala b/hail/src/main/scala/is/hail/io/avro/UnsafeAvroTableReaderOptions.scala index adb151693d7..14a2c5b8175 100644 --- a/hail/src/main/scala/is/hail/io/avro/UnsafeAvroTableReaderOptions.scala +++ b/hail/src/main/scala/is/hail/io/avro/UnsafeAvroTableReaderOptions.scala @@ -29,7 +29,6 @@ class UnsafeAvroTableReaderOptionsSerializer UnsafeAvroTableReaderOptions(key, intervals, intervalPointType) }, { case UnsafeAvroTableReaderOptions(key, intervals, intervalPointType) => - implicit val fmt: Formats = format val ty = TArray(TInterval(intervalPointType)) ("name" -> UnsafeAvroTableReaderOptions.getClass.getSimpleName) ~ ("key" -> key) ~ diff --git a/hail/src/main/scala/is/hail/io/bgen/BgenRDDPartitions.scala b/hail/src/main/scala/is/hail/io/bgen/BgenRDDPartitions.scala index 7904bbf7457..0241e824747 100644 --- a/hail/src/main/scala/is/hail/io/bgen/BgenRDDPartitions.scala +++ b/hail/src/main/scala/is/hail/io/bgen/BgenRDDPartitions.scala @@ -51,8 +51,6 @@ object BgenRDDPartitions extends Logging { nPartitions: Option[Int], keyType: Type, ): IndexedSeq[FilePartitionInfo] = { - val fs = ctx.fs - val fileRangeBounds = checkFilesDisjoint(ctx, files, keyType) val intervalOrdering = TInterval(keyType).ordering(ctx.stateManager) diff --git a/hail/src/main/scala/is/hail/io/bgen/StagedBGENReader.scala b/hail/src/main/scala/is/hail/io/bgen/StagedBGENReader.scala index bccc68a1c23..f16d9177a49 100644 --- a/hail/src/main/scala/is/hail/io/bgen/StagedBGENReader.scala +++ b/hail/src/main/scala/is/hail/io/bgen/StagedBGENReader.scala @@ -115,7 +115,6 @@ object StagedBGENReader { val nAlleles2 = cb.newLocal[Int]("nAlleles2") val minPloidy = cb.newLocal[Int]("minPloidy") val maxPloidy = cb.newLocal[Int]("maxPloidy") - val longPloidy = cb.newLocal[Long]("longPloidy") val ploidy = cb.newLocal[Int]("ploidy") val phase = cb.newLocal[Int]("phase") val nBitsPerProb = cb.newLocal[Int]("nBitsPerProb") diff --git a/hail/src/main/scala/is/hail/io/compress/BGzipOutputStream.scala b/hail/src/main/scala/is/hail/io/compress/BGzipOutputStream.scala index e84be3825d2..bdea7344437 100644 --- a/hail/src/main/scala/is/hail/io/compress/BGzipOutputStream.scala +++ b/hail/src/main/scala/is/hail/io/compress/BGzipOutputStream.scala @@ -75,7 +75,7 @@ class BGzipOutputStream(out: OutputStream) extends CompressionOutputStream(out) var numBytesRemaining = length while (numBytesRemaining > 0) { - var bytesToWrite = + val bytesToWrite = math.min(uncompressedBuffer.length - numUncompressedBytes, numBytesRemaining) System.arraycopy(bytes, currentPosition, uncompressedBuffer, numUncompressedBytes, bytesToWrite) @@ -111,7 +111,7 @@ class BGzipOutputStream(out: OutputStream) extends CompressionOutputStream(out) crc32.reset() crc32.update(uncompressedBuffer, 0, numUncompressedBytes) - val totalBlockSize: Int = writeGzipBlock(compressedSize, numUncompressedBytes, crc32.getValue) + writeGzipBlock(compressedSize, numUncompressedBytes, crc32.getValue) numUncompressedBytes = 0 // reset variable } diff --git a/hail/src/main/scala/is/hail/io/fs/AzureStorageFS.scala b/hail/src/main/scala/is/hail/io/fs/AzureStorageFS.scala index 4c5ec74e2d1..05613fc3945 100644 --- 
a/hail/src/main/scala/is/hail/io/fs/AzureStorageFS.scala +++ b/hail/src/main/scala/is/hail/io/fs/AzureStorageFS.scala @@ -25,7 +25,6 @@ import java.io.{ByteArrayOutputStream, FileNotFoundException, OutputStream} import java.nio.file.Paths import java.time.Duration -import org.apache.log4j.Logger import org.json4s.Formats import org.json4s.jackson.JsonMethods @@ -88,10 +87,6 @@ object AzureStorageFS { private val AZURE_HTTPS_URI_REGEX = "^https:\\/\\/([a-z0-9_\\-\\.]+)\\.blob\\.core\\.windows\\.net\\/([a-z0-9_\\-\\.]+)(\\/.*)?".r - private val log = Logger.getLogger(getClass.getName) - - val schemes: Array[String] = Array("hail-az", "https") - def parseUrl(filename: String): AzureStorageFSURL = { val scheme = filename.split(":")(0) if (scheme == "hail-az") { diff --git a/hail/src/main/scala/is/hail/io/fs/HadoopFS.scala b/hail/src/main/scala/is/hail/io/fs/HadoopFS.scala index 7f6fbb6f6b4..285cfd578c1 100644 --- a/hail/src/main/scala/is/hail/io/fs/HadoopFS.scala +++ b/hail/src/main/scala/is/hail/io/fs/HadoopFS.scala @@ -126,7 +126,7 @@ class HadoopFS(private[this] var conf: SerializableHadoopConfiguration) extends new hadoop.fs.Path(filename).getFileSystem(conf.value) def listDirectory(url: URL): Array[FileListEntry] = { - var statuses = url.hadoopFs.globStatus(url.hadoopPath) + val statuses = url.hadoopFs.globStatus(url.hadoopPath) if (statuses == null) { throw new FileNotFoundException(url.toString) } else { diff --git a/hail/src/main/scala/is/hail/io/index/IndexReader.scala b/hail/src/main/scala/is/hail/io/index/IndexReader.scala index b75fb68b6c4..4ae410403cd 100644 --- a/hail/src/main/scala/is/hail/io/index/IndexReader.scala +++ b/hail/src/main/scala/is/hail/io/index/IndexReader.scala @@ -172,7 +172,6 @@ class IndexReader( } else { val node = readInternalNode(offset) val children = node.children - val n = children.length val idx = children.upperBound(key, ordering.lt, _.firstKey) upperBound(key, level - 1, children(idx - 1).indexFileOffset) } diff --git a/hail/src/main/scala/is/hail/io/index/IndexWriter.scala b/hail/src/main/scala/is/hail/io/index/IndexWriter.scala index 345e1901601..20a2e974240 100644 --- a/hail/src/main/scala/is/hail/io/index/IndexWriter.scala +++ b/hail/src/main/scala/is/hail/io/index/IndexWriter.scala @@ -1,6 +1,6 @@ package is.hail.io.index -import is.hail.annotations.{Annotation, Region, RegionPool, RegionValueBuilder} +import is.hail.annotations.{Annotation, Region, RegionPool} import is.hail.asm4s.{HailClassLoader, _} import is.hail.backend.{ExecuteContext, HailStateManager, HailTaskContext} import is.hail.expr.ir.{ @@ -110,7 +110,6 @@ class IndexWriter( attributes: Map[String, Any], ) extends AutoCloseable { private val region = Region(pool = pool) - private val rvb = new RegionValueBuilder(sm, region) def appendRow(x: Annotation, offset: Long, annotation: Annotation): Unit = { val koff = keyType.unstagedStoreJavaObject(sm, x, region) @@ -370,7 +369,7 @@ class StagedIndexWriter( ) { require(branchingFactor > 1) - private var elementIdx = cb.genFieldThisRef[Long]() + private val elementIdx = cb.genFieldThisRef[Long]() private val ob = cb.genFieldThisRef[OutputBuffer]() private val utils = new StagedIndexWriterUtils(cb.genFieldThisRef[IndexWriterUtils]()) diff --git a/hail/src/main/scala/is/hail/io/plink/LoadPlink.scala b/hail/src/main/scala/is/hail/io/plink/LoadPlink.scala index e36a67e89e3..9c4cd0d5f43 100644 --- a/hail/src/main/scala/is/hail/io/plink/LoadPlink.scala +++ b/hail/src/main/scala/is/hail/io/plink/LoadPlink.scala @@ -112,7 +112,7 @@ object LoadPlink 
{ val idBuilder = new BoxedArrayBuilder[String] val structBuilder = new BoxedArrayBuilder[Row] - val m = fs.readLines(filename) { + fs.readLines(filename) { _.foreachLine { line => val split = line.split(delimiter) if (split.length != 6) @@ -181,7 +181,6 @@ object LoadPlink { object MatrixPLINKReader { def fromJValue(ctx: ExecuteContext, jv: JValue): MatrixPLINKReader = { - val backend = ctx.backend val fs = ctx.fs implicit val formats: Formats = DefaultFormats diff --git a/hail/src/main/scala/is/hail/io/tabix/TabixReader.scala b/hail/src/main/scala/is/hail/io/tabix/TabixReader.scala index 891eb1130ad..ee1f09d56fd 100644 --- a/hail/src/main/scala/is/hail/io/tabix/TabixReader.scala +++ b/hail/src/main/scala/is/hail/io/tabix/TabixReader.scala @@ -98,7 +98,7 @@ class TabixReader(val filePath: String, fs: FS, idxFilePath: Option[String] = No fatal(s"Hail only supports tabix indexing for VCF, found format code $format") val colSeq = readInt(is) val colBeg = readInt(is) - val colEnd = readInt(is) + readInt(is) // colEnd val meta = readInt(is) // meta char for VCF is '#' if (meta != '#') diff --git a/hail/src/main/scala/is/hail/io/vcf/LoadVCF.scala b/hail/src/main/scala/is/hail/io/vcf/LoadVCF.scala index 12e5e3ee04a..28d9ce62903 100644 --- a/hail/src/main/scala/is/hail/io/vcf/LoadVCF.scala +++ b/hail/src/main/scala/is/hail/io/vcf/LoadVCF.scala @@ -1570,8 +1570,8 @@ object LoadVCF { val prefix = if (excerptStart > 0) "... " else "" val suffix = if (excerptEnd < line.length) " ..." else "" - var caretPad = prefix.length + pos - excerptStart - var pad = " " * caretPad + val caretPad = prefix.length + pos - excerptStart + val pad = " " * caretPad fatal( s"${source.locationString(pos)}: ${e.msg}\n$prefix$excerpt$suffix\n$pad^\noffending line: @1\nsee the Hail log for the full offending line", @@ -1790,8 +1790,6 @@ object MatrixVCFReader { } checkGzipOfGlobbedFiles(params.files, fileListEntries, params.forceGZ, params.gzAsBGZ) - val entryFloatType = LoadVCF.getEntryFloatType(params.entryFloatTypeName) - val headerLines1 = getHeaderLines( fs, params.headerFile.getOrElse(fileListEntries.head.getPath), @@ -1803,10 +1801,7 @@ object MatrixVCFReader { if (params.headerFile.isEmpty) { val header1Bc = backend.broadcast(header1) - val localCallFields = params.callFields - val localFloatType = entryFloatType val files = fileListEntries.map(_.getPath) - val localArrayElementsRequired = params.arrayElementsRequired val localFilterAndReplace = params.filterAndReplace val fsConfigBC = backend.broadcast(fs.getConfiguration()) diff --git a/hail/src/main/scala/is/hail/linalg/BlockMatrix.scala b/hail/src/main/scala/is/hail/linalg/BlockMatrix.scala index 241720184fe..18c08349811 100644 --- a/hail/src/main/scala/is/hail/linalg/BlockMatrix.scala +++ b/hail/src/main/scala/is/hail/linalg/BlockMatrix.scala @@ -295,7 +295,7 @@ object BlockMatrix { val d = digitsNeeded(bms.length) val fsBc = fs.broadcast - val partitionCounts = collectMatrices(bms) + collectMatrices(bms) .mapPartitionsWithIndex { case (i, it) => assert(it.hasNext) val m = it.next() @@ -339,7 +339,7 @@ object BlockMatrix { val compressionExtension = compression.map(x => "." 
+ x).getOrElse("") - val partitionCounts = collectMatrices(bms) + collectMatrices(bms) .mapPartitionsWithIndex { case (i, it) => assert(it.hasNext) val m = it.next() @@ -2375,7 +2375,7 @@ class BlockMatrixReadRowBlockedRDD( ) { import BlockMatrixReadRowBlockedRDD._ - private[this] val BlockMatrixMetadata(blockSize, nRows, nCols, maybeFiltered, partFiles) = + private[this] val BlockMatrixMetadata(blockSize, nRows, nCols, _, partFiles) = metadata private[this] val gp = GridPartitioner(blockSize, nRows, nCols) @@ -2411,7 +2411,6 @@ class BlockMatrixReadRowBlockedRDD( Iterator.single { ctx => val region = ctx.region val rvb = new RegionValueBuilder(HailStateManager(Map.empty), region) - val rv = RegionValue(region) val firstRow = rowsForPartition(0) var blockRow = (firstRow / blockSize).toInt val fs = fsBc.value @@ -2519,7 +2518,6 @@ class BlockMatrixCachedPartFile( ) in.readDoubles(cache, startWritingAt, doublesToRead) cacheEnd = doublesToRead + startWritingAt - var i = 0 fileIndex += doublesToRead assert(doublesToRead > 0) } diff --git a/hail/src/main/scala/is/hail/linalg/LinalgCodeUtils.scala b/hail/src/main/scala/is/hail/linalg/LinalgCodeUtils.scala index 9d499791bbd..d38c9f7b7df 100644 --- a/hail/src/main/scala/is/hail/linalg/LinalgCodeUtils.scala +++ b/hail/src/main/scala/is/hail/linalg/LinalgCodeUtils.scala @@ -51,7 +51,7 @@ object LinalgCodeUtils { PCanonicalNDArray(pndv.st.elementType.storageType().setRequired(true), pndv.st.nDims, false) val strides = pt.makeColumnMajorStrides(shape, cb) - val (dataFirstElementAddress, dataFinisher) = + val (_, dataFinisher) = pt.constructDataFunction(shape, strides, cb, region) // construct an SNDArrayCode with undefined contents val result = dataFinisher(cb) diff --git a/hail/src/main/scala/is/hail/lir/PST.scala b/hail/src/main/scala/is/hail/lir/PST.scala index 0c9d36c8b6d..a8f6b685ff0 100644 --- a/hail/src/main/scala/is/hail/lir/PST.scala +++ b/hail/src/main/scala/is/hail/lir/PST.scala @@ -315,7 +315,7 @@ class PSTBuilder( // find regions in [start, end] // no edges from [0, start) target (start, end] private def findRegions(start: Int, end: Int): Unit = { - var regionStarts = new IntArrayBuilder() + val regionStarts = new IntArrayBuilder() regionStarts += start // find subregions of [start, end] diff --git a/hail/src/main/scala/is/hail/methods/LocalLDPrune.scala b/hail/src/main/scala/is/hail/methods/LocalLDPrune.scala index b23bc42345c..6e22f021523 100644 --- a/hail/src/main/scala/is/hail/methods/LocalLDPrune.scala +++ b/hail/src/main/scala/is/hail/methods/LocalLDPrune.scala @@ -11,8 +11,6 @@ import is.hail.variant._ import java.util -import org.apache.spark.rdd.RDD - object BitPackedVector { final val GENOTYPES_PER_PACK: Int = 32 final val BITS_PER_PACK: Int = 2 * GENOTYPES_PER_PACK @@ -278,21 +276,6 @@ object LocalLDPrune { keepVariant } - private def pruneLocal( - inputRDD: RDD[BitPackedVector], - r2Threshold: Double, - windowSize: Int, - queueSize: Int, - ): RDD[BitPackedVector] = { - inputRDD.mapPartitions( - { it => - val queue = new util.ArrayDeque[BitPackedVector](queueSize) - it.filter(bpvv => pruneLocal(queue, bpvv, r2Threshold, windowSize, queueSize)) - }, - preservesPartitioning = true, - ) - } - def apply( ctx: ExecuteContext, mt: MatrixValue, @@ -337,8 +320,6 @@ case class LocalLDPrune( def execute(ctx: ExecuteContext, mv: MatrixValue): TableValue = { val nSamples = mv.nCols - val fullRowPType = mv.rvRowPType - val localCallField = callField val tableType = typ(mv.typ) val ts = 
TableExecuteIntermediate(mv.toTableValue).asTableStage(ctx).mapPartition(Some( tableType.key diff --git a/hail/src/main/scala/is/hail/methods/LogisticRegression.scala b/hail/src/main/scala/is/hail/methods/LogisticRegression.scala index 66ec3e26fae..d8b79558e98 100644 --- a/hail/src/main/scala/is/hail/methods/LogisticRegression.scala +++ b/hail/src/main/scala/is/hail/methods/LogisticRegression.scala @@ -40,8 +40,6 @@ case class LogisticRegression( val tableType = typ(mv.typ) val newRVDType = tableType.canonicalRVDType - val multiPhenoSchema = TStruct(("logistic_regression", TArray(logRegTest.schema))) - val (yVecs, cov, completeColIdx) = RegressionUtils.getPhenosCovCompleteSamples(mv, yFields.toArray, covFields.toArray) diff --git a/hail/src/main/scala/is/hail/methods/PoissonRegression.scala b/hail/src/main/scala/is/hail/methods/PoissonRegression.scala index 2fbe2315447..b174616d86d 100644 --- a/hail/src/main/scala/is/hail/methods/PoissonRegression.scala +++ b/hail/src/main/scala/is/hail/methods/PoissonRegression.scala @@ -60,7 +60,7 @@ case class PoissonRegression( + s" with input variable x, and $k additional ${plural(k, "covariate")}...") val nullModel = new PoissonRegressionModel(cov, y) - var nullFit = nullModel.fit(None, maxIter = maxIterations, tol = tolerance) + val nullFit = nullModel.fit(None, maxIter = maxIterations, tol = tolerance) if (!nullFit.converged) fatal("Failed to fit poisson regression null model (standard MLE with covariates only): " + ( diff --git a/hail/src/main/scala/is/hail/methods/Skat.scala b/hail/src/main/scala/is/hail/methods/Skat.scala index 12f9b0556c7..f8c3ae4b088 100644 --- a/hail/src/main/scala/is/hail/methods/Skat.scala +++ b/hail/src/main/scala/is/hail/methods/Skat.scala @@ -133,8 +133,6 @@ object Skat { q, dof, evals, noncentrality, s, iterations, accuracy, ) val x = result.value - val nIntegrations = result.nIterations - val converged = result.converged val fault = result.fault val pval = 1 - x @@ -204,7 +202,7 @@ case class Skat( s"sample; found ${badVals.length} ${plural(badVals.length, "violation")} starting with ${badVals(0)}") } - val (keyGsWeightRdd, keyType) = + val (keyGsWeightRdd, _) = computeKeyGsWeightRdd(mv, xField, completeColIdx, keyField, weightField) val backend = HailContext.backend diff --git a/hail/src/main/scala/is/hail/misc/BGZipBlocks.scala b/hail/src/main/scala/is/hail/misc/BGZipBlocks.scala index a7ecd958e3a..e5461cd7c11 100644 --- a/hail/src/main/scala/is/hail/misc/BGZipBlocks.scala +++ b/hail/src/main/scala/is/hail/misc/BGZipBlocks.scala @@ -8,7 +8,7 @@ import java.io.InputStream object BGZipBlocks { // Print block starts of block gzip (bgz) file def apply(fs: FS, file: String): Unit = { - var buf = new Array[Byte](64 * 1024) + val buf = new Array[Byte](64 * 1024) // position of 'buf[0]' in input stream var bufPos = 0L diff --git a/hail/src/main/scala/is/hail/rvd/AbstractRVDSpec.scala b/hail/src/main/scala/is/hail/rvd/AbstractRVDSpec.scala index d7abcd7331e..b1bf4ad3395 100644 --- a/hail/src/main/scala/is/hail/rvd/AbstractRVDSpec.scala +++ b/hail/src/main/scala/is/hail/rvd/AbstractRVDSpec.scala @@ -81,7 +81,6 @@ object AbstractRVDSpec { val (part0Count, bytesWritten) = using(fs.create(partsPath + "/" + filePath)) { os => using(RVDContext.default(execCtx.r.pool)) { ctx => - val rvb = ctx.rvb RichContextRDDRegionValue.writeRowsPartition(codecSpec.buildEncoder(execCtx, rowType))( ctx, rows.iterator.map { a => diff --git a/hail/src/main/scala/is/hail/rvd/RVD.scala b/hail/src/main/scala/is/hail/rvd/RVD.scala index 
0dd3a1c6ec1..1bacb52afe8 100644 --- a/hail/src/main/scala/is/hail/rvd/RVD.scala +++ b/hail/src/main/scala/is/hail/rvd/RVD.scala @@ -20,7 +20,6 @@ import scala.reflect.ClassTag import java.util -import org.apache.commons.lang3.StringUtils import org.apache.spark.{Partitioner, SparkContext, TaskContext} import org.apache.spark.rdd.{RDD, ShuffledRDD} import org.apache.spark.sql.Row @@ -1198,7 +1197,6 @@ object RVD { def _coerce(typ: RVDType, crdd: CRDD): RVD = empty(execCtx, typ) } - val numPartitions = keys.getNumPartitions val keyInfo = getKeyInfo(execCtx, fullType, partitionKey, keys) if (keyInfo.isEmpty) @@ -1408,7 +1406,6 @@ object RVD { _makeIndexWriter(_, theHailClassLoaderForSparkWorkers, SparkTaskContext.get(), _) val partDigits = digitsNeeded(nPartitions) - val fileDigits = digitsNeeded(rvds.length) for (i <- 0 until nRVDs) { val path = paths(i) fs.mkDir(path + "/rows/rows/parts") @@ -1456,7 +1453,6 @@ object RVD { .par .foreach { case (partFiles, i) => val fs = fsBc.value - val s = StringUtils.leftPad(i.toString, fileDigits, '0') val basePath = paths(i) RichContextRDDRegionValue.writeSplitSpecs( fs, diff --git a/hail/src/main/scala/is/hail/services/BatchConfig.scala b/hail/src/main/scala/is/hail/services/BatchConfig.scala index ff2d0f753d2..661bc94e638 100644 --- a/hail/src/main/scala/is/hail/services/BatchConfig.scala +++ b/hail/src/main/scala/is/hail/services/BatchConfig.scala @@ -4,13 +4,10 @@ import is.hail.utils._ import java.io.{File, FileInputStream} -import org.apache.log4j.Logger import org.json4s._ import org.json4s.jackson.JsonMethods object BatchConfig { - private[this] val log = Logger.getLogger("BatchConfig") - def fromConfigFile(file: String): Option[BatchConfig] = if (new File(file).exists()) { using(new FileInputStream(file))(in => Some(fromConfig(JsonMethods.parse(in)))) diff --git a/hail/src/main/scala/is/hail/services/DeployConfig.scala b/hail/src/main/scala/is/hail/services/DeployConfig.scala index b4d195ee9df..55ccdf36dfd 100644 --- a/hail/src/main/scala/is/hail/services/DeployConfig.scala +++ b/hail/src/main/scala/is/hail/services/DeployConfig.scala @@ -4,13 +4,10 @@ import is.hail.utils._ import java.io.{File, FileInputStream} -import org.apache.log4j.Logger import org.json4s._ import org.json4s.jackson.JsonMethods object DeployConfig { - private[this] val log = Logger.getLogger("DeployConfig") - private[this] lazy val default: DeployConfig = fromConfigFile() private[this] var _get: DeployConfig = null diff --git a/hail/src/main/scala/is/hail/services/batch_client/BatchClient.scala b/hail/src/main/scala/is/hail/services/batch_client/BatchClient.scala index a8a01b8f98f..cb23ecbf852 100644 --- a/hail/src/main/scala/is/hail/services/batch_client/BatchClient.scala +++ b/hail/src/main/scala/is/hail/services/batch_client/BatchClient.scala @@ -193,7 +193,7 @@ class BatchClient( // at most, 5s val now = System.nanoTime() val elapsed = now - start - var d = math.max( + val d = math.max( math.min( (0.1 * (0.8 + Random.nextFloat() * 0.4) * (elapsed / 1000.0 / 1000)).toInt, 5000, diff --git a/hail/src/main/scala/is/hail/sparkextras/ContextRDD.scala b/hail/src/main/scala/is/hail/sparkextras/ContextRDD.scala index 692d617dc94..fe9c4d4e4ac 100644 --- a/hail/src/main/scala/is/hail/sparkextras/ContextRDD.scala +++ b/hail/src/main/scala/is/hail/sparkextras/ContextRDD.scala @@ -402,9 +402,6 @@ class ContextRDD[T: ClassTag]( def preferredLocations(partition: Partition): Seq[String] = rdd.preferredLocations(partition) - private[this] def clean[U <: AnyRef](value: U): U = - 
ExposedUtils.clean(value) - def partitions: Array[Partition] = rdd.partitions def partitioner: Option[Partitioner] = rdd.partitioner diff --git a/hail/src/main/scala/is/hail/stats/GeneralizedChiSquaredDistribution.scala b/hail/src/main/scala/is/hail/stats/GeneralizedChiSquaredDistribution.scala index b1751238dc2..c9d52cf6431 100644 --- a/hail/src/main/scala/is/hail/stats/GeneralizedChiSquaredDistribution.scala +++ b/hail/src/main/scala/is/hail/stats/GeneralizedChiSquaredDistribution.scala @@ -175,13 +175,13 @@ class DaviesAlgorithm( def truncation(_u: Double, _tausq: Double): Double = { counter() var u = _u - var tausq = _tausq + val tausq = _tausq var sum1 = 0.0 var prod2 = 0.0 var prod3 = 0.0 var s = 0 - var sum2 = (sigsq + tausq) * square(u) + val sum2 = (sigsq + tausq) * square(u) var prod1 = 2.0 * sum2 u = 2.0 * u @@ -587,7 +587,7 @@ object GeneralizedChiSquaredDistribution { assert(lim >= 0) assert(acc >= 0) - val (value, trace, fault) = new DaviesAlgorithm(c, n, lb, nc, lim, sigma).cdf(acc) + val (value, _, fault) = new DaviesAlgorithm(c, n, lb, nc, lim, sigma).cdf(acc) assert(fault >= 0 && fault <= 2, fault) diff --git a/hail/src/main/scala/is/hail/stats/RegressionUtils.scala b/hail/src/main/scala/is/hail/stats/RegressionUtils.scala index 96cfa015f6c..81533606a65 100644 --- a/hail/src/main/scala/is/hail/stats/RegressionUtils.scala +++ b/hail/src/main/scala/is/hail/stats/RegressionUtils.scala @@ -57,7 +57,6 @@ object RegressionUtils { // IndexedSeq indexed by column, Array by field def getColumnVariables(mv: MatrixValue, names: Array[String]) : IndexedSeq[Array[Option[Double]]] = { - val colType = mv.typ.colType assert(names.forall(name => mv.typ.colType.field(name).typ == TFloat64)) val fieldIndices = names.map { name => val field = mv.typ.colType.field(name) diff --git a/hail/src/main/scala/is/hail/stats/package.scala b/hail/src/main/scala/is/hail/stats/package.scala index 207deed677a..2b87f1d320f 100644 --- a/hail/src/main/scala/is/hail/stats/package.scala +++ b/hail/src/main/scala/is/hail/stats/package.scala @@ -203,7 +203,7 @@ package object stats { val hgd = new HypergeometricDistribution(null, popSize, numSuccessPopulation, sampleSize) val epsilon = 2.220446e-16 - def dhyper(k: Int, logProb: Boolean = false): Double = + def dhyper(k: Int, logProb: Boolean): Double = if (logProb) hgd.logProbability(k) else hgd.probability(k) val logdc = support.map(dhyper(_, logProb = true)) @@ -214,7 +214,7 @@ package object stats { d.map(_ / d.sum) } - def phyper(k: Int, lower_tail: Boolean = true): Double = + def phyper(k: Int, lower_tail: Boolean): Double = if (lower_tail) hgd.cumulativeProbability(k) else diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalLocus.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalLocus.scala index 82b2ea5c08b..402d34f3c0d 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalLocus.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalLocus.scala @@ -13,7 +13,7 @@ import is.hail.utils.FastSeq import is.hail.variant._ object PCanonicalLocus { - private def representation(required: Boolean = false): PCanonicalStruct = PCanonicalStruct( + private def representation(required: Boolean): PCanonicalStruct = PCanonicalStruct( required, "contig" -> PCanonicalString(required = true), "position" -> PInt32(required = true), diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SBaseStruct.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SBaseStruct.scala index 
2a5810d5291..ecbd34cdc61 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SBaseStruct.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SBaseStruct.scala @@ -17,7 +17,6 @@ object SBaseStruct { val rt = s2.st.virtualType.asInstanceOf[TStruct] val resultVType = TStruct.concat(lt, rt) - val st1 = s1.st val st2 = s2.st (s1, s2) match { diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SNDArray.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SNDArray.scala index 3d44aa37b38..53791e5c651 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SNDArray.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SNDArray.scala @@ -652,7 +652,7 @@ object SNDArray { work: SNDArrayValue, blocksize: Value[Long], ): Unit = { - val Seq(m, n) = A.shapes + val Seq(_, n) = A.shapes SNDArray.geqrt(A, T, work, blocksize, cb) // copy upper triangle of A0 to R SNDArray.copyMatrix(cb, "U", A.slice(cb, (null, n), ColonIndex), R) @@ -803,7 +803,7 @@ object SNDArray { T: SNDArrayValue, work: SNDArrayValue, ): Unit = { - val Seq(m, n) = A.shapes + val Seq(_, n) = A.shapes SNDArray.geqr(cb, A, T, work) // copy upper triangle of A0 to R SNDArray.copyMatrix(cb, "U", A.slice(cb, (null, n), ColonIndex), R) diff --git a/hail/src/main/scala/is/hail/types/virtual/TNDArray.scala b/hail/src/main/scala/is/hail/types/virtual/TNDArray.scala index c5f10636c8e..3743350a5b3 100644 --- a/hail/src/main/scala/is/hail/types/virtual/TNDArray.scala +++ b/hail/src/main/scala/is/hail/types/virtual/TNDArray.scala @@ -122,9 +122,4 @@ final case class TNDArray(elementType: Type, nDimsBase: NatBase) extends Type { override def mkOrdering(sm: HailStateManager, missingEqual: Boolean): ExtendedOrdering = null lazy val shapeType: TTuple = TTuple(Array.fill(nDims)(TInt64): _*) - - private lazy val representation = TStruct( - ("shape", shapeType), - ("data", TArray(elementType)), - ) } diff --git a/hail/src/main/scala/is/hail/types/virtual/Type.scala b/hail/src/main/scala/is/hail/types/virtual/Type.scala index db03335ffa6..1df05838fa3 100644 --- a/hail/src/main/scala/is/hail/types/virtual/Type.scala +++ b/hail/src/main/scala/is/hail/types/virtual/Type.scala @@ -138,7 +138,7 @@ abstract class Type extends BaseType with Serializable { def query(fields: String*): Querier = query(fields.toList) def query(path: List[String]): Querier = { - val (t, q) = queryTyped(path) + val (_, q) = queryTyped(path) q } diff --git a/hail/src/main/scala/is/hail/utils/ErrorHandling.scala b/hail/src/main/scala/is/hail/utils/ErrorHandling.scala index 5718ed0f766..176df006080 100644 --- a/hail/src/main/scala/is/hail/utils/ErrorHandling.scala +++ b/hail/src/main/scala/is/hail/utils/ErrorHandling.scala @@ -59,7 +59,6 @@ trait ErrorHandling { def handleForPython(e: Throwable): (String, String, Int) = { val short = deepestMessage(e) val expanded = expandException(e, false) - val logExpanded = expandException(e, true) def searchForErrorCode(exception: Throwable): Int = { if (exception.isInstanceOf[HailException]) { diff --git a/hail/src/main/scala/is/hail/utils/FlipbookIterator.scala b/hail/src/main/scala/is/hail/utils/FlipbookIterator.scala index 7118dccbaa9..f42616cea09 100644 --- a/hail/src/main/scala/is/hail/utils/FlipbookIterator.scala +++ b/hail/src/main/scala/is/hail/utils/FlipbookIterator.scala @@ -28,7 +28,7 @@ abstract class StateMachine[A] { object StateMachine { def terminal[A]: StateMachine[A] = new StateMachine[A] { val isValid = false - var 
value: A = _ + def value: A = ??? def advance(): Unit = {} } } diff --git a/hail/src/main/scala/is/hail/utils/StringEscapeUtils.scala b/hail/src/main/scala/is/hail/utils/StringEscapeUtils.scala index e4f000b76c5..5c5c452d268 100644 --- a/hail/src/main/scala/is/hail/utils/StringEscapeUtils.scala +++ b/hail/src/main/scala/is/hail/utils/StringEscapeUtils.scala @@ -135,7 +135,6 @@ object StringEscapeUtils { def unescapeString(str: String, sb: StringBuilder): String = { sb.clear() - val sz = str.length() var hadSlash = false var inUnicode = false lazy val unicode = new StringBuilder(capacity = 4) diff --git a/hail/src/main/scala/is/hail/utils/StringSocketAppender.scala b/hail/src/main/scala/is/hail/utils/StringSocketAppender.scala index 44ad28b8232..6f05eea2f93 100644 --- a/hail/src/main/scala/is/hail/utils/StringSocketAppender.scala +++ b/hail/src/main/scala/is/hail/utils/StringSocketAppender.scala @@ -19,13 +19,11 @@ object StringSocketAppender { } class StringSocketAppender() extends AppenderSkeleton { - private var remoteHost: String = _ private var address: InetAddress = _ private var port: Int = _ private var os: OutputStream = _ - private var reconnectionDelay = StringSocketAppender.DEFAULT_RECONNECTION_DELAY + private val reconnectionDelay = StringSocketAppender.DEFAULT_RECONNECTION_DELAY private var connector: SocketConnector = null - private var counter = 0 private var patternLayout: PatternLayout = _ private var initialized: Boolean = false @@ -34,7 +32,6 @@ class StringSocketAppender() extends AppenderSkeleton { def connect(host: String, port: Int, format: String): Unit = { this.port = port this.address = InetAddress.getByName(host) - this.remoteHost = host this.patternLayout = new PatternLayout(format) connect(address, port) initialized = true diff --git a/hail/src/main/scala/is/hail/variant/HardCallView.scala b/hail/src/main/scala/is/hail/variant/HardCallView.scala index f7b812839c4..cc5e715e847 100644 --- a/hail/src/main/scala/is/hail/variant/HardCallView.scala +++ b/hail/src/main/scala/is/hail/variant/HardCallView.scala @@ -22,7 +22,7 @@ final class ArrayGenotypeView(rvType: PStruct) { } } - private val (gtExists, gtIndex, gtType) = lookupField("GT", _ == PCanonicalCall()) + private val (gtExists, gtIndex, _) = lookupField("GT", _ == PCanonicalCall()) private val (gpExists, gpIndex, _gpType) = lookupField( "GP", diff --git a/hail/src/main/scala/is/hail/variant/ReferenceGenome.scala b/hail/src/main/scala/is/hail/variant/ReferenceGenome.scala index 760e7c12d10..7412b32ff9b 100644 --- a/hail/src/main/scala/is/hail/variant/ReferenceGenome.scala +++ b/hail/src/main/scala/is/hail/variant/ReferenceGenome.scala @@ -640,7 +640,6 @@ object ReferenceGenome { mtContigs: Array[String] = Array.empty[String], parInput: Array[String] = Array.empty[String], ): ReferenceGenome = { - val tmpdir = ctx.localTmpdir val fs = ctx.fs if (!fs.isFile(fastaFile)) diff --git a/hail/src/test/scala/is/hail/annotations/UnsafeSuite.scala b/hail/src/test/scala/is/hail/annotations/UnsafeSuite.scala index 7ad2b893463..2730821034a 100644 --- a/hail/src/test/scala/is/hail/annotations/UnsafeSuite.scala +++ b/hail/src/test/scala/is/hail/annotations/UnsafeSuite.scala @@ -71,9 +71,6 @@ class UnsafeSuite extends HailSuite { val region2 = Region(pool = pool) val region3 = Region(pool = pool) val region4 = Region(pool = pool) - val rvb = new RegionValueBuilder(sm, region) - - val path = ctx.createTmpPath("test-codec", "ser") val g = Type.genStruct .flatMap(t => Gen.zip(Gen.const(t), t.genValue(sm))) @@ -83,7 +80,6 @@ class 
UnsafeSuite extends HailSuite { val pt = PType.canonical(t).asInstanceOf[PStruct] val requestedType = subsetType(t).asInstanceOf[TStruct] - val prt = PType.canonical(requestedType).asInstanceOf[PStruct] val a2 = subset(t, requestedType, a) assert(requestedType.typeCheck(a2)) @@ -329,8 +325,6 @@ class UnsafeSuite extends HailSuite { @Test def testUnsafeOrdering(): Unit = { val region = Region(pool = pool) val region2 = Region(pool = pool) - val rvb = new RegionValueBuilder(sm, region) - val rvb2 = new RegionValueBuilder(sm, region2) val g = PType.genStruct .flatMap(t => Gen.zip(Gen.const(t), Gen.zip(t.genValue(sm), t.genValue(sm)))) diff --git a/hail/src/test/scala/is/hail/asm4s/ASM4SSuite.scala b/hail/src/test/scala/is/hail/asm4s/ASM4SSuite.scala index dbfbcb32e45..ceb3bee5bf4 100644 --- a/hail/src/test/scala/is/hail/asm4s/ASM4SSuite.scala +++ b/hail/src/test/scala/is/hail/asm4s/ASM4SSuite.scala @@ -68,44 +68,44 @@ class ASM4SSuite extends HailSuite { } @Test def get(): Unit = { - val fb = FunctionBuilder[A, Int]("F") - fb.emit(fb.getArg[A](1).getField[Int]("i")) + val fb = FunctionBuilder[Foo, Int]("F") + fb.emit(fb.getArg[Foo](1).getField[Int]("i")) val i = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) - val a = new A + val a = new Foo assert(i(a) == 5) } @Test def invoke(): Unit = { - val fb = FunctionBuilder[A, Int]("F") - fb.emit(fb.getArg[A](1).invoke[Int]("f")) + val fb = FunctionBuilder[Foo, Int]("F") + fb.emit(fb.getArg[Foo](1).invoke[Int]("f")) val i = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) - val a = new A + val a = new Foo assert(i(a) == 6) } @Test def invoke2(): Unit = { - val fb = FunctionBuilder[A, Int]("F") - fb.emit(fb.getArg[A](1).invoke[Int, Int]("g", 6)) + val fb = FunctionBuilder[Foo, Int]("F") + fb.emit(fb.getArg[Foo](1).invoke[Int, Int]("g", 6)) val j = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) - val a = new A + val a = new Foo assert(j(a) == 11) } @Test def newInstance(): Unit = { val fb = FunctionBuilder[Int]("F") - fb.emit(Code.newInstance[A]().invoke[Int]("f")) + fb.emit(Code.newInstance[Foo]().invoke[Int]("f")) val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(f() == 6) } @Test def put(): Unit = { val fb = FunctionBuilder[Int]("F") - val inst = fb.newLocal[A]() + val inst = fb.newLocal[Foo]() fb.emit(Code( - inst.store(Code.newInstance[A]()), + inst.store(Code.newInstance[Foo]()), inst.put("i", -2), inst.getField[Int]("i"), )) @@ -115,11 +115,11 @@ class ASM4SSuite extends HailSuite { @Test def staticPut(): Unit = { val fb = FunctionBuilder[Int]("F") - val inst = fb.newLocal[A]() + val inst = fb.newLocal[Foo]() fb.emit(Code( - inst.store(Code.newInstance[A]()), + inst.store(Code.newInstance[Foo]()), inst.put("j", -2), - Code.getStatic[A, Int]("j"), + Code.getStatic[Foo, Int]("j"), )) val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(f() == -2) @@ -174,11 +174,11 @@ class ASM4SSuite extends HailSuite { @Test def anewarray(): Unit = { val fb = FunctionBuilder[Int]("F") - val arr = fb.newLocal[Array[A]]() + val arr = fb.newLocal[Array[Foo]]() fb.emit(Code( - arr.store(newArray[A](2)), - arr(0) = Code.newInstance[A](), - arr(1) = Code.newInstance[A](), + arr.store(newArray[Foo](2)), + arr(0) = Code.newInstance[Foo](), + arr(1) = Code.newInstance[Foo](), arr(0).getField[Int]("i") + arr(1).getField[Int]("i"), )) val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) diff --git a/hail/src/test/scala/is/hail/asm4s/A.java b/hail/src/test/scala/is/hail/asm4s/Foo.java similarity 
index 89% rename from hail/src/test/scala/is/hail/asm4s/A.java rename to hail/src/test/scala/is/hail/asm4s/Foo.java index 91e5ea28600..dc44fbf412a 100644 --- a/hail/src/test/scala/is/hail/asm4s/A.java +++ b/hail/src/test/scala/is/hail/asm4s/Foo.java @@ -1,6 +1,6 @@ package is.hail.asm4s; -public class A { +public class Foo { public static int j = 11; public int i = 5; diff --git a/hail/src/test/scala/is/hail/expr/ir/Aggregators2Suite.scala b/hail/src/test/scala/is/hail/expr/ir/Aggregators2Suite.scala index 540210d8d4c..c657bd9a302 100644 --- a/hail/src/test/scala/is/hail/expr/ir/Aggregators2Suite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/Aggregators2Suite.scala @@ -708,7 +708,6 @@ class Aggregators2Suite extends HailSuite { @Test def testNestedArrayElementsAgg(): Unit = { val alstate1 = ArrayLenAggSig(knownLength = false, FastSeq(sumAggSig)) - val aestate1 = AggElementsAggSig(FastSeq(sumAggSig)) val alstate2 = ArrayLenAggSig(knownLength = false, FastSeq[PhysicalAggSig](alstate1)) val init = InitOp( @@ -899,7 +898,7 @@ class Aggregators2Suite extends HailSuite { val eltsPrimitive = Array.tabulate(rows.length)(i => FastSeq(GetField(ArrayRef(rref, i), "b"))) val expected = Set("abcd", "foo", null) - val expectedPrimitive = Set(5L, -2L, 7L, null) + val expectedPrimitive: Set[Any] = Set(5L, -2L, 7L, null) val aggsig = PhysicalAggSig(CollectAsSet(), CollectAsSetStateSig(VirtualTypeWithReq(PCanonicalString()))) diff --git a/hail/src/test/scala/is/hail/expr/ir/AggregatorsSuite.scala b/hail/src/test/scala/is/hail/expr/ir/AggregatorsSuite.scala index 3c1f6f22991..08e7c9da26b 100644 --- a/hail/src/test/scala/is/hail/expr/ir/AggregatorsSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/AggregatorsSuite.scala @@ -1050,7 +1050,7 @@ class AggregatorsSuite extends HailSuite { val agg = FastSeq(Row("EUR", true, 1), Row("EUR", false, 2), Row("AFR", true, 3), Row("AFR", null, 4)) val aggType = TStruct("k1" -> TString, "k2" -> TBoolean, "x" -> TInt32) - val expected = Map( + val expected: Map[String, Map[Any, Seq[Int]]] = Map( "EUR" -> Map(true -> FastSeq(1), false -> FastSeq(2)), "AFR" -> Map(true -> FastSeq(3), (null, FastSeq(4))), ) diff --git a/hail/src/test/scala/is/hail/expr/ir/EmitStreamSuite.scala b/hail/src/test/scala/is/hail/expr/ir/EmitStreamSuite.scala index 99b7b77b478..d0abf67043a 100644 --- a/hail/src/test/scala/is/hail/expr/ir/EmitStreamSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/EmitStreamSuite.scala @@ -25,35 +25,6 @@ class EmitStreamSuite extends HailSuite { implicit val execStrats = ExecStrategy.compileOnly - private def compile1[T: TypeInfo, R: TypeInfo](f: (EmitMethodBuilder[_], Value[T]) => Code[R]) - : T => R = { - val fb = EmitFunctionBuilder[T, R](ctx, "stream_test") - val mb = fb.apply_method - mb.emit(f(mb, mb.getCodeParam[T](1))) - val asmFn = fb.result()(theHailClassLoader) - asmFn.apply - } - - private def compile2[T: TypeInfo, U: TypeInfo, R: TypeInfo]( - f: (EmitMethodBuilder[_], Code[T], Code[U]) => Code[R] - ): (T, U) => R = { - val fb = EmitFunctionBuilder[T, U, R](ctx, "F") - val mb = fb.apply_method - mb.emit(f(mb, mb.getCodeParam[T](1), mb.getCodeParam[U](2))) - val asmFn = fb.result()(theHailClassLoader) - asmFn.apply - } - - private def compile3[T: TypeInfo, U: TypeInfo, V: TypeInfo, R: TypeInfo]( - f: (EmitMethodBuilder[_], Code[T], Code[U], Code[V]) => Code[R] - ): (T, U, V) => R = { - val fb = EmitFunctionBuilder[T, U, V, R](ctx, "F") - val mb = fb.apply_method - mb.emit(f(mb, mb.getCodeParam[T](1), mb.getCodeParam[U](2), mb.getCodeParam[V](3))) - 
val asmFn = fb.result()(theHailClassLoader) - asmFn.apply - } - def log(str: Code[String], enabled: Boolean = false): Code[Unit] = if (enabled) Code._println(str) else Code._empty diff --git a/hail/src/test/scala/is/hail/expr/ir/IRSuite.scala b/hail/src/test/scala/is/hail/expr/ir/IRSuite.scala index 61d26123181..f10e0ead8f3 100644 --- a/hail/src/test/scala/is/hail/expr/ir/IRSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/IRSuite.scala @@ -1470,7 +1470,7 @@ class IRSuite extends HailSuite { val t = TDict(TInt32, TString) assertEvalsTo(CastToArray(NA(t)), null) - val d = Map(1 -> "a", 2 -> null, (null, "c")) + val d: Map[Any, Any] = Map(1 -> "a", 2 -> null, (null, "c")) assertEvalsTo( CastToArray(In(0, t)), // wtf you can't do null -> ... @@ -1515,7 +1515,7 @@ class IRSuite extends HailSuite { val t = TDict(TInt32, TString) assertEvalsTo(invoke("contains", TBoolean, NA(t), I32(2)), null) - val d = Map(1 -> "a", 2 -> null, (null, "c")) + val d: Map[Any, Any] = Map(1 -> "a", 2 -> null, (null, "c")) assertEvalsTo(invoke("contains", TBoolean, In(0, t), NA(TInt32)), FastSeq((d, t)), true) assertEvalsTo(invoke("contains", TBoolean, In(0, t), I32(2)), FastSeq((d, t)), true) assertEvalsTo(invoke("contains", TBoolean, In(0, t), I32(0)), FastSeq((d, t)), false) @@ -2096,7 +2096,6 @@ class IRSuite extends HailSuite { val data = 0 until 10 val shape = FastSeq(2L, 5L) - val nDim = 2 val positives = makeNDArray(data.map(_.toDouble), shape, True()) val negatives = NDArrayMap(positives, "e", ApplyUnaryPrimOp(Negate, Ref("e", TFloat64))) @@ -3422,10 +3421,7 @@ class IRSuite extends HailSuite { "newChunk" -> TNDArray(TFloat64, Nat(2)), )), ) - val mat = Ref("mat", TNDArray(TFloat64, Nat(2))) - val aa = Ref("aa", TArray(TArray(TInt32))) val sta = Ref("sta", TStream(TArray(TInt32))) - val da = Ref("da", TArray(TTuple(TInt32, TString))) val std = Ref("std", TStream(TTuple(TInt32, TString))) val v = Ref("v", TInt32) val s = Ref("s", TStruct("x" -> TInt32, "y" -> TInt64, "z" -> TFloat64)) @@ -4390,8 +4386,6 @@ class IRSuite extends HailSuite { } @Test def testTailLoopNDMemory(): Unit = { - implicit val execStrats = ExecStrategy.compileOnly - val ndType = TNDArray(TInt32, Nat(2)) val ndSum: IR = TailLoop( diff --git a/hail/src/test/scala/is/hail/expr/ir/MatrixIRSuite.scala b/hail/src/test/scala/is/hail/expr/ir/MatrixIRSuite.scala index bd7e93ef277..9ef28b85a72 100644 --- a/hail/src/test/scala/is/hail/expr/ir/MatrixIRSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/MatrixIRSuite.scala @@ -232,7 +232,6 @@ class MatrixIRSuite extends HailSuite { @Test(dataProvider = "explodeRowsData") def testMatrixExplode(path: IndexedSeq[String], collection: IndexedSeq[Integer]): Unit = { - val tarray = TArray(TInt32) val range = rangeMatrix(5, 2, None) val field = path.init.foldRight(path.last -> toIRArray(collection))(_ -> IRStruct(_)) diff --git a/hail/src/test/scala/is/hail/expr/ir/MemoryLeakSuite.scala b/hail/src/test/scala/is/hail/expr/ir/MemoryLeakSuite.scala index 9ac4874d2cd..07727bdc187 100644 --- a/hail/src/test/scala/is/hail/expr/ir/MemoryLeakSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/MemoryLeakSuite.scala @@ -18,7 +18,7 @@ class MemoryLeakSuite extends HailSuite { val lit = Literal(TSet(TString), (0 until litSize).map(_.toString).toSet) val queries = Literal(TArray(TString), (0 until size).map(_.toString).toFastSeq) ExecuteContext.scoped() { ctx => - val r = eval( + eval( ToArray( mapIR(ToStream(queries))(r => ir.invoke("contains", TBoolean, lit, r)) ), diff --git 
a/hail/src/test/scala/is/hail/expr/ir/OrderingSuite.scala b/hail/src/test/scala/is/hail/expr/ir/OrderingSuite.scala index 85e730489d9..57d38090233 100644 --- a/hail/src/test/scala/is/hail/expr/ir/OrderingSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/OrderingSuite.scala @@ -79,7 +79,6 @@ class OrderingSuite extends HailSuite { val p = Prop.forAll(compareGen) { case (t, a) => pool.scopedRegion { region => val pType = PType.canonical(t).asInstanceOf[PStruct] - val rvb = new RegionValueBuilder(sm, region) val v = pType.unstagedStoreJavaObject(sm, a, region) @@ -236,7 +235,6 @@ class OrderingSuite extends HailSuite { val p = Prop.forAll(compareGen) { case (t, a1, a2) => pool.scopedRegion { region => val pType = PType.canonical(t) - val rvb = new RegionValueBuilder(sm, region) val v1 = pType.unstagedStoreJavaObject(sm, a1, region) @@ -291,7 +289,6 @@ class OrderingSuite extends HailSuite { val p = Prop.forAll(compareGen) { case (t, a1, a2) => pool.scopedRegion { region => val pType = PType.canonical(t) - val rvb = new RegionValueBuilder(sm, region) val v1 = pType.unstagedStoreJavaObject(sm, a1, region) @@ -480,14 +477,11 @@ class OrderingSuite extends HailSuite { val pArray = PCanonicalArray(pt) pool.scopedRegion { region => - val rvb = new RegionValueBuilder(sm, region) - val soff = pset.unstagedStoreJavaObject(sm, set, region) val eoff = pTuple.unstagedStoreJavaObject(sm, Row(elem), region) val fb = EmitFunctionBuilder[Region, Long, Long, Int](ctx, "binary_search") - val cregion = fb.getCodeParam[Region](1).load() val cset = fb.getCodeParam[Long](2) val cetuple = fb.getCodeParam[Long](3) @@ -685,8 +679,6 @@ class OrderingSuite extends HailSuite { def rowDoubleOrderingData(): Array[Array[Any]] = { val xs = Array[Any](null, Double.NegativeInfinity, -0.0, 0.0, 1.0, Double.PositiveInfinity, Double.NaN) - val as = Array(null: IndexedSeq[Any]) ++ - (for (x <- xs) yield FastSeq[Any](x)) val ss = Array[Any](null, "a", "aa") val rs = for { diff --git a/hail/src/test/scala/is/hail/expr/ir/StagedBTreeSuite.scala b/hail/src/test/scala/is/hail/expr/ir/StagedBTreeSuite.scala index 23ea06b0e90..8c923f5bc59 100644 --- a/hail/src/test/scala/is/hail/expr/ir/StagedBTreeSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/StagedBTreeSuite.scala @@ -69,7 +69,6 @@ object BTreeBackedSet { val root = fb.genFieldThisRef[Long]() val r = fb.genFieldThisRef[Region]() val ib = fb.getCodeParam[InputBuffer](2) - val ib2 = fb.genFieldThisRef[InputBuffer]() val km = fb.genFieldThisRef[Boolean]() val kv = fb.genFieldThisRef[Long]() diff --git a/hail/src/test/scala/is/hail/expr/ir/TableIRSuite.scala b/hail/src/test/scala/is/hail/expr/ir/TableIRSuite.scala index 4696c798610..6c1cd4a8bde 100644 --- a/hail/src/test/scala/is/hail/expr/ir/TableIRSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/TableIRSuite.scala @@ -914,7 +914,6 @@ class TableIRSuite extends HailSuite { } @Test def testTableWrite(): Unit = { - implicit val execStrats = ExecStrategy.interpretOnly val table = TableRange(5, 4) val path = ctx.createTmpPath("test-table-write", "ht") Interpret[Unit](ctx, TableWrite(table, TableNativeWriter(path))) @@ -927,7 +926,6 @@ class TableIRSuite extends HailSuite { } @Test def testWriteKeyDistinctness(): Unit = { - implicit val execStrats = ExecStrategy.interpretOnly val rt = TableRange(40, 4) val idxRef = GetField(Ref("row", rt.typ.rowType), "idx") val at = TableMapRows( @@ -1581,7 +1579,7 @@ class TableIRSuite extends HailSuite { ) } - @Test def testRepartitionCostEstimate: Unit = { + @Test def 
testRepartitionCostEstimate(): Unit = { val empty = RVDPartitioner.empty(ctx.stateManager, TStruct(Array.empty[Field])) val some = RVDPartitioner.unkeyed(ctx.stateManager, _) diff --git a/hail/src/test/scala/is/hail/expr/ir/analyses/SemanticHashSuite.scala b/hail/src/test/scala/is/hail/expr/ir/analyses/SemanticHashSuite.scala index 730fbc8b0fb..866cb000cfe 100644 --- a/hail/src/test/scala/is/hail/expr/ir/analyses/SemanticHashSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/analyses/SemanticHashSuite.scala @@ -309,7 +309,7 @@ class SemanticHashSuite extends HailSuite { val fs = new FakeFS { override def eTag(url: FakeURL): Option[String] = - throw new FileNotFoundException(url.getPath()) + throw new FileNotFoundException(url.getPath) } val ir = @@ -345,7 +345,7 @@ class SemanticHashSuite extends HailSuite { override def glob(url: FakeURL): Array[FileListEntry] = Array(new FileListEntry { override def getPath: String = url.getPath - override def getActualUrl(): String = url.getPath + override def getActualUrl: String = url.getPath override def getModificationTime: lang.Long = ??? override def getLen: Long = ??? override def isDirectory: Boolean = ??? diff --git a/hail/src/test/scala/is/hail/expr/ir/table/TableGenSuite.scala b/hail/src/test/scala/is/hail/expr/ir/table/TableGenSuite.scala index 2364dfae8bd..8cc3d8b6cdf 100644 --- a/hail/src/test/scala/is/hail/expr/ir/table/TableGenSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/table/TableGenSuite.scala @@ -20,7 +20,7 @@ class TableGenSuite extends HailSuite { implicit val execStrategy = ExecStrategy.lowering @Test(groups = Array("construction", "typecheck")) - def testWithInvalidContextsType: Unit = { + def testWithInvalidContextsType(): Unit = { val ex = intercept[IllegalArgumentException] { mkTableGen(contexts = Some(Str("oh noes :'("))).typecheck() } @@ -31,7 +31,7 @@ class TableGenSuite extends HailSuite { } @Test(groups = Array("construction", "typecheck")) - def testWithInvalidGlobalsType: Unit = { + def testWithInvalidGlobalsType(): Unit = { val ex = intercept[IllegalArgumentException] { mkTableGen( globals = Some(Str("oh noes :'(")), @@ -44,7 +44,7 @@ class TableGenSuite extends HailSuite { } @Test(groups = Array("construction", "typecheck")) - def testWithInvalidBodyType: Unit = { + def testWithInvalidBodyType(): Unit = { val ex = intercept[IllegalArgumentException] { mkTableGen(body = Some(Str("oh noes :'("))).typecheck() } @@ -54,7 +54,7 @@ class TableGenSuite extends HailSuite { } @Test(groups = Array("construction", "typecheck")) - def testWithInvalidBodyElementType: Unit = { + def testWithInvalidBodyElementType(): Unit = { val ex = intercept[IllegalArgumentException] { mkTableGen(body = Some(MakeStream(IndexedSeq(Str("oh noes :'(")), TStream(TString))) @@ -66,7 +66,7 @@ class TableGenSuite extends HailSuite { } @Test(groups = Array("construction", "typecheck")) - def testWithInvalidPartitionerKeyType: Unit = { + def testWithInvalidPartitionerKeyType(): Unit = { val ex = intercept[IllegalArgumentException] { mkTableGen(partitioner = Some(RVDPartitioner.empty(ctx.stateManager, TStruct("does-not-exist" -> TInt32))) @@ -76,7 +76,7 @@ class TableGenSuite extends HailSuite { } @Test(groups = Array("construction", "typecheck")) - def testWithTooLongPartitionerKeyType: Unit = { + def testWithTooLongPartitionerKeyType(): Unit = { val ex = intercept[IllegalArgumentException] { mkTableGen(partitioner = Some(RVDPartitioner.empty(ctx.stateManager, TStruct("does-not-exist" -> TInt32))) @@ -86,7 +86,7 @@ class TableGenSuite extends 
HailSuite { } @Test(groups = Array("requiredness")) - def testRequiredness: Unit = { + def testRequiredness(): Unit = { val table = mkTableGen() val analysis = Requiredness(table, ctx) analysis.lookup(table).required shouldBe true @@ -94,14 +94,14 @@ class TableGenSuite extends HailSuite { } @Test(groups = Array("lowering")) - def testLowering: Unit = { + def testLowering(): Unit = { val table = TestUtils.collect(mkTableGen()) val lowered = LowerTableIR(table, DArrayLowering.All, ctx, LoweringAnalyses(table, ctx)) assertEvalsTo(lowered, Row(FastSeq(0, 0).map(Row(_)), Row(0))) } @Test(groups = Array("lowering")) - def testNumberOfContextsMatchesPartitions: Unit = { + def testNumberOfContextsMatchesPartitions(): Unit = { val errorId = 42 val table = TestUtils.collect(mkTableGen( partitioner = Some(RVDPartitioner.unkeyed(ctx.stateManager, 0)), @@ -116,7 +116,7 @@ class TableGenSuite extends HailSuite { } @Test(groups = Array("lowering")) - def testRowsAreCorrectlyKeyed: Unit = { + def testRowsAreCorrectlyKeyed(): Unit = { val errorId = 56 val table = TestUtils.collect(mkTableGen( partitioner = Some(new RVDPartitioner( @@ -139,14 +139,14 @@ class TableGenSuite extends HailSuite { } @Test(groups = Array("optimization", "prune")) - def testPruneNoUnusedFields: Unit = { + def testPruneNoUnusedFields(): Unit = { val start = mkTableGen() val pruned = PruneDeadFields(ctx, start) pruned.typ shouldBe start.typ } @Test(groups = Array("optimization", "prune")) - def testPruneGlobals: Unit = { + def testPruneGlobals(): Unit = { val cname = "contexts" val start = mkTableGen( cname = Some(cname), @@ -165,7 +165,7 @@ class TableGenSuite extends HailSuite { } @Test(groups = Array("optimization", "prune")) - def testPruneContexts: Unit = { + def testPruneContexts(): Unit = { val start = mkTableGen() val TableGetGlobals(pruned) = PruneDeadFields(ctx, TableGetGlobals(start)) pruned.typ should not be start.typ diff --git a/hail/src/test/scala/is/hail/io/IndexBTreeSuite.scala b/hail/src/test/scala/is/hail/io/IndexBTreeSuite.scala index b69656846a9..4682309ba0b 100644 --- a/hail/src/test/scala/is/hail/io/IndexBTreeSuite.scala +++ b/hail/src/test/scala/is/hail/io/IndexBTreeSuite.scala @@ -34,7 +34,6 @@ class IndexBTreeSuite extends HailSuite { property("query gives same answer as array") = forAll(arraySizeGenerator) { case (depth: Int, arraySize: Int) => val arrayRandomStarts = fillRandomArray(arraySize) - val maxLong = arrayRandomStarts.takeRight(1)(0) val index = ctx.createTmpPath("testBtree", "idx") fs.delete(index, true) diff --git a/hail/src/test/scala/is/hail/io/IndexSuite.scala b/hail/src/test/scala/is/hail/io/IndexSuite.scala index 5ba86181a2c..18e4bbee6be 100644 --- a/hail/src/test/scala/is/hail/io/IndexSuite.scala +++ b/hail/src/test/scala/is/hail/io/IndexSuite.scala @@ -40,8 +40,6 @@ class IndexSuite extends HailSuite { branchingFactor: Int, attributes: Map[String, Any], ): Unit = { - val bufferSpec = BufferSpec.default - val iw = IndexWriter.builder(ctx, keyType, annotationType, branchingFactor, attributes)( file, theHailClassLoader, @@ -100,7 +98,7 @@ class IndexSuite extends HailSuite { @Test(dataProvider = "elements") def writeReadGivesSameAsInput(data: Array[String]): Unit = { val file = ctx.createTmpPath("test", "idx") - val attributes = Map("foo" -> true, "bar" -> 5) + val attributes: Map[String, Any] = Map("foo" -> true, "bar" -> 5) val a: (Int) => Annotation = (i: Int) => Row(i % 2 == 0) @@ -155,9 +153,6 @@ class IndexSuite extends HailSuite { ) val index = indexReader(file, TStruct.empty) - val 
n = stringsWithDups.length - val f = { i: Int => stringsWithDups(i) } - val expectedResult = Array( "aardvark" -> 0, "bear" -> 0, @@ -191,9 +186,6 @@ class IndexSuite extends HailSuite { ) val index = indexReader(file, TStruct.empty) - val n = stringsWithDups.length - val f = { i: Int => stringsWithDups(i) } - val expectedResult = Array( "aardvark" -> 0, "bear" -> 2, diff --git a/hail/src/test/scala/is/hail/io/fs/FSSuite.scala b/hail/src/test/scala/is/hail/io/fs/FSSuite.scala index 506de8078fd..0901c6a2f57 100644 --- a/hail/src/test/scala/is/hail/io/fs/FSSuite.scala +++ b/hail/src/test/scala/is/hail/io/fs/FSSuite.scala @@ -30,7 +30,7 @@ trait FSSuite extends TestNGSuite { def pathsRelRoot(root: String, statuses: Array[FileListEntry]): Set[String] = statuses.map { status => - var p = status.getPath + val p = status.getPath assert(p.startsWith(root), s"$p $root") p.drop(root.length) }.toSet @@ -73,8 +73,8 @@ trait FSSuite extends TestNGSuite { @Test def testFileStatusOnDirIsFailure(): Unit = { val f = r("/dir") - TestUtils.interceptException[FileNotFoundException](r("/dir"))( - fs.fileStatus(r("/dir")) + TestUtils.interceptException[FileNotFoundException](f)( + fs.fileStatus(f) ) } @@ -213,7 +213,7 @@ trait FSSuite extends TestNGSuite { assert(pathsRelRoot(root, statuses) == Set("")) } - @Test def testFileEndingWithPeriod: Unit = { + @Test def testFileEndingWithPeriod(): Unit = { val f = fs.makeQualified(t()) fs.touch(f + "/foo.") val statuses = fs.listDirectory(f) diff --git a/hail/src/test/scala/is/hail/io/fs/FakeFS.scala b/hail/src/test/scala/is/hail/io/fs/FakeFS.scala index 26578742e57..d91a6e57339 100644 --- a/hail/src/test/scala/is/hail/io/fs/FakeFS.scala +++ b/hail/src/test/scala/is/hail/io/fs/FakeFS.scala @@ -1,8 +1,8 @@ package is.hail.io.fs case class FakeURL(path: String) extends FSURL { - def getPath(): String = path - def getActualUrl(): String = path + def getPath: String = path + def getActualUrl: String = path } abstract class FakeFS extends FS { diff --git a/hail/src/test/scala/is/hail/methods/LocalLDPruneSuite.scala b/hail/src/test/scala/is/hail/methods/LocalLDPruneSuite.scala index 232b6ad02d6..ebee4aa797d 100644 --- a/hail/src/test/scala/is/hail/methods/LocalLDPruneSuite.scala +++ b/hail/src/test/scala/is/hail/methods/LocalLDPruneSuite.scala @@ -157,7 +157,6 @@ class LocalLDPruneSuite extends HailSuite { ): Boolean = { val locallyPrunedRDD = getLocallyPrunedRDDWithGT(unprunedMatrixTable, locallyPrunedTable) - val nSamples = unprunedMatrixTable.nCols val r2Matrix = LocalLDPruneSuite.correlationMatrixGT(locallyPrunedRDD.map { case (_, _, gs) => gs @@ -188,7 +187,6 @@ class LocalLDPruneSuite extends HailSuite { ): Boolean = { val locallyPrunedRDD = getLocallyPrunedRDDWithGT(unprunedMatrixTable, locallyPrunedTable) - val nSamples = unprunedMatrixTable.nCols val locallyUncorrelated = { locallyPrunedRDD.mapPartitions( diff --git a/hail/src/test/scala/is/hail/methods/MultiArray2Suite.scala b/hail/src/test/scala/is/hail/methods/MultiArray2Suite.scala index 8ac664a0423..0026478fb9c 100644 --- a/hail/src/test/scala/is/hail/methods/MultiArray2Suite.scala +++ b/hail/src/test/scala/is/hail/methods/MultiArray2Suite.scala @@ -9,7 +9,7 @@ class MultiArray2Suite extends HailSuite { @Test def test() = { // test multiarray of size 0 will be created - val ma0 = MultiArray2.fill[Int](0, 0)(0) + MultiArray2.fill[Int](0, 0)(0) // test multiarray of size 0 that apply nothing out intercept[IllegalArgumentException] { @@ -25,12 +25,12 @@ class MultiArray2Suite extends HailSuite { // bad multiarray 
initiation -- negative number intercept[IllegalArgumentException] { - val a = MultiArray2.fill[Int](-5, 5)(0) + MultiArray2.fill[Int](-5, 5)(0) } // bad multiarray initiation -- negative number intercept[IllegalArgumentException] { - val a = MultiArray2.fill[Int](5, -5)(0) + MultiArray2.fill[Int](5, -5)(0) } val ma1 = MultiArray2.fill[Int](10, 3)(0) @@ -41,7 +41,7 @@ class MultiArray2Suite extends HailSuite { // Catch exception if try to apply value that is not in indices of multiarray intercept[IllegalArgumentException] { - val foo = ma1(100, 100) + ma1(100, 100) } val ma2 = MultiArray2.fill[Int](10, 3)(0) @@ -70,29 +70,29 @@ class MultiArray2Suite extends HailSuite { assert(row(idx) == ((row.i * idx, "foo"))) intercept[IllegalArgumentException] { - val x = ma5.row(100) + ma5.row(100) } intercept[ArrayIndexOutOfBoundsException] { val x = ma5.row(0) - val y = x(100) + x(100) } intercept[IllegalArgumentException] { - val x = ma5.row(-5) + ma5.row(-5) } intercept[IllegalArgumentException] { - val x = ma5.column(100) + ma5.column(100) } intercept[IllegalArgumentException] { - val x = ma5.column(-5) + ma5.column(-5) } intercept[ArrayIndexOutOfBoundsException] { val x = ma5.column(0) - val y = x(100) + x(100) } // Test column slice diff --git a/hail/src/test/scala/is/hail/stats/FisherExactTestSuite.scala b/hail/src/test/scala/is/hail/stats/FisherExactTestSuite.scala index d4a7cef2f1a..bf5d68aa584 100644 --- a/hail/src/test/scala/is/hail/stats/FisherExactTestSuite.scala +++ b/hail/src/test/scala/is/hail/stats/FisherExactTestSuite.scala @@ -7,10 +7,6 @@ import org.testng.annotations.Test class FisherExactTestSuite extends HailSuite { @Test def testPvalue(): Unit = { - val N = 200 - val K = 100 - val k = 10 - val n = 15 val a = 5 val b = 10 val c = 95 diff --git a/hail/src/test/scala/is/hail/stats/eigSymDSuite.scala b/hail/src/test/scala/is/hail/stats/eigSymDSuite.scala index e1e6ab7e4da..73bcb32e9e4 100644 --- a/hail/src/test/scala/is/hail/stats/eigSymDSuite.scala +++ b/hail/src/test/scala/is/hail/stats/eigSymDSuite.scala @@ -24,7 +24,6 @@ class eigSymDSuite extends HailSuite { val svdK = svd(K) val eigSymK = eigSym(K) val eigSymDK = eigSymD(K) - val eigSymRK = eigSymR(K) // eigSymD = svdW for (j <- 0 until n) { diff --git a/hail/src/test/scala/is/hail/types/physical/PNDArraySuite.scala b/hail/src/test/scala/is/hail/types/physical/PNDArraySuite.scala index 6e445ecb14c..c91333fe690 100644 --- a/hail/src/test/scala/is/hail/types/physical/PNDArraySuite.scala +++ b/hail/src/test/scala/is/hail/types/physical/PNDArraySuite.scala @@ -371,7 +371,6 @@ class PNDArraySuite extends PhysicalTestUtils { val fb = EmitFunctionBuilder[Region, Region, Region, Long](ctx, "ref_count_test") val codeRegion1 = fb.getCodeParam[Region](1) val codeRegion2 = fb.getCodeParam[Region](2) - val codeRegion3 = fb.getCodeParam[Region](3) try { fb.emitWithBuilder { cb => @@ -386,7 +385,7 @@ class PNDArraySuite extends PhysicalTestUtils { // Region 2 gets an ndarray at ndaddress2, plus a reference to the one at ndarray 1. 
val (_, snd2Finisher) = nd.constructDataFunction(shapeSeq, shapeSeq, cb, codeRegion2) - val snd2 = snd2Finisher(cb) + snd2Finisher(cb) cb.assign(r2PointerToNDAddress1, nd.store(cb, codeRegion2, snd1, true)) // Return the 1st ndarray diff --git a/hail/src/test/scala/is/hail/utils/FlipbookIteratorSuite.scala b/hail/src/test/scala/is/hail/utils/FlipbookIteratorSuite.scala index 8f463eefbaa..85a33e681db 100644 --- a/hail/src/test/scala/is/hail/utils/FlipbookIteratorSuite.scala +++ b/hail/src/test/scala/is/hail/utils/FlipbookIteratorSuite.scala @@ -329,7 +329,7 @@ class FlipbookIteratorSuite extends HailSuite { val a: Array[Box[Int]] = Array.fill(3)(default) var i = 0; while (i < ar.size) { - var v = ar(i) + val v = ar(i) a(v._2) = v._1 i += 1 } diff --git a/hail/src/test/scala/is/hail/utils/PartitionCountsSuite.scala b/hail/src/test/scala/is/hail/utils/PartitionCountsSuite.scala index 51b1d566041..b64466e3f07 100644 --- a/hail/src/test/scala/is/hail/utils/PartitionCountsSuite.scala +++ b/hail/src/test/scala/is/hail/utils/PartitionCountsSuite.scala @@ -43,7 +43,7 @@ class PartitionCountsSuite extends TestNGSuite { } @Test def testIncrementalPCSubset() = { - var pcs = Array(0L, 0L, 5L, 6L, 4L, 3L, 3L, 3L, 2L, 1L) + val pcs = Array(0L, 0L, 5L, 6L, 4L, 3L, 3L, 3L, 2L, 1L) def headOffset(n: Long) = incrementalPCSubsetOffset(n, 0 until pcs.length)(_.map(pcs)) diff --git a/hail/src/test/scala/is/hail/utils/RichDenseMatrixDoubleSuite.scala b/hail/src/test/scala/is/hail/utils/RichDenseMatrixDoubleSuite.scala index 549025e4d64..a8d05321bf4 100644 --- a/hail/src/test/scala/is/hail/utils/RichDenseMatrixDoubleSuite.scala +++ b/hail/src/test/scala/is/hail/utils/RichDenseMatrixDoubleSuite.scala @@ -31,7 +31,7 @@ class RichDenseMatrixDoubleSuite extends HailSuite { val mT = m.t RichDenseMatrixDouble.exportToDoubles(fs, fileT, mT, forceRowMajor = true) val lmT2 = RichDenseMatrixDouble.importFromDoubles(fs, fileT, 100, 50, rowMajor = true) - assert(mT === mT) + assert(mT === lmT2) TestUtils.interceptFatal("Premature") { RichDenseMatrixDouble.importFromDoubles(fs, fileT, 100, 100, rowMajor = true) diff --git a/hail/src/test/scala/is/hail/utils/TreeTraversalSuite.scala b/hail/src/test/scala/is/hail/utils/TreeTraversalSuite.scala index 32836c3207a..0106d66c23a 100644 --- a/hail/src/test/scala/is/hail/utils/TreeTraversalSuite.scala +++ b/hail/src/test/scala/is/hail/utils/TreeTraversalSuite.scala @@ -8,21 +8,21 @@ class TreeTraversalSuite { def binaryTree(i: Int): Iterator[Int] = (1 to 2).map(2 * i + _).iterator.filter(_ < 7) - @Test def testPostOrder = + @Test def testPostOrder() = Assert.assertEquals( TreeTraversal.postOrder(binaryTree)(0).toArray, Array(3, 4, 1, 5, 6, 2, 0), "", ) - @Test def testPreOrder = + @Test def testPreOrder() = Assert.assertEquals( TreeTraversal.preOrder(binaryTree)(0).toArray, Array(0, 1, 3, 4, 2, 5, 6), "", ) - @Test def levelOrder = + @Test def levelOrder() = Assert.assertEquals( TreeTraversal.levelOrder(binaryTree)(0).toArray, (0 to 6).toArray, diff --git a/hail/src/test/scala/is/hail/utils/UtilsSuite.scala b/hail/src/test/scala/is/hail/utils/UtilsSuite.scala index f79493d1dd2..24a5423ed58 100644 --- a/hail/src/test/scala/is/hail/utils/UtilsSuite.scala +++ b/hail/src/test/scala/is/hail/utils/UtilsSuite.scala @@ -143,7 +143,6 @@ class UtilsSuite extends HailSuite { assert(c2.toSeq == Seq("a", "b", "c", "a_1", "a_2", "c_1", "a_3")) assert(diff.toSeq == Seq("a" -> "a_1", "a" -> "a_2", "c" -> "c_1", "a" -> "a_3")) - val c3 = Array("a", "b", "c", "a", "a", "c", "a") val (c4, diff2) = 
mangle(c1, "D" * _) assert(c4.toSeq == Seq("a", "b", "c", "aD", "aDD", "cD", "aDDD")) assert(diff2.toSeq == Seq("a" -> "aD", "a" -> "aDD", "c" -> "cD", "a" -> "aDDD")) diff --git a/hail/src/test/scala/is/hail/variant/ReferenceGenomeSuite.scala b/hail/src/test/scala/is/hail/variant/ReferenceGenomeSuite.scala index 5b1124aff65..1f7c361f914 100644 --- a/hail/src/test/scala/is/hail/variant/ReferenceGenomeSuite.scala +++ b/hail/src/test/scala/is/hail/variant/ReferenceGenomeSuite.scala @@ -123,11 +123,6 @@ class ReferenceGenomeSuite extends HailSuite { assert(rg.compare("X", "Y") < 0) assert(rg.compare("Y", "X") > 0) assert(rg.compare("Y", "MT") < 0) - - // Test loci - val l1 = Locus("1", 25) - val l2 = Locus("1", 13000) - val l3 = Locus("2", 26) } @Test def testWriteToFile(): Unit = { @@ -230,7 +225,6 @@ class ReferenceGenomeSuite extends HailSuite { withExecuteContext() { ctx => val grch38 = ctx.getReference(ReferenceGenome.GRCh38) val fb = EmitFunctionBuilder[String, Boolean](ctx, "serialize_rg") - val cb = fb.ecb val rgfield = fb.getReferenceGenome(grch38.name) fb.emit(rgfield.invoke[String, Boolean]("isValidContig", fb.getCodeParam[String](1))) @@ -248,7 +242,6 @@ class ReferenceGenomeSuite extends HailSuite { val fb = EmitFunctionBuilder[String, Locus, Double, (Locus, Boolean)](ctx, "serialize_with_liftover") - val cb = fb.ecb val rgfield = fb.getReferenceGenome(grch37.name) fb.emit(rgfield.invoke[String, Locus, Double, (Locus, Boolean)]( "liftoverLocus", From 5fde6f2a8cf0d520769efd6f9ea2db10a033f976 Mon Sep 17 00:00:00 2001 From: Dan King Date: Fri, 26 Jan 2024 15:25:14 -0500 Subject: [PATCH 10/26] [spark_backend] avoid infinite recursion when initialization fails (#14199) --- hail/python/hail/backend/py4j_backend.py | 3 +++ hail/python/hail/utils/java.py | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/hail/python/hail/backend/py4j_backend.py b/hail/python/hail/backend/py4j_backend.py index 9a4794833fc..9fcb9d61579 100644 --- a/hail/python/hail/backend/py4j_backend.py +++ b/hail/python/hail/backend/py4j_backend.py @@ -62,6 +62,9 @@ def deco(*args, **kwargs): if s.startswith('java.util.NoSuchElementException'): raise + if not Env.is_fully_initialized(): + raise ValueError('Error occurred during Hail initialization.') from e + tpl = Env.jutils().handleForPython(e.java_exception) deepest, full, error_id = tpl._1(), tpl._2(), tpl._3() raise fatal_error_from_java_error_triplet(deepest, full, error_id) from None diff --git a/hail/python/hail/utils/java.py b/hail/python/hail/utils/java.py index 45beeab70d0..fa711fd38c6 100644 --- a/hail/python/hail/utils/java.py +++ b/hail/python/hail/utils/java.py @@ -66,6 +66,10 @@ def hc() -> 'hail.context.HailContext': assert Env._hc is not None return Env._hc + @staticmethod + def is_fully_initialized() -> bool: + return Env._hc is not None + @staticmethod async def _async_hc() -> 'hail.context.HailContext': if not Env._hc: From feed09d221ab753e04c7659a6c895442cee06d70 Mon Sep 17 00:00:00 2001 From: Dan King Date: Mon, 29 Jan 2024 15:32:22 -0500 Subject: [PATCH 11/26] [rotate_keys.py] make deletion non-interactive as well (#14208) Rotation has a non-interactive mode for when you are rotating keys in an expected state. I added the same behavior for the bulk delete mode.
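For illustration only, a rough sketch (not part of the diff that follows, and with hypothetical setup) of how the reworked calls look; `delete_old_keys` and `RotationState` are the names used in the patch, and `service_accounts`/`iam_manager` are assumed to be built the way `main()` builds them:

```python
# Hypothetical driver showing the new keyword-only `interactive` flag.
async def cleanup(service_accounts, iam_manager):
    # 'delete': the bulk mode keeps its confirmation prompts.
    await delete_old_keys(service_accounts, iam_manager, interactive=True)

    # 'delete-ready-only': suitable for automation, no prompts at all.
    await delete_old_keys(
        service_accounts,
        iam_manager,
        focus=RotationState.READY_FOR_DELETE,
        interactive=False,
    )
```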
--- devbin/rotate_keys.py | 57 ++++++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/devbin/rotate_keys.py b/devbin/rotate_keys.py index 7ccbb1f5d86..e07e80abbc7 100644 --- a/devbin/rotate_keys.py +++ b/devbin/rotate_keys.py @@ -304,9 +304,10 @@ async def add_new_keys( for sa in service_accounts_under_consideration: sa.list_keys(sys.stdout) - if interactive: - if input('Create new key?\nOnly yes will be accepted: ') != 'yes': - continue + + if interactive and input('Create new key?\nOnly yes will be accepted: ') != 'yes': + print(f'Doing nothing for this key.') + continue new_key, key_data = await iam_manager.create_new_key(sa) sa.add_new_key(new_key) @@ -317,7 +318,11 @@ async def add_new_keys( async def delete_old_keys( - service_accounts: List[ServiceAccount], iam_manager: IAMManager, focus: Optional[RotationState] = None + service_accounts: List[ServiceAccount], + iam_manager: IAMManager, + *, + focus: Optional[RotationState] = None, + interactive: bool, ): async def delete_old_and_refresh(sa: ServiceAccount): to_delete = sa.redundant_user_keys() @@ -333,23 +338,27 @@ async def delete_old_and_refresh(sa: ServiceAccount): if sa.disabled or focus is not None and rotation_state != focus: continue sa.list_keys(sys.stdout) - if input('Delete all but the newest key?\nOnly yes will be accepted: ') == 'yes': - if rotation_state == RotationState.READY_FOR_DELETE: + + if interactive and input('Delete all but the newest key?\nOnly yes will be accepted: ') != 'yes': + print(f'Doing nothing for this key.') + continue + + if rotation_state == RotationState.READY_FOR_DELETE: + await delete_old_and_refresh(sa) + elif rotation_state == RotationState.IN_PROGRESS: + warnings.warn( + 'The most recent key was generated less than ' + 'thirty days ago. Old keys should not be deleted ' + 'as they might still be in use.', + stacklevel=2, + ) + if input('Are you sure you want to delete old keys? ') == 'yes': await delete_old_and_refresh(sa) - elif rotation_state == RotationState.IN_PROGRESS: - warnings.warn( - 'The most recent key was generated less than ' - 'thirty days ago. Old keys should not be deleted ' - 'as they might still be in use.', - stacklevel=2, - ) - if input('Are you sure you want to delete old keys? 
') == 'yes': - await delete_old_and_refresh(sa) - else: - warnings.warn( - f'Cannot delete keys in rotation state: {rotation_state}', - stacklevel=2, - ) + else: + warnings.warn( + f'Cannot delete keys in rotation state: {rotation_state}', + stacklevel=2, + ) async def main(): @@ -421,11 +430,13 @@ async def main(): if action == 'interactive-update': await add_new_keys(service_accounts, iam_manager, k8s_manager, interactive=True) elif action == 'delete': - await delete_old_keys(service_accounts, iam_manager) + await delete_old_keys(service_accounts, iam_manager, interactive=True) elif action == 'delete-ready-only': - await delete_old_keys(service_accounts, iam_manager, focus=RotationState.READY_FOR_DELETE) + await delete_old_keys( + service_accounts, iam_manager, focus=RotationState.READY_FOR_DELETE, interactive=False + ) elif action == 'delete-in-progress-only': - await delete_old_keys(service_accounts, iam_manager, focus=RotationState.IN_PROGRESS) + await delete_old_keys(service_accounts, iam_manager, focus=RotationState.IN_PROGRESS, interactive=False) else: print('Doing nothing') finally: From 4324736acb18256336ff9b7599b9a0751ecfa383 Mon Sep 17 00:00:00 2001 From: Dan King Date: Mon, 29 Jan 2024 16:10:14 -0500 Subject: [PATCH 12/26] [hailtop] allow configuration of default HTTP timeout (#14206) Until we have a mechanism to infer the correct timeout based on network conditions, this provides an escape hatch for users on flaky network connections such as wifi. --- hail/python/hailtop/config/variables.py | 1 + .../hailctl/config/config_variables.py | 12 ++++++++++++ hail/python/hailtop/httpx.py | 19 ++++++++++++++----- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/hail/python/hailtop/config/variables.py b/hail/python/hailtop/config/variables.py index cfd82a3d774..036bae78d26 100644 --- a/hail/python/hailtop/config/variables.py +++ b/hail/python/hailtop/config/variables.py @@ -19,3 +19,4 @@ class ConfigVariable(str, Enum): QUERY_BATCH_WORKER_MEMORY = 'query/batch_worker_memory' QUERY_NAME_PREFIX = 'query/name_prefix' QUERY_DISABLE_PROGRESS_BAR = 'query/disable_progress_bar' + HTTP_TIMEOUT_IN_SECONDS = 'http/timeout_in_seconds' diff --git a/hail/python/hailtop/hailctl/config/config_variables.py b/hail/python/hailtop/hailctl/config/config_variables.py index 72781907dab..50d555ef953 100644 --- a/hail/python/hailtop/hailctl/config/config_variables.py +++ b/hail/python/hailtop/hailctl/config/config_variables.py @@ -9,6 +9,14 @@ ConfigVariableInfo = namedtuple('ConfigVariableInfo', ['help_msg', 'validation']) +def _is_float_str(x: str) -> bool: + try: + float(x) + return True + except ValueError: + return False + + def config_variables(): from hailtop.batch_client.parse import CPU_REGEXPAT, MEMORY_REGEXPAT # pylint: disable=import-outside-toplevel from hailtop.aiotools.router_fs import RouterAsyncFS # pylint: disable=import-outside-toplevel @@ -124,6 +132,10 @@ def config_variables(): help_msg='Disable the progress bar with a value of 1. 
Enable the progress bar with a value of 0', validation=(lambda x: x in ('0', '1'), 'should be a value of 0 or 1'), ), + ConfigVariable.HTTP_TIMEOUT_IN_SECONDS: ConfigVariableInfo( + help_msg='The default timeout for HTTP requests in seconds.', + validation=(_is_float_str, 'should be a float or an int like 42.42 or 42'), + ), } return _config_variables diff --git a/hail/python/hailtop/httpx.py b/hail/python/hailtop/httpx.py index a40a6b5cf8b..17605dcd4c4 100644 --- a/hail/python/hailtop/httpx.py +++ b/hail/python/hailtop/httpx.py @@ -8,6 +8,7 @@ from .tls import internal_client_ssl_context, external_client_ssl_context from .config.deploy_config import get_deploy_config +from .config import ConfigVariable, configuration_of class ClientResponseError(aiohttp.ClientResponseError): @@ -101,15 +102,23 @@ def __init__( assert 'connector' not in kwargs - if timeout is None: - timeout = aiohttp.ClientTimeout(total=5) - if isinstance(timeout, (float, int)): - timeout = aiohttp.ClientTimeout(total=timeout) + configuration_of_timeout = configuration_of(ConfigVariable.HTTP_TIMEOUT_IN_SECONDS, timeout, 5) + del timeout + + if isinstance(configuration_of_timeout, str): + configuration_of_timeout = float(configuration_of_timeout) + if isinstance(configuration_of_timeout, (float, int)): + configuration_of_timeout = aiohttp.ClientTimeout(total=configuration_of_timeout) + assert isinstance(configuration_of_timeout, aiohttp.ClientTimeout) self.loop = asyncio.get_running_loop() self.raise_for_status = raise_for_status self.client_session = aiohttp.ClientSession( - *args, timeout=timeout, raise_for_status=False, connector=aiohttp.TCPConnector(ssl=tls), **kwargs + *args, + timeout=configuration_of_timeout, + raise_for_status=False, + connector=aiohttp.TCPConnector(ssl=tls), + **kwargs, ) def request( From f56f579cf5dd4da1621395d8b5fc5c262cba18ae Mon Sep 17 00:00:00 2001 From: Christopher Vittal Date: Mon, 29 Jan 2024 15:42:59 -0600 Subject: [PATCH 13/26] [vds/combiner] Better calculation of ref_block_max_length (#14178) Use the patch in place function of `store_ref_block_max_length` to compute `ref_block_max_length` rather than computing it on a zip join pipeline, causing that zip join pipeline to be executed twice. 
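As a sketch of the new flow (not part of the diff below; the output path and the `combined_vds` variable are hypothetical), the combiner now writes first and then patches the global into the stored metadata in place, so the reference-block scan runs over the already-written reference data instead of forcing the zip-join pipeline to execute twice:

```python
import hail as hl

output_path = 'gs://my-bucket/combined.vds'  # hypothetical output path

combined_vds.write(output_path)  # `combined_vds` assumed to come from the combiner
if hl.vds.VariantDataset.ref_block_max_length_field not in combined_vds.reference_data.globals:
    # patch ref_block_max_length into the written VDS in place
    hl.vds.store_ref_block_max_length(output_path)

# readers now pick up the global without any recomputation
vds = hl.vds.read_vds(output_path)
```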
--- .../hail/vds/combiner/variant_dataset_combiner.py | 14 +++----------- hail/python/hail/vds/variant_dataset.py | 2 +- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/hail/python/hail/vds/combiner/variant_dataset_combiner.py b/hail/python/hail/vds/combiner/variant_dataset_combiner.py index 8eab7aa765e..27dab5fb9ed 100644 --- a/hail/python/hail/vds/combiner/variant_dataset_combiner.py +++ b/hail/python/hail/vds/combiner/variant_dataset_combiner.py @@ -428,19 +428,11 @@ def step(self): self._job_id += 1 def _write_final(self, vds): - fd = VariantDataset.ref_block_max_length_field + vds.write(self._output_path) - if fd not in vds.reference_data.globals: + if VariantDataset.ref_block_max_length_field not in vds.reference_data.globals: info("VDS combiner: computing reference block max length...") - max_len = vds.reference_data.aggregate_entries( - hl.agg.max(vds.reference_data.END + 1 - vds.reference_data.locus.position) - ) - info(f"VDS combiner: max reference block length is {max_len}") - vds = VariantDataset( - reference_data=vds.reference_data.annotate_globals(**{fd: max_len}), variant_data=vds.variant_data - ) - - vds.write(self._output_path) + hl.vds.store_ref_block_max_length(self._output_path) def _step_vdses(self): current_bin = original_bin = min(self._vdses) diff --git a/hail/python/hail/vds/variant_dataset.py b/hail/python/hail/vds/variant_dataset.py index 1a02f68026c..28d2c2b8be7 100644 --- a/hail/python/hail/vds/variant_dataset.py +++ b/hail/python/hail/vds/variant_dataset.py @@ -83,7 +83,7 @@ def store_ref_block_max_length(vds_path): ---------- vds_path : :obj:`str` """ - vds = hl.vds.read_vds(vds_path) + vds = read_vds(vds_path, _warn_no_ref_block_max_length=False) if VariantDataset.ref_block_max_length_field in vds.reference_data.globals: warning(f"VDS at {vds_path} already contains a global annotation with the max reference block length") From 497f8a90d83d6223fccc3cef91b0c0473a38137f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 31 Jan 2024 10:04:42 -0500 Subject: [PATCH 14/26] Bump jupyterlab from 4.0.9 to 4.0.12 in /hail/python/dev (#14218) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [jupyterlab](https://github.com/jupyterlab/jupyterlab) from 4.0.9 to 4.0.12.
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- hail/python/dev/pinned-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hail/python/dev/pinned-requirements.txt b/hail/python/dev/pinned-requirements.txt index 5a44b10439e..a7a0385d54d 100644 --- a/hail/python/dev/pinned-requirements.txt +++ b/hail/python/dev/pinned-requirements.txt @@ -220,7 +220,7 @@ jupyter-server==2.12.1 # notebook-shim jupyter-server-terminals==0.5.0 # via jupyter-server -jupyterlab==4.0.9 +jupyterlab==4.0.12 # via notebook jupyterlab-pygments==0.3.0 # via nbconvert From ef5c352343688b4a2a3ec78e29b28ec0f35599f6 Mon Sep 17 00:00:00 2001 From: Christopher Vittal Date: Wed, 31 Jan 2024 17:56:40 -0600 Subject: [PATCH 15/26] [vds/combiner] Add sanity check on uniqueness of gvcf paths/sample names (#14207) --- .../hail/vds/combiner/variant_dataset_combiner.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/hail/python/hail/vds/combiner/variant_dataset_combiner.py b/hail/python/hail/vds/combiner/variant_dataset_combiner.py index 27dab5fb9ed..4d476031944 100644 --- a/hail/python/hail/vds/combiner/variant_dataset_combiner.py +++ b/hail/python/hail/vds/combiner/variant_dataset_combiner.py @@ -669,6 +669,19 @@ def new_combiner( raise ValueError("at least one of 'gvcf_paths' or 'vds_paths' must be nonempty") if gvcf_paths is None: gvcf_paths = [] + if len(gvcf_paths) > 0: + if len(set(gvcf_paths)) != len(gvcf_paths): + duplicates = [gvcf for gvcf, count in collections.Counter(gvcf_paths).items() if count > 1] + duplicates = '\n '.join(duplicates) + raise ValueError(f'gvcf paths should be unique, the following paths are repeated:{duplicates}') + if gvcf_sample_names is not None and len(set(gvcf_sample_names)) != len(gvcf_sample_names): + duplicates = [gvcf for gvcf, count in collections.Counter(gvcf_sample_names).items() if count > 1] + duplicates = '\n '.join(duplicates) + raise ValueError( + "provided sample names ('gvcf_sample_names') should be unique, " + f'the following names are repeated:{duplicates}' + ) + if vds_paths is None: vds_paths = [] if vds_sample_counts is not None and len(vds_paths) != len(vds_sample_counts): From 534037881530248fc025f5381964a26405338c69 Mon Sep 17 00:00:00 2001 From: Daniel Goldstein Date: Wed, 31 Jan 2024 19:39:33 -0500 Subject: [PATCH 16/26] [batch] Add metadata server to batch jobs in GCP (#14019) Implements a basic GCP metadata server for user jobs as described in https://github.com/hail-is/hail-rfcs/pull/12. It implements only so much as is needed for `hail` and `gcloud` to get access tokens for hail GSAs so they can then make API calls to GCS or Hail Batch. With this in place user jobs should no longer require GSA key files, but removing them is future work and requires a well-communicated deprecation and removal process. 
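As a sketch of what this enables (not part of the diff below): inside a GCP Batch job container, requests that would normally go to the GCE metadata server are answered by the worker with a token for the job's own hail service account. The route and the response fields come from the handlers added in this patch; the use of `requests` inside the job image is an assumption for illustration:

```python
import requests  # assumed to be available in the job image

# metadata.google.internal resolves to 169.254.169.254, which for user jobs is
# served by the worker-hosted metadata server added in this patch
resp = requests.get(
    'http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token',
    headers={'Metadata-Flavor': 'Google'},
    timeout=10,
)
resp.raise_for_status()
access_token = resp.json()['access_token']  # response also carries 'expires_in' and 'token_type'
```

Because the endpoint mimics GCE, `gcloud` and the Google client libraries pick these credentials up automatically, which is what the new `test_gcloud_works_with_hail_metadata_server` test exercises.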
--- batch/batch/cloud/azure/worker/worker_api.py | 4 + .../batch/cloud/gcp/driver/create_instance.py | 6 +- .../batch/cloud/gcp/worker/metadata_server.py | 109 ++++++++++++++++++ batch/batch/cloud/gcp/worker/worker_api.py | 35 +++--- batch/batch/globals.py | 2 +- batch/batch/worker/worker.py | 20 ++++ batch/batch/worker/worker_api.py | 6 + batch/test/test_batch.py | 51 ++++++-- build.yaml | 2 +- ci/Dockerfile.ci-utils | 4 +- .../hailtop/aiocloud/aiogoogle/__init__.py | 10 +- .../aiocloud/aiogoogle/client/__init__.py | 2 + .../client/metadata_server_client.py | 30 +++++ .../hailtop/aiocloud/aiogoogle/credentials.py | 14 ++- 14 files changed, 261 insertions(+), 34 deletions(-) create mode 100644 batch/batch/cloud/gcp/worker/metadata_server.py create mode 100644 hail/python/hailtop/aiocloud/aiogoogle/client/metadata_server_client.py diff --git a/batch/batch/cloud/azure/worker/worker_api.py b/batch/batch/cloud/azure/worker/worker_api.py index c9e3f8f7cfe..779bc13fc8d 100644 --- a/batch/batch/cloud/azure/worker/worker_api.py +++ b/batch/batch/cloud/azure/worker/worker_api.py @@ -6,6 +6,7 @@ import aiohttp import orjson +from aiohttp import web from hailtop import httpx from hailtop.aiocloud import aioazure @@ -60,6 +61,9 @@ async def user_container_registry_credentials(self, credentials: Dict[str, str]) credentials = orjson.loads(base64.b64decode(credentials['key.json']).decode()) return {'username': credentials['appId'], 'password': credentials['password']} + def create_metadata_server_app(self, credentials: Dict[str, str]) -> web.Application: + raise NotImplementedError + def instance_config_from_config_dict(self, config_dict: Dict[str, str]) -> AzureSlimInstanceConfig: return AzureSlimInstanceConfig.from_dict(config_dict) diff --git a/batch/batch/cloud/gcp/driver/create_instance.py b/batch/batch/cloud/gcp/driver/create_instance.py index 57d3000b286..9fc7aaaaa59 100644 --- a/batch/batch/cloud/gcp/driver/create_instance.py +++ b/batch/batch/cloud/gcp/driver/create_instance.py @@ -266,9 +266,9 @@ def scheduling() -> dict: iptables --table nat --append POSTROUTING --source 172.20.0.0/15 --jump MASQUERADE # [public] -# Block public traffic to the metadata server -iptables --append FORWARD --source 172.21.0.0/16 --destination 169.254.169.254 --jump DROP -# But allow the internal gateway +# Send public jobs' metadata server requests to the batch worker itself +iptables --table nat --append PREROUTING --source 172.21.0.0/16 --destination 169.254.169.254 -p tcp -j REDIRECT --to-ports 5555 +# Allow the internal gateway iptables --append FORWARD --destination $INTERNAL_GATEWAY_IP --jump ACCEPT # And this worker iptables --append FORWARD --destination $IP_ADDRESS --jump ACCEPT diff --git a/batch/batch/cloud/gcp/worker/metadata_server.py b/batch/batch/cloud/gcp/worker/metadata_server.py new file mode 100644 index 00000000000..5475c9982a9 --- /dev/null +++ b/batch/batch/cloud/gcp/worker/metadata_server.py @@ -0,0 +1,109 @@ +from aiohttp import web + +from hailtop.aiocloud import aiogoogle + +from ....globals import HTTP_CLIENT_MAX_SIZE + + +class AppKeys: + USER_CREDENTIALS = web.AppKey('credentials', aiogoogle.GoogleServiceAccountCredentials) + GCE_METADATA_SERVER_CLIENT = web.AppKey('ms_client', aiogoogle.GoogleMetadataServerClient) + + +async def root(_): + return web.Response(text='computeMetadata/\n') + + +async def project_id(request: web.Request): + metadata_server_client = request.app[AppKeys.GCE_METADATA_SERVER_CLIENT] + return web.Response(text=await metadata_server_client.project()) + + +async 
def numeric_project_id(request: web.Request): + metadata_server_client = request.app[AppKeys.GCE_METADATA_SERVER_CLIENT] + return web.Response(text=await metadata_server_client.numeric_project_id()) + + +async def service_accounts(request: web.Request): + gsa_email = request.app[AppKeys.USER_CREDENTIALS].email + return web.Response(text=f'default\n{gsa_email}\n') + + +async def user_service_account(request: web.Request): + gsa_email = request.app[AppKeys.USER_CREDENTIALS].email + recursive = request.query.get('recursive') + # https://cloud.google.com/compute/docs/metadata/querying-metadata + # token is not included in the recursive version, presumably as that + # is not simple metadata but requires requesting an access token + if recursive == 'true': + return web.json_response( + { + 'aliases': ['default'], + 'email': gsa_email, + 'scopes': ['https://www.googleapis.com/auth/cloud-platform'], + }, + ) + return web.Response(text='aliases\nemail\nscopes\ntoken\n') + + +async def user_email(request: web.Request): + return web.Response(text=request.app[AppKeys.USER_CREDENTIALS].email) + + +async def user_token(request: web.Request): + access_token = await request.app[AppKeys.USER_CREDENTIALS]._get_access_token() + return web.json_response({ + 'access_token': access_token.token, + 'expires_in': access_token.expires_in, + 'token_type': 'Bearer', + }) + + +@web.middleware +async def middleware(request: web.Request, handler): + credentials = request.app[AppKeys.USER_CREDENTIALS] + gsa = request.match_info.get('gsa') + if gsa and gsa not in (credentials.email, 'default'): + raise web.HTTPBadRequest() + + response = await handler(request) + response.enable_compression() + + # `gcloud` does not properly respect `charset`, which aiohttp automatically + # sets so we have to explicitly erase it + # See https://github.com/googleapis/google-auth-library-python/blob/b935298aaf4ea5867b5778bcbfc42408ba4ec02c/google/auth/compute_engine/_metadata.py#L170 + if 'application/json' in response.headers['Content-Type']: + response.headers['Content-Type'] = 'application/json' + response.headers['Metadata-Flavor'] = 'Google' + response.headers['Server'] = 'Metadata Server for VM' + response.headers['X-XSS-Protection'] = '0' + response.headers['X-Frame-Options'] = 'SAMEORIGIN' + return response + + +def create_app( + credentials: aiogoogle.GoogleServiceAccountCredentials, + metadata_server_client: aiogoogle.GoogleMetadataServerClient, +) -> web.Application: + app = web.Application( + client_max_size=HTTP_CLIENT_MAX_SIZE, + middlewares=[middleware], + ) + app[AppKeys.USER_CREDENTIALS] = credentials + app[AppKeys.GCE_METADATA_SERVER_CLIENT] = metadata_server_client + + app.add_routes([ + web.get('/', root), + web.get('/computeMetadata/v1/project/project-id', project_id), + web.get('/computeMetadata/v1/project/numeric-project-id', numeric_project_id), + web.get('/computeMetadata/v1/instance/service-accounts/', service_accounts), + web.get('/computeMetadata/v1/instance/service-accounts/{gsa}/', user_service_account), + web.get('/computeMetadata/v1/instance/service-accounts/{gsa}/email', user_email), + web.get('/computeMetadata/v1/instance/service-accounts/{gsa}/token', user_token), + ]) + + async def close_credentials(_): + await credentials.close() + + app.on_cleanup.append(close_credentials) + return app diff --git a/batch/batch/cloud/gcp/worker/worker_api.py b/batch/batch/cloud/gcp/worker/worker_api.py index 3865ad67dd3..173bcb99b15 100644 --- a/batch/batch/cloud/gcp/worker/worker_api.py +++ 
b/batch/batch/cloud/gcp/worker/worker_api.py @@ -3,17 +3,18 @@ import tempfile from typing import Dict, List -import aiohttp import orjson +from aiohttp import web from hailtop import httpx from hailtop.aiocloud import aiogoogle from hailtop.auth.auth import IdentityProvider -from hailtop.utils import check_exec_output, retry_transient_errors +from hailtop.utils import check_exec_output from ....worker.worker_api import CloudWorkerAPI, ContainerRegistryCredentials from ..instance_config import GCPSlimInstanceConfig from .disk import GCPDisk +from .metadata_server import create_app class GCPWorkerAPI(CloudWorkerAPI): @@ -24,14 +25,24 @@ class GCPWorkerAPI(CloudWorkerAPI): async def from_env() -> 'GCPWorkerAPI': project = os.environ['PROJECT'] zone = os.environ['ZONE'].rsplit('/', 1)[1] - compute_client = aiogoogle.GoogleComputeClient(project) - return GCPWorkerAPI(project, zone, compute_client) + worker_credentials = aiogoogle.GoogleInstanceMetadataCredentials() + http_session = httpx.ClientSession() + return GCPWorkerAPI(project, zone, worker_credentials, http_session) - def __init__(self, project: str, zone: str, compute_client: aiogoogle.GoogleComputeClient): + def __init__( + self, + project: str, + zone: str, + worker_credentials: aiogoogle.GoogleInstanceMetadataCredentials, + http_session: httpx.ClientSession, + ): self.project = project self.zone = zone - self._compute_client = compute_client + self._http_session = http_session + self._metadata_server_client = aiogoogle.GoogleMetadataServerClient(http_session) + self._compute_client = aiogoogle.GoogleComputeClient(project) self._gcsfuse_credential_files: Dict[str, str] = {} + self._worker_credentials = worker_credentials @property def cloud_specific_env_vars_for_user_jobs(self) -> List[str]: @@ -53,13 +64,7 @@ def create_disk(self, instance_name: str, disk_name: str, size_in_gb: int, mount ) async def worker_container_registry_credentials(self, session: httpx.ClientSession) -> ContainerRegistryCredentials: - token_dict = await retry_transient_errors( - session.post_read_json, - 'http://169.254.169.254/computeMetadata/v1/instance/service-accounts/default/token', - headers={'Metadata-Flavor': 'Google'}, - timeout=aiohttp.ClientTimeout(total=60), # type: ignore - ) - access_token = token_dict['access_token'] + access_token = await self._worker_credentials.access_token() return {'username': 'oauth2accesstoken', 'password': access_token} async def user_container_registry_credentials(self, credentials: Dict[str, str]) -> ContainerRegistryCredentials: @@ -68,6 +73,10 @@ async def user_container_registry_credentials(self, credentials: Dict[str, str]) access_token = await sa_credentials.access_token() return {'username': 'oauth2accesstoken', 'password': access_token} + def create_metadata_server_app(self, credentials: Dict[str, str]) -> web.Application: + key = orjson.loads(base64.b64decode(credentials['key.json']).decode()) + return create_app(aiogoogle.GoogleServiceAccountCredentials(key), self._metadata_server_client) + def instance_config_from_config_dict(self, config_dict: Dict[str, str]) -> GCPSlimInstanceConfig: return GCPSlimInstanceConfig.from_dict(config_dict) diff --git a/batch/batch/globals.py b/batch/batch/globals.py index 134878338d5..316771774f4 100644 --- a/batch/batch/globals.py +++ b/batch/batch/globals.py @@ -23,7 +23,7 @@ BATCH_FORMAT_VERSION = 7 STATUS_FORMAT_VERSION = 5 -INSTANCE_VERSION = 26 +INSTANCE_VERSION = 27 MAX_PERSISTENT_SSD_SIZE_GIB = 64 * 1024 RESERVED_STORAGE_GB_PER_CORE = 5 diff --git 
a/batch/batch/worker/worker.py b/batch/batch/worker/worker.py index b049bc62b33..64bcf90ac30 100644 --- a/batch/batch/worker/worker.py +++ b/batch/batch/worker/worker.py @@ -263,6 +263,8 @@ async def init(self): for service in HAIL_SERVICES: hosts.write(f'{INTERNAL_GATEWAY_IP} {service}.hail\n') hosts.write(f'{INTERNAL_GATEWAY_IP} internal.hail\n') + if CLOUD == 'gcp': + hosts.write('169.254.169.254 metadata metadata.google.internal') # Jobs on the private network should have access to the metadata server # and our vdc. The public network should not so we use google's public @@ -760,6 +762,7 @@ def __init__( command: List[str], cpu_in_mcpu: int, memory_in_bytes: int, + user_credentials: Optional[Dict[str, str]], network: Optional[Union[bool, str]] = None, port: Optional[int] = None, timeout: Optional[int] = None, @@ -777,6 +780,7 @@ def __init__( self.command = command self.cpu_in_mcpu = cpu_in_mcpu self.memory_in_bytes = memory_in_bytes + self.user_credentials = user_credentials self.network = network self.port = port self.timeout = timeout @@ -820,6 +824,8 @@ def __init__( self.monitor: Optional[ResourceUsageMonitor] = None + self.metadata_app_runner: Optional[web.AppRunner] = None + async def create(self): self.state = 'creating' try: @@ -959,6 +965,9 @@ async def _cleanup(self): if self._cleaned_up: return + if self.metadata_app_runner: + await self.metadata_app_runner.cleanup() + assert self._run_fut is None try: if self.overlay_mounted: @@ -1025,6 +1034,14 @@ async def _setup_network_namespace(self): else: assert self.network is None or self.network == 'public' self.netns = await network_allocator.allocate_public() + if self.user_credentials and CLOUD == 'gcp': + assert CLOUD_WORKER_API + self.metadata_app_runner = web.AppRunner( + CLOUD_WORKER_API.create_metadata_server_app(self.user_credentials) + ) + await self.metadata_app_runner.setup() + site = web.TCPSite(self.metadata_app_runner, self.netns.host_ip, 5555) + await site.start() except asyncio.TimeoutError: log.exception(network_allocator.task_manager.tasks) raise @@ -1454,6 +1471,7 @@ def copy_container( cpu_in_mcpu=cpu_in_mcpu, memory_in_bytes=memory_in_bytes, volume_mounts=volume_mounts, + user_credentials=job.credentials, stdin=json.dumps(files), ) @@ -1778,6 +1796,7 @@ def __init__( command=job_spec['process']['command'], cpu_in_mcpu=self.cpu_in_mcpu, memory_in_bytes=self.memory_in_bytes, + user_credentials=self.credentials, network=job_spec.get('network'), port=job_spec.get('port'), timeout=job_spec.get('timeout'), @@ -2536,6 +2555,7 @@ async def create_and_start( command=command, cpu_in_mcpu=n_cores * 1000, memory_in_bytes=total_memory_bytes, + user_credentials=None, env=[f'HAIL_WORKER_OFF_HEAP_MEMORY_PER_CORE_MB={off_heap_memory_per_core_mib}', f'HAIL_CLOUD={CLOUD}'], volume_mounts=volume_mounts, log_path=f'/batch/jvm-container-logs/jvm-{index}.log', diff --git a/batch/batch/worker/worker_api.py b/batch/batch/worker/worker_api.py index d665b61aa6a..016f759b467 100644 --- a/batch/batch/worker/worker_api.py +++ b/batch/batch/worker/worker_api.py @@ -1,6 +1,8 @@ import abc from typing import Dict, List, TypedDict +from aiohttp import web + from hailtop import httpx from hailtop.utils import CalledProcessError, sleep_before_try @@ -33,6 +35,10 @@ async def worker_container_registry_credentials(self, session: httpx.ClientSessi async def user_container_registry_credentials(self, credentials: Dict[str, str]) -> ContainerRegistryCredentials: raise NotImplementedError + @abc.abstractmethod + def create_metadata_server_app(self, 
credentials: Dict[str, str]) -> web.Application: + raise NotImplementedError + @abc.abstractmethod def instance_config_from_config_dict(self, config_dict: Dict[str, str]) -> InstanceConfig: raise NotImplementedError diff --git a/batch/test/test_batch.py b/batch/test/test_batch.py index 212a9e522e9..b5e4288a05e 100644 --- a/batch/test/test_batch.py +++ b/batch/test/test_batch.py @@ -8,7 +8,7 @@ import pytest from hailtop import httpx -from hailtop.auth import hail_credentials +from hailtop.auth import get_userinfo, hail_credentials from hailtop.batch.backend import HAIL_GENETICS_HAILTOP_IMAGE from hailtop.batch_client import BatchNotCreatedError, JobNotSubmittedError from hailtop.batch_client.aioclient import BatchClient as AioBatchClient @@ -1098,24 +1098,57 @@ def test_duplicate_parents(client: BatchClient): @skip_in_azure -def test_verify_no_access_to_google_metadata_server(client: BatchClient): +def test_hail_metadata_server_uses_correct_user_credentials(client: BatchClient): b = create_batch(client) - j = b.create_job(os.environ['HAIL_CURL_IMAGE'], ['curl', '-fsSL', 'metadata.google.internal', '--max-time', '10']) + userinfo = get_userinfo() + assert userinfo + hail_identity = userinfo['hail_identity'] + j = b.create_job( + os.environ['HAIL_CURL_IMAGE'], + ['curl', '-fsSL', 'metadata.google.internal/computeMetadata/v1/instance/service-accounts/', '--max-time', '10'], + ) b.submit() status = j.wait() - assert status['state'] == 'Failed', str((status, b.debug_info())) job_log = j.log() - assert "Could not resolve host" in job_log['main'], str((job_log, b.debug_info())) + service_accounts = set(sa.strip() for sa in job_log['main'].split()) + assert status['state'] == 'Success', str((status, b.debug_info())) + assert service_accounts == set(('default', hail_identity)) -def test_verify_no_access_to_metadata_server(client: BatchClient): +@skip_in_azure +def test_gcloud_works_with_hail_metadata_server(client: BatchClient): + b = create_batch(client) + token = secrets.token_urlsafe(16) + tmpdir = os.environ['HAIL_BATCH_REMOTE_TMPDIR'] + random_dir = f'{tmpdir}/{token}' + script = f""" +set -ex +unset GOOGLE_APPLICATION_CREDENTIALS +gcloud config list account +echo "hello" >hello.txt +gcloud storage cp hello.txt {random_dir}/hello.txt +gcloud storage ls {random_dir} +gcloud storage rm -r {random_dir}/ +""" + j = b.create_job(os.environ['CI_UTILS_IMAGE'], ['/bin/bash', '-c', script]) + b.submit() + status = j.wait() + assert status['state'] == 'Success', str((status, b.debug_info())) + + +def test_hail_metadata_server_available_only_in_gcp(client: BatchClient): + cloud = os.environ['HAIL_CLOUD'] b = create_batch(client) j = b.create_job(os.environ['HAIL_CURL_IMAGE'], ['curl', '-fsSL', '169.254.169.254', '--max-time', '10']) b.submit() status = j.wait() - assert status['state'] == 'Failed', str((status, b.debug_info())) - job_log = j.log() - assert "Connection timeout" in job_log['main'], str((job_log, b.debug_info())) + if cloud == 'gcp': + assert status['state'] == 'Success', str((status, b.debug_info())) + else: + assert cloud == 'azure' + assert status['state'] == 'Failed', str((status, b.debug_info())) + job_log = j.log() + assert "Connection timeout" in job_log['main'], str((job_log, b.debug_info())) def test_submit_batch_in_job(client: BatchClient, remote_tmpdir: str): diff --git a/build.yaml b/build.yaml index e4256804708..123542089d3 100644 --- a/build.yaml +++ b/build.yaml @@ -2730,7 +2730,7 @@ steps: export HAIL_CLOUD="{{ global.cloud }}" export HAIL_PRODUCTION_DOMAIN="{{ global.domain 
}}" export HAIL_GPU_IMAGE="{{ gpu_image.image }}" - hailctl config set batch/remote_tmpdir {{ global.test_storage_uri }}/test_batch/{{ token }}/ + export HAIL_BATCH_REMOTE_TMPDIR="{{ global.test_storage_uri }}/test_batch/{{ token }}/" hail-pip-install -r /io/dev-requirements.txt diff --git a/ci/Dockerfile.ci-utils b/ci/Dockerfile.ci-utils index ab87759cd5b..27e43e9c292 100644 --- a/ci/Dockerfile.ci-utils +++ b/ci/Dockerfile.ci-utils @@ -2,8 +2,8 @@ ARG BASE_IMAGE={{ base_image.image }} FROM $BASE_IMAGE AS base # source: https://cloud.google.com/storage/docs/gsutil_install#linux -RUN curl --remote-name https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-421.0.0-linux-x86_64.tar.gz && \ - tar -xf google-cloud-sdk-421.0.0-linux-x86_64.tar.gz && \ +RUN curl --remote-name https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-447.0.0-linux-x86_64.tar.gz && \ + tar -xf google-cloud-sdk-447.0.0-linux-x86_64.tar.gz && \ curl --remote-name https://dl.k8s.io/release/v1.21.14/bin/linux/amd64/kubectl && \ install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl ENV PATH $PATH:/google-cloud-sdk/bin diff --git a/hail/python/hailtop/aiocloud/aiogoogle/__init__.py b/hail/python/hailtop/aiocloud/aiogoogle/__init__.py index 29c9f980b91..c8e8d21a80d 100644 --- a/hail/python/hailtop/aiocloud/aiogoogle/__init__.py +++ b/hail/python/hailtop/aiocloud/aiogoogle/__init__.py @@ -5,12 +5,18 @@ GoogleComputeClient, GoogleIAmClient, GoogleLoggingClient, + GoogleMetadataServerClient, GoogleStorageClient, GCSRequesterPaysConfiguration, GoogleStorageAsyncFS, GoogleStorageAsyncFSFactory, ) -from .credentials import GoogleCredentials, GoogleApplicationDefaultCredentials, GoogleServiceAccountCredentials +from .credentials import ( + GoogleCredentials, + GoogleApplicationDefaultCredentials, + GoogleServiceAccountCredentials, + GoogleInstanceMetadataCredentials, +) from .user_config import get_gcs_requester_pays_configuration @@ -19,12 +25,14 @@ 'GoogleCredentials', 'GoogleApplicationDefaultCredentials', 'GoogleServiceAccountCredentials', + 'GoogleInstanceMetadataCredentials', 'GoogleBigQueryClient', 'GoogleBillingClient', 'GoogleContainerClient', 'GoogleComputeClient', 'GoogleIAmClient', 'GoogleLoggingClient', + 'GoogleMetadataServerClient', 'GoogleStorageClient', 'GoogleStorageAsyncFS', 'GoogleStorageAsyncFSFactory', diff --git a/hail/python/hailtop/aiocloud/aiogoogle/client/__init__.py b/hail/python/hailtop/aiocloud/aiogoogle/client/__init__.py index 97862eab671..d31a3b885c9 100644 --- a/hail/python/hailtop/aiocloud/aiogoogle/client/__init__.py +++ b/hail/python/hailtop/aiocloud/aiogoogle/client/__init__.py @@ -4,6 +4,7 @@ from .compute_client import GoogleComputeClient from .iam_client import GoogleIAmClient from .logging_client import GoogleLoggingClient +from .metadata_server_client import GoogleMetadataServerClient from .storage_client import ( GCSRequesterPaysConfiguration, GoogleStorageClient, @@ -18,6 +19,7 @@ 'GoogleComputeClient', 'GoogleIAmClient', 'GoogleLoggingClient', + 'GoogleMetadataServerClient', 'GCSRequesterPaysConfiguration', 'GoogleStorageClient', 'GoogleStorageAsyncFS', diff --git a/hail/python/hailtop/aiocloud/aiogoogle/client/metadata_server_client.py b/hail/python/hailtop/aiocloud/aiogoogle/client/metadata_server_client.py new file mode 100644 index 00000000000..b716830ae06 --- /dev/null +++ b/hail/python/hailtop/aiocloud/aiogoogle/client/metadata_server_client.py @@ -0,0 +1,30 @@ +from typing import Optional + +import aiohttp + +from hailtop import 
httpx +from hailtop.utils import retry_transient_errors + + +class GoogleMetadataServerClient: + def __init__(self, http_session: httpx.ClientSession): + self._session = http_session + self._project_id: Optional[str] = None + self._numeric_project_id: Optional[str] = None + + async def project(self) -> str: + if self._project_id is None: + self._project_id = await retry_transient_errors(self._get_text, '/project/project-id') + return self._project_id + + async def numeric_project_id(self) -> str: + if self._numeric_project_id is None: + self._numeric_project_id = await retry_transient_errors(self._get_text, '/project/numeric-project-id') + return self._numeric_project_id + + async def _get_text(self, path: str) -> str: + url = f'http://metadata.google.internal/computeMetadata/v1{path}' + headers = {'Metadata-Flavor': 'Google'} + timeout = aiohttp.ClientTimeout(total=60) + res = await self._session.get_read(url, headers=headers, timeout=timeout) + return res.decode('utf-8') diff --git a/hail/python/hailtop/aiocloud/aiogoogle/credentials.py b/hail/python/hailtop/aiocloud/aiogoogle/credentials.py index e2507017fbd..a98f7cbb08e 100644 --- a/hail/python/hailtop/aiocloud/aiogoogle/credentials.py +++ b/hail/python/hailtop/aiocloud/aiogoogle/credentials.py @@ -19,11 +19,13 @@ class GoogleExpiringAccessToken: def from_dict(data: dict) -> 'GoogleExpiringAccessToken': now = time.time() token = data['access_token'] - expiry_time = now + data['expires_in'] // 2 - return GoogleExpiringAccessToken(token, expiry_time) + expires_in = data['expires_in'] + expiry_time = now + expires_in // 2 + return GoogleExpiringAccessToken(token, expires_in, expiry_time) - def __init__(self, token, expiry_time: int): + def __init__(self, token, expires_in: int, expiry_time: int): self.token = token + self.expires_in = expires_in self._expiry_time = expiry_time def expired(self) -> bool: @@ -171,13 +173,17 @@ async def _get_access_token(self) -> GoogleExpiringAccessToken: # https://developers.google.com/identity/protocols/oauth2/service-account # studying `gcloud --log-http print-access-token` was also useful class GoogleServiceAccountCredentials(GoogleCredentials): - def __init__(self, key, **kwargs): + def __init__(self, key: dict, **kwargs): super().__init__(**kwargs) self.key = key def __str__(self): return f'GoogleServiceAccountCredentials for {self.key["client_email"]}' + @property + def email(self) -> str: + return self.key['client_email'] + async def _get_access_token(self) -> GoogleExpiringAccessToken: now = int(time.time()) scope = ' '.join(self._scopes) From f47efb4d4f95c9377cb1d15b4c06a61e4139334d Mon Sep 17 00:00:00 2001 From: Daniel Goldstein Date: Thu, 1 Feb 2024 09:42:43 -0500 Subject: [PATCH 17/26] [qob] Update scala deploy config to use new base_path field (#14195) I forgot to include the changes in #14056 to the scala code as well. This favors using `basePath` in the Scala deploy config over the `defaultNamespace`. 
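As a rough illustration of the intended routing (a sketch only — the function name and example values below are made up for this description, and the authoritative logic is the Scala `DeployConfig` change in the diff):

```python
# Sketch of how base_path is meant to shape service URLs.
from typing import Optional

def base_url(service: str, domain: str, base_path: Optional[str], scheme: str = 'https') -> str:
    if base_path is None:
        # default namespace: every service gets its own subdomain
        return f'{scheme}://{service}.{domain}'
    # non-default namespace: one shared domain, routed by path
    return f'{scheme}://{domain}{base_path}/{service}'

assert base_url('batch', 'hail.is', None) == 'https://batch.hail.is'
assert base_url('batch', 'internal.hail.is', '/my-ns') == 'https://internal.hail.is/my-ns/batch'
```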
--- .../scala/is/hail/services/DeployConfig.scala | 52 +++++++++++-------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/hail/src/main/scala/is/hail/services/DeployConfig.scala b/hail/src/main/scala/is/hail/services/DeployConfig.scala index 55ccdf36dfd..95ca0e01ece 100644 --- a/hail/src/main/scala/is/hail/services/DeployConfig.scala +++ b/hail/src/main/scala/is/hail/services/DeployConfig.scala @@ -42,7 +42,7 @@ object DeployConfig { if (file != null) { using(new FileInputStream(file))(in => fromConfig(JsonMethods.parse(in))) } else - fromConfig("external", "default", "hail.is") + fromConfig("external", "default", "hail.is", None) } def fromConfig(config: JValue): DeployConfig = { @@ -51,15 +51,28 @@ object DeployConfig { (config \ "location").extract[String], (config \ "default_namespace").extract[String], (config \ "domain").extract[Option[String]].getOrElse("hail.is"), + (config \ "base_path").extract[Option[String]], ) } - def fromConfig(location: String, defaultNamespace: String, domain: String): DeployConfig = - new DeployConfig( - sys.env.getOrElse(toEnvVarName("location"), location), - sys.env.getOrElse(toEnvVarName("default_namespace"), defaultNamespace), - sys.env.getOrElse(toEnvVarName("domain"), domain), - ) + def fromConfig( + locationFromConfig: String, + defaultNamespaceFromConfig: String, + domainFromConfig: String, + basePathFromConfig: Option[String], + ): DeployConfig = { + val location = sys.env.getOrElse(toEnvVarName("location"), locationFromConfig) + val defaultNamespace = + sys.env.getOrElse(toEnvVarName("default_namespace"), defaultNamespaceFromConfig) + val domain = sys.env.getOrElse(toEnvVarName("domain"), domainFromConfig) + val basePath = sys.env.get(toEnvVarName("basePath")).orElse(basePathFromConfig) + + (basePath, defaultNamespace) match { + case (None, ns) if ns != "default" => + new DeployConfig(location, ns, s"internal.$domain", Some(s"/$ns")) + case _ => new DeployConfig(location, defaultNamespace, domain, basePath) + } + } private[this] def toEnvVarName(s: String): String = "HAIL_" + s.toUpperCase @@ -69,6 +82,7 @@ class DeployConfig( val location: String, val defaultNamespace: String, val domain: String, + val basePath: Option[String], ) { def scheme(baseScheme: String = "http"): String = @@ -77,34 +91,28 @@ class DeployConfig( else baseScheme - def getServiceNamespace(service: String): String = - defaultNamespace - def domain(service: String): String = { - val ns = getServiceNamespace(service) location match { case "k8s" => - s"$service.$ns" + s"$service.$defaultNamespace" case "gce" => - if (ns == "default") + if (basePath.isEmpty) s"$service.hail" else "internal.hail" case "external" => - if (ns == "default") + if (basePath.isEmpty) s"$service.$domain" else - s"internal.$domain" + domain } } - def basePath(service: String): String = { - val ns = getServiceNamespace(service) - if (ns == "default") - "" - else - s"/$ns/$service" - } + def basePath(service: String): String = + basePath match { + case Some(base) => s"$base/$service" + case None => "" + } def baseUrl(service: String, baseScheme: String = "http"): String = s"${scheme(baseScheme)}://${domain(service)}${basePath(service)}" From 171b39df2f5e2754ff493e091513174e47cc8a3f Mon Sep 17 00:00:00 2001 From: jigold Date: Thu, 1 Feb 2024 12:32:37 -0500 Subject: [PATCH 18/26] [batch] Update IP Fee pricing for February 2024 price increase (#14190) Fixes #13784 Here's the GCP documentation: https://cloud.google.com/vpc/pricing-announce-external-ips We were previously billing the same 
IP-Fee for both spot and regular instances. I changed it so we're billing for each instance type accordingly. Following #13542, I hard coded the new resource rates. --- batch/batch/cloud/gcp/instance_config.py | 2 +- batch/batch/cloud/gcp/resources.py | 9 ++-- batch/sql/update-ip-fee-resource.py | 52 ++++++++++++++++++++++ build.yaml | 3 ++ hail/python/hailtop/batch/docs/service.rst | 23 ++++++---- 5 files changed, 75 insertions(+), 14 deletions(-) create mode 100644 batch/sql/update-ip-fee-resource.py diff --git a/batch/batch/cloud/gcp/instance_config.py b/batch/batch/cloud/gcp/instance_config.py index 02e96662c11..2789ff6adcc 100644 --- a/batch/batch/cloud/gcp/instance_config.py +++ b/batch/batch/cloud/gcp/instance_config.py @@ -57,7 +57,7 @@ def create( GCPStaticSizedDiskResource.create(product_versions, 'pd-ssd', boot_disk_size_gb, region), data_disk_resource, GCPDynamicSizedDiskResource.create(product_versions, 'pd-ssd', region), - GCPIPFeeResource.create(product_versions, 1024), + GCPIPFeeResource.create(product_versions, 1024, preemptible), GCPServiceFeeResource.create(product_versions), GCPSupportLogsSpecsAndFirewallFees.create(product_versions), ] diff --git a/batch/batch/cloud/gcp/resources.py b/batch/batch/cloud/gcp/resources.py index 5fe098fcebf..e058d9d0cf5 100644 --- a/batch/batch/cloud/gcp/resources.py +++ b/batch/batch/cloud/gcp/resources.py @@ -270,8 +270,9 @@ class GCPIPFeeResource(IPFeeResourceMixin, GCPResource): TYPE = 'gcp_ip_fee' @staticmethod - def product_name(base: int) -> str: - return f'ip-fee/{base}' + def product_name(base: int, preemptible: bool) -> str: + preemptible_str = 'preemptible' if preemptible else 'nonpreemptible' + return f'ip-fee/{preemptible_str}/{base}' @staticmethod def from_dict(data: Dict[str, Any]) -> 'GCPIPFeeResource': @@ -279,8 +280,8 @@ def from_dict(data: Dict[str, Any]) -> 'GCPIPFeeResource': return GCPIPFeeResource(data['name']) @staticmethod - def create(product_versions: ProductVersions, base: int) -> 'GCPIPFeeResource': - product = GCPIPFeeResource.product_name(base) + def create(product_versions: ProductVersions, base: int, preemptible: bool) -> 'GCPIPFeeResource': + product = GCPIPFeeResource.product_name(base, preemptible) name = product_versions.resource_name(product) assert name, product return GCPIPFeeResource(name) diff --git a/batch/sql/update-ip-fee-resource.py b/batch/sql/update-ip-fee-resource.py new file mode 100644 index 00000000000..5edc5893127 --- /dev/null +++ b/batch/sql/update-ip-fee-resource.py @@ -0,0 +1,52 @@ +import os +import asyncio +from gear import Database, transaction, Transaction + + +async def main(): + cloud = os.environ['HAIL_CLOUD'] + if cloud != 'gcp': + return + + db = Database() + await db.async_init() + try: + @transaction(db) + async def insert(tx: Transaction): + await tx.execute_many( + ''' +INSERT INTO latest_product_versions (product, version) +VALUES (%s, %s); +''', + [('ip-fee/preemptible/1024', '1'), + ('ip-fee/nonpreemptible/1024', '1')] + ) + + # https://cloud.google.com/vpc/pricing-announce-external-ips + # from hailtop.utils import rate_instance_hour_to_fraction_msec + # spot_ip_fee = rate_instance_hour_to_fraction_msec(0.0025, 1024) + spot_ip_fee = 6.781684027777778e-13 + # standard_ip_fee = rate_instance_hour_to_fraction_msec(0.005, 1024) + standard_ip_fee = 1.3563368055555557e-12 + + await tx.execute_many( + ''' +INSERT INTO resources (resource, rate) +VALUES (%s, %s); +''', + [('ip-fee/preemptible/1024/1', spot_ip_fee), + ('ip-fee/nonpreemptible/1024/1', standard_ip_fee)] + ) + + 
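+        # Backfill deduped_resource_id for the two rows inserted above so that
+        # each new resource is recorded as its own deduplicated resource.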
await tx.execute_update(''' +UPDATE resources +SET deduped_resource_id = resource_id +WHERE resource = 'ip-fee/preemptible/1024/1' OR resource = 'ip-fee/nonpreemptible/1024/1'; +''') + + await insert() + finally: + await db.async_close() + + +asyncio.run(main()) diff --git a/build.yaml b/build.yaml index 123542089d3..fedb42fc571 100644 --- a/build.yaml +++ b/build.yaml @@ -2361,6 +2361,9 @@ steps: - name: remove-v2-billing-writes script: /io/sql/remove-v2-billing-writes.sql online: true + - name: update-ip-fee-resource + script: /io/sql/update-ip-fee-resource.py + online: true inputs: - from: /repo/batch/sql to: /io/sql diff --git a/hail/python/hailtop/batch/docs/service.rst b/hail/python/hailtop/batch/docs/service.rst index f20beb1ce4c..a0d38c4bf20 100644 --- a/hail/python/hailtop/batch/docs/service.rst +++ b/hail/python/hailtop/batch/docs/service.rst @@ -105,7 +105,7 @@ Billing ------- The cost for executing a job depends on the underlying machine type, the region in which the VM is running in, -and how much CPU and memory is being requested. Currently, Batch runs most jobs on 16 core, preemptible, n1 +and how much CPU and memory is being requested. Currently, Batch runs most jobs on 16 core, spot, n1 machines with 10 GB of persistent SSD boot disk and 375 GB of local SSD. The costs are as follows: - Compute cost @@ -116,11 +116,11 @@ machines with 10 GB of persistent SSD boot disk and 375 GB of local SSD. The cos based on the current spot prices for a given worker type and the region in which the worker is running in. You can use :meth:`.Job.regions` to specify which regions to run a job in. - = $0.01 per core per hour for **preemptible standard** worker types + = $0.01 per core per hour for **spot standard** worker types - = $0.012453 per core per hour for **preemptible highmem** worker types + = $0.012453 per core per hour for **spot highmem** worker types - = $0.0074578 per core per hour for **preemptible highcpu** worker types + = $0.0074578 per core per hour for **spot highcpu** worker types = $0.04749975 per core per hour for **nonpreemptible standard** worker types @@ -163,22 +163,27 @@ machines with 10 GB of persistent SSD boot disk and 375 GB of local SSD. The cos - IP network cost - = $0.00025 per core per hour + = $0.0003125 per core per hour for **nonpreemptible** worker types + + = $0.00015625 per core per hour for **spot** worker types - Service cost = $0.01 per core per hour +- Logs, Specs, and Firewall Fee + = $0.005 per core per hour + -The sum of these costs is **$0.021935** per core/hour for standard workers, **$0.024388** per core/hour -for highmem workers, and **$0.019393** per core/hour for highcpu workers. There is also an additional +The sum of these costs is **$0.02684125** per core/hour for standard spot workers, **$0.02929425** per core/hour +for highmem spot workers, and **$0.02429905** per core/hour for highcpu spot workers. There is also an additional cost of **$0.00023** per GB per hour of extra storage requested. At any given moment as many as four cores of the cluster may come from a 4 core machine if the worker type is standard. If a job is scheduled on this machine, then the cost per core hour is **$0.02774** plus **$0.00023** per GB per hour storage of extra storage requested. -For jobs that run on non-preemptible machines, the costs are **$0.060462** per core/hour for standard workers, **$0.072114** per core/hour -for highmem workers, and **$0.048365** per core/hour for highcpu workers. 
+For jobs that run on non-preemptible machines, the costs are **$0.06449725** per core/hour for standard workers, **$0.076149** per core/hour +for highmem workers, and **$0.0524218** per core/hour for highcpu workers. .. note:: From 7f12473d5477274eb54fb0873efab3b008ad09f9 Mon Sep 17 00:00:00 2001 From: Dan King Date: Thu, 1 Feb 2024 15:10:41 -0500 Subject: [PATCH 19/26] [query] avoid code explosion for trivial upcasts (#14232) --- .../scala/is/hail/expr/ir/PruneDeadFields.scala | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/hail/src/main/scala/is/hail/expr/ir/PruneDeadFields.scala b/hail/src/main/scala/is/hail/expr/ir/PruneDeadFields.scala index 67a1a2f8d04..013b40b584b 100644 --- a/hail/src/main/scala/is/hail/expr/ir/PruneDeadFields.scala +++ b/hail/src/main/scala/is/hail/expr/ir/PruneDeadFields.scala @@ -2853,12 +2853,16 @@ object PruneDeadFields { ir else { val result = ir.typ match { - case _: TStruct => - bindIR(ir) { ref => - val ms = MakeStruct(rType.asInstanceOf[TStruct].fields.map { f => - f.name -> upcast(ctx, GetField(ref, f.name), f.typ) - }) - If(IsNA(ref), NA(ms.typ), ms) + case tstruct: TStruct => + if (rType.asInstanceOf[TStruct].fields.forall(f => tstruct.field(f.name).typ == f.typ)) { + SelectFields(ir, rType.asInstanceOf[TStruct].fields.map(f => f.name)) + } else { + bindIR(ir) { ref => + val ms = MakeStruct(rType.asInstanceOf[TStruct].fields.map { f => + f.name -> upcast(ctx, GetField(ref, f.name), f.typ) + }) + If(IsNA(ref), NA(ms.typ), ms) + } } case ts: TStream => val ra = rType.asInstanceOf[TStream] From dace919d1ab51cb8104aeaac4445555b28d601bc Mon Sep 17 00:00:00 2001 From: Dan King Date: Fri, 2 Feb 2024 10:25:39 -0500 Subject: [PATCH 20/26] [hailtop.batch] add default_regions to hb.Batch, improve docs (#14224) `hb.Batch` now supports `default_regions` which completes the natural hierarchy of: config, envvar, backend, batch, job. I went a little hog wild with examples. I think we should have more examples everywhere! The ServiceBackend doc page also had several basic formatting issues which I addressed. --- hail/python/hailtop/batch/backend.py | 110 ++++++++++++++---- hail/python/hailtop/batch/batch.py | 24 ++-- hail/python/hailtop/batch/docs/service.rst | 83 +++++++++++-- .../batch/test_batch_service_backend.py | 30 ++++- 4 files changed, 203 insertions(+), 44 deletions(-) diff --git a/hail/python/hailtop/batch/backend.py b/hail/python/hailtop/batch/backend.py index 3987ff41a84..c36594fc61e 100644 --- a/hail/python/hailtop/batch/backend.py +++ b/hail/python/hailtop/batch/backend.py @@ -413,42 +413,100 @@ async def _async_close(self): class ServiceBackend(Backend[bc.Batch]): - ANY_REGION: ClassVar[List[str]] = ['any_region'] - """Backend that executes batches on Hail's Batch Service on Google Cloud. Examples -------- - >>> service_backend = ServiceBackend(billing_project='my-billing-account', remote_tmpdir='gs://my-bucket/temporary-files/') # doctest: +SKIP - >>> b = Batch(backend=service_backend) # doctest: +SKIP + Create and use a backend that bills to the Hail Batch billing project named "my-billing-account" + and stores temporary intermediate files in "gs://my-bucket/temporary-files". + + >>> import hailtop.batch as hb + >>> service_backend = hb.ServiceBackend( + ... billing_project='my-billing-account', + ... remote_tmpdir='gs://my-bucket/temporary-files/' + ... 
) # doctest: +SKIP + >>> b = hb.Batch(backend=service_backend) # doctest: +SKIP + >>> j = b.new_job() # doctest: +SKIP + >>> j.command('echo hello world!') # doctest: +SKIP >>> b.run() # doctest: +SKIP - >>> service_backend.close() # doctest: +SKIP - If the Hail configuration parameters batch/billing_project and - batch/remote_tmpdir were previously set with ``hailctl config set``, then - one may elide the `billing_project` and `remote_tmpdir` parameters. + Same as above, but set the billing project and temporary intermediate folders via a + configuration file:: - >>> service_backend = ServiceBackend() - >>> b = Batch(backend=service_backend) - >>> b.run() # doctest: +SKIP - >>> service_backend.close() + cat >my-batch-script.py >>EOF + import hailtop.batch as hb + b = hb.Batch(backend=ServiceBackend()) + j = b.new_job() + j.command('echo hello world!') + b.run() + EOF + hailctl config set batch/billing_project my-billing-account + hailctl config set batch/remote_tmpdir gs://my-bucket/temporary-files/ + python3 my-batch-script.py + + Same as above, but also specify the use of the :class:`.ServiceBackend` via configuration file:: + + cat >my-batch-script.py >>EOF + import hailtop.batch as hb + b = hb.Batch() + j = b.new_job() + j.command('echo hello world!') + b.run() + EOF + hailctl config set batch/billing_project my-billing-account + hailctl config set batch/remote_tmpdir gs://my-bucket/temporary-files/ + hailctl config set batch/backend service + python3 my-batch-script.py + + Create a backend which stores temporary intermediate files in + "https://my-account.blob.core.windows.net/my-container/tempdir". + + >>> service_backend = hb.ServiceBackend( + ... billing_project='my-billing-account', + ... remote_tmpdir='https://my-account.blob.core.windows.net/my-container/tempdir' + ... ) # doctest: +SKIP + + Require all jobs in all batches in this backend to execute in us-central1:: + + >>> b = hb.Batch(backend=hb.ServiceBackend(regions=['us-central1'])) + + Same as above, but using a configuration file:: + + hailctl config set batch/regions us-central1 + python3 my-batch-script.py + Same as above, but using the ``HAIL_BATCH_REGIONS`` environment variable:: + + export HAIL_BATCH_REGIONS=us-central1 + python3 my-batch-script.py + + Permit jobs to execute in *either* us-central1 or us-east1:: + + >>> b = hb.Batch(backend=hb.ServiceBackend(regions=['us-central1', 'us-east1'])) + + Same as above, but using a configuration file:: + + hailctl config set batch/regions us-central1,us-east1 + + Allow reading or writing to buckets even though they are "cold" storage: + + >>> b = hb.Batch( + ... backend=hb.ServiceBackend( + ... gcs_bucket_allow_list=['cold-bucket', 'cold-bucket2'], + ... ), + ... ) Parameters ---------- billing_project: Name of billing project to use. bucket: - Name of bucket to use. Should not include the ``gs://`` prefix. Cannot be used with - `remote_tmpdir`. Temporary data will be stored in the "/batch" folder of this - bucket. This argument is deprecated. Use `remote_tmpdir` instead. + This argument is deprecated. Use `remote_tmpdir` instead. remote_tmpdir: - Temporary data will be stored in this cloud storage folder. Cannot be used with deprecated - argument `bucket`. Paths should match a GCS URI like gs:/// or an ABS - URI of the form https://.blob.core.windows.net//. + Temporary data will be stored in this cloud storage folder. google_project: - DEPRECATED. Please use gcs_requester_pays_configuration. + This argument is deprecated. Use `gcs_requester_pays_configuration` instead. 
gcs_requester_pays_configuration : either :class:`str` or :class:`tuple` of :class:`str` and :class:`list` of :class:`str`, optional If a string is provided, configure the Google Cloud Storage file system to bill usage to the project identified by that string. If a tuple is provided, configure the Google Cloud @@ -458,15 +516,19 @@ class ServiceBackend(Backend[bc.Batch]): The authorization token to pass to the batch client. Should only be set for user delegation purposes. regions: - Cloud region(s) to run jobs in. Use py:staticmethod:`.ServiceBackend.supported_regions` to list the - available regions to choose from. Use py:attribute:`.ServiceBackend.ANY_REGION` to signify the default is jobs - can run in any available region. The default is jobs can run in any region unless a default value has - been set with hailctl. An example invocation is `hailctl config set batch/regions "us-central1,us-east1"`. + Cloud regions in which jobs may run. :attr:`.ServiceBackend.ANY_REGION` indicates jobs may + run in any region. If unspecified or ``None``, the ``batch/regions`` Hail configuration + variable is consulted. See examples above. If none of these variables are set, then jobs may + run in any region. :meth:`.ServiceBackend.supported_regions` lists the available regions. gcs_bucket_allow_list: A list of buckets that the :class:`.ServiceBackend` should be permitted to read from or write to, even if their - default policy is to use "cold" storage. Should look like ``["bucket1", "bucket2"]``. + default policy is to use "cold" storage. + """ + ANY_REGION: ClassVar[List[str]] = ['any_region'] + """A special value that indicates a job may run in any region.""" + @staticmethod def supported_regions(): """ diff --git a/hail/python/hailtop/batch/batch.py b/hail/python/hailtop/batch/batch.py index f2370628db0..5026ebde873 100644 --- a/hail/python/hailtop/batch/batch.py +++ b/hail/python/hailtop/batch/batch.py @@ -24,7 +24,8 @@ class Batch: -------- Create a batch object: - >>> p = Batch() + >>> import hailtop.batch as hb + >>> p = hb.Batch() Create a new job that prints "hello": @@ -35,6 +36,10 @@ class Batch: >>> p.run() + Require all jobs in this batch to execute in us-central1: + + >>> b = hb.Batch(backend=hb.ServiceBackend(), default_regions=['us-central1']) + Notes ----- @@ -77,6 +82,9 @@ class Batch: default_storage: Storage setting to use by default if not specified by a job. Only applicable for the :class:`.ServiceBackend`. See :meth:`.Job.storage`. + default_regions: + Cloud regions in which jobs may run. When unspecified or ``None``, use the regions attribute of + :class:`.ServiceBackend`. See :class:`.ServiceBackend` for details. default_timeout: Maximum time in seconds for a job to run before being killed. Only applicable for the :class:`.ServiceBackend`. 
If `None`, there is no @@ -157,6 +165,7 @@ def __init__( default_memory: Optional[Union[int, str]] = None, default_cpu: Optional[Union[float, int, str]] = None, default_storage: Optional[Union[int, str]] = None, + default_regions: Optional[List[str]] = None, default_timeout: Optional[Union[float, int]] = None, default_shell: Optional[str] = None, default_python_image: Optional[str] = None, @@ -195,6 +204,9 @@ def __init__( self._default_memory = default_memory self._default_cpu = default_cpu self._default_storage = default_storage + self._default_regions = default_regions + if self._default_regions is None and isinstance(self._backend, _backend.ServiceBackend): + self._default_regions = self._backend.regions self._default_timeout = default_timeout self._default_shell = default_shell self._default_python_image = default_python_image @@ -316,14 +328,13 @@ def new_bash_job( j.cpu(self._default_cpu) if self._default_storage is not None: j.storage(self._default_storage) + if self._default_regions is not None: + j.regions(self._default_regions) if self._default_timeout is not None: j.timeout(self._default_timeout) if self._default_spot is not None: j.spot(self._default_spot) - if isinstance(self._backend, _backend.ServiceBackend): - j.regions(self._backend.regions) - self._jobs.append(j) return j @@ -388,14 +399,13 @@ def hello(name): j.cpu(self._default_cpu) if self._default_storage is not None: j.storage(self._default_storage) + if self._default_regions is not None: + j.regions(self._default_regions) if self._default_timeout is not None: j.timeout(self._default_timeout) if self._default_spot is not None: j.spot(self._default_spot) - if isinstance(self._backend, _backend.ServiceBackend): - j.regions(self._backend.regions) - self._jobs.append(j) return j diff --git a/hail/python/hailtop/batch/docs/service.rst b/hail/python/hailtop/batch/docs/service.rst index a0d38c4bf20..9b8eff78491 100644 --- a/hail/python/hailtop/batch/docs/service.rst +++ b/hail/python/hailtop/batch/docs/service.rst @@ -232,22 +232,15 @@ error messages in the terminal window. Submitting a Batch to the Service --------------------------------- +.. warning:: + + To avoid substantial network costs, ensure your jobs and data reside in the same `region`_. + To execute a batch on the Batch service rather than locally, first construct a :class:`.ServiceBackend` object with a billing project and bucket for storing intermediate files. Your service account must have read and write access to the bucket. -.. warning:: - - By default, the Batch Service runs jobs in any region in the US. Make sure you have considered additional `ingress and - egress fees `_ when using regional buckets and container or artifact - registries. Multi-regional buckets also have additional replication fees when writing data. A good rule of thumb is to use - a multi-regional artifact registry for Docker images and regional buckets for data. You can then specify which region(s) - you want your job to run in with :meth:`.Job.regions`. To set the default region(s) for all jobs, you can set the input - regions argument to :class:`.ServiceBackend` or use hailctl to set the default value. An example invocation is - `hailctl config set batch/regions "us-central1,us-east1"`. You can also get the full list of supported regions - with py:staticmethod:`.ServiceBackend.supported_regions`. - Next, pass the :class:`.ServiceBackend` object to the :class:`.Batch` constructor with the parameter name `backend`. @@ -257,7 +250,7 @@ and execute the following batch: .. 
code-block:: python - >>> import hailtop.batch as hb # doctest: +SKIP + >>> import hailtop.batch as hb >>> backend = hb.ServiceBackend('my-billing-project', remote_tmpdir='gs://my-bucket/batch/tmp/') # doctest: +SKIP >>> b = hb.Batch(backend=backend, name='test') # doctest: +SKIP >>> j = b.new_job(name='hello') # doctest: +SKIP @@ -276,6 +269,72 @@ have previously set them with ``hailctl``: A trial billing project is automatically created for you with the name {USERNAME}-trial +.. _region: + +Regions +------- + +Data and compute both reside in a physical location. In Google Cloud Platform, the location of data +is controlled by the location of the containing bucket. ``gcloud`` can determine the location of a +bucket:: + + gcloud storage buckets describe gs://my-bucket + +If your compute resides in a different location from the data it reads or writes, then you will +accrue substantial `network charges `__. + +To avoid network charges ensure all your data is in one region and specify that region in one of the +following five ways. As a running example, we consider data stored in `us-central1`. The options are +listed from highest to lowest precedence. + +1. :meth:`.Job.regions`: + + .. code-block:: python + + >>> b = hb.Batch(backend=hb.ServiceBackend()) + >>> j = b.new_job() + >>> j.regions(['us-central1']) + +2. The ``default_regions`` parameter of :class:`.Batch`: + + .. code-block:: python + + >>> b = hb.Batch(backend=hb.ServiceBackend(), default_regions=['us-central1']) + + +3. The ``regions`` parameter of :class:`.ServiceBackend`: + + .. code-block:: python + + >>> b = hb.Batch(backend=hb.ServiceBackend(regions=['us-central1'])) + +4. The ``HAIL_BATCH_REGIONS`` environment variable: + + .. code-block:: sh + + export HAIL_BATCH_REGIONS=us-central1 + python3 my-batch-script.py + +5. The ``batch/region`` configuration variable: + + .. code-block:: sh + + hailctl config set batch/regions us-central1 + python3 my-batch-script.py + +.. warning:: + + If none of the five options above are specified, your job may run in *any* region! + +In Google Cloud Platform, the location of a multi-region bucket is considered *different* from any +region within that multi-region. For example, if a VM in the `us-central1` region reads data from a +bucket in the `us` multi-region, this incurs network charges becuse `us` is not considered equal to +`us-central1`. + +Container (aka Docker) images are a form of data. In Google Cloud Platform, we recommend storing +your images in a multi-regional artifact registry, which at time of writing, despite being +"multi-regional", does not incur network charges in the manner described above. 
+ Using the UI ------------ diff --git a/hail/python/test/hailtop/batch/test_batch_service_backend.py b/hail/python/test/hailtop/batch/test_batch_service_backend.py index cb8f0291ff3..0932d53d296 100644 --- a/hail/python/test/hailtop/batch/test_batch_service_backend.py +++ b/hail/python/test/hailtop/batch/test_batch_service_backend.py @@ -798,7 +798,7 @@ async def foo(i, j): def test_specify_job_region(backend: ServiceBackend): - b = batch(backend, cancel_after_n_failures=1) + b = batch(backend) j = b.new_job('region') possible_regions = backend.supported_regions() j.regions(possible_regions) @@ -809,6 +809,34 @@ def test_specify_job_region(backend: ServiceBackend): assert res_status['state'] == 'success', str((res_status, res.debug_info())) +def test_job_regions_controls_job_execution_region(backend: ServiceBackend): + the_region = backend.supported_regions()[0] + + b = batch(backend) + j = b.new_job() + j.regions([the_region]) + j.command('true') + res = b.run() + + assert res + job_status = res.get_job(1).status() + assert job_status['status']['region'] == the_region, str((job_status, res.debug_info())) + + +def test_job_regions_overrides_batch_regions(backend: ServiceBackend): + the_region = backend.supported_regions()[0] + + b = batch(backend, default_regions=['some-other-region']) + j = b.new_job() + j.regions([the_region]) + j.command('true') + res = b.run() + + assert res + job_status = res.get_job(1).status() + assert job_status['status']['region'] == the_region, str((job_status, res.debug_info())) + + def test_always_copy_output(backend: ServiceBackend, output_tmpdir: str): output_path = os.path.join(output_tmpdir, 'test_always_copy_output.txt') From 3cb79ec3786d5616db6e84f79d0bc50fcf09c596 Mon Sep 17 00:00:00 2001 From: Dan King Date: Fri, 2 Feb 2024 11:11:18 -0500 Subject: [PATCH 21/26] [batch] silence instance logs (#14243) `oldwarn` is somehow `None` which spams us with instance log errors. We can revisit the warning level in a PR if this is really important. 
https://cloudlogging.app.goo.gl/VmUohrJSNo6EjsK56 --- batch/batch/worker/worker.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/batch/batch/worker/worker.py b/batch/batch/worker/worker.py index 64bcf90ac30..c265e80564f 100644 --- a/batch/batch/worker/worker.py +++ b/batch/batch/worker/worker.py @@ -14,7 +14,6 @@ import tempfile import traceback import uuid -import warnings from collections import defaultdict from contextlib import AsyncExitStack, ExitStack from typing import ( @@ -95,19 +94,6 @@ with open('/subdomains.txt', 'r', encoding='utf-8') as subdomains_file: HAIL_SERVICES = [line.rstrip() for line in subdomains_file.readlines()] -oldwarn = warnings.warn - - -def deeper_stack_level_warn(*args, **kwargs): - if 'stacklevel' in kwargs: - kwargs['stacklevel'] = max(kwargs['stacklevel'], 5) - else: - kwargs['stacklevel'] = 5 - return oldwarn(*args, **kwargs) - - -warnings.warn = deeper_stack_level_warn - class BatchWorkerAccessLogger(AccessLogger): def __init__(self, logger: logging.Logger, log_format: str): From d2615543476bde5d01061499c92f26124b85caf3 Mon Sep 17 00:00:00 2001 From: Dan King Date: Fri, 2 Feb 2024 14:21:47 -0500 Subject: [PATCH 22/26] [dependencies] mass update (#14233) --- batch/pinned-requirements.txt | 20 ++-- benchmark/python/pinned-requirements.txt | 6 +- ci/pinned-requirements.txt | 4 +- gear/pinned-requirements.txt | 16 +-- hail/python/dev/pinned-requirements.txt | 108 ++++++++---------- hail/python/dev/requirements.txt | 2 + .../hailtop/batch/batch_pool_executor.py | 4 +- hail/python/hailtop/pinned-requirements.txt | 34 +++--- hail/python/hailtop/utils/__init__.py | 2 + hail/python/hailtop/utils/utils.py | 6 + hail/python/pinned-requirements.txt | 50 ++++---- web_common/pinned-requirements.txt | 12 +- 12 files changed, 133 insertions(+), 131 deletions(-) diff --git a/batch/pinned-requirements.txt b/batch/pinned-requirements.txt index 6d32427ac40..b678b8b0db0 100644 --- a/batch/pinned-requirements.txt +++ b/batch/pinned-requirements.txt @@ -6,14 +6,14 @@ # aiodocker==0.21.0 # via -r hail/batch/requirements.txt -aiohttp==3.9.1 +aiohttp==3.9.3 # via # -c hail/batch/../gear/pinned-requirements.txt # -c hail/batch/../hail/python/dev/pinned-requirements.txt # -c hail/batch/../hail/python/pinned-requirements.txt # -c hail/batch/../web_common/pinned-requirements.txt # aiodocker -aiorwlock==1.3.0 +aiorwlock==1.4.0 # via -r hail/batch/requirements.txt aiosignal==1.3.1 # via @@ -30,7 +30,7 @@ async-timeout==4.0.3 # -c hail/batch/../web_common/pinned-requirements.txt # -r hail/batch/requirements.txt # aiohttp -attrs==23.1.0 +attrs==23.2.0 # via # -c hail/batch/../gear/pinned-requirements.txt # -c hail/batch/../hail/python/dev/pinned-requirements.txt @@ -39,7 +39,7 @@ attrs==23.1.0 # aiohttp dictdiffer==0.9.0 # via -r hail/batch/requirements.txt -frozenlist==1.4.0 +frozenlist==1.4.1 # via # -c hail/batch/../gear/pinned-requirements.txt # -c hail/batch/../hail/python/dev/pinned-requirements.txt @@ -62,7 +62,7 @@ multidict==6.0.4 # -c hail/batch/../web_common/pinned-requirements.txt # aiohttp # yarl -numpy==1.26.2 +numpy==1.26.3 # via # -c hail/batch/../hail/python/pinned-requirements.txt # pandas @@ -71,7 +71,7 @@ packaging==23.2 # -c hail/batch/../hail/python/dev/pinned-requirements.txt # -c hail/batch/../hail/python/pinned-requirements.txt # plotly -pandas==2.1.4 +pandas==2.2.0 # via # -c hail/batch/../hail/python/pinned-requirements.txt # -r hail/batch/requirements.txt @@ -85,7 +85,7 @@ python-dateutil==2.8.2 # -c 
hail/batch/../hail/python/dev/pinned-requirements.txt # -c hail/batch/../hail/python/pinned-requirements.txt # pandas -pytz==2023.3.post1 +pytz==2023.4 # via # -c hail/batch/../hail/python/pinned-requirements.txt # pandas @@ -99,16 +99,16 @@ tenacity==8.2.3 # via # -c hail/batch/../hail/python/pinned-requirements.txt # plotly -typing-extensions==4.8.0 +typing-extensions==4.9.0 # via # -c hail/batch/../hail/python/dev/pinned-requirements.txt # -c hail/batch/../hail/python/pinned-requirements.txt # aiodocker -tzdata==2023.3 +tzdata==2023.4 # via # -c hail/batch/../hail/python/pinned-requirements.txt # pandas -yarl==1.9.3 +yarl==1.9.4 # via # -c hail/batch/../gear/pinned-requirements.txt # -c hail/batch/../hail/python/dev/pinned-requirements.txt diff --git a/benchmark/python/pinned-requirements.txt b/benchmark/python/pinned-requirements.txt index 95ce5f36afa..6ecbfbec2ca 100644 --- a/benchmark/python/pinned-requirements.txt +++ b/benchmark/python/pinned-requirements.txt @@ -10,7 +10,7 @@ contourpy==1.2.0 # matplotlib cycler==0.12.1 # via matplotlib -fonttools==4.47.0 +fonttools==4.47.2 # via matplotlib importlib-resources==6.1.1 # via matplotlib @@ -18,7 +18,7 @@ kiwisolver==1.4.5 # via matplotlib matplotlib==3.8.2 # via -r hail/benchmark/python/requirements.txt -numpy==1.26.2 +numpy==1.26.3 # via # -c hail/benchmark/python/../../hail/python/pinned-requirements.txt # contourpy @@ -28,7 +28,7 @@ packaging==23.2 # -c hail/benchmark/python/../../hail/python/dev/pinned-requirements.txt # -c hail/benchmark/python/../../hail/python/pinned-requirements.txt # matplotlib -pillow==10.1.0 +pillow==10.2.0 # via # -c hail/benchmark/python/../../hail/python/dev/pinned-requirements.txt # -c hail/benchmark/python/../../hail/python/pinned-requirements.txt diff --git a/ci/pinned-requirements.txt b/ci/pinned-requirements.txt index 9bcfd442c2c..12daad20f0d 100644 --- a/ci/pinned-requirements.txt +++ b/ci/pinned-requirements.txt @@ -26,7 +26,7 @@ click==8.1.7 # -c hail/ci/../hail/python/dev/pinned-requirements.txt # -c hail/ci/../hail/python/pinned-requirements.txt # zulip -cryptography==41.0.7 +cryptography==42.0.2 # via # -c hail/ci/../hail/python/pinned-requirements.txt # pyjwt @@ -56,7 +56,7 @@ requests[security]==2.31.0 # -c hail/ci/../hail/python/dev/pinned-requirements.txt # -c hail/ci/../hail/python/pinned-requirements.txt # zulip -typing-extensions==4.8.0 +typing-extensions==4.9.0 # via # -c hail/ci/../hail/python/dev/pinned-requirements.txt # -c hail/ci/../hail/python/pinned-requirements.txt diff --git a/gear/pinned-requirements.txt b/gear/pinned-requirements.txt index 4189b27d894..244e2bb15a9 100644 --- a/gear/pinned-requirements.txt +++ b/gear/pinned-requirements.txt @@ -4,7 +4,7 @@ # # pip-compile --output-file=hail/gear/pinned-requirements.txt hail/gear/requirements.txt # -aiohttp==3.9.1 +aiohttp==3.9.3 # via # -c hail/gear/../hail/python/dev/pinned-requirements.txt # -c hail/gear/../hail/python/hailtop/pinned-requirements.txt @@ -27,7 +27,7 @@ async-timeout==4.0.3 # -c hail/gear/../hail/python/hailtop/pinned-requirements.txt # -c hail/gear/../hail/python/pinned-requirements.txt # aiohttp -attrs==23.1.0 +attrs==23.2.0 # via # -c hail/gear/../hail/python/dev/pinned-requirements.txt # -c hail/gear/../hail/python/hailtop/pinned-requirements.txt @@ -51,18 +51,18 @@ charset-normalizer==3.3.2 # -c hail/gear/../hail/python/hailtop/pinned-requirements.txt # -c hail/gear/../hail/python/pinned-requirements.txt # requests -frozenlist==1.4.0 +frozenlist==1.4.1 # via # -c 
hail/gear/../hail/python/dev/pinned-requirements.txt # -c hail/gear/../hail/python/hailtop/pinned-requirements.txt # -c hail/gear/../hail/python/pinned-requirements.txt # aiohttp # aiosignal -google-api-core==2.15.0 +google-api-core==2.16.1 # via google-api-python-client -google-api-python-client==2.111.0 +google-api-python-client==2.116.0 # via google-cloud-profiler -google-auth==2.23.4 +google-auth==2.27.0 # via # -c hail/gear/../hail/python/hailtop/pinned-requirements.txt # -c hail/gear/../hail/python/pinned-requirements.txt @@ -98,7 +98,7 @@ multidict==6.0.4 # -c hail/gear/../hail/python/pinned-requirements.txt # aiohttp # yarl -orjson==3.9.10 +orjson==3.9.12 # via # -c hail/gear/../hail/python/hailtop/pinned-requirements.txt # -c hail/gear/../hail/python/pinned-requirements.txt @@ -183,7 +183,7 @@ wrapt==1.16.0 # -c hail/gear/../hail/python/dev/pinned-requirements.txt # -c hail/gear/../hail/python/pinned-requirements.txt # prometheus-async -yarl==1.9.3 +yarl==1.9.4 # via # -c hail/gear/../hail/python/dev/pinned-requirements.txt # -c hail/gear/../hail/python/hailtop/pinned-requirements.txt diff --git a/hail/python/dev/pinned-requirements.txt b/hail/python/dev/pinned-requirements.txt index a7a0385d54d..96fb9d708b9 100644 --- a/hail/python/dev/pinned-requirements.txt +++ b/hail/python/dev/pinned-requirements.txt @@ -4,7 +4,7 @@ # # pip-compile --output-file=hail/hail/python/dev/pinned-requirements.txt hail/hail/python/dev/requirements.txt # -aiohttp==3.9.1 +aiohttp==3.9.3 # via # -c hail/hail/python/dev/../pinned-requirements.txt # aiohttp-devtools @@ -14,7 +14,7 @@ aiosignal==1.3.1 # via # -c hail/hail/python/dev/../pinned-requirements.txt # aiohttp -alabaster==0.7.13 +alabaster==0.7.16 # via sphinx anyio==4.2.0 # via @@ -38,7 +38,7 @@ async-timeout==4.0.3 # via # -c hail/hail/python/dev/../pinned-requirements.txt # aiohttp -attrs==23.1.0 +attrs==23.2.0 # via # -c hail/hail/python/dev/../pinned-requirements.txt # aiohttp @@ -49,7 +49,7 @@ babel==2.14.0 # via # jupyterlab-server # sphinx -beautifulsoup4==4.12.2 +beautifulsoup4==4.12.3 # via nbconvert bleach==6.1.0 # via nbconvert @@ -72,9 +72,8 @@ click==8.1.7 # -c hail/hail/python/dev/../pinned-requirements.txt # -r hail/hail/python/dev/requirements.txt # aiohttp-devtools - # black # curlylint -comm==0.2.0 +comm==0.2.1 # via # ipykernel # ipywidgets @@ -90,7 +89,7 @@ defusedxml==0.7.1 # via nbconvert devtools==0.12.2 # via aiohttp-devtools -dill==0.3.7 +dill==0.3.8 # via # -c hail/hail/python/dev/../pinned-requirements.txt # pylint @@ -112,13 +111,13 @@ executing==2.0.1 # via # devtools # stack-data -fastjsonschema==2.19.0 +fastjsonschema==2.19.1 # via nbformat filelock==3.13.1 # via virtualenv fqdn==1.5.1 # via jsonschema -frozenlist==1.4.0 +frozenlist==1.4.1 # via # -c hail/hail/python/dev/../pinned-requirements.txt # aiohttp @@ -136,7 +135,7 @@ idna==3.6 # yarl imagesize==1.4.1 # via sphinx -importlib-metadata==7.0.0 +importlib-metadata==7.0.1 # via # jupyter-client # jupyter-lsp @@ -146,7 +145,7 @@ importlib-metadata==7.0.0 # sphinx iniconfig==2.0.0 # via pytest -ipykernel==6.27.1 +ipykernel==6.29.0 # via # jupyter # jupyter-console @@ -178,12 +177,12 @@ json5==0.9.14 # via jupyterlab-server jsonpointer==2.4 # via jsonschema -jsonschema[format-nongpl]==4.20.0 +jsonschema[format-nongpl]==4.21.1 # via # jupyter-events # jupyterlab-server # nbformat -jsonschema-specifications==2023.11.2 +jsonschema-specifications==2023.12.1 # via jsonschema jupyter==1.0.0 # via -r hail/hail/python/dev/requirements.txt @@ -196,7 +195,7 @@ 
jupyter-client==8.6.0 # qtconsole jupyter-console==6.6.3 # via jupyter -jupyter-core==5.5.1 +jupyter-core==5.7.1 # via # ipykernel # jupyter-client @@ -209,16 +208,16 @@ jupyter-core==5.5.1 # qtconsole jupyter-events==0.9.0 # via jupyter-server -jupyter-lsp==2.2.1 +jupyter-lsp==2.2.2 # via jupyterlab -jupyter-server==2.12.1 +jupyter-server==2.12.5 # via # jupyter-lsp # jupyterlab # jupyterlab-server # notebook # notebook-shim -jupyter-server-terminals==0.5.0 +jupyter-server-terminals==0.5.2 # via jupyter-server jupyterlab==4.0.12 # via notebook @@ -232,7 +231,7 @@ jupyterlab-widgets==3.0.9 # via ipywidgets lazy-object-proxy==1.10.0 # via astroid -markupsafe==2.1.3 +markupsafe==2.1.4 # via # -c hail/hail/python/dev/../pinned-requirements.txt # jinja2 @@ -250,12 +249,11 @@ multidict==6.0.4 # -c hail/hail/python/dev/../pinned-requirements.txt # aiohttp # yarl -mypy-extensions==1.0.0 - # via black nbclient==0.9.0 # via nbconvert -nbconvert==7.13.0 +nbconvert==7.13.1 # via + # -r hail/hail/python/dev/requirements.txt # jupyter # jupyter-server # nbsphinx @@ -267,7 +265,7 @@ nbformat==5.9.2 # nbsphinx nbsphinx==0.9.3 # via -r hail/hail/python/dev/requirements.txt -nest-asyncio==1.5.8 +nest-asyncio==1.6.0 # via # -c hail/hail/python/dev/../pinned-requirements.txt # ipykernel @@ -275,13 +273,13 @@ nodeenv==1.8.0 # via # pre-commit # pyright -notebook==7.0.6 +notebook==7.0.7 # via jupyter notebook-shim==0.2.3 # via # jupyterlab # notebook -overrides==7.4.0 +overrides==7.7.0 # via jupyter-server packaging==23.2 # via @@ -295,29 +293,26 @@ packaging==23.2 # qtconsole # qtpy # sphinx -pandocfilters==1.5.0 +pandocfilters==1.5.1 # via nbconvert parso==0.8.3 # via jedi parsy==1.1.0 # via curlylint pathspec==0.12.1 - # via - # black - # curlylint + # via curlylint pexpect==4.9.0 # via ipython -pillow==10.1.0 +pillow==10.2.0 # via # -c hail/hail/python/dev/../pinned-requirements.txt # -r hail/hail/python/dev/requirements.txt -platformdirs==4.1.0 +platformdirs==4.2.0 # via - # black # jupyter-core # pylint # virtualenv -pluggy==1.3.0 +pluggy==1.4.0 # via pytest pre-commit==3.6.0 # via -r hail/hail/python/dev/requirements.txt @@ -327,7 +322,7 @@ prompt-toolkit==3.0.43 # via # ipython # jupyter-console -psutil==5.9.7 +psutil==5.9.8 # via ipykernel ptyprocess==0.7.0 # via @@ -353,9 +348,9 @@ pygments==2.17.2 # sphinx pylint==2.17.7 # via -r hail/hail/python/dev/requirements.txt -pyright==1.1.341 +pyright==1.1.349 # via -r hail/hail/python/dev/requirements.txt -pytest==7.4.3 +pytest==7.4.4 # via # -r hail/hail/python/dev/requirements.txt # pytest-asyncio @@ -373,7 +368,7 @@ pytest-html==1.22.1 # via -r hail/hail/python/dev/requirements.txt pytest-instafail==0.5.0 # via -r hail/hail/python/dev/requirements.txt -pytest-metadata==3.0.0 +pytest-metadata==3.1.0 # via pytest-html pytest-timeout==2.2.0 # via -r hail/hail/python/dev/requirements.txt @@ -406,7 +401,7 @@ qtconsole==5.5.1 # via jupyter qtpy==2.4.1 # via qtconsole -referencing==0.32.0 +referencing==0.33.0 # via # jsonschema # jsonschema-specifications @@ -424,7 +419,7 @@ rfc3986-validator==0.1.1 # via # jsonschema # jupyter-events -rpds-py==0.15.2 +rpds-py==0.17.1 # via # jsonschema # referencing @@ -451,22 +446,17 @@ sphinx==6.2.1 # nbsphinx # sphinx-autodoc-typehints # sphinx-rtd-theme - # sphinxcontrib-applehelp - # sphinxcontrib-devhelp - # sphinxcontrib-htmlhelp # sphinxcontrib-jquery # sphinxcontrib-katex - # sphinxcontrib-qthelp - # sphinxcontrib-serializinghtml sphinx-autodoc-typehints==1.23.0 # via -r hail/hail/python/dev/requirements.txt 
sphinx-rtd-theme==1.3.0 # via -r hail/hail/python/dev/requirements.txt -sphinxcontrib-applehelp==1.0.7 +sphinxcontrib-applehelp==1.0.8 # via sphinx -sphinxcontrib-devhelp==1.0.5 +sphinxcontrib-devhelp==1.0.6 # via sphinx -sphinxcontrib-htmlhelp==2.0.4 +sphinxcontrib-htmlhelp==2.0.5 # via sphinx sphinxcontrib-jquery==4.1 # via sphinx-rtd-theme @@ -474,9 +464,9 @@ sphinxcontrib-jsmath==1.0.1 # via sphinx sphinxcontrib-katex==0.9.9 # via -r hail/hail/python/dev/requirements.txt -sphinxcontrib-qthelp==1.0.6 +sphinxcontrib-qthelp==1.0.7 # via sphinx -sphinxcontrib-serializinghtml==1.1.9 +sphinxcontrib-serializinghtml==1.1.10 # via sphinx stack-data==0.6.3 # via ipython @@ -490,7 +480,6 @@ toml==0.10.2 # via curlylint tomli==2.0.1 # via - # black # jupyterlab # pylint # pytest @@ -505,7 +494,7 @@ tornado==6.4 # jupyterlab # notebook # terminado -traitlets==5.14.0 +traitlets==5.14.1 # via # comm # ipykernel @@ -525,13 +514,13 @@ traitlets==5.14.0 # qtconsole types-chardet==5.0.4.6 # via -r hail/hail/python/dev/requirements.txt -types-decorator==5.1.8.4 +types-decorator==5.1.8.20240106 # via -r hail/hail/python/dev/requirements.txt -types-deprecated==1.2.9.3 +types-deprecated==1.2.9.20240106 # via -r hail/hail/python/dev/requirements.txt types-pymysql==1.1.0.1 # via -r hail/hail/python/dev/requirements.txt -types-python-dateutil==2.8.19.14 +types-python-dateutil==2.8.19.20240106 # via # -r hail/hail/python/dev/requirements.txt # arrow @@ -539,23 +528,22 @@ types-pyyaml==6.0.12.12 # via -r hail/hail/python/dev/requirements.txt types-requests==2.31.0.6 # via -r hail/hail/python/dev/requirements.txt -types-setuptools==69.0.0.0 +types-setuptools==69.0.0.20240125 # via -r hail/hail/python/dev/requirements.txt -types-six==1.16.21.9 +types-six==1.16.21.20240106 # via -r hail/hail/python/dev/requirements.txt -types-tabulate==0.9.0.3 +types-tabulate==0.9.0.20240106 # via -r hail/hail/python/dev/requirements.txt types-urllib3==1.26.25.14 # via # -r hail/hail/python/dev/requirements.txt # types-requests -typing-extensions==4.8.0 +typing-extensions==4.9.0 # via # -c hail/hail/python/dev/../pinned-requirements.txt # anyio # astroid # async-lru - # black # ipython # pylint uri-template==1.3.0 @@ -568,7 +556,7 @@ virtualenv==20.25.0 # via pre-commit watchfiles==0.21.0 # via aiohttp-devtools -wcwidth==0.2.12 +wcwidth==0.2.13 # via prompt-toolkit webcolors==1.13 # via jsonschema @@ -586,7 +574,7 @@ wrapt==1.16.0 # via # -c hail/hail/python/dev/../pinned-requirements.txt # astroid -yarl==1.9.3 +yarl==1.9.4 # via # -c hail/hail/python/dev/../pinned-requirements.txt # aiohttp diff --git a/hail/python/dev/requirements.txt b/hail/python/dev/requirements.txt index 5dc26e18440..51c57b7bb92 100644 --- a/hail/python/dev/requirements.txt +++ b/hail/python/dev/requirements.txt @@ -23,6 +23,8 @@ jupyter>=1.0.0,<2 sphinxcontrib.katex>=0.9.0,<1 fswatch>=0.1.1,<1 wheel>=0.41,<0.42 +# https://github.com/jupyter/nbconvert/issues/2092 +nbconvert<7.14 # library type stubs types-Deprecated diff --git a/hail/python/hailtop/batch/batch_pool_executor.py b/hail/python/hailtop/batch/batch_pool_executor.py index 8e8a5102572..c5b88349478 100644 --- a/hail/python/hailtop/batch/batch_pool_executor.py +++ b/hail/python/hailtop/batch/batch_pool_executor.py @@ -7,7 +7,7 @@ import dill import functools -from hailtop.utils import secret_alnum_string, partition, async_to_blocking +from hailtop.utils import secret_alnum_string, partition, async_to_blocking, the_empty_async_generator import hailtop.batch_client.aioclient as low_level_batch_client 
from hailtop.batch_client.parse import parse_cpu_in_mcpu from hailtop.aiotools.router_fs import RouterAsyncFS @@ -232,7 +232,7 @@ async def async_map( ) -> AsyncGenerator[int, None]: """Aysncio compatible version of :meth:`.map`.""" if not iterables: - return (x for x in range(0)) + return the_empty_async_generator() if chunksize > 1: list_per_argument = [list(x) for x in iterables] diff --git a/hail/python/hailtop/pinned-requirements.txt b/hail/python/hailtop/pinned-requirements.txt index 70adf3f3f6e..b6c845832e8 100644 --- a/hail/python/hailtop/pinned-requirements.txt +++ b/hail/python/hailtop/pinned-requirements.txt @@ -6,17 +6,17 @@ # aiodns==2.0.0 # via -r hail/hail/python/hailtop/requirements.txt -aiohttp==3.9.1 +aiohttp==3.9.3 # via -r hail/hail/python/hailtop/requirements.txt aiosignal==1.3.1 # via aiohttp async-timeout==4.0.3 # via aiohttp -attrs==23.1.0 +attrs==23.2.0 # via aiohttp azure-common==1.1.28 # via azure-mgmt-storage -azure-core==1.29.5 +azure-core==1.29.7 # via # azure-identity # azure-mgmt-core @@ -30,9 +30,9 @@ azure-mgmt-storage==20.1.0 # via -r hail/hail/python/hailtop/requirements.txt azure-storage-blob==12.19.0 # via -r hail/hail/python/hailtop/requirements.txt -boto3==1.33.1 +boto3==1.34.32 # via -r hail/hail/python/hailtop/requirements.txt -botocore==1.33.1 +botocore==1.34.32 # via # -r hail/hail/python/hailtop/requirements.txt # boto3 @@ -53,20 +53,20 @@ click==8.1.7 # via typer commonmark==0.9.1 # via rich -cryptography==41.0.7 +cryptography==42.0.2 # via # azure-identity # azure-storage-blob # msal # pyjwt -dill==0.3.7 +dill==0.3.8 # via -r hail/hail/python/hailtop/requirements.txt -frozenlist==1.4.0 +frozenlist==1.4.1 # via # -r hail/hail/python/hailtop/requirements.txt # aiohttp # aiosignal -google-auth==2.23.4 +google-auth==2.27.0 # via # -r hail/hail/python/hailtop/requirements.txt # google-auth-oauthlib @@ -90,11 +90,11 @@ jmespath==1.0.1 # botocore jproperties==2.1.1 # via -r hail/hail/python/hailtop/requirements.txt -msal==1.25.0 +msal==1.26.0 # via # azure-identity # msal-extensions -msal-extensions==1.0.0 +msal-extensions==1.1.0 # via azure-identity msrest==0.7.1 # via azure-mgmt-storage @@ -102,12 +102,14 @@ multidict==6.0.4 # via # aiohttp # yarl -nest-asyncio==1.5.8 +nest-asyncio==1.6.0 # via -r hail/hail/python/hailtop/requirements.txt oauthlib==3.2.2 # via requests-oauthlib -orjson==3.9.10 +orjson==3.9.12 # via -r hail/hail/python/hailtop/requirements.txt +packaging==23.2 + # via msal-extensions portalocker==2.8.2 # via msal-extensions protobuf==3.20.2 @@ -146,7 +148,7 @@ rich==12.6.0 # via -r hail/hail/python/hailtop/requirements.txt rsa==4.9 # via google-auth -s3transfer==0.8.0 +s3transfer==0.10.0 # via boto3 six==1.16.0 # via @@ -160,7 +162,7 @@ tabulate==0.9.0 # via -r hail/hail/python/hailtop/requirements.txt typer==0.9.0 # via -r hail/hail/python/hailtop/requirements.txt -typing-extensions==4.8.0 +typing-extensions==4.9.0 # via # azure-core # azure-storage-blob @@ -172,5 +174,5 @@ urllib3==1.26.18 # requests uvloop==0.19.0 ; sys_platform != "win32" # via -r hail/hail/python/hailtop/requirements.txt -yarl==1.9.3 +yarl==1.9.4 # via aiohttp diff --git a/hail/python/hailtop/utils/__init__.py b/hail/python/hailtop/utils/__init__.py index c86dd92cae0..3a1e4295c0e 100644 --- a/hail/python/hailtop/utils/__init__.py +++ b/hail/python/hailtop/utils/__init__.py @@ -1,6 +1,7 @@ from .time import time_msecs, time_msecs_str, humanize_timedelta_msecs, parse_timestamp_msecs, time_ns from .utils import ( unzip, + the_empty_async_generator, 
async_to_blocking, blocking_to_async, AsyncWorkerPool, @@ -72,6 +73,7 @@ __all__ = [ 'time_msecs', + 'the_empty_async_generator', 'time_msecs_str', 'humanize_timedelta_msecs', 'unzip', diff --git a/hail/python/hailtop/utils/utils.py b/hail/python/hailtop/utils/utils.py index 78a3abd7c9a..ded19ee8e20 100644 --- a/hail/python/hailtop/utils/utils.py +++ b/hail/python/hailtop/utils/utils.py @@ -13,6 +13,7 @@ AsyncIterator, Iterator, Union, + AsyncGenerator, ) from typing import Literal, Sequence from typing_extensions import ParamSpec @@ -64,6 +65,11 @@ P = ParamSpec("P") +async def the_empty_async_generator() -> AsyncGenerator[T, None]: + if False: # pylint: disable=using-constant-test + yield # The appearance of the keyword `yield` forces Python to make this function into a generator + + def unpack_comma_delimited_inputs(inputs: List[str]) -> List[str]: return [s.strip() for comma_separated_steps in inputs for s in comma_separated_steps.split(',') if s.strip()] diff --git a/hail/python/pinned-requirements.txt b/hail/python/pinned-requirements.txt index 56a6d00cb26..edd6d15b216 100644 --- a/hail/python/pinned-requirements.txt +++ b/hail/python/pinned-requirements.txt @@ -8,7 +8,7 @@ aiodns==2.0.0 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # -r hail/hail/python/hailtop/requirements.txt -aiohttp==3.9.1 +aiohttp==3.9.3 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # -r hail/hail/python/hailtop/requirements.txt @@ -20,7 +20,7 @@ async-timeout==4.0.3 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # aiohttp -attrs==23.1.0 +attrs==23.2.0 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # aiohttp @@ -30,7 +30,7 @@ azure-common==1.1.28 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # azure-mgmt-storage -azure-core==1.29.5 +azure-core==1.29.7 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # azure-identity @@ -53,13 +53,13 @@ azure-storage-blob==12.19.0 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # -r hail/hail/python/hailtop/requirements.txt -bokeh==3.3.2 +bokeh==3.3.4 # via -r hail/hail/python/requirements.txt -boto3==1.33.1 +boto3==1.34.32 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # -r hail/hail/python/hailtop/requirements.txt -botocore==1.33.1 +botocore==1.34.32 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # -r hail/hail/python/hailtop/requirements.txt @@ -93,7 +93,7 @@ commonmark==0.9.1 # rich contourpy==1.2.0 # via bokeh -cryptography==41.0.7 +cryptography==42.0.2 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # azure-identity @@ -104,17 +104,17 @@ decorator==4.4.2 # via -r hail/hail/python/requirements.txt deprecated==1.2.14 # via -r hail/hail/python/requirements.txt -dill==0.3.7 +dill==0.3.8 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # -r hail/hail/python/hailtop/requirements.txt -frozenlist==1.4.0 +frozenlist==1.4.1 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # -r hail/hail/python/hailtop/requirements.txt # aiohttp # aiosignal -google-auth==2.23.4 +google-auth==2.27.0 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # -r hail/hail/python/hailtop/requirements.txt @@ -152,14 +152,14 @@ jproperties==2.1.1 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # -r hail/hail/python/hailtop/requirements.txt -markupsafe==2.1.3 +markupsafe==2.1.4 # via jinja2 -msal==1.25.0 +msal==1.26.0 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # azure-identity # msal-extensions -msal-extensions==1.0.0 
+msal-extensions==1.1.0 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # azure-identity @@ -172,11 +172,11 @@ multidict==6.0.4 # -c hail/hail/python/hailtop/pinned-requirements.txt # aiohttp # yarl -nest-asyncio==1.5.8 +nest-asyncio==1.6.0 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # -r hail/hail/python/hailtop/requirements.txt -numpy==1.26.2 +numpy==1.26.3 # via # -r hail/hail/python/requirements.txt # bokeh @@ -187,21 +187,23 @@ oauthlib==3.2.2 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # requests-oauthlib -orjson==3.9.10 +orjson==3.9.12 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # -r hail/hail/python/hailtop/requirements.txt packaging==23.2 # via + # -c hail/hail/python/hailtop/pinned-requirements.txt # bokeh + # msal-extensions # plotly -pandas==2.1.4 +pandas==2.2.0 # via # -r hail/hail/python/requirements.txt # bokeh parsimonious==0.10.0 # via -r hail/hail/python/requirements.txt -pillow==10.1.0 +pillow==10.2.0 # via bokeh plotly==5.18.0 # via -r hail/hail/python/requirements.txt @@ -254,14 +256,14 @@ python-json-logger==2.0.7 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # -r hail/hail/python/hailtop/requirements.txt -pytz==2023.3.post1 +pytz==2023.4 # via pandas pyyaml==6.0.1 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # -r hail/hail/python/hailtop/requirements.txt # bokeh -regex==2023.10.3 +regex==2023.12.25 # via parsimonious requests==2.31.0 # via @@ -284,7 +286,7 @@ rsa==4.9 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # google-auth -s3transfer==0.8.0 +s3transfer==0.10.0 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # boto3 @@ -313,14 +315,14 @@ typer==0.9.0 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # -r hail/hail/python/hailtop/requirements.txt -typing-extensions==4.8.0 +typing-extensions==4.9.0 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # azure-core # azure-storage-blob # janus # typer -tzdata==2023.3 +tzdata==2023.4 # via pandas urllib3==1.26.18 # via @@ -335,7 +337,7 @@ wrapt==1.16.0 # via deprecated xyzservices==2023.10.1 # via bokeh -yarl==1.9.3 +yarl==1.9.4 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # aiohttp diff --git a/web_common/pinned-requirements.txt b/web_common/pinned-requirements.txt index de64e419ff1..308986c378b 100644 --- a/web_common/pinned-requirements.txt +++ b/web_common/pinned-requirements.txt @@ -4,7 +4,7 @@ # # pip-compile --output-file=hail/web_common/pinned-requirements.txt hail/web_common/requirements.txt # -aiohttp==3.9.1 +aiohttp==3.9.3 # via # -c hail/web_common/../gear/pinned-requirements.txt # -c hail/web_common/../hail/python/dev/pinned-requirements.txt @@ -24,13 +24,13 @@ async-timeout==4.0.3 # -c hail/web_common/../hail/python/dev/pinned-requirements.txt # -c hail/web_common/../hail/python/pinned-requirements.txt # aiohttp -attrs==23.1.0 +attrs==23.2.0 # via # -c hail/web_common/../gear/pinned-requirements.txt # -c hail/web_common/../hail/python/dev/pinned-requirements.txt # -c hail/web_common/../hail/python/pinned-requirements.txt # aiohttp -frozenlist==1.4.0 +frozenlist==1.4.1 # via # -c hail/web_common/../gear/pinned-requirements.txt # -c hail/web_common/../hail/python/dev/pinned-requirements.txt @@ -49,9 +49,9 @@ jinja2==3.1.3 # -c hail/web_common/../hail/python/pinned-requirements.txt # -r hail/web_common/requirements.txt # aiohttp-jinja2 -libsass==0.22.0 +libsass==0.23.0 # via -r hail/web_common/requirements.txt -markupsafe==2.1.3 +markupsafe==2.1.4 # via # -c 
hail/web_common/../hail/python/dev/pinned-requirements.txt # -c hail/web_common/../hail/python/pinned-requirements.txt @@ -63,7 +63,7 @@ multidict==6.0.4 # -c hail/web_common/../hail/python/pinned-requirements.txt # aiohttp # yarl -yarl==1.9.3 +yarl==1.9.4 # via # -c hail/web_common/../gear/pinned-requirements.txt # -c hail/web_common/../hail/python/dev/pinned-requirements.txt From d4679ebd8824777826a571edbccb687018670c4d Mon Sep 17 00:00:00 2001 From: Dan King Date: Fri, 2 Feb 2024 18:37:08 -0500 Subject: [PATCH 23/26] [query] Use valid globals reference in MWZJ and TABK (#14246) CHANGELOG: Fix a bug, introduced in 0.2.114, in which `Table.multi_way_zip_join` and `Table.aggregate_by_key` could throw "NoSuchElementException: Ref with name `__iruid_...`" when one or more of the tables had a number of partitions substantially different from the desired number of output partitions. Fixes https://github.com/hail-is/hail/issues/14245. In both MultiWayZipJoin and TableAggregateByKey, we repartition the child but neglect to use the new globals `Ref` from the repartitioned child. As long as `repartitionNoShuffle` does not create a new TableStage with new globals, this is fine, but that is not, in general, true. It seems that recently, in lowered backends, when the repartition cost is deemed "high" we generate a fresh TableStage with a fresh globals ref. --- hail/python/test/hail/table/test_table.py | 17 +++++ .../hail/expr/ir/lowering/LowerTableIR.scala | 65 ++++++++++--------- 2 files changed, 51 insertions(+), 31 deletions(-) diff --git a/hail/python/test/hail/table/test_table.py b/hail/python/test/hail/table/test_table.py index ed2c5d6bc09..f76659aa8db 100644 --- a/hail/python/test/hail/table/test_table.py +++ b/hail/python/test/hail/table/test_table.py @@ -668,6 +668,23 @@ def test_multi_way_zip_join_key_downcast2(self): ht = hl.Table.multi_way_zip_join(vcfs, 'data', 'new_globals') assert exp_count == ht._force_count() + def test_multi_way_zip_join_highly_unbalanced_partitions__issue_14245(self): + def import_vcf(file: str, partitions: int): + return ( + hl.import_vcf(file, force_bgz=True, reference_genome='GRCh38', min_partitions=partitions) + .rows() + .select() + ) + + hl.Table.multi_way_zip_join( + [ + import_vcf(resource('gvcfs/HG00096.g.vcf.gz'), 100), + import_vcf(resource('gvcfs/HG00268.g.vcf.gz'), 1), + ], + 'data', + 'new_globals', + ).write(new_temp_file(extension='ht')) + def test_index_maintains_count(self): t1 = hl.Table.parallelize( [{'a': 'foo', 'b': 1}, {'a': 'bar', 'b': 2}, {'a': 'bar', 'b': 2}], diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala index b1e5451131c..60212f0de1d 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala @@ -1197,39 +1197,39 @@ object LowerTableIR { case TableAggregateByKey(child, expr) => val loweredChild = lower(child) - - loweredChild.repartitionNoShuffle( + val repartitioned = loweredChild.repartitionNoShuffle( ctx, loweredChild.partitioner.coarsen(child.typ.key.length).strictify(), ) - .mapPartition(Some(child.typ.key)) { partition => - Let( - FastSeq("global" -> loweredChild.globals), - mapIR(StreamGroupByKey(partition, child.typ.key, missingEqual = true)) { groupRef => - StreamAgg( - groupRef, - "row", - bindIRs( - ArrayRef( - ApplyAggOp( - FastSeq(I32(1)), - FastSeq(SelectFields(Ref("row", child.typ.rowType), child.typ.key)), - AggSignature(Take(), 
FastSeq(TInt32), FastSeq(child.typ.keyType)), - ), - I32(0), - ), // FIXME: would prefer a First() agg op - expr, - ) { case Seq(key, value) => - MakeStruct(child.typ.key.map(k => - (k, GetField(key, k)) - ) ++ expr.typ.asInstanceOf[TStruct].fieldNames.map { f => - (f, GetField(value, f)) - }) - }, - ) - }, - ) - } + + repartitioned.mapPartition(Some(child.typ.key)) { partition => + Let( + FastSeq("global" -> repartitioned.globals), + mapIR(StreamGroupByKey(partition, child.typ.key, missingEqual = true)) { groupRef => + StreamAgg( + groupRef, + "row", + bindIRs( + ArrayRef( + ApplyAggOp( + FastSeq(I32(1)), + FastSeq(SelectFields(Ref("row", child.typ.rowType), child.typ.key)), + AggSignature(Take(), FastSeq(TInt32), FastSeq(child.typ.keyType)), + ), + I32(0), + ), // FIXME: would prefer a First() agg op + expr, + ) { case Seq(key, value) => + MakeStruct(child.typ.key.map(k => + (k, GetField(key, k)) + ) ++ expr.typ.asInstanceOf[TStruct].fieldNames.map { f => + (f, GetField(value, f)) + }) + }, + ) + }, + ) + } case TableDistinct(child) => val loweredChild = lower(child) @@ -2155,7 +2155,10 @@ object LowerTableIR { ) val repartitioned = lowered.map(_.repartitionNoShuffle(ctx, newPartitioner)) val newGlobals = MakeStruct(FastSeq( - globalName -> MakeArray(lowered.map(_.globals), TArray(lowered.head.globalType)) + globalName -> MakeArray( + repartitioned.map(_.globals), + TArray(repartitioned.head.globalType), + ) )) val globalsRef = Ref(genUID(), newGlobals.typ) From 0b929233e1eddecd3399ac328330c59279344bf8 Mon Sep 17 00:00:00 2001 From: Dan King Date: Fri, 2 Feb 2024 19:15:48 -0500 Subject: [PATCH 24/26] [query] support importing empty JSON objects (#14202) @patrick-schultz I'm not sure if this makes sense or not, but I observed it while working on something else. It seems weird but acceptable to import an empty dictionary as any struct. Does this seem reasonable to you? How have we avoided this bug for so long? I'm not familiar enough with this code to know how to simply reproduce the bug and add a corresponding test. Thoughts? 
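For what it's worth, the failure mode is just `max` over an empty collection: when the JSON object has no fields and `padNulls` is off, the old code computed the annotation size by taking the maximum matched field index, which throws on an empty object. A small Python sketch of that size computation (hypothetical names, not the actual Hail code path) shows why the empty-object guard is enough:

```python
# Sketch only: mirrors the annotation-size logic being patched, not Hail's implementation.
def annotation_size(jfields: dict, field_index: dict, pad_nulls: bool, n_fields: int) -> int:
    if pad_nulls:
        return n_fields
    if len(jfields) == 0:
        # The new guard: an empty JSON object becomes an empty annotation.
        return 0
    # Old behaviour: max() over the matched indices, which raises on an empty sequence.
    return max(field_index.get(name, -1) for name in jfields) + 1

assert annotation_size({}, {}, pad_nulls=False, n_fields=0) == 0
assert annotation_size({'a': 1}, {'a': 0}, pad_nulls=False, n_fields=1) == 1
```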
--- .../scala/is/hail/expr/AnnotationImpex.scala | 10 +++++--- .../scala/is/hail/methods/ExprSuite.scala | 25 ++++++++++++++++++- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/hail/src/main/scala/is/hail/expr/AnnotationImpex.scala b/hail/src/main/scala/is/hail/expr/AnnotationImpex.scala index 089914e097e..aed45885ccf 100644 --- a/hail/src/main/scala/is/hail/expr/AnnotationImpex.scala +++ b/hail/src/main/scala/is/hail/expr/AnnotationImpex.scala @@ -263,11 +263,15 @@ object JSONAnnotationImpex { if (t.size == 0) Annotation.empty else { - val annotationSize = - if (padNulls) t.size - else jfields.map { case (name, _) => + val annotationSize = if (padNulls) { + t.size + } else if (jfields.size == 0) { + 0 + } else { + jfields.map { case (name, _) => t.selfField(name).map(_.index).getOrElse(-1) }.max + 1 + } val a = Array.fill[Any](annotationSize)(null) for ((name, jv2) <- jfields) { diff --git a/hail/src/test/scala/is/hail/methods/ExprSuite.scala b/hail/src/test/scala/is/hail/methods/ExprSuite.scala index 96a84a94d85..35541ce4a3e 100644 --- a/hail/src/test/scala/is/hail/methods/ExprSuite.scala +++ b/hail/src/test/scala/is/hail/methods/ExprSuite.scala @@ -6,12 +6,13 @@ import is.hail.check.Prop._ import is.hail.check.Properties import is.hail.expr._ import is.hail.expr.ir.IRParser -import is.hail.types.virtual.{TInt32, Type} +import is.hail.types.virtual._ import is.hail.utils.StringEscapeUtils._ import org.json4s._ import org.json4s.jackson.JsonMethods._ import org.testng.annotations.Test +import org.apache.spark.sql.Row class ExprSuite extends HailSuite { @@ -70,6 +71,28 @@ class ExprSuite extends HailSuite { p.check() } + @Test def testImportEmptyJSONObjectAsStruct(): Unit = + assert(JSONAnnotationImpex.importAnnotation(parse("{}"), TStruct()) == Row()) + + @Test def testExportEmptyJSONObjectAsStruct(): Unit = + assert(compact(render(JSONAnnotationImpex.exportAnnotation(Row(), TStruct()))) == "{}") + + @Test def testRoundTripEmptyJSONObject(): Unit = { + val actual = JSONAnnotationImpex.exportAnnotation( + JSONAnnotationImpex.importAnnotation(parse("{}"), TStruct()), + TStruct(), + ) + assert(compact(render(actual)) == "{}") + } + + @Test def testRoundTripEmptyStruct(): Unit = { + val actual = JSONAnnotationImpex.importAnnotation( + JSONAnnotationImpex.exportAnnotation(Row(), TStruct()), + TStruct(), + ) + assert(actual == Row()) + } + @Test def testImpexes(): Unit = { val g = for { From 7a418eb664f08154339a8f9e0a11e418a8f8f125 Mon Sep 17 00:00:00 2001 From: Will Tyler Date: Sat, 3 Feb 2024 00:59:35 +0000 Subject: [PATCH 25/26] Fix an error in the MatrixTable tutorial (#14239) ### Description In this pull request, I fix an error in the MatrixTable tutorial. The tutorial shows some genotype data and erroneously states that all the genotypes that are shown are homozygous reference (0/0). In fact, there are also some heterozygous (0/1) and homozygous alternate (1/1) genotypes in the displayed data. In this pull request, I remove the erroneous statement. ### Testing I ran the notebook to confirm that the notebook displays a mix of genotypes, not just homozygous reference. You can view the erroneous version of the tutorial [here](https://hail.is/docs/0.2/tutorials/07-matrixtable.html#MatrixTable-operations). 
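For anyone re-checking the claim, the quickest way to see the distribution of genotype calls is an entry aggregation. This stand-in uses simulated data rather than the tutorial's MatrixTable, purely for illustration:

```python
import hail as hl

# Simulated stand-in for the tutorial MatrixTable (the tutorial reads its own dataset).
mt = hl.balding_nichols_model(n_populations=3, n_samples=50, n_variants=100)

# Count calls by number of alternate alleles: 0 = hom-ref, 1 = het, 2 = hom-alt.
print(mt.aggregate_entries(hl.agg.counter(mt.GT.n_alt_alleles())))
```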
--- hail/python/hail/docs/tutorials/07-matrixtable.ipynb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hail/python/hail/docs/tutorials/07-matrixtable.ipynb b/hail/python/hail/docs/tutorials/07-matrixtable.ipynb index d0d005b24b0..aebc842473f 100644 --- a/hail/python/hail/docs/tutorials/07-matrixtable.ipynb +++ b/hail/python/hail/docs/tutorials/07-matrixtable.ipynb @@ -287,7 +287,7 @@ } }, "source": [ - "All homozygous reference, which is not surprising. Let's look at the distribution of genotype calls:" + "Let's look at the distribution of genotype calls:" ] }, { @@ -443,7 +443,7 @@ "metadata": { "celltoolbar": "Slideshow", "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -457,9 +457,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.6" + "version": "3.9.18" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } From 671deef8f156f62227f5061485a40d001b29b61b Mon Sep 17 00:00:00 2001 From: Dan King Date: Tue, 6 Feb 2024 11:24:38 -0500 Subject: [PATCH 26/26] [fs] support hfs.ls on a bucket (#14176) Teaches `hfs.ls('gs://bucket/')` to list the files and directories at the top-level of the bucket. In `main` that command raises because this line of `_ls_no_glob` raises: ```python3 maybe_sb_and_t, maybe_contents = await asyncio.gather( self._size_bytes_and_time_modified_or_none(path), ls_as_dir() ) ``` In particular, `statfile` raises a cloud-specific, esoteric error about a malformed URL or empty object names: ```python3 async def _size_bytes_and_time_modified_or_none(self, path: str) -> Optional[Tuple[int, float]]: try: # Hadoop semantics: creation time is used if the object has no notion of last modification time. file_status = await self.afs.statfile(path) return (await file_status.size(), file_status.time_modified().timestamp()) except FileNotFoundError: return None ``` I decided to add a sub-class of `FileNotFoundError` which is self-describing: `IsABucketError`. I changed most methods to raise that error when given a bucket URL. The two interesting cases: 1. `isdir`. This raises an error but I could also see this returning `True`. A bucket is like a directory whose path/name is empty. 2. `isfile`. This returns False but I could also see this raising an error. This just seems convenient, we know the bucket is not a file so we should say so. --- Apparently `hfs.ls` had no current tests because the globbing system doesn't work with Azure https:// URLs. I fixed it to use `AsyncFSURL.with_new_path_component` which is resilient to Azure https weirdness. However, I had to change `with_new_path_component` to treat an empty path in a special way. 
I wanted this to hold: ``` actual = str(afs.parse_url('gs://bucket').with_new_path_component('bar')) expected = 'gs://bucket/bar' assert actual == expected ``` But `with_new_path_component` interacts badly with `GoogleAsyncFSURL.__str__` to return this: ``` 'gs://bucket//bar' ``` --- hail/python/hail/backend/local_backend.py | 7 +- hail/python/hail/backend/service_backend.py | 10 ++- hail/python/hailtop/aiocloud/aioaws/fs.py | 47 ++++++++---- hail/python/hailtop/aiocloud/aioazure/fs.py | 31 ++++++-- .../aiogoogle/client/storage_client.py | 47 +++++++----- hail/python/hailtop/aiotools/__init__.py | 2 + hail/python/hailtop/aiotools/fs/__init__.py | 3 +- hail/python/hailtop/aiotools/fs/exceptions.py | 4 + hail/python/hailtop/aiotools/fs/fs.py | 26 ++++++- hail/python/hailtop/aiotools/local_fs.py | 8 +- hail/python/hailtop/aiotools/router_fs.py | 4 +- hail/python/hailtop/fs/router_fs.py | 74 +++++++++++-------- .../test/hailtop/inter_cloud/test_fs.py | 52 ++++++++++++- 13 files changed, 230 insertions(+), 85 deletions(-) diff --git a/hail/python/hail/backend/local_backend.py b/hail/python/hail/backend/local_backend.py index 595bd04e42e..4a1131970a1 100644 --- a/hail/python/hail/backend/local_backend.py +++ b/hail/python/hail/backend/local_backend.py @@ -1,4 +1,5 @@ from typing import Optional, Union, Tuple, List +from contextlib import ExitStack import os import sys @@ -31,6 +32,7 @@ def __init__( gcs_requester_pays_project: Optional[str] = None, gcs_requester_pays_buckets: Optional[str] = None, ): + self._exit_stack = ExitStack() assert gcs_requester_pays_project is not None or gcs_requester_pays_buckets is None spark_home = find_spark_home() @@ -59,6 +61,7 @@ def __init__( die_on_exit=True, ) self._gateway = JavaGateway(gateway_parameters=GatewayParameters(port=port, auto_convert=True)) + self._exit_stack.callback(self._gateway.shutdown) hail_package = getattr(self._gateway.jvm, 'is').hail @@ -75,7 +78,7 @@ def __init__( super(LocalBackend, self).__init__(self._gateway.jvm, jbackend, jhc) - self._fs = RouterFS() + self._fs = self._exit_stack.enter_context(RouterFS()) self._logger = None self._initialize_flags({}) @@ -108,7 +111,7 @@ def register_ir_function( def stop(self): super().stop() - self._gateway.shutdown() + self._exit_stack.close() uninstall_exception_handler() @property diff --git a/hail/python/hail/backend/service_backend.py b/hail/python/hail/backend/service_backend.py index 1fb95c30115..d7e86bb0433 100644 --- a/hail/python/hail/backend/service_backend.py +++ b/hail/python/hail/backend/service_backend.py @@ -207,6 +207,7 @@ async def create( gcs_requester_pays_configuration: Optional[GCSRequesterPaysConfiguration] = None, gcs_bucket_allow_list: Optional[List[str]] = None, ): + async_exit_stack = AsyncExitStack() billing_project = configuration_of(ConfigVariable.BATCH_BILLING_PROJECT, billing_project, None) if billing_project is None: raise ValueError( @@ -221,9 +222,11 @@ async def create( gcs_kwargs={'gcs_requester_pays_configuration': gcs_requester_pays_configuration}, gcs_bucket_allow_list=gcs_bucket_allow_list, ) + async_exit_stack.push_async_callback(async_fs.close) sync_fs = RouterFS(async_fs) if batch_client is None: batch_client = await BatchClient.create(billing_project, _token=credentials_token) + async_exit_stack.push_async_callback(batch_client.close) batch_attributes: Dict[str, str] = dict() remote_tmpdir = get_remote_tmpdir('ServiceBackend', remote_tmpdir=remote_tmpdir) @@ -288,6 +291,7 @@ async def create( worker_cores=worker_cores, worker_memory=worker_memory, 
regions=regions, + async_exit_stack=async_exit_stack, ) sb._initialize_flags(flags) return sb @@ -308,6 +312,7 @@ def __init__( worker_cores: Optional[Union[int, str]], worker_memory: Optional[str], regions: List[str], + async_exit_stack: AsyncExitStack, ): super(ServiceBackend, self).__init__() self.billing_project = billing_project @@ -329,6 +334,7 @@ def __init__( self.regions = regions self._batch: Batch = self._create_batch() + self._async_exit_stack = async_exit_stack def _create_batch(self) -> Batch: return self._batch_client.create_batch(attributes=self.batch_attributes) @@ -362,9 +368,7 @@ def stop(self): hail_event_loop().run_until_complete(self._stop()) async def _stop(self): - async with AsyncExitStack() as stack: - stack.push_async_callback(self._async_fs.close) - stack.push_async_callback(self._batch_client.close) + await self._async_exit_stack.aclose() self.functions = [] self._registered_ir_function_names = set() diff --git a/hail/python/hailtop/aiocloud/aioaws/fs.py b/hail/python/hailtop/aiocloud/aioaws/fs.py index f2e369c5cd4..8c22c851692 100644 --- a/hail/python/hailtop/aiocloud/aioaws/fs.py +++ b/hail/python/hailtop/aiocloud/aioaws/fs.py @@ -35,6 +35,7 @@ AsyncFSURL, MultiPartCreate, FileAndDirectoryError, + IsABucketError, ) from hailtop.aiotools.fs.exceptions import UnexpectedEOFError from hailtop.aiotools.fs.stream import ( @@ -325,6 +326,9 @@ def __init__(self, bucket: str, path: str): self._bucket = bucket self._path = path + def __repr__(self): + return f'S3AsyncFSURL({self._bucket}, {self._path})' + @property def bucket_parts(self) -> List[str]: return [self._bucket] @@ -344,6 +348,9 @@ def scheme(self) -> str: def with_path(self, path) -> 'S3AsyncFSURL': return S3AsyncFSURL(self._bucket, path) + def with_root_path(self) -> 'S3AsyncFSURL': + return self.with_path('') + def __str__(self) -> str: return f's3://{self._bucket}/{self._path}' @@ -399,8 +406,11 @@ def valid_url(url: str) -> bool: return url.startswith('s3://') @staticmethod - def parse_url(url: str) -> S3AsyncFSURL: - return S3AsyncFSURL(*S3AsyncFS.get_bucket_and_name(url)) + def parse_url(url: str, *, error_if_bucket: bool = False) -> S3AsyncFSURL: + fsurl = S3AsyncFSURL(*S3AsyncFS.get_bucket_and_name(url)) + if error_if_bucket and fsurl._path == '': + raise IsABucketError + return fsurl @staticmethod def get_bucket_and_name(url: str) -> Tuple[str, str]: @@ -423,22 +433,24 @@ def get_bucket_and_name(url: str) -> Tuple[str, str]: return (bucket, name) async def open(self, url: str) -> ReadableStream: - bucket, name = self.get_bucket_and_name(url) + fsurl = self.parse_url(url, error_if_bucket=True) try: - resp = await blocking_to_async(self._thread_pool, self._s3.get_object, Bucket=bucket, Key=name) + resp = await blocking_to_async( + self._thread_pool, self._s3.get_object, Bucket=fsurl._bucket, Key=fsurl._path + ) return blocking_readable_stream_to_async(self._thread_pool, cast(BinaryIO, resp['Body'])) except self._s3.exceptions.NoSuchKey as e: raise FileNotFoundError(url) from e async def _open_from(self, url: str, start: int, *, length: Optional[int] = None) -> ReadableStream: - bucket, name = self.get_bucket_and_name(url) + fsurl = self.parse_url(url, error_if_bucket=True) range_str = f'bytes={start}-' if length is not None: assert length >= 1 range_str += str(start + length - 1) try: resp = await blocking_to_async( - self._thread_pool, self._s3.get_object, Bucket=bucket, Key=name, Range=range_str + self._thread_pool, self._s3.get_object, Bucket=fsurl._bucket, Key=fsurl._path, Range=range_str ) 
return blocking_readable_stream_to_async(self._thread_pool, cast(BinaryIO, resp['Body'])) except self._s3.exceptions.NoSuchKey as e: @@ -489,12 +501,12 @@ async def create(self, url: str, *, retry_writes: bool = True) -> S3CreateManage # interface. This has the disadvantage that the read must # complete before the write can begin (unlike the current # code, that copies 128MB parts in 256KB chunks). - bucket, name = self.get_bucket_and_name(url) - return S3CreateManager(self, bucket, name) + fsurl = self.parse_url(url, error_if_bucket=True) + return S3CreateManager(self, fsurl._bucket, fsurl._path) async def multi_part_create(self, sema: asyncio.Semaphore, url: str, num_parts: int) -> MultiPartCreate: - bucket, name = self.get_bucket_and_name(url) - return S3MultiPartCreate(sema, self, bucket, name, num_parts) + fsurl = self.parse_url(url, error_if_bucket=True) + return S3MultiPartCreate(sema, self, fsurl._bucket, fsurl._path, num_parts) async def mkdir(self, url: str) -> None: pass @@ -503,9 +515,11 @@ async def makedirs(self, url: str, exist_ok: bool = False) -> None: pass async def statfile(self, url: str) -> FileStatus: - bucket, name = self.get_bucket_and_name(url) + fsurl = self.parse_url(url, error_if_bucket=True) try: - resp = await blocking_to_async(self._thread_pool, self._s3.head_object, Bucket=bucket, Key=name) + resp = await blocking_to_async( + self._thread_pool, self._s3.head_object, Bucket=fsurl._bucket, Key=fsurl._path + ) return S3HeadObjectFileStatus(resp, url) except botocore.exceptions.ClientError as e: if e.response['ResponseMetadata']['HTTPStatusCode'] == 404: @@ -579,8 +593,10 @@ async def staturl(self, url: str) -> str: return await self._staturl_parallel_isfile_isdir(url) async def isfile(self, url: str) -> bool: + bucket, name = self.get_bucket_and_name(url) + if name == '': + return False try: - bucket, name = self.get_bucket_and_name(url) await blocking_to_async(self._thread_pool, self._s3.head_object, Bucket=bucket, Key=name) return True except botocore.exceptions.ClientError as e: @@ -589,6 +605,7 @@ async def isfile(self, url: str) -> bool: raise e async def isdir(self, url: str) -> bool: + self.parse_url(url, error_if_bucket=True) try: async for _ in await self.listfiles(url, recursive=True): return True @@ -597,9 +614,9 @@ async def isdir(self, url: str) -> bool: return False async def remove(self, url: str) -> None: + fsurl = self.parse_url(url, error_if_bucket=True) try: - bucket, name = self.get_bucket_and_name(url) - await blocking_to_async(self._thread_pool, self._s3.delete_object, Bucket=bucket, Key=name) + await blocking_to_async(self._thread_pool, self._s3.delete_object, Bucket=fsurl._bucket, Key=fsurl._path) except self._s3.exceptions.NoSuchKey as e: raise FileNotFoundError(url) from e diff --git a/hail/python/hailtop/aiocloud/aioazure/fs.py b/hail/python/hailtop/aiocloud/aioazure/fs.py index 780d2e73eca..5023e56e096 100644 --- a/hail/python/hailtop/aiocloud/aioazure/fs.py +++ b/hail/python/hailtop/aiocloud/aioazure/fs.py @@ -30,6 +30,7 @@ FileStatus, FileAndDirectoryError, UnexpectedEOFError, + IsABucketError, ) from .credentials import AzureCredentials @@ -298,6 +299,9 @@ def __init__(self, account: str, container: str, path: str, query: Optional[str] self._path = path self._query = query + def __repr__(self): + return f'AzureAsyncFSURL({self._account}, {self._container}, {self._path}, {self._query})' + @property def bucket_parts(self) -> List[str]: return [self._account, self._container] @@ -326,6 +330,9 @@ def base(self) -> str: def 
with_path(self, path) -> 'AzureAsyncFSURL': return self.__class__(self._account, self._container, path, self._query) + def with_root_path(self) -> 'AzureAsyncFSURL': + return self.with_path('') + def __str__(self) -> str: return self.base if not self._query else f'{self.base}?{self._query}' @@ -440,7 +447,14 @@ async def generate_sas_token( return token @staticmethod - def parse_url(url: str) -> AzureAsyncFSURL: + def parse_url(url: str, *, error_if_bucket: bool = False) -> AzureAsyncFSURL: + fsurl = AzureAsyncFS._parse_url(url) + if error_if_bucket and fsurl._path == '': + raise IsABucketError + return fsurl + + @staticmethod + def _parse_url(url: str) -> AzureAsyncFSURL: colon_index = url.find(':') if colon_index == -1: raise ValueError(f'invalid URL: {url}') @@ -513,9 +527,10 @@ def get_container_client(self, url: AzureAsyncFSURL) -> ContainerClient: @handle_public_access_error async def open(self, url: str) -> ReadableStream: + parsed_url = self.parse_url(url, error_if_bucket=True) if not await self.exists(url): raise FileNotFoundError - client = self.get_blob_client(self.parse_url(url)) + client = self.get_blob_client(parsed_url) return AzureReadableStream(client, url) @handle_public_access_error @@ -523,11 +538,12 @@ async def _open_from(self, url: str, start: int, *, length: Optional[int] = None assert length is None or length >= 1 if not await self.exists(url): raise FileNotFoundError - client = self.get_blob_client(self.parse_url(url)) + client = self.get_blob_client(self.parse_url(url, error_if_bucket=True)) return AzureReadableStream(client, url, offset=start, length=length) async def create(self, url: str, *, retry_writes: bool = True) -> AsyncContextManager[WritableStream]: # pylint: disable=unused-argument - return AzureCreateManager(self.get_blob_client(self.parse_url(url))) + parsed_url = self.parse_url(url, error_if_bucket=True) + return AzureCreateManager(self.get_blob_client(parsed_url)) async def multi_part_create(self, sema: asyncio.Semaphore, url: str, num_parts: int) -> MultiPartCreate: client = self.get_blob_client(self.parse_url(url)) @@ -545,7 +561,7 @@ async def isfile(self, url: str) -> bool: @handle_public_access_error async def isdir(self, url: str) -> bool: - fs_url = self.parse_url(url) + fs_url = self.parse_url(url, error_if_bucket=True) assert not fs_url.path or fs_url.path.endswith('/'), fs_url.path client = self.get_container_client(fs_url) async for _ in client.walk_blobs(name_starts_with=fs_url.path, include=['metadata'], delimiter='/'): @@ -560,8 +576,8 @@ async def makedirs(self, url: str, exist_ok: bool = False) -> None: @handle_public_access_error async def statfile(self, url: str) -> FileStatus: + parsed_url = self.parse_url(url, error_if_bucket=True) try: - parsed_url = self.parse_url(url) blob_props = await self.get_blob_client(parsed_url).get_blob_properties() return AzureFileStatus(blob_props, parsed_url) except azure.core.exceptions.ResourceNotFoundError as e: @@ -639,7 +655,8 @@ async def staturl(self, url: str) -> str: async def remove(self, url: str) -> None: try: - await self.get_blob_client(self.parse_url(url)).delete_blob() + parsed_url = self.parse_url(url, error_if_bucket=True) + await self.get_blob_client(parsed_url).delete_blob() except azure.core.exceptions.ResourceNotFoundError as e: raise FileNotFoundError(url) from e diff --git a/hail/python/hailtop/aiocloud/aiogoogle/client/storage_client.py b/hail/python/hailtop/aiocloud/aiogoogle/client/storage_client.py index 8055cc2a71f..b77d2a8c32d 100644 --- 
a/hail/python/hailtop/aiocloud/aiogoogle/client/storage_client.py +++ b/hail/python/hailtop/aiocloud/aiogoogle/client/storage_client.py @@ -21,6 +21,7 @@ FileAndDirectoryError, MultiPartCreate, UnexpectedEOFError, + IsABucketError, ) from hailtop.aiotools import FeedableAsyncIterable, WriteBuffer @@ -578,6 +579,9 @@ def __init__(self, bucket: str, path: str): self._bucket = bucket self._path = path + def __repr__(self): + return f'GoogleStorageAsyncFSURL({self._bucket}, {self._path})' + @property def bucket_parts(self) -> List[str]: return [self._bucket] @@ -597,6 +601,9 @@ def scheme(self) -> str: def with_path(self, path) -> 'GoogleStorageAsyncFSURL': return GoogleStorageAsyncFSURL(self._bucket, path) + def with_root_path(self) -> 'GoogleStorageAsyncFSURL': + return self.with_path('') + def __str__(self) -> str: return f'gs://{self._bucket}/{self._path}' @@ -645,8 +652,11 @@ def valid_url(url: str) -> bool: return url.startswith('gs://') @staticmethod - def parse_url(url: str) -> GoogleStorageAsyncFSURL: - return GoogleStorageAsyncFSURL(*GoogleStorageAsyncFS.get_bucket_and_name(url)) + def parse_url(url: str, *, error_if_bucket: bool = False) -> GoogleStorageAsyncFSURL: + fsurl = GoogleStorageAsyncFSURL(*GoogleStorageAsyncFS.get_bucket_and_name(url)) + if error_if_bucket and fsurl._path == '': + raise IsABucketError + return fsurl @staticmethod def get_bucket_and_name(url: str) -> Tuple[str, str]: @@ -673,25 +683,26 @@ def get_bucket_and_name(url: str) -> Tuple[str, str]: return (bucket, name) async def open(self, url: str) -> GetObjectStream: - bucket, name = self.get_bucket_and_name(url) - return await self._storage_client.get_object(bucket, name) + fsurl = self.parse_url(url, error_if_bucket=True) + return await self._storage_client.get_object(fsurl._bucket, fsurl._path) async def _open_from(self, url: str, start: int, *, length: Optional[int] = None) -> GetObjectStream: - bucket, name = self.get_bucket_and_name(url) + fsurl = self.parse_url(url, error_if_bucket=True) range_str = f'bytes={start}-' if length is not None: assert length >= 1 range_str += str(start + length - 1) - return await self._storage_client.get_object(bucket, name, headers={'Range': range_str}) + return await self._storage_client.get_object(fsurl._bucket, fsurl._path, headers={'Range': range_str}) async def create(self, url: str, *, retry_writes: bool = True) -> WritableStream: - bucket, name = self.get_bucket_and_name(url) + fsurl = self.parse_url(url, error_if_bucket=True) params = {'uploadType': 'resumable' if retry_writes else 'media'} - return await self._storage_client.insert_object(bucket, name, params=params) + return await self._storage_client.insert_object(fsurl._bucket, fsurl._path, params=params) async def multi_part_create( self, sema: asyncio.Semaphore, url: str, num_parts: int ) -> GoogleStorageMultiPartCreate: + self.parse_url(url, error_if_bucket=True) return GoogleStorageMultiPartCreate(sema, self, url, num_parts) async def staturl(self, url: str) -> str: @@ -705,8 +716,8 @@ async def makedirs(self, url: str, exist_ok: bool = False) -> None: async def statfile(self, url: str) -> GetObjectFileStatus: try: - bucket, name = self.get_bucket_and_name(url) - return GetObjectFileStatus(await self._storage_client.get_object_metadata(bucket, name), url) + fsurl = self.parse_url(url, error_if_bucket=True) + return GetObjectFileStatus(await self._storage_client.get_object_metadata(fsurl._bucket, fsurl._path), url) except aiohttp.ClientResponseError as e: if e.status == 404: raise FileNotFoundError(url) from 
e @@ -784,12 +795,12 @@ async def cons(first_entry, it) -> AsyncIterator[FileListEntry]: async def isfile(self, url: str) -> bool: try: - bucket, name = self.get_bucket_and_name(url) + fsurl = self.parse_url(url) # if name is empty, get_object_metadata behaves like list objects # the urls are the same modulo the object name - if not name: + if not fsurl._path: return False - await self._storage_client.get_object_metadata(bucket, name) + await self._storage_client.get_object_metadata(fsurl._bucket, fsurl._path) return True except aiohttp.ClientResponseError as e: if e.status == 404: @@ -797,10 +808,10 @@ async def isfile(self, url: str) -> bool: raise async def isdir(self, url: str) -> bool: - bucket, name = self.get_bucket_and_name(url) - assert not name or name.endswith('/'), name - params = {'prefix': name, 'delimiter': '/', 'includeTrailingDelimiter': 'true', 'maxResults': 1} - async for page in await self._storage_client.list_objects(bucket, params=params): + fsurl = self.parse_url(url, error_if_bucket=True) + assert not fsurl._path or fsurl.path.endswith('/'), fsurl._path + params = {'prefix': fsurl._path, 'delimiter': '/', 'includeTrailingDelimiter': 'true', 'maxResults': 1} + async for page in await self._storage_client.list_objects(fsurl._bucket, params=params): prefixes = page.get('prefixes') items = page.get('items') return bool(prefixes or items) @@ -808,6 +819,8 @@ async def isdir(self, url: str) -> bool: async def remove(self, url: str) -> None: bucket, name = self.get_bucket_and_name(url) + if name == '': + raise IsABucketError(url) try: await self._storage_client.delete_object(bucket, name) except aiohttp.ClientResponseError as e: diff --git a/hail/python/hailtop/aiotools/__init__.py b/hail/python/hailtop/aiotools/__init__.py index 89ff43b3a0d..f926ee8868d 100644 --- a/hail/python/hailtop/aiotools/__init__.py +++ b/hail/python/hailtop/aiotools/__init__.py @@ -6,6 +6,7 @@ MultiPartCreate, FileAndDirectoryError, UnexpectedEOFError, + IsABucketError, Copier, ReadableStream, WritableStream, @@ -33,6 +34,7 @@ 'FileAndDirectoryError', 'MultiPartCreate', 'UnexpectedEOFError', + 'IsABucketError', 'WeightedSemaphore', 'WriteBuffer', 'Copier', diff --git a/hail/python/hailtop/aiotools/fs/__init__.py b/hail/python/hailtop/aiotools/fs/__init__.py index 5ceadb88136..8c941c1144f 100644 --- a/hail/python/hailtop/aiotools/fs/__init__.py +++ b/hail/python/hailtop/aiotools/fs/__init__.py @@ -1,6 +1,6 @@ from .fs import AsyncFS, AsyncFSURL, AsyncFSFactory, MultiPartCreate, FileListEntry, FileStatus from .copier import Copier, CopyReport, SourceCopier, SourceReport, Transfer, TransferReport -from .exceptions import UnexpectedEOFError, FileAndDirectoryError +from .exceptions import UnexpectedEOFError, FileAndDirectoryError, IsABucketError from .stream import ( ReadableStream, EmptyReadableStream, @@ -29,4 +29,5 @@ 'FileStatus', 'FileAndDirectoryError', 'UnexpectedEOFError', + 'IsABucketError', ] diff --git a/hail/python/hailtop/aiotools/fs/exceptions.py b/hail/python/hailtop/aiotools/fs/exceptions.py index d6c14c6a547..ed4a24b912d 100644 --- a/hail/python/hailtop/aiotools/fs/exceptions.py +++ b/hail/python/hailtop/aiotools/fs/exceptions.py @@ -4,3 +4,7 @@ class UnexpectedEOFError(Exception): class FileAndDirectoryError(Exception): pass + + +class IsABucketError(FileNotFoundError): + pass diff --git a/hail/python/hailtop/aiotools/fs/fs.py b/hail/python/hailtop/aiotools/fs/fs.py index e7b74810b6a..5a0fed643d5 100644 --- a/hail/python/hailtop/aiotools/fs/fs.py +++ 
b/hail/python/hailtop/aiotools/fs/fs.py @@ -218,9 +218,27 @@ def scheme(self) -> str: def with_path(self, path) -> "AsyncFSURL": pass - def with_new_path_component(self, new_path_component) -> "AsyncFSURL": - prefix = self.path if self.path.endswith("/") else self.path + "/" - suffix = new_path_component[1:] if new_path_component.startswith("/") else new_path_component + @abc.abstractmethod + def with_root_path(self) -> "AsyncFSURL": + pass + + def with_new_path_component(self, new_path_component: str) -> "AsyncFSURL": + if new_path_component == '': + raise ValueError('new path component must be non-empty') + return self.with_new_path_components(new_path_component) + + def with_new_path_components(self, *parts: str) -> "AsyncFSURL": + if len(parts) == 0: + return self + + prefix = self.path + if not prefix.endswith("/") and not prefix == '': + prefix += "/" + + suffix = '/'.join(parts) + if suffix[0] == '/': + suffix = suffix[1:] + return self.with_path(prefix + suffix) @abc.abstractmethod @@ -250,7 +268,7 @@ def valid_url(url: str) -> bool: @staticmethod @abc.abstractmethod - def parse_url(url: str) -> AsyncFSURL: + def parse_url(url: str, *, error_if_bucket: bool = False) -> AsyncFSURL: pass @abc.abstractmethod diff --git a/hail/python/hailtop/aiotools/local_fs.py b/hail/python/hailtop/aiotools/local_fs.py index 0a1f5b33104..f1bbd98cfa4 100644 --- a/hail/python/hailtop/aiotools/local_fs.py +++ b/hail/python/hailtop/aiotools/local_fs.py @@ -114,6 +114,9 @@ class LocalAsyncFSURL(AsyncFSURL): def __init__(self, path: str): self._path = path + def __repr__(self) -> str: + return f'LocalAsyncFSURL({self.path})' + @property def bucket_parts(self) -> List[str]: return [] @@ -133,6 +136,9 @@ def scheme(self) -> str: def with_path(self, path) -> 'LocalAsyncFSURL': return LocalAsyncFSURL(path) + def with_root_path(self) -> 'LocalAsyncFSURL': + return self.with_path('/') + def __str__(self) -> str: return self._path @@ -246,7 +252,7 @@ def valid_url(url: str) -> bool: return url.startswith('file://') or '://' not in url @staticmethod - def parse_url(url: str) -> LocalAsyncFSURL: + def parse_url(url: str, *, error_if_bucket: bool = False) -> LocalAsyncFSURL: return LocalAsyncFSURL(LocalAsyncFS._get_path(url)) @staticmethod diff --git a/hail/python/hailtop/aiotools/router_fs.py b/hail/python/hailtop/aiotools/router_fs.py index f82cd8183aa..78311a385a1 100644 --- a/hail/python/hailtop/aiotools/router_fs.py +++ b/hail/python/hailtop/aiotools/router_fs.py @@ -52,9 +52,9 @@ def copy_part_size(url: str) -> int: return klass.copy_part_size(url) @staticmethod - def parse_url(url: str) -> AsyncFSURL: + def parse_url(url: str, *, error_if_bucket: bool = False) -> AsyncFSURL: klass = RouterAsyncFS._fs_class(url) - return klass.parse_url(url) + return klass.parse_url(url, error_if_bucket=error_if_bucket) @staticmethod def _fs_class(url: str) -> Type[AsyncFS]: diff --git a/hail/python/hailtop/fs/router_fs.py b/hail/python/hailtop/fs/router_fs.py index 65ebe387f78..32412ffacd7 100644 --- a/hail/python/hailtop/fs/router_fs.py +++ b/hail/python/hailtop/fs/router_fs.py @@ -1,4 +1,5 @@ -from typing import List, AsyncContextManager, BinaryIO, Optional, Tuple, Dict, Any +from typing import List, AsyncContextManager, BinaryIO, Optional, Tuple, Dict, Any, Type +from types import TracebackType import asyncio import io import os @@ -6,7 +7,14 @@ import glob import fnmatch -from hailtop.aiotools.fs import Copier, Transfer, FileListEntry as AIOFileListEntry, ReadableStream, WritableStream +from hailtop.aiotools.fs import ( + 
Copier, + Transfer, + FileListEntry as AIOFileListEntry, + ReadableStream, + WritableStream, + AsyncFSURL, +) from hailtop.aiotools.router_fs import RouterAsyncFS from hailtop.utils import bounded_gather2, async_to_blocking @@ -184,6 +192,20 @@ def __init__( local_kwargs=local_kwargs, gcs_kwargs=gcs_kwargs, azure_kwargs=azure_kwargs, s3_kwargs=s3_kwargs ) + def __enter__(self): + return self + + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_value: Optional[BaseException], + traceback: Optional[TracebackType], + ): + self.close() + + def close(self): + async_to_blocking(self.afs.close()) + @property def _gcs_kwargs(self) -> Optional[Dict[str, Any]]: return self.afs._gcs_kwargs @@ -291,13 +313,19 @@ async def ls_no_glob(path) -> List[FileListEntry]: except FileNotFoundError: return [] + async def list_within_each_prefix(prefixes: List[AsyncFSURL], parts: List[str]) -> List[List[FileListEntry]]: + pfs = [functools.partial(ls_no_glob, str(prefix.with_new_path_components(*parts))) for prefix in prefixes] + return await bounded_gather2(sema, *pfs, cancel_on_error=True) + url = self.afs.parse_url(path) if any(glob.escape(bucket_part) != bucket_part for bucket_part in url.bucket_parts): raise ValueError(f'glob pattern only allowed in path (e.g. not in bucket): {path}') blobpath = url.path - components = blobpath.split('/') - assert len(components) > 0 + if blobpath == '': + components = [] + else: + components = blobpath.split('/') glob_components = [] running_prefix = [] @@ -311,48 +339,30 @@ async def ls_no_glob(path) -> List[FileListEntry]: running_prefix = [] suffix_components: List[str] = running_prefix - if len(url.bucket_parts) > 0: - first_prefix = [url.scheme + ':', '', *url.bucket_parts] - else: - assert url.scheme == 'file' - if path.startswith('file://'): - first_prefix = ['file:', '', ''] - else: - first_prefix = [] - cached_stats_for_each_cumulative_prefix: Optional[List[FileListEntry]] = None - cumulative_prefixes = [first_prefix] + cumulative_prefixes: List[AsyncFSURL] = [url.with_root_path()] for intervening_components, single_component_glob_pattern in glob_components: - stats_grouped_by_prefix = await bounded_gather2( - sema, - *[ - functools.partial(ls_no_glob, '/'.join([*cumulative_prefix, *intervening_components])) - for cumulative_prefix in cumulative_prefixes - ], - cancel_on_error=True, - ) + stats_grouped_by_prefix = await list_within_each_prefix(cumulative_prefixes, intervening_components) cached_stats_for_each_cumulative_prefix = [ stat for stats_for_one_prefix, cumulative_prefix in zip(stats_grouped_by_prefix, cumulative_prefixes) for stat in stats_for_one_prefix if fnmatch.fnmatch( - stat.path, '/'.join([*cumulative_prefix, *intervening_components, single_component_glob_pattern]) + stat.path, + str( + cumulative_prefix.with_new_path_components( + *intervening_components, single_component_glob_pattern + ) + ), ) ] - cumulative_prefixes = [stat.path.split('/') for stat in cached_stats_for_each_cumulative_prefix] + cumulative_prefixes = [self.afs.parse_url(stat.path) for stat in cached_stats_for_each_cumulative_prefix] if len(suffix_components) == 0 and cached_stats_for_each_cumulative_prefix is not None: found_stats = cached_stats_for_each_cumulative_prefix else: - found_stats_grouped_by_prefix = await bounded_gather2( - sema, - *[ - functools.partial(ls_no_glob, '/'.join([*cumulative_prefix, *suffix_components])) - for cumulative_prefix in cumulative_prefixes - ], - cancel_on_error=True, - ) + found_stats_grouped_by_prefix = await 
list_within_each_prefix(cumulative_prefixes, suffix_components) found_stats = [stat for stats in found_stats_grouped_by_prefix for stat in stats] if len(glob_components) == 0 and len(found_stats) == 0: diff --git a/hail/python/test/hailtop/inter_cloud/test_fs.py b/hail/python/test/hailtop/inter_cloud/test_fs.py index 44d5c09f2d6..00bf2f4cba0 100644 --- a/hail/python/test/hailtop/inter_cloud/test_fs.py +++ b/hail/python/test/hailtop/inter_cloud/test_fs.py @@ -9,7 +9,8 @@ from hailtop.aiotools.fs.fs import AsyncFSURL import pytest from hailtop.utils import secret_alnum_string, retry_transient_errors, bounded_gather2 -from hailtop.aiotools import LocalAsyncFS, UnexpectedEOFError, AsyncFS +from hailtop.fs.router_fs import RouterFS +from hailtop.aiotools import LocalAsyncFS, UnexpectedEOFError, AsyncFS, IsABucketError from hailtop.aiotools.router_fs import RouterAsyncFS from hailtop.aiocloud.aioaws import S3AsyncFS from hailtop.aiocloud.aioazure import AzureAsyncFS @@ -630,3 +631,52 @@ async def test_rmtree_on_symlink_to_directory(): finally: await fs.rmtree(sema, str(base)) assert not await fs.isdir(str(base)) + + +async def test_operations_on_a_bucket_url_is_error(filesystem: Tuple[asyncio.Semaphore, AsyncFS, AsyncFSURL]): + _, fs, base = filesystem + + if base.scheme in ('', 'file'): + return + + bucket_url = str(base.with_path('')) + + with pytest.raises(IsABucketError): + await fs.isdir(bucket_url) + + assert await fs.isfile(bucket_url) is False + + with pytest.raises(IsABucketError): + await fs.statfile(bucket_url) + + with pytest.raises(IsABucketError): + await fs.remove(bucket_url) + + with pytest.raises(IsABucketError): + await fs.create(bucket_url) + + with pytest.raises(IsABucketError): + await fs.open(bucket_url) + + +async def test_hfs_ls_bucket_url_not_an_error(filesystem: Tuple[asyncio.Semaphore, AsyncFS, AsyncFSURL]): + _, fs, base = filesystem + + if base.scheme in ('', 'file'): + return + + await fs.write(str(base.with_new_path_component('abc123')), b'foo') # ensure the bucket is non-empty + + bucket_url = str(base.with_path('')) + with RouterFS() as fs: + fs.ls(bucket_url) + + +async def test_with_new_path_component(filesystem: Tuple[asyncio.Semaphore, AsyncFS, AsyncFSURL]): + _, _, base = filesystem + + assert str(base.with_path('').with_new_path_component('abc')) == str(base.with_path('abc')) + assert str(base.with_path('abc').with_new_path_component('def')) == str(base.with_path('abc/def')) + + actual = base.with_path('abc').with_new_path_component('def').with_new_path_component('ghi') + assert str(actual) == str(base.with_path('abc/def/ghi'))