diff --git a/arch/xtensa/core/CMakeLists.txt b/arch/xtensa/core/CMakeLists.txt
index 1e4b045085ec5b..f3122c1a5504da 100644
--- a/arch/xtensa/core/CMakeLists.txt
+++ b/arch/xtensa/core/CMakeLists.txt
@@ -21,7 +21,7 @@ zephyr_library_sources_ifdef(CONFIG_XTENSA_ENABLE_BACKTRACE debug_helpers_asm.S)
 zephyr_library_sources_ifdef(CONFIG_DEBUG_COREDUMP coredump.c)
 zephyr_library_sources_ifdef(CONFIG_TIMING_FUNCTIONS timing.c)
 zephyr_library_sources_ifdef(CONFIG_GDBSTUB gdbstub.c)
-zephyr_library_sources_ifdef(CONFIG_XTENSA_MMU ptables.c)
+zephyr_library_sources_ifdef(CONFIG_XTENSA_MMU ptables.c mmu.c)
 zephyr_library_sources_ifdef(CONFIG_USERSPACE userspace.S)
 zephyr_library_sources_ifdef(CONFIG_XTENSA_SYSCALL_USE_HELPER syscall_helper.c)
diff --git a/arch/xtensa/core/README-MMU.txt b/arch/xtensa/core/README-MMU.txt
new file mode 100644
index 00000000000000..499a251cdf2f2f
--- /dev/null
+++ b/arch/xtensa/core/README-MMU.txt
@@ -0,0 +1,268 @@
+# Xtensa MMU Operation
+
+As with other elements of the architecture, paged virtual memory
+management on Xtensa is somewhat unique. And there is similarly a
+lack of introductory material available. This document is an attempt
+to introduce the architecture at an overview/tutorial level, and to
+describe Zephyr's specific implementation choices.
+
+## General TLB Operation
+
+The Xtensa MMU operates on top of a fairly conventional TLB cache.
+The TLB stores virtual to physical translations for individual pages
+of memory. It is partitioned into an automatically managed
+4-way-set-associative bank of entries mapping 4k pages, and 3-6
+"special" ways storing mappings under OS control. Some of these are
+for mapping pages larger than 4k, which Zephyr does not directly
+support. A few are for bootstrap and initialization, and will be
+discussed below.
+
+Like the L1 cache, the TLB is split into separate instruction and data
+entries. Zephyr manages both as needed, but symmetrically. The
+architecture technically supports separately-virtualized instruction
+and data spaces, but the hardware page table refill mechanism (see
+below) does not, and Zephyr's memory spaces are unified regardless.
+
+The TLB may be loaded with permissions and attributes controlling
+cacheability, access control based on ring (i.e. the contents of the
+RING field of the PS register) and togglable write and execute access.
+Memory access, even with a matching TLB entry, may therefore create
+Kernel/User exceptions as desired to enforce permissions choices on
+userspace code.
+
+Live TLB entries are tagged with an 8-bit "ASID" value derived from
+the ring field of the PTE that loaded them, via a simple translation
+specified in the RASID special register. The intent is that each
+non-kernel address space will get a separate ring 3 ASID set in RASID,
+such that you can switch between them without a TLB flush. The ASID
+value of ring zero is fixed at 1; it may not be changed. (An ASID
+value of zero is used to tag an invalid/unmapped TLB entry at
+initialization, but this mechanism isn't accessible to OS code except
+in special circumstances, and in any case there is already an invalid
+attribute value that can be used in a PTE.)
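+
+As an illustration, here is a sketch of the RASID packing (this
+mirrors the value computed in mmu.c below; the helper name is
+invented for this document):
+
+    /* Rings 0-3 occupy successive bytes of RASID, ring 0 in the
+     * low byte; the ring 0 (kernel) ASID is always 1.
+     */
+    uint32_t rasid_pack(uint8_t ring3, uint8_t ring2, uint8_t ring1)
+    {
+        return ((uint32_t)ring3 << 24) | ((uint32_t)ring2 << 16) |
+               ((uint32_t)ring1 << 8) | 1;
+    }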
+
+## Virtually-mapped Page Tables
+
+Xtensa has a unique (and, to someone exposed for the first time,
+extremely confusing) "page table" format. The simplest way to begin
+to explain this is just to describe the (quite simple) hardware
+behavior:
+
+On a TLB miss, the hardware immediately does a single fetch (at ring 0
+privilege) from RAM by adding the "desired address right shifted by
+10 bits with the bottom two bits set to zero" (i.e. the page frame
+number in units of 4 bytes) to the value in the PTEVADDR special
+register. If this load succeeds, then the word is treated as a PTE
+with which to fill the TLB and use for a (restarted) memory access.
+This is extremely simple (just one extra piece of hardware state that
+does just one thing the hardware can already do), and quite fast (only
+one memory fetch vs. e.g. the 2-5 fetches required to walk a page
+table on x86).
+
+This special "refill" fetch is otherwise identical to any other memory
+access, meaning it too uses the TLB to translate from a virtual to
+physical address. Which means that the page tables occupy a 4M region
+of virtual, not physical, address space, in the same memory space
+occupied by the running code. The 1024 pages in that range (not all
+of which might be mapped in physical memory) are a linear array of
+1048576 4-byte PTE entries, each describing a mapping for 4k of
+virtual memory. Note especially that exactly one of those pages
+contains the 1024 PTE entries for the 4M page table itself, pointed to
+by PTEVADDR.
+
+Obviously, the page table memory being virtual means that the fetch
+can fail: there are 1024 possible pages in a complete page table
+covering all of memory, and the ~16 entry TLB clearly won't contain
+entries mapping all of them. If we are missing a TLB entry for the
+page translation we want (NOT for the original requested address, we
+already know we're missing that TLB entry), the hardware has exactly
+one more special trick: it throws a TLB Miss exception (there are two,
+one each for instruction/data TLBs, but in Zephyr they operate
+identically).
+
+The job of that exception handler is simply to ensure that the TLB has
+an entry for the page table page we want. And the simplest way to do
+that is to just load the faulting PTE as an address, which will then
+go through the same refill process above. This second TLB fetch in
+the exception handler may result in an invalid/inapplicable mapping
+within the 4M page table region. This is a typical/expected runtime
+fault, and simply indicates unmapped memory. The result is a TLB miss
+exception from within the TLB miss exception handler (i.e. while the
+EXCM bit is set). This will produce a Double Exception fault, which
+is handled by the OS identically to a general Kernel/User data access
+prohibited exception.
+
+After the TLB refill exception, the original faulting instruction is
+restarted, which retries the refill process, which succeeds in
+fetching a new TLB entry, which is then used to service the original
+memory access. (And may then result in yet another exception if it
+turns out that the TLB entry doesn't permit the access requested, of
+course.)
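+
+As a concrete sketch of the refill arithmetic (illustrative C with an
+invented helper name; it mirrors the PTE_ENTRY_ADDRESS formula in
+Zephyr's xtensa_mmu_priv.h):
+
+    /* The PTE mapping vaddr lives at PTEVADDR + (vaddr >> 12) * 4,
+     * i.e. vaddr shifted right by 10 with the low two bits cleared:
+     */
+    uint32_t pte_fetch_addr(uint32_t ptevaddr, uint32_t vaddr)
+    {
+        return ptevaddr + ((vaddr >> 10) & ~3u);
+    }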
+
+## Special Cases
+
+The page-tables-specified-in-virtual-memory trick works very well in
+practice. But it does have a chicken/egg problem with the initial
+state. Because everything depends on state in the TLB, something
+needs to tell the hardware how to find a physical address using the
+TLB to begin the process. Here we exploit the separate
+non-automatically-refilled TLB ways to store bootstrap records.
+
+First, note that the refill process to load a PTE requires that the 4M
+space of PTE entries be resolvable by the TLB directly, without
+requiring another refill. This 4M mapping is provided by a single
+page of PTE entries (which itself lives in the 4M page table region!).
+This page must always be in the TLB.
+
+Thankfully, for the data TLB Xtensa provides 3 special/non-refillable
+ways (ways 7-9) with at least one 4k page mapping each. We can use
+one of these to "pin" the top-level page table entry in place,
+ensuring that a refill access will be able to find a PTE address.
+
+But now note that the load from that PTE address for the refill is
+done in an exception handler. And running an exception handler
+requires doing a fetch via the instruction TLB. And that obviously
+means that the page(s) containing the exception handler must never
+themselves require a refill exception.
+
+Ideally we would just pin the vector/handler page in the ITLB in the
+same way we do for data, but somewhat inexplicably, Xtensa does not
+provide 4k "pinnable" ways in the instruction TLB (frankly this seems
+like a design flaw).
+
+Instead, we load ITLB entries for vector handlers via the refill
+mechanism using the data TLB, so the refill mechanism for the vector
+page must always succeed. The way to do this is to similarly pin the
+page table page containing the (single) PTE for the vector page in
+the data TLB, such that instruction fetches always find their TLB
+mapping via refill, without requiring an exception.
+
+## Initialization
+
+Unlike most other architectures, Xtensa does not have a "disable" mode
+for the MMU. Virtual address translation through the TLB is active at
+all times. There therefore needs to be a mechanism for the CPU to
+execute code before the OS is able to initialize a refillable page
+table.
+
+The way Xtensa resolves this (on the hardware Zephyr supports, see the
+note below) is to have an 8-entry set ("way 6") of 512M pages able to
+cover all of memory. These 8 entries are initialized as valid, with
+attributes specifying that they are accessible only to an ASID of 1
+(i.e. the fixed ring zero / kernel ASID), writable, executable, and
+uncached. So at boot the CPU relies on these TLB entries to provide a
+clean view of hardware memory.
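+
+As an illustrative sketch (the helper is invented for this document,
+and it assumes the boot mappings are identity mappings, vaddr ==
+paddr), the way 6 state behaves as if it were set up by:
+
+    /* Eight 512M pages covering the 4G address space, tagged with
+     * the kernel ASID (1), writable, executable, and uncached:
+     */
+    for (int i = 0; i < 8; i++) {
+        uint32_t vaddr = i * 0x20000000u;
+        way6_write_entry(i, vaddr, /*paddr=*/vaddr, /*asid=*/1);
+    }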
+
+But that means that enabling page-level translation requires some
+care, as the CPU will throw an exception ("multi hit") if a memory
+access matches more than one live entry in the TLB. The
+initialization algorithm is therefore:
+
+0. Start with a fully-initialized page table layout, including the
+   top-level "L1" page containing the mappings for the page table
+   itself.
+
+1. Ensure that the initialization routine does not cross a page
+   boundary (to prevent stray TLB refill exceptions), that it occupies
+   a separate 4k page from the exception vectors (which we must
+   temporarily double-map), and that it operates entirely in registers
+   (to avoid doing memory access at inopportune moments).
+
+2. Pin the L1 page table PTE into the data TLB. This creates a double
+   mapping condition, but it is safe as nothing will use it until we
+   start refilling.
+
+3. Pin the page table page containing the PTE for the TLB miss
+   exception handler into the data TLB. This will likewise not be
+   accessed until the double map condition is resolved.
+
+4. Set PTEVADDR appropriately. The CPU state to handle refill
+   exceptions is now complete, but cannot be used until we resolve the
+   double mappings.
+
+5. Disable the initial/way6 data TLB entries first, by setting them to
+   an ASID of zero. This is safe as the code being executed is not
+   doing data accesses yet (including refills), and will resolve the
+   double mapping conditions we created above.
+
+6. Disable the initial/way6 instruction TLB entries second. The very
+   next instruction following the invalidation of the
+   currently-executing code page will then cause a TLB refill
+   exception, which will work normally because we just resolved the
+   final double-map condition. (Pedantic note: if the vector page and
+   the currently-executing page are in different 512M way6 pages,
+   disable the mapping for the exception handlers first so the trap
+   from our current code can be handled. Currently Zephyr doesn't
+   handle this condition, as on all reasonable hardware these regions
+   will be near each other.)
+
+Note: there is a different variant of the Xtensa MMU architecture
+where the way 5/6 pages are immutable, and specify a set of
+unchangeable mappings from the final 384M of memory to the bottom and
+top of physical memory. The intent here would (presumably) be that
+these would be used by the kernel for all physical memory and that the
+remaining memory space would be used for virtual mappings. This
+doesn't match Zephyr's architecture well, as we tend to assume
+page-level control over physical memory (e.g. .text/.rodata is cached
+but .data is not on SMP, etc...). And in any case we don't have any
+such hardware to experiment with. But with a little address
+translation we could support this.
+
+## ASID vs. Virtual Mapping
+
+The ASID mechanism in Xtensa works like that of other architectures,
+and is intended to be used similarly. The intent of the design is
+that at context switch time, you can simply change RASID and the page
+table data, and leave any existing mappings in place in the TLB using
+the old ASID value(s). So in the common case where you switch back,
+nothing needs to be flushed.
+
+Unfortunately this runs afoul of the virtual mapping of the page
+table refill: data TLB entries storing the 4M page table mapping
+space are stored at ASID 1 (ring 0), so they can't change when the
+page tables change! So this region naively would have to be flushed,
+which is tantamount to flushing the entire TLB regardless (the TLB is
+much smaller than the 1024-page PTE array).
+
+The resolution in Zephyr is to give each ASID its own PTEVADDR mapping
+in virtual space, such that the page tables don't overlap. This is
+expensive in virtual address space: assigning 4M of space to each of
+the 256 ASIDs (actually 254 as 0 and 1 are never used by user access)
+would take a full gigabyte of address space. Zephyr optimizes this a
+bit by deriving a unique sequential ASID from the hardware address of
+the statically allocated array of L1 page table pages.
+
+Note, obviously, that any change of the mappings within an ASID
+(e.g. to re-use it for another memory domain, or just for any runtime
+mapping change other than mapping previously-unmapped pages) still
+requires a TLB flush, and always will.
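+
+The per-ASID page table window can be sketched as follows (this
+mirrors the arithmetic in mmu.c's compute_regs() below; the helper
+name is invented for this document):
+
+    /* Each ASID gets a disjoint 4M PTE region, so pinned page table
+     * TLB entries for different address spaces never collide:
+     */
+    uint32_t ptevaddr_for(uint32_t asid)
+    {
+        return CONFIG_XTENSA_MMU_PTEVADDR + asid * 0x400000u;
+    }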
+
+## SMP/Cache Interaction
+
+A final important note is that the hardware PTE refill fetch works
+like any other CPU memory access, and in particular it is governed by
+the cacheability attributes of the TLB entry through which it was
+loaded. This means that if the page table entries are marked
+cacheable, then the hardware TLB refill process will be downstream of
+the L1 data cache on the CPU. If the physical memory storing page
+tables has been accessed recently by the CPU (for a refill of another
+page mapped within the same cache line, or to change the tables) then
+the refill will be served from the data cache and not main memory.
+
+This may or may not be desirable depending on access patterns. It
+lets the L1 data cache act as an "L2 TLB" for applications with a lot
+of access variability. But it also means that the TLB entries end up
+being stored twice in the same CPU, wasting transistors that could
+presumably store other useful data.
+
+But it is also important to note that the L1 data cache on Xtensa is
+incoherent! The cache being used for refill reflects only the last
+access made by the current CPU, not the contents of the underlying
+memory being mapped. Page table changes in the data cache of one CPU
+will be invisible to the data cache of another. There is no simple
+way of notifying another CPU of changes to page mappings beyond doing
+system-wide flushes on all CPUs every time a memory domain is
+modified.
+
+The result is that, when SMP is enabled, Zephyr must ensure that all
+page table mappings in the system are set uncached. The OS makes no
+attempt to bolt on a software coherence layer.
diff --git a/arch/xtensa/core/include/xtensa_mmu_priv.h b/arch/xtensa/core/include/xtensa_mmu_priv.h
index cf72c92138373c..7b1030786f424d 100644
--- a/arch/xtensa/core/include/xtensa_mmu_priv.h
+++ b/arch/xtensa/core/include/xtensa_mmu_priv.h
@@ -132,15 +132,8 @@
  *
  * PTE_ENTRY_ADDRESS = PTEVADDR + ((VADDR / 4096) * 4)
  */
-#define Z_XTENSA_PTE_ENTRY_VADDR(vaddr) \
-	(Z_XTENSA_PTEVADDR + (((vaddr) / KB(4)) * 4))
-
-/*
- * The address of the top level page where the page
- * is located in the virtual address.
- */
-#define Z_XTENSA_PAGE_TABLE_VADDR \
-	Z_XTENSA_PTE_ENTRY_VADDR(Z_XTENSA_PTEVADDR)
+#define Z_XTENSA_PTE_ENTRY_VADDR(base, vaddr) \
+	((base) + (((vaddr) / KB(4)) * 4))
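+
+/* For example, the address of the PTE mapping 'vaddr' within the 4M
+ * PTE region starting at 'base' (one such region now exists per
+ * ASID; see compute_regs() in mmu.c) is:
+ *
+ *   Z_XTENSA_PTE_ENTRY_VADDR(base, vaddr)
+ */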
 
 /*
  * Get asid for a given ring from rasid register.
@@ -349,4 +342,8 @@ static inline void xtensa_dtlb_vaddr_invalidate(void *vaddr)
 	}
 }
 
+void xtensa_init_paging(uint32_t *l1_page);
+
+void xtensa_set_paging(uint32_t asid, uint32_t *l1_page);
+
 #endif /* ZEPHYR_ARCH_XTENSA_XTENSA_MMU_PRIV_H_ */
diff --git a/arch/xtensa/core/mmu.c b/arch/xtensa/core/mmu.c
new file mode 100644
index 00000000000000..24e47a42b9ded3
--- /dev/null
+++ b/arch/xtensa/core/mmu.c
@@ -0,0 +1,178 @@
+/*
+ * Copyright 2023 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#include <zephyr/kernel.h>
+#include <zephyr/cache.h>
+#include <zephyr/arch/xtensa/arch.h>
+#include <xtensa/config/core-isa.h>
+#include <xtensa_mmu_priv.h>
+
+#define ASID_INVALID 0
+
+struct tlb_regs {
+	uint32_t rasid;
+	uint32_t ptevaddr;
+	uint32_t ptepin_as;
+	uint32_t ptepin_at;
+	uint32_t vecpin_as;
+	uint32_t vecpin_at;
+};
+
+static void compute_regs(uint32_t user_asid, uint32_t *l1_page, struct tlb_regs *regs)
+{
+	uint32_t vecbase = XTENSA_RSR("VECBASE");
+
+	__ASSERT_NO_MSG((((uint32_t)l1_page) & 0xfff) == 0);
+	__ASSERT_NO_MSG((user_asid == 0) || ((user_asid > 2) &&
+			 (user_asid < Z_XTENSA_MMU_SHARED_ASID)));
+
+	/* We don't use ring 1; the ring 0 ASID must be 1 */
+	regs->rasid = (Z_XTENSA_MMU_SHARED_ASID << 24) |
+		      (user_asid << 16) | 0x000201;
+
+	/* Derive PTEVADDR from ASID so each domain gets its own PTE area */
+	regs->ptevaddr = CONFIG_XTENSA_MMU_PTEVADDR + user_asid * 0x400000;
+
+	/* The ptables code doesn't add the mapping for the l1 page itself */
+	l1_page[Z_XTENSA_L1_POS(regs->ptevaddr)] =
+		(uint32_t)l1_page | Z_XTENSA_PAGE_TABLE_ATTR;
+
+	regs->ptepin_at = (uint32_t)l1_page;
+	regs->ptepin_as = Z_XTENSA_PTE_ENTRY_VADDR(regs->ptevaddr, regs->ptevaddr)
+			  | Z_XTENSA_MMU_PTE_WAY;
+
+	/* Pin mapping for refilling the vector address into the ITLB
+	 * (for handling TLB miss exceptions). Note: this is NOT an
+	 * instruction TLB entry for the vector code itself, it's a
+	 * DATA TLB entry for the page containing the vector mapping
+	 * so the refill on instruction fetch can find it. The
+	 * hardware doesn't have a 4k pinnable instruction TLB way,
+	 * frustratingly.
+	 */
+	uint32_t vb_pte = l1_page[Z_XTENSA_L1_POS(vecbase)];
+
+	regs->vecpin_at = vb_pte;
+	regs->vecpin_as = Z_XTENSA_PTE_ENTRY_VADDR(regs->ptevaddr, vecbase)
+			  | Z_XTENSA_MMU_VECBASE_WAY;
+}
+
+/* Switch to a new page table. There are four items we have to set in
+ * the hardware: the PTE virtual address, the ring/ASID mapping
+ * register, and two pinned entries in the data TLB handling refills
+ * for the page tables and the vector handlers.
+ *
+ * These can be done in any order, provided that we ensure that no
+ * memory access which causes a TLB miss can happen during the process.
+ * This means that we must work entirely within registers in a single
+ * asm block. Also note that instruction fetches are memory accesses
+ * too, which means we cannot cross a page boundary which might reach
+ * a new page not in the TLB (a single jump to an aligned address that
+ * holds our five instructions is sufficient to guarantee that: I
+ * couldn't think of a way to do the alignment statically that also
+ * interoperated well with inline assembly).
+ */
+void xtensa_set_paging(uint32_t user_asid, uint32_t *l1_page)
+{
+	/* Optimization note: the registers computed here are pure
+	 * functions of the two arguments. With a minor API tweak,
+	 * they could be cached in e.g. a thread struct instead of
+	 * being recomputed. This is called on context switch paths
+	 * and is performance-sensitive.
+	 */
+	struct tlb_regs regs;
+
+	compute_regs(user_asid, l1_page, &regs);
+
+	__asm__ volatile("j 1f\n"
+			 ".align 16\n" /* enough for 5 insns */
+			 "1:\n"
+			 "wsr %0, PTEVADDR\n"
+			 "wsr %1, RASID\n"
+			 "wdtlb %2, %3\n"
+			 "wdtlb %4, %5\n"
+			 "isync"
+			 :: "r"(regs.ptevaddr), "r"(regs.rasid),
+			    "r"(regs.ptepin_at), "r"(regs.ptepin_as),
+			    "r"(regs.vecpin_at), "r"(regs.vecpin_as));
+}
+
+/* This is effectively the same algorithm as xtensa_set_paging(), but
+ * it also disables the hardware-initialized 512M TLB entries in way 6
+ * (because the hardware disallows duplicate TLB mappings). For
+ * instruction fetches this produces a critical ordering constraint:
+ * the instruction following the invalidation of the ITLB entry
+ * mapping the current PC will by definition create a refill
+ * condition, which will (because the data TLB was invalidated) cause
+ * a refill exception. Therefore this step must be the very last one,
+ * once everything else is set up and working, which includes the
+ * invalidation of the virtual PTEVADDR area so that the resulting
+ * refill can complete.
+ *
+ * Note that we can't guarantee that the compiler won't insert a data
+ * fetch from our stack memory after exit from the asm block (while it
+ * might be double-mapped), so we invalidate that data TLB inside the
+ * asm for correctness. The other 13 entries get invalidated in a C
+ * loop at the end.
+ */
+void xtensa_init_paging(uint32_t *l1_page)
+{
+	extern char z_xt_init_pc; /* defined in asm below */
+	struct tlb_regs regs;
+
+#if CONFIG_MP_MAX_NUM_CPUS > 1
+	/* The incoherent cache can get into terrible trouble if it's
+	 * allowed to cache PTEs differently across CPUs. We require
+	 * that all page tables supplied by the OS have exclusively
+	 * uncached mappings for page data, but can't do anything
+	 * about earlier code/firmware. Dump the cache to be safe.
+	 */
+	sys_cache_data_flush_and_invd_all();
+#endif
+
+	compute_regs(ASID_INVALID, l1_page, &regs);
+
+	uint32_t idtlb_pte = (regs.ptevaddr & 0xe0000000) | XCHAL_SPANNING_WAY;
+	uint32_t idtlb_stk = (((uint32_t)&regs) & ~0xfff) | XCHAL_SPANNING_WAY;
+	uint32_t iitlb_pc = (((uint32_t)&z_xt_init_pc) & ~0xfff) | XCHAL_SPANNING_WAY;
+
+	/* Note: the jump is mostly pedantry, as it's almost
+	 * inconceivable that a hardware memory region at boot is
+	 * going to cross a 512M page boundary. But we need the entry
+	 * symbol to get the address above, so the jump is here for
+	 * symmetry with the set_paging() code.
+	 */
+	__asm__ volatile("j z_xt_init_pc\n"
+			 ".align 32\n" /* room for 10 insns */
+			 ".globl z_xt_init_pc\n"
+			 "z_xt_init_pc:\n"
+			 "wsr %0, PTEVADDR\n"
+			 "wsr %1, RASID\n"
+			 "wdtlb %2, %3\n"
+			 "wdtlb %4, %5\n"
+			 "idtlb %6\n" /* invalidate pte */
+			 "idtlb %7\n" /* invalidate stk */
+			 "isync\n"
+			 "iitlb %8\n" /* invalidate pc */
+			 "isync\n" /* <--- traps an ITLB miss */
+			 :: "r"(regs.ptevaddr), "r"(regs.rasid),
+			    "r"(regs.ptepin_at), "r"(regs.ptepin_as),
+			    "r"(regs.vecpin_at), "r"(regs.vecpin_as),
+			    "r"(idtlb_pte), "r"(idtlb_stk), "r"(iitlb_pc));
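+
+	/* The operands to idtlb/iitlb above name a TLB entry by
+	 * virtual address with the way index in the low bits: the
+	 * three entries invalidated in the asm block are the way-6
+	 * spanning pages covering the PTE region, this function's
+	 * stack, and the currently-executing code page.
+	 */
+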
+	/* Invalidate the remaining (unused by this function)
+	 * initialization entries. Now we're flying free with our own
+	 * page table.
+	 */
+	for (int i = 0; i < 8; i++) {
+		uint32_t ixtlb = (i * 0x20000000) | XCHAL_SPANNING_WAY;
+
+		if (ixtlb != iitlb_pc) {
+			__asm__ volatile("iitlb %0" :: "r"(ixtlb));
+		}
+		if (ixtlb != idtlb_stk && ixtlb != idtlb_pte) {
+			__asm__ volatile("idtlb %0" :: "r"(ixtlb));
+		}
+	}
+	__asm__ volatile("isync");
+}
diff --git a/arch/xtensa/core/ptables.c b/arch/xtensa/core/ptables.c
index 2130a870e7757d..17950114544f4b 100644
--- a/arch/xtensa/core/ptables.c
+++ b/arch/xtensa/core/ptables.c
@@ -218,42 +218,6 @@ static inline uint32_t *alloc_l2_table(void)
 	return NULL;
 }
 
-/**
- * @brief Switch page tables
- *
- * This switches the page tables to the incoming ones (@a ptables).
- * Since data TLBs to L2 page tables are auto-filled, @a dtlb_inv
- * can be used to invalidate these data TLBs. @a cache_inv can be
- * set to true to invalidate cache to the page tables.
- *
- * @param[in] ptables Page tables to be switched to.
- * @param[in] dtlb_inv True if to invalidate auto-fill data TLBs.
- * @param[in] cache_inv True if to invalidate cache to page tables.
- */
-static ALWAYS_INLINE void switch_page_tables(uint32_t *ptables, bool dtlb_inv, bool cache_inv)
-{
-	if (cache_inv) {
-		sys_cache_data_flush_and_invd_all();
-	}
-
-	/* Invalidate data TLB to L1 page table */
-	xtensa_dtlb_vaddr_invalidate((void *)Z_XTENSA_PAGE_TABLE_VADDR);
-
-	/* Now map the pagetable itself with KERNEL asid to avoid user thread
-	 * from tampering with it.
-	 */
-	xtensa_dtlb_entry_write_sync(
-		Z_XTENSA_PTE((uint32_t)ptables, Z_XTENSA_KERNEL_RING, Z_XTENSA_PAGE_TABLE_ATTR),
-		Z_XTENSA_TLB_ENTRY(Z_XTENSA_PAGE_TABLE_VADDR, Z_XTENSA_MMU_PTE_WAY));
-
-	if (dtlb_inv) {
-		/* Since L2 page tables are auto-refilled,
-		 * invalidate all of them to flush the old entries out.
-		 */
-		xtensa_tlb_autorefill_invalidate();
-	}
-}
-
 static void map_memory_range(const uint32_t start, const uint32_t end,
 			     const uint32_t attrs, bool shared)
 {
@@ -345,6 +309,17 @@ static void xtensa_init_page_tables(void)
 #if defined(__GNUC__)
 #pragma GCC diagnostic pop
 #endif
+	/* Finally, the direct-mapped pages used in the page tables
+	 * must be fixed up to use the same cache attribute (but these
+	 * must be writable, obviously). They shouldn't be left at
+	 * the default.
+	 */
+	map_memory_range((uint32_t) &l1_page_table[0],
+			 (uint32_t) &l1_page_table[CONFIG_XTENSA_MMU_NUM_L1_TABLES],
+			 Z_XTENSA_PAGE_TABLE_ATTR | Z_XTENSA_MMU_W, false);
+	map_memory_range((uint32_t) &l2_page_tables[0],
+			 (uint32_t) &l2_page_tables[CONFIG_XTENSA_MMU_NUM_L2_TABLES],
+			 Z_XTENSA_PAGE_TABLE_ATTR | Z_XTENSA_MMU_W, false);
 
 	sys_cache_data_flush_all();
 }
@@ -356,9 +331,6 @@ __weak void arch_xtensa_mmu_post_init(bool is_core0)
 
 void z_xtensa_mmu_init(void)
 {
-	volatile uint8_t entry;
-	uint32_t ps, vecbase;
-
 	if (_current_cpu->id == 0) {
 		/* This is normally done via arch_kernel_init() inside z_cstart().
 		 * However, before that is called, we go through the sys_init of
@@ -369,111 +341,7 @@ void z_xtensa_mmu_init(void)
 		xtensa_init_page_tables();
 	}
 
-	/* Set the page table location in the virtual address */
-	xtensa_ptevaddr_set((void *)Z_XTENSA_PTEVADDR);
-
-	/* Set rasid */
-	xtensa_rasid_asid_set(Z_XTENSA_MMU_SHARED_ASID, Z_XTENSA_SHARED_RING);
-
-	/* Next step is to invalidate the tlb entry that contains the top level
-	 * page table. This way we don't cause a multi hit exception.
- */ - xtensa_dtlb_entry_invalidate_sync(Z_XTENSA_TLB_ENTRY(Z_XTENSA_PAGE_TABLE_VADDR, 6)); - xtensa_itlb_entry_invalidate_sync(Z_XTENSA_TLB_ENTRY(Z_XTENSA_PAGE_TABLE_VADDR, 6)); - - /* We are not using a flat table page, so we need to map - * only the top level page table (which maps the page table itself). - * - * Lets use one of the wired entry, so we never have tlb miss for - * the top level table. - */ - xtensa_dtlb_entry_write(Z_XTENSA_PTE((uint32_t)z_xtensa_kernel_ptables, - Z_XTENSA_KERNEL_RING, Z_XTENSA_PAGE_TABLE_ATTR), - Z_XTENSA_TLB_ENTRY(Z_XTENSA_PAGE_TABLE_VADDR, Z_XTENSA_MMU_PTE_WAY)); - - /* Before invalidate the text region in the TLB entry 6, we need to - * map the exception vector into one of the wired entries to avoid - * a page miss for the exception. - */ - __asm__ volatile("rsr.vecbase %0" : "=r"(vecbase)); - - xtensa_itlb_entry_write_sync( - Z_XTENSA_PTE(vecbase, Z_XTENSA_KERNEL_RING, - Z_XTENSA_MMU_X | Z_XTENSA_MMU_CACHED_WT), - Z_XTENSA_TLB_ENTRY( - Z_XTENSA_PTEVADDR + MB(4), 3)); - - xtensa_dtlb_entry_write_sync( - Z_XTENSA_PTE(vecbase, Z_XTENSA_KERNEL_RING, - Z_XTENSA_MMU_X | Z_XTENSA_MMU_CACHED_WT), - Z_XTENSA_TLB_ENTRY( - Z_XTENSA_PTEVADDR + MB(4), 3)); - - /* Temporarily uses KernelExceptionVector for level 1 interrupts - * handling. This is due to UserExceptionVector needing to jump to - * _Level1Vector. The jump ('j') instruction offset is incorrect - * when we move VECBASE below. - */ - __asm__ volatile("rsr.ps %0" : "=r"(ps)); - ps &= ~PS_UM; - __asm__ volatile("wsr.ps %0; rsync" :: "a"(ps)); - - __asm__ volatile("wsr.vecbase %0; rsync\n\t" - :: "a"(Z_XTENSA_PTEVADDR + MB(4))); - - - /* Finally, lets invalidate all entries in way 6 as the page tables - * should have already mapped the regions we care about for boot. - */ - for (entry = 0; entry < BIT(XCHAL_ITLB_ARF_ENTRIES_LOG2); entry++) { - __asm__ volatile("iitlb %[idx]\n\t" - "isync" - :: [idx] "a"((entry << 29) | 6)); - } - - for (entry = 0; entry < BIT(XCHAL_DTLB_ARF_ENTRIES_LOG2); entry++) { - __asm__ volatile("idtlb %[idx]\n\t" - "dsync" - :: [idx] "a"((entry << 29) | 6)); - } - - /* Map VECBASE to a fixed data TLB */ - xtensa_dtlb_entry_write( - Z_XTENSA_PTE((uint32_t)vecbase, - Z_XTENSA_KERNEL_RING, Z_XTENSA_MMU_CACHED_WB), - Z_XTENSA_TLB_ENTRY((uint32_t)vecbase, Z_XTENSA_MMU_VECBASE_WAY)); - - /* - * Pre-load TLB for vecbase so exception handling won't result - * in TLB miss during boot, and that we can handle single - * TLB misses. - */ - xtensa_itlb_entry_write_sync( - Z_XTENSA_PTE(vecbase, Z_XTENSA_KERNEL_RING, - Z_XTENSA_MMU_X | Z_XTENSA_MMU_CACHED_WT), - Z_XTENSA_AUTOFILL_TLB_ENTRY(vecbase)); - - /* To finish, just restore vecbase and invalidate TLB entries - * used to map the relocated vecbase. - */ - __asm__ volatile("wsr.vecbase %0; rsync\n\t" - :: "a"(vecbase)); - - /* Restore PS_UM so that level 1 interrupt handling will go to - * UserExceptionVector. - */ - __asm__ volatile("rsr.ps %0" : "=r"(ps)); - ps |= PS_UM; - __asm__ volatile("wsr.ps %0; rsync" :: "a"(ps)); - - xtensa_dtlb_entry_invalidate_sync(Z_XTENSA_TLB_ENTRY(Z_XTENSA_PTEVADDR + MB(4), 3)); - xtensa_itlb_entry_invalidate_sync(Z_XTENSA_TLB_ENTRY(Z_XTENSA_PTEVADDR + MB(4), 3)); - - /* - * Clear out THREADPTR as we use it to indicate - * whether we are in user mode or not. 
- */ - XTENSA_WUR("THREADPTR", 0); + xtensa_init_paging(z_xtensa_kernel_ptables); arch_xtensa_mmu_post_init(_current_cpu->id == 0); } @@ -504,7 +372,7 @@ __weak void arch_reserved_pages_update(void) static bool l2_page_table_map(uint32_t *l1_table, void *vaddr, uintptr_t phys, uint32_t flags, bool is_user) { - uint32_t l1_pos = (uint32_t)vaddr >> 22; + uint32_t l1_pos = Z_XTENSA_L1_POS((uint32_t)vaddr); uint32_t l2_pos = Z_XTENSA_L2_POS((uint32_t)vaddr); uint32_t *table; @@ -530,6 +398,7 @@ static bool l2_page_table_map(uint32_t *l1_table, void *vaddr, uintptr_t phys, flags); sys_cache_data_flush_range((void *)&table[l2_pos], sizeof(table[0])); + xtensa_tlb_autorefill_invalidate(); return true; } @@ -604,18 +473,6 @@ static inline void __arch_mem_map(void *va, uintptr_t pa, uint32_t xtensa_flags, k_spin_unlock(&z_mem_domain_lock, key); } #endif /* CONFIG_USERSPACE */ - - if ((xtensa_flags & Z_XTENSA_MMU_X) == Z_XTENSA_MMU_X) { - xtensa_itlb_vaddr_invalidate(vaddr); - } - xtensa_dtlb_vaddr_invalidate(vaddr); - - if (IS_ENABLED(CONFIG_XTENSA_MMU_DOUBLE_MAP)) { - if (xtensa_flags & Z_XTENSA_MMU_X) { - xtensa_itlb_vaddr_invalidate(vaddr_uc); - } - xtensa_dtlb_vaddr_invalidate(vaddr_uc); - } } void arch_mem_map(void *virt, uintptr_t phys, size_t size, uint32_t flags) @@ -680,7 +537,7 @@ void arch_mem_map(void *virt, uintptr_t phys, size_t size, uint32_t flags) */ static bool l2_page_table_unmap(uint32_t *l1_table, void *vaddr) { - uint32_t l1_pos = (uint32_t)vaddr >> 22; + uint32_t l1_pos = Z_XTENSA_L1_POS((uint32_t)vaddr); uint32_t l2_pos = Z_XTENSA_L2_POS((uint32_t)vaddr); uint32_t *l2_table; uint32_t table_pos; @@ -717,10 +574,9 @@ static bool l2_page_table_unmap(uint32_t *l1_table, void *vaddr) table_pos = (l2_table - (uint32_t *)l2_page_tables) / (XTENSA_L2_PAGE_TABLE_ENTRIES); atomic_clear_bit(l2_page_tables_track, table_pos); - /* Need to invalidate L2 page table as it is no longer valid. */ - xtensa_dtlb_vaddr_invalidate((void *)l2_table); - end: + /* Need to invalidate L2 page table as it is no longer valid. */ + xtensa_tlb_autorefill_invalidate(); return exec; } @@ -764,18 +620,6 @@ static inline void __arch_mem_unmap(void *va) } k_spin_unlock(&z_mem_domain_lock, key); #endif /* CONFIG_USERSPACE */ - - if (is_exec) { - xtensa_itlb_vaddr_invalidate(vaddr); - } - xtensa_dtlb_vaddr_invalidate(vaddr); - - if (IS_ENABLED(CONFIG_XTENSA_MMU_DOUBLE_MAP)) { - if (is_exec) { - xtensa_itlb_vaddr_invalidate(vaddr_uc); - } - xtensa_dtlb_vaddr_invalidate(vaddr_uc); - } } void arch_mem_unmap(void *addr, size_t size) @@ -853,7 +697,7 @@ void z_xtensa_mmu_tlb_shootdown(void) * MMU_PTE_WAY, so we can skip the probing step by * generating the query entry directly. */ - ptevaddr_entry = Z_XTENSA_PAGE_TABLE_VADDR | MMU_PTE_WAY; + ptevaddr_entry = (uint32_t)xtensa_ptevaddr_get() | Z_XTENSA_MMU_PTE_WAY; ptevaddr = xtensa_dtlb_paddr_read(ptevaddr_entry); thread_ptables = (uint32_t)thread->arch.ptables; @@ -863,7 +707,9 @@ void z_xtensa_mmu_tlb_shootdown(void) * indicated by the current thread are different * than the current mapped page table. 
*/ - switch_page_tables((uint32_t *)thread_ptables, true, true); + struct arch_mem_domain *domain = + &(thread->mem_domain_info.mem_domain->arch); + xtensa_set_paging(domain->asid, (uint32_t *)thread_ptables); } } @@ -981,9 +827,8 @@ static int region_map_update(uint32_t *ptables, uintptr_t start, for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) { uint32_t *l2_table, pte; uint32_t page = start + offset; - uint32_t l1_pos = page >> 22; + uint32_t l1_pos = Z_XTENSA_L1_POS(page); uint32_t l2_pos = Z_XTENSA_L2_POS(page); - /* Make sure we grab a fresh copy of L1 page table */ sys_cache_data_invd_range((void *)&ptables[l1_pos], sizeof(ptables[0])); @@ -998,8 +843,7 @@ static int region_map_update(uint32_t *ptables, uintptr_t start, sys_cache_data_flush_range((void *)&l2_table[l2_pos], sizeof(l2_table[0])); - xtensa_dtlb_vaddr_invalidate( - (void *)(pte & Z_XTENSA_PTE_PPN_MASK)); + xtensa_dtlb_vaddr_invalidate((void *)page); } return ret; @@ -1127,7 +971,7 @@ int arch_mem_domain_thread_add(struct k_thread *thread) * the current thread running. */ if (thread == _current_cpu->current) { - switch_page_tables(thread->arch.ptables, true, true); + xtensa_set_paging(domain->arch.asid, thread->arch.ptables); } #if CONFIG_MP_MAX_NUM_CPUS > 1 @@ -1179,7 +1023,7 @@ static bool page_validate(uint32_t *ptables, uint32_t page, uint8_t ring, bool w { uint8_t asid_ring; uint32_t rasid, pte, *l2_table; - uint32_t l1_pos = page >> 22; + uint32_t l1_pos = Z_XTENSA_L1_POS(page); uint32_t l2_pos = Z_XTENSA_L2_POS(page); if (is_pte_illegal(ptables[l1_pos])) { @@ -1245,12 +1089,7 @@ void z_xtensa_swap_update_page_tables(struct k_thread *incoming) struct arch_mem_domain *domain = &(incoming->mem_domain_info.mem_domain->arch); - /* Lets set the asid for the incoming thread */ - if ((incoming->base.user_options & K_USER) != 0) { - xtensa_rasid_asid_set(domain->asid, Z_XTENSA_USER_RING); - } - - switch_page_tables(ptables, true, false); + xtensa_set_paging(domain->asid, ptables); } #endif /* CONFIG_USERSPACE */ diff --git a/arch/xtensa/include/xtensa-asm2-s.h b/arch/xtensa/include/xtensa-asm2-s.h index 3f3ffd90b7ae04..416a83453a2d0e 100644 --- a/arch/xtensa/include/xtensa-asm2-s.h +++ b/arch/xtensa/include/xtensa-asm2-s.h @@ -589,31 +589,6 @@ _Level\LVL\()VectorHelper : .global _Level\LVL\()Vector _Level\LVL\()Vector: #endif -#ifdef CONFIG_XTENSA_MMU - wsr.ZSR_MMU_0 a2 - wsr.ZSR_MMU_1 a3 - rsync - - /* Calculations below will clobber registers used. - * So we make a copy of the stack pointer to avoid - * changing it. - */ - mov a3, a1 - - CALC_PTEVADDR_BASE a2 - - /* Preload PTE entry page of current stack. */ - PRELOAD_PTEVADDR a3, a2 - - /* Preload PTE entry page of new stack, where - * it will be used later (in EXCINT_HANDLER above). 
- */ - rsr.ZSR_CPU a3 - PRELOAD_PTEVADDR a3, a2 - - rsr.ZSR_MMU_1 a3 - rsr.ZSR_MMU_0 a2 -#endif /* CONFIG_XTENSA_MMU */ addi a1, a1, -___xtensa_irq_bsa_t_SIZEOF s32i a0, a1, ___xtensa_irq_bsa_t_a0_OFFSET s32i a2, a1, ___xtensa_irq_bsa_t_a2_OFFSET diff --git a/soc/xtensa/dc233c/mmu.c b/soc/xtensa/dc233c/mmu.c index 718f79779ebd5c..8decc952bf2ac6 100644 --- a/soc/xtensa/dc233c/mmu.c +++ b/soc/xtensa/dc233c/mmu.c @@ -35,33 +35,3 @@ const struct xtensa_mmu_range xtensa_soc_mmu_ranges[] = { }; int xtensa_soc_mmu_ranges_num = ARRAY_SIZE(xtensa_soc_mmu_ranges); - -void arch_xtensa_mmu_post_init(bool is_core0) -{ - uint32_t vecbase; - - ARG_UNUSED(is_core0); - - __asm__ volatile("rsr.vecbase %0" : "=r"(vecbase)); - - /* Invalidate any autorefill instr TLBs of VECBASE so we can map it - * permanently below. - */ - xtensa_itlb_vaddr_invalidate((void *)vecbase); - - /* Map VECBASE permanently in instr TLB way 4 so we will always have - * access to exception handlers. Each way 4 TLB covers 1MB (unless - * ITLBCFG has been changed before this, which should not have - * happened). Also this needs to be mapped as SHARED so both kernel - * and userspace can execute code here => same as .text. - * - * Note that we don't want to map the first 1MB in data TLB as - * we want to keep page 0 (0x00000000) unmapped to catch null pointer - * de-references. - */ - vecbase = ROUND_DOWN(vecbase, MB(1)); - xtensa_itlb_entry_write_sync( - Z_XTENSA_PTE(vecbase, Z_XTENSA_SHARED_RING, - Z_XTENSA_MMU_X | Z_XTENSA_MMU_CACHED_WT), - Z_XTENSA_TLB_ENTRY((uint32_t)vecbase, 4)); -}