diff --git a/README.md b/README.md index 73c195f133..dcd5f51638 100644 --- a/README.md +++ b/README.md @@ -12,27 +12,36 @@ -Data-parallel Extension for Numba* (numba-dpex) is a standalone extension for -the [Numba](http://numba.pydata.org) Python JIT compiler. Numba-dpex provides -a generic kernel programming API and an offload feature that extends Numba's -auto-parallelizer to generate data-parallel kernels for `parfor` nodes. - -Numba-dpex's kernel API has a design and API similar to Numba's `cuda.jit` -module, but is based on the [SYCL](https://sycl.tech/) language. The -code-generation for the kernel API currently supports -[SPIR-V](https://www.khronos.org/spir/)-based -[OpenCL](https://www.khronos.org/opencl/) and -[oneAPI Level Zero](https://spec.oneapi.io/level-zero/latest/index.html) -devices that are supported by Intel® DPC++ SYCL compiler runtime. Supported -devices include Intel® CPUs, integrated GPUs and discrete GPUs. - -The offload functionality in numba-dpex is based on Numba's `parfor` -loop-parallelizer. Our compiler extends Numba's `parfor` feature to generate -kernels and offload them to devices supported by DPC++ SYCL compiler runtime. -The offload functionality is supported via a new NumPy drop-in replacement -library: [dpnp](https://github.com/IntelPython/dpnp). Note that `dpnp` and NumPy-based -expressions can be used together in the same function, with `dpnp` expressions getting -offloaded by `numba-dpex` and NumPy expressions getting parallelized by Numba. +Data-parallel Extension for Numba* (numba-dpex) is an open-source standalone +extension for the [Numba](http://numba.pydata.org) Python JIT compiler. +Numba-dpex provides a [SYCL*](https://sycl.tech/)-like API for kernel +programming in Python. SYCL* is an open standard developed by the [Unified +Acceleration Foundation](https://uxlfoundation.org/) as a vendor-agnostic way of +programming different types of data-parallel hardware such as multi-core CPUs, +GPUs, and FPGAs. 
Numba-dpex's kernel-programming API brings the same programming +model and a similar API to Python. The API allows expressing portable +data-parallel kernels in Python and then JIT compiling them for different +hardware targets. JIT compilation is supported for hardware that uses the +[SPIR-V](https://www.khronos.org/spir/) intermediate representation format that +includes [OpenCL](https://www.khronos.org/opencl/) CPU (Intel, AMD) devices, +OpenCL GPU (Intel integrated and discrete GPUs) devices, and [oneAPI Level +Zero](https://spec.oneapi.io/level-zero/latest/index.html) GPU (Intel integrated +and discrete GPUs) devices. + +The kernel programming API does not yet support every SYCL* feature. Refer to +the [SYCL* and numba-dpex feature comparison](https://intelpython.github.io/numba-dpex/latest/supported_sycl_features.html) +page to get a summary of supported features. +Numba-dpex only implements SYCL*'s kernel programming API; all SYCL runtime +Python bindings are provided by the [dpctl](https://github.com/IntelPython/dpctl) +package. + +Along with the kernel programming API, numba-dpex extends Numba's +auto-parallelizer to bring device offload capabilities to `prange` loops and +NumPy-like vector expressions. The offload functionality is supported via the +NumPy drop-in replacement library: [dpnp](https://github.com/IntelPython/dpnp). +Note that `dpnp` and NumPy-based expressions can be used together in the same +function, with `dpnp` expressions getting offloaded by `numba-dpex` and NumPy +expressions getting parallelized by Numba. Refer the [documentation](https://intelpython.github.io/numba-dpex) and examples to learn more. diff --git a/docs/source/conf.py b/docs/source/conf.py index f0ef3a41e8..89cb5f3b8b 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -5,10 +5,12 @@ # coding: utf-8 # Configuration file for the Sphinx documentation builder. 
-# -- Project information ----------------------------------------------------- +# -- Project information ------------------------------------------------------- import sys +import numba_dpex + sys.path.append(".") from sycl_spec_links import sycl_ext_links # noqa E402 @@ -18,16 +20,15 @@ author = "Intel Corporation" # The full version, including alpha/beta/rc tags -# release = "main" +release = numba_dpex.__version__ -# -- General configuration ---------------------------------------------------- +# -- General configuration ----------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ "sphinx.ext.todo", - "sphinx.ext.intersphinx", "sphinx.ext.extlinks", "sphinx.ext.githubpages", "sphinx.ext.napoleon", @@ -35,6 +36,7 @@ "sphinxcontrib.googleanalytics", "myst_parser", "autoapi.extension", + "sphinx.ext.intersphinx", "sphinxcontrib.bibtex", ] @@ -52,12 +54,11 @@ extlinks = {} extlinks.update(sycl_ext_links) -# -- Options for HTML output ------------------------------------------------- +# -- Options for HTML output --------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. 
# -# html_theme = "pydata_sphinx_theme" html_theme = "furo" html_theme_options = { @@ -82,7 +83,7 @@ html_show_sourcelink = False -# -- Todo extension configuration ---------------------------------------------- +# -- Todo extension configuration --------------------------------------------- todo_include_todos = True todo_link_only = True @@ -90,9 +91,15 @@ intersphinx_mapping = {} -# -- Prepend module name to an object name or not ----------------------------------- +# -- Prepend module name to an object name or not ------------------------------ add_module_names = False + +# -- autodoc configurations ---------------------------------------------------- + +autodoc_typehints_format = "short" +python_use_unqualified_type_names = True + # -- Auto API configurations --------------------------------------------------- autoapi_dirs = [ diff --git a/docs/source/contribution_guide.rst b/docs/source/contribution_guide.rst deleted file mode 100644 index 971a7fd411..0000000000 --- a/docs/source/contribution_guide.rst +++ /dev/null @@ -1,26 +0,0 @@ -.. _contributions: -.. include:: ./ext_links.txt - -Contribution Guide -================== - -Classification of Contributions -------------------------------- - -Development Cycle ------------------ - -Issues and Pull Requests ------------------------- - -Coding Guidelines ------------------ - -Unit Testing -------------- - -Documentation -------------- - -Tips for Developers -------------------- diff --git a/docs/source/examples.rst b/docs/source/examples.rst deleted file mode 100644 index f05801f7ab..0000000000 --- a/docs/source/examples.rst +++ /dev/null @@ -1,20 +0,0 @@ -.. _examples: -.. include:: ./ext_links.txt - -List of examples -================ - -.. todo:: - Provide list of examples for numba-dpex - -Benchmarks -********** - -.. todo:: - Provide instructions for dpbench - -Jupyter* Notebooks -****************** - -.. 
todo:: - Provide instructions for Jupyter Notebook samples diff --git a/docs/source/index.rst b/docs/source/index.rst index 0bfda03bbc..8582ce97ff 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,43 +1,15 @@ .. _index: .. include:: ./ext_links.txt -.. Feedbacks: -.. Use proper names (as provided by marketing) -.. Do not use "we/I" -.. Use correct acronym, expand acronym at the first instance - - -.. Follow cupy docs. - -.. Overview: -.. Main Features -.. Project Goal - -.. Getting Started: -.. Yevheni -.. Docker page - -.. User Guide: -.. https://intelpython.github.io/oneAPI-for-SciPy/details/programming_model/ -.. Kernel Programming API --> Kernel Programming Basics -.. Compiling and Offloading DPNP -.. - Introduce @dpjit -.. - Array constructor in dpjit -.. - ufunc (refer to numba) -.. - prange -.. - -.. Debugging using GDB -.. # Performance Tips -.. # Troubleshooting -.. Useful Links -.. # To-Do - -.. API Reference: - - Data Parallel Extension for Numba* ================================== +Numba-dpex is an open-source kernel-programming API and JIT compiler for +portable accelerator programming directly in Python. The API and the compiler are +modeled after the C++ SYCL* language and brings a similar programming model and +language design to Python. This page lists the relevant documentation to learn to +program data-parallel kernels using numba-dpex. + .. module:: numba_dpex .. toctree:: @@ -55,14 +27,7 @@ Data Parallel Extension for Numba* .. toctree:: :maxdepth: 1 - :caption: Development - - contribution_guide - -.. 
toctree:: - :maxdepth: 1 - :caption: Misc Notes + :caption: Miscellaneous Notes - examples license release-notes diff --git a/docs/source/user_guide/kernel_programming/math-functions.rst b/docs/source/user_guide/kernel_programming/math-functions.rst index 284f89e06e..f1ef2f512c 100644 --- a/docs/source/user_guide/kernel_programming/math-functions.rst +++ b/docs/source/user_guide/kernel_programming/math-functions.rst @@ -1,3 +1,5 @@ +.. include:: ./../../ext_links.txt + Scalar mathematical functions from the Python `math`_ module and the `dpnp`_ library can be used inside a kernel function. During compilation the diff --git a/docs/source/user_guide/kernel_programming/reduction.rst b/docs/source/user_guide/kernel_programming/reduction.rst deleted file mode 100644 index 0f57b4b40e..0000000000 --- a/docs/source/user_guide/kernel_programming/reduction.rst +++ /dev/null @@ -1,51 +0,0 @@ -Reduction on SYCL-supported Devices -=================================== - -Numba-dpex does not yet provide any specific decorator to implement -reduction kernels. However, a kernel reduction can be written explicitly. This -section provides two approaches for writing a reduction kernel as a -``numba_dpex.kernel`` function. - - -Example 1 ---------- - -This example demonstrates a summation reduction on a one-dimensional array. - -Full example can be found at ``numba_dpex/examples/sum_reduction.py``. - -In this example, to reduce the array we invoke the kernel multiple times. - -.. literalinclude:: ./../../../../numba_dpex/examples/sum_reduction.py - :pyobject: sum_reduction_kernel - -.. literalinclude:: ./../../../../numba_dpex/examples/sum_reduction.py - :pyobject: sum_reduce - -Example 2 ---------- - -Full example can be found at -``numba_dpex/examples/sum_reduction_recursive_ocl.py``. - -.. literalinclude:: ./../../../../numba_dpex/examples/kernel/sum_reduction_recursive_ocl.py - :pyobject: sum_reduction_kernel - -.. 
literalinclude:: ./../../../../numba_dpex/examples/kernel/sum_reduction_recursive_ocl.py - :pyobject: sum_recursive_reduction - -.. literalinclude:: ./../../../../numba_dpex/examples/kernel/sum_reduction_recursive_ocl.py - :pyobject: sum_reduce - -.. note:: - - Numba-dpex does not yet provide any analogue to the ``numba.cuda.reduce`` - decorator for writing reductions kernel. Such a decorator will be added in - future releases. - -Full examples -------------- - -- :file:`numba_dpex/examples/sum_reduction_recursive_ocl.py` -- :file:`numba_dpex/examples/sum_reduction_ocl.py` -- :file:`numba_dpex/examples/sum_reduction.py` diff --git a/docs/source/user_guide/kernel_programming/synchronization.rst b/docs/source/user_guide/kernel_programming/synchronization.rst deleted file mode 100644 index d590b0801b..0000000000 --- a/docs/source/user_guide/kernel_programming/synchronization.rst +++ /dev/null @@ -1,33 +0,0 @@ -Synchronization Functions -========================= - -``numba-dpex`` only supports some of the SYCL synchronization operations. For -synchronization of all threads in the same thread block, numba-dpex provides -a helper function called ``numba_dpex.barrier()``. This function implements the -same pattern as barriers in traditional multi-threaded programming: invoking the -function forces a thread to wait until all threads in the block reach the -barrier, at which point it returns control to all its callers. - -``numba_dpex.barrier()`` supports two memory fence options: - -- ``numba_dpex.GLOBAL_MEM_FENCE``: The barrier function will queue a memory - fence to ensure correct ordering of memory operations to global memory. Using - the option can be useful when work-items, for example, write to buffer or - image objects and then want to read the updated data. Passing no arguments to - ``numba_dpex.barrier()`` is equivalent to setting the global memory fence - option. - - .. .. literalinclude:: ./../../../../numba_dpex/examples/barrier.py - .. 
:pyobject: no_arg_barrier_support - -- ``numba_dpex.LOCAL_MEM_FENCE``: The barrier function will either flush - any variables stored in local memory or queue a memory fence to ensure - correct ordering of memory operations to local memory. - -.. .. literalinclude:: ./../../../../numba_dpex/examples/barrier.py -.. :pyobject: local_memory - -.. note:: - - The ``numba_dpex.barrier()`` function is semantically equivalent to - ``numba.cuda.syncthreads``. diff --git a/numba_dpex/core/runtime/context.py b/numba_dpex/core/runtime/context.py index 1efa58cde7..80f0253101 100644 --- a/numba_dpex/core/runtime/context.py +++ b/numba_dpex/core/runtime/context.py @@ -439,16 +439,18 @@ def acquire_meminfo_and_schedule_release( ): """Inserts LLVM IR to call nrt_acquire_meminfo_and_schedule_release. - DPCTLSyclEventRef - DPEXRT_nrt_acquire_meminfo_and_schedule_release( - NRT_api_functions *nrt, - DPCTLSyclQueueRef QRef, - NRT_MemInfo **meminfo_array, - size_t meminfo_array_size, - DPCTLSyclEventRef *depERefs, - size_t nDepERefs, - int *status, - ); + .. code-block:: c + + DPCTLSyclEventRef + DPEXRT_nrt_acquire_meminfo_and_schedule_release( + NRT_api_functions *nrt, + DPCTLSyclQueueRef QRef, + NRT_MemInfo **meminfo_array, + size_t meminfo_array_size, + DPCTLSyclEventRef *depERefs, + size_t nDepERefs, + int *status, + ); """ mod = builder.module @@ -475,16 +477,18 @@ def acquire_meminfo_and_schedule_release( def build_or_get_kernel(self, builder: llvmir.IRBuilder, args): """Inserts LLVM IR to call build_or_get_kernel. - DPCTLSyclKernelRef - DPEXRT_build_or_get_kernel( - const DPCTLSyclContextRef ctx, - const DPCTLSyclDeviceRef dev, - size_t il_hash, - const char *il, - size_t il_length, - const char *compile_opts, - const char *kernel_name, - ); + .. 
code-block:: c + + DPCTLSyclKernelRef + DPEXRT_build_or_get_kernel( + const DPCTLSyclContextRef ctx, + const DPCTLSyclDeviceRef dev, + size_t il_hash, + const char *il, + size_t il_length, + const char *compile_opts, + const char *kernel_name, + ); """ mod = builder.module @@ -511,7 +515,9 @@ def build_or_get_kernel(self, builder: llvmir.IRBuilder, args): def kernel_cache_size(self, builder: llvmir.IRBuilder): """Inserts LLVM IR to call kernel_cache_size. - size_t DPEXRT_kernel_cache_size(); + .. code-block:: c + + size_t DPEXRT_kernel_cache_size(); """ fn = cgutils.get_or_insert_function(