From 1505e5dfe412471e7a645ae6ed40d2d0d6a68f22 Mon Sep 17 00:00:00 2001
From: Paul Shen <pxshen@alumni.stanford.edu>
Date: Mon, 30 Dec 2024 17:19:17 -0500
Subject: [PATCH] Add unsafe_free! GPU helper and tiny CUDA test problem; refresh notebook outputs

---
 Luminescent_AI_docs.ipynb      | 332 ++++++++++++++++++++++++++++++++-
 luminescent/a/splitter.py      |  31 ---
 luminescent/luminescent/sol.py |   1 +
 luminescent/tiny.py            |  13 ++
 src/core/gpu.jl                |   8 -
 src/core/monitors.jl           |   2 +-
 src/core/update.jl             |   2 +
 src/del.jl                     |   1 +
 src/gpu.jl                     |   3 +
 src/main.jl                    |   4 +-
 src/pic/run.jl                 |   5 +-
 src/pictest.jl                 |   4 +-
 src/sim/setup.jl               |   1 -
 src/sim/solve.jl               |  15 +-
 14 files changed, 372 insertions(+), 50 deletions(-)
 delete mode 100644 luminescent/a/splitter.py
 create mode 100644 luminescent/tiny.py
 delete mode 100644 src/core/gpu.jl
 create mode 100644 src/gpu.jl
diff --git a/Luminescent_AI_docs.ipynb b/Luminescent_AI_docs.ipynb
index 916326e..4645b6e 100644
--- a/Luminescent_AI_docs.ipynb
+++ b/Luminescent_AI_docs.ipynb
@@ -649,14 +649,202 @@
             "execution_count": null,
             "id": "ccf27d90",
             "metadata": {},
-            "outputs": [],
+            "outputs": [
+                {
+                    "name": "stdout",
+                    "output_type": "stream",
+                    "text": [
+                        "['o2@0,o1@0', 'o3@0,o1@0']\n",
+                        "wavelengths has been adjusted to facilitate simulation:\n",
+                        "[1.1625, 1.55]\n",
+                        "Write WindowsPath('c:/Users/pxshe/OneDrive/Desktop/beans/Luminescent.jl/runs/demux/temp/clad_999_0.stl') zmin = 0.000, height = 1.247\n",
+                        "Write WindowsPath('c:/Users/pxshe/OneDrive/Desktop/beans/Luminescent.jl/runs/demux/temp/box_999_0.stl') zmin = -1.027, height = 2.273\n",
+                        "Write WindowsPath('c:/Users/pxshe/OneDrive/Desktop/beans/Luminescent.jl/runs/demux/temp/core_1_0.stl') zmin = 0.000, height = 0.220\n",
+                        "C:\\ProgramData\\anaconda3\\Lib\\site-packages\\luminescent\\pic\n"
+                    ]
+                },
+                {
+                    "data": {
+                        "text/html": [
+                            "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">22:36:23 US Eastern Standard Time </span><span style=\"color: #800000; text-decoration-color: #800000\">WARNING: frequency passed to                  </span>\n",
+                            "<span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                                  </span><span style=\"color: #008000; text-decoration-color: #008000\">'Medium.eps_model()'</span><span style=\"color: #800000; text-decoration-color: #800000\">is outside of             </span>\n",
+                            "<span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                                  </span><span style=\"color: #008000; text-decoration-color: #008000\">'Medium.frequency_range'</span><span style=\"color: #800000; text-decoration-color: #800000\"> = </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">(</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">206753419710997.8</span><span style=\"color: #800000; text-decoration-color: #800000\">,</span>\n",
+                            "<span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                                  </span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1199169834323787.2</span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">)</span><span style=\"color: #800000; text-decoration-color: #800000\">                           </span>\n",
+                            "</pre>\n"
+                        ],
+                        "text/plain": [
+                            "\u001b[2;36m22:36:23 US Eastern Standard Time\u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING: frequency passed to                  \u001b[0m\n",
+                            "\u001b[2;36m                                  \u001b[0m\u001b[32m'Medium.eps_model\u001b[0m\u001b[32m(\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\u001b[31mis outside of             \u001b[0m\n",
+                            "\u001b[2;36m                                  \u001b[0m\u001b[32m'Medium.frequency_range'\u001b[0m\u001b[31m = \u001b[0m\u001b[1;31m(\u001b[0m\u001b[1;36m206753419710997.8\u001b[0m\u001b[31m,\u001b[0m\n",
+                            "\u001b[2;36m                                  \u001b[0m\u001b[1;36m1199169834323787.2\u001b[0m\u001b[1;31m)\u001b[0m\u001b[31m                           \u001b[0m\n"
+                        ]
+                    },
+                    "metadata": {},
+                    "output_type": "display_data"
+                },
+                {
+                    "data": {
+                        "text/html": [
+                            "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                                  </span><span style=\"color: #800000; text-decoration-color: #800000\">WARNING: Since Tidy3D </span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2.7</span><span style=\"color: #800000; text-decoration-color: #800000\">, the default variant</span>\n",
+                            "<span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                                  </span><span style=\"color: #800000; text-decoration-color: #800000\">for silicon dioxide has been switched from    </span>\n",
+                            "<span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                                  </span><span style=\"color: #008000; text-decoration-color: #008000\">'Horiba'</span><span style=\"color: #800000; text-decoration-color: #800000\"> to </span><span style=\"color: #008000; text-decoration-color: #008000\">'Palik_Lossless'</span><span style=\"color: #800000; text-decoration-color: #800000\">.                 </span>\n",
+                            "</pre>\n"
+                        ],
+                        "text/plain": [
+                            "\u001b[2;36m                                 \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING: Since Tidy3D \u001b[0m\u001b[1;36m2.7\u001b[0m\u001b[31m, the default variant\u001b[0m\n",
+                            "\u001b[2;36m                                  \u001b[0m\u001b[31mfor silicon dioxide has been switched from    \u001b[0m\n",
+                            "\u001b[2;36m                                  \u001b[0m\u001b[32m'Horiba'\u001b[0m\u001b[31m to \u001b[0m\u001b[32m'Palik_Lossless'\u001b[0m\u001b[31m.                 \u001b[0m\n"
+                        ]
+                    },
+                    "metadata": {},
+                    "output_type": "display_data"
+                },
+                {
+                    "name": "stdout",
+                    "output_type": "stream",
+                    "text": [
+                        "runs\\demux\\problem.json\n",
+                        "using simulation folder runs\\demux\n",
+                        "runs\\demux\\problem.json\n",
+                        "using simulation folder runs\\demux\n",
+                        "loading problem from c:\\Users\\pxshe\\OneDrive\\Desktop\\beans\\Luminescent.jl\\runs\\demux\n",
+                        "no fdtd binaries found - starting julia session to compile fdtd code...\n",
+                        "setting up simulation...\n",
+                        "σpml = ϵmin * v = 8.347036361694336\n",
+                        "mpml = μmin * v = 4.0\n",
+                        "pml_depths = trim.(pml_depths, maxdeltas) = Float32[0.40000004, 0.40000004]\n",
+                        "saving mode solutions\n",
+                        "saving mode solutions\n",
+                        "saving mode solutions\n",
+                        "saving mode solutions\n",
+                        "saving mode solutions\n",
+                        "saving mode solutions\n",
+                        "(Ttrans, Tss) = (28.75328807276925, 3)\n",
+                        "using CPU backend.\n",
+                        "starting optimization... first iter will be slow due to adjoint compilation.\n",
+                        "\n",
+                        "(1)\n",
+                        "lminloss: 0\n",
+                        "tparams loss: 0.90957534\n",
+                        "weighted total loss 0.90957534\n",
+                        "578.118782 seconds (1.01 G allocations: 55.551 GiB, 5.11% gc time, 92.73% compilation time: <1% of which was recompilation)\n",
+                        "saving checkpoint...\n",
+                        "debug: (o.η, dA) = (284.85196f0, 226)\n",
+                        "fractional change in design: 0.01893590280687055\n",
+                        "\n",
+                        "(2)\n",
+                        "lminloss: 0\n",
+                        "tparams loss: 0.9325714\n",
+                        "weighted total loss 0.9325714\n",
+                        "59.220920 seconds (31.79 M allocations: 11.404 GiB, 38.14% gc time, 9.21% compilation time: 10% of which was recompilation)\n",
+                        "saving checkpoint...\n",
+                        "debug: (o.η, dA) = (237.3766326904297, 83)\n",
+                        "fractional change in design: 0.006954335986594051\n",
+                        "\n",
+                        "(3)\n",
+                        "lminloss: 0\n",
+                        "tparams loss: 0.92524815\n",
+                        "weighted total loss 0.92524815\n",
+                        "59.332213 seconds (27.84 M allocations: 11.230 GiB, 40.83% gc time, 0.57% compilation time: 32% of which was recompilation)\n",
+                        "saving checkpoint...\n",
+                        "debug: (o.η, dA) = (284.8519592285156, 111)\n",
+                        "fractional change in design: 0.009300377042312527\n",
+                        "\n",
+                        "(4)\n",
+                        "lminloss: 0\n",
+                        "tparams loss: 0.9194937\n",
+                        "weighted total loss 0.9194937\n",
+                        "69.248792 seconds (27.70 M allocations: 11.223 GiB, 41.91% gc time, 0.00% compilation time)\n",
+                        "saving checkpoint...\n",
+                        "debug: (o.η, dA) = (341.8223510742187, 101)\n",
+                        "fractional change in design: 0.008462505236698786\n",
+                        "\n",
+                        "(5)\n",
+                        "lminloss: 0\n",
+                        "tparams loss: 0.90807855\n",
+                        "weighted total loss 0.90807855\n",
+                        "86.405843 seconds (27.70 M allocations: 11.223 GiB, 37.99% gc time, 0.00% compilation time)\n",
+                        "saving checkpoint...\n",
+                        "debug: (o.η, dA) = (410.18682128906244, 174)\n",
+                        "fractional change in design: 0.014578969417679095\n",
+                        "\n",
+                        "(6)\n",
+                        "lminloss: 0\n",
+                        "tparams loss: 0.92333055\n",
+                        "weighted total loss 0.92333055\n",
+                        "91.458605 seconds (27.70 M allocations: 11.223 GiB, 39.86% gc time, 0.00% compilation time)\n",
+                        "saving checkpoint...\n",
+                        "debug: (o.η, dA) = (341.8223510742187, 116)\n",
+                        "fractional change in design: 0.009719312945119397\n",
+                        "\n",
+                        "(7)\n",
+                        "lminloss: 0\n",
+                        "tparams loss: 0.94486606\n",
+                        "weighted total loss 0.94486606\n",
+                        "104.713512 seconds (27.70 M allocations: 11.223 GiB, 40.30% gc time, 0.00% compilation time)\n",
+                        "saving checkpoint...\n",
+                        "debug: (o.η, dA) = (284.8519592285156, 113)\n",
+                        "fractional change in design: 0.009467951403435275\n",
+                        "\n",
+                        "(8)\n",
+                        "lminloss: 0\n",
+                        "tparams loss: 0.9314217\n",
+                        "weighted total loss 0.9314217\n",
+                        "103.302416 seconds (27.70 M allocations: 11.223 GiB, 42.05% gc time, 0.00% compilation time)\n",
+                        "saving checkpoint...\n",
+                        "debug: (o.η, dA) = (341.8223510742187, 103)\n",
+                        "fractional change in design: 0.008630079597821534\n",
+                        "\n",
+                        "(9)\n",
+                        "lminloss: 0\n",
+                        "tparams loss: 0.9087913\n",
+                        "weighted total loss 0.9087913\n",
+                        "116.105814 seconds (27.70 M allocations: 11.223 GiB, 39.20% gc time, 0.00% compilation time)\n",
+                        "saving checkpoint...\n",
+                        "debug: (o.η, dA) = (95.39631447339724, 196)\n",
+                        "fractional change in design: 0.016422287390029325\n",
+                        "\n",
+                        "(10)\n",
+                        "lminloss: 0\n",
+                        "tparams loss: 0.8870224\n",
+                        "weighted total loss 0.8870224\n",
+                        "141.365957 seconds (27.70 M allocations: 11.223 GiB, 43.03% gc time, 0.00% compilation time)\n",
+                        "saving checkpoint...\n",
+                        "debug: (o.η, dA) = (114.47557736807669, 135)\n",
+                        "fractional change in design: 0.011311269375785506\n",
+                        "\n",
+                        "(11)\n",
+                        "lminloss: 0\n",
+                        "tparams loss: 0.9157612\n",
+                        "weighted total loss 0.9157612\n",
+                        "158.975275 seconds (27.70 M allocations: 11.223 GiB, 40.71% gc time, 0.00% compilation time)\n",
+                        "saving checkpoint...\n",
+                        "debug: (o.η, dA) = (95.39631447339724, 78)\n",
+                        "fractional change in design: 0.0065354000837871806\n",
+                        "\n",
+                        "(12)\n",
+                        "lminloss: 0\n",
+                        "tparams loss: 0.9080465\n",
+                        "weighted total loss 0.9080465\n",
+                        "131.167565 seconds (27.70 M allocations: 11.223 GiB, 41.27% gc time, 0.00% compilation time)\n",
+                        "saving checkpoint...\n",
+                        "debug: (o.η, dA) = (114.47557736807669, 76)\n",
+                        "fractional change in design: 0.006367825722664432\n",
+                        "\n",
+                        "(13)\n",
+                        "lminloss: 0\n",
+                        "tparams loss: 0.9097321\n",
+                        "weighted total loss 0.9097321\n"
+                    ]
+                }
+            ],
             "source": [
                 "# RAM: 20G\n",
                 "import os\n",
                 "import luminescent as lumi\n",
                 "\n",
                 "path = os.path.join(\"runs\", \"demux\")\n",
-                "c = lumi.mimo(west=1, east=2, l=3, w=3, wwg=.5,taper=0.05)\n",
+                "c = lumi.mimo(west=1, east=2, l=4.0, w=3.0, wwg=.5,taper=0.05)\n",
                 "targets = {\"tparams\": {\n",
                 "    1.55: {\"2,1\": 1.0},\n",
                 "    1.20: {\"3,1\": 1.0},\n",
@@ -720,7 +908,145 @@
             "execution_count": 1,
             "id": "b95e430a-51fd-4019-8f5f-9d2b2e1a2548",
             "metadata": {},
-            "outputs": [],
+            "outputs": [
+                {
+                    "name": "stdout",
+                    "output_type": "stream",
+                    "text": [
+                        "['o2@0,o1@0']\n",
+                        "Write WindowsPath('c:/Users/pxshe/OneDrive/Desktop/beans/Luminescent.jl/runs/splitter/temp/clad_999_0.stl') zmin = 0.000, height = 1.247\n",
+                        "Write WindowsPath('c:/Users/pxshe/OneDrive/Desktop/beans/Luminescent.jl/runs/splitter/temp/box_999_0.stl') zmin = -1.027, height = 2.273\n",
+                        "Write WindowsPath('c:/Users/pxshe/OneDrive/Desktop/beans/Luminescent.jl/runs/splitter/temp/core_1_0.stl') zmin = 0.000, height = 0.220\n",
+                        "C:\\ProgramData\\anaconda3\\Lib\\site-packages\\luminescent\\pic\n"
+                    ]
+                },
+                {
+                    "data": {
+                        "text/html": [
+                            "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">23:09:42 US Eastern Standard Time </span><span style=\"color: #800000; text-decoration-color: #800000\">WARNING: frequency passed to                  </span>\n",
+                            "<span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                                  </span><span style=\"color: #008000; text-decoration-color: #008000\">'Medium.eps_model()'</span><span style=\"color: #800000; text-decoration-color: #800000\">is outside of             </span>\n",
+                            "<span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                                  </span><span style=\"color: #008000; text-decoration-color: #008000\">'Medium.frequency_range'</span><span style=\"color: #800000; text-decoration-color: #800000\"> = </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">(</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">206753419710997.8</span><span style=\"color: #800000; text-decoration-color: #800000\">,</span>\n",
+                            "<span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                                  </span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1199169834323787.2</span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">)</span><span style=\"color: #800000; text-decoration-color: #800000\">                           </span>\n",
+                            "</pre>\n"
+                        ],
+                        "text/plain": [
+                            "\u001b[2;36m23:09:42 US Eastern Standard Time\u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING: frequency passed to                  \u001b[0m\n",
+                            "\u001b[2;36m                                  \u001b[0m\u001b[32m'Medium.eps_model\u001b[0m\u001b[32m(\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\u001b[31mis outside of             \u001b[0m\n",
+                            "\u001b[2;36m                                  \u001b[0m\u001b[32m'Medium.frequency_range'\u001b[0m\u001b[31m = \u001b[0m\u001b[1;31m(\u001b[0m\u001b[1;36m206753419710997.8\u001b[0m\u001b[31m,\u001b[0m\n",
+                            "\u001b[2;36m                                  \u001b[0m\u001b[1;36m1199169834323787.2\u001b[0m\u001b[1;31m)\u001b[0m\u001b[31m                           \u001b[0m\n"
+                        ]
+                    },
+                    "metadata": {},
+                    "output_type": "display_data"
+                },
+                {
+                    "data": {
+                        "text/html": [
+                            "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                                  </span><span style=\"color: #800000; text-decoration-color: #800000\">WARNING: Since Tidy3D </span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2.7</span><span style=\"color: #800000; text-decoration-color: #800000\">, the default variant</span>\n",
+                            "<span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                                  </span><span style=\"color: #800000; text-decoration-color: #800000\">for silicon dioxide has been switched from    </span>\n",
+                            "<span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                                  </span><span style=\"color: #008000; text-decoration-color: #008000\">'Horiba'</span><span style=\"color: #800000; text-decoration-color: #800000\"> to </span><span style=\"color: #008000; text-decoration-color: #008000\">'Palik_Lossless'</span><span style=\"color: #800000; text-decoration-color: #800000\">.                 </span>\n",
+                            "</pre>\n"
+                        ],
+                        "text/plain": [
+                            "\u001b[2;36m                                 \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING: Since Tidy3D \u001b[0m\u001b[1;36m2.7\u001b[0m\u001b[31m, the default variant\u001b[0m\n",
+                            "\u001b[2;36m                                  \u001b[0m\u001b[31mfor silicon dioxide has been switched from    \u001b[0m\n",
+                            "\u001b[2;36m                                  \u001b[0m\u001b[32m'Horiba'\u001b[0m\u001b[31m to \u001b[0m\u001b[32m'Palik_Lossless'\u001b[0m\u001b[31m.                 \u001b[0m\n"
+                        ]
+                    },
+                    "metadata": {},
+                    "output_type": "display_data"
+                },
+                {
+                    "name": "stdout",
+                    "output_type": "stream",
+                    "text": [
+                        "runs\\splitter\\problem.json\n",
+                        "using simulation folder runs\\splitter\n",
+                        "runs\\splitter\\problem.json\n",
+                        "using simulation folder runs\\splitter\n",
+                        "loading problem from c:\\Users\\pxshe\\OneDrive\\Desktop\\beans\\Luminescent.jl\\runs\\splitter\n",
+                        "no fdtd binaries found - starting julia session to compile fdtd code...\n",
+                        "setting up simulation...\n",
+                        "σpml = ϵmin * v = 8.347036361694336\n",
+                        "mpml = μmin * v = 4.0\n",
+                        "pml_depths = trim.(pml_depths, maxdeltas) = Float32[0.40000004, 0.40000004]\n",
+                        "saving mode solutions\n",
+                        "saving mode solutions\n",
+                        "saving mode solutions\n",
+                        "(Ttrans, Tss) = (28.75328807276925, 10)\n",
+                        "using CPU backend.\n",
+                        "starting optimization... first iter will be slow due to adjoint compilation.\n",
+                        "\n",
+                        "(1)\n",
+                        "lminloss: 0\n",
+                        "tparams loss: 0.63133264\n",
+                        "weighted total loss 0.63133264\n",
+                        "529.547124 seconds (1.01 G allocations: 55.705 GiB, 5.44% gc time, 91.70% compilation time: <1% of which was recompilation)\n",
+                        "saving checkpoint...\n",
+                        "debug: (o.η, dA) = (492.22424f0, 117)\n",
+                        "fractional change in design: 0.009803100125680772\n",
+                        "\n",
+                        "(2)\n",
+                        "lminloss: 0\n",
+                        "tparams loss: 0.49467033\n",
+                        "weighted total loss 0.49467033\n",
+                        "65.001153 seconds (35.83 M allocations: 11.784 GiB, 38.37% gc time, 9.07% compilation time: 10% of which was recompilation)\n",
+                        "saving checkpoint...\n",
+                        "debug: (o.η, dA) = (12.839192682640778, 74)\n",
+                        "fractional change in design: 0.006200251361541684\n",
+                        "\n",
+                        "(3)\n",
+                        "lminloss: 0\n",
+                        "tparams loss: 0.51544815\n",
+                        "weighted total loss 0.51544815\n",
+                        "74.434467 seconds (31.88 M allocations: 11.610 GiB, 43.81% gc time, 0.49% compilation time: 32% of which was recompilation)\n",
+                        "saving checkpoint...\n",
+                        "debug: (o.η, dA) = (4.299818797328681, 83)\n",
+                        "fractional change in design: 0.006954335986594051\n",
+                        "\n",
+                        "(4)\n",
+                        "lminloss: 0\n",
+                        "tparams loss: 0.5311855\n",
+                        "weighted total loss 0.5311855\n",
+                        "89.394624 seconds (31.73 M allocations: 11.603 GiB, 43.36% gc time, 0.00% compilation time)\n",
+                        "saving checkpoint...\n",
+                        "debug: (o.η, dA) = (3.583182331107235, 69)\n",
+                        "fractional change in design: 0.005781315458734814\n",
+                        "\n",
+                        "(5)\n",
+                        "lminloss: 0\n",
+                        "tparams loss: 0.6018612\n",
+                        "weighted total loss 0.6018612\n",
+                        "101.972902 seconds (31.73 M allocations: 11.603 GiB, 40.37% gc time, 0.00% compilation time)\n",
+                        "saving checkpoint...\n",
+                        "debug: (o.η, dA) = (2.9859852759226957, 34)\n",
+                        "fractional change in design: 0.0028487641390867197\n",
+                        "\n",
+                        "(6)\n",
+                        "lminloss: 0\n",
+                        "tparams loss: 0.622378\n",
+                        "weighted total loss 0.622378\n",
+                        "124.641510 seconds (31.73 M allocations: 11.603 GiB, 39.88% gc time, 0.00% compilation time)\n",
+                        "saving checkpoint...\n",
+                        "debug: (o.η, dA) = (2.4883210632689132, 14)\n",
+                        "fractional change in design: 0.0011730205278592375\n",
+                        "\n",
+                        "(7)\n",
+                        "lminloss: 0\n",
+                        "tparams loss: 0.6317251\n",
+                        "weighted total loss 0.6317251\n",
+                        "141.241130 seconds (31.73 M allocations: 11.603 GiB, 38.71% gc time, 0.00% compilation time)\n",
+                        "saving checkpoint...\n",
+                        "debug: (o.η, dA) = (2.0736008860574278, 16)\n",
+                        "fractional change in design: 0.0013405948889819858\n",
+                        "\n",
+                        "(8)\n",
+                        "lminloss: 0\n",
+                        "tparams loss: 0.64695907\n",
+                        "weighted total loss 0.64695907\n"
+                    ]
+                }
+            ],
             "source": [
                 "import os\n",
                 "import luminescent as lumi\n",
diff --git a/luminescent/a/splitter.py b/luminescent/a/splitter.py
deleted file mode 100644
index ee23002..0000000
--- a/luminescent/a/splitter.py
+++ /dev/null
@@ -1,31 +0,0 @@
-from pprint import pprint
-import luminescent as lumi
-
-path = "splitter"
-c = lumi.gcells.mimo(west=1, east=2, l=4.0, w=2.0, wwg=.5, taper=.05, )
-targets = {
-    "tparams": {1.55: {"2,1": 0.5}},
-}
-
-prob = lumi.make_pic_inv_prob(
-    c, targets, path,
-    N=2,  nres=15,  symmetries=[1],
-    lvoid=0.15, lsolid=.15,
-    iters=50, stoploss=.05, )
-lumi.solve(prob, run=False)
-# path = "1x4_splitter"
-# c = lumi.gcells.mimo(west=1, east=4, l=6.0, w=6.0, wwg=.5, taper=.05, )
-# targets = {
-#     "tparams": {1.55: {"2,1": 0.25, "3,1": 0.25}},
-#     "phasediff": {1.55: {"2,3": 0.0}},
-# }
-
-# prob = lumi.make_pic_inv_prob(
-#     c, targets, path,
-#     symmetries=[1], lvoid=0.1, lsolid=0.1, dx=0.1,
-#    N=2, stoploss=.03, iters=40)
-# sol = lumi.solve(prob)
-
-# apt install libgl1-mesa-glx
-# sudo apt-get install libcairo2-dev
-# sudo apt install libxcb-cursor0
diff --git a/luminescent/luminescent/sol.py b/luminescent/luminescent/sol.py
index db520bd..1287283 100644
--- a/luminescent/luminescent/sol.py
+++ b/luminescent/luminescent/sol.py
@@ -59,6 +59,7 @@ def run(cmd):
     env = '0;'
     # cmd = ["lumi", path]
     gpu_backend = prob["gpu_backend"]
+    run(["julia", "-e", f'println(Base.active_project())'])
     if not gpu_backend:
         cmd = ["julia", "-e", f'{env}using Luminescent;picrun(raw"{path}")']
     else:
diff --git a/luminescent/tiny.py b/luminescent/tiny.py
new file mode 100644
index 0000000..c2a1727
--- /dev/null
+++ b/luminescent/tiny.py
@@ -0,0 +1,13 @@
+import os
+import luminescent as lumi
+from gdsfactory.generic_tech import LAYER, LAYER_STACK
+import gdsfactory as gf
+import numpy as np
+
+# Minimal problem: a 0.1-long straight waveguide written to runs/tiny with the CUDA option.
+waveguide = gf.components.straight(length=0.1, width=0.5, layer=LAYER.WG)
+run_dir = os.path.join("runs", "tiny")
+lumi.make_pic_sim_prob(run_dir, waveguide, wavelengths=1.55, keys=["2,1"],
+                       nres=15, approx_2D_mode="TE", gpu="CUDA")
+# lumi.solve(run_dir)
+# sol = lumi.lumi_solution()
diff --git a/src/core/gpu.jl b/src/core/gpu.jl
deleted file mode 100644
index a1a90d3..0000000
--- a/src/core/gpu.jl
+++ /dev/null
@@ -1,8 +0,0 @@
-T = AbstractArray{<:Number}
-_cpu(x::T) = Array(x)
-_gpu(x::T) = cu(x)
-_gpu(x) = x
-_cpu(x) = x
-gpu(d) = fmap(_gpu, d, T)
-cpu(d) = fmap(_cpu, d, T)
-
diff --git a/src/core/monitors.jl b/src/core/monitors.jl
index 7358544..b87d026 100644
--- a/src/core/monitors.jl
+++ b/src/core/monitors.jl
@@ -68,7 +68,7 @@ struct MonitorInstance <: AbstractMonitorInstance
     λmodes
     tags
 end
-@functor MonitorInstance (λmodes,)
+@functor MonitorInstance (λmodes, deltas)
 Base.ndims(m::MonitorInstance) = m.d
 area(m::MonitorInstance) = m.v
 wavelengths(m::MonitorInstance) = keys(m.λmodes)
diff --git a/src/core/update.jl b/src/core/update.jl
index 43087c5..28aaf83 100644
--- a/src/core/update.jl
+++ b/src/core/update.jl
@@ -55,6 +55,8 @@ function update(u, p, t, dt, field_diffdeltas, field_diffpadvals, source_instanc
     dHdt = -(∇ × E + H ⊙ m) ⊘ μ
     H += dHdt * dt
 
+    @ignore_derivatives gc()
+    # @ignore_derivatives unsafe_free!.((Js, dEdt, dHdt))
     # (; E, H, (Jkeys .=> Jm)..., (Pkeys .=> Pm)...)
     namedtuple([:E => E, :H => H, (Jkeys .=> Jm)..., (Pkeys .=> Pm)...])
 end
diff --git a/src/del.jl b/src/del.jl
index 93fc7ea..f902134 100644
--- a/src/del.jl
+++ b/src/del.jl
@@ -11,6 +11,7 @@ function diffpad(a, vl, vr=vl; dims=1, diff=diff)
     r = !isnothing(vr)
 
     sz = Tuple(size(a) + (l + r - 1) * sel)
+    # b = similar(a, sz)
     b = Buffer(a, sz)
     b[range.(l * sel + 1, sz - r * sel)...] = diff(a; dims)
     pad!(b, vl, l * sel, 0)
diff --git a/src/gpu.jl b/src/gpu.jl
new file mode 100644
index 0000000..3e955b1
--- /dev/null
+++ b/src/gpu.jl
@@ -0,0 +1,3 @@
+
+unsafe_free!(a) = 0
+unsafe_free!(a::CUDA.CuArray) = CUDA.unsafe_free!(a)
\ No newline at end of file
diff --git a/src/main.jl b/src/main.jl
index 829edbd..dc4c895 100644
--- a/src/main.jl
+++ b/src/main.jl
@@ -28,4 +28,6 @@ include("pic/run.jl")
 
 include("ops.jl")
 include("del.jl")
-# include("main.jl")
\ No newline at end of file
+# include("main.jl")
+
+include("gpu.jl")
\ No newline at end of file
diff --git a/src/pic/run.jl b/src/pic/run.jl
index 52a7fde..e2f82dd 100644
--- a/src/pic/run.jl
+++ b/src/pic/run.jl
@@ -92,7 +92,7 @@ function picrun(path; gpuarray=nothing, kw...)
                 frame = frame .>= 0.99maximum(frame)
                 # frame = nothing
                 start = round((bbox[1] - lb) / dl + 1)
-                b = Blob(szd; solid_frac=0.95, morph=false, lsolid=lsolid / dl, lvoid=lvoid / dl, symmetries, F, frame, start)
+                b = Blob(szd; solid_frac=0.99, morph=false, lsolid=lsolid / dl, lvoid=lvoid / dl, symmetries, F, frame, start)
                 display(heatmap(b.frame))
 
                 if !isnothing(sol) && !restart
@@ -182,8 +182,9 @@ function picrun(path; gpuarray=nothing, kw...)
     end
     t0 = time()
     lb3 = (lb..., zmin)
+    # error("not implemented")
+    println("compiling simulation code...")
     if study == "sparams"
-        println("simulating...")
         @unpack S, sols = make_pic_sim_prob(runs, run_probs, lb, dl;
             F, verbose=true, framerate, path)
         plotsols(sols, run_probs, path)
diff --git a/src/pictest.jl b/src/pictest.jl
index ef596a7..34ff7f5 100644
--- a/src/pictest.jl
+++ b/src/pictest.jl
@@ -7,6 +7,8 @@ ENV["JULIA_PKG_PRECOMPILE_AUTO"] = 0
 # picrun(joinpath("runs", "straight");)# gpuarray=cu)
 # picrun(joinpath("runs", "bend_R5"))
 # picrun(joinpath("runs", "mode_converter"))
-picrun(joinpath("runs", "demux"))
+# picrun(joinpath("runs", "demux"))
 # picrun(joinpath("runs", "splitter"))
 
+using CUDA
+picrun(joinpath("runs", "tiny"); gpuarray=cu)
\ No newline at end of file
diff --git a/src/sim/setup.jl b/src/sim/setup.jl
index 004b992..6ffefe0 100644
--- a/src/sim/setup.jl
+++ b/src/sim/setup.jl
@@ -316,7 +316,6 @@ function setup(dl, boundaries, sources, monitors, deltas, mode_deltas;
                      geometry, _geometry, nmax, nmin, ϵeff,
                      is_field_on_lb, is_field_on_ub,
                      u0, dt, kw...) |> pairs |> OrderedDict
-
 end
 update = update
 setup = setup
\ No newline at end of file
diff --git a/src/sim/solve.jl b/src/sim/solve.jl
index 7a1f503..2c5cf11 100644
--- a/src/sim/solve.jl
+++ b/src/sim/solve.jl
@@ -1,10 +1,18 @@
+function tidy(t, dt)
+    if round(t) > round(t - dt)
+        println("simulation time = $t, took $(timepassed()) seconds")
+    end
+end
+
 function f1(((u,), p, (dt, field_diffdeltas, field_diffpadvals, source_instances)), t)
-    u = update(u, p, t, dt, field_diffdeltas, field_diffpadvals, source_instances)
+    tidy(t, dt)
+    @time u = update(u, p, t, dt, field_diffdeltas, field_diffpadvals, source_instances)
     ((u,), p, (dt, field_diffdeltas, field_diffpadvals, source_instances))
 end
 
 function f2(((u, mf), p, (dt, field_diffdeltas, field_diffpadvals, source_instances), (t0, T, monitor_instances)), t)
-    u = update(u, p, t, dt, field_diffdeltas, field_diffpadvals, source_instances;)
+    tidy(t, dt)
+    @time u = update(u, p, t, dt, field_diffdeltas, field_diffpadvals, source_instances;)
     mf += [[
         begin
             c = dt / T * cispi(-2(t - t0) / λ)
@@ -39,10 +47,12 @@ function solve(prob, ;
     init = (us0, p, (dt, field_diffdeltas, field_diffpadvals, source_instances))
 
     ts = 0:dt:T[1]-F(0.001)
+    nt = -1
     if save_memory
         (u,), = adjoint_reduce(f1, ts, init, ulims)
     else
         (u,), = reduce(ts; init) do us, t
+
             ignore() do
                 if framerate > 0 && t > 0
                     if t % (1 / framerate) < dt
@@ -63,6 +73,7 @@ function solve(prob, ;
     ts = ts[end]+dt:dt:T[2]-F(0.001)
     init = ((u, 0), p, (dt, field_diffdeltas, field_diffpadvals, source_instances), (T[2], durations[2], monitor_instances))
 
+    println("accumulating dft fields")
     if save_memory
         (u, mf), = adjoint_reduce(f2, ts, init, ulims)
     else