diff --git a/CHANGELOG.md b/CHANGELOG.md index a634273..57e7b15 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,21 +4,32 @@ All notable changes to this project will be documented in this file. Dates are d Generated by [`auto-changelog`](https://github.com/CookPete/auto-changelog). -### [v1.0.0](https://github.jpl.nasa.gov/emit-sds/emit-sds-l2a/compare/v0.1.0...v1.0.0) +#### [v1.1.0](https://github.com/emit-sds/emit-sds-l2a/compare/v1.0.0...v1.1.0) + +> 3 June 2022 + +- spec_quality checker cleanup [`#7`](https://github.com/emit-sds/emit-sds-l2a/pull/7) +- Parallel mask [`#6`](https://github.com/emit-sds/emit-sds-l2a/pull/6) +- Add environment.yml and requirements.txt for working emit-isofit environment. [`e2e5f61`](https://github.com/emit-sds/emit-sds-l2a/commit/e2e5f61d930837b6e2af57cfc6b4929d10da41d3) +- parallel masking [`2d3d0a6`](https://github.com/emit-sds/emit-sds-l2a/commit/2d3d0a62718e3f3758e582009531201c599e7354) +- add license [`0c4e18f`](https://github.com/emit-sds/emit-sds-l2a/commit/0c4e18faf34586b5a162e1d83798740939f0bdf4) + +### [v1.0.0](https://github.com/emit-sds/emit-sds-l2a/compare/v0.1.0...v1.0.0) > 9 February 2022 -- Netcdf [`#4`](https://github.jpl.nasa.gov/emit-sds/emit-sds-l2a/pull/4) -- handle bad data flags [`#3`](https://github.jpl.nasa.gov/emit-sds/emit-sds-l2a/pull/3) -- import cleanup and pep8 [`b2fae44`](https://github.jpl.nasa.gov/emit-sds/emit-sds-l2a/commit/b2fae440d7ca59c8da9a72d59f835ff2f2717315) -- Add surface_filtered_emit_synthetic.mat and wavelengths_emit_synthetic.txt [`9afbbd0`](https://github.jpl.nasa.gov/emit-sds/emit-sds-l2a/commit/9afbbd068b86a3b7072b7ee731c41c2a01c64c53) -- add base l2a netcdf creation, untested [`62c8aee`](https://github.jpl.nasa.gov/emit-sds/emit-sds-l2a/commit/62c8aeebc816cf3353024b5ac1443cf4a58d2260) +- Merge develop to main for 1.0.0 [`#5`](https://github.com/emit-sds/emit-sds-l2a/pull/5) +- Netcdf [`#4`](https://github.com/emit-sds/emit-sds-l2a/pull/4) +- handle bad data flags [`#3`](https://github.com/emit-sds/emit-sds-l2a/pull/3) +- import cleanup and pep8 [`b2fae44`](https://github.com/emit-sds/emit-sds-l2a/commit/b2fae440d7ca59c8da9a72d59f835ff2f2717315) +- Add surface_filtered_emit_synthetic.mat and wavelengths_emit_synthetic.txt [`9afbbd0`](https://github.com/emit-sds/emit-sds-l2a/commit/9afbbd068b86a3b7072b7ee731c41c2a01c64c53) +- add base l2a netcdf creation, untested [`62c8aee`](https://github.com/emit-sds/emit-sds-l2a/commit/62c8aeebc816cf3353024b5ac1443cf4a58d2260) #### v0.1.0 > 26 January 2021 -- Merge develop into main for Release 1 [`#1`](https://github.jpl.nasa.gov/emit-sds/emit-sds-l2a/pull/1) -- Add resources for Release 1 including apply_glt (for masks), solar irradiance, environment.yml, surface files, and minor tweak to make_emit_masks [`d301867`](https://github.jpl.nasa.gov/emit-sds/emit-sds-l2a/commit/d301867e6e58b6790e6e41a3b5c5b41968c833b8) -- default surface [`b92f98a`](https://github.jpl.nasa.gov/emit-sds/emit-sds-l2a/commit/b92f98af46d89ecf7cd414486a9c4cb45886331a) -- updated better data read style, reworked code to be done in sections for reading and testing [`3e04ebc`](https://github.jpl.nasa.gov/emit-sds/emit-sds-l2a/commit/3e04ebcf3e19847307e305bc3d8ba4dd6162f383) +- Merge develop into main for Release 1 [`#1`](https://github.com/emit-sds/emit-sds-l2a/pull/1) +- Add resources for Release 1 including apply_glt (for masks), solar irradiance, environment.yml, surface files, and minor tweak to make_emit_masks [`d301867`](https://github.com/emit-sds/emit-sds-l2a/commit/d301867e6e58b6790e6e41a3b5c5b41968c833b8) +- default surface [`b92f98a`](https://github.com/emit-sds/emit-sds-l2a/commit/b92f98af46d89ecf7cd414486a9c4cb45886331a) +- updated better data read style, reworked code to be done in sections for reading and testing [`3e04ebc`](https://github.com/emit-sds/emit-sds-l2a/commit/3e04ebcf3e19847307e305bc3d8ba4dd6162f383) diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md index 53d4d8b..2b945c2 100644 --- a/README.md +++ b/README.md @@ -1,26 +1,72 @@ -# emit-sds-l2a +

emit-sds-l2a

+ +_NOTE - at this time the EMIT repositories are not supporting Pull Requests from members outside of the EMIT-SDS Team. This is expected to change in March, and guidance on branches will be provided at that time. At present, public migration of this repository is a work-in-progress, and not all features are yet fully up to date. See the **develop** branch - set as default - for the latest code._ Welcome to the EMIT Level 2a science data system repository. To understand how this repository is linked to the rest of the emit-sds repositories, please see [the repository guide](https://github.jpl.nasa.gov/emit-sds/emit-main/wiki/Repository-Guide). -The "surface_v2" subdirectory contains the data files used to generate the EMIT default surface model. The data files mostly come from the USGS spectral library version 7 (Kokaly et al., 2017), with some adjustments to smooth out artifacts at the periphery. There are some additional spectra from Tan et al (2016). The python script "make_surface.py" demonstrates how to call the surface model utility. It must be created anew for each wavelength file. - -The OE code is best called with the "apply_oe" script in the isofit/utils subdirectory of the isofit repository. An example command using current best practices might look like: - -> python apply_oe --modtran_path [MODTRAN_INSTALL_DIRECTORY]\ - --wavelength_path [WAVELENGTH FILE] \ - --surface_path [SURFACE_MODEL .mat FILE] \ - --atmosphere_type [ATM_SUBARC_SUMMER |_SUBATM_MIDLAT_SUMMER | ATM_TROPICAL] \ - --multiple_restarts \ - --presolve \ - --empirical_line 1 \ - --ray_temp_dir [MY_USER_TEMPORARY_DIRECTORY] \ - [INPUT_RADIANCE] \ - [INPUT_LOC_FILE] \ - [INPUT_OBS_FILE] \ - [WORKING_DIRECTORY] \ +L2A consists of three components: +1) Construction of a surface model (one time) +2) Estimation of surface reflectance and uncertainy using an optimal-estimation based retrieval +3) Generation of masks (cloud / cirrus / water / spacecraft / etc.) + + +## Surface Model Construction +The "surface_v2" subdirectory contains the data files used to generate the EMIT default surface model. The data files mostly come from the USGS spectral library version 7 [(Kokaly et al., 2017)](https://dx.doi.org/10.5066/F7RR1WDJ), with some additional spectra from [Tan et al (2016)](https://doi.org/10.3390/rs8060517). Some adjustments to smooth out artifacts at the periphery were included, and the finalized, adjusted spectra can be found at these locations: [snow / ice](https://doi.org/10.21232/xhgtM3A9), [vegetation](https://doi.org/10.21232/6sQDNjfv), [water](https://doi.org/10.21232/ZbyfMgxY), and [other](https://doi.org/10.21232/ezrQtdcw). To construct the surface model, utilize the "surface_model" utility in the isofit/utils subdirectory of the [isofit repository](https://github.com/isofit/isofit). An example call, made from inside surface_v2, may look like: + +``` +python -c "from isofit.utils import surface_model; surface_model('./surface.json')" +``` + +This will create an output .mat file, at the location specified by "output_model_file" in the json configuration file. This surface model will be passed into subsequent calls to apply_oe (see below) and need only be generated once for general use (though specific investigations may require custom surface models). Changing the wavelengths.txt file facilitates use with different instruments. + +## Estimating Surface Reflectance and Uncertainty + +The core optimal estimation code is best called with the "apply_oe" script in the isofit/utils subdirectory of the [isofit repository](https://github.com/isofit/isofit). The current implementation for the EMIT SDS looks like: + +``` +python apply_oe.py, + --presolve=1, \ + --empirical_line=1, \ + --emulator_base=[sRTMnet_DIRECTORY], \ + --n_cores str(self.n_cores), \ + --surface_path [SURFACE_MODEL .mat FILE], \ + --ray_temp_dir [SCRATCH_PARALLEL_DIRECTORY], \ + --log_file [OUTPUT_LOG_FILE], \ + --logging_level "INFO", \ + [INPUT_RADIANCE_FILE], \ + [INPUT_LOC_FILE], \ + [INPUT_OBS_FILE], \ + [OUTPUT_DIRECTORY], \ + "emit" +``` -The output reflectance file will appear in [WORKING_DIRECTORY]/output +The estimated surface reflectance and uncertainty files will appear in [WORKING_DIRECTORY]/output. The isofit repository is the result of extensive scientific inquiry that extends an initial presentation by [Thompson et al](https://doi.org/10.1016/j.rse.2018.07.003), and a bibliography can be found [here](https://isofit.readthedocs.io/en/latest/custom/bibliography.html). The sRTMnet dependency comes from [Brodrick et al](https://doi.org/10.1016/j.rse.2021.112476), and is available for [download](https://doi.org/10.5281/zenodo.4096627). + +## Applying Masks + +The EMIT mask can be generated using the following call, which is dependent on having completed the above surface reflectance estimation step: + +``` +python make_emit_masks.py \ + [INPUT_RADIANCE_FILE] + [INPUT_LOC_FILE] + [INPUT_LABEL_FILE] + [INPUT_STATE_SUBS_FILE] + [INPUT_IRR_FILE] + [OUTPUT_MASK_FILE] +``` + +The [INPUT_LABEL_FILE] and [INPUT_STATE_SUBS_FILE] can be found in the [WORKING_DIRECTORY]/output file noted about with the extensions \_lbl \_subs_state, respectively. An [INPUT_IRR/FILE] can be found in data/kurudz_0.1nm.dat. The [OUTPUT_MASK_FILE] is a 7 band output, designating: + +1) Cloud flag +2) Cirrus flag +3) Surface water flag +4) Spacecraft flag +5) Dilated cloud flag +6) Aerosol Optical Depth at 550 nm +7) Atmsopheric water vapor [g / cm^2] +8) Aggregate mask flag + +Masks are denoted as ones. + -References: -- Kokaly, R. F., R. N. Clark, G. A. Swayze, K. E. Livo, T. M. Hoefen, N. C. Pearson, R. A. Wise et al. USGS spectral library version 7. No. 1035. US Geological Survey (2017). -- J. Tan, K.A. Cherkauer, I. Chaubey Developing a comprehensive spectral-biogeochemical database of Midwestern rivers for water quality retrieval using remote sensing data: a case study of the Wabash River and its tributary, Indiana Remote Sens., 8 (2016) diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..60ce847 --- /dev/null +++ b/environment.yml @@ -0,0 +1,310 @@ +name: base +channels: + - defaults +dependencies: + - _ipyw_jlab_nb_ext_conf=0.1.0=py37_0 + - _libgcc_mutex=0.1=main + - alabaster=0.7.12=py37_0 + - anaconda=2019.10=py37_0 + - anaconda-client=1.7.2=py37_0 + - anaconda-navigator=1.9.7=py37_0 + - anaconda-project=0.8.3=py_0 + - asn1crypto=1.0.1=py37_0 + - astroid=2.3.1=py37_0 + - astropy=3.2.2=py37h7b6447c_0 + - atomicwrites=1.3.0=py37_1 + - attrs=19.2.0=py_0 + - babel=2.7.0=py_0 + - backcall=0.1.0=py37_0 + - backports=1.0=py_2 + - backports.functools_lru_cache=1.5=py_2 + - backports.os=0.1.1=py37_0 + - backports.shutil_get_terminal_size=1.0.0=py37_2 + - backports.tempfile=1.0=py_1 + - backports.weakref=1.0.post1=py_1 + - beautifulsoup4=4.8.0=py37_0 + - bitarray=1.0.1=py37h7b6447c_0 + - bkcharts=0.2=py37_0 + - blas=1.0=mkl + - bleach=3.1.0=py37_0 + - blosc=1.16.3=hd408876_0 + - bokeh=1.3.4=py37_0 + - boto=2.49.0=py37_0 + - bottleneck=1.2.1=py37h035aef0_1 + - bzip2=1.0.8=h7b6447c_0 + - ca-certificates=2019.8.28=0 + - cairo=1.14.12=h8948797_3 + - certifi=2019.9.11=py37_0 + - cffi=1.12.3=py37h2e261b9_0 + - chardet=3.0.4=py37_1003 + - click=7.0=py37_0 + - cloudpickle=1.2.2=py_0 + - clyent=1.2.2=py37_1 + - colorama=0.4.1=py37_0 + - conda=4.8.3=py37_0 + - conda-build=3.18.9=py37_3 + - conda-env=2.6.0=1 + - conda-package-handling=1.6.0=py37h7b6447c_0 + - conda-verify=3.4.2=py_1 + - contextlib2=0.6.0=py_0 + - cryptography=2.7=py37h1ba5d50_0 + - curl=7.65.3=hbc83047_0 + - cycler=0.10.0=py37_0 + - cython=0.29.13=py37he6710b0_0 + - cytoolz=0.10.0=py37h7b6447c_0 + - dask=2.5.2=py_0 + - dask-core=2.5.2=py_0 + - dbus=1.13.6=h746ee38_0 + - decorator=4.4.0=py37_1 + - defusedxml=0.6.0=py_0 + - distributed=2.5.2=py_0 + - docutils=0.15.2=py37_0 + - entrypoints=0.3=py37_0 + - et_xmlfile=1.0.1=py37_0 + - expat=2.2.6=he6710b0_0 + - fastcache=1.1.0=py37h7b6447c_0 + - filelock=3.0.12=py_0 + - flask=1.1.1=py_0 + - fontconfig=2.13.0=h9420a91_0 + - freetype=2.9.1=h8a8886c_1 + - freexl=1.0.5=h14c3975_0 + - fribidi=1.0.5=h7b6447c_0 + - fsspec=0.5.2=py_0 + - future=0.17.1=py37_0 + - gdal=2.3.3=py37hbb2a789_0 + - geos=3.7.1=he6710b0_0 + - get_terminal_size=1.0.0=haa9412d_0 + - gevent=1.4.0=py37h7b6447c_0 + - giflib=5.1.4=h14c3975_1 + - glib=2.56.2=hd408876_0 + - glob2=0.7=py_0 + - gmp=6.1.2=h6c8ec71_1 + - gmpy2=2.0.8=py37h10f8cd9_2 + - graphite2=1.3.13=h23475e2_0 + - greenlet=0.4.15=py37h7b6447c_0 + - gst-plugins-base=1.14.0=hbbd80ab_1 + - gstreamer=1.14.0=hb453b48_1 + - harfbuzz=1.8.8=hffaf4a1_0 + - hdf4=4.2.13=h3ca952b_2 + - hdf5=1.10.4=hb1b8bf9_0 + - heapdict=1.0.1=py_0 + - html5lib=1.0.1=py37_0 + - icu=58.2=h9c2bf20_1 + - idna=2.8=py37_0 + - imageio=2.6.0=py37_0 + - imagesize=1.1.0=py37_0 + - importlib_metadata=0.23=py37_0 + - intel-openmp=2019.4=243 + - ipykernel=5.1.2=py37h39e3cac_0 + - ipython=7.8.0=py37h39e3cac_0 + - ipython_genutils=0.2.0=py37_0 + - ipywidgets=7.5.1=py_0 + - isort=4.3.21=py37_0 + - itsdangerous=1.1.0=py37_0 + - jbig=2.1=hdba287a_0 + - jdcal=1.4.1=py_0 + - jedi=0.15.1=py37_0 + - jeepney=0.4.1=py_0 + - jinja2=2.10.3=py_0 + - joblib=0.13.2=py37_0 + - jpeg=9b=h024ee3a_2 + - json-c=0.13.1=h1bed415_0 + - json5=0.8.5=py_0 + - jsonschema=3.0.2=py37_0 + - jupyter=1.0.0=py37_7 + - jupyter_client=5.3.3=py37_1 + - jupyter_console=6.0.0=py37_0 + - jupyter_core=4.5.0=py_0 + - jupyterlab=1.1.4=pyhf63ae98_0 + - jupyterlab_server=1.0.6=py_0 + - kealib=1.4.7=hd0c454d_6 + - keyring=18.0.0=py37_0 + - kiwisolver=1.1.0=py37he6710b0_0 + - krb5=1.16.1=h173b8e3_7 + - lazy-object-proxy=1.4.2=py37h7b6447c_0 + - libarchive=3.3.3=h5d8350f_5 + - libboost=1.67.0=h46d08c1_4 + - libcurl=7.65.3=h20c2e04_0 + - libdap4=3.19.1=h6ec2957_0 + - libedit=3.1.20181209=hc058e9b_0 + - libffi=3.2.1=hd88cf55_4 + - libgcc-ng=9.1.0=hdf63c60_0 + - libgdal=2.3.3=h2e7e64b_0 + - libgfortran-ng=7.3.0=hdf63c60_0 + - libkml=1.3.0=h590aaf7_4 + - liblief=0.9.0=h7725739_2 + - libnetcdf=4.6.1=h11d0813_2 + - libpng=1.6.37=hbc83047_0 + - libpq=11.2=h20c2e04_0 + - libsodium=1.0.16=h1bed415_0 + - libspatialite=4.3.0a=hb08deb6_19 + - libssh2=1.8.2=h1ba5d50_0 + - libstdcxx-ng=9.1.0=hdf63c60_0 + - libtiff=4.0.10=h2733197_2 + - libtool=2.4.6=h7b6447c_5 + - libuuid=1.0.3=h1bed415_2 + - libxcb=1.13=h1bed415_1 + - libxml2=2.9.9=hea5a465_1 + - libxslt=1.1.33=h7d1a2b0_0 + - llvmlite=0.29.0=py37hd408876_0 + - locket=0.2.0=py37_1 + - lxml=4.4.1=py37hefd8a0e_0 + - lz4-c=1.8.1.2=h14c3975_0 + - lzo=2.10=h49e0be7_2 + - markupsafe=1.1.1=py37h7b6447c_0 + - matplotlib=3.1.1=py37h5429711_0 + - mccabe=0.6.1=py37_1 + - mistune=0.8.4=py37h7b6447c_0 + - mkl=2019.4=243 + - mkl-service=2.3.0=py37he904b0f_0 + - mkl_fft=1.0.14=py37ha843d7b_0 + - mkl_random=1.1.0=py37hd6b4f25_0 + - mock=3.0.5=py37_0 + - more-itertools=7.2.0=py37_0 + - mpc=1.1.0=h10f8cd9_1 + - mpfr=4.0.1=hdf1c602_3 + - mpmath=1.1.0=py37_0 + - multipledispatch=0.6.0=py37_0 + - navigator-updater=0.2.1=py37_0 + - nbconvert=5.6.0=py37_1 + - nbformat=4.4.0=py37_0 + - ncurses=6.1=he6710b0_1 + - networkx=2.3=py_0 + - nltk=3.4.5=py37_0 + - nose=1.3.7=py37_2 + - notebook=6.0.1=py37_0 + - numba=0.45.1=py37h962f231_0 + - numexpr=2.7.0=py37h9e4a6bb_0 + - numpy=1.17.2=py37haad9e8e_0 + - numpy-base=1.17.2=py37hde5b4d6_0 + - numpydoc=0.9.1=py_0 + - olefile=0.46=py37_0 + - openjpeg=2.3.0=h05c96fa_1 + - openpyxl=3.0.0=py_0 + - openssl=1.1.1d=h7b6447c_2 + - packaging=19.2=py_0 + - pandas=0.25.1=py37he6710b0_0 + - pandoc=2.2.3.2=0 + - pandocfilters=1.4.2=py37_1 + - pango=1.42.4=h049681c_0 + - parso=0.5.1=py_0 + - partd=1.0.0=py_0 + - patchelf=0.9=he6710b0_3 + - path.py=12.0.1=py_0 + - pathlib2=2.3.5=py37_0 + - patsy=0.5.1=py37_0 + - pcre=8.43=he6710b0_0 + - pep8=1.7.1=py37_0 + - pexpect=4.7.0=py37_0 + - pickleshare=0.7.5=py37_0 + - pillow=6.2.0=py37h34e0f95_0 + - pixman=0.38.0=h7b6447c_0 + - pkginfo=1.5.0.1=py37_0 + - pluggy=0.13.0=py37_0 + - ply=3.11=py37_0 + - poppler=0.65.0=h581218d_1 + - poppler-data=0.4.9=0 + - proj4=5.2.0=he6710b0_1 + - prometheus_client=0.7.1=py_0 + - prompt_toolkit=2.0.10=py_0 + - psutil=5.6.3=py37h7b6447c_0 + - ptyprocess=0.6.0=py37_0 + - py=1.8.0=py37_0 + - py-lief=0.9.0=py37h7725739_2 + - pycodestyle=2.5.0=py37_0 + - pycosat=0.6.3=py37h14c3975_0 + - pycparser=2.19=py37_0 + - pycrypto=2.6.1=py37h14c3975_9 + - pycurl=7.43.0.3=py37h1ba5d50_0 + - pyflakes=2.1.1=py37_0 + - pygments=2.4.2=py_0 + - pylint=2.4.2=py37_0 + - pyodbc=4.0.27=py37he6710b0_0 + - pyopenssl=19.0.0=py37_0 + - pyparsing=2.4.2=py_0 + - pyqt=5.9.2=py37h05f1152_2 + - pyrsistent=0.15.4=py37h7b6447c_0 + - pysocks=1.7.1=py37_0 + - pytables=3.5.2=py37h71ec239_1 + - pytest=5.2.1=py37_0 + - pytest-arraydiff=0.3=py37h39e3cac_0 + - pytest-astropy=0.5.0=py37_0 + - pytest-doctestplus=0.4.0=py_0 + - pytest-openfiles=0.4.0=py_0 + - pytest-remotedata=0.3.2=py37_0 + - python=3.7.4=h265db76_1 + - python-dateutil=2.8.0=py37_0 + - python-libarchive-c=2.8=py37_13 + - pytz=2019.3=py_0 + - pyyaml=5.1.2=py37h7b6447c_0 + - pyzmq=18.1.0=py37he6710b0_0 + - qt=5.9.7=h5867ecd_1 + - qtawesome=0.6.0=py_0 + - qtconsole=4.5.5=py_0 + - qtpy=1.9.0=py_0 + - readline=7.0=h7b6447c_5 + - requests=2.22.0=py37_0 + - ripgrep=0.10.0=hc07d326_0 + - rope=0.14.0=py_0 + - ruamel_yaml=0.15.46=py37h14c3975_0 + - scikit-learn=0.21.3=py37hd81dba3_0 + - scipy=1.3.1=py37h7c811a0_0 + - seaborn=0.9.0=py37_0 + - secretstorage=3.1.1=py37_0 + - send2trash=1.5.0=py37_0 + - setuptools=41.4.0=py37_0 + - simplegeneric=0.8.1=py37_2 + - singledispatch=3.4.0.3=py37_0 + - sip=4.19.8=py37hf484d3e_0 + - snappy=1.1.7=hbae5bb6_3 + - snowballstemmer=2.0.0=py_0 + - sortedcollections=1.1.2=py37_0 + - sortedcontainers=2.1.0=py37_0 + - soupsieve=1.9.3=py37_0 + - sphinx=2.2.0=py_0 + - sphinxcontrib=1.0=py37_1 + - sphinxcontrib-applehelp=1.0.1=py_0 + - sphinxcontrib-devhelp=1.0.1=py_0 + - sphinxcontrib-htmlhelp=1.0.2=py_0 + - sphinxcontrib-jsmath=1.0.1=py_0 + - sphinxcontrib-qthelp=1.0.2=py_0 + - sphinxcontrib-serializinghtml=1.1.3=py_0 + - sphinxcontrib-websupport=1.1.2=py_0 + - spyder=3.3.6=py37_0 + - spyder-kernels=0.5.2=py37_0 + - sqlalchemy=1.3.9=py37h7b6447c_0 + - sqlite=3.30.0=h7b6447c_0 + - statsmodels=0.10.1=py37hdd07704_0 + - sympy=1.4=py37_0 + - tbb=2019.4=hfd86e86_0 + - tblib=1.4.0=py_0 + - terminado=0.8.2=py37_0 + - testpath=0.4.2=py37_0 + - tk=8.6.8=hbc83047_0 + - toolz=0.10.0=py_0 + - tornado=6.0.3=py37h7b6447c_0 + - tqdm=4.36.1=py_0 + - traitlets=4.3.3=py37_0 + - unicodecsv=0.14.1=py37_0 + - unixodbc=2.3.7=h14c3975_0 + - urllib3=1.24.2=py37_0 + - wcwidth=0.1.7=py37_0 + - webencodings=0.5.1=py37_1 + - werkzeug=0.16.0=py_0 + - wheel=0.33.6=py37_0 + - widgetsnbextension=3.5.1=py37_0 + - wrapt=1.11.2=py37h7b6447c_0 + - wurlitzer=1.0.3=py37_0 + - xerces-c=3.2.2=h780794e_0 + - xlrd=1.2.0=py37_0 + - xlsxwriter=1.2.1=py_0 + - xlwt=1.3.0=py37_0 + - xz=5.2.4=h14c3975_4 + - yaml=0.1.7=had09818_2 + - zeromq=4.3.1=he6710b0_3 + - zict=1.0.0=py_0 + - zipp=0.6.0=py_0 + - zlib=1.2.11=h7b6447c_3 + - zstd=1.3.7=h0b5b093_0 +prefix: /home/drt/src/anaconda3 + diff --git a/make_emit_masks.py b/make_emit_masks.py index a28bf61..be45736 100644 --- a/make_emit_masks.py +++ b/make_emit_masks.py @@ -1,8 +1,13 @@ -#!/home/dthompson/src/anaconda/bin/python -# David R Thompson +""" +Mask generation for imaging spectroscopy, oriented towards EMIT. + +Authors: David R. Thompson, david.r.thompson@jpl.nasa.gov, + Philip G. Brodrick, philip.brodrick@jpl.nasa.gov +""" import os import argparse +from osgeo import gdal import numpy as np from spectral.io import envi from isofit.core.sunposition import sunpos @@ -10,6 +15,8 @@ from datetime import datetime from scipy.ndimage.morphology import distance_transform_edt from emit_utils.file_checks import envi_header +import ray +import multiprocessing def haversine_distance(lon1, lat1, lon2, lat2, radius=6335439): @@ -36,6 +43,86 @@ def haversine_distance(lon1, lat1, lon2, lat2, radius=6335439): return d +@ray.remote +def build_line_masks(start_line: int, stop_line: int, rdnfile: str, locfile: str, lblfile: str, state: np.array, dt: datetime, h2o_band: np.array, aod_bands: np.array, pixel_size: float, outfile: str, wl: np.array, irr: np.array): + # determine glint bands having negligible water reflectance + BLUE = np.logical_and(wl > 440, wl < 460) + NIR = np.logical_and(wl > 950, wl < 1000) + SWIRA = np.logical_and(wl > 1250, wl < 1270) + SWIRB = np.logical_and(wl > 1640, wl < 1660) + SWIRC = np.logical_and(wl > 2200, wl < 2500) + b450 = np.argmin(abs(wl-450)) + b762 = np.argmin(abs(wl-762)) + b780 = np.argmin(abs(wl-780)) + b1000 = np.argmin(abs(wl-1000)) + b1250 = np.argmin(abs(wl-1250)) + b1380 = np.argmin(abs(wl-1380)) + b1650 = np.argmin(abs(wl-1650)) + + rdn_ds = envi.open(envi_header(rdnfile)).open_memmap(interleave='bil') + loc_ds = envi.open(envi_header(locfile)).open_memmap(interleave='bil') + lbl_ds = envi.open(envi_header(lblfile)).open_memmap(interleave='bil') + + return_mask = np.zeros((stop_line - start_line, 8, rdn_ds.shape[2])) + for line in range(start_line, stop_line): + print(f'{line} / {stop_line - start_line}') + loc = loc_ds[line,...].copy().astype(np.float32).T + rdn = rdn_ds[line,...].copy().astype(np.float32).T + lbl = lbl_ds[line,...].copy().astype(np.float32).T + x = np.zeros((rdn.shape[0], state.shape[1])) + + elevation_m = loc[:, 2] + latitude = loc[:, 1] + longitudeE = loc[:, 0] + az, zen, ra, dec, h = sunpos(dt, latitude, longitudeE, + elevation_m, radians=True).T + + rho = (((rdn * np.pi) / (irr.T)).T / np.cos(zen)).T + + rho[rho[:, 0] < -9990, :] = -9999.0 + bad = (latitude < -9990).T + + # Cloud threshold from Sandford et al. + total = np.array(rho[:, b450] > 0.28, dtype=int) + \ + np.array(rho[:, b1250] > 0.46, dtype=int) + \ + np.array(rho[:, b1650] > 0.22, dtype=int) + + maskbands = 8 + mask = np.zeros((maskbands, rdn.shape[0])) + mask[0, :] = total > 2 + + # Cirrus Threshold from Gao and Goetz, GRL 20:4, 1993 + mask[1, :] = np.array(rho[:, b1380] > 0.1, dtype=int) + + # Water threshold as in CORAL + mask[2, :] = np.array(rho[:, b1000] < 0.05, dtype=int) + + # Threshold spacecraft parts using their lack of an O2 A Band + mask[3, :] = np.array(rho[:, b762]/rho[:, b780] > 0.8, dtype=int) + + for i, j in enumerate(lbl[:, 0]): + if j <= 0: + x[i, :] = -9999.0 + else: + x[i, :] = state[int(j), :, 0] + + max_cloud_height = 3000.0 + mask[4, :] = np.tan(zen) * max_cloud_height / pixel_size + + # AOD 550 + mask[5, :] = x[:, aod_bands].sum(axis=1) + aerosol_threshold = 0.4 + + mask[6, :] = x[:, h2o_band].T + + mask[7, :] = np.array((mask[0, :] + mask[2, :] + + (mask[3, :] > aerosol_threshold)) > 0, dtype=int) + mask[:, bad] = -9999.0 + return_mask[line - start_line,...] = mask.copy() + + return return_mask, start_line, stop_line + + def main(): parser = argparse.ArgumentParser(description="Remove glint") @@ -44,82 +131,53 @@ def main(): parser.add_argument('lblfile', type=str, metavar='SUBSET_LABELS') parser.add_argument('statefile', type=str, metavar='STATE_SUBSET') parser.add_argument('irrfile', type=str, metavar='SOLAR_IRRADIANCE') - parser.add_argument('rhofile', type=str, metavar='OUTPUT_RHO') parser.add_argument('outfile', type=str, metavar='OUTPUT_MASKS') parser.add_argument('--wavelengths', type=str, default=None) + parser.add_argument('--n_cores', type=int, default=-1) args = parser.parse_args() - dtypemap = {'4': np.float32, '5': np.float64, '2': np.float16} - - rdnhdrfile = envi_header(args.rdnfile) - rdnhdr = envi.read_envi_header(rdnhdrfile) - rdnlines = int(rdnhdr['lines']) - rdnsamples = int(rdnhdr['samples']) - rdnbands = int(rdnhdr['bands']) - rdndtype = dtypemap[rdnhdr['data type']] - rdnframe = rdnsamples * rdnbands - - lochdrfile = envi_header(args.locfile) - lochdr = envi.read_envi_header(lochdrfile) - loclines = int(lochdr['lines']) - locsamples = int(lochdr['samples']) - locbands = int(lochdr['bands']) - locintlv = lochdr['interleave'] - locdtype = dtypemap[lochdr['data type']] - locframe = locsamples * 3 - - lblhdrfile = envi_header(args.lblfile) - lblhdr = envi.read_envi_header(lblhdrfile) - lbllines = int(lblhdr['lines']) - lblsamples = int(lblhdr['samples']) - lblbands = int(lblhdr['bands']) - lbldtype = dtypemap[lblhdr['data type']] - lblframe = lblsamples - - statehdrfile = envi_header(args.statefile) - statehdr = envi.read_envi_header(statehdrfile) - statelines = int(statehdr['lines']) - statesamples = int(statehdr['samples']) - statebands = int(statehdr['bands']) - statedtype = dtypemap[statehdr['data type']] - stateframe = statesamples * statebands + rdn_hdr = envi.read_envi_header(envi_header(args.rdnfile)) + state_hdr = envi.read_envi_header(envi_header(args.statefile)) + rdn_shp = envi.open(envi_header(args.rdnfile)).open_memmap(interleave='bil').shape + lbl_shp = envi.open(envi_header(args.lblfile)).open_memmap(interleave='bil').shape + loc_shp = envi.open(envi_header(args.locfile)).open_memmap(interleave='bil').shape # Check file size consistency - if loclines != rdnlines or locsamples != rdnsamples: + if loc_shp[0] != rdn_shp[0] or loc_shp[2] != rdn_shp[2]: raise ValueError('LOC and input file dimensions do not match.') - if lbllines != rdnlines or lblsamples != rdnsamples: + if lbl_shp[0] != rdn_shp[0] or lbl_shp[2] != rdn_shp[2]: raise ValueError('Label and input file dimensions do not match.') - if locbands != 3: + if loc_shp[1] != 3: raise ValueError('LOC file should have three bands.') # Get wavelengths and bands if args.wavelengths is not None: c, wl, fwhm = np.loadtxt(args.wavelengths).T else: - if not 'wavelength' in rdnhdr: + if not 'wavelength' in rdn_hdr: raise IndexError('Could not find wavelength data anywhere') else: - wl = np.array([float(f) for f in rdnhdr['wavelength']]) - if not 'fwhm' in rdnhdr: + wl = np.array([float(f) for f in rdn_hdr['wavelength']]) + if not 'fwhm' in rdn_hdr: raise IndexError('Could not find fwhm data anywhere') else: - fwhm = np.array([float(f) for f in rdnhdr['fwhm']]) + fwhm = np.array([float(f) for f in rdn_hdr['fwhm']]) # Find H2O and AOD elements in state vector aod_bands, h2o_band = [], [] - for i, name in enumerate(statehdr['band names']): + for i, name in enumerate(state_hdr['band names']): if 'H2O' in name: h2o_band.append(i) elif 'AER' in name: aod_bands.append(i) # find pixel size - if 'map info' in rdnhdr.keys(): - pixel_size = float(rdnhdr['map info'][5].strip()) + if 'map info' in rdn_hdr.keys(): + pixel_size = float(rdn_hdr['map info'][5].strip()) else: - loc_memmap = envi.open(lochdrfile).open_memmap() - center_y = int(loclines/2) - center_x = int(locsamples/2) + loc_memmap = envi.open(envi_header(args.locfile)).open_memmap(interleave='bip') + center_y = int(loc_shp[0]/2) + center_x = int(loc_shp[2]/2) center_pixels = loc_memmap[center_y-1:center_y+1, center_x, :2] pixel_size = haversine_distance( center_pixels[0, 1], center_pixels[0, 0], center_pixels[1, 1], center_pixels[1, 0]) @@ -130,6 +188,7 @@ def main(): for prefix in ['prm', 'ang', 'emit']: fid = fid.replace(prefix, '') dt = datetime.strptime(fid, '%Y%m%dt%H%M%S') + day_of_year = dt.timetuple().tm_yday print(day_of_year, dt) @@ -143,94 +202,36 @@ def main(): irr_resamp = resample_spectrum(irr, irr_wl, wl, fwhm) irr_resamp = np.array(irr_resamp, dtype=np.float32) - # determine glint bands having negligible water reflectance - BLUE = np.logical_and(wl > 440, wl < 460) - NIR = np.logical_and(wl > 950, wl < 1000) - SWIRA = np.logical_and(wl > 1250, wl < 1270) - SWIRB = np.logical_and(wl > 1640, wl < 1660) - SWIRC = np.logical_and(wl > 2200, wl < 2500) - b450 = np.argmin(abs(wl-450)) - b762 = np.argmin(abs(wl-762)) - b780 = np.argmin(abs(wl-780)) - b1000 = np.argmin(abs(wl-1000)) - b1250 = np.argmin(abs(wl-1250)) - b1380 = np.argmin(abs(wl-1380)) - b1650 = np.argmin(abs(wl-1650)) - + rdn_dataset = gdal.Open(args.rdnfile, gdal.GA_ReadOnly) maskbands = 8 - mask = np.zeros((rdnlines, maskbands, rdnsamples), dtype=np.float32) - noise = [] - dt = datetime.strptime(fid, '%Y%m%dt%H%M%S') - with open(args.statefile, 'rb') as fstate: - statesize = statelines * statesamples * statebands - state = np.fromfile(fstate, dtype=statedtype, count=statesize) - state = state.reshape((statelines, statebands, statesamples)) - - with open(args.rdnfile, 'rb') as frdn: - with open(args.locfile, 'rb') as floc: - with open(args.lblfile, 'rb') as flbl: - with open(args.rhofile, 'wb') as frho: - for line in range(rdnlines): - - print('line %i/%i' % (line+1, rdnlines)) - loc = np.fromfile(floc, dtype=locdtype, count=locframe) - if locintlv == 'bip': - loc = np.array(loc.reshape((locsamples, locbands)), dtype=np.float32) - else: - loc = np.array(loc.reshape((locbands, locsamples)).T, dtype=np.float32) - rdn = np.fromfile(frdn, dtype=rdndtype, count=rdnframe) - rdn = np.array(rdn.reshape((rdnbands, rdnsamples)).T, dtype=np.float32) - lbl = np.fromfile(flbl, dtype=lbldtype, count=lblframe) - lbl = np.array(lbl.reshape((1, lblsamples)).T, dtype=np.float32) - x = np.zeros((rdnsamples, statebands)) - - elevation_m = loc[:, 2] - latitude = loc[:, 1] - longitudeE = loc[:, 0] - az, zen, ra, dec, h = sunpos(dt, latitude, longitudeE, - elevation_m, radians=True).T - - rho = (((rdn * np.pi) / (irr_resamp.T)).T / np.cos(zen)).T - - rho[rho[:, 0] < -9990, :] = -9999.0 - rho_bil = np.array(rho.T, dtype=np.float32) - frho.write(rho_bil.tobytes()) - bad = (latitude < -9990).T - - # Cloud threshold from Sandford et al. - total = np.array(rho[:, b450] > 0.28, dtype=int) + \ - np.array(rho[:, b1250] > 0.46, dtype=int) + \ - np.array(rho[:, b1650] > 0.22, dtype=int) - mask[line, 0, :] = total > 2 - - # Cirrus Threshold from Gao and Goetz, GRL 20:4, 1993 - mask[line, 1, :] = np.array(rho[:, b1380] > 0.1, dtype=int) - - # Water threshold as in CORAL - mask[line, 2, :] = np.array(rho[:, b1000] < 0.05, dtype=int) - - # Threshold spacecraft parts using their lack of an O2 A Band - mask[line, 3, :] = np.array(rho[:, b762]/rho[:, b780] > 0.8, dtype=int) - - for i, j in enumerate(lbl[:, 0]): - if j <= 0: - x[i, :] = -9999.0 - else: - x[i, :] = state[int(j), :, 0] - - max_cloud_height = 3000.0 - mask[line, 4, :] = np.tan(zen) * max_cloud_height / pixel_size - - # AOD 550 - mask[line, 5, :] = x[:, aod_bands].sum(axis=1) - aerosol_threshold = 0.4 - - mask[line, 6, :] = x[:, h2o_band].T - - mask[line, 7, :] = np.array((mask[line, 0, :] + mask[line, 2, :] + - (mask[line, 3, :] > aerosol_threshold)) > 0, dtype=int) - mask[line, :, bad] = -9999.0 + # Build output dataset + driver = gdal.GetDriverByName('ENVI') + driver.Register() + + outDataset = driver.Create(args.outfile, rdn_shp[2], rdn_shp[0], maskbands, gdal.GDT_Float32, options=['INTERLEAVE=BIL']) + outDataset.SetProjection(rdn_dataset.GetProjection()) + outDataset.SetGeoTransform(rdn_dataset.GetGeoTransform()) + del outDataset + + rayargs = {'local_mode': args.n_cores == 1} + if args.n_cores <= 0: + args.n_cores = multiprocessing.cpu_count() + rayargs['num_cpus'] = args.n_cores + ray.init(**rayargs) + + linebreaks = np.linspace(0, rdn_shp[0], num=args.n_cores*3).astype(int) + + state = envi.open(envi_header(args.statefile)).open_memmap(interleave='bil').copy() + stateid = ray.put(state) + irrid = ray.put(irr_resamp) + jobs = [build_line_masks.remote(linebreaks[_l], linebreaks[_l+1], args.rdnfile, args.locfile, args.lblfile, stateid, dt, h2o_band, aod_bands, pixel_size, args.outfile, wl, irrid) for _l in range(len(linebreaks)-1)] + rreturn = [ray.get(jid) for jid in jobs] + ray.shutdown() + + mask = np.zeros((rdn_shp[0], maskbands, rdn_shp[2])) + for lm, start_line, stop_line in rreturn: + mask[start_line:stop_line,...] = lm bad = np.squeeze(mask[:, 0, :]) < -9990 good = np.squeeze(mask[:, 0, :]) > -9990 @@ -241,7 +242,7 @@ def main(): invalid = (np.squeeze(mask[:, 4, :]) >= cloud_distance) mask[:, 4, :] = invalid.copy() - hdr = rdnhdr.copy() + hdr = rdn_hdr.copy() hdr['bands'] = str(maskbands) hdr['band names'] = ['Cloud flag', 'Cirrus flag', 'Water flag', 'Spacecraft Flag', 'Dilated Cloud Flag', diff --git a/output_conversion.py b/output_conversion.py index c522837..ed62698 100644 --- a/output_conversion.py +++ b/output_conversion.py @@ -16,12 +16,15 @@ def main(): parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter, description='''This script \ converts L2A PGE outputs to DAAC compatable formats, with supporting metadata''', add_help=True) - parser.add_argument('output_filename', type=str, help="Output netcdf filename") + parser.add_argument('rfl_output_filename', type=str, help="Output Reflectance netcdf filename") + parser.add_argument('rfl_unc_output_filename', type=str, help="Output Reflectance Uncertainty netcdf filename") + parser.add_argument('mask_output_filename', type=str, help="Output Mask netcdf filename") parser.add_argument('rfl_file', type=str, help="EMIT L2A reflectance ENVI file") parser.add_argument('rfl_unc_file', type=str, help="EMIT L2A reflectance uncertainty ENVI file") parser.add_argument('mask_file', type=str, help="EMIT L2A water/cloud mask ENVI file") parser.add_argument('loc_file', type=str, help="EMIT L1B location data ENVI file") parser.add_argument('glt_file', type=str, help="EMIT L1B glt ENVI file") + parser.add_argument('version', type=str, help="3 digit (with leading V) version number") parser.add_argument('--ummg_file', type=str, help="Output UMMG filename") parser.add_argument('--log_file', type=str, default=None, help="Logging file to write to") parser.add_argument('--log_level', type=str, default="INFO", help="Logging level") @@ -36,34 +39,31 @@ def main(): rfl_unc_ds = envi.open(envi_header(args.rfl_unc_file)) mask_ds = envi.open(envi_header(args.mask_file)) + # Start with Reflectance File + # make the netCDF4 file - logging.info(f'Creating netCDF4 file: {args.output_filename}') - nc_ds = Dataset(args.output_filename, 'w', clobber=True, format='NETCDF4') + logging.info(f'Creating netCDF4 file: {args.rfl_output_filename}') + nc_ds = Dataset(args.rfl_output_filename, 'w', clobber=True, format='NETCDF4') # make global attributes logging.debug('Creating global attributes') makeGlobalAttr(nc_ds, args.rfl_file, args.glt_file) - nc_ds.title = "EMIT L2A Estimate Surface Reflectance, Uncertainty, and Masks, 72km, V001" + nc_ds.title = "EMIT L2A Surface Reflectance 60 m " + args.version nc_ds.summary = nc_ds.summary + \ - f"\\n\\nThis collection contains L2A estimated surface reflectances \ + f"\\n\\nThis file contains L2A estimated surface reflectances \ and geolocation data. Reflectance estimates are created using an Optimal Estimation technique - see ATBD for \ details. Reflectance values are reported as fractions (relative to 1). " nc_ds.sync() logging.debug('Creating dimensions') makeDims(nc_ds, args.rfl_file, args.glt_file) - nc_ds.createDimension('mask_bands', int(mask_ds.metadata['bands'])) logging.debug('Creating and writing reflectance metadata') add_variable(nc_ds, "sensor_band_parameters/wavelengths", "f4", "Wavelength Centers", "nm", - [float(d) for d in rfl_ds.metadata['wavelength']], {"dimensions": ("number_of_bands",)}) + [float(d) for d in rfl_ds.metadata['wavelength']], {"dimensions": ("bands",)}) add_variable(nc_ds, "sensor_band_parameters/fwhm", "f4", "Full Width at Half Max", "nm", - [float(d) for d in rfl_ds.metadata['fwhm']], {"dimensions": ("number_of_bands",)}) - - logging.debug('Creating and writing mask metadata') - add_variable(nc_ds, "sensor_band_parameters/mask_bands", str, "Mask Band Names", None, - mask_ds.metadata['band names'], {"dimensions": ("mask_bands",)}) + [float(d) for d in rfl_ds.metadata['fwhm']], {"dimensions": ("bands",)}) logging.debug('Creating and writing location data') add_loc(nc_ds, args.loc_file) @@ -73,20 +73,91 @@ def main(): logging.debug('Write reflectance data') add_variable(nc_ds, 'reflectance', "f4", "Surface Reflectance", "unitless", rfl_ds.open_memmap(interleave='bip')[...].copy(), - {"dimensions":("number_of_scans", "pixels_per_scan", "number_of_bands")}) + {"dimensions":("downtrack", "crosstrack", "bands")}) + nc_ds.sync() + nc_ds.close() + del nc_ds + logging.debug(f'Successfully created {args.rfl_output_filename}') + + + + # Start Reflectance Uncertainty File + + # make the netCDF4 file + logging.info(f'Creating netCDF4 file: {args.rfl_unc_output_filename}') + nc_ds = Dataset(args.rfl_unc_output_filename, 'w', clobber=True, format='NETCDF4') + + # make global attributes + logging.debug('Creating global attributes') + makeGlobalAttr(nc_ds, args.rfl_unc_file, args.glt_file) + + nc_ds.title = "EMIT L2A Surface Reflectance Uncertainty 60 m V001" + nc_ds.summary = nc_ds.summary + \ + f"\\n\\nThis file contains L2A estimated surface reflectance uncertainties \ + and geolocation data. Reflectance uncertainty estimates are created using an Optimal Estimation technique - see ATBD for \ + details. Reflectance uncertainty values are reported as fractions (relative to 1). " nc_ds.sync() + + logging.debug('Creating dimensions') + makeDims(nc_ds, args.rfl_unc_file, args.glt_file) + + logging.debug('Creating and writing reflectance metadata') + add_variable(nc_ds, "sensor_band_parameters/wavelengths", "f4", "Wavelength Centers", "nm", + [float(d) for d in rfl_ds.metadata['wavelength']], {"dimensions": ("bands",)}) + add_variable(nc_ds, "sensor_band_parameters/fwhm", "f4", "Full Width at Half Max", "nm", + [float(d) for d in rfl_ds.metadata['fwhm']], {"dimensions": ("bands",)}) + + logging.debug('Creating and writing location data') + add_loc(nc_ds, args.loc_file) + logging.debug('Creating and writing glt data') + add_glt(nc_ds, args.glt_file) + add_variable(nc_ds, 'reflectance_uncertainty', "f4", "Surface Reflectance Uncertainty", "unitless", rfl_unc_ds.open_memmap(interleave='bip')[...].copy(), - {"dimensions":("number_of_scans", "pixels_per_scan", "number_of_bands")}) + {"dimensions":("downtrack", "crosstrack", "bands")}) + + nc_ds.sync() + nc_ds.close() + del nc_ds + logging.debug(f'Successfully created {args.rfl_unc_output_filename}') + + + # Start Mask File + + # make the netCDF4 file + logging.info(f'Creating netCDF4 file: {args.mask_output_filename}') + nc_ds = Dataset(args.mask_output_filename, 'w', clobber=True, format='NETCDF4') + + # make global attributes + logging.debug('Creating global attributes') + makeGlobalAttr(nc_ds, args.mask_file, args.glt_file) + + nc_ds.title = "EMIT L2A Masks 60 m V001" + nc_ds.summary = nc_ds.summary + \ + f"\\n\\nThis file contains masks for L2A estimated surface reflectances \ + and geolocation data. Masks account for clouds, cloud shadows (via buffering), spacecraft interference, and poor atmospheric conditions." nc_ds.sync() + logging.debug('Creating dimensions') + makeDims(nc_ds, args.mask_file, args.glt_file) + + logging.debug('Creating and writing mask metadata') + add_variable(nc_ds, "sensor_band_parameters/mask_bands", str, "Mask Band Names", None, + mask_ds.metadata['band names'], {"dimensions": ("bands",)}) + + logging.debug('Creating and writing location data') + add_loc(nc_ds, args.loc_file) + + logging.debug('Creating and writing glt data') + add_glt(nc_ds, args.glt_file) + logging.debug('Write mask data') add_variable(nc_ds, 'mask', "f4", "Masks", "unitless", mask_ds.open_memmap(interleave='bip')[...].copy(), - {"dimensions":("number_of_scans", "pixels_per_scan", "mask_bands"), "zlib": True}) + {"dimensions":("downtrack", "crosstrack", "bands"), "zlib": True, "complevel": 9}) nc_ds.sync() - nc_ds.close() - logging.debug(f'Successfully created {args.output_filename}') + del nc_ds + logging.debug(f'Successfully created {args.mask_output_filename}') return diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..69de934 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,57 @@ +absl-py==0.10.0 +aiohttp==3.6.2 +aiohttp-cors==0.7.0 +aioredis==1.3.1 +astunparse==1.6.3 +async-timeout==3.0.1 +blessings==1.7 +cachetools==4.1.1 +cftime==1.2.1 +colorful==0.5.4 +deprecated==1.2.13 +fast-histogram==0.9 +gast==0.3.3 +google==2.0.3 +google-api-core==1.26.1 +google-auth==1.22.1 +google-auth-oauthlib==0.4.1 +google-pasta==0.2.0 +googleapis-common-protos==1.53.0 +gpustat==0.6.0 +grpcio==1.29.0 +h5py==2.10.0 +hiredis==1.1.0 +hy-tools==1.1.2 +keras-preprocessing==1.1.2 +markdown==3.3.1 +msgpack==1.0.2 +multidict==4.7.6 +netcdf4==1.5.4 +nvidia-ml-py3==7.352.0 +oauthlib==3.1.0 +opencensus==0.7.12 +opencensus-context==0.1.2 +opt-einsum==3.3.0 +optimparallel==0.1.2 +pip==21.3.1 +protobuf==3.19.3 +py-spy==0.3.3 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pywavelets==1.1.1 +ray==1.9.2 +redis==4.0.2 +requests-oauthlib==1.3.0 +rsa==4.6 +scikit-image==0.17.2 +six==1.16.0 +spectral==0.20 +tensorboard==2.3.0 +tensorboard-plugin-wit==1.7.0 +tensorflow==2.3.1 +tensorflow-estimator==2.3.0 +termcolor==1.1.0 +tifffile==2020.11.18 +xxhash==1.4.3 +yarl==1.4.2 +ndsplines==0.1.2 diff --git a/spectrum_quality.py b/spectrum_quality.py new file mode 100644 index 0000000..4005b38 --- /dev/null +++ b/spectrum_quality.py @@ -0,0 +1,191 @@ +""" +Spectral quality evaluation metrics. + +Authors: David R. Thompson, david.r.thompson@jpl.nasa.gov +""" + +import os +import argparse +import numpy as np +import spectral.io.envi as envi +import pylab as plt +import logging + + +def find_header(inputpath:str) -> str: + """Return the header associated with an image file + + Args: + inputpath (str): input pathname to search for header from + + Returns: + str: envi header path + """ + if os.path.splitext(inputpath)[-1] == '.img' or os.path.splitext(inputpath)[-1] == '.dat' or os.path.splitext(inputpath)[-1] == '.raw': + # headers could be at either filename.img.hdr or filename.hdr. Check both, return the one that exists if it + # does, if not return the latter (new file creation presumed). + hdrfile = os.path.splitext(inputpath)[0] + '.hdr' + if os.path.isfile(hdrfile): + return hdrfile + elif os.path.isfile(inputpath + '.hdr'): + return inputpath + '.hdr' + return hdrfile + elif os.path.splitext(inputpath)[-1] == '.hdr': + return inputpath + else: + return inputpath + '.hdr' + + +def smooth(x:np.array, window_length:int = 3) -> np.array: + """Moving average smoother + Args: + x (np.array): Input spectrum + window_length (int, optional): Window size for smoothing. Defaults to 3. + + Returns: + np.array: smoothed spectra + """ + q=np.r_[x[window_length-1:0:-1],x,x[-1:-window_length:-1]] + w=np.ones(window_length,'d')/float(window_length) + y=np.convolve(w,q,mode='valid') + y= y[int(window_length/2):-int(window_length/2)] + return y + + +def wl2band(w: float, wl: np.array) -> int: + """Translate wavelength to nearest channel index + + Args: + w (float): input wavelength to match + wl (np.array): reference wavelengths + + Returns: + int: closest index of given wavelength + """ + return np.argmin(abs(wl-w)) + + +# parse the command line (perform the correction on all command line arguments) +def main(): + + parser = argparse.ArgumentParser(description="Spectrum quality") + parser.add_argument('rflfile', type=str, metavar='REFLECTANCE') + parser.add_argument('outfile', type=str, metavar='OUTPUT') + parser.add_argument('--sample', type=int, default=1) + parser.add_argument('--wavelengths', type=str, metavar='WAVELENGTH', default=None) + parser.add_argument('--plot', action='store_true') + parser.add_argument('--log_file', type=str, default=None) + parser.add_argument('--log_level', type=str, default='INFO') + args = parser.parse_args() + + if args.log_file is None: + logging.basicConfig(format='%(message)s', level=args.log_level) + else: + logging.basicConfig(format='%(message)s', level=args.log_level, filename=args.log_file) + + + dtypemap = {'4':np.float32, '5':np.float64, '2':np.float16} + + # Get file dimensions + rflhdrfile = find_header(args.rflfile) + rflhdr = envi.read_envi_header(rflhdrfile) + rfllines = int(rflhdr['lines']) + rflsamples = int(rflhdr['samples']) + rflbands = int(rflhdr['bands']) + rflintlv = rflhdr['interleave'] + rfldtype = dtypemap[rflhdr['data type']] + rflframe = rflsamples * rflbands + + # Get wavelengths + if args.wavelengths is not None: + c, wl, fwhm = np.loadtxt(args.wavelengths).T + else: + if not 'wavelength' in rflhdr: + raise IndexError('Could not find wavelength data anywhere') + else: + wl = np.array([float(f) for f in rflhdr['wavelength']]) + if not 'fwhm' in rflhdr: + raise IndexError('Could not find fwhm data anywhere') + else: + fwhm = np.array([float(f) for f in rflhdr['fwhm']]) + + # Convert from microns to nm if needed + if not any(wl>100): + logging.info('Assuming wavelengths provided in microns, converting to nm') + wl = wl*1000.0 + + # Define start and end channels for two water bands + # reference regions outside these features. The reference + # intervals serve to assess the channel-to-channel instrument + # noise + s940,e940 = wl2band(910,wl), wl2band(990,wl) + s1140,e1140 = wl2band(1090,wl), wl2band(1180,wl) + srefA,erefA = wl2band(1010,wl), wl2band(1080,wl) + srefB,erefB = wl2band(780,wl), wl2band(900,wl) + + samples = 0 + errors = [] + with open(args.rflfile,'rb') as fin: + for line in range(rfllines): + + logging.debug('line %i/%i'%(line+1,rfllines)) + + # Read reflectances and translate the frame to BIP + # (a sequential list of spectra) + rfl = np.fromfile(fin, dtype=rfldtype, count=rflframe) + if rflintlv == 'bip': + rfl = np.array(rfl.reshape((rflsamples,rflbands)), dtype=np.float32) + else: + rfl = np.array(rfl.reshape((rflbands,rflsamples)).T, dtype=np.float32) + + # Loop through all spectra + for spectrum in rfl: + + if any(spectrum<-9990): + continue + + samples = samples + 1 + if samples % args.sample != 0: + continue + + # Get divergence of each spectral interval from the local + # smooth spectrum + ctm = smooth(spectrum) + errsA = spectrum[s940:e940] - ctm[s940:e940] + errsB = spectrum[s1140:e1140] - ctm[s1140:e1140] + referenceA = spectrum[srefA:erefA] - ctm[srefA:erefA] + referenceB = spectrum[srefB:erefB] - ctm[srefB:erefB] + + # calcualte the root mean squared error of each interval + errsA = np.sqrt(np.mean(pow(errsA,2))) + errsB = np.sqrt(np.mean(pow(errsB,2))) + referenceA = np.sqrt(np.mean(pow(referenceA,2))) + referenceB = np.sqrt(np.mean(pow(referenceB,2))) + + # We use the better of two reference regions and two + # water regions for robustness + errs = min(errsA,errsB) + reference = min(referenceA,referenceB) + excess_error = errs/reference + + # Running tally of errors + errors.append(excess_error) + + if args.plot: + plt.plot(wl,spectrum) + plt.plot(wl,ctm) + plt.title(excess_error) + plt.show() + + # Write percentiles + errors.sort() + errors = np.array(errors) + with open(args.outfile,'w') as fout: + for pct in [50,95,99.9]: + fout.write('%8.6f\n'%np.percentile(errors,pct)) + +if __name__ == "__main__": + main() + + + diff --git a/surface_v2/wavelengths_emit_synthetic.txt b/surface_v2/wavelengths.txt similarity index 100% rename from surface_v2/wavelengths_emit_synthetic.txt rename to surface_v2/wavelengths.txt