From fa5adc3da1dd58c8dd0234fe0eafea3d534a8457 Mon Sep 17 00:00:00 2001 From: Zhiyuan Chen Date: Wed, 11 Dec 2024 23:43:20 +0800 Subject: [PATCH] ready for 0.0.6 rewrite module update models given module improve Dataset clarify on license Signed-off-by: Zhiyuan Chen --- .codespell-whitelist.txt | 1 + LICENSE | 661 ------------------ LICENSE.md | 660 +++++++++++++++++ README.md | 11 +- README.zh.md | 13 +- about/license-faq.md | 114 +++ about/license-faq.zh.md | 120 ++++ about/privacy.md | 408 +++++++++++ about/privacy.zh.md | 419 +++++++++++ docs/docs/about/license-faq.md | 110 +-- docs/docs/about/license-faq.zh.md | 116 +-- docs/docs/about/license.md | 661 +----------------- docs/docs/about/license.zh.md | 249 +------ docs/docs/about/privacy.md | 409 +---------- docs/docs/about/privacy.zh.md | 420 +---------- docs/docs/index.md | 2 +- docs/docs/index.zh.md | 2 +- multimolecule/__init__.py | 14 +- multimolecule/data/__init__.py | 5 +- multimolecule/data/dataset.py | 107 +-- multimolecule/data/utils.py | 6 +- multimolecule/defaults.py | 7 +- multimolecule/models/__init__.py | 2 + .../models/calm/configuration_calm.py | 4 +- multimolecule/models/calm/modeling_calm.py | 12 +- multimolecule/models/configuration_utils.py | 14 +- .../models/ernierna/configuration_ernierna.py | 4 +- .../models/ernierna/modeling_ernierna.py | 18 +- .../models/rinalmo/configuration_rinalmo.py | 4 +- .../models/rinalmo/modeling_rinalmo.py | 12 +- .../models/rnabert/configuration_rnabert.py | 4 +- .../models/rnabert/modeling_rnabert.py | 22 +- .../models/rnaernie/configuration_rnaernie.py | 4 +- .../models/rnaernie/modeling_rnaernie.py | 12 +- .../models/rnafm/configuration_rnafm.py | 4 +- multimolecule/models/rnafm/modeling_rnafm.py | 14 +- .../models/rnamsm/configuration_rnamsm.py | 4 +- .../models/rnamsm/modeling_rnamsm.py | 14 +- .../splicebert/configuration_splicebert.py | 4 +- .../models/splicebert/modeling_splicebert.py | 12 +- .../models/utrbert/configuration_utrbert.py | 4 +- 
.../models/utrbert/modeling_utrbert.py | 12 +- .../models/utrlm/configuration_utrlm.py | 4 +- multimolecule/models/utrlm/modeling_utrlm.py | 14 +- multimolecule/module/__init__.py | 14 +- multimolecule/module/backbones/__init__.py | 21 + multimolecule/module/backbones/registry.py | 21 + multimolecule/module/backbones/sequence.py | 59 ++ .../module/backbones/sequences/__init__.py | 20 + .../module/backbones/sequences/onehot.py | 39 ++ .../module/backbones/sequences/registry.py | 66 ++ multimolecule/module/criterions/__init__.py | 14 +- multimolecule/module/criterions/binary.py | 44 ++ multimolecule/module/criterions/generic.py | 33 +- multimolecule/module/criterions/multiclass.py | 44 ++ multimolecule/module/criterions/multilabel.py | 44 ++ multimolecule/module/criterions/registry.py | 29 + multimolecule/module/criterions/regression.py | 44 ++ multimolecule/module/heads/__init__.py | 12 +- multimolecule/module/heads/config.py | 46 +- multimolecule/module/heads/contact.py | 247 ++++++- multimolecule/module/heads/generic.py | 58 +- multimolecule/module/heads/pretrain.py | 6 +- multimolecule/module/heads/registry.py | 12 +- multimolecule/module/heads/token.py | 80 +-- multimolecule/module/heads/utils.py | 29 - multimolecule/module/model.py | 160 +++++ multimolecule/module/necks/__init__.py | 21 + multimolecule/module/necks/bert.py | 102 +++ multimolecule/module/necks/cat.py | 43 ++ multimolecule/module/necks/registry.py | 21 + multimolecule/module/registry.py | 35 + multimolecule/tasks/task.py | 3 +- pyproject.toml | 5 +- tests/data/test_dataset.py | 36 +- 75 files changed, 3128 insertions(+), 2993 deletions(-) delete mode 100755 LICENSE create mode 100644 LICENSE.md create mode 100644 about/license-faq.md create mode 100644 about/license-faq.zh.md create mode 100644 about/privacy.md create mode 100644 about/privacy.zh.md create mode 100644 multimolecule/module/backbones/__init__.py create mode 100644 multimolecule/module/backbones/registry.py create mode 100644 
multimolecule/module/backbones/sequence.py create mode 100644 multimolecule/module/backbones/sequences/__init__.py create mode 100644 multimolecule/module/backbones/sequences/onehot.py create mode 100644 multimolecule/module/backbones/sequences/registry.py create mode 100644 multimolecule/module/criterions/binary.py create mode 100644 multimolecule/module/criterions/multiclass.py create mode 100644 multimolecule/module/criterions/multilabel.py create mode 100644 multimolecule/module/criterions/registry.py create mode 100644 multimolecule/module/criterions/regression.py create mode 100644 multimolecule/module/model.py create mode 100644 multimolecule/module/necks/__init__.py create mode 100644 multimolecule/module/necks/bert.py create mode 100644 multimolecule/module/necks/cat.py create mode 100644 multimolecule/module/necks/registry.py create mode 100644 multimolecule/module/registry.py diff --git a/.codespell-whitelist.txt b/.codespell-whitelist.txt index 44c7e9f5..467e5c38 100644 --- a/.codespell-whitelist.txt +++ b/.codespell-whitelist.txt @@ -1,3 +1,4 @@ +datas ser marz manuel diff --git a/LICENSE b/LICENSE deleted file mode 100755 index bae94e18..00000000 --- a/LICENSE +++ /dev/null @@ -1,661 +0,0 @@ - GNU AFFERO GENERAL PUBLIC LICENSE - Version 3, 19 November 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU Affero General Public License is a free, copyleft license for -software and other kinds of works, specifically designed to ensure -cooperation with the community in the case of network server software. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. 
By contrast, -our General Public Licenses are intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - Developers that use our General Public Licenses protect your rights -with two steps: (1) assert copyright on the software, and (2) offer -you this License which gives you legal permission to copy, distribute -and/or modify the software. - - A secondary benefit of defending all users' freedom is that -improvements made in alternate versions of the program, if they -receive widespread use, become available for other developers to -incorporate. Many developers of free software are heartened and -encouraged by the resulting cooperation. However, in the case of -software used on network servers, this result may fail to come about. -The GNU General Public License permits making a modified version and -letting the public access it on a server without ever releasing its -source code to the public. - - The GNU Affero General Public License is designed specifically to -ensure that, in such cases, the modified source code becomes available -to the community. It requires the operator of a network server to -provide the source code of the modified version running there to the -users of that server. Therefore, public use of a modified version, on -a publicly accessible server, gives the public access to the source -code of the modified version. - - An older license, called the Affero General Public License and -published by Affero, was designed to accomplish similar goals. 
This is -a different license, not a version of the Affero GPL, but Affero has -released a new version of the Affero GPL which permits relicensing under -this license. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU Affero General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. 
- - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. 
However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. 
- - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. 
- - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. 
- - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. 
- - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. 
- - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. 
If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. - - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. 
If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. 
- - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. 
For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. 
- - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Remote Network Interaction; Use with the GNU General Public License. 
- - Notwithstanding any other provision of this License, if you modify the -Program, your modified version must prominently offer all users -interacting with it remotely through a computer network (if your version -supports such interaction) an opportunity to receive the Corresponding -Source of your version by providing access to the Corresponding Source -from a network server at no charge, through some standard or customary -means of facilitating copying of software. This Corresponding Source -shall include the Corresponding Source for any work covered by version 3 -of the GNU General Public License that is incorporated pursuant to the -following paragraph. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU General Public License into a single -combined work, and to convey the resulting work. The terms of this -License will continue to apply to the part which is the covered work, -but the work with which it is combined will remain governed by version -3 of the GNU General Public License. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU Affero General Public License from time to time. Such new versions -will be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU Affero General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU Affero General Public License, you may choose any version ever published -by the Free Software Foundation. 
- - If the Program specifies that a proxy can decide which future -versions of the GNU Affero General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. 
- - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <https://www.gnu.org/licenses/>. - -Also add information on how to contact you by electronic and paper mail. - - If your software can interact with users remotely through a computer -network, you should also make sure that it provides a way for users to -get its source. For example, if your program is a web application, its -interface could display a "Source" link that leads users to an archive -of the code. 
There are many ways you could offer source, and different -solutions will be better for different programs; see section 13 for the -specific requirements. - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU AGPL, see -<https://www.gnu.org/licenses/>. \ No newline at end of file diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 00000000..bc16720e --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,660 @@ +# GNU AFFERO GENERAL PUBLIC LICENSE + +Version 3, 19 November 2007 + +Copyright (C) 2007 Free Software Foundation, Inc. +<https://fsf.org/> + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. + +## Preamble + +The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + +The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains +free software for all its users. + +When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + +Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. 
+ +A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + +The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + +An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing +under this license. + +The precise terms and conditions for copying, distribution and +modification follow. + +## TERMS AND CONDITIONS + +### 0. Definitions. + +"This License" refers to version 3 of the GNU Affero General Public +License. + +"Copyright" also means copyright-like laws that apply to other kinds +of works, such as semiconductor masks. + +"The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + +To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of +an exact copy. 
The resulting work is called a "modified version" of +the earlier work or a work "based on" the earlier work. + +A "covered work" means either the unmodified Program or a work based +on the Program. + +To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + +To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user +through a computer network, with no transfer of a copy, is not +conveying. + +An interactive user interface displays "Appropriate Legal Notices" to +the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + +### 1. Source Code. + +The "source code" for a work means the preferred form of the work for +making modifications to it. "Object code" means any non-source form of +a work. + +A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ +The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + +The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + +The Corresponding Source need not include anything that users can +regenerate automatically from other parts of the Corresponding Source. + +The Corresponding Source for a work in source code form is that same +work. + +### 2. Basic Permissions. + +All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + +You may make, run and propagate covered works that you do not convey, +without conditions so long as your license otherwise remains in force. +You may convey covered works to others for the sole purpose of having +them make modifications exclusively for you, or provide you with +facilities for running those works, provided that you comply with the +terms of this License in conveying all material for which you do not +control copyright. Those thus making or running the covered works for +you must do so exclusively on your behalf, under your direction and +control, on terms that prohibit them from making any copies of your +copyrighted material outside their relationship with you. + +Conveying under any other circumstances is permitted solely under the +conditions stated below. Sublicensing is not allowed; section 10 makes +it unnecessary. + +### 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + +No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + +When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such +circumvention is effected by exercising rights under this License with +respect to the covered work, and you disclaim any intention to limit +operation or modification of the work as a means of enforcing, against +the work's users, your or third parties' legal rights to forbid +circumvention of technological measures. + +### 4. Conveying Verbatim Copies. 
+ +You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + +You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + +### 5. Conveying Modified Source Versions. + +You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these +conditions: + +- a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. +- b) The work must carry prominent notices stating that it is + released under this License and any conditions added under + section 7. This requirement modifies the requirement in section 4 + to "keep intact all notices". +- c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. +- d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ +A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + +### 6. Conveying Non-Source Forms. + +You may convey a covered work in object code form under the terms of +sections 4 and 5, provided that you also convey the machine-readable +Corresponding Source under the terms of this License, in one of these +ways: + +- a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. +- b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the Corresponding + Source from a network server at no charge. +- c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. +- d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. +- e) Convey the object code using peer-to-peer transmission, + provided you inform other peers where the object code and + Corresponding Source of the work are being offered to the general + public at no charge under subsection 6d. + +A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + +A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, +family, or household purposes, or (2) anything designed or sold for +incorporation into a dwelling. In determining whether a product is a +consumer product, doubtful cases shall be resolved in favor of +coverage. 
For a particular product received by a particular user, +"normally used" refers to a typical or common use of that class of +product, regardless of the status of the particular user or of the way +in which the particular user actually uses, or expects or is expected +to use, the product. A product is a consumer product regardless of +whether the product has substantial commercial, industrial or +non-consumer uses, unless such uses represent the only significant +mode of use of the product. + +"Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to +install and execute modified versions of a covered work in that User +Product from a modified version of its Corresponding Source. The +information must suffice to ensure that the continued functioning of +the modified object code is in no case prevented or interfered with +solely because modification has been made. + +If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + +The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or +updates for a work that has been modified or installed by the +recipient, or for the User Product in which it has been modified or +installed. 
Access to a network may be denied when the modification +itself materially and adversely affects the operation of the network +or violates the rules and protocols for communication across the +network. + +Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + +### 7. Additional Terms. + +"Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + +When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ +Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders +of that material) supplement the terms of this License with terms: + +- a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or +- b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or +- c) Prohibiting misrepresentation of the origin of that material, + or requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or +- d) Limiting the use for publicity purposes of names of licensors + or authors of the material; or +- e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or +- f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions + of it) with contractual assumptions of liability to the recipient, + for any liability that these contractual assumptions directly + impose on those licensors and authors. + +All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ +If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + +Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; the +above requirements apply either way. + +### 8. Termination. + +You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + +However, if you cease all violation of this License, then your license +from a particular copyright holder is reinstated (a) provisionally, +unless and until the copyright holder explicitly and finally +terminates your license, and (b) permanently, if the copyright holder +fails to notify you of the violation by some reasonable means prior to +60 days after the cessation. + +Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + +Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + +### 9. Acceptance Not Required for Having Copies. + +You are not required to accept this License in order to receive or run +a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + +### 10. Automatic Licensing of Downstream Recipients. + +Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + +An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + +You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + +### 11. Patents. 
+ +A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + +A contributor's "essential patent claims" are all patent claims owned +or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + +Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + +In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ +If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + +If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + +A patent license is "discriminatory" if it does not include within the +scope of its coverage, prohibits the exercise of, or is conditioned on +the non-exercise of one or more of the rights that are specifically +granted under this License. 
You may not convey a covered work if you +are a party to an arrangement with a third party that is in the +business of distributing software, under which you make payment to the +third party based on the extent of your activity of conveying the +work, and under which the third party grants, to any of the parties +who would receive the covered work from you, a discriminatory patent +license (a) in connection with copies of the covered work conveyed by +you (or copies made from those copies), or (b) primarily for and in +connection with specific products or compilations that contain the +covered work, unless you entered into that arrangement, or that patent +license was granted, prior to 28 March 2007. + +Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + +### 12. No Surrender of Others' Freedom. + +If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under +this License and any other pertinent obligations, then as a +consequence you may not convey it at all. For example, if you agree to +terms that obligate you to collect a royalty for further conveying +from those to whom you convey the Program, the only way you could +satisfy both those terms and this License would be to refrain entirely +from conveying the Program. + +### 13. Remote Network Interaction; Use with the GNU General Public License. 
+ +Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your +version supports such interaction) an opportunity to receive the +Corresponding Source of your version by providing access to the +Corresponding Source from a network server at no charge, through some +standard or customary means of facilitating copying of software. This +Corresponding Source shall include the Corresponding Source for any +work covered by version 3 of the GNU General Public License that is +incorporated pursuant to the following paragraph. + +Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + +### 14. Revised Versions of this License. + +The Free Software Foundation may publish revised and/or new versions +of the GNU Affero General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever +published by the Free Software Foundation. 
+ +If the Program specifies that a proxy can decide which future versions +of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + +Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + +### 15. Disclaimer of Warranty. + +THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT +WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND +PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE +DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR +CORRECTION. + +### 16. Limitation of Liability. + +IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR +CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES +ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT +NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR +LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM +TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER +PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +### 17. Interpretation of Sections 15 and 16. 
+ +If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + +END OF TERMS AND CONDITIONS + +## How to Apply These Terms to Your New Programs + +If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these +terms. + +To do so, attach the following notices to the program. It is safest to +attach them to the start of each source file to most effectively state +the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as + published by the Free Software Foundation, either version 3 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper +mail. + +If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code.
There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for +the specific requirements. + +You should also get your employer (if you work as a programmer) or +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. For more information on this, and how to apply and follow +the GNU AGPL, see <https://www.gnu.org/licenses/>. diff --git a/README.md b/README.md index 17a49433..f054b737 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,7 @@ ---- -authors: - - zyc -date: 2024-05-04 00:00:00 ---- - # [MultiMolecule](https://multimolecule.danling.org) -!!! tips "" - - Accelerate Molecular Biology Research with Machine Learning +> [!TIP] +> Accelerate Molecular Biology Research with Machine Learning [![Codacy - 代码质量](https://app.codacy.com/project/badge/Grade/ad5fd8904c2e426bb0a865a9160d6c69)](https://app.codacy.com/gh/DLS5-Omics/multimolecule/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade) [![Codacy - Coverage](https://app.codacy.com/project/badge/Coverage/ad5fd8904c2e426bb0a865a9160d6c69)](https://app.codacy.com/gh/DLS5-Omics/multimolecule/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_coverage) diff --git a/README.zh.md b/README.zh.md index 310f0085..102ef349 100644 --- a/README.zh.md +++ b/README.zh.md @@ -1,14 +1,7 @@ ---- -authors: - - zyc -date: 2024-05-04 00:00:00 ---- - # [MultiMolecule](https://multimolecule.danling.org) -!!!
tips "" - - 使用机器学习加速分子生物学研究 +> [!TIP] +> 机器学习加速分子生物学研究 [![Codacy - 代码质量](https://app.codacy.com/project/badge/Grade/ad5fd8904c2e426bb0a865a9160d6c69)](https://app.codacy.com/gh/DLS5-Omics/multimolecule/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade) [![Codacy - 测试覆盖](https://app.codacy.com/project/badge/Coverage/ad5fd8904c2e426bb0a865a9160d6c69)](https://app.codacy.com/gh/DLS5-Omics/multimolecule/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_coverage) @@ -72,7 +65,7 @@ pip install git+https://github.com/DLS5-Omics/MultiMolecule 我们相信开放是研究的基础。 -MultiMolecule 在 GNU Affero 通用公共许可证下授权。 +MultiMolecule 在 GNU Affero 通用公共许可证 下授权。 请加入我们,共同建立一个开放的研究社区。 diff --git a/about/license-faq.md b/about/license-faq.md new file mode 100644 index 00000000..ced05ebe --- /dev/null +++ b/about/license-faq.md @@ -0,0 +1,114 @@ +# License FAQ + +This License FAQ explains the terms and conditions under which you may use the data, models, code, configuration, documentation, and weights provided by the DanLing Team (also known as DanLing) ('we', 'us', or 'our'). +It serves as an addendum to our _[License](license.md)_. + +## 0. Summary of Key Points + +This summary provides key points from our license, but you can find out more details about any of these topics by clicking the link following each key point and by reading the full license. + +
+ +!!! question "What constitutes the 'source code' in MultiMolecule?" + + We consider everything in our repositories to be source code, including data, models, code, configuration, and documentation. + + [:octicons-arrow-right-24: What constitutes the 'source code' in MultiMolecule?](#1-what-constitutes-the-source-code-in-multimolecule) + +!!! question "Can I publish research papers using MultiMolecule?" + + It depends. + + You can publish research papers on fully open access journals and conferences or preprint servers following the terms of the *[License](license.md)*. + + You must obtain a separate license from us to publish research papers on closed access journals and conferences. + + [:octicons-arrow-right-24: Can I publish research papers using MultiMolecule?](#2-can-i-publish-research-papers-using-multimolecule) + +!!! question "Can I use MultiMolecule for commercial purposes?" + + Yes, you can use MultiMolecule for commercial purposes under the terms of the *[License](license.md)*. + + [:octicons-arrow-right-24: Can I use MultiMolecule for commercial purposes?](#3-can-i-use-multimolecule-for-commercial-purposes) + +!!! question "Do people affiliated with certain organizations have specific license terms?" + + Yes, people affiliated with certain organizations have specific license terms. + + [:octicons-arrow-right-24: Do people affiliated with certain organizations have specific license terms?](#4-do-people-affiliated-with-certain-organizations-have-specific-license-terms) + +
+ +## 1. What constitutes the "source code" in MultiMolecule? + +We consider everything in our repositories to be source code. + +The training process of machine learning models is viewed similarly to the compilation process of traditional software. +As such, the model, code, configuration, documentation, and data used for training are all part of the source code, while the trained model weights are part of the object code. + +We also consider research papers and manuscripts a special form of documentation, and therefore also part of the source code. + +## 2. Can I publish research papers using MultiMolecule? + +Since research papers are considered a form of source code, publishers are legally required to open-source all materials on their server to comply with the _[License](license.md)_ if they publish papers using MultiMolecule. This is generally impractical for most publishers. + +As a special exemption under section 7 of the _[License](license.md)_, we grant permission to publish research papers using MultiMolecule in fully open access journals, conferences, or preprint servers that do not charge authors any fee, provided all published manuscripts are made available under the [GNU Free Documentation License (GFDL)](https://www.gnu.org/licenses/fdl.html), or a [Creative Commons license](https://creativecommons.org), or an [OSI-approved license](https://opensource.org/licenses) that permits the sharing of manuscripts. + +As a special exemption under section 7 of the _[License](license.md)_, we grant permission to publish research papers using MultiMolecule in certain non-profit journals, conferences, or preprint servers.
Currently, the non-profit journals, conferences, or preprint servers we allow include: + +- [All journals published by American Association for the Advancement of Science (AAAS)](https://www.aaas.org/journals) +- [eLife](https://elifesciences.org) + +For publishing in closed access journals or conferences, you must obtain a separate license from us. This typically involves co-authorship, a fee to support the project, or both. Contact us at [multimolecule@zyc.ai](mailto:multimolecule@zyc.ai) for more information. + +While not mandatory, we recommend citing the MultiMolecule project in your research papers. + +## 3. Can I use MultiMolecule for commercial purposes? + +Yes, MultiMolecule can be used for commercial purposes under the _[License](license.md)_. However, you must open-source any modifications to the source code and make them available under the _[License](license.md)_. + +If you prefer to use MultiMolecule for commercial purposes without open-sourcing your modifications, you must obtain a separate license from us. This typically involves a fee to support the project. Contact us at [multimolecule@zyc.ai](mailto:multimolecule@zyc.ai) for further details. + +## 4. Do people affiliated with certain organizations have specific license terms? + +YES! + +If you are affiliated with an organization that has a separate license agreement with us, you may be subject to different license terms. +Please consult your organization's legal department to determine if you are subject to a separate license agreement. + +Members of the following organizations automatically receive a non-transferable, non-sublicensable, and non-distributable [MIT License](https://mit-license.org/) to use MultiMolecule: + +- [Microsoft Research AI for Science](https://www.microsoft.com/en-us/research/lab/microsoft-research-ai-for-science/) +- [DP Technology](https://dp.tech/) + +This special license is considered an additional term under section 7 of the _[License](license.md)_. 
+It is not redistributable, and you are prohibited from creating any independent derivative works. +Any modifications or derivative works based on this license are automatically considered derivative works of MultiMolecule and must comply with all the terms of the _[License](license.md)_. +This ensures that third parties cannot bypass the license terms or create separate licenses from derivative works. + +## 5. How can I use MultiMolecule if my organization forbids the use of code under the AGPL License? + +Some organizations, such as [Google](https://opensource.google/documentation/reference/using/agpl-policy), have policies that prohibit the use of code under the AGPL License. + +If you are affiliated with an organization that forbids the use of AGPL-licensed code, you must obtain a separate license from us. +Contact us at [multimolecule@zyc.ai](mailto:multimolecule@zyc.ai) for more information. + +## 6. Can I use MultiMolecule if I am a federal employee of the United States Government? + +No. + +Code written by federal employees of the United States Government is not protected by copyright under [17 U.S. Code § 105](https://www.law.cornell.edu/uscode/text/17/105). + +As a result, federal employees of the United States Government cannot comply with the terms of the _[License](license.md)_. + +## 7. Do we make updates to this FAQ? + +!!! tip "In Short" + + Yes, we will update this FAQ as necessary to stay compliant with relevant laws. + +We may update this license FAQ from time to time. +The updated version will be indicated by an updated 'Last Revised Time' at the bottom of this license FAQ. +If we make any material changes, we will notify you by posting the new license FAQ on this page. +We are unable to notify you directly as we do not collect any contact information from you. +We encourage you to review this license FAQ frequently to stay informed of how you can use our data, models, code, configuration, documentation, and weights. 
diff --git a/about/license-faq.zh.md b/about/license-faq.zh.md new file mode 100644 index 00000000..38ecc155 --- /dev/null +++ b/about/license-faq.zh.md @@ -0,0 +1,120 @@ +!!! warning "翻译" + + 本文内容为翻译版本,旨在为用户提供方便。 + 我们已经尽力确保翻译的准确性。 + 但请注意,翻译内容可能包含错误,仅供参考。 + 请以英文[原文](https://multimolecule.danling.org/about/license)为准。 + + 为满足合规性与执法要求,翻译文档中的任何不准确或歧义之处均不具有约束力,也不具备法律效力。 + +# 许可协议常见问题解答 + +本许可协议常见问题解答解释了您可以在何种条件下使用由丹灵团队(也称为丹灵)(“我们”或“我们的”)提供的数据、模型、代码、配置、文档和权重。 +它作为我们的 _[许可协议](license.zh.md)_ 的附加文件。 + +## 0. 关键点总结 + +本总结提供了常见问题解答的关键点,但您可以通过点击每个关键点后的链接或使用目录来找到您所查找的部分以了解更多详情。 + +
+ +!!! question "在 MultiMolecule 中,什么构成了“源代码”?" + + 我们认为我们存储库中的所有内容都是源代码,包括数据、模型、代码、配置和文档。 + + [:octicons-arrow-right-24: 在MultiMolecule中,什么构成了“源代码”?](#1-multimolecule) + +!!! question "我可以使用 MultiMolecule 发表研究论文吗?" + + 视情况而定。 + + 您可以按照 *[许可协议](license.zh.md)* 的条款在完全开放获取的期刊和会议或预印本服务器上发表研究论文。 + + 要在封闭获取的期刊和会议上发表研究论文,您必须从我们这里获得单独的许可。 + + [:octicons-arrow-right-24: 我可以使用MultiMolecule发表研究论文吗?](#2-multimolecule) + +!!! question "我可以将 MultiMolecule 用于商业用途吗?" + + 是的,您可以根据 *[许可协议](license.zh.md)* 的条款将MultiMolecule用于商业用途。 + + [:octicons-arrow-right-24: 我可以将MultiMolecule用于商业用途吗?](#3-multimolecule) + +!!! question "与某些组织有关系的人是否有特定的许可条款?" + + 是的,与某些组织有关系的人有特定的许可条款。 + + [:octicons-arrow-right-24: 与某些组织有关系的人是否有特定的许可条款?](#4) + +
+ +## 1. 在 MultiMolecule 中,什么构成了“源代码”? + +我们认为我们存储库中的所有内容都是源代码。 + +机器学习模型的训练过程被视作类似于传统软件的编译过程。因此,模型、代码、配置、文档和用于训练的数据都被视为源代码的一部分,而训练出的模型权重则被视为目标代码的一部分。 + +我们还将研究论文和手稿视为一种特殊的文档形式,它们也是源代码的一部分。 + +## 2. 我可以使用 MultiMolecule 发表研究论文吗? + +由于研究论文被视为源代码的一种形式,如果发表使用 MultiMolecule 的论文,出版商必须开源其服务器上的所有材料,以符合 _[许可协议](license.zh.md)_ 的要求。对于大多数出版商来说,这是不切实际的。 + +作为 _[许可协议](license.zh.md)_ 第 7 条的特别豁免,我们允许在不向作者收取任何费用的完全开放获取的期刊、会议或预印本服务器上发表使用 MultiMolecule 的研究论文,前提是所有发表的手稿都应按照允许共享手稿的[GNU 自由文档许可协议(GFDL)](https://www.gnu.org/licenses/fdl.html)或[知识共享许可协议](https://creativecommons.org)或[OSI 批准许可协议](https://opensource.org/licenses)提供。 + +作为 _[许可协议](license.zh.md)_ 第 7 条的特别豁免,我们允许在部分非营利性的杂志、会议或预印本服务器上发表使用 MultiMolecule 的研究论文。目前,我们允许的非营利性杂志、会议或预印本服务器包括: + +- [美国科学促进会(AAAS)出版的所有期刊](https://www.aaas.org/journals) +- [eLife](https://elifesciences.org) + +要在封闭获取的期刊或会议上发表论文,您必须从我们这里获得单独的许可。这通常包括共同署名、支持项目的费用或两者兼而有之。请通过 [multimolecule@zyc.ai](mailto:multimolecule@zyc.ai) 与我们联系以获取更多信息。 + +虽然不是强制性的,但我们建议在研究论文中引用 MultiMolecule 项目。 + +## 3. 我可以将 MultiMolecule 用于商业用途吗? + +是的,您可以根据 _[许可协议](license.zh.md)_ 将 MultiMolecule 用于商业用途。但是,您必须开源对源代码的任何修改,并使其在 _[许可协议](license.zh.md)_ 下可用。 + +如果您希望在不开源修改内容的情况下将 MultiMolecule 用于商业用途,则必须从我们这里获得单独的许可。这通常涉及支持项目的费用。请通过 [multimolecule@zyc.ai](mailto:multimolecule@zyc.ai) 与我们联系以获取更多详细信息。 + +## 4. 与某些组织有关系的人是否有特定的许可条款? + +是的! + +如果您与一个与我们有单独许可协议的组织有关系,您可能会受到不同的许可条款的约束。请咨询您组织的法律部门,以确定您是否受制于单独的许可协议。 + +以下组织的成员自动获得一个不可转让、不可再许可、不可分发的 [MIT 许可协议](https://mit-license.org/) 来使用 MultiMolecule: + +- [微软研究院科学智能中心](https://www.microsoft.com/en-us/research/lab/microsoft-research-ai-for-science/) +- [深势科技](https://dp.tech/) + +此特别许可被视为 _[许可协议](license.zh.md)_ 第 7 条中的附加条款。 +它不可再分发,并且您被禁止创建任何独立的衍生作品。 +基于此许可的任何修改或衍生作品将自动被视为 MultiMolecule 的衍生作品,必须遵守 _[许可协议](license.zh.md)_ 的所有条款。 +这确保了第三方无法绕过许可条款或从衍生作品中创建单独的许可协议。 + +## 5. 如果我的组织禁止使用 AGPL 许可协议下的代码,我该如何使用 MultiMolecule?
+ +一些组织(如[Google](https://opensource.google/documentation/reference/using/agpl-policy))有禁止使用 AGPL 许可协议下代码的政策。 + +如果您与禁止使用 AGPL 许可协议代码的组织有关系,您必须从我们这里获得单独的许可。请通过 [multimolecule@zyc.ai](mailto:multimolecule@zyc.ai) 与我们联系以获取更多详细信息。 + +## 6. 如果我是美国联邦政府的雇员,我可以使用 MultiMolecule 吗? + +不能。 + +根据[17 U.S. Code § 105](https://www.law.cornell.edu/uscode/text/17/105),美国联邦政府雇员撰写的代码不受版权保护。 + +因此,美国联邦政府雇员无法遵守 _[许可协议](license.zh.md)_ 的条款。 + +## 7. 我们会更新此常见问题解答吗? + +!!! tip "简而言之" + + 是的,我们将根据需要更新此常见问题解答以保持与相关法律的一致。 + +我们可能会不时更新此许可协议常见问题解答。 +更新后的版本将通过更新本页面底部的“最后修订时间”来表示。 +如果我们进行任何重大更改,我们将通过在本页发布新的许可协议常见问题解答来通知您。 +由于我们不收集您的任何联系信息,我们无法直接通知您。 +我们鼓励您经常查看本许可协议常见问题解答,以了解您可以如何使用我们的数据、模型、代码、配置、文档和权重。 diff --git a/about/privacy.md b/about/privacy.md new file mode 100644 index 00000000..dcb695c4 --- /dev/null +++ b/about/privacy.md @@ -0,0 +1,408 @@ +# Privacy Notice + +This privacy notice for DanLing Team (also known as DanLing) ('we', 'us', or 'our'), describes how and why we might collect, store, use, and/or share ('process') your information when you use our services ('Services'), such as when you: + +- Visit our website at [multimolecule.danling.org](https://multimolecule.danling.org), or any website of ours that links to this privacy notice + +You can change your privacy settings at any time by clicking the button below: + +[Privacy Control](#__consent){ .md-button } + +**Questions or concerns?** +Reading this privacy notice will help you understand your privacy rights and choices. +If you do not agree with our policies and practices, please do not use our Services. +If you still have any questions or concerns, please contact us at [privacy@danling.org](mailto:privacy@danling.org). + +## 0. Summary of Key Points + +This summary provides key points from our privacy notice, but you can find out more details about any of these topics by clicking the link following each key point or by using our table of contents below to find the section you are looking for. + +
+ +!!! question "What personal information do we process?" + + When you visit, use, or navigate our Services, we may process personal information depending on how you interact with us and the Services, the choices you make, and the products and features you use. + + [:octicons-arrow-right-24: What information do we collect?](#1-what-information-do-we-collect) + +!!! question "How do we process your information?" + + We process your information to provide, improve, and administer our Services, communicate with you, for security and fraud prevention, and to comply with law. + We may also process your information for other purposes with your consent. + We process your information only when we have a valid legal reason to do so. + + [:octicons-arrow-right-24: How do we process your information?](#2-how-do-we-process-your-information) + +!!! question "Do we process any sensitive personal information?" + + We do not process any sensitive personal information. + +!!! question "Do we collect any information from third parties?" + + We do not collect any information from third parties. + +!!! question "In what situations and with which parties do we share personal information?" + + We may share information in specific situations and with specific third parties. + + [:octicons-arrow-right-24: When and with whom do we share your personal information?](#4-when-and-with-whom-do-we-share-your-personal-information) + +!!! question "How do we keep your information safe?" + + We have organisational and technical processes and procedures in place to protect your personal information. + + [:octicons-arrow-right-24: How do we keep your information safe?](#7-how-do-we-keep-your-information-safe) + +!!! question "What are your rights?" + + Depending on where you are located geographically, the applicable privacy law may mean you have certain rights regarding your personal information. + + [:octicons-arrow-right-24: What are your privacy rights?](#8-what-are-your-privacy-rights) + +!!!
question "How do you exercise your rights?" + + The easiest way to exercise your rights is by contacting the relevant data protection authority in your jurisdiction. + + [:octicons-arrow-right-24: How to exercise your rights](#how-to-exercise-your-rights) + +
+ +## 1. What information do we collect? + +### Personal information you disclose to us + +!!! tip "In Short" + + We collect personal information that you provide to us. + +We collect personal information that you voluntarily provide to us when you express an interest in obtaining information about us or our products and Services, when you participate in activities on the Services, or otherwise when you contact us. + +!!! danger "Sensitive Personal Information" + + We do not collect any sensitive personal information from you. + +### Information automatically collected + +!!! tip "In Short" + + Some information — such as IP address and/or browser and device characteristics — is collected automatically when you visit our Services. + +We automatically collect certain information when you visit, use, or navigate our Services. +This information does not reveal your specific identity (like your name or contact information) but may include device and usage information, such as your IP address, browser and device characteristics, operating system, language preferences, referring URLs, device name, country, location, information about how and when you use our Services, and other technical information. +This information is primarily needed to maintain the security and operation of our Services, and for our internal analytics and reporting purposes. + +Like many businesses, we also collect information through cookies and similar technologies. + +The information we collect includes: + +- **Identifiers.** + Identifier is a device and browser-specific unique random string that we generate when you use our Service. + This identifier is stored in a cookie on your device, allowing us to identify you across multiple sessions and when you return to our Service. + Note that this identifier does not contain any personal information about you, and is device and browser-specific, meaning that it cannot be used to track you across multiple devices or browsers. 
+ You can delete this cookie at any time by clearing your browser's cache. +- **Log and Usage Data.** + Log and usage data is service-related, diagnostic, usage, and performance information our servers automatically collect when you access or use our Services and which we record in log files. + Depending on how you interact with us, this log data may include your IP address, device information, browser type, and settings, and information about your activity in the Services (such as the date/time stamps associated with your usage, pages and files viewed, searches and other actions you take such as which features you use), device event information (such as system activity, error reports (sometimes called 'crash dumps') and hardware settings). +- **Device Data.** + We collect device data such as information about your computer, phone, tablet, or other devices you use to access the Services. + Depending on the device used, this device data may include information such as your IP address (or proxy server), device and application identification numbers, location, browser type, hardware model, Internet Service Provider and/or mobile carrier, operating system, and system configuration information. +- **Location Data.** + We collect location data such as information about your device's location, which can be either precise or imprecise. + How much information we collect depends on the type and settings of the device you use to access the Services. + For example, we may use GPS and other technologies to collect geolocation data that tells us your current location (based on your IP address). + You can opt out of allowing us to collect this information either by refusing access to the information or by disabling your location settings on your device. 
+ +### Categories of Personal Information We Collect + +We have collected the following categories of personal information in the past twelve (12) months: + +| Category | Examples | Collected | +| ----------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | +| A. Identifiers | Contact details, such as real name, alias, postal address, telephone or mobile contact number, unique personal identifier, online identifier, Internet Protocol address, email address, and account name | YES | +| B. Personal information as defined in the California Customer Records statute | Name, contact information, education, employment, employment history, and financial information | NO | +| C. Protected classification characteristics under state or federal law | Gender, age, date of birth, race and ethnicity, national origin, marital status, and other demographic data | NO | +| D. Commercial information | Transaction information, purchase history, financial details, and payment information | NO | +| E. Biometric information | Fingerprints and voiceprints | NO | +| F. Internet or other similar network activity | Browsing history, search history, online behaviour, interest data, and interactions with our and other websites, applications, systems, and advertisements | YES | +| G. Geolocation data | Device location | YES | +| H. Audio, electronic, sensory, or similar information | Images and audio, video or call recordings created in connection with our business activities | NO | +| I. Professional or employment-related information | Business contact details in order to provide you our Services at a business level or job title, work history, and professional qualifications if you apply for a job with us | NO | +| J. 
Education Information | Student records and directory information | NO | +| K. Inferences drawn from collected personal information | Inferences drawn from any of the collected personal information listed above to create a profile or summary about, for example, an individual’s preferences and characteristics | YES | +| L. Sensitive personal Information | | NO | + +We may also collect other personal information outside of these categories through instances where you interact with us in person, online, or by phone or mail in the context of: + +- Receiving help through our customer support channels; +- Participation in customer surveys or contests; and +- Facilitation in the delivery of our Services and to respond to your inquiries. + +We will use and retain the collected personal information as needed to provide you with our Services and as necessary to comply with our legal obligations, resolve disputes, and enforce our agreement for the following period: + +- Category A: 24 months +- Category F: 24 months +- Category G: 24 months +- Category K: 24 months + +## 2. How do we process your information? + +!!! tip "In Short" + + We process your information to provide, improve, and administer our Services, communicate with you, for security and fraud prevention, and to comply with law. + We may also process your information for other purposes with your consent. + +We process your personal information for a variety of reasons, depending on how you interact with our Services, including: + +- **To protect our Services.** + We may process your information as part of our efforts to keep our Services safe and secure, including fraud monitoring and prevention. +- **To identify user trends.** + We may process information about how you use our Services to better understand how they are being used so we can improve them. 
+- **To save or protect an individual's vital interest.** + We may process your information when necessary to save or protect an individual’s vital interest, such as to prevent harm. + +## 3. What legal basis do we have for processing your information? + +!!! tip "In Short" + + We only process your personal information when we believe it is necessary and we have a valid legal reason (i.e. legal basis) to do so under applicable law, like with your consent, to comply with laws, to provide you with services to enter into or fulfil our contractual obligations, to protect your rights, or to fulfil our legitimate business interests. + +The General Data Protection Regulation (GDPR) and UK GDPR require us to explain the valid legal bases we rely on in order to process your personal information. +As such, we may rely on the following legal bases to process your personal information: + +- **Consent.** + We may process your personal information if you have given us specific consent to use your personal information for a specific purpose. + You have the right to withdraw your consent at any time. + Learn more about [withdrawing your consent](#withdraw-your-consent). +- **Legitimate Interests.** + We may process your information when we believe it is reasonably necessary to achieve our legitimate business interests and those interests do not outweigh your interests and fundamental rights and freedoms. + For example, we may process your personal information for some of the purposes described in order to: + - Analyse how our Services are used so we can improve them to engage and retain users + - Diagnose problems and/or prevent fraudulent activities +- **Legal Obligations.** + We may process your information where we believe it is necessary for compliance with our legal obligations, such as to cooperate with a law enforcement body or regulatory agency, exercise or defend our legal rights, or disclose your information as evidence in litigation in which we are involved.
+- **Vital Interests.** + We may process your information where we believe it is necessary to protect your vital interests or the vital interests of a third party, such as situations involving potential threats to the safety of any person. + +!!! info "Consent to Processing in Canada" + + If you are located in Canada, we may be legally permitted under applicable law to process your information without your consent in some exceptional cases, including, for example: + + - If collection is clearly in the interests of an individual and consent cannot be obtained in a timely way + - For investigations and fraud detection and prevention + - For business transactions provided certain conditions are met + - If it is contained in a witness statement and the collection is necessary to assess, process, or settle an insurance claim + - For identifying injured, ill, or deceased persons and communicating with next of kin + - If we have reasonable grounds to believe an individual has been, is, or may be a victim of financial abuse + - If it is reasonable to expect collection and use with consent would compromise the availability or the accuracy of the information and the collection is reasonable for purposes related to investigating a breach of an agreement or a contravention of the laws of Canada or a province + - If disclosure is required to comply with a subpoena, warrant, court order, or rules of the court relating to the production of records + - If it was produced by an individual in the course of their employment, business, or profession and the collection is consistent with the purposes for which the information was produced + - If the collection is solely for journalistic, artistic, or literary purposes + - If the information is publicly available and is specified by the regulations + +## 4. When and with whom do we share your personal information? + +!!!
tip "In Short" + + We may share information in specific situations described in this section and/or with the following third parties. + +We may use your personal information for our business purposes, such as for undertaking internal research for technological development and demonstration. +This is not considered to be 'selling' of your personal information. + +**Vendors, Consultants, and Other Third-Party Service Providers.** +We may share your data with third-party vendors, service providers, contractors, or agents ('third parties') who perform services for us or on our behalf and require access to such information to do that work. +We have contracts in place with our third parties, which are designed to help safeguard your personal information. +This means that they cannot do anything with your personal information unless we have instructed them to do it. +They will also not share your personal information with any organisation apart from us. +They also commit to protect the data they hold on our behalf and to retain it for the period we instruct. 
+ +The third parties we may share personal information with are as follows: + +- **Advertising, Direct Marketing, and Lead Generation** + - Google AdSense +- **Cloud Computing Services** + - Microsoft Azure + - Amazon Web Services (AWS) + - Google Cloud Platform (GCP) +- **Communications and Content Delivery Network (CDN) Services** + - Cloudflare +- **Content Optimisation** + - Google Site Search + - Google Fonts +- **Functionality and Infrastructure Optimisation** + - GitHub Pages +- **User Commenting and Forums** + - Disqus + - GitHub Issues + - GitHub Discussions +- **Web and Mobile Analytics** + - Google Analytics + +We also may need to share your personal information in the following situations: + +- **Business Transfers.** + We may share or transfer your information in connection with, or during negotiations of, any merger, sale of company assets, financing, or acquisition of all or a portion of our business to another company. + +!!! info "We have disclosed the following categories of personal information for a business purpose in the past twelve (12) months:" + + None + +!!! info "The categories of third parties to whom we sold personal information in the past twelve (12) months:" + + None + +!!! info "The categories of third parties with whom we shared personal information in the past twelve (12) months:" + + - **Web and Mobile Analytics** + - Google Analytics + +## 5. Do we use cookies and other tracking technologies? + +!!! tip "In Short" + + We may use cookies and other tracking technologies to collect and store your information. + +We also permit third parties and service providers to use online tracking technologies on our Services for analytics and advertising, including to help manage and display advertisements, to tailor advertisements to your interests, or to send abandoned shopping cart reminders (depending on your communication preferences).
+The third parties and service providers use their technology to provide advertising about products and services tailored to your interests which may appear either on our Services or on other websites. + +To the extent these online tracking technologies are deemed to be a 'sale'/'sharing' (which includes targeted advertising, as defined under the applicable laws) under applicable US state laws, you can opt out of these online tracking technologies by clicking the button on the top of this page or the button below: + +[Privacy Control](#__consent){ .md-button } + + + +### Google Analytics + +We may share your information with Google Analytics to track and analyse the use of the Services. +The Google Analytics Advertising Features that we may use include: + +- Remarketing with Google Analytics +- Google Display Network Impressions Reporting +- Google Analytics Demographics and Interests Reporting + +To opt out of being tracked by Google Analytics across the Services, visit [https://tools.google.com/dlpage/gaoptout](https://tools.google.com/dlpage/gaoptout). +You can opt out of Google Analytics Advertising Features through [Ads Settings](https://adssettings.google.com) and Ad Settings for mobile apps. + +Other opt out means include [http://optout.networkadvertising.org/](http://optout.networkadvertising.org/) and [http://www.networkadvertising.org/mobile-choice](http://www.networkadvertising.org/mobile-choice). + +For more information on the privacy practices of Google, please visit the [Google Privacy & Terms](https://policies.google.com/privacy). + +## 6. How long do we keep your information? + +!!! tip "In Short" + + We keep your information for as long as necessary to fulfil the purposes outlined in this privacy notice unless otherwise required by law. 
+ +We will only keep your personal information for as long as it is necessary for the purposes set out in this privacy notice, unless a longer retention period is required or permitted by law (such as tax, accounting, or other legal requirements). + +When we have no ongoing legitimate business need to process your personal information, we will either delete or anonymise it, or, if this is not possible (for example, because your personal information has been stored in backup archives), then we will securely store your personal information and isolate it from any further processing until deletion is possible. + +## 7. How do we keep your information safe? + +!!! tip "In Short" + + We aim to protect your personal information through a system of organisational and technical security measures. + +We have implemented appropriate technical and organisational security measures designed to protect the security of any personal information we process. +However, despite our safeguards and efforts to secure your information, no electronic transmission over the internet or information storage technology can be guaranteed to be 100% secure, so we cannot promise or guarantee that hackers, cybercriminals, or other unauthorised third parties will not be able to defeat our security and improperly collect, access, steal, or modify your information. +Although we will do our best to protect your personal information, the transmission of personal information to and from our Services is at your own risk. +You should only access the Services within a secure environment. + +## 8. What are your privacy rights? + +!!! tip "In Short" + + We strive to protect your privacy rights and choices to the best possible extent under the law. + +You have rights under certain data protection laws. +However, these rights are not absolute, and in certain cases, we may decline your request as permitted by law. 
+These rights include: + +- **Right to know** + whether or not we are processing your personal data +- **Right to access** + your personal data +- **Right to correct** + inaccuracies in your personal data +- **Right to request** + the deletion of your personal data +- **Right to obtain a copy** + of the personal data you previously shared with us +- **Right to non-discrimination** + against you for exercising your rights +- **Right to opt-out** + - of the processing of your personal data if it is used for targeted advertising (or sharing as defined under applicable laws), the sale of personal data, or profiling in furtherance of decisions that produce legal or similarly significant effects ('profiling') concerning you + - of the collection of sensitive data and personal data collected through the operation of a voice or facial recognition feature +- **Right to obtain** + - a list of the categories of third parties to which we have disclosed personal data + - a list of specific third parties to which we have disclosed personal data +- **Right to limit** + use and disclosure of sensitive personal data + +### How to exercise your rights + +It is very unlikely that you will be able to exercise the above rights as we do not collect any identifiable personal data from you. + +We are unable to reply to and act on data subject access requests as we do not save any identifiable information about you, and we will not be able to verify your identity. + +If you believe we are unlawfully processing your personal information, you can contact the relevant data protection regulator, state attorney general, or other competent authority in your jurisdiction.
+ +| Residency | Authority | +| ------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------ | +| European Economic Area | [Member State's data protection supervisory authority](https://edpb.europa.eu/about-edpb/board/members_en) | +| United Kingdom | [Information Commissioner's Office](https://ico.org.uk/make-a-complaint/) | +| Australia | [Office of the Australian Information Commissioner](https://www.oaic.gov.au/privacy/privacy-complaints/) | +| New Zealand | [Office of New Zealand Privacy Commissioner](https://www.privacy.org.nz/your-rights/making-a-complaint-to-the-privacy-commissioner/) | +| Canada | [Office of the Privacy Commissioner of Canada](https://www.priv.gc.ca/en/) | +| California of the United States | [California Privacy Protection Agency](https://cppa.ca.gov/webapplications/complaint/) | +| Switzerland | [Federal Data Protection and Information Commissioner](https://www.edoeb.admin.ch/edoeb/en/home/the-fdpic/contact.html) | +| South Africa | [Information Regulator](https://inforegulator.org.za/training/wp/complaints/) | + +#### Withdraw your consent + +If we are relying on your consent to process your personal information, which may be express and/or implied consent depending on the applicable law, you have the right to withdraw your consent at any time. +You can withdraw your consent at any time by clicking the button on the top of this page or the button below: + +[Privacy Control](#__consent){ .md-button } + +However, please note that this will not affect the lawfulness of the processing before its withdrawal nor, when applicable law allows, will it affect the processing of your personal information conducted in reliance on lawful processing grounds other than consent. + +#### Cookies and similar technologies + +Most web browsers are set to accept cookies by default. 
+If you prefer, you can usually choose to set your browser to remove or reject browser cookies. +Please note that if you choose to remove or reject cookies, this will NOT affect the availability and functionality of our Services. + +## 9. Controls for Do-Not-Track features + +Most web browsers and some mobile operating systems and mobile applications include a Do-Not-Track ('DNT') feature or setting you can activate to signal your privacy preference not to have data about your online browsing activities monitored and collected. +At this stage, no uniform technology standard for recognising and implementing DNT signals has been finalised. +Although we cannot promise to honour every DNT signal, we strive to honour all such requests where technically feasible. + +California law requires us to let you know how we respond to web browser DNT signals. +Because we cannot guarantee to recognise and honour all DNT signals, we do not respond to them at this time. + +## 10. Do residents in certain jurisdiction have specific privacy rights? + +NO. + +All men and women are created equal. + +We provide the same privacy rights to all individuals, regardless of their location. + +Be assured that we will treat you with the same respect and dignity as we would want to be treated. + +## 11. How can you review, update, or delete the data we collect from you? + +It is very unlikely that you will be able to review, update, or delete the data we collect from you as we do not collect any identifiable personal data from you, and we will not be able to identify which data belongs to you. + +## 12. Do we make updates to this notice? + +!!! tip "In Short" + + Yes, we will update this notice as necessary to stay compliant with relevant laws. + +We may update this privacy notice from time to time. +The updated version will be indicated by an updated 'Last Revised Time' at the bottom of this privacy notice.
+If we make any material changes, we will notify you by posting the new privacy notice on this page. +We are unable to notify you directly as we do not collect any contact information from you. +We encourage you to review this privacy notice frequently to stay informed of how we are protecting your information. diff --git a/about/privacy.zh.md b/about/privacy.zh.md new file mode 100644 index 00000000..4e816720 --- /dev/null +++ b/about/privacy.zh.md @@ -0,0 +1,419 @@ +!!! warning "翻译" + + 本文内容为机器翻译版本,旨在为用户提供方便。 + 我们已经尽力确保翻译的准确性。 + 但请注意,翻译内容可能包含错误,仅供参考。 + 请以英文[原文](https://multimolecule.danling.org/about/privacy)为准。 + + 为满足合规性与执法要求,翻译文档中的任何不准确或歧义之处均不具有约束力,也不具备法律效力。 + +# 隐私声明 + +本隐私声明适用于丹灵团队(也被称作丹灵)(以下简称“我们”),描述了当您使用我们的服务(“服务”)时,我们如何以及为何可能收集、存储、使用和/或共享(“处理”)您的信息。例如当您: + +- 访问我们的网站 [multimolecule.danling.org](https://multimolecule.danling.org) 或任何链接到本隐私声明的我们的网站时 + +您可以随时通过点击下面的按钮更改您的隐私设置: + +[隐私控制](#__consent){ .md-button } + +**有问题或关注?** +阅读本隐私声明将帮助您了解您的隐私权利和选择。 +如果您不同意我们的声明和做法,请不要使用我们的服务。 +如果您仍有任何问题或关注,请通过[privacy@danling.org](mailto:privacy@danling.org)与我们联系。 + +## 0. 关键点总结 + +本总结提供了我们隐私声明的关键点,但您可以通过点击每个关键点后的链接或使用目录来找到您所查找的部分以了解更多详情。 + +
+ +!!! question "我们处理哪些个人信息?" + + 当您访问、使用或导航我们的服务时,我们可能会根据您与我们以及服务的互动方式、您所做的选择以及您使用的产品和功能来处理个人信息。 + + [:octicons-arrow-right-24: 我们收集哪些信息?](#1) + +!!! question "我们如何处理您的信息?" + + 我们处理您的信息以提供、改善和管理我们的服务,与您沟通,进行安全和防欺诈,以及遵守法律。 + 我们也可能在得到您的同意的情况下,出于其他目的处理您的信息。 + 我们仅在有合法法律理由时处理您的信息。 + + [:octicons-arrow-right-24: 我们如何处理您的信息?](#2) + +!!! question "我们处理任何敏感个人信息吗?" + + 我们不处理任何敏感个人信息。 + +!!! question "我们从第三方收集信息吗?" + + 我们不从第三方收集任何信息。 + +!!! question "在哪些情况下以及与哪些方我们共享个人信息?" + + 我们可能在特定情况下与特定第三方共享信息。 + + [:octicons-arrow-right-24: 我们何时以及与谁共享您的个人信息?](#4) + +!!! question "我们如何保护您的信息安全?" + + 我们已经实施了组织和技术流程和程序来保护您的个人信息。 + + [:octicons-arrow-right-24: 我们如何保持您的信息安全?](#7) + +!!! question "您有哪些权利?" + + 根据您所在地理位置,适用的隐私法可能意味着您对您的个人信息有某些权利。 + + [:octicons-arrow-right-24: 您有哪些隐私权利?](#8) + +!!! question "您如何行使您的权利?" + + 行使您的权利的最简单方式是联系您的司法管辖区的相关数据保护监管机构。 + + [:octicons-arrow-right-24: 如何行使您的权利?](#_5) + +
+ +## 1. 我们收集哪些信息? + +### 您向我们披露的个人信息 + +!!! tip "简而言之" + + 我们收集您向我们提供的个人信息。 + +我们收集您自愿向我们提供的个人信息,当您表达对我们或我们的产品和服务的兴趣、参与服务上的活动或以其他方式联系我们时。 + +!!! danger "敏感个人信息" + + 我们不从您那里收集任何敏感个人信息。 + +### 自动收集的信息 + +!!! tip "简而言之" + + 当您访问我们的服务时,某些信息——如IP地址和/或浏览器和设备特征——会自动被收集。 + +我们在您访问、使用或导航我们的服务时自动收集某些信息。 +这些信息不会透露您的特定身份(如您的姓名或联系信息),但可能包括设备和使用信息,如您的IP地址、浏览器和设备特性、操作系统、语言偏好、引用URL、设备名称、国家、位置、有关您如何以及何时使用我们的服务的信息,以及其他技术信息。 +这些信息主要是为了维护我们服务的安全性和运作所需,以及我们内部的分析和报告目的。 + +像许多企业一样,我们还通过cookies和类似技术收集信息。 + +我们收集的信息包括: + +- **标识符。** + 标识符是当您使用我们的服务时,我们生成的特定于设备和浏览器的唯一随机字符串。 + 该标识符存储在您设备上的一个cookie中,使我们能够在多个会话中以及您返回我们的服务时识别您。 + 注意这个标识符不包含任何关于您的个人信息,并且是特定于您的设备和浏览器的,这意味着它不能用于在不同设备或浏览器之间跟踪您。 + 您可以通过清除浏览器缓存随时删除此cookie。 +- **日志和使用数据。** + 日志和使用数据是与服务相关的、诊断、使用和性能信息,当您访问或使用我们的服务时我们的服务器会自动收集,并记录在日志文件中。 + 根据您与我们的互动方式,这些日志数据可能包括您的IP地址、设备信息、浏览器类型和设置,以及有关您在服务中的活动的信息(如与您使用相关的日期/时间戳、浏览和查看的页面和文件、搜索以及您采取的其他操作,例如您使用的功能),设备事件信息(如系统活动、错误报告(有时称为'崩溃转储')和硬件设置)。 +- **设备数据。** + 我们收集设备数据,如您用于访问服务的计算机、电话、平板或其他设备的信息。 + 根据所使用的设备,这些设备数据可能包括如下信息:您的IP地址(或代理服务器)、设备和应用程序识别号、位置、浏览器类型、硬件型号、互联网服务提供商和/或移动运营商、操作系统和系统配置信息。 +- **位置数据。** + 我们收集位置数据,如您设备的位置信息,这可以是精确的也可以是不精确的。 + 我们收集多少信息取决于您用于访问服务的设备的类型和设置。 + 例如,我们可能使用GPS和其他技术来收集地理位置数据,告诉我们您当前的位置(基于您的IP地址)。 + 您可以选择不让我们收集此信息,方法是拒绝访问信息或在您的设备上禁用位置设置。 + +### 我们收集的个人信息类别 + +过去十二(12)个月内,我们收集了以下类别的个人信息: + +| 类别 | 示例 | 已收集 | +|-------------------------|-----------------------------------------------------------------|-------| +| A. 标识符 | 联系方式,如真实姓名、别名、邮政地址、电话或移动联系号码、独特的个人标识符、在线标识符、互联网协议地址、电子邮件地址和帐户名称 | 是 | +| B. 加利福尼亚客户记录法中定义的个人信息 | 姓名、联系信息、教育、就业、就业历史和财务信息 | 否 | +| C. 州或联邦法律下的受保护分类特征 | 性别、年龄、出生日期、种族和民族、国籍、婚姻状况和其他人口统计数据 | 否 | +| D. 商业信息 | 交易信息、购买历史、财务详细信息和支付信息 | 否 | +| E. 生物识别信息 | 指纹和声纹 | 否 | +| F. 互联网或其他类似网络活动 | 浏览历史、搜索历史、在线行为、兴趣数据和与我们和其他网站、应用程序、系统和广告的互动 | 是 | +| G. 地理位置数据 | 设备位置 | 是 | +| H. 音频、电子、感觉或类似信息 | 在我们的业务活动中创建的图像和音频、视频或通话录音 | 否 | +| I. 与职业相关的信息 | 为了在业务层面提供我们的服务而收集的商业联系信息或职务名称、工作历史和职业资格 | 否 | +| J. 教育信息 | 学生记录和目录信息 | 否 | +| K. 
从收集的个人信息中推断出的推论 | 从上述任何收集的个人信息中推断出的用于创建个人偏好和特征的概况或摘要 | 是 | +| L. 敏感个人信息 | | 否 | + +我们还可能在您与我们亲自、在线或通过电话或邮件与我们互动的情况下收集其他个人信息,包括: + +- 通过我们的客户支持渠道获得帮助; +- 参与客户调查或竞赛;以及 +- 促进我们服务的交付并回应您的查询。 + +我们将根据需要在以下期限内使用和保留所收集的个人信息,以便为您提供我们的服务,并根据需要遵守我们的法律义务、解决争议和执行我们的协议: + +- A 类:24个月 +- F 类:24个月 +- G 类:24个月 +- K 类:24个月 + +## 2. 我们如何处理您的信息? + +!!! tip "简而言之" + + 我们处理您的信息以提供、改善和管理我们的服务,与您沟通,进行安全和防欺诈,以及遵守法律。 + 我们也可能在得到您的同意的情况下,出于其他目的处理您的信息。 + +我们出于多种原因处理您的个人信息,这取决于您如何与我们的服务互动,包括: + +- **保护我们的服务。** + 我们可能会处理您的信息作为我们保持服务安全的努力的一部分,包括监控和预防欺诈。 +- **识别用户趋势。** + 我们可能会处理有关您如何使用我们的服务的信息,以更好地了解它们的使用情况,从而改进它们。 +- **保存或保护个人的重要利益。** + 我们可能会在必要时处理您的信息,以保存或保护个人的重要利益,例如为了防止伤害。 + +## 3. 我们处理您的信息有什么法律依据? + +!!! tip "简而言之" + + 我们只有在我们认为必要且有有效的法律理由(即法律依据)时才处理您的个人信息,如与您的同意、遵守法律、提供服务给您进入或履行我们的合同义务、保护您的权利或满足我们合法的业务利益。 + +欧盟通用数据保护条例(GDPR)和英国GDPR要求我们解释我们依靠的有效法律依据以处理您的个人信息。 +因此,我们可能依赖以下法律依据来处理您的个人信息: + +- **同意。** + 如果您已给我们明确同意使用您的个人信息用于某个特定目的,我们可能会处理您的个人信息。 + 您有权随时撤回您的同意。 + 了解更多关于[撤回您的同意]()。 +- **合法利益。** + 当我们认为出于我们合法的业务利益来处理您的信息是合理必要的,并且这些利益不超过您的利益和基本权利与自由时,我们可能会处理您的信息。 + 例如,我们可能会处理您的个人信息,用于: + - 分析我们的服务如何被使用,以便我们可以改进它们以吸引和保留用户 + - 诊断问题和/或预防欺诈活动 +- **法律义务。** + 我们可能会在我们认为必须遵守我们的法律义务的情况下处理您的信息,例如与执法机构或监管机构合作、行使或捍卫我们的法律权利,或在我们参与的诉讼中披露您的信息作为证据。 +- **重要利益。** + 我们可能会在我们认为必须保护您或第三方的重要利益的情况下处理您的信息,例如涉及潜在威胁任何人的安全的情况。 + +!!! info "在加拿大处理的同意" + + 如果您位于加拿大,我们可能在适用法律下在某些特殊情况下无需您的同意就可以合法地处理您的信息,包括例如: + + - 如果收集明显符合个人的利益且无法及时获得同意 + - 用于调查和欺诈检测与预防 + - 用于商业交易,前提是满足某些条件 + - 如果信息包含在证人声明中,且收集对于评估、处理或解决保险索赔是必要的 + - 用于识别受伤、生病或已故人士并与近亲沟通 + - 如果我们有合理的理由相信某个人已经、正在或可能成为金融滥用的受害者 + - 如果合理预期通过征得同意以收集和使用信息会损害信息的可用性或准确性,并且收集对于调查违反协议或违反加拿大或省法律的目的是合理的 + - 如果披露是为了遵守传票、搜查令、法院命令或与记录生产相关的法院规则 + - 如果信息是由个人在其就业、业务或专业过程中产生的,并且收集与信息产生的目的一致 + - 如果收集仅用于新闻、艺术或文学目的 + - 如果信息是公开可用的,并且通过规定指定 + +## 4. 我们何时以及与谁共享您的个人信息? + +!!! 
tip "简而言之" + + 我们可能在本节描述的特定情况下和/或与以下第三方共享信息。 + +我们可能会将您的个人信息用于我们的业务目的,如进行内部研究以进行技术开发和展示。 +这不被视为“出售”您的个人信息。 + +**供应商、顾问和其他第三方服务提供商。** +我们可能会与为我们服务或代表我们工作并需要访问此类信息以执行该工作的第三方供应商、服务提供商、承包商或代理(“第三方”)共享您的数据。 +我们与我们的第三方签订了合同,这些合同旨在帮助保护您的个人信息。 +这意味着他们不能在未经我们指示的情况下做任何事情。 +他们也不会与我们以外的任何组织共享您的个人信息。 +他们还承诺保护他们代表我们持有的数据并按照我们的指示保留该数据。 + +我们可能共享个人信息的第三方如下: + +- **广告、直销和潜在客户生成** + - Google AdSense +- **云计算服务** + - Microsoft Azure + - Amazon Web Services (AWS) + - Google Cloud Platform (GCP) +- **通信和内容交付网络 (CDN) 服务** + - Cloudflare +- **内容优化** + - Google站点搜索 + - Google字体 +- **功能和基础设施优化** + - GitHub页面 +- **用户评论和论坛** + - Disqus + - GitHub议题 + - GitHub讨论 +- **网络和移动分析** + - Google Analytics + +我们还可能需要在以下情况下共享您的个人信息: + +- **业务转移。** + 我们可能在进行任何并购、出售公司资产、融资或收购我们全部或部分业务的谈判中共享或转让您的信息。 + +!!! info "过去十二(12)个月我们出于业务目的披露了以下类别的个人信息:" + + 无 + +!!! info "过去十二(12)个月我们出售个人信息的第三方类别:" + + 无 + +!!! info "过去十二(12)个月我们与之共享个人信息的第三方类别:" + + - **网络和移动分析** + - Google Analytics + +## 5. 我们是否使用cookies和其他跟踪技术? + +!!! tip "简而言之" + + 我们可能使用cookies和其他跟踪技术来收集和存储您的信息。 + +我们还允许第三方和服务提供商在我们的服务中使用在线跟踪技术用于分析和广告,包括帮助管理和展示广告,根据您的兴趣定制广告,或发送遗弃购物车提醒(取决于您的沟通偏好)。 +这些第三方和服务提供商使用他们的技术为您提供定制的产品和服务广告,这些广告可能出现在我们的服务或其他网站上。 + +在适用的美国州法律下,这些在线跟踪技术被视为“销售”/“分享”(包括目标广告,根据适用法律定义)的程度,您可以通过点击本页顶部或下面的按钮来选择退出这些在线跟踪技术: + +[隐私控制](#__consent){ .md-button } + + + +### Google Analytics + +我们可能会与Google Analytics共享您的信息,以跟踪和分析服务的使用情况。 +我们可能使用的Google Analytics广告功能包括: + +- Google Analytics的再营销 +- Google Display Network印象报告 +- Google Analytics人口统计和兴趣报告 + +要选择退出在服务中通过Google Analytics跟踪您,请访问[https://tools.google.com/dlpage/gaoptout](https://tools.google.com/dlpage/gaoptout)。 + +您可以通过[广告设置](https://adssettings.google.com)和移动应用的广告设置来选择退出Google Analytics广告功能。 + +其他退出方式包括[http://optout.networkadvertising.org/](http://optout.networkadvertising.org/)和[http://www.networkadvertising.org/mobile-choice](http://www.networkadvertising.org/mobile-choice)。 + +有关Google隐私做法的更多信息,请访问[Google隐私与条款](https://policies.google.com/privacy)。 + +## 6. 我们保留您的信息多久? + +!!! 
tip "简而言之" + + 我们将根据本隐私声明中概述的目的保留您的信息,除非法律另有要求。 + +我们只会在本隐私声明中概述的目的所需的时间内保留您的个人信息,除非法律要求或允许更长的保留期(如税务、会计或其他法律要求)。 + +当我们没有持续的合法业务需要处理您的个人信息时,我们将删除或匿名化它,或者,如果这不可能(例如,因为您的个人信息已存储在备份档案中),那么我们将安全地存储您的个人信息并将其与任何进一步处理隔离,直到删除成为可能。 + +## 7. 我们如何保持您的信息安全? + +!!! tip "简而言之" + + 我们旨在通过一系列组织和技术安全措施保护您的个人信息。 + +我们已实施适当的技术和组织安全措施,旨在保护我们处理的任何个人信息的安全。 +然而,尽管我们采取了保障措施并努力确保您的信息安全,任何通过互联网的电子传输或信息存储技术都无法保证是100%安全的,因此我们无法承诺或保证黑客、网络犯罪分子或其他未经授权的第三方不会破坏我们的安全措施并不当地收集、访问、窃取或修改您的信息。 +尽管我们将尽最大努力保护您的个人信息,个人信息到我们服务的传输和从我们服务的传输仍然是您自己的风险。 +您应该只在安全的环境中访问服务。 + +## 8. 您有哪些隐私权利? + +!!! tip "简而言之" + + 我们努力在法律允许的最大范围内保护您的隐私权利和选择。 + +您在某些数据保护法下有权利。 +然而,这些权利不是绝对的,在某些情况下,我们可能会根据法律拒绝您的请求。 +这些权利包括: + +- **知情权** + 我们是否正在处理您的个人数据 +- **访问权** + 您的个人数据 +- **更正权** + 您的个人数据中的不准确信息 +- **请求删除权** + 您的个人数据 +- **获取副本权** + 您以前与我们共享的个人数据 +- **反歧视权** + 针对您行使您的权利 +- **选择退出权** + - 如果您的个人数据用于目标广告(或根据适用法律定义的“分享”),个人数据的销售,或促进对您产生法律或类似重大效果的决策(“分析”)的分析 + - 收集通过语音或面部识别功能操作收集的敏感数据和个人数据 +- **获取权** + - 向我们披露个人数据的第三方类别的列表 + - 向我们披露个人数据的特定第三方的列表 +- **限制使用和披露权** + 敏感个人数据 + +### 如何行使您的权利 + +您几乎不可能行使上述权利,因为我们不从您那里收集任何可识别的个人数据。 + +我们无法回复和采取数据主体访问请求,因为我们不保存任何可识别的关于您的信息,我们将无法验证您的身份。 + +如果您认为我们非法处理您的个人信息,您可以联系您所在管辖区的相关数据保护监管机构、州总检察长或其他有权机构。 + +| 居住地 | 机构 | +|-------------------|------------------------------------------------------------------------------------------------------| +| 欧洲经济区 | [成员国的数据保护监督机构](https://edpb.europa.eu/about-edpb/board/members_en) | +| 英国 | [信息专员办公室](https://ico.org.uk/make-a-complaint/) | +| 澳大利亚 | [澳大利亚信息专员办公室](https://www.oaic.gov.au/privacy/privacy-complaints/) | +| 新西兰 | [新西兰隐私专员办公室](https://www.privacy.org.nz/your-rights/making-a-complaint-to-the-privacy-commissioner/) | +| 加拿大 | [加拿大隐私专员办公室](https://www.priv.gc.ca/en/) | +| 美国加利福尼亚州 | [加利福尼亚隐私保护机构](https://cppa.ca.gov/webapplications/complaint/) | +| 瑞士 | [联邦数据保护和信息专员](https://www.edoeb.admin.ch/edoeb/en/home/the-fdpic/contact.html) | +| 南非 | [信息监管机构](https://inforegulator.org.za/training/wp/complaints/) | + + +#### 撤回您的同意 + 
+如果我们依赖您的同意来处理您的个人信息,这可能是明确的和/或暗示的同意,取决于适用法律,您有权随时撤回您的同意。 +您可以通过点击本页顶部或下面的按钮随时撤回您的同意: + +[隐私控制](#__consent){ .md-button } + +然而,请注意,这不会影响撤回之前的处理的合法性,也不会影响当适用法律允许时,基于除同意之外的合法处理理由进行的您的个人信息的处理。 + +#### Cookies和类似技术 + +大多数网络浏览器默认设置为接受cookies。 +如果您愿意,您通常可以选择设置您的浏览器以删除或拒绝浏览器cookies。 +请注意,如果您选择删除或拒绝cookies,这将不会影响我们服务的可用性和功能。 + +## 9. 不追踪功能的控制 + +大多数网络浏览器和一些移动操作系统和移动应用程序包括一个您可以激活的不追踪(“DNT”)功能或设置,以表达您的隐私偏好,不希望有关您的在线浏览活动的数据被监控和收集。 +到目前为止,还没有为识别和实施DNT信号制定统一的技术标准。 +虽然我们不能承诺尊重每一个DNT信号,我们力求尊重所有在技术上可行的此类请求。 + +加利福尼亚法律要求我们告诉您我们如何响应网络浏览器的DNT信号。 +由于我们不能保证识别和尊重所有DNT信号,我们目前不对它们做出响应。 + +## 10. 某些管辖区的居民是否有特定的隐私权利? + +否。 + +所有男女生而平等。 + +我们向所有个人提供相同的隐私权利,无论他们的位置如何。 + +请放心,我们将以我们希望被对待的相同的尊重和尊严对待您。 + +## 11. 您如何查看、更新或删除我们收集的数据? + +您几乎不可能查看、更新或删除我们收集的数据,因为我们不从您那里收集任何可识别的个人数据,也无法确定哪些数据是属于您的。 + +## 12. 我们是否会更新此声明? + +!!! tip "简而言之" + + 是的,我们将根据需要更新此声明以保持与相关法律的一致。 + +我们可能会不时更新此隐私声明。 +更新后的版本将通过更新本页面底部的“最后修订时间”来表示。 +如果我们进行任何重大更改,我们将通过在本页发布新的隐私声明来通知您。 +由于我们不收集您的任何联系信息,我们无法直接通知您。 +我们鼓励您经常查看本隐私声明,以了解我们如何保护您的信息。 diff --git a/docs/docs/about/license-faq.md b/docs/docs/about/license-faq.md index e8e341ff..15bdef27 100644 --- a/docs/docs/about/license-faq.md +++ b/docs/docs/about/license-faq.md @@ -1,109 +1 @@ -# License FAQ - -This License FAQ explains the terms and conditions under which you may use the data, models, code, configuration, documentation, and weights provided by the DanLing Team (also known as DanLing) ('we', 'us', or 'our'). -It serves as an addendum to our _[License](license.md)_. - -## 0. Summary of Key Points - -This summary provides key points from our license, but you can find out more details about any of these topics by clicking the link following each key point and by reading the full license. - -
- -!!! question "What constitutes the 'source code' in MultiMolecule?" - - We consider everything in our repositories to be source code, including data, models, code, configuration, and documentation. - - [:octicons-arrow-right-24: What constitutes the 'source code' in MultiMolecule?](#1-what-constitutes-the-source-code-in-multimolecule) - -!!! question "Can I publish research papers using MultiMolecule?" - - It depends. - - You can publish research papers on fully open access journals and conferences or preprint servers following the terms of the *[License](license.md)*. - - You must obtain a separate license from us to publish research papers on closed access journals and conferences. - - [:octicons-arrow-right-24: Can I publish research papers using MultiMolecule?](#2-can-i-publish-research-papers-using-multimolecule) - -!!! question "Can I use MultiMolecule for commercial purposes?" - - Yes, you can use MultiMolecule for commercial purposes under the terms of the *[License](license.md)*. - - [:octicons-arrow-right-24: Can I use MultiMolecule for commercial purposes?](#3-can-i-use-multimolecule-for-commercial-purposes) - -!!! question "Do people affiliated with certain organizations have specific license terms?" - - Yes, people affiliated with certain organizations have specific license terms. - - [:octicons-arrow-right-24: Do people affiliated with certain organizations have specific license terms?](#4-do-people-affiliated-with-certain-organizations-have-specific-license-terms) - -
- -## 1. What constitutes the "source code" in MultiMolecule? - -We consider everything in our repositories to be source code. - -The training process of machine learning models is viewed similarly to the compilation process of traditional software. -As such, the model, code, configuration, documentation, and data used for training are all part of the source code, while the trained model weights are part of the object code. - -We also consider research papers and manuscripts a special form of documentation, which are also part of the source code. - -## 2. Can I publish research papers using MultiMolecule? - -Since research papers are considered a form of source code, publishers are legally required to open-source all materials on their server to comply with the _[License](license.md)_ if they publish papers using MultiMolecule. This is generally impractical for most publishers. - -As a special exemption under section 7 of the _[License](license.md)_, we grant permission to publish research papers using MultiMolecule in fully open access journals, conferences, or preprint servers, provided all published manuscripts are made available under the [GNU Free Documentation License (GFDL)](https://www.gnu.org/licenses/fdl.html), or a [Creative Commons license](https://creativecommons.org), or an [OSI-approved license](https://opensource.org/licenses) that permits the sharing of manuscripts. - -For publishing in closed access journals or conferences, you must obtain a separate license from us. This typically involves co-authorship, a fee to support the project, or both. Contact us at [multimolecule@zyc.ai](mailto:multimolecule@zyc.ai) for more information. - -While not mandatory, we recommend citing the MultiMolecule project in your research papers. - -## 3. Can I use MultiMolecule for commercial purposes? - -Yes, MultiMolecule can be used for commercial purposes under the _[License](license.md)_. 
However, you must open-source any modifications to the source code and make them available under the _[License](license.md)_. - -If you prefer to use MultiMolecule for commercial purposes without open-sourcing your modifications, you must obtain a separate license from us. This typically involves a fee to support the project. Contact us at [multimolecule@zyc.ai](mailto:multimolecule@zyc.ai) for further details. - -## 4. Do people affiliated with certain organizations have specific license terms? - -YES! - -If you are affiliated with an organization that has a separate license agreement with us, you may be subject to different license terms. -Please consult your organization's legal department to determine if you are subject to a separate license agreement. - -Members of the following organizations automatically receive a non-transferable, non-sublicensable, and non-distributable [MIT License](https://mit-license.org/) to use MultiMolecule: - -- [Microsoft Research AI for Science](https://www.microsoft.com/en-us/research/lab/microsoft-research-ai-for-science/) -- [DP Technology](https://dp.tech/) - -This special license is considered an additional term under section 7 of the _[License](license.md)_. -It is not redistributable, and you are prohibited from creating any independent derivative works. -Any modifications or derivative works based on this license are automatically considered derivative works of MultiMolecule and must comply with all the terms of the _[License](license.md)_. -This ensures that third parties cannot bypass the license terms or create separate licenses from derivative works. - -## 5. How can I use MultiMolecule if my organization forbids the use of code under the AGPL License? - -Some organizations, such as [Google](https://opensource.google/documentation/reference/using/agpl-policy), have policies that prohibit the use of code under the AGPL License. 
- -If you are affiliated with an organization that forbids the use of AGPL-licensed code, you must obtain a separate license from us. -Contact us at [multimolecule@zyc.ai](mailto:multimolecule@zyc.ai) for more information. - -## 6. Can I use MultiMolecule if I am a federal employee of the United States Government? - -No. - -Code written by federal employees of the United States Government is not protected by copyright under [17 U.S. Code § 105](https://www.law.cornell.edu/uscode/text/17/105). - -As a result, federal employees of the United States Government cannot comply with the terms of the _[License](license.md)_. - -## 7. Do we make updates to this FAQ? - -!!! tip "In Short" - - Yes, we will update this FAQ as necessary to stay compliant with relevant laws. - -We may update this license FAQ from time to time. -The updated version will be indicated by an updated 'Last Revised Time' at the bottom of this license FAQ. -If we make any material changes, we will notify you by posting the new license FAQ on this page. -We are unable to notify you directly as we do not collect any contact information from you. -We encourage you to review this license FAQ frequently to stay informed of how you can use our data, models, code, configuration, documentation, and weights. +--8<-- "about/license-faq.md" \ No newline at end of file diff --git a/docs/docs/about/license-faq.zh.md b/docs/docs/about/license-faq.zh.md index 86ef6d07..b332000b 100644 --- a/docs/docs/about/license-faq.zh.md +++ b/docs/docs/about/license-faq.zh.md @@ -1,115 +1 @@ -!!! warning "翻译" - - 本文内容为翻译版本,旨在为用户提供方便。 - 我们已经尽力确保翻译的准确性。 - 但请注意,翻译内容可能包含错误,仅供参考。 - 请以英文[原文](https://multimolecule.danling.org/about/license)为准。 - - 为满足合规性与执法要求,翻译文档中的任何不准确或歧义之处均不具有约束力,也不具备法律效力。 - -# 许可协议常见问题解答 - -本许可协议常见问题解答解释了您可以在何种条件下使用由丹灵团队(也称为丹灵)(“我们”或“我们的”)提供的数据、模型、代码、配置、文档和权重。 -它作为我们的 _[许可协议](license.zh.md)_ 的附加文件。 - -## 0. 关键点总结 - -本总结提供了常见问题解答的关键点,但您可以通过点击每个关键点后的链接或使用目录来找到您所查找的部分以了解更多详情。 - -
- -!!! question "在 MultiMolecule 中,什么构成了“源代码”?" - - 我们认为我们存储库中的所有内容都是源代码,包括数据、模型、代码、配置和文档。 - - [:octicons-arrow-right-24: 在MultiMolecule中,什么构成了“源代码”?](#1-multimolecule) - -!!! question "我可以使用 MultiMolecule 发表研究论文吗?" - - 视情况而定。 - - 您可以按照 *[许可协议](license.zh.md)* 的条款在完全开放获取的期刊和会议或预印本服务器上发表研究论文。 - - 要在封闭获取的期刊和会议上发表研究论文,您必须从我们这里获得单独的许可。 - - [:octicons-arrow-right-24: 我可以使用MultiMolecule发表研究论文吗?](#2-multimolecule) - -!!! question "我可以将 MultiMolecule 用于商业用途吗?" - - 是的,您可以根据 *[许可协议](license.zh.md)* 的条款将MultiMolecule用于商业用途。 - - [:octicons-arrow-right-24: 我可以将MultiMolecule用于商业用途吗?](#3-multimolecule) - -!!! question "与某些组织有关系的人是否有特定的许可条款?" - - 是的,与某些组织有关系的人有特定的许可条款。 - - [:octicons-arrow-right-24: 与某些组织有关系的人是否有特定的许可条款?](#4) - -
- -## 1. 在 MultiMolecule 中,什么构成了“源代码”? - -我们认为我们存储库中的所有内容都是源代码。 - -机器学习模型的训练过程被视作类似于传统软件的编译过程。因此,模型、代码、配置、文档和用于训练的数据都被视为源代码的一部分,而训练出的模型权重则被视为目标代码的一部分。 - -我们还将研究论文和手稿视为一种特殊的文档形式,它们也是源代码的一部分。 - -## 2. 我可以使用 MultiMolecule 发表研究论文吗? - -由于研究论文被视为源代码的一种形式,如果发表使用 MultiMolecule 的论文,出版商必须开源其服务器上的所有材料,以符合 _[许可协议](license.zh.md)_ 的要求。对于大多数出版商来说,这是不切实际的。 - -作为 _[许可协议](license.zh.md)_ 第 7 条的特别豁免,我们允许在完全开放获取的期刊、会议或预印本服务器上发表使用 MultiMolecule 的研究论文,前提是所有发表的手稿都应按照允许共享手稿的[GNU 自由文档许可协议(GFDL)](https://www.gnu.org/licenses/fdl.html)或[知识共享许可协议](https://creativecommons.org)或[OSI 批准许可协议](https://opensource.org/licenses)提供。 - -要在封闭获取的期刊或会议上发表论文,您必须从我们这里获得单独的许可。这通常包括共同署名、支持项目的费用或两者兼而有之。请通过 [multimolecule@zyc.ai](mailto:multimolecule@zyc.ai) 与我们联系以获取更多信息。 - -虽然不是强制性的,但我们建议在研究论文中引用 MultiMolecule 项目。 - -## 3. 我可以将 MultiMolecule 用于商业用途吗? - -是的,您可以根据 _[许可协议](license.zh.md)_ 将 MultiMolecule 用于商业用途。但是,您必须开源对源代码的任何修改,并使其在 _[许可协议](license.zh.md)_ 下可用。 - -如果您希望在不开源修改内容的情况下将 MultiMolecule 用于商业用途,则必须从我们这里获得单独的许可。这通常涉及支持项目的费用。请通过 [multimolecule@zyc.ai](mailto:multimolecule@zyc.ai) 与我们联系以获取更多详细信息。 - -## 4. 与某些组织有关系的人是否有特定的许可条款? - -是的! - -如果您与一个与我们有单独许可协议的组织有关系,您可能会受到不同的许可条款的约束。请咨询您组织的法律部门,以确定您是否受制于单独的许可协议。 - -以下组织的成员自动获得一个不可转让、不可再许可、不可分发的 [MIT 许可协议](https://mit-license.org/) 来使用 MultiMolecule: - -- [微软研究院科学智能中心](https://www.microsoft.com/en-us/research/lab/microsoft-research-ai-for-science/) -- [深势科技](https://dp.tech/) - -此特别许可被视为 _[许可协议](license.zh.md)_ 第 7 条中的附加条款。 -它不可再分发,并且您被禁止创建任何独立的衍生作品。 -基于此许可的任何修改或衍生作品将自动被视为 MultiMolecule 的衍生作品,必须遵守 _[许可协议](license.zh.md)_ 的所有条款。 -这确保了第三方无法绕过许可条款或从衍生作品中创建单独的许可协议。 - -## 5. 如果我的组织禁止使用 AGPL 许可协议下的代码,我该如何使用 MultiMolecule? - -一些组织(如[Google](https://opensource.google/documentation/reference/using/agpl-policy))有禁止使用 AGPL 许可协议下代码的政策。 - -如果您与禁止使用 AGPL 许可协议代码的组织有关系,您必须从我们这里获得单独的许可。请通过 [multimolecule@zyc.ai](mailto:multimolecule@zyc.ai) 与我们联系以获取更多详细信息。 - -## 6. 如果我是美国联邦政府的雇员,我可以使用 MultiMolecule 吗? - -不能。 - -根据[17 U.S. 
Code § 105](https://www.law.cornell.edu/uscode/text/17/105),美国联邦政府雇员撰写的代码不受版权保护。 - -因此,美国联邦政府雇员无法遵守 _[许可协议](license.zh.md)_ 的条款。 - -## 7. 我们会更新此常见问题解答吗? - -!!! tip "简而言之" - - 是的,我们将根据需要更新此常见问题解答以保持与相关法律的一致。 - -我们可能会不时更新此许可协议常见问题解答。 -更新后的版本将通过更新本页面底部的“最后修订时间”来表示。 -如果我们进行任何重大更改,我们将通过在本页发布新的许可协议常见问题解答来通知您。 -由于我们不收集您的任何联系信息,我们无法直接通知您。 -我们鼓励您经常查看本许可协议常见问题解答,以了解您可以如何使用我们的数据、模型、代码、配置、文档和权重。 +--8<-- "about/license-faq.zh.md" \ No newline at end of file diff --git a/docs/docs/about/license.md b/docs/docs/about/license.md index bc16720e..abdf6655 100644 --- a/docs/docs/about/license.md +++ b/docs/docs/about/license.md @@ -1,660 +1 @@ -# GNU AFFERO GENERAL PUBLIC LICENSE - -Version 3, 19 November 2007 - -Copyright (C) 2007 Free Software Foundation, Inc. - - -Everyone is permitted to copy and distribute verbatim copies of this -license document, but changing it is not allowed. - -## Preamble - -The GNU Affero General Public License is a free, copyleft license for -software and other kinds of works, specifically designed to ensure -cooperation with the community in the case of network server software. - -The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -our General Public Licenses are intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains -free software for all its users. - -When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. 
- -Developers that use our General Public Licenses protect your rights -with two steps: (1) assert copyright on the software, and (2) offer -you this License which gives you legal permission to copy, distribute -and/or modify the software. - -A secondary benefit of defending all users' freedom is that -improvements made in alternate versions of the program, if they -receive widespread use, become available for other developers to -incorporate. Many developers of free software are heartened and -encouraged by the resulting cooperation. However, in the case of -software used on network servers, this result may fail to come about. -The GNU General Public License permits making a modified version and -letting the public access it on a server without ever releasing its -source code to the public. - -The GNU Affero General Public License is designed specifically to -ensure that, in such cases, the modified source code becomes available -to the community. It requires the operator of a network server to -provide the source code of the modified version running there to the -users of that server. Therefore, public use of a modified version, on -a publicly accessible server, gives the public access to the source -code of the modified version. - -An older license, called the Affero General Public License and -published by Affero, was designed to accomplish similar goals. This is -a different license, not a version of the Affero GPL, but Affero has -released a new version of the Affero GPL which permits relicensing -under this license. - -The precise terms and conditions for copying, distribution and -modification follow. - -## TERMS AND CONDITIONS - -### 0. Definitions. - -"This License" refers to version 3 of the GNU Affero General Public -License. - -"Copyright" also means copyright-like laws that apply to other kinds -of works, such as semiconductor masks. - -"The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". 
"Licensees" and -"recipients" may be individuals or organizations. - -To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of -an exact copy. The resulting work is called a "modified version" of -the earlier work or a work "based on" the earlier work. - -A "covered work" means either the unmodified Program or a work based -on the Program. - -To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - -To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user -through a computer network, with no transfer of a copy, is not -conveying. - -An interactive user interface displays "Appropriate Legal Notices" to -the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - -### 1. Source Code. - -The "source code" for a work means the preferred form of the work for -making modifications to it. "Object code" means any non-source form of -a work. 
- -A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - -The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - -The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - -The Corresponding Source need not include anything that users can -regenerate automatically from other parts of the Corresponding Source. - -The Corresponding Source for a work in source code form is that same -work. - -### 2. 
Basic Permissions. - -All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - -You may make, run and propagate covered works that you do not convey, -without conditions so long as your license otherwise remains in force. -You may convey covered works to others for the sole purpose of having -them make modifications exclusively for you, or provide you with -facilities for running those works, provided that you comply with the -terms of this License in conveying all material for which you do not -control copyright. Those thus making or running the covered works for -you must do so exclusively on your behalf, under your direction and -control, on terms that prohibit them from making any copies of your -copyrighted material outside their relationship with you. - -Conveying under any other circumstances is permitted solely under the -conditions stated below. Sublicensing is not allowed; section 10 makes -it unnecessary. - -### 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - -No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. 
- -When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such -circumvention is effected by exercising rights under this License with -respect to the covered work, and you disclaim any intention to limit -operation or modification of the work as a means of enforcing, against -the work's users, your or third parties' legal rights to forbid -circumvention of technological measures. - -### 4. Conveying Verbatim Copies. - -You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - -You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - -### 5. Conveying Modified Source Versions. - -You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these -conditions: - -- a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. -- b) The work must carry prominent notices stating that it is - released under this License and any conditions added under - section 7. This requirement modifies the requirement in section 4 - to "keep intact all notices". -- c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. 
This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. -- d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - -A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - -### 6. Conveying Non-Source Forms. - -You may convey a covered work in object code form under the terms of -sections 4 and 5, provided that you also convey the machine-readable -Corresponding Source under the terms of this License, in one of these -ways: - -- a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. 
-- b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the Corresponding - Source from a network server at no charge. -- c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. -- d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. 
-- e) Convey the object code using peer-to-peer transmission, - provided you inform other peers where the object code and - Corresponding Source of the work are being offered to the general - public at no charge under subsection 6d. - -A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - -A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, -family, or household purposes, or (2) anything designed or sold for -incorporation into a dwelling. In determining whether a product is a -consumer product, doubtful cases shall be resolved in favor of -coverage. For a particular product received by a particular user, -"normally used" refers to a typical or common use of that class of -product, regardless of the status of the particular user or of the way -in which the particular user actually uses, or expects or is expected -to use, the product. A product is a consumer product regardless of -whether the product has substantial commercial, industrial or -non-consumer uses, unless such uses represent the only significant -mode of use of the product. - -"Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to -install and execute modified versions of a covered work in that User -Product from a modified version of its Corresponding Source. The -information must suffice to ensure that the continued functioning of -the modified object code is in no case prevented or interfered with -solely because modification has been made. 
- -If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - -The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or -updates for a work that has been modified or installed by the -recipient, or for the User Product in which it has been modified or -installed. Access to a network may be denied when the modification -itself materially and adversely affects the operation of the network -or violates the rules and protocols for communication across the -network. - -Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - -### 7. Additional Terms. - -"Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. 
If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - -When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. - -Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders -of that material) supplement the terms of this License with terms: - -- a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or -- b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or -- c) Prohibiting misrepresentation of the origin of that material, - or requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or -- d) Limiting the use for publicity purposes of names of licensors - or authors of the material; or -- e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or -- f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions - of it) with contractual assumptions of liability to the recipient, - for any liability that these contractual assumptions directly - impose on those licensors and authors. - -All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. 
If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - -If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - -Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; the -above requirements apply either way. - -### 8. Termination. - -You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - -However, if you cease all violation of this License, then your license -from a particular copyright holder is reinstated (a) provisionally, -unless and until the copyright holder explicitly and finally -terminates your license, and (b) permanently, if the copyright holder -fails to notify you of the violation by some reasonable means prior to -60 days after the cessation. - -Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. 
- -Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - -### 9. Acceptance Not Required for Having Copies. - -You are not required to accept this License in order to receive or run -a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - -### 10. Automatic Licensing of Downstream Recipients. - -Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - -An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - -You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. 
For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - -### 11. Patents. - -A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - -A contributor's "essential patent claims" are all patent claims owned -or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - -Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - -In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. 
- -If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - -If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - -A patent license is "discriminatory" if it does not include within the -scope of its coverage, prohibits the exercise of, or is conditioned on -the non-exercise of one or more of the rights that are specifically -granted under this License. 
You may not convey a covered work if you -are a party to an arrangement with a third party that is in the -business of distributing software, under which you make payment to the -third party based on the extent of your activity of conveying the -work, and under which the third party grants, to any of the parties -who would receive the covered work from you, a discriminatory patent -license (a) in connection with copies of the covered work conveyed by -you (or copies made from those copies), or (b) primarily for and in -connection with specific products or compilations that contain the -covered work, unless you entered into that arrangement, or that patent -license was granted, prior to 28 March 2007. - -Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - -### 12. No Surrender of Others' Freedom. - -If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under -this License and any other pertinent obligations, then as a -consequence you may not convey it at all. For example, if you agree to -terms that obligate you to collect a royalty for further conveying -from those to whom you convey the Program, the only way you could -satisfy both those terms and this License would be to refrain entirely -from conveying the Program. - -### 13. Remote Network Interaction; Use with the GNU General Public License. 
- -Notwithstanding any other provision of this License, if you modify the -Program, your modified version must prominently offer all users -interacting with it remotely through a computer network (if your -version supports such interaction) an opportunity to receive the -Corresponding Source of your version by providing access to the -Corresponding Source from a network server at no charge, through some -standard or customary means of facilitating copying of software. This -Corresponding Source shall include the Corresponding Source for any -work covered by version 3 of the GNU General Public License that is -incorporated pursuant to the following paragraph. - -Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU General Public License into a single -combined work, and to convey the resulting work. The terms of this -License will continue to apply to the part which is the covered work, -but the work with which it is combined will remain governed by version -3 of the GNU General Public License. - -### 14. Revised Versions of this License. - -The Free Software Foundation may publish revised and/or new versions -of the GNU Affero General Public License from time to time. Such new -versions will be similar in spirit to the present version, but may -differ in detail to address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies that a certain numbered version of the GNU Affero General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU Affero General Public License, you may choose any version ever -published by the Free Software Foundation. 
- -If the Program specifies that a proxy can decide which future versions -of the GNU Affero General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - -Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - -### 15. Disclaimer of Warranty. - -THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT -WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND -PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE -DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR -CORRECTION. - -### 16. Limitation of Liability. - -IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR -CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES -ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT -NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR -LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM -TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER -PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. - -### 17. Interpretation of Sections 15 and 16. 
- -If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - -END OF TERMS AND CONDITIONS - -## How to Apply These Terms to Your New Programs - -If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these -terms. - -To do so, attach the following notices to the program. It is safest to -attach them to the start of each source file to most effectively state -the exclusion of warranty; and each file should have at least the -"copyright" line and a pointer to where the full notice is found. - - - Copyright (C) - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as - published by the Free Software Foundation, either version 3 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see . - -Also add information on how to contact you by electronic and paper -mail. - -If your software can interact with users remotely through a computer -network, you should also make sure that it provides a way for users to -get its source. For example, if your program is a web application, its -interface could display a "Source" link that leads users to an archive -of the code. 
There are many ways you could offer source, and different -solutions will be better for different programs; see section 13 for -the specific requirements. - -You should also get your employer (if you work as a programmer) or -school, if any, to sign a "copyright disclaimer" for the program, if -necessary. For more information on this, and how to apply and follow -the GNU AGPL, see . +--8<-- "LICENSE.md" \ No newline at end of file diff --git a/docs/docs/about/license.zh.md b/docs/docs/about/license.zh.md index a5500646..ec4876e1 100644 --- a/docs/docs/about/license.zh.md +++ b/docs/docs/about/license.zh.md @@ -1,248 +1 @@ -!!! warning "翻译" - - 本文内容为翻译版本,旨在为用户提供方便。 - 我们已经尽力确保翻译的准确性。 - 但请注意,翻译内容可能包含错误,仅供参考。 - 请以英文[原文](https://multimolecule.danling.org/about/license)为准。 - - 为满足合规性与执法要求,翻译文档中的任何不准确或歧义之处均不具有约束力,也不具备法律效力。 - -# GNU AFFERO 通用公共许可证 - -第3版,2007年11月19日 - -版权所有 © 2007 Free Software Foundation, Inc. - -Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. - -每个人都被允许复制和分发本许可证文件的逐字副本,但不允许进行更改。 - -## 序言 - -GNU Affero 通用公共许可证是一个自由的、允许复制的软件和其他类型作品的许可,在网络服务器软件的情况下,它是专门为确保与社区合作而设计。 - -大多数软件和其他实用作品的许可都是为了剥夺您分享和改变作品的自由。相比之下,我们的通用公共许可证的目的是保证您分享和改变一个程序的所有版本的自由--确保它对所有用户都是自由软件。 - -当我们谈论自由软件时,我们指的是自由,而不是价格。我们的通用公共许可证的设计是为了确保您有分发自由软件副本的自由(如果您愿意,还可以收费),您可以收到源代码,或者如果您想得到它,您可以改变软件或在新的自由程序中使用它的片段,而且您知道您可以做这些事情。 - -使用我们通用公共许可证的开发者通过两个步骤保护您的权利。(1)主张软件的版权,(2)向您提供本许可证,允许您合法地复制、分发和/或修改该软件。 - -捍卫所有用户自由的一个次要好处是,如果程序的替代版本得到广泛使用,就可以供其他开发者使用。许多自由软件的开发者对由此产生的合作感到振奋和鼓舞。然而,在网络服务器上使用的软件,这种结果可能无法实现。GNU通用公共许可证允许制作一个修改过的版本,让公众在服务器上访问它,而不需要向公众发布其源代码。 - -GNU Affero通用公共许可证是专门设计来确保在这种情况下,修改后的源代码可以被社区使用。它要求网络服务器的运营商向该服务器的用户提供运行在那里的修改版本的源代码。因此,在一个可公开访问的服务器上公开使用一个修改过的版本,使公众能够获得修改过的版本的源代码。 - -一个较早的许可证,称为Affero通用公共许可证,由Affero发布,旨在实现类似目标。这是一个不同的许可证,不是Affero GPL的一个版本,但Affero已经发布了Affero GPL的一个新版本,允许在这个许可证下重新许可。 - -关于复制、分发和修改的确切条款和条件如下。 - -## 条款与条件 - -### 0. 定义. 
- -"本许可证" 是指GNU Affero通用公共许可证的第三版。 -"版权" 也指适用于其他类型作品的类似版权的法律,如半导体掩模。 - -"本程序" 是指在本许可证下许可的任何有版权的作品。每个被许可人都被称呼为 "您"。"被许可人" 和 "接受者" 可以是个人或组织。 - -"修改" 作品是指以需要版权许可的方式复制或改编该作品的全部或部分内容,而不是制作一个完全的副本。由此产生的作品被称为早期作品的 "修改版" 或 "基于" 早期作品的作品。 -一个 "涵盖的作品" 是指未经修改的程序或基于该程序的作品。 - -"传播" 作品是指在未经许可的情况下,对作品做任何事情,使您在适用的版权法下承担直接或间接的侵权责任,但在计算机上执行或修改私人副本除外。传播包括复制、分发(无论是否修改)、向公众提供,在一些国家还包括其他活动。 -传播" 作品是指使其他各方能够制作或接受副本的任何一种传播。仅仅是通过计算机网络与用户互动,而没有转让副本,并不是传播。 - -交互式用户界面显示 "适当的法律声明" 的程度是,它包括一个方便和显眼的功能,(1)显示适当的版权声明,(2)告诉用户该作品没有保证(除了提供保证的范围),被许可人可以根据本许可传达该作品,以及如何查看本许可证的副本。如果界面呈现的是一个用户命令或选项的列表,如菜单,那么列表中的突出项目就符合这一标准。 - -### 1. 源代码. - -作品的 "源代码" 是指对作品进行修改的首选形式。"目标代码" 是指作品的任何非源码形式。 - -"标准接口" 是指由公认的标准机构定义的官方标准的接口,或者在为某一特定编程语言指定接口的情况下,指在使用该语言的开发者中广泛使用的接口。 - -可执行作品的 "系统库" 包括除作品整体以外的任何东西,这些东西(a)以正常的形式打包一个主要部件,但不是该主要部件的一部分,以及(b)仅用于使作品与该主要部件一起使用,或用于实现一个标准接口,该接口的实现已以源代码形式向公众提供。这里的 "主要部件" 是指可执行作品所运行的特定操作系统(如果有的话)的主要基本部件(内核、窗口系统等),或用于制作该作品的编译器,或用于运行该作品的目标代码解释器。 - -目标代码形式的作品的 "相应源代码" 是指生成、安装和(对于可执行作品)运行目标代码以及修改作品所需的所有源代码,包括控制这些活动的脚本。但是,它不包括作品的系统库,也不包括在执行这些活动时未经修改但不属于作品的通用工具或普遍可用的免费程序。例如,相应源包括与作品的源文件相关的接口定义文件,以及作品专门设计的共享库和动态链接的子程序的源代码,例如通过亲密的数据通信或控制流在这些子程序和作品的其他部分之间。 - -相应源不需要包括用户可以从相应源的其他部分自动重新生成的任何东西。 - -源代码形式的作品的相应源是指同一作品。 - -### 2. 基本权限. - -本许可证授予的所有权利都是在程序的版权期限内授予的,并且在满足所述条件的情况下是不可撤消的。本许可证明确肯定了您对运行未经修改的程序的无限许可。只有在输出的内容构成了一个受保护的作品的情况下,运行受保护作品的输出才受本许可证的保护。本许可证承认您的合理使用权或版权法所规定的其他同等权利。 - -只要您的许可证仍然有效,可以无条件地制作、运行和传播您没有转达的涵盖作品。您可以将涵盖的作品传达给其他人,唯一的目的是让他们专门为您进行修改,或为您提供运行这些作品的设施,前提是您在传达所有您不控制版权的材料时遵守本许可证的条款。那些为您制作或运行所涉作品的人必须完全代表您,在您的指导和控制下,按照禁止他们在与您的关系之外制作您的版权材料的任何副本的条款来进行。 - -在任何其他情况下,仅在下述条件下允许转让。不允许转授权;第10条规定没有必要。 - -### 3. 从反规避法中保护用户的合法权利. - -根据任何履行1996年12月20日通过的世界知识产权组织版权条约第11条规定的义务的适用法律,或禁止或限制规避此类措施的类似法律,任何涵盖的作品都不得被视为有效技术措施的一部分。 - -当您传达一个涵盖的作品时,您放弃任何禁止规避技术措施的法律权力,只要这种规避是通过对涵盖的作品行使本许可证下的权利而实现的,并且您否认有任何限制操作或修改作品的意图,以作为对作品的用户强制执行您或第三方禁止规避技术措施的法律权利的手段。 - -### 4. 传递逐字拷贝. 
- -您可以在收到程序的源代码后,以任何媒介传递其逐字拷贝,但您必须在每份拷贝上醒目地、适当地发布适当的版权声明;保持所有说明本许可证和根据第7条添加的任何非许可条款适用于代码的声明完整无缺;保持所有关于没有任何保证的声明完整无缺;并将本许可证的拷贝与程序一起交给所有接收者。 - -您可以对每份拷贝收取任何费用,也可以不收取任何费用,您还可以提供有偿的支持或保修保护。 - -### 5. 传递修改后的源版本. - -您可以根据第 4 节的条款,以源代码的形式传达基于本程序的作品,或根据本程序的修改而产生的作品,但您必须满足以下所有条件: - -- a) 作品必须有醒目的声明,说明您修改了它,并给出相关的日期。 -- b) 作品必须有醒目的声明,说明它是根据本许可证和根据第7条增加的条件发布的。这一要求修改了第4节中 "保持所有通知的完整性" 的要求。 -- c) 您必须根据本许可证将整个作品作为一个整体许可给任何拥有其副本的人。因此,本许可证将与任何适用的第7条附加条款一起,适用于整个作品及其所有部分,无论它们是如何包装的。本许可证不允许以任何其他方式许可该作品,但如果您已经单独收到了这种许可,它也不会使这种许可失效。 -如果一个受保护作品与其他单独和独立的作品的汇编,其性质不是受保护作品的延伸,并且没有与之结合以形成更大的程序,在存储或分发媒介的某一卷上,如果该汇编及其产生的版权没有被用来限制汇编用户的访问或法律权利,超出单个作品允许的范围,则被称为 "聚合"。将一个受保护的作品包含在一个总体中并不导致本许可证适用于总体的其他部分。 - -### 6. 传递非源形式. - -您可以根据第4条和第5条的规定,以目标代码的形式传递被保护的作品,但您也必须根据本许可证的规定,以下列方式之一传递机器可读的相应源代码: - -- a) 在实体产品(包括实体销售媒介)中传递目标代码,或体现在实体产品(包括实体销售媒介)中,同时将相应的源代码固定在通常用于软件交换的耐用实体媒介上。 -- b) 在实物产品(包括实物销售媒介)中传递目标代码,或在实物产品(包括实物销售媒介)中体现目标代码,并附有一份至少三年有效的书面报价,只要您为该产品型号提供备件或客户支持,就一直有效。向任何拥有目标代码的人提供(1)本许可证所涵盖的产品中所有软件的相应源代码的拷贝,拷贝在通常用于软件交换的耐用物理介质上,其价格不超过贵方实际执行这一传递源代码的合理成本,或者(2)从网络服务器上免费获取相应的源代码拷贝。 -- c) 将目标代码的单个副本与提供相应来源的书面提议的副本一起传送。只有在偶尔和非商业性的情况下,并且只有在您收到目标代码和这种提议的情况下,才允许这种选择,符合第6b款的规定。 -- d) 通过提供从指定地点(免费或收费)获取目标代码,并以同样的方式通过同一地点提供相应的源码,而不再收费。您不需要要求接受者在复制目标代码的同时复制相应的源代码。如果复制目标代码的地方是一个网络服务器,对应源可以在另一个支持同等复制设施的服务器上(由您或第三方运营),只要您在目标代码旁边保持明确的指示,说明在哪里可以找到对应源。无论对应源在哪个服务器上,您都有义务确保在满足这些要求所需的时间内提供对应源。 -- e) 使用点对点传输的方式传送目标代码,但您必须告知其他同行,根据第6d款,目标代码和作品的对应源正在免费提供给公众。 -目标代码的可分离部分,其源代码作为系统库被排除在相应的源码之外,不需要包括在传达目标代码作品中。 - -用户产品 "是指(1) "消费品",即通常用于个人、家庭或家居用途的任何有形个人财产,或(2)为纳入住宅而设计或出售的任何东西。在确定一个产品是否是消费品时,有疑问的情况应以有利于承保的方式解决。对于特定用户收到的特定产品,"通常使用" 是指该类产品的典型或常见用途,而不考虑特定用户的地位或特定用户实际使用或期望或预期使用该产品的方式。一个产品是消费类产品,无论该产品是否有大量的商业、工业或非消费类用途,除非这些用途是该产品的唯一重要使用方式。 - -用户产品的 "安装信息" 是指在该用户产品中从其对应源的修改版本安装和执行所涵盖作品的修改版本所需的任何方法、程序、授权密钥或其他信息。这些信息必须足以确保在任何情况下都不会仅仅因为进行了修改而阻止或干扰修改后的目标代码的继续运行。 - -如果您在用户产品中,或与用户产品一起,或专门在用户产品中使用,并作为交易的一部分,将用户产品的拥有权和使用权永久或固定地转让给接受者(无论交易如何定性),根据本条规定转让的对应源必须附有安装信息。但是,如果您或任何第三方都没有保留在用户产品上安装修改过的目标代码的能力(例如,作品已经安装在ROM中),则该要求不适用。 - 
-提供安装信息的要求不包括继续为被接受者修改或安装的作品或被修改或安装的用户产品提供支持服务、保证或更新的要求。当修改本身对网络的运行产生实质性的不利影响或违反了网络上的通信规则和协议时,可以拒绝访问网络。 -根据本节规定,所传达的相应源码和所提供的安装信息必须是公开记录的格式(并以源代码的形式向公众提供实现),并且必须不需要特殊的密码或钥匙来解包、阅读或复制。 - -### 7. 附加条款. - -"附加许可" 是对本许可条款的补充,对其中的一个或多个条件作出例外规定。适用于整个程序的附加许可应被视为包括在本许可证中,只要它们在适用法律下有效。如果附加许可只适用于本程序的一部分,则该部分可以根据这些许可单独使用,但整个程序仍受本许可证的管辖,而不考虑附加许可。 - -当您转送一份受保护作品的副本时,您可以选择从该副本或其任何部分中删除任何附加许可。(在某些情况下,当您修改作品时,附加许可可能被写成需要自己删除)。您可以在您添加到涵盖的作品中的材料上放置额外的许可,对于这些材料,您有或可以给予适当的版权许可。 - -尽管本许可证有任何其他规定,对于您添加到受保护作品中的材料,您可以(如果得到该材料的版权持有人的授权)用以下条款补充本许可证的条款: - -- a) 以不同于本许可证第15条和第16条的条款声明保证或限制责任;或 -- b) 要求在该材料或包含该材料的作品所显示的适当法律声明中保留特定的合理法律声明或作者归属;或 -- c) 禁止歪曲该材料的来源,或要求以合理的方式将该材料的修改版本标记为与原始版本不同;或 -- d) 限制为宣传目的使用该材料的许可人或作者的姓名;或 -- e) 拒绝根据商标法授予使用某些商号、商标或服务标志的权利;或 -- f) 要求将材料(或材料的修改版本)转交给接受者的任何人对这些合同假设直接加在这些许可人和作者身上的任何责任进行赔偿。 -所有其他非许可性的附加条款都被视为第10条意义上的 "进一步限制"。如果您收到的程序或其任何部分包含一个通知,说明它受本许可证的管辖,同时还有一个属于进一步限制的条款,您可以删除该条款。如果许可文件包含进一步的限制,但允许根据本许可证进行再许可或转让,您可以在受保护的作品中添加受该许可文件条款管辖的材料,但进一步的限制在这种再许可或转让中不存在。 - -如果您按照本节的规定向受保护的作品添加条款,您必须在相关的源文件中声明适用于这些文件的附加条款,或者说明在哪里可以找到适用条款。 - -额外的条款,不管是允许的还是非允许的,都可以以单独的书面许可的形式说明,或者作为例外情况说明;上述要求适用于任何一种方式。 - -### 8. 终止. - -除本许可证明确规定的情况外,您不得传播或修改覆盖作品。任何以其他方式传播或修改作品的尝试都是无效的,并将自动终止您在本许可下的权利(包括根据第11节第三段授予的任何专利许可)。 - -然而,如果您停止所有违反本许可的行为,那么您从某一特定版权人处获得的许可将被恢复(a)暂时性的,除非并直到版权人明确并最终终止您的许可,以及(b)永久性的,如果版权人未能在停止后的60天内通过某种合理的方式通知您侵权行为。 - -此外,如果版权持有人以某种合理的方式通知您侵权行为,这是您第一次收到该版权持有人违反本许可证的通知(针对任何作品),并且您在收到通知后30天内纠正了侵权行为,那么您从某一版权持有人获得的许可证将被永久恢复。 - -终止您在本节下的权利并不终止那些根据本许可证从您那里获得副本或权利的各方的许可。如果您的权利已经被终止,而且没有永久恢复,您就没有资格根据第10条获得相同材料的新许可。 - -### 9. 拥有副本不需要接受. - -您不需要为了接收或运行程序的副本而接受本许可证。仅仅由于使用点对点传输来接收拷贝而发生的被保护作品的辅助传播,也同样不需要接受。然而,除了本许可证之外,没有任何其他东西允许您传播或修改任何受保护的作品。如果您不接受本许可证,这些行为就侵犯了版权。因此,通过修改或传播涵盖的作品,您表明您接受本许可证,可以这样做。 - -### 10. 下游接受者的自动许可. 
- -每当您传递一个涵盖的作品时,接收者会自动从原始许可人那里得到一个许可证,在遵守本许可证的前提下,运行、修改和传播该作品。您不负责执行第三方对本许可证的遵守。 - -实体交易 "是指转让一个组织的控制权,或一个组织的大部分资产,或拆分一个组织,或合并组织的交易。如果实体交易导致覆盖作品的传播,那么收到作品副本的每一个交易方也会收到该方的权益前身根据前段规定所拥有或可以给予的任何作品许可,以及从权益前身处获得作品对应源的拥有权,如果权益前身拥有或通过合理努力可以获得。 - -您不得对行使本许可证下授予或确认的权利施加任何进一步的限制。例如,您不得对行使本许可证所授予的权利征收许可费、特许权使用费或其他费用,也不得提起诉讼(包括诉讼中的交叉索赔或反索赔),指控制作、使用、销售、提供销售或进口本程序或其任何部分侵犯了任何专利权利。 - -### 11. 专利. - -贡献者 "是指授权根据本许可证使用本程序或本程序所基于的作品的版权持有人。这样授权的作品被称为贡献者的 "贡献者版本"。 - -贡献者的 "基本专利权利要求" 是指贡献者拥有或控制的所有专利权利要求,不管是已经获得的还是以后获得的,这些专利权利要求将被本许可证允许的某种方式所侵犯,即制作、使用或销售其贡献者版本,但不包括仅因进一步修改贡献者版本而被侵犯的权利要求。就本定义而言,"控制" 包括以符合本许可证要求的方式授予专利分许可的权利。 - -每个贡献者根据贡献者的基本专利权利要求,授予您非独占性的、全球性的、免版税的专利许可,以制造、使用、销售、提供销售、进口和以其他方式运行、修改和传播其贡献者版本的内容。 - -在以下三段中,"专利许可" 是指不实施专利的任何明示协议或承诺,无论其名称如何(例如,明确允许实施专利或不起诉专利侵权的约定)。向一方 "授予" 这种专利许可意味着作出这种协议或承诺,不对该方实施专利。 - -如果您在知情的情况下,依靠专利许可传递了一个涵盖的作品,而该作品的相应来源并没有通过公开的网络服务器或其他容易获得的方式,供任何人根据本许可证的条款免费复制,那么您必须(1)使相应来源可以获得,或者(2)安排剥夺自己对该特定作品的专利许可利益,或者(3)以符合本许可证要求的方式,安排将专利许可扩展到下游接受者。"明知故犯" 是指您实际知道,如果没有专利许可,您在某个国家传递所涉作品,或者您的接受者在某个国家使用所涉作品,将侵犯您有理由相信在该国有效的一项或多项可识别专利。 - -如果根据一项交易或安排或与之相关,您转让或通过促成转让传播被保护作品,并向接受被保护作品的某些当事方授予专利许可,授权他们使用、传播、修改或转让被保护作品的特定副本,那么您授予的专利许可将自动扩展到被保护作品和基于该作品的所有接受者。 - -如果专利许可不包括在其覆盖范围内,禁止行使或以不行使本许可具体授予的一项或多项权利为条件,则为 "歧视性的"。如果您是与从事软件分销业务的第三方达成的安排的一方,根据该安排,您根据您传递作品的活动范围向第三方付款,并且根据该安排,第三方向任何将从您那里获得所涉作品的一方授予,则您不得传递所涉作品。歧视性的专利许可:(a)与您传递的作品的副本(或由这些副本制作的副本)有关,或(b)主要为包含该作品的特定产品或汇编并与之有关,除非您在2007年3月28日之前达成该安排,或授予该专利许可。 - -本许可证中的任何内容都不应被解释为排除或限制任何隐含的许可或其他对侵权的抗辩,根据适用的专利法,您可能会有这样的机会。 - -### 12. 不放弃他人的自由. - -如果强加给您的条件(无论是通过法院命令、协议或其他方式)与本许可证的条件相抵触,它们并不能免除您对本许可条件的遵守。如果您不能在转让作品时同时满足您在本许可证下的义务和任何其他相关的义务,那么作为结果,您可以不转让它。例如,如果您同意的条款规定,您有义务向接受您传送程序的人收取版税,那么您要同时满足这些条款和本许可证的要求,唯一的办法就是完全不传送该程序。 - -### 13. 远程网络交互;与GNU通用公共许可证一起使用. 
- -尽管本许可证有任何其他规定,如果您修改本程序,您的修改版本必须在显著位置向所有通过计算机网络远程与本程序互动的用户(如果您的版本支持这种互动)提供机会,通过一些标准或习惯的促进软件复制的方式,从网络服务器上免费提供相应的源码。该相应源码应包括根据下段规定纳入GNU通用公共许可证第3版的任何作品的相应源码。 - -尽管本许可证有任何其他规定,您有权利将任何受保护的作品与在GNU通用公共许可证第3版下许可证的作品链接或结合成一个单一的组合作品,并传递由此产生的作品。本许可证的条款将继续适用于受保护作品的部分,但与之结合的作品将继续受GNU通用公共许可证第3版的管辖。 - -### 14. 本许可证的修订版. - -自由软件基金会可能会不时地发布GNU Affero通用公共许可证的修订版和/或新版本。这些新版本在精神上与目前的版本相似,但在细节上可能有所不同,以解决新的问题或关切。 - -每个版本都有一个区分的版本号。如果本程序指定某个编号的 GNU Affero 通用公共许可证 "或任何后来的版本" 适用于它,您可以选择遵守该编号的版本或自由软件基金会发布的任何后来的版本的条款和条件。如果本程序没有指定 GNU Affero 通用公共许可证的版本号,您可以选择自由软件基金会发布的任何版本。 -如果本计划规定代理人可以决定未来可以使用哪个版本的 GNU Affero 通用公共许可证,那么该代理人对某个版本的公开接受声明将永久授权您为本计划选择该版本。 - -以后的许可证版本可能会给您额外的或不同的权限。但是,任何作者或版权持有人都不会因为您选择了后来的版本而承担额外的义务。 - -### 15. 免责声明. - -在适用法律允许的范围内,对该程序没有任何保证。除非另有书面说明,否则版权持有人和/或其他各方 "按原样" 提供该程序,不提供任何明示或暗示的保证,包括但不限于对适销性和特定用途的适用性的暗示保证。关于程序的质量和性能的全部风险由您承担。如果该程序被证明有缺陷,您将承担所有必要的服务、修理或纠正的费用。 - -### 16. 赔偿责任的限制. - -在任何情况下,除非适用法律要求或书面同意,任何版权持有人或任何其他按上述规定修改和/或传递程序的当事人都不对您的损害负责,包括因使用或无法使用本程序而引起的任何一般的、特殊的、偶然的或间接的损害(包括但不限于数据丢失或数据不准确或您或第三方遭受的损失或本程序无法与任何其他程序一起运行),即使该持有人或其他当事人已被告知这种损害的可能性。 - -### 17. 第15条和第16条的解释. - -如果以上规定的免责声明和责任限制不能根据其条款在当地产生法律效力,审查法院应适用最接近于绝对放弃与本程序有关的所有民事责任的当地法律,除非在收取费用的本程序副本上附有保证或责任承担。 - -以上是条款和条件 - -## 如何将这些条款应用于您的新程序 - -如果您开发了一个新的程序,并希望它对公众有最大的用处,实现这一目标的最好方法是使它成为自由软件,每个人都可以在这些条款下重新发布和修改。 - -要做到这一点,请在程序中附上以下通知。最安全的做法是把它们附在每个源文件的开头,以最有效地说明排除担保的情况;每个文件至少要有 "版权" 一行和一个指向完整通知的指针。 - - - Copyright (C) - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as - published by the Free Software Foundation, either version 3 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. 
- - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see . - -翻译: - - <用一行字来说明程序的名称和它所做的事情的简单概念。> - Copyright (C) <年> <作者姓名> 版权所有。 - - 本程序是自由软件:你可以根据自由软件基金会发布的GNU Affero通用公共许可证的条款,即许可证的第3版或(您选择的)任何后来的版本重新发布它和/或修改它。。 - - 本程序的发布是希望它能起到作用。但没有任何保证;甚至没有隐含的保证。本程序的分发是希望它是有用的,但没有任何保证,甚至没有隐含的适销对路或适合某一特定目的的保证。 参见 GNU Affero通用公共许可证了解更多细节。 - - 您应该已经收到了一份GNU Affero通用公共许可证的副本。 如果没有,请参见。 - - 还要增加如何通过电子和纸质邮件与您联系的信息。 - -如果您的软件可以通过计算机网络与用户进行远程交互,您也应该确保它为用户提供一种获得其源代码的方法。例如,如果您的程序是一个网络应用程序,它的界面可以显示一个 "源代码" 的链接,引导用户进入代码的存档。您可以用很多方法提供源码,不同的解决方案对不同的程序会更好;具体要求见第13节。 - -如果有必要,您还应该让您的雇主(如果您是程序员)或学校(如果有的话)为该程序签署一份 "版权免责声明"。有关这方面的更多信息,以及如何申请和遵守GNU AGPL,请参见。 +--8<-- "LICENSE.zh.md" \ No newline at end of file diff --git a/docs/docs/about/privacy.md b/docs/docs/about/privacy.md index dcb695c4..06b40789 100644 --- a/docs/docs/about/privacy.md +++ b/docs/docs/about/privacy.md @@ -1,408 +1 @@ -# Privacy Notice - -This privacy notice for DanLing Team (also known as DanLing) ('we', 'us', or 'our'), describes how and why we might collect, store, use, and/or share ('process') your information when you use our services ('Services'), such as when you: - -- Visit our website at [multimolecule.danling.org](https://multimolecule.danling.org), or any website of ours that links to this privacy notice - -You can change your privacy settings at any time by clicking the button below: - -[Privacy Control](#__consent){ .md-button } - -**Questions or concerns?** -Reading this privacy notice will help you understand your privacy rights and choices. -If you do not agree with our policies and practices, please do not use our Services. -If you still have any questions or concerns, please contact us at [privacy@danling.org](mailto:privacy@danling.org). - -## 0. 
Summary of Key Points - -This summary provides key points from our privacy notice, but you can find out more details about any of these topics by clicking the link following each key point or by using our table of contents below to find the section you are looking for. - -
- -!!! question "What personal information do we process?" - - When you visit, use, or navigate our Services, we may process personal information depending on how you interact with us and the Services, the choices you make, and the products and features you use. - - [:octicons-arrow-right-24: What information do we collect?](#1-what-information-do-we-collect) - -!!! question "How do we process your information?" - - We process your information to provide, improve, and administer our Services, communicate with you, for security and fraud prevention, and to comply with law. - We may also process your information for other purposes with your consent. - We process your information only when we have a valid legal reason to do so. - - [:octicons-arrow-right-24: How do we process your information?](#2-how-do-we-process-your-information) - -!!! question "Do we process any sensitive personal information?" - - We do not process any sensitive personal information. - -!!! question "Do we collect any information from third parties?" - - We do not collect any information from third parties. - -!!! question "In what situations and with which parties do we share personal information?" - - We may share information in specific situations and with specific third parties. - - [:octicons-arrow-right-24: When and with whom we share your personal information?](#4-when-and-with-whom-do-we-share-your-personal-information) - -!!! question "How do we keep your information safe?" - - We have organisational and technical processes and procedures in place to protect your personal information. - - [:octicons-arrow-right-24: How do we keep your information safe?](#7-how-do-we-keep-your-information-safe) - -!!! question "What are your rights?" - - Depending on where you are located geographically, the applicable privacy law may mean you have certain rights regarding your personal information. - - [:octicons-arrow-right-24: What are your privacy rights?](#8-what-are-your-privacy-rights) - -!!! 
question "How do you exercise your rights?" - - The easiest way to exercise your rights is by contacting the relevant data protection authority in your jurisdiction. - - [:octicons-arrow-right-24: How to exercise your rights](#how-to-exercise-your-rights) - -
- -## 1. What information do we collect? - -### Personal information you disclose to us - -!!! tip "In Short" - - We collect personal information that you provide to us. - -We collect personal information that you voluntarily provide to us when you express an interest in obtaining information about us or our products and Services, when you participate in activities on the Services, or otherwise when you contact us. - -!!! danger "Sensitive Personal Information" - - We do not collect any sensitive personal information from you. - -### Information automatically collected - -!!! tip "In Short" - - Some information — such as IP address and/or browser and device characteristics — is collected automatically when you visit our Services. - -We automatically collect certain information when you visit, use, or navigate our Services. -This information does not reveal your specific identity (like your name or contact information) but may include device and usage information, such as your IP address, browser and device characteristics, operating system, language preferences, referring URLs, device name, country, location, information about how and when you use our Services, and other technical information. -This information is primarily needed to maintain the security and operation of our Services, and for our internal analytics and reporting purposes. - -Like many businesses, we also collect information through cookies and similar technologies. - -The information we collect includes: - -- **Identifiers.** - Identifier is a device and browser-specific unique random string that we generate when you use our Service. - This identifier is stored in a cookie on your device, allowing us to identify you across multiple sessions and when you return to our Service. - Note that this identifier does not contain any personal information about you, and is device and browser-specific, meaning that it cannot be used to track you across multiple devices or browsers. 
- You can delete this cookie at any time by clearing your browser's cache. -- **Log and Usage Data.** - Log and usage data is service-related, diagnostic, usage, and performance information our servers automatically collect when you access or use our Services and which we record in log files. - Depending on how you interact with us, this log data may include your IP address, device information, browser type, and settings, and information about your activity in the Services (such as the date/time stamps associated with your usage, pages and files viewed, searches and other actions you take such as which features you use), device event information (such as system activity, error reports (sometimes called 'crash dumps') and hardware settings). -- **Device Data.** - We collect device data such as information about your computer, phone, tablet, or other devices you use to access the Services. - Depending on the device used, this device data may include information such as your IP address (or proxy server), device and application identification numbers, location, browser type, hardware model, Internet Service Provider and/or mobile carrier, operating system, and system configuration information. -- **Location Data.** - We collect location data such as information about your device's location, which can be either precise or imprecise. - How much information we collect depends on the type and settings of the device you use to access the Services. - For example, we may use GPS and other technologies to collect geolocation data that tells us your current location (based on your IP address). - You can opt out of allowing us to collect this information either by refusing access to the information or by disabling your location settings on your device. 
- -### Categories of Personal Information We Collect - -We have collected the following categories of personal information in the past twelve (12) months: - -| Category | Examples | Collected | -| ----------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | -| A. Identifiers | Contact details, such as real name, alias, postal address, telephone or mobile contact number, unique personal identifier, online identifier, Internet Protocol address, email address, and account name | YES | -| B. Personal information as defined in the California Customer Records statute | Name, contact information, education, employment, employment history, and financial information | NO | -| C. Protected classification characteristics under state or federal law | Gender, age, date of birth, race and ethnicity, national origin, marital status, and other demographic data | NO | -| D. Commercial information | Transaction information, purchase history, financial details, and payment information | NO | -| E. Biometric information | Fingerprints and voiceprints | NO | -| F. Internet or other similar network activity | Browsing history, search history, online behaviour, interest data, and interactions with our and other websites, applications, systems, and advertisements | YES | -| G. Geolocation data | Device location | YES | -| H. Audio, electronic, sensory, or similar information | Images and audio, video or call recordings created in connection with our business activities | NO | -| I. Professional or employment-related information | Business contact details in order to provide you our Services at a business level or job title, work history, and professional qualifications if you apply for a job with us | NO | -| J. 
Education Information | Student records and directory information | NO | -| K. Inferences drawn from collected personal information | Inferences drawn from any of the collected personal information listed above to create a profile or summary about, for example, an individual’s preferences and characteristics | YES | -| L. Sensitive personal Information | | NO | - -We may also collect other personal information outside of these categories through instances where you interact with us in person, online, or by phone or mail in the context of: - -- Receiving help through our customer support channels; -- Participation in customer surveys or contests; and -- Facilitation in the delivery of our Services and to respond to your inquiries. - -We will use and retain the collected personal information as needed to provide you with our Services and as necessary to comply with our legal obligations, resolve disputes, and enforce our agreement for the following period: - -- Category A: 24 months -- Category F: 24 months -- Category G: 24 months -- Category K: 24 months - -## 2. How do we process your information? - -!!! tip "In Short" - - We process your information to provide, improve, and administer our Services, communicate with you, for security and fraud prevention, and to comply with law. - We may also process your information for other purposes with your consent. - -We process your personal information for a variety of reasons, depending on how you interact with our Services, including: - -- **To protect our Services.** - We may process your information as part of our efforts to keep our Services safe and secure, including fraud monitoring and prevention. -- **To identify user trends.** - We may process information about how you use our Services to better understand how they are being used so we can improve them. 
-- **To save or protect an individual's vital interest.** - We may process your information when necessary to save or protect an individual’s vital interest, such as to prevent harm. - -## 3. What legal basis do we have for processing your information? - -!!! tip "In Short" - - We only process your personal information when we believe it is necessary and we have a valid legal reason (i.e. legal basis) to do so under applicable law, like with your consent, to comply with laws, to provide you with services to enter into or fulfil our contractual obligations, to protect your rights, or to fulfil our legitimate business interests. - -The General Data Protection Regulation (GDPR) and UK GDPR require us to explain the valid legal bases we rely on in order to process your personal information. -As such, we may rely on the following legal bases to process your personal information: - -- **Consent.** - We may process your personal information if you have given us specific consent to use your personal information for a specific purpose. - You have the right to withdraw your consent at any time. - Learn more about [withdrawing your consent](#8-what-are-your-privacy-rights). -- **Legitimate Interests.** - We may process your information when we believe it is reasonably necessary to achieve our legitimate business interests and those interests do not outweigh your interests and fundamental rights and freedoms. - For example, we may process your personal information for some of the purposes described in order to: - - Analyse how our Services are used so we can improve them to engage and retain users - - Diagnose problems and/or prevent fraudulent activities -- **Legal Obligations.** - We may process your information where we believe it is necessary for compliance with our legal obligations, such as to cooperate with a law enforcement body or regulatory agency, exercise or defend our legal rights, or disclose your information as evidence in litigation in which we are involved.
-- **Vital Interests.** - We may process your information where we believe it is necessary to protect your vital interests or the vital interests of a third party, such as situations involving potential threats to the safety of any person. - -!!! info "Consent to Processing in Canada" - - If you are located in Canada, we may be legally permitted under applicable law to process your information without your consent in some exceptional cases, including, for example: - - - If collection is clearly in the interests of an individual and consent cannot be obtained in a timely way - - For investigations and fraud detection and prevention - - For business transactions provided certain conditions are met - - If it is contained in a witness statement and the collection is necessary to assess, process, or settle an insurance claim - - For identifying injured, ill, or deceased persons and communicating with next of kin - - If we have reasonable grounds to believe an individual has been, is, or may be a victim of financial abuse - - If it is reasonable to expect collection and use with consent would compromise the availability or the accuracy of the information and the collection is reasonable for purposes related to investigating a breach of an agreement or a contravention of the laws of Canada or a province - - If disclosure is required to comply with a subpoena, warrant, court order, or rules of the court relating to the production of records - - If it was produced by an individual in the course of their employment, business, or profession and the collection is consistent with the purposes for which the information was produced - - If the collection is solely for journalistic, artistic, or literary purposes - - If the information is publicly available and is specified by the regulations - -## 4. When and with whom do we share your personal information? - -!!! 
tip "In Short" - - We may share information in specific situations described in this section and/or with the following third parties. - -We may use your personal information for our business purposes, such as for undertaking internal research for technological development and demonstration. -This is not considered to be 'selling' of your personal information. - -**Vendors, Consultants, and Other Third-Party Service Providers.** -We may share your data with third-party vendors, service providers, contractors, or agents ('third parties') who perform services for us or on our behalf and require access to such information to do that work. -We have contracts in place with our third parties, which are designed to help safeguard your personal information. -This means that they cannot do anything with your personal information unless we have instructed them to do it. -They will also not share your personal information with any organisation apart from us. -They also commit to protect the data they hold on our behalf and to retain it for the period we instruct. 
- -The third parties we may share personal information with are as follows: - -- **Advertising, Direct Marketing, and Lead Generation** - - Google AdSense -- **Cloud Computing Services** - - Microsoft Azure - - Amazon Web Services (AWS) - - Google Cloud Platform (GCP) -- **Communications and Content Delivery Network (CDN) Services** - - Cloudflare -- **Content Optimisation** - - Google Site Search - - Google Fonts -- **Functionality and Infrastructure Optimisation** - - GitHub Pages -- **User Commenting and Forums** - - Disqus - - GitHub Issues - - GitHub Discussions -- **Web and Mobile Analytics** - - Google Analytics - -We also may need to share your personal information in the following situations: - -- **Business Transfers.** - We may share or transfer your information in connection with, or during negotiations of, any merger, sale of company assets, financing, or acquisition of all or a portion of our business to another company. - -!!! info "We have disclosed the following categories of personal information for a business purpose in the past twelve (12) months:" - - Nill - -!!! info "The categories of third parties to whom we sold personal information in the past twelve (12) months:" - - Nill - -!!! info "The categories of third parties to whom we shared personal information with in the past twelve (12) months:" - - - **Web and Mobile Analytics** - - Google Analytics - -## 5. Do we use cookies and other tracking technologies? - -!!! tip "In Short" - - We may use cookies and other tracking technologies to collect and store your information. - -We also permit third parties and service providers to use online tracking technologies on our Services for analytics and advertising, including to help manage and display advertisements, to tailor advertisements to your interests, or to send abandoned shopping cart reminders (depending on your communication preferences). 
-The third parties and service providers use their technology to provide advertising about products and services tailored to your interests which may appear either on our Services or on other websites. - -To the extent these online tracking technologies are deemed to be a 'sale'/'sharing' (which includes targeted advertising, as defined under the applicable laws) under applicable US state laws, you can opt out of these online tracking technologies by clicking the button on the top of this page or the button below: - -[Privacy Control](#__consent){ .md-button } - - - -### Google Analytics - -We may share your information with Google Analytics to track and analyse the use of the Services. -The Google Analytics Advertising Features that we may use include: - -- Remarketing with Google Analytics -- Google Display Network Impressions Reporting -- Google Analytics Demographics and Interests Reporting - -To opt out of being tracked by Google Analytics across the Services, visit [https://tools.google.com/dlpage/gaoptout](https://tools.google.com/dlpage/gaoptout). -You can opt out of Google Analytics Advertising Features through [Ads Settings](https://adssettings.google.com) and Ad Settings for mobile apps. - -Other opt out means include [http://optout.networkadvertising.org/](http://optout.networkadvertising.org/) and [http://www.networkadvertising.org/mobile-choice](http://www.networkadvertising.org/mobile-choice). - -For more information on the privacy practices of Google, please visit the [Google Privacy & Terms](https://policies.google.com/privacy). - -## 6. How long do we keep your information? - -!!! tip "In Short" - - We keep your information for as long as necessary to fulfil the purposes outlined in this privacy notice unless otherwise required by law. 
- -We will only keep your personal information for as long as it is necessary for the purposes set out in this privacy notice, unless a longer retention period is required or permitted by law (such as tax, accounting, or other legal requirements). - -When we have no ongoing legitimate business need to process your personal information, we will either delete or anonymise it, or, if this is not possible (for example, because your personal information has been stored in backup archives), then we will securely store your personal information and isolate it from any further processing until deletion is possible. - -## 7. How do we keep your information safe? - -!!! tip "In Short" - - We aim to protect your personal information through a system of organisational and technical security measures. - -We have implemented appropriate technical and organisational security measures designed to protect the security of any personal information we process. -However, despite our safeguards and efforts to secure your information, no electronic transmission over the internet or information storage technology can be guaranteed to be 100% secure, so we cannot promise or guarantee that hackers, cybercriminals, or other unauthorised third parties will not be able to defeat our security and improperly collect, access, steal, or modify your information. -Although we will do our best to protect your personal information, the transmission of personal information to and from our Services is at your own risk. -You should only access the Services within a secure environment. - -## 8. What are your privacy rights? - -!!! tip "In Short" - - We strive to protect your privacy rights and choices to the best possible extent under the law. - -You have rights under certain data protection laws. -However, these rights are not absolute, and in certain cases, we may decline your request as permitted by law. 
-These rights include: - -- **Right to know** - whether or not we are processing your personal data -- **Right to access** - your personal data -- **Right to correct** - inaccuracies in your personal data -- **Right to request** - the deletion of your personal data -- **Right to obtain a copy** - of the personal data you previously shared with us -- **Right to non-discrimination** - against you for exercising your rights -- **Right to opt-out** - - of the processing of your personal data if it is used for targeted advertising (or sharing as defined under applicable laws), the sale of personal data, or profiling in furtherance of decisions that produce legal or similarly significant effects ('profiling') concerning you - - of the collection of sensitive data and personal data collected through the operation of a voice or facial recognition feature -- **Right to obtain** - - a list of the categories of third parties to which we have disclosed personal data - - a list of specific third parties to which we have disclosed personal data -- **Right to limit** - use and disclosure of sensitive personal data - -### How to exercise your rights - -It is very unlikely that you will be able to exercise the above rights as we do not collect any identifiable personal data from you. - -We are unable to reply to and act on data subject access request as we do not save any identifiable information about you, and we will not be able to verify your identity. - -If you believe we are unlawfully processing your personal information, you can contact the relevant data protection regulator, state attorney general, or other competent authority in your jurisdiction. 
- -| Residency | Authority | -| ------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------ | -| European Economic Area | [Member State's data protection supervisory authority](https://edpb.europa.eu/about-edpb/board/members_en) | -| United Kingdom | [Information Commissioner's Office](https://ico.org.uk/make-a-complaint/) | -| Australia | [Office of the Australian Information Commissioner](https://www.oaic.gov.au/privacy/privacy-complaints/) | -| New Zealand | [Office of New Zealand Privacy Commissioner](https://www.privacy.org.nz/your-rights/making-a-complaint-to-the-privacy-commissioner/) | -| Canada | [Office of the Privacy Commissioner of Canada](https://www.priv.gc.ca/en/) | -| California of the United States | [California Privacy Protection Agency](https://cppa.ca.gov/webapplications/complaint/) | -| Switzerland | [Federal Data Protection and Information Commissioner](https://www.edoeb.admin.ch/edoeb/en/home/the-fdpic/contact.html) | -| South Africa | [Information Regulator](https://inforegulator.org.za/training/wp/complaints/) | - -#### Withdraw your consent - -If we are relying on your consent to process your personal information, which may be express and/or implied consent depending on the applicable law, you have the right to withdraw your consent at any time. -You can withdraw your consent at any time by clicking the button on the top of this page or the button below: - -[Privacy Control](#__consent){ .md-button } - -However, please note that this will not affect the lawfulness of the processing before its withdrawal nor, when applicable law allows, will it affect the processing of your personal information conducted in reliance on lawful processing grounds other than consent. - -#### Cookies and similar technologies - -Most web browsers are set to accept cookies by default. 
-If you prefer, you can usually choose to set your browser to remove or reject browser cookies. -Please note that if you choose to remove or reject cookies, this will NOT affect the availability and functionality of our Services. - -## 9. Controls for Do-Not-Track features - -Most web browsers and some mobile operating systems and mobile applications include a Do-Not-Track ('DNT') feature or setting you can activate to signal your privacy preference not to have data about your online browsing activities monitored and collected. -At this stage, no uniform technology standard for recognising and implementing DNT signals has been finalised. -Although we cannot promise to honour every DNT signal, we strive to honour all such requests where technically feasible. - -California law requires us to let you know how we respond to web browser DNT signals. -Because we cannot guarantee to recognise and houour all DNT signals, we do not respond to them at this time. - -## 10. Do residents in certain jurisdiction have specific privacy rights? - -NO. - -All men and women are created equal. - -We provide the same privacy rights to all individuals, regardless of their location. - -Be assured that we will treat you with the same respect and dignity as we would want to be treated. - -## 11. How can you review, update, or delete the data we collect from you? - -It is very unlikely that you will be able to review, update, or delete the data we collect from you as we do not collect any identifiable personal data from you, and we will not be able to identify which data belongs to you. - -## 12. Do we make updates to this notice? - -!!! tip "In Short" - - Yes, we will update this notice as necessary to stay compliant with relevant laws. - -We may update this privacy notice from time to time. -The updated version will be indicated by an updated 'Last Revised Time' at the bottom of this privacy notice. 
-If we make any material changes, we will notify you by posting the new privacy notice on this page. -We are unable to notify you directly as we do not collect any contact information from you. -We encourage you to review this privacy notice frequently to stay informed of how we are protecting your information. +--8<-- "privacy.md" \ No newline at end of file diff --git a/docs/docs/about/privacy.zh.md b/docs/docs/about/privacy.zh.md index 4e816720..adb5edd9 100644 --- a/docs/docs/about/privacy.zh.md +++ b/docs/docs/about/privacy.zh.md @@ -1,419 +1 @@ -!!! warning "翻译" - - 本文内容为机器翻译版本,旨在为用户提供方便。 - 我们已经尽力确保翻译的准确性。 - 但请注意,翻译内容可能包含错误,仅供参考。 - 请以英文[原文](https://multimolecule.danling.org/about/privacy)为准。 - - 为满足合规性与执法要求,翻译文档中的任何不准确或歧义之处均不具有约束力,也不具备法律效力。 - -# 隐私声明 - -本隐私声明适用于丹灵团队(也被称作丹灵)(以下简称“我们”),描述了当您使用我们的服务(“服务”)时,我们如何以及为何可能收集、存储、使用和/或共享(“处理”)您的信息。例如当您: - -- 访问我们的网站 [multimolecule.danling.org](https://multimolecule.danling.org) 或任何链接到本隐私声明的我们的网站时 - -您可以随时通过点击下面的按钮更改您的隐私设置: - -[隐私控制](#__consent){ .md-button } - -**有问题或关注?** -阅读本隐私声明将帮助您了解您的隐私权利和选择。 -如果您不同意我们的声明和做法,请不要使用我们的服务。 -如果您仍有任何问题或关注,请通过[privacy@danling.org](mailto:privacy@danling.org)与我们联系。 - -## 0. 关键点总结 - -本总结提供了我们隐私声明的关键点,但您可以通过点击每个关键点后的链接或使用目录来找到您所查找的部分以了解更多详情。 - -
- -!!! question "我们处理哪些个人信息?" - - 当您访问、使用或导航我们的服务时,我们可能会根据您与我们以及服务的互动方式、您所做的选择以及您使用的产品和功能来处理个人信息。 - - [:octicons-arrow-right-24: 我们收集哪些信息?](#1) - -!!! question "我们如何处理您的信息?" - - 我们处理您的信息以提供、改善和管理我们的服务,与您沟通,进行安全和防欺诈,以及遵守法律。 - 我们也可能在得到您的同意的情况下,出于其他目的处理您的信息。 - 我们仅在有合法法律理由时处理您的信息。 - - [:octicons-arrow-right-24: 我们如何处理您的信息?](#2) - -!!! question "我们处理任何敏感个人信息吗?" - - 我们不处理任何敏感个人信息。 - -!!! question "我们从第三方收集信息吗?" - - 我们不从第三方收集任何信息。 - -!!! question "在哪些情况下以及与哪些方我们共享个人信息?" - - 我们可能在特定情况下与特定第三方共享信息。 - - [:octicons-arrow-right-24: 我们何时以及与谁共享您的个人信息?](#4) - -!!! question "我们如何保护您的信息安全?" - - 我们已经实施了组织和技术流程和程序来保护您的个人信息。 - - [:octicons-arrow-right-24: 我们如何保持您的信息安全?](#7) - -!!! question "您有哪些权利?" - - 根据您所在地理位置,适用的隐私法可能意味着您对您的个人信息有某些权利。 - - [:octicons-arrow-right-24: 您有哪些隐私权利?](#8) - -!!! question "您如何行使您的权利?" - - 行使您的权利的最简单方式是联系您的司法管辖区的相关数据保护监管机构。 - - [:octicons-arrow-right-24: 如何行使您的权利?](#_5) - -
- -## 1. 我们收集哪些信息? - -### 您向我们披露的个人信息 - -!!! tip "简而言之" - - 我们收集您向我们提供的个人信息。 - -我们收集您自愿向我们提供的个人信息,当您表达对我们或我们的产品和服务的兴趣、参与服务上的活动或以其他方式联系我们时。 - -!!! danger "敏感个人信息" - - 我们不从您那里收集任何敏感个人信息。 - -### 自动收集的信息 - -!!! tip "简而言之" - - 当您访问我们的服务时,某些信息——如IP地址和/或浏览器和设备特征——会自动被收集。 - -我们在您访问、使用或导航我们的服务时自动收集某些信息。 -这些信息不会透露您的特定身份(如您的姓名或联系信息),但可能包括设备和使用信息,如您的IP地址、浏览器和设备特性、操作系统、语言偏好、引用URL、设备名称、国家、位置、有关您如何以及何时使用我们的服务的信息,以及其他技术信息。 -这些信息主要是为了维护我们服务的安全性和运作所需,以及我们内部的分析和报告目的。 - -像许多企业一样,我们还通过cookies和类似技术收集信息。 - -我们收集的信息包括: - -- **标识符。** - 标识符是当您使用我们的服务时,我们生成的特定于设备和浏览器的唯一随机字符串。 - 该标识符存储在您设备上的一个cookie中,使我们能够在多个会话中以及您返回我们的服务时识别您。 - 注意这个标识符不包含任何关于您的个人信息,并且是特定于您的设备和浏览器的,这意味着它不能用于在不同设备或浏览器之间跟踪您。 - 您可以通过清除浏览器缓存随时删除此cookie。 -- **日志和使用数据。** - 日志和使用数据是与服务相关的、诊断、使用和性能信息,当您访问或使用我们的服务时我们的服务器会自动收集,并记录在日志文件中。 - 根据您与我们的互动方式,这些日志数据可能包括您的IP地址、设备信息、浏览器类型和设置,以及有关您在服务中的活动的信息(如与您使用相关的日期/时间戳、浏览和查看的页面和文件、搜索以及您采取的其他操作,例如您使用的功能),设备事件信息(如系统活动、错误报告(有时称为'崩溃转储')和硬件设置)。 -- **设备数据。** - 我们收集设备数据,如您用于访问服务的计算机、电话、平板或其他设备的信息。 - 根据所使用的设备,这些设备数据可能包括如下信息:您的IP地址(或代理服务器)、设备和应用程序识别号、位置、浏览器类型、硬件型号、互联网服务提供商和/或移动运营商、操作系统和系统配置信息。 -- **位置数据。** - 我们收集位置数据,如您设备的位置信息,这可以是精确的也可以是不精确的。 - 我们收集多少信息取决于您用于访问服务的设备的类型和设置。 - 例如,我们可能使用GPS和其他技术来收集地理位置数据,告诉我们您当前的位置(基于您的IP地址)。 - 您可以选择不让我们收集此信息,方法是拒绝访问信息或在您的设备上禁用位置设置。 - -### 我们收集的个人信息类别 - -过去十二(12)个月内,我们收集了以下类别的个人信息: - -| 类别 | 示例 | 已收集 | -|-------------------------|-----------------------------------------------------------------|-------| -| A. 标识符 | 联系方式,如真实姓名、别名、邮政地址、电话或移动联系号码、独特的个人标识符、在线标识符、互联网协议地址、电子邮件地址和帐户名称 | 是 | -| B. 加利福尼亚客户记录法中定义的个人信息 | 姓名、联系信息、教育、就业、就业历史和财务信息 | 否 | -| C. 州或联邦法律下的受保护分类特征 | 性别、年龄、出生日期、种族和民族、国籍、婚姻状况和其他人口统计数据 | 否 | -| D. 商业信息 | 交易信息、购买历史、财务详细信息和支付信息 | 否 | -| E. 生物识别信息 | 指纹和声纹 | 否 | -| F. 互联网或其他类似网络活动 | 浏览历史、搜索历史、在线行为、兴趣数据和与我们和其他网站、应用程序、系统和广告的互动 | 是 | -| G. 地理位置数据 | 设备位置 | 是 | -| H. 音频、电子、感觉或类似信息 | 在我们的业务活动中创建的图像和音频、视频或通话录音 | 否 | -| I. 与职业相关的信息 | 为了在业务层面提供我们的服务而收集的商业联系信息或职务名称、工作历史和职业资格 | 否 | -| J. 教育信息 | 学生记录和目录信息 | 否 | -| K. 
从收集的个人信息中推断出的推论 | 从上述任何收集的个人信息中推断出的用于创建个人偏好和特征的概况或摘要 | 是 | -| L. 敏感个人信息 | | 否 | - -我们还可能在您与我们亲自、在线或通过电话或邮件与我们互动的情况下收集其他个人信息,包括: - -- 通过我们的客户支持渠道获得帮助; -- 参与客户调查或竞赛;以及 -- 促进我们服务的交付并回应您的查询。 - -我们将根据需要在以下期限内使用和保留所收集的个人信息,以便为您提供我们的服务,并根据需要遵守我们的法律义务、解决争议和执行我们的协议: - -- A 类:24个月 -- F 类:24个月 -- G 类:24个月 -- K 类:24个月 - -## 2. 我们如何处理您的信息? - -!!! tip "简而言之" - - 我们处理您的信息以提供、改善和管理我们的服务,与您沟通,进行安全和防欺诈,以及遵守法律。 - 我们也可能在得到您的同意的情况下,出于其他目的处理您的信息。 - -我们出于多种原因处理您的个人信息,这取决于您如何与我们的服务互动,包括: - -- **保护我们的服务。** - 我们可能会处理您的信息作为我们保持服务安全的努力的一部分,包括监控和预防欺诈。 -- **识别用户趋势。** - 我们可能会处理有关您如何使用我们的服务的信息,以更好地了解它们的使用情况,从而改进它们。 -- **保存或保护个人的重要利益。** - 我们可能会在必要时处理您的信息,以保存或保护个人的重要利益,例如为了防止伤害。 - -## 3. 我们处理您的信息有什么法律依据? - -!!! tip "简而言之" - - 我们只有在我们认为必要且有有效的法律理由(即法律依据)时才处理您的个人信息,如与您的同意、遵守法律、提供服务给您进入或履行我们的合同义务、保护您的权利或满足我们合法的业务利益。 - -欧盟通用数据保护条例(GDPR)和英国GDPR要求我们解释我们依靠的有效法律依据以处理您的个人信息。 -因此,我们可能依赖以下法律依据来处理您的个人信息: - -- **同意。** - 如果您已给我们明确同意使用您的个人信息用于某个特定目的,我们可能会处理您的个人信息。 - 您有权随时撤回您的同意。 - 了解更多关于[撤回您的同意]()。 -- **合法利益。** - 当我们认为出于我们合法的业务利益来处理您的信息是合理必要的,并且这些利益不超过您的利益和基本权利与自由时,我们可能会处理您的信息。 - 例如,我们可能会处理您的个人信息,用于: - - 分析我们的服务如何被使用,以便我们可以改进它们以吸引和保留用户 - - 诊断问题和/或预防欺诈活动 -- **法律义务。** - 我们可能会在我们认为必须遵守我们的法律义务的情况下处理您的信息,例如与执法机构或监管机构合作、行使或捍卫我们的法律权利,或在我们参与的诉讼中披露您的信息作为证据。 -- **重要利益。** - 我们可能会在我们认为必须保护您或第三方的重要利益的情况下处理您的信息,例如涉及潜在威胁任何人的安全的情况。 - -!!! info "在加拿大处理的同意" - - 如果您位于加拿大,我们可能在适用法律下在某些特殊情况下无需您的同意就可以合法地处理您的信息,包括例如: - - - 如果收集明显符合个人的利益且无法及时获得同意 - - 用于调查和欺诈检测与预防 - - 用于商业交易,前提是满足某些条件 - - 如果信息包含在证人声明中,且收集对于评估、处理或解决保险索赔是必要的 - - 用于识别受伤、生病或已故人士并与近亲沟通 - - 如果我们有合理的理由相信某个人已经、正在或可能成为金融滥用的受害者 - - 如果合理预期通过征得同意以收集和使用信息会损害信息的可用性或准确性,并且收集对于调查违反协议或违反加拿大或省法律的目的是合理的 - - 如果披露是为了遵守传票、搜查令、法院命令或与记录生产相关的法院规则 - - 如果信息是由个人在其就业、业务或专业过程中产生的,并且收集与信息产生的目的一致 - - 如果收集仅用于新闻、艺术或文学目的 - - 如果信息是公开可用的,并且通过规定指定 - -## 4. 我们何时以及与谁共享您的个人信息? - -!!! 
tip "简而言之" - - 我们可能在本节描述的特定情况下和/或与以下第三方共享信息。 - -我们可能会将您的个人信息用于我们的业务目的,如进行内部研究以进行技术开发和展示。 -这不被视为“出售”您的个人信息。 - -**供应商、顾问和其他第三方服务提供商。** -我们可能会与为我们服务或代表我们工作并需要访问此类信息以执行该工作的第三方供应商、服务提供商、承包商或代理(“第三方”)共享您的数据。 -我们与我们的第三方签订了合同,这些合同旨在帮助保护您的个人信息。 -这意味着他们不能在未经我们指示的情况下做任何事情。 -他们也不会与我们以外的任何组织共享您的个人信息。 -他们还承诺保护他们代表我们持有的数据并按照我们的指示保留该数据。 - -我们可能共享个人信息的第三方如下: - -- **广告、直销和潜在客户生成** - - Google AdSense -- **云计算服务** - - Microsoft Azure - - Amazon Web Services (AWS) - - Google Cloud Platform (GCP) -- **通信和内容交付网络 (CDN) 服务** - - Cloudflare -- **内容优化** - - Google站点搜索 - - Google字体 -- **功能和基础设施优化** - - GitHub页面 -- **用户评论和论坛** - - Disqus - - GitHub议题 - - GitHub讨论 -- **网络和移动分析** - - Google Analytics - -我们还可能需要在以下情况下共享您的个人信息: - -- **业务转移。** - 我们可能在进行任何并购、出售公司资产、融资或收购我们全部或部分业务的谈判中共享或转让您的信息。 - -!!! info "过去十二(12)个月我们出于业务目的披露了以下类别的个人信息:" - - 无 - -!!! info "过去十二(12)个月我们出售个人信息的第三方类别:" - - 无 - -!!! info "过去十二(12)个月我们与之共享个人信息的第三方类别:" - - - **网络和移动分析** - - Google Analytics - -## 5. 我们是否使用cookies和其他跟踪技术? - -!!! tip "简而言之" - - 我们可能使用cookies和其他跟踪技术来收集和存储您的信息。 - -我们还允许第三方和服务提供商在我们的服务中使用在线跟踪技术用于分析和广告,包括帮助管理和展示广告,根据您的兴趣定制广告,或发送遗弃购物车提醒(取决于您的沟通偏好)。 -这些第三方和服务提供商使用他们的技术为您提供定制的产品和服务广告,这些广告可能出现在我们的服务或其他网站上。 - -在适用的美国州法律下,这些在线跟踪技术被视为“销售”/“分享”(包括目标广告,根据适用法律定义)的程度,您可以通过点击本页顶部或下面的按钮来选择退出这些在线跟踪技术: - -[隐私控制](#__consent){ .md-button } - - - -### Google Analytics - -我们可能会与Google Analytics共享您的信息,以跟踪和分析服务的使用情况。 -我们可能使用的Google Analytics广告功能包括: - -- Google Analytics的再营销 -- Google Display Network印象报告 -- Google Analytics人口统计和兴趣报告 - -要选择退出在服务中通过Google Analytics跟踪您,请访问[https://tools.google.com/dlpage/gaoptout](https://tools.google.com/dlpage/gaoptout)。 - -您可以通过[广告设置](https://adssettings.google.com)和移动应用的广告设置来选择退出Google Analytics广告功能。 - -其他退出方式包括[http://optout.networkadvertising.org/](http://optout.networkadvertising.org/)和[http://www.networkadvertising.org/mobile-choice](http://www.networkadvertising.org/mobile-choice)。 - -有关Google隐私做法的更多信息,请访问[Google隐私与条款](https://policies.google.com/privacy)。 - -## 6. 我们保留您的信息多久? - -!!! 
tip "简而言之" - - 我们将根据本隐私声明中概述的目的保留您的信息,除非法律另有要求。 - -我们只会在本隐私声明中概述的目的所需的时间内保留您的个人信息,除非法律要求或允许更长的保留期(如税务、会计或其他法律要求)。 - -当我们没有持续的合法业务需要处理您的个人信息时,我们将删除或匿名化它,或者,如果这不可能(例如,因为您的个人信息已存储在备份档案中),那么我们将安全地存储您的个人信息并将其与任何进一步处理隔离,直到删除成为可能。 - -## 7. 我们如何保持您的信息安全? - -!!! tip "简而言之" - - 我们旨在通过一系列组织和技术安全措施保护您的个人信息。 - -我们已实施适当的技术和组织安全措施,旨在保护我们处理的任何个人信息的安全。 -然而,尽管我们采取了保障措施并努力确保您的信息安全,任何通过互联网的电子传输或信息存储技术都无法保证是100%安全的,因此我们无法承诺或保证黑客、网络犯罪分子或其他未经授权的第三方不会破坏我们的安全措施并不当地收集、访问、窃取或修改您的信息。 -尽管我们将尽最大努力保护您的个人信息,个人信息到我们服务的传输和从我们服务的传输仍然是您自己的风险。 -您应该只在安全的环境中访问服务。 - -## 8. 您有哪些隐私权利? - -!!! tip "简而言之" - - 我们努力在法律允许的最大范围内保护您的隐私权利和选择。 - -您在某些数据保护法下有权利。 -然而,这些权利不是绝对的,在某些情况下,我们可能会根据法律拒绝您的请求。 -这些权利包括: - -- **知情权** - 我们是否正在处理您的个人数据 -- **访问权** - 您的个人数据 -- **更正权** - 您的个人数据中的不准确信息 -- **请求删除权** - 您的个人数据 -- **获取副本权** - 您以前与我们共享的个人数据 -- **反歧视权** - 针对您行使您的权利 -- **选择退出权** - - 如果您的个人数据用于目标广告(或根据适用法律定义的“分享”),个人数据的销售,或促进对您产生法律或类似重大效果的决策(“分析”)的分析 - - 收集通过语音或面部识别功能操作收集的敏感数据和个人数据 -- **获取权** - - 向我们披露个人数据的第三方类别的列表 - - 向我们披露个人数据的特定第三方的列表 -- **限制使用和披露权** - 敏感个人数据 - -### 如何行使您的权利 - -您几乎不可能行使上述权利,因为我们不从您那里收集任何可识别的个人数据。 - -我们无法回复和采取数据主体访问请求,因为我们不保存任何可识别的关于您的信息,我们将无法验证您的身份。 - -如果您认为我们非法处理您的个人信息,您可以联系您所在管辖区的相关数据保护监管机构、州总检察长或其他有权机构。 - -| 居住地 | 机构 | -|-------------------|------------------------------------------------------------------------------------------------------| -| 欧洲经济区 | [成员国的数据保护监督机构](https://edpb.europa.eu/about-edpb/board/members_en) | -| 英国 | [信息专员办公室](https://ico.org.uk/make-a-complaint/) | -| 澳大利亚 | [澳大利亚信息专员办公室](https://www.oaic.gov.au/privacy/privacy-complaints/) | -| 新西兰 | [新西兰隐私专员办公室](https://www.privacy.org.nz/your-rights/making-a-complaint-to-the-privacy-commissioner/) | -| 加拿大 | [加拿大隐私专员办公室](https://www.priv.gc.ca/en/) | -| 美国加利福尼亚州 | [加利福尼亚隐私保护机构](https://cppa.ca.gov/webapplications/complaint/) | -| 瑞士 | [联邦数据保护和信息专员](https://www.edoeb.admin.ch/edoeb/en/home/the-fdpic/contact.html) | -| 南非 | [信息监管机构](https://inforegulator.org.za/training/wp/complaints/) | - - -#### 撤回您的同意 - 
-如果我们依赖您的同意来处理您的个人信息,这可能是明确的和/或暗示的同意,取决于适用法律,您有权随时撤回您的同意。 -您可以通过点击本页顶部或下面的按钮随时撤回您的同意: - -[隐私控制](#__consent){ .md-button } - -然而,请注意,这不会影响撤回之前的处理的合法性,也不会影响当适用法律允许时,基于除同意之外的合法处理理由进行的您的个人信息的处理。 - -#### Cookies和类似技术 - -大多数网络浏览器默认设置为接受cookies。 -如果您愿意,您通常可以选择设置您的浏览器以删除或拒绝浏览器cookies。 -请注意,如果您选择删除或拒绝cookies,这将不会影响我们服务的可用性和功能。 - -## 9. 不追踪功能的控制 - -大多数网络浏览器和一些移动操作系统和移动应用程序包括一个您可以激活的不追踪(“DNT”)功能或设置,以表达您的隐私偏好,不希望有关您的在线浏览活动的数据被监控和收集。 -到目前为止,还没有为识别和实施DNT信号制定统一的技术标准。 -虽然我们不能承诺尊重每一个DNT信号,我们力求尊重所有在技术上可行的此类请求。 - -加利福尼亚法律要求我们告诉您我们如何响应网络浏览器的DNT信号。 -由于我们不能保证识别和尊重所有DNT信号,我们目前不对它们做出响应。 - -## 10. 某些管辖区的居民是否有特定的隐私权利? - -否。 - -所有男女生而平等。 - -我们向所有个人提供相同的隐私权利,无论他们的位置如何。 - -请放心,我们将以我们希望被对待的相同的尊重和尊严对待您。 - -## 11. 您如何查看、更新或删除我们收集的数据? - -您几乎不可能查看、更新或删除我们收集的数据,因为我们不从您那里收集任何可识别的个人数据,也无法确定哪些数据是属于您的。 - -## 12. 我们是否会更新此声明? - -!!! tip "简而言之" - - 是的,我们将根据需要更新此声明以保持与相关法律的一致。 - -我们可能会不时更新此隐私声明。 -更新后的版本将通过更新本页面底部的“最后修订时间”来表示。 -如果我们进行任何重大更改,我们将通过在本页发布新的隐私声明来通知您。 -由于我们不收集您的任何联系信息,我们无法直接通知您。 -我们鼓励您经常查看本隐私声明,以了解我们如何保护您的信息。 +--8<-- "privacy.zh.md" \ No newline at end of file diff --git a/docs/docs/index.md b/docs/docs/index.md index 65af044e..a08d9e5c 100644 --- a/docs/docs/index.md +++ b/docs/docs/index.md @@ -7,4 +7,4 @@ date: 2022-05-04 # MultiMolecule ---8<-- "README.md:8:" +--8<-- "README.md" diff --git a/docs/docs/index.zh.md b/docs/docs/index.zh.md index 7794e456..884b7d1b 100644 --- a/docs/docs/index.zh.md +++ b/docs/docs/index.zh.md @@ -7,4 +7,4 @@ date: 2022-05-04 # MultiMolecule ---8<-- "README.zh.md:8:" +--8<-- "README.zh.md" diff --git a/multimolecule/__init__.py b/multimolecule/__init__.py index 240e9fcc..63f937c5 100644 --- a/multimolecule/__init__.py +++ b/multimolecule/__init__.py @@ -111,19 +111,16 @@ HeadConfig, HeadRegistry, HeadTransformRegistry, - HeadTransformRegistryHF, IdentityTransform, LinearTransform, MaskedLMHead, MaskedLMHeadConfig, NonLinearTransform, PositionEmbeddingRegistry, - PositionEmbeddingRegistryHF, PredictionHead, RotaryEmbedding, SequencePredictionHead, 
SinusoidalEmbedding, - TokenHeadRegistryHF, TokenKMerHead, TokenPredictionHead, ) @@ -132,9 +129,14 @@ from .utils import count_parameters __all__ = [ + "train", + "evaluate", + "infer", "modeling_auto", "modeling_outputs", "Dataset", + "MultiMoleculeConfig", + "MultiMoleculeRunner", "PreTrainedConfig", "HeadConfig", "BaseHeadConfig", @@ -233,21 +235,15 @@ "HeadRegistry", "PredictionHead", "SequencePredictionHead", - "TokenHeadRegistryHF", "TokenPredictionHead", "TokenKMerHead", - "NucleotideHeadRegistryHF", - "NucleotidePredictionHead", - "NucleotideKMerHead", "ContactPredictionHead", "MaskedLMHead", "HeadTransformRegistry", - "HeadTransformRegistryHF", "LinearTransform", "NonLinearTransform", "IdentityTransform", "PositionEmbeddingRegistry", - "PositionEmbeddingRegistryHF", "RotaryEmbedding", "SinusoidalEmbedding", "Criterion", diff --git a/multimolecule/data/__init__.py b/multimolecule/data/__init__.py index 62196c10..20d1bd5f 100644 --- a/multimolecule/data/__init__.py +++ b/multimolecule/data/__init__.py @@ -17,4 +17,7 @@ from .dataset import Dataset from .utils import no_collate -__all__ = ["Dataset", "no_collate"] +__all__ = [ + "Dataset", + "no_collate", +] diff --git a/multimolecule/data/dataset.py b/multimolecule/data/dataset.py index 54565349..bf422b91 100644 --- a/multimolecule/data/dataset.py +++ b/multimolecule/data/dataset.py @@ -80,10 +80,13 @@ class Dataset(datasets.Dataset): preprocess: Whether to preprocess the dataset. Preprocessing involves pre-tokenizing the sequences using the tokenizer. Defaults to `True`. - auto_rename_cols: Whether to automatically rename columns to standard names. - Only works when there is exactly one feature column / one label column. - You can control the naming through `multimolecule.defaults.SEQUENCE_COL_NAME` and - `multimolecule.defaults.LABEL_COL_NAME`. + auto_rename_sequence_col: Whether to automatically rename sequence columns to standard name. 
+ Only works when there is exactly one sequence column. + You can control the naming through `multimolecule.defaults.SEQUENCE_COL_NAME`. + For more refined control, use `column_names_map`. + auto_rename_label_col: Whether to automatically rename label column to standard name. + Only works when there is exactly one label column. + You can control the naming through `multimolecule.defaults.LABEL_COL_NAME`. For more refined control, use `column_names_map`. column_names_map: A mapping of column names to new column names. This is useful for renaming columns to inputs that are expected by a model. @@ -122,7 +125,8 @@ class Dataset(datasets.Dataset): _discrete_map: Mapping preprocess: bool = True - auto_rename_cols: bool = False + auto_rename_sequence_col: bool = True + auto_rename_label_col: bool = False column_names_map: Mapping[str, str] | None = None ignored_cols: List[str] = [] @@ -136,7 +140,8 @@ def __init__( label_cols: List | None = None, id_cols: List | None = None, preprocess: bool | None = None, - auto_rename_cols: bool | None = None, + auto_rename_sequence_col: bool | None = None, + auto_rename_label_col: bool | None = None, column_names_map: Mapping[str, str] | None = None, truncation: bool | None = None, max_seq_length: int | None = None, @@ -149,8 +154,9 @@ def __init__( fingerprint: str | None = None, ignored_cols: List[str] | None = None, ): + self._tasks = NestedDict() if tasks is not None: - self._tasks = NestedDict(tasks) + self.tasks = tasks if discrete_map is not None: self._discrete_map = discrete_map arrow_table = self.build_table( @@ -166,10 +172,12 @@ def __init__( preprocess=preprocess, truncation=truncation, max_seq_length=max_seq_length, - auto_rename_cols=auto_rename_cols, + auto_rename_sequence_col=auto_rename_sequence_col, + auto_rename_label_col=auto_rename_label_col, column_names_map=column_names_map, ) self.ignored_cols = ignored_cols or self.id_cols + self.train = split == datasets.Split.TRAIN def build_table( self, @@ -187,13 +195,13 
@@ def build_table( data = dl.load_pandas(data) if isinstance(data, DataFrame): data = data.loc[:, ~data.columns.str.contains("^Unnamed")] - data = pa.Table.from_pandas(data) + data = pa.Table.from_pandas(data, preserve_index=False) elif isinstance(data, dict): data = pa.Table.from_pydict(data) elif isinstance(data, list): data = pa.Table.from_pylist(data) elif isinstance(data, DataFrame): - data = pa.Table.from_pandas(data) + data = pa.Table.from_pandas(data, preserve_index=False) if feature_cols is not None and label_cols is not None: data = data.select(feature_cols + label_cols) data = self.process_nan(data, nan_process=nan_process, fill_value=fill_value) @@ -206,7 +214,8 @@ def post( max_seq_length: int | None = None, truncation: bool | None = None, preprocess: bool | None = None, - auto_rename_cols: bool | None = None, + auto_rename_sequence_col: bool | None = None, + auto_rename_label_col: bool | None = None, column_names_map: Mapping[str, str] | None = None, ) -> None: r""" @@ -214,7 +223,8 @@ def post( It first identifies the special columns (sequence and structure columns) in the dataset. Then it sets the feature and label columns based on the input arguments. - If `auto_rename_cols` is `True`, it will automatically rename the columns to model inputs. + If `auto_rename_sequence_col` is `True`, it will automatically rename the sequence column. + If `auto_rename_label_col` is `True`, it will automatically rename the label column. Finally, it sets the [`transform`][datasets.Dataset.set_transform] function based on the `preprocess` flag. 
""" if tokenizer is None: @@ -237,19 +247,24 @@ def post( self.seq_length_offset += 1 if preprocess is not None: self.preprocess = preprocess - if auto_rename_cols is not None: - self.auto_rename_cols = auto_rename_cols - if self.auto_rename_cols: - if column_names_map is not None: - raise ValueError("auto_rename_cols and column_names_map are mutually exclusive.") + if auto_rename_sequence_col is not None: + self.auto_rename_sequence_col = auto_rename_sequence_col + if auto_rename_label_col is not None: + self.auto_rename_label_col = auto_rename_label_col + if column_names_map is None: column_names_map = {} - if len(self.feature_cols) == 1: - column_names_map[self.feature_cols[0]] = defaults.SEQUENCE_COL_NAME - if len(self.label_cols) == 1: - column_names_map[self.label_cols[0]] = defaults.LABEL_COL_NAME + if self.auto_rename_sequence_col: + if len(self.sequence_cols) != 1: + raise ValueError("auto_rename_sequence_col can only be used when there is exactly one sequence column.") + column_names_map[self.sequence_cols[0]] = defaults.SEQUENCE_COL_NAME # type: ignore[index] + if self.auto_rename_label_col: + if len(self.label_cols) != 1: + raise ValueError("auto_rename_label_col can only be used when there is exactly one label column.") + column_names_map[self.label_cols[0]] = defaults.LABEL_COL_NAME # type: ignore[index] self.column_names_map = column_names_map if self.column_names_map: self.rename_columns(self.column_names_map) + self.infer_tasks() if self.preprocess: self.update(self.map(self.tokenization)) @@ -258,7 +273,7 @@ def post( if self.discrete_map: self.update(self.map(self.map_discrete)) fn_kwargs = { - "columns": [name for name, task in self.tasks.items() if task.level in ["nucleotide", "contact"]], + "columns": [name for name, task in self.tasks.items() if task.level in ["token", "contact"]], "max_seq_length": self.max_seq_length - self.seq_length_offset, } if self.truncation and 0 < self.max_seq_length < 2**32: @@ -297,20 +312,23 @@ def collate(self, 
col: str, data: Any) -> Tensor | NestedTensor | None: except ValueError: return NestedTensor(data) - def infer_tasks(self, tasks: Mapping | None = None, sequence_col: str | None = None) -> NestedDict: - self._tasks = tasks or NestedDict() + def infer_tasks(self, sequence_col: str | None = None) -> NestedDict: for col in self.label_cols: - if col not in self.tasks: - if col in self.secondary_structure_cols: - task = Task(TaskType.Binary, level=TaskLevel.Contact, num_labels=1) - self._tasks[col] = task # type: ignore[index] - warn( - f"Secondary structure columns are assumed to be {task}." - " Please explicitly specify the task if this is not the case." - ) - else: - self._tasks[col] = self.infer_task(col, sequence_col) # type: ignore[index] - return self._tasks + if col in self.tasks: + continue + if col in self.secondary_structure_cols: + task = Task(TaskType.Binary, level=TaskLevel.Contact, num_labels=1) + self.tasks[col] = task # type: ignore[index] + warn( + f"Secondary structure columns are assumed to be {task}. " + "Please explicitly specify the task if this is not the case." 
+ ) + else: + try: + self.tasks[col] = self.infer_task(col, sequence_col) # type: ignore[index] + except ValueError: + raise ValueError(f"Unable to infer task for column {col}.") + return self.tasks def infer_task(self, label_col: str, sequence_col: str | None = None) -> Task: if sequence_col is None: @@ -346,8 +364,8 @@ def identify_special_cols( all_cols = self.data.column_names self._id_cols = id_cols or [i for i in all_cols if i in defaults.ID_COL_NAMES] - string_cols = [k for k, v in self.features.items() if k not in self.id_cols and v.dtype == "string"] - self._sequence_cols = [i for i in string_cols if i in defaults.SEQUENCE_COL_NAMES] + string_cols: list[str] = [k for k, v in self.features.items() if k not in self.id_cols and v.dtype == "string"] + self._sequence_cols = [i for i in string_cols if i.lower() in defaults.SEQUENCE_COL_NAMES] self._secondary_structure_cols = [i for i in string_cols if i in defaults.SECONDARY_STRUCTURE_COL_NAMES] data_cols = [i for i in all_cols if i not in self.id_cols] @@ -404,7 +422,7 @@ def rename_columns(self, column_mapping: Mapping[str, str], new_fingerprint: str self._label_cols = [column_mapping.get(i, i) for i in self.label_cols] self._sequence_cols = [column_mapping.get(i, i) for i in self.sequence_cols] self._secondary_structure_cols = [column_mapping.get(i, i) for i in self.secondary_structure_cols] - self._tasks = {column_mapping.get(k, k): v for k, v in self.tasks.items()} + self.tasks = {column_mapping.get(k, k): v for k, v in self.tasks.items()} return self def rename_column( @@ -418,7 +436,7 @@ def rename_column( self._secondary_structure_cols = [ new_column_name if i == original_column_name else i for i in self.secondary_structure_cols ] - self._tasks = {new_column_name if k == original_column_name else k: v for k, v in self.tasks.items()} + self.tasks = {new_column_name if k == original_column_name else k: v for k, v in self.tasks.items()} return self def process_nan(self, data: Table, nan_process: str | None, 
fill_value: str | int | float = 0) -> Table: @@ -441,7 +459,7 @@ def process_nan(self, data: Table, nan_process: str | None, fill_value: str | in data = data.fillna(fill_value) else: raise ValueError(f"Invalid nan_process: {nan_process}") - return pa.Table.from_pandas(data) + return pa.Table.from_pandas(data, preserve_index=False) @property def id_cols(self) -> List: @@ -470,9 +488,18 @@ def secondary_structure_cols(self) -> List: @property def tasks(self) -> NestedDict: if not hasattr(self, "_tasks"): + self._tasks = NestedDict() return self.infer_tasks() return self._tasks + @tasks.setter + def tasks(self, tasks: Mapping): + self._tasks = NestedDict() + for name, task in tasks.items(): + if not isinstance(task, Task): + task = Task(**task) + self._tasks[name] = task + @property def discrete_map(self) -> Mapping: if not hasattr(self, "_discrete_map"): diff --git a/multimolecule/data/utils.py b/multimolecule/data/utils.py index 85bc4423..1afddd1b 100644 --- a/multimolecule/data/utils.py +++ b/multimolecule/data/utils.py @@ -60,7 +60,7 @@ def infer_task( level = TaskLevel.Contact num_labels = len(flattened) // num_contacts elif len(flattened) % num_tokens == 0: - level = TaskLevel.Nucleotide + level = TaskLevel.Token num_labels = len(flattened) // num_tokens elif len(flattened) % num_elem == 0: level = TaskLevel.Sequence @@ -86,7 +86,7 @@ def infer_task( task_type = TaskType.MultiClass if num_labels > 2 else TaskType.Binary num_labels = 1 if task_type == TaskType.Binary else num_labels if num_tokens_flattened == num_tokens: - return Task(task_type, level=TaskLevel.Nucleotide, num_labels=num_labels) + return Task(task_type, level=TaskLevel.Token, num_labels=num_labels) if num_contacts_flattened == num_contacts: return Task(task_type, level=TaskLevel.Contact, num_labels=num_labels) return Task(task_type, level=TaskLevel.Sequence, num_labels=num_labels) @@ -122,7 +122,7 @@ def map_value(value: Any, mapping: dict[str, int] | None) -> Any: def truncate_value(value: Any, 
max_seq_length: int, level: int | None = None) -> Any: - if level == TaskLevel.Nucleotide: + if level == TaskLevel.Token: return value[:max_seq_length] if level == TaskLevel.Contact: return [i[:max_seq_length] for i in value[:max_seq_length]] diff --git a/multimolecule/defaults.py b/multimolecule/defaults.py index c299ea1a..a908bbdb 100644 --- a/multimolecule/defaults.py +++ b/multimolecule/defaults.py @@ -14,11 +14,16 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +TRAIN_SPLITS = ("train",) +VALIDATION_SPLITS = ("val", "validation") +TEST_SPLITS = ("test", "eval", "evaluation") +INFERENCE_SPLITS = ("inf", "inference") +DATASET_SPLITS = TRAIN_SPLITS + VALIDATION_SPLITS + TEST_SPLITS + INFERENCE_SPLITS ID_COL_NAMES = ["id", "idx", "index"] SEQUENCE_COL_NAMES = ["input_ids", "sequence", "seq"] SECONDARY_STRUCTURE_COL_NAMES = ["secondary_structure", "ss"] LABEL_COL_NAMES = ["label", "labels"] -SEQUENCE_COL_NAME = "input_ids" +SEQUENCE_COL_NAME = "sequence" LABEL_COL_NAME = "labels" LABLE_TYPE_THRESHOLD = 0.5 TASK_INFERENCE_NUM_ROWS = 100 diff --git a/multimolecule/models/__init__.py b/multimolecule/models/__init__.py index 66147616..29d99436 100644 --- a/multimolecule/models/__init__.py +++ b/multimolecule/models/__init__.py @@ -14,6 +14,7 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
+from multimolecule.module import HeadConfig from multimolecule.tokenisers import DnaTokenizer, ProteinTokenizer, RnaTokenizer from .calm import ( @@ -127,6 +128,7 @@ __all__ = [ "PreTrainedConfig", + "HeadConfig", "DnaTokenizer", "RnaTokenizer", "ProteinTokenizer", diff --git a/multimolecule/models/calm/configuration_calm.py b/multimolecule/models/calm/configuration_calm.py index c5d73c03..032bda8e 100644 --- a/multimolecule/models/calm/configuration_calm.py +++ b/multimolecule/models/calm/configuration_calm.py @@ -127,5 +127,5 @@ def __init__( self.use_cache = use_cache self.emb_layer_norm_before = emb_layer_norm_before self.token_dropout = token_dropout - self.head = HeadConfig(**head if head is not None else {}) - self.lm_head = MaskedLMHeadConfig(**lm_head if lm_head is not None else {}) + self.head = HeadConfig(**head) if head is not None else None + self.lm_head = MaskedLMHeadConfig(**lm_head) if lm_head is not None else None diff --git a/multimolecule/models/calm/modeling_calm.py b/multimolecule/models/calm/modeling_calm.py index 25c1eba4..c8abdffe 100644 --- a/multimolecule/models/calm/modeling_calm.py +++ b/multimolecule/models/calm/modeling_calm.py @@ -270,9 +270,9 @@ class CaLmForSequencePrediction(CaLmPreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.tensor([[1]])) >>> output["logits"].shape - torch.Size([1, 2]) + torch.Size([1, 1]) >>> output["loss"] # doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, config: CaLmConfig): @@ -334,9 +334,9 @@ class CaLmForTokenPrediction(CaLmPreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.randint(2, (1, 5))) >>> output["logits"].shape - torch.Size([1, 5, 2]) + torch.Size([1, 5, 1]) >>> output["loss"] # doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, config: CaLmConfig): @@ -398,9 +398,9 @@ class 
CaLmForContactPrediction(CaLmPreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.randint(2, (1, 5, 5))) >>> output["logits"].shape - torch.Size([1, 5, 5, 2]) + torch.Size([1, 5, 5, 1]) >>> output["loss"] # doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, config: CaLmConfig): diff --git a/multimolecule/models/configuration_utils.py b/multimolecule/models/configuration_utils.py index 2047d671..ce6f10ea 100644 --- a/multimolecule/models/configuration_utils.py +++ b/multimolecule/models/configuration_utils.py @@ -30,7 +30,8 @@ class PreTrainedConfig(PretrainedConfig): Base class for all model configuration classes. """ - head: HeadConfig + head: HeadConfig | None + num_labels: int = 1 hidden_size: int @@ -42,7 +43,15 @@ class PreTrainedConfig(PretrainedConfig): null_token_id: int = 5 def __init__( - self, pad_token_id=0, bos_token_id=1, eos_token_id=2, unk_token_id=3, mask_token_id=4, null_token_id=5, **kwargs + self, + pad_token_id: int = 0, + bos_token_id: int = 1, + eos_token_id: int = 2, + unk_token_id: int = 3, + mask_token_id: int = 4, + null_token_id: int = 5, + num_labels: int = 1, + **kwargs, ): super().__init__( pad_token_id=pad_token_id, @@ -51,6 +60,7 @@ def __init__( unk_token_id=unk_token_id, mask_token_id=mask_token_id, null_token_id=null_token_id, + num_labels=num_labels, **kwargs, ) diff --git a/multimolecule/models/ernierna/configuration_ernierna.py b/multimolecule/models/ernierna/configuration_ernierna.py index 0648bb2d..bfd11d51 100644 --- a/multimolecule/models/ernierna/configuration_ernierna.py +++ b/multimolecule/models/ernierna/configuration_ernierna.py @@ -110,5 +110,5 @@ def __init__( self.pairwise_alpha = pairwise_alpha self.is_decoder = is_decoder self.use_cache = use_cache - self.head = HeadConfig(**head if head is not None else {}) - self.lm_head = MaskedLMHeadConfig(**lm_head if lm_head is not None else {}) + self.head = HeadConfig(**head) 
if head is not None else None + self.lm_head = MaskedLMHeadConfig(**lm_head) if lm_head is not None else None diff --git a/multimolecule/models/ernierna/modeling_ernierna.py b/multimolecule/models/ernierna/modeling_ernierna.py index 6354a68c..00f17416 100644 --- a/multimolecule/models/ernierna/modeling_ernierna.py +++ b/multimolecule/models/ernierna/modeling_ernierna.py @@ -321,7 +321,7 @@ class ErnieRnaForSequencePrediction(ErnieRnaPreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input) >>> output["logits"].shape - torch.Size([1, 2]) + torch.Size([1, 1]) """ def __init__(self, config: ErnieRnaConfig): @@ -385,9 +385,9 @@ class ErnieRnaForTokenPrediction(ErnieRnaPreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.randint(2, (1, 5))) >>> output["logits"].shape - torch.Size([1, 5, 2]) + torch.Size([1, 5, 1]) >>> output["loss"] # doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, config: ErnieRnaConfig): @@ -452,9 +452,9 @@ class ErnieRnaForContactPrediction(ErnieRnaPreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.randint(2, (1, 5, 5))) >>> output["logits"].shape - torch.Size([1, 5, 5, 2]) + torch.Size([1, 5, 5, 1]) >>> output["loss"] # doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, config: ErnieRnaConfig): @@ -1183,17 +1183,17 @@ class ErnieRnaContactClassificationHead(nn.Module): def __init__(self, config: ErnieRnaConfig, head_config: HeadConfig | None = None): super().__init__() if head_config is None: - head_config = config.head + head_config = config.head or HeadConfig() self.config = head_config - self.bos_token_id = config.bos_token_id - self.eos_token_id = config.eos_token_id - self.pad_token_id = config.pad_token_id self.conv1 = nn.Conv2d(1, 8, 7, 1, 3) self.relu = nn.ReLU(inplace=True) self.dropout = nn.Dropout(p=0.3) 
self.conv2 = nn.Conv2d(8, 63, 7, 1, 3) self.resnet = ErnieRnaResNet() self.criterion = Criterion(self.config) + self.bos_token_id = config.bos_token_id + self.eos_token_id = config.eos_token_id + self.pad_token_id = config.pad_token_id def forward( # type: ignore[override] # pylint: disable=arguments-renamed self, diff --git a/multimolecule/models/rinalmo/configuration_rinalmo.py b/multimolecule/models/rinalmo/configuration_rinalmo.py index 5e21725d..1cc963b2 100644 --- a/multimolecule/models/rinalmo/configuration_rinalmo.py +++ b/multimolecule/models/rinalmo/configuration_rinalmo.py @@ -125,6 +125,6 @@ def __init__( self.use_cache = use_cache self.learnable_beta = learnable_beta self.token_dropout = token_dropout - self.head = HeadConfig(**head if head is not None else {}) - self.lm_head = MaskedLMHeadConfig(**lm_head if lm_head is not None else {}) + self.head = HeadConfig(**head) if head is not None else None + self.lm_head = MaskedLMHeadConfig(**lm_head) if lm_head is not None else None self.emb_layer_norm_before = emb_layer_norm_before diff --git a/multimolecule/models/rinalmo/modeling_rinalmo.py b/multimolecule/models/rinalmo/modeling_rinalmo.py index b45d2823..d0ac6e8c 100644 --- a/multimolecule/models/rinalmo/modeling_rinalmo.py +++ b/multimolecule/models/rinalmo/modeling_rinalmo.py @@ -269,9 +269,9 @@ class RiNALMoForSequencePrediction(RiNALMoPreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.tensor([[1]])) >>> output["logits"].shape - torch.Size([1, 2]) + torch.Size([1, 1]) >>> output["loss"] # doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, config: RiNALMoConfig): @@ -333,9 +333,9 @@ class RiNALMoForTokenPrediction(RiNALMoPreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.randint(2, (1, 5))) >>> output["logits"].shape - torch.Size([1, 5, 2]) + torch.Size([1, 5, 1]) >>> output["loss"] # 
doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, config: RiNALMoConfig): @@ -397,9 +397,9 @@ class RiNALMoForContactPrediction(RiNALMoPreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.randint(2, (1, 5, 5))) >>> output["logits"].shape - torch.Size([1, 5, 5, 2]) + torch.Size([1, 5, 5, 1]) >>> output["loss"] # doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, config: RiNALMoConfig): diff --git a/multimolecule/models/rnabert/configuration_rnabert.py b/multimolecule/models/rnabert/configuration_rnabert.py index f044ecc7..97632d2e 100644 --- a/multimolecule/models/rnabert/configuration_rnabert.py +++ b/multimolecule/models/rnabert/configuration_rnabert.py @@ -112,5 +112,5 @@ def __init__( self.position_embedding_type = position_embedding_type self.is_decoder = is_decoder self.use_cache = use_cache - self.head = HeadConfig(**head if head is not None else {}) - self.lm_head = MaskedLMHeadConfig(**lm_head if lm_head is not None else {}) + self.head = HeadConfig(**head) if head is not None else None + self.lm_head = MaskedLMHeadConfig(**lm_head) if lm_head is not None else None diff --git a/multimolecule/models/rnabert/modeling_rnabert.py b/multimolecule/models/rnabert/modeling_rnabert.py index 74b06cf1..32f7bf01 100644 --- a/multimolecule/models/rnabert/modeling_rnabert.py +++ b/multimolecule/models/rnabert/modeling_rnabert.py @@ -37,7 +37,13 @@ from transformers.pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer from transformers.utils import logging -from multimolecule.module import ContactPredictionHead, MaskedLMHead, SequencePredictionHead, TokenPredictionHead +from multimolecule.module import ( + ContactPredictionHead, + HeadConfig, + MaskedLMHead, + SequencePredictionHead, + TokenPredictionHead, +) from ..modeling_outputs import ContactPredictorOutput, SequencePredictorOutput, 
TokenPredictorOutput from .configuration_rnabert import RnaBertConfig @@ -266,9 +272,9 @@ class RnaBertForSequencePrediction(RnaBertPreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.tensor([[1]])) >>> output["logits"].shape - torch.Size([1, 2]) + torch.Size([1, 1]) >>> output["loss"] # doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, config: RnaBertConfig): @@ -330,9 +336,9 @@ class RnaBertForTokenPrediction(RnaBertPreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.randint(2, (1, 5))) >>> output["logits"].shape - torch.Size([1, 5, 2]) + torch.Size([1, 5, 1]) >>> output["loss"] # doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, config: RnaBertConfig): @@ -394,9 +400,9 @@ class RnaBertForContactPrediction(RnaBertPreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.randint(2, (1, 5, 5))) >>> output["logits"].shape - torch.Size([1, 5, 5, 2]) + torch.Size([1, 5, 5, 1]) >>> output["loss"] # doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, config: RnaBertConfig): @@ -1065,7 +1071,7 @@ def __init__(self, config: RnaBertConfig): vocab_size, config.vocab_size = config.vocab_size, config.ss_vocab_size self.predictions_ss = MaskedLMHead(config) config.vocab_size = vocab_size - self.seq_relationship = SequencePredictionHead(config) + self.seq_relationship = SequencePredictionHead(config, HeadConfig(num_labels=2)) def forward( self, diff --git a/multimolecule/models/rnaernie/configuration_rnaernie.py b/multimolecule/models/rnaernie/configuration_rnaernie.py index 2d540c9d..7a788297 100644 --- a/multimolecule/models/rnaernie/configuration_rnaernie.py +++ b/multimolecule/models/rnaernie/configuration_rnaernie.py @@ -108,5 +108,5 @@ def __init__( self.position_embedding_type = 
position_embedding_type self.is_decoder = is_decoder self.use_cache = use_cache - self.head = HeadConfig(**head if head is not None else {}) - self.lm_head = MaskedLMHeadConfig(**lm_head if lm_head is not None else {}) + self.head = HeadConfig(**head) if head is not None else None + self.lm_head = MaskedLMHeadConfig(**lm_head) if lm_head is not None else None diff --git a/multimolecule/models/rnaernie/modeling_rnaernie.py b/multimolecule/models/rnaernie/modeling_rnaernie.py index 7e0f4d10..8107ee20 100644 --- a/multimolecule/models/rnaernie/modeling_rnaernie.py +++ b/multimolecule/models/rnaernie/modeling_rnaernie.py @@ -270,9 +270,9 @@ class RnaErnieForSequencePrediction(RnaErniePreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.tensor([[1]])) >>> output["logits"].shape - torch.Size([1, 2]) + torch.Size([1, 1]) >>> output["loss"] # doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, config): @@ -334,9 +334,9 @@ class RnaErnieForTokenPrediction(RnaErniePreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.randint(2, (1, 5))) >>> output["logits"].shape - torch.Size([1, 5, 2]) + torch.Size([1, 5, 1]) >>> output["loss"] # doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, config: RnaErnieConfig): @@ -398,9 +398,9 @@ class RnaErnieForContactPrediction(RnaErniePreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.randint(2, (1, 5, 5))) >>> output["logits"].shape - torch.Size([1, 5, 5, 2]) + torch.Size([1, 5, 5, 1]) >>> output["loss"] # doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, config: RnaErnieConfig): diff --git a/multimolecule/models/rnafm/configuration_rnafm.py b/multimolecule/models/rnafm/configuration_rnafm.py index 8fdb7f49..ef1f0c18 100644 --- 
a/multimolecule/models/rnafm/configuration_rnafm.py +++ b/multimolecule/models/rnafm/configuration_rnafm.py @@ -131,5 +131,5 @@ def __init__( self.use_cache = use_cache self.emb_layer_norm_before = emb_layer_norm_before self.token_dropout = token_dropout - self.head = HeadConfig(**head if head is not None else {}) - self.lm_head = MaskedLMHeadConfig(**lm_head if lm_head is not None else {}) + self.head = HeadConfig(**head) if head is not None else None + self.lm_head = MaskedLMHeadConfig(**lm_head) if lm_head is not None else None diff --git a/multimolecule/models/rnafm/modeling_rnafm.py b/multimolecule/models/rnafm/modeling_rnafm.py index 99f553da..6898da9c 100644 --- a/multimolecule/models/rnafm/modeling_rnafm.py +++ b/multimolecule/models/rnafm/modeling_rnafm.py @@ -272,9 +272,9 @@ class RnaFmForSequencePrediction(RnaFmPreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.tensor([[1]])) >>> output["logits"].shape - torch.Size([1, 2]) + torch.Size([1, 1]) >>> output["loss"] # doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, config: RnaFmConfig): @@ -336,9 +336,9 @@ class RnaFmForTokenPrediction(RnaFmPreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.randint(2, (1, 5))) >>> output["logits"].shape - torch.Size([1, 5, 2]) + torch.Size([1, 5, 1]) >>> output["loss"] # doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, config: RnaFmConfig): @@ -400,9 +400,9 @@ class RnaFmForContactPrediction(RnaFmPreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.randint(2, (1, 5, 5))) >>> output["logits"].shape - torch.Size([1, 5, 5, 2]) + torch.Size([1, 5, 5, 1]) >>> output["loss"] # doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, config: RnaFmConfig): @@ -555,7 +555,7 @@ class 
RnaFmForPreTraining(RnaFmPreTrainedModel): >>> output["logits"].shape torch.Size([1, 7, 26]) >>> output["contact_map"].shape - torch.Size([1, 5, 5, 2]) + torch.Size([1, 5, 5, 1]) """ _tied_weights_keys = [ diff --git a/multimolecule/models/rnamsm/configuration_rnamsm.py b/multimolecule/models/rnamsm/configuration_rnamsm.py index ae914c82..2e8150ba 100644 --- a/multimolecule/models/rnamsm/configuration_rnamsm.py +++ b/multimolecule/models/rnamsm/configuration_rnamsm.py @@ -116,5 +116,5 @@ def __init__( self.attention_type = attention_type self.embed_positions_msa = embed_positions_msa self.attention_bias = attention_bias - self.head = HeadConfig(**head if head is not None else {}) - self.lm_head = MaskedLMHeadConfig(**lm_head if lm_head is not None else {}) + self.head = HeadConfig(**head) if head is not None else None + self.lm_head = MaskedLMHeadConfig(**lm_head) if lm_head is not None else None diff --git a/multimolecule/models/rnamsm/modeling_rnamsm.py b/multimolecule/models/rnamsm/modeling_rnamsm.py index 5ed6bf87..0390a129 100644 --- a/multimolecule/models/rnamsm/modeling_rnamsm.py +++ b/multimolecule/models/rnamsm/modeling_rnamsm.py @@ -176,9 +176,9 @@ class RnaMsmForSequencePrediction(RnaMsmPreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.tensor([[1]])) >>> output["logits"].shape - torch.Size([1, 2]) + torch.Size([1, 1]) >>> output["loss"] # doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, config: RnaMsmConfig): @@ -239,9 +239,9 @@ class RnaMsmForTokenPrediction(RnaMsmPreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.randint(2, (1, 5))) >>> output["logits"].shape - torch.Size([1, 5, 2]) + torch.Size([1, 5, 1]) >>> output["loss"] # doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, config: RnaMsmConfig): @@ -302,9 +302,9 @@ class 
RnaMsmForContactPrediction(RnaMsmPreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.randint(2, (1, 5, 5))) >>> output["logits"].shape - torch.Size([1, 5, 5, 2]) + torch.Size([1, 5, 5, 1]) >>> output["loss"] # doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, config: RnaMsmConfig): @@ -449,7 +449,7 @@ class RnaMsmForPreTraining(RnaMsmPreTrainedModel): >>> output["logits"].shape torch.Size([1, 7, 26]) >>> output["contact_map"].shape - torch.Size([1, 5, 5, 2]) + torch.Size([1, 5, 5, 1]) """ _tied_weights_keys = [ diff --git a/multimolecule/models/splicebert/configuration_splicebert.py b/multimolecule/models/splicebert/configuration_splicebert.py index 66b46a88..f789516d 100644 --- a/multimolecule/models/splicebert/configuration_splicebert.py +++ b/multimolecule/models/splicebert/configuration_splicebert.py @@ -108,5 +108,5 @@ def __init__( self.position_embedding_type = position_embedding_type self.is_decoder = is_decoder self.use_cache = use_cache - self.head = HeadConfig(**head if head is not None else {}) - self.lm_head = MaskedLMHeadConfig(**lm_head if lm_head is not None else {}) + self.head = HeadConfig(**head) if head is not None else None + self.lm_head = MaskedLMHeadConfig(**lm_head) if lm_head is not None else None diff --git a/multimolecule/models/splicebert/modeling_splicebert.py b/multimolecule/models/splicebert/modeling_splicebert.py index 1b0fd072..9d129d74 100644 --- a/multimolecule/models/splicebert/modeling_splicebert.py +++ b/multimolecule/models/splicebert/modeling_splicebert.py @@ -274,9 +274,9 @@ class SpliceBertForSequencePrediction(SpliceBertPreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.tensor([[1]])) >>> output["logits"].shape - torch.Size([1, 2]) + torch.Size([1, 1]) >>> output["loss"] # doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, 
config: SpliceBertConfig): @@ -338,9 +338,9 @@ class SpliceBertForTokenPrediction(SpliceBertPreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.randint(2, (1, 5))) >>> output["logits"].shape - torch.Size([1, 5, 2]) + torch.Size([1, 5, 1]) >>> output["loss"] # doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, config: SpliceBertConfig): @@ -402,9 +402,9 @@ class SpliceBertForContactPrediction(SpliceBertPreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.randint(2, (1, 5, 5))) >>> output["logits"].shape - torch.Size([1, 5, 5, 2]) + torch.Size([1, 5, 5, 1]) >>> output["loss"] # doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, config: SpliceBertConfig): diff --git a/multimolecule/models/utrbert/configuration_utrbert.py b/multimolecule/models/utrbert/configuration_utrbert.py index d032c5ee..5230c04f 100644 --- a/multimolecule/models/utrbert/configuration_utrbert.py +++ b/multimolecule/models/utrbert/configuration_utrbert.py @@ -125,5 +125,5 @@ def __init__( self.position_embedding_type = position_embedding_type self.is_decoder = is_decoder self.use_cache = use_cache - self.head = HeadConfig(**head if head is not None else {}) - self.lm_head = MaskedLMHeadConfig(**lm_head if lm_head is not None else {}) + self.head = HeadConfig(**head) if head is not None else None + self.lm_head = MaskedLMHeadConfig(**lm_head) if lm_head is not None else None diff --git a/multimolecule/models/utrbert/modeling_utrbert.py b/multimolecule/models/utrbert/modeling_utrbert.py index 1a5b47f9..688bedbe 100644 --- a/multimolecule/models/utrbert/modeling_utrbert.py +++ b/multimolecule/models/utrbert/modeling_utrbert.py @@ -264,9 +264,9 @@ class UtrBertForSequencePrediction(UtrBertPreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.tensor([[1]])) 
>>> output["logits"].shape - torch.Size([1, 2]) + torch.Size([1, 1]) >>> output["loss"] # doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, config: UtrBertConfig): @@ -328,9 +328,9 @@ class UtrBertForTokenPrediction(UtrBertPreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.randint(2, (1, 5))) >>> output["logits"].shape - torch.Size([1, 5, 2]) + torch.Size([1, 5, 1]) >>> output["loss"] # doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, config: UtrBertConfig): @@ -393,9 +393,9 @@ class UtrBertForContactPrediction(UtrBertPreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.randint(2, (1, 5, 5))) >>> output["logits"].shape - torch.Size([1, 5, 5, 2]) + torch.Size([1, 5, 5, 1]) >>> output["loss"] # doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, config: UtrBertConfig): diff --git a/multimolecule/models/utrlm/configuration_utrlm.py b/multimolecule/models/utrlm/configuration_utrlm.py index f0f705de..a4f930d7 100644 --- a/multimolecule/models/utrlm/configuration_utrlm.py +++ b/multimolecule/models/utrlm/configuration_utrlm.py @@ -127,7 +127,7 @@ def __init__( self.use_cache = use_cache self.emb_layer_norm_before = emb_layer_norm_before self.token_dropout = token_dropout - self.head = HeadConfig(**head if head is not None else {}) - self.lm_head = MaskedLMHeadConfig(**lm_head if lm_head is not None else {}) + self.head = HeadConfig(**head) if head is not None else None + self.lm_head = MaskedLMHeadConfig(**lm_head) if lm_head is not None else None self.ss_head = HeadConfig(**ss_head) if ss_head is not None else None self.mfe_head = HeadConfig(**mfe_head) if mfe_head is not None else None diff --git a/multimolecule/models/utrlm/modeling_utrlm.py b/multimolecule/models/utrlm/modeling_utrlm.py index 535f99f0..aae1b593 100644 --- 
a/multimolecule/models/utrlm/modeling_utrlm.py +++ b/multimolecule/models/utrlm/modeling_utrlm.py @@ -272,9 +272,9 @@ class UtrLmForSequencePrediction(UtrLmPreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.tensor([[1]])) >>> output["logits"].shape - torch.Size([1, 2]) + torch.Size([1, 1]) >>> output["loss"] # doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, config: UtrLmConfig): @@ -336,9 +336,9 @@ class UtrLmForTokenPrediction(UtrLmPreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.randint(2, (1, 5))) >>> output["logits"].shape - torch.Size([1, 5, 2]) + torch.Size([1, 5, 1]) >>> output["loss"] # doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, config: UtrLmConfig): @@ -400,9 +400,9 @@ class UtrLmForContactPrediction(UtrLmPreTrainedModel): >>> input = tokenizer("ACGUN", return_tensors="pt") >>> output = model(**input, labels=torch.randint(2, (1, 5, 5))) >>> output["logits"].shape - torch.Size([1, 5, 5, 2]) + torch.Size([1, 5, 5, 1]) >>> output["loss"] # doctest:+ELLIPSIS - tensor(..., grad_fn=) + tensor(..., grad_fn=) """ def __init__(self, config: UtrLmConfig): @@ -555,7 +555,7 @@ class UtrLmForPreTraining(UtrLmPreTrainedModel): >>> output["logits"].shape torch.Size([1, 7, 26]) >>> output["contact_map"].shape - torch.Size([1, 5, 5, 2]) + torch.Size([1, 5, 5, 1]) """ _tied_weights_keys = [ diff --git a/multimolecule/module/__init__.py b/multimolecule/module/__init__.py index 0128fe9b..dbba900b 100644 --- a/multimolecule/module/__init__.py +++ b/multimolecule/module/__init__.py @@ -14,8 +14,8 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
-from .criterions import Criterion -from .embeddings import PositionEmbeddingRegistry, PositionEmbeddingRegistryHF, RotaryEmbedding, SinusoidalEmbedding +from .criterions import Criterion, CriterionRegistry +from .embeddings import PositionEmbeddingRegistry, RotaryEmbedding, SinusoidalEmbedding from .heads import ( BaseHeadConfig, ContactPredictionHead, @@ -23,7 +23,6 @@ HeadOutput, HeadRegistry, HeadTransformRegistry, - HeadTransformRegistryHF, IdentityTransform, LinearTransform, MaskedLMHead, @@ -31,15 +30,18 @@ NonLinearTransform, PredictionHead, SequencePredictionHead, - TokenHeadRegistryHF, TokenKMerHead, TokenPredictionHead, ) +from .model import MultiMoleculeModel +from .registry import ModelRegistry __all__ = [ + "ModelRegistry", + "MultiMoleculeModel", + "CriterionRegistry", "Criterion", "PositionEmbeddingRegistry", - "PositionEmbeddingRegistryHF", "RotaryEmbedding", "SinusoidalEmbedding", "BaseHeadConfig", @@ -48,14 +50,12 @@ "HeadRegistry", "PredictionHead", "SequencePredictionHead", - "TokenHeadRegistryHF", "TokenPredictionHead", "TokenKMerHead", "ContactPredictionHead", "MaskedLMHead", "HeadOutput", "HeadTransformRegistry", - "HeadTransformRegistryHF", "LinearTransform", "NonLinearTransform", "IdentityTransform", diff --git a/multimolecule/module/backbones/__init__.py b/multimolecule/module/backbones/__init__.py new file mode 100644 index 00000000..d69e6292 --- /dev/null +++ b/multimolecule/module/backbones/__init__.py @@ -0,0 +1,21 @@ +# MultiMolecule +# Copyright (C) 2024-Present MultiMolecule + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from .registry import BackboneRegistry +from .sequence import SequenceBackbone +from .sequences import SequenceRegistry + +__all__ = ["BackboneRegistry", "SequenceRegistry", "SequenceBackbone"] diff --git a/multimolecule/module/backbones/registry.py b/multimolecule/module/backbones/registry.py new file mode 100644 index 00000000..47be122d --- /dev/null +++ b/multimolecule/module/backbones/registry.py @@ -0,0 +1,21 @@ +# MultiMolecule +# Copyright (C) 2024-Present MultiMolecule + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from __future__ import annotations + +from chanfig import Registry + +BackboneRegistry = Registry() diff --git a/multimolecule/module/backbones/sequence.py b/multimolecule/module/backbones/sequence.py new file mode 100644 index 00000000..2b0ee0cf --- /dev/null +++ b/multimolecule/module/backbones/sequence.py @@ -0,0 +1,59 @@ +# MultiMolecule +# Copyright (C) 2024-Present MultiMolecule + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# any later version. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from __future__ import annotations + +import torch +from chanfig import FlatDict +from danling import NestedTensor +from torch import Tensor, nn + +from .registry import BackboneRegistry +from .sequences import SequenceRegistry + + +@BackboneRegistry.register("sequence", default=True) +class SequenceBackbone(nn.Module): + def __init__(self, sequence) -> None: + super().__init__() + sequence_dropout = sequence.pop("dropout", 0) + self.sequence = SequenceRegistry.build(**sequence) + self.sequence_dropout = nn.Dropout(sequence_dropout) + self.config = self.sequence.config + self.out_channels = self.config.hidden_size + + def forward(self, sequence: NestedTensor | Tensor, *args, **kwargs) -> tuple[FlatDict, FlatDict]: + attentions = None + input_ids, attention_mask = sequence.tensor, sequence.mask + sequence_output = self.sequence(input_ids.int(), attention_mask) + if "last_hidden_state" in sequence_output: + sequence_output["last_hidden_state"] = self.sequence_dropout(sequence_output["last_hidden_state"]) + elif "logits" in sequence_output: + sequence_output["last_hidden_state"] = self.sequence_dropout(sequence_output["logits"]) + else: + raise ValueError("No token output") + if "pooler_output" in sequence_output: + sequence_output["pooler_output"] = self.sequence_dropout(sequence_output["pooler_output"]) + elif "logits" in sequence_output: + sequence_output["pooler_output"] = self.sequence_dropout( + sequence_output["logits"].mean(dim=1, keepdim=True) + ) + else: + raise ValueError("No sequence output") + if "attentions" in sequence_output: + attentions = torch.stack(sequence_output["attentions"], 
dim=1).detach() + + return sequence_output, attentions diff --git a/multimolecule/module/backbones/sequences/__init__.py b/multimolecule/module/backbones/sequences/__init__.py new file mode 100644 index 00000000..e6e5cd08 --- /dev/null +++ b/multimolecule/module/backbones/sequences/__init__.py @@ -0,0 +1,20 @@ +# MultiMolecule +# Copyright (C) 2024-Present MultiMolecule + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from .onehot import OneHot +from .registry import SequenceRegistry + +__all__ = ["SequenceRegistry", "OneHot"] diff --git a/multimolecule/module/backbones/sequences/onehot.py b/multimolecule/module/backbones/sequences/onehot.py new file mode 100644 index 00000000..bc4c979f --- /dev/null +++ b/multimolecule/module/backbones/sequences/onehot.py @@ -0,0 +1,39 @@ +# MultiMolecule +# Copyright (C) 2024-Present MultiMolecule + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. 
+ +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import torch +from chanfig import FlatDict +from torch import nn +from transformers import AutoConfig + +from .registry import SequenceRegistry + + +@SequenceRegistry.register("onehot") +class OneHot(nn.Module): + def __init__(self, pretrained: str) -> None: + super().__init__() + self.config = AutoConfig.from_pretrained(str(pretrained)) + self.module = nn.Embedding(self.config.vocab_size, self.config.hidden_size) + + def forward(self, input_ids, attn_mask) -> FlatDict: + output = FlatDict() + output["last_hidden_state"] = self.module(input_ids) + valid_length = attn_mask.sum(dim=1) + output["pooler_output"] = torch.stack( + [t[: valid_length[i]].sum(0) for i, t in enumerate(output["last_hidden_state"])] + ) + return output diff --git a/multimolecule/module/backbones/sequences/registry.py b/multimolecule/module/backbones/sequences/registry.py new file mode 100644 index 00000000..c9178231 --- /dev/null +++ b/multimolecule/module/backbones/sequences/registry.py @@ -0,0 +1,66 @@ +# MultiMolecule +# Copyright (C) 2024-Present MultiMolecule + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+ +from __future__ import annotations + +import danling as dl +import transformers +from chanfig import Registry as Registry_ +from torch import nn +from transformers import AutoConfig, AutoModel, PretrainedConfig, PreTrainedModel + + +class Registry(Registry_): # pylint: disable=too-few-public-methods + def build( + self, + type: str | None = None, + name: str | None = None, + use_pretrained: bool = True, + gradient_checkpoint: bool = False, + checkpoint: str | None = None, + *args, + **kwargs, + ) -> nn.Module: + if type is not None: + if type in self: + sequence_cls = self.lookup(type) + sequence = self.init(sequence_cls, *args, **kwargs) + if checkpoint is not None: + sequence.load_state_dict(dl.load(checkpoint)) + elif hasattr(transformers, type + "Model"): + if use_pretrained: + sequence_cls: PreTrainedModel = getattr(transformers, type + "Model") # type: ignore[no-redef] + sequence = sequence_cls.from_pretrained(name, *args, **kwargs) + else: + config_cls: PretrainedConfig = getattr(transformers, type + "Config") + config, kwargs = config_cls.from_pretrained(name, return_unused_kwargs=True, **kwargs) + sequence_cls: PreTrainedModel = getattr(transformers, type + "Model") # type: ignore[no-redef] + sequence = sequence_cls.from_config(config, *args, **kwargs) + else: + raise ValueError(f"Sequence {type} not found in registry or transformers") + else: + if use_pretrained: + sequence = AutoModel.from_pretrained(name, *args, **kwargs) + else: + config, kwargs = AutoConfig.from_pretrained(name, return_unused_kwargs=True, **kwargs) + sequence = AutoModel.from_config(config, *args, **kwargs) + + if gradient_checkpoint: + sequence.gradient_checkpointing_enable() + return sequence + + +SequenceRegistry = Registry() diff --git a/multimolecule/module/criterions/__init__.py b/multimolecule/module/criterions/__init__.py index 104334b5..4b9adf7e 100644 --- a/multimolecule/module/criterions/__init__.py +++ b/multimolecule/module/criterions/__init__.py @@ -14,6 +14,18 @@ # 
You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +from .binary import BCEWithLogitsLoss from .generic import Criterion +from .multiclass import CrossEntropyLoss +from .multilabel import MultiLabelSoftMarginLoss +from .registry import CriterionRegistry +from .regression import MSELoss -__all__ = ["Criterion"] +__all__ = [ + "CriterionRegistry", + "Criterion", + "MSELoss", + "BCEWithLogitsLoss", + "CrossEntropyLoss", + "MultiLabelSoftMarginLoss", +] diff --git a/multimolecule/module/criterions/binary.py b/multimolecule/module/criterions/binary.py new file mode 100644 index 00000000..0bf53e59 --- /dev/null +++ b/multimolecule/module/criterions/binary.py @@ -0,0 +1,44 @@ +# MultiMolecule +# Copyright (C) 2024-Present MultiMolecule + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import torch +from danling import NestedTensor +from torch import Tensor, nn + +from .registry import CriterionRegistry + +if TYPE_CHECKING: + from ..heads.config import HeadConfig + + +@CriterionRegistry.register("binary") +class BCEWithLogitsLoss(nn.BCEWithLogitsLoss): + def __init__(self, config: HeadConfig) -> None: + super().__init__(**config.get("loss", {})) + self.config = config + + def forward(self, input: NestedTensor | Tensor, target: NestedTensor | Tensor) -> Tensor: + if isinstance(input, NestedTensor): + input = torch.cat(input.flatten().storage()) + if isinstance(target, NestedTensor): + target = torch.cat(target.flatten().storage()) + if input.ndim == target.ndim + 1: + input = input.squeeze(-1) + return super().forward(input, target.float()) diff --git a/multimolecule/module/criterions/generic.py b/multimolecule/module/criterions/generic.py index b003c81d..a6731933 100644 --- a/multimolecule/module/criterions/generic.py +++ b/multimolecule/module/criterions/generic.py @@ -17,8 +17,8 @@ from __future__ import annotations from typing import TYPE_CHECKING +from warnings import warn -import torch from danling import NestedTensor from torch import Tensor, nn from torch.nn import functional as F @@ -26,10 +26,13 @@ if TYPE_CHECKING: from ..heads.config import HeadConfig +from .registry import CriterionRegistry + +@CriterionRegistry.register(default=True) class Criterion(nn.Module): - problem_types = ["regression", "single_label_classification", "multi_label_classification"] + problem_types = ["regression", "binary", "multiclass", "multilabel"] def __init__(self, config: HeadConfig) -> None: super().__init__() @@ -41,21 +44,31 @@ def forward(self, logits: Tensor | NestedTensor, labels: Tensor | NestedTensor) if labels is None: return None if self.problem_type is None: - if self.num_labels == 1: + if labels.is_floating_point(): self.problem_type = "regression" - elif 
self.num_labels > 1 and labels.dtype in (torch.long, torch.int): - self.problem_type = "single_label_classification" + elif self.num_labels == 1: + self.problem_type = "binary" + elif labels.unique().numel() == 2: + self.problem_type = "multilabel" else: - self.problem_type = "multi_label_classification" + self.problem_type = "multiclass" + warn( + f"`problem_type` is not set. Assuming {self.problem_type}. \n" + "This can lead to unexpected behavior. Please set `problem_type` explicitly." + ) self.config.problem_type = self.problem_type if self.problem_type == "regression": labels = labels.to(logits.dtype) if self.num_labels == 1: return F.mse_loss(logits.squeeze(), labels.squeeze()) logits, labels = logits.view(-1, self.num_labels), labels.view(-1, self.num_labels) - return sum(F.mse_loss(logits[:, i], labels[:, i]).sqrt() for i in range(self.num_labels)) - if self.problem_type == "single_label_classification": + return sum(F.mse_loss(logits[:, i], labels[:, i]).sqrt() for i in range(self.num_labels)) # type: ignore + if self.problem_type == "multiclass": return F.cross_entropy(logits.view(-1, self.num_labels), labels.view(-1)) - if self.problem_type == "multi_label_classification": - return F.binary_cross_entropy_with_logits(logits, labels) + if self.problem_type == "binary": + if logits.ndim == labels.ndim + 1: + logits = logits.squeeze(-1) + return F.binary_cross_entropy_with_logits(logits, labels.to(logits.dtype)) + if self.problem_type == "multilabel": + return F.multilabel_soft_margin_loss(logits, labels.to(logits.dtype)) raise ValueError(f"problem_type should be one of {self.problem_types}, but got {self.problem_type}") diff --git a/multimolecule/module/criterions/multiclass.py b/multimolecule/module/criterions/multiclass.py new file mode 100644 index 00000000..f7070e94 --- /dev/null +++ b/multimolecule/module/criterions/multiclass.py @@ -0,0 +1,44 @@ +# MultiMolecule +# Copyright (C) 2024-Present MultiMolecule + +# This program is free software: you can 
redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import torch +from danling import NestedTensor +from torch import Tensor, nn + +if TYPE_CHECKING: + from ..heads.config import HeadConfig + +from .registry import CriterionRegistry + + +@CriterionRegistry.register("multiclass") +class CrossEntropyLoss(nn.CrossEntropyLoss): + def __init__(self, config: HeadConfig) -> None: + super().__init__(**config.get("loss", {})) + self.config = config + + def forward(self, input: NestedTensor | Tensor, target: NestedTensor | Tensor) -> Tensor: + if isinstance(input, NestedTensor): + input = torch.cat(input.storage()) + if isinstance(target, NestedTensor): + target = torch.cat(target.storage()) + if input.ndim > 2: + input, target = input.view(-1, input.size(-1)), target.view(-1) + return super().forward(input, target.long()) diff --git a/multimolecule/module/criterions/multilabel.py b/multimolecule/module/criterions/multilabel.py new file mode 100644 index 00000000..c72bb9f9 --- /dev/null +++ b/multimolecule/module/criterions/multilabel.py @@ -0,0 +1,44 @@ +# MultiMolecule +# Copyright (C) 2024-Present MultiMolecule + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# any later version. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import torch +from danling import NestedTensor +from torch import Tensor, nn + +if TYPE_CHECKING: + from ..heads.config import HeadConfig + +from .registry import CriterionRegistry + + +@CriterionRegistry.register("multilabel") +class MultiLabelSoftMarginLoss(nn.MultiLabelSoftMarginLoss): + def __init__(self, config: HeadConfig) -> None: + super().__init__(**config.get("loss", {})) + self.config = config + + def forward(self, input: NestedTensor | Tensor, target: NestedTensor | Tensor) -> Tensor: + if isinstance(target, NestedTensor) and target.ndim > 2: + input, target = input.view(-1, input.size(-1)), target.view(-1, target.size(-1)) + if isinstance(input, NestedTensor): + input = torch.cat(input.storage()) + if isinstance(target, NestedTensor): + target = torch.cat(target.storage()) + return super().forward(input, target.float()) diff --git a/multimolecule/module/criterions/registry.py b/multimolecule/module/criterions/registry.py new file mode 100644 index 00000000..856341f7 --- /dev/null +++ b/multimolecule/module/criterions/registry.py @@ -0,0 +1,29 @@ +# MultiMolecule +# Copyright (C) 2024-Present MultiMolecule + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# any later version. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from chanfig import ConfigRegistry as Registry_ +from torch import nn + + +class Registry(Registry_): # pylint: disable=too-few-public-methods + key = "problem_type" + + def build(self, config) -> nn.Module: # type: ignore[override] + name = getattr(config, self.getattr("key")) + return self.init(self.lookup(name), config) # type: ignore[arg-type] + + +CriterionRegistry = Registry(fallback=True) diff --git a/multimolecule/module/criterions/regression.py b/multimolecule/module/criterions/regression.py new file mode 100644 index 00000000..4f39e0eb --- /dev/null +++ b/multimolecule/module/criterions/regression.py @@ -0,0 +1,44 @@ +# MultiMolecule +# Copyright (C) 2024-Present MultiMolecule + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import torch +from danling import NestedTensor +from torch import Tensor, nn + +if TYPE_CHECKING: + from ..heads.config import HeadConfig + +from .registry import CriterionRegistry + + +@CriterionRegistry.register("regression") +class MSELoss(nn.MSELoss): + def __init__(self, config: HeadConfig) -> None: + super().__init__(**config.get("loss", {})) + self.config = config + + def forward(self, input: NestedTensor | Tensor, target: NestedTensor | Tensor) -> Tensor: + if isinstance(input, NestedTensor): + input = torch.cat(input.flatten().storage()) + if isinstance(target, NestedTensor): + target = torch.cat(target.flatten().storage()) + if input.ndim == target.ndim + 1: + target = target.unsqueeze(-1) + return super().forward(input, target.to(input.dtype)) diff --git a/multimolecule/module/heads/__init__.py b/multimolecule/module/heads/__init__.py index 8cc91f29..0e857c5e 100644 --- a/multimolecule/module/heads/__init__.py +++ b/multimolecule/module/heads/__init__.py @@ -21,14 +21,8 @@ from .pretrain import MaskedLMHead from .registry import HeadRegistry from .sequence import SequencePredictionHead -from .token import TokenHeadRegistryHF, TokenKMerHead, TokenPredictionHead -from .transform import ( - HeadTransformRegistry, - HeadTransformRegistryHF, - IdentityTransform, - LinearTransform, - NonLinearTransform, -) +from .token import TokenKMerHead, TokenPredictionHead +from .transform import HeadTransformRegistry, IdentityTransform, LinearTransform, NonLinearTransform __all__ = [ "BaseHeadConfig", @@ -37,14 +31,12 @@ "HeadRegistry", "PredictionHead", "SequencePredictionHead", - "TokenHeadRegistryHF", "TokenPredictionHead", "TokenKMerHead", "ContactPredictionHead", "MaskedLMHead", "HeadOutput", "HeadTransformRegistry", - "HeadTransformRegistryHF", "LinearTransform", "NonLinearTransform", "IdentityTransform", diff --git a/multimolecule/module/heads/config.py 
b/multimolecule/module/heads/config.py index bb0dbba6..ad67e1a0 100644 --- a/multimolecule/module/heads/config.py +++ b/multimolecule/module/heads/config.py @@ -16,15 +16,15 @@ from __future__ import annotations -from collections import OrderedDict -from dataclasses import dataclass +from typing import Optional +from chanfig import FlatDict -class BaseHeadConfig(OrderedDict): + +class BaseHeadConfig(FlatDict): pass -@dataclass class HeadConfig(BaseHeadConfig): r""" Configuration class for a prediction head. @@ -35,8 +35,8 @@ class HeadConfig(BaseHeadConfig): Head should look for [`Config.num_labels`][multimolecule.PreTrainedConfig] if is `None`. problem_type: - Problem type for `XxxForYyyPrediction` models. Can be one of `"regression"`, - `"single_label_classification"` or `"multi_label_classification"`. + Problem type for `XxxForYyyPrediction` models. Can be one of `"binary"`, `"regression"`, + `"multiclass"` or `"multilabel"`. Head should look for [`Config.problem_type`][multimolecule.PreTrainedConfig] if is `None`. hidden_size: @@ -55,25 +55,29 @@ class HeadConfig(BaseHeadConfig): The activation function of the final prediction output. layer_norm_eps: The epsilon used by the layer normalization layers. - output_name (`str`, *optional*): + output_name: The name of the tensor required in model outputs. If is `None`, will use the default output name of the corresponding head. + type: + The type of the head in the model. + + This is used by [`MultiMoleculeModel`][multimolecule.MultiMoleculeModel] to construct heads. 
""" - num_labels: int = None # type: ignore[assignment] - problem_type: str = None # type: ignore[assignment] - hidden_size: int | None = None + num_labels: Optional[int] = None + problem_type: Optional[str] = None + hidden_size: Optional[int] = None dropout: float = 0.0 - transform: str | None = None - transform_act: str | None = "gelu" + transform: Optional[str] = None + transform_act: Optional[str] = "gelu" bias: bool = True - act: str | None = None + act: Optional[str] = None layer_norm_eps: float = 1e-12 - output_name: str | None = None + output_name: Optional[str] = None + type: Optional[str] = None -@dataclass class MaskedLMHeadConfig(BaseHeadConfig): r""" Configuration class for a Masked Language Modeling head. @@ -95,17 +99,17 @@ class MaskedLMHeadConfig(BaseHeadConfig): The activation function of the final prediction output. layer_norm_eps: The epsilon used by the layer normalization layers. - output_name (`str`, *optional*): + output_name: The name of the tensor required in model outputs. If is `None`, will use the default output name of the corresponding head. 
""" - hidden_size: int | None = None + hidden_size: Optional[int] = None dropout: float = 0.0 - transform: str | None = "nonlinear" - transform_act: str | None = "gelu" + transform: Optional[str] = "nonlinear" + transform_act: Optional[str] = "gelu" bias: bool = True - act: str | None = None + act: Optional[str] = None layer_norm_eps: float = 1e-12 - output_name: str | None = None + output_name: Optional[str] = None diff --git a/multimolecule/module/heads/contact.py b/multimolecule/module/heads/contact.py index 50ec4fbb..b2ceb2ff 100644 --- a/multimolecule/module/heads/contact.py +++ b/multimolecule/module/heads/contact.py @@ -16,10 +16,12 @@ from __future__ import annotations -from typing import Mapping, Tuple +from typing import Callable, Mapping, Tuple, Type import torch from danling import NestedTensor +from danling.modules import MLP +from lazy_imports import try_import from torch import Tensor, nn from transformers.modeling_outputs import ModelOutput from typing_extensions import TYPE_CHECKING @@ -28,13 +30,55 @@ from .generic import PredictionHead from .output import HeadOutput from .registry import HeadRegistry -from .utils import average_product_correct, symmetrize + +with try_import() as tv: + from torchvision.models.resnet import BasicBlock, Bottleneck if TYPE_CHECKING: from multimolecule.models import PreTrainedConfig -@HeadRegistry.register("contact") +@HeadRegistry.contact.register("simple", default=True) +class ContactHead(PredictionHead): + + output_name: str = "last_hidden_state" + + def __init__(self, config: PreTrainedConfig, head_config: HeadConfig | None = None): + super().__init__(config, head_config) + out_channels: int = self.config.hidden_size # type: ignore[assignment] + self.qk_proj = nn.Linear(out_channels, 2 * out_channels) + self.ffn = MLP(1, out_channels, residual=False) + + def forward( # type: ignore[override] # pylint: disable=arguments-renamed + self, + outputs: ModelOutput | Mapping | Tuple[Tensor, ...], + attention_mask: Tensor 
| None = None, + input_ids: NestedTensor | Tensor | None = None, + labels: Tensor | None = None, + output_name: str | None = None, + **kwargs, + ) -> HeadOutput: + if isinstance(outputs, (Mapping, ModelOutput)): + output = outputs[output_name or self.output_name] + elif isinstance(outputs, tuple): + output = outputs[0] + else: + raise ValueError(f"Unsupported type for outputs: {type(outputs)}") + + if attention_mask is None: + attention_mask = self._get_attention_mask(input_ids) + output = output * attention_mask.unsqueeze(-1) + output, _, _ = self._remove_special_tokens(output, attention_mask, input_ids) + + q, k = self.qk_proj(output).chunk(2, dim=-1) + contact_map = (q @ k.transpose(-2, -1)).unsqueeze(-1) + contact_map = contact_map + self.ffn(contact_map) + if "continuous" in outputs: + contact_map = contact_map * (1 + outputs["continuous"].unsqueeze(dim=-1)) # type: ignore[call-overload] + return super().forward(contact_map, labels) + + +@HeadRegistry.contact.register("attention") class ContactPredictionHead(PredictionHead): r""" Head for tasks in contact-level. 
@@ -50,13 +94,20 @@ class ContactPredictionHead(PredictionHead): output_name: str = "attentions" r"""The default output to use for the head.""" + requires_attention: bool = True + def __init__(self, config: PreTrainedConfig, head_config: HeadConfig | None = None): super().__init__(config, head_config) - self.bos_token_id = config.bos_token_id - self.eos_token_id = config.eos_token_id - self.pad_token_id = config.pad_token_id - self.decoder = nn.Linear( - config.num_hidden_layers * config.num_attention_heads, self.num_labels, bias=self.config.bias + self.config.hidden_size = config.num_hidden_layers * config.num_attention_heads + num_layers = self.config.get("num_layers", 16) + num_channels = self.config.get("num_channels", self.config.hidden_size // 10) # type: ignore[operator] + block = self.config.get("block", "auto") + self.decoder = ResNet( + num_layers=num_layers, + hidden_size=self.config.hidden_size, # type: ignore[arg-type] + block=block, + num_channels=num_channels, + num_labels=self.num_labels, ) if head_config is not None and head_config.output_name is not None: self.output_name = head_config.output_name @@ -81,19 +132,6 @@ def forward( # type: ignore[override] # pylint: disable=arguments-renamed output_name: The name of the output to use. Defaults to `self.output_name`. """ - if attention_mask is None: - if isinstance(input_ids, NestedTensor): - input_ids, attention_mask = input_ids.tensor, input_ids.mask - else: - if input_ids is None: - raise ValueError( - f"Either attention_mask or input_ids must be provided for {self.__class__.__name__} to work." - ) - if self.pad_token_id is None: - raise ValueError( - f"pad_token_id must be provided when attention_mask is not passed to {self.__class__.__name__}." 
- ) - attention_mask = input_ids.ne(self.pad_token_id) if isinstance(outputs, (Mapping, ModelOutput)): output = outputs[output_name or self.output_name] @@ -105,13 +143,14 @@ def forward( # type: ignore[override] # pylint: disable=arguments-renamed # This makes no difference most of the time because the other tokens won't attend to them, # but it does for the contact prediction task, which takes attentions as input, # so we have to mimic that here. + if attention_mask is None: + attention_mask = self._get_attention_mask(input_ids) attention_mask = attention_mask.unsqueeze(1) * attention_mask.unsqueeze(2) - attentions *= attention_mask[:, None, None, :, :] + attentions = attentions * attention_mask[:, None, None, :, :] # remove cls token attentions if self.bos_token_id is not None: attentions = attentions[..., 1:, 1:] - # process attention_mask and input_ids to make removal of eos token happy attention_mask = attention_mask[..., 1:] if input_ids is not None: input_ids = input_ids[..., 1:] @@ -124,14 +163,172 @@ def forward( # type: ignore[override] # pylint: disable=arguments-renamed seq_length = attention_mask.size(-1) eos_mask = torch.arange(seq_length, device=attentions.device).unsqueeze(0) == last_valid_indices eos_mask = eos_mask.unsqueeze(1) * eos_mask.unsqueeze(2) - attentions *= eos_mask[:, None, None, :, :] + attentions = attentions * eos_mask[:, None, None, :, :] attentions = attentions[..., :-1, :-1] # features: batch x channels x input_ids x input_ids (symmetric) batch_size, layers, heads, seqlen, _ = attentions.size() attentions = attentions.view(batch_size, layers * heads, seqlen, seqlen) - attentions = attentions.to(self.decoder.weight.device) + attentions = attentions.to(self.decoder.proj.weight.device) attentions = average_product_correct(symmetrize(attentions)) attentions = attentions.permute(0, 2, 3, 1).squeeze(3) return super().forward(attentions, labels, **kwargs) + + +@HeadRegistry.contact.register("logits") +class 
ContactLogitsHead(PredictionHead): + r""" + Head for tasks in contact-level. + + Performs symmetrization, and average product correct. + + Args: + config: The configuration object for the model. + head_config: The configuration object for the head. + If None, will use configuration from the `config`. + """ + + output_name: str = "last_hidden_state" + r"""The default output to use for the head.""" + + requires_attention: bool = False + + def __init__(self, config: PreTrainedConfig, head_config: HeadConfig | None = None): + super().__init__(config, head_config) + num_layers = self.config.get("num_layers", 16) + num_channels = self.config.get("num_channels", self.config.hidden_size // 10) # type: ignore[operator] + block = self.config.get("block", "auto") + self.decoder = ResNet( + num_layers=num_layers, + hidden_size=self.config.hidden_size, # type: ignore[arg-type] + block=block, + num_channels=num_channels, + num_labels=self.num_labels, + ) + if head_config is not None and head_config.output_name is not None: + self.output_name = head_config.output_name + + def forward( # type: ignore[override] # pylint: disable=arguments-renamed + self, + outputs: ModelOutput | Mapping | Tuple[Tensor, ...], + attention_mask: Tensor | None = None, + input_ids: NestedTensor | Tensor | None = None, + labels: Tensor | None = None, + output_name: str | None = None, + **kwargs, + ) -> HeadOutput: + r""" + Forward pass of the ContactPredictionHead. + + Args: + outputs: The outputs of the model. + attention_mask: The attention mask for the inputs. + input_ids: The input ids for the inputs. + labels: The labels for the head. + output_name: The name of the output to use. + Defaults to `self.output_name`. 
+ """ + if isinstance(outputs, (Mapping, ModelOutput)): + output = outputs[output_name or self.output_name] + elif isinstance(outputs, tuple): + output = outputs[0] + else: + raise ValueError(f"Unsupported type for outputs: {type(outputs)}") + + if attention_mask is None: + attention_mask = self._get_attention_mask(input_ids) + output = output * attention_mask.unsqueeze(-1) + output, _, _ = self._remove_special_tokens(output, attention_mask, input_ids) + + # make symmetric contact map + contact_map = output.unsqueeze(1) * output.unsqueeze(2) + + return super().forward(contact_map, labels, **kwargs) + + +class ResNet(nn.Module): + def __init__( + self, + num_layers: int, + hidden_size: int, + block: Type[BasicBlock | Bottleneck] | str = "auto", + num_channels: int | None = None, + num_labels: int = 1, + norm_layer: Callable[..., nn.Module] | None = None, + zero_init_residual: bool = True, + ) -> None: + tv.check() + super().__init__() + + if block == "auto": + block = BasicBlock if num_layers < 50 else Bottleneck + elif block in ("basic", "BasicBlock"): + block = BasicBlock + elif block in ("bottleneck", "Bottleneck"): + block = Bottleneck + else: + raise ValueError(f"Unknown block type: {block}") + if num_channels is None: + num_channels = hidden_size // 10 + if norm_layer is None: + norm_layer = LayerNorm2D + + self.proj = nn.Conv2d(hidden_size, num_channels, kernel_size=3, stride=1, padding=1, bias=False) + self.norm = norm_layer(num_channels) + self.relu = nn.ReLU(inplace=True) + self.layers = nn.Sequential( + *[block(num_channels, num_channels, norm_layer=norm_layer) for _ in range(num_layers)] # type: ignore + ) + self.output = nn.Linear(num_channels, num_labels) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + # Zero-initialize the last BN in each residual 
branch, + # so that the residual branch starts with zeros, and each residual block behaves like an identity. + # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 + if zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck) and m.bn3.weight is not None: + nn.init.constant_(m.bn3.weight, 0) # type: ignore[arg-type] + elif isinstance(m, BasicBlock) and m.bn2.weight is not None: + nn.init.constant_(m.bn2.weight, 0) # type: ignore[arg-type] + + def forward(self, x: Tensor) -> Tensor: + x = self.proj(x.transpose(1, 3)) + x = self.norm(x) + x = self.relu(x) + x = self.layers(x) + x = self.output(x.transpose(1, 3)) + return x + + +class LayerNorm2D(nn.GroupNorm): + def __init__(self, num_features: int, eps: float = 1e-5, elementwise_affine: bool = True) -> None: + super().__init__(num_channels=num_features, eps=eps, affine=elementwise_affine, num_groups=1) + self.num_channels = num_features + + def __repr__(self): + return f"{self.__class__.__name__}(num_channels={self.num_channels}, eps={self.eps}, affine={self.affine})" + + +def symmetrize(x): + "Make layer symmetric in final two dimensions, used for contact prediction." + return x + x.transpose(-1, -2) + + +def average_product_correct(x): + "Perform average product correct, used for contact prediction." 
+ a1 = x.sum(-1, keepdims=True) + a2 = x.sum(-2, keepdims=True) + a12 = x.sum((-1, -2), keepdims=True) + + avg = a1 * a2 + avg.div_(a12) # in-place to reduce memory + normalized = x - avg + return normalized diff --git a/multimolecule/module/heads/generic.py b/multimolecule/module/heads/generic.py index d97950a2..ae82e178 100644 --- a/multimolecule/module/heads/generic.py +++ b/multimolecule/module/heads/generic.py @@ -16,7 +16,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Tuple from warnings import warn import torch @@ -24,7 +24,7 @@ from torch import Tensor, nn from transformers.activations import ACT2FN -from ..criterions import Criterion +from ..criterions import CriterionRegistry from .config import HeadConfig from .output import HeadOutput from .transform import HeadTransformRegistryHF @@ -44,24 +44,28 @@ class PredictionHead(nn.Module): """ num_labels: int + requires_attention: bool = False def __init__(self, config: PreTrainedConfig, head_config: HeadConfig | None = None): super().__init__() if head_config is None: - head_config = config.head + head_config = config.head or HeadConfig(num_labels=config.num_labels) + elif head_config.num_labels is None: + head_config.num_labels = config.num_labels self.config = head_config if self.config.hidden_size is None: self.config.hidden_size = config.hidden_size - if self.config.num_labels is None: - self.config.num_labels = config.num_labels if self.config.problem_type is None: self.config.problem_type = config.problem_type - self.num_labels = self.config.num_labels + self.bos_token_id = config.bos_token_id + self.eos_token_id = config.eos_token_id + self.pad_token_id = config.pad_token_id + self.num_labels = self.config.num_labels # type: ignore[assignment] self.dropout = nn.Dropout(self.config.dropout) self.transform = HeadTransformRegistryHF.build(self.config) - self.decoder = nn.Linear(config.hidden_size, self.num_labels, bias=self.config.bias) + 
self.decoder = nn.Linear(self.config.hidden_size, self.num_labels, bias=self.config.bias) self.activation = ACT2FN[self.config.act] if self.config.act is not None else None - self.criterion = Criterion(self.config) + self.criterion = CriterionRegistry.build(self.config) def forward(self, embeddings: Tensor, labels: Tensor | None, **kwargs) -> HeadOutput: r""" @@ -85,6 +89,42 @@ def forward(self, embeddings: Tensor, labels: Tensor | None, **kwargs) -> HeadOu if isinstance(labels, NestedTensor): if isinstance(output, Tensor): output = labels.nested_like(output, strict=False) - return HeadOutput(output, self.criterion(torch.cat(output.storage()), torch.cat(labels.storage()))) + return HeadOutput(output, self.criterion(output.concat, labels.concat)) return HeadOutput(output, self.criterion(output, labels)) return HeadOutput(output) + + def _get_attention_mask(self, input_ids: NestedTensor | Tensor) -> Tensor: + if isinstance(input_ids, NestedTensor): + return input_ids.mask + if input_ids is None: + raise ValueError( + f"Either attention_mask or input_ids must be provided for {self.__class__.__name__} to work." + ) + if self.pad_token_id is None: + raise ValueError( + f"pad_token_id must be provided when attention_mask is not passed to {self.__class__.__name__}." 
+ ) + return input_ids.ne(self.pad_token_id) + + def _remove_special_tokens( + self, output: Tensor, attention_mask: Tensor, input_ids: Tensor | None + ) -> Tuple[Tensor, Tensor, Tensor]: + # remove cls token embeddings + if self.bos_token_id is not None: + output = output[..., 1:, :] + attention_mask = attention_mask[..., 1:] + if input_ids is not None: + input_ids = input_ids[..., 1:] + # remove eos token embeddings + if self.eos_token_id is not None: + if input_ids is not None: + eos_mask = input_ids.ne(self.eos_token_id).to(output) + input_ids = input_ids[..., :-1] + else: + last_valid_indices = attention_mask.sum(dim=-1) + seq_length = attention_mask.size(-1) + eos_mask = torch.arange(seq_length, device=output.device) == last_valid_indices.unsqueeze(1) + output = output * eos_mask[:, :, None] + output = output[..., :-1, :] + attention_mask = attention_mask[..., 1:] + return output, attention_mask, input_ids diff --git a/multimolecule/module/heads/pretrain.py b/multimolecule/module/heads/pretrain.py index 994cb8ca..c6968c4b 100644 --- a/multimolecule/module/heads/pretrain.py +++ b/multimolecule/module/heads/pretrain.py @@ -53,8 +53,8 @@ def __init__( ): super().__init__() if head_config is None: - head_config = config.lm_head if hasattr(config, "lm_head") else config.head # type: ignore[assignment] - self.config: MaskedLMHeadConfig = head_config # type: ignore[assignment] + head_config = (config.lm_head if hasattr(config, "lm_head") else config.head) or MaskedLMHeadConfig() + self.config: MaskedLMHeadConfig = head_config if self.config.hidden_size is None: self.config.hidden_size = config.hidden_size self.num_labels = config.vocab_size @@ -97,6 +97,6 @@ def forward( if isinstance(labels, NestedTensor): if isinstance(output, Tensor): output = labels.nested_like(output, strict=False) - return HeadOutput(output, F.cross_entropy(torch.cat(output.storage()), torch.cat(labels.storage()))) + return HeadOutput(output, F.cross_entropy(output.concat, labels.concat)) 
return HeadOutput(output, F.cross_entropy(output.view(-1, self.num_labels), labels.view(-1))) return HeadOutput(output) diff --git a/multimolecule/module/heads/registry.py b/multimolecule/module/heads/registry.py index e5393e4e..6db3b680 100644 --- a/multimolecule/module/heads/registry.py +++ b/multimolecule/module/heads/registry.py @@ -14,6 +14,16 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -from chanfig import Registry +from chanfig import ConfigRegistry as Registry_ +from torch import nn + + +class Registry(Registry_): # pylint: disable=too-few-public-methods + key = "type" + + def build(self, config, head_config) -> nn.Module: # type: ignore[override] + name = getattr(head_config, self.getattr("key")) + return self.init(self.lookup(name), config, head_config) # type: ignore[arg-type] + HeadRegistry = Registry(default_factory=Registry, fallback=True) diff --git a/multimolecule/module/heads/token.py b/multimolecule/module/heads/token.py index dbe6c721..5697d36c 100644 --- a/multimolecule/module/heads/token.py +++ b/multimolecule/module/heads/token.py @@ -19,7 +19,6 @@ from functools import partial from typing import TYPE_CHECKING, Mapping, Tuple -import torch from chanfig import ConfigRegistry from danling import NestedTensor from torch import Tensor @@ -54,9 +53,6 @@ class TokenPredictionHead(PredictionHead): def __init__(self, config: PreTrainedConfig, head_config: HeadConfig | None = None): super().__init__(config, head_config) - self.bos_token_id = config.bos_token_id - self.eos_token_id = config.eos_token_id - self.pad_token_id = config.pad_token_id if head_config is not None and head_config.output_name is not None: self.output_name = head_config.output_name @@ -80,45 +76,17 @@ def forward( # type: ignore[override] # pylint: disable=arguments-renamed output_name: The name of the output to use. Defaults to `self.output_name`. 
""" - if attention_mask is None: - if isinstance(input_ids, NestedTensor): - input_ids, attention_mask = input_ids.tensor, input_ids.mask - else: - if input_ids is None: - raise ValueError( - f"Either attention_mask or input_ids must be provided for {self.__class__.__name__} to work." - ) - if self.pad_token_id is None: - raise ValueError( - f"pad_token_id must be provided when attention_mask is not passed to {self.__class__.__name__}." - ) - attention_mask = input_ids.ne(self.pad_token_id) - if isinstance(outputs, (Mapping, ModelOutput)): output = outputs[output_name or self.output_name] elif isinstance(outputs, tuple): output = outputs[0] else: raise ValueError(f"Unsupported type for outputs: {type(outputs)}") - output = output * attention_mask.unsqueeze(-1) - # remove cls token embeddings - if self.bos_token_id is not None: - output = output[..., 1:, :] - # process attention_mask and input_ids to make removal of eos token happy - attention_mask = attention_mask[..., 1:] - if input_ids is not None: - input_ids = input_ids[..., 1:] - # remove eos token embeddings - if self.eos_token_id is not None: - if input_ids is not None: - eos_mask = input_ids.ne(self.eos_token_id).to(output) - else: - last_valid_indices = attention_mask.sum(dim=-1) - seq_length = attention_mask.size(-1) - eos_mask = torch.arange(seq_length, device=output.device) == last_valid_indices.unsqueeze(1) - output = output * eos_mask[:, :, None] - output = output[..., :-1, :] + if attention_mask is None: + attention_mask = self._get_attention_mask(input_ids) + output = output * attention_mask.unsqueeze(-1) + output, _, _ = self._remove_special_tokens(output, attention_mask, input_ids) return super().forward(output, labels, **kwargs) @@ -141,9 +109,6 @@ class TokenKMerHead(PredictionHead): def __init__(self, config: PreTrainedConfig, head_config: HeadConfig | None = None): super().__init__(config, head_config) self.nmers = config.nmers - self.bos_token_id = config.bos_token_id - self.eos_token_id = 
config.eos_token_id - self.pad_token_id = config.pad_token_id if head_config is not None and head_config.output_name is not None: self.output_name = head_config.output_name # Do not pass bos_token_id and eos_token_id to unfold_kmer_embeddings @@ -170,46 +135,17 @@ def forward( # type: ignore[override] # pylint: disable=arguments-renamed output_name: The name of the output to use. Defaults to `self.output_name`. """ - if attention_mask is None: - if isinstance(input_ids, NestedTensor): - input_ids, attention_mask = input_ids.tensor, input_ids.mask - else: - if input_ids is None: - raise ValueError( - f"Either attention_mask or input_ids must be provided for {self.__class__.__name__} to work." - ) - if self.pad_token_id is None: - raise ValueError( - f"pad_token_id must be provided when attention_mask is not passed to {self.__class__.__name__}." - ) - attention_mask = input_ids.ne(self.pad_token_id) - if isinstance(outputs, (Mapping, ModelOutput)): output = outputs[output_name or self.output_name] elif isinstance(outputs, tuple): output = outputs[0] else: raise ValueError(f"Unsupported type for outputs: {type(outputs)}") - output = output * attention_mask.unsqueeze(-1) - # remove cls token embeddings - if self.bos_token_id is not None: - output = output[..., 1:, :] - attention_mask = attention_mask[..., 1:] - if input_ids is not None: - input_ids = input_ids[..., 1:] - # remove eos token embeddings - if self.eos_token_id is not None: - if input_ids is not None: - eos_mask = input_ids.ne(self.eos_token_id).to(output) - input_ids = input_ids[..., :-1] - else: - last_valid_indices = attention_mask.sum(dim=-1) - seq_length = attention_mask.size(-1) - eos_mask = torch.arange(seq_length, device=output.device) == last_valid_indices.unsqueeze(1) - output = output * eos_mask[:, :, None] - output = output[..., :-1, :] - attention_mask = attention_mask[..., 1:] + if attention_mask is None: + attention_mask = self._get_attention_mask(input_ids) + output = output * 
attention_mask.unsqueeze(-1) + output, attention_mask, _ = self._remove_special_tokens(output, attention_mask, input_ids) output = self.unfold_kmer_embeddings(output, attention_mask) return super().forward(output, labels, **kwargs) diff --git a/multimolecule/module/heads/utils.py b/multimolecule/module/heads/utils.py index c5937c6d..cc1f3654 100644 --- a/multimolecule/module/heads/utils.py +++ b/multimolecule/module/heads/utils.py @@ -119,32 +119,3 @@ def unfold_kmer_embeddings( embedding = torch.cat([embedding, tensor[seq_len - 1][None, :]]) output[index, : seq_len + nmers - 1] = embedding return output - - -def rotate_half(x): - x1, x2 = x.chunk(2, dim=-1) - return torch.cat((-x2, x1), dim=-1) - - -def apply_rotary_pos_emb(x, cos, sin): - cos = cos[:, :, : x.shape[-2], :] - sin = sin[:, :, : x.shape[-2], :] - - return (x * cos) + (rotate_half(x) * sin) - - -def symmetrize(x): - "Make layer symmetric in final two dimensions, used for contact prediction." - return x + x.transpose(-1, -2) - - -def average_product_correct(x): - "Perform average product correct, used for contact prediction." - a1 = x.sum(-1, keepdims=True) - a2 = x.sum(-2, keepdims=True) - a12 = x.sum((-1, -2), keepdims=True) - - avg = a1 * a2 - avg.div_(a12) # in-place to reduce memory - normalized = x - avg - return normalized diff --git a/multimolecule/module/model.py b/multimolecule/module/model.py new file mode 100644 index 00000000..c190f34a --- /dev/null +++ b/multimolecule/module/model.py @@ -0,0 +1,160 @@ +# MultiMolecule +# Copyright (C) 2024-Present MultiMolecule + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from __future__ import annotations + +from chanfig import FlatDict +from danling import NestedTensor +from torch import Tensor, nn + +from .backbones import BackboneRegistry +from .heads import HeadRegistry +from .necks import NeckRegistry +from .registry import ModelRegistry + + +@ModelRegistry.register(default=True) +class MultiMoleculeModel(nn.Module): + + whitelist: list[str] = ["weight", "conv", "fc"] + blacklist: list[str] = ["bias", "bn", "norm"] + + def __init__( + self, + backbone: dict, + heads: dict, + neck: dict | None = None, + max_length: int = 1024, + truncation: bool = False, + probing: bool = False, + ): + super().__init__() + + # Backbone + self.backbone = BackboneRegistry.build(**backbone) + backbone = self.backbone.config + out_channels = self.backbone.out_channels + + # Neck + if neck: + num_discrete = self.backbone.num_discrete + num_continuous = self.backbone.num_continuous + embed_dim = self.backbone.sequence.config.hidden_size + attention_heads = self.backbone.sequence.config.num_attention_heads + neck.update( + { + "num_discrete": num_discrete, + "num_continuous": num_continuous, + "embed_dim": embed_dim, + "attention_heads": attention_heads, + "max_length": max_length, + "truncation": truncation, + } + ) + self.neck = NeckRegistry.build(**neck) + out_channels = self.neck.out_channels + else: + self.neck = None + + # Heads + for head in heads.values(): + if "hidden_size" not in head or head["hidden_size"] is None: + head["hidden_size"] = out_channels + self.heads = nn.ModuleDict({name: HeadRegistry.build(backbone, head) for name, head in heads.items()}) + if any(getattr(h, "requires_attention", False) for h in self.heads.values()): + self.backbone.sequence.config.output_attentions = True + + if probing: + for param in self.backbone.parameters(): + 
param.requires_grad = False + + def forward( + self, + sequence: NestedTensor | Tensor, + discrete: Tensor | None = None, + continuous: Tensor | None = None, + dataset: str | None = None, + **labels: NestedTensor | Tensor, + ) -> FlatDict: + ret = FlatDict() + output, _ = self.backbone(sequence, discrete, continuous) + if self.neck is not None: + output = self.neck(**output) + for task, label in labels.items(): + ret[task] = self.heads[task](output, input_ids=sequence, labels=label) + return ret + + def trainable_parameters( + self, + lr: float, + weight_decay: float, + pretrained_ratio: float = 1e-2, + whitelist: list[str] | None = None, + blacklist: list[str] | None = None, + ) -> list[dict]: + """ + Prepare parameter groups with specific optimization settings. + + Args: + lr: Base learning rate. + weight_decay: Base weight decay. + pretrained_ratio: Scaling factor for backbone's learning rate and weight decay. + whitelist: List of parameter name substrings to include in weight decay. + blacklist: List of parameter name substrings to exclude from weight decay. + + Returns: + Parameter groups for the optimizer. 
+ """ + + whitelist = whitelist or self.whitelist + blacklist = blacklist or self.blacklist + trainable_parameters: list[dict] = [] + + def categorize_parameters( + module: nn.Module, base_lr: float, base_wd: float, lr_ratio: float = 1.0 + ) -> list[dict]: + decay_params = [] + no_decay_params = [] + for name, param in module.named_parameters(): + if not param.requires_grad: + continue + if any(w in name for w in whitelist): + decay_params.append(param) + elif any(b in name for b in blacklist): + no_decay_params.append(param) + else: + decay_params.append(param) + param_groups = [] + if decay_params: + param_groups.append( + {"params": decay_params, "weight_decay": base_wd * lr_ratio, "lr": base_lr * lr_ratio} + ) + if no_decay_params: + param_groups.append({"params": no_decay_params, "weight_decay": 0.0, "lr": base_lr * lr_ratio}) + return param_groups + + heads_param_groups = categorize_parameters(self.heads, lr, weight_decay) + trainable_parameters.extend(heads_param_groups) + + if isinstance(self.backbone, nn.Module): + backbone_param_groups = categorize_parameters(self.backbone, lr, weight_decay, lr_ratio=pretrained_ratio) + trainable_parameters.extend(backbone_param_groups) + + if isinstance(self.neck, nn.Module): + neck_param_groups = categorize_parameters(self.neck, lr, weight_decay) + trainable_parameters.extend(neck_param_groups) + + return trainable_parameters diff --git a/multimolecule/module/necks/__init__.py b/multimolecule/module/necks/__init__.py new file mode 100644 index 00000000..e8f1f7e2 --- /dev/null +++ b/multimolecule/module/necks/__init__.py @@ -0,0 +1,21 @@ +# MultiMolecule +# Copyright (C) 2024-Present MultiMolecule + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# any later version. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from .bert import BERTNeck +from .cat import CatNeck +from .registry import NeckRegistry + +__all__ = ["NeckRegistry", "CatNeck", "BERTNeck"] diff --git a/multimolecule/module/necks/bert.py b/multimolecule/module/necks/bert.py new file mode 100644 index 00000000..1360f0dd --- /dev/null +++ b/multimolecule/module/necks/bert.py @@ -0,0 +1,102 @@ +# MultiMolecule +# Copyright (C) 2024-Present MultiMolecule + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+ +from __future__ import annotations + +import torch +from chanfig import FlatDict +from danling.modules import TransformerEncoder, TransformerEncoderLayer +from torch import Tensor, nn + +from .registry import NeckRegistry + +MAX_LENGTH = 1024 + + +@NeckRegistry.register("bert") +class BERTNeck(nn.Module): + def __init__( # pylint: disable=keyword-arg-before-vararg + self, + num_discrete: int, + num_continuous: int, + embed_dim: int, + attention_heads: int, + num_layers: int = 6, + max_length: int | None = None, + truncation: bool = False, + dropout: float = 0.1, + *args, + **kwargs, + ): + super().__init__() + self.cls_token_dis = nn.Parameter(torch.zeros(embed_dim)) + self.cls_token_con = nn.Parameter(torch.zeros(embed_dim)) + if max_length is None: + if truncation: + max_length = MAX_LENGTH + 1 + num_discrete + 1 + num_continuous + else: + max_length = MAX_LENGTH * 4 + 1 + num_discrete + 1 + num_continuous + self.max_length = max_length + self.pos_embed = nn.Parameter(torch.zeros(1, self.max_length, embed_dim)) + bert_layer = TransformerEncoderLayer( + embed_dim, attention_heads, *args, dropout=dropout, attn_dropout=dropout, ffn_dropout=dropout, **kwargs + ) + self.bert = TransformerEncoder(bert_layer, num_layers) + self.out_channels = embed_dim + nn.init.normal_(self.pos_embed, std=0.02) + nn.init.trunc_normal_(self.cls_token_dis, std=0.2) + nn.init.trunc_normal_(self.cls_token_con, std=0.2) + + def forward( + self, + cls_token: Tensor | None = None, + all_tokens: Tensor | None = None, + discrete: Tensor | None = None, + continuous: Tensor | None = None, + ) -> FlatDict: + ret = FlatDict() + if cls_token is not None: + ret["cls_token"] = self._forward(cls_token, discrete, continuous) + if all_tokens is not None: + ret["all_tokens"] = self._forward(all_tokens, discrete, continuous) + return ret + + def _forward( + self, + sequence: Tensor, + discrete: Tensor | None = None, + continuous: Tensor | None = None, + ) -> Tensor: + if sequence is None: + raise 
ValueError("sequence should not be None.") + if sequence.dim() == 2: + sequence = sequence[:, None] + batch_size, seq_len, _ = sequence.shape + output = sequence + if discrete is not None: + cls_token_dis = self.cls_token_dis.expand(batch_size, 1, -1) + output = torch.cat((output, cls_token_dis, discrete), dim=1) + if continuous is not None: + cls_token_con = self.cls_token_con.expand(batch_size, -1)[:, None] + output = torch.cat((output, cls_token_con, continuous), dim=1) + all_len = output.shape[1] + if all_len > self.pos_embed.shape[1]: + raise ValueError("sequence length is out of range.") + output = output + self.pos_embed[:, 0:all_len, :] + output = self.bert(output)[0][:, 0:seq_len, :] + if seq_len == 1: + output = output.squeeze(1) + return output diff --git a/multimolecule/module/necks/cat.py b/multimolecule/module/necks/cat.py new file mode 100644 index 00000000..d5165a92 --- /dev/null +++ b/multimolecule/module/necks/cat.py @@ -0,0 +1,43 @@ +# MultiMolecule +# Copyright (C) 2024-Present MultiMolecule + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+ +from __future__ import annotations + +import torch +from chanfig import FlatDict +from torch import Tensor + +from .registry import NeckRegistry + + +@NeckRegistry.register("cat") +class CatNeck: # pylint: disable=too-few-public-methods + def __init__(self, embed_dim: int): + self.out_channels = embed_dim * 2 + + def __call__( + self, + cls_token: Tensor | None = None, + all_tokens: Tensor | None = None, + discrete: Tensor | None = None, + continuous: Tensor | None = None, + ) -> FlatDict: + ret = FlatDict() + if cls_token is not None: + ret.cls_token = torch.cat(tuple(i for i in (cls_token, discrete, continuous) if i is not None), -1) + if all_tokens is not None: + ret.all_tokens = torch.cat(tuple(i for i in (all_tokens, discrete, continuous) if i is not None), -1) + return ret diff --git a/multimolecule/module/necks/registry.py b/multimolecule/module/necks/registry.py new file mode 100644 index 00000000..c024227c --- /dev/null +++ b/multimolecule/module/necks/registry.py @@ -0,0 +1,21 @@ +# MultiMolecule +# Copyright (C) 2024-Present MultiMolecule + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+ +from __future__ import annotations + +from chanfig import Registry + +NeckRegistry = Registry() diff --git a/multimolecule/module/registry.py b/multimolecule/module/registry.py new file mode 100644 index 00000000..b0332463 --- /dev/null +++ b/multimolecule/module/registry.py @@ -0,0 +1,35 @@ +# MultiMolecule +# Copyright (C) 2024-Present MultiMolecule + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from __future__ import annotations + +from chanfig import Registry as Registry_ +from torch import nn + +from .backbones import BackboneRegistry +from .backbones.sequences import SequenceRegistry +from .heads import HeadRegistry +from .necks import NeckRegistry + + +class Registry(Registry_): # pylint: disable=too-few-public-methods + def build(self, *args, **kwargs) -> nn.Module: + return super().build(*args, **kwargs) + + +ModelRegistry = Registry() + +__all__ = ["ModelRegistry", "BackboneRegistry", "SequenceRegistry", "NeckRegistry", "HeadRegistry"] diff --git a/multimolecule/tasks/task.py b/multimolecule/tasks/task.py index e2473ab0..5d435f83 100644 --- a/multimolecule/tasks/task.py +++ b/multimolecule/tasks/task.py @@ -34,9 +34,8 @@ class TaskType(StrEnum): class TaskLevel(StrEnum): Sequence = auto() - Nucleotide = auto() + Token = auto() Contact = auto() - # Token = auto() @dataclass diff --git a/pyproject.toml b/pyproject.toml index 972b1d8a..ba202944 100644 --- a/pyproject.toml +++ b/pyproject.toml 
@@ -16,7 +16,7 @@ keywords = [ "Protein", "RNA", ] -license = {file = "LICENSE"} +license = {file = "LICENSE.md"} maintainers = [ {name = "Zhiyuan Chen", email = "this@zyc.ai"}, ] @@ -46,8 +46,9 @@ dynamic = [ dependencies = [ "accelerate", "chanfig>=0.0.105", - "danling>=0.3.11", + "danling[torch]>=0.3.11", "datasets", + 'StrEnum; python_version < "3.11"', "torch", "transformers", ] diff --git a/tests/data/test_dataset.py b/tests/data/test_dataset.py index 5ddfb3c4..13597fa2 100644 --- a/tests/data/test_dataset.py +++ b/tests/data/test_dataset.py @@ -21,7 +21,7 @@ import pytest import torch -from multimolecule import Dataset, Task, TaskLevel, TaskType +from multimolecule import Dataset, Task, TaskLevel, TaskType, defaults class TestRNADataset: @@ -31,28 +31,34 @@ class TestRNADataset: @pytest.mark.parametrize("preprocess", [True, False]) def test_5utr(self, preprocess: bool): + defaults.SEQUENCE_COL_NAME = "input_ids" file = os.path.join(self.root, "5utr.csv") - dataset = Dataset(file, split="train", pretrained=self.pretrained, preprocess=preprocess, auto_rename_cols=True) + dataset = Dataset( + file, split="train", pretrained=self.pretrained, preprocess=preprocess, auto_rename_sequence_col=True + ) + defaults.SEQUENCE_COL_NAME = "sequence" task = Task(type=TaskType.Regression, level=TaskLevel.Sequence) elem = dataset[0] assert isinstance(elem["input_ids"], torch.LongTensor) - assert isinstance(elem["labels"], torch.FloatTensor) + assert isinstance(elem["label"], torch.FloatTensor) batch = dataset[list(range(3))] assert isinstance(batch["input_ids"], dl.NestedTensor) - assert isinstance(batch["labels"], torch.FloatTensor) - assert dataset.tasks["labels"] == task + assert isinstance(batch["label"], torch.FloatTensor) + assert dataset.tasks["label"] == task assert not dataset.discrete_map @pytest.mark.parametrize("preprocess", [True, False]) def test_ncrna(self, preprocess: bool): file = os.path.join(self.root, "ncrna.csv") - dataset = Dataset(file, split="train", 
pretrained=self.pretrained, preprocess=preprocess, auto_rename_cols=True) + dataset = Dataset( + file, split="train", pretrained=self.pretrained, preprocess=preprocess, auto_rename_label_col=True + ) task = Task(type=TaskType.MultiClass, level=TaskLevel.Sequence, num_labels=13) elem = dataset[0] - assert isinstance(elem["input_ids"], torch.LongTensor) + assert isinstance(elem["sequence"], torch.LongTensor) assert isinstance(elem["labels"], torch.LongTensor) batch = dataset[list(range(3))] - assert isinstance(batch["input_ids"], dl.NestedTensor) + assert isinstance(batch["sequence"], dl.NestedTensor) assert isinstance(batch["labels"], torch.LongTensor) assert dataset.tasks["labels"] == task assert not dataset.discrete_map @@ -126,7 +132,7 @@ def test_spliceai(self, preprocess: bool): feature_cols=feature_cols, label_cols=label_cols, ) - task = Task(type=TaskType.Binary, level=TaskLevel.Nucleotide, num_labels=1) + task = Task(type=TaskType.Binary, level=TaskLevel.Token, num_labels=1) elem = dataset[0] assert isinstance(elem["sequence"], torch.LongTensor) assert isinstance(elem["splice_ai"], torch.LongTensor) @@ -175,20 +181,18 @@ def test_rna_task_recognition_json(self): assert dataset.tasks["sequence_regression"] == Task( type=TaskType.Regression, level=TaskLevel.Sequence, num_labels=1 ) - assert dataset.tasks["nucleotide_binary"] == Task( - type=TaskType.Binary, level=TaskLevel.Nucleotide, num_labels=1 - ) + assert dataset.tasks["nucleotide_binary"] == Task(type=TaskType.Binary, level=TaskLevel.Token, num_labels=1) assert dataset.tasks["nucleotide_multiclass"] == Task( - type=TaskType.MultiClass, level=TaskLevel.Nucleotide, num_labels=5 + type=TaskType.MultiClass, level=TaskLevel.Token, num_labels=5 ) assert dataset.tasks["nucleotide_multilabel"] == Task( - type=TaskType.MultiLabel, level=TaskLevel.Nucleotide, num_labels=5 + type=TaskType.MultiLabel, level=TaskLevel.Token, num_labels=5 ) assert dataset.tasks["nucleotide_multireg"] == Task( - 
type=TaskType.Regression, level=TaskLevel.Nucleotide, num_labels=5 + type=TaskType.Regression, level=TaskLevel.Token, num_labels=5 ) assert dataset.tasks["nucleotide_regression"] == Task( - type=TaskType.Regression, level=TaskLevel.Nucleotide, num_labels=1 + type=TaskType.Regression, level=TaskLevel.Token, num_labels=1 ) assert dataset.tasks["contact_binary"] == Task(type=TaskType.Binary, level=TaskLevel.Contact, num_labels=1) assert dataset.tasks["contact_multiclass"] == Task(