diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..02c381e --- /dev/null +++ b/.gitignore @@ -0,0 +1,145 @@ +### Data ### +data/ +erc/ + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# End of https://www.toptal.com/developers/gitignore/api/python \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..f288702 --- /dev/null +++ b/LICENSE @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. 
If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. 
Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. 
+ + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. 
Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. 
+ + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. 
+ + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. 
If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. 
+ + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. 
+ + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the 
material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. 
+ + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. 
If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. 
+ + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<https://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<https://www.gnu.org/licenses/why-not-lgpl.html>. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..89ecc5a --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# Time Series Generation \ No newline at end of file diff --git a/Timeseries_clustering.ipynb b/Timeseries_clustering.ipynb new file mode 100644 index 0000000..7f59b46 --- /dev/null +++ b/Timeseries_clustering.ipynb @@ -0,0 +1,1635 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Timeseries clustering\n", + "\n", + "Time series clustering partitions time series data into groups based on similarity or distance, so that time series in the same cluster are similar.\n", + "\n", + "Methodology followed:\n", + "* Use Variational Recurrent AutoEncoder (VRAE) for dimensionality reduction of the timeseries\n", + "* To visualize the clusters, PCA and t-SNE are used\n", + "\n", + "Paper:\n", + "https://arxiv.org/pdf/1412.6581.pdf" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Contents\n", + "\n", + "0. [Load data and preprocess](#Load-data-and-preprocess)\n", + "1. [Initialize VRAE object](#Initialize-VRAE-object)\n", + "2. [Fit the model onto dataset](#Fit-the-model-onto-dataset)\n", + "3. [Transform the input timeseries to encoded latent vectors](#Transform-the-input-timeseries-to-encoded-latent-vectors)\n", + "4. [Save the model to be fetched later](#Save-the-model-to-be-fetched-later)\n", + "5. 
[Visualize using PCA and tSNE](#Visualize-using-PCA-and-tSNE)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Import required modules" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from model.org_vrae import VRAE\n", + "from model.utils import *\n", + "import numpy as np\n", + "import torch\n", + "\n", + "import plotly\n", + "from torch.utils.data import DataLoader, TensorDataset\n", + "plotly.offline.init_notebook_mode()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Input parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "dload = './model_dir' #download directory" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Hyper parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "hidden_size = 90\n", + "hidden_layer_depth = 1\n", + "latent_length = 20\n", + "batch_size = 32\n", + "learning_rate = 0.0005\n", + "n_epochs = 1\n", + "dropout_rate = 0.2\n", + "optimizer = 'Adam' # options: ADAM, SGD\n", + "cuda = True # options: True, False\n", + "print_every=30\n", + "clip = True # options: True, False\n", + "max_grad_norm=5\n", + "loss = 'MSELoss' # options: SmoothL1Loss, MSELoss\n", + "block = 'LSTM' # options: LSTM, GRU" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load data and preprocess" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_val, y_train, y_val = open_data('data', ratio_train=0.9)\n", + "\n", + "num_classes = len(np.unique(y_train))\n", + "base = np.min(y_train) # Check if data is 0-based\n", + "if base != 0:\n", + " y_train -= base\n", + 
"y_val -= base" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(8549, 140, 1)\n" + ] + } + ], + "source": [ + "print(X_train.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "train_dataset = TensorDataset(torch.from_numpy(X_train))\n", + "test_dataset = TensorDataset(torch.from_numpy(X_val))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Fetch `sequence_length` from dataset**" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "140" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sequence_length = X_train.shape[1]\n", + "sequence_length" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Fetch `number_of_features` from dataset**\n", + "\n", + "This config corresponds to number of input features" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "number_of_features = X_train.shape[2]\n", + "number_of_features" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Initialize VRAE object\n", + "\n", + "VRAE inherits from `sklearn.base.BaseEstimator` and overrides `fit`, `transform` and `fit_transform` functions, similar to sklearn modules" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py:65: UserWarning:\n", + "\n", + "dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects 
num_layers greater than 1, but got dropout=0.2 and num_layers=1\n", + "\n", + "/usr/local/lib/python3.6/dist-packages/torch/nn/_reduction.py:42: UserWarning:\n", + "\n", + "size_average and reduce args will be deprecated, please use reduction='sum' instead.\n", + "\n" + ] + } + ], + "source": [ + "vrae = VRAE(sequence_length=sequence_length,\n", + " number_of_features = number_of_features,\n", + " hidden_size = hidden_size, \n", + " hidden_layer_depth = hidden_layer_depth,\n", + " latent_length = latent_length,\n", + " batch_size = batch_size,\n", + " learning_rate = learning_rate,\n", + " n_epochs = n_epochs,\n", + " dropout_rate = dropout_rate,\n", + " optimizer = optimizer, \n", + " cuda = cuda,\n", + " print_every=print_every, \n", + " clip=clip, \n", + " max_grad_norm=max_grad_norm,\n", + " loss = loss,\n", + " block = block,\n", + " dload = dload)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Fit the model onto dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "fit result\n", + "torch.Size([32, 140, 1])\n", + "Epoch: 0\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 
1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + 
"--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "Batch 30, loss = 4126.4561, recon_loss = 4126.3960, kl_loss = 0.0602\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + 
"--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + 
"--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "Batch 60, loss = 3002.3113, recon_loss = 2999.1250, kl_loss = 3.1863\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + 
"torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "Batch 90, loss = 2697.9019, recon_loss = 2694.5222, kl_loss 
= 3.3796\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + 
"--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "Batch 120, loss = 2783.7349, recon_loss = 2780.4021, kl_loss = 3.3327\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + 
"--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + 
"--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "Batch 150, loss = 2799.6096, recon_loss = 2796.6382, kl_loss = 2.9714\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + 
"torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + 
"torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "Batch 180, loss = 2885.5581, recon_loss = 2882.9241, kl_loss = 2.6341\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + 
"--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + 
"--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "Batch 210, loss = 2366.8254, recon_loss = 2364.6108, kl_loss = 2.2146\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + 
"--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "Batch 
240, loss = 2797.3643, recon_loss = 2795.4651, kl_loss = 1.8991\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + 
"torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "Average loss: 2893.1905\n" + ] + } + ], + "source": [ + "vrae.fit(train_dataset)\n", + "\n", + "#If the model has to be saved, with the learnt parameters use:\n", + "# vrae.fit(dataset, save = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Transform the input timeseries to encoded latent vectors" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", 
+ "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + 
"DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([140, 32, 1])\n", + "--------------------------\n" + ] + } + ], + "source": [ + "z_run = vrae.transform(test_dataset)\n", + "\n", + "#If the latent vectors have to be saved, pass the parameter `save`\n", + "# z_run = vrae.transform(dataset, save = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save the model to be fetched later" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "vrae.save('vrae.pth')\n", + "\n", + "# To load a presaved model, execute:\n", + "# vrae.load('vrae.pth')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + 
"pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git "a/VRAE_\354\230\210\354\213\234.ipynb" "b/VRAE_\354\230\210\354\213\234.ipynb" new file mode 100644 index 0000000..c88052c --- /dev/null +++ "b/VRAE_\354\230\210\354\213\234.ipynb" @@ -0,0 +1,656 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Contents\n", + "\n", + "0. [Load data and preprocess](#Load-data-and-preprocess)\n", + "1. [Initialize VRAE object](#Initialize-VRAE-object)\n", + "2. [Fit the model onto dataset](#Fit-the-model-onto-dataset)\n", + "3. [Transform the input timeseries to encoded latent vectors](#Transform-the-input-timeseries-to-encoded-latent-vectors)\n", + "4. [Save the model to be fetched later](#Save-the-model-to-be-fetched-later)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Import required modules" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "from model.vrae import VRAE\n", + "from model.utils import *\n", + "import numpy as np\n", + "import torch\n", + "from torch.utils.data import DataLoader, Dataset\n", + "from tqdm.notebook import trange\n", + "import tqdm" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Input parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "dload = './saved_model' #download directory" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Hyper parameters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load data and preprocess\n", + "- `folder` : data location\n", + "- `cols_to_remove` : generation 수행하지 않을 column 제거" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**TODO : 해당 변수에 대한 처리를 어떻게 해줘야하는가 확인 작업이 필요함**\n", + "\n", 
+ "~~~\n", + "YYYYMMDD : 년월일\n", + "HHMMSS : 시분초\n", + "MNG_NO : 장비번호\n", + "IF_IDX : 회선 index\n", + "~~~\n", + "\n", + "- 현재는 분석의 편의를 위해 ['YYYYMMDD', 'HHMMSS']만 제거해줌" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(23195128, 56)\n" + ] + } + ], + "source": [ + "# params\n", + "folder = 'data'\n", + "cols_to_remove = ['YYYYMMDD', 'HHMMSS']\n", + "\n", + "# load data\n", + "df_total = load_data(folder, cols_to_remove)\n", + "\n", + "# shape\n", + "print(df_total.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "class HamonDataset(Dataset):\n", + " def __init__(self, data, window, stride):\n", + " self.data = torch.Tensor(data)\n", + " self.window = window\n", + " \n", + " def __len__(self):\n", + " return len(self.data) - self.window \n", + " \n", + " def __getitem__(self, index):\n", + " x_index = index*self.window\n", + " x = self.data[x_index:x_index+self.window]\n", + " return x" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "data = df_total\n", + "stride = 10\n", + "window = 100" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<__main__.HamonDataset at 0x7f7cbaa3f940>" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_dataset = HamonDataset(data, window, stride)\n", + "train_dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([100, 56])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_dataset[0].shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Fetch `sequence_length` from 
dataset**" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "100" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sequence_length = train_dataset[0].shape[0]\n", + "sequence_length" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Fetch `number_of_features` from dataset**\n", + "\n", + "This config corresponds to number of input features" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "56" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "number_of_features = train_dataset[0].shape[1]\n", + "number_of_features" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "n_epochs = 1\n", + "hidden_size = 90\n", + "hidden_layer_depth = 1\n", + "latent_length = 20\n", + "batch_size = 32\n", + "learning_rate = 0.0005\n", + "dropout_rate = 0.2\n", + "optimizer = 'Adam' # options: ADAM, SGD\n", + "cuda = True # options: True, False\n", + "print_every=30\n", + "clip = True # options: True, False\n", + "max_grad_norm=5\n", + "loss = 'MSELoss' # options: SmoothL1Loss, MSELoss\n", + "block = 'LSTM' # options: LSTM, GRU" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "train_loader = DataLoader(dataset = train_dataset,\n", + " batch_size = batch_size,\n", + " shuffle = False,\n", + " drop_last=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[[2.7220e+03, 1.2400e+02, 1.8431e+05, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " 
[2.7220e+03, 1.2400e+02, 3.8349e+05, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " [2.7220e+03, 1.2400e+02, 2.3519e+05, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " ...,\n", + " [2.8500e+03, 1.2400e+02, 2.3200e+02, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " [2.8500e+03, 1.2400e+02, 2.4000e+02, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " [2.8500e+03, 1.2400e+02, 2.4000e+02, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00]],\n", + "\n", + " [[2.8500e+03, 1.2400e+02, 2.4000e+02, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " [2.8500e+03, 1.2400e+02, 2.4000e+02, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " [2.8500e+03, 1.2400e+02, 2.4000e+02, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " ...,\n", + " [2.8630e+03, 1.2400e+02, 1.8664e+04, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " [2.8630e+03, 1.2400e+02, 1.9056e+04, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " [2.8630e+03, 1.2400e+02, 1.8104e+04, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00]],\n", + "\n", + " [[2.8630e+03, 1.2400e+02, 1.8096e+04, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " [2.8630e+03, 1.2400e+02, 1.8640e+04, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " [2.8630e+03, 1.2400e+02, 1.9448e+04, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " ...,\n", + " [2.8730e+03, 1.2400e+02, 3.3920e+03, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " [2.8730e+03, 1.2400e+02, 3.4480e+03, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " [2.8730e+03, 1.2400e+02, 3.3840e+03, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00]],\n", + "\n", + " ...,\n", + "\n", + " [[3.7730e+03, 1.2400e+02, 2.0880e+03, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " [3.7730e+03, 1.2400e+02, 1.9360e+03, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " [3.7730e+03, 1.2400e+02, 1.9840e+03, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " ...,\n", 
+ " [3.7810e+03, 1.2400e+02, 1.7760e+03, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " [3.7810e+03, 1.2400e+02, 1.6800e+03, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " [3.7810e+03, 1.2400e+02, 1.7600e+03, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00]],\n", + "\n", + " [[3.7820e+03, 1.2400e+02, 5.1096e+04, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " [3.7820e+03, 1.2400e+02, 1.2566e+06, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " [3.7820e+03, 1.2400e+02, 5.2016e+04, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " ...,\n", + " [3.7900e+03, 1.2400e+02, 1.6496e+04, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " [3.7900e+03, 1.2400e+02, 1.6416e+04, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " [3.7900e+03, 1.2400e+02, 1.6776e+04, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00]],\n", + "\n", + " [[3.7900e+03, 1.2400e+02, 1.6032e+04, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " [3.7900e+03, 1.2400e+02, 1.6528e+04, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " [3.7900e+03, 1.2400e+02, 1.7032e+04, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " ...,\n", + " [3.7980e+03, 1.2400e+02, 6.1760e+03, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " [3.7980e+03, 1.2400e+02, 6.1920e+03, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00],\n", + " [3.7980e+03, 1.2400e+02, 5.9840e+03, ..., 0.0000e+00,\n", + " 0.0000e+00, 0.0000e+00]]])" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X = iter(train_loader).next()\n", + "X" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([32, 100, 56])" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Initialize VRAE 
object\n", + "\n", + "VRAE inherits from `sklearn.base.BaseEstimator` and overrides `fit`, `transform` and `fit_transform` functions, similar to sklearn modules" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "vrae = VRAE(sequence_length=sequence_length,\n", + " number_of_features = number_of_features,\n", + " hidden_size = hidden_size, \n", + " hidden_layer_depth = hidden_layer_depth,\n", + " latent_length = latent_length,\n", + " batch_size = batch_size,\n", + " learning_rate = learning_rate,\n", + " n_epochs = n_epochs,\n", + " dropout_rate = dropout_rate,\n", + " optimizer = optimizer, \n", + " cuda = cuda,\n", + " print_every=print_every, \n", + " clip=clip, \n", + " max_grad_norm=max_grad_norm,\n", + " loss = loss,\n", + " block = block,\n", + " dload = dload)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Fit the model onto dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[2.7220e+03, 1.2400e+02, 1.8431e+05, ..., 0.0000e+00, 0.0000e+00,\n", + " 0.0000e+00],\n", + " [2.7220e+03, 1.2400e+02, 3.8349e+05, ..., 0.0000e+00, 0.0000e+00,\n", + " 0.0000e+00],\n", + " [2.7220e+03, 1.2400e+02, 2.3519e+05, ..., 0.0000e+00, 0.0000e+00,\n", + " 0.0000e+00],\n", + " ...,\n", + " [2.8500e+03, 1.2400e+02, 2.3200e+02, ..., 0.0000e+00, 0.0000e+00,\n", + " 0.0000e+00],\n", + " [2.8500e+03, 1.2400e+02, 2.4000e+02, ..., 0.0000e+00, 0.0000e+00,\n", + " 0.0000e+00],\n", + " [2.8500e+03, 1.2400e+02, 2.4000e+02, ..., 0.0000e+00, 0.0000e+00,\n", + " 0.0000e+00]])" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_dataset[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "train_loader_test = DataLoader(dataset = train_dataset[0],\n", + " batch_size = 32,\n", + 
" shuffle = False,\n", + " drop_last=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([32, 56])\n" + ] + } + ], + "source": [ + "tmp = iter(train_loader_test).next()\n", + "print(tmp.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "fit result\n", + "<__main__.HamonDataset object at 0x7f7cbaa3f940>\n", + "tensor([[2.7220e+03, 1.2400e+02, 1.8431e+05, ..., 0.0000e+00, 0.0000e+00,\n", + " 0.0000e+00],\n", + " [2.7220e+03, 1.2400e+02, 3.8349e+05, ..., 0.0000e+00, 0.0000e+00,\n", + " 0.0000e+00],\n", + " [2.7220e+03, 1.2400e+02, 2.3519e+05, ..., 0.0000e+00, 0.0000e+00,\n", + " 0.0000e+00],\n", + " ...,\n", + " [2.8500e+03, 1.2400e+02, 2.3200e+02, ..., 0.0000e+00, 0.0000e+00,\n", + " 0.0000e+00],\n", + " [2.8500e+03, 1.2400e+02, 2.4000e+02, ..., 0.0000e+00, 0.0000e+00,\n", + " 0.0000e+00],\n", + " [2.8500e+03, 1.2400e+02, 2.4000e+02, ..., 0.0000e+00, 0.0000e+00,\n", + " 0.0000e+00]])\n", + "torch.Size([32, 100, 56])\n", + "Epoch: 0\n", + "--------------------------\n", + "DEBUGGING\n", + "torch.Size([32, 100, 56])\n", + "--------------------------\n" + ] + }, + { + "ename": "RuntimeError", + "evalue": "Expected hidden[0] size (1, 32, 90), got [1, 100, 90]", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mvrae\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_dataset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;31m#If the model has to be saved, with the learnt parameters use:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;31m# vrae.fit(dataset, save = True)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/repo/projects/timeseries-generation/model/vrae.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, dataset, save)\u001b[0m\n\u001b[1;32m 352\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Epoch: %s'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 353\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 354\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_train\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_loader\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 355\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 356\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_fitted\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/repo/projects/timeseries-generation/model/vrae.py\u001b[0m in \u001b[0;36m_train\u001b[0;34m(self, train_loader)\u001b[0m\n\u001b[1;32m 308\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 309\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mzero_grad\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 310\u001b[0;31m \u001b[0mloss\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrecon_loss\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkl_loss\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompute_loss\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 311\u001b[0m \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 312\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/repo/projects/timeseries-generation/model/vrae.py\u001b[0m in \u001b[0;36mcompute_loss\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m 287\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mVariable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrequires_grad\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 288\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 289\u001b[0;31m \u001b[0mx_decoded\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 290\u001b[0m \u001b[0mloss\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrecon_loss\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkl_loss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_rec\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx_decoded\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdetach\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloss_fn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 291\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1100\u001b[0m if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks\n\u001b[1;32m 1101\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1102\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1103\u001b[0m \u001b[0;31m# Do not call functions when jit is used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1104\u001b[0m \u001b[0mfull_backward_hooks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/repo/projects/timeseries-generation/model/vrae.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 257\u001b[0m \u001b[0mcell_output\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencoder\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 258\u001b[0m \u001b[0mlatent\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlmbd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcell_output\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 259\u001b[0;31m \u001b[0mx_decoded\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdecoder\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlatent\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 260\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 261\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mx_decoded\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlatent\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1100\u001b[0m if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks\n\u001b[1;32m 1101\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1102\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1103\u001b[0m \u001b[0;31m# Do not call functions when jit is used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1104\u001b[0m \u001b[0mfull_backward_hooks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/repo/projects/timeseries-generation/model/vrae.py\u001b[0m in 
\u001b[0;36mforward\u001b[0;34m(self, latent)\u001b[0m\n\u001b[1;32m 139\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLSTM\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 140\u001b[0m \u001b[0mh_0\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mh_state\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0m_\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhidden_layer_depth\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 141\u001b[0;31m \u001b[0mdecoder_output\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdecoder_inputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mh_0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mc_0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 142\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mGRU\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 143\u001b[0m \u001b[0mh_0\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mh_state\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0m_\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhidden_layer_depth\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1100\u001b[0m if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks\n\u001b[1;32m 1101\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1102\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1103\u001b[0m \u001b[0;31m# Do not call functions when jit is used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1104\u001b[0m \u001b[0mfull_backward_hooks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input, hx)\u001b[0m\n\u001b[1;32m 687\u001b[0m \u001b[0mhx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpermute_hidden\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhx\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0msorted_indices\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 688\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 689\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcheck_forward_args\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_sizes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 690\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mbatch_sizes\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 691\u001b[0m result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,\n", + "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py\u001b[0m in \u001b[0;36mcheck_forward_args\u001b[0;34m(self, input, hidden, batch_sizes)\u001b[0m\n\u001b[1;32m 632\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcheck_input\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_sizes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 633\u001b[0m self.check_hidden_size(hidden[0], self.get_expected_hidden_size(input, batch_sizes),\n\u001b[0;32m--> 634\u001b[0;31m 'Expected hidden[0] size {}, got {}')\n\u001b[0m\u001b[1;32m 635\u001b[0m self.check_hidden_size(hidden[1], self.get_expected_cell_size(input, batch_sizes),\n\u001b[1;32m 636\u001b[0m 'Expected hidden[1] size {}, got {}')\n", + "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py\u001b[0m in \u001b[0;36mcheck_hidden_size\u001b[0;34m(self, hx, expected_hidden_size, msg)\u001b[0m\n\u001b[1;32m 224\u001b[0m msg: str = 'Expected hidden size {}, got {}') -> None:\n\u001b[1;32m 225\u001b[0m \u001b[0;32mif\u001b[0m 
\u001b[0mhx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mexpected_hidden_size\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 226\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mRuntimeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexpected_hidden_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 227\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 228\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mcheck_forward_args\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mTensor\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhidden\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mTensor\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_sizes\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mTensor\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mRuntimeError\u001b[0m: Expected hidden[0] size (1, 32, 90), got [1, 100, 90]" + ] + } + ], + "source": [ + "vrae.fit(train_dataset)\n", + "\n", + "#If the model has to be saved, with the learnt parameters use:\n", + "# vrae.fit(dataset, save = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Transform the input timeseries to encoded latent vectors" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "z_run = vrae.transform(test_dataset)\n", + "\n", 
+ "#If the latent vectors have to be saved, pass the parameter `save`\n", + "# z_run = vrae.transform(dataset, save = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save the model to be fetched later" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vrae.save('vrae.pth')\n", + "\n", + "# To load a presaved model, execute:\n", + "# vrae.load('vrae.pth')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Visualize using PCA and tSNE" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_clustering(z_run, y_val, engine='matplotlib', download = False)\n", + "\n", + "# If plotly to be used as rendering engine, uncomment below line\n", + "#plot_clustering(z_run, y_val, engine='plotly', download = False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/etc/Hamon_Data_concat.ipynb b/etc/Hamon_Data_concat.ipynb new file mode 100644 index 0000000..9a5271b --- /dev/null +++ b/etc/Hamon_Data_concat.ipynb @@ -0,0 +1,1040 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 8, + "id": "08f72edc", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import pandas as pd\n", + "import glob" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "89b19595", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/repo/projects/timeseries-generation/etc'" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + 
], + "source": [ + "os.getcwd()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "b4f6150c", + "metadata": {}, + "outputs": [], + "source": [ + "# data_path = '/repo/projects/HAI_timeseries/Hamon_dataset/*.csv'\n", + "data_path = '../data/*.csv'" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "2e32eabf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['../data/20210620.csv',\n", + " '../data/20210621.csv',\n", + " '../data/20210622.csv',\n", + " '../data/20210623.csv',\n", + " '../data/20210624.csv',\n", + " '../data/20210625.csv',\n", + " '../data/20210626.csv']" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "file_list = glob.glob(data_path)\n", + "file_list.sort()\n", + "file_list" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "6ac1b77f", + "metadata": {}, + "outputs": [], + "source": [ + "# 전체 데이터\n", + "df_total = pd.DataFrame()\n", + "\n", + "for i in file_list:\n", + " data = pd.read_csv(i)\n", + " df_total = pd.concat([df_total, data])\n", + "\n", + "df_total = df_total.reset_index(drop = True)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "010299a0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YYYYMMDDHHMMSSMNG_NOIF_IDXAVG_INBPSMIN_INBPSMAX_INBPSAVG_OUTBPSMIN_OUTBPSMAX_OUTBPS...MAX_INMCASTPPSAVG_OUTMCASTPPSMIN_OUTMCASTPPSMAX_OUTMCASTPPSAVG_INBCASTPPSMIN_INBCASTPPSMAX_INBCASTPPSAVG_OUTBCASTPPSMIN_OUTBCASTPPSMAX_OUTBCASTPPS
02021062002722124184312184312184312448132844813284481328...0000000000
1202106205002722124383488383488383488141062561410625614106256...0000000000
22021062010002722124235192235192235192724388072438807243880...0000000000
32021062015002722124200032200032200032739856873985687398568...0000000000
42021062020002722124211264211264211264539952053995205399520...0000000000
\n", + "

5 rows × 58 columns

\n", + "
" + ], + "text/plain": [ + " YYYYMMDD HHMMSS MNG_NO IF_IDX AVG_INBPS MIN_INBPS MAX_INBPS \\\n", + "0 20210620 0 2722 124 184312 184312 184312 \n", + "1 20210620 500 2722 124 383488 383488 383488 \n", + "2 20210620 1000 2722 124 235192 235192 235192 \n", + "3 20210620 1500 2722 124 200032 200032 200032 \n", + "4 20210620 2000 2722 124 211264 211264 211264 \n", + "\n", + " AVG_OUTBPS MIN_OUTBPS MAX_OUTBPS ... MAX_INMCASTPPS AVG_OUTMCASTPPS \\\n", + "0 4481328 4481328 4481328 ... 0 0 \n", + "1 14106256 14106256 14106256 ... 0 0 \n", + "2 7243880 7243880 7243880 ... 0 0 \n", + "3 7398568 7398568 7398568 ... 0 0 \n", + "4 5399520 5399520 5399520 ... 0 0 \n", + "\n", + " MIN_OUTMCASTPPS MAX_OUTMCASTPPS AVG_INBCASTPPS MIN_INBCASTPPS \\\n", + "0 0 0 0 0 \n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "\n", + " MAX_INBCASTPPS AVG_OUTBCASTPPS MIN_OUTBCASTPPS MAX_OUTBCASTPPS \n", + "0 0 0 0 0 \n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "\n", + "[5 rows x 58 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_total.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "5388f7ca", + "metadata": {}, + "outputs": [], + "source": [ + "cols_to_remove = ['YYYYMMDD', 'HHMMSS']" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "cd234ff4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MNG_NOIF_IDXAVG_INBPSMIN_INBPSMAX_INBPSAVG_OUTBPSMIN_OUTBPSMAX_OUTBPSAVG_INPPSMIN_INPPS...MAX_INMCASTPPSAVG_OUTMCASTPPSMIN_OUTMCASTPPSMAX_OUTMCASTPPSAVG_INBCASTPPSMIN_INBCASTPPSMAX_INBCASTPPSAVG_OUTBCASTPPSMIN_OUTBCASTPPSMAX_OUTBCASTPPS
02722124184312184312184312448132844813284481328329329...0000000000
12722124383488383488383488141062561410625614106256546546...0000000000
22722124235192235192235192724388072438807243880424424...0000000000
32722124200032200032200032739856873985687398568371371...0000000000
42722124211264211264211264539952053995205399520388388...0000000000
\n", + "

5 rows × 56 columns

\n", + "
" + ], + "text/plain": [ + " MNG_NO IF_IDX AVG_INBPS MIN_INBPS MAX_INBPS AVG_OUTBPS MIN_OUTBPS \\\n", + "0 2722 124 184312 184312 184312 4481328 4481328 \n", + "1 2722 124 383488 383488 383488 14106256 14106256 \n", + "2 2722 124 235192 235192 235192 7243880 7243880 \n", + "3 2722 124 200032 200032 200032 7398568 7398568 \n", + "4 2722 124 211264 211264 211264 5399520 5399520 \n", + "\n", + " MAX_OUTBPS AVG_INPPS MIN_INPPS ... MAX_INMCASTPPS AVG_OUTMCASTPPS \\\n", + "0 4481328 329 329 ... 0 0 \n", + "1 14106256 546 546 ... 0 0 \n", + "2 7243880 424 424 ... 0 0 \n", + "3 7398568 371 371 ... 0 0 \n", + "4 5399520 388 388 ... 0 0 \n", + "\n", + " MIN_OUTMCASTPPS MAX_OUTMCASTPPS AVG_INBCASTPPS MIN_INBCASTPPS \\\n", + "0 0 0 0 0 \n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "\n", + " MAX_INBCASTPPS AVG_OUTBCASTPPS MIN_OUTBCASTPPS MAX_OUTBCASTPPS \n", + "0 0 0 0 0 \n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "\n", + "[5 rows x 56 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_total.drop(cols_to_remove,axis=1).head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "67749b8b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YYYYMMDDHHMMSSMNG_NOIF_IDXAVG_INBPSMIN_INBPSMAX_INBPSAVG_OUTBPSMIN_OUTBPSMAX_OUTBPS...MAX_INMCASTPPSAVG_OUTMCASTPPSMIN_OUTMCASTPPSMAX_OUTMCASTPPSAVG_INBCASTPPSMIN_INBCASTPPSMAX_INBCASTPPSAVG_OUTBCASTPPSMIN_OUTBCASTPPSMAX_OUTBCASTPPS
02021062002722124184312184312184312448132844813284481328...0000000000
1202106205002722124383488383488383488141062561410625614106256...0000000000
22021062010002722124235192235192235192724388072438807243880...0000000000
32021062015002722124200032200032200032739856873985687398568...0000000000
42021062020002722124211264211264211264539952053995205399520...0000000000
\n", + "

5 rows × 58 columns

\n", + "
" + ], + "text/plain": [ + " YYYYMMDD HHMMSS MNG_NO IF_IDX AVG_INBPS MIN_INBPS MAX_INBPS \\\n", + "0 20210620 0 2722 124 184312 184312 184312 \n", + "1 20210620 500 2722 124 383488 383488 383488 \n", + "2 20210620 1000 2722 124 235192 235192 235192 \n", + "3 20210620 1500 2722 124 200032 200032 200032 \n", + "4 20210620 2000 2722 124 211264 211264 211264 \n", + "\n", + " AVG_OUTBPS MIN_OUTBPS MAX_OUTBPS ... MAX_INMCASTPPS AVG_OUTMCASTPPS \\\n", + "0 4481328 4481328 4481328 ... 0 0 \n", + "1 14106256 14106256 14106256 ... 0 0 \n", + "2 7243880 7243880 7243880 ... 0 0 \n", + "3 7398568 7398568 7398568 ... 0 0 \n", + "4 5399520 5399520 5399520 ... 0 0 \n", + "\n", + " MIN_OUTMCASTPPS MAX_OUTMCASTPPS AVG_INBCASTPPS MIN_INBCASTPPS \\\n", + "0 0 0 0 0 \n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "\n", + " MAX_INBCASTPPS AVG_OUTBCASTPPS MIN_OUTBCASTPPS MAX_OUTBCASTPPS \n", + "0 0 0 0 0 \n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "\n", + "[5 rows x 58 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_total.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "dce7124b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YYYYMMDDHHMMSSMNG_NOIF_IDXAVG_INBPSMIN_INBPSMAX_INBPSAVG_OUTBPSMIN_OUTBPSMAX_OUTBPS...MAX_INMCASTPPSAVG_OUTMCASTPPSMIN_OUTMCASTPPSMAX_OUTMCASTPPSAVG_INBCASTPPSMIN_INBCASTPPSMAX_INBCASTPPSAVG_OUTBCASTPPSMIN_OUTBCASTPPSMAX_OUTBCASTPPS
318750320210626235500263562168561685616856314723147231472...0000000000
318750420210626235500263572155681556815568429204292042920...0000000000
318750520210626235500263582721832072183207218320103256103256103256...0000000000
318750620210626235500263592298562985629856393043930439304...0000000000
318750720210626235500263602161681616816168411604116041160...0000000000
\n", + "

5 rows × 58 columns

\n", + "
" + ], + "text/plain": [ + " YYYYMMDD HHMMSS MNG_NO IF_IDX AVG_INBPS MIN_INBPS MAX_INBPS \\\n", + "3187503 20210626 235500 26356 2 16856 16856 16856 \n", + "3187504 20210626 235500 26357 2 15568 15568 15568 \n", + "3187505 20210626 235500 26358 2 7218320 7218320 7218320 \n", + "3187506 20210626 235500 26359 2 29856 29856 29856 \n", + "3187507 20210626 235500 26360 2 16168 16168 16168 \n", + "\n", + " AVG_OUTBPS MIN_OUTBPS MAX_OUTBPS ... MAX_INMCASTPPS \\\n", + "3187503 31472 31472 31472 ... 0 \n", + "3187504 42920 42920 42920 ... 0 \n", + "3187505 103256 103256 103256 ... 0 \n", + "3187506 39304 39304 39304 ... 0 \n", + "3187507 41160 41160 41160 ... 0 \n", + "\n", + " AVG_OUTMCASTPPS MIN_OUTMCASTPPS MAX_OUTMCASTPPS AVG_INBCASTPPS \\\n", + "3187503 0 0 0 0 \n", + "3187504 0 0 0 0 \n", + "3187505 0 0 0 0 \n", + "3187506 0 0 0 0 \n", + "3187507 0 0 0 0 \n", + "\n", + " MIN_INBCASTPPS MAX_INBCASTPPS AVG_OUTBCASTPPS MIN_OUTBCASTPPS \\\n", + "3187503 0 0 0 0 \n", + "3187504 0 0 0 0 \n", + "3187505 0 0 0 0 \n", + "3187506 0 0 0 0 \n", + "3187507 0 0 0 0 \n", + "\n", + " MAX_OUTBCASTPPS \n", + "3187503 0 \n", + "3187504 0 \n", + "3187505 0 \n", + "3187506 0 \n", + "3187507 0 \n", + "\n", + "[5 rows x 58 columns]" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_total.tail(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "9a33b60a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "22745 25187\n", + "10784 13790\n", + "10751 13762\n", + "20633 13755\n", + "20542 13748\n", + " ... 
\n", + "14892 612\n", + "2869 533\n", + "26240 533\n", + "15259 455\n", + "26083 237\n", + "Name: MNG_NO, Length: 10686, dtype: int64" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_total['MNG_NO'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9ceebd30", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/etc/Hamon_EDA.ipynb b/etc/Hamon_EDA.ipynb new file mode 100644 index 0000000..5a77ff3 --- /dev/null +++ b/etc/Hamon_EDA.ipynb @@ -0,0 +1,1270 @@ +{ + "cells": [ + { + "attachments": { + "a605f057-f69a-4b92-9208-56fa92cada49.png": { + "image/png": "" + } + }, + "cell_type": "markdown", + "id": "21770ffe", + "metadata": {}, + "source": [ + "![image.png](attachment:a605f057-f69a-4b92-9208-56fa92cada49.png)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "617dbb54", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import pandas as pd\n", + "import glob" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "9b4fead4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/repo/projects/HAI_timeseries/notebook/CES'" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "os.getcwd()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "d4518761", + "metadata": {}, + "outputs": [], + "source": [ + "data_path = '/repo/projects/HAI_timeseries/Hamon_dataset/'\n", + "read_file = 'total_df.csv'" + 
] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "69d77230", + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'pd' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_path\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mread_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'pd' is not defined" + ] + } + ], + "source": [ + "df = pd.read_csv(data_path+read_file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31cb8a56", + "metadata": {}, + "outputs": [], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a4033a2", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "99b9100d", + "metadata": {}, + "outputs": [], + "source": [ + "df['HHMMSS'].value_counts(sort = False).plot.bar(title='My Title')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1ad261ce", + "metadata": {}, + "outputs": [], + "source": [ + "df['MNG_NO'].value_counts(sort = False).plot.bar(title='My Title')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1fa49b65", + "metadata": {}, + "outputs": [], + "source": [ + "df['MNG_NO'].value_counts(sort = False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c98808a", + "metadata": {}, + "outputs": [], + "source": [ + "for col in df.columns:\n", + " print(f'{col} : 
{len(df[col].unique())}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a5a4e9b1", + "metadata": {}, + "outputs": [], + "source": [ + "len(df['MNG_NO'].unique())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8ad2720", + "metadata": {}, + "outputs": [], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "43e80a22", + "metadata": {}, + "outputs": [], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a5dfd123", + "metadata": {}, + "outputs": [], + "source": [ + "df.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3689bac", + "metadata": {}, + "outputs": [], + "source": [ + "desc = pd.DataFrame(df.describe())\n", + "desc.to_csv('description.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecc8bd99", + "metadata": {}, + "outputs": [], + "source": [ + "df['YYYYMMDD'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e481efa2", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib as mpl\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7575efab", + "metadata": {}, + "outputs": [], + "source": [ + "from collections import Counter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2212c391", + "metadata": {}, + "outputs": [], + "source": [ + "date_cnt = Counter(df['YYYYMMDD'])\n", + "\n", + "# Change Counter to Dict\n", + "for key, value in date_cnt.items():\n", + " date_cnt[key] = value" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "427cde29", + "metadata": {}, + "outputs": [], + "source": [ + "date_cnt.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "af6404c5", + "metadata": {}, + 
"outputs": [], + "source": [ + "date_cnt.values()" + ] + }, + { + "cell_type": "markdown", + "id": "4835a31d", + "metadata": {}, + "source": [ + "## Get Count for Each Date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa3ef41f", + "metadata": {}, + "outputs": [], + "source": [ + "plt.title(\"Bar Chart\")\n", + "\n", + "x = list(date_cnt.keys())\n", + "y = list(date_cnt.values())\n", + "\n", + "xlabel = list(date_cnt.keys())\n", + "\n", + "plt.bar(x, y)\n", + "\n", + "plt.xticks(x, xlabel, rotation = 75)\n", + "\n", + "plt.xlabel(\"date\")\n", + "plt.ylabel(\"cnt\")\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "096b1bb6", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "cols = list(df.columns)\n", + "cols" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f879545b", + "metadata": {}, + "outputs": [], + "source": [ + "vis_cols = []\n", + "\n", + "for col in cols:\n", + " if ('MIN' in col) or ('MAX' in col) or ('AVG' in col):\n", + " i, j = col.split('_')\n", + " vis_cols.append(j)\n", + "\n", + "print(vis_cols)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "199c8d95", + "metadata": {}, + "outputs": [], + "source": [ + "vis_cols = list(set(vis_cols))\n", + "print(vis_cols)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "07e3d9bf", + "metadata": {}, + "outputs": [], + "source": [ + "print(cols)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "64e8097f", + "metadata": {}, + "outputs": [], + "source": [ + "check_cols = ['BPS', 'DISCARD', 'ERR', 'NUPPS', 'PPS']" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "79a2ad40", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[20210620, 20210621, 20210622, 20210623, 20210624, 20210625, 20210626]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + 
], + "source": [ + "dates = list(date_cnt.keys())\n", + "dates" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "2d0de5f3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YYYYMMDD
020210620
120210620
220210620
320210620
420210620
\n", + "
" + ], + "text/plain": [ + " YYYYMMDD\n", + "0 20210620\n", + "1 20210620\n", + "2 20210620\n", + "3 20210620\n", + "4 20210620" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dates_df = pd.DataFrame(df['YYYYMMDD'])\n", + "dates_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "5a96b6fa", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MIN_OUTERRMAX_OUTERRAVG_OUTERR
0000
1000
2000
3000
4000
............
23195123000
23195124000
23195125000
23195126000
23195127000
\n", + "

23195128 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " MIN_OUTERR MAX_OUTERR AVG_OUTERR\n", + "0 0 0 0\n", + "1 0 0 0\n", + "2 0 0 0\n", + "3 0 0 0\n", + "4 0 0 0\n", + "... ... ... ...\n", + "23195123 0 0 0\n", + "23195124 0 0 0\n", + "23195125 0 0 0\n", + "23195126 0 0 0\n", + "23195127 0 0 0\n", + "\n", + "[23195128 rows x 3 columns]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[['YYYYMMDD','MIN_OUTERR','MAX_OUTERR','AVG_OUTERR']].iloc[:,1:]" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "856b12e8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0\n", + "1 0\n", + "2 0\n", + "3 0\n", + "4 0\n", + " ..\n", + "23195123 0\n", + "23195124 0\n", + "23195125 0\n", + "23195126 0\n", + "23195127 0\n", + "Name: MIN_OUTERR, Length: 23195128, dtype: int64" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[['MIN_OUTERR','MAX_OUTERR','AVG_OUTERR']].iloc[:,0]" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "98f07a03", + "metadata": {}, + "outputs": [], + "source": [ + "save_path = '/repo/projects/timeseries_anomaly/EDA/results'" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "15a93a5d", + "metadata": {}, + "outputs": [], + "source": [ + "#vis_cols = ['INERR', 'OUTDROP', 'INNUPPS', 'OUTNUPPS', 'INDISCARD', 'INBCASTPPS']" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "078c6ecb", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "vis_col: INERR\n", + ">>> plotting 20210620\n", + ">>> plotting 20210621\n", + ">>> plotting 20210622\n", + ">>> plotting 20210623\n", + ">>> plotting 20210624\n", + ">>> plotting 20210625\n", + ">>> plotting 20210626\n", + "vis_col: OUTDROP\n", + ">>> plotting 20210620\n", + ">>> plotting 20210621\n", + ">>> plotting 20210622\n", + ">>> plotting 20210623\n", + 
">>> plotting 20210624\n", + ">>> plotting 20210625\n", + ">>> plotting 20210626\n", + "vis_col: INNUPPS\n", + ">>> plotting 20210620\n", + ">>> plotting 20210621\n", + ">>> plotting 20210622\n", + ">>> plotting 20210623\n", + ">>> plotting 20210624\n", + ">>> plotting 20210625\n", + ">>> plotting 20210626\n", + "vis_col: OUTNUPPS\n", + ">>> plotting 20210620\n", + ">>> plotting 20210621\n", + ">>> plotting 20210622\n", + ">>> plotting 20210623\n", + ">>> plotting 20210624\n", + ">>> plotting 20210625\n", + ">>> plotting 20210626\n", + "vis_col: INDISCARD\n", + ">>> plotting 20210620\n", + ">>> plotting 20210621\n", + ">>> plotting 20210622\n", + ">>> plotting 20210623\n", + ">>> plotting 20210624\n", + ">>> plotting 20210625\n", + ">>> plotting 20210626\n", + "vis_col: INBCASTPPS\n", + ">>> plotting 20210620\n", + ">>> plotting 20210621\n", + ">>> plotting 20210622\n", + ">>> plotting 20210623\n", + ">>> plotting 20210624\n", + ">>> plotting 20210625\n", + ">>> plotting 20210626\n" + ] + } + ], + "source": [ + "for vis_col in vis_cols:\n", + " print(f'vis_col: {vis_col}')\n", + " tmp_cols = []\n", + " \n", + " for col in cols:\n", + " #print(f'>>>>>col: {col}')\n", + " if vis_col in col:\n", + " tmp_cols.append(col)\n", + " \n", + " #print(tmp_cols)\n", + " #print('--------------------')\n", + " tmp_df = df[tmp_cols]\n", + " \n", + " # make tmp dataframe\n", + " tmp_df = pd.concat([dates_df, tmp_df], axis = 1)\n", + " \n", + " # for dates\n", + " for date in dates:\n", + " print(f'>>> plotting {date}')\n", + " \n", + " # use data at certain date\n", + " date_df = tmp_df[tmp_df['YYYYMMDD']==date].iloc[:,1:]\n", + " \n", + " criteria = ['MAX', 'MIN', 'AVG']\n", + " \n", + " for c in criteria:\n", + " \n", + " if c == 'AVG':\n", + " plot_df = date_df.iloc[:,0]\n", + " elif c == 'MIN':\n", + " plot_df = date_df.iloc[:,1]\n", + " elif c == 'MAX':\n", + " plot_df = date_df.iloc[:,2]\n", + " \n", + " # define fig\n", + " fig = plt.figure()\n", + "\n", + " 
plot_df.plot(figsize = (12, 6))\n", + "\n", + " plt.title(f'{c}_{vis_col}({date}) ')\n", + "\n", + " plt.savefig(save_path+ f\"/{c}_{vis_col}_{date}.png\", dpi=200)\n", + " \n", + " plt.close('all')" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "ee353892", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "vis_col: INERR\n", + "> plotting 20210620\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0009929763219236904\n", + ">> MAX(MAX): 103\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.0009929763219236904\n", + ">> MIN(MAX): 103\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0009929763219236904\n", + ">> AVG(MAX): 103\n", + "> plotting 20210621\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.1070327038198467\n", + ">> MAX(MAX): 9360\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.1070327038198467\n", + ">> MIN(MAX): 9360\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.1070327038198467\n", + ">> AVG(MAX): 9360\n", + "> plotting 20210622\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.21545382251356252\n", + ">> MAX(MAX): 12166\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.21545382251356252\n", + ">> MIN(MAX): 12166\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.21545382251356252\n", + ">> AVG(MAX): 12166\n", + "> plotting 20210623\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.1899605151255562\n", + ">> MAX(MAX): 20995\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.1899605151255562\n", + ">> MIN(MAX): 20995\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.1899605151255562\n", + ">> AVG(MAX): 20995\n", + "> plotting 20210624\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.3504950319532711\n", + ">> MAX(MAX): 87505\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.3504950319532711\n", + ">> MIN(MAX): 87505\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.3504950319532711\n", + ">> AVG(MAX): 87505\n", + "> plotting 20210625\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.09246079843196818\n", + ">> MAX(MAX): 16707\n", + ">> MIN(MIN): 
0\n", + ">> MIN(AVG): 0.09246079843196818\n", + ">> MIN(MAX): 16707\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.09246079843196818\n", + ">> AVG(MAX): 16707\n", + "> plotting 20210626\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.013125300391402939\n", + ">> MAX(MAX): 3539\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.013125300391402939\n", + ">> MIN(MAX): 3539\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.013125300391402939\n", + ">> AVG(MAX): 3539\n", + "vis_col: OUTDROP\n", + "> plotting 20210620\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0\n", + ">> MAX(MAX): 0\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.0\n", + ">> MIN(MAX): 0\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0\n", + ">> AVG(MAX): 0\n", + "> plotting 20210621\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0\n", + ">> MAX(MAX): 0\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.0\n", + ">> MIN(MAX): 0\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0\n", + ">> AVG(MAX): 0\n", + "> plotting 20210622\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0\n", + ">> MAX(MAX): 0\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.0\n", + ">> MIN(MAX): 0\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0\n", + ">> AVG(MAX): 0\n", + "> plotting 20210623\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0\n", + ">> MAX(MAX): 0\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.0\n", + ">> MIN(MAX): 0\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0\n", + ">> AVG(MAX): 0\n", + "> plotting 20210624\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0\n", + ">> MAX(MAX): 0\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.0\n", + ">> MIN(MAX): 0\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0\n", + ">> AVG(MAX): 0\n", + "> plotting 20210625\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0\n", + ">> MAX(MAX): 0\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.0\n", + ">> MIN(MAX): 0\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0\n", + ">> AVG(MAX): 0\n", + "> plotting 20210626\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0\n", + ">> MAX(MAX): 0\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 
0.0\n", + ">> MIN(MAX): 0\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0\n", + ">> AVG(MAX): 0\n", + "vis_col: INNUPPS\n", + "> plotting 20210620\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 1.4232476784349255\n", + ">> MAX(MAX): 230423\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 1.4232476784349255\n", + ">> MIN(MAX): 230423\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 1.4232476784349255\n", + ">> AVG(MAX): 230423\n", + "> plotting 20210621\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 1.3255765156290704\n", + ">> MAX(MAX): 4693\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 1.3255765156290704\n", + ">> MIN(MAX): 4693\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 1.3255765156290704\n", + ">> AVG(MAX): 4693\n", + "> plotting 20210622\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 1.300698597051876\n", + ">> MAX(MAX): 3564\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 1.300698597051876\n", + ">> MIN(MAX): 3564\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 1.300698597051876\n", + ">> AVG(MAX): 3564\n", + "> plotting 20210623\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 1.3138820131438875\n", + ">> MAX(MAX): 6472\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 1.3138820131438875\n", + ">> MIN(MAX): 6472\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 1.3138820131438875\n", + ">> AVG(MAX): 6472\n", + "> plotting 20210624\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 1.414565846192937\n", + ">> MAX(MAX): 8387\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 1.414565846192937\n", + ">> MIN(MAX): 8387\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 1.414565846192937\n", + ">> AVG(MAX): 8387\n", + "> plotting 20210625\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 1.3606745082676048\n", + ">> MAX(MAX): 3692\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 1.3606745082676048\n", + ">> MIN(MAX): 3692\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 1.3606745082676048\n", + ">> AVG(MAX): 3692\n", + "> plotting 20210626\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 1.4019500500077176\n", + ">> MAX(MAX): 4121\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 
1.4019500500077176\n", + ">> MIN(MAX): 4121\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 1.4019500500077176\n", + ">> AVG(MAX): 4121\n", + "vis_col: OUTNUPPS\n", + "> plotting 20210620\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 2065.437395895265\n", + ">> MAX(MAX): 33784710\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 2065.437395895265\n", + ">> MIN(MAX): 33784710\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 2065.437395895265\n", + ">> AVG(MAX): 33784710\n", + "> plotting 20210621\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 10884.217727320281\n", + ">> MAX(MAX): 45275418\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 10884.217727320281\n", + ">> MIN(MAX): 45275418\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 10884.217727320281\n", + ">> AVG(MAX): 45275418\n", + "> plotting 20210622\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 10274.470403238776\n", + ">> MAX(MAX): 34360111\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 10274.470403238776\n", + ">> MIN(MAX): 34360111\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 10274.470403238776\n", + ">> AVG(MAX): 34360111\n", + "> plotting 20210623\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 10379.628168063316\n", + ">> MAX(MAX): 49227501\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 10379.628168063316\n", + ">> MIN(MAX): 49227501\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 10379.628168063316\n", + ">> AVG(MAX): 49227501\n", + "> plotting 20210624\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 10318.102998850687\n", + ">> MAX(MAX): 46340093\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 10318.102998850687\n", + ">> MIN(MAX): 46340093\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 10318.102998850687\n", + ">> AVG(MAX): 46340093\n", + "> plotting 20210625\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 9422.836515565848\n", + ">> MAX(MAX): 32470807\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 9422.836515565848\n", + ">> MIN(MAX): 32470807\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 9422.836515565848\n", + ">> AVG(MAX): 32470807\n", + "> plotting 20210626\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 
2315.964770284498\n", + ">> MAX(MAX): 26240727\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 2315.964770284498\n", + ">> MIN(MAX): 26240727\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 2315.964770284498\n", + ">> AVG(MAX): 26240727\n", + "vis_col: INDISCARD\n", + "> plotting 20210620\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 11.453699935354793\n", + ">> MAX(MAX): 8403755\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 11.453699935354793\n", + ">> MIN(MAX): 8403755\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 11.453699935354793\n", + ">> AVG(MAX): 8403755\n", + "> plotting 20210621\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 102.17135060146234\n", + ">> MAX(MAX): 3169587\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 102.17135060146234\n", + ">> MIN(MAX): 3169587\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 102.17135060146234\n", + ">> AVG(MAX): 3169587\n", + "> plotting 20210622\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 109.02586151696624\n", + ">> MAX(MAX): 3894820\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 109.02586151696624\n", + ">> MIN(MAX): 3894820\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 109.02586151696624\n", + ">> AVG(MAX): 3894820\n", + "> plotting 20210623\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 124.03730567319344\n", + ">> MAX(MAX): 3365716\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 124.03730567319344\n", + ">> MIN(MAX): 3365716\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 124.03730567319344\n", + ">> AVG(MAX): 3365716\n", + "> plotting 20210624\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 129.86653843994765\n", + ">> MAX(MAX): 9742979\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 129.86653843994765\n", + ">> MIN(MAX): 9742979\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 129.86653843994765\n", + ">> AVG(MAX): 9742979\n", + "> plotting 20210625\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 101.42783862050769\n", + ">> MAX(MAX): 3252166\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 101.42783862050769\n", + ">> MIN(MAX): 3252166\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 101.42783862050769\n", + ">> 
AVG(MAX): 3252166\n", + "> plotting 20210626\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 16.78371724870965\n", + ">> MAX(MAX): 877271\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 16.78371724870965\n", + ">> MIN(MAX): 877271\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 16.78371724870965\n", + ">> AVG(MAX): 877271\n", + "vis_col: INBCASTPPS\n", + "> plotting 20210620\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0\n", + ">> MAX(MAX): 0\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.0\n", + ">> MIN(MAX): 0\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0\n", + ">> AVG(MAX): 0\n", + "> plotting 20210621\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0\n", + ">> MAX(MAX): 0\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.0\n", + ">> MIN(MAX): 0\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0\n", + ">> AVG(MAX): 0\n", + "> plotting 20210622\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0\n", + ">> MAX(MAX): 0\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.0\n", + ">> MIN(MAX): 0\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0\n", + ">> AVG(MAX): 0\n", + "> plotting 20210623\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0\n", + ">> MAX(MAX): 0\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.0\n", + ">> MIN(MAX): 0\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0\n", + ">> AVG(MAX): 0\n", + "> plotting 20210624\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0\n", + ">> MAX(MAX): 0\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.0\n", + ">> MIN(MAX): 0\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0\n", + ">> AVG(MAX): 0\n", + "> plotting 20210625\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0\n", + ">> MAX(MAX): 0\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.0\n", + ">> MIN(MAX): 0\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0\n", + ">> AVG(MAX): 0\n", + "> plotting 20210626\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0\n", + ">> MAX(MAX): 0\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.0\n", + ">> MIN(MAX): 0\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0\n", + ">> AVG(MAX): 0\n" + ] + } + ], + "source": [ + "for vis_col in 
vis_cols:\n", + " print(f'vis_col: {vis_col}')\n", + " tmp_cols = []\n", + " \n", + " for col in cols:\n", + " #print(f'>>>>>col: {col}')\n", + " if vis_col in col:\n", + " tmp_cols.append(col)\n", + " \n", + " #print(tmp_cols)\n", + " #print('--------------------')\n", + " tmp_df = df[tmp_cols]\n", + " \n", + " # make tmp dataframe\n", + " tmp_df = pd.concat([dates_df, tmp_df], axis = 1)\n", + " \n", + " # for dates\n", + " for date in dates:\n", + " print(f'> plotting {date}')\n", + " \n", + " # use data at certain date\n", + " date_df = tmp_df[tmp_df['YYYYMMDD']==date].iloc[:,1:]\n", + " \n", + " criteria = ['MAX', 'MIN', 'AVG']\n", + " \n", + " for c in criteria:\n", + " \n", + " if c == 'AVG':\n", + " plot_df = date_df.iloc[:,0]\n", + " plot_avg = plot_df.mean()\n", + " plot_min = plot_df.min()\n", + " plot_max = plot_df.max()\n", + " print(f'>> AVG(MIN): {plot_min}')\n", + " print(f'>> AVG(AVG): {plot_avg}')\n", + " print(f'>> AVG(MAX): {plot_max}')\n", + "\n", + " elif c == 'MIN':\n", + " plot_df = date_df.iloc[:,1]\n", + " plot_avg = plot_df.mean()\n", + " plot_min = plot_df.min()\n", + " plot_max = plot_df.max()\n", + " print(f'>> MIN(MIN): {plot_min}')\n", + " print(f'>> MIN(AVG): {plot_avg}')\n", + " print(f'>> MIN(MAX): {plot_max}')\n", + " elif c == 'MAX':\n", + " plot_df = date_df.iloc[:,2]\n", + " plot_avg = plot_df.mean()\n", + " plot_min = plot_df.min()\n", + " plot_max = plot_df.max()\n", + " print(f'>> MAX(MIN): {plot_min}')\n", + " print(f'>> MAX(AVG): {plot_avg}')\n", + " print(f'>> MAX(MAX): {plot_max}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31923da3", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": 
"python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/etc/Hamon_EDA_ver2.ipynb b/etc/Hamon_EDA_ver2.ipynb new file mode 100644 index 0000000..fcc0099 --- /dev/null +++ b/etc/Hamon_EDA_ver2.ipynb @@ -0,0 +1,2562 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "hHGnodIzL8ux", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hHGnodIzL8ux", + "outputId": "845d8e3b-d218-48c9-def2-50578ea340bf" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mounted at /gdrive\n" + ] + } + ], + "source": [ + "from google.colab import drive\n", + "drive.mount('/gdrive')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "617dbb54", + "metadata": { + "id": "617dbb54" + }, + "outputs": [], + "source": [ + "import os\n", + "import pandas as pd\n", + "import glob" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "69d77230", + "metadata": { + "id": "69d77230" + }, + "outputs": [], + "source": [ + "df = pd.read_csv('/gdrive/MyDrive/Colab Notebooks/total_df.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "31cb8a56", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "31cb8a56", + "outputId": "4c439845-84ab-40a8-a1b0-f6a181eeeb25" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['YYYYMMDD', 'HHMMSS', 'MNG_NO', 'IF_IDX', 'AVG_INBPS', 'MIN_INBPS',\n", + " 'MAX_INBPS', 'AVG_OUTBPS', 'MIN_OUTBPS', 'MAX_OUTBPS', 'AVG_INPPS',\n", + " 'MIN_INPPS', 'MAX_INPPS', 'AVG_OUTPPS', 'MIN_OUTPPS', 'MAX_OUTPPS',\n", + " 'AVG_INERR', 'MIN_INERR', 'MAX_INERR', 'AVG_OUTERR', 'MIN_OUTERR',\n", + " 'MAX_OUTERR', 'AVG_CRC', 'MIN_CRC', 'MAX_CRC', 'AVG_COLLISION',\n", + " 'MIN_COLLISION', 'MAX_COLLISION', 'AVG_INDROP', 'MIN_INDROP',\n", + " 'MAX_INDROP', 'AVG_OUTDROP', 'MIN_OUTDROP', 'MAX_OUTDROP',\n", + " 'AVG_INNUPPS', 'MIN_INNUPPS', 
'MAX_INNUPPS', 'AVG_OUTNUPPS',\n", + " 'MIN_OUTNUPPS', 'MAX_OUTNUPPS', 'AVG_INDISCARD', 'MIN_INDISCARD',\n", + " 'MAX_INDISCARD', 'AVG_OUTDISCARD', 'MIN_OUTDISCARD', 'MAX_OUTDISCARD',\n", + " 'AVG_INMCASTPPS', 'MIN_INMCASTPPS', 'MAX_INMCASTPPS', 'AVG_OUTMCASTPPS',\n", + " 'MIN_OUTMCASTPPS', 'MAX_OUTMCASTPPS', 'AVG_INBCASTPPS',\n", + " 'MIN_INBCASTPPS', 'MAX_INBCASTPPS', 'AVG_OUTBCASTPPS',\n", + " 'MIN_OUTBCASTPPS', 'MAX_OUTBCASTPPS'],\n", + " dtype='object')" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "3a4033a2", + "metadata": { + "id": "3a4033a2" + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "1c98808a", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "1c98808a", + "outputId": "821e069f-6ca8-41fb-fa25-f6c78b3f2bda" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "YYYYMMDD : 7\n", + "HHMMSS : 288\n", + "MNG_NO : 10686\n", + "IF_IDX : 26\n", + "AVG_INBPS : 4413650\n", + "MIN_INBPS : 4413650\n", + "MAX_INBPS : 4413650\n", + "AVG_OUTBPS : 1567959\n", + "MIN_OUTBPS : 1567959\n", + "MAX_OUTBPS : 1567959\n", + "AVG_INPPS : 44142\n", + "MIN_INPPS : 44142\n", + "MAX_INPPS : 44142\n", + "AVG_OUTPPS : 287257\n", + "MIN_OUTPPS : 287257\n", + "MAX_OUTPPS : 287257\n", + "AVG_INERR : 1302\n", + "MIN_INERR : 1302\n", + "MAX_INERR : 1302\n", + "AVG_OUTERR : 1065\n", + "MIN_OUTERR : 1065\n", + "MAX_OUTERR : 1065\n", + "AVG_CRC : 1\n", + "MIN_CRC : 1\n", + "MAX_CRC : 1\n", + "AVG_COLLISION : 1\n", + "MIN_COLLISION : 1\n", + "MAX_COLLISION : 1\n", + "AVG_INDROP : 1\n", + "MIN_INDROP : 1\n", + "MAX_INDROP : 1\n", + "AVG_OUTDROP : 1\n", + "MIN_OUTDROP : 1\n", + "MAX_OUTDROP : 1\n", + "AVG_INNUPPS : 897\n", + "MIN_INNUPPS : 897\n", + "MAX_INNUPPS : 897\n", + "AVG_OUTNUPPS : 
287257\n", + "MIN_OUTNUPPS : 287257\n", + "MAX_OUTNUPPS : 287257\n", + "AVG_INDISCARD : 28382\n", + "MIN_INDISCARD : 28382\n", + "MAX_INDISCARD : 28382\n", + "AVG_OUTDISCARD : 709\n", + "MIN_OUTDISCARD : 709\n", + "MAX_OUTDISCARD : 709\n", + "AVG_INMCASTPPS : 1\n", + "MIN_INMCASTPPS : 1\n", + "MAX_INMCASTPPS : 1\n", + "AVG_OUTMCASTPPS : 1\n", + "MIN_OUTMCASTPPS : 1\n", + "MAX_OUTMCASTPPS : 1\n", + "AVG_INBCASTPPS : 1\n", + "MIN_INBCASTPPS : 1\n", + "MAX_INBCASTPPS : 1\n", + "AVG_OUTBCASTPPS : 1\n", + "MIN_OUTBCASTPPS : 1\n", + "MAX_OUTBCASTPPS : 1\n" + ] + } + ], + "source": [ + "for col in df.columns:\n", + " print(f'{col} : {len(df[col].unique())}')" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "d8ad2720", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "d8ad2720", + "outputId": "bfef1b51-f0f1-4b0c-dbc4-3a45c36c8efd" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(23195128, 58)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "43e80a22", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 226 + }, + "id": "43e80a22", + "outputId": "8ff31d12-e303-4c63-bbcc-2df2dad55c02" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YYYYMMDDHHMMSSMNG_NOIF_IDXAVG_INBPSMIN_INBPSMAX_INBPSAVG_OUTBPSMIN_OUTBPSMAX_OUTBPSAVG_INPPSMIN_INPPSMAX_INPPSAVG_OUTPPSMIN_OUTPPSMAX_OUTPPSAVG_INERRMIN_INERRMAX_INERRAVG_OUTERRMIN_OUTERRMAX_OUTERRAVG_CRCMIN_CRCMAX_CRCAVG_COLLISIONMIN_COLLISIONMAX_COLLISIONAVG_INDROPMIN_INDROPMAX_INDROPAVG_OUTDROPMIN_OUTDROPMAX_OUTDROPAVG_INNUPPSMIN_INNUPPSMAX_INNUPPSAVG_OUTNUPPSMIN_OUTNUPPSMAX_OUTNUPPSAVG_INDISCARDMIN_INDISCARDMAX_INDISCARDAVG_OUTDISCARDMIN_OUTDISCARDMAX_OUTDISCARDAVG_INMCASTPPSMIN_INMCASTPPSMAX_INMCASTPPSAVG_OUTMCASTPPSMIN_OUTMCASTPPSMAX_OUTMCASTPPSAVG_INBCASTPPSMIN_INBCASTPPSMAX_INBCASTPPSAVG_OUTBCASTPPSMIN_OUTBCASTPPSMAX_OUTBCASTPPS
02021062023550026641124669926699266992365763657636576222222457245724572000000000000000000000457245724572000000000000000000
1202106208550055052378243782437824361923619236192101010141414000000000000000000000141414000000000000000000
2202106208500055052879287928792281042810428104555101010000000000000000000000101010000000000000000000
3202106208450055052144336144336144336436804368043680202020161616000000000000000000000161616000000000000000000
4202106208400055052456564565645656507205072050720111111161616000000000000000000000161616000000000000000000
\n", + "
" + ], + "text/plain": [ + " YYYYMMDD HHMMSS MNG_NO ... AVG_OUTBCASTPPS MIN_OUTBCASTPPS MAX_OUTBCASTPPS\n", + "0 20210620 235500 26641 ... 0 0 0\n", + "1 20210620 85500 5505 ... 0 0 0\n", + "2 20210620 85000 5505 ... 0 0 0\n", + "3 20210620 84500 5505 ... 0 0 0\n", + "4 20210620 84000 5505 ... 0 0 0\n", + "\n", + "[5 rows x 58 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "a5dfd123", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 226 + }, + "id": "a5dfd123", + "outputId": "9ebee84f-588a-45fb-ce00-cb830ad567ab" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YYYYMMDDHHMMSSMNG_NOIF_IDXAVG_INBPSMIN_INBPSMAX_INBPSAVG_OUTBPSMIN_OUTBPSMAX_OUTBPSAVG_INPPSMIN_INPPSMAX_INPPSAVG_OUTPPSMIN_OUTPPSMAX_OUTPPSAVG_INERRMIN_INERRMAX_INERRAVG_OUTERRMIN_OUTERRMAX_OUTERRAVG_CRCMIN_CRCMAX_CRCAVG_COLLISIONMIN_COLLISIONMAX_COLLISIONAVG_INDROPMIN_INDROPMAX_INDROPAVG_OUTDROPMIN_OUTDROPMAX_OUTDROPAVG_INNUPPSMIN_INNUPPSMAX_INNUPPSAVG_OUTNUPPSMIN_OUTNUPPSMAX_OUTNUPPSAVG_INDISCARDMIN_INDISCARDMAX_INDISCARDAVG_OUTDISCARDMIN_OUTDISCARDMAX_OUTDISCARDAVG_INMCASTPPSMIN_INMCASTPPSMAX_INMCASTPPSAVG_OUTMCASTPPSMIN_OUTMCASTPPSMAX_OUTMCASTPPSAVG_INBCASTPPSMIN_INBCASTPPSMAX_INBCASTPPSAVG_OUTBCASTPPSMIN_OUTBCASTPPSMAX_OUTBCASTPPS
23195123202106261505002069511161956016195601619560279856279856279856164164164727272000000000000000000000727272000000000000000000
23195124202106261505002069411133448133448133448167521675216752141414111111000000000000000000000111111000000000000000000
23195125202106261505002069311151124815112481511248381443814438144131131131272727000000000000000000000272727000000000000000000
2319512620210626150500221012438244382443824727047270472704171717212121000000000000000000000212121000000000000000000
2319512720210626500154212558005580055800694646946469464222222282828000000000000000000000282828000000000000000000
\n", + "
" + ], + "text/plain": [ + " YYYYMMDD HHMMSS ... MIN_OUTBCASTPPS MAX_OUTBCASTPPS\n", + "23195123 20210626 150500 ... 0 0\n", + "23195124 20210626 150500 ... 0 0\n", + "23195125 20210626 150500 ... 0 0\n", + "23195126 20210626 150500 ... 0 0\n", + "23195127 20210626 500 ... 0 0\n", + "\n", + "[5 rows x 58 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "e481efa2", + "metadata": { + "id": "e481efa2" + }, + "outputs": [], + "source": [ + "import matplotlib as mpl\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "7575efab", + "metadata": { + "id": "7575efab" + }, + "outputs": [], + "source": [ + "from collections import Counter" + ] + }, + { + "cell_type": "markdown", + "id": "4835a31d", + "metadata": { + "id": "4835a31d" + }, + "source": [ + "## Get Count for Each Date" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "64e8097f", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "64e8097f", + "outputId": "88679368-aa47-4b16-a4d3-e45d42bdcfd6" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['INBPS',\n", + " 'OUTBPS',\n", + " 'INDISCARD',\n", + " 'OUTDISCARD',\n", + " 'INERR',\n", + " 'OUTERR',\n", + " 'INNUPPS',\n", + " 'OUTNUPPS',\n", + " 'INPPS',\n", + " 'OUTPPS']" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "check_cols = ['INBPS', 'OUTBPS', \n", + " 'INDISCARD', 'OUTDISCARD', \n", + " 'INERR', 'OUTERR', \n", + " 'INNUPPS', 'OUTNUPPS', \n", + " 'INPPS', 'OUTPPS']\n", + "\n", + "check_cols" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "3-tUOC-9QhX3", + "metadata": { + "id": "3-tUOC-9QhX3" + }, + "outputs": [], + "source": [ + 
"INBPS_COLS = []\n", + "OUTBPS_COLS = []\n", + "INDISCARD_COLS = []\n", + "OUTDISCARD_COLS = []\n", + "INERR_COLS = []\n", + "OUTERR_COLS = []\n", + "INNUPPS_COLS = []\n", + "OUTNUPPS_COLS = []\n", + "INPPS_COLS = []\n", + "OUTPPS_COLS = []" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "ajyJGDwoRF55", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 36 + }, + "id": "ajyJGDwoRF55", + "outputId": "813ebc95-51b0-44f6-9acf-d99de420dbb5" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "'AVG_INBPS'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cols = 'INBPS'\n", + "'AVG_' + cols" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "ACq16XK6Qzzf", + "metadata": { + "id": "ACq16XK6Qzzf" + }, + "outputs": [], + "source": [ + "for cols in check_cols:\n", + " if cols == 'INBPS':\n", + " INBPS_COLS.append('MIN_'+cols)\n", + " INBPS_COLS.append('MAX_'+cols)\n", + " INBPS_COLS.append('AVG_'+cols)\n", + " elif cols == 'OUTBPS':\n", + " OUTBPS_COLS.append('MIN_'+cols)\n", + " OUTBPS_COLS.append('MAX_'+cols)\n", + " OUTBPS_COLS.append('AVG_'+cols)\n", + " elif cols == 'INDISCARD':\n", + " INDISCARD_COLS.append('MIN_'+cols)\n", + " INDISCARD_COLS.append('MAX_'+cols)\n", + " INDISCARD_COLS.append('AVG_'+cols)\n", + " elif cols == 'OUTDISCARD':\n", + " OUTDISCARD_COLS.append('MIN_'+cols)\n", + " OUTDISCARD_COLS.append('MAX_'+cols)\n", + " OUTDISCARD_COLS.append('AVG_'+cols)\n", + " elif cols == 'INERR':\n", + " INERR_COLS.append('MIN_'+cols)\n", + " INERR_COLS.append('MAX_'+cols)\n", + " INERR_COLS.append('AVG_'+cols)\n", + " elif cols == 'OUTERR':\n", + " OUTERR_COLS.append('MIN_'+cols)\n", + " OUTERR_COLS.append('MAX_'+cols)\n", + " OUTERR_COLS.append('AVG_'+cols)\n", + " elif cols == 'INNUPPS':\n", + " INNUPPS_COLS.append('MIN_'+cols)\n", + " 
INNUPPS_COLS.append('MAX_'+cols)\n", + " INNUPPS_COLS.append('AVG_'+cols)\n", + " elif cols == 'OUTNUPPS':\n", + " OUTNUPPS_COLS.append('MIN_'+cols)\n", + " OUTNUPPS_COLS.append('MAX_'+cols)\n", + " OUTNUPPS_COLS.append('AVG_'+cols)\n", + " elif cols == 'INPPS':\n", + " INPPS_COLS.append('MIN_'+cols)\n", + " INPPS_COLS.append('MAX_'+cols)\n", + " INPPS_COLS.append('AVG_'+cols)\n", + " elif cols == 'OUTPPS':\n", + " OUTPPS_COLS.append('MIN_'+cols)\n", + " OUTPPS_COLS.append('MAX_'+cols)\n", + " OUTPPS_COLS.append('AVG_'+cols)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "kj_9zpV3Sj49", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "kj_9zpV3Sj49", + "outputId": "aeb0159e-eca3-4774-a411-0841611afd9b" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['MIN_INBPS', 'MAX_INBPS', 'AVG_INBPS']" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "INBPS_COLS" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "LhJBPHNFSo7Q", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LhJBPHNFSo7Q", + "outputId": "ff52f4db-a1be-4189-c004-c90bea14af56" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[['MIN_INBPS', 'MAX_INBPS', 'AVG_INBPS'],\n", + " ['MIN_OUTBPS', 'MAX_OUTBPS', 'AVG_OUTBPS'],\n", + " ['MIN_INDISCARD', 'MAX_INDISCARD', 'AVG_INDISCARD'],\n", + " ['MIN_OUTDISCARD', 'MAX_OUTDISCARD', 'AVG_OUTDISCARD'],\n", + " ['MIN_INERR', 'MAX_INERR', 'AVG_INERR'],\n", + " ['MIN_OUTERR', 'MAX_OUTERR', 'AVG_OUTERR'],\n", + " ['MIN_INNUPPS', 'MAX_INNUPPS', 'AVG_INNUPPS'],\n", + " ['MIN_OUTNUPPS', 'MAX_OUTNUPPS', 'AVG_OUTNUPPS'],\n", + " ['MIN_INPPS', 'MAX_INPPS', 'AVG_INPPS'],\n", + " ['MIN_OUTPPS', 'MAX_OUTPPS', 'AVG_OUTPPS']]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "COLS = [INBPS_COLS, OUTBPS_COLS, \n", + " INDISCARD_COLS, 
OUTDISCARD_COLS, \n", + " INERR_COLS, OUTERR_COLS, \n", + " INNUPPS_COLS, OUTNUPPS_COLS, \n", + " INPPS_COLS, OUTPPS_COLS]\n", + "\n", + "COLS" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "2d0de5f3", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "2d0de5f3", + "outputId": "8e14e897-9b33-483b-8f93-a95c5fa13efc" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YYYYMMDD
020210620
120210620
220210620
320210620
420210620
\n", + "
" + ], + "text/plain": [ + " YYYYMMDD\n", + "0 20210620\n", + "1 20210620\n", + "2 20210620\n", + "3 20210620\n", + "4 20210620" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dates_df = pd.DataFrame(df['YYYYMMDD'])\n", + "dates_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "5a96b6fa", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + }, + "id": "5a96b6fa", + "outputId": "6545e16c-31ac-4233-c536-8e37f133fa89" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YYYYMMDDMIN_INBPSMAX_INBPSAVG_INBPS
020210620669926699266992
120210620378243782437824
220210620879287928792
320210620144336144336144336
420210620456564565645656
...............
2319512320210626161956016195601619560
2319512420210626133448133448133448
2319512520210626151124815112481511248
2319512620210626438244382443824
2319512720210626558005580055800
\n", + "

23195128 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " YYYYMMDD MIN_INBPS MAX_INBPS AVG_INBPS\n", + "0 20210620 66992 66992 66992\n", + "1 20210620 37824 37824 37824\n", + "2 20210620 8792 8792 8792\n", + "3 20210620 144336 144336 144336\n", + "4 20210620 45656 45656 45656\n", + "... ... ... ... ...\n", + "23195123 20210626 1619560 1619560 1619560\n", + "23195124 20210626 133448 133448 133448\n", + "23195125 20210626 1511248 1511248 1511248\n", + "23195126 20210626 43824 43824 43824\n", + "23195127 20210626 55800 55800 55800\n", + "\n", + "[23195128 rows x 4 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[['YYYYMMDD','MIN_INBPS', 'MAX_INBPS', 'AVG_INBPS']]#.iloc[:,1:]" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "4fF3zaFJeJyZ", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4fF3zaFJeJyZ", + "outputId": "ebb0c6fa-cbc6-4088-945d-0e7cc5e517dc" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[20210620, 20210621, 20210622, 20210623, 20210624, 20210625, 20210626]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dates = list(df['YYYYMMDD'].unique())\n", + "dates" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "ZGaB7FIuV6AL", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ZGaB7FIuV6AL", + "outputId": "c86e9e93-233b-4d72-b8c4-ef9c64d46948" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['MIN_INBPS', 'MAX_INBPS', 'AVG_INBPS']\n", + "Comparing MIN and MAX : [ True]\n", + "Comparing MIN and AVG : [ True]\n", + "Comparing MAX and AVG : [ True]\n", + "------end------\n", + "['MIN_OUTBPS', 'MAX_OUTBPS', 'AVG_OUTBPS']\n", + "Comparing MIN and MAX : [ True]\n", + "Comparing MIN and AVG : [ True]\n", + "Comparing MAX and AVG : [ True]\n", + "------end------\n", + "['MIN_INDISCARD', 'MAX_INDISCARD', 
'AVG_INDISCARD']\n", + "Comparing MIN and MAX : [ True]\n", + "Comparing MIN and AVG : [ True]\n", + "Comparing MAX and AVG : [ True]\n", + "------end------\n", + "['MIN_OUTDISCARD', 'MAX_OUTDISCARD', 'AVG_OUTDISCARD']\n", + "Comparing MIN and MAX : [ True]\n", + "Comparing MIN and AVG : [ True]\n", + "Comparing MAX and AVG : [ True]\n", + "------end------\n", + "['MIN_INERR', 'MAX_INERR', 'AVG_INERR']\n", + "Comparing MIN and MAX : [ True]\n", + "Comparing MIN and AVG : [ True]\n", + "Comparing MAX and AVG : [ True]\n", + "------end------\n", + "['MIN_OUTERR', 'MAX_OUTERR', 'AVG_OUTERR']\n", + "Comparing MIN and MAX : [ True]\n", + "Comparing MIN and AVG : [ True]\n", + "Comparing MAX and AVG : [ True]\n", + "------end------\n", + "['MIN_INNUPPS', 'MAX_INNUPPS', 'AVG_INNUPPS']\n", + "Comparing MIN and MAX : [ True]\n", + "Comparing MIN and AVG : [ True]\n", + "Comparing MAX and AVG : [ True]\n", + "------end------\n", + "['MIN_OUTNUPPS', 'MAX_OUTNUPPS', 'AVG_OUTNUPPS']\n", + "Comparing MIN and MAX : [ True]\n", + "Comparing MIN and AVG : [ True]\n", + "Comparing MAX and AVG : [ True]\n", + "------end------\n", + "['MIN_INPPS', 'MAX_INPPS', 'AVG_INPPS']\n", + "Comparing MIN and MAX : [ True]\n", + "Comparing MIN and AVG : [ True]\n", + "Comparing MAX and AVG : [ True]\n", + "------end------\n", + "['MIN_OUTPPS', 'MAX_OUTPPS', 'AVG_OUTPPS']\n", + "Comparing MIN and MAX : [ True]\n", + "Comparing MIN and AVG : [ True]\n", + "Comparing MAX and AVG : [ True]\n", + "------end------\n" + ] + } + ], + "source": [ + "#['MIN_INBPS', 'MAX_INBPS', 'AVG_INBPS']\n", + "\n", + "for COL in COLS:\n", + " print(COL)\n", + " print(f'Comparing MIN and MAX : {pd.Series(df[COL[0]]==df[COL[1]]).unique()}')\n", + " print(f'Comparing MIN and AVG : {pd.Series(df[COL[0]]==df[COL[2]]).unique()}')\n", + " print(f'Comparing MAX and AVG : {pd.Series(df[COL[1]]==df[COL[2]]).unique()}')\n", + " print('------end------')" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": 
"2qezVIagVlkd", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2qezVIagVlkd", + "outputId": "43cf2793-1f51-42f9-e84a-1ccce0cb73c6" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ True])" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.Series(df['MIN_INBPS']==df['MAX_INBPS']).unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "BcrUXWcsTrYe", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "BcrUXWcsTrYe", + "outputId": "eacb6528-6066-4388-862b-8a7c71f0cb87" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['MIN_INBPS', 'MAX_INBPS', 'AVG_INBPS']\n", + " YYYYMMDD MIN_INBPS MAX_INBPS AVG_INBPS\n", + "0 20210620 66992 66992 66992\n", + "1 20210620 37824 37824 37824\n", + "2 20210620 8792 8792 8792\n", + "3 20210620 144336 144336 144336\n", + "4 20210620 45656 45656 45656\n", + " YYYYMMDD MIN_INBPS MAX_INBPS AVG_INBPS\n", + "23195123 20210626 1619560 1619560 1619560\n", + "23195124 20210626 133448 133448 133448\n", + "23195125 20210626 1511248 1511248 1511248\n", + "23195126 20210626 43824 43824 43824\n", + "23195127 20210626 55800 55800 55800\n", + "------end------\n", + "['MIN_OUTBPS', 'MAX_OUTBPS', 'AVG_OUTBPS']\n", + " YYYYMMDD MIN_OUTBPS MAX_OUTBPS AVG_OUTBPS\n", + "0 20210620 36576 36576 36576\n", + "1 20210620 36192 36192 36192\n", + "2 20210620 28104 28104 28104\n", + "3 20210620 43680 43680 43680\n", + "4 20210620 50720 50720 50720\n", + " YYYYMMDD MIN_OUTBPS MAX_OUTBPS AVG_OUTBPS\n", + "23195123 20210626 279856 279856 279856\n", + "23195124 20210626 16752 16752 16752\n", + "23195125 20210626 38144 38144 38144\n", + "23195126 20210626 72704 72704 72704\n", + "23195127 20210626 69464 69464 69464\n", + "------end------\n", + "['MIN_INDISCARD', 'MAX_INDISCARD', 'AVG_INDISCARD']\n", + " YYYYMMDD MIN_INDISCARD MAX_INDISCARD AVG_INDISCARD\n", + "0 
20210620 0 0 0\n", + "1 20210620 0 0 0\n", + "2 20210620 0 0 0\n", + "3 20210620 0 0 0\n", + "4 20210620 0 0 0\n", + " YYYYMMDD MIN_INDISCARD MAX_INDISCARD AVG_INDISCARD\n", + "23195123 20210626 0 0 0\n", + "23195124 20210626 0 0 0\n", + "23195125 20210626 0 0 0\n", + "23195126 20210626 0 0 0\n", + "23195127 20210626 0 0 0\n", + "------end------\n", + "['MIN_OUTDISCARD', 'MAX_OUTDISCARD', 'AVG_OUTDISCARD']\n", + " YYYYMMDD MIN_OUTDISCARD MAX_OUTDISCARD AVG_OUTDISCARD\n", + "0 20210620 0 0 0\n", + "1 20210620 0 0 0\n", + "2 20210620 0 0 0\n", + "3 20210620 0 0 0\n", + "4 20210620 0 0 0\n", + " YYYYMMDD MIN_OUTDISCARD MAX_OUTDISCARD AVG_OUTDISCARD\n", + "23195123 20210626 0 0 0\n", + "23195124 20210626 0 0 0\n", + "23195125 20210626 0 0 0\n", + "23195126 20210626 0 0 0\n", + "23195127 20210626 0 0 0\n", + "------end------\n", + "['MIN_INERR', 'MAX_INERR', 'AVG_INERR']\n", + " YYYYMMDD MIN_INERR MAX_INERR AVG_INERR\n", + "0 20210620 0 0 0\n", + "1 20210620 0 0 0\n", + "2 20210620 0 0 0\n", + "3 20210620 0 0 0\n", + "4 20210620 0 0 0\n", + " YYYYMMDD MIN_INERR MAX_INERR AVG_INERR\n", + "23195123 20210626 0 0 0\n", + "23195124 20210626 0 0 0\n", + "23195125 20210626 0 0 0\n", + "23195126 20210626 0 0 0\n", + "23195127 20210626 0 0 0\n", + "------end------\n", + "['MIN_OUTERR', 'MAX_OUTERR', 'AVG_OUTERR']\n", + " YYYYMMDD MIN_OUTERR MAX_OUTERR AVG_OUTERR\n", + "0 20210620 0 0 0\n", + "1 20210620 0 0 0\n", + "2 20210620 0 0 0\n", + "3 20210620 0 0 0\n", + "4 20210620 0 0 0\n", + " YYYYMMDD MIN_OUTERR MAX_OUTERR AVG_OUTERR\n", + "23195123 20210626 0 0 0\n", + "23195124 20210626 0 0 0\n", + "23195125 20210626 0 0 0\n", + "23195126 20210626 0 0 0\n", + "23195127 20210626 0 0 0\n", + "------end------\n", + "['MIN_INNUPPS', 'MAX_INNUPPS', 'AVG_INNUPPS']\n", + " YYYYMMDD MIN_INNUPPS MAX_INNUPPS AVG_INNUPPS\n", + "0 20210620 0 0 0\n", + "1 20210620 0 0 0\n", + "2 20210620 0 0 0\n", + "3 20210620 0 0 0\n", + "4 20210620 0 0 0\n", + " YYYYMMDD MIN_INNUPPS MAX_INNUPPS 
AVG_INNUPPS\n", + "23195123 20210626 0 0 0\n", + "23195124 20210626 0 0 0\n", + "23195125 20210626 0 0 0\n", + "23195126 20210626 0 0 0\n", + "23195127 20210626 0 0 0\n", + "------end------\n", + "['MIN_OUTNUPPS', 'MAX_OUTNUPPS', 'AVG_OUTNUPPS']\n", + " YYYYMMDD MIN_OUTNUPPS MAX_OUTNUPPS AVG_OUTNUPPS\n", + "0 20210620 4572 4572 4572\n", + "1 20210620 14 14 14\n", + "2 20210620 10 10 10\n", + "3 20210620 16 16 16\n", + "4 20210620 16 16 16\n", + " YYYYMMDD MIN_OUTNUPPS MAX_OUTNUPPS AVG_OUTNUPPS\n", + "23195123 20210626 72 72 72\n", + "23195124 20210626 11 11 11\n", + "23195125 20210626 27 27 27\n", + "23195126 20210626 21 21 21\n", + "23195127 20210626 28 28 28\n", + "------end------\n", + "['MIN_INPPS', 'MAX_INPPS', 'AVG_INPPS']\n", + " YYYYMMDD MIN_INPPS MAX_INPPS AVG_INPPS\n", + "0 20210620 22 22 22\n", + "1 20210620 10 10 10\n", + "2 20210620 5 5 5\n", + "3 20210620 20 20 20\n", + "4 20210620 11 11 11\n", + " YYYYMMDD MIN_INPPS MAX_INPPS AVG_INPPS\n", + "23195123 20210626 164 164 164\n", + "23195124 20210626 14 14 14\n", + "23195125 20210626 131 131 131\n", + "23195126 20210626 17 17 17\n", + "23195127 20210626 22 22 22\n", + "------end------\n", + "['MIN_OUTPPS', 'MAX_OUTPPS', 'AVG_OUTPPS']\n", + " YYYYMMDD MIN_OUTPPS MAX_OUTPPS AVG_OUTPPS\n", + "0 20210620 4572 4572 4572\n", + "1 20210620 14 14 14\n", + "2 20210620 10 10 10\n", + "3 20210620 16 16 16\n", + "4 20210620 16 16 16\n", + " YYYYMMDD MIN_OUTPPS MAX_OUTPPS AVG_OUTPPS\n", + "23195123 20210626 72 72 72\n", + "23195124 20210626 11 11 11\n", + "23195125 20210626 27 27 27\n", + "23195126 20210626 21 21 21\n", + "23195127 20210626 28 28 28\n", + "------end------\n" + ] + } + ], + "source": [ + "for COL in COLS:\n", + " print(COL)\n", + " print(df[['YYYYMMDD']+COL].head())\n", + " print(df[['YYYYMMDD']+COL].tail())\n", + " print('------end------')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "yiYfd3poUX1Y", + "metadata": { + "id": "yiYfd3poUX1Y" + }, + "outputs": [], + "source": [ 
+ "for COL in COLS:\n", + " print(COL)\n", + " print(df[['YYYYMMDD']+COL].head())\n", + " print(df[['YYYYMMDD']+COL].tail())\n", + " print('------end------')" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "15a93a5d", + "metadata": { + "id": "15a93a5d" + }, + "outputs": [], + "source": [ + "vis_cols = ['AVG_INERR', 'AVG_INBPS', 'AVG_INDISCARD', 'AVG_INNUPPS', 'AVG_INPPS', \n", + " 'AVG_OUTERR', 'AVG_OUTBPS', 'AVG_OUTDISCARD', 'AVG_OUTNUPPS', 'AVG_OUTPPS']" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "sFFZxrFqfc82", + "metadata": { + "id": "sFFZxrFqfc82" + }, + "outputs": [], + "source": [ + "save_path = '/gdrive/MyDrive/Colab Notebooks/results/'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9q7_SdSEevGA", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9q7_SdSEevGA", + "outputId": "ebd6a4c7-6a81-4950-858c-19393a486c32" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "AVG INERR\n", + "AVG INBPS\n", + "AVG INDISCARD\n", + "AVG INNUPPS\n", + "AVG INPPS\n", + "AVG OUTERR\n", + "AVG OUTBPS\n", + "AVG OUTDISCARD\n", + "AVG OUTNUPPS\n", + "AVG OUTPPS\n" + ] + } + ], + "source": [ + "for col in vis_cols:\n", + " stat, _, name = col.partition(\"_\")\n", + " print(stat, name)\n", + "\n", + " # define plot_df\n", + " plot_df = df[col]\n", + "\n", + " # define fig\n", + " fig = plt.figure()\n", + "\n", + " plot_df.plot(figsize = (12, 6))\n", + "\n", + " plt.title(f'{name}')\n", + "\n", + " plt.savefig(save_path+f\"{name}_plot.png\", dpi=200)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "078c6ecb", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 449 + }, + "id": "078c6ecb", + "outputId": "979574a9-6f91-46aa-fe69-0ab5e8c3badd", + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "vis_col: INERR\n", + ">>> plotting 20210620\n" 
+ ] + }, + { + "ename": "IndexError", + "evalue": "ignored", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[0mplot_df\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdate_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mc\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'MAX'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 33\u001b[0;31m \u001b[0mplot_df\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdate_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 34\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 35\u001b[0m \u001b[0;31m# define fig\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 871\u001b[0m \u001b[0;31m# AttributeError for IntervalTree get_value\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 872\u001b[0m \u001b[0;32mpass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 873\u001b[0;31m \u001b[0;32mreturn\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_tuple\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 874\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 875\u001b[0m \u001b[0;31m# we by definition only have the 0th axis\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_getitem_tuple\u001b[0;34m(self, tup)\u001b[0m\n\u001b[1;32m 1441\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_getitem_tuple\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtup\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mTuple\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1442\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1443\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_has_valid_tuple\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtup\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1444\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1445\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_lowerdim\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtup\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_has_valid_tuple\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 700\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mIndexingError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Too many 
indexers\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 701\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 702\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_key\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 703\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 704\u001b[0m raise ValueError(\n", + "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_validate_key\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1350\u001b[0m \u001b[0;32mreturn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1351\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mis_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1352\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1353\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtuple\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1354\u001b[0m \u001b[0;31m# a tuple should already have been caught by this point\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + 
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_validate_integer\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1435\u001b[0m \u001b[0mlen_axis\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1436\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;34m>=\u001b[0m \u001b[0mlen_axis\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0mlen_axis\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1437\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mIndexError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"single positional indexer is out-of-bounds\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1438\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1439\u001b[0m \u001b[0;31m# -------------------------------------------------------------------\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mIndexError\u001b[0m: single positional indexer is out-of-bounds" + ] + } + ], + "source": [ + "for vis_col in vis_cols:\n", + " print(f'vis_col: {vis_col}')\n", + " tmp_cols = []\n", + " \n", + " for col in cols:\n", + " #print(f'>>>>>col: {col}')\n", + " if vis_col in col:\n", + " tmp_cols.append(col)\n", + " \n", + " #print(tmp_cols)\n", + " #print('--------------------')\n", + " tmp_df = df[tmp_cols]\n", + " \n", + " # make tmp dataframe\n", + " tmp_df = pd.concat([dates_df, tmp_df], axis = 1)\n", + " \n", + " # for dates\n", + " for date in dates:\n", + " print(f'>>> plotting {date}')\n", + " \n", + " # use data 
at certain date\n", + " date_df = tmp_df[tmp_df['YYYYMMDD']==date].iloc[:,1:]\n", + " \n", + " criteria = ['MAX', 'MIN', 'AVG']\n", + " \n", + " for c in criteria:\n", + " \n", + " if c == 'AVG':\n", + " plot_df = date_df.iloc[:,0]\n", + " elif c == 'MIN':\n", + " plot_df = date_df.iloc[:,1]\n", + " elif c == 'MAX':\n", + " plot_df = date_df.iloc[:,2]\n", + " \n", + " # define fig\n", + " fig = plt.figure()\n", + "\n", + " plot_df.plot(figsize = (12, 6))\n", + "\n", + " plt.title(f'{c}_{vis_col}({date}) ')\n", + "\n", + " plt.savefig(save_path+ f\"/{c}_{vis_col}_{date}.png\", dpi=200)\n", + " \n", + " plt.close('all')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ee353892", + "metadata": { + "id": "ee353892", + "outputId": "f6998abb-c304-439e-a2a0-b4973b1602a4", + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "vis_col: INERR\n", + "> plotting 20210620\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0009929763219236904\n", + ">> MAX(MAX): 103\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.0009929763219236904\n", + ">> MIN(MAX): 103\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0009929763219236904\n", + ">> AVG(MAX): 103\n", + "> plotting 20210621\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.1070327038198467\n", + ">> MAX(MAX): 9360\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.1070327038198467\n", + ">> MIN(MAX): 9360\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.1070327038198467\n", + ">> AVG(MAX): 9360\n", + "> plotting 20210622\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.21545382251356252\n", + ">> MAX(MAX): 12166\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.21545382251356252\n", + ">> MIN(MAX): 12166\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.21545382251356252\n", + ">> AVG(MAX): 12166\n", + "> plotting 20210623\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.1899605151255562\n", + ">> MAX(MAX): 20995\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.1899605151255562\n", + ">> MIN(MAX): 20995\n", + ">> 
AVG(MIN): 0\n", + ">> AVG(AVG): 0.1899605151255562\n", + ">> AVG(MAX): 20995\n", + "> plotting 20210624\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.3504950319532711\n", + ">> MAX(MAX): 87505\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.3504950319532711\n", + ">> MIN(MAX): 87505\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.3504950319532711\n", + ">> AVG(MAX): 87505\n", + "> plotting 20210625\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.09246079843196818\n", + ">> MAX(MAX): 16707\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.09246079843196818\n", + ">> MIN(MAX): 16707\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.09246079843196818\n", + ">> AVG(MAX): 16707\n", + "> plotting 20210626\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.013125300391402939\n", + ">> MAX(MAX): 3539\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.013125300391402939\n", + ">> MIN(MAX): 3539\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.013125300391402939\n", + ">> AVG(MAX): 3539\n", + "vis_col: OUTDROP\n", + "> plotting 20210620\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0\n", + ">> MAX(MAX): 0\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.0\n", + ">> MIN(MAX): 0\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0\n", + ">> AVG(MAX): 0\n", + "> plotting 20210621\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0\n", + ">> MAX(MAX): 0\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.0\n", + ">> MIN(MAX): 0\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0\n", + ">> AVG(MAX): 0\n", + "> plotting 20210622\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0\n", + ">> MAX(MAX): 0\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.0\n", + ">> MIN(MAX): 0\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0\n", + ">> AVG(MAX): 0\n", + "> plotting 20210623\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0\n", + ">> MAX(MAX): 0\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.0\n", + ">> MIN(MAX): 0\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0\n", + ">> AVG(MAX): 0\n", + "> plotting 20210624\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0\n", + ">> MAX(MAX): 0\n", + ">> 
MIN(MIN): 0\n", + ">> MIN(AVG): 0.0\n", + ">> MIN(MAX): 0\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0\n", + ">> AVG(MAX): 0\n", + "> plotting 20210625\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0\n", + ">> MAX(MAX): 0\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.0\n", + ">> MIN(MAX): 0\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0\n", + ">> AVG(MAX): 0\n", + "> plotting 20210626\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0\n", + ">> MAX(MAX): 0\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.0\n", + ">> MIN(MAX): 0\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0\n", + ">> AVG(MAX): 0\n", + "vis_col: INNUPPS\n", + "> plotting 20210620\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 1.4232476784349255\n", + ">> MAX(MAX): 230423\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 1.4232476784349255\n", + ">> MIN(MAX): 230423\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 1.4232476784349255\n", + ">> AVG(MAX): 230423\n", + "> plotting 20210621\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 1.3255765156290704\n", + ">> MAX(MAX): 4693\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 1.3255765156290704\n", + ">> MIN(MAX): 4693\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 1.3255765156290704\n", + ">> AVG(MAX): 4693\n", + "> plotting 20210622\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 1.300698597051876\n", + ">> MAX(MAX): 3564\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 1.300698597051876\n", + ">> MIN(MAX): 3564\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 1.300698597051876\n", + ">> AVG(MAX): 3564\n", + "> plotting 20210623\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 1.3138820131438875\n", + ">> MAX(MAX): 6472\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 1.3138820131438875\n", + ">> MIN(MAX): 6472\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 1.3138820131438875\n", + ">> AVG(MAX): 6472\n", + "> plotting 20210624\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 1.414565846192937\n", + ">> MAX(MAX): 8387\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 1.414565846192937\n", + ">> MIN(MAX): 8387\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 
1.414565846192937\n", + ">> AVG(MAX): 8387\n", + "> plotting 20210625\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 1.3606745082676048\n", + ">> MAX(MAX): 3692\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 1.3606745082676048\n", + ">> MIN(MAX): 3692\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 1.3606745082676048\n", + ">> AVG(MAX): 3692\n", + "> plotting 20210626\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 1.4019500500077176\n", + ">> MAX(MAX): 4121\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 1.4019500500077176\n", + ">> MIN(MAX): 4121\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 1.4019500500077176\n", + ">> AVG(MAX): 4121\n", + "vis_col: OUTNUPPS\n", + "> plotting 20210620\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 2065.437395895265\n", + ">> MAX(MAX): 33784710\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 2065.437395895265\n", + ">> MIN(MAX): 33784710\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 2065.437395895265\n", + ">> AVG(MAX): 33784710\n", + "> plotting 20210621\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 10884.217727320281\n", + ">> MAX(MAX): 45275418\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 10884.217727320281\n", + ">> MIN(MAX): 45275418\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 10884.217727320281\n", + ">> AVG(MAX): 45275418\n", + "> plotting 20210622\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 10274.470403238776\n", + ">> MAX(MAX): 34360111\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 10274.470403238776\n", + ">> MIN(MAX): 34360111\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 10274.470403238776\n", + ">> AVG(MAX): 34360111\n", + "> plotting 20210623\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 10379.628168063316\n", + ">> MAX(MAX): 49227501\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 10379.628168063316\n", + ">> MIN(MAX): 49227501\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 10379.628168063316\n", + ">> AVG(MAX): 49227501\n", + "> plotting 20210624\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 10318.102998850687\n", + ">> MAX(MAX): 46340093\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 
10318.102998850687\n", + ">> MIN(MAX): 46340093\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 10318.102998850687\n", + ">> AVG(MAX): 46340093\n", + "> plotting 20210625\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 9422.836515565848\n", + ">> MAX(MAX): 32470807\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 9422.836515565848\n", + ">> MIN(MAX): 32470807\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 9422.836515565848\n", + ">> AVG(MAX): 32470807\n", + "> plotting 20210626\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 2315.964770284498\n", + ">> MAX(MAX): 26240727\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 2315.964770284498\n", + ">> MIN(MAX): 26240727\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 2315.964770284498\n", + ">> AVG(MAX): 26240727\n", + "vis_col: INDISCARD\n", + "> plotting 20210620\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 11.453699935354793\n", + ">> MAX(MAX): 8403755\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 11.453699935354793\n", + ">> MIN(MAX): 8403755\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 11.453699935354793\n", + ">> AVG(MAX): 8403755\n", + "> plotting 20210621\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 102.17135060146234\n", + ">> MAX(MAX): 3169587\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 102.17135060146234\n", + ">> MIN(MAX): 3169587\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 102.17135060146234\n", + ">> AVG(MAX): 3169587\n", + "> plotting 20210622\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 109.02586151696624\n", + ">> MAX(MAX): 3894820\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 109.02586151696624\n", + ">> MIN(MAX): 3894820\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 109.02586151696624\n", + ">> AVG(MAX): 3894820\n", + "> plotting 20210623\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 124.03730567319344\n", + ">> MAX(MAX): 3365716\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 124.03730567319344\n", + ">> MIN(MAX): 3365716\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 124.03730567319344\n", + ">> AVG(MAX): 3365716\n", + "> plotting 20210624\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 
129.86653843994765\n", + ">> MAX(MAX): 9742979\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 129.86653843994765\n", + ">> MIN(MAX): 9742979\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 129.86653843994765\n", + ">> AVG(MAX): 9742979\n", + "> plotting 20210625\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 101.42783862050769\n", + ">> MAX(MAX): 3252166\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 101.42783862050769\n", + ">> MIN(MAX): 3252166\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 101.42783862050769\n", + ">> AVG(MAX): 3252166\n", + "> plotting 20210626\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 16.78371724870965\n", + ">> MAX(MAX): 877271\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 16.78371724870965\n", + ">> MIN(MAX): 877271\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 16.78371724870965\n", + ">> AVG(MAX): 877271\n", + "vis_col: INBCASTPPS\n", + "> plotting 20210620\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0\n", + ">> MAX(MAX): 0\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.0\n", + ">> MIN(MAX): 0\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0\n", + ">> AVG(MAX): 0\n", + "> plotting 20210621\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0\n", + ">> MAX(MAX): 0\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.0\n", + ">> MIN(MAX): 0\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0\n", + ">> AVG(MAX): 0\n", + "> plotting 20210622\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0\n", + ">> MAX(MAX): 0\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.0\n", + ">> MIN(MAX): 0\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0\n", + ">> AVG(MAX): 0\n", + "> plotting 20210623\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0\n", + ">> MAX(MAX): 0\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.0\n", + ">> MIN(MAX): 0\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0\n", + ">> AVG(MAX): 0\n", + "> plotting 20210624\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0\n", + ">> MAX(MAX): 0\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.0\n", + ">> MIN(MAX): 0\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0\n", + ">> AVG(MAX): 0\n", + "> 
plotting 20210625\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0\n", + ">> MAX(MAX): 0\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.0\n", + ">> MIN(MAX): 0\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0\n", + ">> AVG(MAX): 0\n", + "> plotting 20210626\n", + ">> MAX(MIN): 0\n", + ">> MAX(AVG): 0.0\n", + ">> MAX(MAX): 0\n", + ">> MIN(MIN): 0\n", + ">> MIN(AVG): 0.0\n", + ">> MIN(MAX): 0\n", + ">> AVG(MIN): 0\n", + ">> AVG(AVG): 0.0\n", + ">> AVG(MAX): 0\n" + ] + } + ], + "source": [ + "for vis_col in vis_cols:\n", + " print(f'vis_col: {vis_col}')\n", + " tmp_cols = []\n", + " \n", + " for col in cols:\n", + " #print(f'>>>>>col: {col}')\n", + " if vis_col in col:\n", + " tmp_cols.append(col)\n", + " \n", + " #print(tmp_cols)\n", + " #print('--------------------')\n", + " tmp_df = df[tmp_cols]\n", + " \n", + " # make tmp dataframe\n", + " tmp_df = pd.concat([dates_df, tmp_df], axis = 1)\n", + " \n", + " # for dates\n", + " for date in dates:\n", + " print(f'> plotting {date}')\n", + " \n", + " # use data at certain date\n", + " date_df = tmp_df[tmp_df['YYYYMMDD']==date].iloc[:,1:]\n", + " \n", + " criteria = ['MAX', 'MIN', 'AVG']\n", + " \n", + " for c in criteria:\n", + " \n", + " if c == 'AVG':\n", + " plot_df = date_df.iloc[:,0]\n", + " plot_avg = plot_df.mean()\n", + " plot_min = plot_df.min()\n", + " plot_max = plot_df.max()\n", + " print(f'>> AVG(MIN): {plot_min}')\n", + " print(f'>> AVG(AVG): {plot_avg}')\n", + " print(f'>> AVG(MAX): {plot_max}')\n", + "\n", + " elif c == 'MIN':\n", + " plot_df = date_df.iloc[:,1]\n", + " plot_avg = plot_df.mean()\n", + " plot_min = plot_df.min()\n", + " plot_max = plot_df.max()\n", + " print(f'>> MIN(MIN): {plot_min}')\n", + " print(f'>> MIN(AVG): {plot_avg}')\n", + " print(f'>> MIN(MAX): {plot_max}')\n", + " elif c == 'MAX':\n", + " plot_df = date_df.iloc[:,2]\n", + " plot_avg = plot_df.mean()\n", + " plot_min = plot_df.min()\n", + " plot_max = plot_df.max()\n", + " print(f'>> MAX(MIN): {plot_min}')\n", + " print(f'>> MAX(AVG): 
{plot_avg}')\n", + " print(f'>> MAX(MAX): {plot_max}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31923da3", + "metadata": { + "id": "31923da3" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "machine_shape": "hm", + "name": "Hamon_EDA.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/etc/test_info.csv b/etc/test_info.csv new file mode 100644 index 0000000..02e5c63 --- /dev/null +++ b/etc/test_info.csv @@ -0,0 +1,9 @@ +,C01,C02,C03,C04,C05,C06,C07,C08,C09,C10,C11,C12,C13,C14,C15,C16,C17,C18,C19,C20,C21,C22,C23,C24,C25,C26,C27,C28,C29,C30,C31,C32,C33,C34,C35,C36,C37,C38,C39,C40,C41,C42,C43,C44,C45,C46,C47,C48,C49,C50,C51,C52,C53,C54,C55,C56,C57,C58,C59,C60,C61,C62,C63,C64,C65,C66,C67,C68,C69,C70,C71,C72,C73,C74,C75,C76,C77,C78,C79,C80,C81,C82,C83,C84,C85,C86 
+countmean,-2.690713361717612,0.00041848617176128096,12.083211255276565,-2.6023653566229986e-05,8.297552861062591,-0.3120051124454149,12.589179724890831,70.0,1.0,50.0,13251.349705240174,14.710806553457061,28.034372878165936,27050.774286754004,27.71467294581514,30.798442513791844,0.00011280931586608442,1.0,50.0,27.567075439701593,29.76383149796216,1.0,334.2999266182314,457.03519498948316,776.1617285298399,50.0,18271.534901746723,1.1185529979985442,1.0,54736.97572780204,993.203884737045,61.7955894588428,5.419255295123727,0.00011280931586608442,112.20986769701601,2880.0,25.56610018304222,0.0,0.0,78.25902647510917,7.729104944687045,0.00011838562590975255,14.874105055567691,11.953816232532748,84.94359995141922,25.0,80.62922604930856,0.9998871906841339,0.0,4929.470684133916,41.93711019734352,1.0,32.52107377085152,0.48547810986171774,0.0,334.0381702335882,456.23843434028373,17.816796929912666,49.330099638973806,720.0130115505823,0.9998871906841339,906.8498508005822,0.0,0.00011280931586608442,6987.690887918487,0.12581510385735079,9.279114564919938,9976.468133187773,0.999505094614265,321.3114397127729,20.876548724636102,8.760157998544397,244.31672755917032,296.4415334772561,31.294590227256183,48.790446494541484,1.1183014660844253,29.375247382569142,5.018883482423579,296.407937148035,366.6985281249271,1.0,993.2003626689228,12.003852831113539,50.0,1121.6063427947597 
+std,0.5681280540025946,0.020452690857904385,0.2400350705119165,0.0016414225756156225,6.465559210823236,0.48106085818661865,0.057858468524922275,0.0,0.0,0.0,4153.1899031438315,3.409162383835392,0.0036753632482925476,71.70094250595464,0.772048310440249,3.719799654683417,0.010620593222309757,0.0,0.0,0.20362254904883526,0.7409660121868645,0.0,35.746581631979346,26.79836888880832,1314.8379458762295,0.0,1930.0727219859316,0.11229802459042486,0.0,89.5715663136732,31.413536307836782,11.085437424159368,0.6668449473052205,0.010620593222309757,7.9570470675043286,0.0,0.11157455286730868,0.0,0.0,148.23775618713944,0.5877156015581081,0.0020918972107001896,3.4370911168635327,0.2732193357622903,31.7853538898607,0.0,28.90658459963489,0.010620593222309757,0.0,5102.522608120266,4.793741692526357,0.0,31.20169737177613,0.3031595073388494,0.0,39.210911963147915,25.311289957876348,7.2184996284720375,1.5599016430941102,0.0730619310853684,0.010620593222309757,22.862428130689487,0.0,0.010620593222309757,6765.889692046581,0.038900994729483836,13.482452921384056,31.32189139646794,0.022241003899897034,47.55082342757309,15.211597309275012,6.271717235618787,15.525020693831664,415.22109941429875,3.772151468241192,1.4840426599620453,0.11136260954141612,0.36796069440622237,2.6904788145432303,415.2088832340863,59.69168937357197,0.0,31.06721269028631,0.15791451637071036,0.0,1700.3330700892282 
+min,-4.38,0.0,11.781310000000001,-0.01128,0.27618000000000004,-1.0904,0.0,70.0,1.0,50.0,5728.0,0.26855,28.0195,26855.0,26.12927,0.0,0.0,1.0,50.0,27.33016,27.7771,1.0,232.69312000000002,226.45827999999997,-3.0,50.0,12758.0,0.77657,1.0,51648.0,913.32428,0.41963,0.0,0.0,0.0,2880.0,25.461389999999998,0.0,0.0,1.37329,0.0,-0.0319,0.0,2e-05,15.0,25.0,12.307739999999999,0.0,0.0,0.0,34.445190000000004,1.0,0.09042,0.30914,0.0,229.70923,410.71246,6.8288,43.35938,719.67841,0.0,0.0,0.0,0.0,-112.0,0.06324,0.0,9669.0,0.0,2.1849,0.0,0.0,208.03696000000002,7.15028,0.35248,42.6377,-0.012819999999999998,28.26935,2.40398,7.15028,230.52301,1.0,954.5495,12.0,50.0,1.0 +25%,-3.1289999999999996,0.0,11.90338,-0.00109,3.74756,-0.6093,12.5887,70.0,1.0,50.0,9518.0,12.84943,28.03166,26989.0,27.04294,28.447560000000003,0.0,1.0,50.0,27.33016,29.13513,1.0,313.784,433.22629000000006,3.0,50.0,17158.0,1.0484799999999999,1.0,54717.0,967.46063,56.49566,4.9334,0.0,108.24304,2880.0,25.461389999999998,0.0,0.0,4.99723,7.3751,-7e-05,12.958884999999999,11.907010000000001,100.0,25.0,60.599892499999996,1.0,0.0,120.0,37.619009999999996,1.0,0.2532,0.34424,0.0,308.4129,432.86947999999995,11.467260000000001,48.08197,719.96497,1.0,903.0,0.0,0.0,32.0,0.08793,0.0,9962.0,1.0,294.15405,0.0,3.90132,231.78097999999997,26.01891,28.87115,47.581759999999996,1.05377,29.148979999999998,2.85153,26.01891,319.73382999999995,1.0,966.99188,12.0,50.0,7.0 
+50%,-2.7668,0.0,12.01782,0.0,6.28052,-0.4303,12.5887,70.0,1.0,50.0,13516.0,14.80255,28.034259999999996,27061.0,27.69424,30.406689999999998,0.0,1.0,50.0,27.554170000000003,29.86755,1.0,332.26636,460.47165,12.0,50.0,18171.0,1.11209,1.0,54738.0,983.11615,61.454730000000005,5.3311,0.0,112.25126999999999,2880.0,25.461389999999998,0.0,0.0,17.585729999999998,7.5982,7e-05,14.994025,11.953465,100.0,25.0,97.36022,1.0,0.0,3080.0,41.43371,1.0,23.20239,0.36713,0.0,333.0802,457.71722,20.44763,49.256890000000006,720.01581,1.0,907.0,0.0,0.0,4768.0,0.12549000000000002,2.240645,9973.0,1.0,319.97427000000005,26.66585,6.76975,238.64742999999999,91.56302,30.99213,48.542535,1.1186200000000002,29.35769,3.3442800000000004,91.56302,364.80035,1.0,980.1375099999999,12.0,50.0,61.0 +75%,-2.4478,0.0,12.36877,0.00101,13.833620000000002,-0.2435,12.58874,70.0,1.0,50.0,16990.0,16.54205,28.036859999999997,27103.0,28.402620000000002,32.5418,0.0,1.0,50.0,27.81151,30.310059999999996,1.0,351.59869,479.69162,970.0,50.0,19218.0,1.17945,1.0,54757.0,1023.1850599999999,66.22310999999999,5.6572,0.0,115.89333,2880.0,25.687170000000002,0.0,0.0,61.454730000000005,7.8422,0.00029,16.780907499999998,11.99618,100.0,25.0,97.37548000000001,1.0,0.0,9992.0,46.30127,1.0,66.18921,0.37933,0.0,355.68579,482.78879000000006,22.40454,50.56152,720.06061,1.0,911.0,0.0,0.0,14304.0,0.15045,13.9667375,9999.0,1.0,344.80163999999996,35.0,14.2204,258.48389,319.97427000000005,33.04443,49.917792500000004,1.18118,29.58042,8.81242,319.97427000000005,414.02631,1.0,1024.1756599999999,12.0,50.0,2355.0 
+max,0.0,1.0,25.23956,0.01548,34.91364,1.4792,18.0,70.0,1.0,50.0,22711.0,92.37823,28.05423,27265.0,29.441470000000002,75.19748,1.0,1.0,50.0,27.81151,31.927490000000002,1.0,493.03747999999996,670.2044099999999,6080.0,50.0,26731.0,1.61973,1.0,55806.0,1063.68652,415.84015,7.5893,1.0,127.79274,2880.0,25.687170000000002,0.0,0.0,1221.50415,9.6698,0.033280000000000004,91.08211999999999,13.02161,100.0,25.0,97.68066,1.0,0.0,17768.0,49.93285,1.0,86.35339,1.53061,0.0,497.23309000000006,489.51122999999995,28.4608,57.786559999999994,720.26819,1.0,1106.0,0.0,1.0,20656.0,0.19749,67.14787,10248.0,1.0,1286.28479,40.0,35.038990000000005,282.70721000000003,2228.75806,76.51671999999999,57.88961,2.05933,31.02771,9.9604,2228.75806,499.78295999999995,1.0,1047.1856699999998,25.0,50.0,6927.0 diff --git a/etc/train_info.csv b/etc/train_info.csv new file mode 100644 index 0000000..f7d1cd8 --- /dev/null +++ b/etc/train_info.csv @@ -0,0 +1,9 @@ +,C01,C02,C03,C04,C05,C06,C07,C08,C09,C10,C11,C12,C13,C14,C15,C16,C17,C18,C19,C20,C21,C22,C23,C24,C25,C26,C27,C28,C29,C30,C31,C32,C33,C34,C35,C36,C37,C38,C39,C40,C41,C42,C43,C44,C45,C46,C47,C48,C49,C50,C51,C52,C53,C54,C55,C56,C57,C58,C59,C60,C61,C62,C63,C64,C65,C66,C67,C68,C69,C70,C71,C72,C73,C74,C75,C76,C77,C78,C79,C80,C81,C82,C83,C84,C85,C86 
+countmean,-2.068787571709336,0.0,12.244646957931186,-3.072509811808419e-05,65.07154146955104,0.44577336166196374,12.599763282639822,74.48028777322227,1.0,50.0,14693.801581438507,15.541538997264048,28.035262942327865,27210.896770416624,28.144403459122948,34.14727118754244,0.08009442434403755,1.0,50.0,27.417241166923173,31.427579729002932,1.0,331.1973085985991,449.4780387570912,781.2005571474369,50.0,18078.83650171943,1.105594034201445,1.0,54705.525781509794,1009.9753670177777,64.43072233442385,6.063227472864452,0.08009541996133022,113.52418833603467,2880.0,30.094778600420966,0.0,0.0,1633.1039159324844,8.464796172050635,0.00011299864994295116,15.682983392117888,11.993893181943086,36.34008943093502,22.759856113388864,32.70780895271017,0.9199045800386698,0.0,4821.710343069807,38.84628559887374,1.0,35.19280239248827,0.43801698743132744,0.0,330.92928451551285,448.66954795727196,14.309426565229863,50.663483122634155,720.0587924286391,0.9199055756559624,899.2094649353546,0.0,0.08009442434403755,7593.734795430515,0.12330798264041688,69.14063293854451,9972.942004297085,1.0,335.45187440361525,32.12630538990365,64.96661636830672,253.06461754758558,2202.2179012579027,34.721946107136375,50.07516692115308,1.1055778428059682,33.869057637360335,4.192900486169884,2202.20052452744,366.26226368724855,1.0,1009.9765101432093,12.000096351610212,50.0,1126.177493672852 
+std,0.9973781779292777,0.0,0.13220908913816462,0.0016441980134274594,41.241220162838665,0.9490235841204151,0.23123955904435284,4.972919044472526,0.0,0.0,4239.378640242929,2.6242861412641942,0.004702159607627068,105.66174451344102,0.6865890759661545,4.697763331358924,0.2714394608186152,0.0,0.0,0.2435842597408327,1.0861409369376545,0.0,32.75944125354465,30.936915715712995,1312.3061733121647,0.0,1773.9917048343993,0.10444978700291914,0.0,140.0807066054632,39.98494551687945,8.033772199829684,1.1015370005999874,0.27144100099256646,8.377749784930145,0.0,2.8866167758266794,0.0,0.0,1130.3406615091844,0.9659159234903938,0.002097804379129603,2.609598865321592,0.10670483321058306,36.472913245517695,2.486459522236263,33.40571009462432,0.27144100099256646,0.0,5027.134653423027,3.6219558971385433,0.0,31.326284304003376,0.22844839384917232,0.0,36.29853863318136,30.388422949162763,13.198126162058122,3.0266593379149898,0.07449546482319407,0.2714394608186152,26.85508225932885,0.0,0.2714394608186152,6809.536105849556,0.04335728230399993,41.881694157686745,25.369391804062506,0.0,41.56541792025882,8.066558169633623,40.91532344854486,21.34890786115544,1329.7656914378495,4.7750404380439075,2.9476095714644055,0.08886520949378571,2.897106397948702,3.445795065515289,1329.7747584787655,60.61223958172202,0.0,39.815554778945646,0.017185548065648176,0.0,1693.60949796644 
+min,-4.6271,0.0,11.9873,-0.04253,0.26855,-2.1467,12.03892,70.0,1.0,50.0,4705.0,0.26093,28.01602,26866.0,26.15353,12.07794,0.0,1.0,50.0,27.0,27.89917,1.0,233.09099999999998,373.26614,-38.0,50.0,12851.0,0.77193,1.0,54157.0,863.9603900000001,0.41963,3.4227,0.0,100.12769,2880.0,25.18253,0.0,0.0,2.13625,6.3222,-0.03986,0.0,11.70507,15.0,20.0,12.33826,0.0,0.0,-8.0,34.460440000000006,1.0,0.09042,0.30762,0.0,230.36029,392.17688,0.0,43.69507,719.51025,0.0,792.0,0.0,0.0,-128.0,0.054979999999999994,0.0,9826.0,1.0,2.1849,0.0,0.0,186.38612,11.12275,12.81891,43.30319,0.55328,28.158720000000002,0.0,11.12275,230.95702999999997,1.0,890.0452300000001,12.0,50.0,-112.0 +25%,-2.6891,0.0,12.10938,-0.00109,15.00092,-0.0902,12.53358,70.0,1.0,50.0,11417.0,13.9328,28.03172,27167.0,27.704159999999998,30.9869025,0.0,1.0,50.0,27.24697,30.96619,1.0,312.35535,422.79843,3.0,50.0,17053.0,1.0393,1.0,54710.0,978.3325199999999,59.35671,5.3848,0.0,106.89041,2880.0,26.92264,0.0,0.0,106.27747,7.6753,-7e-05,14.063802500000001,11.928230000000001,15.0,20.0,12.399289999999999,1.0,0.0,128.0,36.6272,1.0,0.63293,0.36255,0.0,306.80341,422.64315999999997,1.1065399999999999,48.91357,720.00726,1.0,898.0,0.0,0.0,112.0,0.09437999999999999,18.186609999999998,9960.0,1.0,309.0506,35.0,15.2188125,236.54935,553.35132,31.51093,48.3573,1.05301,30.833059999999996,0.0,553.35132,318.03387000000004,1.0,977.51953,12.0,50.0,7.0 
+50%,-2.1586,0.0,12.26196,-7e-05,94.03381,0.4304,12.669310000000001,70.0,1.0,50.0,15124.0,15.59601,28.035140000000002,27230.0,28.04138,33.53791,0.0,1.0,50.0,27.33063,31.62231,1.0,330.6026,449.63482999999997,21.0,50.0,18051.0,1.10428,1.0,54722.0,1010.42865,63.934309999999996,5.9528,0.0,111.10626,2880.0,32.0,0.0,0.0,2479.9726600000004,8.4954,7e-05,15.708089999999999,11.98278,15.0,25.0,12.437439999999999,1.0,0.0,2984.0,37.16125,1.0,33.98077,0.36407,0.0,330.96429,448.11495999999994,13.623779999999998,49.98169,720.06299,1.0,903.0,0.0,0.0,7344.0,0.11492000000000001,100.0,9973.0,1.0,332.88461,35.0,93.76555,250.85446000000002,3176.86206,34.211729999999996,49.42992,1.11176,35.48735,5.32794,3176.86206,365.23438,1.0,1007.8375900000001,12.0,50.0,106.0 +75%,-1.5605,0.0,12.269589999999999,0.00101,100.51116,0.8929,12.75681,80.0,1.0,50.0,18276.0,17.26684,28.037840000000003,27268.0,28.60309,36.711200000000005,0.0,1.0,50.0,27.48978,32.019040000000004,1.0,349.82642000000004,476.47122,976.0,50.0,19096.0,1.1718674999999998,1.0,54774.0,1031.79565,68.51198000000001,6.5804,0.0,120.64178000000001,2880.0,32.188179999999996,0.0,0.0,2491.9890100000002,8.9554,0.00029,17.399607500000002,12.04491,62.7065875,25.0,44.5343,1.0,0.0,9592.0,39.6637,1.0,67.27435,0.36789,0.0,354.25708,476.10582999999997,21.25391,51.39312,720.11108,1.0,912.0,0.0,0.0,14560.0,0.16434,100.0,9999.0,1.0,356.71902,35.0,100.0,263.63372999999996,3184.74487,37.301629999999996,50.77159,1.16516,36.025240000000004,6.86263,3184.74487,414.69543,1.0,1030.6046099999999,12.0,50.0,2376.0 
+max,0.8286,0.0,16.86249,0.040510000000000004,100.87737,3.1487,12.81139,80.0,1.0,50.0,22644.0,34.82971,28.05606,27543.0,30.46622,81.01099,1.0,1.0,50.0,27.943459999999998,34.8114,1.0,494.26721,535.8690799999999,6193.0,50.0,26826.0,1.62739,1.0,54832.0,1146.92163,222.81647999999998,9.1331,1.0,129.87657,2880.0,33.47964,0.0,0.0,2536.8115199999997,11.1379,0.03631,34.58655,12.28788,100.0,25.0,97.81036,1.0,0.0,13832.0,50.05493,1.0,94.34674,1.53671,0.0,496.85327,498.11957,50.0,66.71295,720.48914,1.0,936.0,0.0,1.0,19536.0,0.19972,100.0,10250.0,1.0,947.02161,35.0,100.0,332.48901,3190.0,82.4829,65.98333000000001,2.08832,38.054959999999994,10.0,3190.0,499.76489000000004,1.0,1121.85547,17.14934,50.0,12993.0 diff --git a/etc/val_info.csv b/etc/val_info.csv new file mode 100644 index 0000000..0d56abb --- /dev/null +++ b/etc/val_info.csv @@ -0,0 +1,9 @@ +,C01,C02,C03,C04,C05,C06,C07,C08,C09,C10,C11,C12,C13,C14,C15,C16,C17,C18,C19,C20,C21,C22,C23,C24,C25,C26,C27,C28,C29,C30,C31,C32,C33,C34,C35,C36,C37,C38,C39,C40,C41,C42,C43,C44,C45,C46,C47,C48,C49,C50,C51,C52,C53,C54,C55,C56,C57,C58,C59,C60,C61,C62,C63,C64,C65,C66,C67,C68,C69,C70,C71,C72,C73,C74,C75,C76,C77,C78,C79,C80,C81,C82,C83,C84,C85,C86 
+countmean,-2.4594666620370367,0.0,11.935542401967592,-2.7098611111111107e-05,15.921492494907406,0.17047383333333332,12.743439570592129,70.0,1.0,50.0,13515.36855324074,15.071240775115744,28.034307578009265,27226.147638888888,28.135595587500003,30.357984739004632,0.08112268518518519,1.0,50.0,27.79620552,31.577997648379636,1.0,333.31510262800924,449.90493005300925,787.2582986111111,50.0,18186.160972222224,1.1148783346064814,1.0,54721.15542824074,1004.4912464424767,63.554135648958336,5.092231543981482,0.08112268518518519,116.31296652627316,2880.0,27.754849555562963,0.0,0.0,244.4950306193287,7.184605325231481,0.00011872546296296298,15.232709471180556,11.948349152893519,62.185901524537044,25.0,53.84042227233797,0.9188773148148148,0.0,4873.662592592593,42.72525996539351,1.0,32.67355743229167,0.5721718575231483,0.0,333.0438692743055,449.182239556117,20.989593509999995,49.95465788923612,719.9960670815972,0.9188773148148148,904.987337962963,0.0,0.08112268518518519,7048.826296296296,0.1267813355595833,21.455218706134257,9975.377488425926,1.0,330.7670292971065,30.26630022604167,16.171110291319444,249.5227628508102,684.2666456087962,30.848991297106473,49.40108099699074,1.114719238425926,31.54851779039352,0.716041982,684.2342972121528,365.849156715162,1.0,1004.4884641429628,12.001176364930556,50.0,1116.742673611111 
+std,0.3484381851310685,0.0,0.22409287962325264,0.0016356237489022843,10.159001887790705,0.19563939071473177,0.1089069475918646,0.0,0.0,0.0,4019.8680651955415,2.8596862281705753,0.0033994924037676496,51.73052155844038,0.6125734158726767,3.3109409042541267,0.2730250133152174,0.0,0.0,0.0,0.5336158882978931,0.0,34.75362073885327,31.970357185144607,1324.7333685476513,0.0,1870.190511183838,0.11208207280905767,0.0,135.63765486819364,27.58101724844867,8.793604086361194,0.3837526229034144,0.2730250133152174,7.60802942999722,0.0,0.02979847117722374,0.0,0.0,493.33220886219846,0.27628479237301745,0.002022964623627249,2.8576262638098298,0.0445938488169702,40.772148640125664,0.0,28.03264136764282,0.2730250133152174,0.0,5104.584512387677,3.2402148348332633,0.0,31.344725601215686,0.2803459826107888,0.0,38.060102623143436,30.972669999548,3.5527342386646136e-15,1.5140082759702138,0.0696943795187874,0.2730250133152174,25.766570554632818,0.0,0.2730250133152174,6815.743654090211,0.04376930508883382,21.073703101866332,30.633440186657378,0.0,43.610062969993116,8.108487518719581,10.031640240294303,13.872905867558908,667.1978083164512,3.3611672180280143,1.427325460902682,0.10927432086600021,0.3399967804855059,0.0,667.0942712466791,59.099703930936855,0.0,27.234350782275367,0.05720268675172349,0.0,1695.1679011198223 
+min,-3.2945,0.0,11.76605,-0.01251,0.3067,-0.5016,12.669310000000001,70.0,1.0,50.0,6038.0,0.28381,28.02125,27080.0,27.187559999999998,12.14206,0.0,1.0,50.0,27.79620552,30.157459999999997,1.0,238.82379,304.92278999999996,1.0,50.0,13065.0,0.7817,1.0,54285.0,859.9325,0.80104,4.184,0.0,100.30814000000001,2880.0,27.73684692,0.0,0.0,2.13625,6.5217,-0.02481,0.0,11.84436,15.0,25.0,12.39166,0.0,0.0,0.0,34.53674,1.0,0.09042,0.30990999999999996,0.0,235.42395,412.92023,20.98959351,44.89288,719.69604,0.0,815.0,0.0,0.0,-112.0,0.059444718,0.0,9752.0,1.0,4.17075,0.0,2.0647,184.61951000000002,11.12275,12.93335,43.64949,0.33432,30.602420000000002,0.716041982,11.12275,235.65900000000002,1.0,954.83789,12.0,50.0,5.0 +25%,-2.7718,0.0,11.78894,-0.00109,12.69684,0.0321,12.669310000000001,70.0,1.0,50.0,10060.0,13.337710000000001,28.03169,27200.0,27.75165,28.18937,0.0,1.0,50.0,27.79620552,31.31713,1.0,313.63934,422.33840999999995,3.0,50.0,17116.0,1.04253,1.0,54726.0,988.33459,58.78445,4.778575,0.0,110.19345,2880.0,27.73684692,0.0,0.0,53.062419999999996,7.0152,-7e-05,13.4501525,11.90833,15.0,25.0,36.93542,1.0,0.0,96.0,40.96069,1.0,0.18085,0.36255,0.0,308.75653,421.35715,20.98959351,48.79913,719.951155,1.0,906.0,0.0,0.0,32.0,0.10219,10.254865,9948.0,1.0,306.07104,26.579742500000002,13.047130000000001,240.93623,277.27127,28.63464,48.30546,1.04614,31.35277,0.716041982,277.27127,318.92004,1.0,985.22772,12.0,50.0,7.0 
+50%,-2.4546,0.0,11.78894,-7e-05,14.5813,0.1776,12.669310000000001,70.0,1.0,50.0,14065.0,15.12299,28.034259999999996,27222.0,27.848209999999998,30.004830000000002,0.0,1.0,50.0,27.79620552,31.530759999999997,1.0,331.48871,436.70224,3.0,50.0,18094.0,1.11142,1.0,54757.0,1001.8158,63.36213000000001,5.0954,0.0,115.87123000000001,2880.0,27.73684692,0.0,0.0,97.12221,7.1770000000000005,7e-05,15.231355,11.947735,100.0,25.0,45.86181,1.0,0.0,2952.0,42.959590000000006,1.0,23.4375,0.47623000000000004,0.0,332.10364,433.32815999999997,20.98959351,49.974059999999994,719.9995700000001,1.0,912.0,0.0,0.0,5072.0,0.14577,16.535804999999996,9973.0,1.0,329.90546,35.0,14.97593,246.84904,505.68295,30.412290000000002,49.42156,1.11481,31.53244,0.716041982,505.68295,365.70459,1.0,995.8059099999999,12.0,50.0,85.0 +75%,-2.1376,0.0,12.26196,0.00101,16.22925,0.3348,12.90343761,70.0,1.0,50.0,17076.0,16.908260000000002,28.03683,27246.0,28.439759999999996,32.0202175,0.0,1.0,50.0,27.79620552,31.7749,1.0,351.58057,483.88320999999996,1004.0,50.0,19185.0,1.1795624999999998,1.0,54801.0,1021.59052,67.74902,5.4127,0.0,124.22188,2880.0,27.80418,0.0,0.0,165.97743,7.3947,0.00029,17.1111275,11.9886,100.0,25.0,86.2632725,1.0,0.0,9872.0,45.08056,1.0,66.31586,0.6845100000000001,0.0,355.16132000000005,483.27768,20.98959351,50.79803,720.04132,1.0,919.0,0.0,0.0,14336.0,0.15256,24.834087500000003,9999.0,1.0,352.74652000000003,35.0,16.36675,257.53021,817.42853,32.43408,50.323347500000004,1.18347,31.72928,0.716041982,817.42853,412.7785,1.0,1018.39404,12.0,50.0,2351.0 
+max,-1.8027,0.0,16.389470000000003,0.00817,63.203430000000004,0.5817,12.90343761,70.0,1.0,50.0,19878.0,40.16265,28.04815,27410.0,29.80283,49.27771,1.0,1.0,50.0,27.79620552,33.42284,1.0,485.5686,535.40906,4169.0,50.0,26388.0,1.6022399999999999,1.0,54831.0,1068.93176,249.51932000000002,5.8265,1.0,129.60573,2880.0,27.80418,0.0,0.0,2424.65967,7.7867,0.026910000000000003,45.22703,12.02977,100.0,25.0,97.43652,1.0,0.0,13824.0,49.64294,1.0,84.00244,1.5267899999999999,0.0,495.51507999999995,494.23264000000006,20.98959351,55.40619,720.25458,1.0,930.0,0.0,1.0,17792.0,0.19333,100.0,10234.0,1.0,1007.90405,40.0,62.886309999999995,285.84432999999996,3140.57666,50.340270000000004,55.20247,2.05093,33.015209999999996,0.716041982,3140.57666,496.36499000000003,1.0,1046.19189,17.10766,50.0,5477.0 diff --git a/image/architecture.png b/image/architecture.png new file mode 100644 index 0000000..407324c Binary files /dev/null and b/image/architecture.png differ diff --git a/model/__init__.py b/model/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/model/org_vrae.py b/model/org_vrae.py new file mode 100644 index 0000000..a6bd2f6 --- /dev/null +++ b/model/org_vrae.py @@ -0,0 +1,502 @@ +import numpy as np +import torch +from torch import nn, optim +from torch import distributions +from sklearn.base import BaseEstimator +from torch.utils.data import DataLoader +from torch.autograd import Variable +import os + + + +class Encoder(nn.Module): + """ + Encoder network containing enrolled LSTM/GRU + + :param number_of_features: number of input features + :param hidden_size: hidden size of the RNN + :param hidden_layer_depth: number of layers in RNN + :param latent_length: latent vector length + :param dropout: percentage of nodes to dropout + :param block: LSTM/GRU block + """ + def __init__(self, number_of_features, hidden_size, hidden_layer_depth, latent_length, dropout, block = 'LSTM'): + + super(Encoder, self).__init__() + + self.number_of_features = number_of_features + 
class Encoder(nn.Module):
    """
    Recurrent encoder that summarises an input sequence into the final hidden state
    of its top RNN layer.

    :param number_of_features: number of input features per time step
    :param hidden_size: hidden size of the RNN
    :param hidden_layer_depth: number of stacked layers in the RNN
    :param latent_length: latent vector length (stored for reference only; the encoder does not use it)
    :param dropout: dropout probability handed to the RNN constructor
    :param block: 'LSTM' or 'GRU'
    :raises NotImplementedError: if `block` is neither 'LSTM' nor 'GRU'
    """
    def __init__(self, number_of_features, hidden_size, hidden_layer_depth, latent_length, dropout, block = 'LSTM'):

        super(Encoder, self).__init__()

        self.number_of_features = number_of_features
        self.hidden_size = hidden_size
        self.hidden_layer_depth = hidden_layer_depth
        self.latent_length = latent_length

        if block == 'LSTM':
            rnn_cls = nn.LSTM
        elif block == 'GRU':
            rnn_cls = nn.GRU
        else:
            raise NotImplementedError("Unsupported block %r, expected 'LSTM' or 'GRU'" % (block,))

        self.model = rnn_cls(self.number_of_features, self.hidden_size, self.hidden_layer_depth, dropout = dropout)

    def forward(self, x):
        """Forward propagation of encoder. Given input, outputs the last hidden state of encoder

        :param x: input to the encoder, of shape (sequence_length, batch_size, number_of_features)
        :return: last hidden state of encoder, of shape (batch_size, hidden_size)
        """
        _, state = self.model(x)

        # nn.LSTM returns the state as a (h_n, c_n) tuple whereas nn.GRU returns h_n alone;
        # unpacking a GRU state as a pair would split it along the layer axis instead.
        h_end = state[0] if isinstance(state, tuple) else state

        # Keep only the top layer's final hidden state.
        return h_end[-1]


class Lambda(nn.Module):
    """Lambda module converts output of encoder to latent vector

    The most recent mean and log-variance are kept on the module as `latent_mean` and
    `latent_logvar` so that a caller can compute the KL term after a forward pass.

    :param hidden_size: hidden size of the encoder
    :param latent_length: latent vector length
    """
    def __init__(self, hidden_size, latent_length):
        super(Lambda, self).__init__()

        self.hidden_size = hidden_size
        self.latent_length = latent_length

        self.hidden_to_mean = nn.Linear(self.hidden_size, self.latent_length)
        self.hidden_to_logvar = nn.Linear(self.hidden_size, self.latent_length)

        nn.init.xavier_uniform_(self.hidden_to_mean.weight)
        nn.init.xavier_uniform_(self.hidden_to_logvar.weight)

    def forward(self, cell_output):
        """Given last hidden state of encoder, passes through a linear layer, and finds the mean and variance

        :param cell_output: last hidden state of encoder
        :return: latent vector; a reparameterised sample in training mode, the mean in eval mode
        """
        self.latent_mean = self.hidden_to_mean(cell_output)
        self.latent_logvar = self.hidden_to_logvar(cell_output)

        if not self.training:
            # Deterministic encoding outside of training.
            return self.latent_mean

        # Reparameterisation trick: z = mean + eps * std with eps ~ N(0, I).
        std = torch.exp(0.5 * self.latent_logvar)
        eps = torch.randn_like(std)
        return eps.mul(std).add_(self.latent_mean)


class Decoder(nn.Module):
    """Converts latent vector into output

    :param sequence_length: length of the sequence to generate
    :param batch_size: nominal batch size (stored for reference; the batch size actually used
                       is taken from the latent tensor passed to `forward`)
    :param hidden_size: hidden size of the RNN
    :param hidden_layer_depth: number of layers in RNN
    :param latent_length: latent vector length
    :param output_size: size of the last output dimension produced for every time step
    :param dtype: tensor type chosen by the caller (stored for reference)
    :param block: GRU/LSTM - use the same which you've used in the encoder
    :raises NotImplementedError: if `block` is neither 'LSTM' nor 'GRU'
    """
    def __init__(self, sequence_length, batch_size, hidden_size, hidden_layer_depth, latent_length, output_size, dtype, block='LSTM'):

        super(Decoder, self).__init__()

        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.sequence_length = sequence_length
        self.hidden_layer_depth = hidden_layer_depth
        self.latent_length = latent_length
        self.output_size = output_size
        self.dtype = dtype

        if block == 'LSTM':
            self.model = nn.LSTM(1, self.hidden_size, self.hidden_layer_depth)
        elif block == 'GRU':
            self.model = nn.GRU(1, self.hidden_size, self.hidden_layer_depth)
        else:
            raise NotImplementedError("Unsupported block %r, expected 'LSTM' or 'GRU'" % (block,))

        self.latent_to_hidden = nn.Linear(self.latent_length, self.hidden_size)
        self.hidden_to_output = nn.Linear(self.hidden_size, self.output_size)

        nn.init.xavier_uniform_(self.latent_to_hidden.weight)
        nn.init.xavier_uniform_(self.hidden_to_output.weight)

    def forward(self, latent):
        """Converts latent to hidden to output

        :param latent: latent vector of shape (batch, latent_length)
        :return: decoded sequence of shape (sequence_length, batch, output_size)
        """
        h_state = self.latent_to_hidden(latent)
        n_samples = h_state.size(0)

        # The RNN is driven by all-zero inputs; they are built per call so they follow the
        # incoming batch size, device and dtype instead of being frozen at construction time
        # (a partial last batch would otherwise fail with a shape mismatch).
        decoder_inputs = h_state.new_zeros(self.sequence_length, n_samples, 1)

        # Every layer starts from the same projected latent state.
        h_0 = torch.stack([h_state for _ in range(self.hidden_layer_depth)])

        if isinstance(self.model, nn.LSTM):
            c_0 = h_state.new_zeros(self.hidden_layer_depth, n_samples, self.hidden_size)
            decoder_output, _ = self.model(decoder_inputs, (h_0, c_0))
        elif isinstance(self.model, nn.GRU):
            decoder_output, _ = self.model(decoder_inputs, h_0)
        else:
            raise NotImplementedError

        return self.hidden_to_output(decoder_output)
def _assert_no_grad(tensor):
    """Assert that `tensor` (a loss target) does not require gradients."""
    assert not tensor.requires_grad, \
        "nn criterions don't compute the gradient w.r.t. targets - please " \
        "mark these tensors as not requiring gradients"


class VRAE(BaseEstimator, nn.Module):
    """Variational recurrent auto-encoder. This module is used for dimensionality reduction of timeseries

    :param sequence_length: length of the input sequence
    :param number_of_features: number of input features
    :param hidden_size: hidden size of the RNN
    :param hidden_layer_depth: number of layers in RNN
    :param latent_length: latent vector length
    :param batch_size: number of timeseries in a single batch
    :param learning_rate: the learning rate of the module
    :param block: GRU/LSTM to be used as a basic building block
    :param n_epochs: Number of iterations/epochs
    :param dropout_rate: The probability of a node being dropped-out
    :param optimizer: 'Adam' or 'SGD'
    :param loss: 'SmoothL1Loss', 'MSELoss', or a callable `loss(x_decoded, x)` returning a scalar tensor
    :param boolean cuda: to be run on GPU or not (falls back to CPU when CUDA is unavailable)
    :param print_every: The number of iterations after which loss should be printed
    :param boolean clip: Gradient clipping to overcome explosion
    :param max_grad_norm: The grad-norm to be clipped
    :param dload: Download directory where models are to be dumped
    :raises ValueError: if `optimizer` or `loss` is not recognised
    """
    def __init__(self, sequence_length, number_of_features, hidden_size=90, hidden_layer_depth=2, latent_length=20,
                 batch_size=32, learning_rate=0.005, block='LSTM',
                 n_epochs=5, dropout_rate=0., optimizer='Adam', loss='MSELoss',
                 cuda=False, print_every=100, clip=True, max_grad_norm=5, dload='.'):

        super(VRAE, self).__init__()

        # Silently fall back to the CPU when CUDA was requested but is not available.
        self.use_cuda = cuda
        if not torch.cuda.is_available() and self.use_cuda:
            self.use_cuda = False

        self.dtype = torch.cuda.FloatTensor if self.use_cuda else torch.FloatTensor

        self.encoder = Encoder(number_of_features = number_of_features,
                               hidden_size=hidden_size,
                               hidden_layer_depth=hidden_layer_depth,
                               latent_length=latent_length,
                               dropout=dropout_rate,
                               block=block)

        self.lmbd = Lambda(hidden_size=hidden_size,
                           latent_length=latent_length)

        self.decoder = Decoder(sequence_length=sequence_length,
                               batch_size = batch_size,
                               hidden_size=hidden_size,
                               hidden_layer_depth=hidden_layer_depth,
                               latent_length=latent_length,
                               output_size=number_of_features,
                               block=block,
                               dtype=self.dtype)

        self.sequence_length = sequence_length
        self.hidden_size = hidden_size
        self.hidden_layer_depth = hidden_layer_depth
        self.latent_length = latent_length
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.n_epochs = n_epochs

        self.print_every = print_every
        self.clip = clip
        self.max_grad_norm = max_grad_norm
        self.is_fitted = False
        self.dload = dload

        # Move the parameters first so the optimizer is built over the final tensors.
        if self.use_cuda:
            self.cuda()

        if optimizer == 'Adam':
            self.optimizer = optim.Adam(self.parameters(), lr=learning_rate)
        elif optimizer == 'SGD':
            self.optimizer = optim.SGD(self.parameters(), lr=learning_rate)
        else:
            raise ValueError('Not a recognized optimizer')

        # reduction='sum' is the supported spelling of the deprecated size_average=False.
        if loss == 'SmoothL1Loss':
            self.loss_fn = nn.SmoothL1Loss(reduction='sum')
        elif loss == 'MSELoss':
            self.loss_fn = nn.MSELoss(reduction='sum')
        elif callable(loss):
            # Custom criterion, as advertised in the class docstring.
            self.loss_fn = loss
        else:
            # Fail here rather than with an AttributeError on the first training step.
            raise ValueError('Not a recognized loss')

    def __repr__(self):
        return """VRAE(n_epochs={n_epochs},batch_size={batch_size},cuda={cuda})""".format(
                n_epochs=self.n_epochs,
                batch_size=self.batch_size,
                cuda=self.use_cuda)

    def forward(self, x):
        """
        Forward propagation which involves one pass from inputs to encoder to lambda to decoder

        :param x: input tensor
        :return: the decoded output, latent vector
        """
        cell_output = self.encoder(x)
        latent = self.lmbd(cell_output)
        x_decoded = self.decoder(latent)

        return x_decoded, latent

    def _rec(self, x_decoded, x, loss_fn):
        """
        Compute the loss given output x decoded, input x and the specified loss function

        :param x_decoded: output of the decoder
        :param x: input to the encoder
        :param loss_fn: loss function specified
        :return: joint loss, reconstruction loss and kl-divergence loss
        """
        latent_mean, latent_logvar = self.lmbd.latent_mean, self.lmbd.latent_logvar

        # NOTE(review): the KL term is averaged over all latent entries while the built-in
        # reconstruction losses are summed - presumably an intentional weighting; confirm.
        kl_loss = -0.5 * torch.mean(1 + latent_logvar - latent_mean.pow(2) - latent_logvar.exp())
        recon_loss = loss_fn(x_decoded, x)

        return kl_loss + recon_loss, recon_loss, kl_loss

    def compute_loss(self, X):
        """
        Given input tensor, forward propagate and compute the loss (the caller runs the backward pass).

        :param X: Input tensor
        :return: total loss, reconstruction loss, kl-divergence loss and the input cast to the model's tensor type
        """
        # The input is data, not a parameter: no gradient is tracked for it.
        x = X.type(self.dtype)

        x_decoded, _ = self(x)
        loss, recon_loss, kl_loss = self._rec(x_decoded, x, self.loss_fn)

        return loss, recon_loss, kl_loss, x

    def _train(self, train_loader):
        """
        Run one epoch over `train_loader`, taking one optimizer step per batch

        :param train_loader: input train loader with shuffle
        :return:
        """
        self.train()

        epoch_loss = 0.0
        n_batches = 0

        for t, X in enumerate(train_loader):

            # Index first element of array to return tensor
            X = X[0]

            # required to swap axes, since dataloader gives output in (batch_size x seq_len x num_of_features)
            X = X.permute(1,0,2)

            self.optimizer.zero_grad()
            loss, recon_loss, kl_loss, _ = self.compute_loss(X)
            loss.backward()

            if self.clip:
                torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm = self.max_grad_norm)

            # accumulator
            epoch_loss += loss.item()
            n_batches += 1

            self.optimizer.step()

            if (t + 1) % self.print_every == 0:
                print('Batch %d, loss = %.4f, recon_loss = %.4f, kl_loss = %.4f' % (t + 1, loss.item(),
                                                                                    recon_loss.item(), kl_loss.item()))

        # Divide by the number of batches seen (not the last index) and survive an empty loader.
        if n_batches:
            print('Average loss: {:.4f}'.format(epoch_loss / n_batches))
        else:
            print('Average loss: n/a (no complete batch was produced)')

    def fit(self, dataset, save = False):
        """
        Calls `_train` function over a fixed number of epochs, specified by `n_epochs`

        :param dataset: `Dataset` object
        :param bool save: If true, dumps the trained model parameters as `model.pth` in the `dload` directory
        :return:
        """
        # drop_last=True: only full batches of `batch_size` samples are used for training.
        train_loader = DataLoader(dataset = dataset,
                                  batch_size = self.batch_size,
                                  shuffle = True,
                                  drop_last=True)

        for i in range(self.n_epochs):
            print('Epoch: %s' % i)

            self._train(train_loader)

        self.is_fitted = True
        if save:
            self.save('model.pth')

    def _batch_transform(self, x):
        """
        Passes the given input tensor into encoder and lambda function

        :param x: input batch tensor
        :return: intermediate latent vector as a numpy array
        """
        latent = self.lmbd(self.encoder(x.type(self.dtype)))
        return latent.detach().cpu().numpy()

    def _batch_reconstruct(self, x):
        """
        Passes the given input tensor into encoder, lambda and decoder function

        :param x: input batch tensor
        :return: reconstructed output as a numpy array
        """
        x_decoded, _ = self(x.type(self.dtype))

        return x_decoded.detach().cpu().numpy()

    def _dump_array(self, array, file_name):
        """Write `array` with `ndarray.dump` to `file_name` inside `dload`, creating the directory if needed."""
        os.makedirs(self.dload, exist_ok=True)
        array.dump(os.path.join(self.dload, file_name))

    def reconstruct(self, dataset, save = False):
        """
        Given input dataset, creates dataloader, runs dataloader on `_batch_reconstruct`
        Prerequisite is that model has to be fit

        :param dataset: input dataset who's output vectors are to be obtained
        :param bool save: If true, dumps the output array as a pickle file
        :return: reconstructed array, batches concatenated along axis 1
        :raises RuntimeError: if the model has not been fitted or loaded
        """
        if not self.is_fitted:
            raise RuntimeError('Model needs to be fit')

        self.eval()

        # Samples beyond the last full batch are dropped (drop_last=True).
        test_loader = DataLoader(dataset = dataset,
                                 batch_size = self.batch_size,
                                 shuffle = False,
                                 drop_last=True) # Don't shuffle for test_loader

        with torch.no_grad():
            x_decoded = []

            for x in test_loader:
                x = x[0]
                x = x.permute(1, 0, 2)

                x_decoded.append(self._batch_reconstruct(x))

            x_decoded = np.concatenate(x_decoded, axis=1)

            if save:
                # NOTE(review): this reuses the file name written by `transform`, so saving
                # both overwrites one with the other - kept for compatibility, confirm intent.
                self._dump_array(x_decoded, 'z_run.pkl')
            return x_decoded

    def transform(self, dataset, save = False):
        """
        Given input dataset, creates dataloader, runs dataloader on `_batch_transform`
        Prerequisite is that model has to be fit

        :param dataset: input dataset who's latent vectors are to be obtained
        :param bool save: If true, dumps the latent vector array as a pickle file
        :return: latent vectors, batches concatenated along axis 0
        :raises RuntimeError: if the model has not been fitted or loaded
        """
        if not self.is_fitted:
            raise RuntimeError('Model needs to be fit')

        self.eval()

        # Samples beyond the last full batch are dropped (drop_last=True).
        test_loader = DataLoader(dataset = dataset,
                                 batch_size = self.batch_size,
                                 shuffle = False,
                                 drop_last=True) # Don't shuffle for test_loader

        with torch.no_grad():
            z_run = []

            for x in test_loader:
                x = x[0]
                x = x.permute(1, 0, 2)

                z_run.append(self._batch_transform(x))

            z_run = np.concatenate(z_run, axis=0)
            if save:
                self._dump_array(z_run, 'z_run.pkl')
            return z_run

    def fit_transform(self, dataset, save = False):
        """
        Combines the `fit` and `transform` functions above

        :param dataset: Dataset on which fit and transform have to be performed
        :param bool save: If true, dumps the model and latent vectors as pickle file
        :return: latent vectors for input dataset
        """
        self.fit(dataset, save = save)
        return self.transform(dataset, save = save)

    def save(self, file_name):
        """
        Saves the model parameters (state dict) to be retrieved later

        :param file_name: the filename to be saved as, `dload` serves as the download directory
        :return: None
        """
        os.makedirs(self.dload, exist_ok=True)
        torch.save(self.state_dict(), os.path.join(self.dload, file_name))

    def load(self, PATH):
        """
        Loads the model's parameters from the path mentioned

        :param PATH: path of a state-dict file written by `save`
        :return: None
        """
        # map_location lets a checkpoint trained on GPU be restored on a CPU-only machine.
        # NOTE: torch.load unpickles the file - only load checkpoints from trusted sources.
        state = torch.load(PATH, map_location='cuda' if self.use_cuda else 'cpu')
        self.load_state_dict(state)

        # Only mark the model usable once the weights were actually restored.
        self.is_fitted = True
def load_data(folder, cols_to_remove = None):
    """
    Load every CSV file found directly in `./<folder>/` and stack them into one array.

    Files are read in lexicographic file-name order and their rows are concatenated in that order.

    :param folder: folder (relative to the current working directory) where data is located
    :param cols_to_remove: optional column label or list of labels to drop; nothing is dropped when None
    :return: 2-D numpy array holding the remaining columns of all files
    :raises FileNotFoundError: if the folder contains no CSV file
    """
    # define path
    data_path = f'./{folder}/*.csv'

    # get data (sorted so the row order is reproducible)
    file_list = sorted(glob.glob(data_path))
    if not file_list:
        raise FileNotFoundError(f'No CSV files found matching {data_path}')

    # load dataset: one concat over all frames instead of re-copying the total per file
    df_total = pd.concat([pd.read_csv(path) for path in file_list], ignore_index=True)

    # DataFrame.drop rejects None, so only drop when columns were actually requested
    if cols_to_remove is not None:
        df_total = df_total.drop(cols_to_remove, axis=1)

    return df_total.to_numpy()

# TODO : Delete function
def open_data(direc, ratio_train=0.8, dataset="ECG5000"):
    """Load `<direc>/<dataset>/<dataset>_TRAIN` and `..._TEST`, merge them and split them randomly.

    Input:
    direc: location of the UCR archive
    ratio_train: ratio to split training and testset
    dataset: name of the dataset in the UCR archive

    Returns four arrays: training columns 1.., validation columns 1.., training column 0 and
    validation column 0 (column 0 is presumably the class label - confirm against the archive format).
    The split uses `np.random.permutation`, so it differs between calls unless the seed is fixed.
    """
    datadir = direc + '/' + dataset + '/' + dataset
    data_train = np.loadtxt(datadir + '_TRAIN', delimiter=',')
    # NOTE(review): the last row of the test file is discarded - reason not visible here.
    data_test_val = np.loadtxt(datadir + '_TEST', delimiter=',')[:-1]
    data = np.concatenate((data_train, data_test_val), axis=0)
    data = np.expand_dims(data, -1)

    n_samples = data.shape[0]

    ind_cut = int(ratio_train * n_samples)
    ind = np.random.permutation(n_samples)
    train_idx, val_idx = ind[:ind_cut], ind[ind_cut:]
    return data[train_idx, 1:, :], data[val_idx, 1:, :], data[train_idx, 0, :], data[val_idx, 0, :]
class Encoder(nn.Module):
    """
    Recurrent encoder that summarises an input sequence into the final hidden state
    of its top RNN layer.

    :param number_of_features: number of input features per time step
    :param hidden_size: hidden size of the RNN
    :param hidden_layer_depth: number of stacked layers in the RNN
    :param latent_length: latent vector length (stored for reference only; the encoder does not use it)
    :param dropout: dropout probability handed to the RNN constructor
    :param block: 'LSTM' or 'GRU'
    :raises NotImplementedError: if `block` is neither 'LSTM' nor 'GRU'
    """
    def __init__(self, number_of_features, hidden_size, hidden_layer_depth, latent_length, dropout, block = 'LSTM'):

        super(Encoder, self).__init__()

        self.number_of_features = number_of_features
        self.hidden_size = hidden_size
        self.hidden_layer_depth = hidden_layer_depth
        self.latent_length = latent_length

        if block == 'LSTM':
            rnn_cls = nn.LSTM
        elif block == 'GRU':
            rnn_cls = nn.GRU
        else:
            raise NotImplementedError("Unsupported block %r, expected 'LSTM' or 'GRU'" % (block,))

        self.model = rnn_cls(self.number_of_features, self.hidden_size, self.hidden_layer_depth, dropout = dropout)

    def forward(self, x):
        """Forward propagation of encoder. Given input, outputs the last hidden state of encoder

        :param x: input to the encoder, of shape (sequence_length, batch_size, number_of_features)
        :return: last hidden state of encoder, of shape (batch_size, hidden_size)
        """
        _, state = self.model(x)

        # nn.LSTM returns the state as a (h_n, c_n) tuple whereas nn.GRU returns h_n alone;
        # unpacking a GRU state as a pair would split it along the layer axis instead.
        h_end = state[0] if isinstance(state, tuple) else state

        # Keep only the top layer's final hidden state.
        return h_end[-1]


class Lambda(nn.Module):
    """Lambda module converts output of encoder to latent vector

    The most recent mean and log-variance are kept on the module as `latent_mean` and
    `latent_logvar` so that a caller can compute the KL term after a forward pass.

    :param hidden_size: hidden size of the encoder
    :param latent_length: latent vector length
    """
    def __init__(self, hidden_size, latent_length):
        super(Lambda, self).__init__()

        self.hidden_size = hidden_size
        self.latent_length = latent_length

        self.hidden_to_mean = nn.Linear(self.hidden_size, self.latent_length)
        self.hidden_to_logvar = nn.Linear(self.hidden_size, self.latent_length)

        nn.init.xavier_uniform_(self.hidden_to_mean.weight)
        nn.init.xavier_uniform_(self.hidden_to_logvar.weight)

    def forward(self, cell_output):
        """Given last hidden state of encoder, passes through a linear layer, and finds the mean and variance

        :param cell_output: last hidden state of encoder
        :return: latent vector; a reparameterised sample in training mode, the mean in eval mode
        """
        self.latent_mean = self.hidden_to_mean(cell_output)
        self.latent_logvar = self.hidden_to_logvar(cell_output)

        if not self.training:
            # Deterministic encoding outside of training.
            return self.latent_mean

        # Reparameterisation trick: z = mean + eps * std with eps ~ N(0, I).
        std = torch.exp(0.5 * self.latent_logvar)
        eps = torch.randn_like(std)
        return eps.mul(std).add_(self.latent_mean)


class Decoder(nn.Module):
    """Converts latent vector into output

    :param sequence_length: length of the sequence to generate
    :param batch_size: nominal batch size (stored for reference; the batch size actually used
                       is taken from the latent tensor passed to `forward`)
    :param hidden_size: hidden size of the RNN
    :param hidden_layer_depth: number of layers in RNN
    :param latent_length: latent vector length
    :param output_size: size of the last output dimension produced for every time step
    :param dtype: tensor type chosen by the caller (stored for reference)
    :param block: GRU/LSTM - use the same which you've used in the encoder
    :raises NotImplementedError: if `block` is neither 'LSTM' nor 'GRU'
    """
    def __init__(self, sequence_length, batch_size, hidden_size, hidden_layer_depth, latent_length, output_size, dtype, block='LSTM'):

        super(Decoder, self).__init__()

        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.sequence_length = sequence_length
        self.hidden_layer_depth = hidden_layer_depth
        self.latent_length = latent_length
        self.output_size = output_size
        self.dtype = dtype

        if block == 'LSTM':
            self.model = nn.LSTM(1, self.hidden_size, self.hidden_layer_depth)
        elif block == 'GRU':
            self.model = nn.GRU(1, self.hidden_size, self.hidden_layer_depth)
        else:
            raise NotImplementedError("Unsupported block %r, expected 'LSTM' or 'GRU'" % (block,))

        self.latent_to_hidden = nn.Linear(self.latent_length, self.hidden_size)
        self.hidden_to_output = nn.Linear(self.hidden_size, self.output_size)

        nn.init.xavier_uniform_(self.latent_to_hidden.weight)
        nn.init.xavier_uniform_(self.hidden_to_output.weight)

    def forward(self, latent):
        """Converts latent to hidden to output

        :param latent: latent vector of shape (batch, latent_length)
        :return: decoded sequence of shape (sequence_length, batch, output_size)
        """
        h_state = self.latent_to_hidden(latent)
        n_samples = h_state.size(0)

        # The RNN is driven by all-zero inputs; they are built per call so they follow the
        # incoming batch size, device and dtype instead of being frozen at construction time
        # (a partial last batch would otherwise fail with a shape mismatch).
        decoder_inputs = h_state.new_zeros(self.sequence_length, n_samples, 1)

        # Every layer starts from the same projected latent state.
        h_0 = torch.stack([h_state for _ in range(self.hidden_layer_depth)])

        if isinstance(self.model, nn.LSTM):
            c_0 = h_state.new_zeros(self.hidden_layer_depth, n_samples, self.hidden_size)
            decoder_output, _ = self.model(decoder_inputs, (h_0, c_0))
        elif isinstance(self.model, nn.GRU):
            decoder_output, _ = self.model(decoder_inputs, h_0)
        else:
            raise NotImplementedError

        return self.hidden_to_output(decoder_output)
def _assert_no_grad(tensor):
    """Assert that `tensor` (a loss target) does not require gradients."""
    assert not tensor.requires_grad, \
        "nn criterions don't compute the gradient w.r.t. targets - please " \
        "mark these tensors as not requiring gradients"


class VRAE(BaseEstimator, nn.Module):
    """Variational recurrent auto-encoder. This module is used for dimensionality reduction of timeseries

    :param sequence_length: length of the input sequence
    :param number_of_features: number of input features
    :param hidden_size: hidden size of the RNN
    :param hidden_layer_depth: number of layers in RNN
    :param latent_length: latent vector length
    :param batch_size: number of timeseries in a single batch
    :param learning_rate: the learning rate of the module
    :param block: GRU/LSTM to be used as a basic building block
    :param n_epochs: Number of iterations/epochs
    :param dropout_rate: The probability of a node being dropped-out
    :param optimizer: 'Adam' or 'SGD'
    :param loss: 'SmoothL1Loss', 'MSELoss', or a callable `loss(x_decoded, x)` returning a scalar tensor
    :param boolean cuda: to be run on GPU or not (falls back to CPU when CUDA is unavailable)
    :param print_every: The number of iterations after which loss should be printed
    :param boolean clip: Gradient clipping to overcome explosion
    :param max_grad_norm: The grad-norm to be clipped
    :param dload: Download directory where models are to be dumped
    :raises ValueError: if `optimizer` or `loss` is not recognised
    """
    def __init__(self, sequence_length, number_of_features, hidden_size=90, hidden_layer_depth=2, latent_length=20,
                 batch_size=32, learning_rate=0.005, block='LSTM',
                 n_epochs=5, dropout_rate=0., optimizer='Adam', loss='MSELoss',
                 cuda=False, print_every=100, clip=True, max_grad_norm=5, dload='.'):

        super(VRAE, self).__init__()

        # Silently fall back to the CPU when CUDA was requested but is not available.
        self.use_cuda = cuda
        if not torch.cuda.is_available() and self.use_cuda:
            self.use_cuda = False

        self.dtype = torch.cuda.FloatTensor if self.use_cuda else torch.FloatTensor

        self.encoder = Encoder(number_of_features = number_of_features,
                               hidden_size=hidden_size,
                               hidden_layer_depth=hidden_layer_depth,
                               latent_length=latent_length,
                               dropout=dropout_rate,
                               block=block)

        self.lmbd = Lambda(hidden_size=hidden_size,
                           latent_length=latent_length)

        self.decoder = Decoder(sequence_length=sequence_length,
                               batch_size = batch_size,
                               hidden_size=hidden_size,
                               hidden_layer_depth=hidden_layer_depth,
                               latent_length=latent_length,
                               output_size=number_of_features,
                               block=block,
                               dtype=self.dtype)

        self.sequence_length = sequence_length
        self.hidden_size = hidden_size
        self.hidden_layer_depth = hidden_layer_depth
        self.latent_length = latent_length
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.n_epochs = n_epochs

        self.print_every = print_every
        self.clip = clip
        self.max_grad_norm = max_grad_norm
        self.is_fitted = False
        self.dload = dload

        # Move the parameters first so the optimizer is built over the final tensors.
        if self.use_cuda:
            self.cuda()

        if optimizer == 'Adam':
            self.optimizer = optim.Adam(self.parameters(), lr=learning_rate)
        elif optimizer == 'SGD':
            self.optimizer = optim.SGD(self.parameters(), lr=learning_rate)
        else:
            raise ValueError('Not a recognized optimizer')

        # reduction='sum' is the supported spelling of the deprecated size_average=False.
        if loss == 'SmoothL1Loss':
            self.loss_fn = nn.SmoothL1Loss(reduction='sum')
        elif loss == 'MSELoss':
            self.loss_fn = nn.MSELoss(reduction='sum')
        elif callable(loss):
            # Custom criterion, as advertised in the class docstring.
            self.loss_fn = loss
        else:
            # Fail here rather than with an AttributeError on the first training step.
            raise ValueError('Not a recognized loss')

    def __repr__(self):
        return """VRAE(n_epochs={n_epochs},batch_size={batch_size},cuda={cuda})""".format(
                n_epochs=self.n_epochs,
                batch_size=self.batch_size,
                cuda=self.use_cuda)

    def forward(self, x):
        """
        Forward propagation which involves one pass from inputs to encoder to lambda to decoder

        :param x: input tensor
        :return: the decoded output, latent vector
        """
        cell_output = self.encoder(x)
        latent = self.lmbd(cell_output)
        x_decoded = self.decoder(latent)

        return x_decoded, latent

    def _rec(self, x_decoded, x, loss_fn):
        """
        Compute the loss given output x decoded, input x and the specified loss function

        :param x_decoded: output of the decoder
        :param x: input to the encoder
        :param loss_fn: loss function specified
        :return: joint loss, reconstruction loss and kl-divergence loss
        """
        latent_mean, latent_logvar = self.lmbd.latent_mean, self.lmbd.latent_logvar

        # NOTE(review): the KL term is averaged over all latent entries while the built-in
        # reconstruction losses are summed - presumably an intentional weighting; confirm.
        kl_loss = -0.5 * torch.mean(1 + latent_logvar - latent_mean.pow(2) - latent_logvar.exp())
        recon_loss = loss_fn(x_decoded, x)

        return kl_loss + recon_loss, recon_loss, kl_loss

    def compute_loss(self, X):
        """
        Given input tensor, forward propagate and compute the loss (the caller runs the backward pass).

        :param X: Input tensor
        :return: total loss, reconstruction loss, kl-divergence loss and the input cast to the model's tensor type
        """
        # The input is data, not a parameter: no gradient is tracked for it.
        x = X.type(self.dtype)

        x_decoded, _ = self(x)
        loss, recon_loss, kl_loss = self._rec(x_decoded, x, self.loss_fn)

        return loss, recon_loss, kl_loss, x

    def _train(self, train_loader):
        """
        Run one epoch over `train_loader`, taking one optimizer step per batch

        :param train_loader: input train loader
        :return:
        """
        self.train()

        epoch_loss = 0.0
        n_batches = 0

        # NOTE(review): batches are forwarded exactly as the loader yields them (no tuple
        # unpacking, no axis swap) - presumably the dataset already yields tensors in the
        # layout the encoder expects; confirm against the caller.
        for t, X in enumerate(train_loader):

            self.optimizer.zero_grad()
            loss, recon_loss, kl_loss, _ = self.compute_loss(X)
            loss.backward()

            if self.clip:
                torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm = self.max_grad_norm)

            # accumulator
            epoch_loss += loss.item()
            n_batches += 1

            self.optimizer.step()

            if (t + 1) % self.print_every == 0:
                print('Batch %d, loss = %.4f, recon_loss = %.4f, kl_loss = %.4f' % (t + 1, loss.item(),
                                                                                    recon_loss.item(), kl_loss.item()))

        # Divide by the number of batches seen (not the last index) and survive an empty loader.
        if n_batches:
            print('Average loss: {:.4f}'.format(epoch_loss / n_batches))
        else:
            print('Average loss: n/a (no complete batch was produced)')

    def fit(self, dataset, save = False):
        """
        Calls `_train` function over a fixed number of epochs, specified by `n_epochs`

        :param dataset: `Dataset` object
        :param bool save: If true, dumps the trained model parameters as `model.pth` in the `dload` directory
        :return:
        """
        # Samples are visited in dataset order (shuffle=False); drop_last=True means only
        # full batches of `batch_size` samples are used for training.
        train_loader = DataLoader(dataset = dataset,
                                  batch_size = self.batch_size,
                                  shuffle = False,
                                  drop_last=True)

        for i in range(self.n_epochs):
            print('Epoch: %s' % i)

            self._train(train_loader)

        self.is_fitted = True
        if save:
            self.save('model.pth')

    def _batch_transform(self, x):
        """
        Passes the given input tensor into encoder and lambda function

        :param x: input batch tensor
        :return: intermediate latent vector as a numpy array
        """
        latent = self.lmbd(self.encoder(x.type(self.dtype)))
        return latent.detach().cpu().numpy()

    def _batch_reconstruct(self, x):
        """
        Passes the given input tensor into encoder, lambda and decoder function

        :param x: input batch tensor
        :return: reconstructed output as a numpy array
        """
        x_decoded, _ = self(x.type(self.dtype))

        return x_decoded.detach().cpu().numpy()

    def _dump_array(self, array, file_name):
        """Write `array` with `ndarray.dump` to `file_name` inside `dload`, creating the directory if needed."""
        os.makedirs(self.dload, exist_ok=True)
        array.dump(os.path.join(self.dload, file_name))

    def reconstruct(self, dataset, save = False):
        """
        Given input dataset, creates dataloader, runs dataloader on `_batch_reconstruct`
        Prerequisite is that model has to be fit

        :param dataset: input dataset who's output vectors are to be obtained
        :param bool save: If true, dumps the output array as a pickle file
        :return: reconstructed array, batches concatenated along axis 1
        :raises RuntimeError: if the model has not been fitted or loaded
        """
        if not self.is_fitted:
            raise RuntimeError('Model needs to be fit')

        self.eval()

        # Samples beyond the last full batch are dropped (drop_last=True).
        test_loader = DataLoader(dataset = dataset,
                                 batch_size = self.batch_size,
                                 shuffle = False,
                                 drop_last=True) # Don't shuffle for test_loader

        with torch.no_grad():
            x_decoded = [self._batch_reconstruct(x) for x in test_loader]

            x_decoded = np.concatenate(x_decoded, axis=1)

            if save:
                # NOTE(review): this reuses the file name written by `transform`, so saving
                # both overwrites one with the other - kept for compatibility, confirm intent.
                self._dump_array(x_decoded, 'z_run.pkl')
            return x_decoded

    def transform(self, dataset, save = False):
        """
        Given input dataset, creates dataloader, runs dataloader on `_batch_transform`
        Prerequisite is that model has to be fit

        :param dataset: input dataset who's latent vectors are to be obtained
        :param bool save: If true, dumps the latent vector array as a pickle file
        :return: latent vectors, batches concatenated along axis 0
        :raises RuntimeError: if the model has not been fitted or loaded
        """
        if not self.is_fitted:
            raise RuntimeError('Model needs to be fit')

        self.eval()

        # Samples beyond the last full batch are dropped (drop_last=True).
        test_loader = DataLoader(dataset = dataset,
                                 batch_size = self.batch_size,
                                 shuffle = False,
                                 drop_last=True) # Don't shuffle for test_loader

        with torch.no_grad():
            z_run = [self._batch_transform(x) for x in test_loader]

            z_run = np.concatenate(z_run, axis=0)
            if save:
                self._dump_array(z_run, 'z_run.pkl')
            return z_run

    def fit_transform(self, dataset, save = False):
        """
        Combines the `fit` and `transform` functions above

        :param dataset: Dataset on which fit and transform have to be performed
        :param bool save: If true, dumps the model and latent vectors as pickle file
        :return: latent vectors for input dataset
        """
        self.fit(dataset, save = save)
        return self.transform(dataset, save = save)

    def save(self, file_name):
        """
        Saves the model parameters (state dict) to be retrieved later

        :param file_name: the filename to be saved as, `dload` serves as the download directory
        :return: None
        """
        os.makedirs(self.dload, exist_ok=True)
        torch.save(self.state_dict(), os.path.join(self.dload, file_name))

    def load(self, PATH):
        """
        Loads the model's parameters from the path mentioned

        :param PATH: path of a state-dict file written by `save`
        :return: None
        """
        # map_location lets a checkpoint trained on GPU be restored on a CPU-only machine.
        # NOTE: torch.load unpickles the file - only load checkpoints from trusted sources.
        state = torch.load(PATH, map_location='cuda' if self.use_cuda else 'cpu')
        self.load_state_dict(state)

        # Only mark the model usable once the weights were actually restored.
        self.is_fitted = True
b/params.txt new file mode 100644 index 0000000..6272edd --- /dev/null +++ b/params.txt @@ -0,0 +1,14 @@ +hidden_size = 90 +hidden_layer_depth = 1 +latent_length = 20 +batch_size = 100 +learning_rate = 0.0005 +n_epochs = 50 +dropout_rate = 0.2 +optimizer = 'Adam' +cuda = True +print_every=30 +clip = True +max_grad_norm=5 +loss = 'MSELoss' +block = 'LSTM' \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..fe1979c --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +matplotlib==3.3.4 +numpy==1.19.5 +plotly==5.4.0 +scikit-learn==0.24.2 +scipy==1.5.4 +torch==1.10.0 +torchvision==0.11.1 \ No newline at end of file