diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cb74703e..05773a01 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,7 +22,7 @@ jobs: - name: "Install dependencies" run: | python -VV - python -m pip install --upgrade pip pip-tools + python -m pip install --upgrade pip make dev-install - name: "Run tox targets for ${{ matrix.python-version }}" @@ -49,13 +49,32 @@ jobs: - name: Install dependencies run: | python -VV - python -m pip install --upgrade pip pip-tools + python -m pip install --upgrade pip make dev-install python -m pip install cumulusci - name: Run Tests run: python -m pytest + faker_docs: + name: Faker Docs + runs-on: ubuntu-latest + steps: + - uses: "actions/checkout@v2" + - uses: "actions/setup-python@v1" + with: + python-version: "3.9" + + - name: Install dependencies + run: | + python -VV + python -m pip install --upgrade pip pip-tools + make dev-install + python setup.py install + + - name: Make Docs + run: make docs + windows: name: Windows ${{ matrix.python-version }} runs-on: windows-latest @@ -71,7 +90,7 @@ jobs: - name: "Install dependencies" run: | python -VV - python -m pip install --upgrade pip pip-tools + python -m pip install --upgrade pip make dev-install - name: Run Tests diff --git a/.gitignore b/.gitignore index 3623a5fc..828e9003 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,9 @@ build/ dist/ docs/api/ docs/_build/ +docs/fakedata +docs/fakedata.md +docs/locales.md .eggs/ .idea/ .tox/ @@ -38,6 +41,7 @@ pip-wheel-metadata results_junit.xml test_results.json temp +coverage.xml # Salesforce / SFDX / CCI .cci diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 8ff90d9d..bc5f8ad7 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -71,7 +71,13 @@ Or you could make it outside of the project repo. $ pytest -7. Your new code should also have meaningful tests. One way to double check that +7. 
Build the docs like this: + $ make docs + $ open build/html/index.html + +Set SF_MKDOCS_BUILD_LOCALES=False to skip building all locales + +8. Your new code should also have meaningful tests. One way to double check that your tests cover everything is to ensure that your new code has test code coverage: $ pytest --cov @@ -123,3 +129,27 @@ You can finish up the process by updating the release object that was auto-creat Just paste in the changelog notes and hit publish. Tada! You've published a new version of Snowfakery. + +Internal Software Architecture +------------------------------ + +=================================== ================================ +Filename Purpose +----------------------------------- -------------------------------- + +cli.py Click-based Command Line. Uses the Click library to supply a CLI. +data_generator.py The API entry point the CLI and CCI use.

This may be the best place to start reading. It abstracts away all of the complexity and outlines the core flow. +parse_recipe_yaml.py Phase 1: parse YAML into a Runtime DOM

Includes some hacks to the YAML parser for handling line numbers. +data_generator_runtime.py Phase 2: Runtime.

Actually generate the data by walking the template list top-to-bottom, generating rows as appropriate. +data_generator_runtime_dom.py An object model used in Phase 2. Roughly similar to the shape of the YAML file. +output_streams.py Where the data goes in the output. Used during Phase 2. +data_gen_exceptions.py Exceptions that can be thrown +generate_mapping_from_recipe.py In the CCI context, this utility package allows the generation of mapping.yml files. +template_funcs.py Functions that can be invoked using either block syntax or in Jinja templates +plugins.py Infrastructure for plugins +standard_plugins/ Plugins that ship with Snowfakery +tests/ Unit tests +=================================== ================================ + + +Architecture Diagram diff --git a/HISTORY.md b/HISTORY.md index 9296a6f5..2228ddb9 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -4,6 +4,29 @@ In the beginning, programmers created the databases. Now the databases were form And so [Salesforce.org](http://salesforce.org/) said “Let there be data,” and there was Snowfakery. And it was good. +## Snowfakery 2.0 + +Recipes can now merge in data from Salesforce orgs as +records, IDs or datasets. (#395 and #401) + +Output streams can now be specified as 3rd party libraries. Any Python class name +can be passed to --output-format as long as it adheres to the OutputStream +protocol as shown in examples/YamlOutputStream.py (#351) + +Snowfakery is thread-safe/multi-processor-ready and available for parallel usage +in CumulusCI. (CumulusCI Repo) + +Snowfakery now supports a --reps option as an easier way to repeat a recipe (#416) + +Snowfakery now accepts LeadingCaps syntax for all fakers, as well as +underscore_separated and everythinglowercase. 
(#403) + +Salesforce.ContentVersion and Salesforce.ProfileId convenience functions were added +(#421) + +Snowfakery now has voluminous documentation about Fakes in many languages and +locales (#409) + ## Snowfakery 1.12 Fix a regression: In some contexts it was impossible to call Faker with either diff --git a/LICENSE b/LICENSE index e9830463..da6962af 100644 --- a/LICENSE +++ b/LICENSE @@ -1,27 +1,30 @@ - Copyright (c) 2020, Salesforce.org - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of Salesforce.org nor the names of - its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. +Copyright (c) 2021, Salesforce.com, Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the + above copyright notice, this list of conditions + and the following disclaimer. + +* Redistributions in binary form must reproduce + the above copyright notice, this list of conditions + and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of Salesforce.com nor the names + of its contributors may be used to endorse or promote + products derived from this software without specific + prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. diff --git a/Makefile b/Makefile index ddeb5f3e..515857b2 100644 --- a/Makefile +++ b/Makefile @@ -3,4 +3,12 @@ update-deps: pip-compile --upgrade --allow-unsafe requirements/dev.in dev-install: + pip install --upgrade pip-tools pip-sync requirements/*.txt + pip install -e . 
+ +# set SF_MKDOCS_BUILD_LOCALES=False to skip building all locales +docs: .FORCE + python -m mkdocs build --clean --site-dir build/html --config-file mkdocs.yml + +.FORCE: \ No newline at end of file diff --git a/README.md b/README.md index ee33b7b9..ad611803 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,6 @@ Once you have youre virtual environment, you can install dependencies via pip: Or you can install dependencies via pip tools: ```python -pip install pip-tools make dev-install ``` diff --git a/custom_theme/img/favicon.ico b/custom_theme/img/favicon.ico new file mode 100755 index 00000000..4ef032f9 Binary files /dev/null and b/custom_theme/img/favicon.ico differ diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index 43276bb9..00000000 --- a/docs/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. -SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = . -BUILDDIR = ../build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/conf.py b/docs/conf.py index 396cb091..23c008f7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -27,8 +27,7 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = ["myst_parser"] -myst_config = {} +extensions = [] # Add any paths that contain templates here, relative to this directory. 
templates_path = ["_templates"] diff --git a/docs/index.md b/docs/index.md index b1293ba1..dfc5a71d 100644 --- a/docs/index.md +++ b/docs/index.md @@ -119,7 +119,7 @@ pet_stories.yml - object: Animal fields: name: - fake: first_name + fake: FirstName species: canine food: - object: PetFood @@ -131,7 +131,7 @@ pet_stories.yml - object: Animal fields: name: - fake: first_name + fake: FirstName species: feline food: - object: PetFood @@ -224,6 +224,30 @@ persons_of_interest.yml In this case, there will be 6 Persons in the Person table (or file), 3 with age between 0 and 12 and 3 with age between 12 and 95. +### Fake Data + +You can generate many kinds of fake data using the `fake` function: + +```yaml +- object: Account + fields: + Name: + fake: Company + Description: + fake: CatchPhrase + BillingStreet: + fake: StreetAddress + BillingCity: + fake: City + BillingState: + fake: State +``` + +You can fake all sorts of stuff. Names, addresses, Latin text, English sentences, URLs, etc. + +The complete list, along with other related features, can be found in +the [Fake Data Tutorial](fakedata.md) + ### Friends Sometimes you want to obey a rule like “For every Person I create, I’d like to create 2 animals” (maybe you really like animals). @@ -245,7 +269,7 @@ You would use the `friends` property to do that. 
count: 2 fields: name: - fake: first_name + fake: FirstName ``` This will output two animals per person: @@ -282,7 +306,7 @@ Relationships are a big part of what makes Snowfakery different than the dozens( - object: Animal fields: name: - fake: first_name + fake: FirstName species: Petaurus Breviceps ``` @@ -313,7 +337,7 @@ In addition, we can relate pets and owners “bidirectionally”, like this: - object: Animal fields: name: - fake: first_name + fake: FirstName owner: reference: Person ``` @@ -366,7 +390,7 @@ pet_stories_2.yml owner: reference: Person name: - fake: first_name + fake: FirstName species: canine food: reference: petschoice @@ -376,7 +400,7 @@ pet_stories_2.yml fields: owner: Person name: - fake: first_name + fake: FirstName species: feline nemesis: dog food: @@ -415,7 +439,7 @@ This function allows you to look up another row (object) and make a reference to - object: Animal fields: name: - fake: first_name + fake: FirstName owner: reference: Person ``` @@ -509,7 +533,8 @@ Create a reference to a random, already-created row from some table. - object: Owner count: 10 fields: - name: fake.name + name: + fake: Name - object: Pet count: 10 fields: @@ -533,123 +558,9 @@ github issue. ### `fake` -Generate fake data using functions from the [faker](https://github.com/joke2k/faker) library: - -```yaml -- object: Account - fields: - Name: - fake: company - Description: - fake: catch_phrase - BillingStreet: - fake: street_address - BillingCity: - fake: city - BillingState: - fake: state -``` - -You can fake all sorts of stuff. Names, addresses, Latin text, English sentences, URLs, etc. There are two lists of fake names you can pull from, a Snowfakery-specific list, -and the broader faker list. - -The Snowfakery names are: - -- Username: a globally unique username in the shape of an email address - -- Alias: a short string that looks like a first name. 
- -- FirstName, LastName: Localized first and last name - -- Email: An email address using one of the standard "example" domains (such as example.com, example.org, etc.) - -- RealisticMaybeRealEmail: An email address which looks -more real (because it uses domains like google.com, -yahoo.com, etc.) and may accidentally actually overlap -with a real email address. Be careful using this if -you might send actual emails to the addresses! +Generate fake data. This function is defined in detail +in the [Fake Data Tutorial](fakedata.md) -For example, you can use these like this: - -```yaml -# examples/salesforce/simple-user.yml -- object: User - fields: - Username: - fake: Username - FirstName: - fake: FirstName - LastName: - fake: LastName - Email: - fake: Email - Alias: - fake: Alias -``` - -It doesn't matter if you use upper or lower case for fake names. - -The complete list is here: - - - -You can also include Faker extension libraries after you’ve added them to your Python install: - -```yaml - - plugin: faker_microservice.Provider - - object: OBJ - fields: - service_name: - fake: - microservice -``` - -You would install that provider like this: - -```s -$ pip install faker_microservice -``` - -Here are some Python Faker providers: - - - -And you could make your own providers as well. - -Fake can be called as an inline function in an expression: - -```yaml -FullName: ${{fake.first_name}} Johnson -``` - -You can also call these functions with arguments as described in Faker's [documentation](https://faker.readthedocs.io/en/master/providers.html) - -```yaml -country: ${{fake.country_code(representation='alpha-2')}} -``` - -### International Fakes - -You can specify internationally appropriate fakes for many different kind of names (e.g. 
person, company) by setting the snowfakery_locale this: - -```yaml -- var: snowfakery_locale - value: no_NO -- object: person - fields: - name: - fake: name -- var: snowfakery_locale - value: fr_FR -- object: person - fields: - name: - fake: name -``` - -This will generate a “typical” Norwegian first name for the first person object and a French name for the second person object. - -You can infer which Faker providers are internationalizable by looking through the Faker [repository](https://github.com/joke2k/faker/tree/master/faker/providers) and seeing which directories have localizations. For example there are only three localizations of [credit card](https://github.com/joke2k/faker/tree/master/faker/providers) (who knew that credit cards were different in Iran and Russia) and dozens of localizations for [person name](https://github.com/joke2k/faker/tree/master/faker/providers/person). ### `date_between` @@ -761,16 +672,16 @@ some_number: A number ${{random_number(min=5, max=10)}} - choice: when: ${{gender=='Male'}} pick: - fake: first_name_male + fake: FirstNameMale - choice: when: ${{gender=='Female'}} pick: - fake: first_name_female + fake: FirstNameFemale - choice: pick: - fake: first_name + fake: FirstNameNonBinary ``` The `when` clause can be a Python formula and it will be interpreted as a boolean similar to how Python would do it. The first `when` clause that matches is selected. The last `choice` clause should have no `when` clause, and it is a fallback which is selected if the others do not match. 
@@ -853,17 +764,17 @@ this: ```yaml - var: lastname_var value: - fake: last_name + fake: LastName - object: person fields: - first_name: - fake: first_name - last_name: ${{lastname_var}} + FirstName: + fake: FirstName + LastName: ${{lastname_var}} - object: spouse fields: - first_name: - fake: first_name - last_name: ${{lastname_var}} + FirstName: + fake: FirstName + LastName: ${{lastname_var}} ``` This works both at the top level of your recipe and in friends @@ -878,7 +789,7 @@ do that by creating a "hidden" object: - object: __shared_address fields: street: - fake: street_address + fake: StreetAddress city: fake: city state: @@ -932,7 +843,7 @@ There is a lot to say about formulas and one day they will all be documented her - use `${{` to start a formula and `}}` to end it - use Python expression syntax in the middle - field values defined earlier on this object are available as names -- Use faker values like this: Name: ${{fake.first_name}} Johnson +- Use faker values like this: Name: ${{fake.FirstName}} Johnson - parent (or ancestor) values are available through the parent’s object name. Like Opportunity.amount Formulas are based on a similar language called Jinja2, but we use `${{` and `}}` where Jinja2 uses `{{` and `}}` because our version is more compatible with YAML. @@ -961,7 +872,7 @@ The `id` variable returns a unique identifier for the current Object/Row to allo ```yaml fields: - name: ${{fake.last_name}} Household ${{id}} + name: ${{fake.LastName}} Household ${{id}} ``` #### `today` @@ -973,7 +884,8 @@ a single recipe. #### `fake:` and `fake.` -The `fake:` function and `fake.` namespace both generate fake data as described elsewhere in this documentation. +The `fake:` block function and `fake.` namespace both generate +fake data as described in the [Fake Data Tutorial](fakedata.md). ```yaml # examples/two_fakers.yml @@ -1071,13 +983,21 @@ Options: sqlite:///foo.db if you don't have one set up. 
- --output-format [JSON|json|txt|csv|sql|PNG|png|SVG|svg|svgz|jpeg|jpg|ps|dot] + --output-format [png|svg|svgz|jpeg|jpg|ps|dot|json|txt|csv|sql] --output-folder PATH -o, --output-file PATH - --option EVAL_ARG... Options to send to the recipe YAML. - --target-number TEXT... Target options for the recipe YAML in the - form of 'number tablename'. For example: '50 - Account'. + --option EVAL_ARG... Option to send to the recipe YAML in a + format like 'OptName OptValue'. Specify + multiple times if needed. + + --target-number, --target-count TEXT... + Target record count for the recipe YAML in + the form of 'number tablename'. For example: + '50 Account' to generate roughly 50 + accounts. + + --reps INTEGER Target repetition count for the recipe YAML. + Use as an alternative to --target-number --debug-internals / --no-debug-internals --generate-cci-mapping-file FILENAME @@ -1092,6 +1012,10 @@ Options: --continuation-file FILENAME Continue generating a dataset where 'continuation-file' left off + --plugin-option EVAL_ARG... Option to send to a plugin in a format like + 'OptName OptValue'. Specify multiple times + if needed. + --load-declarations FILE Declarations to mix into the generated mapping file @@ -1101,7 +1025,23 @@ Options: ### Scaling up recipe execution -From the command line you can control how many rows a recipe generates. You do this by specifying a "target count" and a "target tablename", like this: +From the command line you can control how many rows a recipe generates. + +The simple way is: + +```s +snowfakery accounts.yml --reps 1000 +``` + +This will run the recipe 1000 times. Easy! + +But consider if the user wants to run a test against roughly 50,000 accounts. They +could do a calculation to figure out how many reps, but this may be complex +because Snowfakery has randomization features, so that a recipe might generate +a random number of accounts in each run. 
Even for simpler recipes, doing the +math may be a headache, especially if you are changing the recipe every day. + +A better solution is to specify the "target number" and a "target tablename", like this: ```s snowfakery accounts.yml --target-number 1000 Account @@ -1146,7 +1086,6 @@ for all of the CSV files. ## Advanced Features - ### Singletons with the "just_once" feature Snowfakery scales up to larger data volumes @@ -1388,7 +1327,67 @@ generate_data( Detailed information is available in [Embedding Snowfakery into Python Applications](./embedding.md) -## Plugins and Providers +### Using Snowfakery with Databases + +Snowfakery is built on top of a very flexible engine called +SQLAlchemy. This allows it to connect to many different databases +subject to the limitations described below. + +You should start by installing Snowfakery in a context which +makes it easy to use the Python command 'pip' to manage your +Python environment. For example you could install Python +using the standard installers from `python.org` and then +you would run the following commands to create and use a venv with the +Postgres package: + +```bash + +# create a new directory for our experiment +$ mkdir experiment_with_postgres +# cd into it +$ cd experiment_with_postgres +# create a new database: +# https://www.postgresql.org/docs/9.1/app-createdb.html +$ createdb snowfakerydb +# create a virtual environment. A good habit to get into. +# https://docs.python.org/3/library/venv.html +$ python3 -m venv myvenv +# activate the venv +$ source myvenv/bin/activate +# install Snowfakery in this venv +$ pip install snowfakery +# install the Postgres library for Python +# https://pypi.org/project/psycopg2/ +$ pip install psycopg2 +# let's use it! 
+$ snowfakery --dburl='postgresql://localhost:5432/snowfakerydb' ~/code/Snowfakery/examples/company.yml --target-number 1000 Employee +# and check the results +# https://www.postgresql.org/docs/9.3/app-psql.html +$ echo 'select * from "Employee"' | psql snowfakerydb +``` + +That's a lot to take in, but hopefully it will be clear enough +to follow the links and understand the details. + +A limitation of this process is that currently Snowfakery can +only create new tables rather than import into existing ones. + +The table will have an id column in addition to columns for every field that +was generated by the recipe. All columns will be of type text. + +The list of databases supported by our underlying infrastructure +(SQLAlchemy) is listed [here](https://docs.sqlalchemy.org/en/14/core/engines.html#supported-databases) and [here](https://docs.sqlalchemy.org/en/13/dialects/index.html). + +Snowfakery is not proactively tested with all of the output +databases. We will certainly accept bug reports and pull requests +relating to problems that are discovered. + +Please keep in touch with the Snowfakery team about your use of +other databases so we can have a sense of what works well and what +does not. + + +### Plugins and Providers Plugins and Providers allow Snowfakery to be extended with Python code. A plugin adds new functions to Snowfakery. A Provider adds new capabilities to the Faker library which is exposed to Snowfakery users through the fake: keyword. @@ -1398,9 +1397,9 @@ You include either Plugins or Providers in a Snowfakery file like this: - plugin: package.module.classname ``` -## Built-in Plugins +### Built-in Plugins -### Advanced Math +#### Advanced Math Snowfakery has a "Math" plugin which gives you access to all features from Python's [`math`](https://docs.python.org/3/library/math.html) module plus @@ -1526,9 +1525,9 @@ CumulusCI can also be used to download CSV data for enrichment as follows. 
Dataset.shuffle: dataset: ../../accounts.csv FirstName: - fake: first_name + fake: FirstName LastName: - fake: last_name + fake: LastName AccountId: ${{__accounts.Id}} ``` @@ -1655,66 +1654,9 @@ There are several examples [in the Snowfakery repository](https://github.com/SFD Salesforce-specific patterns and tools are described in [Using Snowfakery with Salesforce](salesforce.md) -## Using Snowfakery with Databases +## Appendices -Snowfakery is built on top of a very flexible engine called -SQLAlchemy. This allows it to connect to many different databases -subject to the limitations described below. - -You should start by installing Snowfakery in a context which -makes it easy to use the Python command 'pip' to manage your -Python environment. For example you could install Python -using the standard installers from `python.org` and then -you would run the following commands to create and use a venv with the -Postgres package: - -```bash - -# create a new directory for our experiment -$ mkdir experiment_with_postgres -# cd into it -$ cd experiment_with_postgres -# create a new database: -# https://www.postgresql.org/docs/9.1/app-createdb.html -$ createdb snowfakerydb -# create a virtual environment. A good habit to get into. -# https://docs.python.org/3/library/venv.html -$ python3 -m venv myvenv -# activate the venv -$ source myvenv/bin/activate -# install Snowfakery in this venv -$ pip install snowfakery -# install the Postgres library for Python -# https://pypi.org/project/psycopg2/ -$ pip install psycopg2 -# let's use it! -$ snowfakery --dburl='postgresql://localhost:5432/snowfakerydb' ~/code/Snowfakery/examples/company.yml --target-number 1000 Employee -# and check the results -# https://www.postgresql.org/docs/9.3/app-psql.html -$ echo 'select * from "Employee"' | psql snowfakerydb -``` - -That's a lot to take in, but hopefully it will be clear enough -to follow the links and understand the details. 
- -A limitation of this process is that currently Snowfakery can -only create new tables rather than import into existing ones. - -The table will have an id column in addition to columns for every field that -was generated by the recipe. All columns will be of type text. - -The list of databases supported by our underlying infrastructure -(SQLAlchemy) is listed [here](https://docs.sqlalchemy.org/en/14/core/engines.html#supported-databases) and [here](https://docs.sqlalchemy.org/en/13/dialects/index.html). - -Snowfakery is not proactively tested with all of the output -databases. We will certainly accept bug reports and pull requests -relating to problems that are discovered. - -Please keep in touch with the Snowfakery team about your use of -other databases so we can have a sense of what works well and what -does not. - -## Snowfakery Glossary +### Snowfakery Glossary - Object: When we think about our Rows in the context of each other, we often use the word “Object”. That’s because rows often *represent* real-world entities like houses (or at least their, addresses), organizations and people (in this case its acceptable to objectify people). See also: “Rows” - Object Template: These represent instructions on how to create a row, or multiple rows in a database. Each row represents a real-world Object. @@ -1724,7 +1666,7 @@ does not. - Singleton: A singleton is an Object Template that generates a single row regardless of how many times the recipe is iterated over. - YAML: YAML is a relatively simple, human-readable format. You can learn more about it at [yaml.org](http://yaml.org/). But you can also just pick up the basics of it by reading along. -## Security Profile of Snowfakery +### Appendix: Security Profile of Snowfakery Snowfakery should be considered a domain-specific programming language with access to most of the power of Python. It can load Python plugins and @@ -1732,26 +1674,8 @@ call Python methods. 
It would be unwise to run untrusted recipes in an environment that has access to secure resources such as passwords, network connections, etc. -## Internal Software Architecture - -|Filename |Purpose | -|--- |--- | -|cli.py |Click-based Command Line. Uses the Click library to supply a CLI. | -|data_generator.py |The API entry point the CLI and CCI use.

This may be the best place to start reading. It abstracts away all of the complexity and outlines the core flow. | -|parse_recipe_yaml.py |Phase 1: parse YAML into a Runtime DOM

Includes some hacks to the YAML parser for handling line numbers. | -|data_generator_runtime.py |Phase 2: Runtime.

Actually generate the data by walking the template list top-to-bottom, generating rows as appopriate. -|data_generator_runtime_dom.py |An object model used in Phase 2. Roughly similar to the shape of the YAML file.| -|output_streams.py |Where the data goes in the output. Used during Phase 2. | -|data_gen_exceptions.py |Exceptions that can be thrown | -|generate_mapping_from_recipe.py |In the CCI context, this utility package allows the generation of mapping.yml files. | -|template_funcs.py |Functions that can be invoked using either block syntax or in Jinja templates | -|plugins.py |Infrastructure for plugins | -|standard_plugins/ |Plugins that ship with Snowfakery | -|tests/ |Unit tests | - -Architecture Diagram - -## Appendix: The Age Old Puzzle + +### Appendix: The Age Old Puzzle ```yaml # As I was going to St. Ives, diff --git a/docs/salesforce.md b/docs/salesforce.md index c31e8944..5e2b6960 100644 --- a/docs/salesforce.md +++ b/docs/salesforce.md @@ -7,7 +7,11 @@ There are several examples [in the Snowfakery repository](https://github.com/SFD ## Using Snowfakery within CumulusCI The process of actually generating the data into a Salesforce -org happens through CumulusCI. +org happens through CumulusCI. The majority of the documentation +on using Snowfakery with CumulusCI is in +[the Generate Data section of the CumulusCI documentation](https://cumulusci.readthedocs.io/en/latest/data.html?highlight=snowfakery#generate-fake-data). + +A summarized overview follows. [CumulusCI](http://www.github.com/SFDO-Tooling/CumulusCI) is a tool and framework for building portable automation for @@ -17,8 +21,6 @@ creates Snowfakery. The easiest way to learn about CumulusCI (and to learn how to install it) is with its [Trailhead Trail](https://trailhead.salesforce.com/en/content/learn/trails/build-applications-with-cumulusci). 
-CumulusCI's documentation [describes](https://cumulusci.readthedocs.io/en/latest/data.html?highlight=snowfakery#generate-fake-data) -how to use it with Snowfakery. Here is a short example: ```s $ cci task run generate_and_load_from_yaml -o generator_yaml examples/salesforce/Contact.recipe.yml -o num_records 300 -o num_records_tablename Contact --org qa @@ -62,7 +64,7 @@ intensive, please remember to read the section Let's use an example where you have a Campaign object and would like to associate Contacts to it through CampaignMembers. -Here is an example were we query a particular Campaign object: +Here is an example where we query a particular Campaign object: ```yaml # examples/salesforce/CampaignMembers-first.recipe.yml @@ -228,13 +230,16 @@ In general, you can test Snowfakery files outside of CumulusCI to see if they wo $ snowfakery recipe.yml ``` -If you have a recipe which depends on data from an org, specify the CumulusCI orgname -like this: +If you have a recipe which depends on data from an org, +specify the CumulusCI org name like this: ```s -$ snowfakery recipe.yml --plugin-options orgname qa +$ snowfakery recipe.yml --plugin-options org_name qa ``` +When you run the recipe in this way, it will connect to the org to pull data but +not change data in the org at all. + ## Record Types To specify a Record Type for a record, just put the Record Type’s API Name in a field named RecordType. 
@@ -271,13 +276,13 @@ You can use Person Accounts like this: - object: Account fields: FirstName: - fake: first_name + fake: FirstName LastName: - fake: last_name + fake: LastName PersonMailingStreet: - fake: street_address + fake: StreetAddress PersonMailingCity: - fake: city + fake: City PersonContactId: Salesforce.SpecialObject: PersonContact ``` diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css new file mode 100644 index 00000000..c2a1dfda --- /dev/null +++ b/docs/stylesheets/extra.css @@ -0,0 +1,4 @@ +code { + white-space : pre-wrap !important; + } + \ No newline at end of file diff --git a/examples/faker_in_formula.recipe.yml b/examples/faker_in_formula.recipe.yml new file mode 100644 index 00000000..7274f3b3 --- /dev/null +++ b/examples/faker_in_formula.recipe.yml @@ -0,0 +1,3 @@ +- object: Account + fields: + Name: ${{fake.State}} State University diff --git a/examples/parameters.recipe.yml b/examples/parameters.recipe.yml new file mode 100644 index 00000000..0badf01c --- /dev/null +++ b/examples/parameters.recipe.yml @@ -0,0 +1,3 @@ +- object: Example + fields: + gibberish_words: ${{fake.Sentence(nb_words=10, variable_nb_words=False)}} diff --git a/examples/parameters_block.recipe.yml b/examples/parameters_block.recipe.yml new file mode 100644 index 00000000..15ea0c46 --- /dev/null +++ b/examples/parameters_block.recipe.yml @@ -0,0 +1,6 @@ +- object: Example + fields: + gibberish_words: + fake.Sentence: + nb_words: 10 + variable_nb_words: False diff --git a/examples/salesforce/simple_account.recipe.yml b/examples/salesforce/simple_account.recipe.yml new file mode 100644 index 00000000..147621f8 --- /dev/null +++ b/examples/salesforce/simple_account.recipe.yml @@ -0,0 +1,18 @@ +- object: Account + fields: + Name: + fake: Company + Description: + fake: CatchPhrase + BillingStreet: + fake: StreetAddress + BillingCity: + fake: City + BillingState: + fake: State + BillingPostalCode: + fake: PostalCode + BillingCountry: + fake: CurrentCountry 
+ Phone: + fake: PhoneNumber diff --git a/examples/salesforce/simple_account_french.recipe.yml b/examples/salesforce/simple_account_french.recipe.yml new file mode 100644 index 00000000..72dd3104 --- /dev/null +++ b/examples/salesforce/simple_account_french.recipe.yml @@ -0,0 +1,20 @@ +- var: snowfakery_locale + value: fr_FR +- object: Account + fields: + Name: + fake: Company + Description: + fake: CatchPhrase + BillingStreet: + fake: StreetAddress + BillingCity: + fake: City + BillingState: + fake: State + BillingPostalCode: + fake: PostalCode + BillingCountry: + fake: CurrentCountry + Phone: + fake: PhoneNumber diff --git a/examples/salesforce/simple_account_random.recipe.yml b/examples/salesforce/simple_account_random.recipe.yml new file mode 100644 index 00000000..f1d7ec3d --- /dev/null +++ b/examples/salesforce/simple_account_random.recipe.yml @@ -0,0 +1,26 @@ +- var: snowfakery_locale + value: + random_choice: + - ja_JP # Japanese + - en_CA # Canadian English + - fr_FR # French from France + - fr_CA # Canadian French + - de_DE # German from Germany +- object: Account + fields: + Name: + fake: Company + Description: + fake: CatchPhrase + BillingStreet: + fake: StreetAddress + BillingCity: + fake: City + BillingState: + fake: State + BillingPostalCode: + fake: PostalCode + BillingCountry: + fake: CurrentCountry + Phone: + fake: PhoneNumber diff --git a/mkdocs.yml b/mkdocs.yml index 35ac1e9a..b6a1461d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,3 +1,25 @@ site_name: Snowfakery documentation +site_url: "" +use_directory_urls: False theme: readthedocs +custom_dir: custom_theme repo_url: https://github.com/SFDO-Tooling/Snowfakery/ +nav: + - index.md + - salesforce.md + - Fake Data: fakedata.md + - Localization: locales.md + - embedding.md + - extending.md +extra_css: + - stylesheets/extra.css +markdown_extensions: + - toc: + permalink: True + toc_depth: 4 +plugins: + - search + - snowfakery_fakes: + build_locales: True # do generate locales + # set 
SF_MKDOCS_BUILD_LOCALES to override + # future versions MAY turn off locale-info-building on main branch diff --git a/requirements/dev.in b/requirements/dev.in index 9eb9c2b7..42bcbb75 100644 --- a/requirements/dev.in +++ b/requirements/dev.in @@ -3,12 +3,10 @@ black coverage coveralls flake8 -myst-parser -pip-tools +mkdocs pre-commit pytest pytest-cov -Sphinx<4 # pin this until Myst is happy with Sphinx 4 typeguard==2.10.0 # do not upgrade until #181 is fixed faker-microservice tox diff --git a/requirements/dev.txt b/requirements/dev.txt index ceb84fb7..f672e435 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -4,19 +4,13 @@ # # pip-compile --allow-unsafe requirements/dev.in # -alabaster==0.7.12 - # via sphinx appdirs==1.4.4 # via # black # virtualenv attrs==21.2.0 - # via - # markdown-it-py - # pytest -babel==2.9.1 - # via sphinx -black==21.5b2 + # via pytest +black==21.6b0 # via -r requirements/dev.in certifi==2021.5.30 # via requests @@ -28,7 +22,7 @@ click==7.1.2 # via # -r requirements/prod.txt # black - # pip-tools + # mkdocs contextvars==2.4 # via -r requirements/prod.txt coverage==5.5 @@ -42,13 +36,9 @@ distlib==0.3.2 # via virtualenv docopt==0.6.2 # via coveralls -docutils==0.16 - # via - # myst-parser - # sphinx faker-microservice==2.0.0 # via -r requirements/dev.in -faker==8.6.0 +faker==8.10.0 # via # -r requirements/prod.txt # faker-microservice @@ -58,6 +48,8 @@ filelock==3.0.12 # virtualenv flake8==3.9.2 # via -r requirements/dev.in +ghp-import==2.0.1 + # via mkdocs greenlet==1.1.0 # via # -r requirements/prod.txt @@ -70,17 +62,16 @@ idna==2.10 # via # requests # yarl -imagesize==1.2.0 - # via sphinx immutables==0.15 # via # -r requirements/prod.txt # contextvars -importlib-metadata==4.5.0 +importlib-metadata==4.6.1 # via # -r requirements/prod.txt # flake8 - # pep517 + # markdown + # mkdocs # pluggy # pre-commit # pytest @@ -92,39 +83,32 @@ iniconfig==1.1.1 jinja2==2.11.3 # via # -r requirements/prod.txt - # myst-parser - # sphinx 
-markdown-it-py==1.1.0 - # via - # mdit-py-plugins - # myst-parser + # mkdocs +markdown==3.3.4 + # via mkdocs markupsafe==2.0.1 # via # -r requirements/prod.txt # jinja2 mccabe==0.6.1 # via flake8 -mdit-py-plugins==0.2.8 - # via myst-parser +mergedeep==1.3.4 + # via mkdocs +mkdocs==1.2.1 + # via -r requirements/dev.in multidict==5.1.0 # via yarl mypy-extensions==0.4.3 # via black -myst-parser==0.14.0 - # via -r requirements/dev.in nodeenv==1.6.0 # via pre-commit -packaging==20.9 +packaging==21.0 # via + # mkdocs # pytest - # sphinx # tox pathspec==0.8.1 # via black -pep517==0.10.0 - # via pip-tools -pip-tools==6.1.0 - # via -r requirements/dev.in pluggy==0.13.1 # via # pytest @@ -141,8 +125,6 @@ pydantic==1.8.2 # via -r requirements/prod.txt pyflakes==2.3.1 # via flake8 -pygments==2.9.0 - # via sphinx pyparsing==2.4.7 # via packaging pytest-cov==2.12.1 @@ -158,21 +140,22 @@ python-dateutil==2.8.1 # via # -r requirements/prod.txt # faker -pytz==2021.1 - # via babel + # ghp-import +pyyaml-env-tag==0.1 + # via mkdocs pyyaml==5.4.1 # via # -r requirements/prod.txt - # myst-parser + # mkdocs # pre-commit + # pyyaml-env-tag # vcrpy -regex==2021.4.4 +regex==2021.7.6 # via black requests==2.25.1 # via # coveralls # responses - # sphinx responses==0.13.3 # via -r requirements/dev.in six==1.16.0 @@ -183,25 +166,7 @@ six==1.16.0 # tox # vcrpy # virtualenv -snowballstemmer==2.1.0 - # via sphinx -sphinx==3.5.4 - # via - # -r requirements/dev.in - # myst-parser -sphinxcontrib-applehelp==1.0.2 - # via sphinx -sphinxcontrib-devhelp==1.0.2 - # via sphinx -sphinxcontrib-htmlhelp==2.0.0 - # via sphinx -sphinxcontrib-jsmath==1.0.1 - # via sphinx -sphinxcontrib-qthelp==1.0.3 - # via sphinx -sphinxcontrib-serializinghtml==1.1.5 - # via sphinx -sqlalchemy==1.4.17 +sqlalchemy==1.4.20 # via -r requirements/prod.txt text-unidecode==1.3 # via @@ -210,7 +175,6 @@ text-unidecode==1.3 toml==0.10.2 # via # black - # pep517 # pre-commit # pytest # pytest-cov @@ -230,10 +194,9 @@ 
typing-extensions==3.10.0.0 # -r requirements/prod.txt # black # importlib-metadata - # markdown-it-py # pydantic # yarl -urllib3==1.26.5 +urllib3==1.26.6 # via # requests # responses @@ -245,18 +208,13 @@ virtualenv==20.4.7 # via # pre-commit # tox +watchdog==2.1.3 + # via mkdocs wrapt==1.12.1 # via vcrpy yarl==1.6.3 # via vcrpy -zipp==3.4.1 +zipp==3.5.0 # via # -r requirements/prod.txt # importlib-metadata - # pep517 - -# The following packages are considered to be unsafe in a requirements file: -pip==21.1.2 - # via pip-tools -setuptools==57.0.0 - # via sphinx diff --git a/requirements/prod.txt b/requirements/prod.txt index 5f916a73..8b18daca 100644 --- a/requirements/prod.txt +++ b/requirements/prod.txt @@ -8,7 +8,7 @@ click==7.1.2 # via -r requirements/prod.in contextvars==2.4 # via -r requirements/prod.in -faker==8.6.0 +faker==8.10.0 # via -r requirements/prod.in greenlet==1.1.0 # via sqlalchemy @@ -16,7 +16,7 @@ gvgen==1.0 # via -r requirements/prod.in immutables==0.15 # via contextvars -importlib-metadata==4.5.0 +importlib-metadata==4.6.1 # via sqlalchemy jinja2==2.11.3 # via -r requirements/prod.in @@ -32,7 +32,7 @@ pyyaml==5.4.1 # via -r requirements/prod.in six==1.16.0 # via python-dateutil -sqlalchemy==1.4.17 +sqlalchemy==1.4.20 # via -r requirements/prod.in text-unidecode==1.3 # via faker @@ -41,5 +41,5 @@ typing-extensions==3.10.0.0 # -r requirements/prod.in # importlib-metadata # pydantic -zipp==3.4.1 +zipp==3.5.0 # via importlib-metadata diff --git a/setup.py b/setup.py index d90dfa51..7d8e34d6 100644 --- a/setup.py +++ b/setup.py @@ -49,7 +49,8 @@ def parse_requirements_file(requirements_file) -> List[str]: "console_scripts": [ "snowfakery=snowfakery.cli:main", "snowbench=snowfakery.tools.snowbench:main", - ] + ], + "mkdocs.plugins": ["snowfakery_fakes=snowfakery.tools.mkdocs:Plugin"], }, long_description=long_description, long_description_content_type="text/markdown", diff --git a/snowfakery/api.py b/snowfakery/api.py index 750990f5..5441ad83 
100644 --- a/snowfakery/api.py +++ b/snowfakery/api.py @@ -43,6 +43,8 @@ file_extensions = tuple(OUTPUT_FORMATS.keys()) +COUNT_REPS = "__REPS__" + class SnowfakeryApplication: """Base class for all applications which embed Snowfakery as a library, @@ -50,9 +52,10 @@ class SnowfakeryApplication: stopping_criteria = None starting_id = 0 + rep_count = 0 def __init__(self, stopping_criteria: StoppingCriteria = None): - self.stopping_criteria = stopping_criteria + self.stopping_criteria = stopping_criteria or StoppingCriteria(COUNT_REPS, 1) def echo(self, message=None, file=None, nl=True, err=False, color=None): """Write something to a virtual stdout or stderr. @@ -72,7 +75,7 @@ def stopping_tablename(self): This is used by Snowfakery to validate that the provided recipe will not generate forever due to a misspelling the stopping tablename.""" - if self.stopping_criteria: + if self.stopping_criteria.tablename != COUNT_REPS: return self.stopping_criteria.tablename def ensure_progress_was_made(self, id_manager): @@ -94,11 +97,13 @@ def ensure_progress_was_made(self, id_manager): def check_if_finished(self, id_manager): "Check whether we've finished making as many objects as we promised" # if nobody told us how much to make, finish after first run - if not self.stopping_criteria: - return True + self.rep_count += 1 target_table, count = self.stopping_criteria + if target_table == COUNT_REPS: + return self.rep_count >= count + # Snowfakery processes can be restarted. 
We would need # to keep track of where we restarted to know whether # we are truly finished diff --git a/snowfakery/cci_mapping_files/post_processes.py b/snowfakery/cci_mapping_files/post_processes.py index 7609d41e..9725a82d 100644 --- a/snowfakery/cci_mapping_files/post_processes.py +++ b/snowfakery/cci_mapping_files/post_processes.py @@ -2,6 +2,8 @@ def add_after_statements(mappings): + """Automatically add CCI after: statements to the lookups + in a mapping file""" indexed_by_sobject = _index_by_sobject(mappings) for idx, (mapping_name, mapping) in enumerate(mappings.items()): @@ -16,9 +18,9 @@ def add_after_statements(mappings): lookup["after"] = target_mapping_index.last_step_name -class MappingIndex(NamedTuple): - first_instance: int - last_step_name: str +class MappingIndex(NamedTuple): # info needed by the algorithm above + first_instance: int # where was the first time this sobj was referenced? + last_step_name: str # where was the last (so far)? def _index_by_sobject(mappings): diff --git a/snowfakery/cli.py b/snowfakery/cli.py index 17cece61..e5039b44 100755 --- a/snowfakery/cli.py +++ b/snowfakery/cli.py @@ -7,7 +7,7 @@ import click from snowfakery import version -from snowfakery.api import file_extensions, generate_data +from snowfakery.api import file_extensions, generate_data, COUNT_REPS if __name__ == "__main__": # pragma: no cover sys.path.append(str(Path(__file__).parent.parent)) @@ -77,10 +77,17 @@ def int_string_tuple(ctx, param, value=None): ) @click.option( "--target-number", + "--target-count", nargs=2, - help="Target options for the recipe YAML in the form of 'number tablename'. For example: '50 Account'.", + help="Target record count for the recipe YAML in the form of 'number tablename'. " + "For example: '50 Account' to generate roughly 50 accounts.", callback=int_string_tuple, # noqa https://github.com/pallets/click/issues/789#issuecomment-535121714 ) +@click.option( + "--reps", + help="Target repetition count for the recipe YAML. 
Use as an alternative to --target-number", + type=int, +) @click.option( "--debug-internals/--no-debug-internals", "debug_internals", default=False ) @@ -125,6 +132,7 @@ def generate_cli( option=(), dburls=(), target_number=None, + reps=None, debug_internals=None, generate_cci_mapping_file=None, output_format=None, @@ -163,10 +171,15 @@ def generate_cli( output_format, output_files, output_folder, + target_number, + reps, ) try: user_options = dict(option) plugin_options = dict(plugin_option) + if reps: + target_number = (COUNT_REPS, reps) + generate_data( yaml_file=yaml_file, user_options=user_options, @@ -201,6 +214,8 @@ def validate_options( output_format, output_files, output_folder, + target_number, + reps, ): if dburl and output_format: raise click.ClickException( @@ -221,6 +236,12 @@ def validate_options( "--output-folder can only be used with --output-file= or --output-format=csv" ) + if target_number and reps: + raise click.ClickException( + "Sorry, you need to pick --target_number or --reps " + "because they are mutually exclusive." + ) + def main(): generate_cli.main(prog_name="snowfakery") diff --git a/snowfakery/data_generator.py b/snowfakery/data_generator.py index 468a692c..eab8ccaf 100644 --- a/snowfakery/data_generator.py +++ b/snowfakery/data_generator.py @@ -40,7 +40,7 @@ def __init__(self, parse_results, runtime_results): self.templates = parse_results.templates self.intertable_dependencies = runtime_results.intertable_dependencies - def summarize_for_debugging(self): + def summarize_for_debugging(self): # pragma: no cover return self.intertable_dependencies, self.templates @@ -192,8 +192,8 @@ def process_plugins_options( ) -> Mapping[str, object]: """Replace option short names with fully qualified names and convert types of options. - e.g. the option name that the user specifies on the CLI or API is just "orgname" - but we use the long name internally to aavoid clashing with the + e.g. 
the option name that the user specifies on the CLI or API is just "org_name" + but we use the long name internally to avoid clashing with the user's variable names.""" allowed_options = collect_allowed_plugin_options(tuple(plugins.values())) diff --git a/snowfakery/docs/examples/secret_life_of_pets.yml b/snowfakery/docs/examples/secret_life_of_pets.yml index 9cb6b17f..ec5e9e82 100644 --- a/snowfakery/docs/examples/secret_life_of_pets.yml +++ b/snowfakery/docs/examples/secret_life_of_pets.yml @@ -7,6 +7,6 @@ - object: Animal fields: name: - fake: first_name + fake: FirstName owner: reference: Person diff --git a/snowfakery/fakedata/fake_data_generator.py b/snowfakery/fakedata/fake_data_generator.py index 41802566..066f1137 100644 --- a/snowfakery/fakedata/fake_data_generator.py +++ b/snowfakery/fakedata/fake_data_generator.py @@ -1,33 +1,43 @@ from difflib import get_close_matches -from faker import Faker import typing as T +from faker import Faker, Generator + class FakeNames(T.NamedTuple): f: Faker - def Username(self): + def user_name(self): + "Salesforce-style username in the form of an email address" return f"{self.f.first_name()}_{self.f.last_name()}_{self.f.uuid4()}@{self.f.hostname()}" - def Alias(self): + def alias(self): + "Salesforce-style 8-character alias" return self.f.first_name()[0:8] - def FirstName(self): - return self.f.first_name() - - def LastName(self): - return self.f.last_name() - - def Email(self): + def email(self): + """Email address using one of the "example" domains""" return self.f.ascii_safe_email() - def RealisticMaybeRealEmail(self): + def realistic_maybe_real_email(self): + """Like fake: email except that the email domain may be real and therefore + the email address itself may be real. Use with caution, you might + accidentally email strangers!!! 
+ """ return self.f.email() + def state(self): + """Return a state, province or other appropriate administrative unit""" + return self.f.administrative_unit() + + def postalcode(self): + """Return whatever counts as a postalcode for a particular locale""" + return self.f.postcode() + # we will use this to exclude Faker's internal book-keeping methods # from our faker interface -faker_class_attrs = set(dir(Faker)) +faker_class_attrs = set(dir(Faker)).union((dir(Generator))) class FakeData: @@ -35,6 +45,10 @@ class FakeData: def __init__(self, faker: Faker): fake_names = FakeNames(faker) + self.faker = faker + + def no_underscore_name(name): + return name.lower().replace("_", "") def obj_to_func_list(obj: object, canonicalizer: T.Callable, ignore_list: set): return { @@ -49,11 +63,10 @@ def obj_to_func_list(obj: object, canonicalizer: T.Callable, ignore_list: set): # include snowfakery names defined above self.fake_names = { **obj_to_func_list(faker, str.lower, faker_class_attrs), - **obj_to_func_list( - faker, lambda x: x.lower().replace("_", ""), faker_class_attrs - ), + **obj_to_func_list(faker, no_underscore_name, faker_class_attrs), # in case of conflict, snowfakery names "win" over Faker names **obj_to_func_list(fake_names, str.lower, set()), + **obj_to_func_list(fake_names, no_underscore_name, set()), } def _get_fake_data(self, origname, *args, **kwargs): diff --git a/snowfakery/object_rows.py b/snowfakery/object_rows.py index 574ebd53..c9dc90d1 100644 --- a/snowfakery/object_rows.py +++ b/snowfakery/object_rows.py @@ -1,6 +1,7 @@ from enum import Enum, auto import yaml +import snowfakery # noqa from .utils.yaml_utils import SnowfakeryDumper IdManager = "snowfakery.data_generator_runtime.IdManager" @@ -49,10 +50,6 @@ def __setstate__(self, state): for slot, value in state.items(): setattr(self, slot, value) - @property - def _name(self): - return self._values.get("name") - class ObjectReference(yaml.YAMLObject): def __init__(self, tablename, id): @@ -77,7 
+74,7 @@ class NicknameSlot(ObjectReference): id_manager: IdManager allocated_id: int = None - def __init__(self, tablename, id_manager): + def __init__(self, tablename: str, id_manager: IdManager): self._tablename = tablename self.id_manager = id_manager diff --git a/snowfakery/output_streams.py b/snowfakery/output_streams.py index 096a94ca..c95f9a99 100644 --- a/snowfakery/output_streams.py +++ b/snowfakery/output_streams.py @@ -10,7 +10,16 @@ from typing import Dict, Union, Optional, Mapping, Callable, Sequence from warnings import warn -from sqlalchemy import create_engine, MetaData, Column, Integer, Table, Unicode, func +from sqlalchemy import ( + create_engine, + MetaData, + Column, + Integer, + Table, + Unicode, + func, + inspect, +) from sqlalchemy.ext.automap import automap_base from sqlalchemy.orm import create_session from sqlalchemy.engine import Engine @@ -396,6 +405,7 @@ def close(self, *args, **kwargs): def create_tables_from_inferred_fields(tables, engine, metadata): """Create tables based on dictionary of tables->field-list.""" with engine.connect() as conn: + inspector = inspect(engine) for table_name, table in tables.items(): columns = [Column(field_name, Unicode(255)) for field_name in table.fields] id_column_as_list = [ @@ -412,7 +422,8 @@ def create_tables_from_inferred_fields(tables, engine, metadata): ) t = Table(table_name, metadata, id_column, *columns) - if t.exists(): + + if inspector.has_table(table_name): stmt = select([func.count(t.c.id)]) count = conn.execute(stmt).first()[0] if count > 0: diff --git a/snowfakery/standard_plugins/Salesforce.py b/snowfakery/standard_plugins/Salesforce.py index 66587353..2f833256 100644 --- a/snowfakery/standard_plugins/Salesforce.py +++ b/snowfakery/standard_plugins/Salesforce.py @@ -30,10 +30,18 @@ MAX_SALESFORCE_OFFSET = 2000 # Any way around this? 
-# the option name that the user specifies on the CLI or API is just "orgname" +# the option name that the user specifies on the CLI or API is just "org_name" # but using this long name internally prevents us from clashing with the # user's variable names. -plugin_option_name = "snowfakery.standard_plugins.Salesforce.SalesforceQuery.orgname" +plugin_option_org_name = ( + "snowfakery.standard_plugins.Salesforce.SalesforceQuery.org_name" +) +plugin_option_org_config = ( + "snowfakery.standard_plugins.Salesforce.SalesforceQuery.org_config" +) +plugin_option_project_config = ( + "snowfakery.standard_plugins.Salesforce.SalesforceQuery.project_config" +) class SalesforceConnection: @@ -41,15 +49,16 @@ class SalesforceConnection: _sf = None - def __init__(self, get_orgname): - self.get_orgname = get_orgname + def __init__(self, get_project_config_and_org_config): + self.get_project_config_and_org_config = get_project_config_and_org_config self.logger = getLogger(__name__) @property def sf(self): """simple_salesforce client""" if not self._sf: - self._sf, self._bulk = self._get_sf_clients(self.orgname) + project_config, org_config = self.get_project_config_and_org_config() + self._sf, self._bulk = self._get_sf_clients(project_config, org_config) return self._sf @property @@ -58,11 +67,6 @@ def bulk(self): self.sf # initializes self._bulk as a side-effect return self._bulk - @property - def orgname(self): - """Look up the orgname in the scope""" - return self.get_orgname() - def query(self, *args, **kwargs): """Query Salesforce through simple_salesforce""" return self.sf.query(*args, **kwargs) @@ -109,50 +113,85 @@ def compose_query(self, context_name, **kwargs): return query @staticmethod - def _get_sf_clients(orgname): + def _get_sf_clients(project_config, org_config): + from cumulusci.salesforce_api.utils import get_simple_salesforce_connection - try: - from cumulusci.cli.runtime import CliRuntime - from cumulusci.salesforce_api.utils import 
get_simple_salesforce_connection + sf = get_simple_salesforce_connection(project_config, org_config) + return sf, _init_bulk(sf, org_config) - runtime = CliRuntime(load_keychain=True) - except Exception as e: # pragma: no cover - raise DataGenError("CumulusCI Runtime cannot be loaded", *e.args) - name, org_config = runtime.get_org(orgname) - sf = get_simple_salesforce_connection(runtime.project_config, org_config) - return sf, SalesforceConnection._init_bulk(sf, org_config) +def _init_bulk(sf, org_config): + from salesforce_bulk import SalesforceBulk - @staticmethod - def _init_bulk(sf, org_config): - from salesforce_bulk import SalesforceBulk + return SalesforceBulk( + host=org_config.instance_url.replace("https://", "").rstrip("/"), + sessionId=org_config.access_token, + API_version=sf.sf_version, + ) - return SalesforceBulk( - host=org_config.instance_url.replace("https://", "").rstrip("/"), - sessionId=org_config.access_token, - API_version=sf.sf_version, - ) + +def check_orgconfig(config): + from cumulusci.core.config import BaseConfig + + if isinstance(config, BaseConfig): + return config + raise TypeError(f"Should be a CCI Config, not {type(config)}") class SalesforceConnectionMixin: _sf_connection = None - allowed_options = [PluginOption(plugin_option_name, str)] + _runtime = None + allowed_options = [ + PluginOption(plugin_option_org_name, str), + PluginOption(plugin_option_org_config, check_orgconfig), + PluginOption(plugin_option_project_config, check_orgconfig), + ] @property def sf_connection(self): assert self.context if not self._sf_connection: - self._sf_connection = SalesforceConnection(self.get_orgname) + self._sf_connection = SalesforceConnection( + self.get_project_config_and_org_config + ) return self._sf_connection - def get_orgname(self): - """Look up the orgname in the scope""" + def get_project_config_and_org_config(self): + fieldvars = self.context.field_vars() + project_config = fieldvars.get(plugin_option_project_config) + org_config = 
fieldvars.get(plugin_option_org_config) + + if not project_config or not org_config: + project_config, org_config = self._get_org_info_from_cli_keychain() + + return project_config, org_config + + def _get_org_info_from_cli_keychain(self): + org_name = self.get_org_name() # from command line argument + runtime = self._get_CliRuntime() # from CCI CliRuntime + name, org_config = runtime.get_org(org_name) + return runtime.project_config, org_config + + def _get_CliRuntime(self): + if self._runtime: + return self._runtime # pragma: no cover + + try: + from cumulusci.cli.runtime import CliRuntime + + self._runtime = CliRuntime(load_keychain=True) + return self._runtime + except Exception as e: # pragma: no cover + raise DataGenError("CumulusCI Runtime cannot be loaded", *e.args) + + def get_org_name(self): + """Look up the org_name in the scope""" fieldvars = self.context.field_vars() try: - return fieldvars[plugin_option_name] + return fieldvars[plugin_option_org_name] except KeyError: raise DataGenNameError( - "Orgname is not specified. Use --plugin-option orgname ", + "Orgname is not specified. Use --plugin-option org_name ", None, None, ) diff --git a/snowfakery/tools/mkdocs.py b/snowfakery/tools/mkdocs.py new file mode 100644 index 00000000..fd7edea2 --- /dev/null +++ b/snowfakery/tools/mkdocs.py @@ -0,0 +1,29 @@ +import sys +from pathlib import Path +from importlib import import_module +from unittest.mock import patch + +from mkdocs.plugins import BasePlugin +import mkdocs + + +class Plugin(BasePlugin): + config_scheme = ( + ("build_locales", mkdocs.config.config_options.Type(bool, default=False)), + ) + + def on_config(self, config): + """Look for and load main_mkdocs_plugin in tools/faker_docs_utils/mkdocs_plugins.py + This bootstrap plugin is needed because that other one is never "installed" + It is just present in the repo. So it can't have an official entry point + in setup.py. 
+ """ + docs_dir = config["docs_dir"] + plugins_dir = Path(docs_dir).parent / "tools/faker_docs_utils/mkdocs_plugins" + new_sys_path = [*sys.path, str(plugins_dir)] + with patch.object(sys, "path", new_sys_path): + module = import_module("main_mkdocs_plugin") + main_plugin = module.Plugin() + config["plugins"]["main_mkdocs_plugin"] = main_plugin + main_plugin.config = self.config + main_plugin.on_config(config) diff --git a/snowfakery/version.txt b/snowfakery/version.txt index 35d51f33..415b19fc 100644 --- a/snowfakery/version.txt +++ b/snowfakery/version.txt @@ -1 +1 @@ -1.12 \ No newline at end of file +2.0 \ No newline at end of file diff --git a/tests/salesforce/test_where.recipe.yml b/tests/salesforce/test_where.recipe.yml index 20bcf05d..88983b6c 100644 --- a/tests/salesforce/test_where.recipe.yml +++ b/tests/salesforce/test_where.recipe.yml @@ -1,6 +1,6 @@ # execute this recipe like this: -# snowfakery tests/salesforce/test_where.recipe.yml --plugin-option orgname qa +# snowfakery tests/salesforce/test_where.recipe.yml --plugin-option org_name qa - plugin: snowfakery.standard_plugins.Salesforce.SalesforceQuery diff --git a/tests/test_cli.py b/tests/test_cli.py index 29e9d217..3afb1807 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -9,7 +9,7 @@ from tests.utils import named_temporary_file_path import yaml -from click.exceptions import ClickException +from click.exceptions import ClickException, BadParameter from snowfakery.cli import generate_cli, eval_arg, main from snowfakery.data_gen_exceptions import DataGenError @@ -179,6 +179,19 @@ def test_from_cli__target_number(self, capsys): assert len(re.findall(r"Account\(", stdout)) == 5 + def test_from_cli__reps(self, capsys): + generate_cli.main([str(sample_yaml), "--reps", "3"], standalone_mode=False) + stdout = capsys.readouterr().out + + assert len(re.findall(r"Account\(", stdout)) == 3 + + def test_from_cli__bad_target_number(self): + with pytest.raises(BadParameter): + generate_cli.main( + 
[str(sample_yaml), "--target-number", "abc", "def"], + standalone_mode=False, + ) + def test_from_cli__explicit_format_txt(self, capsys): with named_temporary_file_path() as t: generate_cli.main( @@ -198,7 +211,7 @@ def test_from_cli__explicit_format_txt(self, capsys): output = f.read() assert len(re.findall(r"Account\(", output)) == 5 - def test_from_cli__unknown_extension(self, capsys): + def test_from_cli__unknown_format(self, capsys): with pytest.raises(ClickException) as e: generate_cli.callback( yaml_file=str(sample_yaml), @@ -209,6 +222,20 @@ def test_from_cli__unknown_extension(self, capsys): assert "xyzzy" in str(e.value) Path("foo.txt").unlink() + def test_from_cli__pluggable_output_stream(self): + with named_temporary_file_path(suffix=".yml") as t: + generate_cli.main( + [ + str(sample_yaml), + "--output-format", + "examples.YamlOutputStream", + "--output-file", + t, + ], + standalone_mode=False, + ) + assert t.exists() + def test_from_cli__continuation(self, capsys): with TemporaryDirectory() as t: mapping_file_path = Path(t) / "mapping.yml" @@ -415,6 +442,14 @@ def test_mutually_exclusive(self): ) assert "apping-file" in str(e.value) + def test_mutually_exclusive_targets(self): + with pytest.raises(ClickException) as e: + generate_cli.main( + [str(sample_yaml), "--reps", "50", "--target-count", "Account", "100"], + standalone_mode=False, + ) + assert "mutually exclusive" in str(e.value) + def test_cli_errors__cannot_infer_output_format(self): with pytest.raises(ClickException, match="No format supplied"): with TemporaryDirectory() as t: diff --git a/tests/test_collections.py b/tests/test_collections.py new file mode 100644 index 00000000..b312ae36 --- /dev/null +++ b/tests/test_collections.py @@ -0,0 +1,155 @@ +from snowfakery.utils.collections import CaseInsensitiveDict +import pytest + + +# From: https://github.com/psf/requests/blob/05a1a21593c9c8e79393d35fae12c9c27a6f7605/tests/test_requests.py +class TestCaseInsensitiveDict: + 
@pytest.mark.parametrize( + "cid", + ( + CaseInsensitiveDict({"Foo": "foo", "BAr": "bar"}), + CaseInsensitiveDict([("Foo", "foo"), ("BAr", "bar")]), + CaseInsensitiveDict(FOO="foo", BAr="bar"), + ), + ) + def test_init(self, cid): + assert len(cid) == 2 + assert "foo" in cid + assert "bar" in cid + + def test_docstring_example(self): + cid = CaseInsensitiveDict() + cid["Accept"] = "application/json" + assert cid["aCCEPT"] == "application/json" + assert list(cid) == ["Accept"] + + def test_len(self): + cid = CaseInsensitiveDict({"a": "a", "b": "b"}) + cid["A"] = "a" + assert len(cid) == 2 + + def test_getitem(self): + cid = CaseInsensitiveDict({"Spam": "blueval"}) + assert cid["spam"] == "blueval" + assert cid["SPAM"] == "blueval" + + def test_fixes_649(self): + """__setitem__ should behave case-insensitively.""" + cid = CaseInsensitiveDict() + cid["spam"] = "oneval" + cid["Spam"] = "twoval" + cid["sPAM"] = "redval" + cid["SPAM"] = "blueval" + assert cid["spam"] == "blueval" + assert cid["SPAM"] == "blueval" + assert list(cid.keys()) == ["SPAM"] + + def test_delitem(self): + cid = CaseInsensitiveDict() + cid["Spam"] = "someval" + del cid["sPam"] + assert "spam" not in cid + assert len(cid) == 0 + + def test_contains(self): + cid = CaseInsensitiveDict() + cid["Spam"] = "someval" + assert "Spam" in cid + assert "spam" in cid + assert "SPAM" in cid + assert "sPam" in cid + assert "notspam" not in cid + + def test_get(self): + cid = CaseInsensitiveDict() + cid["spam"] = "oneval" + cid["SPAM"] = "blueval" + assert cid.get("spam") == "blueval" + assert cid.get("SPAM") == "blueval" + assert cid.get("sPam") == "blueval" + assert cid.get("notspam", "default") == "default" + + def test_update(self): + cid = CaseInsensitiveDict() + cid["spam"] = "blueval" + cid.update({"sPam": "notblueval"}) + assert cid["spam"] == "notblueval" + cid = CaseInsensitiveDict({"Foo": "foo", "BAr": "bar"}) + cid.update({"fOO": "anotherfoo", "bAR": "anotherbar"}) + assert len(cid) == 2 + assert 
cid["foo"] == "anotherfoo" + assert cid["bar"] == "anotherbar" + + def test_update_retains_unchanged(self): + cid = CaseInsensitiveDict({"foo": "foo", "bar": "bar"}) + cid.update({"foo": "newfoo"}) + assert cid["bar"] == "bar" + + def test_iter(self): + cid = CaseInsensitiveDict({"Spam": "spam", "Eggs": "eggs"}) + keys = frozenset(["Spam", "Eggs"]) + assert frozenset(iter(cid)) == keys + + def test_equality(self): + cid = CaseInsensitiveDict({"SPAM": "blueval", "Eggs": "redval"}) + othercid = CaseInsensitiveDict({"spam": "blueval", "eggs": "redval"}) + assert cid == othercid + del othercid["spam"] + assert cid != othercid + assert cid == {"spam": "blueval", "eggs": "redval"} + assert cid != object() + + def test_setdefault(self): + cid = CaseInsensitiveDict({"Spam": "blueval"}) + assert cid.setdefault("spam", "notblueval") == "blueval" + assert cid.setdefault("notspam", "notblueval") == "notblueval" + + def test_lower_items(self): + cid = CaseInsensitiveDict( + { + "Accept": "application/json", + "user-Agent": "requests", + } + ) + keyset = frozenset(lowerkey for lowerkey, v in cid.lower_items()) + lowerkeyset = frozenset(["accept", "user-agent"]) + assert keyset == lowerkeyset + + def test_preserve_key_case(self): + cid = CaseInsensitiveDict( + { + "Accept": "application/json", + "user-Agent": "requests", + } + ) + keyset = frozenset(["Accept", "user-Agent"]) + assert frozenset(i[0] for i in cid.items()) == keyset + assert frozenset(cid.keys()) == keyset + assert frozenset(cid) == keyset + + def test_preserve_last_key_case(self): + cid = CaseInsensitiveDict( + { + "Accept": "application/json", + "user-Agent": "requests", + } + ) + cid.update({"ACCEPT": "application/json"}) + cid["USER-AGENT"] = "requests" + keyset = frozenset(["ACCEPT", "USER-AGENT"]) + assert frozenset(i[0] for i in cid.items()) == keyset + assert frozenset(cid.keys()) == keyset + assert frozenset(cid) == keyset + + def test_copy(self): + cid = CaseInsensitiveDict( + { + "Accept": 
"application/json", + "user-Agent": "requests", + } + ) + cid_copy = cid.copy() + assert str(cid) == str(cid_copy) + assert cid == cid_copy + cid["changed"] = True + assert cid != cid_copy diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py new file mode 100644 index 00000000..7e67b11a --- /dev/null +++ b/tests/test_exceptions.py @@ -0,0 +1,13 @@ +from snowfakery.data_gen_exceptions import DataGenError + + +class TestExceptions: + def test_stringify_DataGenError(self): + val = str(DataGenError("Blah", "foo.yml", 25)) + assert "Blah" in val + assert "foo.yml" in val + assert "25" in val + + val = str(DataGenError("Blah", "foo.yml")) + assert "Blah" in val + assert "foo.yml" in val diff --git a/tests/test_faker.py b/tests/test_faker.py index af959ea9..3caf6157 100644 --- a/tests/test_faker.py +++ b/tests/test_faker.py @@ -207,6 +207,18 @@ def test_error_handling(self, write_row_mock): assert "xyzzy" in str(e.value) assert "fake" in str(e.value) + @mock.patch(write_row_path) + def test_did_you_mean(self, write_row_mock): + yaml = """ + - object: A + fields: + xyzzy: + fake: frst_name + """ + with pytest.raises(exc.DataGenError) as e: + generate(StringIO(yaml), {}, None) + assert "first_name" in str(e.value) + def test_faker_internals_are_invisible(self): yaml = """ - object: A diff --git a/tests/test_friends.py b/tests/test_friends.py new file mode 100644 index 00000000..1aa6eed6 --- /dev/null +++ b/tests/test_friends.py @@ -0,0 +1,23 @@ +from io import StringIO + +from snowfakery.data_generator import generate + + +class TestFriends: + def test_multiple_friends(self, generated_rows): + yaml = """ + - object: Account + - object: Account + friends: + - object: Contact + fields: + AccountId: + reference: Account + - object: Contact + fields: + AccountId: + reference: Account + """ + generate(StringIO(yaml), {}) + assert generated_rows.table_values("Contact", 0, "AccountId") == "Account(2)" + assert generated_rows.table_values("Contact", 1, "AccountId") == 
"Account(2)" diff --git a/tests/test_generate_mapping.py b/tests/test_generate_mapping.py index b774f80d..80d1f2a8 100644 --- a/tests/test_generate_mapping.py +++ b/tests/test_generate_mapping.py @@ -10,6 +10,7 @@ _table_is_free, ) from snowfakery.data_generator_runtime import Dependency +from snowfakery.cci_mapping_files.post_processes import add_after_statements from snowfakery import data_gen_exceptions as exc @@ -296,6 +297,24 @@ def test_incomplete_record_types(self, tmpdir, generate_in_tmpdir): assert mapping["Insert Case"]["fields"]["RecordTypeId"] == "recordtype" +class TestAddAfterStatements: + def test_add_after_statements(self): + mappings = { + "Insert Child": { + "fields": {}, + "lookups": {"parent": {"key_field": "parent", "table": "Parent"}}, + "sf_object": "Child", + "table": "Child", + }, + "Insert Parent": {"fields": {}, "sf_object": "Parent", "table": "Parent"}, + "Insert Parent 2": {"fields": {}, "sf_object": "Parent", "table": "Parent"}, + } + add_after_statements(mappings) + assert ( + mappings["Insert Child"]["lookups"]["parent"]["after"] == "Insert Parent 2" + ) + + class TestPersonAccounts: @skip_if_cumulusci_missing def test_basic_person_accounts(self, generate_in_tmpdir): diff --git a/tests/test_object_rows.py b/tests/test_object_rows.py new file mode 100644 index 00000000..faf5ce66 --- /dev/null +++ b/tests/test_object_rows.py @@ -0,0 +1,9 @@ +from unittest.mock import Mock + +from snowfakery.object_rows import NicknameSlot + + +class TestNicknameSlot: + def test_repr(self): + nns = NicknameSlot("Account", Mock()) + assert "Account" in repr(nns) diff --git a/tests/test_output_streams.py b/tests/test_output_streams.py index e4d1b9d0..b4b0b313 100644 --- a/tests/test_output_streams.py +++ b/tests/test_output_streams.py @@ -10,6 +10,8 @@ import pytest +from click.exceptions import ClickException + from sqlalchemy import create_engine from snowfakery.output_streams import ( @@ -367,3 +369,9 @@ def test_external_output_stream_yaml(self): """ 
print(x.getvalue()) assert x.getvalue() == expected + + def test_external_output_stream__failure(self): + with pytest.raises(ClickException, match="no.such.output.Stream"): + generate_cli.callback( + yaml_file=sample_yaml, output_format="no.such.output.Stream" + ) diff --git a/tests/test_salesforce_gen.py b/tests/test_salesforce_gen.py index 4a698ec5..d5fabaaa 100644 --- a/tests/test_salesforce_gen.py +++ b/tests/test_salesforce_gen.py @@ -39,5 +39,5 @@ def test_profile_id(self, generated_rows, org_config): ProfileId: Salesforce.ProfileId: Identity User """ - generate_data(StringIO(yaml), plugin_options={"orgname": org_config.name}) + generate_data(StringIO(yaml), plugin_options={"org_name": org_config.name}) assert generated_rows.table_values("foo", 0, "ProfileId").startswith("00e") diff --git a/tests/test_with_cci.py b/tests/test_with_cci.py index 4bf58e81..a31893d2 100644 --- a/tests/test_with_cci.py +++ b/tests/test_with_cci.py @@ -13,6 +13,7 @@ from snowfakery.data_generator import generate from snowfakery.data_gen_exceptions import DataGenError from snowfakery import generate_data +from snowfakery.standard_plugins import Salesforce try: import cumulusci @@ -29,11 +30,6 @@ sample_yaml = Path(__file__).parent / "include_parent.yml" -skip_if_cumulusci_missing = pytest.mark.skipif( - not hasattr(cumulusci, "api"), reason="CumulusCI not installed" -) - - class Test_CLI_CCI: # @skip_if_cumulusci_missing def test_mapping_file(self): @@ -76,12 +72,12 @@ def query(self, query: str): ) +@patch( + "snowfakery.standard_plugins.Salesforce.SalesforceConnection.sf", + wraps=fake_sf_client, +) +@patch("snowfakery.standard_plugins.Salesforce.randrange", lambda *arg, **kwargs: 5) class TestSOQLNoCCI: - @patch( - "snowfakery.standard_plugins.Salesforce.SalesforceConnection.sf", - wraps=fake_sf_client, - ) - @patch("snowfakery.standard_plugins.Salesforce.randrange", lambda *arg, **kwargs: 5) def test_soql_plugin_random(self, fake_sf_client, generated_rows): yaml = """ - plugin: 
snowfakery.standard_plugins.Salesforce.SalesforceQuery @@ -92,7 +88,7 @@ def test_soql_plugin_random(self, fake_sf_client, generated_rows): AccountId: SalesforceQuery.random_record: Account """ - generate(StringIO(yaml), plugin_options={"orgname": "blah"}) + generate(StringIO(yaml), plugin_options={"org_name": "blah"}) assert fake_sf_client.mock_calls assert generated_rows.row_values(0, "AccountId") == "FAKEID5" @@ -113,13 +109,8 @@ def test_soql_plugin_no_query_from(self, fake_sf_client, generated_rows): where: Name='Foo' """ with pytest.raises(DataGenError, match="Must supply 'from:'"): - generate(StringIO(yaml), plugin_options={"orgname": "blah"}) + generate(StringIO(yaml), plugin_options={"org_name": "blah"}) - @patch( - "snowfakery.standard_plugins.Salesforce.SalesforceConnection.sf", - wraps=fake_sf_client, - ) - @patch("snowfakery.standard_plugins.Salesforce.randrange", lambda *arg, **kwargs: 5) def test_soql_plugin_record(self, fake_sf_client, generated_rows): yaml = """ - plugin: snowfakery.standard_plugins.Salesforce.SalesforceQuery @@ -130,16 +121,54 @@ def test_soql_plugin_record(self, fake_sf_client, generated_rows): AccountId: SalesforceQuery.find_record: Account """ - generate(StringIO(yaml), plugin_options={"orgname": "blah"}) + generate(StringIO(yaml), plugin_options={"org_name": "blah"}) assert fake_sf_client.mock_calls assert generated_rows.row_values(0, "AccountId") == "FAKEID0" + def test_soql_plugin_random__orgname_long(self, fake_sf_client, generated_rows): + yaml = """ + - plugin: snowfakery.standard_plugins.Salesforce.SalesforceQuery + - object: Contact + fields: + FirstName: Suzy + LastName: Salesforce + AccountId: + SalesforceQuery.random_record: Account + """ + plugin_option_name = ( + "snowfakery.standard_plugins.Salesforce.SalesforceQuery.org_name" + ) + generate(StringIO(yaml), plugin_options={plugin_option_name: "blah"}) + assert fake_sf_client.mock_calls + assert generated_rows.row_values(0, "AccountId") == "FAKEID5" + + +class 
TestCCIError: + def test_pretend_cci_not_available(self): + filename = ( + Path(__file__).parent.parent / "examples/salesforce_soql_example.recipe.yml" + ) + with unittest.mock.patch( + "snowfakery.standard_plugins.Salesforce.SalesforceConnectionMixin._get_CliRuntime" + ) as conn: + conn.side_effect = ImportError("CumulusCI Runtime cannot be loaded") + with pytest.raises(Exception, match="CumulusCI Runtime cannot be loaded"): + generate_data(filename, plugin_options={"org_name": "None"}) + + @pytest.mark.skipif(cumulusci, reason="CCI is installed") + def test_cci_really_not_available(self): + filename = ( + Path(__file__).parent.parent / "examples/salesforce_soql_example.recipe.yml" + ) + with pytest.raises(Exception, match="CumulusCI Runtime cannot be loaded"): + generate_data(filename, plugin_options={"org_name": "None"}) + @skip_if_cumulusci_missing class TestSOQLWithCCI: @patch("snowfakery.standard_plugins.Salesforce.randrange", lambda *arg, **kwargs: 0) @pytest.mark.vcr() - def test_soql(self, sf, org_config, generated_rows): + def test_soql(self, sf, org_config, project_config, generated_rows): yaml = """ - plugin: snowfakery.standard_plugins.Salesforce.SalesforceQuery - object: Contact @@ -157,7 +186,10 @@ def test_soql(self, sf, org_config, generated_rows): """ assert org_config.name sf.Account.create({"Name": "Company"}) - generate(StringIO(yaml), plugin_options={"orgname": org_config.name}) + generate( + StringIO(yaml), + plugin_options={"org_config": org_config, "project_config": project_config}, + ) assert len(generated_rows.mock_calls) == 2 @pytest.mark.vcr() @@ -204,22 +236,9 @@ def test_example_through_api(self, sf, generated_rows, org_config): filename = ( Path(__file__).parent.parent / "examples/salesforce_soql_example.recipe.yml" ) - generate_data(filename, plugin_options={"orgname": org_config.name}) + generate_data(filename, plugin_options={"org_name": org_config.name}) assert generated_rows.mock_calls - def test_cci_not_available(self): - 
filename = ( - Path(__file__).parent.parent / "examples/salesforce_soql_example.recipe.yml" - ) - with unittest.mock.patch( - "snowfakery.standard_plugins.Salesforce.SalesforceConnection._get_sf_clients" - ) as conn: - conn.side_effect = ImportError( - "cumulusci module cannot be loaded by snowfakery" - ) - with pytest.raises(Exception, match="cumulusci module cannot be loaded"): - generate_data(filename, plugin_options={"orgname": "None"}) - @pytest.mark.vcr() def test_find_records_returns_nothing(self, org_config): yaml = """ @@ -232,7 +251,7 @@ def test_find_records_returns_nothing(self, org_config): SalesforceQuery.find_record: Contract """ with pytest.raises(DataGenError, match="No records returned"): - generate_data(StringIO(yaml), plugin_options={"orgname": org_config.name}) + generate_data(StringIO(yaml), plugin_options={"org_name": org_config.name}) @pytest.mark.vcr() def test_find_records_returns_multiple(self, org_config, sf, generated_rows): @@ -245,7 +264,7 @@ def test_find_records_returns_multiple(self, org_config, sf, generated_rows): AccountId: SalesforceQuery.find_record: User """ - generate_data(StringIO(yaml), plugin_options={"orgname": org_config.name}) + generate_data(StringIO(yaml), plugin_options={"org_name": org_config.name}) first_user_id = sf.query("select Id from User")["records"][0]["Id"] assert generated_rows.mock_calls[0][1][1]["AccountId"] == first_user_id @@ -258,7 +277,7 @@ def test_soql_dataset_shuffled(self, sf, org_config, generated_rows): Path(__file__).parent.parent / "examples/soql_dataset_shuffled.recipe.yml" ) - generate_data(filename, plugin_options={"orgname": org_config.name}) + generate_data(filename, plugin_options={"org_name": org_config.name}) assert len(generated_rows.mock_calls) == 10 for mock_call in generated_rows.mock_calls: @@ -279,7 +298,7 @@ def test_soql_dataset_shuffled(self, sf, org_config, generated_rows): def test_soql_dataset_in_order(self, sf, org_config, generated_rows): filename = 
Path(__file__).parent.parent / "examples/soql_dataset.recipe.yml" - generate_data(filename, plugin_options={"orgname": org_config.name}) + generate_data(filename, plugin_options={"org_name": org_config.name}) assert len(generated_rows.mock_calls) == 10 for mock_call in generated_rows.mock_calls: @@ -307,7 +326,7 @@ def test_soql_dataset_where(self, sf, org_config, generated_rows): Path(__file__).parent.parent / "examples/soql_dataset_where.recipe.yml" ) - generate_data(filename, plugin_options={"orgname": org_config.name}) + generate_data(filename, plugin_options={"org_name": org_config.name}) assert len(generated_rows.mock_calls) == 10 for mock_call in generated_rows.mock_calls: @@ -347,7 +366,7 @@ def download_file(*args, **kwargs): download_file, ) with pretend_5000, do_not_really_download: - generate_data(filename, plugin_options={"orgname": org_config.name}) + generate_data(filename, plugin_options={"org_name": org_config.name}) assert len(generated_rows.mock_calls) == 10 @@ -378,7 +397,7 @@ def test_dataset_bad_query(self, org_config, sf, generated_rows): from: Xyzzy """ with pytest.raises(DataGenError, match="Xyzzy"): - generate_data(StringIO(yaml), plugin_options={"orgname": org_config.name}) + generate_data(StringIO(yaml), plugin_options={"org_name": org_config.name}) @pytest.mark.vcr() @patch( @@ -399,7 +418,7 @@ def test_dataset_bad_query_bulk(self, restful, org_config): from: Account """ with pytest.raises(DataGenError, match="No such column 'Xyzzy' on entity"): - generate_data(StringIO(yaml), plugin_options={"orgname": org_config.name}) + generate_data(StringIO(yaml), plugin_options={"org_name": org_config.name}) def test_dataset_no_fields(self, org_config, sf, generated_rows): yaml = """ @@ -412,7 +431,7 @@ def test_dataset_no_fields(self, org_config, sf, generated_rows): junk: Junk2 """ with pytest.raises(DataGenError, match="SOQLDataset needs a 'fields' list"): - generate_data(StringIO(yaml), plugin_options={"orgname": org_config.name}) + 
generate_data(StringIO(yaml), plugin_options={"org_name": org_config.name}) def test_dataset_no_from(self, org_config, sf, generated_rows): yaml = """ @@ -425,4 +444,8 @@ def test_dataset_no_from(self, org_config, sf, generated_rows): fields: Junk3 """ with pytest.raises(DataGenError, match="SOQLDataset needs a 'from'"): - generate_data(StringIO(yaml), plugin_options={"orgname": org_config.name}) + generate_data(StringIO(yaml), plugin_options={"org_name": org_config.name}) + + def test_config_type_error(self): + with pytest.raises(TypeError): + Salesforce.check_orgconfig(None) diff --git a/tools/faker_docs.py b/tools/faker_docs.py new file mode 100644 index 00000000..0e63067e --- /dev/null +++ b/tools/faker_docs.py @@ -0,0 +1,12 @@ +from tools.faker_docs_utils.faker_markdown import generate_markdown_for_fakers + + +outfile = "docs/fakedata/en_US.md" +with open(outfile, "w") as o: + generate_markdown_for_fakers(o, "en_US") + +outfile = "docs/fakedata/fr_FR.md" +with open(outfile, "w") as o: + generate_markdown_for_fakers(o, "fr_FR") + +print("DONE", outfile) diff --git a/tools/faker_docs_utils/docs_config.yml b/tools/faker_docs_utils/docs_config.yml new file mode 100644 index 00000000..23f60a6d --- /dev/null +++ b/tools/faker_docs_utils/docs_config.yml @@ -0,0 +1,199 @@ +common_fakes: + company: + example: + - object: Account + fields: + Name: + fake: Company + - object: Account + fields: + Name: ${{fake.Company}} Holdings + last_name: + example: + - object: Contact + fields: + FirstName: + fake: FirstName + LastName: + fake: LastName + - object: Contact + fields: + FirstName: ${{fake.FirstName}} Sam + LastName: ${{fake.FirstName}} Senior + first_name: + example: + - object: Contact + fields: + FirstName: + fake: FirstName + LastName: + fake: LastName + - object: Contact + fields: + FirstName: ${{fake.FirstName}} Sam + LastName: ${{fake.LastName}}-Jones + first_name_female: + example: + - object: Contact + fields: + FirstName: + fake: FirstNameFemale + LastName: + 
fake: LastName + - object: Contact + fields: + FirstName: ${{fake.FirstNameFemale}} Beth + LastName: ${{fake.LastName}}-Jones + first_name_male: + example: + - object: Contact + fields: + FirstName: + fake: FirstNameMale + LastName: + fake: LastName + - object: Contact + fields: + FirstName: ${{fake.FirstNameMale}} Beth + LastName: ${{fake.LastName}}-Jones + first_name_nonbinary: + example: + - object: Contact + fields: + FirstName: + fake: FirstNameNonBinary + LastName: + fake: LastName + - object: Contact + fields: + FirstName: ${{fake.FirstNameNonBinary}} Mary-John + LastName: ${{fake.LastName}}-Jones + catch_phrase: + example: + - object: Account + fields: + Name: + fake: Company + Description: + fake: CatchPhrase + - object: Account + fields: + Name: ${{fake.Company}} Holdings + Description: "Our Motto: ${{fake.CatchPhrase}}" + year: + example: + - object: Student + fields: + GraduationYear: + fake: year + - object: Student + fields: + GraduationYear: ${{year}} + email: + example: + - object: Contact + fields: + FirstName: + fake: FirstName + LastName: + fake: LastName + Email: + fake: Email + - object: Contact + fields: + FirstName: ${{fake.FirstName}} Sam + LastName: ${{fake.FirstName}} Senior + Email: ${{fake.Email}} + phone_number: + example: + - object: Account + fields: + Name: + fake: Company + Phone: + fake: PhoneNumber + - object: Account + fields: + Name: ${{fake.Company}} Holdings + Phone: ${{fake.PhoneNumber}} ext. 
23 + city: + example: + - object: Account + fields: + Name: + fake: Company + BillingCity: + fake: City + - object: Account + fields: + Name: ${{fake.Company}} Holdings + BillingCity: ${{fake.City}} + state: + example: + - object: Account + fields: + Name: + fake: Company + BillingState: + fake: State + - object: Account + fields: + Name: ${{fake.Company}} Holdings + BillingState: ${{fake.State}} + postalcode: + example: + - object: Account + fields: + Name: + fake: Company + BillingPostalCode: + fake: postalcode + - object: Account + fields: + Name: ${{fake.Company}} Holdings + BillingPostalCode: ${{fake.Postalcode}} + street_address: + example: + - object: Account + fields: + Name: + fake: Company + BillingStreet: + fake: StreetAddress + - object: Account + fields: + Name: ${{fake.Company}} Holdings + BillingStreet: ${{fake.StreetAddress}} + country: + example: + - object: Account + fields: + Name: + fake: Company + BillingCountry: + fake: Country + - object: Account + fields: + Name: ${{fake.Company}} Holdings + BillingCountry: ${{fake.Country}} + current_country: + example: + - object: Account + fields: + Name: + fake: Company + BillingCountry: + fake: CurrentCountry + - object: Account + fields: + Name: ${{fake.Company}} Holdings + BillingCountry: ${{fake.CurrentCountry}} + time: + paragraph: + word: + sentence: + text: +uncommon_fakes: + building_number: + country_code: + current_country_code: diff --git a/tools/faker_docs_utils/docstring.py b/tools/faker_docs_utils/docstring.py new file mode 100644 index 00000000..db48dddc --- /dev/null +++ b/tools/faker_docs_utils/docstring.py @@ -0,0 +1,243 @@ +# Based on https://github.com/joke2k/faker/blob/2dac486e6d3b5f018feb524f6fa19917ec10299e/faker/sphinx/docstring.py +# Copied under the provisions of the MIT License + +# Search for "snowfakery" to find optimizations we've made. 
+ +# coding=utf-8 +import inspect +import logging +import re + +from collections import namedtuple + +from faker import Faker +from faker.config import AVAILABLE_LOCALES, DEFAULT_LOCALE +from .validator import SampleCodeValidator + +logger = logging.getLogger(__name__) +_fake = Faker(AVAILABLE_LOCALES) +_base_provider_method_pattern = re.compile( + r"^faker\.providers\.BaseProvider\.(?P\w+)$" +) +_standard_provider_method_pattern = re.compile( + r"^faker\.providers\.\w+\.Provider\.(?P\w+)$" +) +_locale_provider_method_pattern = re.compile( + r"^faker\.providers\.\w+" + r"\.(?P[a-z]{2,3}_[A-Z]{2})" + r"\.Provider" + r"\.(?P\w+)$", +) +_sample_line_pattern = re.compile( + r"^:sample" + r"(?: size=(?P[1-9][0-9]*))?" + r"(?: seed=(?P[0-9]+))?" + r":" + r"(?: ?(?P.*))?$", +) +_command_template = "generator.{method}({kwargs})" +_sample_output_template = ( + ">>> Faker.seed({seed})\n" + ">>> for _ in range({size}):\n" + "... fake.{method}({kwargs})\n" + "...\n" + "{results}\n\n" +) + +DEFAULT_SAMPLE_SIZE = 5 +DEFAULT_SEED = 0 +Sample = namedtuple("Sample", ["size", "seed", "kwargs"]) + + +class ProviderMethodDocstring: + """ + Class that preprocesses provider method docstrings to generate sample usage and output + + Notes on how samples are generated: + - If the docstring belongs to a standard provider method, sample usage and output will be + generated using a `Faker` object in the `DEFAULT_LOCALE`. + - If the docstring belongs to a localized provider method, the correct locale will be used. + - If the docstring does not belong to any provider method, docstring preprocessing will be skipped. + - Docstring lines will be parsed for potential sample sections, and the generation details of each + sample section will internally be represented as a ``Sample`` namedtuple. + - Each ``Sample`` will have info on the keyword arguments to pass to the provider method, how many + times the provider method will be called, and the initial seed value to ``Faker.seed()``. 
+ """ + + def __init__(self, app, what, name, obj, options, lines): + self._line_iter = iter(lines) + self._parsed_lines = [] + self._samples = [] + self._skipped = True + self._log_prefix = f"{inspect.getfile(obj)}:docstring of {name}: WARNING:" + + if what != "method": + return + + base_provider_method_match = _base_provider_method_pattern.match(name) + locale_provider_method_match = _locale_provider_method_pattern.match(name) + standard_provider_method_match = _standard_provider_method_pattern.match(name) + if base_provider_method_match: + groupdict = base_provider_method_match.groupdict() + self._method = groupdict["method"] + self._locale = DEFAULT_LOCALE + elif standard_provider_method_match: + groupdict = standard_provider_method_match.groupdict() + self._method = groupdict["method"] + self._locale = DEFAULT_LOCALE + elif locale_provider_method_match: + groupdict = locale_provider_method_match.groupdict() + self._method = groupdict["method"] + self._locale = groupdict["locale"] + else: + return + + self._skipped = False + self._parse() + self._generate_samples() + + def _log_warning(self, warning): + logger.warning(f"{self._log_prefix} {warning}") + + def _parse(self): + while True: + try: + line = next(self._line_iter) + except StopIteration: + break + else: + self._parse_section(line) + + def _parse_section(self, section): + # No-op if section does not look like the start of a sample section + if not section.startswith(":sample"): + self._parsed_lines.append(section) + return + + try: + next_line = next(self._line_iter) + except StopIteration: + # No more lines left to consume, so save current sample section + self._process_sample_section(section) + return + + # Next line is the start of a new sample section, so process + # current sample section, and start parsing the new section + if next_line.startswith(":sample"): + self._process_sample_section(section) + self._parse_section(next_line) + + # Next line is an empty line indicating the end of + # current 
sample section, so process current section + elif next_line == "": + self._process_sample_section(section) + + # Section is assumed to be multiline, so continue + # adding lines to current sample section + else: + section = section + next_line + self._parse_section(section) + + def _process_sample_section(self, section): + match = _sample_line_pattern.match(section) + + # Discard sample section if malformed + if not match: + msg = f"The section `{section}` is malformed and will be discarded." + self._log_warning(msg) + return + + # Set sample generation defaults and do some beautification if necessary + groupdict = match.groupdict() + size = groupdict.get("size") + seed = groupdict.get("seed") + kwargs = groupdict.get("kwargs") + size = max(int(size), DEFAULT_SAMPLE_SIZE) if size else DEFAULT_SAMPLE_SIZE + seed = int(seed) if seed else DEFAULT_SEED + kwargs = self._beautify_kwargs(kwargs) if kwargs else "" + + # Store sample generation details + sample = Sample(size, seed, kwargs) + self._samples.append(sample) + + def _beautify_kwargs(self, kwargs): + def _repl_whitespace(match): + quoted = match.group(1) or match.group(2) + return quoted if quoted else "" + + def _repl_comma(match): + quoted = match.group(1) or match.group(2) + return quoted if quoted else ", " + + # First, remove all whitespaces and tabs not within quotes + result = re.sub(r'("[^"]*")|(\'[^\']*\')|[ \t]+', _repl_whitespace, kwargs) + + # Next, insert a whitespace after each comma not within quotes + result = re.sub(r'("[^"]*")|(\'[^\']*\')|,', _repl_comma, result) + + # Then return the result with all leading and trailing whitespaces stripped + return result.strip() + + def _stringify_result(self, value): + return repr(value) + + def _generate_eval_scope(self): + from collections import ( + OrderedDict, + ) # noqa: F401 Do not remove! The eval command needs this reference. 
+ + return { + "generator": _fake[self._locale], + "OrderedDict": OrderedDict, + } + + def _inject_default_sample_section(self): + default_sample = Sample(DEFAULT_SAMPLE_SIZE, DEFAULT_SEED, "") + self._samples.append(default_sample) + + def _generate_samples(self): + if not self._samples: + self._inject_default_sample_section() + + output = "" + for sample in self._samples: + command = _command_template.format( + method=self._method, kwargs=sample.kwargs + ) + validator = SampleCodeValidator(command) + if validator.errors: + msg = ( + f"Invalid code elements detected. Sample generation will be " + f"skipped for method `{self._method}` with arguments `{sample.kwargs}`." + ) + self._log_warning(msg) + continue + + try: + Faker.seed(sample.seed) + # optimization for the Snowfakery context + results = "" + except Exception as e: + msg = f"Sample generation failed for method `{self._method}` with arguments `{sample.kwargs}`: {e}." + self._log_warning(msg) + continue + else: + output += _sample_output_template.format( + seed=sample.seed, + method=self._method, + kwargs=sample.kwargs, + size=sample.size, + results=results, + ) + + if output: + output = ":examples:\n\n" + output + self._parsed_lines.extend(output.split("\n")) + + @property + def skipped(self): + return self._skipped + + @property + def lines(self): + return self._parsed_lines diff --git a/tools/faker_docs_utils/fakedata_header_full.md b/tools/faker_docs_utils/fakedata_header_full.md new file mode 100644 index 00000000..c00f5b53 --- /dev/null +++ b/tools/faker_docs_utils/fakedata_header_full.md @@ -0,0 +1,201 @@ +# Fake data + +##### Overview + +Fake data comes in a few different flavours. 
Let's start with the
+most common pattern:
+
+```yaml
+# examples/salesforce/simple_account.recipe.yml
+- object: Account
+  fields:
+    Name:
+      fake: Company
+    Description:
+      fake: CatchPhrase
+    BillingStreet:
+      fake: StreetAddress
+    BillingCity:
+      fake: City
+    BillingState:
+      fake: State
+    BillingPostalCode:
+      fake: PostalCode
+    BillingCountry:
+      fake: CurrentCountry
+    Phone:
+      fake: PhoneNumber
+```
+
+So the first obvious question is where you find these names. The answer
+is you can scroll down on this page to see a long list with descriptions.
+
+The description above might generate output like this:
+
+```json
+Account(id=1, Name=Nelson-Deleon, Description=Secured bandwidth-monitored moratorium, BillingStreet=2187 Kerry Way, BillingCity=Rangelland, BillingState=Colorado, BillingPostalCode=08388, BillingCountry=United States, Phone=001-738-530-9719)
+```
+
+It doesn't matter if you use upper or lower case for fake names.
+
+##### Formulas
+
+Sometimes you might want to combine the fake data with other data
+in a single field. You can use formula syntax for this.
+
+```yaml
+# examples/faker_in_formula.recipe.yml
+- object: Account
+  fields:
+    Name: ${{fake.State}} State University
+```
+
+Some complex faker definitions can also use parameters. The
+documentation says what parameters are allowed. The docs
+for [fake: sentence](#fake-sentence) define `nb_words` and
+`variable_nb_words`, for example.
+
+```yaml
+# examples/parameters.recipe.yml
+- object: Example
+  fields:
+    gibberish_words: ${{fake.Sentence(nb_words=10, variable_nb_words=False)}}
+```
+
+##### Block fakers with parameters
+
+If you'd rather not use the formula syntax (${{ blah }}) there is also
+a nested syntax for that:
+
+```yaml
+# examples/parameters_block.recipe.yml
+- object: Example
+  fields:
+    gibberish_words:
+      fake.Sentence:
+        nb_words: 10
+        variable_nb_words: False
+```
+
+##### Localization
+
+Our fake data can be localized to many languages. 
We have
+[detailed docs](https://snowfakery.readthedocs.io/en/feature-fake-data-docs/locales.html)
+about how to use fake data in each of the other languages.
+
+The default locale is `en_US`: United States English.
+
+Let's say that you want to generate fake data for France instead of the
+United States.
+
+You do so by setting the special `snowfakery_locale` "variable" like this.
+
+```yaml
+# examples/salesforce/simple_account_french.recipe.yml
+
+- var: snowfakery_locale
+  value: fr_FR
+- object: Account
+  fields:
+    Name:
+      fake: Company
+    Description:
+      fake: CatchPhrase
+    BillingStreet:
+      fake: StreetAddress
+    BillingCity:
+      fake: City
+    BillingState:
+      fake: State
+    BillingPostalCode:
+      fake: PostalCode
+    BillingCountry:
+      fake: CurrentCountry
+    Phone:
+      fake: PhoneNumber
+```
+
+This will translate the State to the appropriate administrative unit in
+France. `CurrentCountry` will be France, not `United States`. The Catch
+Phrase will be in French and so forth.
+
+For example:
+
+```json
+Account(id=1, Name=Parent Auger S.A.S., Description=Le confort de rouler de manière sûre, BillingStreet=54, rue de Bailly, BillingCity=Charrier, BillingState=Île-de-France, BillingPostalCode=72902, BillingCountry=France, Phone=08 05 11 90 19)
+```
+
+We can do many countries.
For example, Japanese (ja_JP locale): + +```json +Account(id=1, Name=有限会社山下電気, Description=Inverse 24hour pricing structure, BillingStreet=040 佐々木 Street, BillingCity=横浜市金沢区, BillingState=福岡県, BillingPostalCode=181-5538, BillingCountry=Japan, Phone=070-4156-5072) +``` + +We can even pick the locale randomly: + +```yaml +# examples/salesforce/simple_account_random.recipe.yml +- var: snowfakery_locale + value: + random_choice: + - ja_JP # Japanese + - en_CA # Canadian English + - fr_FR # French from France + - fr_CA # Canadian French + - de_DE # German from Germany +- object: Account + fields: + Name: + fake: Company + Description: + fake: CatchPhrase + BillingStreet: + fake: StreetAddress + BillingCity: + fake: City + BillingState: + fake: State + BillingPostalCode: + fake: PostalCode + BillingCountry: + fake: CurrentCountry + Phone: + fake: PhoneNumber +``` + +##### Fake Dates and Numbers + +The main Snowfakery documentation describes how to fake +[dates](index.md#date-between) and [numbers](index.md#random-number). + +That's it. Those are all of the concepts you need. + +##### Custom Faker Providers + +You can also include Faker extension libraries ("Providers") after +you’ve added them to your Python install: + +```yaml + - plugin: faker_microservice.Provider + - object: OBJ + fields: + service_name: + fake: + microservice +``` + +You would install that provider like this: + +```s +$ pip install faker_microservice +``` + +Here are some Python Faker providers: + + + +And you could make your own providers as well. Aaron Crossman +has written [a tutorial](https://spinningcode.org/2021/06/snowfakery-custom-plugins-part-2/) +about that process. 
+ +## Index of Fake Datatypes diff --git a/tools/faker_docs_utils/fakedata_header_short.md b/tools/faker_docs_utils/fakedata_header_short.md new file mode 100644 index 00000000..a68c3427 --- /dev/null +++ b/tools/faker_docs_utils/fakedata_header_short.md @@ -0,0 +1,10 @@ +# Fake Data: {language} as spoken in {current_country} ({locale}) + +The basic concepts of fake data are described in +the [main tutorial](../fakedata.md#fake-data). + +Our fake data can be localized to many languages. We have +[detailed docs](https://snowfakery.readthedocs.io/en/feature-fake-data-docs/locales.html) +about the other languages. + +Current Locale: {locale} ({current_country}) diff --git a/tools/faker_docs_utils/faker_markdown.py b/tools/faker_docs_utils/faker_markdown.py new file mode 100644 index 00000000..87dd2c8e --- /dev/null +++ b/tools/faker_docs_utils/faker_markdown.py @@ -0,0 +1,193 @@ +import re +from functools import lru_cache +from pathlib import Path +import typing as T + +from yaml import dump as yaml_dump +from faker import Faker +from faker.config import AVAILABLE_LOCALES +from tools.faker_docs_utils.format_samples import ( + yaml_samples_for_docstring, + snowfakery_output_for, +) +from .summarize_fakers import summarize_all_fakers +from .language_codes import language_codes + +from snowfakery.fakedata.fake_data_generator import FakeData + +_RE_COMBINE_WHITESPACE = re.compile(r"(?<=^) +", re.MULTILINE) +_RE_STRIP_SAMPLES = re.compile(r"^\s*:sample:.*$", re.MULTILINE) +_COMMENT_LINES_THAT_LOOK_LIKE_TITLES = re.compile(r"^#", re.MULTILINE) + +non_countries = ("fr_QC", "ar_AA") +AVAILABLE_LOCALES = [ + locale + for locale in AVAILABLE_LOCALES + if locale not in non_countries and "_" in locale +] + + +def cleanup_docstring(my_str): + "Clean up a docstring to remove Faker-doc weirdness and excesss whitespace" + my_str = _RE_COMBINE_WHITESPACE.sub("", my_str) + my_str = _RE_STRIP_SAMPLES.sub("", my_str).strip() + my_str = _COMMENT_LINES_THAT_LOOK_LIKE_TITLES.sub(" #", 
my_str) + my_str = my_str.replace(":example", "\nExample:") + my_str = my_str.replace(":param", "\nParam:") + my_str = my_str.replace(":return", "\nReturn:") + return my_str + + +@lru_cache(maxsize=1000) +def country_for_locale(locale: str): + f = Faker(locale) + return f.current_country() + + +def locales_as_markdown_links(current_locale: str, locale_list: T.List[str]): + "Generate a list of Markdown locale links" + + def format_link(locale: str): + try: + country_name = country_for_locale(locale) + except (ValueError, AttributeError): + return None + language = language_codes[locale.split("_")[0]] + link_text = f"{language} as spoken in {country_name}: ({locale})" + return f" - [{link_text}](fakedata/{locale}.md)\n" + + other_locales = [locale for locale in locale_list if locale != current_locale] + links = [format_link(locale) for locale in other_locales] + return " ".join(link for link in links if link) + + +standard_header = (Path(__file__).parent / "fakedata_header_short.md").read_text() + + +def generate_markdown_for_fakers(outfile, locale: str, header: str = standard_header): + "Generate the Markdown page for a locale" + faker = Faker(locale) + language = language_codes[locale.split("_")[0]] + fd = FakeData(faker) + + all_fakers = summarize_all_fakers(fd) + + def output(*args, **kwargs): + print(*args, **kwargs, file=outfile) + + head_md = header.format( + locale=locale, current_country=faker.current_country(), language=language + ) + output( + head_md, + ) + + output("[TOC]\n") + + output("## Commonly Used\n") + output_fakers_in_categories(output, [f for f in all_fakers if f.common], "", locale) + output("## Rarely Used\n") + output_fakers_in_categories( + output, [f for f in all_fakers if not f.common], "", locale + ) + + +def output_fakers_in_categories(output, fakers, common: str, locale): + """Sort fakers into named categores and then output them""" + categorized = categorize(fakers) + for category_name, fakers in categorized.items(): + output(f"### 
{category_name.title()} Fakers\n") + for faker in fakers: + output_faker(faker.name, faker, output, locale) + + +def categorize(fakers): + "Sort fakers based on their categories (what module they came from)" + categories = {} + for fakerdata in fakers: + category = fakerdata.category + categories.setdefault(category, []) + categories[category].append(fakerdata) + return {name: value for name, value in sorted(categories.items())} + + +def gather_samples(name, data, locale): + if data.sample: # I already have a sample, no need to generate one + if locale and locale != "en_US": + locale_header = [{"var": "snowfakery_locale", "value": locale}] + sample = locale_header + data.sample + else: + sample = data.sample + example = yaml_dump(sample, sort_keys=False) + samples = [snowfakery_output_for(data.name, example, example)] + else: # need to generate a sample from scratch + samples = yaml_samples_for_docstring(name, data.fullname, data.doc, locale) + return list(filter(None, samples)) + + +def output_faker(name: str, data: str, output: callable, locale: str): + """Output the data relating to a particular faker""" + samples = gather_samples(name, data, locale) + # if there isn't at least one sample, don't publish + if not samples: + return + + output(f"#### fake: {name}\n") + cleaned_docstring = cleanup_docstring(data.doc) + if cleaned_docstring: + output(cleaned_docstring) + output() + + output("Aliases: ", ", ".join(data.aliases)) + output() + link = f"[{data.source}]({data.url}) : {data.fullname}" + output("Source:", link) + + if samples: + output() + for sample in samples: + yaml, out = sample + + output("Recipe:\n") + output(indent(yaml)) + output("Outputs:\n") + output(indent(out)) + else: + output() + + +def indent(yaml: str): + """Add indents to yaml""" + lines = yaml.split("\n") + + def prefix(line): + return " " if line.strip() else "" + + lines = [prefix(line) + line for line in lines] + return "\n".join(lines) + + +def generate_markdown_for_all_locales(path: 
Path, locales=None): + "Generate markdown file for each listed locale. None means all locales" + locales = locales or AVAILABLE_LOCALES + for locale in locales: + with Path(path, f"{locale}.md").open("w") as f: + print(f.name) + generate_markdown_for_fakers(f, locale) + + +def generate_locales_index(path: Path, locales_list: T.List[str]): + "Generate markdown index including listed locales. None means all locales" + locales_list = locales_list or AVAILABLE_LOCALES + with Path(path).open("w") as outfile: + + def output(*args, **kwargs): + print(*args, **kwargs, file=outfile) + + locales = locales_as_markdown_links(None, locales_list) + if locales: + output("## Fake Data Locales\n") + output( + "Learn more about Snowfakery localization in the [Fake Data Tutorial](fakedata.md#localization)\n" + ) + output(locales) diff --git a/tools/faker_docs_utils/format_samples.py b/tools/faker_docs_utils/format_samples.py new file mode 100644 index 00000000..2ddb4b68 --- /dev/null +++ b/tools/faker_docs_utils/format_samples.py @@ -0,0 +1,170 @@ +import ast +import yaml + +from io import StringIO +from collections import OrderedDict +from unittest.mock import MagicMock + +from snowfakery import generate_data + +from . import docstring + +# known code gen issues. ignore them. +IGNORE_ERRORS = set(("uuid4", "randomchoices", "randomelement", "randomelements")) + + +def samples_from_docstring(fullname, docstring_data): + """Convert a Faker-style docstring into a Snowfaery sample""" + lines = docstring_data.split("\n") + lines = [line.strip() for line in lines] + docstrings = docstring.ProviderMethodDocstring( + app=MagicMock(), + what="method", + name=fullname, + obj=MagicMock, + options=MagicMock(), + lines=lines, + ) + return docstrings._samples + + +def simplify(arg): + """Simplify Faker arg-types. e.g. tuples become lists. 
OrderedDicts become dicts."""
    # NamedTuple-style arg: its single field holds the actual value.
    fieldname = arg._fields[0]
    out = getattr(arg, fieldname)

    # primitives are fine
    if isinstance(out, (str, int, float, bool)):
        return out

    # simplify tuples to lists, and simplify the contents
    if isinstance(out, (list, tuple)):
        args = [simplify(a) for a in out]
        return type(out)(args)

    # simplify OrderedDicts to dicts, and simplify the contents
    if isinstance(out, (OrderedDict, dict)):
        return {name: simplify(value) for name, value in dict(out).items()}
    # anything else is unexpected -- fail loudly rather than emit bad YAML
    raise TypeError(type(out), out)


def extract_keywords(kwargstr):
    """Reverse engineer the params from a Snowfakery faker by using the Python parser

    Wraps the keyword-argument string (e.g. 'a=1, b="x"') in a dummy call,
    parses it with `ast`, and returns the keywords as real Python values.
    """
    fake_python = f"Func({kwargstr})"
    tree = ast.parse(fake_python, mode="eval")
    kwds = {arg.arg: simplify(arg.value) for arg in tree.body.keywords}
    return kwds


def reformat_yaml(yaml_data):
    """Normalize YAML to a common format"""
    data = yaml.safe_load(yaml_data)
    return yaml.dump(data, sort_keys=False)


def yaml_samples_for_docstring_sample(name, sample, locale):
    """Try to generate Snowfakery input and output for a faker.

    Logs a (truncated) error message and re-raises on failure.
    """
    try:
        return _yaml_samples_for_docstring_sample_inner(name, sample, locale)
    except Exception as e:
        # truncate potentially huge error text to keep the build log readable
        print("Cannot generate sample from docstring", sample, str(e)[0:100])
        raise e


def _yaml_samples_for_docstring_sample_inner(name, sample, locale):
    """Try to generate Snowfakery input and output for a faker."""
    try:
        kwds = extract_keywords(sample.kwargs)
    except Exception as e:
        # report each failing faker only once; IGNORE_ERRORS deduplicates
        if name.lower() not in IGNORE_ERRORS:
            IGNORE_ERRORS.add(name.lower())
            print("Cannot extract keywords", name, sample, str(e)[0:100])
        return None

    # strip any module/class prefix; keep only the bare method name
    name = name.split(".")[-1]
    return yaml_sample(name, kwds, sample.kwargs, locale)


def yaml_sample(name, kwds, kw_example, locale):
    """Generate Snowfakery yaml input and output"""
    if kwds:
        # keyword args present: show both the inline-formula and block forms
        inline_example = f"fake.{name}({kw_example})"
        block_example = {f"fake.{name}": kwds}
    else:
        inline_example = f"fake.{name}"
block_example = {"fake": name} + + inline_example = "${{" + inline_example + "}}" + + if ":" in inline_example: + inline_example = f'"{inline_example}"' + + single_part_example = f""" + - object: SomeObject + fields: + formula_field_example: {inline_example}""" + + if locale: + locale_decl = f""" + - var: snowfakery_locale + value: {locale} + """ + single_part_example = locale_decl + single_part_example + try: + two_part_example = ( + single_part_example + + f""" + block_field_example: {block_example}""" + ) + + two_part_example = reformat_yaml(two_part_example) + single_part_example = reformat_yaml(single_part_example) + except Exception as e: + print("CANNOT PARSE") + print(two_part_example, single_part_example) + print(str(e)[0:100]) + raise + + return snowfakery_output_for(name, two_part_example, single_part_example) + + +def snowfakery_output_for(name, primary_example, secondary_example): + """Generate the Snowfakery output for some YAML + + Attempt to generate a two-part example, but fall back to single + or nothing if worse comes to worst.""" + output = None + exception = None + + for yaml_data in [primary_example, secondary_example]: + with StringIO() as s: + try: + generate_data(StringIO(yaml_data), output_file=s, output_format="txt") + output = s.getvalue() + exception = None + except Exception as e: + exception = e + + if exception and name.lower() not in IGNORE_ERRORS: + print(f"Cannot generate sample for {name}: {str(exception)[0:80]}") + IGNORE_ERRORS.add(name.lower()) + + if output: + return yaml_data, output + + +def default_yaml_sample(name, locale): + return yaml_sample(name, None, None, locale) + + +def yaml_samples_for_docstring(name, fullname, docstring_data, locale=None): + """Generate example for all samples associated wth a docstring""" + sample_objs = samples_from_docstring(fullname, docstring_data) + + output = [ + yaml_samples_for_docstring_sample(name, sample, locale) + for sample in sample_objs + ] + if not output: + output = 
[default_yaml_sample(name, locale)] + return output diff --git a/tools/faker_docs_utils/language_codes.py b/tools/faker_docs_utils/language_codes.py new file mode 100644 index 00000000..2eda9969 --- /dev/null +++ b/tools/faker_docs_utils/language_codes.py @@ -0,0 +1,190 @@ +language_codes = dict( + ( + ("ab", "Abkhaz"), + ("aa", "Afar"), + ("af", "Afrikaans"), + ("ak", "Akan"), + ("sq", "Albanian"), + ("am", "Amharic"), + ("ar", "Arabic"), + ("an", "Aragonese"), + ("hy", "Armenian"), + ("as", "Assamese"), + ("av", "Avaric"), + ("ae", "Avestan"), + ("ay", "Aymara"), + ("az", "Azerbaijani"), + ("bm", "Bambara"), + ("ba", "Bashkir"), + ("eu", "Basque"), + ("be", "Belarusian"), + ("bn", "Bengali"), + ("bh", "Bihari"), + ("bi", "Bislama"), + ("bs", "Bosnian"), + ("br", "Breton"), + ("bg", "Bulgarian"), + ("my", "Burmese"), + ("ca", "Catalan; Valencian"), + ("ch", "Chamorro"), + ("ce", "Chechen"), + ("ny", "Chichewa; Chewa; Nyanja"), + ("zh", "Chinese"), + ("cv", "Chuvash"), + ("kw", "Cornish"), + ("co", "Corsican"), + ("cr", "Cree"), + ("hr", "Croatian"), + ("cs", "Czech"), + ("da", "Danish"), + ("dk", "Danish"), # wrong + ("dv", "Divehi; Maldivian;"), + ("nl", "Dutch"), + ("dz", "Dzongkha"), + ("en", "English"), + ("eo", "Esperanto"), + ("et", "Estonian"), + ("ee", "Ewe"), + ("fo", "Faroese"), + ("fj", "Fijian"), + ("fi", "Finnish"), + ("fr", "French"), + ("ff", "Fula"), + ("gl", "Galician"), + ("ka", "Georgian"), + ("de", "German"), + ("el", "Greek, Modern"), + ("gn", "Guaraní"), + ("gu", "Gujarati"), + ("ht", "Haitian"), + ("ha", "Hausa"), + ("he", "Hebrew (modern)"), + ("hz", "Herero"), + ("hi", "Hindi"), + ("ho", "Hiri Motu"), + ("hu", "Hungarian"), + ("ia", "Interlingua"), + ("id", "Indonesian"), + ("ie", "Interlingue"), + ("ga", "Irish"), + ("ig", "Igbo"), + ("ik", "Inupiaq"), + ("io", "Ido"), + ("is", "Icelandic"), + ("it", "Italian"), + ("iu", "Inuktitut"), + ("ja", "Japanese"), + ("jv", "Javanese"), + ("kl", "Kalaallisut"), + ("kn", "Kannada"), + ("kr", 
"Kanuri"), + ("ks", "Kashmiri"), + ("kk", "Kazakh"), + ("km", "Khmer"), + ("ki", "Kikuyu, Gikuyu"), + ("rw", "Kinyarwanda"), + ("ky", "Kirghiz, Kyrgyz"), + ("kv", "Komi"), + ("kg", "Kongo"), + ("ko", "Korean"), + ("ku", "Kurdish"), + ("kj", "Kwanyama, Kuanyama"), + ("la", "Latin"), + ("lb", "Luxembourgish"), + ("lg", "Luganda"), + ("li", "Limburgish"), + ("ln", "Lingala"), + ("lo", "Lao"), + ("lt", "Lithuanian"), + ("lu", "Luba-Katanga"), + ("lv", "Latvian"), + ("gv", "Manx"), + ("mk", "Macedonian"), + ("mg", "Malagasy"), + ("ms", "Malay"), + ("ml", "Malayalam"), + ("mt", "Maltese"), + ("mi", "Māori"), + ("mr", "Marathi (Marāṭhī)"), + ("mh", "Marshallese"), + ("mn", "Mongolian"), + ("na", "Nauru"), + ("nv", "Navajo, Navaho"), + ("nb", "Norwegian Bokmål"), + ("nd", "North Ndebele"), + ("ne", "Nepali"), + ("ng", "Ndonga"), + ("nn", "Norwegian Nynorsk"), + ("no", "Norwegian"), + ("ii", "Nuosu"), + ("nr", "South Ndebele"), + ("oc", "Occitan"), + ("oj", "Ojibwe, Ojibwa"), + ("cu", "Old Church Slavonic"), + ("om", "Oromo"), + ("or", "Oriya"), + ("os", "Ossetian, Ossetic"), + ("pa", "Panjabi, Punjabi"), + ("pi", "Pāli"), + ("fa", "Persian"), + ("fil", "Filipino"), # Not a real language code + ("pl", "Polish"), + ("ps", "Pashto, Pushto"), + ("pt", "Portuguese"), + ("qu", "Quechua"), + ("rm", "Romansh"), + ("rn", "Kirundi"), + ("ro", "Romanian, Moldavan"), + ("ru", "Russian"), + ("sa", "Sanskrit (Saṁskṛta)"), + ("sc", "Sardinian"), + ("sd", "Sindhi"), + ("se", "Northern Sami"), + ("sm", "Samoan"), + ("sg", "Sango"), + ("sr", "Serbian"), + ("gd", "Scottish Gaelic"), + ("sn", "Shona"), + ("si", "Sinhala, Sinhalese"), + ("sk", "Slovak"), + ("sl", "Slovene"), + ("so", "Somali"), + ("st", "Southern Sotho"), + ("es", "Spanish; Castilian"), + ("su", "Sundanese"), + ("sw", "Swahili"), + ("ss", "Swati"), + ("sv", "Swedish"), + ("ta", "Tamil"), + ("te", "Telugu"), + ("tg", "Tajik"), + ("th", "Thai"), + ("ti", "Tigrinya"), + ("bo", "Tibetan"), + ("tk", "Turkmen"), + ("tl", "Tagalog"), 
+ ("tn", "Tswana"), + ("to", "Tonga"), + ("tr", "Turkish"), + ("ts", "Tsonga"), + ("tt", "Tatar"), + ("tw", "Twi"), + ("ty", "Tahitian"), + ("ug", "Uighur, Uyghur"), + ("uk", "Ukrainian"), + ("ur", "Urdu"), + ("uz", "Uzbek"), + ("ve", "Venda"), + ("vi", "Vietnamese"), + ("vo", "Volapük"), + ("wa", "Walloon"), + ("cy", "Welsh"), + ("wo", "Wolof"), + ("fy", "Western Frisian"), + ("xh", "Xhosa"), + ("yi", "Yiddish"), + ("yo", "Yoruba"), + ("za", "Zhuang, Chuang"), + ("zu", "Zulu"), + ) +) diff --git a/tools/faker_docs_utils/mkdocs_plugins/main_mkdocs_plugin.py b/tools/faker_docs_utils/mkdocs_plugins/main_mkdocs_plugin.py new file mode 100644 index 00000000..aba8fd48 --- /dev/null +++ b/tools/faker_docs_utils/mkdocs_plugins/main_mkdocs_plugin.py @@ -0,0 +1,71 @@ +from pathlib import Path +import sys +import os +from unittest.mock import patch +from functools import lru_cache +from logging import Logger + +from mkdocs.plugins import BasePlugin +from faker.factory import Factory + + +class Plugin(BasePlugin): + def on_config(self, config): + pass + + def on_pre_build(self, config): + root_dir = Path(__file__).parent.parent.parent.parent + faker_docs_dir = root_dir / "docs/fakedata" + faker_docs_dir.mkdir(exist_ok=True) + new_sys_path = [*sys.path, str(root_dir)] + print("Note: Hiding warnings during docs build") + + # make modules available + sys_path_patch = patch.object(sys, "path", new_sys_path) + warning = Logger.warning + + irritating_warning = "Numbers generated by this method are purely hypothetical." 
+ + def new_warning(self, *args, **kwargs): + if args == (irritating_warning,): + return + else: + warning(self, *args, **kwargs) + + logger_patch = patch("logging.Logger.warning", new=new_warning) + + # speed up a critical function + lru_patch = patch( + "faker.factory.Factory._get_provider_class", + lru_cache(maxsize=10_000)(Factory._get_provider_class), + ) + + with sys_path_patch, lru_patch, logger_patch: + from tools.faker_docs_utils.faker_markdown import ( + generate_markdown_for_all_locales, + generate_markdown_for_fakers, + generate_locales_index, + ) + + fakerdocs_md_header = ( + root_dir / "tools/faker_docs_utils/fakedata_header_full.md" + ) + main_header = Path(fakerdocs_md_header).read_text() + fakerdocs_md = root_dir / "docs/fakedata.md" + with fakerdocs_md.open("w") as f: + generate_markdown_for_fakers(f, "en_US", main_header) + + build_locales_env = os.environ.get( + "SF_MKDOCS_BUILD_LOCALES" + ) or self.config.get("build_locales", None) + if build_locales_env == "False": + locales_list = ["en_US", "fr_FR"] + elif build_locales_env in (True, "True", None): + locales_list = None # means "all" + elif isinstance(build_locales_env, str): + locales_list = build_locales_env.split(",") + else: + assert 0, f"Unexpected build_locales_env {build_locales_env}" + + generate_markdown_for_all_locales(faker_docs_dir, locales_list) + generate_locales_index("docs/locales.md", locales_list) diff --git a/tools/faker_docs_utils/summarize_fakers.py b/tools/faker_docs_utils/summarize_fakers.py new file mode 100644 index 00000000..cb005c29 --- /dev/null +++ b/tools/faker_docs_utils/summarize_fakers.py @@ -0,0 +1,82 @@ +from pathlib import Path +import types +import typing as T + +import yaml + + +class FakerInfo(T.NamedTuple): + name: str + fullname: str + aliases: T.List[str] + url: str + source: str + category: str + doc: str + common: bool + sample: str + + +def summarize_all_fakers(faker) -> T.Sequence[FakerInfo]: + """Summarize information about all fakers""" + from 
snowfakery.utils.collections import CaseInsensitiveDict + + # get config info that can override samples etc. + with (Path(__file__).parent / "docs_config.yml").open() as f: + yaml_data = yaml.safe_load(f) + common_fakes = yaml_data["common_fakes"] + uncommon_fakes = yaml_data["uncommon_fakes"] + + faker_infos = CaseInsensitiveDict() + for name, meth in faker.fake_names.items(): + if not isinstance(meth, types.MethodType): + continue + # python magic to introspect classnames, filenames, etc. + friendly = _to_camel_case(name) + func = meth.__func__ + doc = func.__doc__ + filename = func.__code__.co_filename + cls = meth.__self__.__class__ + fullname = cls.__module__ + "." + cls.__name__ + "." + meth.__name__ + overrides = common_fakes.get(meth.__name__) or uncommon_fakes.get(meth.__name__) + is_common = meth.__name__ in common_fakes + + # if it came from Faker + if "/faker/" in filename: + source = "faker" + idx = filename.find("/faker/") + url = "https://github.com/joke2k/faker/tree/master" + filename[idx:] + parts = filename.split("/") + while parts[-1] in ("__init__.py", "en_US"): + del parts[-1] + category = parts[-1] + else: # if it came from Snowfakery + source = "snowfakery" + idx = filename.find("/snowfakery/") + url = ( + "https://github.com/SFDO-Tooling/Snowfakery/tree/main" + filename[idx:] + ) + category = "Salesforce" + + faker_info = faker_infos.setdefault( + friendly, + FakerInfo( + friendly, + fullname, + [], + url, + source, + category, + doc or "", + is_common, + overrides.get("example") if overrides else None, + ), + ) + faker_info.aliases.append(name) + + return faker_infos.values() + + +def _to_camel_case(snake_str): + components = snake_str.split("_") + return "".join(x.title() for x in components) diff --git a/tools/faker_docs_utils/validator.py b/tools/faker_docs_utils/validator.py new file mode 100644 index 00000000..7cd87bb0 --- /dev/null +++ b/tools/faker_docs_utils/validator.py @@ -0,0 +1,154 @@ +# Based on 
# https://github.com/joke2k/faker/blob/2dac486e6d3b5f018feb524f6fa19917ec10299e/faker/sphinx/validator.py
# Copied under the provisions of the MIT License

# coding=utf-8
import ast
import traceback

from collections import OrderedDict


class SampleCodeValidator(ast.NodeVisitor):
    """
    Class that checks if a string is a valid and "safe" Python expression

    What is considered "safe" for this class is limited to the context of generating
    provider method sample code and output for documentation purposes. The end goal
    is to pass a command string to `eval()` should the string pass the validation
    performed by this class.

    The main assumption this class will make is that the command string passed during
    class instantiation will always be in the form "{generator}.{method}({arguments})".
    In said form, {generator} is a `Generator` object variable that already exists
    within the scope where `eval()` will be called, {method} will be the provider
    method name which is also available within the `eval()` scope, and {arguments}
    will be sample arguments parsed from docstrings. This means that {arguments} can
    potentially be used as a vector for code injection.

    In order to neuter the impact of code injection, the following validation steps
    will be applied:

    - The command string is parsed using 'eval' mode, meaning expressions only.
    - The command string can only have whitelisted code elements. See `_whitelisted_nodes`.
    - The command string can only have one instance of variable access.
    - The command string can only have one instance of attribute access.
    - The command string can only have one instance of a function/method call.
    - The argument values in the command string can only be literals.
    - The only literals allowed are numbers (integers, floats, or complex numbers),
      strings (but not f-strings), bytes, lists, tuples, sets, dictionaries, True,
      False, and None.

    There is, however, an exception. In order to accommodate sample code with custom
    probability distribution, variable access to `OrderedDict` will not count against
    the maximum limit of variable access, and invoking `OrderedDict` constructor calls
    will not count against the maximum limit of function/method calls. In order to
    neuter the impact of code injection, please ensure that `OrderedDict` refers to
    the standard library's `collections.OrderedDict` within the `eval()` scope before
    passing the command string to `eval()` for execution. This can be done in code review.
    """

    # Literal nodes are listed under both the modern name (``ast.Constant``,
    # which is what Python 3.8+ actually produces when parsing literals) and
    # the legacy aliases (``Num``/``Str``/``Bytes``/``NameConstant``), which
    # are deprecated and scheduled for removal.  Building the tuple through
    # ``getattr``/``hasattr`` keeps this working on interpreters where the
    # legacy aliases no longer exist.
    _whitelisted_nodes = tuple(
        getattr(ast, name)
        for name in (
            # Code elements related to function calls and variable and attribute access
            "Expression",
            "Call",
            "Attribute",
            "Name",
            "Load",
            "keyword",
            # Code elements representing whitelisted literals
            "Constant",
            "Num",
            "Str",
            "Bytes",
            "List",
            "Tuple",
            "Set",
            "Dict",
            "NameConstant",
        )
        if hasattr(ast, name)
    )

    # Budgets for the three kinds of access (the OrderedDict exception aside).
    _max_function_call_count = 1
    _max_attribute_access_count = 1
    _max_variable_access_count = 1

    def __init__(self, command):
        """Parse and validate `command`; check `self.errors` afterwards."""
        self._errors = set()
        self._function_call_count = 0
        self._attribute_access_count = 0
        self._variable_access_count = 0
        self._command = command

        try:
            self._tree = ast.parse(command, mode="eval")
        except (SyntaxError, ValueError):
            # Unparseable input is recorded as a validation error, not raised.
            self._log_error(traceback.format_exc())
        else:
            self._validate()

    @property
    def errors(self):
        """Set of validation error messages; empty means the command passed."""
        return self._errors

    def _is_whitelisted(self, node):
        return isinstance(node, self._whitelisted_nodes)

    def _log_error(self, msg):
        self._errors.add(msg)

    def _validate(self):
        self.visit(self._tree)

    def _is_node_using_ordereddict(self, node):
        """Return True if `node` is the name `OrderedDict` or a call to its constructor."""
        is_valid = False

        # If instance of function call, check if it is a call to the OrderedDict constructor
        if isinstance(node, ast.Call):
            is_valid = self._is_node_using_ordereddict(node.func)

        # If instance of variable access, check if it is the name `OrderedDict`
        elif isinstance(node, ast.Name) and node.id == OrderedDict.__name__:
            is_valid = True

        return is_valid

    def visit(self, node):
        # Check if code element type is allowed
        if not self._is_whitelisted(node):
            msg = "Code element `%s` is not allowed." % node.__class__.__name__
            self._log_error(msg)

        return super().visit(node)

    def visit_Call(self, node):
        # OrderedDict constructor calls are exempt from the call budget
        if not self._is_node_using_ordereddict(node):
            # There can only be one instance of a function call
            if self._function_call_count < self._max_function_call_count:
                self._function_call_count += 1
            else:
                msg = "There can only be one instance of a function/method call."
                self._log_error(msg)

        # Proceed to child nodes
        self.generic_visit(node)

    def visit_Attribute(self, node):
        # There can only be one instance of attribute access
        if self._attribute_access_count < self._max_attribute_access_count:
            self._attribute_access_count += 1
        else:
            msg = "There can only be one instance of attribute access."
            self._log_error(msg)

        # Proceed to child nodes
        self.generic_visit(node)

    def visit_Name(self, node):
        # `OrderedDict` access is exempt from the variable budget
        if not self._is_node_using_ordereddict(node):
            # There can only be one instance of variable access
            if self._variable_access_count < self._max_variable_access_count:
                self._variable_access_count += 1
            else:
                msg = "There can only be one instance of variable access."
                self._log_error(msg)

        # Proceed to child nodes
        self.generic_visit(node)