diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index cb74703e..05773a01 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -22,7 +22,7 @@ jobs:
- name: "Install dependencies"
run: |
python -VV
- python -m pip install --upgrade pip pip-tools
+ python -m pip install --upgrade pip
make dev-install
- name: "Run tox targets for ${{ matrix.python-version }}"
@@ -49,13 +49,32 @@ jobs:
- name: Install dependencies
run: |
python -VV
- python -m pip install --upgrade pip pip-tools
+ python -m pip install --upgrade pip
make dev-install
python -m pip install cumulusci
- name: Run Tests
run: python -m pytest
+ faker_docs:
+ name: Faker Docs
+ runs-on: ubuntu-latest
+ steps:
+ - uses: "actions/checkout@v2"
+ - uses: "actions/setup-python@v1"
+ with:
+ python-version: "3.9"
+
+ - name: Install dependencies
+ run: |
+ python -VV
+ python -m pip install --upgrade pip pip-tools
+ make dev-install
+ python setup.py install
+
+ - name: Make Docs
+ run: make docs
+
windows:
name: Windows ${{ matrix.python-version }}
runs-on: windows-latest
@@ -71,7 +90,7 @@ jobs:
- name: "Install dependencies"
run: |
python -VV
- python -m pip install --upgrade pip pip-tools
+ python -m pip install --upgrade pip
make dev-install
- name: Run Tests
diff --git a/.gitignore b/.gitignore
index 3623a5fc..828e9003 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,6 +13,9 @@ build/
dist/
docs/api/
docs/_build/
+docs/fakedata
+docs/fakedata.md
+docs/locales.md
.eggs/
.idea/
.tox/
@@ -38,6 +41,7 @@ pip-wheel-metadata
results_junit.xml
test_results.json
temp
+coverage.xml
# Salesforce / SFDX / CCI
.cci
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index 8ff90d9d..bc5f8ad7 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -71,7 +71,13 @@ Or you could make it outside of the project repo.
$ pytest
-7. Your new code should also have meaningful tests. One way to double check that
+7. Build the docs like this:
+ $ make docs
+ $ open build/html/index.html
+
+Set ``SF_MKDOCS_BUILD_LOCALES=False`` to skip building all locales.
+
+8. Your new code should also have meaningful tests. One way to double check that
your tests cover everything is to ensure that your new code has test code coverage:
$ pytest --cov
@@ -123,3 +129,27 @@ You can finish up the process by updating the release object that was auto-creat
Just paste in the changelog notes and hit publish.
Tada! You've published a new version of Snowfakery.
+
+Internal Software Architecture
+------------------------------
+
+=================================== ================================
+Filename Purpose
+----------------------------------- --------------------------------
+
+cli.py Click-based Command Line. Uses the Click library to supply a CLI.
+data_generator.py The API entry point the CLI and CCI use.
This may be the best place to start reading. It abstracts away all of the complexity and outlines the core flow.
+parse_recipe_yaml.py Phase 1: parse YAML into a Runtime DOM
Includes some hacks to the YAML parser for handling line numbers.
+data_generator_runtime.py Phase 2: Runtime.
Actually generate the data by walking the template list top-to-bottom, generating rows as appropriate.
+data_generator_runtime_dom.py An object model used in Phase 2. Roughly similar to the shape of the YAML file.
+output_streams.py Where the data goes in the output. Used during Phase 2.
+data_gen_exceptions.py Exceptions that can be thrown
+generate_mapping_from_recipe.py In the CCI context, this utility package allows the generation of mapping.yml files.
+template_funcs.py Functions that can be invoked using either block syntax or in Jinja templates
+plugins.py Infrastructure for plugins
+standard_plugins/ Plugins that ship with Snowfakery
+tests/ Unit tests
+=================================== ================================
+
+
+
diff --git a/HISTORY.md b/HISTORY.md
index 9296a6f5..2228ddb9 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -4,6 +4,29 @@ In the beginning, programmers created the databases. Now the databases were form
And so [Salesforce.org](http://salesforce.org/) said “Let there be data,” and there was Snowfakery. And it was good.
+## Snowfakery 2.0
+
+Recipes can now merge in data from Salesforce orgs as
+records, IDs or datasets. (#395 and #401)
+
+Output streams can now be specified as 3rd party libraries. Any Python class name
+can be passed to --output-format as long as it adheres to the OutputStream
+protocol as shown in examples/YamlOutputStream.py (#351)
+
+Snowfakery is thread-safe/multi-processor-ready and available for parallel usage
+in CumulusCI. (CumulusCI Repo)
+
+Snowfakery now supports a --reps option as an easier way to repeat a recipe (#416)
+
+Snowfakery now accepts LeadingCaps syntax for all fakers, as well as
+underscore_separated and everythinglowercase. (#403)
+
+Salesforce.ContentVersion and Salesforce.ProfileId convenience functions were added
+(#421)
+
+Snowfakery now has voluminous documentation about Fakes in many languages and
+locales (#409)
+
## Snowfakery 1.12
Fix a regression: In some contexts it was impossible to call Faker with either
diff --git a/LICENSE b/LICENSE
index e9830463..da6962af 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,27 +1,30 @@
- Copyright (c) 2020, Salesforce.org
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of Salesforce.org nor the names of
- its contributors may be used to endorse or promote products derived
- from this software without specific prior written permission.
+Copyright (c) 2021, Salesforce.com, Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the
+ above copyright notice, this list of conditions
+ and the following disclaimer.
+
+* Redistributions in binary form must reproduce
+ the above copyright notice, this list of conditions
+ and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+* Neither the name of Salesforce.com nor the names
+ of its contributors may be used to endorse or promote
+ products derived from this software without specific
+ prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- POSSIBILITY OF SUCH DAMAGE.
diff --git a/Makefile b/Makefile
index ddeb5f3e..515857b2 100644
--- a/Makefile
+++ b/Makefile
@@ -3,4 +3,12 @@ update-deps:
pip-compile --upgrade --allow-unsafe requirements/dev.in
dev-install:
+ pip install --upgrade pip-tools
pip-sync requirements/*.txt
+ pip install -e .
+
+# set SF_MKDOCS_BUILD_LOCALES=False to skip building all locales
+docs: .FORCE
+ python -m mkdocs build --clean --site-dir build/html --config-file mkdocs.yml
+
+.FORCE:
diff --git a/README.md b/README.md
index ee33b7b9..ad611803 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,6 @@ Once you have youre virtual environment, you can install dependencies via pip:
Or you can install dependencies via pip tools:
```python
-pip install pip-tools
make dev-install
```
diff --git a/custom_theme/img/favicon.ico b/custom_theme/img/favicon.ico
new file mode 100755
index 00000000..4ef032f9
Binary files /dev/null and b/custom_theme/img/favicon.ico differ
diff --git a/docs/Makefile b/docs/Makefile
deleted file mode 100644
index 43276bb9..00000000
--- a/docs/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-# Minimal makefile for Sphinx documentation
-#
-
-# You can set these variables from the command line, and also
-# from the environment for the first two.
-SPHINXOPTS ?=
-SPHINXBUILD ?= sphinx-build
-SOURCEDIR = .
-BUILDDIR = ../build
-
-# Put it first so that "make" without argument is like "make help".
-help:
- @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
-
-.PHONY: help Makefile
-
-# Catch-all target: route all unknown targets to Sphinx using the new
-# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
-%: Makefile
- @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/conf.py b/docs/conf.py
index 396cb091..23c008f7 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -27,8 +27,7 @@
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
-extensions = ["myst_parser"]
-myst_config = {}
+extensions = []
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
diff --git a/docs/index.md b/docs/index.md
index b1293ba1..dfc5a71d 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -119,7 +119,7 @@ pet_stories.yml
- object: Animal
fields:
name:
- fake: first_name
+ fake: FirstName
species: canine
food:
- object: PetFood
@@ -131,7 +131,7 @@ pet_stories.yml
- object: Animal
fields:
name:
- fake: first_name
+ fake: FirstName
species: feline
food:
- object: PetFood
@@ -224,6 +224,30 @@ persons_of_interest.yml
In this case, there will be 6 Persons in the Person table (or file), 3 with age between 0 and 12 and 3 with age between 12 and 95.
+### Fake Data
+
+You can generate many kinds of fake data using the `fake` function:
+
+```yaml
+- object: Account
+ fields:
+ Name:
+ fake: Company
+ Description:
+ fake: CatchPhrase
+ BillingStreet:
+ fake: StreetAddress
+ BillingCity:
+ fake: City
+ BillingState:
+ fake: State
+```
+
+You can fake all sorts of stuff. Names, addresses, Latin text, English sentences, URLs, etc.
+
+The complete list, along with other related features, can be found in
+the [Fake Data Tutorial](fakedata.md)
+
### Friends
Sometimes you want to obey a rule like “For every Person I create, I’d like to create 2 animals” (maybe you really like animals).
@@ -245,7 +269,7 @@ You would use the `friends` property to do that.
count: 2
fields:
name:
- fake: first_name
+ fake: FirstName
```
This will output two animals per person:
@@ -282,7 +306,7 @@ Relationships are a big part of what makes Snowfakery different than the dozens(
- object: Animal
fields:
name:
- fake: first_name
+ fake: FirstName
species: Petaurus Breviceps
```
@@ -313,7 +337,7 @@ In addition, we can relate pets and owners “bidirectionally”, like this:
- object: Animal
fields:
name:
- fake: first_name
+ fake: FirstName
owner:
reference: Person
```
@@ -366,7 +390,7 @@ pet_stories_2.yml
owner:
reference: Person
name:
- fake: first_name
+ fake: FirstName
species: canine
food:
reference: petschoice
@@ -376,7 +400,7 @@ pet_stories_2.yml
fields:
owner: Person
name:
- fake: first_name
+ fake: FirstName
species: feline
nemesis: dog
food:
@@ -415,7 +439,7 @@ This function allows you to look up another row (object) and make a reference to
- object: Animal
fields:
name:
- fake: first_name
+ fake: FirstName
owner:
reference: Person
```
@@ -509,7 +533,8 @@ Create a reference to a random, already-created row from some table.
- object: Owner
count: 10
fields:
- name: fake.name
+ name:
+ fake: Name
- object: Pet
count: 10
fields:
@@ -533,123 +558,9 @@ github issue.
### `fake`
-Generate fake data using functions from the [faker](https://github.com/joke2k/faker) library:
-
-```yaml
-- object: Account
- fields:
- Name:
- fake: company
- Description:
- fake: catch_phrase
- BillingStreet:
- fake: street_address
- BillingCity:
- fake: city
- BillingState:
- fake: state
-```
-
-You can fake all sorts of stuff. Names, addresses, Latin text, English sentences, URLs, etc. There are two lists of fake names you can pull from, a Snowfakery-specific list,
-and the broader faker list.
-
-The Snowfakery names are:
-
-- Username: a globally unique username in the shape of an email address
-
-- Alias: a short string that looks like a first name.
-
-- FirstName, LastName: Localized first and last name
-
-- Email: An email address using one of the standard "example" domains (such as example.com, example.org, etc.)
-
-- RealisticMaybeRealEmail: An email address which looks
-more real (because it uses domains like google.com,
-yahoo.com, etc.) and may accidentally actually overlap
-with a real email address. Be careful using this if
-you might send actual emails to the addresses!
+Generate fake data. This function is defined in detail
+in the [Fake Data Tutorial](fakedata.md)
-For example, you can use these like this:
-
-```yaml
-# examples/salesforce/simple-user.yml
-- object: User
- fields:
- Username:
- fake: Username
- FirstName:
- fake: FirstName
- LastName:
- fake: LastName
- Email:
- fake: Email
- Alias:
- fake: Alias
-```
-
-It doesn't matter if you use upper or lower case for fake names.
-
-The complete list is here:
-
-
-
-You can also include Faker extension libraries after you’ve added them to your Python install:
-
-```yaml
- - plugin: faker_microservice.Provider
- - object: OBJ
- fields:
- service_name:
- fake:
- microservice
-```
-
-You would install that provider like this:
-
-```s
-$ pip install faker_microservice
-```
-
-Here are some Python Faker providers:
-
-
-
-And you could make your own providers as well.
-
-Fake can be called as an inline function in an expression:
-
-```yaml
-FullName: ${{fake.first_name}} Johnson
-```
-
-You can also call these functions with arguments as described in Faker's [documentation](https://faker.readthedocs.io/en/master/providers.html)
-
-```yaml
-country: ${{fake.country_code(representation='alpha-2')}}
-```
-
-### International Fakes
-
-You can specify internationally appropriate fakes for many different kind of names (e.g. person, company) by setting the snowfakery_locale this:
-
-```yaml
-- var: snowfakery_locale
- value: no_NO
-- object: person
- fields:
- name:
- fake: name
-- var: snowfakery_locale
- value: fr_FR
-- object: person
- fields:
- name:
- fake: name
-```
-
-This will generate a “typical” Norwegian first name for the first person object and a French name for the second person object.
-
-You can infer which Faker providers are internationalizable by looking through the Faker [repository](https://github.com/joke2k/faker/tree/master/faker/providers) and seeing which directories have localizations. For example there are only three localizations of [credit card](https://github.com/joke2k/faker/tree/master/faker/providers) (who knew that credit cards were different in Iran and Russia) and dozens of localizations for [person name](https://github.com/joke2k/faker/tree/master/faker/providers/person).
### `date_between`
@@ -761,16 +672,16 @@ some_number: A number ${{random_number(min=5, max=10)}}
- choice:
when: ${{gender=='Male'}}
pick:
- fake: first_name_male
+ fake: FirstNameMale
- choice:
when: ${{gender=='Female'}}
pick:
- fake: first_name_female
+ fake: FirstNameFemale
- choice:
pick:
- fake: first_name
+ fake: FirstNameNonBinary
```
The `when` clause can be a Python formula and it will be interpreted as a boolean similar to how Python would do it. The first `when` clause that matches is selected. The last `choice` clause should have no `when` clause, and it is a fallback which is selected if the others do not match.
@@ -853,17 +764,17 @@ this:
```yaml
- var: lastname_var
value:
- fake: last_name
+ fake: LastName
- object: person
fields:
- first_name:
- fake: first_name
- last_name: ${{lastname_var}}
+ FirstName:
+ fake: FirstName
+ LastName: ${{lastname_var}}
- object: spouse
fields:
- first_name:
- fake: first_name
- last_name: ${{lastname_var}}
+ FirstName:
+ fake: FirstName
+ LastName: ${{lastname_var}}
```
This works both at the top level of your recipe and in friends
@@ -878,7 +789,7 @@ do that by creating a "hidden" object:
- object: __shared_address
fields:
street:
- fake: street_address
+ fake: StreetAddress
city:
fake: city
state:
@@ -932,7 +843,7 @@ There is a lot to say about formulas and one day they will all be documented her
- use `${{` to start a formula and `}}` to end it
- use Python expression syntax in the middle
- field values defined earlier on this object are available as names
-- Use faker values like this: Name: ${{fake.first_name}} Johnson
+- Use faker values like this: Name: ${{fake.FirstName}} Johnson
- parent (or ancestor) values are available through the parent’s object name. Like Opportunity.amount
Formulas are based on a similar language called Jinja2, but we use `${{` and `}}` where Jinja2 uses `{{` and `}}` because our version is more compatible with YAML.
@@ -961,7 +872,7 @@ The `id` variable returns a unique identifier for the current Object/Row to allo
```yaml
fields:
- name: ${{fake.last_name}} Household ${{id}}
+ name: ${{fake.LastName}} Household ${{id}}
```
#### `today`
@@ -973,7 +884,8 @@ a single recipe.
#### `fake:` and `fake.`
-The `fake:` function and `fake.` namespace both generate fake data as described elsewhere in this documentation.
+The `fake:` block function and `fake.` namespace both generate
+fake data as described in the [Fake Data Tutorial](fakedata.md).
```yaml
# examples/two_fakers.yml
@@ -1071,13 +983,21 @@ Options:
sqlite:///foo.db if you don't have one set
up.
- --output-format [JSON|json|txt|csv|sql|PNG|png|SVG|svg|svgz|jpeg|jpg|ps|dot]
+ --output-format [png|svg|svgz|jpeg|jpg|ps|dot|json|txt|csv|sql]
--output-folder PATH
-o, --output-file PATH
- --option EVAL_ARG... Options to send to the recipe YAML.
- --target-number TEXT... Target options for the recipe YAML in the
- form of 'number tablename'. For example: '50
- Account'.
+ --option EVAL_ARG... Option to send to the recipe YAML in a
+ format like 'OptName OptValue'. Specify
+ multiple times if needed.
+
+ --target-number, --target-count TEXT...
+ Target record count for the recipe YAML in
+ the form of 'number tablename'. For example:
+ '50 Account' to generate roughly 50
+ accounts.
+
+ --reps INTEGER Target repetition count for the recipe YAML.
+ Use as an alternative to --target-number
--debug-internals / --no-debug-internals
--generate-cci-mapping-file FILENAME
@@ -1092,6 +1012,10 @@ Options:
--continuation-file FILENAME Continue generating a dataset where
'continuation-file' left off
+ --plugin-option EVAL_ARG... Option to send to a plugin in a format like
+ 'OptName OptValue'. Specify multiple times
+ if needed.
+
--load-declarations FILE Declarations to mix into the generated
mapping file
@@ -1101,7 +1025,23 @@ Options:
### Scaling up recipe execution
-From the command line you can control how many rows a recipe generates. You do this by specifying a "target count" and a "target tablename", like this:
+From the command line you can control how many rows a recipe generates.
+
+The simple way is:
+
+```s
+snowfakery accounts.yml --reps 1000
+```
+
+This will run the recipe 1000 times. Easy!
+
+But consider if the user wants to run a test against roughly 50,000 accounts. They
+could do a calculation to figure out how many reps, but this may be complex
+because Snowfakery has randomization features, so that a recipe might generate
+a random number of accounts in each run. Even for simpler recipes, doing the
+math may be a headache, especially if you are changing the recipe every day.
+
+A better solution is to specify the "target number" and a "target tablename", like this:
```s
snowfakery accounts.yml --target-number 1000 Account
@@ -1146,7 +1086,6 @@ for all of the CSV files.
## Advanced Features
-
### Singletons with the "just_once" feature
Snowfakery scales up to larger data volumes
@@ -1388,7 +1327,67 @@ generate_data(
Detailed information is available in [Embedding Snowfakery into Python Applications](./embedding.md)
-## Plugins and Providers
+### Using Snowfakery with Databases
+
+Snowfakery is built on top of a very flexible engine called
+SQLAlchemy. This allows it to connect to many different databases
+subject to the limitations described below.
+
+You should start by installing Snowfakery in a context which
+makes it easy to use the Python command 'pip' to manage your
+Python environment. For example you could install Python
+using the standard installers from `python.org` and then
+you would run the following commands to create and use a venv with the
+Postgres package:
+
+```bash
+
+# create a new directory for our experiment
+$ mkdir experiment_with_postgres
+# cd into it
+$ cd experiment_with_postgres
+# create a new database:
+# https://www.postgresql.org/docs/9.1/app-createdb.html
+$ createdb snowfakerydb
+# create a virtual environment. A good habit to get into.
+# https://docs.python.org/3/library/venv.html
+$ python3 -m venv myvenv
+# activate the venv
+$ source myvenv/bin/activate
+# install Snowfakery in this venv
+$ pip install snowfakery
+# install the Postgres library for Python
+# https://pypi.org/project/psycopg2/
+$ pip install psycopg2
+# let's use it!
+$ snowfakery --dburl='postgresql://localhost:5432/snowfakerydb' ~/code/Snowfakery/examples/company.yml --target-number 1000 Employee
+# and check the results
+# https://www.postgresql.org/docs/9.3/app-psql.html
+$ echo 'select * from "Employee"' | psql snowfakerydb
+```
+
+That's a lot to take in, but hopefully it will be clear enough
+to follow the links and understand the details.
+
+A limitation of this process is that currently Snowfakery can
+only create new tables rather than import into existing ones.
+
+The table will have an id column in addition to columns for every field that
+was generated by the recipe. All columns will be of type text.
+
+The list of databases supported by our underlying infrastructure
+(SQLAlchemy) is listed [here](https://docs.sqlalchemy.org/en/14/core/engines.html#supported-databases) and [here](https://docs.sqlalchemy.org/en/13/dialects/index.html).
+
+Snowfakery is not proactively tested with all of the output
+databases. We will certainly accept bug reports and pull requests
+relating to problems that are discovered.
+
+Please keep in touch with the Snowfakery team about your use of
+other databases so we can have a sense of what works well and what
+does not.
+
+
+### Plugins and Providers
Plugins and Providers allow Snowfakery to be extended with Python code. A plugin adds new functions to Snowfakery. A Provider adds new capabilities to the Faker library which is exposed to Snowfakery users through the fake: keyword.
@@ -1398,9 +1397,9 @@ You include either Plugins or Providers in a Snowfakery file like this:
- plugin: package.module.classname
```
-## Built-in Plugins
+### Built-in Plugins
-### Advanced Math
+#### Advanced Math
Snowfakery has a "Math" plugin which gives you access to all features from Python's
[`math`](https://docs.python.org/3/library/math.html) module plus
@@ -1526,9 +1525,9 @@ CumulusCI can also be used to download CSV data for enrichment as follows.
Dataset.shuffle:
dataset: ../../accounts.csv
FirstName:
- fake: first_name
+ fake: FirstName
LastName:
- fake: last_name
+ fake: LastName
AccountId: ${{__accounts.Id}}
```
@@ -1655,66 +1654,9 @@ There are several examples [in the Snowfakery repository](https://github.com/SFD
Salesforce-specific patterns and tools are described in
[Using Snowfakery with Salesforce](salesforce.md)
-## Using Snowfakery with Databases
+## Appendices
-Snowfakery is built on top of a very flexible engine called
-SQLAlchemy. This allows it to connect to many different databases
-subject to the limitations described below.
-
-You should start by installing Snowfakery in a context which
-makes it easy to use the Python command 'pip' to manage your
-Python environment. For example you could install Python
-using the standard installers from `python.org` and then
-you would run the following commands to create and use a venv with the
-Postgres package:
-
-```bash
-
-# create a new directory for our experiment
-$ mkdir experiment_with_postgres
-# cd into it
-$ cd experiment_with_postgres
-# create a new database:
-# https://www.postgresql.org/docs/9.1/app-createdb.html
-$ createdb snowfakerydb
-# create a virtual environment. A good habit to get into.
-# https://docs.python.org/3/library/venv.html
-$ python3 -m venv myvenv
-# activate the venv
-$ source myvenv/bin/activate
-# install Snowfakery in this venv
-$ pip install snowfakery
-# install the Postgres library for Python
-# https://pypi.org/project/psycopg2/
-$ pip install psycopg2
-# let's use it!
-$ snowfakery --dburl='postgresql://localhost:5432/snowfakerydb' ~/code/Snowfakery/examples/company.yml --target-number 1000 Employee
-# and check the results
-# https://www.postgresql.org/docs/9.3/app-psql.html
-$ echo 'select * from "Employee"' | psql snowfakerydb
-```
-
-That's a lot to take in, but hopefully it will be clear enough
-to follow the links and understand the details.
-
-A limitation of this process is that currently Snowfakery can
-only create new tables rather than import into existing ones.
-
-The table will have an id column in addition to columns for every field that
-was generated by the recipe. All columns will be of type text.
-
-The list of databases supported by our underlying infrastructure
-(SQLAlchemy) is listed [here](https://docs.sqlalchemy.org/en/14/core/engines.html#supported-databases) and [here](https://docs.sqlalchemy.org/en/13/dialects/index.html).
-
-Snowfakery is not proactively tested with all of the output
-databases. We will certainly accept bug reports and pull requests
-relating to problems that are discovered.
-
-Please keep in touch with the Snowfakery team about your use of
-other databases so we can have a sense of what works well and what
-does not.
-
-## Snowfakery Glossary
+### Snowfakery Glossary
- Object: When we think about our Rows in the context of each other, we often use the word “Object”. That’s because rows often *represent* real-world entities like houses (or at least their, addresses), organizations and people (in this case its acceptable to objectify people). See also: “Rows”
- Object Template: These represent instructions on how to create a row, or multiple rows in a database. Each row represents a real-world Object.
@@ -1724,7 +1666,7 @@ does not.
- Singleton: A singleton is an Object Template that generates a single row regardless of how many times the recipe is iterated over.
- YAML: YAML is a relatively simple, human-readable format. You can learn more about it at [yaml.org](http://yaml.org/). But you can also just pick up the basics of it by reading along.
-## Security Profile of Snowfakery
+### Appendix: Security Profile of Snowfakery
Snowfakery should be considered a domain-specific programming language with
access to most of the power of Python. It can load Python plugins and
@@ -1732,26 +1674,8 @@ call Python methods. It would be unwise to run untrusted recipes in an
environment that has access to secure resources such as passwords, network
connections, etc.
-## Internal Software Architecture
-
-|Filename |Purpose |
-|--- |--- |
-|cli.py |Click-based Command Line. Uses the Click library to supply a CLI. |
-|data_generator.py |The API entry point the CLI and CCI use. This may be the best place to start reading. It abstracts away all of the complexity and outlines the core flow. |
-|parse_recipe_yaml.py |Phase 1: parse YAML into a Runtime DOM
Includes some hacks to the YAML parser for handling line numbers. |
-|data_generator_runtime.py |Phase 2: Runtime.
Actually generate the data by walking the template list top-to-bottom, generating rows as appopriate.
-|data_generator_runtime_dom.py |An object model used in Phase 2. Roughly similar to the shape of the YAML file.|
-|output_streams.py |Where the data goes in the output. Used during Phase 2. |
-|data_gen_exceptions.py |Exceptions that can be thrown |
-|generate_mapping_from_recipe.py |In the CCI context, this utility package allows the generation of mapping.yml files. |
-|template_funcs.py |Functions that can be invoked using either block syntax or in Jinja templates |
-|plugins.py |Infrastructure for plugins |
-|standard_plugins/ |Plugins that ship with Snowfakery |
-|tests/ |Unit tests |
-
-
-
-## Appendix: The Age Old Puzzle
+
+### Appendix: The Age Old Puzzle
```yaml
# As I was going to St. Ives,
diff --git a/docs/salesforce.md b/docs/salesforce.md
index c31e8944..5e2b6960 100644
--- a/docs/salesforce.md
+++ b/docs/salesforce.md
@@ -7,7 +7,11 @@ There are several examples [in the Snowfakery repository](https://github.com/SFD
## Using Snowfakery within CumulusCI
The process of actually generating the data into a Salesforce
-org happens through CumulusCI.
+org happens through CumulusCI. The majority of the documentation
+on using Snowfakery with CumulusCI is in
+[the Generate Data section of the CumulusCI documentation](https://cumulusci.readthedocs.io/en/latest/data.html?highlight=snowfakery#generate-fake-data).
+
+A summarized overview follows.
[CumulusCI](http://www.github.com/SFDO-Tooling/CumulusCI) is a
tool and framework for building portable automation for
@@ -17,8 +21,6 @@ creates Snowfakery.
The easiest way to learn about CumulusCI (and to learn how to
install it) is with its [Trailhead Trail](https://trailhead.salesforce.com/en/content/learn/trails/build-applications-with-cumulusci).
-CumulusCI's documentation [describes](https://cumulusci.readthedocs.io/en/latest/data.html?highlight=snowfakery#generate-fake-data)
-how to use it with Snowfakery. Here is a short example:
```s
$ cci task run generate_and_load_from_yaml -o generator_yaml examples/salesforce/Contact.recipe.yml -o num_records 300 -o num_records_tablename Contact --org qa
@@ -62,7 +64,7 @@ intensive, please remember to read the section
Let's use an example where you have a Campaign object and would like to associate
Contacts to it through CampaignMembers.
-Here is an example were we query a particular Campaign object:
+Here is an example where we query a particular Campaign object:
```yaml
# examples/salesforce/CampaignMembers-first.recipe.yml
@@ -228,13 +230,16 @@ In general, you can test Snowfakery files outside of CumulusCI to see if they wo
$ snowfakery recipe.yml
```
-If you have a recipe which depends on data from an org, specify the CumulusCI orgname
-like this:
+If you have a recipe which depends on data from an org,
+specify the CumulusCI org name like this:
```s
-$ snowfakery recipe.yml --plugin-options orgname qa
+$ snowfakery recipe.yml --plugin-options org_name qa
```
+When you run the recipe in this way, it will connect to the org to pull data but
+not change data in the org at all.
+
## Record Types
To specify a Record Type for a record, just put the Record Type’s API Name in a field named RecordType.
@@ -271,13 +276,13 @@ You can use Person Accounts like this:
- object: Account
fields:
FirstName:
- fake: first_name
+ fake: FirstName
LastName:
- fake: last_name
+ fake: LastName
PersonMailingStreet:
- fake: street_address
+ fake: StreetAddress
PersonMailingCity:
- fake: city
+ fake: City
PersonContactId:
Salesforce.SpecialObject: PersonContact
```
diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css
new file mode 100644
index 00000000..c2a1dfda
--- /dev/null
+++ b/docs/stylesheets/extra.css
@@ -0,0 +1,4 @@
+code {
+ white-space : pre-wrap !important;
+ }
+
\ No newline at end of file
diff --git a/examples/faker_in_formula.recipe.yml b/examples/faker_in_formula.recipe.yml
new file mode 100644
index 00000000..7274f3b3
--- /dev/null
+++ b/examples/faker_in_formula.recipe.yml
@@ -0,0 +1,3 @@
+- object: Account
+ fields:
+ Name: ${{fake.State}} State University
diff --git a/examples/parameters.recipe.yml b/examples/parameters.recipe.yml
new file mode 100644
index 00000000..0badf01c
--- /dev/null
+++ b/examples/parameters.recipe.yml
@@ -0,0 +1,3 @@
+- object: Example
+ fields:
+ gibberish_words: ${{fake.Sentence(nb_words=10, variable_nb_words=False)}}
diff --git a/examples/parameters_block.recipe.yml b/examples/parameters_block.recipe.yml
new file mode 100644
index 00000000..15ea0c46
--- /dev/null
+++ b/examples/parameters_block.recipe.yml
@@ -0,0 +1,6 @@
+- object: Example
+ fields:
+ gibberish_words:
+ fake.Sentence:
+ nb_words: 10
+ variable_nb_words: False
diff --git a/examples/salesforce/simple_account.recipe.yml b/examples/salesforce/simple_account.recipe.yml
new file mode 100644
index 00000000..147621f8
--- /dev/null
+++ b/examples/salesforce/simple_account.recipe.yml
@@ -0,0 +1,18 @@
+- object: Account
+ fields:
+ Name:
+ fake: Company
+ Description:
+ fake: CatchPhrase
+ BillingStreet:
+ fake: StreetAddress
+ BillingCity:
+ fake: City
+ BillingState:
+ fake: State
+ BillingPostalCode:
+ fake: PostalCode
+ BillingCountry:
+ fake: CurrentCountry
+ Phone:
+ fake: PhoneNumber
diff --git a/examples/salesforce/simple_account_french.recipe.yml b/examples/salesforce/simple_account_french.recipe.yml
new file mode 100644
index 00000000..72dd3104
--- /dev/null
+++ b/examples/salesforce/simple_account_french.recipe.yml
@@ -0,0 +1,20 @@
+- var: snowfakery_locale
+ value: fr_FR
+- object: Account
+ fields:
+ Name:
+ fake: Company
+ Description:
+ fake: CatchPhrase
+ BillingStreet:
+ fake: StreetAddress
+ BillingCity:
+ fake: City
+ BillingState:
+ fake: State
+ BillingPostalCode:
+ fake: PostalCode
+ BillingCountry:
+ fake: CurrentCountry
+ Phone:
+ fake: PhoneNumber
diff --git a/examples/salesforce/simple_account_random.recipe.yml b/examples/salesforce/simple_account_random.recipe.yml
new file mode 100644
index 00000000..f1d7ec3d
--- /dev/null
+++ b/examples/salesforce/simple_account_random.recipe.yml
@@ -0,0 +1,26 @@
+- var: snowfakery_locale
+ value:
+ random_choice:
+ - ja_JP # Japanese
+ - en_CA # Canadian English
+ - fr_FR # French from France
+ - fr_CA # Canadian French
+ - de_DE # German from Germany
+- object: Account
+ fields:
+ Name:
+ fake: Company
+ Description:
+ fake: CatchPhrase
+ BillingStreet:
+ fake: StreetAddress
+ BillingCity:
+ fake: City
+ BillingState:
+ fake: State
+ BillingPostalCode:
+ fake: PostalCode
+ BillingCountry:
+ fake: CurrentCountry
+ Phone:
+ fake: PhoneNumber
diff --git a/mkdocs.yml b/mkdocs.yml
index 35ac1e9a..b6a1461d 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -1,3 +1,25 @@
site_name: Snowfakery documentation
+site_url: ""
+use_directory_urls: False
theme: readthedocs
+custom_dir: custom_theme
repo_url: https://github.com/SFDO-Tooling/Snowfakery/
+nav:
+ - index.md
+ - salesforce.md
+ - Fake Data: fakedata.md
+ - Localization: locales.md
+ - embedding.md
+ - extending.md
+extra_css:
+ - stylesheets/extra.css
+markdown_extensions:
+ - toc:
+ permalink: True
+ toc_depth: 4
+plugins:
+ - search
+ - snowfakery_fakes:
+ build_locales: True # do generate locales
+ # set SF_MKDOCS_BUILD_LOCALES to override
+ # future versions MAY turn off locale-info-building on main branch
diff --git a/requirements/dev.in b/requirements/dev.in
index 9eb9c2b7..42bcbb75 100644
--- a/requirements/dev.in
+++ b/requirements/dev.in
@@ -3,12 +3,10 @@ black
coverage
coveralls
flake8
-myst-parser
-pip-tools
+mkdocs
pre-commit
pytest
pytest-cov
-Sphinx<4 # pin this until Myst is happy with Sphinx 4
typeguard==2.10.0 # do not upgrade until #181 is fixed
faker-microservice
tox
diff --git a/requirements/dev.txt b/requirements/dev.txt
index ceb84fb7..f672e435 100644
--- a/requirements/dev.txt
+++ b/requirements/dev.txt
@@ -4,19 +4,13 @@
#
# pip-compile --allow-unsafe requirements/dev.in
#
-alabaster==0.7.12
- # via sphinx
appdirs==1.4.4
# via
# black
# virtualenv
attrs==21.2.0
- # via
- # markdown-it-py
- # pytest
-babel==2.9.1
- # via sphinx
-black==21.5b2
+ # via pytest
+black==21.6b0
# via -r requirements/dev.in
certifi==2021.5.30
# via requests
@@ -28,7 +22,7 @@ click==7.1.2
# via
# -r requirements/prod.txt
# black
- # pip-tools
+ # mkdocs
contextvars==2.4
# via -r requirements/prod.txt
coverage==5.5
@@ -42,13 +36,9 @@ distlib==0.3.2
# via virtualenv
docopt==0.6.2
# via coveralls
-docutils==0.16
- # via
- # myst-parser
- # sphinx
faker-microservice==2.0.0
# via -r requirements/dev.in
-faker==8.6.0
+faker==8.10.0
# via
# -r requirements/prod.txt
# faker-microservice
@@ -58,6 +48,8 @@ filelock==3.0.12
# virtualenv
flake8==3.9.2
# via -r requirements/dev.in
+ghp-import==2.0.1
+ # via mkdocs
greenlet==1.1.0
# via
# -r requirements/prod.txt
@@ -70,17 +62,16 @@ idna==2.10
# via
# requests
# yarl
-imagesize==1.2.0
- # via sphinx
immutables==0.15
# via
# -r requirements/prod.txt
# contextvars
-importlib-metadata==4.5.0
+importlib-metadata==4.6.1
# via
# -r requirements/prod.txt
# flake8
- # pep517
+ # markdown
+ # mkdocs
# pluggy
# pre-commit
# pytest
@@ -92,39 +83,32 @@ iniconfig==1.1.1
jinja2==2.11.3
# via
# -r requirements/prod.txt
- # myst-parser
- # sphinx
-markdown-it-py==1.1.0
- # via
- # mdit-py-plugins
- # myst-parser
+ # mkdocs
+markdown==3.3.4
+ # via mkdocs
markupsafe==2.0.1
# via
# -r requirements/prod.txt
# jinja2
mccabe==0.6.1
# via flake8
-mdit-py-plugins==0.2.8
- # via myst-parser
+mergedeep==1.3.4
+ # via mkdocs
+mkdocs==1.2.1
+ # via -r requirements/dev.in
multidict==5.1.0
# via yarl
mypy-extensions==0.4.3
# via black
-myst-parser==0.14.0
- # via -r requirements/dev.in
nodeenv==1.6.0
# via pre-commit
-packaging==20.9
+packaging==21.0
# via
+ # mkdocs
# pytest
- # sphinx
# tox
pathspec==0.8.1
# via black
-pep517==0.10.0
- # via pip-tools
-pip-tools==6.1.0
- # via -r requirements/dev.in
pluggy==0.13.1
# via
# pytest
@@ -141,8 +125,6 @@ pydantic==1.8.2
# via -r requirements/prod.txt
pyflakes==2.3.1
# via flake8
-pygments==2.9.0
- # via sphinx
pyparsing==2.4.7
# via packaging
pytest-cov==2.12.1
@@ -158,21 +140,22 @@ python-dateutil==2.8.1
# via
# -r requirements/prod.txt
# faker
-pytz==2021.1
- # via babel
+ # ghp-import
+pyyaml-env-tag==0.1
+ # via mkdocs
pyyaml==5.4.1
# via
# -r requirements/prod.txt
- # myst-parser
+ # mkdocs
# pre-commit
+ # pyyaml-env-tag
# vcrpy
-regex==2021.4.4
+regex==2021.7.6
# via black
requests==2.25.1
# via
# coveralls
# responses
- # sphinx
responses==0.13.3
# via -r requirements/dev.in
six==1.16.0
@@ -183,25 +166,7 @@ six==1.16.0
# tox
# vcrpy
# virtualenv
-snowballstemmer==2.1.0
- # via sphinx
-sphinx==3.5.4
- # via
- # -r requirements/dev.in
- # myst-parser
-sphinxcontrib-applehelp==1.0.2
- # via sphinx
-sphinxcontrib-devhelp==1.0.2
- # via sphinx
-sphinxcontrib-htmlhelp==2.0.0
- # via sphinx
-sphinxcontrib-jsmath==1.0.1
- # via sphinx
-sphinxcontrib-qthelp==1.0.3
- # via sphinx
-sphinxcontrib-serializinghtml==1.1.5
- # via sphinx
-sqlalchemy==1.4.17
+sqlalchemy==1.4.20
# via -r requirements/prod.txt
text-unidecode==1.3
# via
@@ -210,7 +175,6 @@ text-unidecode==1.3
toml==0.10.2
# via
# black
- # pep517
# pre-commit
# pytest
# pytest-cov
@@ -230,10 +194,9 @@ typing-extensions==3.10.0.0
# -r requirements/prod.txt
# black
# importlib-metadata
- # markdown-it-py
# pydantic
# yarl
-urllib3==1.26.5
+urllib3==1.26.6
# via
# requests
# responses
@@ -245,18 +208,13 @@ virtualenv==20.4.7
# via
# pre-commit
# tox
+watchdog==2.1.3
+ # via mkdocs
wrapt==1.12.1
# via vcrpy
yarl==1.6.3
# via vcrpy
-zipp==3.4.1
+zipp==3.5.0
# via
# -r requirements/prod.txt
# importlib-metadata
- # pep517
-
-# The following packages are considered to be unsafe in a requirements file:
-pip==21.1.2
- # via pip-tools
-setuptools==57.0.0
- # via sphinx
diff --git a/requirements/prod.txt b/requirements/prod.txt
index 5f916a73..8b18daca 100644
--- a/requirements/prod.txt
+++ b/requirements/prod.txt
@@ -8,7 +8,7 @@ click==7.1.2
# via -r requirements/prod.in
contextvars==2.4
# via -r requirements/prod.in
-faker==8.6.0
+faker==8.10.0
# via -r requirements/prod.in
greenlet==1.1.0
# via sqlalchemy
@@ -16,7 +16,7 @@ gvgen==1.0
# via -r requirements/prod.in
immutables==0.15
# via contextvars
-importlib-metadata==4.5.0
+importlib-metadata==4.6.1
# via sqlalchemy
jinja2==2.11.3
# via -r requirements/prod.in
@@ -32,7 +32,7 @@ pyyaml==5.4.1
# via -r requirements/prod.in
six==1.16.0
# via python-dateutil
-sqlalchemy==1.4.17
+sqlalchemy==1.4.20
# via -r requirements/prod.in
text-unidecode==1.3
# via faker
@@ -41,5 +41,5 @@ typing-extensions==3.10.0.0
# -r requirements/prod.in
# importlib-metadata
# pydantic
-zipp==3.4.1
+zipp==3.5.0
# via importlib-metadata
diff --git a/setup.py b/setup.py
index d90dfa51..7d8e34d6 100644
--- a/setup.py
+++ b/setup.py
@@ -49,7 +49,8 @@ def parse_requirements_file(requirements_file) -> List[str]:
"console_scripts": [
"snowfakery=snowfakery.cli:main",
"snowbench=snowfakery.tools.snowbench:main",
- ]
+ ],
+ "mkdocs.plugins": ["snowfakery_fakes=snowfakery.tools.mkdocs:Plugin"],
},
long_description=long_description,
long_description_content_type="text/markdown",
diff --git a/snowfakery/api.py b/snowfakery/api.py
index 750990f5..5441ad83 100644
--- a/snowfakery/api.py
+++ b/snowfakery/api.py
@@ -43,6 +43,8 @@
file_extensions = tuple(OUTPUT_FORMATS.keys())
+COUNT_REPS = "__REPS__"
+
class SnowfakeryApplication:
"""Base class for all applications which embed Snowfakery as a library,
@@ -50,9 +52,10 @@ class SnowfakeryApplication:
stopping_criteria = None
starting_id = 0
+ rep_count = 0
def __init__(self, stopping_criteria: StoppingCriteria = None):
- self.stopping_criteria = stopping_criteria
+ self.stopping_criteria = stopping_criteria or StoppingCriteria(COUNT_REPS, 1)
def echo(self, message=None, file=None, nl=True, err=False, color=None):
"""Write something to a virtual stdout or stderr.
@@ -72,7 +75,7 @@ def stopping_tablename(self):
This is used by Snowfakery to validate that
the provided recipe will not generate forever
due to a misspelling the stopping tablename."""
- if self.stopping_criteria:
+ if self.stopping_criteria.tablename != COUNT_REPS:
return self.stopping_criteria.tablename
def ensure_progress_was_made(self, id_manager):
@@ -94,11 +97,13 @@ def ensure_progress_was_made(self, id_manager):
def check_if_finished(self, id_manager):
"Check whether we've finished making as many objects as we promised"
# if nobody told us how much to make, finish after first run
- if not self.stopping_criteria:
- return True
+ self.rep_count += 1
target_table, count = self.stopping_criteria
+ if target_table == COUNT_REPS:
+ return self.rep_count >= count
+
# Snowfakery processes can be restarted. We would need
# to keep track of where we restarted to know whether
# we are truly finished
diff --git a/snowfakery/cci_mapping_files/post_processes.py b/snowfakery/cci_mapping_files/post_processes.py
index 7609d41e..9725a82d 100644
--- a/snowfakery/cci_mapping_files/post_processes.py
+++ b/snowfakery/cci_mapping_files/post_processes.py
@@ -2,6 +2,8 @@
def add_after_statements(mappings):
+ """Automatically add CCI after: statements to the lookups
+ in a mapping file"""
indexed_by_sobject = _index_by_sobject(mappings)
for idx, (mapping_name, mapping) in enumerate(mappings.items()):
@@ -16,9 +18,9 @@ def add_after_statements(mappings):
lookup["after"] = target_mapping_index.last_step_name
-class MappingIndex(NamedTuple):
- first_instance: int
- last_step_name: str
+class MappingIndex(NamedTuple): # info needed by the algorithm above
+ first_instance: int # where was the first time this sobj was referenced?
+ last_step_name: str # where was the last (so far)?
def _index_by_sobject(mappings):
diff --git a/snowfakery/cli.py b/snowfakery/cli.py
index 17cece61..e5039b44 100755
--- a/snowfakery/cli.py
+++ b/snowfakery/cli.py
@@ -7,7 +7,7 @@
import click
from snowfakery import version
-from snowfakery.api import file_extensions, generate_data
+from snowfakery.api import file_extensions, generate_data, COUNT_REPS
if __name__ == "__main__": # pragma: no cover
sys.path.append(str(Path(__file__).parent.parent))
@@ -77,10 +77,17 @@ def int_string_tuple(ctx, param, value=None):
)
@click.option(
"--target-number",
+ "--target-count",
nargs=2,
- help="Target options for the recipe YAML in the form of 'number tablename'. For example: '50 Account'.",
+ help="Target record count for the recipe YAML in the form of 'number tablename'. "
+ "For example: '50 Account' to generate roughly 50 accounts.",
callback=int_string_tuple, # noqa https://github.com/pallets/click/issues/789#issuecomment-535121714
)
+@click.option(
+ "--reps",
+ help="Target repetition count for the recipe YAML. Use as an alternative to --target-number",
+ type=int,
+)
@click.option(
"--debug-internals/--no-debug-internals", "debug_internals", default=False
)
@@ -125,6 +132,7 @@ def generate_cli(
option=(),
dburls=(),
target_number=None,
+ reps=None,
debug_internals=None,
generate_cci_mapping_file=None,
output_format=None,
@@ -163,10 +171,15 @@ def generate_cli(
output_format,
output_files,
output_folder,
+ target_number,
+ reps,
)
try:
user_options = dict(option)
plugin_options = dict(plugin_option)
+ if reps:
+ target_number = (COUNT_REPS, reps)
+
generate_data(
yaml_file=yaml_file,
user_options=user_options,
@@ -201,6 +214,8 @@ def validate_options(
output_format,
output_files,
output_folder,
+ target_number,
+ reps,
):
if dburl and output_format:
raise click.ClickException(
@@ -221,6 +236,12 @@ def validate_options(
"--output-folder can only be used with --output-file= or --output-format=csv"
)
+ if target_number and reps:
+ raise click.ClickException(
+ "Sorry, you need to pick --target_number or --reps "
+ "because they are mutually exclusive."
+ )
+
def main():
generate_cli.main(prog_name="snowfakery")
diff --git a/snowfakery/data_generator.py b/snowfakery/data_generator.py
index 468a692c..eab8ccaf 100644
--- a/snowfakery/data_generator.py
+++ b/snowfakery/data_generator.py
@@ -40,7 +40,7 @@ def __init__(self, parse_results, runtime_results):
self.templates = parse_results.templates
self.intertable_dependencies = runtime_results.intertable_dependencies
- def summarize_for_debugging(self):
+ def summarize_for_debugging(self): # pragma: no cover
return self.intertable_dependencies, self.templates
@@ -192,8 +192,8 @@ def process_plugins_options(
) -> Mapping[str, object]:
"""Replace option short names with fully qualified names
and convert types of options.
- e.g. the option name that the user specifies on the CLI or API is just "orgname"
- but we use the long name internally to aavoid clashing with the
+ e.g. the option name that the user specifies on the CLI or API is just "org_name"
+ but we use the long name internally to avoid clashing with the
user's variable names."""
allowed_options = collect_allowed_plugin_options(tuple(plugins.values()))
diff --git a/snowfakery/docs/examples/secret_life_of_pets.yml b/snowfakery/docs/examples/secret_life_of_pets.yml
index 9cb6b17f..ec5e9e82 100644
--- a/snowfakery/docs/examples/secret_life_of_pets.yml
+++ b/snowfakery/docs/examples/secret_life_of_pets.yml
@@ -7,6 +7,6 @@
- object: Animal
fields:
name:
- fake: first_name
+ fake: FirstName
owner:
reference: Person
diff --git a/snowfakery/fakedata/fake_data_generator.py b/snowfakery/fakedata/fake_data_generator.py
index 41802566..066f1137 100644
--- a/snowfakery/fakedata/fake_data_generator.py
+++ b/snowfakery/fakedata/fake_data_generator.py
@@ -1,33 +1,43 @@
from difflib import get_close_matches
-from faker import Faker
import typing as T
+from faker import Faker, Generator
+
class FakeNames(T.NamedTuple):
f: Faker
- def Username(self):
+ def user_name(self):
+ "Salesforce-style username in the form of an email address"
return f"{self.f.first_name()}_{self.f.last_name()}_{self.f.uuid4()}@{self.f.hostname()}"
- def Alias(self):
+ def alias(self):
+ "Salesforce-style 8-character alias"
return self.f.first_name()[0:8]
- def FirstName(self):
- return self.f.first_name()
-
- def LastName(self):
- return self.f.last_name()
-
- def Email(self):
+ def email(self):
+ """Email address using one of the "example" domains"""
return self.f.ascii_safe_email()
- def RealisticMaybeRealEmail(self):
+ def realistic_maybe_real_email(self):
+ """Like fake: email except that the email domain may be real and therefore
+ the email address itself may be real. Use with caution, you might
+ accidentally email strangers!!!
+ """
return self.f.email()
+ def state(self):
+ """Return a state, province or other appropriate administrative unit"""
+ return self.f.administrative_unit()
+
+ def postalcode(self):
+ """Return whatever counts as a postalcode for a particular locale"""
+ return self.f.postcode()
+
# we will use this to exclude Faker's internal book-keeping methods
# from our faker interface
-faker_class_attrs = set(dir(Faker))
+faker_class_attrs = set(dir(Faker)).union((dir(Generator)))
class FakeData:
@@ -35,6 +45,10 @@ class FakeData:
def __init__(self, faker: Faker):
fake_names = FakeNames(faker)
+ self.faker = faker
+
+ def no_underscore_name(name):
+ return name.lower().replace("_", "")
def obj_to_func_list(obj: object, canonicalizer: T.Callable, ignore_list: set):
return {
@@ -49,11 +63,10 @@ def obj_to_func_list(obj: object, canonicalizer: T.Callable, ignore_list: set):
# include snowfakery names defined above
self.fake_names = {
**obj_to_func_list(faker, str.lower, faker_class_attrs),
- **obj_to_func_list(
- faker, lambda x: x.lower().replace("_", ""), faker_class_attrs
- ),
+ **obj_to_func_list(faker, no_underscore_name, faker_class_attrs),
# in case of conflict, snowfakery names "win" over Faker names
**obj_to_func_list(fake_names, str.lower, set()),
+ **obj_to_func_list(fake_names, no_underscore_name, set()),
}
def _get_fake_data(self, origname, *args, **kwargs):
diff --git a/snowfakery/object_rows.py b/snowfakery/object_rows.py
index 574ebd53..c9dc90d1 100644
--- a/snowfakery/object_rows.py
+++ b/snowfakery/object_rows.py
@@ -1,6 +1,7 @@
from enum import Enum, auto
import yaml
+import snowfakery # noqa
from .utils.yaml_utils import SnowfakeryDumper
IdManager = "snowfakery.data_generator_runtime.IdManager"
@@ -49,10 +50,6 @@ def __setstate__(self, state):
for slot, value in state.items():
setattr(self, slot, value)
- @property
- def _name(self):
- return self._values.get("name")
-
class ObjectReference(yaml.YAMLObject):
def __init__(self, tablename, id):
@@ -77,7 +74,7 @@ class NicknameSlot(ObjectReference):
id_manager: IdManager
allocated_id: int = None
- def __init__(self, tablename, id_manager):
+ def __init__(self, tablename: str, id_manager: IdManager):
self._tablename = tablename
self.id_manager = id_manager
diff --git a/snowfakery/output_streams.py b/snowfakery/output_streams.py
index 096a94ca..c95f9a99 100644
--- a/snowfakery/output_streams.py
+++ b/snowfakery/output_streams.py
@@ -10,7 +10,16 @@
from typing import Dict, Union, Optional, Mapping, Callable, Sequence
from warnings import warn
-from sqlalchemy import create_engine, MetaData, Column, Integer, Table, Unicode, func
+from sqlalchemy import (
+ create_engine,
+ MetaData,
+ Column,
+ Integer,
+ Table,
+ Unicode,
+ func,
+ inspect,
+)
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import create_session
from sqlalchemy.engine import Engine
@@ -396,6 +405,7 @@ def close(self, *args, **kwargs):
def create_tables_from_inferred_fields(tables, engine, metadata):
"""Create tables based on dictionary of tables->field-list."""
with engine.connect() as conn:
+ inspector = inspect(engine)
for table_name, table in tables.items():
columns = [Column(field_name, Unicode(255)) for field_name in table.fields]
id_column_as_list = [
@@ -412,7 +422,8 @@ def create_tables_from_inferred_fields(tables, engine, metadata):
)
t = Table(table_name, metadata, id_column, *columns)
- if t.exists():
+
+ if inspector.has_table(table_name):
stmt = select([func.count(t.c.id)])
count = conn.execute(stmt).first()[0]
if count > 0:
diff --git a/snowfakery/standard_plugins/Salesforce.py b/snowfakery/standard_plugins/Salesforce.py
index 66587353..2f833256 100644
--- a/snowfakery/standard_plugins/Salesforce.py
+++ b/snowfakery/standard_plugins/Salesforce.py
@@ -30,10 +30,18 @@
MAX_SALESFORCE_OFFSET = 2000 # Any way around this?
-# the option name that the user specifies on the CLI or API is just "orgname"
+# the option name that the user specifies on the CLI or API is just "org_name"
# but using this long name internally prevents us from clashing with the
# user's variable names.
-plugin_option_name = "snowfakery.standard_plugins.Salesforce.SalesforceQuery.orgname"
+plugin_option_org_name = (
+ "snowfakery.standard_plugins.Salesforce.SalesforceQuery.org_name"
+)
+plugin_option_org_config = (
+ "snowfakery.standard_plugins.Salesforce.SalesforceQuery.org_config"
+)
+plugin_option_project_config = (
+ "snowfakery.standard_plugins.Salesforce.SalesforceQuery.project_config"
+)
class SalesforceConnection:
@@ -41,15 +49,16 @@ class SalesforceConnection:
_sf = None
- def __init__(self, get_orgname):
- self.get_orgname = get_orgname
+ def __init__(self, get_project_config_and_org_config):
+ self.get_project_config_and_org_config = get_project_config_and_org_config
self.logger = getLogger(__name__)
@property
def sf(self):
"""simple_salesforce client"""
if not self._sf:
- self._sf, self._bulk = self._get_sf_clients(self.orgname)
+ project_config, org_config = self.get_project_config_and_org_config()
+ self._sf, self._bulk = self._get_sf_clients(project_config, org_config)
return self._sf
@property
@@ -58,11 +67,6 @@ def bulk(self):
self.sf # initializes self._bulk as a side-effect
return self._bulk
- @property
- def orgname(self):
- """Look up the orgname in the scope"""
- return self.get_orgname()
-
def query(self, *args, **kwargs):
"""Query Salesforce through simple_salesforce"""
return self.sf.query(*args, **kwargs)
@@ -109,50 +113,85 @@ def compose_query(self, context_name, **kwargs):
return query
@staticmethod
- def _get_sf_clients(orgname):
+ def _get_sf_clients(project_config, org_config):
+ from cumulusci.salesforce_api.utils import get_simple_salesforce_connection
- try:
- from cumulusci.cli.runtime import CliRuntime
- from cumulusci.salesforce_api.utils import get_simple_salesforce_connection
+ sf = get_simple_salesforce_connection(project_config, org_config)
+ return sf, _init_bulk(sf, org_config)
- runtime = CliRuntime(load_keychain=True)
- except Exception as e: # pragma: no cover
- raise DataGenError("CumulusCI Runtime cannot be loaded", *e.args)
- name, org_config = runtime.get_org(orgname)
- sf = get_simple_salesforce_connection(runtime.project_config, org_config)
- return sf, SalesforceConnection._init_bulk(sf, org_config)
+def _init_bulk(sf, org_config):
+ from salesforce_bulk import SalesforceBulk
- @staticmethod
- def _init_bulk(sf, org_config):
- from salesforce_bulk import SalesforceBulk
+ return SalesforceBulk(
+ host=org_config.instance_url.replace("https://", "").rstrip("/"),
+ sessionId=org_config.access_token,
+ API_version=sf.sf_version,
+ )
- return SalesforceBulk(
- host=org_config.instance_url.replace("https://", "").rstrip("/"),
- sessionId=org_config.access_token,
- API_version=sf.sf_version,
- )
+
+def check_orgconfig(config):
+ from cumulusci.core.config import BaseConfig
+
+ if isinstance(config, BaseConfig):
+ return config
+ raise TypeError(f"Should be a CCI Config, not {type(config)}")
class SalesforceConnectionMixin:
_sf_connection = None
- allowed_options = [PluginOption(plugin_option_name, str)]
+ _runtime = None
+ allowed_options = [
+ PluginOption(plugin_option_org_name, str),
+ PluginOption(plugin_option_org_config, check_orgconfig),
+ PluginOption(plugin_option_project_config, check_orgconfig),
+ ]
@property
def sf_connection(self):
assert self.context
if not self._sf_connection:
- self._sf_connection = SalesforceConnection(self.get_orgname)
+ self._sf_connection = SalesforceConnection(
+ self.get_project_config_and_org_config
+ )
return self._sf_connection
- def get_orgname(self):
- """Look up the orgname in the scope"""
+ def get_project_config_and_org_config(self):
+ fieldvars = self.context.field_vars()
+ project_config = fieldvars.get(plugin_option_project_config)
+ org_config = fieldvars.get(plugin_option_org_config)
+
+ if not project_config or not org_config:
+ project_config, org_config = self._get_org_info_from_cli_keychain()
+
+ return project_config, org_config
+
+ def _get_org_info_from_cli_keychain(self):
+ org_name = self.get_org_name() # from command line argument
+ runtime = self._get_CliRuntime() # from CCI CliRuntime
+ name, org_config = runtime.get_org(org_name)
+ return runtime.project_config, org_config
+
+ def _get_CliRuntime(self):
+ if self._runtime:
+ return self._runtime # pragma: no cover
+
+ try:
+ from cumulusci.cli.runtime import CliRuntime
+
+ self._runtime = CliRuntime(load_keychain=True)
+ return self._runtime
+ except Exception as e: # pragma: no cover
+ raise DataGenError("CumulusCI Runtime cannot be loaded", *e.args)
+
+ def get_org_name(self):
+ """Look up the org_name in the scope"""
fieldvars = self.context.field_vars()
try:
- return fieldvars[plugin_option_name]
+ return fieldvars[plugin_option_org_name]
except KeyError:
raise DataGenNameError(
- "Orgname is not specified. Use --plugin-option orgname ",
+ "Orgname is not specified. Use --plugin-option org_name ",
None,
None,
)
diff --git a/snowfakery/tools/mkdocs.py b/snowfakery/tools/mkdocs.py
new file mode 100644
index 00000000..fd7edea2
--- /dev/null
+++ b/snowfakery/tools/mkdocs.py
@@ -0,0 +1,29 @@
+import sys
+from pathlib import Path
+from importlib import import_module
+from unittest.mock import patch
+
+from mkdocs.plugins import BasePlugin
+import mkdocs
+
+
+class Plugin(BasePlugin):
+ config_scheme = (
+ ("build_locales", mkdocs.config.config_options.Type(bool, default=False)),
+ )
+
+ def on_config(self, config):
+ """Look for and load main_mkdocs_plugin in tools/faker_docs_utils/mkdocs_plugins.py
+ This bootstrap plugin is needed because that other one is never "installed"
+ It is just present in the repo. So it can't have an official entry point
+ in setup.py.
+ """
+ docs_dir = config["docs_dir"]
+ plugins_dir = Path(docs_dir).parent / "tools/faker_docs_utils/mkdocs_plugins"
+ new_sys_path = [*sys.path, str(plugins_dir)]
+ with patch.object(sys, "path", new_sys_path):
+ module = import_module("main_mkdocs_plugin")
+ main_plugin = module.Plugin()
+ config["plugins"]["main_mkdocs_plugin"] = main_plugin
+ main_plugin.config = self.config
+ main_plugin.on_config(config)
diff --git a/snowfakery/version.txt b/snowfakery/version.txt
index 35d51f33..415b19fc 100644
--- a/snowfakery/version.txt
+++ b/snowfakery/version.txt
@@ -1 +1 @@
-1.12
\ No newline at end of file
+2.0
\ No newline at end of file
diff --git a/tests/salesforce/test_where.recipe.yml b/tests/salesforce/test_where.recipe.yml
index 20bcf05d..88983b6c 100644
--- a/tests/salesforce/test_where.recipe.yml
+++ b/tests/salesforce/test_where.recipe.yml
@@ -1,6 +1,6 @@
# execute this recipe like this:
-# snowfakery tests/salesforce/test_where.recipe.yml --plugin-option orgname qa
+# snowfakery tests/salesforce/test_where.recipe.yml --plugin-option org_name qa
- plugin: snowfakery.standard_plugins.Salesforce.SalesforceQuery
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 29e9d217..3afb1807 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -9,7 +9,7 @@
from tests.utils import named_temporary_file_path
import yaml
-from click.exceptions import ClickException
+from click.exceptions import ClickException, BadParameter
from snowfakery.cli import generate_cli, eval_arg, main
from snowfakery.data_gen_exceptions import DataGenError
@@ -179,6 +179,19 @@ def test_from_cli__target_number(self, capsys):
assert len(re.findall(r"Account\(", stdout)) == 5
+ def test_from_cli__reps(self, capsys):
+ generate_cli.main([str(sample_yaml), "--reps", "3"], standalone_mode=False)
+ stdout = capsys.readouterr().out
+
+ assert len(re.findall(r"Account\(", stdout)) == 3
+
+ def test_from_cli__bad_target_number(self):
+ with pytest.raises(BadParameter):
+ generate_cli.main(
+ [str(sample_yaml), "--target-number", "abc", "def"],
+ standalone_mode=False,
+ )
+
def test_from_cli__explicit_format_txt(self, capsys):
with named_temporary_file_path() as t:
generate_cli.main(
@@ -198,7 +211,7 @@ def test_from_cli__explicit_format_txt(self, capsys):
output = f.read()
assert len(re.findall(r"Account\(", output)) == 5
- def test_from_cli__unknown_extension(self, capsys):
+ def test_from_cli__unknown_format(self, capsys):
with pytest.raises(ClickException) as e:
generate_cli.callback(
yaml_file=str(sample_yaml),
@@ -209,6 +222,20 @@ def test_from_cli__unknown_extension(self, capsys):
assert "xyzzy" in str(e.value)
Path("foo.txt").unlink()
+ def test_from_cli__pluggable_output_stream(self):
+ with named_temporary_file_path(suffix=".yml") as t:
+ generate_cli.main(
+ [
+ str(sample_yaml),
+ "--output-format",
+ "examples.YamlOutputStream",
+ "--output-file",
+ t,
+ ],
+ standalone_mode=False,
+ )
+ assert t.exists()
+
def test_from_cli__continuation(self, capsys):
with TemporaryDirectory() as t:
mapping_file_path = Path(t) / "mapping.yml"
@@ -415,6 +442,14 @@ def test_mutually_exclusive(self):
)
assert "apping-file" in str(e.value)
+ def test_mutually_exclusive_targets(self):
+ with pytest.raises(ClickException) as e:
+ generate_cli.main(
+ [str(sample_yaml), "--reps", "50", "--target-count", "Account", "100"],
+ standalone_mode=False,
+ )
+ assert "mutually exclusive" in str(e.value)
+
def test_cli_errors__cannot_infer_output_format(self):
with pytest.raises(ClickException, match="No format supplied"):
with TemporaryDirectory() as t:
diff --git a/tests/test_collections.py b/tests/test_collections.py
new file mode 100644
index 00000000..b312ae36
--- /dev/null
+++ b/tests/test_collections.py
@@ -0,0 +1,155 @@
+from snowfakery.utils.collections import CaseInsensitiveDict
+import pytest
+
+
+# From: https://github.com/psf/requests/blob/05a1a21593c9c8e79393d35fae12c9c27a6f7605/tests/test_requests.py
+class TestCaseInsensitiveDict:
+ @pytest.mark.parametrize(
+ "cid",
+ (
+ CaseInsensitiveDict({"Foo": "foo", "BAr": "bar"}),
+ CaseInsensitiveDict([("Foo", "foo"), ("BAr", "bar")]),
+ CaseInsensitiveDict(FOO="foo", BAr="bar"),
+ ),
+ )
+ def test_init(self, cid):
+ assert len(cid) == 2
+ assert "foo" in cid
+ assert "bar" in cid
+
+ def test_docstring_example(self):
+ cid = CaseInsensitiveDict()
+ cid["Accept"] = "application/json"
+ assert cid["aCCEPT"] == "application/json"
+ assert list(cid) == ["Accept"]
+
+ def test_len(self):
+ cid = CaseInsensitiveDict({"a": "a", "b": "b"})
+ cid["A"] = "a"
+ assert len(cid) == 2
+
+ def test_getitem(self):
+ cid = CaseInsensitiveDict({"Spam": "blueval"})
+ assert cid["spam"] == "blueval"
+ assert cid["SPAM"] == "blueval"
+
+ def test_fixes_649(self):
+ """__setitem__ should behave case-insensitively."""
+ cid = CaseInsensitiveDict()
+ cid["spam"] = "oneval"
+ cid["Spam"] = "twoval"
+ cid["sPAM"] = "redval"
+ cid["SPAM"] = "blueval"
+ assert cid["spam"] == "blueval"
+ assert cid["SPAM"] == "blueval"
+ assert list(cid.keys()) == ["SPAM"]
+
+ def test_delitem(self):
+ cid = CaseInsensitiveDict()
+ cid["Spam"] = "someval"
+ del cid["sPam"]
+ assert "spam" not in cid
+ assert len(cid) == 0
+
+ def test_contains(self):
+ cid = CaseInsensitiveDict()
+ cid["Spam"] = "someval"
+ assert "Spam" in cid
+ assert "spam" in cid
+ assert "SPAM" in cid
+ assert "sPam" in cid
+ assert "notspam" not in cid
+
+ def test_get(self):
+ cid = CaseInsensitiveDict()
+ cid["spam"] = "oneval"
+ cid["SPAM"] = "blueval"
+ assert cid.get("spam") == "blueval"
+ assert cid.get("SPAM") == "blueval"
+ assert cid.get("sPam") == "blueval"
+ assert cid.get("notspam", "default") == "default"
+
+ def test_update(self):
+ cid = CaseInsensitiveDict()
+ cid["spam"] = "blueval"
+ cid.update({"sPam": "notblueval"})
+ assert cid["spam"] == "notblueval"
+ cid = CaseInsensitiveDict({"Foo": "foo", "BAr": "bar"})
+ cid.update({"fOO": "anotherfoo", "bAR": "anotherbar"})
+ assert len(cid) == 2
+ assert cid["foo"] == "anotherfoo"
+ assert cid["bar"] == "anotherbar"
+
+ def test_update_retains_unchanged(self):
+ cid = CaseInsensitiveDict({"foo": "foo", "bar": "bar"})
+ cid.update({"foo": "newfoo"})
+ assert cid["bar"] == "bar"
+
+ def test_iter(self):
+ cid = CaseInsensitiveDict({"Spam": "spam", "Eggs": "eggs"})
+ keys = frozenset(["Spam", "Eggs"])
+ assert frozenset(iter(cid)) == keys
+
+ def test_equality(self):
+ cid = CaseInsensitiveDict({"SPAM": "blueval", "Eggs": "redval"})
+ othercid = CaseInsensitiveDict({"spam": "blueval", "eggs": "redval"})
+ assert cid == othercid
+ del othercid["spam"]
+ assert cid != othercid
+ assert cid == {"spam": "blueval", "eggs": "redval"}
+ assert cid != object()
+
+ def test_setdefault(self):
+ cid = CaseInsensitiveDict({"Spam": "blueval"})
+ assert cid.setdefault("spam", "notblueval") == "blueval"
+ assert cid.setdefault("notspam", "notblueval") == "notblueval"
+
+ def test_lower_items(self):
+ cid = CaseInsensitiveDict(
+ {
+ "Accept": "application/json",
+ "user-Agent": "requests",
+ }
+ )
+ keyset = frozenset(lowerkey for lowerkey, v in cid.lower_items())
+ lowerkeyset = frozenset(["accept", "user-agent"])
+ assert keyset == lowerkeyset
+
+ def test_preserve_key_case(self):
+ cid = CaseInsensitiveDict(
+ {
+ "Accept": "application/json",
+ "user-Agent": "requests",
+ }
+ )
+ keyset = frozenset(["Accept", "user-Agent"])
+ assert frozenset(i[0] for i in cid.items()) == keyset
+ assert frozenset(cid.keys()) == keyset
+ assert frozenset(cid) == keyset
+
+ def test_preserve_last_key_case(self):
+ cid = CaseInsensitiveDict(
+ {
+ "Accept": "application/json",
+ "user-Agent": "requests",
+ }
+ )
+ cid.update({"ACCEPT": "application/json"})
+ cid["USER-AGENT"] = "requests"
+ keyset = frozenset(["ACCEPT", "USER-AGENT"])
+ assert frozenset(i[0] for i in cid.items()) == keyset
+ assert frozenset(cid.keys()) == keyset
+ assert frozenset(cid) == keyset
+
+ def test_copy(self):
+ cid = CaseInsensitiveDict(
+ {
+ "Accept": "application/json",
+ "user-Agent": "requests",
+ }
+ )
+ cid_copy = cid.copy()
+ assert str(cid) == str(cid_copy)
+ assert cid == cid_copy
+ cid["changed"] = True
+ assert cid != cid_copy
diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py
new file mode 100644
index 00000000..7e67b11a
--- /dev/null
+++ b/tests/test_exceptions.py
@@ -0,0 +1,13 @@
+from snowfakery.data_gen_exceptions import DataGenError
+
+
+class TestExceptions:
+ def test_stringify_DataGenError(self):
+ val = str(DataGenError("Blah", "foo.yml", 25))
+ assert "Blah" in val
+ assert "foo.yml" in val
+ assert "25" in val
+
+ val = str(DataGenError("Blah", "foo.yml"))
+ assert "Blah" in val
+ assert "foo.yml" in val
diff --git a/tests/test_faker.py b/tests/test_faker.py
index af959ea9..3caf6157 100644
--- a/tests/test_faker.py
+++ b/tests/test_faker.py
@@ -207,6 +207,18 @@ def test_error_handling(self, write_row_mock):
assert "xyzzy" in str(e.value)
assert "fake" in str(e.value)
+ @mock.patch(write_row_path)
+ def test_did_you_mean(self, write_row_mock):
+ yaml = """
+ - object: A
+ fields:
+ xyzzy:
+ fake: frst_name
+ """
+ with pytest.raises(exc.DataGenError) as e:
+ generate(StringIO(yaml), {}, None)
+ assert "first_name" in str(e.value)
+
def test_faker_internals_are_invisible(self):
yaml = """
- object: A
diff --git a/tests/test_friends.py b/tests/test_friends.py
new file mode 100644
index 00000000..1aa6eed6
--- /dev/null
+++ b/tests/test_friends.py
@@ -0,0 +1,23 @@
+from io import StringIO
+
+from snowfakery.data_generator import generate
+
+
+class TestFriends:
+ def test_multiple_friends(self, generated_rows):
+ yaml = """
+ - object: Account
+ - object: Account
+ friends:
+ - object: Contact
+ fields:
+ AccountId:
+ reference: Account
+ - object: Contact
+ fields:
+ AccountId:
+ reference: Account
+ """
+ generate(StringIO(yaml), {})
+ assert generated_rows.table_values("Contact", 0, "AccountId") == "Account(2)"
+ assert generated_rows.table_values("Contact", 1, "AccountId") == "Account(2)"
diff --git a/tests/test_generate_mapping.py b/tests/test_generate_mapping.py
index b774f80d..80d1f2a8 100644
--- a/tests/test_generate_mapping.py
+++ b/tests/test_generate_mapping.py
@@ -10,6 +10,7 @@
_table_is_free,
)
from snowfakery.data_generator_runtime import Dependency
+from snowfakery.cci_mapping_files.post_processes import add_after_statements
from snowfakery import data_gen_exceptions as exc
@@ -296,6 +297,24 @@ def test_incomplete_record_types(self, tmpdir, generate_in_tmpdir):
assert mapping["Insert Case"]["fields"]["RecordTypeId"] == "recordtype"
+class TestAddAfterStatements:
+ def test_add_after_statements(self):
+ mappings = {
+ "Insert Child": {
+ "fields": {},
+ "lookups": {"parent": {"key_field": "parent", "table": "Parent"}},
+ "sf_object": "Child",
+ "table": "Child",
+ },
+ "Insert Parent": {"fields": {}, "sf_object": "Parent", "table": "Parent"},
+ "Insert Parent 2": {"fields": {}, "sf_object": "Parent", "table": "Parent"},
+ }
+ add_after_statements(mappings)
+ assert (
+ mappings["Insert Child"]["lookups"]["parent"]["after"] == "Insert Parent 2"
+ )
+
+
class TestPersonAccounts:
@skip_if_cumulusci_missing
def test_basic_person_accounts(self, generate_in_tmpdir):
diff --git a/tests/test_object_rows.py b/tests/test_object_rows.py
new file mode 100644
index 00000000..faf5ce66
--- /dev/null
+++ b/tests/test_object_rows.py
@@ -0,0 +1,9 @@
+from unittest.mock import Mock
+
+from snowfakery.object_rows import NicknameSlot
+
+
+class TestNicknameSlot:
+ def test_repr(self):
+ nns = NicknameSlot("Account", Mock())
+ assert "Account" in repr(nns)
diff --git a/tests/test_output_streams.py b/tests/test_output_streams.py
index e4d1b9d0..b4b0b313 100644
--- a/tests/test_output_streams.py
+++ b/tests/test_output_streams.py
@@ -10,6 +10,8 @@
import pytest
+from click.exceptions import ClickException
+
from sqlalchemy import create_engine
from snowfakery.output_streams import (
@@ -367,3 +369,9 @@ def test_external_output_stream_yaml(self):
"""
print(x.getvalue())
assert x.getvalue() == expected
+
+ def test_external_output_stream__failure(self):
+ with pytest.raises(ClickException, match="no.such.output.Stream"):
+ generate_cli.callback(
+ yaml_file=sample_yaml, output_format="no.such.output.Stream"
+ )
diff --git a/tests/test_salesforce_gen.py b/tests/test_salesforce_gen.py
index 4a698ec5..d5fabaaa 100644
--- a/tests/test_salesforce_gen.py
+++ b/tests/test_salesforce_gen.py
@@ -39,5 +39,5 @@ def test_profile_id(self, generated_rows, org_config):
ProfileId:
Salesforce.ProfileId: Identity User
"""
- generate_data(StringIO(yaml), plugin_options={"orgname": org_config.name})
+ generate_data(StringIO(yaml), plugin_options={"org_name": org_config.name})
assert generated_rows.table_values("foo", 0, "ProfileId").startswith("00e")
diff --git a/tests/test_with_cci.py b/tests/test_with_cci.py
index 4bf58e81..a31893d2 100644
--- a/tests/test_with_cci.py
+++ b/tests/test_with_cci.py
@@ -13,6 +13,7 @@
from snowfakery.data_generator import generate
from snowfakery.data_gen_exceptions import DataGenError
from snowfakery import generate_data
+from snowfakery.standard_plugins import Salesforce
try:
import cumulusci
@@ -29,11 +30,6 @@
sample_yaml = Path(__file__).parent / "include_parent.yml"
-skip_if_cumulusci_missing = pytest.mark.skipif(
- not hasattr(cumulusci, "api"), reason="CumulusCI not installed"
-)
-
-
class Test_CLI_CCI:
# @skip_if_cumulusci_missing
def test_mapping_file(self):
@@ -76,12 +72,12 @@ def query(self, query: str):
)
+@patch(
+ "snowfakery.standard_plugins.Salesforce.SalesforceConnection.sf",
+ wraps=fake_sf_client,
+)
+@patch("snowfakery.standard_plugins.Salesforce.randrange", lambda *arg, **kwargs: 5)
class TestSOQLNoCCI:
- @patch(
- "snowfakery.standard_plugins.Salesforce.SalesforceConnection.sf",
- wraps=fake_sf_client,
- )
- @patch("snowfakery.standard_plugins.Salesforce.randrange", lambda *arg, **kwargs: 5)
def test_soql_plugin_random(self, fake_sf_client, generated_rows):
yaml = """
- plugin: snowfakery.standard_plugins.Salesforce.SalesforceQuery
@@ -92,7 +88,7 @@ def test_soql_plugin_random(self, fake_sf_client, generated_rows):
AccountId:
SalesforceQuery.random_record: Account
"""
- generate(StringIO(yaml), plugin_options={"orgname": "blah"})
+ generate(StringIO(yaml), plugin_options={"org_name": "blah"})
assert fake_sf_client.mock_calls
assert generated_rows.row_values(0, "AccountId") == "FAKEID5"
@@ -113,13 +109,8 @@ def test_soql_plugin_no_query_from(self, fake_sf_client, generated_rows):
where: Name='Foo'
"""
with pytest.raises(DataGenError, match="Must supply 'from:'"):
- generate(StringIO(yaml), plugin_options={"orgname": "blah"})
+ generate(StringIO(yaml), plugin_options={"org_name": "blah"})
- @patch(
- "snowfakery.standard_plugins.Salesforce.SalesforceConnection.sf",
- wraps=fake_sf_client,
- )
- @patch("snowfakery.standard_plugins.Salesforce.randrange", lambda *arg, **kwargs: 5)
def test_soql_plugin_record(self, fake_sf_client, generated_rows):
yaml = """
- plugin: snowfakery.standard_plugins.Salesforce.SalesforceQuery
@@ -130,16 +121,54 @@ def test_soql_plugin_record(self, fake_sf_client, generated_rows):
AccountId:
SalesforceQuery.find_record: Account
"""
- generate(StringIO(yaml), plugin_options={"orgname": "blah"})
+ generate(StringIO(yaml), plugin_options={"org_name": "blah"})
assert fake_sf_client.mock_calls
assert generated_rows.row_values(0, "AccountId") == "FAKEID0"
+ def test_soql_plugin_random__orgname_long(self, fake_sf_client, generated_rows):
+ yaml = """
+ - plugin: snowfakery.standard_plugins.Salesforce.SalesforceQuery
+ - object: Contact
+ fields:
+ FirstName: Suzy
+ LastName: Salesforce
+ AccountId:
+ SalesforceQuery.random_record: Account
+ """
+ plugin_option_name = (
+ "snowfakery.standard_plugins.Salesforce.SalesforceQuery.org_name"
+ )
+ generate(StringIO(yaml), plugin_options={plugin_option_name: "blah"})
+ assert fake_sf_client.mock_calls
+ assert generated_rows.row_values(0, "AccountId") == "FAKEID5"
+
+
+class TestCCIError:
+ def test_pretend_cci_not_available(self):
+ filename = (
+ Path(__file__).parent.parent / "examples/salesforce_soql_example.recipe.yml"
+ )
+ with unittest.mock.patch(
+ "snowfakery.standard_plugins.Salesforce.SalesforceConnectionMixin._get_CliRuntime"
+ ) as conn:
+ conn.side_effect = ImportError("CumulusCI Runtime cannot be loaded")
+ with pytest.raises(Exception, match="CumulusCI Runtime cannot be loaded"):
+ generate_data(filename, plugin_options={"org_name": "None"})
+
+ @pytest.mark.skipif(cumulusci, reason="CCI is installed")
+ def test_cci_really_not_available(self):
+ filename = (
+ Path(__file__).parent.parent / "examples/salesforce_soql_example.recipe.yml"
+ )
+ with pytest.raises(Exception, match="CumulusCI Runtime cannot be loaded"):
+ generate_data(filename, plugin_options={"org_name": "None"})
+
@skip_if_cumulusci_missing
class TestSOQLWithCCI:
@patch("snowfakery.standard_plugins.Salesforce.randrange", lambda *arg, **kwargs: 0)
@pytest.mark.vcr()
- def test_soql(self, sf, org_config, generated_rows):
+ def test_soql(self, sf, org_config, project_config, generated_rows):
yaml = """
- plugin: snowfakery.standard_plugins.Salesforce.SalesforceQuery
- object: Contact
@@ -157,7 +186,10 @@ def test_soql(self, sf, org_config, generated_rows):
"""
assert org_config.name
sf.Account.create({"Name": "Company"})
- generate(StringIO(yaml), plugin_options={"orgname": org_config.name})
+ generate(
+ StringIO(yaml),
+ plugin_options={"org_config": org_config, "project_config": project_config},
+ )
assert len(generated_rows.mock_calls) == 2
@pytest.mark.vcr()
@@ -204,22 +236,9 @@ def test_example_through_api(self, sf, generated_rows, org_config):
filename = (
Path(__file__).parent.parent / "examples/salesforce_soql_example.recipe.yml"
)
- generate_data(filename, plugin_options={"orgname": org_config.name})
+ generate_data(filename, plugin_options={"org_name": org_config.name})
assert generated_rows.mock_calls
- def test_cci_not_available(self):
- filename = (
- Path(__file__).parent.parent / "examples/salesforce_soql_example.recipe.yml"
- )
- with unittest.mock.patch(
- "snowfakery.standard_plugins.Salesforce.SalesforceConnection._get_sf_clients"
- ) as conn:
- conn.side_effect = ImportError(
- "cumulusci module cannot be loaded by snowfakery"
- )
- with pytest.raises(Exception, match="cumulusci module cannot be loaded"):
- generate_data(filename, plugin_options={"orgname": "None"})
-
@pytest.mark.vcr()
def test_find_records_returns_nothing(self, org_config):
yaml = """
@@ -232,7 +251,7 @@ def test_find_records_returns_nothing(self, org_config):
SalesforceQuery.find_record: Contract
"""
with pytest.raises(DataGenError, match="No records returned"):
- generate_data(StringIO(yaml), plugin_options={"orgname": org_config.name})
+ generate_data(StringIO(yaml), plugin_options={"org_name": org_config.name})
@pytest.mark.vcr()
def test_find_records_returns_multiple(self, org_config, sf, generated_rows):
@@ -245,7 +264,7 @@ def test_find_records_returns_multiple(self, org_config, sf, generated_rows):
AccountId:
SalesforceQuery.find_record: User
"""
- generate_data(StringIO(yaml), plugin_options={"orgname": org_config.name})
+ generate_data(StringIO(yaml), plugin_options={"org_name": org_config.name})
first_user_id = sf.query("select Id from User")["records"][0]["Id"]
assert generated_rows.mock_calls[0][1][1]["AccountId"] == first_user_id
@@ -258,7 +277,7 @@ def test_soql_dataset_shuffled(self, sf, org_config, generated_rows):
Path(__file__).parent.parent / "examples/soql_dataset_shuffled.recipe.yml"
)
- generate_data(filename, plugin_options={"orgname": org_config.name})
+ generate_data(filename, plugin_options={"org_name": org_config.name})
assert len(generated_rows.mock_calls) == 10
for mock_call in generated_rows.mock_calls:
@@ -279,7 +298,7 @@ def test_soql_dataset_shuffled(self, sf, org_config, generated_rows):
def test_soql_dataset_in_order(self, sf, org_config, generated_rows):
filename = Path(__file__).parent.parent / "examples/soql_dataset.recipe.yml"
- generate_data(filename, plugin_options={"orgname": org_config.name})
+ generate_data(filename, plugin_options={"org_name": org_config.name})
assert len(generated_rows.mock_calls) == 10
for mock_call in generated_rows.mock_calls:
@@ -307,7 +326,7 @@ def test_soql_dataset_where(self, sf, org_config, generated_rows):
Path(__file__).parent.parent / "examples/soql_dataset_where.recipe.yml"
)
- generate_data(filename, plugin_options={"orgname": org_config.name})
+ generate_data(filename, plugin_options={"org_name": org_config.name})
assert len(generated_rows.mock_calls) == 10
for mock_call in generated_rows.mock_calls:
@@ -347,7 +366,7 @@ def download_file(*args, **kwargs):
download_file,
)
with pretend_5000, do_not_really_download:
- generate_data(filename, plugin_options={"orgname": org_config.name})
+ generate_data(filename, plugin_options={"org_name": org_config.name})
assert len(generated_rows.mock_calls) == 10
@@ -378,7 +397,7 @@ def test_dataset_bad_query(self, org_config, sf, generated_rows):
from: Xyzzy
"""
with pytest.raises(DataGenError, match="Xyzzy"):
- generate_data(StringIO(yaml), plugin_options={"orgname": org_config.name})
+ generate_data(StringIO(yaml), plugin_options={"org_name": org_config.name})
@pytest.mark.vcr()
@patch(
@@ -399,7 +418,7 @@ def test_dataset_bad_query_bulk(self, restful, org_config):
from: Account
"""
with pytest.raises(DataGenError, match="No such column 'Xyzzy' on entity"):
- generate_data(StringIO(yaml), plugin_options={"orgname": org_config.name})
+ generate_data(StringIO(yaml), plugin_options={"org_name": org_config.name})
def test_dataset_no_fields(self, org_config, sf, generated_rows):
yaml = """
@@ -412,7 +431,7 @@ def test_dataset_no_fields(self, org_config, sf, generated_rows):
junk: Junk2
"""
with pytest.raises(DataGenError, match="SOQLDataset needs a 'fields' list"):
- generate_data(StringIO(yaml), plugin_options={"orgname": org_config.name})
+ generate_data(StringIO(yaml), plugin_options={"org_name": org_config.name})
def test_dataset_no_from(self, org_config, sf, generated_rows):
yaml = """
@@ -425,4 +444,8 @@ def test_dataset_no_from(self, org_config, sf, generated_rows):
fields: Junk3
"""
with pytest.raises(DataGenError, match="SOQLDataset needs a 'from'"):
- generate_data(StringIO(yaml), plugin_options={"orgname": org_config.name})
+ generate_data(StringIO(yaml), plugin_options={"org_name": org_config.name})
+
+ def test_config_type_error(self):
+ with pytest.raises(TypeError):
+ Salesforce.check_orgconfig(None)
diff --git a/tools/faker_docs.py b/tools/faker_docs.py
new file mode 100644
index 00000000..0e63067e
--- /dev/null
+++ b/tools/faker_docs.py
@@ -0,0 +1,12 @@
+from tools.faker_docs_utils.faker_markdown import generate_markdown_for_fakers
+
+
+outfile = "docs/fakedata/en_US.md"
+with open(outfile, "w") as o:
+ generate_markdown_for_fakers(o, "en_US")
+
+outfile = "docs/fakedata/fr_FR.md"
+with open(outfile, "w") as o:
+ generate_markdown_for_fakers(o, "fr_FR")
+
+print("DONE", outfile)
diff --git a/tools/faker_docs_utils/docs_config.yml b/tools/faker_docs_utils/docs_config.yml
new file mode 100644
index 00000000..23f60a6d
--- /dev/null
+++ b/tools/faker_docs_utils/docs_config.yml
@@ -0,0 +1,199 @@
+common_fakes:
+ company:
+ example:
+ - object: Account
+ fields:
+ Name:
+ fake: Company
+ - object: Account
+ fields:
+ Name: ${{fake.Company}} Holdings
+ last_name:
+ example:
+ - object: Contact
+ fields:
+ FirstName:
+ fake: FirstName
+ LastName:
+ fake: LastName
+ - object: Contact
+ fields:
+ FirstName: ${{fake.FirstName}} Sam
+ LastName: ${{fake.FirstName}} Senior
+ first_name:
+ example:
+ - object: Contact
+ fields:
+ FirstName:
+ fake: FirstName
+ LastName:
+ fake: LastName
+ - object: Contact
+ fields:
+ FirstName: ${{fake.FirstName}} Sam
+ LastName: ${{fake.LastName}}-Jones
+ first_name_female:
+ example:
+ - object: Contact
+ fields:
+ FirstName:
+ fake: FirstNameFemale
+ LastName:
+ fake: LastName
+ - object: Contact
+ fields:
+ FirstName: ${{fake.FirstNameFemale}} Beth
+ LastName: ${{fake.LastName}}-Jones
+ first_name_male:
+ example:
+ - object: Contact
+ fields:
+ FirstName:
+ fake: FirstNameMale
+ LastName:
+ fake: LastName
+ - object: Contact
+ fields:
+ FirstName: ${{fake.FirstNameMale}} Beth
+ LastName: ${{fake.LastName}}-Jones
+ first_name_nonbinary:
+ example:
+ - object: Contact
+ fields:
+ FirstName:
+ fake: FirstNameNonBinary
+ LastName:
+ fake: LastName
+ - object: Contact
+ fields:
+ FirstName: ${{fake.FirstNameNonBinary}} Mary-John
+ LastName: ${{fake.LastName}}-Jones
+ catch_phrase:
+ example:
+ - object: Account
+ fields:
+ Name:
+ fake: Company
+ Description:
+ fake: CatchPhrase
+ - object: Account
+ fields:
+ Name: ${{fake.Company}} Holdings
+ Description: "Our Motto: ${{fake.CatchPhrase}}"
+ year:
+ example:
+ - object: Student
+ fields:
+ GraduationYear:
+ fake: year
+ - object: Student
+ fields:
+ GraduationYear: ${{year}}
+ email:
+ example:
+ - object: Contact
+ fields:
+ FirstName:
+ fake: FirstName
+ LastName:
+ fake: LastName
+ Email:
+ fake: Email
+ - object: Contact
+ fields:
+ FirstName: ${{fake.FirstName}} Sam
+ LastName: ${{fake.FirstName}} Senior
+ Email: ${{fake.Email}}
+ phone_number:
+ example:
+ - object: Account
+ fields:
+ Name:
+ fake: Company
+ Phone:
+ fake: PhoneNumber
+ - object: Account
+ fields:
+ Name: ${{fake.Company}} Holdings
+ Phone: ${{fake.PhoneNumber}} ext. 23
+ city:
+ example:
+ - object: Account
+ fields:
+ Name:
+ fake: Company
+ BillingCity:
+ fake: City
+ - object: Account
+ fields:
+ Name: ${{fake.Company}} Holdings
+ BillingCity: ${{fake.City}}
+ state:
+ example:
+ - object: Account
+ fields:
+ Name:
+ fake: Company
+ BillingState:
+ fake: State
+ - object: Account
+ fields:
+ Name: ${{fake.Company}} Holdings
+ BillingState: ${{fake.State}}
+ postalcode:
+ example:
+ - object: Account
+ fields:
+ Name:
+ fake: Company
+ BillingPostalCode:
+ fake: postalcode
+ - object: Account
+ fields:
+ Name: ${{fake.Company}} Holdings
+ BillingPostalCode: ${{fake.Postalcode}}
+ street_address:
+ example:
+ - object: Account
+ fields:
+ Name:
+ fake: Company
+ BillingStreet:
+ fake: StreetAddress
+ - object: Account
+ fields:
+ Name: ${{fake.Company}} Holdings
+ BillingStreet: ${{fake.StreetAddress}}
+ country:
+ example:
+ - object: Account
+ fields:
+ Name:
+ fake: Company
+ BillingCountry:
+ fake: Country
+ - object: Account
+ fields:
+ Name: ${{fake.Company}} Holdings
+ BillingCountry: ${{fake.Country}}
+ current_country:
+ example:
+ - object: Account
+ fields:
+ Name:
+ fake: Company
+ BillingCountry:
+ fake: CurrentCountry
+ - object: Account
+ fields:
+ Name: ${{fake.Company}} Holdings
+ BillingCountry: ${{fake.CurrentCountry}}
+ time:
+ paragraph:
+ word:
+ sentence:
+ text:
+uncommon_fakes:
+ building_number:
+ country_code:
+ current_country_code:
diff --git a/tools/faker_docs_utils/docstring.py b/tools/faker_docs_utils/docstring.py
new file mode 100644
index 00000000..db48dddc
--- /dev/null
+++ b/tools/faker_docs_utils/docstring.py
@@ -0,0 +1,243 @@
+# Based on https://github.com/joke2k/faker/blob/2dac486e6d3b5f018feb524f6fa19917ec10299e/faker/sphinx/docstring.py
+# Copied under the provisions of the MIT License
+
+# Search for "snowfakery" to find optimizations we've made.
+
+# coding=utf-8
+import inspect
+import logging
+import re
+
+from collections import namedtuple
+
+from faker import Faker
+from faker.config import AVAILABLE_LOCALES, DEFAULT_LOCALE
+from .validator import SampleCodeValidator
+
+logger = logging.getLogger(__name__)
+_fake = Faker(AVAILABLE_LOCALES)
+_base_provider_method_pattern = re.compile(
+ r"^faker\.providers\.BaseProvider\.(?P<method>\w+)$"
+)
+_standard_provider_method_pattern = re.compile(
+ r"^faker\.providers\.\w+\.Provider\.(?P<method>\w+)$"
+)
+_locale_provider_method_pattern = re.compile(
+ r"^faker\.providers\.\w+"
+ r"\.(?P<locale>[a-z]{2,3}_[A-Z]{2})"
+ r"\.Provider"
+ r"\.(?P<method>\w+)$",
+)
+_sample_line_pattern = re.compile(
+ r"^:sample"
+ r"(?: size=(?P<size>[1-9][0-9]*))?"
+ r"(?: seed=(?P<seed>[0-9]+))?"
+ r":"
+ r"(?: ?(?P<kwargs>.*))?$",
+)
+_command_template = "generator.{method}({kwargs})"
+_sample_output_template = (
+ ">>> Faker.seed({seed})\n"
+ ">>> for _ in range({size}):\n"
+ "... fake.{method}({kwargs})\n"
+ "...\n"
+ "{results}\n\n"
+)
+
+DEFAULT_SAMPLE_SIZE = 5
+DEFAULT_SEED = 0
+Sample = namedtuple("Sample", ["size", "seed", "kwargs"])
+
+
+class ProviderMethodDocstring:
+ """
+ Class that preprocesses provider method docstrings to generate sample usage and output
+
+ Notes on how samples are generated:
+ - If the docstring belongs to a standard provider method, sample usage and output will be
+ generated using a `Faker` object in the `DEFAULT_LOCALE`.
+ - If the docstring belongs to a localized provider method, the correct locale will be used.
+ - If the docstring does not belong to any provider method, docstring preprocessing will be skipped.
+ - Docstring lines will be parsed for potential sample sections, and the generation details of each
+ sample section will internally be represented as a ``Sample`` namedtuple.
+ - Each ``Sample`` will have info on the keyword arguments to pass to the provider method, how many
+ times the provider method will be called, and the initial seed value to ``Faker.seed()``.
+ """
+
+ def __init__(self, app, what, name, obj, options, lines):
+ self._line_iter = iter(lines)
+ self._parsed_lines = []
+ self._samples = []
+ self._skipped = True
+ self._log_prefix = f"{inspect.getfile(obj)}:docstring of {name}: WARNING:"
+
+ if what != "method":
+ return
+
+ base_provider_method_match = _base_provider_method_pattern.match(name)
+ locale_provider_method_match = _locale_provider_method_pattern.match(name)
+ standard_provider_method_match = _standard_provider_method_pattern.match(name)
+ if base_provider_method_match:
+ groupdict = base_provider_method_match.groupdict()
+ self._method = groupdict["method"]
+ self._locale = DEFAULT_LOCALE
+ elif standard_provider_method_match:
+ groupdict = standard_provider_method_match.groupdict()
+ self._method = groupdict["method"]
+ self._locale = DEFAULT_LOCALE
+ elif locale_provider_method_match:
+ groupdict = locale_provider_method_match.groupdict()
+ self._method = groupdict["method"]
+ self._locale = groupdict["locale"]
+ else:
+ return
+
+ self._skipped = False
+ self._parse()
+ self._generate_samples()
+
+ def _log_warning(self, warning):
+ logger.warning(f"{self._log_prefix} {warning}")
+
+ def _parse(self):
+ while True:
+ try:
+ line = next(self._line_iter)
+ except StopIteration:
+ break
+ else:
+ self._parse_section(line)
+
+ def _parse_section(self, section):
+ # No-op if section does not look like the start of a sample section
+ if not section.startswith(":sample"):
+ self._parsed_lines.append(section)
+ return
+
+ try:
+ next_line = next(self._line_iter)
+ except StopIteration:
+ # No more lines left to consume, so save current sample section
+ self._process_sample_section(section)
+ return
+
+ # Next line is the start of a new sample section, so process
+ # current sample section, and start parsing the new section
+ if next_line.startswith(":sample"):
+ self._process_sample_section(section)
+ self._parse_section(next_line)
+
+ # Next line is an empty line indicating the end of
+ # current sample section, so process current section
+ elif next_line == "":
+ self._process_sample_section(section)
+
+ # Section is assumed to be multiline, so continue
+ # adding lines to current sample section
+ else:
+ section = section + next_line
+ self._parse_section(section)
+
+ def _process_sample_section(self, section):
+ match = _sample_line_pattern.match(section)
+
+ # Discard sample section if malformed
+ if not match:
+ msg = f"The section `{section}` is malformed and will be discarded."
+ self._log_warning(msg)
+ return
+
+ # Set sample generation defaults and do some beautification if necessary
+ groupdict = match.groupdict()
+ size = groupdict.get("size")
+ seed = groupdict.get("seed")
+ kwargs = groupdict.get("kwargs")
+ size = max(int(size), DEFAULT_SAMPLE_SIZE) if size else DEFAULT_SAMPLE_SIZE
+ seed = int(seed) if seed else DEFAULT_SEED
+ kwargs = self._beautify_kwargs(kwargs) if kwargs else ""
+
+ # Store sample generation details
+ sample = Sample(size, seed, kwargs)
+ self._samples.append(sample)
+
+ def _beautify_kwargs(self, kwargs):
+ def _repl_whitespace(match):
+ quoted = match.group(1) or match.group(2)
+ return quoted if quoted else ""
+
+ def _repl_comma(match):
+ quoted = match.group(1) or match.group(2)
+ return quoted if quoted else ", "
+
+ # First, remove all whitespaces and tabs not within quotes
+ result = re.sub(r'("[^"]*")|(\'[^\']*\')|[ \t]+', _repl_whitespace, kwargs)
+
+ # Next, insert a whitespace after each comma not within quotes
+ result = re.sub(r'("[^"]*")|(\'[^\']*\')|,', _repl_comma, result)
+
+ # Then return the result with all leading and trailing whitespaces stripped
+ return result.strip()
+
+ def _stringify_result(self, value):
+ return repr(value)
+
+ def _generate_eval_scope(self):
+ from collections import (
+ OrderedDict,
+ ) # noqa: F401 Do not remove! The eval command needs this reference.
+
+ return {
+ "generator": _fake[self._locale],
+ "OrderedDict": OrderedDict,
+ }
+
+ def _inject_default_sample_section(self):
+ default_sample = Sample(DEFAULT_SAMPLE_SIZE, DEFAULT_SEED, "")
+ self._samples.append(default_sample)
+
+ def _generate_samples(self):
+ if not self._samples:
+ self._inject_default_sample_section()
+
+ output = ""
+ for sample in self._samples:
+ command = _command_template.format(
+ method=self._method, kwargs=sample.kwargs
+ )
+ validator = SampleCodeValidator(command)
+ if validator.errors:
+ msg = (
+ f"Invalid code elements detected. Sample generation will be "
+ f"skipped for method `{self._method}` with arguments `{sample.kwargs}`."
+ )
+ self._log_warning(msg)
+ continue
+
+ try:
+ Faker.seed(sample.seed)
+ # optimization for the Snowfakery context
+ results = ""
+ except Exception as e:
+ msg = f"Sample generation failed for method `{self._method}` with arguments `{sample.kwargs}`: {e}."
+ self._log_warning(msg)
+ continue
+ else:
+ output += _sample_output_template.format(
+ seed=sample.seed,
+ method=self._method,
+ kwargs=sample.kwargs,
+ size=sample.size,
+ results=results,
+ )
+
+ if output:
+ output = ":examples:\n\n" + output
+ self._parsed_lines.extend(output.split("\n"))
+
+ @property
+ def skipped(self):
+ return self._skipped
+
+ @property
+ def lines(self):
+ return self._parsed_lines
diff --git a/tools/faker_docs_utils/fakedata_header_full.md b/tools/faker_docs_utils/fakedata_header_full.md
new file mode 100644
index 00000000..c00f5b53
--- /dev/null
+++ b/tools/faker_docs_utils/fakedata_header_full.md
@@ -0,0 +1,201 @@
+# Fake data
+
+##### Overview
+
+Fake data comes in a few different flavours. Let's start with the
+most common pattern:
+
+```yaml
+# examples/salesforce/simple_account.recipe.yml
+- object: Account
+ fields:
+ Name:
+ fake: Company
+ Description:
+ fake: CatchPhrase
+ BillingStreet:
+ fake: StreetAddress
+ BillingCity:
+ fake: City
+ BillingState:
+ fake: State
+ BillingPostalCode:
+ fake: PostalCode
+ BillingCountry:
+ fake: CurrentCountry
+ Phone:
+ fake: PhoneNumber
+```
+
+So the first obvious question is where you find these names. The answer
+is you can scroll down on this page to see a long list with descriptions.
+
+The description above might generate output like this:
+
+```json
+Account(id=1, Name=Nelson-Deleon, Description=Secured bandwidth-monitored moratorium, BillingStreet=2187 Kerry Way, BillingCity=Rangelland, BillingState=Colorado, BillingPostalCode=08388, BillingCountry=United States, Phone=001-738-530-9719)
+```
+
+It doesn't matter if you use upper or lower case for fake names.
+
+##### Formulas
+
+Sometimes you might want to combine the fake data with other data
+in a single field. You can use formula syntax for this.
+
+```yaml
+# examples/faker_in_formula.recipe.yml
+- object: Account
+ fields:
+ Name: ${{fake.State}} State University
+```
+
+Some complex faker definitions can also use parameters. The
+documentation says what parameters are allowed. The docs
+for [fake: sentence](#fake-sentence) define `nb_words` and
+`variable_nb_words`, for example.
+
+```yaml
+# examples/parameters.recipe.yml
+- object: Example
+ fields:
+ gibberish_words: ${{fake.Sentence(nb_words=10, variable_nb_words=False)}}
+```
+
+##### Block fakers with parameters
+
+If you'd rather not use the formula syntax (${{ blah }}) there is also
+a nested syntax for that:
+
+```yaml
+# examples/parameters_block.recipe.yml
+- object: Example
+ fields:
+ gibberish_words:
+ fake.Sentence:
+ nb_words: 10
+ variable_nb_words: False
+```
+
+##### Localization
+
+Our fake data can be localized to many languages. We have
+[detailed docs](https://snowfakery.readthedocs.io/en/feature-fake-data-docs/locales.html)
+about how to use fake data in each of the other languages.
+
+The default locale is `en_US`: United Statesian English.
+
+Let's say that you want to generate fake data for France instead of the
+United States.
+
+You do so by setting the special `snowfakery_locale` "variable" like this.
+
+```yaml
+# examples/salesforce/simple_account_french.recipe.yml
+
+- var: snowfakery_locale
+ value: fr_FR
+- object: Account
+ fields:
+ Name:
+ fake: Company
+ Description:
+ fake: CatchPhrase
+ BillingStreet:
+ fake: StreetAddress
+ BillingCity:
+ fake: City
+ BillingState:
+ fake: State
+ BillingPostalCode:
+ fake: PostalCode
+ BillingCountry:
+ fake: CurrentCountry
+ Phone:
+ fake: PhoneNumber
+```
+
+This will translate the State to the appropriate administrative unit in
+France. `CurrentCountry` will be France, not `United States`. The Catch
+Phrase will be in French and so forth.
+
+For example:
+
+```json
+Account(id=1, Name=Parent Auger S.A.S., Description=Le confort de rouler de manière sûre, BillingStreet=54, rue de Bailly, BillingCity=Charrier, BillingState=Île-de-France, BillingPostalCode=72902, BillingCountry=France, Phone=08 05 11 90 19)
+```
+
+We can do many countries. For example, Japanese (ja_JP locale):
+
+```json
+Account(id=1, Name=有限会社山下電気, Description=Inverse 24hour pricing structure, BillingStreet=040 佐々木 Street, BillingCity=横浜市金沢区, BillingState=福岡県, BillingPostalCode=181-5538, BillingCountry=Japan, Phone=070-4156-5072)
+```
+
+We can even pick the locale randomly:
+
+```yaml
+# examples/salesforce/simple_account_random.recipe.yml
+- var: snowfakery_locale
+ value:
+ random_choice:
+ - ja_JP # Japanese
+ - en_CA # Canadian English
+ - fr_FR # French from France
+ - fr_CA # Canadian French
+ - de_DE # German from Germany
+- object: Account
+ fields:
+ Name:
+ fake: Company
+ Description:
+ fake: CatchPhrase
+ BillingStreet:
+ fake: StreetAddress
+ BillingCity:
+ fake: City
+ BillingState:
+ fake: State
+ BillingPostalCode:
+ fake: PostalCode
+ BillingCountry:
+ fake: CurrentCountry
+ Phone:
+ fake: PhoneNumber
+```
+
+##### Fake Dates and Numbers
+
+The main Snowfakery documentation describes how to fake
+[dates](index.md#date-between) and [numbers](index.md#random-number).
+
+That's it. Those are all of the concepts you need.
+
+##### Custom Faker Providers
+
+You can also include Faker extension libraries ("Providers") after
+you’ve added them to your Python install:
+
+```yaml
+ - plugin: faker_microservice.Provider
+ - object: OBJ
+ fields:
+ service_name:
+ fake:
+ microservice
+```
+
+You would install that provider like this:
+
+```s
+$ pip install faker_microservice
+```
+
+Here are some Python Faker providers:
+
+
+
+And you could make your own providers as well. Aaron Crossman
+has written [a tutorial](https://spinningcode.org/2021/06/snowfakery-custom-plugins-part-2/)
+about that process.
+
+## Index of Fake Datatypes
diff --git a/tools/faker_docs_utils/fakedata_header_short.md b/tools/faker_docs_utils/fakedata_header_short.md
new file mode 100644
index 00000000..a68c3427
--- /dev/null
+++ b/tools/faker_docs_utils/fakedata_header_short.md
@@ -0,0 +1,10 @@
+# Fake Data: {language} as spoken in {current_country} ({locale})
+
+The basic concepts of fake data are described in
+the [main tutorial](../fakedata.md#fake-data).
+
+Our fake data can be localized to many languages. We have
+[detailed docs](https://snowfakery.readthedocs.io/en/feature-fake-data-docs/locales.html)
+about the other languages.
+
+Current Locale: {locale} ({current_country})
diff --git a/tools/faker_docs_utils/faker_markdown.py b/tools/faker_docs_utils/faker_markdown.py
new file mode 100644
index 00000000..87dd2c8e
--- /dev/null
+++ b/tools/faker_docs_utils/faker_markdown.py
@@ -0,0 +1,193 @@
+import re
+from functools import lru_cache
+from pathlib import Path
+import typing as T
+
+from yaml import dump as yaml_dump
+from faker import Faker
+from faker.config import AVAILABLE_LOCALES
+from tools.faker_docs_utils.format_samples import (
+ yaml_samples_for_docstring,
+ snowfakery_output_for,
+)
+from .summarize_fakers import summarize_all_fakers
+from .language_codes import language_codes
+
+from snowfakery.fakedata.fake_data_generator import FakeData
+
+_RE_COMBINE_WHITESPACE = re.compile(r"(?<=^) +", re.MULTILINE)
+_RE_STRIP_SAMPLES = re.compile(r"^\s*:sample:.*$", re.MULTILINE)
+_COMMENT_LINES_THAT_LOOK_LIKE_TITLES = re.compile(r"^#", re.MULTILINE)
+
+non_countries = ("fr_QC", "ar_AA")
+AVAILABLE_LOCALES = [
+ locale
+ for locale in AVAILABLE_LOCALES
+ if locale not in non_countries and "_" in locale
+]
+
+
+def cleanup_docstring(my_str):
+    "Clean up a docstring to remove Faker-doc weirdness and excess whitespace"
+ my_str = _RE_COMBINE_WHITESPACE.sub("", my_str)
+ my_str = _RE_STRIP_SAMPLES.sub("", my_str).strip()
+ my_str = _COMMENT_LINES_THAT_LOOK_LIKE_TITLES.sub(" #", my_str)
+ my_str = my_str.replace(":example", "\nExample:")
+ my_str = my_str.replace(":param", "\nParam:")
+ my_str = my_str.replace(":return", "\nReturn:")
+ return my_str
+
+
+@lru_cache(maxsize=1000)
+def country_for_locale(locale: str):
+ f = Faker(locale)
+ return f.current_country()
+
+
+def locales_as_markdown_links(current_locale: str, locale_list: T.List[str]):
+ "Generate a list of Markdown locale links"
+
+ def format_link(locale: str):
+ try:
+ country_name = country_for_locale(locale)
+ except (ValueError, AttributeError):
+ return None
+ language = language_codes[locale.split("_")[0]]
+ link_text = f"{language} as spoken in {country_name}: ({locale})"
+ return f" - [{link_text}](fakedata/{locale}.md)\n"
+
+ other_locales = [locale for locale in locale_list if locale != current_locale]
+ links = [format_link(locale) for locale in other_locales]
+ return " ".join(link for link in links if link)
+
+
+standard_header = (Path(__file__).parent / "fakedata_header_short.md").read_text()
+
+
+def generate_markdown_for_fakers(outfile, locale: str, header: str = standard_header):
+ "Generate the Markdown page for a locale"
+ faker = Faker(locale)
+ language = language_codes[locale.split("_")[0]]
+ fd = FakeData(faker)
+
+ all_fakers = summarize_all_fakers(fd)
+
+ def output(*args, **kwargs):
+ print(*args, **kwargs, file=outfile)
+
+ head_md = header.format(
+ locale=locale, current_country=faker.current_country(), language=language
+ )
+ output(
+ head_md,
+ )
+
+ output("[TOC]\n")
+
+ output("## Commonly Used\n")
+ output_fakers_in_categories(output, [f for f in all_fakers if f.common], "", locale)
+ output("## Rarely Used\n")
+ output_fakers_in_categories(
+ output, [f for f in all_fakers if not f.common], "", locale
+ )
+
+
+def output_fakers_in_categories(output, fakers, common: str, locale):
+    """Sort fakers into named categories and then output them"""
+ categorized = categorize(fakers)
+ for category_name, fakers in categorized.items():
+ output(f"### {category_name.title()} Fakers\n")
+ for faker in fakers:
+ output_faker(faker.name, faker, output, locale)
+
+
+def categorize(fakers):
+ "Sort fakers based on their categories (what module they came from)"
+ categories = {}
+ for fakerdata in fakers:
+ category = fakerdata.category
+ categories.setdefault(category, [])
+ categories[category].append(fakerdata)
+ return {name: value for name, value in sorted(categories.items())}
+
+
+def gather_samples(name, data, locale):
+ if data.sample: # I already have a sample, no need to generate one
+ if locale and locale != "en_US":
+ locale_header = [{"var": "snowfakery_locale", "value": locale}]
+ sample = locale_header + data.sample
+ else:
+ sample = data.sample
+ example = yaml_dump(sample, sort_keys=False)
+ samples = [snowfakery_output_for(data.name, example, example)]
+ else: # need to generate a sample from scratch
+ samples = yaml_samples_for_docstring(name, data.fullname, data.doc, locale)
+ return list(filter(None, samples))
+
+
+def output_faker(name: str, data: str, output: callable, locale: str):
+ """Output the data relating to a particular faker"""
+ samples = gather_samples(name, data, locale)
+ # if there isn't at least one sample, don't publish
+ if not samples:
+ return
+
+ output(f"#### fake: {name}\n")
+ cleaned_docstring = cleanup_docstring(data.doc)
+ if cleaned_docstring:
+ output(cleaned_docstring)
+ output()
+
+ output("Aliases: ", ", ".join(data.aliases))
+ output()
+ link = f"[{data.source}]({data.url}) : {data.fullname}"
+ output("Source:", link)
+
+ if samples:
+ output()
+ for sample in samples:
+ yaml, out = sample
+
+ output("Recipe:\n")
+ output(indent(yaml))
+ output("Outputs:\n")
+ output(indent(out))
+ else:
+ output()
+
+
+def indent(yaml: str):
+ """Add indents to yaml"""
+ lines = yaml.split("\n")
+
+ def prefix(line):
+ return " " if line.strip() else ""
+
+ lines = [prefix(line) + line for line in lines]
+ return "\n".join(lines)
+
+
+def generate_markdown_for_all_locales(path: Path, locales=None):
+ "Generate markdown file for each listed locale. None means all locales"
+ locales = locales or AVAILABLE_LOCALES
+ for locale in locales:
+ with Path(path, f"{locale}.md").open("w") as f:
+ print(f.name)
+ generate_markdown_for_fakers(f, locale)
+
+
+def generate_locales_index(path: Path, locales_list: T.List[str]):
+ "Generate markdown index including listed locales. None means all locales"
+ locales_list = locales_list or AVAILABLE_LOCALES
+ with Path(path).open("w") as outfile:
+
+ def output(*args, **kwargs):
+ print(*args, **kwargs, file=outfile)
+
+ locales = locales_as_markdown_links(None, locales_list)
+ if locales:
+ output("## Fake Data Locales\n")
+ output(
+ "Learn more about Snowfakery localization in the [Fake Data Tutorial](fakedata.md#localization)\n"
+ )
+ output(locales)
diff --git a/tools/faker_docs_utils/format_samples.py b/tools/faker_docs_utils/format_samples.py
new file mode 100644
index 00000000..2ddb4b68
--- /dev/null
+++ b/tools/faker_docs_utils/format_samples.py
@@ -0,0 +1,170 @@
+import ast
+import yaml
+
+from io import StringIO
+from collections import OrderedDict
+from unittest.mock import MagicMock
+
+from snowfakery import generate_data
+
+from . import docstring
+
+# known code gen issues. ignore them.
+IGNORE_ERRORS = set(("uuid4", "randomchoices", "randomelement", "randomelements"))
+
+
+def samples_from_docstring(fullname, docstring_data):
+    """Convert a Faker-style docstring into a Snowfakery sample"""
+ lines = docstring_data.split("\n")
+ lines = [line.strip() for line in lines]
+ docstrings = docstring.ProviderMethodDocstring(
+ app=MagicMock(),
+ what="method",
+ name=fullname,
+ obj=MagicMock,
+ options=MagicMock(),
+ lines=lines,
+ )
+ return docstrings._samples
+
+
+def simplify(arg):
+    """Simplify Faker arg-types. e.g. tuples become lists. OrderedDicts become dicts"""
+ fieldname = arg._fields[0]
+ out = getattr(arg, fieldname)
+
+ # primitives are fine
+ if isinstance(out, (str, int, float, bool)):
+ return out
+
+ # simplify tuples to lists, and simplify the contents
+ if isinstance(out, (list, tuple)):
+ args = [simplify(a) for a in out]
+ return type(out)(args)
+
+ # simplify OrderedDicts to dicts, and simplify the contents
+ if isinstance(out, (OrderedDict, dict)):
+ return {name: simplify(value) for name, value in dict(out).items()}
+ raise TypeError(type(out), out)
+
+
+def extract_keywords(kwargstr):
+ """Reverse engineer the params from a Snowfakery faker by using the Python parser"""
+ fake_python = f"Func({kwargstr})"
+ tree = ast.parse(fake_python, mode="eval")
+ kwds = {arg.arg: simplify(arg.value) for arg in tree.body.keywords}
+ return kwds
+
+
+def reformat_yaml(yaml_data):
+ """Normalize YAML to a common format"""
+ data = yaml.safe_load(yaml_data)
+ return yaml.dump(data, sort_keys=False)
+
+
+def yaml_samples_for_docstring_sample(name, sample, locale):
+ """Try to generate Snowfakery input and output for a faker."""
+ try:
+ return _yaml_samples_for_docstring_sample_inner(name, sample, locale)
+ except Exception as e:
+ print("Cannot generate sample from docstring", sample, str(e)[0:100])
+ raise e
+
+
+def _yaml_samples_for_docstring_sample_inner(name, sample, locale):
+ """Try to generate Snowfakery input and output for a faker."""
+ try:
+ kwds = extract_keywords(sample.kwargs)
+ except Exception as e:
+ if name.lower() not in IGNORE_ERRORS:
+ IGNORE_ERRORS.add(name.lower())
+ print("Cannot extract keywords", name, sample, str(e)[0:100])
+ return None
+
+ name = name.split(".")[-1]
+ return yaml_sample(name, kwds, sample.kwargs, locale)
+
+
+def yaml_sample(name, kwds, kw_example, locale):
+ """Generate Snowfakery yaml input and output"""
+ if kwds:
+ inline_example = f"fake.{name}({kw_example})"
+ block_example = {f"fake.{name}": kwds}
+ else:
+ inline_example = f"fake.{name}"
+ block_example = {"fake": name}
+
+ inline_example = "${{" + inline_example + "}}"
+
+ if ":" in inline_example:
+ inline_example = f'"{inline_example}"'
+
+ single_part_example = f"""
+ - object: SomeObject
+ fields:
+ formula_field_example: {inline_example}"""
+
+ if locale:
+ locale_decl = f"""
+ - var: snowfakery_locale
+ value: {locale}
+ """
+ single_part_example = locale_decl + single_part_example
+ try:
+ two_part_example = (
+ single_part_example
+ + f"""
+ block_field_example: {block_example}"""
+ )
+
+ two_part_example = reformat_yaml(two_part_example)
+ single_part_example = reformat_yaml(single_part_example)
+ except Exception as e:
+ print("CANNOT PARSE")
+ print(two_part_example, single_part_example)
+ print(str(e)[0:100])
+ raise
+
+ return snowfakery_output_for(name, two_part_example, single_part_example)
+
+
+def snowfakery_output_for(name, primary_example, secondary_example):
+ """Generate the Snowfakery output for some YAML
+
+ Attempt to generate a two-part example, but fall back to single
+ or nothing if worse comes to worst."""
+ output = None
+ exception = None
+
+ for yaml_data in [primary_example, secondary_example]:
+ with StringIO() as s:
+ try:
+ generate_data(StringIO(yaml_data), output_file=s, output_format="txt")
+ output = s.getvalue()
+ exception = None
+ except Exception as e:
+ exception = e
+
+ if exception and name.lower() not in IGNORE_ERRORS:
+ print(f"Cannot generate sample for {name}: {str(exception)[0:80]}")
+ IGNORE_ERRORS.add(name.lower())
+
+ if output:
+ return yaml_data, output
+
+
+def default_yaml_sample(name, locale):
+ return yaml_sample(name, None, None, locale)
+
+
+def yaml_samples_for_docstring(name, fullname, docstring_data, locale=None):
+    """Generate examples for all samples associated with a docstring"""
+ sample_objs = samples_from_docstring(fullname, docstring_data)
+
+ output = [
+ yaml_samples_for_docstring_sample(name, sample, locale)
+ for sample in sample_objs
+ ]
+ if not output:
+ output = [default_yaml_sample(name, locale)]
+ return output
diff --git a/tools/faker_docs_utils/language_codes.py b/tools/faker_docs_utils/language_codes.py
new file mode 100644
index 00000000..2eda9969
--- /dev/null
+++ b/tools/faker_docs_utils/language_codes.py
@@ -0,0 +1,190 @@
+language_codes = dict(
+ (
+ ("ab", "Abkhaz"),
+ ("aa", "Afar"),
+ ("af", "Afrikaans"),
+ ("ak", "Akan"),
+ ("sq", "Albanian"),
+ ("am", "Amharic"),
+ ("ar", "Arabic"),
+ ("an", "Aragonese"),
+ ("hy", "Armenian"),
+ ("as", "Assamese"),
+ ("av", "Avaric"),
+ ("ae", "Avestan"),
+ ("ay", "Aymara"),
+ ("az", "Azerbaijani"),
+ ("bm", "Bambara"),
+ ("ba", "Bashkir"),
+ ("eu", "Basque"),
+ ("be", "Belarusian"),
+ ("bn", "Bengali"),
+ ("bh", "Bihari"),
+ ("bi", "Bislama"),
+ ("bs", "Bosnian"),
+ ("br", "Breton"),
+ ("bg", "Bulgarian"),
+ ("my", "Burmese"),
+ ("ca", "Catalan; Valencian"),
+ ("ch", "Chamorro"),
+ ("ce", "Chechen"),
+ ("ny", "Chichewa; Chewa; Nyanja"),
+ ("zh", "Chinese"),
+ ("cv", "Chuvash"),
+ ("kw", "Cornish"),
+ ("co", "Corsican"),
+ ("cr", "Cree"),
+ ("hr", "Croatian"),
+ ("cs", "Czech"),
+ ("da", "Danish"),
+ ("dk", "Danish"), # wrong
+ ("dv", "Divehi; Maldivian;"),
+ ("nl", "Dutch"),
+ ("dz", "Dzongkha"),
+ ("en", "English"),
+ ("eo", "Esperanto"),
+ ("et", "Estonian"),
+ ("ee", "Ewe"),
+ ("fo", "Faroese"),
+ ("fj", "Fijian"),
+ ("fi", "Finnish"),
+ ("fr", "French"),
+ ("ff", "Fula"),
+ ("gl", "Galician"),
+ ("ka", "Georgian"),
+ ("de", "German"),
+ ("el", "Greek, Modern"),
+ ("gn", "Guaraní"),
+ ("gu", "Gujarati"),
+ ("ht", "Haitian"),
+ ("ha", "Hausa"),
+ ("he", "Hebrew (modern)"),
+ ("hz", "Herero"),
+ ("hi", "Hindi"),
+ ("ho", "Hiri Motu"),
+ ("hu", "Hungarian"),
+ ("ia", "Interlingua"),
+ ("id", "Indonesian"),
+ ("ie", "Interlingue"),
+ ("ga", "Irish"),
+ ("ig", "Igbo"),
+ ("ik", "Inupiaq"),
+ ("io", "Ido"),
+ ("is", "Icelandic"),
+ ("it", "Italian"),
+ ("iu", "Inuktitut"),
+ ("ja", "Japanese"),
+ ("jv", "Javanese"),
+ ("kl", "Kalaallisut"),
+ ("kn", "Kannada"),
+ ("kr", "Kanuri"),
+ ("ks", "Kashmiri"),
+ ("kk", "Kazakh"),
+ ("km", "Khmer"),
+ ("ki", "Kikuyu, Gikuyu"),
+ ("rw", "Kinyarwanda"),
+ ("ky", "Kirghiz, Kyrgyz"),
+ ("kv", "Komi"),
+ ("kg", "Kongo"),
+ ("ko", "Korean"),
+ ("ku", "Kurdish"),
+ ("kj", "Kwanyama, Kuanyama"),
+ ("la", "Latin"),
+ ("lb", "Luxembourgish"),
+ ("lg", "Luganda"),
+ ("li", "Limburgish"),
+ ("ln", "Lingala"),
+ ("lo", "Lao"),
+ ("lt", "Lithuanian"),
+ ("lu", "Luba-Katanga"),
+ ("lv", "Latvian"),
+ ("gv", "Manx"),
+ ("mk", "Macedonian"),
+ ("mg", "Malagasy"),
+ ("ms", "Malay"),
+ ("ml", "Malayalam"),
+ ("mt", "Maltese"),
+ ("mi", "Māori"),
+ ("mr", "Marathi (Marāṭhī)"),
+ ("mh", "Marshallese"),
+ ("mn", "Mongolian"),
+ ("na", "Nauru"),
+ ("nv", "Navajo, Navaho"),
+ ("nb", "Norwegian Bokmål"),
+ ("nd", "North Ndebele"),
+ ("ne", "Nepali"),
+ ("ng", "Ndonga"),
+ ("nn", "Norwegian Nynorsk"),
+ ("no", "Norwegian"),
+ ("ii", "Nuosu"),
+ ("nr", "South Ndebele"),
+ ("oc", "Occitan"),
+ ("oj", "Ojibwe, Ojibwa"),
+ ("cu", "Old Church Slavonic"),
+ ("om", "Oromo"),
+ ("or", "Oriya"),
+ ("os", "Ossetian, Ossetic"),
+ ("pa", "Panjabi, Punjabi"),
+ ("pi", "Pāli"),
+ ("fa", "Persian"),
+ ("fil", "Filipino"), # Not a real language code
+ ("pl", "Polish"),
+ ("ps", "Pashto, Pushto"),
+ ("pt", "Portuguese"),
+ ("qu", "Quechua"),
+ ("rm", "Romansh"),
+ ("rn", "Kirundi"),
+ ("ro", "Romanian, Moldavan"),
+ ("ru", "Russian"),
+ ("sa", "Sanskrit (Saṁskṛta)"),
+ ("sc", "Sardinian"),
+ ("sd", "Sindhi"),
+ ("se", "Northern Sami"),
+ ("sm", "Samoan"),
+ ("sg", "Sango"),
+ ("sr", "Serbian"),
+ ("gd", "Scottish Gaelic"),
+ ("sn", "Shona"),
+ ("si", "Sinhala, Sinhalese"),
+ ("sk", "Slovak"),
+ ("sl", "Slovene"),
+ ("so", "Somali"),
+ ("st", "Southern Sotho"),
+ ("es", "Spanish; Castilian"),
+ ("su", "Sundanese"),
+ ("sw", "Swahili"),
+ ("ss", "Swati"),
+ ("sv", "Swedish"),
+ ("ta", "Tamil"),
+ ("te", "Telugu"),
+ ("tg", "Tajik"),
+ ("th", "Thai"),
+ ("ti", "Tigrinya"),
+ ("bo", "Tibetan"),
+ ("tk", "Turkmen"),
+ ("tl", "Tagalog"),
+ ("tn", "Tswana"),
+ ("to", "Tonga"),
+ ("tr", "Turkish"),
+ ("ts", "Tsonga"),
+ ("tt", "Tatar"),
+ ("tw", "Twi"),
+ ("ty", "Tahitian"),
+ ("ug", "Uighur, Uyghur"),
+ ("uk", "Ukrainian"),
+ ("ur", "Urdu"),
+ ("uz", "Uzbek"),
+ ("ve", "Venda"),
+ ("vi", "Vietnamese"),
+ ("vo", "Volapük"),
+ ("wa", "Walloon"),
+ ("cy", "Welsh"),
+ ("wo", "Wolof"),
+ ("fy", "Western Frisian"),
+ ("xh", "Xhosa"),
+ ("yi", "Yiddish"),
+ ("yo", "Yoruba"),
+ ("za", "Zhuang, Chuang"),
+ ("zu", "Zulu"),
+ )
+)
diff --git a/tools/faker_docs_utils/mkdocs_plugins/main_mkdocs_plugin.py b/tools/faker_docs_utils/mkdocs_plugins/main_mkdocs_plugin.py
new file mode 100644
index 00000000..aba8fd48
--- /dev/null
+++ b/tools/faker_docs_utils/mkdocs_plugins/main_mkdocs_plugin.py
@@ -0,0 +1,71 @@
+from pathlib import Path
+import sys
+import os
+from unittest.mock import patch
+from functools import lru_cache
+from logging import Logger
+
+from mkdocs.plugins import BasePlugin
+from faker.factory import Factory
+
+
+class Plugin(BasePlugin):
+ def on_config(self, config):
+ pass
+
+ def on_pre_build(self, config):
+ root_dir = Path(__file__).parent.parent.parent.parent
+ faker_docs_dir = root_dir / "docs/fakedata"
+ faker_docs_dir.mkdir(exist_ok=True)
+ new_sys_path = [*sys.path, str(root_dir)]
+ print("Note: Hiding warnings during docs build")
+
+ # make modules available
+ sys_path_patch = patch.object(sys, "path", new_sys_path)
+ warning = Logger.warning
+
+ irritating_warning = "Numbers generated by this method are purely hypothetical."
+
+ def new_warning(self, *args, **kwargs):
+ if args == (irritating_warning,):
+ return
+ else:
+ warning(self, *args, **kwargs)
+
+ logger_patch = patch("logging.Logger.warning", new=new_warning)
+
+ # speed up a critical function
+ lru_patch = patch(
+ "faker.factory.Factory._get_provider_class",
+ lru_cache(maxsize=10_000)(Factory._get_provider_class),
+ )
+
+ with sys_path_patch, lru_patch, logger_patch:
+ from tools.faker_docs_utils.faker_markdown import (
+ generate_markdown_for_all_locales,
+ generate_markdown_for_fakers,
+ generate_locales_index,
+ )
+
+ fakerdocs_md_header = (
+ root_dir / "tools/faker_docs_utils/fakedata_header_full.md"
+ )
+ main_header = Path(fakerdocs_md_header).read_text()
+ fakerdocs_md = root_dir / "docs/fakedata.md"
+ with fakerdocs_md.open("w") as f:
+ generate_markdown_for_fakers(f, "en_US", main_header)
+
+ build_locales_env = os.environ.get(
+ "SF_MKDOCS_BUILD_LOCALES"
+ ) or self.config.get("build_locales", None)
+ if build_locales_env == "False":
+ locales_list = ["en_US", "fr_FR"]
+ elif build_locales_env in (True, "True", None):
+ locales_list = None # means "all"
+ elif isinstance(build_locales_env, str):
+ locales_list = build_locales_env.split(",")
+ else:
+ assert 0, f"Unexpected build_locales_env {build_locales_env}"
+
+ generate_markdown_for_all_locales(faker_docs_dir, locales_list)
+ generate_locales_index("docs/locales.md", locales_list)
diff --git a/tools/faker_docs_utils/summarize_fakers.py b/tools/faker_docs_utils/summarize_fakers.py
new file mode 100644
index 00000000..cb005c29
--- /dev/null
+++ b/tools/faker_docs_utils/summarize_fakers.py
@@ -0,0 +1,82 @@
+from pathlib import Path
+import types
+import typing as T
+
+import yaml
+
+
+class FakerInfo(T.NamedTuple):
+ name: str
+ fullname: str
+ aliases: T.List[str]
+ url: str
+ source: str
+ category: str
+ doc: str
+ common: bool
+ sample: str
+
+
+def summarize_all_fakers(faker) -> T.Sequence[FakerInfo]:
+ """Summarize information about all fakers"""
+ from snowfakery.utils.collections import CaseInsensitiveDict
+
+ # get config info that can override samples etc.
+ with (Path(__file__).parent / "docs_config.yml").open() as f:
+ yaml_data = yaml.safe_load(f)
+ common_fakes = yaml_data["common_fakes"]
+ uncommon_fakes = yaml_data["uncommon_fakes"]
+
+ faker_infos = CaseInsensitiveDict()
+ for name, meth in faker.fake_names.items():
+ if not isinstance(meth, types.MethodType):
+ continue
+ # python magic to introspect classnames, filenames, etc.
+ friendly = _to_camel_case(name)
+ func = meth.__func__
+ doc = func.__doc__
+ filename = func.__code__.co_filename
+ cls = meth.__self__.__class__
+ fullname = cls.__module__ + "." + cls.__name__ + "." + meth.__name__
+ overrides = common_fakes.get(meth.__name__) or uncommon_fakes.get(meth.__name__)
+ is_common = meth.__name__ in common_fakes
+
+ # if it came from Faker
+ if "/faker/" in filename:
+ source = "faker"
+ idx = filename.find("/faker/")
+ url = "https://github.com/joke2k/faker/tree/master" + filename[idx:]
+ parts = filename.split("/")
+ while parts[-1] in ("__init__.py", "en_US"):
+ del parts[-1]
+ category = parts[-1]
+ else: # if it came from Snowfakery
+ source = "snowfakery"
+ idx = filename.find("/snowfakery/")
+ url = (
+ "https://github.com/SFDO-Tooling/Snowfakery/tree/main" + filename[idx:]
+ )
+ category = "Salesforce"
+
+ faker_info = faker_infos.setdefault(
+ friendly,
+ FakerInfo(
+ friendly,
+ fullname,
+ [],
+ url,
+ source,
+ category,
+ doc or "",
+ is_common,
+ overrides.get("example") if overrides else None,
+ ),
+ )
+ faker_info.aliases.append(name)
+
+ return faker_infos.values()
+
+
+def _to_camel_case(snake_str):
+ components = snake_str.split("_")
+ return "".join(x.title() for x in components)
diff --git a/tools/faker_docs_utils/validator.py b/tools/faker_docs_utils/validator.py
new file mode 100644
index 00000000..7cd87bb0
--- /dev/null
+++ b/tools/faker_docs_utils/validator.py
@@ -0,0 +1,154 @@
+# Based on https://github.com/joke2k/faker/blob/2dac486e6d3b5f018feb524f6fa19917ec10299e/faker/sphinx/validator.py
+# Copied under the provisions of the MIT License
+
+# coding=utf-8
+import ast
+import traceback
+
+from collections import OrderedDict
+
+
+class SampleCodeValidator(ast.NodeVisitor):
+ """
+ Class that checks if a string is a valid and "safe" Python expression
+
+ What is considered "safe" for this class is limited to the context of generating
+ provider method sample code and output for documentation purposes. The end goal
+ is to pass a command string to `eval()` should the string pass the validation
+ performed by this class.
+
+ The main assumption this class will make is that the command string passed during
+ class instantiation will always be in the form "{generator}.{method}({arguments})".
+ In said form, {generator} is a `Generator` object variable that already exists
+ within the scope where `eval()` will be called, {method} will be the provider
+ method name which is also available within the `eval()` scope, and {arguments}
+ will be sample arguments parsed from docstrings. This means that {arguments} can
+ potentially be used as a vector for code injection.
+
+ In order to neuter the impact of code injection, the following validation steps
+ will be applied:
+
+ - The command string is parsed using 'eval' mode, meaning expressions only.
+ - The command string can only have whitelisted code elements. See `_whitelisted_nodes`.
+ - The command string can only have one instance of variable access.
+ - The command string can only have one instance of attribute access.
+ - The command string can only have one instance of a function/method call.
+ - The argument values in the command string can only be literals.
+ - The only literals allowed are numbers (integers, floats, or complex numbers),
+ strings (but not f-strings), bytes, lists, tuples, sets, dictionaries, True,
+ False, and None.
+
+ There is, however, an exception. In order to accommodate sample code with custom
+ probability distribution, variable access to `OrderedDict` will not count against
+ the maximum limit of variable access, and invoking `OrderedDict` constructor calls
+ will not count against the maximum limit of function/method calls. In order to
+ neuter the impact of code injection, please ensure that `OrderedDict` refers to
+ the standard library's `collections.OrderedDict` within the `eval()` scope before
+ passing the command string to `eval()` for execution. This can be done in code review.
+ """
+
+ _whitelisted_nodes = (
+ # Code elements related to function calls and variable and attribute access
+ ast.Expression,
+ ast.Call,
+ ast.Attribute,
+ ast.Name,
+ ast.Load,
+ ast.keyword,
+ # Code elements representing whitelisted literals
+ ast.Num,
+ ast.Str,
+ ast.Bytes,
+ ast.List,
+ ast.Tuple,
+ ast.Set,
+ ast.Dict,
+ ast.NameConstant,
+ )
+
+ _max_function_call_count = 1
+ _max_attribute_access_count = 1
+ _max_variable_access_count = 1
+
+ def __init__(self, command):
+ self._errors = set()
+ self._function_call_count = 0
+ self._attribute_access_count = 0
+ self._variable_access_count = 0
+ self._command = command
+
+ try:
+ self._tree = ast.parse(command, mode="eval")
+ except (SyntaxError, ValueError):
+ self._log_error(traceback.format_exc())
+ else:
+ self._validate()
+
+ @property
+ def errors(self):
+ return self._errors
+
+ def _is_whitelisted(self, node):
+ return isinstance(node, self._whitelisted_nodes)
+
+ def _log_error(self, msg):
+ self._errors.add(msg)
+
+ def _validate(self):
+ self.visit(self._tree)
+
+ def _is_node_using_ordereddict(self, node):
+ is_valid = False
+
+ # If instance of function call, check if it is a call to the OrderedDict constructor
+ if isinstance(node, ast.Call):
+ is_valid = self._is_node_using_ordereddict(node.func)
+
+ # If instance of variable access, check if it is
+ elif isinstance(node, ast.Name) and node.id == OrderedDict.__name__:
+ is_valid = True
+
+ return is_valid
+
+ def visit(self, node):
+ # Check if code element type is allowed
+ if not self._is_whitelisted(node):
+ msg = "Code element `%s` is not allowed." % node.__class__.__name__
+ self._log_error(msg)
+
+ return super().visit(node)
+
+ def visit_Call(self, node):
+ if not self._is_node_using_ordereddict(node):
+ # There can only be one instance of a function call
+ if self._function_call_count < self._max_function_call_count:
+ self._function_call_count += 1
+ else:
+ msg = "There can only be one instance of a function/method call."
+ self._log_error(msg)
+
+ # Proceed to child nodes
+ self.generic_visit(node)
+
+ def visit_Attribute(self, node):
+ # There can only be one instance of attribute access
+ if self._attribute_access_count < self._max_attribute_access_count:
+ self._attribute_access_count += 1
+ else:
+ msg = "There can only be one instance of attribute access."
+ self._log_error(msg)
+
+ # Proceed to child nodes
+ self.generic_visit(node)
+
+ def visit_Name(self, node):
+ if not self._is_node_using_ordereddict(node):
+ # There can only be one instance of variable access
+ if self._variable_access_count < self._max_variable_access_count:
+ self._variable_access_count += 1
+ else:
+ msg = "There can only be one instance of variable access."
+ self._log_error(msg)
+
+ # Proceed to child nodes
+ self.generic_visit(node)