Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/frontend #45

Draft
wants to merge 3 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
182 changes: 182 additions & 0 deletions doc/schema/parameter.schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"type": "object",
"title": "The parameter input file",
"description": "Parameters passed to the synthesis method, and the utility and privacy metrics",
"required": [
"dataset",
"synth-method",
"num_samples_to_fit",
"num_samples_to_synthesize",
"num_datasets_to_synthesize",
"random_state",
"disclosure_risk",
"sklearn_classifiers"
],
"dependencies": {
"synth-method": {
"oneOf": [
{
"required": ["sgf"],
"properties": {
"synth-method": { "enum": ["sgf"] },
"sgf": {
"type": "object",
"title": "SGF parameters",
"description": "Parameters for the SGF synthesis method",
"required": [
"gamma",
"omega",
"ncomp",
"ndist",
"k",
"epsilon0",
"tinc"
],
"properties": {
"gamma": {
"type": "number",
"title": "gamma",
"description": "Privacy parameter which controls how the plausible seeds for a record are determined.",
"exclusiveMinimum": 1.0
},
"omega": {
"type": "integer",
"title": "omega",
"description": "The number of resampled attributes is N - omega",
"minimum": 0
},
"ncomp": {
"enum": ["adv", "seq"],
"title": "Composition strategy",
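"description": "The differential-privacy composition strategy: 'adv' (advanced composition) or 'seq' (sequential composition).",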
"default": "adv"
},
"ndist": {
"enum": ["none", "lap", "geom"],
"title": "Noise distribution",
"description": "The noise distribution of the generative model: 'none', 'lap' (Laplacian) or 'geom' (Geometric)."
},
"k": {
"type": "integer",
"title": "k",
"description": "Minimum number of plausible seeds that a synthetic data point needs to have to be released.",
"minimum": 1
},
"epsilon0": {
"type": "number",
"title": "epsilon0",
"description": "epsilon-Differential privacy parameter"
},
"tinc": {
"type": "integer",
"title": "tinc",
"description": "Step size to create trade-off curve (between 1 and k - 1)"
}
}
}
}
},
{
"properties": {
"synth-method": { "enum": ["ctgan", "synthpop"] }
}
}
]
}
},
"properties": {
"dataset": {
"type": "string",
"title": "The dataset",
"description": "The prefix of the filename of the dataset (.csv will be appended)"
},
"synth-method": {
"enum": ["ctgan", "sgf", "synthpop"],
"title": "Synthesis method",
"description": "The synthesis method used for the run. It must correspond to a subdirectory of `synth-methods`"
},
"num_samples_to_fit": {
"type": "integer",
"title": "Number of samples to fit",
"description": "How many samples from the input dataset should be used as input to the synthesis procedure? To use all of the input records, pass a value of `-1`."
},
"num_samples_to_synthesize": {
"type": "integer",
"title": "Number of samples to synthesize",
"description": "How many synthetic samples should be produced as output? To produce the same number of output records as input records, pass a value of `-1`."
},
"num_datasets_to_synthesize": {
"type": "integer",
"title": "Number of entire datasets to synthesize",
"description": "How many entire synthetic datasets should be produced?",
"minimum": 0
},
"random_state": {
"type": "integer",
"title": "Random seed",
"description": "The seed for the random number generator. Most methods require a PRNG; passing the seed explicitly aids the testability and reproducibility of the synthetic output.",
"default": 0
},
"disclosure_risk": {
"type": "object",
"title": "Disclosure risk parameters",
"description": "Parameters passed to the disclosure risk privacy metric",
"required": [
"num_samples_intruder",
"vars_intruder"
],
"properties": {
"num_samples_intruder": {
"type": "integer",
"title": "Intruder sample count",
"description": "How many records corresponding to the original dataset exist in a dataset visible to an attacker?",
"minimum": 0
},
"vars_intruder": {
"type": "array",
"title": "Intruder variables",
"description": "Names of the columns that are available in the attacker-visible dataset",
"items": { "type": "string" }
}
}
},
"sklearn_classifiers": {
"type": "object",
"title": "Scikit-learn classifier parameters",
"description": "Parameters needed to compute the classification utility scores with scikit-learn",
"required": [
"input_columns",
"label_column",
"test_train_ratio",
"num_leaked_rows"
],
"properties": {
"input_columns": {
"type": "array",
"items": { "type": "string" },
"title": "Column names",
"description": "Names of the columns to use as the explanatory variables for the classification"
},
"label_column": {
"type": "string",
"title": "Label column",
"description": "The name of the column to use for the category labels"
},
"test_train_ratio": {
"type": "number",
"title": "test/train ratio",
"description": "Fraction of records to use in the test set for the classification",
"minimum": 0.0
},
"num_leaked_rows": {
"type": "integer",
"title": "Count of leaked records",
"description": "The number of additional records from the original dataset with which to augment the synthetic data set before training the classifiers. This is primarily an option to enable testing of the utility metric (i.e. the more rows we leak, the better the utility should become). It should be set to 0 during normal synthesis tasks.",
"default": 0,
"minimum": 0
}
}
}
}
}
25 changes: 25 additions & 0 deletions frontend/example.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<!DOCTYPE html>
<html lang="en-gb">
  <head>
    <meta charset="utf-8">
    <title>QUIPP parameters</title>

    <!-- bootstrap styles: loaded in the head so the page is styled before first paint -->
    <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css" integrity="sha384-HSMxcRTRxnN+Bdg0JdbxYKrThecOKuH5zCYotlSAcp1+c8xmyTe9GYg1l9a69psu" crossorigin="anonymous">
    <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap-theme.min.css" integrity="sha384-6pzBo3FDv/PJ8r2KRkGHifhEocL+1X2rVCTTkUfGk7/0pbek5mMa1upzvWbrUbOZ" crossorigin="anonymous">
  </head>

  <body>
    <!-- Mount point for the parameter form; presumably form.js renders into this element (verify against form.js) -->
    <div id="app" class="container"></div>

    <!--
      bootstrap.min.js is deliberately not loaded: Bootstrap 3's JavaScript
      requires jQuery, which this page does not include, so the script would
      only throw an error at load time. Only the Bootstrap CSS is needed here.
    -->

    <!-- react (development builds; switch to the production builds before deploying) -->
    <script src="https://unpkg.com/react@16/umd/react.development.js" crossorigin></script>
    <script src="https://unpkg.com/react-dom@16/umd/react-dom.development.js" crossorigin></script>

    <!-- react-jsonschema-form -->
    <script src="https://unpkg.com/react-jsonschema-form/dist/react-jsonschema-form.js" crossorigin></script>

    <!-- Application script; must come after the libraries above so their globals are defined -->
    <script src="form.js"></script>
  </body>
</html>
Loading