From be1a71962a58d745c9aa4522b0fb3a22a9c39883 Mon Sep 17 00:00:00 2001 From: Sarina Meyer Date: Wed, 14 Sep 2022 13:28:21 +0200 Subject: [PATCH] Added code to paper "Speaker Anonymization with Phonetic Intermediate Representations" --- .gitignore | 134 +++++++++++ .gitmodules | 7 + IMS-Toucan | 1 + README.md | 120 ++++++++- Voice-Privacy-Challenge-2020 | 1 + ...native_challenge_framework_installation.md | 62 +++++ anonymization/__init__.py | 2 + anonymization/base_anonymizer.py | 53 ++++ anonymization/plda_model.py | 81 +++++++ anonymization/pool_anonymizer.py | 145 +++++++++++ anonymization/random_anonymizer.py | 76 ++++++ anonymization/speaker_embeddings.py | 220 +++++++++++++++++ evaluation/__init__.py | 1 + evaluation/evaluation_data.py | 69 ++++++ evaluation/run_evaluation.sh | 227 ++++++++++++++++++ evaluation/run_make_vctk_anon_subsets.sh | 66 +++++ figures/architecture.png | Bin 0 -> 47042 bytes inference/__init__.py | 3 + inference/anonymization.py | 93 +++++++ inference/asr.py | 69 ++++++ inference/tts.py | 84 +++++++ requirements.txt | 4 + run_inference.py | 89 +++++++ setup_scripts/install_challenge_framework.sh | 64 +++++ setup_scripts/run_download_data.sh | 98 ++++++++ setup_scripts/run_prepare_data.sh | 100 ++++++++ utils/__init__.py | 2 + utils/data_io.py | 19 ++ utils/path_management.py | 18 ++ utils/run_cleanup.sh | 8 + 30 files changed, 1915 insertions(+), 1 deletion(-) create mode 100644 .gitignore create mode 100644 .gitmodules create mode 160000 IMS-Toucan create mode 160000 Voice-Privacy-Challenge-2020 create mode 100644 alternative_challenge_framework_installation.md create mode 100644 anonymization/__init__.py create mode 100644 anonymization/base_anonymizer.py create mode 100644 anonymization/plda_model.py create mode 100644 anonymization/pool_anonymizer.py create mode 100644 anonymization/random_anonymizer.py create mode 100644 anonymization/speaker_embeddings.py create mode 100755 evaluation/__init__.py create mode 100755 evaluation/evaluation_data.py create mode 100755 evaluation/run_evaluation.sh create mode 100755 evaluation/run_make_vctk_anon_subsets.sh create mode 100644 figures/architecture.png create mode 100644 inference/__init__.py create mode 100644 inference/anonymization.py create mode 100644 inference/asr.py create mode 100644 inference/tts.py create mode 100644 requirements.txt create mode 100644 run_inference.py create mode 100755 setup_scripts/install_challenge_framework.sh create mode 100755 setup_scripts/run_download_data.sh create mode 100755 setup_scripts/run_prepare_data.sh create mode 100644 utils/__init__.py create mode 100644 utils/data_io.py create mode 100644 utils/path_management.py create mode 100644 utils/run_cleanup.sh diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..27940a1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,134 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +models/ +original_speaker_embeddings/ +corpora/ +results/ \ No newline at end of file diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..a31b01e --- /dev/null +++ b/.gitmodules @@ -0,0 +1,7 @@ +[submodule "Voice-Privacy-Challenge-2020"] + path = Voice-Privacy-Challenge-2020 + url = https://github.com/Voice-Privacy-Challenge/Voice-Privacy-Challenge-2020 +[submodule "IMS-Toucan"] + path = IMS-Toucan + url = https://github.com/Flux9665/IMS-Toucan + branch = vp_inference/1912a835c4b3de20f5190797e684f10aa45a76d9 diff --git a/IMS-Toucan b/IMS-Toucan new file mode 160000 index 0000000..1912a83 --- /dev/null +++ b/IMS-Toucan @@ -0,0 +1 @@ +Subproject commit 1912a835c4b3de20f5190797e684f10aa45a76d9 diff --git a/README.md b/README.md index 376760e..d9a4078 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,121 @@ # Speaker Anonymization -The code, descriptions and a link to the demo will be added soon. +This repository contains the speaker anonymization system developed at the Institute for Natural Language Processing +(IMS) at the University of Stuttgart, Germany. The system is described in our paper [*Speaker Anonymization with +Phonetic Intermediate Representations*](https://arxiv.org/abs/2207.04834) that will be +published at +Interspeech 2022. + +**In addition to the code, we are going to provide a live demo soon.** + +## System Description +The system is based on the Voice Privacy Challenge 2020 which is included as submodule. It uses the basic idea of +speaker embedding anonymization with neural synthesis, and uses the data and evaluation framework of the challenge. +For a detailed description of the system, please read our paper linked above. + +![architecture](../speaker-anonymization/figures/architecture.png) + + +## Installation +Clone this repository with all its submodules: +``` +git clone --recurse-submodules https://github.com/DigitalPhonetics/speaker-anonymization.git +``` + +In order to be able to use the framework of the Voice Privacy Challenge 2020 for evaluation, you need to install it +first. 
+According to [the challenge repository](https://github.com/Voice-Privacy-Challenge/Voice-Privacy-Challenge-2020), this should simply be
+```
+cd Voice-Privacy-Challenge-2020
+./install.sh
+```
+However, on our systems, we had to make certain adjustments and also decided to use a more light-weight environment
+that minimizes unnecessary components. If you are interested, you can see our steps in
+[alternative_challenge_framework_installation.md](alternative_challenge_framework_installation.md). Just as a note: it is
+very possible that those steps would not directly work on your system and would need to be modified.
+
+**Note: this step will download and install Kaldi, and might lead to complications. Additionally, make sure that you
+are running the install script on a device with access to GPUs and CUDA.**
+
+Additionally, install the [requirements](requirements.txt) (in the base directory of this repository):
+```
+pip install -r requirements.txt
+```
+
+## Getting started
+Before the actual execution of our pipeline system, you first need to download and prepare the challenge data and
+the evaluation models. For this, you will need a password provided by the organizers of the Voice Privacy Challenge.
+Please contact them (see information on [their repository](https://github.com/Voice-Privacy-Challenge/Voice-Privacy-Challenge-2020)
+or [website](https://www.voiceprivacychallenge.org/)) for this access.
+
+You can do this by either
+
+### a) Executing our lightweight scripts:
+This will only download and prepare the necessary models and datasets. Note that these scripts are simply extracts
+of the challenge run script.
+```
+cd setup_scripts
+./run_download_data.sh
+./run_prepare_data.sh
+```
+
+or by
+### b) Executing the challenge run script:
+This will download and prepare everything necessary AND run the baseline system of the Voice Privacy Challenge 2020.
+Note that you will need to have installed the whole framework with the challenge install script beforehand.
+```
+cd Voice-Privacy-Challenge-2020/baseline
+./run.sh
+```
+
+### Running the pipeline
+The system pipeline is controlled in [run_inference.py](run_inference.py). You can run it via
+```
+python run_inference.py --gpu <gpu_id>
+```
+with `<gpu_id>` being the ID of the GPU the code should be executed on. If this option is not specified, the code will
+run on CPU (not recommended).
+
+The script will anonymize the development and test data of LibriSpeech and VCTK in three steps:
+1. ASR: Recognition of the linguistic content, output in the form of text or phone sequences
+2. Anonymization: Modification of speaker embeddings, output as torch vectors
+3. TTS: Synthesis based on the recognized transcription and the anonymized speaker embedding, output as audio files (wav)
+
+Each module produces intermediate results that are saved to disk. A module is only executed if previous intermediate
+results for the respective pipeline combination do not exist or if recomputation is forced. Otherwise, the previous
+results are loaded. Example: the ASR module is only executed if there are no transcriptions produced by exactly that
+ASR model. On the other hand, the TTS is executed if (a) the ASR was performed directly before (new transcriptions),
+and/or (b) the anonymization was performed directly before (new speaker embeddings), and/or (c) no TTS results exist
+for this combination of models.
+
+If you want to change any settings, like the particular models or datasets, you can adjust the *settings* dictionary
+in [run_inference.py](run_inference.py).
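+A minimal sketch of what this dictionary could look like is shown below. It is illustrative only: the exact keys,
+dataset tags and default values are defined in [run_inference.py](run_inference.py), so treat the key names and
+dataset tags as assumptions; the model names are the ones listed in the *Models* table below.
+```
+# illustrative sketch of the settings dictionary, not the verbatim code from run_inference.py
+settings = {
+    'datasets': ['libri_dev', 'libri_test', 'vctk_dev', 'vctk_test'],  # assumed dataset tags
+    'asr': 'asr_tts-phn_en.zip',                   # phone-based ASR model ("phones")
+    'anon': 'pool_minmax_ecapa+xvector',           # pool anonymization, main configuration
+    'tts': 'trained_on_ground_truth_phonemes.pt',  # "Libri100" TTS model
+}
+```
+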
+If you want to force recomputation for a specific module, add its tag to the *force_compute* list.
+
+Immediately after the anonymization pipeline terminates, the evaluation pipeline is started. It performs some
+preparation steps and then executes the evaluation part of the challenge run script (this extract can be found in
+[evaluation/run_evaluation.sh](evaluation/run_evaluation.sh)).
+
+Finally, for clarity, the most important parts of the evaluation results as well as the used settings are copied to
+the [results](results) directory.
+
+
+## Models
+The following table lists all models for each module that are reported in the paper and are included in this
+repository. Each model is given by its name in the directory and the name used in the paper. In the *settings*
+dictionary in [run_inference.py](run_inference.py), the model name should be used. An *x* in the *Default* column
+marks the models that are used in the main configuration of the system.
+
+| Module | Default | Model name | Name in paper |
+|--------|---------|------------|---------------|
+| ASR | x | asr_tts-phn_en.zip | phones |
+| | | asr_stt_en | STT |
+| | | asr_tts_en.zip | TTS |
+| Anonymization | x | pool_minmax_ecapa+xvector | pool |
+| | | pool_raw_ecapa+xvector | pool raw |
+| | | random_in-scale_ecapa+xvector | random |
+| TTS | x | trained_on_ground_truth_phonemes.pt | Libri100 |
+| | | trained_on_asr_phoneme_outputs.pt | Libri100 + finetuned |
+| | | trained_on_libri600_asr_phoneme_outputs.pt | Libri600 |
+| | | trained_on_libri600_ground_truth_phonemes.pt | Libri600 + finetuned |
diff --git a/Voice-Privacy-Challenge-2020 b/Voice-Privacy-Challenge-2020
new file mode 160000
index 0000000..f58ef14
--- /dev/null
+++ b/Voice-Privacy-Challenge-2020
@@ -0,0 +1 @@
+Subproject commit f58ef147eb2be14cc2a844ca76454926fabe24b6
diff --git a/alternative_challenge_framework_installation.md b/alternative_challenge_framework_installation.md
new file mode 100644
index 0000000..38d76bc
--- /dev/null
+++ b/alternative_challenge_framework_installation.md
@@ -0,0 +1,62 @@
+# Alternative Installation of the Framework for the Voice Privacy Challenge 2020
+Unfortunately, the installation is not always as easy as the organizers imply in their
+[install script](Voice-Privacy-Challenge-2020/install.sh), which also installs several tools that are only necessary
+if the primary baseline of the challenge is to be executed. To adapt the script to our devices and pipeline, we
+shortened and modified it, and exchanged some components.
+
+**Note: To run the code in this repository, it is NOT necessary to use the installation steps described in this
+document. Instead, you can also simply use the original [install script](Voice-Privacy-Challenge-2020/install.sh).
+If you use this document, be aware that you probably have to modify several steps to make it work for you.**
+
+## Installation Steps
+This guide expects that you have cloned the repository including its submodules. Once you have followed the
+installation steps described below, continue with the *Getting started* section in the [main README](README.md).
+
+### 1. Environment creation
+The original installation script would create a conda environment, but conda would include many packages that are not
+always needed.
We therefore 'manually' create a virtual environment within the +repository: +``` +virtualenv venv --python=python3.8 +source venv/bin/activate +pip install -r Voice-Privacy-Challenge-2020/requirements.txt +``` +Instead of the last line, if you want to install all requirements for the whole repository, you can instead run +``` +pip install -r requirements.txt +``` +(If this does not work, install the requirements files listed in it separately) + +Finally, we have to make the install script skip the step of creating an environment by creating the required check +file: +``` +touch Voice-Privacy-Challenge-2020/.done-venv +``` + +### 2. Adapting Kaldi +The version of Kaldi in the framework is not up to date, and even the up to date one does not officially support our +gcc version. We have to change that: +``` +cd Voice-Privacy-Challenge-2020/kaldi +git checkout master +vim src/configure +``` +In src/configure, change the min supported gcc version: +``` + - MIN_UNSUPPORTED_GCC_VER="10.0" + - MIN_UNSUPPORTED_GCC_VER_NUM=100000; + + MIN_UNSUPPORTED_GCC_VER="12.0" + + MIN_UNSUPPORTED_GCC_VER_NUM=120000; +``` + +### 3. CUDA and MKL +Due to several installed versions of CUDA and MKL, and very specific requirements of Kaldi, we have to specify the +paths to them in the [setup_scripts/install_challenge_framework.sh](../speaker-anonymization/setup_scripts/install_challenge_framework.sh) file. + +### 4. Installation +Once everything above is resolved, you simply have to run the adapted install script: +``` +cd setup_scripts +./install_challenge_framework.sh +``` diff --git a/anonymization/__init__.py b/anonymization/__init__.py new file mode 100644 index 0000000..64dcbbb --- /dev/null +++ b/anonymization/__init__.py @@ -0,0 +1,2 @@ +from .pool_anonymizer import PoolAnonymizer +from .random_anonymizer import RandomAnonymizer diff --git a/anonymization/base_anonymizer.py b/anonymization/base_anonymizer.py new file mode 100644 index 0000000..cbecdee --- /dev/null +++ b/anonymization/base_anonymizer.py @@ -0,0 +1,53 @@ +from pathlib import Path +import torch + +from .speaker_embeddings import SpeakerEmbeddings + + +class BaseAnonymizer: + + def __init__(self, vec_type='xvector', device=None, emb_level='spk', **kwargs): + # Base class for speaker embedding anonymization. + self.vec_type = vec_type + self.emb_level = emb_level + + if isinstance(device, torch.device): + self.device = device + elif isinstance(device, str): + self.device = torch.device(device) + elif isinstance(device, int): + self.device = torch.device(f'cuda:{device}') + else: + self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') + + def load_parameters(self, model_dir: Path): + # Template method for loading parameters special to the anonymization method. Not implemented. + raise NotImplementedError('load_parameters') + + def save_parameters(self, model_dir: Path): + # Template method for saving parameters special to the anonymization method. Not implemented. + raise NotImplementedError('save_parameters') + + def load_embeddings(self, emb_dir: Path): + # Load previously extracted or generated speaker embeddings from disk. + embeddings = SpeakerEmbeddings(self.vec_type, device=self.device, emb_level=self.emb_level) + embeddings.load_vectors(emb_dir) + return embeddings + + def save_embeddings(self, embeddings, emb_dir): + # Save speaker embeddings to disk. 
+ embeddings.save_vectors(emb_dir) + + def anonymize_data(self, data_dir: Path, vector_dir: Path, emb_level='spk'): + # Template method for anonymizing a dataset. Not implemented. + raise NotImplementedError('anonymize_data') + + def _get_speaker_embeddings(self, data_dir: Path, vector_dir: Path, emb_level='spk'): + # Retrieve original speaker embeddings, either by extracting or loading them. + vectors = SpeakerEmbeddings(vec_type=self.vec_type, emb_level=emb_level, device=self.device) + if vector_dir.exists(): + vectors.load_vectors(in_dir=vector_dir) + else: + vectors.extract_vectors_from_audio(data_dir=data_dir) + vectors.save_vectors(out_dir=vector_dir) + return vectors diff --git a/anonymization/plda_model.py b/anonymization/plda_model.py new file mode 100644 index 0000000..d74b62a --- /dev/null +++ b/anonymization/plda_model.py @@ -0,0 +1,81 @@ +# This code is based on the descriptions in https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/processing/PLDA_LDA.py +from pathlib import Path +from speechbrain.processing.PLDA_LDA import PLDA, StatObject_SB, Ndx, fast_PLDA_scoring +import numpy as np +import torch + +class PLDAModel: + + def __init__(self, train_embeddings, results_path: Path=None): + self.mean, self.F, self.Sigma = None, None, None + + files_exist = False + if results_path and results_path.exists(): + files_exist = self.load_parameters(results_path) + if not files_exist: + self._train_plda(train_embeddings) + self.save_parameters(results_path) + + def compute_distance(self, enrollment_vectors, trial_vectors): + enrol_vecs = enrollment_vectors.cpu().numpy() + en_sets, en_s, en_stat0 = self._get_vector_stats(enrol_vecs, sg_tag='en') + en_stat = StatObject_SB(modelset=en_sets, segset=en_sets, start=en_s, stop=en_s, stat0=en_stat0, + stat1=enrol_vecs) + + trial_vecs = trial_vectors.cpu().numpy() + te_sets, te_s, te_stat0 = self._get_vector_stats(trial_vecs, sg_tag='te') + te_stat = StatObject_SB(modelset=te_sets, segset=te_sets, start=te_s, stop=te_s, stat0=te_stat0, + stat1=trial_vecs) + + ndx = Ndx(models=en_sets, testsegs=te_sets) + scores_plda = fast_PLDA_scoring(en_stat, te_stat, ndx, self.mean, self.F, self.Sigma) + return scores_plda.scoremat + + def save_parameters(self, filename): + filename.parent.mkdir(parents=True, exist_ok=True) + np.save(filename / 'plda_mean.npy', self.mean) + np.save(filename / 'plda_F.npy', self.F) + np.save(filename / 'plda_Sigma.npy', self.Sigma) + + def load_parameters(self, dir_path): + existing_files = [x.name for x in dir_path.glob('*')] + files_exist = True + if 'plda_mean.npy' in existing_files: + self.mean = np.load(dir_path / 'plda_mean.npy') + else: + files_exist = False + + if 'plda_F.npy' in existing_files: + self.F = np.load(dir_path / 'plda_F.npy') + else: + files_exist = False + + if 'plda_Sigma.npy' in existing_files: + self.Sigma = np.load(dir_path / 'plda_Sigma.npy') + else: + files_exist = False + return files_exist + + def _train_plda(self, train_embeddings): + vectors = train_embeddings.speaker_vectors.to(torch.float64) + speakers = train_embeddings.speakers + + modelset = np.array([f'md{speaker}' for speaker in speakers], dtype="|O") + segset, s, stat0 = self._get_vector_stats(vectors, sg_tag='sg') + + xvectors_stat = StatObject_SB(modelset=modelset, segset=segset, start=s, stop=s, stat0=stat0, + stat1=vectors.cpu().numpy()) + + plda = PLDA(rank_f=100) + plda.plda(xvectors_stat) + + self.mean = plda.mean + self.F = plda.F + self.Sigma = plda.Sigma + + def _get_vector_stats(self, vectors, sg_tag='sg'): + 
N, dim = vectors.shape + segset = np.array([f'{sg_tag}{i}' for i in range(N)], dtype="|O") + s = np.array([None] * N) + stat0 = np.array([[1.0]] * N) + return segset, s, stat0 \ No newline at end of file diff --git a/anonymization/pool_anonymizer.py b/anonymization/pool_anonymizer.py new file mode 100644 index 0000000..25374a9 --- /dev/null +++ b/anonymization/pool_anonymizer.py @@ -0,0 +1,145 @@ +from pathlib import Path +import numpy as np +import torch +import json +from tqdm import tqdm +from sklearn.metrics.pairwise import cosine_distances + +from .base_anonymizer import BaseAnonymizer +from .plda_model import PLDAModel +from .speaker_embeddings import SpeakerEmbeddings +from utils import create_clean_dir + +REVERSED_GENDERS = {'m': 'f', 'f': 'm'} + + +class PoolAnonymizer(BaseAnonymizer): + + def __init__(self, pool_data_dir=Path('libritts_train_other_500'), vec_type='xvector', N=200, N_star=100, + distance='plda', cross_gender=False, proximity='farthest', device=None, model_name=None, **kwargs): + # Pool anonymization method based on the primary baseline of the Voice Privacy Challenge 2020. + # Given a speaker vector, the N most distant vectors in an external speaker pool are extracted, + # and an average of a random subset of N_star vectors is computed and taken as new speaker vector. + # Default distance measure is PLDA. + super().__init__(vec_type=vec_type, device=device) + + self.model_name = model_name if model_name else f'pool_{vec_type}' + + self.pool_data_dir = pool_data_dir # data for external speaker pool + self.N = N # number of most distant vectors to consider + self.N_star = N_star # number of vectors to include in averaged vector + self.distance = distance # distance measure, either 'plda' or 'cosine' + self.proximity = proximity # proximity method, either 'farthest' (distant vectors), 'nearest', or 'closest' + self.cross_gender = cross_gender # Whether to reverse the genders of the speakers + + self.pool_embeddings = None + self.pool_genders = {} + self.plda = None + + def load_parameters(self, model_dir: Path): + self._load_settings(model_dir / 'settings.json') + self.pool_embeddings = SpeakerEmbeddings(vec_type=self.vec_type, emb_level='spk', device=self.device) + self.pool_embeddings.load_vectors(model_dir / 'pool_embeddings') + self.pool_genders = {gender: [i for i, spk_gender in enumerate(self.pool_embeddings.genders) + if spk_gender == gender] for gender in set(self.pool_embeddings.genders)} + if self.distance == 'plda': + self.plda = PLDAModel(train_embeddings=self.pool_embeddings, results_path=model_dir) + + def save_parameters(self, model_dir: Path): + create_clean_dir(model_dir) + self.pool_embeddings.save_vectors(model_dir / 'pool_embeddings') + self._save_settings(model_dir / 'settings.json') + if self.plda: + self.plda.save_parameters(model_dir) + + def anonymize_data(self, data_dir: Path, vector_dir: Path, emb_level='spk'): + print('Load original speaker embeddings...') + speaker_embeddings = self._get_speaker_embeddings(data_dir, vector_dir / f'{emb_level}_level_{self.vec_type}', + emb_level=emb_level) + if not self.pool_embeddings: + print('Compute speaker embeddings for speaker pool...') + self.pool_embeddings = SpeakerEmbeddings(vec_type=self.vec_type, emb_level='spk', device=self.device) + self.pool_embeddings.extract_vectors_from_audio(self.pool_data_dir) + self.pool_genders = {gender: [i for i, spk_gender in enumerate(self.pool_embeddings.genders) + if spk_gender == gender] for gender in set(self.pool_embeddings.genders)} + if self.distance == 
'plda' and not self.plda: + print('Train PLDA model...') + self.plda = PLDAModel(train_embeddings=self.pool_embeddings) + + print('pool embeddings', self.pool_embeddings.speaker_vectors.shape) + print('speaker embeddings', speaker_embeddings.speaker_vectors.shape) + distance_matrix = self._compute_distances(vectors_a=self.pool_embeddings.speaker_vectors, + vectors_b=speaker_embeddings.speaker_vectors) + + print(f'Anonymize embeddings of {len(speaker_embeddings)} speakers...') + speakers = [] + anon_vectors = [] + genders = [] + for i in tqdm(range(len(speaker_embeddings))): + speaker, _ = speaker_embeddings[i] + gender = speaker_embeddings.genders[i] + distances_to_speaker = distance_matrix[:, i] + candidates = self._get_pool_candidates(distances_to_speaker, gender) + selected_anon_pool = np.random.choice(candidates, self.N_star, replace=False) + anon_vec = torch.mean(self.pool_embeddings.speaker_vectors[selected_anon_pool], dim=0) + speakers.append(speaker) + anon_vectors.append(anon_vec) + genders.append(gender if not self.cross_gender else REVERSED_GENDERS[gender]) + + anon_embeddings = SpeakerEmbeddings(vec_type=self.vec_type, device=self.device) + anon_embeddings.set_vectors(speakers=speakers, vectors=torch.stack(anon_vectors, dim=0), genders=genders, + utt2spk=speaker_embeddings.utt2spk) + + return anon_embeddings + + def _compute_distances(self, vectors_a, vectors_b): + if self.distance == 'plda': + return 1 - self.plda.compute_distance(enrollment_vectors=vectors_a, trial_vectors=vectors_b) + elif self.distance == 'cosine': + return cosine_distances(X=vectors_a.cpu(), Y=vectors_b.cpu()) + else: + return [] + + def _get_pool_candidates(self, distances, gender): + if self.cross_gender is True: + distances = distances[self.pool_genders[REVERSED_GENDERS[gender]]] + else: + distances = distances[self.pool_genders[gender]] + + if self.proximity == 'farthest': + return np.argpartition(distances, -self.N)[-self.N:] + elif self.proximity == 'nearest': + return np.argpartition(distances, self.N)[:self.N] + elif self.proximity == 'center': + sorted_distances = np.sort(distances) + return sorted_distances[len(sorted_distances)//2:(len(sorted_distances)//2)+self.N] + + def _save_settings(self, filename): + settings = { + 'vec_type': self.vec_type, + 'N': self.N, + 'N*': self.N_star, + 'distance': self.distance, + 'proximity': self.proximity, + 'cross_gender': self.cross_gender + } + with open(filename, 'w') as f: + json.dump(settings, f) + + def _load_settings(self, filename): + with open(filename, 'r') as f: + settings = json.load(f) + + self.N = settings['N'] if 'N' in settings else self.N + self.N_star = settings['N*'] if 'N*' in settings else self.N_star + self.distance = settings['distance'] if 'distance' in settings else self.distance + self.proximity = settings['proximity'] if 'proximity' in settings else self.proximity + self.cross_gender = settings['cross_gender'] if 'cross_gender' in settings else self.cross_gender + self.vec_type = settings['vec_type'] if 'vec_type' in settings else self.vec_type + + + +# for every source x-vector, an anonymized x-vector is computed by finding the N farthest x- +# vectors in an external pool (LibriTTS train-other-500) accord- +# ing to the PLDA distance, and by averaging N ∗ randomly se- +# lected vectors among them. 
In the baseline, we use N = 200 and N ∗ = 100 diff --git a/anonymization/random_anonymizer.py b/anonymization/random_anonymizer.py new file mode 100644 index 0000000..3d1a709 --- /dev/null +++ b/anonymization/random_anonymizer.py @@ -0,0 +1,76 @@ +import json +from pathlib import Path +import torch +import numpy as np + +from .speaker_embeddings import SpeakerEmbeddings +from .base_anonymizer import BaseAnonymizer +from utils import create_clean_dir + + +class RandomAnonymizer(BaseAnonymizer): + + def __init__(self, vec_type='xvector', device=None, model_name=None, in_scale=False, **kwargs): + super().__init__(vec_type=vec_type, device=device) + self.model_name = model_name if model_name else f'random_{vec_type}' + + self.in_scale = in_scale + self.dim_ranges = None + + def load_parameters(self, model_dir): + with open(model_dir / 'settings.json') as f: + settings = json.load(f) + self.vec_type = settings['vec_type'] if 'vec_type' in settings else self.vec_type + self.in_scale = settings.get('in_scale', self.in_scale) + + if self.in_scale: + with open(model_dir / 'stats_per_dim.json') as f: + dim_ranges = json.load(f) + self.dim_ranges = [(v['min'], v['max']) for k, v in sorted(dim_ranges.items(), key=lambda x: int(x[0]))] + + def save_parameters(self, model_dir): + create_clean_dir(model_dir) + settings = { + 'vec_type': self.vec_type + } + with open(model_dir / 'settings.json', 'w') as f: + json.dump(settings, f) + + def anonymize_data(self, data_dir: Path, vector_dir: Path, emb_level='spk'): + speaker_embeddings = self._get_speaker_embeddings(data_dir, vector_dir / f'{emb_level}_level_{self.vec_type}', + emb_level=emb_level) + + if self.dim_ranges: + print('Anonymize vectors in scale!') + return self._anonymize_data_in_scale(speaker_embeddings) + else: + speakers = [] + anon_vectors = [] + genders = speaker_embeddings.genders + for speaker, vector in speaker_embeddings: + mask = torch.zeros(vector.shape[0]).float().random_(-40, 40).to(self.device) + anon_vec = vector * mask + speakers.append(speaker) + anon_vectors.append(anon_vec) + + anon_embeddings = SpeakerEmbeddings(vec_type=self.vec_type, device=self.device) + anon_embeddings.set_vectors(speakers=speakers, vectors=torch.stack(anon_vectors, dim=0), genders=genders, + utt2spk=speaker_embeddings.utt2spk) + + return anon_embeddings + + def _anonymize_data_in_scale(self, speaker_embeddings): + speakers = [] + anon_vectors = [] + genders = speaker_embeddings.genders + + for speaker, vector in speaker_embeddings: + anon_vec = torch.tensor([np.random.uniform(*dim_range) for dim_range in self.dim_ranges]).to(self.device) + speakers.append(speaker) + anon_vectors.append(anon_vec) + + anon_embeddings = SpeakerEmbeddings(vec_type=self.vec_type, device=self.device) + anon_embeddings.set_vectors(speakers=speakers, vectors=torch.stack(anon_vectors, dim=0), genders=genders, + utt2spk=speaker_embeddings.utt2spk) + + return anon_embeddings diff --git a/anonymization/speaker_embeddings.py b/anonymization/speaker_embeddings.py new file mode 100644 index 0000000..2c77f3a --- /dev/null +++ b/anonymization/speaker_embeddings.py @@ -0,0 +1,220 @@ +from pathlib import Path +from collections import defaultdict +import numpy as np +import torch +import torchaudio +import pyloudnorm as pyln +from speechbrain.pretrained import EncoderClassifier + +from utils import read_kaldi_format, save_kaldi_format + + +VALID_VEC_TYPES = {'xvector', 'ecapa', 'ecapa+xvector'} + + +class SpeakerEmbeddings: + + def __init__(self, vec_type='xvector', emb_level='spk', 
device=torch.device('cpu')): + self.vec_type = vec_type + assert self.vec_type in VALID_VEC_TYPES, f'Invalid vec_type {self.vec_type}, must be one of {VALID_VEC_TYPES}' + self.emb_level = emb_level + self.device = device + + self.speakers = None # in the case of utt-level embeddings, this will be utterances, else speakers + self.utt2spk = {} + self.genders = None + self.idx2speakers = None + self.speaker_vectors = None + + def __iter__(self): + assert (self.speakers is not None) and (self.speaker_vectors is not None), \ + 'Speaker vectors need to be extracted or loaded before they can be iterated!' + assert len(self.speakers) == self.speaker_vectors.shape[0], \ + f'Not same amount of speakers and vectors, #speakers: {len(self.speakers)}, #vectors:' \ + f' {self.speaker_vectors.shape[0]}!' + + for speaker, idx in sorted(self.speakers.items(), key=lambda x: x[1]): + yield speaker, self.speaker_vectors[idx] + + def __len__(self): + return len(self.speakers) if self.speakers else 0 + + def __getitem__(self, item): + assert (self.speakers is not None) and (self.speaker_vectors is not None), \ + 'Speaker vectors need to be extracted or loaded before they can be accessed!' + assert item <= len(self), 'Index needs to be smaller or equal the number of speakers!' + return self.idx2speakers[item], self.speaker_vectors[item] + + def extract_vectors_from_audio(self, data_dir: Path, model_dir=Path('pretrained_models')): + # The following lines download and load the corresponding speaker embedding model from huggingface and store + # it in the corresponding savedir. If a model has been previously downloaded and stored already, + # it is loaded from savedir instead of downloading it again. + encoders = [] + if 'ecapa' in self.vec_type: + encoders.append(EncoderClassifier.from_hparams(source='speechbrain/spkrec-ecapa-voxceleb', + savedir=model_dir / 'spkrec-ecapa-voxceleb', + run_opts={'device': self.device})) + if 'xvector' in self.vec_type: + encoders.append(EncoderClassifier.from_hparams(source='speechbrain/spkrec-xvect-voxceleb', + savedir=model_dir / 'spkrec-xvect-voxceleb', + run_opts={'device': self.device})) + + recordings = read_kaldi_format(data_dir / 'wav.scp') + utt2spk = read_kaldi_format(data_dir / 'utt2spk') + spk2gender = read_kaldi_format(data_dir / 'spk2gender') + + spk2utt_ids = defaultdict(list) + vectors = [] + + i = 0 + for rec_name, rec_file in recordings.items(): + if self.emb_level == 'utt': + speaker = rec_name + else: # speaker-level anonymization + speaker = utt2spk[rec_name] + self.utt2spk[rec_name] = utt2spk[rec_name] + signal, fs = torchaudio.load(rec_file) + vector = self._extract_embedding(wave=signal, sr=fs, encoders=encoders) + + spk2utt_ids[speaker].append(i) + vectors.append(vector) + i += 1 + + if self.emb_level == 'utt': + self.speakers = {speaker: id_list[0] for speaker, id_list in spk2utt_ids.items()} + self.speaker_vectors = torch.stack(vectors, dim=0).to(self.device) + spk2gender = {utt: spk2gender[speaker] for utt, speaker in utt2spk.items()} + else: + self.speakers, self.speaker_vectors = self._get_speaker_level_vectors(spk2utt_ids, torch.stack( + vectors, dim=0).to(self.device)) + self.genders = [spk2gender[speaker] for speaker in self.speakers] + self.idx2speakers = {idx: spk for spk, idx in self.speakers.items()} + + def set_vectors(self, speakers, vectors, genders, utt2spk): + if not isinstance(speakers, dict): + self.speakers = {speaker: idx for idx, speaker in enumerate(speakers)} + else: + self.speakers = speakers + self.speaker_vectors = vectors + 
self.genders = genders + self.idx2speakers = {idx: spk for spk, idx in self.speakers.items()} + self.utt2spk = utt2spk + + def add_vectors(self, speakers, vectors, genders, utt2spk): + assert (self.speakers is not None) and (self.speaker_vectors is not None), \ + 'Speaker vectors need to be extracted or loaded before new vectors can be added to them!' + if not isinstance(speakers, dict): + speakers = {speakers: idx for idx, speaker in enumerate(speakers)} + + new_speakers = list(speakers.keys() - self.speakers.keys()) + indices = [speakers[speaker] for speaker in new_speakers] + last_known_index = len(self.speaker_vectors) + + new_speaker_dict = {speaker: last_known_index + i for i, speaker in enumerate(new_speakers)} + self.speakers.update(new_speaker_dict) + self.idx2speakers.update({idx: speaker for speaker, idx in new_speaker_dict.items()}) + self.speaker_vectors = torch.cat((self.speaker_vectors, vectors[indices]), dim=0) + self.genders.extend([genders[idx] for idx in indices]) + if utt2spk: + self.utt2spk.update({utt: utt2spk[utt] for utt in new_speaker_dict.keys()}) + + def load_vectors(self, in_dir:Path): + assert ((in_dir / f'spk2idx').exists() or (in_dir / f'utt2idx').exists()) and \ + ((in_dir / f'speaker_vectors.pt').exists()), \ + f'speaker_vectors.pt and either spk2idx or utt2idx must exist in {in_dir}!' + + spk2gender = read_kaldi_format(in_dir / 'spk2gender') + self.speaker_vectors = torch.load(in_dir / f'speaker_vectors.pt', map_location=self.device) + + if self.emb_level == 'spk': + spk2idx = read_kaldi_format(in_dir / f'spk2idx') + self.idx2speakers = {int(idx): spk for spk, idx in spk2idx.items()} + self.speakers = {spk: idx for idx, spk in self.idx2speakers.items()} + self.genders = [spk2gender[spk] for spk in self.speakers] + + elif self.emb_level == 'utt': + utt2idx = read_kaldi_format(in_dir / f'utt2idx') + self.idx2speakers = {int(idx): spk for spk, idx in utt2idx.items()} + self.utt2spk = read_kaldi_format(in_dir / 'utt2spk') + self.speakers = {spk: idx for idx, spk in self.idx2speakers.items()} + self.genders = [spk2gender[self.utt2spk[utt]] for utt in self.speakers] + + + def save_vectors(self, out_dir:Path): + assert (self.speakers is not None) and (self.speaker_vectors is not None), \ + 'Speaker vectors need to be extracted or loaded before they can be stored!' 
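+        # On-disk layout written below: spk2idx (speaker level) or utt2idx plus utt2spk (utterance level) map each
+        # id to its row in speaker_vectors.pt, while spk2gender stores the Kaldi-style gender map that is copied
+        # over later during evaluation data preparation.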
+ out_dir.mkdir(exist_ok=True, parents=True) + + if self.emb_level == 'spk': + spk2idx = {spk: idx for idx, spk in self.idx2speakers.items()} + save_kaldi_format(spk2idx, out_dir / f'spk2idx') + elif self.emb_level == 'utt': + utt2idx = {spk: idx for idx, spk in self.idx2speakers.items()} + save_kaldi_format(utt2idx, out_dir / f'utt2idx') + save_kaldi_format(self.utt2spk, out_dir / 'utt2spk') + + spk2gender = self.get_spk2gender() + save_kaldi_format(spk2gender, out_dir / 'spk2gender') + torch.save(self.speaker_vectors, out_dir / f'speaker_vectors.pt') + + def get_embedding_for_speaker(self, speaker): + idx = self.speakers[speaker] + return self.speaker_vectors[idx] + + def get_spk2gender(self): + if self.emb_level == 'spk': + speaker_list = [speaker for speaker, idx in sorted(self.speakers.items(), key=lambda x: x[1])] + spk2gender = {speaker: gender for speaker, gender in zip(speaker_list, self.genders)} + elif self.emb_level == 'utt': + speaker_list = [self.utt2spk[utt] for utt, idx in sorted(self.speakers.items(), key=lambda x: x[1])] + spk2gender = {speaker: gender for speaker, gender in zip(speaker_list, self.genders)} + else: + spk2gender = {} + return spk2gender + + def _get_speaker_level_vectors(self, spk2utt_ids, vectors): + # speaker-level x-vector: mean of utterance-level x-vectors + speakers = {} + speaker_level_vecs = [] + + i = 0 + for speaker, utt_list in spk2utt_ids.items(): + utt_vecs = vectors[utt_list] + spk_vec = torch.mean(utt_vecs, dim=0) + speakers[speaker] = i + speaker_level_vecs.append(spk_vec) + i += 1 + + return speakers, torch.stack(speaker_level_vecs, dim=0).to(self.device) + + + def _extract_embedding(self, wave, sr, encoders): + # adapted from IMSToucan/Preprocessing/AudioPreprocessor + norm_wave = self._normalize_wave(wave, sr) + norm_wave = torch.tensor(np.trim_zeros(norm_wave.numpy())) + + spk_embs = [encoder.encode_batch(wavs=norm_wave.unsqueeze(0)).squeeze() for encoder in encoders] + if len(spk_embs) == 1: + return spk_embs[0] + else: + return torch.cat(spk_embs, dim=0) + + def _normalize_wave(self, wave, sr): + # adapted from IMSToucan/Preprocessing/AudioPreprocessor + dur = wave.shape[1] / sr + wave = wave.squeeze().cpu().numpy() + + # normalize loudness + meter = pyln.Meter(sr, block_size=min(dur-0.0001, abs(dur - 0.1)) if dur < 0.4 else 0.4) + loudness = meter.integrated_loudness(wave) + loud_normed = pyln.normalize.loudness(wave, loudness, -30.0) + peak = np.amax(np.abs(loud_normed)) + wave = np.divide(loud_normed, peak) + + wave = torch.Tensor(wave).to(self.device) + + if sr != 16000: + wave = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000).to(self.device)(wave) + + return wave.cpu() + diff --git a/evaluation/__init__.py b/evaluation/__init__.py new file mode 100755 index 0000000..63ba98b --- /dev/null +++ b/evaluation/__init__.py @@ -0,0 +1 @@ +from .evaluation_data import prepare_evaluation_data, copy_evaluation_results \ No newline at end of file diff --git a/evaluation/evaluation_data.py b/evaluation/evaluation_data.py new file mode 100755 index 0000000..6483ce3 --- /dev/null +++ b/evaluation/evaluation_data.py @@ -0,0 +1,69 @@ +from shutil import copy, copytree, ignore_patterns +import torchaudio +from datetime import datetime +import json + +from utils import save_kaldi_format, create_clean_dir + + +def prepare_evaluation_data(dataset_list, anon_wav_scps, orig_data_path, anon_vectors_path, output_path): + for dataset in dataset_list: + for anon in {True, False}: + anon_suffix = '_anon' if anon else '' + orig_data_split = 
orig_data_path / dataset + out_data_split = output_path / f'{dataset}{anon_suffix}' + out_data_split.mkdir(exist_ok=True, parents=True) + + copy_files = ['spk2utt', 'text', 'utt2spk'] + copy_files += ['trials'] if 'trials' in dataset else ['enrolls'] + + if anon: + anon_vec_split = anon_vectors_path / f'{dataset}' + copy(anon_vec_split / 'spk2gender', out_data_split / 'spk2gender') + save_kaldi_format(anon_wav_scps[dataset], out_data_split / 'wav.scp') + save_kaldi_format(get_utterance_durations(anon_wav_scps[dataset]), out_data_split / 'utt2dur') + else: + copy_files += ['spk2gender', 'wav.scp', 'utt2dur'] + + for file in copy_files: + copy(orig_data_split / file, out_data_split / file) + + if '_all' in dataset: + # for vctk, the 'all' tag combines two splits: one for common and one for diverse sentences + # we have to copy the original data for these splits to the output directory + common_split = dataset.replace('all', 'common') # same sentences for all speakers + diverse_split = dataset.replace('_all', '') # different sentences for each speaker + for split in {common_split, diverse_split}: + (output_path / split).mkdir(exist_ok=True, parents=True) + for file in ['spk2utt', 'text', 'utt2dur', 'utt2spk', 'spk2gender', 'wav.scp', 'trials']: + copy(orig_data_path / split / file, output_path / split / file) + + +def get_utterance_durations(wav_scp): + utt2dur = {} + for utt, wav_path in wav_scp.items(): + metadata = torchaudio.info(wav_path) + duration = metadata.num_frames / metadata.sample_rate + utt2dur[utt] = duration + return utt2dur + + +def copy_evaluation_results(results_dir, eval_dir, settings, copy_all=False): + results_dir = results_dir / datetime.strftime(datetime.today(), '%d-%m-%y_%H:%M') + create_clean_dir(results_dir) + + exp_results_dir = max(list(eval_dir.parent.glob('exp/results-*'))) # exp directory that was created latest + settings['exp_path'] = str(exp_results_dir) + + with open(results_dir / 'settings.json', 'w') as f: + json.dump(settings, f) + + if copy_all: # copy all files and directories from the evaluation but the 'exp_files' + for test_dir in exp_results_dir.glob('*'): + if test_dir.is_dir(): + copytree(test_dir, results_dir / test_dir.name, ignore=ignore_patterns('exp_files')) + else: + copy(test_dir, results_dir / test_dir.name) + else: # copy only the summary results.txt + copy(exp_results_dir / 'results.txt', results_dir / 'results.txt') + diff --git a/evaluation/run_evaluation.sh b/evaluation/run_evaluation.sh new file mode 100755 index 0000000..1c9cccf --- /dev/null +++ b/evaluation/run_evaluation.sh @@ -0,0 +1,227 @@ +#!/bin/bash +# Extract of Voice-Privacy-Challenge-2020/baseline/run.sh +# +# License of the original script: +# Copyright (C) 2020 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+# + + +set -e + +#===== begin config ======= + +nj=$(nproc) +mcadams=false +stage=0 +gpu=0 + +vp_dir=Voice-Privacy-Challenge-2020/baseline +cd $vp_dir + +printf -v results '%(%Y-%m-%d-%H-%M-%S)T' -1 +results=exp/results-$results + +# Chain model for ASR evaluation +asr_eval_model=exp/models/asr_eval + +# ASV_eval config +asv_eval_model=exp/models/asv_eval/xvect_01709_1 +plda_dir=${asv_eval_model}/xvect_train_clean_360 + +anon_data_suffix=_anon + +. utils/parse_options.sh || exit 1; + +. path.sh +. cmd.sh + +datasets=$@ + +if [[ $gpu != 'cpu' ]]; then + export CUDA_VISIBLE_DEVICES=$gpu + export CURRENNT_CUDA_DEVICE=$gpu +fi + +#=========== end config =========== + + +if [ $stage -le 0 ]; then + printf "${GREEN}\nStage 0: Evaluate datasets using speaker verification...${NC}\n" + for dataset in $datasets; do + + printf "${RED}**ASV: ${dataset}_trials_f, enroll - original, trial - original**${NC}\n" + local/asv_eval.sh --plda_dir $plda_dir --asv_eval_model $asv_eval_model \ + --enrolls ${dataset}_enrolls --trials ${dataset}_trials_f --results $results || exit 1; + printf "${RED}**ASV: ${dataset}_trials_f, enroll - original, trial - anonymized**${NC}\n" + local/asv_eval.sh --plda_dir $plda_dir --asv_eval_model $asv_eval_model \ + --enrolls ${dataset}_enrolls --trials ${dataset}_trials_f$anon_data_suffix --results $results || exit 1; + printf "${RED}**ASV: ${dataset}_trials_f, enroll - anonymized, trial - anonymized**${NC}\n" + local/asv_eval.sh --plda_dir $plda_dir --asv_eval_model $asv_eval_model \ + --enrolls ${dataset}_enrolls$anon_data_suffix --trials ${dataset}_trials_f$anon_data_suffix --results $results || exit 1; + + printf "${RED}**ASV: ${dataset}_trials_m, enroll - original, trial - original**${NC}\n" + local/asv_eval.sh --plda_dir $plda_dir --asv_eval_model $asv_eval_model \ + --enrolls ${dataset}_enrolls --trials ${dataset}_trials_m --results $results || exit 1; + printf "${RED}**ASV: ${dataset}_trials_m, enroll - original, trial - anonymized**${NC}\n" + local/asv_eval.sh --plda_dir $plda_dir --asv_eval_model $asv_eval_model \ + --enrolls ${dataset}_enrolls --trials ${dataset}_trials_m$anon_data_suffix --results $results || exit 1; + printf "${RED}**ASV: ${dataset}_trials_m, enroll - anonymized, trial - anonymized**${NC}\n" + local/asv_eval.sh --plda_dir $plda_dir --asv_eval_model $asv_eval_model \ + --enrolls ${dataset}_enrolls$anon_data_suffix --trials ${dataset}_trials_m$anon_data_suffix --results $results || exit 1; + + if [[ $dataset == vctk* ]]; then + + printf "${RED}**ASV: ${dataset}_trials_f_common, enroll - original, trial - original**${NC}\n" + local/asv_eval.sh --plda_dir $plda_dir --asv_eval_model $asv_eval_model \ + --enrolls ${dataset}_enrolls --trials ${dataset}_trials_f_common --results $results || exit 1; + printf "${RED}**ASV: ${dataset}_trials_f_common, enroll - original, trial - anonymized**${NC}\n" + local/asv_eval.sh --plda_dir $plda_dir --asv_eval_model $asv_eval_model \ + --enrolls ${dataset}_enrolls --trials ${dataset}_trials_f_common$anon_data_suffix --results $results || exit 1; + printf "${RED}**ASV: ${dataset}_trials_f_common, enroll - anonymized, trial - anonymized**${NC}\n" + local/asv_eval.sh --plda_dir $plda_dir --asv_eval_model $asv_eval_model \ + --enrolls ${dataset}_enrolls$anon_data_suffix --trials ${dataset}_trials_f_common$anon_data_suffix --results $results || exit 1; + + printf "${RED}**ASV: ${dataset}_trials_m_common, enroll - original, trial - original**${NC}\n" + local/asv_eval.sh --plda_dir $plda_dir --asv_eval_model $asv_eval_model \ + 
--enrolls ${dataset}_enrolls --trials ${dataset}_trials_m_common --results $results || exit 1; + printf "${RED}**ASV: ${dataset}_trials_m_common, enroll - original, trial - anonymized**${NC}\n" + local/asv_eval.sh --plda_dir $plda_dir --asv_eval_model $asv_eval_model \ + --enrolls ${dataset}_enrolls --trials ${dataset}_trials_m_common$anon_data_suffix --results $results || exit 1; + printf "${RED}**ASV: ${dataset}_trials_m_common, enroll - anonymized, trial - anonymized**${NC}\n" + local/asv_eval.sh --plda_dir $plda_dir --asv_eval_model $asv_eval_model \ + --enrolls ${dataset}_enrolls$anon_data_suffix --trials ${dataset}_trials_m_common$anon_data_suffix --results $results || exit 1; + fi + done +fi + +# Make ASR evaluation subsets +if [ $stage -le 1 ]; then + printf "${GREEN}\nStage 1: Making ASR evaluation subsets...${NC}\n" + for dataset in $datasets; do + + if [[ $dataset == libri* ]]; then + for name in data/${dataset}_{trials_f,trials_m} data/${dataset}_{trials_f,trials_m}$anon_data_suffix; do + [ ! -d $name ] && echo "Directory $name does not exist" && exit 1 + done + utils/combine_data.sh data/${dataset}_asr data/${dataset}_{trials_f,trials_m} || exit 1 + utils/combine_data.sh data/${dataset}_asr$anon_data_suffix data/${dataset}_{trials_f,trials_m}$anon_data_suffix || exit 1 + + elif [[ $dataset == vctk* ]]; then + for name in data/${dataset}_{trials_f_all,trials_m_all} data/${dataset}_{trials_f_all,trials_m_all}$anon_data_suffix; do + [ ! -d $name ] && echo "Directory $name does not exist" && exit 1 + done + utils/combine_data.sh data/${dataset}_asr data/${dataset}_{trials_f_all,trials_m_all} || exit 1 + utils/combine_data.sh data/${dataset}_asr$anon_data_suffix data/${dataset}_{trials_f_all,trials_m_all}$anon_data_suffix || exit 1 + fi + done +fi + +if [ $stage -le 2 ]; then + for dataset in $datasets; do + for data in ${dataset}_asr ${dataset}_asr$anon_data_suffix; do + printf "${GREEN}\nStage 2: Performing intelligibility assessment using ASR decoding on $dataset...${NC}\n" + local/asr_eval.sh --nj $nj --dset $data --model $asr_eval_model --results $results || exit 1; + done + done +fi + +if [ $stage -le 3 ]; then + printf "${GREEN}\nStage 3: Collecting results${NC}\n" + expo=$results/results.txt + for name in `find $results -type d -name "ASV-*" | sort`; do + echo "$(basename $name)" | tee -a $expo + [ ! -f $name/EER ] && echo "Directory $name/EER does not exist" && exit 1 + #for label in 'EER:' 'minDCF(p-target=0.01):' 'minDCF(p-target=0.001):'; do + for label in 'EER:'; do + value=$(grep "$label" $name/EER) + echo " $value" | tee -a $expo + done + [ ! -f $name/Cllr ] && echo "Directory $name/Cllr does not exist" && exit 1 + for label in 'Cllr (min/act):' 'ROCCH-EER:'; do + value=$(grep "$label" $name/Cllr) + value=$(echo $value) + echo " $value" | tee -a $expo + done + [ ! -f $name/linkability_log ] && echo "Directory $name/linkability_log does not exist" && exit 1 + for label in 'linkability:'; do + value=$(grep "$label" $name/linkability_log) + value=$(echo $value) + echo " $value" | tee -a $expo + done + [ ! 
-f $name/zebra ] && echo "Directory $name/zebra does not exist" && exit 1 + for label in 'Population:' 'Individual:'; do + value=$(grep "$label" $name/zebra) + value=$(echo $value) + echo " $value" | tee -a $expo + done + done + for name in `find $results -type f -name "ASR-*" | sort`; do + echo "$(basename $name)" | tee -a $expo + while read line; do + echo " $line" | tee -a $expo + done < $name + done +fi + +if [ $stage -le 4 ]; then + printf "${GREEN}\nStage 4: Compute the de-indentification and the voice-distinctiveness preservation with the similarity matrices${NC}\n" + for dataset in $datasets; do + + if [[ $dataset == libri* ]]; then + data_names="${dataset}_trials_f ${dataset}_trials_m" + + elif [[ $dataset == vctk* ]]; then + data_names="${dataset}_trials_f ${dataset}_trials_m ${dataset}_trials_f_common ${dataset}_trials_m_common" + fi + + for data in ${data_names}; do + printf "${BLUE}\nStage 4: Compute the de-indentification and the voice-distinctiveness for $data${NC}\n" + local/similarity_matrices/compute_similarity_matrices_metrics.sh --asv_eval_model $asv_eval_model --plda_dir $plda_dir --set_test $data --results $results || exit 1; + done + done +fi + +if [ $stage -le 5 ]; then + printf "${GREEN}\nStage 5: Collecting results for re-indentification and the voice-distinctiveness preservation${NC}\n" + expo=$results/results.txt + dir="similarity_matrices_DeID_Gvd" + for dataset in $datasets; do + + if [[ $dataset == libri* ]]; then + names="${dataset}_trials_f ${dataset}_trials_m" + + elif [[ $dataset == vctk* ]]; then + names="${dataset}_trials_f ${dataset}_trials_m ${dataset}_trials_f_common ${dataset}_trials_m_common" + fi + + for name in $names; do + echo "$name" | tee -a $expo + echo $results/$dir/$name/DeIDentification + [ ! -f $results/$dir/$name/DeIDentification ] && echo "File $results/$dir/$name/DeIDentification does not exist" && exit 1 + label='De-Identification :' + value=$(grep "$label" $results/$dir/$name/DeIDentification) + value=$(echo $value) + echo " $value" | tee -a $expo + [ ! -f $results/$dir/$name/gain_of_voice_distinctiveness ] && echo "File $name/gain_of_voice_distinctiveness does not exist" && exit 1 + label='Gain of voice distinctiveness :' + value=$(grep "$label" $results/$dir/$name/gain_of_voice_distinctiveness) + value=$(echo $value) + echo " $value" | tee -a $expo + done + done +fi + +echo Done diff --git a/evaluation/run_make_vctk_anon_subsets.sh b/evaluation/run_make_vctk_anon_subsets.sh new file mode 100755 index 0000000..5795369 --- /dev/null +++ b/evaluation/run_make_vctk_anon_subsets.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# Extract of Voice-Privacy-Challenge-2020/baseline/run.sh +# +# License of the original script: +# Copyright (C) 2020 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + + +set -e + +#===== begin config ======= + +nj=$(nproc) +split='dev' + +vp_dir=Voice-Privacy-Challenge-2020/baseline +cd $vp_dir + +. utils/parse_options.sh || exit 1; + +. path.sh +. 
cmd.sh + +anon_data_suffix=_anon + +#=========== end config =========== + +# Make VCTK anonymized evaluation subsets +printf "${GREEN}\nMaking VCTK anonymized evaluation subsets for ${split}...${NC}\n" +temp=$(mktemp) + +dset=data/vctk_$split +for name in ${dset}_trials_f_all$anon_data_suffix ${dset}_trials_m_all$anon_data_suffix; do + [ ! -d $name ] && echo "Directory $name does not exist" && exit 1 +done + +cut -d' ' -f2 ${dset}_trials_f/trials | sort | uniq > $temp +utils/subset_data_dir.sh --utt-list $temp ${dset}_trials_f_all$anon_data_suffix ${dset}_trials_f${anon_data_suffix} || exit 1 +cp ${dset}_trials_f/trials ${dset}_trials_f${anon_data_suffix} || exit 1 + +cut -d' ' -f2 ${dset}_trials_f_common/trials | sort | uniq > $temp +utils/subset_data_dir.sh --utt-list $temp ${dset}_trials_f_all$anon_data_suffix ${dset}_trials_f_common${anon_data_suffix} || exit 1 +cp ${dset}_trials_f_common/trials ${dset}_trials_f_common${anon_data_suffix} || exit 1 + +cut -d' ' -f2 ${dset}_trials_m/trials | sort | uniq > $temp +utils/subset_data_dir.sh --utt-list $temp ${dset}_trials_m_all$anon_data_suffix ${dset}_trials_m${anon_data_suffix} || exit 1 +cp ${dset}_trials_m/trials ${dset}_trials_m${anon_data_suffix} || exit 1 + +cut -d' ' -f2 ${dset}_trials_m_common/trials | sort | uniq > $temp +utils/subset_data_dir.sh --utt-list $temp ${dset}_trials_m_all$anon_data_suffix ${dset}_trials_m_common${anon_data_suffix} || exit 1 +cp ${dset}_trials_m_common/trials ${dset}_trials_m_common${anon_data_suffix} || exit 1 + +rm $temp \ No newline at end of file diff --git a/figures/architecture.png b/figures/architecture.png new file mode 100644 index 0000000000000000000000000000000000000000..ea1f99443040b017c765ddbde87ee231dccd06e0 GIT binary patch literal 47042 zcmZs?bzD?k+crE5B?2NKjC3O)3>_jR-ALEaoerVWUDDExbaxF%3?d=jNDqv(G}7^n z*Y(`b{eHjq{jLA@>^*y}bDisq<2d%Gx2kgZkI5ec004Xid1(y*038efpy58kLR}G8 znZyPFXaNe+Z?rs&e`jO+B=%DD=O%Sn-pEOW)ni)zFqTQ;kcuzTlSZ4Tcq5Buh0`cO zMn8qdHn2j=k<^jOPGaRM@-{of-lH2?^TGoj~~_`iQ+Lc;{x6I@ne2xEnAD&)^~ zRZQ(DUjAk@;^NLWra6}^c`e>i;va&Wp{v4%!}5O&Ax6}bPd=%L{`x_*%|qAtI*v2m z0ZWa#1QfjwmQi1f7rR&LFs-zY_&=vA21Y$3)(QV)=QUo-I5S0=4&dFk2q49P1Xm+=ZpwR>w!@LHf7G~y<3@Gr6!iJIr=C2gzdvVY5P9|g z`BwxA5`j?!9Mz4^m~}#HE3Lme{+*xVn)|- zhn_tP+v3BRlKf4h#6)8vf<(k?9=o-|avD!W4i>-t5hcE8LXKIJ`}*bji^1Om=BOY_ zZp%abOmnvH(}Njw;#mLJnjT*;p20j2`K5I8F9N|xNs7-j911z?I8Eh-8OBP9-#W3T zmr0qRSJ$G~4oEYXF(s;vKW|Fzev34*AghRScyG6i#{W*iqx0_s?#ast4L%1}jV-iw#A{XKvbgI9?zJ32qQ{B`GMm~1>F|qxsW^GOmLdXt$%BbY) z*Dx3YNjSE7U9iZ-Bv9tUeV>VcS|1MacEjI#3=J&Pf2wP1NuS@}%G#zb?zi9n$F2HR ze*Xe}i6XauFzJQ6)ZSRw#mjK)an#S-e_l?I>{7JVNd*fVpBTc+> znG!eyfPi$R|~b5Y+Q#pB;j1mtdH5d z@|SFIV}cJ)?mA$8gCw;d+u%rjWt1k#JQZOrU;f#bT>;&vh0L9N;f_RRzf z5o7~+2_R3KP}w5yYa-DN7{;llfT_&P$r-PMELh&>JvuPhPjN{D`&)Up3NT@q;}OjEOTLjY{3X*kd(;cQ13HadBoSr z@Rv`{#yjESn@ZIH{Pc%DQq4LtAMr+B0G4KNlAG8;@G8Ek1)J_p76JZv+qP`^>B~k| z=+E$Tr19BZ0HOZuY33dTO~TdzL)X>3kCjh=kALiLX8m9(w~w;h3LC zR9!fp-DUl@pzX3FUlxaBT3Z5$^^f#%s{yIGFyOFqa_sF|9HieRlob0Z7BPPSAwZ4f ze?w}Z2P%XpwSO<6KF9dSifoMLP&+sgR&=|woi6LA`zxs&oO<=>lYxTfG zeUO7{HFP-qVc>bSMeDaW)V<^|WI@Uju2FVcgD!pIu1MM={9j@^C_{Pii&_T8q zh_1x&J>)i)sUo%t9uS0hc$PLw7mqgF|MqD-fM;-+L`=N=ze(Z0DM8`yDao0U|Be8n zNg*||U($bClXzl6Ahh0tMl(^5&ke(c8u9Z$vd4>n^C zb4YP*7ea?1ga{szRVPjZ-2B{7jwV6YXNC_+I2DDC%7xUeL8jzda8`3KT7lbOw@{HU z*>5C(($9j~pE@c3cURED|C+{Zq6}M92KX4V)Iju!5)wc2Om#P=RniU6MM!x`_y)I= zO6WUijZNyFUWA8ovcgSf{YMN04SQS>z9mKKGn-TKs*BJww*iiM&le%Y+(U<6h(OZ! 
diff --git a/inference/__init__.py b/inference/__init__.py new file mode 100644 index 0000000..66ff5e0 --- /dev/null +++ b/inference/__init__.py @@ -0,0 +1,3 @@ +from .anonymization import InferenceAnonymizer +from .asr import InferenceASR +from .tts import InferenceTTS \ No newline at end of file diff --git a/inference/anonymization.py b/inference/anonymization.py new file mode 100644 index 0000000..42af2e2 --- /dev/null +++ b/inference/anonymization.py @@ -0,0 +1,93 @@ +import json +import numpy as np +import torch +from sklearn.preprocessing import minmax_scale, StandardScaler + +from anonymization import PoolAnonymizer, RandomAnonymizer +from utils import create_clean_dir + + +ANON_MODELS = { + 'pool': PoolAnonymizer, + 'random': RandomAnonymizer +} + + +class InferenceAnonymizer: + + def __init__(self, model_name, data_dir, results_dir, model_dir, vectors_dir, device, force_compute=False): + self.force_compute = force_compute + self.results_dir = results_dir / 'speaker_embeddings' / model_name + self.data_dir = data_dir + self.vectors_dir = vectors_dir + self.device = device + self.scaling = None + self.std_scaler = None + + self.dim_ranges = self._load_dim_ranges(model_dir / 'anonymization' / model_name) + self.anonymizer = self._load_anonymizer(model_dir / 'anonymization' / model_name) + + def anonymize_embeddings(self, dataset): + dataset_results_dir = self.results_dir / dataset + if dataset_results_dir.exists() and any(dataset_results_dir.iterdir()) and not self.force_compute: + # if there are already anonymized speaker embeddings from this model and the computation is not forced, + # simply load them + print('No computation of anonymized embeddings necessary; load existing anonymized speaker embeddings ' + 'instead...') + anon_embeddings = self.anonymizer.load_embeddings(dataset_results_dir) + return anon_embeddings, False + else: + # otherwise, create new anonymized speaker embeddings + print('Anonymize speaker embeddings...') + anon_embeddings = self.anonymizer.anonymize_data(self.data_dir / dataset, + vector_dir=self.vectors_dir / dataset) + if self.dim_ranges: + anon_embeddings = self._scale_embeddings(anon_embeddings) + create_clean_dir(dataset_results_dir) # deletes existing results files + self.anonymizer.save_embeddings(anon_embeddings, dataset_results_dir) + return anon_embeddings, True + + def _load_dim_ranges(self, model_dir): + if (model_dir / 'stats_per_dim.json').exists(): + with open(model_dir / 'stats_per_dim.json') as f: + dim_ranges = json.load(f) + return [(v['min'], v['max']) for k, v in sorted(dim_ranges.items(), key=lambda x: int(x[0]))] + + def _load_anonymizer(self, model_dir): + model_name = model_dir.name.lower() + + if 'pool' in model_name: + model_type = 'pool' + else: + model_type = 'random' + + print(f'Model type of anonymizer: 
{model_type}') + + model = ANON_MODELS[model_type](device=self.device) + model.load_parameters(model_dir) + + if 'minmax' in model_name: + self.scaling = 'minmax' + elif 'std_scale' in model_name and model_type == 'pool': + self.scaling = 'std' + self.std_scaler = StandardScaler() + self.std_scaler.fit(model.pool_embeddings.speaker_vectors.cpu().numpy()) + + return model + + def _scale_embeddings(self, embeddings): + vectors = embeddings.speaker_vectors.cpu().numpy() + + if self.scaling == 'minmax': + scaled_dims = [] + for i in range(len(self.dim_ranges)): + scaled_dims.append(minmax_scale(vectors[:, i], self.dim_ranges[i], axis=0)) + + scaled_vectors = torch.tensor(np.array(scaled_dims)).T.to(self.device) + embeddings.speaker_vectors = scaled_vectors + elif self.scaling == 'std': + scaled_vectors = torch.tensor(self.std_scaler.transform(vectors)) + embeddings.speaker_vectors = scaled_vectors + return embeddings + + diff --git a/inference/asr.py b/inference/asr.py new file mode 100644 index 0000000..6270523 --- /dev/null +++ b/inference/asr.py @@ -0,0 +1,69 @@ +from tqdm import tqdm +from espnet2.bin.asr_inference import Speech2Text +import soundfile +import resampy +from espnet_model_zoo.downloader import ModelDownloader + +from utils import create_clean_dir, read_kaldi_format, save_kaldi_format + + +class InferenceASR: + + def __init__(self, model_name, results_dir, data_dir, model_dir, device, force_compute=False): + self.force_compute = force_compute + self.results_dir = results_dir / 'transcription' / model_name + self.data_dir = data_dir + + model_dir = model_dir / 'asr' / model_name + + d = ModelDownloader() + + self.speech2text = Speech2Text( + **d.download_and_unpack(str(model_dir)), + device=str(device), + minlenratio=0.0, + maxlenratio=0.0, + ctc_weight=0.4, + beam_size=15, + batch_size=1, + nbest=1 + ) + + def recognize_speech(self, dataset, utterance_list=None): + dataset_results_dir = self.results_dir / dataset + utt2spk = read_kaldi_format(self.data_dir / dataset / 'utt2spk') + new = False + + if (dataset_results_dir / 'text').exists() and not self.force_compute: + # if the text created from this ASR model already exists for this dataset and a computation is not + # forced, simply load the text + print('No speech recognition necessary; load existing text instead...') + texts = {} + with open(dataset_results_dir / 'text', 'r') as f: + for line in f: + splitted_line = line.strip().split(' ') + texts[splitted_line[0].strip()] = ' '.join(splitted_line[1:]) + else: + # otherwise, recognize the speech + print(f'Recognize speech of {len(utt2spk)} utterances...') + new = True + create_clean_dir(dataset_results_dir) + texts = {} + wav_scp = read_kaldi_format( self.data_dir / dataset / 'wav.scp') + + for utt, spk in tqdm(utt2spk.items()): + if utterance_list and utt not in utterance_list: + continue + if utt in wav_scp: + speech, rate = soundfile.read(wav_scp[utt]) + speech = resampy.resample(speech, rate, 16000) + rate = 16000 + + nbests = self.speech2text(speech) + text, *_ = nbests[0] + texts[utt] = text + + if not utterance_list: + save_kaldi_format(texts, dataset_results_dir / 'text') + + return texts, utt2spk, new diff --git a/inference/tts.py b/inference/tts.py new file mode 100644 index 0000000..57d5174 --- /dev/null +++ b/inference/tts.py @@ -0,0 +1,84 @@ +from tqdm import tqdm +import soundfile +import torch + +from IMSToucan.InferenceInterfaces.AnonFastSpeech2 import AnonFastSpeech2 +from utils import create_clean_dir + + +class InferenceTTS: + + def __init__(self, 
hifigan_model_name, fastspeech_model_name, anon_model_name, asr_model_name, model_dir, + results_dir, device, force_compute=False): + self.force_compute = force_compute + self.device = device + + model_name = f'{hifigan_model_name}_{fastspeech_model_name}' + hifigan_path = model_dir / 'tts' / 'HiFiGAN_combined' / hifigan_model_name + fastspeech_path = model_dir / 'tts' / 'FastSpeech2_Multi' / fastspeech_model_name + + self.results_dir = results_dir / 'speech' / model_name / anon_model_name / asr_model_name + + self.model = AnonFastSpeech2(device=self.device, path_to_hifigan_model=hifigan_path, + path_to_fastspeech_model=fastspeech_path) + + + def read_texts(self, dataset, texts, anon_embeddings, utt2spk, text_is_phonemes=False, force_compute=False, + save_wav=True, emb_level='spk'): + dataset_results_dir = self.results_dir / dataset + wav_scp = {} + wavs = {} + force_compute = force_compute or self.force_compute + + if dataset_results_dir.exists() and not force_compute: + already_anon_utts = {x.stem: str(x.absolute()) for x in dataset_results_dir.glob('*.wav')} + if already_anon_utts: + print(f'No synthesis necessary for {len(already_anon_utts)} of {len(texts)} utterances...') + texts = {utt: text for utt, text in texts.items() if utt not in already_anon_utts.keys()} + wav_scp = already_anon_utts + + if texts: + print(f'Synthesize {len(texts)} utterances...') + new = True + if self.force_compute: + create_clean_dir(dataset_results_dir) + elif not dataset_results_dir.exists(): + dataset_results_dir.mkdir(parents=True) + + for utt, text in tqdm(texts.items()): + if emb_level == 'spk': + speaker = utt2spk[utt] + speaker_embedding = anon_embeddings.get_embedding_for_speaker(speaker) + else: + speaker_embedding = anon_embeddings.get_embedding_for_speaker(utt) + out_file = str((dataset_results_dir / f'{utt}.wav').absolute()) + + self.model.default_utterance_embedding = speaker_embedding.to(self.device) + wav = self.model(text=text, text_is_phonemes=text_is_phonemes) + + i = 0 + while wav.shape[0] < 24000: # 0.5 s + # sometimes, the speaker embedding is so off that it leads to a practically empty audio + # then, we need to sample a new embedding + if i > 0 and i % 10 == 0: + mask = torch.zeros(speaker_embedding.shape[0]).float().random_(-40, 40).to(self.device) + else: + mask = torch.zeros(speaker_embedding.shape[0]).float().random_(-2, 2).to(self.device) + speaker_embedding = speaker_embedding * mask + self.model.default_utterance_embedding = speaker_embedding.to(self.device) + wav = self.model(text=text, text_is_phonemes=text_is_phonemes) + i += 1 + + if i > 0: + print(f'Synthesized utt {utt} in {i} takes') + + if save_wav: + soundfile.write(file=out_file, data=wav.cpu().numpy(), samplerate=48000) + wav_scp[utt] = out_file + + return wav_scp, wavs + + + + + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..8d7dd7b --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +-r Voice-Privacy-Challenge-2020/requirements.txt +-r IMSToucan/requirements.txt +espnet +espnet-model-zoo \ No newline at end of file diff --git a/run_inference.py b/run_inference.py new file mode 100644 index 0000000..a66130e --- /dev/null +++ b/run_inference.py @@ -0,0 +1,89 @@ +from argparse import ArgumentParser +from pathlib import Path +import subprocess +import torch +from inference import InferenceAnonymizer, InferenceASR, InferenceTTS +from evaluation import prepare_evaluation_data, copy_evaluation_results + +parser = ArgumentParser() +parser.add_argument('--gpu', type=int, default=None) 
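+# --gpu takes the index of the CUDA device to use (e.g. --gpu 0); leave it unset to run on CPU (the script also falls back to CPU below if CUDA is unavailable)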
+args = parser.parse_args() + + +# Settings +gpu = args.gpu # None for CPU, integer for GPU ID +settings = { + 'datasets': ['libri_dev', 'libri_test', 'vctk_dev', 'vctk_test'], + 'anonymizer': 'pool_minmax_ecapa+xvector', # name of anonymization model + 'asr': 'asr_tts-phn_en.zip', # name of ASR model + 'tts_hifigan': 'best.pt', # name of TTS HiFiGAN model + 'tts_fastspeech': 'trained_on_ground_truth_phonemes.pt' # name of TTS FastSpeech2 model +} +force_compute = [] # options: 'anon', 'asr', 'tts' +settings['text_is_phonemes'] = '-phn' in settings['asr'] + +# Some static variables +data_dir = Path('Voice-Privacy-Challenge-2020', 'baseline', 'data') +vectors_dir = Path('original_speaker_embeddings') +models_dir = Path('models') +results_dir = Path('results') + +# the challenge's eval scripts require the data to be at a specific location +eval_data_dir = Path('Voice-Privacy-Challenge-2020', 'baseline', 'data') + +if not torch.cuda.is_available(): + gpu = None +device = torch.device(f'cuda:{gpu}') if gpu is not None else torch.device('cpu') + +dataset_splits = { + 'libri': ['trials_f', 'trials_m', 'enrolls'], + 'vctk': ['trials_f_all', 'trials_m_all', 'enrolls'] +} + +datasets = [f'{dset}_{split}' for dset in settings['datasets'] for split in dataset_splits[dset.split('_')[0]]] +anon_wav_scps = {} + + +print('Set up components...') +anonymizer = InferenceAnonymizer(settings['anonymizer'], data_dir=data_dir, model_dir=models_dir, + results_dir=results_dir, vectors_dir=vectors_dir, device=device, + force_compute='anon' in force_compute) +asr = InferenceASR(settings['asr'], device=device, data_dir=data_dir, model_dir=models_dir, + results_dir=results_dir, force_compute='asr' in force_compute) +tts = InferenceTTS(hifigan_model_name=settings['tts_hifigan'], fastspeech_model_name=settings['tts_fastspeech'], + anon_model_name=settings['anonymizer'], asr_model_name=settings['asr'], model_dir=models_dir, + results_dir=results_dir, device=device, force_compute='tts' in force_compute) + +with torch.inference_mode(): + for i, dataset in enumerate(datasets): + print(f'{i+1}/{len(datasets)}: Processing {dataset}...') + anon_embeddings, new_anon = anonymizer.anonymize_embeddings(dataset=dataset) + texts, utt2spk, new_text = asr.recognize_speech(dataset=dataset) + wav_scp, _ = tts.read_texts(dataset=dataset, texts=texts, anon_embeddings=anon_embeddings, utt2spk=utt2spk, + force_compute=(new_anon or new_text), + text_is_phonemes=settings['text_is_phonemes'], + emb_level=anonymizer.anonymizer.emb_level) + anon_wav_scps[dataset] = wav_scp +print('Done') + +# Evaluation +print(f'Prepare evaluation data for {datasets}...') +prepare_evaluation_data(dataset_list=datasets, anon_wav_scps=anon_wav_scps, orig_data_path=data_dir, + anon_vectors_path=anonymizer.results_dir, output_path=eval_data_dir) + +if 'vctk_dev' in settings['datasets']: + print('Make anon subsets for vctk_dev...') + subprocess.run(['./evaluation/run_make_vctk_anon_subsets.sh', '--split', 'dev'], check=True) +if 'vctk_test' in settings['datasets']: + print('Make anon subsets for vctk_test...') + subprocess.run(['./evaluation/run_make_vctk_anon_subsets.sh', '--split', 'test'], check=True) + +print('Perform evaluation...') +subprocess.run(['./evaluation/run_evaluation.sh', '--mcadams', 'false', '--gpu', str(gpu) if gpu is not None else 'cpu', + *settings['datasets']], check=True) + + +# Copy the evaluation results to our results directory +copy_evaluation_results(results_dir=results_dir / 'evaluation', eval_dir=eval_data_dir, settings=settings) 
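+# Finally, remove the intermediate files of the challenge framework; utils/run_cleanup.sh simply calls cleanup.sh inside Voice-Privacy-Challenge-2020/baseline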
+subprocess.run(['./utils/run_cleanup.sh'], check=True) diff --git a/setup_scripts/install_challenge_framework.sh b/setup_scripts/install_challenge_framework.sh new file mode 100755 index 0000000..ea9e6c0 --- /dev/null +++ b/setup_scripts/install_challenge_framework.sh @@ -0,0 +1,64 @@ +#!/bin/bash + +# Modified copy of the install script of Voice-Privacy-Challenge-2020 + +set -e + +nj=$(nproc) + +cd ../Voice-Privacy-Challenge-2020 +home=$PWD + +conda_url=https://repo.anaconda.com/miniconda/Miniconda3-py38_4.10.3-Linux-x86_64.sh +venv_dir=$PWD/../venv + +cuda_path="Path/to/cuda-11.6" # TODO: change path to cuda +mkl_root="Path/to/mkl" # TODO: change path to mkl + +mark=.done-venv +if [ ! -f $mark ]; then + echo 'Making python virtual environment' + name=$(basename $conda_url) + if [ ! -f $name ]; then + wget $conda_url || exit 1 + fi + [ ! -f $name ] && echo "File $name does not exist" && exit 1 + [ -d $venv_dir ] && rm -r $venv_dir + sh $name -b -p $venv_dir || exit 1 + . $venv_dir/bin/activate + echo 'Installing python dependencies' + pip install -r requirements.txt || exit 1 + touch $mark +fi +echo "if [ \$(which python) != $venv_dir/bin/python ]; then source $venv_dir/bin/activate; fi" > env.sh +export PATH=${cuda_path}/bin:$PATH +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${cuda_path}/lib64 +export CUDA_HOME=${cuda_path} +echo "export PATH=${cuda_path}/bin:\$PATH" >> env.sh +echo "export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:${cuda_path}/lib64" >> env.sh +echo "export CUDA_HOME=${cuda_path}" >> env.sh + + +mark=.done-kaldi-tools +if [ ! -f $mark ]; then + echo 'Building Kaldi tools' + cd kaldi/tools + extras/check_dependencies.sh || exit 1 + make -j $nj || exit 1 + cd $home + touch $mark +fi + +mark=.done-kaldi-src +if [ ! -f $mark ]; then + echo 'Building Kaldi src' + cd kaldi/src + ./configure --shared --mkl-root=${mkl_root} --cudatk-dir=${cuda_path} --with-cudadecoder=no || exit 1 + make clean || exit 1 + make depend -j $nj || exit 1 + make -j $nj || exit 1 + cd $home + touch $mark +fi + +echo Done diff --git a/setup_scripts/run_download_data.sh b/setup_scripts/run_download_data.sh new file mode 100755 index 0000000..689fd41 --- /dev/null +++ b/setup_scripts/run_download_data.sh @@ -0,0 +1,98 @@ +#!/bin/bash +# Extract of Voice-Privacy-Challenge-2020/baseline/run.sh +# +# License of the original script: +# Copyright (C) 2020 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+# + + +set -e + +#===== begin config ======= + +nj=$(nproc) +mcadams=false +stage=0 + +vp_dir=../Voice-Privacy-Challenge-2020/baseline +cd $vp_dir + +download_full=false # If download_full=true all the data that can be used in the training/development will be downloaded (except for the VoxCeleb-1,2 corpus); otherwise, only those subsets that are used in the current baseline (with the pretrained models) +data_url_librispeech=www.openslr.org/resources/12 # Link to download LibriSpeech corpus +data_url_libritts=www.openslr.org/resources/60 # Link to download LibriTTS corpus +corpora=corpora + +. utils/parse_options.sh || exit 1; + +. path.sh +. cmd.sh + +#=========== end config =========== + +# Download datasets +if [ $stage -le 0 ]; then + for dset in libri vctk; do + for suff in dev test; do + printf "${GREEN}\nStage 0: Downloading ${dset}_${suff} set...${NC}\n" + local/download_data.sh ${dset}_${suff} || exit 1; + done + done +fi + +# Download pretrained models +if [ $stage -le 1 ]; then + printf "${GREEN}\nStage 1: Downloading pretrained models...${NC}\n" + local/download_models.sh || exit 1; +fi +data_netcdf=$(realpath exp/am_nsf_data) # directory where features for voice anonymization will be stored +mkdir -p $data_netcdf || exit 1; + +if ! $mcadams; then + + # Download VoxCeleb-1,2 corpus for training anonymization system models + if $download_full && [[ $stage -le 2 ]]; then + printf "${GREEN}\nStage 2: In order to download VoxCeleb-1,2 corpus, please go to: http://www.robots.ox.ac.uk/~vgg/data/voxceleb/ ...${NC}\n" + sleep 10; + fi + + # Download LibriSpeech data sets for training anonymization system (train-other-500, train-clean-100) + if $download_full && [[ $stage -le 3 ]]; then + printf "${GREEN}\nStage 3: Downloading LibriSpeech data sets for training anonymization system (train-other-500, train-clean-100)...${NC}\n" + for part in train-clean-100 train-other-500; do + local/download_and_untar.sh $corpora $data_url_librispeech $part LibriSpeech || exit 1; + done + fi + + # Download LibriTTS data sets for training anonymization system (train-clean-100) + if $download_full && [[ $stage -le 4 ]]; then + printf "${GREEN}\nStage 4: Downloading LibriTTS data sets for training anonymization system (train-clean-100)...${NC}\n" + for part in train-clean-100; do + local/download_and_untar.sh $corpora $data_url_libritts $part LibriTTS || exit 1; + done + fi + + # Download LibriTTS data sets for training anonymization system (train-other-500) + if [ $stage -le 5 ]; then + printf "${GREEN}\nStage 5: Downloading LibriTTS data sets for training anonymization system (train-other-500)...${NC}\n" + for part in train-other-500; do + local/download_and_untar.sh $corpora $data_url_libritts $part LibriTTS || exit 1; + done + fi + + libritts_corpus=$(realpath $corpora/LibriTTS) # Directory for LibriTTS corpus + librispeech_corpus=$(realpath $corpora/LibriSpeech) # Directory for LibriSpeech corpus + +fi # !
$mcadams \ No newline at end of file diff --git a/setup_scripts/run_prepare_data.sh b/setup_scripts/run_prepare_data.sh new file mode 100755 index 0000000..341f0c2 --- /dev/null +++ b/setup_scripts/run_prepare_data.sh @@ -0,0 +1,100 @@ +#!/bin/bash +# Extract of Voice-Privacy-Challenge-2020/baseline/run.sh +# +# License of the original script: +# Copyright (C) 2020 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + + +set -e + +#===== begin config ======= + +nj=$(nproc) + +vp_dir=../Voice-Privacy-Challenge-2020/baseline +cd ${vp_dir} + +. utils/parse_options.sh || exit 1; + +. path.sh +. cmd.sh + +corpus_dir=corpora +data_dir=data + +#=========== end config =========== + +mkdir -p ${data_dir} +cp utils/parse_options.sh . + +# Make evaluation data +printf "${GREEN}\nMaking evaluation subsets...${NC}\n" +temp=$(mktemp) +for suff in dev test; do + for name in ${corpus_dir}/libri_$suff/{enrolls,trials_f,trials_m} \ + ${corpus_dir}/vctk_$suff/{enrolls_mic2,trials_f_common_mic2,trials_f_mic2,trials_m_common_mic2,trials_m_mic2}; do + [ ! -f $name ] && echo "File $name does not exist" && exit 1 + done + + dset_in=${corpus_dir}/libri_$suff + dset_out=${data_dir}/libri_$suff + utils/subset_data_dir.sh --utt-list ${dset_in}/enrolls ${dset_in} ${dset_out}_enrolls || exit 1 + cp ${dset_in}/enrolls ${dset_out}_enrolls || exit 1 + + cut -d' ' -f2 ${dset_in}/trials_f | sort | uniq > $temp + utils/subset_data_dir.sh --utt-list $temp ${dset_in} ${dset_out}_trials_f || exit 1 + cp ${dset_in}/trials_f ${dset_out}_trials_f/trials || exit 1 + + cut -d' ' -f2 ${dset_in}/trials_m | sort | uniq > $temp + utils/subset_data_dir.sh --utt-list $temp ${dset_in} ${dset_out}_trials_m || exit 1 + cp ${dset_in}/trials_m ${dset_out}_trials_m/trials || exit 1 + + utils/combine_data.sh ${dset_out}_trials_all ${dset_out}_trials_f ${dset_out}_trials_m || exit 1 + cat ${dset_out}_trials_f/trials ${dset_out}_trials_m/trials > ${dset_out}_trials_all/trials + + dset_in=${corpus_dir}/vctk_$suff + dset_out=${data_dir}/vctk_$suff + utils/subset_data_dir.sh --utt-list ${dset_in}/enrolls_mic2 ${dset_in} ${dset_out}_enrolls || exit 1 + cp ${dset_in}/enrolls_mic2 ${dset_out}_enrolls/enrolls || exit 1 + + cut -d' ' -f2 ${dset_in}/trials_f_mic2 | sort | uniq > $temp + utils/subset_data_dir.sh --utt-list $temp ${dset_in} ${dset_out}_trials_f || exit 1 + cp ${dset_in}/trials_f_mic2 ${dset_out}_trials_f/trials || exit 1 + + cut -d' ' -f2 ${dset_in}/trials_f_common_mic2 | sort | uniq > $temp + utils/subset_data_dir.sh --utt-list $temp ${dset_in} ${dset_out}_trials_f_common || exit 1 + cp ${dset_in}/trials_f_common_mic2 ${dset_out}_trials_f_common/trials || exit 1 + + utils/combine_data.sh ${dset_out}_trials_f_all ${dset_out}_trials_f ${dset_out}_trials_f_common || exit 1 + cat ${dset_out}_trials_f/trials ${dset_out}_trials_f_common/trials > ${dset_out}_trials_f_all/trials + + cut -d' ' -f2 ${dset_in}/trials_m_mic2 | sort | uniq > $temp + utils/subset_data_dir.sh --utt-list $temp 
${dset_in} ${dset_out}_trials_m || exit 1 + cp ${dset_in}/trials_m_mic2 ${dset_out}_trials_m/trials || exit 1 + + cut -d' ' -f2 ${dset_in}/trials_m_common_mic2 | sort | uniq > $temp + utils/subset_data_dir.sh --utt-list $temp ${dset_in} ${dset_out}_trials_m_common || exit 1 + cp ${dset_in}/trials_m_common_mic2 ${dset_out}_trials_m_common/trials || exit 1 + + utils/combine_data.sh ${dset_out}_trials_m_all ${dset_out}_trials_m ${dset_out}_trials_m_common || exit 1 + cat ${dset_out}_trials_m/trials ${dset_out}_trials_m_common/trials > ${dset_out}_trials_m_all/trials + + utils/combine_data.sh ${dset_out}_trials_all ${dset_out}_trials_f_all ${dset_out}_trials_m_all || exit 1 + cat ${dset_out}_trials_f_all/trials ${dset_out}_trials_m_all/trials > ${dset_out}_trials_all/trials +done +rm $temp +rm parse_options.sh diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000..8f5499d --- /dev/null +++ b/utils/__init__.py @@ -0,0 +1,2 @@ +from .data_io import read_kaldi_format, save_kaldi_format +from .path_management import create_clean_dir \ No newline at end of file diff --git a/utils/data_io.py b/utils/data_io.py new file mode 100644 index 0000000..d374b3d --- /dev/null +++ b/utils/data_io.py @@ -0,0 +1,19 @@ +def read_kaldi_format(filename): + data = {} + with open(filename, 'r') as f: + for line in f: + splitted_line = line.split() + if len(splitted_line) == 2: + data[splitted_line[0].strip()] = splitted_line[1].strip() + elif len(splitted_line) > 2: + data[splitted_line[0].strip()] = [x.strip() for x in splitted_line[1:]] + return data + + +def save_kaldi_format(data_dict, filename): + with open(filename, 'w') as f: + for key, value in sorted(data_dict.items(), key=lambda x: x[0]): + if isinstance(value, list): + value = ' '.join(value) + f.write(f'{key} {value}\n') + diff --git a/utils/path_management.py b/utils/path_management.py new file mode 100644 index 0000000..5a7961c --- /dev/null +++ b/utils/path_management.py @@ -0,0 +1,18 @@ +from pathlib import Path +import shutil + + +def create_clean_dir(dir_name:Path): + if dir_name.exists(): + remove_contents_in_dir(dir_name) + else: + dir_name.mkdir(exist_ok=True, parents=True) + + +def remove_contents_in_dir(dir_name:Path): + # solution from https://stackoverflow.com/a/56151260 + for path in dir_name.glob("**/*"): + if path.is_file(): + path.unlink() + elif path.is_dir(): + shutil.rmtree(path) \ No newline at end of file diff --git a/utils/run_cleanup.sh b/utils/run_cleanup.sh new file mode 100644 index 0000000..e140a9d --- /dev/null +++ b/utils/run_cleanup.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +set -e + +vp_dir=Voice-Privacy-Challenge-2020/baseline +cd $vp_dir + +./cleanup.sh \ No newline at end of file