diff --git a/Dockerfile b/Dockerfile index 4f54953..2253f55 100644 --- a/Dockerfile +++ b/Dockerfile @@ -23,6 +23,7 @@ RUN cd espeak-ng-1.51 && \ --without-speechplayer \ --without-mbrola \ --without-sonic \ + --with-extdict-cmn \ --prefix=/usr && \ make -j8 src/espeak-ng src/speak-ng && \ make && \ @@ -35,25 +36,25 @@ RUN mkdir -p /usr/local/include/onnxruntime && \ --strip-components 1 \ -xvf "lib/onnxruntime-linux-${TARGETARCH}${TARGETVARIANT}.tgz" -# Build larynx binary +# Build piper binary COPY Makefile ./ COPY src/cpp/ ./src/cpp/ RUN make no-pcaudio # Do a test run -RUN /build/build/larynx --help +RUN /build/build/piper --help # Build .tar.gz to keep symlinks WORKDIR /dist -RUN mkdir -p larynx && \ - cp -d /usr/lib64/libespeak-ng.so* ./larynx/ && \ - cp -dR /usr/share/espeak-ng-data ./larynx/ && \ - cp -d /usr/local/include/onnxruntime/lib/libonnxruntime.so.* ./larynx/ && \ - cp /build/build/larynx ./larynx/ && \ - tar -czf "larynx_${TARGETARCH}${TARGETVARIANT}.tar.gz" larynx/ +RUN mkdir -p piper && \ + cp -d /usr/lib64/libespeak-ng.so* ./piper/ && \ + cp -dR /usr/share/espeak-ng-data ./piper/ && \ + cp -d /usr/local/include/onnxruntime/lib/libonnxruntime.so.* ./piper/ && \ + cp /build/build/piper ./piper/ && \ + tar -czf "piper_${TARGETARCH}${TARGETVARIANT}.tar.gz" piper/ # ----------------------------------------------------------------------------- FROM scratch -COPY --from=build /dist/larynx_*.tar.gz ./ +COPY --from=build /dist/piper_*.tar.gz ./ diff --git a/Dockerfile.test b/Dockerfile.test index 14e351d..bf31ebe 100644 --- a/Dockerfile.test +++ b/Dockerfile.test @@ -5,9 +5,9 @@ ARG TARGETVARIANT COPY local/en-us/ljspeech/low/en-us-ljspeech-low.onnx \ local/en-us/ljspeech/low/en-us-ljspeech-low.onnx.json ./ -ADD dist/linux_${TARGETARCH}${TARGETVARIANT}/larynx_${TARGETARCH}${TARGETVARIANT}.tar.gz ./ +ADD dist/linux_${TARGETARCH}${TARGETVARIANT}/piper_${TARGETARCH}${TARGETVARIANT}.tar.gz ./ -RUN cd larynx/ && echo 'This is a test.' 
| ./larynx -m ../en-us-ljspeech-low.onnx -f test.wav -RUN if [ ! -f larynx/test.wav ]; then exit 1; fi -RUN size="$(wc -c < larynx/test.wav)"; \ +RUN cd piper/ && echo 'This is a test.' | ./piper -m ../en-us-ljspeech-low.onnx -f test.wav +RUN if [ ! -f piper/test.wav ]; then exit 1; fi +RUN size="$(wc -c < piper/test.wav)"; \ if [ "${size}" -lt "1000" ]; then echo "File size is ${size} bytes"; exit 1; fi diff --git a/README.md b/README.md index cbbf4de..7075335 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,15 @@ -# Larynx +![Piper logo](etc/logo.png) A fast, local neural text to speech system. ``` sh echo 'Welcome to the world of speech synthesis!' | \ - ./larynx --model en-us-blizzard_lessac-medium.onnx --output_file welcome.wav + ./piper --model en-us-blizzard_lessac-medium.onnx --output_file welcome.wav ``` ## Voices -Download voices from [the release](https://github.com/rhasspy/larynx2/releases/tag/v0.0.2). +Download voices from [the release](https://github.com/rhasspy/piper/releases/tag/v0.0.2). Supported languages: @@ -30,7 +30,7 @@ Supported languages: ## Purpose -Larynx is meant to sound good and run reasonably fast on the Raspberry Pi 4. +Piper (formerly Larynx 2) is meant to sound good and run reasonably fast on the Raspberry Pi 4. Voices are trained with [VITS](https://github.com/jaywalnut310/vits/) and exported to the [onnxruntime](https://onnxruntime.ai/). 
@@ -39,8 +39,8 @@ Voices are trained with [VITS](https://github.com/jaywalnut310/vits/) and export Download a release: -* [amd64](https://github.com/rhasspy/larynx2/releases/download/v0.0.2/larynx_amd64.tar.gz) (desktop Linux) -* [arm64](https://github.com/rhasspy/larynx2/releases/download/v0.0.2/larynx_arm64.tar.gz) (Raspberry Pi 4) +* [amd64](https://github.com/rhasspy/piper/releases/download/v0.0.2/piper_amd64.tar.gz) (desktop Linux) +* [arm64](https://github.com/rhasspy/piper/releases/download/v0.0.2/piper_arm64.tar.gz) (Raspberry Pi 4) If you want to build from source, see the [Makefile](Makefile) and [C++ source](src/cpp). Last tested with [onnxruntime](https://github.com/microsoft/onnxruntime) 1.13.1. @@ -48,18 +48,18 @@ If you want to build from source, see the [Makefile](Makefile) and [C++ source]( ## Usage 1. [Download a voice](#voices) and extract the `.onnx` and `.onnx.json` files -2. Run the `larynx` binary with text on standard input, `--model /path/to/your-voice.onnx`, and `--output_file output.wav` +2. Run the `piper` binary with text on standard input, `--model /path/to/your-voice.onnx`, and `--output_file output.wav` For example: ``` sh echo 'Welcome to the world of speech synthesis!' | \ - ./larynx --model blizzard_lessac-medium.onnx --output_file welcome.wav + ./piper --model blizzard_lessac-medium.onnx --output_file welcome.wav ``` For multi-speaker models, use `--speaker ` to change speakers (default: 0). -See `larynx --help` for more options. +See `piper --help` for more options. 
## Training @@ -69,7 +69,7 @@ See [src/python](src/python) Start by creating a virtual environment: ``` sh -cd larynx2/src/python +cd piper/src/python python3 -m venv .venv source .venv/bin/activate pip3 install --upgrade pip @@ -84,7 +84,7 @@ Ensure you have [espeak-ng](https://github.com/espeak-ng/espeak-ng/) installed ( Next, preprocess your dataset: ``` sh -python3 -m larynx_train.preprocess \ +python3 -m piper_train.preprocess \ --language en-us \ --input-dir /path/to/ljspeech/ \ --output-dir /path/to/training_dir/ \ @@ -97,7 +97,7 @@ Datasets must either be in the [LJSpeech](https://keithito.com/LJ-Speech-Dataset Finally, you can train: ``` sh -python3 -m larynx_train \ +python3 -m piper_train \ --dataset-dir /path/to/training_dir/ \ --accelerator 'gpu' \ --devices 1 \ @@ -108,7 +108,7 @@ python3 -m larynx_train \ --precision 32 ``` -Training uses [PyTorch Lightning](https://www.pytorchlightning.ai/). Run `tensorboard --logdir /path/to/training_dir/lightning_logs` to monitor. See `python3 -m larynx_train --help` for many additional options. +Training uses [PyTorch Lightning](https://www.pytorchlightning.ai/). Run `tensorboard --logdir /path/to/training_dir/lightning_logs` to monitor. See `python3 -m piper_train --help` for many additional options. It is highly recommended to train with the following `Dockerfile`: @@ -121,11 +121,11 @@ RUN pip3 install \ ENV NUMBA_CACHE_DIR=.numba_cache ``` -See the various `infer_*` and `export_*` scripts in [src/python/larynx_train](src/python/larynx_train) to test and export your voice from the checkpoint in `lightning_logs`. The `dataset.jsonl` file in your training directory can be used with `python3 -m larynx_train.infer` for quick testing: +See the various `infer_*` and `export_*` scripts in [src/python/piper_train](src/python/piper_train) to test and export your voice from the checkpoint in `lightning_logs`. 
The `dataset.jsonl` file in your training directory can be used with `python3 -m piper_train.infer` for quick testing: ``` sh head -n5 /path/to/training_dir/dataset.jsonl | \ - python3 -m larynx_train.infer \ + python3 -m piper_train.infer \ --checkpoint lightning_logs/path/to/checkpoint.ckpt \ --sample-rate 22050 \ --output-dir wavs @@ -139,7 +139,7 @@ See [src/python_run](src/python_run) Run `scripts/setup.sh` to create a virtual environment and install the requirements. Then run: ``` sh -echo 'Welcome to the world of speech synthesis!' | scripts/larynx \ +echo 'Welcome to the world of speech synthesis!' | scripts/piper \ --model /path/to/voice.onnx \ --output_file welcome.wav ``` @@ -151,5 +151,5 @@ If you'd like to use a GPU, install the `onnxruntime-gpu` package: .venv/bin/pip3 install onnxruntime-gpu ``` -and then run `scripts/larynx` with the `--cuda` argument. You will need to have a functioning CUDA environment, such as what's available in [NVIDIA's PyTorch containers](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch). +and then run `scripts/piper` with the `--cuda` argument. You will need to have a functioning CUDA environment, such as what's available in [NVIDIA's PyTorch containers](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch). 
diff --git a/etc/logo.png b/etc/logo.png new file mode 100644 index 0000000..fbb8705 Binary files /dev/null and b/etc/logo.png differ diff --git a/etc/logo.svg b/etc/logo.svg new file mode 100644 index 0000000..1943a5e --- /dev/null +++ b/etc/logo.svg @@ -0,0 +1,151 @@ + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/cpp/CMakeLists.txt b/src/cpp/CMakeLists.txt index 4767b97..ef06d9f 100644 --- a/src/cpp/CMakeLists.txt +++ b/src/cpp/CMakeLists.txt @@ -2,12 +2,12 @@ cmake_minimum_required(VERSION 3.13) include(CheckIncludeFileCXX) -project(larynx C CXX) +project(piper C CXX) set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) -ADD_EXECUTABLE(larynx main.cpp) +ADD_EXECUTABLE(piper main.cpp) string(APPEND CMAKE_CXX_FLAGS " -Wall -Wextra -Wl,-rpath,'$ORIGIN'") string(APPEND CMAKE_C_FLAGS " -Wall -Wextra") @@ -21,26 +21,26 @@ check_include_file_cxx("pcaudiolib/audio.h" PCAUDIO_INCLUDE_FOUND) if(PCAUDIO_INCLUDE_FOUND) option(USE_PCAUDIO "Build with pcaudiolib" ON) if(USE_PCAUDIO) - target_compile_definitions(larynx PUBLIC HAVE_PCAUDIO) + target_compile_definitions(piper PUBLIC HAVE_PCAUDIO) set(PCAUDIO_LIBRARIES "pcaudio") endif() endif() set(ONNXRUNTIME_ROOTDIR "/usr/local/include/onnxruntime") -target_link_libraries(larynx +target_link_libraries(piper onnxruntime -static-libgcc -static-libstdc++ ${ESPEAK_NG_LIBRARIES} ${PCAUDIO_LIBRARIES}) -target_link_directories(larynx PUBLIC +target_link_directories(piper PUBLIC ${ESPEAK_NG_LIBRARY_DIRS} ${ONNXRUNTIME_ROOTDIR}/lib) -target_include_directories(larynx PUBLIC +target_include_directories(piper PUBLIC ${ONNXRUNTIME_ROOTDIR}/include ${ESPEAK_NG_INCLUDE_DIRS}) -target_compile_options(larynx PUBLIC +target_compile_options(piper PUBLIC ${ESPEAK_NG_CFLAGS_OTHER}) diff --git a/src/cpp/config.hpp b/src/cpp/config.hpp index a50f16c..2083471 100644 --- a/src/cpp/config.hpp +++ b/src/cpp/config.hpp @@ -15,7 +15,7 @@ using namespace std; using json = nlohmann::json; 
-namespace larynx { +namespace piper { typedef char32_t Phoneme; typedef int64_t PhonemeId; @@ -145,6 +145,6 @@ void parseModelConfig(json &configRoot, ModelConfig &modelConfig) { } /* parseModelConfig */ -} // namespace larynx +} // namespace piper #endif // CONFIG_H_ diff --git a/src/cpp/main.cpp b/src/cpp/main.cpp index 5b8aa49..f7d96e7 100644 --- a/src/cpp/main.cpp +++ b/src/cpp/main.cpp @@ -12,7 +12,7 @@ #include #endif -#include "larynx.hpp" +#include "piper.hpp" using namespace std; @@ -23,7 +23,7 @@ struct RunConfig { filesystem::path modelConfigPath; OutputType outputType = OUTPUT_PLAY; optional outputPath; - optional speakerId; + optional speakerId; optional noiseScale; optional lengthScale; optional noiseW; @@ -36,9 +36,9 @@ int main(int argc, char *argv[]) { parseArgs(argc, argv, runConfig); auto exePath = filesystem::path(argv[0]); - larynx::initialize(exePath.parent_path()); + piper::initialize(exePath.parent_path()); - larynx::Voice voice; + piper::Voice voice; auto startTime = chrono::steady_clock::now(); loadVoice(runConfig.modelPath.string(), runConfig.modelConfigPath.string(), voice, runConfig.speakerId); @@ -64,7 +64,7 @@ int main(int argc, char *argv[]) { if (runConfig.outputType == OUTPUT_PLAY) { // Output audio to the default audio device - my_audio = create_audio_device_object(NULL, "larynx", "Text-to-Speech"); + my_audio = create_audio_device_object(NULL, "piper", "Text-to-Speech"); // TODO: Support 32-bit sample widths auto audioFormat = AUDIO_OBJECT_FORMAT_S16LE; @@ -78,7 +78,7 @@ int main(int argc, char *argv[]) { #else if (runConfig.outputType == OUTPUT_PLAY) { // Cannot play audio directly - cerr << "WARNING: Larynx was not compiled with pcaudiolib. Output audio " + cerr << "WARNING: Piper was not compiled with pcaudiolib. Output audio " "will be written to the current directory." 
<< endl; runConfig.outputType = OUTPUT_DIRECTORY; @@ -92,7 +92,7 @@ int main(int argc, char *argv[]) { } string line; - larynx::SynthesisResult result; + piper::SynthesisResult result; while (getline(cin, line)) { // Path to output WAV file @@ -108,19 +108,19 @@ int main(int argc, char *argv[]) { // Output audio to automatically-named WAV file in a directory ofstream audioFile(outputPath.string(), ios::binary); - larynx::textToWavFile(voice, line, audioFile, result); + piper::textToWavFile(voice, line, audioFile, result); cout << outputPath.string() << endl; } else if (runConfig.outputType == OUTPUT_FILE) { // Output audio to WAV file ofstream audioFile(runConfig.outputPath.value().string(), ios::binary); - larynx::textToWavFile(voice, line, audioFile, result); + piper::textToWavFile(voice, line, audioFile, result); } else if (runConfig.outputType == OUTPUT_STDOUT) { // Output WAV to stdout - larynx::textToWavFile(voice, line, cout, result); + piper::textToWavFile(voice, line, cout, result); } else if (runConfig.outputType == OUTPUT_PLAY) { #ifdef HAVE_PCAUDIO vector audioBuffer; - larynx::textToAudio(voice, line, audioBuffer, result); + piper::textToAudio(voice, line, audioBuffer, result); int error = audio_object_write(my_audio, (const char *)audioBuffer.data(), sizeof(int16_t) * audioBuffer.size()); @@ -138,7 +138,7 @@ int main(int argc, char *argv[]) { << " sec, audio=" << result.audioSeconds << " sec)" << endl; } - larynx::terminate(); + piper::terminate(); #ifdef HAVE_PCAUDIO audio_object_close(my_audio); @@ -211,7 +211,7 @@ void parseArgs(int argc, char *argv[], RunConfig &runConfig) { runConfig.outputPath = filesystem::path(argv[++i]); } else if (arg == "-s" || arg == "--speaker") { ensureArg(argc, argv, i); - runConfig.speakerId = (larynx::SpeakerId)stol(argv[++i]); + runConfig.speakerId = (piper::SpeakerId)stol(argv[++i]); } else if (arg == "--noise-scale") { ensureArg(argc, argv, i); runConfig.noiseScale = stof(argv[++i]); diff --git a/src/cpp/model.hpp 
b/src/cpp/model.hpp index b82897b..4adcf82 100644 --- a/src/cpp/model.hpp +++ b/src/cpp/model.hpp @@ -7,8 +7,8 @@ using namespace std; -namespace larynx { -const string instanceName{"larynx"}; +namespace piper { +const string instanceName{"piper"}; struct ModelSession { Ort::Session onnx; @@ -48,6 +48,6 @@ void loadModel(string modelPath, ModelSession &session) { auto loadDuration = chrono::duration(endTime - startTime); } -} // namespace larynx +} // namespace piper #endif // MODEL_H_ diff --git a/src/cpp/phonemize.hpp b/src/cpp/phonemize.hpp index 1c89b53..ddad088 100644 --- a/src/cpp/phonemize.hpp +++ b/src/cpp/phonemize.hpp @@ -16,7 +16,7 @@ using namespace std; -namespace larynx { +namespace piper { // Text to phonemes using eSpeak-ng void phonemize(PhonemizeConfig &phonemizeConfig) { @@ -103,6 +103,6 @@ void phonemes2ids(PhonemizeConfig &phonemizeConfig, } /* phonemes2ids */ -} // namespace larynx +} // namespace piper #endif // PHONEMIZE_H_ diff --git a/src/cpp/larynx.hpp b/src/cpp/piper.hpp similarity index 96% rename from src/cpp/larynx.hpp rename to src/cpp/piper.hpp index eb742cd..59bd65b 100644 --- a/src/cpp/larynx.hpp +++ b/src/cpp/piper.hpp @@ -1,5 +1,5 @@ -#ifndef LARYNX_H_ -#define LARYNX_H_ +#ifndef PIPER_H_ +#define PIPER_H_ #include #include @@ -17,7 +17,7 @@ using json = nlohmann::json; -namespace larynx { +namespace piper { struct Voice { json configRoot; @@ -106,6 +106,6 @@ void textToWavFile(Voice &voice, string text, ostream &audioFile, } /* textToWavFile */ -} // namespace larynx +} // namespace piper -#endif // LARYNX_H_ +#endif // PIPER_H_ diff --git a/src/cpp/synthesize.hpp b/src/cpp/synthesize.hpp index ef61aef..04021aa 100644 --- a/src/cpp/synthesize.hpp +++ b/src/cpp/synthesize.hpp @@ -14,7 +14,7 @@ using namespace std; -namespace larynx { +namespace piper { // Maximum value for 16-bit signed WAV sample const float MAX_WAV_VALUE = 32767.0f; @@ -126,6 +126,6 @@ void synthesize(SynthesisConfig &synthesisConfig, ModelSession &session, 
Ort::OrtRelease(inputTensors[i].release()); } } -} // namespace larynx +} // namespace piper #endif // SYNTHESIZE_H_ diff --git a/src/cpp/wavfile.hpp b/src/cpp/wavfile.hpp index 4a453b7..e99caf6 100644 --- a/src/cpp/wavfile.hpp +++ b/src/cpp/wavfile.hpp @@ -3,7 +3,7 @@ #include -namespace larynx { +namespace piper { struct WavHeader { uint8_t RIFF[4] = {'R', 'I', 'F', 'F'}; @@ -39,6 +39,6 @@ void writeWavHeader(int sampleRate, int sampleWidth, int channels, } /* writeWavHeader */ -} // namespace larynx +} // namespace piper #endif // WAVFILE_H_ diff --git a/src/python/build_monotonic_align.sh b/src/python/build_monotonic_align.sh index 921cbdb..ee27a5a 100755 --- a/src/python/build_monotonic_align.sh +++ b/src/python/build_monotonic_align.sh @@ -7,7 +7,7 @@ if [ -d "${this_dir}/.venv" ]; then source "${this_dir}/.venv/bin/activate" fi -cd "${this_dir}/larynx_train/vits/monotonic_align" +cd "${this_dir}/piper_train/vits/monotonic_align" mkdir -p monotonic_align cythonize -i core.pyx mv core*.so monotonic_align/ diff --git a/src/python/larynx_train/.gitignore b/src/python/piper_train/.gitignore similarity index 100% rename from src/python/larynx_train/.gitignore rename to src/python/piper_train/.gitignore diff --git a/src/python/larynx_train/.isort.cfg b/src/python/piper_train/.isort.cfg similarity index 100% rename from src/python/larynx_train/.isort.cfg rename to src/python/piper_train/.isort.cfg diff --git a/src/python/larynx_train/VERSION b/src/python/piper_train/VERSION similarity index 100% rename from src/python/larynx_train/VERSION rename to src/python/piper_train/VERSION diff --git a/src/python/larynx_train/__init__.py b/src/python/piper_train/__init__.py similarity index 100% rename from src/python/larynx_train/__init__.py rename to src/python/piper_train/__init__.py diff --git a/src/python/larynx_train/__main__.py b/src/python/piper_train/__main__.py similarity index 100% rename from src/python/larynx_train/__main__.py rename to 
src/python/piper_train/__main__.py diff --git a/src/python/larynx_train/_resources.py b/src/python/piper_train/_resources.py similarity index 94% rename from src/python/larynx_train/_resources.py rename to src/python/piper_train/_resources.py index f1a2232..874f72f 100644 --- a/src/python/larynx_train/_resources.py +++ b/src/python/piper_train/_resources.py @@ -13,7 +13,7 @@ except (ImportError, AttributeError): files = importlib_resources.files -_PACKAGE = "larynx_train" +_PACKAGE = "piper_train" _DIR = Path(typing.cast(os.PathLike, files(_PACKAGE))) __version__ = (_DIR / "VERSION").read_text(encoding="utf-8").strip() diff --git a/src/python/larynx_train/export_generator.py b/src/python/piper_train/export_generator.py similarity index 95% rename from src/python/larynx_train/export_generator.py rename to src/python/piper_train/export_generator.py index 163e9ab..7061507 100644 --- a/src/python/larynx_train/export_generator.py +++ b/src/python/piper_train/export_generator.py @@ -7,7 +7,7 @@ import torch from .vits.lightning import VitsModel -_LOGGER = logging.getLogger("larynx_train.export_generator") +_LOGGER = logging.getLogger("piper_train.export_generator") def main(): diff --git a/src/python/larynx_train/export_onnx.py b/src/python/piper_train/export_onnx.py similarity index 98% rename from src/python/larynx_train/export_onnx.py rename to src/python/piper_train/export_onnx.py index 4ebff9b..bdb8abc 100644 --- a/src/python/larynx_train/export_onnx.py +++ b/src/python/piper_train/export_onnx.py @@ -8,7 +8,7 @@ import torch from .vits.lightning import VitsModel -_LOGGER = logging.getLogger("larynx_train.export_onnx") +_LOGGER = logging.getLogger("piper_train.export_onnx") OPSET_VERSION = 15 diff --git a/src/python/larynx_train/export_torchscript.py b/src/python/piper_train/export_torchscript.py similarity index 97% rename from src/python/larynx_train/export_torchscript.py rename to src/python/piper_train/export_torchscript.py index 10718af..80e413f 100644 --- 
a/src/python/larynx_train/export_torchscript.py +++ b/src/python/piper_train/export_torchscript.py @@ -8,7 +8,7 @@ import torch from .vits.lightning import VitsModel -_LOGGER = logging.getLogger("larynx_train.export_torchscript") +_LOGGER = logging.getLogger("piper_train.export_torchscript") def main(): diff --git a/src/python/larynx_train/infer.py b/src/python/piper_train/infer.py similarity index 95% rename from src/python/larynx_train/infer.py rename to src/python/piper_train/infer.py index aed620f..bc535d5 100644 --- a/src/python/larynx_train/infer.py +++ b/src/python/piper_train/infer.py @@ -12,13 +12,13 @@ from .vits.lightning import VitsModel from .vits.utils import audio_float_to_int16 from .vits.wavfile import write as write_wav -_LOGGER = logging.getLogger("larynx_train.infer") +_LOGGER = logging.getLogger("piper_train.infer") def main(): """Main entry point""" logging.basicConfig(level=logging.DEBUG) - parser = argparse.ArgumentParser(prog="larynx_train.infer") + parser = argparse.ArgumentParser(prog="piper_train.infer") parser.add_argument( "--checkpoint", required=True, help="Path to model checkpoint (.ckpt)" ) diff --git a/src/python/larynx_train/infer_generator.py b/src/python/piper_train/infer_generator.py similarity index 94% rename from src/python/larynx_train/infer_generator.py rename to src/python/piper_train/infer_generator.py index 308a87e..fe4f348 100644 --- a/src/python/larynx_train/infer_generator.py +++ b/src/python/piper_train/infer_generator.py @@ -11,13 +11,13 @@ import torch from .vits.utils import audio_float_to_int16 from .vits.wavfile import write as write_wav -_LOGGER = logging.getLogger("larynx_train.infer_generator") +_LOGGER = logging.getLogger("piper_train.infer_generator") def main(): """Main entry point""" logging.basicConfig(level=logging.DEBUG) - parser = argparse.ArgumentParser(prog="larynx_train.infer_generator") + parser = argparse.ArgumentParser(prog="piper_train.infer_generator") parser.add_argument("--model", 
required=True, help="Path to generator (.pt)") parser.add_argument("--output-dir", required=True, help="Path to write WAV files") parser.add_argument("--sample-rate", type=int, default=22050) diff --git a/src/python/larynx_train/infer_onnx.py b/src/python/piper_train/infer_onnx.py similarity index 98% rename from src/python/larynx_train/infer_onnx.py rename to src/python/piper_train/infer_onnx.py index 6f98a63..3816cdb 100644 --- a/src/python/larynx_train/infer_onnx.py +++ b/src/python/piper_train/infer_onnx.py @@ -13,13 +13,13 @@ import onnxruntime from .vits.utils import audio_float_to_int16 from .vits.wavfile import write as write_wav -_LOGGER = logging.getLogger("larynx_train.infer_onnx") +_LOGGER = logging.getLogger("piper_train.infer_onnx") def main(): """Main entry point""" logging.basicConfig(level=logging.DEBUG) - parser = argparse.ArgumentParser(prog="larynx_train.infer_onnx") + parser = argparse.ArgumentParser(prog="piper_train.infer_onnx") parser.add_argument("--model", required=True, help="Path to model (.onnx)") parser.add_argument("--output-dir", required=True, help="Path to write WAV files") parser.add_argument("--sample-rate", type=int, default=22050) diff --git a/src/python/larynx_train/infer_torchscript.py b/src/python/piper_train/infer_torchscript.py similarity index 94% rename from src/python/larynx_train/infer_torchscript.py rename to src/python/piper_train/infer_torchscript.py index fc163f5..2344863 100755 --- a/src/python/larynx_train/infer_torchscript.py +++ b/src/python/piper_train/infer_torchscript.py @@ -11,13 +11,13 @@ import torch from .vits.utils import audio_float_to_int16 from .vits.wavfile import write as write_wav -_LOGGER = logging.getLogger("larynx_train.infer_torchscript") +_LOGGER = logging.getLogger("piper_train.infer_torchscript") def main(): """Main entry point""" logging.basicConfig(level=logging.DEBUG) - parser = argparse.ArgumentParser(prog="larynx_train.infer_torchscript") + parser = 
argparse.ArgumentParser(prog="piper_train.infer_torchscript") parser.add_argument( "--model", required=True, help="Path to torchscript checkpoint (.ts)" ) diff --git a/src/python/larynx_train/norm_audio/__init__.py b/src/python/piper_train/norm_audio/__init__.py similarity index 97% rename from src/python/larynx_train/norm_audio/__init__.py rename to src/python/piper_train/norm_audio/__init__.py index cb0df33..dc024d4 100644 --- a/src/python/larynx_train/norm_audio/__init__.py +++ b/src/python/piper_train/norm_audio/__init__.py @@ -5,7 +5,7 @@ from typing import Optional, Tuple, Union import librosa import torch -from larynx_train.vits.mel_processing import spectrogram_torch +from piper_train.vits.mel_processing import spectrogram_torch from .trim import trim_silence from .vad import SileroVoiceActivityDetector diff --git a/src/python/larynx_train/norm_audio/models/silero_vad.onnx b/src/python/piper_train/norm_audio/models/silero_vad.onnx similarity index 100% rename from src/python/larynx_train/norm_audio/models/silero_vad.onnx rename to src/python/piper_train/norm_audio/models/silero_vad.onnx diff --git a/src/python/larynx_train/norm_audio/trim.py b/src/python/piper_train/norm_audio/trim.py similarity index 100% rename from src/python/larynx_train/norm_audio/trim.py rename to src/python/piper_train/norm_audio/trim.py diff --git a/src/python/larynx_train/norm_audio/vad.py b/src/python/piper_train/norm_audio/vad.py similarity index 100% rename from src/python/larynx_train/norm_audio/vad.py rename to src/python/piper_train/norm_audio/vad.py diff --git a/src/python/larynx_train/phonemize.py b/src/python/piper_train/phonemize.py similarity index 100% rename from src/python/larynx_train/phonemize.py rename to src/python/piper_train/phonemize.py diff --git a/src/python/larynx_train/preprocess.py b/src/python/piper_train/preprocess.py similarity index 100% rename from src/python/larynx_train/preprocess.py rename to src/python/piper_train/preprocess.py diff --git 
a/src/python/larynx_train/py.typed b/src/python/piper_train/py.typed similarity index 100% rename from src/python/larynx_train/py.typed rename to src/python/piper_train/py.typed diff --git a/src/python/larynx_train/pylintrc b/src/python/piper_train/pylintrc similarity index 100% rename from src/python/larynx_train/pylintrc rename to src/python/piper_train/pylintrc diff --git a/src/python/larynx_train/setup.cfg b/src/python/piper_train/setup.cfg similarity index 100% rename from src/python/larynx_train/setup.cfg rename to src/python/piper_train/setup.cfg diff --git a/src/python/larynx_train/vits/__init__.py b/src/python/piper_train/vits/__init__.py similarity index 100% rename from src/python/larynx_train/vits/__init__.py rename to src/python/piper_train/vits/__init__.py diff --git a/src/python/larynx_train/vits/attentions.py b/src/python/piper_train/vits/attentions.py similarity index 100% rename from src/python/larynx_train/vits/attentions.py rename to src/python/piper_train/vits/attentions.py diff --git a/src/python/larynx_train/vits/commons.py b/src/python/piper_train/vits/commons.py similarity index 100% rename from src/python/larynx_train/vits/commons.py rename to src/python/piper_train/vits/commons.py diff --git a/src/python/larynx_train/vits/config.py b/src/python/piper_train/vits/config.py similarity index 100% rename from src/python/larynx_train/vits/config.py rename to src/python/piper_train/vits/config.py diff --git a/src/python/larynx_train/vits/dataset.py b/src/python/piper_train/vits/dataset.py similarity index 93% rename from src/python/larynx_train/vits/dataset.py rename to src/python/piper_train/vits/dataset.py index fc6841d..258425f 100644 --- a/src/python/larynx_train/vits/dataset.py +++ b/src/python/piper_train/vits/dataset.py @@ -44,16 +44,7 @@ class Batch: speaker_ids: Optional[LongTensor] = None -# @dataclass -# class LarynxDatasetSettings: -# sample_rate: int -# is_multispeaker: bool -# espeak_voice: Optional[str] = None -# phoneme_map: 
Dict[str, Optional[List[str]]] = field(default_factory=dict) -# phoneme_id_map: Dict[str, List[int]] = DEFAULT_PHONEME_ID_MAP - - -class LarynxDataset(Dataset): +class PiperDataset(Dataset): """ Dataset format: @@ -76,9 +67,7 @@ class LarynxDataset(Dataset): dataset_path = Path(dataset_path) _LOGGER.debug("Loading dataset: %s", dataset_path) self.utterances.extend( - LarynxDataset.load_dataset( - dataset_path, max_phoneme_ids=max_phoneme_ids - ) + PiperDataset.load_dataset(dataset_path, max_phoneme_ids=max_phoneme_ids) ) def __len__(self): @@ -110,7 +99,7 @@ class LarynxDataset(Dataset): continue try: - utt = LarynxDataset.load_utterance(line) + utt = PiperDataset.load_utterance(line) if (max_phoneme_ids is None) or ( len(utt.phoneme_ids) <= max_phoneme_ids ): diff --git a/src/python/larynx_train/vits/lightning.py b/src/python/piper_train/vits/lightning.py similarity index 99% rename from src/python/larynx_train/vits/lightning.py rename to src/python/piper_train/vits/lightning.py index 441a038..c6b7250 100644 --- a/src/python/larynx_train/vits/lightning.py +++ b/src/python/piper_train/vits/lightning.py @@ -9,7 +9,7 @@ from torch.nn import functional as F from torch.utils.data import DataLoader, Dataset, random_split from .commons import slice_segments -from .dataset import Batch, LarynxDataset, UtteranceCollate +from .dataset import Batch, PiperDataset, UtteranceCollate from .losses import discriminator_loss, feature_loss, generator_loss, kl_loss from .mel_processing import mel_spectrogram_torch, spec_to_mel_torch from .models import MultiPeriodDiscriminator, SynthesizerTrn @@ -128,7 +128,7 @@ class VitsModel(pl.LightningModule): _LOGGER.debug("No dataset to load") return - full_dataset = LarynxDataset( + full_dataset = PiperDataset( self.hparams.dataset, max_phoneme_ids=max_phoneme_ids ) valid_set_size = int(len(full_dataset) * validation_split) diff --git a/src/python/larynx_train/vits/losses.py b/src/python/piper_train/vits/losses.py similarity index 100% rename 
from src/python/larynx_train/vits/losses.py rename to src/python/piper_train/vits/losses.py diff --git a/src/python/larynx_train/vits/mel_processing.py b/src/python/piper_train/vits/mel_processing.py similarity index 100% rename from src/python/larynx_train/vits/mel_processing.py rename to src/python/piper_train/vits/mel_processing.py diff --git a/src/python/larynx_train/vits/models.py b/src/python/piper_train/vits/models.py similarity index 100% rename from src/python/larynx_train/vits/models.py rename to src/python/piper_train/vits/models.py diff --git a/src/python/larynx_train/vits/modules.py b/src/python/piper_train/vits/modules.py similarity index 100% rename from src/python/larynx_train/vits/modules.py rename to src/python/piper_train/vits/modules.py diff --git a/src/python/larynx_train/vits/monotonic_align/Makefile b/src/python/piper_train/vits/monotonic_align/Makefile similarity index 100% rename from src/python/larynx_train/vits/monotonic_align/Makefile rename to src/python/piper_train/vits/monotonic_align/Makefile diff --git a/src/python/larynx_train/vits/monotonic_align/__init__.py b/src/python/piper_train/vits/monotonic_align/__init__.py similarity index 100% rename from src/python/larynx_train/vits/monotonic_align/__init__.py rename to src/python/piper_train/vits/monotonic_align/__init__.py diff --git a/src/python/larynx_train/vits/monotonic_align/core.c b/src/python/piper_train/vits/monotonic_align/core.c similarity index 98% rename from src/python/larynx_train/vits/monotonic_align/core.c rename to src/python/piper_train/vits/monotonic_align/core.c index f88960d..8b07e5c 100644 --- a/src/python/larynx_train/vits/monotonic_align/core.c +++ b/src/python/piper_train/vits/monotonic_align/core.c @@ -1,14 +1,14 @@ -/* Generated by Cython 0.29.32 */ +/* Generated by Cython 0.29.33 */ /* BEGIN: Cython Metadata { "distutils": { - "name": "larynx_train.vits.monotonic_align.core", + "name": "piper_train.vits.monotonic_align.core", "sources": [ - 
"/home/hansenm/opt/larynx2/src/python/larynx_train/vits/monotonic_align/core.pyx" + "/home/hansenm/opt/larynx2/src/python/piper_train/vits/monotonic_align/core.pyx" ] }, - "module_name": "larynx_train.vits.monotonic_align.core" + "module_name": "piper_train.vits.monotonic_align.core" } END: Cython Metadata */ @@ -21,8 +21,8 @@ END: Cython Metadata */ #elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000) #error Cython requires Python 2.6+ or Python 3.3+. #else -#define CYTHON_ABI "0_29_32" -#define CYTHON_HEX_VERSION 0x001D20F0 +#define CYTHON_ABI "0_29_33" +#define CYTHON_HEX_VERSION 0x001D21F0 #define CYTHON_FUTURE_DIVISION 0 #include #ifndef offsetof @@ -99,7 +99,7 @@ END: Cython Metadata */ #undef CYTHON_USE_EXC_INFO_STACK #define CYTHON_USE_EXC_INFO_STACK 0 #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC - #define CYTHON_UPDATE_DESCRIPTOR_DOC (PYPY_VERSION_HEX >= 0x07030900) + #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 #endif #elif defined(PYSTON_VERSION) #define CYTHON_COMPILING_IN_PYPY 0 @@ -564,11 +564,11 @@ static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) { #endif #if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND) #define CYTHON_PEP393_ENABLED 1 - #if defined(PyUnicode_IS_READY) - #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\ - 0 : _PyUnicode_Ready((PyObject *)(op))) + #if PY_VERSION_HEX >= 0x030C0000 + #define __Pyx_PyUnicode_READY(op) (0) #else - #define __Pyx_PyUnicode_READY(op) (0) + #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\ + 0 : _PyUnicode_Ready((PyObject *)(op))) #endif #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u) #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i) @@ -577,14 +577,14 @@ static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) { #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u) #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i) #define __Pyx_PyUnicode_WRITE(k, d, i, ch) PyUnicode_WRITE(k, d, 
i, ch) - #if defined(PyUnicode_IS_READY) && defined(PyUnicode_GET_SIZE) - #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03090000 - #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : ((PyCompactUnicodeObject *)(u))->wstr_length)) - #else - #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u))) - #endif + #if PY_VERSION_HEX >= 0x030C0000 + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_LENGTH(u)) #else - #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_LENGTH(u)) + #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03090000 + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : ((PyCompactUnicodeObject *)(u))->wstr_length)) + #else + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u))) + #endif #endif #else #define CYTHON_PEP393_ENABLED 0 @@ -750,8 +750,8 @@ static CYTHON_INLINE float __PYX_NAN() { #endif #endif -#define __PYX_HAVE__larynx_train__vits__monotonic_align__core -#define __PYX_HAVE_API__larynx_train__vits__monotonic_align__core +#define __PYX_HAVE__piper_train__vits__monotonic_align__core +#define __PYX_HAVE_API__piper_train__vits__monotonic_align__core /* Early includes */ #include "pythread.h" #include @@ -1080,16 +1080,16 @@ struct __pyx_array_obj; struct __pyx_MemviewEnum_obj; struct __pyx_memoryview_obj; struct __pyx_memoryviewslice_obj; -struct __pyx_opt_args_12larynx_train_4vits_15monotonic_align_4core_maximum_path_each; +struct __pyx_opt_args_11piper_train_4vits_15monotonic_align_4core_maximum_path_each; -/* "larynx_train/vits/monotonic_align/core.pyx":7 +/* "piper_train/vits/monotonic_align/core.pyx":7 * @cython.boundscheck(False) * @cython.wraparound(False) * cdef void maximum_path_each(int[:,::1] path, float[:,::1] value, int t_y, int t_x, float max_neg_val=-1e9) nogil: # <<<<<<<<<<<<<< 
* cdef int x * cdef int y */ -struct __pyx_opt_args_12larynx_train_4vits_15monotonic_align_4core_maximum_path_each { +struct __pyx_opt_args_11piper_train_4vits_15monotonic_align_4core_maximum_path_each { int __pyx_n; float max_neg_val; }; @@ -1551,18 +1551,18 @@ static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UIN /* GetModuleGlobalName.proto */ #if CYTHON_USE_DICT_VERSIONS -#define __Pyx_GetModuleGlobalName(var, name) {\ +#define __Pyx_GetModuleGlobalName(var, name) do {\ static PY_UINT64_T __pyx_dict_version = 0;\ static PyObject *__pyx_dict_cached_value = NULL;\ (var) = (likely(__pyx_dict_version == __PYX_GET_DICT_VERSION(__pyx_d))) ?\ (likely(__pyx_dict_cached_value) ? __Pyx_NewRef(__pyx_dict_cached_value) : __Pyx_GetBuiltinName(name)) :\ __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\ -} -#define __Pyx_GetModuleGlobalNameUncached(var, name) {\ +} while(0) +#define __Pyx_GetModuleGlobalNameUncached(var, name) do {\ PY_UINT64_T __pyx_dict_version;\ PyObject *__pyx_dict_cached_value;\ (var) = __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\ -} +} while(0) static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value); #else #define __Pyx_GetModuleGlobalName(var, name) (var) = __Pyx__GetModuleGlobalName(name) @@ -1864,7 +1864,7 @@ static PyObject *__pyx_memoryviewslice_assign_item_from_object(struct __pyx_memo /* Module declarations from 'cython' */ -/* Module declarations from 'larynx_train.vits.monotonic_align.core' */ +/* Module declarations from 'piper_train.vits.monotonic_align.core' */ static PyTypeObject *__pyx_array_type = 0; static PyTypeObject *__pyx_MemviewEnum_type = 0; static PyTypeObject *__pyx_memoryview_type = 0; @@ -1876,8 +1876,8 @@ static PyObject *contiguous = 0; static PyObject *indirect_contiguous = 0; static int __pyx_memoryview_thread_locks_used; static PyThread_type_lock 
__pyx_memoryview_thread_locks[8]; -static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_each(__Pyx_memviewslice, __Pyx_memviewslice, int, int, struct __pyx_opt_args_12larynx_train_4vits_15monotonic_align_4core_maximum_path_each *__pyx_optional_args); /*proto*/ -static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_c(__Pyx_memviewslice, __Pyx_memviewslice, __Pyx_memviewslice, __Pyx_memviewslice, int __pyx_skip_dispatch); /*proto*/ +static void __pyx_f_11piper_train_4vits_15monotonic_align_4core_maximum_path_each(__Pyx_memviewslice, __Pyx_memviewslice, int, int, struct __pyx_opt_args_11piper_train_4vits_15monotonic_align_4core_maximum_path_each *__pyx_optional_args); /*proto*/ +static void __pyx_f_11piper_train_4vits_15monotonic_align_4core_maximum_path_c(__Pyx_memviewslice, __Pyx_memviewslice, __Pyx_memviewslice, __Pyx_memviewslice, int __pyx_skip_dispatch); /*proto*/ static struct __pyx_array_obj *__pyx_array_new(PyObject *, Py_ssize_t, char *, char *, char *); /*proto*/ static void *__pyx_align_pointer(void *, size_t); /*proto*/ static PyObject *__pyx_memoryview_new(PyObject *, int, int, __Pyx_TypeInfo *); /*proto*/ @@ -1913,11 +1913,11 @@ static void __pyx_memoryview__slice_assign_scalar(char *, Py_ssize_t *, Py_ssize static PyObject *__pyx_unpickle_Enum__set_state(struct __pyx_MemviewEnum_obj *, PyObject *); /*proto*/ static __Pyx_TypeInfo __Pyx_TypeInfo_int = { "int", NULL, sizeof(int), { 0 }, 0, IS_UNSIGNED(int) ? 
'U' : 'I', IS_UNSIGNED(int), 0 }; static __Pyx_TypeInfo __Pyx_TypeInfo_float = { "float", NULL, sizeof(float), { 0 }, 0, 'R', 0, 0 }; -#define __Pyx_MODULE_NAME "larynx_train.vits.monotonic_align.core" -extern int __pyx_module_is_main_larynx_train__vits__monotonic_align__core; -int __pyx_module_is_main_larynx_train__vits__monotonic_align__core = 0; +#define __Pyx_MODULE_NAME "piper_train.vits.monotonic_align.core" +extern int __pyx_module_is_main_piper_train__vits__monotonic_align__core; +int __pyx_module_is_main_piper_train__vits__monotonic_align__core = 0; -/* Implementation of 'larynx_train.vits.monotonic_align.core' */ +/* Implementation of 'piper_train.vits.monotonic_align.core' */ static PyObject *__pyx_builtin_range; static PyObject *__pyx_builtin_ValueError; static PyObject *__pyx_builtin_MemoryError; @@ -2104,7 +2104,7 @@ static PyObject *__pyx_kp_s_unable_to_allocate_shape_and_str; static PyObject *__pyx_n_s_unpack; static PyObject *__pyx_n_s_update; static PyObject *__pyx_n_s_values; -static PyObject *__pyx_pf_12larynx_train_4vits_15monotonic_align_4core_maximum_path_c(CYTHON_UNUSED PyObject *__pyx_self, __Pyx_memviewslice __pyx_v_paths, __Pyx_memviewslice __pyx_v_values, __Pyx_memviewslice __pyx_v_t_ys, __Pyx_memviewslice __pyx_v_t_xs); /* proto */ +static PyObject *__pyx_pf_11piper_train_4vits_15monotonic_align_4core_maximum_path_c(CYTHON_UNUSED PyObject *__pyx_self, __Pyx_memviewslice __pyx_v_paths, __Pyx_memviewslice __pyx_v_values, __Pyx_memviewslice __pyx_v_t_ys, __Pyx_memviewslice __pyx_v_t_xs); /* proto */ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __pyx_array_obj *__pyx_v_self, PyObject *__pyx_v_shape, Py_ssize_t __pyx_v_itemsize, PyObject *__pyx_v_format, PyObject *__pyx_v_mode, int __pyx_v_allocate_buffer); /* proto */ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(struct __pyx_array_obj *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /* proto */ static void 
__pyx_array___pyx_pf_15View_dot_MemoryView_5array_4__dealloc__(struct __pyx_array_obj *__pyx_v_self); /* proto */ @@ -2186,7 +2186,7 @@ static PyObject *__pyx_tuple__26; static PyObject *__pyx_codeobj__27; /* Late includes */ -/* "larynx_train/vits/monotonic_align/core.pyx":7 +/* "piper_train/vits/monotonic_align/core.pyx":7 * @cython.boundscheck(False) * @cython.wraparound(False) * cdef void maximum_path_each(int[:,::1] path, float[:,::1] value, int t_y, int t_x, float max_neg_val=-1e9) nogil: # <<<<<<<<<<<<<< @@ -2194,7 +2194,7 @@ static PyObject *__pyx_codeobj__27; * cdef int y */ -static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_each(__Pyx_memviewslice __pyx_v_path, __Pyx_memviewslice __pyx_v_value, int __pyx_v_t_y, int __pyx_v_t_x, struct __pyx_opt_args_12larynx_train_4vits_15monotonic_align_4core_maximum_path_each *__pyx_optional_args) { +static void __pyx_f_11piper_train_4vits_15monotonic_align_4core_maximum_path_each(__Pyx_memviewslice __pyx_v_path, __Pyx_memviewslice __pyx_v_value, int __pyx_v_t_y, int __pyx_v_t_x, struct __pyx_opt_args_11piper_train_4vits_15monotonic_align_4core_maximum_path_each *__pyx_optional_args) { float __pyx_v_max_neg_val = __pyx_k_; int __pyx_v_x; int __pyx_v_y; @@ -2223,7 +2223,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea } } - /* "larynx_train/vits/monotonic_align/core.pyx":13 + /* "piper_train/vits/monotonic_align/core.pyx":13 * cdef float v_cur * cdef float tmp * cdef int index = t_x - 1 # <<<<<<<<<<<<<< @@ -2232,7 +2232,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea */ __pyx_v_index = (__pyx_v_t_x - 1); - /* "larynx_train/vits/monotonic_align/core.pyx":15 + /* "piper_train/vits/monotonic_align/core.pyx":15 * cdef int index = t_x - 1 * * for y in range(t_y): # <<<<<<<<<<<<<< @@ -2244,7 +2244,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; 
__pyx_t_3+=1) { __pyx_v_y = __pyx_t_3; - /* "larynx_train/vits/monotonic_align/core.pyx":16 + /* "piper_train/vits/monotonic_align/core.pyx":16 * * for y in range(t_y): * for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)): # <<<<<<<<<<<<<< @@ -2270,7 +2270,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea for (__pyx_t_5 = __pyx_t_7; __pyx_t_5 < __pyx_t_6; __pyx_t_5+=1) { __pyx_v_x = __pyx_t_5; - /* "larynx_train/vits/monotonic_align/core.pyx":17 + /* "piper_train/vits/monotonic_align/core.pyx":17 * for y in range(t_y): * for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)): * if x == y: # <<<<<<<<<<<<<< @@ -2280,7 +2280,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea __pyx_t_8 = ((__pyx_v_x == __pyx_v_y) != 0); if (__pyx_t_8) { - /* "larynx_train/vits/monotonic_align/core.pyx":18 + /* "piper_train/vits/monotonic_align/core.pyx":18 * for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)): * if x == y: * v_cur = max_neg_val # <<<<<<<<<<<<<< @@ -2289,7 +2289,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea */ __pyx_v_v_cur = __pyx_v_max_neg_val; - /* "larynx_train/vits/monotonic_align/core.pyx":17 + /* "piper_train/vits/monotonic_align/core.pyx":17 * for y in range(t_y): * for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)): * if x == y: # <<<<<<<<<<<<<< @@ -2299,7 +2299,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea goto __pyx_L7; } - /* "larynx_train/vits/monotonic_align/core.pyx":20 + /* "piper_train/vits/monotonic_align/core.pyx":20 * v_cur = max_neg_val * else: * v_cur = value[y-1, x] # <<<<<<<<<<<<<< @@ -2313,7 +2313,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea } __pyx_L7:; - /* "larynx_train/vits/monotonic_align/core.pyx":21 + /* "piper_train/vits/monotonic_align/core.pyx":21 * else: * v_cur = value[y-1, x] * if x == 0: # <<<<<<<<<<<<<< @@ -2323,7 +2323,7 @@ static 
void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea __pyx_t_8 = ((__pyx_v_x == 0) != 0); if (__pyx_t_8) { - /* "larynx_train/vits/monotonic_align/core.pyx":22 + /* "piper_train/vits/monotonic_align/core.pyx":22 * v_cur = value[y-1, x] * if x == 0: * if y == 0: # <<<<<<<<<<<<<< @@ -2333,7 +2333,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea __pyx_t_8 = ((__pyx_v_y == 0) != 0); if (__pyx_t_8) { - /* "larynx_train/vits/monotonic_align/core.pyx":23 + /* "piper_train/vits/monotonic_align/core.pyx":23 * if x == 0: * if y == 0: * v_prev = 0. # <<<<<<<<<<<<<< @@ -2342,7 +2342,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea */ __pyx_v_v_prev = 0.; - /* "larynx_train/vits/monotonic_align/core.pyx":22 + /* "piper_train/vits/monotonic_align/core.pyx":22 * v_cur = value[y-1, x] * if x == 0: * if y == 0: # <<<<<<<<<<<<<< @@ -2352,7 +2352,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea goto __pyx_L9; } - /* "larynx_train/vits/monotonic_align/core.pyx":25 + /* "piper_train/vits/monotonic_align/core.pyx":25 * v_prev = 0. 
* else: * v_prev = max_neg_val # <<<<<<<<<<<<<< @@ -2364,7 +2364,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea } __pyx_L9:; - /* "larynx_train/vits/monotonic_align/core.pyx":21 + /* "piper_train/vits/monotonic_align/core.pyx":21 * else: * v_cur = value[y-1, x] * if x == 0: # <<<<<<<<<<<<<< @@ -2374,7 +2374,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea goto __pyx_L8; } - /* "larynx_train/vits/monotonic_align/core.pyx":27 + /* "piper_train/vits/monotonic_align/core.pyx":27 * v_prev = max_neg_val * else: * v_prev = value[y-1, x-1] # <<<<<<<<<<<<<< @@ -2388,7 +2388,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea } __pyx_L8:; - /* "larynx_train/vits/monotonic_align/core.pyx":28 + /* "piper_train/vits/monotonic_align/core.pyx":28 * else: * v_prev = value[y-1, x-1] * value[y, x] += max(v_prev, v_cur) # <<<<<<<<<<<<<< @@ -2408,7 +2408,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea } } - /* "larynx_train/vits/monotonic_align/core.pyx":30 + /* "piper_train/vits/monotonic_align/core.pyx":30 * value[y, x] += max(v_prev, v_cur) * * for y in range(t_y - 1, -1, -1): # <<<<<<<<<<<<<< @@ -2418,7 +2418,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea for (__pyx_t_1 = (__pyx_v_t_y - 1); __pyx_t_1 > -1; __pyx_t_1-=1) { __pyx_v_y = __pyx_t_1; - /* "larynx_train/vits/monotonic_align/core.pyx":31 + /* "piper_train/vits/monotonic_align/core.pyx":31 * * for y in range(t_y - 1, -1, -1): * path[y, index] = 1 # <<<<<<<<<<<<<< @@ -2429,7 +2429,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea __pyx_t_9 = __pyx_v_index; *((int *) ( /* dim=1 */ ((char *) (((int *) ( /* dim=0 */ (__pyx_v_path.data + __pyx_t_10 * __pyx_v_path.strides[0]) )) + __pyx_t_9)) )) = 1; - /* "larynx_train/vits/monotonic_align/core.pyx":32 + /* "piper_train/vits/monotonic_align/core.pyx":32 * for 
y in range(t_y - 1, -1, -1): * path[y, index] = 1 * if index != 0 and (index == y or value[y-1, index] < value[y-1, index-1]): # <<<<<<<<<<<<<< @@ -2457,7 +2457,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea __pyx_L13_bool_binop_done:; if (__pyx_t_8) { - /* "larynx_train/vits/monotonic_align/core.pyx":33 + /* "piper_train/vits/monotonic_align/core.pyx":33 * path[y, index] = 1 * if index != 0 and (index == y or value[y-1, index] < value[y-1, index-1]): * index = index - 1 # <<<<<<<<<<<<<< @@ -2466,7 +2466,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea */ __pyx_v_index = (__pyx_v_index - 1); - /* "larynx_train/vits/monotonic_align/core.pyx":32 + /* "piper_train/vits/monotonic_align/core.pyx":32 * for y in range(t_y - 1, -1, -1): * path[y, index] = 1 * if index != 0 and (index == y or value[y-1, index] < value[y-1, index-1]): # <<<<<<<<<<<<<< @@ -2476,7 +2476,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea } } - /* "larynx_train/vits/monotonic_align/core.pyx":7 + /* "piper_train/vits/monotonic_align/core.pyx":7 * @cython.boundscheck(False) * @cython.wraparound(False) * cdef void maximum_path_each(int[:,::1] path, float[:,::1] value, int t_y, int t_x, float max_neg_val=-1e9) nogil: # <<<<<<<<<<<<<< @@ -2487,7 +2487,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea /* function exit code */ } -/* "larynx_train/vits/monotonic_align/core.pyx":38 +/* "piper_train/vits/monotonic_align/core.pyx":38 * @cython.boundscheck(False) * @cython.wraparound(False) * cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_ys, int[::1] t_xs) nogil: # <<<<<<<<<<<<<< @@ -2495,8 +2495,8 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea * cdef int i */ -static PyObject *__pyx_pw_12larynx_train_4vits_15monotonic_align_4core_1maximum_path_c(PyObject *__pyx_self, PyObject *__pyx_args, 
PyObject *__pyx_kwds); /*proto*/ -static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_c(__Pyx_memviewslice __pyx_v_paths, __Pyx_memviewslice __pyx_v_values, __Pyx_memviewslice __pyx_v_t_ys, __Pyx_memviewslice __pyx_v_t_xs, CYTHON_UNUSED int __pyx_skip_dispatch) { +static PyObject *__pyx_pw_11piper_train_4vits_15monotonic_align_4core_1maximum_path_c(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static void __pyx_f_11piper_train_4vits_15monotonic_align_4core_maximum_path_c(__Pyx_memviewslice __pyx_v_paths, __Pyx_memviewslice __pyx_v_values, __Pyx_memviewslice __pyx_v_t_ys, __Pyx_memviewslice __pyx_v_t_xs, CYTHON_UNUSED int __pyx_skip_dispatch) { CYTHON_UNUSED int __pyx_v_b; int __pyx_v_i; int __pyx_t_1; @@ -2507,7 +2507,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_c( Py_ssize_t __pyx_t_6; Py_ssize_t __pyx_t_7; - /* "larynx_train/vits/monotonic_align/core.pyx":39 + /* "piper_train/vits/monotonic_align/core.pyx":39 * @cython.wraparound(False) * cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_ys, int[::1] t_xs) nogil: * cdef int b = paths.shape[0] # <<<<<<<<<<<<<< @@ -2516,7 +2516,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_c( */ __pyx_v_b = (__pyx_v_paths.shape[0]); - /* "larynx_train/vits/monotonic_align/core.pyx":41 + /* "piper_train/vits/monotonic_align/core.pyx":41 * cdef int b = paths.shape[0] * cdef int i * for i in prange(b, nogil=True): # <<<<<<<<<<<<<< @@ -2552,7 +2552,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_c( { __pyx_v_i = (int)(0 + 1 * __pyx_t_2); - /* "larynx_train/vits/monotonic_align/core.pyx":42 + /* "piper_train/vits/monotonic_align/core.pyx":42 * cdef int i * for i in prange(b, nogil=True): * maximum_path_each(paths[i], values[i], t_ys[i], t_xs[i]) # <<<<<<<<<<<<<< @@ -2593,7 +2593,7 @@ __pyx_t_5.strides[1] = __pyx_v_values.strides[2]; __pyx_t_6 
= __pyx_v_i; __pyx_t_7 = __pyx_v_i; - __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_each(__pyx_t_4, __pyx_t_5, (*((int *) ( /* dim=0 */ ((char *) (((int *) __pyx_v_t_ys.data) + __pyx_t_6)) ))), (*((int *) ( /* dim=0 */ ((char *) (((int *) __pyx_v_t_xs.data) + __pyx_t_7)) ))), NULL); + __pyx_f_11piper_train_4vits_15monotonic_align_4core_maximum_path_each(__pyx_t_4, __pyx_t_5, (*((int *) ( /* dim=0 */ ((char *) (((int *) __pyx_v_t_ys.data) + __pyx_t_6)) ))), (*((int *) ( /* dim=0 */ ((char *) (((int *) __pyx_v_t_xs.data) + __pyx_t_7)) ))), NULL); __PYX_XDEC_MEMVIEW(&__pyx_t_4, 0); __pyx_t_4.memview = NULL; __pyx_t_4.data = NULL; @@ -2613,7 +2613,7 @@ __pyx_t_6 = __pyx_v_i; #endif } - /* "larynx_train/vits/monotonic_align/core.pyx":41 + /* "piper_train/vits/monotonic_align/core.pyx":41 * cdef int b = paths.shape[0] * cdef int i * for i in prange(b, nogil=True): # <<<<<<<<<<<<<< @@ -2631,7 +2631,7 @@ __pyx_t_6 = __pyx_v_i; } } - /* "larynx_train/vits/monotonic_align/core.pyx":38 + /* "piper_train/vits/monotonic_align/core.pyx":38 * @cython.boundscheck(False) * @cython.wraparound(False) * cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_ys, int[::1] t_xs) nogil: # <<<<<<<<<<<<<< @@ -2643,8 +2643,8 @@ __pyx_t_6 = __pyx_v_i; } /* Python wrapper */ -static PyObject *__pyx_pw_12larynx_train_4vits_15monotonic_align_4core_1maximum_path_c(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static PyObject *__pyx_pw_12larynx_train_4vits_15monotonic_align_4core_1maximum_path_c(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { +static PyObject *__pyx_pw_11piper_train_4vits_15monotonic_align_4core_1maximum_path_c(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static PyObject *__pyx_pw_11piper_train_4vits_15monotonic_align_4core_1maximum_path_c(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { __Pyx_memviewslice __pyx_v_paths = { 0, 0, { 0 
}, { 0 }, { 0 } }; __Pyx_memviewslice __pyx_v_values = { 0, 0, { 0 }, { 0 }, { 0 } }; __Pyx_memviewslice __pyx_v_t_ys = { 0, 0, { 0 }, { 0 }, { 0 } }; @@ -2717,18 +2717,18 @@ static PyObject *__pyx_pw_12larynx_train_4vits_15monotonic_align_4core_1maximum_ __pyx_L5_argtuple_error:; __Pyx_RaiseArgtupleInvalid("maximum_path_c", 1, 4, 4, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 38, __pyx_L3_error) __pyx_L3_error:; - __Pyx_AddTraceback("larynx_train.vits.monotonic_align.core.maximum_path_c", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_AddTraceback("piper_train.vits.monotonic_align.core.maximum_path_c", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_12larynx_train_4vits_15monotonic_align_4core_maximum_path_c(__pyx_self, __pyx_v_paths, __pyx_v_values, __pyx_v_t_ys, __pyx_v_t_xs); + __pyx_r = __pyx_pf_11piper_train_4vits_15monotonic_align_4core_maximum_path_c(__pyx_self, __pyx_v_paths, __pyx_v_values, __pyx_v_t_ys, __pyx_v_t_xs); /* function exit code */ __Pyx_RefNannyFinishContext(); return __pyx_r; } -static PyObject *__pyx_pf_12larynx_train_4vits_15monotonic_align_4core_maximum_path_c(CYTHON_UNUSED PyObject *__pyx_self, __Pyx_memviewslice __pyx_v_paths, __Pyx_memviewslice __pyx_v_values, __Pyx_memviewslice __pyx_v_t_ys, __Pyx_memviewslice __pyx_v_t_xs) { +static PyObject *__pyx_pf_11piper_train_4vits_15monotonic_align_4core_maximum_path_c(CYTHON_UNUSED PyObject *__pyx_self, __Pyx_memviewslice __pyx_v_paths, __Pyx_memviewslice __pyx_v_values, __Pyx_memviewslice __pyx_v_t_ys, __Pyx_memviewslice __pyx_v_t_xs) { PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; @@ -2741,7 +2741,7 @@ static PyObject *__pyx_pf_12larynx_train_4vits_15monotonic_align_4core_maximum_p if (unlikely(!__pyx_v_values.memview)) { __Pyx_RaiseUnboundLocalError("values"); __PYX_ERR(0, 38, __pyx_L1_error) } if (unlikely(!__pyx_v_t_ys.memview)) { 
__Pyx_RaiseUnboundLocalError("t_ys"); __PYX_ERR(0, 38, __pyx_L1_error) } if (unlikely(!__pyx_v_t_xs.memview)) { __Pyx_RaiseUnboundLocalError("t_xs"); __PYX_ERR(0, 38, __pyx_L1_error) } - __pyx_t_1 = __Pyx_void_to_None(__pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_c(__pyx_v_paths, __pyx_v_values, __pyx_v_t_ys, __pyx_v_t_xs, 0)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 38, __pyx_L1_error) + __pyx_t_1 = __Pyx_void_to_None(__pyx_f_11piper_train_4vits_15monotonic_align_4core_maximum_path_c(__pyx_v_paths, __pyx_v_values, __pyx_v_t_ys, __pyx_v_t_xs, 0)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 38, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; @@ -2750,7 +2750,7 @@ static PyObject *__pyx_pf_12larynx_train_4vits_15monotonic_align_4core_maximum_p /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("larynx_train.vits.monotonic_align.core.maximum_path_c", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_AddTraceback("piper_train.vits.monotonic_align.core.maximum_path_c", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __PYX_XDEC_MEMVIEW(&__pyx_v_paths, 1); @@ -3066,7 +3066,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ * self.format = self._format * */ - if (!(likely(PyBytes_CheckExact(__pyx_v_format))||((__pyx_v_format) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_v_format)->tp_name), 0))) __PYX_ERR(1, 141, __pyx_L1_error) + if (!(likely(PyBytes_CheckExact(__pyx_v_format))||((__pyx_v_format) == Py_None)||((void)PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_v_format)->tp_name), 0))) __PYX_ERR(1, 141, __pyx_L1_error) __pyx_t_3 = __pyx_v_format; __Pyx_INCREF(__pyx_t_3); __Pyx_GIVEREF(__pyx_t_3); @@ -5044,7 +5044,7 @@ static PyObject *__pyx_pf___pyx_MemviewEnum_2__setstate_cython__(struct __pyx_Me * def __setstate_cython__(self, 
__pyx_state): * __pyx_unpickle_Enum__set_state(self, __pyx_state) # <<<<<<<<<<<<<< */ - if (!(likely(PyTuple_CheckExact(__pyx_v___pyx_state))||((__pyx_v___pyx_state) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "tuple", Py_TYPE(__pyx_v___pyx_state)->tp_name), 0))) __PYX_ERR(1, 17, __pyx_L1_error) + if (!(likely(PyTuple_CheckExact(__pyx_v___pyx_state))||((__pyx_v___pyx_state) == Py_None)||((void)PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "tuple", Py_TYPE(__pyx_v___pyx_state)->tp_name), 0))) __PYX_ERR(1, 17, __pyx_L1_error) __pyx_t_1 = __pyx_unpickle_Enum__set_state(__pyx_v_self, ((PyObject*)__pyx_v___pyx_state)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 17, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; @@ -7347,7 +7347,7 @@ static PyObject *__pyx_memoryview_assign_item_from_object(struct __pyx_memoryvie __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - if (!(likely(PyBytes_CheckExact(__pyx_t_4))||((__pyx_t_4) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_t_4)->tp_name), 0))) __PYX_ERR(1, 512, __pyx_L1_error) + if (!(likely(PyBytes_CheckExact(__pyx_t_4))||((__pyx_t_4) == Py_None)||((void)PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_t_4)->tp_name), 0))) __PYX_ERR(1, 512, __pyx_L1_error) __pyx_v_bytesvalue = ((PyObject*)__pyx_t_4); __pyx_t_4 = 0; @@ -7420,7 +7420,7 @@ static PyObject *__pyx_memoryview_assign_item_from_object(struct __pyx_memoryvie __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; } __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - if (!(likely(PyBytes_CheckExact(__pyx_t_4))||((__pyx_t_4) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_t_4)->tp_name), 0))) __PYX_ERR(1, 514, __pyx_L1_error) + if (!(likely(PyBytes_CheckExact(__pyx_t_4))||((__pyx_t_4) == Py_None)||((void)PyErr_Format(PyExc_TypeError, 
"Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_t_4)->tp_name), 0))) __PYX_ERR(1, 514, __pyx_L1_error) __pyx_v_bytesvalue = ((PyObject*)__pyx_t_4); __pyx_t_4 = 0; } @@ -15623,7 +15623,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView___pyx_unpickle_Enum(CYTHON_UNUSE * return __pyx_result * cdef __pyx_unpickle_Enum__set_state(Enum __pyx_result, tuple __pyx_state): */ - if (!(likely(PyTuple_CheckExact(__pyx_v___pyx_state))||((__pyx_v___pyx_state) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "tuple", Py_TYPE(__pyx_v___pyx_state)->tp_name), 0))) __PYX_ERR(1, 9, __pyx_L1_error) + if (!(likely(PyTuple_CheckExact(__pyx_v___pyx_state))||((__pyx_v___pyx_state) == Py_None)||((void)PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "tuple", Py_TYPE(__pyx_v___pyx_state)->tp_name), 0))) __PYX_ERR(1, 9, __pyx_L1_error) __pyx_t_4 = __pyx_unpickle_Enum__set_state(((struct __pyx_MemviewEnum_obj *)__pyx_v___pyx_result), ((PyObject*)__pyx_v___pyx_state)); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 9, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; @@ -15925,7 +15925,7 @@ static PyBufferProcs __pyx_tp_as_buffer_array = { static PyTypeObject __pyx_type___pyx_array = { PyVarObject_HEAD_INIT(0, 0) - "larynx_train.vits.monotonic_align.core.array", /*tp_name*/ + "piper_train.vits.monotonic_align.core.array", /*tp_name*/ sizeof(struct __pyx_array_obj), /*tp_basicsize*/ 0, /*tp_itemsize*/ __pyx_tp_dealloc_array, /*tp_dealloc*/ @@ -16047,7 +16047,7 @@ static PyMethodDef __pyx_methods_Enum[] = { static PyTypeObject __pyx_type___pyx_MemviewEnum = { PyVarObject_HEAD_INIT(0, 0) - "larynx_train.vits.monotonic_align.core.Enum", /*tp_name*/ + "piper_train.vits.monotonic_align.core.Enum", /*tp_name*/ sizeof(struct __pyx_MemviewEnum_obj), /*tp_basicsize*/ 0, /*tp_itemsize*/ __pyx_tp_dealloc_Enum, /*tp_dealloc*/ @@ -16311,7 +16311,7 @@ static PyBufferProcs __pyx_tp_as_buffer_memoryview = { static PyTypeObject 
__pyx_type___pyx_memoryview = { PyVarObject_HEAD_INIT(0, 0) - "larynx_train.vits.monotonic_align.core.memoryview", /*tp_name*/ + "piper_train.vits.monotonic_align.core.memoryview", /*tp_name*/ sizeof(struct __pyx_memoryview_obj), /*tp_basicsize*/ 0, /*tp_itemsize*/ __pyx_tp_dealloc_memoryview, /*tp_dealloc*/ @@ -16452,7 +16452,7 @@ static struct PyGetSetDef __pyx_getsets__memoryviewslice[] = { static PyTypeObject __pyx_type___pyx_memoryviewslice = { PyVarObject_HEAD_INIT(0, 0) - "larynx_train.vits.monotonic_align.core._memoryviewslice", /*tp_name*/ + "piper_train.vits.monotonic_align.core._memoryviewslice", /*tp_name*/ sizeof(struct __pyx_memoryviewslice_obj), /*tp_basicsize*/ 0, /*tp_itemsize*/ __pyx_tp_dealloc__memoryviewslice, /*tp_dealloc*/ @@ -16531,7 +16531,7 @@ static PyTypeObject __pyx_type___pyx_memoryviewslice = { }; static PyMethodDef __pyx_methods[] = { - {"maximum_path_c", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_12larynx_train_4vits_15monotonic_align_4core_1maximum_path_c, METH_VARARGS|METH_KEYWORDS, 0}, + {"maximum_path_c", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_11piper_train_4vits_15monotonic_align_4core_1maximum_path_c, METH_VARARGS|METH_KEYWORDS, 0}, {0, 0, 0, 0} }; @@ -16961,7 +16961,7 @@ PyEval_InitThreads(); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 1, __pyx_L1_error) - if (__Pyx_InitStrings(__pyx_string_tab) < 0) __PYX_ERR(0, 1, __pyx_L1_error); + if (__Pyx_InitStrings(__pyx_string_tab) < 0) __PYX_ERR(0, 1, __pyx_L1_error) __pyx_int_0 = PyInt_FromLong(0); if (unlikely(!__pyx_int_0)) __PYX_ERR(0, 1, __pyx_L1_error) __pyx_int_1 = PyInt_FromLong(1); if (unlikely(!__pyx_int_1)) __PYX_ERR(0, 1, __pyx_L1_error) __pyx_int_112105877 = PyInt_FromLong(112105877L); if (unlikely(!__pyx_int_112105877)) __PYX_ERR(0, 1, __pyx_L1_error) @@ -17266,20 +17266,20 @@ if (!__Pyx_RefNanny) { Py_INCREF(__pyx_b); __pyx_cython_runtime = PyImport_AddModule((char *) "cython_runtime"); if (unlikely(!__pyx_cython_runtime)) __PYX_ERR(0, 1, 
__pyx_L1_error) Py_INCREF(__pyx_cython_runtime); - if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) __PYX_ERR(0, 1, __pyx_L1_error); + if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) __PYX_ERR(0, 1, __pyx_L1_error) /*--- Initialize various global constants etc. ---*/ if (__Pyx_InitGlobals() < 0) __PYX_ERR(0, 1, __pyx_L1_error) #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) if (__Pyx_init_sys_getdefaultencoding_params() < 0) __PYX_ERR(0, 1, __pyx_L1_error) #endif - if (__pyx_module_is_main_larynx_train__vits__monotonic_align__core) { + if (__pyx_module_is_main_piper_train__vits__monotonic_align__core) { if (PyObject_SetAttr(__pyx_m, __pyx_n_s_name_2, __pyx_n_s_main) < 0) __PYX_ERR(0, 1, __pyx_L1_error) } #if PY_MAJOR_VERSION >= 3 { PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error) - if (!PyDict_GetItemString(modules, "larynx_train.vits.monotonic_align.core")) { - if (unlikely(PyDict_SetItemString(modules, "larynx_train.vits.monotonic_align.core", __pyx_m) < 0)) __PYX_ERR(0, 1, __pyx_L1_error) + if (!PyDict_GetItemString(modules, "piper_train.vits.monotonic_align.core")) { + if (unlikely(PyDict_SetItemString(modules, "piper_train.vits.monotonic_align.core", __pyx_m) < 0)) __PYX_ERR(0, 1, __pyx_L1_error) } } #endif @@ -17300,7 +17300,7 @@ if (!__Pyx_RefNanny) { if (__Pyx_patch_abc() < 0) __PYX_ERR(0, 1, __pyx_L1_error) #endif - /* "larynx_train/vits/monotonic_align/core.pyx":7 + /* "piper_train/vits/monotonic_align/core.pyx":7 * @cython.boundscheck(False) * @cython.wraparound(False) * cdef void maximum_path_each(int[:,::1] path, float[:,::1] value, int t_y, int t_x, float max_neg_val=-1e9) nogil: # <<<<<<<<<<<<<< @@ -17309,7 +17309,7 @@ if (!__Pyx_RefNanny) { */ __pyx_k_ = (-1e9); - /* "larynx_train/vits/monotonic_align/core.pyx":1 + /* "piper_train/vits/monotonic_align/core.pyx":1 * cimport cython # <<<<<<<<<<<<<< * 
from cython.parallel import prange * @@ -17479,11 +17479,11 @@ if (!__Pyx_RefNanny) { __Pyx_XDECREF(__pyx_t_1); if (__pyx_m) { if (__pyx_d) { - __Pyx_AddTraceback("init larynx_train.vits.monotonic_align.core", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_AddTraceback("init piper_train.vits.monotonic_align.core", __pyx_clineno, __pyx_lineno, __pyx_filename); } Py_CLEAR(__pyx_m); } else if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ImportError, "init larynx_train.vits.monotonic_align.core"); + PyErr_SetString(PyExc_ImportError, "init piper_train.vits.monotonic_align.core"); } __pyx_L0:; __Pyx_RefNannyFinishContext(); @@ -18536,7 +18536,7 @@ static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, /* ObjectGetItem */ #if CYTHON_USE_TYPE_SLOTS static PyObject *__Pyx_PyObject_GetIndex(PyObject *obj, PyObject* index) { - PyObject *runerr; + PyObject *runerr = NULL; Py_ssize_t key_value; PySequenceMethods *m = Py_TYPE(obj)->tp_as_sequence; if (unlikely(!(m && m->sq_item))) { @@ -19417,7 +19417,7 @@ __PYX_GOOD: /* CLineInTraceback */ #ifndef CYTHON_CLINE_IN_TRACEBACK -static int __Pyx_CLineForTraceback(CYTHON_NCP_UNUSED PyThreadState *tstate, int c_line) { +static int __Pyx_CLineForTraceback(CYTHON_UNUSED PyThreadState *tstate, int c_line) { PyObject *use_cline; PyObject *ptype, *pvalue, *ptraceback; #if CYTHON_COMPILING_IN_CPYTHON diff --git a/src/python/larynx_train/vits/monotonic_align/core.pyx b/src/python/piper_train/vits/monotonic_align/core.pyx similarity index 100% rename from src/python/larynx_train/vits/monotonic_align/core.pyx rename to src/python/piper_train/vits/monotonic_align/core.pyx diff --git a/src/python/larynx_train/vits/monotonic_align/setup.py b/src/python/piper_train/vits/monotonic_align/setup.py similarity index 100% rename from src/python/larynx_train/vits/monotonic_align/setup.py rename to src/python/piper_train/vits/monotonic_align/setup.py diff --git a/src/python/larynx_train/vits/transforms.py 
b/src/python/piper_train/vits/transforms.py similarity index 100% rename from src/python/larynx_train/vits/transforms.py rename to src/python/piper_train/vits/transforms.py diff --git a/src/python/larynx_train/vits/utils.py b/src/python/piper_train/vits/utils.py similarity index 100% rename from src/python/larynx_train/vits/utils.py rename to src/python/piper_train/vits/utils.py diff --git a/src/python/larynx_train/vits/wavfile.py b/src/python/piper_train/vits/wavfile.py similarity index 100% rename from src/python/larynx_train/vits/wavfile.py rename to src/python/piper_train/vits/wavfile.py diff --git a/src/python/larynx_train/voice_conversion.py b/src/python/piper_train/voice_conversion.py similarity index 98% rename from src/python/larynx_train/voice_conversion.py rename to src/python/piper_train/voice_conversion.py index 8163676..bb971c6 100755 --- a/src/python/larynx_train/voice_conversion.py +++ b/src/python/piper_train/voice_conversion.py @@ -11,7 +11,7 @@ from .vits.lightning import VitsModel from .vits.mel_processing import spectrogram_torch from .vits.wavfile import write as write_wav -_LOGGER = logging.getLogger("larynx_train.voice_converstion") +_LOGGER = logging.getLogger("piper_train.voice_converstion") def main(): diff --git a/src/python/run-docker b/src/python/run-docker index 6261f54..08eace7 100755 --- a/src/python/run-docker +++ b/src/python/run-docker @@ -10,5 +10,5 @@ docker run \ -v "${HOME}:${HOME}" \ -v /etc/hostname:/etc/hostname:ro \ -v /etc/localtime:/etc/localtime:ro \ - larynx2-train \ + piper-train \ "$@" diff --git a/src/python/scripts/check.sh b/src/python/scripts/check.sh index d8414c5..d2c3ae4 100755 --- a/src/python/scripts/check.sh +++ b/src/python/scripts/check.sh @@ -17,7 +17,7 @@ if [ -d "${venv}" ]; then source "${venv}/bin/activate" fi -python_files=("${base_dir}/larynx_train") +python_files=("${base_dir}/piper_train") # Format code black "${python_files[@]}" diff --git a/src/python/setup.py b/src/python/setup.py index 
4c7260b..7a114bc 100644 --- a/src/python/setup.py +++ b/src/python/setup.py @@ -6,7 +6,7 @@ import setuptools from setuptools import setup this_dir = Path(__file__).parent -module_dir = this_dir / "larynx_train" +module_dir = this_dir / "piper_train" # ----------------------------------------------------------------------------- @@ -29,23 +29,23 @@ with open(version_path, "r", encoding="utf-8") as version_file: # ----------------------------------------------------------------------------- setup( - name="larynx_train", + name="piper_train", version=version, description="A fast and local neural text to speech system", long_description=long_description, - url="http://github.com/rhasspy/larynx", + url="http://github.com/rhasspy/piper", author="Michael Hansen", author_email="mike@rhasspy.org", license="MIT", packages=setuptools.find_packages(), package_data={ - "larynx_train": ["VERSION", "py.typed"], + "piper_train": ["VERSION", "py.typed"], }, install_requires=requirements, extras_require={':python_version<"3.9"': ["importlib_resources"]}, entry_points={ "console_scripts": [ - "larynx-train = larynx_train.__main__:main", + "piper-train = piper_train.__main__:main", ] }, classifiers=[ diff --git a/src/python_run/larynx/__init__.py b/src/python_run/piper/__init__.py similarity index 97% rename from src/python_run/larynx/__init__.py rename to src/python_run/piper/__init__.py index 23ab305..2ab2622 100644 --- a/src/python_run/larynx/__init__.py +++ b/src/python_run/piper/__init__.py @@ -15,7 +15,7 @@ _PAD = "_" @dataclass -class LarynxConfig: +class PiperConfig: num_symbols: int num_speakers: int sample_rate: int @@ -26,7 +26,7 @@ class LarynxConfig: phoneme_id_map: Mapping[str, Sequence[int]] -class Larynx: +class Piper: def __init__( self, model_path: Union[str, Path], @@ -114,12 +114,12 @@ class Larynx: return wav_io.getvalue() -def load_config(config_path: Union[str, Path]) -> LarynxConfig: +def load_config(config_path: Union[str, Path]) -> PiperConfig: with 
open(config_path, "r", encoding="utf-8") as config_file: config_dict = json.load(config_file) inference = config_dict.get("inference", {}) - return LarynxConfig( + return PiperConfig( num_symbols=config_dict["num_symbols"], num_speakers=config_dict["num_speakers"], sample_rate=config_dict["audio"]["sample_rate"], diff --git a/src/python_run/larynx/__main__.py b/src/python_run/piper/__main__.py similarity index 95% rename from src/python_run/larynx/__main__.py rename to src/python_run/piper/__main__.py index 57eff08..a4cadb2 100644 --- a/src/python_run/larynx/__main__.py +++ b/src/python_run/piper/__main__.py @@ -5,7 +5,7 @@ import time from functools import partial from pathlib import Path -from . import Larynx +from . import Piper _FILE = Path(__file__) _DIR = _FILE.parent @@ -34,7 +34,7 @@ def main() -> None: args = parser.parse_args() logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO) - voice = Larynx(args.model, config_path=args.config, use_cuda=args.cuda) + voice = Piper(args.model, config_path=args.config, use_cuda=args.cuda) synthesize = partial( voice.synthesize, speaker_id=args.speaker, diff --git a/src/python_run/scripts/check.sh b/src/python_run/scripts/check.sh index 60b3c0d..85bb211 100755 --- a/src/python_run/scripts/check.sh +++ b/src/python_run/scripts/check.sh @@ -17,7 +17,7 @@ if [ -d "${venv}" ]; then source "${venv}/bin/activate" fi -python_files=("${base_dir}/larynx") +python_files=("${base_dir}/piper") # Format code black "${python_files[@]}" diff --git a/src/python_run/scripts/larynx b/src/python_run/scripts/piper similarity index 93% rename from src/python_run/scripts/larynx rename to src/python_run/scripts/piper index 21e3714..3123a0b 100755 --- a/src/python_run/scripts/larynx +++ b/src/python_run/scripts/piper @@ -14,4 +14,4 @@ if [ -d "${venv}" ]; then source "${venv}/bin/activate" fi -python3 -m larynx "$@" +python3 -m piper "$@"