Rename to piper

pull/35/head
Michael Hansen 1 year ago
parent 3dfa161ba5
commit 70afec58bc

@ -23,6 +23,7 @@ RUN cd espeak-ng-1.51 && \
--without-speechplayer \
--without-mbrola \
--without-sonic \
--with-extdict-cmn \
--prefix=/usr && \
make -j8 src/espeak-ng src/speak-ng && \
make && \
@ -35,25 +36,25 @@ RUN mkdir -p /usr/local/include/onnxruntime && \
--strip-components 1 \
-xvf "lib/onnxruntime-linux-${TARGETARCH}${TARGETVARIANT}.tgz"
# Build larynx binary
# Build piper binary
COPY Makefile ./
COPY src/cpp/ ./src/cpp/
RUN make no-pcaudio
# Do a test run
RUN /build/build/larynx --help
RUN /build/build/piper --help
# Build .tar.gz to keep symlinks
WORKDIR /dist
RUN mkdir -p larynx && \
cp -d /usr/lib64/libespeak-ng.so* ./larynx/ && \
cp -dR /usr/share/espeak-ng-data ./larynx/ && \
cp -d /usr/local/include/onnxruntime/lib/libonnxruntime.so.* ./larynx/ && \
cp /build/build/larynx ./larynx/ && \
tar -czf "larynx_${TARGETARCH}${TARGETVARIANT}.tar.gz" larynx/
RUN mkdir -p piper && \
cp -d /usr/lib64/libespeak-ng.so* ./piper/ && \
cp -dR /usr/share/espeak-ng-data ./piper/ && \
cp -d /usr/local/include/onnxruntime/lib/libonnxruntime.so.* ./piper/ && \
cp /build/build/piper ./piper/ && \
tar -czf "piper_${TARGETARCH}${TARGETVARIANT}.tar.gz" piper/
# -----------------------------------------------------------------------------
FROM scratch
COPY --from=build /dist/larynx_*.tar.gz ./
COPY --from=build /dist/piper_*.tar.gz ./

@ -5,9 +5,9 @@ ARG TARGETVARIANT
COPY local/en-us/ljspeech/low/en-us-ljspeech-low.onnx \
local/en-us/ljspeech/low/en-us-ljspeech-low.onnx.json ./
ADD dist/linux_${TARGETARCH}${TARGETVARIANT}/larynx_${TARGETARCH}${TARGETVARIANT}.tar.gz ./
ADD dist/linux_${TARGETARCH}${TARGETVARIANT}/piper_${TARGETARCH}${TARGETVARIANT}.tar.gz ./
RUN cd larynx/ && echo 'This is a test.' | ./larynx -m ../en-us-ljspeech-low.onnx -f test.wav
RUN if [ ! -f larynx/test.wav ]; then exit 1; fi
RUN size="$(wc -c < larynx/test.wav)"; \
RUN cd piper/ && echo 'This is a test.' | ./piper -m ../en-us-ljspeech-low.onnx -f test.wav
RUN if [ ! -f piper/test.wav ]; then exit 1; fi
RUN size="$(wc -c < piper/test.wav)"; \
if [ "${size}" -lt "1000" ]; then echo "File size is ${size} bytes"; exit 1; fi

@ -1,15 +1,15 @@
# Larynx
![Piper logo](etc/logo.png)
A fast, local neural text to speech system.
``` sh
echo 'Welcome to the world of speech synthesis!' | \
./larynx --model en-us-blizzard_lessac-medium.onnx --output_file welcome.wav
./piper --model en-us-blizzard_lessac-medium.onnx --output_file welcome.wav
```
## Voices
Download voices from [the release](https://github.com/rhasspy/larynx2/releases/tag/v0.0.2).
Download voices from [the release](https://github.com/rhasspy/piper/releases/tag/v0.0.2).
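As a rough sketch (the voice name below is a placeholder, not an actual release asset, and the archive format is assumed), downloading and unpacking a voice gives you its `.onnx` model and `.onnx.json` config:
``` sh
# Placeholder voice name -- see the release page for the actual assets
wget "https://github.com/rhasspy/piper/releases/download/v0.0.2/en-us-example-low.tar.gz"
tar -xzf en-us-example-low.tar.gz
# The extracted files include en-us-example-low.onnx and en-us-example-low.onnx.json
```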
Supported languages:
@ -30,7 +30,7 @@ Supported languages:
## Purpose
Larynx is meant to sound good and run reasonably fast on the Raspberry Pi 4.
Piper (formerly Larynx 2) is meant to sound good and run reasonably fast on the Raspberry Pi 4.
Voices are trained with [VITS](https://github.com/jaywalnut310/vits/) and exported to the [onnxruntime](https://onnxruntime.ai/).
@ -39,8 +39,8 @@ Voices are trained with [VITS](https://github.com/jaywalnut310/vits/) and export
Download a release:
* [amd64](https://github.com/rhasspy/larynx2/releases/download/v0.0.2/larynx_amd64.tar.gz) (desktop Linux)
* [arm64](https://github.com/rhasspy/larynx2/releases/download/v0.0.2/larynx_arm64.tar.gz) (Raspberry Pi 4)
* [amd64](https://github.com/rhasspy/piper/releases/download/v0.0.2/piper_amd64.tar.gz) (desktop Linux)
* [arm64](https://github.com/rhasspy/piper/releases/download/v0.0.2/piper_arm64.tar.gz) (Raspberry Pi 4)
If you want to build from source, see the [Makefile](Makefile) and [C++ source](src/cpp). Last tested with [onnxruntime](https://github.com/microsoft/onnxruntime) 1.13.1.
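As a rough sketch of that build (assuming espeak-ng and onnxruntime are already installed where the Makefile expects them, as done in the Dockerfile above):
``` sh
git clone https://github.com/rhasspy/piper.git
cd piper
# Build without pcaudiolib, as the Dockerfile does
make no-pcaudio
# The binary is written under build/
./build/piper --help
```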
@ -48,18 +48,18 @@ If you want to build from source, see the [Makefile](Makefile) and [C++ source](
## Usage
1. [Download a voice](#voices) and extract the `.onnx` and `.onnx.json` files
2. Run the `larynx` binary with text on standard input, `--model /path/to/your-voice.onnx`, and `--output_file output.wav`
2. Run the `piper` binary with text on standard input, `--model /path/to/your-voice.onnx`, and `--output_file output.wav`
For example:
``` sh
echo 'Welcome to the world of speech synthesis!' | \
./larynx --model blizzard_lessac-medium.onnx --output_file welcome.wav
./piper --model blizzard_lessac-medium.onnx --output_file welcome.wav
```
For multi-speaker models, use `--speaker <number>` to change speakers (default: 0).
See `larynx --help` for more options.
See `piper --help` for more options.
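For example, selecting the second speaker (index 1) of a hypothetical multi-speaker voice:
``` sh
echo 'Welcome to the world of speech synthesis!' | \
./piper --model multi-speaker-voice.onnx --speaker 1 --output_file welcome.wav
```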
## Training
@ -69,7 +69,7 @@ See [src/python](src/python)
Start by creating a virtual environment:
``` sh
cd larynx2/src/python
cd piper/src/python
python3 -m venv .venv
source .venv/bin/activate
pip3 install --upgrade pip
@ -84,7 +84,7 @@ Ensure you have [espeak-ng](https://github.com/espeak-ng/espeak-ng/) installed (
Next, preprocess your dataset:
``` sh
python3 -m larynx_train.preprocess \
python3 -m piper_train.preprocess \
--language en-us \
--input-dir /path/to/ljspeech/ \
--output-dir /path/to/training_dir/ \
@ -97,7 +97,7 @@ Datasets must either be in the [LJSpeech](https://keithito.com/LJ-Speech-Dataset
Finally, you can train:
``` sh
python3 -m larynx_train \
python3 -m piper_train \
--dataset-dir /path/to/training_dir/ \
--accelerator 'gpu' \
--devices 1 \
@ -108,7 +108,7 @@ python3 -m larynx_train \
--precision 32
```
Training uses [PyTorch Lightning](https://www.pytorchlightning.ai/). Run `tensorboard --logdir /path/to/training_dir/lightning_logs` to monitor. See `python3 -m larynx_train --help` for many additional options.
Training uses [PyTorch Lightning](https://www.pytorchlightning.ai/). Run `tensorboard --logdir /path/to/training_dir/lightning_logs` to monitor. See `python3 -m piper_train --help` for many additional options.
It is highly recommended to train with the following `Dockerfile`:
@ -121,11 +121,11 @@ RUN pip3 install \
ENV NUMBA_CACHE_DIR=.numba_cache
```
See the various `infer_*` and `export_*` scripts in [src/python/larynx_train](src/python/larynx_train) to test and export your voice from the checkpoint in `lightning_logs`. The `dataset.jsonl` file in your training directory can be used with `python3 -m larynx_train.infer` for quick testing:
See the various `infer_*` and `export_*` scripts in [src/python/piper_train](src/python/piper_train) to test and export your voice from the checkpoint in `lightning_logs`. The `dataset.jsonl` file in your training directory can be used with `python3 -m piper_train.infer` for quick testing:
``` sh
head -n5 /path/to/training_dir/dataset.jsonl | \
python3 -m larynx_train.infer \
python3 -m piper_train.infer \
--checkpoint lightning_logs/path/to/checkpoint.ckpt \
--sample-rate 22050 \
--output-dir wavs
@ -139,7 +139,7 @@ See [src/python_run](src/python_run)
Run `scripts/setup.sh` to create a virtual environment and install the requirements. Then run:
``` sh
echo 'Welcome to the world of speech synthesis!' | scripts/larynx \
echo 'Welcome to the world of speech synthesis!' | scripts/piper \
--model /path/to/voice.onnx \
--output_file welcome.wav
```
@ -151,5 +151,5 @@ If you'd like to use a GPU, install the `onnxruntime-gpu` package:
.venv/bin/pip3 install onnxruntime-gpu
```
and then run `scripts/larynx` with the `--cuda` argument. You will need to have a functioning CUDA environment, such as what's available in [NVIDIA's PyTorch containers](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch).
and then run `scripts/piper` with the `--cuda` argument. You will need to have a functioning CUDA environment, such as what's available in [NVIDIA's PyTorch containers](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch).
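For example (the voice path is a placeholder), the earlier command with GPU inference enabled:
``` sh
echo 'Welcome to the world of speech synthesis!' | scripts/piper \
--model /path/to/voice.onnx \
--output_file welcome.wav \
--cuda
```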

Binary file not shown (8.3 KiB).

@ -0,0 +1,151 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="118.3606mm"
height="41.577671mm"
viewBox="0 0 118.36058 41.577671"
version="1.1"
id="svg120"
inkscape:version="1.0.2 (e86c870879, 2021-01-15)"
sodipodi:docname="logo.svg"
inkscape:export-filename="./logo.png"
inkscape:export-xdpi="100"
inkscape:export-ydpi="100">
<defs
id="defs114" />
<sodipodi:namedview
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageopacity="1"
inkscape:pageshadow="2"
inkscape:zoom="1.8469919"
inkscape:cx="164.97755"
inkscape:cy="48.418276"
inkscape:document-units="mm"
inkscape:current-layer="layer1"
inkscape:document-rotation="0"
showgrid="false"
inkscape:window-width="1920"
inkscape:window-height="1012"
inkscape:window-x="0"
inkscape:window-y="0"
inkscape:window-maximized="1"
fit-margin-top="2"
fit-margin-left="2"
fit-margin-right="2"
fit-margin-bottom="2" />
<metadata
id="metadata117">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:title></dc:title>
</cc:Work>
</rdf:RDF>
</metadata>
<g
inkscape:label="Layer 1"
inkscape:groupmode="layer"
id="layer1"
transform="translate(-46.653036,-127.37783)">
<g
id="g85"
transform="translate(39.632581,-38.116038)">
<path
d="m 20.289791,179.54097 h 10.117541 q 4.512519,0 6.919195,2.01084 2.42251,1.995 2.42251,5.70002 0,3.72085 -2.42251,5.73169 -2.406676,1.99501 -6.919195,1.99501 h -4.021683 v 8.2017 h -6.095858 z m 6.095858,4.41751 v 6.60253 h 3.372514 q 1.77334,0 2.739178,-0.855 0.965837,-0.87084 0.965837,-2.45418 0,-1.58334 -0.965837,-2.43834 -0.965838,-0.85501 -2.739178,-0.85501 z"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:32.4268px;line-height:1.25;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
id="path2223" />
<path
d="M 56.120769,179.54097 H 66.23831 q 4.512519,0 6.919196,2.01084 2.42251,1.995 2.42251,5.70002 0,3.72085 -2.42251,5.73169 -2.406677,1.99501 -6.919196,1.99501 h -4.021683 v 8.2017 h -6.095858 z m 6.095858,4.41751 v 6.60253 h 3.372514 q 1.773341,0 2.739179,-0.855 0.965836,-0.87084 0.965836,-2.45418 0,-1.58334 -0.965836,-2.43834 -0.965837,-0.85501 -2.739179,-0.85501 z"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:32.4268px;line-height:1.25;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
id="path2225" />
<path
d="m 79.8867,179.54097 h 16.450901 v 4.60751 H 85.982557 v 4.40169 h 9.73754 v 4.60752 h -9.73754 v 5.41502 h 10.703378 v 4.60752 H 79.8867 Z"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:32.4268px;line-height:1.25;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
id="path2227" />
<path
d="m 110.69849,190.02267 q 1.91585,0 2.73918,-0.7125 0.83917,-0.7125 0.83917,-2.34334 0,-1.61501 -0.83917,-2.31168 -0.82333,-0.69667 -2.73918,-0.69667 h -2.56501 v 6.06419 z m -2.56501,4.21169 v 8.94587 h -6.09585 v -23.63926 h 9.31003 q 4.67086,0 6.84003,1.5675 2.18501,1.56751 2.18501,4.95586 0,2.34334 -1.14,3.84751 -1.12417,1.50417 -3.40418,2.21668 1.25083,0.285 2.2325,1.29834 0.99751,0.9975 2.01085,3.04001 l 3.30918,6.71336 h -6.4917 l -2.88168,-5.87419 q -0.87083,-1.77334 -1.77334,-2.42251 -0.88667,-0.64917 -2.37501,-0.64917 z"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:32.4268px;line-height:1.25;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
id="path2229" />
<g
id="g2239"
transform="translate(2.4090272,49.575953)">
<path
style="fill:#000000;stroke:none;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 17.980579,136.56697 c -0.476851,-1.21561 9.934617,-6.83345 10.62213,-6.38851 -4.147652,-4.33579 -3.488951,-7.42528 -1.437093,-9.875 -2.134965,-0.0137 -4.642444,-0.12021 -6.370534,4.67585 -4.134299,0.0803 -4.437171,-3.11951 -4.48854,-6.20362 -1.859141,2.96638 -2.878913,5.02914 -1.495979,9.34664 -4.921996,-1.38523 -5.5668734,2.41507 -7.6020931,4.32371 4.1744251,-2.16864 9.3792941,-2.93932 10.7721091,4.12093 z"
id="path2231"
sodipodi:nodetypes="cccccccc" />
<circle
style="fill:#000000;stroke:none;stroke-width:0.1;stroke-linecap:round"
id="circle2233"
cx="7.4886017"
cy="132.36996"
r="0.87717384" />
<circle
style="fill:#000000;stroke:none;stroke-width:0.1;stroke-linecap:round"
id="circle2235"
cx="16.220198"
cy="118.79509"
r="0.87717384" />
<circle
style="fill:#000000;stroke:none;stroke-width:0.1;stroke-linecap:round"
id="circle2237"
cx="26.696749"
cy="120.39225"
r="0.87717384" />
</g>
<path
d="m 50.078877,179.54097 c 0,-1.55844 -6.095858,-1.44086 -6.095858,0 v 23.63926 h 6.095858 z"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:32.4268px;line-height:1.25;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
id="path2241"
sodipodi:nodetypes="ssccs" />
<path
id="path2243"
style="fill:#ffffff;stroke:none;stroke-width:0.1;stroke-linecap:round"
d="m 51.124975,184.33936 c -2.760939,-1.42504 -5.456589,-1.18336 -8.115352,0 v 1.45029 c 2.642276,-1.3158 5.348351,-1.29574 8.115352,0 z"
sodipodi:nodetypes="ccccc" />
<path
style="fill:none;stroke:#ffffff;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 48.277649,178.01214 v 27.0594"
id="path2245"
sodipodi:nodetypes="cc" />
<path
style="fill:none;stroke:#ffffff;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 45.736606,177.35346 v 27.71808"
id="path2249"
sodipodi:nodetypes="cc" />
<rect
style="fill:#ffffff;stroke:none;stroke-width:0.1;stroke-linecap:round"
id="rect2251"
width="3.0975902"
height="6.8333788"
x="-48.726681"
y="197.01622"
transform="scale(-1,1)" />
<rect
style="fill:#ffffff;stroke:none;stroke-width:0.0999999;stroke-linecap:round"
id="rect2255"
width="2.0844173"
height="9.6287899"
x="-45.551418"
y="194.22081"
transform="scale(-1,1)" />
<path
id="path2257"
style="fill:#ffffff;stroke:none;stroke-width:0.0999995;stroke-linecap:round"
d="m 19.97109,185.20282 10.735834,-6.19836 c 0.21219,-0.12249 0.502502,-0.0141 0.650911,0.24289 l 0.11208,0.19413 c 0.148409,0.25705 0.107331,0.58244 -0.115118,0.68513 -3.765389,1.73827 -7.326841,3.8345 -10.735835,6.19834 -0.201345,0.13962 -0.502495,0.0141 -0.65091,-0.24287 l -0.112081,-0.19413 c -0.148409,-0.25704 -0.09706,-0.56263 0.115117,-0.68513 z"
sodipodi:nodetypes="ssssssssss" />
</g>
</g>
</svg>


@ -2,12 +2,12 @@ cmake_minimum_required(VERSION 3.13)
include(CheckIncludeFileCXX)
project(larynx C CXX)
project(piper C CXX)
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
ADD_EXECUTABLE(larynx main.cpp)
ADD_EXECUTABLE(piper main.cpp)
string(APPEND CMAKE_CXX_FLAGS " -Wall -Wextra -Wl,-rpath,'$ORIGIN'")
string(APPEND CMAKE_C_FLAGS " -Wall -Wextra")
@ -21,26 +21,26 @@ check_include_file_cxx("pcaudiolib/audio.h" PCAUDIO_INCLUDE_FOUND)
if(PCAUDIO_INCLUDE_FOUND)
option(USE_PCAUDIO "Build with pcaudiolib" ON)
if(USE_PCAUDIO)
target_compile_definitions(larynx PUBLIC HAVE_PCAUDIO)
target_compile_definitions(piper PUBLIC HAVE_PCAUDIO)
set(PCAUDIO_LIBRARIES "pcaudio")
endif()
endif()
set(ONNXRUNTIME_ROOTDIR "/usr/local/include/onnxruntime")
target_link_libraries(larynx
target_link_libraries(piper
onnxruntime
-static-libgcc -static-libstdc++
${ESPEAK_NG_LIBRARIES}
${PCAUDIO_LIBRARIES})
target_link_directories(larynx PUBLIC
target_link_directories(piper PUBLIC
${ESPEAK_NG_LIBRARY_DIRS}
${ONNXRUNTIME_ROOTDIR}/lib)
target_include_directories(larynx PUBLIC
target_include_directories(piper PUBLIC
${ONNXRUNTIME_ROOTDIR}/include
${ESPEAK_NG_INCLUDE_DIRS})
target_compile_options(larynx PUBLIC
target_compile_options(piper PUBLIC
${ESPEAK_NG_CFLAGS_OTHER})

@ -15,7 +15,7 @@
using namespace std;
using json = nlohmann::json;
namespace larynx {
namespace piper {
typedef char32_t Phoneme;
typedef int64_t PhonemeId;
@ -145,6 +145,6 @@ void parseModelConfig(json &configRoot, ModelConfig &modelConfig) {
} /* parseModelConfig */
} // namespace larynx
} // namespace piper
#endif // CONFIG_H_

@ -12,7 +12,7 @@
#include <pcaudiolib/audio.h>
#endif
#include "larynx.hpp"
#include "piper.hpp"
using namespace std;
@ -23,7 +23,7 @@ struct RunConfig {
filesystem::path modelConfigPath;
OutputType outputType = OUTPUT_PLAY;
optional<filesystem::path> outputPath;
optional<larynx::SpeakerId> speakerId;
optional<piper::SpeakerId> speakerId;
optional<float> noiseScale;
optional<float> lengthScale;
optional<float> noiseW;
@ -36,9 +36,9 @@ int main(int argc, char *argv[]) {
parseArgs(argc, argv, runConfig);
auto exePath = filesystem::path(argv[0]);
larynx::initialize(exePath.parent_path());
piper::initialize(exePath.parent_path());
larynx::Voice voice;
piper::Voice voice;
auto startTime = chrono::steady_clock::now();
loadVoice(runConfig.modelPath.string(), runConfig.modelConfigPath.string(),
voice, runConfig.speakerId);
@ -64,7 +64,7 @@ int main(int argc, char *argv[]) {
if (runConfig.outputType == OUTPUT_PLAY) {
// Output audio to the default audio device
my_audio = create_audio_device_object(NULL, "larynx", "Text-to-Speech");
my_audio = create_audio_device_object(NULL, "piper", "Text-to-Speech");
// TODO: Support 32-bit sample widths
auto audioFormat = AUDIO_OBJECT_FORMAT_S16LE;
@ -78,7 +78,7 @@ int main(int argc, char *argv[]) {
#else
if (runConfig.outputType == OUTPUT_PLAY) {
// Cannot play audio directly
cerr << "WARNING: Larynx was not compiled with pcaudiolib. Output audio "
cerr << "WARNING: Piper was not compiled with pcaudiolib. Output audio "
"will be written to the current directory."
<< endl;
runConfig.outputType = OUTPUT_DIRECTORY;
@ -92,7 +92,7 @@ int main(int argc, char *argv[]) {
}
string line;
larynx::SynthesisResult result;
piper::SynthesisResult result;
while (getline(cin, line)) {
// Path to output WAV file
@ -108,19 +108,19 @@ int main(int argc, char *argv[]) {
// Output audio to automatically-named WAV file in a directory
ofstream audioFile(outputPath.string(), ios::binary);
larynx::textToWavFile(voice, line, audioFile, result);
piper::textToWavFile(voice, line, audioFile, result);
cout << outputPath.string() << endl;
} else if (runConfig.outputType == OUTPUT_FILE) {
// Output audio to WAV file
ofstream audioFile(runConfig.outputPath.value().string(), ios::binary);
larynx::textToWavFile(voice, line, audioFile, result);
piper::textToWavFile(voice, line, audioFile, result);
} else if (runConfig.outputType == OUTPUT_STDOUT) {
// Output WAV to stdout
larynx::textToWavFile(voice, line, cout, result);
piper::textToWavFile(voice, line, cout, result);
} else if (runConfig.outputType == OUTPUT_PLAY) {
#ifdef HAVE_PCAUDIO
vector<int16_t> audioBuffer;
larynx::textToAudio(voice, line, audioBuffer, result);
piper::textToAudio(voice, line, audioBuffer, result);
int error = audio_object_write(my_audio, (const char *)audioBuffer.data(),
sizeof(int16_t) * audioBuffer.size());
@ -138,7 +138,7 @@ int main(int argc, char *argv[]) {
<< " sec, audio=" << result.audioSeconds << " sec)" << endl;
}
larynx::terminate();
piper::terminate();
#ifdef HAVE_PCAUDIO
audio_object_close(my_audio);
@ -211,7 +211,7 @@ void parseArgs(int argc, char *argv[], RunConfig &runConfig) {
runConfig.outputPath = filesystem::path(argv[++i]);
} else if (arg == "-s" || arg == "--speaker") {
ensureArg(argc, argv, i);
runConfig.speakerId = (larynx::SpeakerId)stol(argv[++i]);
runConfig.speakerId = (piper::SpeakerId)stol(argv[++i]);
} else if (arg == "--noise-scale") {
ensureArg(argc, argv, i);
runConfig.noiseScale = stof(argv[++i]);

@ -7,8 +7,8 @@
using namespace std;
namespace larynx {
const string instanceName{"larynx"};
namespace piper {
const string instanceName{"piper"};
struct ModelSession {
Ort::Session onnx;
@ -48,6 +48,6 @@ void loadModel(string modelPath, ModelSession &session) {
auto loadDuration = chrono::duration<double>(endTime - startTime);
}
} // namespace larynx
} // namespace piper
#endif // MODEL_H_

@ -16,7 +16,7 @@
using namespace std;
namespace larynx {
namespace piper {
// Text to phonemes using eSpeak-ng
void phonemize(PhonemizeConfig &phonemizeConfig) {
@ -103,6 +103,6 @@ void phonemes2ids(PhonemizeConfig &phonemizeConfig,
} /* phonemes2ids */
} // namespace larynx
} // namespace piper
#endif // PHONEMIZE_H_

@ -1,5 +1,5 @@
#ifndef LARYNX_H_
#define LARYNX_H_
#ifndef PIPER_H_
#define PIPER_H_
#include <filesystem>
#include <iostream>
@ -17,7 +17,7 @@
using json = nlohmann::json;
namespace larynx {
namespace piper {
struct Voice {
json configRoot;
@ -106,6 +106,6 @@ void textToWavFile(Voice &voice, string text, ostream &audioFile,
} /* textToWavFile */
} // namespace larynx
} // namespace piper
#endif // LARYNX_H_
#endif // PIPER_H_

@ -14,7 +14,7 @@
using namespace std;
namespace larynx {
namespace piper {
// Maximum value for 16-bit signed WAV sample
const float MAX_WAV_VALUE = 32767.0f;
@ -126,6 +126,6 @@ void synthesize(SynthesisConfig &synthesisConfig, ModelSession &session,
Ort::OrtRelease(inputTensors[i].release());
}
}
} // namespace larynx
} // namespace piper
#endif // SYNTHESIZE_H_

@ -3,7 +3,7 @@
#include <iostream>
namespace larynx {
namespace piper {
struct WavHeader {
uint8_t RIFF[4] = {'R', 'I', 'F', 'F'};
@ -39,6 +39,6 @@ void writeWavHeader(int sampleRate, int sampleWidth, int channels,
} /* writeWavHeader */
} // namespace larynx
} // namespace piper
#endif // WAVFILE_H_

@ -7,7 +7,7 @@ if [ -d "${this_dir}/.venv" ]; then
source "${this_dir}/.venv/bin/activate"
fi
cd "${this_dir}/larynx_train/vits/monotonic_align"
cd "${this_dir}/piper_train/vits/monotonic_align"
mkdir -p monotonic_align
cythonize -i core.pyx
mv core*.so monotonic_align/

@ -13,7 +13,7 @@ except (ImportError, AttributeError):
files = importlib_resources.files
_PACKAGE = "larynx_train"
_PACKAGE = "piper_train"
_DIR = Path(typing.cast(os.PathLike, files(_PACKAGE)))
__version__ = (_DIR / "VERSION").read_text(encoding="utf-8").strip()

@ -7,7 +7,7 @@ import torch
from .vits.lightning import VitsModel
_LOGGER = logging.getLogger("larynx_train.export_generator")
_LOGGER = logging.getLogger("piper_train.export_generator")
def main():

@ -8,7 +8,7 @@ import torch
from .vits.lightning import VitsModel
_LOGGER = logging.getLogger("larynx_train.export_onnx")
_LOGGER = logging.getLogger("piper_train.export_onnx")
OPSET_VERSION = 15

@ -8,7 +8,7 @@ import torch
from .vits.lightning import VitsModel
_LOGGER = logging.getLogger("larynx_train.export_torchscript")
_LOGGER = logging.getLogger("piper_train.export_torchscript")
def main():

@ -12,13 +12,13 @@ from .vits.lightning import VitsModel
from .vits.utils import audio_float_to_int16
from .vits.wavfile import write as write_wav
_LOGGER = logging.getLogger("larynx_train.infer")
_LOGGER = logging.getLogger("piper_train.infer")
def main():
"""Main entry point"""
logging.basicConfig(level=logging.DEBUG)
parser = argparse.ArgumentParser(prog="larynx_train.infer")
parser = argparse.ArgumentParser(prog="piper_train.infer")
parser.add_argument(
"--checkpoint", required=True, help="Path to model checkpoint (.ckpt)"
)

@ -11,13 +11,13 @@ import torch
from .vits.utils import audio_float_to_int16
from .vits.wavfile import write as write_wav
_LOGGER = logging.getLogger("larynx_train.infer_generator")
_LOGGER = logging.getLogger("piper_train.infer_generator")
def main():
"""Main entry point"""
logging.basicConfig(level=logging.DEBUG)
parser = argparse.ArgumentParser(prog="larynx_train.infer_generator")
parser = argparse.ArgumentParser(prog="piper_train.infer_generator")
parser.add_argument("--model", required=True, help="Path to generator (.pt)")
parser.add_argument("--output-dir", required=True, help="Path to write WAV files")
parser.add_argument("--sample-rate", type=int, default=22050)

@ -13,13 +13,13 @@ import onnxruntime
from .vits.utils import audio_float_to_int16
from .vits.wavfile import write as write_wav
_LOGGER = logging.getLogger("larynx_train.infer_onnx")
_LOGGER = logging.getLogger("piper_train.infer_onnx")
def main():
"""Main entry point"""
logging.basicConfig(level=logging.DEBUG)
parser = argparse.ArgumentParser(prog="larynx_train.infer_onnx")
parser = argparse.ArgumentParser(prog="piper_train.infer_onnx")
parser.add_argument("--model", required=True, help="Path to model (.onnx)")
parser.add_argument("--output-dir", required=True, help="Path to write WAV files")
parser.add_argument("--sample-rate", type=int, default=22050)

@ -11,13 +11,13 @@ import torch
from .vits.utils import audio_float_to_int16
from .vits.wavfile import write as write_wav
_LOGGER = logging.getLogger("larynx_train.infer_torchscript")
_LOGGER = logging.getLogger("piper_train.infer_torchscript")
def main():
"""Main entry point"""
logging.basicConfig(level=logging.DEBUG)
parser = argparse.ArgumentParser(prog="larynx_train.infer_torchscript")
parser = argparse.ArgumentParser(prog="piper_train.infer_torchscript")
parser.add_argument(
"--model", required=True, help="Path to torchscript checkpoint (.ts)"
)

@ -5,7 +5,7 @@ from typing import Optional, Tuple, Union
import librosa
import torch
from larynx_train.vits.mel_processing import spectrogram_torch
from piper_train.vits.mel_processing import spectrogram_torch
from .trim import trim_silence
from .vad import SileroVoiceActivityDetector

@ -44,16 +44,7 @@ class Batch:
speaker_ids: Optional[LongTensor] = None
# @dataclass
# class LarynxDatasetSettings:
# sample_rate: int
# is_multispeaker: bool
# espeak_voice: Optional[str] = None
# phoneme_map: Dict[str, Optional[List[str]]] = field(default_factory=dict)
# phoneme_id_map: Dict[str, List[int]] = DEFAULT_PHONEME_ID_MAP
class LarynxDataset(Dataset):
class PiperDataset(Dataset):
"""
Dataset format:
@ -76,9 +67,7 @@ class LarynxDataset(Dataset):
dataset_path = Path(dataset_path)
_LOGGER.debug("Loading dataset: %s", dataset_path)
self.utterances.extend(
LarynxDataset.load_dataset(
dataset_path, max_phoneme_ids=max_phoneme_ids
)
PiperDataset.load_dataset(dataset_path, max_phoneme_ids=max_phoneme_ids)
)
def __len__(self):
@ -110,7 +99,7 @@ class LarynxDataset(Dataset):
continue
try:
utt = LarynxDataset.load_utterance(line)
utt = PiperDataset.load_utterance(line)
if (max_phoneme_ids is None) or (
len(utt.phoneme_ids) <= max_phoneme_ids
):

@ -9,7 +9,7 @@ from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset, random_split
from .commons import slice_segments
from .dataset import Batch, LarynxDataset, UtteranceCollate
from .dataset import Batch, PiperDataset, UtteranceCollate
from .losses import discriminator_loss, feature_loss, generator_loss, kl_loss
from .mel_processing import mel_spectrogram_torch, spec_to_mel_torch
from .models import MultiPeriodDiscriminator, SynthesizerTrn
@ -128,7 +128,7 @@ class VitsModel(pl.LightningModule):
_LOGGER.debug("No dataset to load")
return
full_dataset = LarynxDataset(
full_dataset = PiperDataset(
self.hparams.dataset, max_phoneme_ids=max_phoneme_ids
)
valid_set_size = int(len(full_dataset) * validation_split)

@ -1,14 +1,14 @@
/* Generated by Cython 0.29.32 */
/* Generated by Cython 0.29.33 */
/* BEGIN: Cython Metadata
{
"distutils": {
"name": "larynx_train.vits.monotonic_align.core",
"name": "piper_train.vits.monotonic_align.core",
"sources": [
"/home/hansenm/opt/larynx2/src/python/larynx_train/vits/monotonic_align/core.pyx"
"/home/hansenm/opt/larynx2/src/python/piper_train/vits/monotonic_align/core.pyx"
]
},
"module_name": "larynx_train.vits.monotonic_align.core"
"module_name": "piper_train.vits.monotonic_align.core"
}
END: Cython Metadata */
@ -21,8 +21,8 @@ END: Cython Metadata */
#elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000)
#error Cython requires Python 2.6+ or Python 3.3+.
#else
#define CYTHON_ABI "0_29_32"
#define CYTHON_HEX_VERSION 0x001D20F0
#define CYTHON_ABI "0_29_33"
#define CYTHON_HEX_VERSION 0x001D21F0
#define CYTHON_FUTURE_DIVISION 0
#include <stddef.h>
#ifndef offsetof
@ -99,7 +99,7 @@ END: Cython Metadata */
#undef CYTHON_USE_EXC_INFO_STACK
#define CYTHON_USE_EXC_INFO_STACK 0
#ifndef CYTHON_UPDATE_DESCRIPTOR_DOC
#define CYTHON_UPDATE_DESCRIPTOR_DOC (PYPY_VERSION_HEX >= 0x07030900)
#define CYTHON_UPDATE_DESCRIPTOR_DOC 0
#endif
#elif defined(PYSTON_VERSION)
#define CYTHON_COMPILING_IN_PYPY 0
@ -564,11 +564,11 @@ static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) {
#endif
#if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND)
#define CYTHON_PEP393_ENABLED 1
#if defined(PyUnicode_IS_READY)
#define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\
0 : _PyUnicode_Ready((PyObject *)(op)))
#if PY_VERSION_HEX >= 0x030C0000
#define __Pyx_PyUnicode_READY(op) (0)
#else
#define __Pyx_PyUnicode_READY(op) (0)
#define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\
0 : _PyUnicode_Ready((PyObject *)(op)))
#endif
#define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u)
#define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i)
@ -577,14 +577,14 @@ static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) {
#define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u)
#define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i)
#define __Pyx_PyUnicode_WRITE(k, d, i, ch) PyUnicode_WRITE(k, d, i, ch)
#if defined(PyUnicode_IS_READY) && defined(PyUnicode_GET_SIZE)
#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03090000
#define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : ((PyCompactUnicodeObject *)(u))->wstr_length))
#else
#define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u)))
#endif
#if PY_VERSION_HEX >= 0x030C0000
#define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_LENGTH(u))
#else
#define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_LENGTH(u))
#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03090000
#define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : ((PyCompactUnicodeObject *)(u))->wstr_length))
#else
#define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u)))
#endif
#endif
#else
#define CYTHON_PEP393_ENABLED 0
@ -750,8 +750,8 @@ static CYTHON_INLINE float __PYX_NAN() {
#endif
#endif
#define __PYX_HAVE__larynx_train__vits__monotonic_align__core
#define __PYX_HAVE_API__larynx_train__vits__monotonic_align__core
#define __PYX_HAVE__piper_train__vits__monotonic_align__core
#define __PYX_HAVE_API__piper_train__vits__monotonic_align__core
/* Early includes */
#include "pythread.h"
#include <string.h>
@ -1080,16 +1080,16 @@ struct __pyx_array_obj;
struct __pyx_MemviewEnum_obj;
struct __pyx_memoryview_obj;
struct __pyx_memoryviewslice_obj;
struct __pyx_opt_args_12larynx_train_4vits_15monotonic_align_4core_maximum_path_each;
struct __pyx_opt_args_11piper_train_4vits_15monotonic_align_4core_maximum_path_each;
/* "larynx_train/vits/monotonic_align/core.pyx":7
/* "piper_train/vits/monotonic_align/core.pyx":7
* @cython.boundscheck(False)
* @cython.wraparound(False)
* cdef void maximum_path_each(int[:,::1] path, float[:,::1] value, int t_y, int t_x, float max_neg_val=-1e9) nogil: # <<<<<<<<<<<<<<
* cdef int x
* cdef int y
*/
struct __pyx_opt_args_12larynx_train_4vits_15monotonic_align_4core_maximum_path_each {
struct __pyx_opt_args_11piper_train_4vits_15monotonic_align_4core_maximum_path_each {
int __pyx_n;
float max_neg_val;
};
@ -1551,18 +1551,18 @@ static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UIN
/* GetModuleGlobalName.proto */
#if CYTHON_USE_DICT_VERSIONS
#define __Pyx_GetModuleGlobalName(var, name) {\
#define __Pyx_GetModuleGlobalName(var, name) do {\
static PY_UINT64_T __pyx_dict_version = 0;\
static PyObject *__pyx_dict_cached_value = NULL;\
(var) = (likely(__pyx_dict_version == __PYX_GET_DICT_VERSION(__pyx_d))) ?\
(likely(__pyx_dict_cached_value) ? __Pyx_NewRef(__pyx_dict_cached_value) : __Pyx_GetBuiltinName(name)) :\
__Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\
}
#define __Pyx_GetModuleGlobalNameUncached(var, name) {\
} while(0)
#define __Pyx_GetModuleGlobalNameUncached(var, name) do {\
PY_UINT64_T __pyx_dict_version;\
PyObject *__pyx_dict_cached_value;\
(var) = __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\
}
} while(0)
static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value);
#else
#define __Pyx_GetModuleGlobalName(var, name) (var) = __Pyx__GetModuleGlobalName(name)
@ -1864,7 +1864,7 @@ static PyObject *__pyx_memoryviewslice_assign_item_from_object(struct __pyx_memo
/* Module declarations from 'cython' */
/* Module declarations from 'larynx_train.vits.monotonic_align.core' */
/* Module declarations from 'piper_train.vits.monotonic_align.core' */
static PyTypeObject *__pyx_array_type = 0;
static PyTypeObject *__pyx_MemviewEnum_type = 0;
static PyTypeObject *__pyx_memoryview_type = 0;
@ -1876,8 +1876,8 @@ static PyObject *contiguous = 0;
static PyObject *indirect_contiguous = 0;
static int __pyx_memoryview_thread_locks_used;
static PyThread_type_lock __pyx_memoryview_thread_locks[8];
static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_each(__Pyx_memviewslice, __Pyx_memviewslice, int, int, struct __pyx_opt_args_12larynx_train_4vits_15monotonic_align_4core_maximum_path_each *__pyx_optional_args); /*proto*/
static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_c(__Pyx_memviewslice, __Pyx_memviewslice, __Pyx_memviewslice, __Pyx_memviewslice, int __pyx_skip_dispatch); /*proto*/
static void __pyx_f_11piper_train_4vits_15monotonic_align_4core_maximum_path_each(__Pyx_memviewslice, __Pyx_memviewslice, int, int, struct __pyx_opt_args_11piper_train_4vits_15monotonic_align_4core_maximum_path_each *__pyx_optional_args); /*proto*/
static void __pyx_f_11piper_train_4vits_15monotonic_align_4core_maximum_path_c(__Pyx_memviewslice, __Pyx_memviewslice, __Pyx_memviewslice, __Pyx_memviewslice, int __pyx_skip_dispatch); /*proto*/
static struct __pyx_array_obj *__pyx_array_new(PyObject *, Py_ssize_t, char *, char *, char *); /*proto*/
static void *__pyx_align_pointer(void *, size_t); /*proto*/
static PyObject *__pyx_memoryview_new(PyObject *, int, int, __Pyx_TypeInfo *); /*proto*/
@ -1913,11 +1913,11 @@ static void __pyx_memoryview__slice_assign_scalar(char *, Py_ssize_t *, Py_ssize
static PyObject *__pyx_unpickle_Enum__set_state(struct __pyx_MemviewEnum_obj *, PyObject *); /*proto*/
static __Pyx_TypeInfo __Pyx_TypeInfo_int = { "int", NULL, sizeof(int), { 0 }, 0, IS_UNSIGNED(int) ? 'U' : 'I', IS_UNSIGNED(int), 0 };
static __Pyx_TypeInfo __Pyx_TypeInfo_float = { "float", NULL, sizeof(float), { 0 }, 0, 'R', 0, 0 };
#define __Pyx_MODULE_NAME "larynx_train.vits.monotonic_align.core"
extern int __pyx_module_is_main_larynx_train__vits__monotonic_align__core;
int __pyx_module_is_main_larynx_train__vits__monotonic_align__core = 0;
#define __Pyx_MODULE_NAME "piper_train.vits.monotonic_align.core"
extern int __pyx_module_is_main_piper_train__vits__monotonic_align__core;
int __pyx_module_is_main_piper_train__vits__monotonic_align__core = 0;
/* Implementation of 'larynx_train.vits.monotonic_align.core' */
/* Implementation of 'piper_train.vits.monotonic_align.core' */
static PyObject *__pyx_builtin_range;
static PyObject *__pyx_builtin_ValueError;
static PyObject *__pyx_builtin_MemoryError;
@ -2104,7 +2104,7 @@ static PyObject *__pyx_kp_s_unable_to_allocate_shape_and_str;
static PyObject *__pyx_n_s_unpack;
static PyObject *__pyx_n_s_update;
static PyObject *__pyx_n_s_values;
static PyObject *__pyx_pf_12larynx_train_4vits_15monotonic_align_4core_maximum_path_c(CYTHON_UNUSED PyObject *__pyx_self, __Pyx_memviewslice __pyx_v_paths, __Pyx_memviewslice __pyx_v_values, __Pyx_memviewslice __pyx_v_t_ys, __Pyx_memviewslice __pyx_v_t_xs); /* proto */
static PyObject *__pyx_pf_11piper_train_4vits_15monotonic_align_4core_maximum_path_c(CYTHON_UNUSED PyObject *__pyx_self, __Pyx_memviewslice __pyx_v_paths, __Pyx_memviewslice __pyx_v_values, __Pyx_memviewslice __pyx_v_t_ys, __Pyx_memviewslice __pyx_v_t_xs); /* proto */
static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __pyx_array_obj *__pyx_v_self, PyObject *__pyx_v_shape, Py_ssize_t __pyx_v_itemsize, PyObject *__pyx_v_format, PyObject *__pyx_v_mode, int __pyx_v_allocate_buffer); /* proto */
static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(struct __pyx_array_obj *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /* proto */
static void __pyx_array___pyx_pf_15View_dot_MemoryView_5array_4__dealloc__(struct __pyx_array_obj *__pyx_v_self); /* proto */
@ -2186,7 +2186,7 @@ static PyObject *__pyx_tuple__26;
static PyObject *__pyx_codeobj__27;
/* Late includes */
/* "larynx_train/vits/monotonic_align/core.pyx":7
/* "piper_train/vits/monotonic_align/core.pyx":7
* @cython.boundscheck(False)
* @cython.wraparound(False)
* cdef void maximum_path_each(int[:,::1] path, float[:,::1] value, int t_y, int t_x, float max_neg_val=-1e9) nogil: # <<<<<<<<<<<<<<
@ -2194,7 +2194,7 @@ static PyObject *__pyx_codeobj__27;
* cdef int y
*/
static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_each(__Pyx_memviewslice __pyx_v_path, __Pyx_memviewslice __pyx_v_value, int __pyx_v_t_y, int __pyx_v_t_x, struct __pyx_opt_args_12larynx_train_4vits_15monotonic_align_4core_maximum_path_each *__pyx_optional_args) {
static void __pyx_f_11piper_train_4vits_15monotonic_align_4core_maximum_path_each(__Pyx_memviewslice __pyx_v_path, __Pyx_memviewslice __pyx_v_value, int __pyx_v_t_y, int __pyx_v_t_x, struct __pyx_opt_args_11piper_train_4vits_15monotonic_align_4core_maximum_path_each *__pyx_optional_args) {
float __pyx_v_max_neg_val = __pyx_k_;
int __pyx_v_x;
int __pyx_v_y;
@ -2223,7 +2223,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
}
}
/* "larynx_train/vits/monotonic_align/core.pyx":13
/* "piper_train/vits/monotonic_align/core.pyx":13
* cdef float v_cur
* cdef float tmp
* cdef int index = t_x - 1 # <<<<<<<<<<<<<<
@ -2232,7 +2232,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
*/
__pyx_v_index = (__pyx_v_t_x - 1);
/* "larynx_train/vits/monotonic_align/core.pyx":15
/* "piper_train/vits/monotonic_align/core.pyx":15
* cdef int index = t_x - 1
*
* for y in range(t_y): # <<<<<<<<<<<<<<
@ -2244,7 +2244,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) {
__pyx_v_y = __pyx_t_3;
/* "larynx_train/vits/monotonic_align/core.pyx":16
/* "piper_train/vits/monotonic_align/core.pyx":16
*
* for y in range(t_y):
* for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)): # <<<<<<<<<<<<<<
@ -2270,7 +2270,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
for (__pyx_t_5 = __pyx_t_7; __pyx_t_5 < __pyx_t_6; __pyx_t_5+=1) {
__pyx_v_x = __pyx_t_5;
/* "larynx_train/vits/monotonic_align/core.pyx":17
/* "piper_train/vits/monotonic_align/core.pyx":17
* for y in range(t_y):
* for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)):
* if x == y: # <<<<<<<<<<<<<<
@ -2280,7 +2280,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
__pyx_t_8 = ((__pyx_v_x == __pyx_v_y) != 0);
if (__pyx_t_8) {
/* "larynx_train/vits/monotonic_align/core.pyx":18
/* "piper_train/vits/monotonic_align/core.pyx":18
* for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)):
* if x == y:
* v_cur = max_neg_val # <<<<<<<<<<<<<<
@ -2289,7 +2289,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
*/
__pyx_v_v_cur = __pyx_v_max_neg_val;
/* "larynx_train/vits/monotonic_align/core.pyx":17
/* "piper_train/vits/monotonic_align/core.pyx":17
* for y in range(t_y):
* for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)):
* if x == y: # <<<<<<<<<<<<<<
@ -2299,7 +2299,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
goto __pyx_L7;
}
/* "larynx_train/vits/monotonic_align/core.pyx":20
/* "piper_train/vits/monotonic_align/core.pyx":20
* v_cur = max_neg_val
* else:
* v_cur = value[y-1, x] # <<<<<<<<<<<<<<
@ -2313,7 +2313,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
}
__pyx_L7:;
/* "larynx_train/vits/monotonic_align/core.pyx":21
/* "piper_train/vits/monotonic_align/core.pyx":21
* else:
* v_cur = value[y-1, x]
* if x == 0: # <<<<<<<<<<<<<<
@ -2323,7 +2323,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
__pyx_t_8 = ((__pyx_v_x == 0) != 0);
if (__pyx_t_8) {
/* "larynx_train/vits/monotonic_align/core.pyx":22
/* "piper_train/vits/monotonic_align/core.pyx":22
* v_cur = value[y-1, x]
* if x == 0:
* if y == 0: # <<<<<<<<<<<<<<
@ -2333,7 +2333,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
__pyx_t_8 = ((__pyx_v_y == 0) != 0);
if (__pyx_t_8) {
/* "larynx_train/vits/monotonic_align/core.pyx":23
/* "piper_train/vits/monotonic_align/core.pyx":23
* if x == 0:
* if y == 0:
* v_prev = 0. # <<<<<<<<<<<<<<
@ -2342,7 +2342,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
*/
__pyx_v_v_prev = 0.;
/* "larynx_train/vits/monotonic_align/core.pyx":22
/* "piper_train/vits/monotonic_align/core.pyx":22
* v_cur = value[y-1, x]
* if x == 0:
* if y == 0: # <<<<<<<<<<<<<<
@ -2352,7 +2352,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
goto __pyx_L9;
}
/* "larynx_train/vits/monotonic_align/core.pyx":25
/* "piper_train/vits/monotonic_align/core.pyx":25
* v_prev = 0.
* else:
* v_prev = max_neg_val # <<<<<<<<<<<<<<
@ -2364,7 +2364,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
}
__pyx_L9:;
/* "larynx_train/vits/monotonic_align/core.pyx":21
/* "piper_train/vits/monotonic_align/core.pyx":21
* else:
* v_cur = value[y-1, x]
* if x == 0: # <<<<<<<<<<<<<<
@ -2374,7 +2374,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
goto __pyx_L8;
}
/* "larynx_train/vits/monotonic_align/core.pyx":27
/* "piper_train/vits/monotonic_align/core.pyx":27
* v_prev = max_neg_val
* else:
* v_prev = value[y-1, x-1] # <<<<<<<<<<<<<<
@ -2388,7 +2388,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
}
__pyx_L8:;
/* "larynx_train/vits/monotonic_align/core.pyx":28
/* "piper_train/vits/monotonic_align/core.pyx":28
* else:
* v_prev = value[y-1, x-1]
* value[y, x] += max(v_prev, v_cur) # <<<<<<<<<<<<<<
@ -2408,7 +2408,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
}
}
/* "larynx_train/vits/monotonic_align/core.pyx":30
/* "piper_train/vits/monotonic_align/core.pyx":30
* value[y, x] += max(v_prev, v_cur)
*
* for y in range(t_y - 1, -1, -1): # <<<<<<<<<<<<<<
@ -2418,7 +2418,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
for (__pyx_t_1 = (__pyx_v_t_y - 1); __pyx_t_1 > -1; __pyx_t_1-=1) {
__pyx_v_y = __pyx_t_1;
/* "larynx_train/vits/monotonic_align/core.pyx":31
/* "piper_train/vits/monotonic_align/core.pyx":31
*
* for y in range(t_y - 1, -1, -1):
* path[y, index] = 1 # <<<<<<<<<<<<<<
@ -2429,7 +2429,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
__pyx_t_9 = __pyx_v_index;
*((int *) ( /* dim=1 */ ((char *) (((int *) ( /* dim=0 */ (__pyx_v_path.data + __pyx_t_10 * __pyx_v_path.strides[0]) )) + __pyx_t_9)) )) = 1;
/* "larynx_train/vits/monotonic_align/core.pyx":32
/* "piper_train/vits/monotonic_align/core.pyx":32
* for y in range(t_y - 1, -1, -1):
* path[y, index] = 1
* if index != 0 and (index == y or value[y-1, index] < value[y-1, index-1]): # <<<<<<<<<<<<<<
@ -2457,7 +2457,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
__pyx_L13_bool_binop_done:;
if (__pyx_t_8) {
/* "larynx_train/vits/monotonic_align/core.pyx":33
/* "piper_train/vits/monotonic_align/core.pyx":33
* path[y, index] = 1
* if index != 0 and (index == y or value[y-1, index] < value[y-1, index-1]):
* index = index - 1 # <<<<<<<<<<<<<<
@ -2466,7 +2466,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
*/
__pyx_v_index = (__pyx_v_index - 1);
/* "larynx_train/vits/monotonic_align/core.pyx":32
/* "piper_train/vits/monotonic_align/core.pyx":32
* for y in range(t_y - 1, -1, -1):
* path[y, index] = 1
* if index != 0 and (index == y or value[y-1, index] < value[y-1, index-1]): # <<<<<<<<<<<<<<
@ -2476,7 +2476,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
}
}
/* "larynx_train/vits/monotonic_align/core.pyx":7
/* "piper_train/vits/monotonic_align/core.pyx":7
* @cython.boundscheck(False)
* @cython.wraparound(False)
* cdef void maximum_path_each(int[:,::1] path, float[:,::1] value, int t_y, int t_x, float max_neg_val=-1e9) nogil: # <<<<<<<<<<<<<<
@ -2487,7 +2487,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
/* function exit code */
}
/* "larynx_train/vits/monotonic_align/core.pyx":38
/* "piper_train/vits/monotonic_align/core.pyx":38
* @cython.boundscheck(False)
* @cython.wraparound(False)
* cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_ys, int[::1] t_xs) nogil: # <<<<<<<<<<<<<<
@ -2495,8 +2495,8 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
* cdef int i
*/
static PyObject *__pyx_pw_12larynx_train_4vits_15monotonic_align_4core_1maximum_path_c(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_c(__Pyx_memviewslice __pyx_v_paths, __Pyx_memviewslice __pyx_v_values, __Pyx_memviewslice __pyx_v_t_ys, __Pyx_memviewslice __pyx_v_t_xs, CYTHON_UNUSED int __pyx_skip_dispatch) {
static PyObject *__pyx_pw_11piper_train_4vits_15monotonic_align_4core_1maximum_path_c(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static void __pyx_f_11piper_train_4vits_15monotonic_align_4core_maximum_path_c(__Pyx_memviewslice __pyx_v_paths, __Pyx_memviewslice __pyx_v_values, __Pyx_memviewslice __pyx_v_t_ys, __Pyx_memviewslice __pyx_v_t_xs, CYTHON_UNUSED int __pyx_skip_dispatch) {
CYTHON_UNUSED int __pyx_v_b;
int __pyx_v_i;
int __pyx_t_1;
@ -2507,7 +2507,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_c(
Py_ssize_t __pyx_t_6;
Py_ssize_t __pyx_t_7;
/* "larynx_train/vits/monotonic_align/core.pyx":39
/* "piper_train/vits/monotonic_align/core.pyx":39
* @cython.wraparound(False)
* cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_ys, int[::1] t_xs) nogil:
* cdef int b = paths.shape[0] # <<<<<<<<<<<<<<
@ -2516,7 +2516,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_c(
*/
__pyx_v_b = (__pyx_v_paths.shape[0]);
/* "larynx_train/vits/monotonic_align/core.pyx":41
/* "piper_train/vits/monotonic_align/core.pyx":41
* cdef int b = paths.shape[0]
* cdef int i
* for i in prange(b, nogil=True): # <<<<<<<<<<<<<<
@ -2552,7 +2552,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_c(
{
__pyx_v_i = (int)(0 + 1 * __pyx_t_2);
/* "larynx_train/vits/monotonic_align/core.pyx":42
/* "piper_train/vits/monotonic_align/core.pyx":42
* cdef int i
* for i in prange(b, nogil=True):
* maximum_path_each(paths[i], values[i], t_ys[i], t_xs[i]) # <<<<<<<<<<<<<<
@ -2593,7 +2593,7 @@ __pyx_t_5.strides[1] = __pyx_v_values.strides[2];
__pyx_t_6 = __pyx_v_i;
__pyx_t_7 = __pyx_v_i;
__pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_each(__pyx_t_4, __pyx_t_5, (*((int *) ( /* dim=0 */ ((char *) (((int *) __pyx_v_t_ys.data) + __pyx_t_6)) ))), (*((int *) ( /* dim=0 */ ((char *) (((int *) __pyx_v_t_xs.data) + __pyx_t_7)) ))), NULL);
__pyx_f_11piper_train_4vits_15monotonic_align_4core_maximum_path_each(__pyx_t_4, __pyx_t_5, (*((int *) ( /* dim=0 */ ((char *) (((int *) __pyx_v_t_ys.data) + __pyx_t_6)) ))), (*((int *) ( /* dim=0 */ ((char *) (((int *) __pyx_v_t_xs.data) + __pyx_t_7)) ))), NULL);
__PYX_XDEC_MEMVIEW(&__pyx_t_4, 0);
__pyx_t_4.memview = NULL;
__pyx_t_4.data = NULL;
@ -2613,7 +2613,7 @@ __pyx_t_6 = __pyx_v_i;
#endif
}
/* "larynx_train/vits/monotonic_align/core.pyx":41
/* "piper_train/vits/monotonic_align/core.pyx":41
* cdef int b = paths.shape[0]
* cdef int i
* for i in prange(b, nogil=True): # <<<<<<<<<<<<<<
@ -2631,7 +2631,7 @@ __pyx_t_6 = __pyx_v_i;
}
}
/* "larynx_train/vits/monotonic_align/core.pyx":38
/* "piper_train/vits/monotonic_align/core.pyx":38
* @cython.boundscheck(False)
* @cython.wraparound(False)
* cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_ys, int[::1] t_xs) nogil: # <<<<<<<<<<<<<<
@ -2643,8 +2643,8 @@ __pyx_t_6 = __pyx_v_i;
}
/* Python wrapper */
static PyObject *__pyx_pw_12larynx_train_4vits_15monotonic_align_4core_1maximum_path_c(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static PyObject *__pyx_pw_12larynx_train_4vits_15monotonic_align_4core_1maximum_path_c(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
static PyObject *__pyx_pw_11piper_train_4vits_15monotonic_align_4core_1maximum_path_c(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static PyObject *__pyx_pw_11piper_train_4vits_15monotonic_align_4core_1maximum_path_c(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
__Pyx_memviewslice __pyx_v_paths = { 0, 0, { 0 }, { 0 }, { 0 } };
__Pyx_memviewslice __pyx_v_values = { 0, 0, { 0 }, { 0 }, { 0 } };
__Pyx_memviewslice __pyx_v_t_ys = { 0, 0, { 0 }, { 0 }, { 0 } };
@ -2717,18 +2717,18 @@ static PyObject *__pyx_pw_12larynx_train_4vits_15monotonic_align_4core_1maximum_
__pyx_L5_argtuple_error:;
__Pyx_RaiseArgtupleInvalid("maximum_path_c", 1, 4, 4, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 38, __pyx_L3_error)
__pyx_L3_error:;
__Pyx_AddTraceback("larynx_train.vits.monotonic_align.core.maximum_path_c", __pyx_clineno, __pyx_lineno, __pyx_filename);
__Pyx_AddTraceback("piper_train.vits.monotonic_align.core.maximum_path_c", __pyx_clineno, __pyx_lineno, __pyx_filename);
__Pyx_RefNannyFinishContext();
return NULL;
__pyx_L4_argument_unpacking_done:;
__pyx_r = __pyx_pf_12larynx_train_4vits_15monotonic_align_4core_maximum_path_c(__pyx_self, __pyx_v_paths, __pyx_v_values, __pyx_v_t_ys, __pyx_v_t_xs);
__pyx_r = __pyx_pf_11piper_train_4vits_15monotonic_align_4core_maximum_path_c(__pyx_self, __pyx_v_paths, __pyx_v_values, __pyx_v_t_ys, __pyx_v_t_xs);
/* function exit code */
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
static PyObject *__pyx_pf_12larynx_train_4vits_15monotonic_align_4core_maximum_path_c(CYTHON_UNUSED PyObject *__pyx_self, __Pyx_memviewslice __pyx_v_paths, __Pyx_memviewslice __pyx_v_values, __Pyx_memviewslice __pyx_v_t_ys, __Pyx_memviewslice __pyx_v_t_xs) {
static PyObject *__pyx_pf_11piper_train_4vits_15monotonic_align_4core_maximum_path_c(CYTHON_UNUSED PyObject *__pyx_self, __Pyx_memviewslice __pyx_v_paths, __Pyx_memviewslice __pyx_v_values, __Pyx_memviewslice __pyx_v_t_ys, __Pyx_memviewslice __pyx_v_t_xs) {
PyObject *__pyx_r = NULL;
__Pyx_RefNannyDeclarations
PyObject *__pyx_t_1 = NULL;
@ -2741,7 +2741,7 @@ static PyObject *__pyx_pf_12larynx_train_4vits_15monotonic_align_4core_maximum_p
if (unlikely(!__pyx_v_values.memview)) { __Pyx_RaiseUnboundLocalError("values"); __PYX_ERR(0, 38, __pyx_L1_error) }
if (unlikely(!__pyx_v_t_ys.memview)) { __Pyx_RaiseUnboundLocalError("t_ys"); __PYX_ERR(0, 38, __pyx_L1_error) }
if (unlikely(!__pyx_v_t_xs.memview)) { __Pyx_RaiseUnboundLocalError("t_xs"); __PYX_ERR(0, 38, __pyx_L1_error) }
__pyx_t_1 = __Pyx_void_to_None(__pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_c(__pyx_v_paths, __pyx_v_values, __pyx_v_t_ys, __pyx_v_t_xs, 0)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 38, __pyx_L1_error)
__pyx_t_1 = __Pyx_void_to_None(__pyx_f_11piper_train_4vits_15monotonic_align_4core_maximum_path_c(__pyx_v_paths, __pyx_v_values, __pyx_v_t_ys, __pyx_v_t_xs, 0)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 38, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_r = __pyx_t_1;
__pyx_t_1 = 0;
@ -2750,7 +2750,7 @@ static PyObject *__pyx_pf_12larynx_train_4vits_15monotonic_align_4core_maximum_p
/* function exit code */
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_1);
__Pyx_AddTraceback("larynx_train.vits.monotonic_align.core.maximum_path_c", __pyx_clineno, __pyx_lineno, __pyx_filename);
__Pyx_AddTraceback("piper_train.vits.monotonic_align.core.maximum_path_c", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = NULL;
__pyx_L0:;
__PYX_XDEC_MEMVIEW(&__pyx_v_paths, 1);
@ -3066,7 +3066,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __
* self.format = self._format
*
*/
if (!(likely(PyBytes_CheckExact(__pyx_v_format))||((__pyx_v_format) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_v_format)->tp_name), 0))) __PYX_ERR(1, 141, __pyx_L1_error)
if (!(likely(PyBytes_CheckExact(__pyx_v_format))||((__pyx_v_format) == Py_None)||((void)PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_v_format)->tp_name), 0))) __PYX_ERR(1, 141, __pyx_L1_error)
__pyx_t_3 = __pyx_v_format;
__Pyx_INCREF(__pyx_t_3);
__Pyx_GIVEREF(__pyx_t_3);
@ -5044,7 +5044,7 @@ static PyObject *__pyx_pf___pyx_MemviewEnum_2__setstate_cython__(struct __pyx_Me
* def __setstate_cython__(self, __pyx_state):
* __pyx_unpickle_Enum__set_state(self, __pyx_state) # <<<<<<<<<<<<<<
*/
if (!(likely(PyTuple_CheckExact(__pyx_v___pyx_state))||((__pyx_v___pyx_state) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "tuple", Py_TYPE(__pyx_v___pyx_state)->tp_name), 0))) __PYX_ERR(1, 17, __pyx_L1_error)
if (!(likely(PyTuple_CheckExact(__pyx_v___pyx_state))||((__pyx_v___pyx_state) == Py_None)||((void)PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "tuple", Py_TYPE(__pyx_v___pyx_state)->tp_name), 0))) __PYX_ERR(1, 17, __pyx_L1_error)
__pyx_t_1 = __pyx_unpickle_Enum__set_state(__pyx_v_self, ((PyObject*)__pyx_v___pyx_state)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 17, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
@ -7347,7 +7347,7 @@ static PyObject *__pyx_memoryview_assign_item_from_object(struct __pyx_memoryvie
__Pyx_GOTREF(__pyx_t_4);
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
__Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
if (!(likely(PyBytes_CheckExact(__pyx_t_4))||((__pyx_t_4) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_t_4)->tp_name), 0))) __PYX_ERR(1, 512, __pyx_L1_error)
if (!(likely(PyBytes_CheckExact(__pyx_t_4))||((__pyx_t_4) == Py_None)||((void)PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_t_4)->tp_name), 0))) __PYX_ERR(1, 512, __pyx_L1_error)
__pyx_v_bytesvalue = ((PyObject*)__pyx_t_4);
__pyx_t_4 = 0;
@ -7420,7 +7420,7 @@ static PyObject *__pyx_memoryview_assign_item_from_object(struct __pyx_memoryvie
__Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
}
__Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
if (!(likely(PyBytes_CheckExact(__pyx_t_4))||((__pyx_t_4) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_t_4)->tp_name), 0))) __PYX_ERR(1, 514, __pyx_L1_error)
if (!(likely(PyBytes_CheckExact(__pyx_t_4))||((__pyx_t_4) == Py_None)||((void)PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_t_4)->tp_name), 0))) __PYX_ERR(1, 514, __pyx_L1_error)
__pyx_v_bytesvalue = ((PyObject*)__pyx_t_4);
__pyx_t_4 = 0;
}
@ -15623,7 +15623,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView___pyx_unpickle_Enum(CYTHON_UNUSE
* return __pyx_result
* cdef __pyx_unpickle_Enum__set_state(Enum __pyx_result, tuple __pyx_state):
*/
if (!(likely(PyTuple_CheckExact(__pyx_v___pyx_state))||((__pyx_v___pyx_state) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "tuple", Py_TYPE(__pyx_v___pyx_state)->tp_name), 0))) __PYX_ERR(1, 9, __pyx_L1_error)
if (!(likely(PyTuple_CheckExact(__pyx_v___pyx_state))||((__pyx_v___pyx_state) == Py_None)||((void)PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "tuple", Py_TYPE(__pyx_v___pyx_state)->tp_name), 0))) __PYX_ERR(1, 9, __pyx_L1_error)
__pyx_t_4 = __pyx_unpickle_Enum__set_state(((struct __pyx_MemviewEnum_obj *)__pyx_v___pyx_result), ((PyObject*)__pyx_v___pyx_state)); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 9, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_4);
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
@ -15925,7 +15925,7 @@ static PyBufferProcs __pyx_tp_as_buffer_array = {
static PyTypeObject __pyx_type___pyx_array = {
PyVarObject_HEAD_INIT(0, 0)
"larynx_train.vits.monotonic_align.core.array", /*tp_name*/
"piper_train.vits.monotonic_align.core.array", /*tp_name*/
sizeof(struct __pyx_array_obj), /*tp_basicsize*/
0, /*tp_itemsize*/
__pyx_tp_dealloc_array, /*tp_dealloc*/
@ -16047,7 +16047,7 @@ static PyMethodDef __pyx_methods_Enum[] = {
static PyTypeObject __pyx_type___pyx_MemviewEnum = {
PyVarObject_HEAD_INIT(0, 0)
"larynx_train.vits.monotonic_align.core.Enum", /*tp_name*/
"piper_train.vits.monotonic_align.core.Enum", /*tp_name*/
sizeof(struct __pyx_MemviewEnum_obj), /*tp_basicsize*/
0, /*tp_itemsize*/
__pyx_tp_dealloc_Enum, /*tp_dealloc*/
@ -16311,7 +16311,7 @@ static PyBufferProcs __pyx_tp_as_buffer_memoryview = {
static PyTypeObject __pyx_type___pyx_memoryview = {
PyVarObject_HEAD_INIT(0, 0)
"larynx_train.vits.monotonic_align.core.memoryview", /*tp_name*/
"piper_train.vits.monotonic_align.core.memoryview", /*tp_name*/
sizeof(struct __pyx_memoryview_obj), /*tp_basicsize*/
0, /*tp_itemsize*/
__pyx_tp_dealloc_memoryview, /*tp_dealloc*/
@ -16452,7 +16452,7 @@ static struct PyGetSetDef __pyx_getsets__memoryviewslice[] = {
static PyTypeObject __pyx_type___pyx_memoryviewslice = {
PyVarObject_HEAD_INIT(0, 0)
"larynx_train.vits.monotonic_align.core._memoryviewslice", /*tp_name*/
"piper_train.vits.monotonic_align.core._memoryviewslice", /*tp_name*/
sizeof(struct __pyx_memoryviewslice_obj), /*tp_basicsize*/
0, /*tp_itemsize*/
__pyx_tp_dealloc__memoryviewslice, /*tp_dealloc*/
@ -16531,7 +16531,7 @@ static PyTypeObject __pyx_type___pyx_memoryviewslice = {
};
static PyMethodDef __pyx_methods[] = {
{"maximum_path_c", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_12larynx_train_4vits_15monotonic_align_4core_1maximum_path_c, METH_VARARGS|METH_KEYWORDS, 0},
{"maximum_path_c", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_11piper_train_4vits_15monotonic_align_4core_1maximum_path_c, METH_VARARGS|METH_KEYWORDS, 0},
{0, 0, 0, 0}
};
@ -16961,7 +16961,7 @@ PyEval_InitThreads();
if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 1, __pyx_L1_error)
if (__Pyx_InitStrings(__pyx_string_tab) < 0) __PYX_ERR(0, 1, __pyx_L1_error);
if (__Pyx_InitStrings(__pyx_string_tab) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
__pyx_int_0 = PyInt_FromLong(0); if (unlikely(!__pyx_int_0)) __PYX_ERR(0, 1, __pyx_L1_error)
__pyx_int_1 = PyInt_FromLong(1); if (unlikely(!__pyx_int_1)) __PYX_ERR(0, 1, __pyx_L1_error)
__pyx_int_112105877 = PyInt_FromLong(112105877L); if (unlikely(!__pyx_int_112105877)) __PYX_ERR(0, 1, __pyx_L1_error)
@ -17266,20 +17266,20 @@ if (!__Pyx_RefNanny) {
Py_INCREF(__pyx_b);
__pyx_cython_runtime = PyImport_AddModule((char *) "cython_runtime"); if (unlikely(!__pyx_cython_runtime)) __PYX_ERR(0, 1, __pyx_L1_error)
Py_INCREF(__pyx_cython_runtime);
if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) __PYX_ERR(0, 1, __pyx_L1_error);
if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
/*--- Initialize various global constants etc. ---*/
if (__Pyx_InitGlobals() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
#if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT)
if (__Pyx_init_sys_getdefaultencoding_params() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
#endif
if (__pyx_module_is_main_larynx_train__vits__monotonic_align__core) {
if (__pyx_module_is_main_piper_train__vits__monotonic_align__core) {
if (PyObject_SetAttr(__pyx_m, __pyx_n_s_name_2, __pyx_n_s_main) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
}
#if PY_MAJOR_VERSION >= 3
{
PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error)
if (!PyDict_GetItemString(modules, "larynx_train.vits.monotonic_align.core")) {
if (unlikely(PyDict_SetItemString(modules, "larynx_train.vits.monotonic_align.core", __pyx_m) < 0)) __PYX_ERR(0, 1, __pyx_L1_error)
if (!PyDict_GetItemString(modules, "piper_train.vits.monotonic_align.core")) {
if (unlikely(PyDict_SetItemString(modules, "piper_train.vits.monotonic_align.core", __pyx_m) < 0)) __PYX_ERR(0, 1, __pyx_L1_error)
}
}
#endif
@ -17300,7 +17300,7 @@ if (!__Pyx_RefNanny) {
if (__Pyx_patch_abc() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
#endif
/* "larynx_train/vits/monotonic_align/core.pyx":7
/* "piper_train/vits/monotonic_align/core.pyx":7
* @cython.boundscheck(False)
* @cython.wraparound(False)
* cdef void maximum_path_each(int[:,::1] path, float[:,::1] value, int t_y, int t_x, float max_neg_val=-1e9) nogil: # <<<<<<<<<<<<<<
@ -17309,7 +17309,7 @@ if (!__Pyx_RefNanny) {
*/
__pyx_k_ = (-1e9);
/* "larynx_train/vits/monotonic_align/core.pyx":1
/* "piper_train/vits/monotonic_align/core.pyx":1
* cimport cython # <<<<<<<<<<<<<<
* from cython.parallel import prange
*
@ -17479,11 +17479,11 @@ if (!__Pyx_RefNanny) {
__Pyx_XDECREF(__pyx_t_1);
if (__pyx_m) {
if (__pyx_d) {
__Pyx_AddTraceback("init larynx_train.vits.monotonic_align.core", __pyx_clineno, __pyx_lineno, __pyx_filename);
__Pyx_AddTraceback("init piper_train.vits.monotonic_align.core", __pyx_clineno, __pyx_lineno, __pyx_filename);
}
Py_CLEAR(__pyx_m);
} else if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ImportError, "init larynx_train.vits.monotonic_align.core");
PyErr_SetString(PyExc_ImportError, "init piper_train.vits.monotonic_align.core");
}
__pyx_L0:;
__Pyx_RefNannyFinishContext();
@ -18536,7 +18536,7 @@ static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i,
/* ObjectGetItem */
#if CYTHON_USE_TYPE_SLOTS
static PyObject *__Pyx_PyObject_GetIndex(PyObject *obj, PyObject* index) {
PyObject *runerr;
PyObject *runerr = NULL;
Py_ssize_t key_value;
PySequenceMethods *m = Py_TYPE(obj)->tp_as_sequence;
if (unlikely(!(m && m->sq_item))) {
@ -19417,7 +19417,7 @@ __PYX_GOOD:
/* CLineInTraceback */
#ifndef CYTHON_CLINE_IN_TRACEBACK
static int __Pyx_CLineForTraceback(CYTHON_NCP_UNUSED PyThreadState *tstate, int c_line) {
static int __Pyx_CLineForTraceback(CYTHON_UNUSED PyThreadState *tstate, int c_line) {
PyObject *use_cline;
PyObject *ptype, *pvalue, *ptraceback;
#if CYTHON_COMPILING_IN_CPYTHON

@ -11,7 +11,7 @@ from .vits.lightning import VitsModel
from .vits.mel_processing import spectrogram_torch
from .vits.wavfile import write as write_wav
_LOGGER = logging.getLogger("larynx_train.voice_converstion")
_LOGGER = logging.getLogger("piper_train.voice_converstion")
def main():

@ -10,5 +10,5 @@ docker run \
-v "${HOME}:${HOME}" \
-v /etc/hostname:/etc/hostname:ro \
-v /etc/localtime:/etc/localtime:ro \
larynx2-train \
piper-train \
"$@"

@ -17,7 +17,7 @@ if [ -d "${venv}" ]; then
source "${venv}/bin/activate"
fi
python_files=("${base_dir}/larynx_train")
python_files=("${base_dir}/piper_train")
# Format code
black "${python_files[@]}"

@ -6,7 +6,7 @@ import setuptools
from setuptools import setup
this_dir = Path(__file__).parent
module_dir = this_dir / "larynx_train"
module_dir = this_dir / "piper_train"
# -----------------------------------------------------------------------------
@ -29,23 +29,23 @@ with open(version_path, "r", encoding="utf-8") as version_file:
# -----------------------------------------------------------------------------
setup(
name="larynx_train",
name="piper_train",
version=version,
description="A fast and local neural text to speech system",
long_description=long_description,
url="http://github.com/rhasspy/larynx",
url="http://github.com/rhasspy/piper",
author="Michael Hansen",
author_email="mike@rhasspy.org",
license="MIT",
packages=setuptools.find_packages(),
package_data={
"larynx_train": ["VERSION", "py.typed"],
"piper_train": ["VERSION", "py.typed"],
},
install_requires=requirements,
extras_require={':python_version<"3.9"': ["importlib_resources"]},
entry_points={
"console_scripts": [
"larynx-train = larynx_train.__main__:main",
"piper-train = piper_train.__main__:main",
]
},
classifiers=[

@ -15,7 +15,7 @@ _PAD = "_"
@dataclass
class LarynxConfig:
class PiperConfig:
num_symbols: int
num_speakers: int
sample_rate: int
@ -26,7 +26,7 @@ class LarynxConfig:
phoneme_id_map: Mapping[str, Sequence[int]]
class Larynx:
class Piper:
def __init__(
self,
model_path: Union[str, Path],
@ -114,12 +114,12 @@ class Larynx:
return wav_io.getvalue()
def load_config(config_path: Union[str, Path]) -> LarynxConfig:
def load_config(config_path: Union[str, Path]) -> PiperConfig:
with open(config_path, "r", encoding="utf-8") as config_file:
config_dict = json.load(config_file)
inference = config_dict.get("inference", {})
return LarynxConfig(
return PiperConfig(
num_symbols=config_dict["num_symbols"],
num_speakers=config_dict["num_speakers"],
sample_rate=config_dict["audio"]["sample_rate"],

@ -5,7 +5,7 @@ import time
from functools import partial
from pathlib import Path
from . import Larynx
from . import Piper
_FILE = Path(__file__)
_DIR = _FILE.parent
@ -34,7 +34,7 @@ def main() -> None:
args = parser.parse_args()
logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
voice = Larynx(args.model, config_path=args.config, use_cuda=args.cuda)
voice = Piper(args.model, config_path=args.config, use_cuda=args.cuda)
synthesize = partial(
voice.synthesize,
speaker_id=args.speaker,

@ -17,7 +17,7 @@ if [ -d "${venv}" ]; then
source "${venv}/bin/activate"
fi
python_files=("${base_dir}/larynx")
python_files=("${base_dir}/piper")
# Format code
black "${python_files[@]}"

@ -14,4 +14,4 @@ if [ -d "${venv}" ]; then
source "${venv}/bin/activate"
fi
python3 -m larynx "$@"
python3 -m piper "$@"
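For readers following the rename at the Python level, below is a minimal usage sketch of the renamed `Piper` class as suggested by the hunks above. It is not part of this diff; the top-level import path, the positional text argument to `synthesize()`, and the file names are assumptions.

``` python
# Minimal sketch only -- not part of this diff. Assumes `Piper` is exported at the
# package top level (mirroring `from . import Piper` in __main__), that the first
# positional argument to synthesize() is the text, and that it returns WAV bytes
# (implied by `return wav_io.getvalue()` in the Piper class hunk). File names are
# hypothetical.
from piper import Piper

voice = Piper(
    "voice-model.onnx",                   # model path, as passed via args.model above
    config_path="voice-model.onnx.json",  # config path, as passed via args.config above
    use_cuda=False,
)

# speaker_id mirrors the speaker_id=args.speaker keyword seen in __main__.
wav_bytes = voice.synthesize("Hello world.", speaker_id=None)

with open("test.wav", "wb") as wav_file:
    wav_file.write(wav_bytes)
```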