Rename to piper

pull/35/head
Michael Hansen 1 year ago
parent 3dfa161ba5
commit 70afec58bc

@ -23,6 +23,7 @@ RUN cd espeak-ng-1.51 && \
--without-speechplayer \ --without-speechplayer \
--without-mbrola \ --without-mbrola \
--without-sonic \ --without-sonic \
--with-extdict-cmn \
--prefix=/usr && \ --prefix=/usr && \
make -j8 src/espeak-ng src/speak-ng && \ make -j8 src/espeak-ng src/speak-ng && \
make && \ make && \
@ -35,25 +36,25 @@ RUN mkdir -p /usr/local/include/onnxruntime && \
--strip-components 1 \ --strip-components 1 \
-xvf "lib/onnxruntime-linux-${TARGETARCH}${TARGETVARIANT}.tgz" -xvf "lib/onnxruntime-linux-${TARGETARCH}${TARGETVARIANT}.tgz"
# Build larynx binary # Build piper binary
COPY Makefile ./ COPY Makefile ./
COPY src/cpp/ ./src/cpp/ COPY src/cpp/ ./src/cpp/
RUN make no-pcaudio RUN make no-pcaudio
# Do a test run # Do a test run
RUN /build/build/larynx --help RUN /build/build/piper --help
# Build .tar.gz to keep symlinks # Build .tar.gz to keep symlinks
WORKDIR /dist WORKDIR /dist
RUN mkdir -p larynx && \ RUN mkdir -p piper && \
cp -d /usr/lib64/libespeak-ng.so* ./larynx/ && \ cp -d /usr/lib64/libespeak-ng.so* ./piper/ && \
cp -dR /usr/share/espeak-ng-data ./larynx/ && \ cp -dR /usr/share/espeak-ng-data ./piper/ && \
cp -d /usr/local/include/onnxruntime/lib/libonnxruntime.so.* ./larynx/ && \ cp -d /usr/local/include/onnxruntime/lib/libonnxruntime.so.* ./piper/ && \
cp /build/build/larynx ./larynx/ && \ cp /build/build/piper ./piper/ && \
tar -czf "larynx_${TARGETARCH}${TARGETVARIANT}.tar.gz" larynx/ tar -czf "piper_${TARGETARCH}${TARGETVARIANT}.tar.gz" piper/
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
FROM scratch FROM scratch
COPY --from=build /dist/larynx_*.tar.gz ./ COPY --from=build /dist/piper_*.tar.gz ./

@ -5,9 +5,9 @@ ARG TARGETVARIANT
COPY local/en-us/ljspeech/low/en-us-ljspeech-low.onnx \ COPY local/en-us/ljspeech/low/en-us-ljspeech-low.onnx \
local/en-us/ljspeech/low/en-us-ljspeech-low.onnx.json ./ local/en-us/ljspeech/low/en-us-ljspeech-low.onnx.json ./
ADD dist/linux_${TARGETARCH}${TARGETVARIANT}/larynx_${TARGETARCH}${TARGETVARIANT}.tar.gz ./ ADD dist/linux_${TARGETARCH}${TARGETVARIANT}/piper_${TARGETARCH}${TARGETVARIANT}.tar.gz ./
RUN cd larynx/ && echo 'This is a test.' | ./larynx -m ../en-us-ljspeech-low.onnx -f test.wav RUN cd piper/ && echo 'This is a test.' | ./piper -m ../en-us-ljspeech-low.onnx -f test.wav
RUN if [ ! -f larynx/test.wav ]; then exit 1; fi RUN if [ ! -f piper/test.wav ]; then exit 1; fi
RUN size="$(wc -c < larynx/test.wav)"; \ RUN size="$(wc -c < piper/test.wav)"; \
if [ "${size}" -lt "1000" ]; then echo "File size is ${size} bytes"; exit 1; fi if [ "${size}" -lt "1000" ]; then echo "File size is ${size} bytes"; exit 1; fi

@ -1,15 +1,15 @@
# Larynx ![Piper logo](etc/logo.png)
A fast, local neural text to speech system. A fast, local neural text to speech system.
``` sh ``` sh
echo 'Welcome to the world of speech synthesis!' | \ echo 'Welcome to the world of speech synthesis!' | \
./larynx --model en-us-blizzard_lessac-medium.onnx --output_file welcome.wav ./piper --model en-us-blizzard_lessac-medium.onnx --output_file welcome.wav
``` ```
## Voices ## Voices
Download voices from [the release](https://github.com/rhasspy/larynx2/releases/tag/v0.0.2). Download voices from [the release](https://github.com/rhasspy/piper/releases/tag/v0.0.2).
Supported languages: Supported languages:
@ -30,7 +30,7 @@ Supported languages:
## Purpose ## Purpose
Larynx is meant to sound good and run reasonably fast on the Raspberry Pi 4. Piper (formerly Larynx 2) is meant to sound good and run reasonably fast on the Raspberry Pi 4.
Voices are trained with [VITS](https://github.com/jaywalnut310/vits/) and exported to the [onnxruntime](https://onnxruntime.ai/). Voices are trained with [VITS](https://github.com/jaywalnut310/vits/) and exported to the [onnxruntime](https://onnxruntime.ai/).
@ -39,8 +39,8 @@ Voices are trained with [VITS](https://github.com/jaywalnut310/vits/) and export
Download a release: Download a release:
* [amd64](https://github.com/rhasspy/larynx2/releases/download/v0.0.2/larynx_amd64.tar.gz) (desktop Linux) * [amd64](https://github.com/rhasspy/piper/releases/download/v0.0.2/piper_amd64.tar.gz) (desktop Linux)
* [arm64](https://github.com/rhasspy/larynx2/releases/download/v0.0.2/larynx_arm64.tar.gz) (Raspberry Pi 4) * [arm64](https://github.com/rhasspy/piper/releases/download/v0.0.2/piper_arm64.tar.gz) (Raspberry Pi 4)
If you want to build from source, see the [Makefile](Makefile) and [C++ source](src/cpp). Last tested with [onnxruntime](https://github.com/microsoft/onnxruntime) 1.13.1. If you want to build from source, see the [Makefile](Makefile) and [C++ source](src/cpp). Last tested with [onnxruntime](https://github.com/microsoft/onnxruntime) 1.13.1.
@ -48,18 +48,18 @@ If you want to build from source, see the [Makefile](Makefile) and [C++ source](
## Usage ## Usage
1. [Download a voice](#voices) and extract the `.onnx` and `.onnx.json` files 1. [Download a voice](#voices) and extract the `.onnx` and `.onnx.json` files
2. Run the `larynx` binary with text on standard input, `--model /path/to/your-voice.onnx`, and `--output_file output.wav` 2. Run the `piper` binary with text on standard input, `--model /path/to/your-voice.onnx`, and `--output_file output.wav`
For example: For example:
``` sh ``` sh
echo 'Welcome to the world of speech synthesis!' | \ echo 'Welcome to the world of speech synthesis!' | \
./larynx --model blizzard_lessac-medium.onnx --output_file welcome.wav ./piper --model blizzard_lessac-medium.onnx --output_file welcome.wav
``` ```
For multi-speaker models, use `--speaker <number>` to change speakers (default: 0). For multi-speaker models, use `--speaker <number>` to change speakers (default: 0).
See `larynx --help` for more options. See `piper --help` for more options.
## Training ## Training
@ -69,7 +69,7 @@ See [src/python](src/python)
Start by creating a virtual environment: Start by creating a virtual environment:
``` sh ``` sh
cd larynx2/src/python cd piper/src/python
python3 -m venv .venv python3 -m venv .venv
source .venv/bin/activate source .venv/bin/activate
pip3 install --upgrade pip pip3 install --upgrade pip
@ -84,7 +84,7 @@ Ensure you have [espeak-ng](https://github.com/espeak-ng/espeak-ng/) installed (
Next, preprocess your dataset: Next, preprocess your dataset:
``` sh ``` sh
python3 -m larynx_train.preprocess \ python3 -m piper_train.preprocess \
--language en-us \ --language en-us \
--input-dir /path/to/ljspeech/ \ --input-dir /path/to/ljspeech/ \
--output-dir /path/to/training_dir/ \ --output-dir /path/to/training_dir/ \
@ -97,7 +97,7 @@ Datasets must either be in the [LJSpeech](https://keithito.com/LJ-Speech-Dataset
Finally, you can train: Finally, you can train:
``` sh ``` sh
python3 -m larynx_train \ python3 -m piper_train \
--dataset-dir /path/to/training_dir/ \ --dataset-dir /path/to/training_dir/ \
--accelerator 'gpu' \ --accelerator 'gpu' \
--devices 1 \ --devices 1 \
@ -108,7 +108,7 @@ python3 -m larynx_train \
--precision 32 --precision 32
``` ```
Training uses [PyTorch Lightning](https://www.pytorchlightning.ai/). Run `tensorboard --logdir /path/to/training_dir/lightning_logs` to monitor. See `python3 -m larynx_train --help` for many additional options. Training uses [PyTorch Lightning](https://www.pytorchlightning.ai/). Run `tensorboard --logdir /path/to/training_dir/lightning_logs` to monitor. See `python3 -m piper_train --help` for many additional options.
It is highly recommended to train with the following `Dockerfile`: It is highly recommended to train with the following `Dockerfile`:
@ -121,11 +121,11 @@ RUN pip3 install \
ENV NUMBA_CACHE_DIR=.numba_cache ENV NUMBA_CACHE_DIR=.numba_cache
``` ```
See the various `infer_*` and `export_*` scripts in [src/python/larynx_train](src/python/larynx_train) to test and export your voice from the checkpoint in `lightning_logs`. The `dataset.jsonl` file in your training directory can be used with `python3 -m larynx_train.infer` for quick testing: See the various `infer_*` and `export_*` scripts in [src/python/piper_train](src/python/piper_train) to test and export your voice from the checkpoint in `lightning_logs`. The `dataset.jsonl` file in your training directory can be used with `python3 -m piper_train.infer` for quick testing:
``` sh ``` sh
head -n5 /path/to/training_dir/dataset.jsonl | \ head -n5 /path/to/training_dir/dataset.jsonl | \
python3 -m larynx_train.infer \ python3 -m piper_train.infer \
--checkpoint lightning_logs/path/to/checkpoint.ckpt \ --checkpoint lightning_logs/path/to/checkpoint.ckpt \
--sample-rate 22050 \ --sample-rate 22050 \
--output-dir wavs --output-dir wavs
@ -139,7 +139,7 @@ See [src/python_run](src/python_run)
Run `scripts/setup.sh` to create a virtual environment and install the requirements. Then run: Run `scripts/setup.sh` to create a virtual environment and install the requirements. Then run:
``` sh ``` sh
echo 'Welcome to the world of speech synthesis!' | scripts/larynx \ echo 'Welcome to the world of speech synthesis!' | scripts/piper \
--model /path/to/voice.onnx \ --model /path/to/voice.onnx \
--output_file welcome.wav --output_file welcome.wav
``` ```
@ -151,5 +151,5 @@ If you'd like to use a GPU, install the `onnxruntime-gpu` package:
.venv/bin/pip3 install onnxruntime-gpu .venv/bin/pip3 install onnxruntime-gpu
``` ```
and then run `scripts/larynx` with the `--cuda` argument. You will need to have a functioning CUDA environment, such as what's available in [NVIDIA's PyTorch containers](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch). and then run `scripts/piper` with the `--cuda` argument. You will need to have a functioning CUDA environment, such as what's available in [NVIDIA's PyTorch containers](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch).

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.3 KiB

@ -0,0 +1,151 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="118.3606mm"
height="41.577671mm"
viewBox="0 0 118.36058 41.577671"
version="1.1"
id="svg120"
inkscape:version="1.0.2 (e86c870879, 2021-01-15)"
sodipodi:docname="logo.svg"
inkscape:export-filename="./logo.png"
inkscape:export-xdpi="100"
inkscape:export-ydpi="100">
<defs
id="defs114" />
<sodipodi:namedview
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageopacity="1"
inkscape:pageshadow="2"
inkscape:zoom="1.8469919"
inkscape:cx="164.97755"
inkscape:cy="48.418276"
inkscape:document-units="mm"
inkscape:current-layer="layer1"
inkscape:document-rotation="0"
showgrid="false"
inkscape:window-width="1920"
inkscape:window-height="1012"
inkscape:window-x="0"
inkscape:window-y="0"
inkscape:window-maximized="1"
fit-margin-top="2"
fit-margin-left="2"
fit-margin-right="2"
fit-margin-bottom="2" />
<metadata
id="metadata117">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:title></dc:title>
</cc:Work>
</rdf:RDF>
</metadata>
<g
inkscape:label="Layer 1"
inkscape:groupmode="layer"
id="layer1"
transform="translate(-46.653036,-127.37783)">
<g
id="g85"
transform="translate(39.632581,-38.116038)">
<path
d="m 20.289791,179.54097 h 10.117541 q 4.512519,0 6.919195,2.01084 2.42251,1.995 2.42251,5.70002 0,3.72085 -2.42251,5.73169 -2.406676,1.99501 -6.919195,1.99501 h -4.021683 v 8.2017 h -6.095858 z m 6.095858,4.41751 v 6.60253 h 3.372514 q 1.77334,0 2.739178,-0.855 0.965837,-0.87084 0.965837,-2.45418 0,-1.58334 -0.965837,-2.43834 -0.965838,-0.85501 -2.739178,-0.85501 z"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:32.4268px;line-height:1.25;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
id="path2223" />
<path
d="M 56.120769,179.54097 H 66.23831 q 4.512519,0 6.919196,2.01084 2.42251,1.995 2.42251,5.70002 0,3.72085 -2.42251,5.73169 -2.406677,1.99501 -6.919196,1.99501 h -4.021683 v 8.2017 h -6.095858 z m 6.095858,4.41751 v 6.60253 h 3.372514 q 1.773341,0 2.739179,-0.855 0.965836,-0.87084 0.965836,-2.45418 0,-1.58334 -0.965836,-2.43834 -0.965837,-0.85501 -2.739179,-0.85501 z"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:32.4268px;line-height:1.25;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
id="path2225" />
<path
d="m 79.8867,179.54097 h 16.450901 v 4.60751 H 85.982557 v 4.40169 h 9.73754 v 4.60752 h -9.73754 v 5.41502 h 10.703378 v 4.60752 H 79.8867 Z"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:32.4268px;line-height:1.25;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
id="path2227" />
<path
d="m 110.69849,190.02267 q 1.91585,0 2.73918,-0.7125 0.83917,-0.7125 0.83917,-2.34334 0,-1.61501 -0.83917,-2.31168 -0.82333,-0.69667 -2.73918,-0.69667 h -2.56501 v 6.06419 z m -2.56501,4.21169 v 8.94587 h -6.09585 v -23.63926 h 9.31003 q 4.67086,0 6.84003,1.5675 2.18501,1.56751 2.18501,4.95586 0,2.34334 -1.14,3.84751 -1.12417,1.50417 -3.40418,2.21668 1.25083,0.285 2.2325,1.29834 0.99751,0.9975 2.01085,3.04001 l 3.30918,6.71336 h -6.4917 l -2.88168,-5.87419 q -0.87083,-1.77334 -1.77334,-2.42251 -0.88667,-0.64917 -2.37501,-0.64917 z"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:32.4268px;line-height:1.25;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
id="path2229" />
<g
id="g2239"
transform="translate(2.4090272,49.575953)">
<path
style="fill:#000000;stroke:none;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 17.980579,136.56697 c -0.476851,-1.21561 9.934617,-6.83345 10.62213,-6.38851 -4.147652,-4.33579 -3.488951,-7.42528 -1.437093,-9.875 -2.134965,-0.0137 -4.642444,-0.12021 -6.370534,4.67585 -4.134299,0.0803 -4.437171,-3.11951 -4.48854,-6.20362 -1.859141,2.96638 -2.878913,5.02914 -1.495979,9.34664 -4.921996,-1.38523 -5.5668734,2.41507 -7.6020931,4.32371 4.1744251,-2.16864 9.3792941,-2.93932 10.7721091,4.12093 z"
id="path2231"
sodipodi:nodetypes="cccccccc" />
<circle
style="fill:#000000;stroke:none;stroke-width:0.1;stroke-linecap:round"
id="circle2233"
cx="7.4886017"
cy="132.36996"
r="0.87717384" />
<circle
style="fill:#000000;stroke:none;stroke-width:0.1;stroke-linecap:round"
id="circle2235"
cx="16.220198"
cy="118.79509"
r="0.87717384" />
<circle
style="fill:#000000;stroke:none;stroke-width:0.1;stroke-linecap:round"
id="circle2237"
cx="26.696749"
cy="120.39225"
r="0.87717384" />
</g>
<path
d="m 50.078877,179.54097 c 0,-1.55844 -6.095858,-1.44086 -6.095858,0 v 23.63926 h 6.095858 z"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:32.4268px;line-height:1.25;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
id="path2241"
sodipodi:nodetypes="ssccs" />
<path
id="path2243"
style="fill:#ffffff;stroke:none;stroke-width:0.1;stroke-linecap:round"
d="m 51.124975,184.33936 c -2.760939,-1.42504 -5.456589,-1.18336 -8.115352,0 v 1.45029 c 2.642276,-1.3158 5.348351,-1.29574 8.115352,0 z"
sodipodi:nodetypes="ccccc" />
<path
style="fill:none;stroke:#ffffff;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 48.277649,178.01214 v 27.0594"
id="path2245"
sodipodi:nodetypes="cc" />
<path
style="fill:none;stroke:#ffffff;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 45.736606,177.35346 v 27.71808"
id="path2249"
sodipodi:nodetypes="cc" />
<rect
style="fill:#ffffff;stroke:none;stroke-width:0.1;stroke-linecap:round"
id="rect2251"
width="3.0975902"
height="6.8333788"
x="-48.726681"
y="197.01622"
transform="scale(-1,1)" />
<rect
style="fill:#ffffff;stroke:none;stroke-width:0.0999999;stroke-linecap:round"
id="rect2255"
width="2.0844173"
height="9.6287899"
x="-45.551418"
y="194.22081"
transform="scale(-1,1)" />
<path
id="path2257"
style="fill:#ffffff;stroke:none;stroke-width:0.0999995;stroke-linecap:round"
d="m 19.97109,185.20282 10.735834,-6.19836 c 0.21219,-0.12249 0.502502,-0.0141 0.650911,0.24289 l 0.11208,0.19413 c 0.148409,0.25705 0.107331,0.58244 -0.115118,0.68513 -3.765389,1.73827 -7.326841,3.8345 -10.735835,6.19834 -0.201345,0.13962 -0.502495,0.0141 -0.65091,-0.24287 l -0.112081,-0.19413 c -0.148409,-0.25704 -0.09706,-0.56263 0.115117,-0.68513 z"
sodipodi:nodetypes="ssssssssss" />
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 8.6 KiB

@ -2,12 +2,12 @@ cmake_minimum_required(VERSION 3.13)
include(CheckIncludeFileCXX) include(CheckIncludeFileCXX)
project(larynx C CXX) project(piper C CXX)
set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_STANDARD_REQUIRED ON)
ADD_EXECUTABLE(larynx main.cpp) ADD_EXECUTABLE(piper main.cpp)
string(APPEND CMAKE_CXX_FLAGS " -Wall -Wextra -Wl,-rpath,'$ORIGIN'") string(APPEND CMAKE_CXX_FLAGS " -Wall -Wextra -Wl,-rpath,'$ORIGIN'")
string(APPEND CMAKE_C_FLAGS " -Wall -Wextra") string(APPEND CMAKE_C_FLAGS " -Wall -Wextra")
@ -21,26 +21,26 @@ check_include_file_cxx("pcaudiolib/audio.h" PCAUDIO_INCLUDE_FOUND)
if(PCAUDIO_INCLUDE_FOUND) if(PCAUDIO_INCLUDE_FOUND)
option(USE_PCAUDIO "Build with pcaudiolib" ON) option(USE_PCAUDIO "Build with pcaudiolib" ON)
if(USE_PCAUDIO) if(USE_PCAUDIO)
target_compile_definitions(larynx PUBLIC HAVE_PCAUDIO) target_compile_definitions(piper PUBLIC HAVE_PCAUDIO)
set(PCAUDIO_LIBRARIES "pcaudio") set(PCAUDIO_LIBRARIES "pcaudio")
endif() endif()
endif() endif()
set(ONNXRUNTIME_ROOTDIR "/usr/local/include/onnxruntime") set(ONNXRUNTIME_ROOTDIR "/usr/local/include/onnxruntime")
target_link_libraries(larynx target_link_libraries(piper
onnxruntime onnxruntime
-static-libgcc -static-libstdc++ -static-libgcc -static-libstdc++
${ESPEAK_NG_LIBRARIES} ${ESPEAK_NG_LIBRARIES}
${PCAUDIO_LIBRARIES}) ${PCAUDIO_LIBRARIES})
target_link_directories(larynx PUBLIC target_link_directories(piper PUBLIC
${ESPEAK_NG_LIBRARY_DIRS} ${ESPEAK_NG_LIBRARY_DIRS}
${ONNXRUNTIME_ROOTDIR}/lib) ${ONNXRUNTIME_ROOTDIR}/lib)
target_include_directories(larynx PUBLIC target_include_directories(piper PUBLIC
${ONNXRUNTIME_ROOTDIR}/include ${ONNXRUNTIME_ROOTDIR}/include
${ESPEAK_NG_INCLUDE_DIRS}) ${ESPEAK_NG_INCLUDE_DIRS})
target_compile_options(larynx PUBLIC target_compile_options(piper PUBLIC
${ESPEAK_NG_CFLAGS_OTHER}) ${ESPEAK_NG_CFLAGS_OTHER})

@ -15,7 +15,7 @@
using namespace std; using namespace std;
using json = nlohmann::json; using json = nlohmann::json;
namespace larynx { namespace piper {
typedef char32_t Phoneme; typedef char32_t Phoneme;
typedef int64_t PhonemeId; typedef int64_t PhonemeId;
@ -145,6 +145,6 @@ void parseModelConfig(json &configRoot, ModelConfig &modelConfig) {
} /* parseModelConfig */ } /* parseModelConfig */
} // namespace larynx } // namespace piper
#endif // CONFIG_H_ #endif // CONFIG_H_

@ -12,7 +12,7 @@
#include <pcaudiolib/audio.h> #include <pcaudiolib/audio.h>
#endif #endif
#include "larynx.hpp" #include "piper.hpp"
using namespace std; using namespace std;
@ -23,7 +23,7 @@ struct RunConfig {
filesystem::path modelConfigPath; filesystem::path modelConfigPath;
OutputType outputType = OUTPUT_PLAY; OutputType outputType = OUTPUT_PLAY;
optional<filesystem::path> outputPath; optional<filesystem::path> outputPath;
optional<larynx::SpeakerId> speakerId; optional<piper::SpeakerId> speakerId;
optional<float> noiseScale; optional<float> noiseScale;
optional<float> lengthScale; optional<float> lengthScale;
optional<float> noiseW; optional<float> noiseW;
@ -36,9 +36,9 @@ int main(int argc, char *argv[]) {
parseArgs(argc, argv, runConfig); parseArgs(argc, argv, runConfig);
auto exePath = filesystem::path(argv[0]); auto exePath = filesystem::path(argv[0]);
larynx::initialize(exePath.parent_path()); piper::initialize(exePath.parent_path());
larynx::Voice voice; piper::Voice voice;
auto startTime = chrono::steady_clock::now(); auto startTime = chrono::steady_clock::now();
loadVoice(runConfig.modelPath.string(), runConfig.modelConfigPath.string(), loadVoice(runConfig.modelPath.string(), runConfig.modelConfigPath.string(),
voice, runConfig.speakerId); voice, runConfig.speakerId);
@ -64,7 +64,7 @@ int main(int argc, char *argv[]) {
if (runConfig.outputType == OUTPUT_PLAY) { if (runConfig.outputType == OUTPUT_PLAY) {
// Output audio to the default audio device // Output audio to the default audio device
my_audio = create_audio_device_object(NULL, "larynx", "Text-to-Speech"); my_audio = create_audio_device_object(NULL, "piper", "Text-to-Speech");
// TODO: Support 32-bit sample widths // TODO: Support 32-bit sample widths
auto audioFormat = AUDIO_OBJECT_FORMAT_S16LE; auto audioFormat = AUDIO_OBJECT_FORMAT_S16LE;
@ -78,7 +78,7 @@ int main(int argc, char *argv[]) {
#else #else
if (runConfig.outputType == OUTPUT_PLAY) { if (runConfig.outputType == OUTPUT_PLAY) {
// Cannot play audio directly // Cannot play audio directly
cerr << "WARNING: Larynx was not compiled with pcaudiolib. Output audio " cerr << "WARNING: Piper was not compiled with pcaudiolib. Output audio "
"will be written to the current directory." "will be written to the current directory."
<< endl; << endl;
runConfig.outputType = OUTPUT_DIRECTORY; runConfig.outputType = OUTPUT_DIRECTORY;
@ -92,7 +92,7 @@ int main(int argc, char *argv[]) {
} }
string line; string line;
larynx::SynthesisResult result; piper::SynthesisResult result;
while (getline(cin, line)) { while (getline(cin, line)) {
// Path to output WAV file // Path to output WAV file
@ -108,19 +108,19 @@ int main(int argc, char *argv[]) {
// Output audio to automatically-named WAV file in a directory // Output audio to automatically-named WAV file in a directory
ofstream audioFile(outputPath.string(), ios::binary); ofstream audioFile(outputPath.string(), ios::binary);
larynx::textToWavFile(voice, line, audioFile, result); piper::textToWavFile(voice, line, audioFile, result);
cout << outputPath.string() << endl; cout << outputPath.string() << endl;
} else if (runConfig.outputType == OUTPUT_FILE) { } else if (runConfig.outputType == OUTPUT_FILE) {
// Output audio to WAV file // Output audio to WAV file
ofstream audioFile(runConfig.outputPath.value().string(), ios::binary); ofstream audioFile(runConfig.outputPath.value().string(), ios::binary);
larynx::textToWavFile(voice, line, audioFile, result); piper::textToWavFile(voice, line, audioFile, result);
} else if (runConfig.outputType == OUTPUT_STDOUT) { } else if (runConfig.outputType == OUTPUT_STDOUT) {
// Output WAV to stdout // Output WAV to stdout
larynx::textToWavFile(voice, line, cout, result); piper::textToWavFile(voice, line, cout, result);
} else if (runConfig.outputType == OUTPUT_PLAY) { } else if (runConfig.outputType == OUTPUT_PLAY) {
#ifdef HAVE_PCAUDIO #ifdef HAVE_PCAUDIO
vector<int16_t> audioBuffer; vector<int16_t> audioBuffer;
larynx::textToAudio(voice, line, audioBuffer, result); piper::textToAudio(voice, line, audioBuffer, result);
int error = audio_object_write(my_audio, (const char *)audioBuffer.data(), int error = audio_object_write(my_audio, (const char *)audioBuffer.data(),
sizeof(int16_t) * audioBuffer.size()); sizeof(int16_t) * audioBuffer.size());
@ -138,7 +138,7 @@ int main(int argc, char *argv[]) {
<< " sec, audio=" << result.audioSeconds << " sec)" << endl; << " sec, audio=" << result.audioSeconds << " sec)" << endl;
} }
larynx::terminate(); piper::terminate();
#ifdef HAVE_PCAUDIO #ifdef HAVE_PCAUDIO
audio_object_close(my_audio); audio_object_close(my_audio);
@ -211,7 +211,7 @@ void parseArgs(int argc, char *argv[], RunConfig &runConfig) {
runConfig.outputPath = filesystem::path(argv[++i]); runConfig.outputPath = filesystem::path(argv[++i]);
} else if (arg == "-s" || arg == "--speaker") { } else if (arg == "-s" || arg == "--speaker") {
ensureArg(argc, argv, i); ensureArg(argc, argv, i);
runConfig.speakerId = (larynx::SpeakerId)stol(argv[++i]); runConfig.speakerId = (piper::SpeakerId)stol(argv[++i]);
} else if (arg == "--noise-scale") { } else if (arg == "--noise-scale") {
ensureArg(argc, argv, i); ensureArg(argc, argv, i);
runConfig.noiseScale = stof(argv[++i]); runConfig.noiseScale = stof(argv[++i]);

@ -7,8 +7,8 @@
using namespace std; using namespace std;
namespace larynx { namespace piper {
const string instanceName{"larynx"}; const string instanceName{"piper"};
struct ModelSession { struct ModelSession {
Ort::Session onnx; Ort::Session onnx;
@ -48,6 +48,6 @@ void loadModel(string modelPath, ModelSession &session) {
auto loadDuration = chrono::duration<double>(endTime - startTime); auto loadDuration = chrono::duration<double>(endTime - startTime);
} }
} // namespace larynx } // namespace piper
#endif // MODEL_H_ #endif // MODEL_H_

@ -16,7 +16,7 @@
using namespace std; using namespace std;
namespace larynx { namespace piper {
// Text to phonemes using eSpeak-ng // Text to phonemes using eSpeak-ng
void phonemize(PhonemizeConfig &phonemizeConfig) { void phonemize(PhonemizeConfig &phonemizeConfig) {
@ -103,6 +103,6 @@ void phonemes2ids(PhonemizeConfig &phonemizeConfig,
} /* phonemes2ids */ } /* phonemes2ids */
} // namespace larynx } // namespace piper
#endif // PHONEMIZE_H_ #endif // PHONEMIZE_H_

@ -1,5 +1,5 @@
#ifndef LARYNX_H_ #ifndef PIPER_H_
#define LARYNX_H_ #define PIPER_H_
#include <filesystem> #include <filesystem>
#include <iostream> #include <iostream>
@ -17,7 +17,7 @@
using json = nlohmann::json; using json = nlohmann::json;
namespace larynx { namespace piper {
struct Voice { struct Voice {
json configRoot; json configRoot;
@ -106,6 +106,6 @@ void textToWavFile(Voice &voice, string text, ostream &audioFile,
} /* textToWavFile */ } /* textToWavFile */
} // namespace larynx } // namespace piper
#endif // LARYNX_H_ #endif // PIPER_H_

@ -14,7 +14,7 @@
using namespace std; using namespace std;
namespace larynx { namespace piper {
// Maximum value for 16-bit signed WAV sample // Maximum value for 16-bit signed WAV sample
const float MAX_WAV_VALUE = 32767.0f; const float MAX_WAV_VALUE = 32767.0f;
@ -126,6 +126,6 @@ void synthesize(SynthesisConfig &synthesisConfig, ModelSession &session,
Ort::OrtRelease(inputTensors[i].release()); Ort::OrtRelease(inputTensors[i].release());
} }
} }
} // namespace larynx } // namespace piper
#endif // SYNTHESIZE_H_ #endif // SYNTHESIZE_H_

@ -3,7 +3,7 @@
#include <iostream> #include <iostream>
namespace larynx { namespace piper {
struct WavHeader { struct WavHeader {
uint8_t RIFF[4] = {'R', 'I', 'F', 'F'}; uint8_t RIFF[4] = {'R', 'I', 'F', 'F'};
@ -39,6 +39,6 @@ void writeWavHeader(int sampleRate, int sampleWidth, int channels,
} /* writeWavHeader */ } /* writeWavHeader */
} // namespace larynx } // namespace piper
#endif // WAVFILE_H_ #endif // WAVFILE_H_

@ -7,7 +7,7 @@ if [ -d "${this_dir}/.venv" ]; then
source "${this_dir}/.venv/bin/activate" source "${this_dir}/.venv/bin/activate"
fi fi
cd "${this_dir}/larynx_train/vits/monotonic_align" cd "${this_dir}/piper_train/vits/monotonic_align"
mkdir -p monotonic_align mkdir -p monotonic_align
cythonize -i core.pyx cythonize -i core.pyx
mv core*.so monotonic_align/ mv core*.so monotonic_align/

@ -13,7 +13,7 @@ except (ImportError, AttributeError):
files = importlib_resources.files files = importlib_resources.files
_PACKAGE = "larynx_train" _PACKAGE = "piper_train"
_DIR = Path(typing.cast(os.PathLike, files(_PACKAGE))) _DIR = Path(typing.cast(os.PathLike, files(_PACKAGE)))
__version__ = (_DIR / "VERSION").read_text(encoding="utf-8").strip() __version__ = (_DIR / "VERSION").read_text(encoding="utf-8").strip()

@ -7,7 +7,7 @@ import torch
from .vits.lightning import VitsModel from .vits.lightning import VitsModel
_LOGGER = logging.getLogger("larynx_train.export_generator") _LOGGER = logging.getLogger("piper_train.export_generator")
def main(): def main():

@ -8,7 +8,7 @@ import torch
from .vits.lightning import VitsModel from .vits.lightning import VitsModel
_LOGGER = logging.getLogger("larynx_train.export_onnx") _LOGGER = logging.getLogger("piper_train.export_onnx")
OPSET_VERSION = 15 OPSET_VERSION = 15

@ -8,7 +8,7 @@ import torch
from .vits.lightning import VitsModel from .vits.lightning import VitsModel
_LOGGER = logging.getLogger("larynx_train.export_torchscript") _LOGGER = logging.getLogger("piper_train.export_torchscript")
def main(): def main():

@ -12,13 +12,13 @@ from .vits.lightning import VitsModel
from .vits.utils import audio_float_to_int16 from .vits.utils import audio_float_to_int16
from .vits.wavfile import write as write_wav from .vits.wavfile import write as write_wav
_LOGGER = logging.getLogger("larynx_train.infer") _LOGGER = logging.getLogger("piper_train.infer")
def main(): def main():
"""Main entry point""" """Main entry point"""
logging.basicConfig(level=logging.DEBUG) logging.basicConfig(level=logging.DEBUG)
parser = argparse.ArgumentParser(prog="larynx_train.infer") parser = argparse.ArgumentParser(prog="piper_train.infer")
parser.add_argument( parser.add_argument(
"--checkpoint", required=True, help="Path to model checkpoint (.ckpt)" "--checkpoint", required=True, help="Path to model checkpoint (.ckpt)"
) )

@ -11,13 +11,13 @@ import torch
from .vits.utils import audio_float_to_int16 from .vits.utils import audio_float_to_int16
from .vits.wavfile import write as write_wav from .vits.wavfile import write as write_wav
_LOGGER = logging.getLogger("larynx_train.infer_generator") _LOGGER = logging.getLogger("piper_train.infer_generator")
def main(): def main():
"""Main entry point""" """Main entry point"""
logging.basicConfig(level=logging.DEBUG) logging.basicConfig(level=logging.DEBUG)
parser = argparse.ArgumentParser(prog="larynx_train.infer_generator") parser = argparse.ArgumentParser(prog="piper_train.infer_generator")
parser.add_argument("--model", required=True, help="Path to generator (.pt)") parser.add_argument("--model", required=True, help="Path to generator (.pt)")
parser.add_argument("--output-dir", required=True, help="Path to write WAV files") parser.add_argument("--output-dir", required=True, help="Path to write WAV files")
parser.add_argument("--sample-rate", type=int, default=22050) parser.add_argument("--sample-rate", type=int, default=22050)

@ -13,13 +13,13 @@ import onnxruntime
from .vits.utils import audio_float_to_int16 from .vits.utils import audio_float_to_int16
from .vits.wavfile import write as write_wav from .vits.wavfile import write as write_wav
_LOGGER = logging.getLogger("larynx_train.infer_onnx") _LOGGER = logging.getLogger("piper_train.infer_onnx")
def main(): def main():
"""Main entry point""" """Main entry point"""
logging.basicConfig(level=logging.DEBUG) logging.basicConfig(level=logging.DEBUG)
parser = argparse.ArgumentParser(prog="larynx_train.infer_onnx") parser = argparse.ArgumentParser(prog="piper_train.infer_onnx")
parser.add_argument("--model", required=True, help="Path to model (.onnx)") parser.add_argument("--model", required=True, help="Path to model (.onnx)")
parser.add_argument("--output-dir", required=True, help="Path to write WAV files") parser.add_argument("--output-dir", required=True, help="Path to write WAV files")
parser.add_argument("--sample-rate", type=int, default=22050) parser.add_argument("--sample-rate", type=int, default=22050)

@ -11,13 +11,13 @@ import torch
from .vits.utils import audio_float_to_int16 from .vits.utils import audio_float_to_int16
from .vits.wavfile import write as write_wav from .vits.wavfile import write as write_wav
_LOGGER = logging.getLogger("larynx_train.infer_torchscript") _LOGGER = logging.getLogger("piper_train.infer_torchscript")
def main(): def main():
"""Main entry point""" """Main entry point"""
logging.basicConfig(level=logging.DEBUG) logging.basicConfig(level=logging.DEBUG)
parser = argparse.ArgumentParser(prog="larynx_train.infer_torchscript") parser = argparse.ArgumentParser(prog="piper_train.infer_torchscript")
parser.add_argument( parser.add_argument(
"--model", required=True, help="Path to torchscript checkpoint (.ts)" "--model", required=True, help="Path to torchscript checkpoint (.ts)"
) )

@ -5,7 +5,7 @@ from typing import Optional, Tuple, Union
import librosa import librosa
import torch import torch
from larynx_train.vits.mel_processing import spectrogram_torch from piper_train.vits.mel_processing import spectrogram_torch
from .trim import trim_silence from .trim import trim_silence
from .vad import SileroVoiceActivityDetector from .vad import SileroVoiceActivityDetector

@ -44,16 +44,7 @@ class Batch:
speaker_ids: Optional[LongTensor] = None speaker_ids: Optional[LongTensor] = None
# @dataclass class PiperDataset(Dataset):
# class LarynxDatasetSettings:
# sample_rate: int
# is_multispeaker: bool
# espeak_voice: Optional[str] = None
# phoneme_map: Dict[str, Optional[List[str]]] = field(default_factory=dict)
# phoneme_id_map: Dict[str, List[int]] = DEFAULT_PHONEME_ID_MAP
class LarynxDataset(Dataset):
""" """
Dataset format: Dataset format:
@ -76,9 +67,7 @@ class LarynxDataset(Dataset):
dataset_path = Path(dataset_path) dataset_path = Path(dataset_path)
_LOGGER.debug("Loading dataset: %s", dataset_path) _LOGGER.debug("Loading dataset: %s", dataset_path)
self.utterances.extend( self.utterances.extend(
LarynxDataset.load_dataset( PiperDataset.load_dataset(dataset_path, max_phoneme_ids=max_phoneme_ids)
dataset_path, max_phoneme_ids=max_phoneme_ids
)
) )
def __len__(self): def __len__(self):
@ -110,7 +99,7 @@ class LarynxDataset(Dataset):
continue continue
try: try:
utt = LarynxDataset.load_utterance(line) utt = PiperDataset.load_utterance(line)
if (max_phoneme_ids is None) or ( if (max_phoneme_ids is None) or (
len(utt.phoneme_ids) <= max_phoneme_ids len(utt.phoneme_ids) <= max_phoneme_ids
): ):

@ -9,7 +9,7 @@ from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset, random_split from torch.utils.data import DataLoader, Dataset, random_split
from .commons import slice_segments from .commons import slice_segments
from .dataset import Batch, LarynxDataset, UtteranceCollate from .dataset import Batch, PiperDataset, UtteranceCollate
from .losses import discriminator_loss, feature_loss, generator_loss, kl_loss from .losses import discriminator_loss, feature_loss, generator_loss, kl_loss
from .mel_processing import mel_spectrogram_torch, spec_to_mel_torch from .mel_processing import mel_spectrogram_torch, spec_to_mel_torch
from .models import MultiPeriodDiscriminator, SynthesizerTrn from .models import MultiPeriodDiscriminator, SynthesizerTrn
@ -128,7 +128,7 @@ class VitsModel(pl.LightningModule):
_LOGGER.debug("No dataset to load") _LOGGER.debug("No dataset to load")
return return
full_dataset = LarynxDataset( full_dataset = PiperDataset(
self.hparams.dataset, max_phoneme_ids=max_phoneme_ids self.hparams.dataset, max_phoneme_ids=max_phoneme_ids
) )
valid_set_size = int(len(full_dataset) * validation_split) valid_set_size = int(len(full_dataset) * validation_split)

@ -1,14 +1,14 @@
/* Generated by Cython 0.29.32 */ /* Generated by Cython 0.29.33 */
/* BEGIN: Cython Metadata /* BEGIN: Cython Metadata
{ {
"distutils": { "distutils": {
"name": "larynx_train.vits.monotonic_align.core", "name": "piper_train.vits.monotonic_align.core",
"sources": [ "sources": [
"/home/hansenm/opt/larynx2/src/python/larynx_train/vits/monotonic_align/core.pyx" "/home/hansenm/opt/larynx2/src/python/piper_train/vits/monotonic_align/core.pyx"
] ]
}, },
"module_name": "larynx_train.vits.monotonic_align.core" "module_name": "piper_train.vits.monotonic_align.core"
} }
END: Cython Metadata */ END: Cython Metadata */
@ -21,8 +21,8 @@ END: Cython Metadata */
#elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000) #elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000)
#error Cython requires Python 2.6+ or Python 3.3+. #error Cython requires Python 2.6+ or Python 3.3+.
#else #else
#define CYTHON_ABI "0_29_32" #define CYTHON_ABI "0_29_33"
#define CYTHON_HEX_VERSION 0x001D20F0 #define CYTHON_HEX_VERSION 0x001D21F0
#define CYTHON_FUTURE_DIVISION 0 #define CYTHON_FUTURE_DIVISION 0
#include <stddef.h> #include <stddef.h>
#ifndef offsetof #ifndef offsetof
@ -99,7 +99,7 @@ END: Cython Metadata */
#undef CYTHON_USE_EXC_INFO_STACK #undef CYTHON_USE_EXC_INFO_STACK
#define CYTHON_USE_EXC_INFO_STACK 0 #define CYTHON_USE_EXC_INFO_STACK 0
#ifndef CYTHON_UPDATE_DESCRIPTOR_DOC #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC
#define CYTHON_UPDATE_DESCRIPTOR_DOC (PYPY_VERSION_HEX >= 0x07030900) #define CYTHON_UPDATE_DESCRIPTOR_DOC 0
#endif #endif
#elif defined(PYSTON_VERSION) #elif defined(PYSTON_VERSION)
#define CYTHON_COMPILING_IN_PYPY 0 #define CYTHON_COMPILING_IN_PYPY 0
@ -564,11 +564,11 @@ static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) {
#endif #endif
#if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND) #if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND)
#define CYTHON_PEP393_ENABLED 1 #define CYTHON_PEP393_ENABLED 1
#if defined(PyUnicode_IS_READY) #if PY_VERSION_HEX >= 0x030C0000
#define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\ #define __Pyx_PyUnicode_READY(op) (0)
0 : _PyUnicode_Ready((PyObject *)(op)))
#else #else
#define __Pyx_PyUnicode_READY(op) (0) #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\
0 : _PyUnicode_Ready((PyObject *)(op)))
#endif #endif
#define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u) #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u)
#define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i) #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i)
@ -577,14 +577,14 @@ static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) {
#define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u) #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u)
#define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i) #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i)
#define __Pyx_PyUnicode_WRITE(k, d, i, ch) PyUnicode_WRITE(k, d, i, ch) #define __Pyx_PyUnicode_WRITE(k, d, i, ch) PyUnicode_WRITE(k, d, i, ch)
#if defined(PyUnicode_IS_READY) && defined(PyUnicode_GET_SIZE) #if PY_VERSION_HEX >= 0x030C0000
#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03090000 #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_LENGTH(u))
#define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : ((PyCompactUnicodeObject *)(u))->wstr_length))
#else
#define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u)))
#endif
#else #else
#define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_LENGTH(u)) #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03090000
#define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : ((PyCompactUnicodeObject *)(u))->wstr_length))
#else
#define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u)))
#endif
#endif #endif
#else #else
#define CYTHON_PEP393_ENABLED 0 #define CYTHON_PEP393_ENABLED 0
@ -750,8 +750,8 @@ static CYTHON_INLINE float __PYX_NAN() {
#endif #endif
#endif #endif
#define __PYX_HAVE__larynx_train__vits__monotonic_align__core #define __PYX_HAVE__piper_train__vits__monotonic_align__core
#define __PYX_HAVE_API__larynx_train__vits__monotonic_align__core #define __PYX_HAVE_API__piper_train__vits__monotonic_align__core
/* Early includes */ /* Early includes */
#include "pythread.h" #include "pythread.h"
#include <string.h> #include <string.h>
@ -1080,16 +1080,16 @@ struct __pyx_array_obj;
struct __pyx_MemviewEnum_obj; struct __pyx_MemviewEnum_obj;
struct __pyx_memoryview_obj; struct __pyx_memoryview_obj;
struct __pyx_memoryviewslice_obj; struct __pyx_memoryviewslice_obj;
struct __pyx_opt_args_12larynx_train_4vits_15monotonic_align_4core_maximum_path_each; struct __pyx_opt_args_11piper_train_4vits_15monotonic_align_4core_maximum_path_each;
/* "larynx_train/vits/monotonic_align/core.pyx":7 /* "piper_train/vits/monotonic_align/core.pyx":7
* @cython.boundscheck(False) * @cython.boundscheck(False)
* @cython.wraparound(False) * @cython.wraparound(False)
* cdef void maximum_path_each(int[:,::1] path, float[:,::1] value, int t_y, int t_x, float max_neg_val=-1e9) nogil: # <<<<<<<<<<<<<< * cdef void maximum_path_each(int[:,::1] path, float[:,::1] value, int t_y, int t_x, float max_neg_val=-1e9) nogil: # <<<<<<<<<<<<<<
* cdef int x * cdef int x
* cdef int y * cdef int y
*/ */
struct __pyx_opt_args_12larynx_train_4vits_15monotonic_align_4core_maximum_path_each { struct __pyx_opt_args_11piper_train_4vits_15monotonic_align_4core_maximum_path_each {
int __pyx_n; int __pyx_n;
float max_neg_val; float max_neg_val;
}; };
@ -1551,18 +1551,18 @@ static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UIN
/* GetModuleGlobalName.proto */ /* GetModuleGlobalName.proto */
#if CYTHON_USE_DICT_VERSIONS #if CYTHON_USE_DICT_VERSIONS
#define __Pyx_GetModuleGlobalName(var, name) {\ #define __Pyx_GetModuleGlobalName(var, name) do {\
static PY_UINT64_T __pyx_dict_version = 0;\ static PY_UINT64_T __pyx_dict_version = 0;\
static PyObject *__pyx_dict_cached_value = NULL;\ static PyObject *__pyx_dict_cached_value = NULL;\
(var) = (likely(__pyx_dict_version == __PYX_GET_DICT_VERSION(__pyx_d))) ?\ (var) = (likely(__pyx_dict_version == __PYX_GET_DICT_VERSION(__pyx_d))) ?\
(likely(__pyx_dict_cached_value) ? __Pyx_NewRef(__pyx_dict_cached_value) : __Pyx_GetBuiltinName(name)) :\ (likely(__pyx_dict_cached_value) ? __Pyx_NewRef(__pyx_dict_cached_value) : __Pyx_GetBuiltinName(name)) :\
__Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\ __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\
} } while(0)
#define __Pyx_GetModuleGlobalNameUncached(var, name) {\ #define __Pyx_GetModuleGlobalNameUncached(var, name) do {\
PY_UINT64_T __pyx_dict_version;\ PY_UINT64_T __pyx_dict_version;\
PyObject *__pyx_dict_cached_value;\ PyObject *__pyx_dict_cached_value;\
(var) = __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\ (var) = __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\
} } while(0)
static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value); static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value);
#else #else
#define __Pyx_GetModuleGlobalName(var, name) (var) = __Pyx__GetModuleGlobalName(name) #define __Pyx_GetModuleGlobalName(var, name) (var) = __Pyx__GetModuleGlobalName(name)
@ -1864,7 +1864,7 @@ static PyObject *__pyx_memoryviewslice_assign_item_from_object(struct __pyx_memo
/* Module declarations from 'cython' */ /* Module declarations from 'cython' */
/* Module declarations from 'larynx_train.vits.monotonic_align.core' */ /* Module declarations from 'piper_train.vits.monotonic_align.core' */
static PyTypeObject *__pyx_array_type = 0; static PyTypeObject *__pyx_array_type = 0;
static PyTypeObject *__pyx_MemviewEnum_type = 0; static PyTypeObject *__pyx_MemviewEnum_type = 0;
static PyTypeObject *__pyx_memoryview_type = 0; static PyTypeObject *__pyx_memoryview_type = 0;
@ -1876,8 +1876,8 @@ static PyObject *contiguous = 0;
static PyObject *indirect_contiguous = 0; static PyObject *indirect_contiguous = 0;
static int __pyx_memoryview_thread_locks_used; static int __pyx_memoryview_thread_locks_used;
static PyThread_type_lock __pyx_memoryview_thread_locks[8]; static PyThread_type_lock __pyx_memoryview_thread_locks[8];
static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_each(__Pyx_memviewslice, __Pyx_memviewslice, int, int, struct __pyx_opt_args_12larynx_train_4vits_15monotonic_align_4core_maximum_path_each *__pyx_optional_args); /*proto*/ static void __pyx_f_11piper_train_4vits_15monotonic_align_4core_maximum_path_each(__Pyx_memviewslice, __Pyx_memviewslice, int, int, struct __pyx_opt_args_11piper_train_4vits_15monotonic_align_4core_maximum_path_each *__pyx_optional_args); /*proto*/
static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_c(__Pyx_memviewslice, __Pyx_memviewslice, __Pyx_memviewslice, __Pyx_memviewslice, int __pyx_skip_dispatch); /*proto*/ static void __pyx_f_11piper_train_4vits_15monotonic_align_4core_maximum_path_c(__Pyx_memviewslice, __Pyx_memviewslice, __Pyx_memviewslice, __Pyx_memviewslice, int __pyx_skip_dispatch); /*proto*/
static struct __pyx_array_obj *__pyx_array_new(PyObject *, Py_ssize_t, char *, char *, char *); /*proto*/ static struct __pyx_array_obj *__pyx_array_new(PyObject *, Py_ssize_t, char *, char *, char *); /*proto*/
static void *__pyx_align_pointer(void *, size_t); /*proto*/ static void *__pyx_align_pointer(void *, size_t); /*proto*/
static PyObject *__pyx_memoryview_new(PyObject *, int, int, __Pyx_TypeInfo *); /*proto*/ static PyObject *__pyx_memoryview_new(PyObject *, int, int, __Pyx_TypeInfo *); /*proto*/
@ -1913,11 +1913,11 @@ static void __pyx_memoryview__slice_assign_scalar(char *, Py_ssize_t *, Py_ssize
static PyObject *__pyx_unpickle_Enum__set_state(struct __pyx_MemviewEnum_obj *, PyObject *); /*proto*/ static PyObject *__pyx_unpickle_Enum__set_state(struct __pyx_MemviewEnum_obj *, PyObject *); /*proto*/
static __Pyx_TypeInfo __Pyx_TypeInfo_int = { "int", NULL, sizeof(int), { 0 }, 0, IS_UNSIGNED(int) ? 'U' : 'I', IS_UNSIGNED(int), 0 }; static __Pyx_TypeInfo __Pyx_TypeInfo_int = { "int", NULL, sizeof(int), { 0 }, 0, IS_UNSIGNED(int) ? 'U' : 'I', IS_UNSIGNED(int), 0 };
static __Pyx_TypeInfo __Pyx_TypeInfo_float = { "float", NULL, sizeof(float), { 0 }, 0, 'R', 0, 0 }; static __Pyx_TypeInfo __Pyx_TypeInfo_float = { "float", NULL, sizeof(float), { 0 }, 0, 'R', 0, 0 };
#define __Pyx_MODULE_NAME "larynx_train.vits.monotonic_align.core" #define __Pyx_MODULE_NAME "piper_train.vits.monotonic_align.core"
extern int __pyx_module_is_main_larynx_train__vits__monotonic_align__core; extern int __pyx_module_is_main_piper_train__vits__monotonic_align__core;
int __pyx_module_is_main_larynx_train__vits__monotonic_align__core = 0; int __pyx_module_is_main_piper_train__vits__monotonic_align__core = 0;
/* Implementation of 'larynx_train.vits.monotonic_align.core' */ /* Implementation of 'piper_train.vits.monotonic_align.core' */
static PyObject *__pyx_builtin_range; static PyObject *__pyx_builtin_range;
static PyObject *__pyx_builtin_ValueError; static PyObject *__pyx_builtin_ValueError;
static PyObject *__pyx_builtin_MemoryError; static PyObject *__pyx_builtin_MemoryError;
@ -2104,7 +2104,7 @@ static PyObject *__pyx_kp_s_unable_to_allocate_shape_and_str;
static PyObject *__pyx_n_s_unpack; static PyObject *__pyx_n_s_unpack;
static PyObject *__pyx_n_s_update; static PyObject *__pyx_n_s_update;
static PyObject *__pyx_n_s_values; static PyObject *__pyx_n_s_values;
static PyObject *__pyx_pf_12larynx_train_4vits_15monotonic_align_4core_maximum_path_c(CYTHON_UNUSED PyObject *__pyx_self, __Pyx_memviewslice __pyx_v_paths, __Pyx_memviewslice __pyx_v_values, __Pyx_memviewslice __pyx_v_t_ys, __Pyx_memviewslice __pyx_v_t_xs); /* proto */ static PyObject *__pyx_pf_11piper_train_4vits_15monotonic_align_4core_maximum_path_c(CYTHON_UNUSED PyObject *__pyx_self, __Pyx_memviewslice __pyx_v_paths, __Pyx_memviewslice __pyx_v_values, __Pyx_memviewslice __pyx_v_t_ys, __Pyx_memviewslice __pyx_v_t_xs); /* proto */
static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __pyx_array_obj *__pyx_v_self, PyObject *__pyx_v_shape, Py_ssize_t __pyx_v_itemsize, PyObject *__pyx_v_format, PyObject *__pyx_v_mode, int __pyx_v_allocate_buffer); /* proto */ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __pyx_array_obj *__pyx_v_self, PyObject *__pyx_v_shape, Py_ssize_t __pyx_v_itemsize, PyObject *__pyx_v_format, PyObject *__pyx_v_mode, int __pyx_v_allocate_buffer); /* proto */
static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(struct __pyx_array_obj *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /* proto */ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(struct __pyx_array_obj *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /* proto */
static void __pyx_array___pyx_pf_15View_dot_MemoryView_5array_4__dealloc__(struct __pyx_array_obj *__pyx_v_self); /* proto */ static void __pyx_array___pyx_pf_15View_dot_MemoryView_5array_4__dealloc__(struct __pyx_array_obj *__pyx_v_self); /* proto */
@ -2186,7 +2186,7 @@ static PyObject *__pyx_tuple__26;
static PyObject *__pyx_codeobj__27; static PyObject *__pyx_codeobj__27;
/* Late includes */ /* Late includes */
/* "larynx_train/vits/monotonic_align/core.pyx":7 /* "piper_train/vits/monotonic_align/core.pyx":7
* @cython.boundscheck(False) * @cython.boundscheck(False)
* @cython.wraparound(False) * @cython.wraparound(False)
* cdef void maximum_path_each(int[:,::1] path, float[:,::1] value, int t_y, int t_x, float max_neg_val=-1e9) nogil: # <<<<<<<<<<<<<< * cdef void maximum_path_each(int[:,::1] path, float[:,::1] value, int t_y, int t_x, float max_neg_val=-1e9) nogil: # <<<<<<<<<<<<<<
@ -2194,7 +2194,7 @@ static PyObject *__pyx_codeobj__27;
* cdef int y * cdef int y
*/ */
static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_each(__Pyx_memviewslice __pyx_v_path, __Pyx_memviewslice __pyx_v_value, int __pyx_v_t_y, int __pyx_v_t_x, struct __pyx_opt_args_12larynx_train_4vits_15monotonic_align_4core_maximum_path_each *__pyx_optional_args) { static void __pyx_f_11piper_train_4vits_15monotonic_align_4core_maximum_path_each(__Pyx_memviewslice __pyx_v_path, __Pyx_memviewslice __pyx_v_value, int __pyx_v_t_y, int __pyx_v_t_x, struct __pyx_opt_args_11piper_train_4vits_15monotonic_align_4core_maximum_path_each *__pyx_optional_args) {
float __pyx_v_max_neg_val = __pyx_k_; float __pyx_v_max_neg_val = __pyx_k_;
int __pyx_v_x; int __pyx_v_x;
int __pyx_v_y; int __pyx_v_y;
@ -2223,7 +2223,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
} }
} }
/* "larynx_train/vits/monotonic_align/core.pyx":13 /* "piper_train/vits/monotonic_align/core.pyx":13
* cdef float v_cur * cdef float v_cur
* cdef float tmp * cdef float tmp
* cdef int index = t_x - 1 # <<<<<<<<<<<<<< * cdef int index = t_x - 1 # <<<<<<<<<<<<<<
@ -2232,7 +2232,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
*/ */
__pyx_v_index = (__pyx_v_t_x - 1); __pyx_v_index = (__pyx_v_t_x - 1);
/* "larynx_train/vits/monotonic_align/core.pyx":15 /* "piper_train/vits/monotonic_align/core.pyx":15
* cdef int index = t_x - 1 * cdef int index = t_x - 1
* *
* for y in range(t_y): # <<<<<<<<<<<<<< * for y in range(t_y): # <<<<<<<<<<<<<<
@ -2244,7 +2244,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) { for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) {
__pyx_v_y = __pyx_t_3; __pyx_v_y = __pyx_t_3;
/* "larynx_train/vits/monotonic_align/core.pyx":16 /* "piper_train/vits/monotonic_align/core.pyx":16
* *
* for y in range(t_y): * for y in range(t_y):
* for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)): # <<<<<<<<<<<<<< * for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)): # <<<<<<<<<<<<<<
@ -2270,7 +2270,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
for (__pyx_t_5 = __pyx_t_7; __pyx_t_5 < __pyx_t_6; __pyx_t_5+=1) { for (__pyx_t_5 = __pyx_t_7; __pyx_t_5 < __pyx_t_6; __pyx_t_5+=1) {
__pyx_v_x = __pyx_t_5; __pyx_v_x = __pyx_t_5;
/* "larynx_train/vits/monotonic_align/core.pyx":17 /* "piper_train/vits/monotonic_align/core.pyx":17
* for y in range(t_y): * for y in range(t_y):
* for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)): * for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)):
* if x == y: # <<<<<<<<<<<<<< * if x == y: # <<<<<<<<<<<<<<
@ -2280,7 +2280,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
__pyx_t_8 = ((__pyx_v_x == __pyx_v_y) != 0); __pyx_t_8 = ((__pyx_v_x == __pyx_v_y) != 0);
if (__pyx_t_8) { if (__pyx_t_8) {
/* "larynx_train/vits/monotonic_align/core.pyx":18 /* "piper_train/vits/monotonic_align/core.pyx":18
* for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)): * for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)):
* if x == y: * if x == y:
* v_cur = max_neg_val # <<<<<<<<<<<<<< * v_cur = max_neg_val # <<<<<<<<<<<<<<
@ -2289,7 +2289,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
*/ */
__pyx_v_v_cur = __pyx_v_max_neg_val; __pyx_v_v_cur = __pyx_v_max_neg_val;
/* "larynx_train/vits/monotonic_align/core.pyx":17 /* "piper_train/vits/monotonic_align/core.pyx":17
* for y in range(t_y): * for y in range(t_y):
* for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)): * for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)):
* if x == y: # <<<<<<<<<<<<<< * if x == y: # <<<<<<<<<<<<<<
@ -2299,7 +2299,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
goto __pyx_L7; goto __pyx_L7;
} }
/* "larynx_train/vits/monotonic_align/core.pyx":20 /* "piper_train/vits/monotonic_align/core.pyx":20
* v_cur = max_neg_val * v_cur = max_neg_val
* else: * else:
* v_cur = value[y-1, x] # <<<<<<<<<<<<<< * v_cur = value[y-1, x] # <<<<<<<<<<<<<<
@ -2313,7 +2313,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
} }
__pyx_L7:; __pyx_L7:;
/* "larynx_train/vits/monotonic_align/core.pyx":21 /* "piper_train/vits/monotonic_align/core.pyx":21
* else: * else:
* v_cur = value[y-1, x] * v_cur = value[y-1, x]
* if x == 0: # <<<<<<<<<<<<<< * if x == 0: # <<<<<<<<<<<<<<
@ -2323,7 +2323,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
__pyx_t_8 = ((__pyx_v_x == 0) != 0); __pyx_t_8 = ((__pyx_v_x == 0) != 0);
if (__pyx_t_8) { if (__pyx_t_8) {
/* "larynx_train/vits/monotonic_align/core.pyx":22 /* "piper_train/vits/monotonic_align/core.pyx":22
* v_cur = value[y-1, x] * v_cur = value[y-1, x]
* if x == 0: * if x == 0:
* if y == 0: # <<<<<<<<<<<<<< * if y == 0: # <<<<<<<<<<<<<<
@ -2333,7 +2333,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
__pyx_t_8 = ((__pyx_v_y == 0) != 0); __pyx_t_8 = ((__pyx_v_y == 0) != 0);
if (__pyx_t_8) { if (__pyx_t_8) {
/* "larynx_train/vits/monotonic_align/core.pyx":23 /* "piper_train/vits/monotonic_align/core.pyx":23
* if x == 0: * if x == 0:
* if y == 0: * if y == 0:
* v_prev = 0. # <<<<<<<<<<<<<< * v_prev = 0. # <<<<<<<<<<<<<<
@ -2342,7 +2342,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
*/ */
__pyx_v_v_prev = 0.; __pyx_v_v_prev = 0.;
/* "larynx_train/vits/monotonic_align/core.pyx":22 /* "piper_train/vits/monotonic_align/core.pyx":22
* v_cur = value[y-1, x] * v_cur = value[y-1, x]
* if x == 0: * if x == 0:
* if y == 0: # <<<<<<<<<<<<<< * if y == 0: # <<<<<<<<<<<<<<
@ -2352,7 +2352,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
goto __pyx_L9; goto __pyx_L9;
} }
/* "larynx_train/vits/monotonic_align/core.pyx":25 /* "piper_train/vits/monotonic_align/core.pyx":25
* v_prev = 0. * v_prev = 0.
* else: * else:
* v_prev = max_neg_val # <<<<<<<<<<<<<< * v_prev = max_neg_val # <<<<<<<<<<<<<<
@ -2364,7 +2364,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
} }
__pyx_L9:; __pyx_L9:;
/* "larynx_train/vits/monotonic_align/core.pyx":21 /* "piper_train/vits/monotonic_align/core.pyx":21
* else: * else:
* v_cur = value[y-1, x] * v_cur = value[y-1, x]
* if x == 0: # <<<<<<<<<<<<<< * if x == 0: # <<<<<<<<<<<<<<
@ -2374,7 +2374,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
goto __pyx_L8; goto __pyx_L8;
} }
/* "larynx_train/vits/monotonic_align/core.pyx":27 /* "piper_train/vits/monotonic_align/core.pyx":27
* v_prev = max_neg_val * v_prev = max_neg_val
* else: * else:
* v_prev = value[y-1, x-1] # <<<<<<<<<<<<<< * v_prev = value[y-1, x-1] # <<<<<<<<<<<<<<
@ -2388,7 +2388,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
} }
__pyx_L8:; __pyx_L8:;
/* "larynx_train/vits/monotonic_align/core.pyx":28 /* "piper_train/vits/monotonic_align/core.pyx":28
* else: * else:
* v_prev = value[y-1, x-1] * v_prev = value[y-1, x-1]
* value[y, x] += max(v_prev, v_cur) # <<<<<<<<<<<<<< * value[y, x] += max(v_prev, v_cur) # <<<<<<<<<<<<<<
@ -2408,7 +2408,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
} }
} }
/* "larynx_train/vits/monotonic_align/core.pyx":30 /* "piper_train/vits/monotonic_align/core.pyx":30
* value[y, x] += max(v_prev, v_cur) * value[y, x] += max(v_prev, v_cur)
* *
* for y in range(t_y - 1, -1, -1): # <<<<<<<<<<<<<< * for y in range(t_y - 1, -1, -1): # <<<<<<<<<<<<<<
@ -2418,7 +2418,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
for (__pyx_t_1 = (__pyx_v_t_y - 1); __pyx_t_1 > -1; __pyx_t_1-=1) { for (__pyx_t_1 = (__pyx_v_t_y - 1); __pyx_t_1 > -1; __pyx_t_1-=1) {
__pyx_v_y = __pyx_t_1; __pyx_v_y = __pyx_t_1;
/* "larynx_train/vits/monotonic_align/core.pyx":31 /* "piper_train/vits/monotonic_align/core.pyx":31
* *
* for y in range(t_y - 1, -1, -1): * for y in range(t_y - 1, -1, -1):
* path[y, index] = 1 # <<<<<<<<<<<<<< * path[y, index] = 1 # <<<<<<<<<<<<<<
@ -2429,7 +2429,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
__pyx_t_9 = __pyx_v_index; __pyx_t_9 = __pyx_v_index;
*((int *) ( /* dim=1 */ ((char *) (((int *) ( /* dim=0 */ (__pyx_v_path.data + __pyx_t_10 * __pyx_v_path.strides[0]) )) + __pyx_t_9)) )) = 1; *((int *) ( /* dim=1 */ ((char *) (((int *) ( /* dim=0 */ (__pyx_v_path.data + __pyx_t_10 * __pyx_v_path.strides[0]) )) + __pyx_t_9)) )) = 1;
/* "larynx_train/vits/monotonic_align/core.pyx":32 /* "piper_train/vits/monotonic_align/core.pyx":32
* for y in range(t_y - 1, -1, -1): * for y in range(t_y - 1, -1, -1):
* path[y, index] = 1 * path[y, index] = 1
* if index != 0 and (index == y or value[y-1, index] < value[y-1, index-1]): # <<<<<<<<<<<<<< * if index != 0 and (index == y or value[y-1, index] < value[y-1, index-1]): # <<<<<<<<<<<<<<
@ -2457,7 +2457,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
__pyx_L13_bool_binop_done:; __pyx_L13_bool_binop_done:;
if (__pyx_t_8) { if (__pyx_t_8) {
/* "larynx_train/vits/monotonic_align/core.pyx":33 /* "piper_train/vits/monotonic_align/core.pyx":33
* path[y, index] = 1 * path[y, index] = 1
* if index != 0 and (index == y or value[y-1, index] < value[y-1, index-1]): * if index != 0 and (index == y or value[y-1, index] < value[y-1, index-1]):
* index = index - 1 # <<<<<<<<<<<<<< * index = index - 1 # <<<<<<<<<<<<<<
@ -2466,7 +2466,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
*/ */
__pyx_v_index = (__pyx_v_index - 1); __pyx_v_index = (__pyx_v_index - 1);
/* "larynx_train/vits/monotonic_align/core.pyx":32 /* "piper_train/vits/monotonic_align/core.pyx":32
* for y in range(t_y - 1, -1, -1): * for y in range(t_y - 1, -1, -1):
* path[y, index] = 1 * path[y, index] = 1
* if index != 0 and (index == y or value[y-1, index] < value[y-1, index-1]): # <<<<<<<<<<<<<< * if index != 0 and (index == y or value[y-1, index] < value[y-1, index-1]): # <<<<<<<<<<<<<<
@ -2476,7 +2476,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
} }
} }
/* "larynx_train/vits/monotonic_align/core.pyx":7 /* "piper_train/vits/monotonic_align/core.pyx":7
* @cython.boundscheck(False) * @cython.boundscheck(False)
* @cython.wraparound(False) * @cython.wraparound(False)
* cdef void maximum_path_each(int[:,::1] path, float[:,::1] value, int t_y, int t_x, float max_neg_val=-1e9) nogil: # <<<<<<<<<<<<<< * cdef void maximum_path_each(int[:,::1] path, float[:,::1] value, int t_y, int t_x, float max_neg_val=-1e9) nogil: # <<<<<<<<<<<<<<
@ -2487,7 +2487,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
/* function exit code */ /* function exit code */
} }
/* "larynx_train/vits/monotonic_align/core.pyx":38 /* "piper_train/vits/monotonic_align/core.pyx":38
* @cython.boundscheck(False) * @cython.boundscheck(False)
* @cython.wraparound(False) * @cython.wraparound(False)
* cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_ys, int[::1] t_xs) nogil: # <<<<<<<<<<<<<< * cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_ys, int[::1] t_xs) nogil: # <<<<<<<<<<<<<<
@ -2495,8 +2495,8 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_ea
* cdef int i * cdef int i
*/ */
static PyObject *__pyx_pw_12larynx_train_4vits_15monotonic_align_4core_1maximum_path_c(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ static PyObject *__pyx_pw_11piper_train_4vits_15monotonic_align_4core_1maximum_path_c(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_c(__Pyx_memviewslice __pyx_v_paths, __Pyx_memviewslice __pyx_v_values, __Pyx_memviewslice __pyx_v_t_ys, __Pyx_memviewslice __pyx_v_t_xs, CYTHON_UNUSED int __pyx_skip_dispatch) { static void __pyx_f_11piper_train_4vits_15monotonic_align_4core_maximum_path_c(__Pyx_memviewslice __pyx_v_paths, __Pyx_memviewslice __pyx_v_values, __Pyx_memviewslice __pyx_v_t_ys, __Pyx_memviewslice __pyx_v_t_xs, CYTHON_UNUSED int __pyx_skip_dispatch) {
CYTHON_UNUSED int __pyx_v_b; CYTHON_UNUSED int __pyx_v_b;
int __pyx_v_i; int __pyx_v_i;
int __pyx_t_1; int __pyx_t_1;
@ -2507,7 +2507,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_c(
Py_ssize_t __pyx_t_6; Py_ssize_t __pyx_t_6;
Py_ssize_t __pyx_t_7; Py_ssize_t __pyx_t_7;
/* "larynx_train/vits/monotonic_align/core.pyx":39 /* "piper_train/vits/monotonic_align/core.pyx":39
* @cython.wraparound(False) * @cython.wraparound(False)
* cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_ys, int[::1] t_xs) nogil: * cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_ys, int[::1] t_xs) nogil:
* cdef int b = paths.shape[0] # <<<<<<<<<<<<<< * cdef int b = paths.shape[0] # <<<<<<<<<<<<<<
@ -2516,7 +2516,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_c(
*/ */
__pyx_v_b = (__pyx_v_paths.shape[0]); __pyx_v_b = (__pyx_v_paths.shape[0]);
/* "larynx_train/vits/monotonic_align/core.pyx":41 /* "piper_train/vits/monotonic_align/core.pyx":41
* cdef int b = paths.shape[0] * cdef int b = paths.shape[0]
* cdef int i * cdef int i
* for i in prange(b, nogil=True): # <<<<<<<<<<<<<< * for i in prange(b, nogil=True): # <<<<<<<<<<<<<<
@ -2552,7 +2552,7 @@ static void __pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_c(
{ {
__pyx_v_i = (int)(0 + 1 * __pyx_t_2); __pyx_v_i = (int)(0 + 1 * __pyx_t_2);
/* "larynx_train/vits/monotonic_align/core.pyx":42 /* "piper_train/vits/monotonic_align/core.pyx":42
* cdef int i * cdef int i
* for i in prange(b, nogil=True): * for i in prange(b, nogil=True):
* maximum_path_each(paths[i], values[i], t_ys[i], t_xs[i]) # <<<<<<<<<<<<<< * maximum_path_each(paths[i], values[i], t_ys[i], t_xs[i]) # <<<<<<<<<<<<<<
@ -2593,7 +2593,7 @@ __pyx_t_5.strides[1] = __pyx_v_values.strides[2];
__pyx_t_6 = __pyx_v_i; __pyx_t_6 = __pyx_v_i;
__pyx_t_7 = __pyx_v_i; __pyx_t_7 = __pyx_v_i;
__pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_each(__pyx_t_4, __pyx_t_5, (*((int *) ( /* dim=0 */ ((char *) (((int *) __pyx_v_t_ys.data) + __pyx_t_6)) ))), (*((int *) ( /* dim=0 */ ((char *) (((int *) __pyx_v_t_xs.data) + __pyx_t_7)) ))), NULL); __pyx_f_11piper_train_4vits_15monotonic_align_4core_maximum_path_each(__pyx_t_4, __pyx_t_5, (*((int *) ( /* dim=0 */ ((char *) (((int *) __pyx_v_t_ys.data) + __pyx_t_6)) ))), (*((int *) ( /* dim=0 */ ((char *) (((int *) __pyx_v_t_xs.data) + __pyx_t_7)) ))), NULL);
__PYX_XDEC_MEMVIEW(&__pyx_t_4, 0); __PYX_XDEC_MEMVIEW(&__pyx_t_4, 0);
__pyx_t_4.memview = NULL; __pyx_t_4.memview = NULL;
__pyx_t_4.data = NULL; __pyx_t_4.data = NULL;
@ -2613,7 +2613,7 @@ __pyx_t_6 = __pyx_v_i;
#endif #endif
} }
/* "larynx_train/vits/monotonic_align/core.pyx":41 /* "piper_train/vits/monotonic_align/core.pyx":41
* cdef int b = paths.shape[0] * cdef int b = paths.shape[0]
* cdef int i * cdef int i
* for i in prange(b, nogil=True): # <<<<<<<<<<<<<< * for i in prange(b, nogil=True): # <<<<<<<<<<<<<<
@ -2631,7 +2631,7 @@ __pyx_t_6 = __pyx_v_i;
} }
} }
/* "larynx_train/vits/monotonic_align/core.pyx":38 /* "piper_train/vits/monotonic_align/core.pyx":38
* @cython.boundscheck(False) * @cython.boundscheck(False)
* @cython.wraparound(False) * @cython.wraparound(False)
* cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_ys, int[::1] t_xs) nogil: # <<<<<<<<<<<<<< * cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_ys, int[::1] t_xs) nogil: # <<<<<<<<<<<<<<
@ -2643,8 +2643,8 @@ __pyx_t_6 = __pyx_v_i;
} }
/* Python wrapper */ /* Python wrapper */
static PyObject *__pyx_pw_12larynx_train_4vits_15monotonic_align_4core_1maximum_path_c(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ static PyObject *__pyx_pw_11piper_train_4vits_15monotonic_align_4core_1maximum_path_c(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static PyObject *__pyx_pw_12larynx_train_4vits_15monotonic_align_4core_1maximum_path_c(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { static PyObject *__pyx_pw_11piper_train_4vits_15monotonic_align_4core_1maximum_path_c(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
__Pyx_memviewslice __pyx_v_paths = { 0, 0, { 0 }, { 0 }, { 0 } }; __Pyx_memviewslice __pyx_v_paths = { 0, 0, { 0 }, { 0 }, { 0 } };
__Pyx_memviewslice __pyx_v_values = { 0, 0, { 0 }, { 0 }, { 0 } }; __Pyx_memviewslice __pyx_v_values = { 0, 0, { 0 }, { 0 }, { 0 } };
__Pyx_memviewslice __pyx_v_t_ys = { 0, 0, { 0 }, { 0 }, { 0 } }; __Pyx_memviewslice __pyx_v_t_ys = { 0, 0, { 0 }, { 0 }, { 0 } };
@ -2717,18 +2717,18 @@ static PyObject *__pyx_pw_12larynx_train_4vits_15monotonic_align_4core_1maximum_
__pyx_L5_argtuple_error:; __pyx_L5_argtuple_error:;
__Pyx_RaiseArgtupleInvalid("maximum_path_c", 1, 4, 4, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 38, __pyx_L3_error) __Pyx_RaiseArgtupleInvalid("maximum_path_c", 1, 4, 4, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 38, __pyx_L3_error)
__pyx_L3_error:; __pyx_L3_error:;
__Pyx_AddTraceback("larynx_train.vits.monotonic_align.core.maximum_path_c", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_AddTraceback("piper_train.vits.monotonic_align.core.maximum_path_c", __pyx_clineno, __pyx_lineno, __pyx_filename);
__Pyx_RefNannyFinishContext(); __Pyx_RefNannyFinishContext();
return NULL; return NULL;
__pyx_L4_argument_unpacking_done:; __pyx_L4_argument_unpacking_done:;
__pyx_r = __pyx_pf_12larynx_train_4vits_15monotonic_align_4core_maximum_path_c(__pyx_self, __pyx_v_paths, __pyx_v_values, __pyx_v_t_ys, __pyx_v_t_xs); __pyx_r = __pyx_pf_11piper_train_4vits_15monotonic_align_4core_maximum_path_c(__pyx_self, __pyx_v_paths, __pyx_v_values, __pyx_v_t_ys, __pyx_v_t_xs);
/* function exit code */ /* function exit code */
__Pyx_RefNannyFinishContext(); __Pyx_RefNannyFinishContext();
return __pyx_r; return __pyx_r;
} }
static PyObject *__pyx_pf_12larynx_train_4vits_15monotonic_align_4core_maximum_path_c(CYTHON_UNUSED PyObject *__pyx_self, __Pyx_memviewslice __pyx_v_paths, __Pyx_memviewslice __pyx_v_values, __Pyx_memviewslice __pyx_v_t_ys, __Pyx_memviewslice __pyx_v_t_xs) { static PyObject *__pyx_pf_11piper_train_4vits_15monotonic_align_4core_maximum_path_c(CYTHON_UNUSED PyObject *__pyx_self, __Pyx_memviewslice __pyx_v_paths, __Pyx_memviewslice __pyx_v_values, __Pyx_memviewslice __pyx_v_t_ys, __Pyx_memviewslice __pyx_v_t_xs) {
PyObject *__pyx_r = NULL; PyObject *__pyx_r = NULL;
__Pyx_RefNannyDeclarations __Pyx_RefNannyDeclarations
PyObject *__pyx_t_1 = NULL; PyObject *__pyx_t_1 = NULL;
@ -2741,7 +2741,7 @@ static PyObject *__pyx_pf_12larynx_train_4vits_15monotonic_align_4core_maximum_p
if (unlikely(!__pyx_v_values.memview)) { __Pyx_RaiseUnboundLocalError("values"); __PYX_ERR(0, 38, __pyx_L1_error) } if (unlikely(!__pyx_v_values.memview)) { __Pyx_RaiseUnboundLocalError("values"); __PYX_ERR(0, 38, __pyx_L1_error) }
if (unlikely(!__pyx_v_t_ys.memview)) { __Pyx_RaiseUnboundLocalError("t_ys"); __PYX_ERR(0, 38, __pyx_L1_error) } if (unlikely(!__pyx_v_t_ys.memview)) { __Pyx_RaiseUnboundLocalError("t_ys"); __PYX_ERR(0, 38, __pyx_L1_error) }
if (unlikely(!__pyx_v_t_xs.memview)) { __Pyx_RaiseUnboundLocalError("t_xs"); __PYX_ERR(0, 38, __pyx_L1_error) } if (unlikely(!__pyx_v_t_xs.memview)) { __Pyx_RaiseUnboundLocalError("t_xs"); __PYX_ERR(0, 38, __pyx_L1_error) }
__pyx_t_1 = __Pyx_void_to_None(__pyx_f_12larynx_train_4vits_15monotonic_align_4core_maximum_path_c(__pyx_v_paths, __pyx_v_values, __pyx_v_t_ys, __pyx_v_t_xs, 0)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 38, __pyx_L1_error) __pyx_t_1 = __Pyx_void_to_None(__pyx_f_11piper_train_4vits_15monotonic_align_4core_maximum_path_c(__pyx_v_paths, __pyx_v_values, __pyx_v_t_ys, __pyx_v_t_xs, 0)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 38, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1); __Pyx_GOTREF(__pyx_t_1);
__pyx_r = __pyx_t_1; __pyx_r = __pyx_t_1;
__pyx_t_1 = 0; __pyx_t_1 = 0;
@ -2750,7 +2750,7 @@ static PyObject *__pyx_pf_12larynx_train_4vits_15monotonic_align_4core_maximum_p
/* function exit code */ /* function exit code */
__pyx_L1_error:; __pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_1); __Pyx_XDECREF(__pyx_t_1);
__Pyx_AddTraceback("larynx_train.vits.monotonic_align.core.maximum_path_c", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_AddTraceback("piper_train.vits.monotonic_align.core.maximum_path_c", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = NULL; __pyx_r = NULL;
__pyx_L0:; __pyx_L0:;
__PYX_XDEC_MEMVIEW(&__pyx_v_paths, 1); __PYX_XDEC_MEMVIEW(&__pyx_v_paths, 1);
@ -3066,7 +3066,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __
* self.format = self._format * self.format = self._format
* *
*/ */
if (!(likely(PyBytes_CheckExact(__pyx_v_format))||((__pyx_v_format) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_v_format)->tp_name), 0))) __PYX_ERR(1, 141, __pyx_L1_error) if (!(likely(PyBytes_CheckExact(__pyx_v_format))||((__pyx_v_format) == Py_None)||((void)PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_v_format)->tp_name), 0))) __PYX_ERR(1, 141, __pyx_L1_error)
__pyx_t_3 = __pyx_v_format; __pyx_t_3 = __pyx_v_format;
__Pyx_INCREF(__pyx_t_3); __Pyx_INCREF(__pyx_t_3);
__Pyx_GIVEREF(__pyx_t_3); __Pyx_GIVEREF(__pyx_t_3);
@ -5044,7 +5044,7 @@ static PyObject *__pyx_pf___pyx_MemviewEnum_2__setstate_cython__(struct __pyx_Me
* def __setstate_cython__(self, __pyx_state): * def __setstate_cython__(self, __pyx_state):
* __pyx_unpickle_Enum__set_state(self, __pyx_state) # <<<<<<<<<<<<<< * __pyx_unpickle_Enum__set_state(self, __pyx_state) # <<<<<<<<<<<<<<
*/ */
if (!(likely(PyTuple_CheckExact(__pyx_v___pyx_state))||((__pyx_v___pyx_state) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "tuple", Py_TYPE(__pyx_v___pyx_state)->tp_name), 0))) __PYX_ERR(1, 17, __pyx_L1_error) if (!(likely(PyTuple_CheckExact(__pyx_v___pyx_state))||((__pyx_v___pyx_state) == Py_None)||((void)PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "tuple", Py_TYPE(__pyx_v___pyx_state)->tp_name), 0))) __PYX_ERR(1, 17, __pyx_L1_error)
__pyx_t_1 = __pyx_unpickle_Enum__set_state(__pyx_v_self, ((PyObject*)__pyx_v___pyx_state)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 17, __pyx_L1_error) __pyx_t_1 = __pyx_unpickle_Enum__set_state(__pyx_v_self, ((PyObject*)__pyx_v___pyx_state)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 17, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1); __Pyx_GOTREF(__pyx_t_1);
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
@ -7347,7 +7347,7 @@ static PyObject *__pyx_memoryview_assign_item_from_object(struct __pyx_memoryvie
__Pyx_GOTREF(__pyx_t_4); __Pyx_GOTREF(__pyx_t_4);
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
__Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
if (!(likely(PyBytes_CheckExact(__pyx_t_4))||((__pyx_t_4) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_t_4)->tp_name), 0))) __PYX_ERR(1, 512, __pyx_L1_error) if (!(likely(PyBytes_CheckExact(__pyx_t_4))||((__pyx_t_4) == Py_None)||((void)PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_t_4)->tp_name), 0))) __PYX_ERR(1, 512, __pyx_L1_error)
__pyx_v_bytesvalue = ((PyObject*)__pyx_t_4); __pyx_v_bytesvalue = ((PyObject*)__pyx_t_4);
__pyx_t_4 = 0; __pyx_t_4 = 0;
@ -7420,7 +7420,7 @@ static PyObject *__pyx_memoryview_assign_item_from_object(struct __pyx_memoryvie
__Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
} }
__Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
if (!(likely(PyBytes_CheckExact(__pyx_t_4))||((__pyx_t_4) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_t_4)->tp_name), 0))) __PYX_ERR(1, 514, __pyx_L1_error) if (!(likely(PyBytes_CheckExact(__pyx_t_4))||((__pyx_t_4) == Py_None)||((void)PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_t_4)->tp_name), 0))) __PYX_ERR(1, 514, __pyx_L1_error)
__pyx_v_bytesvalue = ((PyObject*)__pyx_t_4); __pyx_v_bytesvalue = ((PyObject*)__pyx_t_4);
__pyx_t_4 = 0; __pyx_t_4 = 0;
} }
@ -15623,7 +15623,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView___pyx_unpickle_Enum(CYTHON_UNUSE
* return __pyx_result * return __pyx_result
* cdef __pyx_unpickle_Enum__set_state(Enum __pyx_result, tuple __pyx_state): * cdef __pyx_unpickle_Enum__set_state(Enum __pyx_result, tuple __pyx_state):
*/ */
if (!(likely(PyTuple_CheckExact(__pyx_v___pyx_state))||((__pyx_v___pyx_state) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "tuple", Py_TYPE(__pyx_v___pyx_state)->tp_name), 0))) __PYX_ERR(1, 9, __pyx_L1_error) if (!(likely(PyTuple_CheckExact(__pyx_v___pyx_state))||((__pyx_v___pyx_state) == Py_None)||((void)PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "tuple", Py_TYPE(__pyx_v___pyx_state)->tp_name), 0))) __PYX_ERR(1, 9, __pyx_L1_error)
__pyx_t_4 = __pyx_unpickle_Enum__set_state(((struct __pyx_MemviewEnum_obj *)__pyx_v___pyx_result), ((PyObject*)__pyx_v___pyx_state)); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 9, __pyx_L1_error) __pyx_t_4 = __pyx_unpickle_Enum__set_state(((struct __pyx_MemviewEnum_obj *)__pyx_v___pyx_result), ((PyObject*)__pyx_v___pyx_state)); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 9, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_4); __Pyx_GOTREF(__pyx_t_4);
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
@ -15925,7 +15925,7 @@ static PyBufferProcs __pyx_tp_as_buffer_array = {
static PyTypeObject __pyx_type___pyx_array = { static PyTypeObject __pyx_type___pyx_array = {
PyVarObject_HEAD_INIT(0, 0) PyVarObject_HEAD_INIT(0, 0)
"larynx_train.vits.monotonic_align.core.array", /*tp_name*/ "piper_train.vits.monotonic_align.core.array", /*tp_name*/
sizeof(struct __pyx_array_obj), /*tp_basicsize*/ sizeof(struct __pyx_array_obj), /*tp_basicsize*/
0, /*tp_itemsize*/ 0, /*tp_itemsize*/
__pyx_tp_dealloc_array, /*tp_dealloc*/ __pyx_tp_dealloc_array, /*tp_dealloc*/
@ -16047,7 +16047,7 @@ static PyMethodDef __pyx_methods_Enum[] = {
static PyTypeObject __pyx_type___pyx_MemviewEnum = { static PyTypeObject __pyx_type___pyx_MemviewEnum = {
PyVarObject_HEAD_INIT(0, 0) PyVarObject_HEAD_INIT(0, 0)
"larynx_train.vits.monotonic_align.core.Enum", /*tp_name*/ "piper_train.vits.monotonic_align.core.Enum", /*tp_name*/
sizeof(struct __pyx_MemviewEnum_obj), /*tp_basicsize*/ sizeof(struct __pyx_MemviewEnum_obj), /*tp_basicsize*/
0, /*tp_itemsize*/ 0, /*tp_itemsize*/
__pyx_tp_dealloc_Enum, /*tp_dealloc*/ __pyx_tp_dealloc_Enum, /*tp_dealloc*/
@ -16311,7 +16311,7 @@ static PyBufferProcs __pyx_tp_as_buffer_memoryview = {
static PyTypeObject __pyx_type___pyx_memoryview = { static PyTypeObject __pyx_type___pyx_memoryview = {
PyVarObject_HEAD_INIT(0, 0) PyVarObject_HEAD_INIT(0, 0)
"larynx_train.vits.monotonic_align.core.memoryview", /*tp_name*/ "piper_train.vits.monotonic_align.core.memoryview", /*tp_name*/
sizeof(struct __pyx_memoryview_obj), /*tp_basicsize*/ sizeof(struct __pyx_memoryview_obj), /*tp_basicsize*/
0, /*tp_itemsize*/ 0, /*tp_itemsize*/
__pyx_tp_dealloc_memoryview, /*tp_dealloc*/ __pyx_tp_dealloc_memoryview, /*tp_dealloc*/
@ -16452,7 +16452,7 @@ static struct PyGetSetDef __pyx_getsets__memoryviewslice[] = {
static PyTypeObject __pyx_type___pyx_memoryviewslice = { static PyTypeObject __pyx_type___pyx_memoryviewslice = {
PyVarObject_HEAD_INIT(0, 0) PyVarObject_HEAD_INIT(0, 0)
"larynx_train.vits.monotonic_align.core._memoryviewslice", /*tp_name*/ "piper_train.vits.monotonic_align.core._memoryviewslice", /*tp_name*/
sizeof(struct __pyx_memoryviewslice_obj), /*tp_basicsize*/ sizeof(struct __pyx_memoryviewslice_obj), /*tp_basicsize*/
0, /*tp_itemsize*/ 0, /*tp_itemsize*/
__pyx_tp_dealloc__memoryviewslice, /*tp_dealloc*/ __pyx_tp_dealloc__memoryviewslice, /*tp_dealloc*/
@ -16531,7 +16531,7 @@ static PyTypeObject __pyx_type___pyx_memoryviewslice = {
}; };
static PyMethodDef __pyx_methods[] = { static PyMethodDef __pyx_methods[] = {
{"maximum_path_c", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_12larynx_train_4vits_15monotonic_align_4core_1maximum_path_c, METH_VARARGS|METH_KEYWORDS, 0}, {"maximum_path_c", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_11piper_train_4vits_15monotonic_align_4core_1maximum_path_c, METH_VARARGS|METH_KEYWORDS, 0},
{0, 0, 0, 0} {0, 0, 0, 0}
}; };
@ -16961,7 +16961,7 @@ PyEval_InitThreads();
if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 1, __pyx_L1_error) if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 1, __pyx_L1_error)
if (__Pyx_InitStrings(__pyx_string_tab) < 0) __PYX_ERR(0, 1, __pyx_L1_error); if (__Pyx_InitStrings(__pyx_string_tab) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
__pyx_int_0 = PyInt_FromLong(0); if (unlikely(!__pyx_int_0)) __PYX_ERR(0, 1, __pyx_L1_error) __pyx_int_0 = PyInt_FromLong(0); if (unlikely(!__pyx_int_0)) __PYX_ERR(0, 1, __pyx_L1_error)
__pyx_int_1 = PyInt_FromLong(1); if (unlikely(!__pyx_int_1)) __PYX_ERR(0, 1, __pyx_L1_error) __pyx_int_1 = PyInt_FromLong(1); if (unlikely(!__pyx_int_1)) __PYX_ERR(0, 1, __pyx_L1_error)
__pyx_int_112105877 = PyInt_FromLong(112105877L); if (unlikely(!__pyx_int_112105877)) __PYX_ERR(0, 1, __pyx_L1_error) __pyx_int_112105877 = PyInt_FromLong(112105877L); if (unlikely(!__pyx_int_112105877)) __PYX_ERR(0, 1, __pyx_L1_error)
@ -17266,20 +17266,20 @@ if (!__Pyx_RefNanny) {
Py_INCREF(__pyx_b); Py_INCREF(__pyx_b);
__pyx_cython_runtime = PyImport_AddModule((char *) "cython_runtime"); if (unlikely(!__pyx_cython_runtime)) __PYX_ERR(0, 1, __pyx_L1_error) __pyx_cython_runtime = PyImport_AddModule((char *) "cython_runtime"); if (unlikely(!__pyx_cython_runtime)) __PYX_ERR(0, 1, __pyx_L1_error)
Py_INCREF(__pyx_cython_runtime); Py_INCREF(__pyx_cython_runtime);
if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) __PYX_ERR(0, 1, __pyx_L1_error); if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
/*--- Initialize various global constants etc. ---*/ /*--- Initialize various global constants etc. ---*/
if (__Pyx_InitGlobals() < 0) __PYX_ERR(0, 1, __pyx_L1_error) if (__Pyx_InitGlobals() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
#if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT)
if (__Pyx_init_sys_getdefaultencoding_params() < 0) __PYX_ERR(0, 1, __pyx_L1_error) if (__Pyx_init_sys_getdefaultencoding_params() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
#endif #endif
if (__pyx_module_is_main_larynx_train__vits__monotonic_align__core) { if (__pyx_module_is_main_piper_train__vits__monotonic_align__core) {
if (PyObject_SetAttr(__pyx_m, __pyx_n_s_name_2, __pyx_n_s_main) < 0) __PYX_ERR(0, 1, __pyx_L1_error) if (PyObject_SetAttr(__pyx_m, __pyx_n_s_name_2, __pyx_n_s_main) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
} }
#if PY_MAJOR_VERSION >= 3 #if PY_MAJOR_VERSION >= 3
{ {
PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error) PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error)
if (!PyDict_GetItemString(modules, "larynx_train.vits.monotonic_align.core")) { if (!PyDict_GetItemString(modules, "piper_train.vits.monotonic_align.core")) {
if (unlikely(PyDict_SetItemString(modules, "larynx_train.vits.monotonic_align.core", __pyx_m) < 0)) __PYX_ERR(0, 1, __pyx_L1_error) if (unlikely(PyDict_SetItemString(modules, "piper_train.vits.monotonic_align.core", __pyx_m) < 0)) __PYX_ERR(0, 1, __pyx_L1_error)
} }
} }
#endif #endif
@ -17300,7 +17300,7 @@ if (!__Pyx_RefNanny) {
if (__Pyx_patch_abc() < 0) __PYX_ERR(0, 1, __pyx_L1_error) if (__Pyx_patch_abc() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
#endif #endif
/* "larynx_train/vits/monotonic_align/core.pyx":7 /* "piper_train/vits/monotonic_align/core.pyx":7
* @cython.boundscheck(False) * @cython.boundscheck(False)
* @cython.wraparound(False) * @cython.wraparound(False)
* cdef void maximum_path_each(int[:,::1] path, float[:,::1] value, int t_y, int t_x, float max_neg_val=-1e9) nogil: # <<<<<<<<<<<<<< * cdef void maximum_path_each(int[:,::1] path, float[:,::1] value, int t_y, int t_x, float max_neg_val=-1e9) nogil: # <<<<<<<<<<<<<<
@ -17309,7 +17309,7 @@ if (!__Pyx_RefNanny) {
*/ */
__pyx_k_ = (-1e9); __pyx_k_ = (-1e9);
/* "larynx_train/vits/monotonic_align/core.pyx":1 /* "piper_train/vits/monotonic_align/core.pyx":1
* cimport cython # <<<<<<<<<<<<<< * cimport cython # <<<<<<<<<<<<<<
* from cython.parallel import prange * from cython.parallel import prange
* *
@ -17479,11 +17479,11 @@ if (!__Pyx_RefNanny) {
__Pyx_XDECREF(__pyx_t_1); __Pyx_XDECREF(__pyx_t_1);
if (__pyx_m) { if (__pyx_m) {
if (__pyx_d) { if (__pyx_d) {
__Pyx_AddTraceback("init larynx_train.vits.monotonic_align.core", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_AddTraceback("init piper_train.vits.monotonic_align.core", __pyx_clineno, __pyx_lineno, __pyx_filename);
} }
Py_CLEAR(__pyx_m); Py_CLEAR(__pyx_m);
} else if (!PyErr_Occurred()) { } else if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ImportError, "init larynx_train.vits.monotonic_align.core"); PyErr_SetString(PyExc_ImportError, "init piper_train.vits.monotonic_align.core");
} }
__pyx_L0:; __pyx_L0:;
__Pyx_RefNannyFinishContext(); __Pyx_RefNannyFinishContext();
@ -18536,7 +18536,7 @@ static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i,
/* ObjectGetItem */ /* ObjectGetItem */
#if CYTHON_USE_TYPE_SLOTS #if CYTHON_USE_TYPE_SLOTS
static PyObject *__Pyx_PyObject_GetIndex(PyObject *obj, PyObject* index) { static PyObject *__Pyx_PyObject_GetIndex(PyObject *obj, PyObject* index) {
PyObject *runerr; PyObject *runerr = NULL;
Py_ssize_t key_value; Py_ssize_t key_value;
PySequenceMethods *m = Py_TYPE(obj)->tp_as_sequence; PySequenceMethods *m = Py_TYPE(obj)->tp_as_sequence;
if (unlikely(!(m && m->sq_item))) { if (unlikely(!(m && m->sq_item))) {
@ -19417,7 +19417,7 @@ __PYX_GOOD:
/* CLineInTraceback */ /* CLineInTraceback */
#ifndef CYTHON_CLINE_IN_TRACEBACK #ifndef CYTHON_CLINE_IN_TRACEBACK
static int __Pyx_CLineForTraceback(CYTHON_NCP_UNUSED PyThreadState *tstate, int c_line) { static int __Pyx_CLineForTraceback(CYTHON_UNUSED PyThreadState *tstate, int c_line) {
PyObject *use_cline; PyObject *use_cline;
PyObject *ptype, *pvalue, *ptraceback; PyObject *ptype, *pvalue, *ptraceback;
#if CYTHON_COMPILING_IN_CPYTHON #if CYTHON_COMPILING_IN_CPYTHON

@ -11,7 +11,7 @@ from .vits.lightning import VitsModel
from .vits.mel_processing import spectrogram_torch from .vits.mel_processing import spectrogram_torch
from .vits.wavfile import write as write_wav from .vits.wavfile import write as write_wav
_LOGGER = logging.getLogger("larynx_train.voice_converstion") _LOGGER = logging.getLogger("piper_train.voice_converstion")
def main(): def main():

@ -10,5 +10,5 @@ docker run \
-v "${HOME}:${HOME}" \ -v "${HOME}:${HOME}" \
-v /etc/hostname:/etc/hostname:ro \ -v /etc/hostname:/etc/hostname:ro \
-v /etc/localtime:/etc/localtime:ro \ -v /etc/localtime:/etc/localtime:ro \
larynx2-train \ piper-train \
"$@" "$@"

@ -17,7 +17,7 @@ if [ -d "${venv}" ]; then
source "${venv}/bin/activate" source "${venv}/bin/activate"
fi fi
python_files=("${base_dir}/larynx_train") python_files=("${base_dir}/piper_train")
# Format code # Format code
black "${python_files[@]}" black "${python_files[@]}"

@ -6,7 +6,7 @@ import setuptools
from setuptools import setup from setuptools import setup
this_dir = Path(__file__).parent this_dir = Path(__file__).parent
module_dir = this_dir / "larynx_train" module_dir = this_dir / "piper_train"
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
@ -29,23 +29,23 @@ with open(version_path, "r", encoding="utf-8") as version_file:
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
setup( setup(
name="larynx_train", name="piper_train",
version=version, version=version,
description="A fast and local neural text to speech system", description="A fast and local neural text to speech system",
long_description=long_description, long_description=long_description,
url="http://github.com/rhasspy/larynx", url="http://github.com/rhasspy/piper",
author="Michael Hansen", author="Michael Hansen",
author_email="mike@rhasspy.org", author_email="mike@rhasspy.org",
license="MIT", license="MIT",
packages=setuptools.find_packages(), packages=setuptools.find_packages(),
package_data={ package_data={
"larynx_train": ["VERSION", "py.typed"], "piper_train": ["VERSION", "py.typed"],
}, },
install_requires=requirements, install_requires=requirements,
extras_require={':python_version<"3.9"': ["importlib_resources"]}, extras_require={':python_version<"3.9"': ["importlib_resources"]},
entry_points={ entry_points={
"console_scripts": [ "console_scripts": [
"larynx-train = larynx_train.__main__:main", "piper-train = piper_train.__main__:main",
] ]
}, },
classifiers=[ classifiers=[

@ -15,7 +15,7 @@ _PAD = "_"
@dataclass @dataclass
class LarynxConfig: class PiperConfig:
num_symbols: int num_symbols: int
num_speakers: int num_speakers: int
sample_rate: int sample_rate: int
@ -26,7 +26,7 @@ class LarynxConfig:
phoneme_id_map: Mapping[str, Sequence[int]] phoneme_id_map: Mapping[str, Sequence[int]]
class Larynx: class Piper:
def __init__( def __init__(
self, self,
model_path: Union[str, Path], model_path: Union[str, Path],
@ -114,12 +114,12 @@ class Larynx:
return wav_io.getvalue() return wav_io.getvalue()
def load_config(config_path: Union[str, Path]) -> LarynxConfig: def load_config(config_path: Union[str, Path]) -> PiperConfig:
with open(config_path, "r", encoding="utf-8") as config_file: with open(config_path, "r", encoding="utf-8") as config_file:
config_dict = json.load(config_file) config_dict = json.load(config_file)
inference = config_dict.get("inference", {}) inference = config_dict.get("inference", {})
return LarynxConfig( return PiperConfig(
num_symbols=config_dict["num_symbols"], num_symbols=config_dict["num_symbols"],
num_speakers=config_dict["num_speakers"], num_speakers=config_dict["num_speakers"],
sample_rate=config_dict["audio"]["sample_rate"], sample_rate=config_dict["audio"]["sample_rate"],

@ -5,7 +5,7 @@ import time
from functools import partial from functools import partial
from pathlib import Path from pathlib import Path
from . import Larynx from . import Piper
_FILE = Path(__file__) _FILE = Path(__file__)
_DIR = _FILE.parent _DIR = _FILE.parent
@ -34,7 +34,7 @@ def main() -> None:
args = parser.parse_args() args = parser.parse_args()
logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO) logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
voice = Larynx(args.model, config_path=args.config, use_cuda=args.cuda) voice = Piper(args.model, config_path=args.config, use_cuda=args.cuda)
synthesize = partial( synthesize = partial(
voice.synthesize, voice.synthesize,
speaker_id=args.speaker, speaker_id=args.speaker,

@ -17,7 +17,7 @@ if [ -d "${venv}" ]; then
source "${venv}/bin/activate" source "${venv}/bin/activate"
fi fi
python_files=("${base_dir}/larynx") python_files=("${base_dir}/piper")
# Format code # Format code
black "${python_files[@]}" black "${python_files[@]}"

@ -14,4 +14,4 @@ if [ -d "${venv}" ]; then
source "${venv}/bin/activate" source "${venv}/bin/activate"
fi fi
python3 -m larynx "$@" python3 -m piper "$@"
Loading…
Cancel
Save