mirror of https://github.com/rhasspy/piper
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
160 lines
5.0 KiB
Python
160 lines
5.0 KiB
Python
import argparse
|
|
import logging
|
|
import sys
|
|
import time
|
|
import wave
|
|
from pathlib import Path
|
|
from typing import Any, Dict
|
|
|
|
from . import PiperVoice
|
|
from .download import ensure_voice_exists, find_voice, get_voices
|
|
|
|
# Path of this module file, as reported by ``__file__``.
_FILE = Path(__file__)

# Module logger, named after the file's stem (file name without its suffix).
_LOGGER = logging.getLogger(_FILE.stem)

# Directory that contains this module file.
_DIR = _FILE.parent
|
|
|
|
|
|
def main() -> None:
    """Run the command-line interface.

    Parses the command-line options, resolves the requested voice (going
    through the voice download helpers when ``--model`` is not an existing
    path), loads it with ``PiperVoice.load`` and synthesizes text read from
    standard input in one of three modes, checked in this order:

    * ``--output-raw``: every non-empty input line is synthesized and its
      audio chunks are written to stdout, flushing after each chunk.
    * ``--output-dir``: every non-empty input line is written to its own WAV
      file inside that directory, named ``<time.monotonic_ns()>.wav``.
    * otherwise: all of stdin is read at once and written as a single WAV,
      to ``--output-file`` or to stdout when that option is unset or ``-``.
    """
    cli = argparse.ArgumentParser()
    add_option = cli.add_argument

    add_option("-m", "--model", required=True, help="Path to Onnx model file")
    add_option("-c", "--config", help="Path to model config file")
    add_option(
        "-f",
        "--output-file",
        "--output_file",
        help="Path to output WAV file (default: stdout)",
    )
    add_option(
        "-d",
        "--output-dir",
        "--output_dir",
        help="Path to output directory (default: cwd)",
    )
    add_option(
        "--output-raw",
        "--output_raw",
        action="store_true",
        help="Stream raw audio to stdout",
    )

    # Synthesis parameters; left as None when not given on the command line.
    add_option("-s", "--speaker", type=int, help="Id of speaker (default: 0)")
    add_option("--length-scale", "--length_scale", type=float, help="Phoneme length")
    add_option("--noise-scale", "--noise_scale", type=float, help="Generator noise")
    add_option("--noise-w", "--noise_w", type=float, help="Phoneme width noise")

    add_option("--cuda", action="store_true", help="Use GPU")

    add_option(
        "--sentence-silence",
        "--sentence_silence",
        type=float,
        default=0.0,
        help="Seconds of silence after each sentence",
    )

    # Voice lookup / download locations. Because the action is "append" and
    # the default is a non-empty list, directories given on the command line
    # are added after the current working directory.
    add_option(
        "--data-dir",
        "--data_dir",
        action="append",
        default=[str(Path.cwd())],
        help="Data directory to check for downloaded models (default: current directory)",
    )
    add_option(
        "--download-dir",
        "--download_dir",
        help="Directory to download voices into (default: first data dir)",
    )

    add_option(
        "--update-voices",
        action="store_true",
        help="Download latest voices.json during startup",
    )

    add_option("--debug", action="store_true", help="Print DEBUG messages to console")

    opts = cli.parse_args()

    log_level = logging.DEBUG if opts.debug else logging.INFO
    logging.basicConfig(level=log_level)
    _LOGGER.debug(opts)

    # Without an explicit download directory, use the first data directory.
    opts.download_dir = opts.download_dir or opts.data_dir[0]

    # A model argument that is not an existing path is treated as a voice
    # name to be resolved (and fetched if needed) through the data dirs.
    if not Path(opts.model).exists():
        catalog = get_voices(opts.download_dir, update_voices=opts.update_voices)

        # Expose every alias as its own entry so old voice names keep working.
        alias_entries: Dict[str, Any] = {
            alias: {"_is_alias": True, **info}
            for info in catalog.values()
            for alias in info.get("aliases", [])
        }
        catalog.update(alias_entries)

        ensure_voice_exists(opts.model, opts.data_dir, opts.download_dir, catalog)
        opts.model, opts.config = find_voice(opts.model, opts.data_dir)

    voice = PiperVoice.load(opts.model, config_path=opts.config, use_cuda=opts.cuda)
    synth_kwargs = dict(
        speaker_id=opts.speaker,
        length_scale=opts.length_scale,
        noise_scale=opts.noise_scale,
        noise_w=opts.noise_w,
        sentence_silence=opts.sentence_silence,
    )

    if opts.output_raw:
        # Line-by-line input; audio goes to stdout as soon as it is produced.
        raw_out = sys.stdout.buffer
        for raw_line in sys.stdin:
            sentence = raw_line.strip()
            if sentence:
                for chunk in voice.synthesize_stream_raw(sentence, **synth_kwargs):
                    raw_out.write(chunk)
                    raw_out.flush()
        return

    if opts.output_dir:
        target_dir = Path(opts.output_dir)
        target_dir.mkdir(parents=True, exist_ok=True)

        # Line-by-line input; one WAV file per non-empty line.
        for raw_line in sys.stdin:
            sentence = raw_line.strip()
            if sentence:
                wav_path = target_dir / f"{time.monotonic_ns()}.wav"
                with wave.open(str(wav_path), "wb") as wav_file:
                    voice.synthesize(sentence, wav_file, **synth_kwargs)

                _LOGGER.info("Wrote %s", wav_path)
        return

    # Single-WAV mode: consume the whole input before synthesizing.
    text = sys.stdin.read()

    if opts.output_file and opts.output_file != "-":
        wav_target = opts.output_file
    else:
        # No file given (or "-"): write the WAV to stdout.
        wav_target = sys.stdout.buffer

    with wave.open(wav_target, "wb") as wav_file:
        voice.synthesize(text, wav_file, **synth_kwargs)
|
|
|
|
|
|
if __name__ == "__main__":
    # Start the CLI only when this module is executed, not when it is imported.
    main()
|