Fix synthesis speaker id bug

1 year ago · a34b2d4f3b
parent dfa03d80e6
commit a34b2d4f3b
3 changed files with 7 additions and 5 deletions
--- a/src/cpp/main.cpp
+++ b/src/cpp/main.cpp
@@ -188,7 +188,7 @@ void parseArgs(int argc, char *argv[], RunConfig &runConfig) {
      runConfig.outputPath = filesystem::path(argv[++i]);
    } else if (arg == "-s" || arg == "--speaker") {
      ensureArg(argc, argv, i);
-      runConfig.speakerId = (larynx::SpeakerId)stoll(argv[++i]);
+      runConfig.speakerId = (larynx::SpeakerId)stoi(argv[++i]);
    } else if (arg == "-h" || arg == "--help") {
      printUsage(argv);
      exit(0);
--- a/src/cpp/synthesize.hpp
+++ b/src/cpp/synthesize.hpp
@@ -53,10 +53,12 @@ void synthesize(SynthesisConfig &synthesisConfig, ModelSession &session,
      Ort::Value::CreateTensor<float>(memoryInfo, scales.data(), scales.size(),
                                      scalesShape.data(), scalesShape.size()));

+  // Add speaker id.
+  // NOTE: Declared outside the "if" below: the tensor only references these buffers (no copy), so they must outlive it.
+  vector<int64_t> speakerId{(int64_t)synthesisConfig.speakerId.value_or(0)};
+  vector<int64_t> speakerIdShape{(int64_t)speakerId.size()};
+
  if (synthesisConfig.speakerId) {
-    // Add speaker id
-    vector<int64_t> speakerId{(int64_t)synthesisConfig.speakerId.value()};
-    vector<int64_t> speakerIdShape{(int64_t)speakerId.size()};
    inputTensors.push_back(Ort::Value::CreateTensor<int64_t>(
        memoryInfo, speakerId.data(), speakerId.size(), speakerIdShape.data(),
        speakerIdShape.size()));
--- a/src/cpp/wavfile.hpp
+++ b/src/cpp/wavfile.hpp
@@ -27,7 +27,7 @@ struct WavHeader {

 // Write WAV file header only
 void writeWavHeader(int sampleRate, int sampleWidth, int channels,
-                    uint32_t numSamples, ostream &audioFile) {
+                    uint32_t numSamples, std::ostream &audioFile) {
  WavHeader header;
  header.chunkSize = numSamples + sizeof(WavHeader) - 8;
  header.sampleRate = sampleRate;