From 40171cb6ff4b7e85b929bde146d670c102913b3d Mon Sep 17 00:00:00 2001 From: rokujyushi Date: Mon, 18 May 2026 16:51:10 +0900 Subject: [PATCH 01/18] Add HTS label support infrastructure --- OpenUtau.Core/Hts/HTSLabelPhonemizer.cs | 675 +++++++++++++++ OpenUtau.Core/Hts/HTSLabelRenderer.cs | 484 +++++++++++ OpenUtau.Core/Render/RenderPhrase.cs | 17 +- OpenUtau.Core/Util/HTS.cs | 770 ++++++++++++++++++ .../Util}/HTSLabelFile.cs | 4 +- .../Util}/Merlin.cs | 5 +- .../Util}/Python.cs | 2 +- .../Util}/Scaler.cs | 2 +- .../EnunuOnnx/EnunuOnnxPhonemizer.cs | 70 +- OpenUtau.Plugin.Builtin/EnunuOnnx/HTS.cs | 256 ------ OpenUtau.Test/Core/Util/HtsSpecTests.cs | 311 +++++++ .../Plugins/HtsLabelPhonemizerTest.cs | 242 ++++++ 12 files changed, 2548 insertions(+), 290 deletions(-) create mode 100644 OpenUtau.Core/Hts/HTSLabelPhonemizer.cs create mode 100644 OpenUtau.Core/Hts/HTSLabelRenderer.cs create mode 100644 OpenUtau.Core/Util/HTS.cs rename {OpenUtau.Plugin.Builtin/EnunuOnnx => OpenUtau.Core/Util}/HTSLabelFile.cs (99%) rename {OpenUtau.Plugin.Builtin/EnunuOnnx => OpenUtau.Core/Util}/Merlin.cs (98%) rename {OpenUtau.Plugin.Builtin/EnunuOnnx => OpenUtau.Core/Util}/Python.cs (94%) rename {OpenUtau.Plugin.Builtin/EnunuOnnx => OpenUtau.Core/Util}/Scaler.cs (97%) delete mode 100644 OpenUtau.Plugin.Builtin/EnunuOnnx/HTS.cs create mode 100644 OpenUtau.Test/Core/Util/HtsSpecTests.cs create mode 100644 OpenUtau.Test/Plugins/HtsLabelPhonemizerTest.cs diff --git a/OpenUtau.Core/Hts/HTSLabelPhonemizer.cs b/OpenUtau.Core/Hts/HTSLabelPhonemizer.cs new file mode 100644 index 000000000..d3c7f5f46 --- /dev/null +++ b/OpenUtau.Core/Hts/HTSLabelPhonemizer.cs @@ -0,0 +1,675 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text; +using K4os.Hash.xxHash; +using OpenUtau.Api; +using OpenUtau.Core.Ustx; +using OpenUtau.Core.Util; +using OpenUtau.Core.Util.nnmnkwii.io.hts; +using Serilog; +using static System.Net.Mime.MediaTypeNames; + 
+namespace OpenUtau.Core.Hts { + //Abstract phonemizer base: writes an HTS full-context score label per phrase, hands it to SendScore, and reads phoneme timing back from the mono timing label file + public abstract class HTSLabelPhonemizer : MachineLearningPhonemizer { + protected USinger singer; + //information used by HTS writer + protected Dictionary phoneDict = new Dictionary(); + protected List vowels = new List(); + protected List consonants = new List(); + protected List breaks = new List(); + protected List pauses = new List(); + protected List silences = new List(); + protected List unvoiced = new List(); + protected string lang = ""; + int key = 0; + int resolution = 480; + + //information used by openutau phonemizer + protected IG2p g2p; + //result caching + private Dictionary>> partResult = new Dictionary>>(); + + protected string tmpPath = string.Empty; + protected string tablePath = string.Empty; + protected string questionPath = string.Empty; + protected string htstmpPath = string.Empty; + protected string monoScorePath = string.Empty; + protected string fullScorePath = string.Empty; + protected string monoTimingPath = string.Empty; + protected string fullTimingPath = string.Empty; + + public HTSLabelPhonemizer() { + + } + + //Bind the singer, then load its g2p and phoneme dictionary; on a load failure the error is logged and the method returns early + public override void SetSinger(USinger singer) { + this.singer = singer; + if (singer == null) { + return; + } + phoneDict.Clear(); + //Load enuconfig: use the "enunux" subfolder when it contains enuconfig.yaml, otherwise fall back to the singer root + string rootPath; + if (File.Exists(Path.Join(singer.Location, "enunux", "enuconfig.yaml"))) { + rootPath = Path.Combine(singer.Location, "enunux"); + } else { + rootPath = singer.Location; + } + //Load g2p from enunux.yaml + //g2p dict should be load after enunu dict + //NOTE(review): phoneDict was cleared above and LoadDict only runs further down, so the g2p is built before any dictionary entry exists; confirm the intended order + try { + g2p = LoadG2p(singer.Location); + } catch (Exception e) { + Log.Error(e, "failed to load g2p dictionary"); + return; + } + //Load Dictionary + var enunuDictPath = Path.Join(rootPath, tablePath); + try { + LoadDict(enunuDictPath, singer.TextFileEncoding); + } catch (Exception e) { + Log.Error(e, $"failed to load dictionary from 
{enunuDictPath}"); + return; + } + } + + protected virtual IG2p LoadG2p(string rootPath) { + var g2ps = new List(); + + var enunuxPath = Path.Combine(rootPath, "enunux.yaml"); + var builder = G2pDictionary.NewBuilder(); + // Load dictionary from enunux.yaml and nnsvs dict + if (File.Exists(enunuxPath)) { + try { + var input = File.ReadAllText(enunuxPath, singer.TextFileEncoding); + var data = Yaml.DefaultDeserializer.Deserialize(input); + if (data.symbols != null) { + foreach (var symbolData in data.symbols) { + builder.AddSymbol(symbolData.symbol, symbolData.type); + } + } + foreach (var grapheme in phoneDict.Keys) { + builder.AddEntry(grapheme, phoneDict[grapheme]); + } + if (data.entries != null) { + foreach (var entry in data.entries) { + builder.AddEntry(entry.grapheme, entry.phonemes); + } + } + } catch (Exception e) { + Log.Error(e, $"Failed to load Dictionary"); + } + } + foreach (var entry in phoneDict.Keys) { + builder.AddEntry(entry, phoneDict[entry]); + } + g2ps.Add(builder.Build()); + return new G2pFallbacks(g2ps.ToArray()); + } + + public void LoadDict(string path, Encoding encoding) { + if (path.EndsWith(".conf")) { + LoadConf(path, encoding); + } else { + LoadTable(path, encoding); + } + } + + public void LoadTable(string path, Encoding encoding) { + var lines = File.ReadLines(path, encoding); + foreach (var line in lines) { + var lineSplit = line.Split(); + phoneDict[lineSplit[0]] = lineSplit[1..]; + } + } + + public void LoadConf(string path, Encoding encoding) { + phoneDict["SILENCES"] = new string[] { "sil" }; + phoneDict["PAUSES"] = new string[] { "pau" }; + phoneDict["BREAK"] = new string[] { "br" }; + var lines = File.ReadLines(path, encoding); + foreach (var line in lines) { + if (line.Contains('=')) { + var lineSplit = line.Split("="); + var key = lineSplit[0]; + var value = lineSplit[1]; + var phonemes = value.Trim(new char[] { '\"' }).Split(","); + phoneDict[key] = phonemes; + } + } + } + + public override void SetUp(Note[][] notes, 
UProject project, UTrack track) { + key = project.key; + resolution = project.resolution; + //将全曲拆分为句子 + var phrase = new List { notes[0] }; + for (var i = 1; i < notes.Length; ++i) { + //如果上下音符相互衔接,则不分句 + if (notes[i - 1][^1].position + notes[i - 1][^1].duration == notes[i][0].position) { + phrase.Add(notes[i]); + } else { + //如果断开了,则处理当前句子,并开启下一句 + ProcessPart(phrase.ToArray()); + phrase.Clear(); + phrase.Add(notes[i]); + } + } + if (phrase.Count > 0) { + ProcessPart(phrase.ToArray()); + } + } + + protected (string prefix, string suffix) GetPrefixAndSuffix(Note note) { + var prefix = string.Empty; + var suffix = string.Empty; + + var textList = note.lyric.Split().ToList(); + var splitFlag = true; + foreach (var text in textList) { + var existSymbol = g2p.IsValidSymbol(text); + if (existSymbol) { + splitFlag = false; + continue; + } else if (existSymbol && !splitFlag) { + splitFlag = true; + continue; + } + if (splitFlag) { + prefix += text; + } else { + suffix += text; + } + } + + return (prefix, suffix); + } + + protected abstract HTSNote CustomHTSNoteContext(HTSNote htsNote, Note note); + + //make a HTS Note from given symbols and UNotes + //TODO:Fix the processing for rests + protected HTSNote makeHtsNote(string[] symbols, IList group, int startTick) { + var htsNote = HTSContextBuilder.BuildNote( + symbols, + group[0].tone, + IsSyllableVowelExtensionNote(group[0]), + lang, + key, + timeAxis, + group[0].position, + group[^1].position + group[^1].duration, + startTick, + 0, + symbol => pauses.Contains(symbol) || silences.Contains(symbol) || breaks.Contains(symbol)); + return CustomHTSNoteContext(htsNote, group[0]) ?? 
htsNote; + } + + protected HTSNote makeHtsNote(string symbol, Note[] group, int startTick) { + return makeHtsNote(new string[] { symbol }, group, startTick); + } + + protected bool IsSyllableVowelExtensionNote(Note note) { + return note.lyric.StartsWith("+~") || note.lyric.StartsWith("+*"); + } + + private string[] ApplyExtensions(string[] symbols, Note[] notes) { + var newSymbols = new List(); + var vowelIds = ExtractVowels(symbols); + if (vowelIds.Count == 0) { + // no syllables or all consonants, the last phoneme will be interpreted as vowel + vowelIds.Add(symbols.Length - 1); + } + var lastVowelI = 0; + newSymbols.AddRange(symbols.Take(vowelIds[lastVowelI] + 1)); + for (var i = 1; i < notes.Length && lastVowelI + 1 < vowelIds.Count; i++) { + if (!IsSyllableVowelExtensionNote(notes[i])) { + var prevVowel = vowelIds[lastVowelI]; + lastVowelI++; + var vowel = vowelIds[lastVowelI]; + newSymbols.AddRange(symbols.Skip(prevVowel + 1).Take(vowel - prevVowel)); + } else { + newSymbols.Add(symbols[vowelIds[lastVowelI]]); + } + } + newSymbols.AddRange(symbols.Skip(vowelIds[lastVowelI] + 1)); + return newSymbols.ToArray(); + } + + private List ExtractVowels(string[] symbols) { + var vowelIds = new List(); + for (var i = 0; i < symbols.Length; i++) { + if (g2p.IsVowel(symbols[i])) { + vowelIds.Add(i); + } + } + return vowelIds; + } + + protected virtual Note[] HandleNotEnoughNotes(Note[] notes, List vowelIds) { + var newNotes = new List(); + newNotes.AddRange(notes.SkipLast(1)); + var lastNote = notes.Last(); + var position = lastNote.position; + var notesToSplit = vowelIds.Count - newNotes.Count; + var duration = lastNote.duration / notesToSplit / 15 * 15; + for (var i = 0; i < notesToSplit; i++) { + var durationFinal = i != notesToSplit - 1 ? 
duration : lastNote.duration - duration * (notesToSplit - 1); + newNotes.Add(new Note() { + position = position, + duration = durationFinal, + tone = lastNote.tone, + phonemeAttributes = lastNote.phonemeAttributes + }); + position += durationFinal; + } + + return newNotes.ToArray(); + } + + protected virtual Note[] HandleExcessNotes(Note[] notes, List vowelIds) { + var newNotes = new List(); + var SyllableCount = vowelIds.Count; + newNotes.AddRange(notes.Take(SyllableCount - 1)); + var lastNote = notes[SyllableCount - 1]; + newNotes.Add(new Note() { + lyric = lastNote.lyric, + phoneticHint = lastNote.phoneticHint, + position = lastNote.position, + duration = notes[(SyllableCount - 1)..].Select(note => note.duration).Sum(), + tone = lastNote.tone, + phonemeAttributes = lastNote.phonemeAttributes + }); + return newNotes.ToArray(); + } + + public string GetPhonemeType(string phoneme) { + if (phoneme == "xx") { + return "xx"; + } + if (vowels.Contains(phoneme)) { + return "v"; + } + if (pauses.Contains(phoneme)) { + return "p"; + } + if (silences.Contains(phoneme)) { + return "s"; + } + if (breaks.Contains(phoneme)) { + return "b"; + } + //if (unvoiced.Contains(phoneme)) { + // return "c"; + //} + return "c"; + } + + string[] GetSymbols(Note note) { + //priority: + //1. phonetic hint + //2. query from g2p dictionary + //3. treat lyric as phonetic hint, including single phoneme + //4. default pause + if (!string.IsNullOrEmpty(note.phoneticHint)) { + // Split space-separated symbols into an array. + return note.phoneticHint.Split() + .Where(s => g2p.IsValidSymbol(s)) // skip the invalid symbols. + .ToArray(); + } + // User has not provided hint, query g2p dictionary. + var g2presult = g2p.Query(note.lyric.ToLowerInvariant()); + if (g2presult != null) { + return g2presult; + } + //not founded in g2p dictionary, treat lyric as phonetic hint + var lyricSplited = note.lyric.Split() + .Where(s => g2p.IsValidSymbol(s)) // skip the invalid symbols. 
+ .ToArray(); + if (lyricSplited.Length > 0) { + return lyricSplited; + } + return new string[] { "pau" }; + } + + private (string[], int[], Note[]) GetSymbolsAndVowels(Note[] notes) { + var mainNote = notes[0]; + var symbols = GetSymbols(mainNote); + if (symbols == null) { + return (null, null, null); + } + if (symbols.Length == 0) { + symbols = new string[] { "" }; + } + symbols = ApplyExtensions(symbols, notes); + var vowelIds = ExtractVowels(symbols); + if (vowelIds.Count == 0) { + // no syllables or all consonants, the last phoneme will be interpreted as vowel + vowelIds.Add(symbols.Length - 1); + } + if (notes.Length < vowelIds.Count) { + notes = HandleNotEnoughNotes(notes, vowelIds); + } else if (notes.Length > vowelIds.Count) { + notes = HandleExcessNotes(notes, vowelIds); + } + return (symbols, vowelIds.ToArray(), notes); + } + + protected struct Syllable { + public List symbols; + public List notes; + } + + protected virtual HTSNote[] MakeSyllables(Note[] inputNotes, int startTick) { + (var symbols, var vowelIds, var notes) = GetSymbolsAndVowels(inputNotes); + if (symbols == null || vowelIds == null || notes == null) { + return null; + } + var firstVowelId = vowelIds[0]; + if (notes.Length < vowelIds.Length) { + //error = $"Not enough extension notes, {vowelIds.Length - notes.Length} more expected"; + return null; + } + + var syllables = new Syllable[vowelIds.Length]; + + // Making the first syllable + + // there is only empty space before us + syllables[0] = new Syllable() { + symbols = symbols.Take(firstVowelId + 1).ToList(), + notes = notes[0..1].ToList() + }; + + // normal syllables after the first one + var noteI = 1; + var ccs = new List(); + var position = 0; + var lastSymbolI = firstVowelId + 1; + for (; lastSymbolI < symbols.Length; lastSymbolI++) { + if (!vowelIds.Contains(lastSymbolI)) { + ccs.Add(symbols[lastSymbolI]); + } else { + position += notes[noteI - 1].duration; + syllables[noteI] = new Syllable() { + symbols = 
ccs.Append(symbols[lastSymbolI]).ToList(), + notes = new List() { notes[noteI] } + }; + ccs = new List(); + noteI++; + } + } + syllables[^1].symbols.AddRange(ccs); + return syllables.Select(x => makeHtsNote(x.symbols.ToArray(), x.notes, startTick)).ToArray(); + } + + HTSPhoneme[] HTSNoteToPhonemes(HTSNote htsNote) { + var htsPhonemes = htsNote.symbols.Select(x => new HTSPhoneme(x, htsNote)).ToArray(); + // 音節内の音素に対して、タイプ(母音/子音/休符など)や位置情報を付与 + foreach (var i in Enumerable.Range(0, htsPhonemes.Length)) { + htsPhonemes[i].type = GetPhonemeType(htsPhonemes[i].symbol); + htsPhonemes[i].position = i + 1; + htsPhonemes[i].position_backward = htsPhonemes.Length - i; + } + foreach (var i in Enumerable.Range(0, htsPhonemes.Length)) { + if (htsPhonemes[i].type.Equals("c")) { + var prev = i - 1; + if (prev >= 0) { + if (htsPhonemes[prev].type.Equals("v")) { + htsPhonemes[i].prev_vowel_distance = 1; + } else if (htsPhonemes[prev].prev_vowel_distance > 0) { + htsPhonemes[i].prev_vowel_distance = htsPhonemes[prev].prev_vowel_distance + 1; + } else { + htsPhonemes[i].prev_vowel_distance = 0; + } + } + } + } + for (var i = htsPhonemes.Length - 1; i >= 0; --i) { + if (htsPhonemes[i].type.Equals("c")) { + var next = i + 1; + if (next < htsPhonemes.Length) { + if (htsPhonemes[next].type.Equals("v")) { + htsPhonemes[i].next_vowel_distance = 1; + } else if (htsPhonemes[next].next_vowel_distance > 0) { + htsPhonemes[i].next_vowel_distance = htsPhonemes[next].next_vowel_distance + 1; + } else { + htsPhonemes[i].next_vowel_distance = 0; + } + } + } + } + return htsPhonemes; + } + + protected abstract void SendScore(Note[][] phrase); + + ulong HashPhraseGroups(Note[][] phrase) { + using (var stream = new MemoryStream()) { + using (var writer = new BinaryWriter(stream)) { + writer.Write(phrase.ToString()); + foreach (var phone in phrase) { + writer.Write(phone[0].lyric); + if (phone[0].phoneticHint != null) { + writer.Write("[" + phone[0].phoneticHint + "]"); + } + var attr = 
phone[0].phonemeAttributes?.FirstOrDefault(attr => attr.index == 0) ?? default; + writer.Write(attr.toneShift); + writer.Write(phone[0].position); + writer.Write(phone[0].duration); + } + return XXH64.DigestOf(stream.ToArray()); + } + } + } + + protected abstract Note[][] PhraseAdjustments(Note[][] phrese); + + protected abstract HTSPhoneme[] CustomHTSPhonemeContext(HTSPhoneme[] htsPhonemes, Note[] notes); + + + protected override void ProcessPart(Note[][] phrase) { + tmpPath = Path.Join(PathManager.Inst.CachePath, $"lab-{HashPhraseGroups(phrase):x16}"); + htstmpPath = tmpPath + "_htstemp"; + fullScorePath = Path.Join(htstmpPath, $"full_score.lab"); + fullTimingPath = Path.Join(htstmpPath, $"full_timing.lab"); + monoScorePath = Path.Join(htstmpPath, $"mono_score.lab"); + monoTimingPath = Path.Join(htstmpPath, $"mono_timing.lab"); + + phrase = PhraseAdjustments(phrase) ?? phrase; + + var startTick = phrase[0][0].position; + var endTick = phrase[^1][^1].position + phrase[^1][^1].duration; + + // パディングを小節長で設定(開始・終了ともに1小節) + var sigStart = timeAxis.TimeSignatureAtTick(startTick); + var bpmStart = timeAxis.GetBpmAtTick(startTick); + var barLenMsStart = (int)Math.Round(60000.0 / bpmStart * sigStart.beatPerBar); + var barLenTicksStart = timeAxis.MsPosToTickPos(barLenMsStart); + + var sigEnd = timeAxis.TimeSignatureAtTick(endTick); + var bpmEnd = timeAxis.GetBpmAtTick(endTick); + var barLenMsEnd = (int)Math.Round(60000.0 / bpmEnd * sigEnd.beatPerBar); + var barLenTicksEnd = timeAxis.MsPosToTickPos(barLenMsEnd); + + // 文全体の長さ(開始1小節 + 本体 + 終了1小節) + var sentenceDurMs = barLenMsStart + (int)timeAxis.MsBetweenTickPos(startTick, endTick) + barLenMsEnd; + var sentenceDurTicks = barLenTicksStart + (endTick - startTick) + barLenTicksEnd; + + var notePhIndex = new List { 1 }; // 先頭パディング分 + var phAlignPoints = new List>(); + + // 先頭パディング pau + timeAxis.TickPosToBarBeat(startTick - barLenTicksStart, out var barStart, out var beatStart, out var _); + var sigForPadStart = 
timeAxis.TimeSignatureAtTick(startTick - barLenTicksStart); + var PaddingNoteStart = new HTSNote( + symbols: new string[] { "pau" }, + beatPerBar: sigForPadStart.beatPerBar, + beatUnit: sigForPadStart.beatUnit, + positionBar: barStart, + positionBeat: beatStart, + key: key, + bpm: timeAxis.GetBpmAtTick(startTick - barLenTicksStart), + tone: 0, + isSlur: false, + isRest: true, + lang: string.Empty, + accent: string.Empty, + startms: 0, + endms: barLenMsStart, + positionTicks: startTick - barLenTicksStart, + durationTicks: barLenTicksStart + ); + var htsNotes = new List { PaddingNoteStart }; + var htsPhonemes = new List(); + htsPhonemes.AddRange(CustomHTSPhonemeContext(HTSNoteToPhonemes(PaddingNoteStart), phrase[0])); + + // 楽譜ノート → HTSノート + for (var n = 0; n < phrase.Length; ++n) { + var Syllables = MakeSyllables(phrase[n], startTick); + // 各ノートの start/end を「開始パディング加算」ベースに + foreach (var note in Syllables) { + note.startMs += barLenMsStart; + note.endMs += barLenMsStart; + } + htsNotes.AddRange(Syllables); + + for (var noteIndex = 0; noteIndex < Syllables.Length; noteIndex++) { + var htsNote = Syllables[noteIndex]; + var tmpPhonemes = HTSNoteToPhonemes(htsNote); + var notePhonemes = CustomHTSPhonemeContext(tmpPhonemes, phrase[n]) ?? 
tmpPhonemes; + + // 第1母音位置をアンカーに(絶対ms) + var firstVowelIndex = 0; + for (var phIndex = 0; phIndex < htsNote.symbols.Length; phIndex++) { + if (g2p.IsVowel(htsNote.symbols[phIndex])) { + firstVowelIndex = phIndex; + break; + } + } + phAlignPoints.Add(Tuple.Create( + htsPhonemes.Count + firstVowelIndex, + timeAxis.TickPosToMsPos(htsNote.positionTicks) + barLenMsStart + )); + htsPhonemes.AddRange(notePhonemes); + } + notePhIndex.Add(htsPhonemes.Count); + } + + // 終端パディング pau(位置は「本当の曲末」tick) + timeAxis.TickPosToBarBeat(endTick, out var barEnd, out var beatEnd, out var _); + var PaddingNoteEnd = new HTSNote( + symbols: new string[] { "pau" }, + beatPerBar: sigEnd.beatPerBar, + beatUnit: sigEnd.beatUnit, + positionBar: barEnd, + positionBeat: beatEnd, + key: key, + bpm: bpmEnd, + tone: 0, + isSlur: false, + isRest: true, + lang: string.Empty, + accent: string.Empty, + // 絶対msで末尾に配置 + startms: sentenceDurMs - barLenMsEnd, + endms: sentenceDurMs, + positionTicks: endTick, + durationTicks: barLenTicksEnd + ); + htsNotes.Add(PaddingNoteEnd); + htsPhonemes.AddRange(CustomHTSPhonemeContext(HTSNoteToPhonemes(PaddingNoteEnd), phrase[^1])); + + // 末尾アンカーは「曲末+終端パディング」位置 + var lastNote = htsNotes[^1]; + phAlignPoints.Add(Tuple.Create( + htsPhonemes.Count, + timeAxis.TickPosToMsPos(lastNote.positionTicks + lastNote.durationTicks) + barLenMsStart // = sentenceDurMs + )); + var htsPhrase = new HTSPhrase(htsNotes.ToArray()); + htsPhrase.UpdateResolution(resolution); + htsPhrase.totalNotes = htsNotes.Count - 2; + htsPhrase.totalPhonemes = htsPhonemes.Count - 3; + htsPhrase.totalPhrases = 1; + //make neighborhood links between htsNotes and between htsPhonemes + foreach (var i in Enumerable.Range(0, htsNotes.Count)) { + htsNotes[i].parent = htsPhrase; + htsNotes[i].index = i; + htsNotes[i].indexBackwards = htsNotes.Count - i - 1; + htsNotes[i].sentenceDurMs = sentenceDurMs; + htsNotes[i].sentenceDurTicks = sentenceDurTicks; + if (i > 0) { + htsNotes[i].prev = htsNotes[i - 1]; + htsNotes[i 
- 1].next = htsNotes[i]; + } + } + for (var i = 1; i < htsPhonemes.Count; ++i) { + htsPhonemes[i].prev = htsPhonemes[i - 1]; + htsPhonemes[i - 1].next = htsPhonemes[i]; + } + + try { + if (!Directory.Exists(htstmpPath)) { + Directory.CreateDirectory(htstmpPath); + } + File.WriteAllLines(fullScorePath, htsPhonemes.Select(x => x.dump())); + } catch (Exception e) { + Log.Error(e.ToString()); + throw; + } + + SendScore(phrase); + if (!File.Exists(monoTimingPath)) { + Log.Error($"File not found.:{monoTimingPath}"); + return; + } + + var hTSLabels = hts.load(monoTimingPath, Encoding.UTF8); + + // 100ns -> ms は 10000 で割る + var labPositions = + hTSLabels.Skip(1).SkipLast(1).Select(label => (label.end_time - label.start_time) / 10000.0).ToList(); + labPositions.Insert(0, labPositions[0]); + labPositions.Add(labPositions[^1]); + + var positions = HTSContextBuilder.AlignTimingPositions(labPositions, phAlignPoints); + + // 出力(略) + var phonemesRedirected = htsPhonemes.Select(x => x.symbol).ToArray(); + for (var groupIndex = 0; groupIndex < phrase.Length; groupIndex++) { + var group = phrase[groupIndex]; + if (group[0].lyric.StartsWith("+")) { + continue; + } + var notePos = timeAxis.TickPosToMsPos(group[0].position) + barLenMsStart; // ms + var noteResult = HTSContextBuilder.BuildAlignedNoteTimingResult( + phonemesRedirected, + notePhIndex[groupIndex], + notePhIndex[groupIndex + 1], + positions, + notePos, + timeAxis.TicksBetweenMsPos); + partResult[group[0].position] = noteResult; + } + } + + public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevNeighbour, Note? 
nextNeighbour, Note[] prevs) { + if (!partResult.TryGetValue(notes[0].position, out var phonemes)) { + throw new Exception("error"); + } + return new Result { + phonemes = phonemes + .Select((tu) => new Phoneme() { + phoneme = tu.Item1, + position = tu.Item2, + }) + .ToArray(), + }; + } + } +} diff --git a/OpenUtau.Core/Hts/HTSLabelRenderer.cs b/OpenUtau.Core/Hts/HTSLabelRenderer.cs new file mode 100644 index 000000000..986505de6 --- /dev/null +++ b/OpenUtau.Core/Hts/HTSLabelRenderer.cs @@ -0,0 +1,484 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text; +using System.Threading; +using System.Threading.Tasks; +using OpenUtau.Api; +using OpenUtau.Core.Render; +using OpenUtau.Core.Ustx; +using OpenUtau.Core.Util; +using Serilog; + +namespace OpenUtau.Core.Hts { + public abstract class HTSLabelRenderer : IRenderer { + + static readonly object lockObj = new object(); + + public virtual bool SupportsRenderPitch => true; + + public abstract USingerType SingerType { get; } + + public abstract bool SupportsExpression(UExpressionDescriptor descriptor); + + protected TimeAxis timeAxis; + + //information used by HTS writer + protected Dictionary phoneDict = new Dictionary(); + protected List vowels = new List(); + protected List consonants = new List(); + protected List breaks = new List(); + protected List pauses = new List(); + protected List silences = new List(); + protected List unvoiced = new List(); + protected List macronLyrics = new List(); + protected int startTick; + protected int endTick; + protected UTimeSignature sigStart; + protected double bpmStart; + protected double headMs; + protected int barLenTicksStart; + protected UTimeSignature sigEnd; + protected double bpmEnd; + protected double tailMs; + protected int barLenTicksEnd; + protected string lang = ""; + protected int key = 0; + protected int resolution = 480; + protected int framePeriod = 5; + + //information used by openutau phonemizer + protected 
IG2p g2p; + //result caching + private Dictionary>> partResult = new Dictionary>>(); + protected string tablePath = string.Empty; + protected string monoScorePath = string.Empty; + protected string fullScorePath = string.Empty; + protected string monoTimingPath = string.Empty; + protected string fullTimingPath = string.Empty; + + public virtual void SetUp() { + phoneDict.Clear(); + lang = "JPN";//TODO: use singer.language + // Lyrics often handled in OpenUtau + phoneDict.Add("R", new string[] { "pau" }); + phoneDict.Add("-", new string[] { "pau" }); + phoneDict.Add("SP", new string[] { "pau" }); + phoneDict.Add("AP", new string[] { "br" }); + g2p = LoadG2p(); + } + + protected virtual void LoadDict(string path, Encoding encoding) { + if (path.EndsWith(".conf")) { + LoadConf(path, encoding); + } else { + LoadTable(path, encoding); + } + } + + private void LoadTable(string path, Encoding encoding) { + var lines = File.ReadLines(path, encoding); + foreach (var line in lines) { + var lineSplit = line.Split(); + phoneDict[lineSplit[0]] = lineSplit[1..]; + } + } + + private void LoadConf(string path, Encoding encoding) { + phoneDict["SILENCES"] = new string[] { "sil" }; + phoneDict["PAUSES"] = new string[] { "pau" }; + phoneDict["BREAK"] = new string[] { "br" }; + var lines = File.ReadLines(path, encoding); + foreach (var line in lines) { + if (line.Contains('=')) { + var lineSplit = line.Split("="); + var key = lineSplit[0]; + var value = lineSplit[1]; + var phonemes = value.Trim(new char[] { '\"' }).Split(","); + phoneDict[key] = phonemes; + } + } + } + protected IG2p LoadG2p() { + var g2ps = new List(); + var builder = G2pDictionary.NewBuilder(); + vowels.AddRange(phoneDict["VOWELS"]); + breaks.AddRange(phoneDict["BREAK"]); + pauses.AddRange(phoneDict["PAUSES"]); + silences.AddRange(phoneDict["SILENCES"]); + consonants.AddRange(phoneDict["PHONEME_CL"]); + macronLyrics.AddRange(phoneDict["MACRON"]); + foreach (var dict in phoneDict.Values) { + foreach (var phoneme in 
dict) { + if (!consonants.Contains(phoneme) && !vowels.Contains(phoneme) && + !breaks.Contains(phoneme) && !pauses.Contains(phoneme) && + !silences.Contains(phoneme)) { + consonants.Add(phoneme); + } + if (!consonants.Contains(phoneme)) { + builder.AddSymbol(phoneme, true); + } else { + builder.AddSymbol(phoneme, false); + } + } + } + foreach (var entry in phoneDict.Keys) { + builder.AddEntry(entry, phoneDict[entry]); + foreach (var reduction in phoneDict["VOWEL_REDUCTION"]) { + var phonemes = phoneDict[entry].Except(vowels).ToList(); + if (phonemes.Count == 0) continue; + builder.AddEntry(entry + reduction, phonemes); + } + foreach (var macron in phoneDict["MACRON"]) { + var addPhonemes = phoneDict[entry].Where(x => vowels.Contains(x)).ToList(); + if (addPhonemes.Count == 0) continue; + var phonemes = phoneDict[entry].ToList(); + phonemes.AddRange(addPhonemes); + builder.AddEntry(entry + macron, phonemes); + macronLyrics.Add(entry + macron); + } + } + g2ps.Add(builder.Build()); + return new G2pFallbacks(g2ps.ToArray()); + } + + + + protected (string prefix, string suffix) GetPrefixAndSuffix(RenderNote note) { + string prefix = string.Empty; + string suffix = string.Empty; + + var textList = note.lyric.Split().ToList(); + bool splitFlag = true; + foreach (var text in textList) { + var existSymbol = g2p.IsValidSymbol(text); + if (existSymbol) { + splitFlag = false; + continue; + } else if (existSymbol && !splitFlag) { + splitFlag = true; + continue; + } + if (splitFlag) { + prefix += text; + } else { + suffix += text; + } + } + + return (prefix, suffix); + } + + private RenderPhone FindLastVowelOrLastPhoneme(RenderPhone[] phonemes) { + for (int i = phonemes.Length - 1; i >= 0; --i) { + if (g2p.IsVowel(phonemes[i].phoneme)) { + return phonemes[i]; + } + } + return phonemes[^1]; + } + + protected virtual HTSNote CustomHTSNoteContext(HTSNote htsNote, RenderNote note) { + var fixs = GetPrefixAndSuffix(note); + if (!htsNote.isRest && !htsNote.isSlur) { + 
htsNote.langDependent = "0"; // no macron + if (macronLyrics.Contains(note.lyric)) { + htsNote.langDependent = "1"; // macron + } + } + return htsNote; + } + + //make a HTS Note from given symbols and UNotes + private HTSNote makeHtsNote(string[] symbols, RenderNote note, int startTick, double leadingMs) { + var positiontick = startTick + note.position; + var endTick = positiontick + note.duration; + UTimeSignature sig = timeAxis.TimeSignatureAtTick(positiontick); + timeAxis.TickPosToBarBeat(positiontick, out int bar, out int beat, out int remainingTicks); + var isRest = symbols.Select(x => x.ToLowerInvariant()).Any(x => pauses.Contains(x) || silences.Contains(x) || breaks.Contains(x)); + var htsNote = new HTSNote( + symbols: symbols, + tone: note.tone, + isSlur: IsSyllableVowelExtensionNote(note), + isRest: isRest, + lang: isRest ? string.Empty : lang, + accent: string.Empty, + beatPerBar: sig.beatPerBar, + beatUnit: sig.beatUnit, + positionBar: bar, + positionBeat: beat, + key: key, + bpm: timeAxis.GetBpmAtTick(positiontick), + startms: timeAxis.MsBetweenTickPos(startTick, positiontick) + leadingMs, + endms: timeAxis.MsBetweenTickPos(startTick, endTick) + leadingMs, + positionTicks: positiontick, + durationTicks: note.duration + ); + return CustomHTSNoteContext(htsNote, note) ?? 
htsNote; + } + private HTSNote makeHtsNote(string symbol, RenderNote note, int startTick, double leadingMs) { + return makeHtsNote(new string[] { symbol }, note, startTick, leadingMs); + } + + protected virtual bool IsSyllableVowelExtensionNote(RenderNote note) { + return note.lyric.StartsWith("+~") || note.lyric.StartsWith("+*"); + } + + //Map a phoneme symbol to its type code: "xx", "v" (vowel), "p" (pause), "s" (silence), "b" (break), otherwise "c" + private string GetPhonemeType(string phoneme) { + if (phoneme == "xx") { + return "xx"; + } + if (vowels.Contains(phoneme)) { + return "v"; + } + if (pauses.Contains(phoneme)) { + return "p"; + } + if (silences.Contains(phoneme)) { + return "s"; + } + if (breaks.Contains(phoneme)) { + return "b"; + } + //if (unvoiced.Contains(phoneme)) { + // return "c"; + //} + return "c"; + } + + //Expand a note into one HTSPhoneme per symbol and fill in type, 1-based position, backward position and the distances to the neighbouring vowels + private HTSPhoneme[] HTSNoteToPhonemes(HTSNote htsNote) { + var htsPhonemes = htsNote.symbols.Select(x => new HTSPhoneme(x, htsNote)).ToArray(); + foreach (int i in Enumerable.Range(0, htsPhonemes.Length)) { + htsPhonemes[i].type = GetPhonemeType(htsPhonemes[i].symbol); + htsPhonemes[i].position = i + 1; + htsPhonemes[i].position_backward = htsPhonemes.Length - i; + if (htsPhonemes[i].type.Equals("c")) { + int prev = i - 1; + if (prev >= 0) { + if (htsPhonemes[prev].type.Equals("v")) { + htsPhonemes[i].prev_vowel_distance = 1; + } else { + htsPhonemes[i].prev_vowel_distance = htsPhonemes[prev].prev_vowel_distance + 1; + } + } + } + } + for (int i = htsPhonemes.Length - 1; i >= 0; --i) { // walk down to index 0 so a leading consonant also receives its next-vowel distance + if (htsPhonemes[i].type.Equals("c")) { + int next = i + 1; + if (next < htsPhonemes.Length) { + if (htsPhonemes[next].type.Equals("v")) { + htsPhonemes[i].next_vowel_distance = 1; + } else { + htsPhonemes[i].next_vowel_distance = htsPhonemes[next].next_vowel_distance + 1; + } + } + } + } + return htsPhonemes; + } + + protected abstract HTSPhoneme[] CustomHTSPhonemeContext(HTSPhoneme[] htsPhonemes, RenderNote notes); + + private struct monoLabel { + public string symbol; + public double startMs; + public double endMs; + public override string ToString() { + return 
/// <summary>
/// Builds the HTS full-context score label and the mono timing label for one phrase
/// and writes them to <c>fullScorePath</c> and <c>monoTimingPath</c>.
/// </summary>
/// <remarks>
/// The phrase is wrapped in a leading and a trailing "pau" padding note. The padding
/// lengths and the end-of-phrase tempo/time signature are read from the fields
/// <c>headMs</c>, <c>tailMs</c>, <c>sigEnd</c> and <c>bpmEnd</c>, which <c>Layout</c> assigns.
/// </remarks>
/// <param name="phrase">Phrase whose notes and already-timed phones are converted.</param>
public void ProcessPart(RenderPhrase phrase) {
    if (timeAxis == null) {
        timeAxis = phrase.timeAxis;
    }

    int startTick = phrase.position;
    int endTick = phrase.position + phrase.duration;

    // Length of the whole sentence: head padding + phrase body + tail padding.
    double sentenceDurMs = headMs + phrase.endMs - phrase.positionMs + tailMs;
    int sentenceDurTicks = barLenTicksStart + (endTick - startTick) + barLenTicksEnd;

    // Leading padding "pau", placed one padding length before the phrase start.
    timeAxis.TickPosToBarBeat(startTick - barLenTicksStart, out int barStart, out int beatStart, out int _);
    var sigForPadStart = timeAxis.TimeSignatureAtTick(startTick - barLenTicksStart);

    List<monoLabel> monoLabels_ = new List<monoLabel>();
    // Running end time (ms) of the mono labels emitted so far.
    double phonemeDuration = 0;

    HTSNote PaddingNoteStart = new HTSNote(
        symbols: new string[] { "pau" },
        beatPerBar: sigForPadStart.beatPerBar,
        beatUnit: sigForPadStart.beatUnit,
        positionBar: barStart,
        positionBeat: beatStart,
        key: key,
        bpm: timeAxis.GetBpmAtTick(startTick - barLenTicksStart),
        tone: 0,
        isSlur: false,
        isRest: true,
        lang: string.Empty,
        accent: string.Empty,
        startms: 0,
        endms: headMs,
        positionTicks: startTick - barLenTicksStart,
        durationTicks: barLenTicksStart
    );
    var htsNotes = new List<HTSNote> { PaddingNoteStart };
    var htsPhonemes = new List<HTSPhoneme>();
    htsPhonemes.AddRange(HTSNoteToPhonemes(PaddingNoteStart));

    monoLabels_.Add(new monoLabel() {
        symbol = htsPhonemes[0].symbol,
        startMs = phonemeDuration,
        endMs = headMs
    });
    phonemeDuration += headMs;

    // Alignment: group the rendered phones by the note they belong to.
    var phonemesByNoteIndex = phrase.phones
        .GroupBy(phone => phone.noteIndex)
        .ToDictionary(
            group => group.Key,
            group => group.Select(phone => phone).ToArray());
    var lastBasePhonemes = Array.Empty<RenderPhone>();
    var tuples = new List<Tuple<HTSNote, int>>();
    for (int noteIndex = 0; noteIndex < phrase.notes.Length; noteIndex++) {
        var note = phrase.notes[noteIndex];
        if (phonemesByNoteIndex.TryGetValue(noteIndex, out var phonemes)) {
            foreach (var phone in phonemes) {
                monoLabels_.Add(new monoLabel() {
                    symbol = phone.phoneme,
                    startMs = phonemeDuration,
                    endMs = phonemeDuration + phone.durationMs
                });
                phonemeDuration += phone.durationMs;
            }

            lastBasePhonemes = phonemes;
            HTSNote htsNote = makeHtsNote(phonemes.Select(phone => phone.phoneme).ToArray(), note, startTick, headMs);
            tuples.Add(Tuple.Create(htsNote, noteIndex));
        } else if (IsSyllableVowelExtensionNote(note)) {
            // An extension note stretches the last vowel of the preceding regular note.
            var extensionPhoneme = FindLastVowelOrLastPhoneme(lastBasePhonemes);
            if (!string.IsNullOrEmpty(extensionPhoneme.phoneme)) {
                monoLabels_.Add(new monoLabel() {
                    symbol = extensionPhoneme.phoneme,
                    startMs = phonemeDuration,
                    endMs = phonemeDuration + note.durationMs
                });
                phonemeDuration += note.durationMs;

                HTSNote htsNote = makeHtsNote(extensionPhoneme.phoneme, note, startTick, headMs);
                tuples.Add(Tuple.Create(htsNote, noteIndex));
            }
        }
        // Notes with neither phones nor an extension lyric contribute nothing.
    }
    for (int i = 0; i < tuples.Count; i++) {
        var htsNote = tuples[i].Item1;
        htsNotes.Add(htsNote);
        // HTSNote.e() treats index <= 0 as "xx" and derives percentages from
        // (index - 1), so both counters are 1-based over the sung notes:
        // first note -> index 1, last note -> indexBackwards 1.
        // (htsNotes.Count - i is always 2 here because the list grows with i.)
        htsNote.index = i + 1;
        htsNote.indexBackwards = tuples.Count - i;
        htsNote.sentenceDurMs = sentenceDurMs;
        htsNote.sentenceDurTicks = sentenceDurTicks;
        var tmpPhonemes = HTSNoteToPhonemes(htsNote);
        var notePhonemes = CustomHTSPhonemeContext(tmpPhonemes, phrase.notes[tuples[i].Item2]) ?? tmpPhonemes;
        htsPhonemes.AddRange(notePhonemes);
    }

    // Trailing padding "pau", positioned at the real end tick of the phrase.
    timeAxis.TickPosToBarBeat(endTick, out int barEnd, out int beatEnd, out int _);
    HTSNote PaddingNoteEnd = new HTSNote(
        symbols: new string[] { "pau" },
        beatPerBar: sigEnd.beatPerBar,
        beatUnit: sigEnd.beatUnit,
        positionBar: barEnd,
        positionBeat: beatEnd,
        key: key,
        bpm: bpmEnd,
        tone: 0,
        isSlur: false,
        isRest: true,
        lang: string.Empty,
        accent: string.Empty,
        // Placed at the very end of the sentence, in sentence-relative ms.
        startms: sentenceDurMs - tailMs,
        endms: sentenceDurMs,
        positionTicks: endTick,
        durationTicks: barLenTicksEnd
    );
    htsNotes.Add(PaddingNoteEnd);
    htsPhonemes.AddRange(HTSNoteToPhonemes(PaddingNoteEnd));

    monoLabels_.Add(new monoLabel() {
        symbol = htsPhonemes[^1].symbol,
        startMs = phonemeDuration,
        endMs = sentenceDurMs
    });

    var htsPhrase = new HTSPhrase(htsNotes.ToArray());
    htsPhrase.UpdateResolution(resolution);
    htsPhrase.totalNotes = htsNotes.Count - 1;
    htsPhrase.totalPhonemes = htsPhonemes.Count - 1;
    htsPhrase.totalPhrases = 1;

    // Link every note to the phrase and to its neighbours, then chain the phonemes.
    for (int i = 0; i < htsNotes.Count; i++) {
        htsNotes[i].parent = htsPhrase;
        if (i > 0) {
            htsNotes[i].prev = htsNotes[i - 1];
            htsNotes[i - 1].next = htsNotes[i];
        }
    }
    for (int i = 1; i < htsPhonemes.Count; ++i) {
        htsPhonemes[i].prev = htsPhonemes[i - 1];
        htsPhonemes[i - 1].next = htsPhonemes[i];
    }

    try {
        File.WriteAllLines(fullScorePath, htsPhonemes.Select(x => x.dump()));
        File.WriteAllLines(monoTimingPath, monoLabels_.Select(x => x.ToString()));
    } catch (Exception e) {
        Log.Error(e.ToString());
        // Rethrow with "throw;" so the original stack trace is preserved.
        throw;
    }
}
timeAxis.GetBpmAtTick(startTick); + headMs = (int)Math.Round((60000.0 / bpmStart) * sigStart.beatPerBar); + + sigEnd = timeAxis.TimeSignatureAtTick(endTick); + bpmEnd = timeAxis.GetBpmAtTick(endTick); + tailMs = (int)Math.Round((60000.0 / bpmEnd) * sigEnd.beatPerBar); + return new RenderResult() { + leadingMs = headMs, + positionMs = phrase.positionMs, + estimatedLengthMs = headMs + phrase.durationMs + tailMs, + }; + } + + public abstract Task Render(RenderPhrase phrase, Progress progress, int trackNo, CancellationTokenSource cancellation, bool isPreRender); + + public abstract UExpressionDescriptor[] GetSuggestedExpressions(USinger singer, URenderSettings renderSettings); + + public abstract override string ToString(); + + public abstract RenderPitchResult LoadRenderedPitch(RenderPhrase phrase); + } +} diff --git a/OpenUtau.Core/Render/RenderPhrase.cs b/OpenUtau.Core/Render/RenderPhrase.cs index 048fd64f6..bf302843e 100644 --- a/OpenUtau.Core/Render/RenderPhrase.cs +++ b/OpenUtau.Core/Render/RenderPhrase.cs @@ -72,13 +72,13 @@ public class RenderPhone { public readonly bool direct; public readonly Vector2[] envelope; - // voicevox & enunu args + // voicevox & enunu & neutrino args public readonly int toneShift; public readonly UOto oto; public readonly ulong hash; - internal RenderPhone(UProject project, UTrack track, UVoicePart part, UNote note, UPhoneme phoneme, int phrasePosition) { + internal RenderPhone(UProject project, UTrack track, UVoicePart part, UNote note, UPhoneme phoneme, int phrasePosition, int noteIndex) { position = part.position + phoneme.position - phrasePosition; duration = phoneme.Duration; end = position + duration; @@ -90,6 +90,7 @@ internal RenderPhone(UProject project, UTrack track, UVoicePart part, UNote note this.phoneme = phoneme.phoneme; tone = note.tone; + this.noteIndex = noteIndex; tempos = project.timeAxis.TemposBetweenTicks(part.position + phoneme.position - leading, part.position + phoneme.End); UTempo[] noteTempos = 
/// <summary>
/// Pitch-name helpers for HTS labels. HTS labels spell flats with "b" instead of
/// sharps with "#", and write signed integers with a "p"/"m" prefix.
/// </summary>
public static class HTS {
    /// <summary>Flat-spelled note names, indexed by semitone within the octave (0 = C).</summary>
    public static readonly string[] KeysInOctave = {
        "C",
        "Db",
        "D",
        "Eb",
        "E",
        "F",
        "Gb",
        "G",
        "Ab",
        "A",
        "Bb",
        "B",
    };

    /// <summary>Semitone within the octave for both sharp and flat spellings.</summary>
    public static readonly Dictionary<string, int> NameInOctave = new Dictionary<string, int> {
        { "C", 0 }, { "C#", 1 }, { "Db", 1 },
        { "D", 2 }, { "D#", 3 }, { "Eb", 3 },
        { "E", 4 },
        { "F", 5 }, { "F#", 6 }, { "Gb", 6 },
        { "G", 7 }, { "G#", 8 }, { "Ab", 8 },
        { "A", 9 }, { "A#", 10 }, { "Bb", 10 },
        { "B", 11 },
    };

    /// <summary>
    /// Returns the note name with octave for a note number (60 gives "C4"),
    /// or an empty string when <paramref name="noteNum"/> is negative.
    /// </summary>
    public static string GetToneName(int noteNum) {
        if (noteNum < 0) {
            return string.Empty;
        }
        return KeysInOctave[noteNum % 12] + (noteNum / 12 - 1).ToString();
    }

    /// <summary>
    /// Returns the semitone of the note within its octave (0-11) as a string,
    /// or an empty string when <paramref name="noteNum"/> is negative.
    /// </summary>
    public static string GetOctaveNum(int noteNum) {
        // Guard first: in C# the remainder keeps the sign of the dividend, so a
        // negative note number would index KeysInOctave out of range.
        if (noteNum < 0) {
            return string.Empty;
        }
        NameInOctave.TryGetValue(KeysInOctave[noteNum % 12], out int num);
        return num.ToString();
    }

    /// <summary>
    /// Parses a note name such as "C4", "C#4" or "Db4" into a note number.
    /// </summary>
    /// <returns>The note number, or -1 when the name cannot be parsed.</returns>
    public static int NameToTone(string name) {
        if (name == null || name.Length < 2) {
            return -1;
        }
        // The pitch name is two characters when an accidental follows the letter.
        var str = name.Substring(0, (name[1] == '#' || name[1] == 'b') ? 2 : 1);
        var num = name.Substring(str.Length);
        if (!int.TryParse(num, out int octave)) {
            return -1;
        }
        if (!NameInOctave.TryGetValue(str, out int inOctave)) {
            return -1;
        }
        return 12 * (octave + 1) + inOctave;
    }

    /// <summary>
    /// Writes an integer in HTS form: "p" + value for zero or positive values,
    /// "m" + absolute value for negative ones.
    /// </summary>
    public static string WriteInt(int integer) {
        return (integer >= 0 ? "p" : "m") + Math.Abs(integer).ToString();
    }
}
/// <summary>
/// Finds the position of the first symbol accepted by <paramref name="isVowel"/>.
/// </summary>
/// <param name="symbols">Phoneme symbols to scan, in order.</param>
/// <param name="isVowel">Predicate deciding whether a symbol counts as a vowel.</param>
/// <returns>The index of the first match, or 0 when no symbol matches.</returns>
public static int FindFirstVowelIndex(IReadOnlyList<string> symbols, Func<string, bool> isVowel) {
    int index = 0;
    foreach (var symbol in symbols) {
        if (isVowel(symbol)) {
            return index;
        }
        index++;
    }
    // No vowel found: fall back to the first position.
    return 0;
}
/// <summary>
/// Scales a run of durations by <paramref name="ratio"/> and lays them out so that
/// the run ends at <paramref name="endPos"/>.
/// </summary>
/// <param name="source">Durations of consecutive segments.</param>
/// <param name="ratio">Factor applied to every duration.</param>
/// <param name="endPos">Position at which the last segment ends.</param>
/// <returns>The start position of each segment, one entry per element of <paramref name="source"/>.</returns>
public static List<double> Stretch(IList<double> source, double ratio, double endPos) {
    var starts = new List<double>();
    // The first segment starts one scaled total length before the end position.
    double cursor = endPos - source.Sum() * ratio;
    foreach (var duration in source) {
        starts.Add(cursor);
        cursor += duration * ratio;
    }
    return starts;
}

/// <summary>
/// Lazily yields the running total of <paramref name="sequence"/>, beginning from
/// <paramref name="start"/>; the first value yielded is <c>start</c> plus the first item.
/// </summary>
public static IEnumerable<double> CumulativeSum(IEnumerable<double> sequence, double start = 0) {
    double total = start;
    foreach (var value in sequence) {
        total += value;
        yield return total;
    }
}
next; + public HTSNote parent; + + //informations about this phoneme + //v:vowel, c:consonant, p:pause, s:silence, b:break + public string type = "xx"; + //(number of phonemes before this phoneme in this note) + 1 + public int position = 1; + //(number of phonemes after this phoneme in this note) + 1 + public int position_backward = 1; + //Here -1 means null + //distances to vowels in this note, -1 for vowels themselves + public int prev_vowel_distance = 0; + public int next_vowel_distance = 0; + + public HTSPhoneme(string phoneme, HTSNote note) { + this.symbol = phoneme; + this.parent = note; + } + + public HTSPhoneme? beforePrev { + get { + if (prev == null) { return null; } else { return prev.prev; } + } + } + + public HTSPhoneme? afterNext { + get { + if (next == null) { return null; } else { return next.next; } + } + } + + public string dump() { + //Write phoneme as an HTS line + // 100ns単位出力時にintオーバーフローを避けるためlongへ + string result = + $"{(long)Math.Round(parent.startMs * 10000.0)} {(long)Math.Round(parent.endMs * 10000.0)} " + //Phoneme informations + + string.Format("{0}@{1}^{2}-{3}+{4}={5}_{6}%{7}^{8}_{9}~{10}-{11}!{12}[{13}${14}]{15}", p()) + //Syllable informations + + string.Format("/A:{0}-{1}-{2}@{3}~{4}", a()) + + string.Format("/B:{0}_{1}_{2}@{3}|{4}", b()) + + string.Format("/C:{0}+{1}+{2}@{3}&{4}", c()) + //Note informations + + string.Format("/D:{0}!{1}#{2}${3}%{4}|{5}&{6};{7}-{8}", d()) + + string.Format( + "/E:{0}]{1}^{2}={3}~{4}!{5}@{6}#{7}+{8}]{9}${10}|{11}[{12}&{13}]{14}={15}^{16}~{17}#{18}_{19};{20}${21}&{22}%{23}[{24}|{25}]{26}-{27}^{28}+{29}~{30}={31}@{32}${33}!{34}%{35}#{36}|{37}|{38}-{39}&{40}&{41}+{42}[{43};{44}]{45};{46}~{47}~{48}^{49}^{50}@{51}[{52}#{53}={54}!{55}~{56}+{57}!{58}^{59}", + e()) + + string.Format("/F:{0}#{1}#{2}-{3}${4}${5}+{6}%{7};{8}", f()) + + string.Format("/G:{0}_{1}", g()) + + string.Format("/H:{0}_{1}", h()) + + string.Format("/I:{0}_{1}", i()) + + string.Format("/J:{0}~{1}@{2}", j()) + ; + return result; + } + + 
/// <summary>
/// Builds the 16 phoneme-level context values (the "p" fields) of the HTS line:
/// phoneme type, the symbols and flag1 values of the two phonemes on either side
/// and of this one, the forward/backward position in the note, the distances to
/// the previous/next vowel, and flag2.
/// </summary>
/// <returns>An array of 16 strings; missing neighbours and zero distances are written as "xx".</returns>
public string[] p() {
    // "xx" is the HTS placeholder for "no value".
    string SymbolOf(HTSPhoneme? phoneme) => phoneme == null ? "xx" : phoneme.symbol;
    string Flag1Of(HTSPhoneme? phoneme) => phoneme == null ? "xx" : phoneme.flag1;
    string DistanceOf(int distance) => distance == 0 ? "xx" : distance.ToString();

    var twoBefore = beforePrev;
    var twoAfter = afterNext;

    return new string[] {
        type,
        SymbolOf(twoBefore),
        SymbolOf(prev),
        symbol,
        SymbolOf(next),
        SymbolOf(twoAfter),
        Flag1Of(twoBefore),
        Flag1Of(prev),
        flag1,
        Flag1Of(next),
        Flag1Of(twoAfter),
        position.ToString(),
        position_backward.ToString(),
        DistanceOf(prev_vowel_distance),
        DistanceOf(next_vowel_distance),
        flag2,
    };
}
+ public class HTSNote { + public double startMs = 0; + public double endMs = 0; + public int positionTicks; + public int durationTicks = 0; + public int index = 0;//index of this note in sentence + public int indexBackwards = 0; + public double sentenceDurMs = 0; + public int sentenceDurTicks = 0; + public double startMsPercent = 0; + + //TimeSignatures + public int beatPerBar = 0; + public int beatUnit = 0; + + public int positionBar = 1; //bar number in the sentence, starting from 1 + public int positionBeat = 1; //unit number in the bar, starting from 1 + + public double key = 0; + public double bpm = 0; + public int tone = 0; + public bool isSlur = false; + public bool isRest = true; + public string[] symbols; + public string lang = string.Empty; + public string langDependent = "xx"; + public string accent = string.Empty; + + public HTSNote? prev; + public HTSNote? next; + public HTSPhrase parent; + + public HTSNote(string[] symbols, int beatPerBar, int beatUnit, int positionBar, int positionBeat, int key, double bpm, int tone, bool isSlur, bool isRest, string lang, string accent, double startms, double endms, int positionTicks, int durationTicks) { + this.startMs = startms; + this.endMs = endms; + this.beatPerBar = beatPerBar; + this.beatUnit = beatUnit; + this.positionBar = positionBar; + this.positionBeat = positionBeat; + this.key = key; + this.bpm = bpm; + this.tone = tone; + this.isSlur = isSlur; + this.isRest = isRest; + this.lang = lang; + this.accent = accent; + this.symbols = symbols; + this.positionTicks = positionTicks; + this.durationTicks = durationTicks; + } + + public double durationMs { + get { return endMs - startMs; } + } + + private double startMsBackwards { + get { return sentenceDurMs - startMs; } + } + + private int positionTickBackwards { + get { return sentenceDurTicks - positionTicks; } + } + + + public int? measureIndexForward; + public double? measureMsForward; + public int? measureTickForward; + public int? 
measurePercentForward; + public int? measureIndexBackward; + public double? measureMsBackward; + public int? measureTickBackward; + public int? measurePercentBackward; + + public int? accentIndexForward; + public double? accentMsForward; + public int? accentTickForward; + public int? accentIndexBackward; + public double? accentMsBackward; + public int? accentTickBackward; + + public string[] a() { + if (prev == null) { + return Enumerable.Repeat("xx", 5).ToArray(); + } else if (prev.isRest) { + return Enumerable.Repeat("xx", 5).ToArray(); + } else { + return prev.b(); + } + } + + public string[] b() { + return new string[] { + symbols.Length.ToString(), + "1", + "1", + lang != string.Empty ? lang : "xx", + langDependent, + }; + } + + public string[] c() { + if (next == null) { + return Enumerable.Repeat("xx", 5).ToArray(); + } else if (next.isRest) { + return Enumerable.Repeat("xx", 5).ToArray(); + } else { + return next.b(); + } + } + + public string[] d() { + if (prev == null) { + return Enumerable.Repeat("xx", 60).ToArray(); + } else if (prev.isRest) { + return Enumerable.Repeat("xx", 60).ToArray(); + } else { + return prev.e(); + } + } + + public string[] e() { + var result = Enumerable.Repeat("xx", 60).ToArray(); + result[0] = isRest ? "xx" : HTS.GetToneName(tone); + result[1] = isRest ? "xx" : HTS.GetOctaveNum(tone); + result[2] = ((int)Math.Round(key)).ToString(); + result[3] = $"{beatPerBar}/{beatUnit}"; + result[4] = ((int)Math.Round(bpm)).ToString(); + result[5] = "1"; + + int lengthCs = Math.Max(0, (int)Math.Round(durationMs / 10.0)); + int ticksPer96th = (parent != null && parent.resolution > 0) ? parent.resolution / 24 : 0; + int length96 = (ticksPer96th > 0) ? (int)Math.Round((double)durationTicks / ticksPer96th) : 0; + result[6] = lengthCs.ToString(); + result[7] = length96.ToString(); + + result[9] = measureIndexForward != null ? measureIndexForward.ToString() : "xx"; // e10 + result[10] = measureIndexBackward != null ? 
measureIndexBackward.ToString() : "xx"; // e11 + result[11] = measureMsForward != null ? ((int)Math.Round(measureMsForward.Value)).ToString() : "xx"; // e12 (centisecond already) + result[12] = measureMsBackward != null ? ((int)Math.Round(measureMsBackward.Value)).ToString() : "xx"; // e13 + result[13] = measureTickForward != null ? measureTickForward.ToString() : "xx"; // e14 (96th already) + result[14] = measureTickBackward != null ? measureTickBackward.ToString() : "xx"; // e15 + result[15] = measurePercentForward != null ? measurePercentForward.ToString() : "xx"; // e16 + result[16] = measurePercentBackward != null ? measurePercentBackward.ToString() : "xx"; // e17 + + if (!isRest) { + result[17] = index <= 0 ? "xx" : index.ToString(); + result[18] = indexBackwards <= 0 ? "xx" : indexBackwards.ToString(); + result[19] = ((int)Math.Round(startMs / 10)).ToString(); // 10ms単位 + result[20] = ((int)Math.Round(startMsBackwards / 10)).ToString(); + + // e22/e23: phrase-level position by 96th note, resolution independent + if (ticksPer96th > 0 && parent != null && parent.notes != null && index > 0) { + int firstPhraseTick = parent.notes + .Select(note => note.positionTicks) + .DefaultIfEmpty(positionTicks) + .Min(); + int lastPhraseTick = parent.notes + .Select(note => note.positionTicks) + .DefaultIfEmpty(positionTicks) + .Max(); + int forwardTicks = Math.Max(0, positionTicks - firstPhraseTick); + int backwardTicks = Math.Max(0, lastPhraseTick - positionTicks); + result[21] = ((forwardTicks + ticksPer96th / 2) / ticksPer96th).ToString(); + result[22] = ((backwardTicks + ticksPer96th / 2) / ticksPer96th).ToString(); + } else { + result[21] = "xx"; + result[22] = "xx"; + } + + int totalNotes = parent?.totalNotes ?? 
0; + if (totalNotes > 1) { + result[23] = ((index - 1) * 100 / (totalNotes - 1)).ToString(); + result[24] = ((indexBackwards - 1) * 100 / (totalNotes - 1)).ToString(); + } else { + result[23] = "xx"; + result[24] = "xx"; + } + + } + + if (prev != null) { + result[25] = prev.isSlur && isSlur ? "1" : "0"; + } else { + result[25] = "0"; + } + if (next != null) { + result[26] = next.isSlur && isSlur ? "1" : "0"; + } else { + result[26] = "0"; + } + result[27] = "n"; + result[28] = accentIndexBackward.HasValue ? accentIndexBackward.Value.ToString() : "xx"; + result[29] = accentIndexForward.HasValue ? accentIndexForward.Value.ToString() : "xx"; + result[30] = accentMsBackward.HasValue ? ((int)Math.Round(accentMsBackward.Value / 10.0)).ToString() : "xx"; + result[31] = accentMsForward.HasValue ? ((int)Math.Round(accentMsForward.Value / 10.0)).ToString() : "xx"; + result[32] = (accentTickBackward.HasValue && ticksPer96th > 0) ? ((int)Math.Round((double)accentTickBackward.Value / ticksPer96th)).ToString() : "xx"; + result[33] = (accentTickForward.HasValue && ticksPer96th > 0) ? ((int)Math.Round((double)accentTickForward.Value / ticksPer96th)).ToString() : "xx"; + + // TODO: e34-e56 remain intentionally "xx" until OpenUtau adopts a + // verified mapping for staccato / crescendo / decrescendo related + // score-label contexts. Keep current behavior visible instead of + // guessing values from timing-label-only information. + + if (!isRest && this.tone > 0) { + result[56] = (prev == null || prev.isRest || prev.tone <= 0) ? "xx" : HTS.WriteInt(prev.tone - tone); + result[57] = (next == null || next.isRest || next.tone <= 0) ? 
"xx" : HTS.WriteInt(next.tone - tone); + } else { + result[56] = "xx"; + result[57] = "xx"; + } + return result; + } + + public string[] f() { + if (next == null) { + return Enumerable.Repeat("xx", 60).ToArray(); + } else if (next.isRest) { + return Enumerable.Repeat("xx", 60).ToArray(); + } else { + return next.e(); + } + } + + public string[] g() { + //TODO Calculate using HTSPhrase + if (prev != null) { + if (isRest) { + return prev.h(); + } + } + return parent.g(); + } + + public string[] h() { + // TODO Calculate using HTSPhrase + if (isRest) { + return Enumerable.Repeat("xx", 2).ToArray(); + } + return parent.h(); + } + + public string[] i() { + //TODO Calculate using HTSPhrase + if (next != null) { + if (isRest) { + return next.h(); + } + } + return parent.i(); + } + + public string[] j() { + return parent.j(); + } + } + + public class HTSPhrase { + public int resolution = 480; + public int totalPhrases; + public int totalNotes; + public int totalPhonemes; + + public HTSPhrase? prev; + public HTSPhrase? 
next; + public HTSNote[] notes; + + public HTSPhrase(HTSNote[] notes) { + this.notes = notes; + RecalculateDerivedContexts(); + } + + public void UpdateResolution(int resolution) { + this.resolution = resolution; + RecalculateDerivedContexts(); + } + + void RecalculateDerivedContexts() { + foreach (var note in notes) { + note.accentIndexForward = null; + note.accentMsForward = null; + note.accentTickForward = null; + note.accentIndexBackward = null; + note.accentMsBackward = null; + note.accentTickBackward = null; + note.measureIndexForward = null; + note.measureMsForward = null; + note.measureTickForward = null; + note.measurePercentForward = null; + note.measureIndexBackward = null; + note.measureMsBackward = null; + note.measureTickBackward = null; + note.measurePercentBackward = null; + } + + // アクセント(forward) + int accentIndexForwardSum = 0; + double accentMsForwardSum = 0; + int accentTickForwardSum = 0; + for (int i = 0; i < notes.Length; i++) { + var note = notes[i]; + if (note.isRest) { + accentIndexForwardSum = 0; + accentMsForwardSum = 0; + accentTickForwardSum = 0; + } else if (!string.IsNullOrEmpty(note.accent)) { + note.accentIndexForward = 0; + note.accentMsForward = 0; + note.accentTickForward = 0; + + accentIndexForwardSum = 1; + accentMsForwardSum = note.durationMs; + accentTickForwardSum = note.durationTicks; + } else { + if (accentIndexForwardSum != 0) { + note.accentIndexForward = accentIndexForwardSum; + accentIndexForwardSum += 1; + } + if (accentMsForwardSum != 0) { + note.accentMsForward = accentMsForwardSum; + accentMsForwardSum += note.durationMs; + } + if (accentTickForwardSum != 0) { + note.accentTickForward = accentTickForwardSum; + accentTickForwardSum += note.durationTicks; + } + } + } + + // アクセント(backward) + int accentIndexBackwardSum = 0; + double accentMsBackwardSum = 0; + int accentTickBackwardSum = 0; + int lastAccentIndexContribution = 0; + double lastAccentMs = 0; + int lastAccentTicks = 0; + for (int i = notes.Length - 1; i 
>= 0; i--) { + var note = notes[i]; + if (note.isRest) { + accentIndexBackwardSum = 0; + accentMsBackwardSum = 0; + accentTickBackwardSum = 0; + lastAccentIndexContribution = 0; + lastAccentMs = 0; + lastAccentTicks = 0; + } else if (!string.IsNullOrEmpty(note.accent)) { + note.accentIndexBackward = Math.Max(0, accentIndexBackwardSum - lastAccentIndexContribution); + note.accentMsBackward = Math.Max(0, accentMsBackwardSum - lastAccentMs); + note.accentTickBackward = Math.Max(0, accentTickBackwardSum - lastAccentTicks); + + lastAccentIndexContribution = 1; + lastAccentMs = note.durationMs; + lastAccentTicks = note.durationTicks; + + accentIndexBackwardSum = 1; + accentMsBackwardSum = note.durationMs; + accentTickBackwardSum = note.durationTicks; + } else { + if (accentIndexBackwardSum != 0) { + note.accentIndexBackward = accentIndexBackwardSum; + accentIndexBackwardSum += 1; + } + if (accentMsBackwardSum != 0) { + note.accentMsBackward = accentMsBackwardSum; + accentMsBackwardSum += note.durationMs; + } + if (accentTickBackwardSum != 0) { + note.accentTickBackward = accentTickBackwardSum; + accentTickBackwardSum += note.durationTicks; + } + + } + } + + // 小節ごとのグルーピング(positionBar 基準) + var groups = notes + .GroupBy(n => n.positionBar) + .OrderBy(g => g.Key) + .Select(g => g.OrderBy(n => n.positionTicks).ToList()) + .ToList(); + + int ticksPer96th = (resolution > 0) ? (resolution / 24) : 0; + + foreach (var group in groups) { + double totalDurationMs = group.Sum(n => n.durationMs); + int totalDurationTicks = group.Sum(n => n.durationTicks); + int totalNotesInMeasure = group.Count; + // forward(小節先頭からの位置) + double accMsF = 0; + int accTicksF = 0; + for (var noteIndex = 0; noteIndex < group.Count; noteIndex++) { + var note = group[noteIndex]; + note.measureIndexForward = noteIndex + 1; + note.measureMsForward = (int)Math.Round(accMsF / 100.0); + note.measureTickForward = ticksPer96th > 0 ? 
(int)Math.Round((double)accTicksF / ticksPer96th) : 0; + note.measurePercentForward = totalNotesInMeasure > 1 ? (noteIndex * 100) / (totalNotesInMeasure - 1) : 0; + + accMsF += note.durationMs; + accTicksF += note.durationTicks; + } + + // backward + double accMsB = 0; + int accTicksB = 0; + for (int noteIndex = group.Count - 1; noteIndex >= 0; --noteIndex) { + var note = group[noteIndex]; + int backwardIndex = group.Count - noteIndex; + note.measureIndexBackward = backwardIndex; + note.measureMsBackward = (int)Math.Round(accMsB / 100.0); + note.measureTickBackward = ticksPer96th > 0 ? (int)Math.Round((double)accTicksB / ticksPer96th) : 0; + note.measurePercentBackward = totalNotesInMeasure > 1 ? ((backwardIndex - 1) * 100) / (totalNotesInMeasure - 1) : 0; + + accMsB += note.durationMs; + accTicksB += note.durationTicks; + } + } + } + private int barCount { + get { return notes[^1].positionBar - notes[0].positionBar + 1; } + } + + public string[] g() { + var result = Enumerable.Repeat("xx", 2).ToArray(); + if (prev == null) { + return result; + } else { + return prev.h(); + } + } + + public string[] h() { + var result = Enumerable.Repeat("xx", 2).ToArray(); + result[0] = notes.Length.ToString(); + result[1] = notes.Select(note => note.symbols.Length).Sum().ToString(); + return result; + } + + public string[] i() { + var result = Enumerable.Repeat("xx", 2).ToArray(); + if (next == null) { + return result; + } else { + return next.h(); + } + } + + public string[] j() { + var result = Enumerable.Repeat("xx", 3).ToArray(); + result[0] = (barCount > 0 ? (totalNotes / barCount).ToString() : "xx"); + result[1] = (barCount > 0 ? 
(totalPhonemes / barCount).ToString() : "xx"); + result[2] = totalPhrases.ToString(); + return result; + } + } +} diff --git a/OpenUtau.Plugin.Builtin/EnunuOnnx/HTSLabelFile.cs b/OpenUtau.Core/Util/HTSLabelFile.cs similarity index 99% rename from OpenUtau.Plugin.Builtin/EnunuOnnx/HTSLabelFile.cs rename to OpenUtau.Core/Util/HTSLabelFile.cs index 87fd0028a..944c4f844 100644 --- a/OpenUtau.Plugin.Builtin/EnunuOnnx/HTSLabelFile.cs +++ b/OpenUtau.Core/Util/HTSLabelFile.cs @@ -5,10 +5,10 @@ using System.Collections; using System.IO; using System.Text.RegularExpressions; -using OpenUtau.Plugin.Builtin.EnunuOnnx.nnmnkwii.python; +using OpenUtau.Core.Util.nnmnkwii.python; //reference: https://github.com/r9y9/nnmnkwii/blob/master/nnmnkwii/io/hts.py -namespace OpenUtau.Plugin.Builtin.EnunuOnnx.nnmnkwii.io.hts { +namespace OpenUtau.Core.Util.nnmnkwii.io.hts { public class HTSLabel { public int start_time = 0; public int end_time = 0; diff --git a/OpenUtau.Plugin.Builtin/EnunuOnnx/Merlin.cs b/OpenUtau.Core/Util/Merlin.cs similarity index 98% rename from OpenUtau.Plugin.Builtin/EnunuOnnx/Merlin.cs rename to OpenUtau.Core/Util/Merlin.cs index cfb874872..59da9dc0d 100644 --- a/OpenUtau.Plugin.Builtin/EnunuOnnx/Merlin.cs +++ b/OpenUtau.Core/Util/Merlin.cs @@ -2,10 +2,11 @@ using System.Collections.Generic; using System.Text.RegularExpressions; using System.Linq; -using OpenUtau.Plugin.Builtin.EnunuOnnx.nnmnkwii.io.hts; +using OpenUtau.Core.Util; +using OpenUtau.Core.Util.nnmnkwii.io.hts; //reference: https://github.com/r9y9/nnmnkwii/blob/master/nnmnkwii/frontend/merlin.py -namespace OpenUtau.Plugin.Builtin.EnunuOnnx.nnmnkwii.frontend { +namespace OpenUtau.Core.Util.nnmnkwii.frontend { public class merlin { //TODO:Should subphone_features be an enum? 
static Dictionary frame_feature_size_dict = new Dictionary diff --git a/OpenUtau.Plugin.Builtin/EnunuOnnx/Python.cs b/OpenUtau.Core/Util/Python.cs similarity index 94% rename from OpenUtau.Plugin.Builtin/EnunuOnnx/Python.cs rename to OpenUtau.Core/Util/Python.cs index 9a27970d1..a8306a96d 100644 --- a/OpenUtau.Plugin.Builtin/EnunuOnnx/Python.cs +++ b/OpenUtau.Core/Util/Python.cs @@ -1,7 +1,7 @@ using System; using System.Text.RegularExpressions; -namespace OpenUtau.Plugin.Builtin.EnunuOnnx.nnmnkwii.python { +namespace OpenUtau.Core.Util.nnmnkwii.python { public class AssertionError : Exception { public AssertionError() : base() { } diff --git a/OpenUtau.Plugin.Builtin/EnunuOnnx/Scaler.cs b/OpenUtau.Core/Util/Scaler.cs similarity index 97% rename from OpenUtau.Plugin.Builtin/EnunuOnnx/Scaler.cs rename to OpenUtau.Core/Util/Scaler.cs index 6201fcfe5..39a5b303f 100644 --- a/OpenUtau.Plugin.Builtin/EnunuOnnx/Scaler.cs +++ b/OpenUtau.Core/Util/Scaler.cs @@ -4,7 +4,7 @@ using System.Text; using Newtonsoft.Json; -namespace OpenUtau.Plugin.Builtin.EnunuOnnx { +namespace OpenUtau.Core.Util { public class ScalerLine { public float xmin; public float scale; diff --git a/OpenUtau.Plugin.Builtin/EnunuOnnx/EnunuOnnxPhonemizer.cs b/OpenUtau.Plugin.Builtin/EnunuOnnx/EnunuOnnxPhonemizer.cs index 8c3b74a07..d9c664bcf 100644 --- a/OpenUtau.Plugin.Builtin/EnunuOnnx/EnunuOnnxPhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/EnunuOnnx/EnunuOnnxPhonemizer.cs @@ -9,9 +9,10 @@ using OpenUtau.Api; using OpenUtau.Core; using OpenUtau.Core.Ustx; +using OpenUtau.Core.Util; +using OpenUtau.Core.Util.nnmnkwii.frontend; +using OpenUtau.Core.Util.nnmnkwii.io.hts; using OpenUtau.Plugin.Builtin.EnunuOnnx; -using OpenUtau.Plugin.Builtin.EnunuOnnx.nnmnkwii.frontend; -using OpenUtau.Plugin.Builtin.EnunuOnnx.nnmnkwii.io.hts; using Serilog; //This phonemizer is a pure C# implemention of the ENUNU phonemizer, @@ -280,9 +281,21 @@ string[] GetSymbols(Note note) { //make a HTS Note from given symbols and UNotes 
protected HTSNote makeHtsNote(string[] symbols, IList group, int startTick) { + UTimeSignature sig = timeAxis.TimeSignatureAtTick(group[0].position); + timeAxis.TickPosToBarBeat(group[0].position, out int bar, out int beat, out int remainingTicks); return new HTSNote( symbols: symbols, tone: group[0].tone, + isSlur: IsSyllableVowelExtensionNote(group[0]), + isRest: symbols.Select(x => x.ToLowerInvariant()).Any(x => pauses.Contains(x) || silences.Contains(x) || breaks.Contains(x)), + beatPerBar: sig.beatPerBar, + beatUnit: sig.beatUnit, + positionBar: bar, + positionBeat: beat, + key: 0, + lang: string.Empty, + accent: string.Empty, + bpm: timeAxis.GetBpmAtTick(group[0].position), startms: (int)timeAxis.MsBetweenTickPos(startTick, group[0].position) + paddingMs, endms: (int)timeAxis.MsBetweenTickPos(startTick, group[^1].position + group[^1].duration) + paddingMs, positionTicks: group[0].position, @@ -439,26 +452,32 @@ protected virtual HTSNote[] MakeSyllables(Note[] inputNotes, int startTick) { HTSPhoneme[] HTSNoteToPhonemes(HTSNote htsNote) { var htsPhonemes = htsNote.symbols.Select(x => new HTSPhoneme(x, htsNote)).ToArray(); - int prevVowelPos = -1; foreach (int i in Enumerable.Range(0, htsPhonemes.Length)) { + htsPhonemes[i].type = GetPhonemeType(htsPhonemes[i].symbol); htsPhonemes[i].position = i + 1; htsPhonemes[i].position_backward = htsPhonemes.Length - i; - htsPhonemes[i].type = GetPhonemeType(htsPhonemes[i].symbol); - if (htsPhonemes[i].type == "v") { - prevVowelPos = i; - } else { - if (prevVowelPos > 0) { - htsPhonemes[i].distance_from_previous_vowel = i - prevVowelPos; + } + foreach (int i in Enumerable.Range(0, htsPhonemes.Length)) { + if (htsPhonemes[i].type.Equals("c")) { + int next = i + 1; + if (next < htsPhonemes.Length) { + if (htsPhonemes[next].type.Equals("v")) { + htsPhonemes[i].next_vowel_distance = 1; + } else { + htsPhonemes[i].next_vowel_distance = htsPhonemes[next].next_vowel_distance + 1; + } } } } - int nextVowelPos = -1; for (int i = 
htsPhonemes.Length - 1; i > 0; --i) { - if (htsPhonemes[i].type == "v") { - nextVowelPos = i; - } else { - if (nextVowelPos > 0) { - htsPhonemes[i].distance_to_next_vowel = nextVowelPos - i; + if (htsPhonemes[i].type.Equals("c")) { + int prev = i - 1; + if (prev >= 0) { + if (htsPhonemes[prev].type.Equals("v")) { + htsPhonemes[i].prev_vowel_distance = 1; + } else { + htsPhonemes[i].prev_vowel_distance = htsPhonemes[prev].prev_vowel_distance + 1; + } } } } @@ -473,9 +492,21 @@ void ProcessPart(Note[][] phrase) { int paddingTicks = timeAxis.MsPosToTickPos(paddingMs); var notePhIndex = new List { 1 };//每个音符的第一个音素在音素列表上对应的位置 var phAlignPoints = new List>();//音素对齐的位置,Ms,绝对时间 + UTimeSignature sig = timeAxis.TimeSignatureAtTick(phrase[0][0].position - paddingTicks); + timeAxis.TickPosToBarBeat(phrase[0][0].position - paddingTicks, out int bar, out int beat, out int remainingTicks); HTSNote PaddingNote = new HTSNote( - symbols: new string[] { "sil" }, + symbols: new string[] { defaultPause }, + beatPerBar: sig.beatPerBar, + beatUnit: sig.beatUnit, + positionBar: bar, + positionBeat: beat, + key: 0, + bpm: 0, tone: 0, + isSlur: false, + isRest: true, + lang: string.Empty, + accent: string.Empty, startms: 0, endms: paddingMs, positionTicks: phrase[0][0].position - paddingTicks, @@ -515,11 +546,12 @@ void ProcessPart(Note[][] phrase) { htsPhonemes.Count, timeAxis.TickPosToMsPos(lastNote.positionTicks + lastNote.durationTicks))); + var htsPhrase = new HTSPhrase(htsNotes.ToArray()); + htsPhrase.totalNotes = htsNotes.Count; + htsPhrase.totalPhonemes = htsPhonemes.Count; //make neighborhood links between htsNotes and between htsPhonemes foreach (int i in Enumerable.Range(0, htsNotes.Count)) { - htsNotes[i].index = i; - htsNotes[i].indexBackwards = htsNotes.Count - i; - htsNotes[i].sentenceDurMs = sentenceDurMs; + htsNotes[i].parent = htsPhrase; if (i > 0) { htsNotes[i].prev = htsNotes[i - 1]; htsNotes[i - 1].next = htsNotes[i]; diff --git 
a/OpenUtau.Plugin.Builtin/EnunuOnnx/HTS.cs b/OpenUtau.Plugin.Builtin/EnunuOnnx/HTS.cs deleted file mode 100644 index ffe0750b6..000000000 --- a/OpenUtau.Plugin.Builtin/EnunuOnnx/HTS.cs +++ /dev/null @@ -1,256 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; - -//This file implement utaupy.hts python library's function -//https://github.com/oatsu-gh/utaupy/blob/master/utaupy/hts.py - -//HTS labels use b instead of # -//In HTS labels, "xx" is a preserved keyword that means null -namespace OpenUtau.Plugin.Builtin.EnunuOnnx { - public static class HTS { - public static readonly string[] KeysInOctave = { - "C", - "Db", - "D", - "Eb", - "E", - "F", - "Gb", - "G", - "Ab", - "A", - "Bb", - "B" , - }; - - public static readonly Dictionary NameInOctave = new Dictionary { - { "C", 0 }, { "C#", 1 }, { "Db", 1 }, - { "D", 2 }, { "D#", 3 }, { "Eb", 3 }, - { "E", 4 }, - { "F", 5 }, { "F#", 6 }, { "Gb", 6 }, - { "G", 7 }, { "G#", 8 }, { "Ab", 8 }, - { "A", 9 }, { "A#", 10 }, { "Bb", 10 }, - { "B", 11 }, - }; - - public static string GetToneName(int noteNum) { - return noteNum < 0 ? string.Empty : KeysInOctave[noteNum % 12] + (noteNum / 12 - 1).ToString(); - } - - //return -1 if error - public static int NameToTone(string name) { - if (name.Length < 2) { - return -1; - } - var str = name.Substring(0, (name[1] == '#' || name[1] == 'b') ? 2 : 1); - var num = name.Substring(str.Length); - if (!int.TryParse(num, out int octave)) { - return -1; - } - if (!NameInOctave.TryGetValue(str, out int inOctave)) { - return -1; - } - return 12 * (octave + 1) + inOctave; - } - - //write integer with "p" as positive and "n" as negative. 0 is "p0" - public static string WriteInt(int integer) { - return (integer >= 0 ? "p":"m" )+Math.Abs(integer).ToString(); - } - } - - public class HTSPhoneme{ - public string symbol; - - //Links to this phoneme's neighbors and parent - public HTSPhoneme? prev; - public HTSPhoneme? 
next; - public HTSNote parent; - - //informations about this phoneme - //v:vowel, c:consonant, p:pause, s:silence, b:break - public string type = "xx"; - //(number of phonemes before this phoneme in this note) + 1 - public int position = 1; - //(number of phonemes after this phoneme in this note) + 1 - public int position_backward = 1; - //Here -1 means null - //distances to vowels in this note, -1 for vowels themselves - public int distance_from_previous_vowel = -1; - public int distance_to_next_vowel = -1; - - public HTSPhoneme(string phoneme, HTSNote note) { - this.symbol = phoneme; - this.parent = note; - } - - public HTSPhoneme? beforePrev { - get { - if (prev == null) { return null; } else { return prev.prev;} - } - } - - public HTSPhoneme? afterNext { - get { - if (next == null) { return null; } else { return next.next; } - } - } - - public string dump() { - //Write phoneme as an HTS line - - string result = - $"{parent.startMs * 100000} {parent.endMs * 100000} " - //Phoneme informations - + string.Format("{0}@{1}^{2}-{3}+{4}={5}_{6}%{7}^{8}_{9}~{10}-{11}!{12}[{13}${14}]{15}", p()) - //Syllable informations - + string.Format("/A:{0}-{1}-{2}@{3}~{4}", a()) - + string.Format("/B:{0}_{1}_{2}@{3}|{4}", b()) - + string.Format("/C:{0}+{1}+{2}@{3}&{4}", c()) - //Note informations - + string.Format("/D:{0}!{1}#{2}${3}%{4}|{5}&{6};{7}-{8}", d()) - + string.Format( - "/E:{0}]{1}^{2}={3}~{4}!{5}@{6}#{7}+{8}]{9}${10}|{11}[{12}&{13}]{14}={15}^{16}~{17}#{18}_{19};{20}${21}&{22}%{23}[{24}|{25}]{26}-{27}^{28}+{29}~{30}={31}@{32}${33}!{34}%{35}#{36}|{37}|{38}-{39}&{40}&{41}+{42}[{43};{44}]{45};{46}~{47}~{48}^{49}^{50}@{51}[{52}#{53}={54}!{55}~{56}+{57}!{58}^{59}", - e()) - +string.Format("/F:{0}#{1}#{2}-{3}${4}${5}+{6}%{7};{8}",f()) - + "/G:xx_xx/H:xx_xx/I:xx_xx/J:xx~xx@1" - ; - return result; - } - - public string[] p() { - var result = Enumerable.Repeat("xx",16).ToArray(); - result[0] = type; - result[1] = (beforePrev == null) ? 
"xx" : beforePrev.symbol; - result[2] = (prev == null) ? "xx" : prev.symbol; - result[3] = symbol; - result[4] = (next == null) ? "xx" : next.symbol; - result[5] = (afterNext == null) ? "xx" : afterNext.symbol; - result[11] = position.ToString(); - result[12] = position_backward.ToString(); - result[13] = distance_from_previous_vowel < 0 ? "xx" : distance_from_previous_vowel.ToString(); - result[14] = distance_to_next_vowel < 0 ? "xx" : distance_to_next_vowel.ToString(); - return result; - } - - public string[] a() { - return parent.a(); - } - - public string[] b() { - return parent.b(); - } - - public string[] c() { - return parent.c(); - } - - public string[] d() { - return parent.d(); - } - - public string[] e() { - return parent.e(); - } - - public string[] f() { - return parent.f(); - } - } - - //TODO - public class HTSNote { - public int startMs = 0; - public int endMs = 0; - public int positionTicks; - public int durationTicks = 0; - public int index = 0;//index of this note in sentence - public int indexBackwards = 0; - public int sentenceDurMs = 0; - - public int tone = 0; - public string[] symbols; - - public HTSNote? prev; - public HTSNote? 
next; - - public HTSNote(string[] symbols, int tone, int startms,int endms,int positionTicks, int durationTicks) { - this.startMs = startms; - this.endMs = endms; - this.tone = tone; - this.symbols = symbols; - this.positionTicks = positionTicks; - this.durationTicks = durationTicks; - } - - public int durationMs { - get { return endMs - startMs; } - } - - public int startMsBackwards { - get { return sentenceDurMs - startMs; } - } - - public string[] b() { - return new string[] { - symbols.Length.ToString(), - "1", - "1", - "xx", - "xx" - }; - } - - public string[] a() { - if (prev == null) { - return Enumerable.Repeat("xx", 5).ToArray(); - } else { - return prev.b(); - } - } - - public string[] c() { - if (next == null) { - return Enumerable.Repeat("xx", 5).ToArray(); - } else { - return next.b(); - } - } - - public string[] e() { - var result = Enumerable.Repeat("xx", 60).ToArray(); - result[0] = HTS.GetToneName(tone); - result[5] = "1";//number_of_syllables - result[6] = ((durationMs + 5) / 10).ToString();//duration in 10ms - result[7] = ((durationTicks + 10) / 20).ToString(); //length in 96th note, or 20 ticks - result[17] = index <= 0 ? "xx" : index.ToString();//index of note in sentence - result[18] = indexBackwards <= 0 ? "xx" : indexBackwards.ToString(); - result[19] = ((startMs + 50) / 100).ToString();//position in 100ms - result[20] = ((startMsBackwards + 50) / 100).ToString(); - if (this.tone > 0) { - result[56] = (prev == null || prev.tone <= 0) ? "p0" : HTS.WriteInt(prev.tone - tone); - result[57] = (next == null || next.tone <= 0) ? 
"p0" : HTS.WriteInt(next.tone - tone); - } else { - result[56] = "p0"; - result[57] = "p0"; - } - return result; - } - - public string[] d() { - if(prev == null) { - return Enumerable.Repeat("xx", 60).ToArray(); - } else { - return prev.e(); - } - } - public string[] f() { - if (next == null) { - return Enumerable.Repeat("xx", 60).ToArray(); - } else { - return next.e(); - } - } - } -} diff --git a/OpenUtau.Test/Core/Util/HtsSpecTests.cs b/OpenUtau.Test/Core/Util/HtsSpecTests.cs new file mode 100644 index 000000000..2b66cb46d --- /dev/null +++ b/OpenUtau.Test/Core/Util/HtsSpecTests.cs @@ -0,0 +1,311 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Reflection; +using System.Text; +using System.Text.RegularExpressions; +using OpenUtau.Core.Ustx; +using OpenUtau.Core.Util.nnmnkwii.frontend; +using OpenUtau.Core.Util.nnmnkwii.io.hts; +using Xunit; + +namespace OpenUtau.Core.Util { + public class HtsSpecTests { + private static readonly Regex CurrentPhonemePattern = new(@"^[^@]+@[^\^]+\^[^-]+-(?[^+]+)\+", RegexOptions.Compiled); + protected Dictionary phoneDict = new Dictionary(); + protected List vowels = new List() {"a","i","u","e","o" }; + protected List consonants = new List() {"k","s","t","n","h","m","y","r","w","g","z","d","b","p" }; + protected List breaks = new List(); + protected List pauses = new List() { "pau", "sil" }; + protected List silences = new List(); + protected List unvoiced = new List(); + + private string GetPhonemeType(string phoneme) { + if (phoneme == "xx") { + return "xx"; + } + if (vowels.Contains(phoneme)) { + return "v"; + } + if (pauses.Contains(phoneme)) { + return "p"; + } + if (silences.Contains(phoneme)) { + return "s"; + } + if (breaks.Contains(phoneme)) { + return "b"; + } + //if (unvoiced.Contains(phoneme)) { + // return "c"; + //} + return "c"; + } + + private HTSNote MakeNote(int startMs, int endMs, int positionTicks, int durationTicks, int positionBar, string accent = "") { 
+ var symbols = new[] { "a" }; + var beatPerBar = 4; + var beatUnit = 4; + var key = 0; + double bpm = 120; + var tone = 60; // C4 + var isSlur = false; + var isRest = false; + var lang = "JPN"; + var accentStr = accent; + var note = new HTSNote(symbols, beatPerBar, beatUnit, positionBar, 0, key, bpm, tone, isSlur, isRest, lang, accentStr, startMs, endMs, positionTicks, durationTicks); + return note; + } + + private HTSPhrase BuildPhrase(HTSNote[] notes, int resolution) { + var phrase = new HTSPhrase(notes); + phrase.UpdateResolution(resolution); + var sentenceDurMs = notes.Sum(n => n.durationMs); + var sentenceDurTicks = notes.Sum(n => n.durationTicks); + for (var i = 0; i < notes.Length; i++) { + var n = notes[i]; + n.parent = phrase; + n.index = i + 1; + n.indexBackwards = notes.Length - i; + n.sentenceDurMs = sentenceDurMs; + n.sentenceDurTicks = sentenceDurTicks; + if (i > 0) { + notes[i - 1].next = n; + n.prev = notes[i - 1]; + } + } + return phrase; + } + + private TimeAxis BuildDefaultTimeAxis() { + var timeAxis = new TimeAxis(); + var project = new UProject(); + timeAxis.BuildSegments(project); + return timeAxis; + } + + [Fact] + public void MeasureForwardBackwardAreComputedPerBar() { + var res = 480; // ticks per quarter + var ticksPer96 = res / 24; // 20 + var n0 = MakeNote(0, 1000, 0, 480, 0); + var n1 = MakeNote(1000, 2000, 480, 480, 0); + var n2 = MakeNote(2000, 3000, 960, 480, 0); + var phrase = BuildPhrase(new[] { n0, n1, n2 }, res); + + var e0 = n0.e(); + var e1 = n1.e(); + var e2 = n2.e(); + + // forward index (e10) + Assert.Equal("0", e0[9]); + Assert.Equal("1", e1[9]); + Assert.Equal("2", e2[9]); + // backward index (e11) + Assert.Equal("2", e0[10]); + Assert.Equal("1", e1[10]); + Assert.Equal("0", e2[10]); + + // forward ms in centiseconds (e12) + Assert.Equal("0", e0[11]); + Assert.Equal("10", e1[11]); + Assert.Equal("20", e2[11]); + // backward ms in centiseconds (e13) + Assert.Equal("20", e0[12]); + Assert.Equal("10", e1[12]); + 
Assert.Equal("0", e2[12]); + + // forward 96th (e14) + Assert.Equal("0", e0[13]); + Assert.Equal((480 / ticksPer96).ToString(), e1[13]); + Assert.Equal((960 / ticksPer96).ToString(), e2[13]); + // backward 96th (e15) + Assert.Equal((960 / ticksPer96).ToString(), e0[14]); + Assert.Equal((480 / ticksPer96).ToString(), e1[14]); + Assert.Equal("0", e2[14]); + + // forward percent (e16) + Assert.Equal("0", e0[15]); + Assert.Equal("33", e1[15]); + Assert.Equal("66", e2[15]); + // backward percent (e17) + Assert.Equal("66", e0[16]); + Assert.Equal("33", e1[16]); + Assert.Equal("0", e2[16]); + } + + [Fact] + public void AccentDistancesForwardBackward() { + var res = 480; + var ticksPer96 = res / 24; // 20 + var n0 = MakeNote(0, 1000, 0, 480, 0, accent: ""); + var n1 = MakeNote(1000, 2000, 480, 480, 0, accent: "A"); + var n2 = MakeNote(2000, 3000, 960, 480, 0, accent: ""); + var n3 = MakeNote(3000, 4000, 1440, 480, 0, accent: "A"); + var phrase = BuildPhrase(new[] { n0, n1, n2, n3 }, res); + + var e0 = n0.e(); + var e1 = n1.e(); + var e2 = n2.e(); + var e3 = n3.e(); + + // For n2 (between accents): distances should be 1 note, 100 cs, 24 (96th) + Assert.Equal("1", e2[28]); // next accent (notes) + Assert.Equal("1", e2[29]); // prev accent (notes) + Assert.Equal("100", e2[30]); // next accent (cs) + Assert.Equal("100", e2[31]); // prev accent (cs) + Assert.Equal((480 / ticksPer96).ToString(), e2[32]); // next (96th) + Assert.Equal((480 / ticksPer96).ToString(), e2[33]); // prev (96th) + + // For n1 (accent): prev distance is 0, next accent is one note away (n2) + Assert.Equal("1", e1[28]); // next accent (n3 via one note n2) + Assert.Equal("0", e1[29]); // prev accent (itself) + Assert.Equal("100", e1[30]); // next accent (cs) + Assert.Equal("0", e1[31]); // prev accent (cs) + } + + [Fact] + public void NoteToPhonemesKeepsSharedNoteTiming() { + var note = new HTSNote( + new[] { "k", "a", "pau" }, + 4, + 4, + 0, + 0, + 0, + 120, + 60, + false, + false, + "JPN", + string.Empty, 
+ 120, + 360, + 0, + 480); + + var htsPhonemes = note.symbols.Select(x => new HTSPhoneme(x, note)).ToArray(); + int prevVowelPos = -1; + foreach (int i in Enumerable.Range(0, htsPhonemes.Length)) { + htsPhonemes[i].position = i + 1; + htsPhonemes[i].position_backward = htsPhonemes.Length - i; + htsPhonemes[i].type = GetPhonemeType(htsPhonemes[i].symbol); + if (htsPhonemes[i].type == "v") { + prevVowelPos = i; + } else { + if (prevVowelPos > 0) { + htsPhonemes[i].prev_vowel_distance = i - prevVowelPos; + } + } + } + int nextVowelPos = -1; + for (int i = htsPhonemes.Length - 1; i > 0; --i) { + if (htsPhonemes[i].type == "v") { + nextVowelPos = i; + } else { + if (nextVowelPos > 0) { + htsPhonemes[i].next_vowel_distance = nextVowelPos - i; + } + } + } + + Assert.Equal(3, htsPhonemes.Length); + Assert.All(htsPhonemes, phoneme => Assert.Same(note, phoneme.parent)); + Assert.All(htsPhonemes, phoneme => Assert.Equal(120, phoneme.parent.startMs)); + Assert.All(htsPhonemes, phoneme => Assert.Equal(360, phoneme.parent.endMs)); + Assert.Equal(new[] { 1, 2, 3 }, htsPhonemes.Select(phoneme => phoneme.position).ToArray()); + Assert.Equal(new[] { 3, 2, 1 }, htsPhonemes.Select(phoneme => phoneme.position_backward).ToArray()); + Assert.Equal(new[] { "c", "v", "p" }, htsPhonemes.Select(phoneme => phoneme.type).ToArray()); + Assert.Equal(1, htsPhonemes[2].prev_vowel_distance); + } + + [Fact] + public void PhraseResolutionUpdateRecomputesMeasureTicks() { + var note0 = MakeNote(0, 1000, 0, 960, 0); + var note1 = MakeNote(1000, 2000, 960, 960, 0); + var phrase = new HTSPhrase(new[] { note0, note1 }); + note0.parent = phrase; + note1.parent = phrase; + note0.index = 1; + note1.index = 2; + note0.indexBackwards = 2; + note1.indexBackwards = 1; + note0.next = note1; + note1.prev = note0; + note0.sentenceDurMs = 2000; + note1.sentenceDurMs = 2000; + note0.sentenceDurTicks = 1920; + note1.sentenceDurTicks = 1920; + + phrase.UpdateResolution(960); + + var e1 = note1.e(); + Assert.Equal("24", 
e1[13]); + Assert.Equal("24", e1[21]); + } + + [Fact] + public void RestNoteMasksPitchFields() { + var rest = MakeNote(0, 500, 0, 480, 0); + rest.isRest = true; + rest.tone = 0; + + var phrase = BuildPhrase(new[] { rest }, 480); + var e = rest.e(); + + Assert.Equal("xx", e[0]); + Assert.Equal("xx", e[1]); + Assert.Equal("xx", e[56]); + Assert.Equal("xx", e[57]); + } + + [Fact] + public void PitchDifferenceToRestNeighborsUsesXx() { + var restStart = MakeNote(0, 500, 0, 480, 0); + restStart.isRest = true; + restStart.tone = 0; + var note = MakeNote(500, 1000, 480, 480, 0); + var restEnd = MakeNote(1000, 1500, 960, 480, 0); + restEnd.isRest = true; + restEnd.tone = 0; + + var phrase = BuildPhrase(new[] { restStart, note, restEnd }, 480); + var e = note.e(); + + Assert.Equal("xx", e[56]); + Assert.Equal("xx", e[57]); + } + + [Fact] + public void AlignTimingPositionsFollowsAnchorPoints() { + var durations = new[] { 20d, 10d, 30d }; + var alignPoints = new[] { + Tuple.Create(1, 100d), + Tuple.Create(3, 160d), + }; + + var positions = HTSContextBuilder.AlignTimingPositions(durations, alignPoints); + + Assert.Equal(2, positions.Count); + Assert.Equal(100d, positions[0]); + Assert.Equal(115d, positions[1]); + } + + [Fact] + public void BuildAlignedNoteTimingResultReturnsNoteRelativeTicks() { + var result = HTSContextBuilder.BuildAlignedNoteTimingResult( + new[] { "pau", "a", "b", "c" }, + 1, + 4, + new[] { 80d, 100d, 120d }, + 50d, + (start, end) => (int)Math.Round(end - start)); + + Assert.Equal(3, result.Count); + Assert.Equal(Tuple.Create("a", 30), result[0]); + Assert.Equal(Tuple.Create("b", 50), result[1]); + Assert.Equal(Tuple.Create("c", 70), result[2]); + } + } +} diff --git a/OpenUtau.Test/Plugins/HtsLabelPhonemizerTest.cs b/OpenUtau.Test/Plugins/HtsLabelPhonemizerTest.cs new file mode 100644 index 000000000..b8614ad6b --- /dev/null +++ b/OpenUtau.Test/Plugins/HtsLabelPhonemizerTest.cs @@ -0,0 +1,242 @@ +using System; +using System.Collections.Generic; +using 
System.IO; +using System.Linq; +using System.Reflection; +using System.Text; +using OpenUtau.Api; +using OpenUtau.Classic; +using OpenUtau.Core; +using OpenUtau.Core.Format; +using OpenUtau.Core.Hts; +using OpenUtau.Core.Ustx; +using OpenUtau.Core.Util; +using OpenUtau.Core.Util.nnmnkwii.frontend; +using OpenUtau.Core.Util.nnmnkwii.io.hts; +using Xunit; +using Xunit.Abstractions; + +namespace OpenUtau.Plugins { + // Minimal concrete HTSLabelPhonemizer for testing without external aligners. + class DummyHtsLabelPhonemizer : HTSLabelPhonemizer { + public string GeneratedFullScorePath => fullScorePath; + public string GeneratedMonoTimingPath => monoTimingPath; + public string GeneratedTempPath => htstmpPath; + + public DummyHtsLabelPhonemizer() { + // Minimal language and symbol classes + lang = "JPN"; + vowels = new List { "a", "i", "u", "e", "o" }; + pauses = new List { "pau" }; + silences = new List { "sil" }; + breaks = new List { "br" }; + } + + protected override IG2p LoadG2p(string rootPath) { + // Provide a tiny JP-like dictionary: simple CV mapping. 
+ var builder = G2pDictionary.NewBuilder(); + // vowels + builder.AddSymbol("a", true); + builder.AddSymbol("i", true); + builder.AddSymbol("u", true); + builder.AddSymbol("e", true); + builder.AddSymbol("o", true); + // consonants + var cons = new[] { "k", "s", "t", "n", "h", "m", "y", "r", "w" }; + foreach (var c in cons) builder.AddSymbol(c, false); + // pauses etc + builder.AddSymbol("pau", false); + builder.AddSymbol("sil", false); + builder.AddSymbol("br", false); + // single vowels + builder.AddEntry("a", new[] { "a" }); + builder.AddEntry("i", new[] { "i" }); + builder.AddEntry("u", new[] { "u" }); + builder.AddEntry("e", new[] { "e" }); + builder.AddEntry("o", new[] { "o" }); + // CV (subset) + builder.AddEntry("ka", new[] { "k", "a" }); + builder.AddEntry("ki", new[] { "k", "i" }); + builder.AddEntry("ku", new[] { "k", "u" }); + builder.AddEntry("ke", new[] { "k", "e" }); + builder.AddEntry("ko", new[] { "k", "o" }); + builder.AddEntry("ta", new[] { "t", "a" }); + builder.AddEntry("ti", new[] { "t", "i" }); + builder.AddEntry("to", new[] { "t", "o" }); + builder.AddEntry("na", new[] { "n", "a" }); + builder.AddEntry("ni", new[] { "n", "i" }); + builder.AddEntry("no", new[] { "n", "o" }); + builder.AddEntry("ma", new[] { "m", "a" }); + builder.AddEntry("mi", new[] { "m", "i" }); + builder.AddEntry("mo", new[] { "m", "o" }); + builder.AddEntry("ra", new[] { "r", "a" }); + builder.AddEntry("ri", new[] { "r", "i" }); + builder.AddEntry("ro", new[] { "r", "o" }); + return builder.Build(); + } + + protected override HTSNote CustomHTSNoteContext(HTSNote htsNote, Phonemizer.Note note) { + return htsNote; // no-op + } + + protected override HTSPhoneme[] CustomHTSPhonemeContext(HTSPhoneme[] htsPhonemes, Phonemizer.Note[] notes) { + return htsPhonemes; // no-op + } + + protected override Phonemizer.Note[][] PhraseAdjustments(Phonemizer.Note[][] phrese) { + return phrese; // no-op + } + + protected override void SendScore(Phonemizer.Note[][] phrase) { + // Create a 
fake mono_timing.lab with uniform 100ms durations for each phoneme in full_score.lab + if (!Directory.Exists(htstmpPath)) { + Directory.CreateDirectory(htstmpPath); + } + int count = 0; + if (File.Exists(fullScorePath)) { + count = File.ReadLines(fullScorePath).Count(); + } + long start = 0; + var lines = new List(count); + for (int i = 0; i < count; i++) { + long end = start + 1_000_000; // 100ms in 100ns units + lines.Add($"{start} {end} a"); + start = end; + } + File.WriteAllLines(monoTimingPath, lines); + } + } + + public class HtsLabelPhonemizerTest : PhonemizerTestBase { + public HtsLabelPhonemizerTest(ITestOutputHelper output) : base(output) { } + + protected override Phonemizer CreatePhonemizer() { + return new DummyHtsLabelPhonemizer(); + } + + [Theory] + [InlineData(new string[] { "a" }, new string[] { "a" })] + [InlineData(new string[] { "a", "i" }, new string[] { "a", "i" })] + [InlineData(new string[] { "a", "+~a", "i" }, new string[] { "a", "i" })] // extension note should not duplicate symbols + // JP CV + [InlineData(new string[] { "ka" }, new string[] { "k", "a" })] + [InlineData(new string[] { "ka", "ki" }, new string[] { "k", "a", "k", "i" })] + [InlineData(new string[] { "ka", "+~a", "ki" }, new string[] { "k", "a", "k", "i" })] + public void BasicHtsPipelineTest(string[] lyrics, string[] aliases) { + SameAltsTonesColorsTest("en_delta0", lyrics, aliases, "", "C4", ""); + } + + [Fact] + public void GeneratedLabelsCanDriveFrontendAndSimpleSynthesis() { + var phonemizer = CreateConfiguredPhonemizer(new[] { "ka", "ki", "ro" }); + + Assert.True(File.Exists(phonemizer.GeneratedFullScorePath)); + Assert.True(File.Exists(phonemizer.GeneratedMonoTimingPath)); + + var questionPath = WriteMinimalQuestionSet(phonemizer.GeneratedTempPath); + var questionSet = hts.load_question_set(questionPath, encoding: Encoding.UTF8); + var fullLabels = hts.load(phonemizer.GeneratedFullScorePath, Encoding.UTF8); + var monoLabels = 
hts.load(phonemizer.GeneratedMonoTimingPath, Encoding.UTF8); + var features = merlin.linguistic_features(fullLabels, questionSet.Item1, questionSet.Item2); + + Assert.Equal(fullLabels.Count, monoLabels.Count); + Assert.Equal(fullLabels.Count, features.Count); + Assert.All(features, feature => { + Assert.Single(feature); + Assert.Equal(1f, feature[0]); + }); + + var waveform = SynthesizeFromLabels(monoLabels, features, 16000); + + Assert.NotEmpty(waveform); + Assert.Contains(waveform, sample => Math.Abs(sample) > 0.0001f); + } + + DummyHtsLabelPhonemizer CreateConfiguredPhonemizer(string[] lyrics) { + Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); + var dir = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location); + var basePath = Path.Join(dir, "Files"); + var file = Path.Join(basePath, "en_delta0", "character.txt"); + + VoicebankLoader.IsTest = true; + var voicebank = new Voicebank() { File = file, BasePath = dir }; + VoicebankLoader.LoadVoicebank(voicebank); + var singer = new ClassicSinger(voicebank); + singer.EnsureLoaded(); + + var project = new UProject(); + Ustx.AddDefaultExpressions(project); + var track = project.tracks[0]; + project.expressions.TryGetValue(Ustx.CLR, out var descriptor); + track.VoiceColorExp = descriptor.Clone(); + var colors = singer.Subbanks.Select(subbank => subbank.Color).ToHashSet(); + track.VoiceColorExp.options = colors.OrderBy(color => color).ToArray(); + track.VoiceColorExp.max = track.VoiceColorExp.options.Length - 1; + + var timeAxis = new TimeAxis(); + timeAxis.BuildSegments(project); + + var phonemizer = new DummyHtsLabelPhonemizer(); + phonemizer.Testing = true; + phonemizer.SetSinger(singer); + phonemizer.SetTiming(timeAxis); + phonemizer.SetUp(BuildGroups(lyrics), project, track); + return phonemizer; + } + + Phonemizer.Note[][] BuildGroups(string[] lyrics) { + var groups = new List(); + int position = 240; + foreach (var lyric in lyrics) { + groups.Add(new[] { + new Phonemizer.Note { + lyric = 
lyric, + duration = 240, + position = position, + tone = Core.MusicMath.NameToTone("C4"), + phonemeAttributes = new[] { + new Phonemizer.PhonemeAttributes { + index = 0, + consonantStretchRatio = 1, + voiceColor = string.Empty, + } + }, + } + }); + position += 240; + } + return groups.ToArray(); + } + + string WriteMinimalQuestionSet(string directory) { + var questionPath = Path.Combine(directory, "test-minimal.qst"); + File.WriteAllLines(questionPath, new[] { + "QS \"ALL\" {*}", + }); + return questionPath; + } + + float[] SynthesizeFromLabels(HTSLabelFile monoLabels, List> features, int sampleRate) { + Assert.True(monoLabels.Count > 0); + long totalDuration = monoLabels[^1].end_time; + int totalSamples = (int)Math.Ceiling(totalDuration / 10_000_000.0 * sampleRate); + var waveform = new float[totalSamples]; + for (int index = 0; index < monoLabels.Count; index++) { + var label = monoLabels[index]; + Assert.True(label.end_time > label.start_time); + if (index > 0) { + Assert.Equal(monoLabels[index - 1].end_time, label.start_time); + } + int startSample = (int)Math.Round(label.start_time / 10_000_000.0 * sampleRate); + int endSample = Math.Min(totalSamples, (int)Math.Round(label.end_time / 10_000_000.0 * sampleRate)); + float amplitude = 0.05f + 0.05f * features[index].Sum(); + float frequency = 220f + 30f * index; + for (int sample = startSample; sample < endSample; sample++) { + float time = sample / (float)sampleRate; + waveform[sample] = amplitude * (float)Math.Sin(2 * Math.PI * frequency * time); + } + } + return waveform; + } + } +} From 7572f6e515606a2edf8bc73665f017c3e4802296 Mon Sep 17 00:00:00 2001 From: rokujyushi Date: Mon, 18 May 2026 17:47:41 +0900 Subject: [PATCH 02/18] Addition of NEUTRINO Support Changes have been implemented to support NEUTRINO type singers and renderers. - Added USingerType.Neutrino to ClassicSingerLoader.cs. - Added NeutrinoLabelPhonemizer.cs and implemented label generation. 
- Added NeutrinoRenderer.cs and implemented audio rendering. - Added NeutrinoServerLauncher.cs and implemented server management. - Added NeutrinoSinger.cs and implemented NEUTRINO singers. - Added the NEUTRINO renderer to Renderers.cs. - Added USingerType.Neutrino to USinger.cs. - Added locale settings and background process launching to ProcessRunner.cs. - Added TomlData.cs and implemented TOML file reading. - Added TomlDataTests.cs and implemented tests for TOML utilities. - Added the NEUTRINO type option to SingersViewModel.cs. --- OpenUtau.Core/Classic/ClassicSingerLoader.cs | 2 + .../Neutrino/NeutrinoLabelPhonemizer.cs | 183 ++++++ OpenUtau.Core/Neutrino/NeutrinoRenderer.cs | 537 ++++++++++++++++++ .../Neutrino/NeutrinoServerLauncher.cs | 107 ++++ OpenUtau.Core/Neutrino/NeutrinoSinger.cs | 218 +++++++ OpenUtau.Core/Render/Renderers.cs | 6 + OpenUtau.Core/Ustx/USinger.cs | 4 +- OpenUtau.Core/Util/ProcessRunner.cs | 55 +- OpenUtau.Core/Util/TomlData.cs | 245 ++++++++ OpenUtau.Test/Core/Util/TomlDataTests.cs | 54 ++ OpenUtau/ViewModels/SingersViewModel.cs | 2 +- 11 files changed, 1410 insertions(+), 3 deletions(-) create mode 100644 OpenUtau.Core/Neutrino/NeutrinoLabelPhonemizer.cs create mode 100644 OpenUtau.Core/Neutrino/NeutrinoRenderer.cs create mode 100644 OpenUtau.Core/Neutrino/NeutrinoServerLauncher.cs create mode 100644 OpenUtau.Core/Neutrino/NeutrinoSinger.cs create mode 100644 OpenUtau.Core/Util/TomlData.cs create mode 100644 OpenUtau.Test/Core/Util/TomlDataTests.cs diff --git a/OpenUtau.Core/Classic/ClassicSingerLoader.cs b/OpenUtau.Core/Classic/ClassicSingerLoader.cs index e9f7a5d87..06dd175b0 100644 --- a/OpenUtau.Core/Classic/ClassicSingerLoader.cs +++ b/OpenUtau.Core/Classic/ClassicSingerLoader.cs @@ -13,6 +13,8 @@ static USinger AdjustSingerType(Voicebank v) { return new Core.DiffSinger.DiffSingerSinger(v) as USinger; case USingerType.Voicevox: return new Core.Voicevox.VoicevoxSinger(v) as USinger; + case USingerType.Neutrino: + return new 
Core.Neutrino.NeutrinoSinger(v) as USinger; default: return new ClassicSinger(v) as USinger; } diff --git a/OpenUtau.Core/Neutrino/NeutrinoLabelPhonemizer.cs b/OpenUtau.Core/Neutrino/NeutrinoLabelPhonemizer.cs new file mode 100644 index 000000000..b605e968f --- /dev/null +++ b/OpenUtau.Core/Neutrino/NeutrinoLabelPhonemizer.cs @@ -0,0 +1,183 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using OpenUtau.Api; +using OpenUtau.Core.Hts; +using OpenUtau.Core.Ustx; +using OpenUtau.Core.Util; +using Serilog; + +namespace OpenUtau.Core.Neutrino { + [Phonemizer("Neutrino Label Phonemizer", "NEUTRINO")] + public class Neutrino : HTSLabelPhonemizer { + string NeutrinoExe = string.Empty; + string NeutrinoClientExe = string.Empty; + string NeutrinoServerExe = string.Empty; + string NsfExe = string.Empty; + string WorldExe = string.Empty; + + protected new NeutrinoSinger singer; + + List macronLyrics = new List(); + + public override void SetSinger(USinger singer) { + this.singer = singer as NeutrinoSinger; + if (this.singer == null) { + return; + } + lang = "JPN";//TODO: use singer.language + string confPath = "japanese.utf_8.conf"; + tablePath = "japanese.utf_8.table"; + string basePath = Path.Join(PathManager.Inst.DependencyPath, "NEUTRINO"); + if (!Directory.Exists(basePath)) { + if (this.singer.singerVersion.StartsWith("v2.7")) { + basePath = Path.Join(PathManager.Inst.DependencyPath, "NEUTRINO_v27"); + } else if (this.singer.singerVersion.StartsWith("v3.")) { + basePath = Path.Join(PathManager.Inst.DependencyPath, "NEUTRINO_v3"); + } + } + //Load Dictionary + try { + phoneDict.Clear(); + LoadDict(Path.Join(Path.Join(basePath, @"settings\dic"), confPath), singer.TextFileEncoding); + LoadDict(Path.Join(Path.Join(basePath, @"settings\dic"), tablePath), singer.TextFileEncoding); + // Lyrics often handled in OpenUtau + phoneDict.Add("R",new string[] { "pau" }); + phoneDict.Add("-", new string[] { "pau" }); + phoneDict.Add("SP", new 
string[] { "pau" }); + phoneDict.Add("AP", new string[] { "br" }); + g2p = this.LoadG2p(); + } catch (Exception e) { + Log.Error(e, $"failed to load dictionary from {tablePath}"); + return; + } + if (OS.IsWindows()) { + NeutrinoExe = Path.Join(basePath, @"bin", "NEUTRINO.exe"); + NeutrinoClientExe = Path.Join(basePath, @"bin", "neutrino_client.exe"); + NeutrinoServerExe = Path.Join(basePath, @"bin", "neutrino_server.exe"); + } else if (OS.IsMacOS() || OS.IsLinux()) { + NeutrinoExe = Path.Join(basePath, @"bin", "NEUTRINO"); + } else { + throw new NotSupportedException("Platform not supported."); + } + NeutrinoServerLauncher.EnsureStarted(NeutrinoServerExe); + } + protected IG2p LoadG2p() { + var g2ps = new List(); + var builder = G2pDictionary.NewBuilder(); + vowels.AddRange(phoneDict["VOWELS"]); + breaks.AddRange(phoneDict["BREAK"]); + pauses.AddRange(phoneDict["PAUSES"]); + silences.AddRange(phoneDict["SILENCES"]); + consonants.AddRange(phoneDict["PHONEME_CL"]); + macronLyrics.AddRange(phoneDict["MACRON"]); + foreach (var dict in phoneDict.Values) { + foreach (var phoneme in dict) { + if (!consonants.Contains(phoneme) && !vowels.Contains(phoneme) && + !breaks.Contains(phoneme) && !pauses.Contains(phoneme) && + !silences.Contains(phoneme)) { + consonants.Add(phoneme); + } + if (!consonants.Contains(phoneme)) { + builder.AddSymbol(phoneme, true); + }else { + builder.AddSymbol(phoneme, false); + } + } + } + foreach (var entry in phoneDict.Keys) { + builder.AddEntry(entry, phoneDict[entry]); + foreach (var reduction in phoneDict["VOWEL_REDUCTION"]) { + var phonemes = phoneDict[entry].Except(vowels).ToList(); + if (phonemes.Count == 0) continue; + builder.AddEntry(entry + reduction, phonemes); + } + foreach (var macron in phoneDict["MACRON"]) { + var addPhonemes = phoneDict[entry].Where(x => vowels.Contains(x)).ToList(); + if (addPhonemes.Count == 0) continue; + var phonemes = phoneDict[entry].ToList(); + phonemes.AddRange(addPhonemes); + builder.AddEntry(entry + 
macron, phonemes); + macronLyrics.Add(entry + macron); + } + } + g2ps.Add(builder.Build()); + return new G2pFallbacks(g2ps.ToArray()); + } + + protected override Note[][] PhraseAdjustments(Note[][] phrese) { + for (int i = 0; i < phrese.Length; i++) { + var lyric = phrese[i][0].lyric; + if (phoneDict["MACRON"].Contains(lyric) && (i > 0)) { + if (g2p.IsValidSymbol(lyric)) { + // Replace the macron with the previous note's vowel; keep the lyric untouched when no vowel is found. + var vowel = g2p.Query(phrese[i-1][0].lyric)?.FirstOrDefault(phoneme => vowels.Contains(phoneme)); + if (vowel != null) { + phrese[i][0].lyric = vowel; + } + } + } + } + return phrese; + } + + protected override HTSNote CustomHTSNoteContext(HTSNote htsNote, Note note) { + var fixs = GetPrefixAndSuffix(note); + if(!htsNote.isRest && !htsNote.isSlur) { + htsNote.langDependent = "0"; // no macron + if (macronLyrics.Contains(note.lyric)) { + htsNote.langDependent = "1"; // macron + } + } + return htsNote; + } + + protected override HTSPhoneme[] CustomHTSPhonemeContext(HTSPhoneme[] htsPhonemes, Note[] notes) { + var fixs = GetPrefixAndSuffix(notes[0]); + foreach (var htsPhoneme in htsPhonemes) { + htsPhoneme.flag1 = "00"; // NEUTRINO Default. + } + return htsPhonemes; + } + + protected override void SendScore(Note[][] phrase) { + // SetSinger leaves this.singer null when the singer is not a NeutrinoSinger, so guard the dereference. + if (this.singer?.singerVersion == null) { + return; + } + if (File.Exists(fullScorePath) && !File.Exists(monoTimingPath)) { + var voicebankNameHash = $"{this.singer.voicebankNameHash:x16}"; + string f0Path = Path.Join(htstmpPath, $"{voicebankNameHash}_tmp.f0"); + string melspecPath = Path.Join(htstmpPath, $"{voicebankNameHash}_tmp.melspec"); + string wavPath = Path.Join(htstmpPath, $"{voicebankNameHash}_tmp.wav"); + //string PhraseList = Path.Join(htstmpPath, $"{voicebankNameHash}_phraselist.txt"); + string modelDir = this.singer.Location+"\\"; + var attr = phrase[0][0].phonemeAttributes?.FirstOrDefault(attr => attr.index == 0) ?? 
default; + int toneShift = attr.toneShift; + int numThreads = Preferences.Default.NumRenderThreads; + //int gpuMode = -1; + //switch (Preferences.Default.OnnxRunner) { + // case "directml": + // gpuMode = Preferences.Default.OnnxGpu; + // break; + // default: + // gpuMode = -1; + // break; + //} + string ArgParam = string.Empty; + if (this.singer.singerVersion.StartsWith("v2.7")) { + ArgParam = $"{fullScorePath} {monoTimingPath} {f0Path} {melspecPath} {modelDir} -a -k {toneShift} -d 3 -n 1 -p {numThreads} -m -t"; + } else if (this.singer.singerVersion.StartsWith("v3.")) { + //TODO: -S support model + ArgParam = $"{fullScorePath} {monoTimingPath} {f0Path} {melspecPath} {wavPath} {modelDir} --skip-f0 --skip-melspec --skip-wav -f {toneShift} -m -t"; + } else { + Log.Error($"Unsupported NEUTRINO version: {this.singer.singerVersion}"); + return; + } + Log.Information($"NEUTRINO timing args: {ArgParam}"); + if (File.Exists(NeutrinoClientExe)) { + ProcessRunner.Run(NeutrinoClientExe, ArgParam, Log.Logger); + } else { + ProcessRunner.Run(NeutrinoExe, ArgParam, Log.Logger); + } + } + } + } +} diff --git a/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs b/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs new file mode 100644 index 000000000..c61ff8a83 --- /dev/null +++ b/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs @@ -0,0 +1,537 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Threading; +using System.Threading.Tasks; +using K4os.Hash.xxHash; +using NAudio.Wave; +using OpenUtau.Core.Format; +using OpenUtau.Core.Hts; +using OpenUtau.Core.Render; +using OpenUtau.Core.SignalChain; +using OpenUtau.Core.Ustx; +using OpenUtau.Core.Util; +using Serilog; +using SharpCompress; +using ThirdParty; + +namespace OpenUtau.Core.Neutrino { + public class NeutrinoRenderer : HTSLabelRenderer { + const string NTYP = "ntyp"; + const string NMOD = "nmod"; + const string NMEL = "nmel"; + const string SMOC = "smoc"; + + enum NeutrinoRenderType { + WORLD, 
+ NSF, + } + + enum NeutrinoRenderMode { + Elements = 2, + Standard = 3, + Advanced = 4, + } + + enum NsfModel { + va, + vs, + ve, + } + + static readonly HashSet supportedExp = new HashSet(){ + Format.Ustx.DYN, + Format.Ustx.PITD, + Format.Ustx.SHFT, + Format.Ustx.GENC, + Format.Ustx.TENC, + Format.Ustx.BREC, + Format.Ustx.VOIC, + Format.Ustx.DIR, + NTYP, + NMOD, + SMOC + }; + + static readonly object lockObj = new object(); + + public override USingerType SingerType => USingerType.Neutrino; + + public override bool SupportsRenderPitch => true; + + public override bool SupportsExpression(UExpressionDescriptor descriptor) { + return supportedExp.Contains(descriptor.abbr); + } + + protected NeutrinoSinger singer; + string NeutrinoExe = string.Empty; + string NeutrinoClientExe = string.Empty; + string NeutrinoServerExe = string.Empty; + string NsfExe = string.Empty; + string WorldExe = string.Empty; + string VocoderClientExe = string.Empty; + string VocoderServerExe = string.Empty; + bool existNeutrinoClient = false; + int sampleRate = 48000; + + public override void SetUp() { + lang = "JPN";//TODO: use singer.language + string confPath = "japanese.utf_8.conf"; + tablePath = "japanese.utf_8.table"; + string basePath = Path.Join(PathManager.Inst.DependencyPath, "NEUTRINO"); + if (!Directory.Exists(basePath)) { + if (singer.singerVersion.StartsWith("v2.7")) { + basePath = Path.Join(PathManager.Inst.DependencyPath, "NEUTRINO_v27"); + } else if (singer.singerVersion.StartsWith("v3.")) { + basePath = Path.Join(PathManager.Inst.DependencyPath, "NEUTRINO_v3"); + } + } + //Load Dictionary + try { + phoneDict.Clear(); + // Join each path segment separately: a literal backslash is not a directory separator on macOS/Linux. + LoadDict(Path.Join(basePath, "settings", "dic", confPath), singer.TextFileEncoding); + LoadDict(Path.Join(basePath, "settings", "dic", tablePath), singer.TextFileEncoding); + // Lyrics often handled in OpenUtau + phoneDict.Add("R", new string[] { "pau" }); + phoneDict.Add("-", new string[] { "pau" }); + phoneDict.Add("SP", new string[] { 
"pau" }); + phoneDict.Add("AP", new string[] { "br" }); + g2p = this.LoadG2p(); + } catch (Exception e) { + Log.Error(e, $"failed to load dictionary from {tablePath}"); + return; + } + // g2p is already built inside the try block above; the former second LoadG2p() call discarded its result. + if (OS.IsWindows()) { + NeutrinoExe = Path.Join(basePath, "bin", "NEUTRINO.exe"); + NeutrinoClientExe = Path.Join(basePath, "bin", "neutrino_client.exe"); + NeutrinoServerExe = Path.Join(basePath, "bin", "neutrino_server.exe"); + NsfExe = Path.Join(basePath, "bin", "NSF.exe"); + WorldExe = Path.Join(basePath, "bin", "WORLD.exe"); + VocoderClientExe = Path.Join(basePath, "bin", "vocoder_client.exe"); + VocoderServerExe = Path.Join(basePath, "bin", "vocoder_server.exe"); + } else if (OS.IsMacOS() || OS.IsLinux()) { + NeutrinoExe = Path.Join(basePath, "bin", "NEUTRINO"); + NsfExe = Path.Join(basePath, "bin", "NSF"); + WorldExe = Path.Join(basePath, "bin", "WORLD"); + } else { + throw new NotSupportedException("Platform not supported."); + } + existNeutrinoClient = File.Exists(NeutrinoClientExe); + NeutrinoServerLauncher.EnsureStarted(NeutrinoServerExe); + NeutrinoServerLauncher.EnsureStarted(VocoderServerExe, 23456); + } + + protected override HTSPhoneme[] CustomHTSPhonemeContext(HTSPhoneme[] htsPhonemes, RenderNote notes) { + var fixs = GetPrefixAndSuffix(notes); + foreach (var htsPhoneme in htsPhonemes) { + htsPhoneme.flag1 = "00"; // NEUTRINO Default. 
+ } + return htsPhonemes; + } + + public double[] LoadFile(string filePath) { + if (File.Exists(filePath)) { + using (FileStream fs = new FileStream(filePath, FileMode.Open, FileAccess.Read)) { + using (BinaryReader reader = new BinaryReader(fs)) { + long fileSize = fs.Length; + int Count = (int)(fileSize / sizeof(float)); + double[] data = new double[Count]; + for (int i = 0; i < Count; i++) { + data[i] = reader.ReadSingle(); + } + return data; + } + } + } + return new double[0]; + } + + public void SaveFile(string filePath, double[] doubles) { + try { + using (FileStream fs = new FileStream(filePath, FileMode.Create, FileAccess.Write)) { + using (BinaryWriter writer = new BinaryWriter(fs)) { + foreach (double pitch in doubles) { + writer.Write((float)pitch); + } + } + } + } catch (Exception ex) { + Log.Error($"Error: {ex.Message}"); + } + } + + public override Task Render(RenderPhrase phrase, Progress progress, int trackNo, CancellationTokenSource cancellation, bool isPreRender) { + var task = Task.Run(() => { + lock (lockObj) { + if (cancellation.IsCancellationRequested) { + return new RenderResult(); + } + string progressInfo = $"Track {trackNo + 1}: {this} \"{string.Join(" ", phrase.phones.Select(p => p.phoneme))}\""; + progress.Complete(0, progressInfo); + this.singer = phrase.singer as NeutrinoSinger; + if (g2p == null || string.IsNullOrEmpty(NeutrinoExe)) { + SetUp(); + } + var result = Layout(phrase); + var hash = HashPhraseGroups(phrase); + string tmpPath = Path.Join(PathManager.Inst.CachePath, $"ne-{hash:x16}_temp"); + if (!Directory.Exists(tmpPath)) { + Directory.CreateDirectory(tmpPath); + } + string wavPath = Path.Join(tmpPath, $"ne-{phrase.hash}.wav"); + string f0Path = Path.Join(tmpPath, $"ne-{phrase.preEffectHash}.f0"); + string editorf0Path = Path.Join(tmpPath, $"ne-edit.f0"); + string melspecPath = Path.Join(tmpPath, $"ne-{phrase.preEffectHash}.melspec"); + string mgcPath = Path.Join(tmpPath, $"ne-{phrase.preEffectHash}.mgc"); + string bapPath = 
Path.Join(tmpPath, $"ne-{phrase.preEffectHash}.bap"); + fullScorePath = Path.Join(tmpPath, $"ne-{phrase.preEffectHash}_full_score.lab"); + monoTimingPath = Path.Join(tmpPath, $"ne-{phrase.preEffectHash}_mono_timing.lab"); + string modelDir = this.singer.Location + "\\"; + int toneShift = phrase.phones[0] != null ? phrase.phones[0].toneShift : 0; + int numThreads = Preferences.Default.NumRenderThreads; + if (!File.Exists(fullScorePath) && !File.Exists(monoTimingPath)) { + ProcessPart(phrase); + } + var flag1 = phrase.phones[0].flags.FirstOrDefault(f => f.Item3.Equals(NTYP)); + string eng = string.Empty; + if (flag1 != null) { + eng = flag1.Item1; + } + string ArgParam = string.Empty; + if (this.singer.singerVersion.StartsWith("v2.7")) { + if (eng.Equals(NeutrinoRenderType.NSF.ToString())) { + var flag2 = phrase.phones[0].flags.FirstOrDefault(f => f.Item3.Equals(NMOD)); + string nsf = "vs"; + if (flag2 != null) { + if (flag2.Item2 == 4) { + nsf = NsfModel.va.ToString(); + sampleRate = 48000; + } else if (flag2.Item2 == 3) { + nsf = NsfModel.vs.ToString(); + sampleRate = 48000; + } else if (flag2.Item2 == 2) { + nsf = NsfModel.ve.ToString(); + sampleRate = 24000; + } + } + if (!File.Exists(f0Path) || !File.Exists(melspecPath)) { + ArgParam = $"{fullScorePath} {monoTimingPath} {f0Path} {melspecPath} {modelDir} -s -n 1 -o {numThreads} -k {toneShift} -m -t"; + if (existNeutrinoClient) { + ProcessRunner.Run(NeutrinoClientExe, ArgParam, Log.Logger); + } else { + ProcessRunner.Run(NeutrinoExe, ArgParam, Log.Logger); + } + } + if (cancellation.IsCancellationRequested) { + return new RenderResult(); + } + if (!File.Exists(wavPath) && File.Exists(f0Path) && File.Exists(melspecPath)) { + if (phrase.phones[0].direct) { + ArgParam = $"{f0Path} {melspecPath} {modelDir}{nsf}.bin {wavPath} -l {monoTimingPath} -n 1 -p {numThreads} -s{(int)sampleRate / 1000} -f {toneShift} -m -t"; + } else { + double[] f0 = LoadFile(f0Path); + double[] melspec = LoadFile(melspecPath); + int 
totalFrames = f0.Length; + int headFrames = (int)Math.Round(headMs / framePeriod); + int tailFrames = (int)Math.Round(tailMs / framePeriod); + double[] editorF0 = SampleCurve(phrase, phrase.pitches, 0, framePeriod, totalFrames, headFrames, tailFrames, x => MusicMath.ToneToFreq(x * 0.01)); + SaveFile(editorf0Path, editorF0); + ArgParam = $"{editorf0Path} {melspecPath} {modelDir}{nsf}.bin {wavPath} -l {monoTimingPath} -n 1 -p {numThreads} -s{(int)sampleRate / 1000} -f {toneShift} -m -t"; + } + if (File.Exists(VocoderClientExe)) { + ProcessRunner.Run(VocoderClientExe, ArgParam, Log.Logger); + } else { + ProcessRunner.Run(NsfExe, ArgParam, Log.Logger); + } + using (var waveStream = new WaveFileReader(wavPath)) { + result.samples = Wave.GetSamples(waveStream.ToSampleProvider()); + } + Wave.CorrectSampleScale(result.samples); + var signal = new NWaves.Signals.DiscreteSignal(sampleRate, result.samples); + signal = NWaves.Operations.Operation.Resample(signal, 44100); + // Keep the resampled audio; without this the vocoder-rate samples were written back unchanged. + result.samples = signal.Samples; + var source = new WaveSource(0, 0, 0, 1); + source.SetSamples(result.samples); + WaveFileWriter.CreateWaveFile16(wavPath, new ExportAdapter(source).ToMono(1, 0)); + } + } else { + if (!File.Exists(f0Path) || !File.Exists(mgcPath) || !File.Exists(bapPath)) { + ArgParam = $"{fullScorePath} {monoTimingPath} {f0Path} {melspecPath} {modelDir} -w {mgcPath} {bapPath} -s -n 1 -o {numThreads} -k {toneShift} -m -t"; + if (existNeutrinoClient) { + ProcessRunner.Run(NeutrinoClientExe, ArgParam, Log.Logger); + } else { + ProcessRunner.Run(NeutrinoExe, ArgParam, Log.Logger); + } + } + if (cancellation.IsCancellationRequested) { + return new RenderResult(); + } + if (!File.Exists(wavPath) && File.Exists(f0Path) && File.Exists(mgcPath) && File.Exists(bapPath)) { + if (phrase.phones[0].direct) { + // A missing flag makes FirstOrDefault return null, so use ?. and a default; divide as float so values below 100 are not truncated (NOTE(review): assumes Item2 is an integer flag value - confirm). + float gender = 1f + (phrase.phones[0].flags.FirstOrDefault(f => f.Item3.Equals(Format.Ustx.GEN))?.Item2 ?? 0) / 100f; + float breathiness = phrase.phones[0].flags.FirstOrDefault(f => f.Item3.Equals(Format.Ustx.BRE))?.Item2 ?? 
0f; + // Format with the invariant culture so the command line always uses '.' as the decimal separator. + ArgParam = $"{f0Path} {mgcPath} {bapPath} {wavPath} -n 1 -m {gender.ToString(System.Globalization.CultureInfo.InvariantCulture)} -b {breathiness.ToString(System.Globalization.CultureInfo.InvariantCulture)} -t"; + if (File.Exists(VocoderClientExe)) { + ProcessRunner.Run(VocoderClientExe, ArgParam, Log.Logger); + } else { + ProcessRunner.Run(WorldExe, ArgParam, Log.Logger); + } + using (var waveStream = new WaveFileReader(wavPath)) { + result.samples = Wave.GetSamples(waveStream.ToSampleProvider()); + } + Wave.CorrectSampleScale(result.samples); + var signal = new NWaves.Signals.DiscreteSignal(sampleRate, result.samples); + signal = NWaves.Operations.Operation.Resample(signal, 44100); + // Keep the resampled audio, matching the Worldline synthesis branch below. + result.samples = signal.Samples; + var source = new WaveSource(0, 0, 0, 1); + source.SetSamples(result.samples); + WaveFileWriter.CreateWaveFile16(wavPath, new ExportAdapter(source).ToMono(1, 0)); + } else { + double[] f0 = LoadFile(f0Path); + double[] mgc = LoadFile(mgcPath); + double[] bap = LoadFile(bapPath); + int totalFrames = f0.Length; + int headFrames = (int)Math.Round(headMs / framePeriod); + int tailFrames = (int)Math.Round(tailMs / framePeriod); + + var editorF0 = SampleCurve(phrase, phrase.pitches, 0, framePeriod, totalFrames, headFrames, tailFrames, x => MusicMath.ToneToFreq(x * 0.01)); + var gender = SampleCurve(phrase, phrase.gender, 0.5, framePeriod, totalFrames, headFrames, tailFrames, x => 0.5 + 0.005 * x); + var tension = SampleCurve(phrase, phrase.tension, 0.5, framePeriod, totalFrames, headFrames, tailFrames, x => 0.5 + 0.005 * x); + var breathiness = SampleCurve(phrase, phrase.breathiness, 0.5, framePeriod, totalFrames, headFrames, tailFrames, x => 0.5 + 0.005 * x); + var voicing = SampleCurve(phrase, phrase.voicing, 1.0, framePeriod, totalFrames, headFrames, tailFrames, x => 0.01 * x); + + for (int i = 0; i < f0.Length; i++) { + if (f0[i] < 50) { + editorF0[i] = 0; + } + } + + var samples = Worldline.WorldSynthesis( + editorF0, + mgc, true, 60, + bap, true, 2048, + framePeriod, sampleRate, + gender, tension, breathiness, voicing); + result.samples = samples.Select(d => (float)d).ToArray(); + 
Wave.CorrectSampleScale(result.samples); + var signal = new NWaves.Signals.DiscreteSignal(sampleRate, result.samples); + signal = NWaves.Operations.Operation.Resample(signal, 44100); + result.samples = signal.Samples; + var source = new WaveSource(0, 0, 0, 1); + source.SetSamples(result.samples); + WaveFileWriter.CreateWaveFile16(wavPath, new ExportAdapter(source).ToMono(1, 0)); + } + } + } + } else if (this.singer.singerVersion.StartsWith("v3.")) { + // F0ファイル生成 + if (!File.Exists(f0Path)) { + ArgParam = $"{fullScorePath} {monoTimingPath} {f0Path} {melspecPath} {wavPath} {modelDir} --skip-timing --skip-melspec --skip-wav -f {toneShift} -m -t"; + if (existNeutrinoClient) { + ProcessRunner.Run(NeutrinoClientExe, ArgParam, Log.Logger); + } else { + ProcessRunner.Run(NeutrinoExe, ArgParam, Log.Logger); + } + } + if (cancellation.IsCancellationRequested) { + return new RenderResult(); + } + //メルスペクトグラムファイル生成 + if (File.Exists(f0Path) && !File.Exists(melspecPath)) { + if (phrase.phones[0].direct) { + ArgParam = $"{fullScorePath} {monoTimingPath} {f0Path} {melspecPath} {wavPath} {modelDir} --skip-timing --skip-f0 --skip-wav -f {toneShift} -m -t"; + } else { + double[] f0 = LoadFile(f0Path); + int totalFrames = f0.Length; + int headFrames = (int)Math.Ceiling(headMs / 1000.0 * 99.84); + int tailFrames = (int)Math.Floor(tailMs / 1000.0 * 99.84); + var editorF0 = SampleCurve(phrase, phrase.pitches, 0, 9.984, totalFrames, headFrames, tailFrames, x => MusicMath.ToneToFreq(x * 0.01)); + SaveFile(editorf0Path, editorF0); + // F0の編集とメルスペクトグラムの生成はセット + ArgParam = $"{fullScorePath} {monoTimingPath} {editorf0Path} {melspecPath} {wavPath} {modelDir} --skip-timing --skip-f0 --skip-wav -f {toneShift} -m -t"; + } + if (existNeutrinoClient) { + ProcessRunner.Run(NeutrinoClientExe, ArgParam, Log.Logger); + } else { + ProcessRunner.Run(NeutrinoExe, ArgParam, Log.Logger); + } + } + if (cancellation.IsCancellationRequested) { + return new RenderResult(); + } + //音声ファイル生成 + if 
(!File.Exists(wavPath) && File.Exists(f0Path) && File.Exists(melspecPath)) { + if (phrase.phones[0].direct) { + ArgParam = $"{fullScorePath} {monoTimingPath} {f0Path} {melspecPath} {wavPath} {modelDir} --skip-timing --skip-f0 --skip-melspec -f {toneShift} -m -t"; + } else { + // TODO: melspectrogram editing + ArgParam = $"{fullScorePath} {monoTimingPath} {editorf0Path} {melspecPath} {wavPath} {modelDir} --skip-timing --skip-f0 --skip-melspec -f {toneShift} -m -t"; + } + if (existNeutrinoClient) { + ProcessRunner.Run(NeutrinoClientExe, ArgParam, Log.Logger); + } else { + ProcessRunner.Run(NeutrinoExe, ArgParam, Log.Logger); + } + using (var waveStream = new WaveFileReader(wavPath)) { + result.samples = Wave.GetSamples(waveStream.ToSampleProvider()); + } + Wave.CorrectSampleScale(result.samples); + var signal = new NWaves.Signals.DiscreteSignal(sampleRate, result.samples); + signal = NWaves.Operations.Operation.Resample(signal, 44100); + // Keep the resampled audio; without this the resample result was computed and then dropped. + result.samples = signal.Samples; + var source = new WaveSource(0, 0, 0, 1); + source.SetSamples(result.samples); + WaveFileWriter.CreateWaveFile16(wavPath, new ExportAdapter(source).ToMono(1, 0)); + } + } else { + Log.Error($"Unsupported NEUTRINO version: {this.singer.singerVersion}"); + result.samples = new float[0]; + return result; + } + progress.Complete(phrase.phones.Length, progressInfo); + try { + if (File.Exists(wavPath)) { + using (var waveStream = new WaveFileReader(wavPath)) { + + result.samples = Wave.GetSamples(waveStream.ToSampleProvider().ToMono(1, 0)); + } + if (result.samples != null) { + Renderers.ApplyDynamics(phrase, result); + } + } + } catch (Exception e) { + Log.Error(e.Message); + result.samples = new float[0]; + } + return result; + } + }); + return task; + } + + double[] SampleCurve(RenderPhrase phrase, float[] curve, double defaultValue, double frameMs, int length, int headFrames, int tailFrames, Func convert) { + const int interval = 5; + var result = new double[length]; + if (curve == null) { + Array.Fill(result, defaultValue); + return result; + } + 
for (int i = 0; i < length - headFrames - tailFrames; i++) { + double posMs = phrase.positionMs - phrase.leadingMs + i * frameMs; + int ticks = phrase.timeAxis.MsPosToTickPos(posMs) - (phrase.position - phrase.leading); + int index = Math.Max(0, (int)((double)ticks / interval)); + if (index < curve.Length) { + result[i + headFrames] = convert(curve[index]); + } + } + Array.Fill(result, defaultValue, 0, headFrames); + Array.Fill(result, defaultValue, length - tailFrames, tailFrames); + return result; + } + + + public override UExpressionDescriptor[] GetSuggestedExpressions(USinger singer, URenderSettings renderSettings) { + var result = new List { + //energy + //new UExpressionDescriptor{ + // name="energy (curve)", + // abbr=ENE, + // type=UExpressionType.Curve, + // min=-100, + // max=100, + // defaultValue=0, + // isFlag=false, + //}, + ////engine + new UExpressionDescriptor { + name = "NEUTRINO engine type (~2.x)", + abbr = NTYP, + type = UExpressionType.Options, + options = Enum.GetNames(), + isFlag = false + }, + ////engine mode + new UExpressionDescriptor { + name = "NEUTRINO engine mode (~2.x)", + abbr = NMOD, + type = UExpressionType.Options, + options = Enum.GetNames(), + isFlag = false + }, + //expressiveness + new UExpressionDescriptor { + name = "pitch smoothened (curve)", + abbr = SMOC, + type = UExpressionType.Curve, + min = 0, + max = 10, + defaultValue = 0, + isFlag = false + }, + }; + return result.ToArray(); + } + + public override string ToString() => Renderers.NEUTRINO; + + public override RenderPitchResult LoadRenderedPitch(RenderPhrase phrase) { + var result = new RenderPitchResult(); + try { + var hash = HashPhraseGroups(phrase); + string tmpPath = Path.Join(PathManager.Inst.CachePath, $"ne-{hash:x16}_temp"); + string f0Path = Path.Join(tmpPath, $"ne-{phrase.preEffectHash}.f0"); + if (!File.Exists(f0Path)) { + return null; + } + double[] f0 = LoadFile(f0Path); + + int totalFrames = f0.Length; + int headFrames = 0; + int tailFrames = 0; + if 
(this.singer.singerVersion.StartsWith("v3.")) { + headFrames = (int)Math.Round(headMs / 1000.0 * 99.84); + tailFrames = (int)Math.Round(tailMs / 1000.0 * 99.84); + } else { + headFrames = (int)Math.Round(headMs / framePeriod); + tailFrames = (int)Math.Round(tailMs / framePeriod); + } + var exprCurve = phrase.curves.FirstOrDefault(curve => curve.Item1.Equals(SMOC)); + if (exprCurve != null) { + + List exprs = SampleCurve(phrase, exprCurve.Item2, 0, framePeriod, totalFrames, headFrames, tailFrames, x => x).Select(x => (int)x).ToList(); + var f0S = new F0Smoother(f0.ToList()); + f0S.SmoothenWidthList = exprs; + f0 = f0S.GetSmoothenedF0List(f0.ToList()).ToArray(); + } + + result = new RenderPitchResult() { + tones = f0.Select(f => (float)MusicMath.FreqToTone(f)).ToArray(), + }; + result.ticks = new float[result.tones.Length]; + var layout = Layout(phrase); + var t = layout.positionMs - layout.leadingMs; + for (int i = 0; i < result.tones.Length; i++) { + if (this.singer.singerVersion.StartsWith("v3.")) { + t += 10; + } else { + t += framePeriod; + } + result.ticks[i] = phrase.timeAxis.MsPosToTickPos(t) - phrase.position; + } + } catch (Exception e) { + // Log the failure and report "no pitch" like the missing-file path, instead of returning a half-built result. + Log.Error(e, "Failed to load rendered pitch."); + return null; + } + return result; + } + + + ulong HashPhraseGroups(RenderPhrase phrase) { + using (var stream = new MemoryStream()) { + using (var writer = new BinaryWriter(stream)) { + writer.Write(phrase.preEffectHash); + writer.Write(phrase.phones[0].toneShift); + phrase.phones.ForEach(x => writer.Write(x.tone)); + return XXH64.DigestOf(stream.ToArray()); + } + } + } + } +} diff --git a/OpenUtau.Core/Neutrino/NeutrinoServerLauncher.cs b/OpenUtau.Core/Neutrino/NeutrinoServerLauncher.cs new file mode 100644 index 000000000..bfbb88ec1 --- /dev/null +++ b/OpenUtau.Core/Neutrino/NeutrinoServerLauncher.cs @@ -0,0 +1,107 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; +using System.Linq; +using System.Net.Sockets; +using System.Threading; +using OpenUtau.Core.Util; +using Serilog; + +namespace 
OpenUtau.Core.Neutrino { + static class NeutrinoServerLauncher { + static readonly object lockObj = new object(); + static readonly Dictionary serverProcesses = + new Dictionary(StringComparer.OrdinalIgnoreCase); + static NeutrinoServerLauncher() { + AppDomain.CurrentDomain.ProcessExit += (_, _) => StopAll(); + } + + public static void EnsureStarted(string serverExe, int? serverPort = 12345, string host = "127.0.0.1") { + if (string.IsNullOrEmpty(serverExe) || !File.Exists(serverExe)) { + return; + } + + serverExe = Path.GetFullPath(serverExe); + var serverName = Path.GetFileNameWithoutExtension(serverExe); + if (Process.GetProcessesByName(serverName).Any() || IsServerReady(host, serverPort)) { + Log.Information("Background server already running: {ServerExe}", serverExe); + return; + } + + lock (lockObj) { + if (serverProcesses.TryGetValue(serverExe, out var runningProcess) && + !runningProcess.HasExited) { + return; + } + if (Process.GetProcessesByName(serverName).Any() || IsServerReady(host, serverPort)) { + return; + } + + var startedProcess = ProcessRunner.StartBackground( + serverExe, + string.Empty, + Log.Logger, + workDir: Path.GetDirectoryName(serverExe)); + startedProcess.EnableRaisingEvents = true; + startedProcess.Exited += (_, _) => { + lock (lockObj) { + if (serverProcesses.TryGetValue(serverExe, out var currentProcess) && + ReferenceEquals(currentProcess, startedProcess)) { + serverProcesses.Remove(serverExe); + } + } + }; + serverProcesses[serverExe] = startedProcess; + WaitForServerReady(host, serverPort); + Log.Information("Started background server: {ServerExe}", serverExe); + } + } + + static void StopAll() { + Process[] processes; + lock (lockObj) { + processes = serverProcesses.Values.ToArray(); + serverProcesses.Clear(); + } + + foreach (var process in processes) { + try { + if (process.HasExited) { + process.Dispose(); + continue; + } + process.Kill(entireProcessTree: true); + process.WaitForExit(3000); + process.Dispose(); + } catch 
(Exception e) { + Log.Warning(e, "Failed to stop background server process."); + } + } + } + + static bool IsServerReady(string host, int? serverPort) { + if (!serverPort.HasValue) { + return false; + } + try { + using var client = new TcpClient(); + return client.ConnectAsync(host, serverPort.Value).Wait(50); + } catch { + return false; + } + } + + static void WaitForServerReady(string host, int? serverPort) { + if (!serverPort.HasValue) { + return; + } + for (int i = 0; i < 50; i++) { + if (IsServerReady(host, serverPort)) { + return; + } + Thread.Sleep(100); + } + } + } +} diff --git a/OpenUtau.Core/Neutrino/NeutrinoSinger.cs b/OpenUtau.Core/Neutrino/NeutrinoSinger.cs new file mode 100644 index 000000000..c8ce69777 --- /dev/null +++ b/OpenUtau.Core/Neutrino/NeutrinoSinger.cs @@ -0,0 +1,218 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text; +using K4os.Hash.xxHash; +using OpenUtau.Classic; +using OpenUtau.Core.Ustx; +using OpenUtau.Core.Util; +using Serilog; +using static OpenUtau.Api.Phonemizer; + +namespace OpenUtau.Core.Neutrino { + public class NeutrinoSinger : USinger { + public override string Id => voicebank.Id; + public override string Name => voicebank.Name; + public override Dictionary LocalizedNames => voicebank.LocalizedNames; + public override USingerType SingerType => voicebank.SingerType; + public override string BasePath => voicebank.BasePath; + public override string Author => voicebank.Author; + public override string Voice => voicebank.Voice; + public override string Location => Path.GetDirectoryName(voicebank.File); + public override string Web => voicebank.Web; + public override string Version => voicebank.Version; + public override string OtherInfo => voicebank.OtherInfo; + public override IList Errors => errors; + public override string Avatar => voicebank.Image == null ? 
null : Path.Combine(Location, voicebank.Image); + public override byte[] AvatarData => avatarData; + public override string Portrait => voicebank.Portrait == null ? null : Path.Combine(Location, voicebank.Portrait); + public override float PortraitOpacity => voicebank.PortraitOpacity; + public override int PortraitHeight => voicebank.PortraitHeight; + public override string Sample => voicebank.Sample == null ? null : Path.Combine(Location, voicebank.Sample); + public override string DefaultPhonemizer => voicebank.DefaultPhonemizer; + public override Encoding TextFileEncoding => voicebank.TextFileEncoding; + public override IList Subbanks => subbanks; + public override IList Otos => otos; + + Voicebank voicebank; + List errors = new List(); + List subbanks = new List(); + List otos = new List(); + Dictionary otoMap = new Dictionary(); + + HashSet phonemes = new HashSet(); + Dictionary table = new Dictionary(); + + public byte[] avatarData; + public ulong voicebankNameHash; + public string singerVersion = string.Empty; + + public NeutrinoSinger(Voicebank voicebank) { + this.voicebank = voicebank; + found = true; + } + + public override void EnsureLoaded() { + if (Loaded) { + return; + } + Reload(); + } + + public override void Reload() { + if (!Found) { + return; + } + try { + voicebank.Reload(); + Load(); + loaded = true; + } catch (Exception e) { + Log.Error(e, $"Failed to load {voicebank.File}"); + } + } + + void Load() { + voicebankNameHash = Hash(); + phonemes.Clear(); + table.Clear(); + otos.Clear(); + subbanks.Clear(); + + string infoPath = Path.Join(Location, "info.toml"); + if (File.Exists(infoPath)) { + var info = TomlData.Load(infoPath); + info.TryGetValue("acoustic", "version", out object? version); + if (version != null) { + singerVersion = version.ToString() ?? string.Empty; + } + info.TryGetValue("", "version", out object? version_); + if (version_ != null) { + singerVersion = version_.ToString() ?? 
string.Empty; + } + } + + if (voicebank.Subbanks == null || voicebank.Subbanks.Count == 0 || + voicebank.Subbanks.Count == 1 && string.IsNullOrEmpty(voicebank.Subbanks[0].Color)) { + subbanks.Add(new USubbank(new Subbank() { + Prefix = string.Empty, + Suffix = string.Empty, + ToneRanges = new[] { "C1-B7" }, + })); + } else { + subbanks.AddRange(voicebank.Subbanks + .Select(subbank => new USubbank(subbank))); + } + + try { + string basePath = Path.Join(PathManager.Inst.DependencyPath, "NEUTRINO"); + if (!Directory.Exists(basePath)) { + if (singerVersion.StartsWith("v2.7")) { + basePath = Path.Join(PathManager.Inst.DependencyPath, "NEUTRINO_v27"); + } else if (singerVersion.StartsWith("v3.")) { + basePath = Path.Join(PathManager.Inst.DependencyPath, "NEUTRINO_v3"); + } + } + // Join each path segment separately: a literal backslash is not a directory separator on macOS/Linux. + var tablePath = Path.Join(basePath, "settings", "dic", "japanese.utf_8.table"); + foreach (var line in File.ReadAllLines(tablePath)) { + // Skip comment lines and blank lines; a blank line would otherwise register an empty-string key. + if (line.Contains("#") || string.IsNullOrWhiteSpace(line)) { + continue; + } + var parts = line.Trim().Split(); + table[parts[0]] = parts.Skip(1).ToArray(); + foreach (var phoneme in table[parts[0]]) { + phonemes.Add(phoneme); + } + } + var confPath = Path.Join(basePath, "settings", "dic", "japanese.utf_8.conf"); + foreach (var line in File.ReadAllLines(confPath)) { + if (line.Contains('=')) { + var lineSplit = line.Split("="); + var key = lineSplit[0]; + var value = lineSplit[1]; + var phonemes_ = value.Trim(new char[] { '\"' }).Split(","); + foreach (var phoneme in phonemes_) { + phonemes.Add(phoneme); + } + } + } + phonemes.Add("pau"); + phonemes.Add("br"); + } catch (Exception e) { + Log.Error(e, $"Failed to load table for {Name}"); + } + + var dummyOtoSet = new UOtoSet(new OtoSet(), Location); + // Reset the alias map together with otos; stale entries from a previous load would leave otos empty after Reload(). + otoMap.Clear(); + foreach (var phone in phonemes) { + foreach (var subbank in subbanks) { + var uOto = UOto.OfDummy(phone); + if (!otoMap.ContainsKey(uOto.Alias)) { + otos.Add(uOto); + otoMap.Add(uOto.Alias, uOto); + } else { + //Errors.Add($"oto conflict {Otos[oto.Alias].Set}/{oto.Alias} and 
{otoSet.Name}/{oto.Alias}"); + } + } + } + + if (Avatar != null && File.Exists(Avatar)) { + try { + using (var stream = new FileStream(Avatar, FileMode.Open, FileAccess.Read)) { + using (var memoryStream = new MemoryStream()) { + stream.CopyTo(memoryStream); + avatarData = memoryStream.ToArray(); + } + } + } catch (Exception e) { + avatarData = null; + Log.Error(e, "Failed to load avatar data."); + } + } else { + avatarData = null; + Log.Error("Avatar can't be found"); + } + } + + public override bool TryGetOto(string phoneme, out UOto oto) { + var parts = phoneme.Split(); + if (parts.All(p => phonemes.Contains(p))) { + oto = UOto.OfDummy(phoneme); + return true; + } + oto = null; + return false; + } + + public override IEnumerable GetSuggestions(string text) { + if (text != null) { + text = text.ToLowerInvariant().Replace(" ", ""); + } + bool all = string.IsNullOrEmpty(text); + return table.Keys + .Where(key => all || key.Contains(text)) + .Select(key => UOto.OfDummy(key)); + } + + public override byte[] LoadPortrait() { + return string.IsNullOrEmpty(Portrait) + ? null + : File.ReadAllBytes(Portrait); + } + + public override byte[] LoadSample() { + return string.IsNullOrEmpty(Sample) + ? 
null + : File.ReadAllBytes(Sample); + } + + private ulong Hash() { + using (var stream = new MemoryStream()) { + using (var writer = new BinaryWriter(stream)) { + writer.Write(Name); + return XXH64.DigestOf(stream.ToArray()); + } + } + } + } +} diff --git a/OpenUtau.Core/Render/Renderers.cs b/OpenUtau.Core/Render/Renderers.cs index 469a6ba84..7ce000077 100644 --- a/OpenUtau.Core/Render/Renderers.cs +++ b/OpenUtau.Core/Render/Renderers.cs @@ -15,12 +15,14 @@ public static class Renderers { public const string VOGEN = "VOGEN"; public const string DIFFSINGER = "DIFFSINGER"; public const string VOICEVOX = "VOICEVOX"; + public const string NEUTRINO = "NEUTRINO"; static readonly string[] classicRenderers = new[] { WORLDLINE_R, CLASSIC }; static readonly string[] enunuRenderers = new[] { ENUNU }; static readonly string[] vogenRenderers = new[] { VOGEN }; static readonly string[] diffSingerRenderers = new[] { DIFFSINGER }; static readonly string[] voicevoxRenderers = new[] { VOICEVOX }; + static readonly string[] neutrinoRenderers = new[] { NEUTRINO }; static readonly string[] noRenderers = new string[0]; public static string[] GetSupportedRenderers(USingerType singerType) { @@ -35,6 +37,8 @@ public static string[] GetSupportedRenderers(USingerType singerType) { return diffSingerRenderers; case USingerType.Voicevox: return voicevoxRenderers; + case USingerType.Neutrino: + return neutrinoRenderers; default: return noRenderers; } @@ -70,6 +74,8 @@ public static IRenderer CreateRenderer(string renderer) { return new DiffSinger.DiffSingerRenderer(); } else if (renderer == VOICEVOX) { return new Voicevox.VoicevoxRenderer(); + } else if (renderer == NEUTRINO) { + return new Neutrino.NeutrinoRenderer(); } return null; } diff --git a/OpenUtau.Core/Ustx/USinger.cs b/OpenUtau.Core/Ustx/USinger.cs index 9e952b457..5c9daf00d 100644 --- a/OpenUtau.Core/Ustx/USinger.cs +++ b/OpenUtau.Core/Ustx/USinger.cs @@ -193,7 +193,7 @@ public override string ToString() { } } - [Flags] public enum 
USingerType { Classic = 0x1, Enunu = 0x2, Vogen = 0x4, DiffSinger = 0x5, Voicevox = 0x6 } + [Flags] public enum USingerType { Classic = 0x1, Enunu = 0x2, Vogen = 0x4, DiffSinger = 0x5, Voicevox = 0x6, Neutrino = 0x7 } public static class SingerTypeUtils { public static Dictionary SingerTypeNames = new Dictionary(){ @@ -201,6 +201,7 @@ public static class SingerTypeUtils { {USingerType.Enunu, "enunu"}, {USingerType.DiffSinger, "diffsinger"}, {USingerType.Voicevox, "voicevox"}, + {USingerType.Neutrino, "neutrino"}, }; public static Dictionary SingerTypeFromName = new Dictionary(){ @@ -208,6 +209,7 @@ public static class SingerTypeUtils { {"enunu", USingerType.Enunu}, {"diffsinger", USingerType.DiffSinger}, {"voicevox", USingerType.Voicevox}, + {"neutrino", USingerType.Neutrino}, }; } diff --git a/OpenUtau.Core/Util/ProcessRunner.cs b/OpenUtau.Core/Util/ProcessRunner.cs index b599de30d..83cea9d89 100644 --- a/OpenUtau.Core/Util/ProcessRunner.cs +++ b/OpenUtau.Core/Util/ProcessRunner.cs @@ -1,4 +1,5 @@ using System; +using System.Globalization; using System.Diagnostics; using System.IO; using System.Threading; @@ -7,6 +8,15 @@ namespace OpenUtau.Core.Util { public static class ProcessRunner { public static bool DebugSwitch { get; set; } + + static string GetLanguageEnvironmentValue() { + var culture = CultureInfo.CurrentCulture; + if (culture == CultureInfo.InvariantCulture || string.IsNullOrWhiteSpace(culture.Name)) { + return "C.UTF-8"; + } + return culture.Name.Replace('-', '_') + ".UTF-8"; + } + public static void Run(string file, string args, ILogger logger, string workDir = null, int timeoutMs = 60000) { if (!File.Exists(file)) { throw new FileNotFoundException($"Executable {file} not found."); @@ -14,7 +24,7 @@ public static void Run(string file, string args, ILogger logger, string workDir var threadId = Thread.CurrentThread.ManagedThreadId; using (var proc = new Process()) { proc.StartInfo = new ProcessStartInfo(file, args) { - Environment = {{"LANG", 
"ja_JP.utf8"}}, + Environment = { { "LANG", "ja_JP.utf8" } }, UseShellExecute = false, RedirectStandardOutput = DebugSwitch, RedirectStandardError = true, @@ -54,5 +64,48 @@ public static void Run(string file, string args, ILogger logger, string workDir } } } + + public static Process StartBackground(string file, string args, ILogger logger, string workDir = null) { + if (!File.Exists(file)) { + throw new FileNotFoundException($"Executable {file} not found."); + } + + var threadId = Thread.CurrentThread.ManagedThreadId; + var proc = new Process(); + proc.StartInfo = new ProcessStartInfo(file, args) { + Environment = { { "LANG", GetLanguageEnvironmentValue() } }, + UseShellExecute = false, + RedirectStandardOutput = true, + RedirectStandardError = true, + CreateNoWindow = false, + WorkingDirectory = workDir, + }; + if (DebugSwitch) { + proc.OutputDataReceived += (o, e) => { + if (!string.IsNullOrEmpty(e.Data)) { + logger.Information($"ProcessRunner >>> [thread-{threadId}] {e.Data}"); + } + }; + } + proc.ErrorDataReceived += (o, e) => { + if (!string.IsNullOrEmpty(e.Data)) { + logger.Error($"ProcessRunner >>> [thread-{threadId}] {e.Data}"); + } + }; + proc.Start(); + if (DebugSwitch) { + proc.BeginOutputReadLine(); + } + proc.BeginErrorReadLine(); + proc.EnableRaisingEvents = true; + proc.Exited += (_, _) => { + try { + logger.Warning($"ProcessRunner >>> [thread-{threadId}] Exited with code {proc.ExitCode}"); + } catch (Exception e) { + logger.Error(e, $"ProcessRunner >>> [thread-{threadId}] Failed to read exit code"); + } + }; + return proc; + } } } diff --git a/OpenUtau.Core/Util/TomlData.cs b/OpenUtau.Core/Util/TomlData.cs new file mode 100644 index 000000000..1db10b561 --- /dev/null +++ b/OpenUtau.Core/Util/TomlData.cs @@ -0,0 +1,245 @@ +using System; +using System.Collections.Generic; +using System.Collections.ObjectModel; +using System.Globalization; +using System.IO; +using System.Linq; +using System.Text; + +namespace OpenUtau.Core.Util { + public sealed 
class TomlData { + static readonly IReadOnlyDictionary emptySection = + new ReadOnlyDictionary(new Dictionary()); + + readonly IReadOnlyDictionary> sections; + + private TomlData(IReadOnlyDictionary> sections) { + this.sections = sections; + } + + public IReadOnlyCollection SectionNames => sections.Keys.ToArray(); + + /// + /// Loads a TOML file by reading it line by line. + /// Root-level keys are stored under the empty section name. + /// + public static TomlData Load(string filePath) { + if (string.IsNullOrWhiteSpace(filePath)) { + throw new ArgumentException("File path is required.", nameof(filePath)); + } + + var sections = new Dictionary>(StringComparer.OrdinalIgnoreCase) { + [string.Empty] = new Dictionary(StringComparer.OrdinalIgnoreCase), + }; + var currentSection = string.Empty; + var lineNumber = 0; + + foreach (var rawLine in File.ReadLines(filePath, Encoding.UTF8)) { + lineNumber++; + var line = StripComment(rawLine).Trim(); + if (string.IsNullOrEmpty(line)) { + continue; + } + if (line.StartsWith("[[", StringComparison.Ordinal) && line.EndsWith("]]", StringComparison.Ordinal)) { + throw new InvalidDataException($"Array of tables is not supported: {Path.GetFileName(filePath)}:{lineNumber}"); + } + if (line.StartsWith("[", StringComparison.Ordinal) && line.EndsWith("]", StringComparison.Ordinal)) { + currentSection = line[1..^1].Trim(); + if (string.IsNullOrEmpty(currentSection)) { + throw new InvalidDataException($"Section name is empty: {Path.GetFileName(filePath)}:{lineNumber}"); + } + if (!sections.ContainsKey(currentSection)) { + sections[currentSection] = new Dictionary(StringComparer.OrdinalIgnoreCase); + } + continue; + } + + var separatorIndex = FindKeyValueSeparator(line); + if (separatorIndex < 0) { + throw new InvalidDataException($"Invalid TOML key/value line: {Path.GetFileName(filePath)}:{lineNumber}"); + } + + var key = line[..separatorIndex].Trim(); + var valueText = line[(separatorIndex + 1)..].Trim(); + if (string.IsNullOrEmpty(key)) { 
+ throw new InvalidDataException($"Key name is empty: {Path.GetFileName(filePath)}:{lineNumber}"); + } + + sections[currentSection][key] = ParseValue(valueText); + } + + return new TomlData(sections.ToDictionary( + pair => pair.Key, + pair => (IReadOnlyDictionary)new ReadOnlyDictionary(pair.Value), + StringComparer.OrdinalIgnoreCase)); + } + + /// + /// Gets all key/value pairs for a section. + /// Root-level keys use an empty section name. + /// + public bool TryGetSection(string section, out IReadOnlyDictionary values) { + ArgumentNullException.ThrowIfNull(section); + if (sections.TryGetValue(section, out var sectionValues)) { + values = sectionValues; + return true; + } + values = emptySection; + return false; + } + + /// + /// Gets a value from a section by key. + /// Root-level keys use an empty section name. + /// + public bool TryGetValue(string section, string key, out object? value) { + ArgumentNullException.ThrowIfNull(section); + ArgumentNullException.ThrowIfNull(key); + if (sections.TryGetValue(section, out var sectionValues) && sectionValues.TryGetValue(key, out value)) { + return true; + } + value = null; + return false; + } + + /// + /// Enumerates all TOML entries as section/key/value tuples. + /// Root-level keys use an empty section name. + /// + public IEnumerable<(string Section, string Key, object? 
Value)> EnumerateEntries() { + foreach (var section in sections) { + foreach (var item in section.Value) { + yield return (section.Key, item.Key, item.Value); + } + } + } + + private static int FindKeyValueSeparator(string line) { + var inDoubleQuote = false; + var inSingleQuote = false; + var escape = false; + for (var i = 0; i < line.Length; i++) { + var c = line[i]; + if (escape) { + escape = false; + continue; + } + if (c == '\\' && inDoubleQuote) { + escape = true; + continue; + } + if (c == '"' && !inSingleQuote) { + inDoubleQuote = !inDoubleQuote; + continue; + } + if (c == '\'' && !inDoubleQuote) { + inSingleQuote = !inSingleQuote; + continue; + } + if (c == '=' && !inDoubleQuote && !inSingleQuote) { + return i; + } + } + return -1; + } + + private static string StripComment(string line) { + var inDoubleQuote = false; + var inSingleQuote = false; + var escape = false; + for (var i = 0; i < line.Length; i++) { + var c = line[i]; + if (escape) { + escape = false; + continue; + } + if (c == '\\' && inDoubleQuote) { + escape = true; + continue; + } + if (c == '"' && !inSingleQuote) { + inDoubleQuote = !inDoubleQuote; + continue; + } + if (c == '\'' && !inDoubleQuote) { + inSingleQuote = !inSingleQuote; + continue; + } + if (c == '#' && !inDoubleQuote && !inSingleQuote) { + return line[..i]; + } + } + return line; + } + + private static object? 
ParseValue(string valueText) { + if (string.IsNullOrWhiteSpace(valueText)) { + return string.Empty; + } + if ((valueText.StartsWith('"') && valueText.EndsWith('"')) || + (valueText.StartsWith('\'') && valueText.EndsWith('\''))) { + return valueText[1..^1]; + } + if (valueText.StartsWith("[", StringComparison.Ordinal) && valueText.EndsWith("]", StringComparison.Ordinal)) { + return ParseArray(valueText[1..^1]); + } + if (bool.TryParse(valueText, out var boolValue)) { + return boolValue; + } + if (long.TryParse(valueText, NumberStyles.Integer, CultureInfo.InvariantCulture, out var longValue)) { + return longValue; + } + if (double.TryParse(valueText, NumberStyles.Float, CultureInfo.InvariantCulture, out var doubleValue)) { + return doubleValue; + } + if (DateTimeOffset.TryParse(valueText, CultureInfo.InvariantCulture, DateTimeStyles.RoundtripKind, out var dateTimeOffset)) { + return dateTimeOffset; + } + return valueText; + } + + private static object?[] ParseArray(string valueText) { + if (string.IsNullOrWhiteSpace(valueText)) { + return Array.Empty(); + } + var values = new List(); + var builder = new StringBuilder(); + var inDoubleQuote = false; + var inSingleQuote = false; + var escape = false; + for (var i = 0; i < valueText.Length; i++) { + var c = valueText[i]; + if (escape) { + builder.Append(c); + escape = false; + continue; + } + if (c == '\\' && inDoubleQuote) { + builder.Append(c); + escape = true; + continue; + } + if (c == '"' && !inSingleQuote) { + inDoubleQuote = !inDoubleQuote; + builder.Append(c); + continue; + } + if (c == '\'' && !inDoubleQuote) { + inSingleQuote = !inSingleQuote; + builder.Append(c); + continue; + } + if (c == ',' && !inDoubleQuote && !inSingleQuote) { + values.Add(ParseValue(builder.ToString().Trim())); + builder.Clear(); + continue; + } + builder.Append(c); + } + if (builder.Length > 0) { + values.Add(ParseValue(builder.ToString().Trim())); + } + return values.ToArray(); + } + } +} diff --git 
a/OpenUtau.Test/Core/Util/TomlDataTests.cs b/OpenUtau.Test/Core/Util/TomlDataTests.cs new file mode 100644 index 000000000..d65eac5f4 --- /dev/null +++ b/OpenUtau.Test/Core/Util/TomlDataTests.cs @@ -0,0 +1,54 @@ +using System; +using System.IO; +using OpenUtau.Core.Util; +using Xunit; + +namespace OpenUtau.Core.Util { + public class TomlDataTests { + [Fact] + public void WhenLoadingTomlThenItReadsRootKey() { + var filePath = Path.Combine(Path.GetTempPath(), $"{Guid.NewGuid():N}.toml"); + File.WriteAllText(filePath, "title = \"demo\"\n"); + + var toml = TomlData.Load(filePath); + + Assert.True(toml.TryGetValue(string.Empty, "title", out var value)); + Assert.Equal("demo", value); + } + + [Fact] + public void WhenLoadingTomlThenItReadsSectionKey() { + var filePath = Path.Combine(Path.GetTempPath(), $"{Guid.NewGuid():N}.toml"); + File.WriteAllText(filePath, "[singer]\nname = \"Tom\"\n"); + + var toml = TomlData.Load(filePath); + + Assert.True(toml.TryGetValue("singer", "name", out var value)); + Assert.Equal("Tom", value); + } + + [Fact] + public void WhenLoadingTomlThenItEnumeratesSectionKeyValue() { + var filePath = Path.Combine(Path.GetTempPath(), $"{Guid.NewGuid():N}.toml"); + File.WriteAllText(filePath, "[singer]\nname = \"Tom\"\n"); + + var toml = TomlData.Load(filePath); + + Assert.Contains(toml.EnumerateEntries(), entry => + entry.Section == "singer" && + entry.Key == "name" && + Equals(entry.Value, "Tom")); + } + + [Fact] + public void WhenLoadingTomlThenItReadsFileLineByLineWithComments() { + var filePath = Path.Combine(Path.GetTempPath(), $"{Guid.NewGuid():N}.toml"); + File.WriteAllText(filePath, "# comment\n[singer]\nname = \"Tom\" # inline\nage = 14\n"); + + var toml = TomlData.Load(filePath); + + Assert.True(toml.TryGetValue("singer", "age", out var value)); + Assert.Equal(14L, value); + } + } +} diff --git a/OpenUtau/ViewModels/SingersViewModel.cs b/OpenUtau/ViewModels/SingersViewModel.cs index 1b5a65751..42edc0d89 100644 --- 
a/OpenUtau/ViewModels/SingersViewModel.cs +++ b/OpenUtau/ViewModels/SingersViewModel.cs @@ -115,7 +115,7 @@ void AttachSinger() { } ).ToList(); var singerTypes = new string[] { - "utau", "enunu", "diffsinger", "voicevox" + "utau", "enunu", "diffsinger", "voicevox", "neutrino" }; setSingerTypeMenuItems = singerTypes.Select(singerType => new MenuItemViewModel((SingerTypeUtils.SingerTypeNames.TryGetValue(singer.SingerType, out var name) ? name : "") == singerType) { From 92c1f736c31a052256005cb1aef032a36ee86cdc Mon Sep 17 00:00:00 2001 From: rokujyushi Date: Mon, 18 May 2026 18:21:57 +0900 Subject: [PATCH 03/18] Fix expected values for test assertions The test assertions within the MeasureForwardBackwardAreComputedPerBar method have been fixed. The expected values for the indices e0, e1, and e2 have been modified, and the following items were updated: - Forward Index (e10): Fixed expected values for e0[9], e1[9], and e2[9]. - Backward Index (e11): Fixed expected values for e0[10], e1[10], and e2[10]. - Forward Percent (e16): Fixed expected values for e1[15] and e2[15]. - Backward Percent (e17): Fixed expected values for e0[16] and e1[16]. As a result, the test's expected values have been updated to align with the specifications. 
--- OpenUtau.Test/Core/Util/HtsSpecTests.cs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/OpenUtau.Test/Core/Util/HtsSpecTests.cs b/OpenUtau.Test/Core/Util/HtsSpecTests.cs index 2b66cb46d..a54682df9 100644 --- a/OpenUtau.Test/Core/Util/HtsSpecTests.cs +++ b/OpenUtau.Test/Core/Util/HtsSpecTests.cs @@ -99,13 +99,13 @@ public void MeasureForwardBackwardAreComputedPerBar() { var e2 = n2.e(); // forward index (e10) - Assert.Equal("0", e0[9]); - Assert.Equal("1", e1[9]); - Assert.Equal("2", e2[9]); + Assert.Equal("1", e0[9]); + Assert.Equal("2", e1[9]); + Assert.Equal("3", e2[9]); // backward index (e11) - Assert.Equal("2", e0[10]); - Assert.Equal("1", e1[10]); - Assert.Equal("0", e2[10]); + Assert.Equal("3", e0[10]); + Assert.Equal("2", e1[10]); + Assert.Equal("1", e2[10]); // forward ms in centiseconds (e12) Assert.Equal("0", e0[11]); @@ -127,11 +127,11 @@ public void MeasureForwardBackwardAreComputedPerBar() { // forward percent (e16) Assert.Equal("0", e0[15]); - Assert.Equal("33", e1[15]); - Assert.Equal("66", e2[15]); + Assert.Equal("50", e1[15]); + Assert.Equal("100", e2[15]); // backward percent (e17) - Assert.Equal("66", e0[16]); - Assert.Equal("33", e1[16]); + Assert.Equal("100", e0[16]); + Assert.Equal("50", e1[16]); Assert.Equal("0", e2[16]); } From ebfe4e49d95bdc04a2f49d0fd3cd99ba787c6319 Mon Sep 17 00:00:00 2001 From: rokujyushi Date: Mon, 18 May 2026 18:51:14 +0900 Subject: [PATCH 04/18] Addressing Copilot AI review comments --- OpenUtau.Core/Hts/HTSLabelPhonemizer.cs | 32 ++++++++++--------- OpenUtau.Core/Hts/HTSLabelRenderer.cs | 9 ++++-- .../EnunuOnnx/EnunuOnnxPhonemizer.cs | 30 +++++++++-------- 3 files changed, 40 insertions(+), 31 deletions(-) diff --git a/OpenUtau.Core/Hts/HTSLabelPhonemizer.cs b/OpenUtau.Core/Hts/HTSLabelPhonemizer.cs index d3c7f5f46..892e88d56 100644 --- a/OpenUtau.Core/Hts/HTSLabelPhonemizer.cs +++ b/OpenUtau.Core/Hts/HTSLabelPhonemizer.cs @@ -54,8 +54,7 @@ public override void 
SetSinger(USinger singer) { string rootPath; if (File.Exists(Path.Join(singer.Location, "enunux", "enuconfig.yaml"))) { rootPath = Path.Combine(singer.Location, "enunux"); - } - if (File.Exists(Path.Join(singer.Location, "enuconfig.yaml"))) { + }else if (File.Exists(Path.Join(singer.Location, "enuconfig.yaml"))) { rootPath = Path.Combine(singer.Location, "enunux"); } else { rootPath = singer.Location; @@ -63,7 +62,7 @@ public override void SetSinger(USinger singer) { //Load g2p from enunux.yaml //g2p dict should be load after enunu dict try { - g2p = LoadG2p(singer.Location); + g2p = LoadG2p(rootPath); } catch (Exception e) { Log.Error(e, "failed to load g2p dictionary"); return; @@ -176,7 +175,7 @@ public override void SetUp(Note[][] notes, UProject project, UTrack track) { if (existSymbol) { splitFlag = false; continue; - } else if (existSymbol && !splitFlag) { + } else if (!existSymbol && !splitFlag) { splitFlag = true; continue; } @@ -659,17 +658,20 @@ protected override void ProcessPart(Note[][] phrase) { } public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevNeighbour, Note? 
nextNeighbour, Note[] prevs) { - if (!partResult.TryGetValue(notes[0].position, out var phonemes)) { - throw new Exception("error"); - } - return new Result { - phonemes = phonemes - .Select((tu) => new Phoneme() { - phoneme = tu.Item1, - position = tu.Item2, - }) - .ToArray(), - }; + if (partResult.TryGetValue(notes[0].position, out var phonemes)) { + return new Result { + phonemes = phonemes + .Select((tu) => new Phoneme() { + phoneme = tu.Item1, + position = tu.Item2, + }) + .ToArray(), + }; + } + if (SetUpException != null) { + throw new Exception("Phonemizer failed to process.", SetUpException); + } + throw new Exception("Part result not found"); } } } diff --git a/OpenUtau.Core/Hts/HTSLabelRenderer.cs b/OpenUtau.Core/Hts/HTSLabelRenderer.cs index 986505de6..26bf021a3 100644 --- a/OpenUtau.Core/Hts/HTSLabelRenderer.cs +++ b/OpenUtau.Core/Hts/HTSLabelRenderer.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.IO; using System.Linq; +using System.Runtime.CompilerServices; using System.Text; using System.Threading; using System.Threading.Tasks; @@ -66,6 +67,7 @@ public virtual void SetUp() { phoneDict.Add("-", new string[] { "pau" }); phoneDict.Add("SP", new string[] { "pau" }); phoneDict.Add("AP", new string[] { "br" }); + LoadDict(monoScorePath, Encoding.UTF8); g2p = LoadG2p(); } @@ -156,7 +158,7 @@ protected IG2p LoadG2p() { if (existSymbol) { splitFlag = false; continue; - } else if (existSymbol && !splitFlag) { + } else if (!existSymbol && !splitFlag) { splitFlag = true; continue; } @@ -383,11 +385,12 @@ public void ProcessPart(RenderPhrase phrase) { continue; } } + int noteCount = tuples.Count; for (int i = 0; i < tuples.Count; i++) { var htsNote = tuples[i].Item1; htsNotes.Add(htsNote); - htsNote.index = i; - htsNote.indexBackwards = htsNotes.Count - i; + htsNote.index = i + 1; + htsNote.indexBackwards = noteCount - i; htsNote.sentenceDurMs = sentenceDurMs; htsNote.sentenceDurTicks = sentenceDurTicks; var tmpPhonemes = 
HTSNoteToPhonemes(htsNote); diff --git a/OpenUtau.Plugin.Builtin/EnunuOnnx/EnunuOnnxPhonemizer.cs b/OpenUtau.Plugin.Builtin/EnunuOnnx/EnunuOnnxPhonemizer.cs index d9c664bcf..d3f3a1ccc 100644 --- a/OpenUtau.Plugin.Builtin/EnunuOnnx/EnunuOnnxPhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/EnunuOnnx/EnunuOnnxPhonemizer.cs @@ -452,31 +452,35 @@ protected virtual HTSNote[] MakeSyllables(Note[] inputNotes, int startTick) { HTSPhoneme[] HTSNoteToPhonemes(HTSNote htsNote) { var htsPhonemes = htsNote.symbols.Select(x => new HTSPhoneme(x, htsNote)).ToArray(); - foreach (int i in Enumerable.Range(0, htsPhonemes.Length)) { + foreach (var i in Enumerable.Range(0, htsPhonemes.Length)) { htsPhonemes[i].type = GetPhonemeType(htsPhonemes[i].symbol); htsPhonemes[i].position = i + 1; htsPhonemes[i].position_backward = htsPhonemes.Length - i; } - foreach (int i in Enumerable.Range(0, htsPhonemes.Length)) { + foreach (var i in Enumerable.Range(0, htsPhonemes.Length)) { if (htsPhonemes[i].type.Equals("c")) { - int next = i + 1; - if (next < htsPhonemes.Length) { - if (htsPhonemes[next].type.Equals("v")) { - htsPhonemes[i].next_vowel_distance = 1; + var prev = i - 1; + if (prev >= 0) { + if (htsPhonemes[prev].type.Equals("v")) { + htsPhonemes[i].prev_vowel_distance = 1; + } else if (htsPhonemes[prev].prev_vowel_distance > 0) { + htsPhonemes[i].prev_vowel_distance = htsPhonemes[prev].prev_vowel_distance + 1; } else { - htsPhonemes[i].next_vowel_distance = htsPhonemes[next].next_vowel_distance + 1; + htsPhonemes[i].prev_vowel_distance = 0; } } } } - for (int i = htsPhonemes.Length - 1; i > 0; --i) { + for (var i = htsPhonemes.Length - 1; i >= 0; --i) { if (htsPhonemes[i].type.Equals("c")) { - int prev = i - 1; - if (prev >= 0) { - if (htsPhonemes[prev].type.Equals("v")) { - htsPhonemes[i].prev_vowel_distance = 1; + var next = i + 1; + if (next < htsPhonemes.Length) { + if (htsPhonemes[next].type.Equals("v")) { + htsPhonemes[i].next_vowel_distance = 1; + } else if 
(htsPhonemes[next].next_vowel_distance > 0) { + htsPhonemes[i].next_vowel_distance = htsPhonemes[next].next_vowel_distance + 1; } else { - htsPhonemes[i].prev_vowel_distance = htsPhonemes[prev].prev_vowel_distance + 1; + htsPhonemes[i].next_vowel_distance = 0; } } } From 53cac3ff32cd28663e609bdd479b1c9f0e791457 Mon Sep 17 00:00:00 2001 From: rokujyushi Date: Mon, 18 May 2026 19:37:11 +0900 Subject: [PATCH 05/18] Addressing Copilot AI review comments --- OpenUtau.Core/Util/HTS.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/OpenUtau.Core/Util/HTS.cs b/OpenUtau.Core/Util/HTS.cs index 790f6bf65..cb55436fc 100644 --- a/OpenUtau.Core/Util/HTS.cs +++ b/OpenUtau.Core/Util/HTS.cs @@ -40,8 +40,7 @@ public static string GetToneName(int noteNum) { } public static string GetOctaveNum(int noteNum) { - NameInOctave.TryGetValue(KeysInOctave[noteNum % 12].ToString(), out int num); - return noteNum < 0 ? string.Empty : num.ToString(); + return noteNum < 0 ? string.Empty : (noteNum / 12 - 1).ToString(); } //return -1 if error From 54f77a36da1ec7b26003d87dde13d8620f11861b Mon Sep 17 00:00:00 2001 From: rokujyushi Date: Mon, 18 May 2026 19:55:59 +0900 Subject: [PATCH 06/18] Changing from a design that shares logic in the base class to implementing everything in the subclasses Change HTSLabelRenderer: SetUp method to abstract The SetUp method has been changed from virtual to abstract. As a result, all subclasses are now required to implement SetUp. The original logic within the SetUp method (initialization of phoneDict, language settings, dictionary loading, etc.) has been removed, and these responsibilities are now delegated to the subclasses. 
--- OpenUtau.Core/Hts/HTSLabelRenderer.cs | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/OpenUtau.Core/Hts/HTSLabelRenderer.cs b/OpenUtau.Core/Hts/HTSLabelRenderer.cs index 26bf021a3..b2d0df3ef 100644 --- a/OpenUtau.Core/Hts/HTSLabelRenderer.cs +++ b/OpenUtau.Core/Hts/HTSLabelRenderer.cs @@ -59,17 +59,7 @@ public abstract class HTSLabelRenderer : IRenderer { protected string monoTimingPath = string.Empty; protected string fullTimingPath = string.Empty; - public virtual void SetUp() { - phoneDict.Clear(); - lang = "JPN";//TODO: use singer.language - // Lyrics often handled in OpenUtau - phoneDict.Add("R", new string[] { "pau" }); - phoneDict.Add("-", new string[] { "pau" }); - phoneDict.Add("SP", new string[] { "pau" }); - phoneDict.Add("AP", new string[] { "br" }); - LoadDict(monoScorePath, Encoding.UTF8); - g2p = LoadG2p(); - } + public abstract void SetUp(); protected virtual void LoadDict(string path, Encoding encoding) { if (path.EndsWith(".conf")) { From ea527ac41e0d8362fe57e19b85e64ebba2bf437e Mon Sep 17 00:00:00 2001 From: rokujyushi Date: Tue, 19 May 2026 20:33:24 +0900 Subject: [PATCH 07/18] =?UTF-8?q?Unix=E3=81=A7=E5=AE=9F=E8=A1=8C=E5=8F=AF?= =?UTF-8?q?=E8=83=BD=E3=81=AA=E3=83=95=E3=82=A1=E3=82=A4=E3=83=AB=E3=83=91?= =?UTF-8?q?=E3=82=B9=E3=81=AB=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Neutrino/NeutrinoLabelPhonemizer.cs | 6 +-- OpenUtau.Core/Neutrino/NeutrinoRenderer.cs | 44 ++++++++++--------- OpenUtau.Core/Neutrino/NeutrinoSinger.cs | 5 +-- 3 files changed, 28 insertions(+), 27 deletions(-) diff --git a/OpenUtau.Core/Neutrino/NeutrinoLabelPhonemizer.cs b/OpenUtau.Core/Neutrino/NeutrinoLabelPhonemizer.cs index b605e968f..ef7a73594 100644 --- a/OpenUtau.Core/Neutrino/NeutrinoLabelPhonemizer.cs +++ b/OpenUtau.Core/Neutrino/NeutrinoLabelPhonemizer.cs @@ -40,8 +40,8 @@ public override void SetSinger(USinger singer) { //Load Dictionary try { 
phoneDict.Clear(); - LoadDict(Path.Join(Path.Join(basePath, @"settings\dic"), confPath), singer.TextFileEncoding); - LoadDict(Path.Join(Path.Join(basePath, @"settings\dic"), tablePath), singer.TextFileEncoding); + LoadDict(Path.Join(Path.Join(basePath, @"settings/dic"), confPath), singer.TextFileEncoding); + LoadDict(Path.Join(Path.Join(basePath, @"settings/dic"), tablePath), singer.TextFileEncoding); // Lyrics often handled in OpenUtau phoneDict.Add("R",new string[] { "pau" }); phoneDict.Add("-", new string[] { "pau" }); @@ -148,7 +148,7 @@ protected override void SendScore(Note[][] phrase) { string melspecPath = Path.Join(htstmpPath, $"{voicebankNameHash}_tmp.melspec"); string wavPath = Path.Join(htstmpPath, $"{voicebankNameHash}_tmp.wav"); //string PhraseList = Path.Join(htstmpPath, $"{voicebankNameHash}_phraselist.txt"); - string modelDir = this.singer.Location+"\\"; + string modelDir = this.singer.Location+"/"; var attr = phrase[0][0].phonemeAttributes?.FirstOrDefault(attr => attr.index == 0) ?? 
default; int toneShift = attr.toneShift; int numThreads = Preferences.Default.NumRenderThreads; diff --git a/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs b/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs index c61ff8a83..72eb07ed6 100644 --- a/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs +++ b/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs @@ -6,6 +6,8 @@ using System.Threading.Tasks; using K4os.Hash.xxHash; using NAudio.Wave; +using NWaves.Operations; +using NWaves.Signals; using OpenUtau.Core.Format; using OpenUtau.Core.Hts; using OpenUtau.Core.Render; @@ -90,8 +92,8 @@ public override void SetUp() { //Load Dictionary try { phoneDict.Clear(); - LoadDict(Path.Join(Path.Join(basePath, @".\settings\dic"), confPath), singer.TextFileEncoding); - LoadDict(Path.Join(Path.Join(basePath, @".\settings\dic"), tablePath), singer.TextFileEncoding); + LoadDict(Path.Join(Path.Join(basePath, @"./settings/dic"), confPath), singer.TextFileEncoding); + LoadDict(Path.Join(Path.Join(basePath, @"./settings/dic"), tablePath), singer.TextFileEncoding); // Lyrics often handled in OpenUtau phoneDict.Add("R", new string[] { "pau" }); phoneDict.Add("-", new string[] { "pau" }); @@ -104,17 +106,17 @@ public override void SetUp() { } LoadG2p(); if (OS.IsWindows()) { - NeutrinoExe = Path.Join(basePath, @".\bin", "NEUTRINO.exe"); - NeutrinoClientExe = Path.Join(basePath, @".\bin", "neutrino_client.exe"); - NeutrinoServerExe = Path.Join(basePath, @".\bin", "neutrino_server.exe"); - NsfExe = Path.Join(basePath, @".\bin", "NSF.exe"); - WorldExe = Path.Join(basePath, @".\bin", "WORLD.exe"); - VocoderClientExe = Path.Join(basePath, @".\bin", "vocoder_client.exe"); - VocoderServerExe = Path.Join(basePath, @".\bin", "vocoder_server.exe"); + NeutrinoExe = Path.Join(basePath, @"./bin", "NEUTRINO.exe"); + NeutrinoClientExe = Path.Join(basePath, @"./bin", "neutrino_client.exe"); + NeutrinoServerExe = Path.Join(basePath, @"./bin", "neutrino_server.exe"); + NsfExe = Path.Join(basePath, @"./bin", "NSF.exe"); + WorldExe = 
Path.Join(basePath, @"./bin", "WORLD.exe"); + VocoderClientExe = Path.Join(basePath, @"./bin", "vocoder_client.exe"); + VocoderServerExe = Path.Join(basePath, @"./bin", "vocoder_server.exe"); } else if (OS.IsMacOS() || OS.IsLinux()) { - NeutrinoExe = Path.Join(basePath, @".\bin", "NEUTRINO"); - NsfExe = Path.Join(basePath, @".\bin", "NSF"); - WorldExe = Path.Join(basePath, @".\bin", "WORLD"); + NeutrinoExe = Path.Join(basePath, @"./bin", "NEUTRINO"); + NsfExe = Path.Join(basePath, @"./bin", "NSF"); + WorldExe = Path.Join(basePath, @"./bin", "WORLD"); } else { throw new NotSupportedException("Platform not supported."); } @@ -188,7 +190,7 @@ public override Task Render(RenderPhrase phrase, Progress progress string bapPath = Path.Join(tmpPath, $"ne-{phrase.preEffectHash}.bap"); fullScorePath = Path.Join(tmpPath, $"ne-{phrase.preEffectHash}_full_score.lab"); monoTimingPath = Path.Join(tmpPath, $"ne-{phrase.preEffectHash}_mono_timing.lab"); - string modelDir = this.singer.Location + "\\"; + string modelDir = this.singer.Location + "/"; int toneShift = phrase.phones[0] != null ? 
phrase.phones[0].toneShift : 0; int numThreads = Preferences.Default.NumRenderThreads; if (!File.Exists(fullScorePath) && !File.Exists(monoTimingPath)) { @@ -249,8 +251,8 @@ public override Task Render(RenderPhrase phrase, Progress progress result.samples = Wave.GetSamples(waveStream.ToSampleProvider()); } Wave.CorrectSampleScale(result.samples); - var signal = new NWaves.Signals.DiscreteSignal(sampleRate, result.samples); - signal = NWaves.Operations.Operation.Resample(signal, 44100); + var signal = new DiscreteSignal(sampleRate, result.samples); + signal = Operation.Resample(signal, 44100); var source = new WaveSource(0, 0, 0, 1); source.SetSamples(result.samples); WaveFileWriter.CreateWaveFile16(wavPath, new ExportAdapter(source).ToMono(1, 0)); @@ -281,8 +283,8 @@ public override Task Render(RenderPhrase phrase, Progress progress result.samples = Wave.GetSamples(waveStream.ToSampleProvider()); } Wave.CorrectSampleScale(result.samples); - var signal = new NWaves.Signals.DiscreteSignal(sampleRate, result.samples); - signal = NWaves.Operations.Operation.Resample(signal, 44100); + var signal = new DiscreteSignal(sampleRate, result.samples); + signal = Operation.Resample(signal, 44100); var source = new WaveSource(0, 0, 0, 1); source.SetSamples(result.samples); WaveFileWriter.CreateWaveFile16(wavPath, new ExportAdapter(source).ToMono(1, 0)); @@ -314,8 +316,8 @@ public override Task Render(RenderPhrase phrase, Progress progress gender, tension, breathiness, voicing); result.samples = samples.Select(d => (float)d).ToArray(); Wave.CorrectSampleScale(result.samples); - var signal = new NWaves.Signals.DiscreteSignal(sampleRate, result.samples); - signal = NWaves.Operations.Operation.Resample(signal, 44100); + var signal = new DiscreteSignal(sampleRate, result.samples); + signal = Operation.Resample(signal, 44100); result.samples = signal.Samples; var source = new WaveSource(0, 0, 0, 1); source.SetSamples(result.samples); @@ -376,8 +378,8 @@ public override Task 
Render(RenderPhrase phrase, Progress progress result.samples = Wave.GetSamples(waveStream.ToSampleProvider()); } Wave.CorrectSampleScale(result.samples); - var signal = new NWaves.Signals.DiscreteSignal(sampleRate, result.samples); - signal = NWaves.Operations.Operation.Resample(signal, 44100); + var signal = new DiscreteSignal(sampleRate, result.samples); + signal = Operation.Resample(signal, 44100); var source = new WaveSource(0, 0, 0, 1); source.SetSamples(result.samples); WaveFileWriter.CreateWaveFile16(wavPath, new ExportAdapter(source).ToMono(1, 0)); diff --git a/OpenUtau.Core/Neutrino/NeutrinoSinger.cs b/OpenUtau.Core/Neutrino/NeutrinoSinger.cs index c8ce69777..323eebfb7 100644 --- a/OpenUtau.Core/Neutrino/NeutrinoSinger.cs +++ b/OpenUtau.Core/Neutrino/NeutrinoSinger.cs @@ -8,7 +8,6 @@ using OpenUtau.Core.Ustx; using OpenUtau.Core.Util; using Serilog; -using static OpenUtau.Api.Phonemizer; namespace OpenUtau.Core.Neutrino { public class NeutrinoSinger : USinger { @@ -114,7 +113,7 @@ void Load() { basePath = Path.Join(PathManager.Inst.DependencyPath, "NEUTRINO_v3"); } } - var tablePath = Path.Join(Path.Join(basePath, @".\settings\dic"), "japanese.utf_8.table"); + var tablePath = Path.Join(Path.Join(basePath, @"./settings/dic"), "japanese.utf_8.table"); foreach (var line in File.ReadAllLines(tablePath)) { if (line.Contains("#")) { continue; @@ -125,7 +124,7 @@ void Load() { phonemes.Add(phoneme); } } - var confPath = Path.Join(Path.Join(basePath, @".\settings\dic"), "japanese.utf_8.conf"); + var confPath = Path.Join(Path.Join(basePath, @"./settings/dic"), "japanese.utf_8.conf"); foreach (var line in File.ReadAllLines(confPath)) { if (line.Contains('=')) { var lineSplit = line.Split("="); From 6b02dd42d912dd0690d7a1a71a906587398233b7 Mon Sep 17 00:00:00 2001 From: rokujyushi Date: Tue, 19 May 2026 20:36:22 +0900 Subject: [PATCH 08/18] Change the voicebank version information to be retrieved from the Toml file. 
--- OpenUtau.Core/Neutrino/NeutrinoSinger.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/OpenUtau.Core/Neutrino/NeutrinoSinger.cs b/OpenUtau.Core/Neutrino/NeutrinoSinger.cs index c8ce69777..add8c54df 100644 --- a/OpenUtau.Core/Neutrino/NeutrinoSinger.cs +++ b/OpenUtau.Core/Neutrino/NeutrinoSinger.cs @@ -21,7 +21,7 @@ public class NeutrinoSinger : USinger { public override string Voice => voicebank.Voice; public override string Location => Path.GetDirectoryName(voicebank.File); public override string Web => voicebank.Web; - public override string Version => voicebank.Version; + public override string Version => singerVersion; public override string OtherInfo => voicebank.OtherInfo; public override IList Errors => errors; public override string Avatar => voicebank.Image == null ? null : Path.Combine(Location, voicebank.Image); From 9b490266220532e350091d76d92f34f9ab040bc8 Mon Sep 17 00:00:00 2001 From: rokujyushi Date: Wed, 20 May 2026 12:11:02 +0900 Subject: [PATCH 09/18] Added processing to consider toneShift in pitch calculation Added a process to adjust the pitch based on the toneShift value of phrase.phones[0] when generating RenderPitchResult. This allows the pitch to be shifted in semitone steps (12 semitones per octave), ensuring accurate pitch calculation. --- OpenUtau.Core/Neutrino/NeutrinoRenderer.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs b/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs index 72eb07ed6..db2beafa1 100644 --- a/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs +++ b/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs @@ -505,8 +505,9 @@ public override RenderPitchResult LoadRenderedPitch(RenderPhrase phrase) { f0 = f0S.GetSmoothenedF0List(f0.ToList()).ToArray(); } + int toneShift = phrase.phones[0] != null ? 
phrase.phones[0].toneShift : 0; result = new RenderPitchResult() { - tones = f0.Select(f => (float)MusicMath.FreqToTone(f)).ToArray(), + tones = f0.Select(f => (float)MusicMath.FreqToTone(f * Math.Pow(2, ((toneShift * -1) / 12d)))).ToArray(), }; result.ticks = new float[result.tones.Length]; var layout = Layout(phrase); From 6e55aedb7e31b2df67b5c665b58d08915c8b5c0c Mon Sep 17 00:00:00 2001 From: rokujyushi Date: Wed, 20 May 2026 13:19:11 +0900 Subject: [PATCH 10/18] Refactoring of log improvements and environment variable settings In NeutrinoServerLauncher.cs, I changed the log output to string interpolation to improve readability. In ProcessRunner.cs, I removed the GetLanguageEnvironmentValue method and set the LANG environment variable to a fixed value of "ja_JP.utf8". Additionally, I updated the code to dynamically control RedirectStandardOutput based on the DebugSwitch. --- OpenUtau.Core/Neutrino/NeutrinoServerLauncher.cs | 4 ++-- OpenUtau.Core/Util/ProcessRunner.cs | 12 ++---------- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/OpenUtau.Core/Neutrino/NeutrinoServerLauncher.cs b/OpenUtau.Core/Neutrino/NeutrinoServerLauncher.cs index bfbb88ec1..cb5db1b94 100644 --- a/OpenUtau.Core/Neutrino/NeutrinoServerLauncher.cs +++ b/OpenUtau.Core/Neutrino/NeutrinoServerLauncher.cs @@ -25,7 +25,7 @@ public static void EnsureStarted(string serverExe, int? serverPort = 12345, stri serverExe = Path.GetFullPath(serverExe); var serverName = Path.GetFileNameWithoutExtension(serverExe); if (Process.GetProcessesByName(serverName).Any() || IsServerReady(host, serverPort)) { - Log.Information("Background server already running: {ServerExe}", serverExe); + Log.Information($"Background server already running: {serverExe}"); return; } @@ -54,7 +54,7 @@ public static void EnsureStarted(string serverExe, int? 
serverPort = 12345, stri }; serverProcesses[serverExe] = startedProcess; WaitForServerReady(host, serverPort); - Log.Information("Started background server: {ServerExe}", serverExe); + Log.Information($"Started background server: {serverExe}"); } } diff --git a/OpenUtau.Core/Util/ProcessRunner.cs b/OpenUtau.Core/Util/ProcessRunner.cs index 83cea9d89..e509c4426 100644 --- a/OpenUtau.Core/Util/ProcessRunner.cs +++ b/OpenUtau.Core/Util/ProcessRunner.cs @@ -9,14 +9,6 @@ namespace OpenUtau.Core.Util { public static class ProcessRunner { public static bool DebugSwitch { get; set; } - static string GetLanguageEnvironmentValue() { - var culture = CultureInfo.CurrentCulture; - if (culture == CultureInfo.InvariantCulture || string.IsNullOrWhiteSpace(culture.Name)) { - return "C.UTF-8"; - } - return culture.Name.Replace('-', '_') + ".UTF-8"; - } - public static void Run(string file, string args, ILogger logger, string workDir = null, int timeoutMs = 60000) { if (!File.Exists(file)) { throw new FileNotFoundException($"Executable {file} not found."); @@ -73,9 +65,9 @@ public static Process StartBackground(string file, string args, ILogger logger, var threadId = Thread.CurrentThread.ManagedThreadId; var proc = new Process(); proc.StartInfo = new ProcessStartInfo(file, args) { - Environment = { { "LANG", GetLanguageEnvironmentValue() } }, + Environment = { { "LANG", "ja_JP.utf8" } }, UseShellExecute = false, - RedirectStandardOutput = true, + RedirectStandardOutput = DebugSwitch, RedirectStandardError = true, CreateNoWindow = false, WorkingDirectory = workDir, From 930782a911ae0405bdc9561b520396f24c3d528d Mon Sep 17 00:00:00 2001 From: rokujyushi Date: Wed, 20 May 2026 13:50:51 +0900 Subject: [PATCH 11/18] Unifying Path Handling Path handling has been unified into a platform-independent format using Path.Join. This removes Windows-style path separators and improves code readability and portability. 
--- .../Neutrino/NeutrinoLabelPhonemizer.cs | 26 ++++++------------- OpenUtau.Core/Neutrino/NeutrinoRenderer.cs | 24 ++++++++--------- OpenUtau.Core/Neutrino/NeutrinoSinger.cs | 4 +-- 3 files changed, 22 insertions(+), 32 deletions(-) diff --git a/OpenUtau.Core/Neutrino/NeutrinoLabelPhonemizer.cs b/OpenUtau.Core/Neutrino/NeutrinoLabelPhonemizer.cs index ef7a73594..5958713ab 100644 --- a/OpenUtau.Core/Neutrino/NeutrinoLabelPhonemizer.cs +++ b/OpenUtau.Core/Neutrino/NeutrinoLabelPhonemizer.cs @@ -40,10 +40,10 @@ public override void SetSinger(USinger singer) { //Load Dictionary try { phoneDict.Clear(); - LoadDict(Path.Join(Path.Join(basePath, @"settings/dic"), confPath), singer.TextFileEncoding); - LoadDict(Path.Join(Path.Join(basePath, @"settings/dic"), tablePath), singer.TextFileEncoding); + LoadDict(Path.Join(basePath, "settings", "dic", confPath), singer.TextFileEncoding); + LoadDict(Path.Join(basePath, "settings", "dic", tablePath), singer.TextFileEncoding); // Lyrics often handled in OpenUtau - phoneDict.Add("R",new string[] { "pau" }); + phoneDict.Add("R", new string[] { "pau" }); phoneDict.Add("-", new string[] { "pau" }); phoneDict.Add("SP", new string[] { "pau" }); phoneDict.Add("AP", new string[] { "br" }); @@ -81,7 +81,7 @@ protected IG2p LoadG2p() { } if (!consonants.Contains(phoneme)) { builder.AddSymbol(phoneme, true); - }else { + } else { builder.AddSymbol(phoneme, false); } } @@ -111,7 +111,7 @@ protected override Note[][] PhraseAdjustments(Note[][] phrese) { var lyric = phrese[i][0].lyric; if (phoneDict["MACRON"].Contains(lyric) && (i > 0)) { if (g2p.IsValidSymbol(lyric)) { - var vowel = g2p.Query(phrese[i-1][0].lyric).FirstOrDefault(phoneme => vowels.Contains(phoneme)); + var vowel = g2p.Query(phrese[i - 1][0].lyric).FirstOrDefault(phoneme => vowels.Contains(phoneme)); phrese[i][0].lyric = vowel; } } @@ -121,7 +121,7 @@ protected override Note[][] PhraseAdjustments(Note[][] phrese) { protected override HTSNote CustomHTSNoteContext(HTSNote htsNote, 
Note note) { var fixs = GetPrefixAndSuffix(note); - if(!htsNote.isRest && !htsNote.isSlur) { + if (!htsNote.isRest && !htsNote.isSlur) { htsNote.langDependent = "0"; // no macron if (macronLyrics.Contains(note.lyric)) { htsNote.langDependent = "1"; // macron @@ -140,27 +140,17 @@ protected override HTSPhoneme[] CustomHTSPhonemeContext(HTSPhoneme[] htsPhonemes protected override void SendScore(Note[][] phrase) { if (this.singer.singerVersion == null) { - return; + return; } if (File.Exists(fullScorePath) && !File.Exists(monoTimingPath)) { var voicebankNameHash = $"{this.singer.voicebankNameHash:x16}"; string f0Path = Path.Join(htstmpPath, $"{voicebankNameHash}_tmp.f0"); string melspecPath = Path.Join(htstmpPath, $"{voicebankNameHash}_tmp.melspec"); string wavPath = Path.Join(htstmpPath, $"{voicebankNameHash}_tmp.wav"); - //string PhraseList = Path.Join(htstmpPath, $"{voicebankNameHash}_phraselist.txt"); - string modelDir = this.singer.Location+"/"; + string modelDir = this.singer.Location + "/"; var attr = phrase[0][0].phonemeAttributes?.FirstOrDefault(attr => attr.index == 0) ?? 
default; int toneShift = attr.toneShift; int numThreads = Preferences.Default.NumRenderThreads; - //int gpuMode = -1; - //switch (Preferences.Default.OnnxRunner) { - // case "directml": - // gpuMode = Preferences.Default.OnnxGpu; - // break; - // default: - // gpuMode = -1; - // break; - //} string ArgParam = string.Empty; if (this.singer.singerVersion.StartsWith("v2.7")) { ArgParam = $"{fullScorePath} {monoTimingPath} {f0Path} {melspecPath} {modelDir} -a -k {toneShift} -d 3 -n 1 -p {numThreads} -m -t"; diff --git a/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs b/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs index db2beafa1..d185276fe 100644 --- a/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs +++ b/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs @@ -92,8 +92,8 @@ public override void SetUp() { //Load Dictionary try { phoneDict.Clear(); - LoadDict(Path.Join(Path.Join(basePath, @"./settings/dic"), confPath), singer.TextFileEncoding); - LoadDict(Path.Join(Path.Join(basePath, @"./settings/dic"), tablePath), singer.TextFileEncoding); + LoadDict(Path.Join(basePath, "settings", "dic", confPath), singer.TextFileEncoding); + LoadDict(Path.Join(basePath, "settings", "dic", tablePath), singer.TextFileEncoding); // Lyrics often handled in OpenUtau phoneDict.Add("R", new string[] { "pau" }); phoneDict.Add("-", new string[] { "pau" }); @@ -106,17 +106,17 @@ public override void SetUp() { } LoadG2p(); if (OS.IsWindows()) { - NeutrinoExe = Path.Join(basePath, @"./bin", "NEUTRINO.exe"); - NeutrinoClientExe = Path.Join(basePath, @"./bin", "neutrino_client.exe"); - NeutrinoServerExe = Path.Join(basePath, @"./bin", "neutrino_server.exe"); - NsfExe = Path.Join(basePath, @"./bin", "NSF.exe"); - WorldExe = Path.Join(basePath, @"./bin", "WORLD.exe"); - VocoderClientExe = Path.Join(basePath, @"./bin", "vocoder_client.exe"); - VocoderServerExe = Path.Join(basePath, @"./bin", "vocoder_server.exe"); + NeutrinoExe = Path.Join(basePath, "bin", "NEUTRINO.exe"); + NeutrinoClientExe = Path.Join(basePath, "bin", 
"neutrino_client.exe"); + NeutrinoServerExe = Path.Join(basePath, "bin", "neutrino_server.exe"); + NsfExe = Path.Join(basePath, "bin", "NSF.exe"); + WorldExe = Path.Join(basePath, "bin", "WORLD.exe"); + VocoderClientExe = Path.Join(basePath, "bin", "vocoder_client.exe"); + VocoderServerExe = Path.Join(basePath, "bin", "vocoder_server.exe"); } else if (OS.IsMacOS() || OS.IsLinux()) { - NeutrinoExe = Path.Join(basePath, @"./bin", "NEUTRINO"); - NsfExe = Path.Join(basePath, @"./bin", "NSF"); - WorldExe = Path.Join(basePath, @"./bin", "WORLD"); + NeutrinoExe = Path.Join(basePath, "bin", "NEUTRINO"); + NsfExe = Path.Join(basePath, "bin", "NSF"); + WorldExe = Path.Join(basePath, "bin", "WORLD"); } else { throw new NotSupportedException("Platform not supported."); } diff --git a/OpenUtau.Core/Neutrino/NeutrinoSinger.cs b/OpenUtau.Core/Neutrino/NeutrinoSinger.cs index 7021c3cde..e912eb59e 100644 --- a/OpenUtau.Core/Neutrino/NeutrinoSinger.cs +++ b/OpenUtau.Core/Neutrino/NeutrinoSinger.cs @@ -113,7 +113,7 @@ void Load() { basePath = Path.Join(PathManager.Inst.DependencyPath, "NEUTRINO_v3"); } } - var tablePath = Path.Join(Path.Join(basePath, @"./settings/dic"), "japanese.utf_8.table"); + var tablePath = Path.Join(basePath, "settings", "dic", "japanese.utf_8.table"); foreach (var line in File.ReadAllLines(tablePath)) { if (line.Contains("#")) { continue; @@ -124,7 +124,7 @@ void Load() { phonemes.Add(phoneme); } } - var confPath = Path.Join(Path.Join(basePath, @"./settings/dic"), "japanese.utf_8.conf"); + var confPath = Path.Join(basePath, "settings", "dic", "japanese.utf_8.conf"); foreach (var line in File.ReadAllLines(confPath)) { if (line.Contains('=')) { var lineSplit = line.Split("="); From 536ccd5fc10ee6b5e77812f43e3af1ca7d5cc8cf Mon Sep 17 00:00:00 2001 From: rokujyushi Date: Wed, 20 May 2026 18:50:41 +0900 Subject: [PATCH 12/18] Address review comments from Copilot --- OpenUtau.Core/Hts/HTSLabelRenderer.cs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 
deletion(-) diff --git a/OpenUtau.Core/Hts/HTSLabelRenderer.cs b/OpenUtau.Core/Hts/HTSLabelRenderer.cs index b2d0df3ef..839786e0d 100644 --- a/OpenUtau.Core/Hts/HTSLabelRenderer.cs +++ b/OpenUtau.Core/Hts/HTSLabelRenderer.cs @@ -290,6 +290,17 @@ public void ProcessPart(RenderPhrase phrase) { int startTick = phrase.position; int endTick = phrase.position + phrase.duration; + // パディングを小節長で設定(開始・終了ともに1小節) + var sigStart = timeAxis.TimeSignatureAtTick(startTick); + var bpmStart = timeAxis.GetBpmAtTick(startTick); + var barLenMsStart = (int)Math.Round(60000.0 / bpmStart * sigStart.beatPerBar); + var barLenTicksStart = timeAxis.MsPosToTickPos(barLenMsStart); + + var sigEnd = timeAxis.TimeSignatureAtTick(endTick); + var bpmEnd = timeAxis.GetBpmAtTick(endTick); + var barLenMsEnd = (int)Math.Round(60000.0 / bpmEnd * sigEnd.beatPerBar); + var barLenTicksEnd = timeAxis.MsPosToTickPos(barLenMsEnd); + // 文全体の長さ(開始1小節 + 本体 + 終了1小節) double sentenceDurMs = headMs + phrase.endMs - phrase.positionMs + tailMs; int sentenceDurTicks = barLenTicksStart + (endTick - startTick) + barLenTicksEnd; @@ -440,7 +451,7 @@ public void ProcessPart(RenderPhrase phrase) { File.WriteAllLines(monoTimingPath, monoLabels_.Select(x => x.ToString())); } catch (Exception e) { Log.Error(e.ToString()); - throw e; + throw; } } From 60ca0370c1dda3b452df40b905ec116b7c7f2ed0 Mon Sep 17 00:00:00 2001 From: rokujyushi Date: Wed, 20 May 2026 19:08:19 +0900 Subject: [PATCH 13/18] Address review comments from Copilot --- OpenUtau.Core/Neutrino/NeutrinoLabelPhonemizer.cs | 6 ++++-- OpenUtau.Core/Neutrino/NeutrinoRenderer.cs | 1 - 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/OpenUtau.Core/Neutrino/NeutrinoLabelPhonemizer.cs b/OpenUtau.Core/Neutrino/NeutrinoLabelPhonemizer.cs index 5958713ab..66a1edd06 100644 --- a/OpenUtau.Core/Neutrino/NeutrinoLabelPhonemizer.cs +++ b/OpenUtau.Core/Neutrino/NeutrinoLabelPhonemizer.cs @@ -10,7 +10,7 @@ namespace OpenUtau.Core.Neutrino { [Phonemizer("Neutrino Label 
Phonemizer", "NEUTRINO")] - public class Neutrino : HTSLabelPhonemizer { + public class NeutrinoLabelPhonemizer : HTSLabelPhonemizer { string NeutrinoExe = string.Empty; string NeutrinoClientExe = string.Empty; string NeutrinoServerExe = string.Empty; @@ -112,7 +112,9 @@ protected override Note[][] PhraseAdjustments(Note[][] phrese) { if (phoneDict["MACRON"].Contains(lyric) && (i > 0)) { if (g2p.IsValidSymbol(lyric)) { var vowel = g2p.Query(phrese[i - 1][0].lyric).FirstOrDefault(phoneme => vowels.Contains(phoneme)); - phrese[i][0].lyric = vowel; + if (!string.IsNullOrEmpty(vowel)) { + phrese[i][0].lyric = vowel; + } } } } diff --git a/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs b/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs index d185276fe..24d43eea2 100644 --- a/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs +++ b/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs @@ -104,7 +104,6 @@ public override void SetUp() { Log.Error(e, $"failed to load dictionary from {tablePath}"); return; } - LoadG2p(); if (OS.IsWindows()) { NeutrinoExe = Path.Join(basePath, "bin", "NEUTRINO.exe"); NeutrinoClientExe = Path.Join(basePath, "bin", "neutrino_client.exe"); From 06dd804c9f33b99e0cb8feb7b1f51ed37b2cc8ac Mon Sep 17 00:00:00 2001 From: rokujyushi Date: Thu, 21 May 2026 10:25:33 +0900 Subject: [PATCH 14/18] Improvements to Vowel Extension Note Processing and Timing Calculations The conditions for the IsSyllableVowelExtensionNote method have been expanded to recognize lyrics starting with specific symbols as vowel extension notes. Additionally, the calculation of phonemeDuration within the ProcessPart method has been removed, and a logic to directly calculate startMs and endMs has been introduced. In phoneme timing calculations, new logic considering headMs and phrase.positionMs has been added, and a process to adjust the end time of existing monoLabels has been implemented. This prevents overlaps and inconsistencies, improving the accuracy of timing. 
Furthermore, the startMs of the monoLabel at the end of a phrase has been changed to sentenceDurMs - tailMs to ensure that the timing of the entire phrase is accurately reflected. --- OpenUtau.Core/Hts/HTSLabelRenderer.cs | 41 ++++++++++++++++++--------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/OpenUtau.Core/Hts/HTSLabelRenderer.cs b/OpenUtau.Core/Hts/HTSLabelRenderer.cs index 839786e0d..039f62d1a 100644 --- a/OpenUtau.Core/Hts/HTSLabelRenderer.cs +++ b/OpenUtau.Core/Hts/HTSLabelRenderer.cs @@ -214,7 +214,7 @@ private HTSNote makeHtsNote(string symbol, RenderNote note, int startTick, doubl } protected virtual bool IsSyllableVowelExtensionNote(RenderNote note) { - return note.lyric.StartsWith("+~") || note.lyric.StartsWith("+*"); + return note.lyric.StartsWith("+") || note.lyric.StartsWith("-") || note.lyric.StartsWith("*") || note.lyric.StartsWith("~"); } private string GetPhonemeType(string phoneme) { @@ -311,7 +311,7 @@ public void ProcessPart(RenderPhrase phrase) { List monoLabels_ = new List(); - double phonemeDuration = 0; + //double phonemeDuration = 0; HTSNote PaddingNoteStart = new HTSNote( symbols: new string[] { "pau" }, @@ -337,10 +337,10 @@ public void ProcessPart(RenderPhrase phrase) { monoLabels_.Add(new monoLabel() { symbol = htsPhonemes[0].symbol, - startMs = phonemeDuration, + startMs = 0, endMs = headMs }); - phonemeDuration += headMs; + //phonemeDuration += headMs; //Alignment var phonemesByNoteIndex = phrase.phones @@ -354,12 +354,18 @@ public void ProcessPart(RenderPhrase phrase) { var note = phrase.notes[noteIndex]; if (phonemesByNoteIndex.TryGetValue(noteIndex, out var phonemes)) { foreach (var phone in phonemes) { + var phoneStartMs = headMs + (phone.positionMs - phrase.positionMs); + var phoneEndMs = headMs + (phone.endMs - phrase.positionMs); + var lastMonoLabel = monoLabels_[^1]; + if (phoneStartMs < lastMonoLabel.endMs) { + lastMonoLabel.endMs = phoneStartMs; + monoLabels_[^1] = lastMonoLabel; + } 
monoLabels_.Add(new monoLabel() { symbol = phone.phoneme, - startMs = phonemeDuration, - endMs = phonemeDuration + phone.durationMs + startMs = phoneStartMs, + endMs = phoneEndMs }); - phonemeDuration += phone.durationMs; } lastBasePhonemes = phonemes; @@ -369,17 +375,24 @@ public void ProcessPart(RenderPhrase phrase) { // 拍点延長ノートは、直前の通常ノートの最後の母音を引き延ばす var extensionPhoneme = FindLastVowelOrLastPhoneme(lastBasePhonemes); if (!string.IsNullOrEmpty(extensionPhoneme.phoneme)) { - var extensionStartMs = note.positionMs - phrase.positionMs + headMs; - var extensionEndMs = note.endMs - phrase.positionMs + headMs; + var htsNote = makeHtsNote(extensionPhoneme.phoneme, note, startTick, headMs); + var extensionStartMs = htsNote.startMs; + var extensionEndMs = htsNote.endMs; + + var lastMonoLabel = monoLabels_[^1]; + if (lastMonoLabel.symbol == extensionPhoneme.phoneme && + lastMonoLabel.startMs < extensionStartMs && + extensionStartMs < lastMonoLabel.endMs) { + lastMonoLabel.endMs = extensionStartMs; + monoLabels_[^1] = lastMonoLabel; + } monoLabels_.Add(new monoLabel() { symbol = extensionPhoneme.phoneme, - startMs = phonemeDuration, - endMs = phonemeDuration + note.durationMs + startMs = extensionStartMs, + endMs = extensionEndMs }); - phonemeDuration += note.durationMs; - HTSNote htsNote = makeHtsNote(extensionPhoneme.phoneme, note, startTick, headMs); tuples.Add(Tuple.Create(htsNote, noteIndex)); } } else { @@ -424,7 +437,7 @@ public void ProcessPart(RenderPhrase phrase) { monoLabels_.Add(new monoLabel() { symbol = htsPhonemes[^1].symbol, - startMs = phonemeDuration, + startMs = sentenceDurMs - tailMs, endMs = sentenceDurMs }); From 317c910b07162e0dcca00d6f5f8f9153cd6b4eb2 Mon Sep 17 00:00:00 2001 From: rokujyushi Date: Fri, 22 May 2026 11:02:15 +0900 Subject: [PATCH 15/18] Changed the naming convention for cache files to use hash values. This now allows for the regeneration of parameters when the pitch is changed. 
--- OpenUtau.Core/Neutrino/NeutrinoRenderer.cs | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs b/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs index 24d43eea2..5b749008f 100644 --- a/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs +++ b/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs @@ -177,18 +177,18 @@ public override Task Render(RenderPhrase phrase, Progress progress } var result = Layout(phrase); var hash = HashPhraseGroups(phrase); - string tmpPath = Path.Join(PathManager.Inst.CachePath, $"ne-{hash:x16}_temp"); + string tmpPath = Path.Join(PathManager.Inst.CachePath, $"ne-{phrase.preEffectHash:x16}_temp"); if (!Directory.Exists(tmpPath)) { Directory.CreateDirectory(tmpPath); } string wavPath = Path.Join(tmpPath, $"ne-{phrase.hash}.wav"); - string f0Path = Path.Join(tmpPath, $"ne-{phrase.preEffectHash}.f0"); + string f0Path = Path.Join(tmpPath, $"ne-{phrase.hash}.f0"); string editorf0Path = Path.Join(tmpPath, $"ne-edit.f0"); - string melspecPath = Path.Join(tmpPath, $"ne-{phrase.preEffectHash}.melspec"); - string mgcPath = Path.Join(tmpPath, $"ne-{phrase.preEffectHash}.mgc"); - string bapPath = Path.Join(tmpPath, $"ne-{phrase.preEffectHash}.bap"); - fullScorePath = Path.Join(tmpPath, $"ne-{phrase.preEffectHash}_full_score.lab"); - monoTimingPath = Path.Join(tmpPath, $"ne-{phrase.preEffectHash}_mono_timing.lab"); + string melspecPath = Path.Join(tmpPath, $"ne-{phrase.hash}.melspec"); + string mgcPath = Path.Join(tmpPath, $"ne-{phrase.hash}.mgc"); + string bapPath = Path.Join(tmpPath, $"ne-{phrase.hash}.bap"); + fullScorePath = Path.Join(tmpPath, $"ne-{hash}_full_score.lab"); + monoTimingPath = Path.Join(tmpPath, $"ne-{hash}_mono_timing.lab"); string modelDir = this.singer.Location + "/"; int toneShift = phrase.phones[0] != null ? 
phrase.phones[0].toneShift : 0; int numThreads = Preferences.Default.NumRenderThreads; @@ -477,9 +477,8 @@ public override UExpressionDescriptor[] GetSuggestedExpressions(USinger singer, public override RenderPitchResult LoadRenderedPitch(RenderPhrase phrase) { var result = new RenderPitchResult(); try { - var hash = HashPhraseGroups(phrase); - string tmpPath = Path.Join(PathManager.Inst.CachePath, $"ne-{hash:x16}_temp"); - string f0Path = Path.Join(tmpPath, $"ne-{phrase.preEffectHash}.f0"); + string tmpPath = Path.Join(PathManager.Inst.CachePath, $"ne-{phrase.preEffectHash:x16}_temp"); + string f0Path = Path.Join(tmpPath, $"ne-{phrase.hash}.f0"); if (!File.Exists(f0Path)) { return null; } From 2e001a06e6eea624d7bbcab91382ae6a4f939afa Mon Sep 17 00:00:00 2001 From: rokujyushi Date: Fri, 22 May 2026 14:55:13 +0900 Subject: [PATCH 16/18] Improved path handling for command line arguments Modified path strings within ArgParam to be enclosed in double quotes. This fix ensures correct processing even when paths contain spaces. Updated multiple locations within the SendScore method of NeutrinoLabelPhonemizer.cs and the Render method of NeutrinoRenderer.cs. 
--- .../Neutrino/NeutrinoLabelPhonemizer.cs | 2 +- OpenUtau.Core/Neutrino/NeutrinoRenderer.cs | 20 +++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/OpenUtau.Core/Neutrino/NeutrinoLabelPhonemizer.cs b/OpenUtau.Core/Neutrino/NeutrinoLabelPhonemizer.cs index 66a1edd06..76f3c33b2 100644 --- a/OpenUtau.Core/Neutrino/NeutrinoLabelPhonemizer.cs +++ b/OpenUtau.Core/Neutrino/NeutrinoLabelPhonemizer.cs @@ -158,7 +158,7 @@ protected override void SendScore(Note[][] phrase) { ArgParam = $"{fullScorePath} {monoTimingPath} {f0Path} {melspecPath} {modelDir} -a -k {toneShift} -d 3 -n 1 -p {numThreads} -m -t"; } else if (this.singer.singerVersion.StartsWith("v3.")) { //TODO: -S support model - ArgParam = $"{fullScorePath} {monoTimingPath} {f0Path} {melspecPath} {wavPath} {modelDir} --skip-f0 --skip-melspec --skip-wav -f {toneShift} -m -t"; + ArgParam = $"\"{fullScorePath}\" \"{monoTimingPath}\" \"{f0Path}\" \"{melspecPath}\" \"{wavPath}\" \"{modelDir}\" --skip-f0 --skip-melspec --skip-wav -f {toneShift} -m -t"; } else { Log.Error($"Unsupported NEUTRINO version: {this.singer.singerVersion}"); return; diff --git a/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs b/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs index 5b749008f..899cf6924 100644 --- a/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs +++ b/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs @@ -218,7 +218,7 @@ public override Task Render(RenderPhrase phrase, Progress progress } } if (!File.Exists(f0Path) || !File.Exists(melspecPath)) { - ArgParam = $"{fullScorePath} {monoTimingPath} {f0Path} {melspecPath} {modelDir} -s -n 1 -o {numThreads} -k {toneShift} -m -t"; + ArgParam = $"\"{fullScorePath}\" \"{monoTimingPath}\" \"{f0Path}\" \"{melspecPath}\" \"{modelDir}\" -s -n 1 -o {numThreads} -k {toneShift} -m -t"; if (existNeutrinoClient) { ProcessRunner.Run(NeutrinoClientExe, ArgParam, Log.Logger); } else { @@ -230,7 +230,7 @@ public override Task Render(RenderPhrase phrase, Progress progress } if 
(!File.Exists(wavPath) && File.Exists(f0Path) && File.Exists(melspecPath)) { if (phrase.phones[0].direct) { - ArgParam = $"{f0Path} {melspecPath} {modelDir}{nsf}.bin {wavPath} -l {monoTimingPath} -n 1 -p {numThreads} -s{(int)sampleRate / 1000} -f {toneShift} -m -t"; + ArgParam = $"\"{f0Path}\" \"{melspecPath}\" \"{modelDir}{nsf}.bin\" \"{wavPath}\" -l \"{monoTimingPath}\" -n 1 -p {numThreads} -s{(int)sampleRate / 1000} -f {toneShift} -m -t"; } else { double[] f0 = LoadFile(f0Path); double[] melspec = LoadFile(melspecPath); @@ -239,7 +239,7 @@ public override Task Render(RenderPhrase phrase, Progress progress int tailFrames = (int)Math.Round(tailMs / framePeriod); double[] editorF0 = SampleCurve(phrase, phrase.pitches, 0, framePeriod, totalFrames, headFrames, tailFrames, x => MusicMath.ToneToFreq(x * 0.01)); SaveFile(editorf0Path, editorF0); - ArgParam = $"{editorf0Path} {melspecPath} {modelDir}{nsf}.bin {wavPath} -l {monoTimingPath} -n 1 -p {numThreads} -s{(int)sampleRate / 1000} -f {toneShift} -m -t"; + ArgParam = $"\"{editorf0Path}\" \"{melspecPath}\" \"{modelDir}{nsf}.bin\" \"{wavPath}\" -l \"{monoTimingPath}\" -n 1 -p {numThreads} -s{(int)sampleRate / 1000} -f {toneShift} -m -t"; } if (File.Exists(VocoderClientExe)) { ProcessRunner.Run(VocoderClientExe, ArgParam, Log.Logger); @@ -258,7 +258,7 @@ public override Task Render(RenderPhrase phrase, Progress progress } } else { if (!File.Exists(f0Path) || !File.Exists(mgcPath) || !File.Exists(bapPath)) { - ArgParam = $"{fullScorePath} {monoTimingPath} {f0Path} {melspecPath} {modelDir} -w {mgcPath} {bapPath} -s -n 1 -o {numThreads} -k {toneShift} -m -t"; + ArgParam = $"\"{fullScorePath}\" \"{monoTimingPath}\" \"{f0Path}\" \"{melspecPath}\" \"{modelDir}\" -w \"{mgcPath}\" \"{bapPath}\" -s -n 1 -o {numThreads} -k {toneShift} -m -t"; if (existNeutrinoClient) { ProcessRunner.Run(NeutrinoClientExe, ArgParam, Log.Logger); } else { @@ -272,7 +272,7 @@ public override Task Render(RenderPhrase phrase, Progress progress if 
(phrase.phones[0].direct) { float gender = 1f + (phrase.phones[0].flags.FirstOrDefault(f => f.Item3.Equals(Format.Ustx.GEN)).Item2 / 100) ?? 1f; float breathiness = phrase.phones[0].flags.FirstOrDefault(f => f.Item3.Equals(Format.Ustx.BRE)).Item2 ?? 0f; - ArgParam = $"{f0Path} {mgcPath} {bapPath} {wavPath} -n 1 -m {gender} -b {breathiness} -t"; + ArgParam = $"\"{f0Path}\" \"{mgcPath}\" \"{bapPath}\" \"{wavPath}\" -n 1 -m {gender} -b {breathiness} -t"; if (File.Exists(VocoderClientExe)) { ProcessRunner.Run(VocoderClientExe, ArgParam, Log.Logger); } else { @@ -327,7 +327,7 @@ public override Task Render(RenderPhrase phrase, Progress progress } else if (this.singer.singerVersion.StartsWith("v3.")) { // F0ファイル生成 if (!File.Exists(f0Path)) { - ArgParam = $"{fullScorePath} {monoTimingPath} {f0Path} {melspecPath} {wavPath} {modelDir} --skip-timing --skip-melspec --skip-wav -f {toneShift} -m -t"; + ArgParam = $"\"{fullScorePath}\" \"{monoTimingPath}\" \"{f0Path}\" \"{melspecPath}\" \"{wavPath}\" \"{modelDir}\" --skip-timing --skip-melspec --skip-wav -f {toneShift} -m -t"; if (existNeutrinoClient) { ProcessRunner.Run(NeutrinoClientExe, ArgParam, Log.Logger); } else { @@ -340,7 +340,7 @@ public override Task Render(RenderPhrase phrase, Progress progress //メルスペクトグラムファイル生成 if (File.Exists(f0Path) && !File.Exists(melspecPath)) { if (phrase.phones[0].direct) { - ArgParam = $"{fullScorePath} {monoTimingPath} {f0Path} {melspecPath} {wavPath} {modelDir} --skip-timing --skip-f0 --skip-wav -f {toneShift} -m -t"; + ArgParam = $"\"{fullScorePath}\" \"{monoTimingPath}\" \"{f0Path}\" \"{melspecPath}\" \"{wavPath}\" \"{modelDir}\" --skip-timing --skip-f0 --skip-wav -f {toneShift} -m -t"; } else { double[] f0 = LoadFile(f0Path); int totalFrames = f0.Length; @@ -349,7 +349,7 @@ public override Task Render(RenderPhrase phrase, Progress progress var editorF0 = SampleCurve(phrase, phrase.pitches, 0, 9.984, totalFrames, headFrames, tailFrames, x => MusicMath.ToneToFreq(x * 0.01)); 
SaveFile(editorf0Path, editorF0); // F0の編集とメルスペクトグラムの生成はセット - ArgParam = $"{fullScorePath} {monoTimingPath} {editorf0Path} {melspecPath} {wavPath} {modelDir} --skip-timing --skip-f0 --skip-wav -f {toneShift} -m -t"; + ArgParam = $"\"{fullScorePath}\" \"{monoTimingPath}\" \"{editorf0Path}\" \"{melspecPath}\" \"{wavPath}\" \"{modelDir}\" --skip-timing --skip-f0 --skip-wav -f {toneShift} -m -t"; } if (existNeutrinoClient) { ProcessRunner.Run(NeutrinoClientExe, ArgParam, Log.Logger); @@ -363,10 +363,10 @@ public override Task Render(RenderPhrase phrase, Progress progress //音声ファイル生成 if (!File.Exists(wavPath) && File.Exists(f0Path) && File.Exists(melspecPath)) { if (phrase.phones[0].direct) { - ArgParam = $"{fullScorePath} {monoTimingPath} {f0Path} {melspecPath} {wavPath} {modelDir} --skip-timing --skip-f0 --skip-melspec -f {toneShift} -m -t"; + ArgParam = $"\"{fullScorePath}\" \"{monoTimingPath}\" \"{f0Path}\" \"{melspecPath}\" \"{wavPath}\" \"{modelDir}\" --skip-timing --skip-f0 --skip-melspec -f {toneShift} -m -t"; } else { // TODO:メルスペクトグラムの編集 - ArgParam = $"{fullScorePath} {monoTimingPath} {editorf0Path} {melspecPath} {wavPath} {modelDir} --skip-timing --skip-f0 --skip-melspec -f {toneShift} -m -t"; + ArgParam = $"\"{fullScorePath}\" \"{monoTimingPath}\" \"{editorf0Path}\" \"{melspecPath}\" \"{wavPath}\" \"{modelDir}\" --skip-timing --skip-f0 --skip-melspec -f {toneShift} -m -t"; } if (existNeutrinoClient) { ProcessRunner.Run(NeutrinoClientExe, ArgParam, Log.Logger); From dffeec939cb7e9d3c0855633d4e05e140e137d43 Mon Sep 17 00:00:00 2001 From: rokujyushi Date: Fri, 22 May 2026 17:01:36 +0900 Subject: [PATCH 17/18] Exposing monoLabel and adding customization processing The monoLabel struct has been changed to public, and a customizable virtual method CustomMonoLabel has been added. This allows subclasses to override the processing logic for monoLabels_. 
Additionally, in NeutrinoRenderer.cs, CustomMonoLabel is overridden to implement a process that rounds label timings to 10ms increments when singerVersion is "v2.7". --- OpenUtau.Core/Hts/HTSLabelRenderer.cs | 8 +++++++- OpenUtau.Core/Neutrino/NeutrinoRenderer.cs | 12 ++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/OpenUtau.Core/Hts/HTSLabelRenderer.cs b/OpenUtau.Core/Hts/HTSLabelRenderer.cs index 039f62d1a..011729886 100644 --- a/OpenUtau.Core/Hts/HTSLabelRenderer.cs +++ b/OpenUtau.Core/Hts/HTSLabelRenderer.cs @@ -273,7 +273,7 @@ private HTSPhoneme[] HTSNoteToPhonemes(HTSNote htsNote) { protected abstract HTSPhoneme[] CustomHTSPhonemeContext(HTSPhoneme[] htsPhonemes, RenderNote notes); - private struct monoLabel { + public struct monoLabel { public string symbol; public double startMs; public double endMs; @@ -282,6 +282,10 @@ public override string ToString() { } } + protected virtual List CustomMonoLabel(List monoLabels) { + return monoLabels; + } + public void ProcessPart(RenderPhrase phrase) { if (timeAxis == null) { timeAxis = phrase.timeAxis; @@ -459,6 +463,8 @@ public void ProcessPart(RenderPhrase phrase) { htsPhonemes[i - 1].next = htsPhonemes[i]; } + monoLabels_ = CustomMonoLabel(monoLabels_); + try { File.WriteAllLines(fullScorePath, htsPhonemes.Select(x => x.dump())); File.WriteAllLines(monoTimingPath, monoLabels_.Select(x => x.ToString())); diff --git a/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs b/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs index 899cf6924..c42adf2dd 100644 --- a/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs +++ b/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs @@ -132,6 +132,18 @@ protected override HTSPhoneme[] CustomHTSPhonemeContext(HTSPhoneme[] htsPhonemes return htsPhonemes; } + protected override List CustomMonoLabel(List monoLabels) { + if (this.singer.singerVersion.StartsWith("v2.7")) { + for (int i = 0; i < monoLabels.Count; i++) { + var label = monoLabels[i]; + label.startMs = Math.Round(label.startMs / 
10.0) * 10.0; + label.endMs = Math.Round(label.endMs / 10.0) * 10.0; + monoLabels[i] = label; + } + } + return monoLabels; + } + public double[] LoadFile(string filePath) { if (File.Exists(filePath)) { using (FileStream fs = new FileStream(filePath, FileMode.Open, FileAccess.Read)) { From 27ac2aaaea35d7c5fb071cbe328ab96b0fe9619a Mon Sep 17 00:00:00 2001 From: rokujyushi Date: Sun, 31 May 2026 00:40:27 +0900 Subject: [PATCH 18/18] In accordance with the NEUTRINO update, I have set the minimum version for Version 3 to 3.2. --- .../Neutrino/NeutrinoLabelPhonemizer.cs | 6 ++--- OpenUtau.Core/Neutrino/NeutrinoRenderer.cs | 24 +++++++++---------- OpenUtau.Core/Neutrino/NeutrinoSinger.cs | 2 +- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/OpenUtau.Core/Neutrino/NeutrinoLabelPhonemizer.cs b/OpenUtau.Core/Neutrino/NeutrinoLabelPhonemizer.cs index 76f3c33b2..75520c623 100644 --- a/OpenUtau.Core/Neutrino/NeutrinoLabelPhonemizer.cs +++ b/OpenUtau.Core/Neutrino/NeutrinoLabelPhonemizer.cs @@ -33,7 +33,7 @@ public override void SetSinger(USinger singer) { if (!Directory.Exists(basePath)) { if (this.singer.singerVersion.StartsWith("v2.7")) { basePath = Path.Join(PathManager.Inst.DependencyPath, "NEUTRINO_v27"); - } else if (this.singer.singerVersion.StartsWith("v3.")) { + } else if (this.singer.singerVersion.StartsWith("v3") && !this.singer.singerVersion.StartsWith("v3.1")) { basePath = Path.Join(PathManager.Inst.DependencyPath, "NEUTRINO_v3"); } } @@ -156,9 +156,9 @@ protected override void SendScore(Note[][] phrase) { string ArgParam = string.Empty; if (this.singer.singerVersion.StartsWith("v2.7")) { ArgParam = $"{fullScorePath} {monoTimingPath} {f0Path} {melspecPath} {modelDir} -a -k {toneShift} -d 3 -n 1 -p {numThreads} -m -t"; - } else if (this.singer.singerVersion.StartsWith("v3.")) { + } else if (this.singer.singerVersion.StartsWith("v3") && !this.singer.singerVersion.StartsWith("v3.1")) { //TODO: -S support model - ArgParam = $"\"{fullScorePath}\" 
\"{monoTimingPath}\" \"{f0Path}\" \"{melspecPath}\" \"{wavPath}\" \"{modelDir}\" --skip-f0 --skip-melspec --skip-wav -f {toneShift} -m -t"; + ArgParam = $"\"{fullScorePath}\" \"{monoTimingPath}\" \"{f0Path}\" \"{melspecPath}\" \"{wavPath}\" \"{modelDir}\" --skip-f0 --skip-melspec --skip-wav -k {toneShift} -m -t"; } else { Log.Error($"Unsupported NEUTRINO version: {this.singer.singerVersion}"); return; diff --git a/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs b/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs index c42adf2dd..57956e86d 100644 --- a/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs +++ b/OpenUtau.Core/Neutrino/NeutrinoRenderer.cs @@ -83,17 +83,17 @@ public override void SetUp() { tablePath = "japanese.utf_8.table"; string basePath = Path.Join(PathManager.Inst.DependencyPath, "NEUTRINO"); if (!Directory.Exists(basePath)) { - if (singer.singerVersion.StartsWith("v2.7")) { + if (this.singer.singerVersion.StartsWith("v2.7")) { basePath = Path.Join(PathManager.Inst.DependencyPath, "NEUTRINO_v27"); - } else if (singer.singerVersion.StartsWith("v3.")) { + } else if (this.singer.singerVersion.StartsWith("v3") && !this.singer.singerVersion.StartsWith("v3.1")) { basePath = Path.Join(PathManager.Inst.DependencyPath, "NEUTRINO_v3"); } } //Load Dictionary try { phoneDict.Clear(); - LoadDict(Path.Join(basePath, "settings", "dic", confPath), singer.TextFileEncoding); - LoadDict(Path.Join(basePath, "settings", "dic", tablePath), singer.TextFileEncoding); + LoadDict(Path.Join(basePath, "settings", "dic", confPath), this.singer.TextFileEncoding); + LoadDict(Path.Join(basePath, "settings", "dic", tablePath), this.singer.TextFileEncoding); // Lyrics often handled in OpenUtau phoneDict.Add("R", new string[] { "pau" }); phoneDict.Add("-", new string[] { "pau" }); @@ -336,10 +336,10 @@ public override Task Render(RenderPhrase phrase, Progress progress } } } - } else if (this.singer.singerVersion.StartsWith("v3.")) { + } else if (this.singer.singerVersion.StartsWith("v3") && 
!this.singer.singerVersion.StartsWith("v3.1")) { // F0ファイル生成 if (!File.Exists(f0Path)) { - ArgParam = $"\"{fullScorePath}\" \"{monoTimingPath}\" \"{f0Path}\" \"{melspecPath}\" \"{wavPath}\" \"{modelDir}\" --skip-timing --skip-melspec --skip-wav -f {toneShift} -m -t"; + ArgParam = $"\"{fullScorePath}\" \"{monoTimingPath}\" \"{f0Path}\" \"{melspecPath}\" \"{wavPath}\" \"{modelDir}\" --skip-timing --skip-melspec --skip-wav -k {toneShift} -m -t"; if (existNeutrinoClient) { ProcessRunner.Run(NeutrinoClientExe, ArgParam, Log.Logger); } else { @@ -352,7 +352,7 @@ public override Task Render(RenderPhrase phrase, Progress progress //メルスペクトグラムファイル生成 if (File.Exists(f0Path) && !File.Exists(melspecPath)) { if (phrase.phones[0].direct) { - ArgParam = $"\"{fullScorePath}\" \"{monoTimingPath}\" \"{f0Path}\" \"{melspecPath}\" \"{wavPath}\" \"{modelDir}\" --skip-timing --skip-f0 --skip-wav -f {toneShift} -m -t"; + ArgParam = $"\"{fullScorePath}\" \"{monoTimingPath}\" \"{f0Path}\" \"{melspecPath}\" \"{wavPath}\" \"{modelDir}\" --skip-timing --skip-f0 --skip-wav -k {toneShift} -m -t"; } else { double[] f0 = LoadFile(f0Path); int totalFrames = f0.Length; @@ -361,7 +361,7 @@ public override Task Render(RenderPhrase phrase, Progress progress var editorF0 = SampleCurve(phrase, phrase.pitches, 0, 9.984, totalFrames, headFrames, tailFrames, x => MusicMath.ToneToFreq(x * 0.01)); SaveFile(editorf0Path, editorF0); // F0の編集とメルスペクトグラムの生成はセット - ArgParam = $"\"{fullScorePath}\" \"{monoTimingPath}\" \"{editorf0Path}\" \"{melspecPath}\" \"{wavPath}\" \"{modelDir}\" --skip-timing --skip-f0 --skip-wav -f {toneShift} -m -t"; + ArgParam = $"\"{fullScorePath}\" \"{monoTimingPath}\" \"{editorf0Path}\" \"{melspecPath}\" \"{wavPath}\" \"{modelDir}\" --skip-timing --skip-f0 --skip-wav -k {toneShift} -m -t"; } if (existNeutrinoClient) { ProcessRunner.Run(NeutrinoClientExe, ArgParam, Log.Logger); @@ -375,10 +375,10 @@ public override Task Render(RenderPhrase phrase, Progress progress //音声ファイル生成 if 
(!File.Exists(wavPath) && File.Exists(f0Path) && File.Exists(melspecPath)) { if (phrase.phones[0].direct) { - ArgParam = $"\"{fullScorePath}\" \"{monoTimingPath}\" \"{f0Path}\" \"{melspecPath}\" \"{wavPath}\" \"{modelDir}\" --skip-timing --skip-f0 --skip-melspec -f {toneShift} -m -t"; + ArgParam = $"\"{fullScorePath}\" \"{monoTimingPath}\" \"{f0Path}\" \"{melspecPath}\" \"{wavPath}\" \"{modelDir}\" --skip-timing --skip-f0 --skip-melspec -k {toneShift} -m -t"; } else { // TODO:メルスペクトグラムの編集 - ArgParam = $"\"{fullScorePath}\" \"{monoTimingPath}\" \"{editorf0Path}\" \"{melspecPath}\" \"{wavPath}\" \"{modelDir}\" --skip-timing --skip-f0 --skip-melspec -f {toneShift} -m -t"; + ArgParam = $"\"{fullScorePath}\" \"{monoTimingPath}\" \"{editorf0Path}\" \"{melspecPath}\" \"{wavPath}\" \"{modelDir}\" --skip-timing --skip-f0 --skip-melspec -k {toneShift} -m -t"; } if (existNeutrinoClient) { ProcessRunner.Run(NeutrinoClientExe, ArgParam, Log.Logger); @@ -499,7 +499,7 @@ public override RenderPitchResult LoadRenderedPitch(RenderPhrase phrase) { int totalFrames = f0.Length; int headFrames = 0; int tailFrames = 0; - if (this.singer.singerVersion.StartsWith("v3.")) { + if (this.singer.singerVersion.StartsWith("v3") && !this.singer.singerVersion.StartsWith("v3.1")) { headFrames = (int)Math.Round(headMs / 1000.0 * 99.84); tailFrames = (int)Math.Round(tailMs / 1000.0 * 99.84); } else { @@ -523,7 +523,7 @@ public override RenderPitchResult LoadRenderedPitch(RenderPhrase phrase) { var layout = Layout(phrase); var t = layout.positionMs - layout.leadingMs; for (int i = 0; i < result.tones.Length; i++) { - if (this.singer.singerVersion.StartsWith("v3.")) { + if (this.singer.singerVersion.StartsWith("v3") && !this.singer.singerVersion.StartsWith("v3.1")) { t += 10; } else { t += framePeriod; diff --git a/OpenUtau.Core/Neutrino/NeutrinoSinger.cs b/OpenUtau.Core/Neutrino/NeutrinoSinger.cs index e912eb59e..9b5d6f5a9 100644 --- a/OpenUtau.Core/Neutrino/NeutrinoSinger.cs +++ 
b/OpenUtau.Core/Neutrino/NeutrinoSinger.cs @@ -109,7 +109,7 @@ void Load() { if (!Directory.Exists(basePath)) { if (singerVersion.StartsWith("v2.7")) { basePath = Path.Join(PathManager.Inst.DependencyPath, "NEUTRINO_v27"); - } else if (singerVersion.StartsWith("v3.")) { + } else if (singerVersion.StartsWith("v3") && !singerVersion.StartsWith("v3.1")) { basePath = Path.Join(PathManager.Inst.DependencyPath, "NEUTRINO_v3"); } }