Synthesizer V Pro用リップモーション生成プラグイン

投稿：2021-09-25、更新：2021-11-06

背景

Synthesizer V Proで歌ってもらった曲にMMD（MikuMikuDance）でキャラクターが歌う映像を作る際、キャラクターに口パク（リップモーション）をさせたい。 USTファイルを出力し、UTAUで読み込んでVSQを出力すれば、MMDの「vsqによるリップシンク」で作ることができます。けれど、手順が面倒なうえ、MMDのリップシンクはテンポ変更に対応していません。 Synthesizer V Proではプラグインを自作できますから、いきなりリップモーションを生成できればよい訳です。

2021-11-06 【更新情報】バージョン２です。・グループに対応　グループをライブラリからトラックにドロップしてリフレイン的に再利用する機能ですね。　前バージョンはグループを全無視していたので多用したトラックでは口が開きません。・口を閉じる子音　「ま」「ぱ」「ば」はフレーズ途中でも口を閉じるようにしました。・詳細デバッグ出力　処理中の発音情報をテキストファイルへ出力します。　場所はSynthesizer V Proのプログラムと同じフォルダです。

使い方

リップモーションを作りたいトラックを選択してプラグインを起動（メニュー|スクリプト|MMD|リップモーション生成(Lua)を選択）します。プロジェクトファイルと同じフォルダにlipmotion.vmdファイルを出力しますが、新規作成のプロジェクトであればパスがないので、出力先はSynthesizer V Proのプログラムと同じフォルダになってしまいます。また、Windowsの場合はプラグインがutf8、処理中のファイル名はcp932(シフトＪＩＳの親戚)になっている都合で、漢字のパスやファイル名を使えず、vmdファイルを作成できません。ドライブのルートか、全てASCIIのパスにしてください。
母音ごとのウエイトはMMDの表情にある数字で０～１の範囲です。あ～お、んのそれぞれをモデルで表示させて良い感じのウエイトを探してください。 MMDの「vsqによるリップシンク」で作ると大体０．７なのでデフォルトをそれに合わせてあります。例えば公式弦巻マキSynthesizer V衣装モデルでは「あ=0.5、い=0.4、う=0.7、え=0.5、お=0.8、ん=1.0、接続=60%」で良さそうです。同じ母音が続く際にウエイトが同じですから口が全く動きません。すると１音が長く続いているように見えてしまいます。そこで後続のウエイト直前に３フレームだけウエイトを若干下げて２音に見せるようにしました。下端のチェックボックスはデバッグ用ですから特に使わないと思います。もしモーションの動作が期待と違う場合に発生位置のチェックに使えるかも知れません。

使用例

せっかくリップモーションを改善したので、ついつい口元が映るアングルにしてしまいます。

注意事項

バグがあるのが前提ですからSynthesizer V Proのプロジェクトを壊したり、vmdファイルの出力が止まらずに巨大なファイルが生成されたり、MMDで読み込むとMMDのプロジェクトを壊すかも知れません。くれぐれもバックアップを確保してから作業してください。プラグインの元ネタとして好きに書き替えてお使いください。 Synthesizer V Proのプラグインがもっと増えますように(-人-)

プラグイン

これです。

-- Lip motion generation

-- Display name of the plugin: reported through getClientInfo() and used as the
-- title of the dialogs and message boxes shown by this script.
local plugin_name = "リップモーション生成(Lua)"

-- Returns the plugin metadata table: display name, category, author and
-- version numbers.
function getClientInfo()
	local info = {}
	info.name = plugin_name
	info.category = "MMD"
	info.author = "lemorin_jp"
	info.versionNumber = 2
	info.minEditorVersion = 0
	return info
end

-- string.pack formats used when writing the vmd file:
-- little-endian 4-byte unsigned integer and little-endian float.
local fmt_long = "<I4"
local fmt_float = "<f"

-- Settings keyed by vowel ("n" stands for the syllabic ん).
local prefs = {
-- per vowel:
--   weight  : default slider value, replaced by the user's choice in main()
--   display : label shown next to the slider
	a = { weight = 0.7, display = "あ" },
	i = { weight = 0.7, display = "い" },
	u = { weight = 0.7, display = "う" },
	e = { weight = 0.7, display = "え" },
	o = { weight = 0.7, display = "お" },
	n = { weight = 0.7, display = "ん" }
}
-- Seam weight ratio in percent (slider range 0-100), applied at the boundary
-- between consecutive notes that share the same vowel.
local seam_ratio = 60
-- Verbose debug output switch and the log file handle (nil while not logging).
local verbose = false
local verbose_fh = nil

-- Script entry point.
-- Shows the settings dialog (output file, one weight slider per vowel, seam
-- ratio, verbose switch). When the dialog is accepted and the output file
-- either does not exist or the user agrees to overwrite it, runs the pipeline
-- grab_lyric -> lyric_list2boin_shiin_list -> boin_shiin2lip -> write_vmd and
-- reports the result. Always ends with SV:finish() unless a stage raised.
function main()
	-- Folder of the project file, including the trailing separator.
	-- Both "\" and "/" are accepted so the default output path also works on
	-- systems whose paths use forward slashes. An unsaved project has no
	-- path, in which case the default is a bare file name.
	local project_file = SV:getProject():getFileName() or ""
	local project_path = string.match(project_file, ".+[\\/]") or ""

	-- Dialog definition
	local myForm = {
		title = plugin_name,
		message = "母音毎のウエイト",
		buttons = "OkCancel",
		widgets = {
			{
				type = "TextBox",
				label = "vmdファイル Windowsは漢字NG",
				name = "vmdfile",
				default = project_path .. "lipmotion.vmd",
			},
		}
	}

	-- One slider per vowel; ipairs guarantees the display order.
	local disp_order = { "a", "i", "u", "e", "o", "n" }
	for _, disp in ipairs(disp_order) do
		table.insert(myForm.widgets, {
			type = "Slider",
			label = prefs[disp].display,
			name = "weight_" .. disp,
			format = "%5.3f",
			minValue = 0,
			maxValue = 1,
			interval = 0.01,
			default = prefs[disp].weight
		})
	end

	table.insert(myForm.widgets, {
		type = "Slider",
		label = "接続ウエイト比（同じ母音が続く際の境界）",
		name = "seam_ratio",
		format = "%3.0f %%",
		minValue = 0,
		maxValue = 100,
		interval = 5,
		default = seam_ratio
	})

	table.insert(myForm.widgets, {
		type = "CheckBox",
		text = "詳細デバッグ出力(verbose.log)",
		name = "verbose",
		default = false,
	})

	-- Show the dialog (modal)
	local result = SV:showCustomDialog(myForm)
	if result.status then
		-- Pick up the settings
		local vmdfile = result.answers.vmdfile
		-- Proceed when no file of that name exists or the user confirmed overwriting
		if not file_exists(vmdfile) or SV:showOkCancelBox(plugin_name, "vmdファイルが既にあります。上書きしてよいですか？\n" .. vmdfile) then
			for _, disp in ipairs(disp_order) do
				prefs[disp].weight = result.answers["weight_" .. disp]
			end
			seam_ratio = result.answers.seam_ratio
			verbose = result.answers.verbose

			if verbose then
				-- Relative path: the log lands in the process working directory.
				-- If it cannot be opened, trace() simply stays silent.
				verbose_fh = io.open("verbose.log", "a")
			end

			trace("----------")
			trace("デバッグ出力開始")

			-- Run the pipeline protected so that the log file is closed even
			-- when one of the stages raises.
			local ok, rc = pcall(function()
				local lyric = grab_lyric()
				local boin_shiin = lyric_list2boin_shiin_list(lyric)
				local lip = boin_shiin2lip(boin_shiin)
				return write_vmd(vmdfile, lip)
			end)

			if ok then
				trace("デバッグ出力終了")
				trace("----------")
			else
				-- Record the failure in the log before it is closed
				trace(tostring(rc))
			end

			if verbose_fh then
				verbose_fh:close()
				verbose_fh = nil
			end

			if not ok then
				-- Re-raise unchanged so the host reports the error as before
				error(rc, 0)
			end

			if 1 == rc then
				message_box("vmdファイルを出力しました")
			else
				message_box("vmdファイルを開けません\n" .. vmdfile)
			end
		end
	end

	-- End of script
	SV:finish()
end

-- Debug output: appends msg plus a newline to the verbose log.
-- Does nothing while no log file is open.
function trace(msg)
	if verbose_fh == nil then
		return
	end
	local line = msg .. "\n"
	verbose_fh:write(line)
end

-- Message display: shows msg in a message box titled with the plugin name,
-- then records the same text through trace().
function message_box(msg)
	local title = plugin_name
	SV:showMessageBox(title, msg)
	trace(msg)
end

-- File existence check: true when `file` can be opened for reading.
function file_exists(file)
	local handle = io.open(file, "rb")
	if handle == nil then
		return false
	end
	handle:close()
	return true
end

-- Lyric collection.
-- Walks every group reference of the track currently shown in the main editor
-- and returns a list of { start, stop, lyric } tables (start/stop in blicks,
-- shifted by the group's time offset), sorted by start position.
-- Every step is reported through trace().
function grab_lyric()
	trace("歌詞収集開始")

	local time_axis = SV:getProject():getTimeAxis()
	local current_track = SV:getMainEditor():getCurrentTrack()
	local collected = {}

	-- Blick position -> frame number (seconds * 30, rounded down)
	local function to_frame(blick)
		return math.floor(time_axis:getSecondsFromBlick(blick) * 30)
	end

	-- One log line per entry: blick range, frame range, lyric
	local function trace_entry(entry)
		local first_frame = to_frame(entry.start)
		local last_frame = to_frame(entry.stop)
		trace(entry.start .. " " .. entry.stop .. " " .. first_frame .. " " .. last_frame .. " " .. entry.lyric)
	end

	-- Groups of the selected track
	local group_count = current_track:getNumGroups()
	trace("グループ数(1個目はメイン) " .. group_count)

	for group_pos = 1, group_count do
		trace("----------")
		trace("グループ位置 " .. group_pos)

		-- A single group reference and the group it points at
		local reference = current_track:getGroupReference(group_pos)
		local shift = reference:getTimeOffset()
		trace("オフセット " .. shift)
		local target = reference:getTarget()

		-- Notes inside the group
		trace("開始blick 終了blick 開始フレーム 終了フレーム 歌詞")
		local note_count = target:getNumNotes()
		for note_pos = 1, note_count do
			local note = target:getNote(note_pos)
			local entry = {
				start = shift + note:getOnset(),
				stop = shift + note:getEnd(),
				lyric = note:getLyrics()
			}
			trace_entry(entry)
			collected[#collected + 1] = entry
		end

		trace("----------")
	end

	trace("発声位置順に並べ替え")
	local function starts_earlier(a, b)
		return a.start < b.start
	end
	table.sort(collected, starts_earlier)

	trace("開始blick 終了blick 開始フレーム 終了フレーム 歌詞")
	trace("----------")
	for _, entry in ipairs(collected) do
		trace_entry(entry)
	end
	trace("----------")

	trace("歌詞収集終了")

	return collected
end

-- Lyrics to vowels.
-- Converts the list produced by grab_lyric into a list of
-- { open, close, boin, shiin } tables, where open/close are the frame numbers
-- (seconds * 30, rounded down) of the note's start and end, boin is the vowel
-- key ("a", "i", "u", "e", "o" or "n") and shiin the consonant key.
-- A lyric whose vowel cannot be determined is reported to the user together
-- with the preceding lyrics, and falls back to "a".
function lyric_list2boin_shiin_list(lyric_list)
	local timeaxis = SV:getProject():getTimeAxis()
	local boin = ""
	local boin_list = {}
	for i = 1, #lyric_list do
		local word = lyric_list[i].lyric

		-- A few exceptions ("-", "ー", "っ") keep the most recent vowel.
		-- The lyric is user text, so it is searched for literally (plain mode):
		-- used as a pattern, "(" / "[" / "%" would raise "malformed pattern"
		-- and "." would match any character.
		if not string.find("-ーっ", word, 1, true) then
			boin = lyric2boin(word)
		end

		if "" == boin then
			message_box("変換できない歌詞（母音不明） 小節 " .. timeaxis:getMeasureAt(lyric_list[i].start) .. " 歌詞 '" .. word .. "'")
			boin = "a"

			-- Show up to five preceding lyrics plus the current one as context
			local recent = {}
			for j = math.max(1, i - 5), i do
				recent[#recent + 1] = lyric_list[j].lyric
			end
			message_box("直近の歌詞 '" .. table.concat(recent) .. "'")
		end

		local shiin = lyric2shiin(word)

		-- Start/end of the utterance expressed as open/close frames
		local lyric = {
			open = math.floor(timeaxis:getSecondsFromBlick(lyric_list[i].start) * 30),
			close = math.floor(timeaxis:getSecondsFromBlick(lyric_list[i].stop) * 30),
			boin = boin,
			shiin = shiin
		}
		table.insert(boin_list, lyric)
	end

	return boin_list
end

-- Determines the vowel of a kana lyric.
-- A small kana anywhere in the lyric (e.g. "ゃ" in "きゃ") decides the vowel;
-- otherwise the first character is looked up in the per-vowel kana lists.
-- Returns "a", "i", "u", "e", "o", "n", or "" when the lyric cannot be
-- converted (including the empty string).
function lyric2boin(lyric)
	-- Literal substring test. The searched-for text comes from the lyric, so
	-- it must not be interpreted as a Lua pattern ("." would match anything,
	-- "(" / "[" / "%" would raise "malformed pattern").
	local function contains(haystack, needle)
		return string.find(haystack, needle, 1, true) ~= nil
	end

	if lyric == "" then
		return ""
	end

	-- Small kana that fix the vowel, in priority order
	local small_kana = {
		{ "a", { "ぁ", "ァ", "ゃ", "ャ" } },
		{ "i", { "ぃ", "ィ" } },
		{ "u", { "ぅ", "ゥ", "ゅ", "ュ" } },
		{ "e", { "ぇ", "ェ" } },
		{ "o", { "ぉ", "ォ", "ょ", "ョ" } },
	}
	for _, rule in ipairs(small_kana) do
		for _, kana in ipairs(rule[2]) do
			if contains(lyric, kana) then
				return rule[1]
			end
		end
	end

	-- First character
	local capital = utf8_sub(lyric, 1, 1)

	-- Kana grouped by vowel; the last entry is the syllabic n
	local by_vowel = {
		{ "a", "あアかカがガさサざザたタだダなナはハばバぱパまマやヤらラわワ" },
		{ "i", "いイきキぎギしシじジちチぢヂにニひヒびビぴピみミりリ" },
		{ "u", "うウくクぐグすスずズつツづヅぬヌふフぶブぷプむムゆユるル" },
		{ "e", "えエけケげゲせセぜゼてテでデねネへヘべベぺペめメれレ" },
		{ "o", "おオこコごゴそソぞゾとトどドのノほホぼボぽポもモよヨろロをヲ" },
		{ "n", "んン" },
	}
	for _, rule in ipairs(by_vowel) do
		if contains(rule[2], capital) then
			return rule[1]
		end
	end

	-- Cannot be converted
	return ""
end

-- Determines the consonant of a kana lyric from its first character.
-- Returns one of "k", "g", "s", "z", "t", "d", "n", "h", "b", "p", "m", "y",
-- "r", "w", or "" for a bare vowel and for anything that cannot be converted
-- (including the empty string).
function lyric2shiin(lyric)
	if lyric == "" then
		return ""
	end

	-- First character
	local capital = utf8_sub(lyric, 1, 1)

	-- Kana grouped by consonant. The vowel row (あいうえお / アイウエオ) needs
	-- no entry because it shares the "" result with unconvertible input.
	local by_consonant = {
		{ "k", "かきくけこカキクケコ" },
		{ "g", "がぎぐげごガギグゲゴ" },
		{ "s", "さしすせそサシスセソ" },
		{ "z", "ざじずぜぞザジズゼゾ" },
		{ "t", "たちつてとタチツテト" },
		{ "d", "だぢづでどダヂヅデド" },
		{ "n", "なにぬねのナニヌネノ" },
		{ "h", "はひふへほハヒフヘホ" },
		{ "b", "ばびぶべぼバビブベボ" },
		{ "p", "ぱぴぷぺぽパピプペポ" },
		{ "m", "まみむめもマミムメモ" },
		{ "y", "やゆよヤユヨ" },
		{ "r", "らりるれろラリルレロ" },
		{ "w", "わワ" },
	}
	for _, rule in ipairs(by_consonant) do
		-- Plain search: the character comes from the lyric and must not be
		-- interpreted as a Lua pattern ("(" / "[" / "%" would raise an error).
		if string.find(rule[2], capital, 1, true) then
			return rule[1]
		end
	end

	-- Bare vowel, or cannot be converted
	return ""
end

-- UTF-8 aware version of string.sub: returns characters head..tail of str,
-- counted in characters rather than bytes (1-based, inclusive).
-- A range that runs past the end of the string is clipped, so an empty string
-- or a too-large `tail` yields the available characters instead of raising.
function utf8_sub(str, head, tail)
	local head_offset = utf8.offset(str, head)
	if head_offset == nil then
		-- head lies beyond the last character
		return ""
	end

	-- Byte position of the character following `tail`; nil when the string
	-- ends before that character.
	local next_offset = utf8.offset(str, tail + 1)
	if next_offset == nil then
		return string.sub(str, head_offset)
	end

	return string.sub(str, head_offset, next_offset - 1)
end

-- Vowels and consonants to lip motion.
-- Takes the list built by lyric_list2boin_shiin_list (entries with open,
-- close, boin, shiin) and returns a flat list of key frames
-- { weight, frame, boin }. The input entries are modified in place.
--
-- Each note gets four key points in the order start, open, close, stop:
-- a lead-in `start` two frames before `open` and a fade-out `stop` two frames
-- after `close`. A weight of -1 marks a key point that is not emitted.
function boin_shiin2lip(boin_shiin)
	-- Consonants pronounced with closed lips. A table lookup is used on
	-- purpose: string.find("pbm", "") succeeds for the empty consonant, which
	-- would treat every bare vowel as a lip-closing consonant.
	local closing_shiin = { p = true, b = true, m = true }

	-- Add the lead-in (start) and fade-out (stop) key points
	for i = 1, #boin_shiin do
		local cur = boin_shiin[i]
		local weight = prefs[cur.boin].weight

		cur.start = cur.open - 2
		cur.stop = cur.close + 2

		cur.start_weight = 0
		cur.open_weight = weight
		cur.close_weight = weight
		cur.stop_weight = 0
	end

	for i = 1, #boin_shiin do
		local cur = boin_shiin[i]

		if closing_shiin[cur.shiin] then
			-- The mouth must always be closed before a lip-closing consonant.
			-- Scan backwards over every earlier note that still overlaps the
			-- lead-in of this one and cut it off at cur.start.
			local j = i - 1
			while 1 <= j and cur.start <= boin_shiin[j].stop do
				local prev = boin_shiin[j]

				if cur.start < prev.start then
					-- Entirely after our lead-in: drop the whole note
					prev.start_weight = -1
					prev.open_weight = -1
					prev.close_weight = -1
					prev.stop_weight = -1
				elseif cur.start < prev.open then
					-- Cut during its lead-in: close at our start instead
					prev.open = cur.start
					prev.open_weight = 0
					prev.close_weight = -1
					prev.stop_weight = -1
				elseif cur.start < prev.close then
					-- Cut while open: finish closing exactly at our start
					prev.close = cur.start - 2
					prev.stop = cur.start
					prev.stop_weight = 0
				elseif cur.start < prev.stop then
					-- Cut during its fade-out
					prev.stop = cur.start
					prev.stop_weight = 0
				end

				j = j - 1
			end
		else
			-- Adjust when notes with the same vowel overlap.
			-- Scan backwards while the previous stop overlaps our start.
			local j = i - 1
			while 1 <= j and cur.start <= boin_shiin[j].stop do
				local prev = boin_shiin[j]

				if prev.boin == cur.boin then
					if prev.close < cur.open then
						-- The utterance is interrupted:
						-- start open close (stop) start/small open close stop
						-- Drop the previous stop ...
						prev.stop_weight = -1
						-- ... and continue from a slightly closed mouth. The
						-- seam weight is derived from the open weight (the
						-- start weight itself is 0, so scaling it never had
						-- any effect). A start already dropped stays dropped.
						if 0 <= cur.start_weight then
							cur.start_weight = cur.open_weight * seam_ratio / 100
						end
					else
						-- The utterance continues:
						-- start open close stop/small (start) open close stop
						if prev.open < cur.open - 4 then
							-- Enough room: move the previous close/stop just
							-- in front of our open
							prev.close = cur.open - 4
							prev.stop = cur.open - 2
						else
							-- Would collide with the previous open: pack tightly
							prev.close = cur.open - 2
							prev.stop = cur.open - 1
						end
						-- Close the mouth slightly for a moment
						prev.stop_weight = prev.close_weight * seam_ratio / 100
						-- Drop our own start
						cur.start_weight = -1
					end
				end

				j = j - 1
			end
		end
	end

	-- Generate the lip motion; key points with a negative weight are skipped
	local lip = {}
	local phases = { "start", "open", "close", "stop" }
	for i = 1, #boin_shiin do
		local cur = boin_shiin[i]
		for _, phase in ipairs(phases) do
			local weight = cur[phase .. "_weight"]
			if 0 <= weight then
				lip[#lip + 1] = {
					weight = weight,
					frame = cur[phase],
					boin = cur.boin
				}
			end
		end
	end

	return lip
end

-- Writes the key frames in `lip` (entries with boin, frame, weight) to
-- `vmdfile` as a vmd file that contains only the lip-motion section.
-- Returns 1 on success and 0 when the file cannot be opened.
--
-- Key frames with a negative frame number are reported and left out: the
-- frame field is packed as an unsigned integer, so packing a negative value
-- raises "unsigned overflow" and would abort the export half way through.
function write_vmd(vmdfile, lip)
	local fh = io.open(vmdfile, "wb")
	if not fh then
		return 0
	end

	-- Vowel key -> expression name as raw bytes (the author's annotations
	-- give the kana each byte pair stands for), padded to 15 bytes.
	local raw_name = {
		a = "\x82\xa0", -- あ
		i = "\x82\xa2", -- い
		u = "\x82\xa4", -- う
		e = "\x82\xa6", -- え
		o = "\x82\xa8", -- お
		n = "\x82\xf1"  -- ん
	}
	-- Anything else (unexpected)
	raw_name[""] = "\x82\xa0"

	local bonename = {}
	for boin, name in pairs(raw_name) do
		bonename[boin] = padding_0x00(name, 15)
	end

	-- Split the key frames into writable ones and negative-frame ones, and
	-- track the frame range over all of them for the diagnostic message.
	local keys = {}
	local negatives = {}
	local frame_min = 99999
	local frame_max = -1
	for i = 1, #lip do
		local frame = lip[i].frame
		if frame < frame_min then
			frame_min = frame
		end
		if frame_max < frame then
			frame_max = frame
		end

		if frame < 0 then
			negatives[#negatives + 1] = i
		else
			keys[#keys + 1] = lip[i]
		end
	end

	-- Number of key frames that will actually be written
	message_box("モーション個数 " .. #keys)
	for _, i in ipairs(negatives) do
		message_box("フレーム位置がマイナス " .. i .. " " .. lip[i].frame)
	end
	message_box("最小フレーム位置 " .. frame_min .. " 最大 " .. frame_max)

	-- Assemble the whole file in memory and write it in one go
	local out = {
		-- Header
		padding_0x00("Vocaloid Motion Data 0002", 30),
		-- Second 20-byte field (labelled "version" by the original author)
		padding_0x00("Lip Motion by SynthV", 20),
		-- Bone count
		string.pack(fmt_long, 0),
		-- Skin count: start of the lip motion section
		string.pack(fmt_long, #keys),
	}

	for _, key in ipairs(keys) do
		-- Expression name; unknown vowel keys fall back to the "" entry
		out[#out + 1] = bonename[key.boin] or bonename[""]
		-- Frame position
		out[#out + 1] = string.pack(fmt_long, key.frame)
		-- Weight
		out[#out + 1] = string.pack(fmt_float, key.weight)
	end

	-- Camera, light, self-shadow and model-display counts (all zero)
	for _ = 1, 4 do
		out[#out + 1] = string.pack(fmt_long, 0)
	end

	fh:write(table.concat(out))
	fh:close()

	return 1
end

-- Returns str as a field of exactly `byte` bytes: padded on the right with
-- NUL bytes when shorter, truncated when longer.
function padding_0x00(str, byte)
	local padded = str .. string.rep("\0", byte)
	return string.sub(padded, 1, byte)
end