bongole · April 23, 2025 07:06
diff --git a/mcp-youtube.py b/mcp-youtube.py
 import hashlib
 import re
 from mcp.server.fastmcp import FastMCP
 import os
 import subprocess
 import glob
 import xml.etree.ElementTree as ET
 import webvtt
 import json

 import logging

 # Configure logging: root logger at INFO, plus a named logger for this module.
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger("mcp-youtube")

 # Create the MCP server instance that the tool functions register on.
 mcp = FastMCP("mcp-youtube")


 def parse_vtt(file_path: str) -> str:
    """Extract the caption text of a WebVTT file as de-duplicated plain lines.

    Every caption yielded by ``webvtt.read`` is split into lines. Each line
    is stripped of surrounding whitespace, blank lines are dropped, and runs
    of identical consecutive lines are collapsed into a single line.

    Args:
        file_path: Path of the ``.vtt`` file to read.

    Returns:
        The remaining lines joined with newline characters, or an empty
        string when no non-blank line is left.
    """
    # Gather the raw lines of every caption in file order.
    raw_lines = []
    for cue in webvtt.read(file_path):
        raw_lines.extend(cue.text.split("\n"))

    # Trim each line and discard the ones that end up empty.
    cleaned = [stripped for stripped in map(str.strip, raw_lines) if stripped]

    # Keep a line only when it differs from the line kept just before it.
    collapsed = []
    for current in cleaned:
        if not collapsed or collapsed[-1] != current:
            collapsed.append(current)

    return "\n".join(collapsed)


 def get_orig_subtitle_languages(url):
    """Return the original-language subtitle code that yt-dlp lists for a video.

    Runs ``yt-dlp --list-subs`` for ``url`` and scans its standard output for
    rows whose first whitespace-separated column contains ``-orig``.

    Args:
        url: URL of the video to inspect.

    Returns:
        The first column of the last matching row, or ``None`` when no row
        matches, yt-dlp exits with a non-zero status, or yt-dlp cannot be
        started at all.
    """
    # "--" ends option parsing, so a URL that starts with "-" is passed as a
    # positional argument instead of being interpreted as a yt-dlp option.
    command = ["yt-dlp", "--list-subs", "--", url]

    try:
        completed = subprocess.run(
            command, check=True, capture_output=True, text=True
        )
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: yt-dlp ran but failed.
        # OSError: the executable is missing or cannot be launched.
        # Both mean "language unknown" for the caller.
        return None

    orig_language = None
    for line in completed.stdout.splitlines():
        columns = line.split()
        # Look at the first column only, so "-orig" occurring elsewhere in a
        # line (for example inside an echoed URL) is not taken for a
        # language row.
        if columns and "-orig" in columns[0]:
            orig_language = columns[0]

    return orig_language


 def _paginate_subtitle_text(content: str, start_index: int, max_length: int) -> str:
    """Return the window of ``content`` selected by ``start_index``/``max_length``.

    Args:
        content: Full subtitle text.
        start_index: Character offset of the window; negative values are
            treated as 0.
        max_length: Maximum number of characters in the window.

    Returns:
        The selected window. When text remains after the window, a
        "Content truncated" marker naming the next ``start_index`` is
        appended. When the window is empty, a "No more content" marker is
        returned instead.
    """
    no_more = "<error>No more content available.</error>"

    # A negative offset would make the slice count from the end of the text.
    start_index = max(start_index, 0)
    total = len(content)

    # A non-positive max_length can never select text; checking it here also
    # stops a negative value from turning the slice end into an
    # offset-from-the-end.
    if max_length <= 0 or start_index >= total:
        return no_more

    window = content[start_index : start_index + max_length]
    if not window:
        return no_more

    next_start = start_index + len(window)
    # Only add the prompt to continue fetching if there is still remaining content.
    if next_start < total:
        window += f"\n\n<error>Content truncated. Call this tool with a start_index of {next_start} to get more content.</error>"
    return window


 @mcp.tool()
 async def download_video_subtitles(
    url: str, max_length: int = 99000, start_index: int = 0
 ) -> str:
    """Download the subtitles of a video.

    Args:
        url: URL of the video whose subtitles should be downloaded (supported video services: youtube.com, vimeo.com)
        max_length: Maximum number of characters to return.
        start_index: On return output starting at this character index, useful if a previous fetch was truncated and more context is required. (default: 0)

    Returns:
        The subtitles of the video.
    """
    # NOTE(review): the docstring above is presumably surfaced to clients as
    # the tool description by @mcp.tool() - confirm before expanding it.
    # Implementation notes are therefore kept in comments.
    #
    # Results are cached per URL under /tmp/video/<sha256 of url>: yt-dlp is
    # only invoked when that directory holds no .vtt file yet.
    #
    # Return value: "Title: ...\nSubtitle:\n..." on success, an empty string
    # when yt-dlp produced no .vtt file, or an error message string when
    # yt-dlp fails or cannot be started.
    #
    # NOTE(review): subprocess.run is synchronous, so this coroutine blocks
    # while yt-dlp runs.

    orig_sub_lang = get_orig_subtitle_languages(url)
    if orig_sub_lang:
        sub_lang_option = ["--sub-lang", orig_sub_lang]
    elif "youtube" in url or "youtu.be" in url:
        # No "-orig" track was reported: fall back to English on YouTube,
        # including youtu.be short links.
        sub_lang_option = ["--sub-lang", "en"]
    else:
        sub_lang_option = []

    url_hash = hashlib.sha256(url.encode()).hexdigest()
    work_dir = f"/tmp/video/{url_hash}"
    info_path = f"{work_dir}/info.json"
    os.makedirs(work_dir, exist_ok=True)

    # "--" ends option parsing so a URL that starts with "-" cannot be
    # interpreted as a yt-dlp option.
    get_sub_command = [
        "yt-dlp",
        "-o",
        "subtitle.%(ext)s",
        "--write-sub",
        "--write-auto-sub",
        *sub_lang_option,
        "--skip-download",
        "--sub-format",
        "vtt",
        "--",
        url,
    ]
    get_info_command = [
        "yt-dlp",
        "-J",
        "--skip-download",
        "--",
        url,
    ]

    # These are diagnostics, not failures, so they are logged at INFO.
    logger.info("get_sub_command=%r", get_sub_command)
    logger.info("get_info_command=%r", get_info_command)

    vtt_pattern = os.path.join(work_dir, "*.vtt")
    try:
        subtitle_files = glob.glob(vtt_pattern)
        if not subtitle_files:
            # Nothing cached yet: store the video metadata, then fetch the
            # subtitles into the work directory.
            with open(info_path, "w") as outfile:
                subprocess.run(get_info_command, check=True, stdout=outfile)

            subprocess.run(
                get_sub_command,
                check=True,
                capture_output=True,
                text=True,
                cwd=work_dir,
            )
            subtitle_files = glob.glob(vtt_pattern)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing yt-dlp executable and an unwritable
        # work directory.
        return f"エラーが発生しました: {e}"

    if not subtitle_files:
        return ""

    # glob order is unspecified; sorting picks the same file on every call so
    # start_index pagination always refers to the same text.
    file_path = sorted(subtitle_files)[-1]
    content = _paginate_subtitle_text(parse_vtt(file_path), start_index, max_length)

    # Read the title once, close the file promptly, and tolerate a missing
    # or unreadable metadata file instead of failing the whole request.
    title = None
    try:
        with open(info_path) as info_file:
            info = json.load(info_file)
        if isinstance(info, dict):
            title = info.get("title")
    except (OSError, ValueError):
        logger.warning("could not read video metadata from %s", info_path)

    return f"Title: {title}\nSubtitle:\n{content}"


 # When executed as a script, run the MCP server using the stdio transport.
 if __name__ == "__main__":
    mcp.run(transport="stdio")
	import hashlib
	import re
	from mcp.server.fastmcp import FastMCP
	import os
	import subprocess
	import glob
	import xml.etree.ElementTree as ET
	import webvtt
	import json

	import logging

	# Configure logging: root logger at INFO, plus a named logger for this module.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger("mcp-youtube")

	# Create the MCP server instance that the tool functions register on.
	mcp = FastMCP("mcp-youtube")


	def parse_vtt(file_path: str) -> str:
	    """Extract the caption text of a WebVTT file as de-duplicated plain lines.

	    Every caption yielded by ``webvtt.read`` is split into lines. Each line
	    is stripped of surrounding whitespace, blank lines are dropped, and runs
	    of identical consecutive lines are collapsed into a single line.

	    Args:
	        file_path: Path of the ``.vtt`` file to read.

	    Returns:
	        The remaining lines joined with newline characters, or an empty
	        string when no non-blank line is left.
	    """
	    # Gather the raw lines of every caption in file order.
	    raw_lines = []
	    for caption in webvtt.read(file_path):
	        raw_lines.extend(caption.text.split("\n"))

	    # Remove blank lines.
	    non_empty_lines = [line.strip() for line in raw_lines if line.strip()]

	    if not non_empty_lines:
	        return ""

	    # Collapse consecutive identical lines into one.
	    result = [non_empty_lines[0]]
	    for line in non_empty_lines[1:]:
	        if line != result[-1]:
	            result.append(line)

	    # Turn the result back into a single string.
	    return "\n".join(result)


	def get_orig_subtitle_languages(url):
	    """Return the original-language subtitle code that yt-dlp lists for a video.

	    Runs ``yt-dlp --list-subs`` for ``url`` and scans its standard output for
	    rows whose first whitespace-separated column contains ``-orig``.

	    Args:
	        url: URL of the video to inspect.

	    Returns:
	        The first column of the last matching row, or ``None`` when no row
	        matches, yt-dlp exits with a non-zero status, or yt-dlp cannot be
	        started at all.
	    """
	    # "--" ends option parsing, so a URL that starts with "-" is passed as a
	    # positional argument instead of being interpreted as a yt-dlp option.
	    command = ["yt-dlp", "--list-subs", "--", url]

	    try:
	        completed = subprocess.run(
	            command, check=True, capture_output=True, text=True
	        )
	    except (subprocess.CalledProcessError, OSError):
	        # CalledProcessError: yt-dlp ran but failed.
	        # OSError: the executable is missing or cannot be launched.
	        # Both mean "language unknown" for the caller.
	        return None

	    orig_language = None
	    for line in completed.stdout.splitlines():
	        columns = line.split()
	        # Look at the first column only, so "-orig" occurring elsewhere in a
	        # line (for example inside an echoed URL) is not taken for a
	        # language row.
	        if columns and "-orig" in columns[0]:
	            orig_language = columns[0]

	    return orig_language


	def _paginate_subtitle_text(content: str, start_index: int, max_length: int) -> str:
	    """Return the window of ``content`` selected by ``start_index``/``max_length``.

	    Args:
	        content: Full subtitle text.
	        start_index: Character offset of the window; negative values are
	            treated as 0.
	        max_length: Maximum number of characters in the window.

	    Returns:
	        The selected window. When text remains after the window, a
	        "Content truncated" marker naming the next ``start_index`` is
	        appended. When the window is empty, a "No more content" marker is
	        returned instead.
	    """
	    no_more = "<error>No more content available.</error>"

	    # A negative offset would make the slice count from the end of the text.
	    start_index = max(start_index, 0)
	    total = len(content)

	    # A non-positive max_length can never select text; checking it here also
	    # stops a negative value from turning the slice end into an
	    # offset-from-the-end.
	    if max_length <= 0 or start_index >= total:
	        return no_more

	    window = content[start_index : start_index + max_length]
	    if not window:
	        return no_more

	    next_start = start_index + len(window)
	    # Only add the prompt to continue fetching if there is still remaining content.
	    if next_start < total:
	        window += f"\n\n<error>Content truncated. Call this tool with a start_index of {next_start} to get more content.</error>"
	    return window


	@mcp.tool()
	async def download_video_subtitles(
	    url: str, max_length: int = 99000, start_index: int = 0
	) -> str:
	    """Download the subtitles of a video.

	    Args:
	        url: URL of the video whose subtitles should be downloaded (supported video services: youtube.com, vimeo.com)
	        max_length: Maximum number of characters to return.
	        start_index: On return output starting at this character index, useful if a previous fetch was truncated and more context is required. (default: 0)

	    Returns:
	        The subtitles of the video.
	    """
	    # NOTE(review): the docstring above is presumably surfaced to clients as
	    # the tool description by @mcp.tool() - confirm before expanding it.
	    # Implementation notes are therefore kept in comments.
	    #
	    # Results are cached per URL under /tmp/video/<sha256 of url>: yt-dlp is
	    # only invoked when that directory holds no .vtt file yet.
	    #
	    # Return value: "Title: ...\nSubtitle:\n..." on success, an empty string
	    # when yt-dlp produced no .vtt file, or an error message string when
	    # yt-dlp fails or cannot be started.
	    #
	    # NOTE(review): subprocess.run is synchronous, so this coroutine blocks
	    # while yt-dlp runs.

	    orig_sub_lang = get_orig_subtitle_languages(url)
	    if orig_sub_lang:
	        sub_lang_option = ["--sub-lang", orig_sub_lang]
	    elif "youtube" in url or "youtu.be" in url:
	        # No "-orig" track was reported: fall back to English on YouTube,
	        # including youtu.be short links.
	        sub_lang_option = ["--sub-lang", "en"]
	    else:
	        sub_lang_option = []

	    url_hash = hashlib.sha256(url.encode()).hexdigest()
	    work_dir = f"/tmp/video/{url_hash}"
	    info_path = f"{work_dir}/info.json"
	    os.makedirs(work_dir, exist_ok=True)

	    # "--" ends option parsing so a URL that starts with "-" cannot be
	    # interpreted as a yt-dlp option.
	    get_sub_command = [
	        "yt-dlp",
	        "-o",
	        "subtitle.%(ext)s",
	        "--write-sub",
	        "--write-auto-sub",
	        *sub_lang_option,
	        "--skip-download",
	        "--sub-format",
	        "vtt",
	        "--",
	        url,
	    ]
	    get_info_command = [
	        "yt-dlp",
	        "-J",
	        "--skip-download",
	        "--",
	        url,
	    ]

	    # These are diagnostics, not failures, so they are logged at INFO.
	    logger.info("get_sub_command=%r", get_sub_command)
	    logger.info("get_info_command=%r", get_info_command)

	    vtt_pattern = os.path.join(work_dir, "*.vtt")
	    try:
	        subtitle_files = glob.glob(vtt_pattern)
	        if not subtitle_files:
	            # Nothing cached yet: store the video metadata, then fetch the
	            # subtitles into the work directory.
	            with open(info_path, "w") as outfile:
	                subprocess.run(get_info_command, check=True, stdout=outfile)

	            subprocess.run(
	                get_sub_command,
	                check=True,
	                capture_output=True,
	                text=True,
	                cwd=work_dir,
	            )
	            subtitle_files = glob.glob(vtt_pattern)
	    except (subprocess.CalledProcessError, OSError) as e:
	        # OSError covers a missing yt-dlp executable and an unwritable
	        # work directory.
	        return f"エラーが発生しました: {e}"

	    if not subtitle_files:
	        return ""

	    # glob order is unspecified; sorting picks the same file on every call so
	    # start_index pagination always refers to the same text.
	    file_path = sorted(subtitle_files)[-1]
	    content = _paginate_subtitle_text(parse_vtt(file_path), start_index, max_length)

	    # Read the title once, close the file promptly, and tolerate a missing
	    # or unreadable metadata file instead of failing the whole request.
	    title = None
	    try:
	        with open(info_path) as info_file:
	            info = json.load(info_file)
	        if isinstance(info, dict):
	            title = info.get("title")
	    except (OSError, ValueError):
	        logger.warning("could not read video metadata from %s", info_path)

	    return f"Title: {title}\nSubtitle:\n{content}"


	# When executed as a script, run the MCP server using the stdio transport.
	if __name__ == "__main__":
	    mcp.run(transport="stdio")