Skip to content

Instantly share code, notes, and snippets.

@bongole
Last active April 23, 2025 07:06
Show Gist options
  • Save bongole/745e828f9c88ba86f8fc18e1a379d0a8 to your computer and use it in GitHub Desktop.
Save bongole/745e828f9c88ba86f8fc18e1a379d0a8 to your computer and use it in GitHub Desktop.
import hashlib
import re
from mcp.server.fastmcp import FastMCP
import os
import subprocess
import glob
import xml.etree.ElementTree as ET
import webvtt
import json
import logging
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("mcp-youtube")
# Initialize the MCP server
mcp = FastMCP("mcp-youtube")
def parse_vtt(file_path: str) -> str:
    """Read a subtitle file with ``webvtt`` and return its text as plain lines.

    Every caption's text is split into lines; each line is stripped of
    surrounding whitespace, blank lines are dropped, and a line that is
    identical to the previously kept line is skipped.

    Args:
        file_path: Path of the subtitle file handed to ``webvtt.read``.

    Returns:
        The remaining lines joined with newlines, or an empty string when
        no non-blank line was found.
    """
    kept = []
    for caption in webvtt.read(file_path):
        for raw_line in caption.text.split("\n"):
            stripped = raw_line.strip()
            # Drop blank lines first, then collapse consecutive duplicates.
            if not stripped:
                continue
            if kept and kept[-1] == stripped:
                continue
            kept.append(stripped)
    return "\n".join(kept)
def get_orig_subtitle_languages(url):
    """Return the language code of the subtitle track marked ``-orig``.

    Runs ``yt-dlp --list-subs`` for *url* and scans the listing for rows
    whose first column (the language code) contains ``-orig``. When several
    rows match, the last one wins.

    Args:
        url: URL of the video to inspect.

    Returns:
        The matching language code (for example ``"en-orig"``), or ``None``
        when no such row is listed, when ``yt-dlp`` exits with an error, or
        when ``yt-dlp`` cannot be started at all.
    """
    # "--" ends option parsing, so a URL that starts with "-" can never be
    # interpreted as a yt-dlp command-line flag.
    command = ["yt-dlp", "--list-subs", "--", url]
    try:
        result = subprocess.run(command, check=True, capture_output=True, text=True)
    except (subprocess.CalledProcessError, OSError) as exc:
        # OSError covers a missing or non-executable yt-dlp binary; language
        # detection is best-effort, so report "unknown" instead of crashing.
        logging.getLogger("mcp-youtube").warning(
            "could not list subtitles for %s: %s", url, exc
        )
        return None

    orig_language = None
    for line in result.stdout.splitlines():
        columns = line.split()
        # Only look at the language-code column: matching "-orig" anywhere in
        # the line would also hit header lines that merely mention a video id
        # containing that substring.
        if columns and "-orig" in columns[0]:
            orig_language = columns[0]
    return orig_language
def _paginate_subtitle_text(content: str, start_index: int, max_length: int) -> str:
    """Return the window of *content* selected by *start_index* and *max_length*.

    Returns an ``<error>`` marker when the window is empty, and appends a
    continuation hint when the window is full and more text follows it.
    """
    # A negative index would slice from the end of the text; treat it as 0.
    start_index = max(start_index, 0)
    window = content[start_index : start_index + max_length]
    if not window:
        return "<error>No more content available.</error>"
    next_start = start_index + len(window)
    # Only prompt for another call when the window was filled and text remains.
    if len(window) == max_length and next_start < len(content):
        window += f"\n\n<error>Content truncated. Call this tool with a start_index of {next_start} to get more content.</error>"
    return window


def _read_video_title(info_path: str):
    """Return the ``title`` field of the JSON file at *info_path*, or ``None``."""
    try:
        with open(info_path, encoding="utf-8") as info_file:
            return json.load(info_file).get("title")
    except (OSError, ValueError):
        # A missing or unreadable metadata file should not discard subtitles
        # that were fetched successfully; json.JSONDecodeError is a ValueError.
        return None


@mcp.tool()
async def download_video_subtitles(
    url: str, max_length: int = 99000, start_index: int = 0
) -> str:
    """Download the subtitles of a video.

    Args:
        url: URL of the video whose subtitles should be downloaded (supported video services: youtube.com, vimeo.com)
        max_length: Maximum number of characters to return.
        start_index: On return output starting at this character index, useful if a previous fetch was truncated and more context is required. (default: 0)

    Returns:
        The subtitles of the video
    """
    # NOTE(review): the subprocess calls below are synchronous, so they block
    # the event loop for the duration of each yt-dlp run.

    # Language choice: the track marked "-orig" when one is listed, otherwise
    # English for YouTube URLs, otherwise no --sub-lang option at all.
    orig_sub_lang = get_orig_subtitle_languages(url)
    if orig_sub_lang:
        sub_lang_option = ["--sub-lang", orig_sub_lang]
    elif "youtube" in url:
        sub_lang_option = ["--sub-lang", "en"]
    else:
        sub_lang_option = []

    # Downloads are cached on disk in a directory keyed by the URL's hash.
    url_hash = hashlib.sha256(url.encode()).hexdigest()
    work_dir = f"/tmp/video/{url_hash}"
    os.makedirs(work_dir, exist_ok=True)
    info_path = os.path.join(work_dir, "info.json")
    vtt_pattern = os.path.join(work_dir, "*.vtt")

    # "--" ends option parsing, so a URL that starts with "-" can never be
    # interpreted as a yt-dlp command-line flag.
    get_sub_command = [
        "yt-dlp",
        "-o",
        "subtitle.%(ext)s",
        "--write-sub",
        "--write-auto-sub",
        *sub_lang_option,
        "--skip-download",
        "--sub-format",
        "vtt",
        "--",
        url,
    ]
    logger.info("get_sub_command=%r", get_sub_command)
    get_info_command = [
        "yt-dlp",
        "-J",
        "--skip-download",
        "--",
        url,
    ]
    logger.info("get_info_command=%r", get_info_command)

    try:
        # Reuse previously downloaded .vtt files when the cache has any.
        subtitle_files = glob.glob(vtt_pattern)
        if not subtitle_files:
            with open(info_path, "w") as outfile:
                subprocess.run(get_info_command, check=True, stdout=outfile)
            subprocess.run(
                get_sub_command,
                check=True,
                capture_output=True,
                text=True,
                cwd=work_dir,
            )
            # Look again for the .vtt files that were just downloaded.
            subtitle_files = glob.glob(vtt_pattern)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing or non-executable yt-dlp binary.
        logger.error(
            "yt-dlp invocation failed: %s; stderr=%s", e, getattr(e, "stderr", None)
        )
        return f"エラーが発生しました: {e}"

    if not subtitle_files:
        return ""

    # Only one file's text is returned. Sorting makes the choice deterministic
    # when several .vtt files exist (glob order is arbitrary).
    content = _paginate_subtitle_text(
        parse_vtt(sorted(subtitle_files)[-1]), start_index, max_length
    )
    title = _read_video_title(info_path)
    return f"Title: {title}\nSubtitle:\n{content}"
# Run the MCP server over the stdio transport when executed as a script.
if __name__ == "__main__":
    mcp.run(transport="stdio")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment