Last active
April 23, 2025 07:06
-
-
Save bongole/745e828f9c88ba86f8fc18e1a379d0a8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import hashlib | |
import re | |
from mcp.server.fastmcp import FastMCP | |
import os | |
import subprocess | |
import glob | |
import xml.etree.ElementTree as ET | |
import webvtt | |
import json | |
import logging | |
# Set up logging: INFO level on the root logger, plus a named logger
# that the rest of this module uses.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("mcp-youtube")

# Create the MCP server instance that the tool functions register on.
mcp = FastMCP("mcp-youtube")
def parse_vtt(file_path: str) -> str:
    """Extract the caption text of a WebVTT file as plain text.

    Every caption's text is split into lines; each line is stripped of
    surrounding whitespace, blank lines are dropped, and runs of identical
    consecutive lines are collapsed into a single line.

    Args:
        file_path: Path of the ``.vtt`` file to read.

    Returns:
        The remaining lines joined with newlines, or an empty string when
        the file yields no non-blank caption line.
    """
    kept = []
    for caption in webvtt.read(file_path):
        for raw_line in caption.text.split("\n"):
            stripped = raw_line.strip()
            # Skip blank lines entirely.
            if not stripped:
                continue
            # Skip a line that merely repeats the previously kept one.
            if kept and kept[-1] == stripped:
                continue
            kept.append(stripped)
    # Convert the collected lines back into a single string.
    return "\n".join(kept)
def get_orig_subtitle_languages(url: str):
    """Return the "-orig" subtitle language code yt-dlp lists for a video.

    Runs ``yt-dlp --list-subs`` and scans its standard output for rows whose
    first column (the language code) contains ``-orig``. When several rows
    match, the last one wins.

    Args:
        url: URL of the video to inspect.

    Returns:
        The matching language code (for example ``"en-orig"``), or ``None``
        when no such row exists or yt-dlp could not be run successfully.
    """
    # "--" ends option parsing, so a URL that starts with "-" can never be
    # interpreted as a yt-dlp command-line option.
    command = ["yt-dlp", "--list-subs", "--", url]

    try:
        completed = subprocess.run(
            command, check=True, capture_output=True, text=True
        )
    except (subprocess.CalledProcessError, OSError) as exc:
        # This is a best-effort probe: a non-zero exit status or a yt-dlp
        # executable that cannot be started both mean "unknown".
        logging.getLogger("mcp-youtube").warning(
            "yt-dlp --list-subs failed: %s", exc
        )
        return None

    orig_language = None
    for line in completed.stdout.splitlines():
        columns = line.split()
        # Test only the language-code column; matching "-orig" anywhere in
        # the line would also hit informational lines that echo a URL or
        # video id containing that text.
        if columns and "-orig" in columns[0]:
            orig_language = columns[0]
    return orig_language
def _paginate_subtitle(content: str, start_index: int, max_length: int) -> str:
    """Return the window of ``content`` selected by the paging arguments."""
    window = content[start_index : start_index + max_length]
    if start_index >= len(content) or not window:
        return "<error>No more content available.</error>"

    next_start = start_index + len(window)
    # Only add the prompt to continue fetching when the window is full and
    # there is still content after it.
    if len(window) == max_length and next_start < len(content):
        window += f"\n\n<error>Content truncated. Call this tool with a start_index of {next_start} to get more content.</error>"
    return window


@mcp.tool()
async def download_video_subtitles(
    url: str, max_length: int = 99000, start_index: int = 0
) -> str:
    """Download the subtitles of a video.

    Args:
        url: URL of the video whose subtitles should be downloaded (supported video services: youtube.com, vimeo.com)
        max_length: Maximum number of characters to return.
        start_index: On return output starting at this character index, useful if a previous fetch was truncated and more context is required. (default: 0)

    Returns:
        The video title followed by its subtitles, or an error message.
    """
    # Prefer the "-orig" track reported by yt-dlp; otherwise fall back to
    # English for YouTube URLs and to yt-dlp's own default elsewhere.
    orig_sub_lang = get_orig_subtitle_languages(url)
    if orig_sub_lang:
        sub_lang_option = ["--sub-lang", orig_sub_lang]
    elif "youtube" in url:
        sub_lang_option = ["--sub-lang", "en"]
    else:
        sub_lang_option = []

    # One working directory per URL; files already present there are reused
    # instead of being downloaded again.
    url_hash = hashlib.sha256(url.encode()).hexdigest()
    work_dir = f"/tmp/video/{url_hash}"
    os.makedirs(work_dir, exist_ok=True)
    info_path = f"{work_dir}/info.json"

    # "--" ends option parsing so a URL that starts with "-" cannot be
    # interpreted as a yt-dlp option.
    get_sub_command = [
        "yt-dlp",
        "-o",
        "subtitle.%(ext)s",
        "--write-sub",
        "--write-auto-sub",
        *sub_lang_option,
        "--skip-download",
        "--sub-format",
        "vtt",
        "--",
        url,
    ]
    logger.info("get_sub_command=%r", get_sub_command)

    get_info_command = [
        "yt-dlp",
        "-J",
        "--skip-download",
        "--",
        url,
    ]
    logger.info("get_info_command=%r", get_info_command)

    try:
        # Look for subtitle files that were downloaded previously.
        subtitle_files = glob.glob(os.path.join(work_dir, "*.vtt"))
        if not subtitle_files:
            # yt-dlp writes the metadata JSON straight into info.json.
            with open(info_path, "w") as outfile:
                subprocess.run(get_info_command, check=True, stdout=outfile)

            subprocess.run(
                get_sub_command,
                check=True,
                capture_output=True,
                text=True,
                cwd=work_dir,
            )

            # Look for the freshly downloaded subtitle files.
            subtitle_files = glob.glob(os.path.join(work_dir, "*.vtt"))
    except subprocess.CalledProcessError as e:
        # stderr is only captured for the subtitle command; include it when
        # present so the caller can see why yt-dlp failed.
        stderr = (e.stderr or "").strip()
        message = f"エラーが発生しました: {e}"
        return f"{message}\n{stderr}" if stderr else message

    if not subtitle_files:
        return "<error>No subtitles were found for this video.</error>"

    # A single track is returned; sorting makes the choice deterministic
    # when more than one .vtt file is present.
    subtitle_path = sorted(subtitle_files)[-1]
    content = _paginate_subtitle(
        parse_vtt(subtitle_path), start_index, max_length
    )

    with open(info_path, encoding="utf-8") as info_file:
        title = json.load(info_file).get("title")

    return f"Title: {title}\nSubtitle:\n{content}"
# Start the MCP server over stdio when this file is executed as a script.
if __name__ == "__main__":
    mcp.run(transport="stdio")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment