我最近用AI写了一个脚本,主要功能是下载YouTube视频并为其嵌入字幕。我需要的功能是:
- 下载视频文件(格式为MP4)。
- 下载对应的字幕文件(格式为SRT)。
- 将SRT字幕插入到视频中。
不过,目前脚本最终输出的是MKV格式的文件,但这种格式在B站上无法识别字幕。
我想优化这个脚本,让它在下载MP4和SRT字幕后,直接将SRT字幕插入到视频中,并保持输出文件的格式为MP4,这样B站就可以正常识别字幕了。
哪位大佬能帮我优化一下代码?
import re
import yt_dlp
import os
import subprocess
import sys
from typing import List
from youtube_transcript_api import YouTubeTranscriptApi
# Download a video (or only fetch its metadata) with yt-dlp.
def _build_format_selector(quality_options):
    """Return a yt-dlp format selector that tries each requested height in order.

    Entries of ``quality_options`` are interpreted as pixel heights (e.g.
    ``'1080'``); non-numeric entries are ignored. The selector always ends with
    the generic MP4 video + M4A audio pair and finally ``best``.
    """
    preferred = [
        f'bestvideo[ext=mp4][height={quality}]+bestaudio[ext=m4a]'
        for quality in (quality_options or [])
        if str(quality).isdigit()
    ]
    return '/'.join(preferred + ['bestvideo[ext=mp4]+bestaudio[ext=m4a]', 'best'])


def download_video(video_url, outtmpl, quality_options, download=True):
    """Fetch metadata for ``video_url`` and optionally download the video.

    Args:
        video_url: URL handed to yt-dlp.
        outtmpl: yt-dlp output template, or ``None`` to keep yt-dlp's default.
        quality_options: Preferred video heights, highest priority first.
        download: When ``False`` only the metadata is extracted.

    Returns:
        The info dict returned by ``YoutubeDL.extract_info``, or ``None`` when
        extraction/download raised an exception (the error is printed).
    """
    ydl_opts = {
        # The preference order must be part of the selector *before* the
        # downloader is created; changing the options afterwards has no effect.
        'format': _build_format_selector(quality_options),
        'writesubtitles': False,  # do not download subtitles
        'verbose': True,
        'nocheckcertificate': True,
        # The original key 'nocachdir' is not a yt-dlp option and was ignored;
        # 'cachedir': False is the documented way to disable the cache.
        'cachedir': False,
        'compat_opts': set(),
        'http_headers': {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.24 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-us,en;q=0.5',
            'Sec-Fetch-Mode': 'navigate'
        }
    }
    if outtmpl is not None:
        # Only override the template when the caller supplied one.
        ydl_opts['outtmpl'] = outtmpl
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        try:
            # extract_info(download=True) already performs the download, so no
            # separate ydl.download() call is needed.
            return ydl.extract_info(video_url, download=download)
        except Exception as e:
            print(f"Error: {e}")
            print(f"Failed to download video: {video_url}")
            return None
def format_time(seconds: float) -> str:
    """Format a duration in seconds as ``H:MM:SS.cc`` (centisecond precision).

    The value is rounded to the nearest centisecond on the total, so binary
    floating-point noise (e.g. ``0.57 * 100 == 56.99999999999999``) no longer
    drops a centisecond, and a rounded-up fraction carries into the seconds.

    Args:
        seconds: Non-negative offset in seconds.

    Returns:
        The timestamp string, with unpadded hours and two-digit centiseconds.
    """
    total_centiseconds = round(seconds * 100)
    whole_seconds, centiseconds = divmod(total_centiseconds, 100)
    minutes, secs = divmod(whole_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return f"{hours:d}:{minutes:02d}:{secs:02d}.{centiseconds:02d}"
def captions_to_srt(captions: List[dict]) -> List[str]:
    """Turn caption entries into the text lines of a numbered subtitle file.

    Each entry is read through its ``"start"``, ``"duration"`` and ``"text"``
    keys. For every entry four lines are produced: a 1-based counter, a
    ``start --> end`` timing line built with ``format_time``, the caption text
    with newlines replaced by spaces, and an empty separator line.
    """
    output: List[str] = []
    number = 0
    for entry in captions:
        number += 1
        begin = format_time(entry["start"])
        finish = format_time(entry["start"] + entry["duration"])
        flattened = " ".join(entry["text"].split("\n"))
        output.extend((str(number), f"{begin} --> {finish}", flattened, ""))
    return output
def _srt_time_to_ass(timestamp: str) -> str:
    """Convert one ``H:MM:SS[.,]fraction`` timestamp to ASS ``H:MM:SS.cc``.

    The fraction is truncated to centiseconds. Text that does not look like a
    timestamp is returned with commas turned into dots, as before.
    """
    match = re.fullmatch(r"(\d+):(\d{1,2}):(\d{1,2})(?:[.,](\d+))?", timestamp.strip())
    if match is None:
        return timestamp.replace(",", ".")
    hours, minutes, secs, fraction = match.groups()
    # "5" -> 50, "50" -> 50, "500" (milliseconds) -> 50
    centiseconds = int((fraction or "").ljust(2, "0")[:2])
    return f"{int(hours)}:{int(minutes):02d}:{int(secs):02d}.{centiseconds:02d}"


def srt_to_ass(srt_file: str, ass_file: str):
    """Convert a subtitle file made of SRT-style blocks into an ASS script.

    Each block is expected to hold an index line, a ``start --> end`` timing
    line, and one or more text lines. Blocks without a timing line on their
    second line (including the single empty block of an empty file) are
    skipped instead of raising ``IndexError``.

    Args:
        srt_file: Path of the UTF-8 input file.
        ass_file: Path of the UTF-8 ASS file to write (overwritten).
    """
    with open(srt_file, "r", encoding="utf-8") as f:
        srt_content = f.read()
    ass_lines = [
        "[Script Info]",
        "ScriptType: v4.00+",
        "Collisions: Normal",
        "PlayResX: 384",
        "PlayResY: 288",
        "Timer: 100.0000",
        "",
        "[V4+ Styles]",
        "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding",
        "Style: Default,Arial,20,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,3,0,2,10,10,50,1",
        "",
        "[Events]",
        "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text"
    ]
    # Split on any run of blank (or whitespace-only) lines so that extra empty
    # lines between blocks do not yield empty blocks.
    for srt_block in re.split(r"\n\s*\n", srt_content.strip()):
        lines = srt_block.strip().split("\n")
        if len(lines) < 2 or " --> " not in lines[1]:
            continue
        start_raw, _, end_raw = lines[1].partition(" --> ")
        start_time = _srt_time_to_ass(start_raw)
        end_time = _srt_time_to_ass(end_raw)
        # ASS uses the literal two characters "\N" as a hard line break.
        text = "\\N".join(lines[2:])
        ass_lines.append(f"Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,{text}")
    with open(ass_file, "w", encoding="utf-8") as f:
        f.write("\n".join(ass_lines))
def download_captions(video_id: str, output_file: str):
    """Save the first Chinese transcript of a video to ``output_file``.

    The transcripts listed for ``video_id`` are scanned in order and the first
    one whose language code starts with ``"zh"`` is fetched, converted with
    ``captions_to_srt`` and written as UTF-8 text.

    Returns:
        ``True`` when a file was written; ``False`` when no (non-empty) Chinese
        transcript exists or when any exception occurred (it is printed).
    """
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        zh_transcript = next(
            (item for item in transcript_list if item.language_code.startswith("zh")),
            None,
        )
        fetched = None if zh_transcript is None else zh_transcript.fetch()
        if not fetched:
            print("No Chinese captions available for this video.")
            return False
        rendered = captions_to_srt(fetched)
        with open(output_file, "w", encoding="utf-8") as handle:
            handle.write("\n".join(rendered))
        return True
    except Exception as e:
        print(f"Error: {e}")
        print(f"Failed to download captions for video: {video_id}")
        return False
def embed_subtitles(video_file, subtitle_file, output_file):
    """Mux a subtitle file into a video as a soft subtitle track using ffmpeg.

    Audio and video streams are copied without re-encoding. The subtitle codec
    is chosen from the output extension: MP4-family containers (``.mp4``,
    ``.m4v``, ``.mov``) cannot carry an ASS track, so the subtitles are
    converted to ``mov_text`` there; every other container keeps ``ass`` as
    before.

    Args:
        video_file: Path of the input video.
        subtitle_file: Path of the subtitle file to embed.
        output_file: Path of the file to create; its extension selects the
            container and therefore the subtitle codec.

    Returns:
        None. Success and failure are reported on stdout.
    """
    extension = os.path.splitext(output_file)[1].lower()
    subtitle_codec = 'mov_text' if extension in ('.mp4', '.m4v', '.mov') else 'ass'
    cmd = [
        'ffmpeg',
        '-i', video_file,
        '-i', subtitle_file,
        '-c', 'copy',
        '-c:s', subtitle_codec,
        output_file
    ]
    try:
        subprocess.run(cmd, check=True)
    except FileNotFoundError as e:
        # Raised when the ffmpeg executable itself cannot be started.
        print(f"Error embedding subtitles: ffmpeg executable not found ({e})")
        print(f"Failed to embed subtitles for video: {video_file}")
    except subprocess.CalledProcessError as e:
        print(f"Error embedding subtitles: {e}")
        print(f"Failed to embed subtitles for video: {video_file}")
    else:
        print(f"Subtitles embedded successfully. Output file: {output_file}")
def replace_special_chars(filename):
    """Return ``filename`` with every character matched by the pattern below replaced by ``_``.

    The character class covers backslash, slash, ``*``, ``?``, ``:``, double
    quote, ``<``, ``>``, ``|``, comma and underscore.
    """
    unsafe_chars = re.compile(r'[\\/*?:"<>|,_|]')
    return unsafe_chars.sub('_', filename)
def main(video_urls, max_results=None, download_videos=True, output_ext='.mkv'):
    """Download each video, fetch its Chinese captions and mux them in.

    Args:
        video_urls: Iterable of video URLs; empty or blank entries are skipped.
        max_results: Accepted for compatibility; not used by this function.
        download_videos: When ``False`` only metadata and captions are fetched
            and no muxing is done.
        output_ext: Extension (with leading dot) of the muxed output file. The
            container must accept the subtitle codec that ``embed_subtitles``
            writes.
    """
    quality_options = ['4320', '2160', '1440', '1080']  # video qualities, highest priority first
    base_dir = 'E:/Downloaded_Videos'
    for video_url in video_urls:
        if not video_url or not video_url.strip():
            continue
        # Step 0: fetch the video metadata
        info_dict = download_video(video_url, None, quality_options, download=False)
        if info_dict is None:
            continue
        # Build the output template. The title is pasted into a yt-dlp
        # template, where a literal percent sign has to be written as "%%".
        safe_title = replace_special_chars(info_dict['title'])
        template_title = safe_title.replace('%', '%%')
        outtmpl = f'{base_dir}/{template_title}/{template_title}.%(ext)s'
        # Step 1: download the video (if downloading is enabled)
        info_dict = download_video(video_url, outtmpl, quality_options, download=download_videos)
        if info_dict is None:
            continue
        # Step 2: download the captions
        video_id = info_dict['id']
        safe_title = replace_special_chars(info_dict['title'])
        target_dir = os.path.join(base_dir, safe_title)
        video_file = os.path.join(target_dir, f"{safe_title}.mp4")
        srt_file = os.path.join(target_dir, f"{safe_title}.srt")
        ass_file = os.path.join(target_dir, f"{safe_title}.ass")
        # Make sure the directory exists
        os.makedirs(target_dir, exist_ok=True)
        if download_captions(video_id, srt_file):
            srt_to_ass(srt_file, ass_file)
            if download_videos:
                # Swap only the real extension; str.replace would also touch a
                # ".mp4" that happens to appear inside the title.
                stem = os.path.splitext(video_file)[0]
                output_file = stem + output_ext
                if os.path.normcase(output_file) == os.path.normcase(video_file):
                    # ffmpeg cannot read from and write to the same path.
                    output_file = stem + '_subbed' + output_ext
                # Step 3: embed the subtitles
                embed_subtitles(video_file, ass_file, output_file)
                print(f"Subtitles downloaded and embedded for video: {video_url}")
            else:
                print(f"Video info retrieved and subtitles downloaded for: {video_url}")
        else:
            if download_videos:
                print(f"Video downloaded, but no subtitles available for: {video_url}")
            else:
                print(f"Video info retrieved, but no subtitles available for: {video_url}")
if __name__ == "__main__":
    # Read one URL per line. Blank lines are dropped so they are not handed to
    # the downloader as empty URLs; "utf-8-sig" also strips a leading BOM.
    with open("video_urls.txt", "r", encoding="utf-8-sig") as f:
        video_urls = [line.strip() for line in f if line.strip()]
    main(video_urls, max_results=10, download_videos=True)