import os
import re
from typing import List

from .types import SubtitleEntry


class SubtitleHandler:
    """Static helpers for reading and writing SubRip (``.srt``) subtitle files."""

    # One cue: an index line, a "start --> end" timestamp line, then the text
    # lines up to (not including) the next blank line.
    #
    # The leading ``(?!\r?\n)`` guard in the text group handles cues with no
    # text at all: in that case the cursor already sits on the blank
    # separator line, and without the guard the group would consume that
    # separator together with the entire following cue.
    _ENTRY_PATTERN = re.compile(
        r'(\d+)\n'
        r'(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})\n'
        r'((?:(?!\r?\n)(?:(?!\r?\n\r?\n).)*)?)',
        re.DOTALL,
    )

    @staticmethod
    def parse_time(t: str) -> int:
        """Convert an ``HH:MM:SS,mmm`` timestamp into milliseconds.

        The fields are read by fixed character position, so ``t`` must use
        two-digit hours, minutes and seconds; everything after the separator
        at index 8 is taken as the millisecond part.

        Raises:
            ValueError: if any of the sliced fields is not an integer.
        """
        hours = int(t[:2])
        minutes = int(t[3:5])
        seconds = int(t[6:8])
        millis = int(t[9:])
        return hours * 3600000 + minutes * 60000 + seconds * 1000 + millis

    @staticmethod
    def format_time(ms: int) -> str:
        """Render a millisecond count as an ``HH:MM:SS,mmm`` timestamp.

        Negative values are clamped to zero. Hours are zero-padded to at
        least two digits and are not capped, so very large values produce a
        wider hours field.
        """
        ms = max(0, ms)
        hours, rest = divmod(ms, 3600000)
        minutes, rest = divmod(rest, 60000)
        seconds, millis = divmod(rest, 1000)
        return f"{hours:02}:{minutes:02}:{seconds:02},{millis:03}"

    @staticmethod
    def parse_srt(filepath: str) -> List[SubtitleEntry]:
        """Parse the subtitle file at ``filepath`` into a list of entries.

        Returns an empty list when ``filepath`` does not exist. The file is
        decoded with the first encoding that succeeds out of ``utf-8-sig``,
        ``utf-8`` and ``latin-1``. Each cue found becomes
        ``SubtitleEntry(index, start, end, text)`` with times in milliseconds
        and the text stripped of surrounding whitespace; a cue with no text
        lines yields an empty string.
        """
        if not os.path.exists(filepath):
            return []

        content = ""
        # latin-1 maps every byte to a character, so the last attempt cannot
        # raise UnicodeDecodeError and acts as the catch-all fallback.
        for enc in ('utf-8-sig', 'utf-8', 'latin-1'):
            try:
                with open(filepath, 'r', encoding=enc) as f:
                    content = f.read()
                break
            except UnicodeDecodeError:
                continue

        entries = []
        for match in SubtitleHandler._ENTRY_PATTERN.finditer(content):
            index, start_text, end_text, text = match.groups()
            entries.append(
                SubtitleEntry(
                    int(index),
                    SubtitleHandler.parse_time(start_text),
                    SubtitleHandler.parse_time(end_text),
                    text.strip(),
                )
            )
        return entries

    @staticmethod
    def write_srt(filepath: str, entries: List[SubtitleEntry]):
        """Write ``entries`` to ``filepath`` as UTF-8 SubRip text.

        Each entry is emitted as its ``index``, a timestamp line built from
        ``start_ms`` and ``end_ms``, its ``raw_text``, and a blank separator
        line. An existing file at ``filepath`` is overwritten.
        """
        with open(filepath, 'w', encoding='utf-8') as f:
            for entry in entries:
                start = SubtitleHandler.format_time(entry.start_ms)
                end = SubtitleHandler.format_time(entry.end_ms)
                f.write(f"{entry.index}\n{start} --> {end}\n{entry.raw_text}\n\n")