feat: forward language code and more config settings
This commit is contained in:
17
config.py
17
config.py
@@ -1,19 +1,26 @@
|
|||||||
# config.py
|
|
||||||
|
|
||||||
SYNC_CONFIG = {
|
SYNC_CONFIG = {
|
||||||
|
# System
|
||||||
|
"device": "cpu", # 'cuda' to run on an NVIDIA GPU, 'cpu' to run on the CPU
|
||||||
|
"compute_type": "int8", # 'float16' for GPU, 'int8' for CPU
|
||||||
|
|
||||||
# Sampling
|
# Sampling
|
||||||
"sample_count": 20,
|
"sample_count": 20,
|
||||||
"scan_duration_sec": 45,
|
"scan_duration_sec": 75,
|
||||||
|
|
||||||
# Matching
|
# Matching
|
||||||
"min_match_count": 3,
|
"min_match_count": 3,
|
||||||
"min_match_score": 0.70,
|
"min_match_score": 0.70,
|
||||||
"search_window_sec": 30,
|
"search_window_sec": 30,
|
||||||
|
|
||||||
|
# Whisper Settings
|
||||||
|
"beam_size": 5, # 5 is more accurate, 1 is faster
|
||||||
|
"vad_filter": True, # Remove silence
|
||||||
|
"vad_min_silence": 500, # Milliseconds
|
||||||
|
|
||||||
# Logic & Decision Thresholds
|
# Logic & Decision Thresholds
|
||||||
"fix_drift": True,
|
"fix_drift": True,
|
||||||
"correction_method": "auto", # Options: "auto", "constant", "force_elastic"
|
"correction_method": "auto",
|
||||||
"jitter_tolerance_ms": 300,
|
"jitter_tolerance_ms": 300,
|
||||||
"min_drift_slope": 0.00005,
|
"min_drift_slope": 0.00005,
|
||||||
"linear_r2_threshold": 0.80,
|
"linear_r2_threshold": 0.80,
|
||||||
}
|
}
|
||||||
@@ -30,6 +30,17 @@ class MediaHandler:
|
|||||||
except Exception:
|
except Exception:
|
||||||
return "0:a:0"
|
return "0:a:0"
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_language_code(language_name: str) -> str:
|
||||||
|
"""Maps Bazarr language names to Whisper ISO codes."""
|
||||||
|
mapping = {
|
||||||
|
'english': 'en', 'french': 'fr', 'spanish': 'es',
|
||||||
|
'german': 'de', 'italian': 'it', 'portuguese': 'pt',
|
||||||
|
'dutch': 'nl', 'russian': 'ru', 'japanese': 'ja',
|
||||||
|
'chinese': 'zh'
|
||||||
|
}
|
||||||
|
return mapping.get(language_name.lower(), 'en')
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def extract_audio_chunk(media_path: str, start_sec: int, duration_sec: int, stream_index: str) -> str:
|
def extract_audio_chunk(media_path: str, start_sec: int, duration_sec: int, stream_index: str) -> str:
|
||||||
fd, tmp_name = tempfile.mkstemp(suffix=".wav")
|
fd, tmp_name = tempfile.mkstemp(suffix=".wav")
|
||||||
|
|||||||
75
main.py
75
main.py
@@ -35,9 +35,24 @@ def main():
|
|||||||
audio_stream = MediaHandler.get_audio_stream_index(info.episode_path, info.episode_language)
|
audio_stream = MediaHandler.get_audio_stream_index(info.episode_path, info.episode_language)
|
||||||
media_duration = MediaHandler.get_media_duration(info.episode_path)
|
media_duration = MediaHandler.get_media_duration(info.episode_path)
|
||||||
|
|
||||||
print(f"Duration: {int(media_duration // 60)}m. Loading Whisper...")
|
# Map the Bazarr language name to Whisper's 2-letter language code (e.g., "en")
|
||||||
model_name = "base.en" if 'english' in info.episode_language.lower() else "base"
|
whisper_lang_code = MediaHandler.get_language_code(info.episode_language)
|
||||||
whisper = WhisperModel(model_name, device="cpu", compute_type="int8", cpu_threads=4)
|
|
||||||
|
print(f"Duration: {int(media_duration // 60)}m. Loading Whisper ({SYNC_CONFIG['device']})...")
|
||||||
|
|
||||||
|
# Pick the model from the language code; device and compute type come from SYNC_CONFIG
|
||||||
|
model_name = "base.en" if whisper_lang_code == 'en' else "base"
|
||||||
|
|
||||||
|
try:
|
||||||
|
whisper = WhisperModel(
|
||||||
|
model_name,
|
||||||
|
device=SYNC_CONFIG['device'],
|
||||||
|
compute_type=SYNC_CONFIG['compute_type'],
|
||||||
|
cpu_threads=4
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error loading model: {e}")
|
||||||
|
return
|
||||||
|
|
||||||
subtitles = SubtitleHandler.parse_srt(info.subtitle_path)
|
subtitles = SubtitleHandler.parse_srt(info.subtitle_path)
|
||||||
if not subtitles:
|
if not subtitles:
|
||||||
@@ -60,7 +75,16 @@ def main():
|
|||||||
info.episode_path, int(start_sec), SYNC_CONFIG['scan_duration_sec'], audio_stream
|
info.episode_path, int(start_sec), SYNC_CONFIG['scan_duration_sec'], audio_stream
|
||||||
)
|
)
|
||||||
|
|
||||||
segments, _ = whisper.transcribe(audio_file, vad_filter=True)
|
segments, _ = whisper.transcribe(
|
||||||
|
audio_file,
|
||||||
|
vad_filter=SYNC_CONFIG['vad_filter'],
|
||||||
|
vad_parameters=dict(min_silence_duration_ms=SYNC_CONFIG['vad_min_silence']),
|
||||||
|
language=whisper_lang_code,
|
||||||
|
beam_size=SYNC_CONFIG['beam_size'],
|
||||||
|
condition_on_previous_text=False,
|
||||||
|
word_timestamps=False
|
||||||
|
)
|
||||||
|
|
||||||
w_segments = [WhisperSegment(int(s.start * 1000), int(s.end * 1000), s.text) for s in list(segments)]
|
w_segments = [WhisperSegment(int(s.start * 1000), int(s.end * 1000), s.text) for s in list(segments)]
|
||||||
|
|
||||||
matches = TextMatcher.find_matches(subtitles, w_segments, int(start_sec * 1000))
|
matches = TextMatcher.find_matches(subtitles, w_segments, int(start_sec * 1000))
|
||||||
@@ -159,11 +183,48 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
sys.argv = [
|
sys.argv = [
|
||||||
'sync_script.py',
|
'sync_script.py',
|
||||||
'episode=/home/mathieub/Documents/DEV/PycharmProjects/ai-subtitles-sync/test_data/Superman & Lois - S03E05/Superman & Lois - S03E05 - Head On Bluray-1080p.mkv',
|
'episode=/home/mathieub/Documents/DEV/PycharmProjects/ai-subtitles-sync/test_data/A Knight of the Seven Kingdoms - S01E01/A Knight of the Seven Kingdoms - S01E01 - The Hedge Knight WEBRip-1080p.mkv',
|
||||||
'episode_name=Superman & Lois - S03E05',
|
'episode_name=A Knight of the Seven Kingdoms - S01E01',
|
||||||
'subtitles=/home/mathieub/Documents/DEV/PycharmProjects/ai-subtitles-sync/test_data/Superman & Lois - S03E05/Superman & Lois - S03E05 - Head On Bluray-1080p.en.srt',
|
'subtitles=/home/mathieub/Documents/DEV/PycharmProjects/ai-subtitles-sync/test_data/A Knight of the Seven Kingdoms - S01E01/A Knight of the Seven Kingdoms - S01E01 - The Hedge Knight WEBRip-1080p.en.srt',
|
||||||
'episode_language=English',
|
'episode_language=English',
|
||||||
'subtitles_language=English'
|
'subtitles_language=English'
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
# sys.argv = [
|
||||||
|
# 'sync_script.py',
|
||||||
|
# 'episode=/home/mathieub/Documents/DEV/PycharmProjects/ai-subtitles-sync/test_data/Superman & Lois - S03E05/Superman & Lois - S03E05 - Head On Bluray-1080p.mkv',
|
||||||
|
# 'episode_name=Superman & Lois - S03E05',
|
||||||
|
# 'subtitles=/home/mathieub/Documents/DEV/PycharmProjects/ai-subtitles-sync/test_data/Superman & Lois - S03E05/Superman & Lois - S03E05 - Head On Bluray-1080p.en.srt',
|
||||||
|
# 'episode_language=English',
|
||||||
|
# 'subtitles_language=English'
|
||||||
|
# ]
|
||||||
|
|
||||||
|
# sys.argv = [
|
||||||
|
# 'sync_script.py',
|
||||||
|
# 'episode=/home/mathieub/Documents/DEV/PycharmProjects/ai-subtitles-sync/test_data/Free Willy 3 The Rescue (1997)/Free Willy 3 The Rescue (1997) WEBRip-1080p.mp4',
|
||||||
|
# 'episode_name=Free Willy 3: The Rescue (1997)',
|
||||||
|
# 'subtitles=/home/mathieub/Documents/DEV/PycharmProjects/ai-subtitles-sync/test_data/Free Willy 3 The Rescue (1997)/Free Willy 3 The Rescue (1997) WEBRip-1080p.en.srt',
|
||||||
|
# 'episode_language=English',
|
||||||
|
# 'subtitles_language=English'
|
||||||
|
# ]
|
||||||
|
|
||||||
|
# sys.argv = [
|
||||||
|
# 'sync_script.py',
|
||||||
|
# 'episode=/home/mathieub/Documents/DEV/PycharmProjects/ai-subtitles-sync/test_data/Zootopia 2 (2025)/Zootopia 2 (2025) WEBRip-1080p.mp4',
|
||||||
|
# 'episode_name=Zootopia 2 (2025)',
|
||||||
|
# 'subtitles=/home/mathieub/Documents/DEV/PycharmProjects/ai-subtitles-sync/test_data/Zootopia 2 (2025)/Zootopia 2 (2025) WEBRip-1080p.en.hi.srt',
|
||||||
|
# 'episode_language=English',
|
||||||
|
# 'subtitles_language=English'
|
||||||
|
# ]
|
||||||
|
|
||||||
|
# sys.argv = [
|
||||||
|
# 'sync_script.py',
|
||||||
|
# 'episode=/home/mathieub/Documents/DEV/PycharmProjects/ai-subtitles-sync/test_data/How to Train Your Dragon The Hidden World (2019)/How to Train Your Dragon The Hidden World (2019) Bluray-1080p.mp4',
|
||||||
|
# 'episode_name=How to Train Your Dragon: The Hidden World (2019)',
|
||||||
|
# 'subtitles=/home/mathieub/Documents/DEV/PycharmProjects/ai-subtitles-sync/test_data/How to Train Your Dragon The Hidden World (2019)/How to Train Your Dragon The Hidden World (2019) Bluray-1080p.en.srt',
|
||||||
|
# 'episode_language=English',
|
||||||
|
# 'subtitles_language=English'
|
||||||
|
# ]
|
||||||
|
|
||||||
main()
|
main()
|
||||||
|
|||||||
Reference in New Issue
Block a user