feat: forward language code and more config settings
This commit is contained in:
17
config.py
17
config.py
@@ -1,19 +1,26 @@
|
||||
# config.py
|
||||
|
||||
# Central tuning knobs for the subtitle sync script.
# Each key appears exactly once; a duplicated key in a dict literal is
# silently overridden by the last occurrence, which hides the real value.
SYNC_CONFIG = {
    # System -- forwarded to the WhisperModel constructor
    "device": "cpu",  # 'cuda' for NVIDIA GPU, 'cpu' for Processor
    "compute_type": "int8",  # 'float16' for GPU, 'int8' for CPU

    # Sampling
    "sample_count": 20,
    "scan_duration_sec": 75,  # length of each extracted audio chunk, in seconds

    # Matching
    "min_match_count": 3,
    "min_match_score": 0.70,
    "search_window_sec": 30,

    # Whisper Settings -- forwarded to whisper.transcribe()
    "beam_size": 5,  # 5 is more accurate, 1 is faster
    "vad_filter": True,  # Remove silence
    "vad_min_silence": 500,  # Milliseconds (passed as min_silence_duration_ms)

    # Logic & Decision Thresholds
    "fix_drift": True,
    "correction_method": "auto",  # Options: "auto", "constant", "force_elastic"
    "jitter_tolerance_ms": 300,
    "min_drift_slope": 0.00005,
    "linear_r2_threshold": 0.80,
}
|
||||
@@ -30,6 +30,17 @@ class MediaHandler:
|
||||
except Exception:
|
||||
return "0:a:0"
|
||||
|
||||
@staticmethod
|
||||
def get_language_code(language_name: str) -> str:
|
||||
"""Maps Bazarr language names to Whisper ISO codes."""
|
||||
mapping = {
|
||||
'english': 'en', 'french': 'fr', 'spanish': 'es',
|
||||
'german': 'de', 'italian': 'it', 'portuguese': 'pt',
|
||||
'dutch': 'nl', 'russian': 'ru', 'japanese': 'ja',
|
||||
'chinese': 'zh'
|
||||
}
|
||||
return mapping.get(language_name.lower(), 'en')
|
||||
|
||||
@staticmethod
|
||||
def extract_audio_chunk(media_path: str, start_sec: int, duration_sec: int, stream_index: str) -> str:
|
||||
fd, tmp_name = tempfile.mkstemp(suffix=".wav")
|
||||
|
||||
75
main.py
75
main.py
@@ -35,9 +35,24 @@ def main():
|
||||
audio_stream = MediaHandler.get_audio_stream_index(info.episode_path, info.episode_language)
|
||||
media_duration = MediaHandler.get_media_duration(info.episode_path)
|
||||
|
||||
print(f"Duration: {int(media_duration // 60)}m. Loading Whisper...")
|
||||
model_name = "base.en" if 'english' in info.episode_language.lower() else "base"
|
||||
whisper = WhisperModel(model_name, device="cpu", compute_type="int8", cpu_threads=4)
|
||||
# Get the 2-letter code (e.g., "en")
|
||||
whisper_lang_code = MediaHandler.get_language_code(info.episode_language)
|
||||
|
||||
print(f"Duration: {int(media_duration // 60)}m. Loading Whisper ({SYNC_CONFIG['device']})...")
|
||||
|
||||
# Load model based on Config
|
||||
model_name = "base.en" if whisper_lang_code == 'en' else "base"
|
||||
|
||||
try:
|
||||
whisper = WhisperModel(
|
||||
model_name,
|
||||
device=SYNC_CONFIG['device'],
|
||||
compute_type=SYNC_CONFIG['compute_type'],
|
||||
cpu_threads=4
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"Error loading model: {e}")
|
||||
return
|
||||
|
||||
subtitles = SubtitleHandler.parse_srt(info.subtitle_path)
|
||||
if not subtitles:
|
||||
@@ -60,7 +75,16 @@ def main():
|
||||
info.episode_path, int(start_sec), SYNC_CONFIG['scan_duration_sec'], audio_stream
|
||||
)
|
||||
|
||||
segments, _ = whisper.transcribe(audio_file, vad_filter=True)
|
||||
segments, _ = whisper.transcribe(
|
||||
audio_file,
|
||||
vad_filter=SYNC_CONFIG['vad_filter'],
|
||||
vad_parameters=dict(min_silence_duration_ms=SYNC_CONFIG['vad_min_silence']),
|
||||
language=whisper_lang_code,
|
||||
beam_size=SYNC_CONFIG['beam_size'],
|
||||
condition_on_previous_text=False,
|
||||
word_timestamps=False
|
||||
)
|
||||
|
||||
w_segments = [WhisperSegment(int(s.start * 1000), int(s.end * 1000), s.text) for s in list(segments)]
|
||||
|
||||
matches = TextMatcher.find_matches(subtitles, w_segments, int(start_sec * 1000))
|
||||
@@ -159,11 +183,48 @@ if __name__ == '__main__':
|
||||
|
||||
sys.argv = [
|
||||
'sync_script.py',
|
||||
'episode=/home/mathieub/Documents/DEV/PycharmProjects/ai-subtitles-sync/test_data/Superman & Lois - S03E05/Superman & Lois - S03E05 - Head On Bluray-1080p.mkv',
|
||||
'episode_name=Superman & Lois - S03E05',
|
||||
'subtitles=/home/mathieub/Documents/DEV/PycharmProjects/ai-subtitles-sync/test_data/Superman & Lois - S03E05/Superman & Lois - S03E05 - Head On Bluray-1080p.en.srt',
|
||||
'episode=/home/mathieub/Documents/DEV/PycharmProjects/ai-subtitles-sync/test_data/A Knight of the Seven Kingdoms - S01E01/A Knight of the Seven Kingdoms - S01E01 - The Hedge Knight WEBRip-1080p.mkv',
|
||||
'episode_name=A Knight of the Seven Kingdoms - S01E01',
|
||||
'subtitles=/home/mathieub/Documents/DEV/PycharmProjects/ai-subtitles-sync/test_data/A Knight of the Seven Kingdoms - S01E01/A Knight of the Seven Kingdoms - S01E01 - The Hedge Knight WEBRip-1080p.en.srt',
|
||||
'episode_language=English',
|
||||
'subtitles_language=English'
|
||||
]
|
||||
|
||||
|
||||
# sys.argv = [
|
||||
# 'sync_script.py',
|
||||
# 'episode=/home/mathieub/Documents/DEV/PycharmProjects/ai-subtitles-sync/test_data/Superman & Lois - S03E05/Superman & Lois - S03E05 - Head On Bluray-1080p.mkv',
|
||||
# 'episode_name=Superman & Lois - S03E05',
|
||||
# 'subtitles=/home/mathieub/Documents/DEV/PycharmProjects/ai-subtitles-sync/test_data/Superman & Lois - S03E05/Superman & Lois - S03E05 - Head On Bluray-1080p.en.srt',
|
||||
# 'episode_language=English',
|
||||
# 'subtitles_language=English'
|
||||
# ]
|
||||
|
||||
# sys.argv = [
|
||||
# 'sync_script.py',
|
||||
# 'episode=/home/mathieub/Documents/DEV/PycharmProjects/ai-subtitles-sync/test_data/Free Willy 3 The Rescue (1997)/Free Willy 3 The Rescue (1997) WEBRip-1080p.mp4',
|
||||
# 'episode_name=Free Willy 3: The Rescue (1997)',
|
||||
# 'subtitles=/home/mathieub/Documents/DEV/PycharmProjects/ai-subtitles-sync/test_data/Free Willy 3 The Rescue (1997)/Free Willy 3 The Rescue (1997) WEBRip-1080p.en.srt',
|
||||
# 'episode_language=English',
|
||||
# 'subtitles_language=English'
|
||||
# ]
|
||||
|
||||
# sys.argv = [
|
||||
# 'sync_script.py',
|
||||
# 'episode=/home/mathieub/Documents/DEV/PycharmProjects/ai-subtitles-sync/test_data/Zootopia 2 (2025)/Zootopia 2 (2025) WEBRip-1080p.mp4',
|
||||
# 'episode_name=Zootopia 2 (2025)',
|
||||
# 'subtitles=/home/mathieub/Documents/DEV/PycharmProjects/ai-subtitles-sync/test_data/Zootopia 2 (2025)/Zootopia 2 (2025) WEBRip-1080p.en.hi.srt',
|
||||
# 'episode_language=English',
|
||||
# 'subtitles_language=English'
|
||||
# ]
|
||||
|
||||
# sys.argv = [
|
||||
# 'sync_script.py',
|
||||
# 'episode=/home/mathieub/Documents/DEV/PycharmProjects/ai-subtitles-sync/test_data/How to Train Your Dragon The Hidden World (2019)/How to Train Your Dragon The Hidden World (2019) Bluray-1080p.mp4',
|
||||
# 'episode_name=How to Train Your Dragon: The Hidden World (2019)',
|
||||
# 'subtitles=/home/mathieub/Documents/DEV/PycharmProjects/ai-subtitles-sync/test_data/How to Train Your Dragon The Hidden World (2019)/How to Train Your Dragon The Hidden World (2019) Bluray-1080p.en.srt',
|
||||
# 'episode_language=English',
|
||||
# 'subtitles_language=English'
|
||||
# ]
|
||||
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user