import os
import re
import yt_dlp
import requests

# ---------- Configuration ----------
# Video whose audio track is downloaded and transcribed.
YOUTUBE_URL = 'https://www.youtube.com/watch?v=hyC28sfTD3Q&pp=0gcJCYUJAYcqIYzv'
# SECURITY: an API key committed to source control should be treated as
# leaked. Export DEEPGRAM_API_KEY in the environment instead; the literal
# below is kept only as a fallback so existing runs keep working, and it
# should be rotated and then deleted.
DEEPGRAM_API_KEY = os.environ.get(
    'DEEPGRAM_API_KEY',
    '6224f610f7cbd44525dc96ad7c78e655f1b28be2',
)
# Directory (relative to the working directory) that receives transcripts.
TRANSCRIPTS_DIR = 'transcripts'
# -----------------------------------

def extract_video_id(url):
    """Return the 11-character YouTube video ID found in *url*.

    Recognised URL shapes:

    * ``...watch?v=<id>`` -- the ``v`` query parameter, in any position
    * ``youtu.be/<id>`` short links
    * ``/shorts/<id>``, ``/embed/<id>``, ``/live/<id>`` and ``/v/<id>`` paths

    Returns the sentinel string ``'unknown_video'`` when *url* is empty or
    none of the shapes above is present.
    """
    if not url:
        return 'unknown_video'

    id_chars = r"([a-zA-Z0-9_-]{11})"
    patterns = (
        # Anchor on the start of the string or a `?`/`&` separator so that a
        # different parameter ending in "v" (e.g. "&rv=") is not mistaken
        # for the video ID.
        r"(?:^|[?&])v=" + id_chars,
        r"youtu\.be/" + id_chars,
        r"/(?:shorts|embed|live|v)/" + id_chars,
    )
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return 'unknown_video'

def download_audio(url, video_id):
    """Fetch the audio of *url* with yt-dlp and have it converted to MP3.

    The download is written using the output template ``<video_id>.<ext>``
    and passed through the ``FFmpegExtractAudio`` post-processor configured
    for the ``mp3`` codec at quality ``192``.

    Args:
        url: Address of the video to download.
        video_id: Base name (without extension) used for the output file.

    Returns:
        The string ``"<video_id>.mp3"``, i.e. the filename the converted
        audio is expected to have.
    """
    mp3_extractor = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }
    options = dict(
        format='bestaudio/best',
        outtmpl=f"{video_id}.%(ext)s",
        postprocessors=[mp3_extractor],
        quiet=True,
        noplaylist=True,
    )

    with yt_dlp.YoutubeDL(options) as downloader:
        print("Downloading audio...")
        downloader.download([url])

    mp3_name = f"{video_id}.mp3"
    return mp3_name

def transcribe_with_deepgram(file_path, timeout=(10, 600)):
    """Upload an MP3 file to Deepgram and return the transcript text.

    The file is streamed as the request body to the ``/v1/listen`` endpoint
    with ``model=nova`` and ``smart_format=true``, authenticated with the
    module-level ``DEEPGRAM_API_KEY``.

    Args:
        file_path: Path of the audio file to send.
        timeout: Passed straight to ``requests.post``; a
            ``(connect, read)`` tuple in seconds. Without it a stalled
            connection would block the script forever.

    Returns:
        The ``transcript`` string of the first alternative of the first
        channel in the response.

    Raises:
        Exception: If Deepgram answers with a non-200 status, or with a
            200 body that is not JSON / lacks the expected fields.
        OSError: If *file_path* cannot be opened.
        requests.exceptions.RequestException: On connection errors or
            when *timeout* expires.
    """
    print("Sending audio to Deepgram API...")
    endpoint = "https://api.deepgram.com/v1/listen?model=nova&smart_format=true"

    headers = {
        "Authorization": f"Token {DEEPGRAM_API_KEY}",
        "Content-Type": "audio/mp3",
    }

    # Passing the open file object lets requests stream it instead of
    # loading the whole recording into memory.
    with open(file_path, "rb") as audio:
        response = requests.post(
            endpoint, headers=headers, data=audio, timeout=timeout
        )

    if response.status_code != 200:
        raise Exception(f"Deepgram error: {response.status_code} - {response.text}")

    try:
        result = response.json()
        return result["results"]["channels"][0]["alternatives"][0]["transcript"]
    except (ValueError, KeyError, IndexError, TypeError) as err:
        # ValueError covers an undecodable JSON body; the others cover a
        # body that decodes but is missing channels/alternatives.
        raise Exception(
            f"Deepgram returned an unexpected response: {response.text[:500]}"
        ) from err

def save_transcript(transcript_text, video_id):
    """Write *transcript_text* to ``<TRANSCRIPTS_DIR>/<video_id>.txt``.

    The transcripts directory is created when it does not exist yet, the
    file is written as UTF-8 (replacing any previous content), and the
    destination path is printed afterwards.

    Args:
        transcript_text: Text to store.
        video_id: Base name of the transcript file.
    """
    os.makedirs(TRANSCRIPTS_DIR, exist_ok=True)

    filename = f"{video_id}.txt"
    destination = os.path.join(TRANSCRIPTS_DIR, filename)

    with open(destination, mode='w', encoding='utf-8') as out_file:
        out_file.write(transcript_text)

    print(f"Transcript saved to: {destination}")

def main():
    """Download (if needed), transcribe and save the video at YOUTUBE_URL.

    An existing ``<video_id>.mp3`` in the working directory is reused
    instead of downloading again, but only when a real video ID was parsed
    from the URL.
    """
    video_id = extract_video_id(YOUTUBE_URL)
    mp3_file = f"{video_id}.mp3"

    # 'unknown_video' is the placeholder extract_video_id returns when it
    # cannot parse an ID. Every such URL maps to the same filename, so a
    # leftover unknown_video.mp3 may belong to a different video and must
    # not be treated as a cache hit.
    id_is_known = video_id != 'unknown_video'
    if not (id_is_known and os.path.exists(mp3_file)):
        # Use the path reported by the downloader rather than assuming it.
        mp3_file = download_audio(YOUTUBE_URL, video_id)

    transcript = transcribe_with_deepgram(mp3_file)
    save_transcript(transcript, video_id)

# Run the pipeline only when this file is executed as a script, so that
# importing the module does not trigger a download or an API call.
if __name__ == '__main__':
    main()
