from flask import Flask, request, jsonify
import os
import re
import yt_dlp
import requests

# ---------- Configuration ----------
# The Deepgram API key is a secret and must never be committed to source
# control, so it is read from the DEEPGRAM_API_KEY environment variable.
# When the variable is unset the key is an empty string and authenticated
# calls to Deepgram will fail.
# NOTE: any key that was previously hard-coded here should be treated as
# leaked and rotated.
DEEPGRAM_API_KEY = os.environ.get('DEEPGRAM_API_KEY', '')
# Directory (relative to the working directory) where transcripts are written.
TRANSCRIPTS_DIR = 'transcripts'
# -----------------------------------

# Flask application object; the /transcribe route below is registered on it.
app = Flask(__name__)

def extract_video_id(url):
    """Return the 11-character YouTube video ID found in *url*.

    Recognised forms are the ``v=<id>`` query parameter and the
    ``youtu.be/<id>``, ``/embed/<id>``, ``/shorts/<id>`` and ``/live/<id>``
    path styles.

    Args:
        url: The video URL as a string.

    Returns:
        The video ID, or the placeholder ``'unknown_video'`` when no ID can
        be found (including when *url* is not a string).
    """
    if not isinstance(url, str):
        return 'unknown_video'

    # Tried in order: the query-parameter form first, so URLs that already
    # worked keep yielding the same ID; path-style URLs are the fallback.
    patterns = (
        r"v=([a-zA-Z0-9_-]{11})",
        r"(?:youtu\.be/|/(?:embed|shorts|live)/)([a-zA-Z0-9_-]{11})",
    )
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return 'unknown_video'

def download_audio(url, video_id):
    """Fetch the audio of *url* with yt-dlp and have it converted to MP3.

    The output template is a relative filename built from *video_id*, and an
    FFmpeg post-processor is configured to extract the audio as 192-quality
    MP3.

    Args:
        url: Address of the video to download.
        video_id: Base name (without extension) used for the output file.

    Returns:
        The expected MP3 filename, ``"<video_id>.mp3"``.
    """
    mp3_postprocessor = dict(
        key='FFmpegExtractAudio',
        preferredcodec='mp3',
        preferredquality='192',
    )
    options = dict(
        format='bestaudio/best',
        # "%(ext)s" is a yt-dlp template field, filled in by the downloader.
        outtmpl="{}.%(ext)s".format(video_id),
        postprocessors=[mp3_postprocessor],
        quiet=True,
        noplaylist=True,
    )

    downloader = yt_dlp.YoutubeDL(options)
    with downloader:
        downloader.download([url])

    return "{}.mp3".format(video_id)

def transcribe_with_deepgram(file_path, timeout=(10, 600)):
    """Upload an MP3 file to Deepgram and return the transcript text.

    Args:
        file_path: Path of the MP3 file to transcribe.
        timeout: ``(connect, read)`` timeout in seconds passed to
            ``requests.post``. Without a timeout a stalled connection would
            block the calling request indefinitely.

    Returns:
        The transcript of the first alternative of the first channel in
        Deepgram's response.

    Raises:
        Exception: If Deepgram answers with a non-200 status, or the response
            body does not contain a transcript in the expected place.
        requests.RequestException: On network failures or timeouts.
        OSError: If *file_path* cannot be opened.
    """
    print("Sending audio to Deepgram...")
    url = "https://api.deepgram.com/v1/listen?model=nova&smart_format=true"
    headers = {
        "Authorization": f"Token {DEEPGRAM_API_KEY}",
        "Content-Type": "audio/mp3",
    }
    # Passing the open file object lets requests stream the upload instead of
    # reading the whole file into memory.
    with open(file_path, "rb") as audio:
        response = requests.post(
            url, headers=headers, data=audio, timeout=timeout
        )

    if response.status_code != 200:
        raise Exception(f"Deepgram error: {response.status_code} - {response.text}")

    try:
        result = response.json()
        return result["results"]["channels"][0]["alternatives"][0]["transcript"]
    except (ValueError, KeyError, IndexError, TypeError) as err:
        # A 200 with an unexpected body would otherwise surface as a bare
        # KeyError/IndexError that says nothing about the cause.
        raise Exception(
            f"Deepgram returned an unexpected response: {response.text[:200]}"
        ) from err

def save_transcript(transcript_text, video_id):
    """Write *transcript_text* to ``<TRANSCRIPTS_DIR>/<video_id>.txt``.

    The transcripts directory is created if it does not exist yet, and the
    file is opened in write mode as UTF-8, replacing any previous content.

    Args:
        transcript_text: Text to store.
        video_id: Base name (without extension) of the transcript file.

    Returns:
        The path of the written file.
    """
    os.makedirs(TRANSCRIPTS_DIR, exist_ok=True)

    filename = "{}.txt".format(video_id)
    destination = os.path.join(TRANSCRIPTS_DIR, filename)

    with open(destination, mode='w', encoding='utf-8') as out_file:
        out_file.write(transcript_text)

    return destination

@app.route('/transcribe', methods=['POST'])
def transcribe():
    """Handle ``POST /transcribe``: download a video's audio and transcribe it.

    Expects a JSON object body with a string ``url`` field.

    Returns:
        On success, a JSON response with ``video_id``, ``transcript_file``
        and ``transcript``. A JSON 400 error when the body is not a JSON
        object containing a non-empty string ``url``. A JSON 500 error
        carrying the exception message when downloading, transcribing or
        saving fails.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so every bad request gets the same JSON 400 response.
    data = request.get_json(silent=True)
    # The isinstance check rejects JSON strings/lists, for which the "in"
    # test would pass or fail for the wrong reason and indexing would raise.
    if not isinstance(data, dict) or 'url' not in data:
        return jsonify({"error": "Missing 'url' in request"}), 400

    youtube_url = data['url']
    if not isinstance(youtube_url, str) or not youtube_url.strip():
        return jsonify({"error": "'url' must be a non-empty string"}), 400

    video_id = extract_video_id(youtube_url)
    mp3_file = f"{video_id}.mp3"

    try:
        if not os.path.exists(mp3_file):
            download_audio(youtube_url, video_id)

        transcript = transcribe_with_deepgram(mp3_file)
        transcript_path = save_transcript(transcript, video_id)

        return jsonify({
            "video_id": video_id,
            "transcript_file": transcript_path,
            "transcript": transcript
        })

    except Exception as e:
        # Top-level request boundary: report any failure as a JSON 500.
        return jsonify({"error": str(e)}), 500

    finally:
        # Best-effort cleanup of the temporary audio file. A failed delete
        # (already gone, permissions) must not replace the response above.
        try:
            os.remove(mp3_file)
        except OSError:
            pass

if __name__ == '__main__':
    # Flask's debug mode enables the interactive Werkzeug debugger, which
    # allows arbitrary code execution from the browser. It is therefore
    # opt-in via the FLASK_DEBUG environment variable rather than hard-coded.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in (
        '1', 'true', 'yes', 'on',
    )
    app.run(debug=debug_enabled, port=5000)
