import os
import re
import subprocess
import whisper
import yt_dlp
from flask import Flask, request, jsonify, send_file

# Flask application object; the route decorators further down register on it.
app = Flask(__name__)
# Whisper model, loaded once at import time and reused by every request.
model = whisper.load_model("tiny")
# Alternative model choice, kept for reference:
# model = whisper.load_model("base")

# Directory containing the ffmpeg/ffprobe executables. It is joined with
# 'ffprobe' for duration probing and handed to yt-dlp as 'ffmpeg_location'.
FFMPEG_BIN = r"C:\ffmpeg\bin"  # Change if needed
# Directory (relative path) where finished transcripts are written as
# "<video_id>.txt"; created at import time if it does not exist yet.
TRANSCRIPT_DIR = "transcripts"
os.makedirs(TRANSCRIPT_DIR, exist_ok=True)

def extract_video_id(url):
    """Return the 11-character YouTube video ID found in *url*, or None.

    A ``v=`` query parameter or a ``youtu.be/<id>`` short link is looked for
    first. Only if neither is present are the ``/shorts/<id>``,
    ``/embed/<id>`` and ``/live/<id>`` path forms tried.

    The ID is restricted to ASCII letters, digits, ``_`` and ``-`` because
    callers use it to build file names and URL paths.

    Args:
        url: The candidate URL. Anything that is not a ``str`` yields None
            instead of raising.

    Returns:
        The video ID as a string, or None when no ID can be found.
    """
    if not isinstance(url, str):
        return None

    id_chars = r'([A-Za-z0-9_-]{11})'
    patterns = (
        r'(?:v=|youtu\.be/)' + id_chars,
        r'/(?:shorts|embed|live)/' + id_chars,
    )
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None

def get_audio_duration(filename):
    """Return the duration of *filename* in seconds, as reported by ffprobe.

    This is a best-effort probe: any failure is logged with ``print`` and
    reported as ``0.0`` rather than raised, so callers can treat ``0.0`` as
    "unusable file".

    Args:
        filename: Path of the media file to inspect.

    Returns:
        The duration as a float, or ``0.0`` when ffprobe cannot be started,
        exits with a non-zero status, or prints something that is not a
        number.
    """
    command = [
        os.path.join(FFMPEG_BIN, 'ffprobe'), '-v', 'error',
        '-show_entries', 'format=duration',
        '-of', 'default=noprint_wrappers=1:nokey=1',
        # Pass the input explicitly with -i: file names derived from video
        # IDs may begin with "-", which ffprobe would otherwise parse as an
        # option.
        '-i', filename,
    ]
    try:
        # Keep stderr separate so diagnostics never end up in the text that
        # is parsed as a float.
        result = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
        if result.returncode != 0:
            print(f"[ffprobe error] {result.stderr.strip()}")
            return 0.0
        return float(result.stdout.strip())
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        # OSError: ffprobe missing or not executable; ValueError: output was
        # not a number (or could not be decoded).
        print(f"[ffprobe error] {e}")
        return 0.0

def download_audio(video_url, video_id):
    """Download the audio track of *video_url* and return the MP3 file name.

    yt-dlp fetches the best available audio stream and its FFmpegExtractAudio
    post-processor converts it to a 192 kbps MP3 named ``<video_id>.mp3``
    (a relative path, so it lands in the current working directory).

    Args:
        video_url: URL handed to yt-dlp.
        video_id: Identifier used as the base name of the output file.

    Returns:
        The name of the downloaded MP3 file.

    Raises:
        FileNotFoundError: If the expected MP3 does not exist after download.
        RuntimeError: If the MP3 is empty or ffprobe reports a zero duration.
            The unusable file is deleted before the error is raised.
    """
    output_filename = f"{video_id}.mp3"
    print(f"[INFO] Downloading audio as {output_filename}...")

    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': f'{video_id}.%(ext)s',
        'quiet': False,
        'no_warnings': True,
        # A watch URL may also carry a playlist id; only the single video
        # identified by video_id is wanted.
        'noplaylist': True,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'ffmpeg_location': FFMPEG_BIN,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([video_url])

    if not os.path.exists(output_filename):
        raise FileNotFoundError(f"Audio file {output_filename} not found.")

    problem = None
    if os.path.getsize(output_filename) == 0:
        problem = "is empty"
    elif get_audio_duration(output_filename) == 0.0:
        problem = "has 0 duration"

    if problem is not None:
        # Do not leave an unusable file behind for the next request.
        try:
            os.remove(output_filename)
        except OSError as cleanup_error:
            print(f"[WARN] Could not remove {output_filename}: {cleanup_error}")
        raise RuntimeError(f"Audio file {output_filename} {problem}.")

    return output_filename

@app.route('/transcribe', methods=['POST'])
def transcribe_endpoint():
    """Transcribe the YouTube video named in the JSON request body.

    Expects a JSON object with a ``url`` key. The audio is downloaded,
    transcribed with the module-level Whisper model, and the text is saved
    to ``<TRANSCRIPT_DIR>/<video_id>.txt``.

    Returns:
        200 with ``video_id``, ``transcript`` and ``download_url`` on success;
        400 with an ``error`` message when the body is not a JSON object
        containing a usable YouTube URL;
        500 with an ``error`` message when download, transcription or
        writing the transcript fails.
    """
    # silent=True makes a missing or malformed JSON body come back as None so
    # it is answered with the JSON error below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'url' not in data:
        return jsonify({'error': 'Missing YouTube URL in request'}), 400

    url = data['url']
    # JSON allows any value type here; only strings can hold a URL.
    video_id = extract_video_id(url) if isinstance(url, str) else None
    if not video_id:
        return jsonify({'error': 'Invalid YouTube URL'}), 400

    audio_file = None
    try:
        audio_file = download_audio(url, video_id)
        print(f"[INFO] Transcribing {audio_file}...")
        result = model.transcribe(audio_file)
        transcript = result['text']

        transcript_path = os.path.join(TRANSCRIPT_DIR, f"{video_id}.txt")
        with open(transcript_path, "w", encoding="utf-8") as f:
            f.write(transcript)

        return jsonify({
            'video_id': video_id,
            'transcript': transcript,
            'download_url': f"/download/{video_id}"
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500

    finally:
        # Remove the temporary audio on success and on failure alike; a
        # failed cleanup must not turn a finished transcript into an error.
        if audio_file is not None:
            try:
                os.remove(audio_file)
            except OSError as cleanup_error:
                print(f"[WARN] Could not remove {audio_file}: {cleanup_error}")

@app.route('/download/<video_id>')
def download(video_id):
    """Send the stored transcript for *video_id* as a file attachment.

    Args:
        video_id: Path segment taken from the request URL (untrusted).

    Returns:
        The transcript file as an attachment, or a plain-text 404 message
        when the ID is malformed or no transcript exists for it.
    """
    # Accept only the 11-character shape that transcripts are saved under, so
    # the value cannot carry path separators or ".." into the file path.
    is_valid_id = re.fullmatch(r'[\w-]{11}', video_id) is not None

    if is_valid_id:
        # Make the path absolute so the existence check and send_file agree
        # on which file is meant.
        transcript_path = os.path.abspath(
            os.path.join(TRANSCRIPT_DIR, f"{video_id}.txt")
        )
        if os.path.isfile(transcript_path):
            return send_file(transcript_path, as_attachment=True)

    # video_id is echoed back, so send it as plain text; as HTML it could be
    # used to inject markup.
    return (
        f"Transcript file {video_id}.txt not found.",
        404,
        {'Content-Type': 'text/plain; charset=utf-8'},
    )

if __name__ == '__main__':
    # The server listens on all interfaces. Flask's debug mode enables an
    # interactive debugger that can execute code, so it is switched on only
    # when explicitly requested through the FLASK_DEBUG environment variable.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in (
        '1', 'true', 'yes', 'on',
    )
    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)
