#!/usr/bin/env python3
"""
DJ Teaser Builder (local, offline-friendly)

- Scans a folder for audio files (max 20 by default)
- Lets you select tracks by index (e.g. 1,2,5,7) or use "all"
- Finds highlight segments (energy + onset)
- Snaps start to bar grid (DJ-ish phrase cuts)
- Renders clips + acrossfades them into a teaser via FFmpeg
- Writes a JSON report (chosen start times / durations)

Requirements:
- ffmpeg in PATH
- pip install numpy librosa soundfile

Example:
    python dj_teaser.py --tracks-dir ./tracks --select 1,2,3,4 --mode rollcall --teaser 60
"""

import argparse
import json
import shutil
import subprocess
from dataclasses import dataclass
from pathlib import Path
from typing import List, Optional, Tuple

import numpy as np
import librosa

# Extensions accepted when scanning the tracks folder (lower-cased compare).
AUDIO_EXTS = {".wav", ".mp3", ".flac", ".m4a", ".aiff", ".aac", ".ogg", ".opus"}


@dataclass
class Config:
    """Run-time settings assembled from CLI arguments in main()."""

    tracks_dir: Path
    work_dir: Path
    out_dir: Path
    output_name: str
    max_tracks: int = 20
    analysis_sr: int = 22050   # sample rate used for decoding + librosa analysis
    hop_length: int = 512      # analysis frame hop (samples)

    # Teaser / DJ settings
    teaser_seconds: float = 60.0
    crossfade_seconds: float = 0.25
    fade_seconds: float = 0.08
    avoid_intro_seconds: float = 30.0
    avoid_outro_seconds: float = 20.0

    # rollcall: short bars per track, bestof: longer bars per track (and fewer tracks ideally)
    mode: str = "rollcall"  # "rollcall" or "bestof"
    bars_per_track: int = 2
    beats_per_bar: int = 4

    # Loudness target (simple 1-pass loudnorm)
    target_lufs: float = -14.0


def run(cmd: List[str]) -> None:
    """Run an external command; raise RuntimeError with captured stderr on failure."""
    p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if p.returncode != 0:
        raise RuntimeError(f"Command failed:\n{' '.join(cmd)}\n\nSTDERR:\n{p.stderr}")


def ensure_ffmpeg() -> None:
    """Fail fast if the ffmpeg binary is not available on PATH."""
    if shutil.which("ffmpeg") is None:
        # FIX: the message was previously split by a raw newline inside the
        # string literal (a syntax error); it is now a single-line literal.
        raise RuntimeError("ffmpeg not found in PATH. Install ffmpeg and try again.")


def list_tracks(tracks_dir: Path, max_tracks: int) -> List[Path]:
    """Return up to *max_tracks* audio files from *tracks_dir*, sorted by name."""
    files = [
        p
        for p in sorted(tracks_dir.iterdir())
        if p.is_file() and p.suffix.lower() in AUDIO_EXTS
    ]
    return files[:max_tracks]


def parse_selection(selection: str, num_tracks: int) -> List[int]:
    """
    Parse a 1-based track selection string into 0-based indices.

    selection examples:
        "all"
        "1,2,3,7"
        "1-4,7,9-10"

    Out-of-range indices are silently dropped; duplicates are removed while
    preserving first-seen order.

    Raises:
        ValueError: if nothing valid remains after filtering.
    """
    s = selection.strip().lower()
    if s == "all":
        return list(range(num_tracks))
    parts = [p.strip() for p in s.split(",") if p.strip()]
    out: List[int] = []
    for part in parts:
        if "-" in part:
            a, b = part.split("-", 1)
            a_i = int(a) - 1
            b_i = int(b) - 1
            if a_i > b_i:
                a_i, b_i = b_i, a_i
            out.extend(list(range(a_i, b_i + 1)))
        else:
            out.append(int(part) - 1)
    # unique, keep order
    seen = set()
    filtered = []
    for i in out:
        if 0 <= i < num_tracks and i not in seen:
            seen.add(i)
            filtered.append(i)
    if not filtered:
        raise ValueError("Selection resulted in an empty track list. Check --select.")
    return filtered


def ffmpeg_to_wav(in_path: Path, out_wav: Path, sr: int) -> None:
    """Decode any supported input to a stereo WAV at sample rate *sr*."""
    out_wav.parent.mkdir(parents=True, exist_ok=True)
    run([
        "ffmpeg", "-y",
        "-i", str(in_path),
        "-vn",
        "-ac", "2",
        "-ar", str(sr),
        "-f", "wav",
        str(out_wav),
    ])


def zscore(x: np.ndarray) -> np.ndarray:
    """Standardize *x* to zero mean / unit std (epsilon guards a flat signal)."""
    x = np.asarray(x, dtype=np.float32)
    mu = float(np.mean(x))
    sd = float(np.std(x) + 1e-9)
    return (x - mu) / sd


def pick_highlight_segment(
    y: np.ndarray,
    sr: int,
    hop_length: int,
    clip_s: float,
    avoid_intro_s: float,
    avoid_outro_s: float,
) -> Tuple[float, float, dict]:
    """
    Find the most "interesting" window of a track by a weighted blend of
    RMS energy and onset strength, avoiding intro/outro regions.

    Returns:
        (approx_start_seconds, duration_seconds, debug_metrics)
    """
    duration = len(y) / sr
    debug = {"duration_seconds": float(duration)}

    # Track too short to honor the avoid zones: just take the center.
    if duration <= (avoid_intro_s + avoid_outro_s + clip_s + 1.0):
        start = max(0.0, (duration - clip_s) / 2.0)
        debug["reason"] = "short_track_center"
        return start, clip_s, debug

    rms = librosa.feature.rms(y=y, frame_length=2048, hop_length=hop_length)[0]
    onset = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)
    # rms and onset frame counts can differ by one; truncate to the shorter.
    n = min(len(rms), len(onset))
    rms, onset = rms[:n], onset[:n]

    # Onset (rhythmic activity) weighted heavier than raw energy.
    score = 0.35 * zscore(rms) + 0.65 * zscore(onset)
    score = np.maximum(score, 0.0)

    clip_frames = max(1, int(round((clip_s * sr) / hop_length)))
    t_seconds = (np.arange(n) * hop_length) / sr

    valid = (t_seconds >= avoid_intro_s) & (t_seconds <= (duration - avoid_outro_s - clip_s))
    valid_idxs = np.where(valid)[0]
    if len(valid_idxs) == 0:
        start = max(0.0, (duration - clip_s) / 2.0)
        debug["reason"] = "no_valid_window_center"
        return start, clip_s, debug

    # Sliding-window sum of score over a clip-length window ("same" mode means
    # summed[i] is centered on frame i).
    window = np.ones(clip_frames, dtype=np.float32)
    summed = np.convolve(score, window, mode="same")

    best_idx = int(valid_idxs[np.argmax(summed[valid_idxs])])
    center_t = float(t_seconds[best_idx])
    start_t = center_t - (clip_s / 2.0)
    start_t = float(max(avoid_intro_s, min(start_t, duration - avoid_outro_s - clip_s)))

    debug.update({
        "best_center_seconds": center_t,
        "approx_start_seconds": start_t,
        "clip_frames": int(clip_frames),
    })
    return start_t, clip_s, debug


def bars_to_seconds(tempo_bpm: float, bars: int, beats_per_bar: int) -> float:
    """Convert a bar count at *tempo_bpm* to seconds (tempo clamped above zero)."""
    beats = bars * beats_per_bar
    return (60.0 / max(1e-6, tempo_bpm)) * beats


def snap_to_bars(
    y: np.ndarray,
    sr: int,
    approx_start: float,
    bars: int,
    beats_per_bar: int = 4,
) -> Tuple[float, float]:
    """
    Snap *approx_start* onto the nearest bar-grid beat for DJ-style phrasing.

    Returns:
        (snapped_start_seconds, tempo_bpm) — falls back to the unsnapped start
        (and tempo 0.0 on total failure) when beat tracking is unusable.
    """
    try:
        tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
        # FIX: librosa >= 0.10 returns tempo as a 1-element ndarray; calling
        # float() on it is deprecated by numpy. Normalize to a scalar first.
        tempo = float(np.atleast_1d(tempo)[0])
        if beat_frames is None or len(beat_frames) < 8:
            return approx_start, tempo
        beat_times = librosa.frames_to_time(beat_frames, sr=sr)
        i = int(np.argmin(np.abs(beat_times - approx_start)))
        grid = max(1, bars * beats_per_bar)  # beats per bar-grid chunk
        snapped_i = int(round(i / grid) * grid)
        snapped_i = max(0, min(snapped_i, len(beat_times) - 1))
        snapped_t = float(beat_times[snapped_i])
        # keep snapping reasonable: never move more than 2 s from the highlight
        if abs(snapped_t - approx_start) <= 2.0:
            return snapped_t, tempo
        return approx_start, tempo
    except Exception:
        return approx_start, 0.0


def render_clip(
    in_wav: Path,
    out_path: Path,
    start: float,
    dur: float,
    fade_s: float,
    target_lufs: float,
) -> None:
    """Cut [start, start+dur) from *in_wav*, fade in/out, loudness-normalize."""
    out_path.parent.mkdir(parents=True, exist_ok=True)
    af = (
        f"atrim=start={start}:duration={dur},"
        f"afade=t=in:st=0:d={fade_s},"
        f"afade=t=out:st={max(0.0, dur - fade_s)}:d={fade_s},"
        f"loudnorm=I={target_lufs}:TP=-1.5:LRA=11"
    )
    run([
        "ffmpeg", "-y",
        "-i", str(in_wav),
        "-vn",
        "-af", af,
        str(out_path),
    ])


def build_acrossfade_chain(clips: List[Path], out_path: Path, crossfade_s: float) -> None:
    """Chain all clips into one file with pairwise ffmpeg acrossfade filters."""
    # FIX: an empty list previously produced an ffmpeg command with no inputs
    # and an empty filtergraph; fail with a clear message instead.
    if not clips:
        raise ValueError("No clips to crossfade.")
    if len(clips) == 1:
        shutil.copyfile(clips[0], out_path)
        return
    cmd = ["ffmpeg", "-y"]
    for c in clips:
        cmd += ["-i", str(c)]
    # Fold inputs left-to-right: [0][1]->[a1], [a1][2]->[a2], ...
    filter_parts = []
    last = "[0:a]"
    for i in range(1, len(clips)):
        nxt = f"[{i}:a]"
        out = f"[a{i}]"
        filter_parts.append(f"{last}{nxt}acrossfade=d={crossfade_s}:c1=tri:c2=tri{out}")
        last = out
    cmd += [
        "-filter_complex", ";".join(filter_parts),
        "-map", last,
        str(out_path),
    ]
    run(cmd)


def main():
    """CLI entry point: scan, select, analyze, render, and join the teaser."""
    parser = argparse.ArgumentParser(description="Local DJ Teaser Builder (Python + FFmpeg)")
    parser.add_argument("--tracks-dir", default="./tracks", help="Folder containing audio tracks")
    parser.add_argument("--work-dir", default="./work", help="Temp working folder")
    parser.add_argument("--out-dir", default="./out", help="Output folder")
    parser.add_argument("--max-tracks", type=int, default=20,
                        help="Max tracks to scan from folder (default: 20)")
    parser.add_argument("--select", default="all",
                        help='Track selection: "all", "1,2,5", "1-4,7" (1-based)')
    parser.add_argument("--mode", choices=["rollcall", "bestof"], default="rollcall",
                        help="Teaser style")
    parser.add_argument("--teaser", type=float, default=60.0,
                        help="Final teaser length in seconds")
    parser.add_argument("--bars", type=int, default=2,
                        help="Bars per track clip (DJ phrasing). rollcall=2 typical")
    parser.add_argument("--bpb", type=int, default=4, help="Beats per bar (4 for trance)")
    parser.add_argument("--crossfade", type=float, default=0.25,
                        help="Acrossfade duration in seconds")
    parser.add_argument("--avoid-intro", type=float, default=30.0,
                        help="Skip intro seconds when searching highlights")
    parser.add_argument("--avoid-outro", type=float, default=20.0,
                        help="Skip outro seconds when searching highlights")
    parser.add_argument("--target-lufs", type=float, default=-14.0,
                        help="Loudness target LUFS (approx)")
    parser.add_argument("--output", default="album_teaser.wav", help="Output teaser filename")
    args = parser.parse_args()

    ensure_ffmpeg()

    cfg = Config(
        tracks_dir=Path(args.tracks_dir),
        work_dir=Path(args.work_dir),
        out_dir=Path(args.out_dir),
        output_name=args.output,
        max_tracks=args.max_tracks,
        teaser_seconds=args.teaser,
        crossfade_seconds=args.crossfade,
        avoid_intro_seconds=args.avoid_intro,
        avoid_outro_seconds=args.avoid_outro,
        mode=args.mode,
        bars_per_track=args.bars,
        beats_per_bar=args.bpb,
        target_lufs=args.target_lufs,
    )
    cfg.out_dir.mkdir(parents=True, exist_ok=True)
    cfg.work_dir.mkdir(parents=True, exist_ok=True)

    tracks = list_tracks(cfg.tracks_dir, cfg.max_tracks)
    if not tracks:
        raise SystemExit(f"No audio tracks found in: {cfg.tracks_dir.resolve()}")

    # Print discovered tracks (nice for Git usage)
    print("\nDiscovered tracks:")
    for i, t in enumerate(tracks, start=1):
        print(f"  {i:02d}. {t.name}")

    selected_idxs = parse_selection(args.select, len(tracks))
    selected_tracks = [tracks[i] for i in selected_idxs]

    print("\nSelected tracks:")
    for i, t in zip(selected_idxs, selected_tracks):
        print(f"  {i+1:02d}. {t.name}")

    n = len(selected_tracks)
    teaser_s = float(cfg.teaser_seconds)
    cf = float(cfg.crossfade_seconds)

    # Total playtime math with acrossfades:
    #   final_length = sum(durs) - (n-1)*cf  =>  sum(durs) = teaser + (n-1)*cf
    # We use avg_dur to clamp bar-based clip duration.
    avg_dur = (teaser_s + (n - 1) * cf) / max(1, n)

    clips: List[Path] = []
    report = {
        "config": {
            "mode": cfg.mode,
            "teaser_seconds": teaser_s,
            "crossfade_seconds": cf,
            "bars_per_track": cfg.bars_per_track,
            "beats_per_bar": cfg.beats_per_bar,
            "avoid_intro_seconds": cfg.avoid_intro_seconds,
            "avoid_outro_seconds": cfg.avoid_outro_seconds,
            "target_lufs": cfg.target_lufs,
            "avg_clip_seconds_target": avg_dur,
        },
        "tracks": []
    }

    for idx, track in enumerate(selected_tracks, start=1):
        tmp_wav = cfg.work_dir / f"track_{idx:02d}.wav"
        ffmpeg_to_wav(track, tmp_wav, cfg.analysis_sr)

        y, sr = librosa.load(tmp_wav, sr=cfg.analysis_sr, mono=True)

        # 1) pick approximate highlight
        approx_start, _, debug = pick_highlight_segment(
            y=y,
            sr=sr,
            hop_length=cfg.hop_length,
            clip_s=max(4.0, min(8.0, avg_dur)),  # search window size
            avoid_intro_s=cfg.avoid_intro_seconds,
            avoid_outro_s=cfg.avoid_outro_seconds,
        )

        # 2) snap to bar grid (DJ phrasing) + compute tempo
        snapped_start, tempo = snap_to_bars(
            y=y,
            sr=sr,
            approx_start=approx_start,
            bars=cfg.bars_per_track,
            beats_per_bar=cfg.beats_per_bar,
        )

        # 3) derive duration from bars at detected tempo
        #    If tempo fails (0), fall back to avg_dur.
        if tempo and tempo > 1.0:
            dur = bars_to_seconds(tempo, cfg.bars_per_track, cfg.beats_per_bar)
        else:
            dur = avg_dur

        # clamp duration so total stays in bounds
        dur = float(np.clip(dur, 2.5, avg_dur))

        clip_out = cfg.work_dir / f"clip_{idx:02d}.wav"
        render_clip(
            in_wav=tmp_wav,
            out_path=clip_out,
            start=snapped_start,
            dur=dur,
            fade_s=cfg.fade_seconds,
            target_lufs=cfg.target_lufs,
        )
        clips.append(clip_out)

        report["tracks"].append({
            "index_in_folder": int(selected_idxs[idx - 1] + 1),
            "filename": track.name,
            "tempo_bpm_est": round(float(tempo), 2),
            "start_seconds": round(float(snapped_start), 3),
            "duration_seconds": round(float(dur), 3),
            "debug": debug,
        })

    teaser_path = cfg.out_dir / cfg.output_name
    build_acrossfade_chain(clips, teaser_path, cfg.crossfade_seconds)

    report_path = cfg.out_dir / "teaser_report.json"
    with open(report_path, "w", encoding="utf-8") as f:
        json.dump(report, f, ensure_ascii=False, indent=2)

    print(f"\nāœ… Teaser created: {teaser_path.resolve()}")
    print(f"šŸ“ Report written: {report_path.resolve()}\n")


if __name__ == "__main__":
    main()