Files
DJ_Teaser_Clipper/V_1/dj_teaser.py
Thomas abf2109171 First Upload
All Files
2026-01-29 10:48:02 +01:00

424 lines
13 KiB
Python

#!/usr/bin/env python3
"""
DJ Teaser Builder (local, offline-friendly)
- Scans a folder for audio files (max 20 by default)
- Lets you select tracks by index (e.g. 1,2,5,7) or use "all"
- Finds highlight segments (energy + onset)
- Snaps start to bar grid (DJ-ish phrase cuts)
- Renders clips + acrossfades them into a teaser via FFmpeg
- Writes a JSON report (chosen start times / durations)
Requirements:
- ffmpeg in PATH
- pip install numpy librosa soundfile
Example:
python dj_teaser.py --tracks-dir ./tracks --select 1,2,3,4 --mode rollcall --teaser 60
"""
import argparse
import json
import shutil
import subprocess
from dataclasses import dataclass
from pathlib import Path
from typing import List, Optional, Tuple
import numpy as np
import librosa
# File extensions (compared lowercase) accepted when scanning the tracks folder.
AUDIO_EXTS = {".wav", ".mp3", ".flac", ".m4a", ".aiff", ".aac", ".ogg", ".opus"}
@dataclass
class Config:
    """All runtime settings for one teaser build, mostly mirrored from CLI args."""
    tracks_dir: Path           # folder scanned for source audio
    work_dir: Path             # scratch folder for decoded WAVs and rendered clips
    out_dir: Path              # destination for the teaser + JSON report
    output_name: str           # filename of the final teaser inside out_dir
    max_tracks: int = 20       # cap on how many files are picked up from tracks_dir
    analysis_sr: int = 22050   # sample rate used for librosa analysis (not the render rate)
    hop_length: int = 512      # analysis hop size in samples
    # Teaser / DJ settings
    teaser_seconds: float = 60.0       # target final teaser length
    crossfade_seconds: float = 0.25    # acrossfade between consecutive clips
    fade_seconds: float = 0.08         # micro fade in/out on each clip edge (anti-click)
    avoid_intro_seconds: float = 30.0  # exclude this much of each track's intro from search
    avoid_outro_seconds: float = 20.0  # exclude this much of each track's outro from search
    # rollcall: short bars per track, bestof: longer bars per track (and fewer tracks ideally)
    mode: str = "rollcall"  # "rollcall" or "bestof"
    bars_per_track: int = 2
    beats_per_bar: int = 4
    # Loudness target (simple 1-pass loudnorm)
    target_lufs: float = -14.0
def run(cmd: List[str]) -> None:
    """Execute *cmd*; raise RuntimeError (with captured stderr) on nonzero exit."""
    proc = subprocess.run(cmd, capture_output=True, text=True)
    if proc.returncode != 0:
        joined = " ".join(cmd)
        raise RuntimeError(f"Command failed:\n{joined}\n\nSTDERR:\n{proc.stderr}")
def ensure_ffmpeg() -> None:
    """Abort with a clear error unless the ffmpeg binary is discoverable on PATH."""
    if not shutil.which("ffmpeg"):
        raise RuntimeError("ffmpeg not found in PATH. Install ffmpeg and try again.")
def list_tracks(tracks_dir: Path, max_tracks: int) -> List[Path]:
    """Return up to *max_tracks* audio files from *tracks_dir*, sorted by name."""
    entries = sorted(tracks_dir.iterdir())
    audio = [e for e in entries if e.is_file() and e.suffix.lower() in AUDIO_EXTS]
    return audio[:max_tracks]
def parse_selection(selection: str, num_tracks: int) -> List[int]:
    """
    Parse a 1-based track selection string into unique 0-based indices.

    Supported forms:
        "all"        -> every track
        "1,2,3,7"    -> individual tracks
        "1-4,7,9-10" -> inclusive ranges (order-insensitive: "4-1" == "1-4")

    Indices outside [1, num_tracks] are silently dropped; duplicates are
    removed while preserving first-seen order.

    Raises:
        ValueError: if a token is not numeric, or the selection yields
            no usable tracks.
    """
    s = selection.strip().lower()
    if s == "all":
        return list(range(num_tracks))
    out: List[int] = []
    for part in (p.strip() for p in s.split(",") if p.strip()):
        try:
            if "-" in part:
                a, b = part.split("-", 1)
                # sorted() makes reversed ranges ("9-5") behave like "5-9"
                lo, hi = sorted((int(a) - 1, int(b) - 1))
                out.extend(range(lo, hi + 1))
            else:
                out.append(int(part) - 1)
        except ValueError:
            # Re-raise with the offending token so the CLI error is actionable.
            raise ValueError(f"Invalid selection token: {part!r}") from None
    # Dedupe keeping first-seen order, then drop out-of-range indices.
    filtered = [i for i in dict.fromkeys(out) if 0 <= i < num_tracks]
    if not filtered:
        raise ValueError("Selection resulted in an empty track list. Check --select.")
    return filtered
def ffmpeg_to_wav(in_path: Path, out_wav: Path, sr: int) -> None:
    """Decode *in_path* to a stereo WAV at *sr* Hz (any video stream stripped)."""
    out_wav.parent.mkdir(parents=True, exist_ok=True)
    cmd = ["ffmpeg", "-y", "-i", str(in_path)]
    cmd += ["-vn"]                 # audio only
    cmd += ["-ac", "2"]            # force stereo
    cmd += ["-ar", str(sr)]        # resample for analysis
    cmd += ["-f", "wav", str(out_wav)]
    run(cmd)
def zscore(x: np.ndarray) -> np.ndarray:
    """Standardize *x* to zero mean / unit variance as float32 (eps-guarded std)."""
    arr = np.asarray(x, dtype=np.float32)
    mean = float(arr.mean())
    std = float(arr.std()) + 1e-9  # epsilon avoids divide-by-zero on constant input
    return (arr - mean) / std
def pick_highlight_segment(
    y: np.ndarray,
    sr: int,
    hop_length: int,
    clip_s: float,
    avoid_intro_s: float,
    avoid_outro_s: float
) -> Tuple[float, float, dict]:
    """
    Find the most "highlight-worthy" window of a track.

    Blends frame-wise RMS energy and onset strength into a score, sums that
    score over a clip_s-long sliding window, and returns the start of the
    best-scoring window, constrained away from the intro/outro regions.

    Args:
        y: mono audio samples (assumed mono — loaded with mono=True upstream).
        sr: sample rate of y.
        hop_length: analysis hop size in samples (spacing between frames).
        clip_s: desired highlight length in seconds.
        avoid_intro_s: seconds at the start excluded from the search.
        avoid_outro_s: seconds at the end excluded from the search.

    Returns: (approx_start_seconds, duration_seconds, debug_metrics)
    """
    duration = len(y) / sr
    debug = {"duration_seconds": float(duration)}
    # Track too short to honor both exclusion zones: just center the clip.
    if duration <= (avoid_intro_s + avoid_outro_s + clip_s + 1.0):
        start = max(0.0, (duration - clip_s) / 2.0)
        debug["reason"] = "short_track_center"
        return start, clip_s, debug
    rms = librosa.feature.rms(y=y, frame_length=2048, hop_length=hop_length)[0]
    onset = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)
    # The two envelopes can differ in length by a frame or two; align them.
    n = min(len(rms), len(onset))
    rms, onset = rms[:n], onset[:n]
    # Weighted blend of normalized features; onsets weigh more than raw
    # loudness (rhythmic activity signals a "drop" better than level alone).
    score = 0.35 * zscore(rms) + 0.65 * zscore(onset)
    score = np.maximum(score, 0.0)  # discard below-average frames
    clip_frames = max(1, int(round((clip_s * sr) / hop_length)))
    t_seconds = (np.arange(n) * hop_length) / sr
    # Frames eligible as window positions: inside [intro, end - outro - clip].
    valid = (t_seconds >= avoid_intro_s) & (t_seconds <= (duration - avoid_outro_s - clip_s))
    valid_idxs = np.where(valid)[0]
    if len(valid_idxs) == 0:
        # Exclusion zones left no room; fall back to the track center.
        start = max(0.0, (duration - clip_s) / 2.0)
        debug["reason"] = "no_valid_window_center"
        return start, clip_s, debug
    # Sliding-window sum of the score. mode="same" centers the window on each
    # frame, so summed[i] rates a clip_s window CENTERED at frame i.
    window = np.ones(clip_frames, dtype=np.float32)
    summed = np.convolve(score, window, mode="same")
    best_idx = int(valid_idxs[np.argmax(summed[valid_idxs])])
    center_t = float(t_seconds[best_idx])
    # Convert the best window center back to a start time...
    start_t = center_t - (clip_s / 2.0)
    # ...then clamp so the clip stays clear of the excluded intro/outro.
    start_t = float(max(avoid_intro_s, min(start_t, duration - avoid_outro_s - clip_s)))
    debug.update({
        "best_center_seconds": center_t,
        "approx_start_seconds": start_t,
        "clip_frames": int(clip_frames),
    })
    return start_t, clip_s, debug
def bars_to_seconds(tempo_bpm: float, bars: int, beats_per_bar: int) -> float:
    """Duration in seconds of *bars* bars at *tempo_bpm* (tempo floored at 1e-6)."""
    total_beats = bars * beats_per_bar
    seconds_per_beat = 60.0 / max(1e-6, tempo_bpm)
    return seconds_per_beat * total_beats
def snap_to_bars(y: np.ndarray, sr: int, approx_start: float, bars: int, beats_per_bar: int = 4) -> Tuple[float, float]:
    """
    Snap *approx_start* onto the nearest bar-grid boundary of the detected beats.

    Returns:
        (snapped_start_seconds, tempo_bpm). Falls back to (approx_start, tempo)
        when too few beats are detected or the snap would move the start more
        than 2 s from the chosen highlight; (approx_start, 0.0) when beat
        tracking raises.
    """
    try:
        tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
        # librosa >= 0.10 returns tempo as a size-1 ndarray; older versions
        # return a plain float. float() on a 1-d array is deprecated/errors
        # under recent NumPy, so normalize both shapes explicitly.
        tempo = float(np.atleast_1d(tempo)[0])
        if beat_frames is None or len(beat_frames) < 8:
            return approx_start, tempo
        beat_times = librosa.frames_to_time(beat_frames, sr=sr)
        # Nearest detected beat to the approximate highlight start.
        i = int(np.argmin(np.abs(beat_times - approx_start)))
        grid = max(1, bars * beats_per_bar)  # beats per bar-grid chunk
        # Round the beat index to the nearest multiple of the bar grid.
        snapped_i = int(round(i / grid) * grid)
        snapped_i = max(0, min(snapped_i, len(beat_times) - 1))
        snapped_t = float(beat_times[snapped_i])
        # keep snapping reasonable: never drift far from the chosen highlight
        if abs(snapped_t - approx_start) <= 2.0:
            return snapped_t, tempo
        return approx_start, tempo
    except Exception:
        # Best-effort: beat tracking is optional and must never fail the build.
        return approx_start, 0.0
def render_clip(
    in_wav: Path,
    out_path: Path,
    start: float,
    dur: float,
    fade_s: float,
    target_lufs: float
) -> None:
    """Cut [start, start+dur] from *in_wav*, fade both edges, loudnorm, write *out_path*."""
    out_path.parent.mkdir(parents=True, exist_ok=True)
    fade_out_at = max(0.0, dur - fade_s)
    filters = [
        f"atrim=start={start}:duration={dur}",
        f"afade=t=in:st=0:d={fade_s}",
        f"afade=t=out:st={fade_out_at}:d={fade_s}",
        f"loudnorm=I={target_lufs}:TP=-1.5:LRA=11",
    ]
    cmd = ["ffmpeg", "-y", "-i", str(in_wav), "-vn", "-af", ",".join(filters), str(out_path)]
    run(cmd)
def build_acrossfade_chain(clips: List[Path], out_path: Path, crossfade_s: float) -> None:
    """Crossfade *clips* in order into *out_path* (a single clip is just copied)."""
    if len(clips) == 1:
        shutil.copyfile(clips[0], out_path)
        return
    cmd = ["ffmpeg", "-y"]
    for clip in clips:
        cmd.extend(["-i", str(clip)])
    # Chain pairwise acrossfades: [0:a][1:a] -> [a1], [a1][2:a] -> [a2], ...
    chain = []
    current = "[0:a]"
    for idx in range(1, len(clips)):
        label = f"[a{idx}]"
        chain.append(f"{current}[{idx}:a]acrossfade=d={crossfade_s}:c1=tri:c2=tri{label}")
        current = label
    cmd.extend(["-filter_complex", ";".join(chain), "-map", current, str(out_path)])
    run(cmd)
def main():
    """CLI entry: scan tracks, pick + bar-snap highlights, render clips, crossfade, report."""
    parser = argparse.ArgumentParser(description="Local DJ Teaser Builder (Python + FFmpeg)")
    parser.add_argument("--tracks-dir", default="./tracks", help="Folder containing audio tracks")
    parser.add_argument("--work-dir", default="./work", help="Temp working folder")
    parser.add_argument("--out-dir", default="./out", help="Output folder")
    parser.add_argument("--max-tracks", type=int, default=20, help="Max tracks to scan from folder (default: 20)")
    parser.add_argument("--select", default="all", help='Track selection: "all", "1,2,5", "1-4,7" (1-based)')
    parser.add_argument("--mode", choices=["rollcall", "bestof"], default="rollcall", help="Teaser style")
    parser.add_argument("--teaser", type=float, default=60.0, help="Final teaser length in seconds")
    parser.add_argument("--bars", type=int, default=2, help="Bars per track clip (DJ phrasing). rollcall=2 typical")
    parser.add_argument("--bpb", type=int, default=4, help="Beats per bar (4 for trance)")
    parser.add_argument("--crossfade", type=float, default=0.25, help="Acrossfade duration in seconds")
    parser.add_argument("--avoid-intro", type=float, default=30.0, help="Skip intro seconds when searching highlights")
    parser.add_argument("--avoid-outro", type=float, default=20.0, help="Skip outro seconds when searching highlights")
    parser.add_argument("--target-lufs", type=float, default=-14.0, help="Loudness target LUFS (approx)")
    parser.add_argument("--output", default="album_teaser.wav", help="Output teaser filename")
    args = parser.parse_args()
    # Fail fast before any analysis if ffmpeg is missing.
    ensure_ffmpeg()
    cfg = Config(
        tracks_dir=Path(args.tracks_dir),
        work_dir=Path(args.work_dir),
        out_dir=Path(args.out_dir),
        output_name=args.output,
        max_tracks=args.max_tracks,
        teaser_seconds=args.teaser,
        crossfade_seconds=args.crossfade,
        avoid_intro_seconds=args.avoid_intro,
        avoid_outro_seconds=args.avoid_outro,
        mode=args.mode,
        bars_per_track=args.bars,
        beats_per_bar=args.bpb,
        target_lufs=args.target_lufs,
    )
    cfg.out_dir.mkdir(parents=True, exist_ok=True)
    cfg.work_dir.mkdir(parents=True, exist_ok=True)
    tracks = list_tracks(cfg.tracks_dir, cfg.max_tracks)
    if not tracks:
        raise SystemExit(f"No audio tracks found in: {cfg.tracks_dir.resolve()}")
    # Print discovered tracks (nice for Git usage)
    print("\nDiscovered tracks:")
    for i, t in enumerate(tracks, start=1):
        print(f" {i:02d}. {t.name}")
    selected_idxs = parse_selection(args.select, len(tracks))
    selected_tracks = [tracks[i] for i in selected_idxs]
    print("\nSelected tracks:")
    for i, t in zip(selected_idxs, selected_tracks):
        print(f" {i+1:02d}. {t.name}")
    n = len(selected_tracks)
    teaser_s = float(cfg.teaser_seconds)
    cf = float(cfg.crossfade_seconds)
    # Total playtime math with acrossfades:
    # final_length = sum(durs) - (n-1)*cf => sum(durs) = teaser + (n-1)*cf
    # We use avg_dur to clamp bar-based clip duration.
    avg_dur = (teaser_s + (n - 1) * cf) / max(1, n)
    clips: List[Path] = []
    # JSON report: configuration snapshot plus per-track decisions.
    report = {
        "config": {
            "mode": cfg.mode,
            "teaser_seconds": teaser_s,
            "crossfade_seconds": cf,
            "bars_per_track": cfg.bars_per_track,
            "beats_per_bar": cfg.beats_per_bar,
            "avoid_intro_seconds": cfg.avoid_intro_seconds,
            "avoid_outro_seconds": cfg.avoid_outro_seconds,
            "target_lufs": cfg.target_lufs,
            "avg_clip_seconds_target": avg_dur,
        },
        "tracks": []
    }
    for idx, track in enumerate(selected_tracks, start=1):
        # Decode to a known-format WAV at the analysis sample rate.
        tmp_wav = cfg.work_dir / f"track_{idx:02d}.wav"
        ffmpeg_to_wav(track, tmp_wav, cfg.analysis_sr)
        y, sr = librosa.load(tmp_wav, sr=cfg.analysis_sr, mono=True)
        # 1) pick approximate highlight
        approx_start, _, debug = pick_highlight_segment(
            y=y,
            sr=sr,
            hop_length=cfg.hop_length,
            clip_s=max(4.0, min(8.0, avg_dur)),  # search window size
            avoid_intro_s=cfg.avoid_intro_seconds,
            avoid_outro_s=cfg.avoid_outro_seconds
        )
        # 2) snap to bar grid (DJ phrasing) + compute tempo
        snapped_start, tempo = snap_to_bars(
            y=y, sr=sr,
            approx_start=approx_start,
            bars=cfg.bars_per_track,
            beats_per_bar=cfg.beats_per_bar
        )
        # 3) derive duration from bars at detected tempo
        # If tempo fails (0), fall back to avg_dur.
        if tempo and tempo > 1.0:
            dur = bars_to_seconds(tempo, cfg.bars_per_track, cfg.beats_per_bar)
        else:
            dur = avg_dur
        # clamp duration so total stays in bounds
        dur = float(np.clip(dur, 2.5, avg_dur))
        clip_out = cfg.work_dir / f"clip_{idx:02d}.wav"
        render_clip(
            in_wav=tmp_wav,
            out_path=clip_out,
            start=snapped_start,
            dur=dur,
            fade_s=cfg.fade_seconds,
            target_lufs=cfg.target_lufs
        )
        clips.append(clip_out)
        report["tracks"].append({
            "index_in_folder": int(selected_idxs[idx - 1] + 1),
            "filename": track.name,
            "tempo_bpm_est": round(float(tempo), 2),
            "start_seconds": round(float(snapped_start), 3),
            "duration_seconds": round(float(dur), 3),
            "debug": debug,
        })
    # Join all clips into the final teaser, then persist the report.
    teaser_path = cfg.out_dir / cfg.output_name
    build_acrossfade_chain(clips, teaser_path, cfg.crossfade_seconds)
    report_path = cfg.out_dir / "teaser_report.json"
    with open(report_path, "w", encoding="utf-8") as f:
        json.dump(report, f, ensure_ascii=False, indent=2)
    print(f"\n✅ Teaser created: {teaser_path.resolve()}")
    print(f"📝 Report written: {report_path.resolve()}\n")
# Standard script entry point guard.
if __name__ == "__main__":
    main()