424 lines
13 KiB
Python
424 lines
13 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
DJ Teaser Builder (local, offline-friendly)
|
|
|
|
- Scans a folder for audio files (max 20 by default)
|
|
- Lets you select tracks by index (e.g. 1,2,5,7) or use "all"
|
|
- Finds highlight segments (energy + onset)
|
|
- Snaps start to bar grid (DJ-ish phrase cuts)
|
|
- Renders clips + acrossfades them into a teaser via FFmpeg
|
|
- Writes a JSON report (chosen start times / durations)
|
|
|
|
Requirements:
|
|
- ffmpeg in PATH
|
|
- pip install numpy librosa soundfile
|
|
|
|
Example:
|
|
python dj_teaser.py --tracks-dir ./tracks --select 1,2,3,4 --mode rollcall --teaser 60
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import shutil
|
|
import subprocess
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import List, Optional, Tuple
|
|
|
|
import numpy as np
|
|
import librosa
|
|
|
|
|
|
# Lower-case file suffixes (leading dot included) that are treated as audio tracks.
AUDIO_EXTS = {
    ".wav",
    ".mp3",
    ".flac",
    ".m4a",
    ".aiff",
    ".aac",
    ".ogg",
    ".opus",
}
|
|
|
|
|
|
@dataclass
class Config:
    """Settings for a single teaser build.

    The first four fields are required; everything else has a default that
    matches the command-line defaults.
    """

    # --- Locations -------------------------------------------------------
    tracks_dir: Path   # folder scanned for source audio
    work_dir: Path     # folder for intermediate WAVs and rendered clips
    out_dir: Path      # folder for the final teaser and the JSON report
    output_name: str   # file name of the teaser inside out_dir

    # --- Scanning / analysis ---------------------------------------------
    max_tracks: int = 20       # upper bound on files taken from tracks_dir
    analysis_sr: int = 22050   # sample rate (Hz) of the intermediate WAVs
    hop_length: int = 512      # hop size (samples) used for frame-wise features

    # --- Teaser / DJ settings --------------------------------------------
    teaser_seconds: float = 60.0      # requested length of the final teaser
    crossfade_seconds: float = 0.25   # overlap between consecutive clips
    fade_seconds: float = 0.08        # fade-in / fade-out applied to every clip

    avoid_intro_seconds: float = 30.0   # highlight search skips this much at the start
    avoid_outro_seconds: float = 20.0   # highlight search skips this much at the end

    # rollcall: short bars per track, bestof: longer bars per track
    # (and ideally fewer tracks)
    mode: str = "rollcall"  # "rollcall" or "bestof"
    bars_per_track: int = 2
    beats_per_bar: int = 4

    # --- Loudness (simple 1-pass loudnorm) -------------------------------
    target_lufs: float = -14.0
|
|
|
|
|
|
def run(cmd: List[str]) -> None:
    """Run an external command and raise if it exits with a non-zero status.

    Args:
        cmd: Program and arguments, passed to the OS without a shell.

    Raises:
        RuntimeError: If the command's return code is not 0. The message
            contains the command line and the captured stderr.
    """
    proc = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        # Tool output may contain bytes that are not valid in the locale
        # encoding (e.g. file metadata echoed by ffmpeg). Replace them instead
        # of letting a successful command die with UnicodeDecodeError.
        errors="replace",
    )
    if proc.returncode != 0:
        raise RuntimeError(f"Command failed:\n{' '.join(cmd)}\n\nSTDERR:\n{proc.stderr}")
|
|
|
|
|
|
def ensure_ffmpeg() -> None:
    """Verify that an ``ffmpeg`` executable can be found on PATH.

    Raises:
        RuntimeError: If ``shutil.which`` cannot locate ``ffmpeg``.
    """
    if shutil.which("ffmpeg") is not None:
        return
    raise RuntimeError("ffmpeg not found in PATH. Install ffmpeg and try again.")
|
|
|
|
|
|
def list_tracks(tracks_dir: Path, max_tracks: int) -> List[Path]:
    """Return the audio files directly inside ``tracks_dir``, in sorted order.

    Only regular files whose lower-cased suffix is in ``AUDIO_EXTS`` are kept;
    the result is cut off after ``max_tracks`` entries.
    """
    found: List[Path] = []
    for entry in sorted(tracks_dir.iterdir()):
        if not entry.is_file():
            continue
        if entry.suffix.lower() in AUDIO_EXTS:
            found.append(entry)
    return found[:max_tracks]
|
|
|
|
|
|
def parse_selection(selection: str, num_tracks: int) -> List[int]:
    """Turn a 1-based selection string into a list of 0-based track indices.

    Accepted forms (case-insensitive, surrounding whitespace ignored):
        "all"
        "1,2,3,7"
        "1-4,7,9-10"

    Reversed ranges ("4-1") are accepted. Indices outside ``1..num_tracks``
    are dropped, duplicates are removed, and first-seen order is kept.

    Args:
        selection: The user-supplied selection string.
        num_tracks: Number of available tracks.

    Returns:
        0-based indices, unique, in the order they were first mentioned.

    Raises:
        ValueError: If a token is not a number or a ``a-b`` range, or if
            nothing valid remains after filtering.
    """
    text = selection.strip().lower()
    if text == "all":
        return list(range(num_tracks))

    picked: List[int] = []
    seen = set()

    for raw_part in text.split(","):
        part = raw_part.strip()
        if not part:
            continue

        try:
            if "-" in part:
                first, last = part.split("-", 1)
                lo, hi = sorted((int(first) - 1, int(last) - 1))
            else:
                lo = hi = int(part) - 1
        except ValueError:
            raise ValueError(
                f"Invalid track selection {part!r}: expected a number or a range like 1-4."
            ) from None

        # Clamp to the valid index range *before* expanding, so an oversized
        # range such as "1-1000000000" cannot build a gigantic list.
        for i in range(max(lo, 0), min(hi, num_tracks - 1) + 1):
            if i not in seen:
                seen.add(i)
                picked.append(i)

    if not picked:
        raise ValueError("Selection resulted in an empty track list. Check --select.")
    return picked
|
|
|
|
|
|
def ffmpeg_to_wav(in_path: Path, out_wav: Path, sr: int) -> None:
    """Decode ``in_path`` to a 2-channel WAV at ``sr`` Hz using FFmpeg.

    The parent folder of ``out_wav`` is created if needed and an existing
    output file is overwritten (``-y``). Video streams are dropped (``-vn``).
    """
    out_wav.parent.mkdir(parents=True, exist_ok=True)

    output_opts = ["-vn", "-ac", "2", "-ar", str(sr), "-f", "wav"]
    run(["ffmpeg", "-y", "-i", str(in_path), *output_opts, str(out_wav)])
|
|
|
|
|
|
def zscore(x: np.ndarray) -> np.ndarray:
    """Standardise ``x`` to zero mean and unit standard deviation (float32).

    A tiny epsilon is added to the standard deviation so a constant input
    yields zeros instead of a division by zero.
    """
    values = np.asarray(x, dtype=np.float32)
    center = float(values.mean())
    spread = float(values.std() + 1e-9)
    return (values - center) / spread
|
|
|
|
|
|
def pick_highlight_segment(
    y: np.ndarray,
    sr: int,
    hop_length: int,
    clip_s: float,
    avoid_intro_s: float,
    avoid_outro_s: float
) -> Tuple[float, float, dict]:
    """Find the most energetic ``clip_s``-second window of a track.

    Frames are scored with a weighted mix of z-scored RMS energy (0.35) and
    z-scored onset strength (0.65); negative scores are clipped to zero. The
    window with the largest summed score whose start lies inside
    ``[avoid_intro_s, duration - avoid_outro_s - clip_s]`` wins.

    Tracks too short to honour the intro/outro margins fall back to a window
    centred in the track.

    Args:
        y: Audio samples; ``len(y) / sr`` is taken as the duration in seconds.
        sr: Sample rate of ``y``.
        hop_length: Hop size in samples for the frame-wise features.
        clip_s: Length of the search window in seconds.
        avoid_intro_s: Seconds at the start that a window may not begin in.
        avoid_outro_s: Seconds at the end that a window may not overlap.

    Returns:
        (approx_start_seconds, duration_seconds, debug_metrics)
    """
    duration = len(y) / sr
    debug = {"duration_seconds": float(duration)}
    centered_start = max(0.0, (duration - clip_s) / 2.0)

    if duration <= (avoid_intro_s + avoid_outro_s + clip_s + 1.0):
        debug["reason"] = "short_track_center"
        return centered_start, clip_s, debug

    rms = librosa.feature.rms(y=y, frame_length=2048, hop_length=hop_length)[0]
    onset = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)
    n = min(len(rms), len(onset))

    score = 0.35 * zscore(rms[:n]) + 0.65 * zscore(onset[:n])
    score = np.maximum(score, 0.0)

    clip_frames = max(1, int(round((clip_s * sr) / hop_length)))
    if clip_frames > n:
        # Fewer feature frames than one window: nothing to slide over.
        debug["reason"] = "no_valid_window_center"
        return centered_start, clip_s, debug

    # mode="valid" gives window_sums[i] == sum(score[i:i + clip_frames]), i.e.
    # sums indexed by window START. This keeps the scored window identical to
    # the clip that is returned and lets the intro/outro limits apply to it
    # directly (a centre-aligned sum would need shifting and clamping).
    window = np.ones(clip_frames, dtype=np.float32)
    window_sums = np.convolve(score, window, mode="valid")
    start_times = (np.arange(len(window_sums)) * hop_length) / sr

    latest_start = duration - avoid_outro_s - clip_s
    valid_idxs = np.flatnonzero((start_times >= avoid_intro_s) & (start_times <= latest_start))
    if len(valid_idxs) == 0:
        debug["reason"] = "no_valid_window_center"
        return centered_start, clip_s, debug

    best_idx = int(valid_idxs[np.argmax(window_sums[valid_idxs])])
    start_t = float(start_times[best_idx])

    debug.update({
        "best_center_seconds": float(start_t + clip_s / 2.0),
        "approx_start_seconds": start_t,
        "clip_frames": int(clip_frames),
    })
    return start_t, clip_s, debug
|
|
|
|
|
|
def bars_to_seconds(tempo_bpm: float, bars: int, beats_per_bar: int) -> float:
    """Return how many seconds ``bars`` bars last at ``tempo_bpm``.

    The tempo is floored at 1e-6 BPM so a zero tempo cannot divide by zero.
    """
    seconds_per_beat = 60.0 / max(1e-6, tempo_bpm)
    total_beats = bars * beats_per_bar
    return seconds_per_beat * total_beats
|
|
|
|
|
|
def snap_to_bars(y: np.ndarray, sr: int, approx_start: float, bars: int, beats_per_bar: int = 4) -> Tuple[float, float]:
    """Move ``approx_start`` onto a nearby phrase boundary of the beat grid.

    Beats are tracked with librosa; the beat closest to ``approx_start`` is
    rounded to a multiple of ``bars * beats_per_bar`` beats (counted from the
    first detected beat). The snapped time is used only if it lies within
    2 seconds of ``approx_start``.

    This is best-effort: with fewer than 8 detected beats the start is left
    untouched, and any failure during beat tracking yields
    ``(approx_start, 0.0)`` so the caller can fall back to a fixed duration.

    Returns:
        (snapped_start_seconds, tempo_bpm)
    """
    try:
        tempo_raw, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
        # The tempo may arrive as a scalar or as a 1-element array depending
        # on the librosa version (NOTE(review): confirm for the pinned
        # version). Calling float() on an array is deprecated in NumPy, and an
        # error here would be swallowed below and reported as tempo 0.0, so
        # extract the first element explicitly.
        tempo = float(np.ravel(tempo_raw)[0])
        if beat_frames is None or len(beat_frames) < 8:
            return approx_start, tempo

        beat_times = librosa.frames_to_time(beat_frames, sr=sr)
        nearest = int(np.argmin(np.abs(beat_times - approx_start)))

        grid = max(1, bars * beats_per_bar)  # beats per bar-grid chunk
        snapped_i = int(round(nearest / grid) * grid)
        snapped_i = max(0, min(snapped_i, len(beat_times) - 1))
        snapped_t = float(beat_times[snapped_i])

        # keep snapping reasonable
        if abs(snapped_t - approx_start) <= 2.0:
            return snapped_t, tempo
        return approx_start, tempo
    except Exception:
        # Deliberate best-effort: beat tracking problems must not abort the build.
        return approx_start, 0.0
|
|
|
|
|
|
def render_clip(
    in_wav: Path,
    out_path: Path,
    start: float,
    dur: float,
    fade_s: float,
    target_lufs: float
) -> None:
    """Cut one clip out of ``in_wav`` with FFmpeg and write it to ``out_path``.

    The clip starts at ``start`` seconds, lasts ``dur`` seconds, gets a
    ``fade_s``-second fade-in and fade-out, and is loudness-normalised with a
    single-pass ``loudnorm`` towards ``target_lufs``. The parent folder of
    ``out_path`` is created if needed; an existing file is overwritten.

    Raises:
        RuntimeError: Propagated from ``run`` when FFmpeg fails.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)

    fade_out_at = max(0.0, dur - fade_s)
    filters = [
        f"atrim=start={start}:duration={dur}",
        # atrim keeps the source timestamps, so the clip would still "start"
        # at `start` seconds. Rebase to 0 so the fade times below, which are
        # relative to the clip, hit the intended samples.
        "asetpts=PTS-STARTPTS",
        f"afade=t=in:st=0:d={fade_s}",
        f"afade=t=out:st={fade_out_at}:d={fade_s}",
        # NOTE(review): loudnorm is documented to upsample internally; consider
        # pinning the output sample rate with -ar -- confirm against ffmpeg docs.
        f"loudnorm=I={target_lufs}:TP=-1.5:LRA=11",
    ]

    run([
        "ffmpeg", "-y",
        "-i", str(in_wav),
        "-vn",
        "-af", ",".join(filters),
        str(out_path),
    ])
|
|
|
|
|
|
def build_acrossfade_chain(clips: List[Path], out_path: Path, crossfade_s: float) -> None:
    """Join ``clips`` into one file, crossfading each clip into the next.

    A single clip is copied to ``out_path`` unchanged. For several clips one
    FFmpeg call chains ``acrossfade`` filters (triangular curves,
    ``crossfade_s`` seconds each): clip 0 + clip 1 -> [a1], [a1] + clip 2 ->
    [a2], and so on; the last label is mapped to the output.

    Args:
        clips: Clip files in playback order; must not be empty.
        out_path: Destination file; its parent folder is created if missing.
        crossfade_s: Crossfade length in seconds.

    Raises:
        ValueError: If ``clips`` is empty.
        RuntimeError: Propagated from ``run`` when FFmpeg fails.
    """
    if not clips:
        # Without inputs the FFmpeg command below would be invalid; fail early
        # with a message that names the real problem.
        raise ValueError("build_acrossfade_chain needs at least one clip.")

    # Same convention as the other render helpers: make sure the target folder exists.
    out_path.parent.mkdir(parents=True, exist_ok=True)

    if len(clips) == 1:
        shutil.copyfile(clips[0], out_path)
        return

    cmd = ["ffmpeg", "-y"]
    for clip in clips:
        cmd += ["-i", str(clip)]

    filter_parts = []
    last = "[0:a]"
    for i in range(1, len(clips)):
        nxt = f"[{i}:a]"
        out = f"[a{i}]"
        filter_parts.append(f"{last}{nxt}acrossfade=d={crossfade_s}:c1=tri:c2=tri{out}")
        last = out

    cmd += [
        "-filter_complex", ";".join(filter_parts),
        "-map", last,
        str(out_path),
    ]
    run(cmd)
|
|
|
|
|
|
def main():
    """Command-line entry point: build a teaser from a folder of tracks.

    Steps: parse arguments, list and select tracks, then for every selected
    track decode it to a work WAV, pick a highlight, snap it to the bar grid,
    and render a clip. Finally all clips are crossfaded into the teaser and a
    JSON report with the chosen start times and durations is written.

    Raises:
        SystemExit: On invalid arguments, a missing tracks folder, no audio
            files, or an unusable ``--select`` value.
        RuntimeError: If ffmpeg is missing or an FFmpeg call fails.
    """
    parser = argparse.ArgumentParser(description="Local DJ Teaser Builder (Python + FFmpeg)")
    parser.add_argument("--tracks-dir", default="./tracks", help="Folder containing audio tracks")
    parser.add_argument("--work-dir", default="./work", help="Temp working folder")
    parser.add_argument("--out-dir", default="./out", help="Output folder")
    parser.add_argument("--max-tracks", type=int, default=20, help="Max tracks to scan from folder (default: 20)")

    parser.add_argument("--select", default="all", help='Track selection: "all", "1,2,5", "1-4,7" (1-based)')
    parser.add_argument("--mode", choices=["rollcall", "bestof"], default="rollcall", help="Teaser style")
    parser.add_argument("--teaser", type=float, default=60.0, help="Final teaser length in seconds")
    parser.add_argument("--bars", type=int, default=2, help="Bars per track clip (DJ phrasing). rollcall=2 typical")
    parser.add_argument("--bpb", type=int, default=4, help="Beats per bar (4 for trance)")

    parser.add_argument("--crossfade", type=float, default=0.25, help="Acrossfade duration in seconds")
    parser.add_argument("--avoid-intro", type=float, default=30.0, help="Skip intro seconds when searching highlights")
    parser.add_argument("--avoid-outro", type=float, default=20.0, help="Skip outro seconds when searching highlights")

    parser.add_argument("--target-lufs", type=float, default=-14.0, help="Loudness target LUFS (approx)")
    parser.add_argument("--output", default="album_teaser.wav", help="Output teaser filename")

    args = parser.parse_args()

    # Reject values that would otherwise only surface later as an FFmpeg
    # failure in the middle of the build.
    if args.teaser <= 0:
        parser.error("--teaser must be greater than 0")
    if args.crossfade < 0:
        parser.error("--crossfade must not be negative")
    if args.max_tracks < 1:
        parser.error("--max-tracks must be at least 1")

    ensure_ffmpeg()

    cfg = Config(
        tracks_dir=Path(args.tracks_dir),
        work_dir=Path(args.work_dir),
        out_dir=Path(args.out_dir),
        output_name=args.output,
        max_tracks=args.max_tracks,
        teaser_seconds=args.teaser,
        crossfade_seconds=args.crossfade,
        avoid_intro_seconds=args.avoid_intro,
        avoid_outro_seconds=args.avoid_outro,
        mode=args.mode,
        bars_per_track=args.bars,
        beats_per_bar=args.bpb,
        target_lufs=args.target_lufs,
    )

    # Give a readable message instead of a traceback when the folder is missing.
    if not cfg.tracks_dir.is_dir():
        raise SystemExit(f"Tracks folder not found: {cfg.tracks_dir.resolve()}")

    cfg.out_dir.mkdir(parents=True, exist_ok=True)
    cfg.work_dir.mkdir(parents=True, exist_ok=True)

    tracks = list_tracks(cfg.tracks_dir, cfg.max_tracks)
    if not tracks:
        raise SystemExit(f"No audio tracks found in: {cfg.tracks_dir.resolve()}")

    # Print discovered tracks so the 1-based indices for --select are visible
    print("\nDiscovered tracks:")
    for i, t in enumerate(tracks, start=1):
        print(f"  {i:02d}. {t.name}")

    try:
        selected_idxs = parse_selection(args.select, len(tracks))
    except ValueError as err:
        raise SystemExit(f"Invalid --select value {args.select!r}: {err}") from err
    selected_tracks = [tracks[i] for i in selected_idxs]

    print("\nSelected tracks:")
    for i, t in zip(selected_idxs, selected_tracks):
        print(f"  {i+1:02d}. {t.name}")

    n = len(selected_tracks)
    teaser_s = float(cfg.teaser_seconds)
    cf = float(cfg.crossfade_seconds)

    # Total playtime math with acrossfades:
    # final_length = sum(durs) - (n-1)*cf  => sum(durs) = teaser + (n-1)*cf
    # We use avg_dur to clamp bar-based clip duration.
    avg_dur = (teaser_s + (n - 1) * cf) / max(1, n)

    clips: List[Path] = []
    report = {
        "config": {
            "mode": cfg.mode,
            "teaser_seconds": teaser_s,
            "crossfade_seconds": cf,
            "bars_per_track": cfg.bars_per_track,
            "beats_per_bar": cfg.beats_per_bar,
            "avoid_intro_seconds": cfg.avoid_intro_seconds,
            "avoid_outro_seconds": cfg.avoid_outro_seconds,
            "target_lufs": cfg.target_lufs,
            "avg_clip_seconds_target": avg_dur,
        },
        "tracks": []
    }

    for idx, (folder_idx, track) in enumerate(zip(selected_idxs, selected_tracks), start=1):
        tmp_wav = cfg.work_dir / f"track_{idx:02d}.wav"
        ffmpeg_to_wav(track, tmp_wav, cfg.analysis_sr)

        y, sr = librosa.load(tmp_wav, sr=cfg.analysis_sr, mono=True)

        # 1) pick approximate highlight
        approx_start, _, debug = pick_highlight_segment(
            y=y,
            sr=sr,
            hop_length=cfg.hop_length,
            clip_s=max(4.0, min(8.0, avg_dur)),  # search window size
            avoid_intro_s=cfg.avoid_intro_seconds,
            avoid_outro_s=cfg.avoid_outro_seconds
        )

        # 2) snap to bar grid (DJ phrasing) + compute tempo
        snapped_start, tempo = snap_to_bars(
            y=y, sr=sr,
            approx_start=approx_start,
            bars=cfg.bars_per_track,
            beats_per_bar=cfg.beats_per_bar
        )

        # 3) derive duration from bars at detected tempo
        # If tempo fails (0), fall back to avg_dur.
        if tempo and tempo > 1.0:
            dur = bars_to_seconds(tempo, cfg.bars_per_track, cfg.beats_per_bar)
        else:
            dur = avg_dur

        # clamp duration so total stays in bounds
        dur = float(np.clip(dur, 2.5, avg_dur))

        # NOTE(review): the clip is cut from the analysis WAV (cfg.analysis_sr),
        # not from the original file, so the teaser inherits that sample rate --
        # confirm this is intended.
        clip_out = cfg.work_dir / f"clip_{idx:02d}.wav"
        render_clip(
            in_wav=tmp_wav,
            out_path=clip_out,
            start=snapped_start,
            dur=dur,
            fade_s=cfg.fade_seconds,
            target_lufs=cfg.target_lufs
        )

        clips.append(clip_out)

        report["tracks"].append({
            "index_in_folder": int(folder_idx + 1),
            "filename": track.name,
            "tempo_bpm_est": round(float(tempo), 2),
            "start_seconds": round(float(snapped_start), 3),
            "duration_seconds": round(float(dur), 3),
            "debug": debug,
        })

    teaser_path = cfg.out_dir / cfg.output_name
    build_acrossfade_chain(clips, teaser_path, cfg.crossfade_seconds)

    report_path = cfg.out_dir / "teaser_report.json"
    with open(report_path, "w", encoding="utf-8") as f:
        json.dump(report, f, ensure_ascii=False, indent=2)

    print(f"\n✅ Teaser created: {teaser_path.resolve()}")
    print(f"📝 Report written: {report_path.resolve()}\n")
|
|
|
|
|
|
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|