First Upload
All Files
This commit is contained in:
644
V_3/dj_teaser.py
Normal file
644
V_3/dj_teaser.py
Normal file
@@ -0,0 +1,644 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
DJ Teaser Builder v3 (local, offline-friendly)
|
||||
Adds:
|
||||
- Key detection (Krumhansl-Schmuckler on chroma) + Camelot mapping
|
||||
- Harmonic ordering (Camelot adjacent keys) + tempo clustering + energy ramp
|
||||
- Downbeat-ish snap (bar start scoring) on top of beat grid
|
||||
- 2-pass EBU R128 loudnorm per clip for consistent loudness
|
||||
- Exports WAV + MP3 + report JSON
|
||||
|
||||
Requirements:
|
||||
- ffmpeg in PATH
|
||||
- pip install numpy librosa soundfile requests (requests only needed if you use Ollama)
|
||||
|
||||
Examples:
|
||||
python dj_teaser_v3.py --tracks-dir ./tracks --select all --teaser 60 --bars 2 --preroll-bars 1
|
||||
python dj_teaser_v3.py --tracks-dir ./tracks --select auto --auto-n 8 --teaser 75 --bars 4 --harmonic
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import math
|
||||
import shutil
|
||||
import subprocess
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import List, Tuple, Optional, Dict
|
||||
|
||||
import numpy as np
|
||||
import librosa
|
||||
|
||||
# File extensions accepted by list_tracks (compared lowercase).
AUDIO_EXTS = {".wav", ".mp3", ".flac", ".m4a", ".aiff", ".aac", ".ogg", ".opus"}


# ---------------------------
# Key profiles (Krumhansl)
# ---------------------------
# Krumhansl tone-profile weights; index 0 is the tonic, chromatic order.
# estimate_key rotates these against the mean chroma to score each tonic.
KRUMHANSL_MAJOR = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88], dtype=np.float32)
KRUMHANSL_MINOR = np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17], dtype=np.float32)
# Pitch-class names in chromatic order from C; index matches the profile rotation.
PITCHES = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]

# Camelot mappings (simplified)
# We map major keys to "B" and minor keys to "A" numbers.
# Common Camelot wheel:
# 8B = C Major, 5A = C Minor, etc.
# We'll use a standard mapping table for pitch class -> camelot number.
CAMELOT_MAJOR = {"C": "8B", "G": "9B", "D": "10B", "A": "11B", "E": "12B", "B": "1B", "F#": "2B", "C#": "3B", "G#": "4B", "D#": "5B", "A#": "6B", "F": "7B"}
CAMELOT_MINOR = {"A": "8A", "E": "9A", "B": "10A", "F#": "11A", "C#": "12A", "G#": "1A", "D#": "2A", "A#": "3A", "F": "4A", "C": "5A", "G": "6A", "D": "7A"}
|
||||
|
||||
def run(cmd: List[str]) -> Tuple[str, str]:
    """Execute *cmd* as a subprocess and return its (stdout, stderr) text.

    Raises:
        RuntimeError: if the process exits with a non-zero status; the
            message includes the command line and captured stderr.
    """
    proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if proc.returncode == 0:
        return proc.stdout, proc.stderr
    raise RuntimeError(f"Command failed:\n{' '.join(cmd)}\n\nSTDERR:\n{proc.stderr}")
|
||||
|
||||
|
||||
def ensure_ffmpeg() -> None:
    """Raise RuntimeError unless an ffmpeg executable is discoverable on PATH."""
    ffmpeg_path = shutil.which("ffmpeg")
    if ffmpeg_path is None:
        raise RuntimeError("ffmpeg not found in PATH. Install ffmpeg and try again.")
|
||||
|
||||
|
||||
def list_tracks(tracks_dir: Path, max_tracks: int) -> List[Path]:
    """Return up to *max_tracks* audio files in *tracks_dir*, sorted by name.

    Only plain files whose lowercase extension appears in AUDIO_EXTS count.
    """
    matches: List[Path] = []
    for entry in sorted(tracks_dir.iterdir()):
        if entry.is_file() and entry.suffix.lower() in AUDIO_EXTS:
            matches.append(entry)
    return matches[:max_tracks]
|
||||
|
||||
|
||||
def parse_selection(selection: str, num_tracks: int) -> List[int]:
    """Translate a --select string into a list of 0-based track indices.

    Accepts "all"/"auto" (every track), comma-separated 1-based indices
    ("1,2,7"), and dash ranges ("1-4", either direction). Out-of-range
    indices are dropped; duplicates are removed while keeping first-seen
    order.

    Raises:
        ValueError: if nothing valid remains after filtering.
    """
    text = selection.strip().lower()
    if text in ("all", "auto"):
        return list(range(num_tracks))

    raw_indices: List[int] = []
    for token in (piece.strip() for piece in text.split(",")):
        if not token:
            continue
        if "-" in token:
            lo_str, hi_str = token.split("-", 1)
            lo, hi = int(lo_str) - 1, int(hi_str) - 1
            if lo > hi:
                lo, hi = hi, lo
            raw_indices.extend(range(lo, hi + 1))
        else:
            raw_indices.append(int(token) - 1)

    known = set()
    result: List[int] = []
    for idx in raw_indices:
        if 0 <= idx < num_tracks and idx not in known:
            known.add(idx)
            result.append(idx)

    if not result:
        raise ValueError("Selection resulted in an empty track list. Check --select.")
    return result
|
||||
|
||||
|
||||
def ffmpeg_to_wav(in_path: Path, out_wav: Path, sr: int) -> None:
    """Decode *in_path* into a stereo WAV at sample rate *sr* using ffmpeg."""
    out_wav.parent.mkdir(parents=True, exist_ok=True)
    cmd = [
        "ffmpeg", "-y",
        "-i", str(in_path),
        "-vn",           # ignore any video/artwork stream
        "-ac", "2",      # force two channels
        "-ar", str(sr),  # resample
        "-f", "wav",
        str(out_wav),
    ]
    run(cmd)
|
||||
|
||||
|
||||
def zscore(x: np.ndarray) -> np.ndarray:
    """Standardize *x* to zero mean and unit variance (epsilon-guarded std)."""
    arr = np.asarray(x, dtype=np.float32)
    mean = float(np.mean(arr))
    std = float(np.std(arr) + 1e-9)
    return (arr - mean) / std
|
||||
|
||||
|
||||
@dataclass
class TrackInfo:
    """Analysis results for one audio track; drives ordering and rendering."""
    path: Path  # source audio file
    folder_index: int  # 1-based
    duration_s: float  # full track length in seconds
    tempo_bpm: float  # estimated tempo; 0.0 when beat tracking failed
    energy_score: float  # mean of the top-decile frame scores (see main)
    highlight_score: float  # windowed score at the chosen highlight
    approx_start_s: float  # raw highlight start before beat snapping
    snapped_start_s: float  # start after downbeat snap + preroll
    clip_dur_s: float  # clip length used in the teaser
    key_name: str  # e.g. "A Minor"
    camelot: str  # e.g. "8A"; "??" when key mapping failed
|
||||
|
||||
|
||||
def compute_score(y: np.ndarray, sr: int, hop_length: int) -> np.ndarray:
    """Per-frame "interest" score for *y*: a weighted blend of z-scored RMS
    energy and onset strength, clipped at zero so only above-average frames
    contribute."""
    rms_env = librosa.feature.rms(y=y, frame_length=2048, hop_length=hop_length)[0]
    onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)
    common = min(len(rms_env), len(onset_env))
    # Onsets weigh more than raw level: transients mark the interesting parts.
    blended = 0.35 * zscore(rms_env[:common]) + 0.65 * zscore(onset_env[:common])
    return np.maximum(blended, 0.0)
|
||||
|
||||
|
||||
def pick_highlight_start(score: np.ndarray, sr: int, hop_length: int,
                         clip_s: float, avoid_intro_s: float, avoid_outro_s: float, duration_s: float) -> Tuple[float, float]:
    """Locate the most energetic *clip_s*-long window of a track.

    Returns (start_seconds, window_score). Falls back to the track middle
    (and the total score) when the track is too short for the exclusion
    zones or when no frame survives the intro/outro restrictions.
    """
    fallback = (max(0.0, (duration_s - clip_s) / 2.0), float(np.sum(score)))
    if duration_s <= (avoid_intro_s + avoid_outro_s + clip_s + 1.0):
        return fallback

    frames = len(score)
    win_len = max(1, int(round((clip_s * sr) / hop_length)))
    frame_t = (np.arange(frames) * hop_length) / sr

    usable = np.where(
        (frame_t >= avoid_intro_s) & (frame_t <= (duration_s - avoid_outro_s - clip_s))
    )[0]
    if usable.size == 0:
        return fallback

    # Rolling sum of the score over a clip-sized window, centered per frame.
    rolling = np.convolve(score, np.ones(win_len, dtype=np.float32), mode="same")

    peak = int(usable[np.argmax(rolling[usable])])
    start = float(frame_t[peak]) - (clip_s / 2.0)
    # Clamp so the clip stays out of both exclusion zones.
    start = float(max(avoid_intro_s, min(start, duration_s - avoid_outro_s - clip_s)))
    return start, float(rolling[peak])
|
||||
|
||||
|
||||
def estimate_key(y: np.ndarray, sr: int) -> Tuple[str, str, float]:
    """
    Krumhansl-Schmuckler key estimation using average chroma.

    Correlates the mean chroma vector of the harmonic component against the
    Krumhansl major/minor profiles rotated to every tonic, and picks the best
    match.

    Returns (key_name, camelot, confidence) where confidence is the winning
    Pearson correlation.
    """
    # Use harmonic component for more stable key
    yh = librosa.effects.harmonic(y)
    chroma = librosa.feature.chroma_cqt(y=yh, sr=sr)
    chroma_mean = np.mean(chroma, axis=1)
    chroma_mean /= (np.sum(chroma_mean) + 1e-9)

    def corr_profile(profile):
        # Correlate the observed chroma against the profile rotated to each tonic.
        corrs = []
        for shift in range(12):
            prof = np.roll(profile, shift)
            corrs.append(np.corrcoef(chroma_mean, prof)[0, 1])
        # Fix: np.corrcoef returns NaN for zero-variance input (e.g. silence or
        # near-constant chroma); NaNs would make argmax below meaningless, so
        # treat them as "no correlation".
        return np.nan_to_num(np.array(corrs, dtype=np.float32), nan=0.0)

    major_corr = corr_profile(KRUMHANSL_MAJOR)
    minor_corr = corr_profile(KRUMHANSL_MINOR)

    best_major = int(np.argmax(major_corr))
    best_minor = int(np.argmax(minor_corr))
    maj_val = float(major_corr[best_major])
    min_val = float(minor_corr[best_minor])

    # Ties go to major (>=), matching the original behavior.
    if maj_val >= min_val:
        tonic = PITCHES[best_major]
        key_name = f"{tonic} Major"
        camelot = CAMELOT_MAJOR.get(tonic, "")
        conf = maj_val
    else:
        tonic = PITCHES[best_minor]
        key_name = f"{tonic} Minor"
        camelot = CAMELOT_MINOR.get(tonic, "")
        conf = min_val

    # Placeholder for tracks whose tonic has no Camelot entry.
    if not camelot:
        camelot = "??"

    return key_name, camelot, conf
|
||||
|
||||
|
||||
def bars_to_seconds(tempo_bpm: float, bars: int, beats_per_bar: int) -> float:
    """Duration in seconds of *bars* bars at *tempo_bpm* (tempo floored at 1e-6
    to avoid division by zero)."""
    seconds_per_beat = 60.0 / max(1e-6, tempo_bpm)
    return seconds_per_beat * (bars * beats_per_bar)
|
||||
|
||||
|
||||
def snap_to_downbeat_like(y: np.ndarray, sr: int, approx_start: float, bars: int, beats_per_bar: int,
                          onset_weight: float = 1.0) -> Tuple[float, float, Optional[np.ndarray]]:
    """
    "Downbeat-ish" snap:
    - get beat_times
    - build a bar-grid (every beats_per_bar beats)
    - score each bar start around approx_start by local onset strength
    - pick best bar start near approx_start
    Returns (snapped_start, tempo, beat_times)

    On any failure (or too few beats detected) this degrades gracefully:
    the unsnapped approx_start is returned, tempo may be 0.0, and
    beat_times may be None — callers must handle all three.
    """
    try:
        tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
        # NOTE(review): newer librosa returns tempo as an ndarray; float()
        # works for size-1 arrays but emits a deprecation warning — confirm
        # against the pinned librosa version.
        tempo = float(tempo)
        # Need at least ~4 bars of beats for the grid to be meaningful.
        if beat_frames is None or len(beat_frames) < (beats_per_bar * 4):
            return approx_start, tempo, None

        beat_times = librosa.frames_to_time(beat_frames, sr=sr)

        # onset envelope for scoring
        onset_env = librosa.onset.onset_strength(y=y, sr=sr)
        onset_t = librosa.times_like(onset_env, sr=sr)

        # candidate bar starts near approx_start
        # bar start indices in beat grid
        bar_stride = beats_per_bar
        bar_idxs = np.arange(0, len(beat_times), bar_stride)

        # focus region +/- 8 seconds around approx_start
        region = []
        for bi in bar_idxs:
            t0 = float(beat_times[bi])
            if abs(t0 - approx_start) <= 8.0:
                region.append(bi)
        if not region:
            # fallback: nearest bar
            nearest = int(bar_idxs[np.argmin(np.abs(beat_times[bar_idxs] - approx_start))])
            return float(beat_times[nearest]), tempo, beat_times

        # score each candidate bar start by onset energy in small window after it
        best_bi = region[0]
        best_val = -1.0
        for bi in region:
            t0 = float(beat_times[bi])
            # window: first ~0.35s after bar start (kick/transient)
            mask = (onset_t >= t0) & (onset_t <= (t0 + 0.35))
            val = float(np.mean(onset_env[mask])) if np.any(mask) else 0.0

            # also prefer closeness to approx_start
            closeness = 1.0 - min(1.0, abs(t0 - approx_start) / 8.0)
            val = onset_weight * val + 0.25 * closeness

            if val > best_val:
                best_val = val
                best_bi = bi

        snapped = float(beat_times[best_bi])

        # additionally snap to bar-grid chunk size (bars) for phrase alignment
        # i.e. every (bars * beats_per_bar) beats
        chunk = max(1, bars * beats_per_bar)
        # convert best_bi (beat index) into chunk-aligned beat index
        chunk_bi = int(round(best_bi / chunk) * chunk)
        chunk_bi = max(0, min(chunk_bi, len(beat_times) - 1))
        snapped2 = float(beat_times[chunk_bi])

        # keep in sane range
        # Prefer the phrase-aligned start only if it stays within 2.5 s of
        # the highlight; otherwise keep the onset-scored bar start.
        if abs(snapped2 - approx_start) <= 2.5:
            return snapped2, tempo, beat_times
        return snapped, tempo, beat_times

    except Exception:
        # Deliberate best-effort: beat tracking is allowed to fail silently;
        # tempo 0.0 signals "unknown" to the caller.
        return approx_start, 0.0, None
|
||||
|
||||
|
||||
def apply_preroll(snapped_start: float, beat_times: Optional[np.ndarray], preroll_bars: int, beats_per_bar: int) -> float:
    """Shift *snapped_start* earlier by *preroll_bars* bars of DJ lead-in.

    With a usable beat grid the shift walks back along actual beats; without
    one, each bar is approximated as 2 seconds. Never returns less than 0.
    """
    if preroll_bars <= 0:
        return snapped_start

    beats_back = preroll_bars * beats_per_bar
    if beat_times is None or len(beat_times) < (beats_back + 2):
        # No reliable grid: crude 2 s/bar estimate, clamped at track start.
        return max(0.0, snapped_start - preroll_bars * 2.0)

    nearest = int(np.argmin(np.abs(beat_times - snapped_start)))
    return float(beat_times[max(0, nearest - beats_back)])
|
||||
|
||||
|
||||
# ---------------------------
|
||||
# 2-pass loudnorm helpers
|
||||
# ---------------------------
|
||||
def loudnorm_2pass_filter(infile: Path, start: float, dur: float, fade_s: float, target_lufs: float) -> str:
    """
    Build a 2-pass EBU R128 loudnorm filter string for a trimmed segment.

    Pass 1 runs ffmpeg in measurement mode and parses the loudness stats JSON
    that loudnorm prints on stderr; the returned filter string applies those
    measured values (pass 2, linear mode).

    Raises:
        RuntimeError: if the measurement JSON cannot be located or parsed in
            the ffmpeg output.
    """
    # Trim + short in/out fades shared by both passes.
    pre = f"atrim=start={start}:duration={dur},afade=t=in:st=0:d={fade_s},afade=t=out:st={max(0.0, dur - fade_s)}:d={fade_s}"
    measure = f"{pre},loudnorm=I={target_lufs}:TP=-1.5:LRA=11:print_format=json"
    _, err = run(["ffmpeg", "-y", "-i", str(infile), "-vn", "-af", measure, "-f", "null", "-"])

    # loudnorm prints its stats as the last JSON object on stderr.
    # Fix: the original sliced err blindly; when the braces are missing or
    # out of order that produced garbage and an opaque json error. Validate
    # first and fail with a clear message instead.
    open_idx = err.rfind("{")
    close_idx = err.rfind("}")
    if open_idx == -1 or close_idx <= open_idx:
        raise RuntimeError("Could not locate loudnorm measurement JSON in ffmpeg output.")
    try:
        data = json.loads(err[open_idx : close_idx + 1])
    except json.JSONDecodeError as exc:
        raise RuntimeError("Could not parse loudnorm measurement JSON from ffmpeg output.") from exc

    # Pass 2: apply measured values; linear=true keeps relative dynamics.
    applied = (
        f"{pre},loudnorm=I={target_lufs}:TP=-1.5:LRA=11:"
        f"measured_I={data['input_i']}:measured_TP={data['input_tp']}:measured_LRA={data['input_lra']}:"
        f"measured_thresh={data['input_thresh']}:offset={data['target_offset']}:linear=true:print_format=summary"
    )
    return applied
|
||||
|
||||
|
||||
def render_clip_2pass(in_wav: Path, out_path: Path, start: float, dur: float, fade_s: float, target_lufs: float) -> None:
    """Cut one clip from *in_wav* and render it loudness-normalized via the
    2-pass loudnorm filter."""
    out_path.parent.mkdir(parents=True, exist_ok=True)
    filter_chain = loudnorm_2pass_filter(in_wav, start, dur, fade_s, target_lufs)
    run(["ffmpeg", "-y", "-i", str(in_wav), "-vn", "-af", filter_chain, str(out_path)])
|
||||
|
||||
|
||||
def build_acrossfade_chain(clips: List[Path], out_wav: Path, crossfade_s: float) -> None:
    """
    Concatenate *clips* into *out_wav*, crossfading each adjacent pair with
    ffmpeg's acrossfade filter (triangular curves). A single clip is copied
    verbatim.

    Raises:
        ValueError: if *clips* is empty (the previous code built an
            inputless ffmpeg command that failed with a cryptic error).
    """
    if not clips:
        raise ValueError("build_acrossfade_chain: no clips to join.")
    if len(clips) == 1:
        shutil.copyfile(clips[0], out_wav)
        return

    cmd = ["ffmpeg", "-y"]
    for c in clips:
        cmd += ["-i", str(c)]

    # Chain: [0:a][1:a]acrossfade[a1]; [a1][2:a]acrossfade[a2]; ...
    filter_parts = []
    last = "[0:a]"
    for i in range(1, len(clips)):
        nxt = f"[{i}:a]"
        out = f"[a{i}]"
        filter_parts.append(f"{last}{nxt}acrossfade=d={crossfade_s}:c1=tri:c2=tri{out}")
        last = out

    cmd += ["-filter_complex", ";".join(filter_parts), "-map", last, str(out_wav)]
    run(cmd)
|
||||
|
||||
|
||||
def export_mp3(in_wav: Path, out_mp3: Path, bitrate: str) -> None:
    """Encode *in_wav* to MP3 with libmp3lame at the given bitrate (e.g. "320k")."""
    out_mp3.parent.mkdir(parents=True, exist_ok=True)
    cmd = ["ffmpeg", "-y", "-i", str(in_wav), "-vn", "-codec:a", "libmp3lame", "-b:a", bitrate, str(out_mp3)]
    run(cmd)
|
||||
|
||||
|
||||
# ---------------------------
|
||||
# Harmonic / DJ ordering
|
||||
# ---------------------------
|
||||
def camelot_neighbors(c: str) -> List[str]:
    """
    Camelot adjacency: same number A<->B, +/-1 same letter.
    Example: 8A neighbors -> 8B, 7A, 9A

    Returns [] for anything that is not a valid Camelot code — in particular
    the "??" placeholder that estimate_key emits when mapping fails.
    Fix: the original called int() on the numeric part unconditionally, so a
    "??" camelot raised ValueError and crashed harmonic ordering.
    """
    code = c.strip().upper()
    # Valid shapes: "8A" (2 chars) or "10A"/"11B"/"12A" (3 chars).
    if len(code) not in (2, 3) or not code[:-1].isdigit() or code[-1] not in ("A", "B"):
        return []
    num = int(code[:-1])
    letter = code[-1]
    if not (1 <= num <= 12):
        return []

    def wrap(n: int) -> int:
        # Keep the wheel circular: 0 -> 12 and 13 -> 1.
        return 12 if n == 0 else (1 if n == 13 else n)

    opposite = "A" if letter == "B" else "B"
    return [
        f"{num}{opposite}",          # relative major/minor
        f"{wrap(num - 1)}{letter}",  # one step counter-clockwise
        f"{wrap(num + 1)}{letter}",  # one step clockwise
    ]
|
||||
|
||||
|
||||
def harmonic_path_order(infos: List[TrackInfo]) -> List[TrackInfo]:
    """
    Greedy harmonic chaining: open with the calmest track, then repeatedly
    append the best continuation — Camelot neighbors of the current track
    when any exist, otherwise any remaining track — ranked by tempo
    closeness and a preference for rising energy.
    """
    if not infos:
        return []

    pool = sorted(infos, key=lambda t: t.energy_score)  # calmest opener first
    chain = [pool.pop(0)]

    while pool:
        anchor = chain[-1]
        compatible = set(camelot_neighbors(anchor.camelot))

        # Prefer harmonic neighbors; fall back to everything left.
        shortlist = [t for t in pool if t.camelot in compatible]
        if not shortlist:
            shortlist = pool

        def rank(t: TrackInfo):
            # (tempo distance, penalty for dropping energy, prefer higher energy)
            tempo_gap = abs((t.tempo_bpm or 0) - (anchor.tempo_bpm or 0))
            energy_drop = max(0.0, anchor.energy_score - t.energy_score)
            return (tempo_gap, energy_drop, -t.energy_score)

        chosen = min(shortlist, key=rank)
        pool.remove(chosen)
        chain.append(chosen)

    return chain
|
||||
|
||||
|
||||
def tempo_cluster_energy_ramp(infos: List[TrackInfo], tempo_tol: float) -> List[TrackInfo]:
    """
    Group tracks into BPM clusters (joining a cluster when within *tempo_tol*
    of its median tempo), sort each cluster by rising energy, then order
    clusters slow -> fast (tracks without a tempo estimate sort last).
    """
    by_tempo = sorted(infos, key=lambda t: (t.tempo_bpm if t.tempo_bpm > 0 else 1e9))

    clusters: List[List[TrackInfo]] = []
    for track in by_tempo:
        home = None
        for cluster in clusters:
            known = [m.tempo_bpm for m in cluster if m.tempo_bpm > 0]
            center = float(np.median(known)) if known else track.tempo_bpm
            if track.tempo_bpm > 0 and abs(track.tempo_bpm - center) <= tempo_tol:
                home = cluster
                break
        if home is None:
            clusters.append([track])
        else:
            home.append(track)

    # Energy ramp inside each cluster.
    for cluster in clusters:
        cluster.sort(key=lambda m: m.energy_score)

    def cluster_rank(cluster):
        known = [m.tempo_bpm for m in cluster if m.tempo_bpm > 0]
        median_tempo = float(np.median(known)) if known else 9999.0
        median_energy = float(np.median([m.energy_score for m in cluster]))
        return (median_tempo, median_energy)

    clusters.sort(key=cluster_rank)
    return [track for cluster in clusters for track in cluster]
|
||||
|
||||
|
||||
def main():
    """CLI entry point: discover tracks, analyze each one, order them, render
    loudness-normalized clips, crossfade them into a teaser, and write WAV,
    MP3 and a JSON report."""
    parser = argparse.ArgumentParser(description="Local DJ Teaser Builder v3")
    parser.add_argument("--tracks-dir", default="./tracks")
    parser.add_argument("--work-dir", default="./work")
    parser.add_argument("--out-dir", default="./out")
    parser.add_argument("--max-tracks", type=int, default=20)

    parser.add_argument("--select", default="all", help='all | auto | "1,2,7" | "1-4,9"')
    parser.add_argument("--auto-n", type=int, default=8, help="when --select auto: keep N best tracks")

    parser.add_argument("--teaser", type=float, default=60.0)
    parser.add_argument("--bars", type=int, default=2)
    parser.add_argument("--bpb", type=int, default=4)
    parser.add_argument("--preroll-bars", type=int, default=1)

    parser.add_argument("--crossfade", type=float, default=0.25)
    parser.add_argument("--fade", type=float, default=0.08)
    parser.add_argument("--avoid-intro", type=float, default=30.0)
    parser.add_argument("--avoid-outro", type=float, default=20.0)

    parser.add_argument("--tempo-tol", type=float, default=4.0)
    parser.add_argument("--target-lufs", type=float, default=-14.0)
    parser.add_argument("--output-wav", default="album_teaser.wav")
    parser.add_argument("--output-mp3", default="album_teaser.mp3")
    parser.add_argument("--mp3-bitrate", default="320k")

    parser.add_argument("--harmonic", action="store_true", help="Enable Camelot harmonic ordering (recommended for trance)")

    args = parser.parse_args()
    ensure_ffmpeg()

    tracks_dir = Path(args.tracks_dir)
    work_dir = Path(args.work_dir)
    out_dir = Path(args.out_dir)
    work_dir.mkdir(parents=True, exist_ok=True)
    out_dir.mkdir(parents=True, exist_ok=True)

    tracks = list_tracks(tracks_dir, args.max_tracks)
    if not tracks:
        raise SystemExit(f"No tracks found in {tracks_dir.resolve()}")

    print("\nDiscovered tracks:")
    for i, t in enumerate(tracks, start=1):
        print(f" {i:02d}. {t.name}")

    selected_idxs = parse_selection(args.select, len(tracks))
    selected_tracks = [tracks[i] for i in selected_idxs]

    # Per-clip budget: teaser length plus the overlap the crossfades reclaim.
    n = len(selected_tracks)
    teaser_s = float(args.teaser)
    cf = float(args.crossfade)
    avg_dur = (teaser_s + (n - 1) * cf) / max(1, n)

    infos: List[TrackInfo] = []

    # --- Analysis pass: decode, score, snap, key-detect every selected track ---
    for local_idx, track in enumerate(selected_tracks, start=1):
        tmp_wav = work_dir / f"src_{local_idx:02d}.wav"
        ffmpeg_to_wav(track, tmp_wav, sr=22050)

        y, sr = librosa.load(tmp_wav, sr=22050, mono=True)
        duration_s = float(len(y) / sr)

        score = compute_score(y, sr, hop_length=512)
        # robust energy score: mean of the frames at/above the 90th percentile
        q = np.quantile(score, 0.90) if len(score) else 0.0
        energy_score = float(np.mean(score[score >= q])) if np.any(score >= q) else float(np.mean(score) if len(score) else 0.0)

        # Search with a window clamped to a musically sensible 4-12 s.
        search_clip = float(np.clip(avg_dur, 4.0, 12.0))
        approx_start, highlight_score = pick_highlight_start(
            score=score,
            sr=sr,
            hop_length=512,
            clip_s=search_clip,
            avoid_intro_s=float(args.avoid_intro),
            avoid_outro_s=float(args.avoid_outro),
            duration_s=duration_s
        )

        snapped_start, tempo, beat_times = snap_to_downbeat_like(
            y=y, sr=sr,
            approx_start=approx_start,
            bars=int(args.bars),
            beats_per_bar=int(args.bpb)
        )

        snapped_start = apply_preroll(snapped_start, beat_times, int(args.preroll_bars), int(args.bpb))

        # Clip length: exact bars at detected tempo when available,
        # otherwise the average budget; always clamped to [2.5, avg_dur].
        if tempo and tempo > 1.0:
            dur = bars_to_seconds(tempo, int(args.bars), int(args.bpb))
        else:
            dur = avg_dur
        dur = float(np.clip(dur, 2.5, avg_dur))

        key_name, camelot, conf = estimate_key(y, sr)

        infos.append(TrackInfo(
            path=track,
            folder_index=int(selected_idxs[local_idx - 1] + 1),
            duration_s=duration_s,
            tempo_bpm=float(tempo),
            energy_score=energy_score,
            highlight_score=float(highlight_score),
            approx_start_s=float(approx_start),
            snapped_start_s=float(snapped_start),
            clip_dur_s=float(dur),
            key_name=key_name,
            camelot=camelot
        ))

    # Auto best-of
    if args.select.strip().lower() == "auto":
        auto_n = int(max(1, min(args.auto_n, len(infos))))
        infos.sort(key=lambda t: (t.highlight_score, t.energy_score), reverse=True)
        infos = infos[:auto_n]
        print(f"\nAuto-selected best-of: {auto_n} tracks.")

    # Ordering
    if args.harmonic:
        # harmonic path, but keep tempo smooth-ish by pre-sorting with tempo clusters first
        pre = tempo_cluster_energy_ramp(infos, tempo_tol=float(args.tempo_tol))
        ordered = harmonic_path_order(pre)
        print("\nOrdering: harmonic (Camelot neighbors) + tempo/energy heuristics")
    else:
        ordered = tempo_cluster_energy_ramp(infos, tempo_tol=float(args.tempo_tol))
        print("\nOrdering: tempo clustering + energy ramp")

    print("\nFinal clip order:")
    for i, t in enumerate(ordered, start=1):
        print(f" {i:02d}. [{t.tempo_bpm:6.1f} BPM] [{t.camelot:>3}] (E={t.energy_score:.3f}) {t.path.name}")

    # --- Render pass: cut + loudness-normalize each clip (2-pass loudnorm) ---
    clip_paths: List[Path] = []
    report_tracks = []

    for i, t in enumerate(ordered, start=1):
        src = work_dir / f"ord_{i:02d}.wav"
        ffmpeg_to_wav(t.path, src, sr=22050)

        clip_out = work_dir / f"clip_{i:02d}.wav"
        render_clip_2pass(
            in_wav=src,
            out_path=clip_out,
            start=t.snapped_start_s,
            dur=t.clip_dur_s,
            fade_s=float(args.fade),
            target_lufs=float(args.target_lufs)
        )
        clip_paths.append(clip_out)

        report_tracks.append({
            "folder_index": t.folder_index,
            "filename": t.path.name,
            "tempo_bpm_est": round(t.tempo_bpm, 2),
            "key": t.key_name,
            "camelot": t.camelot,
            "energy_score": round(t.energy_score, 6),
            "highlight_score": round(t.highlight_score, 6),
            "approx_start_seconds": round(t.approx_start_s, 3),
            "snapped_start_seconds": round(t.snapped_start_s, 3),
            "clip_duration_seconds": round(t.clip_dur_s, 3),
        })

    out_wav = out_dir / args.output_wav
    out_mp3 = out_dir / args.output_mp3

    # --- Assemble: crossfade clips, encode, write report ---
    build_acrossfade_chain(clip_paths, out_wav, crossfade_s=float(args.crossfade))
    export_mp3(out_wav, out_mp3, bitrate=str(args.mp3_bitrate))

    report = {
        "version": "v3",
        "settings": {
            "teaser_seconds": float(args.teaser),
            "bars": int(args.bars),
            "beats_per_bar": int(args.bpb),
            "preroll_bars": int(args.preroll_bars),
            "harmonic": bool(args.harmonic),
            "tempo_tolerance_bpm": float(args.tempo_tol),
            "crossfade_seconds": float(args.crossfade),
            "fade_seconds": float(args.fade),
            "avoid_intro_seconds": float(args.avoid_intro),
            "avoid_outro_seconds": float(args.avoid_outro),
            "target_lufs": float(args.target_lufs),
            "mp3_bitrate": str(args.mp3_bitrate),
        },
        "outputs": {
            "wav": str(out_wav.resolve()),
            "mp3": str(out_mp3.resolve()),
        },
        "tracks": report_tracks
    }

    report_path = out_dir / "teaser_report.json"
    with open(report_path, "w", encoding="utf-8") as f:
        json.dump(report, f, ensure_ascii=False, indent=2)

    print(f"\n✅ Teaser WAV: {out_wav.resolve()}")
    print(f"✅ Teaser MP3: {out_mp3.resolve()}")
    print(f"📝 Report: {report_path.resolve()}\n")


if __name__ == "__main__":
    main()
|
||||
BIN
V_3/ffmpeg.exe
Normal file
BIN
V_3/ffmpeg.exe
Normal file
Binary file not shown.
141
V_3/ollama_assets.py
Normal file
141
V_3/ollama_assets.py
Normal file
@@ -0,0 +1,141 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Generate repo assets (README, promo text, tracklist) using Ollama.
|
||||
|
||||
Input:
|
||||
- teaser_report.json from dj_teaser_v3.py
|
||||
|
||||
Output:
|
||||
- README.md
|
||||
- PROMO.txt
|
||||
- TRACKLIST.md
|
||||
|
||||
Requirements:
|
||||
pip install requests
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, List
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
def ollama_generate(base_url: str, model: str, prompt: str) -> str:
    """POST a non-streaming generate request to an Ollama server and return
    the stripped response text (empty string when the field is absent).

    Raises requests.HTTPError on a non-2xx response.
    """
    endpoint = base_url.rstrip("/") + "/api/generate"
    body = {"model": model, "prompt": prompt, "stream": False}
    resp = requests.post(endpoint, json=body, timeout=120)
    resp.raise_for_status()
    return resp.json().get("response", "").strip()
|
||||
|
||||
|
||||
def format_timestamps(tracks: List[Dict[str, Any]], crossfade_seconds: float) -> List[Dict[str, Any]]:
    """
    Annotate each track dict with an approximate teaser start time.

    Clip i starts at the accumulated duration of clips 0..i-1, with each
    crossfade overlap subtracted once:
    timestamp[i] = sum(durs[0..i-1]) - i*crossfade
    """
    annotated: List[Dict[str, Any]] = []
    elapsed = 0.0
    for entry in tracks:
        annotated.append({**entry, "teaser_timestamp_seconds": round(elapsed, 2)})
        clip_len = float(entry.get("clip_duration_seconds", 0.0))
        elapsed += max(0.0, clip_len - crossfade_seconds)
    return annotated
|
||||
|
||||
|
||||
def seconds_to_mmss(sec: float) -> str:
    """Format a (non-negative, clamped) number of seconds as zero-padded MM:SS.

    Fix: the original rounded the seconds component after splitting off the
    minutes, so e.g. 59.6 rendered as the impossible "00:60". Rounding the
    total first keeps the seconds field in 0-59.
    """
    total = int(round(max(0.0, float(sec))))
    minutes, seconds = divmod(total, 60)
    return f"{minutes:02d}:{seconds:02d}"
|
||||
|
||||
|
||||
def main():
    """CLI entry point: read teaser_report.json, write a deterministic
    TRACKLIST.md, and generate README.md / PROMO.txt via an Ollama server."""
    ap = argparse.ArgumentParser(description="Generate README/promo/tracklist via Ollama")
    ap.add_argument("--report", default="./out/teaser_report.json", help="Path to teaser_report.json")
    ap.add_argument("--out-dir", default="./out", help="Output directory for generated assets")
    ap.add_argument("--ollama", default="http://192.168.2.60:11434", help="Ollama base URL")
    ap.add_argument("--model", default="llama3.1:8b-instruct-q4_0", help="Ollama model name")
    ap.add_argument("--project-name", default="DJ Teaser Builder", help="Project/repo name")
    ap.add_argument("--artist", default="DjGulvBasS", help="Artist/DJ name")
    ap.add_argument("--genre", default="old school trance", help="Genre")
    args = ap.parse_args()

    report_path = Path(args.report)
    out_dir = Path(args.out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    # Load the report produced by the teaser builder.
    data = json.loads(report_path.read_text(encoding="utf-8"))
    tracks = data.get("tracks", [])
    settings = data.get("settings", {})

    crossfade = float(settings.get("crossfade_seconds", 0.25))
    tracks_ts = format_timestamps(tracks, crossfade_seconds=crossfade)

    # Build TRACKLIST.md ourselves (deterministic)
    lines = [f"# Tracklist (approx.) — {args.artist}\n"]
    for tr in tracks_ts:
        ts = seconds_to_mmss(tr["teaser_timestamp_seconds"])
        fname = tr.get("filename", "Unknown")
        bpm = tr.get("tempo_bpm_est", "?")
        camelot = tr.get("camelot", "??")
        key = tr.get("key", "")
        lines.append(f"- **{ts}** — {fname} _(BPM ~ {bpm}, {camelot}, {key})_")
    (out_dir / "TRACKLIST.md").write_text("\n".join(lines) + "\n", encoding="utf-8")

    # README prompt
    readme_prompt = f"""
You are writing a GitHub README in English for a small local audio tool.

Project: {args.project_name}
Artist use-case: {args.artist} — {args.genre}
The tool scans a folder of tracks and builds a DJ-style teaser by:
- detecting highlight segments
- snapping cuts to bar grid (DJ phrasing)
- optional harmonic ordering using Camelot keys
- rendering clips and acrossfading them with FFmpeg
- exporting WAV + MP3
It produces a JSON report and a tracklist with timestamps.

Please write a README with these sections:
1) What it does
2) Requirements (ffmpeg + Python)
3) Install (venv)
4) Usage examples (include: select all, select by indices, auto best-of)
5) Trance/DJ tips (avoid-intro, bars, preroll-bars, harmonic)
6) Troubleshooting (ffmpeg not found, weird beat detection, key detection limitations)
Keep it concise and practical.

These settings were used in an example run:
{json.dumps(settings, indent=2)}

Do NOT invent features beyond what is described.
"""

    readme_text = ollama_generate(args.ollama, args.model, readme_prompt)
    (out_dir / "README.md").write_text(readme_text + "\n", encoding="utf-8")

    # Promo prompt
    promo_prompt = f"""
Write 3 short promo text variants (English) for a DJ album teaser for {args.artist} ({args.genre}).
Constraints:
- Each variant should be 2–4 lines max
- Include 4–8 hashtags (trance/electronic)
- Tone: energetic, DJ/club vibe
- Do not mention "AI" or "tool" or "script"
- Do not include any URLs
"""
    promo_text = ollama_generate(args.ollama, args.model, promo_prompt)
    (out_dir / "PROMO.txt").write_text(promo_text + "\n", encoding="utf-8")

    print(f"✅ Generated: {out_dir / 'README.md'}")
    print(f"✅ Generated: {out_dir / 'TRACKLIST.md'}")
    print(f"✅ Generated: {out_dir / 'PROMO.txt'}")


if __name__ == "__main__":
    main()
|
||||
92
V_3/readme.md
Normal file
92
V_3/readme.md
Normal file
@@ -0,0 +1,92 @@
|
||||
Install (til v3)
|
||||
|
||||
python -m venv .venv
|
||||
|
||||
source .venv/bin/activate
|
||||
|
||||
pip install -U pip
|
||||
|
||||
pip install numpy librosa soundfile
|
||||
|
||||
# ffmpeg skal være installeret
|
||||
|
||||
|
||||
|
||||
🎛️ Kommandoer (til dine 14 old school trance tracks)
|
||||
|
||||
Rollcall (alle 14, DJ flip, harmonic ordering on)
|
||||
|
||||
python dj_teaser_v3.py --tracks-dir ./tracks --select all --teaser 60 --bars 2 --preroll-bars 1 --avoid-intro 30 --harmonic
|
||||
|
||||
|
||||
|
||||
Best-of mini-mix vibe (8 tracks, 4 bars)
|
||||
|
||||
python dj_teaser_v3.py --tracks-dir ./tracks --select auto --auto-n 8 --teaser 75 --bars 4 --preroll-bars 1 --avoid-intro 30 --harmonic
|
||||
|
||||
|
||||
|
||||
💡 V3 tweaks jeg typisk bruger til trance
|
||||
|
||||
|
||||
|
||||
--avoid-intro 30 eller 45 (lange trance intros)
|
||||
|
||||
|
||||
|
||||
--bars 2 hvis alle skal med (rollcall)
|
||||
|
||||
|
||||
|
||||
--bars 4 hvis du vil have mere “rigtig” trance-feel
|
||||
|
||||
|
||||
|
||||
--preroll-bars 1 giver DJ-lead-in (får overgangen til at føles naturlig)
|
||||
|
||||
|
||||
|
||||
--harmonic næsten altid “on” til trance 👌
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Hvordan du bruger det i praksis
|
||||
|
||||
|
||||
|
||||
Byg teaser med v3:
|
||||
|
||||
|
||||
|
||||
python dj_teaser_v3.py --tracks-dir ./tracks --select all --teaser 60 --bars 2 --preroll-bars 1 --avoid-intro 30 --harmonic
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Generér repo-ting + promo med din Llama 3.1:
|
||||
|
||||
|
||||
|
||||
pip install requests
|
||||
|
||||
python ollama_assets.py --report ./out/teaser_report.json --ollama http://192.168.2.60:11434 --model llama3.1:8b-instruct-q4_0 --artist DjGulvBasS --genre "old school trance"
|
||||
|
||||
|
||||
|
||||
🎛️ Små tips (trance)
|
||||
|
||||
|
||||
|
||||
Hvis cut føles “for tidligt”: sænk --avoid-intro eller sæt --preroll-bars 0
|
||||
|
||||
|
||||
|
||||
Hvis du vil have mere “rigtig trance”: brug --bars 4 og vælg --select auto --auto-n 8
|
||||
|
||||
|
||||
|
||||
Hvis key detection virker off på enkelte tracks: det er normalt (pads + noise + modulations). Camelot er “best effort” her.
|
||||
|
||||
Reference in New Issue
Block a user