From d799a4a8eb2d2482cab341eb9086820dde67609c Mon Sep 17 00:00:00 2001 From: AlvinRamoutar Date: Wed, 31 Dec 2025 04:25:51 -0500 Subject: [PATCH 1/2] feature/repair-persistent-queues --- Dockerfile | 2 +- README.md | 3 ++- app/ytdl.py | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 72 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 401ece6..9cb0ba8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,7 +16,7 @@ COPY pyproject.toml uv.lock docker-entrypoint.sh ./ # Install dependencies RUN sed -i 's/\r$//g' docker-entrypoint.sh && \ chmod +x docker-entrypoint.sh && \ - apk add --update ffmpeg aria2 coreutils shadow su-exec curl tini deno && \ + apk add --update ffmpeg aria2 coreutils shadow su-exec curl tini deno gdbm-tools sqlite && \ apk add --update --virtual .build-deps gcc g++ musl-dev uv && \ UV_PROJECT_ENVIRONMENT=/usr/local uv sync --frozen --no-dev --compile-bytecode && \ apk del .build-deps && \ diff --git a/README.md b/README.md index 02d1992..b8301d1 100644 --- a/README.md +++ b/README.md @@ -270,8 +270,9 @@ MeTube development relies on code contributions by the community. The program as Make sure you have Node.js 22+ and Python 3.13 installed. ```bash -cd metube/ui # install Angular and build the UI +cd ui +curl -fsSL https://get.pnpm.io/install.sh | sh - pnpm install pnpm run build # install python dependencies diff --git a/app/ytdl.py b/app/ytdl.py index 5e3cbb1..65631ef 100644 --- a/app/ytdl.py +++ b/app/ytdl.py @@ -8,6 +8,8 @@ import multiprocessing import logging import re import types +import dbm +import subprocess import yt_dlp.networking.impersonate from dl_formats import get_format, get_opts, AUDIO_FORMATS @@ -193,13 +195,16 @@ class Download: await self.notifier.updated(self.info) class PersistentQueue: - def __init__(self, path): + def __init__(self, name, path): + self.identifier = name pdir = os.path.dirname(path) if not os.path.isdir(pdir): os.mkdir(pdir) with shelve.open(path, 'c'): pass + self.path = path + self.repair() self.dict = OrderedDict() def load(self): @@ -238,13 +243,73 @@ class PersistentQueue: def empty(self): return not bool(self.dict) + def repair(self): + # check DB format + type_check = subprocess.run( + ["file", self.path], + capture_output=True, + text=True + ) + db_type = type_check.stdout.lower() + + if "gnu dbm" in db_type: + # perform gdbm repair + log_prefix = f"PersistentQueue:{self.identifier} repair (dbm/file)" + log.debug(f"{log_prefix} started") + try: + result = subprocess.run( + ["gdbmtool", self.path], + input="recover verbose summary\n", + text=True, + capture_output=True, + cwd=os.getcwd() + ) + log.debug(f"{log_prefix}{result.stdout}") + if result.stderr: + log.debug(f"{log_prefix} failed: {result.stderr}") + except FileNotFoundError: + log.debug(f"{log_prefix} failed: 'gdbmtool' was not found") + + # perform null key cleanup + log_prefix = f"PersistentQueue:{self.identifier} repair (null keys)" + log.debug(f"{log_prefix} started") + deleted = 0 + try: + with dbm.open((self.path), "w") as db: + for key in list(db.keys()): + if key and all(b == 0x00 for b in key): + log.debug(f"{log_prefix} deleting key of length {len(key)} (all NUL bytes)") + del db[key] + deleted += 1 + log.debug(f"{log_prefix} done - deleted {deleted} key(s)") + except dbm.error: + log.debug(f"{log_prefix} failed: db type is dbm.gnu, but the module is not available") + + elif "sqlite" in db_type: + # perform sqlite3 recovery + log_prefix = f"PersistentQueue:{self.identifier} repair (sqlite3/file)" + log.debug(f"{log_prefix} started") + try: + result = subprocess.run( + f"sqlite3 {self.path} '.recover' | sqlite3 {self.path}", + capture_output=True, + text=True, + shell=True + ) + if result.stderr: + log.debug(f"{log_prefix} failed: {result.stderr}") + else: + log.debug(f"{log_prefix}{result.stdout or " was successful, no output"}") + except FileNotFoundError: + log.debug(f"{log_prefix} failed: 'sqlite3' was not found") + class DownloadQueue: def __init__(self, config, notifier): self.config = config self.notifier = notifier - self.queue = PersistentQueue(self.config.STATE_DIR + '/queue') - self.done = PersistentQueue(self.config.STATE_DIR + '/completed') - self.pending = PersistentQueue(self.config.STATE_DIR + '/pending') + self.queue = PersistentQueue("queue", self.config.STATE_DIR + '/queue') + self.done = PersistentQueue("completed", self.config.STATE_DIR + '/completed') + self.pending = PersistentQueue("pending", self.config.STATE_DIR + '/pending') self.active_downloads = set() self.semaphore = None # For sequential mode, use an asyncio lock to ensure one-at-a-time execution. From 191f17ee383335324d259c2666bd5a69cabe769e Mon Sep 17 00:00:00 2001 From: AlvinRamoutar Date: Mon, 5 Jan 2026 18:13:42 -0500 Subject: [PATCH 2/2] syntax changes + null logic update for dbm repair --- app/ytdl.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/app/ytdl.py b/app/ytdl.py index 65631ef..1cb2aef 100644 --- a/app/ytdl.py +++ b/app/ytdl.py @@ -1,4 +1,5 @@ import os +import shutil import yt_dlp from collections import OrderedDict import shelve @@ -252,6 +253,15 @@ class PersistentQueue: ) db_type = type_check.stdout.lower() + # create backup (.old) + try: + shutil.copy2(self.path, f"{self.path}.old") + except Exception as e: + # if we cannot backup then its not safe to attempt a repair + # since it could be due to a filesystem error + log.debug(f"PersistentQueue:{self.identifier} backup failed, skipping repair") + return + if "gnu dbm" in db_type: # perform gdbm repair log_prefix = f"PersistentQueue:{self.identifier} repair (dbm/file)" @@ -262,9 +272,9 @@ class PersistentQueue: input="recover verbose summary\n", text=True, capture_output=True, - cwd=os.getcwd() + timeout=60 ) - log.debug(f"{log_prefix}{result.stdout}") + log.debug(f"{log_prefix} {result.stdout}") if result.stderr: log.debug(f"{log_prefix} failed: {result.stderr}") except FileNotFoundError: @@ -275,15 +285,15 @@ class PersistentQueue: log.debug(f"{log_prefix} started") deleted = 0 try: - with dbm.open((self.path), "w") as db: + with dbm.open(self.path, "w") as db: for key in list(db.keys()): - if key and all(b == 0x00 for b in key): + if len(key) > 0 and all(b == 0x00 for b in key): log.debug(f"{log_prefix} deleting key of length {len(key)} (all NUL bytes)") del db[key] deleted += 1 log.debug(f"{log_prefix} done - deleted {deleted} key(s)") except dbm.error: - log.debug(f"{log_prefix} failed: db type is dbm.gnu, but the module is not available") + log.debug(f"{log_prefix} failed: db type is dbm.gnu, but the module is not available (dbm.error; module support may be missing or the file may be corrupted)") elif "sqlite" in db_type: # perform sqlite3 recovery @@ -291,14 +301,16 @@ class PersistentQueue: log.debug(f"{log_prefix} started") try: result = subprocess.run( - f"sqlite3 {self.path} '.recover' | sqlite3 {self.path}", + f"sqlite3 {self.path} '.recover' | sqlite3 {self.path}.tmp", capture_output=True, text=True, - shell=True + shell=True, + timeout=60 ) if result.stderr: log.debug(f"{log_prefix} failed: {result.stderr}") else: + shutil.move(f"{self.path}.tmp", self.path) log.debug(f"{log_prefix}{result.stdout or " was successful, no output"}") except FileNotFoundError: log.debug(f"{log_prefix} failed: 'sqlite3' was not found")