# This file is part of Lazylibrarian.
#
# Lazylibrarian is free software, you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Lazylibrarian is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Lazylibrarian. If not, see <http://www.gnu.org/licenses/>.

import contextlib
import datetime
import json
import logging
import os
import re
import shutil
import threading
import time
import traceback
import uuid
import zipfile
from dataclasses import dataclass
from pathlib import Path
from typing import Final

from rapidfuzz import fuzz

import lazylibrarian
from lazylibrarian import database
from lazylibrarian.archive_utils import unpack_archive, unpack_multipart
from lazylibrarian.bookrename import audio_rename, name_vars, stripspaces
from lazylibrarian.cache import ImageType, cache_img
from lazylibrarian.calibre_integration import send_to_calibre
from lazylibrarian.common import multibook, run_script
from lazylibrarian.config2 import CONFIG
from lazylibrarian.download_client import (
    check_contents,
    delete_task,
    get_download_folder,
    get_download_name,
    get_download_progress,
)
from lazylibrarian.filesystem import (
    DIRS,
    book_file,
    bts_file,
    copy_tree,
    get_directory,
    jpg_file,
    listdir,
    make_dirs,
    path_exists,
    path_isdir,
    path_isfile,
    remove_file,
    safe_copy,
    safe_move,
    setperm,
    syspath,
)
from lazylibrarian.formatter import (
    check_int,
    get_list,
    is_valid_type,
    make_unicode,
    md5_utf8,
    now,
    plural,
    sanitize,
    thread_name,
    today,
    unaccented,
)
from lazylibrarian.images import create_mag_cover, createthumbs
from lazylibrarian.importer import update_totals
from lazylibrarian.magazinescan import create_id
from lazylibrarian.mailinglist import mailing_list
from lazylibrarian.metadata_opf import create_comic_opf, create_mag_opf, create_opf
from lazylibrarian.notifiers import (
    custom_notify_download,
    custom_notify_snatch,
    notify_download,
    notify_snatch,
)
from lazylibrarian.postprocess_metadata import (
    BookMetadata,
    BookType,
    ComicMetadata,
    EbookMetadata,
    MagazineMetadata,
    prepare_book_metadata,
    prepare_comic_metadata,
    prepare_magazine_metadata,
)
from lazylibrarian.postprocess_utils import enforce_bytes, enforce_str
from lazylibrarian.preprocessor import (
    preprocess_audio,
    preprocess_ebook,
    preprocess_magazine,
)
from lazylibrarian.scheduling import SchedulerCommand, schedule_job
from lazylibrarian.telemetry import TELEMETRY


@dataclass
class BookState:
    """
    Represents the state of a book/magazine being processed.

    This class tracks the download item as it moves through the postprocessing
    pipeline, maintaining information about the candidate file/folder location,
    metadata, and processing status.

    Attributes:
        book_id: Unique identifier for the book/magazine
        book_title: Normalized title for matching
        aux_type: Type of media from aux_info field (eBook, AudioBook, Magazine)
        aux_info: Auxiliary information (e.g., issue date for magazines)
        completed_at: Unix timestamp when download completed
        mode_type: Download mode (torrent, magnet, torznab, nzb, etc.)

        candidate_ptr: Current path to the candidate file/folder being evaluated.
                       This pointer gets updated as we drill down through folders
                       and extract archives to find the actual media file.

        skipped_reason: Reason why processing was skipped (if applicable)
        copy_to_destination: Whether to copy (vs move) files to destination
    """

    # Core identifiers (from database row)
    book_id: str
    download_title: str  # NZBtitle (download/torrent name) - for location finding
    book_title: str = ""  # Actual book title from books table - for drill-down matching
    aux_type: str = (
        ""  # case sensitive AuxInfo book type (eBook, AudioBook, Magazine, comic)
    )
    aux_info: str = ""

    # Download metadata
    completed_at: int = 0
    mode_type: str = ""
    source: str = ""  # Download client source (SABnzbd, Deluge, etc.)
    download_id: str = ""  # ID in download client for tracking/deletion
    download_folder: str = ""  # Folder path from download client (for targeted search)
    download_url: str = ""  # URL of the download (unique identifier)
    download_provider: str = ""  # Provider name (for stats/notifications)
    status: str = "Snatched"  # Current status (Snatched, Seeding, Aborted, etc.)
    snatched_date: str = ""  # When snatched (for timeout calculations)

    # Processing state - mutable as we search for matches
    candidate_ptr: str | None = None
    skipped_reason: str | None = None

    # Failure tracking - populated during processing
    failure_reason: str = ""
    processing_stage: str = (
        ""  # "matching", "validation", "metadata", "destination", "post"
    )
    was_processed: bool = False  # True if successfully processed

    # Configuration flags
    copy_to_destination: bool = False

    # Runtime state (for unprocessed download tracking)
    aborted: bool = False
    finished: bool = False
    progress: int | str = "Unknown"

    @classmethod
    def from_db_row(cls, book_row: dict, config) -> "BookState":
        """
        Create a BookState instance from a database row.

        Args:
            book_row: Database row from 'wanted' table
            config: Configuration object

        Returns:
            Initialized BookState instance
        """
        book_data = dict(book_row)
        normalized_download_title = _normalize_title(book_data["NZBtitle"])
        mode_type = book_data["NZBmode"]

        return cls(
            book_id=book_data["BookID"],
            download_title=normalized_download_title,
            book_title="",  # Will be populated later if needed for drill-down
            aux_type=_extract_aux_type(book_data),
            aux_info=book_data["AuxInfo"],
            completed_at=check_int(book_data["Completed"], 0),
            mode_type=mode_type,
            source=book_data.get("Source", ""),
            download_id=book_data.get("DownloadID", ""),
            download_folder="",  # Will be populated in _get_ready_snatched_books
            download_url=book_data.get("NZBurl", ""),
            download_provider=book_data.get("NZBprov", ""),
            status=book_data.get("Status", "Snatched"),
            snatched_date=book_data.get("NZBdate", ""),
            copy_to_destination=config.get_bool("DESTINATION_COPY"),
            candidate_ptr=None,
            skipped_reason=None,
        )

    def is_completed(self) -> bool:
        """Check if the download has a completion timestamp."""
        return self.completed_at > 0

    def seconds_since_completion(self) -> int:
        """Calculate seconds since download completed (rounded up)."""
        if not self.is_completed():
            return 0
        completion = time.time() - self.completed_at
        return int(-(-completion // 1))  # Round up to int

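    # Added note (not original code): the double-negation idiom above is integer
    # ceiling division, equivalent to math.ceil() for positive floats, e.g.:
    #     -(-3.2 // 1)  ==  4.0    # so int(-(-3.2 // 1)) == 4
    #     -(-3.0 // 1)  ==  3.0
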
    def should_delay_processing(self, delay_seconds: int) -> "tuple[bool, int]":
        """
        Check if processing should be delayed based on completion time.

        Args:
            delay_seconds: Required delay in seconds from config (PP_DELAY)

        Returns:
            Tuple of (should_delay, seconds_since_completion)
        """
        if not self.is_completed():
            return False, 0

        seconds_elapsed = self.seconds_since_completion()
        should_delay = seconds_elapsed < delay_seconds

        return should_delay, seconds_elapsed

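    # Usage sketch (assumed caller pattern, not original code): the scheduler
    # would typically skip an item until PP_DELAY has elapsed, e.g.:
    #     delay, elapsed = state.should_delay_processing(CONFIG.get_int("PP_DELAY"))
    #     if delay:
    #         logger.debug(f"Waiting, only {elapsed}s since completion")
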
    def update_candidate(self, new_path: str) -> None:
        """
        Update the candidate pointer to a new location.

        This is called as we drill down through directories and extract archives
        to find the actual media file.

        Args:
            new_path: New path to set as candidate
        """
        self.candidate_ptr = new_path

    def mark_skipped(self, reason: str) -> None:
        """
        Mark this item as skipped with a reason.

        Args:
            reason: Human-readable reason for skipping
        """
        self.skipped_reason = reason

    def is_skipped(self) -> bool:
        """Check if this item was marked as skipped."""
        return self.skipped_reason is not None

    def is_torrent(self) -> bool:
        """Convenience flag indicating if this is a torrent download."""
        return self.mode_type in ["torrent", "magnet", "torznab"]

    def has_candidate(self) -> bool:
        """Check if we have found a candidate file/folder."""
        return self.candidate_ptr is not None

    def is_book(self) -> bool:
        """Check if this is a book (ebook or audiobook)."""
        book_type_enum = self.get_book_type_enum()
        return book_type_enum in [BookType.EBOOK, BookType.AUDIOBOOK]

    def is_magazine(self) -> bool:
        """Check if this is a magazine."""
        book_type_enum = self.get_book_type_enum()
        return book_type_enum in [BookType.MAGAZINE]

    def get_book_type_str(self) -> str:
        """
        Get the book type as a normalized string.

        Returns:
            Normalized string for the book type based on the aux_info type,
            or an empty string if aux_type is not a valid book type.
        """
        book_type_str = ""
        # suppress() must wrap the call as a context manager;
        # invoking it inside an except block is a no-op
        with contextlib.suppress(ValueError):
            book_type_str = self.get_book_type_enum().value

        return book_type_str

    def get_book_type_enum(self) -> BookType:
        """
        Get the book type as a BookType enum member.

        Returns:
            Normalized BookType for this item based on the aux_info type.

        Raises:
            ValueError: If aux_type is not a valid book type.
        """
        return BookType.from_string(self.aux_type)

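    # Illustrative sketch (assumed mapping, derived from _extract_aux_type below
    # and the BookType usage in this module; not original code):
    #     state = BookState(book_id="1", download_title="x", aux_type="eBook")
    #     state.get_book_type_enum()  # -> BookType.EBOOK (raises ValueError if invalid)
    #     state.get_book_type_str()   # -> BookType.EBOOK.value, or "" if invalid
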
    def has_download_client(self) -> bool:
        """
        Check if this download has a source and download ID.

        Returns:
            True if both source and download_id are set
        """
        return bool(self.source and self.download_id)

    def can_delete_from_client(self) -> bool:
        """
        Check if we can delete this from the download client.

        Returns:
            True if download can be deleted from client
        """
        if not self.source:
            return False
        if not self.download_id or self.download_id == "unknown":
            return False
        return self.source != "DIRECT"

    def get_display_name(self, config) -> str:
        """
        Get formatted provider name for notifications.

        Args:
            config: Configuration object

        Returns:
            Formatted display name with optional title/URL
        """
        dispname = config.disp_name(self.download_provider)

        if config.get_bool("NOTIFY_WITH_TITLE"):
            dispname = f"{dispname}: {self.book_title}"

        if config.get_bool("NOTIFY_WITH_URL"):
            dispname = f"{dispname}: {self.download_url}"

        return dispname

    def is_seeding(self) -> bool:
        """Check if status indicates seeding."""
        return self.status == "Seeding"

    def is_snatched(self) -> bool:
        """Check if status indicates snatched."""
        return self.status == "Snatched"

    def is_aborted(self) -> bool:
        """Check if status indicates aborted."""
        return self.status == "Aborted"

    def mark_failed(self, stage: str, reason: str) -> None:
        """
        Mark this item as failed with stage and reason.

        Args:
            stage: Processing stage where failure occurred
                   ("matching", "validation", "metadata", "destination", "post")
            reason: Human-readable reason for failure
        """
        self.processing_stage = stage
        self.failure_reason = reason

    def mark_processed(self) -> None:
        """Mark this item as successfully processed."""
        self.was_processed = True

    def has_failed(self) -> bool:
        """Check if this item failed during processing."""
        return bool(self.failure_reason)

    def enrich_with_download_info(self, db) -> None:
        """
        Populate download_folder and book_title from download client and database.

        This is called after BookState creation to add:
        - download_folder: Exact folder path from download client (for targeted search)
        - book_title: Actual book title from books table (for drill-down matching)

        Args:
            db: Database connection for querying book title
        """
        # Get specific download folder by combining general folder + download name
        logger = logging.getLogger(__name__)

        if self.source and self.download_id:
            general_folder = get_download_folder(self.source, self.download_id)
            download_name = get_download_name(self.download_title, self.source, self.download_id)
            res = None
            if general_folder is None and self.source == 'DIRECT':
                res = db.match("SELECT NZBprov,NZBtitle,NZBurl from wanted where source='DIRECT' and DownloadID=?",
                               (self.download_id,))
                # These download into first download_dir.
                # Parenthesise the "or" so we never index into res when it is None
                if res and (res['NZBprov'] in ['annas', 'zlibrary', 'soulseek'] or res['NZBprov'].startswith('libgen')):
                    general_folder = get_directory('Download')
            # For usenet clients (SABnzbd, NZBGet), the storage field already contains
            # the complete download path including the folder name
            if self.source in ("SABNZBD", "NZBGET") and general_folder:
                if os.path.isdir(general_folder):
                    self.download_folder = general_folder
                else:
                    self.download_folder = os.path.dirname(general_folder)
            elif res and res['NZBprov'] == 'soulseek':
                try:
                    soulseek = json.loads(res['NZBurl'].split('^')[1].replace('\\', '/'))
                    self.download_folder = os.path.join(general_folder, soulseek.get("name", '').rsplit('/', 1)[1])
                except (KeyError, IndexError):
                    self.download_folder = general_folder
            # For torrent clients, combine base folder with download name
            elif general_folder and download_name:
                self.download_folder = os.path.join(general_folder, download_name)
            elif general_folder:
                # Fallback: use general folder as-is
                self.download_folder = general_folder

            logger.debug(f"General:{general_folder} DownloadName:{download_name} "
                         f"DownloadFolder:{self.download_folder}")
        # Get actual book title for drill-down matching
        if self.book_id and self.is_book():
            result = db.match(
                "SELECT AuthorName, BookName FROM books,authors "
                "WHERE books.BookID=? AND books.AuthorID=authors.AuthorID",
                (self.book_id,),
            )
            if result:
                data = dict(result)
                # Combine author and book name for better matching
                self.book_title = _normalize_title(
                    f"{data.get('AuthorName', '')} - {data.get('BookName', '')}"
                )
        elif self.book_id and self.is_magazine():
            result = db.match(
                "SELECT Title FROM magazines WHERE Title=?", (self.book_id,)
            )
            if result:
                data = dict(result)
                self.book_title = _normalize_title(data.get("Title", ""))
        logger.debug(f"IsBook:{self.is_book()} IsMag:{self.is_magazine()} Title:{self.book_title}")

    def __repr__(self) -> str:
        """String representation for debugging."""
        return (
            f"BookState(id={self.book_id}, type={self.aux_type}, "
            f"download_title='{self.download_title[:30]}...', source={self.source}, "
            f"status={self.status}, candidate={self.candidate_ptr}, "
            f"skipped={self.is_skipped()})"
        )


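# Illustrative lifecycle sketch (assumed wiring, not original code): a wanted-table
# row becomes a BookState, is enriched, then drives the matching stages below.
#     db = database.DBConnection()
#     row = db.match("SELECT * FROM wanted WHERE Status='Snatched'")
#     state = BookState.from_db_row(row, CONFIG)
#     state.enrich_with_download_info(db)  # fills download_folder / book_title
#     if state.should_delay_processing(CONFIG.get_int("PP_DELAY"))[0]:
#         state.mark_skipped("Waiting for PP_DELAY")

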
def PostProcessor():  # was cron_process_dir
    """Scheduled postprocessor entry point.

    Thread safety is handled inside process_dir() itself.
    """
    logger = logging.getLogger(__name__)
    if lazylibrarian.STOPTHREADS:
        logger.debug("STOPTHREADS is set, not starting postprocessor")
        schedule_job(SchedulerCommand.STOP, target="PostProcessor")
    else:
        # process_dir() has its own thread safety and thread naming
        process_dir()


def process_img(
    dest_path: str,
    bookid,
    bookimg,
    global_name,
    cache: ImageType = ImageType.BOOK,
    overwrite=False,
):
    """cache the bookimg from url or filename, and optionally copy it to bookdir"""
    # if lazylibrarian.CONFIG['IMP_AUTOADD_BOOKONLY']:
    #     logger.debug('Not creating coverfile, bookonly is set')
    #     return

    logger = logging.getLogger(__name__)
    coverfile = jpg_file(dest_path)
    if not overwrite and coverfile:
        logger.debug(f"Cover {coverfile} already exists")
        return
    if not bookimg:
        logger.debug(f"No cover to cache for {bookid}")
        return

    TELEMETRY.record_usage_data("Process/Image")
    if bookimg.startswith("cache/"):
        img = bookimg.replace("cache/", "")
        if os.path.__name__ == "ntpath":
            img = img.replace("/", "\\")
        cachefile = os.path.join(DIRS.CACHEDIR, img)
    else:
        link, success, _ = cache_img(cache, bookid, bookimg, False)
        if not success:
            logger.error(f"Error caching cover from {bookimg}, {link}")
            return
        cachefile = os.path.join(DIRS.DATADIR, link)

    try:
        coverfile = os.path.join(dest_path, f"{global_name}.jpg")
        coverfile = safe_copy(cachefile, coverfile)
        setperm(coverfile)
    except Exception as e:
        logger.error(
            f"Error copying image {bookimg} to {coverfile}, {type(e).__name__} {e!s}"
        )
        return


def _update_downloads_provider_count(provider: str = "manually added"):
    """
    Increment the stored count of downloads attributed to the given provider.

    Args:
        provider: Optional name of the provider of the download
    """
    db = database.DBConnection()
    entry = dict(db.match("SELECT Count FROM downloads where Provider=?", (provider,)))
    if entry:
        counter = int(entry["Count"])
        db.action(
            "UPDATE downloads SET Count=? WHERE Provider=?", (counter + 1, provider)
        )
    else:
        db.action(
            "INSERT into downloads (Count, Provider) VALUES (?, ?)", (1, provider)
        )
    db.close()


def _transfer_matching_files(
    sourcedir: str, targetdir: str, fname_prefix: str, copy=False
):
    """
    Selectively transfer files matching a filename prefix from source to target directory.

    Only transfers files that:
    - Start with the given filename prefix
    - Are valid media file types

    This prevents accidentally moving/copying unrelated files when processing
    a single file in a directory with multiple downloads.

    Args:
        sourcedir: Source directory containing files
        targetdir: Target directory to transfer to
        fname_prefix: Filename prefix to match (e.g., "Book Title")
        copy: If True, copy files; if False, move files

    Returns:
        Count of files transferred
    """
    logger = logging.getLogger(__name__)
    cnt = 0
    list_dir = listdir(sourcedir)
    valid_extensions = CONFIG.get_all_types_list()

    for _ourfile in list_dir:
        ourfile = str(_ourfile)
        # Only transfer files that start with our book's name and are valid media files
        if ourfile.startswith(fname_prefix) and is_valid_type(ourfile, extensions=valid_extensions):
            try:
                srcfile = os.path.join(sourcedir, ourfile)
                dstfile = os.path.join(targetdir, ourfile)

                if copy:
                    dstfile = safe_copy(srcfile, dstfile)
                    setperm(dstfile)
                    logger.debug(f"Copied {ourfile} to subdirectory")
                else:
                    dstfile = safe_move(srcfile, dstfile)
                    setperm(dstfile)
                    logger.debug(f"Moved {ourfile} to subdirectory")
                cnt += 1
            except Exception as why:
                logger.warning(
                    f"Failed to transfer file {ourfile} to [{targetdir}], "
                    f"{type(why).__name__} {why!s}"
                )
                continue
    return cnt


def _extract_aux_type(book: dict) -> str:
    """
    A simple helper to ensure the value is a valid possibility for the AuxInfo column.
    """
    book_type = book["AuxInfo"]
    if book_type not in ["AudioBook", "eBook", "comic"]:
        book_type = "eBook" if not book_type else "Magazine"
    return book_type


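# Behaviour summary for _extract_aux_type (added note, derived from the code above):
#     "AudioBook" / "eBook" / "comic"  -> returned unchanged
#     "" or None (falsy)               -> "eBook"
#     anything else (e.g. issue date)  -> "Magazine"

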
def _update_download_status(
    book_state: BookState, db, logger: logging.Logger, dlresult=None
):
    """
    Update the status of a completed download based on its current state.

    Args:
        book_state: The book/download record
        db: Database connection
        logger: Logger instance
        dlresult: Optional result message for the download

    Returns:
        str: The new status ('Seeding', 'Processed', or None if not updated)
    """
    if not isinstance(book_state.progress, int) or book_state.progress != 100:
        return None

    # Determine if this should be marked as Seeding
    should_keep_seeding = (
        book_state.is_torrent() and CONFIG.get_bool("KEEP_SEEDING") and not book_state.finished
    )

    if should_keep_seeding:
        # Mark as Seeding - download complete but still active in client
        cmd = "UPDATE wanted SET Status='Seeding' WHERE NZBurl=? and Status IN ('Snatched', 'Processed')"
        db.action(cmd, (book_state.download_url,))
        logger.info(
            f"STATUS: {book_state.download_title} [{book_state.status} -> Seeding] "
            "Download complete, continuing to seed"
        )
        return "Seeding"
    # Mark as Processed - download complete
    if not dlresult:
        dlresult = "Download complete"
    cmd = "UPDATE wanted SET Status='Processed', DLResult=? WHERE NZBurl=? and Status='Snatched'"
    db.action(cmd, (dlresult, book_state.download_url))
    logger.info(
        f"STATUS: {book_state.download_title} [{book_state.status} -> Processed] {dlresult}"
    )

    # Optionally delete from client if configured
    if book_state.finished and CONFIG.get_bool("DEL_COMPLETED"):
        logger.debug(
            f"Deleting {book_state.download_title} from {book_state.source} (DEL_COMPLETED=True)"
        )
        delete_task(book_state.source, book_state.download_id, False)
    return "Processed"


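# Status transitions handled above (added summary, derived from the code):
#     Snatched/Processed -> Seeding    torrent at 100%, KEEP_SEEDING set, not finished
#     Snatched           -> Processed  otherwise, at 100% progress
#     finished + DEL_COMPLETED         additionally removes the task from the client

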
def _get_ready_from_snatched(db, snatched_list: list[dict]):
    """
    Filter snatched books to find those ready for processing.

    Filters out books that:
    - Have rejected content (file size, banned extensions, etc.)
    - Are still downloading (progress 0-99%)
    - Have incomplete torrents

    Updates download names if they changed in the client (common with torrents).

    Args:
        db: Database connection
        snatched_list: List of books with Status='Snatched'

    Returns:
        List of book rows ready to process (download complete and content valid)
    """
    logger = logging.getLogger(__name__)

    books_to_process = []  # the filtered list of books ready for processing
    delete_failed = CONFIG.get_bool("DEL_FAILED")

    for book_row in snatched_list:
        # Get current status from the downloader as the name may have changed
        # once magnet resolved, or download started or completed.
        # This is common with torrent downloaders. Usenet doesn't change the name.
        book_id = book_row["BookID"]
        book_type_str = BookType.from_string(
            _extract_aux_type(book_row)
        ).value  # normalize aux info type
        title = unaccented(book_row["NZBtitle"], only_ascii=False)
        source = book_row["Source"]
        download_id = book_row["DownloadID"]
        download_url = book_row["NZBurl"]
        download_name = get_download_name(title, source, download_id)

        if download_name and download_name != title:
            if source == "SABNZBD":
                logger.warning(
                    f"{source} unexpected change [{title}] to [{download_name}]"
                )
            logger.debug(f"{source} Changing [{title}] to [{download_name}]")
            # should we check against reject word list again as the name has changed?
            db.action(
                "UPDATE wanted SET NZBtitle=? WHERE NZBurl=?",
                (download_name, download_url),
            )
            title = download_name

        rejected = check_contents(source, download_id, book_type_str, title)
        if rejected:
            logger.debug(f"Rejected: {title} BookID: {book_id} DownloadID: {download_id}")

            # change status to "Failed", and ask downloader to delete task and files
            # Only reset book status to wanted if still snatched in case another download task succeeded
            if book_id != "unknown":
                cmd = ""
                if book_type_str == BookType.EBOOK.value:
                    cmd = "UPDATE books SET status='Wanted' WHERE status='Snatched' and BookID=?"
                elif book_type_str == BookType.AUDIOBOOK.value:
                    cmd = "UPDATE books SET audiostatus='Wanted' WHERE audiostatus='Snatched' and BookID=?"
                if cmd:
                    db.action(cmd, (book_id,))
            # use downloadid as identifier as bookid is not unique for magazine issues
            db.action(
                "UPDATE wanted SET Status='Failed',DLResult=? WHERE DownloadID=?",
                (rejected, download_id),
            )
            logger.info(
                f"STATUS: {title} [Snatched -> Failed] Content rejected: {rejected}"
            )
            if delete_failed:
                delete_task(source, download_id, True)
            continue

        # Check if download is complete before processing download directories
        progress, finished = get_download_progress(source, download_id)
        # progress can be: -1 (not found/removed - may be seeding complete), 0-99 (in progress), 100+ (complete)
        # finished is True only when downloader confirms completion
        # Process if: progress >= 100 (complete/seeding), finished == True, or progress == -1
        # (torrent removed after seeding)
        # Skip only if: 0 <= progress < 100 and not finished (actively downloading)
        if 0 <= progress < 100 and not finished:
            logger.debug(
                f"Download not yet complete for {title} (progress: {progress}%), skipping"
            )
            continue

        # If we reach this point, this book can be processed
        logger.debug(
            f"Download for '{title}' of type {book_type_str} ready to process."
        )
        books_to_process.append(book_row)

    return books_to_process


def _normalize_title(title: str) -> str:
    # remove accents and convert non-ascii apostrophes
    new_title = str(unaccented(title, only_ascii=False))
    # torrent names might have words_separated_by_underscores
    new_title = new_title.split(" LL.(")[0].replace("_", " ")
    year_len: Final[int] = 4
    # Strip known file extensions and special suffixes from the end.
    # This handles cases like "Book Name.2013" or "folder.unpack" or "Book.epub"
    # but preserves periods in names like "J.R.R. Tolkien" or "Dr. Seuss"
    if '.' in new_title:
        # Get the part after the last period
        last_dot_index = new_title.rfind(".")
        suffix = new_title[last_dot_index + 1:].lower()

        # Strip if it matches known patterns:
        # 1. Known file extensions
        known_extensions = CONFIG.get_all_types_list() if CONFIG else []
        # 2. Special suffixes
        special_suffixes = ['unpack']
        # 3. 4-digit years
        is_year = suffix.isdigit() and len(suffix) == year_len

        if suffix in known_extensions or suffix in special_suffixes or is_year:
            new_title = new_title[:last_dot_index]

    # strip noise characters
    return sanitize(new_title).strip()


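# Worked examples (added sketch; assumes "epub" is in CONFIG's types list):
#     _normalize_title("Some_Author_-_Some_Book.epub")  -> "Some Author - Some Book"
#     _normalize_title("Book Name.2013")                -> "Book Name"
#     _normalize_title("J.R.R. Tolkien - The Hobbit")   -> unchanged apart from sanitize()

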
def _tokenize_file(filepath_or_name: str) -> "tuple[str, str]":
    """
    Extract filename stem and extension from a file path.

    Example:
        >>> _tokenize_file("/path/to/file.epub")
        ("file", "epub")

    Args:
        filepath_or_name: Full path or filename

    Returns:
        Tuple of (stem, extension) where extension has no leading dot
    """
    path_obj = Path(filepath_or_name)
    stem = path_obj.stem
    # Slice off the leading dot from the suffix
    extension = path_obj.suffix[1:]
    return stem, extension


def _is_valid_media_file(
    filepath: str, book_type=BookType.EBOOK.value, include_archives=False
) -> bool:
    """
    Check if a file is a valid media type for processing.

    Args:
        filepath: Path to the file to check
        book_type: Type of media ("ebook", "audiobook", "magazine", "comic")
        include_archives: Whether to include comic book archives (cbr, cbz)

    Returns:
        True if file is a valid media type
    """
    if include_archives:
        return is_valid_type(
            filepath, extensions=CONFIG.get_all_types_list(), extras="cbr, cbz"
        )
    return CONFIG.is_valid_booktype(filepath, booktype=book_type)


def _count_zipfiles_in_directory(directory_path: str) -> int:
    """
    Count zip files in a directory, excluding epub and cbz files.

    Args:
        directory_path: Path to directory to scan

    Returns:
        Number of zip files found (excluding ebook/comic formats)
    """
    zipcount = 0
    for _f in listdir(directory_path):
        f = enforce_str(_f)  # Ensure string for path operations
        file_path = os.path.join(directory_path, f)
        _, extn = _tokenize_file(f)
        extn = extn.lower()

        # Skip ebook and comic book formats that happen to be zips.
        # _tokenize_file returns extensions without a leading dot,
        # so compare against "epub"/"cbz", not ".epub"/".cbz"
        if extn not in ["epub", "cbz"] and zipfile.is_zipfile(file_path):
            zipcount += 1
    return zipcount


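# Added note: epub and cbz are themselves zip containers, which is why they must
# be excluded above. A quick stdlib check (illustrative, path is hypothetical):
#     import zipfile
#     zipfile.is_zipfile("/tmp/book.epub")  # True for any well-formed epub

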
def _find_valid_file_in_directory(
    directory_path, book_type=BookType.EBOOK.value, recurse=False
) -> str:
    """
    Find the first valid media file in a directory.

    Args:
        directory_path: Path to directory to search
        book_type: Type of media to look for
        recurse: Whether to search subdirectories

    Returns:
        Path to first valid file found, or empty string if none found
    """
    if recurse:
        for _dirpath, _, files in os.walk(directory_path):
            dirpath = enforce_str(_dirpath)  # Ensure string for path operations
            for _item in files:
                item = enforce_str(_item)  # Ensure string for path operations
                if _is_valid_media_file(
                    item, book_type=book_type, include_archives=True
                ):
                    return os.path.join(dirpath, item)
    else:
        for _f in listdir(directory_path):
            f = enforce_str(_f)  # Ensure string for path operations
            if _is_valid_media_file(f, book_type=book_type, include_archives=True):
                return os.path.join(directory_path, f)
    return ""


def _extract_best_match_from_collection(
    candidate_dir,
    target_title: str,
    download_dir: str,
    logger: logging.Logger,
    fuzzlogger: logging.Logger,
) -> "tuple[str, bool, str]":
    """
    Extract the best matching book from a multi-book collection.

    When a download contains multiple books, find the one that best matches
    the target title and copy it to an isolated directory for processing.

    Args:
        candidate_dir: Directory containing multiple books
        target_title: The title we're trying to match
        download_dir: Parent download directory for creating .unpack folder
        logger: Logger instance
        fuzzlogger: Fuzzy matching logger instance

    Returns:
        Tuple of (extracted_path, skipped, skip_reason)
        - extracted_path: Path to the extracted book directory
        - skipped: Whether extraction was skipped
        - skip_reason: Reason for skipping, if applicable
    """
    match_threshold = CONFIG.get_int("DLOAD_RATIO")
    best_match = None
    best_score = 0

    # Find the best matching file
    for _f in listdir(candidate_dir):
        f = enforce_str(_f)  # Ensure string for validation
        if CONFIG.is_valid_booktype(f, booktype=BookType.EBOOK.value):
            filename_stem, _ = _tokenize_file(f)
            normalized_fname = _normalize_title(filename_stem)

            match_percent = fuzz.token_set_ratio(target_title, normalized_fname)
            is_match = match_percent >= match_threshold
            fuzzlogger.debug(
                f"{round(match_percent, 2)}% match {target_title} : {normalized_fname}"
            )

            if is_match and match_percent > best_score:
                best_match = f
                best_score = match_percent

    if not best_match:
        return "", True, "Multiple books found with no good match"

    # Create isolated directory for the best match
    # use md5 to handle enormously long titles overflowing windows directory name limit
    target_dir = os.path.join(download_dir, f"{md5_utf8(target_title)[-8:]}.unpack")
    if not make_dirs(target_dir, new=True):
        logger.error(f"Failed to create target dir {target_dir}")
        return "", True, "Failed to create extraction directory"

    logger.debug(
        f"Best candidate match: {best_match} ({round(best_score, 2)}%) "
        f"for {target_title} in multi book collection"
    )

    best_match_stem, _ = _tokenize_file(best_match)

    # Copy all files related to the best match (including .opf, .jpg)
    for _f in listdir(candidate_dir):
        f = enforce_str(_f)  # Ensure string for validation and path operations
        filename_stem, _ = _tokenize_file(f)

        if filename_stem == best_match_stem and (CONFIG.is_valid_booktype(
            f, booktype=BookType.EBOOK.value
        ) or _is_metadata_file(f)):
            source = os.path.join(candidate_dir, f)
            if path_isdir(source):
                # target_dir already exists (make_dirs above), so allow copying into it
                shutil.copytree(source, target_dir, dirs_exist_ok=True)
            else:
                dest = os.path.join(target_dir, f)
                shutil.copyfile(source, dest)

    return target_dir, False, ""


def _validate_candidate_directory(
    candidate_ptr, logger: logging.Logger
) -> "tuple[bool, str]":
    """
    Validate that a candidate directory is suitable for processing.

    Checks for:
    - Empty directories
    - Presence of .bts files (BitTorrent Sync files)

    Args:
        candidate_ptr: Path to candidate directory
        logger: Logger instance

    Returns:
        Tuple of (is_valid, skip_reason)
    """
    if not listdir(candidate_ptr):
        logger.debug(f"Skipping {candidate_ptr}, folder is empty")
        return False, "Folder is empty"

    if bts_file(candidate_ptr):
        logger.debug(f"Skipping {candidate_ptr}, found a .bts file")
        return False, "Folder contains .bts file"

    return True, ""


def _extract_archives_in_directory(
    candidate_ptr,
    download_dir: str,
    title: str,
) -> "tuple[str, bool]":
    """
    Extract all archives in a directory and return the path to extracted content.

    Handles both multipart archives and regular archives. Updates the candidate
    pointer to the extracted location if successful.

    Args:
        candidate_ptr: Directory containing archives
        download_dir: Parent download directory
        title: Title for naming extracted content

    Returns:
        Tuple of (new_candidate_ptr, content_changed)
        - new_candidate_ptr: Updated path (same as input if nothing changed)
        - content_changed: Whether any archives were extracted
    """
    # Count zip files to detect multipart archives
    zipfile_count = _count_zipfiles_in_directory(candidate_ptr)

    if zipfile_count == 0:
        return candidate_ptr, False

    incoming_candidate = candidate_ptr

    # Handle multipart archives first
    if zipfile_count > 1:
        unpacked_path = unpack_multipart(candidate_ptr, download_dir, title)
        if unpacked_path:
            candidate_ptr = unpacked_path

    # Extract remaining archives
    for _dirpath, _, files in os.walk(candidate_ptr):
        dirpath = enforce_str(_dirpath)  # Ensure string for path operations
        for item in files:
            _, extn = _tokenize_file(item)
            extn = extn.lower()

            # Skip files that are ebooks/comics (they're already in zip format).
            # _tokenize_file returns extensions without a leading dot
            if extn not in ["epub", "cbr", "cbz"]:
                res = unpack_archive(os.path.join(dirpath, item), download_dir, title)
                if res:
                    candidate_ptr = res
                    break

    content_changed = candidate_ptr != incoming_candidate
    return candidate_ptr, content_changed


def _calculate_fuzzy_match(title1: str, title2: str, fuzzlogger: "logging.Logger | None" = None) -> float:
    """
    Calculate fuzzy match percentage between two titles.

    Args:
        title1: First title to compare
        title2: Second title to compare
        fuzzlogger: Optional logger for fuzzy matching debug messages

    Returns:
        Match percentage (0-100)
    """
    match_fuzz = fuzz.token_set_ratio(title1, title2)

    if match_fuzz >= CONFIG.get_int('NAME_RATIO'):
        # if it's a close enough match, check for purely numeric differences.
        # This is in case we have multiple issues of a magazine in the folder
        # where the only difference is the date, e.g.
        # "magazine issue No 1.pdf", "magazine issue No 2.pdf", where the fuzzy match is too close
        num1 = []
        num2 = []
        set1 = set(title1.split())
        set2 = set(title2.split())
        for word in set1:
            # see if word coerces to an integer or a float
            word = word.replace('-', '')
            try:
                num1.append(float(re.findall(r'\d+\.\d+', word)[0]))
            except IndexError:
                with contextlib.suppress(IndexError):
                    num1.append(int(re.findall(r'\d+', word)[0]))
        for word in set2:
            word = word.replace('-', '')
            try:
                num2.append(float(re.findall(r'\d+\.\d+', word)[0]))
            except IndexError:
                with contextlib.suppress(IndexError):
                    num2.append(int(re.findall(r'\d+', word)[0]))
        if fuzzlogger:
            fuzzlogger.debug(f"[{title1}][{title2}]{num1}:{num2}")
        if num1 and num2 and num1 != num2:
            return CONFIG.get_int('NAME_RATIO') - 1  # only difference is a number, not the same
    return match_fuzz


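# Added note on the numeric guard above: fuzz.token_set_ratio scores titles like
# "Magazine No 1" vs "Magazine No 2" very highly because almost all tokens are
# shared, so without the guard two different issues would look like the same
# title. Illustrative behaviour (derived from the code):
#     _calculate_fuzzy_match("Magazine No 1", "Magazine No 2")
#     # -> NAME_RATIO - 1 (forced miss), assuming the raw score exceeds NAME_RATIO

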
def _find_matching_subdir(
    directory: str,
    target_title: str,
    match_threshold: float,
    book_type: str,
    logger: logging.Logger,
) -> "tuple[str, float]":
    """
    Search a directory for a SUBDIRECTORY that matches the target title and contains books.

    Used for collections organized with each book in its own subdirectory.
    Particularly common for audiobook series downloads.

    Args:
        directory: Parent directory to search
        target_title: Normalized title to match against
        match_threshold: Minimum match percentage to consider valid
        book_type: Type of media to look for
        logger: Logger for reporting matching debug messages

    Returns:
        Tuple of (matched_subdir_path, match_percent)
        Returns ("", 0) if no matching subdirectory found
    """
    best_match_path = ""
    best_match_percent = 0

    try:
        items = listdir(directory)
    except Exception as e:
        logger.debug(f"Error listing directory {directory}: {e}")
        return "", 0

    for _item in items:
        item = enforce_str(_item)  # Ensure string for path operations
        item_path = os.path.join(directory, item)

        # Only consider subdirectories
        if path_isdir(item_path):
            try:
                # Check if this subdirectory contains the target book type
                subdir_files = listdir(item_path)
                has_book = any(
                    CONFIG.is_valid_booktype(enforce_str(f), booktype=book_type)
                    for f in subdir_files
                )

                if has_book:
                    # Fuzzy match the subdirectory name against target
                    # _normalize_title now handles stripping known extensions intelligently
                    normalized_dirname = _normalize_title(item)
                    match_percent = _calculate_fuzzy_match(
                        target_title, normalized_dirname, logger
                    )

                    logger.debug(
                        f"{round(match_percent, 2)}% match (subdir) {target_title} : {normalized_dirname}"
                    )

                    # Track best match
                    if (
                        match_percent >= match_threshold
                        and match_percent > best_match_percent
                    ):
                        best_match_path = item_path
                        best_match_percent = match_percent

            except Exception as e:
                logger.debug(f"Error checking subdirectory {item}: {e}")
                continue

    return best_match_path, best_match_percent


def _find_matching_file_in_directory(
    directory: str,
    target_title: str,
    match_threshold: float,
    fuzzlogger: logging.Logger,
) -> "tuple[str, float]":
    """
    Search a directory for a file that matches the target title.

    Args:
        directory: Directory to search
        target_title: Normalized title to match against
        match_threshold: Minimum match percentage to consider valid
        fuzzlogger: Logger for fuzzy matching debug messages

    Returns:
        Tuple of (matched_file_path, match_percent)
        Returns ("", 0) if no match found
    """
    for _f in listdir(directory):
        f = enforce_str(_f)  # Ensure string for path operations
        if _is_valid_media_file(f, book_type="ebook", include_archives=True):
            filename_stem, _ = _tokenize_file(f)
            normalized_filename = _normalize_title(filename_stem)
            match_percent = _calculate_fuzzy_match(target_title, normalized_filename, fuzzlogger)
            is_match = match_percent >= match_threshold

            fuzzlogger.debug(
                f"{round(match_percent, 2)}% match {target_title} : {normalized_filename}"
            )

            if is_match:
                return os.path.join(directory, str(f)), match_percent

    return "", 0


def _create_and_cache_cover(
    dest_file: str, media_type: BookType, pagenum=1
) -> str | None:
    """
    Create and cache a cover image for comics/magazines.

    Args:
        dest_file: Path to the media file
        media_type: "comic" or "magazine"
        pagenum: Page number to use for cover (default: 1)

    Returns:
        Cached cover path (e.g., "cache/comic/abc123.jpg") or None
    """
    coverfile = create_mag_cover(dest_file, pagenum=pagenum, refresh=True)

    if not coverfile:
        return None

    # need cache folder as "magazine" not "BookType.MAGAZINE"
    sub_cache = media_type.value
    myhash = uuid.uuid4().hex
    hashname = os.path.join(DIRS.CACHEDIR, sub_cache, f"{myhash}.jpg")
    shutil.copyfile(coverfile, hashname)
    setperm(hashname)
    createthumbs(hashname)

    return f"cache/{sub_cache}/{myhash}.jpg"


def _update_issue_database(
    db,
    media_type: BookType,
    book_id: str,
    issue_id: str,
    dest_file: str,
    coverfile: str,
    older: int,
    aux_info="",
) -> None:
    """
    Update database for comic/magazine issues.

    Args:
        db: Database connection
        media_type: "comic" or "magazine"
        book_id: Comic/Magazine ID
        issue_id: Issue identifier
        dest_file: Path to processed file
        coverfile: Path to cached cover
        older: Whether this is an older issue than the current one
        aux_info: Additional info (used for magazines)
    """
    if media_type == BookType.COMIC:
        # Update comics table
        control_value_dict = {"ComicID": book_id}
        if older:
            new_value_dict = {
                "LastAcquired": today(),
                "IssueStatus": CONFIG["FOUND_STATUS"],
            }
        else:
            new_value_dict = {
                "LatestIssue": issue_id,
                "LastAcquired": today(),
                "LatestCover": coverfile,
                "IssueStatus": CONFIG["FOUND_STATUS"],
            }
        db.upsert("comics", new_value_dict, control_value_dict)

        # Update comicissues table
        control_value_dict = {"ComicID": book_id, "IssueID": issue_id}
        new_value_dict = {
            "IssueAcquired": today(),
            "IssueFile": dest_file,
            "Cover": coverfile,
        }
        db.upsert("comicissues", new_value_dict, control_value_dict)

    elif media_type == BookType.MAGAZINE:
        # Create issue ID
        issueid = create_id(f"{book_id} {aux_info}")

        # Update issues table
        control_value_dict = {"Title": book_id, "IssueDate": aux_info}
        new_value_dict = {
            "IssueAcquired": today(),
            "IssueFile": dest_file,
            "IssueID": issueid,
            "Cover": coverfile,
        }
        db.upsert("issues", new_value_dict, control_value_dict)

        # Update magazines table
        control_value_dict = {"Title": book_id}
        if older:
            new_value_dict = {
                "LastAcquired": today(),
                "IssueStatus": CONFIG["FOUND_STATUS"],
            }
        else:
            new_value_dict = {
                "LastAcquired": today(),
                "IssueStatus": CONFIG["FOUND_STATUS"],
                "IssueDate": aux_info,
                "LatestCover": coverfile,
            }
        db.upsert("magazines", new_value_dict, control_value_dict)


def _should_delete_processed_files(book_path, download_dir) -> "tuple[bool, str]":
    """
    Determine if processed files should be deleted based on configuration.

    Args:
        book_path: Path to the processed files
        download_dir: Root download directory

    Returns:
        Tuple of (should_delete, deletion_path)
        - should_delete: Whether files should be deleted
        - deletion_path: The path that should be deleted (may differ from book_path)
    """
    # Always delete unpacked files
    if ".unpack" in book_path:
        book_path = f"{book_path.split('.unpack')[0]}.unpack"
        return True, book_path

    # Don't delete if DESTINATION_COPY is enabled (keep source files)
    if CONFIG.get_bool("DESTINATION_COPY"):
        return False, book_path

    # Don't delete if path is the download root directory
    if book_path == download_dir.rstrip(os.sep):
        return False, book_path

    # Walk up subdirectories to find the top-level folder to delete
    deletion_path = book_path
    if deletion_path.startswith(download_dir) and ".unpack" not in deletion_path:
        while os.path.dirname(deletion_path) != download_dir.rstrip(os.sep):
            deletion_path = os.path.dirname(deletion_path)

    return True, deletion_path


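# Worked example (added sketch; paths are hypothetical): with
# download_dir = "/downloads" and book_path = "/downloads/series/book/disc1",
# the walk-up loop above returns (True, "/downloads/series"), i.e. the
# top-level folder inside the download root, so the whole download is removed.
# A book_path of "/downloads/abc12345.unpack/book" instead returns
# (True, "/downloads/abc12345.unpack").

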
def _cleanup_successful_download(book_path, download_dir, book_state, logger) -> None:
    """
    Clean up files after successful processing.

    Args:
        book_path: Path to processed files
        download_dir: Root download directory
        book_state: BookState instance with download info
        logger: Logger instance
    """
    should_delete, deletion_path = _should_delete_processed_files(
        book_path, download_dir
    )

    logger.debug(f"To Delete: {deletion_path} {should_delete}")

    if should_delete:
        try:
            shutil.rmtree(deletion_path, ignore_errors=True)
            logger.debug(
                f"Deleted {deletion_path} for {book_state.download_title}, "
                f"{book_state.mode_type} from {book_state.source}"
            )
        except Exception as why:
            logger.warning(
                f"Unable to remove {deletion_path}, {type(why).__name__} {why!s}"
            )
    elif CONFIG.get_bool("DESTINATION_COPY"):
        logger.debug(f"Not removing {deletion_path} as Keep Files is set")
    else:
        logger.debug(f"Not removing {deletion_path} as in download root")


def _send_download_notifications(
    book_state: BookState, book_type: str, global_name: str, notification_id: str
) -> None:
    """
    Send all notifications for a successful download.

    Args:
        book_state: BookState instance with download info
        book_type: Type of media
        global_name: Formatted name for the downloaded item
        notification_id: Book/Issue ID for notifications
    """
    dispname = book_state.get_display_name(CONFIG)

    custom_notify_download(f"{book_state.book_id} {book_type}")
    notify_download(
        f"{book_type} {global_name} from {dispname} at {now()}",
        notification_id,
    )
    mailing_list(book_type, global_name, notification_id)
    _update_downloads_provider_count(book_state.download_provider)


def _handle_failed_processing(
    book_state: BookState,
    book_path: str,
    metadata: BookMetadata,
    dest_file: str,
    db,
    logger: logging.Logger,
) -> None:
    """
    Handle cleanup and notifications for failed processing.

    Args:
        book_state: BookState instance with download info
        book_path: Path to processed files
        metadata: BookMetadata object with book information
        dest_file: Error message or destination file path
        db: Database connection
        logger: Logger instance
    """
    global_name = metadata.global_name
    book_type_enum = metadata.book_type_enum
    book_type_str = metadata.book_type
    logger.error(
        f"Postprocessing for {global_name!r} has failed: {dest_file!r}"
    )

    # Mark failure in BookState
    book_state.mark_failed("destination", dest_file)

    # Send failure notifications
    dispname = book_state.get_display_name(CONFIG)
    custom_notify_snatch(f"{book_state.book_id} {book_type_str}", fail=True)
    notify_snatch(
        f"{book_type_str} {global_name} from {dispname} at {now()}",
        fail=True,
    )

    # Update database status to Failed
    control_value_dict = {
        "NZBurl": book_state.download_url,
        "Status": "Snatched",
    }
    new_value_dict = {
        "Status": "Failed",
        "DLResult": enforce_str(make_unicode(dest_file)),
        "NZBDate": now(),
    }
    db.upsert("wanted", new_value_dict, control_value_dict)

    # Reset book status to Wanted so we can try a different version
    if book_type_enum == BookType.EBOOK:
        db.action(
            "UPDATE books SET status='Wanted' WHERE BookID=?",
            (book_state.book_id,),
        )
    elif book_type_enum == BookType.AUDIOBOOK:
        db.action(
            "UPDATE books SET audiostatus='Wanted' WHERE BookID=?",
            (book_state.book_id,),
        )

    # Handle failed download cleanup
    _cleanup_failed_download(book_path, logger)


def _cleanup_failed_download(book_path, logger) -> None:
    """
    Clean up files from a failed download.

    Either deletes the files or moves them to a .fail directory based on config.

    Args:
        book_path: Path to the failed download files
        logger: Logger instance
    """
    if CONFIG.get_bool("DEL_DOWNLOADFAILED"):
        logger.debug(f"Deleting {book_path}")
        shutil.rmtree(book_path, ignore_errors=True)
    else:
        # Move to .fail directory for manual inspection
        fail_path = f"{book_path}.fail"
        shutil.rmtree(fail_path, ignore_errors=True)

        try:
            _ = safe_move(book_path, fail_path)
            logger.warning(f"Residual files remain in {fail_path}")
        except Exception as why:
            logger.error(
                f"Unable to rename {book_path!r}, {type(why).__name__} {why!s}"
            )

            # Diagnose permission issues
            if not os.access(syspath(book_path), os.R_OK):
                logger.error(f"{book_path!r} is not readable")
            if not os.access(syspath(book_path), os.W_OK):
                logger.error(f"{book_path!r} is not writeable")
            if not os.access(syspath(book_path), os.X_OK):
                logger.error(f"{book_path!r} is not executable")

            # Test parent directory writability
            parent = os.path.dirname(book_path)
            try:
                test_file = os.path.join(parent, "ll_temp")
                with open(syspath(test_file), "w", encoding="utf-8") as f:
                    f.write("test")
                remove_file(test_file)
            except Exception as why:
                logger.error(f"Parent Directory {parent} is not writeable: {why}")

            logger.warning(f"Residual files remain in {book_path}")


def _try_match_candidate_file(
    candidate_file,
    book_state: BookState,
    download_dir: str,
    match_threshold: float,
    logger: logging.Logger,
    fuzzlogger: logging.Logger,
) -> "tuple[bool, float]":
    """
    Try to match a candidate file/folder against the target book.

    Performs fuzzy matching on the filename, and if there is no match but it's
    a directory, drills down to search for matches:
    1. First tries matching subdirectories (for series/collection folders)
    2. Then tries matching files at root level (for flat collections)

    This supports:
    - Audiobook series with each book in a subdirectory
    - Ebook series with each book in a subdirectory
    - Flat ebook/audiobook collections (files at root)

    Args:
        candidate_file: Filename in download directory
        book_state: BookState being matched
        download_dir: Download directory path
        match_threshold: Minimum match percentage
        logger: Logger for general logging
        fuzzlogger: Logger for fuzzy matching

    Returns:
        Tuple of (is_match, match_percent)
    """
    book_state.update_candidate(os.path.join(download_dir, candidate_file))

    fuzzlogger.debug(f"Checking candidate {candidate_file}")
    filename_stem, extn = _tokenize_file(candidate_file)

    skipped_extensions = get_list(CONFIG["SKIPPED_EXT"])
    if extn in skipped_extensions:
        logger.debug(f"Skipping {candidate_file}, extension not considered")
        return False, 0

    # Fuzzy match the candidate filename
    normalized_candidate = _normalize_title(filename_stem)
    match_percent = _calculate_fuzzy_match(
        book_state.download_title, normalized_candidate, fuzzlogger
    )
    is_match = match_percent >= match_threshold

    fuzzlogger.debug(
        f"{round(match_percent, 2)}% match {book_state.download_title} : {normalized_candidate}"
    )

    # If no match and it's a directory, drill down to find the right book
    if not is_match and path_isdir(book_state.candidate_ptr or ""):
        logger.debug(f"{candidate_file} is a directory, checking contents")

        book_type_str = book_state.get_book_type_str()
        if not book_type_str:
            return False, 0

        # Use actual book title for drill-down if available, otherwise use download title
        # This is critical for collections where download name != individual book name
        search_title = (
            book_state.book_title
            if book_state.book_title
            else book_state.download_title
        )

        # _normalize_title now handles stripping known extensions intelligently
        search_title = _normalize_title(search_title)

        # Try 1: Match subdirectories (for collections organized in folders)
        # This is common for audiobook series and some ebook collections
        matched_subdir, subdir_match_percent = _find_matching_subdir(
            book_state.candidate_ptr or "",
            search_title,
            match_threshold,
            book_type_str,
            fuzzlogger,
        )

        if matched_subdir:
            logger.debug(
                f"Found matching subdirectory: {os.path.basename(matched_subdir)}"
            )
            book_state.update_candidate(matched_subdir)
            is_match = True
            match_percent = subdir_match_percent
        else:
            # Try 2: Match files at root level (for collections with files in one directory)
            matched_file, file_match_percent = _find_matching_file_in_directory(
                book_state.candidate_ptr or "",
                search_title,
                match_threshold,
                fuzzlogger,
            )

            if matched_file:
                logger.debug(f"Found matching file: {os.path.basename(matched_file)}")
                book_state.update_candidate(matched_file)
                is_match = True
                match_percent = file_match_percent

    return is_match, match_percent


def _process_matched_directory(
    book_state: BookState,
    download_dir: str,
    match_percent: float,
    logger: logging.Logger,
    fuzzlogger: logging.Logger,
) -> "tuple[bool, str]":
    """
    Process a matched file or directory to extract and validate the media file.

    Handles:
    - Single files in download root (isolates to .unpack subdirectory)
    - Finding valid files in directories
    - Extracting archives if no valid files found
    - Handling multi-book collections
    - Final validation

    For single files in download root, selectively transfers ONLY files matching
    the book's filename to an isolated .unpack subdirectory to protect other files.

    Args:
        book_state: BookState with candidate_ptr pointing to matched file/directory
        download_dir: Download directory path
        match_percent: Initial match percentage
        logger: Logger instance
        fuzzlogger: Fuzzy match logger

    Returns:
        Tuple of (is_valid, skip_reason)
    """
    candidate_ptr = book_state.candidate_ptr or ""
    if not path_isdir(candidate_ptr):
        # It's a single file - check if it's in download root
        file_dir = os.path.dirname(candidate_ptr)

        if file_dir == download_dir.rstrip(os.sep):
            # Single file in download root - need to isolate it to protect other files
            logger.debug(f"Single file in download root: {candidate_ptr}")

            fname = os.path.basename(candidate_ptr)
            fname_prefix = os.path.splitext(fname)[0]

            # Remove trailing noise characters
            while fname_prefix and fname_prefix[-1] in "_. ":
                fname_prefix = fname_prefix[:-1]

            # Determine if we should copy or move
            if CONFIG.get_bool("DESTINATION_COPY") or (
                book_state.is_torrent() and CONFIG.get_bool("KEEP_SEEDING")
            ):
                copy_files = True
            else:
                copy_files = False

            # Create isolated .unpack directory
            targetdir = os.path.join(download_dir, f"{md5_utf8(fname_prefix)[-8:]}.unpack")
            if not make_dirs(targetdir, new=True):
                return False, f"Failed to create isolation directory {targetdir}"

            # Selectively transfer ONLY files matching this book's name
            cnt = _transfer_matching_files(
                download_dir, targetdir, fname_prefix, copy=copy_files
            )

            if cnt:
                # Successfully isolated - update candidate to the folder
                book_state.update_candidate(targetdir)
                logger.debug(f"Isolated {cnt} file(s) to {targetdir}")
                return True, ""  # Success - file isolated to .unpack folder
            # No files transferred - cleanup empty directory
            with contextlib.suppress(OSError):
                os.rmdir(targetdir)
            return False, "Failed to isolate file to subdirectory"

        # File not in root - update candidate_ptr to parent directory
        # process_destination expects a directory, not a file
        parent_dir = os.path.dirname(book_state.candidate_ptr or "")
        book_state.update_candidate(parent_dir)
        logger.debug(f"Updated candidate from file to parent directory: {parent_dir}")
        return True, ""

    logger.debug(
        f"Found folder ({round(match_percent, 2)}%) [{book_state.candidate_ptr}] "
        f"for {book_state.get_book_type_str()} {book_state.download_title}"
    )

    # First pass: Look for valid files
    valid_file_path = _find_valid_file_in_directory(
        book_state.candidate_ptr, book_type=book_state.get_book_type_str()
    )

    if valid_file_path:
        book_state.update_candidate(os.path.dirname(valid_file_path))
    else:
        # No valid file, try extracting archives
        new_candidate, archives_extracted = _extract_archives_in_directory(
            book_state.candidate_ptr, download_dir, book_state.download_title
        )
        book_state.update_candidate(new_candidate)

        # If we extracted archives, search again
        if archives_extracted:
            valid_file_path = _find_valid_file_in_directory(
                book_state.candidate_ptr,
                book_type=book_state.get_book_type_str(),
                recurse=True,
            )

            if not valid_file_path:
                logger.debug("No valid file after extraction")
                return False, "No valid file found after extraction"
        else:
            return False, "No valid file or archives found"

    # Handle multi-book collections for eBooks
    # If folder contains multiple books, extract ONLY the best matching one
    book_type_enum = book_state.get_book_type_enum()
    if book_type_enum == BookType.EBOOK:
        mult = multibook(book_state.candidate_ptr, recurse=True)
        if mult:
            # Use actual book title for better matching in collections
            search_title = (
                book_state.book_title
                if book_state.book_title
                else book_state.download_title
            )

            # Found collection - extract best match to isolated directory
            extracted_path, skipped, skip_reason = _extract_best_match_from_collection(
                book_state.candidate_ptr,
                search_title,
                download_dir,
                logger,
                fuzzlogger,
            )
            if skipped:
                return False, skip_reason
            book_state.update_candidate(extracted_path)
    else:
        # For non ebook types, just find the file
        book_type_str = book_state.get_book_type_str()
        if book_type_str:
            result = book_file(
                book_state.candidate_ptr or "",
                book_type_str,
                recurse=True,
                config=CONFIG,
            )
            if result:
                book_state.update_candidate(os.path.dirname(result))
            else:
                return False, f"No {book_type_enum.value} found"

    # Final validation
    is_valid, skip_reason = _validate_candidate_directory(
        book_state.candidate_ptr, logger
    )
    return is_valid, skip_reason


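# Example (illustrative) for _process_matched_directory: a single file
# "Great Book.epub" in the download root is isolated by hashing its stem,
# i.e. files matching "Great Book" are moved (or copied when keeping files
# or still seeding) into "<download_dir>/XXXXXXXX.unpack/", where XXXXXXXX
# is the last 8 characters of md5_utf8("Great Book"), and processing then
# continues on that isolation folder.

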
def _find_best_match_in_downloads(
    book_state,
    all_downloads,
    match_threshold,
    logger,
    fuzzlogger,
) -> "tuple[float, str]":
    """
    Search through all downloads to find the best match for a book.

    This is the core matching loop (Matching Stage | Second Pass, i.e. the
    Fallback Search) that:
    1. Iterates through all candidates from all directories
    2. Fuzzy matches each candidate
    3. Searches inside directories if needed
    4. Processes matched directories (extract archives, handle collections)
    5. Tracks all matches and returns the best one

    Args:
        book_state: BookState to find matches for
        all_downloads: List of (parent_dir, filename) tuples from all directories
        match_threshold: Minimum match percentage
        logger: Logger instance
        fuzzlogger: Fuzzy match logger

    Returns:
        Tuple of (best_match_percent, skip_reason)
        Updates book_state.candidate_ptr to best match location
    """
    matches = []
    if not all_downloads:
        return 0, "No downloads found"

    book_type = book_state.get_book_type_str()
    logger.debug(f"Fuzzy searching for {book_type} across all downloads")

    for parent_dir, candidate_file in all_downloads:
        # Try to match this candidate
        is_match, match_percent = _try_match_candidate_file(
            candidate_file,
            book_state,
            parent_dir,
            match_threshold,
            logger,
            fuzzlogger,
        )

        if is_match:
            # Process matched directory (extract archives, handle collections, validate)
            is_valid, skip_reason = _process_matched_directory(
                book_state,
                parent_dir,
                match_percent,
                logger,
                fuzzlogger,
            )

            if is_valid:
                matches.append([match_percent, book_state.candidate_ptr])
                if match_percent == 100:
                    # Perfect match, no need to keep searching
                    break
            else:
                book_state.mark_skipped(skip_reason)
        # Even non-matches get tracked to report closest match
        elif match_percent > 0:
            matches.append([match_percent, book_state.candidate_ptr])

    # Find the best match
    if not matches:
        return 0, "No matches found"

    highest = max(matches, key=lambda x: x[0])
    best_match_percent = highest[0]
    best_candidate_ptr = highest[1]

    book_state.update_candidate(best_candidate_ptr)

    if best_match_percent >= match_threshold:
        logger.debug(
            f"Found match ({round(best_match_percent, 2)}%): {best_candidate_ptr} "
            f"for {book_type} {book_state.download_title}"
        )
        return best_match_percent, ""
    logger.debug(
        f"Closest match ({round(best_match_percent, 2)}%): {best_candidate_ptr}"
    )
    for match in matches:
        fuzzlogger.debug(f"Match: {round(match[0], 2)}% {match[1]}")
    return best_match_percent, "No match above threshold"


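# Example (illustrative) for _find_best_match_in_downloads: with
# matches == [[82.5, "/downloads/foo"], [91.0, "/downloads/bar"]], the
# highest entry (91.0, "/downloads/bar") becomes the candidate; it counts
# as a success only when 91.0 >= match_threshold, otherwise the caller
# gets (91.0, "No match above threshold") and every near miss is logged
# to the fuzzy logger.

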
def _process_book_post(
    metadata: EbookMetadata,
    dest_file: str,
    book_id: str,
) -> str:
    """
    Handle post-processing for ebooks and audiobooks.

    Args:
        metadata: EbookMetadata object
        dest_file: Destination file path
        book_id: Book ID

    Returns:
        book_name
    """
    if metadata.book_name and dest_file:
        _process_extras(dest_file, metadata.global_name, book_id, metadata.book_type)

    return metadata.book_name


def _process_comic_post(
    metadata: ComicMetadata, dest_file: str, mostrecentissue: str, db
) -> "tuple[str, str]":
    """
    Handle post-processing for comics.

    Args:
        metadata: ComicMetadata object
        dest_file: Destination file path
        mostrecentissue: Most recent issue date for comparison
        db: Database connection

    Returns:
        Tuple of (bookname, issueid) for notification purposes
    """
    comicid = metadata.comic_id
    issueid = metadata.issue_id

    if comicid:
        # Determine if this is an older issue
        older = int(mostrecentissue) > int(issueid) if mostrecentissue else False

        # Create and cache cover
        coverfile = _create_and_cache_cover(dest_file, BookType.COMIC, pagenum=1) or ""

        # Update database
        _update_issue_database(
            db, BookType.COMIC, comicid, issueid, dest_file, coverfile, older
        )

    return "", issueid


def _process_magazine_post(
    metadata: MagazineMetadata, dest_file: str, mostrecentissue: str, book_state, db
) -> "tuple[str, str]":
    """
    Handle post-processing for magazines.

    Args:
        metadata: MagazineMetadata object
        dest_file: Destination file path
        mostrecentissue: Most recent issue date for comparison
        book_state: BookState for aux_info access
        db: Database connection

    Returns:
        Tuple of (bookname, issueid) for notification purposes
    """
    issueid = metadata.issue_id

    if mostrecentissue:
        if mostrecentissue.isdigit() and str(book_state.aux_info).isdigit():
            older = int(mostrecentissue) > int(book_state.aux_info)
        else:
            older = mostrecentissue > book_state.aux_info
    else:
        older = False

    # Get cover page from metadata
    if CONFIG.get_bool("SWAP_COVERPAGE"):
        coverpage = 1
    else:
        coverpage = metadata.cover_page if metadata.cover_page else 1

    coverfile = (
        _create_and_cache_cover(dest_file, BookType.MAGAZINE, pagenum=coverpage) or ""
    )

    # Update database
    _update_issue_database(
        db,
        BookType.MAGAZINE,
        book_state.book_id,
        book_state.aux_info,
        dest_file,
        coverfile,
        older,
        book_state.aux_info,
    )

    # Auto-add if enabled
    if CONFIG["IMP_AUTOADDMAG"]:
        dest_path = os.path.dirname(dest_file)
        _process_auto_add(dest_path, book_type_enum=BookType.MAGAZINE)

    return "", issueid


def _process_successful_download(
    book_state: BookState,
    metadata: BookMetadata,
    dest_file,
    book_path,
    download_dir,
    mostrecentissue,
    ignoreclient: bool,
    db,
    logger: logging.Logger,
) -> int:
    """
    Handle all post-processing for a successful download.

    This includes:
    - Processing extras (for books)
    - Creating covers (for comics/magazines)
    - Updating database
    - Deleting from download client
    - Cleaning up files
    - Sending notifications

    Args:
        book_state: BookState instance
        metadata: BookMetadata object with book information
        dest_file: Destination file path
        book_path: Processing path
        download_dir: Download directory
        mostrecentissue: Most recent issue (for comics/magazines)
        ignoreclient: Whether to skip download client interaction
        db: Database connection
        logger: Logger instance

    Returns:
        1 if successfully processed, 0 otherwise
    """
    global_name = metadata.global_name
    book_type = metadata.book_type
    logger.debug(
        f"Processed {book_state.mode_type} ({book_path}): {global_name}, {book_state.download_url}"
    )
    dest_file = enforce_str(make_unicode(dest_file))

    # Update wanted table status to Processed
    control_value_dict = {
        "NZBurl": book_state.download_url,
        "Status": "Snatched",
    }
    new_value_dict = {
        "Status": "Processed",
        "NZBDate": now(),
        "DLResult": dest_file,
    }
    db.upsert("wanted", new_value_dict, control_value_dict)

    # Type-specific post-processing
    if isinstance(metadata, EbookMetadata):
        bookname = _process_book_post(metadata, dest_file, book_state.book_id)
        issueid = 0
    elif isinstance(metadata, ComicMetadata):
        bookname, issueid = _process_comic_post(
            metadata, dest_file, mostrecentissue, db
        )
    elif isinstance(metadata, MagazineMetadata):
        bookname, issueid = _process_magazine_post(
            metadata, dest_file, mostrecentissue, book_state, db
        )
    else:
        # Unknown metadata type (should never happen)
        bookname = ""
        issueid = 0

    # Delete from download client if appropriate
    to_delete = True
    if ignoreclient is False and to_delete:
        if book_state.can_delete_from_client():
            book_state.progress, book_state.finished = get_download_progress(
                book_state.source, book_state.download_id
            )
            logger.debug(
                f"Progress for {book_state.download_title} {book_state.progress}/{book_state.finished}"
            )

            if isinstance(book_state.progress, int) and book_state.progress == 100:
                _update_download_status(book_state, db, logger)
            elif isinstance(book_state.progress, int) and book_state.progress < 0:
                logger.debug(
                    f"{book_state.download_title} not found at {book_state.source}"
                )
        elif not book_state.source:
            logger.warning(f"Unable to remove {book_state.download_title}, no source")
        elif book_state.source != "DIRECT":  # direct sources don't have anything to remove
            logger.warning(
                f"Unable to remove {book_state.download_title} from {book_state.source}, no DownloadID"
            )

    # Clean up source files
    _cleanup_successful_download(book_path, download_dir, book_state, logger)

    logger.info(f"Successfully processed: {global_name}")

    # Send notifications
    notification_id = book_state.book_id if bookname else str(issueid)
    _send_download_notifications(book_state, book_type, global_name, notification_id)

    # Mark as successfully processed
    book_state.mark_processed()

    return 1


def _process_book_after_matching(
    book_state: BookState,
    parent_dir: str,
    ignoreclient: bool,
    db,
    logger: logging.Logger,
):
    """
    Process a book after candidate location has been found and validated.

    This is the common path used after both targeted and fallback search.
    Handles metadata retrieval, destination processing, and post-processing.

    Args:
        book_state: BookState with candidate_ptr set to valid location
        parent_dir: Parent directory for processing context
        ignoreclient: Whether to skip download client interaction
        db: Database connection
        logger: Logger instance

    Returns:
        Number of books processed (1 if successful, 0 if failed)
    """
    # Retrieve metadata and prepare destination paths
    if book_state.is_book():  # eBook or Audiobook
        metadata = prepare_book_metadata(
            book_state.book_id, book_state.get_book_type_str(), db
        )
        if not metadata:
            logger.warning(f"Unable to retrieve metadata for {book_state.book_id}")
            book_state.mark_failed(
                "metadata", f"Book {book_state.book_id} not found in database"
            )
            return 0
        mostrecentissue = ""

    elif book_state.is_magazine():  # Magazine
        metadata = prepare_magazine_metadata(
            book_state.book_id, book_state.aux_info, db
        )
        if not metadata or not metadata.dest_path:
            logger.warning(
                f"Unable to retrieve magazine metadata for {book_state.book_id}"
            )
            book_state.mark_failed(
                "metadata", f"Magazine {book_state.book_id} not found in database"
            )
            return 0
        if not make_dirs(metadata.dest_path):
            logger.warning(f"Unable to create directory {metadata.dest_path}")
            book_state.mark_failed(
                "metadata", f"Cannot create directory {metadata.dest_path}"
            )
            return 0
        mostrecentissue = metadata.most_recent_issue

    else:  # Comic
        metadata = prepare_comic_metadata(book_state.book_id, db)
        if not metadata:
            emsg = f'Nothing in database matching "{book_state.book_id}"'
            logger.debug(emsg)
            book_state.mark_failed("metadata", emsg)
            control_value_dict = {"BookID": book_state.book_id, "Status": "Snatched"}
            new_value_dict = {"Status": "Failed", "NZBDate": now(), "DLResult": emsg}
            db.upsert("wanted", new_value_dict, control_value_dict)
            return 0
        if not make_dirs(metadata.dest_path):
            logger.warning(f"Unable to create directory {metadata.dest_path}")
            book_state.mark_failed(
                "metadata", f"Cannot create directory {metadata.dest_path}"
            )
            return 0
        mostrecentissue = metadata.most_recent_issue
        logger.debug(f"Processing {metadata.title} issue {metadata.issue_date}")

    # Process the downloaded files and move them to the target destination
    success, dest_file, book_path = _process_destination(
        book_metadata=metadata,
        book_path=book_state.candidate_ptr or "",
        logger=logger,
        mode=book_state.mode_type,
    )

    if success:
        # Handle successful processing
        return _process_successful_download(
            book_state,
            metadata,
            dest_file,
            book_path,
            parent_dir,
            mostrecentissue,
            ignoreclient,
            db,
            logger,
        )
    # Handle failed processing
    _handle_failed_processing(
        book_state, book_path, metadata, dest_file, db, logger
    )
    return 0


def _process_snatched_book(
    book_state,
    all_downloads,
    ignoreclient: bool,
    db,
    logger,
    fuzzlogger,
) -> int:
    """
    Process a single snatched book using fallback fuzzy search (Matching Stage | Second Pass).

    This is called when targeted search fails or isn't available.
    Fuzzy matches against all compiled downloads from all directories.

    Args:
        book_state: BookState instance for tracking the item being processed
        all_downloads: List of (parent_dir, filename) tuples from all directories
        ignoreclient: Whether to skip download client interaction
        db: Database connection
        logger: Logger instance
        fuzzlogger: Fuzzy match logger

    Returns:
        Number of items successfully processed (0 or 1)
    """
    match_threshold = CONFIG.get_int("DLOAD_RATIO")

    # Find best matching candidate across all downloads
    match_percent, skip_reason = _find_best_match_in_downloads(
        book_state,
        all_downloads,
        match_threshold,
        logger,
        fuzzlogger,
    )

    if match_percent < match_threshold:
        logger.debug(f"No match found for {book_state.download_title}: {skip_reason}")
        book_state.mark_failed(
            "matching",
            f"No match above {match_threshold}% threshold (best: {match_percent}%)",
        )
        return 0

    # Match found - derive parent_dir from matched candidate_ptr
    parent_dir = os.path.dirname(book_state.candidate_ptr.rstrip(os.sep))
    if not path_isdir(book_state.candidate_ptr):
        # It's a file, get its parent directory
        parent_dir = os.path.dirname(book_state.candidate_ptr)

    # Continue with common post-matching processing
    return _process_book_after_matching(
        book_state, parent_dir, ignoreclient, db, logger
    )


def _calculate_download_age(snatched_date: str) -> tuple:
    """
    Calculate time elapsed since download was snatched.

    Args:
        snatched_date: NZBdate string in format 'YYYY-MM-DD HH:MM:SS'

    Returns:
        Tuple of (hours, minutes, total_seconds)
        Returns (0, 0, 0) if date parsing fails
    """
    try:
        when_snatched = datetime.datetime.strptime(snatched_date, "%Y-%m-%d %H:%M:%S")
        timenow = datetime.datetime.now()
        td = timenow - when_snatched
        diff = td.total_seconds()  # time difference in seconds
    except ValueError:
        diff = 0

    hours = int(diff / 3600)
    mins = int(diff / 60)

    return hours, mins, diff


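# Example (illustrative) for _calculate_download_age: an item snatched
# 90 minutes ago gives diff == 5400.0, so the function returns
# (1, 90, 5400.0) - hours truncate to whole hours while mins counts the
# full elapsed minutes, and an unparseable date yields (0, 0, 0).

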
def _handle_seeding_status(
    book_state: BookState,
    keep_seeding: bool,
    wait_for_seeding: bool,
    db,
    logger: logging.Logger,
) -> bool:
    """
    Handle downloads in 'Seeding' status.

    Checks if seeding is complete and handles:
    - Torrents removed from client (progress < 0)
    - Seeding completion based on config
    - File cleanup after seeding
    - Database status updates

    Args:
        book_state: BookState with download state
        keep_seeding: CONFIG['KEEP_SEEDING'] value
        wait_for_seeding: CONFIG['SEED_WAIT'] value
        db: Database connection
        logger: Logger instance

    Returns:
        True if item should be skipped (still seeding), False otherwise
    """
    logger.debug(
        f"Progress:{book_state.progress} Finished:{book_state.finished} "
        f"Waiting:{wait_for_seeding} Keep Seeding: {keep_seeding}"
    )

    # Handle case where torrent not found in client (was removed after seeding)
    if isinstance(book_state.progress, int) and book_state.progress < 0:
        # Torrent not found in client - it was removed after seeding completed
        # Files should still be on disk, but file processing loop has already run
        # Change status to Snatched so file matching logic will run next cycle to find and process files
        logger.info(
            f"{book_state.download_title} not found at {book_state.source}, "
            f"torrent was removed, changing status to Snatched to process files from download directory"
        )
        if book_state.book_id != "unknown":
            cmd = "UPDATE wanted SET status='Snatched' WHERE status='Seeding' and DownloadID=?"
            db.action(cmd, (book_state.download_id,))
        # File matching will process it next cycle
        return True  # Skip to next item

    # Handle normal seeding completion
    if not keep_seeding and (book_state.finished or not wait_for_seeding):
        if book_state.finished:
            logger.debug(
                f"{book_state.download_title} finished seeding at {book_state.source}"
            )
        else:
            logger.debug(
                f"{book_state.download_title} not seeding at {book_state.source}"
            )

        if CONFIG.get_bool("DEL_COMPLETED"):
            logger.debug(
                f"Removing seeding completed {book_state.download_title} from {book_state.source}"
            )

            delfiles = not CONFIG.get_bool("DESTINATION_COPY")
            delete_task(
                book_state.source,
                book_state.download_id,
                delfiles,
            )

        if book_state.book_id != "unknown":
            cmd = "UPDATE wanted SET status='Processed',NZBDate=? WHERE status='Seeding' and DownloadID=?"
            db.action(cmd, (now(), book_state.download_id))
            logger.info(
                f"STATUS: {book_state.download_title} [Seeding -> Processed] Seeding complete"
            )

        # only delete the files if not in download root dir and DESTINATION_COPY not set
        # This is for downloaders (rtorrent) that don't let us tell them to delete files
        # NOTE it will silently fail if the torrent client downloadfolder is not local
        # e.g. in a docker or on a remote machine
        book_path = get_download_folder(book_state.source, book_state.download_id)
        if CONFIG.get_bool("DESTINATION_COPY"):
            logger.debug("Not removing original files as Keep Files is set")
        elif book_path in get_list(CONFIG["DOWNLOAD_DIR"]):
            logger.debug("Not removing original files as in download root")
        else:
            shutil.rmtree(book_path, ignore_errors=True)
            logger.debug(
                f"Deleted {book_path} for {book_state.download_title}, {book_state.mode_type} from {book_state.source}"
            )
        return True  # Skip to next item
    logger.debug(
        f"{book_state.download_title} still seeding at {book_state.source}"
    )
    return True  # Skip to next item


def _handle_snatched_timeout(
    book_state, hours: int, mins: int, max_hours: int, logger: logging.Logger
) -> tuple:
    """
    Handle timeout logic for downloads in 'Snatched' status.

    Determines if a snatched download should be aborted based on:
    - Time since snatched
    - Download progress
    - Whether torrent exists in client

    For downloads at 100% that timed out, attempts direct processing.

    Args:
        book_state: BookState with download state
        hours: Hours since download was snatched
        mins: Minutes since download was snatched
        max_hours: CONFIG['TASK_AGE'] maximum age before abort
        logger: Logger instance

    Returns:
        Tuple of (should_abort, should_skip_to_next)
        should_abort: True if download should be aborted
        should_skip_to_next: True if we should continue to next item (processed successfully)
    """
    should_abort = False
    should_skip = False
    short_wait = 5
    longer_wait = 30

    # has it been aborted (wait a short while before checking)
    if mins > short_wait and isinstance(book_state.progress, int) and book_state.progress < 0:
        # Torrent/download not found in client
        # Give slow magnets and client issues more time before aborting
        if mins < longer_wait:
            # Less than 30 minutes - could be slow magnet link or temporary client issue
            logger.debug(
                f"{book_state.download_title} not found at {book_state.source} but only "
                f"{mins} {plural(mins, 'minute')} old, waiting for torrent to appear"
            )
            should_abort = False
        else:
            # Over 30 minutes and never appeared - probably failed to add or was rejected
            logger.warning(
                f"{book_state.download_title} not found at {book_state.source} after "
                f"{mins} {plural(mins, 'minute')}, aborting"
            )
            should_abort = True

    if max_hours and hours >= max_hours:
        # SAB can report 100% (or more) and not finished if missing blocks and needs repair
        # For torrents, check if download is complete before timing out
        # This handles edge cases where:
        # - Torrent reached 100% but files aren't accessible yet (client still moving/verifying)
        # - Race condition where torrent completes just as timeout expires
        # - Large torrents that take time to post-process and hit timeout during processing
        # - Any case where a complete torrent somehow wasn't processed in the normal flow
        if check_int(book_state.progress, 0) >= 100:
            # Download is complete - attempt direct processing if it's a book/audiobook
            logger.info(
                f"{book_state.download_title} reached timeout but is 100% complete - attempting direct processing"
            )
            should_abort = False

            # For downloads at 100%, don't abort - let normal processing retry next cycle
            logger.debug(
                f"{book_state.download_title} at 100% will retry on next postprocessor run"
            )

        elif check_int(book_state.progress, 0) < 95:
            # Less than 95% after timeout - likely stuck
            should_abort = True

        elif (
            hours >= max_hours + 1
        ):  # Progress is 95-99% so let's give it an extra hour
            # Still not complete after extended timeout
            should_abort = True

    return should_abort, should_skip


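# Example (illustrative) for _handle_snatched_timeout with max_hours=24:
# progress -1 after 40 minutes -> abort (never appeared in the client);
# progress 100 after 25 hours -> no abort, retried on the next run;
# progress 90 after 25 hours -> abort (stuck below 95%);
# progress 97 after 24.5 hours -> wait, aborted only once 25 hours pass.

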
def _handle_aborted_download(
    book_state: BookState, hours: int, db, logger: logging.Logger
) -> None:
    """
    Handle downloads marked as 'Aborted'.

    Updates database, sends notifications, and optionally deletes
    the failed download task from the download client.

    Args:
        book_state: BookState with download state
        hours: Hours since download was snatched (for error message)
        db: Database connection
        logger: Logger instance
    """
    dlresult = ""
    if book_state.source and book_state.source != "DIRECT":
        if book_state.status == "Snatched":
            progress = f"{book_state.progress}"
            if progress.isdigit():  # could be "Unknown" or -1
                progress += "%"
            dlresult = (
                f"{book_state.download_title} was sent to {book_state.source} {hours} hours ago. "
                f"Progress: {progress}"
            )
            if check_int(book_state.progress, 0) == 100:
                dlresult += " Please check download directory is correct"
        else:
            dlresult = f"{book_state.download_title} was aborted by {book_state.source}"

    custom_notify_snatch(f"{book_state.book_id} {book_state.source}", fail=True)
    notify_snatch(
        f"{book_state.download_title} from {book_state.source} at {now()}", fail=True
    )

    # change status to "Failed", and ask downloader to delete task and files
    # Only reset book status to wanted if still snatched in case another download task succeeded
    if book_state.book_id != "unknown":
        cmd = ""
        book_type_enum = book_state.get_book_type_enum()
        if book_type_enum == BookType.EBOOK:
            cmd = (
                "UPDATE books SET status='Wanted' WHERE status='Snatched' and BookID=?"
            )
        elif book_type_enum == BookType.AUDIOBOOK:
            cmd = "UPDATE books SET audiostatus='Wanted' WHERE audiostatus='Snatched' and BookID=?"
        if cmd:
            db.action(cmd, (book_state.book_id,))

    # use url and status for identifier because magazine id isn't unique
    if book_state.status == "Snatched":
        q = "UPDATE wanted SET Status='Failed',DLResult=? WHERE NZBurl=? and Status='Snatched'"
        db.action(q, (dlresult, book_state.download_url))
    else:  # don't overwrite dlresult reason for the abort
        q = "UPDATE wanted SET Status='Failed' WHERE NZBurl=? and Status='Aborted'"
        db.action(q, (book_state.download_url,))

    if CONFIG.get_bool("DEL_FAILED"):
        logger.warning(f"{dlresult}, deleting failed task")
        delete_task(book_state.source, book_state.download_id, True)


def _check_and_schedule_next_run(db, logger: logging.Logger, reset: bool) -> None:
    """
    Determine if postprocessor should run again.

    Checks for remaining snatched/seeding items and schedules
    the PostProcessor job accordingly (STOP, RESTART, or continue).

    Args:
        db: Database connection
        logger: Logger instance
        reset: Whether to force restart
    """
    # Check if postprocessor needs to run again
    snatched = db.select("SELECT * from wanted WHERE Status='Snatched'")
    seeding = db.select("SELECT * from wanted WHERE Status='Seeding'")

    if not len(snatched) and not len(seeding):
        logger.info("Nothing marked as snatched or seeding. Stopping postprocessor.")
        schedule_job(SchedulerCommand.STOP, target="PostProcessor")
    elif len(seeding):
        logger.info(f"Seeding {len(seeding)}")
        schedule_job(SchedulerCommand.RESTART, target="PostProcessor")
    elif reset:
        schedule_job(SchedulerCommand.RESTART, target="PostProcessor")


def _manage_download_status(db, logger: logging.Logger) -> None:
    """
    Manage download lifecycle for incomplete/failed downloads.

    Handles three status types:
    - Seeding: Check completion, handle removed torrents, update status
    - Snatched: Check timeouts, attempt direct processing if 100% complete
    - Aborted: Send failure notifications, clean up

    This runs after main processing to handle items that couldn't be processed
    or are still in progress.

    Args:
        db: Database connection
        logger: Logger instance
    """
    # Query for items needing status management
    cmd = "SELECT * from wanted WHERE Status IN ('Snatched', 'Aborted', 'Seeding')"
    incomplete = db.select(cmd)
    logger.info(f"Found {len(incomplete)} items for status management")

    # Get config values once
    keep_seeding = CONFIG.get_bool("KEEP_SEEDING")
    wait_for_seeding = CONFIG.get_bool("SEED_WAIT")
    max_hours = CONFIG.get_int("TASK_AGE")

    for book_row in incomplete:
        book_dict = dict(book_row)

        # Use BookState for consistency with main processing loop
        book_state = BookState.from_db_row(book_row, CONFIG)

        # Set runtime fields for download status tracking
        book_state.aborted = False
        book_state.finished = False
        book_state.progress = "Unknown"
        book_state.skipped_reason = book_dict.get("skipped", "")

        logger.debug(
            f"{book_state.status} {book_state.source} {book_state.download_title}"
        )

        # Get progress from download client
        if book_state.status == "Aborted":
            book_state.aborted = True
        else:
            book_state.progress, book_state.finished = get_download_progress(
                book_state.source, book_state.download_id
            )

        # Route to appropriate handler based on status
        if book_state.status == "Seeding":
            should_skip = _handle_seeding_status(
                book_state, keep_seeding, wait_for_seeding, db, logger
            )
            if should_skip:
                continue

        elif book_state.status == "Snatched":
            hours, mins, _ = _calculate_download_age(book_state.snatched_date)
            should_abort, should_skip = _handle_snatched_timeout(
                book_state, hours, mins, max_hours, logger
            )

            if should_skip:
                continue  # Successfully processed, move to next

            if should_abort:
                book_state.aborted = True

        # Handle aborted downloads
        if book_state.aborted:
            hours, mins, _ = _calculate_download_age(book_state.snatched_date)
            _handle_aborted_download(book_state, hours, db, logger)
        elif book_state.status == "Snatched":
            # Log progress for items still downloading
            hours, mins, _ = _calculate_download_age(book_state.snatched_date)
            if mins:
                provider = book_state.source
                if book_state.source == "DIRECT":
                    provider = book_state.download_provider
                logger.debug(
                    f"{book_state.download_title} was sent to {provider} {mins} {plural(mins, 'minute')} ago."
                    f" Progress {book_state.progress} {book_state.skipped_reason}"
                    f" Status {book_state.status}"
                )


def _search_in_known_location(
    book_state: BookState,
    ignoreclient: bool,
    db,
    logger: logging.Logger,
    fuzzlogger: logging.Logger,
):
    """
    Search for book in client-provided download_folder (Matching Stage | First Pass, i.e. the Targeted Search).

    Trusts client location even if outside configured directories.
    Uses book_title for drill-down matching in collections.

    Args:
        book_state: BookState with download_folder populated
        ignoreclient: Whether to skip download client interaction
        db: Database connection
        logger: Logger instance
        fuzzlogger: Fuzzy logger

    Returns:
        Number of books processed (1 if successful, 0 if failed)
    """
    logger.info(f"Download folder: {book_state.download_folder}")
    logger.info(f"Book title: {book_state.book_title}")

    # Validate folder exists
    if not path_exists(book_state.download_folder):
        logger.warning(f"FAIL: Download folder not found: {book_state.download_folder}")
        return 0

    # Check if folder has content
    try:
        contents = listdir(book_state.download_folder)
        if not contents:
            logger.warning("FAIL: Download folder is empty")
            return 0
        logger.info(f"Folder has {len(contents)} items")
    except Exception as e:
        logger.error(f"ERROR: Cannot access folder: {e}")
        return 0

    # Set candidate_ptr to the known folder
    book_state.update_candidate(book_state.download_folder)
    logger.debug(f"Candidate set to: {book_state.candidate_ptr}")

    # Get parent directory for processing context
    parent_dir = os.path.dirname(book_state.download_folder.rstrip(os.sep))
    logger.debug(f"Parent directory: {parent_dir}")

    # Process the known folder (drill-down uses book_title, extraction, validation)
    is_valid, skip_reason = _process_matched_directory(
        book_state,
        parent_dir,
        100.0,  # 100% - we trust the client
        logger,
        fuzzlogger,
    )

    if not is_valid:
        logger.warning(f"FAIL: Validation failed: {skip_reason}")
        book_state.mark_failed("validation", skip_reason)
        return 0

    logger.debug("Validation passed, continuing to metadata and processing")

    # Continue with metadata and processing
    result = _process_book_after_matching(
        book_state, parent_dir, ignoreclient, db, logger
    )

    return result


def _compile_all_downloads(dirlist, logger):
    """
    Compile all downloads from configured directories into a single list.

    Handles OSError gracefully by skipping inaccessible directories.

    Args:
        dirlist: List of download directories to scan
        logger: Logger instance

    Returns:
        List of (parent_dir, filename) tuples
    """
    all_downloads = []

    for download_dir in dirlist:
        try:
            downloads = listdir(download_dir)
            all_downloads.extend([(download_dir, f) for f in downloads])
            logger.debug(f"Found {len(downloads)} items in {download_dir}")
        except OSError as why:  # noqa: PERF203
            logger.error(
                f"Could not access [{download_dir}]: {why.strerror} - skipping"
            )
            continue

    logger.info(
        f"Compiled {len(all_downloads)} total items from {len(dirlist)} download {plural(len(dirlist), 'directory')}"
    )
    return all_downloads


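# Example (illustrative) for _compile_all_downloads: with dirlist
# ["/downloads", "/downloads2"] the result is a flat list such as
# [("/downloads", "Book.epub"), ("/downloads", "Some.Folder"),
#  ("/downloads2", "Other.cbz")], pairing each entry with the directory
# it was found in; unreadable directories are logged and skipped.

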
# noinspection PyBroadException
def process_dir(reset=False, startdir=None, ignoreclient=False, downloadid=None):
    """
    Main postprocessor entry point with book-centric workflow.

    Pass 1: Process completed snatched downloads
    - Handles deliberately downloaded books (from search/snatch)
    - Fuzzy matches download_title against filesystem to find location
    - Uses book_title for drill-down matching in collections
    - Handles single books AND collections (extracts best match)
    - Processes all media types: ebook, audiobook, magazine, comic
    - Extracts archives, handles multipart, searches obfuscated folders
    - Moves/copies to library, sends notifications

    Pass 2: Process unsnatched books with LL.(bookid) naming
    - Scans for folders/files with "LL.(bookid)" pattern
    - These are manually added or leftover books NOT in wanted table
    - Imports to library if not already present

    Pass 3: Handle download status management
    - Seeding: Check completion, handle removed torrents
    - Snatched: Check timeouts (100% complete downloads retry next cycle)
    - Aborted: Send notifications, clean up

    Args:
        reset: Force postprocessor to restart after completion
        startdir: Specific directory to process (overrides config)
        ignoreclient: Skip download client interaction
        downloadid: Process specific download ID only
    """
    logger = logging.getLogger(__name__)
    postprocesslogger = logging.getLogger("special.postprocess")
    fuzzlogger = logging.getLogger("special.fuzz")

    # Thread safety check - prevent concurrent execution
    count = 0
    logger.debug("Attempt to run POSTPROCESSOR")
    for name in [t.name for t in threading.enumerate()]:
        if name == "POSTPROCESSOR":
            count += 1

    incoming_threadname = thread_name()
    if incoming_threadname == "POSTPROCESSOR":
        count -= 1

    if count:
        logger.debug("POSTPROCESSOR is already running")
        return  # Exit early if already running

    logger.debug("No concurrent POSTPROCESSOR threads detected")

    # Set thread name for this execution
    thread_name("POSTPROCESS")

    db = database.DBConnection()
    try:
        db.upsert("jobs", {"Start": time.time()}, {"Name": thread_name()})

        # Now we will get a list of wanted books that are snatched and ready for processing
        if downloadid:
            snatched_books = db.select(
                "SELECT * from wanted WHERE DownloadID=? AND Status='Snatched'",
                (downloadid,),
            )
        else:
            snatched_books = db.select("SELECT * from wanted WHERE Status='Snatched'")

        postprocesslogger.debug(
            f'Found {len(snatched_books)} {plural(len(snatched_books), "file")} marked "Snatched"'
        )

        # ======================================================================
        # Filtering Stage: Get snatched books that are ready for processing
        # by removing books still downloading, rejected content, etc.
        # ======================================================================
        books_to_process = []
        if len(snatched_books):
            TELEMETRY.record_usage_data("Process/Snatched")
            books_to_process = _get_ready_from_snatched(db, snatched_books)

        postprocesslogger.info(
            f"Found {len(books_to_process)} {plural(len(books_to_process), 'book')} ready to process"
        )

        # Build the list of directories that we will use to process downloaded assets
        # At least one valid directory must be present or we will stop
        startdir_file = None
        if startdir:
            if path_isfile(startdir):
                startdir_file = startdir
                templist = [os.path.dirname(startdir)]
            else:
                templist = [startdir]
        else:
            templist = get_list(CONFIG["DOWNLOAD_DIR"], ",")
            if len(templist) and get_directory("Download") != templist[0]:
                templist.insert(0, str(get_directory("Download")))

        download_dirlist = []
        for item in templist:
            if path_isdir(item):
                download_dirlist.append(item)
            else:
                postprocesslogger.debug(f"[{item}] is not a directory")

        # Collect all entries within our download directories in a list to analyze against our
        # downloaded items. If there are no available download directories or entries in the download
        # folders, we will still process the books because there is a chance that a book is downloading
        # to a location outside of our configured directories (e.g., the download started before a config change)
        all_downloads = None
        if download_dirlist:
            if startdir_file:
                all_downloads = [
                    (os.path.dirname(startdir_file), os.path.basename(startdir_file))
                ]
                postprocesslogger.debug(
                    f"Using single file startdir: {startdir_file}"
                )
            else:
                # Compile all downloads from all directories once
                postprocesslogger.debug(
                    f"Compiling downloads from directories: {download_dirlist}"
                )
                all_downloads = _compile_all_downloads(download_dirlist, logger)
                if not all_downloads:
                    postprocesslogger.warning(
                        "No downloads found in any configured directory"
                    )
        else:
            postprocesslogger.warning("No download directories are configured.")
        # This is where our processing of books will occur. This is a multipass process for locating
        # and performing file operations on the downloaded items

        # This will provide a little bit of padding between dl completion and processing
        processing_delay = CONFIG.get_int("PP_DELAY")

        ppcount = 0

        for book_row in books_to_process:
            # Create BookState from database row
            book_state = BookState.from_db_row(book_row, CONFIG)

            # Check processing delay (once per book, not per directory!)
            # Legacy-compatible: processes even if Completed==0 (some clients don't set it)
            if processing_delay:
                should_delay, elapsed = book_state.should_delay_processing(
                    processing_delay
                )
                if should_delay:
                    postprocesslogger.warning(
                        f"Ignoring {book_state.download_title} as completion was only {elapsed} "
                        f"{plural(elapsed, 'second')} ago, delay is {processing_delay}"
                    )
                    continue
                # Only log completion time if we have a valid timestamp
                if book_state.is_completed():
                    postprocesslogger.debug(
                        f"{book_state.download_title} was completed {elapsed} {plural(elapsed, 'second')} ago"
                    )
                else:
                    postprocesslogger.debug(
                        f"{book_state.download_title} has no completion timestamp (client doesn't support it)"
                    )

            # Enrich with download_folder (general + name) and book_title
            book_state.enrich_with_download_info(db)

            postprocesslogger.debug(
                f"Enrichment result: download_folder='{book_state.download_folder}', "
                f"book_title='{book_state.book_title[:50] if book_state.book_title else '(empty)'}'"
            )

            # ========================================================
            # Downloaded Processing Stage | Targeted Location Pass
            # ========================================================
            if book_state.download_folder:
                postprocesslogger.debug(
                    f"Targeted Location Pass: Processing search for {book_state.download_title}"
                )
                result = _search_in_known_location(
                    book_state, ignoreclient, db, postprocesslogger, fuzzlogger
                )
                if result > 0:
                    ppcount += result
                    postprocesslogger.info(
                        f"Matching Stage | First Pass SUCCESS: {book_state.download_title}"
                    )
                    continue  # Successfully processed, move to next book

                postprocesslogger.debug(
                    f"Matching Stage | First Pass unsuccessful for {book_state.download_title}, trying Pass 2"
                )
            else:
                postprocesslogger.debug(
                    f"Matching Stage | First Pass skipped because no targeted download folder "
                    f"specified for {book_state.download_title}, trying Pass 2"
                )

            # =======================================================
            # Matching Stage | Second Pass: Fuzzy Search
            # * Only here if not processed in First Pass
            # =======================================================
            postprocesslogger.debug(
                f"Matching Stage | Second Pass: Fallback search for {book_state.download_title}"
            )
            result = _process_snatched_book(
                book_state,
                all_downloads,
                ignoreclient,
                db,
                postprocesslogger,
                fuzzlogger,
            )
            if result > 0:
                ppcount += result
                postprocesslogger.info(
                    f"Matching Stage | Second Pass SUCCESS: {book_state.download_title}"
                )
            else:
                postprocesslogger.warning(
                    f"Matching Stage | Second Pass FAILED: {book_state.download_title}. No matches"
                )

        postprocesslogger.debug("Snatched Processing Stage Complete")
        # ==========================================================================
        # Supplemental Search: Look for and process books in LL.(bookid) Folders
        # ==========================================================================
        postprocesslogger.info("Supplemental Search: Processing LL.(bookid) folders")
        if all_downloads:
            ppcount += _process_ll_bookid_folders_from_list(
                all_downloads, db, postprocesslogger
            )

        # Mark unprocessed books as Failed to force a retry on next search
        failed = [0, 0, 0, 0]  # ebook, audio, mag, comic
        for book_row in books_to_process:
            book_state = BookState.from_db_row(book_row, CONFIG)
            if not book_state.was_processed and book_state.has_failed():
                logger.warning(
                    f"Marking {book_state.download_title} as Failed: "
                    f"{book_state.processing_stage} - {book_state.failure_reason}"
                )
                control_value_dict = {"DownloadID": book_state.download_id, "Status": "Snatched"}
                new_value_dict = {
                    "Status": "Failed",
                    "NZBDate": now(),
                    "DLResult": f"{book_state.processing_stage}: {book_state.failure_reason}",
                }
                db.upsert("wanted", new_value_dict, control_value_dict)
                book_type_str = book_state.get_book_type_str()
                cmd = ""
                if book_type_str == BookType.EBOOK.value:
                    cmd = "UPDATE books SET status='Wanted' WHERE status='Snatched' and BookID=?"
                    failed[0] += 1
                elif book_type_str == BookType.AUDIOBOOK.value:
                    cmd = "UPDATE books SET audiostatus='Wanted' WHERE audiostatus='Snatched' and BookID=?"
                    failed[1] += 1
                elif book_type_str == BookType.MAGAZINE.value:
                    failed[2] += 1
                elif book_type_str == BookType.COMIC.value:
                    failed[3] += 1
                if cmd:
                    db.action(cmd, (book_state.book_id,))
                if CONFIG.get_bool("DEL_FAILED"):
                    delete_task(book_state.source, book_state.download_id, True)
        if any(failed):
            logger.debug(
                f"Failed to process {failed[0]} ebook, {failed[1]} audio, "
                f"{failed[2]} magazine, {failed[3]} comic"
            )

        postprocesslogger.info(f"{ppcount} {plural(ppcount, 'download')} processed.")

        # ═══════════════════════════════════════════════════════════
        # PASS 3: DOWNLOAD STATUS MANAGEMENT
        # ═══════════════════════════════════════════════════════════
        postprocesslogger.info("Third pass: Download status management")

        _manage_download_status(db, postprocesslogger)

        # Cleanup and scheduling
        db.upsert("jobs", {"Finish": time.time()}, {"Name": thread_name()})
        _check_and_schedule_next_run(db, logger, reset)

    except Exception:
        logger.error(f"Unhandled exception in process_dir: {traceback.format_exc()}")
    finally:
        db.close()
        # Restore original thread name
        thread_name(incoming_threadname)


def _process_ll_bookid_folders_from_list(all_downloads, db, logger):
    """
    Process unsnatched books/audiobooks with LL.(bookid) naming from compiled list.

    Searches through compiled download list for items named "LL.(bookid)".
    These are books that weren't explicitly snatched but appeared in downloads.

    Args:
        all_downloads: List of (parent_dir, filename) tuples
        db: Database connection
        logger: Logger instance

    Returns:
        Count of books successfully processed
    """
    ppcount = 0
    skipped_extensions = get_list(CONFIG["SKIPPED_EXT"])
    TELEMETRY.record_usage_data("Process/Residual")

    logger.debug(f"Scanning {len(all_downloads)} items for LL.(bookid) pattern")

    for parent_dir, _entry in all_downloads:
        entry = enforce_str(_entry)
        if "LL.(" in entry:
            _, extn = os.path.splitext(entry)
            if not extn or extn.strip(".") not in skipped_extensions:
                book_id = entry.split("LL.(")[1].split(")")[0]
                logger.debug(f"Book with id: {book_id} found in {parent_dir}")
                book_path = os.path.join(parent_dir, entry)

                # At this point we don't know if we want audio or ebook or both
                is_audio = book_file(book_path, "audiobook", config=CONFIG) != ""
                is_ebook = book_file(book_path, "ebook", config=CONFIG) != ""
                logger.debug(f"Contains ebook={is_ebook} audio={is_audio}")

                data = db.match(
                    "SELECT BookFile,AudioFile from books WHERE BookID=?",
                    (book_id,),
                )
                have_ebook = data and data["BookFile"] and path_isfile(data["BookFile"])
                have_audio = (
                    data and data["AudioFile"] and path_isfile(data["AudioFile"])
                )
                logger.debug(f"Already have ebook={have_ebook} audio={have_audio}")

                if (have_ebook and have_audio) or (
                    have_ebook and not CONFIG.get_bool("AUDIO_TAB")
                ):
                    exists = True
                else:
                    exists = False

                if exists:
                    logger.debug(f"Skipping BookID {book_id}, already exists")
                else:
                    logger.debug(f"Checking type of {book_path}")

                    if path_isfile(book_path):
                        logger.debug(f"{book_path} is a file")
                        # We want to work on the download directory not the individual file
                        book_path = os.path.normpath(parent_dir)

                    if path_isdir(book_path):
                        logger.debug(f"{book_path} is a dir")
                        if process_book(book_path, book_id, logger=logger):
                            logger.debug(f"Imported {book_path}")
                            ppcount += 1
            else:
                logger.debug(f"Skipping extn {entry}")
        else:
            logger.debug(f"Skipping (no LL bookid) {entry}")

    return ppcount


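# Example (illustrative) for _process_ll_bookid_folders_from_list: an
# entry named "Author - Title LL.(12345).epub" yields
# entry.split("LL.(")[1].split(")")[0] == "12345", which is then looked
# up in the books table to decide whether the item still needs importing.

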
def process_book(book_path: str, book_id: str, logger=None, library=""):
    TELEMETRY.record_usage_data("Process/Book")
    if not logger:
        logger = logging.getLogger(__name__)

    db = database.DBConnection()
    # noinspection PyBroadException
    try:
        # Move a book into LL folder structure given just the folder and bookID, returns True or False
        # Called from "import_alternate" or if we find a "LL.(xxx)" folder that doesn't match a snatched book/mag
        logger.debug(f"process_book {book_path}")
        is_audio = book_file(book_path, "audiobook", config=CONFIG) != ""
        is_ebook = book_file(book_path, "ebook", config=CONFIG) != ""

        # Get the details of the book
        cmd = (
            "SELECT AuthorName,BookName,BookID,books.Status,AudioStatus from books,authors WHERE BookID=? "
            "and books.AuthorID = authors.AuthorID"
        )
        data = dict(db.match(cmd, (book_id,)))
        if data:
            authorname = data["AuthorName"]
            bookname = data["BookName"]
            want_audio = False
            want_ebook = False
            book_type_enum = None

            if data["Status"] in ["Wanted", "Snatched"] or library == "eBook":
                want_ebook = True
            if data["AudioStatus"] in ["Wanted", "Snatched"] or library == "Audio":
                want_audio = True

            # we may have wanted to snatch an ebook and audiobook of the same title/id
            cmd = "SELECT BookID, NZBprov, NZBmode,AuxInfo FROM wanted WHERE BookID=? and Status='Snatched'"
            was_snatched = db.select(cmd, (book_id,))

            # For each snatched type of a book id, see if
            # it is an ebook or audiobook
            for item in was_snatched:
                book_type_enum = BookType.from_string(
                    item["AuxInfo"] or "ebook"
                )  # default to ebook if unknown
                if book_type_enum == BookType.AUDIOBOOK:
                    want_audio = True
                elif book_type_enum == BookType.EBOOK:
                    want_ebook = True

            if not is_audio and not is_ebook:
                logger.debug(f"Bookid {book_id}, failed to find valid booktype")
            elif want_audio and is_audio:
                book_type_enum = BookType.AUDIOBOOK
            elif want_ebook and is_ebook:
                book_type_enum = BookType.EBOOK
            elif not was_snatched:
                logger.debug(
                    f"Bookid {book_id} was not snatched so cannot check type, contains ebook:{is_ebook} "
                    f"audio:{is_audio}"
                )

                # If audiobooks aren't enabled, don't look for it
                if not CONFIG.get_bool("AUDIO_TAB"):
                    is_audio = False

                if is_audio:
                    book_type_enum = BookType.AUDIOBOOK
                elif is_ebook:
                    book_type_enum = BookType.EBOOK

            if book_type_enum == BookType.AUDIOBOOK:
                dest_dir = enforce_str(
                    str(get_directory("Audio"))
                )  # Ensure string for join
            elif book_type_enum == BookType.EBOOK:
                dest_dir = enforce_str(
                    str(get_directory("eBook"))
                )  # Ensure string for join
            else:
                logger.debug(
                    f"Bookid {book_id}, failed to find valid booktype, contains ebook:{is_ebook} audio:{is_audio}"
                )
                return False

            namevars = name_vars(book_id)
            # global_name is only used for ebooks to ensure book/cover/opf all have the same basename
            # audiobooks are usually multipart so can't be renamed this way
            global_name = str(namevars["BookFile"])  # Enforce string from dict
            if book_type_enum == BookType.AUDIOBOOK:
                audio_folder = str(
                    namevars["AudioFolderName"]
                )  # Enforce string from dict
                dest_path = stripspaces(os.path.join(dest_dir, audio_folder))
            else:
                folder_name = str(namevars["FolderName"])  # Enforce string from dict
                dest_path = stripspaces(os.path.join(dest_dir, folder_name))

            # Validate encoding via make_utf8bytes, then decode to string for metadata
            dest_path = enforce_str(enforce_bytes(dest_path))

            # Create metadata object for manual processing
            # For manual processing, we have limited metadata available
            metadata = EbookMetadata(
                book_id=book_id,
                book_type_enum=book_type_enum,
                dest_path=dest_path,
                global_name=global_name,
                author_name=authorname,
                book_name=bookname,
            )

            success, dest_file, book_path = _process_destination(
                book_metadata=metadata,
                book_path=book_path,
                logger=logger,
                # No mode for manual processing
            )
            book_type_aux = (
                "AudioBook" if (book_type_enum == BookType.AUDIOBOOK) else "eBook"
            )
            if success:
                # update nzbs
                dest_file = enforce_str(make_unicode(dest_file))
                control_value_dict = {"BookID": book_id}
                if was_snatched:
                    snatched_from = CONFIG.disp_name(was_snatched[0]["NZBprov"])
                    logger.debug(f"{global_name} was snatched from {snatched_from}")
                    new_value_dict = {
                        "Status": "Processed",
                        "NZBDate": now(),
                        "DLResult": dest_file,
                    }
                    db.upsert("wanted", new_value_dict, control_value_dict)
                else:
                    new_value_dict = {
                        "Status": "Processed",
                        "NZBProv": "Manual",
                        "AuxInfo": book_type_aux,
                        "NZBDate": now(),
                        "DLResult": dest_file,
                        "NZBSize": 0,
                    }
                    if path_isfile(dest_file):
                        new_value_dict["NZBSize"] = os.path.getsize(syspath(dest_file))

                    db.upsert("wanted", new_value_dict, control_value_dict)
                    snatched_from = "manually added"
                    logger.debug(f"{book_type_aux} {global_name} was {snatched_from}")

                if dest_file:  # do we know the location (not calibre already exists)
                    _process_extras(
                        dest_file, global_name, book_id, book_type_enum.value
                    )

                if ".unpack" in book_path:
                    book_path = f"{book_path.split('.unpack')[0]}.unpack"

                if (
                    ".unpack" in book_path
                    or (not CONFIG.get_bool("DESTINATION_COPY")
                        and book_path != dest_dir)
                ):
                    if path_isdir(book_path):
                        # calibre might have already deleted it?
                        logger.debug(f"Deleting {book_path}")
                        shutil.rmtree(book_path, ignore_errors=True)
                elif CONFIG.get_bool("DESTINATION_COPY"):
                    logger.debug(f"Not removing {book_path} as Keep Files is set")
                else:
                    logger.debug(f"Not removing {book_path} as in download root")

                logger.info(f"Successfully processed: {global_name}")
                custom_notify_download(f"{book_id} {book_type_aux}")
                frm = "" if snatched_from == "manually added" else "from "

                notify_download(
                    f"{book_type_aux} {global_name} {frm}{snatched_from} at {now()}",
                    book_id,
                )
                mailing_list(book_type_aux, global_name, book_id)
                if was_snatched:
                    _update_downloads_provider_count(
                        CONFIG.disp_name(was_snatched[0]["NZBprov"])
                    )
                else:
                    _update_downloads_provider_count("manually added")
                return True

            logger.error(
                f"Postprocessing for {global_name!r} has failed: {dest_file!r}"
            )
            shutil.rmtree(f"{book_path}.fail", ignore_errors=True)
            try:
                _ = safe_move(book_path, f"{book_path}.fail")
                logger.warning(f"Residual files remain in {book_path}.fail")
            except Exception as e:
                logger.error(
                    f"Unable to rename {book_path!r}, {type(e).__name__} {e!s}"
                )
                if not os.access(syspath(book_path), os.R_OK):
                    logger.error(f"{book_path!r} is not readable")
                if not os.access(syspath(book_path), os.W_OK):
                    logger.error(f"{book_path!r} is not writeable")
                parent = os.path.dirname(book_path)
                try:
                    with open(
                        syspath(os.path.join(parent, "ll_temp")),
                        "w",
                        encoding="utf-8",
                    ) as f:
                        f.write("test")
                    remove_file(os.path.join(parent, "ll_temp"))
                except Exception as why:
                    logger.error(f"Directory {parent} is not writeable: {why}")
                logger.warning(f"Residual files remain in {book_path}")

            was_snatched = dict(
                db.match(
                    "SELECT NZBurl FROM wanted WHERE BookID=? and Status='Snatched'",
                    (book_id,),
                )
            )
            if was_snatched:
                control_value_dict = {"NZBurl": was_snatched["NZBurl"]}
                new_value_dict = {
                    "Status": "Failed",
                    "NZBDate": now(),
                    "DLResult": dest_file,
                }
                db.upsert("wanted", new_value_dict, control_value_dict)
            # reset status so we try for a different version
            if book_type_enum == BookType.AUDIOBOOK:
                db.action(
                    "UPDATE books SET audiostatus='Wanted' WHERE BookID=?",
                    (book_id,),
                )
            else:
                db.action(
                    "UPDATE books SET status='Wanted' WHERE BookID=?", (book_id,)
                )
            return False
    except Exception:
        logger.error(f"Unhandled exception in process_book: {traceback.format_exc()}")
        return False
    finally:
        db.close()


def _process_extras(
    dest_file=None, global_name=None, bookid=None, book_type: str = BookType.EBOOK.value
):
    # given bookid, handle author count, calibre autoadd, book image, opf

    logger = logging.getLogger(__name__)
    if not bookid:
        logger.error("No bookid supplied")
        return
    if not dest_file:
        logger.error("No dest_file supplied")
        return

    TELEMETRY.record_usage_data("Process/Extras")
    db = database.DBConnection()
    try:
        booktype_enum = BookType.from_string(book_type)
        control_value_dict = {"BookID": bookid}
        if booktype_enum == BookType.AUDIOBOOK:
            new_value_dict = {
                "AudioFile": dest_file,
                "AudioStatus": CONFIG["FOUND_STATUS"],
                "AudioLibrary": now(),
            }
            db.upsert("books", new_value_dict, control_value_dict)
            if CONFIG["AUDIOBOOK_DEST_FILE"]:
                book_filename = audio_rename(bookid, rename=True, playlist=True)
                if dest_file != book_filename:
                    db.action(
                        "UPDATE books set AudioFile=? where BookID=?",
                        (book_filename, bookid),
                    )
        else:
            new_value_dict = {
                "Status": CONFIG["FOUND_STATUS"],
                "BookFile": dest_file,
                "BookLibrary": now(),
            }
            db.upsert("books", new_value_dict, control_value_dict)

        # update authors book counts
        match = dict(db.match("SELECT AuthorID FROM books WHERE BookID=?", (bookid,)))
        if match:
            update_totals(match["AuthorID"])

        if booktype_enum != BookType.EBOOK:  # only do autoadd/img/opf for ebooks
            return

        cmd = (
            "SELECT AuthorName,BookID,BookName,BookDesc,BookIsbn,BookImg,BookDate,BookLang,BookPub,BookRate,"
            "Narrator from books,authors WHERE BookID=? and books.AuthorID = authors.AuthorID"
        )
        data = dict(db.match(cmd, (bookid,)))
        if not data:
            logger.error(f"No data found for bookid {bookid}")
            return
    finally:
        db.close()

    dest_path = os.path.dirname(dest_file)

    # download and cache image if http link
    process_img(dest_path, data["BookID"], data["BookImg"], global_name, ImageType.BOOK)

    # do we want to create metadata - there may already be one in book_path, but it was downloaded and might
    # not contain our choice of authorname/title/identifier, so if autoadding we ignore it and write our own
    if not CONFIG.get_bool("IMP_AUTOADD_BOOKONLY"):
        _ = create_opf(dest_path, data, global_name, overwrite=True)
    else:
        _ = create_opf(dest_path, data, global_name, overwrite=False)
    # if our_opf:
    #     write_meta(dest_path, opf_file)  # write metadata from opf to all ebook types in dest folder

    # If you use auto add by Calibre you need the book in a single directory, not nested
    # So take the files you Copied/Moved to Dest_path and copy/move into Calibre auto add folder.
    if CONFIG["IMP_AUTOADD"]:
        _process_auto_add(dest_path)


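# Sketch of the ebook flow through _process_extras (hypothetical paths/ids):
# after "/library/Author/Title/Title.epub" lands, the book row gets
# Status=CONFIG["FOUND_STATUS"] and BookLibrary=now(), the author's totals are
# refreshed, the cover is cached next to the file, Title.opf is written, and,
# if IMP_AUTOADD is set, everything is flattened into the Calibre auto-add dir.

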
def _find_best_format(
    path: str, prioritized_list: "list[str]"
) -> "tuple[str, set[str]]":
    dir_list = listdir(path)
    found_set: set[str] = set()

    # Collect all valid extension types in a set
    for _fname in dir_list:
        fname = enforce_str(_fname)  # Ensure string for path operations
        _, extn = _tokenize_file(fname)
        extn = extn.lower()
        if extn in prioritized_list:
            found_set.add(extn)

    best_match = ""
    # Now pick the best found type based on the order in the prioritized extn list
    for extn in prioritized_list:
        if extn in found_set:
            best_match = extn
            break

    return best_match, found_set


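# Illustrative example (hypothetical folder contents): with
# EBOOK_TYPE = "epub, mobi, pdf" and a download folder holding
# "Some Book.mobi" and "Some Book.epub",
#   _find_best_format(folder, ["epub", "mobi", "pdf"])
# returns ("epub", {"epub", "mobi"}) - the first prioritized extension
# actually present wins, regardless of directory order.

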
def _is_metadata_file(fname: str) -> bool:
    """Check if file is a metadata file (.jpg or .opf)"""
    fname_lower = fname.lower()
    return fname_lower.endswith((".jpg", ".opf"))


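# For example, _is_metadata_file("Cover.JPG") and _is_metadata_file("book.opf")
# are both True (the check is case-insensitive); _is_metadata_file("book.epub")
# is False, so ebook files are judged by CONFIG.is_valid_booktype instead.

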
def _should_use_calibre(book_type: str) -> bool:
    """Determine if Calibre should be used for this book type"""
    if not CONFIG["IMP_CALIBREDB"]:
        return False

    calibre_settings = {
        "ebook": "IMP_CALIBRE_EBOOK",
        "magazine": "IMP_CALIBRE_MAGAZINE",
        "comic": "IMP_CALIBRE_COMIC",
    }

    setting = calibre_settings.get(book_type)
    if setting:
        return CONFIG.get_bool(setting)

    return False


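# Note that "audiobook" has no entry in calibre_settings, so
# _should_use_calibre("audiobook") is always False and audiobooks are copied
# by LazyLibrarian itself; e.g. with IMP_CALIBREDB configured and
# IMP_CALIBRE_EBOOK enabled, _should_use_calibre("ebook") returns True.

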
def _prepare_destination_directory(dest_path, logger) -> "tuple[bool, str, bytes]":
    """
    Ensure destination directory exists and is ready for file operations.

    Matches original postprocess.py pattern: keeps dest_path as bytes for
    filesystem operations (encoding-safe across platforms).

    Args:
        dest_path: Destination directory path
        logger: Logger instance

    Returns:
        Tuple of (success, error_message, dest_path_bytes)
        - success: True if directory is ready, False on error
        - error_message: Error description if success is False, empty string otherwise
        - dest_path_bytes: UTF8-encoded destination path as bytes (for filesystem ops)
    """

    if not path_exists(dest_path):
        logger.debug(f"{dest_path} does not exist, so it's safe to create it")
    elif not path_isdir(dest_path):
        logger.debug(f"{dest_path} exists but is not a directory, deleting it")
        try:
            remove_file(dest_path)
        except OSError as why:
            return False, f"Unable to delete {dest_path}: {why.strerror}", dest_path

    if path_isdir(dest_path):
        setperm(dest_path)
    elif not make_dirs(dest_path):
        return False, f"Unable to create directory {dest_path}", dest_path

    # Note: encoding detection is handled inside enforce_bytes via make_utf8bytes
    dest_path = enforce_bytes(dest_path)  # Convert to bytes and enforce type

    return True, "", dest_path


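# Typical call pattern (hypothetical path), mirroring its use further down in
# _process_destination:
#
#   ok, err, dest_bytes = _prepare_destination_directory("/library/A/Title", logger)
#   if not ok:
#       return False, err, book_path
#   udest_path = enforce_str(dest_bytes)  # back to str for os.path.join

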
def _should_copy_file(fname: str, best_format: str, book_type: str) -> bool:
    """
    Determine if a file should be copied to the destination.

    Args:
        fname: Filename to check
        best_format: Best format to keep (empty string if not filtering)
        book_type: Type of book (ebook, audiobook, comic, magazine)

    Returns:
        True if file should be copied, False otherwise
    """
    # If we're filtering for a specific format and this is a valid booktype
    # but not the best format, skip it
    if (
        best_format
        and CONFIG.is_valid_booktype(fname, booktype=book_type)
        and not fname.endswith(best_format)
    ):
        return False

    # Copy valid book files or metadata files
    return CONFIG.is_valid_booktype(fname, booktype=book_type) or _is_metadata_file(
        fname
    )


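# Example of the ONE_FORMAT filter in action (hypothetical names), with
# best_format "epub" and book_type "ebook":
#   _should_copy_file("book.mobi", "epub", "ebook")  -> False  (valid type, wrong format)
#   _should_copy_file("book.epub", "epub", "ebook")  -> True
#   _should_copy_file("book.jpg", "epub", "ebook")   -> True   (metadata travels too)
#   _should_copy_file("notes.txt", "epub", "ebook")  -> False  (neither book nor metadata)

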
def _get_dest_filename(
    fname: str, global_name: str, book_type: str, dest_dir: str
) -> str:
    """
    Generate destination filename based on book type.

    Args:
        fname: Source filename
        global_name: Base name for renamed files
        book_type: Type of book (ebook, audiobook, comic, magazine)
        dest_dir: Destination directory

    Returns:
        Full destination file path
    """
    if book_type in [BookType.AUDIOBOOK.value, BookType.COMIC.value]:
        # For audiobooks and comics, only rename metadata files
        if _is_metadata_file(fname):
            return os.path.join(dest_dir, global_name + os.path.splitext(fname)[1])
        # Keep original filename for audio/comic files
        return os.path.join(dest_dir, fname)
    # For ebooks and magazines, rename all files
    return os.path.join(dest_dir, global_name + os.path.splitext(fname)[1])


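# Renaming differs by type (hypothetical names): for an ebook,
#   _get_dest_filename("scan.epub", "Author - Title", "ebook", dest)
# gives "<dest>/Author - Title.epub", while for an audiobook
#   _get_dest_filename("Track 01.mp3", "Author - Title", "audiobook", dest)
# keeps "Track 01.mp3" so multipart ordering survives; only its .jpg/.opf
# companions are renamed to the global name.

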
def _find_preferred_book_file(
    book_type: str,
    dest_path: bytes,
    global_name: bytes,
    dir_list,
    udest_path: str,
    logger,
) -> str:
    """
    Find the preferred file to use as the main book file.

    For ebooks: Find the first format matching the priority order
    For audiobooks: Find first part of multipart or whole-book file
    For other types: Return empty string

    Args:
        book_type: Type of book (ebook, audiobook, etc.)
        dest_path: Destination path (bytes)
        global_name: Global filename (bytes)
        dir_list: List of files in source directory
        udest_path: Unicode destination path
        logger: Logger instance

    Returns:
        Path to preferred file, or empty string if none found
    """
    if book_type == BookType.EBOOK.value:
        book_basename = os.path.join(dest_path, global_name)
        book_basename_str = enforce_str(
            make_unicode(book_basename)
        )  # Ensure string for f-string
        ebook_extn_list = get_list(CONFIG["EBOOK_TYPE"])
        for extn in ebook_extn_list:
            preferred_type = f"{book_basename_str}.{extn}"
            if path_exists(preferred_type):
                logger.debug(f"Link to preferred type {extn}, {preferred_type}")
                return preferred_type
        return ""

    if book_type == BookType.AUDIOBOOK.value:
        firstfile = ""
        tokmatch = ""

        # First, look for a whole-book file (no numbers in filename)
        for f in dir_list:
            if not re.findall(r"\d+\b", f) and CONFIG.is_valid_booktype(
                f, booktype=book_type
            ):
                firstfile = os.path.join(udest_path, f)
                tokmatch = "whole"
                logger.debug(f"Found whole audiobook file: {f}")
                break

        # If no whole-book file, find first part by common numbering patterns
        if not tokmatch:
            for token in [" 001.", " 01.", " 1.", " 001 ", " 01 ", " 1 ", "001", "01"]:
                if tokmatch:
                    break
                for f in dir_list:
                    if CONFIG.is_valid_booktype(f, booktype=book_type):
                        if not firstfile:
                            firstfile = os.path.join(udest_path, f)
                            logger.debug(f"Primary link to {f}")
                        if token in f:
                            firstfile = os.path.join(udest_path, f)
                            logger.debug(f"Link to first part [{token}], {f}")
                            tokmatch = token
                            break

        return firstfile

    return ""


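# Audiobook selection example (hypothetical listing): given
#   ["Title 01.mp3", "Title 02.mp3", "Title.jpg"]
# there is no digit-free audio file, so the token scan runs; " 01." matches
# "Title 01.mp3", which becomes the primary file the database will point at.
# A single "Title.m4b" with no numbers would have matched as "whole" instead.

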
def _handle_magazine_comic_metadata(
    book_type: str,
    book_path: str,
    book_file: str,
    bookid: str,
    issueid: str,
    title: str,
    issuedate: str,
    mag_genres: str,
    global_name,
    udest_path: str,
    logger,
):
    """
    Create metadata files and .ll_ignore for magazines and comics.

    Args:
        book_type: Type (comic or magazine)
        book_path: Path to book files
        book_file: Path to the issue file (passed to create_mag_opf for magazines)
        bookid: Book ID
        issueid: Issue ID (for comics/magazines)
        title: Magazine title
        issuedate: Issue date
        mag_genres: Magazine genres
        global_name: Global filename
        udest_path: Unicode destination path
        logger: Logger instance
    """
    # Create .ll_ignore file
    try:
        ignorefile = os.path.join(udest_path, ".ll_ignore")
        with open(syspath(ignorefile), "w") as f:
            f.write(book_type)
    except (OSError, TypeError) as e:
        logger.warning(f"Unable to create/write to ignorefile: {e!s}")

    if book_type == BookType.COMIC.value:
        cmd = (
            "SELECT Title,comicissues.ComicID,IssueID,IssueAcquired,IssueFile,comicissues.Cover,"
            "Publisher,Contributors from comics,comicissues WHERE "
            "comics.ComicID = comicissues.ComicID and IssueID=? and comicissues.ComicID=?"
        )
        db = database.DBConnection()
        try:
            data = dict(db.match(cmd, (issueid, bookid)))
        finally:
            db.close()

        bookid = f"{bookid}_{issueid}"
        if data:
            # process_img and create_comic_opf expect string for global_name
            process_img(book_path, bookid, data["Cover"], global_name, ImageType.COMIC)
            if CONFIG.get_bool("IMP_COMICOPF"):
                _, _ = create_comic_opf(book_path, data, global_name, True)
            else:
                logger.debug("create_comic_opf is disabled")
        else:
            logger.debug(f"No data found for {bookid}")

    elif CONFIG.get_bool("IMP_MAGOPF"):
        db = database.DBConnection()
        try:
            entry = dict(
                db.match(
                    "SELECT Language,Genre FROM magazines where Title=? COLLATE NOCASE",
                    (title,),
                )
            )
            if entry:
                _, _ = create_mag_opf(
                    book_file,
                    title,
                    issuedate,
                    issueid,
                    language=entry["Language"],
                    genres=mag_genres,
                    overwrite=True,
                )
        finally:
            db.close()
    else:
        logger.debug("create_mag_opf is disabled")


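# The ".ll_ignore" marker written above records which booktype claimed the
# folder, so later library scans can leave it alone. For comics the composite
# id "bookid_issueid" (e.g. a hypothetical "1234_5") keys the cached cover
# image and the generated opf.

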
def _process_destination(
    book_metadata: BookMetadata,
    book_path: str,
    logger: logging.Logger,
    mode: str = "",
    preprocess: bool = True,
) -> "tuple[bool, str, str]":
    """
    Copy/move book/mag and associated files into target directory.

    Args:
        book_metadata: BookMetadata object with all book information and destination paths
        book_path: Source path containing the book files
        logger: Logger instance
        mode: Download mode (torrent, magnet, nzb, etc.) for copy/seeding logic
        preprocess: Whether to run preprocessing steps

    Returns:
        Tuple of (success, full_path_to_book, book_path)
        - success: True if successful, False otherwise
        - full_path_to_book: Path to the processed book file
        - book_path: Processing path (may have changed to .unpack)
    """
    TELEMETRY.record_usage_data("Process/Destination")

    # Extract commonly used fields from metadata
    book_type = book_metadata.book_type
    book_id = book_metadata.book_id
    dest_path = book_metadata.dest_path

    # Convert global_name to bytes for filesystem operations
    # Create unicode version for use in os.path.join and function calls
    global_name = enforce_bytes(book_metadata.global_name)
    uglobal_name = enforce_str(global_name)  # Convert bytes to string for joins

    # Get type-specific fields from metadata
    fields = book_metadata.get_processing_fields()
    authorname = fields["authorname"]
    bookname = fields["bookname"]
    issueid = fields["issueid"]
    title = fields["title"]
    issuedate = fields["issuedate"]
    mag_genres = fields["mag_genres"]
    cover = fields["cover"]

    book_path = enforce_str(make_unicode(book_path))

    logger.info(
        f"DESTINATION PROCESSING - Book type: {book_type}, Source: {book_path}, "
        f"Destination: {dest_path}, Global name: {uglobal_name}"
    )

    dir_list = listdir(book_path)
    best_format = ""
    found_types: set[str] = set()
    single_ebook_type = book_type == BookType.EBOOK.value and CONFIG.get_bool(
        "ONE_FORMAT"
    )
    if single_ebook_type:
        ebook_extn_list = get_list(CONFIG["EBOOK_TYPE"])
        best_format, found_types = _find_best_format(book_path, ebook_extn_list)

    match = False
    if best_format:
        match = True
        logger.debug(
            f"One format import, found {','.join(found_types)}, best match {best_format}"
        )
    else:  # mag, comic or audiobook or multi-format book
        for _fname in dir_list:
            fname = enforce_str(_fname)
            if CONFIG.is_valid_booktype(fname, booktype=book_type):
                match = True
                break

    if not match:
        # no book/mag found in a format we wanted. Leave for the user to delete or convert manually
        return (
            False,
            f"Unable to locate a valid filetype ({book_type}) in {book_path}, leaving for manual processing",
            book_path,
        )

    _, path_extn = _tokenize_file(book_path)
    if path_extn != "unpack" and (
        CONFIG.get_bool("DESTINATION_COPY")
        or (
            mode in ["torrent", "magnet", "torznab"] and CONFIG.get_bool("KEEP_SEEDING")
        )
    ):
        dest_dir = f"{book_path}.unpack"
        logger.debug(f"Copying to target {dest_dir}")
        failed, err = copy_tree(book_path, dest_dir)
        if not failed:
            book_path = dest_dir
        else:
            msg = f"Failed to copy {failed} files to {dest_dir}, aborted"
            logger.error(msg)
            logger.debug(f"{err}")
            return False, msg, book_path

    if preprocess:
        logger.debug(f"PreProcess ({book_type}) {book_path}")
        if book_type == BookType.EBOOK.value:
            preprocess_ebook(book_path)
        elif book_type == BookType.AUDIOBOOK.value:
            preprocess_audio(book_path, book_id, authorname, bookname)
        elif book_type == BookType.MAGAZINE.value:
            # Use metadata fields instead of querying database again
            success, msg = preprocess_magazine(
                book_path,
                cover=cover,
                tag=CONFIG.get_bool("TAG_PDF"),
                title=book_id,
                issue=issuedate,
                genres=mag_genres,
            )
            if not success:
                return False, msg, book_path

        # run custom pre-processing, for example remove unwanted formats
        # or force format conversion before sending to calibre
        if len(CONFIG["EXT_PREPROCESS"]):
            logger.debug(
                f"Running external PreProcessor: {book_type} {book_path} {authorname} {bookname}"
            )
            params = [
                CONFIG["EXT_PREPROCESS"],
                book_type,
                book_path,
                authorname,
                bookname,
            ]
            rc, res, err = run_script(params)
            if rc:
                return (
                    False,
                    f"PreProcessor returned {rc}: res[{res}] err[{err}]",
                    book_path,
                )
            logger.debug(f"PreProcessor: {res} [{book_path}]")

        if single_ebook_type:
            ebook_extn_list = get_list(CONFIG["EBOOK_TYPE"])
            best_format, found_types = _find_best_format(book_path, ebook_extn_list)
            logger.debug(
                f"After PreProcessing, found {','.join(found_types)}, best match {best_format}"
            )

    # If ebook, magazine or comic, do we want calibre to import it for us
    newbookfile = ""
    if _should_use_calibre(book_type):
        # Build data dict for Calibre from metadata
        data = book_metadata.get_opf_data()
        data["bestformat"] = best_format
        data["cover"] = cover
        data["mag_genres"] = mag_genres
        return send_to_calibre(book_type, uglobal_name, book_path, data)

    # we are copying the files ourselves
    success, error_msg, dest_path = _prepare_destination_directory(dest_path, logger)
    if not success:
        logger.error(f"FAIL: Cannot prepare destination: {error_msg}")
        return False, error_msg, book_path

    # dest_path is bytes - create unicode version for string operations
    udest_path = enforce_str(dest_path)  # Convert bytes to string for os.path.join

    dir_list = listdir(book_path)  # Refresh our directory listing
    copied_count = 0

    # ok, we've got a target directory, try to copy only the files we want, renaming them on the fly.
    logger.info(f"COPY FILES STARTING - {book_path} ==> {udest_path}")
    for _fname in dir_list:
        fname = enforce_str(_fname)
        if not _should_copy_file(fname, best_format, book_type):
            logger.debug(f"Skip: {fname}")
            continue

        srcfile = os.path.join(book_path, fname)
        destfile = _get_dest_filename(fname, uglobal_name, book_type, udest_path)

        try:
            destfile = safe_copy(srcfile, destfile)
            setperm(destfile)
            copied_count += 1
            logger.info(f"File copied to {destfile}")
            if destfile and CONFIG.is_valid_booktype(
                enforce_str(make_unicode(destfile)), booktype=book_type
            ):
                newbookfile = destfile
        except Exception as why:
            # extra debugging to see if we can figure out a windows encoding issue
            parent = os.path.dirname(destfile)
            try:
                with open(
                    syspath(os.path.join(parent, "ll_temp")),
                    "w",
                    encoding="utf-8",
                ) as f:
                    f.write("test")
                remove_file(os.path.join(parent, "ll_temp"))
            except Exception as w:
                logger.error(f"Destination Directory [{parent}] is not writeable: {w}")
            return (
                False,
                f"Unable to copy file {srcfile} to {destfile}: {type(why).__name__} {why!s}",
                book_path,
            )

    logger.info(f"COPY FILES COMPLETE - Files copied: {copied_count} to {udest_path}")

    if book_type in [BookType.EBOOK.value, BookType.AUDIOBOOK.value]:
        # Use metadata we already have instead of querying database again
        # Use isinstance for type narrowing (pyright requires this)
        if isinstance(book_metadata, EbookMetadata):
            process_img(
                book_path,
                book_id,
                book_metadata.book_img,
                uglobal_name,
                ImageType.BOOK,
            )
            opf_data = book_metadata.get_opf_data()
            _ = create_opf(book_path, opf_data, uglobal_name, True)

        # try to keep track of "preferred" ebook type or the first part of multipart audiobooks
        # Find the preferred file to use as the main book file
        firstfile = _find_preferred_book_file(
            book_type, dest_path, global_name, dir_list, udest_path, logger
        )

        if firstfile:
            newbookfile = firstfile
            logger.info(f"Primary book file: {newbookfile}")

    # Handle magazine/comic metadata creation
    elif book_type in [BookType.MAGAZINE.value, BookType.COMIC.value]:
        _handle_magazine_comic_metadata(
            book_type,
            book_path,
            newbookfile,
            book_id,
            issueid,
            title,
            issuedate,
            mag_genres,
            uglobal_name,  # Use unicode version for process_img
            udest_path,
            logger,
        )

    logger.info(f"DESTINATION PROCESSING COMPLETE - {uglobal_name}")
    return True, newbookfile, book_path


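# Plausible caller-side pattern (illustrative; the real caller lives earlier in
# this module):
#
#   success, dest_file, book_path = _process_destination(meta, book_path, logger, mode)
#   # on success dest_file is the primary book file now inside the library;
#   # on failure dest_file carries an error message and book_path the folder
#   # still holding the files (possibly a ".unpack" copy for seeding).

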
def _process_auto_add(src_path: str, book_type_enum: BookType = BookType.EBOOK):
    # Called to copy/move the book files to an auto add directory for the likes of Calibre which can't do nested dirs
    logger = logging.getLogger(__name__)
    autoadddir = CONFIG["IMP_AUTOADD"]
    savefiles = CONFIG.get_bool("IMP_AUTOADD_COPY")

    book_type_str = book_type_enum.value
    if book_type_enum == BookType.MAGAZINE:
        autoadddir = CONFIG["IMP_AUTOADDMAG"]
        savefiles = CONFIG.get_bool("IMP_AUTOADDMAG_COPY")

    if not path_exists(autoadddir):
        logger.error(
            f"AutoAdd directory for {book_type_str} [{autoadddir}] is missing or not set - cannot perform autoadd"
        )
        return False
    TELEMETRY.record_usage_data("Process/Autoadd")
    # Now try and copy all the book files into a single dir.
    try:
        names = listdir(src_path)
        # files jpg, opf & book(s) should have same name
        # Caution - book may be pdf, mobi, epub or all 3.
        # for now simply copy all files, and let the autoadder sort it out
        #
        # Update - seems Calibre will only use the jpeg if named same as book, not cover.jpg
        # and only imports one format of each ebook, treats the others as duplicates, might be configable in calibre?
        # ignores author/title data in opf file if there is any embedded in book

        match = False
        if book_type_enum == BookType.EBOOK and CONFIG.get_bool("ONE_FORMAT"):
            booktype_list = get_list(CONFIG["EBOOK_TYPE"])
            # check each format in priority order until one is present
            # (guard with "if", not "while": a format with no matching file
            # must fall through to the next one, not loop forever)
            for bktype in booktype_list:
                if match:
                    break
                for _name in names:
                    name = enforce_str(_name)
                    extn = os.path.splitext(name)[1].lstrip(".")
                    if extn and extn.lower() == bktype:
                        match = bktype
                        break
        copied = False
        for _name in names:
            name = enforce_str(_name)
            valid_type = CONFIG.is_valid_booktype(name, book_type_str)
            if match and valid_type and not name.endswith(match):
                logger.debug(f"Skipping book format {os.path.splitext(name)[1]}")
            elif (
                book_type_enum == BookType.EBOOK
                and CONFIG.get_bool("IMP_AUTOADD_BOOKONLY")
                and not valid_type
            ):
                logger.debug(f"Skipping non-book {name}")
            elif (
                book_type_enum == BookType.MAGAZINE
                and CONFIG.get_bool("IMP_AUTOADD_MAGONLY")
                and not valid_type
            ):
                logger.debug(f"Skipping non-mag {name}")
            else:
                logger.debug(
                    f"booktype [{book_type_str}] bookonly [{CONFIG.get_bool('IMP_AUTOADD_BOOKONLY')}] "
                    f"validtype [{valid_type}]"
                )
                srcname = os.path.join(src_path, name)
                dstname = os.path.join(autoadddir, name)
                try:
                    if savefiles:
                        logger.debug(
                            f"AutoAdd Copying file [{name}] from [{srcname}] to [{dstname}]"
                        )
                        dstname = safe_copy(srcname, dstname)
                    else:
                        logger.debug(
                            f"AutoAdd Moving file [{name}] from [{srcname}] to [{dstname}]"
                        )
                        dstname = safe_move(srcname, dstname)
                    copied = True
                except Exception as why:
                    logger.error(
                        f"AutoAdd - Failed to copy/move file [{name}] {type(why).__name__} [{why!s}]"
                    )
                    return False
                try:
                    os.chmod(syspath(dstname), 0o666)  # make rw for calibre
                except OSError as why:
                    logger.warning(
                        f"Could not set permission of {dstname} because [{why.strerror}]"
                    )
                    # permissions might not be fatal, continue

        if copied and not savefiles:  # do we want to keep the library files?
            logger.debug(f"Removing {src_path}")
            shutil.rmtree(src_path)

    except OSError as why:
        logger.error(f"AutoAdd - Failed because [{why.strerror}]")
        return False

    logger.info(f"Auto Add completed for [{src_path}]")
    return True
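
# Flattening example (hypothetical paths): a processed ebook at
#   /library/Author Name/Book Title/{Book Title.epub, Book Title.jpg, Book Title.opf}
# is copied (IMP_AUTOADD_COPY set) or moved file-by-file into the flat
# IMP_AUTOADD folder, e.g. /calibre-autoadd/Book Title.epub, since the auto-add
# watcher in Calibre does not descend into nested directories.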