# LazyLibrarian/lazylibrarian/librarysync.py
# 2026-02-05 15:16:28 +01:00
# 1479 lines, 79 KiB, Python

# This file is part of Lazylibrarian.
# Lazylibrarian is free software : you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# Lazylibrarian is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with Lazylibrarian. If not, see <http://www.gnu.org/licenses/>.
# Purpose:
# Look up book metadata or information, find it in the DB or add from dir
import contextlib
import logging
import os
import re
import shutil
import time
import traceback
import zipfile
from xml.etree import ElementTree
from rapidfuzz import fuzz
import lazylibrarian
from lazylibrarian import ROLE, database
from lazylibrarian.bookrename import audio_rename, book_rename, delete_empty_folders, id3read
from lazylibrarian.cache import ImageType, cache_img
from lazylibrarian.config2 import CONFIG
from lazylibrarian.filesystem import (
DIRS,
any_file,
book_file,
get_directory,
listdir,
opf_file,
path_exists,
path_isdir,
path_isfile,
splitext,
)
from lazylibrarian.formatter import (
get_list,
is_valid_isbn,
make_unicode,
now,
plural,
replace_all,
split_author_names,
split_title,
strip_quotes,
thread_name,
unaccented,
)
from lazylibrarian.images import img_id
from lazylibrarian.importer import (
add_author_name_to_db,
collate_nopunctuation,
search_for,
title_translates,
update_totals,
)
from lazylibrarian.preprocessor import preprocess_audio
from lib.mobi import Mobi
# noinspection PyBroadException
def get_book_meta(fdir, reason="get_book_meta"):
    """Look for a bookid in an LL.() filename or a .desktop/.url file in fdir.

    If an id is found, look it up in the database (adding the book from the
    configured info source if it is not there yet) and return a tuple of
    (authorname, bookname, bookid), or ("", "", "") if nothing usable found.
    """
    logger = logging.getLogger(__name__)
    libsynclogger = logging.getLogger('special.libsync')
    bookid = ''
    reason = f"{reason} [{fdir}]"
    libsynclogger.debug(reason)
    try:
        for item in listdir(fdir):
            libsynclogger.debug(f"Checking [{item}]")
            if 'LL.(' in item:
                bookid = item.split('LL.(')[1].split(')')[0]
                if bookid:
                    logger.debug(f"bookid {bookid} from {item}")
                    break
            if item.endswith(('.desktop', '.url')):
                with open(os.path.join(fdir, item)) as f:
                    try:
                        lynes = f.readlines()
                    except Exception as e:
                        logger.debug(f"Unable to readlines from {item}")
                        logger.debug(str(e))
                        lynes = []
                for lyne in lynes:
                    if '/book/show/' in lyne:
                        bookid = lyne.split('/book/show/')[1].split('-')[0].split('.')[0]
                        if bookid:
                            logger.debug(f"bookid {bookid} from {item}")
                            break
                if bookid:
                    # FIX: previously only the inner line loop was exited here, so
                    # the directory scan carried on and a later .desktop/.url file
                    # could silently overwrite an id that was already found
                    break
        if bookid:
            db = database.DBConnection()
            cmd = ("SELECT AuthorName,BookName FROM authors,books where authors.AuthorID = books.AuthorID and "
                   "books.BookID=?")
            try:
                existing_book = db.match(cmd, (bookid,))
                if not existing_book:
                    # not in the database yet, try to add it from the info source
                    logger.debug(f"Searching {CONFIG['BOOK_API']} for {bookid}")
                    this_source = lazylibrarian.INFOSOURCES[CONFIG['BOOK_API']]
                    api = this_source['api']
                    api = api()
                    _ = api.add_bookid_to_db(bookid, None, None, reason)
                    existing_book = db.match(cmd, (bookid,))
            finally:
                # make sure the connection is released even if the lookup raises
                db.close()
            if existing_book:
                logger.debug(f"Found {existing_book['AuthorName']}:{existing_book['BookName']} for {bookid}")
                return existing_book['AuthorName'], existing_book['BookName'], str(bookid)
            logger.debug(f"No match for {bookid} at {CONFIG['BOOK_API']}")
        return "", "", ""
    except Exception:
        logger.error(f'Error getting book meta: {traceback.format_exc()}')
        return "", "", ""
def get_book_info(fname):
    """Extract metadata from an ebook file.

    Only handles epub, mobi, azw3 and opf for now (for pdf see the note below).
    Returns a dict which, depending on what the file provides, may contain:
    type, title, creator, authors, language, isbn, publisher, narrator and
    source ids (gr_id, ol_id, hc_id, gb_id, dnb_id). On any parse error the
    dict collected so far is returned.
    """
    logger = logging.getLogger(__name__)
    fname = make_unicode(fname)
    res = {}
    extn = splitext(fname)[1]
    if not extn:
        return res
    res['type'] = extn[1:].lower()
    if res['type'] in ["mobi", "azw3"]:
        try:
            book = Mobi(fname)
            book.parse()
        except Exception as e:
            logger.error(f'Unable to parse mobi in {fname}, {type(e).__name__} {str(e)}')
            return res
        res['creator'] = make_unicode(book.author())
        res['title'] = make_unicode(book.title())
        res['language'] = make_unicode(book.language())
        res['isbn'] = make_unicode(book.isbn())
        return res
    # noinspection PyUnreachableCode
    """
    # none of the pdfs in my library had language,isbn
    # most didn't have author, or had the wrong author
    # (author set to publisher, or software used)
    # so probably not much point in looking at pdfs
    #
    from PyPDF2 import PdfFileReader
    if (extn == ".pdf"):
        pdf = PdfFileReader(open(fname, "rb"))
        txt = pdf.getDocumentInfo()
        # repackage the data here to get components we need
        res = {}
        for s in ['title','language','creator']:
            res[s] = txt[s]
        res['identifier'] = txt['isbn']
        res['type'] = "pdf"
        return res
    """
    if res['type'] == "epub":
        # FIX: use a context manager so the zipfile is always closed (it was
        # previously left open on every return path), and guard the member
        # reads which raise KeyError on malformed epubs
        try:
            with zipfile.ZipFile(fname) as zipdata:
                # find the contents metafile
                txt = zipdata.read('META-INF/container.xml')
                try:
                    tree = ElementTree.fromstring(txt)
                except Exception as e:
                    logger.error(f"Error parsing metadata from epub zipfile: {type(e).__name__} {str(e)}")
                    return res
                if not len(tree):
                    return res
                cfname = ""
                for rootfile in tree[0]:
                    att = rootfile.attrib
                    if 'full-path' in att:
                        cfname = att['full-path']
                        break
                if not cfname:
                    # no rootfile with a full-path attribute, nothing to read
                    logger.error(f"No metadata file listed in epub container: {fname}")
                    return res
                # grab the metadata block from the contents metafile
                txt = zipdata.read(cfname)
        except Exception as e:
            logger.error(f'Unable to parse epub file {fname}, {type(e).__name__} {str(e)}')
            return res
    elif res['type'] == "opf":
        with open(fname, 'rb') as f:
            txt = f.read()
        txt = make_unicode(txt)
        # sanitize any unmatched html tags or ElementTree won't parse
        dic = {'<br>': '', '</br>': ''}
        txt = replace_all(txt, dic)
    else:
        logger.error(f'Unhandled extension in get_book_info: {extn}')
        return res
    # repackage epub or opf metadata
    try:
        tree = ElementTree.fromstring(txt)
    except Exception as e:
        logger.error(f"Error parsing metadata from {fname}, {type(e).__name__} {str(e)}")
        return res
    if not len(tree):
        return res
    authors = []
    for node in tree[0]:
        tag = str(node.tag).lower()
        if '}' in tag:
            # strip the xml namespace prefix
            tag = tag.split('}')[1]
        txt = make_unicode(node.text)
        attrib = node.attrib
        if 'title' in tag:
            if not res.get('title') or attrib.get('id') == 'maintitle':
                res['title'] = txt
        elif 'language' in tag:
            res['language'] = txt
        elif 'publisher' in tag:
            res['publisher'] = txt
        elif 'narrator' in tag:
            res['narrator'] = txt
        elif 'creator' in tag and txt:
            authors.append(txt)
        elif 'identifier' in tag:
            for k in attrib.keys():
                if k.endswith('scheme'):  # can be "scheme" or "http://www.idpf.org/2007/opf:scheme"
                    if attrib[k] == 'ISBN' and is_valid_isbn(txt):
                        res['isbn'] = txt
                    elif attrib[k] == 'GOODREADS':
                        res['gr_id'] = txt
                    elif attrib[k] == 'OPENLIBRARY':
                        res['ol_id'] = txt
                    elif attrib[k] == 'HARDCOVER':
                        res['hc_id'] = txt
                    elif attrib[k] == 'GOOGLE':
                        res['gb_id'] = txt
                    elif attrib[k] == 'DNB':
                        res['dnb_id'] = txt
    if len(authors):
        # first creator listed is treated as the primary author
        res['creator'] = authors[0]
        res['authors'] = authors
    return res
def find_book_in_db(author, book, ignored=None, library='eBook', reason='find_book_in_db', source=''):
    """Fuzzy search for a book in the library.

    Returns (bookid, status) if found, or (0, '') if not.
    Preference order:
    - an exact match on author & book
    - 'Have' if the user has marked the one they want
    - one already marked 'Open' so we match the same one as before
    - prefer not-ignored over ignored
    Then falls back to fuzzy title matching against every book by the author.
    """
    logger = logging.getLogger(__name__)
    fuzzlogger = logging.getLogger('special.fuzz')
    book = book.replace('\n', ' ')
    book = " ".join(book.split())
    author = " ".join(author.split())
    logger.debug(f'Searching database for [{book}] by [{author}] {source}')
    db = database.DBConnection()
    db.connection.create_collation('nopunctuation', collate_nopunctuation)
    new_author = False
    try:
        check_exist_author = db.match('SELECT AuthorID FROM authors where AuthorName=? COLLATE NOCASE', (author,))
        if check_exist_author:
            authorid = check_exist_author['AuthorID']
        else:
            newauthorname, authorid, new_author = add_author_name_to_db(author, False, reason=reason, title=book)
            if newauthorname and newauthorname != author:
                if new_author:
                    logger.debug(f"Authorname changed from [{author}] to [{newauthorname}]")
                else:
                    logger.debug(f"Authorname changed from [{author}] to existing [{newauthorname}]")
                author = make_unicode(newauthorname)
            if not newauthorname:
                authorid = 0
        if not authorid:
            logger.warning(f"Author [{author}] not recognised")
            return 0, ''

        # first try an exact (punctuation-insensitive) title match
        cmd = ("SELECT BookID,books.Status,AudioStatus FROM books,authors where books.AuthorID = authors.AuthorID and "
               "authors.AuthorID=? and BookName=? COLLATE NOPUNCTUATION")
        if source:
            cmd += f' and books.{source} = BookID'
        res = db.select(cmd, (authorid, book))
        whichstatus = 'Status' if library == 'eBook' else 'AudioStatus'
        fuzzlogger.debug(f"Found {len(res)} exact match")
        for item in res:
            fuzzlogger.debug(f"{book} [{item[whichstatus]}]")
        # pick in priority order: Have, then Open, then any non-Ignored, then Ignored
        match = None
        for wanted in ('Have', 'Open'):
            for item in res:
                if item[whichstatus] == wanted:
                    match = item
                    break
            if match:
                break
        if not match:
            for item in res:
                if item[whichstatus] != 'Ignored':
                    match = item
                    break
        if not match:
            for item in res:
                if item[whichstatus] == 'Ignored':
                    match = item
                    break
        if match:
            logger.debug(f"Exact match [{book}] {match['BookID']}")
            # NOTE(review): this returns the whole row as the second element,
            # unlike the fuzzy paths below which return a status string -
            # preserved as-is, confirm callers rely on it before changing
            return match['BookID'], match

        # Try a more complex fuzzy match against each book in the db by this author
        cmd = ("SELECT BookID,BookName,BookSub,BookISBN,books.Status,AudioStatus FROM books,authors where "
               "books.AuthorID = authors.AuthorID ")
        if source:
            cmd += f' and books.{source} = BookID '
        ign = ''
        if library == 'eBook':
            if ignored is True:
                cmd += "and books.Status = 'Ignored' "
                ign = 'ignored '
            elif ignored is False:
                cmd += "and books.Status != 'Ignored' "
        else:
            if ignored is True:
                cmd += "and AudioStatus = 'Ignored' "
                ign = 'ignored '
            elif ignored is False:
                cmd += "and AudioStatus != 'Ignored' "
        cmd += "and authors.AuthorID=?"
        books = db.select(cmd, (authorid,))
        if not len(books):
            logger.warning(f"No matching titles by {authorid}:{author} in database "
                           f"(source={source},library={library},ignored={ignored})")
            return 0, ''
        fuzzlogger.debug(cmd)

        # best scores seen so far for each of the three fuzzy metrics
        best_ratio = 0.0
        best_partial = 0.0
        best_partname = 0.0
        have_prefix = False
        ratio_name = ""
        partial_name = ""
        partname_name = ""
        prefix_name = ""
        ratio_id = 0
        partial_id = 0
        partname_id = 0
        prefix_id = 0
        partname = 0
        best_type = ''
        partial_type = ''
        partname_type = ''
        prefix_type = ''
        book_lower = unaccented(book.lower(), only_ascii=False)
        book_lower = strip_quotes(book_lower)
        # source is the book_key eg hc_id, dnb_id
        # from this we need to see if the source provides subtitles
        has_subtitles = []
        for item in lazylibrarian.INFOSOURCES.keys():
            this_source = lazylibrarian.INFOSOURCES[item]
            if this_source['has_subs']:
                has_subtitles.append(this_source['book_key'])
        if source in has_subtitles:
            book_partname = ''
            book_sub = ''
        else:
            book_partname, book_sub, _ = split_title(author, book_lower)
        # We want to match a book on disk with a subtitle to a shorter book in the DB
        # - Strict prefix match with a : followed by junk is allowed
        # - Strict prefix match with a ()ed remainder is allowed
        # But the leading : is removed by has_clean_subtitle, so we allow all non (): subtitles
        has_clean_subtitle = re.search(r"^\s+([^:()]+|\([^)]+\))$", book_sub) is not None
        logger.debug(f"Searching {len(books)} {ign}{plural(len(books), 'book')} by "
                     f"[{authorid}:{author}] in database for [{book}]")
        if book_partname == book_lower:
            book_partname = ''
        fuzzlogger.debug(f'book partname [{book_partname}] book_sub [{book_sub}]')

        # FIX: defined once before the loop instead of being re-created on
        # every iteration of the books loop
        def isitbest(aratio, abest_ratio, aratio_name, abest_type, astatus, booklower, abookname):
            """Decide whether this candidate should replace the current best.

            Higher score always wins; on a tie prefer 'Have', then the title
            sharing more words with what we want, then non-Ignored status.
            """
            use_it = False
            if aratio > abest_ratio:
                use_it = True
            elif aratio == abest_ratio:
                use_it = astatus == 'Have'
                if not use_it:
                    # tiebreak on how many wanted words each title contains
                    want_words = get_list(booklower)
                    best_words = get_list(aratio_name.lower())
                    new_words = get_list(abookname.lower())
                    best_cnt = 0
                    new_cnt = 0
                    for wrd in want_words:
                        if wrd in best_words:
                            best_cnt += 1
                        if wrd in new_words:
                            new_cnt += 1
                    if new_cnt > best_cnt:
                        use_it = True
                if not use_it and abest_type == 'Ignored':
                    # prefer a non-ignored entry over an ignored one
                    use_it = astatus != 'Ignored'
            return use_it

        for a_book in books:
            a_bookname = a_book['BookName']
            if a_book['BookSub'] and book_sub:
                a_bookname += f" {a_book['BookSub']}"
            fuzzlogger.debug(f"Checking [{a_bookname}]")
            # tidy up everything to raise fuzziness scores
            # still need to lowercase for matching against partial_name later on
            a_book_lower = unaccented(a_bookname.lower(), only_ascii=False)
            a_book_lower = strip_quotes(a_book_lower)
            for entry in title_translates:
                if entry[0] in a_book_lower and entry[0] not in book_lower and entry[1] in book_lower:
                    a_book_lower = a_book_lower.replace(entry[0], entry[1])
                if entry[1] in a_book_lower and entry[1] not in book_lower and entry[0] in book_lower:
                    a_book_lower = a_book_lower.replace(entry[1], entry[0])
            #
            # token sort ratio allows "Lord Of The Rings, The" to match "The Lord Of The Rings"
            ratio = fuzz.token_sort_ratio(book_lower, a_book_lower)
            # partial ratio allows "Lord Of The Rings" to match "The Lord Of The Rings"
            partial = fuzz.partial_ratio(book_lower, a_book_lower)
            if book_partname:
                # partname allows "Lord Of The Rings (illustrated edition)" to match "The Lord Of The Rings"
                partname = fuzz.partial_ratio(book_partname, a_book_lower)
            # lose points if the difference is just numbers so we don't match "book 2" and "book 3"
            # eg "He Who Fights With Monsters #7" is not the same as "He Who Fights With Monsters 05"
            # AND "He Who Fights With Monsters" is not the same as "He Who Fights With Monsters #2"
            # BUT The Secret of Secrets (Robert Langdon, #6) IS the same as The Secret of Secrets
            set1 = set(book_lower.split())
            set2 = set(a_book_lower.split())
            differences = set1.symmetric_difference(set2)
            numbers = []
            for word in differences:
                # see if word coerces to an integer or a float
                word = word.replace('-', '')  # merge ranges so books 1-3 is different to books 1-5
                try:
                    numbers.append(float(re.findall(r'\d+\.\d+', word)[0]))
                except IndexError:
                    with contextlib.suppress(IndexError):
                        numbers.append(int(re.findall(r'\d+', word)[0]))
            if len(numbers) == 2 and numbers[0] != numbers[1]:
                # make sure we are below match threshold
                if ratio >= CONFIG.get_int('NAME_RATIO'):
                    ratio = CONFIG.get_int('NAME_RATIO') - 5
                if partial >= CONFIG.get_int('NAME_PARTIAL'):
                    partial = CONFIG.get_int('NAME_PARTIAL') - 5
                if partname >= CONFIG.get_int('NAME_PARTNAME'):
                    partname = CONFIG.get_int('NAME_PARTNAME') - 5
                fuzzlogger.debug(f"Downgraded ratios as different numbers {numbers}")
            if len(numbers) == 1:
                # If the number is in braces and other words are in the braces,
                # looks like it's series details eg (Robert Langdon, #6)
                # and the comparison book doesn't include series details
                ok = False
                try:
                    series_details = book_lower.split('(')[1].split(')')[0].split()
                    for word in series_details:
                        word = word.replace('-', '')
                        try:
                            n = float(re.findall(r'\d+\.\d+', word)[0])
                        except IndexError:
                            try:
                                n = int(re.findall(r'\d+', word)[0])
                            except IndexError:
                                n = None
                        if n is not None and n == numbers[0]:
                            ok = True  # found the number
                            break
                    if len(series_details) == 1:
                        # don't accept only number, eg (#6)
                        ok = False
                        if numbers[0] == 1:
                            # unless first book in series, so
                            # He Who Fights With Monsters (He Who Fights With Monsters #1)
                            # and He Who Fights With Monsters (#1)
                            # and He Who Fights With Monsters
                            # are all the same book
                            ok = True
                except IndexError:
                    ok = False
                if not ok:
                    # make sure we are below match threshold
                    if ratio >= CONFIG.get_int('NAME_RATIO'):
                        ratio = CONFIG.get_int('NAME_RATIO') - 5
                    if partial >= CONFIG.get_int('NAME_PARTIAL'):
                        partial = CONFIG.get_int('NAME_PARTIAL') - 5
                    if partname >= CONFIG.get_int('NAME_PARTNAME'):
                        partname = CONFIG.get_int('NAME_PARTNAME') - 5
                    fuzzlogger.debug(f"Downgraded ratios as one has number {numbers}")
            # lose a point for each extra word in the fuzzy matches so we get the closest match
            # this should also stop us matching single books against omnibus editions
            title_length = len(get_list(a_book_lower))
            words = len(get_list(book_lower))
            words -= title_length
            ratio -= abs(words)
            partial -= abs(words)
            if title_length <= 2:  # very short titles can't use partial match, too many false positives
                fuzzlogger.debug(f"Not using partial ratio as short title [{title_length}]")
                partial = CONFIG.get_int('NAME_PARTIAL') - 5
            # don't subtract extra words from partname so we can compare books with/without subtitle
            # partname -= abs(words)
            if isitbest(ratio, best_ratio, ratio_name, best_type, a_book[whichstatus], book_lower, a_bookname):
                best_ratio = ratio
                best_type = a_book[whichstatus]
                ratio_name = a_book['BookName']
                ratio_id = a_book['BookID']
            if isitbest(partial, best_partial, partial_name, partial_type, a_book[whichstatus], book_lower,
                        a_bookname):
                best_partial = partial
                partial_type = a_book[whichstatus]
                partial_name = a_book['BookName']
                partial_id = a_book['BookID']
            if isitbest(partname, best_partname, partname_name, partname_type, a_book[whichstatus],
                        book_lower, a_bookname):
                best_partname = partname
                partname_type = a_book[whichstatus]
                partname_name = a_book['BookName']
                partname_id = a_book['BookID']
            if a_book_lower == book_partname and has_clean_subtitle:
                have_prefix = True
                prefix_type = a_book[whichstatus]
                prefix_name = a_book['BookName']
                prefix_id = a_book['BookID']

        # accept the first metric that reaches its configured threshold
        if best_ratio >= CONFIG.get_int('NAME_RATIO'):
            logger.debug(f"Fuzz match ratio [{round(best_ratio, 2)}] [{book}] [{ratio_name}] {ratio_id}")
            return ratio_id, best_type
        if best_partial >= CONFIG.get_int('NAME_PARTIAL'):
            logger.debug(f"Fuzz match partial [{round(best_partial, 2)}] [{book}] [{partial_name}] {partial_id}")
            return partial_id, partial_type
        if best_partname >= CONFIG.get_int('NAME_PARTNAME'):
            logger.debug(f"Fuzz match partname [{round(best_partname, 2)}] [{book}] [{partname_name}] {partname_id}")
            return partname_id, partname_type
        if have_prefix:
            logger.debug(f"Fuzz match prefix [{book}] [{prefix_name}] {prefix_id}")
            return prefix_id, prefix_type
        if books:
            logger.debug(
                f'Best fuzz results [{author} - {book}] ratio [{round(best_ratio, 2)},{ratio_name},{ratio_id}], '
                f'partial [{round(best_partial, 2)},{partial_name},{partial_id}], '
                f'partname [{round(best_partname, 2)},{partname_name},{partname_id}]')
        if new_author:
            # we auto-added a new author but they don't have the book so we should remove them again
            db.action('DELETE from authors WHERE AuthorID=?', (authorid,))
    except Exception as e:
        logger.error(str(e))
        return 0, ''
    finally:
        # FIX: close the connection on every exit path (early returns above
        # previously left it open)
        db.close()
    # FIX: always return a (bookid, status) tuple so callers can safely unpack
    return 0, ''
def library_scan(startdir=None, library='eBook', authid=None, remove=True):
""" Scan a directory tree adding new books into database
Return how many books you added """
logger = logging.getLogger(__name__)
libsynclogger = logging.getLogger('special.libsync')
matchinglogger = logging.getLogger('special.matching')
destdir = get_directory(library)
if not startdir:
if not destdir:
logger.warning(f'Cannot find destination directory: {destdir}. Not scanning')
return 0
startdir = destdir
if not path_isdir(startdir):
logger.warning(f'Cannot find directory: {startdir}. Not scanning')
return 0
db = database.DBConnection()
db.upsert("jobs", {"Start": time.time()}, {"Name": thread_name()})
if startdir == destdir:
lazylibrarian.AUTHORS_UPDATE = 1
logger.debug(f"Counting directories: {startdir}")
dir_cnt = 0
for rootdir, dirnames, _filenames in os.walk(startdir):
for directory in dirnames:
lazylibrarian.libraryscan_data = f"Counting directories: {dir_cnt}"
# prevent magazine being scanned
c = directory[0]
ignorefile = '.ll_ignore'
dirname = os.path.join(rootdir, directory)
if c in ["_", "."]:
logger.debug(f'Skipping {dirname}')
# ignore directories containing this special file
elif path_exists(os.path.join(rootdir, directory, ignorefile)):
logger.debug(f'Found .ll_ignore file in {dirname}')
elif (book_file(dirname, booktype='ebook', config=CONFIG, recurse=False) or
book_file(dirname, booktype='audiobook', config=CONFIG, recurse=False)):
dir_cnt += 1
msg = f"Found {dir_cnt} directories"
logger.debug(msg)
lazylibrarian.libraryscan_data = msg
processed_subdirectories = []
rehit = []
remiss = []
# noinspection PyBroadException
try:
# keep statistics of full library scans
if startdir == destdir:
lazylibrarian.AUTHORS_UPDATE = 1
if library == 'eBook':
lazylibrarian.EBOOK_UPDATE = 1
elif library == 'AudioBook':
lazylibrarian.AUDIO_UPDATE = 1
db.action('DELETE from stats')
try: # remove any extra whitespace in authornames
authors = db.select("SELECT AuthorID,AuthorName FROM authors WHERE instr(AuthorName, ' ') > 0")
if authors:
logger.info(f"Removing extra spaces from {len(authors)} {plural(len(authors), 'authorname')}")
for author in authors:
authorid = author["AuthorID"]
authorname = ' '.join(author['AuthorName'].split())
# Have we got author name both with-and-without extra spaces? If so, merge them
duplicate = db.match(
'Select AuthorID,AuthorName FROM authors WHERE AuthorName=?', (authorname,))
if duplicate:
db.action('DELETE from authors where authorname=?', (author['AuthorName'],))
if author['AuthorID'] != duplicate['AuthorID']:
db.action('UPDATE books set AuthorID=? WHERE AuthorID=?',
(duplicate['AuthorID'], author['AuthorID']))
else:
db.action('UPDATE authors set AuthorName=? WHERE AuthorID=?', (authorname, authorid))
except Exception as e:
logger.error(f'{type(e).__name__} {str(e)}')
else:
if authid:
match = db.match('SELECT authorid from authors where authorid=?', (authid,))
if match:
control_value_dict = {"AuthorID": authid}
new_value_dict = {"Status": "Loading"}
db.upsert("authors", new_value_dict, control_value_dict)
logger.info(f'Scanning {library} directory: {startdir}')
new_book_count = 0
modified_count = 0
rescan_count = 0
rescan_hits = 0
file_count = 0
# allow full_scan override so we can scan in alternate directories without deleting others
if remove:
if library == 'eBook':
cmd = ("select AuthorName, BookName, BookFile, BookID from books,authors where BookLibrary "
"is not null and books.AuthorID = authors.AuthorID")
if startdir != destdir:
cmd += f" and instr(BookFile, '{startdir}') = 1"
books = db.select(cmd)
status = CONFIG['NOTFOUND_STATUS']
logger.info(f'Missing eBooks will be marked as {status}')
for book in books:
bookfile = book['BookFile']
if bookfile and not path_isfile(bookfile):
db.action("update books set Status=?,BookFile='',BookLibrary='' where BookID=?",
(status, book['BookID']))
logger.warning(f"eBook {book['AuthorName']} - {book['BookName']} updated as not found on disk")
else: # library == 'AudioBook':
cmd = ("select AuthorName, BookName, AudioFile, BookID from books,authors where AudioLibrary "
"is not null and books.AuthorID = authors.AuthorID")
if startdir != destdir:
cmd += f" and instr(AudioFile, '{startdir}') = 1"
books = db.select(cmd)
status = CONFIG['NOTFOUND_STATUS']
logger.info(f'Missing AudioBooks will be marked as {status}')
for book in books:
bookfile = book['AudioFile']
if bookfile and not path_isfile(bookfile):
db.action("update books set AudioStatus=?,AudioFile='',AudioLibrary='' where BookID=?",
(status, book['BookID']))
logger.warning(
f"Audiobook {book['AuthorName']} - {book['BookName']} updated as not found on disk")
# to save repeat-scans of the same directory if it contains multiple formats of the same book,
# keep track of which directories we've already looked at
warned_no_new_authors = False # only warn about the setting once
booktypes = ''
count = -1
if library == 'eBook':
booktype_list = get_list(CONFIG['EBOOK_TYPE'])
for book_type in booktype_list:
count += 1
if count == 0:
booktypes = book_type
else:
booktypes = f"{booktypes}|{book_type}"
matchto = CONFIG['EBOOK_DEST_FILE']
else:
booktype_list = get_list(CONFIG['AUDIOBOOK_TYPE'])
for book_type in booktype_list:
count += 1
if count == 0:
booktypes = book_type
else:
booktypes = f"{booktypes}|{book_type}"
matchto = CONFIG['AUDIOBOOK_DEST_FILE']
match_string = ''
for char in matchto:
if not char.isalpha():
match_string += '\\'
match_string = match_string + char
match_string = match_string.replace(
"\\$Author", "(?P<author>.*?)").replace(
"\\$SortAuthor", "(?P<sauthor>.*?)").replace(
"\\$Title", "(?P<book>.*?)").replace(
"\\$SortTitle", "(?P<sbook>.*?)").replace(
"\\$Series", "(?P<series>.*?)").replace(
"\\$SerNum", "(?P<sernum>.*?)").replace(
"\\$SerName", "(?P<sername>.*?)").replace(
"\\$FmtName", "(?P<fmtname>.*?)").replace(
"\\$FmtNum", "(?P<fmtnum>.*?)").replace(
"\\$PadNum", "(?P<padnum>.*?)").replace(
"\\$PubYear", "(?P<pubyear>.*?)").replace(
"\\$SerYear", "(?P<seryear>.*?)").replace(
"\\$Part", "(?P<part>.*?)").replace(
"\\$Total", "(?P<total>.*?)").replace(
"\\$Abridged", "(?P<abridged>.*?)").replace(
"\\$\\$", "\\ ") + r'\.[' + booktypes + ']'
matchinglogger.debug(f"Pattern [{match_string}]")
# noinspection PyBroadException
try:
pattern = re.compile(match_string, re.VERBOSE | re.IGNORECASE)
except Exception as e:
logger.error(f"Pattern failed for [{matchto}] {str(e)}")
pattern = None
last_authorid = None
for rootdir, dirnames, filenames in os.walk(startdir):
for directory in dirnames:
# prevent magazine being scanned
c = directory[0]
ignorefile = '.ll_ignore'
if c in ["_", "."]:
logger.debug(f'Skipping {os.path.join(rootdir, directory)}')
dirnames.remove(directory)
# ignore directories containing this special file
elif path_exists(os.path.join(rootdir, directory, ignorefile)):
logger.debug(f'Found .ll_ignore file in {os.path.join(rootdir, directory)}')
dirnames.remove(directory)
subdirectory = rootdir.replace(make_unicode(startdir), '')
total_items = dir_cnt
for files in filenames:
current_item = len(processed_subdirectories)
current_percent = int(current_item * 100 / total_items) if total_items else 0
lazylibrarian.libraryscan_data = f"{current_item}/{total_items}/{current_percent}"
# Added new code to skip if we've done this directory before.
# Made this conditional with a switch in config.ini
# in case user keeps multiple different books in the same subdirectory
if (library == 'eBook' and CONFIG.get_bool('IMP_SINGLEBOOK') and
(subdirectory in processed_subdirectories) or library == 'AudioBook'
and (subdirectory in processed_subdirectories)):
libsynclogger.debug(f"[{subdirectory}] already scanned")
elif not path_isdir(rootdir):
logger.debug(f"Directory {repr(rootdir)} missing (renamed?)")
else:
# If this is a book, try to get author/title/isbn/language
# if epub or mobi, read metadata from the book
# If metadata.opf exists, use that allowing it to override
# embedded metadata. User may have edited metadata.opf
# to merge author aliases together
# If all else fails, try pattern match for author/title
# and look up isbn/lang from LT or GR later
if (library == 'eBook' and CONFIG.is_valid_booktype(files, 'ebook')) or \
(library == 'AudioBook' and CONFIG.is_valid_booktype(files, 'audiobook')):
logger.debug(f"[{startdir}] Now scanning subdirectory {subdirectory}")
file_count += 1
language = "Unknown"
isbn = ""
book = ""
author = ""
publisher = ""
narrator = ""
extn = splitext(files)[1]
bookid = None
forced_bookid = ''
# if it's an epub or a mobi we can try to read metadata from it
res = {}
if extn.lower() in [".epub", ".mobi"]:
book_filename = os.path.join(rootdir, files)
try:
res = get_book_info(book_filename)
except Exception as e:
logger.error(f'get_book_info failed for {book_filename}, {type(e).__name__} {str(e)}')
res = {}
# title and creator are the minimum we need
if 'title' in res and 'creator' in res:
book = res['title']
author = res['creator']
if 'language' in res:
language = res['language']
isbn = res.get('isbn', '')
if 'type' in res:
extn = res['type']
logger.debug(f"book meta [{isbn}] [{language}] [{author}] [{book}] [{extn}]")
if not author and book:
logger.debug(f"Book meta incomplete in {book_filename}")
# calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
# just look for any .opf file in the current directory since we don't know
# LL preferred authorname/bookname at this point.
# Allow metadata in opf file to override book metadata as may be users pref
metafile = ''
try:
metafile = opf_file(rootdir)
if metafile:
res2 = get_book_info(metafile)
for item in res2:
res[item] = res2[item]
except Exception as e:
logger.error(f'get_book_info failed for {metafile}, {type(e).__name__} {str(e)}')
# title and creator are the minimum we need
if res and 'title' in res and 'creator' in res:
book = res['title']
author = res['creator']
author = author.strip() # some audiobooks have fields of spaces
book = book.strip()
if 'language' in res:
language = res['language']
if 'isbn' in res:
isbn = res['isbn']
if 'publisher' in res:
publisher = res['publisher']
if 'narrator' in res:
narrator = res['narrator']
ident = ''
if 'gr_id' in res:
gr_id = res['gr_id']
ident = f"GR: {gr_id}"
if 'gb_id' in res:
gb_id = res['gb_id']
ident = f"GB: {gb_id}"
if 'ol_id' in res:
ol_id = res['ol_id']
ident = f"OL: {ol_id}"
if 'hc_id' in res:
hc_id = res['hc_id']
ident = f"HC: {hc_id}"
if 'dnb_id' in res:
dnb_id = res['dnb_id']
ident = f"DN: {dnb_id}"
logger.debug(
f"file meta [{isbn}] [{language}] [{author}] [{book}] [{ident}] [{publisher}] "
f"[{narrator}]")
if not author or not book:
logger.debug(f"File meta incomplete in {metafile}")
if not author or not book and CONFIG.is_valid_booktype(files, 'audiobook'):
# no author/book from metadata file, and not embedded either
# or audiobook which may have id3 tags
filename = os.path.join(rootdir, files)
id3tags = id3read(filename)
author = id3tags.get('author')
book = id3tags.get('title')
if not narrator:
narrator = id3tags.get('narrator')
if not author or not book:
# try for details from a special file
author, book, forced_bookid = get_book_meta(rootdir, reason="libraryscan")
# Failing anything better, just pattern match on filename
if pattern and (not author or not book):
# might need a different pattern match for audiobooks
# as they often seem to have xxChapter-Seriesnum Author Title
# but hopefully the tags will get there first...
match = pattern.match(files)
if match:
try:
author = match.group("author")
except IndexError:
author = ''
if not author:
try:
author = match.group("sauthor")
except IndexError:
author = ''
try:
book = match.group("book")
except IndexError:
book = ''
if not book:
try:
book = match.group("sbook")
except IndexError:
book = ''
book = make_unicode(book)
author = make_unicode(author)
if not author or not book:
logger.debug(f"Pattern match failed [{files}]")
else:
logger.debug(f"Pattern match author[{author}] book[{book}]")
if publisher and publisher.lower() in get_list(CONFIG['REJECT_PUBLISHER']):
logger.warning(f"Ignoring {files}: Publisher {publisher}")
author = '' # suppress
if not author or not book:
logger.debug(f"No valid {library} found in {subdirectory}")
else:
# flag that we found a book in this subdirectory
if subdirectory:
processed_subdirectories.append(subdirectory)
# If we have a valid looking isbn, and language != "Unknown", add it to cache
if language != "Unknown" and is_valid_isbn(isbn):
logger.debug(f"Found Language [{language}] ISBN [{isbn}]")
# we need to add it to language cache if not already
# there, is_valid_isbn has checked length is 10 or 13
if len(isbn) == 10:
isbnhead = isbn[0:3]
else:
isbnhead = isbn[3:6]
match = db.match('SELECT lang FROM languages where isbn=?', (isbnhead,))
if not match:
db.action('insert into languages values (?, ?)', (isbnhead, language))
logger.debug(f"Cached Lang [{language}] ISBN [{isbnhead}]")
else:
logger.debug(f"Already cached Lang [{language}] ISBN [{isbnhead}]")
newauthorname, authorid, new_author = add_author_name_to_db(
author, addbooks=None, reason=f"Add author of {book}", title=book)
if last_authorid and last_authorid != authorid:
update_totals(last_authorid)
last_authorid = authorid
if newauthorname and newauthorname != author:
logger.debug(f"Preferred authorname changed from [{author}] to [{newauthorname}]")
author = make_unicode(newauthorname)
if not authorid:
logger.warning(f"Authorname {author} not added to database")
if authorid:
# author exists, check if this book by this author is in our database
# metadata might have quotes in book name
# some books might be stored under a different author name
# e.g. books by multiple authors, books where author is "writing as"
# or books we moved to "merge" authors
book = strip_quotes(book)
# If we have a valid ID, use that
mtype = ''
match = None
this_source = lazylibrarian.INFOSOURCES[CONFIG['BOOK_API']]
try:
bookid = eval(this_source['book_key'])
except NameError:
bookid = None
if bookid:
match = db.match('SELECT AuthorID,Status FROM books where BookID=?',
(bookid,))
if match:
mtype = match['Status']
if authorid != match['AuthorID']:
logger.warning(
f"Metadata authorid [{authorid}] does not match database "
f"[{match['AuthorID']}]")
if not match:
cmd = "SELECT Status,BookID FROM books where BookName=? and AuthorID=?"
match = db.match(cmd, (book, authorid))
if match:
logger.warning(
f"Metadata bookid [{bookid}] not found in database, title matches "
f"{match['BookID']}")
mtype = match['Status']
# update stored bookid to match preferred (owned) book
db.action('PRAGMA foreign_keys = OFF')
for table in ['books', 'member', 'wanted', 'failedsearch', 'genrebooks']:
cmd = f"UPDATE {table} SET BookID=? WHERE BookID=?"
db.action(cmd, (bookid, match['BookID']))
db.action('PRAGMA foreign_keys = ON')
if not match:
# Try and find in database under author and bookname
# as we may have it under a different bookid or isbn to goodreads/googlebooks
# which might have several bookid/isbn for the same book
reason = f'Author exists for {book}'
logger.debug(reason)
oldbookid = bookid
bookid, mtype = find_book_in_db(author, book, reason=reason)
if bookid:
if oldbookid:
logger.warning(
f"Metadata bookid [{oldbookid}] not found in database, using {bookid}")
else:
logger.debug(f"Found bookid {bookid} for {book}")
elif oldbookid:
bookid = oldbookid
logger.warning(
f"Metadata bookid [{bookid}] not found in database, trying to add...")
this_source = lazylibrarian.INFOSOURCES[CONFIG['BOOK_API']]
api = this_source['api']
api = api()
book_id = eval(this_source['book_key'])
if book_id:
src = this_source['src']
_ = api.add_bookid_to_db(book_id, None, None, f"Added by {src}"
f" librarysync")
if bookid:
# see if it's there now...
match = db.match('SELECT AuthorID,BookName,Status from books where BookID=?',
(bookid,))
if match:
mtype = match['Status']
book = match['BookName']
if authorid != match['AuthorID']:
logger.warning(
f"Metadata authorid [{authorid}] does not match database "
f"[{match['AuthorID']}]")
else:
logger.debug(f"Unable to add bookid via metadata bookid ({bookid})")
bookid = ""
if not bookid and isbn:
# See if the isbn is in our database
match = db.match('SELECT AuthorID,BookID,Status FROM books where BookIsbn=?',
(isbn,))
if match:
bookid = match['BookID']
mtype = match['Status']
if authorid != match['AuthorID']:
logger.warning(
f"Metadata authorid [{authorid}] does not match database "
f"[{match['AuthorID']}]")
if bookid and mtype == "Ignored":
logger.warning(
f"Book {book} by {author} is marked Ignored in database, importing anyway")
if (not bookid and forced_bookid and
lazylibrarian.manual_import.process_book_from_dir(source_dir=rootdir,
library=library,
bookid=forced_bookid)):
bookid = forced_bookid
if not bookid:
# get author name from (grand)parent directory of this book directory
book_filename = os.path.join(rootdir, files)
newauthorname = book_filename[len(startdir.rstrip(os.sep)) + 1:].split(os.sep)[0]
if ' ' not in newauthorname:
newauthorname = os.path.basename(os.path.dirname(rootdir))
newauthorname = make_unicode(newauthorname)
# calibre replaces trailing periods with _ e.g. Smith Jr. -> Smith Jr_
if newauthorname.endswith('_'):
newauthorname = f"{newauthorname[:-1]}."
if author.lower() != newauthorname.lower():
logger.debug(f"Trying authorname [{newauthorname}]")
bookid, mtype = find_book_in_db(newauthorname, book, ignored=False,
reason=f'New author for {book}')
if bookid and mtype == "Ignored":
logger.warning(f"Book {book} by {newauthorname} is marked "
f"Ignored in database, importing anyway")
if bookid:
logger.warning(
f"{book} not found under [{author}], found under [{newauthorname}]")
# at this point if we still have no bookid, it looks like we
# have author and book title but no database entry for it
if not bookid:
sources = [CONFIG['BOOK_API']]
if CONFIG.get_bool('MULTI_SOURCE'):
# Either original source doesn't have the book, or it didn't match language
# prefs, or it's under another author (pseudonym, series continuation author)
# Since we have the book anyway, try and reload it
for source in lazylibrarian.INFOSOURCES.keys():
this_source = lazylibrarian.INFOSOURCES[source]
if source not in sources and CONFIG[this_source['enabled']]:
sources.append(source)
searchresults = []
for source in sources:
searchresults += search_for(f"{book}<ll>{author}", source)
sortedlist = sorted(searchresults,
key=lambda x: (x['highest_fuzz'], x['bookrate_count']),
reverse=True)
rescan_count += 1
bookid = ''
bookauthor = ''
booktitle = ''
language = ''
source = ''
closest = 0
bestmatch = 0
if sortedlist:
item = sortedlist[0]
closest = item['highest_fuzz']
while bestmatch + 1 < len(sortedlist) and item['source'] != CONFIG['BOOK_API']:
bestmatch += 1
if sortedlist[bestmatch]['highest_fuzz'] < closest:
break
if sortedlist[bestmatch]['source'] == CONFIG['BOOK_API']:
item = sortedlist[bestmatch]
if closest >= CONFIG.get_int('NAME_PARTIAL'):
rescan_hits += 1
logger.debug(
f"Rescan {item['source']} found [{item['authorname']}] "
f"{item['bookname']} : {item['booklang']}: {item['bookid']}")
bookid = item['bookid']
bookauthor = item['authorname']
booktitle = item['bookname']
language = item['booklang']
source = item['source']
rehit.append(booktitle)
if bookid:
cmd = "SELECT * from books WHERE BookID=?"
check_status = db.match(cmd, (bookid,))
if check_status:
logger.debug(f"{bookid} [{bookauthor}] matched on rescan for {booktitle}")
else:
logger.debug(f"Adding {bookid} [{bookauthor}] on rescan for {booktitle}")
this_source = lazylibrarian.INFOSOURCES[source]
api = this_source['api']
api = api()
_ = api.add_bookid_to_db(bookid, reason=f"Librarysync {source} "
f"rescan {bookauthor}")
if language and language != "Unknown":
# set language from book metadata
logger.debug(f"Setting language from metadata {booktitle} : {language}")
cmd = "UPDATE books SET BookLang=? WHERE BookID=?"
db.action(cmd, (language, bookid))
else:
logger.warning(f"Rescan no match for {book}, closest {round(closest, 2)}%")
remiss.append(f"{book}:{author} ({round(closest, 2)}%)")
# see if it's there now...
if bookid:
cmd = ("SELECT books.Status, books.AuthorID, AudioStatus, BookFile, AudioFile, "
"AuthorName, BookName, BookID, BookDesc, BookGenre,Narrator from "
"books,authors where books.AuthorID = authors.AuthorID and BookID=?")
check_status = db.match(cmd, (bookid,))
if not check_status:
logger.debug(f'Unable to find bookid {bookid} in database')
else:
if CONFIG['CONTRIBUTING_AUTHORS'] and res and 'authors' in res:
authorlist = split_author_names(res['authors'],
get_list(CONFIG['MULTI_AUTHOR_SPLIT']))
for auth in authorlist:
if auth != check_status['AuthorID']: # primary author already added
aname, aid, added = (
add_author_name_to_db(auth, addbooks=False,
reason=f"Contributor to "
f"{check_status['BookName']}"))
if aid:
# Add any new authors as contributing authors
db.action('INSERT into bookauthors (AuthorID, BookID, Role) '
'VALUES (?, ?, ?)',
(aid, check_status['BookID'], ROLE['CONTRIBUTING']),
suppress='UNIQUE')
update_totals(aid)
book_filename = None
if library == 'eBook':
if check_status['Status'] not in ['Open', 'Have']:
# we found a new book
new_book_count += 1
db.action(
'UPDATE books set Status=?, BookLibrary=? where BookID=?',
(CONFIG['FOUND_STATUS'], now(), bookid))
# create an opf file if there isn't one
book_filename = os.path.join(rootdir, files)
_ = lazylibrarian.metadata_opf.create_opf(os.path.dirname(book_filename),
check_status,
splitext(os.path.basename(
book_filename))[0],
overwrite=False)
if CONFIG.get_bool('IMP_RENAME'):
new_filename, _ = book_rename(bookid)
if new_filename and new_filename != check_status['BookFile']:
book_filename = new_filename
db.action("UPDATE books SET BookFile=?,Status=? where BookID=?",
(book_filename, CONFIG['FOUND_STATUS'], bookid))
# check preferred type and store book location
# so we can check if it gets (re)moved
book_basename = splitext(book_filename)[0]
booktype_list = get_list(CONFIG['EBOOK_TYPE'])
for book_type in booktype_list:
preferred_type = f"{book_basename}.{book_type}"
if path_exists(preferred_type):
book_filename = preferred_type
if book_filename != check_status['BookFile']:
modified_count += 1
logger.debug(f"Librarysync link to preferred type {book_type}")
db.action("UPDATE books SET BookFile=?,Status=? where BookID=?",
(book_filename, CONFIG['FOUND_STATUS'], bookid))
break
if 'unknown' in check_status['AuthorName'].lower():
newauth = db.match("SELECT * from authors WHERE AuthorName=?",
(author,))
if newauth:
logger.debug(
f"Moving {bookid} from {check_status['AuthorName']} "
f"to {author}")
db.action('UPDATE books set AuthorID=? where BookID=?',
(newauth['AuthorID'], bookid))
db.action("DELETE from authors WHERE AuthorID=?",
(check_status['AuthorID'],))
elif library == 'AudioBook':
if 'narrator' and not check_status['Narrator']:
db.action("update books set narrator=? where bookid=?", (narrator,
bookid))
check_status = db.match(cmd, (bookid,))
if check_status['AudioStatus'] != 'Open':
# we found a new audiobook
new_book_count += 1
db.action(
'UPDATE books set AudioStatus=?, AudioLibrary=? where BookID=?',
(CONFIG['FOUND_STATUS'], now(), bookid))
# store audiobook location so we can check if it gets (re)moved
book_filename = os.path.join(rootdir, files)
# create an opf if there isn't one
_ = lazylibrarian.metadata_opf.create_opf(os.path.dirname(book_filename),
check_status,
check_status['BookName'],
overwrite=False)
# link to the first part of multi-part audiobooks
tokmatch = ''
for token in [' 001.', ' 01.', ' 1.', ' 001 ', ' 01 ', ' 1 ', '01']:
if tokmatch:
break
for e in listdir(rootdir):
if CONFIG.is_valid_booktype(e, booktype='audiobook') and token in e:
book_filename = os.path.join(rootdir, e)
logger.debug(
f"Librarysync link to preferred part {token}: "
f"{book_filename}")
tokmatch = token
break
db.action('UPDATE books set AudioFile=? where BookID=?',
(book_filename, bookid))
if CONFIG['AUDIOBOOK_DEST_FILE']:
rename = CONFIG.get_bool('IMP_RENAME')
new_filename = audio_rename(bookid, rename=rename, playlist=True)
if new_filename and new_filename != book_filename:
book_filename = new_filename
if rename:
preprocess_audio(os.path.dirname(book_filename), bookid,
author, book, tag=True)
# location may have changed since last scan
if book_filename != check_status['AudioFile']:
modified_count += 1
logger.warning(f"Updating audiobook location for {author} {book} from "
f"{check_status['AudioFile']} to {book_filename}")
db.action('UPDATE books set AudioFile=?,AudioStatus=? where BookID=?',
(book_filename, CONFIG['FOUND_STATUS'], bookid))
# update cover file to any .jpg in book folder, prefer cover.jpg
if book_filename:
bookdir = os.path.dirname(book_filename)
cachedir = DIRS.CACHEDIR
cacheimg = os.path.join(cachedir, 'book', f"{bookid}.jpg")
coverimg = os.path.join(bookdir, 'cover.jpg')
if not path_isfile(coverimg):
coverimg = any_file(bookdir, '.jpg')
if coverimg:
shutil.copyfile(coverimg, cacheimg)
else:
if library == 'eBook':
logger.warning(
f"Failed to match book [{book}] by [{author}] in database")
else:
logger.warning(
f"Failed to match audiobook [{book}] by [{author}] in database")
if not authorid and not warned_no_new_authors and not CONFIG.get_bool('ADD_AUTHOR'):
logger.warning("Add authors to database is disabled")
warned_no_new_authors = True
if new_author and not bookid:
# we auto-added a new author but they don't have the book so we should remove them again
db.action('DELETE from authors WHERE AuthorID=?', (authorid,))
if last_authorid:
update_totals(last_authorid)
logger.info(
f"{new_book_count}/{modified_count} new/modified {library}{plural(new_book_count + modified_count)} "
f"found and added to the database")
logger.info(f"{file_count} {plural(file_count, 'file')} processed")
if startdir == destdir:
if len(remiss):
lazylibrarian.libraryscan_data = (f'<div class="alert alert-danger">'
f'{len(remiss)} unmatched items<br>'
'See debug log for details<br></div>')
# On full library scans, check for books with unknown language
nolang = db.match(
"select count(*) as counter from Books where status='Open' and BookLang='Unknown'")
nolang = nolang['counter']
if nolang:
logger.warning(f"Found {nolang} {plural(nolang, 'book')} in your library with unknown language")
# show stats if new books were added
cmd = ("SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), "
"sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached), sum(duplicates) FROM stats")
stats = db.match(cmd)
st = {'GR_book_hits': stats['sum(GR_book_hits)'], 'GB_book_hits': stats['sum(GR_book_hits)'],
'GR_lang_hits': stats['sum(GR_lang_hits)'], 'LT_lang_hits': stats['sum(LT_lang_hits)'],
'GB_lang_change': stats['sum(GB_lang_change)'], 'cache_hits': stats['sum(cache_hits)'],
'bad_lang': stats['sum(bad_lang)'], 'bad_char': stats['sum(bad_char)'],
'uncached': stats['sum(uncached)'], 'duplicates': stats['sum(duplicates)']}
# noinspection PyUnresolvedReferences
for item in list(st.keys()):
if st[item] is None:
st[item] = 0
if CONFIG['BOOK_API'] == "GoogleBooks":
logger.debug(f"GoogleBooks was hit {st['GR_book_hits']} {plural(st['GR_book_hits'], 'time')} for books")
logger.debug(
f"GoogleBooks language was changed {st['GB_lang_change']} {plural(st['GB_lang_change'], 'time')}")
elif CONFIG['BOOK_API'] == "OpenLibrary":
logger.debug(f"OpenLibrary was hit {st['GR_book_hits']} {plural(st['GR_book_hits'], 'time')} for books")
elif CONFIG['BOOK_API'] == "HardCover":
logger.debug(f"HardCover was hit {st['GR_book_hits']} {plural(st['GR_book_hits'], 'time')} for books")
elif CONFIG['BOOK_API'] == "GoodReads":
logger.debug(f"GoodReads was hit {st['GR_book_hits']} {plural(st['GR_book_hits'], 'time')} for books")
logger.debug(
f"GoodReads was hit {st['GR_lang_hits']} {plural(st['GR_lang_hits'], 'time')} for languages")
logger.debug(
f"LibraryThing was hit {st['LT_lang_hits']} {plural(st['LT_lang_hits'], 'time')} for languages")
logger.debug(f"Language cache was hit {st['cache_hits']} {plural(st['cache_hits'], 'time')}")
logger.debug(f"Unwanted language removed {st['bad_lang']} {plural(st['bad_lang'], 'book')}")
logger.debug(f"Invalid/Incomplete removed {st['bad_char']} {plural(st['bad_char'], 'book')}")
logger.debug(
f"Unable to cache language for {st['uncached']} {plural(st['uncached'], 'book')} with missing ISBN")
logger.debug(f"Found {st['duplicates']} duplicate {plural(st['duplicates'], 'book')}")
logger.debug(f"Rescan {rescan_hits} {plural(rescan_hits, 'hit')}, {rescan_count - rescan_hits} miss")
for bk in rehit:
logger.debug(f"HIT: {bk}")
for bk in remiss:
logger.debug(f"MISS: {bk}")
logger.debug(
f"Cache {lazylibrarian.CACHE_HIT} {plural(lazylibrarian.CACHE_HIT, 'hit')}, "
f"{lazylibrarian.CACHE_MISS} miss")
cachesize = db.match("select count(*) as counter from languages")
logger.debug(f"ISBN Language cache holds {cachesize['counter']} {plural(cachesize['counter'], 'entry')}")
# Cache any covers and images
images = db.select("select bookid, bookimg, bookname from books where instr(bookimg, 'http') = 1")
if len(images):
logger.info(f"Caching {plural(len(images), 'cover')} for {len(images)} {plural(len(images), 'book')}")
for item in images:
bookid = item['bookid']
bookimg = item['bookimg']
# bookname = item['bookname']
newimg, success, _ = cache_img(ImageType.BOOK, bookid, bookimg)
if success:
db.action('update books set BookImg=? where BookID=?', (newimg, bookid))
else:
logger.warning(f"Unable to cache image for BookID {bookid}")
db.action('update books set BookImg=? where BookID=?', ('images/nocover.png', bookid))
images = db.select("select AuthorID, AuthorImg, AuthorName from authors where instr(AuthorImg, 'http') = 1")
if len(images):
logger.info(f"Caching {plural(len(images), 'image')} for {len(images)} {plural(len(images), 'author')}")
for item in images:
authorid = item['authorid']
authorimg = item['authorimg']
# authorname = item['authorname']
newimg, success, _ = cache_img(ImageType.AUTHOR, img_id(), authorimg)
if success:
db.action('update authors set AuthorImg=? where AuthorID=?', (newimg, authorid))
else:
logger.warning(f"Unable to cache image for AuthorID {authorid}")
db.action('update authors set AuthorImg=? where AuthorID=?', ('images/nophoto.png', authorid))
cnt = len(delete_empty_folders(startdir)) # tidy up
logger.debug(f"Deleted {cnt} empty {plural(cnt, 'folder')} in {startdir}")
if library == 'eBook':
lazylibrarian.EBOOK_UPDATE = 0
elif library == 'AudioBook':
lazylibrarian.AUDIO_UPDATE = 0
lazylibrarian.AUTHORS_UPDATE = 0
else:
if authid:
match = db.match('SELECT authorid from authors where authorid=?', (authid,))
if match:
control_value_dict = {"AuthorID": authid}
new_value_dict = {"Status": "Active"}
db.upsert("authors", new_value_dict, control_value_dict)
# On single author/book import, just update bookcount for that author
update_totals(authid)
if remove:
# sometimes librarything tells us about a series contributor
# but openlibrary doesn't agree...
res = db.select("select * from authors where status='Paused' and totalbooks=0")
if len(res):
logger.debug(f"Removed {len(res)} empty series authors")
db.action("delete from authors where status='Paused' and totalbooks=0")
logger.info('Library scan complete')
return new_book_count
except Exception:
logger.error(f'Unhandled exception in library_scan: {traceback.format_exc()}')
if startdir == destdir: # full library scan
if library == 'eBook':
lazylibrarian.EBOOK_UPDATE = 0
elif library == 'AudioBook':
lazylibrarian.AUDIO_UPDATE = 0
else:
if authid:
match = db.match('SELECT authorid from authors where authorid=?', (authid,))
if match:
control_value_dict = {"AuthorID": authid}
new_value_dict = {"Status": "Active"}
db.upsert("authors", new_value_dict, control_value_dict)
finally:
logger.debug(f"Processed folders: {len(processed_subdirectories)}, "
f"matched books: {len(rehit)}, unmatched: {len(remiss)}")
db.upsert("jobs", {"Finish": time.time()}, {"Name": thread_name()})
if '_SCAN' in thread_name():
thread_name('WEBSERVER')
db.close()