LazyLibrarian/demo_preprocessor.py
2020-02-26 11:11:04 +01:00

485 lines
21 KiB
Python

#!/usr/bin/python
# NOTE make sure the above path to python is correct for your environment
# The parameter list is type, folder, authorname, bookname
# where "type" is one of 'ebook', 'audiobook', 'magazine', 'comic', 'test'
# and "folder" is the folder ready to be processed
# and authorname, bookname are optional, only used for audio tags
# This example uses "ebook-convert" from calibre to make sure we have both epub and mobi of the new book.
# and "ffmpeg" to produce a single file audiobook and/or write id3 tags
# Note it is not fully error trapped, just a basic working example.
# Error messages appear as errors in the lazylibrarian log
# Anything you print to stdout appears as debug messages in the log
# The exit code and messages get passed back to the "test" button
# Always exit zero on success, non-zero on fail
#
################################################################
# NOTE all of the features in the example preprocessor
# are already included in the main lazylibrarian program
################################################################
import os
import subprocess
import sys
import time
try:
from tinytag import TinyTag
except ImportError:
try:
from lib.tinytag import TinyTag
except ImportError:
TinyTag = None
try:
# noinspection PyProtectedMember
from PyPDF3 import PdfFileWriter, PdfFileReader
except ImportError:
try:
# noinspection PyProtectedMember
from lib.PyPDF3 import PdfFileWriter, PdfFileReader
except ImportError:
PdfFileWriter = None
PdfFileReader = None
# Native text (unicode string) type of the running interpreter:
# ``str`` on Python 3, ``unicode`` on Python 2. The ``unicode`` name is only
# evaluated when the version test fails, so Python 3 never looks it up.
# noinspection PyUnresolvedReferences
text_type = str if sys.version_info[0] == 3 else unicode
# eBook options
###########################################################################
# Set to False to skip ebook preprocessing entirely (main() then exits 0).
preprocess_ebook = True
# Conversion program run once per missing wanted format.
converter = "ebook-convert" # if not in your "path", put the full pathname here
# Extensions (with leading dot) that must exist next to the source ebook;
# any that are missing are created from the source file with `converter`.
wanted_formats = ['.epub', '.mobi']
# keep_opf / keep_jpg are only consulted when delete_others is True:
# they add '.opf' / '.jpg' to the set of extensions that survive the cleanup.
keep_opf = True
keep_jpg = True
delete_others = False # use with care, deletes everything except wanted formats (and opf/jpg if keep is True)
###########################################################################
# audiobook options
# Merge all parts into one "<author> - <book>.<ext>" file using ffmpeg.
write_singlefile = True
# Rewrite album/artist/track tags of each part with ffmpeg. Only takes effect
# when both authorname and bookname were passed on the command line.
write_tags = True
ffmpeg = 'ffmpeg' # if not in your "path", put the full pathname here
# Extra ffmpeg arguments appended to the merge command.
audio_options = ['-ab', '320k']
# When False, the individual part files are removed after a successful merge.
keep_original_audiofiles = True
# File extensions (no dot, compared in lower case) that count as audio parts.
audiotypes = ['mp3', 'flac', 'm4a', 'm4b']
###########################################################################
# magazine options
# Swap the first two pages of the first PDF found in the folder.
swap_page1 = True
###########################################################################
# should not need to alter anything below here
###########################################################################
def makeBytestr(txt):
    """
    Return txt as a bytestring.

    os.listdir/os.walk can fall over when given a unicode start directory and
    a filename below it cannot be decoded, so callers pass bytes instead.
    Falsy input gives b''. Values that are not text are handed back untouched.
    Text is encoded with the first codec that succeeds (utf-8, then latin-1);
    if neither works the text is returned unchanged.
    """
    if not txt:
        return b''
    if isinstance(txt, text_type):
        for codec in ['utf-8', 'latin-1']:
            try:
                return txt.encode(codec)
            except UnicodeError:
                continue
    # already a bytestring, or text that no codec could encode
    return txt
def makeUnicode(txt):
    """
    Return txt as a unicode (text) string.

    The source encoding is unknown (the name may come from a windows or unix
    filesystem), so utf-8 is tried first and latin-1 second. Falsy input
    gives u''. Text is handed back untouched. If no codec can decode the
    value it is returned unchanged.
    """
    if not txt:
        return u''
    if isinstance(txt, text_type):
        return txt
    decoded = txt
    for codec in ['utf-8', 'latin-1']:
        try:
            decoded = txt.decode(codec)
        except UnicodeError:
            continue
        break
    return decoded
def check_int(var, default, positive=True):
    """
    Return an integer representation of var, or the fallback value.

    The fallback is int(default), or 0 when default itself cannot be
    converted. It is returned when var cannot be converted to an int, or
    when positive is True and the converted value is negative (zero is
    accepted).

    The fallback is coerced on every path, so the result is always an int;
    previously the negative-value path returned default unconverted.
    """
    try:
        fallback = int(default)
    except (ValueError, TypeError, OverflowError):
        fallback = 0
    try:
        res = int(var)
    except (ValueError, TypeError, OverflowError):
        # OverflowError: int(float('inf'))
        return fallback
    if positive and res < 0:
        return fallback
    return res
# Filename tokens that identify "part 1", in search priority order, paired with
# the template and zero-pad width used to build the matching token for part N.
_PART_TOKENS = [
    (' 001.', ' %s.', 3),
    (' 01.', ' %s.', 2),
    (' 1.', ' %s.', 1),
    (' 001 ', ' %s ', 3),
    (' 01 ', ' %s ', 2),
    (' 1 ', ' %s ', 1),
    ('01', '%s', 2),
]


def _find_source(bookfolder, extensions):
    """Return the first filename in bookfolder whose lower-cased extension is in extensions, else None."""
    for fname in os.listdir(makeBytestr(bookfolder)):
        fname = makeUnicode(fname)
        if os.path.splitext(fname)[1].lower() in extensions:
            return fname
    return None


def _run_or_exit(pplog, params, hint):
    """Run an external command; on any failure report it, log params and hint, and exit with status 1."""
    try:
        subprocess.check_output(params, stderr=subprocess.STDOUT)
    except Exception as e:
        sys.stderr.write("%s\n" % e)
        pplog.write("%s: %s\n" % (time.ctime(), e))
        # each entry gets its own line so the log stays readable
        pplog.write("%s\n" % repr(params))
        pplog.write("%s\n" % hint)
        sys.exit(1)


def _preprocess_ebook(pplog, booktype, bookfolder):
    """Create any missing wanted ebook formats and optionally delete other files.

    booktype is 'ebook' or 'test'; in 'test' mode deletions are only reported.
    Exits with status 1 if the converter fails.
    """
    if booktype == 'ebook' and not preprocess_ebook:
        print("ebook preprocessing is disabled")
        sys.exit(0)

    source_extn = None
    created = []
    sourcefile = _find_source(bookfolder, ('.epub', '.mobi', '.azw3'))
    pplog.write("Wanted formats: %s\n" % str(wanted_formats))
    if not sourcefile:
        if booktype == 'test':
            print("No suitable sourcefile found in %s" % bookfolder)
        else:
            sys.stderr.write("%s %s\n" % ("No suitable sourcefile found in", bookfolder))
            pplog.write("%s: %s %s\n" % (time.ctime(), "No suitable sourcefile found in", bookfolder))
    else:
        basename, source_extn = os.path.splitext(sourcefile)
        for ftype in wanted_formats:
            target = os.path.join(bookfolder, basename + ftype)
            if os.path.exists(target):
                pplog.write("Found %s\n" % ftype)
                continue
            pplog.write("No %s\n" % ftype)
            params = [converter, os.path.join(bookfolder, sourcefile), target]
            if ftype == '.mobi':
                params.extend(['--output-profile', 'kindle'])
            _run_or_exit(pplog, params, "Is path to ebook-convert correct?")
            created.append(ftype)

        if delete_others:
            # work on a copy so the module-level wanted_formats is not mutated
            keep = list(wanted_formats)
            if keep_opf:
                keep.append('.opf')
            if keep_jpg:
                keep.append('.jpg')
            for fname in os.listdir(makeBytestr(bookfolder)):
                fname = makeUnicode(fname)
                extn = os.path.splitext(fname)[1]
                if extn and extn.lower() in keep:
                    continue
                if booktype == 'test':
                    print("Would delete %s" % fname)
                    pplog.write("Would delete %s\n" % fname)
                else:
                    print("Deleting %s" % fname)
                    pplog.write("Deleting %s\n" % fname)
                    try:
                        os.remove(os.path.join(bookfolder, fname))
                    except OSError:
                        # best effort: e.g. a sub-directory or a locked file
                        pass

    if created:
        made = ' '.join(created)
        print("Created %s from %s" % (made, source_extn))
        pplog.write("%s: Created %s from %s\n" % (time.ctime(), made, source_extn))
    else:
        print("No extra ebook formats created")
        pplog.write("%s: No extra ebook formats created\n" % time.ctime())


def _scan_audio_parts(bookfolder):
    """Read the tags of every audio file in bookfolder.

    Returns (cnt, parts, total, author, book, audio_file, out_type) where cnt
    is the number of audio files seen, parts is a list of
    [track, book, author, filename] for files that had both author and title,
    total/book come from the last file examined, author is the last author
    found, audio_file is the last audio filename and out_type is the
    extension of the file tagged as track 1 ('' if none).
    """
    cnt = 0
    parts = []
    total = 0
    author = ''
    book = ''
    audio_file = ''
    out_type = ''
    for f in os.listdir(makeBytestr(bookfolder)):
        f = makeUnicode(f)
        extn = os.path.splitext(f)[1].lstrip('.')
        if not extn or extn.lower() not in audiotypes:
            continue
        cnt += 1
        audio_file = f
        try:
            audio_path = os.path.join(bookfolder, f)
            performer = ''
            composer = ''
            albumartist = ''
            book = ''
            track = 0
            total = 0
            if TinyTag.is_supported(audio_path):
                id3r = TinyTag.get(audio_path)
                performer = id3r.artist
                composer = id3r.composer
                albumartist = id3r.albumartist
                book = id3r.album
                track = check_int(id3r.track, 0)
                total = check_int(id3r.track_total, 0)
                if performer:
                    performer = performer.strip()
                if composer:
                    composer = composer.strip()
                if book:
                    book = book.strip()
                if albumartist:
                    albumartist = albumartist.strip()

            if composer:  # if present, should be author
                author = composer
            elif performer:  # author, or narrator if composer == author
                author = performer
            elif albumartist:
                author = albumartist
            if author and book:
                parts.append([track, book, author, f])
            if track == 1:
                out_type = extn
        except Exception as e:
            # an unreadable file is reported but must not abort the scan;
            # it shows up later as a part-count mismatch
            print("tinytag %s %s" % (type(e).__name__, str(e)))
    return cnt, parts, total, author, book, audio_file, out_type


def _number_parts_from_filenames(parts):
    """Assign part numbers (part[0]) from numbering tokens in the filenames.

    Finds the highest-priority "part 1" token present in any filename, then
    for N = 1..len(parts) gives number N to the first part whose filename
    contains the matching token. Returns True if a numbering style was found.
    """
    style = None
    for token, template, width in _PART_TOKENS:
        if any(token in part[3] for part in parts):
            style = (template, width)
            break
    if style is None:
        return False
    template, width = style
    for number in range(1, len(parts) + 1):
        pattern = template % str(number).zfill(width)
        for part in parts:
            if pattern in part[3]:
                part[0] = number
                break
    return True


def _write_part_tags(pplog, bookfolder, part, authorname, bookname):
    """Rewrite album/artist/track tags of one part via ffmpeg; failures are logged, not fatal."""
    extn = os.path.splitext(part[3])[1]
    source = os.path.join(bookfolder, part[3])
    temp = os.path.join(bookfolder, "tempaudio%s" % extn)
    params = [ffmpeg, '-i', source,
              '-y', '-c:a', 'copy', '-metadata', "album=%s" % bookname,
              '-metadata', "artist=%s" % authorname,
              '-metadata', "track=%s" % part[0],
              temp]
    try:
        subprocess.check_output(params, stderr=subprocess.STDOUT)
        os.remove(source)
        os.rename(temp, source)
        print("Metadata written to %s" % part[3])
    except Exception as e:
        pplog.write("%s: %s\n" % (time.ctime(), e))
        pplog.write("Is path to ffmpeg correct?\n")
        sys.stderr.write("%s\n" % e)


def _preprocess_audiobook(pplog, bookfolder, authorname, bookname):
    """Validate the audio parts in bookfolder, optionally retag them and merge them into one file.

    Exits with status 1 when the parts are missing, inconsistent or ffmpeg
    fails, and with status 0 when there is nothing to do.
    """
    if not write_singlefile and not write_tags:
        print("audiobook preprocessing is disabled")
        sys.exit(0)
    if not TinyTag:
        print("TinyTag library not found")
        sys.exit(0)

    cnt, parts, total, author, book, audio_file, out_type = _scan_audio_parts(bookfolder)
    pplog.write("%s found %s audiofiles\n" % (book, cnt))

    if cnt == 0:
        # without this guard parts[0] below raised IndexError
        msg = "No audio files found in %s" % bookfolder
        print(msg)
        pplog.write("%s: %s\n" % (time.ctime(), msg))
        sys.exit(1)

    if cnt == 1 and not parts:  # single file audiobook with no tags
        parts = [[1, book, author, audio_file]]
    if cnt != len(parts):
        print("%s: Incorrect number of parts (found %i from %i)" % (book, len(parts), cnt))
        sys.exit(1)
    if total and total != cnt:
        print("%s: Reported %i parts, got %i" % (book, total, cnt))
        sys.exit(1)
    if cnt == 1:
        print("Only one audio file found, nothing to merge")
        sys.exit(0)

    # check all parts have the same author and title
    for part in parts:
        if part[1] != book:
            print("%s: Inconsistent title: [%s][%s]" % (book, part[1], book))
            sys.exit(1)
        if part[2] != author:
            print("%s: Inconsistent author: [%s][%s]" % (book, part[2], author))
            sys.exit(1)

    # do we have any track info (value is 0 if not)
    renumbered = False
    if parts[0][0] == 0:
        # try to extract part information from the filenames instead
        renumbered = _number_parts_from_filenames(parts)

    parts.sort(key=lambda x: x[0])
    # check all parts are present
    for expected, part in enumerate(parts, 1):
        if part[0] != expected:
            print("%s: No part %i found" % (book, expected))
            sys.exit(1)

    # if we get here, looks like we have all the parts
    partslist = os.path.join(bookfolder, 'partslist.ll')
    metadata = os.path.join(bookfolder, 'metadata.ll')
    with open(partslist, 'wb') as f:
        for part in parts:
            # The file is binary, so build the line from bytes (a str write
            # fails on Python 3). An apostrophe cannot appear inside ffmpeg's
            # single quotes: close the quote, escape it, reopen.
            name = makeBytestr(part[3]).replace(b"'", b"'\\''")
            f.write(b"file '" + name + b"'\n")

    if write_tags and authorname and bookname:
        for part in parts:
            if renumbered or (part[2] != authorname) or (part[1] != bookname):
                _write_part_tags(pplog, bookfolder, part, authorname, bookname)

    if write_singlefile:
        # export the tags of the first part so they can be applied to the merged file
        params = [ffmpeg, '-i', os.path.join(bookfolder, parts[0][3]),
                  '-f', 'ffmetadata', '-y', metadata]
        _run_or_exit(pplog, params, "Is path to ffmpeg correct?")
        print("Metadata written to file")

        if not out_type:
            out_type = 'mp3'
        outfile = "%s - %s.%s" % (author, book, out_type)
        # this produces a single file audiobook
        params = [ffmpeg, '-f', 'concat', '-safe', '0', '-i', partslist,
                  '-f', 'ffmetadata', '-i', metadata, '-map_metadata', '1',
                  '-id3v2_version', '3']
        params.extend(audio_options)
        params.append('-y')
        params.append(os.path.join(bookfolder, outfile))

        msg = "Processing %d files" % len(parts)
        print(msg)
        pplog.write("%s: %s\n" % (time.ctime(), msg))
        _run_or_exit(pplog, params, "Is path to ffmpeg correct?")

        msg = "%d files merged into %s" % (len(parts), outfile)
        print(msg)
        pplog.write("%s: %s\n" % (time.ctime(), msg))
        os.remove(partslist)
        os.remove(metadata)
        if not keep_original_audiofiles:
            msg = "Removing %d part files" % len(parts)
            print(msg)
            pplog.write("%s: %s\n" % (time.ctime(), msg))
            for part in parts:
                os.remove(os.path.join(bookfolder, part[3]))


def _preprocess_magazine(pplog, bookfolder):
    """Swap the first two pages of the first PDF in bookfolder (when swap_page1 is set).

    Errors are reported on stderr and in the log but do not change the exit status.
    """
    if not swap_page1:
        return
    if not PdfFileWriter:
        print("PyPDF3 library not found")
        sys.exit(0)
    try:
        sourcefile = _find_source(bookfolder, ('.pdf',))
        if not sourcefile:
            sys.stderr.write("%s %s\n" % ("No suitable sourcefile found in", bookfolder))
            pplog.write("%s: %s %s\n" % (time.ctime(), "No suitable sourcefile found in", bookfolder))
            return

        fname = os.path.join(bookfolder, sourcefile)
        output = PdfFileWriter()
        # the reader needs the source open until the output has been written;
        # "with" also closes it when something below raises
        with open(fname, "rb") as f:
            input1 = PdfFileReader(f)
            cnt = input1.getNumPages()
            if cnt < 2:
                msg = "%s has %d pages. Nothing to swap" % (fname, cnt)
                print(msg)
                pplog.write("%s: %s\n" % (time.ctime(), msg))
                return
            output.addPage(input1.getPage(1))
            output.addPage(input1.getPage(0))
            for p in range(2, cnt):
                output.addPage(input1.getPage(p))
            with open(fname + 'new', "wb") as outputStream:
                output.write(outputStream)
        msg = "%s has %d pages. Swapped pages 1 and 2" % (fname, cnt)
        print(msg)
        pplog.write("%s: %s\n" % (time.ctime(), msg))
        # source is closed by now, so it can be replaced (matters on windows)
        os.remove(fname)
        os.rename(fname + 'new', fname)
    except Exception as e:
        sys.stderr.write("%s\n" % e)
        pplog.write("%s: %s\n" % (time.ctime(), e))


def main():
    """
    Entry point: preprocess one download folder for lazylibrarian.

    Command line: type folder [authorname bookname]
    type is one of 'ebook', 'audiobook', 'magazine', 'comic', 'test'.
    With fewer than two parameters a 'test' run is assumed. authorname and
    bookname are only read when exactly four parameters are given.

    Activity is appended to preprocessor.log next to this script.
    Exit status is zero on success and non-zero on failure.
    """
    authorname = ''
    bookname = ''
    if len(sys.argv) < 3:
        sys.stderr.write("Invalid parameters (%s) assume test\n" % len(sys.argv))
        booktype = 'test'
        bookfolder = ''
    else:
        booktype = sys.argv[1]
        bookfolder = sys.argv[2]
        if len(sys.argv) == 5:
            authorname = sys.argv[3]
            bookname = sys.argv[4]

    here = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(here, 'preprocessor.log'), 'a') as pplog:
        pplog.write("%s: %s %s\n" % (time.ctime(), booktype, bookfolder))
        # 'comic' is a documented type: it is accepted here and reported as
        # "nothing to do" below rather than rejected as invalid
        if booktype not in ('ebook', 'audiobook', 'magazine', 'comic', 'test'):
            sys.stderr.write("%s %s\n" % ("Invalid booktype", booktype))
            pplog.write("%s: %s %s\n" % (time.ctime(), "Invalid booktype", booktype))
            sys.exit(1)  # failure must be non-zero
        if not os.path.exists(bookfolder) and booktype != 'test':
            sys.stderr.write("%s %s\n" % ("Invalid bookfolder", bookfolder))
            pplog.write("%s: %s %s\n" % (time.ctime(), "Invalid bookfolder", bookfolder))
            sys.exit(1)

        if booktype == 'test':
            print("Preprocessor test")
            if not os.path.exists(bookfolder):
                bookfolder = here

        if booktype in ('ebook', 'test'):
            _preprocess_ebook(pplog, booktype, bookfolder)
        elif booktype == 'audiobook':
            _preprocess_audiobook(pplog, bookfolder, authorname, bookname)
        elif booktype == 'magazine':
            _preprocess_magazine(pplog, bookfolder)
        else:
            print("This example preprocessor only preprocesses eBooks, audiobooks and magazines")
            sys.exit(0)


if __name__ == "__main__":
    main()