From cc29a824e6d83164dab740cd98313a4795cf5e1f Mon Sep 17 00:00:00 2001 From: B Stack Date: Thu, 25 Feb 2021 17:02:46 -0500 Subject: improve generate.py * Add ability to choose subdir name format. * Add day (D) as a variable to use. * Add ability to use other exif date tags, and filename, to determine the date to use. Timestamp is a last-resort option only! * Change output of the cli equivalent actions to stdout (from stderr). --- .gitignore | 1 + generate.py | 22 ++++++++--- genlib.py | 129 +++++++++++++++++++++++++++++++++++++++++++++++++----------- 3 files changed, 123 insertions(+), 29 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bee8a64 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +__pycache__ diff --git a/generate.py b/generate.py index 49ed27f..22ff18e 100755 --- a/generate.py +++ b/generate.py @@ -7,19 +7,28 @@ # Usage: # ./generate.py -i /mnt/bgstack15/Backups/bgstack15/Images/Photos/camera/2018/ -o /mnt/public/www/gallery/my2018 -n -d2 -x 'October' -s --nr -from argparse import ArgumentParser +from argparse import ArgumentParser, RawDescriptionHelpFormatter from genlib import * -generate_version="2021-01-29a" +generate_version="2021-02-25a" # WORKHERE the if __main__ thing: -parser = ArgumentParser(description="Make symlink forest for images") # epilog="something" +parser = ArgumentParser(description="Make symlink forest for images", + formatter_class=RawDescriptionHelpFormatter, + epilog="""FORMATTING +Format string will be interpreted as a python f-string, so wrap variable +names with curly braces. 
+Default format is "{Y}/{M}" + + {Y} is 4-digit year + {M} is month (affected by padding) + {D} is day (affected by padding)""") parser.add_argument("-d","--debug",nargs='?', default=0, type=int, choices=range(0,11), help="Set debug level") parser.add_argument("-v","--version", action="version", version="%(prog)s " + generate_version) g_dryrun = parser.add_mutually_exclusive_group() g_dryrun.add_argument("-n","--dryrun", action="store_true", help="Make no changes (default)") g_dryrun.add_argument("-a","--apply", action="store_true", help="Actually make changes") g_zeropad = parser.add_mutually_exclusive_group() -g_zeropad.add_argument("-z","-0","--zeropad",action="store_true", help="Zero pad month directories (default)") +g_zeropad.add_argument("-z","-0","--zeropad",action="store_true", help="Zero pad month and day strings (default)") g_zeropad.add_argument("--nz","--nozeropad","--no-zeropad",action="store_true", help="Do not zero pad") g_action = parser.add_mutually_exclusive_group() g_action.add_argument("-c","--copy", action="store_true", help="Copy files instead of symlinks. Not recommended.") @@ -31,6 +40,7 @@ g_relative.add_argument("--nr","--norelative","--no-relative", action="store_tru parser.add_argument("-i","--indir",required=True) parser.add_argument("-o","--outdir",required=True) parser.add_argument("-x","--exclude",action="append",help="Exclude pathname matches. Can be used multiple times.") +parser.add_argument("-f","--format",action="store",default="{Y}/{M}",help="Subdirectories should follow this pattern. 
See FORMATTING heading.") # pull useful values out of the argparse entry args = parser.parse_args() @@ -58,6 +68,7 @@ try: except: # no exclusions pass +subdirformat=args.format if debuglevel >= 10: eprint(args) @@ -75,9 +86,10 @@ these_files = limit( ) print("FOUND FILE COUNT:",len(these_files)) -make_YM_forest( +make_forest( outdir, these_files, + subdirformat = subdirformat, action = action, dryrun = dryrun, debuglevel = debuglevel, diff --git a/genlib.py b/genlib.py index 898542d..62676ad 100644 --- a/genlib.py +++ b/genlib.py @@ -6,12 +6,15 @@ # https://stackoverflow.com/questions/1192978/python-get-relative-path-of-all-files-and-subfolders-in-a-directory # https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory # https://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python +# Improve: +# store as a variable the method used to determine date. String, from options ['exif datetimeoriginal','filename','timestamp'] or similar. from sys import stderr from exifread import process_file from os import walk, path, stat, symlink from pathlib import Path from datetime import date from shutil import copy2, move +from re import compile as re_compile try: if debuglevel is None: @@ -19,10 +22,32 @@ try: except: debuglevel = 0 +# Sorry, don't care about anything other than 1900s and forward. We also benefit from relying on full-width month and day fields. 
+dateregex = re_compile("(?P<year>(19|2[0-9])[0-9]{2})(?P<separator>[\.-]?)(?P<month>[01][0-9])(?P=separator)(?P<day>[03][0-9])") + # Functions def eprint(*args, **kwargs): print(*args, file=stderr, **kwargs) +# this function is for manual debugging only +def list_all_tags(tf): + tags = [] + try: + with open(tf,'rb') as f: + tags = process_file(f,details=True) + except: + eprint(f"Failed to process tags for {tf}") + pass + for t in tags: + try: + if "JPEGThumbnail" in t: + o = tags[t][:40] # first 40 bytes + else: + o = tags[t] + print(f'{t}: {o}') + except: + print(f'Failed to print {t}') + def list_files(indir,relative=False,debuglevel=debuglevel, excludes=None): x = 0 f = [] @@ -62,45 +87,96 @@ def limit(listf=None,filters=["image","video"]): newlist.append(f) return newlist -def get_file_YM(tf,debuglevel=debuglevel,zero_pad=False): - Y = 0 ; M = 0 +def get_file_YMD(tf,debuglevel=debuglevel,zero_pad=False): + Y = 0 ; M = 0 ; D = 0 #eprint(f"Debuglevel {debuglevel}") try: # read exif data with open(tf,'rb') as f: - tags = process_file(f, details=False, stop_tag="Image DateTime") + tags = process_file(f, details=False) #, stop_tag="Image DateTime") + # prefer Image DateTime if "Image DateTime" in tags: #if debuglevel >= 5: eprint(tf,tags["Image DateTime"]) Y = str(tags["Image DateTime"]).split(' ')[0].split(':') - if Y == "['']" or Y == ['']: - Y = ['0','0'] - M = Y[1] ; Y = Y[0]; + elif "EXIF DateTimeOriginal" in tags: + Y = str(tags["EXIF DateTimeOriginal"]).split(' ')[0].split(':') + if Y == "['']" or Y == ['']: + Y = ['0','0','0'] + M = Y[1] ; D = Y[2] ; Y = Y[0]; # Any other image timestamps could be used here #eprint(tags) except KeyboardInterrupt: - return -1, -1 + return -1, -1, -1 except: if debuglevel >= 1: eprint(f"Unable to extract any exif data for {tf}") - if int(Y) == 0 or int(M) == 0: - # need to just use timestamp + #print(f"Y={Y} M={M} D={D}") + + use_fn = False + try: + if int(Y) == 0 or int(M) == 0 or int(D) == 0: + use_fn = True + except: + use_fn = True + # try to use 
filename to look for YYYYMMDD or YYYY.MM.DD (pseudoregex) pattern + if use_fn: + fn = path.basename(tf) + try: + match = dateregex.search(fn) + #print(f"match={match.group()}") + # Remove any separators. These two are hardcoded in the global dateregex + match1 = match.group().replace("-","").replace(".","") + Y = str(match1)[:4] + M = str(match1)[4:6] + D = str(match1)[6:8] + #print(f"Y={Y} M={M} D={D}") + except: + if debuglevel >=1: eprint(f"Unable to determine date from filename {tf}") + + # Determine if we need to use the timestamp + use_ts = False + try: + if int(Y) == 0 or int(M) == 0 or int(D) == 0: + use_ts = True + except: + use_ts = True + if use_ts: + if debuglevel >= 9: eprint("Using timestamp...") + # need to just use timestamp, which is really a last resort try: ts = date.fromtimestamp(stat(tf).st_mtime) except KeyboardInterrupt: - return -1, -1 + return -1, -1, -1 except: # no timestamp available from filesystem? if debuglevel >= 1: eprint(f"No timestamp available for {tf}") - if int(Y) == 0: Y = ts.year - if int(M) == 0: M = ts.month + use_ts_y=False ; use_ts_m=False ; use_ts_d=False + try: + if int(Y) == 0: use_ts_y=True + except: + use_ts_y=True + try: + if int(M) == 0: use_ts_m=True + except: + use_ts_m=True + try: + if int(D) == 0: use_ts_d=True + except: + use_ts_d=True + if use_ts_y: Y = ts.year + if use_ts_m: M = ts.month + if use_ts_d: D = ts.day + if zero_pad: M = str(M).zfill(2) + D = str(D).zfill(2) else: M = str(int(M)) + D = str(int(D)) if debuglevel >= 3: - eprint(f"{tf} {Y}/{M}") - return Y, M + eprint(f"{tf} {Y}/{M}/{D}") + return Y, M, D -def make_YM_forest(outdir, flist, action = "symlink", dryrun=True, debuglevel=debuglevel, zero_pad=False, relative_symlinks = False): +def make_forest(outdir, flist, subdirformat = "{Y}/{M}", action = "symlink", dryrun=True, debuglevel=debuglevel, zero_pad=False, relative_symlinks = False): """ For each file in flist, [action] it to outdir, sorting into YYYY/MM subdirs based on exif metadata or 
timestamp. Action may be one of ['symlink', 'copy', 'move']. @@ -111,7 +187,7 @@ def make_YM_forest(outdir, flist, action = "symlink", dryrun=True, debuglevel=de # validate input if action not in ['symlink', 'copy', 'move']: - eprint("make_YM_forest action may be one of ['symlink', 'copy', 'move'].") + eprint("make_forest action may be one of ['symlink', 'copy', 'move'].") return -1 # Learn all directories to make @@ -120,7 +196,7 @@ def make_YM_forest(outdir, flist, action = "symlink", dryrun=True, debuglevel=de destdirs = [] for f in flist: try: - Y, M = get_file_YM(f,debuglevel=debuglevel,zero_pad=zero_pad) + Y, M, D = get_file_YMD(f,debuglevel=debuglevel,zero_pad=zero_pad) if Y == -1 and M == -1: #eprint("Stopping due to keyboard variant 2.") stop = True @@ -131,11 +207,16 @@ def make_YM_forest(outdir, flist, action = "symlink", dryrun=True, debuglevel=de break if zero_pad: M = str(M).zfill(2) + D = str(D).zfill(2) else: M = str(int(M)) - YM = f"{Y}/{M}" - if YM not in destdirs: destdirs.append(YM) - fdict[f] = YM + D = str(int(D)) + + # eval is from https://stackoverflow.com/a/54071505/3569534 + # use out subdirectory format based on what is provided by outsubdir string passed in to this function. 
+ outsubdir = eval(f'f"""{subdirformat}"""') + if outsubdir not in destdirs: destdirs.append(outsubdir) + fdict[f] = outsubdir # finish the for f in flist # short-circuit if keyboard action happened in one of the nested functions @@ -146,7 +227,7 @@ def make_YM_forest(outdir, flist, action = "symlink", dryrun=True, debuglevel=de for d in destdirs: dd = path.join(outdir,d) if debuglevel >= 3: - eprint(f"Make dir: {dd}") + print(f"mkdir {dd}") if not dryrun: Path(dd).mkdir(parents=True, exist_ok=True) @@ -159,11 +240,11 @@ def make_YM_forest(outdir, flist, action = "symlink", dryrun=True, debuglevel=de ff = path.relpath(ff,destfile) if debuglevel >= 2: if action == "copy": - eprint(f"cp -p {ff} {destfile}") + print(f"cp -p {ff} {destfile}") elif action == "symlink": - eprint(f"ln -s {ff} {destfile}") + print(f"ln -s {ff} {destfile}") elif action == "move": - eprint(f"mv {ff} {destfile}") + print(f"mv {ff} {destfile}") if not dryrun: #if False: -- cgit