From cc29a824e6d83164dab740cd98313a4795cf5e1f Mon Sep 17 00:00:00 2001 From: B Stack Date: Thu, 25 Feb 2021 17:02:46 -0500 Subject: improve generate.py * Add ability to choose subdir name format. * Add day (D) as a variable to use. * Add ability to use other exif date tags, and filename, to determine the date to use. Timestamp is a last-resort option only! * Change output of the cli equivalent actions to stdout (from stderr). --- genlib.py | 129 ++++++++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 105 insertions(+), 24 deletions(-) (limited to 'genlib.py') diff --git a/genlib.py b/genlib.py index 898542d..62676ad 100644 --- a/genlib.py +++ b/genlib.py @@ -6,12 +6,15 @@ # https://stackoverflow.com/questions/1192978/python-get-relative-path-of-all-files-and-subfolders-in-a-directory # https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory # https://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python +# Improve: +# store as a variable the method used to determine date. String, from options ['exif datetimeoriginal','filename','timestamp'] or similar. from sys import stderr from exifread import process_file from os import walk, path, stat, symlink from pathlib import Path from datetime import date from shutil import copy2, move +from re import compile as re_compile try: if debuglevel is None: @@ -19,10 +22,32 @@ try: except: debuglevel = 0 +# Sorry, don't care about anything other than 1900s and forward. We also benefit from relying on full-width month and day fields. 
+dateregex = re_compile("(?P<year>(19|2[0-9])[0-9]{2})(?P<separator>[\.-]?)(?P<month>[01][0-9])(?P=separator)(?P<day>[03][0-9])") + # Functions def eprint(*args, **kwargs): print(*args, file=stderr, **kwargs) +# this function is for manual debugging only +def list_all_tags(tf): + tags = [] + try: + with open(tf,'rb') as f: + tags = process_file(f,details=True) + except: + eprint(f"Failed to process tags for {tf}") + pass + for t in tags: + try: + if "JPEGThumbnail" in t: + o = tags[t][:40] # first 40 bytes + else: + o = tags[t] + print(f'{t}: {o}') + except: + print(f'Failed to print {t}') + def list_files(indir,relative=False,debuglevel=debuglevel, excludes=None): x = 0 f = [] @@ -62,45 +87,96 @@ def limit(listf=None,filters=["image","video"]): newlist.append(f) return newlist -def get_file_YM(tf,debuglevel=debuglevel,zero_pad=False): - Y = 0 ; M = 0 +def get_file_YMD(tf,debuglevel=debuglevel,zero_pad=False): + Y = 0 ; M = 0 ; D = 0 #eprint(f"Debuglevel {debuglevel}") try: # read exif data with open(tf,'rb') as f: - tags = process_file(f, details=False, stop_tag="Image DateTime") + tags = process_file(f, details=False) #, stop_tag="Image DateTime") + # prefer Image DateTime if "Image DateTime" in tags: #if debuglevel >= 5: eprint(tf,tags["Image DateTime"]) Y = str(tags["Image DateTime"]).split(' ')[0].split(':') - if Y == "['']" or Y == ['']: - Y = ['0','0'] - M = Y[1] ; Y = Y[0]; + elif "EXIF DateTimeOriginal" in tags: + Y = str(tags["EXIF DateTimeOriginal"]).split(' ')[0].split(':') + if Y == "['']" or Y == ['']: + Y = ['0','0','0'] + M = Y[1] ; D = Y[2] ; Y = Y[0]; # Any other image timestamps could be used here #eprint(tags) except KeyboardInterrupt: - return -1, -1 + return -1, -1, -1 except: if debuglevel >= 1: eprint(f"Unable to extract any exif data for {tf}") - if int(Y) == 0 or int(M) == 0: - # need to just use timestamp + #print(f"Y={Y} M={M} D={D}") + + use_fn = False + try: + if int(Y) == 0 or int(M) == 0 or int(D) == 0: + use_fn = True + except: + use_fn = True + # try to use 
filename to look for YYYYMMDD or YYYY.MM.DD (pseudoregex) pattern + if use_fn: + fn = path.basename(tf) + try: + match = dateregex.search(fn) + #print(f"match={match.group()}") + # Remove any separators. These two are hardcoded in the global dateregex + match1 = match.group().replace("-","").replace(".","") + Y = str(match1)[:4] + M = str(match1)[4:6] + D = str(match1)[6:8] + #print(f"Y={Y} M={M} D={D}") + except: + if debuglevel >=1: eprint(f"Unable to determine date from filename {tf}") + + # Determine if we need to use the timestamp + use_ts = False + try: + if int(Y) == 0 or int(M) == 0 or int(D) == 0: + use_ts = True + except: + use_ts = True + if use_ts: + if debuglevel >= 9: eprint("Using timestamp...") + # need to just use timestamp, which is really a last resort try: ts = date.fromtimestamp(stat(tf).st_mtime) except KeyboardInterrupt: - return -1, -1 + return -1, -1, -1 except: # no timestamp available from filesystem? if debuglevel >= 1: eprint(f"No timestamp available for {tf}") - if int(Y) == 0: Y = ts.year - if int(M) == 0: M = ts.month + use_ts_y=False ; use_ts_m=False ; use_ts_d=False + try: + if int(Y) == 0: use_ts_y=True + except: + use_ts_y=True + try: + if int(M) == 0: use_ts_m=True + except: + use_ts_m=True + try: + if int(D) == 0: use_ts_d=True + except: + use_ts_d=True + if use_ts_y: Y = ts.year + if use_ts_m: M = ts.month + if use_ts_d: D = ts.day + if zero_pad: M = str(M).zfill(2) + D = str(D).zfill(2) else: M = str(int(M)) + D = str(int(D)) if debuglevel >= 3: - eprint(f"{tf} {Y}/{M}") - return Y, M + eprint(f"{tf} {Y}/{M}/{D}") + return Y, M, D -def make_YM_forest(outdir, flist, action = "symlink", dryrun=True, debuglevel=debuglevel, zero_pad=False, relative_symlinks = False): +def make_forest(outdir, flist, subdirformat = "{Y}/{M}", action = "symlink", dryrun=True, debuglevel=debuglevel, zero_pad=False, relative_symlinks = False): """ For each file in flist, [action] it to outdir, sorting into YYYY/MM subdirs based on exif metadata or 
timestamp. Action may be one of ['symlink', 'copy', 'move']. @@ -111,7 +187,7 @@ def make_YM_forest(outdir, flist, action = "symlink", dryrun=True, debuglevel=de # validate input if action not in ['symlink', 'copy', 'move']: - eprint("make_YM_forest action may be one of ['symlink', 'copy', 'move'].") + eprint("make_forest action may be one of ['symlink', 'copy', 'move'].") return -1 # Learn all directories to make @@ -120,7 +196,7 @@ def make_YM_forest(outdir, flist, action = "symlink", dryrun=True, debuglevel=de destdirs = [] for f in flist: try: - Y, M = get_file_YM(f,debuglevel=debuglevel,zero_pad=zero_pad) + Y, M, D = get_file_YMD(f,debuglevel=debuglevel,zero_pad=zero_pad) if Y == -1 and M == -1: #eprint("Stopping due to keyboard variant 2.") stop = True @@ -131,11 +207,16 @@ def make_YM_forest(outdir, flist, action = "symlink", dryrun=True, debuglevel=de break if zero_pad: M = str(M).zfill(2) + D = str(D).zfill(2) else: M = str(int(M)) - YM = f"{Y}/{M}" - if YM not in destdirs: destdirs.append(YM) - fdict[f] = YM + D = str(int(D)) + + # eval is from https://stackoverflow.com/a/54071505/3569534 + # build the output subdirectory name from the subdirformat string passed in to this function. 
+ outsubdir = eval(f'f"""{subdirformat}"""') + if outsubdir not in destdirs: destdirs.append(outsubdir) + fdict[f] = outsubdir # finish the for f in flist # short-circuit if keyboard action happened in one of the nested functions @@ -146,7 +227,7 @@ def make_YM_forest(outdir, flist, action = "symlink", dryrun=True, debuglevel=de for d in destdirs: dd = path.join(outdir,d) if debuglevel >= 3: - eprint(f"Make dir: {dd}") + print(f"mkdir {dd}") if not dryrun: Path(dd).mkdir(parents=True, exist_ok=True) @@ -159,11 +240,11 @@ def make_YM_forest(outdir, flist, action = "symlink", dryrun=True, debuglevel=de ff = path.relpath(ff,destfile) if debuglevel >= 2: if action == "copy": - eprint(f"cp -p {ff} {destfile}") + print(f"cp -p {ff} {destfile}") elif action == "symlink": - eprint(f"ln -s {ff} {destfile}") + print(f"ln -s {ff} {destfile}") elif action == "move": - eprint(f"mv {ff} {destfile}") + print(f"mv {ff} {destfile}") if not dryrun: #if False: -- cgit