#!/usr/bin/env python3
# Startdate: 2021-01-29 15:35
# Dependencies:
#    devuan-req: python3-exifread
# Reference:
#    https://stackoverflow.com/questions/1192978/python-get-relative-path-of-all-files-and-subfolders-in-a-directory
#    https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory
#    https://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python
# Improve:
#    store as a variable the method used to determine date. String, from
#    options ['exif datetimeoriginal','filename','timestamp'] or similar.
from sys import stderr
from os import walk, path, stat, symlink
from pathlib import Path
from datetime import date
from shutil import copy2, move
from re import compile as re_compile

try:
    from exifread import process_file
except ImportError as _exc:
    # Keep the path helpers (list_files, limit) importable without exifread,
    # but fail loudly as soon as EXIF data is actually requested so files are
    # never silently sorted by a less reliable date source.
    _EXIFREAD_IMPORT_ERROR = _exc

    def process_file(*args, **kwargs):
        raise ImportError(
            "python3-exifread is required to read EXIF tags"
        ) from _EXIFREAD_IMPORT_ERROR

# Honor a debuglevel that was already placed in this namespace; default to 0.
try:
    if debuglevel is None:
        debuglevel = 0
except NameError:
    debuglevel = 0

# Sorry, don't care about anything other than 1900s and forward. We also
# benefit from relying on full-width month and day fields.
# Matches YYYYMMDD, YYYY-MM-DD and YYYY.MM.DD; both separators must be equal.
dateregex = re_compile(
    r"(?P<year>(19|2[0-9])[0-9]{2})"
    r"(?P<separator>[.-]?)"
    r"(?P<month>[01][0-9])"
    r"(?P=separator)"
    r"(?P<day>[0-3][0-9])"
)

# File extensions (lower case, without the dot) recognized by limit().
_IMAGE_EXTENSIONS = frozenset(["jpg", "jpeg", "png", "svg", "tif", "tiff", "gif"])
_VIDEO_EXTENSIONS = frozenset(["mp4", "webm", "avi"])


# Functions
def eprint(*args, **kwargs):
    """Print to stderr; accepts the same arguments as print()."""
    print(*args, file=stderr, **kwargs)


# this function is for manual debugging only
def list_all_tags(tf):
    """Print every EXIF tag of file tf to stdout, one "name: value" per line.

    Values of tags whose name contains "JPEGThumbnail" are cut to their first
    40 bytes. Failures are reported and otherwise ignored.
    """
    tags = []
    try:
        with open(tf, 'rb') as f:
            tags = process_file(f, details=True)
    except ImportError:
        # exifread itself is missing; that is not a per-file problem.
        raise
    except Exception:
        eprint(f"Failed to process tags for {tf}")
    for t in tags:
        try:
            if "JPEGThumbnail" in t:
                o = tags[t][:40]  # first 40 bytes
            else:
                o = tags[t]
            print(f'{t}: {o}')
        except Exception:
            print(f'Failed to print {t}')


def list_files(indir, relative=False, debuglevel=debuglevel, excludes=None):
    """Return every file found underneath indir, walking it recursively.

    Parameters:
        indir: directory to walk.
        relative: if True return paths relative to indir (files directly in
            indir come back as "./name"), otherwise indir joined with them.
        debuglevel: at 9 or higher, trace every decision on stderr.
        excludes: iterable of plain substrings (no regex); a file is skipped
            when any of them occurs in its path relative to indir.
            None means nothing is excluded.
    """
    excludes = [] if excludes is None else excludes
    x = 0
    found = []
    if debuglevel >= 9:
        eprint(f"Listing all files underneath {indir}")
        if len(excludes) > 0:
            eprint("While excluding path matches:", excludes)
    for (dirpath, dirnames, filenames) in walk(indir):
        reldir = path.relpath(dirpath, indir)
        for file in filenames:
            x += 1
            fullpath = path.join(dirpath, file)
            relpath = path.join(reldir, file)
            # simple match, no regex; first matching pattern wins
            hits = [e for e in excludes if e in relpath]
            if hits:
                if debuglevel >= 9:
                    eprint(x, fullpath, f"ignored per {hits[0]}")
                continue
            if debuglevel >= 9:
                eprint(x, fullpath, relpath)
            found.append(relpath if relative else fullpath)
    return found


def limit(listf=None, filters=("image", "video")):
    """Return the entries of listf whose extension is a known media type.

    Parameters:
        listf: iterable of file names or paths; None is treated as empty.
        filters: which media types to keep. Available types are "image"
            and "video".
    The extension comparison is case-insensitive; input order is preserved.
    """
    if listf is None:
        return []
    want_images = "image" in filters
    want_videos = "video" in filters
    newlist = []
    for f in listf:
        ext = path.splitext(f)[1][1:].lower()
        if (want_images and ext in _IMAGE_EXTENSIONS) or \
           (want_videos and ext in _VIDEO_EXTENSIONS):
            newlist.append(f)
    return newlist


def _date_part_missing(value):
    """Return True when value is not usable as a date component (0 or non-numeric)."""
    try:
        return int(value) == 0
    except (TypeError, ValueError):
        return True


def get_file_YMD(tf, debuglevel=debuglevel, zero_pad=False):
    """Determine the year, month and day of file tf.

    Sources are tried in order until all three components are known:
      1. EXIF tag "Image DateTime", else "EXIF DateTimeOriginal".
      2. A YYYYMMDD / YYYY-MM-DD / YYYY.MM.DD pattern in the file name.
      3. The file's modification time, used per component for whatever is
         still missing. This is really a last resort.

    Parameters:
        tf: path of the file to inspect.
        debuglevel: 1+ reports failed sources, 3+ prints the result,
            9+ announces the timestamp fallback (all on stderr).
        zero_pad: if True month and day are two-character zero-padded
            strings, otherwise strings without leading zeros.

    Returns a tuple (Y, M, D). M and D are strings. Y is a string when it
    came from EXIF or the file name and an int when it came from the
    timestamp. Components that no source could supply are 0 ("0" for M and
    D). Returns (-1, -1, -1) when interrupted from the keyboard.
    Raises ImportError if exifread is not installed.
    """
    Y = 0
    M = 0
    D = 0

    # 1. exif data
    try:
        with open(tf, 'rb') as f:
            tags = process_file(f, details=False)
        parts = []
        # prefer Image DateTime
        for tag_name in ("Image DateTime", "EXIF DateTimeOriginal"):
            if tag_name in tags:
                # value looks like "YYYY:MM:DD HH:MM:SS"; keep the date half
                parts = str(tags[tag_name]).split(' ')[0].split(':')
                break
        # Any other image timestamps could be used here
        if len(parts) >= 3:
            Y, M, D = parts[0], parts[1], parts[2]
        elif parts != [''] and debuglevel >= 1:
            # an empty tag value is silently treated as "no date"
            eprint(f"Unable to extract any exif data for {tf}")
    except KeyboardInterrupt:
        return -1, -1, -1
    except ImportError:
        raise
    except Exception:
        # exifread may fail in many ways on damaged files; fall through
        if debuglevel >= 1:
            eprint(f"Unable to extract any exif data for {tf}")

    # 2. try to use filename to look for YYYYMMDD or YYYY.MM.DD pattern
    if _date_part_missing(Y) or _date_part_missing(M) or _date_part_missing(D):
        match = dateregex.search(path.basename(tf))
        if match:
            Y, M, D = match.group("year", "month", "day")
        elif debuglevel >= 1:
            eprint(f"Unable to determine date from filename {tf}")

    # 3. need to just use timestamp, which is really a last resort
    if _date_part_missing(Y) or _date_part_missing(M) or _date_part_missing(D):
        if debuglevel >= 9:
            eprint("Using timestamp...")
        ts = None
        try:
            ts = date.fromtimestamp(stat(tf).st_mtime)
        except KeyboardInterrupt:
            return -1, -1, -1
        except (OSError, OverflowError, ValueError):
            # no timestamp available from filesystem?
            if debuglevel >= 1:
                eprint(f"No timestamp available for {tf}")
        # Only replace the components that are still unknown; without a
        # timestamp they become 0 so the formatting below cannot fail.
        if _date_part_missing(Y):
            Y = ts.year if ts else 0
        if _date_part_missing(M):
            M = ts.month if ts else 0
        if _date_part_missing(D):
            D = ts.day if ts else 0

    if zero_pad:
        M = str(M).zfill(2)
        D = str(D).zfill(2)
    else:
        M = str(int(M))
        D = str(int(D))

    if debuglevel >= 3:
        eprint(f"{tf} {Y}/{M}/{D}")

    return Y, M, D


def make_forest(outdir, flist, subdirformat="{Y}/{M}", action="symlink", dryrun=True, debuglevel=debuglevel, zero_pad=False, relative_symlinks=False):
    """
    For each file in flist, [action] it to outdir, sorting into YYYY/MM subdirs
    based on exif metadata, filename or timestamp (see get_file_YMD).

    Parameters:
        outdir: root of the tree to build.
        flist: iterable of source file paths.
        subdirformat: f-string style template for the subdirectory below
            outdir; Y, M and D are available.
        action: one of ['symlink', 'copy', 'move'].
        dryrun: if True nothing is written; combine with debuglevel to see
            what would happen.
        debuglevel: 2+ prints the cp/ln/mv commands, 3+ also the mkdirs.
        zero_pad: zero-pad month and day to two digits.
        relative_symlinks: if True, make relative symlinks if possible.

    Returns 0 on success, -1 for an invalid action or a keyboard interrupt
    while the dates were being collected.
    """
    result = 0

    # validate input
    if action not in ['symlink', 'copy', 'move']:
        eprint("make_forest action may be one of ['symlink', 'copy', 'move'].")
        return -1

    # Learn all directories to make
    # and also cache the files with Y/M value
    fdict = {}
    destdirs = {}  # dict used as an insertion-ordered set
    for f in flist:
        try:
            # M and D arrive already formatted according to zero_pad
            Y, M, D = get_file_YMD(f, debuglevel=debuglevel, zero_pad=zero_pad)
        except KeyboardInterrupt:
            return -1
        # short-circuit if keyboard action happened in the nested function
        if Y == -1 and M == -1:
            return -1
        # eval is from https://stackoverflow.com/a/54071505/3569534
        # NOTE(security): subdirformat is evaluated as an f-string and can
        # therefore run arbitrary expressions. Only pass trusted templates.
        outsubdir = eval(f'f"""{subdirformat}"""')
        destdirs[outsubdir] = None
        fdict[f] = outsubdir

    # Make directories
    for d in destdirs:
        dd = path.join(outdir, d)
        if debuglevel >= 3:
            print(f"mkdir {dd}")
        if not dryrun:
            Path(dd).mkdir(parents=True, exist_ok=True)

    # Make symlinks (or copies, or moves)
    for f, subdir in fdict.items():
        ff = f
        destfile = path.join(outdir, subdir, path.basename(ff))
        if action == "symlink" and relative_symlinks:
            # A relative link target is resolved from the directory that
            # holds the link, not from the link itself.
            try:
                ff = path.relpath(ff, path.dirname(destfile))
            except ValueError:
                # no relative path exists (e.g. different drives); keep ff
                pass
        if debuglevel >= 2:
            if action == "copy":
                print(f"cp -p {ff} {destfile}")
            elif action == "symlink":
                print(f"ln -s {ff} {destfile}")
            elif action == "move":
                print(f"mv {ff} {destfile}")
        if not dryrun:
            if action == "copy":
                copy2(ff, destfile)
            elif action == "symlink":
                symlink(ff, destfile)
            elif action == "move":
                move(ff, destfile)

    return result