aboutsummaryrefslogtreecommitdiff
path: root/genlib.py
diff options
context:
space:
mode:
Diffstat (limited to 'genlib.py')
-rw-r--r--genlib.py129
1 files changed, 105 insertions, 24 deletions
diff --git a/genlib.py b/genlib.py
index 898542d..62676ad 100644
--- a/genlib.py
+++ b/genlib.py
@@ -6,12 +6,15 @@
# https://stackoverflow.com/questions/1192978/python-get-relative-path-of-all-files-and-subfolders-in-a-directory
# https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory
# https://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python
+# Possible improvement:
+# Record which method was used to determine the date, as a string such as 'exif datetimeoriginal', 'filename', or 'timestamp'.
from sys import stderr
from exifread import process_file
from os import walk, path, stat, symlink
from pathlib import Path
from datetime import date
from shutil import copy2, move
+from re import compile as re_compile
try:
if debuglevel is None:
@@ -19,10 +22,32 @@ try:
except:
debuglevel = 0
+# Matches YYYYMMDD, YYYY-MM-DD or YYYY.MM.DD in a filename. Only years 1900-2999 are accepted, and month and day
+# must be full-width (two digits). The same separator (or none) must appear in both positions.
+# The day's first digit is [0-3] so that days 10-29 match as well as 01-09 and 30-31.
+dateregex = re_compile(r"(?P<year>(19|2[0-9])[0-9]{2})(?P<separator>[.-]?)(?P<month>[01][0-9])(?P=separator)(?P<day>[0-3][0-9])")
+
# Functions
def eprint(*args, **kwargs):
print(*args, file=stderr, **kwargs)
+# this function is for manual debugging only
+def list_all_tags(tf):
+    """
+    Print every tag that process_file() finds in file tf, one "name: value" line per tag.
+
+    A tag whose name contains "JPEGThumbnail" is truncated to its first 40 bytes.
+    If the file cannot be opened or parsed, a message is written to stderr and no tags are printed.
+    """
+    # Start with an empty mapping so the loop below is a no-op when reading fails.
+    tags = {}
+    try:
+        with open(tf,'rb') as f:
+            tags = process_file(f,details=True)
+    except Exception:
+        # Exception rather than a bare except, so that Ctrl-C still interrupts.
+        eprint(f"Failed to process tags for {tf}")
+    for t in tags:
+        try:
+            if "JPEGThumbnail" in t:
+                o = tags[t][:40] # first 40 bytes
+            else:
+                o = tags[t]
+            print(f'{t}: {o}')
+        except Exception:
+            print(f'Failed to print {t}')
+
def list_files(indir,relative=False,debuglevel=debuglevel, excludes=None):
x = 0
f = []
@@ -62,45 +87,96 @@ def limit(listf=None,filters=["image","video"]):
newlist.append(f)
return newlist
-def get_file_YM(tf,debuglevel=debuglevel,zero_pad=False):
- Y = 0 ; M = 0
+def get_file_YMD(tf,debuglevel=debuglevel,zero_pad=False):
+ """
+ Determine the year, month, and day for file tf.
+
+ Sources are tried in order: the EXIF tags "Image DateTime" and then
+ "EXIF DateTimeOriginal", a date pattern in the file's basename (matched
+ with the module-level dateregex), and finally the file's st_mtime.
+
+ Returns a tuple (Y, M, D). M and D are returned as strings, zero-padded to
+ two digits when zero_pad is true. Y is returned as found: a string when it
+ came from EXIF or the filename, an int when it came from the timestamp.
+ Returns (-1, -1, -1) if a KeyboardInterrupt is caught while reading the
+ EXIF data or the timestamp.
+
+ debuglevel controls the diagnostic messages written with eprint().
+ """
+ Y = 0 ; M = 0 ; D = 0
#eprint(f"Debuglevel {debuglevel}")
try:
# read exif data
with open(tf,'rb') as f:
- tags = process_file(f, details=False, stop_tag="Image DateTime")
+ tags = process_file(f, details=False) #, stop_tag="Image DateTime")
+ # prefer Image DateTime
if "Image DateTime" in tags:
#if debuglevel >= 5: eprint(tf,tags["Image DateTime"])
Y = str(tags["Image DateTime"]).split(' ')[0].split(':')
- if Y == "['']" or Y == ['']:
- Y = ['0','0']
- M = Y[1] ; Y = Y[0];
+ elif "EXIF DateTimeOriginal" in tags:
+ Y = str(tags["EXIF DateTimeOriginal"]).split(' ')[0].split(':')
+ # An empty tag value splits to ['']; substitute zeros so the fallbacks below are used.
+ if Y == "['']" or Y == ['']:
+ Y = ['0','0','0']
+ M = Y[1] ; D = Y[2] ; Y = Y[0];
# Any other image timestamps could be used here
#eprint(tags)
except KeyboardInterrupt:
- return -1, -1
+ return -1, -1, -1
except:
if debuglevel >= 1: eprint(f"Unable to extract any exif data for {tf}")
- if int(Y) == 0 or int(M) == 0:
- # need to just use timestamp
+ #print(f"Y={Y} M={M} D={D}")
+
+ # The EXIF result is unusable if any field is zero or cannot be converted to an int.
+ use_fn = False
+ try:
+ if int(Y) == 0 or int(M) == 0 or int(D) == 0:
+ use_fn = True
+ except:
+ use_fn = True
+ # try to use filename to look for YYYYMMDD or YYYY.MM.DD (pseudoregex) pattern
+ if use_fn:
+ fn = path.basename(tf)
+ try:
+ match = dateregex.search(fn)
+ #print(f"match={match.group()}")
+ # Remove any separators. These two are hardcoded in the global dateregex
+ match1 = match.group().replace("-","").replace(".","")
+ Y = str(match1)[:4]
+ M = str(match1)[4:6]
+ D = str(match1)[6:8]
+ #print(f"Y={Y} M={M} D={D}")
+ except:
+ if debuglevel >=1: eprint(f"Unable to determine date from filename {tf}")
+
+ # Determine if we need to use the timestamp
+ use_ts = False
+ try:
+ if int(Y) == 0 or int(M) == 0 or int(D) == 0:
+ use_ts = True
+ except:
+ use_ts = True
+ if use_ts:
+ if debuglevel >= 9: eprint("Using timestamp...")
+ # need to just use timestamp, which is really a last resort
try:
ts = date.fromtimestamp(stat(tf).st_mtime)
except KeyboardInterrupt:
- return -1, -1
+ return -1, -1, -1
except:
# no timestamp available from filesystem?
if debuglevel >= 1: eprint(f"No timestamp available for {tf}")
- if int(Y) == 0: Y = ts.year
- if int(M) == 0: M = ts.month
+ # NOTE(review): when the stat() call above fails, ts is never assigned, yet ts.year/ts.month/ts.day
+ # are read below -- confirm and guard.
+ # Only the fields that are still zero or non-numeric are replaced by the timestamp's values.
+ use_ts_y=False ; use_ts_m=False ; use_ts_d=False
+ try:
+ if int(Y) == 0: use_ts_y=True
+ except:
+ use_ts_y=True
+ try:
+ if int(M) == 0: use_ts_m=True
+ except:
+ use_ts_m=True
+ try:
+ if int(D) == 0: use_ts_d=True
+ except:
+ use_ts_d=True
+ if use_ts_y: Y = ts.year
+ if use_ts_m: M = ts.month
+ if use_ts_d: D = ts.day
+
+ # M and D are always converted to strings here; Y is left as it was found.
if zero_pad:
M = str(M).zfill(2)
+ D = str(D).zfill(2)
else:
M = str(int(M))
+ D = str(int(D))
if debuglevel >= 3:
- eprint(f"{tf} {Y}/{M}")
- return Y, M
+ eprint(f"{tf} {Y}/{M}/{D}")
+ return Y, M, D
-def make_YM_forest(outdir, flist, action = "symlink", dryrun=True, debuglevel=debuglevel, zero_pad=False, relative_symlinks = False):
+def make_forest(outdir, flist, subdirformat = "{Y}/{M}", action = "symlink", dryrun=True, debuglevel=debuglevel, zero_pad=False, relative_symlinks = False):
"""
For each file in flist, [action] it to outdir, sorting into YYYY/MM subdirs based on exif metadata or timestamp.
Action may be one of ['symlink', 'copy', 'move'].
@@ -111,7 +187,7 @@ def make_YM_forest(outdir, flist, action = "symlink", dryrun=True, debuglevel=de
# validate input
if action not in ['symlink', 'copy', 'move']:
- eprint("make_YM_forest action may be one of ['symlink', 'copy', 'move'].")
+ eprint("make_forest action may be one of ['symlink', 'copy', 'move'].")
return -1
# Learn all directories to make
@@ -120,7 +196,7 @@ def make_YM_forest(outdir, flist, action = "symlink", dryrun=True, debuglevel=de
destdirs = []
for f in flist:
try:
- Y, M = get_file_YM(f,debuglevel=debuglevel,zero_pad=zero_pad)
+ Y, M, D = get_file_YMD(f,debuglevel=debuglevel,zero_pad=zero_pad)
if Y == -1 and M == -1:
#eprint("Stopping due to keyboard variant 2.")
stop = True
@@ -131,11 +207,16 @@ def make_YM_forest(outdir, flist, action = "symlink", dryrun=True, debuglevel=de
break
if zero_pad:
M = str(M).zfill(2)
+ D = str(D).zfill(2)
else:
M = str(int(M))
- YM = f"{Y}/{M}"
- if YM not in destdirs: destdirs.append(YM)
- fdict[f] = YM
+ D = str(int(D))
+
+ # eval is from https://stackoverflow.com/a/54071505/3569534
+ # Build the output subdirectory from the subdirformat string passed in to this function. Because the string is eval'd, it must come from a trusted source.
+ outsubdir = eval(f'f"""{subdirformat}"""')
+ if outsubdir not in destdirs: destdirs.append(outsubdir)
+ fdict[f] = outsubdir
# finish the for f in flist
# short-circuit if keyboard action happened in one of the nested functions
@@ -146,7 +227,7 @@ def make_YM_forest(outdir, flist, action = "symlink", dryrun=True, debuglevel=de
for d in destdirs:
dd = path.join(outdir,d)
if debuglevel >= 3:
- eprint(f"Make dir: {dd}")
+ print(f"mkdir {dd}")
if not dryrun:
Path(dd).mkdir(parents=True, exist_ok=True)
@@ -159,11 +240,11 @@ def make_YM_forest(outdir, flist, action = "symlink", dryrun=True, debuglevel=de
ff = path.relpath(ff,destfile)
if debuglevel >= 2:
if action == "copy":
- eprint(f"cp -p {ff} {destfile}")
+ print(f"cp -p {ff} {destfile}")
elif action == "symlink":
- eprint(f"ln -s {ff} {destfile}")
+ print(f"ln -s {ff} {destfile}")
elif action == "move":
- eprint(f"mv {ff} {destfile}")
+ print(f"mv {ff} {destfile}")
if not dryrun:
#if False:
bgstack15