about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- .gitignore 1
-rwxr-xr-x generate.py 22
-rw-r--r-- genlib.py 129
3 files changed, 123 insertions, 29 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..bee8a64
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+__pycache__
diff --git a/generate.py b/generate.py
index 49ed27f..22ff18e 100755
--- a/generate.py
+++ b/generate.py
@@ -7,19 +7,28 @@
# Usage:
# ./generate.py -i /mnt/bgstack15/Backups/bgstack15/Images/Photos/camera/2018/ -o /mnt/public/www/gallery/my2018 -n -d2 -x 'October' -s --nr
-from argparse import ArgumentParser
+from argparse import ArgumentParser, RawDescriptionHelpFormatter
from genlib import *
-generate_version="2021-01-29a"
+generate_version="2021-02-25a"
# WORKHERE the if __main__ thing:
-parser = ArgumentParser(description="Make symlink forest for images") # epilog="something"
+parser = ArgumentParser(description="Make symlink forest for images",
+ formatter_class=RawDescriptionHelpFormatter,
+ epilog="""FORMATTING
+Format string will be interpreted as a python f-string, so wrap variable
+names with curly braces.
+Default format is "{Y}/{M}"
+
+ {Y} is 4-digit year
+ {M} is month (affected by padding)
+ {D} is day (affected by padding)""")
parser.add_argument("-d","--debug",nargs='?', default=0, type=int, choices=range(0,11), help="Set debug level")
parser.add_argument("-v","--version", action="version", version="%(prog)s " + generate_version)
g_dryrun = parser.add_mutually_exclusive_group()
g_dryrun.add_argument("-n","--dryrun", action="store_true", help="Make no changes (default)")
g_dryrun.add_argument("-a","--apply", action="store_true", help="Actually make changes")
g_zeropad = parser.add_mutually_exclusive_group()
-g_zeropad.add_argument("-z","-0","--zeropad",action="store_true", help="Zero pad month directories (default)")
+g_zeropad.add_argument("-z","-0","--zeropad",action="store_true", help="Zero pad month and day strings (default)")
g_zeropad.add_argument("--nz","--nozeropad","--no-zeropad",action="store_true", help="Do not zero pad")
g_action = parser.add_mutually_exclusive_group()
g_action.add_argument("-c","--copy", action="store_true", help="Copy files instead of symlinks. Not recommended.")
@@ -31,6 +40,7 @@ g_relative.add_argument("--nr","--norelative","--no-relative", action="store_tru
parser.add_argument("-i","--indir",required=True)
parser.add_argument("-o","--outdir",required=True)
parser.add_argument("-x","--exclude",action="append",help="Exclude pathname matches. Can be used multiple times.")
+parser.add_argument("-f","--format",action="store",default="{Y}/{M}",help="Subdirectories should follow this pattern. See FORMATTING heading.")
# pull useful values out of the argparse entry
args = parser.parse_args()
@@ -58,6 +68,7 @@ try:
except:
# no exclusions
pass
+subdirformat=args.format
if debuglevel >= 10:
eprint(args)
@@ -75,9 +86,10 @@ these_files = limit(
)
print("FOUND FILE COUNT:",len(these_files))
-make_YM_forest(
+make_forest(
outdir,
these_files,
+ subdirformat = subdirformat,
action = action,
dryrun = dryrun,
debuglevel = debuglevel,
diff --git a/genlib.py b/genlib.py
index 898542d..62676ad 100644
--- a/genlib.py
+++ b/genlib.py
@@ -6,12 +6,15 @@
# https://stackoverflow.com/questions/1192978/python-get-relative-path-of-all-files-and-subfolders-in-a-directory
# https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory
# https://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python
+# Improve:
+# store as a variable the method used to determine date. String, from options ['exif datetimeoriginal','filename','timestamp'] or similar.
from sys import stderr
from exifread import process_file
from os import walk, path, stat, symlink
from pathlib import Path
from datetime import date
from shutil import copy2, move
+from re import compile as re_compile
try:
if debuglevel is None:
@@ -19,10 +22,32 @@ try:
except:
debuglevel = 0
+# Sorry, don't care about anything other than 1900s and forward. We also benefit from relying on full-width month and day fields.
+dateregex = re_compile("(?P<year>(19|2[0-9])[0-9]{2})(?P<separator>[\.-]?)(?P<month>[01][0-9])(?P=separator)(?P<day>[03][0-9])")
+
# Functions
def eprint(*args, **kwargs):
print(*args, file=stderr, **kwargs)
+# this function is for manual debugging only
+def list_all_tags(tf):
+ tags = []
+ try:
+ with open(tf,'rb') as f:
+ tags = process_file(f,details=True)
+ except:
+ eprint(f"Failed to process tags for {tf}")
+ pass
+ for t in tags:
+ try:
+ if "JPEGThumbnail" in t:
+ o = tags[t][:40] # first 40 bytes
+ else:
+ o = tags[t]
+ print(f'{t}: {o}')
+ except:
+ print(f'Failed to print {t}')
+
def list_files(indir,relative=False,debuglevel=debuglevel, excludes=None):
x = 0
f = []
@@ -62,45 +87,96 @@ def limit(listf=None,filters=["image","video"]):
newlist.append(f)
return newlist
-def get_file_YM(tf,debuglevel=debuglevel,zero_pad=False):
- Y = 0 ; M = 0
+def get_file_YMD(tf,debuglevel=debuglevel,zero_pad=False):
+ Y = 0 ; M = 0 ; D = 0
#eprint(f"Debuglevel {debuglevel}")
try:
# read exif data
with open(tf,'rb') as f:
- tags = process_file(f, details=False, stop_tag="Image DateTime")
+ tags = process_file(f, details=False) #, stop_tag="Image DateTime")
+ # prefer Image DateTime
if "Image DateTime" in tags:
#if debuglevel >= 5: eprint(tf,tags["Image DateTime"])
Y = str(tags["Image DateTime"]).split(' ')[0].split(':')
- if Y == "['']" or Y == ['']:
- Y = ['0','0']
- M = Y[1] ; Y = Y[0];
+ elif "EXIF DateTimeOriginal" in tags:
+ Y = str(tags["EXIF DateTimeOriginal"]).split(' ')[0].split(':')
+ if Y == "['']" or Y == ['']:
+ Y = ['0','0','0']
+ M = Y[1] ; D = Y[2] ; Y = Y[0];
# Any other image timestamps could be used here
#eprint(tags)
except KeyboardInterrupt:
- return -1, -1
+ return -1, -1, -1
except:
if debuglevel >= 1: eprint(f"Unable to extract any exif data for {tf}")
- if int(Y) == 0 or int(M) == 0:
- # need to just use timestamp
+ #print(f"Y={Y} M={M} D={D}")
+
+ use_fn = False
+ try:
+ if int(Y) == 0 or int(M) == 0 or int(D) == 0:
+ use_fn = True
+ except:
+ use_fn = True
+ # try to use filename to look for a YYYYMMDD, YYYY.MM.DD or YYYY-MM-DD pattern (see the global dateregex)
+ if use_fn:
+ fn = path.basename(tf)
+ try:
+ match = dateregex.search(fn)
+ #print(f"match={match.group()}")
+ # Remove any separators. These two are hardcoded in the global dateregex
+ match1 = match.group().replace("-","").replace(".","")
+ Y = str(match1)[:4]
+ M = str(match1)[4:6]
+ D = str(match1)[6:8]
+ #print(f"Y={Y} M={M} D={D}")
+ except:
+ if debuglevel >=1: eprint(f"Unable to determine date from filename {tf}")
+
+ # Determine if we need to use the timestamp
+ use_ts = False
+ try:
+ if int(Y) == 0 or int(M) == 0 or int(D) == 0:
+ use_ts = True
+ except:
+ use_ts = True
+ if use_ts:
+ if debuglevel >= 9: eprint("Using timestamp...")
+ # need to just use timestamp, which is really a last resort
try:
ts = date.fromtimestamp(stat(tf).st_mtime)
except KeyboardInterrupt:
- return -1, -1
+ return -1, -1, -1
except:
# no timestamp available from filesystem?
if debuglevel >= 1: eprint(f"No timestamp available for {tf}")
- if int(Y) == 0: Y = ts.year
- if int(M) == 0: M = ts.month
+ use_ts_y=False ; use_ts_m=False ; use_ts_d=False
+ try:
+ if int(Y) == 0: use_ts_y=True
+ except:
+ use_ts_y=True
+ try:
+ if int(M) == 0: use_ts_m=True
+ except:
+ use_ts_m=True
+ try:
+ if int(D) == 0: use_ts_d=True
+ except:
+ use_ts_d=True
+ if use_ts_y: Y = ts.year
+ if use_ts_m: M = ts.month
+ if use_ts_d: D = ts.day
+
if zero_pad:
M = str(M).zfill(2)
+ D = str(D).zfill(2)
else:
M = str(int(M))
+ D = str(int(D))
if debuglevel >= 3:
- eprint(f"{tf} {Y}/{M}")
- return Y, M
+ eprint(f"{tf} {Y}/{M}/{D}")
+ return Y, M, D
-def make_YM_forest(outdir, flist, action = "symlink", dryrun=True, debuglevel=debuglevel, zero_pad=False, relative_symlinks = False):
+def make_forest(outdir, flist, subdirformat = "{Y}/{M}", action = "symlink", dryrun=True, debuglevel=debuglevel, zero_pad=False, relative_symlinks = False):
"""
For each file in flist, [action] it to outdir, sorting into YYYY/MM subdirs based on exif metadata or timestamp.
Action may be one of ['symlink', 'copy', 'move'].
@@ -111,7 +187,7 @@ def make_YM_forest(outdir, flist, action = "symlink", dryrun=True, debuglevel=de
# validate input
if action not in ['symlink', 'copy', 'move']:
- eprint("make_YM_forest action may be one of ['symlink', 'copy', 'move'].")
+ eprint("make_forest action may be one of ['symlink', 'copy', 'move'].")
return -1
# Learn all directories to make
@@ -120,7 +196,7 @@ def make_YM_forest(outdir, flist, action = "symlink", dryrun=True, debuglevel=de
destdirs = []
for f in flist:
try:
- Y, M = get_file_YM(f,debuglevel=debuglevel,zero_pad=zero_pad)
+ Y, M, D = get_file_YMD(f,debuglevel=debuglevel,zero_pad=zero_pad)
if Y == -1 and M == -1:
#eprint("Stopping due to keyboard variant 2.")
stop = True
@@ -131,11 +207,16 @@ def make_YM_forest(outdir, flist, action = "symlink", dryrun=True, debuglevel=de
break
if zero_pad:
M = str(M).zfill(2)
+ D = str(D).zfill(2)
else:
M = str(int(M))
- YM = f"{Y}/{M}"
- if YM not in destdirs: destdirs.append(YM)
- fdict[f] = YM
+ D = str(int(D))
+
+ # eval is from https://stackoverflow.com/a/54071505/3569534
+ # build the output subdirectory by evaluating the subdirformat string passed in to this function as an f-string. NOTE: eval executes whatever expression the caller-supplied format contains.
+ outsubdir = eval(f'f"""{subdirformat}"""')
+ if outsubdir not in destdirs: destdirs.append(outsubdir)
+ fdict[f] = outsubdir
# finish the for f in flist
# short-circuit if keyboard action happened in one of the nested functions
@@ -146,7 +227,7 @@ def make_YM_forest(outdir, flist, action = "symlink", dryrun=True, debuglevel=de
for d in destdirs:
dd = path.join(outdir,d)
if debuglevel >= 3:
- eprint(f"Make dir: {dd}")
+ print(f"mkdir {dd}")
if not dryrun:
Path(dd).mkdir(parents=True, exist_ok=True)
@@ -159,11 +240,11 @@ def make_YM_forest(outdir, flist, action = "symlink", dryrun=True, debuglevel=de
ff = path.relpath(ff,destfile)
if debuglevel >= 2:
if action == "copy":
- eprint(f"cp -p {ff} {destfile}")
+ print(f"cp -p {ff} {destfile}")
elif action == "symlink":
- eprint(f"ln -s {ff} {destfile}")
+ print(f"ln -s {ff} {destfile}")
elif action == "move":
- eprint(f"mv {ff} {destfile}")
+ print(f"mv {ff} {destfile}")
if not dryrun:
#if False:
bgstack15