diff options
Diffstat (limited to 'inc/scrub.py')
-rwxr-xr-x | inc/scrub.py | 122 |
1 files changed, 122 insertions, 0 deletions
diff --git a/inc/scrub.py b/inc/scrub.py new file mode 100755 index 0000000..10cfe14 --- /dev/null +++ b/inc/scrub.py @@ -0,0 +1,122 @@ +#!/bin/env python3 +# Filename: scrub.py +# Location: Various +# Author: bgstack15@gmail.com +# Startdate: 2016-09-28 +# Title: Script that Simultaneously Copies and Scrubs a Directory +# Purpose: Prepare projects for publication by removing private information like usernames and hostnames +# Package: Various +# History: +# 2016-10-03 working on batch rename files +# 2016-10-20 added not ".tgz" in source.name +# Usage: +# Store this file with any package that gets published. Adjust scrub.txt in local directory. +# # First line: source directory Second line: target directory. WILL BE OVERWRITTEN! +# /etc/ansible +# /home/bjones/ansible.clean +# # Rest of the lines are "OLD WORD" "NEW WORD" +# bjones bgstack15 +# rsmith rmstack15 +# Reference: +# http://stackoverflow.com/questions/79968/split-a-string-by-spaces-preserving-quoted-substrings-in-python/524796#524796 +# http://stackoverflow.com/questions/6706953/python-using-subprocess-to-call-sed#6707003 +# http://stackoverflow.com/questions/6584871/remove-last-character-if-its-a-backslash/6584893#6584893 +# http://stackoverflow.com/questions/2212643/python-recursive-folder-read/2212728#2212728 +# parallel lists: http://stackoverflow.com/questions/1663807/how-can-i-iterate-through-two-lists-in-parallel-in-python +# file renames http://stackoverflow.com/questions/225735/batch-renaming-of-files-in-a-directory/7917798#7917798 +# Improve: +# Add option to specify scrub file +# Add exclude option to scrub file, such as .git and so on +# Accept CLI options like source, destination, even exclusions? +# Add flag for performing file renames as well, or file renames only +import re, shlex, os, sys, shutil +from pathlib import Path + +# scrubpy version +scrubpyversion = "2016-10-20a" + +# Define functions + +def removeComments(string): + #string = re.sub(re.compile("/\*.*?\*/",re.DOTALL ) ,"", string) + #string = re.sub(re.compile("//.*?\n" ) ,"" ,string) + pattern = r"(\".*?\"|\'.*?\')|(/\*.*?\*/|(//|#)[^\r\n]*$)" + regex = re.compile(pattern, re.MULTILINE|re.DOTALL) + def _replacer(match): + if match.group(2) is not None: + return "" + else: + return match.group(1) + return regex.sub(_replacer, string) + +# Main code +stringfile = open('scrub.txt','r') +count=0 +thisdir="" +newdir="" +oldstrings=[] +newstrings=[] + +while True: + x = stringfile.readline().rstrip() + count += 1 + if not x: break + x = removeComments(x) + #print("x=" + x) + y = shlex.split (x) + if len(y) >= 1: + if thisdir == "": + thisdir = y[0] + elif newdir == "": + newdir = y[0] + if len(y) >= 2: + #print("y[0]=" + y[0] + "\t and y[1]=" + y[1]) + oldstrings.append(y[0]) + newstrings.append(y[1]) + +# After the file is done +stringfile.close() +#newdir = thisdir.rstrip('\/') + ".scrubbed/" + +if False: + print("\nthisdir=" + thisdir) + print("newdir=" + newdir + '\n') + print("oldstrings are:") + print(oldstrings) + print("newstrings are:") + print(newstrings) + +# Clean scrubbed directory +try: + shutil.rmtree(newdir) +except: + foo=1 + +shutil.copytree(thisdir,newdir,symlinks=True) + +# Execute substitutions +for rootfolder, subdirs, files in os.walk(thisdir): + for filename in files: + sourcepath = os.path.join(rootfolder, filename) + with open( sourcepath, "r" ) as source: + if not ".swp" in source.name and not ".git" in source.name and not ".tgz" in source.name: + destdir = rootfolder.replace(thisdir.rstrip('\/'),newdir.rstrip('\/')) + destfile = os.path.join(destdir, filename) + #print("sourcefile=" + source.name) + #print("destfile=" + destfile + '\n') + with open( destfile, "w") as target: + data = source.read() + for oldword, newword in zip(oldstrings, newstrings): + data = data.replace(oldword,newword) + changed = data + target.write(changed) + +# Execute file renames +# Used "file renames" reference, as well as the structure of directory traversal used earlier, which was from a different source. +for rootfolder, subdirs, files in os.walk(newdir): + for filename in files: + oldpath = os.path.join(rootfolder, filename) + for oldword, newword in zip(oldstrings, newstrings): + if oldword in oldpath: + #print("oldword=" + oldword + "\toldpath=" + oldpath) + os.rename(oldpath, oldpath.replace(oldword,newword)) |