1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
#!/bin/env python3
# Filename: scrub.py
# Location: Various
# Author: bgstack15@gmail.com
# Startdate: 2016-09-28
# Title: Script that Simultaneously Copies and Scrubs a Directory
# Purpose: Prepare projects for publication by removing private information like usernames and hostnames
# Package: Various
# History:
# Usage:
# Store this file with any package that gets published. Adjust scrub.txt in local directory.
# # First line: source directory. Second line: target directory, which WILL BE OVERWRITTEN!
# /etc/ansible
# /home/bjones/ansible.clean
# # Rest of the lines are "OLD WORD" "NEW WORD"
# bjones bgstack15
# rsmith rmstack15
# Reference:
# http://stackoverflow.com/questions/79968/split-a-string-by-spaces-preserving-quoted-substrings-in-python/524796#524796
# http://stackoverflow.com/questions/6706953/python-using-subprocess-to-call-sed#6707003
# http://stackoverflow.com/questions/6584871/remove-last-character-if-its-a-backslash/6584893#6584893
# http://stackoverflow.com/questions/2212643/python-recursive-folder-read/2212728#2212728
# parallel lists: http://stackoverflow.com/questions/1663807/how-can-i-iterate-through-two-lists-in-parallel-in-python
# Improve:
# Add option to specify scrub file
# Add exclude option to scrub file, such as .git and so on
# Accept CLI options like source, destination, even exclusions?
# Also change filenames
import re, shlex, os, sys, shutil
from pathlib import Path
# scrubpy version
scrubpyversion = "2016-09-29b"
# Define functions
# Group 1 matches a double- or single-quoted string (kept as-is).
# Group 2 matches a /* ... */ block comment, or a // or # comment running to
# the end of its line (dropped).  Compiled once instead of on every call.
_COMMENT_RE = re.compile(
    r"(\".*?\"|\'.*?\')|(/\*.*?\*/|(//|#)[^\r\n]*$)",
    re.MULTILINE | re.DOTALL,
)


def removeComments(string: str) -> str:
    """Return *string* with comments removed and quoted text left intact.

    Recognised comments are ``/* ... */`` blocks and ``//`` or ``#``
    comments that run to the end of a line.  Comment markers that appear
    inside a complete single- or double-quoted string are not treated as
    comments, because the quoted string is matched first and put back
    unchanged.
    """
    def _replacer(match):
        # A comment match is replaced by nothing; a quoted string is
        # returned verbatim so its contents survive.
        if match.group(2) is not None:
            return ""
        return match.group(1)

    return _COMMENT_RE.sub(_replacer, string)
# Main code
def read_scrub_file(path):
    """Parse a scrub definition file.

    Each line has its comments stripped with removeComments() and is then
    split into words with shlex.split(), so a quoted phrase counts as one
    word.  The first word of the first line that has any words is the
    source directory; the first word of the next such line is the target
    directory.  Every line with at least two words adds an
    "old word" -> "new word" substitution.

    Returns a tuple ``(thisdir, newdir, oldstrings, newstrings)``; the two
    directory values are empty strings when the file does not provide them.
    """
    thisdir = ""
    newdir = ""
    oldstrings = []
    newstrings = []
    with open(path, 'r') as stringfile:
        # Iterate the file directly so that a blank line is just skipped
        # instead of being mistaken for end-of-file.
        for line in stringfile:
            words = shlex.split(removeComments(line.rstrip()))
            if not words:
                continue
            if thisdir == "":
                thisdir = words[0]
            elif newdir == "":
                newdir = words[0]
            # NOTE: kept from the original logic - a directory line that
            # carries two or more words also registers a substitution.
            if len(words) >= 2:
                oldstrings.append(words[0])
                newstrings.append(words[1])
    return thisdir, newdir, oldstrings, newstrings


def scrub_tree(thisdir, newdir, oldstrings, newstrings):
    """Copy *thisdir* to *newdir* and apply the substitutions to the copy.

    *newdir* is removed first if it exists, then recreated as a copy of
    *thisdir* (symbolic links are copied as links).  Every regular file
    whose path contains neither ".swp" nor ".git" is then read as text and
    each word of *oldstrings* is replaced by the word at the same position
    in *newstrings*.  Files that cannot be decoded as text and symbolic
    links keep the plain copy made in the first step.

    Raises ValueError when either directory is empty or both name the
    same location, because the target directory is deleted.
    """
    if not thisdir or not newdir:
        raise ValueError("a source and a target directory are required")
    if os.path.realpath(thisdir) == os.path.realpath(newdir):
        raise ValueError("source and target directory must differ")

    # Clean scrubbed directory; a target that does not exist yet is fine,
    # any other failure should be visible.
    try:
        shutil.rmtree(newdir)
    except FileNotFoundError:
        pass
    shutil.copytree(thisdir, newdir, symlinks=True)

    # Execute substitutions
    for rootfolder, _subdirs, files in os.walk(thisdir):
        # Map the walked folder onto the target by its relative position;
        # a textual replace would also rewrite later path components that
        # happen to contain the source path.
        relative = os.path.relpath(rootfolder, thisdir)
        destdir = os.path.normpath(os.path.join(newdir, relative))
        for filename in files:
            sourcepath = os.path.join(rootfolder, filename)
            if ".swp" in sourcepath or ".git" in sourcepath:
                continue
            # The copy of a symlink is itself a symlink; writing to it
            # would modify whatever it points at, possibly outside newdir.
            if os.path.islink(sourcepath):
                continue
            destfile = os.path.join(destdir, filename)
            try:
                # newline="" keeps the file's own line endings.
                with open(sourcepath, "r", newline="") as source:
                    data = source.read()
            except UnicodeDecodeError:
                # Not text in the default encoding: leave the copy alone.
                continue
            changed = data
            for oldword, newword in zip(oldstrings, newstrings):
                changed = changed.replace(oldword, newword)
            # Only touch the copy when a substitution actually applied.
            if changed != data:
                with open(destfile, "w", newline="") as target:
                    target.write(changed)


def main():
    """Read scrub.txt from the current directory and scrub accordingly."""
    thisdir, newdir, oldstrings, newstrings = read_scrub_file('scrub.txt')
    scrub_tree(thisdir, newdir, oldstrings, newstrings)


if __name__ == "__main__":
    main()
|