summaryrefslogtreecommitdiff
path: root/fetch-images.sh
blob: 604b8ef54bdb25ac8edb1d59db785c002f4497db (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/bin/sh
# startdate 2020-05-29 20:04
# After running this, be sure to do the sed.
#    sed -i -f fix-images-in-html.sed /mnt/public/www/gitlab-issues/*.html
# Improve:
#    The asset svgs download successfully but still do not display at all, probably because of the unusual way they are embedded in the html. This has been seen before with small embedded images that cannot simply be downloaded and displayed.

# Directory that is entered before the html files are scanned; the downloaded
# assets are written relative to it.
INDIR=/mnt/public/www/gitlab-issues
# Glob selecting the html files to scan. It is stored unexpanded and is
# expanded (unquoted) where it is used.
INGLOB='*.html'

# sed script that is emptied and then rebuilt, one substitution per asset.
SEDSCRIPT=/mnt/public/work/devuan/fix-images-in-html.sed

# Server prefix added to site-relative asset paths before they are fetched.
INSERVER=https://git.devuan.org
# Extended regex matching the server part of absolute asset URLs.
INSERVERREGEX="https://git(lab)?\.devuan\.org"

# Everything below uses relative paths, so stop if the directory is missing
# instead of scanning and downloading into the caller's current directory.
cd "${INDIR}" || { echo "cannot cd to ${INDIR}" 1>&2 ; exit 1 ; }

# could use this line to get all the assets, but they do not display regardless due to html weirdness
#orig_src="$( grep -oE '(\<src|xlink:href)="?\/[^"]*"' ${INGLOB} | grep -vE '\.js' | awk -F'"' '!x[$0]++{print $2}' )"
# Collect each distinct src= / xlink:href= target that starts with "/" (with or
# without the server prefix), dropping anything that contains ".js".
# grep -o prints one match per line, awk takes the text between the double
# quotes and keeps only the first occurrence of each value.
# The "/" is written without a backslash: escaping it has no meaning in an ERE
# and recent GNU grep releases warn about such stray backslashes.
# ${INGLOB} is intentionally unquoted so that it expands to the file list.
orig_src="$( grep -oE "(src|xlink:href)=\"(${INSERVERREGEX})?/[^\"]*\"" -- ${INGLOB} | grep -vE '\.js' | awk -F'"' '!x[$2]++{print $2}' )"

# Start from an empty sed script; substitutions are appended to it later.
: > "${SEDSCRIPT}"

# sed_escape_pattern TEXT
# Print TEXT with the characters that are special in a sed basic regular
# expression, plus the "@" delimiter used by the generated commands,
# backslash-escaped so that the text matches only itself. "?" is left alone on
# purpose: it is literal in a basic regex, whereas GNU sed reads "\?" as a
# quantifier.
sed_escape_pattern() {
   printf '%s\n' "$1" | sed -e 's/[][\\.*^$@]/\\&/g'
}

# sed_escape_replacement TEXT
# Print TEXT with "\", "&" and the "@" delimiter backslash-escaped so that it is
# inserted literally by the replacement half of an s@...@...@ command.
sed_escape_replacement() {
   printf '%s\n' "$1" | sed -e 's/[\\&@]/\\&/g'
}

# For every collected asset reference: download it to a local path and append
# the matching substitution to ${SEDSCRIPT} (also echoed to stdout by tee).
# Set DEBUG to log each transfer to stderr; set DRYRUN to skip the download.
printf '%s\n' "${orig_src}" | while read -r line ; do
   # An empty orig_src still produces one blank line; skip it rather than
   # fetching the bare server URL and emitting an invalid "s@@.@g;" command.
   test -n "${line}" || continue

   # Absolute URLs are fetched as-is; site-relative paths get the server prefix.
   case "${line}" in
      *http://*|*https://*) getpath="${line}" ;;
      *) getpath="${INSERVER}${line}" ;;
   esac

   # Local reference: drop the server prefix, collapse "/-/" to "/", strip one
   # leading slash and any leading dots, then prepend a single ".".
   # NOTE(review): this yields names such as ".uploads/x.png" (no slash after
   # the dot), exactly as before - confirm whether "./uploads/x.png" was meant.
   temp=".$( printf '%s\n' "${line}" | sed -r -e "s@${INSERVERREGEX}@@g" -e 's@/-/@/@g' -e 's@^/@@' -e 's@^\.+@@' )"

   outdir="$( dirname "${temp}" )"
   mkdir -p "${outdir}"

   targetfile="${temp%%\?*}" # for the output file remove the query string, which is normally a width attribute
   targetfile="${targetfile%%\#*}" # for the output file remove the pound-sign fragment, which is some svg thing

   test -n "${DEBUG}" && echo "process ${getpath} and save to ${targetfile}" 1>&2
   if test -z "${DRYRUN}" ; then
      wget --quiet --content-disposition -O "${targetfile}" "${getpath}" || echo "failed to fetch ${getpath}" 1>&2
   fi

   # dynamically build a sed script; both halves are escaped so that characters
   # such as "." "&" or "@" inside a URL are taken literally
   printf 's@%s@%s@g;\n' "$( sed_escape_pattern "${line}" )" "$( sed_escape_replacement "${temp}" )" | tee -a "${SEDSCRIPT}"
done
bgstack15