1 files changed, 28 insertions, 19 deletions
diff --git a/obsmirror.sh/obsmirror.sh b/obsmirror.sh/obsmirror.sh
index e0c21b2..655bf2f 100755
--- a/obsmirror.sh/obsmirror.sh
+++ b/obsmirror.sh/obsmirror.sh
@@ -27,33 +27,42 @@ parse_obs_dl_page() {
    grep -oE 'href="[^"]+">' | awk '!x[$0]++' | sed -r -e 's/^href="//;' -e 's/">$//;' | grep -viE 'https?:\/\/[A-Za-z0-9\.]+\.[A-Za-z]+|mirrorlist|orig.*z$|^\/(debug|distribution|factory|ports|repositories|source|tumbleweed|update)\/$|^\?[A-Z]=[A-Z]|^\/|\.dsc$'
 }
 
-logfile="/tmp/var/log/obsmirror/obsmirror.$( date "+%FT%H%M%S" ).log"
-tmpfile="$( mktemp )"
+parse_obs_page_and_subdirs() {
+   # call: curl -s -L "${inurl}" | parse_obs_page_and_subdirs "${inurl}" "${tmpfile}"
+   # output: written to the file named by ${2} (not stdout): URLs of all the wanted files from this page and its listed subdirs (one level deep)
+   ___input="$( parse_obs_dl_page )"
+   ___inurl="${1}"
+   ___tmpfile="${2}"
+   {
+      echo "${___input}" | grep -vE '\/$' | sed -r -e "s@^@${___inurl}\/@"
+      # iterate over all listed subdirs and parse out their files
+      for subdir in $( echo "${___input}" | grep -E "\/$" ) ;
+      do
+         #echo "${___inurl}/${subdir}"
+         curl -s -L "${___inurl}/${subdir}" | parse_obs_dl_page | sed -r -e "s@^@${___inurl}/${subdir}@"
+      done
+   } > "${___tmpfile}"
 
-{
+}
+
+test -n "${OBSMIRROR_CONF}" && . "${OBSMIRROR_CONF}" # optional config file, sourced first; each default below applies only if its variable is still empty
+test -z "${logfile}" && logfile="/tmp/var/log/obsmirror/obsmirror.$( date "+%FT%H%M%S" ).log"
+test -z "${tmpfile}" && tmpfile="$( mktemp )"
+test -z "${inurl}" && inurl="http://download.opensuse.org/repositories/home:/bgstack15/Debian_Unstable"
+test -z "${workdir}" && workdir=/tmp/obs
+test -z "${outdir}" && outdir=/tmp/var/www/mirror/obs
+test -z "${thisuser}" && thisuser=obsmirror
 
+{
    test "${DEBUG:-NONE}" = "FULL" && set -x
-   inurl="http://download.opensuse.org/repositories/home:/bgstack15/Debian_Unstable"
-   workdir=/tmp/obs-stage
-   outdir=/tmp/var/www/mirror/obs
-   thisuser=$USER
-   echo "logfile=${logfile}"
+      echo "logfile=${logfile}"
 
    mkdir -p "${workdir}" ; chmod "0711" "${workdir}" ; chown "${thisuser}:$( id -G "${thisuser}" | awk '{print $1}' )" "${workdir}"
    cd "${workdir}"
    # get page contents
    step1="$( curl -s -L "${inurl}" )"
-   step2="$( echo "${step1}" | parse_obs_dl_page )"
-   {
-      echo "${step2}" | grep -vE '\/$' | sed -r -e "s@^@${inurl}\/@"
-      # iterate over all listed subdirs parse out their files
-      for subdir in $( echo "${step2}" | grep -E "\/$" ) ;
-      do
-         #echo "${inurl}/${subdir}"
-         curl -s -L "${inurl}/${subdir}" | parse_obs_dl_page | sed -r -e "s@^@${inurl}/${subdir}@"
-      done
-   } > "${tmpfile}"
-
+   #step2="$( echo "${step1}" | parse_obs_dl_page )"
+   echo "${step1}" | parse_obs_page_and_subdirs "${inurl}" "${tmpfile}"
    # loop over all entries and download them
    for thisurl in $( cat "${tmpfile}" ) ;
    do