diff options
-rwxr-xr-x | obsmirror.sh/obsmirror.sh | 47 |
1 file changed, 28 insertions, 19 deletions
diff --git a/obsmirror.sh/obsmirror.sh b/obsmirror.sh/obsmirror.sh
index e0c21b2..655bf2f 100755
--- a/obsmirror.sh/obsmirror.sh
+++ b/obsmirror.sh/obsmirror.sh
@@ -27,33 +27,42 @@ parse_obs_dl_page() {
    grep -oE 'href="[^"]+">' | awk '!x[$0]++' | sed -r -e 's/^href="//;' -e 's/">$//;' | grep -viE 'https?:\/\/[A-Za-z0-9\.]+\.[A-Za-z]+|mirrorlist|orig.*z$|^\/(debug|distribution|factory|ports|repositories|source|tumbleweed|update)\/$|^\?[A-Z]=[A-Z]|^\/|\.dsc$'
 }
 
-logfile="/tmp/var/log/obsmirror/obsmirror.$( date "+%FT%H%M%S" ).log"
-tmpfile="$( mktemp )"
+parse_obs_page_and_subdirs() {
+   # call: curl -s -L "${inurl}" | parse_obs_page_and_subdirs "${inurl}" "${tmpfile}"
+   # return to stdout: all the wanted files from this page and its associated subdirs
+   ___input="$( parse_obs_dl_page )"
+   ___inurl="${1}"
+   ___tmpfile="${2}"
+   {
+      echo "${___input}" | grep -vE '\/$' | sed -r -e "s@^@${___inurl}\/@"
+      # iterate over all listed subdirs parse out their files
+      for subdir in $( echo "${___input}" | grep -E "\/$" ) ;
+      do
+         #echo "${___inurl}/${subdir}"
+         curl -s -L "${___inurl}/${subdir}" | parse_obs_dl_page | sed -r -e "s@^@${___inurl}/${subdir}@"
+      done
+   } > "${___tmpfile}"
 
-{
+}
+
+test -n "${OBSMIRROR_CONF}" && . "${OBSMIRROR_CONF}"
+test -z "${logfile}" && logfile="/tmp/var/log/obsmirror/obsmirror.$( date "+%FT%H%M%S" ).log"
+test -z "${tmpfile}" && tmpfile="$( mktemp )"
+test -z "${inurl}" && inurl="http://download.opensuse.org/repositories/home:/bgstack15/Debian_Unstable"
+test -z "${workdir}" && workdir=/tmp/obs
+test -z "${outdir}" && outdir=/tmp/var/www/mirror/obs
+test -z "${thisuser}" && thisuser=obsmirror
+{
    test "${DEBUG:-NONE}" = "FULL" && set -x
-   inurl="http://download.opensuse.org/repositories/home:/bgstack15/Debian_Unstable"
-   workdir=/tmp/obs-stage
-   outdir=/tmp/var/www/mirror/obs
-   thisuser=$USER
-   echo "logfile=${logfile}"
+   echo "logfile=${logfile}"
    mkdir -p "${workdir}" ; chmod "0711" "${workdir}" ; chown "${thisuser}:$( id -G "${thisuser}" | awk '{print $1}' )" "${workdir}"
    cd "${workdir}"
 
    # get page contents
    step1="$( curl -s -L "${inurl}" )"
-   step2="$( echo "${step1}" | parse_obs_dl_page )"
-   {
-      echo "${step2}" | grep -vE '\/$' | sed -r -e "s@^@${inurl}\/@"
-      # iterate over all listed subdirs parse out their files
-      for subdir in $( echo "${step2}" | grep -E "\/$" ) ;
-      do
-         #echo "${inurl}/${subdir}"
-         curl -s -L "${inurl}/${subdir}" | parse_obs_dl_page | sed -r -e "s@^@${inurl}/${subdir}@"
-      done
-   } > "${tmpfile}"
-
+   #step2="$( echo "${step1}" | parse_obs_dl_page )"
+   echo "${step1}" | parse_obs_page_and_subdirs "${inurl}" "${tmpfile}"
 
    # loop over all entries and download them
    for thisurl in $( cat "${tmpfile}" ) ;
    do