From 5fa4379088f05e149799780615ebf8a2bea91a00 Mon Sep 17 00:00:00 2001 From: B Stack Date: Thu, 27 Feb 2020 08:25:22 -0500 Subject: refactor again parse Packages.gz.mirrorlist, and choose a single mirror to pull all contents from. --- obsmirror.sh/obsmirror.sh | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/obsmirror.sh/obsmirror.sh b/obsmirror.sh/obsmirror.sh index 655bf2f..86180f3 100755 --- a/obsmirror.sh/obsmirror.sh +++ b/obsmirror.sh/obsmirror.sh @@ -42,7 +42,6 @@ parse_obs_page_and_subdirs() { curl -s -L "${___inurl}/${subdir}" | parse_obs_dl_page | sed -r -e "s@^@${___inurl}/${subdir}@" done } > "${___tmpfile}" - } test -n "${OBSMIRROR_CONF}" && . "${OBSMIRROR_CONF}" @@ -57,11 +56,22 @@ test -z "${thisuser}" && thisuser=obsmirror test "${DEBUG:-NONE}" = "FULL" && set -x echo "logfile=${logfile}" - mkdir -p "${workdir}" ; chmod "0711" "${workdir}" ; chown "${thisuser}:$( id -G "${thisuser}" | awk '{print $1}' )" "${workdir}" + mkdir -p "${workdir}" ; chmod "0755" "${workdir}" ; chown "${thisuser}:$( id -G "${thisuser}" | awk '{print $1}' )" "${workdir}" cd "${workdir}" - # get page contents + # get mirrorlist of Packages.gz file and find the one that lists the most packages + step1="$( curl -s -L "${inurl}/Packages.gz.mirrorlist" )" + options="$( echo "${step1}" | grep -oE 'href="[^"]+">' | awk '!x[$0]++' | sed -r -e 's/^href="//;' -e 's/">$//;' | grep -iE '^(ht|f)tps?:\/\/.*Packages\.gz$' )" + results="$( + for entry in ${options} ; do + curl -s -L "${entry}" | zgrep -cE '^Package:' | sed -r -e "s@\$@ ${entry}@;" + done )" + topresult_line="$( echo "${results}" | sort -nr | head -n1 | sed -r -e 's/\/Packages\.gz$//;' )" + topresult_packagecount="$( echo "${topresult_line}" | awk '{print $1}' )" + topresult="$( echo "${topresult_line}" | awk '{print $2}' )" + echo "USING ${topresult} with ${topresult_packagecount} packages" 1>&2 + inurl="${topresult}" + step1="$( curl -s -L "${inurl}" )" - #step2="$( echo "${step1}" | parse_obs_dl_page )" echo "${step1}" | parse_obs_page_and_subdirs "${inurl}" "${tmpfile}" # loop over all entries and download them for thisurl in $( cat "${tmpfile}" ) ; @@ -70,7 +80,7 @@ test -z "${thisuser}" && thisuser=obsmirror thisdir="$( dirname "${thisfile}" )" test -d "${thisdir}" || mkdir -p "${thisdir}" test -n "${VERBOSE}" && echo "FROM ${thisurl} TO ${thisfile}" - test -z "${DRYRUN}" && wget --continue --no-verbose -O "${thisfile}" "${thisurl}" & + test -z "${DRYRUN}" && wget --no-verbose -O "${thisfile}" "${thisurl}" & done } 2>&1 | tee -a "${logfile}" -- cgit