#!/bin/sh
|
|
# File: /etc/installed/obsmirror.sh
|
|
# Location: https://gitlab.com/bgstack15/former-gists/tree/master/obsmirror.sh
|
|
# Author: bgstack15
|
|
# Startdate: 2020-03-03 08:43
|
|
# SPDX-License-Identifier: CC-BY-SA-4.0
|
|
# Title: Script that scrapes down OBS site to serve a copy to intranet
|
|
# Purpose: save down my OBS site so I can serve it locally
|
|
# History:
|
|
# 2020-01-05 v1: begin which used httrack
|
|
# 2020-02-28 v2: complete rewrite to exclude httrack
|
|
# 2020-03-03 v3: complete rewrite to get explicit files and loop through their contents, and rebuild apt repo
|
|
# 2020-03-13 add on-prompt notifications
|
|
# 2022-04-01 22:33
|
|
# Usage:
|
|
# in a cron job: /etc/cron.d/mirror.cron
|
|
# 50 12 * * * root /etc/installed/obsmirror.sh 1>/dev/null 2>&1
|
|
# Reference:
|
|
# https://software.opensuse.org//download.html?project=home%3Abgstack15&package=freefilesync
|
|
# /mnt/public/www/internal/repo/devuan-deb/update-devuan-deb.sh
|
|
# https://medium.com/sqooba/create-your-own-custom-and-authenticated-apt-repository-1e4a4cf0b864
|
|
# https://unix.stackexchange.com/questions/113898/how-to-merge-two-files-based-on-the-matching-of-two-columns/113903#113903
|
|
# sed|sed to get line numbers printed https://stackoverflow.com/questions/52882594/insert-line-numbers-into-file-with-sed/52884598#52884598
|
|
# Improve:
|
|
# Documentation:
|
|
# Download the release key and trust it.
|
|
# curl -s http://repo.example.com/mirror/obs/Release.key | apt-key add -
|
|
# Use a sources.list.d/ file with contents:
|
|
# deb https://repo.example.com/mirror/obs/ /
|
|
# Dependencies:
|
|
# binaries: wget curl sed awk grep md5sum join sort xargs readlink mktemp tee
|
|
# user: obsmirror
|
|
# Files and directories created from here on are group-writable.
umask 0002

# Duplicate the script's stdin onto fd 8 (the commented-out debugging pause
# in get_file reads from it).
exec 8>&0

# Optional configuration: sourced only when OBSMIRROR_CONF names a file.
if test -n "${OBSMIRROR_CONF}" ; then
  . "${OBSMIRROR_CONF}"
fi

# Defaults for every setting that the environment or the config file left empty.
test -n "${logfile}" || logfile="/tmp/var/log/obsmirror/obsmirror.$( date "+%FT%H%M%S" ).log"
test -n "${inurl}" || inurl="http://download.opensuse.org/repositories/home:/bgstack15/Debian_Unstable"
test -n "${workdir}" || workdir=/tmp/obs
test -n "${thisuser}" || thisuser=obsmirror
if test -z "${tempdir}" ; then
  # Stop here when no scratch directory can be made: with an empty tempdir the
  # later "${tempdir}/..." paths would resolve to the filesystem root.
  tempdir="$( TMPDIR="${TMPDIR:-/tmp}" mktemp -d )" && test -n "${tempdir}" || {
    echo "obsmirror: unable to create a temporary directory" 1>&2
    exit 1
  }
fi
# also use include_sources resign_repo gpg_passfile gpg_keyfile DEBUG

# fd 3 is a copy of the script's stdout; show and reset_show write to it.
exec 3>&1
|
|
# show: write all arguments as one string (joined by the first character of
# IFS, a space by default) to file descriptor 3, without a trailing newline.
# File descriptor 3 must already be open when this is called.
show() {
  printf "%s" "${*}" 1>&3
}
|
|
|
|
# reset_show: write a carriage return followed by all arguments (joined like
# "$*") to file descriptor 3, without a trailing newline. Called with no
# arguments it emits only the carriage return.
# File descriptor 3 must already be open when this is called.
reset_show() {
  printf "\r%s" "${*}" 1>&3
}
|
|
|
|
# get_file: make sure the local copy of one remote file exists and is current.
# call: get_file "${url}" "${md5sum_old}" "${md5sum_new}"
#   $1  full URL of the file; the leading ${inurl} is stripped to get the
#       path below ${workdir}
#   $2  md5sum from the previous run's Packages file (accepted, not used; see
#       the note in the body)
#   $3  md5sum from the current Packages file; when empty the file is always
#       downloaded
# Reads globals: inurl workdir DRYRUN VERBOSE
# Sets globals: tn tf td gotten, plus ___-prefixed scratch variables
# Output: when VERBOSE is non-empty, one line on stdout of the form
#   "<gotten> <url> -> <local file>"
# where <gotten> is "skipped ", "DOWNLOADED" or "x FAILED ".
get_file() {
  ___tu="${1}"
  ___sum1="${2}" # nominally from the locally-generated Packages from previous run
  ___sum2="${3}" # from current set

  # Path relative to inurl, then the normalized absolute path under workdir.
  tn="${___tu##"${inurl}"}"
  tf="$( readlink -m "${workdir}/${tn}" )"
  td="$( dirname "${tf}" )"
  test -d "${td}" || mkdir -p "${td}"

  gotten="skipped "
  if test -z "${DRYRUN}" ; then
    # determine if file is good enough
    ___md5sum_file="$( md5sum "${tf}" 2>/dev/null | awk '{print $1}' )"
    # sum1 check is disabled. If upstream obs rebuilds a package, we would never
    # download the new package of the exact same name+version, so only the
    # checksum from the current set decides.
    if test -z "${___sum2}" || test "${___md5sum_file}" != "${___sum2}" ; then
      # so the checksum is empty, or the given checksum does not match the existing downloaded file
      # use the Link headers because provo-mirror sucks and presents a lot of 404s.
      # if Links header does not exist then this list will be short.
      ___links="$( printf '%s\n%s\n' "${___tu}" "$( curl --head "${___tu}" --silent | awk '/Link:/ && !/type=/{print $2}' | tr -d '<>;' )" | sed -e '/^\s*$/d' )"
      # The candidate list does not change inside the loop, so count it once.
      ___nlinks="$( printf '%s\n' "${___links}" | wc -l | tr -d ' ' )"
      if test -n "${VERBOSE}" ; then
        show "retrieving ${___tu}" 2>/dev/null || :
      fi

      ___valid=0
      ___x=0
      # Try each candidate URL in order until one yields a usable file.
      while test "${___valid}" -eq 0 && test "${___x}" -lt "${___nlinks}" ; do
        ___x=$((___x+1))
        ___tl="$( printf '%s\n' "${___links}" | sed -n "${___x}p" 2>/dev/null )"
        if test -n "${___tl}" && wget --content-disposition --no-verbose --quiet --output-document "${tf}" "${___tl}" ; then
          ___valid=1
        fi
        # A saved error page or an empty file is not a successful download.
        grep -qiE '404 Not Found' "${tf}" 2>/dev/null && ___valid=0
        test -s "${tf}" || ___valid=0
        # A download whose checksum equals the expected one is always valid.
        # An empty expected checksum never counts as a match; otherwise an
        # unreadable file (empty computed sum) would be accepted.
        ___md5sum_file="$( md5sum "${tf}" 2>/dev/null | awk '{print $1}' )"
        if test -n "${___sum2}" && test "${___md5sum_file}" = "${___sum2}" ; then
          ___valid=1
        fi
      done

      if test "${___valid:-0}" -eq 1 ; then
        gotten="DOWNLOADED"
      else
        gotten='x FAILED '
      fi
    fi
  fi

  if test -n "${VERBOSE}" ; then
    reset_show 2>/dev/null || :
    echo "${gotten} ${___tu} -> ${tf}"
  fi
  #echo "PAUSED: " ; read -u8 foo
}
|
|
|
|
# wget_verbose holds "--quiet" unless VERBOSE is non-empty, in which case it is
# left unset. Nothing else in this script reads it (get_file passes --quiet to
# wget itself); presumably it is meant for code sourced later - TODO confirm.
if test -n "${VERBOSE}" ; then
  unset wget_verbose
else
  wget_verbose=--quiet
fi
|
|
# Main run: mirror the repository metadata, then every package it lists, then
# optionally the source files, optionally rebuild and re-sign the repository,
# and finally hand ownership of the work directory to ${thisuser}.
# All output (stdout and stderr) is shown and appended to ${logfile}.

# Refuse to run with an empty core setting: the paths below are built from
# these values and would otherwise point at the filesystem root.
: "${logfile:?logfile must be set}"
: "${inurl:?inurl must be set}"
: "${workdir:?workdir must be set}"
: "${tempdir:?tempdir must be set}"
: "${thisuser:?thisuser must be set}"

# tee cannot open the log when its directory is missing; create it best-effort.
mkdir -p "$( dirname "${logfile}" )" 2>/dev/null || :

{
  test "${DEBUG:-NONE}" = "FULL" && set -x
  echo "logfile=${logfile}"

  # These files define an apt repo
  # archive the Packages file, which might be generated locally by rebuild-apt-repo.sh the previous time, and might have more useful md5sums of the packages
  if test -f "${workdir}/Packages" ; then
    /bin/cp -pf "${workdir}/Packages" "${tempdir}/Packages.$$"
  else
    # First run: there is no previous Packages file, so start from an empty one.
    : > "${tempdir}/Packages.$$"
  fi
  for word in InRelease Packages Packages.gz Release Release.gpg Release.key Sources Sources.gz ; do
    get_file "${inurl}/${word}"
  done

  # loop through named packages and download them
  # Extract the Filename and MD5sum fields from both the old and the new
  # Packages files, two per line. sed|sed prefixes each line with its line
  # number so the original order can be restored after the join. The lists are
  # sorted on the filename field only, which is the field join compares.
  awk '/Filename:|MD5/{print $2}' "${tempdir}/Packages.$$" | xargs -n2 | sed '=' | sed 'N; s/\n/ /' | sort -k2,2 > "${tempdir}/old_list"
  awk '/Filename:|MD5/{print $2}' "${workdir}/Packages" | xargs -n2 | sed '=' | sed 'N; s/\n/ /' | sort -k2,2 > "${tempdir}/new_list"

  # Join on filename, keeping every line of the new list. -e fills the old-sum
  # column for files that are not in the old list, so the columns stay aligned
  # when read splits them. Then restore the original order numerically and drop
  # the line-number column.
  join -j 2 -a 2 -e NONE -o 2.1,2.2,1.3,2.3 "${tempdir}/old_list" "${tempdir}/new_list" \
    | sort -n -k1,1 \
    | awk '{$1="";print}' \
    | while read -r word sum_old sum_new ; do
        # Filenames in Packages are relative ("./..."): swap the leading dot for inurl.
        case "${word}" in
          .*) ___url="${inurl}${word#.}" ;;
          *) ___url="${word}" ;;
        esac
        get_file "${___url}" "${sum_old}" "${sum_new}"
      done

  # loop through dsc, orig.tar.gz, and debian.tar.xz files
  if test -n "${include_sources}" ; then
    # The last column of each indented line in a "Files:" section is a file name.
    for word in $( sed -n -r -e '/Files:/,/^\s*$/{/^ /p;}' "${workdir}/Sources" | awk '{print $NF}' ) ; do
      get_file "${inurl}/${word}"
    done
  fi

  if test -n "${resign_repo}" ; then
    . /etc/installed/rebuild-apt-repo.sh
  fi

  # Owner is thisuser; group is the first group id listed by "id -G".
  chown -R "${thisuser}:$( id -G "${thisuser}" | awk '{print $1}' )" "${workdir}"
} 2>&1 | tee -a "${logfile}"
|
|
|
|
# cleanup_tempdir: delete the scratch directory named by ${tempdir}, unless
# NO_CLEAN is non-empty. The NOTHINGTODEL fallback keeps rm away from an empty
# path when tempdir is unset.
cleanup_tempdir() {
  if test -z "${NO_CLEAN}" ; then
    rm -rf -- "${tempdir:-NOTHINGTODEL}"
  fi
}

cleanup_tempdir