summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--obsmirror.sh/obsmirror.conf.example9
-rwxr-xr-xobsmirror.sh/obsmirror.sh142
-rwxr-xr-xobsmirror.sh/obsmirror2.sh71
3 files changed, 90 insertions, 132 deletions
diff --git a/obsmirror.sh/obsmirror.conf.example b/obsmirror.sh/obsmirror.conf.example
new file mode 100644
index 0000000..9c47ca9
--- /dev/null
+++ b/obsmirror.sh/obsmirror.conf.example
@@ -0,0 +1,9 @@
+# vim: syntax=sh
+logfile="/var/log/obsmirror/obsmirror.$( date "+%FT%H%M%S" ).log"
+inurl="http://download.opensuse.org/repositories/home:/bgstack15/Debian_Unstable"
+workdir=/var/tmp/obs
+include_sources=
+resign_repo=yes
+gpg_passfile=/root/.gnupg/filename
+gpg_keyfile=/var/www/deb/public.gpg
+thisuser=obsmirror
diff --git a/obsmirror.sh/obsmirror.sh b/obsmirror.sh/obsmirror.sh
index 8cafb2a..2b6522c 100755
--- a/obsmirror.sh/obsmirror.sh
+++ b/obsmirror.sh/obsmirror.sh
@@ -1,17 +1,22 @@
#!/bin/sh
# File: /etc/installed/obsmirror.sh
-# License: CC-BY-SA 4.0
+# Location: https://gitlab.com/bgstack15/former-gists/tree/master/obsmirror.sh
# Author: bgstack15
-# Startdate: 2020-01-05 18:01
+# Startdate: 2020-03-03 08:43
+# SPDX-License-Identifier: CC-BY-SA-4.0
# Title: Script that scrapes down OBS site to serve a copy to intranet
# Purpose: save down my OBS site so I can serve it locally
# History:
+# 2020-01-05 v1: begin which used httrack
+# 2020-02-28 v2: complete rewrite to exclude httrack
+# 2020-03-03 v3: complete rewrite to get explicit files and loop through their contents, and rebuild apt repo
# Usage:
# in a cron job: /etc/cron.d/mirror.cron
# 50 12 * * * root /etc/installed/obsmirror.sh 1>/dev/null 2>&1
# Reference:
-# https://unix.stackexchange.com/questions/114044/how-to-make-wget-download-recursive-combining-accept-with-exclude-directorie?rq=1
# https://software.opensuse.org//download.html?project=home%3Abgstack15&package=freefilesync
+# /mnt/public/www/smith122/repo/devuan-deb/update-devuan-deb.sh
+# https://medium.com/sqooba/create-your-own-custom-and-authenticated-apt-repository-1e4a4cf0b864
# Improve:
# Documentation:
# Download the release key and trust it.
@@ -19,76 +24,91 @@
# Use a sources.list.d/ file with contents:
# deb https://repo.example.com/mirror/obs/ /
# Dependencies:
-# binaries: curl wget grep sed awk chmod chown rm
+# binaries: wget sed awk readlink md5sum sha1sum sha256sum gzip gpg dpkg-scanpackages
# user: obsmirror
-
-parse_obs_dl_page() {
- # simply wget the ${inurl} and play around with this master string. Goal is to remove all links that are not dpkg, gzip, repo files, or subdirs.
- grep -oE 'href="[^"]+">' | awk '!x[$0]++' | sed -r -e 's/^href="//;' -e 's/">$//;' | grep -viE 'https?:\/\/[A-Za-z0-9\.]+\.[A-Za-z]+|mirrorlist|orig.*z$|^\/(debug|distribution|factory|ports|repositories|source|tumbleweed|update)\/$|^\?[A-Z]=[A-Z]|^\/|\.dsc$'
-}
-
-parse_obs_page_and_subdirs() {
- # call: curl -s -L "${inurl}" | parse_obs_page_and_subdirs "${inurl}" "${tmpfile}"
- # return to stdout: all the wanted files from this page and its associated subdirs
- ___input="$( parse_obs_dl_page )"
- ___inurl="${1}"
- ___tmpfile="${2}"
- {
- echo "${___input}" | grep -vE '\/$' | sed -r -e "s@^@${___inurl}\/@"
- # iterate over all listed subdirs parse out their files
- for subdir in $( echo "${___input}" | grep -E "\/$" ) ;
- do
- #echo "${___inurl}/${subdir}"
- curl -s -L "${___inurl}/${subdir}" | parse_obs_dl_page | sed -r -e "s@^@${___inurl}/${subdir}@"
- done
- } > "${___tmpfile}"
-}
+umask 0002
test -n "${OBSMIRROR_CONF}" && . "${OBSMIRROR_CONF}"
test -z "${logfile}" && logfile="/tmp/var/log/obsmirror/obsmirror.$( date "+%FT%H%M%S" ).log"
-test -z "${tmpfile}" && tmpfile="$( mktemp )"
test -z "${inurl}" && inurl="http://download.opensuse.org/repositories/home:/bgstack15/Debian_Unstable"
test -z "${workdir}" && workdir=/tmp/obs
-test -z "${outdir}" && outdir=/tmp/var/www/mirror/obs
test -z "${thisuser}" && thisuser=obsmirror
+# also uses these variables, from the conf file or the environment: include_sources resign_repo gpg_passfile gpg_keyfile DEBUG DRYRUN VERBOSE
+
+get_file() {
+   # call: get_file "${tu}" "${md5sum}"
+   # Mirror URL tu to the same relative path under ${workdir}. md5sum is optional: when it matches the local copy, or DRYRUN is set, nothing is fetched. Reads inurl workdir DRYRUN VERBOSE wget_verbose.
+   ___tu="${1}" ; ___sum="${2}"
+   tn="${___tu##"${inurl}"}" # path below the repo URL; inurl is quoted so it is matched literally, not as a glob pattern
+   tf="${workdir}/${tn}" ; tf="$( readlink -m "${tf}" )"
+   td="$( dirname "${tf}" )" ; test -d "${td}" || mkdir -p "${td}"
+   gotten="skipped "
+   if test -z "${DRYRUN}" ;
+   then
+      if test -z "${___sum}" || test "$( md5sum "${tf}" 2>/dev/null | awk '{print $1}' )" != "${___sum}" ;
+      then
+         # wget_verbose is unquoted on purpose: it holds --quiet, or is unset when VERBOSE is set and must then expand to nothing
+         wget --content-disposition --no-verbose ${wget_verbose} -O "${tf}" "${___tu}" && gotten=DOWNLOADED
+      fi
+   fi
+   test -n "${VERBOSE}" && echo "${gotten} ${___tu} -> ${tf}"
+}
+wget_verbose=--quiet
+test -n "${VERBOSE}" && unset wget_verbose
{
test "${DEBUG:-NONE}" = "FULL" && set -x
- echo "logfile=${logfile}"
-
- mkdir -p "${workdir}" ; chmod "0755" "${workdir}" ; chown "${thisuser}:$( id -G "${thisuser}" | awk '{print $1}' )" "${workdir}"
- cd "${workdir}"
- test "${use_top_result}" = "yes" && {
- # get mirrorlist of Packages.gz file and find the one that lists the most packages
- step1="$( curl -s -L "${inurl}/Packages.gz.mirrorlist" )"
- options="$( echo "${step1}" | grep -oE 'href="[^"]+">' | awk '!x[$0]++' | sed -r -e 's/^href="//;' -e 's/">$//;' | grep -iE '^(ht|f)tps?:\/\/.*Packages\.gz$' )"
- echo "${options}" 1>&2
- results="$(
- for entry in ${options} ; do
- # use package count
- #curl -s -L "${entry}" | zgrep -cE '^Package:' | sed -r -e "s@\$@ ${entry}@;"
- # use last modified timestamp of Packages.gz file
- wget --content-disposition --quiet "${entry}" -O tmpfile.$$ ; find tmpfile.$$ -printf "%T@ ${entry}\n"
- done ; rm tmpfile.$$ )"
- topresult_line="$( echo "${results}" | sort -nr | head -n1 | sed -r -e 's/\/Packages\.gz$//;' )"
- topresult_packagecount="$( echo "${topresult_line}" | awk '{print $1}' )"
- topresult="$( echo "${topresult_line}" | awk '{print $2}' )"
- echo "USING ${topresult} with ${topresult_packagecount} packages" 1>&2
- inurl="${topresult}"
- }
+ echo "logfile=${logfile}"
- step1="$( curl -s -L "${inurl}" )"
- echo "${step1}" | parse_obs_page_and_subdirs "${inurl}" "${tmpfile}"
- # loop over all entries and download them
- for thisurl in $( cat "${tmpfile}" ) ;
+ # These files define an apt repo
+ for word in InRelease Packages Packages.gz Release Release.gpg Release.key Sources Sources.gz ;
do
- thisfile="$( echo "${thisurl}" | sed -r -e "s@${inurl}@${workdir}@" -e 's/%2B/+/g;' )"
- thisdir="$( dirname "${thisfile}" )"
- test -d "${thisdir}" || mkdir -p "${thisdir}"
- test -n "${VERBOSE}" && echo "FROM ${thisurl} TO ${thisfile}"
- test -z "${DRYRUN}" && wget --no-verbose -O "${thisfile}" "${thisurl}" &
+ get_file "${inurl}/${word}"
done
-} 2>&1 | tee -a "${logfile}"
+   # loop through named packages and download them; get_file skips any whose local copy already matches its MD5sum
+   # Filename and MD5sum are paired per stanza (a stanza ends at a blank line) so a missing or stray field cannot shift later pairs
+   awk '/^Filename:/{f=$2} /^MD5[Ss]um:/{m=$2} !NF{if(f!="")print f,m;f=m=""} END{if(f!="")print f,m}' "${workdir}/Packages" | while read -r word sum
+   do
+      # replace the leading dot of the Filename path with the repo URL
+      get_file "$( echo "${word}" | sed -r -e "s@^\.@${inurl}@;" )" "${sum}"
+   done
-rm "${tmpfile:-NOTHINGTODEL}"
+   # loop through dsc, orig.tar.gz, and debian.tar.xz files: the last field of each indented line in the Files: sections of Sources
+   test -n "${include_sources}" && {
+      for word in $( sed -n -r -e '/Files:/,/^\s*$/{/^ /p;}' "${workdir}/Sources" | awk '{print $NF}' ) ;
+      do
+         get_file "${inurl}/${word}"
+      done
+   }
+
+   # rebuild release files, but only when resign_repo is set and the mirror directory can be entered;
+   # with an unchecked cd, a failure would generate and sign Packages/Release in the directory the script was started from
+   test -n "${resign_repo}" && repodir="${workdir}" && cd "${repodir}" && {
+      dpkg-scanpackages -m . > Packages
+      gzip -9c < Packages > Packages.gz
+      # create the Release file; wc -c prints "<size> <name>", which forms the tail of each checksum entry below
+      PKGS="$(wc -c Packages)"
+      PKGS_GZ="$(wc -c Packages.gz)"
+      cat <<EOF > Release
+Architectures: all
+Date: $(date -u '+%a, %d %b %Y %T %Z')
+MD5Sum:
+ $(md5sum Packages | cut -d" " -f1) $PKGS
+ $(md5sum Packages.gz | cut -d" " -f1) $PKGS_GZ
+SHA1:
+ $(sha1sum Packages | cut -d" " -f1) $PKGS
+ $(sha1sum Packages.gz | cut -d" " -f1) $PKGS_GZ
+SHA256:
+ $(sha256sum Packages | cut -d" " -f1) $PKGS
+ $(sha256sum Packages.gz | cut -d" " -f1) $PKGS_GZ
+EOF
+      # NOTE(review): GnuPG 2.1+ reportedly needs --pinentry-mode loopback for --passphrase-file to be used; confirm against the installed gpg
+      test -e "${gpg_passfile}" && gpg --batch --yes --passphrase-file "${gpg_passfile}" -abs -o Release.gpg Release
+      test -e "${gpg_passfile}" && gpg --batch --yes --passphrase-file "${gpg_passfile}" --clearsign -o InRelease Release
+      # and because we are resigning it, replace Release.key with the one we used
+      test -e "${gpg_keyfile}" && cp -p "${gpg_keyfile}" Release.key
+   }
+
+ chown -R "${thisuser}:$( id -G "${thisuser}" | awk '{print $1}' )" "${workdir}"
+} 2>&1 | tee -a "${logfile}"
diff --git a/obsmirror.sh/obsmirror2.sh b/obsmirror.sh/obsmirror2.sh
deleted file mode 100755
index 71284dd..0000000
--- a/obsmirror.sh/obsmirror2.sh
+++ /dev/null
@@ -1,71 +0,0 @@
-#!/bin/sh
-# File: /etc/installed/obsmirror.sh
-# Author: bgstack15
-# SPDX-License-Identifier: CC-BY-SA-4.0
-# Startdate: 2020-03-03 08:43
-# Title: Script that scrapes down OBS site to serve a copy to intranet
-# Purpose: save down my OBS site so I can serve it locally
-# History:
-# 2020-01-05 v1: begin which used httrack
-# 2020-02-28 v2: complete rewrite to exclude httrack
-# 2020-03-03 v3: complete rewrite to get explicit files and loop through their contents
-# Usage:
-# in a cron job: /etc/cron.d/mirror.cron
-# 50 12 * * * root /etc/installed/obsmirror.sh 1>/dev/null 2>&1
-# Reference:
-# https://software.opensuse.org//download.html?project=home%3Abgstack15&package=freefilesync
-# Improve:
-# Documentation:
-# Download the release key and trust it.
-# curl -s http://repo.example.com/mirror/obs/Release.key | apt-key add -
-# Use a sources.list.d/ file with contents:
-# deb https://repo.example.com/mirror/obs/ /
-# Dependencies:
-# binaries: wget sed awk
-# user: obsmirror
-umask 0002
-
-test -n "${OBSMIRROR_CONF}" && . "${OBSMIRROR_CONF}"
-test -z "${logfile}" && logfile="/tmp/var/log/obsmirror/obsmirror.$( date "+%FT%H%M%S" ).log"
-test -z "${inurl}" && inurl="http://download.opensuse.org/repositories/home:/bgstack15/Debian_Unstable"
-test -z "${workdir}" && workdir=/tmp/obs
-# also use include_sources DEBUG
-
-get_file() {
- # call: get_file "${tu}" "${md5sum}"
- ___tu="${1}"
- tn="$( basename "${___tu}" )"
- tf="${workdir}/${tn}" ; tf="$( readlink -m "${tf}" )"
- td="$( dirname "${tf}" )"
- test -d "${td}" || mkdir -p "${td}"
- test -n "${DRYRUN}" && test -n "${VERBOSE}" && echo "${___tu} -> ${tf}"
- test -z "${DRYRUN}" && wget --content-disposition --no-verbose ${wget_verbose} -O "${tf}" "${___tu}"
-}
-
-wget_verbose=--quiet
-test -n "${VERBOSE}" && unset wget_verbose
-{
- test "${DEBUG:-NONE}" = "FULL" && set -x
- echo "logfile=${logfile}"
-
- # These files define an apt repo
- for word in InRelease Packages Packages.gz Release Release.gpg Release.key Sources Sources.gz ;
- do
- get_file "${inurl}/${word}"
- done
-
- # loop through named packages and download them
- for word in $( awk '/Filename:/{print $2}' "${workdir}/Packages" ) ;
- do
- get_file "$( echo "${word}" | sed -r -e "s@^\.@${inurl}@;" )"
- done
-
- # loop through dsc, orig.tar.gz, and debian.tar.xz files
- test -n "${include_sources}" && {
- for word in $( sed -n -r -e '/Files:/,/^\s*$/{/^ /p;}' ${workdir}/Sources | awk '{print $NF}' ) ;
- do
- get_file "${inurl}/${word}"
- done
- }
-
-} 2>&1 | tee -a "${logfile}"
bgstack15