Previously, I wrote about how to mirror an Open Build Service apt
repository. The original script used httrack and some crazy logic. Due to the
unsatisfactory performance of the first version, I rewrote the entire script,
twice! I am much more satisfied with v3 of the script, which is hosted in
the same place as before. This rewrite is
in some ways much simpler than it was before. Now, the script has a dedicated
function for downloading a file, and it only does so if there is no checksum
passed, or if the checksum of the local file is not the one passed to the
function. How an apt repo works is it has checksums of all of its files in
various metadata files, which are the only explicitly named files we download
first. We loop through the filenames in the Packages file, and pass those
filenames and checksums to the getter function. If the local file has the same
checksum, we skip that file. A flag exists for choosing to download the dpkg
sources as well. And then, for some reason I cannot quite explain, a few files
of mine, once downloaded, never matched the checksums in the Packages file. The
deb files install, so they're valid, and I trust my own repo. So I decided to
just re-sign all the apt repo files. That is, I rebuild the apt repo entirely
and then sign it with my own gpg key for my internal network. And I finally
split out the config into a config file so I can provide an example, and not
store my actual data on the Internet!
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117 |
#!/bin/sh
# File: /etc/installed/obsmirror.sh
# Location: https://gitlab.com/bgstack15/former-gists/tree/master/obsmirror.sh
# Author: bgstack15
# Startdate: 2020-03-03 08:43
# SPDX-License-Identifier: CC-BY-SA-4.0
# Title: Script that scrapes down OBS site to serve a copy to intranet
# Purpose: save down my OBS site so I can serve it locally
# History:
# 2020-01-05 v1: begin which used httrack
# 2020-02-28 v2: complete rewrite to exclude httrack
# 2020-03-03 v3: complete rewrite to get explicit files and loop through their contents, and rebuild apt repo
# Usage:
# in a cron job: /etc/cron.d/mirror.cron
# 50 12 * * * root OBSMIRROR_CONF=/etc/installed/obsmirror.conf /etc/installed/obsmirror.sh 1>/dev/null 2>&1
# Reference:
# https://software.opensuse.org//download.html?project=home%3Abgstack15&package=freefilesync
# /mnt/public/www/repo/devuan-deb/update-devuan-deb.sh
# https://medium.com/sqooba/create-your-own-custom-and-authenticated-apt-repository-1e4a4cf0b864
# Improve:
# Documentation:
# Download the release key and trust it.
# curl -s http://repo.example.com/mirror/obs/Release.key | apt-key add -
# Use a sources.list.d/ file with contents:
# deb https://repo.example.com/mirror/obs/ /
# Dependencies:
# binaries: wget sed awk readlink md5sum sha1sum sha256sum gzip dpkg-scanpackages gpg
# user: obsmirror
# Keep group write permission on everything this run creates.
umask 0002
# Site-specific settings come from the file named in OBSMIRROR_CONF, if any.
if test -n "${OBSMIRROR_CONF}" ;
then
   . "${OBSMIRROR_CONF}"
fi
# Fall back to built-in defaults for anything still unset or empty.
: "${logfile:=/tmp/var/log/obsmirror/obsmirror.$( date "+%FT%H%M%S" ).log}"
: "${inurl:=http://download.opensuse.org/repositories/home:/bgstack15/Debian_Unstable}"
: "${workdir:=/tmp/obs}"
: "${thisuser:=obsmirror}"
# Other variables honored further down: include_sources resign_repo gpg_passfile gpg_keyfile DEBUG
get_file() {
   # Fetch one URL into the corresponding path below ${workdir}.
   # call: get_file "${tu}" "${md5sum}"
   #   $1  full URL; the leading ${inurl} is stripped to derive the local path
   #   $2  optional md5 checksum; if the local file already has it, nothing is fetched
   # Reads globals: inurl workdir DRYRUN VERBOSE
   # Sets globals: tn tf td gotten (plain sh has no "local")
   # Returns: 1 when a download was attempted and failed, 0 otherwise
   ___tu="${1}"
   ___sum="${2}"
   ___rc=0
   # Quote the prefix so it is matched literally and not as a glob pattern.
   tn="${___tu##"${inurl}"}"
   tf="${workdir}/${tn}" ; tf="$( readlink -m "${tf}" )"
   td="$( dirname "${tf}" )"
   test -d "${td}" || mkdir -p "${td}"
   gotten="skipped "
   if test -z "${DRYRUN}" ;
   then
      if test -z "${___sum}" || test "$( md5sum "${tf}" 2>/dev/null | awk '{print $1}' )" != "${___sum}" ;
      then
         # wget -O truncates its target before the transfer starts, so write to
         # a sibling file and only replace the real one after a clean download.
         # The temp file is created by wget itself, so it gets umask-based
         # permissions just like a direct download would.
         ___tmp="${tf}.part.$$"
         if wget --content-disposition --no-verbose --quiet -O "${___tmp}" "${___tu}" && mv -f "${___tmp}" "${tf}" ;
         then
            gotten=DOWNLOADED
         else
            rm -f "${___tmp}"
            gotten="FAILED  "
            ___rc=1
         fi
      fi
   fi
   test -n "${VERBOSE}" && echo "${gotten} ${___tu} -> ${tf}"
   return "${___rc}"
}
# Main driver: mirror the repo metadata, then every package it names,
# optionally the source files, optionally rebuild and re-sign the repo,
# and finally hand the tree to ${thisuser}. All output is appended to ${logfile}.
# NOTE(review): wget_verbose is not referenced by anything else visible in this file.
wget_verbose=--quiet
test -n "${VERBOSE}" && unset wget_verbose
# tee will not create missing parent directories for the log file.
mkdir -p "$( dirname "${logfile}" )"
{
   test "${DEBUG:-NONE}" = "FULL" && set -x
   echo "logfile=${logfile}"
   # These files define an apt repo
   for word in InRelease Packages Packages.gz Release Release.gpg Release.key Sources Sources.gz ;
   do
      get_file "${inurl}/${word}"
   done
   # loop through named packages and download them
   # One "filename md5" pair is emitted per stanza, so a stanza without an
   # MD5sum field cannot shift the pairing of every stanza after it; such a
   # file is passed without a checksum and therefore always downloaded.
   awk '
      /^Filename:/ { fn = $2 }
      /^MD5sum:/ { md5 = $2 }
      /^[ \t]*$/ { if ( fn != "" ) print fn, md5 ; fn = "" ; md5 = "" }
      END { if ( fn != "" ) print fn, md5 }
   ' "${workdir}/Packages" | while read -r word sum
   do
      # Filename entries are relative to the repo root, usually "./dir/file".
      case "${word}" in
         ./*) get_file "${inurl}/${word#./}" "${sum}" ;;
         *) get_file "${inurl}/${word}" "${sum}" ;;
      esac
   done
   # loop through dsc, orig.tar.gz, and debian.tar.xz files
   if test -n "${include_sources}" ;
   then
      # NF guard: whitespace-only lines would otherwise yield an empty name.
      sed -n -r -e '/Files:/,/^\s*$/{/^ /p;}' "${workdir}/Sources" | awk 'NF{print $NF}' | while read -r word
      do
         get_file "${inurl}/${word}"
      done
   fi
   if test -n "${resign_repo}" ;
   then
      # rebuild release files
      repodir="${workdir}"
      # Everything below overwrites files relative to the current directory.
      cd "${repodir}" || { echo "cannot cd to ${repodir}" 1>&2 ; exit 1 ; }
      dpkg-scanpackages -m . > Packages
      gzip -9c < Packages > Packages.gz
      # create the Release file
      # "wc -c FILE" prints "SIZE FILE", the size and name columns of a checksum line.
      PKGS="$( wc -c Packages )"
      PKGS_GZ="$( wc -c Packages.gz )"
      # Carry over the descriptive headers from the downloaded Release.
      old_headers1="$( grep -E '^(Archive|Codename|Origin|Label|Architectures):' Release )"
      old_headers2="$( grep -E '^(Description):' Release )"
      {
         # Skip empty header groups so no blank line ends up inside the stanza.
         test -z "${old_headers1}" || printf '%s\n' "${old_headers1}"
         # LC_ALL=C keeps day and month names in English regardless of locale.
         printf 'Date: %s\n' "$( LC_ALL=C date -u '+%a, %d %b %Y %T %Z' )"
         test -z "${old_headers2}" || printf '%s\n' "${old_headers2}"
         # Checksum entries are continuation lines and must start with a space.
         cat <<EOF
MD5Sum:
 $( md5sum Packages | cut -d" " -f1 ) ${PKGS}
 $( md5sum Packages.gz | cut -d" " -f1 ) ${PKGS_GZ}
SHA1:
 $( sha1sum Packages | cut -d" " -f1 ) ${PKGS}
 $( sha1sum Packages.gz | cut -d" " -f1 ) ${PKGS_GZ}
SHA256:
 $( sha256sum Packages | cut -d" " -f1 ) ${PKGS}
 $( sha256sum Packages.gz | cut -d" " -f1 ) ${PKGS_GZ}
EOF
      } > Release
      test -e "${gpg_passfile}" && gpg --batch --yes --passphrase-file "${gpg_passfile}" --pinentry-mode loopback -abs -o Release.gpg Release
      test -e "${gpg_passfile}" && gpg --batch --yes --passphrase-file "${gpg_passfile}" --pinentry-mode loopback --clearsign -o InRelease Release
      # and because we are resigning it, replace Release.key with the one we used
      test -e "${gpg_keyfile}" && cp -p "${gpg_keyfile}" Release.key
   fi
   # Give the whole tree to the mirror user and that user's primary group.
   chown -R "${thisuser}:$( id -g "${thisuser}" )" "${workdir}"
} 2>&1 | tee -a "${logfile}"
|
Comments