1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
|
#!/bin/sh
# File: yummirror.sh
# Location: https://gitlab.com/bgstack15/coprmirror
# Author: bgstack15
# Startdate: 2021-08-16 09:57
# SPDX-License-Identifier: GPL-3.0
# Title: Script that copies a single copr yum repo
# Project: coprmirror
# Purpose: mirror copr locally, given baseurl; part of a larger set
# History:
# Usage:
# Called by a larger script that catches the entire copr. This file only does a single yum repo for a single architecture and OS release. See coprmirror.sh
# References:
# https://unix.stackexchange.com/questions/19701/yum-how-can-i-view-variables-like-releasever-basearch-yum0
# https://gitlab.com/bgstack15/former-gists/-/blob/master/obsmirror.sh/obsmirror.sh
# Improve:
# resign_repo is not implemented yet.
# Dependencies:
# jq, yum-utils, awk, sed, grep, wget, python (with the yum module), coreutils (sha256sum, readlink -m), zgrep
# Files and directories created by this script are group-writable.
umask 0002
# Optional settings file: sourced only when YUMMIRROR_CONF names one.
if test -n "${YUMMIRROR_CONF}" ; then
  . "${YUMMIRROR_CONF}"
fi
# Anything still unset or empty after that gets its built-in default.
: "${logfile:=./yummirror.$( date "+%FT%H%M%S" ).log}"
# \$basearch stays literal here; it is a yum variable that is interpreted later.
: "${inurl:=https://copr-be.cloud.fedoraproject.org/results/bgstack15/stackrpms/epel-7-\$basearch/}"
: "${workdir:=/tmp/copr}"
: "${thisuser:=${USER}}"
# Further variables read later in the script: include_sources resign_repo DEBUG VERBOSE
# Duplicate the current stdout onto fd 3; show and reset_show write there.
exec 3>&1
show() {
  # Write all arguments as one space-joined string, without a trailing
  # newline, to file descriptor 3.
  printf '%s' "$*" >&3
}
reset_show() {
  # Write a carriage return followed by all arguments (space-joined, no
  # trailing newline) to file descriptor 3.
  printf '\r%s' "$*" >&3
}
## Functions
get_file() {
  # call: get_file "${tu}" "${sha256sum}" "absolute"
  #
  # Download one URL into ${workdir}, unless DRYRUN forbids it or the local
  # copy already has the expected checksum.
  # Arguments:
  #   $1  URL to fetch.
  #   $2  expected sha256 of the file; empty means "always download".
  #   $3  the literal word "absolute" stores the file directly in ${workdir}
  #       under its basename; anything else stores it under the path that is
  #       left after removing the ${inurl} prefix from $1.
  # Globals read: inurl, workdir, DRYRUN, VERBOSE.
  # Globals written: tn, tf, td, gotten, and the ___* scratch variables.
  # Output: one status line on stdout, "<status> <url> -> <file>", where
  #   <status> is DOWNLOADED, "skipped ", "FAILED  " or "REFUSED ".
  # Returns: 0 when the file was downloaded or skipped, 1 when the download
  #   failed or the target was refused.
  ___tu="${1}"
  ___sum="${2}"
  ___abs="${3}"
  if test "${___abs}" = "absolute" ; then
    tn="$( basename "${___tu}" )"
  else
    # The inner quotes make ${inurl} a literal prefix. Unquoted it is a glob
    # pattern, so a "?" or "[...]" in the URL would stop the prefix matching.
    tn=${___tu##"${inurl}"}
  fi
  tf="$( readlink -m "${workdir}/${tn}" )"
  # The relative path can come from remote metadata: never let a "../" in it
  # put a file outside of workdir. The check runs before any directory is made.
  ___root="$( readlink -m "${workdir}" )"
  case "${tf}" in
    "${___root%/}"/*) : ;;
    *)
      echo "REFUSED  ${___tu} -> ${tf} (not inside ${workdir})"
      return 1
      ;;
  esac
  td="$( dirname "${tf}" )"
  test -d "${td}" || mkdir -p "${td}"
  gotten="skipped "
  ___rc=0
  #printf '\n%s\n' "inside get_file ${@}, DRYRUN=${DRYRUN}"
  # DRYRUN empty means a real run; DRYRUN=metadata means "fetch this even
  # though it is a dry run". Any other value skips the download.
  if test -z "${DRYRUN}" || test "${DRYRUN}" = "metadata" ; then
    # Without an expected sum always download; with one, only when the local
    # file is missing or differs.
    if test -z "${___sum}" || test "$( sha256sum "${tf}" 2>/dev/null | awk '{print $1}' )" != "${___sum}" ; then
      test -n "${VERBOSE}" && show "retrieving ${___tu}" 2>/dev/null || :
      # wget -O truncates its output file before anything is transferred, so
      # download next to the target and rename only after success. A failed
      # transfer then leaves an existing good copy untouched.
      if wget --content-disposition --no-verbose --quiet -O "${tf}.part" "${___tu}" && mv -f "${tf}.part" "${tf}" ; then
        gotten=DOWNLOADED
      else
        rm -f "${tf}.part"
        gotten="FAILED  "
        ___rc=1
      fi
    fi
  fi
  test -n "${VERBOSE}" && reset_show 2>/dev/null || :
  echo "${gotten} ${___tu} -> ${tf}"
  return "${___rc}"
}
interpret_yum_vars() {
  # call: interpret_yum_vars "${url}"
  #
  # Print url with the yum variables $basearch and $releasever replaced by
  # the values that yum reports on this host. A url without any "$" is
  # printed unchanged, and then neither jq nor python is needed.
  # Returns: 0 on success; 1, with a message on stderr, when jq is missing
  #   or a variable that the url uses could not be determined.
  ___url="${1}"
  case "${___url}" in
    *'$'*) : ;;
    *) printf '%s\n' "${___url}" ; return 0 ;;
  esac
  command -v jq 1>/dev/null 2>&1 || { echo "Need jq to interpret yum vars in baseurl ${___url}. Aborted." 1>&2 ; return 1 ; }
  # validated on centos 7
  ___raw="$( python -c 'import yum, json; yb = yum.YumBase(); print json.dumps(yb.conf.yumvar, indent=2)' )"
  # Lines can precede the JSON (e.g. a "Loaded plugins" notice). Keep
  # everything from the opening brace on instead of assuming exactly one
  # such line.
  ___json="$( printf '%s\n' "${___raw}" | sed -n '/^{/,$p' )"
  ___basearch="$( printf '%s\n' "${___json}" | jq -r '.basearch' )"
  ___releasever="$( printf '%s\n' "${___json}" | jq -r '.releasever' )"
  # An empty or null value would silently produce a wrong url, so stop
  # instead, but only for a variable the url really uses.
  case "${___url}" in
    *'$basearch'*)
      case "${___basearch}" in
        ""|null) echo "Cannot determine yum var basearch for baseurl ${___url}. Aborted." 1>&2 ; return 1 ;;
      esac
      ;;
  esac
  case "${___url}" in
    *'$releasever'*)
      case "${___releasever}" in
        ""|null) echo "Cannot determine yum var releasever for baseurl ${___url}. Aborted." 1>&2 ; return 1 ;;
      esac
      ;;
  esac
  #echo "${inurl}" | awk -v "basearch=${basearch}" -v "releasever=${releasever}" '{gsub("\$basearch",basearch,$0);gsub("\$releasever",releasever,$0);print}'
  printf '%s\n' "${___url}" | sed -r -e "s/\\\$basearch/${___basearch}/g;" -e "s/\\\$releasever/${___releasever}/g;"
}
## MAIN
# Interpret any yum vars in the inurl
case "${inurl}" in
  *'$'*)
    newurl="$( interpret_yum_vars "${inurl}" )" || exit 1
    test -n "${DEBUG}" && {
      echo "Interpreting ${inurl} -> ${newurl}"
    } 1>&2
    inurl="${newurl}"
    ;;
esac
# sync to workdir
# NOTE(review): wget_verbose is set here, but nothing visible in this script reads it.
wget_verbose=--quiet
test -n "${VERBOSE}" && unset wget_verbose
# Everything in this group is copied, stdout and stderr alike, to the terminal and appended to the logfile.
{
  test "${DEBUG:-NONE}" = "FULL" && set -x
  echo "logfile=${logfile}"
  # The steps below read repodata/... relative to workdir, so stop when it cannot be entered.
  mkdir -p "${workdir}" && cd "${workdir}" || { echo "Cannot use workdir ${workdir}. Aborted." ; exit 1 ; }
  # This file lists the files that define a yum repo
  for word in repodata/repomd.xml ;
  do
    # coprmirror.sh will pass DRYRUN=metadata if it has anything set in dryrun, and we interpret "metadata" here, because we absolutely need the repomd.xml and associated files no matter what, even for a dry run.
    DRYRUN="${DRYRUN:+metadata}" get_file "${inurl%/}/${word}"
  done
  # loop through the important files listed in that file.
  # read sha256sum, so we don't have to download these if we already have them.
  # Each resulting line is "<checksum> <href>".
  metadata_list="$( grep -iE '<checksum |href' repodata/repomd.xml | awk -F'"' '/checksum/{print $3} /href/{print $2}' | awk "/^>/{gsub(\"^>\",\"\",\$0);gsub(\"<.*$\",\"\",\$0);a=\$0;} /repodata/{print a,\$0}" )"
  echo "${metadata_list}" | while read -r sum word ;
  do
    # An empty list still produces one blank line; there is nothing to fetch for it.
    test -n "${word}" || continue
    DRYRUN="${DRYRUN:+metadata}" get_file "${inurl%/}/${word}" "${sum}" 3>/dev/null
  done
  # go ahead and fetch the gpgkey if resign_repo=no
  if echo "${resign_repo}" | grep -qiE 'yes|\<y\>|1|true' ;
  then
    # do something
    echo "Resigning functionality not built yet. Skipping..."
  else
    if test -n "${gpgkey}" ;
    then
      # fetch the gpgkey
      # note: yum variable parsing not yet implemented here. I would need to turn the variable parsing into a function and call it here.
      # cannot use get_file because it uses a relative path evaluation
      #wget --output-file="${workdir}/pubkey.gpg" "${gpgkey}"
      get_file "${gpgkey}" "" "absolute"
    fi
  fi
  # COPR always provides a primary.xml.gz file, which lists the assets to download
  # "none.XXXXXXX" is presumably a placeholder so zgrep always receives a file argument; its error is discarded.
  full_list="$( zgrep -iE 'checksum|href' $( grep -oiE 'href=.*primary.xml.gz.*$' repodata/repomd.xml 2>/dev/null | awk -F'"' '{print $2}' ; echo "none.XXXXXXX" ) 2>/dev/null | awk "/checksum/{gsub(\".*\\\">\",\"\",\$0);gsub(\"</.*\",\"\",\$0);a=\$0;} /href/{gsub(\".*=\\\"\",\"\",\$0);gsub(\"\\\"/.*\",\"\",\$0);print a,\$0;}" )"
  #echo "FULL_LIST=${full_list}"
  # protect against the "metadata only" run
  test "${DRYRUN}" = "metadata" && DRYRUN=1
  # include_sources does not change while looping, so evaluate it once.
  want_sources=no
  if echo "${include_sources}" | grep -qiE 'yes|\<y\>|1|true' ;
  then
    want_sources=yes
  fi
  echo "${full_list}" | while read -r sum word ;
  do
    # An empty list still produces one blank line; there is nothing to fetch for it.
    test -n "${word}" || continue
    # if a srpm, only get it if user has set include_sources.
    if echo "${word}" | grep -qiE "\.src\.rpm" && test "${want_sources}" != "yes" ;
    then
      continue
    fi
    # always get all regular rpms and other non-.src.rpm files if any
    # ${inurl%/} joins the url the same way as for the metadata above, without a doubled slash.
    get_file "${inurl%/}/${word}" "${sum}"
  done
  chown -R "${thisuser}:$( id -G "${thisuser}" | awk '{print $1}' )" "${workdir}"
} 2>&1 | tee -a "${logfile}"
|