Knowledge Base

Preserving for the future: Shell scripts, AoC, and more

yummirror.sh (Source)

#!/bin/sh
# File: yummirror.sh
# Location: https://gitlab.com/bgstack15/coprmirror
# Author: bgstack15
# Startdate: 2021-08-16 09:57
# SPDX-License-Identifier: GPL-3.0
# Title: Script that copies a single copr yum repo
# Project: coprmirror
# Purpose: mirror copr locally, given baseurl; part of a larger set
# History:
# Usage:
#    Called by a larger script that catches the entire copr. This file only does a single yum repo for a single architecture and OS release. See coprmirror.sh
# References:
#    https://unix.stackexchange.com/questions/19701/yum-how-can-i-view-variables-like-releasever-basearch-yum0
#    https://gitlab.com/bgstack15/former-gists/-/blob/master/obsmirror.sh/obsmirror.sh
# Improve:
#    resign_repo is not implemented yet.
# Dependencies:
#    jq, yum-utils, awk, sed, grep
# New files and directories are created group-writable.
umask 0002
# Optional settings file: sourced only when YUMMIRROR_CONF is non-empty.
if test -n "${YUMMIRROR_CONF}" ; then
   . "${YUMMIRROR_CONF}"
fi
# Defaults for anything the environment or the settings file left empty.
test -n "${logfile}" || logfile="./yummirror.$( date "+%FT%H%M%S" ).log"
test -n "${inurl}" || inurl='https://copr-be.cloud.fedoraproject.org/results/bgstack15/stackrpms/epel-7-$basearch/'
: "${workdir:=/tmp/copr}"
: "${thisuser:=${USER}}"
# Further down the script also honors: include_sources resign_repo DEBUG VERBOSE
# Keep a duplicate of the current stdout on fd 3; show and reset_show write there.
exec 3>&1
show() {
    # Print all arguments as a single string on fd 3, without a trailing newline.
    printf '%s' "$*" >&3
}
reset_show() {
    # Print a carriage return followed by all arguments as a single string
    # on fd 3, without a trailing newline.
    printf '\r%s' "$*" >&3
}
## Functions
get_file() {
   # Purpose: fetch one url into ${workdir}, skipping the download when the
   #    local copy already has the expected checksum.
   # call: get_file "${tu}" "${sha256sum}" "absolute"
   #    $1  url to fetch
   #    $2  expected sha256sum of the file. If empty, the file is always fetched.
   #    $3  if "absolute", store the file directly in ${workdir} under its
   #        basename. Otherwise the path below ${inurl} is kept under ${workdir}.
   # Globals read: inurl workdir DRYRUN VERBOSE
   # Globals written: tn tf td gotten (plain sh has no "local")
   # Output: one line on stdout, "<status> <url> -> <file>", where status is
   #    "skipped   ", "DOWNLOADED" or "FAILED    ".
   ___tu="${1}"
   ___sum="${2}"
   ___abs="${3}"
   if test "${___abs}" = "absolute" ; then
      tn="$( basename "${___tu}" )"
   else
      # inurl is quoted so it is removed as a literal prefix and never treated as a glob pattern
      tn="${___tu#"${inurl}"}"
   fi
   tf="${workdir}/${tn}" ; tf="$( readlink -m "${tf}" )"
   td="$( dirname "${tf}" )"
   test -d "${td}" || mkdir -p "${td}"
   gotten="skipped   "
   #printf '\n%s\n' "inside get_file ${@}, DRYRUN=${DRYRUN}"
   # Downloads happen on a real run (DRYRUN empty) and on a "metadata" dry run.
   if test -z "${DRYRUN}" || test "${DRYRUN}" = "metadata" ;
   then
      if test -z "${___sum}" || test "$( sha256sum "${tf}" 2>/dev/null | awk '{print $1}' )" != "${___sum}" ;
      then
         test -n "${VERBOSE}" && show "retrieving ${___tu}" 2>/dev/null || :
         # Download next to the target and rename only on success: wget -O
         # truncates its output file up front, so writing straight to ${tf}
         # would destroy an existing good copy whenever the transfer fails.
         ___part="${tf}.part.$$"
         if test ! -d "${tf}" && wget --content-disposition --no-verbose --quiet -O "${___part}" "${___tu}" && mv -f "${___part}" "${tf}" ;
         then
            gotten=DOWNLOADED
         else
            rm -f "${___part}"
            gotten="FAILED    "
         fi
      fi
   fi
   test -n "${VERBOSE}" && reset_show 2>/dev/null || :
   echo "${gotten} ${___tu} -> ${tf}"
}
## MAIN
# Interpret any yum vars in the inurl
# Only $basearch and $releasever are substituted. Their values are read from
# the yum python module on this host. inurl is left alone when it has no "$".
if echo "${inurl}" | grep -qE '\$' ;
then
   # Check only that jq is installed, without depending on how jq behaves when it is run with no filter.
   command -v jq 1>/dev/null 2>&1 || { echo "Need jq to interpret yum vars in baseurl ${inurl}. Aborted." ; exit 1 ; }
   raw="$( python -c 'import yum, json; yb = yum.YumBase(); print json.dumps(yb.conf.yumvar, indent=2)' )"
   # validated on centos 7
   # The first line of the python output is discarded before jq parses the rest.
   basearch="$( echo "${raw}" | sed -n '1!p' | jq '.basearch' | tr -d '"' )"
   releasever="$( echo "${raw}" | sed -n '1!p' | jq '.releasever' | tr -d '"' )"
   # Refuse to build a url from a variable that could not be resolved: the
   # value is empty when python/yum/jq produced nothing, and "null" when jq
   # did not find the key.
   case "${inurl}" in
      *'$basearch'*)
         if test -z "${basearch}" || test "${basearch}" = "null" ; then
            echo "Unable to determine yum var basearch for baseurl ${inurl}. Aborted." 1>&2 ; exit 1
         fi
         ;;
   esac
   case "${inurl}" in
      *'$releasever'*)
         if test -z "${releasever}" || test "${releasever}" = "null" ; then
            echo "Unable to determine yum var releasever for baseurl ${inurl}. Aborted." 1>&2 ; exit 1
         fi
         ;;
   esac
   #echo "${inurl}" | awk -v "basearch=${basearch}" -v "releasever=${releasever}" '{gsub("\$basearch",basearch,$0);gsub("\$releasever",releasever,$0);print}'
   newurl="$( echo "${inurl}" | sed -r -e "s/\\\$basearch/${basearch}/g;" -e "s/\\\$releasever/${releasever}/g;" )"
   test -n "${DEBUG}" && {
      echo "Interpreting ${inurl} -> ${newurl}"
   } 1>&2
   inurl="${newurl}"
fi
# clean up trailing slashes
# "${inurl%%/}" removes a single slash only, so repeat until none is left.
while test "${inurl%/}" != "${inurl}" ;
do
   inurl="${inurl%/}"
done
# sync to workdir
# NOTE: wget_verbose is prepared here but nothing in this script reads it; get_file passes --quiet to wget itself.
wget_verbose=--quiet
test -n "${VERBOSE}" && unset wget_verbose
# Everything the group below writes to stdout or stderr is shown and also appended to ${logfile}.
{
   test "${DEBUG:-NONE}" = "FULL" && set -x
   echo "logfile=${logfile}"
   # All relative paths below (repodata/...) depend on being inside workdir, so stop here if we cannot get there.
   # The exit ends this piped group, which is all that is left of the script.
   mkdir -p "${workdir}" && cd "${workdir}" || { echo "Cannot use workdir ${workdir}. Aborted." ; exit 1 ; }
   # This file lists the files that define a yum repo
   for word in repodata/repomd.xml ;
   do
      # coprmirror.sh will pass DRYRUN=metadata if it has anything set in dryrun, and we interpret "metadata" here, because we absolutely need the repomd.xml and associated files no matter what, even for a dry run.
      # NOTE(review): whether this prefix assignment still applies after get_file returns depends on the shell; the reset before the package loop covers the case where it does.
      DRYRUN="${DRYRUN:+metadata}" get_file "${inurl%%/}/${word}"
   done
   # loop through the important files listed in that file.
   # read sha256sum, so we don't have to download these if we already have them.
   # Each resulting line is "<checksum> <href>".
   metadata_list="$( grep -iE '<checksum |href' repodata/repomd.xml | awk -F'"' '/checksum/{print $3} /href/{print $2}' | awk "/^>/{gsub(\"^>\",\"\",\$0);gsub(\"<.*$\",\"\",\$0);a=\$0;} /repodata/{print a,\$0}" )"
   echo "${metadata_list}" | while read -r sum word ;
   do
      # An empty list still produces one blank line; skip it instead of asking get_file for "${inurl}/".
      test -n "${word}" || continue
      # fd 3 is pointed at /dev/null so the progress text from show/reset_show is dropped for these files.
      DRYRUN="${DRYRUN:+metadata}" get_file "${inurl%%/}/${word}" "${sum}" 3>/dev/null
   done
   # go ahead and fetch the gpgkey if resign_repo=no
   if echo "${resign_repo}" | grep -qiE 'yes|\<y\>|1|true' ;
   then
      # do something
      echo "Resigning functionality not built yet. Skipping..."
   else
      if test -n "${gpgkey}" ;
      then
         # fetch the gpgkey
         # note: yum variable parsing not yet implemented here. I would need to turn the variable parsing into a function and call it here.
         # cannot use get_file because it uses a relative path evaluation
         #wget --output-file="${workdir}/pubkey.gpg" "${gpgkey}"
         get_file "${gpgkey}" "" "absolute"
      fi
   fi
   # COPR always provides a primary.xml.gz file, which lists the assets to download
   # "none.XXXXXXX" is appended so zgrep always has a file argument, even when repomd.xml names no primary.xml.gz.
   full_list="$( zgrep -iE 'checksum|href' $( grep -oiE 'href=.*primary.xml.gz.*$' repodata/repomd.xml 2>/dev/null | awk -F'"' '{print $2}' ; echo "none.XXXXXXX" ) 2>/dev/null | awk "/checksum/{gsub(\".*\\\">\",\"\",\$0);gsub(\"</.*\",\"\",\$0);a=\$0;} /href/{gsub(\".*=\\\"\",\"\",\$0);gsub(\"\\\"/.*\",\"\",\$0);print a,\$0;}" )"
   #echo "FULL_LIST=${full_list}"
   # protect against the "metadata only" run
   # From here on any dry run must skip downloads, so "metadata" becomes a value get_file treats as a plain dry run.
   test "${DRYRUN}" = "metadata" && DRYRUN=1
   echo "${full_list}" | while read -r sum word ;
   do
      # An empty list still produces one blank line; skip it instead of asking get_file for "${inurl}/".
      test -n "${word}" || continue
      if echo "${word}" | grep -qiE "\.src\.rpm" ;
      then
         # if a srpm, only get it if user has set include_sources.
         if echo "${include_sources}" | grep -qiE 'yes|\<y\>|1|true' ;
         then
            get_file "${inurl}/${word}" "${sum}"
         fi
      else
         # always get all regular rpms and other non-.src.rpm files if any
         get_file "${inurl}/${word}" "${sum}"
      fi
   done
   # Hand the whole tree to thisuser and the first group id that "id -G" lists for that user.
   chown -R "${thisuser}:$( id -G "${thisuser}" | awk '{print $1}' )" "${workdir}"
} 2>&1 | tee -a "${logfile}"