Subject: Favs
23 May 16 14:35, you wrote to me:
ac>> Years ago I wrote a sh script to download a web page, compare it to
ac>> a local copy using diff and email me any changes. The script runs
ac>> twice a day as a cron job.
BR> Cool, dig it up and post!
Here you go...
#!/bin/sh
# webmonitor.sh
#
# Monitor web pages for changes and email the diffs.
WGETOPT="--cache=off --no-check-certificate"
ELINKSOPT="-no-references -no-numbering -dump-width 75"
DIFFOPT="-uw"
BINDIR="/usr/local/bin"
WORKDIR="$HOME/bin/cron/webmonitor"
MAILTO=$USER
# Pretend we're Firefox instead of Wget.
#USERAGENT="Mozilla/5.0 (X11; Linux i686; rv:7.0) Gecko/20100101 Firefox/7.0"
#USERAGENT="Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0"
#USERAGENT="Mozilla/5.0 (Windows NT 6.1; WOW64; rv:30.0) Gecko/20100101 Firefox/25.0"
USERAGENT="Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/40.0"
# Check that Wget and ELinks are installed.
require_util()
{
    if [ ! -f $BINDIR/$1 ]; then
        echo "$0: $1 is missing!"
        exit 1
    fi
}
require_util "wget"
require_util "elinks"
# Get command-line args.
URL=$1
CACHEFILE=$2
# We require two args, otherwise complain.
if [ $# -ne 2 ]; then
    echo "Usage: $0 <url> <cachefile>"
    exit 1
fi
# Chdir to our working directory, or die trying.
cd "$WORKDIR"
if [ $? -ne 0 ]; then
    echo "$0: Unable to change to working directory: $WORKDIR"
    exit 1
fi
# Yoink the web page we want.
wget -U "$USERAGENT" $WGETOPT -O $CACHEFILE.new "$URL"
# Bail out if Wget failed. This ensures we don't overwrite our old cache.
if [ $? -ne 0 ]; then
    echo "$0: Error retrieving $URL"
    exit 1
fi
# If we don't already have a cached copy from before the Wget then this
# is probably our first run, and we don't have much to do except copy
# the new file to the old cache.
if [ ! -f $CACHEFILE.cache ]; then
    mv $CACHEFILE.new $CACHEFILE.cache
    exit 0
fi
# We have a cached page and a new page, so run both through ELinks.
elinks -dump $ELINKSOPT $CACHEFILE.cache > $CACHEFILE.cache.dump
elinks -dump $ELINKSOPT $CACHEFILE.new > $CACHEFILE.new.dump
# We want the dumpfiles to have the same timestamp as their source files.
# This is useful as diff will show the timestamps of both files in the
# header of the output.
touch -r $CACHEFILE.cache $CACHEFILE.cache.dump
touch -r $CACHEFILE.new $CACHEFILE.new.dump
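# (For illustration only: with the timestamps carried over, the diff
# header that lands in the email reads something like
#     --- iana.cache.dump  2016-05-22 08:00:03.000000000 +1000
#     +++ iana.new.dump    2016-05-23 20:00:02.000000000 +1000
# where "iana" is just an example cache file name.)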
# Compare and enjoy.
diff $DIFFOPT $CACHEFILE.cache.dump $CACHEFILE.new.dump > $CACHEFILE.dump.diff
# If any changes were detected, send e-mail with the diff as the text.
if [ $? -ne 0 ]; then
    mail -s "[webmonitor] Changed: $URL" $MAILTO < $CACHEFILE.dump.diff
fi
# Clean up any temp files, and make the newly downloaded web page
# become our cached page.
rm -f $CACHEFILE.cache.dump $CACHEFILE.new.dump $CACHEFILE.dump.diff
mv $CACHEFILE.new $CACHEFILE.cache
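To run it twice a day from cron, a crontab entry along these lines does the
trick (the schedule, script path, URL and cache name here are only examples,
substitute your own):

# m  h    dom mon dow  command
0    8,20 *   *   *    $HOME/bin/webmonitor.sh "https://www.example.com/" example

A nice side effect: anything the script prints (the wget or chdir error
messages) ends up in cron's mail to the crontab owner, so failed runs don't
go unnoticed.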
--- GoldED+/BSD 1.1.5-b20160201
* Origin: Blizzard of Ozz, Melbourne, Victoria, Australia (3:633/267)