#!/bin/sh
#
# Get new PDF reprint files from another lab's web server.
# This is to be run from cron as a special user just for this task,
# allowing files copied from the remote site to be owned by the special user.
# We can then keep track of the origin of PDF files in our collection.
# If you have an account on the other machine, it would be easier to use
# the "rsync" command instead.
#
# For example, user "xxxx_lab" can have a crontab as follows:
# % su xxxx_lab
# Password:
# % crontab -l
# 31 2 * * * /bin/sh /usr/local/bin/getNewFromXxxxLab
#
#------- History:
# 2003-09-17 v1.00: Tomoyuki Naito -- first version for Sato Lab
# 2003-09-23 v1.01: Izumi Ohzawa -- modified for use in my lab.
#

# Restrict command lookup to the standard system locations.
PATH=/usr/local/bin:/usr/bin:/bin

# =================================================================
# Site-specific settings -- edit the values in this block to suit
# your setup.
#
### Configuration for Xxxx Lab  ###

# --- THEIR PDF file collection ---
# Base URL of the remote collection.
theirBaseURL='http://localhost.localuniv.edu/myreprints'
# Name of the file listing published under that base URL.
theirPMIDlist='PMID.xxxxlist'
# Work file holding the names of the PDFs still to be fetched from them.
getList='sync_xxxx.list'
# Log file; a timestamp is appended to it on every run.
logFile='sync_xxxx.log'

# --- OUR PDF file collection ---
# Directory in which all of our PDF reprint files are stored.
PMIDdirectory='/documents/myreprints/PMID'
# Listing of our PDF files.
myPMIDlist='PMID.filelist'
# Temporary listing name (not referenced anywhere else in this script).
myPMIDtemp='PMID.temp'
# File that receives the number of PDFs in our collection.
fileCount='xcount.txt'

# Directory in which the listings, the log and other work files are created.
workDir='/usr/local/www/data/myreprints'

# =================================================================

# Build the listing of the PDF files we already hold in the SAMBA-shared
# PMID directory.
# Abort if the directory cannot be entered: otherwise the listing would
# silently be taken from whatever directory the script was started in.
cd "$PMIDdirectory" || {
    echo "$0: cannot cd to $PMIDdirectory" >&2
    exit 1
}
# (Must break up ls command for old ls for count >5000 or so. sigh.)
# The first command truncates the listing, the second appends to it.
/bin/ls -1 1*.pdf > "$workDir/$myPMIDlist"
/bin/ls -1 [23456789]*.pdf >> "$workDir/$myPMIDlist"

# cd to WWW work directory; every relative file name below lives there.
cd "$workDir" || {
    echo "$0: cannot cd to $workDir" >&2
    exit 1
}

# Delete the old list obtained from their site the last time around, so
# that wget stores the fresh copy under the expected name.
/bin/rm -f "$theirPMIDlist"

# Get current PDF file listing from their lab.
# We assume they are updating this file periodically.
# Without their list there is nothing to compare against, so stop here
# with a clear message instead of diffing against a missing file.
wget -nv "$theirBaseURL/$theirPMIDlist" || {
    echo "$0: cannot fetch $theirBaseURL/$theirPMIDlist" >&2
    exit 1
}

# Taking a diff, produce a file containing filenames we need to copy (add)
# from their server.  Only diff's "> name" lines (present in their list,
# absent from ours) are kept; the pattern is anchored so that a '>'
# appearing elsewhere in a line is not mistaken for one, and the whole
# remainder of the line is taken as the file name.
# NOTE(review): diff assumes both lists are in the same order -- confirm
# that their list is generated the same way as ours.
diff "$myPMIDlist" "$theirPMIDlist" | sed -n 's/^> //p' > "$getList"

# Testing
# cat $getList

# cd to SAMBA shared PMID directory.
# This MUST succeed: the cleanup below deletes files by pattern and would
# otherwise remove "*.1" / "*.2" files from some unrelated directory.
cd "$PMIDdirectory" || {
    echo "$0: cannot cd to $PMIDdirectory" >&2
    exit 1
}

# Remove leftover duplicate downloads (wget saves a file whose name is
# already taken as NAME.1, NAME.2, ...).
# find hands the names to rm in batches, so no argument-length limit is hit
# however many leftovers there are.  "! -name . -prune" keeps find from
# descending into subdirectories and "! -name '.*'" skips hidden files,
# so exactly the entries the globs *.1 and *.2 would match are removed.
find . ! -name . -prune ! -name '.*' \( -name '*.1' -o -name '*.2' \) \
    -exec /bin/rm -f {} +

#  Get PDF files we don't have using wget.
# -i option uses a file to specify files to get; --base makes the bare
# file names in that list relative to their PMID directory.
# NOTE(review): the names come from the remote list and are passed to wget
# as-is -- confirm the remote site is trusted.
# Skip the call when the list is empty (or missing): there is nothing to
# fetch, and wget would only complain about an empty input file.
# echo "wget --base=$theirBaseURL/PMID/ -i $workDir/$getList"
if [ -s "$workDir/$getList" ]; then
    wget -nv --base="$theirBaseURL/PMID/" -i "$workDir/$getList"
fi


# Update our PDF file list and keep a log.
# Enter the PMID directory explicitly rather than relying on the working
# directory left behind by an earlier step.
cd "$PMIDdirectory" || {
    echo "$0: cannot cd to $PMIDdirectory" >&2
    exit 1
}
# /bin/ls -1 *.pdf > $workDir/$myPMIDlist
# (Split in two for old ls implementations that cannot take the whole
# collection at once; the first command truncates, the second appends.)
/bin/ls -1 1*.pdf > "$workDir/$myPMIDlist"
/bin/ls -1 [23456789]*.pdf >> "$workDir/$myPMIDlist"

# Store the number of PDFs.  Reading the listing from stdin makes wc print
# the count alone, so no file name has to be stripped from its output.
wc -l < "$workDir/$myPMIDlist" > "$workDir/$fileCount"
# Append a timestamp for this run to the log.
/bin/date >> "$workDir/$logFile"

# done

