#!/bin/sh

# Public domain notice for all NCBI EDirect scripts is located at:
# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice

# Argument handling.
#   one argument   : the archive path; flag keeps its default of "strict"
#   two or more    : the first is the flag, the second is the archive path
# The flag value is later handed to the -flag option of xtract and rchive.
if [ "$#" -eq 0 ]
then
  # Diagnostics belong on stderr so they are never mistaken for output
  echo "Must supply path for archive files" >&2
  exit 1
fi

target="$1"
flag="strict"

if [ "$#" -gt 1 ]
then
  flag="$1"
  target="$2"
fi

# Reduce the kernel name to a family label: any "*_NT-<version>" suffix is
# cut back to "_NT" and a leading MINGW<digits> is rewritten to CYGWIN.
osname=$(uname -s | sed -e 's/_NT-.*$/_NT/; s/^MINGW[0-9]*/CYGWIN/')

# On that platform family, and only when cygpath is executable, convert the
# archive path with "cygpath -w".
case "$osname" in
  CYGWIN_NT)
    if [ -x /bin/cygpath ]
    then
      target=$(cygpath -w "$target")
    fi
    ;;
esac

# Drop a single trailing slash so "$target/file" does not double it
target="${target%/}"

# Clear out any versioned.xml.gz / versioned.snt left in the current directory
rm -f "versioned.xml.gz" "versioned.snt"

# Without a versioned.uid list in the archive there is nothing to do;
# leave quietly with a success status.
[ -f "$target/versioned.uid" ] || exit 0

# Work out which PMIDs listed in versioned.uid cannot be retrieved from the
# local archive.  Two scratch files in the current directory hold the full
# list ($total) and the list of PMIDs actually found locally ($local); the
# result is left in $missing, one PMID per line.
total=.REFRESH_TOTAL
local=.REFRESH_LOCAL

# Remove the scratch files however the script ends; the signal trap turns
# an interruption into a normal exit so the EXIT handler still runs.
trap 'rm -f "$total" "$local"' EXIT
trap 'exit 1' HUP INT TERM

# comm requires both inputs to be sorted in the same collating sequence.
# A numeric sort on one side and a lexical sort on the other makes comm
# mis-pair lines whenever PMIDs differ in length, so both lists are sorted
# byte-wise here and comm is run under the same locale.
LC_ALL=C sort -u "$target/versioned.uid" > "$total"

cat "$total" | fetch-pubmed -strict "$target" |
xtract -pattern PubmedArticle -element MedlineCitation/PMID |
LC_ALL=C sort -u > "$local"

# Lines present only in $total are the missing PMIDs; put them back into
# numeric order for the downstream request.
missing=$(LC_ALL=C comm -23 "$total" "$local" | sort -n)

rm -f "$total" "$local"
trap - EXIT HUP INT TERM

# If any PMIDs were found to be missing, feed them through epost and efetch
# (XML format), then through xtract and rchive; rchive is given the archive
# path via -archive and indexes records by MedlineCitation/PMID^Version.
# Both xtract and rchive receive the caller's flag value.
if test -n "$missing"; then
  echo "$missing" \
    | epost -db pubmed -format uid \
    | efetch -format xml \
    | xtract -flag "$flag" -compress -format flush \
    | rchive -gzip -flag "$flag" -archive "$target" \
        -index MedlineCitation/PMID^Version -pattern PubmedArticle
fi
