aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/tools/download.sh
blob: a87ab730868a1d3824516a1633ea4503651e206e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/bin/bash
# Print the help text for this script to stdout.
#
# Globals:   $0 (read) - used only to display the script's file name
# Arguments: none
# Outputs:   multi-line usage/help message on stdout
function usage() {
  # Strip the directory part with parameter expansion rather than an
  # unquoted `basename $0`, which word-splits paths that contain spaces
  # (basename then treats the second word as a suffix to remove).
  local prog=${0##*/}

  echo
  echo "Usage: $prog <collection name>"
  echo
  echo "Copies nltk data to proper locations from local copy of repository."
  echo "Assumes script is in repo tools directory."
  echo
  echo "Clone the repo:"
  printf '\t%s\n' 'git clone git@github.com:<owner>/ipm-server.git'
  echo
  echo "Now switch branches to the one with the data on it (and this script):"
  printf '\t%s\n' 'git branch gh-pages remotes/origin/gh-pages'
  printf '\t%s\n' 'git checkout gh-pages'
  echo
  echo "Remember to use sudo if installing to /usr/share (default)"
  echo
  echo "set NLTK_DATA_DIR to target directory if different than /usr/share, e.g.:"
  printf '\t%s %s\n' 'NLTK_DATA_DIR=./local/dir' "$prog book"
  echo
}

# Install one NLTK data collection from the local repository checkout.
#
# Arguments:   $1 - collection name; <repo>/collections/<name>.xml lists the
#                   packages (as <item ref="..."/> elements) to install
# Environment: NLTK_DATA_DIR - install target (default /usr/share/nltk_data)
#
# Each referenced package <ref>.zip is looked up under <repo>/packages, copied
# into <target>/<package's parent directory name>/ and unpacked there.
[ $# -eq 0 ] && { usage; exit 1; }

collection=$1
data_dir=${NLTK_DATA_DIR:-/usr/share/nltk_data}
script_dir="$( cd "$( dirname "$0" )" && pwd )" || exit 1
repo_dir=$(readlink -f "$script_dir/..") || exit 1
package_dir=$repo_dir/packages
collections_dir=$repo_dir/collections
collection_xml=$collections_dir/$collection.xml

# Fail early with a clear message instead of a Python traceback.
if [ ! -f "$collection_xml" ]; then
  echo "Unknown collection '$collection': $collection_xml not found" >&2
  exit 1
fi

# Prefer python3 and fall back to python; the snippet below runs on both.
python_bin=$(command -v python3 || command -v python) || {
  echo "python3 (or python) is required to read $collection_xml" >&2
  exit 1
}

mkdir -p -- "$data_dir" || exit 1
# Abort if the target is unusable; otherwise packages would be unpacked
# into whatever directory the caller happened to be in.
pushd "$data_dir" || exit 1

# The XML path is passed as an argument rather than spliced into the Python
# source, so quotes or other special characters in the path cannot break it.
"$python_bin" -c '
import sys
import xml.etree.ElementTree as e
for item in e.parse(sys.argv[1]).getroot().findall("item"):
    ref = item.get("ref")
    if ref:
        print(ref)
' "$collection_xml" |
while IFS= read -r item; do
  # Use the first match only; several matches would corrupt the paths below.
  package=$(find "$package_dir" -name "$item.zip" -print | head -n 1)
  if [ -z "$package" ]; then
    echo "Package not found, skipping: $item" >&2
    continue
  fi

  # Mirror the package's category directory (its parent's name) in the target.
  target_dir=$(basename "$(dirname "$package")")
  target_file=$target_dir/$item.zip

  # Best effort: report a failed package and carry on with the rest.
  if ! { mkdir -p -- "$target_dir" &&
         cp -- "$package" "$target_file" &&
         unzip -u -d "$target_dir" "$target_file"; }; then
    echo "Failed to install package: $item" >&2
  fi
done

popd