blob: a87ab730868a1d3824516a1633ea4503651e206e (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
|
#!/bin/bash
#######################################
# Print usage and setup instructions for this script to stdout.
# Globals:   none (reads $0 to show the script's own name)
# Arguments: none
# Outputs:   multi-line help text on stdout
# Returns:   0
#######################################
usage() {
  # Strip the directory part with a parameter expansion instead of an
  # unquoted `basename $0`, which fails when the script path contains spaces.
  local script_name=${0##*/}

  echo
  echo "Usage: ${script_name} <collection name>"
  echo
  echo "Copies nltk data to proper locations from local copy of repository."
  echo "Assumes script is in repo tools directory."
  echo
  echo "Clone the repo:"
  printf '\t%s\n' 'git clone git@github.com:<owner>/ipm-server.git'
  echo
  echo "Now switch branches to the one with the data on it (and this script):"
  printf '\t%s\n' 'git branch gh-pages remotes/origin/gh-pages'
  printf '\t%s\n' 'git checkout gh-pages'
  echo
  echo "Remember to use sudo if installing to /usr/share (default)"
  echo
  echo "set NLTK_DATA_DIR to target directory if different than /usr/share, e.g.:"
  printf '\t%s %s\n' 'NLTK_DATA_DIR=./local/dir' "${script_name} book"
  echo
}
# ---------------------------------------------------------------------------
# Main: install every package referenced by collections/<collection>.xml.
#
#   $1             collection name (the .xml file name without extension)
#   NLTK_DATA_DIR  target directory; defaults to /usr/share/nltk_data
#
# For each <item ref="..."> in the collection file, the matching
# <ref>.zip is located under the repo's packages/ directory, copied into
# <data_dir>/<package's parent directory name>/ and unpacked there.
# ---------------------------------------------------------------------------
if [ "$#" -eq 0 ]; then
  usage
  exit 1
fi

collection=$1
data_dir=${NLTK_DATA_DIR:-/usr/share/nltk_data}

# The script lives in <repo>/tools, so the repo root is one level up.
script_dir="$(cd "$(dirname "$0")" && pwd)" || exit 1
repo_dir=$(readlink -f "$script_dir/..") || exit 1
package_dir=$repo_dir/packages
collections_dir=$repo_dir/collections
collection_xml=$collections_dir/$collection.xml

if [ ! -f "$collection_xml" ]; then
  echo "Collection file not found: $collection_xml" >&2
  exit 1
fi

# Prefer `python` (what this script always used); fall back to `python3`
# on systems that no longer ship an unversioned interpreter.
python_bin=$(command -v python || command -v python3) || {
  echo "No python interpreter found in PATH" >&2
  exit 1
}

# Abort if the target directory cannot be created or entered; otherwise the
# loop below would unpack archives into whatever the current directory is.
mkdir -p "$data_dir" || exit 1
pushd "$data_dir" || exit 1

# The XML path is passed as an argument rather than spliced into the Python
# source so that quotes in the path cannot break the program. print() with a
# single argument behaves the same on Python 2 and Python 3.
"$python_bin" -c "import sys
import xml.etree.ElementTree as e
for item in e.parse(sys.argv[1]).getroot().findall('item'):
    print(item.get('ref'))" "$collection_xml" |
  while IFS= read -r item; do
    [ -n "$item" ] || continue

    # Use the first match only; several matches would corrupt the
    # dirname/basename calls below.
    package=$(find "$package_dir" -name "$item.zip" -print | head -n 1)
    if [ -z "$package" ]; then
      echo "Package not found for item: $item" >&2
      continue
    fi

    # Packages are installed under a directory named after the one that
    # holds them in the repo (e.g. packages/corpora/x.zip -> corpora/).
    target_dir=$(basename "$(dirname "$package")")
    target_file=$target_dir/$item.zip

    mkdir -p "$target_dir" &&
      cp "$package" "$target_file" &&
      unzip -u -d "$target_dir" "$target_file" ||
      echo "Failed to install package: $item" >&2
  done
popd
|