7 files changed, 291 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..b11367f
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,12 @@
+# Maintenance tasks for the package server; run from the repository root.
+
+# Interpreter used to run the index generators in tools/.
+PYTHON := python3
+# URL prefix handed to build_pkg_index.py; package download URLs are built
+# as $(BASEURL)/<subdir>/<package>.zip.
+BASEURL := https://raw.githubusercontent.com/HydroRoll-Team/ipm-server/gh-pages/packages
+
+# Both targets are commands, not files.  Without this declaration a stray
+# file or directory named "pkg_index" or "grammars" would make the target
+# look up to date and its recipe would silently be skipped.
+.PHONY: pkg_index grammars
+
+# Regenerate the collection files and index.xml, then commit the results.
+pkg_index:
+	$(PYTHON) tools/build_collections.py .
+	$(PYTHON) tools/build_pkg_index.py . $(BASEURL) index.xml
+	git add collections
+	git add index.xml
+	git commit -m "updated data index"
+
+# Commit whatever has changed under packages/grammars.
+grammars:
+	git commit -m "updated grammar files" packages/grammars
+\ No newline at end of file
diff --git a/index.xsl b/index.xsl
new file mode 100644
index 0000000..3bffdfd
--- /dev/null
+++ b/index.xsl
@@ -0,0 +1,41 @@
+<?xml version="1.0"?>
+<!--
+  Renders a package index whose root element is <ipm_package_data> as a
+  plain HTML page with one numbered entry per <package> element.
+-->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+    <xsl:template match="/ipm_package_data">
+        <HTML>
+            <HEAD>
+                <TITLE>IPM PACKAGE SERVER</TITLE>
+            </HEAD>
+            <BODY bgcolor="white" text="navy">
+                <H1>INFINI RULE PACKAGES</H1>
+                <P>IPM has built-in support for dozens of packages and collections, as listed below.
+                    To use these within IPM/INFINI we recommend that you use the IPM
+                    <TT>&gt;&gt;&gt; ipm add</TT> command.</P>
+                <P>Please consult the README file included with each
+                    package for further information.</P>
+                <OL>
+                    <!-- One list entry per package, in document order. -->
+                    <xsl:for-each select="//packages/package">
+                        <LI>
+                            <I><xsl:value-of select="@name" /></I>
+                            <!-- Attribute value templates ({...}) build the links
+                                 directly from the package's url/webpage attributes. -->
+                            [<A href="{@url}">download</A> | <A href="{@webpage}">source</A>]
+                            <BR /> id: <TT><xsl:value-of select="@id" /></TT>;
+                            size: <xsl:value-of select="@size" />;
+                            author: <xsl:value-of select="@author" />;
+                            copyright: <xsl:value-of select="@copyright" />;
+                            license: <xsl:value-of select="@license" />; <P />
+                        </LI>
+                    </xsl:for-each>
+                </OL>
+                <HR />
+                <A href="http://ipm.hydroroll.team/index">IPM PACKAGE SERVER</A>
+            </BODY>
+        </HTML>
+    </xsl:template>
+</xsl:stylesheet>
+\ No newline at end of file
diff --git a/packages/dnd/ndice.xml b/packages/dnd/ndice.xml
new file mode 100644
index 0000000..619e2ae
--- /dev/null
+++ b/packages/dnd/ndice.xml
@@ -0,0 +1,6 @@
+<package id="ndice"
+    name="infini example: ndice"
+    webpage="https://github.com/HydroRoll-Team/infini/blob/master/tests/examples/ndice/"
+    author="苏向夜"
+    unzip="1"
+/>
+\ No newline at end of file
diff --git a/packages/dnd/ndice.zip b/packages/dnd/ndice.zip
new file mode 100644
index 0000000..0d8650e
--- /dev/null
+++ b/packages/dnd/ndice.zip
diff --git a/tools/build_collections.py b/tools/build_collections.py
new file mode 100644
index 0000000..a02a6ad
--- /dev/null
+++ b/tools/build_collections.py
@@ -0,0 +1,72 @@
+
+import os
+import sys
+from glob import glob
+from typing import List
+from xml.etree import ElementTree
+
+
+def _indent_xml(xml, prefix=""):
+    """
+    Pretty-print helper: rewrite the ``text`` and ``tail`` whitespace of
+    ``xml`` and all of its descendants in place so that the serialized
+    tree puts every child on its own line, indented two spaces deeper
+    than its parent.  ``prefix`` is the indentation of ``xml`` itself.
+    Elements without children are left untouched.
+    """
+    if len(xml) == 0:
+        return
+    inner = prefix + "  "
+    # Whitespace between the opening tag and the first child.
+    xml.text = (xml.text or "").strip() + "\n" + inner
+    last = len(xml) - 1
+    for position, child in enumerate(xml):
+        _indent_xml(child, inner)
+        # The last child is followed by the parent's closing tag, which
+        # sits at the parent's own indentation level.
+        closing = prefix if position == last else inner
+        child.tail = (child.tail or "").strip() + "\n" + closing
+
+
+# The script takes exactly one command-line argument: the directory that
+# holds the `packages/` and `collections/` subdirectories.
+if len(sys.argv[1:]) != 1:
+    for usage_line in ("Usage: ", "build_collections.py <path-to-packages>"):
+        print(usage_line)
+    sys.exit(-1)
+
+(ROOT,) = sys.argv[1:]
+
+
+def write(file_name: str, coll_name: str, items: List[str]) -> None:
+    """Write `collections/{file_name}.xml` below `ROOT`.
+
+    The file holds a single `<collection>` element whose `id` is `file_name`
+    and whose `name` is `coll_name`, with one `<item ref="...">` child per
+    entry of `items`, sorted alphabetically.  The `collections` directory is
+    created when it does not exist yet.
+
+    :param file_name: The id of the collection, equivalent to the file name,
+        e.g. `all-collections`.
+    :type file_name: str
+    :param coll_name: The name of the collection, e.g. `"All collections"`
+    :type coll_name: str
+    :param items: A list of names for the collection items, e.g. `["dnd", "coc", ...]`
+    :type items: List[str]
+    """
+    collection = ElementTree.Element("collection", id=file_name, name=coll_name)
+    collection.extend(ElementTree.Element("item", ref=item) for item in sorted(items))
+    _indent_xml(collection)
+
+    # Git does not track empty directories, so `collections/` can be missing
+    # on a fresh checkout; create it instead of letting open() fail.
+    out_dir = os.path.join(ROOT, "collections")
+    os.makedirs(out_dir, exist_ok=True)
+    with open(os.path.join(out_dir, file_name + ".xml"), "w", encoding="utf8") as f:
+        f.write(ElementTree.tostring(collection).decode("utf8"))
+
+
+def get_id(xml_path: str) -> str:
+    """Derive an id from a path: the file name with its last extension removed.
+
+    :param xml_path: A path to a file, e.g. "./packages/collections/coc.xml"
+    :type xml_path: str
+    :return: The file name without directory and extension, e.g. "coc"
+    :rtype: str
+    """
+    file_name = os.path.basename(xml_path)
+    stem, _extension = os.path.splitext(file_name)
+    return stem
+
+
+# Collect the id of every XML description under /packages/collections and
+# write them to `collections/all-collections.xml`.
+collections_items = list(map(get_id, glob(f"{ROOT}/packages/collections/*.xml")))
+write("all-collections", "All the collections", collections_items)
+
+# Collect the id of every XML description exactly one directory below
+# /packages (glob is called without `recursive=True`, so `**` matches a
+# single path level) and write them to both `collections/all-nltk.xml` and
+# `collections/all.xml`.
+# NOTE(review): the "all-nltk" id looks like a leftover name - confirm
+# whether it is still the intended id for this collection.
+all_items = list(map(get_id, glob(f"{ROOT}/packages/**/*.xml")))
+write("all-nltk", "All packages available on ipm-server gh-pages branch", all_items)
+write("all", "All packages", all_items)
diff --git a/tools/build_pkg_index.py b/tools/build_pkg_index.py
new file mode 100644
index 0000000..883d06c
--- /dev/null
+++ b/tools/build_pkg_index.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python
+
+"""
+Build the collections package index.  Usage:
+
+  build_pkg_index.py <path-to-packages> <base-url> <output-file>
+"""
+
+import hashlib
+import os
+import sys
+import zipfile
+from xml.etree import ElementTree
+xml_header = """<?xml version="1.0"?>
+<?xml-stylesheet href="index.xsl" type="text/xsl"?>
+"""
+
+
+def _indent_xml(xml, prefix=""):
+    """
+    Helper for ``build_index()``: adjust the ``text`` and ``tail``
+    attributes of ``xml`` and of all its descendants in place, so that
+    the serialized tree shows each nested element on its own line,
+    indented by 2 spaces with respect to its parent.  ``prefix`` is the
+    indentation already in effect for ``xml`` itself.
+    """
+    children = list(xml)
+    if not children:
+        # Leaf elements keep their text and tail exactly as they are.
+        return
+    child_break = "\n" + prefix + "  "
+    xml.text = (xml.text or "").strip() + child_break
+    for child in children:
+        _indent_xml(child, prefix + "  ")
+    *leading, final = children
+    for child in leading:
+        child.tail = (child.tail or "").strip() + child_break
+    # The closing tag of ``xml`` follows the final child, one level out.
+    final.tail = (final.tail or "").strip() + "\n" + prefix
+
+
+def md5_hexdigest(path):
+    """
+    Return the MD5 checksum of the file at ``path`` as a hexadecimal
+    string.  The file is read in fixed-size chunks so that a large
+    archive is never held in memory as a whole.
+    """
+    digest = hashlib.md5()
+    with open(path, "rb") as stream:
+        for chunk in iter(lambda: stream.read(1 << 16), b""):
+            digest.update(chunk)
+    return digest.hexdigest()
+
+
+def _find_collections(root):
+    """
+    Helper for ``build_index()``: yield the parsed root element of every
+    ``*.xml`` file found in ``root`` or any directory below it.  Yields
+    nothing when ``root`` does not exist.
+    """
+    for dirname, subdirs, files in os.walk(root):
+        subdirs.sort()  # walk the tree in a reproducible order
+        for filename in sorted(files):
+            if filename.endswith(".xml"):
+                yield ElementTree.parse(os.path.join(dirname, filename)).getroot()
+
+
+def _find_packages(root):
+    """
+    Helper for ``build_index()``: walk ``root`` and yield one
+    ``(pkg_xml, zf, subdir)`` tuple per ``*.xml`` file, where ``pkg_xml``
+    is the parsed root element of the description, ``zf`` is the open
+    ``zipfile.ZipFile`` with the same base name in the same directory,
+    and ``subdir`` is that directory's path relative to ``root`` using
+    ``/`` separators (``""`` for ``root`` itself).
+
+    ``zf`` is closed again as soon as the caller asks for the next item.
+
+    :raise ValueError: if the ``id`` attribute of a description differs
+        from its file name, or if the matching zip file cannot be opened.
+    """
+    for dirname, subdirs, files in os.walk(root):
+        subdirs.sort()  # walk the tree in a reproducible order
+        relpath = os.path.relpath(dirname, root)
+        subdir = "" if relpath == os.curdir else relpath.replace(os.sep, "/")
+        for filename in sorted(files):
+            if not filename.endswith(".xml"):
+                continue
+            uid = filename[: -len(".xml")]
+            xml_path = os.path.join(dirname, filename)
+            zip_path = os.path.join(dirname, uid + ".zip")
+
+            pkg_xml = ElementTree.parse(xml_path).getroot()
+            if pkg_xml.get("id") != uid:
+                raise ValueError(
+                    "The id %r in %r does not match the file name"
+                    % (pkg_xml.get("id"), xml_path)
+                )
+            try:
+                zf = zipfile.ZipFile(zip_path)
+            except (OSError, zipfile.BadZipFile) as exc:
+                raise ValueError("Error reading file %r: %s" % (zip_path, exc)) from exc
+            with zf:
+                yield pkg_xml, zf, subdir
+
+
+def build_index(root, base_url):
+    """
+    Build the index element tree by combining the xml description
+    files for various packages and collections, and return its top
+    ``<ipm_package_data>`` element.  ``root`` should be the
+    path to a directory containing the package xml and zip files; and
+    the collection xml files.  The ``root`` directory is expected to
+    have the following subdirectories::
+
+      root/
+        packages/ .................. subdirectory for packages
+        collections/ ............... xml files for collections
+
+    For each package, there should be two files: ``package.zip``
+    (where *package* is the package name)
+    which contains the package itself as a compressed zip file; and
+    ``package.xml``, which is an xml description of the package.  The
+    zipfile ``package.zip`` should expand to a single subdirectory
+    named ``package/``.  The base filename ``package`` must match
+    the identifier given in the package's xml file.
+
+    For each collection, there should be a single file ``collection.xml``
+    describing the collection, where *collection* is the name of the collection.
+
+    All identifiers (for both packages and collections) must be unique.
+
+    Every package element gets ``unzipped_size``, ``size``, ``checksum``
+    and ``subdir`` attributes computed from its zip file, plus a ``url``
+    attribute of the form ``{base_url}/{subdir}/{package}.zip`` unless
+    the description already provides a non-empty ``url``.
+
+    :raise ValueError: if two packages/collections share an identifier.
+    """
+    # Find all packages.
+    packages = []
+    for pkg_xml, zf, subdir in _find_packages(os.path.join(root, "packages")):
+        zipstat = os.stat(zf.filename)
+        url = f"{base_url}/{subdir}/{os.path.basename(zf.filename)}"
+        unzipped_size = sum(zf_info.file_size for zf_info in zf.infolist())
+
+        # Fill in several fields of the package xml with calculated values.
+        pkg_xml.set("unzipped_size", str(unzipped_size))
+        pkg_xml.set("size", str(zipstat.st_size))
+        pkg_xml.set("checksum", md5_hexdigest(zf.filename))
+        pkg_xml.set("subdir", subdir)
+        if not pkg_xml.get("url"):
+            pkg_xml.set("url", url)
+
+        # Record the package.
+        packages.append(pkg_xml)
+
+    # Find all collections
+    collections = list(_find_collections(os.path.join(root, "collections")))
+
+    # Check that all UIDs are unique
+    uids = set()
+    for item in packages + collections:
+        if item.get("id") in uids:
+            raise ValueError("Duplicate UID: %s" % item.get("id"))
+        uids.add(item.get("id"))
+
+    # Put it all together
+    top_elt = ElementTree.Element("ipm_package_data")
+    top_elt.append(ElementTree.Element("packages"))
+    top_elt[0].extend(sorted(packages, key=lambda package: package.get("id")))
+    top_elt.append(ElementTree.Element("collections"))
+    top_elt[1].extend(
+        sorted(collections, key=lambda collection: collection.get("id")))
+
+    _indent_xml(top_elt)
+    return top_elt
+
+
+# The script takes exactly three arguments: the repository root, the URL
+# prefix for package downloads, and the path of the index file to write.
+if len(sys.argv) != 4:
+    print("Usage: ")
+    print("build_pkg_index.py <path-to-packages> <base-url> <output-file>")
+    sys.exit(-1)
+
+ROOT, BASE_URL, OUT = sys.argv[1:]
+
+index = build_index(ROOT, BASE_URL)
+s = ElementTree.tostring(index)
+s = s.decode("utf8")
+# The context manager closes the output file even if a write fails.
+with open(OUT, "w", encoding="utf8") as out:
+    out.write(xml_header)
+    out.write(s)
+    out.write("\n")
diff --git a/tools/download.sh b/tools/download.sh
new file mode 100644
index 0000000..a87ab73
--- /dev/null
+++ b/tools/download.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+# Print the command-line help for this script to standard output.
+function usage() {
+  # "$0" is quoted so that a script path containing spaces or glob
+  # characters reaches basename as a single argument.
+  local script_name
+  script_name=$(basename "$0")
+  echo
+  echo "Usage: $script_name <collection name>"
+  echo
+  echo "Copies nltk data to proper locations from local copy of repository."
+  echo "Assumes script is in repo tools directory."
+  echo
+  echo "Clone the repo:"
+  printf '\t%s\n' 'git clone git@github.com:<owner>/ipm-server.git'
+  echo
+  echo "Now switch branches to the one with the data on it (and this script):"
+  printf '\t%s\n' 'git branch gh-pages remotes/origin/gh-pages'
+  printf '\t%s\n' 'git checkout gh-pages'
+  echo
+  echo "Remember to use sudo if installing to /usr/share (default)"
+  echo
+  echo set NLTK_DATA_DIR to target directory if different than /usr/share, e.g.:
+  printf '\t%s %s\n' 'NLTK_DATA_DIR=./local/dir' "$script_name book"
+  echo
+}
+
+# A collection name is required; show the help text and fail without one.
+[ $# -eq 0 ] && { usage; exit 1; }
+
+collection=$1
+# Install below $NLTK_DATA_DIR when it is set, otherwise below /usr/share.
+data_dir=${NLTK_DATA_DIR:-/usr/share/nltk_data}
+# The repository root is the parent of the directory holding this script.
+script_dir="$( cd "$( dirname "$0" )" && pwd )"
+repo_dir=$(readlink -f "$script_dir/..")
+package_dir=$repo_dir/packages
+collections_dir=$repo_dir/collections
+
+# Stop early rather than unpacking into whatever directory we happen to be in.
+mkdir -p "$data_dir" || exit 1
+pushd "$data_dir" || exit 1
+
+# List the "ref" of every <item> in the collection file, one per line.
+# The file name is passed as an argument instead of being spliced into the
+# Python source, and print() is called as a function so this runs on Python 3.
+python3 -c '
+import sys
+import xml.etree.ElementTree as e
+for item in e.parse(sys.argv[1]).getroot().findall("item"):
+    print(item.get("ref"))
+' "$collections_dir/$collection.xml" |
+while IFS= read -r item
+do
+  # Use the first match if several zip files share the name.
+  package=$(find "$package_dir" -name "$item.zip" -print | head -n 1)
+  if [ -z "$package" ]; then
+    echo "warning: no zip file found for '$item'; skipping" >&2
+    continue
+  fi
+  # Recreate the package's category directory below the data directory.
+  target_dir=$(basename "$(dirname "$package")")
+  target_file=$target_dir/$item.zip
+  mkdir -p "$target_dir"
+  cp "$package" "$target_file"
+  unzip -u -d "$target_dir" "$target_file"
+done
+
+popd
+\ No newline at end of file