aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
-rw-r--r--Makefile12
-rw-r--r--index.xsl41
-rw-r--r--packages/dnd/ndice.xml6
-rw-r--r--packages/dnd/ndice.zipbin0 -> 4505 bytes
-rw-r--r--tools/build_collections.py72
-rw-r--r--tools/build_pkg_index.py112
-rw-r--r--tools/download.sh48
7 files changed, 291 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..b11367f
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,12 @@
# Interpreter used to run the index-building tools.
PYTHON = python3
# Base URL under which the package zip files are published; handed to
# tools/build_pkg_index.py as its <base-url> argument.
BASEURL = https://raw.githubusercontent.com/HydroRoll-Team/ipm-server/gh-pages/packages

# Neither target creates a file named after itself, so declare both phony;
# otherwise a stray file called "pkg_index" or "grammars" would make the
# rule appear up to date and it would never run.
.PHONY: pkg_index grammars

# Rebuild the collection files and index.xml, then stage and commit them.
pkg_index:
	$(PYTHON) tools/build_collections.py .
	$(PYTHON) tools/build_pkg_index.py . $(BASEURL) index.xml
	git add collections
	git add index.xml
	git commit -m "updated data index"

# Commit whatever has changed under packages/grammars.
grammars:
	git commit -m "updated grammar files" packages/grammars
diff --git a/index.xsl b/index.xsl
new file mode 100644
index 0000000..3bffdfd
--- /dev/null
+++ b/index.xsl
@@ -0,0 +1,41 @@
<?xml version="1.0"?>
<!--
  Renders the package index as an HTML page. The single template matches
  the ipm_package_data root element and emits one list entry for every
  packages/package element, using its name, url, webpage, id, size,
  author, copyright and license attributes.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
  <xsl:template match="/ipm_package_data">
    <HTML>
      <HEAD>
        <TITLE>IPM PACKAGE SERVER</TITLE>
      </HEAD>
      <BODY bgcolor="white" text="navy">
        <H1>INFINI RULE PACKAGES</H1>
        <P>IPM has built-in support for dozens of packages and collections, as listed below.
          To use these within IPM/INFINI we recommend that you use the IPM <TT>&gt;&gt;&gt;
          ipm add</TT> command.</P>
        <P>Please consult the README file included with each
          package for further information.</P>
        <OL>
          <!-- One entry per package; attribute value templates fill in the link targets. -->
          <xsl:for-each select="//packages/package">
            <LI>
              <I><xsl:value-of select="@name" /></I> [<a href="{@url}"> download </a> |<a href="{@webpage}"> source </a>] <BR /> id: <tt><xsl:value-of select="@id" /></tt>;
              size: <xsl:value-of select="@size" />; author: <xsl:value-of select="@author" />; copyright: <xsl:value-of
                select="@copyright" />; license: <xsl:value-of select="@license" />; <P />
            </LI>
          </xsl:for-each>
        </OL>
        <HR />
        <A href="http://ipm.hydroroll.team/index">IPM PACKAGE SERVER</A>
      </BODY>
    </HTML>
  </xsl:template>
</xsl:stylesheet>
diff --git a/packages/dnd/ndice.xml b/packages/dnd/ndice.xml
new file mode 100644
index 0000000..619e2ae
--- /dev/null
+++ b/packages/dnd/ndice.xml
@@ -0,0 +1,6 @@
+<package id="ndice"
+ name="infini example: ndice"
+ webpage="https://github.com/HydroRoll-Team/infini/blob/master/tests/examples/ndice/"
+ author="苏向夜"
+ unzip="1"
+/> \ No newline at end of file
diff --git a/packages/dnd/ndice.zip b/packages/dnd/ndice.zip
new file mode 100644
index 0000000..0d8650e
--- /dev/null
+++ b/packages/dnd/ndice.zip
Binary files differ
diff --git a/tools/build_collections.py b/tools/build_collections.py
new file mode 100644
index 0000000..a02a6ad
--- /dev/null
+++ b/tools/build_collections.py
@@ -0,0 +1,72 @@
+
+import os
+import sys
+from glob import glob
+from typing import List
+from xml.etree import ElementTree
+
+
def _indent_xml(xml, prefix=""):
    """
    Helper for ``write()``: pretty-print an XML element in place.

    Rewrites the ``text`` and ``tail`` attributes of ``xml`` and all of its
    descendants so that, when serialized, every nested element sits on its
    own line and is indented by 2 spaces with respect to its parent.
    Existing text/tail content is kept, but its surrounding whitespace is
    stripped before the new line break and indentation are appended.

    :param xml: The element to indent; it is modified in place.
    :param prefix: The indentation already in effect for ``xml`` itself.
    :return: None. Elements without children are left untouched.
    """
    if len(xml) == 0:
        return

    # Two spaces per nesting level, as promised by the contract above.
    child_prefix = prefix + "  "
    xml.text = (xml.text or "").strip() + "\n" + child_prefix

    last = len(xml) - 1
    for position, child in enumerate(xml):
        _indent_xml(child, child_prefix)
        # The tail of the last child precedes the parent's closing tag, so
        # it drops back to the parent's own indentation.
        closing = prefix if position == last else child_prefix
        child.tail = (child.tail or "").strip() + "\n" + closing
+
+
# Exactly one command-line argument is expected: the directory that holds
# the ``packages`` and ``collections`` subdirectories.
if len(sys.argv) != 2:
    print("Usage: ", "build_collections.py <path-to-packages>", sep="\n")
    sys.exit(-1)

_, ROOT = sys.argv
+
+
def write(file_name: str, coll_name: str, items: List[str]) -> None:
    """Write ``collections/{file_name}.xml`` under ``ROOT``.

    The file holds one ``<collection>`` element whose ``id`` is `file_name`
    and whose ``name`` is `coll_name`, with one ``<item ref="...">`` child
    per entry of `items`, sorted alphabetically.

    :param file_name: The id of the collection, equivalent to the file name,
        e.g. `all-collections`.
    :type file_name: str
    :param coll_name: The name of the collection, e.g. `"All collections"`
    :type coll_name: str
    :param items: A list of names for the collection items, e.g. `["dnd", "coc", ...]`
    :type items: List[str]
    """
    collection = ElementTree.Element("collection", id=file_name, name=coll_name)
    for ref in sorted(items):
        ElementTree.SubElement(collection, "item", ref=ref)
    _indent_xml(collection)

    # Create the output directory on first use; without this, open() fails
    # with FileNotFoundError on a checkout that has no collections/ yet.
    out_dir = os.path.join(ROOT, "collections")
    os.makedirs(out_dir, exist_ok=True)

    out_path = os.path.join(out_dir, file_name + ".xml")
    with open(out_path, "w", encoding="utf8") as f:
        f.write(ElementTree.tostring(collection).decode("utf8"))
+
+
def get_id(xml_path: str) -> str:
    """Return the id encoded in a path: its final component minus the extension.

    :param xml_path: A path to a file, e.g. "./packages/collections/coc.xml"
    :type xml_path: str
    :return: The file name with its last extension removed, e.g. "coc"
    :rtype: str
    """
    _, file_name = os.path.split(xml_path)
    stem, _ = os.path.splitext(file_name)
    return stem
+
+
# Write `collections/all-collections.xml`, listing every XML file found
# directly under packages/collections.
collections_items = [
    get_id(xml_path)
    for xml_path in glob(f"{ROOT}/packages/collections/*.xml")
]
write("all-collections", "All the collections", collections_items)

# Write `collections/all-ipm.xml` and `collections/all.xml`, listing every
# XML file found one directory below packages/ (glob is called without
# recursive=True, so `**` matches exactly one directory level here).
all_items = [
    get_id(xml_path)
    for xml_path in glob(f"{ROOT}/packages/**/*.xml")
]
# The id used to be the leftover "all-nltk", which contradicted both the
# intent stated above and the collection's own description.
write("all-ipm", "All packages available on ipm-server gh-pages branch", all_items)
write("all", "All packages", all_items)
diff --git a/tools/build_pkg_index.py b/tools/build_pkg_index.py
new file mode 100644
index 0000000..883d06c
--- /dev/null
+++ b/tools/build_pkg_index.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python
+
+"""
+Build the collections package index. Usage:
+
+ build_pkg_index.py <path-to-packages> <base-url> <output-file>
+"""
+
import os
import sys
from xml.etree import ElementTree
+xml_header = """<?xml version="1.0"?>
+<?xml-stylesheet href="index.xsl" type="text/xsl"?>
+"""
+
+
def _indent_xml(xml, prefix=""):
    """
    Helper for ``build_index()``: pretty-print an XML element in place.

    Rewrites the ``text`` and ``tail`` attributes of ``xml`` and all of its
    descendants so that, when serialized, every nested element sits on its
    own line and is indented by 2 spaces with respect to its parent.
    Existing text/tail content is kept, but its surrounding whitespace is
    stripped before the new line break and indentation are appended.

    :param xml: The element to indent; it is modified in place.
    :param prefix: The indentation already in effect for ``xml`` itself.
    :return: None. Elements without children are left untouched.
    """
    if len(xml) == 0:
        return

    # Two spaces per nesting level, as promised by the contract above.
    child_prefix = prefix + "  "
    xml.text = (xml.text or "").strip() + "\n" + child_prefix

    last = len(xml) - 1
    for position, child in enumerate(xml):
        _indent_xml(child, child_prefix)
        # The tail of the last child precedes the parent's closing tag, so
        # it drops back to the parent's own indentation.
        closing = prefix if position == last else child_prefix
        child.tail = (child.tail or "").strip() + "\n" + closing
+
def build_index(root, base_url):
    """
    Build the package index as an XML element tree, by combining the xml
    description files for various packages and collections. ``root``
    should be the path to a directory containing the package xml and zip
    files; and the collection xml files. The ``root`` directory is
    expected to have the following subdirectories::

      root/
        packages/ .................. subdirectory for packages
        collections/ ............... xml files for collections

    For each package, there should be two files: ``package.zip``
    (where *package* is the package name)
    which contains the package itself as a compressed zip file; and
    ``package.xml``, which is an xml description of the package. The
    zipfile ``package.zip`` should expand to a single subdirectory
    named ``package/``. The base filename ``package`` must match
    the identifier given in the package's xml file.

    For each collection, there should be a single file ``collection.xml``
    describing the collection, where *collection* is the name of the collection.

    All identifiers (for both packages and collections) must be unique.

    :param root: Directory holding the ``packages`` and ``collections``
        subdirectories.
    :param base_url: URL prefix used to build a package's download ``url``
        when its xml description does not already provide one.
    :return: The ``ipm_package_data`` root element, with a ``packages``
        child and a ``collections`` child, each sorted by ``id`` and
        indented for serialization.
    :raises ValueError: If two packages/collections share the same ``id``.

    NOTE(review): ``_find_packages``, ``_find_collections`` and
    ``md5_hexdigest`` are not defined or imported in the part of this file
    visible here; they must be supplied before this function can run.
    """
    # Find all packages.
    packages = []
    for pkg_xml, zf, subdir in _find_packages(os.path.join(root, "packages")):
        zip_path = zf.filename
        zip_size = os.stat(zip_path).st_size
        unzipped_size = sum(zf_info.file_size for zf_info in zf.infolist())
        default_url = f"{base_url}/{subdir}/{os.path.basename(zip_path)}"

        # Fill in several fields of the package xml with calculated values.
        pkg_xml.set("unzipped_size", str(unzipped_size))
        pkg_xml.set("size", str(zip_size))
        pkg_xml.set("checksum", str(md5_hexdigest(zip_path)))
        pkg_xml.set("subdir", subdir)
        # An explicit, non-empty url in the package description wins.
        if not pkg_xml.get("url"):
            pkg_xml.set("url", default_url)

        # Record the package.
        packages.append(pkg_xml)

    # Find all collections
    collections = list(_find_collections(os.path.join(root, "collections")))

    # Check that all UIDs are unique across packages and collections.
    seen_uids = set()
    for item in packages + collections:
        uid = item.get("id")
        if uid in seen_uids:
            raise ValueError("Duplicate UID: %s" % uid)
        seen_uids.add(uid)

    # Put it all together
    top_elt = ElementTree.Element("ipm_package_data")
    packages_elt = ElementTree.SubElement(top_elt, "packages")
    packages_elt.extend(sorted(packages, key=lambda package: package.get("id")))
    collections_elt = ElementTree.SubElement(top_elt, "collections")
    collections_elt.extend(
        sorted(collections, key=lambda collection: collection.get("id")))

    _indent_xml(top_elt)
    return top_elt
+
+
# Three command-line arguments are required: the repository root, the base
# download URL, and the path of the index file to write.
if len(sys.argv) != 4:
    print("Usage: ")
    print("build_pkg_index.py <path-to-packages> <base-url> <output-file>")
    sys.exit(-1)

ROOT, BASE_URL, OUT = sys.argv[1:]

index = build_index(ROOT, BASE_URL)
s = ElementTree.tostring(index).decode("utf8")

# The context manager closes the file even if a write fails, and the
# explicit encoding keeps the output independent of the platform locale.
with open(OUT, "w", encoding="utf8") as out:
    out.write(xml_header)
    out.write(s)
    out.write("\n")
diff --git a/tools/download.sh b/tools/download.sh
new file mode 100644
index 0000000..a87ab73
--- /dev/null
+++ b/tools/download.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
# Print the help text for this script to stdout.
function usage() {
    local script_name
    # Quote "$0" so a script path containing spaces is not word-split.
    script_name=$(basename "$0")

    printf '\n%s\n\n' "Usage: ${script_name} <collection name>"
    printf '%s\n' \
        "Copies nltk data to proper locations from local copy of repository." \
        "Assumes script is in repo tools directory."
    printf '\n%s\n' "Clone the repo:"
    printf '\t%s\n' 'git clone git@github.com:<owner>/ipm-server.git'
    printf '\n%s\n' "Now switch branches to the one with the data on it (and this script):"
    # printf reuses the format once per argument: one indented line each.
    printf '\t%s\n' 'git branch gh-pages remotes/origin/gh-pages' 'git checkout gh-pages'
    printf '\n%s\n\n' "Remember to use sudo if installing to /usr/share (default)"
    printf '%s\n' "set NLTK_DATA_DIR to target directory if different than /usr/share, e.g.:"
    printf '\t%s %s\n\n' 'NLTK_DATA_DIR=./local/dir' "${script_name} book"
}
+
# A collection name is mandatory; without one, show the help text and fail.
[ $# -eq 0 ] && { usage; exit 1; }

collection=$1
data_dir=${NLTK_DATA_DIR:-/usr/share/nltk_data}
script_dir="$( cd "$( dirname "$0" )" && pwd )"
repo_dir=$(readlink -f "$script_dir/..")
package_dir=$repo_dir/packages
collections_dir=$repo_dir/collections

mkdir -p "$data_dir"
# Stop here if the target directory cannot be entered, so nothing is
# copied into whatever directory the script was started from.
pushd "$data_dir" || exit 1

# List the `ref` of every <item> in the collection file, one per line.
# The path is passed as an argument instead of being pasted into the
# Python source, and print() is called as a function so the snippet runs
# on Python 3. Set PYTHON to use a different interpreter.
"${PYTHON:-python3}" -c '
import sys
import xml.etree.ElementTree as e
for item in e.parse(sys.argv[1]).getroot().findall("item"):
    print(item.get("ref"))
' "$collections_dir/$collection.xml" |
while IFS= read -r item
do
    # Use the first matching zip; skip items that have none.
    package=$(find "$package_dir" -name "$item.zip" -print | head -n 1)
    if [ -z "$package" ]; then
        echo "warning: no $item.zip found under $package_dir, skipping" >&2
        continue
    fi
    # Mirror the package's parent directory name inside the data directory.
    target_dir=$(basename "$(dirname "$package")")
    target_file=$target_dir/$item.zip
    mkdir -p "$target_dir"
    cp "$package" "$target_file"
    unzip -u -d "$target_dir" "$target_file"
done

popd