diff options
| author | 2024-03-08 01:19:41 +0800 | |
|---|---|---|
| committer | 2024-03-08 01:19:41 +0800 | |
| commit | a9b7b0295dae63a2346c1556e1c2381ca4d25f9f (patch) | |
| tree | bbaf08eaf467bc8402a3b9c736f971834449c1d6 | |
| parent | e1f06bcc1638fbfe326039dfc5fa814e20fe7c62 (diff) | |
| download | ipm-server-a9b7b0295dae63a2346c1556e1c2381ca4d25f9f.tar.gz ipm-server-a9b7b0295dae63a2346c1556e1c2381ca4d25f9f.zip | |
feat(site): build the simple index demo
| -rw-r--r-- | Makefile | 12 | ||||
| -rw-r--r-- | index.xsl | 41 | ||||
| -rw-r--r-- | packages/dnd/ndice.xml | 6 | ||||
| -rw-r--r-- | packages/dnd/ndice.zip | bin | 0 -> 4505 bytes | |||
| -rw-r--r-- | tools/build_collections.py | 72 | ||||
| -rw-r--r-- | tools/build_pkg_index.py | 112 | ||||
| -rw-r--r-- | tools/download.sh | 48 |
7 files changed, 291 insertions, 0 deletions
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..b11367f --- /dev/null +++ b/Makefile @@ -0,0 +1,12 @@ +PYTHON = python3 +BASEURL = https://raw.githubusercontent.com/HydroRoll-Team/ipm-server/gh-pages/packages + +pkg_index: + $(PYTHON) tools/build_collections.py . + $(PYTHON) tools/build_pkg_index.py . $(BASEURL) index.xml + git add collections + git add index.xml + git commit -m "updated data index" + +grammars: + git commit -m "updated grammar files" packages/grammars
\ No newline at end of file diff --git a/index.xsl b/index.xsl new file mode 100644 index 0000000..3bffdfd --- /dev/null +++ b/index.xsl @@ -0,0 +1,41 @@ +<?xml version="1.0"?> +<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> + <xsl:template match="/ipm_package_data"> + <HTML> + <HEAD> + <TITLE>IPM PACKAGE SERVER</TITLE> + </HEAD> + <BODY bgcolor="white" text="navy"> + <H1>INFINI RULE PACKAGES</H1> + <P>IPM has built-in support for dozens of packages and collections, as listed below. + To use these within IPM/INFINI we recommend that you use the IPM <TT>>>> + ipm add</TT> command.</P> + <P>Please consult the README file included with each + packages for further information.</P> + <OL> + <xsl:for-each select="//packages/package"> + <LI><I> + <xsl:value-of select="@name" /> + </I> [<xsl:element + name="a"> + <xsl:attribute name="href"> + <xsl:value-of select="@url" /> + </xsl:attribute> + download </xsl:element> |<xsl:element name="a"> + <xsl:attribute name="href"> + <xsl:value-of select="@webpage" /> + </xsl:attribute> + source </xsl:element>] <BR /> id: <tt> + <xsl:value-of select="@id" /> + </tt>; + size: <xsl:value-of select="@size" />; author: <xsl:value-of select="@author" />; copyright: <xsl:value-of + select="@copyright" />; license: <xsl:value-of select="@license" />; <P /> + </LI> + </xsl:for-each> + </OL> + <HR /> + <A href="http://ipm.hydroroll.team/index">IPM PACKAGE SERVER</A> + </BODY> + </HTML> + </xsl:template> +</xsl:stylesheet>
\ No newline at end of file diff --git a/packages/dnd/ndice.xml b/packages/dnd/ndice.xml new file mode 100644 index 0000000..619e2ae --- /dev/null +++ b/packages/dnd/ndice.xml @@ -0,0 +1,6 @@ +<package id="ndice" + name="infini example: ndice" + webpage="https://github.com/HydroRoll-Team/infini/blob/master/tests/examples/ndice/" + author="苏向夜" + unzip="1" +/>
\ No newline at end of file diff --git a/packages/dnd/ndice.zip b/packages/dnd/ndice.zip Binary files differnew file mode 100644 index 0000000..0d8650e --- /dev/null +++ b/packages/dnd/ndice.zip diff --git a/tools/build_collections.py b/tools/build_collections.py new file mode 100644 index 0000000..a02a6ad --- /dev/null +++ b/tools/build_collections.py @@ -0,0 +1,72 @@ + +import os +import sys +from glob import glob +from typing import List +from xml.etree import ElementTree + + +def _indent_xml(xml, prefix=""): + """ + Helper for ``build_index()``: Given an XML ``ElementTree``, modify it + (and its descendents) ``text`` and ``tail`` attributes to generate + an indented tree, where each nested element is indented by 2 + spaces with respect to its parent. + """ + if len(xml) > 0: + xml.text = (xml.text or "").strip() + "\n" + prefix + " " + for child in xml: + _indent_xml(child, prefix + " ") + for child in xml[:-1]: + child.tail = (child.tail or "").strip() + "\n" + prefix + " " + xml[-1].tail = (xml[-1].tail or "").strip() + "\n" + prefix + + +if len(sys.argv) != 2: + print("Usage: ") + print("build_collections.py <path-to-packages>") + sys.exit(-1) + +ROOT = sys.argv[1] + + +def write(file_name: str, coll_name: str, items: List[str]) -> None: + """Write `collections/{file_name}.xml` with `file_name` as the collection `id`, + `coll_name` as the collection `name`, and `items` as a list of collection items. + + :param file_name: The id of the collection, equivalent to the file name, + e.g. `all-collections`. + :type file_name: str + :param coll_name: The name of the collection, e.g. `"All collections"` + :type coll_name: str + :param items: A list of names for the collection items, e.g. `["dnd", "coc", ...]` + :type items: List[str] + """ + et = ElementTree.Element("collection", id=file_name, name=coll_name) + et.extend(ElementTree.Element("item", ref=item) for item in sorted(items)) + _indent_xml(et) + with open(os.path.join(ROOT, "collections", file_name + ".xml"), "w", encoding="utf8") as f: + f.write(ElementTree.tostring(et).decode("utf8")) + + +def get_id(xml_path: str) -> str: + """Given a full path, extract only the filename (i.e. the nltk_data id) + + :param xml_path: A full path, e.g. "./packages/collections/coc.xml" + :type xml_path: str + :return: The filename, without the extension, e.g. "coc" + :rtype: str + """ + return os.path.splitext(os.path.basename(xml_path))[0] + + +# Write `collection/all-collections.xml` based on all files under /packages/collections +collections_items = [get_id(xml_path) + for xml_path in glob(f"{ROOT}/packages/collections/*.xml")] +write("all-collections", "All the collections", collections_items) + +# Write `collection/all-ipm.xml` and `collection/all.xml` based on all files under /packages +all_items = [get_id(xml_path) + for xml_path in glob(f"{ROOT}/packages/**/*.xml")] +write("all-nltk", "All packages available on ipm-server gh-pages branch", all_items) +write("all", "All packages", all_items) diff --git a/tools/build_pkg_index.py b/tools/build_pkg_index.py new file mode 100644 index 0000000..883d06c --- /dev/null +++ b/tools/build_pkg_index.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python + +""" +Build the collections package index. Usage: + + build_pkg_index.py <path-to-packages> <base-url> <output-file> +""" + +from xml.etree import ElementTree +import sys +xml_header = """<?xml version="1.0"?> +<?xml-stylesheet href="index.xsl" type="text/xsl"?> +""" + + +def _indent_xml(xml, prefix=""): + """ + Helper for ``build_index()``: Given an XML ``ElementTree``, modify it + (and its descendents) ``text`` and ``tail`` attributes to generate + an indented tree, where each nested element is indented by 2 + spaces with respect to its parent. + """ + if len(xml) > 0: + xml.text = (xml.text or "").strip() + "\n" + prefix + " " + for child in xml: + _indent_xml(child, prefix + " ") + for child in xml[:-1]: + child.tail = (child.tail or "").strip() + "\n" + prefix + " " + xml[-1].tail = (xml[-1].tail or "").strip() + "\n" + prefix + + +def build_index(root, base_url): + """ + Create a new data.xml index file, by combining the xml description + files for various packages and collections. ``root`` should be the + path to a directory containing the package xml and zip files; and + the collection xml files. The ``root`` directory is expected to + have the following subdirectories:: + + root/ + packages/ .................. subdirectory for packages + collections/ ............... xml files for collections + + For each package, there should be two files: ``package.zip`` + (where *package* is the package name) + which contains the package itself as a compressed zip file; and + ``package.xml``, which is an xml description of the package. The + zipfile ``package.zip`` should expand to a single subdirectory + named ``package/``. The base filename ``package`` must match + the identifier given in the package's xml file. + + For each collection, there should be a single file ``collection.zip`` + describing the collection, where *collection* is the name of the collection. + + All identifiers (for both packages and collections) must be unique. + """ + # Find all packages. + packages = [] + for pkg_xml, zf, subdir in _find_packages(os.path.join(root, "packages")): + zipstat = os.stat(zf.filename) + url = f"{base_url}/{subdir}/{os.path.split(zf.filename)[1]}" + unzipped_size = sum(zf_info.file_size for zf_info in zf.infolist()) + + # Fill in several fields of the package xml with calculated values. + pkg_xml.set("unzipped_size", "%s" % unzipped_size) + pkg_xml.set("size", "%s" % zipstat.st_size) + pkg_xml.set("checksum", "%s" % md5_hexdigest(zf.filename)) + pkg_xml.set("subdir", subdir) + # pkg_xml.set('svn_revision', _svn_revision(zf.filename)) + if not pkg_xml.get("url"): + pkg_xml.set("url", url) + + # Record the package. + packages.append(pkg_xml) + + # Find all collections + collections = list(_find_collections(os.path.join(root, "collections"))) + + # Check that all UIDs are unique + uids = set() + for item in packages + collections: + if item.get("id") in uids: + raise ValueError("Duplicate UID: %s" % item.get("id")) + uids.add(item.get("id")) + + # Put it all together + top_elt = ElementTree.Element("ipm_package_data") + top_elt.append(ElementTree.Element("packages")) + top_elt[0].extend(sorted(packages, key=lambda package: package.get("id"))) + top_elt.append(ElementTree.Element("collections")) + top_elt[1].extend( + sorted(collections, key=lambda collection: collection.get("id"))) + + _indent_xml(top_elt) + return top_elt + + +if len(sys.argv) != 4: + print("Usage: ") + print("build_pkg_index.py <path-to-packages> <base-url> <output-file>") + sys.exit(-1) + +ROOT, BASE_URL, OUT = sys.argv[1:] + +index = build_index(ROOT, BASE_URL) +s = ElementTree.tostring(index) +s = s.decode("utf8") +out = open(OUT, 'w') +out.write(xml_header) +out.write(s) +out.write('\n') +out.close() diff --git a/tools/download.sh b/tools/download.sh new file mode 100644 index 0000000..a87ab73 --- /dev/null +++ b/tools/download.sh @@ -0,0 +1,48 @@ +#!/bin/bash +function usage() { + echo + echo "Usage: $(basename $0) <collection name>" + echo + echo "Copies nltk data to proper locations from local copy of repository." + echo "Assumes script is in repo tools directory." + echo + echo "Clone the repo:" + printf '\t%s\n' 'git clone git@github.com:<owner>/ipm-server.git' + echo + echo "Now switch branches to the one with the data on it (and this script):" + printf '\t%s\n' 'git branch gh-pages remotes/origin/gh-pages' + printf '\t%s\n' 'git checkout gh-pages' + echo + echo "Remember to use sudo if installing to /usr/share (default)" + echo + echo set NLTK_DATA_DIR to target directory if different than /usr/share, e.g.: + printf '\t%s %s\n' 'NLTK_DATA_DIR=./local/dir' "$(basename $0) book" + echo +} + +[ $# -eq 0 ] && { usage; exit 1; } + +collection=$1 +data_dir=${NLTK_DATA_DIR:-/usr/share/nltk_data} +script_dir="$( cd "$( dirname "$0" )" && pwd )" +repo_dir=$(readlink -f "$script_dir/..") +package_dir=$repo_dir/packages +collections_dir=$repo_dir/collections + +mkdir -p $data_dir +pushd $data_dir + +python -c "import xml.etree.ElementTree as e +for item in e.parse('$collections_dir/$collection.xml').getroot().findall('item'): + print item.get('ref')" | +while read item +do + package=$(find $package_dir -name $item.zip -print) + target_dir=$(basename $(dirname $package)) + target_file=$target_dir/$item.zip + mkdir -p $target_dir + cp $package $target_file + unzip -u -d $target_dir $target_file +done + +popd
\ No newline at end of file |
