From a9b7b0295dae63a2346c1556e1c2381ca4d25f9f Mon Sep 17 00:00:00 2001 From: 简律纯 Date: Fri, 8 Mar 2024 01:19:41 +0800 Subject: feat(site): build the simple index demo --- Makefile | 12 +++++ index.xsl | 41 +++++++++++++++++ packages/dnd/ndice.xml | 6 +++ packages/dnd/ndice.zip | Bin 0 -> 4505 bytes tools/build_collections.py | 72 +++++++++++++++++++++++++++++ tools/build_pkg_index.py | 112 +++++++++++++++++++++++++++++++++++++++++++++ tools/download.sh | 48 +++++++++++++++++++ 7 files changed, 291 insertions(+) create mode 100644 Makefile create mode 100644 index.xsl create mode 100644 packages/dnd/ndice.xml create mode 100644 packages/dnd/ndice.zip create mode 100644 tools/build_collections.py create mode 100644 tools/build_pkg_index.py create mode 100644 tools/download.sh diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..b11367f --- /dev/null +++ b/Makefile @@ -0,0 +1,12 @@ +PYTHON = python3 +BASEURL = https://raw.githubusercontent.com/HydroRoll-Team/ipm-server/gh-pages/packages + +pkg_index: + $(PYTHON) tools/build_collections.py . + $(PYTHON) tools/build_pkg_index.py . $(BASEURL) index.xml + git add collections + git add index.xml + git commit -m "updated data index" + +grammars: + git commit -m "updated grammar files" packages/grammars \ No newline at end of file diff --git a/index.xsl b/index.xsl new file mode 100644 index 0000000..3bffdfd --- /dev/null +++ b/index.xsl @@ -0,0 +1,41 @@ + + + + + + IPM PACKAGE SERVER + + +

INFINI RULE PACKAGES

IPM has built-in support for dozens of packages and collections, as listed below. + To use these within IPM/INFINI we recommend that you use the IPM >>> + ipm add command.

Please consult the README file included with each + package for further information.

+ + [ + + + + download | + + + + source ]
id: + +; + size: ; author: ; copyright: ; license: ;

+ IPM PACKAGE SERVER + + + + \ No newline at end of file diff --git a/packages/dnd/ndice.xml b/packages/dnd/ndice.xml new file mode 100644 index 0000000..619e2ae --- /dev/null +++ b/packages/dnd/ndice.xml @@ -0,0 +1,6 @@ + \ No newline at end of file diff --git a/packages/dnd/ndice.zip b/packages/dnd/ndice.zip new file mode 100644 index 0000000..0d8650e Binary files /dev/null and b/packages/dnd/ndice.zip differ diff --git a/tools/build_collections.py b/tools/build_collections.py new file mode 100644 index 0000000..a02a6ad --- /dev/null +++ b/tools/build_collections.py @@ -0,0 +1,72 @@ + +import os +import sys +from glob import glob +from typing import List +from xml.etree import ElementTree + + +def _indent_xml(xml, prefix=""): + """ + Helper for ``build_index()``: Given an XML ``ElementTree``, modify it + (and its descendents) ``text`` and ``tail`` attributes to generate + an indented tree, where each nested element is indented by 2 + spaces with respect to its parent. + """ + if len(xml) > 0: + xml.text = (xml.text or "").strip() + "\n" + prefix + " " + for child in xml: + _indent_xml(child, prefix + " ") + for child in xml[:-1]: + child.tail = (child.tail or "").strip() + "\n" + prefix + " " + xml[-1].tail = (xml[-1].tail or "").strip() + "\n" + prefix + + +if len(sys.argv) != 2: + print("Usage: ") + print("build_collections.py ") + sys.exit(-1) + +ROOT = sys.argv[1] + + +def write(file_name: str, coll_name: str, items: List[str]) -> None: + """Write `collections/{file_name}.xml` with `file_name` as the collection `id`, + `coll_name` as the collection `name`, and `items` as a list of collection items. + + :param file_name: The id of the collection, equivalent to the file name, + e.g. `all-collections`. + :type file_name: str + :param coll_name: The name of the collection, e.g. `"All collections"` + :type coll_name: str + :param items: A list of names for the collection items, e.g. 
`["dnd", "coc", ...]` + :type items: List[str] + """ + et = ElementTree.Element("collection", id=file_name, name=coll_name) + et.extend(ElementTree.Element("item", ref=item) for item in sorted(items)) + _indent_xml(et) + with open(os.path.join(ROOT, "collections", file_name + ".xml"), "w", encoding="utf8") as f: + f.write(ElementTree.tostring(et).decode("utf8")) + + +def get_id(xml_path: str) -> str: + """Given a full path, extract only the filename (i.e. the nltk_data id) + + :param xml_path: A full path, e.g. "./packages/collections/coc.xml" + :type xml_path: str + :return: The filename, without the extension, e.g. "coc" + :rtype: str + """ + return os.path.splitext(os.path.basename(xml_path))[0] + + +# Write `collection/all-collections.xml` based on all files under /packages/collections +collections_items = [get_id(xml_path) + for xml_path in glob(f"{ROOT}/packages/collections/*.xml")] +write("all-collections", "All the collections", collections_items) + +# Write `collection/all-ipm.xml` and `collection/all.xml` based on all files under /packages +all_items = [get_id(xml_path) + for xml_path in glob(f"{ROOT}/packages/**/*.xml")] +write("all-nltk", "All packages available on ipm-server gh-pages branch", all_items) +write("all", "All packages", all_items) diff --git a/tools/build_pkg_index.py b/tools/build_pkg_index.py new file mode 100644 index 0000000..883d06c --- /dev/null +++ b/tools/build_pkg_index.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python + +""" +Build the collections package index. Usage: + + build_pkg_index.py

+""" + +from xml.etree import ElementTree +import sys +xml_header = """ + +""" + + +def _indent_xml(xml, prefix=""): + """ + Helper for ``build_index()``: Given an XML ``ElementTree``, modify it + (and its descendents) ``text`` and ``tail`` attributes to generate + an indented tree, where each nested element is indented by 2 + spaces with respect to its parent. + """ + if len(xml) > 0: + xml.text = (xml.text or "").strip() + "\n" + prefix + " " + for child in xml: + _indent_xml(child, prefix + " ") + for child in xml[:-1]: + child.tail = (child.tail or "").strip() + "\n" + prefix + " " + xml[-1].tail = (xml[-1].tail or "").strip() + "\n" + prefix + + +def build_index(root, base_url): + """ + Create a new data.xml index file, by combining the xml description + files for various packages and collections. ``root`` should be the + path to a directory containing the package xml and zip files; and + the collection xml files. The ``root`` directory is expected to + have the following subdirectories:: + + root/ + packages/ .................. subdirectory for packages + collections/ ............... xml files for collections + + For each package, there should be two files: ``package.zip`` + (where *package* is the package name) + which contains the package itself as a compressed zip file; and + ``package.xml``, which is an xml description of the package. The + zipfile ``package.zip`` should expand to a single subdirectory + named ``package/``. The base filename ``package`` must match + the identifier given in the package's xml file. + + For each collection, there should be a single file ``collection.zip`` + describing the collection, where *collection* is the name of the collection. + + All identifiers (for both packages and collections) must be unique. + """ + # Find all packages. 
+ packages = [] + for pkg_xml, zf, subdir in _find_packages(os.path.join(root, "packages")): + zipstat = os.stat(zf.filename) + url = f"{base_url}/{subdir}/{os.path.split(zf.filename)[1]}" + unzipped_size = sum(zf_info.file_size for zf_info in zf.infolist()) + + # Fill in several fields of the package xml with calculated values. + pkg_xml.set("unzipped_size", "%s" % unzipped_size) + pkg_xml.set("size", "%s" % zipstat.st_size) + pkg_xml.set("checksum", "%s" % md5_hexdigest(zf.filename)) + pkg_xml.set("subdir", subdir) + # pkg_xml.set('svn_revision', _svn_revision(zf.filename)) + if not pkg_xml.get("url"): + pkg_xml.set("url", url) + + # Record the package. + packages.append(pkg_xml) + + # Find all collections + collections = list(_find_collections(os.path.join(root, "collections"))) + + # Check that all UIDs are unique + uids = set() + for item in packages + collections: + if item.get("id") in uids: + raise ValueError("Duplicate UID: %s" % item.get("id")) + uids.add(item.get("id")) + + # Put it all together + top_elt = ElementTree.Element("ipm_package_data") + top_elt.append(ElementTree.Element("packages")) + top_elt[0].extend(sorted(packages, key=lambda package: package.get("id"))) + top_elt.append(ElementTree.Element("collections")) + top_elt[1].extend( + sorted(collections, key=lambda collection: collection.get("id"))) + + _indent_xml(top_elt) + return top_elt + + +if len(sys.argv) != 4: + print("Usage: ") + print("build_pkg_index.py

") + sys.exit(-1) + +ROOT, BASE_URL, OUT = sys.argv[1:] + +index = build_index(ROOT, BASE_URL) +s = ElementTree.tostring(index) +s = s.decode("utf8") +out = open(OUT, 'w') +out.write(xml_header) +out.write(s) +out.write('\n') +out.close() diff --git a/tools/download.sh b/tools/download.sh new file mode 100644 index 0000000..a87ab73 --- /dev/null +++ b/tools/download.sh @@ -0,0 +1,48 @@ +#!/bin/bash +function usage() { + echo + echo "Usage: $(basename $0) " + echo + echo "Copies nltk data to proper locations from local copy of repository." + echo "Assumes script is in repo tools directory." + echo + echo "Clone the repo:" + printf '\t%s\n' 'git clone git@github.com:/ipm-server.git' + echo + echo "Now switch branches to the one with the data on it (and this script):" + printf '\t%s\n' 'git branch gh-pages remotes/origin/gh-pages' + printf '\t%s\n' 'git checkout gh-pages' + echo + echo "Remember to use sudo if installing to /usr/share (default)" + echo + echo set NLTK_DATA_DIR to target directory if different than /usr/share, e.g.: + printf '\t%s %s\n' 'NLTK_DATA_DIR=./local/dir' "$(basename $0) book" + echo +} + +[ $# -eq 0 ] && { usage; exit 1; } + +collection=$1 +data_dir=${NLTK_DATA_DIR:-/usr/share/nltk_data} +script_dir="$( cd "$( dirname "$0" )" && pwd )" +repo_dir=$(readlink -f "$script_dir/..") +package_dir=$repo_dir/packages +collections_dir=$repo_dir/collections + +mkdir -p $data_dir +pushd $data_dir + +python -c "import xml.etree.ElementTree as e +for item in e.parse('$collections_dir/$collection.xml').getroot().findall('item'): + print item.get('ref')" | +while read item +do + package=$(find $package_dir -name $item.zip -print) + target_dir=$(basename $(dirname $package)) + target_file=$target_dir/$item.zip + mkdir -p $target_dir + cp $package $target_file + unzip -u -d $target_dir $target_file +done + +popd \ No newline at end of file -- cgit v1.2.3-70-g09d2