aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/tools/build_collections.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/build_collections.py')
-rw-r--r--tools/build_collections.py72
1 files changed, 72 insertions, 0 deletions
diff --git a/tools/build_collections.py b/tools/build_collections.py
new file mode 100644
index 0000000..a02a6ad
--- /dev/null
+++ b/tools/build_collections.py
@@ -0,0 +1,72 @@
+
+import os
+import sys
+from glob import glob
+from typing import List
+from xml.etree import ElementTree
+
+
+def _indent_xml(xml, prefix=""):
+ """
+ Helper for ``build_index()``: Given an XML ``ElementTree``, modify it
+ (and its descendents) ``text`` and ``tail`` attributes to generate
+ an indented tree, where each nested element is indented by 2
+ spaces with respect to its parent.
+ """
+ if len(xml) > 0:
+ xml.text = (xml.text or "").strip() + "\n" + prefix + " "
+ for child in xml:
+ _indent_xml(child, prefix + " ")
+ for child in xml[:-1]:
+ child.tail = (child.tail or "").strip() + "\n" + prefix + " "
+ xml[-1].tail = (xml[-1].tail or "").strip() + "\n" + prefix
+
+
+if len(sys.argv) != 2:
+ print("Usage: ")
+ print("build_collections.py <path-to-packages>")
+ sys.exit(-1)
+
+ROOT = sys.argv[1]
+
+
+def write(file_name: str, coll_name: str, items: List[str]) -> None:
+ """Write `collections/{file_name}.xml` with `file_name` as the collection `id`,
+ `coll_name` as the collection `name`, and `items` as a list of collection items.
+
+ :param file_name: The id of the collection, equivalent to the file name,
+ e.g. `all-collections`.
+ :type file_name: str
+ :param coll_name: The name of the collection, e.g. `"All collections"`
+ :type coll_name: str
+ :param items: A list of names for the collection items, e.g. `["dnd", "coc", ...]`
+ :type items: List[str]
+ """
+ et = ElementTree.Element("collection", id=file_name, name=coll_name)
+ et.extend(ElementTree.Element("item", ref=item) for item in sorted(items))
+ _indent_xml(et)
+ with open(os.path.join(ROOT, "collections", file_name + ".xml"), "w", encoding="utf8") as f:
+ f.write(ElementTree.tostring(et).decode("utf8"))
+
+
+def get_id(xml_path: str) -> str:
+ """Given a full path, extract only the filename (i.e. the nltk_data id)
+
+ :param xml_path: A full path, e.g. "./packages/collections/coc.xml"
+ :type xml_path: str
+ :return: The filename, without the extension, e.g. "coc"
+ :rtype: str
+ """
+ return os.path.splitext(os.path.basename(xml_path))[0]
+
+
+# Write `collection/all-collections.xml` based on all files under /packages/collections
+collections_items = [get_id(xml_path)
+ for xml_path in glob(f"{ROOT}/packages/collections/*.xml")]
+write("all-collections", "All the collections", collections_items)
+
+# Write `collection/all-ipm.xml` and `collection/all.xml` based on all files under /packages
+all_items = [get_id(xml_path)
+ for xml_path in glob(f"{ROOT}/packages/**/*.xml")]
+write("all-nltk", "All packages available on ipm-server gh-pages branch", all_items)
+write("all", "All packages", all_items)