# path: root/tools/build_collections.py
# blob: b7b2d26f0933943c98952e3b906438a53545925e

import os
import sys
from glob import glob
from typing import List
from xml.etree import ElementTree


def _indent_xml(xml, prefix=""):
    """
    Helper for ``build_index()``: Given an XML ``ElementTree``, modify it
    (and its descendents) ``text`` and ``tail`` attributes to generate
    an indented tree, where each nested element is indented by 2
    spaces with respect to its parent.
    """
    if len(xml) > 0:
        xml.text = (xml.text or "").strip() + "\n" + prefix + "  "
        for child in xml:
            _indent_xml(child, prefix + "  ")
        for child in xml[:-1]:
            child.tail = (child.tail or "").strip() + "\n" + prefix + "  "
        xml[-1].tail = (xml[-1].tail or "").strip() + "\n" + prefix


# The script takes exactly one positional argument. Anything else prints the
# usage text and exits with status -1.
# NOTE(review): the usage text names the argument <path-to-packages>, while
# the rest of the script looks for a `packages/` directory beneath ROOT --
# confirm which directory callers are expected to pass.
if len(sys.argv) == 2:
    ROOT = sys.argv[1]
else:
    print("Usage: ")
    print("build_collections.py <path-to-packages>")
    sys.exit(-1)


def write(file_name: str, coll_name: str, items: List[str]) -> None:
    """Create `collections/{file_name}.xml` under `ROOT`.

    The file holds a single `<collection>` element whose `id` is `file_name`
    and whose `name` is `coll_name`, with one `<item ref="...">` child per
    entry of `items`, in sorted order.

    :param file_name: The id of the collection, which is also the name of the
        file without its extension, e.g. `all-collections`.
    :type file_name: str
    :param coll_name: The human-readable name of the collection,
        e.g. `"All collections"`
    :type coll_name: str
    :param items: The names of the collection items, e.g. `["dnd", "coc", ...]`
    :type items: List[str]
    """
    collection = ElementTree.Element("collection", id=file_name, name=coll_name)
    for ref in sorted(items):
        ElementTree.SubElement(collection, "item", ref=ref)
    _indent_xml(collection)

    target = os.path.join(ROOT, "collections", file_name + ".xml")
    with open(target, "w", encoding="utf8") as out:
        # tostring() returns bytes; decode them for the text-mode file handle.
        out.write(ElementTree.tostring(collection).decode("utf8"))


def get_id(xml_path: str) -> str:
    """Return the file name of `xml_path` with its directory and last extension removed.

    :param xml_path: A path to a file, e.g. "./packages/collections/coc.xml"
    :type xml_path: str
    :return: The bare file name without its extension, e.g. "coc"
    :rtype: str
    """
    _directory, file_name = os.path.split(xml_path)
    stem, _extension = os.path.splitext(file_name)
    return stem


# Generate `collections/all-collections.xml`, listing the id of every XML file
# found directly inside `{ROOT}/packages/collections`.
collections_items = list(map(get_id, glob(f"{ROOT}/packages/collections/*.xml")))
write("all-collections", "All the collections", collections_items)

# Generate `collections/all-ipm.xml` and `collections/all.xml`; both list the
# same ids, taken from the XML files under `{ROOT}/packages`.
# NOTE(review): glob() is called without `recursive=True`, so `**` behaves like
# `*` and only files exactly one directory below `packages/` are matched --
# confirm that this is the intended depth.
all_items = list(map(get_id, glob(f"{ROOT}/packages/**/*.xml")))
write("all-ipm", "All packages available on ipm-server gh-pages branch", all_items)
write("all", "All packages", all_items)