From be8de118db913711eb72ae5187d26e54a0055727 Mon Sep 17 00:00:00 2001
From: 简律纯 <i@jyunko.cn>
Date: Fri, 15 Dec 2023 09:11:47 +0800
Subject: refactor(docs): optmst `docs` dir & `deps`

---
 .../integrations/search/internal/.eslintrc         |   6 +
 .../integrations/search/internal/_/index.ts        |  74 ++++++++++
 .../integrations/search/internal/extract/index.ts  | 107 ++++++++++++++
 .../search/internal/highlight/index.ts             | 162 +++++++++++++++++++++
 .../integrations/search/internal/index.ts          |  26 ++++
 .../integrations/search/internal/tokenize/index.ts | 136 +++++++++++++++++
 6 files changed, 511 insertions(+)
 create mode 100644 docs/src/templates/assets/javascripts/integrations/search/internal/.eslintrc
 create mode 100644 docs/src/templates/assets/javascripts/integrations/search/internal/_/index.ts
 create mode 100644 docs/src/templates/assets/javascripts/integrations/search/internal/extract/index.ts
 create mode 100644 docs/src/templates/assets/javascripts/integrations/search/internal/highlight/index.ts
 create mode 100644 docs/src/templates/assets/javascripts/integrations/search/internal/index.ts
 create mode 100644 docs/src/templates/assets/javascripts/integrations/search/internal/tokenize/index.ts

(limited to 'docs/src/templates/assets/javascripts/integrations/search/internal')

diff --git a/docs/src/templates/assets/javascripts/integrations/search/internal/.eslintrc b/docs/src/templates/assets/javascripts/integrations/search/internal/.eslintrc
new file mode 100644
index 00000000..9368ceb6
--- /dev/null
+++ b/docs/src/templates/assets/javascripts/integrations/search/internal/.eslintrc
@@ -0,0 +1,6 @@
+{
+  "rules": {
+    "no-fallthrough": "off",
+    "no-underscore-dangle": "off"
+  }
+}
diff --git a/docs/src/templates/assets/javascripts/integrations/search/internal/_/index.ts b/docs/src/templates/assets/javascripts/integrations/search/internal/_/index.ts
new file mode 100644
index 00000000..ae8f6104
--- /dev/null
+++ b/docs/src/templates/assets/javascripts/integrations/search/internal/_/index.ts
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/* ----------------------------------------------------------------------------
+ * Helper types
+ * ------------------------------------------------------------------------- */
+
+/**
+ * Visitor function, invoked by `split` once for each non-empty range
+ *
+ * @param start - Start offset into the input (inclusive)
+ * @param end - End offset into the input (exclusive)
+ */
+type VisitorFn = (
+  start: number, end: number
+) => void
+
+/* ----------------------------------------------------------------------------
+ * Functions
+ * ------------------------------------------------------------------------- */
+
+/**
+ * Split a string using the given separator, reporting ranges, not substrings
+ *
+ * @param input - Input value
+ * @param separator - Separator, recompiled with only the `g` flag set
+ * @param fn - Visitor function, called for each non-empty range in order
+ */
+export function split(
+  input: string, separator: RegExp, fn: VisitorFn
+): void {
+  separator = new RegExp(separator, "g")
+
+  /* Scan for separator matches, each match terminates the current range */
+  let match: RegExpExecArray | null
+  let index = 0
+  do {
+    match = separator.exec(input)
+
+    /* Emit non-empty range, which ends at the match or the end of input */
+    const until = match?.index ?? input.length
+    if (index < until)
+      fn(index, until)
+
+    /* Start the next range right after the matched separator */
+    if (match) {
+      const [term] = match
+      index = match.index + term.length
+
+      /* Zero-length match (e.g. lookahead): step ahead to avoid looping */
+      if (term.length === 0)
+        separator.lastIndex = match.index + 1
+    }
+  } while (match)
+}
diff --git a/docs/src/templates/assets/javascripts/integrations/search/internal/extract/index.ts b/docs/src/templates/assets/javascripts/integrations/search/internal/extract/index.ts
new file mode 100644
index 00000000..2a98b9e1
--- /dev/null
+++ b/docs/src/templates/assets/javascripts/integrations/search/internal/extract/index.ts
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/* ----------------------------------------------------------------------------
+ * Types
+ * ------------------------------------------------------------------------- */
+
+/**
+ * Extraction type
+ *
+ * This type defines the possible values that are encoded into the two lowest
+ * bits of each section stored in a block of the tokenization table. There are
+ * three types of interest: HTML opening and closing tags, as well as the
+ * actual text content we need to extract for indexing.
+ */
+export const enum Extract {
+  TAG_OPEN  = 0,                       /* HTML opening tag */
+  TEXT      = 1,                       /* Text content */
+  TAG_CLOSE = 2                        /* HTML closing tag */
+}
+
+/* ----------------------------------------------------------------------------
+ * Helper types
+ * ------------------------------------------------------------------------- */
+
+/**
+ * Visitor function, invoked by `extract` for each reported section
+ *
+ * @param block - Block index, incremented after each outermost closing tag
+ * @param type - Extraction type
+ * @param start - Start offset into the input (inclusive)
+ * @param end - End offset into the input (exclusive)
+ */
+type VisitorFn = (
+  block: number, type: Extract, start: number, end: number
+) => void
+
+/* ----------------------------------------------------------------------------
+ * Functions
+ * ------------------------------------------------------------------------- */
+
+/**
+ * Split a string into markup and text sections
+ *
+ * This function scans a string and divides it up into sections of markup and
+ * text, invoking the visitor with block index, extraction type, start and end
+ * offsets for each. Tags are only reported at nesting depth 0, self-closing
+ * tags never. Using a visitor function (= streaming data) spares the GC.
+ *
+ * @param input - Input value
+ * @param fn - Visitor function
+ */
+export function extract(
+  input: string, fn: VisitorFn
+): void {
+
+  let block = 0                        /* Current block */
+  let start = 0                        /* Current start offset */
+  let end = 0                          /* Current end offset */
+
+  /* Split string into sections, `stack` tracks the tag nesting depth */
+  for (let stack = 0; end < input.length; end++) {
+
+    /* "<" after non-empty text: emit text, then move `start` onto the "<" */
+    if (input.charAt(end) === "<" && end > start) {
+      fn(block, Extract.TEXT, start, start = end)
+
+    /* End of tag: closing tag, if second character of section is "/" */
+    } else if (input.charAt(end) === ">") {
+      if (input.charAt(start + 1) === "/") {
+        if (--stack === 0)
+          fn(block++, Extract.TAG_CLOSE, start, end + 1)
+
+      /* Opening tag, unless self-closing, i.e. ">" is preceded by "/" */
+      } else if (input.charAt(end - 1) !== "/") {
+        if (stack++ === 0)
+          fn(block, Extract.TAG_OPEN, start, end + 1)
+      }
+
+      /* New section starts right after the ">" */
+      start = end + 1
+    }
+  }
+
+  /* Add trailing section as text, if anything is left */
+  if (end > start)
+    fn(block, Extract.TEXT, start, end)
+}
diff --git a/docs/src/templates/assets/javascripts/integrations/search/internal/highlight/index.ts b/docs/src/templates/assets/javascripts/integrations/search/internal/highlight/index.ts
new file mode 100644
index 00000000..7cc3bf1a
--- /dev/null
+++ b/docs/src/templates/assets/javascripts/integrations/search/internal/highlight/index.ts
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/* ----------------------------------------------------------------------------
+ * Types
+ * ------------------------------------------------------------------------- */
+
+/**
+ * Position table - per block, sections as `offset << 12 | length << 2 | type`
+ */
+export type PositionTable = number[][]
+
+/**
+ * Position - packed as `block << 20 | index of section within its block`
+ */
+export type Position = number
+
+/* ----------------------------------------------------------------------------
+ * Functions
+ * ------------------------------------------------------------------------- */
+
+/**
+ * Highlight all occurrences in a string
+ *
+ * This function receives a field's value (e.g. like `title` or `text`), its
+ * position table that was generated during indexing, and the positions found
+ * when executing the query. It then highlights all occurrences, and returns
+ * their concatenation. In case of multiple blocks, at most two are returned.
+ *
+ * @param input - Input value
+ * @param table - Table for indexing
+ * @param positions - Occurrences - note that this array is sorted in place
+ * @param full - Full results, i.e. keep the text between and around blocks
+ *
+ * @returns Highlighted string value
+ */
+export function highlight(
+  input: string, table: PositionTable, positions: Position[], full = false
+): string {
+  return highlightAll([input], table, positions, full).pop()!
+}
+
+/**
+ * Highlight all occurrences in a set of strings, wrapping them in `<mark>`
+ *
+ * @param inputs - Input values, matched to blocks by means of the table
+ * @param table - Table for indexing
+ * @param positions - Occurrences - note that this array is sorted in place
+ * @param full - Full results, i.e. keep the text between and around blocks
+ *
+ * @returns Highlighted string values
+ */
+export function highlightAll(
+  inputs: string[], table: PositionTable, positions: Position[], full = false
+): string[] {
+
+  /* Map blocks to indexes of input values, first block maps to first input */
+  const mapping = [0]
+  for (let t = 1; t < table.length; t++) {
+    const prev = table[t - 1]
+    const next = table[t]
+
+    /* Length of last section of previous vs. offset of first of next block */
+    const p = prev[prev.length - 1] >>> 2 & 0x3FF
+    const q = next[0]               >>> 12
+
+    /* p > q is taken as the start of the next input, so bump input index */
+    mapping.push(+(p > q) + mapping[mapping.length - 1])
+  }
+
+  /* Highlight strings one after another */
+  return inputs.map((input, i) => {
+    let cursor = 0
+
+    /* Group occurrences by block - lower 20 bits: index, upper bits: block */
+    const blocks = new Map<number, number[]>()
+    for (const p of positions.sort((a, b) => a - b)) {
+      const index = p & 0xFFFFF
+      const block = p >>> 20
+      if (mapping[block] !== i)
+        continue
+
+      /* Ensure presence of block group */
+      let group = blocks.get(block)
+      if (typeof group === "undefined")
+        blocks.set(block, group = [])
+
+      /* Add index to group */
+      group.push(index)
+    }
+
+    /* Just return string, if no occurrences */
+    if (blocks.size === 0)
+      return input
+
+    /* Compute slices, one for each block in ascending order of positions */
+    const slices: string[] = []
+    for (const [block, indexes] of blocks) {
+      const t = table[block]
+
+      /* Offsets of first and last section, and length of the last section */
+      const start  = t[0]            >>> 12
+      const end    = t[t.length - 1] >>> 12
+      const length = t[t.length - 1] >>> 2 & 0x3FF
+
+      /* Add prefix, if full results are desired */
+      if (full && start > cursor)
+        slices.push(input.slice(cursor, start))
+
+      /* Extract slice and wrap matches back to front to keep offsets valid */
+      let slice = input.slice(start, end + length)
+      for (const j of indexes.sort((a, b) => b - a)) {
+
+        /* Compute start and end of match, both relative to the slice */
+        const p = (t[j] >>> 12) - start
+        const q = (t[j] >>> 2 & 0x3FF) + p
+
+        /* Wrap occurrence */
+        slice = [
+          slice.slice(0, p),
+          "<mark>",
+          slice.slice(p, q),
+          "</mark>",
+          slice.slice(q)
+        ].join("")
+      }
+
+      /* Update cursor to the end of the block */
+      cursor = end + length
+
+      /* Append slice and abort once `slices` holds exactly two entries */
+      if (slices.push(slice) === 2)
+        break
+    }
+
+    /* Add suffix, if full results are desired */
+    if (full && cursor < input.length)
+      slices.push(input.slice(cursor))
+
+    /* Return highlighted slices */
+    return slices.join("")
+  })
+}
diff --git a/docs/src/templates/assets/javascripts/integrations/search/internal/index.ts b/docs/src/templates/assets/javascripts/integrations/search/internal/index.ts
new file mode 100644
index 00000000..c752329e
--- /dev/null
+++ b/docs/src/templates/assets/javascripts/integrations/search/internal/index.ts
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+export * from "./_"
+export * from "./extract"
+export * from "./highlight"
+export * from "./tokenize"
diff --git a/docs/src/templates/assets/javascripts/integrations/search/internal/tokenize/index.ts b/docs/src/templates/assets/javascripts/integrations/search/internal/tokenize/index.ts
new file mode 100644
index 00000000..f5089bc9
--- /dev/null
+++ b/docs/src/templates/assets/javascripts/integrations/search/internal/tokenize/index.ts
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+import { split } from "../_"
+import {
+  Extract,
+  extract
+} from "../extract"
+
+/* ----------------------------------------------------------------------------
+ * Functions
+ * ------------------------------------------------------------------------- */
+
+/**
+ * Split a string or set of strings into tokens
+ *
+ * This tokenizer supersedes the default tokenizer that is provided by Lunr.js,
+ * as it is aware of HTML tags and allows for multi-character splitting.
+ *
+ * It takes the given inputs, splits each of them into markup and text sections,
+ * tokenizes and segments (if necessary) each of them, and then appends them to
+ * `lunr.tokenizer.table` using a compact bit representation. Bitwise techniques
+ * are used to write and read from the table during indexing and querying.
+ *
+ * @see https://bit.ly/3W3Xw4J - Search: better, faster, smaller
+ *
+ * @param input - Input value(s), `undefined` yields no tokens
+ *
+ * @returns Tokens, lowercased and annotated with their position
+ */
+export function tokenize(
+  input?: string | string[]
+): lunr.Token[] {
+  const tokens: lunr.Token[] = []
+  if (typeof input === "undefined")
+    return tokens
+
+  /* Tokenize strings one after another */
+  const inputs = Array.isArray(input) ? input : [input]
+  for (let i = 0; i < inputs.length; i++) {
+    const table = lunr.tokenizer.table
+    const total = table.length
+
+    /* Split into sections, block indexes are offset by current table size */
+    extract(inputs[i], (block, type, start, end) => {
+      table[block += total] ||= []
+      switch (type) {
+
+        /* Handle markup: pack offset, length and type into a single number */
+        case Extract.TAG_OPEN:
+        case Extract.TAG_CLOSE:
+          table[block].push(
+            start       << 12 |
+            end - start <<  2 |
+            type
+          )
+          break
+
+        /* Handle text content: split section into terms using separator */
+        case Extract.TEXT:
+          const section = inputs[i].slice(start, end)
+          split(section, lunr.tokenizer.separator, (index, until) => {
+
+            /**
+             * Apply segmenter after tokenization. Note that the segmenter will
+             * also split words at word boundaries, which is not what we want,
+             * so we need to check if we can somehow mitigate this behavior.
+             */
+            if (typeof lunr.segmenter !== "undefined") {
+              const subsection = section.slice(index, until)
+              if (/^[MHIK]$/.test(lunr.segmenter.ctype_(subsection))) {
+                const segments = lunr.segmenter.segment(subsection)
+                for (let s = 0, l = 0; s < segments.length; s++) {
+
+                  /* Add section to block, `l` is the offset of the segment */
+                  table[block] ||= []
+                  table[block].push(
+                    start + index + l  << 12 |
+                    segments[s].length <<  2 |
+                    type
+                  )
+
+                  /* Add token with position: block << 20 | section index */
+                  tokens.push(new lunr.Token(
+                    segments[s].toLowerCase(), {
+                      position: block << 20 | table[block].length - 1
+                    }
+                  ))
+
+                  /* Keep track of length */
+                  l += segments[s].length
+                }
+                return
+              }
+            }
+
+            /* Add section to block */
+            table[block].push(
+              start + index << 12 |
+              until - index <<  2 |
+              type
+            )
+
+            /* Add token with position: block << 20 | section index */
+            tokens.push(new lunr.Token(
+              section.slice(index, until).toLowerCase(), {
+                position: block << 20 | table[block].length - 1
+              }
+            ))
+          })
+      }
+    })
+  }
+
+  /* Return tokens */
+  return tokens
+}
-- 
cgit v1.2.3-70-g09d2