aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/docs/src/templates/assets/javascripts/integrations/search/internal
diff options
context:
space:
mode:
Diffstat (limited to 'docs/src/templates/assets/javascripts/integrations/search/internal')
-rw-r--r--docs/src/templates/assets/javascripts/integrations/search/internal/.eslintrc6
-rw-r--r--docs/src/templates/assets/javascripts/integrations/search/internal/_/index.ts74
-rw-r--r--docs/src/templates/assets/javascripts/integrations/search/internal/extract/index.ts107
-rw-r--r--docs/src/templates/assets/javascripts/integrations/search/internal/highlight/index.ts162
-rw-r--r--docs/src/templates/assets/javascripts/integrations/search/internal/index.ts26
-rw-r--r--docs/src/templates/assets/javascripts/integrations/search/internal/tokenize/index.ts136
6 files changed, 511 insertions, 0 deletions
diff --git a/docs/src/templates/assets/javascripts/integrations/search/internal/.eslintrc b/docs/src/templates/assets/javascripts/integrations/search/internal/.eslintrc
new file mode 100644
index 00000000..9368ceb6
--- /dev/null
+++ b/docs/src/templates/assets/javascripts/integrations/search/internal/.eslintrc
@@ -0,0 +1,6 @@
+{
+ "rules": {
+ "no-fallthrough": "off",
+ "no-underscore-dangle": "off"
+ }
+}
diff --git a/docs/src/templates/assets/javascripts/integrations/search/internal/_/index.ts b/docs/src/templates/assets/javascripts/integrations/search/internal/_/index.ts
new file mode 100644
index 00000000..ae8f6104
--- /dev/null
+++ b/docs/src/templates/assets/javascripts/integrations/search/internal/_/index.ts
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/* ----------------------------------------------------------------------------
+ * Helper types
+ * ------------------------------------------------------------------------- */
+
+/**
+ * Visitor function
+ *
+ * @param start - Start offset
+ * @param end - End offset
+ */
+type VisitorFn = (
+ start: number, end: number
+) => void
+
+/* ----------------------------------------------------------------------------
+ * Functions
+ * ------------------------------------------------------------------------- */
+
+/**
+ * Split a string using the given separator
+ *
+ * @param input - Input value
+ * @param separator - Separator
+ * @param fn - Visitor function
+ */
+export function split(
+ input: string, separator: RegExp, fn: VisitorFn
+): void {
+ separator = new RegExp(separator, "g")
+
+ /* Split string using separator */
+ let match: RegExpExecArray | null
+ let index = 0
+ do {
+ match = separator.exec(input)
+
+ /* Emit non-empty range */
+ const until = match?.index ?? input.length
+ if (index < until)
+ fn(index, until)
+
+ /* Update last index */
+ if (match) {
+ const [term] = match
+ index = match.index + term.length
+
+ /* Support zero-length lookaheads */
+ if (term.length === 0)
+ separator.lastIndex = match.index + 1
+ }
+ } while (match)
+}
diff --git a/docs/src/templates/assets/javascripts/integrations/search/internal/extract/index.ts b/docs/src/templates/assets/javascripts/integrations/search/internal/extract/index.ts
new file mode 100644
index 00000000..2a98b9e1
--- /dev/null
+++ b/docs/src/templates/assets/javascripts/integrations/search/internal/extract/index.ts
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/* ----------------------------------------------------------------------------
+ * Types
+ * ------------------------------------------------------------------------- */
+
+/**
+ * Extraction type
+ *
+ * This type defines the possible values that are encoded into the first two
+ * bits of a section that is part of the blocks of a tokenization table. There
+ * are three types of interest: HTML opening and closing tags, as well as the
+ * actual text content we need to extract for indexing.
+ */
+export const enum Extract {
+ TAG_OPEN = 0, /* HTML opening tag */
+ TEXT = 1, /* Text content */
+ TAG_CLOSE = 2 /* HTML closing tag */
+}
+
+/* ----------------------------------------------------------------------------
+ * Helper types
+ * ------------------------------------------------------------------------- */
+
+/**
+ * Visitor function
+ *
+ * @param block - Block index
+ * @param type - Extraction type
+ * @param start - Start offset
+ * @param end - End offset
+ */
+type VisitorFn = (
+ block: number, type: Extract, start: number, end: number
+) => void
+
+/* ----------------------------------------------------------------------------
+ * Functions
+ * ------------------------------------------------------------------------- */
+
+/**
+ * Split a string into markup and text sections
+ *
+ * This function scans a string and divides it up into sections of markup and
+ * text. For each section, it invokes the given visitor function with the block
+ * index, extraction type, as well as start and end offsets. Using a visitor
+ * function (= streaming data) is ideal for minimizing pressure on the GC.
+ *
+ * @param input - Input value
+ * @param fn - Visitor function
+ */
+export function extract(
+ input: string, fn: VisitorFn
+): void {
+
+ let block = 0 /* Current block */
+ let start = 0 /* Current start offset */
+ let end = 0 /* Current end offset */
+
+ /* Split string into sections */
+ for (let stack = 0; end < input.length; end++) {
+
+ /* Opening tag after non-empty section */
+ if (input.charAt(end) === "<" && end > start) {
+ fn(block, Extract.TEXT, start, start = end)
+
+ /* Closing tag */
+ } else if (input.charAt(end) === ">") {
+ if (input.charAt(start + 1) === "/") {
+ if (--stack === 0)
+ fn(block++, Extract.TAG_CLOSE, start, end + 1)
+
+ /* Tag is not self-closing */
+ } else if (input.charAt(end - 1) !== "/") {
+ if (stack++ === 0)
+ fn(block, Extract.TAG_OPEN, start, end + 1)
+ }
+
+ /* New section */
+ start = end + 1
+ }
+ }
+
+ /* Add trailing section */
+ if (end > start)
+ fn(block, Extract.TEXT, start, end)
+}
diff --git a/docs/src/templates/assets/javascripts/integrations/search/internal/highlight/index.ts b/docs/src/templates/assets/javascripts/integrations/search/internal/highlight/index.ts
new file mode 100644
index 00000000..7cc3bf1a
--- /dev/null
+++ b/docs/src/templates/assets/javascripts/integrations/search/internal/highlight/index.ts
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/* ----------------------------------------------------------------------------
+ * Types
+ * ------------------------------------------------------------------------- */
+
+/**
+ * Position table
+ */
+export type PositionTable = number[][]
+
+/**
+ * Position
+ */
+export type Position = number
+
+/* ----------------------------------------------------------------------------
+ * Functions
+ * ------------------------------------------------------------------------- */
+
+/**
+ * Highlight all occurrences in a string
+ *
+ * This function receives a field's value (e.g. like `title` or `text`), it's
+ * position table that was generated during indexing, and the positions found
+ * when executing the query. It then highlights all occurrences, and returns
+ * their concatenation. In case of multiple blocks, two are returned.
+ *
+ * @param input - Input value
+ * @param table - Table for indexing
+ * @param positions - Occurrences
+ * @param full - Full results
+ *
+ * @returns Highlighted string value
+ */
+export function highlight(
+ input: string, table: PositionTable, positions: Position[], full = false
+): string {
+ return highlightAll([input], table, positions, full).pop()!
+}
+
+/**
+ * Highlight all occurrences in a set of strings
+ *
+ * @param inputs - Input values
+ * @param table - Table for indexing
+ * @param positions - Occurrences
+ * @param full - Full results
+ *
+ * @returns Highlighted string values
+ */
+export function highlightAll(
+ inputs: string[], table: PositionTable, positions: Position[], full = false
+): string[] {
+
+ /* Map blocks to input values */
+ const mapping = [0]
+ for (let t = 1; t < table.length; t++) {
+ const prev = table[t - 1]
+ const next = table[t]
+
+ /* Check if table points to new block */
+ const p = prev[prev.length - 1] >>> 2 & 0x3FF
+ const q = next[0] >>> 12
+
+ /* Add block to mapping */
+ mapping.push(+(p > q) + mapping[mapping.length - 1])
+ }
+
+ /* Highlight strings one after another */
+ return inputs.map((input, i) => {
+ let cursor = 0
+
+ /* Map occurrences to blocks */
+ const blocks = new Map<number, number[]>()
+ for (const p of positions.sort((a, b) => a - b)) {
+ const index = p & 0xFFFFF
+ const block = p >>> 20
+ if (mapping[block] !== i)
+ continue
+
+ /* Ensure presence of block group */
+ let group = blocks.get(block)
+ if (typeof group === "undefined")
+ blocks.set(block, group = [])
+
+ /* Add index to group */
+ group.push(index)
+ }
+
+ /* Just return string, if no occurrences */
+ if (blocks.size === 0)
+ return input
+
+ /* Compute slices */
+ const slices: string[] = []
+ for (const [block, indexes] of blocks) {
+ const t = table[block]
+
+ /* Extract positions and length */
+ const start = t[0] >>> 12
+ const end = t[t.length - 1] >>> 12
+ const length = t[t.length - 1] >>> 2 & 0x3FF
+
+ /* Add prefix, if full results are desired */
+ if (full && start > cursor)
+ slices.push(input.slice(cursor, start))
+
+ /* Extract and highlight slice */
+ let slice = input.slice(start, end + length)
+ for (const j of indexes.sort((a, b) => b - a)) {
+
+ /* Retrieve offset and length of match */
+ const p = (t[j] >>> 12) - start
+ const q = (t[j] >>> 2 & 0x3FF) + p
+
+ /* Wrap occurrence */
+ slice = [
+ slice.slice(0, p),
+ "<mark>",
+ slice.slice(p, q),
+ "</mark>",
+ slice.slice(q)
+ ].join("")
+ }
+
+ /* Update cursor */
+ cursor = end + length
+
+ /* Append slice and abort if we have two */
+ if (slices.push(slice) === 2)
+ break
+ }
+
+ /* Add suffix, if full results are desired */
+ if (full && cursor < input.length)
+ slices.push(input.slice(cursor))
+
+ /* Return highlighted slices */
+ return slices.join("")
+ })
+}
diff --git a/docs/src/templates/assets/javascripts/integrations/search/internal/index.ts b/docs/src/templates/assets/javascripts/integrations/search/internal/index.ts
new file mode 100644
index 00000000..c752329e
--- /dev/null
+++ b/docs/src/templates/assets/javascripts/integrations/search/internal/index.ts
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+export * from "./_"
+export * from "./extract"
+export * from "./highlight"
+export * from "./tokenize"
diff --git a/docs/src/templates/assets/javascripts/integrations/search/internal/tokenize/index.ts b/docs/src/templates/assets/javascripts/integrations/search/internal/tokenize/index.ts
new file mode 100644
index 00000000..f5089bc9
--- /dev/null
+++ b/docs/src/templates/assets/javascripts/integrations/search/internal/tokenize/index.ts
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+import { split } from "../_"
+import {
+ Extract,
+ extract
+} from "../extract"
+
+/* ----------------------------------------------------------------------------
+ * Functions
+ * ------------------------------------------------------------------------- */
+
+/**
+ * Split a string or set of strings into tokens
+ *
+ * This tokenizer supersedes the default tokenizer that is provided by Lunr.js,
+ * as it is aware of HTML tags and allows for multi-character splitting.
+ *
+ * It takes the given inputs, splits each of them into markup and text sections,
+ * tokenizes and segments (if necessary) each of them, and then indexes them in
+ * a table by using a compact bit representation. Bitwise techniques are used
+ * to write and read from the table during indexing and querying.
+ *
+ * @see https://bit.ly/3W3Xw4J - Search: better, faster, smaller
+ *
+ * @param input - Input value(s)
+ *
+ * @returns Tokens
+ */
+export function tokenize(
+ input?: string | string[]
+): lunr.Token[] {
+ const tokens: lunr.Token[] = []
+ if (typeof input === "undefined")
+ return tokens
+
+ /* Tokenize strings one after another */
+ const inputs = Array.isArray(input) ? input : [input]
+ for (let i = 0; i < inputs.length; i++) {
+ const table = lunr.tokenizer.table
+ const total = table.length
+
+ /* Split string into sections and tokenize content blocks */
+ extract(inputs[i], (block, type, start, end) => {
+ table[block += total] ||= []
+ switch (type) {
+
+ /* Handle markup */
+ case Extract.TAG_OPEN:
+ case Extract.TAG_CLOSE:
+ table[block].push(
+ start << 12 |
+ end - start << 2 |
+ type
+ )
+ break
+
+ /* Handle text content */
+ case Extract.TEXT:
+ const section = inputs[i].slice(start, end)
+ split(section, lunr.tokenizer.separator, (index, until) => {
+
+ /**
+ * Apply segmenter after tokenization. Note that the segmenter will
+ * also split words at word boundaries, which is not what we want,
+ * so we need to check if we can somehow mitigate this behavior.
+ */
+ if (typeof lunr.segmenter !== "undefined") {
+ const subsection = section.slice(index, until)
+ if (/^[MHIK]$/.test(lunr.segmenter.ctype_(subsection))) {
+ const segments = lunr.segmenter.segment(subsection)
+ for (let s = 0, l = 0; s < segments.length; s++) {
+
+ /* Add block to section */
+ table[block] ||= []
+ table[block].push(
+ start + index + l << 12 |
+ segments[s].length << 2 |
+ type
+ )
+
+ /* Add token with position */
+ tokens.push(new lunr.Token(
+ segments[s].toLowerCase(), {
+ position: block << 20 | table[block].length - 1
+ }
+ ))
+
+ /* Keep track of length */
+ l += segments[s].length
+ }
+ return
+ }
+ }
+
+ /* Add block to section */
+ table[block].push(
+ start + index << 12 |
+ until - index << 2 |
+ type
+ )
+
+ /* Add token with position */
+ tokens.push(new lunr.Token(
+ section.slice(index, until).toLowerCase(), {
+ position: block << 20 | table[block].length - 1
+ }
+ ))
+ })
+ }
+ })
+ }
+
+ /* Return tokens */
+ return tokens
+}