aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/docs/src/templates/assets/javascripts/integrations/search/query
diff options
context:
space:
mode:
Diffstat (limited to 'docs/src/templates/assets/javascripts/integrations/search/query')
-rw-r--r--docs/src/templates/assets/javascripts/integrations/search/query/.eslintrc6
-rw-r--r--docs/src/templates/assets/javascripts/integrations/search/query/_/index.ts172
-rw-r--r--docs/src/templates/assets/javascripts/integrations/search/query/index.ts25
-rw-r--r--docs/src/templates/assets/javascripts/integrations/search/query/segment/index.ts81
-rw-r--r--docs/src/templates/assets/javascripts/integrations/search/query/transform/index.ts99
5 files changed, 383 insertions, 0 deletions
diff --git a/docs/src/templates/assets/javascripts/integrations/search/query/.eslintrc b/docs/src/templates/assets/javascripts/integrations/search/query/.eslintrc
new file mode 100644
index 00000000..3031c7e3
--- /dev/null
+++ b/docs/src/templates/assets/javascripts/integrations/search/query/.eslintrc
@@ -0,0 +1,6 @@
+{
+ "rules": {
+ "no-control-regex": "off",
+ "@typescript-eslint/no-explicit-any": "off"
+ }
+}
diff --git a/docs/src/templates/assets/javascripts/integrations/search/query/_/index.ts b/docs/src/templates/assets/javascripts/integrations/search/query/_/index.ts
new file mode 100644
index 00000000..14482e43
--- /dev/null
+++ b/docs/src/templates/assets/javascripts/integrations/search/query/_/index.ts
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+import { split } from "../../internal"
+import { transform } from "../transform"
+
+/* ----------------------------------------------------------------------------
+ * Types
+ * ------------------------------------------------------------------------- */
+
+/**
+ * Search query clause
+ *
+ * A single clause of a parsed search query, i.e. a term together with its
+ * presence, as returned by `parseSearchQuery` and consumed by
+ * `getSearchQueryTerms`.
+ */
+export interface SearchQueryClause {
+ presence: lunr.Query.presence /* Clause presence */
+ term: string /* Clause term */
+}
+
+/* ------------------------------------------------------------------------- */
+
+/**
+ * Search query terms
+ *
+ * Maps the term of a query clause to a flag: `true` if at least one search
+ * term starts with the clause term, `false` if the clause remained unmatched
+ * and was annotated by `getSearchQueryTerms`.
+ */
+export type SearchQueryTerms = Record<string, boolean>
+
+/* ----------------------------------------------------------------------------
+ * Functions
+ * ------------------------------------------------------------------------- */
+
+/**
+ * Transform search query
+ *
+ * The query is handed to `transform` together with a visitor that lexes each
+ * part with the Lunr.js query lexer. Every term lexeme is passed to `split`
+ * along with the tokenizer separator, and each reported range of the term is
+ * re-embedded into the text surrounding the original term, so that markup
+ * like `+` and `-` modifiers is preserved.
+ *
+ * @param query - Search query
+ *
+ * @returns Search query
+ */
+export function transformSearchQuery(
+  query: string
+): string {
+  return transform(query, part => {
+
+    /* Lex the part as given, before any rewriting takes place */
+    const lexer = new lunr.QueryLexer(part)
+    lexer.run()
+
+    /* Working copy of the part - rewritten when unknown fields are found */
+    let text = part
+
+    /* Collect one entry for each token of each term */
+    const result: string[] = []
+    for (const lexeme of lexer.lexemes) {
+      const { type, str, start, end } = lexeme
+
+      /* Hack: remove colon - see https://bit.ly/3wD3T3I */
+      if (type === "FIELD") {
+        if (!["title", "text", "tags"].includes(str))
+          text = `${text.slice(0, end)} ${text.slice(end + 1)}`
+
+      /* Tokenize term and splice each token into the surrounding text */
+      } else if (type === "TERM") {
+        split(str, lunr.tokenizer.separator, (...range) => {
+          const head = text.slice(0, start)
+          const tail = text.slice(end)
+          result.push(`${head}${str.slice(...range)}${tail}`)
+        })
+      }
+    }
+
+    /* Return terms */
+    return result
+  })
+}
+
+/* ------------------------------------------------------------------------- */
+
+/**
+ * Parse a search query for analysis
+ *
+ * Lunr.js itself has a bug where it doesn't detect or remove wildcards for
+ * query clauses, so we must do this here. Leading and trailing wildcards are
+ * stripped from the clause term and recorded as bit flags on the clause, so
+ * a term like `*foo*` ends up flagged as both leading and trailing.
+ *
+ * @see https://bit.ly/3DpTGtz - GitHub issue
+ *
+ * @param value - Query value
+ *
+ * @returns Search query clauses
+ */
+export function parseSearchQuery(
+  value: string
+): SearchQueryClause[] {
+  const query = new lunr.Query(["title", "text", "tags"])
+  const parser = new lunr.QueryParser(value, query)
+
+  /* Parse search query */
+  parser.parse()
+  for (const clause of query.clauses) {
+    clause.usePipeline = true
+
+    /* Handle leading wildcard */
+    if (clause.term.startsWith("*")) {
+      clause.wildcard = (clause.wildcard ?? 0) | lunr.Query.wildcard.LEADING
+      clause.term = clause.term.slice(1)
+    }
+
+    /* Handle trailing wildcard - keep a leading wildcard flag if present */
+    if (clause.term.endsWith("*")) {
+      clause.wildcard = (clause.wildcard ?? 0) | lunr.Query.wildcard.TRAILING
+      clause.term = clause.term.slice(0, -1)
+    }
+  }
+
+  /* Return query clauses */
+  return query.clauses
+}
+
+/**
+ * Analyze the search query clauses in regard to the search terms found
+ *
+ * A clause counts as matched as soon as one of the given terms starts with
+ * the clause term. Clauses that remain unmatched are reported as `false`,
+ * but only if the Lunr.js stop word filter is available and returns a truthy
+ * value for the clause term.
+ *
+ * @param query - Search query clauses
+ * @param terms - Search terms
+ *
+ * @returns Search query terms
+ */
+export function getSearchQueryTerms(
+  query: SearchQueryClause[], terms: string[]
+): SearchQueryTerms {
+  const result: SearchQueryTerms = {}
+
+  /* Clauses that have not been matched by any term yet */
+  const pending = new Set<SearchQueryClause>(query)
+
+  /* Match terms against pending clauses, dropping clauses once matched */
+  for (const term of terms) {
+    for (const clause of pending) {
+      if (!term.startsWith(clause.term))
+        continue
+      result[clause.term] = true
+      pending.delete(clause)
+    }
+  }
+
+  /* Annotate unmatched non-stopword query clauses */
+  for (const clause of pending) {
+    if (lunr.stopWordFilter?.(clause.term))
+      result[clause.term] = false
+  }
+
+  /* Return query terms */
+  return result
+}
diff --git a/docs/src/templates/assets/javascripts/integrations/search/query/index.ts b/docs/src/templates/assets/javascripts/integrations/search/query/index.ts
new file mode 100644
index 00000000..763e2fd4
--- /dev/null
+++ b/docs/src/templates/assets/javascripts/integrations/search/query/index.ts
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+export * from "./_"
+export * from "./segment"
+export * from "./transform"
diff --git a/docs/src/templates/assets/javascripts/integrations/search/query/segment/index.ts b/docs/src/templates/assets/javascripts/integrations/search/query/segment/index.ts
new file mode 100644
index 00000000..b96796f4
--- /dev/null
+++ b/docs/src/templates/assets/javascripts/integrations/search/query/segment/index.ts
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/* ----------------------------------------------------------------------------
+ * Functions
+ * ------------------------------------------------------------------------- */
+
+/**
+ * Segment a search query using the inverted index
+ *
+ * This function implements a clever approach to text segmentation for Asian
+ * languages, as it uses the information already available in the search index.
+ * The idea is to greedily segment the search query based on the tokens that are
+ * already part of the index, as described in the linked issue.
+ *
+ * If segmentation yields an empty segment, the whole query is returned as the
+ * only segment instead.
+ *
+ * @see https://bit.ly/3lwjrk7 - GitHub issue
+ *
+ * @param query - Query value
+ * @param index - Inverted index
+ *
+ * @returns Segmented query value
+ */
+export function segment(
+ query: string, index: object
+): Iterable<string> {
+ const segments = new Set<string>()
+
+ /*
+ * Segment search query - for every start position `i`, record the length of
+ * the longest substring starting at `i` that is a key of the index, or leave
+ * it at 0 if there is none. Later (longer) matches overwrite earlier ones.
+ *
+ * NOTE(review): as `j` stops before `query.length`, the candidates never
+ * include the last character of the query - confirm whether this is intended.
+ * NOTE(review): the `in` operator also matches inherited properties, unless
+ * the index object has no prototype - confirm against the caller.
+ */
+ const wordcuts = new Uint16Array(query.length)
+ for (let i = 0; i < query.length; i++)
+ for (let j = i + 1; j < query.length; j++) {
+ const value = query.slice(i, j)
+ if (value in index)
+ wordcuts[i] = j - i
+ }
+
+ /*
+ * Compute longest matches with minimum overlap - `stack` holds the start
+ * positions that still need to be processed, and `s` is the number of live
+ * entries, i.e. it acts as the stack pointer.
+ */
+ const stack = [0]
+ for (let s = stack.length; s > 0;) {
+ const p = stack[--s]
+
+ /*
+ * Check all positions inside the match starting at `p` - if a match that
+ * starts at `p + q` reaches beyond the end of the match starting at `p`,
+ * emit the part in front of it and schedule `p + q` for processing.
+ */
+ for (let q = 1; q < wordcuts[p]; q++)
+ if (wordcuts[p + q] > wordcuts[p] - q) {
+ segments.add(query.slice(p, p + q))
+ stack[s++] = p + q
+ }
+
+ /*
+ * Continue at the end of the match starting at `p`, if another match
+ * starts there and it is located before the last character of the query
+ */
+ const q = p + wordcuts[p]
+ if (wordcuts[q] && q < query.length - 1)
+ stack[s++] = q
+
+ /* Add current segment - this is the empty string if nothing matched at p */
+ segments.add(query.slice(p, q))
+ }
+
+ // @todo fix this case in the code block above, this is a hotfix
+ /* Fall back to the unsegmented query if an empty segment was produced */
+ if (segments.has(""))
+ return new Set([query])
+
+ /* Return segmented query value */
+ return segments
+}
diff --git a/docs/src/templates/assets/javascripts/integrations/search/query/transform/index.ts b/docs/src/templates/assets/javascripts/integrations/search/query/transform/index.ts
new file mode 100644
index 00000000..41497786
--- /dev/null
+++ b/docs/src/templates/assets/javascripts/integrations/search/query/transform/index.ts
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/* ----------------------------------------------------------------------------
+ * Helper types
+ * ------------------------------------------------------------------------- */
+
+/**
+ * Visitor function
+ *
+ * Called by `transform` for each whitespace-separated part of the query. It
+ * may return a single term or a list of terms, which replace the part in the
+ * transformed query.
+ *
+ * @param value - String value
+ *
+ * @returns String term(s)
+ */
+type VisitorFn = (
+ value: string
+) => string | string[]
+
+/* ----------------------------------------------------------------------------
+ * Functions
+ * ------------------------------------------------------------------------- */
+
+/**
+ * Default transformation function
+ *
+ * The query is rewritten in four consecutive steps:
+ *
+ * 1. Excess whitespace is trimmed from both ends.
+ *
+ * 2. Parts enclosed in quotation marks are located, and a `+` modifier is
+ *    inserted in place of every run of whitespace inside them, as well as at
+ *    their beginning if they start with an ASCII character. This converts
+ *    the quoted parts to an `AND` query (as opposed to the default `OR`
+ *    behavior). Users may expect quoted parts to map to span queries, i.e.
+ *    for which order is important, but Lunr.js doesn't support them, so an
+ *    `AND` query is the closest approximation.
+ *
+ * 3. Remaining quotation marks are removed, and so are runs of the control
+ *    characters `*`, `+`, `-`, `:`, `^` and `~` that stand on their own, i.e.
+ *    that are preceded by whitespace or the beginning of the query, and are
+ *    followed by whitespace or the end of the query.
+ *
+ * 4. The query is split at whitespace and each part is passed to the visitor
+ *    function for tokenization. If a resulting term ends with a fuzzy or
+ *    boost modifier without a numeric value, the value defaults to 1 to not
+ *    induce a query error. A wildcard is appended to every term that is not
+ *    explicitly marked with a `+`, `-`, `~` or `^` modifier, since it ensures
+ *    consistent and stable ranking when multiple terms are entered.
+ *
+ * @param query - Query value
+ * @param fn - Visitor function
+ *
+ * @returns Transformed query value
+ */
+export function transform(
+  query: string, fn: VisitorFn = term => term
+): string {
+
+  /* => 1 */
+  const trimmed = query.trim()
+
+  /* => 2 - odd entries hold the contents of quoted parts */
+  const chunks = trimmed.split(/"([^"]+)"/g)
+  const conjoined = chunks
+    .map((chunk, index) => {
+      if (index % 2 === 1)
+        return chunk.replace(/^\b|^(?![^\x00-\x7F]|$)|\s+/g, " +")
+      return chunk
+    })
+    .join("")
+
+  /* => 3 */
+  const cleaned = conjoined.replace(/"|(?:^|\s+)[*+\-:^~]+(?=\s+|$)/g, "")
+
+  /* => 4 - collect the terms the visitor returns for every part */
+  const terms: string[] = []
+  for (const part of cleaned.split(/\s+/g)) {
+    const next = fn(part)
+    if (Array.isArray(next)) {
+      for (const term of next)
+        terms.push(term)
+    } else {
+      terms.push(next)
+    }
+  }
+
+  /* Default missing modifier values, then append wildcards where needed */
+  return terms
+    .map(term => {
+      const valued = /([~^]$)/.test(term) ? `${term}1` : term
+      return /(^[+-]|[~^]\d+$)/.test(valued) ? valued : `${valued}*`
+    })
+    .join(" ")
+}