1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
|
/*
* Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/* ----------------------------------------------------------------------------
* Helper types
* ------------------------------------------------------------------------- */
/**
 * Visitor function
 *
 * Called once for every whitespace-separated part of the query, and may
 * split the part further by returning several terms.
 *
 * @param value - String value
 *
 * @returns String term(s)
 */
type VisitorFn = (
  value: string
) => string | string[]

/* ----------------------------------------------------------------------------
 * Functions
 * ------------------------------------------------------------------------- */

/**
 * Default transformation function
 *
 * 1. Trim excess whitespace from left and right.
 *
 * 2. Search for parts in quotation marks and prepend a `+` modifier to denote
 *    that the resulting document must contain all parts, converting the query
 *    to an `AND` query (as opposed to the default `OR` behavior). While users
 *    may expect parts enclosed in quotation marks to map to span queries, i.e.
 *    for which order is important, Lunr.js doesn't support them, so the best
 *    we can do is to convert the parts to an `AND` query.
 *
 * 3. Remove runs of control characters (`*`, `+`, `-`, `:`, `^`, `~`) that
 *    stand alone, i.e. that start at the beginning of the query or after
 *    whitespace, and are followed by whitespace or the end of the query.
 *    Furthermore, remove all remaining (unmatched) quotation marks.
 *
 * 4. Split the query string at whitespace, discard empty parts, then pass each
 *    part to the visitor function for tokenization, and append a wildcard to
 *    every resulting term that does not start with a `+` or `-` modifier and
 *    does not end with a `~` or `^` modifier followed by a number, since it
 *    ensures consistent and stable ranking when multiple terms are entered.
 *    Also, if a fuzzy or boost modifier is given, but no numeric value has
 *    been entered, default to 1 to not induce a query error.
 *
 * Empty parts arise when the query starts with a quoted phrase (step 2 emits
 * a leading space) or with a stand-alone control character (step 3 leaves a
 * leading space). They are dropped so that they don't turn into a stray `*`
 * term. A query that contains no parts at all still results in `*`.
 *
 * @param query - Query value
 * @param fn - Visitor function
 *
 * @returns Transformed query value
 */
export function transform(
  query: string, fn: VisitorFn = term => term
): string {

  /* => 1 */
  const trimmed = query.trim()

  /* => 2 - odd indexes hold the contents of the quoted parts */
  const required = trimmed
    .split(/"([^"]+)"/g)
    .map((parts, index) => index & 1
      ? parts.replace(/^\b|^(?![^\x00-\x7F]|$)|\s+/g, " +")
      : parts
    )
    .join("")

  /* => 3 */
  const sanitized = required
    .replace(/"|(?:^|\s+)[*+\-:^~]+(?=\s+|$)/g, "")

  /* => 4 - drop empty parts, so they don't end up as stray wildcards */
  const parts = sanitized
    .split(/\s+/g)
    .filter(part => part !== "")

  /* Keep a single empty part for an entirely empty query, which retains the
     previous result (`*`) for callers that rely on it */
  const terms: string[] = []
  for (const part of parts.length ? parts : [""]) {
    const next = fn(part)
    if (Array.isArray(next)) {
      for (const term of next)
        terms.push(term)
    } else {
      terms.push(next)
    }
  }

  /* Default dangling fuzzy or boost modifiers to 1, then append wildcards */
  return terms
    .map(term => /([~^]$)/.test(term) ? `${term}1` : term)
    .map(term => /(^[+-]|[~^]\d+$)/.test(term) ? term : `${term}*`)
    .join(" ")
}
|