diff options
Diffstat (limited to 'src/templates/assets/javascripts/integrations')
27 files changed, 2766 insertions, 0 deletions
diff --git a/src/templates/assets/javascripts/integrations/clipboard/index.ts b/src/templates/assets/javascripts/integrations/clipboard/index.ts new file mode 100644 index 00000000..cf46f601 --- /dev/null +++ b/src/templates/assets/javascripts/integrations/clipboard/index.ts @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +import ClipboardJS from "clipboard" +import { + Observable, + Subject, + map, + tap +} from "rxjs" + +import { translation } from "~/_" +import { getElement } from "~/browser" + +/* ---------------------------------------------------------------------------- + * Helper types + * ------------------------------------------------------------------------- */ + +/** + * Setup options + */ +interface SetupOptions { + alert$: Subject<string> /* Alert subject */ +} + +/* ---------------------------------------------------------------------------- + * Helper functions + * ------------------------------------------------------------------------- */ + +/** + * Extract text to copy + * + * @param el - HTML element + * + * @returns Extracted text + */ +function extract(el: HTMLElement): string { + el.setAttribute("data-md-copying", "") + const copy = el.closest("[data-copy]") + const text = copy + ? copy.getAttribute("data-copy")! + : el.innerText + el.removeAttribute("data-md-copying") + return text +} + +/* ---------------------------------------------------------------------------- + * Functions + * ------------------------------------------------------------------------- */ + +/** + * Set up Clipboard.js integration + * + * @param options - Options + */ +export function setupClipboardJS( + { alert$ }: SetupOptions +): void { + if (ClipboardJS.isSupported()) { + new Observable<ClipboardJS.Event>(subscriber => { + new ClipboardJS("[data-clipboard-target], [data-clipboard-text]", { + text: el => ( + el.getAttribute("data-clipboard-text")! || + extract(getElement( + el.getAttribute("data-clipboard-target")! + )) + ) + }) + .on("success", ev => subscriber.next(ev)) + }) + .pipe( + tap(ev => { + const trigger = ev.trigger as HTMLElement + trigger.focus() + }), + map(() => translation("clipboard.copied")) + ) + .subscribe(alert$) + } +} diff --git a/src/templates/assets/javascripts/integrations/index.ts b/src/templates/assets/javascripts/integrations/index.ts new file mode 100644 index 00000000..5d91a9d5 --- /dev/null +++ b/src/templates/assets/javascripts/integrations/index.ts @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +export * from "./clipboard" +export * from "./instant" +export * from "./search" +export * from "./sitemap" +export * from "./version" diff --git a/src/templates/assets/javascripts/integrations/instant/.eslintrc b/src/templates/assets/javascripts/integrations/instant/.eslintrc new file mode 100644 index 00000000..5adf108a --- /dev/null +++ b/src/templates/assets/javascripts/integrations/instant/.eslintrc @@ -0,0 +1,6 @@ +{ + "rules": { + "no-self-assign": "off", + "no-null/no-null": "off" + } +} diff --git a/src/templates/assets/javascripts/integrations/instant/index.ts b/src/templates/assets/javascripts/integrations/instant/index.ts new file mode 100644 index 00000000..d321b751 --- /dev/null +++ b/src/templates/assets/javascripts/integrations/instant/index.ts @@ -0,0 +1,446 @@ +/* + * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +import { + EMPTY, + Observable, + Subject, + bufferCount, + catchError, + concat, + debounceTime, + distinctUntilKeyChanged, + endWith, + filter, + fromEvent, + ignoreElements, + map, + of, + sample, + share, + skip, + startWith, + switchMap, + take, + withLatestFrom +} from "rxjs" + +import { configuration, feature } from "~/_" +import { + Viewport, + getElement, + getElements, + getLocation, + getOptionalElement, + request, + setLocation, + setLocationHash +} from "~/browser" +import { getComponentElement } from "~/components" + +import { fetchSitemap } from "../sitemap" + +/* ---------------------------------------------------------------------------- + * Helper types + * ------------------------------------------------------------------------- */ + +/** + * Setup options + */ +interface SetupOptions { + location$: Subject<URL> // Location subject + viewport$: Observable<Viewport> // Viewport observable + progress$: Subject<number> // Progress suject +} + +/* ---------------------------------------------------------------------------- + * Helper functions + * ------------------------------------------------------------------------- */ + +/** + * Create a map of head elements for lookup and replacement + * + * @param head - Document head + * + * @returns Element map + */ +function lookup(head: HTMLHeadElement): Map<string, HTMLElement> { + + // @todo When resolving URLs, we must make sure to use the correct base for + // resolution. The next time we refactor instant loading, we should use the + // location subject as a source, which is also used for anchor links tracking, + // but for now we just rely on canonical. + const canonical = getElement<HTMLLinkElement>("[rel=canonical]", head) + canonical.href = canonical.href.replace("//localhost:", "//127.0.0.1") + + // Create tag map and index elements in head + const tags = new Map<string, HTMLElement>() + for (const el of getElements(":scope > *", head)) { + let html = el.outerHTML + + // If the current element is a style sheet or script, we must resolve the + // URL relative to the current location and make it absolute, so it's easy + // to deduplicate it later on by comparing the outer HTML of tags. We must + // keep identical style sheets and scripts without replacing them. + for (const key of ["href", "src"]) { + const value = el.getAttribute(key)! + if (value === null) + continue + + // Resolve URL relative to current location + const url = new URL(value, canonical.href) + const ref = el.cloneNode() as HTMLElement + + // Set resolved URL and retrieve HTML for deduplication + ref.setAttribute(key, `${url}`) + html = ref.outerHTML + break + } + + // Index element in tag map + tags.set(html, el) + } + + // Return tag map + return tags +} + +/* ---------------------------------------------------------------------------- + * Functions + * ------------------------------------------------------------------------- */ + +/** + * Set up instant navigation + * + * This is a heavily orchestrated operation - see inline comments to learn how + * this works with Material for MkDocs, and how you can hook into it. + * + * @param options - Options + * + * @returns Document observable + */ +export function setupInstantNavigation( + { location$, viewport$, progress$ }: SetupOptions +): Observable<Document> { + const config = configuration() + if (location.protocol === "file:") + return EMPTY + + // Load sitemap immediately, so we have it available when the user initiates + // the first instant navigation request, and canonicalize URLs to the current + // base URL. The base URL will remain stable in between loads, as it's only + // read at the first initialization of the application. + const sitemap$ = fetchSitemap() + .pipe( + map(paths => paths.map(path => `${new URL(path, config.base)}`)) + ) + + // Intercept inter-site navigation - to keep the number of event listeners + // low we use the fact that uncaptured events bubble up to the body. This also + // has the nice property that we don't need to detach and then again attach + // event listeners when instant navigation occurs. + const instant$ = fromEvent<MouseEvent>(document.body, "click") + .pipe( + withLatestFrom(sitemap$), + switchMap(([ev, sitemap]) => { + if (!(ev.target instanceof Element)) + return EMPTY + + // Skip, as target is not within a link - clicks on non-link elements + // are also captured, which we need to exclude from processing + const el = ev.target.closest("a") + if (el === null) + return EMPTY + + // Skip, as link opens in new window - we now know we have captured a + // click on a link, but the link either has a `target` property defined, + // or the user pressed the `meta` or `ctrl` key to open it in a new + // window. Thus, we need to filter those events, too. + if (el.target || ev.metaKey || ev.ctrlKey) + return EMPTY + + // Next, we must check if the URL is relevant for us, i.e., if it's an + // internal link to a page that is managed by MkDocs. Only then we can + // be sure that the structure of the page to be loaded adheres to the + // current document structure and can subsequently be injected into it + // without doing a full reload. For this reason, we must canonicalize + // the URL by removing all search parameters and hash fragments. + const url = new URL(el.href) + url.search = url.hash = "" + + // Skip, if URL is not included in the sitemap - this could be the case + // when linking between versions or languages, or to another page that + // the author included as part of the build, but that is not managed by + // MkDocs. In that case we must not continue with instant navigation. + if (!sitemap.includes(`${url}`)) + return EMPTY + + // We now know that we have a link to an internal page, so we prevent + // the browser from navigation and emit the URL for instant navigation. + // Note that this also includes anchor links, which means we need to + // implement anchor positioning ourselves. The reason for this is that + // if we wouldn't manage anchor links as well, scroll restoration will + // not work correctly (e.g. following an anchor link and scrolling). + ev.preventDefault() + return of(new URL(el.href)) + }), + share() + ) + + // Before fetching for the first time, resolve the absolute favicon position, + // as the browser will try to fetch the icon immediately + instant$.pipe(take(1)) + .subscribe(() => { + const favicon = getOptionalElement<HTMLLinkElement>("link[rel=icon]") + if (typeof favicon !== "undefined") + favicon.href = favicon.href + }) + + // Enable scroll restoration before window unloads - this is essential to + // ensure that full reloads (F5) restore the viewport offset correctly. If + // only popstate events wouldn't reset the scroll position prior to their + // emission, we could just reset this in popstate. Meh. + fromEvent(window, "beforeunload") + .subscribe(() => { + history.scrollRestoration = "auto" + }) + + // When an instant navigation event occurs, disable scroll restoration, since + // we must normalize and synchronize the behavior across all browsers. For + // instance, when the user clicks the back or forward button, the browser + // would immediately jump to the position of the previous document. + instant$.pipe(withLatestFrom(viewport$)) + .subscribe(([url, { offset }]) => { + history.scrollRestoration = "manual" + + // While it would be better UX to defer the history state change until the + // document was fully fetched and parsed, we must schedule it here, since + // popstate events are emitted when history state changes happen. Moreover + // we need to back up the current viewport offset, so we can restore it + // when popstate events occur, e.g., when the browser's back and forward + // buttons are used for navigation. + history.replaceState(offset, "") + history.pushState(null, "", url) + }) + + // Emit URL that should be fetched via instant navigation on location subject, + // which was passed into this function. Instant navigation can be intercepted + // by other parts of the application, which can synchronously back up or + // restore state before instant navigation happens. + instant$.subscribe(location$) + + // Fetch document - when fetching, we could use `responseType: document`, but + // since all MkDocs links are relative, we need to make sure that the current + // location matches the document we just loaded. Otherwise any relative links + // in the document might use the old location. If the request fails for some + // reason, we fall back to regular navigation and set the location explicitly, + // which will force-load the page. Furthermore, we must pre-warm the buffer + // for the duplicate check, or the first click on an anchor link will also + // trigger an instant navigation event, which doesn't make sense. + const response$ = location$ + .pipe( + startWith(getLocation()), + distinctUntilKeyChanged("pathname"), + skip(1), + switchMap(url => request(url, { progress$ }) + .pipe( + catchError(() => { + setLocation(url, true) + return EMPTY + }) + ) + ) + ) + + // Initialize the DOM parser, parse the returned HTML, and replace selected + // components before handing control down to the application + const dom = new DOMParser() + const document$ = response$ + .pipe( + switchMap(res => res.text()), + switchMap(res => { + const next = dom.parseFromString(res, "text/html") + for (const selector of [ + "[data-md-component=announce]", + "[data-md-component=container]", + "[data-md-component=header-topic]", + "[data-md-component=outdated]", + "[data-md-component=logo]", + "[data-md-component=skip]", + ...feature("navigation.tabs.sticky") + ? ["[data-md-component=tabs]"] + : [] + ]) { + const source = getOptionalElement(selector) + const target = getOptionalElement(selector, next) + if ( + typeof source !== "undefined" && + typeof target !== "undefined" + ) { + source.replaceWith(target) + } + } + + // Update meta tags + const source = lookup(document.head) + const target = lookup(next.head) + for (const [html, el] of target) { + + // Hack: skip stylesheets and scripts until we manage to replace them + // entirely in order to omit flashes of white content @todo refactor + if ( + el.getAttribute("rel") === "stylesheet" || + el.hasAttribute("src") + ) + continue + + if (source.has(html)) { + source.delete(html) + } else { + document.head.appendChild(el) + } + } + + // Remove meta tags that are not present in the new document + for (const el of source.values()) + + // Hack: skip stylesheets and scripts until we manage to replace them + // entirely in order to omit flashes of white content @todo refactor + if ( + el.getAttribute("rel") === "stylesheet" || + el.hasAttribute("src") + ) + continue + else + el.remove() + + // After components and meta tags were replaced, re-evaluate scripts + // that were provided by the author as part of Markdown files + const container = getComponentElement("container") + return concat(getElements("script", container)) + .pipe( + switchMap(el => { + const script = next.createElement("script") + if (el.src) { + for (const name of el.getAttributeNames()) + script.setAttribute(name, el.getAttribute(name)!) + el.replaceWith(script) + + // Complete when script is loaded + return new Observable(observer => { + script.onload = () => observer.complete() + }) + + // Complete immediately + } else { + script.textContent = el.textContent + el.replaceWith(script) + return EMPTY + } + }), + ignoreElements(), + endWith(next) + ) + }), + share() + ) + + // Intercept popstate events, e.g. when using the browser's back and forward + // buttons, and emit new location for fetching and parsing + const popstate$ = fromEvent<PopStateEvent>(window, "popstate") + popstate$.pipe(map(getLocation)) + .subscribe(location$) + + // Intercept clicks on anchor links, and scroll document into position - as + // we disabled scroll restoration, we need to do this manually here + location$ + .pipe( + startWith(getLocation()), + bufferCount(2, 1), + filter(([prev, next]) => ( + prev.pathname === next.pathname && + prev.hash !== next.hash + )), + map(([, next]) => next) + ) + .subscribe(url => { + if (history.state !== null || !url.hash) { + window.scrollTo(0, history.state?.y ?? 0) + } else { + history.scrollRestoration = "auto" + setLocationHash(url.hash) + history.scrollRestoration = "manual" + } + }) + + // Intercept clicks on the same anchor link - we must use a distinct pipeline + // for this, or we'd end up in a loop, setting the hash again and again + location$ + .pipe( + sample(instant$), + startWith(getLocation()), + bufferCount(2, 1), + filter(([prev, next]) => ( + prev.pathname === next.pathname && + prev.hash === next.hash + )), + map(([, next]) => next) + ) + .subscribe(url => { + history.scrollRestoration = "auto" + setLocationHash(url.hash) + history.scrollRestoration = "manual" + + // Hack: we need to make sure that we don't end up with multiple history + // entries for the same anchor link, so we just remove the last entry + history.back() + }) + + // After parsing the document, check if the current history entry has a state. + // This may happen when users press the back or forward button to visit a page + // that was already seen. If there's no state, it means a new page was visited + // and we should scroll to the top, unless an anchor is given. + document$.pipe(withLatestFrom(location$)) + .subscribe(([, url]) => { + if (history.state !== null || !url.hash) { + window.scrollTo(0, history.state?.y ?? 0) + } else { + setLocationHash(url.hash) + } + }) + + // If the current history is not empty, register an event listener updating + // the current history state whenever the scroll position changes. This must + // be debounced and cannot be done in popstate, as popstate has already + // removed the entry from the history. + viewport$ + .pipe( + distinctUntilKeyChanged("offset"), + debounceTime(100) + ) + .subscribe(({ offset }) => { + history.replaceState(offset, "") + }) + + // Return document + return document$ +} diff --git a/src/templates/assets/javascripts/integrations/search/_/index.ts b/src/templates/assets/javascripts/integrations/search/_/index.ts new file mode 100644 index 00000000..0e217fa4 --- /dev/null +++ b/src/templates/assets/javascripts/integrations/search/_/index.ts @@ -0,0 +1,332 @@ +/* + * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +import { + SearchDocument, + SearchIndex, + SearchOptions, + setupSearchDocumentMap +} from "../config" +import { + Position, + PositionTable, + highlight, + highlightAll, + tokenize +} from "../internal" +import { + SearchQueryTerms, + getSearchQueryTerms, + parseSearchQuery, + segment, + transformSearchQuery +} from "../query" + +/* ---------------------------------------------------------------------------- + * Types + * ------------------------------------------------------------------------- */ + +/** + * Search item + */ +export interface SearchItem + extends SearchDocument +{ + score: number /* Score (relevance) */ + terms: SearchQueryTerms /* Search query terms */ +} + +/** + * Search result + */ +export interface SearchResult { + items: SearchItem[][] /* Search items */ + suggest?: string[] /* Search suggestions */ +} + +/* ---------------------------------------------------------------------------- + * Functions + * ------------------------------------------------------------------------- */ + +/** + * Create field extractor factory + * + * @param table - Position table map + * + * @returns Extractor factory + */ +function extractor(table: Map<string, PositionTable>) { + return (name: keyof SearchDocument) => { + return (doc: SearchDocument) => { + if (typeof doc[name] === "undefined") + return undefined + + /* Compute identifier and initialize table */ + const id = [doc.location, name].join(":") + table.set(id, lunr.tokenizer.table = []) + + /* Return field value */ + return doc[name] + } + } +} + +/** + * Compute the difference of two lists of strings + * + * @param a - 1st list of strings + * @param b - 2nd list of strings + * + * @returns Difference + */ +function difference(a: string[], b: string[]): string[] { + const [x, y] = [new Set(a), new Set(b)] + return [ + ...new Set([...x].filter(value => !y.has(value))) + ] +} + +/* ---------------------------------------------------------------------------- + * Class + * ------------------------------------------------------------------------- */ + +/** + * Search index + */ +export class Search { + + /** + * Search document map + */ + protected map: Map<string, SearchDocument> + + /** + * Search options + */ + protected options: SearchOptions + + /** + * The underlying Lunr.js search index + */ + protected index: lunr.Index + + /** + * Internal position table map + */ + protected table: Map<string, PositionTable> + + /** + * Create the search integration + * + * @param data - Search index + */ + public constructor({ config, docs, options }: SearchIndex) { + const field = extractor(this.table = new Map()) + + /* Set up document map and options */ + this.map = setupSearchDocumentMap(docs) + this.options = options + + /* Set up document index */ + this.index = lunr(function () { + this.metadataWhitelist = ["position"] + this.b(0) + + /* Set up (multi-)language support */ + if (config.lang.length === 1 && config.lang[0] !== "en") { + // @ts-expect-error - namespace indexing not supported + this.use(lunr[config.lang[0]]) + } else if (config.lang.length > 1) { + this.use(lunr.multiLanguage(...config.lang)) + } + + /* Set up custom tokenizer (must be after language setup) */ + this.tokenizer = tokenize as typeof lunr.tokenizer + lunr.tokenizer.separator = new RegExp(config.separator) + + /* Set up custom segmenter, if loaded */ + lunr.segmenter = "TinySegmenter" in lunr + ? new lunr.TinySegmenter() + : undefined + + /* Compute functions to be removed from the pipeline */ + const fns = difference([ + "trimmer", "stopWordFilter", "stemmer" + ], config.pipeline) + + /* Remove functions from the pipeline for registered languages */ + for (const lang of config.lang.map(language => ( + // @ts-expect-error - namespace indexing not supported + language === "en" ? lunr : lunr[language] + ))) + for (const fn of fns) { + this.pipeline.remove(lang[fn]) + this.searchPipeline.remove(lang[fn]) + } + + /* Set up index reference */ + this.ref("location") + + /* Set up index fields */ + this.field("title", { boost: 1e3, extractor: field("title") }) + this.field("text", { boost: 1e0, extractor: field("text") }) + this.field("tags", { boost: 1e6, extractor: field("tags") }) + + /* Add documents to index */ + for (const doc of docs) + this.add(doc, { boost: doc.boost }) + }) + } + + /** + * Search for matching documents + * + * @param query - Search query + * + * @returns Search result + */ + public search(query: string): SearchResult { + + // Experimental Chinese segmentation + query = query.replace(/\p{sc=Han}+/gu, value => { + return [...segment(value, this.index.invertedIndex)] + .join("* ") + }) + + // @todo: move segmenter (above) into transformSearchQuery + query = transformSearchQuery(query) + if (!query) + return { items: [] } + + /* Parse query to extract clauses for analysis */ + const clauses = parseSearchQuery(query) + .filter(clause => ( + clause.presence !== lunr.Query.presence.PROHIBITED + )) + + /* Perform search and post-process results */ + const groups = this.index.search(query) + + /* Apply post-query boosts based on title and search query terms */ + .reduce<SearchItem[]>((item, { ref, score, matchData }) => { + let doc = this.map.get(ref) + if (typeof doc !== "undefined") { + + /* Shallow copy document */ + doc = { ...doc } + if (doc.tags) + doc.tags = [...doc.tags] + + /* Compute and analyze search query terms */ + const terms = getSearchQueryTerms( + clauses, + Object.keys(matchData.metadata) + ) + + /* Highlight matches in fields */ + for (const field of this.index.fields) { + if (typeof doc[field] === "undefined") + continue + + /* Collect positions from matches */ + const positions: Position[] = [] + for (const match of Object.values(matchData.metadata)) + if (typeof match[field] !== "undefined") + positions.push(...match[field].position) + + /* Skip highlighting, if no positions were collected */ + if (!positions.length) + continue + + /* Load table and determine highlighting method */ + const table = this.table.get([doc.location, field].join(":"))! + const fn = Array.isArray(doc[field]) + ? highlightAll + : highlight + + // @ts-expect-error - stop moaning, TypeScript! + doc[field] = fn(doc[field], table, positions, field !== "text") + } + + /* Highlight title and text and apply post-query boosts */ + const boost = +!doc.parent + + Object.values(terms) + .filter(t => t).length / + Object.keys(terms).length + + /* Append item */ + item.push({ + ...doc, + score: score * (1 + boost ** 2), + terms + }) + } + return item + }, []) + + /* Sort search results again after applying boosts */ + .sort((a, b) => b.score - a.score) + + /* Group search results by article */ + .reduce((items, result) => { + const doc = this.map.get(result.location) + if (typeof doc !== "undefined") { + const ref = doc.parent + ? doc.parent.location + : doc.location + items.set(ref, [...items.get(ref) || [], result]) + } + return items + }, new Map<string, SearchItem[]>()) + + /* Ensure that every item set has an article */ + for (const [ref, items] of groups) + if (!items.find(item => item.location === ref)) { + const doc = this.map.get(ref)! + items.push({ ...doc, score: 0, terms: {} }) + } + + /* Generate search suggestions, if desired */ + let suggest: string[] | undefined + if (this.options.suggest) { + const titles = this.index.query(builder => { + for (const clause of clauses) + builder.term(clause.term, { + fields: ["title"], + presence: lunr.Query.presence.REQUIRED, + wildcard: lunr.Query.wildcard.TRAILING + }) + }) + + /* Retrieve suggestions for best match */ + suggest = titles.length + ? Object.keys(titles[0].matchData.metadata) + : [] + } + + /* Return search result */ + return { + items: [...groups.values()], + ...typeof suggest !== "undefined" && { suggest } + } + } +} diff --git a/src/templates/assets/javascripts/integrations/search/config/index.ts b/src/templates/assets/javascripts/integrations/search/config/index.ts new file mode 100644 index 00000000..3d88d1c6 --- /dev/null +++ b/src/templates/assets/javascripts/integrations/search/config/index.ts @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* ---------------------------------------------------------------------------- + * Types + * ------------------------------------------------------------------------- */ + +/** + * Search configuration + */ +export interface SearchConfig { + lang: string[] /* Search languages */ + separator: string /* Search separator */ + pipeline: SearchPipelineFn[] /* Search pipeline */ +} + +/** + * Search document + */ +export interface SearchDocument { + location: string /* Document location */ + title: string /* Document title */ + text: string /* Document text */ + tags?: string[] /* Document tags */ + boost?: number /* Document boost */ + parent?: SearchDocument /* Document parent */ +} + +/** + * Search options + */ +export interface SearchOptions { + suggest: boolean /* Search suggestions */ +} + +/* ------------------------------------------------------------------------- */ + +/** + * Search index + */ +export interface SearchIndex { + config: SearchConfig /* Search configuration */ + docs: SearchDocument[] /* Search documents */ + options: SearchOptions /* Search options */ +} + +/* ---------------------------------------------------------------------------- + * Helper types + * ------------------------------------------------------------------------- */ + +/** + * Search pipeline function + */ +type SearchPipelineFn = + | "trimmer" /* Trimmer */ + | "stopWordFilter" /* Stop word filter */ + | "stemmer" /* Stemmer */ + +/* ---------------------------------------------------------------------------- + * Functions + * ------------------------------------------------------------------------- */ + +/** + * Create a search document map + * + * This function creates a mapping of URLs (including anchors) to the actual + * articles and sections. It relies on the invariant that the search index is + * ordered with the main article appearing before all sections with anchors. + * If this is not the case, the logic music be changed. + * + * @param docs - Search documents + * + * @returns Search document map + */ +export function setupSearchDocumentMap( + docs: SearchDocument[] +): Map<string, SearchDocument> { + const map = new Map<string, SearchDocument>() + for (const doc of docs) { + const [path] = doc.location.split("#") + + /* Add document article */ + const article = map.get(path) + if (typeof article === "undefined") { + map.set(path, doc) + + /* Add document section */ + } else { + map.set(doc.location, doc) + doc.parent = article + } + } + + /* Return search document map */ + return map +} diff --git a/src/templates/assets/javascripts/integrations/search/highlighter/index.ts b/src/templates/assets/javascripts/integrations/search/highlighter/index.ts new file mode 100644 index 00000000..0fcbb19e --- /dev/null +++ b/src/templates/assets/javascripts/integrations/search/highlighter/index.ts @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +import escapeHTML from "escape-html" + +import { SearchConfig } from "../config" + +/* ---------------------------------------------------------------------------- + * Types + * ------------------------------------------------------------------------- */ + +/** + * Search highlight function + * + * @param value - Value + * + * @returns Highlighted value + */ +export type SearchHighlightFn = (value: string) => string + +/** + * Search highlight factory function + * + * @param query - Query value + * + * @returns Search highlight function + */ +export type SearchHighlightFactoryFn = (query: string) => SearchHighlightFn + +/* ---------------------------------------------------------------------------- + * Functions + * ------------------------------------------------------------------------- */ + +/** + * Create a search highlighter + * + * @param config - Search configuration + * + * @returns Search highlight factory function + */ +export function setupSearchHighlighter( + config: SearchConfig +): SearchHighlightFactoryFn { + // Hack: temporarily remove pure lookaheads and lookbehinds + const regex = config.separator.split("|").map(term => { + const temp = term.replace(/(\(\?[!=<][^)]+\))/g, "") + return temp.length === 0 ? "�" : term + }) + .join("|") + + const separator = new RegExp(regex, "img") + const highlight = (_: unknown, data: string, term: string) => { + return `${data}<mark data-md-highlight>${term}</mark>` + } + + /* Return factory function */ + return (query: string) => { + query = query + .replace(/[\s*+\-:~^]+/g, " ") + .trim() + + /* Create search term match expression */ + const match = new RegExp(`(^|${config.separator}|)(${ + query + .replace(/[|\\{}()[\]^$+*?.-]/g, "\\$&") + .replace(separator, "|") + })`, "img") + + /* Highlight string value */ + return value => escapeHTML(value) + .replace(match, highlight) + .replace(/<\/mark>(\s+)<mark[^>]*>/img, "$1") + } +} diff --git a/src/templates/assets/javascripts/integrations/search/index.ts b/src/templates/assets/javascripts/integrations/search/index.ts new file mode 100644 index 00000000..94c010bb --- /dev/null +++ b/src/templates/assets/javascripts/integrations/search/index.ts @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +export * from "./_" +export * from "./config" +export * from "./highlighter" +export * from "./query" +export * from "./worker" diff --git a/src/templates/assets/javascripts/integrations/search/internal/.eslintrc b/src/templates/assets/javascripts/integrations/search/internal/.eslintrc new file mode 100644 index 00000000..9368ceb6 --- /dev/null +++ b/src/templates/assets/javascripts/integrations/search/internal/.eslintrc @@ -0,0 +1,6 @@ +{ + "rules": { + "no-fallthrough": "off", + "no-underscore-dangle": "off" + } +} diff --git a/src/templates/assets/javascripts/integrations/search/internal/_/index.ts b/src/templates/assets/javascripts/integrations/search/internal/_/index.ts new file mode 100644 index 00000000..ae8f6104 --- /dev/null +++ b/src/templates/assets/javascripts/integrations/search/internal/_/index.ts @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* ---------------------------------------------------------------------------- + * Helper types + * ------------------------------------------------------------------------- */ + +/** + * Visitor function + * + * @param start - Start offset + * @param end - End offset + */ +type VisitorFn = ( + start: number, end: number +) => void + +/* ---------------------------------------------------------------------------- + * Functions + * ------------------------------------------------------------------------- */ + +/** + * Split a string using the given separator + * + * @param input - Input value + * @param separator - Separator + * @param fn - Visitor function + */ +export function split( + input: string, separator: RegExp, fn: VisitorFn +): void { + separator = new RegExp(separator, "g") + + /* Split string using separator */ + let match: RegExpExecArray | null + let index = 0 + do { + match = separator.exec(input) + + /* Emit non-empty range */ + const until = match?.index ?? input.length + if (index < until) + fn(index, until) + + /* Update last index */ + if (match) { + const [term] = match + index = match.index + term.length + + /* Support zero-length lookaheads */ + if (term.length === 0) + separator.lastIndex = match.index + 1 + } + } while (match) +} diff --git a/src/templates/assets/javascripts/integrations/search/internal/extract/index.ts b/src/templates/assets/javascripts/integrations/search/internal/extract/index.ts new file mode 100644 index 00000000..2a98b9e1 --- /dev/null +++ b/src/templates/assets/javascripts/integrations/search/internal/extract/index.ts @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* ---------------------------------------------------------------------------- + * Types + * ------------------------------------------------------------------------- */ + +/** + * Extraction type + * + * This type defines the possible values that are encoded into the first two + * bits of a section that is part of the blocks of a tokenization table. There + * are three types of interest: HTML opening and closing tags, as well as the + * actual text content we need to extract for indexing. + */ +export const enum Extract { + TAG_OPEN = 0, /* HTML opening tag */ + TEXT = 1, /* Text content */ + TAG_CLOSE = 2 /* HTML closing tag */ +} + +/* ---------------------------------------------------------------------------- + * Helper types + * ------------------------------------------------------------------------- */ + +/** + * Visitor function + * + * @param block - Block index + * @param type - Extraction type + * @param start - Start offset + * @param end - End offset + */ +type VisitorFn = ( + block: number, type: Extract, start: number, end: number +) => void + +/* ---------------------------------------------------------------------------- + * Functions + * ------------------------------------------------------------------------- */ + +/** + * Split a string into markup and text sections + * + * This function scans a string and divides it up into sections of markup and + * text. For each section, it invokes the given visitor function with the block + * index, extraction type, as well as start and end offsets. Using a visitor + * function (= streaming data) is ideal for minimizing pressure on the GC. + * + * @param input - Input value + * @param fn - Visitor function + */ +export function extract( + input: string, fn: VisitorFn +): void { + + let block = 0 /* Current block */ + let start = 0 /* Current start offset */ + let end = 0 /* Current end offset */ + + /* Split string into sections */ + for (let stack = 0; end < input.length; end++) { + + /* Opening tag after non-empty section */ + if (input.charAt(end) === "<" && end > start) { + fn(block, Extract.TEXT, start, start = end) + + /* Closing tag */ + } else if (input.charAt(end) === ">") { + if (input.charAt(start + 1) === "/") { + if (--stack === 0) + fn(block++, Extract.TAG_CLOSE, start, end + 1) + + /* Tag is not self-closing */ + } else if (input.charAt(end - 1) !== "/") { + if (stack++ === 0) + fn(block, Extract.TAG_OPEN, start, end + 1) + } + + /* New section */ + start = end + 1 + } + } + + /* Add trailing section */ + if (end > start) + fn(block, Extract.TEXT, start, end) +} diff --git a/src/templates/assets/javascripts/integrations/search/internal/highlight/index.ts b/src/templates/assets/javascripts/integrations/search/internal/highlight/index.ts new file mode 100644 index 00000000..7cc3bf1a --- /dev/null +++ b/src/templates/assets/javascripts/integrations/search/internal/highlight/index.ts @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* ---------------------------------------------------------------------------- + * Types + * ------------------------------------------------------------------------- */ + +/** + * Position table + */ +export type PositionTable = number[][] + +/** + * Position + */ +export type Position = number + +/* ---------------------------------------------------------------------------- + * Functions + * ------------------------------------------------------------------------- */ + +/** + * Highlight all occurrences in a string + * + * This function receives a field's value (e.g. like `title` or `text`), it's + * position table that was generated during indexing, and the positions found + * when executing the query. It then highlights all occurrences, and returns + * their concatenation. In case of multiple blocks, two are returned. + * + * @param input - Input value + * @param table - Table for indexing + * @param positions - Occurrences + * @param full - Full results + * + * @returns Highlighted string value + */ +export function highlight( + input: string, table: PositionTable, positions: Position[], full = false +): string { + return highlightAll([input], table, positions, full).pop()! +} + +/** + * Highlight all occurrences in a set of strings + * + * @param inputs - Input values + * @param table - Table for indexing + * @param positions - Occurrences + * @param full - Full results + * + * @returns Highlighted string values + */ +export function highlightAll( + inputs: string[], table: PositionTable, positions: Position[], full = false +): string[] { + + /* Map blocks to input values */ + const mapping = [0] + for (let t = 1; t < table.length; t++) { + const prev = table[t - 1] + const next = table[t] + + /* Check if table points to new block */ + const p = prev[prev.length - 1] >>> 2 & 0x3FF + const q = next[0] >>> 12 + + /* Add block to mapping */ + mapping.push(+(p > q) + mapping[mapping.length - 1]) + } + + /* Highlight strings one after another */ + return inputs.map((input, i) => { + let cursor = 0 + + /* Map occurrences to blocks */ + const blocks = new Map<number, number[]>() + for (const p of positions.sort((a, b) => a - b)) { + const index = p & 0xFFFFF + const block = p >>> 20 + if (mapping[block] !== i) + continue + + /* Ensure presence of block group */ + let group = blocks.get(block) + if (typeof group === "undefined") + blocks.set(block, group = []) + + /* Add index to group */ + group.push(index) + } + + /* Just return string, if no occurrences */ + if (blocks.size === 0) + return input + + /* Compute slices */ + const slices: string[] = [] + for (const [block, indexes] of blocks) { + const t = table[block] + + /* Extract positions and length */ + const start = t[0] >>> 12 + const end = t[t.length - 1] >>> 12 + const length = t[t.length - 1] >>> 2 & 0x3FF + + /* Add prefix, if full results are desired */ + if (full && start > cursor) + slices.push(input.slice(cursor, start)) + + /* Extract and highlight slice */ + let slice = input.slice(start, end + length) + for (const j of indexes.sort((a, b) => b - a)) { + + /* Retrieve offset and length of match */ + const p = (t[j] >>> 12) - start + const q = (t[j] >>> 2 & 0x3FF) + p + + /* Wrap occurrence */ + slice = [ + slice.slice(0, p), + "<mark>", + slice.slice(p, q), + "</mark>", + slice.slice(q) + ].join("") + } + + /* Update cursor */ + cursor = end + length + + /* Append slice and abort if we have two */ + if (slices.push(slice) === 2) + break + } + + /* Add suffix, if full results are desired */ + if (full && cursor < input.length) + slices.push(input.slice(cursor)) + + /* Return highlighted slices */ + return slices.join("") + }) +} diff --git a/src/templates/assets/javascripts/integrations/search/internal/index.ts b/src/templates/assets/javascripts/integrations/search/internal/index.ts new file mode 100644 index 00000000..c752329e --- /dev/null +++ b/src/templates/assets/javascripts/integrations/search/internal/index.ts @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +export * from "./_" +export * from "./extract" +export * from "./highlight" +export * from "./tokenize" diff --git a/src/templates/assets/javascripts/integrations/search/internal/tokenize/index.ts b/src/templates/assets/javascripts/integrations/search/internal/tokenize/index.ts new file mode 100644 index 00000000..f5089bc9 --- /dev/null +++ b/src/templates/assets/javascripts/integrations/search/internal/tokenize/index.ts @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +import { split } from "../_" +import { + Extract, + extract +} from "../extract" + +/* ---------------------------------------------------------------------------- + * Functions + * ------------------------------------------------------------------------- */ + +/** + * Split a string or set of strings into tokens + * + * This tokenizer supersedes the default tokenizer that is provided by Lunr.js, + * as it is aware of HTML tags and allows for multi-character splitting. + * + * It takes the given inputs, splits each of them into markup and text sections, + * tokenizes and segments (if necessary) each of them, and then indexes them in + * a table by using a compact bit representation. Bitwise techniques are used + * to write and read from the table during indexing and querying. + * + * @see https://bit.ly/3W3Xw4J - Search: better, faster, smaller + * + * @param input - Input value(s) + * + * @returns Tokens + */ +export function tokenize( + input?: string | string[] +): lunr.Token[] { + const tokens: lunr.Token[] = [] + if (typeof input === "undefined") + return tokens + + /* Tokenize strings one after another */ + const inputs = Array.isArray(input) ? input : [input] + for (let i = 0; i < inputs.length; i++) { + const table = lunr.tokenizer.table + const total = table.length + + /* Split string into sections and tokenize content blocks */ + extract(inputs[i], (block, type, start, end) => { + table[block += total] ||= [] + switch (type) { + + /* Handle markup */ + case Extract.TAG_OPEN: + case Extract.TAG_CLOSE: + table[block].push( + start << 12 | + end - start << 2 | + type + ) + break + + /* Handle text content */ + case Extract.TEXT: + const section = inputs[i].slice(start, end) + split(section, lunr.tokenizer.separator, (index, until) => { + + /** + * Apply segmenter after tokenization. Note that the segmenter will + * also split words at word boundaries, which is not what we want, + * so we need to check if we can somehow mitigate this behavior. + */ + if (typeof lunr.segmenter !== "undefined") { + const subsection = section.slice(index, until) + if (/^[MHIK]$/.test(lunr.segmenter.ctype_(subsection))) { + const segments = lunr.segmenter.segment(subsection) + for (let s = 0, l = 0; s < segments.length; s++) { + + /* Add block to section */ + table[block] ||= [] + table[block].push( + start + index + l << 12 | + segments[s].length << 2 | + type + ) + + /* Add token with position */ + tokens.push(new lunr.Token( + segments[s].toLowerCase(), { + position: block << 20 | table[block].length - 1 + } + )) + + /* Keep track of length */ + l += segments[s].length + } + return + } + } + + /* Add block to section */ + table[block].push( + start + index << 12 | + until - index << 2 | + type + ) + + /* Add token with position */ + tokens.push(new lunr.Token( + section.slice(index, until).toLowerCase(), { + position: block << 20 | table[block].length - 1 + } + )) + }) + } + }) + } + + /* Return tokens */ + return tokens +} diff --git a/src/templates/assets/javascripts/integrations/search/query/.eslintrc b/src/templates/assets/javascripts/integrations/search/query/.eslintrc new file mode 100644 index 00000000..3031c7e3 --- /dev/null +++ b/src/templates/assets/javascripts/integrations/search/query/.eslintrc @@ -0,0 +1,6 @@ +{ + "rules": { + "no-control-regex": "off", + "@typescript-eslint/no-explicit-any": "off" + } +} diff --git a/src/templates/assets/javascripts/integrations/search/query/_/index.ts b/src/templates/assets/javascripts/integrations/search/query/_/index.ts new file mode 100644 index 00000000..14482e43 --- /dev/null +++ b/src/templates/assets/javascripts/integrations/search/query/_/index.ts @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +import { split } from "../../internal" +import { transform } from "../transform" + +/* ---------------------------------------------------------------------------- + * Types + * ------------------------------------------------------------------------- */ + +/** + * Search query clause + */ +export interface SearchQueryClause { + presence: lunr.Query.presence /* Clause presence */ + term: string /* Clause term */ +} + +/* ------------------------------------------------------------------------- */ + +/** + * Search query terms + */ +export type SearchQueryTerms = Record<string, boolean> + +/* ---------------------------------------------------------------------------- + * Functions + * ------------------------------------------------------------------------- */ + +/** + * Transform search query + * + * This function lexes the given search query and applies the transformation + * function to each term, preserving markup like `+` and `-` modifiers. + * + * @param query - Search query + * + * @returns Search query + */ +export function transformSearchQuery( + query: string +): string { + + /* Split query terms with tokenizer */ + return transform(query, part => { + const terms: string[] = [] + + /* Initialize lexer and analyze part */ + const lexer = new lunr.QueryLexer(part) + lexer.run() + + /* Extract and tokenize term from lexeme */ + for (const { type, str: term, start, end } of lexer.lexemes) + switch (type) { + + /* Hack: remove colon - see https://bit.ly/3wD3T3I */ + case "FIELD": + if (!["title", "text", "tags"].includes(term)) + part = [ + part.slice(0, end), + " ", + part.slice(end + 1) + ].join("") + break + + /* Tokenize term */ + case "TERM": + split(term, lunr.tokenizer.separator, (...range) => { + terms.push([ + part.slice(0, start), + term.slice(...range), + part.slice(end) + ].join("")) + }) + } + + /* Return terms */ + return terms + }) +} + +/* ------------------------------------------------------------------------- */ + +/** + * Parse a search query for analysis + * + * Lunr.js itself has a bug where it doesn't detect or remove wildcards for + * query clauses, so we must do this here. + * + * @see https://bit.ly/3DpTGtz - GitHub issue + * + * @param value - Query value + * + * @returns Search query clauses + */ +export function parseSearchQuery( + value: string +): SearchQueryClause[] { + const query = new lunr.Query(["title", "text", "tags"]) + const parser = new lunr.QueryParser(value, query) + + /* Parse Search query */ + parser.parse() + for (const clause of query.clauses) { + clause.usePipeline = true + + /* Handle leading wildcard */ + if (clause.term.startsWith("*")) { + clause.wildcard = lunr.Query.wildcard.LEADING + clause.term = clause.term.slice(1) + } + + /* Handle trailing wildcard */ + if (clause.term.endsWith("*")) { + clause.wildcard = lunr.Query.wildcard.TRAILING + clause.term = clause.term.slice(0, -1) + } + } + + /* Return query clauses */ + return query.clauses +} + +/** + * Analyze the search query clauses in regard to the search terms found + * + * @param query - Search query clauses + * @param terms - Search terms + * + * @returns Search query terms + */ +export function getSearchQueryTerms( + query: SearchQueryClause[], terms: string[] +): SearchQueryTerms { + const clauses = new Set<SearchQueryClause>(query) + + /* Match query clauses against terms */ + const result: SearchQueryTerms = {} + for (let t = 0; t < terms.length; t++) + for (const clause of clauses) + if (terms[t].startsWith(clause.term)) { + result[clause.term] = true + clauses.delete(clause) + } + + /* Annotate unmatched non-stopword query clauses */ + for (const clause of clauses) + if (lunr.stopWordFilter?.(clause.term)) + result[clause.term] = false + + /* Return query terms */ + return result +} diff --git a/src/templates/assets/javascripts/integrations/search/query/index.ts b/src/templates/assets/javascripts/integrations/search/query/index.ts new file mode 100644 index 00000000..763e2fd4 --- /dev/null +++ b/src/templates/assets/javascripts/integrations/search/query/index.ts @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +export * from "./_" +export * from "./segment" +export * from "./transform" diff --git a/src/templates/assets/javascripts/integrations/search/query/segment/index.ts b/src/templates/assets/javascripts/integrations/search/query/segment/index.ts new file mode 100644 index 00000000..b96796f4 --- /dev/null +++ b/src/templates/assets/javascripts/integrations/search/query/segment/index.ts @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* ---------------------------------------------------------------------------- + * Functions + * ------------------------------------------------------------------------- */ + +/** + * Segment a search query using the inverted index + * + * This function implements a clever approach to text segmentation for Asian + * languages, as it used the information already available in the search index. + * The idea is to greedily segment the search query based on the tokens that are + * already part of the index, as described in the linked issue. + * + * @see https://bit.ly/3lwjrk7 - GitHub issue + * + * @param query - Query value + * @param index - Inverted index + * + * @returns Segmented query value + */ +export function segment( + query: string, index: object +): Iterable<string> { + const segments = new Set<string>() + + /* Segment search query */ + const wordcuts = new Uint16Array(query.length) + for (let i = 0; i < query.length; i++) + for (let j = i + 1; j < query.length; j++) { + const value = query.slice(i, j) + if (value in index) + wordcuts[i] = j - i + } + + /* Compute longest matches with minimum overlap */ + const stack = [0] + for (let s = stack.length; s > 0;) { + const p = stack[--s] + for (let q = 1; q < wordcuts[p]; q++) + if (wordcuts[p + q] > wordcuts[p] - q) { + segments.add(query.slice(p, p + q)) + stack[s++] = p + q + } + + /* Continue at end of query string */ + const q = p + wordcuts[p] + if (wordcuts[q] && q < query.length - 1) + stack[s++] = q + + /* Add current segment */ + segments.add(query.slice(p, q)) + } + + // @todo fix this case in the code block above, this is a hotfix + if (segments.has("")) + return new Set([query]) + + /* Return segmented query value */ + return segments +} diff --git a/src/templates/assets/javascripts/integrations/search/query/transform/index.ts b/src/templates/assets/javascripts/integrations/search/query/transform/index.ts new file mode 100644 index 00000000..41497786 --- /dev/null +++ b/src/templates/assets/javascripts/integrations/search/query/transform/index.ts @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* ---------------------------------------------------------------------------- + * Helper types + * ------------------------------------------------------------------------- */ + +/** + * Visitor function + * + * @param value - String value + * + * @returns String term(s) + */ +type VisitorFn = ( + value: string +) => string | string[] + +/* ---------------------------------------------------------------------------- + * Functions + * ------------------------------------------------------------------------- */ + +/** + * Default transformation function + * + * 1. Trim excess whitespace from left and right. + * + * 2. Search for parts in quotation marks and prepend a `+` modifier to denote + * that the resulting document must contain all parts, converting the query + * to an `AND` query (as opposed to the default `OR` behavior). While users + * may expect parts enclosed in quotation marks to map to span queries, i.e. + * for which order is important, Lunr.js doesn't support them, so the best + * we can do is to convert the parts to an `AND` query. + * + * 3. Replace control characters which are not located at the beginning of the + * query or preceded by white space, or are not followed by a non-whitespace + * character or are at the end of the query string. Furthermore, filter + * unmatched quotation marks. + * + * 4. Split the query string at whitespace, then pass each part to the visitor + * function for tokenization, and append a wildcard to every resulting term + * that is not explicitly marked with a `+`, `-`, `~` or `^` modifier, since + * it ensures consistent and stable ranking when multiple terms are entered. + * Also, if a fuzzy or boost modifier are given, but no numeric value has + * been entered, default to 1 to not induce a query error. + * + * @param query - Query value + * @param fn - Visitor function + * + * @returns Transformed query value + */ +export function transform( + query: string, fn: VisitorFn = term => term +): string { + return query + + /* => 1 */ + .trim() + + /* => 2 */ + .split(/"([^"]+)"/g) + .map((parts, index) => index & 1 + ? parts.replace(/^\b|^(?![^\x00-\x7F]|$)|\s+/g, " +") + : parts + ) + .join("") + + /* => 3 */ + .replace(/"|(?:^|\s+)[*+\-:^~]+(?=\s+|$)/g, "") + + /* => 4 */ + .split(/\s+/g) + .reduce((prev, term) => { + const next = fn(term) + return [...prev, ...Array.isArray(next) ? next : [next]] + }, [] as string[]) + .map(term => /([~^]$)/.test(term) ? `${term}1` : term) + .map(term => /(^[+-]|[~^]\d+$)/.test(term) ? term : `${term}*`) + .join(" ") +} diff --git a/src/templates/assets/javascripts/integrations/search/worker/_/index.ts b/src/templates/assets/javascripts/integrations/search/worker/_/index.ts new file mode 100644 index 00000000..26713573 --- /dev/null +++ b/src/templates/assets/javascripts/integrations/search/worker/_/index.ts @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A RTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +import { + ObservableInput, + Subject, + first, + merge, + of, + switchMap +} from "rxjs" + +import { feature } from "~/_" +import { watchToggle, watchWorker } from "~/browser" + +import { SearchIndex } from "../../config" +import { + SearchMessage, + SearchMessageType +} from "../message" + +/* ---------------------------------------------------------------------------- + * Functions + * ------------------------------------------------------------------------- */ + +/** + * Set up search worker + * + * This function creates and initializes a web worker that is used for search, + * so that the user interface doesn't freeze. In general, the application does + * not care how search is implemented, as long as the web worker conforms to + * the format expected by the application as defined in `SearchMessage`. This + * allows the author to implement custom search functionality, by providing a + * custom web worker via configuration. + * + * Material for MkDocs' built-in search implementation makes use of Lunr.js, an + * efficient and fast implementation for client-side search. Leveraging a tiny + * iframe-based web worker shim, search is even supported for the `file://` + * protocol, enabling search for local non-hosted builds. + * + * If the protocol is `file://`, search initialization is deferred to mitigate + * freezing, as it's now synchronous by design - see https://bit.ly/3C521EO + * + * @see https://bit.ly/3igvtQv - How to implement custom search + * + * @param url - Worker URL + * @param index$ - Search index observable input + * + * @returns Search worker + */ +export function setupSearchWorker( + url: string, index$: ObservableInput<SearchIndex> +): Subject<SearchMessage> { + const worker$ = watchWorker<SearchMessage>(url) + merge( + of(location.protocol !== "file:"), + watchToggle("search") + ) + .pipe( + first(active => active), + switchMap(() => index$) + ) + .subscribe(({ config, docs }) => worker$.next({ + type: SearchMessageType.SETUP, + data: { + config, + docs, + options: { + suggest: feature("search.suggest") + } + } + })) + + /* Return search worker */ + return worker$ +} diff --git a/src/templates/assets/javascripts/integrations/search/worker/index.ts b/src/templates/assets/javascripts/integrations/search/worker/index.ts new file mode 100644 index 00000000..7120ad6e --- /dev/null +++ b/src/templates/assets/javascripts/integrations/search/worker/index.ts @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +export * from "./_" +export * from "./message" diff --git a/src/templates/assets/javascripts/integrations/search/worker/main/.eslintrc b/src/templates/assets/javascripts/integrations/search/worker/main/.eslintrc new file mode 100644 index 00000000..3df9d551 --- /dev/null +++ b/src/templates/assets/javascripts/integrations/search/worker/main/.eslintrc @@ -0,0 +1,6 @@ +{ + "rules": { + "no-console": "off", + "@typescript-eslint/no-misused-promises": "off" + } +} diff --git a/src/templates/assets/javascripts/integrations/search/worker/main/index.ts b/src/templates/assets/javascripts/integrations/search/worker/main/index.ts new file mode 100644 index 00000000..2df38080 --- /dev/null +++ b/src/templates/assets/javascripts/integrations/search/worker/main/index.ts @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A RTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +import lunr from "lunr" + +import { getElement } from "~/browser/element/_" +import "~/polyfills" + +import { Search } from "../../_" +import { SearchConfig } from "../../config" +import { + SearchMessage, + SearchMessageType +} from "../message" + +/* ---------------------------------------------------------------------------- + * Types + * ------------------------------------------------------------------------- */ + +/** + * Add support for `iframe-worker` shim + * + * While `importScripts` is synchronous when executed inside of a web worker, + * it's not possible to provide a synchronous shim implementation. The cool + * thing is that awaiting a non-Promise will convert it into a Promise, so + * extending the type definition to return a `Promise` shouldn't break anything. + * + * @see https://bit.ly/2PjDnXi - GitHub comment + * + * @param urls - Scripts to load + * + * @returns Promise resolving with no result + */ +declare global { + function importScripts(...urls: string[]): Promise<void> | void +} + +/* ---------------------------------------------------------------------------- + * Data + * ------------------------------------------------------------------------- */ + +/** + * Search index + */ +let index: Search + +/* ---------------------------------------------------------------------------- + * Helper functions + * ------------------------------------------------------------------------- */ + +/** + * Fetch (= import) multi-language support through `lunr-languages` + * + * This function automatically imports the stemmers necessary to process the + * languages which are defined as part of the search configuration. + * + * If the worker runs inside of an `iframe` (when using `iframe-worker` as + * a shim), the base URL for the stemmers to be loaded must be determined by + * searching for the first `script` element with a `src` attribute, which will + * contain the contents of this script. + * + * @param config - Search configuration + * + * @returns Promise resolving with no result + */ +async function setupSearchLanguages( + config: SearchConfig +): Promise<void> { + let base = "../lunr" + + /* Detect `iframe-worker` and fix base URL */ + if (typeof parent !== "undefined" && "IFrameWorker" in parent) { + const worker = getElement<HTMLScriptElement>("script[src]")! + const [path] = worker.src.split("/worker") + + /* Prefix base with path */ + base = base.replace("..", path) + } + + /* Add scripts for languages */ + const scripts = [] + for (const lang of config.lang) { + switch (lang) { + + /* Add segmenter for Japanese */ + case "ja": + scripts.push(`${base}/tinyseg.js`) + break + + /* Add segmenter for Hindi and Thai */ + case "hi": + case "th": + scripts.push(`${base}/wordcut.js`) + break + } + + /* Add language support */ + if (lang !== "en") + scripts.push(`${base}/min/lunr.${lang}.min.js`) + } + + /* Add multi-language support */ + if (config.lang.length > 1) + scripts.push(`${base}/min/lunr.multi.min.js`) + + /* Load scripts synchronously */ + if (scripts.length) + await importScripts( + `${base}/min/lunr.stemmer.support.min.js`, + ...scripts + ) +} + +/* ---------------------------------------------------------------------------- + * Functions + * ------------------------------------------------------------------------- */ + +/** + * Message handler + * + * @param message - Source message + * + * @returns Target message + */ +export async function handler( + message: SearchMessage +): Promise<SearchMessage> { + switch (message.type) { + + /* Search setup message */ + case SearchMessageType.SETUP: + await setupSearchLanguages(message.data.config) + index = new Search(message.data) + return { + type: SearchMessageType.READY + } + + /* Search query message */ + case SearchMessageType.QUERY: + const query = message.data + try { + return { + type: SearchMessageType.RESULT, + data: index.search(query) + } + + /* Return empty result in case of error */ + } catch (err) { + console.warn(`Invalid query: ${query} – see https://bit.ly/2s3ChXG`) + console.warn(err) + return { + type: SearchMessageType.RESULT, + data: { items: [] } + } + } + + /* All other messages */ + default: + throw new TypeError("Invalid message type") + } +} + +/* ---------------------------------------------------------------------------- + * Worker + * ------------------------------------------------------------------------- */ + +/* Expose Lunr.js in global scope, or stemmers won't work */ +self.lunr = lunr + +/* Handle messages */ +addEventListener("message", async ev => { + postMessage(await handler(ev.data)) +}) diff --git a/src/templates/assets/javascripts/integrations/search/worker/message/index.ts b/src/templates/assets/javascripts/integrations/search/worker/message/index.ts new file mode 100644 index 00000000..54d5001e --- /dev/null +++ b/src/templates/assets/javascripts/integrations/search/worker/message/index.ts @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A RTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +import { SearchResult } from "../../_" +import { SearchIndex } from "../../config" + +/* ---------------------------------------------------------------------------- + * Types + * ------------------------------------------------------------------------- */ + +/** + * Search message type + */ +export const enum SearchMessageType { + SETUP, /* Search index setup */ + READY, /* Search index ready */ + QUERY, /* Search query */ + RESULT /* Search results */ +} + +/* ------------------------------------------------------------------------- */ + +/** + * Message containing the data necessary to setup the search index + */ +export interface SearchSetupMessage { + type: SearchMessageType.SETUP /* Message type */ + data: SearchIndex /* Message data */ +} + +/** + * Message indicating the search index is ready + */ +export interface SearchReadyMessage { + type: SearchMessageType.READY /* Message type */ +} + +/** + * Message containing a search query + */ +export interface SearchQueryMessage { + type: SearchMessageType.QUERY /* Message type */ + data: string /* Message data */ +} + +/** + * Message containing results for a search query + */ +export interface SearchResultMessage { + type: SearchMessageType.RESULT /* Message type */ + data: SearchResult /* Message data */ +} + +/* ------------------------------------------------------------------------- */ + +/** + * Message exchanged with the search worker + */ +export type SearchMessage = + | SearchSetupMessage + | SearchReadyMessage + | SearchQueryMessage + | SearchResultMessage + +/* ---------------------------------------------------------------------------- + * Functions + * ------------------------------------------------------------------------- */ + +/** + * Type guard for search ready messages + * + * @param message - Search worker message + * + * @returns Test result + */ +export function isSearchReadyMessage( + message: SearchMessage +): message is SearchReadyMessage { + return message.type === SearchMessageType.READY +} + +/** + * Type guard for search result messages + * + * @param message - Search worker message + * + * @returns Test result + */ +export function isSearchResultMessage( + message: SearchMessage +): message is SearchResultMessage { + return message.type === SearchMessageType.RESULT +} diff --git a/src/templates/assets/javascripts/integrations/sitemap/index.ts b/src/templates/assets/javascripts/integrations/sitemap/index.ts new file mode 100644 index 00000000..08695bad --- /dev/null +++ b/src/templates/assets/javascripts/integrations/sitemap/index.ts @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +import { + EMPTY, + Observable, + catchError, + defaultIfEmpty, + map, + of, + tap +} from "rxjs" + +import { configuration } from "~/_" +import { getElements, requestXML } from "~/browser" + +/* ---------------------------------------------------------------------------- + * Types + * ------------------------------------------------------------------------- */ + +/** + * Sitemap, i.e. a list of URLs + */ +export type Sitemap = string[] + +/* ---------------------------------------------------------------------------- + * Helper functions + * ------------------------------------------------------------------------- */ + +/** + * Preprocess a list of URLs + * + * This function replaces the `site_url` in the sitemap with the actual base + * URL, to allow instant navigation to work in occasions like Netlify previews. + * + * @param urls - URLs + * + * @returns URL path parts + */ +function preprocess(urls: Sitemap): Sitemap { + if (urls.length < 2) + return [""] + + /* Take the first two URLs and remove everything after the last slash */ + const [root, next] = [...urls] + .sort((a, b) => a.length - b.length) + .map(url => url.replace(/[^/]+$/, "")) + + /* Compute common prefix */ + let index = 0 + if (root === next) + index = root.length + else + while (root.charCodeAt(index) === next.charCodeAt(index)) + index++ + + /* Remove common prefix and return in original order */ + return urls.map(url => url.replace(root.slice(0, index), "")) +} + +/* ---------------------------------------------------------------------------- + * Functions + * ------------------------------------------------------------------------- */ + +/** + * Fetch the sitemap for the given base URL + * + * @param base - Base URL + * + * @returns Sitemap observable + */ +export function fetchSitemap(base?: URL): Observable<Sitemap> { + const cached = __md_get<Sitemap>("__sitemap", sessionStorage, base) + if (cached) { + return of(cached) + } else { + const config = configuration() + return requestXML(new URL("sitemap.xml", base || config.base)) + .pipe( + map(sitemap => preprocess(getElements("loc", sitemap) + .map(node => node.textContent!) + )), + catchError(() => EMPTY), // @todo refactor instant loading + defaultIfEmpty([]), + tap(sitemap => __md_set("__sitemap", sitemap, sessionStorage, base)) + ) + } +} diff --git a/src/templates/assets/javascripts/integrations/version/.eslintrc b/src/templates/assets/javascripts/integrations/version/.eslintrc new file mode 100644 index 00000000..38a5714d --- /dev/null +++ b/src/templates/assets/javascripts/integrations/version/.eslintrc @@ -0,0 +1,5 @@ +{ + "rules": { + "no-null/no-null": "off" + } +} diff --git a/src/templates/assets/javascripts/integrations/version/index.ts b/src/templates/assets/javascripts/integrations/version/index.ts new file mode 100644 index 00000000..38d78f17 --- /dev/null +++ b/src/templates/assets/javascripts/integrations/version/index.ts @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +import { + EMPTY, + Subject, + catchError, + combineLatest, + filter, + fromEvent, + map, + of, + switchMap, + withLatestFrom +} from "rxjs" + +import { configuration } from "~/_" +import { + getElement, + getLocation, + requestJSON, + setLocation +} from "~/browser" +import { getComponentElements } from "~/components" +import { + Version, + renderVersionSelector +} from "~/templates" + +import { fetchSitemap } from "../sitemap" + +/* ---------------------------------------------------------------------------- + * Helper types + * ------------------------------------------------------------------------- */ + +/** + * Setup options + */ +interface SetupOptions { + document$: Subject<Document> /* Document subject */ +} + +/* ---------------------------------------------------------------------------- + * Functions + * ------------------------------------------------------------------------- */ + +/** + * Set up version selector + * + * @param options - Options + */ +export function setupVersionSelector( + { document$ }: SetupOptions +): void { + const config = configuration() + const versions$ = requestJSON<Version[]>( + new URL("../versions.json", config.base) + ) + .pipe( + catchError(() => EMPTY) // @todo refactor instant loading + ) + + /* Determine current version */ + const current$ = versions$ + .pipe( + map(versions => { + const [, current] = config.base.match(/([^/]+)\/?$/)! + return versions.find(({ version, aliases }) => ( + version === current || aliases.includes(current) + )) || versions[0] + }) + ) + + /* Intercept inter-version navigation */ + versions$ + .pipe( + map(versions => new Map(versions.map(version => [ + `${new URL(`../${version.version}/`, config.base)}`, + version + ]))), + switchMap(urls => fromEvent<MouseEvent>(document.body, "click") + .pipe( + filter(ev => !ev.metaKey && !ev.ctrlKey), + withLatestFrom(current$), + switchMap(([ev, current]) => { + if (ev.target instanceof Element) { + const el = ev.target.closest("a") + if (el && !el.target && urls.has(el.href)) { + const url = el.href + // This is a temporary hack to detect if a version inside the + // version selector or on another part of the site was clicked. + // If we're inside the version selector, we definitely want to + // find the same page, as we might have different deployments + // due to aliases. However, if we're outside the version + // selector, we must abort here, because we might otherwise + // interfere with instant navigation. We need to refactor this + // at some point together with instant navigation. + // + // See https://github.com/squidfunk/mkdocs-material/issues/4012 + if (!ev.target.closest(".md-version")) { + const version = urls.get(url)! + if (version === current) + return EMPTY + } + ev.preventDefault() + return of(url) + } + } + return EMPTY + }), + switchMap(url => { + const { version } = urls.get(url)! + return fetchSitemap(new URL(url)) + .pipe( + map(sitemap => { + const location = getLocation() + const path = location.href.replace(config.base, "") + return sitemap.includes(path.split("#")[0]) + ? new URL(`../${version}/${path}`, config.base) + : new URL(url) + }) + ) + }) + ) + ) + ) + .subscribe(url => setLocation(url, true)) + + /* Render version selector and warning */ + combineLatest([versions$, current$]) + .subscribe(([versions, current]) => { + const topic = getElement(".md-header__topic") + topic.appendChild(renderVersionSelector(versions, current)) + }) + + /* Integrate outdated version banner with instant navigation */ + document$.pipe(switchMap(() => current$)) + .subscribe(current => { + + /* Check if version state was already determined */ + let outdated = __md_get("__outdated", sessionStorage) + if (outdated === null) { + outdated = true + + /* Obtain and normalize default versions */ + let ignored = config.version?.default || "latest" + if (!Array.isArray(ignored)) + ignored = [ignored] + + /* Check if version is considered a default */ + main: for (const ignore of ignored) + for (const alias of current.aliases) + if (new RegExp(ignore, "i").test(alias)) { + outdated = false + break main + } + + /* Persist version state in session storage */ + __md_set("__outdated", outdated, sessionStorage) + } + + /* Unhide outdated version banner */ + if (outdated) + for (const warning of getComponentElements("outdated")) + warning.hidden = false + }) +} |
