12 changes: 12 additions & 0 deletions .github/workflows/deploy-to-developer-portal-dev.yml
@@ -61,3 +61,15 @@ jobs:
- name: 'Deploy to Developer Portal Bucket'
run: |
gsutil -m rsync -r -d -c ./docusaurus/website/build/ gs://staging-developer-portal/plugin-tools

# TODO: Wire all the algolia stuff up here.
# - name: Serve built site
# run: npm run serve -w website -- --port 3000 &

# - name: Wait for server
# uses: grafana/plugin-actions/wait-for-grafana@wait-for-grafana/v1.0.2
# with:
# url: "http://localhost:3000/developers/plugin-tools/"

# - name: Test http server is running
# run: curl -f http://localhost:3000/developers/plugin-tools || exit 0
1 change: 1 addition & 0 deletions .gitignore
@@ -29,6 +29,7 @@ yarn-error.log*
.docusaurus
.cache-loader
docusaurus/website/scripts/plugin.schema.json
docusaurus/website/storage
*.bak

# Allow vscode config inside templates
3 changes: 3 additions & 0 deletions docusaurus/website/.env.development
@@ -9,3 +9,6 @@ DEV_PORTAL_FARO_CONFIG_URL=/connect/af1fca71911a9641ebdffddb56889e97
DEV_PORTAL_FARO_CONFIG_APP_NAME=grafana-website
DEV_PORTAL_FARO_CONFIG_ENVIRONMENT=development
DEV_PORTAL_ONETRUST_DOMAIN_ID=019644f3-5dcf-741c-8b6d-42fb8feae57f-test
ALGOLIA_APP_ID=J0ORNBBUBP
ALGOLIA_SEARCH_API_KEY=643751b8e30f5d9b73efd38b55abdfc4
ALGOLIA_SEARCH_INDEX=developer-tools
5 changes: 4 additions & 1 deletion docusaurus/website/.env.production
@@ -8,4 +8,7 @@ DEV_PORTAL_RUDDERSTACK_SDK_URL=https://rsdk.grafana.com
DEV_PORTAL_FARO_CONFIG_URL=/connect/af1fca71911a9641ebdffddb56889e97
DEV_PORTAL_FARO_CONFIG_APP_NAME=grafana-website
DEV_PORTAL_FARO_CONFIG_ENVIRONMENT=production
DEV_PORTAL_ONETRUST_DOMAIN_ID=019644f3-5dcf-741c-8b6d-42fb8feae57f
DEV_PORTAL_ONETRUST_DOMAIN_ID=019644f3-5dcf-741c-8b6d-42fb8feae57f
ALGOLIA_APP_ID=J0ORNBBUBP
ALGOLIA_SEARCH_API_KEY=643751b8e30f5d9b73efd38b55abdfc4
ALGOLIA_SEARCH_INDEX=developer-tools
41 changes: 35 additions & 6 deletions docusaurus/website/docusaurus.config.ts
@@ -68,12 +68,6 @@ const config: Config = {
},
},
],
[
'docusaurus-lunr-search',
{
disableVersioning: true,
},
],
[
'@docusaurus/plugin-client-redirects',
{
@@ -117,6 +111,41 @@
],

themeConfig: {
algolia: {
// The application ID provided by Algolia
appId: process.env.ALGOLIA_APP_ID,

// Public API key: it is safe to commit it
apiKey: process.env.ALGOLIA_SEARCH_API_KEY,

indexName: process.env.ALGOLIA_SEARCH_INDEX,

// Optional: see doc section below
contextualSearch: true,

// Optional: Specify domains where the navigation should occur through window.location instead of history.push. Useful when our Algolia config crawls multiple documentation sites and we want to navigate with window.location.href to them.
// externalUrlRegex: 'external\\.com|domain\\.com',

// Optional: Replace parts of the item URLs from Algolia. Useful when using the same search index for multiple deployments using a different baseUrl. You can use regexp or string in the `from` param. For example: localhost:3000 vs myCompany.com/docs
// replaceSearchResultPathname: {
// from: '/docs/', // or as RegExp: /\/docs\//
// to: '/',
// },

// Optional: Algolia search parameters
// searchParameters: {
// facetFilters: ['language:en', ['docusaurus_tag:default', 'docusaurus_tag:docs-default-current']],
// },

// Optional: path for the search page that is enabled by default (`false` to disable it)
searchPagePath: 'search',
Review comment from a Contributor on `searchPagePath`: This could disable the search page completely if we want to. We don't have one in prod today 👍
// Optional: whether the insights feature is enabled or not on Docsearch (`false` by default)
insights: false,

// Optional: whether you want to use the new Ask AI feature (undefined by default)
// askAi: 'YOUR_ALGOLIA_ASK_AI_ASSISTANT_ID',
},
announcementBar: {
id: 'community_callout',
content:
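The `algolia` block above reads its credentials from `process.env`, but this diff does not show how the values from `.env.development` / `.env.production` reach the config at build time. Since `dotenv` is added as a dev dependency below, one plausible approach, sketched here as an assumption rather than the project's actual mechanism, is to load the matching env file at the top of `docusaurus.config.ts`:

// Hypothetical sketch: load the ALGOLIA_* variables before the config object is built.
// The actual loading mechanism is not part of this diff.
import * as dotenv from 'dotenv';

dotenv.config({
  // Pick the env file that matches the build mode; default to development.
  path: `.env.${process.env.NODE_ENV === 'production' ? 'production' : 'development'}`,
});

// Fail fast if the DocSearch credentials are missing, since the theme config expects strings here.
if (!process.env.ALGOLIA_APP_ID || !process.env.ALGOLIA_SEARCH_API_KEY || !process.env.ALGOLIA_SEARCH_INDEX) {
  throw new Error('Missing ALGOLIA_* environment variables for DocSearch.');
}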
9 changes: 7 additions & 2 deletions docusaurus/website/package.json
@@ -16,7 +16,9 @@
"lint": "eslint --cache '../docs/**/*.{md,mdx}'",
"typecheck": "tsc",
"test": "vitest",
"test:ci": "vitest --run"
"test:ci": "vitest --run",
"generate-index": "node ./scripts/algolia/crawl.ts",
"upload-to-algolia": "node ./scripts/algolia/uploadToAlgolia.ts"
},
"dependencies": {
"@coffeeandfun/remove-pii": "^2.0.0",
@@ -25,10 +27,10 @@
"@docusaurus/preset-classic": "^3.9.2",
"@grafana/faro-web-sdk": "^1.9.0",
"@mdx-js/react": "^3.0.0",
"algoliasearch": "^5.45.0",
"clsx": "^2.0.0",
"cookiejs": "^2.1.2",
"debounce": "^3.0.0",
"docusaurus-lunr-search": "^3.6.0",
"gridsome-remark-figure-caption": "1.2.2",
"prism-react-renderer": "^2.1.0",
"react": "^18.2.0",
@@ -39,7 +41,10 @@
"@docusaurus/module-type-aliases": "^3.9.2",
"@docusaurus/remark-plugin-npm2yarn": "^3.9.2",
"@docusaurus/tsconfig": "^3.9.2",
"@types/glob": "^9.0.0",
"crawlee": "^3.15.3",
"dotenv": "^17.2.3",
"glob": "^11.1.0",
"raw-loader": "^4.0.2",
"typescript": "^5.9.3",
"vitest": "^4.0.10"
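The new `upload-to-algolia` script points at `scripts/algolia/uploadToAlgolia.ts`, which is not included in this diff. A minimal sketch of what it might look like, assuming the crawler's output lands under the now-ignored `docusaurus/website/storage` directory and using the `algoliasearch` v5 client added above; the admin key name and dataset layout are assumptions:

// Hypothetical sketch of scripts/algolia/uploadToAlgolia.ts; the real file is not shown in this diff.
import { algoliasearch } from 'algoliasearch';
import { readFile } from 'node:fs/promises';
import { glob } from 'glob';
import 'dotenv/config';

const { ALGOLIA_APP_ID, ALGOLIA_ADMIN_API_KEY, ALGOLIA_SEARCH_INDEX } = process.env;
// Writing to an index needs a write-capable key, not the public search key committed in the .env.* files.
if (!ALGOLIA_APP_ID || !ALGOLIA_ADMIN_API_KEY || !ALGOLIA_SEARCH_INDEX) {
  throw new Error('Missing Algolia environment variables.');
}

const client = algoliasearch(ALGOLIA_APP_ID, ALGOLIA_ADMIN_API_KEY);

// Crawlee's pushData() writes JSON files into per-dataset folders under ./storage/datasets/.
const files = await glob('./storage/datasets/**/*.json');
const records: Record<string, unknown>[] = [];
for (const file of files) {
  const parsed = JSON.parse(await readFile(file, 'utf8'));
  records.push(...(Array.isArray(parsed) ? parsed : [parsed]));
}

// Replace the index contents atomically so records for removed pages drop out of search.
await client.replaceAllObjects({ indexName: ALGOLIA_SEARCH_INDEX, objects: records });
console.log(`Uploaded ${records.length} records to ${ALGOLIA_SEARCH_INDEX}`);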
236 changes: 236 additions & 0 deletions docusaurus/website/scripts/algolia/crawl.ts
@@ -0,0 +1,236 @@
import { type CheerioAPI, CheerioCrawler, type LoadedRequest, type Request, log, LogLevel, Sitemap } from 'crawlee';
import { createHash } from 'node:crypto';
import { inspect } from 'util';
import { type DocSearchRecord } from './types.ts';

log.setLevel(LogLevel.INFO);

function generateAlgoliaRecords(request: LoadedRequest<Request>, $: CheerioAPI) {
const version = $('meta[name="docsearch:version"]').attr('content') || 'current';
const lang = $('meta[name="docsearch:lang"]').attr('content') || 'en';
const docusaurus_tag = $('meta[name="docsearch:docusaurus_tag"]').attr('content') || 'default';
let position = 0;
const lvl0 =
$(
'.menu__link.menu__link--active, .menu__link.menu__link--sublist.menu__link--active, .navbar__item.navbar__link--active'
)
.last()
.text() || 'Documentation';

// Helper function to extract heading hierarchy records
function extractHeadingLevel(level: number, positionRef: { current: number }) {
const selector = level === 1 ? 'header h1, article h1' : `article h${level}`;
let elements = $(selector);

// Special filter for h3 elements to exclude "Was this page helpful?"
if (level === 3) {
elements = elements.filter((_, el) => $(el).text() !== 'Was this page helpful?');
}

const records: DocSearchRecord[] = [];

elements.each((_, el) => {
// Build hierarchy (same logic as before)
const hierarchy: DocSearchRecord['hierarchy'] = {
lvl0,
lvl1: null,
lvl2: null,
lvl3: null,
lvl4: null,
lvl5: null,
lvl6: null,
};
if (level === 1) {
hierarchy.lvl1 = $(el).text();
for (let i = 2; i <= 6; i++) {
hierarchy[`lvl${i}`] = null;
}
} else {
hierarchy.lvl1 = $(el).closest('article').find('header h1, > h1').first().text();
for (let i = 2; i < level; i++) {
hierarchy[`lvl${i}`] = $(el).prevAll(`h${i}`).first().text() || null;
}
hierarchy[`lvl${level}`] = $(el).text();
}

// Calculate URL (same as before)
const prodUrl = new URL(request.url);
prodUrl.host = 'grafana.com';
prodUrl.protocol = 'https';
prodUrl.port = '';

const anchor = $(el).attr('id') ?? null; // Use null rather than an empty string when the heading has no id
const url = `${prodUrl.toString()}${anchor ? `#${anchor}` : ''}`;

// Generate objectID
const objectID = createHash('sha256')
.update(Object.values(hierarchy).filter(Boolean).join('-') + `-lvl${level}-${positionRef.current}`)
.digest('hex');

// Create heading record
const headingRecord: DocSearchRecord = {
objectID,
type: `lvl${level}` as DocSearchRecord['type'],
hierarchy,
content: null, // Headings have no content
url,
url_without_anchor: prodUrl.toString(),
anchor,
weight: {
...calculateWeight({ url, type: `lvl${level}` as DocSearchRecord['type'] }),
position: positionRef.current++,
},
version,
lang,
language: lang,
docusaurus_tag,
};

records.push(headingRecord);

// Find content under this heading
const allBetweenHeadings = $(el).nextUntil(`h1,h2,h3,h4,h5,h6`);
const contentElements = allBetweenHeadings
.filter('p, li, td:last-child') // Check the elements themselves
.add(allBetweenHeadings.find('p, li, td:last-child')) // Check descendants
.toArray();

if (contentElements.length > 0) {
const contentText = contentElements
.map((el) => $(el).text().trim())
.filter(Boolean)
.join(' ');

if (contentText) {
const contentObjectID = createHash('sha256')
.update(Object.values(hierarchy).filter(Boolean).join('-') + `-content-${positionRef.current}`)
.digest('hex');

const contentRecord: DocSearchRecord = {
objectID: contentObjectID,
type: 'content',
hierarchy,
content: contentText,
url,
url_without_anchor: prodUrl.toString(),
anchor,
weight: {
...calculateWeight({ url, type: 'content' }),
position: positionRef.current++,
},
version,
lang,
language: lang,
docusaurus_tag,
};

records.push(contentRecord);
}
}
});

return records;
}

const positionRef = { current: position };
const lvl1 = extractHeadingLevel(1, positionRef);
const lvl2 = extractHeadingLevel(2, positionRef);
const lvl3 = extractHeadingLevel(3, positionRef);
const lvl4 = extractHeadingLevel(4, positionRef);
const lvl5 = extractHeadingLevel(5, positionRef);
const lvl6 = extractHeadingLevel(6, positionRef);

const parsedUrl = new URL(request.url);
const basePath = '/developers/plugin-tools/';
let pathname = parsedUrl.pathname;

if (pathname.startsWith(basePath)) {
pathname = pathname.slice(basePath.length);
}

const hierarchy = [...lvl1, ...lvl2, ...lvl3, ...lvl4, ...lvl5, ...lvl6];
return hierarchy;
}

function generateObjectId(request: LoadedRequest<Request>) {
const parsedUrl = new URL(request.url);
const basePath = '/developers/plugin-tools/';
let pathname = parsedUrl.pathname;

if (pathname.startsWith(basePath)) {
pathname = pathname.slice(basePath.length);
}
const segments = pathname.split('/').filter(Boolean);
return segments.length > 0 ? `${segments.join('_')}` : `index`;
}

const levelWeights = {
lvl0: 100,
lvl1: 100,
lvl2: 90,
lvl3: 80,
lvl4: 70,
lvl5: 60,
lvl6: 50,
content: 0,
};

const basePath = '/developers/plugin-tools/';

function calculateWeight({ url, type }: { url: string; type: DocSearchRecord['type'] }) {
const pathname = new URL(url).pathname;
const pathnameWithoutBasePath = pathname.startsWith(basePath) ? pathname.slice(basePath.length) : pathname;
const depth = pathnameWithoutBasePath.split('/').filter(Boolean).length;
const pageRank = Math.max(0, 110 - depth * 10);
return {
pageRank,
level: levelWeights[type],
};
}
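
// Example: a page at <base>/how-to-guides/extend-configurations/ sits two segments
// below the base path, so pageRank = 110 - 2 * 10 = 90, while the docs index page
// (depth 0) keeps the maximum pageRank of 110. Headings then outrank body text on
// the same page through the per-type `level` weight defined above.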

const crawler = new CheerioCrawler({
// The crawler downloads and processes the web pages in parallel, with a concurrency
// automatically managed based on the available system memory and CPU (see AutoscaledPool class).
// Here we define some hard limits for the concurrency.
minConcurrency: 10,
maxConcurrency: 50,
// On error, retry each page at most once.
maxRequestRetries: 1,

// Increase the timeout for processing of each page.
requestHandlerTimeoutSecs: 30,

// maxRequestsPerCrawl: 10,

// This function will be called for each URL to crawl.
// It accepts a single parameter, which is an object with options as:
// https://crawlee.dev/js/api/cheerio-crawler/interface/CheerioCrawlerOptions#requestHandler
// For demonstration we use only 2 of them:
// - request: an instance of the Request class with information such as the URL that is being crawled and HTTP method
// - $: the cheerio object containing parsed HTML
async requestHandler({ pushData, request, $ }) {
log.info(`Processing ${request.url}...`);
const result = generateAlgoliaRecords(request, $);
log.debug(inspect(result, { depth: null, colors: true }));
const objectID = generateObjectId(request);
await pushData(result, objectID);
},

// This function is called if the page processing failed more than maxRequestRetries + 1 times.
failedRequestHandler({ request }) {
log.warning(`Request ${request.url} failed twice.`);
},
});

const { urls } = await Sitemap.load('http://localhost:3000/developers/plugin-tools/sitemap.xml');
const localhostUrls = urls
.filter((url: string) => !url.endsWith('/search'))
.map((url: string) => url.replace(/https:\/\/grafana(-dev)?\.com/, 'http://localhost:3000'));

await crawler.run(localhostUrls);

// Can pass individual urls for testing purposes.
// const url = ['http://localhost:3000/developers/plugin-tools/how-to-guides/extend-configurations/'];
// await crawler.run(url);

log.info('Crawler finished.');
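
The crawler imports `DocSearchRecord` from `./types.ts`, which is not part of this diff. Inferred from the record literals built above, the shape is roughly the following sketch (the real `types.ts` may differ):

// Hypothetical sketch of scripts/algolia/types.ts, inferred from the records built in crawl.ts.
export interface DocSearchRecord {
  objectID: string;
  // Headings are typed by their level; body text under a heading is 'content'.
  type: 'lvl0' | 'lvl1' | 'lvl2' | 'lvl3' | 'lvl4' | 'lvl5' | 'lvl6' | 'content';
  hierarchy: {
    lvl0: string;
    lvl1: string | null;
    lvl2: string | null;
    lvl3: string | null;
    lvl4: string | null;
    lvl5: string | null;
    lvl6: string | null;
  };
  content: string | null; // null for heading records
  url: string;
  url_without_anchor: string;
  anchor: string | null;
  weight: {
    pageRank: number;
    level: number;
    position: number;
  };
  version: string;
  lang: string;
  language: string;
  docusaurus_tag: string;
}

Taken together with the commented-out workflow steps at the top of this diff, the intended flow appears to be: serve the built site locally (`npm run serve -w website -- --port 3000`), run `npm run generate-index` to crawl the sitemap into `docusaurus/website/storage`, then run `npm run upload-to-algolia` to push the records to the `developer-tools` index.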