Skip to content

Instantly share code, notes, and snippets.

@MartinMiles
Last active May 3, 2025 16:21
Show Gist options
  • Save MartinMiles/21b7250f6e97a672dbb8a7113ee276f1 to your computer and use it in GitHub Desktop.
Save MartinMiles/21b7250f6e97a672dbb8a7113ee276f1 to your computer and use it in GitHub Desktop.
Admin folder tool for XM Cloud that regenerates the sitemap media library item in a development environment, plus sitemap generation in Next.js
<%@ Page language="c#" %>
<%@ Import Namespace="Microsoft.Extensions.DependencyInjection" %>
<%@ Import Namespace="Sitecore.Abstractions" %>
<%@ Import Namespace="Sitecore.Configuration" %>
<%@ Import Namespace="Sitecore.Data" %>
<%@ Import Namespace="Sitecore.Data.Items" %>
<%@ Import Namespace="Sitecore.DependencyInjection" %>
<%@ Import Namespace="Sitecore.Events" %>
<%@ Import Namespace="Sitecore.Globalization" %>
<%@ Import Namespace="Sitecore.Pipelines" %>
<%@ Import Namespace="Sitecore.Publishing" %>
<%@ Import Namespace="Sitecore.Sites" %>
<%@ Import Namespace="Sitecore.Web" %>
<%@ Import Namespace="Sitecore.XA.Foundation.SiteMetadata.EventHandlers" %>
<%@ Import Namespace="Sitecore.XA.Foundation.SiteMetadata.Sitemap" %>
<%@ Import Namespace="Sitecore.XA.Foundation.SiteMetadata.Pipelines.Sitemap.GenerateSitemapJob" %>
<script runat="server">
// IMPORTANT
// Change the values of these two constants to match the Headless SXA site name and home item you want to generate the sitemap for.
// SITE_NAME is used by buttons 2 and 3 to look up the site context.
private const string SITE_NAME = "sxastarter";
// HOME_ITEM_ID is the item read from the master database by button 4 and handed to the ItemCrawler.
private const string HOME_ITEM_ID = "{BEE17B56-2406-4935-A730-CFE90356BE2C}";
</script>
<!DOCTYPE html>
<html>
<head>
<title>Sitecore XM Cloud Sitemap Developer Utilities</title>
<meta content="C#" name="CODE_LANGUAGE">
</head>
<body style="font-family: sans-serif">
<h1>Sitecore XM Cloud Sitemap Developer Utilities</h1>
<form runat="server">
<div>
Start with this button. It calls the SitemapCacheClearer.OnPublishEnd method with a mock OnPublishEnd event. The Sitecore CacheItemClearer should create one Sitemap refresh job per SXA site. It should save the sitemaps to the media library. You can track the jobs in the <a href="/sitecore/admin/Jobs.aspx" target="_blank">jobs admin page</a>.
</div>
<br />
<asp:Button id="SitemapCacheClearer"
Text="1. SitemapCacheClearer.OnPublishEnd()"
OnClick="SitemapCacheClearer_Click"
runat="server"/>
<br /><br />
<div>
If the above button does not generate the sitemap media library items, try these buttons in order.
</div>
<br />
<div>
This one runs the sitemap.generateSitemapJob pipeline for the site you defined in the SITE_NAME variable. It should save the sitemap to the media library.
</div>
<asp:Button id="GenerateSitemapJob"
Text="2. sitemap.generateSitemapJob pipeline"
OnClick="GenerateSitemapJob_Click"
runat="server"/>
<br /><br />
<div>
This one runs only the GenerateSitemap processor of the sitemap.generateSitemapJob pipeline for the site you defined in the SITE_NAME variable. It does not save the sitemap to the media library.
</div>
<asp:Button id="GenerateSitemap"
Text="3. GenerateSitemap.Process()"
OnClick="GenerateSitemap_Click"
runat="server"/>
<br /><br />
<div>
This one runs the ItemCrawler and lists the page items it found for the HOME_ITEM_ID variable you defined. It does not save the sitemap to the media library.
</div>
<asp:Button id="ItemCrawler"
Text="4. ItemCrawler.GetItems()"
OnClick="ItemCrawler_Click"
runat="server"/>
</form>
<br />
<h2>Output</h2>
<code>
<%=Output %>
</code>
</body>
</html>
<script runat="server">
// Raw (not yet HTML-encoded) text shown in the "Output" section of the page.
// Each button handler overwrites it; the Output property encodes it for rendering.
private string output = "None yet. Please click one of the buttons.";
// Button 1 handler: invokes SitemapCacheClearer.OnPublishEnd() with a hand-built
// "OnPublishEnd" event and writes the event result (cancel flag, messages,
// return values) to the page output.
void SitemapCacheClearer_Click(Object sender, EventArgs e)
{
    output = GetTime() + " Starting SitemapCacheClearer.OnPublishEnd()\n\n";

    var cacheClearer = new SitemapCacheClearer();

    // Build the mock publish event: a single-item publish from master to master
    // that names "Edge" as its publishing target.
    var masterDb = Factory.GetDatabase("master");
    var publishTargets = new List<string> { "Edge" };
    var publishOptions = new PublishOptions(masterDb, masterDb, PublishMode.SingleItem, Language.EnglishLanguage, DateTime.Now, publishTargets);
    var mockPublisher = new Publisher(publishOptions);
    var eventArgs = new SitecoreEventArgs("OnPublishEnd", new object[] { mockPublisher }, new EventResult());

    cacheClearer.OnPublishEnd(null, eventArgs);

    // Report whatever the handler left on the event result.
    output += "Cancelled?: " + eventArgs.Result.Cancel.ToString() + "\n";

    if (eventArgs.Result.HasMessages)
    {
        output += "Messages:\n";
        foreach (string text in eventArgs.Result.Messages)
        {
            output += "- " + text + "\n";
        }
    }

    if (eventArgs.Result.HasReturnValues)
    {
        output += "Return Values:\n";
        foreach (var value in eventArgs.Result.ReturnValues)
        {
            output += "- " + value.ToString() + "\n";
        }
    }

    output += "\n" + GetTime() + " SitemapCacheClearer.OnPublishEnd() done. The Sitemap should have been saved to the media library.";
}
// Button 2 handler: runs the "sitemap.generateSitemapJob" pipeline for the site
// named by SITE_NAME and writes the pipeline state, its messages and the
// generated sitemap content to the page output.
void GenerateSitemapJob_Click(Object sender, EventArgs e)
{
    output = GetTime() + " Starting the sitemap.generateSitemapJob pipeline.\n\n";

    SiteContext siteContext = SiteContextFactory.GetSiteContext(SITE_NAME);
    if (siteContext == null)
    {
        // A misconfigured SITE_NAME would otherwise surface as an unexplained
        // exception somewhere inside the pipeline.
        output += "The site \"" + SITE_NAME + "\" could not be resolved. Check the SITE_NAME constant at the top of this page.\n";
        output += "\n" + GetTime() + " sitemap.generateSitemapJob pipeline not started.";
        return;
    }

    GenerateSitemapJobArgs args = new GenerateSitemapJobArgs();
    args.SiteContext = siteContext;

    BaseCorePipelineManager pipelineManager = ServiceLocator.ServiceProvider.GetService<BaseCorePipelineManager>();
    pipelineManager.Run("sitemap.generateSitemapJob", (PipelineArgs)args);

    var messages = args.GetMessages();
    output += "Aborted?: " + args.Aborted.ToString() + "\n";
    output += "Suspended?: " + args.Suspended.ToString() + "\n";
    if (messages.Length > 0)
    {
        output += "Messages:\n";
        foreach (PipelineMessage message in messages)
        {
            output += "- " + message.Text + "\n";
        }
    }

    output += "\nGenerated Sitemap:\n";
    // Guard against a pipeline that stopped before producing any content.
    if (args.SitemapContent != null)
    {
        foreach (string item in args.SitemapContent.Values)
        {
            output += item + "\n";
        }
    }
    else
    {
        output += "(no sitemap content was produced)\n";
    }

    if (args.Aborted || args.Suspended)
    {
        output += "\nThe pipeline was aborted or suspended. The Sitemap might not have been saved to the media library.\n";
    }
    else
    {
        output += "\nThe Sitemap should have been saved to the media library.\n";
    }

    output += "\n" + GetTime() + " sitemap.generateSitemapJob pipeline done.";
}
// Button 3 handler: runs only the GenerateSitemap processor (not the whole
// pipeline) for the site named by SITE_NAME and writes the generated sitemap
// content to the page output.
void GenerateSitemap_Click(Object sender, EventArgs e)
{
    output = GetTime() + " Starting GenerateSitemap.Process()\n\n";

    SiteContext siteContext = SiteContextFactory.GetSiteContext(SITE_NAME);
    if (siteContext == null)
    {
        // Report a misconfigured SITE_NAME instead of letting the processor fail on a null site.
        output += "The site \"" + SITE_NAME + "\" could not be resolved. Check the SITE_NAME constant at the top of this page.\n";
        output += "\n" + GetTime() + " GenerateSitemap.Process() not started.";
        return;
    }

    GenerateSitemapJobArgs args = new GenerateSitemapJobArgs();
    args.SiteContext = siteContext;

    GenerateSitemap processor = new GenerateSitemap();
    processor.Process(args);

    output += "Generated Sitemap (Not saved to the media library):\n";
    // Guard against the processor leaving the content unset.
    if (args.SitemapContent != null)
    {
        foreach (string item in args.SitemapContent.Values)
        {
            output += item + "\n";
        }
    }
    else
    {
        output += "(no sitemap content was produced)\n";
    }

    output += "\n" + GetTime() + " GenerateSitemap.Process() done.";
}
// Button 4 handler: loads the item identified by HOME_ITEM_ID from the master
// database, passes it to ItemCrawler.GetItems() and lists the names of the
// returned items in the page output.
void ItemCrawler_Click(Object sender, EventArgs e)
{
    output = GetTime() + " Starting ItemCrawler.GetItems()\n\n";

    Database master = Factory.GetDatabase("master");
    Item homeItem = master.GetItem(HOME_ITEM_ID);
    if (homeItem == null)
    {
        // Report a wrong HOME_ITEM_ID instead of handing a null item to the crawler.
        output += "The item " + HOME_ITEM_ID + " was not found in the master database. Check the HOME_ITEM_ID constant at the top of this page.\n";
        output += "\n" + GetTime() + " ItemCrawler.GetItems() not started.";
        return;
    }

    ItemCrawler crawler = new ItemCrawler();
    IList<Item> items = crawler.GetItems(homeItem);

    output += "Pages returned by ItemCrawler.GetItems():\n";
    if (items != null)
    {
        foreach (Item item in items)
        {
            output += "- " + item.Name + "\n";
        }
    }

    output += "\n" + GetTime() + " ItemCrawler.GetItems() done.";
}
// Returns the current local time of day as a long time string; used to
// timestamp the start and end lines of each button's output.
string GetTime()
{
    DateTime localNow = DateTime.Now;
    string timestamp = localNow.ToLongTimeString();
    return timestamp;
}
// The output text prepared for rendering inside the page: HTML-encoded first,
// then spaces turned into non-breaking spaces and newlines into <br /> tags so
// the plain-text layout survives in the browser.
private string Output
{
    get
    {
        string encoded = Server.HtmlEncode(output);
        string withHardSpaces = encoded.Replace(" ", "&nbsp;");
        string withLineBreaks = withHardSpaces.Replace("\n", "<br />");
        return withLineBreaks;
    }
}
</script>
import type { NextApiRequest, NextApiResponse } from 'next';
import {
AxiosDataFetcher,
GraphQLSitemapXmlService,
AxiosResponse,
} from '@sitecore-jss/sitecore-jss-nextjs';
import { siteResolver } from 'lib/site-resolver';
import config from 'temp/config';
import { getPublicUrl } from '../../utils/publicUrlUtil';
import { Builder, parseString } from 'xml2js';
// Pattern (passed as a string to String.prototype.match) for URLs that start with an
// optional lowercase scheme followed by `//`, i.e. absolute or protocol-relative URLs.
const ABSOLUTE_URL_REGEXP = '^(?:[a-z]+:)?//';
// Replacement text used when rewriting French URLs; empty when PUBLIC_FR_HOSTNAME is unset.
const FRENCH_URL_DESIRED_AUTHORITY = process.env.PUBLIC_FR_HOSTNAME || '';
// Substring that identifies French URLs in the sitemap: the English hostname followed by `/fr`.
// NOTE(review): the `|| ''` fallback can never apply, because the concatenation always yields a
// non-empty string; when PUBLIC_EN_HOSTNAME is unset the value is the literal 'undefined/fr'.
// Do not "fix" this to '' without also changing the filters: `includes('')` is true for every URL.
const FRENCH_URL_INVALID_AUTHORITY_AND_PATH_PREFIX = process.env.PUBLIC_EN_HOSTNAME + '/fr' || '';
// Attributes carried by one <xhtml:link> element (exposed under the `$` key of the parsed node).
type XhtmlLinkAttributes = {
  xmlns: string;
  rel: string;
  hreflang: string;
  href: string;
};

// Shape of one <url> node of the parsed sitemap; every child element is an array of values.
type Url = {
  loc: string[];
  lastmod?: string[];
  changefreq?: string[];
  priority?: string[];
  'xhtml:link': { $: XhtmlLinkAttributes }[];
};
// Predicate for the English sitemap: keeps a <url> node only when its first <loc>
// value does NOT contain the French authority/path prefix.
const filterUrlsEN = (url: Url) => {
  const [firstLoc] = url.loc;
  const isFrenchUrl = firstLoc.includes(FRENCH_URL_INVALID_AUTHORITY_AND_PATH_PREFIX);
  return !isFrenchUrl;
};
// Predicate for the French sitemap: keeps a <url> node only when its first <loc>
// value contains the French authority/path prefix.
const filterUrlsFR = (url: Url) => {
  const [firstLoc] = url.loc;
  return firstLoc.includes(FRENCH_URL_INVALID_AUTHORITY_AND_PATH_PREFIX);
};
// Rewrites the first <loc> value of a <url> node in place, replacing the first occurrence
// of the French authority/path prefix with the desired French authority.
// Nodes without a non-empty first <loc> are returned untouched.
const updateLoc = (url: Url) => {
  const currentLoc = url.loc && url.loc[0];
  if (!currentLoc) {
    return url;
  }

  url.loc[0] = currentLoc.replace(
    FRENCH_URL_INVALID_AUTHORITY_AND_PATH_PREFIX,
    FRENCH_URL_DESIRED_AUTHORITY
  );
  return url;
};
// Rewrites, in place, the href of every <xhtml:link> whose hreflang is 'fr', replacing the
// first occurrence of the French authority/path prefix with the desired French authority.
// Nodes without <xhtml:link> children are returned untouched.
const updateFrenchXhtmlURLs = (url: Url) => {
  const alternateLinks = url['xhtml:link'];
  if (!alternateLinks) {
    return url;
  }

  for (const alternate of alternateLinks) {
    const attributes = alternate.$;
    if (attributes.hreflang !== 'fr') {
      continue;
    }
    // Update the href value
    attributes.href = attributes.href.replace(
      FRENCH_URL_INVALID_AUTHORITY_AND_PATH_PREFIX,
      FRENCH_URL_DESIRED_AUTHORITY
    );
  }
  return url;
};
/**
 * Next.js API route that serves sitemap XML for the site resolved from the request host.
 *
 * - When a sitemap path is found for the request, the sitemap is fetched as a stream.
 *   On hosts that match neither PUBLIC_FR_HOSTNAME nor PUBLIC_EN_HOSTNAME it is piped
 *   through unchanged; otherwise it is buffered, filtered per language, its French URLs
 *   are rewritten, and the rebuilt XML is sent.
 * - When no sitemap path is found, a sitemap index listing all sitemaps is returned,
 *   or a redirect to /404 if there are none.
 *
 * Every failure while fetching, reading, parsing or transforming the sitemap ends in a
 * redirect to /404, so a request is never left without a response.
 *
 * @param req incoming API request; `query.id` selects a specific sitemap when present
 * @param res API response the XML (or redirect) is written to
 */
const sitemapApi = async (
  req: NextApiRequest,
  res: NextApiResponse
): Promise<NextApiResponse | void> => {
  const {
    query: { id },
  } = req;

  // Resolve site based on hostname (port stripped)
  const hostName = req.headers['host']?.split(':')[0] || 'localhost';
  const site = siteResolver.getByHost(hostName);

  // create sitemap graphql service
  const sitemapXmlService = new GraphQLSitemapXmlService({
    endpoint: config.graphQLEndpoint,
    apiKey: config.sitecoreApiKey,
    siteName: site.name,
  });

  // if url has sitemap-{n}.xml type. The id can be null if it's a sitemap.xml request
  const sitemapPath = await sitemapXmlService.getSitemap(id as string);

  // Determine language of current site; 'localhost' means "no language-specific handling"
  let lang = 'localhost';
  if (process.env.PUBLIC_FR_HOSTNAME && hostName.includes(process.env.PUBLIC_FR_HOSTNAME)) {
    lang = 'fr';
  } else if (process.env.PUBLIC_EN_HOSTNAME && hostName.includes(process.env.PUBLIC_EN_HOSTNAME)) {
    lang = 'en';
  }

  // if sitemap is match otherwise redirect to 404 page
  if (sitemapPath) {
    const isAbsoluteUrl = sitemapPath.match(ABSOLUTE_URL_REGEXP);
    const sitemapUrl = isAbsoluteUrl ? sitemapPath : `${config.sitecoreApiHost}${sitemapPath}`;
    res.setHeader('Content-Type', 'text/xml;charset=utf-8');

    return new AxiosDataFetcher()
      .get(sitemapUrl, {
        responseType: 'stream',
      })
      .then((response: AxiosResponse) => {
        if (lang === 'localhost') {
          response.data.pipe(res);
          return;
        }

        // BEGIN CUSTOMIZATION - Filter the sitemap per domain/language, and set the French domain to French URLs.

        // Answers a late failure the same way the fetch failure below does, but only
        // if nothing has been sent yet.
        const fail = (message: string, error: unknown) => {
          console.error(message, error);
          if (!res.headersSent) {
            res.redirect('/404');
          }
        };

        // Need to prepare stream from sitemap url
        const dataChunks: Buffer[] = [];
        response.data.on('data', (chunk: Buffer) => {
          dataChunks.push(chunk);
        });
        // Without this handler a broken upstream stream would leave the request hanging.
        response.data.on('error', (streamError: Error) => {
          fail('Error reading sitemap stream:', streamError);
        });
        response.data.on('end', () => {
          // Concatenate the data chunks to get the complete XML content
          const xmlData = Buffer.concat(dataChunks).toString();

          // Now, parse the XML data into an object using xml2js
          parseString(xmlData, (err, result) => {
            if (err) {
              fail('Error parsing XML:', err);
              return;
            }

            try {
              if (!result?.urlset) {
                fail('Error parsing XML:', new Error('Sitemap has no <urlset> root element'));
                return;
              }

              // A sitemap without any <url> node has no `url` property at all.
              const urls: Url[] = result.urlset.url ?? [];

              if (lang === 'en') {
                result.urlset.url = urls.filter(filterUrlsEN).map(updateFrenchXhtmlURLs);
              } else if (lang === 'fr') {
                result.urlset.url = urls
                  .filter(filterUrlsFR)
                  .map(updateLoc)
                  .map(updateFrenchXhtmlURLs);
              }

              // Convert the modified object back to XML format
              const xmlBuilder = new Builder();
              const modifiedXml = xmlBuilder.buildObject(result);

              // Keep the charset declared for the untouched sitemap
              res.setHeader('Content-Type', 'text/xml;charset=utf-8');
              res.send(modifiedXml);
            } catch (transformError) {
              fail('Error transforming sitemap XML:', transformError);
            }
            // END CUSTOMIZATION
          });
        });
      })
      .catch(() => res.redirect('/404'));
  }

  // this approach if user goes to /sitemap.xml - under it generate xml page with list of sitemaps
  const sitemaps = await sitemapXmlService.fetchSitemaps();

  if (!sitemaps.length) {
    return res.redirect('/404');
  }

  const SitemapLinks = sitemaps
    .map((item) => {
      const parseUrl = item.split('/');
      const lastSegment = parseUrl[parseUrl.length - 1];
      return `<sitemap>
<loc>${getPublicUrl()}/${lastSegment}</loc>
</sitemap>`;
    })
    .join('');

  res.setHeader('Content-Type', 'text/xml;charset=utf-8');
  return res.send(`
<sitemapindex xmlns="http://sitemaps.org/schemas/sitemap/0.9" encoding="UTF-8">${SitemapLinks}</sitemapindex>
`);
};
export default sitemapApi;
@MartinMiles
Copy link
Author

Credits to:

  • Jeff L'Heureux
  • Mike Payne

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment