From 37ed3b9cd0accba5e4a02b407c6ae2f79b30ed68 Mon Sep 17 00:00:00 2001 From: nj Date: Fri, 18 Oct 2024 16:00:31 +0530 Subject: [PATCH 1/8] Programmatic sitemap generator --- README.md | 32 ++++++++++++++++++++++------ package-lock.json | 29 ++++++++++++++++++++++++++ package.json | 3 ++- src/index.ts | 6 ++++-- src/sitemaper.ts | 53 +++++++++++++++++++++++++++++++++++++++++++---- 5 files changed, 110 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 49d2fdf..3165a3f 100644 --- a/README.md +++ b/README.md @@ -12,16 +12,23 @@ - 📦 Customizable Output Path. - 🎨 Flexible Change Frequency. -## 🕹 Usage +## 🕹 CLI Usage + +### Simple usage: + +``` +npx sitemaper -w https://www.nayanui.com +``` +### Advanced Usage: ``` -npx sitemaper --website https://www.example.com --depth 10 --output ./sitemap.xml --changefreq daily +npx sitemaper --website https://www.nayanui.com --depth 10 --output ./sitemap.xml --changefreq daily ``` You can also use the shorter version of this command. ``` -npx sitemaper -w https://www.example.com -d 10 -o ./sitemap.xml -f daily +npx sitemaper -w https://www.nayanui.com -d 10 -o ./sitemap.xml -f daily ``` You can also integrate Sitemaper with your localhost to generate sitemaps without any deployments. @@ -34,15 +41,28 @@ this case it crawl your localhost URL and replace it with replacement URL. | Parameter | Default | Usage | |-------------------|-------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| --website / -w | https://www.example.com | Pass website base URL to start crawling. | -| --replacer / -r | '' | Pass replacement URL to replace crawled url, this will be mostly useful to crawl localhost and replace it with original URL. | +| --website / -w | https://www.nayanui.com | Pass website base URL to start crawling. | +| --replacer / -r | '' | Pass replacement URL to replace crawled url, this will be mostly useful to crawl localhost and replace it with original URL. | | --depth / -d | 10 | Pass depth to let the generator know how depth it need to crawl. | | --output / -o | ./sitemap.xml | Pass output to let the generator know where to keep generated sitemap. | | --changefreq / -f | daily | Pass change frequency to let the generator know how frequently your content change, possible options are ***always, hourly, daily, weekly, monthly, yearly, never***. | +## 🕹 Programatic Usage + +You can also use Sitemaper programmatically with Node.js. Check out the implementation below. +``` +import { generateSitemap, validateSitemap } from 'sitemaper'; + +generateSitemap('https://www.nayanui.com', '', 10, './sitemap.xml', 'daily'); + +validateSitemap('./sitemap.xml'); + +``` + ## 🖥 Future plans -Create a web application to automatically generate and submit sitemaps to search engines on a schedule. +- Support multiple sitemaps if website is bigger than certain limit. +- Create a web application to automatically generate and submit sitemaps to search engines on a schedule. ## 🤝 Contributing diff --git a/package-lock.json b/package-lock.json index 696ebc9..d54c2e2 100644 --- a/package-lock.json +++ b/package-lock.json @@ -16,6 +16,7 @@ "ora": "^8.1.0", "ts-node": "^10.9.2", "typescript": "^5.6.3", + "xml2js": "^0.6.2", "xmlbuilder": "^15.1.1" }, "bin": { @@ -680,6 +681,12 @@ "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", "license": "MIT" }, + "node_modules/sax": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/sax/-/sax-1.4.1.tgz", + "integrity": "sha512-+aWOz7yVScEGoKNd4PA10LZ8sk0A/z5+nXQG5giUO5rprX9jgYsTdov9qCchZiPIZezbZH+jRut8nPodFAX4Jg==", + "license": "ISC" + }, "node_modules/signal-exit": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz", @@ -834,6 +841,28 @@ "node": ">=18" } }, + "node_modules/xml2js": { + "version": "0.6.2", + "resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.6.2.tgz", + "integrity": "sha512-T4rieHaC1EXcES0Kxxj4JWgaUQHDk+qwHcYOCFHfiwKz7tOVPLq7Hjq9dM1WCMhylqMEfP7hMcOIChvotiZegA==", + "license": "MIT", + "dependencies": { + "sax": ">=0.6.0", + "xmlbuilder": "~11.0.0" + }, + "engines": { + "node": ">=4.0.0" + } + }, + "node_modules/xml2js/node_modules/xmlbuilder": { + "version": "11.0.1", + "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-11.0.1.tgz", + "integrity": "sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA==", + "license": "MIT", + "engines": { + "node": ">=4.0" + } + }, "node_modules/xmlbuilder": { "version": "15.1.1", "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-15.1.1.tgz", diff --git a/package.json b/package.json index f3e13ec..f8fbd03 100644 --- a/package.json +++ b/package.json @@ -3,7 +3,7 @@ "version": "1.1.0", "private": false, "sideEffects": false, - "description": "Simple tool for generating sitemaps for your website.", + "description": "Sitemaper is a powerful sitemap generator designed to simplify the process of creating accurate and efficient sitemaps for websites. It crawls through your site, maps its structure, and generates an optimized sitemap, helping improve SEO and site visibility.", "type": "module", "main": "./src/index.ts", "homepage": "https://www.nayanui.com/devtools/sitemaper", @@ -23,6 +23,7 @@ "ora": "^8.1.0", "ts-node": "^10.9.2", "typescript": "^5.6.3", + "xml2js": "^0.6.2", "xmlbuilder": "^15.1.1" }, "files": [ diff --git a/src/index.ts b/src/index.ts index 1b28bd5..fce4bcf 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,7 +1,7 @@ #!/usr/bin/env node import { Command } from 'commander'; -import { crawlWebsite } from "./sitemaper.js"; +import { generateSitemap, validateSitemap } from "./sitemaper.js"; import { existsSync } from 'fs'; import { URL } from 'url'; @@ -65,7 +65,9 @@ program // console.log({ website, replacer, depth, output, changefreq }); - crawlWebsite(website, replacer, depth, output, changefreq); + return generateSitemap(website, replacer, depth, output, changefreq); }); +export { generateSitemap, validateSitemap }; + program.parse(process.argv); diff --git a/src/sitemaper.ts b/src/sitemaper.ts index fa882af..4ff2804 100644 --- a/src/sitemaper.ts +++ b/src/sitemaper.ts @@ -1,7 +1,9 @@ import axios from 'axios'; import * as cheerio from 'cheerio'; -import { writeFileSync } from 'fs'; +import {readFileSync, writeFileSync} from 'fs'; import { URL } from 'url'; +// @ts-ignore +import { parseStringPromise } from 'xml2js'; import xmlbuilder from 'xmlbuilder'; import ora from 'ora'; // Import ora for loader @@ -37,7 +39,7 @@ const fetchAndParse = async (url: string, website: string, spinner: any): Promis }; // Function to generate XML sitemap -const generateSitemap = (urls: { url: string; depth: number }[], maxDepth: number, changefreq: string, website: string, replacer: string): string => { +const buildSitemap = (urls: { url: string; depth: number }[], maxDepth: number, changefreq: string, website: string, replacer: string): string => { const root = xmlbuilder .create('urlset', { version: '1.0', encoding: 'UTF-8' }) .att('xmlns', 'http://www.sitemaps.org/schemas/sitemap/0.9') @@ -67,7 +69,7 @@ const generateSitemap = (urls: { url: string; depth: number }[], maxDepth: numbe }; // Main function to crawl and generate sitemap -export const crawlWebsite = async (website: string, replacer: string, maxDepth: number, output: string, changefreq: string): Promise => { +export const generateSitemap = async (website: string, replacer: string, maxDepth: number, output: string, changefreq: string): Promise => { const spinner = ora(`Crawling website: ${website}`).start(); // Start the spinner const queue: { url: string; depth: number }[] = [{ url: website, depth: 0 }]; @@ -95,10 +97,53 @@ export const crawlWebsite = async (website: string, replacer: string, maxDepth: return { url, depth }; }); - const sitemapXml = generateSitemap(urlsWithDepth, maxDepth, changefreq, website, replacer); + const sitemapXml = buildSitemap(urlsWithDepth, maxDepth, changefreq, website, replacer); // Save the generated sitemap to a file writeFileSync(output, sitemapXml); spinner.succeed(`Sitemap saved to ${output}`); }; + +// Function to validate the sitemap file +export const validateSitemap = async (sitemapPath: string): Promise => { + try { + const sitemapContent = readFileSync(sitemapPath, 'utf-8'); + + // Parse the XML sitemap + const result = await parseStringPromise(sitemapContent); + + // Check if root is + if (!result.urlset || !Array.isArray(result.urlset.url)) { + throw new Error("Invalid sitemap: Root element must be ."); + } + + // Validate each entry + result.urlset.url.forEach((entry: any) => { + if (!entry.loc || !entry.loc[0]) { + throw new Error("Invalid sitemap: Each must contain a element."); + } + + // Validate URL format + try { + new URL(entry.loc[0]); + } catch { + throw new Error(`Invalid URL format in sitemap: ${entry.loc[0]}`); + } + + // Optionally validate changefreq and priority (if they exist) + if (entry.changefreq && !['always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never'].includes(entry.changefreq[0])) { + throw new Error(`Invalid value: ${entry.changefreq[0]}`); + } + if (entry.priority && (isNaN(entry.priority[0]) || entry.priority[0] < 0 || entry.priority[0] > 1)) { + throw new Error(`Invalid value: ${entry.priority[0]}`); + } + }); + + console.log('Sitemap is valid.'); + return true; + } catch (error: any) { + console.error('Sitemap validation error:', error.message); + return false; + } +}; \ No newline at end of file From 705676926924f39e80734a5b4fa9d88f3c6f75c3 Mon Sep 17 00:00:00 2001 From: nj Date: Fri, 18 Oct 2024 16:00:44 +0530 Subject: [PATCH 2/8] Programmatic sitemap generator --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 3165a3f..b270782 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,7 @@ this case it crawl your localhost URL and replace it with replacement URL. ## 🕹 Programatic Usage You can also use Sitemaper programmatically with Node.js. Check out the implementation below. + ``` import { generateSitemap, validateSitemap } from 'sitemaper'; From 33f073f1d8c32c56358bc7362f0a368d238b06e0 Mon Sep 17 00:00:00 2001 From: nj Date: Fri, 18 Oct 2024 16:06:06 +0530 Subject: [PATCH 3/8] Programmatic sitemap generator --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b270782..1a5dc29 100644 --- a/README.md +++ b/README.md @@ -62,8 +62,10 @@ validateSitemap('./sitemap.xml'); ## 🖥 Future plans -- Support multiple sitemaps if website is bigger than certain limit. -- Create a web application to automatically generate and submit sitemaps to search engines on a schedule. +- [x] Create Sitemaper CLI tool to generate sitemaps efficiently. +- [x] Create Sitemaper programatic API's to generate sitemaps efficiently. +- [ ] Support multiple sitemaps if website is bigger than certain limit. +- [ ] Create a web application to automatically generate and submit sitemaps to search engines on a schedule. ## 🤝 Contributing From e3a1ff52e535383293d3b65aa38c44e4a3bdfbea Mon Sep 17 00:00:00 2001 From: nj Date: Fri, 18 Oct 2024 16:06:26 +0530 Subject: [PATCH 4/8] Programmatic sitemap generator --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1a5dc29..d7e1425 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ validateSitemap('./sitemap.xml'); ## 🖥 Future plans - [x] Create Sitemaper CLI tool to generate sitemaps efficiently. -- [x] Create Sitemaper programatic API's to generate sitemaps efficiently. +- [x] Create Sitemaper programmatic API's to generate sitemaps efficiently. - [ ] Support multiple sitemaps if website is bigger than certain limit. - [ ] Create a web application to automatically generate and submit sitemaps to search engines on a schedule. From f8013a0d5e14dd7b9694a26caac57036f7d05ff3 Mon Sep 17 00:00:00 2001 From: nj Date: Fri, 18 Oct 2024 16:53:01 +0530 Subject: [PATCH 5/8] Programmatic sitemap generator --- README.md | 33 +++++++++++++++++++------- src/index.ts | 62 +++++++++++------------------------------------- src/sitemaper.ts | 15 ++++++++---- src/utils.ts | 40 +++++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+), 61 deletions(-) create mode 100644 src/utils.ts diff --git a/README.md b/README.md index d7e1425..be471ab 100644 --- a/README.md +++ b/README.md @@ -14,27 +14,27 @@ ## 🕹 CLI Usage -### Simple usage: +### Simple generation usage: ``` -npx sitemaper -w https://www.nayanui.com +npx sitemaper generate -w https://www.nayanui.com ``` -### Advanced Usage: +### Advanced generation Usage: ``` -npx sitemaper --website https://www.nayanui.com --depth 10 --output ./sitemap.xml --changefreq daily +npx sitemaper generate --website https://www.nayanui.com --depth 10 --output ./sitemap.xml --changefreq daily ``` You can also use the shorter version of this command. ``` -npx sitemaper -w https://www.nayanui.com -d 10 -o ./sitemap.xml -f daily +npx sitemaper generate -w https://www.nayanui.com -d 10 -o ./sitemap.xml -f daily ``` You can also integrate Sitemaper with your localhost to generate sitemaps without any deployments. ``` -npx sitemaper -w http://localhost:3000 -r https://www.nayanui.com -d 10 -o ./sitemap.xml -f daily +npx sitemaper generate -w http://localhost:3000 -r https://www.nayanui.com -d 10 -o ./sitemap.xml -f daily ``` this case it crawl your localhost URL and replace it with replacement URL. @@ -47,6 +47,23 @@ this case it crawl your localhost URL and replace it with replacement URL. | --output / -o | ./sitemap.xml | Pass output to let the generator know where to keep generated sitemap. | | --changefreq / -f | daily | Pass change frequency to let the generator know how frequently your content change, possible options are ***always, hourly, daily, weekly, monthly, yearly, never***. | + +### Sitemap validation usage: + +``` +npx sitemaper validate --output ./sitemap.xml +``` + +You can also use the shorter version of this command. + +``` +npx sitemaper validate -o ./sitemap.xml +``` + +| Parameter | Default | Usage | +|-------------------|-------------------------|-----------------------------------------------------------------------------| +| --output / -o | ./sitemap.xml | Pass output to let the generator know where to find and validate sitemap. | + ## 🕹 Programatic Usage You can also use Sitemaper programmatically with Node.js. Check out the implementation below. @@ -62,8 +79,8 @@ validateSitemap('./sitemap.xml'); ## 🖥 Future plans -- [x] Create Sitemaper CLI tool to generate sitemaps efficiently. -- [x] Create Sitemaper programmatic API's to generate sitemaps efficiently. +- [x] Create Sitemaper CLI tool to generate and validate sitemaps efficiently. +- [x] Create Sitemaper programmatic API's to generate and validate sitemaps efficiently. - [ ] Support multiple sitemaps if website is bigger than certain limit. - [ ] Create a web application to automatically generate and submit sitemaps to search engines on a schedule. diff --git a/src/index.ts b/src/index.ts index fce4bcf..9d03bfe 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,71 +1,37 @@ #!/usr/bin/env node +import {readFileSync} from "fs"; import { Command } from 'commander'; import { generateSitemap, validateSitemap } from "./sitemaper.js"; -import { existsSync } from 'fs'; -import { URL } from 'url'; +import {validateChangefreq, validateDepth, validateOutput, validateWebsite} from "./utils.js"; +const { name, version, description } = JSON.parse(readFileSync('./package.json', 'utf8')); const program = new Command(); -// Helper function to validate depth is a positive integer -const validateDepth = (depth: string) => { - const parsedDepth = parseInt(depth, 10); - if (isNaN(parsedDepth) || parsedDepth < 1) { - throw new Error('Depth must be a positive integer greater than 0.'); - } - return parsedDepth; -}; - -// Helper function to validate changefreq value -const validateChangefreq = (changefreq: string) => { - const validOptions = ['always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never']; - if (!validOptions.includes(changefreq)) { - throw new Error(`Invalid changefreq value. Accepted values are: ${validOptions.join(', ')}`); - } - return changefreq; -}; - -// Helper function to validate website URL -const validateWebsite = (website: string) => { - try { - new URL(website); - return website; - } catch (error) { - throw new Error('Invalid website URL.'); - } -}; - -// Helper function to validate output path (for example, it checks if the directory exists) -const validateOutput = (output: string) => { - const pathParts = output.split('/'); - const dirPath = pathParts.slice(0, -1).join('/'); - if (dirPath && !existsSync(dirPath)) { - throw new Error(`The directory ${dirPath} does not exist.`); - } - return output; -}; - -program - .name('sitemaper') - .description('Simple tool for generating sitemaps for your website.') - .version('1.0.0'); +program.name(name).description(description).version(version); program + .command('generate') .option('-w, --website ', 'The URL of the website to crawl', validateWebsite) .option('-r, --replacer ', 'The URL of the website to be replaced', validateWebsite) .option('-d, --depth ', 'Depth of the website to crawl', validateDepth) .option('-o, --output ', 'Output path for the sitemap.xml', validateOutput) .option('-f, --changefreq ', 'Change frequency for the sitemap (always, hourly, daily, weekly, monthly, yearly, never)', validateChangefreq) .action((options) => { - const website = options.website || 'https://www.example.com'; + const website = options.website || 'https://www.nayanui.com'; const replacer = options.replacer || ''; const depth = options.depth || 10; const output = options.output || './sitemap.xml'; const changefreq = options.changefreq || 'daily'; + generateSitemap(website, replacer, depth, output, changefreq); + }); - // console.log({ website, replacer, depth, output, changefreq }); - - return generateSitemap(website, replacer, depth, output, changefreq); +program + .command('validate') + .option('-o, --output ', 'Output path for the sitemap.xml', validateOutput) + .action((options) => { + const output = options.output || './sitemap.xml'; + validateSitemap(output); }); export { generateSitemap, validateSitemap }; diff --git a/src/sitemaper.ts b/src/sitemaper.ts index 4ff2804..8c85b85 100644 --- a/src/sitemaper.ts +++ b/src/sitemaper.ts @@ -106,7 +106,9 @@ export const generateSitemap = async (website: string, replacer: string, maxDept }; // Function to validate the sitemap file -export const validateSitemap = async (sitemapPath: string): Promise => { +export const validateSitemap = async (sitemapPath: string): Promise<{ status: boolean, message: string }> => { + const spinner = ora(`Validating sitemap: ${sitemapPath}`).start(); // Start the spinner + try { const sitemapContent = readFileSync(sitemapPath, 'utf-8'); @@ -120,6 +122,7 @@ export const validateSitemap = async (sitemapPath: string): Promise => // Validate each entry result.urlset.url.forEach((entry: any) => { + spinner.text = `Total links found ${result.urlset.url.length}, Validating ${entry.loc || entry.loc[0]}`; if (!entry.loc || !entry.loc[0]) { throw new Error("Invalid sitemap: Each must contain a element."); } @@ -140,10 +143,12 @@ export const validateSitemap = async (sitemapPath: string): Promise => } }); - console.log('Sitemap is valid.'); - return true; + const message = `Sitemap is valid: ${sitemapPath}`; + spinner.succeed(message); + return { status: true, message: message }; } catch (error: any) { - console.error('Sitemap validation error:', error.message); - return false; + const message = `Sitemap validation error: ${error.message}`; + spinner.fail(message) + return { status: true, message: message }; } }; \ No newline at end of file diff --git a/src/utils.ts b/src/utils.ts new file mode 100644 index 0000000..8785724 --- /dev/null +++ b/src/utils.ts @@ -0,0 +1,40 @@ +import {URL} from "url"; +import {existsSync} from "fs"; + +// Helper function to validate depth is a positive integer +export const validateDepth = (depth: string) => { + const parsedDepth = parseInt(depth, 10); + if (isNaN(parsedDepth) || parsedDepth < 1) { + throw new Error('Depth must be a positive integer greater than 0.'); + } + return parsedDepth; +}; + +// Helper function to validate changefreq value +export const validateChangefreq = (changefreq: string) => { + const validOptions = ['always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never']; + if (!validOptions.includes(changefreq)) { + throw new Error(`Invalid changefreq value. Accepted values are: ${validOptions.join(', ')}`); + } + return changefreq; +}; + +// Helper function to validate website URL +export const validateWebsite = (website: string) => { + try { + new URL(website); + return website; + } catch (error) { + throw new Error('Invalid website URL.'); + } +}; + +// Helper function to validate output path (for example, it checks if the directory exists) +export const validateOutput = (output: string) => { + const pathParts = output.split('/'); + const dirPath = pathParts.slice(0, -1).join('/'); + if (dirPath && !existsSync(dirPath)) { + throw new Error(`The directory ${dirPath} does not exist.`); + } + return output; +}; \ No newline at end of file From 1a416ad7f2e72d3f8030e6b920b2f5723518fcf0 Mon Sep 17 00:00:00 2001 From: nj Date: Fri, 18 Oct 2024 16:53:19 +0530 Subject: [PATCH 6/8] Programmatic sitemap generator --- package-lock.json | 4 ++-- package.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/package-lock.json b/package-lock.json index d54c2e2..1a30615 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "sitemaper", - "version": "1.1.0", + "version": "1.2.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "sitemaper", - "version": "1.1.0", + "version": "1.2.0", "license": "ISC", "dependencies": { "@types/node": "^22.7.5", diff --git a/package.json b/package.json index f8fbd03..c5320ef 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "sitemaper", - "version": "1.1.0", + "version": "1.2.0", "private": false, "sideEffects": false, "description": "Sitemaper is a powerful sitemap generator designed to simplify the process of creating accurate and efficient sitemaps for websites. It crawls through your site, maps its structure, and generates an optimized sitemap, helping improve SEO and site visibility.", From 9b135bb32584ab85e72edef5f986f131488df4c4 Mon Sep 17 00:00:00 2001 From: nj Date: Fri, 18 Oct 2024 23:43:39 +0530 Subject: [PATCH 7/8] Programmatic sitemap generator --- .github/workflows/npm-publish.yml | 4 +-- README.md | 16 +++++----- package.json | 3 +- src/index.ts | 38 +++++++++++----------- src/sitemaper.ts | 53 +++++++++++++++---------------- src/utils.ts | 18 +++++------ 6 files changed, 65 insertions(+), 67 deletions(-) diff --git a/.github/workflows/npm-publish.yml b/.github/workflows/npm-publish.yml index 2a18f2d..4130377 100644 --- a/.github/workflows/npm-publish.yml +++ b/.github/workflows/npm-publish.yml @@ -16,8 +16,8 @@ jobs: - name: Set up Node.js uses: actions/setup-node@v3 with: - node-version: '20' - cache: 'npm' + node-version: "20" + cache: "npm" # Add npm authentication for publishing - name: Authenticate with npm diff --git a/README.md b/README.md index be471ab..0974fec 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ ``` npx sitemaper generate -w https://www.nayanui.com ``` + ### Advanced generation Usage: ``` @@ -40,13 +41,12 @@ npx sitemaper generate -w http://localhost:3000 -r https://www.nayanui.com -d 10 this case it crawl your localhost URL and replace it with replacement URL. | Parameter | Default | Usage | -|-------------------|-------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| ----------------- | ----------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | --website / -w | https://www.nayanui.com | Pass website base URL to start crawling. | -| --replacer / -r | '' | Pass replacement URL to replace crawled url, this will be mostly useful to crawl localhost and replace it with original URL. | +| --replacer / -r | '' | Pass replacement URL to replace crawled url, this will be mostly useful to crawl localhost and replace it with original URL. | | --depth / -d | 10 | Pass depth to let the generator know how depth it need to crawl. | | --output / -o | ./sitemap.xml | Pass output to let the generator know where to keep generated sitemap. | -| --changefreq / -f | daily | Pass change frequency to let the generator know how frequently your content change, possible options are ***always, hourly, daily, weekly, monthly, yearly, never***. | - +| --changefreq / -f | daily | Pass change frequency to let the generator know how frequently your content change, possible options are **_always, hourly, daily, weekly, monthly, yearly, never_**. | ### Sitemap validation usage: @@ -60,9 +60,9 @@ You can also use the shorter version of this command. npx sitemaper validate -o ./sitemap.xml ``` -| Parameter | Default | Usage | -|-------------------|-------------------------|-----------------------------------------------------------------------------| -| --output / -o | ./sitemap.xml | Pass output to let the generator know where to find and validate sitemap. | +| Parameter | Default | Usage | +| ------------- | ------------- | ------------------------------------------------------------------------- | +| --output / -o | ./sitemap.xml | Pass output to let the generator know where to find and validate sitemap. | ## 🕹 Programatic Usage @@ -73,7 +73,7 @@ import { generateSitemap, validateSitemap } from 'sitemaper'; generateSitemap('https://www.nayanui.com', '', 10, './sitemap.xml', 'daily'); -validateSitemap('./sitemap.xml'); +validateSitemap('./sitemap.xml'); ``` diff --git a/package.json b/package.json index c5320ef..a6a9626 100644 --- a/package.json +++ b/package.json @@ -11,7 +11,8 @@ "sitemaper": "./dist/index.js" }, "scripts": { - "build": "tsc" + "build": "tsc", + "format": "npx prettier --write --print-width 150 ." }, "author": "Niranjan Devasani", "license": "ISC", diff --git a/src/index.ts b/src/index.ts index 9d03bfe..978c475 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,36 +1,36 @@ #!/usr/bin/env node -import {readFileSync} from "fs"; -import { Command } from 'commander'; +import { readFileSync } from "fs"; +import { Command } from "commander"; import { generateSitemap, validateSitemap } from "./sitemaper.js"; -import {validateChangefreq, validateDepth, validateOutput, validateWebsite} from "./utils.js"; -const { name, version, description } = JSON.parse(readFileSync('./package.json', 'utf8')); +import { validateChangefreq, validateDepth, validateOutput, validateWebsite } from "./utils.js"; +const { name, version, description } = JSON.parse(readFileSync("./package.json", "utf8")); const program = new Command(); program.name(name).description(description).version(version); program - .command('generate') - .option('-w, --website ', 'The URL of the website to crawl', validateWebsite) - .option('-r, --replacer ', 'The URL of the website to be replaced', validateWebsite) - .option('-d, --depth ', 'Depth of the website to crawl', validateDepth) - .option('-o, --output ', 'Output path for the sitemap.xml', validateOutput) - .option('-f, --changefreq ', 'Change frequency for the sitemap (always, hourly, daily, weekly, monthly, yearly, never)', validateChangefreq) + .command("generate") + .option("-w, --website ", "The URL of the website to crawl", validateWebsite) + .option("-r, --replacer ", "The URL of the website to be replaced", validateWebsite) + .option("-d, --depth ", "Depth of the website to crawl", validateDepth) + .option("-o, --output ", "Output path for the sitemap.xml", validateOutput) + .option("-f, --changefreq ", "Change frequency for the sitemap (always, hourly, daily, weekly, monthly, yearly, never)", validateChangefreq) .action((options) => { - const website = options.website || 'https://www.nayanui.com'; - const replacer = options.replacer || ''; - const depth = options.depth || 10; - const output = options.output || './sitemap.xml'; - const changefreq = options.changefreq || 'daily'; - generateSitemap(website, replacer, depth, output, changefreq); + const website = options.website || "https://www.nayanui.com"; + const replacer = options.replacer || ""; + const depth = options.depth || 10; + const output = options.output || "./sitemap.xml"; + const changefreq = options.changefreq || "daily"; + generateSitemap(website, replacer, depth, output, changefreq); }); program - .command('validate') - .option('-o, --output ', 'Output path for the sitemap.xml', validateOutput) + .command("validate") + .option("-o, --output ", "Output path for the sitemap.xml", validateOutput) .action((options) => { - const output = options.output || './sitemap.xml'; + const output = options.output || "./sitemap.xml"; validateSitemap(output); }); diff --git a/src/sitemaper.ts b/src/sitemaper.ts index 8c85b85..83257fa 100644 --- a/src/sitemaper.ts +++ b/src/sitemaper.ts @@ -1,11 +1,11 @@ -import axios from 'axios'; -import * as cheerio from 'cheerio'; -import {readFileSync, writeFileSync} from 'fs'; -import { URL } from 'url'; +import axios from "axios"; +import * as cheerio from "cheerio"; +import { readFileSync, writeFileSync } from "fs"; +import { URL } from "url"; // @ts-ignore -import { parseStringPromise } from 'xml2js'; -import xmlbuilder from 'xmlbuilder'; -import ora from 'ora'; // Import ora for loader +import { parseStringPromise } from "xml2js"; +import xmlbuilder from "xmlbuilder"; +import ora from "ora"; // Import ora for loader // Set to store visited URLs const visited = new Set(); @@ -18,8 +18,8 @@ const fetchAndParse = async (url: string, website: string, spinner: any): Promis const links: string[] = []; // Extract links from tags - $('a').each((_: any, element: any) => { - const href = $(element).attr('href'); + $("a").each((_: any, element: any) => { + const href = $(element).attr("href"); if (href) { const absoluteUrl = new URL(href, url).href; @@ -41,27 +41,24 @@ const fetchAndParse = async (url: string, website: string, spinner: any): Promis // Function to generate XML sitemap const buildSitemap = (urls: { url: string; depth: number }[], maxDepth: number, changefreq: string, website: string, replacer: string): string => { const root = xmlbuilder - .create('urlset', { version: '1.0', encoding: 'UTF-8' }) - .att('xmlns', 'http://www.sitemaps.org/schemas/sitemap/0.9') - .att('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance') - .att( - 'xsi:schemaLocation', - 'http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd' - ); + .create("urlset", { version: "1.0", encoding: "UTF-8" }) + .att("xmlns", "http://www.sitemaps.org/schemas/sitemap/0.9") + .att("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance") + .att("xsi:schemaLocation", "http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"); - root.com('Generated by Sitemaper, Know more about sitemaper @ https://nayanui.com/sitemaper'); + root.com("Generated by Sitemaper, Know more about sitemaper @ https://nayanui.com/sitemaper"); urls.forEach(({ url, depth }) => { const priority = (maxDepth - depth) / maxDepth; // Calculate priority based on depth (0 to 1) const finalUrl = !!replacer ? url.replace(website, replacer) : url; root - .ele('url') - .ele('loc', finalUrl) + .ele("url") + .ele("loc", finalUrl) .up() - .ele('changefreq', changefreq) + .ele("changefreq", changefreq) .up() // Set change frequency to weekly - .ele('priority', priority.toFixed(1)) + .ele("priority", priority.toFixed(1)) .up(); // Set priority based on depth }); @@ -92,8 +89,8 @@ export const generateSitemap = async (website: string, replacer: string, maxDept } // Prepare the URLs with their corresponding depth for the sitemap - const urlsWithDepth = Array.from(visited).map(url => { - const depth = url.split('/').length - website.split('/').length; // Calculate depth based on URL structure + const urlsWithDepth = Array.from(visited).map((url) => { + const depth = url.split("/").length - website.split("/").length; // Calculate depth based on URL structure return { url, depth }; }); @@ -106,11 +103,11 @@ export const generateSitemap = async (website: string, replacer: string, maxDept }; // Function to validate the sitemap file -export const validateSitemap = async (sitemapPath: string): Promise<{ status: boolean, message: string }> => { +export const validateSitemap = async (sitemapPath: string): Promise<{ status: boolean; message: string }> => { const spinner = ora(`Validating sitemap: ${sitemapPath}`).start(); // Start the spinner try { - const sitemapContent = readFileSync(sitemapPath, 'utf-8'); + const sitemapContent = readFileSync(sitemapPath, "utf-8"); // Parse the XML sitemap const result = await parseStringPromise(sitemapContent); @@ -135,7 +132,7 @@ export const validateSitemap = async (sitemapPath: string): Promise<{ status: bo } // Optionally validate changefreq and priority (if they exist) - if (entry.changefreq && !['always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never'].includes(entry.changefreq[0])) { + if (entry.changefreq && !["always", "hourly", "daily", "weekly", "monthly", "yearly", "never"].includes(entry.changefreq[0])) { throw new Error(`Invalid value: ${entry.changefreq[0]}`); } if (entry.priority && (isNaN(entry.priority[0]) || entry.priority[0] < 0 || entry.priority[0] > 1)) { @@ -148,7 +145,7 @@ export const validateSitemap = async (sitemapPath: string): Promise<{ status: bo return { status: true, message: message }; } catch (error: any) { const message = `Sitemap validation error: ${error.message}`; - spinner.fail(message) + spinner.fail(message); return { status: true, message: message }; } -}; \ No newline at end of file +}; diff --git a/src/utils.ts b/src/utils.ts index 8785724..17e21f3 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -1,20 +1,20 @@ -import {URL} from "url"; -import {existsSync} from "fs"; +import { URL } from "url"; +import { existsSync } from "fs"; // Helper function to validate depth is a positive integer export const validateDepth = (depth: string) => { const parsedDepth = parseInt(depth, 10); if (isNaN(parsedDepth) || parsedDepth < 1) { - throw new Error('Depth must be a positive integer greater than 0.'); + throw new Error("Depth must be a positive integer greater than 0."); } return parsedDepth; }; // Helper function to validate changefreq value export const validateChangefreq = (changefreq: string) => { - const validOptions = ['always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never']; + const validOptions = ["always", "hourly", "daily", "weekly", "monthly", "yearly", "never"]; if (!validOptions.includes(changefreq)) { - throw new Error(`Invalid changefreq value. Accepted values are: ${validOptions.join(', ')}`); + throw new Error(`Invalid changefreq value. Accepted values are: ${validOptions.join(", ")}`); } return changefreq; }; @@ -25,16 +25,16 @@ export const validateWebsite = (website: string) => { new URL(website); return website; } catch (error) { - throw new Error('Invalid website URL.'); + throw new Error("Invalid website URL."); } }; // Helper function to validate output path (for example, it checks if the directory exists) export const validateOutput = (output: string) => { - const pathParts = output.split('/'); - const dirPath = pathParts.slice(0, -1).join('/'); + const pathParts = output.split("/"); + const dirPath = pathParts.slice(0, -1).join("/"); if (dirPath && !existsSync(dirPath)) { throw new Error(`The directory ${dirPath} does not exist.`); } return output; -}; \ No newline at end of file +}; From 075bad0998f4831fb3e5857f656b7c6a94930864 Mon Sep 17 00:00:00 2001 From: nj Date: Fri, 18 Oct 2024 23:45:58 +0530 Subject: [PATCH 8/8] Programmatic sitemap generator --- src/sitemaper.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sitemaper.ts b/src/sitemaper.ts index 83257fa..7fe64b7 100644 --- a/src/sitemaper.ts +++ b/src/sitemaper.ts @@ -46,7 +46,7 @@ const buildSitemap = (urls: { url: string; depth: number }[], maxDepth: number, .att("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance") .att("xsi:schemaLocation", "http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"); - root.com("Generated by Sitemaper, Know more about sitemaper @ https://nayanui.com/sitemaper"); + root.com("Generated by Sitemaper, Know more about sitemaper @ https://www.nayanui.com/devtools/sitemaper"); urls.forEach(({ url, depth }) => { const priority = (maxDepth - depth) / maxDepth; // Calculate priority based on depth (0 to 1)