diff --git a/.gitignore b/.gitignore index fa3e7a8..6dc737b 100644 --- a/.gitignore +++ b/.gitignore @@ -49,6 +49,7 @@ venv # IntelliJ project files .idea *.iml +.run out gen diff --git a/app/src/helpers/helpers.test.ts b/app/src/helpers/helpers.test.ts index 16e775a..6821128 100644 --- a/app/src/helpers/helpers.test.ts +++ b/app/src/helpers/helpers.test.ts @@ -4,8 +4,10 @@ const internalUrl = 'https://medium.com/'; const internalUrlWww = 'https://www.medium.com/'; const sameBaseDomainUrl = 'https://app.medium.com/'; const internalUrlCoUk = 'https://medium.co.uk/'; -const sameBaseDomainUrlCoUk = 'https://app.medium.co.uk/'; const differentBaseDomainUrlCoUk = 'https://other.domain.co.uk/'; +const sameBaseDomainUrlCoUk = 'https://app.medium.co.uk/'; +const sameBaseDomainUrlTidalListen = 'https://listen.tidal.com/'; +const sameBaseDomainUrlTidalLogin = 'https://login.tidal.com/'; const internalUrlSubPath = 'topic/technology'; const externalUrl = 'https://www.wikipedia.org/wiki/Electron'; const wildcardRegex = /.*/; @@ -48,6 +50,16 @@ test('urls on the same "base domain" should be considered internal, even with a ); }); +test('urls on the same "base domain" should be considered internal, even with different sub domains', () => { + expect( + linkIsInternal( + sameBaseDomainUrlTidalListen, + sameBaseDomainUrlTidalLogin, + undefined, + ), + ).toEqual(true); +}); + test('urls on the same "base domain" should be considered internal, long SLD', () => { expect( linkIsInternal(internalUrlCoUk, sameBaseDomainUrlCoUk, undefined), diff --git a/app/src/helpers/helpers.ts b/app/src/helpers/helpers.ts index 7948f60..027a085 100644 --- a/app/src/helpers/helpers.ts +++ b/app/src/helpers/helpers.ts @@ -52,7 +52,9 @@ export function linkIsInternal( if (internalUrlRegex) { const regex = RegExp(internalUrlRegex); - return regex.test(newUrl); + if (regex.test(newUrl)) { + return true; + } } try { @@ -60,13 +62,10 @@ export function linkIsInternal( // 1. app.foo.com and foo.com // 2. www.foo.com and foo.com // 3. www.foo.com and app.foo.com - const currentDomain = new URL(currentUrl).hostname.replace(/^www\./, ''); - const newDomain = new URL(newUrl).hostname.replace(/^www./, ''); - const [longerDomain, shorterDomain] = - currentDomain.length > newDomain.length - ? [currentDomain, newDomain] - : [newDomain, currentDomain]; - return longerDomain.endsWith(shorterDomain); + + // Only use the tld and the main domain for domain-ish test + // Enables domain-ish equality for blog.foo.com and shop.foo.com + return domainify(currentUrl) === domainify(newUrl); } catch (err) { log.error( 'Failed to parse domains as determining if link is internal. From:', @@ -79,6 +78,27 @@ export function linkIsInternal( } } +/** + * Helper to determine domain-ish equality for many cases, the trivial ones + * and the trickier ones, e.g. `blog.foo.com` and `shop.foo.com`, + * in a way that is "good enough", and doesn't need a list of SLDs. + * See chat at https://github.com/nativefier/nativefier/pull/1171#pullrequestreview-649132523 + */ +function domainify(url: string): string { + // So here's what we're doing here: + // Get the hostname from the url + const hostname = new URL(url).hostname; + // Drop the first section if the domain + const domain = hostname.split('.').slice(1).join('.'); + // Check the length, if it's too short, the hostname was probably the domain + // Or if the domain doesn't have a . in it we went too far + if (domain.length < 6 || domain.split('.').length === 0) { + return hostname; + } + // This SHOULD be the domain, but nothing is 100% guaranteed + return domain; +} + export function shouldInjectCss(): boolean { try { return fs.existsSync(INJECT_DIR);