linkIsInternal: consider shop.foo.com and blog.foo.com as internal (still without extra dep or SLD list) (PR #1171)

Domain-ish URL test now also covers considering internal URLs with different sub-domains like
- `https://listen.tidal.com`
- `https://login.tidal.com`

Co-authored-by: Daniel Fuchs <dfuchs@multamedio.de>
Co-authored-by: Adam Weeden <adamweeden@gmail.com>
Co-authored-by: Ronan Jouchet <ronan@jouchet.fr>
This commit is contained in:
RippiN 2021-05-03 19:22:44 +02:00 committed by GitHub
parent d6f0a28a90
commit 067327e863
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 42 additions and 9 deletions

1
.gitignore vendored
View File

@ -49,6 +49,7 @@ venv
# IntelliJ project files
.idea
*.iml
.run
out
gen

View File

@ -4,8 +4,10 @@ const internalUrl = 'https://medium.com/';
const internalUrlWww = 'https://www.medium.com/';
const sameBaseDomainUrl = 'https://app.medium.com/';
const internalUrlCoUk = 'https://medium.co.uk/';
const sameBaseDomainUrlCoUk = 'https://app.medium.co.uk/';
const differentBaseDomainUrlCoUk = 'https://other.domain.co.uk/';
const sameBaseDomainUrlCoUk = 'https://app.medium.co.uk/';
const sameBaseDomainUrlTidalListen = 'https://listen.tidal.com/';
const sameBaseDomainUrlTidalLogin = 'https://login.tidal.com/';
const internalUrlSubPath = 'topic/technology';
const externalUrl = 'https://www.wikipedia.org/wiki/Electron';
const wildcardRegex = /.*/;
@ -48,6 +50,16 @@ test('urls on the same "base domain" should be considered internal, even with a
);
});
test('urls on the same "base domain" should be considered internal, even with different sub domains', () => {
expect(
linkIsInternal(
sameBaseDomainUrlTidalListen,
sameBaseDomainUrlTidalLogin,
undefined,
),
).toEqual(true);
});
test('urls on the same "base domain" should be considered internal, long SLD', () => {
expect(
linkIsInternal(internalUrlCoUk, sameBaseDomainUrlCoUk, undefined),

View File

@ -52,7 +52,9 @@ export function linkIsInternal(
if (internalUrlRegex) {
const regex = RegExp(internalUrlRegex);
return regex.test(newUrl);
if (regex.test(newUrl)) {
return true;
}
}
try {
@ -60,13 +62,10 @@ export function linkIsInternal(
// 1. app.foo.com and foo.com
// 2. www.foo.com and foo.com
// 3. www.foo.com and app.foo.com
const currentDomain = new URL(currentUrl).hostname.replace(/^www\./, '');
const newDomain = new URL(newUrl).hostname.replace(/^www./, '');
const [longerDomain, shorterDomain] =
currentDomain.length > newDomain.length
? [currentDomain, newDomain]
: [newDomain, currentDomain];
return longerDomain.endsWith(shorterDomain);
// Only use the tld and the main domain for domain-ish test
// Enables domain-ish equality for blog.foo.com and shop.foo.com
return domainify(currentUrl) === domainify(newUrl);
} catch (err) {
log.error(
'Failed to parse domains as determining if link is internal. From:',
@ -79,6 +78,27 @@ export function linkIsInternal(
}
}
/**
* Helper to determine domain-ish equality for many cases, the trivial ones
* and the trickier ones, e.g. `blog.foo.com` and `shop.foo.com`,
* in a way that is "good enough", and doesn't need a list of SLDs.
* See chat at https://github.com/nativefier/nativefier/pull/1171#pullrequestreview-649132523
*/
function domainify(url: string): string {
// So here's what we're doing here:
// Get the hostname from the url
const hostname = new URL(url).hostname;
// Drop the first section if the domain
const domain = hostname.split('.').slice(1).join('.');
// Check the length, if it's too short, the hostname was probably the domain
// Or if the domain doesn't have a . in it we went too far
if (domain.length < 6 || domain.split('.').length === 0) {
return hostname;
}
// This SHOULD be the domain, but nothing is 100% guaranteed
return domain;
}
export function shouldInjectCss(): boolean {
try {
return fs.existsSync(INJECT_DIR);