const test = require('ava');
const urlRegex = require('..');
const fixtures = [
"http://-.~_!$&'()*+';=:%40:80%2f::::::@example.com",
'//223.255.255.254',
'//a.b-c.de',
'//foo.ws',
'//localhost:8080',
'//userid:password@example.com',
'//➡.ws/䨹',
'ftp://foo.bar/baz',
'http://1337.net',
'http://142.42.1.1/',
'http://142.42.1.1:8080/',
'http://223.255.255.254',
'http://a.b-c.de',
'http://a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z.com',
'http://a_b.z.com',
'http://code.google.com/events/#&product=browser',
'http://example.com#foo',
'http://example.com.',
'http://example.com?foo=bar',
'http://foo.bar/?q=Test%20URL-encoded%20stuff',
'http://foo.com/(something)?after=parens',
'http://foo.com/blah_(wikipedia)#cite-1',
'http://foo.com/blah_(wikipedia)_blah#cite-1',
'http://foo.com/blah_blah',
'http://foo.com/blah_blah/',
'http://foo.com/blah_blah_(wikipedia)',
'http://foo.com/blah_blah_(wikipedia)_(again)',
'http://foo.com/unicode_(✪)_in_parens',
'http://j.mp',
'http://localhost/',
'http://mw1.google.com/mw-earth-vectordb/kml-samples/gp/seattle/gigapxl/$[level]/r$[y]_c$[x].jpg',
'http://user:pass@example.com:123/one/two.three?q1=a1&q2=a2#body',
'http://userid:password@example.com',
'http://userid:password@example.com/',
'http://userid:password@example.com:8080',
'http://userid:password@example.com:8080/',
'http://userid@example.com',
'http://userid@example.com/',
'http://userid@example.com:8080',
'http://userid@example.com:8080/',
'http://www.example.com/wpstyle/?p=364',
'http://www.microsoft.xn--comindex-g03d.html.irongeek.com',
'http://⌘.ws',
'http://⌘.ws/',
'http://☺.damowmow.com/',
'http://✪df.ws/123',
'http://➡.ws/䨹',
'https://www.example.com/foo/?bar=baz&inga=42&quux',
'ws://223.255.255.254',
'ws://a.b-c.de',
'ws://foo.ws',
'ws://localhost:8080',
'ws://userid:password@example.com',
'ws://➡.ws/䨹',
'www.google.com/unicorn'
];
for (const x of fixtures) {
test(`match exact URLs: ${x}`, (t) => {
t.true(
urlRegex({
exact: true,
auth: true,
parens: true,
trailingPeriod: true
}).test(x)
);
});
}
for (const x of [
'http://مثال.إختبار',
'http://उदाहरण.परीक्षा',
'http://例子.测试'
]) {
test(`match exact URLs with strict set to true: ${x}`, (t) => {
t.true(
urlRegex({ exact: true, strict: true, auth: true, parens: true }).test(x)
);
});
}
test('match URLs in text', (t) => {
const fixture = `
Foo //bar.net/?q=Query with spaces
Lorem ipsum //dolor.sit
example.com
with path
[and another](https://another.example.com) and
`;
t.deepEqual(fixture.match(urlRegex({ strict: true })), [
'//bar.net/?q=Query',
'//dolor.sit',
'http://example.com',
'http://example.com/with-path',
'https://another.example.com'
]);
});
for (const x of [
'http://',
'http://.',
'http://..',
'http://../',
'http://?',
'http://??',
'http://??/',
'http://#',
'http://##',
'http://##/',
'http://foo.bar?q=Spaces should be encoded',
'//',
'//a',
'///a',
'///',
'http:///a',
'rdar://1234',
'h://test',
'http:// shouldfail.com',
':// should fail',
'http://foo.bar/foo(bar)baz quux',
'http://-error-.invalid/',
'http://-a.b.co',
'http://a.b-.co',
'http://123.123.123',
'http://3628126748',
'http://.www.foo.bar/',
'http://.www.foo.bar./',
'http://go/ogle.com',
'http://foo.bar/ /',
'http://a.b_z.com',
'http://ab_.z.com',
'http://google\\.com',
'http://www(google.com',
'http://www.example.xn--overly-long-punycode-test-string-test-tests-123-test-test123/',
'http://www=google.com',
'https://www.g.com/error\n/bleh/bleh',
'/foo.bar/',
'///www.foo.bar./'
]) {
test(`do not match URLs: ${x}`, (t) => {
t.false(urlRegex({ exact: true }).test(x));
});
}
test('do not match URLs: foo.com', (t) => {
t.false(urlRegex({ exact: true, strict: true }).test('foo.com'));
});
for (const x of [
"-.~_!$&'()*+';=:%40:80%2f::::::@example.com",
'//223.255.255.254',
'//a.b-c.de',
'//foo.ws',
'//localhost:8080',
'//userid:password@example.com',
'//➡.ws/䨹',
'1337.net',
'142.42.1.1/',
'142.42.1.1:8080/',
'223.255.255.254',
'a.b-c.de',
'a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z.com',
'code.google.com/events/#&product=browser',
'example.com#foo',
'example.com',
'example.com.',
'example.com?foo=bar',
'foo.bar/?q=Test%20URL-encoded%20stuff',
'foo.bar/baz',
'foo.com/(something)?after=parens',
'foo.com/blah_(wikipedia)#cite-1',
'foo.com/blah_(wikipedia)_blah#cite-1',
'foo.com/blah_blah',
'foo.com/blah_blah/',
'foo.com/blah_blah_(wikipedia)',
'foo.com/blah_blah_(wikipedia)_(again)',
'foo.com/unicode_(✪)_in_parens',
'foo.ws',
'google.com',
'j.mp',
'localhost/',
'localhost:8080',
'mw1.google.com/mw-earth-vectordb/kml-samples/gp/seattle/gigapxl/$[level]/r$[y]_c$[x].jpg',
'user:pass@example.com:123/one/two.three?q1=a1&q2=a2#body',
'userid:password@example.com',
'userid:password@example.com/',
'userid:password@example.com:8080',
'userid:password@example.com:8080/',
'userid@example.com',
'userid@example.com/',
'userid@example.com:8080',
'userid@example.com:8080/',
'www.example.com/foo/?bar=baz&inga=42&quux',
'www.example.com/wpstyle/?p=364',
'www.google.com/unicorn',
'www.microsoft.xn--comindex-g03d.html.irongeek.com',
'⌘.ws',
'⌘.ws/',
'☺.damowmow.com/',
'✪df.ws/123',
'➡.ws/䨹'
]) {
test(`match using list of TLDs: ${x}`, (t) => {
t.true(
urlRegex({
exact: true,
auth: true,
parens: true,
trailingPeriod: true
}).test(x)
);
});
}
test('opt out of matching basic auth', (t) => {
const strictFixturesWithAuth = [
"http://-.~_!$&'()*+';=:%40:80%2f::::::@example.com",
'http://user:pass@example.com:123/one/two.three?q1=a1&q2=a2#body',
'http://userid:password@example.com',
'http://userid:password@example.com/with/path',
'http://userid:password@example.com:8080',
'http://userid:password@example.com:8080/path',
'http://userid@example.com',
'http://userid@example.com/with/path',
'http://userid@localhost:8080',
'http://userid@localhost:8080/path'
];
for (const x of strictFixturesWithAuth) {
// With protocol
t.false(urlRegex({ exact: true, strict: true, auth: false }).test(x));
// Relative protocol
t.false(urlRegex({ exact: true, auth: false }).test(x.replace('http', '')));
// No protocol
t.false(
urlRegex({ exact: true, auth: false }).test(x.replace('http://', ''))
);
}
const textFixture = `
Lorem ipsum http://userid:password@example.com:8080 dolor sit
example.com
another //userid:password@example.com one
bites //userid:password@example.com/with/path the dust
and http://user:pass@example.com:123/one/two.three?q1=a1&q2=a2#body another one
and another one
and another one gone
and another userid@example.com one gone
another http://userid@example.com/ one
bites http://userid@localhost:8080 the
dust http://userid@localhost:8080/path
`;
// Strict matches none because auth always breaks the url
t.is(
textFixture.match(urlRegex({ exact: false, strict: true, auth: false })),
null
);
// Non-strict will only match domain:port/path as auth separates the protocol
const textFixtureMatches = [
'example.com:8080',
'example.com:8080/',
'example.com',
'example.com',
'example.com/with/path',
'example.com:123/one/two.three?q1=a1&q2=a2#body',
'example.com:123/one/two.three?q1=a1&q2=a2#body',
'example.com',
'example.com',
'example.com/',
'localhost:8080',
'localhost:8080/path'
];
// With protocol
t.deepEqual(
textFixtureMatches,
textFixture.match(urlRegex({ exact: false, auth: false }))
);
// Relative protocol
t.deepEqual(
textFixtureMatches,
textFixture.replace('http:', '').match(urlRegex())
);
// No protocol
t.deepEqual(
textFixtureMatches,
textFixture.replace('http://', '').match(urlRegex())
);
});
test('match using explicit list of TLDs', (t) => {
const fixtures = [
"-.~_!$&'()*+';=:%40:80%2f::::::@example.com",
"-.~_!$&'()*+';=:%40:80%2f::::::@example.onion",
'//223.255.255.254',
'//a.b-c.de',
'//foo.ws',
'//localhost:8080',
'//userid:password@example.com',
'//➡.onion/䨹',
'//➡.ws/䨹',
'1337.net',
'142.42.1.1/',
'142.42.1.1:8080/',
'223.255.255.254',
'a.b-c.de',
'a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z.com',
'code.google.com/events/#&product=browser',
'example.com#foo',
'example.com.',
'example.com?foo=bar',
'example.onion',
'foo.bar/?q=Test%20URL-encoded%20stuff',
'foo.bar/baz',
'foo.com/(something)?after=parens',
'foo.com/blah_(wikipedia)#cite-1',
'foo.com/blah_(wikipedia)_blah#cite-1',
'foo.com/blah_blah',
'foo.com/blah_blah/',
'foo.com/blah_blah_(wikipedia)',
'foo.com/blah_blah_(wikipedia)_(again)',
'foo.com/unicode_(✪)_in_parens',
'foo.ws',
'j.mp',
'localhost/',
'localhost:8080',
'mw1.google.com/mw-earth-vectordb/kml-samples/gp/seattle/gigapxl/$[level]/r$[y]_c$[x].jpg',
'mw1.unicorn.education/mw-earth-vectordb/kml-samples/gp/seattle/gigapxl/$[level]/r$[y]_c$[x].jpg',
'unicorn.education',
'user:pass@example.com:123/one/two.three?q1=a1&q2=a2#body',
'userid:password@example.com',
'userid:password@example.com/',
'userid:password@example.com:8080',
'userid:password@example.com:8080/',
'userid:password@example.education',
'userid@example.com',
'userid@example.com/',
'userid@example.com:8080',
'userid@example.com:8080/',
'www.example.com/foo/?bar=baz&inga=42&quux',
'www.example.com/wpstyle/?p=364',
'www.example.onion/wpstyle/?p=364',
'www.google.com/unicorn',
'www.microsoft.xn--comindex-g03d.html.irongeek.com',
'⌘.ws',
'⌘.ws/',
'☺.damowmow.com/',
'✪df.ws/123',
'➡.ws/䨹'
];
for (const x of fixtures) {
t.true(
urlRegex({
exact: true,
auth: true,
parens: true,
tlds: ['com', 'ws', 'de', 'net', 'mp', 'bar', 'onion', 'education'],
trailingPeriod: true
}).test(x)
);
}
});
test('fail if not in explicit list of TLDs', (t) => {
const fixtures = [
"-.~_!$&'()*+';=:%40:80%2f::::::@example.biz",
'//a.b-c.uk',
'//foo.uk',
'//userid:password@example.biz',
'//➡.cn/䨹',
'1337.biz',
'a.b-c.cn',
'a.b-c.ly',
'a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z.biz',
'code.google.biz/events/#&product=browser',
'example.biz#foo',
'example.biz.',
'example.biz?foo=bar',
'foo.baz/?q=Test%20URL-encoded%20stuff',
'foo.baz/baz',
'foo.baz/blah_blah',
'foo.biz/(something)?after=parens',
'foo.biz/blah_(wikipedia)#cite-1',
'foo.biz/blah_(wikipedia)_blah#cite-1',
'foo.biz/blah_blah_(wikipedia)',
'foo.biz/unicode_(✪)_in_parens',
'foo.co.uk/blah_blah/',
'foo.jp',
'foo.onion/blah_blah_(wikipedia)_(again)',
'j.onion',
'mw1.google.biz/mw-earth-vectordb/kml-samples/gp/seattle/gigapxl/$[level]/r$[y]_c$[x].jpg',
'user:pass@example.biz:123/one/two.three?q1=a1&q2=a2#body',
'userid:password@example.biz',
'userid:password@example.biz/',
'userid:password@example.biz:8080',
'userid:password@example.biz:8080/',
'userid@example.biz',
'userid@example.biz/',
'userid@example.biz:8080',
'userid@example.biz:8080/',
'www.example.biz/foo/?bar=baz&inga=42&quux',
'www.example.education/wpstyle/?p=364',
'www.google.biz/unicorn',
'www.microsoft.xn--comindex-g03d.html.irongeek.biz',
'⌘.onion',
'⌘.onion/',
'☺.damowmow.biz/',
'✪df.onion/123',
'➡.onion/䨹',
'➡.uk/䨹'
];
for (const x of fixtures) {
t.false(
urlRegex({
exact: true,
auth: true,
parens: true,
tlds: ['com', 'ws', 'de', 'net', 'mp', 'bar']
}).test(x)
);
}
});
test('do not match URLs with non-strict mode', (t) => {
t.false(
urlRegex({ exact: true, auth: true, parens: true }).test(
'018137.113.215.4074.138.129.172220.179.206.94180.213.144.175250.45.147.1364868726sgdm6nohQ'
)
);
});
test('IPv4', (t) => {
t.true(urlRegex().test('1.1.1.1'));
t.false(urlRegex({ ipv4: false }).test('1.1.1.1'));
});
test('IPv6', (t) => {
t.true(urlRegex().test('2606:4700:4700::1111'));
t.false(urlRegex({ ipv6: false }).test('2606:4700:4700::1111'));
});
test('parses similar to Gmail by default', (t) => {
t.deepEqual(
"foo@bar.com [foo]@bar.com foo bar @foob.com 'text@example.com, some text'".match(
urlRegex()
),
['bar.com', 'bar.com', 'foob.com', 'example.com']
);
});
test('apostrophes', (t) => {
t.deepEqual(
"background: url('http://example.com/pic.jpg');".match(urlRegex()),
['http://example.com/pic.jpg']
);
t.deepEqual(
"background: url('http://example.com/pic.jpg');".match(
urlRegex({ apostrophes: true })
),
["http://example.com/pic.jpg'"]
);
t.deepEqual(
"background: url('http://example.com/pic.jpg');".match(
urlRegex({ parens: true, apostrophes: true })
),
["http://example.com/pic.jpg');"]
);
});
test('returns string', (t) => {
t.true(typeof urlRegex({ returnString: true }) === 'string');
});
test('localhost', (t) => {
t.deepEqual(
"background: url('http://localhost/pic.jpg');".match(
urlRegex({ localhost: true })
),
['http://localhost/pic.jpg']
);
t.deepEqual(
"background: url('http://localhost/pic.jpg');".match(
urlRegex({ localhost: false })
),
['pic.jp']
);
});
for (const [source, withTrailingPeriod, withoutTrailingPeriod] of [
[
'background example.com. foobar.com',
['example.com.', 'foobar.com'],
['example.com', 'foobar.com']
],
[
'https://example.com/dir.',
['https://example.com/dir.'],
['https://example.com/dir']
],
[
'https://example.com/dir. ',
['https://example.com/dir.'],
['https://example.com/dir']
],
[
'https://example.com/dir.\n',
['https://example.com/dir.'],
['https://example.com/dir']
],
[
'https://example.com/index.html',
['https://example.com/index.html'],
['https://example.com/index.html']
],
[
'https://example.com/index.html.',
['https://example.com/index.html.'],
['https://example.com/index.html']
],
[
'https://example.com/dir.with.dot/.',
['https://example.com/dir.with.dot/.'],
['https://example.com/dir.with.dot/']
],
// Question marks
['Have you ever visited example.com?', ['example.com?'], ['example.com']],
['example.com/?', ['example.com/?'], ['example.com/']],
[
'https://example.com/dir?',
['https://example.com/dir?'],
['https://example.com/dir']
],
// Exclamation marks
['You should check out example.com!', ['example.com'], ['example.com']],
['Here is example.com/!', ['example.com/!'], ['example.com/']],
[
'https://example.com/dir/!',
['https://example.com/dir/!'],
['https://example.com/dir/']
],
[
'https://example.com/dir!',
['https://example.com/dir!'],
['https://example.com/dir']
]
]) {
const sourceTitle = source.replace('\n', '\\n');
test(`trailingPeriod: true (${sourceTitle})`, (t) => {
t.deepEqual(
source.match(urlRegex({ trailingPeriod: true })),
withTrailingPeriod
);
});
test(`trailingPeriod: false (${sourceTitle})`, (t) => {
t.deepEqual(
source.match(urlRegex({ trailingPeriod: false })),
withoutTrailingPeriod
);
});
}