Last active
November 14, 2025 03:48
-
-
Save annibal/fb98149eee5655f57d1ebba7d03b7993 to your computer and use it in GitHub Desktop.
Examples of URLs - both valid and invalid - plus some more info about URL validation. Someday I'll build the perfect URL validator and, from it, the amazing "makeValidThisURL()".
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "rules": { | |
| "domain": [ | |
| ".com .net .org and .info domain names cannot exceed 67 characters", | |
| ".info and .biz must have at least 3 characters not including .info and .biz", | |
| "Other domain names cannot exceed 22 characters not including the extension", | |
| "2 letter domains are not allowed." | |
| ], | |
| "edge-cases": [ | |
| "'http://url.com/path///////otherpath' is technically valid per RFC 3986 and RFC 9110. Most servers normalize or collapse the repeated slashes ('/path///////otherpath' and '/path/otherpath' yield the same resource); others don't, and see several empty segments (\"\") between 'path' and 'otherpath'.", | |
| "'http://url.com/path' and 'http://url.com/path/' are distinct URIs. One refers to a resource literally named 'path', the other to a resource inside a collection named 'path/'. Most web servers issue a redirect (301 → /path/). Relative link resolution differs: a relative reference such as 'other' resolves to '/path/other' from '/path/', but to '/other' from '/path', because the last segment of the base path is dropped before merging (RFC 3986 section 5.2.3).", | |
| "https://例子.公司 → this is valid as an IRI per RFC 3987 (Unicode allowed), but invalid if you restrict to ASCII-only URIs per RFC 3986, where the host must be written in its Punycode (xn--) form." | |
| ] | |
| }, | |
| "reference": [ | |
| "https://publicsuffix.org/list/public_suffix_list.dat", | |
| "https://mathiasbynens.be/demo/url-regex", | |
| "https://gist.github.com/j3j5/8336b0224167636bed462950400ff2df", | |
| "https://gist.github.com/GuillaumeLestringant/36c11afcc35c8c5b9123", | |
| "https://url.spec.whatwg.org/", | |
| "http://www.ietf.org/rfc/rfc2732.txt", | |
| "https://news.ycombinator.com/item?id=7928968" | |
| ], | |
| "urlExamples": { | |
| "valid": [ | |
| { "url": "http://[1080:0:0:0:8:800:200C:417A]/index.html", | |
| "desc": "some IP and Hex/Decimal/Octal bullshit" | |
| }, | |
| { "url": "http://0xadc229b7", | |
| "desc": "some IP and Hex/Decimal/Octal bullshit" | |
| }, | |
| { "url": "http://2915183031", | |
| "desc": "some IP and Hex/Decimal/Octal bullshit" | |
| }, | |
| { "url": "http://025560424667.", | |
| "desc": "some IP and Hex/Decimal/Octal bullshit" | |
| }, | |
| { "url": "http://www.hotmail.co", | |
| "desc": "real live example" | |
| }, | |
| { "url": "https://www.eltiempo.com", | |
| "desc": "real live example" | |
| }, | |
| { "url": "https://www.nytimes.com", | |
| "desc": "real live example" | |
| }, | |
| { "url": "https://www.wikipedia.org", | |
| "desc": "real live example" | |
| }, | |
| { "url": "http://example.com", | |
| "desc": "basic example of a URL" | |
| }, | |
| { "url": "https://api.example.io/v2/users/123/profile", | |
| "desc": "common url found in the interwebs" | |
| }, | |
| { "url": "https://store.example.com/products?id=42&sort=price", | |
| "desc": "another common type of url" | |
| }, | |
| { "url": "http://example.com:0/", | |
| "desc": "RFC 3986 allows port zero. Some old APIs interpret it as 'use default port', though no TCP listener ever binds to it." | |
| }, | |
| { "url": "https://www.example.org/index.html", | |
| "desc": "basic with resource" | |
| }, | |
| { "url": "https://sub.domain.example.co.uk/path/to/resource", | |
| "desc": "basic with path" | |
| }, | |
| { "url": "https://example.net:8080/test?q=1&r=2", | |
| "desc": "basic with port and search" | |
| }, | |
| { "url": "https://192.168.0.12/admin", | |
| "desc": "ip instead of domain with single path" | |
| }, | |
| { "url": "https://user:pass@example.com/login", | |
| "desc": "prefixed with authentication" | |
| }, | |
| { "url": "https://example.com?query=param", | |
| "desc": "basic with search param" | |
| }, | |
| { "url": "https://example.com/path#section2", | |
| "desc": "basic with hash" | |
| }, | |
| { "url": "https://例子.公司.cn/关于", | |
| "desc": "internationalized domain name (IDN)" | |
| }, | |
| { "url": "http://example.com:0/", | |
| "desc": "port 0 (edge case)" | |
| }, | |
| { "url": "https://xn--fsq.com/…", | |
| "desc": "IDN + weird char" | |
| }, | |
| { "url": "ftp://files.server.net/downloads/file.zip", | |
| "desc": "non-HTTP scheme" | |
| }, | |
| { "url": "https://例子.公司", | |
| "desc": "Unicode host (valid as an IRI; rejected only if restricting to punycode/ASCII)" | |
| }, | |
| { "url": "http://api.safebox.360.cn/Interface/getToolsDataApi/guid/{B7800CD1-75EE-41D3-9CFC-7E9B051B84AD}", | |
| "desc": "check next url" | |
| }, | |
| { "url": "https://gist.github.com/dperini/729294?permalink_comment_id=3905413#gistcomment-3905413", | |
| "desc": "long live internet discussions" | |
| }, | |
| { "url": "https://www.samsung.", | |
| "desc": "taken from juanluisbaptiste's url-validator repository (commit 'c0903add6d0fa85fb7dd42b8cfaa1c2da4889d77', file '/test_files/urls.txt') on GitHub" | |
| }, | |
| { "url": "https:www.ford.com", | |
| "desc": "taken from juanluisbaptiste's url-validator repository (commit 'c0903add6d0fa85fb7dd42b8cfaa1c2da4889d77', file '/test_files/urls.txt') on GitHub" | |
| }, | |
| { "url": "www.amazon.com", | |
| "desc": "taken from juanluisbaptiste's url-validator repository (commit 'c0903add6d0fa85fb7dd42b8cfaa1c2da4889d77', file '/test_files/urls.txt') on GitHub" | |
| }, | |
| { "url": "www.apple.com", | |
| "desc": "taken from juanluisbaptiste's url-validator repository (commit 'c0903add6d0fa85fb7dd42b8cfaa1c2da4889d77', file '/test_files/urls.txt') on GitHub" | |
| }, | |
| { "json-should-allow": "trailing-commas" } | |
| ], | |
| "wrong": [ | |
| { "url": "http://- -", | |
| "desc": "nonsense host with spaces/hyphen" | |
| }, | |
| { "url": "example.com", | |
| "desc": "missing scheme" | |
| }, | |
| { "url": "http:///example.com", | |
| "desc": "too many slashes" | |
| }, | |
| { "url": "http:// example.com", | |
| "desc": "space after the double slash" | |
| }, | |
| { "url": "http://exa mple.com", | |
| "desc": "space in host" | |
| }, | |
| { "url": "http://-example.com", | |
| "desc": "leading hyphen in domain label" | |
| }, | |
| { "url": "http://example..com", | |
| "desc": "empty label (double dot)" | |
| }, | |
| { "url": "http://.com", | |
| "desc": "only TLD" | |
| }, | |
| { "url": "http://.example.com", | |
| "desc": "leading dot" | |
| }, | |
| { "url": "http://example.com:99999/", | |
| "desc": "port > 65535" | |
| }, | |
| { "url": "http://example.com:port/", | |
| "desc": "non-numeric port" | |
| }, | |
| { "url": "http://user@:80/", | |
| "desc": "userinfo but no host" | |
| }, | |
| { "url": "http://?q=1", | |
| "desc": "no host, only query" | |
| }, | |
| { "url": "http://[::1", | |
| "desc": "unclosed IPv6 bracket" | |
| }, | |
| { "url": "http://[::1]/path]", | |
| "desc": "stray closing bracket" | |
| }, | |
| { "url": "http://192.168.1.999/", | |
| "desc": "invalid IPv4 octet" | |
| }, | |
| { "url": "http://256.256.256.256", | |
| "desc": "invalid IPv4" | |
| }, | |
| { "url": "http://exa_mple.com", | |
| "desc": "underscore in domain label" | |
| }, | |
| { "url": "http://example.com/space in/path", | |
| "desc": "unescaped space in path" | |
| }, | |
| { "url": "https://%GG.com", | |
| "desc": "invalid percent-encoding" | |
| }, | |
| { "url": "ftp//example.com", | |
| "desc": "missing colon after scheme" | |
| }, | |
| { "url": ": //example.com", | |
| "desc": "missing scheme (just punctuation & spaces)" | |
| }, | |
| { "url": "http:///", | |
| "desc": "scheme + slashes only" | |
| }, | |
| { "url": "data:text/plain,Hello world", | |
| "desc": "non-hierarchical scheme (if you expect http/https)" | |
| }, | |
| { "url": "mailto:user@@example.com", | |
| "desc": "double @ in mailto" | |
| }, | |
| { "url": "http://example.com // comment wtf", | |
| "desc": "plain text after" | |
| }, | |
| { "url": "Use only http://example.com", | |
| "desc": "plain text before" | |
| }, | |
| { "url": "the http://example.com of url", | |
| "desc": "plain text around" | |
| }, | |
| { "url": "#http://example.com", | |
| "desc": "shell comment" | |
| }, | |
| { "url": "'http://example.com'", | |
| "desc": "URL wrapped in single quotes" | |
| }, | |
| { "url": "\"http://example.com\"", | |
| "desc": "URL wrapped in double quotes" | |
| }, | |
| { "url": "`http://example.com`", | |
| "desc": "URL wrapped in backticks" | |
| }, | |
| { "url": "[http://example.com]", | |
| "desc": "URL wrapped in square brackets (array-literal syntax)" | |
| }, | |
| { "url": "http://example..com./", | |
| "desc": "double dot plus trailing dot is technically valid, but this is ugly and should not exist." | |
| }, | |
| { "json-should-allow": "trailing-commas" } | |
| ], | |
| "json-should-allow": "trailing-commas" | |
| }, | |
| "json-should-allow": "trailing-commas" | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment