PHPXRef 0.7.1 : phpBB-3.2.11-deutsch : /vendor/s9e/text-formatter/src/Parser/AttributeFilters/UrlFilter.js source

[Summary view] [Print] [Text view]
   1  /** @const */
   2  var UrlFilter =
   3  {
   4      /**
   5      * @param  {*} attrValue
   6      * @param  {!Object} urlConfig
   7      * @param  {Logger} logger
   8      * @return {*}
   9      */
  10      filter: function(attrValue, urlConfig, logger)
  11      {
  12          /**
  13          * Trim the URL to conform with HTML5 then parse it
  14          * @link http://dev.w3.org/html5/spec/links.html#attr-hyperlink-href
  15          */
  16          var p = UrlFilter.parseUrl(attrValue.replace(/^\s+/, '').replace(/\s+$/, ''));
  17  
  18          var error = UrlFilter.validateUrl(urlConfig, p);
  19          if (error)
  20          {
  21              if (logger)
  22              {
  23                  p['attrValue'] = attrValue;
  24                  logger.err(error, p);
  25              }
  26  
  27              return false;
  28          }
  29  
  30          return UrlFilter.rebuildUrl(urlConfig, p);
  31      },
  32  
  33      /**
  34      * Parse a URL and return its components
  35      *
  36      * Similar to PHP's own parse_url() except that all parts are always returned
  37      *
  38      * @param  {!string} url Original URL
  39      * @return {!Object}
  40      */
  41      parseUrl: function(url)
  42      {
  43          var regexp = /^(?:([a-z][-+.\w]*):)?(?:\/\/(?:([^:\/?#]*)(?::([^\/?#]*)?)?@)?(?:(\[[a-f\d:]+\]|[^:\/?#]+)(?::(\d*))?)?(?![^\/?#]))?([^?#]*)(\?[^#]*)?(#.*)?$/i;
  44  
  45          // NOTE: this regexp always matches because of the last three captures
  46          var m = regexp['exec'](url),
  47              parts = {},
  48              tokens = ['scheme', 'user', 'pass', 'host', 'port', 'path', 'query', 'fragment'];
  49          tokens.forEach(
  50              function(name, i)
  51              {
  52                  parts[name] = (m[i + 1] > '') ? m[i + 1] : '';
  53              }
  54          );
  55  
  56          /**
  57          * @link http://tools.ietf.org/html/rfc3986#section-3.1
  58          *
  59          * 'An implementation should accept uppercase letters as equivalent to lowercase in
  60          * scheme names (e.g., allow "HTTP" as well as "http") for the sake of robustness but
  61          * should only produce lowercase scheme names for consistency.'
  62          */
  63          parts['scheme'] = parts['scheme'].toLowerCase();
  64  
  65          /**
  66          * Normalize the domain label separators and remove trailing dots
  67          * @link http://url.spec.whatwg.org/#domain-label-separators
  68          */
  69          parts['host'] = parts['host'].replace(/[\u3002\uff0e\uff61]/g, '.').replace(/\.+$/g, '');
  70  
  71          // Test whether host has non-ASCII characters and punycode it if possible
  72          if (/[^\x00-\x7F]/.test(parts['host']) && typeof punycode !== 'undefined')
  73          {
  74              parts['host'] = punycode.toASCII(parts['host']);
  75          }
  76  
  77          return parts;
  78      },
  79  
  80      /**
  81      * Rebuild a parsed URL
  82      *
  83      * @param  {!Object} urlConfig
  84      * @param  {!Object} p
  85      * @return {!string}
  86      */
  87      rebuildUrl: function(urlConfig, p)
  88      {
  89          var url = '';
  90          if (p['scheme'] !== '')
  91          {
  92              url += p['scheme'] + ':';
  93          }
  94          if (p['host'] === '')
  95          {
  96              // Allow the file: scheme to not have a host and ensure it starts with slashes
  97              if (p['scheme'] === 'file')
  98              {
  99                  url += '//';
 100              }
 101          }
 102          else
 103          {
 104              url += '//';
 105  
 106              // Add the credentials if applicable
 107              if (p['user'] !== '')
 108              {
 109                  // Reencode the credentials in case there are invalid chars in them, or suspicious
 110                  // characters such as : or @ that could confuse a browser into connecting to the
 111                  // wrong host (or at least, to a host that is different than the one we thought)
 112                  url += rawurlencode(decodeURIComponent(p['user']));
 113  
 114                  if (p['pass'] !== '')
 115                  {
 116                      url += ':' + rawurlencode(decodeURIComponent(p['pass']));
 117                  }
 118  
 119                  url += '@';
 120              }
 121  
 122              url += p['host'];
 123  
 124              // Append the port number (note that as per the regexp it can only contain digits)
 125              if (p['port'] !== '')
 126              {
 127                  url += ':' + p['port'];
 128              }
 129          }
 130  
 131          // Build the path, including the query and fragment parts
 132          var path = p['path'] + p['query'] + p['fragment'];
 133  
 134          /**
 135          * "For consistency, URI producers and normalizers should use uppercase hexadecimal digits
 136          * for all percent- encodings."
 137          *
 138          * @link http://tools.ietf.org/html/rfc3986#section-2.1
 139          */
 140          path = path.replace(
 141              /%.?[a-f]/g,
 142              function (str)
 143              {
 144                  return str.toUpperCase();
 145              },
 146              path
 147          );
 148  
 149          // Append the sanitized path to the URL
 150          url += UrlFilter.sanitizeUrl(path);
 151  
 152          // Replace the first colon if there's no scheme and it could potentially be interpreted as
 153          // the scheme separator
 154          if (!p['scheme'])
 155          {
 156              url = url.replace(/^([^\/]*):/, '$1%3A');
 157          }
 158  
 159          return url;
 160      },
 161  
 162      /**
 163      * Sanitize a URL for safe use regardless of context
 164      *
 165      * This method URL-encodes some sensitive characters in case someone would want to use the URL in
 166      * some JavaScript thingy, or in CSS. We also encode characters that are not allowed in the path
 167      * of a URL as defined in RFC 3986 appendix A, including percent signs that are not immediately
 168      * followed by two hex digits.
 169      *
 170      * " and ' to prevent breaking out of quotes (JavaScript or otherwise)
 171      * ( and ) to prevent the use of functions in JavaScript (eval()) or CSS (expression())
 172      * < and > to prevent breaking out of <script>
 173      * \r and \n because they're illegal in JavaScript
 174      * [ and ] because the W3 validator rejects them and they "should" be escaped as per RFC 3986
 175      * Non-ASCII characters as per RFC 3986
 176      * Control codes and spaces, as per RFC 3986
 177      *
 178      * @link http://sla.ckers.org/forum/read.php?2,51478
 179      * @link http://timelessrepo.com/json-isnt-a-javascript-subset
 180      * @link http://www.ietf.org/rfc/rfc3986.txt
 181      * @link http://stackoverflow.com/a/1547922
 182      * @link http://tools.ietf.org/html/rfc3986#appendix-A
 183      *
 184      * @param  {!string} url Original URL
 185      * @return {!string}     Sanitized URL
 186      */
 187      sanitizeUrl: function(url)
 188      {
 189          return url.replace(/[^\u0020-\u007E]+/g, encodeURIComponent).replace(/%(?![0-9A-Fa-f]{2})|[^!#-&*-;=?-Z_a-z~]/g, escape);
 190      },
 191  
 192      /**
 193      * Validate a parsed URL
 194      *
 195      * @param  {!Object} urlConfig
 196      * @param  {!Object} p
 197      * @return {string|undefined}
 198      */
 199      validateUrl: function(urlConfig, p)
 200      {
 201          if (p['scheme'] !== '' && !urlConfig.allowedSchemes.test(p['scheme']))
 202          {
 203              return 'URL scheme is not allowed';
 204          }
 205  
 206          if (p['host'] === '')
 207          {
 208              // Reject malformed URLs such as http:///example.org but allow schemeless paths
 209              if (p['scheme'] !== 'file' && p['scheme'] !== '')
 210              {
 211                  return 'Missing host';
 212              }
 213          }
 214          else
 215          {
 216              /**
 217              * Test whether the host is valid
 218              * @link http://tools.ietf.org/html/rfc1035#section-2.3.1
 219              * @link http://tools.ietf.org/html/rfc1123#section-2
 220              */
 221              var regexp = /^(?!-)[-a-z0-9]{0,62}[a-z0-9](?:\.(?!-)[-a-z0-9]{0,62}[a-z0-9])*$/i;
 222              if (!regexp.test(p['host']))
 223              {
 224                  // If the host invalid, retest as an IPv4 and IPv6 address (IPv6 in brackets)
 225                  if (!NetworkFilter.filterIpv4(p['host'])
 226                   && !NetworkFilter.filterIpv6(p['host'].replace(/^\[(.*)\]$/, '$1', p['host'])))
 227                  {
 228                      return 'URL host is invalid';
 229                  }
 230              }
 231  
 232              if ((urlConfig.disallowedHosts && urlConfig.disallowedHosts.test(p['host']))
 233               || (urlConfig.restrictedHosts && !urlConfig.restrictedHosts.test(p['host'])))
 234              {
 235                  return 'URL host is not allowed';
 236              }
 237          }
 238      }
 239  };
PHP Cross Reference of phpBB-3.2.11-deutsch

/vendor/s9e/text-formatter/src/Parser/AttributeFilters/ -> UrlFilter.js (source)