PHPXRef 0.7.1 : phpBB-3.3.14-deutsch : /vendor/s9e/text-formatter/src/Parser/AttributeFilters/UrlFilter.js source

[Summary view] [Print] [Text view]
   1  /** @const */
   2  var UrlFilter =
   3  {
   4      /**
   5      * @param  {*}        attrValue
   6      * @param  {!Object}  urlConfig
   7      * @param  {?Logger=} logger
   8      * @return {*}
   9      */
  10      filter: function(attrValue, urlConfig, logger)
  11      {
  12          /**
  13          * Trim the URL to conform with HTML5 then parse it
  14          * @link http://dev.w3.org/html5/spec/links.html#attr-hyperlink-href
  15          */
  16          var p = UrlFilter.parseUrl(attrValue.replace(/^\s+/, '').replace(/\s+$/, ''));
  17  
  18          var error = UrlFilter.validateUrl(urlConfig, p);
  19          if (error)
  20          {
  21              if (logger)
  22              {
  23                  p['attrValue'] = attrValue;
  24                  logger.err(error, p);
  25              }
  26  
  27              return false;
  28          }
  29  
  30          return UrlFilter.rebuildUrl(urlConfig, p);
  31      },
  32  
  33      /**
  34      * Parse a URL and return its components
  35      *
  36      * Similar to PHP's own parse_url() except that all parts are always returned
  37      *
  38      * @param  {string} url Original URL
  39      * @return {!Object}
  40      */
  41      parseUrl: function(url)
  42      {
  43          var regexp = /^(?:([a-z][-+.\w]*):)?(?:\/\/(?:([^:\/?#]*)(?::([^\/?#]*)?)?@)?(?:(\[[a-f\d:]+\]|[^:\/?#]+)(?::(\d*))?)?(?![^\/?#]))?([^?#]*)(\?[^#]*)?(#.*)?$/i;
  44  
  45          // NOTE: this regexp always matches because of the last three captures
  46          var m = regexp['exec'](url),
  47              parts = {},
  48              tokens = ['scheme', 'user', 'pass', 'host', 'port', 'path', 'query', 'fragment'];
  49          tokens.forEach(
  50              function(name, i)
  51              {
  52                  parts[name] = (m[i + 1] > '') ? m[i + 1] : '';
  53              }
  54          );
  55  
  56          /**
  57          * @link http://tools.ietf.org/html/rfc3986#section-3.1
  58          *
  59          * 'An implementation should accept uppercase letters as equivalent to lowercase in
  60          * scheme names (e.g., allow "HTTP" as well as "http") for the sake of robustness but
  61          * should only produce lowercase scheme names for consistency.'
  62          */
  63          parts['scheme'] = parts['scheme'].toLowerCase();
  64  
  65          /**
  66          * Normalize the domain label separators and remove trailing dots
  67          * @link http://url.spec.whatwg.org/#domain-label-separators
  68          */
  69          parts['host'] = parts['host'].replace(/[\u3002\uff0e\uff61]/g, '.').replace(/\.+$/g, '');
  70  
  71          // Test whether host has non-ASCII characters and punycode it if possible
  72          if (/[^\x00-\x7F]/.test(parts['host']) && typeof punycode !== 'undefined')
  73          {
  74              parts['host'] = punycode.toASCII(parts['host']);
  75          }
  76  
  77          return parts;
  78      },
  79  
  80      /**
  81      * Rebuild a parsed URL
  82      *
  83      * @param  {!Object} urlConfig
  84      * @param  {!Object} p
  85      * @return {string}
  86      */
  87      rebuildUrl: function(urlConfig, p)
  88      {
  89          var url = '';
  90          if (p['scheme'] !== '')
  91          {
  92              url += p['scheme'] + ':';
  93          }
  94          if (p['host'] !== '')
  95          {
  96              url += '//';
  97  
  98              // Add the credentials if applicable
  99              if (p['user'] !== '')
 100              {
 101                  // Reencode the credentials in case there are invalid chars in them, or suspicious
 102                  // characters such as : or @ that could confuse a browser into connecting to the
 103                  // wrong host (or at least, to a host that is different than the one we thought)
 104                  url += rawurlencode(decodeURIComponent(p['user']));
 105  
 106                  if (p['pass'] !== '')
 107                  {
 108                      url += ':' + rawurlencode(decodeURIComponent(p['pass']));
 109                  }
 110  
 111                  url += '@';
 112              }
 113  
 114              url += p['host'];
 115  
 116              // Append the port number (note that as per the regexp it can only contain digits)
 117              if (p['port'] !== '')
 118              {
 119                  url += ':' + p['port'];
 120              }
 121          }
 122          else if (p['scheme'] === 'file')
 123          {
 124              // Allow the file: scheme to not have a host and ensure it starts with slashes
 125              url += '//';
 126          }
 127  
 128          // Build the path, including the query and fragment parts
 129          var path = p['path'] + p['query'] + p['fragment'];
 130  
 131          /**
 132          * "For consistency, URI producers and normalizers should use uppercase hexadecimal digits
 133          * for all percent- encodings."
 134          *
 135          * @link http://tools.ietf.org/html/rfc3986#section-2.1
 136          */
 137          path = path.replace(
 138              /%.?[a-f]/g,
 139              function (str)
 140              {
 141                  return str.toUpperCase();
 142              },
 143              path
 144          );
 145  
 146          // Append the sanitized path to the URL
 147          url += UrlFilter.sanitizeUrl(path);
 148  
 149          // Replace the first colon if there's no scheme and it could potentially be interpreted as
 150          // the scheme separator
 151          if (!p['scheme'])
 152          {
 153              url = url.replace(/^([^\/]*):/, '$1%3A');
 154          }
 155  
 156          return url;
 157      },
 158  
 159      /**
 160      * Sanitize a URL for safe use regardless of context
 161      *
 162      * This method URL-encodes some sensitive characters in case someone would want to use the URL in
 163      * some JavaScript thingy, or in CSS. We also encode characters that are not allowed in the path
 164      * of a URL as defined in RFC 3986 appendix A, including percent signs that are not immediately
 165      * followed by two hex digits.
 166      *
 167      * " and ' to prevent breaking out of quotes (JavaScript or otherwise)
 168      * ( and ) to prevent the use of functions in JavaScript (eval()) or CSS (expression())
 169      * < and > to prevent breaking out of <script>
 170      * \r and \n because they're illegal in JavaScript
 171      * [ and ] because the W3 validator rejects them and they "should" be escaped as per RFC 3986
 172      * Non-ASCII characters as per RFC 3986
 173      * Control codes and spaces, as per RFC 3986
 174      *
 175      * @link http://sla.ckers.org/forum/read.php?2,51478
 176      * @link http://timelessrepo.com/json-isnt-a-javascript-subset
 177      * @link http://www.ietf.org/rfc/rfc3986.txt
 178      * @link http://stackoverflow.com/a/1547922
 179      * @link http://tools.ietf.org/html/rfc3986#appendix-A
 180      *
 181      * @param  {string} url Original URL
 182      * @return {string}     Sanitized URL
 183      */
 184      sanitizeUrl: function(url)
 185      {
 186          return url.replace(/[^\u0020-\u007E]+/g, encodeURIComponent).replace(
 187              /%(?![0-9A-Fa-f]{2})|[^!#-&*-;=?-Z_a-z~]/g,
 188              function (m)
 189              {
 190                  return '%' + m[0].charCodeAt(0).toString(16).toUpperCase();
 191              }
 192          );
 193      },
 194  
 195      /**
 196      * Validate a parsed URL
 197      *
 198      * @param  {!Object} urlConfig
 199      * @param  {!Object} p
 200      * @return {string|undefined}
 201      */
 202      validateUrl: function(urlConfig, p)
 203      {
 204          if (p['scheme'] !== '' && !urlConfig.allowedSchemes.test(p['scheme']))
 205          {
 206              return 'URL scheme is not allowed';
 207          }
 208  
 209          if (p['host'] !== '')
 210          {
 211              /**
 212              * Test whether the host is valid
 213              * @link http://tools.ietf.org/html/rfc1035#section-2.3.1
 214              * @link http://tools.ietf.org/html/rfc1123#section-2
 215              */
 216              var regexp = /^(?!-)[-a-z0-9]{0,62}[a-z0-9](?:\.(?!-)[-a-z0-9]{0,62}[a-z0-9])*$/i;
 217              if (!regexp.test(p['host']))
 218              {
 219                  // If the host invalid, retest as an IPv4 and IPv6 address (IPv6 in brackets)
 220                  if (!NetworkFilter.filterIpv4(p['host'])
 221                   && !NetworkFilter.filterIpv6(p['host'].replace(/^\[(.*)\]$/, '$1', p['host'])))
 222                  {
 223                      return 'URL host is invalid';
 224                  }
 225              }
 226  
 227              if ((urlConfig.disallowedHosts && urlConfig.disallowedHosts.test(p['host']))
 228               || (urlConfig.restrictedHosts && !urlConfig.restrictedHosts.test(p['host'])))
 229              {
 230                  return 'URL host is not allowed';
 231              }
 232          }
 233          else if (/^(?:(?:f|ht)tps?)$/.test(p['scheme']))
 234          {
 235              return 'Missing host';
 236          }
 237      }
 238  };
PHP Cross Reference of phpBB-3.3.14-deutsch

/vendor/s9e/text-formatter/src/Parser/AttributeFilters/ -> UrlFilter.js (source)