Safely encode dynamically-built HTML and JavaScript

If you build fragments of HTML and JavaScript dynamically from browser script, you need to take care to encode any special characters in your strings. Building your HTML using DOM createElement() and createTextNode() is often better, but sometimes the better solution is to generate a block of HTML. Here are a couple of simple functions that do that encoding quickly.

One thing you don’t want when you’re building HTML dynamically in script is for special characters in your text to turn your HTML into gibberish, or to be interpreted as HTML. Take this string, for example:

<p><a href="http://www.example.com/">Don't forget to escape!</a></p>

Now, if you want to write exactly that to a page, “angle brackets” and everything, without it being turned into a hyperlink inside a paragraph, you’ll want it to come out looking like this in the HTML:

&lt;p&gt;&lt;a href=&quot;http://www.example.com/&quot;&gt;Don&#39;t forget to escape!&lt;/a&gt;&lt;/p&gt;

Note that the apostrophe in “Don’t” has been replaced by a decimal HTML entity; this allows the whole string to be contained within a JavaScript string, whether it’s delimited by single or double quotes, which can be useful if you’re generating JavaScript for embedding in an HTML page:

// double-quoted JavaScript literal: the encoded text contains no raw " to end it early
var a = "&lt;p&gt;&lt;a href=&quot;http://www.example.com/&quot;&gt;Don&#39;t forget to escape!&lt;/a&gt;&lt;/p&gt;";
// single-quoted JavaScript literal: the apostrophe is encoded as &#39;, so no raw ' ends it early
var b = '&lt;p&gt;&lt;a href=&quot;http://www.example.com/&quot;&gt;Don&#39;t forget to escape!&lt;/a&gt;&lt;/p&gt;';

Of course, if you’re building JavaScript dynamically, you need to handle more characters and deal with them a little differently. For example, there’s a range of special characters like tabs, end-of-line characters, and unprintable characters that need to be escaped using JavaScript escape sequences rather than HTML entities.

So here are two functions for dynamically building HTML-friendly and JavaScript-friendly text from any text, assuming a character set of UTF-8. You would use encodeHTML() to encode text that is concatenated with HTML tags, and encodeJS() when you need to add some inline JavaScript to your HTML. You can see what each of them does on this example page, contrasted with a simple but expensive DOM createTextNode() method (view source for more details).

/**
* replace the five HTML special characters (& " ' < >) in a string with
* HTML entities, so the string can be concatenated into dynamically built HTML
* without being parsed as markup; all other characters are returned unchanged
* @param {String} text the text to encode
* @return {String} the text with each special character replaced by its entity
*/
var encodeHTML = (function() {

    // matches every occurrence of a character that has an entry in htmlEntities
    var htmlSpecials = /[&"'<>]/g;

    // lookup table: special character -> replacement entity
    // (the apostrophe uses the decimal form &#39;)
    var htmlEntities = {
        "<" : "&lt;",
        ">" : "&gt;",
        "&" : "&amp;",
        '"' : "&quot;",
        "'" : "&#39;"
    };

    return function(text) {
        // swap each matched character for its entity from the lookup table
        return text.replace(htmlSpecials, function(found) {
            return htmlEntities[found];
        });
    };

})();

/**
* escape a string so it can be placed inside a quoted JavaScript string literal
* that is itself embedded in HTML; each unsafe character is replaced by a
* JavaScript hexadecimal escape sequence (\xHH or \uHHHH)
* NB: HTML special characters (& < > and both quote characters) are treated as
* unsafe as well, so they are escaped too
* @param {String} text the text to encode
* @return {String} the text with every unsafe character escaped
*/
var encodeJS = (function() {

    // characters to escape: backslash, slash, both quotes, & < >,
    // U+0000-U+001F, U+007F-U+00A0, U+2000-U+200F and U+2028-U+202F
    var unsafeChars = /[\\\/"'&<>\x00-\x1f\x7f-\xa0\u2000-\u200f\u2028-\u202f]/g;

    /**
    * build the hexadecimal escape sequence for a single character
    * @param {String} ch character to escape (only its first code unit is used)
    * @return {String} \xHH for code units below 0x100, otherwise \uHHHH
    */
    function hexEscape(ch) {
        var codeUnit = ch.charCodeAt(0);
        var isWide = codeUnit >= 0x100;
        var prefix = isWide ? "\\u" : "\\x";
        var width = isWide ? 4 : 2;
        var digits = codeUnit.toString(16);

        // left-pad with zeros up to the fixed width the escape form requires
        while (digits.length < width) {
            digits = "0" + digits;
        }

        return prefix + digits;
    }

    return function(text) {
        // replace every unsafe character with its escape sequence
        return text.replace(unsafeChars, hexEscape);
    };

})();

Here’s an example of using each:

// build some HTML: the text is entity-encoded, then wrapped in a paragraph
// (the closing tag must be </p>, otherwise a second paragraph is opened)
var p = "<p>" + encodeHTML(name) + "</p>";

// build some HTML with inline JavaScript
// the name becomes a single-quoted JavaScript string argument inside a
// double-quoted onclick attribute; encodeJS() escapes both quote characters,
// so the text can terminate neither the string literal nor the attribute
var params = latLng.lat() + ',' + latLng.lng() + ",'" + encodeJS(name) + "'";
var a = '<a href="#" onclick="fmap.showDirections(' + params + '); return false;">Directions</a>';

Job is done, dynamically, safely, and quickly.

Facebooktwittergoogle_plusredditlinkedinmailFacebooktwittergoogle_plusredditlinkedinmail