Do you have a solution for taking a substring of text that contains HTML tags in JavaScript?
For example:
var str = 'Lorem ipsum <a href="#">dolor <strong>sit</strong> amet</a>, consectetur adipiscing elit.'
html_substr(str, 20) // return Lorem ipsum <a href="#">dolor <strong>si</strong></a>
html_substr(str, 30) // return Lorem ipsum <a href="#">dolor <strong>sit</strong> amet</a>, co
Advertisement
Answer
Taking into consideration that parsing html with regex is a bad idea, here is a solution that does just that 🙂
EDIT: Just to be clear: this is not a valid solution. It was meant as an exercise that makes very lenient assumptions about the input string, and as such it should be taken with a grain of salt. Read the link above to see why parsing HTML with regex can never be done reliably.
/**
 * Returns the first `n` text characters of an HTML string, keeping the tags
 * that surround that text and closing any element left open by the cut.
 *
 * Tags are located with a regular expression, not a real HTML parser, so the
 * input is assumed to be well-formed: no `>` inside attribute values or
 * comments, and closing tags that match their opening tags. Only text between
 * tags counts toward `n`; it is measured in UTF-16 code units, and character
 * references such as `&amp;` are counted (and may be cut) as plain text.
 *
 * @param {string} s - HTML source to truncate.
 * @param {number} n - Maximum number of text characters to keep.
 * @returns {string} The truncated HTML with unclosed elements closed, or an
 *   empty string when `n` is negative.
 */
function htmlSubstring(s, n) {
  // Capture group 1 is the tag name (with a leading "/" for closing tags):
  // everything after "<" up to the first whitespace character or ">".
  var tagPattern = /<([^>\s]*)[^>]*>/g;
  // Void elements never take a closing tag, so they must not be tracked.
  var voidElements = /^(?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/i;
  // Matches XML-style self-closing tags, with or without a space: <br/>, <br />.
  var selfClosing = /\/\s*>$/;
  var openTags = [];
  var lastIndex = 0;
  var remaining = n;
  var result = '';
  var match;

  // A negative budget can never admit any text; without this guard the loop
  // would copy every tag while skipping all of the text.
  if (remaining < 0) {
    return '';
  }

  // For each tag, while there is still character budget left.
  while ((match = tagPattern.exec(s)) && remaining) {
    // Text between the previous tag and this one, clipped to the budget.
    var text = s.substring(lastIndex, match.index).substr(0, remaining);
    result += text;
    remaining -= text.length;
    lastIndex = tagPattern.lastIndex;

    // Only emit the tag if more text may follow it; once the budget is spent
    // the remaining open elements are closed below instead.
    if (remaining) {
      var tag = match[0];
      var name = match[1];
      result += tag;

      if (name.charAt(0) === '/') {
        // Closing tag: pop the stack (does not account for bad HTML).
        openTags.pop();
      } else if (
        name !== '' &&
        name.charAt(0) !== '!' && // comments and doctype are not elements
        name.charAt(0) !== '?' && // processing instructions
        !selfClosing.test(tag) &&
        !voidElements.test(name)
      ) {
        // An element that needs a matching closing tag later.
        openTags.push(name);
      }
    }
  }

  // Add the remainder of the string, if needed (no more tags past this point).
  result += s.substr(lastIndex, remaining);

  // Close whatever is still open, innermost element first.
  while (openTags.length) {
    result += '</' + openTags.pop() + '>';
  }

  return result;
}
Example: http://jsfiddle.net/danmana/5mNNU/
Note: patrick dw’s solution may be safer regarding bad HTML, but I’m not sure how well it handles whitespace.