Do you have a solution for taking a substring of text that contains HTML tags in JavaScript, counting only the text characters and keeping the markup valid?
For example:
JavaScript
x
8
1
// Sample input: plain text mixed with nested inline tags.
var str = 'Lorem ipsum <a href="#">dolor <strong>sit</strong> amet</a>, consectetur adipiscing elit.'
2
3
// Desired: keep the first 20 text characters (tag markup is not counted)
// and close any tags that are still open at the cut point.
html_substr(str, 20)
4
// return Lorem ipsum <a href="#">dolor <strong>si</strong></a>
5
6
// Desired: keep the first 30 text characters; here both tags are already
// closed before the cut, so the trailing text is simply truncated.
html_substr(str, 30)
7
// return Lorem ipsum <a href="#">dolor <strong>sit</strong> amet</a>, co
8
Advertisement
Answer
Taking into consideration that parsing html with regex is a bad idea, here is a solution that does just that 🙂
EDIT: Just to be clear: this is not a robust solution. It was meant as an exercise that makes very lenient assumptions about the input string, and as such it should be taken with a grain of salt. Read the link above to see why HTML cannot be reliably parsed with regular expressions.
JavaScript
1
39
39
1
/**
 * Truncates an HTML string to at most `n` characters of text, copying tags
 * through without counting them and closing any tags left open at the cut.
 *
 * This is a lenient, regex-based scanner, not an HTML parser. It assumes
 * well-formed, properly nested markup with no `>` inside attribute values or
 * comments. Character references such as `&amp;` are counted as several
 * characters and may be cut in the middle.
 *
 * @param {string} s - HTML source to truncate. `null`/`undefined` is treated as ''.
 * @param {number} n - Maximum number of text characters to keep. Fractions are
 *   floored; zero, negative or non-numeric values yield an empty string.
 * @returns {string} The truncated HTML with every still-open element closed.
 */
function htmlSubstring(s, n) {
  // Elements that never take a closing tag, so they must not be tracked as open.
  var VOID_ELEMENTS = {
    area: true, base: true, br: true, col: true, embed: true, hr: true,
    img: true, input: true, link: true, meta: true, param: true,
    source: true, track: true, wbr: true
  };
  // Group 1 is the tag name (including a leading '/' on closing tags): everything
  // after '<' up to the first whitespace or '>'.
  var tagPattern = /<([^>\s]*)[^>]*>/g;
  var html = s == null ? '' : String(s);
  var remaining = Math.floor(Number(n));
  var openTags = [];
  var lastIndex = 0;
  var result = '';
  var match;
  var text;
  var name;
  var isStandalone;

  // Also catches NaN; a fractional budget would otherwise never reach zero.
  if (!(remaining > 0)) {
    return '';
  }

  // Walk tag by tag while there is still text budget left.
  while (remaining > 0 && (match = tagPattern.exec(html)) !== null) {
    // Text between the previous tag and this one, capped to the budget.
    text = html.slice(lastIndex, match.index).slice(0, remaining);
    result += text;
    remaining -= text.length;
    lastIndex = tagPattern.lastIndex;

    if (remaining === 0) {
      // Budget was used up by the text before this tag: drop the tag itself.
      // If it was a closing tag, the unwinding loop below re-creates it.
      break;
    }

    result += match[0];
    name = match[1];

    if (name.charAt(0) === '/') {
      // Closing tag: assume it matches the innermost open element.
      openTags.pop();
    } else {
      // Self-closing syntax (<br/>, <br />), declarations/comments (<!...>, <?...>),
      // void elements and nameless tags open nothing that needs closing.
      isStandalone = name === '' ||
        name.charAt(0) === '!' ||
        name.charAt(0) === '?' ||
        /\/>$/.test(match[0]) ||
        VOID_ELEMENTS[name.toLowerCase()] === true;
      if (!isStandalone) {
        openTags.push(name);
      }
    }
  }

  // Trailing text after the last tag, if any budget remains.
  result += html.slice(lastIndex, lastIndex + remaining);

  // Close whatever is still open, innermost first.
  while (openTags.length) {
    result += '</' + openTags.pop() + '>';
  }

  return result;
}
39
Example: http://jsfiddle.net/danmana/5mNNU/
Note: patrick dw’s solution may be safer with regard to malformed HTML, but I’m not sure how well it handles whitespace.