I am trying to convert
// Sample markup to convert: one outer <span> with inline <i>/<strong> children and a nested <span>.
// NOTE(review): the style attribute nests unescaped double quotes around Open Sans — confirm how the HTML parser in use treats that attribute.
myHtml = ` <span style="font-family: "Open Sans", Arial, sans-serif; font-size: 14px; text-align: justify;"> In nec <i>convallis</i> justo. Quisque egestas mollis nibh non hendrerit. <strong>Phasellus</strong> tempus sapien in ultricies aliquet. Maecenas nec risus viverra tortor rhoncus venenatis in sit amet enim. Integer id ipsum non leo finibus sagittis in eu velit. Curabitur sed dolor dui. <span>Mauris <strong>aliquam <i>magna</i></strong> a ipsum</span> tincidunt tempor vitae sit amet ante. Maecenas pellentesque augue vitae quam faucibus, vel convallis dolor placerat. Pellentesque semper justo a turpis euismod, ac gravida enim suscipit.</span> `;
into
// Desired result of the conversion. Every element is described by `openTag` /
// `closingTag`; an element holding only text carries it in `value`, while an
// element with mixed content lists its parts in `children`. Bare text runs are
// plain `{ value }` entries. Key spelling is uniform (`openTag`, `children`) so
// a consumer can read every level of the tree the same way.
const data = [
  {
    openTag:
      '<span style="font-family: "Open Sans", Arial, sans-serif; font-size: 14px; text-align: justify;">',
    closingTag: "</span>",
    children: [
      { value: "In nec" },
      { openTag: "<i>", value: "convallis", closingTag: "</i>" },
      { value: " justo. Quisque egestas mollis nibh non hendrerit. " },
      { openTag: "<strong>", value: "Phasellus", closingTag: "</strong>" },
      {
        value:
          " tempus sapien in ultricies aliquet. Maecenas nec risus viverra tortor rhoncus venenatis in sit amet enim. Integer id ipsum non leo finibus sagittis in eu velit. Curabitur sed dolor dui. "
      },
      {
        // Nested element with mixed content: described through `children`.
        openTag: "<span>",
        children: [
          { value: "Mauris " },
          {
            openTag: "<strong>",
            children: [
              { value: "aliquam" },
              { openTag: "<i>", value: "magna", closingTag: "</i>" }
            ],
            closingTag: "</strong>"
          },
          { value: " a ipsum" }
        ],
        closingTag: "</span>"
      },
      {
        value:
          " tincidunt tempor vitae sit amet ante. Maecenas pellentesque augue vitae quam faucibus, vel convallis dolor placerat. Pellentesque semper justo a turpis euismod, ac gravida enim suscipit."
      }
    ]
  }
];
Current Output
{
"rawTagName": null,
"children": [
{
"children": []
},
{
"rawTagName": "span",
"children": [
{
"value": "n n In nec "
},
{
"rawTagName": "i",
"value": "convallis"
},
{
"value": " justo. Quisque egestas mollis nibh non hendrerit. "
},
{
"rawTagName": "strong",
"value": "Phasellus"
},
{
"value": " tempus sapien in ultricies aliquet. Maecenas nec risus viverra tortor rhoncus venenatis in sit amet enim. Integer id ipsum non leo finibus sagittis in eu velit. Curabitur sed dolor dui. "
},
{
"rawTagName": "span",
"children": [
{
"children": []
},
{
"rawTagName": "strong",
"children": [
{
"value": "aliquam "
},
{
"rawTagName": "i",
"value": "magna"
}
]
},
{
"children": []
}
]
},
{
"value": " tincidunt tempor vitae sit amet ante. Maecenas pellentesque augue vitae quam faucibus, vel convallis dolor placerat. Pellentesque semper justo a turpis euismod, ac gravida enim suscipit."
}
]
},
{
"children": []
}
]
}
Below is my current approach using recursion
import { parse } from "node-html-parser";
// Write Javascript code!
// Sample HTML input. The template literal starts and ends with a newline, so the
// markup is surrounded by whitespace.
// NOTE(review): the style attribute nests unescaped double quotes around Open Sans — confirm how the parser treats that attribute.
const myHtml = `
<span style="font-family: "Open Sans", Arial, sans-serif; font-size: 14px; text-align: justify;">
In nec <i>convallis</i> justo. Quisque egestas mollis nibh non hendrerit. <strong>Phasellus</strong> tempus sapien in ultricies aliquet. Maecenas nec risus viverra tortor rhoncus venenatis in sit amet enim. Integer id ipsum non leo finibus sagittis in eu velit. Curabitur sed dolor dui. <span>Mauris <strong>aliquam <i>magna</i></strong> a ipsum</span> tincidunt tempor vitae sit amet ante. Maecenas pellentesque augue vitae quam faucibus, vel convallis dolor placerat. Pellentesque semper justo a turpis euismod, ac gravida enim suscipit.</span>
`;
/**
 * Parse an HTML string with `parse` and summarise the resulting tree as nested
 * plain objects.
 *
 * @param {string} str - HTML source handed to `parse`.
 * @returns {object} Summary of the parsed root node (see `nodeAsObject`).
 */
const tranform = str => {
  /**
   * Summarise one parsed node.
   *
   * - No child nodes: `{ value }` holding the node's `rawText`.
   * - Exactly one child that itself has no children: `{ rawTagName, value }`.
   * - Otherwise: `{ rawTagName, children }`, where every direct child is
   *   emitted as `{ rawTagName, children }` and recursion only resumes at the
   *   grandchildren (direct children never go through the two checks above).
   */
  function nodeAsObject(root) {
    const childCount = root.childNodes.length;
    if (childCount === 0) {
      return { value: root.rawText };
    }

    const wrapsSingleLeaf =
      childCount === 1 && root.childNodes[0].childNodes.length === 0;
    if (wrapsSingleLeaf) {
      return { rawTagName: root.rawTagName, value: root.rawText };
    }

    const rawTagName = root.rawTagName;
    const children = [];
    for (const child of root.childNodes) {
      children.push({
        rawTagName: child.rawTagName,
        children: child.childNodes.map(grandChild => nodeAsObject(grandChild))
      });
    }
    return { rawTagName, children };
  }

  const parsedRoot = parse(str);
  return nodeAsObject(parsedRoot);
};
// Transform once and reuse the result for both the console and the page.
const transformed = tranform(myHtml);
console.log(transformed);
const pre = document.getElementById("pre");
// Use textContent rather than innerHTML: the JSON is meant to be displayed
// verbatim, and any "<" or "&" inside the parsed text would otherwise be
// interpreted as markup by the browser.
pre.textContent = JSON.stringify(transformed, null, " ");
Advertisement
Answer
I went for the recursive approach and created an output that is similar to your expected output.
// Sample HTML input. The template literal starts and ends with a newline, so the
// markup is surrounded by whitespace.
// NOTE(review): the style attribute nests unescaped double quotes around Open Sans — confirm how the parser treats that attribute.
const myHtml = `
<span style="font-family: "Open Sans", Arial, sans-serif; font-size: 14px; text-align: justify;">
In nec <i>convallis</i> justo. Quisque egestas mollis nibh non hendrerit. <strong>Phasellus</strong> tempus sapien in ultricies aliquet. Maecenas nec risus viverra tortor rhoncus venenatis in sit amet enim. Integer id ipsum non leo finibus sagittis in eu velit. Curabitur sed dolor dui. <span>Mauris <strong>aliquam <i>magna</i></strong> a ipsum</span> tincidunt tempor vitae sit amet ante. Maecenas pellentesque augue vitae quam faucibus, vel convallis dolor placerat. Pellentesque semper justo a turpis euismod, ac gravida enim suscipit.</span>
`;
// Holds the translated top-level nodes; filled in as a side effect of parser().
let revisedHtml;

/**
 * Parse an HTML string and store the translation of each top-level node in
 * `revisedHtml`.
 *
 * @param {string} htmlStr - HTML source handed to `parse`.
 * @returns {object} The parsed root returned by `parse` (not the translation).
 */
const parser = htmlStr => {
  const parsedRoot = parse(htmlStr);
  const translatedNodes = [];
  for (const topLevelNode of parsedRoot.childNodes) {
    translatedNodes.push(createTranslatedNode(topLevelNode));
  }
  revisedHtml = translatedNodes;
  return parsedRoot;
};
/**
 * Translate a parsed HTML node into a plain descriptor object.
 *
 * - A node with a falsy `rawTagName` is treated as text and yields
 *   `{ value }`, the trimmed `rawText`.
 * - A tagged node yields `{ openTag, closingTag }`, built by string
 *   concatenation from `rawTagName` and `rawAttrs`. A closing tag is always
 *   emitted, whether or not the source markup had one.
 * - If the node's only child is a text node, that child's trimmed text is
 *   stored in `value`; any other non-empty child list is translated
 *   recursively into `children`.
 *
 * @param {object} node - Parsed node exposing `rawTagName`, `rawAttrs`,
 *   `rawText` and `childNodes`.
 * @returns {object} Descriptor with some of `openTag`, `closingTag`, `value`
 *   and `children`.
 */
const createTranslatedNode = node => {
  const tagName = node.rawTagName;
  let currentNode;

  if (tagName) {
    // Only insert the separating space when there are attributes, so that
    // attribute-less tags come out as "<i>" rather than "<i >".
    const attrs = (node.rawAttrs ?? "").trim();
    currentNode = {
      openTag: attrs ? `<${tagName} ${attrs}>` : `<${tagName}>`,
      closingTag: `</${tagName}>`
    };
  } else {
    // No tag name: this is a text node.
    currentNode = { value: node.rawText?.trim() };
  }

  const childNodes = node.childNodes ?? [];
  const hasSingleTextChild =
    childNodes.length === 1 && !childNodes[0].rawTagName;

  if (hasSingleTextChild) {
    currentNode.value = childNodes[0].rawText?.trim();
  } else if (childNodes.length > 0) {
    // Covers a lone element child as well, which would otherwise be dropped
    // together with its whole subtree.
    currentNode.children = childNodes.map(childNode =>
      createTranslatedNode(childNode)
    );
  }

  return currentNode;
};
// parser() fills revisedHtml as a side effect, so it has to run before the
// result is logged; otherwise revisedHtml is still undefined here.
parser(myHtml);
console.log(revisedHtml);
I make some assumptions about the opening and closing tags, since I simply use string concatenation to add the <> around the tag names.
Other than that I trim() the value inputs to remove the unwanted whitespace around the value.
This does make some assumptions about the HTML, such as every element having both an opening and a closing tag. A further improvement would be to check for that as well.