2013-10-03 37 views
1

我对 jQuery 中的一些简单机制感到困惑(问题:如何以纯文本形式获取元素及其所有后代的内容)。假设我的网站上有如下的 div 容器:

<div id="myDiv"> 
    <math xmlns="http://www.w3.org/1998/Math/MathML"> 
     <mrow> 
     <mi>n</mi> 
     <mo stretchy="false">≥</mo> 
     <mn>2</mn> 
     </mrow> 
    </math> 

    - Some text here 
    > Some quote here 
</div> 

现在我想原样取回这段内容(不做任何修改!),以便之后自己再做一些改动。

首次尝试

$('#myDiv').text(); 

哦,所有子标签都没有了。那不是我想要的。

第二次尝试

$('#myDiv').html(); 

嗯,这看起来更好。但仔细一看,最后一行的“>”现在也是HTML编码的。那不是我想要的。

问题

我怎样才能得到 DOM 元素内容的纯文本副本?

更新

除了已接受的答案及其“反转义(unescape)文本”的提示之外,我还发现了这个使用 <script> 标签的不错的小变通办法(nice little workaround using a <script>-Tag)。

+0

嗯,这很有趣...渴望知道如何实现这个 –

回答

1

你需要挖掘一点,找出一些可能的解决方案。

例如,我查找了一番,找到了这三个帖子:THREAD1、THREAD2、THREAD3。

利用这些资源,我做了这样的事情:

/**
 * Builds a lookup table that maps literal characters to their HTML entities
 * (a JavaScript port of PHP's get_html_translation_table()).
 *
 * @param {(string|number)} [table] Which table to build: 'HTML_SPECIALCHARS'
 *     or 0 (the default) for '&', '<', '>' and the quote characters only, or
 *     'HTML_ENTITIES' or 1 to also include the named entities for character
 *     codes 160-255. Names are matched case-insensitively.
 * @param {(string|number)} [quote_style] Quote handling: 'ENT_NOQUOTES' or 0
 *     (neither quote character), 'ENT_COMPAT' or 2 (double quote only, the
 *     default), 'ENT_QUOTES' or 3 (double and single quote). Any other value
 *     behaves like 'ENT_COMPAT'.
 * @returns {Object} Map from a single character to its entity string, with
 *     keys inserted in ascending character-code order.
 * @throws {Error} If `table` does not resolve to a supported table name.
 */
function get_html_translation_table (table, quote_style) {
    var tableNames = { 0: 'HTML_SPECIALCHARS', 1: 'HTML_ENTITIES' };
    var quoteStyleNames = { 0: 'ENT_NOQUOTES', 2: 'ENT_COMPAT', 3: 'ENT_QUOTES' };
    // Entity names for character codes 160..255, in code order
    // (index i corresponds to character code 160 + i).
    var latin1Names = [
        'nbsp', 'iexcl', 'cent', 'pound', 'curren', 'yen', 'brvbar', 'sect',
        'uml', 'copy', 'ordf', 'laquo', 'not', 'shy', 'reg', 'macr',
        'deg', 'plusmn', 'sup2', 'sup3', 'acute', 'micro', 'para', 'middot',
        'cedil', 'sup1', 'ordm', 'raquo', 'frac14', 'frac12', 'frac34', 'iquest',
        'Agrave', 'Aacute', 'Acirc', 'Atilde', 'Auml', 'Aring', 'AElig', 'Ccedil',
        'Egrave', 'Eacute', 'Ecirc', 'Euml', 'Igrave', 'Iacute', 'Icirc', 'Iuml',
        'ETH', 'Ntilde', 'Ograve', 'Oacute', 'Ocirc', 'Otilde', 'Ouml', 'times',
        'Oslash', 'Ugrave', 'Uacute', 'Ucirc', 'Uuml', 'Yacute', 'THORN', 'szlig',
        'agrave', 'aacute', 'acirc', 'atilde', 'auml', 'aring', 'aelig', 'ccedil',
        'egrave', 'eacute', 'ecirc', 'euml', 'igrave', 'iacute', 'icirc', 'iuml',
        'eth', 'ntilde', 'ograve', 'oacute', 'ocirc', 'otilde', 'ouml', 'divide',
        'oslash', 'ugrave', 'uacute', 'ucirc', 'uuml', 'yacute', 'thorn', 'yuml'
    ];
    var hash_map = {};
    var useTable;
    var useQuoteStyle;
    var i;

    // Turns a numeric constant or a (case-insensitive) name into the canonical name.
    function resolve (value, namesByNumber, fallback) {
        // null and '' coerce to 0 under isNaN(), which used to send them into the
        // numeric lookup and yield undefined; treat them as "not provided" instead.
        if (value === null || value === '') {
            return fallback;
        }
        // The coercing global isNaN() is intentional: numeric strings such as '1'
        // must be accepted as constants too.
        if (!isNaN(value)) {
            return namesByNumber[value];
        }
        return value ? value.toUpperCase() : fallback;
    }

    useTable = resolve(table, tableNames, 'HTML_SPECIALCHARS');
    useQuoteStyle = resolve(quote_style, quoteStyleNames, 'ENT_COMPAT');

    if (useTable !== 'HTML_SPECIALCHARS' && useTable !== 'HTML_ENTITIES') {
        throw new Error("Table: " + useTable + ' not supported');
    }

    // Entries are added in ascending character-code order (34, 38, 39, 60, 62,
    // then 160..255) so that iterating the result is deterministic.
    if (useQuoteStyle !== 'ENT_NOQUOTES') {
        hash_map['"'] = '&quot;';
    }
    hash_map['&'] = '&amp;';
    if (useQuoteStyle === 'ENT_QUOTES') {
        hash_map["'"] = '&#39;';
    }
    hash_map['<'] = '&lt;';
    hash_map['>'] = '&gt;';

    if (useTable === 'HTML_ENTITIES') {
        for (i = 0; i < latin1Names.length; i++) {
            hash_map[String.fromCharCode(160 + i)] = '&' + latin1Names[i] + ';';
        }
    }

    return hash_map;
}

/**
 * Converts HTML entities in a string back to the characters they represent
 * (a JavaScript port of PHP's html_entity_decode()).
 *
 * Uses the 'HTML_ENTITIES' table from get_html_translation_table(), and
 * additionally decodes the numeric apostrophe entity "&#039;" regardless of
 * quote_style.
 *
 * @param {*} string Value to decode; converted with toString(). null and
 *     undefined are treated as an empty string.
 * @param {(string|number)} [quote_style] Passed through to
 *     get_html_translation_table() to select which quote entities are decoded.
 * @returns {string} The decoded text.
 */
function html_entity_decode (string, quote_style) {
    var table;
    var character;
    var decoded;

    if (string === null || string === undefined) {
        return '';
    }
    decoded = string.toString();
    table = get_html_translation_table('HTML_ENTITIES', quote_style);

    for (character in table) {
        // '&amp;' is handled after the loop; decoding it here would create new
        // entities for the following passes (e.g. "&amp;lt;" -> "&lt;" -> "<").
        if (table.hasOwnProperty(character) && character !== '&') {
            decoded = decoded.split(table[character]).join(character);
        }
    }

    // Must also run before '&amp;' so that the escaped text "&amp;#039;"
    // decodes to "&#039;" rather than being decoded twice into "'".
    decoded = decoded.split('&#039;').join("'");

    // Ampersands last: every other entity starts with '&'.
    decoded = decoded.split('&amp;').join('&');

    return decoded;
}

// Usage: read the inner HTML of #myDiv, assign it to a detached <div>, then
// log that <div>'s re-serialized markup with its entities decoded back to
// literal characters (e.g. "&gt;" becomes ">").
var d = document.createElement("div"); 
d.innerHTML = $('#myDiv').html() ; 
console.log(html_entity_decode (d.innerHTML)); 

这会得到如下结果(我猜这就是你想要的):

<math xmlns="http://www.w3.org/1998/Math/MathML"> 
    <mrow> 
    <mi>n</mi> 
    <mo stretchy="false">≥</mo> 
    <mn>2</mn> 
    </mrow> 
</math> 

- Some text here 
> Some quote here 

这是一个包含所有相关代码、可以直接运行的 JSFiddle 示例:

http://jsfiddle.net/Ddjag/1/

这里的诀窍是把文本内容解码为未转义(unescaped)的字符,而这正是解码函数所做的事情。

+0

谢谢。这对我有很大的帮助。 – shadowhorst

+0

@shadowhorst:乐意帮忙! –

1

你依赖的是一个特例:“>”有时可以像你那样不经转义直接使用。相比之下,例如“<”就不能不经转义地使用。

我会说.html()的输出是你想要的。 在任何您希望输出值的上下文中,都希望确保纯文本字符被编码。

据我所知,一旦浏览器解析了代码并构建出 DOM 树,它就不会再保存该树的原始文本表示。特别是,它不会保留原始标记中各种可选语法的信息,例如可选的引号字符是否存在,或标签之间空白的数量。

相关问题