getAllStatuses(); $renderer = get_active_status_renderer(); echo ''; }

/**
 * Callback hooked to the 'bb_head' action by topic_icons_init().
 *
 * As written it echoes a single newline and nothing else.
 *
 * NOTE(review): the function name suggests it once printed stylesheet markup;
 * the string here contains only "\n", so that markup appears to have been
 * lost - confirm against the original plugin source.
 */
function topic_icons_css() { echo "\n"; }

/**
 * Filter callback for 'bb_topic_labels' (registered at priority 11 by
 * topic_icons_init()).
 *
 * On front, forum, view and tag pages it asks the active status interpreter
 * for the status of the global $topic, asks the active status renderer for an
 * image file name and a tooltip for that status, and returns a
 * sprintf()-formatted string. On every other page the incoming label is
 * returned unchanged.
 *
 * NOTE(review): each format string below has fewer %s placeholders (two or
 * three) than the number of arguments supplied (six or seven). sprintf()
 * silently ignores surplus arguments, so only the leading arguments can
 * reach the output. The HTML that consumed the remaining arguments has
 * presumably been stripped from these literals - restore it from the original
 * plugin rather than guessing.
 *
 * @param mixed $label Label text supplied by the filter chain.
 * @return mixed Formatted string on the listed page types, otherwise $label.
 */
function topic_icons_label( $label ) {
	global $topic;
	if (bb_is_front() || bb_is_forum() || bb_is_view() || bb_is_tag()) {
		$icon_set_name = topic_icons_get_active_icon_set();
		// NOTE(review): the base URL is joined to the set name with no separator
		// here, but to 'empty.png' with a leading '/' further down - check whether
		// ICON_SET_URL_BASE ends in a slash.
		$icon_set_url = ICON_SET_URL_BASE . $icon_set_name;
		$status = get_active_status_interpreter()->getStatus(bb_get_location(), $topic);
		$renderer = get_active_status_renderer();
		$image = $renderer->renderStatus($status);
		$tooltip = $renderer->renderStatusTooltip($status);
		// Look for the rendered image inside this plugin's icon-sets/<set>/ directory.
		$exists = file_exists(dirname(__FILE__).'/icon-sets/'.$icon_set_name.'/'.$image);
		if (!$exists) {
			// Image file missing on disk: pass the 'empty.png' URL instead of the set image.
			return sprintf(__('
%s
%s'), get_topic_link($topic->topic_id), ICON_SET_URL_BASE.'/empty.png', ICON_WIDTH, ICON_HEIGHT, $tooltip, $label);
		} else if (strlen($tooltip) > 0) {
			// Non-empty tooltip: $tooltip is supplied twice to this variant.
			return sprintf(__('
%s%s
%s'), get_topic_link($topic->topic_id), $icon_set_url.'/'.$image, ICON_WIDTH, ICON_HEIGHT, $tooltip, $tooltip, $label);
		} else {
			// Image exists and the tooltip is an empty string.
			return sprintf(__('
%s
%s'), get_topic_link($topic->topic_id), $icon_set_url.'/'.$image, ICON_WIDTH, ICON_HEIGHT, $tooltip, $label);
		}
	}
	return $label;
}

/**
 * Wires the plugin into bbPress.
 *
 * Removes the 'bb_closed_label' (priority 10) and 'bb_sticky_label'
 * (priority 20) callbacks from the 'bb_topic_labels' filter, adds
 * topic_icons_label() to that filter at priority 11, hooks the CSS and admin
 * page callbacks, and registers the 'default' status interpreter and renderer.
 */
function topic_icons_init( ) {
	remove_filter('bb_topic_labels', 'bb_closed_label', 10);
	remove_filter('bb_topic_labels', 'bb_sticky_label', 20);
	add_filter('bb_topic_labels', 'topic_icons_label', 11);
	add_action('bb_head', 'topic_icons_css');
	add_action('bb_admin_menu_generator', 'topic_icons_admin_page_add');
	add_action('bb_admin-header.php', 'topic_icons_admin_page_process');
	topic_icons_register_status_interpreter('default', new DefaultStatusInterpreter(BUSY_THRESHOLD));
	topic_icons_register_status_renderer('default', new DefaultStatusRenderer());
}

// Run the registration as soon as this file is loaded.
topic_icons_init(); ?> Digital Humanities Questions & Answers » Topic: DOMDocument and UTF-8 http://digitalhumanities.org/answers/topic/domdocument-and-utf-8 Digital Humanities Questions & Answers » Topic: DOMDocument and UTF-8 en-US Sun, 24 Mar 2019 23:23:38 +0000 http://bbpress.org/?v=1.0.2 <![CDATA[Search]]> q http://digitalhumanities.org/answers/search.php inactinique on "DOMDocument and UTF-8" http://digitalhumanities.org/answers/topic/domdocument-and-utf-8#post-1393 Thu, 22 Sep 2011 18:38:55 +0000 inactinique 1393@http://digitalhumanities.org/answers/ <p>Hi </p> <p><em>Replying to @Anupam Basu's <a href="http://digitalhumanities.org/answers/topic/domdocument-and-utf-8#post-1392">post</a>:</em></p> <p>I tried with Firefox on another computer with which I never visited the website. It's really not loading properly. 
Thanks anyway for your help.</p> <p><em>Replying to @Stéfan Sinclair's <a href="http://digitalhumanities.org/answers/topic/domdocument-and-utf-8#post-1390">post</a>:</em></p> <p>Your trick just worked Stéfan!</p> <p>Thanks a lot to all for your help!</p> <p>Best,<br /> Frédéric </p> Anupam Basu on "DOMDocument and UTF-8" http://digitalhumanities.org/answers/topic/domdocument-and-utf-8#post-1392 Thu, 22 Sep 2011 17:48:39 +0000 Anupam Basu 1392@http://digitalhumanities.org/answers/ <p>It just worked for me across Firefox, Chrome and Safari. Could you make sure you're not loading from a cache? If it still doesn't work, could you try to capture the raw HTTP response with something like FireBug and post it?</p> <p>best,<br /> -Anupam </p> inactinique on "DOMDocument and UTF-8" http://digitalhumanities.org/answers/topic/domdocument-and-utf-8#post-1391 Thu, 22 Sep 2011 13:58:51 +0000 inactinique 1391@http://digitalhumanities.org/answers/ <p><em>Replying to @Patrick Murray-John's <a href="http://digitalhumanities.org/answers/topic/domdocument-and-utf-8#post-1389">post</a>:</em></p> <p>Hi Patrick,<br /> no I already tried this, it does not work.<br /> Thanks,<br /> Frédéric </p> Stéfan Sinclair on "DOMDocument and UTF-8" http://digitalhumanities.org/answers/topic/domdocument-and-utf-8#post-1390 Thu, 22 Sep 2011 13:53:37 +0000 Stéfan Sinclair 1390@http://digitalhumanities.org/answers/ <p>The problem definitely seems to be in the parsing that happens in loadHTML – I don't think the charset meta tag is being respected by the parser, so the parsing is defaulting to latin1 (as per HTTP specs). 
One possible hack is to inject a second meta tag before you do the HTML loading:</p> <div class="bb_syntax"><div class="code"><pre class="php" style="font-family:monospace;"><span style="color: #000088;">$path</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">'http://www.zotero.org/inactinique/cv'</span><span style="color: #339933;">;</span> <span style="color: #000088;">$html</span> <span style="color: #339933;">=</span> <span style="color: #990000;">file_get_contents</span><span style="color: #009900;">&#40;</span><span style="color: #000088;">$path</span><span style="color: #009900;">&#41;</span><span style="color: #339933;">;</span> &nbsp; <span style="color: #666666; font-style: italic;">// now we inject another meta tag</span> <span style="color: #000088;">$contentType</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">'&lt;meta http-equiv=&quot;Content-Type&quot; content=&quot;text/html; charset=utf-8&quot;/&gt;'</span><span style="color: #339933;">;</span> <span style="color: #000088;">$html</span> <span style="color: #339933;">=</span> <span style="color: #990000;">str_replace</span><span style="color: #009900;">&#40;</span><span style="color: #0000ff;">'&lt;head&gt;'</span><span style="color: #339933;">,</span> <span style="color: #0000ff;">'&lt;head&gt;'</span> <span style="color: #339933;">.</span> <span style="color: #000088;">$contentType</span><span style="color: #339933;">,</span> <span style="color: #000088;">$html</span><span style="color: #009900;">&#41;</span><span style="color: #339933;">;</span> &nbsp; <span style="color: #666666; font-style: italic;">// now we can continue</span> <span style="color: #000088;">$dom</span> <span style="color: #339933;">=</span> <span style="color: #000000; font-weight: bold;">new</span> DOMDocument<span style="color: #009900;">&#40;</span><span style="color: #009900;">&#41;</span><span style="color: #339933;">;</span> <span style="color: #339933;">@</span><span 
style="color: #000088;">$dom</span><span style="color: #339933;">-&gt;</span><span style="color: #004000;">loadHTML</span><span style="color: #009900;">&#40;</span><span style="color: #000088;">$html</span><span style="color: #009900;">&#41;</span><span style="color: #339933;">;</span> <span style="color: #b1b100;">echo</span> <span style="color: #000088;">$dom</span><span style="color: #339933;">-&gt;</span><span style="color: #004000;">saveHTML</span><span style="color: #009900;">&#40;</span><span style="color: #009900;">&#41;</span><span style="color: #339933;">;</span></pre></div></div> Patrick Murray-John on "DOMDocument and UTF-8" http://digitalhumanities.org/answers/topic/domdocument-and-utf-8#post-1389 Thu, 22 Sep 2011 13:01:11 +0000 Patrick Murray-John 1389@http://digitalhumanities.org/answers/ <p>Try adding this to how you create the DOMDocument at line 29. </p> <p>$dom = new DOMDocument('1.0', 'UTF-8');</p> <p>It might be redundant with Anupam's suggestion for how to get the html. I'd try it both with and without that approach. One way or the other I think it'll work.</p> <p>HTH </p> inactinique on "DOMDocument and UTF-8" http://digitalhumanities.org/answers/topic/domdocument-and-utf-8#post-1388 Thu, 22 Sep 2011 11:53:37 +0000 inactinique 1388@http://digitalhumanities.org/answers/ <p>Thanks for your answers. Unfortunately, it doesn't work. One more thing:<br /> The bad encoding of the - in this case - "é" on my website's CV page is appearing in the code this way: &Atilde;&copy; - meaning that the special characters have been wrongly encoded with HTML code. </p> Anupam Basu on "DOMDocument and UTF-8" http://digitalhumanities.org/answers/topic/domdocument-and-utf-8#post-1387 Thu, 22 Sep 2011 08:57:01 +0000 Anupam Basu 1387@http://digitalhumanities.org/answers/ <p>You can slip the UTF-8 encoding explicitly into your file. 
Change line 27 of your code to the following and it should load correctly.</p> <p><code><br /> if($html = '&lt;?xml encoding="UTF-8"&gt;' . file_get_contents($path)) {<br /> </code> </p> Conal Tuohy on "DOMDocument and UTF-8" http://digitalhumanities.org/answers/topic/domdocument-and-utf-8#post-1386 Thu, 22 Sep 2011 08:43:21 +0000 Conal Tuohy 1386@http://digitalhumanities.org/answers/ <p><em>Replying to @<a href='http://digitalhumanities.org/answers/profile/inactinique'>inactinique</a>'s <a href="http://digitalhumanities.org/answers/topic/domdocument-and-utf-8#post-1385">post</a>:</em></p> <p>I'm not a PHP expert, either, so I can't suggest a proper fix to UTF-8-encode the Zotero data, but perhaps you could change the encoding declaration on that one page? </p> inactinique on "DOMDocument and UTF-8" http://digitalhumanities.org/answers/topic/domdocument-and-utf-8#post-1385 Thu, 22 Sep 2011 06:14:21 +0000 inactinique 1385@http://digitalhumanities.org/answers/ <p>Hi,</p> <p>I'm trying to use an under-development plug-in for WordPress that is being developed by @<a href='http://digitalhumanities.org/answers/profile/clioweb'>clioweb</a> and @stakats from the CHNM. The aim of this plug-in is to take the content of a Zotero user's CV and to publish it on your WordPress blog. Sean Takats is using it on his blog: <a href="http://quintessenceofham.org/cv/" rel="nofollow">http://quintessenceofham.org/cv/</a></p> <p>My problem is the following: I'm French, so I've got an accent in my name. 
See here: <a href="http://www.zotero.org/inactinique/cv" rel="nofollow">http://www.zotero.org/inactinique/cv</a></p> <p>When I'm using this plug-in, the accents in my name (and in all titles of my Zotero CV page also - temporarily, I just found expressions in French with no accents for those titles) are not properly encoded on my website: <a href="http://www.clavert.net/wordpress/?page_id=333" rel="nofollow">http://www.clavert.net/wordpress/?page_id=333</a></p> <p>If you look at the HTML code of the Zotero CV page, the page is declared as UTF-8 encoded (just as my website). Accents in titles, in this code, are directly typed-in (é, è, etc), but in the rest of the source code, accents are encoded in HTML (&eacute; &egrave; etc).</p> <p>So I suppose that the problem is the way DOMDocument and loadHTML in the PHP code of the plug-in are handling special characters.</p> <p>The source code of the plug-in is here: <a href="https://github.com/inactinique/scholarpress-vitaware" rel="nofollow">https://github.com/inactinique/scholarpress-vitaware</a></p> <p>Does someone have an idea? Please, be quite explicit in your answers - I'm starting with PHP.</p> <p>Best regards,<br /> Frédéric Clavert </p>