2017-05-14 22 views

我想修改一段代码:它解析文本中的 HTML 超链接,并把这些链接存入数据库。问题(PHP):如何更改 HTML href 解析函数,使其仅在 URL 中包含某个固定字符串时才匹配?


<a href="http://example.com/images/test1.jpg">my image</a>


<a href="http://example.com/thisismyunique/string/test2.jpg">my image2</a>

匹配的依据是该 URL 中包含“/thisismyunique/string”。


/**
 * Parser for plain HTML hyperlinks of the form <a href="URL">anchor text</a>.
 */
class blcHTMLLink extends blcParser {
    var $supported_formats = array('html');

    /**
     * Parse a string for HTML links - <a href="URL">anchor text</a>
     *
     * @param string $content The text to parse.
     * @param string $base_url The base URL to use for normalizing relative URLs. If omitted, the blog's root URL will be used.
     * @param string $default_link_text
     * @return array An array of new blcLinkInstance objects. The objects will include info about the links found, but not about the corresponding container entity.
     */
    function parse($content, $base_url = '', $default_link_text = ''){
        //Remove all <code></code> blocks first.
        //The quantifier is `.*?` rather than `.+?` so that an empty <code></code> pair is
        //matched on its own; with `.+?` the match would run on to the *next* </code> and
        //swallow any real links in between.
        $content = preg_replace('/<code[^>]*>.*?<\/code>/si', ' ', $content);

        //Find links
        $params = array(
            'base_url' => $base_url,
            'default_link_text' => $default_link_text,
        );
        //map() is defined outside this class body; it is handed the callback and the
        //shared parameters and its result is treated as a list of callback results.
        $instances = $this->map($content, array($this, 'parser_callback'), $params);

        //The parser callback returns NULL when it finds an invalid link. Filter out those nulls
        //from the list of instances.
        $instances = array_filter($instances);

        return $instances;
    }

    /**
     * blcHTMLLink::parser_callback()
     *
     * Validates and normalizes a single link and wraps it in a link instance.
     *
     * @access private
     * @param array $link Link data; the 'href' and '#link_text' keys are read here.
     * @param array $params Shared parameters; the 'base_url' key is read here.
     * @return blcLinkInstance|null The populated instance, or null when the link is skipped.
     */
    function parser_callback($link, $params){
        global $blclog;
        $base_url = $params['base_url'];

        //Keep the untouched value for the instance; work on a trimmed copy.
        $url = $raw_url = $link['href'];
        $url = trim($url);
        //$blclog->debug(__CLASS__ .':' . __FUNCTION__ . ' Found a link, raw URL = "' . $raw_url . '"');

        //Sometimes links may contain shortcodes. Execute them.
        $url = do_shortcode($url);

        //Skip empty URLs
        if (empty($url)){
            $blclog->warn(__CLASS__ .':' . __FUNCTION__ . ' Skipping the link (empty URL)');
            return null;
        }

        //Attempt to parse the URL
        $parts = @parse_url($url);
        if(!$parts) {
            $blclog->warn(__CLASS__ .':' . __FUNCTION__ . ' Skipping the link (parse_url failed)', $url);
            return null; //Skip invalid URLs
        }

        if (!isset($parts['scheme'])){
            //No scheme - likely a relative URL. Turn it into an absolute one.
            //TODO: Also log the original URL and base URL.
            $url = $this->relative2absolute($url, $base_url); //$base_url comes from $params
            $blclog->info(__CLASS__ .':' . __FUNCTION__ . ' Convert relative URL to absolute. Absolute URL = "' . $url . '"');
        }

        //Skip invalid links (again)
        if (!$url || (strlen($url)<6)) {
            $blclog->info(__CLASS__ .':' . __FUNCTION__ . ' Skipping the link (invalid/short URL)', $url);
            return null;
        }

        //Remove left-to-right marks. See: https://en.wikipedia.org/wiki/Left-to-right_mark
        //The single-quoted PHP string keeps the \u escape literal so json_decode() expands it.
        $ltrm = json_decode('"\u200E"');
        $url = str_replace($ltrm, '', $url);

        $text = $link['#link_text'];

        //The URL is okay, create and populate a new link instance.
        $instance = new blcLinkInstance();

        $instance->raw_url = $raw_url;
        $instance->link_text = $text;

        //NOTE(review): the link object is created but never associated with $instance in
        //the code shown here - presumably an attach step was lost; confirm against the
        //complete source. The construction is kept because it may have side effects.
        $link_obj = new blcLink($url); //Creates or loads the link

        return $instance;
    }
}




// Fixed path fragment to look for inside the link's href.
$blockedWord = '/thisismyunique/string';

// strpos() gives the byte offset of the first occurrence, or false when there is none.
$blockedWordPosition = strpos(
    $link['href'],
    $blockedWord
);

// Compare strictly against false so that a hit at offset 0 still counts as found.
$hasBlockedWord = (false !== $blockedWordPosition);



