2016-03-07 37 views
5

苹果表示,NSRegularExpression 基于 ICU 正则表达式库(https://developer.apple.com/library/ios/documentation/Foundation/Reference/NSRegularExpression_Class/ ——“NSRegularExpression 的命名捕获组:获取某个范围所属捕获组的名称”):

目前支持的模式语法由 ICU 规定。ICU 正则表达式的说明见 http://userguide.icu-project.org/strings/regexp 。

该网页(位于 icu-project.org)声称现在已经支持命名捕获组,其语法与 .NET 正则表达式相同:

(?<name>...) 命名捕获组。其中的尖括号 < > 是字面字符——它们会原样出现在模式中。

我写了一个程序,它得到了一个匹配,其中包含多个看起来正确的范围——尽管每个范围都被返回了两次(原因不明)——但我能拿到的信息只有范围的索引以及它对应的文本范围。

例如,正则表达式:^(?<foo>foo)\.(?<bar>bar)\.(?<bar2>baz)$与测试字符串foo.bar.baz

给我的结果:

Idx Start Length  Text 
0  0  11   foo.bar.baz 
1  0   3   foo 
2  4   3   bar 
3  8   3   baz 

有没有办法知道“baz”来自捕获组 bar2?

+0

您是否看过[用NSRegularExpression命名捕获组](http://stackoverflow.com/questions/24814974/named-capture-groups-with-nsregularexpression)? –

+0

@Thomas 我确实看到了,但那是 2014 年的问题,其中的讨论都说不支持命名捕获组——而实际上(至少在 iOS 9 和 OS X 10.11 上)是支持的,至少在我的机器上它们可以正常工作;只是我无法把结果范围映射回它们所属的捕获组。 – Dai

+0

Apple 文档中写的是*在 iOS 4.0 及更高版本中可用*…… –

回答

1

我遇到了同样的问题,最后自己写了一个解决方案。欢迎评论或改进 ;-)

/// Helpers that map the named capture groups of a pattern, written
/// `(?<name>...)`, to their capture-group indexes and matched ranges.
///
/// The names are recovered by scanning the text of `pattern`. The scanner
/// skips escaped characters and simple bracket expressions such as `[()]`.
/// It does not recognise `\Q...\E` quoting, nested set expressions, or
/// comments in free-spacing mode, so parentheses inside those constructs
/// can still shift the reported indexes.
extension NSRegularExpression {
    /// For one named group: the match of the group opener inside `pattern`,
    /// the match of the name inside that opener, and the zero-based position
    /// of the group among the capturing groups of the pattern.
    typealias GroupNamesSearchResult = (NSTextCheckingResult, NSTextCheckingResult, Int)

    /// Scans `pattern` and returns one entry per named capture group.
    ///
    /// Only capturing groups advance the position counter, so non-capturing
    /// constructs such as `(?:...)` or look-arounds do not shift the indexes,
    /// and groups nested inside other groups are counted in the order of
    /// their opening parenthesis.
    ///
    /// - Throws: An error if one of the internal helper patterns fails to compile.
    private func textCheckingResultsOfNamedCaptureGroups() throws -> [String:GroupNamesSearchResult] {
        var groupnames = [String:GroupNamesSearchResult]()

        // Extracts the name out of an opener such as "(?<name>".
        let greg = try NSRegularExpression(pattern: "^\\(\\?<([\\w\\a_-]*)>$", options: NSRegularExpressionOptions.DotMatchesLineSeparators)
        // Tokenizer; the alternatives are tried in this order at each position:
        //   \\.                   an escaped character, so that "\(" is not taken for a group
        //   \[(?:\\.|[^\]\\])*\]  a bracket expression, so that "[(]" is not taken for a group
        //   \((?!\?)              the opener of an unnamed capturing group
        //   \(\?<name>            the opener of a named capturing group
        let reg = try NSRegularExpression(pattern: "\\\\.|\\[(?:\\\\.|[^\\]\\\\])*\\]|\\((?!\\?)|\\(\\?<[\\w\\a_-]*>", options: NSRegularExpressionOptions.DotMatchesLineSeparators)

        let source = self.pattern as NSString
        let m = reg.matchesInString(self.pattern, options: NSMatchingOptions.WithTransparentBounds, range: NSRange(location: 0, length: source.length))

        var captureIndex = 0
        for g in m {
            let gstring = source.substringWithRange(g.range)
            // Escapes and bracket expressions are matched only to be skipped.
            guard gstring.hasPrefix("(") else { continue }
            if let gmatch = greg.firstMatchInString(gstring, options: NSMatchingOptions.Anchored, range: NSRange(location: 0, length: gstring.utf16.count)) {
                let name = (gstring as NSString).substringWithRange(gmatch.rangeAtIndex(1))
                groupnames[name] = (g, gmatch, captureIndex)
            }
            captureIndex += 1
        }
        return groupnames
    }

    /// Returns the capture-group index of every named group.
    ///
    /// Index 0 is the whole match, so the first capturing group has index 1.
    ///
    /// - Throws: An error if one of the internal helper patterns fails to compile.
    func indexOfNamedCaptureGroups() throws -> [String:Int] {
        var groupnames = [String:Int]()
        for (name,(_,_,n)) in try self.textCheckingResultsOfNamedCaptureGroups() {
            groupnames[name] = n + 1
        }
        return groupnames
    }

    /// Returns, for every named group that took part in `match`, the range it matched.
    ///
    /// Groups that did not participate in the match are left out of the result.
    ///
    /// - Parameter match: A result produced by this regular expression.
    /// - Throws: An error if one of the internal helper patterns fails to compile.
    func rangesOfNamedCaptureGroups(match:NSTextCheckingResult) throws -> [String:Range<Int>] {
        var ranges = [String:Range<Int>]()
        for (name,(_,_,n)) in try self.textCheckingResultsOfNamedCaptureGroups() {
            // Guards against a result that has fewer ranges than this pattern has groups.
            guard n + 1 < match.numberOfRanges else { continue }
            // toRange() is nil for a not-found range; assigning nil adds no entry.
            ranges[name] = match.rangeAtIndex(n + 1).toRange()
        }
        return ranges
    }
}

下面是一个使用示例:

// Usage: read the text captured by the named group "value".
let node = "'test_literal'"
let regex = try NSRegularExpression(pattern: "^(?<delimiter>'|\")(?<value>.*)(?:\\k<delimiter>)$", options: NSRegularExpressionOptions.DotMatchesLineSeparators)
let match = regex.matchesInString(node, options: NSMatchingOptions.Anchored, range: NSRange(location: 0,length: node.utf16.count))
if let firstMatch = match.first {
    let ranges = try regex.rangesOfNamedCaptureGroups(firstMatch)
    // `if let` rather than `guard`: a guard's else branch must leave the
    // enclosing scope, so an empty `else { }` does not compile.
    if let range = ranges["value"] {
        let valueRange = NSRange(location: range.startIndex, length: range.endIndex - range.startIndex)
        print((node as NSString).substringWithRange(valueRange)) // test_literal
    }
}
+0

有趣!你能用一些使用例子来修改你的答案吗? – Dai

+0

完成。再看一遍之后,我也意识到这并不直观……这段代码是我几个月前写的,当时有理由把匹配和获取范围分成两次不同的调用。 –

1

我在 Daniele Bernardini 编写的代码基础上做了一些工作。

有一些变化:

  • 首先,代码现在兼容 Swift 3。
  • Daniele 的代码有一个缺陷:它无法捕获嵌套的捕获组。我把正则表达式改得不那么激进,以支持嵌套捕获组。
  • 我更希望直接得到一个包含实际捕获内容的集合。我添加了一个名为 captureGroups() 的方法,它以字符串而不是范围的形式返回捕获。

    import Foundation 
    
    /// Regular-expression conveniences for `String`.
    extension String {
        /// Returns, for every match of `regex` in the receiver, the text of each
        /// of the match's ranges in index order.
        ///
        /// A range whose location is `NSNotFound` is reported as an empty string.
        /// If `regex` cannot be compiled, the result is an empty array.
        func matchingStrings(regex: String) -> [[String]] {
            let expression: NSRegularExpression
            do {
                expression = try NSRegularExpression(pattern: regex, options: [])
            } catch {
                return []
            }

            let text = self as NSString
            let wholeRange = NSMakeRange(0, text.length)
            var rows = [[String]]()
            for found in expression.matches(in: self, options: [], range: wholeRange) {
                var row = [String]()
                for index in 0..<found.numberOfRanges {
                    let captured = found.rangeAt(index)
                    if captured.location == NSNotFound {
                        row.append("")
                    } else {
                        row.append(text.substring(with: captured))
                    }
                }
                rows.append(row)
            }
            return rows
        }

        /// Converts a UTF-16 based `NSRange` into a range of `String.Index`.
        ///
        /// Returns `nil` when either end lies beyond the end of the string or
        /// has no matching position in the string.
        func range(from nsRange: NSRange) -> Range<String.Index>? {
            let units = utf16
            // The lower bound is resolved first so that the sum below is never
            // evaluated for a location that is already out of reach.
            guard let lower16 = units.index(units.startIndex, offsetBy: nsRange.location, limitedBy: units.endIndex) else {
                return nil
            }
            guard let upper16 = units.index(units.startIndex, offsetBy: nsRange.location + nsRange.length, limitedBy: units.endIndex) else {
                return nil
            }
            guard let lower = lower16.samePosition(in: self) else {
                return nil
            }
            guard let upper = upper16.samePosition(in: self) else {
                return nil
            }
            return lower ..< upper
        }

    }
    
    /// Helpers that map the named capture groups of a pattern, written
    /// `(?<name>...)`, to their capture-group indexes, ranges and captured text.
    ///
    /// The names are recovered by scanning the text of `pattern`. The scanner
    /// skips escaped characters and simple bracket expressions such as `[()]`.
    /// It does not recognise `\Q...\E` quoting, nested set expressions, or
    /// comments in free-spacing mode, so parentheses inside those constructs
    /// can still shift the reported indexes.
    extension NSRegularExpression {
        /// For one named group: the match of the group opener inside `pattern`,
        /// the match of the name inside that opener, and the zero-based position
        /// of the group among the capturing groups of the pattern.
        typealias GroupNamesSearchResult = (NSTextCheckingResult, NSTextCheckingResult, Int)

        /// Scans `pattern` and returns one entry per named capture group.
        ///
        /// Only capturing groups advance the position counter, so unnamed
        /// capturing groups are counted while non-capturing constructs such as
        /// `(?:...)` are ignored. Nested groups are counted in the order of
        /// their opening parenthesis.
        private func textCheckingResultsOfNamedCaptureGroups() -> [String:GroupNamesSearchResult] {
            var groupnames = [String:GroupNamesSearchResult]()

            // Extracts the name out of an opener such as "(?<name>".
            guard let greg = try? NSRegularExpression(pattern: "^\\(\\?<([\\w\\a_-]*)>$", options: NSRegularExpression.Options.dotMatchesLineSeparators) else {
                // This never happens but the alternative is to make this method throwing
                return groupnames
            }
            // Tokenizer; the alternatives are tried in this order at each position:
            //   \\.                   an escaped character, so that "\(" is not taken for a group
            //   \[(?:\\.|[^\]\\])*\]  a bracket expression, so that "[(]" is not taken for a group
            //   \((?!\?)              the opener of an unnamed capturing group
            //   \(\?<name>            the opener of a named capturing group
            guard let reg = try? NSRegularExpression(pattern: "\\\\.|\\[(?:\\\\.|[^\\]\\\\])*\\]|\\((?!\\?)|\\(\\?<[\\w\\a_-]*>", options: NSRegularExpression.Options.dotMatchesLineSeparators) else {
                // This never happens but the alternative is to make this method throwing
                return groupnames
            }

            let source = self.pattern as NSString
            let m = reg.matches(in: self.pattern, options: NSRegularExpression.MatchingOptions.withTransparentBounds, range: NSRange(location: 0, length: source.length))

            var captureIndex = 0
            for g in m {
                let gstring = source.substring(with: g.range)
                // Escapes and bracket expressions are matched only to be skipped.
                guard gstring.hasPrefix("(") else { continue }
                if let gmatch = greg.firstMatch(in: gstring, options: NSRegularExpression.MatchingOptions.anchored, range: NSRange(location: 0, length: gstring.utf16.count)) {
                    let name = (gstring as NSString).substring(with: gmatch.rangeAt(1))
                    groupnames[name] = (g, gmatch, captureIndex)
                }
                captureIndex += 1
            }
            return groupnames
        }

        /// Returns the capture-group index of every named group.
        ///
        /// Index 0 is the whole match, so the first capturing group has index 1.
        /// The method is declared `throws` for source compatibility with
        /// existing callers; the current implementation does not throw.
        func indexOfNamedCaptureGroups() throws -> [String:Int] {
            var groupnames = [String:Int]()
            for (name,(_,_,n)) in self.textCheckingResultsOfNamedCaptureGroups() {
                groupnames[name] = n + 1
            }
            return groupnames
        }

        /// Returns, for every named group that took part in `match`, the range it matched.
        ///
        /// Groups that did not participate in the match are left out of the result.
        /// The method is declared `throws` for source compatibility with
        /// existing callers; the current implementation does not throw.
        ///
        /// - Parameter match: A result produced by this regular expression.
        func rangesOfNamedCaptureGroups(match:NSTextCheckingResult) throws -> [String:Range<Int>] {
            var ranges = [String:Range<Int>]()
            for (name,(_,_,n)) in self.textCheckingResultsOfNamedCaptureGroups() {
                // Guards against a result that has fewer ranges than this pattern has groups.
                guard n + 1 < match.numberOfRanges else { continue }
                // toRange() is nil for a not-found range; assigning nil adds no entry.
                ranges[name] = match.rangeAt(n + 1).toRange()
            }
            return ranges
        }

        /// Returns the name of the group whose capture-group index is `index`,
        /// or `nil` when that group is unnamed.
        private func nameForIndex(_ index: Int, from: [String:GroupNamesSearchResult]) -> String? {
            for (name,(_,_,n)) in from {
                if (n + 1) == index {
                    return name
                }
            }
            return nil
        }

        /// Matches the whole of `string` and returns the captured text keyed by group name.
        func captureGroups(string: String, options: NSRegularExpression.MatchingOptions = []) -> [String:String] {
            return captureGroups(string: string, options: options, range: NSRange(location: 0, length: string.utf16.count))
        }

        /// Matches `string` within `range` and returns the captured text keyed by group name.
        ///
        /// When the pattern matches more than once, captures of later matches
        /// replace those of earlier ones. Groups that did not participate in a
        /// match contribute nothing for that match.
        func captureGroups(string: String, options: NSRegularExpression.MatchingOptions = [], range: NSRange) -> [String:String] {
            var dict = [String:String]()
            let names = self.textCheckingResultsOfNamedCaptureGroups()
            let text = string as NSString
            for m in matches(in: string, options: options, range: range) {
                for i in 0..<m.numberOfRanges {
                    let r = m.rangeAt(i)
                    // An optional group that did not take part has location NSNotFound.
                    guard r.location != NSNotFound, let name = nameForIndex(i, from: names) else { continue }
                    dict[name] = text.substring(with: r)
                }
            }
            return dict
        }
    }
    

使用新的方法captureGroups()的一个例子是:

// Example: collect every named capture of a quoted literal into a dictionary.
let node = "'test_literal'"
let regex = try NSRegularExpression(
    pattern: "^(?<all>(?<delimiter>'|\")(?<value>.*)(?:\\k<delimiter>))$",
    options: .dotMatchesLineSeparators
)
let match2 = regex.captureGroups(string: node, options: .anchored)
print(match2)

它将打印:

["delimiter": "\'", "all": "\'test_literal\'", "value": "test_literal"]

+0

谢谢,你救了我。 –

2

从 iOS 11 开始支持命名捕获组。NSTextCheckingResult 提供了方法 open func range(withName name: String) -> NSRange

使用正则表达式:^(?<foo>foo)\.(?<bar>bar)\.(?<bar2>baz)$ 匹配测试字符串 foo.bar.baz,会得到一个包含 4 个结果范围的匹配。函数 match.range(withName: "bar2") 返回字符串 baz 的范围。

+0

我基于该方法编写了一个扩展,用来创建包含所有命名捕获组及其值的字典:https://stackoverflow.com/a/48309290/235297 –