彭峰的博客

无标签

发布日期: 2021-05-02

文章字数: 1.3k

阅读时长: 7 分

阅读次数:

LeetCode 10 .正则匹配

10. Regular Expression Matching

Given an input string (s) and a pattern (p), implement regular expression matching with support for '.' and '*'.

1 2	'.' Matches any single character. '*' Matches zero or more of the preceding element.

The matching should cover the entire input string (not partial).

Note:

s could be empty and contains only lowercase letters a-z.
p could be empty and contains only lowercase letters a-z, and characters like . or *.

Example 1:

Input:
s = "aa"
p = "a"
Output: false
Explanation: "a" does not match the entire string "aa".

Example 2:

Input:
s = "aa"
p = "a*"
Output: true
Explanation: '*' means zero or more of the preceding element, 'a'. Therefore, by repeating 'a' once, it becomes "aa".

Example 3:

Input:
s = "ab"
p = ".*"
Output: true
Explanation: ".*" means "zero or more (*) of any character (.)".

Example 4:

Input:
s = "aab"
p = "c*a*b"
Output: true
Explanation: c can be repeated 0 times, a can be repeated 1 time. Therefore, it matches "aab".

Example 5:

Input:
s = "mississippi"
p = "mis*is*p*."
Output: false

递归实现

递归实现最大的缺点就是效率低，因为需要进行很多重复的运算。

我们可以对字符逐一进行匹配。容易发现，对于‘*’来讲，需要对对应字符串的后序字符串进行匹配。对于‘.’来讲，它对应着任何字符，将与其匹配的所有字符都视为匹配成功。

public boolean isMatch(String s, String p) {  
    if(p.isEmpty()){
        return s.isEmpty();
    }
    boolean first_match = (!s.isEmpty() && (s.charAt(0) == p.charAt(0) || p.charAt(0) == '.'));
    if(p.length() >= 2 && p.charAt(1) == '*'){
        return (isMatch(s,p.substring(2)) || (first_match && isMatch(s.substring(1),p)));
        
    } else {
        return first_match && isMatch(s.substring(1),p.substring(1));
    }
}

动态规划实现

我们可以使用一个数组记录已经计算过的结果结果，以避免重复的计算。也就是使用动态规划的思想。

自顶向下

class Solution {
    Boolean[][] result;
    public boolean isMatch(String s, String p) {
        result = new Boolean[s.length() + 1][p.length() + 1 ];
        // return result[0][0];
        return dp(0,0,s,p);
    }
    
    public boolean dp(int sIndex, int pIndex,String s, String p){
        if(result[sIndex][pIndex] != null){
            return result[sIndex][pIndex] == true;
        }
        boolean ans;
        int sLen = s.length();
        int pLen = p.length();
        if(pIndex == pLen){
            ans = sIndex == sLen;
        } else {
            boolean first_match = (sIndex < sLen) && (p.charAt(pIndex) == s.charAt(sIndex) || p.charAt(pIndex) == '.');
            if(pIndex + 1 < pLen && p.charAt(pIndex + 1) == '*'){
                ans = dp(sIndex,pIndex+2,s,p) || first_match && dp(sIndex+1,pIndex,s,p);
            } else {
                ans = first_match && dp(sIndex+1,pIndex+1,s,p);
            }
        }
        
        result[sIndex][pIndex] = ans?true:false;
        return ans;
    }
}

自底向上

    public boolean isMatch(String text, String pattern) {
        boolean[][] dp = new boolean[text.length() + 1][pattern.length() + 1];
        dp[text.length()][pattern.length()] = true;

        for (int i = text.length(); i >= 0; i--){
            for (int j = pattern.length() - 1; j >= 0; j--){
                boolean first_match = (i < text.length() &&
                                       (pattern.charAt(j) == text.charAt(i) ||
                                        pattern.charAt(j) == '.'));
                if (j + 1 < pattern.length() && pattern.charAt(j+1) == '*'){
                    dp[i][j] = dp[i][j+2] || first_match && dp[i+1][j];
                } else {
                    dp[i][j] = first_match && dp[i+1][j+1];
                }
            }
        }
        return dp[0][0];
    }
}

NFA （非自动性有穷向量机）

可以使用编译原理中的非自动性有穷向量机来处理。将正则表达式看成是一种语言。然后根据输入进行状态转换。简单来讲就是：假设初始状态为A，当接收到输入为*的值，就进入状态B。而正则匹配的原理可以参考如下链接

原理：https://swtch.com/~rsc/regexp/regexp1.html

class Solution {
    private static final char ONE= '.';
    private static final char MANY= '*';
    private static final char SPLIT= '<';
    private static final char END= '$';

    public boolean isMatch(String s, String p) {
        if (p.isEmpty()) return s.isEmpty();
        return nfa(s, p);
    }

    static class State {
        public int id; 
        public char c;    // state char
        public State out1;  // next state 1
        public State out2;  // next state 2 (for split state only)
        
        public State(int id, char c){
            this.id= id;
            this.c= c;
        }
        @Override
        public int hashCode(){
            return id*c;
        }
        @Override
        public boolean equals(Object o){
            return id==((State)o).id && c==((State)o).c;
        }
    }

    private boolean nfa(String s, String p){
        State match= new State(p.length(), END);
        Set<State> clist= new HashSet<>();  // current list of states
        Set<State> nlist= new HashSet<>();  // next list of states
        
        State start= buildNfa(p, match);
        addState(clist, start); 
        for(int i=0; i<s.length() && !clist.isEmpty(); i++){
            step(clist, s.charAt(i), nlist);
            Set<State> temp= clist;
            clist= nlist;
            nlist= temp;
            nlist.clear();
        }
        return hasMatch(clist, match);
    }
    
    private void step(Set<State> clist, char c, Set<State> nlist){
        for(State state:clist)
            if (state.c==ONE && state.out1!=null)
                addState(nlist, state.out1);
            else if (c==state.c && state.out1!=null)
               addState(nlist, state.out1);
    }
    
    private void addState(Set<State> list, State state){
        if (state.c==SPLIT){
            addState(list, state.out1);
            addState(list, state.out2);
            return;
        }
        list.add(state);
    }    
    
    private boolean hasMatch(Set<State> list, State match){
        return list.contains(match);
    }
    
    private State buildNfa(String p, State match){
        int n= p.length();
        State start= null;
        State prev_state= null, state= null, split= null;
        for(int i=0; i<p.length(); i++){
            char c= p.charAt(i);
            char next_c= i<n-1 ? p.charAt(i+1) : END;
            if (next_c==MANY){
                split= new State(i+1, SPLIT);  // split state character
                state= new State(i, c);
                split.out2= state;
                state.out1= split;
                if (prev_state!=null) prev_state.out1= split;
                if (start==null) start= split;
                prev_state= split;
                i++; // consume 2 characters
            }else{
                state= new State(i, p.charAt(i));
                if (prev_state!=null) prev_state.out1= state;
                if (start==null) start= state;
                prev_state= state;
            }
        }
        prev_state.out1= match;
        return start;
    }    
}