ARTS （第50周）

越学习，随着眼界的拓宽，越会发现应该学习更多东西。

Algorithm 算法

中文AC自动机



/**
 * AC自动机
 * 
 * @description
 * 
 * @author lfy
 */

public class ACAutomaton {

	public static void main(String[] args) {
		// 模式串
		List<String> strs = Arrays.asList("aba", "BCD", "AAA");
		// 初始化
		ACAutomaton acc = new ACAutomaton(strs, null);
		// 模式串
		String text = "ababa";// 主串
		// 进行匹配
		List<ACAutomatonResult> r = acc.match(text);
		// 结果
		System.out.println(r);
	}

	Node root = new Node('/');// 根节点
	IdGenerator idGenerator;// id生成器

	public ACAutomaton(List<String> strs, IdGenerator idGenerator) {
		this.idGenerator = idGenerator;
		for (String s : strs) {
			if (idGenerator != null) {
				insert(s, idGenerator.getId());
			} else {
				insert(s);
			}
		}
		build();
	}

	public ACAutomaton(String[] strs, IdGenerator idGenerator) {
		this.idGenerator = idGenerator;
		for (String s : strs) {
			if (idGenerator != null) {
				insert(s, idGenerator.getId());
			} else {
				insert(s);
			}
		}
		build();
	}

	public ACAutomaton(String s, IdGenerator idGenerator) {
		this.idGenerator = idGenerator;
		if (idGenerator != null) {
			insert(s, idGenerator.getId());
		} else {
			insert(s);
		}
		build();
	}

	// 插入
	private void insert(String s) {
		insert(s.toCharArray());
	}

	// 插入
	private void insert(String s, long id) {
		insert(s.toCharArray(), id);
	}

	/**
	 * 匹配
	 */
	public List<ACAutomatonResult> match(String s) {
		return match(s.toCharArray());
	}

	// 构建失效指针
	private void build() {
		// 从根节点开始生成
		Queue<Node> queue = new LinkedList<>();
		root.f = null;// root匹配 方便后面进行跳出
		Collection<Node> leve1 = root.children.values();// 第一级的树
		for (Node cn : leve1) {
			if (cn != null) {
				// 从第二级开始
				queue.add(cn);
				cn.f = root;// 第一级子节点失败全部指向root
			}
		}
		// 动态规划
		while (!queue.isEmpty()) {
			// 取出元素 父级节点
			Node p = queue.remove();
			// 子节点
			Collection<Node> values = p.children.values();
			for (Node c : values) {
				if (c == null)
					continue;
				queue.add(c);
				Node f = p.f;// 父节点的fail节点
				while (f != null) {
					Node node = f.getChild(c.v);
					if (node != null) {
						// 父节点的失败节点里查找子节点
						// 找到就可以跳出了
						c.f = node;
						break;
					}
					// 找不到 就找失败节点的失败节点 直到root
					// 如果root都没有就结束循环 跳出后赋值为root 下面的if判断
					f = f.f;

				}
				if (f == null) {
					// 找不到的时候 在上面的循环里 如果找到了失败节点就会跳出 f不会为空
					c.f = root;
				}
			}

		}

	}

	/**
	 * 匹配
	 */
	private List<ACAutomatonResult> match(char[] text) { // text 是主串
		ArrayList<ACAutomatonResult> res = new ArrayList<ACAutomatonResult>();
		int len = text.length;
		// 带头指针
		// 即当前匹配字符的前一个字符
		Node p = root;
		for (int i = 0; i < len; i++) {
			char idx = text[i];// 当前值的索引
			// 当前带头指针的后一个字符和当前值不相等
			// 就去找失败指针重新进行匹配 直到找不到 或者root为止
			Node next = p.getChild(idx);
			while (p != root && next == null) {//
				p = p.f;
				next = p.getChild(idx);
			}
			// 匹配的话 p就前进一个节点
			// 如果不匹配 p就会变成null 在下面的循环里就会被重设为root
			p = next;
			// 找不到匹配值的时候 重设为root
			if (p == null) {
				p = root;
			}
			// 遍历节点和当前节点的失效指针
			// 判断是否是结束节点
			Node temp = p;
			while (temp != root) {
				if (temp.isEnd == true) {
					// 偏移量和长度
					int pos = i - temp.len + 1;
					int l = p.len;
					// 匹配的字符串
					String str = new String(text, pos, l);
					ACAutomatonResult r = new ACAutomatonResult(pos, l, str, temp.wordId);
					res.add(r);
				}
				temp = temp.f;// 重设设失效指针，即相同后缀的指针 判断isEnd值
			}
		}
		return res;
	}

	/**
	 * 插入到trie树
	 * 
	 * @param text
	 */
	private void insert(char[] text) {
		Node p = root;
		for (int i = 0; i < text.length; ++i) {
			if (p.getChild(text[i]) == null) {
				Node newNode = new Node(text[i]);
				p.setChild(text[i], newNode);
			}
			p = p.getChild(text[i]);
			p.len = i + 1;// 深度
		}
		//
		p.isEnd = true;
		p.len = text.length;
	}

	/**
	 * 插入到trie树
	 * 
	 * @param text
	 */
	private void insert(char[] text, long wordId) {
		Node p = root;
		for (int i = 0; i < text.length; ++i) {
			if (p.getChild(text[i]) == null) {
				Node newNode = new Node(text[i]);
				p.setChild(text[i], newNode);
			}
			p = p.getChild(text[i]);
			p.len = i + 1;// 深度
		}
		//
		p.isEnd = true;
		p.len = text.length;
		p.wordId = wordId;
	}

	// trie树的节点
	private class Node {

		private char v;// 值

		// private Node[] n = new Node[26];// 下一个节点也可以理解成子节点
		// 为匹配中文 使用hashmap记录子节点
		private HashMap<Character, Node> children = new HashMap<Character, Node>();

		private Node f;// 失败指针

		private boolean isEnd = false;// 是否最后一个词缀

		private int len = -1;// 从根到这里的长度

		private long wordId = -1;

		/**
		 * 设置子节点
		 * 
		 * @param c
		 * @param node
		 */
		private void setChild(char c, Node node) {
			Character key = Character.valueOf(c);
			children.put(key, node);
		}

		/**
		 * 获取子节点
		 * 
		 * @param c
		 * @return
		 */
		private Node getChild(char c) {
			Character key = Character.valueOf(c);
			return children.get(key);
		}

		private Node(char v) {
			this.v = v;
		}

		@Override
		public String toString() {
			if (f != this)
				return "Node [v=" + v + ", f=" + f + "]";
			return "Node [v=" + v + ", f= root]";
		}

	}
}

ac自动机的结果

/**
 * AC自动机结果
 * @author la-huan
 *
 */
public class ACAutomatonResult {

	private int pos;
	private int len;
	private String str;
	private long wordId;
	protected ACAutomatonResult(int pos, int len, String str,long wordId) {
		this.pos = pos;
		this.len = len;
		this.str = str;
		this.wordId=wordId;
	}
	protected ACAutomatonResult(int pos, int len, String str) {
		this.pos = pos;
		this.len = len;
		this.str = str;
	}
	
	public long getWordId() {
		return wordId;
	}
	/**
	 * 偏移量
	 * 
	 * @return
	 */
	public int getPos() {
		return pos;
	}

	/**
	 * 长度
	 * 
	 * @return
	 */
	public int getLen() {
		return len;
	}

	/**
	 * 匹配的字符串
	 * 
	 * @return
	 */
	public String getStr() {
		return str;
	}
	
	@Override
	public String toString() {
		//return "ACAutomatonResult [pos=" + pos + ", len=" + len + ", str=" + str + "]";
		return getJson();
	}

	public String getJson() {
		return "{\"pos\":\"" + pos + "\", \"len\":\"" + len + "\", \"str\":\"" + str + "\"}";
	}
	
	
}

Review 英文文章

https://redis.io/commands

redis 指令大全

Tip 技巧

这样几行代码，如果不增加排序，时间复杂度是n，如果增加排序时间复杂度是n^2。

但结果却是增加排序后的速度将会比不排序的速度更快。

原因就是我们的代码是编译后组成一个个指令给cpu运行的，而cpu如果支持流水线技术，就会对我们的代码指令做一定的行为预测。因此导致了排序后更快。

public static void main(String[] args) {
		// Generate data
		int arraySize = 3276;
		int data[] = new int[arraySize];
		Random rnd = new Random(0);
		for (int c = 0; c < arraySize; ++c)
			data[c] = rnd.nextInt() % 256;
		//no 
		//Arrays.sort(data);
		// Test
		long start = System.nanoTime();
		long sum = 0;
		for (int i = 0; i < 100000; ++i) {
			// Primary loop
			for (int c = 0; c < arraySize; ++c) {
				if (data[c] >= 128){
					sum += data[c];
				}
			}
		}
		// 统计时间
		System.out.println((System.nanoTime() - start) / 1000000000.0);
		System.out.println("sum = " + sum);
	}

https://www.cnblogs.com/snailclimb/p/9086334.html java nio的selector

https://mp.weixin.qq.com/s?__biz=Mzg2OTA0Njk0OA==&mid=2247484966&idx=1&sn=86495501e12ad0c62476d98985d43c85&source=41#wechat_redirect Redis的常见问题

https://blog.csdn.net/qq_33330687/article/details/104621483 分支预测是什么？为什么有序数组比无序数组快？

ARTS （第50周）

Algorithm 算法

中文AC自动机

Review 英文文章

Tip 技巧

Share 分享