HashMap 简介 HashMap
是Map
接口最常用的实现类,是非线程安全的。在 Jdk1.8 中,底层的数据结构为“数组+链表+红黑树”,相对于之前版本的“数组+链表”的组合性能有很大的提升,这里主要对 Jdk1.8 中的HashMap
源码进行分析。
1 2 public class HashMap <K,V> extends AbstractMap <K,V> implements Map <K,V>, Cloneable, Serializable
AbstractMap
其实已经实现了Map
接口,所以这里再实现一遍是没有意义的,这是 Java 集合创始的一个小失误。
属性 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 static final int DEFAULT_INITIAL_CAPACITY = 1 << 4 ; static final int MAXIMUM_CAPACITY = 1 << 30 ;static final float DEFAULT_LOAD_FACTOR = 0.75f ;static final int TREEIFY_THRESHOLD = 8 ;static final int UNTREEIFY_THRESHOLD = 6 ;static final int MIN_TREEIFY_CAPACITY = 64 ;transient Node<k,v>[] table;transient Set<map.entry<k,v>> entrySet;transient int size;transient int modCount;int threshold;final float loadFactor;
Node Node
为链表的节点类实现。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 static class Node <K,V> implements Map .Entry<K,V> { final int hash; final K key; V value; Node<K,V> next; Node(int hash, K key, V value, Node<K,V> next) { this .hash = hash; this .key = key; this .value = value; this .next = next; } public final K getKey () { return key; } public final V getValue () { return value; } public final String toString () { return key + "=" + value; } public final int hashCode () { return Objects.hashCode(key) ^ Objects.hashCode(value); } public final V setValue (V newValue) { V oldValue = value; value = newValue; return oldValue; } public final boolean equals (Object o) { if (o == this ) return true ; if (o instanceof Map.Entry) { Map.Entry<?,?> e = (Map.Entry<?,?>)o; if (Objects.equals(key, e.getKey()) && Objects.equals(value, e.getValue())) return true ; } return false ; } }
TreeNode TreeNode
是红黑树的节点实现,其中常用的方法有putTreeVal
、getTreeNode
和treeify
。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 static final class TreeNode <K,V> extends LinkedHashMap .Entry<K,V> { TreeNode<K,V> parent; TreeNode<K,V> left; TreeNode<K,V> right; TreeNode<K,V> prev; boolean red; TreeNode(int hash, K key, V val, Node<K,V> next) { super (hash, key, val, next); } final TreeNode<K,V> root () { for (TreeNode<K,V> r = this , p;;) { if ((p = r.parent) == null ) return r; r = p; } } final TreeNode<K,V> putTreeVal (HashMap<K,V> map, Node<K,V>[] tab, int h, K k, V v) final TreeNode<K,V> getTreeNode (int h, Object k) final void treeify (Node<K,V>[] tab) ... }
putTreeVal putTreeVal
主要用来处理红黑树节点,操作时同时还会维护链表的结构。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 final TreeNode<K,V> putTreeVal (HashMap<K,V> map, Node<K,V>[] tab, int h, K k, V v) { Class<?> kc = null ; boolean searched = false ; TreeNode<K,V> root = (parent != null ) ? root() : this ; for (TreeNode<K,V> p = root;;) { int dir, ph; K pk; if ((ph = p.hash) > h) dir = -1 ; else if (ph < h) dir = 1 ; else if ((pk = p.key) == k || (k != null && k.equals(pk))) return p; else if ((kc == null && (kc = comparableClassFor(k)) == null ) || (dir = compareComparables(kc, k, pk)) == 0 ) { if (!searched) { TreeNode<K,V> q, ch; searched = true ; if (((ch = p.left) != null && (q = ch.find(h, k, kc)) != null ) || ((ch = p.right) != null && (q = ch.find(h, k, kc)) != null )) return q; } dir = tieBreakOrder(k, pk); } TreeNode<K,V> xp = p; if ((p = (dir <= 0 ) ? p.left : p.right) == null ) { Node<K,V> xpn = xp.next; TreeNode<K,V> x = map.newTreeNode(h, k, v, xpn); if (dir <= 0 ) xp.left = x; else xp.right = x; xp.next = x; x.parent = x.prev = xp; if (xpn != null ) ((TreeNode<K,V>)xpn).prev = x; moveRootToFront(tab, balanceInsertion(root, x)); return null ; } } }
getTreeNode getTreeNode
主要用来获取红黑树中一个具体的树节点。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 final TreeNode<K,V> getTreeNode (int h, Object k) { return ((parent != null ) ? root() : this ).find(h, k, null ); } final TreeNode<K,V> root () { for (TreeNode<K,V> r = this , p;;) { if ((p = r.parent) == null ) return r; r = p; } } final TreeNode<K,V> find (int h, Object k, Class<?> kc) { TreeNode<K,V> p = this ; do { int ph, dir; K pk; TreeNode<K,V> pl = p.left, pr = p.right, q; if ((ph = p.hash) > h) p = pl; else if (ph < h) p = pr; else if ((pk = p.key) == k || (k != null && k.equals(pk))) return p; else if (pl == null ) p = pr; else if (pr == null ) p = pl; else if ((kc != null || (kc = comparableClassFor(k)) != null ) && (dir = compareComparables(kc, k, pk)) != 0 ) p = (dir < 0 ) ? pl : pr; else if ((q = pr.find(h, k, kc)) != null ) return q; else p = pl; } while (p != null ); return null ; }
不同的节点的哈希值在和table
长度与运算之后是可能相等的,所以红黑树中同一个索引的节点的哈希值不一定都相等。比如:
1 2 3 节点 1 的 hash 值:1110 0000 0000 1000 0111 节点 2 的 hash 值:1001 1111 0000 1010 0111 table.length-1: 0000 0000 0000 0000 0111
节点 1 和节点 2 的哈希值并不相同,但是在和table
长度与运算后得到的索引位置是相同的。
treeify treeify
方法用来构建一棵以调用该方法的节点为根节点的红黑树。由于红黑树依然维护着链表结构,每次通过next
属性获得下一个节点时,都会从根节点开始向下查找,根据hash
值的大小找到合适的位置放入,并设置好parent
与left
或right
属性以关联节点。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 final void treeify (Node<K,V>[] tab) { TreeNode<K,V> root = null ; for (TreeNode<K,V> x = this , next; x != null ; x = next) { next = (TreeNode<K,V>)x.next; x.left = x.right = null ; if (root == null ) { x.parent = null ; x.red = false ; root = x; } else { K k = x.key; int h = x.hash; Class<?> kc = null ; for (TreeNode<K,V> p = root;;) { int dir, ph; K pk = p.key; if ((ph = p.hash) > h) dir = -1 ; else if (ph < h) dir = 1 ; else if ((kc == null && (kc = comparableClassFor(k)) == null ) || (dir = compareComparables(kc, k, pk)) == 0 ) dir = tieBreakOrder(k, pk); TreeNode<K,V> xp = p; if ((p = (dir <= 0 ) ? p.left : p.right) == null ) { x.parent = xp; if (dir <= 0 ) xp.left = x; else xp.right = x; root = balanceInsertion(root, x); break ; } } } } moveRootToFront(tab, root); }
构造函数 HashMap
包含了四种构造函数:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 public HashMap (int initialCapacity, float loadFactor) { if (initialCapacity < 0 ) throw new IllegalArgumentException ("Illegal initial capacity: " + initialCapacity); if (initialCapacity > MAXIMUM_CAPACITY) initialCapacity = MAXIMUM_CAPACITY; if (loadFactor <= 0 || Float.isNaN(loadFactor)) throw new IllegalArgumentException ("Illegal load factor: " + loadFactor); this .loadFactor = loadFactor; this .threshold = tableSizeFor(initialCapacity); } public HashMap (int initialCapacity) { this (initialCapacity, DEFAULT_LOAD_FACTOR); } public HashMap () { this .loadFactor = DEFAULT_LOAD_FACTOR; } public HashMap (Map<? extends K, ? extends V> m) { this .loadFactor = DEFAULT_LOAD_FACTOR; putMapEntries(m, false ); }
四个构造函数中都没有涉及table
的初始化,所以table
并不是在HashMap
初始化的时候初始化的,而是在第一次put
的时候初始化,下文中会有介绍。
tableSizeFor tableSizeFor
方法主要用来计算返回一个大于等于且最接近给定输入的 2 的次方数。代码如下:
1 2 3 4 5 6 7 8 9 static final int tableSizeFor (int cap) { int n = cap - 1 ; n |= n >>> 1 ; n |= n >>> 2 ; n |= n >>> 4 ; n |= n >>> 8 ; n |= n >>> 16 ; return (n < 0 ) ? 1 : (n >= MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : n + 1 ; }
n |= n >>> x
中,>>>
表示无符号右移且空位补 0,|=
表示两个数的每一位进行或操作。以输入为 97 来解释:
1 2 3 4 5 6 7 n = 97: 0110 0001 n |= n >>> 1: 0111 0001 n |= n >>> 2: 0111 1101 n |= n >>> 4: 0111 1111 n |= n >>> 8: 0111 1111 n |= n >>> 16: 0111 1111 n + 1: 1000 0000 = 128
我们可以发现:只要一直做右移然后按位或运算,最后可以得到一个大于等于输入且最接近给定输入的 2 的次方数。输入的数字最大的情况就是每一位上都是 1,此时输出就等于输入,所以这个方法可以保证输出一定大于等于输入。
hash hash
主要用来计算给定key
对应的哈希值:
1 2 3 4 static final int hash (Object key) { int h; return (key == null ) ? 0 : (h = key.hashCode()) ^ (h >>> 16 ); }
方法中首先得到key
的哈希值,然后将它和它的高 16 位进行异或运算得到新的哈希值:
1 2 3 4 5 6 key.hashCode(); h >>> 16 ; hashcode = h ^ (h >>> 16 );
和高 16 位进行异或运算是为了在table
的长度很小时保证哈希值的均匀性,减少碰撞的概率。
(n - 1) & hash 当我们需要定位到key
在table
中的索引位置时,需要使用除留余数法对table
长度取模得到,但是由于取模运算的效率很低,所以这里使用与运算提高效率。假设table
的长度为 16,具体过程如下:
1 2 3 4 5 6 hashcode = h ^ (h >>> 16 ); table.length - 1 ; hash & (table.length - 1 );
之后的源码中经常出现(n - 1) & hash
,其中 n 就是数组的长度,本质上就是通过上述方法计算数组索引。
resize resize
方法的作用主要为两点:
当数组并未初始化时,对数组进行初始化;
若数组已经初始化,则对数组进行扩容,也就是创建一个两倍大小的新数组,并将原来的元素放入新数组中;
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 final Node<K,V>[] resize() { Node<K,V>[] oldTab = table; int oldCap = (oldTab == null ) ? 0 : oldTab.length; int oldThr = threshold; int newCap, newThr = 0 ; if (oldCap > 0 ) { if (oldCap >= MAXIMUM_CAPACITY) { threshold = Integer.MAX_VALUE; return oldTab; } else if ((newCap = oldCap << 1 ) < MAXIMUM_CAPACITY && oldCap >= DEFAULT_INITIAL_CAPACITY) newThr = oldThr << 1 ; } else if (oldThr > 0 ) newCap = oldThr; else { newCap = DEFAULT_INITIAL_CAPACITY; newThr = (int )(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY); } if (newThr == 0 ) { float ft = (float )newCap * loadFactor; newThr = (newCap < MAXIMUM_CAPACITY && ft < (float )MAXIMUM_CAPACITY ? (int )ft : Integer.MAX_VALUE); } threshold = newThr; @SuppressWarnings({"rawtypes","unchecked"}) Node<K,V>[] newTab = (Node<K,V>[])new Node [newCap]; table = newTab; if (oldTab != null ) { for (int j = 0 ; j < oldCap; ++j) { Node<K,V> e; if ((e = oldTab[j]) != null ) { oldTab[j] = null ; if (e.next == null ) newTab[e.hash & (newCap - 1 )] = e; else if (e instanceof TreeNode) ((TreeNode<K,V>)e).split(this , newTab, j, oldCap); else { Node<K,V> loHead = null , loTail = null ; Node<K,V> hiHead = null , hiTail = null ; Node<K,V> next; do { next = e.next; if ((e.hash & oldCap) == 0 ) { if (loTail == null ) loHead = e; else loTail.next = e; loTail = e; } else { if (hiTail == null ) hiHead = e; else hiTail.next = e; hiTail = e; } } while ((e = next) != null ); if (loTail != null ) { loTail.next = null ; newTab[j] = loHead; } if (hiTail != null ) { hiTail.next = null ; newTab[j + oldCap] = hiHead; } } } } } return newTab; }
旧数组为空但阈值大于 0 所有的构造函数中都没有对table
进行初始化,仅仅通过this.threshold = tableSizeFor(initialCapacity)
设置了阈值,所以table
真的分配的时间是在第一次扩容的时候,也就是上面else if (oldThr > 0)
之后的代码。
链表拆分 原数组位置上的某一条链表的所有节点,若将它们放到扩容后的新数组中,则最多分布在两个不同索引位置的链表上,并且这两个位置的索引值一定是扩容前原索引或者原索引+原容量。下面以table
从 8 扩容到 16 为例:
1 2 3 4 table.length 1000(8) -> 10000(16) table.length - 1 0111(7) -> 01111(15) 0010(2) 0010(2) -> 00010(2) 1010(10) 0010(2) -> 01010(10)
前文中讲到,索引的计算方法为key.hashCode & (table.length - 1)
。假设扩容前的数组容量为8 = 1000
,扩容后的数组容量为16 = 10000
,则它们对应的table.length - 1
为7 = 0111
和15 = 01111
,不同在于第四位的数字是 0 或 1。如果key.hashCode
的第四位是 0,那和0111
或01111
做与运算的结果都是一样的。如果key.hashCode
的第四位是 1,那么得到的结果是不同的。从01111
到011111
,差值为1000
,那么使用1000
和key.hashCode
做与操作就可以判断第四位是否为 1。如果是从 16 扩容到 32,也就是从01111
到0111111
,那么差值就是10000
,就需要使用10000
和key.hashCode
做与操作判断第五位是否为 1。实际上,我们可以发现差值就是扩容前的原容量,那么我们将key.hashCode
和原容量做与操作就可以知道扩容前后的索引是否有差异。其次,如果索引在扩容后变化,那么一定原索引+旧table
的容量,因为扩容前索引保留的几位在扩容后仍然会保留,而需要判断的那一位如果为 1 则正是旧table
的容量大小。下面我们再来分析链表拆分的代码:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 Node<K,V> loHead = null , loTail = null ; Node<K,V> hiHead = null , hiTail = null ; Node<K,V> next; do { next = e.next; if ((e.hash & oldCap) == 0 ) { if (loTail == null ) loHead = e; else loTail.next = e; loTail = e; } else { if (hiTail == null ) hiHead = e; else hiTail.next = e; hiTail = e; } } while ((e = next) != null ); if (loTail != null ) { loTail.next = null ; newTab[j] = loHead; } if (hiTail != null ) { hiTail.next = null ; newTab[j + oldCap] = hiHead; }
treeifyBin treeifyBin
用来将链表节点转为树节点,并且在转换的过程中仍然保持了链表的特点,最后通过treeify
方法构建红黑树:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 final void treeifyBin (Node<K,V>[] tab, int hash) { int n, index; Node<K,V> e; if (tab == null || (n = tab.length) < MIN_TREEIFY_CAPACITY) resize(); else if ((e = tab[index = (n - 1 ) & hash]) != null ) { TreeNode<K,V> hd = null , tl = null ; do { TreeNode<K,V> p = replacementTreeNode(e, null ); if (tl == null ) hd = p; else { p.prev = tl; tl.next = p; } tl = p; } while ((e = e.next) != null ); if ((tab[index] = hd) != null ) hd.treeify(tab); } } TreeNode<K,V> replacementTreeNode (Node<K,V> p, Node<K,V> next) { return new TreeNode <>(p.hash, p.key, p.value, next); }
put put
方法通过内置的putVal
方法添加键值对。putVal
中判断了table
是否需要扩容,是否需要将链表转化为红黑树以及覆盖原节点的值等:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 public V put (K key, V value) { return putVal(hash(key), key, value, false , true ); } final V putVal (int hash, K key, V value, boolean onlyIfAbsent, boolean evict) { Node<K,V>[] tab; Node<K,V> p; int n, i; if ((tab = table) == null || (n = tab.length) == 0 ) n = (tab = resize()).length; if ((p = tab[i = (n - 1 ) & hash]) == null ) tab[i] = newNode(hash, key, value, null ); else { Node<K,V> e; K k; if (p.hash == hash && ((k = p.key) == key || (key != null && key.equals(k)))) e = p; else if (p instanceof TreeNode) e = ((TreeNode<K,V>)p).putTreeVal(this , tab, hash, key, value); else { for (int binCount = 0 ; ; ++binCount) { if ((e = p.next) == null ) { p.next = newNode(hash, key, value, null ); if (binCount >= TREEIFY_THRESHOLD - 1 ) treeifyBin(tab, hash); break ; } if (e.hash == hash && ((k = e.key) == key || (key != null && key.equals(k)))) break ; p = e; } } if (e != null ) { V oldValue = e.value; if (!onlyIfAbsent || oldValue == null ) e.value = value; afterNodeAccess(e); return oldValue; } } ++modCount; if (++size > threshold) resize(); afterNodeInsertion(evict); return null ; }
get 通过key
的值获取到对应的value
:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 public V get (Object key) { Node<K,V> e; return (e = getNode(hash(key), key)) == null ? null : e.value; } final Node<K,V> getNode (int hash, Object key) { Node<K,V>[] tab; Node<K,V> first, e; int n; K k; if ((tab = table) != null && (n = tab.length) > 0 && (first = tab[(n - 1 ) & hash]) != null ) { if (first.hash == hash && ((k = first.key) == key || (key != null && key.equals(k)))) return first; if ((e = first.next) != null ) { if (first instanceof TreeNode) return ((TreeNode<K,V>)first).getTreeNode(hash, key); do { if (e.hash == hash && ((k = e.key) == key || (key != null && key.equals(k)))) return e; } while ((e = e.next) != null ); } } return null ; }
remove remove
主要用来删除指定的键值对,输入参数为key
的值,代码如下:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 public V remove (Object key) { Node<K,V> e; return (e = removeNode(hash(key), key, null , false , true )) == null ? null : e.value; } final Node<K,V> removeNode (int hash, Object key, Object value, boolean matchValue, boolean movable) { Node<K,V>[] tab; Node<K,V> p; int n, index; if ((tab = table) != null && (n = tab.length) > 0 && (p = tab[index = (n - 1 ) & hash]) != null ) { Node<K,V> node = null , e; K k; V v; if (p.hash == hash && ((k = p.key) == key || (key != null && key.equals(k)))) node = p; else if ((e = p.next) != null ) { if (p instanceof TreeNode) node = ((TreeNode<K,V>)p).getTreeNode(hash, key); else { do { if (e.hash == hash && ((k = e.key) == key || (key != null && key.equals(k)))) { node = e; break ; } p = e; } while ((e = e.next) != null ); } } if (node != null && (!matchValue || (v = node.value) == value || (value != null && value.equals(v)))) { if (node instanceof TreeNode) ((TreeNode<K,V>)node).removeTreeNode(this , tab, movable); else if (node == p) tab[index] = node.next; else p.next = node.next; ++modCount; --size; afterNodeRemoval(node); return node; } } return null ; }
参考
HashMap 源码解读——逐句分析 resize 方法的实现
HashMap 源码&底层数据结构分析
n |= n >>> 1——JDK10 的 HashMap 原理 tableSizeFor(initialCapacity) 方法
java 中的移位运算符:<<,>>,>>>总结