public class HashMap<K,V> extends AbstractMap<K,V>
    implements Map<K,V>, Cloneable, Serializable {
    // Extends AbstractMap and implements the Cloneable and Serializable interfaces
    
    // Key constants
    // Default initial capacity: 1 shifted left by 4 bits, i.e. 16
    static final int DEFAULT_INITIAL_CAPACITY = 1 << 4;
    
    // Maximum capacity: 2 to the power of 30
    static final int MAXIMUM_CAPACITY = 1 << 30;
    
    // Default load factor: 0.75
    static final float DEFAULT_LOAD_FACTOR = 0.75f;
    
    // Threshold at which a bucket's linked list is converted into a red-black tree: 8
    static final int TREEIFY_THRESHOLD = 8;
    
    // Threshold at which a red-black tree is converted back into a linked list: 6
    static final int UNTREEIFY_THRESHOLD = 6;
    
    // Resize threshold: the product of the capacity and the load factor
    int threshold;
    
    // When a list grows longer than 8 nodes, the table capacity is checked: if it is at least 64
    // the list is converted into a red-black tree, otherwise the table is resized first
    static final int MIN_TREEIFY_CAPACITY = 64;
    
    // The hash table that stores the key-value pairs
    transient Node<K,V>[] table;
    
    // The number of key-value mappings currently stored
    transient int size;
    
    // Incremented whenever the HashMap is structurally modified (insert/remove);
    // used to implement the fail-fast mechanism
    transient int modCount;
    
    // Constructor that accepts a custom capacity and load factor
    public HashMap(int initialCapacity, float loadFactor) {
        if (initialCapacity < 0)
            throw new IllegalArgumentException("Illegal initial capacity: " +
                                               initialCapacity);
        if (initialCapacity > MAXIMUM_CAPACITY)
            initialCapacity = MAXIMUM_CAPACITY;
        if (loadFactor <= 0 || Float.isNaN(loadFactor))
            throw new IllegalArgumentException("Illegal load factor: " +
                                               loadFactor);
        this.loadFactor = loadFactor;
        this.threshold = tableSizeFor(initialCapacity);
    }
    
    // Constructor that accepts a custom capacity; it delegates to the constructor above,
    // passing the given capacity and the default load factor
    public HashMap(int initialCapacity) {
        this(initialCapacity, DEFAULT_LOAD_FACTOR);
    }
    
    // No-argument constructor; uses the default load factor
    public HashMap() {
        this.loadFactor = DEFAULT_LOAD_FACTOR; // all other fields defaulted
    }
    
    
    // The internal Node structure; note that it forms a singly linked list
    static class Node<K,V> implements Map.Entry<K,V> {
        final int hash;
        final K key;
        V value;
        Node<K,V> next;

        Node(int hash, K key, V value, Node<K,V> next) {
            this.hash = hash;
            this.key = key;
            this.value = value;
            this.next = next;
        }
        
        public final K getKey()        { return key; }
        public final V getValue()      { return value; }
        public final String toString() { return key + "=" + value; }

        public final int hashCode() {
            return Objects.hashCode(key) ^ Objects.hashCode(value);
        }

        public final V setValue(V newValue) {
            V oldValue = value;
            value = newValue;
            return oldValue;
        }

        public final boolean equals(Object o) {
            if (o == this)
                return true;
            if (o instanceof Map.Entry) {
                Map.Entry<?,?> e = (Map.Entry<?,?>)o;
                if (Objects.equals(key, e.getKey()) &&
                    Objects.equals(value, e.getValue()))
                    return true;
            }
            return false;
        }
    
    }
    
    // hash method: XORs the high 16 bits of the key's hashCode with its low 16 bits
    static final int hash(Object key) {
        int h;
        return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16);
    }
    
    // When a capacity is passed to a constructor, computes the actual table size:
    // the smallest power of two greater than or equal to the given capacity
    static final int tableSizeFor(int cap) {
        int n = cap - 1;
        n |= n >>> 1;
        n |= n >>> 2;
        n |= n >>> 4;
        n |= n >>> 8;
        n |= n >>> 16;
        return (n < 0) ? 1 : (n >= MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : n + 1;
    }
    
    // Returns the current number of mappings
    public int size() {
        return size;
    }
    
    // Returns whether the map is empty
    public boolean isEmpty() {
        return size == 0;
    }
    
    // get method: returns the value mapped to the key; delegates to getNode
    public V get(Object key) {
        Node<K,V> e;
        return (e = getNode(hash(key), key)) == null ? null : e.value;
    }
    
    final Node<K,V> getNode(int hash, Object key) {
        Node<K,V>[] tab; Node<K,V> first, e; int n; K k;
        
        // Check that the table is non-null, its length is greater than 0,
        // and the bucket at the computed index is non-empty
        if ((tab = table) != null && (n = tab.length) > 0 &&
            (first = tab[(n - 1) & hash]) != null) {
            
            // If the head node's key equals the key we are looking for, return the head node
            if (first.hash == hash && // always check first node
                ((k = first.key) == key || (key != null && key.equals(k))))
                return first;
                
            // Otherwise, check whether the node after the head is non-null
            if ((e = first.next) != null) {
                // If it is non-null, check whether the head node is a tree node
                if (first instanceof TreeNode)
                    // A tree node means the bucket is a red-black tree, so delegate to the tree lookup
                    return ((TreeNode<K,V>)first).getTreeNode(hash, key);
                // Otherwise traverse the linked list and return the node whose key matches, if any
                do {
                    if (e.hash == hash &&
                        ((k = e.key) == key || (key != null && key.equals(k))))
                        return e;
                } while ((e = e.next) != null);
            }
        }
        // Not found: return null
        return null;
    }
    
    // Returns whether the map contains the given key; delegates to getNode
    public boolean containsKey(Object key) {
        return getNode(hash(key), key) != null;
    }
    
    // put method; delegates to putVal
    public V put(K key, V value) {
        return putVal(hash(key), key, value, false, true);
    }
    
    
    final V putVal(int hash, K key, V value, boolean onlyIfAbsent,
                   boolean evict) {
        Node<K,V>[] tab; Node<K,V> p; int n, i;
        // If the table is null or its length is 0, resize (this also handles initial allocation)
        if ((tab = table) == null || (n = tab.length) == 0)
            n = (tab = resize()).length;
        // The table is not empty: use hash and the table length to locate the bucket and check whether it is empty
        if ((p = tab[i = (n - 1) & hash]) == null)
            // If empty, wrap the hash, key and value in a new Node and place it in that bucket
            tab[i] = newNode(hash, key, value, null);
        else {
            // If the bucket is not empty, check whether the head node's key equals the key being put
            Node<K,V> e; K k;
            if (p.hash == hash &&
                ((k = p.key) == key || (key != null && key.equals(k))))
                // If they are equal, assign the head node to e
                e = p;
            // Otherwise, check whether the head node is a tree node
            else if (p instanceof TreeNode)
                // If the head node is a tree node, store the mapping via the tree insertion method
                e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);
            else {
                // If the head node is not a tree node, traverse the linked list
                for (int binCount = 0; ; ++binCount) {
                    // If the tail is reached without finding the key, append the key-value pair as a new node at the end
                    if ((e = p.next) == null) {
                        p.next = newNode(hash, key, value, null);
                        // If the list length now exceeds 8, try to treeify the bucket
                        // (if the table length is less than 64, the table is resized instead)
                        if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
                            treeifyBin(tab, hash);
                        break;
                    }
                    // If a node with the same key is found along the way, break out of the loop
                    if (e.hash == hash &&
                        ((k = e.key) == key || (key != null && key.equals(k))))
                        break;
                    p = e;
                }
            }
            
            // If e is non-null, a node with the same key was found:
            // the new value overwrites the old one and the old value is returned
            if (e != null) { // existing mapping for key
                V oldValue = e.value;
                if (!onlyIfAbsent || oldValue == null)
                    e.value = value;
                afterNodeAccess(e);
                return oldValue;
            }
        }
        // No node with the same key was found, so a new node was inserted: increment modCount
        ++modCount;
        // If the size exceeds the resize threshold, resize
        if (++size > threshold)
            resize();
        // Post-insertion callback
        afterNodeInsertion(evict);
        return null;
    }
    
    // Resize method
    final Node<K,V>[] resize() {
        Node<K,V>[] oldTab = table;
        // Current capacity
        int oldCap = (oldTab == null) ? 0 : oldTab.length;
        // Current resize threshold
        int oldThr = threshold;
        // New capacity and new threshold, both initialized to 0
        int newCap, newThr = 0;
        
        // If the current capacity is greater than 0, check whether it has reached the maximum
        if (oldCap > 0) {
            // If it has, set the threshold to Integer.MAX_VALUE (effectively no further resizing) and return the old table
            if (oldCap >= MAXIMUM_CAPACITY) {
                threshold = Integer.MAX_VALUE;
                return oldTab;
            }
            // Otherwise double the capacity (shift left by one); if the old capacity was at least the
            // default (16) and the new capacity is below the maximum, the threshold is doubled as well
            else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY &&
                     oldCap >= DEFAULT_INITIAL_CAPACITY)
                newThr = oldThr << 1; // double threshold
        }
        // If the current capacity is 0 but the threshold is greater than 0,
        // the initial capacity was stored in the threshold, so use it as the new capacity
        else if (oldThr > 0) // initial capacity was placed in threshold
            newCap = oldThr;
        // If both the current capacity and the threshold are 0, use the defaults for both
        else {               // zero initial threshold signifies using defaults
            newCap = DEFAULT_INITIAL_CAPACITY;
            newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY);
        }
        // If the new threshold is still 0 (was not assigned above), compute it from the new capacity and the load factor
        if (newThr == 0) {
            float ft = (float)newCap * loadFactor;
            newThr = (newCap < MAXIMUM_CAPACITY && ft < (float)MAXIMUM_CAPACITY ?
                      (int)ft : Integer.MAX_VALUE);
        }
        threshold = newThr;
        // Create the new table with the new capacity
        @SuppressWarnings({"rawtypes","unchecked"})
        Node<K,V>[] newTab = (Node<K,V>[])new Node[newCap];
        table = newTab;
        // If the old table is not empty, move its elements into the new table
        if (oldTab != null) {
            for (int j = 0; j < oldCap; ++j) {
                Node<K,V> e;
                if ((e = oldTab[j]) != null) {
                    oldTab[j] = null;
                    if (e.next == null)
                        newTab[e.hash & (newCap - 1)] = e;
                    else if (e instanceof TreeNode)
                        ((TreeNode<K,V>)e).split(this, newTab, j, oldCap);
                    else { // preserve order
                        Node<K,V> loHead = null, loTail = null;
                        Node<K,V> hiHead = null, hiTail = null;
                        Node<K,V> next;
                        do {
                            next = e.next;
                            if ((e.hash & oldCap) == 0) {
                                if (loTail == null)
                                    loHead = e;
                                else
                                    loTail.next = e;
                                loTail = e;
                            }
                            else {
                                if (hiTail == null)
                                    hiHead = e;
                                else
                                    hiTail.next = e;
                                hiTail = e;
                            }
                        } while ((e = next) != null);
                        if (loTail != null) {
                            loTail.next = null;
                            newTab[j] = loHead;
                        }
                        if (hiTail != null) {
                            hiTail.next = null;
                            newTab[j + oldCap] = hiHead;
                        }
                    }
                }
            }
        }
        // Return the new table
        return newTab;
    }
    
    // Attempts to convert a bucket's linked list into a red-black tree
    final void treeifyBin(Node<K,V>[] tab, int hash) {
        int n, index; Node<K,V> e;
        // If the table length is below MIN_TREEIFY_CAPACITY (64), resize instead of treeifying
        if (tab == null || (n = tab.length) < MIN_TREEIFY_CAPACITY)
            resize();
        
        // Otherwise convert the linked list in the corresponding bucket into a red-black tree
        else if ((e = tab[index = (n - 1) & hash]) != null) {
            TreeNode<K,V> hd = null, tl = null;
            do {
                TreeNode<K,V> p = replacementTreeNode(e, null);
                if (tl == null)
                    hd = p;
                else {
                    p.prev = tl;
                    tl.next = p;
                }
                tl = p;
            } while ((e = e.next) != null);
            if ((tab[index] = hd) != null)
                hd.treeify(tab);
        }
    }
    
    // Removes the mapping for the given key; delegates to removeNode
    public V remove(Object key) {
        Node<K,V> e;
        return (e = removeNode(hash(key), key, null, false, true)) == null ?
            null : e.value;
    }
    
    
    final Node<K,V> removeNode(int hash, Object key, Object value,
                               boolean matchValue, boolean movable) {
        Node<K,V>[] tab; Node<K,V> p; int n, index;
        // Check that the table is non-null and that the bucket (list or tree) for the key's hash is non-empty
        if ((tab = table) != null && (n = tab.length) > 0 &&
            (p = tab[index = (n - 1) & hash]) != null) {
            // If the head node's key equals the key we are looking for, assign the head node to node
            Node<K,V> node = null, e; K k; V v;
            if (p.hash == hash &&
                ((k = p.key) == key || (key != null && key.equals(k))))
                node = p;
            // Otherwise, if the node after the head is non-null, check whether the head node is a tree node
            else if ((e = p.next) != null) {
                // If the head node is a tree node, delegate to the tree lookup
                if (p instanceof TreeNode)
                    node = ((TreeNode<K,V>)p).getTreeNode(hash, key);
                // If it is not a tree node, traverse the linked list
                else {
                    do {
                        // If a node with a matching key is found, assign it to node
                        if (e.hash == hash &&
                            ((k = e.key) == key ||
                             (key != null && key.equals(k)))) {
                            node = e;
                            break;
                        }
                        p = e;
                    } while ((e = e.next) != null);
                    // Note: when the loop above ends, p is the node immediately before e
                }
            }
            // If node is not null, the target node was found (and, if matchValue is set, the value matches too)
            if (node != null && (!matchValue || (v = node.value) == value ||
                                 (value != null && value.equals(v)))) {
                // If the node is a tree node, remove it via the tree removal method
                if (node instanceof TreeNode)
                    ((TreeNode<K,V>)node).removeTreeNode(this, tab, movable);
                // If node is the head of its list, make its successor the new head
                else if (node == p)
                    tab[index] = node.next;
                // Otherwise node is in the middle of the list: unlink it
                else
                    p.next = node.next;
                // Bookkeeping
                ++modCount;
                --size;
                afterNodeRemoval(node);
                return node;
            }
        }
        // Not found: return null
        return null;
    }
    
    

These are some of HashMap's most commonly used methods.
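
For reference, here is a minimal usage sketch of these methods from application code (plain java.util.HashMap usage; the class name, keys and values are made up for illustration):

    import java.util.HashMap;
    import java.util.Map;

    public class HashMapUsage {
        public static void main(String[] args) {
            // Custom capacity and load factor, as in the two-argument constructor above
            Map<String, Integer> ages = new HashMap<>(16, 0.75f);

            ages.put("alice", 30);                    // insert; delegates to putVal internally
            ages.put("alice", 31);                    // same key: the new value overwrites the old one
            Integer age = ages.get("alice");          // 31
            boolean hasBob = ages.containsKey("bob"); // false
            Integer removed = ages.remove("alice");   // 31; the mapping is gone afterwards

            System.out.println(age + " " + hasBob + " " + removed + ", size=" + ages.size());
        }
    }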

Some questions:

1. Why does the hash() method XOR the high 16 bits of the key's hashCode with the low 16 bits?

When we put a key-value pair, HashMap passes the key's hashCode through the hash() method to obtain a hash value, then ANDs that hash with (length - 1), where length is the length of the table array, to get the bucket index. The key-value pair is then placed in the linked list or red-black tree at that index.

In practice the capacity of a HashMap stays within a fairly modest range, neither very small nor very large, so (length - 1) is usually a small number whose high 16 bits are all 0. When the hash is ANDed with (length - 1), those zero high bits force the corresponding result bits to 0, no matter whether the hash's high bits are 0 or 1; in other words, the high 16 bits of the hashCode never take part in the index calculation, which greatly increases the probability of hash collisions. By first XORing the high 16 bits of the hashCode into the low 16 bits, the high bits indirectly participate in the index calculation, which reduces collisions.
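
A small sketch of this effect (the spreading logic mirrors the hash() method above; the two hashCode values are made up so that they differ only in their high 16 bits):

    public class HashSpreadDemo {
        // Same spreading as HashMap.hash(): XOR the high 16 bits into the low 16 bits
        static int spread(int h) {
            return h ^ (h >>> 16);
        }

        public static void main(String[] args) {
            int length = 16;         // default table size, so (length - 1) = 0b1111
            int h1 = 0x00010001;     // hypothetical hashCode values that differ
            int h2 = 0x00020001;     // only in bits 16 and 17

            // Without spreading, the differing high bits are masked away: both keys land on index 1
            System.out.println((h1 & (length - 1)) + " vs " + (h2 & (length - 1))); // 1 vs 1

            // With spreading, the high bits influence the index: the keys land in different buckets
            System.out.println((spread(h1) & (length - 1)) + " vs " + (spread(h2) & (length - 1))); // 0 vs 3
        }
    }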

2. Why is the capacity of a HashMap always a power of two (2^n)?

To determine the index for a key, we effectively need the key's hash modulo the table length. There are two ways to do this: compute the remainder directly (hash % length), or use a bitwise AND; HashMap uses the latter. But hash % length == hash & (length - 1) holds only when length is a power of two, which is the first reason. Secondly, when length is a power of two, the low bits of (length - 1) are all 1s, so ANDing with the hash preserves the hash's low bits: the result of the AND depends on the hash value rather than on length, which spreads keys more evenly and reduces collisions. (Imagine a bit of (length - 1) being 0: the corresponding bit of the index would be 0 regardless of the hash, so some indices could never be produced, leading to more collisions.)
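
A quick check of the equivalence with a few arbitrary sample hashes (the class name is just for illustration):

    public class PowerOfTwoDemo {
        public static void main(String[] args) {
            int length = 16;                        // a power of two, as HashMap guarantees
            int[] sampleHashes = {7, 23, 123456789};

            for (int h : sampleHashes) {
                // For a power-of-two length, the cheap bitwise AND equals the modulo
                System.out.println((h % length) + " == " + (h & (length - 1)));
            }

            // With a non-power-of-two length such as 12, (length - 1) = 0b1011:
            // bit 2 of the result is always 0, so buckets 4-7 could never be selected
            int badLength = 12;
            System.out.println((23 % badLength) + " != " + (23 & (badLength - 1))); // 11 != 3
        }
    }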

3. Fail-fast

The fail-fast mechanism shows up when a HashMap is structurally modified (elements added or removed) while it is being traversed, for example with forEach or an iterator: a ConcurrentModificationException is thrown. Under the hood, the iterator copies modCount into an expectedModCount field when it is created and compares the two values during traversal. If they are equal, traversal continues; if they are not, the exception above is thrown. The fail-fast mechanism mainly exists to detect that a HashMap has been modified while it is being traversed.
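
A minimal single-threaded sketch that triggers the exception (class name and keys are made up):

    import java.util.HashMap;
    import java.util.Map;

    public class FailFastDemo {
        public static void main(String[] args) {
            Map<String, Integer> map = new HashMap<>();
            map.put("a", 1);
            map.put("b", 2);
            map.put("c", 3);

            // The iterator behind this for-each loop copies modCount into expectedModCount when it is created.
            // map.remove() changes modCount without updating expectedModCount, so the next step of the
            // iteration detects the mismatch and throws ConcurrentModificationException.
            for (String key : map.keySet()) {
                map.remove(key); // structural modification during traversal
            }
        }
    }

The safe way to remove entries while traversing is Iterator.remove(), which updates the iterator's expectedModCount itself.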

4. Differences between HashMap in JDK 1.7 and JDK 1.8

Main differences:

  • In 1.7 the underlying data structure is an array plus linked lists; in 1.8 it is an array plus linked lists and red-black trees. Converting a long linked list into a red-black tree avoids the drop in query performance when a list grows too long.
  • 1.7 uses head insertion while 1.8 uses tail insertion. When no matching key is found after traversing a bucket's linked list, 1.7 puts the new node at the head of the list and 1.8 puts it at the tail, which avoids the circular-list ("dead loop") problem that could occur in 1.7 when multiple threads resized the map at the same time.