preface

The source code analyzed in this article is libdispatch-1173.40.5; the article mainly examines the implementation of the commonly used dispatch APIs. The dispatch_object_s data structure below is used throughout the analysis.

// Base header shared by every dispatch object (queue, group, semaphore, ...).
struct dispatch_object_s {
	const struct dispatch_object_vtable_s *do_vtable; // Object type plus its function pointers
	int volatile do_ref_cnt;                      // Internal reference count
	int volatile do_xref_cnt;                     // External reference count; the object is freed only when both counts reach 0
	struct dispatch_object_s *volatile do_next;   // Next object in the linked list
	struct dispatch_queue_s *do_targetq;          // Target queue
	void *do_ctxt;                                // Context
	void *do_finalizer;                           // Function called when the object is destroyed
	unsigned int do_suspend_cnt;                  // Suspend count
};
(Code listing ends.)

Where do_vtable contains the object type and function pointer;

dispatch_object_t

dispatch_object_t is a transparent union whose members are pointers to all of the dispatch data structures, so any of them can be passed where a dispatch_object_t is expected.

// Transparent union: a pointer to any dispatch data structure can be passed
// where a dispatch_object_t is expected, without an explicit cast.
typedef union {
	struct _os_object_s* _os_obj;
	struct dispatch_object_s* _do;             // Base object structure
	struct dispatch_continuation_s* _dc;       // dispatch_async blocks are wrapped in this structure
	struct dispatch_queue_s* _dq;              // Queue
	struct dispatch_queue_attr_s* _dqa;        // Queue attributes
	struct dispatch_group_s* _dg;              // Group
	struct dispatch_source_s* _ds;             // Source structure
	struct dispatch_mach_s* _dm;
	struct dispatch_mach_msg_s* _dmsg;
	struct dispatch_timer_aggregate_s* _dta;
	struct dispatch_source_attr_s* _dsa;       // Source attributes
	struct dispatch_semaphore_s* _dsema;       // Semaphore
	struct dispatch_data_s* _ddata;
	struct dispatch_io_s* _dchannel;
	struct dispatch_operation_s* _doperation;
	struct dispatch_disk_s* _ddisk;
} dispatch_object_t __attribute__((__transparent_union__));
Copy the code

dispatch_continuation_s

// A continuation wraps one submitted task (function pointer + context).
struct dispatch_continuation_s {
	struct dispatch_object_s *volatile do_next; // Next task in the queue
	dispatch_function_t dc_func;                // Function to execute
	void *dc_ctxt;                              // Context passed to the function
	void *dc_data;                              // Related data
	void *dc_other;                             // Other information
};
Copy the code

dispatch_continuation_s is the structure that represents a task in a queue: each block that is submitted gets wrapped in an object of this type and enqueued.

dispatch_queue_s

struct dispatch_queue_s {
	struct dispatch_queue_s *do_targetq;               // Target queue, which eventually points to a system's default queue
	struct dispatch_object_s *volatile dq_items_head;  // Queue header
	struct dispatch_object_s *volatile dq_items_tail;  // The end of the queue
	unsigned long dq_serialnum;                        // Queue number
	const char *dq_label;                              / / the queue name
	dispatch_priority_t dq_priority;                   / / priority
	dispatch_priority_t volatile dq_override;          // Whether it is overwritten
	uint16_t dq_width;                                 // The number of tasks that can be executed concurrently
	dispatch_queue_t dq_specific_q;                    // Special queue
	uint32_t dq_side_suspend_cnt;                      // The number of paused tasks
  
	const struct queue_vtable_s *do_vtable {           // Some function Pointers to the queue
		unsigned long const do_type;               // Queue types, such as DISPATCH_QUEUE_CONCURRENT_TYPE, DISPATCH_QUEUE_SERIAL_TYPE, DISPATCH_QUEUE_GLOBAL_ROOT_TYPE...
		const char *const do_kind;                 // Queue type, for example: "serial-queue", "concurrent-queue", "global-queue", "main-queue", "runloop-queue"" Mgr-queue "...
		void (*const do_dispose)(/*params*/);      // Destroy the queue
		void (*const do_suspend)(/*params*/);      // Pause the queue
		void (*const do_resume)(/*params*/);       // Restore the queue
		void (*const do_invoke)(/*params*/);       // Start processing the queue
		void (*const do_wakeup)(/*params*/);       // Wake up the queue
		void (*const do_set_targetq)(/*params*/);  // Set target queue
	};
}
Copy the code

dispatch_queue_s is the structure of the queue. Its do_vtable holds pointers to the functions that implement the queue's operations, and a set of macros is used to invoke these methods on a queue. For example, the do_dispose method is invoked through the dx_dispose macro:

// Invoke the do_dispose function pointer stored in the queue's vtable.
#define dx_dispose(queue) (&(queue)->do_vtable->_os_obj_vtable)->do_dispose(queue)
Copy the code

Understand the relationship between queues and threads

dispatch_once

The source code is as follows:

void
dispatch_once_f(dispatch_once_t *val, void *ctxt, dispatch_function_t func)
{
	dispatch_once_gate_t l = (dispatch_once_gate_t)val;

#if! DISPATCH_ONCE_INLINE_FASTPATH || DISPATCH_ONCE_USE_QUIESCENT_COUNTER
	// Atomically load l->dgo_once.
	uintptr_t v = os_atomic_load(&l->dgo_once, acquire);
	// If it is already DLOCK_ONCE_DONE (the common case: func has already
	// run), return immediately.
	if (likely(v == DLOCK_ONCE_DONE)) {
		return;
	}
#if DISPATCH_ONCE_USE_QUIESCENT_COUNTER
	// Alternate "done" encoding used by the quiescent-counter scheme.
	if (likely(DISPATCH_ONCE_IS_GEN(v))) {
		return _dispatch_once_mark_done_if_quiesced(l, v);
	}
#endif
#endif
	// Atomically try to take ownership of the once gate; the first caller wins.
	if (_dispatch_once_gate_tryenter(l)) {
		return _dispatch_once_callout(l, ctxt, func);
	}
	// Another thread is running func: block until it completes.
	return _dispatch_once_wait(l);
}

static inline bool
_dispatch_once_gate_tryenter(dispatch_once_gate_t l)
{
	// Compare-and-swap: if l->dgo_once still equals DLOCK_ONCE_UNLOCKED,
	// store the current thread's lock value and return true.
	// Returns false when another thread has already entered the gate.
	return os_atomic_cmpxchg(&l->dgo_once, DLOCK_ONCE_UNLOCKED,
			(uintptr_t)_dispatch_lock_value_for_self(), relaxed);
}

static void
_dispatch_once_callout(dispatch_once_gate_t l, void *ctxt,
		dispatch_function_t func)
{
	// Run the user's function (the block) exactly once.
	_dispatch_client_callout(ctxt, func);
	// Mark the gate done and wake every thread blocked in _dispatch_once_wait.
	_dispatch_once_gate_broadcast(l);
}
Copy the code

The general process is as follows (copied from others’ drawings) :

GCD dispatch_once

queue

The main apis used by queue are as follows:

dispatch_queue_main_t
dispatch_get_main_queue(void);
dispatch_queue_global_t
dispatch_get_global_queue(long identifier, unsigned long flags);
dispatch_queue_t
dispatch_queue_create(const char *_Nullable label, dispatch_queue_attr_t _Nullable attr);
Copy the code

dispatch_queue_create

The pseudocode is as follows:

// dispatch_queue_create forwards to _dispatch_lane_create_with_target with
// the default target queue.
dispatch_queue_t
dispatch_queue_create(const char *label, dispatch_queue_attr_t attr)
{
	return _dispatch_lane_create_with_target(label, attr,
											 DISPATCH_TARGET_QUEUE_DEFAULT, true);
}

//_dispatch_lane_create_with_target (pseudocode, heavily simplified)
static dispatch_queue_t
_dispatch_lane_create_with_target(const char *label, dispatch_queue_attr_t dqa,
								  dispatch_queue_t tq, bool legacy)
{
	// Step 1: normalize arguments (qos, overcommit, tq).
	// Returns an empty attribute object for serial queues (dqa is NULL) and
	// default-initialized values for concurrent queues.
	dispatch_queue_attr_info_t dqai = _dispatch_queue_attr_to_info(dqa);
	// overcommit: whether new threads may need to be created.
	// tq: the target queue, DISPATCH_TARGET_QUEUE_DEFAULT by default.
	if (!tq) {
		// No target queue specified: pick one from the root-queue array.
		tq = _dispatch_root_queues[2 * (qos - 1) + overcommit];
	}

	// Step 2: initialize the queue.
	// Allocate memory for the queue object.
	dispatch_lane_t dq = _dispatch_object_alloc(vtable,
												sizeof(struct dispatch_lane_s));
	// Serial numbers for user-created queues start at 17; the rest are reserved:
	// 0  - skipped
	// 1  - main_q (main queue)
	// 2  - mgr_q (manager queue)
	// 3  - mgr_root_q (target queue of the manager queue)
	// 4..15 - global queues, statically initialized in the root-queue array,
	//         one pair per QoS class
	// 17 - workloop_fallback_q
	// we use 'xadd' on Intel, so the initial value == next assigned
	dq->dq_serialnum = os_atomic_inc_orig(&_dispatch_queue_serial_numbers, relaxed);
	// Concurrency width: 1 for serial queues, DISPATCH_QUEUE_WIDTH_MAX for concurrent ones.
	dq->dq_width = dqai.dqai_concurrent ? DISPATCH_QUEUE_WIDTH_MAX : 1;
	dq->dq_label = label;   // Queue name
	dq->dq_priority = _dispatch_priority_make((dispatch_qos_t)dqai.dqai_qos,
											  dqai.dqai_relpri);  // Priority
	dq->do_targetq = tq;    // Target queue

	return dq._dq;
}

// Allocated global queue, specifying queues of different qos classes
struct dispatch_queue_global_s _dispatch_root_queues[] = {
	// Initialize the property. _DISPATCH_ROOT_QUEUE_ENTRY(MAINTENANCE,0,
		.dq_label = "com.apple.root.maintenance-qos",
		.dq_serialnum = 4,
	),
	_DISPATCH_ROOT_QUEUE_ENTRY(MAINTENANCE, DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
		.dq_label = "com.apple.root.maintenance-qos.overcommit",
		.dq_serialnum = 5,
	),
	_DISPATCH_ROOT_QUEUE_ENTRY(BACKGROUND, 0,
		.dq_label = "com.apple.root.background-qos",
		.dq_serialnum = 6,
	),
	_DISPATCH_ROOT_QUEUE_ENTRY(BACKGROUND, DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
		.dq_label = "com.apple.root.background-qos.overcommit",
		.dq_serialnum = 7,
	),
	_DISPATCH_ROOT_QUEUE_ENTRY(UTILITY, 0,
		.dq_label = "com.apple.root.utility-qos",
		.dq_serialnum = 8,
	),
	_DISPATCH_ROOT_QUEUE_ENTRY(UTILITY, DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
		.dq_label = "com.apple.root.utility-qos.overcommit",
		.dq_serialnum = 9,
	),
	_DISPATCH_ROOT_QUEUE_ENTRY(DEFAULT, DISPATCH_PRIORITY_FLAG_FALLBACK,
		.dq_label = "com.apple.root.default-qos",
		.dq_serialnum = 10,
	),
	_DISPATCH_ROOT_QUEUE_ENTRY(DEFAULT,
			DISPATCH_PRIORITY_FLAG_FALLBACK | DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
		.dq_label = "com.apple.root.default-qos.overcommit",
		.dq_serialnum = 11,
	),
	_DISPATCH_ROOT_QUEUE_ENTRY(USER_INITIATED, 0,
		.dq_label = "com.apple.root.user-initiated-qos",
		.dq_serialnum = 12,
	),
	_DISPATCH_ROOT_QUEUE_ENTRY(USER_INITIATED, DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
		.dq_label = "com.apple.root.user-initiated-qos.overcommit",
		.dq_serialnum = 13,
	),
	_DISPATCH_ROOT_QUEUE_ENTRY(USER_INTERACTIVE, 0,
		.dq_label = "com.apple.root.user-interactive-qos",
		.dq_serialnum = 14,
	),
	_DISPATCH_ROOT_QUEUE_ENTRY(USER_INTERACTIVE, DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
		.dq_label = "com.apple.root.user-interactive-qos.overcommit",
		.dq_serialnum = 15,),};Copy the code

Dispatch_queue_create creates a queue and initializes its parameters, including its name, priority, number of concurrent requests, sequence number, and destination queue.

dispatch_get_global_queue

// Core lookup: map (qos, overcommit) to one of the statically allocated
// root queues. Each QoS class owns two consecutive slots in
// _dispatch_root_queues; adding overcommit (0 or 1) selects the variant.
static inline dispatch_queue_global_t
_dispatch_get_root_queue(dispatch_qos_t qos, bool overcommit)
{
	if (unlikely(qos < DISPATCH_QOS_MIN || qos > DISPATCH_QOS_MAX)) {
		DISPATCH_CLIENT_CRASH(qos, "Corrupted priority");
	}
	return &_dispatch_root_queues[2 * (qos - 1) + overcommit];
}
Copy the code

Queues are initialized with _dispatch_root_queues based on qos and overcommit.

dispatch_get_main_queue

// The statically allocated main queue.
struct dispatch_queue_static_s _dispatch_main_q = {
	DISPATCH_GLOBAL_OBJECT_HEADER(queue_main),
#if !DISPATCH_USE_RESOLVERS
	.do_targetq = _dispatch_get_default_queue(true),
#endif
	.dq_state = DISPATCH_QUEUE_STATE_INIT_VALUE(1) |
			DISPATCH_QUEUE_ROLE_BASE_ANON,
	.dq_label = "com.apple.main-thread",
	.dq_atomic_flags = DQF_THREAD_BOUND | DQF_WIDTH(1),  // bound to the main thread, width 1
	.dq_serialnum = 1,
};

The main queue is _dispatch_main_q queue, whose queue name is com.apple.main-thread, width is 1, serial number is 1.

async

void dispatch_async(dispatch_queue_t dq, dispatch_block_t work)
{
	// Allocate a dispatch_continuation_s to carry the task.
	dispatch_continuation_t dc = _dispatch_continuation_alloc();
	uintptr_t dc_flags = DC_FLAG_CONSUME;
	dispatch_qos_t qos;
	// _dispatch_continuation_init copies the block (so it cannot be destroyed
	// before it executes), stores it in dc->dc_ctxt, and sets
	// dc->dc_func = _dispatch_call_block_and_release, which releases the
	// block after it finishes executing.
	qos = _dispatch_continuation_init(dc, dq, work, 0, dc_flags);
	// The core of _dispatch_continuation_async is the dx_push function
	// pointer stored in do_vtable.
	_dispatch_continuation_async(dq, dc, qos, dc->dc_flags);
}

static inline void
_dispatch_continuation_async(dispatch_queue_class_t dqu,
		dispatch_continuation_t dc, dispatch_qos_t qos, uintptr_t dc_flags)
{
#if DISPATCH_INTROSPECTION
	if (!(dc_flags & DC_FLAG_NO_INTROSPECTION)) {
		_dispatch_trace_item_push(dqu, dc);
	}
#else
	(void)dc_flags;
#endif
	// The core: dispatch through the dq_push pointer stored in do_vtable.
	return dx_push(dqu._dq, dc, qos);
}
Copy the code

The pointer to the dx_push function is specified when init.c is initialized as follows:

// workloop class instances
_dispatch_workloop_push
// queue_serial, queue_runloop, source, channel, mach: serial / runloop / source / channel / mach queues
_dispatch_lane_push
// queue_concurrent: concurrent queues
_dispatch_lane_concurrent_push
// queue_global, queue_pthread_root: global queues and pthread root queues
_dispatch_root_queue_push
// queue_mgr: the manager queue
_dispatch_mgr_queue_push
// queue_main: the main queue
_dispatch_main_queue_push
Copy the code

The following analyzes common primary and global queues:

// Pseudocode for pushing a task onto the main queue.
void _dispatch_main_queue_push(dispatch_queue_main_t dq, dispatch_object_t dou,
							   dispatch_qos_t qos)
{
	_dispatch_queue_push();  // Push the task onto the main queue
	dx_wakeup();             // Call the wakeup function in do_vtable to wake the queue and drain it
}
Copy the code

Call stack for MAC (ios has some differences) :

dx_wakeup
  _dispatch_main_queue_wakeup
  	_dispatch_runloop_queue_wakeup
  		_dispatch_runloop_queue_poke
  			_dispatch_runloop_queue_handle_init // runs only once, via dispatch_once_f
  				mach_port_construct // build a mach port bound to the thread
  				_dispatch_runloop_queue_set_handle
  					dq->do_ctxt = (void *)(uintptr_t)handle
  		_dispatch_send_wakeup_runloop_thread// Wake up the runloop through the Mach port built above
Copy the code

The main thread runloop call stack looks like this:

push
mach port
_dispatch_send_wakeup_runloop_thread
mach port
runloop
runloop
libdispatch.dylib
_dispatch_main_queue_callback_4CF
block

Global Queue call stack:

_dispatch_root_queue_push
 	_dispatch_root_queue_push_override
 		_dispatch_root_queue_push_inline // os_mpsc_push_list: atomic push onto the queue
 			_dispatch_root_queue_poke
 				_dispatch_root_queue_poke_slow
 					_pthread_workqueue_addthreads// Add the task to the work queue
Copy the code

The work queue call thread stack looks like this:

global quque
push
root queue
pop

Other queue types can be traced through the source code and call stack analysis, as long as you understand the working principle, the source code needs to deal with various situations of the queue logic is more complex;

Sync

Dispatch_sync Join global queue analysis:

// Example: synchronously run a block on the default-priority global queue.
dispatch_sync(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
	NSLog(@"global queue sync fired");
});
Copy the code

The dispatch_sync call stack is as follows:

dispatch_sync
	_dispatch_sync_f
		_dispatch_sync_f_inline
			_dispatch_barrier_sync_f// Serial queue
  			_dispatch_queue_try_acquire_barrier_sync// Attempts to lock the queue. If the lock fails, the queue is being scheduled
  				_dispatch_sync_f_slow
  			_dispatch_sync_recurse// Nested calls with multiple target queues are recursive calls
  			_dispatch_lane_barrier_sync_invoke_and_complete// If none of the above occurs, the queue triggers the call immediately
  	_dispatch_sync_f_slow// Concurrent queue
  		_dispatch_sync_function_invoke
  			_dispatch_client_callout
  				__main_block_invoke
Copy the code

Execute tasks synchronously. If a task is being executed or the current queue is scheduled, the system locks the task (semaphore or KEvent) until the task is executed. Otherwise, the system directly executes the task to ensure synchronous execution.

The little knowledge

__builtin_expect

GCC introduced to optimize compiler instructions, avoid instruction jump, improve CPU efficiency;

Introducing a pipeline improves CPU efficiency: put simply, it lets the CPU prefetch the next instruction while the current one executes. As shown below:

The CPU pipeline reduces the time the CPU spends waiting for instructions, and thus improves CPU efficiency. While executing the current instruction, the CPU fetches the next instruction from memory. If the current instruction turns out to be a jump, the prefetched instruction is wasted: the CPU discovers it must execute the instruction at the jump target instead, and can only re-fetch it from memory. Jump instructions therefore reduce pipeline efficiency, and so CPU efficiency; in short, unpredictable branches should be avoided when writing programs. How? The answer is __builtin_expect. GCC introduced this builtin to "allow the programmer to tell the compiler which branch is most likely to be executed." It is written __builtin_expect(EXP, N), meaning that EXP is most likely to equal N. __builtin_expect is generally wrapped in the likely and unlikely macros, which are widely used in kernel programming. These macros are written as follows:

// Branch-prediction hints: __builtin_expect(e, n) tells the compiler that e
// most probably equals n, so it can place the expected path on the
// fall-through side of the branch. !!(x) normalizes x to 0 or 1.
#define likely(x)   __builtin_expect(!!(x), 1)  // x is most likely true
#define unlikely(x) __builtin_expect(!!(x), 0)  // x is most likely false
Copy the code

Just be clear:

if (likely(value))   // equivalent to if (value)
if (unlikely(value)) // equivalent to if (value)
Copy the code

Both forms are equivalent to a plain if (value); the difference is the hint: with likely() the statements in the if-branch are expected to execute, and with unlikely() the statements in the else-branch are. The compiler can then place the most likely code directly after the branch during compilation, reducing the performance penalty of instruction jumps.

Builtin_expect instructions

GCC __builtin_expect

os_atomic_cmpxchg

In os_atomic_cmpxchg(p, e, v, m): p is the pointer to the atomic variable (like the atomic_t *ptr below), m is the memory-order rule applied to the access, e is the expected old value to compare against, and v is the new value stored if they are equal.

// os_atomic_cmpxchg(p, e, v, m):
//   p - pointer to the atomic variable
//   e - expected old value
//   v - new value stored if *p == e
//   m - memory-order suffix applied on success (failure order is relaxed)
// Expands to C11 atomic_compare_exchange_strong_explicit; evaluates to
// true when the exchange succeeded.
#define os_atomic_cmpxchg(p, e, v, m) \
		({ _os_atomic_basetypeof(p) _r = (e); \
		atomic_compare_exchange_strong_explicit(_os_atomic_c11_atomic(p), \
		&_r, v, memory_order_##m, memory_order_relaxed); })

// The C11 memory orderings, mapped onto the compiler builtins
// (this mirrors the definition in <stdatomic.h>).
typedef enum memory_order {
  memory_order_relaxed = __ATOMIC_RELAXED,
  memory_order_consume = __ATOMIC_CONSUME,
  memory_order_acquire = __ATOMIC_ACQUIRE,
  memory_order_release = __ATOMIC_RELEASE,
  memory_order_acq_rel = __ATOMIC_ACQ_REL,
  memory_order_seq_cst = __ATOMIC_SEQ_CST
} memory_order;
Copy the code

For comparison, see atomic_cmpxchg:

static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
Copy the code

atomic_cmpxchg compares the value of the atomic variable with the given value and replaces it with the new value if they are equal. The ptr argument points to an atomic_t variable; old is the value compared against memory; new is the replacement value.

Atomic_cmpxchg source code analysis

Linux atomic_cmpxchg()/Atomic_read()/Atomic_set()/Atomic_add()/Atomic_sub()/atomi

std::memory_order

LLDB debugging

Use breakpoint matching and mirror matching to find function symbols, you can view all link library function symbols and break points, follow up the function call stack;

breakpoint set -r _dispatch_                       # break on every function whose name matches _dispatch_
image lookup -r -n _dispatch_ libdispatch.dylib    # find matching symbols in the libdispatch.dylib dynamic library

LLDB supports Python script parsing, and you can import Python scripts for advanced debugging.

Reference

Dispatch

iOS Multithreading summary

Dig up the libDispatch source code

The threading principle of GCD