review

Previously I covered the nature of categories and when `rwe` is assigned. Searching for where `rwe` is assigned leads to the method `attachCategories`, which is where categories are loaded.

attachCategories

// Attaches the method lists, property lists and protocol lists supplied by
// a set of categories to a class.
// The categories in cats_list are assumed to be fully loaded and sorted by
// load order, oldest categories first.
static void attachCategories(Class cls, const locstamped_category_t *cats_list, uint32_t cats_count,
                 int flags)
{
    if (slowpath(PrintReplacedMethods)) {
        printReplacements(cls, cats_list, cats_count);
    }
    if (slowpath(PrintConnecting)) {
        _objc_inform("CLASS: attaching %d categories to%s class '%s'%s",
                     cats_count, (flags & ATTACH_EXISTING) ? " existing" : "",
                     cls->nameForLogging(), (flags & ATTACH_METACLASS) ? " (meta)" : "");
    }

    /*
     * Only a few classes have more than 64 categories during launch.
     * This uses a little stack, and avoids malloc.
     *
     * Categories must be added in the proper order, which is back
     * to front. To do that with the chunking, we iterate cats_list
     * from front to back, build up the local buffers backwards,
     * and call attachLists on the chunks. attachLists prepends the
     * lists, so the final result is in the expected order.
     */
    constexpr uint32_t ATTACH_BUFSIZ = 64;
    method_list_t   *methodBuf[ATTACH_BUFSIZ];
    property_list_t *propertyBuf[ATTACH_BUFSIZ];
    protocol_list_t *protocolBuf[ATTACH_BUFSIZ];

    // Number of slots currently occupied at the tail of each buffer.
    uint32_t methodFill = 0;
    uint32_t propertyFill = 0;
    uint32_t protocolFill = 0;
    bool anyFromBundle = NO;
    bool forMeta = (flags & ATTACH_METACLASS);
    auto rwe = cls->data()->extAllocIfNeeded();

    for (uint32_t idx = 0; idx < cats_count; idx++) {
        const auto &stamped = cats_list[idx];

        if (method_list_t *methods = stamped.cat->methodsForMeta(forMeta)) {
            if (methodFill == ATTACH_BUFSIZ) {
                // Buffer is full: attach the whole chunk, then start refilling.
                prepareMethodLists(cls, methodBuf, methodFill, NO, anyFromBundle, __func__);
                rwe->methods.attachLists(methodBuf, methodFill);
                methodFill = 0;
            }
            // Fill from the back so the chunk ends up in reverse iteration order.
            methodBuf[ATTACH_BUFSIZ - ++methodFill] = methods;
            anyFromBundle |= stamped.hi->isBundle();
        }

        if (property_list_t *properties =
                stamped.cat->propertiesForMeta(forMeta, stamped.hi)) {
            if (propertyFill == ATTACH_BUFSIZ) {
                rwe->properties.attachLists(propertyBuf, propertyFill);
                propertyFill = 0;
            }
            propertyBuf[ATTACH_BUFSIZ - ++propertyFill] = properties;
        }

        if (protocol_list_t *protocols = stamped.cat->protocolsForMeta(forMeta)) {
            if (protocolFill == ATTACH_BUFSIZ) {
                rwe->protocols.attachLists(protocolBuf, protocolFill);
                protocolFill = 0;
            }
            protocolBuf[ATTACH_BUFSIZ - ++protocolFill] = protocols;
        }
    }

    // Attach whatever is left in the (partially filled) method buffer.
    if (methodFill > 0) {
        auto pendingMethods = methodBuf + ATTACH_BUFSIZ - methodFill;
        prepareMethodLists(cls, pendingMethods, methodFill,
                           NO, anyFromBundle, __func__);
        rwe->methods.attachLists(pendingMethods, methodFill);
        if (flags & ATTACH_EXISTING) {
            flushCaches(cls, __func__, [](Class c){
                // constant caches have been dealt with in prepareMethodLists
                // if the class still is constant here, it's fine to keep
                return !c->cache.isConstantOptimizedCache();
            });
        }
    }

    // These calls are made unconditionally, even when the fill count is 0.
    rwe->properties.attachLists(propertyBuf + ATTACH_BUFSIZ - propertyFill, propertyFill);
    rwe->protocols.attachLists(protocolBuf + ATTACH_BUFSIZ - protocolFill, protocolFill);
}

We don't yet know when `attachCategories` is called, so we work backwards from this known method: a global search for `attachCategories` turns up the following two call paths. Tracing further back, there are too many branches to judge by reading alone.

  • realizeClassWithoutSwift->methodizeClass->attachToClass->attachCategories
  • load_categories_nolock->attachCategories

The load flow falls into four cases, depending on whether the main class and the category implement `load`

Add the following snippet to each method to determine exactly which path is taken when loading

// Debug aid: print a marker when the class being processed is LKTeacher,
// so the console output shows that this code path was reached.
const char *targetName = "LKTeacher";
const char *className = cls->nonlazyMangledName();
if (0 == strcmp(className, targetName)) {
    printf("********load_categories_nolock**********\n");
}
Copy the code
  1. Both the main class and the category implement the `load` method

_read_images Non-lazy load ->realizeClassWithoutSwift->methodizeClass->load_categories_nolock->attachCategories

  2. The main class implements the `load` method; the category does not

_read_images Lazy load ->realizeClassWithoutSwift->attachToClass->methodizeClass

  3. The main class does not implement the `load` method; the category does

_read_images Lazy load ->realizeClassWithoutSwift->attachToClass->methodizeClass

  4. Neither the main class nor the category implements the `load` method

This path is not taken.

Tracing the category load flow

Add a breakpoint in the method and step through it to verify that loading follows the flow described above. Printing shows that the category has not been loaded yet, so continue to the next breakpoint.

Go to the class-related loading method

You can see that, at compile time, `name` becomes the name of the category.

You can see that the category information has been obtained, and that the last address in `mlists` is `cls`.

You can verify the above process

attachLists

    // Prepends addedCount lists from addedLists to the lists already stored
    // here. Newly attached lists end up at the front; lists that were already
    // present keep their relative order behind them.
    // Does nothing when addedCount is 0.
    void attachLists(List* const * addedLists, uint32_t addedCount) {
        if (addedCount == 0) return;

        if (hasArray()) {
            // many lists -> many lists
            uint32_t oldCount = array()->count;
            uint32_t newCount = oldCount + addedCount;
            array_t *newArray = (array_t *)malloc(array_t::byteSize(newCount));
            newArray->count = newCount;
            // The old array is deliberately left unmodified until it is freed:
            // it only holds oldCount entries, so its count must not be raised
            // to newCount.

            // Existing lists move back by addedCount slots. Source and
            // destination are different arrays, so copy direction is irrelevant.
            for (uint32_t i = 0; i < oldCount; i++)
                newArray->lists[i + addedCount] = array()->lists[i];
            // New lists occupy the front of the new array.
            for (uint32_t i = 0; i < addedCount; i++)
                newArray->lists[i] = addedLists[i];
            free(array());
            setArray(newArray);
            validate();
        }
        else if (!list && addedCount == 1) {
            // 0 lists -> 1 list
            list = addedLists[0];
            validate();
        }
        else {
            // 1 list -> many lists
            Ptr<List> oldList = list;
            uint32_t oldCount = oldList ? 1 : 0;
            uint32_t newCount = oldCount + addedCount;
            setArray((array_t *)malloc(array_t::byteSize(newCount)));
            array()->count = newCount;
            // The previously stored single list (if any) goes last.
            if (oldList) array()->lists[addedCount] = oldList;
            for (unsigned i = 0; i < addedCount; i++)
                array()->lists[i] = addedLists[i];
            validate();
        }
    }

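When nothing is stored yet and `addedCount == 1`, the `else if (!list && addedCount == 1)` branch is taken and `list` is simply set to `addedLists[0]`. Otherwise, when no array exists yet, the `else` branch is taken: a new array is allocated with `array()->count = oldCount + addedCount`, `oldList` (if any) is placed at the end of `array()->lists`, and `addedLists` are placed at the front. When an array already exists, the `if (hasArray())` branch is taken: the existing lists are moved back by `addedCount` slots in a new array and `addedLists` are placed at the front.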

Printing shows that the newly attached category `LKTeacher(LK)` is in the first element. So when is the `if (hasArray())` branch taken?

Loading of multiple categories

When multiple categories are loaded, the `if (hasArray())` branch is taken.

Printing shows that the last element is the main class.

conclusion

Both main classes and categories can be loaded lazily or non-lazily. If either the main class or a category implements a `load` method, the main class goes through the non-lazy load process. If neither implements it, the methods are put directly into the class data by the system from the Mach-O file the first time a message is sent. Do not use the `load` method unless it is necessary; using `load` in a category is especially discouraged.

supplement

Data structure of `methodList`

When printing `ro` earlier, you can see that the data structure of `methodList` is `method_list_t *`, so let's look at `method_list_t`.

// A list of method_t entries, built on entsize_list_tt with flag mask
// 0xffff0003 and method_t's pointer modifier.
struct method_list_t : entsize_list_tt<method_t, method_list_t, 0xffff0003, method_t::pointer_modifier> {
    // Omit the intermediate code
};

// Header of a counted list whose entries are laid out in memory directly
// after this header, each entry occupying entsize() bytes.
// NOTE(review): Element, FlagMask and PointerModifier are not declared in
// this excerpt; presumably they are this struct's template parameters (see
// how method_list_t instantiates it) — confirm against the full source.
struct entsize_list_tt {
    uint32_t entsizeAndFlags;  // entry size and flag bits packed in one word
    uint32_t count;            // upper bound for valid indices (see get())

    // The packed word with the FlagMask bits cleared.
    uint32_t entsize() const {
        return entsizeAndFlags & ~FlagMask;
    }

    // Only the FlagMask bits of the packed word.
    uint32_t flags() const {
        return entsizeAndFlags & FlagMask;
    }

    // Returns the entry at index i; i == count (one past the last entry)
    // is also accepted.
    Element& getOrEnd(uint32_t i) const { 
        ASSERT(i <= count);
        // Entries start immediately after this header.
        uint8_t *firstEntry = (uint8_t *)this + sizeof(*this);
        Element *entry = (Element *)(firstEntry + i*entsize());
        return *PointerModifier::modify(*this, entry);
    }

    // Returns the entry at index i, which must be strictly below count.
    Element& get(uint32_t i) const { 
        ASSERT(i < count);
        return getOrEnd(i);
    }

   // Omit the intermediate code
};
Copy the code

You can see that the get(0) method is addressed by a pointer shift

(lldb) p $3.get(0)
(method_t) $5 = {}
Copy the code

So the data that comes out of get is method_t, so look at method_t

// Describes a single method. Two in-memory representations exist, "big" and
// "small"; isSmall() selects between them from the low bit of the method_t
// pointer itself.
struct method_t {
    static const uint32_t smallMethodListFlag = 0x80000000;

    method_t(const method_t &other) = delete;

    // The representation of a "big" method. This is the traditional
    // representation of three pointers storing the selector, types
    // and implementation.
    struct big {
        SEL name;
        const char *types;
        MethodListIMP imp;
    };

private:
    // True when the low bit of this object's address is set.
    bool isSmall() const {
        return ((uintptr_t)this & 1) == 1;
    }

    // The representation of a "small" method. This stores three
    // relative offsets to the name, types, and implementation.
    struct small {
        // The name field either refers to a selector (in the shared
        // cache) or a selref (everywhere else).
        RelativePointer<const void *> name;
        RelativePointer<const char *> types;
        RelativePointer<IMP> imp;

        bool inSharedCache() const {
            return (CONFIG_SHARED_CACHE_RELATIVE_DIRECT_SELECTORS &&
                    objc::inSharedCache((uintptr_t)this));
        }
    };

    // Returns the method's implementation pointer.
    // For a small method, a remapped IMP is preferred; when there is none,
    // the relative imp pointer is resolved and signed as a function pointer.
    // For a big method, the stored imp field is returned.
    // NOTE(review): in this excerpt imp() sits below `private:`; the omitted
    // code may contain a `public:` specifier — confirm against the full source.
    IMP imp(bool needsLock) const {
        if (isSmall()) {
            IMP imp = remappedImp(needsLock);
            if (!imp)
                imp = ptrauth_sign_unauthenticated(small().imp.get(),
                                                   ptrauth_key_function_pointer, 0);
            return imp;
        }
        return big().imp;
    }
    
    // Omit the intermediate code
};
Copy the code

In `method_t` there are two representations, `small` and `big`, and the `imp` method extracts the IMP from whichever representation is in use.

Data loading process when the main class does not implement `load` but multiple categories do

Add multiple categories and implement the load method. The main class does not implement the load method. After running the view process, it is found that the attachCategories method is actually used

  • The main class does not implement the `load` method; the categories do

_read_images Lazy load ->realizeClassWithoutSwift->attachToClass->methodizeClass

Why is `attachCategories` reached? Add a breakpoint to inspect the flow.

A new method turns up: `prepare_load_methods`, which forces the main class and all categories (except those with no content) to be loaded.

void prepare_load_methods(const headerType *mhdr)
{
    size_t count, i;

    runtimeLock.assertLocked();

    classref_t const *classlist = 
        _getObjc2NonlazyClassList(mhdr, &count);
    for (i = 0; i < count; i++) {
        schedule_class_load(remapClass(classlist[i]));
    }

    category_t * const *categorylist = _getObjc2NonlazyCategoryList(mhdr, &count);
    for (i = 0; i < count; i++) {
        category_t *cat = categorylist[i];
        Class cls = remapClass(cat->cls);
        if(! cls)continue;  // category for ignored weak-linked class
        if (cls->isSwiftStable()) {
            _objc_fatal("Swift class extensions and categories on Swift "
                        "classes are not allowed to have +load methods");
        }
        const char *mangledName = cls->nonlazyMangledName();
        const char *person = "LKTeacher";
        if (strcmp(mangledName, person) == 0) {
            printf("********%s**********\n",__func__); } realizeClassWithoutSwift(cls, nil); ASSERT(cls->ISA()->isRealized()); add_category_to_loadable_list(cat); }}Copy the code

Does the loading of categories need to be sorted?

The method lists of the class and its categories are stored as a two-dimensional structure (a list of method lists):

// Looks up sel in the method lists returned by cls->data()->methods(),
// visiting the lists in order and returning the first match.
// Returns nil when no list contains sel.
// The runtime lock must be held and cls must already be realized.
static method_t *getMethodNoSuper_nolock(Class cls, SEL sel)
{
    runtimeLock.assertLocked();

    ASSERT(cls->isRealized());
    // fixme nil cls? 
    // fixme nil sel?

    auto const methods = cls->data()->methods();
    for (auto mlists = methods.beginLists(),
              end = methods.endLists();
         mlists != end;
         ++mlists)
    {
        // <rdar://problem/46904873> getMethodNoSuper_nolock is the hottest
        // caller of search_method_list, inlining it turns
        // getMethodNoSuper_nolock into a frame-less function and eliminates
        // any store from this codepath.
        method_t *m = search_method_list_inline(*mlists, sel);
        if (m) return m;
    }

    return nil;
}
Copy the code

So you take the first category, look for the method, then you take the second category, look again until you find the method. So the categories don’t need to be sorted.

        // The search key equals the value at the current probe position.
        if (keyValue == probeValue) {
            // `probe` is a match.
            // Rewind looking for the *first* occurrence of this value.
            // This is required for correct category overrides.
            // Step back while the preceding entry's name also equals keyValue,
            // never moving before `first`.
            while (probe > first && keyValue == (uintptr_t)getName((probe - 1))) {
                probe--;
            }
            return &*probe;
        }
Copy the code

This `while` loop is part of the binary search. When the methods of the class and of its categories are both read from the `ro` data, they are merged into a single list, so `probe--` is needed to rewind to the first matching entry, which is the category's method.

class_ro_t

 // Debugger-side copy of a class_ro_t read from the inferior process.
 // Scalar members are zero-initialized so that an instance whose Read()
 // failed early never exposes indeterminate values.
 struct class_ro_t {
    uint32_t m_flags = 0;
    uint32_t m_instanceStart = 0;
    uint32_t m_instanceSize = 0;
    uint32_t m_reserved = 0;  // only read from the target when pointers are 8 bytes

    // Addresses (in the inferior process) of the structures referenced here.
    lldb::addr_t m_ivarLayout_ptr = 0;
    lldb::addr_t m_name_ptr = 0;
    lldb::addr_t m_baseMethods_ptr = 0;
    lldb::addr_t m_baseProtocols_ptr = 0;
    lldb::addr_t m_ivars_ptr = 0;

    lldb::addr_t m_weakIvarLayout_ptr = 0;
    lldb::addr_t m_baseProperties_ptr = 0;

    // C string read from m_name_ptr by Read().
    std::string m_name;

    // Fills this struct from memory at addr in process; returns false if a
    // memory read fails.
    bool Read(Process *process, lldb::addr_t addr);
};

// Reads a class_ro_t from the memory of `process` at address `addr` and
// decodes it into this object's members, then reads the class name string
// that m_name_ptr points to.
// Returns false if either memory read reports an error, true otherwise.
bool ClassDescriptorV2::class_ro_t::Read(Process *process, lldb::addr_t addr) {
  size_t ptr_size = process->GetAddressByteSize();

  // Total size of the structure in the target, which depends on its
  // pointer size.
  size_t size = sizeof(uint32_t)   // uint32_t flags;
                + sizeof(uint32_t) // uint32_t instanceStart;
                + sizeof(uint32_t) // uint32_t instanceSize;
                + (ptr_size == 8 ? sizeof(uint32_t)
                                 : 0) // uint32_t reserved; // __LP64__ only
                + ptr_size            // const uint8_t *ivarLayout;
                + ptr_size            // const char *name;
                + ptr_size            // const method_list_t *baseMethods;
                + ptr_size            // const protocol_list_t *baseProtocols;
                + ptr_size            // const ivar_list_t *ivars;
                + ptr_size            // const uint8_t *weakIvarLayout;
                + ptr_size;           // const property_list_t *baseProperties;

  DataBufferHeap buffer(size, '\0');
  Status error;

  process->ReadMemory(addr, buffer.GetBytes(), size, error);
  if (error.Fail()) {
    return false;
  }

  DataExtractor extractor(buffer.GetBytes(), size, process->GetByteOrder(),
                          process->GetAddressByteSize());

  lldb::offset_t cursor = 0;

  // Decode the fields in the same order they were summed above.
  m_flags = extractor.GetU32_unchecked(&cursor);
  m_instanceStart = extractor.GetU32_unchecked(&cursor);
  m_instanceSize = extractor.GetU32_unchecked(&cursor);
  if (ptr_size == 8)
    m_reserved = extractor.GetU32_unchecked(&cursor);
  else
    m_reserved = 0;
  m_ivarLayout_ptr = extractor.GetAddress_unchecked(&cursor);
  m_name_ptr = extractor.GetAddress_unchecked(&cursor);
  m_baseMethods_ptr = extractor.GetAddress_unchecked(&cursor);
  m_baseProtocols_ptr = extractor.GetAddress_unchecked(&cursor);
  m_ivars_ptr = extractor.GetAddress_unchecked(&cursor);
  m_weakIvarLayout_ptr = extractor.GetAddress_unchecked(&cursor);
  m_baseProperties_ptr = extractor.GetAddress_unchecked(&cursor);

  // Zero-filled 1024-byte scratch buffer for the class name.
  DataBufferHeap name_buf(1024, '\0');

  process->ReadCStringFromMemory(m_name_ptr, (char *)name_buf.GetBytes(),
                                 name_buf.GetByteSize(), error);

  if (error.Fail()) {
    return false;
  }

  m_name.assign((char *)name_buf.GetBytes());

  return true;
}
Copy the code