Okay, here is my approach:
1. Find the non-exported nt!ExWorkerQueue. On pre-Win8 operating systems this can easily be done with static analysis ( or dynamic analysis, if you have a suitable engine to do so ):
Part of ProcessWorkItemQueueInternal routine:
You may notice that none of the routines is active. That means that you won't catch any of them with your "schedule APC and RtlCaptureStackBackTrace" method, because work items are usually pretty small and execute extremely quickly. Your APC simply interrupts a worker thread that is waiting for new work items, so you actually get nothing interesting.
However, if we talk about some active rootkits, they usually don't return from the worker routine - they run an infinite loop inside it. In this case, APC and RtlCaptureStackBackTrace will work, but then you don't need to know anything about work item internals. Schedule your APC to all system threads to see if anything is executing from pool memory or a hidden module.
Tigzy, I gave you some hints. I respect your efforts, but honestly, your questions and attempts are so lame sometimes... You shouldn't "develop" anything ( especially in kernel-mode ) without deep understanding of what you're doing.
					
1. Find the non-exported nt!ExWorkerQueue. On pre-Win8 operating systems this can easily be done with static analysis ( or dynamic analysis, if you have a suitable engine to do so ):
Code: Select all
The Win8 implementation is slightly different: you can obtain the sought-for pointer from KPRCB -> KNODE ( please note that the queue head is different for each NODE ).
//2000:
//--------------------------------------------------------------------------
//nt!ExQueueWorkItem:
//...
//80419040 6bc03c          imul    eax,eax,3Ch
//80419043 0520dc4680      add     eax,offset nt!ExWorkerQueue (8046dc20)
//80419048 50              push    eax
//80419049 e8575f0100      call    nt!KeInsertQueue (8042efa5)
//
//XP:
//--------------------------------------------------------------------------
//...
//804e4173 6bf63c          imul    esi,esi,3Ch
//804e4176 81c6c0225680    add     esi,offset nt!ExWorkerQueue (805622c0)
//804e417c 56              push    esi
//804e417d e8abffffff      call    nt!KeInsertQueue (804e412d)
//
//
//2003:
//--------------------------------------------------------------------------
//...
//80828d6c 6bf63c          imul    esi,esi,3Ch
//80828d6f 57              push    edi
//80828d70 81c660058b80    add     esi,offset nt!ExWorkerQueue (808b0560)
//80828d76 56              push    esi
//80828d77 e846000000      call    nt!KeInsertQueue (80828dc2)
//
//
//Vista SP2:
//--------------------------------------------------------------------------
//...
//e1c77184 6bf63c          imul    esi,esi,3Ch
//e1c77187 50              push    eax
//e1c77188 81c6c078d0e1    add     esi,offset nt!ExWorkerQueue (e1d078c0)
//e1c7718e 56              push    esi
//e1c7718f e8a3540000      call    nt!KeInsertQueue (e1c7c637)
//
//7:
//--------------------------------------------------------------------------
//...
//82877f40 6bf63c          imul    esi,esi,3Ch
//82877f43 57              push    edi
//82877f44 81c6406d9482    add     esi,offset nt!ExWorkerQueue (82946d40)
//82877f4a 50              push    eax
//82877f4b 8bc6            mov     eax,esi
//82877f4d e832000000      call    nt!KiInsertQueue (82877f84)
Code: Select all
2. Now you can traverse the ExWorkerQueue:
//mov eax, dword ptr fs:[0x20]
//mov eax, dword ptr [ eax + 0x4CC ]
//add eax, 0xC0
Code: Select all
3. Usually all of the worker queues are empty. That happens because all of the worker threads are waiting on KQUEUE->Header ( which is a DISPATCHER_HEADER ). nt!KiInsertQueue removes the first waiting thread and schedules it for execution. In the very rare case when all of the worker threads are busy, the queue is put into the signaled state and the new entry is inserted at the head or tail of the list. When that happens, the system starts additional dynamic worker threads to serve the requests. Dynamic threads terminate after some idle time, while the static worker threads terminate only when shutdown occurs.
/// Walks every executive worker queue of the pre-Win8 layout.
///
/// The queues are addressed as consecutive fixed-size records starting at
/// g_ExWorkerQueue; each one is handed to ProcessWorkItemQueueInternal together
/// with its queue type index.
///
/// WorkItemListHead - list passed through unchanged to ProcessWorkItemQueueInternal
void ProcessWorkItemQueues ( IN PWORKITEM_LIST_ENTRY WorkItemListHead )
{
    /// Distance between two consecutive queues: sizeof ( EX_WORK_QUEUE )
    const ULONG QueueStride = 0x3C;
    ULONG       QueueIndex  = CriticalWorkQueue;

    while ( QueueIndex < MaximumWorkQueue )
    {
        PKQUEUE CurrentQueue = (PKQUEUE)( (ULONG_PTR)g_ExWorkerQueue + QueueIndex * QueueStride );

        ProcessWorkItemQueueInternal ( WorkItemListHead, CurrentQueue, QueueIndex );
        QueueIndex++;
    }
}
/// Win8 variant of the worker queue walk. Must be run on every core.
///
/// The queue head is not taken from a global here: it is read through the FS
/// segment with MSVC inline assembly ( used on purpose in place of the
/// __readfsdword intrinsic ), then each fixed-size queue record is passed to
/// ProcessWorkItemQueueInternal along with its queue type index.
///
/// WorkItemListHead - list passed through unchanged to ProcessWorkItemQueueInternal
void ProcessWorkItemQueuesWin8 ( IN PWORKITEM_LIST_ENTRY WorkItemListHead )
{
    ULONG     QueueIndex;
    ULONG_PTR QueueBase;

    /// Loads the dword at fs:[0x20], follows it to the dword at +0x4CC and adds 0xC0.
    /// NOTE(review): presumably KPCR -> KPRCB -> KNODE -> worker queue array;
    /// the offsets are hard-coded for one x86 build - confirm before reuse.
    __asm
    {
        mov eax, dword ptr fs:[0x20]
        mov eax, dword ptr [ eax + 0x4CC ]
        add eax, 0xC0
        mov QueueBase, eax
    }

    QueueIndex = CriticalWorkQueue;
    while ( QueueIndex < MaximumWorkQueue )
    {
        /// 0x38 == sizeof ( EX_WORK_QUEUE ) - Win8
        PKQUEUE CurrentQueue = (PKQUEUE)( QueueBase + QueueIndex * 0x38 );

        ProcessWorkItemQueueInternal ( WorkItemListHead, CurrentQueue, QueueIndex );
        QueueIndex++;
    }
}
Part of ProcessWorkItemQueueInternal routine:
Code: Select all
And this is how it looks to the user ( pls see the attachment ).
/// Inspects a single worker queue and reports everything found through AddWorkItem.
///
/// Two lists of the KQUEUE are walked:
///   - EntryListHead:  every entry is treated as a WORK_QUEUE_ITEM and its
///                     WorkerRoutine is reported with no thread attached.
///   - ThreadListHead: every entry is converted back to its owning thread by
///                     subtracting the OS-dependent QueueListEntry offset; the
///                     thread is reported together with its State and WaitReason bytes.
///
/// WorkItemListHead - list passed through unchanged to AddWorkItem
/// Queue            - the queue whose entry and thread lists are enumerated
/// QueueType        - queue type index, passed through unchanged to AddWorkItem
///
/// NOTE(review): no lock is taken in this excerpt while the lists are walked;
/// confirm that the caller provides whatever synchronization is required.
void ProcessWorkItemQueueInternal ( IN PWORKITEM_LIST_ENTRY WorkItemListHead, IN PKQUEUE Queue, IN ULONG QueueType )
{
    PLIST_ENTRY ListEntry;

    /// Pending work items. The direct cast assumes the list link is the first
    /// member of WORK_QUEUE_ITEM, so the entry address is also the item address.
    for ( ListEntry = Queue -> EntryListHead.Flink;
          ListEntry != &Queue -> EntryListHead;
          ListEntry = ListEntry -> Flink )
    {
        AddWorkItem ( WorkItemListHead, QueueType, (ULONG_PTR)((PWORK_QUEUE_ITEM)ListEntry) -> WorkerRoutine, FALSE, 0 /* Not Assigned Yet */, 0, 0 );
    }

    /// Threads linked to this queue.
    for ( ListEntry = Queue -> ThreadListHead.Flink;
          ListEntry != &Queue -> ThreadListHead;
          ListEntry = ListEntry -> Flink )
    {
        /// Step back from the embedded list entry to the start of the thread object
        ULONG_PTR Thread = (ULONG_PTR)ListEntry - g_OSDependentOffsets._KTHREAD_QueueListEntry;

        if ( *(PUCHAR)( Thread + g_OSDependentOffsets._KTHREAD_KernelStackResident ) & KERNEL_STACK_RESIDENT_BIT  )
        {
            /// Kernel stack is resident: a stack backtrace is performed here to
            /// recover WorkerRoutine. That code is intentionally omitted from this
            /// excerpt, so WorkerRoutine and the '...' argument are placeholders.
            AddWorkItem ( WorkItemListHead, QueueType, WorkerRoutine, ..., Thread,
                          *(PUCHAR)( Thread + g_OSDependentOffsets._KTHREAD_State ), *(PUCHAR)( Thread + g_OSDependentOffsets._KTHREAD_WaitReason ) );
        } else
        {
            /// Thread is not currently active: report it without a worker routine
            AddWorkItem ( WorkItemListHead, QueueType, 0, FALSE, Thread,
                          *(PUCHAR)( Thread + g_OSDependentOffsets._KTHREAD_State ), *(PUCHAR)( Thread + g_OSDependentOffsets._KTHREAD_WaitReason ) );
        }
    }
}
You may notice that none of the routines is active. That means that you won't catch any of them with your "schedule APC and RtlCaptureStackBackTrace" method, because work items are usually pretty small and execute extremely quickly. Your APC simply interrupts a worker thread that is waiting for new work items, so you actually get nothing interesting.
However, if we talk about some active rootkits, they usually don't return from the worker routine - they run an infinite loop inside it. In this case, APC and RtlCaptureStackBackTrace will work, but then you don't need to know anything about work item internals. Schedule your APC to all system threads to see if anything is executing from pool memory or a hidden module.
Tigzy, I gave you some hints. I respect your efforts, but honestly, your questions and attempts are so lame sometimes... You shouldn't "develop" anything ( especially in kernel-mode ) without deep understanding of what you're doing.
Attachments



