On Tue, 18 Mar 2025, Corinna Vinschen wrote:
> Subdir of winsup/cygwin, probably. What I'm most curious about is the
> size it adds to the DLL. I wonder if, say, an extra 32K is really
> usefully spent, given it only checks a small part of ntdll.dll, and only
> once per process tree, too.
I did this with msys-2.0.dll, but that shouldn't matter for the delta.
All sizes below are for the stripped msys-2.0.dll.
start:
3,246,118 bytes
with udis86 vendored, but not called:
3,247,142 bytes
with find_fast_cwd_pointer rewritten to use udis86:
3,328,550 bytes
(I know the second one isn't realistic, the linker could exclude unused
code, I was just kind of curious)
This is with all the "translate to assembly text, intel or at&t syntax"
and "table of strings for opcodes" stuff removed to try to save space;
it is still a net increase of 82,432 bytes.
Here's the new find_fast_cwd_pointer function:
static fcwd_access_t **
find_fast_cwd_pointer ()
{
/* Fetch entry points of relevant functions in ntdll.dll. */
HMODULE ntdll = GetModuleHandle ("ntdll.dll");
if (!ntdll)
return NULL;
const uint8_t *get_dir = (const uint8_t *)
GetProcAddress (ntdll, "RtlGetCurrentDirectory_U");
const uint8_t *ent_crit = (const uint8_t *)
GetProcAddress (ntdll, "RtlEnterCriticalSection");
if (!get_dir || !ent_crit)
return NULL;
ud_t ud_obj;
ud_init (&ud_obj);
ud_set_mode (&ud_obj, 64);
ud_set_input_buffer (&ud_obj, (const uint8_t *) get_dir, 80);
ud_set_pc (&ud_obj, (const uint64_t) get_dir);
const ud_operand_t *opr;
/* Search first relative call instruction in RtlGetCurrentDirectory_U. */
const uint8_t *use_cwd = NULL;
while (ud_disassemble (&ud_obj))
{
if (ud_insn_mnemonic (&ud_obj) == UD_Icall)
{
opr = ud_insn_opr (&ud_obj, 0);
if (opr->type == UD_OP_JIMM && opr->size == 32)
{
/* Fetch offset from instruction and compute address of called
function. This function actually fetches the current FAST_CWD
instance and performs some other actions, not important to us.
*/
use_cwd = (const uint8_t *) (ud_insn_off (&ud_obj) +
ud_insn_len (&ud_obj) +
opr->lval.sdword);
break;
}
}
}
if (!use_cwd)
return NULL;
ud_set_input_buffer (&ud_obj, (const uint8_t *) use_cwd, 120);
ud_set_pc (&ud_obj, (const uint64_t) use_cwd);
/* Next we search for the locking mechanism and perform a sanity check.
On Pre- (or Post-) Windows 8 we basically look for the
RtlEnterCriticalSection call. Windows 8 does not call
RtlEnterCriticalSection. The code manipulates the FastPebLock manually,
probably because RtlEnterCriticalSection has been converted to an inline
function. Either way, we test if the code uses the FastPebLock. */
PRTL_CRITICAL_SECTION lockaddr = NULL;
/* both cases have an `lea rel(%rip)` on the lock */
while (ud_disassemble (&ud_obj))
{
if (ud_insn_mnemonic (&ud_obj) == UD_Ilea)
{
/* this seems to follow intel syntax, in that operand 0 is the
dest and 1 is the src */
opr = ud_insn_opr (&ud_obj, 1);
if (opr->type == UD_OP_MEM && opr->base == UD_R_RIP &&
opr->index == UD_NONE && opr->scale == 0 && opr->offset == 32)
{
lockaddr = (PRTL_CRITICAL_SECTION) (ud_insn_off (&ud_obj) +
ud_insn_len (&ud_obj) +
opr->lval.sdword);
break;
}
}
}
/* Test if lock address is FastPebLock. */
if (lockaddr != NtCurrentTeb ()->Peb->FastPebLock)
return NULL;
/* Next is either the `callq RtlEnterCriticalSection', or on Windows 8,
a `lock btr` */
while (ud_disassemble (&ud_obj))
{
ud_mnemonic_code_t insn = ud_insn_mnemonic (&ud_obj);
if (insn == UD_Icall)
{
opr = ud_insn_opr (&ud_obj, 0);
if (opr->type == UD_OP_JIMM && opr->size == 32)
{
if (ent_crit != (const uint8_t *) (ud_insn_off (&ud_obj) +
ud_insn_len (&ud_obj) +
opr->lval.sdword))
return NULL;
break;
}
}
else if (insn == UD_Ibtr && ud_obj.pfx_lock)
{
/* for Windows 8 */
opr = ud_insn_opr (&ud_obj, 0);
if (opr->type == UD_OP_MEM && opr->base == UD_R_RIP &&
opr->index == UD_NONE && opr->scale == 0 && opr->offset == 32 &&
opr->size == 32)
{
if (lockaddr != (PRTL_CRITICAL_SECTION) (ud_insn_off (&ud_obj) +
ud_insn_len (&ud_obj) + opr->lval.sdword -
offsetof (RTL_CRITICAL_SECTION, LockCount)))
return NULL;
break;
}
}
}
fcwd_access_t **f_cwd_ptr = NULL;
ud_type_t reg = UD_NONE;
/* now we're looking for a movq rel(%rip) */
while (ud_disassemble (&ud_obj))
{
if (ud_insn_mnemonic (&ud_obj) == UD_Imov)
{
const ud_operand_t *opr0 = ud_insn_opr (&ud_obj, 0);
opr = ud_insn_opr (&ud_obj, 1);
if (opr->type == UD_OP_MEM && opr->base == UD_R_RIP &&
opr->index == UD_NONE && opr->scale == 0 &&
opr->offset == 32 && opr->size == 64 &&
opr0->type == UD_OP_REG)
{
f_cwd_ptr = (fcwd_access_t **) (ud_insn_off (&ud_obj) +
ud_insn_len (&ud_obj) +
opr->lval.sdword);
reg = opr0->base;
break;
}
}
}
/* Check that the next instruction tests if the fetched value is NULL. */
if (!ud_disassemble (&ud_obj) || ud_insn_mnemonic (&ud_obj) != UD_Itest)
return NULL;
opr = ud_insn_opr (&ud_obj, 0);
if (opr->type != UD_OP_REG || opr->base != reg ||
memcmp (opr, ud_insn_opr (&ud_obj, 1), offsetof (ud_operand_t, _legacy)))
return NULL;
return f_cwd_ptr;
}