ZPP: Reduce code bloat for Z_PARAM_STR#19359
Conversation
4cee049 to
a434a77
Compare
Zend/zend_API.h
Outdated
| static zend_always_inline zend_string *zend_parse_arg_str_no_null(zval *arg, uint32_t arg_num) | ||
| { | ||
| if (EXPECTED(Z_TYPE_P(arg) == IS_STRING)) { | ||
| return Z_STR_P(arg); | ||
| } else { | ||
| return zend_parse_arg_str_slow(arg, arg_num); | ||
| } | ||
| } | ||
|
|
There was a problem hiding this comment.
While I agree with getting rid of the dest param for the _slow functions, does this particular extra function actually change anything? Please check the asm; given that it's force inlined it ought to be equivalent to zend_parse_arg_str_ex.
|
@bwoebi Applying the following patch: diff --git a/Zend/zend_API.h b/Zend/zend_API.h
index e6d17cf29bb..b1f3202b4db 100644
--- a/Zend/zend_API.h
+++ b/Zend/zend_API.h
@@ -2088,13 +2088,7 @@ ZEND_API ZEND_COLD void zend_class_redeclaration_error_ex(int type, zend_string
}
#define Z_PARAM_STR(dest) \
- Z_PARAM_PROLOGUE(false, false); \
- dest = zend_parse_arg_str_no_null(_arg, _i); \
- if (UNEXPECTED(dest == NULL)) { \
- _expected_type = Z_EXPECTED_STRING; \
- _error_code = ZPP_ERROR_WRONG_ARG; \
- break; \
- }
+ Z_PARAM_STR_EX(dest, 0, 0)
#define Z_PARAM_STR_OR_NULL(dest) \
Z_PARAM_STR_EX(dest, 1, 0)
@@ -2318,15 +2312,6 @@ static zend_always_inline bool zend_parse_arg_str(zval *arg, zend_string **dest,
return zend_parse_arg_str_ex(arg, dest, check_null, arg_num, /* frameless */ false);
}
-static zend_always_inline zend_string *zend_parse_arg_str_no_null(zval *arg, uint32_t arg_num)
-{
- if (EXPECTED(Z_TYPE_P(arg) == IS_STRING)) {
- return Z_STR_P(arg);
- } else {
- return zend_parse_arg_str_slow(arg, arg_num);
- }
-}
-
static zend_always_inline bool zend_parse_arg_string(zval *arg, char **dest, size_t *dest_len, bool check_null, uint32_t arg_num)
{
zend_string *str;gives me the following assembly for Address range 0x733e00 to 0x733e71:
0x0000000000733e00 <+0>: push %rbx
0x0000000000733e01 <+1>: sub $0x10,%rsp
0x0000000000733e05 <+5>: mov 0x2c(%rdi),%r9d
0x0000000000733e09 <+9>: cmp $0x1,%r9d
0x0000000000733e0d <+13>: jne 0x41aeda <zif_zend_test_is_string_marked_as_valid_utf8-3247910>
0x0000000000733e13 <+19>: mov %rsi,%rdx
0x0000000000733e16 <+22>: cmpb $0x6,0x58(%rdi)
0x0000000000733e1a <+26>: jne 0x733e40 <zif_zend_test_is_string_marked_as_valid_utf8+64>
0x0000000000733e1c <+28>: mov 0x50(%rdi),%rax
0x0000000000733e20 <+32>: testb $0x2,0x5(%rax)
0x0000000000733e24 <+36>: setne %al
0x0000000000733e27 <+39>: movzbl %al,%eax
0x0000000000733e2a <+42>: add $0x2,%eax
0x0000000000733e2d <+45>: mov %eax,0x8(%rdx)
0x0000000000733e30 <+48>: add $0x10,%rsp
0x0000000000733e34 <+52>: pop %rbx
0x0000000000733e35 <+53>: ret
0x0000000000733e36 <+54>: cs nopw 0x0(%rax,%rax,1)
0x0000000000733e40 <+64>: mov %rsi,0x8(%rsp)
0x0000000000733e45 <+69>: lea 0x50(%rdi),%rbx
0x0000000000733e49 <+73>: mov $0x1,%esi
0x0000000000733e4e <+78>: add $0x50,%rdi
0x0000000000733e52 <+82>: mov %r9d,0x4(%rsp)
0x0000000000733e57 <+87>: call 0x7af5c0 <zend_parse_arg_str_slow>
0x0000000000733e5c <+92>: mov 0x4(%rsp),%r9d
0x0000000000733e61 <+97>: test %rax,%rax
0x0000000000733e64 <+100>: je 0x41aebe <zif_zend_test_is_string_marked_as_valid_utf8.cold>
0x0000000000733e6a <+106>: mov 0x8(%rsp),%rdx
0x0000000000733e6f <+111>: jmp 0x733e20 <zif_zend_test_is_string_marked_as_valid_utf8+32>
Address range 0x41aebe to 0x41aef7:
0x000000000041aebe <-3247938>: mov $0x9,%edi
0x000000000041aec3 <-3247933>: mov $0x4,%ecx
0x000000000041aec8 <-3247928>: add $0x10,%rsp
0x000000000041aecc <-3247924>: mov %rbx,%r8
0x000000000041aecf <-3247921>: xor %edx,%edx
0x000000000041aed1 <-3247919>: mov %r9d,%esi
0x000000000041aed4 <-3247916>: pop %rbx
0x000000000041aed5 <-3247915>: jmp 0x41f8d1 <zend_wrong_parameter_error>
0x000000000041aeda <-3247910>: mov $0x1,%edi
0x000000000041aedf <-3247905>: mov $0x1,%esi
0x000000000041aee4 <-3247900>: xor %ebx,%ebx
0x000000000041aee6 <-3247898>: call 0x41f39f <zend_wrong_parameters_count_error>
0x000000000041aeeb <-3247893>: xor %r9d,%r9d
0x000000000041aeee <-3247890>: mov $0x1,%edi
0x000000000041aef3 <-3247885>: xor %ecx,%ecx
0x000000000041aef5 <-3247883>: jmp 0x41aec8 <zif_zend_test_is_string_marked_as_valid_utf8-3247928>Arguably I don't know anything about codegen and assembly, but it doesn't seem equivalent to me? |
|
The new code has one branch less in the hot path, but does an extra push/pop (no idea why). I'd say the saved branch is better though. |
|
The motivation in my original PR was to get rid of the stack protector overhead, something not showcased in the assembly here. |
a434a77 to
417706a
Compare
417706a to
61337e0
Compare
|
please, run |
|
On current master (commit ./buildconf
./configure -C --disable-all --prefix=/home/girgias/Dev/custom-php/
make -j22
size sapi/cli/php --format=SysVI get the following output: Rebasing this commit on top of commit |
|
Closing in favour of #21739 |
This idea is based on @nielsdos previous PR to reduce code bloat: #18436 To do so we create a specialized function to handle parameters that only accept strings and not null such that we can assign the zend_string to the destination directly.
Will take the assembly of the
ext/zend_testfunctionZEND_FUNCTION(zend_test_is_string_marked_as_valid_utf8)as an example:Compiled in a release build and retrieved using:
Prior to this commit (
master@98e0dbcefedeecf5e4d032e54df1aeebaa118754)the ASM looks like:Afterwards it looks like:
For reference we have 772 usages of
Z_PARAM_STR(found usinggit grep -o "Z_PARAM_STR" | wc -l)This approach should also work for the "path" and "C string" version, and more generally for the pure HashTable and object FZPP specifier