speccpu/x264.MCBuilder: add with_disabled_vectorize_get_ref

This commit is contained in:
2025-04-27 17:19:41 +08:00
parent dfabe0d12b
commit 516d525ef4

View File

@@ -87,49 +87,52 @@ class MCBuilder:
self.mc_src = self.mc_src.replace("src[x] * weight->i_scale", "src[x]")
return self
def with_disabled_vectorize_get_ref(self):
    """
    Disable auto-vectorization for the ``get_ref``-style pixel loops.

    Inserts ``#pragma clang loop vectorize(disable)`` on its own line
    directly before every line of ``self.mc_src`` that contains the loop
    header ``for( int x = 0; x < i_width; x++ )``.  The match is purely
    textual, so every occurrence of that loop header in the source is
    annotated, not only the one inside ``get_ref``.

    The rest of the source is left untouched: existing line endings and
    the presence/absence of a trailing newline are preserved.  The
    operation is idempotent; a loop that is already preceded by the
    pragma is not annotated a second time.

    Returns:
        self: Returns the builder instance for method chaining

    Example:
        >>> builder = MCBuilder()
        >>> "#pragma clang loop vectorize(disable)" not in builder.mc_src
        True
        >>> modified = builder.with_disabled_vectorize_get_ref()
        >>> "#pragma clang loop vectorize(disable)" in modified.mc_src
        True
        >>> target_loop = "for( int x = 0; x < i_width; x++ )"
        >>> lines = modified.mc_src.splitlines()
        >>> for i, line in enumerate(lines):
        ...     if target_loop in line and i > 0:
        ...         if "#pragma clang loop vectorize(disable)" in lines[i-1]:
        ...             print("Pragma correctly placed")
        ...             break
        Pragma correctly placed
        >>> once = modified.mc_src
        >>> modified.with_disabled_vectorize_get_ref().mc_src == once
        True
    """
    pragma_str = "#pragma clang loop vectorize(disable)"
    target_loop = "for( int x = 0; x < i_width; x++ )"

    patched = []
    previous = ""
    # keepends=True keeps each line's own terminator so that joining with ""
    # reproduces the untouched parts of the source byte-for-byte.
    for line in self.mc_src.splitlines(keepends=True):
        # Skip loops that already carry the pragma so repeated calls are safe.
        if target_loop in line and pragma_str not in previous:
            body = line.rstrip("\r\n")
            # Reuse the loop line's terminator (LF or CRLF); fall back to LF
            # when the loop is the unterminated last line of the source.
            newline = line[len(body):] or "\n"
            patched.append(pragma_str + newline)
        patched.append(line)
        previous = line

    self.mc_src = "".join(patched)
    return self
def build(self):
    """Return the current source text stored in ``self.mc_src``."""
    source = self.mc_src
    return source
def get_ref_add_pragma(mc_lines):
    """
    Insert a vectorize-disable pragma before each matching pixel loop.

    Scans ``mc_lines`` and, before every line containing the loop header
    ``for( int x = 0; x < i_width; x++ )``, inserts the line
    ``#pragma clang loop vectorize(disable)\\n`` unless the line directly
    above already contains that pragma.  All input lines are kept, in order.

    Args:
        mc_lines: Sequence of source lines.  The inserted pragma line ends
            with ``"\\n"``, so lines are expected to carry their own line
            terminators (as returned by ``readlines()``).

    Returns:
        tuple: ``(modified_lines, pragma_added)`` where ``modified_lines`` is
        a new list of lines and ``pragma_added`` is True only if at least one
        pragma was inserted by this call.
    """
    pragma_str = "#pragma clang loop vectorize(disable)"
    target_loop = "for( int x = 0; x < i_width; x++ )"

    modified_lines = []
    pragma_added = False
    previous = ""
    for line in mc_lines:
        # Only annotate loops that are not already preceded by the pragma;
        # the loop line itself must always be kept in the output.
        if target_loop in line and pragma_str not in previous:
            modified_lines.append(pragma_str + "\n")
            pragma_added = True
        modified_lines.append(line)
        previous = line
    return modified_lines, pragma_added
def disable_mc_loop_vectorize(mc_path: Path) -> bool:
    """
    Patch the x264 ``mc.c`` at ``mc_path`` so its pixel loops are not vectorized.

    The file is read line by line and passed to ``get_ref_add_pragma``; the
    file is rewritten only when that helper reports that it inserted a pragma.

    Returns:
        The helper's "modified" flag: true if the file on disk was rewritten.
    """
    with open(mc_path, "r") as source_file:
        original_lines = source_file.readlines()

    patched_lines, changed = get_ref_add_pragma(original_lines)

    # Nothing to insert: leave the file on disk untouched.
    if not changed:
        return changed

    with open(mc_path, "w") as target_file:
        target_file.writelines(patched_lines)
    return changed
def update_exe(build_exe, exe_dir):
exe_file = exe_dir / "x264_s_base.mytest-m64"