speccpu/x264.MCBuilder: add with_disabled_vectorize_get_ref
This commit is contained in:
@@ -87,49 +87,52 @@ class MCBuilder:
|
||||
self.mc_src = self.mc_src.replace("src[x] * weight->i_scale", "src[x]")
|
||||
return self
|
||||
|
||||
def with_disabled_vectorize_get_ref(self):
    """
    Disable auto-vectorization for the get_ref function's loop.

    Inserts a ``#pragma clang loop vectorize(disable)`` line directly
    before every line of ``self.mc_src`` that contains
    ``for( int x = 0; x < i_width; x++ )``. Matching is purely textual, so
    any loop written exactly this way receives the pragma, not only one
    located inside ``get_ref``.

    The operation is idempotent: a loop whose preceding line already
    carries the pragma is left untouched. Existing line endings, including
    a trailing newline at the end of the source, are preserved.

    Returns:
        self: Returns the builder instance for method chaining

    Example:
        >>> builder = MCBuilder()
        >>> "#pragma clang loop vectorize(disable)" not in builder.mc_src
        True
        >>> modified = builder.with_disabled_vectorize_get_ref()
        >>> "#pragma clang loop vectorize(disable)" in modified.mc_src
        True
        >>> target_loop = "for( int x = 0; x < i_width; x++ )"
        >>> lines = modified.mc_src.splitlines()
        >>> for i, line in enumerate(lines):
        ...     if target_loop in line and i > 0:
        ...         if "#pragma clang loop vectorize(disable)" in lines[i-1]:
        ...             print("Pragma correctly placed")
        ...             break
        Pragma correctly placed
        >>> once = modified.mc_src
        >>> modified.with_disabled_vectorize_get_ref().mc_src == once
        True
    """
    pragma_str = "#pragma clang loop vectorize(disable)"
    target_loop = "for( int x = 0; x < i_width; x++ )"

    modified_lines = []
    previous_line = ""

    # keepends=True lets the source be rebuilt byte-for-byte (apart from the
    # inserted pragmas) instead of losing the final newline on re-join.
    for line in self.mc_src.splitlines(keepends=True):
        if target_loop in line and pragma_str not in previous_line:
            # Reuse the loop line's own line ending so the file stays
            # consistent; fall back to "\n" when the line has none.
            line_ending = line[len(line.rstrip("\r\n")):] or "\n"
            modified_lines.append(pragma_str + line_ending)
        modified_lines.append(line)
        previous_line = line

    self.mc_src = "".join(modified_lines)
    return self
|
||||
|
||||
def build(self):
    """Return the builder's current ``mc_src`` value, with all edits applied so far."""
    return self.mc_src
|
||||
|
||||
|
||||
def get_ref_add_pragma(mc_lines):
    """
    Insert a vectorize-disable pragma before each matching inner loop.

    Every line containing ``for( int x = 0; x < i_width; x++ )`` gets a
    ``#pragma clang loop vectorize(disable)`` line inserted directly before
    it, unless the preceding input line already carries that pragma. No
    input line is ever dropped from the result.

    Args:
        mc_lines: Sequence of source lines. The inserted pragma line ends
            with ``"\\n"``, so the lines are expected to carry their own
            line endings (as returned by ``readlines()``).

    Returns:
        tuple: ``(modified_lines, pragma_added)`` where ``modified_lines``
        is a new list of lines and ``pragma_added`` is True only if at
        least one pragma was inserted.
    """
    pragma_str = "#pragma clang loop vectorize(disable)"
    target_loop = "for( int x = 0; x < i_width; x++ )"

    modified_lines = []
    pragma_added = False
    previous_line = ""

    for line in mc_lines:
        # Only insert when the loop is not already guarded; the loop line
        # itself must always be kept, guarded or not.
        if target_loop in line and pragma_str not in previous_line:
            modified_lines.append(pragma_str + "\n")
            pragma_added = True
        modified_lines.append(line)
        previous_line = line

    return modified_lines, pragma_added
|
||||
|
||||
|
||||
def disable_mc_loop_vectorize(mc_path: Path) -> bool:
    """
    Patch the x264 mc.c file so its loops are not vectorized.

    Reads the file at ``mc_path``, passes its lines through
    ``get_ref_add_pragma`` and rewrites the file only when that helper
    reports that a pragma was added. Intended mainly for "get_ref"
    performance.

    Args:
        mc_path: Location of the mc.c file to read and, if needed, rewrite.

    Returns:
        The "modified" flag reported by ``get_ref_add_pragma``: true if the
        file was rewritten.
    """
    with open(mc_path, "r") as source:
        original_lines = source.readlines()

    patched_lines, changed = get_ref_add_pragma(original_lines)

    # Nothing was inserted: leave the file on disk untouched.
    if not changed:
        return changed

    with open(mc_path, "w") as target:
        target.writelines(patched_lines)

    return changed
|
||||
|
||||
|
||||
def update_exe(build_exe, exe_dir):
|
||||
exe_file = exe_dir / "x264_s_base.mytest-m64"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user