speccpu/x264.MCBuilder: add with_disabled_vectorize_get_ref
This commit is contained in:
@@ -87,47 +87,50 @@ class MCBuilder:
|
|||||||
self.mc_src = self.mc_src.replace("src[x] * weight->i_scale", "src[x]")
|
self.mc_src = self.mc_src.replace("src[x] * weight->i_scale", "src[x]")
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def build(self):
|
def with_disabled_vectorize_get_ref(self):
|
||||||
return self.mc_src
|
"""
|
||||||
|
Disable auto-vectorization for the get_ref function's loop.
|
||||||
|
Adds a pragma directive before the main for loop in get_ref.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
self: Returns the builder instance for method chaining
|
||||||
|
|
||||||
def get_ref_add_pragma(mc_lines):
|
Example:
|
||||||
# Initialize variables
|
>>> builder = MCBuilder()
|
||||||
modified_lines = []
|
>>> "#pragma clang loop vectorize(disable)" not in builder.mc_src
|
||||||
pragma_added = False
|
True
|
||||||
|
>>> modified = builder.with_disabled_vectorize_get_ref()
|
||||||
|
>>> "#pragma clang loop vectorize(disable)" in modified.mc_src
|
||||||
|
True
|
||||||
|
>>> target_loop = "for( int x = 0; x < i_width; x++ )"
|
||||||
|
>>> lines = modified.mc_src.splitlines()
|
||||||
|
>>> for i, line in enumerate(lines):
|
||||||
|
... if target_loop in line and i > 0:
|
||||||
|
... if "#pragma clang loop vectorize(disable)" in lines[i-1]:
|
||||||
|
... print("Pragma correctly placed")
|
||||||
|
... break
|
||||||
|
Pragma correctly placed
|
||||||
|
"""
|
||||||
pragma_str = "#pragma clang loop vectorize(disable)"
|
pragma_str = "#pragma clang loop vectorize(disable)"
|
||||||
|
target_loop = "for( int x = 0; x < i_width; x++ )"
|
||||||
|
|
||||||
# Iterate through each line to find the target for loop
|
# Split the source into lines to process
|
||||||
for i, line in enumerate(mc_lines):
|
lines = self.mc_src.splitlines()
|
||||||
# If the previous line already contains the #pragma string, skip this line.
|
modified_lines = []
|
||||||
if i > 1 and pragma_str in mc_lines[i - 1]:
|
|
||||||
continue
|
|
||||||
if "for( int x = 0; x < i_width; x++ )" in line:
|
|
||||||
modified_lines.append("#pragma clang loop vectorize(disable)\n")
|
|
||||||
pragma_added = True
|
|
||||||
|
|
||||||
|
# Process each line
|
||||||
|
for line in lines:
|
||||||
|
# Add pragma before the target loop
|
||||||
|
if target_loop in line:
|
||||||
|
modified_lines.append(pragma_str)
|
||||||
modified_lines.append(line)
|
modified_lines.append(line)
|
||||||
|
|
||||||
return modified_lines, pragma_added
|
# Rebuild the source string
|
||||||
|
self.mc_src = "\n".join(modified_lines)
|
||||||
|
return self
|
||||||
|
|
||||||
|
def build(self):
|
||||||
def disable_mc_loop_vectorize(mc_path: Path) -> bool:
|
return self.mc_src
|
||||||
"""
|
|
||||||
Disable x264 mc.c loop vectorizing, mainly for "get_ref" performance.
|
|
||||||
Updates the file located at mc_path if needed; returns True if the file was updated.
|
|
||||||
"""
|
|
||||||
# Read the content of mc.c
|
|
||||||
with open(mc_path, "r") as mc_file:
|
|
||||||
mc_lines = mc_file.readlines()
|
|
||||||
|
|
||||||
mc_pragma_lines, mc_modified = get_ref_add_pragma(mc_lines)
|
|
||||||
|
|
||||||
if mc_modified:
|
|
||||||
with open(mc_path, "w") as mc:
|
|
||||||
mc.writelines(mc_pragma_lines)
|
|
||||||
|
|
||||||
return mc_modified
|
|
||||||
|
|
||||||
|
|
||||||
def update_exe(build_exe, exe_dir):
|
def update_exe(build_exe, exe_dir):
|
||||||
|
|||||||
Reference in New Issue
Block a user