From 3930d68b3eb751242bbf0162057c2207319b34fb Mon Sep 17 00:00:00 2001 From: 079035 <62355576+079035@users.noreply.github.com> Date: Sat, 30 Aug 2025 19:36:33 -0400 Subject: [PATCH 1/6] Add Arvo fuzzing infrastructure This adds fuzzing support for Arvo under the experimental contrib directory. --- infra/experimental/contrib/arvo/__init__.py | 0 infra/experimental/contrib/arvo/arvo_data.py | 205 ++++ .../contrib/arvo/arvo_reproducer.py | 1082 +++++++++++++++++ .../contrib/arvo/arvo_reproducer_test.py | 120 ++ infra/experimental/contrib/arvo/arvo_utils.py | 769 ++++++++++++ .../contrib/arvo/component_fixes.json | 40 + .../contrib/arvo/hacks/__init__.py | 102 ++ .../contrib/arvo/hacks/cryptofuzz.py | 22 + infra/experimental/contrib/arvo/hacks/curl.py | 19 + .../contrib/arvo/hacks/dlplibs.py | 17 + .../experimental/contrib/arvo/hacks/duckdb.py | 20 + .../experimental/contrib/arvo/hacks/ffmpeg.py | 17 + infra/experimental/contrib/arvo/hacks/flac.py | 18 + .../contrib/arvo/hacks/freeradius.py | 17 + infra/experimental/contrib/arvo/hacks/gdal.py | 48 + .../contrib/arvo/hacks/ghostscript.py | 22 + .../experimental/contrib/arvo/hacks/gnutls.py | 18 + .../contrib/arvo/hacks/graphicsmagick.py | 25 + .../contrib/arvo/hacks/imagemagick.py | 34 + .../contrib/arvo/hacks/jbig2dec.py | 21 + infra/experimental/contrib/arvo/hacks/lcms.py | 17 + .../contrib/arvo/hacks/libheif.py | 17 + .../contrib/arvo/hacks/libredwg.py | 20 + .../contrib/arvo/hacks/libreoffice.py | 49 + .../contrib/arvo/hacks/libyang.py | 19 + infra/experimental/contrib/arvo/hacks/lwan.py | 17 + .../contrib/arvo/hacks/openh264.py | 30 + .../contrib/arvo/hacks/quickjs.py | 17 + .../contrib/arvo/hacks/radare2.py | 17 + infra/experimental/contrib/arvo/hacks/skia.py | 19 + .../contrib/arvo/hacks/uwebsockets.py | 22 + .../contrib/arvo/hacks/wireshark.py | 16 + .../contrib/arvo/hacks/wolfssl.py | 22 + infra/experimental/contrib/arvo/hacks/yara.py | 19 + .../contrib/arvo/string_replacement.json | 96 ++ 35 files changed, 
3013 insertions(+) create mode 100644 infra/experimental/contrib/arvo/__init__.py create mode 100644 infra/experimental/contrib/arvo/arvo_data.py create mode 100644 infra/experimental/contrib/arvo/arvo_reproducer.py create mode 100644 infra/experimental/contrib/arvo/arvo_reproducer_test.py create mode 100644 infra/experimental/contrib/arvo/arvo_utils.py create mode 100644 infra/experimental/contrib/arvo/component_fixes.json create mode 100644 infra/experimental/contrib/arvo/hacks/__init__.py create mode 100644 infra/experimental/contrib/arvo/hacks/cryptofuzz.py create mode 100644 infra/experimental/contrib/arvo/hacks/curl.py create mode 100644 infra/experimental/contrib/arvo/hacks/dlplibs.py create mode 100644 infra/experimental/contrib/arvo/hacks/duckdb.py create mode 100644 infra/experimental/contrib/arvo/hacks/ffmpeg.py create mode 100644 infra/experimental/contrib/arvo/hacks/flac.py create mode 100644 infra/experimental/contrib/arvo/hacks/freeradius.py create mode 100644 infra/experimental/contrib/arvo/hacks/gdal.py create mode 100644 infra/experimental/contrib/arvo/hacks/ghostscript.py create mode 100644 infra/experimental/contrib/arvo/hacks/gnutls.py create mode 100644 infra/experimental/contrib/arvo/hacks/graphicsmagick.py create mode 100644 infra/experimental/contrib/arvo/hacks/imagemagick.py create mode 100644 infra/experimental/contrib/arvo/hacks/jbig2dec.py create mode 100644 infra/experimental/contrib/arvo/hacks/lcms.py create mode 100644 infra/experimental/contrib/arvo/hacks/libheif.py create mode 100644 infra/experimental/contrib/arvo/hacks/libredwg.py create mode 100644 infra/experimental/contrib/arvo/hacks/libreoffice.py create mode 100644 infra/experimental/contrib/arvo/hacks/libyang.py create mode 100644 infra/experimental/contrib/arvo/hacks/lwan.py create mode 100644 infra/experimental/contrib/arvo/hacks/openh264.py create mode 100644 infra/experimental/contrib/arvo/hacks/quickjs.py create mode 100644 
"""ARVO data management module.

Helpers that adjust ARVO reproduction inputs: resource/URL remapping,
Dockerfile and build-script patching, and per-project component filtering.
"""

from pathlib import Path
from typing import Any, Dict, Tuple
from datetime import datetime
from hacks import get_project_hack

from arvo_utils import (DockerfileModifier, CHANGED_KEY, CHANGED_TYPE,
                        GLOBAL_STR_REPLACE, UPDATE_TABLE)


def update_resource_info(item_name: str, item_url: str,
                         item_type: str) -> Tuple[str, str, str]:
  """Remap a resource's name/url/type through the configuration tables.

  Args:
    item_name: Name of the resource item (may be an alias in CHANGED_KEY).
    item_url: URL of the resource.
    item_type: Type of the resource.

  Returns:
    Tuple of (updated_name, updated_url, updated_type).
  """
  canonical = CHANGED_KEY.get(item_name, item_name)
  if canonical not in UPDATE_TABLE:
    return canonical, item_url, item_type
  # UPDATE_TABLE overrides the URL; CHANGED_TYPE may override the type,
  # defaulting to git.
  return canonical, UPDATE_TABLE[canonical], CHANGED_TYPE.get(canonical, 'git')


def dockerfile_cleaner(dockerfile_path: str | Path) -> None:
  """Strip branch-pinning flags from git clone commands in a Dockerfile.

  Args:
    dockerfile_path: Path to the Dockerfile to clean.
  """
  modifier = DockerfileModifier(dockerfile_path)
  # Drop --single-branch first, then any --branch/-b selector.
  for branch_flag in (r'(--single-branch\s+)',
                      r'(--branch\s+\S+\s+|-b\s\S+\s+|--branch=\S+\s+)'):
    modifier.replace(branch_flag, "")
  modifier.flush()


def fix_dockerfile(dockerfile_path: str | Path,
                   project: str | None = None,
                   commit_date: datetime | None = None) -> bool:
  """Apply generic and project-specific fixes to a Dockerfile.

  Args:
    dockerfile_path: Path to the Dockerfile to fix.
    project: Name of the project for project-specific fixes.
    commit_date: Target commit date (required by some project hacks).

  Returns:
    True if fixes were applied successfully, False otherwise.
  """
  dockerfile_cleaner(dockerfile_path)
  modifier = DockerfileModifier(dockerfile_path)

  # Some Dockerfiles forget to apt update before apt install, and we have to
  # install/configure ca-certificates + git sslVerify to avoid certificate
  # issues on old images.
  # TODO: improve regex
  modifier.replace_once(
      r'RUN apt', "RUN apt update -y && apt install git ca-certificates -y && "
      "git config --global http.sslVerify false && "
      "git config --global --add safe.directory '*'\nRUN apt")
  modifier.str_replace_all(GLOBAL_STR_REPLACE)

  # Project-specific hacks that solve building/compiling problems.
  if project:
    hack = get_project_hack(project)
    if hack:
      # Some hacks pick their patch based on the target date.
      if hasattr(hack, 'set_commit_date') and commit_date:
        hack.set_commit_date(commit_date)
      if not hack.apply_dockerfile_fixes(modifier):
        return False

  modifier.clean_comments()
  return modifier.flush()


def fix_build_script(file_path: Path, project_name: str) -> bool:
  """Fix the build script for specific projects.

  Args:
    file_path: Path to the build script file.
    project_name: Name of the project.

  Returns:
    True if fixes were applied successfully, False otherwise.
  """
  # A missing build script is not an error — nothing to fix.
  if not file_path.exists():
    return True

  modifier = DockerfileModifier(file_path)
  hack = get_project_hack(project_name)
  if hack and not hack.apply_build_script_fixes(modifier):
    return False
  return modifier.flush()


def extra_scripts(project_name: str, source_dir: Path) -> bool:
  """Execute extra per-project scripts against the source tree.

  Allows modifying build.sh scripts and other files to adjust the
  compiling settings.

  Args:
    project_name: Name of the project.
    source_dir: Path to the source directory.

  Returns:
    True if scripts executed successfully, False otherwise.
  """
  hack = get_project_hack(project_name)
  if hack and not hack.apply_extra_fixes(source_dir):
    return False
  return True


def special_component(project_name: str, item_key: str, item: Dict[str, Any],
                      dockerfile: str | Path) -> bool:
  """Check whether a srcmap entry needs special submodule handling.

  TODO: Theoretically, this func can be removed once generic submodule
  handling covers these cases, but that is not tested. These components
  are submodules, yet their info appears in the srcmap.

  Args:
    project_name: Name of the project.
    item_key: Key of the item in srcmap.
    item: Item data from srcmap.
    dockerfile: Path to the dockerfile.

  Returns:
    True if the component should be skipped, False otherwise.
  """
  if project_name == 'libressl' and item_key == '/src/libressl/openbsd':
    return False

  if project_name == 'gnutls' and item_key == '/src/gnutls/nettle':
    # `git submodule update --init` already handles nettle; only skip when
    # the Dockerfile does not mention this exact revision.
    with open(dockerfile, encoding='utf-8') as handle:
      contents = handle.read()
    return item['rev'] not in contents

  return False


def skip_component(project_name: str, item_name: str) -> bool:
  """Check if a component should be skipped during processing.

  TODO: solve the submodule problem in a decent way

  Args:
    project_name: Name of the project.
    item_name: Name of the item/component.

  Returns:
    True if the component should be skipped, False otherwise.
  """
  ignored = (
      "/src",
      "/src/LPM/external.protobuf/src/external.protobuf",
      "/src/libprotobuf-mutator/build/external.protobuf/src/external.protobuf",
  )
  name = item_name.strip(" ")

  # Special case for skia: these subtrees are populated by submodule init.
  if project_name in ('skia', 'skia-ftz') and name.startswith("/src/skia/"):
    return True

  return name in ignored


if __name__ == "__main__":
  pass
+""" + +import argparse +import json +import logging +import os +import re +import subprocess +import tempfile +import time +from bisect import bisect_right +from datetime import datetime +from pathlib import Path +from typing import Any +from urllib.parse import parse_qs, urlparse + +import requests +from dateutil.parser import parse +from google.cloud import storage +from dataclasses import dataclass + +from arvo_data import (extra_scripts, fix_build_script, fix_dockerfile, + skip_component, special_component, update_resource_info) +from arvo_utils import (DockerfileModifier, VersionControlTool, check_call, + clean_dir, clone, docker_build, docker_run, execute, + hg_clone, leave_ret, svn_clone, OSS_ERR, OSS_OUT, + OSS_WORK, PNAME_TABLE) + +# Global storage client +storage_client: storage.Client | None = None + + +@dataclass +class BuildData: + project_name: str + engine: str + sanitizer: str + architecture: str + + +def parse_oss_fuzz_report(report_text: bytes, + local_id: int) -> dict[str, Any] | bool: + """Parse OSS-Fuzz report text and extract relevant information. + + Args: + report_text: Raw report text as bytes. + local_id: Local ID of the issue. + + Returns: + Dictionary containing parsed report data, or False if parsing fails. 
+ """ + text = report_text.decode( + 'unicode_escape', errors='ignore') # decode escaped unicode like \u003d + + def extract(pattern: str, default: str = '') -> str: + """Extract information using regex pattern.""" + match = re.search(pattern, text) + if not match: + if default == '': + logging.error(f"FAILED to PARSE {pattern} {local_id=}") + exit(1) + else: + return default + return match.group(1).strip() + + result = { + "project": + extract(r'(?:Target|Project):\s*(\S+)', 'NOTFOUND'), + "job_type": + extract(r'Job Type:\s*(\S+)'), + "platform": + extract(r'Platform Id:\s*(\S+)', 'linux'), + "crash_type": + extract(r'Crash Type:\s*(.+)'), + "crash_address": + extract(r'Crash Address:\s*(\S+)'), + "severity": + extract(r'Security Severity:\s*(\w+)', 'Medium'), + "regressed": + extract(r'(?:Regressed|Crash Revision):\s*(https?://\S+)', + "NO_REGRESS"), + "reproducer": + extract(r'(?:Minimized Testcase|Reproducer Testcase|Download).*:' + r'\s*(https?://\S+)'), + "verified_fixed": + extract(r'(?:fixed in|Fixed:)\s*(https?://\S+revisions\S+)', + 'NO_FIX'), + "localId": + local_id + } + + sanitizer_map = { + "address (ASAN)": "address", + "memory (MSAN)": "memory", + "undefined (UBSAN)": "undefined", + "asan": "address", + "msan": "memory", + "ubsan": "undefined", + } + + fuzz_target = extract(r'(?:Fuzz Target|Fuzz target binary):\s*(\S+)', + 'NOTFOUND') + + if len(result['job_type'].split("_")) == 2: + return False + else: + result['sanitizer'] = sanitizer_map[result['job_type'].split("_")[1]] + + if fuzz_target != 'NOTFOUND': + result['fuzz_target'] = fuzz_target + if result['project'] == "NOTFOUND": + result['project'] = result['job_type'].split("_")[-1] + + return result + + +def fetch_issue(local_id: int | str) -> dict[str, Any] | bool: + """Fetch issue information from OSS-Fuzz tracker. + + Args: + local_id: Local ID of the issue to fetch. + + Returns: + Dictionary containing issue information, or False if fetch fails. 
+ """ + # TODO: Replace this with proper issue tracker API calls + url = (f'https://issues.oss-fuzz.com/action/issues/{local_id}/' + f'events?currentTrackerId=391') + session = requests.Session() + + # Step 1: Get the token from the cookie + session.get("https://issues.oss-fuzz.com/") + xsrf_token = session.cookies.get("XSRF_TOKEN") + + headers = { + 'accept': + 'application/json, text/plain, */*', + 'accept-language': + 'en,zh-CN;q=0.9,zh;q=0.8,ar;q=0.7', + 'priority': + 'u=1, i', + 'referer': + 'https://issues.oss-fuzz.com/', + 'sec-ch-ua': + '"Chromium";v="134", "Not:A-Brand";v="24", "Google Chrome";v="134"', + 'sec-ch-ua-mobile': + '?0', + 'sec-ch-ua-platform': + '"Linux"', + 'sec-fetch-dest': + 'empty', + 'sec-fetch-mode': + 'cors', + 'sec-fetch-site': + 'same-origin', + 'user-agent': + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' + '(KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36', + 'X-XSRF-Token': + xsrf_token + } + + response = session.get(url, headers=headers) + raw_text = response.content + + try: + result = parse_oss_fuzz_report(raw_text, int(local_id)) + except Exception: + logging.error(f"FAIL on {local_id}, skip") + return False + + return result + + +def parse_job_type(job_type: str) -> dict[str, Any]: + """Parse job type string into components. + + Args: + job_type: Job type string from OSS-Fuzz. + + Returns: + Dictionary containing parsed job type components. 
+ """ + parts = job_type.split('_') + remainder = [] + parsed = {} + + while len(parts) > 0: + part = parts.pop(0) + if part in ['afl', 'honggfuzz', 'libfuzzer']: + parsed['engine'] = part + elif part in ['asan', 'ubsan', 'msan']: + parsed['sanitizer'] = part + elif part == 'i386': + parsed['arch'] = part + elif part == 'untrusted': + parsed['untrusted'] = True + else: + remainder.append(part) + + if len(remainder) > 0: + parsed['project'] = '_'.join(remainder) + if 'arch' not in parsed: + parsed['arch'] = 'x86_64' + if 'engine' not in parsed: + parsed['engine'] = 'none' + if 'untrusted' not in parsed: + parsed['untrusted'] = False + + return parsed + + +def download_build_artifacts(metadata: dict[str, Any], url: str, + outdir: Path) -> list[str] | bool: + """Download build artifacts from Google Cloud Storage. + + Args: + metadata: Issue metadata containing build information. + url: URL to download artifacts from. + outdir: Output directory for downloaded files. + + Returns: + List of downloaded file paths on success, False on failure. 
+ """ + global storage_client + if storage_client is None: + storage_client = storage.Client() + + bucket_map = { + "libfuzzer_address_i386": "clusterfuzz-builds-i386", + "libfuzzer_memory_i386": "clusterfuzz-builds-i386", + "libfuzzer_undefined_i386": "clusterfuzz-builds-i386", + "libfuzzer_address": "clusterfuzz-builds", + "libfuzzer_memory": "clusterfuzz-builds", + "libfuzzer_undefined": "clusterfuzz-builds", + "afl_address": "clusterfuzz-builds-afl", + "honggfuzz_address": "clusterfuzz-builds-honggfuzz", + } + + sanitizer_map = { + "address (ASAN)": "address", + "memory (MSAN)": "memory", + "undefined (UBSAN)": "undefined", + "asan": "address", + "msan": "memory", + "ubsan": "undefined", + "address": "address", + "memory": "memory", + "undefined": "undefined", + None: "", + } + + job_name = metadata["job_type"] + job = parse_job_type(job_name) + + # These don't have any build artifacts + if job['untrusted'] or job['engine'] == 'none': + return False + + # Prefer the info from the job name, since the metadata + # format has changed several times. 
+ if 'project' in metadata: + project = metadata["project"] + else: + project = job['project'] + + if 'sanitizer' in metadata: + sanitizer = sanitizer_map[metadata["sanitizer"]] + assert sanitizer == sanitizer_map[job['sanitizer']] + else: + sanitizer = sanitizer_map[job['sanitizer']] + + fuzzer = job['engine'] + bucket_string = f"{fuzzer}_{sanitizer}" + if job['arch'] == 'i386': + bucket_string += '_i386' + + assert bucket_string in bucket_map + bucket_name = bucket_map[bucket_string] + + # Grab the revision from the URL + urlparams = parse_qs(urlparse(url).query) + + if 'revision' in urlparams: + revision = urlparams['revision'][0] + elif 'range' in urlparams: + revision = urlparams['range'][0].split(':')[1] + else: + return False + + zip_name = f'{project}-{sanitizer}-{revision}.zip' + srcmap_name = f'{project}-{sanitizer}-{revision}.srcmap.json' + zip_path = f'{project}/{zip_name}' + srcmap_path = f'{project}/{srcmap_name}' + downloaded_files = [] + bucket = storage_client.bucket(bucket_name) + + for path, name in [(srcmap_path, srcmap_name)]: + download_path = outdir / name + + if download_path.exists(): + logging.info(f'Skipping {name} (already exists)') + downloaded_files.append(download_path) + continue + + blob = bucket.blob(path) + if not blob.exists(): + logging.info(f'Skipping {name} (not found)') + continue + + blob.download_to_filename(str(download_path)) + logging.info(f'Downloaded {name}') + downloaded_files.append(download_path) + + return [str(f) for f in downloaded_files] + + +def get_project_name(issue: dict[str, Any], srcmap: str | Path) -> str | bool: + """Get project name from issue and srcmap data. + + Args: + issue: Issue dictionary containing project information. + srcmap: Path to the srcmap file. + + Returns: + Project name on success, False on failure. 
+ """ + if 'project' not in issue: + logging.error("[FAILED] to get project field in issue") + return False + else: + project_name = issue['project'] + + if project_name in PNAME_TABLE: + return PNAME_TABLE[project_name] # handling special cases + + with open(srcmap, encoding='utf-8') as f: + info1 = json.load(f) + + expected_name = "/src/" + project_name + if expected_name in info1: + return project_name + else: + logging.error( + f"Failed to locate the main component, plz add that to pname_table") + return False + + +def get_language(project_dir: Path) -> str | bool: + """Get programming language from project.yaml file. + + Args: + project_dir: Path to the project directory. + + Returns: + Language string on success, False on failure. + """ + project_yaml = project_dir / "project.yaml" + if not project_yaml.exists(): + return False + + with open(project_yaml, encoding='utf-8') as f: + content = f.read() + + matches = re.findall(r'language\s*:\s*([^\s]+)', content) + if len(matches) != 1: + logging.error(f"[!] Get more than one languages") + return False + + return str(matches[0]) + + +def get_sanitizer(fuzzer_sanitizer: str) -> str | bool: + """Convert fuzzer sanitizer short name to full name. + + Args: + fuzzer_sanitizer: Short sanitizer name (asan, msan, ubsan). + + Returns: + Full sanitizer name on success, False on failure. + """ + sanitizer_map = {'asan': "address", 'msan': 'memory', 'ubsan': 'undefined'} + + return sanitizer_map.get(fuzzer_sanitizer, False) + + +def download_poc(issue: dict[str, Any], path: Path, name: str) -> Path | bool: + """Download proof-of-concept file from issue. + + Args: + issue: Issue dictionary containing reproducer URL. + path: Directory to save the POC file. + name: Name for the downloaded file. + + Returns: + Path to downloaded file on success, False on failure. 
+ """ + session = requests.Session() + url = issue['reproducer'] + response = session.head(url, allow_redirects=True) + + if response.status_code != 200: + return False + + reproducer_path = path / name + response = session.get(url) + + if response.status_code != 200: + return False + + reproducer_path.write_bytes(response.content) + return reproducer_path + + +def prepare_ossfuzz(project_name: str, + commit_date: str | datetime) -> tuple[Path, Path] | bool: + """Prepare OSS-Fuzz repository for the specified project and date. + + Args: + project_name: Name of the project. + commit_date: Target commit date or commit hash. + + Returns: + Tuple of (temp_dir, project_dir) on success, False on failure. + """ + # 1. Clone OSS Fuzz + tmp_dir = clone("https://github.com/google/oss-fuzz.git", name="oss-fuzz") + if tmp_dir is False: + return False + + # 2. Get the Commit Close to Commit_Date + tmp_oss_fuzz_dir = tmp_dir / "oss-fuzz" + + if isinstance(commit_date, str): + oss_fuzz_commit = commit_date + else: + # Remove the cmd variable and use the list directly + result = execute([ + 'git', 'log', '--before=' + commit_date.isoformat(), '-n1', + '--format=%H' + ], tmp_oss_fuzz_dir) + if result.success and result.output: + oss_fuzz_commit = result.output.strip() + else: + oss_fuzz_commit = False + + if oss_fuzz_commit is False: + cmd = ['git', 'log', '--reverse', '--format=%H'] + result = execute(cmd, tmp_oss_fuzz_dir) + if result.success and result.output: + oss_fuzz_commit = result.output.splitlines()[0].strip() + else: + oss_fuzz_commit = False + + if oss_fuzz_commit is False: + logging.error('Failed to get oldest oss-fuzz commit') + return leave_ret(False, tmp_dir) + + # 3. Reset OSS Fuzz + gt = VersionControlTool(tmp_oss_fuzz_dir) + if not gt.reset(oss_fuzz_commit): + logging.error("Failed to Reset OSS-Fuzz") + return leave_ret(False, tmp_dir) + + # 4. 
Locate Project Dir + tmp_list = [x for x in tmp_oss_fuzz_dir.iterdir() if x.is_dir()] + if tmp_oss_fuzz_dir / "projects" in tmp_list: + proj_dir = tmp_oss_fuzz_dir / "projects" / project_name + elif tmp_oss_fuzz_dir / "targets" in tmp_list: + proj_dir = tmp_oss_fuzz_dir / "targets" / project_name + else: + logging.error(f"Failed to locate the project({project_name}) in oss-fuzz") + return leave_ret(False, tmp_dir) + + return (tmp_dir, proj_dir) + + +def rebase_dockerfile(dockerfile_path: str | Path, commit_date: str) -> bool: + """Rebase dockerfile to use historical base image. + + Args: + dockerfile_path: Path to the Dockerfile to rebase. + commit_date: Target commit date for base image. + + Returns: + True if rebase succeeded, False otherwise. + """ + + def _get_base(date: str, + repo: str = "gcr.io/oss-fuzz-base/base-builder") -> str: + """Get base image hash for the specified date.""" + cache_name = repo.split("/")[-1] + cache_file = f"/tmp/{cache_name}_cache.json" + cache_ttl = 86400 # 24 hours + result_json = [] + + if os.path.exists(cache_file) and ( + time.time() - os.path.getmtime(cache_file)) < cache_ttl: + with open(cache_file, 'r', encoding='utf-8') as f: + result_json = json.load(f) + else: + cmd = [ + "gcloud", "container", "images", "list-tags", repo, "--format=json", + "--sort-by=timestamp" + ] + result = execute(cmd) + if result.success and result.output: + result_json = json.loads(result.output) + with open(cache_file, 'w', encoding='utf-8') as f: + f.write(json.dumps(result_json, indent=4)) + else: + return "" + + timestamps = [] + for item in result_json: + timestamps.append(int(parse(item['timestamp']['datetime']).timestamp())) + + target_ts = int(parse(date).timestamp()) + return result_json[bisect_right(timestamps, target_ts - 1) - + 1]['digest'].split(":")[1] + + # Load the Dockerfile + try: + with open(dockerfile_path, encoding='utf-8') as f: + data = f.read() + except IOError: + logging.error(f"No such a dockerfile: {dockerfile_path}") + 
def update_revision_info(dockerfile: str | Path, src_path: str,
                         item: dict[str, Any], commit_date: datetime | Path,
                         approximate: str) -> bool:
  """Pin a dependency's revision by rewriting its clone line in the Dockerfile.

  Args:
    dockerfile: Path to the dockerfile.
    src_path: Source path of the dependency inside the image.
    item: Item information containing 'url', 'rev', and 'type'.
    commit_date: Target commit date, or a Path to enable replacement mode
      (used for bisection).
    approximate: Approximation direction ('+' or '-') when the exact
      revision is unavailable.

  Returns:
    True if the update succeeded, False otherwise.
  """
  item_url = item['url']
  item_rev = item['rev']
  item_type = item['type']
  dft = DockerfileModifier(dockerfile)

  # Match the URL scheme-insensitively (http vs https).
  if item_url.startswith("http:"):
    keyword = item_url[4:]
  elif item_url.startswith("https:"):
    keyword = item_url[5:]
  else:
    keyword = item_url

  hits, line_count = dft.get_line(keyword)
  # Exactly one clone line must reference this URL; otherwise bail out.
  if len(hits) != 1:
    return False

  line = hits[0]
  if item_type in ('git', 'hg'):
    pattern = re.compile(rf"{item_type}\s+clone")
  elif item_type == 'svn':
    pattern = re.compile(r"RUN\s+svn\s+(co|checkout)+")
  else:
    logging.error("NOT supported protocol")
    return False

  if len(pattern.findall(line)) != 1:  # mismatch
    return False

  if isinstance(commit_date, Path):
    # Replacement mode (for bisection): swap the clone line for an ADD of a
    # local tree, then re-init its submodules.
    rep_path = commit_date
    dft.replace_line_at(line_count - 1, f"ADD {rep_path.name} {src_path}")
    dft.insert_line_at(
        line_count, f"RUN bash -cx 'pushd {src_path} ;(git submodule init && "
        f"git submodule update --force) ;popd'")
    dft.flush()
    return True

  # Insertion mode: reset the clone to the exact revision, falling back to
  # the nearest commit before ('-') or after ('+') commit_date.
  if item_type == "git":
    if approximate == '-':
      dft.insert_line_at(
          line_count, f"RUN bash -cx 'pushd {src_path} ; "
          f"(git reset --hard {item_rev}) || "
          f"(commit=$(git log --before='{commit_date.isoformat()}' "
          f"--format='%H' -n1) && "
          f"git reset --hard $commit || exit 99) ; "
          f"(git submodule init && git submodule update --force) ;popd'")
    else:
      dft.insert_line_at(
          line_count, f"RUN bash -cx 'pushd {src_path} ; "
          f"(git reset --hard {item_rev}) || "
          f"(commit=$(git log --since='{commit_date.isoformat()}' "
          f"--format='%H' --reverse | head -n1) && "
          f"git reset --hard $commit || exit 99) ; "
          f"(git submodule init && git submodule update --force) ;popd'")
  elif item_type == 'hg':
    # TODO: support approximate
    dft.insert_line_at(
        line_count, f'RUN bash -cx "pushd {src_path} ; '
        f'(hg update --clean -r {item_rev} && '
        f'hg purge --config extensions.purge=)|| exit 99 ; popd"')
  elif item_type == "svn":
    # TODO: support approximate
    dft.replace(pattern, f"RUN svn checkout -r {item_rev}")
  else:
    # BUG FIX: message was missing the f prefix, so {item_type} was logged
    # literally. (This branch is defensive; item_type was validated above.)
    logging.error(f"Failed to support {item_type}")
    return False

  dft.flush()
  return True


def build_fuzzers_impl(local_id: int | str,
                       project_dir: Path,
                       engine: str,
                       sanitizer: str,
                       architecture: str,
                       source_path: Path | None,
                       mount_path: Path | None = None,
                       no_dump: bool = False,
                       custom_script: list[str] | None = None) -> bool:
  """Build fuzzers using Docker.

  Args:
    local_id: Local ID for logging and output directories.
    project_dir: Path to the project directory.
    engine: Fuzzing engine to use.
    sanitizer: Sanitizer to use.
    architecture: Target architecture.
    source_path: Path to source code.
    mount_path: Mount path for source code in the container.
    no_dump: Whether to suppress compile log output.
    custom_script: Additional custom script commands.

  Returns:
    True if the build succeeded, False otherwise.
  """
  if custom_script is None:
    custom_script = []

  # Set the log file for the image build.
  log_file = OSS_ERR / f"{local_id}_Image.log"
  logging.info(f"Check the output in file: {log_file}")

  # Clean the WORK/OUT dirs (created by docker as root, hence sudo).
  project_out = OSS_OUT / f"{local_id}_OUT"
  project_work = OSS_WORK / f"{local_id}_WORK"

  if project_out.exists():
    check_call(["sudo", "rm", "-rf", str(project_out)])
  if project_work.exists():
    check_call(["sudo", "rm", "-rf", str(project_work)])

  project_out.mkdir()
  project_work.mkdir()

  args = [
      '-t', f'gcr.io/oss-fuzz/{local_id}', '--file',
      str(project_dir / "Dockerfile"),
      str(project_dir)
  ]

  if not docker_build(args, log_file=log_file):
    logging.error("Failed to build DockerImage")
    return False

  # Image build succeeded — remove its log and try compiling.
  if log_file and log_file.exists():
    os.remove(str(log_file))

  env = [
      'FUZZING_ENGINE=' + engine,
      'SANITIZER=' + sanitizer,
      'ARCHITECTURE=' + architecture,
      'FUZZING_LANGUAGE=' + str(get_language(project_dir)),
  ]

  command = sum([['-e', x] for x in env], [])

  # Mount the source/dependencies (we try to replace this with modifying
  # the dockerfile instead).
  if source_path and mount_path:
    for item in source_path.iterdir():
      command += ['-v', f'{item}:{mount_path / item.name}']

  # Mount the out/work dirs.
  command += [
      '-v', f'{project_out}:/out', '-v', f'{project_work}:/work', '-t',
      f'gcr.io/oss-fuzz/{local_id}'
  ]

  # Supports the submodule tracker.
  command += custom_script

  if not no_dump:
    log_file = OSS_ERR / f"{local_id}_Compile.log"
    logging.info(f"Check the output in file: {str(log_file)}")
  else:
    log_file = None

  result = docker_run(command, log_file=log_file)
  if not result:
    logging.error('Failed to Build Targets')
    return False

  if log_file and log_file.exists() and str(log_file) != "/dev/null":
    os.remove(str(log_file))

  logging.info(f"OUT: {project_out}")
  return True
srcmap: str | Path, sanitizer: str, engine: str, + arch: str, commit_date: datetime, + issue: dict[str, Any], tag: str) -> bool: + """Build fuzzer with source code from srcmap. + + Args: + local_id: Local ID for the build. + project_name: Name of the project. + srcmap: Path to the srcmap file. + sanitizer: Sanitizer to use. + engine: Fuzzing engine. + arch: Target architecture. + commit_date: Target commit date. + issue: Issue information. + tag: Build tag ('fix' or 'vul'). + + Returns: + True if build succeeded, False otherwise. + """ + # Build source_dir + + with open(srcmap, encoding='utf-8') as f: + srcmap_items = json.loads(f.read()) + + if ("/src" in srcmap_items and + srcmap_items['/src']['url'] == 'https://github.com/google/oss-fuzz.git'): + result = prepare_ossfuzz(project_name, srcmap_items['/src']['rev']) + else: + result = prepare_ossfuzz(project_name, commit_date) + + if not result: + return False + else: + tmp_dir, project_dir = result + + dockerfile = project_dir / 'Dockerfile' + logging.info(f"dockerfile: {dockerfile}") + + build_data = BuildData(sanitizer=sanitizer, + architecture=arch, + engine=engine, + project_name=project_name) + + # Step ZERO: Rebase Dockerfiles + if not rebase_dockerfile(dockerfile, str(commit_date).replace(" ", "-")): + logging.error( + f"build_fuzzer_with_source: Failed to Rebase Dockerfile, {local_id}") + return leave_ret(False, tmp_dir) + + # Step ONE: Fix Dockerfiles + if not fix_dockerfile(dockerfile, project_name, commit_date): + logging.error( + f"build_fuzzer_with_source: Failed to Fix Dockerfile, {local_id}") + return leave_ret(False, tmp_dir) + + # Step TWO: Prepare Dependencies + with open(srcmap, encoding='utf-8') as f: + data = json.loads(f.read()) + + source_dir = Path(tempfile.mkdtemp()) + src = source_dir / "src" + src.mkdir(parents=True, exist_ok=True) + docker_volume = [] + unsorted = list(data.keys()) + sorted_keys = sorted(unsorted, key=len) + main_component = get_project_name(issue, srcmap) + + if 
main_component is False: + return leave_ret(False, tmp_dir) + + force_no_err_dump = "/src/xz" in sorted_keys + + # Handle Srcmap Info + for item_key in sorted_keys: + # logging.info(f"Prepare Dependency: {x}") + if skip_component(project_name, item_key): + continue + + if tag == 'fix' and main_component == item_key: + approximate = '+' + else: + approximate = '-' + + new_data = {} + new_data['rev'] = data[item_key]['rev'] + new_key, new_data['url'], new_data['type'] = update_resource_info( + item_key, data[item_key]['url'], data[item_key]['type']) + + del data[item_key] + data[new_key] = new_data + + item_name = new_key + item_url = data[new_key]['url'] + item_type = data[new_key]['type'] + item_rev = data[new_key]['rev'] + item_name = "/".join(item_name.split("/")[2:]) + + if special_component(project_name, new_key, data[new_key], dockerfile): + continue + + if (item_name == 'aflplusplus' and + item_url == 'https://github.com/AFLplusplus/AFLplusplus.git'): + continue + + if (item_name == 'libfuzzer' and + 'llvm.org/svn/llvm-project/compiler-rt/trunk/lib/fuzzer' in item_url): + continue + + # Broken Revision + if item_rev == "" or item_rev == "UNKNOWN": + logging.error(f"Broken Meta: No Revision Provided") + return leave_ret(False, [tmp_dir, source_dir]) + + # Ignore not named dependencies if it's not main + if item_name.strip(" ") == "" and len(data.keys()) == 1: + logging.error(f"Broken Meta: Found Not Named Dep") + return leave_ret(False, [tmp_dir, source_dir]) + + # Broken type + if item_type not in ['git', 'svn', 'hg']: + logging.error(f"Broken Meta: No support for {item_type}") + return leave_ret(False, [tmp_dir, source_dir]) + + # Try to perform checkout in dockerfile, + # which could make reproducing more reliable + if update_revision_info(dockerfile, new_key, data[new_key], commit_date, + approximate): + continue + + # Prepare the dependencies and record them. 
We'll use -v to mount them + # to the docker container + if item_type == 'git': + clone_result = clone(item_url, + item_rev, + src, + item_name, + commit_date=commit_date) + + if clone_result is False: + logging.error(f"[!] build_from_srcmap: Failed to clone & checkout " + f"[{local_id}]: {item_name}") + return leave_ret(False, [tmp_dir, source_dir]) + elif clone_result is None: + command = (f'git log --before="{commit_date.isoformat()}" ' + f'-n 1 --format="%H"') + result = subprocess.run(command, + stdout=subprocess.PIPE, + text=True, + shell=True, + cwd=src / item_name) + commit_hash = result.stdout.strip() + if not check_call(['git', "reset", '--hard', commit_hash], + cwd=src / item_name): + logging.error(f"[!] build_from_srcmap: Failed to clone & checkout " + f"[{local_id}]: {item_name}") + return leave_ret(False, [tmp_dir, source_dir]) + + docker_volume.append(new_key) + + elif item_type == 'svn': + if not svn_clone(item_url, item_rev, src, item_name): + logging.error( + f"[!] build_from_srcmap/svn: Failed clone & checkout: {item_name}") + return leave_ret(False, [tmp_dir, source_dir]) + docker_volume.append(new_key) + + elif item_type == 'hg': + if not hg_clone(item_url, item_rev, src, item_name): + logging.error( + f"[!] 
build_from_srcmap/hg: Failed clone & checkout: {item_name}") + return leave_ret(False, [tmp_dir, source_dir]) + docker_volume.append(new_key) + else: + logging.error(f"Failed to support {item_type}") + exit(1) + + # Step Three: Extra Scripts + if not extra_scripts(project_name, source_dir): + logging.error(f"Failed to Run ExtraScripts, {local_id}") + return leave_ret(False, [tmp_dir, source_dir]) + + if not fix_build_script(project_dir / "build.sh", project_name): + logging.error(f"Failed to Fix Build.sh, {local_id}") + return leave_ret(False, [tmp_dir, source_dir]) + + # Let's Build It + result = build_fuzzers_impl(local_id, + project_dir=project_dir, + engine=build_data.engine, + sanitizer=build_data.sanitizer, + architecture=build_data.architecture, + source_path=source_dir / "src", + mount_path=Path("/src"), + no_dump=force_no_err_dump) + + # we need sudo since the docker container root touched the folder + check_call(["sudo", "rm", "-rf", str(source_dir)]) + return leave_ret(result, tmp_dir) + + +def build_from_srcmap(srcmap: Path, issue: dict[str, Any], tag: str) -> bool: + """Build fuzzer from srcmap file. + + Args: + srcmap: Path to the srcmap file. + issue: Issue dictionary. + tag: Build tag ('fix' or 'vul'). + + Returns: + True if build succeeded, False otherwise. 
+ """ + # Get Basic Information + fuzzer_info = issue['job_type'].split("_") + engine = fuzzer_info[0] + sanitizer = get_sanitizer(fuzzer_info[1]) + arch = 'i386' if fuzzer_info[2] == 'i386' else 'x86_64' + + # Get Issue Date + issue_date = srcmap.name.split(".")[0].split("-")[-1] + commit_date = datetime.strptime(issue_date + " +0000", '%Y%m%d%H%M %z') + + if 'issue' not in issue: + issue['issue'] = {'localId': issue['localId']} + + if engine not in ['libfuzzer', 'afl', 'honggfuzz', 'centipede']: + logging.error("Failed to get engine") + return False + + if sanitizer is False: + logging.error("Failed to get Sanitizer") + return False + + return build_fuzzer_with_source(issue['issue']['localId'], issue['project'], + srcmap, sanitizer, engine, arch, commit_date, + issue, tag) + + +def arvo_reproducer(local_id: int | str, tag: str) -> bool: + """Main ARVO reproducer function. + + Args: + local_id: Local ID of the vulnerability. + tag: Version tag ('fix' or 'vul'). + + Returns: + True if reproduction succeeded, False otherwise. + """ + logging.info(f"Working on {local_id}") + + # 1. Fetch the basic info for the vulnerability + issue = fetch_issue(local_id) # TODO, refactor a fast way + if not issue: + logging.error(f"Failed to get the srcmap or issue for {local_id}") + return False + + tmpdir = Path(tempfile.mkdtemp()) + srcmap_url = issue['regressed'] if tag == 'vul' else issue['verified_fixed'] + srcmap_files = download_build_artifacts(issue, srcmap_url, tmpdir) + + if not srcmap_files: + logging.error(f"Failed to get the srcmap for {local_id}") + return False + + srcmap = Path(srcmap_files[0]) + + # Early issues don't have 'project' field. Set project for issues that + # didn't have it. + if 'project' not in issue: + issue['project'] = issue['fuzzer'].split("_")[1] + + # 2. 
Download the PoC + logging.info("Downloading PoC") + case_dir = Path(tempfile.mkdtemp()) + + try: + case_path = download_poc(issue, case_dir, "crash_case") + except Exception: + logging.error(f"Failed to Download the Reproducer") + return False + + logging.info(f"POC: {case_path}") + if not case_path or not case_path.exists(): + logging.error(f"Failed to Download the Reproducer") + return False + + # 3. Build the Vulnerable Software + logging.info("Building the Binary") + result = build_from_srcmap(srcmap, issue, tag) + + if not result: + logging.error(f"Failed to build old fuzzers from srcmap") + return False + + return True + + +def main() -> None: + """Main function.""" + parser = argparse.ArgumentParser(description='Reproduce ') + parser.add_argument('--issueId', + help='The issueId of the found vulnerability ' + 'https://issues.oss-fuzz.com/', + required=True) + parser.add_argument('--version', + default='fix', + help="The fixed version or vulnerable version") + args = parser.parse_args() + + # In this script, localId == issueId + arvo_reproducer(args.issueId, args.version) + + +if __name__ == "__main__": + main() diff --git a/infra/experimental/contrib/arvo/arvo_reproducer_test.py b/infra/experimental/contrib/arvo/arvo_reproducer_test.py new file mode 100644 index 000000000000..1000443b038b --- /dev/null +++ b/infra/experimental/contrib/arvo/arvo_reproducer_test.py @@ -0,0 +1,120 @@ +"""Test module for ARVO reproducer functionality. + +This module contains functional tests for the ARVO reproducer components: +1. The functionality of reproducer components. +2. The building of a project's fuzzers from a vulnerability found on OSS-Fuzz. 
+""" + +import shutil +import tempfile +import unittest +import warnings +from datetime import datetime +from pathlib import Path +from typing import Dict, Any + +from arvo_reproducer import (arvo_reproducer, download_poc, fetch_issue, + prepare_ossfuzz, rebase_dockerfile) +from arvo_utils import execute + +# Suppress Google auth warnings +warnings.filterwarnings("ignore", + category=UserWarning, + module="google.auth._default") + +# Test constants +REPRODUCE_TEST_LOCAL_ID = 42487096 +UNITTEST_LOCAL_ID = 42498388 + + +class ArvoReproducingTest(unittest.TestCase): + """Test class for ARVO reproducer functionality.""" + + def test_reproduce(self) -> None: + """Test the complete reproduction process.""" + result = arvo_reproducer(REPRODUCE_TEST_LOCAL_ID, 'vul') + self.assertEqual(result, True) + + case_dir = Path(tempfile.mkdtemp()) + issue = fetch_issue(REPRODUCE_TEST_LOCAL_ID) # TODO, refactor a fast way + download_poc(issue, case_dir, "crash_case") + + (case_dir / "stderr").touch() + with open(case_dir / "stderr", 'wb') as f: + execute([ + f'/tmp/{REPRODUCE_TEST_LOCAL_ID}_OUT/set_eval_fuzzer', + str(case_dir / "crash_case") + ], + stdout=f, + stderr=f) + + with open(case_dir / "stderr", 'rb') as f: + crash_info = f.read() + + self.assertEqual( + b"SUMMARY: AddressSanitizer: heap-buffer-overflow " in crash_info, True) + + shutil.rmtree(case_dir) + + +class ArvoUnitTests(unittest.TestCase): + """Unit tests for individual ARVO reproducer components.""" + + def test_fetch_issue(self) -> None: + """Test if we can get issues from OSS-Fuzz.""" + expected_issue_cve_2021_38593: Dict[str, Any] = { + 'project': + 'qt', + 'job_type': + 'libfuzzer_asan_i386_qt', + 'platform': + 'linux', + 'crash_type': + 'UNKNOWN WRITE', + 'crash_address': + '0x10000000', + 'severity': + 'High', + 'regressed': + 'https://oss-fuzz.com/revisions?job=libfuzzer_asan_i386_qt&' + 'range=202106240616:202106250624', + 'reproducer': + 'https://oss-fuzz.com/download?testcase_id=6379642528333824', + 
'verified_fixed': + 'https://oss-fuzz.com/revisions?job=libfuzzer_asan_i386_qt&' + 'range=202107280604:202107290609', + 'localId': + 42498388, + 'sanitizer': + 'address', + 'fuzz_target': + 'qtsvg_svg_qsvgrenderer_render' + } + + issue = fetch_issue(UNITTEST_LOCAL_ID) + self.assertEqual(expected_issue_cve_2021_38593, issue) + + def test_download_poc(self) -> None: + """Test if we can download proof-of-concept files.""" + issue = fetch_issue(UNITTEST_LOCAL_ID) + case_dir = Path(tempfile.mkdtemp()) + + result = download_poc(issue, case_dir, "crash_case") + self.assertEqual(result.name, "crash_case") + + shutil.rmtree(case_dir) + + def test_rebase_dockerfile(self) -> None: + """Test if we can get the historical dockerfile and rebase it.""" + commit_date = datetime.strptime("202409200607" + " +0000", '%Y%m%d%H%M %z') + result = prepare_ossfuzz("libxml2", commit_date) + + commit_date_str = str(commit_date).replace(" ", "-") + rebase_result = rebase_dockerfile(result[1] / "Dockerfile", commit_date_str) + + self.assertEqual(rebase_result, True) + shutil.rmtree(result[0]) + + +if __name__ == '__main__': + unittest.main() diff --git a/infra/experimental/contrib/arvo/arvo_utils.py b/infra/experimental/contrib/arvo/arvo_utils.py new file mode 100644 index 000000000000..20395f79908d --- /dev/null +++ b/infra/experimental/contrib/arvo/arvo_utils.py @@ -0,0 +1,769 @@ +"""ARVO utilities module. + +This module provides utility functions for ARVO reproducer including: +- Command execution functions +- Version control operations +- Docker operations +- File system utilities +- Dockerfile modification tools +""" + +import json +import os +import logging +import re +import shutil +import subprocess +import tempfile +import warnings +from datetime import datetime +from pathlib import Path +from typing import Any, Dict, List, Tuple + +import pytz +from dataclasses import dataclass + + +def load_repo_map(file_name: str) -> Dict[str, Any]: + """Load repository mapping from JSON file. 
+ + Args: + file_name: Name of the JSON file to load. + + Returns: + Dictionary containing the loaded JSON data. + """ + json_path = os.path.join(os.path.dirname(__file__), file_name) + with open(json_path, encoding='utf-8') as f: + return json.load(f) + + +# Configuration constants - Order matters +GLOBAL_STR_REPLACE = load_repo_map("string_replacement.json") +UPDATE_TABLE = load_repo_map("component_fixes.json") + +# Global constants +OSS_OUT = OSS_WORK = OSS_ERR = Path("/tmp") + +# Only include non git project +CHANGED_TYPE = {'/src/graphicsmagick': 'hg'} + +CHANGED_KEY = { + '/src/mdbtools/test': '/src/mdbtools', +} + +PNAME_TABLE = { + 'libpng-proto': "libprotobuf-mutator", + 'pcapplusplus': "PcapPlusPlus", + 'skia-ftz': 'skia', +} + +# Configure logging +logging.basicConfig(level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s') + +# Suppress Google auth warnings +warnings.filterwarnings("ignore", + category=UserWarning, + module="google.auth._default") + + +@dataclass +class CommandResult: + success: bool + output: bytes | None + returncode: int + + +def execute(cmd: List[str], + cwd: Path = Path("/tmp"), + stdout: int = subprocess.PIPE, + stderr: int = subprocess.PIPE) -> CommandResult: + """ + Execute a command and return its result. + + Args: + cmd: Command to execute as a list of strings. + cwd: Working directory for the command. + stdout: Stdout redirection target. + stderr: Stderr redirection target. + + Returns: + CommandResult: with success, output, and returncode. + success is True if returncode==0, regardless of output. + output is the stdout bytes if present, else None. 
+ """ + try: + result = subprocess.run(cmd, + cwd=cwd, + stderr=stderr, + stdout=stdout, + check=False) + output = result.stdout if result.stdout and result.stdout.strip( + ) != b'' else None + return CommandResult(success=(result.returncode == 0), + output=output, + returncode=result.returncode) + except (subprocess.SubprocessError, OSError): + return CommandResult(success=False, output=None, returncode=-1) + + +def check_call(cmd: List[str], + cwd: Path = Path("/tmp"), + stdout: int = subprocess.PIPE, + stderr: int = subprocess.PIPE) -> bool: + """Execute a command and return success status. + + Args: + cmd: Command to execute as a list of strings. + cwd: Working directory for the command. + stdout: Stdout redirection target. + stderr: Stderr redirection target. + + Returns: + True if command succeeded, False otherwise. + """ + try: + result = subprocess.run(cmd, + cwd=cwd, + stderr=stderr, + stdout=stdout, + check=False) + return result.returncode == 0 + except (subprocess.SubprocessError, OSError): + return False + + +def _git_pull(cwd: Path) -> bool: + """Pull latest changes from git repository. + + Args: + cwd: Path to the git repository. + + Returns: + True if pull succeeded, False otherwise. + """ + with open("/dev/null", 'w', encoding='utf-8') as f: + return check_call(['git', 'pull'], cwd=cwd, stderr=f, stdout=f) + + +def _hg_pull(cwd: Path) -> bool: + """Pull latest changes from mercurial repository. + + Args: + cwd: Path to the mercurial repository. + + Returns: + True if pull succeeded, False otherwise. + """ + with open("/dev/null", 'w', encoding='utf-8') as f: + return check_call(['hg', 'pull'], cwd=cwd, stderr=f, stdout=f) + + +def _svn_pull(cwd: Path) -> bool: + """Update SVN repository to latest revision. + + Args: + cwd: Path to the SVN repository. + + Returns: + True if update succeeded, False otherwise. 
+ """ + with open("/dev/null", 'w', encoding='utf-8') as f: + return check_call(['svn', 'update'], cwd=cwd, stderr=f, stdout=f) + + +def clone(url: str, + commit: str | None = None, + dest: str | Path | None = None, + name: str | None = None, + main_repo: bool = False, + commit_date: datetime | None = None) -> Path | bool: + """Clone a git repository and optionally checkout a specific commit. + + Args: + url: Repository URL to clone. + commit: Specific commit to checkout. + dest: Destination directory for cloning. + name: Name for the cloned repository directory. + main_repo: Whether this is the main repository. + commit_date: Date of the commit for fallback checkout. + + Returns: + Path to cloned repository on success, False on failure. + """ + + def _git_clone(url: str, dest: Path, name: str | None) -> bool: + """Helper function to perform git clone operation.""" + cmd = ['git', 'clone', url] + if name is not None: + cmd.append(name) + if not check_call(cmd, dest): + return False + return True + + def _check_out(commit: str, path: Path) -> bool: + """Helper function to checkout a specific commit.""" + with open('/dev/null', 'w', encoding='utf-8') as f: + return check_call(['git', "reset", '--hard', commit], cwd=path, stdout=f) + + dest_path = Path(dest) if dest else Path(tempfile.mkdtemp()) + + if not _git_clone(url, dest_path, name): + logging.error(f"[!] - clone: Failed to clone {url}") + return False + + if commit: + logging.info(f"Checkout to commit {commit}") + repo_name = list(dest_path.iterdir())[0] if name is None else name + repo_path = dest_path / repo_name + + if _check_out(commit, repo_path): + return dest_path + else: + if main_repo: + logging.error(f"[!] - clone: Failed to checkout {repo_name}") + return False + else: + if commit_date is None: + logging.warning( + f"[!] - clone: Failed to checkout {repo_name} but it's not the main component, using the latest version" + ) + return dest_path + logging.warning( + "[!] 
Failed to checkout, try a version before required commit") + cmd = [ + "git", "log", f"--before='{commit_date.isoformat()}'", + "--format='%H'", "-n1" + ] + fallback_result = execute(cmd, repo_path) + if fallback_result.success and fallback_result.output: + fallback_commit = fallback_result.output.decode().strip("'") + logging.info(f"Checkout to {fallback_commit}") + if _check_out(fallback_commit, repo_path): + return dest_path + logging.error(f"[!] - clone: Failed to checkout {repo_name}") + return False + + return dest_path + + +def svn_clone(url: str, + commit: str | None = None, + dest: str | Path | None = None, + rename: str | None = None) -> Path | bool: + """Clone an SVN repository and optionally checkout a specific revision. + + Args: + url: SVN repository URL. + commit: Specific revision to checkout. + dest: Destination directory. + rename: Name for the cloned directory. + + Returns: + Path to cloned repository on success, False on failure. + """ + + def _svn_clone(url: str, dest: Path, name: str | None = None) -> bool: + """Helper function to perform SVN checkout operation.""" + cmd = ["svn", "co", url] + if name: + cmd.append(name) + if not check_call(cmd, dest): + return False + return True + + tmp_path = Path(dest) if dest else Path(tempfile.mkdtemp()) + + if not _svn_clone(url, tmp_path, rename): + logging.error(f"[!] - svn_clone: Failed to clone {url}") + return False + + if commit: + repo_name = rename if rename else list(tmp_path.iterdir())[0] + repo_path = tmp_path / repo_name + if not check_call(['svn', "up", '--force', '-r', commit], cwd=repo_path): + return False + + return tmp_path + + +def hg_clone(url: str, + commit: str | None = None, + dest: str | Path | None = None, + rename: str | None = None) -> Path | bool: + """Clone a Mercurial repository and optionally checkout a specific commit. + + Args: + url: Mercurial repository URL. + commit: Specific commit to checkout. + dest: Destination directory. + rename: Name for the cloned directory. 
+ + Returns: + Path to cloned repository on success, False on failure. + """ + + def _hg_clone(url: str, dest: Path, name: str | None = None) -> bool: + """Helper function to perform hg clone operation.""" + cmd = ["hg", "clone", url] + if name: + cmd.append(name) + if not check_call(cmd, dest): + return False + return True + + tmp_path = Path(dest) if dest else Path(tempfile.mkdtemp()) + + if not _hg_clone(url, tmp_path, rename): + logging.error(f"[!] - hg_clone: Failed to clone {url}") + return False + + if commit: + repo_name = rename if rename else list(tmp_path.iterdir())[0] + repo_path = tmp_path / repo_name + if not (check_call(['hg', "update", '--clean', '-r', commit], cwd=repo_path) + and check_call(['hg', "purge", '--config', 'extensions.purge='], + cwd=repo_path)): + return False + + return tmp_path + + +class DockerfileModifier: + """A class for modifying Dockerfile content with various text operations.""" + + def __init__(self, path: str | Path) -> None: + """ + Initialize the DockerfileModifier. + + This constructor loads the Dockerfile content and performs a clean up: + - Removes all comment lines (lines starting with #) + - Removes line continuations (backslash-newline) + - Collapses multiple blank lines into a single blank line + This normalization makes further text processing and modifications more robust and predictable. + + Args: + path: Path to the Dockerfile to modify. + """ + self.path = Path(path) + with open(self.path, encoding='utf-8') as f: + self.content = f.read() + + # Clean up the content + comments = re.compile(r'^\s*#.*\n', re.MULTILINE) + self.content = comments.sub("", self.content) + self.content = self.content.replace("\\\n", "") + blank_line = re.compile(r'\n(\s)*\n', re.MULTILINE) + self.content = blank_line.sub("\n", self.content) + + def flush(self) -> bool: + """Write the modified content back to the file. + + Returns: + True if write succeeded, False otherwise. 
+ """ + try: + with open(self.path, 'w', encoding='utf-8') as f: + f.write(self.content) + return True + except IOError: + return False + + def str_replace(self, old: str, new: str) -> None: + """Replace all occurrences of old string with new string. + + Args: + old: String to replace. + new: Replacement string. + """ + self.content = self.content.replace(old, new) + + def str_replace_all(self, pairs: Dict[str, str]) -> None: + """Replace multiple string pairs. + + Args: + pairs: Dictionary of old -> new string mappings. + """ + for key, value in pairs.items(): + self.str_replace(key, value) + + def replace_line_at(self, pos: int, line: str) -> None: + """Replace the line at specified position. + + Args: + pos: Line position (0-indexed). + line: New line content. + """ + lines = self.content.split("\n") + if 0 <= pos < len(lines): + lines[pos] = line + self.content = "\n".join(lines) + + def replace(self, old: str, new: str, flags: int = 0) -> None: + """Replace using regular expressions. + + Args: + old: Regular expression pattern. + new: Replacement string. + flags: Regular expression flags. + """ + self.content = re.sub(old, new, self.content, flags=flags) + + def replace_once(self, old: str, new: str) -> None: + """Replace first occurrence using regular expressions. + + Args: + old: Regular expression pattern. + new: Replacement string. + """ + self.content = re.sub(old, new, self.content, count=1) + + def insert_line_before(self, target: str, newline: str) -> bool | None: + """Insert a new line before the target line. + + Args: + target: Target line to find. + newline: New line to insert. + + Returns: + None if target not found, otherwise inserts the line. + """ + line_num = self.locate_str(target) + if line_num is False: + return False + self.insert_line_at(line_num, newline) + return None + + def insert_line_after(self, target: str, newline: str) -> bool | None: + """Insert a new line after the target line. + + Args: + target: Target line to find. 
+ newline: New line to insert. + + Returns: + None if target not found, otherwise inserts the line. + """ + line_num = self.locate_str(target) + if line_num is False: + return False + self.insert_line_at(line_num + 1, newline) + return None + + def insert_line_at(self, pos: int, line: str) -> None: + """Insert a line at specified position. + + Args: + pos: Position to insert at. + line: Line content to insert. + """ + lines = self.content.split("\n") + lines.insert(pos, line) + self.content = "\n".join(lines) + + def remove_range(self, starts: int, ends: int) -> None: + """Remove lines in the specified range. + + Args: + starts: Start line number (inclusive). + ends: End line number (exclusive). + """ + lines = self.content.split("\n") + new_lines = [] + for num, line in enumerate(lines): + if not (starts <= num < ends): + new_lines.append(line) + self.content = '\n'.join(new_lines) + + def clean_comments(self) -> None: + """Remove comment lines from the content.""" + pattern = re.compile(r'^#.*', re.MULTILINE) + self.content = pattern.sub('', self.content) + newline_pattern = re.compile(r'^\n', re.MULTILINE) + self.content = newline_pattern.sub('', self.content) + + def locate_str(self, keyword: str) -> int | bool: + """Find the line number containing the keyword. + + Args: + keyword: Keyword to search for. + + Returns: + Line number if found, False otherwise. + """ + lines = self.content.split("\n") + for line_num, line in enumerate(lines): + if keyword in line: + return line_num + return False + + def get_line(self, keyword: str) -> Tuple[List[str], int]: + """Get lines containing the keyword and the last line number. + + Args: + keyword: Keyword to search for. + + Returns: + Tuple of (matching_lines, last_line_number). 
+ """ + lines = self.content.split("\n") + matching_lines = [] + last_line_num = 0 + + for line_num, line in enumerate(lines, 1): + if keyword in line: + matching_lines.append(line) + last_line_num = line_num + + if len(matching_lines) < 2: + return matching_lines, last_line_num + + # Use regex for more precise matching + pattern = re.compile(rf"{keyword}(\s.*$|$)") + matching_lines = [] + last_line_num = 0 + + for line_num, line in enumerate(lines, 1): + if pattern.search(line): + matching_lines.append(line) + last_line_num = line_num + + return matching_lines, last_line_num + + +class VersionControlTool: + """A unified interface for version control operations (git, hg, svn).""" + + def __init__(self, + repo_path: str | Path, + vc_type: str = 'git', + revision: str | None = None, + latest: bool = False) -> None: + """Initialize the VersionControlTool. + + Args: + repo_path: Path to the repository or URL to clone. + vc_type: Version control type ('git', 'hg', 'svn'). + revision: Specific revision to checkout. + latest: Whether to pull latest changes. + + Raises: + ValueError: If vc_type is not supported. + """ + if vc_type not in ['git', 'hg', 'svn']: + raise ValueError(f'VersionControlTool: Does not support {vc_type}') + + self.type = vc_type + repo_path_obj = Path(repo_path) if isinstance(repo_path, str) else repo_path + + if not repo_path_obj.exists(): + repo_path_obj = self.clone(str(repo_path), revision) + if not repo_path_obj: + raise RuntimeError(f'VersionControlTool: Failed to init {repo_path}') + + self.repo = repo_path_obj + self.name = self.repo.name + + if latest and not self.pull(): + raise RuntimeError(f'VersionControlTool: Failed to Update {repo_path}') + + def pull(self) -> bool: + """Pull latest changes from the repository. + + Returns: + True if pull succeeded, False otherwise. 
+ """ + if self.type == 'git': + return _git_pull(self.repo) + elif self.type == 'hg': + return _hg_pull(self.repo) + else: + return _svn_pull(self.repo) + + def clone(self, url: str, revision: str | None = None) -> Path | bool: + """Clone the repository. + + Args: + url: Repository URL to clone. + revision: Specific revision to checkout. + + Returns: + Path to cloned repository on success, False on failure. + """ + if self.type == 'git': + repo = clone(url, revision) + if repo is not False: + self.repo = list(repo.iterdir())[0] + return self.repo + elif self.type == 'hg': + repo = hg_clone(url, revision) + if repo is not False: + self.repo = list(repo.iterdir())[0] + return self.repo + else: + repo = svn_clone(url, revision) + if repo is not False: + self.repo = list(repo.iterdir())[0] + return self.repo + + return False + + def commit_date(self, commit: str) -> str | bool: + """Get the date of a specific commit. + + Args: + commit: Commit hash or revision. + + Returns: + Formatted date string on success, False on failure. 
+ """ + + def time_reformat(original_str: str) -> str: + """Reformat time string to standard format.""" + original_dt = datetime.strptime(original_str, "%Y-%m-%d %H:%M:%S %z") + utc_dt = original_dt.astimezone(pytz.utc) + return utc_dt.strftime("%Y%m%d%H%M") + + if self.type == 'git': + result = execute(['git', 'show', '-s', '--format=%ci', commit], self.repo) + if result.success and result.output: + return time_reformat(result.output.decode()) + elif self.type == 'hg': + result = execute(['hg', 'log', '-r', commit, '--template', '{date}'], + self.repo) + if result.success and result.output: + timestamp = int(result.output.decode().split(".")[0]) + return datetime.utcfromtimestamp(timestamp).strftime('%Y%m%d%H%M') + else: + result = execute(['svn', 'log', '-r', commit, '-q'], self.repo) + if result.success and result.output: + lines = result.output.decode().split('\n') + if len(lines) > 1: + date_part = lines[1].split(' | ')[2].split(' (')[0] + return time_reformat(date_part) + return False + + def reset(self, commit: str) -> bool: + """Reset the repository to a specific commit. + + Args: + commit: Commit hash or revision to reset to. + + Returns: + True if reset succeeded, False otherwise. + """ + if self.type == 'git': + cmd = ['git', 'reset', '--hard', commit] + with open('/dev/null', 'w', encoding='utf-8') as f: + return check_call(cmd, self.repo, stdout=f) + elif self.type == 'hg': + cmd1 = ['hg', 'update', '--clean', '-r', commit] + cmd2 = ['hg', "purge", '--config', 'extensions.purge='] + return (check_call(cmd1, self.repo) and check_call(cmd2, self.repo)) + elif self.type == "svn": + return check_call(['svn', "up", '--force', '-r', commit], cwd=self.repo) + + return False + + +def docker_build(args: List[str], log_file: Path | None = None) -> bool: + """Build a Docker image. + + Args: + args: Arguments for docker build command. + log_file: Optional log file to write output. + + Returns: + True if build succeeded, False otherwise. 
+ """ + cmd = ['docker', 'build'] + cmd.extend(args) + logging.info("Docker Build: \n" + " ".join(cmd)) + + if log_file: + with open(log_file, 'w', encoding='utf-8') as f: + result = check_call(cmd, stderr=f, stdout=f) + f.write("\n" + " ".join(cmd) + "\n") + return result + else: + return check_call(cmd) + + +def docker_run(args: List[str], + rm: bool = True, + log_file: Path | None = None) -> bool: + """Run a Docker container. + + Args: + args: Arguments for docker run command. + rm: Whether to automatically remove the container when it exits. + log_file: Optional log file to write output. + + Returns: + True if run succeeded, False otherwise. + """ + if rm: + cmd = ['docker', 'run', '--rm', '--privileged'] + else: + cmd = ['docker', 'run', '--privileged'] + + cmd.extend(args) + logging.info("Docker Run: \n" + " ".join(cmd)) + + if log_file: + with open(log_file, 'w', encoding='utf-8') as f: + result = check_call(cmd, stdout=f, stderr=f) + f.write("\n" + " ".join(cmd) + "\n") + return result + else: + return check_call(cmd) + + +def clean_dir(directory: Path) -> bool: + """Remove a directory and all its contents. + + Args: + directory: Directory to remove. + + Returns: + True if removal succeeded, False otherwise. + """ + if not directory.exists(): + return True + + try: + shutil.rmtree(directory) + return True + except OSError: + logging.warning(f"[FAILED] to remove tmp file {directory}") + return False + + +def leave_ret(return_val: Any, tmp_dirs: Path | list[Path]) -> Any: + """ + Clean up temporary directories and return a value. + + This function is used to ensure that any temporary directories created during + the execution of a process are properly removed before returning a result. + It accepts either a single Path or a list of Paths, and attempts to remove + each directory (and its contents) using clean_dir. This helps prevent + resource leaks and keeps the filesystem clean after temporary work is done. 
+ + Args: + return_val: Value to return after cleanup. + tmp_dirs: Temporary directory or list of directories to clean up. + + Returns: + The return_val parameter, after cleanup is performed. + """ + if isinstance(tmp_dirs, list): + for tmp_dir in tmp_dirs: + clean_dir(tmp_dir) + else: + clean_dir(tmp_dirs) + return return_val + + +if __name__ == "__main__": + pass diff --git a/infra/experimental/contrib/arvo/component_fixes.json b/infra/experimental/contrib/arvo/component_fixes.json new file mode 100644 index 000000000000..722b83e9f7c8 --- /dev/null +++ b/infra/experimental/contrib/arvo/component_fixes.json @@ -0,0 +1,40 @@ +{ + "/src/freetype2": "https://github.com/freetype/freetype2", + "/src/freetype": "https://github.com/freetype/freetype", + "/src/pcre2": "https://github.com/PCRE2Project/pcre2", + "/src/skia/third_party/externals/libjpeg-turbo": "https://github.com/libjpeg-turbo/libjpeg-turbo.git", + "/src/radare2-regressions": "https://github.com/rlaemmert/radare2-regressions.git", + "/src/x264": "https://code.videolan.org/videolan/x264.git", + "/src/x265": "https://bitbucket.org/multicoreware/x265_git.git", + "/src/vorbis": "https://gitlab.xiph.org/xiph/vorbis.git", + "/src/theora": "https://gitlab.xiph.org/xiph/theora.git", + "/src/opus": "https://gitlab.xiph.org/xiph/opus.git", + "/src/ogg": "https://gitlab.xiph.org/xiph/ogg.git", + "/src/libxml2": "https://gitlab.gnome.org/GNOME/libxml2.git", + "/src/wireshark": "https://github.com/wireshark/wireshark.git", + "/src/kimageformats": "https://invent.kde.org/frameworks/kimageformats.git", + "/src/extra-cmake-modules": "https://invent.kde.org/frameworks/extra-cmake-modules.git", + "/src/kcodecs": "https://github.com/KDE/kcodecs.git", + "/src/karchive": "https://invent.kde.org/frameworks/karchive.git", + "/src/libtheora": "https://gitlab.xiph.org/xiph/theora.git", + "/src/libva": "https://github.com/intel/libva.git", + "/src/libssh2": "https://github.com/libssh2/libssh2.git", + "/src/quickjs": 
"https://github.com/bellard/quickjs", + "/src/lwan": "https://github.com/lpereira/lwan.git", + "/src/graphicsmagick": "https://foss.heptapod.net/graphicsmagick/graphicsmagick", + "/src/llvm": "https://github.com/llvm/llvm-project.git", + "/src/pcre": "https://github.com/PhilipHazel/pcre2", + "/src/gnulib": "https://github.com/coreutils/gnulib.git", + "/src/net-snmp": "https://github.com/net-snmp/net-snmp.git", + "/src/harfbuzz": "https://github.com/harfbuzz/harfbuzz.git", + "/src/matio": "https://github.com/tbeu/matio.git", + "/src/aspell": "https://github.com/gnuaspell/aspell.git", + "/src/libsndfile": "https://github.com/libsndfile/libsndfile.git", + "/src/poppler": "https://gitlab.freedesktop.org/poppler/poppler.git", + "/src/gdal/poppler": "https://gitlab.freedesktop.org/poppler/poppler.git", + "/src/gdal/curl": "https://github.com/curl/curl.git", + "/src/ghostpdl": "https://cgit.ghostscript.com/ghostpdl.git", + "/src/cryptofuzz": "https://github.com/MozillaSecurity/cryptofuzz.git", + "/src/python-library-fuzzers": "https://github.com/hugovk/python-library-fuzzers.git", + "/src/libmicrohttpd": "https://git.gnunet.org/libmicrohttpd.git" +} \ No newline at end of file diff --git a/infra/experimental/contrib/arvo/hacks/__init__.py b/infra/experimental/contrib/arvo/hacks/__init__.py new file mode 100644 index 000000000000..30943a506f22 --- /dev/null +++ b/infra/experimental/contrib/arvo/hacks/__init__.py @@ -0,0 +1,102 @@ +"""Project-specific hacks for fixing Dockerfiles and build scripts. + +This module contains project-specific fixes that solve building/compiling problems +for various OSS-Fuzz projects. Each project has its own module with dedicated +hack functions. 
+""" + +from abc import ABC, abstractmethod +from pathlib import Path + +try: + from ..arvo_utils import DockerfileModifier +except ImportError: + # Fallback for when module is imported directly + from arvo_utils import DockerfileModifier + + +class ProjectHack(ABC): + """Base class for project-specific hacks.""" + + def __init__(self): + self.commit_date = None + + def set_commit_date(self, commit_date): + """Set the commit date for hacks that need it.""" + self.commit_date = commit_date + + @abstractmethod + def apply_dockerfile_fixes(self, dft: DockerfileModifier) -> bool: + """Apply project-specific fixes to a Dockerfile. + + Args: + dft: DockerfileModifier instance for the project's Dockerfile. + + Returns: + True if fixes were applied successfully, False otherwise. + """ + pass + + def apply_build_script_fixes(self, dft: DockerfileModifier) -> bool: + """Apply project-specific fixes to a build script. + + Args: + dft: DockerfileModifier instance for the project's build script. + + Returns: + True if fixes were applied successfully, False otherwise. + """ + # Default implementation - no build script fixes + return True + + def apply_extra_fixes(self, source_dir: Path) -> bool: + """Apply extra project-specific fixes that require file system operations. + + Args: + source_dir: Path to the source directory. + + Returns: + True if fixes were applied successfully, False otherwise. 
+ """ + # Default implementation - no extra fixes + return True + + +# Registry of all project hacks +PROJECT_HACKS = {} + + +def register_hack(project_name: str, hack_class: type): + """Register a project hack class.""" + PROJECT_HACKS[project_name] = hack_class + + +def get_project_hack(project_name: str) -> ProjectHack | None: + """Get a project hack instance by name.""" + hack_class = PROJECT_HACKS.get(project_name) + if hack_class: + return hack_class() + return None + + +# Helper functions that can be reused across projects +def x265_fix(dft: DockerfileModifier) -> None: + """Apply x265-specific fixes to the dockerfile modifier. + + This is a common fix used by multiple projects that depend on x265. + The order of these replacements matters. + """ + dft.replace( + r'RUN\shg\sclone\s.*bitbucket.org/multicoreware/x265\s*(x265)*', + "RUN git clone " + "https://bitbucket.org/multicoreware/x265_git.git x265\n") + dft.replace( + r'RUN\shg\sclone\s.*hg.videolan.org/x265\s*(x265)*', "RUN git clone " + "https://bitbucket.org/multicoreware/x265_git.git x265\n") + + +# Import all project hacks to register them +from . import (cryptofuzz, curl, dlplibs, duckdb, ffmpeg, flac, freeradius, + gdal, ghostscript, gnutls, graphicsmagick, imagemagick, jbig2dec, + lcms, libheif, libredwg, libreoffice, libyang, lwan, openh264, + quickjs, radare2, skia, uwebsockets, wireshark, wolfssl, yara) diff --git a/infra/experimental/contrib/arvo/hacks/cryptofuzz.py b/infra/experimental/contrib/arvo/hacks/cryptofuzz.py new file mode 100644 index 000000000000..64e4f53bc2e4 --- /dev/null +++ b/infra/experimental/contrib/arvo/hacks/cryptofuzz.py @@ -0,0 +1,22 @@ +"""Cryptofuzz project-specific hacks.""" + +from . 
import ProjectHack, register_hack + + +class CryptofuzzHack(ProjectHack): + """Hacks for the Cryptofuzz project.""" + + def apply_dockerfile_fixes(self, dft) -> bool: + """Fix Cryptofuzz Dockerfile issues.""" + # Fix libressl update script + dft.insert_line_before( + "RUN cd $SRC/libressl && ./update.sh", + "RUN sed -n -i '/^# setup source paths$/,$p' $SRC/libressl/update.sh") + + # Remove cryptofuzz-corpora line (from old implementation) + dft.replace(r".*https://github.com/guidovranken/cryptofuzz-corpora.*", "") + return True + + +# Register the hack +register_hack("cryptofuzz", CryptofuzzHack) diff --git a/infra/experimental/contrib/arvo/hacks/curl.py b/infra/experimental/contrib/arvo/hacks/curl.py new file mode 100644 index 000000000000..85e99c459cd0 --- /dev/null +++ b/infra/experimental/contrib/arvo/hacks/curl.py @@ -0,0 +1,19 @@ +"""Curl project-specific hacks.""" + +from . import ProjectHack, register_hack + + +class CurlHack(ProjectHack): + """Hacks for the Curl project.""" + + def apply_dockerfile_fixes(self, dft) -> bool: + """Fix Curl Dockerfile issues.""" + # Check if download_zlib.sh exists and replace zlib URL + dft.append_line( + 'RUN [ -f "/src/curl_fuzzer/scripts/download_zlib.sh" ] && sed -i \'s|https://www.zlib.net/zlib-1.2.11.tar.gz|https://www.zlib.net/fossils/zlib-1.2.11.tar.gz|g\' /src/curl_fuzzer/scripts/download_zlib.sh || true' + ) + return True + + +# Register the hack +register_hack("curl", CurlHack) diff --git a/infra/experimental/contrib/arvo/hacks/dlplibs.py b/infra/experimental/contrib/arvo/hacks/dlplibs.py new file mode 100644 index 000000000000..6ce57f61894d --- /dev/null +++ b/infra/experimental/contrib/arvo/hacks/dlplibs.py @@ -0,0 +1,17 @@ +"""DLPLibs project-specific hacks.""" + +from . 
import ProjectHack, register_hack + + +class DLPLibsHack(ProjectHack): + """Hacks for the DLPLibs project.""" + + def apply_dockerfile_fixes(self, dft) -> bool: + """Fix DLPLibs Dockerfile issues.""" + dft.replace(r"ADD", '# ADD') + dft.replace(r"RUN wget", '#RUN wget') + return True + + +# Register the hack +register_hack("dlplibs", DLPLibsHack) diff --git a/infra/experimental/contrib/arvo/hacks/duckdb.py b/infra/experimental/contrib/arvo/hacks/duckdb.py new file mode 100644 index 000000000000..24b78833ab47 --- /dev/null +++ b/infra/experimental/contrib/arvo/hacks/duckdb.py @@ -0,0 +1,20 @@ +"""DuckDB project-specific hacks.""" + +from . import ProjectHack, register_hack + + +class DuckDBHack(ProjectHack): + """Hacks for the DuckDB project.""" + + def apply_dockerfile_fixes(self, dft) -> bool: + """No Dockerfile fixes needed for DuckDB.""" + return True + + def apply_build_script_fixes(self, dft) -> bool: + """Fix DuckDB build script issues.""" + dft.replace(r'^make$', 'make -j`nproc`\n') + return True + + +# Register the hack +register_hack("duckdb", DuckDBHack) diff --git a/infra/experimental/contrib/arvo/hacks/ffmpeg.py b/infra/experimental/contrib/arvo/hacks/ffmpeg.py new file mode 100644 index 000000000000..ca53d904d6b4 --- /dev/null +++ b/infra/experimental/contrib/arvo/hacks/ffmpeg.py @@ -0,0 +1,17 @@ +"""FFmpeg project-specific hacks.""" + +from . import ProjectHack, register_hack, x265_fix + + +class FFmpegHack(ProjectHack): + """Hacks for the FFmpeg project.""" + + def apply_dockerfile_fixes(self, dft) -> bool: + """Fix FFmpeg Dockerfile issues.""" + # Apply x265 fixes + x265_fix(dft) + return True + + +# Register the hack +register_hack("ffmpeg", FFmpegHack) diff --git a/infra/experimental/contrib/arvo/hacks/flac.py b/infra/experimental/contrib/arvo/hacks/flac.py new file mode 100644 index 000000000000..f1c44d2c6e42 --- /dev/null +++ b/infra/experimental/contrib/arvo/hacks/flac.py @@ -0,0 +1,18 @@ +"""FLAC project-specific hacks.""" + +from . 
import ProjectHack, register_hack + + +class FLACHack(ProjectHack): + """Hacks for the FLAC project.""" + + def apply_dockerfile_fixes(self, dft) -> bool: + """Fix FLAC Dockerfile issues.""" + # Check if the problematic repository is referenced + if dft.locate_str('guidovranken/flac-fuzzers') is not False: + return False # Not fixable since the repo is removed and there is no mirror + return True + + +# Register the hack +register_hack("flac", FLACHack) diff --git a/infra/experimental/contrib/arvo/hacks/freeradius.py b/infra/experimental/contrib/arvo/hacks/freeradius.py new file mode 100644 index 000000000000..f8533682c06c --- /dev/null +++ b/infra/experimental/contrib/arvo/hacks/freeradius.py @@ -0,0 +1,17 @@ +"""FreeRADIUS project-specific hacks.""" + +from . import ProjectHack, register_hack + + +class FreeRADIUSHack(ProjectHack): + """Hacks for the FreeRADIUS project.""" + + def apply_dockerfile_fixes(self, dft) -> bool: + """Fix FreeRADIUS Dockerfile issues.""" + dft.str_replace('sha256sum -c', 'pwd') + dft.str_replace("curl -s -O ", 'curl -s -O -L ') + return True + + +# Register the hack +register_hack("freeradius", FreeRADIUSHack) diff --git a/infra/experimental/contrib/arvo/hacks/gdal.py b/infra/experimental/contrib/arvo/hacks/gdal.py new file mode 100644 index 000000000000..a77ad703a5b2 --- /dev/null +++ b/infra/experimental/contrib/arvo/hacks/gdal.py @@ -0,0 +1,48 @@ +"""GDAL project-specific hacks.""" + +from . 
import ProjectHack, register_hack + + +class GDALHack(ProjectHack): + """Hacks for the GDAL project.""" + + def apply_dockerfile_fixes(self, dft) -> bool: + """Fix GDAL Dockerfile issues.""" + if not self.commit_date: + # GDAL hacks require commit_date to work properly + return False + + dft.append_line(f'ARG ARVO_TS="{self.commit_date.isoformat()}"') + + # Remove all --depth and checkout the cloned repo in build.sh + build_clone_fix = r'''RUN awk -v ts="$ARVO_TS" '\ + /git clone/ { \ + gsub(/--depth[= ][0-9]+/, "", $0); \ + if (NF == 3) dir = $3; \ + else { \ + repo = $NF; \ + gsub(/.*\//, "", repo); \ + gsub(/\.git$/, "", repo); \ + dir = repo; \ + } \ + print $0 " && (pushd " dir " && commit=$(git log --before=\"" ts "\" --format=\"%H\" -n1) && git reset --hard $commit || exit 99 && popd) && (pushd " dir " && git submodule init && git submodule update --force && popd)"; \ + next \ + } \ + { print }' $SRC/build.sh > $SRC/build.sh.tmp && mv $SRC/build.sh.tmp $SRC/build.sh + ''' + dft.append_line(build_clone_fix) + + # Fix GNUmakefile + line = '''RUN [ -f /src/gdal/gdal/GNUmakefile ] && sed -i 's|(cd frmts; $(MAKE))|(cd frmts; $(MAKE) clean; $(MAKE))|' /src/gdal/gdal/GNUmakefile || true''' + dft.append_line(line) + + # Fix build script path + dft.append_line( + '''RUN sed -i 's|BUILD_SH_FROM_REPO="$SRC/gdal/fuzzers/build.sh"|BUILD_SH_FROM_REPO=$0|g' $SRC/build.sh''' + ) + + return True + + +# Register the hack +register_hack("gdal", GDALHack) diff --git a/infra/experimental/contrib/arvo/hacks/ghostscript.py b/infra/experimental/contrib/arvo/hacks/ghostscript.py new file mode 100644 index 000000000000..0f854dd61626 --- /dev/null +++ b/infra/experimental/contrib/arvo/hacks/ghostscript.py @@ -0,0 +1,22 @@ +"""Ghostscript project-specific hacks.""" + +from . 
import ProjectHack, register_hack + + +class GhostscriptHack(ProjectHack): + """Hacks for the Ghostscript project.""" + + def apply_dockerfile_fixes(self, dft) -> bool: + """No Dockerfile fixes needed for Ghostscript.""" + return True + + def apply_build_script_fixes(self, dft) -> bool: + """Fix Ghostscript build script issues.""" + old = r"mv \$SRC\/freetype freetype" + new = "cp -r $SRC/freetype freetype" + dft.replace(old, new) + return True + + +# Register the hack +register_hack("ghostscript", GhostscriptHack) diff --git a/infra/experimental/contrib/arvo/hacks/gnutls.py b/infra/experimental/contrib/arvo/hacks/gnutls.py new file mode 100644 index 000000000000..c07a07abba44 --- /dev/null +++ b/infra/experimental/contrib/arvo/hacks/gnutls.py @@ -0,0 +1,18 @@ +"""GnuTLS project-specific hacks.""" + +from . import ProjectHack, register_hack + + +class GnuTLSHack(ProjectHack): + """Hacks for the GnuTLS project.""" + + def apply_dockerfile_fixes(self, dft) -> bool: + """Fix GnuTLS Dockerfile issues.""" + dft.str_replace(" libnettle6 ", " ") + dft.replace(r".*client_corpus_no_fuzzer_mode.*", "") + dft.replace(r".*server_corpus_no_fuzzer_mode.*", "") + return True + + +# Register the hack +register_hack("gnutls", GnuTLSHack) diff --git a/infra/experimental/contrib/arvo/hacks/graphicsmagick.py b/infra/experimental/contrib/arvo/hacks/graphicsmagick.py new file mode 100644 index 000000000000..00d3f0cee485 --- /dev/null +++ b/infra/experimental/contrib/arvo/hacks/graphicsmagick.py @@ -0,0 +1,25 @@ +"""GraphicsMagick project-specific hacks.""" + +from . 
import ProjectHack, register_hack, x265_fix
+
+
+class GraphicsMagickHack(ProjectHack):
+  """Hacks for the GraphicsMagick project."""
+
+  def apply_dockerfile_fixes(self, dft) -> bool:
+    """Fix GraphicsMagick Dockerfile issues."""
+    # Fix mercurial clone with retry logic
+    dft.replace(
+        r'RUN hg clone .* graphicsmagick', 'RUN (CMD="hg clone --insecure '
+        'https://foss.heptapod.net/graphicsmagick/graphicsmagick '
+        'graphicsmagick" && '
+        'for x in `seq 1 100`; do $($CMD); '
+        'if [ $? -eq 0 ]; then break; fi; done)')
+
+    # Apply x265 fixes
+    x265_fix(dft)
+    return True
+
+
+# Register the hack
+register_hack("graphicsmagick", GraphicsMagickHack)
diff --git a/infra/experimental/contrib/arvo/hacks/imagemagick.py b/infra/experimental/contrib/arvo/hacks/imagemagick.py
new file mode 100644
index 000000000000..6188913155b2
--- /dev/null
+++ b/infra/experimental/contrib/arvo/hacks/imagemagick.py
@@ -0,0 +1,34 @@
+"""ImageMagick project-specific hacks."""
+
+from pathlib import Path
+from . import ProjectHack, register_hack
+
+
+class ImageMagickHack(ProjectHack):
+  """Hacks for the ImageMagick project."""
+
+  def apply_dockerfile_fixes(self, dft) -> bool:
+    """Fix ImageMagick Dockerfile issues."""
+    # Fix heic corpus download issue
+    dft.replace(r'RUN svn .*heic_corpus.*',
+                "RUN mkdir /src/heic_corpus && touch /src/heic_corpus/XxX")
+    return True
+
+  def apply_extra_fixes(self, source_dir: Path) -> bool:
+    """Apply extra ImageMagick-specific fixes."""
+    # TODO: Improve this hack
+    target = (source_dir / "src" / "imagemagick" / "Magick++" / "fuzz" /
+              "build.sh")
+    if target.exists():
+      with open(target, encoding='utf-8') as f:
+        lines = f.readlines()
+      for x in range(3):
+        if lines and "zip" in lines[-x - 1]:
+          del lines[-x - 1]
+      with open(target, 'w', encoding='utf-8') as f:
+        f.write("".join(lines))
+    return True
+
+
+# Register the hack
+register_hack("imagemagick", ImageMagickHack)
diff --git a/infra/experimental/contrib/arvo/hacks/jbig2dec.py
b/infra/experimental/contrib/arvo/hacks/jbig2dec.py new file mode 100644 index 000000000000..59ae2ce38394 --- /dev/null +++ b/infra/experimental/contrib/arvo/hacks/jbig2dec.py @@ -0,0 +1,21 @@ +"""JBIG2DEC project-specific hacks.""" + +from . import ProjectHack, register_hack + + +class JBIG2DECHack(ProjectHack): + """Hacks for the JBIG2DEC project.""" + + def apply_dockerfile_fixes(self, dft) -> bool: + """Fix JBIG2DEC Dockerfile issues.""" + dft.replace(r'RUN cd tests .*', "") + return True + + def apply_build_script_fixes(self, dft) -> bool: + """Fix JBIG2DEC build script issues.""" + dft.replace('unzip.*', 'exit 0') + return True + + +# Register the hack +register_hack("jbig2dec", JBIG2DECHack) diff --git a/infra/experimental/contrib/arvo/hacks/lcms.py b/infra/experimental/contrib/arvo/hacks/lcms.py new file mode 100644 index 000000000000..ea2590071465 --- /dev/null +++ b/infra/experimental/contrib/arvo/hacks/lcms.py @@ -0,0 +1,17 @@ +"""LCMS project-specific hacks.""" + +from . import ProjectHack, register_hack + + +class LCMSHack(ProjectHack): + """Hacks for the LCMS project.""" + + def apply_dockerfile_fixes(self, dft) -> bool: + """Fix LCMS Dockerfile issues.""" + # TODO: improve this tmp patch + dft.replace(r'#add more seeds from the testbed dir.*\n', "") + return True + + +# Register the hack +register_hack("lcms", LCMSHack) diff --git a/infra/experimental/contrib/arvo/hacks/libheif.py b/infra/experimental/contrib/arvo/hacks/libheif.py new file mode 100644 index 000000000000..9653d69f8b18 --- /dev/null +++ b/infra/experimental/contrib/arvo/hacks/libheif.py @@ -0,0 +1,17 @@ +"""LibHeif project-specific hacks.""" + +from . 
import ProjectHack, register_hack, x265_fix + + +class LibHeifHack(ProjectHack): + """Hacks for the LibHeif project.""" + + def apply_dockerfile_fixes(self, dft) -> bool: + """Fix LibHeif Dockerfile issues.""" + # Apply x265 fixes + x265_fix(dft) + return True + + +# Register the hack +register_hack("libheif", LibHeifHack) diff --git a/infra/experimental/contrib/arvo/hacks/libredwg.py b/infra/experimental/contrib/arvo/hacks/libredwg.py new file mode 100644 index 000000000000..621d70992cb2 --- /dev/null +++ b/infra/experimental/contrib/arvo/hacks/libredwg.py @@ -0,0 +1,20 @@ +"""LibreDWG project-specific hacks.""" + +from . import ProjectHack, register_hack + + +class LibreDWGHack(ProjectHack): + """Hacks for the LibreDWG project.""" + + def apply_dockerfile_fixes(self, dft) -> bool: + """No Dockerfile fixes needed for LibreDWG.""" + return True + + def apply_build_script_fixes(self, dft) -> bool: + """Fix LibreDWG build script issues.""" + dft.replace(r'^make$', 'make -j`nproc`\n') + return True + + +# Register the hack +register_hack("libredwg", LibreDWGHack) diff --git a/infra/experimental/contrib/arvo/hacks/libreoffice.py b/infra/experimental/contrib/arvo/hacks/libreoffice.py new file mode 100644 index 000000000000..f642448f87f9 --- /dev/null +++ b/infra/experimental/contrib/arvo/hacks/libreoffice.py @@ -0,0 +1,49 @@ +"""LibreOffice project-specific hacks.""" + +from pathlib import Path +from . 
import ProjectHack, register_hack + + +class LibreOfficeHack(ProjectHack): + """Hacks for the LibreOffice project.""" + + def apply_dockerfile_fixes(self, dft) -> bool: + """Fix LibreOffice Dockerfile issues.""" + # Fix setup script and disable various commands + dft.str_replace( + 'RUN ./bin/oss-fuzz-setup.sh', + "RUN sed -i 's|svn export --force -q https://github.com|" + "#svn export --force -q https://github.com|g' " + "./bin/oss-fuzz-setup.sh") + dft.str_replace('RUN svn export', '# RUN svn export') + dft.str_replace('ADD ', '# ADD ') + dft.str_replace('RUN zip', '# RUN zip') + dft.str_replace('RUN mkdir afl-testcases', "# RUN mkdir afl-testcases") + dft.str_replace( + 'RUN ./bin/oss-fuzz-setup.sh', + "# RUN ./bin/oss-fuzz-setup.sh") # Avoid downloading not related stuff + return True + + def apply_build_script_fixes(self, dft) -> bool: + """Fix LibreOffice build script issues.""" + # If you don't want to destroy your life. + # Please leave this project alone. too hard to fix and the compiling + # takes several hours + line = '$SRC/libreoffice/bin/oss-fuzz-build.sh' + dft.insert_line_before( + line, "sed -i 's/make fuzzers/make fuzzers -i/g' " + "$SRC/libreoffice/bin/oss-fuzz-build.sh") + dft.insert_line_before( + line, "sed -n -i '/#starting corpuses/q;p' " + "$SRC/libreoffice/bin/oss-fuzz-build.sh") + dft.insert_line_before( + line, r"sed -n -i '/pushd instdir\/program/q;p' " + r"$SRC/libreoffice/bin/oss-fuzz-build.sh") + dft.insert_line_before( + line, 'echo "pushd instdir/program && mv *fuzzer $OUT" >> ' + '$SRC/libreoffice/bin/oss-fuzz-build.sh') + return True + + +# Register the hack +register_hack("libreoffice", LibreOfficeHack) diff --git a/infra/experimental/contrib/arvo/hacks/libyang.py b/infra/experimental/contrib/arvo/hacks/libyang.py new file mode 100644 index 000000000000..4a370f8573d5 --- /dev/null +++ b/infra/experimental/contrib/arvo/hacks/libyang.py @@ -0,0 +1,19 @@ +"""LibYang project-specific hacks.""" + +from . 
import ProjectHack, register_hack + + +class LibYangHack(ProjectHack): + """Hacks for the LibYang project.""" + + def apply_dockerfile_fixes(self, dft) -> bool: + """Fix LibYang Dockerfile issues.""" + dft.str_replace( + 'RUN git clone https://github.com/PCRE2Project/pcre2 pcre2 &&', + "RUN git clone https://github.com/PCRE2Project/pcre2 pcre2\n" + "RUN ") + return True + + +# Register the hack +register_hack("libyang", LibYangHack) diff --git a/infra/experimental/contrib/arvo/hacks/lwan.py b/infra/experimental/contrib/arvo/hacks/lwan.py new file mode 100644 index 000000000000..05f1f81334b8 --- /dev/null +++ b/infra/experimental/contrib/arvo/hacks/lwan.py @@ -0,0 +1,17 @@ +"""Lwan project-specific hacks.""" + +from . import ProjectHack, register_hack + + +class LwanHack(ProjectHack): + """Hacks for the Lwan project.""" + + def apply_dockerfile_fixes(self, dft) -> bool: + """Fix Lwan Dockerfile issues.""" + dft.str_replace('git://github.com/lpereira/lwan', + 'https://github.com/lpereira/lwan.git') + return True + + +# Register the hack +register_hack("lwan", LwanHack) diff --git a/infra/experimental/contrib/arvo/hacks/openh264.py b/infra/experimental/contrib/arvo/hacks/openh264.py new file mode 100644 index 000000000000..13db38c8a2d3 --- /dev/null +++ b/infra/experimental/contrib/arvo/hacks/openh264.py @@ -0,0 +1,30 @@ +"""OpenH264 project-specific hacks.""" + +from . 
import ProjectHack, register_hack + + +class OpenH264Hack(ProjectHack): + """Hacks for the OpenH264 project.""" + + def apply_dockerfile_fixes(self, dft) -> bool: + """No Dockerfile fixes needed for OpenH264.""" + return True + + def apply_build_script_fixes(self, dft) -> bool: + """Fix OpenH264 build script issues.""" + lines = dft.content.split("\n") + starts = -1 + ends = -1 + for num, line in enumerate(lines): + if "# prepare corpus" in line: + starts = num + elif "# build" in line: + ends = num + break + if starts != -1 and ends != -1: + dft.remove_range(starts, ends) + return True + + +# Register the hack +register_hack("openh264", OpenH264Hack) diff --git a/infra/experimental/contrib/arvo/hacks/quickjs.py b/infra/experimental/contrib/arvo/hacks/quickjs.py new file mode 100644 index 000000000000..6289f0404192 --- /dev/null +++ b/infra/experimental/contrib/arvo/hacks/quickjs.py @@ -0,0 +1,17 @@ +"""QuickJS project-specific hacks.""" + +from . import ProjectHack, register_hack + + +class QuickJSHack(ProjectHack): + """Hacks for the QuickJS project.""" + + def apply_dockerfile_fixes(self, dft) -> bool: + """Fix QuickJS Dockerfile issues.""" + dft.str_replace('https://github.com/horhof/quickjs', + 'https://github.com/bellard/quickjs') + return True + + +# Register the hack +register_hack("quickjs", QuickJSHack) diff --git a/infra/experimental/contrib/arvo/hacks/radare2.py b/infra/experimental/contrib/arvo/hacks/radare2.py new file mode 100644 index 000000000000..88cea82013b0 --- /dev/null +++ b/infra/experimental/contrib/arvo/hacks/radare2.py @@ -0,0 +1,17 @@ +"""Radare2 project-specific hacks.""" + +from . 
import ProjectHack, register_hack + + +class Radare2Hack(ProjectHack): + """Hacks for the Radare2 project.""" + + def apply_dockerfile_fixes(self, dft) -> bool: + """Fix Radare2 Dockerfile issues.""" + dft.str_replace("https://github.com/radare/radare2-regressions", + 'https://github.com/rlaemmert/radare2-regressions.git') + return True + + +# Register the hack +register_hack("radare2", Radare2Hack) diff --git a/infra/experimental/contrib/arvo/hacks/skia.py b/infra/experimental/contrib/arvo/hacks/skia.py new file mode 100644 index 000000000000..5cd1d891c733 --- /dev/null +++ b/infra/experimental/contrib/arvo/hacks/skia.py @@ -0,0 +1,19 @@ +"""Skia project-specific hacks.""" + +from . import ProjectHack, register_hack + + +class SkiaHack(ProjectHack): + """Hacks for the Skia project.""" + + def apply_dockerfile_fixes(self, dft) -> bool: + """Fix Skia Dockerfile issues.""" + # Comment out wget commands and fix build script + dft.str_replace('RUN wget', "# RUN wget") + dft.insert_line_after('COPY build.sh $SRC/', + "RUN sed -i 's/cp.*zip.*//g' $SRC/build.sh") + return True + + +# Register the hack +register_hack("skia", SkiaHack) diff --git a/infra/experimental/contrib/arvo/hacks/uwebsockets.py b/infra/experimental/contrib/arvo/hacks/uwebsockets.py new file mode 100644 index 000000000000..42f3ad756077 --- /dev/null +++ b/infra/experimental/contrib/arvo/hacks/uwebsockets.py @@ -0,0 +1,22 @@ +"""uWebSockets project-specific hacks.""" + +from . 
import ProjectHack, register_hack + + +class UWebSocketsHack(ProjectHack): + """Hacks for the uWebSockets project.""" + + def apply_dockerfile_fixes(self, dft) -> bool: + """No Dockerfile fixes needed for uWebSockets.""" + return True + + def apply_build_script_fixes(self, dft) -> bool: + """Fix uWebSockets build script issues.""" + # https://github.com/alexhultman/zlib -> https://github.com/madler/zlib.git + script = "sed -i 's/alexhultman/madler/g' fuzzing/Makefile" + dft.insert_line_at(0, script) + return True + + +# Register the hack +register_hack("uwebsockets", UWebSocketsHack) diff --git a/infra/experimental/contrib/arvo/hacks/wireshark.py b/infra/experimental/contrib/arvo/hacks/wireshark.py new file mode 100644 index 000000000000..1bb5f2184ae7 --- /dev/null +++ b/infra/experimental/contrib/arvo/hacks/wireshark.py @@ -0,0 +1,16 @@ +"""Wireshark project-specific hacks.""" + +from . import ProjectHack, register_hack + + +class WiresharkHack(ProjectHack): + """Hacks for the Wireshark project.""" + + def apply_dockerfile_fixes(self, dft) -> bool: + """Fix Wireshark Dockerfile issues.""" + dft.replace(r"RUN git clone .*wireshark.*", "") + return True + + +# Register the hack +register_hack("wireshark", WiresharkHack) diff --git a/infra/experimental/contrib/arvo/hacks/wolfssl.py b/infra/experimental/contrib/arvo/hacks/wolfssl.py new file mode 100644 index 000000000000..ed1e93bce6c5 --- /dev/null +++ b/infra/experimental/contrib/arvo/hacks/wolfssl.py @@ -0,0 +1,22 @@ +"""WolfSSL project-specific hacks.""" + +from . 
import ProjectHack, register_hack + + +class WolfSSLHack(ProjectHack): + """Hacks for the WolfSSL project.""" + + def apply_dockerfile_fixes(self, dft) -> bool: + """Fix WolfSSL Dockerfile issues.""" + # Replace gsutil cp command with a simple touch and zip + dft.str_replace( + 'RUN gsutil cp ' + 'gs://wolfssl-backup.clusterfuzz-external.appspot.com/' + 'corpus/libFuzzer/wolfssl_cryptofuzz-disable-fastmath/public.zip ' + '$SRC/corpus_wolfssl_disable-fastmath.zip', "RUN touch 0xdeadbeef && " + "zip $SRC/corpus_wolfssl_disable-fastmath.zip 0xdeadbeef") + return True + + +# Register the hack +register_hack("wolfssl", WolfSSLHack) diff --git a/infra/experimental/contrib/arvo/hacks/yara.py b/infra/experimental/contrib/arvo/hacks/yara.py new file mode 100644 index 000000000000..9250c5bf2833 --- /dev/null +++ b/infra/experimental/contrib/arvo/hacks/yara.py @@ -0,0 +1,19 @@ +"""YARA project-specific hacks.""" + +from . import ProjectHack, register_hack + + +class YARAHack(ProjectHack): + """Hacks for the YARA project.""" + + def apply_dockerfile_fixes(self, dft) -> bool: + """Fix YARA Dockerfile issues.""" + if 'bison' not in dft.content: + dft.insert_line_before( + "RUN git clone https://github.com/VirusTotal/yara.git", + "RUN apt install -y bison") + return True + + +# Register the hack +register_hack("yara", YARAHack) diff --git a/infra/experimental/contrib/arvo/string_replacement.json b/infra/experimental/contrib/arvo/string_replacement.json new file mode 100644 index 000000000000..348a514ea106 --- /dev/null +++ b/infra/experimental/contrib/arvo/string_replacement.json @@ -0,0 +1,96 @@ +{ + "http://download.icu-project.org/files/icu4c/59.1/": + "https://github.com/unicode-org/icu/releases/download/release-59-1/", + "git://git.gnome.org/libxml2": + "https://gitlab.gnome.org/GNOME/libxml2.git", + "svn co svn://vcs.exim.org/pcre2/code/trunk pcre2": + "git clone https://github.com/PCRE2Project/pcre2 pcre2", + "https://git.savannah.nongnu.org/r/freetype/freetype2": + 
"https://github.com/freetype/freetype2", + "https://git.savannah.gnu.org/git/freetype/freetype2.git": + "https://github.com/freetype/freetype2", + "git://git.sv.nongnu.org/freetype/freetype2.git": + "https://github.com/freetype/freetype2", + "ftp://ftp.unidata.ucar.edu/pub/netcdf/netcdf-4.4.1.1.tar.gz": + "-L http://ppmcore.mpi-cbg.de/upload/netcdf-4.4.1.1.tar.gz", + "RUN curl http": + "RUN curl -L http", + "&& curl http": + "&& curl -L http", + "https://github.com/01org/libva": + "https://github.com/intel/libva.git", + "https://github.com/intel/libva\n": + "https://github.com/intel/libva.git\n", + "http://www.zlib.net/zlib-1.2.11.tar.gz": + "https://www.zlib.net/fossils/zlib-1.2.11.tar.gz", + "https://jannau.net/dav1d_fuzzer_seed_corpus.zip": + "https://download.videolan.org/pub/videolan/testing/contrib/dav1d/dav1d_fuzzer_seed_corpus.zip", + "git://git.xiph.org/ogg.git": + "https://gitlab.xiph.org/xiph/ogg.git", + "https://github.com/xiph/ogg.git": + "https://gitlab.xiph.org/xiph/ogg.git", + "git://git.xiph.org/opus.git": + "https://gitlab.xiph.org/xiph/opus.git", + "git://git.xiph.org/theora.git": + "https://gitlab.xiph.org/xiph/theora.git", + "git://git.xiph.org/vorbis.git": + "https://gitlab.xiph.org/xiph/vorbis.git", + "svn co http://svn.xiph.org/trunk/ogg": + "git clone https://gitlab.xiph.org/xiph/ogg.git", + "git://git.videolan.org/git/x264.git": + "https://code.videolan.org/videolan/x264.git", + "http://lcamtuf.coredump.cx/afl/demo/afl_testcases.tgz": + "https://lcamtuf.coredump.cx/afl/demo/afl_testcases.tgz", + "https://downloads.apache.org/maven/maven-3/3.6.3/binaries/": + "https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.6.3/", + "https://downloads.apache.org/maven/maven-3/3.8.6/binaries/": + "https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.8.6/", + "https://downloads.apache.org/maven/maven-3/3.8.5/binaries/": + "https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.8.5/", + 
"https://dlcdn.apache.org/maven/maven-3/3.8.6/binaries/": + "https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.8.6/", + "https://dlcdn.apache.org/maven/maven-3/3.8.5/binaries/": + "https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.8.5/", + "https://opus-codec.org/static/testvectors/opus_testvectors.tar.gz": + "http://opus-codec.org/static/testvectors/opus_testvectors.tar.gz", + "https://anongit.freedesktop.org/git/harfbuzz.git": + "https://github.com/harfbuzz/harfbuzz.git", + "git://anongit.kde.org/extra-cmake-modules": + "https://invent.kde.org/frameworks/extra-cmake-modules.git", + "git://anongit.kde.org/kimageformats": + "https://invent.kde.org/frameworks/kimageformats.git", + "git://anongit.kde.org/karchive": + "https://invent.kde.org/frameworks/karchive.git", + "git://git.savannah.gnu.org/gnulib.git": + "https://github.com/coreutils/gnulib.git", + "svn co http://llvm.org/svn/llvm-project/llvm/trunk": + "git clone https://github.com/llvm/llvm-project.git", + "svn co svn://vcs.exim.org/pcre/code/trunk": + "git clone https://github.com/PhilipHazel/pcre2", + "https://github.com/cmeister2/libssh2.git": + "https://github.com/libssh2/libssh2.git", + "git://git.code.sf.net/p/matio/matio": + "https://github.com/tbeu/matio.git", + "https://github.com/cmeister2/aspell.git": + "https://github.com/gnuaspell/aspell.git", + "https://github.com/erikd/libsndfile.git": + "https://github.com/libsndfile/libsndfile.git", + "https://anongit.freedesktop.org/git/poppler/poppler.git": + "https://gitlab.freedesktop.org/poppler/poppler.git", + "https://gitlab.freedesktop.org/ceyhunalp/poppler.git": + "https://gitlab.freedesktop.org/poppler/poppler.git", + "git.ghostscript.com/ghostpdl.git": + "cgit.ghostscript.com/ghostpdl.git", + "https://github.com/guidovranken/cryptofuzz\n": + "https://github.com/MozillaSecurity/cryptofuzz.git\n", + "https://gnunet.org/git": + "https://git.gnunet.org", + " --depth 1": + "", + " --depth=1": + "", + " --depth ": + 
" --jobs ", + " --recursive ": + " " +} \ No newline at end of file From f0a3bdc9d36c0fe5f62b2a1afddaa69a5888e41a Mon Sep 17 00:00:00 2001 From: 079035 <62355576+079035@users.noreply.github.com> Date: Mon, 1 Sep 2025 15:14:53 -0400 Subject: [PATCH 2/6] refactor(arvo_data.py): reduce unnecessary variable allocations --- infra/experimental/contrib/arvo/arvo_data.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/infra/experimental/contrib/arvo/arvo_data.py b/infra/experimental/contrib/arvo/arvo_data.py index cce9d8b267fc..0bd387d6047e 100644 --- a/infra/experimental/contrib/arvo/arvo_data.py +++ b/infra/experimental/contrib/arvo/arvo_data.py @@ -82,8 +82,7 @@ def fix_dockerfile(dockerfile_path: str | Path, # Pass commit_date to the hack if it needs it if hasattr(hack, 'set_commit_date') and commit_date: hack.set_commit_date(commit_date) - success = hack.apply_dockerfile_fixes(dft) - if not success: + if not hack.apply_dockerfile_fixes(dft): return False dft.clean_comments() @@ -107,9 +106,7 @@ def fix_build_script(file_path: Path, project_name: str) -> bool: # Apply project-specific build script hacks hack = get_project_hack(project_name) - if hack: - success = hack.apply_build_script_fixes(dft) - if not success: + if hack and not hack.apply_build_script_fixes(dft): return False return dft.flush() @@ -130,11 +127,8 @@ def extra_scripts(project_name: str, source_dir: Path) -> bool: """ # Apply project-specific extra fixes hack = get_project_hack(project_name) - if hack: - success = hack.apply_extra_fixes(source_dir) - if not success: - return False - + if hack and not hack.apply_extra_fixes(source_dir): + return False return True From ec1c9cf81df74c4355dea24dc630865398e25e05 Mon Sep 17 00:00:00 2001 From: 079035 <62355576+079035@users.noreply.github.com> Date: Mon, 1 Sep 2025 17:11:56 -0400 Subject: [PATCH 3/6] refactor(hacks): dynamic importing using importlib + yapf styling --- infra/experimental/contrib/arvo/arvo_data.py | 2 +- 
.../contrib/arvo/hacks/__init__.py | 28 +++++++++++++++---- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/infra/experimental/contrib/arvo/arvo_data.py b/infra/experimental/contrib/arvo/arvo_data.py index 0bd387d6047e..20a786a49304 100644 --- a/infra/experimental/contrib/arvo/arvo_data.py +++ b/infra/experimental/contrib/arvo/arvo_data.py @@ -107,7 +107,7 @@ def fix_build_script(file_path: Path, project_name: str) -> bool: # Apply project-specific build script hacks hack = get_project_hack(project_name) if hack and not hack.apply_build_script_fixes(dft): - return False + return False return dft.flush() diff --git a/infra/experimental/contrib/arvo/hacks/__init__.py b/infra/experimental/contrib/arvo/hacks/__init__.py index 30943a506f22..bdc8a7aa7d35 100644 --- a/infra/experimental/contrib/arvo/hacks/__init__.py +++ b/infra/experimental/contrib/arvo/hacks/__init__.py @@ -5,6 +5,8 @@ hack functions. """ +import importlib + from abc import ABC, abstractmethod from pathlib import Path @@ -95,8 +97,24 @@ def x265_fix(dft: DockerfileModifier) -> None: "https://bitbucket.org/multicoreware/x265_git.git x265\n") -# Import all project hacks to register them -from . 
import (cryptofuzz, curl, dlplibs, duckdb, ffmpeg, flac, freeradius, - gdal, ghostscript, gnutls, graphicsmagick, imagemagick, jbig2dec, - lcms, libheif, libredwg, libreoffice, libyang, lwan, openh264, - quickjs, radare2, skia, uwebsockets, wireshark, wolfssl, yara) +def _load_project_hacks(): + """Dynamically load all project hack modules in this package.""" + package_path = Path(__file__).parent + package_name = __name__ + + # Find all Python files in the hacks directory (excluding __init__.py) + for item in package_path.glob("*.py"): + if item.name == "__init__.py" or item.name.startswith("__"): + continue + + module_name = item.stem + try: + # Import the module - this will trigger its register_hack() call + importlib.import_module(f".{module_name}", package_name) + except ImportError: + # Silently skip modules that fail to import - no hack to apply + pass + + +# Load all project hacks +_load_project_hacks() From 4ad9e20d8a64ab968be44c11b79e61d1bff3c996 Mon Sep 17 00:00:00 2001 From: 079035 <62355576+079035@users.noreply.github.com> Date: Mon, 1 Sep 2025 17:43:04 -0400 Subject: [PATCH 4/6] fix(hacks): fix false dockerfile hacks --- .../contrib/arvo/hacks/cryptofuzz.py | 2 - infra/experimental/contrib/arvo/hacks/curl.py | 19 -------- infra/experimental/contrib/arvo/hacks/flac.py | 18 ------- .../contrib/arvo/hacks/freeradius.py | 17 ------- infra/experimental/contrib/arvo/hacks/gdal.py | 48 ------------------- 5 files changed, 104 deletions(-) delete mode 100644 infra/experimental/contrib/arvo/hacks/curl.py delete mode 100644 infra/experimental/contrib/arvo/hacks/flac.py delete mode 100644 infra/experimental/contrib/arvo/hacks/freeradius.py delete mode 100644 infra/experimental/contrib/arvo/hacks/gdal.py diff --git a/infra/experimental/contrib/arvo/hacks/cryptofuzz.py b/infra/experimental/contrib/arvo/hacks/cryptofuzz.py index 64e4f53bc2e4..dec885c2a9fb 100644 --- a/infra/experimental/contrib/arvo/hacks/cryptofuzz.py +++ 
b/infra/experimental/contrib/arvo/hacks/cryptofuzz.py @@ -13,8 +13,6 @@ def apply_dockerfile_fixes(self, dft) -> bool: "RUN cd $SRC/libressl && ./update.sh", "RUN sed -n -i '/^# setup source paths$/,$p' $SRC/libressl/update.sh") - # Remove cryptofuzz-corpora line (from old implementation) - dft.replace(r".*https://github.com/guidovranken/cryptofuzz-corpora.*", "") return True diff --git a/infra/experimental/contrib/arvo/hacks/curl.py b/infra/experimental/contrib/arvo/hacks/curl.py deleted file mode 100644 index 85e99c459cd0..000000000000 --- a/infra/experimental/contrib/arvo/hacks/curl.py +++ /dev/null @@ -1,19 +0,0 @@ -"""Curl project-specific hacks.""" - -from . import ProjectHack, register_hack - - -class CurlHack(ProjectHack): - """Hacks for the Curl project.""" - - def apply_dockerfile_fixes(self, dft) -> bool: - """Fix Curl Dockerfile issues.""" - # Check if download_zlib.sh exists and replace zlib URL - dft.append_line( - 'RUN [ -f "/src/curl_fuzzer/scripts/download_zlib.sh" ] && sed -i \'s|https://www.zlib.net/zlib-1.2.11.tar.gz|https://www.zlib.net/fossils/zlib-1.2.11.tar.gz|g\' /src/curl_fuzzer/scripts/download_zlib.sh || true' - ) - return True - - -# Register the hack -register_hack("curl", CurlHack) diff --git a/infra/experimental/contrib/arvo/hacks/flac.py b/infra/experimental/contrib/arvo/hacks/flac.py deleted file mode 100644 index f1c44d2c6e42..000000000000 --- a/infra/experimental/contrib/arvo/hacks/flac.py +++ /dev/null @@ -1,18 +0,0 @@ -"""FLAC project-specific hacks.""" - -from . 
import ProjectHack, register_hack - - -class FLACHack(ProjectHack): - """Hacks for the FLAC project.""" - - def apply_dockerfile_fixes(self, dft) -> bool: - """Fix FLAC Dockerfile issues.""" - # Check if the problematic repository is referenced - if dft.locate_str('guidovranken/flac-fuzzers') is not False: - return False # Not fixable since the repo is removed and there is no mirror - return True - - -# Register the hack -register_hack("flac", FLACHack) diff --git a/infra/experimental/contrib/arvo/hacks/freeradius.py b/infra/experimental/contrib/arvo/hacks/freeradius.py deleted file mode 100644 index f8533682c06c..000000000000 --- a/infra/experimental/contrib/arvo/hacks/freeradius.py +++ /dev/null @@ -1,17 +0,0 @@ -"""FreeRADIUS project-specific hacks.""" - -from . import ProjectHack, register_hack - - -class FreeRADIUSHack(ProjectHack): - """Hacks for the FreeRADIUS project.""" - - def apply_dockerfile_fixes(self, dft) -> bool: - """Fix FreeRADIUS Dockerfile issues.""" - dft.str_replace('sha256sum -c', 'pwd') - dft.str_replace("curl -s -O ", 'curl -s -O -L ') - return True - - -# Register the hack -register_hack("freeradius", FreeRADIUSHack) diff --git a/infra/experimental/contrib/arvo/hacks/gdal.py b/infra/experimental/contrib/arvo/hacks/gdal.py deleted file mode 100644 index a77ad703a5b2..000000000000 --- a/infra/experimental/contrib/arvo/hacks/gdal.py +++ /dev/null @@ -1,48 +0,0 @@ -"""GDAL project-specific hacks.""" - -from . 
import ProjectHack, register_hack - - -class GDALHack(ProjectHack): - """Hacks for the GDAL project.""" - - def apply_dockerfile_fixes(self, dft) -> bool: - """Fix GDAL Dockerfile issues.""" - if not self.commit_date: - # GDAL hacks require commit_date to work properly - return False - - dft.append_line(f'ARG ARVO_TS="{self.commit_date.isoformat()}"') - - # Remove all --depth and checkout the cloned repo in build.sh - build_clone_fix = r'''RUN awk -v ts="$ARVO_TS" '\ - /git clone/ { \ - gsub(/--depth[= ][0-9]+/, "", $0); \ - if (NF == 3) dir = $3; \ - else { \ - repo = $NF; \ - gsub(/.*\//, "", repo); \ - gsub(/\.git$/, "", repo); \ - dir = repo; \ - } \ - print $0 " && (pushd " dir " && commit=$(git log --before=\"" ts "\" --format=\"%H\" -n1) && git reset --hard $commit || exit 99 && popd) && (pushd " dir " && git submodule init && git submodule update --force && popd)"; \ - next \ - } \ - { print }' $SRC/build.sh > $SRC/build.sh.tmp && mv $SRC/build.sh.tmp $SRC/build.sh - ''' - dft.append_line(build_clone_fix) - - # Fix GNUmakefile - line = '''RUN [ -f /src/gdal/gdal/GNUmakefile ] && sed -i 's|(cd frmts; $(MAKE))|(cd frmts; $(MAKE) clean; $(MAKE))|' /src/gdal/gdal/GNUmakefile || true''' - dft.append_line(line) - - # Fix build script path - dft.append_line( - '''RUN sed -i 's|BUILD_SH_FROM_REPO="$SRC/gdal/fuzzers/build.sh"|BUILD_SH_FROM_REPO=$0|g' $SRC/build.sh''' - ) - - return True - - -# Register the hack -register_hack("gdal", GDALHack) From d6d34613c3bc13909b4f2d831a26600acf8fe776 Mon Sep 17 00:00:00 2001 From: 079035 <62355576+079035@users.noreply.github.com> Date: Mon, 1 Sep 2025 18:57:20 -0400 Subject: [PATCH 5/6] refactor(hacks): not using project hack dict and use importlib directly --- .../contrib/arvo/hacks/__init__.py | 51 ++++++------------- .../contrib/arvo/hacks/cryptofuzz.py | 6 +-- .../contrib/arvo/hacks/dlplibs.py | 6 +-- .../experimental/contrib/arvo/hacks/duckdb.py | 6 +-- .../experimental/contrib/arvo/hacks/ffmpeg.py | 6 +-- 
.../contrib/arvo/hacks/ghostscript.py | 6 +-- .../experimental/contrib/arvo/hacks/gnutls.py | 6 +-- .../contrib/arvo/hacks/graphicsmagick.py | 6 +-- .../contrib/arvo/hacks/imagemagick.py | 6 +-- .../contrib/arvo/hacks/jbig2dec.py | 6 +-- infra/experimental/contrib/arvo/hacks/lcms.py | 6 +-- .../contrib/arvo/hacks/libheif.py | 6 +-- .../contrib/arvo/hacks/libredwg.py | 6 +-- .../contrib/arvo/hacks/libreoffice.py | 7 +-- .../contrib/arvo/hacks/libyang.py | 6 +-- infra/experimental/contrib/arvo/hacks/lwan.py | 6 +-- .../contrib/arvo/hacks/openh264.py | 6 +-- .../contrib/arvo/hacks/quickjs.py | 6 +-- .../contrib/arvo/hacks/radare2.py | 6 +-- infra/experimental/contrib/arvo/hacks/skia.py | 6 +-- .../contrib/arvo/hacks/uwebsockets.py | 6 +-- .../contrib/arvo/hacks/wireshark.py | 6 +-- .../contrib/arvo/hacks/wolfssl.py | 6 +-- infra/experimental/contrib/arvo/hacks/yara.py | 6 +-- 24 files changed, 39 insertions(+), 151 deletions(-) diff --git a/infra/experimental/contrib/arvo/hacks/__init__.py b/infra/experimental/contrib/arvo/hacks/__init__.py index bdc8a7aa7d35..ee8d081ed676 100644 --- a/infra/experimental/contrib/arvo/hacks/__init__.py +++ b/infra/experimental/contrib/arvo/hacks/__init__.py @@ -64,21 +64,25 @@ def apply_extra_fixes(self, source_dir: Path) -> bool: return True -# Registry of all project hacks -PROJECT_HACKS = {} - +def get_project_hack(project_name: str) -> ProjectHack | None: + """Get a project hack instance by name using dynamic import.""" + try: + # Try to import the module for this project + module = importlib.import_module(f".{project_name}", __name__) -def register_hack(project_name: str, hack_class: type): - """Register a project hack class.""" - PROJECT_HACKS[project_name] = hack_class + # Look for a class that ends with 'Hack' and is a subclass of ProjectHack + for attr_name in dir(module): + attr = getattr(module, attr_name) + if (isinstance(attr, type) and issubclass(attr, ProjectHack) and + attr != ProjectHack): + return attr() + # If no 
hack class found, return None + return None -def get_project_hack(project_name: str) -> ProjectHack | None: - """Get a project hack instance by name.""" - hack_class = PROJECT_HACKS.get(project_name) - if hack_class: - return hack_class() - return None + except ImportError: + # No hack module for this project + return None # Helper functions that can be reused across projects @@ -95,26 +99,3 @@ def x265_fix(dft: DockerfileModifier) -> None: dft.replace( r'RUN\shg\sclone\s.*hg.videolan.org/x265\s*(x265)*', "RUN git clone " "https://bitbucket.org/multicoreware/x265_git.git x265\n") - - -def _load_project_hacks(): - """Dynamically load all project hack modules in this package.""" - package_path = Path(__file__).parent - package_name = __name__ - - # Find all Python files in the hacks directory (excluding __init__.py) - for item in package_path.glob("*.py"): - if item.name == "__init__.py" or item.name.startswith("__"): - continue - - module_name = item.stem - try: - # Import the module - this will trigger its register_hack() call - importlib.import_module(f".{module_name}", package_name) - except ImportError: - # Silently skip modules that fail to import - no hack to apply - pass - - -# Load all project hacks -_load_project_hacks() diff --git a/infra/experimental/contrib/arvo/hacks/cryptofuzz.py b/infra/experimental/contrib/arvo/hacks/cryptofuzz.py index dec885c2a9fb..4e510c15334c 100644 --- a/infra/experimental/contrib/arvo/hacks/cryptofuzz.py +++ b/infra/experimental/contrib/arvo/hacks/cryptofuzz.py @@ -1,6 +1,6 @@ """Cryptofuzz project-specific hacks.""" -from . import ProjectHack, register_hack +from . 
import ProjectHack class CryptofuzzHack(ProjectHack): @@ -14,7 +14,3 @@ def apply_dockerfile_fixes(self, dft) -> bool: "RUN sed -n -i '/^# setup source paths$/,$p' $SRC/libressl/update.sh") return True - - -# Register the hack -register_hack("cryptofuzz", CryptofuzzHack) diff --git a/infra/experimental/contrib/arvo/hacks/dlplibs.py b/infra/experimental/contrib/arvo/hacks/dlplibs.py index 6ce57f61894d..8331ae3368ce 100644 --- a/infra/experimental/contrib/arvo/hacks/dlplibs.py +++ b/infra/experimental/contrib/arvo/hacks/dlplibs.py @@ -1,6 +1,6 @@ """DLPLibs project-specific hacks.""" -from . import ProjectHack, register_hack +from . import ProjectHack class DLPLibsHack(ProjectHack): @@ -11,7 +11,3 @@ def apply_dockerfile_fixes(self, dft) -> bool: dft.replace(r"ADD", '# ADD') dft.replace(r"RUN wget", '#RUN wget') return True - - -# Register the hack -register_hack("dlplibs", DLPLibsHack) diff --git a/infra/experimental/contrib/arvo/hacks/duckdb.py b/infra/experimental/contrib/arvo/hacks/duckdb.py index 24b78833ab47..b6bce8e2b6ba 100644 --- a/infra/experimental/contrib/arvo/hacks/duckdb.py +++ b/infra/experimental/contrib/arvo/hacks/duckdb.py @@ -1,6 +1,6 @@ """DuckDB project-specific hacks.""" -from . import ProjectHack, register_hack +from . import ProjectHack class DuckDBHack(ProjectHack): @@ -14,7 +14,3 @@ def apply_build_script_fixes(self, dft) -> bool: """Fix DuckDB build script issues.""" dft.replace(r'^make$', 'make -j`nproc`\n') return True - - -# Register the hack -register_hack("duckdb", DuckDBHack) diff --git a/infra/experimental/contrib/arvo/hacks/ffmpeg.py b/infra/experimental/contrib/arvo/hacks/ffmpeg.py index ca53d904d6b4..83d3785d4c77 100644 --- a/infra/experimental/contrib/arvo/hacks/ffmpeg.py +++ b/infra/experimental/contrib/arvo/hacks/ffmpeg.py @@ -1,6 +1,6 @@ """FFmpeg project-specific hacks.""" -from . import ProjectHack, register_hack, x265_fix +from . 
import ProjectHack, x265_fix class FFmpegHack(ProjectHack): @@ -11,7 +11,3 @@ def apply_dockerfile_fixes(self, dft) -> bool: # Apply x265 fixes x265_fix(dft) return True - - -# Register the hack -register_hack("ffmpeg", FFmpegHack) diff --git a/infra/experimental/contrib/arvo/hacks/ghostscript.py b/infra/experimental/contrib/arvo/hacks/ghostscript.py index 0f854dd61626..d9dd01020a66 100644 --- a/infra/experimental/contrib/arvo/hacks/ghostscript.py +++ b/infra/experimental/contrib/arvo/hacks/ghostscript.py @@ -1,6 +1,6 @@ """Ghostscript project-specific hacks.""" -from . import ProjectHack, register_hack +from . import ProjectHack class GhostscriptHack(ProjectHack): @@ -16,7 +16,3 @@ def apply_build_script_fixes(self, dft) -> bool: new = "cp -r $SRC/freetype freetype" dft.replace(old, new) return True - - -# Register the hack -register_hack("ghostscript", GhostscriptHack) diff --git a/infra/experimental/contrib/arvo/hacks/gnutls.py b/infra/experimental/contrib/arvo/hacks/gnutls.py index c07a07abba44..b0c634289bb5 100644 --- a/infra/experimental/contrib/arvo/hacks/gnutls.py +++ b/infra/experimental/contrib/arvo/hacks/gnutls.py @@ -1,6 +1,6 @@ """GnuTLS project-specific hacks.""" -from . import ProjectHack, register_hack +from . import ProjectHack class GnuTLSHack(ProjectHack): @@ -12,7 +12,3 @@ def apply_dockerfile_fixes(self, dft) -> bool: dft.replace(r".*client_corpus_no_fuzzer_mode.*", "") dft.replace(r".*server_corpus_no_fuzzer_mode.*", "") return True - - -# Register the hack -register_hack("gnutls", GnuTLSHack) diff --git a/infra/experimental/contrib/arvo/hacks/graphicsmagick.py b/infra/experimental/contrib/arvo/hacks/graphicsmagick.py index 00d3f0cee485..1e3a5eff8edb 100644 --- a/infra/experimental/contrib/arvo/hacks/graphicsmagick.py +++ b/infra/experimental/contrib/arvo/hacks/graphicsmagick.py @@ -1,6 +1,6 @@ """GraphicsMagick project-specific hacks.""" -from . import ProjectHack, register_hack, x265_fix +from . 
import ProjectHack, x265_fix class GraphicsMagickHack(ProjectHack): @@ -19,7 +19,3 @@ def apply_dockerfile_fixes(self, dft) -> bool: # Apply x265 fixes x265_fix(dft) return True - - -# Register the hack -register_hack("graphicsmagick", GraphicsMagickHack) diff --git a/infra/experimental/contrib/arvo/hacks/imagemagick.py b/infra/experimental/contrib/arvo/hacks/imagemagick.py index 6188913155b2..530ac3047064 100644 --- a/infra/experimental/contrib/arvo/hacks/imagemagick.py +++ b/infra/experimental/contrib/arvo/hacks/imagemagick.py @@ -1,7 +1,7 @@ """ImageMagick project-specific hacks.""" from pathlib import Path -from . import ProjectHack, register_hack +from . import ProjectHack class ImageMagickHack(ProjectHack): @@ -28,7 +28,3 @@ def apply_extra_fixes(self, source_dir: Path) -> bool: with open(target, 'w', encoding='utf-8') as f: f.write("\n".join(lines)) return True - - -# Register the hack -register_hack("imagemagick", ImageMagickHack) diff --git a/infra/experimental/contrib/arvo/hacks/jbig2dec.py b/infra/experimental/contrib/arvo/hacks/jbig2dec.py index 59ae2ce38394..9a138f807b8f 100644 --- a/infra/experimental/contrib/arvo/hacks/jbig2dec.py +++ b/infra/experimental/contrib/arvo/hacks/jbig2dec.py @@ -1,6 +1,6 @@ """JBIG2DEC project-specific hacks.""" -from . import ProjectHack, register_hack +from . import ProjectHack class JBIG2DECHack(ProjectHack): @@ -15,7 +15,3 @@ def apply_build_script_fixes(self, dft) -> bool: """Fix JBIG2DEC build script issues.""" dft.replace('unzip.*', 'exit 0') return True - - -# Register the hack -register_hack("jbig2dec", JBIG2DECHack) diff --git a/infra/experimental/contrib/arvo/hacks/lcms.py b/infra/experimental/contrib/arvo/hacks/lcms.py index ea2590071465..ae6e6f183c44 100644 --- a/infra/experimental/contrib/arvo/hacks/lcms.py +++ b/infra/experimental/contrib/arvo/hacks/lcms.py @@ -1,6 +1,6 @@ """LCMS project-specific hacks.""" -from . import ProjectHack, register_hack +from . 
import ProjectHack class LCMSHack(ProjectHack): @@ -11,7 +11,3 @@ def apply_dockerfile_fixes(self, dft) -> bool: # TODO: improve this tmp patch dft.replace(r'#add more seeds from the testbed dir.*\n', "") return True - - -# Register the hack -register_hack("lcms", LCMSHack) diff --git a/infra/experimental/contrib/arvo/hacks/libheif.py b/infra/experimental/contrib/arvo/hacks/libheif.py index 9653d69f8b18..9bfbc62ac82e 100644 --- a/infra/experimental/contrib/arvo/hacks/libheif.py +++ b/infra/experimental/contrib/arvo/hacks/libheif.py @@ -1,6 +1,6 @@ """LibHeif project-specific hacks.""" -from . import ProjectHack, register_hack, x265_fix +from . import ProjectHack, x265_fix class LibHeifHack(ProjectHack): @@ -11,7 +11,3 @@ def apply_dockerfile_fixes(self, dft) -> bool: # Apply x265 fixes x265_fix(dft) return True - - -# Register the hack -register_hack("libheif", LibHeifHack) diff --git a/infra/experimental/contrib/arvo/hacks/libredwg.py b/infra/experimental/contrib/arvo/hacks/libredwg.py index 621d70992cb2..ead2b708d2a6 100644 --- a/infra/experimental/contrib/arvo/hacks/libredwg.py +++ b/infra/experimental/contrib/arvo/hacks/libredwg.py @@ -1,6 +1,6 @@ """LibreDWG project-specific hacks.""" -from . import ProjectHack, register_hack +from . import ProjectHack class LibreDWGHack(ProjectHack): @@ -14,7 +14,3 @@ def apply_build_script_fixes(self, dft) -> bool: """Fix LibreDWG build script issues.""" dft.replace(r'^make$', 'make -j`nproc`\n') return True - - -# Register the hack -register_hack("libredwg", LibreDWGHack) diff --git a/infra/experimental/contrib/arvo/hacks/libreoffice.py b/infra/experimental/contrib/arvo/hacks/libreoffice.py index f642448f87f9..f635dcbcfa85 100644 --- a/infra/experimental/contrib/arvo/hacks/libreoffice.py +++ b/infra/experimental/contrib/arvo/hacks/libreoffice.py @@ -1,7 +1,6 @@ """LibreOffice project-specific hacks.""" -from pathlib import Path -from . import ProjectHack, register_hack +from . 
import ProjectHack class LibreOfficeHack(ProjectHack): @@ -43,7 +42,3 @@ def apply_build_script_fixes(self, dft) -> bool: line, 'echo "pushd instdir/program && mv *fuzzer $OUT" >> ' '$SRC/libreoffice/bin/oss-fuzz-build.sh') return True - - -# Register the hack -register_hack("libreoffice", LibreOfficeHack) diff --git a/infra/experimental/contrib/arvo/hacks/libyang.py b/infra/experimental/contrib/arvo/hacks/libyang.py index 4a370f8573d5..95aee82d7a44 100644 --- a/infra/experimental/contrib/arvo/hacks/libyang.py +++ b/infra/experimental/contrib/arvo/hacks/libyang.py @@ -1,6 +1,6 @@ """LibYang project-specific hacks.""" -from . import ProjectHack, register_hack +from . import ProjectHack class LibYangHack(ProjectHack): @@ -13,7 +13,3 @@ def apply_dockerfile_fixes(self, dft) -> bool: "RUN git clone https://github.com/PCRE2Project/pcre2 pcre2\n" "RUN ") return True - - -# Register the hack -register_hack("libyang", LibYangHack) diff --git a/infra/experimental/contrib/arvo/hacks/lwan.py b/infra/experimental/contrib/arvo/hacks/lwan.py index 05f1f81334b8..926c461bea30 100644 --- a/infra/experimental/contrib/arvo/hacks/lwan.py +++ b/infra/experimental/contrib/arvo/hacks/lwan.py @@ -1,6 +1,6 @@ """Lwan project-specific hacks.""" -from . import ProjectHack, register_hack +from . import ProjectHack class LwanHack(ProjectHack): @@ -11,7 +11,3 @@ def apply_dockerfile_fixes(self, dft) -> bool: dft.str_replace('git://github.com/lpereira/lwan', 'https://github.com/lpereira/lwan.git') return True - - -# Register the hack -register_hack("lwan", LwanHack) diff --git a/infra/experimental/contrib/arvo/hacks/openh264.py b/infra/experimental/contrib/arvo/hacks/openh264.py index 13db38c8a2d3..b6bef96b702a 100644 --- a/infra/experimental/contrib/arvo/hacks/openh264.py +++ b/infra/experimental/contrib/arvo/hacks/openh264.py @@ -1,6 +1,6 @@ """OpenH264 project-specific hacks.""" -from . import ProjectHack, register_hack +from . 
import ProjectHack class OpenH264Hack(ProjectHack): @@ -24,7 +24,3 @@ def apply_build_script_fixes(self, dft) -> bool: if starts != -1 and ends != -1: dft.remove_range(starts, ends) return True - - -# Register the hack -register_hack("openh264", OpenH264Hack) diff --git a/infra/experimental/contrib/arvo/hacks/quickjs.py b/infra/experimental/contrib/arvo/hacks/quickjs.py index 6289f0404192..59c014fb52ed 100644 --- a/infra/experimental/contrib/arvo/hacks/quickjs.py +++ b/infra/experimental/contrib/arvo/hacks/quickjs.py @@ -1,6 +1,6 @@ """QuickJS project-specific hacks.""" -from . import ProjectHack, register_hack +from . import ProjectHack class QuickJSHack(ProjectHack): @@ -11,7 +11,3 @@ def apply_dockerfile_fixes(self, dft) -> bool: dft.str_replace('https://github.com/horhof/quickjs', 'https://github.com/bellard/quickjs') return True - - -# Register the hack -register_hack("quickjs", QuickJSHack) diff --git a/infra/experimental/contrib/arvo/hacks/radare2.py b/infra/experimental/contrib/arvo/hacks/radare2.py index 88cea82013b0..07c9b9407253 100644 --- a/infra/experimental/contrib/arvo/hacks/radare2.py +++ b/infra/experimental/contrib/arvo/hacks/radare2.py @@ -1,6 +1,6 @@ """Radare2 project-specific hacks.""" -from . import ProjectHack, register_hack +from . import ProjectHack class Radare2Hack(ProjectHack): @@ -11,7 +11,3 @@ def apply_dockerfile_fixes(self, dft) -> bool: dft.str_replace("https://github.com/radare/radare2-regressions", 'https://github.com/rlaemmert/radare2-regressions.git') return True - - -# Register the hack -register_hack("radare2", Radare2Hack) diff --git a/infra/experimental/contrib/arvo/hacks/skia.py b/infra/experimental/contrib/arvo/hacks/skia.py index 5cd1d891c733..f7f47b6f4528 100644 --- a/infra/experimental/contrib/arvo/hacks/skia.py +++ b/infra/experimental/contrib/arvo/hacks/skia.py @@ -1,6 +1,6 @@ """Skia project-specific hacks.""" -from . import ProjectHack, register_hack +from . 
import ProjectHack class SkiaHack(ProjectHack): @@ -13,7 +13,3 @@ def apply_dockerfile_fixes(self, dft) -> bool: dft.insert_line_after('COPY build.sh $SRC/', "RUN sed -i 's/cp.*zip.*//g' $SRC/build.sh") return True - - -# Register the hack -register_hack("skia", SkiaHack) diff --git a/infra/experimental/contrib/arvo/hacks/uwebsockets.py b/infra/experimental/contrib/arvo/hacks/uwebsockets.py index 42f3ad756077..1f24cbf100f5 100644 --- a/infra/experimental/contrib/arvo/hacks/uwebsockets.py +++ b/infra/experimental/contrib/arvo/hacks/uwebsockets.py @@ -1,6 +1,6 @@ """uWebSockets project-specific hacks.""" -from . import ProjectHack, register_hack +from . import ProjectHack class UWebSocketsHack(ProjectHack): @@ -16,7 +16,3 @@ def apply_build_script_fixes(self, dft) -> bool: script = "sed -i 's/alexhultman/madler/g' fuzzing/Makefile" dft.insert_line_at(0, script) return True - - -# Register the hack -register_hack("uwebsockets", UWebSocketsHack) diff --git a/infra/experimental/contrib/arvo/hacks/wireshark.py b/infra/experimental/contrib/arvo/hacks/wireshark.py index 1bb5f2184ae7..0153a1ce2cda 100644 --- a/infra/experimental/contrib/arvo/hacks/wireshark.py +++ b/infra/experimental/contrib/arvo/hacks/wireshark.py @@ -1,6 +1,6 @@ """Wireshark project-specific hacks.""" -from . import ProjectHack, register_hack +from . import ProjectHack class WiresharkHack(ProjectHack): @@ -10,7 +10,3 @@ def apply_dockerfile_fixes(self, dft) -> bool: """Fix Wireshark Dockerfile issues.""" dft.replace(r"RUN git clone .*wireshark.*", "") return True - - -# Register the hack -register_hack("wireshark", WiresharkHack) diff --git a/infra/experimental/contrib/arvo/hacks/wolfssl.py b/infra/experimental/contrib/arvo/hacks/wolfssl.py index ed1e93bce6c5..c86c6dbc515b 100644 --- a/infra/experimental/contrib/arvo/hacks/wolfssl.py +++ b/infra/experimental/contrib/arvo/hacks/wolfssl.py @@ -1,6 +1,6 @@ """WolfSSL project-specific hacks.""" -from . import ProjectHack, register_hack +from . 
import ProjectHack class WolfSSLHack(ProjectHack): @@ -16,7 +16,3 @@ def apply_dockerfile_fixes(self, dft) -> bool: '$SRC/corpus_wolfssl_disable-fastmath.zip', "RUN touch 0xdeadbeef && " "zip $SRC/corpus_wolfssl_disable-fastmath.zip 0xdeadbeef") return True - - -# Register the hack -register_hack("wolfssl", WolfSSLHack) diff --git a/infra/experimental/contrib/arvo/hacks/yara.py b/infra/experimental/contrib/arvo/hacks/yara.py index 9250c5bf2833..7e2fe410561e 100644 --- a/infra/experimental/contrib/arvo/hacks/yara.py +++ b/infra/experimental/contrib/arvo/hacks/yara.py @@ -1,6 +1,6 @@ """YARA project-specific hacks.""" -from . import ProjectHack, register_hack +from . import ProjectHack class YARAHack(ProjectHack): @@ -13,7 +13,3 @@ def apply_dockerfile_fixes(self, dft) -> bool: "RUN git clone https://github.com/VirusTotal/yara.git", "RUN apt install -y bison") return True - - -# Register the hack -register_hack("yara", YARAHack) From e4ac4132396bbc2431b665869f99ea92b36c34c0 Mon Sep 17 00:00:00 2001 From: 079035 <62355576+079035@users.noreply.github.com> Date: Wed, 3 Sep 2025 07:47:36 -0400 Subject: [PATCH 6/6] fix(hacks-init): escaping period mentioned by github bot --- infra/experimental/contrib/arvo/hacks/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infra/experimental/contrib/arvo/hacks/__init__.py b/infra/experimental/contrib/arvo/hacks/__init__.py index ee8d081ed676..1faa4a9c0c86 100644 --- a/infra/experimental/contrib/arvo/hacks/__init__.py +++ b/infra/experimental/contrib/arvo/hacks/__init__.py @@ -97,5 +97,5 @@ def x265_fix(dft: DockerfileModifier) -> None: "RUN git clone " "https://bitbucket.org/multicoreware/x265_git.git x265\n") dft.replace( - r'RUN\shg\sclone\s.*hg.videolan.org/x265\s*(x265)*', "RUN git clone " + r'RUN\shg\sclone\s.*hg\.videolan\.org/x265\s*(x265)*', "RUN git clone " "https://bitbucket.org/multicoreware/x265_git.git x265\n")