From 51a065e23af76917cc6848031f00e7768b895f98 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 5 Apr 2026 01:31:33 +0000 Subject: [PATCH 1/8] Add Legion Slim 5 RTX 4060 optimized inference example Agent-Logs-Url: https://github.com/adelmorad273-cmyk/llama-cpp-python/sessions/8d4c18b1-fddb-4d4d-8d5c-f8e9099eeb94 Co-authored-by: adelmorad273-cmyk <269225024+adelmorad273-cmyk@users.noreply.github.com> --- .../high_level_api/legion_slim5_rtx4060.py | 132 ++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 examples/high_level_api/legion_slim5_rtx4060.py diff --git a/examples/high_level_api/legion_slim5_rtx4060.py b/examples/high_level_api/legion_slim5_rtx4060.py new file mode 100644 index 0000000000..03ab135723 --- /dev/null +++ b/examples/high_level_api/legion_slim5_rtx4060.py @@ -0,0 +1,132 @@ +""" +Optimized llama-cpp-python configuration for: + Lenovo Legion Slim 5 (16" RH8) + - CPU: Intel Core i7-13700H (6P + 8E cores) + - GPU: NVIDIA GeForce RTX 4060 Laptop (8 GB VRAM, GDDR6) + - RAM: 16 GB DDR5-5200 + - SSD: 1 TB NVMe + +Install with CUDA support first: + CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python --force-reinstall --no-cache-dir +""" + +import argparse +import json + +from llama_cpp import Llama + +# --------------------------------------------------------------------------- +# Hardware constants for this machine +# --------------------------------------------------------------------------- +VRAM_GB = 8 # RTX 4060 Laptop VRAM +N_PHYSICAL_CORES = 6 # P-cores only (best single-thread perf on i7-13700H) + +# --------------------------------------------------------------------------- +# Recommended quantisation levels (pick one based on your model size) +# --------------------------------------------------------------------------- +# Model 7B / 8B: +# Q5_K_M → ~5.5 GB VRAM ✅ recommended +# Q6_K → ~6.5 GB VRAM ✅ excellent quality +# Q8_0 → ~8.5 GB VRAM ⚠️ tight fit, may spill to CPU RAM +# +# Model 13B: +# Q4_K_M → ~7.5 GB VRAM ✅ fits +# Q5_K_M → ~9.0 GB VRAM ❌ exceeds VRAM + + +def build_llm( + model_path: str, + n_ctx: int = 4096, + n_gpu_layers: int = -1, # -1 = offload all layers to GPU + n_batch: int = 512, + verbose: bool = False, +) -> Llama: + """ + Create a Llama instance tuned for the Legion Slim 5 / RTX 4060 laptop. + + Args: + model_path: Path to the .gguf model file. + n_ctx: Context window size (tokens). 4096 is safe for 8 GB VRAM. + n_gpu_layers: Number of transformer layers to offload to the GPU. + Use -1 to offload everything (default). Reduce if you + see CUDA out-of-memory errors. + n_batch: Batch size for prompt evaluation. + verbose: Print llama.cpp loading messages. + + Returns: + A ready-to-use Llama instance. + """ + return Llama( + model_path=model_path, + # --- GPU offload --- + n_gpu_layers=n_gpu_layers, # RTX 4060 has 8 GB – offload as much as fits + offload_kqv=True, # keep KV-cache on GPU for faster inference + # --- CPU threads --- + n_threads=N_PHYSICAL_CORES, # use P-cores only for best throughput + n_threads_batch=N_PHYSICAL_CORES, + # --- Context / batching --- + n_ctx=n_ctx, + n_batch=n_batch, + # --- Memory --- + use_mmap=True, # fast model loading from NVMe SSD + use_mlock=False, # don't pin 16 GB RAM – OS needs headroom + # --- Misc --- + verbose=verbose, + ) + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Run inference optimised for the Lenovo Legion Slim 5 / RTX 4060" + ) + parser.add_argument( + "-m", "--model", + required=True, + help="Path to the .gguf model file (e.g. mistral-7b-Q5_K_M.gguf)", + ) + parser.add_argument( + "-p", "--prompt", + default="What are the names of the planets in the solar system?", + help="Prompt text", + ) + parser.add_argument( + "--max-tokens", type=int, default=256, + help="Maximum number of tokens to generate", + ) + parser.add_argument( + "--n-ctx", type=int, default=4096, + help="Context window size", + ) + parser.add_argument( + "--n-gpu-layers", type=int, default=-1, + help="GPU layers to offload (-1 = all)", + ) + parser.add_argument( + "--verbose", action="store_true", + help="Print llama.cpp loading messages", + ) + args = parser.parse_args() + + print(f"Loading model: {args.model}") + print(f"GPU layers : {'all' if args.n_gpu_layers == -1 else args.n_gpu_layers}") + print(f"Context size : {args.n_ctx} tokens\n") + + llm = build_llm( + model_path=args.model, + n_ctx=args.n_ctx, + n_gpu_layers=args.n_gpu_layers, + verbose=args.verbose, + ) + + output = llm( + args.prompt, + max_tokens=args.max_tokens, + stop=["Q:", "\n\n"], + echo=True, + ) + + print(json.dumps(output, indent=2, ensure_ascii=False)) + + +if __name__ == "__main__": + main() From 6d879604c2216d01983018852ec40b5e063c8f1a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 5 Apr 2026 06:13:22 +0000 Subject: [PATCH 2/8] Initial plan From 4837a5a4da1ee50b000a3af3f2fd322c627b9540 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 5 Apr 2026 06:15:21 +0000 Subject: [PATCH 3/8] Update legion_slim5_rtx4060.py for Windows/PowerShell usability Agent-Logs-Url: https://github.com/adelmorad273-cmyk/llama-cpp-python/sessions/640206ba-9009-4a18-9699-6690fbaa7b0d Co-authored-by: adelmorad273-cmyk <269225024+adelmorad273-cmyk@users.noreply.github.com> --- .../high_level_api/legion_slim5_rtx4060.py | 127 +++++++++++++++--- 1 file changed, 109 insertions(+), 18 deletions(-) diff --git a/examples/high_level_api/legion_slim5_rtx4060.py b/examples/high_level_api/legion_slim5_rtx4060.py index 03ab135723..88c44d40e1 100644 --- a/examples/high_level_api/legion_slim5_rtx4060.py +++ b/examples/high_level_api/legion_slim5_rtx4060.py @@ -7,11 +7,26 @@ - SSD: 1 TB NVMe Install with CUDA support first: - CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python --force-reinstall --no-cache-dir + + Bash / Linux / macOS: + CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python --force-reinstall --no-cache-dir + + PowerShell (Windows): + $env:CMAKE_ARGS = "-DGGML_CUDA=on" + python -m pip install llama-cpp-python --force-reinstall --no-cache-dir + + Tip (Windows): install into a virtual environment to avoid dependency conflicts + with other tools in your global environment: + python -m venv .venv-llama + ./.venv-llama/Scripts/Activate.ps1 + $env:CMAKE_ARGS = "-DGGML_CUDA=on" + python -m pip install llama-cpp-python --force-reinstall --no-cache-dir """ import argparse import json +import os +import sys from llama_cpp import Llama @@ -89,6 +104,11 @@ def main() -> None: default="What are the names of the planets in the solar system?", help="Prompt text", ) + parser.add_argument( + "--system-prompt", + default=None, + help="Optional system prompt prepended before the user prompt", + ) parser.add_argument( "--max-tokens", type=int, default=256, help="Maximum number of tokens to generate", @@ -101,29 +121,100 @@ def main() -> None: "--n-gpu-layers", type=int, default=-1, help="GPU layers to offload (-1 = all)", ) + parser.add_argument( + "--seed", type=int, default=-1, + help="RNG seed for reproducible output (-1 = random)", + ) + parser.add_argument( + "--temperature", type=float, default=0.8, + help="Sampling temperature (0.0 = greedy, higher = more creative)", + ) + parser.add_argument( + "--top-p", type=float, default=0.95, + help="Nucleus sampling probability threshold", + ) + parser.add_argument( + "--repeat-penalty", type=float, default=1.1, + help="Penalty applied to repeated tokens (1.0 = disabled)", + ) + parser.add_argument( + "--json-output", action="store_true", + help="Print only raw JSON output (no banner); useful for piping", + ) parser.add_argument( "--verbose", action="store_true", help="Print llama.cpp loading messages", ) args = parser.parse_args() - print(f"Loading model: {args.model}") - print(f"GPU layers : {'all' if args.n_gpu_layers == -1 else args.n_gpu_layers}") - print(f"Context size : {args.n_ctx} tokens\n") - - llm = build_llm( - model_path=args.model, - n_ctx=args.n_ctx, - n_gpu_layers=args.n_gpu_layers, - verbose=args.verbose, - ) - - output = llm( - args.prompt, - max_tokens=args.max_tokens, - stop=["Q:", "\n\n"], - echo=True, - ) + # --- Validate model path ------------------------------------------------- + model_path = os.path.abspath(args.model) + if not os.path.isfile(model_path): + print( + f"ERROR: model file not found: {model_path}\n" + " Make sure the path is correct and the file exists.", + file=sys.stderr, + ) + sys.exit(1) + + if not args.json_output: + print(f"Loading model: {model_path}") + print(f"GPU layers : {'all' if args.n_gpu_layers == -1 else args.n_gpu_layers}") + print(f"Context size : {args.n_ctx} tokens\n") + + # --- Load model ---------------------------------------------------------- + try: + llm = build_llm( + model_path=model_path, + n_ctx=args.n_ctx, + n_gpu_layers=args.n_gpu_layers, + verbose=args.verbose, + ) + except Exception as exc: + err = str(exc) + print(f"ERROR: failed to load model – {err}", file=sys.stderr) + if args.n_gpu_layers == -1 and ( + "out of memory" in err.lower() or "cudaerror" in err.lower() + ): + print( + " Hint: GPU ran out of VRAM while loading all layers.\n" + " Try reducing --n-gpu-layers (e.g. --n-gpu-layers 28) to keep\n" + " some layers on CPU RAM instead.", + file=sys.stderr, + ) + sys.exit(1) + + # --- Build prompt -------------------------------------------------------- + if args.system_prompt: + full_prompt = f"{args.system_prompt}\n\n{args.prompt}" + else: + full_prompt = args.prompt + + # --- Run inference ------------------------------------------------------- + try: + output = llm( + full_prompt, + max_tokens=args.max_tokens, + stop=["Q:", "\n\n"], + echo=True, + seed=args.seed, + temperature=args.temperature, + top_p=args.top_p, + repeat_penalty=args.repeat_penalty, + ) + except Exception as exc: + err = str(exc) + print(f"ERROR: inference failed – {err}", file=sys.stderr) + if args.n_gpu_layers == -1 and ( + "out of memory" in err.lower() or "cudaerror" in err.lower() + ): + print( + " Hint: GPU ran out of VRAM during inference.\n" + " Try reducing --n-gpu-layers (e.g. --n-gpu-layers 28) to keep\n" + " some layers on CPU RAM instead.", + file=sys.stderr, + ) + sys.exit(1) print(json.dumps(output, indent=2, ensure_ascii=False)) From f0a11996a4ca5983a12e6d6b51673aa9ed8d597e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 5 Apr 2026 06:16:26 +0000 Subject: [PATCH 4/8] Fix CUDA error string matching to catch all variations Agent-Logs-Url: https://github.com/adelmorad273-cmyk/llama-cpp-python/sessions/640206ba-9009-4a18-9699-6690fbaa7b0d Co-authored-by: adelmorad273-cmyk <269225024+adelmorad273-cmyk@users.noreply.github.com> --- examples/high_level_api/legion_slim5_rtx4060.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/high_level_api/legion_slim5_rtx4060.py b/examples/high_level_api/legion_slim5_rtx4060.py index 88c44d40e1..90a00454cf 100644 --- a/examples/high_level_api/legion_slim5_rtx4060.py +++ b/examples/high_level_api/legion_slim5_rtx4060.py @@ -174,7 +174,7 @@ def main() -> None: err = str(exc) print(f"ERROR: failed to load model – {err}", file=sys.stderr) if args.n_gpu_layers == -1 and ( - "out of memory" in err.lower() or "cudaerror" in err.lower() + "out of memory" in err.lower() or "cuda" in err.lower() ): print( " Hint: GPU ran out of VRAM while loading all layers.\n" @@ -206,7 +206,7 @@ def main() -> None: err = str(exc) print(f"ERROR: inference failed – {err}", file=sys.stderr) if args.n_gpu_layers == -1 and ( - "out of memory" in err.lower() or "cudaerror" in err.lower() + "out of memory" in err.lower() or "cuda" in err.lower() ): print( " Hint: GPU ran out of VRAM during inference.\n" From 882f6f7109e29953da028b6878f547d6e722449e Mon Sep 17 00:00:00 2001 From: adelmorad273-cmyk Date: Sun, 5 Apr 2026 09:07:00 +0200 Subject: [PATCH 5/8] Create SECURITY.md for security policy Add a security policy document outlining supported versions and vulnerability reporting. --- SECURITY.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 SECURITY.md diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000000..034e848032 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,21 @@ +# Security Policy + +## Supported Versions + +Use this section to tell people about which versions of your project are +currently being supported with security updates. + +| Version | Supported | +| ------- | ------------------ | +| 5.1.x | :white_check_mark: | +| 5.0.x | :x: | +| 4.0.x | :white_check_mark: | +| < 4.0 | :x: | + +## Reporting a Vulnerability + +Use this section to tell people how to report a vulnerability. + +Tell them where to go, how often they can expect to get an update on a +reported vulnerability, what to expect if the vulnerability is accepted or +declined, etc. From 98dafcd431eb4bbe10e224fb377e60df61a1769d Mon Sep 17 00:00:00 2001 From: "continue[bot]" <230936708+continue[bot]@users.noreply.github.com> Date: Sun, 19 Apr 2026 10:59:03 +0000 Subject: [PATCH 6/8] Add agentsmd-updater check --- .continue/checks/agentsmd-updater.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .continue/checks/agentsmd-updater.md diff --git a/.continue/checks/agentsmd-updater.md b/.continue/checks/agentsmd-updater.md new file mode 100644 index 0000000000..b22f5b4a53 --- /dev/null +++ b/.continue/checks/agentsmd-updater.md @@ -0,0 +1,5 @@ +--- +name: agentsmd-updater +--- + +You are maintaining the project's AGENTS.md file. Review the pull request and identify new build steps, scripts, directory changes, dependencies, environment variables, architectures, code style rules, or workflows that an AI coding agent should know. Compare these findings with the existing AGENTS.md and update the file so it stays accurate, complete, and practical for automated agents. Keep the structure clean and keep explanations brief. If the file is missing you should create one. Do not modify any other file. \ No newline at end of file From 57dd0e576fd97be171b5eb8ca6eeba05ccba91cc Mon Sep 17 00:00:00 2001 From: adel sabry <269225024+adelmorad273-cmyk@users.noreply.github.com> Date: Sun, 19 Apr 2026 13:22:14 +0200 Subject: [PATCH 7/8] Create codeql.yml --- .github/workflows/codeql.yml | 101 +++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 .github/workflows/codeql.yml diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000000..ebbb37ee58 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,101 @@ +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. +# +# You may wish to alter this file to override the set of languages analyzed, +# or to provide custom queries or build logic. +# +# ******** NOTE ******** +# We have attempted to detect the languages in your repository. Please check +# the `language` matrix defined below to confirm you have the correct set of +# supported CodeQL languages. +# +name: "CodeQL Advanced" + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + schedule: + - cron: '40 8 * * 6' + +jobs: + analyze: + name: Analyze (${{ matrix.language }}) + # Runner size impacts CodeQL analysis time. To learn more, please see: + # - https://gh.io/recommended-hardware-resources-for-running-codeql + # - https://gh.io/supported-runners-and-hardware-resources + # - https://gh.io/using-larger-runners (GitHub.com only) + # Consider using larger runners or machines with greater resources for possible analysis time improvements. + runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} + permissions: + # required for all workflows + security-events: write + + # required to fetch internal or private CodeQL packs + packages: read + + # only required for workflows in private repositories + actions: read + contents: read + + strategy: + fail-fast: false + matrix: + include: + - language: actions + build-mode: none + - language: python + build-mode: none + # CodeQL supports the following values keywords for 'language': 'actions', 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'rust', 'swift' + # Use `c-cpp` to analyze code written in C, C++ or both + # Use 'java-kotlin' to analyze code written in Java, Kotlin or both + # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both + # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis, + # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning. + # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how + # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + # Add any setup steps before running the `github/codeql-action/init` action. + # This includes steps like installing compilers or runtimes (`actions/setup-node` + # or others). This is typically only required for manual builds. + # - name: Setup runtime (example) + # uses: actions/setup-example@v1 + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v4 + with: + languages: ${{ matrix.language }} + build-mode: ${{ matrix.build-mode }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + + # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs + # queries: security-extended,security-and-quality + + # If the analyze step fails for one of the languages you are analyzing with + # "We were unable to automatically build your code", modify the matrix above + # to set the build mode to "manual" for that language. Then modify this step + # to build your code. + # ℹ️ Command-line programs to run using the OS shell. + # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun + - name: Run manual build steps + if: matrix.build-mode == 'manual' + shell: bash + run: | + echo 'If you are using a "manual" build mode for one or more of the' \ + 'languages you are analyzing, replace this with the commands to build' \ + 'your code, for example:' + echo ' make bootstrap' + echo ' make release' + exit 1 + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v4 + with: + category: "/language:${{matrix.language}}" From a28c5e65c13acb0c1e977b1c9ad6b2cdbadd3d4a Mon Sep 17 00:00:00 2001 From: "continue[bot]" <230936708+continue[bot]@users.noreply.github.com> Date: Sun, 19 Apr 2026 14:26:29 +0000 Subject: [PATCH 8/8] Add Accessibility Fix Agent check --- .continue/checks/accessibility-fix-agent.md | 415 ++++++++++++++++++++ 1 file changed, 415 insertions(+) create mode 100644 .continue/checks/accessibility-fix-agent.md diff --git a/.continue/checks/accessibility-fix-agent.md b/.continue/checks/accessibility-fix-agent.md new file mode 100644 index 0000000000..9efa63ac1d --- /dev/null +++ b/.continue/checks/accessibility-fix-agent.md @@ -0,0 +1,415 @@ +--- +name: Accessibility Fix Agent +--- + +## Purpose + +Systematically scan the repository for accessibility issues, categorize them by WCAG compliance level and severity, and create tracked GitHub issues with specific remediation steps. This agent focuses on identifying and documenting accessibility violations that prevent users with disabilities from fully accessing the application. + +## Execution Steps + +### 1. Repository Scan + +1. Scan all HTML, JSX, TSX, and template files in the repository +2. Identify accessibility violations using the triage system below +3. For each unique issue type found, check if a GitHub issue already exists with the `accessibility` label +4. If no issue exists, create a new GitHub issue with detailed remediation steps + +### 2. Issue Classification + +Classify each accessibility issue using the triage system below to determine: + +- WCAG Level (A, AA, AAA) +- Severity (Critical, High, Medium, Low) +- Issue Category +- Affected files and line numbers + +### 3. GitHub Issue Creation + +For each new accessibility issue: + +1. Create GitHub issue with title format: `[A11Y] [Severity] Brief description` +2. Apply labels: `accessibility`, `wcag-[level]`, `severity-[level]` +3. Use the issue template below with specific violations and fixes +4. Group multiple instances of the same issue type into one issue with multiple file references + +### 4. Comment Marking + +Add HTML comment markers to affected code locations: + +```html + +[problematic code] + + +``` + +## Triage System + +### Category 1: Missing Alt Text (WCAG A - Critical) + +**Detection:** + +- `` tags without `alt` attribute +- `` on content images (not decorative) +- `` without `alt` + +**Impact:** Screen readers cannot describe images to blind users + +**Remediation:** + +- Add descriptive `alt` text for content images +- Use `alt=""` for decorative images +- For complex images, consider `aria-describedby` + +### Category 2: Keyboard Navigation Issues (WCAG A - Critical) + +**Detection:** + +- Interactive elements without keyboard support (missing `tabindex`, `onKeyDown`) +- `
` or `` used as buttons without proper ARIA roles +- Focus trap in modals without proper management +- Positive `tabindex` values (anti-pattern) +- Missing visible focus indicators (`:focus` styles) + +**Impact:** Keyboard-only users cannot access functionality + +**Remediation:** + +- Use semantic HTML (`