Source Code

/ src / repository / loader.py

from pathlib import Path
import pathspec
import re

from repository.models import File

def load_ignore_files(base: Path, filenames: list[str]) -> pathspec.PathSpec:
    """Build a single ignore spec from the ignore files found under ``base``.

    Each name in ``filenames`` is resolved against ``base``. Names that do
    not refer to a regular file are skipped; the lines of the remaining files
    are concatenated in the given order and compiled as gitwildmatch patterns.

    Args:
        base: Directory the ignore file names are resolved against.
        filenames: Names of the ignore files to look for.

    Returns:
        A ``PathSpec`` compiled from every line that was read. It holds no
        patterns when none of the files are present.
    """
    patterns: list[str] = []

    for name in filenames:
        path = base / name
        # is_file() rather than exists(): a directory that happens to carry
        # an ignore-file name would make read_text() raise.
        if not path.is_file():
            continue

        # Decode explicitly as UTF-8 (the same encoding load_single_file uses)
        # so non-ASCII patterns do not depend on the platform's locale.
        patterns.extend(path.read_text(encoding="utf-8").splitlines())

    return pathspec.PathSpec.from_lines(
        pathspec.patterns.GitWildMatchPattern,
        patterns,
    )

# Whole-line comment patterns. The indentation part matches horizontal
# whitespace only: a plain ``\s*`` also matches line breaks, which lets a match
# start on an earlier blank line and swallow it together with the comment.
_HASH_LINE_COMMENT = re.compile(r'^[^\S\r\n]*#.*$', re.MULTILINE)
_SLASH_LINE_COMMENT = re.compile(r'^[^\S\r\n]*//.*$', re.MULTILINE)
# Delimited comments; non-greedy so each comment ends at its first terminator.
_C_BLOCK_COMMENT = re.compile(r'/\*[\s\S]*?\*/')
_MARKUP_COMMENT = re.compile(r'<!--[\s\S]*?-->')

# Patterns to apply, in order, for each supported (lower-case) extension.
_COMMENT_PATTERNS = {
    '.py': (_HASH_LINE_COMMENT,),
    '.sh': (_HASH_LINE_COMMENT,),
    '.yml': (_HASH_LINE_COMMENT,),
    '.yaml': (_HASH_LINE_COMMENT,),
    '.js': (_SLASH_LINE_COMMENT, _C_BLOCK_COMMENT),
    '.css': (_C_BLOCK_COMMENT,),
    '.html': (_MARKUP_COMMENT,),
    '.htm': (_MARKUP_COMMENT,),
    '.xml': (_MARKUP_COMMENT,),
    '.xhtml': (_MARKUP_COMMENT,),
    '.svg': (_MARKUP_COMMENT,),
}


def strip_comments(content: str, file_extension: str) -> str:
    """Remove comments from ``content`` based on the file type.

    The stripping is regex based, not syntax aware:

    * ``#`` and ``//`` comments are removed only when they occupy a whole
      line (optionally indented); the line itself is kept as an empty line.
      Trailing comments after code are left untouched.
    * ``/* ... */`` and ``<!-- ... -->`` comments are removed wherever the
      delimiters appear, including inside string literals.

    Regardless of the extension, trailing whitespace is stripped from every
    line and the lines are re-joined with ``"\\n"``, so a final newline is
    not preserved.

    Args:
        content: Text of the file.
        file_extension: File suffix including the dot (e.g. ``".py"``);
            matched case-insensitively.

    Returns:
        The text with comments removed, or ``content`` unchanged when it is
        empty.
    """
    if not content:
        return content

    for pattern in _COMMENT_PATTERNS.get(file_extension.lower(), ()):
        content = pattern.sub('', content)

    return '\n'.join(line.rstrip() for line in content.splitlines())

def load_single_file(file_path: Path, base: Path) -> None:
    """Create or update the ``File`` row for one filesystem entry.

    The row is keyed by the entry's POSIX-style path relative to ``base``.
    Directories are stored with empty content. Regular files are read as
    UTF-8 and passed through ``strip_comments``; a file that cannot be
    decoded as UTF-8 is stored with empty content instead.

    Args:
        file_path: Path of the file or directory to store.
        base: Root that ``file_path`` is made relative to.
    """
    rel_path = file_path.relative_to(base).as_posix()
    is_directory = file_path.is_dir()

    if is_directory:
        content = ""
    else:
        try:
            raw_text = file_path.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            # Not valid UTF-8 (e.g. binary data): keep the entry, drop the body.
            content = ""
        else:
            content = strip_comments(raw_text, file_path.suffix.lower())

    field_values = {
        "name": file_path.name,
        "content": content,
        "is_directory": is_directory,
    }
    File.objects.update_or_create(path=rel_path, defaults=field_values)

    print(f"Loaded: {rel_path}")


def load_files() -> None:
    """Synchronise the ``File`` table with the contents of ``./src``.

    Walks ``src`` under the current working directory, skips every entry
    matched by the patterns in ``.gitignore`` / ``.repoignore`` at the
    repository root, stores the rest via ``load_single_file``, and finally
    deletes stored rows whose path was not seen during the walk.
    """
    repo_root = Path(".").resolve()
    base = repo_root / "src"

    ignore = load_ignore_files(
        repo_root,
        [".gitignore", ".repoignore"],
    )

    found_paths = set()

    for file_path in base.rglob("*"):
        rel = file_path.relative_to(base).as_posix()

        # The ignore files live at the repository root, so match against the
        # root-relative path. Directory-only patterns such as "build/" match
        # only paths ending in a slash, so directories get one appended;
        # without it an ignored directory would still be stored as an entry
        # even though everything inside it is skipped.
        candidate = f"src/{rel}/" if file_path.is_dir() else f"src/{rel}"
        if ignore.match_file(candidate):
            continue

        load_single_file(file_path, base)
        found_paths.add(rel)

    # NOTE(review): if "src" is missing under the current working directory
    # nothing is found and every stored row is deleted below; confirm that
    # is the intended behaviour.
    db_paths = set(File.objects.values_list('path', flat=True))
    paths_to_delete = db_paths - found_paths

    if paths_to_delete:
        File.objects.filter(path__in=paths_to_delete).delete()
        print(f"Deleted {len(paths_to_delete)} files: {paths_to_delete}")