Source Code

/ ssg / src / repository / loader.py

from pathlib import Path
import pathspec
import re

from repository.models import File

def load_ignore_files(base: Path, filenames: list[str]) -> pathspec.PathSpec:
    """Build one ignore spec from several gitignore-style files.

    Args:
        base: Directory in which the ignore files are looked up.
        filenames: Names of the ignore files, resolved against ``base``.
            Entries that do not name a regular file are skipped.

    Returns:
        A ``PathSpec`` compiled from the lines of every file that was found,
        concatenated in the order given by ``filenames``. The spec is built
        from an empty pattern list when none of the files exist.
    """
    patterns: list[str] = []

    for name in filenames:
        path = base / name
        # is_file() rather than exists(): a directory that happens to carry
        # an ignore-file name would otherwise make read_text() raise.
        if not path.is_file():
            continue

        # Decode explicitly (same encoding load_single_file uses) so the
        # patterns do not depend on the platform's default encoding.
        patterns.extend(path.read_text(encoding="utf-8").splitlines())

    return pathspec.PathSpec.from_lines(
        pathspec.patterns.GitWildMatchPattern,
        patterns,
    )

def strip_comments(content: str, file_extension: str) -> str:
    """Remove comments from content based on file type.

    The stripping is regex based, not syntax aware:

    * ``.py``, ``.sh``, ``.yml``, ``.yaml``: lines whose first non-blank
      character is ``#`` are emptied (this includes shebang lines and any
      such line inside a multi-line string). Trailing comments after code
      are kept.
    * ``.js``: lines starting with ``//`` are emptied, then every
      ``/* ... */`` span is removed.
    * ``.css``: every ``/* ... */`` span is removed.
    * ``.html``, ``.htm``, ``.xml``, ``.xhtml``, ``.svg``: every
      ``<!-- ... -->`` span is removed.

    A removed full-line comment leaves an empty line behind, so the line
    numbers of the remaining lines are unchanged by the line-comment rules.

    Args:
        content: Text to process. Falsy input is returned unchanged.
        file_extension: Extension including the leading dot; compared
            case-insensitively. Unknown extensions skip comment removal.

    Returns:
        The processed text with trailing whitespace removed from every line
        and lines joined by ``"\\n"`` (a final trailing newline is dropped).
    """
    if not content:
        return content

    extension = file_extension.lower()

    # ``[^\S\n]*`` means "whitespace except newline". A plain ``\s*`` can run
    # across line breaks, so blank lines directly above a comment line would
    # be swallowed together with it and shift every following line upwards.
    if extension in ('.py', '.sh', '.yml', '.yaml'):
        content = re.sub(r'^[^\S\n]*#.*$', '', content, flags=re.MULTILINE)

    elif extension in ('.js',):
        content = re.sub(r'^[^\S\n]*//.*$', '', content, flags=re.MULTILINE)
        # Non-greedy so that each block comment ends at its own ``*/``.
        content = re.sub(r'/\*[\s\S]*?\*/', '', content)

    elif extension == '.css':
        content = re.sub(r'/\*[\s\S]*?\*/', '', content)

    elif extension in ('.html', '.htm', '.xml', '.xhtml', '.svg'):
        content = re.sub(r'<!--[\s\S]*?-->', '', content)

    # Applied to every file type: drop trailing whitespace per line.
    return '\n'.join(line.rstrip() for line in content.splitlines())

def load_single_file(file_path: Path, base: Path) -> None:
    """Create or update the ``File`` row for a single filesystem entry.

    The row is keyed by the entry's POSIX-style path relative to ``base``.
    Directories are stored with empty content. Regular files are read as
    UTF-8 and passed through ``strip_comments``; a file that cannot be
    decoded as UTF-8 is stored with empty content instead.

    Args:
        file_path: Entry to store; must be located under ``base``.
        base: Root against which the stored relative path is computed.

    Raises:
        Exception: Whatever the save raises is reported on stdout and then
            re-raised unchanged.
    """
    rel_path = file_path.relative_to(base).as_posix()
    entry_is_dir = file_path.is_dir()

    text = ""
    if not entry_is_dir:
        try:
            raw_text = file_path.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            # Not valid UTF-8: keep the row but store no text for it.
            text = ""
        else:
            text = strip_comments(raw_text, file_path.suffix.lower())

    field_values = {
        "name": file_path.name,
        "content": text,
        "is_directory": entry_is_dir,
    }

    try:
        _, created = File.objects.update_or_create(
            path=rel_path,
            defaults=field_values,
        )
        print(f"DEBUG: {'Created' if created else 'Updated'}: {rel_path}")
    except Exception as e:
        print(f"DEBUG: ERROR saving {rel_path}: {e}")
        raise


def load_files() -> None:
    """Synchronise the ``File`` table with the current working directory.

    Walks the resolved current directory recursively, skips every entry
    matched by the patterns in ``.gitignore`` / ``.repoignore`` found at
    that root, and stores the rest through ``load_single_file``. Afterwards
    every ``File`` row whose path was not seen during the walk is deleted.

    Raises:
        Exception: Propagates whatever ``load_single_file`` re-raises; in
            that case the walk stops and no rows are deleted.
    """
    repo_root = Path(".").resolve()

    ignore = load_ignore_files(
        repo_root,
        [".gitignore", ".repoignore"],
    )

    found_paths = set()

    for file_path in repo_root.rglob("*"):
        rel = file_path.relative_to(repo_root).as_posix()

        # Directory-only gitignore patterns such as "build/" match only
        # paths that carry a trailing slash, so mark directories as such.
        # Otherwise the directory entry itself is stored even though
        # everything inside it is ignored. Patterns without a trailing slash
        # still match the slash-terminated form.
        candidate = f"{rel}/" if file_path.is_dir() else rel
        if ignore.match_file(candidate):
            continue

        load_single_file(file_path, repo_root)
        found_paths.add(rel)

    # Rows for paths that are gone from disk, or are now ignored, are stale.
    db_paths = set(File.objects.values_list('path', flat=True))
    paths_to_delete = db_paths - found_paths

    if paths_to_delete:
        File.objects.filter(path__in=paths_to_delete).delete()
        print(f"Deleted {len(paths_to_delete)} files")