goose/tests/utils/test_file_utils.py

from unittest.mock import patch

import pytest
from goose.utils.file_utils import (
    create_extensions_list,
    create_language_weighting,
)  # Adjust the import path as necessary


# tests for `create_extensions_list`
def test_create_extensions_list_valid_input():
    """Test with valid input and multiple file extensions."""
    project_root = "/fake/project/root"
    max_n = 3
    files = [
        "/fake/project/root/file1.py",
        "/fake/project/root/file2.py",
        "/fake/project/root/file3.md",
        "/fake/project/root/file4.md",
        "/fake/project/root/file5.txt",
        "/fake/project/root/file6.py",
        "/fake/project/root/file7.md",
    ]

    with patch("goose.utils.file_utils.create_file_list", return_value=files):
        extensions = create_extensions_list(project_root, max_n)
        assert extensions == [".py", ".md", ".txt"], "Should return the top 3 extensions in the correct order"


def test_create_extensions_list_zero_max_n():
    """Test that a ValueError is raised when max_n is 0."""
    project_root = "/fake/project/root"
    max_n = 0

    with pytest.raises(ValueError, match="Number of file extensions must be greater than 0"):
        create_extensions_list(project_root, max_n)


def test_create_extensions_list_no_files():
    """Test with a project root that contains no files."""
    project_root = "/fake/project/root"
    max_n = 3

    with patch("goose.utils.file_utils.create_file_list", return_value=[]):
        extensions = create_extensions_list(project_root, max_n)
        assert extensions == [], "Should return an empty list when no files are present"


def test_create_extensions_list_fewer_extensions_than_max_n():
    """Test when there are fewer unique extensions than max_n."""
    project_root = "/fake/project/root"
    max_n = 5
    files = [
        "/fake/project/root/file1.py",
        "/fake/project/root/file2.py",
        "/fake/project/root/file3.md",
    ]

    with patch("goose.utils.file_utils.create_file_list", return_value=files):
        extensions = create_extensions_list(project_root, max_n)
        assert extensions == [".py", ".md"], "Should return all available extensions when fewer than max_n"


def test_create_extensions_list_files_without_extensions():
    """Test that files without extensions are ignored."""
    project_root = "/fake/project/root"
    max_n = 3
    files = [
        "/fake/project/root/file1",
        "/fake/project/root/file2.py",
        "/fake/project/root/file3",
        "/fake/project/root/file4.md",
    ]

    with patch("goose.utils.file_utils.create_file_list", return_value=files):
        extensions = create_extensions_list(project_root, max_n)
        assert extensions == [".py", ".md"], "Should ignore files without extensions"


# tests for `create_language_weighting`
def test_create_language_weighting_normal_case():
    """Test the function with multiple files and different sizes."""
    files = [
        "/fake/project/file1.py",
        "/fake/project/file2.py",
        "/fake/project/file3.md",
        "/fake/project/file4.txt",
    ]

    sizes = {
        "/fake/project/file1.py": 100,
        "/fake/project/file2.py": 200,
        "/fake/project/file3.md": 50,
        "/fake/project/file4.txt": 150,
    }

    # Mocking os.path.getsize to return different sizes for different files
    with patch("os.path.getsize") as mock_getsize:
        mock_getsize.side_effect = lambda file: sizes[file]

        result = create_language_weighting(files)

    total = sum(sizes.values())

    expected_result = {
        ".py": 300 / total * 100,  # 300 out of 600 total
        ".txt": 150 / total * 100,  # 150 out of 600 total
        ".md": 50 / total * 100,  # 50 out of 600 total
    }

    # Check if the result matches the expected output
    assert result[".py"] == pytest.approx(expected_result.get(".py"), 0.01)
    assert result[".txt"] == pytest.approx(expected_result.get(".txt"), 0.01)
    assert result[".md"] == pytest.approx(expected_result.get(".md"), 0.01)


def test_create_language_weighting_no_files():
    """Test the function when no files are provided."""
    files = []

    result = create_language_weighting(files)
    assert result == {}, "Should return an empty dictionary when no files are provided"


def test_create_language_weighting_files_without_extensions():
    """Test the function when files have no extensions."""
    files = [
        "/fake/project/file1",
        "/fake/project/file2",
    ]

    with patch("os.path.getsize", return_value=100):
        result = create_language_weighting(files)

    assert result == {}, "Should return an empty dictionary when files have no extensions"


def test_create_language_weighting_zero_total_size():
    """Test the function when all files have a size of 0."""
    files = [
        "/fake/project/file1.py",
        "/fake/project/file2.py",
    ]

    with patch("os.path.getsize", return_value=0):
        result = create_language_weighting(files)

    assert result == {".py": 0}


def test_create_language_weighting_single_file():
    """Test the function with a single file."""
    files = [
        "/fake/project/file1.py",
    ]

    with patch("os.path.getsize", return_value=100):
        result = create_language_weighting(files)

    assert result == {".py": 100.0}, "Should return 100% for the single file's extension"


def test_create_language_weighting_mixed_extensions():
    """Test the function with files of mixed extensions and sizes."""
    files = [
        "/fake/project/file1.py",
        "/fake/project/file2.py",
        "/fake/project/file3.md",
        "/fake/project/file4.txt",
        "/fake/project/file5.md",
    ]

    with patch("os.path.getsize") as mock_getsize:
        mock_getsize.side_effect = lambda file: {
            "/fake/project/file1.py": 100,
            "/fake/project/file2.py": 100,
            "/fake/project/file3.md": 200,
            "/fake/project/file4.txt": 300,
            "/fake/project/file5.md": 100,
        }[file]

        result = create_language_weighting(files)

    expected_result = {
        ".txt": 37.5,  # 300 out of 800 total
        ".md": 37.5,  # 300 out of 800 total
        ".py": 25.0,  # 200 out of 800 total
    }

    assert result[".txt"] == pytest.approx(expected_result.get(".txt"), 0.01)
    assert result[".md"] == pytest.approx(expected_result.get(".md"), 0.01)
    assert result[".py"] == pytest.approx(expected_result.get(".py"), 0.01)