From 86a9c8b30132f117e666209deb6dce1a412d7bdf Mon Sep 17 00:00:00 2001 From: Darkdragon-001 Date: Wed, 23 Mar 2022 09:16:15 +0100 Subject: [PATCH 1/4] Return None when the submodule commit is not contained in the diff --- gitlab_submodule/objects.py | 2 +- gitlab_submodule/submodule_commit.py | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/gitlab_submodule/objects.py b/gitlab_submodule/objects.py index 09b23ca..6796661 100644 --- a/gitlab_submodule/objects.py +++ b/gitlab_submodule/objects.py @@ -61,7 +61,7 @@ class Subproject: def __init__(self, submodule: Submodule, project: Optional[Project], - commit: Union[ProjectCommit, Commit]): + commit: Union[ProjectCommit, Commit, None]): self.submodule = submodule self.project = project self.commit = commit diff --git a/gitlab_submodule/submodule_commit.py b/gitlab_submodule/submodule_commit.py index f88582f..403783e 100644 --- a/gitlab_submodule/submodule_commit.py +++ b/gitlab_submodule/submodule_commit.py @@ -11,12 +11,15 @@ def get_submodule_commit( submodule: Submodule, submodule_project: Optional[Project] = None, - ) -> Union[ProjectCommit, Commit]: + ) -> Union[ProjectCommit, Commit, None]: commit_id = _get_submodule_commit_id( submodule.parent_project, submodule.path, submodule.parent_ref, ) + if commit_id is None: + return None + if submodule_project is not None: commit = submodule_project.commits.get(commit_id) else: @@ -67,6 +70,6 @@ def _get_submodule_commit_id( if len(matches) == 1: return matches[0] - # should never happen - raise RuntimeError(f'Did not find any commit id for submodule ' - f'"{submodule_path}" at url "{project.web_url}"') + # diff can't be retrieved probably because it was too big + # see https://docs.gitlab.com/ee/development/diffs.html#diff-collection-limits + return None From 57a6d70a8897e731d26689f564382d9d81bac3f8 Mon Sep 17 00:00:00 2001 From: ValentinFrancois Date: Sun, 27 Mar 2022 18:14:15 +0200 Subject: [PATCH 2/4] handle commit diff limitations - log warning - add test - update README.md --- README.md | 59 ++++++++++++++++++++-------- gitlab_submodule/objects.py | 2 +- gitlab_submodule/submodule_commit.py | 21 ++++++++-- tests/test_submodule_commit.py | 35 +++++++++++++++-- 4 files changed, 92 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 592fa55..572f219 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,8 @@ Gitlab project, and more importantly to get the commits they're pointing to. Internally, it reads and parses the `.gitmodules` file at the root of the Project. To get the commit id of a submodule, it finds the last commit that -updated the submodule and parses its diff. +updated the submodule and parses its diff (this can sometimes fail due to a +[limit of the GitLab API itself](https://docs.gitlab.com/ee/development/diffs.html#diff-collection-limits) - see [Limitations](Limitations)). --- **About the future of this package** @@ -75,7 +76,7 @@ for subproject in subprojects: print('- {} ({}) -> {}'.format( subproject.submodule.path, subproject.project.web_url, - subproject.commit.id)) + subproject.commit.id if subproject.commit else '?')) ``` Output: ``` @@ -95,13 +96,16 @@ for subproject in subprojects: - print('- {} ({}) -> {}'.format( - subproject.submodule.path, - subproject.project.web_url, -- subproject.commit.id)) +- subproject.commit.id if subproject.commit else '?')) + head_subproject_commit = subproject.project.commits.list( + ref=subproject.project.default_branch)[0] -+ up_to_date = subproject.commit.id == head_subproject_commit.id -+ print('- {}: {}'.format( -+ subproject.submodule.path, -+ 'ok' if up_to_date else '/!\\ must update')) ++ if subproject.commit is None: # can happen with very large commit diffs ++ status = 'cannot check' ++ elif subproject.commit.id == head_subproject_commit.id: ++ status = 'ok' ++ else: ++ status = '/!\\ must update' ++ print('- {}: {}'.format(subproject.submodule.path, status)) ``` Output: @@ -125,7 +129,7 @@ iterate_subprojects( self_managed_gitlab_host: Optional[str] = None ) -> Generator[Subproject, None, None] ``` -Parameters: +####Parameters: - `project`: a `gitlab.v4.objects.Project` object - `gitlab`: the `gitlab.Gitlab` instance that you used to authenticate, or its `projects: gitlab.v4.objects.ProjectManager` attribute @@ -139,16 +143,31 @@ Parameters: self-managed GitLab instance, you should pass its url here otherwise it may be impossible to know from the URL that it's a GitLab project. -Returns: Generator of `Subproject` objects +####Returns: +Generator of `Subproject` objects + +####Limitations: +- due to https://docs.gitlab.com/ee/development/diffs.html#diff-collection-limits, + some very large commit diffs won't be parsed entirely. This means that when + inspecting the diff of the latest commit that updated `./`, + in some rare cases `./` might not be part of the diff + object returned by the GitLab API. In that case we have no other choice than + set `Subproject.commit` to `None`, that's why the two examples above + check if `subproject.commit` is not `None` before using the value + `subproject.commit.id`. + +--- ### `list_subprojects(...)` Same parameters as [`iterate_subprojects(...)`](#iterate_subprojects) but returns a `list` of [`Subproject`](#class-subproject) objects. +--- + ### class `Subproject` Basic objects that contain the info about a Gitlab subproject. -Attributes: +####Attributes: - `project: Optional[gitlab.v4.objects.Project]`: the Gitlab project that the submodule links to (can be `None` if the submodule is not hosted on GitLab) - `submodule: `[`Submodule`](#class-submodule): a basic object that contains @@ -157,7 +176,7 @@ Attributes: the submodule points to (if the submodule is not hosted on GitLab, it will be a dummy `Commit` object with a single attribute `id`) -Example `str()` output: +####Example `str()` output: ``` => { 'submodule': => {'name': 'share/extensions', 'parent_project': => {'id': 3472737, 'description': 'Inkscape vector image editor', 'name': 'inkscape', 'name_with_namespace': 'Inkscape / inkscape', 'path': 'inkscape', 'path_with_namespace': 'inkscape/inkscape', 'created_at': '2017-06-09T14:16:35.615Z', 'default_branch': 'master', 'tag_list': [], 'topics': [], 'ssh_url_to_repo': 'git@gitlab.com:inkscape/inkscape.git', 'http_url_to_repo': 'https://gitlab.com/inkscape/inkscape.git', 'web_url': 'https://gitlab.com/inkscape/inkscape', 'readme_url': 'https://gitlab.com/inkscape/inkscape/-/blob/master/README.md', 'avatar_url': 'https://gitlab.com/uploads/-/system/project/avatar/3472737/inkscape.png', 'forks_count': 900, 'star_count': 2512, 'last_activity_at': '2022-01-29T23:45:49.894Z', 'namespace': {'id': 470642, 'name': 'Inkscape', 'path': 'inkscape', 'kind': 'group', 'full_path': 'inkscape', 'parent_id': None, 'avatar_url': '/uploads/-/system/group/avatar/470642/inkscape.png', 'web_url': 'https://gitlab.com/groups/inkscape'}}, 'parent_ref': 'e371b2f826adcba316f2e64bbf2f697043373d0b', 'path': 'share/extensions', 'url': 'https://gitlab.com/inkscape/extensions.git'}, @@ -166,6 +185,8 @@ Example `str()` output: } ``` +--- + ### `list_submodules(...)` Lists the info about the project submodules found in the `.gitmodules` file. ```python @@ -173,18 +194,21 @@ list_project_submodules( project: Project, ref: Optional[str] = None) -> List[Submodule] ``` -Parameters: +####Parameters: - `project`: a `gitlab.v4.objects.Project` object - `ref`: (optional) a ref to a branch, commit, tag etc. Defaults to the HEAD of the project default branch. -Returns: list of `Submodule` objects +####Returns: +`list` of `Submodule` objects + +--- ### class `Submodule` Represents the `.gitmodules` config of a submodule + adds info about the parent project -Attributes: +####Attributes: - `parent_project: gitlab.v4.objects.Project`: project that uses the submodule - `parent_ref: str`: ref where the `.gitmodules` file was read - `name: str`: local name used by git for the submodule @@ -192,11 +216,13 @@ Attributes: - `url: str`: URL linking to the location of the repo of the submodule (not necessarily Gitlab) -Example `str()` output: +####Example `str()` output: ``` => {'name': 'share/extensions', 'parent_project': => {'id': 3472737, 'description': 'Inkscape vector image editor', 'name': 'inkscape', 'name_with_namespace': 'Inkscape / inkscape', 'path': 'inkscape', 'path_with_namespace': 'inkscape/inkscape', 'created_at': '2017-06-09T14:16:35.615Z', 'default_branch': 'master', 'tag_list': [], 'topics': [], 'ssh_url_to_repo': 'git@gitlab.com:inkscape/inkscape.git', 'http_url_to_repo': 'https://gitlab.com/inkscape/inkscape.git', 'web_url': 'https://gitlab.com/inkscape/inkscape', 'readme_url': 'https://gitlab.com/inkscape/inkscape/-/blob/master/README.md', 'avatar_url': 'https://gitlab.com/uploads/-/system/project/avatar/3472737/inkscape.png', 'forks_count': 900, 'star_count': 2512, 'last_activity_at': '2022-01-29T23:45:49.894Z', 'namespace': {'id': 470642, 'name': 'Inkscape', 'path': 'inkscape', 'kind': 'group', 'full_path': 'inkscape', 'parent_id': None, 'avatar_url': '/uploads/-/system/group/avatar/470642/inkscape.png', 'web_url': 'https://gitlab.com/groups/inkscape'}}, 'parent_ref': 'e371b2f826adcba316f2e64bbf2f697043373d0b', 'path': 'share/extensions', 'url': 'https://gitlab.com/inkscape/extensions.git'} ``` +--- + ### `submodule_to_subproject(...)` Converts a `Submodule` object to a [`Subproject`](#class-subproject) object, assuming it's hosted on Gitlab. @@ -204,7 +230,7 @@ hosted on Gitlab. Raises a `FileNotFoundError` if the path of the submodule actually doesn't exist in the host repo or if the url of the submodule doesn't link to an existing repo (both can happen if you modify the `.gitmodules` file without -using one of the `git submodule` commands) +using one of the `git submodule` commands). ```python submodule_to_subproject( @@ -215,6 +241,7 @@ submodule_to_subproject( ``` Parameter details: See [`iterate_subprojects(...)`](#iterate_subprojects) +--- ## Contributing diff --git a/gitlab_submodule/objects.py b/gitlab_submodule/objects.py index 6796661..caf2245 100644 --- a/gitlab_submodule/objects.py +++ b/gitlab_submodule/objects.py @@ -61,7 +61,7 @@ class Subproject: def __init__(self, submodule: Submodule, project: Optional[Project], - commit: Union[ProjectCommit, Commit, None]): + commit: Optional[Union[ProjectCommit, Commit]]): self.submodule = submodule self.project = project self.commit = commit diff --git a/gitlab_submodule/submodule_commit.py b/gitlab_submodule/submodule_commit.py index 403783e..f50c69b 100644 --- a/gitlab_submodule/submodule_commit.py +++ b/gitlab_submodule/submodule_commit.py @@ -1,6 +1,8 @@ from typing import Optional, Union +import logging import re +from os import path from gitlab.v4.objects import Project, ProjectCommit from gitlab.exceptions import GitlabGetError @@ -8,10 +10,13 @@ from gitlab_submodule.objects import Submodule, Commit +logger = logging.getLogger(__name__) + + def get_submodule_commit( submodule: Submodule, submodule_project: Optional[Project] = None, - ) -> Union[ProjectCommit, Commit, None]: + ) -> Optional[Union[ProjectCommit, Commit]]: commit_id = _get_submodule_commit_id( submodule.parent_project, submodule.path, @@ -31,7 +36,7 @@ def _get_submodule_commit_id( project: Project, submodule_path: str, ref: Optional[str] = None, -) -> str: +) -> Optional[str]: """This uses a trick: - The .gitmodules files doesn't contain the actual commit sha that the submodules points to. @@ -57,7 +62,9 @@ def _get_submodule_commit_id( update_submodule_commit = project.commits.get(last_commit_id) submodule_commit_regex = r'Subproject commit ([a-zA-Z0-9]+)\n' + n_files_in_diff = 0 for diff_file in update_submodule_commit.diff(as_list=False): + n_files_in_diff += 1 if diff_file['new_path'] == submodule_path: # either the commit id was added for the first time, # or it was updated -> we can find one or two matches @@ -70,6 +77,12 @@ def _get_submodule_commit_id( if len(matches) == 1: return matches[0] - # diff can't be retrieved probably because it was too big - # see https://docs.gitlab.com/ee/development/diffs.html#diff-collection-limits + logger.warning( + f'Could not retrieve commit id for submodule at ' + f'{path.join(".", submodule_path)} for project ' + f'{project.path_with_namespace}, probably because the last commit ' + f'that updated the submodule has a too large diff ' + f'({n_files_in_diff} files). More info on Gitlab API diff limits: ' + f'https://docs.gitlab.com/ee/development/diffs.html' + f'#diff-collection-limits') return None diff --git a/tests/test_submodule_commit.py b/tests/test_submodule_commit.py index 2567f3b..fcfb4e6 100644 --- a/tests/test_submodule_commit.py +++ b/tests/test_submodule_commit.py @@ -1,4 +1,6 @@ +import logging import unittest +from unittest.mock import Mock from gitlab import Gitlab from gitlab.v4.objects import ProjectCommit @@ -38,10 +40,10 @@ def test__get_submodule_commit_id_with_absolute_urls(self): def test_get_submodule_commit_with_absolute_urls(self): gl = Gitlab() - inkscape = gl.projects.get( + project = gl.projects.get( 'python-gitlab-submodule-test/test-projects/gitlab-absolute-urls') submodules = list_project_submodules( - inkscape, + project, ref='ce9b1e50b34372d82df098f3ffded58ef4be03ec') submodule_projects = [ submodule_to_project(submodule, gl.projects) @@ -63,9 +65,9 @@ def test_get_submodule_commit_with_absolute_urls(self): def test_get_submodule_commit_with_relative_urls(self): gl = Gitlab() - inkscape = gl.projects.get( + project = gl.projects.get( 'python-gitlab-submodule-test/test-projects/gitlab-relative-urls') - submodules = list_project_submodules(inkscape, ref='main') + submodules = list_project_submodules(project, ref='main') submodule_projects = [ submodule_to_project(submodule, gl.projects) for submodule in submodules[:4]] @@ -84,3 +86,28 @@ def test_get_submodule_commit_with_relative_urls(self): '906828a297594114e3b1c48d2191eb31a91284c9'}, # 4 {commit.id for commit in submodule_commits} ) + + def test_get_submodule_commit_too_big_diff(self): + + def mock_get_commit(*_, **__): + mock_commit = Mock() + mock_diff = [ + {'new_path': f'mock_file_{i}.txt'} + for i in range(1, 1001) + ] + mock_commit.diff = lambda *_, **__: mock_diff + return mock_commit + + gl = Gitlab() + project = gl.projects.get( + 'python-gitlab-submodule-test/test-projects/gitlab-relative-urls') + submodule = list_project_submodules(project, ref='main')[0] + submodule_project = submodule_to_project(submodule, gl.projects) + submodule.parent_project.commits.get = mock_get_commit + with self.assertLogs(level=logging.WARNING) as log: + submodule_commit = get_submodule_commit(submodule, + submodule_project) + self.assertEqual(1, len(log.output)) + self.assertIn('the submodule has a too large diff (1000 files).', + log.output[0]) + self.assertIsNone(submodule_commit) From 07135f5f254b88485ec8e8c755868a521465ac9e Mon Sep 17 00:00:00 2001 From: ValentinFrancois Date: Sun, 27 Mar 2022 18:28:29 +0200 Subject: [PATCH 3/4] fix README.md --- README.md | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 572f219..ec44e2d 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,8 @@ Gitlab project, and more importantly to get the commits they're pointing to. Internally, it reads and parses the `.gitmodules` file at the root of the Project. To get the commit id of a submodule, it finds the last commit that updated the submodule and parses its diff (this can sometimes fail due to a -[limit of the GitLab API itself](https://docs.gitlab.com/ee/development/diffs.html#diff-collection-limits) - see [Limitations](Limitations)). +[limit of the GitLab API itself](https://docs.gitlab.com/ee/development/diffs.html#diff-collection-limits) +- see [Limitations](#limitations)). --- **About the future of this package** @@ -129,7 +130,7 @@ iterate_subprojects( self_managed_gitlab_host: Optional[str] = None ) -> Generator[Subproject, None, None] ``` -####Parameters: +#### Parameters: - `project`: a `gitlab.v4.objects.Project` object - `gitlab`: the `gitlab.Gitlab` instance that you used to authenticate, or its `projects: gitlab.v4.objects.ProjectManager` attribute @@ -143,10 +144,10 @@ iterate_subprojects( self-managed GitLab instance, you should pass its url here otherwise it may be impossible to know from the URL that it's a GitLab project. -####Returns: +#### Returns: Generator of `Subproject` objects -####Limitations: +#### Limitations: - due to https://docs.gitlab.com/ee/development/diffs.html#diff-collection-limits, some very large commit diffs won't be parsed entirely. This means that when inspecting the diff of the latest commit that updated `./`, @@ -167,7 +168,7 @@ returns a `list` of [`Subproject`](#class-subproject) objects. ### class `Subproject` Basic objects that contain the info about a Gitlab subproject. -####Attributes: +#### Attributes: - `project: Optional[gitlab.v4.objects.Project]`: the Gitlab project that the submodule links to (can be `None` if the submodule is not hosted on GitLab) - `submodule: `[`Submodule`](#class-submodule): a basic object that contains @@ -176,7 +177,7 @@ Basic objects that contain the info about a Gitlab subproject. the submodule points to (if the submodule is not hosted on GitLab, it will be a dummy `Commit` object with a single attribute `id`) -####Example `str()` output: +#### Example `str()` output: ``` => { 'submodule': => {'name': 'share/extensions', 'parent_project': => {'id': 3472737, 'description': 'Inkscape vector image editor', 'name': 'inkscape', 'name_with_namespace': 'Inkscape / inkscape', 'path': 'inkscape', 'path_with_namespace': 'inkscape/inkscape', 'created_at': '2017-06-09T14:16:35.615Z', 'default_branch': 'master', 'tag_list': [], 'topics': [], 'ssh_url_to_repo': 'git@gitlab.com:inkscape/inkscape.git', 'http_url_to_repo': 'https://gitlab.com/inkscape/inkscape.git', 'web_url': 'https://gitlab.com/inkscape/inkscape', 'readme_url': 'https://gitlab.com/inkscape/inkscape/-/blob/master/README.md', 'avatar_url': 'https://gitlab.com/uploads/-/system/project/avatar/3472737/inkscape.png', 'forks_count': 900, 'star_count': 2512, 'last_activity_at': '2022-01-29T23:45:49.894Z', 'namespace': {'id': 470642, 'name': 'Inkscape', 'path': 'inkscape', 'kind': 'group', 'full_path': 'inkscape', 'parent_id': None, 'avatar_url': '/uploads/-/system/group/avatar/470642/inkscape.png', 'web_url': 'https://gitlab.com/groups/inkscape'}}, 'parent_ref': 'e371b2f826adcba316f2e64bbf2f697043373d0b', 'path': 'share/extensions', 'url': 'https://gitlab.com/inkscape/extensions.git'}, @@ -194,12 +195,12 @@ list_project_submodules( project: Project, ref: Optional[str] = None) -> List[Submodule] ``` -####Parameters: +#### Parameters: - `project`: a `gitlab.v4.objects.Project` object - `ref`: (optional) a ref to a branch, commit, tag etc. Defaults to the HEAD of the project default branch. -####Returns: +#### Returns: `list` of `Submodule` objects --- @@ -208,7 +209,7 @@ list_project_submodules( Represents the `.gitmodules` config of a submodule + adds info about the parent project -####Attributes: +#### Attributes: - `parent_project: gitlab.v4.objects.Project`: project that uses the submodule - `parent_ref: str`: ref where the `.gitmodules` file was read - `name: str`: local name used by git for the submodule @@ -216,7 +217,7 @@ parent project - `url: str`: URL linking to the location of the repo of the submodule (not necessarily Gitlab) -####Example `str()` output: +#### Example `str()` output: ``` => {'name': 'share/extensions', 'parent_project': => {'id': 3472737, 'description': 'Inkscape vector image editor', 'name': 'inkscape', 'name_with_namespace': 'Inkscape / inkscape', 'path': 'inkscape', 'path_with_namespace': 'inkscape/inkscape', 'created_at': '2017-06-09T14:16:35.615Z', 'default_branch': 'master', 'tag_list': [], 'topics': [], 'ssh_url_to_repo': 'git@gitlab.com:inkscape/inkscape.git', 'http_url_to_repo': 'https://gitlab.com/inkscape/inkscape.git', 'web_url': 'https://gitlab.com/inkscape/inkscape', 'readme_url': 'https://gitlab.com/inkscape/inkscape/-/blob/master/README.md', 'avatar_url': 'https://gitlab.com/uploads/-/system/project/avatar/3472737/inkscape.png', 'forks_count': 900, 'star_count': 2512, 'last_activity_at': '2022-01-29T23:45:49.894Z', 'namespace': {'id': 470642, 'name': 'Inkscape', 'path': 'inkscape', 'kind': 'group', 'full_path': 'inkscape', 'parent_id': None, 'avatar_url': '/uploads/-/system/group/avatar/470642/inkscape.png', 'web_url': 'https://gitlab.com/groups/inkscape'}}, 'parent_ref': 'e371b2f826adcba316f2e64bbf2f697043373d0b', 'path': 'share/extensions', 'url': 'https://gitlab.com/inkscape/extensions.git'} ``` From 7122002321c1d676e42e6e4936272f3f942d58f9 Mon Sep 17 00:00:00 2001 From: ValentinFrancois Date: Sun, 27 Mar 2022 18:30:40 +0200 Subject: [PATCH 4/4] fix README.md (2) --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ec44e2d..253448c 100644 --- a/README.md +++ b/README.md @@ -19,8 +19,8 @@ Gitlab project, and more importantly to get the commits they're pointing to. Internally, it reads and parses the `.gitmodules` file at the root of the Project. To get the commit id of a submodule, it finds the last commit that updated the submodule and parses its diff (this can sometimes fail due to a -[limit of the GitLab API itself](https://docs.gitlab.com/ee/development/diffs.html#diff-collection-limits) -- see [Limitations](#limitations)). +[limit of the GitLab API itself](https://docs.gitlab.com/ee/development/diffs.html#diff-collection-limits) - +see [Limitations](#limitations)). --- **About the future of this package**