diff --git a/community/document-readers/gitlab-document-reader/README.md b/community/document-readers/gitlab-document-reader/README.md new file mode 100644 index 00000000..8e75f9ce --- /dev/null +++ b/community/document-readers/gitlab-document-reader/README.md @@ -0,0 +1,251 @@ +# GitLab Document Reader + +[English](#english) | [中文](#chinese) + + +## English + +GitLab Document Reader is a Spring AI document reader implementation that allows you to read issues and repository files from GitLab projects and convert them into documents. It supports both public repositories and provides flexible filtering options. + +### Features + +#### GitLab Issue Reader +- Read issues from GitLab projects or groups +- Filter issues by: + - State (open, closed, all) + - Labels + - Milestone + - Author + - Assignee + - Created/Updated date ranges + - And more... +- Support for issue metadata including: + - State + - URL + - Labels + - Creation date + - Author + - Assignee + +#### GitLab Repository Reader +- Read files from GitLab repositories +- Support for: + - Single file reading + - Directory traversal + - Recursive file listing + - File pattern filtering (glob patterns) +- File metadata including: + - File path + - File name + - Size + - URL + - Last commit ID + - Content SHA256 + +### Usage + +#### Reading Issues + +Basic usage to read all open issues: +```java +GitLabIssueReader reader = new GitLabIssueReader( + "https://gitlab.com", + "namespace", + "project-name" +); +List documents = reader.get(); +``` + +Advanced filtering with configuration: +```java +GitLabIssueConfig config = GitLabIssueConfig.builder() + .state(GitLabIssueState.CLOSED) + .labels(Arrays.asList("bug", "critical")) + .createdAfter(LocalDateTime.now().minusDays(30)) + .build(); + +GitLabIssueReader reader = new GitLabIssueReader( + "https://gitlab.com", + "namespace", + "project-name", + null, + config +); +List documents = reader.get(); +``` + +#### Reading Repository Files + +Basic usage to read a single 
file: +```java +GitLabRepositoryReader reader = new GitLabRepositoryReader( + "https://gitlab.com", + "namespace", + "project-name" +); +List documents = reader.setRef("main") + .setFilePath("README.md") + .get(); +``` + +Reading all markdown files recursively: +```java +GitLabRepositoryReader reader = new GitLabRepositoryReader( + "https://gitlab.com", + "namespace", + "project-name" +); +List documents = reader.setRef("main") + .setPattern("**/*.md") + .setRecursive(true) + .get(); +``` + +### Dependencies + +Add the following dependency to your project: + +```xml + + com.alibaba.cloud.ai + gitlab-document-reader + ${spring-ai-alibaba.version} + +``` + +The GitLab Document Reader internally uses GitLab4J API for GitLab integration, which is automatically managed through transitive dependencies. + +### Limitations + +- Only supports public repositories +- Rate limits apply based on GitLab's API restrictions +- File size limits apply based on GitLab's API restrictions + +### License + +This project is licensed under the Apache License 2.0 - see the LICENSE file for details. + +--- + + +## 中文 + +GitLab Document Reader 是一个 Spring AI 文档读取器实现,可以从 GitLab 项目中读取 issues 和仓库文件并将它们转换为文档。它支持公开仓库访问,并提供灵活的过滤选项。 + +### 功能特性 + +#### GitLab Issue 读取器 +- 从 GitLab 项目或群组中读取 issues +- 支持多种过滤条件: + - 状态(开放、关闭、全部) + - 标签 + - 里程碑 + - 作者 + - 指派人 + - 创建/更新时间范围 + - 更多... 
+- 支持的 issue 元数据包括: + - 状态 + - URL + - 标签 + - 创建时间 + - 作者 + - 指派人 + +#### GitLab 仓库读取器 +- 读取 GitLab 仓库中的文件 +- 支持功能: + - 单文件读取 + - 目录遍历 + - 递归文件列表 + - 文件模式过滤(glob 模式) +- 文件元数据包括: + - 文件路径 + - 文件名 + - 大小 + - URL + - 最后提交 ID + - 内容 SHA256 + +### 使用方法 + +#### 读取 Issues + +基本用法(读取所有开放的 issues): +```java +GitLabIssueReader reader = new GitLabIssueReader( + "https://gitlab.com", + "namespace", + "project-name" +); +List documents = reader.get(); +``` + +使用高级配置进行过滤: +```java +GitLabIssueConfig config = GitLabIssueConfig.builder() + .state(GitLabIssueState.CLOSED) + .labels(Arrays.asList("bug", "critical")) + .createdAfter(LocalDateTime.now().minusDays(30)) + .build(); + +GitLabIssueReader reader = new GitLabIssueReader( + "https://gitlab.com", + "namespace", + "project-name", + null, + config +); +List documents = reader.get(); +``` + +#### 读取仓库文件 + +基本用法(读取单个文件): +```java +GitLabRepositoryReader reader = new GitLabRepositoryReader( + "https://gitlab.com", + "namespace", + "project-name" +); +List documents = reader.setRef("main") + .setFilePath("README.md") + .get(); +``` + +递归读取所有 markdown 文件: +```java +GitLabRepositoryReader reader = new GitLabRepositoryReader( + "https://gitlab.com", + "namespace", + "project-name" +); +List documents = reader.setRef("main") + .setPattern("**/*.md") + .setRecursive(true) + .get(); +``` + +### 依赖配置 + +在项目中添加以下依赖: + +```xml + + com.alibaba.cloud.ai + gitlab-document-reader + ${spring-ai-alibaba.version} + +``` + +GitLab Document Reader 内部使用 GitLab4J API 进行 GitLab 集成,这些依赖会通过传递依赖自动管理。 + +### 使用限制 + +- 仅支持公开仓库 +- 受 GitLab API 速率限制约束 +- 受 GitLab API 文件大小限制约束 + +### 许可证 + +本项目采用 Apache License 2.0 许可证 - 详见 LICENSE 文件。 \ No newline at end of file diff --git a/community/document-readers/gitlab-document-reader/pom.xml b/community/document-readers/gitlab-document-reader/pom.xml new file mode 100644 index 00000000..1eda7c95 --- /dev/null +++ b/community/document-readers/gitlab-document-reader/pom.xml @@ -0,0 +1,117 @@ + + + + + 4.0.0 + + 
com.alibaba.cloud.ai + spring-ai-alibaba + ${revision} + ../../../pom.xml + + + gitlab-document-reader + gitlab-document-reader + gitlab reader for Spring AI Alibaba + jar + https://github.com/alibaba/spring-ai-alibaba + + https://github.com/alibaba/spring-ai-alibaba + git://github.com/alibaba/spring-ai-alibaba.git + git@github.com:alibaba/spring-ai-alibaba.git + + + + 17 + 17 + UTF-8 + 3.1.1 + + + + + + com.alibaba.cloud.ai + spring-ai-alibaba-core + ${project.parent.version} + + + + org.gitlab4j + gitlab4j-api + 6.0.0-rc.8 + + + + + org.springframework.ai + spring-ai-test + test + + + + org.springframework.boot + spring-boot-starter-test + test + + + + io.projectreactor + reactor-test + test + + + + io.micrometer + micrometer-observation-test + test + + + + + + + + org.springframework.boot + spring-boot-maven-plugin + ${spring-boot.version} + + + org.apache.maven.plugins + maven-deploy-plugin + ${maven-deploy-plugin.version} + + true + + + + + + + + spring-milestones + Spring Milestones + https://repo.spring.io/milestone + + false + + + + + \ No newline at end of file diff --git a/community/document-readers/gitlab-document-reader/src/main/java/com/alibaba/cloud/ai/reader/gitlab/AbstractGitLabReader.java b/community/document-readers/gitlab-document-reader/src/main/java/com/alibaba/cloud/ai/reader/gitlab/AbstractGitLabReader.java new file mode 100644 index 00000000..5cee6665 --- /dev/null +++ b/community/document-readers/gitlab-document-reader/src/main/java/com/alibaba/cloud/ai/reader/gitlab/AbstractGitLabReader.java @@ -0,0 +1,82 @@ +/* + * Copyright 2024-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.alibaba.cloud.ai.reader.gitlab; + +import org.gitlab4j.api.GitLabApi; +import org.gitlab4j.api.GitLabApiException; +import org.gitlab4j.api.models.Project; +import org.springframework.ai.document.DocumentReader; +import org.springframework.util.Assert; + +/** + * Abstract base class for GitLab document readers. Provides common functionality for + * GitLab API access. Only supports public repositories. + * + * @author brianxiadong + */ +public abstract class AbstractGitLabReader implements DocumentReader { + + // GitLab API client for interacting with GitLab server + protected final GitLabApi gitLabApi; + + // GitLab project object containing project details + protected final Project project; + + // Web URL of the GitLab project + protected final String projectUrl; + + /** + * Constructor for accessing public GitLab repositories. + * @param hostUrl GitLab host URL + * @param namespace Project namespace (e.g. "spring-ai") + * @param projectName Project name (e.g. 
"spring-ai") + * @throws GitLabApiException if project cannot be found + */ + protected AbstractGitLabReader(String hostUrl, String namespace, String projectName) throws GitLabApiException { + Assert.hasText(hostUrl, "Host URL must not be empty"); + Assert.hasText(namespace, "Namespace must not be empty"); + Assert.hasText(projectName, "Project name must not be empty"); + + this.gitLabApi = new GitLabApi(hostUrl, ""); // Empty token for public access + this.project = gitLabApi.getProjectApi().getProject(namespace, projectName); + this.projectUrl = project.getWebUrl(); + } + + /** + * Get the GitLab API client. + * @return GitLab API client + */ + protected GitLabApi getGitLabApi() { + return gitLabApi; + } + + /** + * Get the project. + * @return GitLab project + */ + protected Project getProject() { + return project; + } + + /** + * Get the project URL. + * @return Project URL + */ + protected String getProjectUrl() { + return projectUrl; + } + +} diff --git a/community/document-readers/gitlab-document-reader/src/main/java/com/alibaba/cloud/ai/reader/gitlab/GitLabIssueConfig.java b/community/document-readers/gitlab-document-reader/src/main/java/com/alibaba/cloud/ai/reader/gitlab/GitLabIssueConfig.java new file mode 100644 index 00000000..ccc80a18 --- /dev/null +++ b/community/document-readers/gitlab-document-reader/src/main/java/com/alibaba/cloud/ai/reader/gitlab/GitLabIssueConfig.java @@ -0,0 +1,231 @@ +/* + * Copyright 2024-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.alibaba.cloud.ai.reader.gitlab; + +import java.time.LocalDateTime; +import java.util.List; + +/** + * Configuration class for GitLab issue reader. Contains all parameters for filtering and + * retrieving issues. + * + * @author brianxiadong + */ +public class GitLabIssueConfig { + + // Assignee username to filter issues + private String assignee; + + // Author username to filter issues + private String author; + + // Whether to return only confidential issues + private Boolean confidential; + + // Return issues created after this date + private LocalDateTime createdAfter; + + // Return issues created before this date + private LocalDateTime createdBefore; + + // List of issue IIDs to filter + private List iids; + + // Type of issues to return (issue, incident, test_case) + private GitLabIssueType issueType; + + // Labels to filter issues + private List labels; + + // Milestone title to filter issues + private String milestone; + + // Whether to return only non-archived issues + private Boolean nonArchived; + + // Scope of issues to return (created_by_me, assigned_to_me, all) + private GitLabScope scope; + + // Search query to filter issues + private String search; + + // State of issues to return (opened, closed, all) + private GitLabIssueState state; + + // Return issues updated after this date + private LocalDateTime updatedAfter; + + // Return issues updated before this date + private LocalDateTime updatedBefore; + + private GitLabIssueConfig() { + // Use builder pattern to create instances + } + + public String getAssignee() { + return assignee; + } + + public String getAuthor() { + return author; + } + + public Boolean getConfidential() { + return confidential; + } + + public LocalDateTime getCreatedAfter() { + return createdAfter; + } + + public LocalDateTime getCreatedBefore() { + return createdBefore; + } + + public List getIids() { + 
return iids; + } + + public GitLabIssueType getIssueType() { + return issueType; + } + + public List getLabels() { + return labels; + } + + public String getMilestone() { + return milestone; + } + + public Boolean getNonArchived() { + return nonArchived; + } + + public GitLabScope getScope() { + return scope; + } + + public String getSearch() { + return search; + } + + public GitLabIssueState getState() { + return state; + } + + public LocalDateTime getUpdatedAfter() { + return updatedAfter; + } + + public LocalDateTime getUpdatedBefore() { + return updatedBefore; + } + + public static Builder builder() { + return new Builder(); + } + + public static class Builder { + + private final GitLabIssueConfig config; + + private Builder() { + config = new GitLabIssueConfig(); + } + + public Builder assignee(String assignee) { + config.assignee = assignee; + return this; + } + + public Builder author(String author) { + config.author = author; + return this; + } + + public Builder confidential(Boolean confidential) { + config.confidential = confidential; + return this; + } + + public Builder createdAfter(LocalDateTime createdAfter) { + config.createdAfter = createdAfter; + return this; + } + + public Builder createdBefore(LocalDateTime createdBefore) { + config.createdBefore = createdBefore; + return this; + } + + public Builder iids(List iids) { + config.iids = iids; + return this; + } + + public Builder issueType(GitLabIssueType issueType) { + config.issueType = issueType; + return this; + } + + public Builder labels(List labels) { + config.labels = labels; + return this; + } + + public Builder milestone(String milestone) { + config.milestone = milestone; + return this; + } + + public Builder nonArchived(Boolean nonArchived) { + config.nonArchived = nonArchived; + return this; + } + + public Builder scope(GitLabScope scope) { + config.scope = scope; + return this; + } + + public Builder search(String search) { + config.search = search; + return this; + } + + public Builder 
state(GitLabIssueState state) { + config.state = state != null ? state : GitLabIssueState.OPEN; + return this; + } + + public Builder updatedAfter(LocalDateTime updatedAfter) { + config.updatedAfter = updatedAfter; + return this; + } + + public Builder updatedBefore(LocalDateTime updatedBefore) { + config.updatedBefore = updatedBefore; + return this; + } + + public GitLabIssueConfig build() { + return config; + } + + } + +} diff --git a/community/document-readers/gitlab-document-reader/src/main/java/com/alibaba/cloud/ai/reader/gitlab/GitLabIssueReader.java b/community/document-readers/gitlab-document-reader/src/main/java/com/alibaba/cloud/ai/reader/gitlab/GitLabIssueReader.java new file mode 100644 index 00000000..babb5d9f --- /dev/null +++ b/community/document-readers/gitlab-document-reader/src/main/java/com/alibaba/cloud/ai/reader/gitlab/GitLabIssueReader.java @@ -0,0 +1,194 @@ +/* + * Copyright 2024-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.alibaba.cloud.ai.reader.gitlab; + +import org.gitlab4j.api.GitLabApi; +import org.gitlab4j.api.GitLabApiException; +import org.gitlab4j.api.IssuesApi; +import org.gitlab4j.api.models.Issue; +import org.gitlab4j.api.models.IssueFilter; +import org.gitlab4j.models.Constants; +import org.springframework.ai.document.Document; + +import java.time.LocalDateTime; +import java.time.ZoneId; +import java.util.*; + +/** + * GitLab issues reader. 
Reads issues from GitLab projects or groups and converts them to + * documents. Only supports public repositories. + * + * @author brianxiadong + */ +public class GitLabIssueReader extends AbstractGitLabReader { + + // GitLab group path for filtering issues by group + private final String groupPath; + + // Configuration object containing all issue filtering parameters + private final GitLabIssueConfig config; + + /** + * Constructor for GitLabIssueReader with default configuration (open issues). + * @param hostUrl GitLab host URL + * @param namespace Project namespace (e.g. "spring-ai") + * @param projectName Project name (e.g. "spring-ai") + * @throws GitLabApiException if project cannot be found + */ + public GitLabIssueReader(String hostUrl, String namespace, String projectName) throws GitLabApiException { + this(hostUrl, namespace, projectName, null, null); + } + + /** + * Constructor for GitLabIssueReader. + * @param hostUrl GitLab host URL + * @param namespace Project namespace (e.g. "spring-ai") + * @param projectName Project name (e.g. "spring-ai") + * @param groupPath Group path (optional) + * @param config Issue configuration (optional, defaults to open issues) + * @throws GitLabApiException if project cannot be found + */ + public GitLabIssueReader(String hostUrl, String namespace, String projectName, String groupPath, + GitLabIssueConfig config) throws GitLabApiException { + super(hostUrl, namespace, projectName); + this.groupPath = groupPath; + this.config = config != null ? config : GitLabIssueConfig.builder().state(GitLabIssueState.OPEN).build(); + } + + /** + * Convert a GitLab issue to a Document. 
+ * @param issue GitLab issue + * @return Document representation of the issue + */ + private Document buildDocumentFromIssue(Issue issue) { + String title = issue.getTitle(); + String description = issue.getDescription(); + + Map metadata = new HashMap<>(); + + // Required fields + metadata.put("state", issue.getState()); + metadata.put("url", issue.getWebUrl()); + + // Optional fields, only add if not empty + if (issue.getLabels() != null && !issue.getLabels().isEmpty()) { + metadata.put("labels", issue.getLabels()); + } + + if (issue.getCreatedAt() != null) { + metadata.put("created_at", issue.getCreatedAt()); + } + + if (issue.getClosedAt() != null) { + metadata.put("closed_at", issue.getClosedAt()); + } + + if (issue.getAssignee() != null && issue.getAssignee().getUsername() != null) { + metadata.put("assignee", issue.getAssignee().getUsername()); + } + + if (issue.getAuthor() != null && issue.getAuthor().getUsername() != null) { + metadata.put("author", issue.getAuthor().getUsername()); + } + + return new Document(String.valueOf(issue.getIid()), + String.format("%s\n%s", title, description != null ? description : ""), metadata); + } + + /** + * Convert LocalDateTime to ISO string format for GitLab API. + * @param dateTime LocalDateTime to convert + * @return ISO formatted string or null + */ + private Date toGitLabDateFormat(LocalDateTime dateTime) { + if (dateTime == null) { + return null; + } + return Date.from(dateTime.atZone(ZoneId.systemDefault()).toInstant()); + } + + @Override + public List get() { + try { + IssuesApi issuesApi = gitLabApi.getIssuesApi(); + + // Convert Integer iids to Long iids + List longIids = config.getIids() != null ? config.getIids().stream().map(Long::valueOf).toList() + : null; + + // Build the filter parameters + IssueFilter filter = new IssueFilter().withIids(longIids) + .withState(config.getState() != null + ? 
Constants.IssueState.valueOf(config.getState().getValue().toUpperCase()) + : Constants.IssueState.OPENED) + .withLabels(config.getLabels()) + .withMilestone(config.getMilestone()) + .withScope(config.getScope() != null + ? Constants.IssueScope.valueOf(config.getScope().getValue().toUpperCase()) : null) + .withSearch(config.getSearch()) + .withCreatedAfter( + config.getCreatedAfter() != null ? toGitLabDateFormat(config.getCreatedAfter()) : null) + .withCreatedBefore( + config.getCreatedBefore() != null ? toGitLabDateFormat(config.getCreatedBefore()) : null) + .withUpdatedAfter( + config.getUpdatedAfter() != null ? toGitLabDateFormat(config.getUpdatedAfter()) : null) + .withUpdatedBefore( + config.getUpdatedBefore() != null ? toGitLabDateFormat(config.getUpdatedBefore()) : null); + + // Handle assignee and author + String assignee = config.getAssignee(); + if (assignee != null) { + try { + Long assigneeId = Long.parseLong(assignee); + filter.withAssigneeId(assigneeId); + } + catch (NumberFormatException e) { + // If not a number, treat as username + filter.withoutAssigneeUsername(assignee); + } + } + + String author = config.getAuthor(); + if (author != null) { + try { + Long authorId = Long.parseLong(author); + filter.withAuthorId(authorId); + } + catch (NumberFormatException e) { + // If not a number, treat as username + filter.withoutAuthorUsername(author); + } + } + + List issues; + if (groupPath != null) { + // Get group issues using IssuesApi.getGroupIssues(groupPath, filter) + issues = issuesApi.getGroupIssues(groupPath, filter); + } + else { + // Get project issues using IssuesApi.getIssues(projectId, filter) + issues = issuesApi.getIssues(project.getId(), filter); + } + + return issues.stream().map(this::buildDocumentFromIssue).toList(); + + } + catch (GitLabApiException e) { + throw new RuntimeException("Failed to load issues from GitLab", e); + } + } + +} diff --git 
a/community/document-readers/gitlab-document-reader/src/main/java/com/alibaba/cloud/ai/reader/gitlab/GitLabIssueState.java b/community/document-readers/gitlab-document-reader/src/main/java/com/alibaba/cloud/ai/reader/gitlab/GitLabIssueState.java new file mode 100644 index 00000000..a467ae61 --- /dev/null +++ b/community/document-readers/gitlab-document-reader/src/main/java/com/alibaba/cloud/ai/reader/gitlab/GitLabIssueState.java @@ -0,0 +1,50 @@ +/* + * Copyright 2024-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.alibaba.cloud.ai.reader.gitlab; + +/** + * Issue state enum. Used to decide what issues to retrieve. 
/**
 * States an issue query can be restricted to. Each constant carries the lowercase
 * token returned by {@link #getValue()}.
 *
 * @author brianxiadong
 */
public enum GitLabIssueState {

    /** Only issues that are currently open. */
    OPEN("opened"),

    /** Only issues that have been closed. */
    CLOSED("closed"),

    /** Every issue regardless of state. */
    ALL("all");

    /** Token associated with this state. */
    private final String token;

    GitLabIssueState(String token) {
        this.token = token;
    }

    /**
     * Returns the token associated with this state.
     * @return "opened", "closed" or "all"
     */
    public String getValue() {
        return this.token;
    }

}
/**
 * Kinds of issue a query can be restricted to. Each constant carries the lowercase
 * token returned by {@link #getValue()}.
 *
 * @author brianxiadong
 */
public enum GitLabIssueType {

    /** Ordinary issues. */
    ISSUE("issue"),

    /** Incidents. */
    INCIDENT("incident"),

    /** Test cases. */
    TEST_CASE("test_case"),

    /** Tasks. */
    TASK("task");

    /** Token associated with this type. */
    private final String token;

    GitLabIssueType(String token) {
        this.token = token;
    }

    /**
     * Returns the token associated with this type.
     * @return "issue", "incident", "test_case" or "task"
     */
    public String getValue() {
        return this.token;
    }

}
Reads files from GitLab repositories and converts them to + * documents. Only supports public repositories. + * + * @author brianxiadong + */ +public class GitLabRepositoryReader extends AbstractGitLabReader { + + private String ref; + + private String filePath; + + private String pattern; + + private boolean recursive; + + /** + * Constructor for GitLabRepositoryReader. + * @param hostUrl GitLab host URL + * @param namespace Project namespace (e.g. "spring-ai") + * @param projectName Project name (e.g. "spring-ai") + * @throws GitLabApiException if project cannot be found + */ + public GitLabRepositoryReader(String hostUrl, String namespace, String projectName) throws GitLabApiException { + super(hostUrl, namespace, projectName); + this.ref = "main"; // Default branch + } + + /** + * Set the Git reference (branch, tag, or commit) to read from. + * @param ref Git reference + * @return this reader instance + */ + public GitLabRepositoryReader setRef(String ref) { + this.ref = ref; + return this; + } + + /** + * Set the file path to read. If null, will read all files in the repository. + * @param filePath File path relative to repository root + * @return this reader instance + */ + public GitLabRepositoryReader setFilePath(String filePath) { + this.filePath = filePath; + return this; + } + + /** + * Set the file pattern to filter files. Supports glob patterns like: - "*.md" for all + * markdown files - "docs/*.txt" for all text files in docs directory - + * "src/**\/*.java" for all Java files in src directory and subdirectories + * @param pattern File pattern in glob format + * @return this reader instance + */ + public GitLabRepositoryReader setPattern(String pattern) { + this.pattern = pattern; + return this; + } + + /** + * Set whether to recursively read files in subdirectories. 
+ * @param recursive Whether to read recursively + * @return this reader instance + */ + public GitLabRepositoryReader setRecursive(boolean recursive) { + this.recursive = recursive; + return this; + } + + @Override + public List get() { + try { + return loadData(ref, filePath, pattern, recursive); + } + catch (GitLabApiException e) { + throw new RuntimeException("Failed to load files from GitLab", e); + } + } + + /** + * Load files from GitLab repository. + * @param ref Git reference (branch, tag, or commit) + * @param filePath File path to load (optional) + * @param pattern File pattern to filter (optional) + * @param recursive Whether to read recursively + * @return List of documents + * @throws GitLabApiException if API call fails + */ + List loadData(String ref, String filePath, String pattern, boolean recursive) throws GitLabApiException { + try { + if (StringUtils.hasText(filePath)) { + return Collections.singletonList(loadSingleFile(filePath, ref)); + } + + RepositoryApi repositoryApi = gitLabApi.getRepositoryApi(); + List items = repositoryApi.getTree(project.getId(), filePath, ref, recursive); + + List documents = new ArrayList<>(); + for (TreeItem item : items) { + if (TreeItem.Type.BLOB.equals(item.getType())) { + // Apply pattern filter if specified + if (pattern != null && !pattern.isEmpty()) { + String path = item.getPath(); + // Convert glob pattern to regex pattern + String regexPattern = pattern.replace(".", "\\.") // Escape dots + .replace("**", ".*") // Match any characters across + // directories + .replace("*", "[^/]*") // Match any characters except + // directory separator + .replace("?", "."); // Match single character + if (!path.matches(regexPattern)) { + continue; + } + } + documents.add(loadSingleFile(item.getPath(), ref)); + } + } + + return documents; + } + catch (GitLabApiException e) { + throw new RuntimeException("Failed to load repository data from GitLab", e); + } + } + + /** + * Load a single file from the repository. 
+ * @param filePath Path to the file + * @param ref Branch name or commit ID + * @return Document representation of the file + * @throws GitLabApiException if there is an error accessing the GitLab API + */ + private Document loadSingleFile(String filePath, String ref) throws GitLabApiException { + RepositoryFile file = gitLabApi.getRepositoryFileApi().getFile(project.getId(), filePath, ref); + byte[] content = Base64.getDecoder().decode(file.getContent()); + String fileContent = new String(content, StandardCharsets.UTF_8); + + Map metadata = new HashMap<>(); + + // Required fields + metadata.put("file_path", file.getFilePath()); + metadata.put("file_name", file.getFileName()); + metadata.put("size", content.length); + metadata.put("url", String.format("%s/repository/files/%s/raw", projectUrl, + StringUtils.replace(file.getFilePath(), "/", "%2F"))); + + // Optional fields, only add if not empty + if (file.getLastCommitId() != null) { + metadata.put("last_commit_id", file.getLastCommitId()); + } + + if (file.getRef() != null) { + metadata.put("ref", file.getRef()); + } + + if (file.getContentSha256() != null) { + metadata.put("content_sha256", file.getContentSha256()); + } + + return new Document(file.getBlobId(), fileContent, metadata); + } + +} \ No newline at end of file diff --git a/community/document-readers/gitlab-document-reader/src/main/java/com/alibaba/cloud/ai/reader/gitlab/GitLabScope.java b/community/document-readers/gitlab-document-reader/src/main/java/com/alibaba/cloud/ai/reader/gitlab/GitLabScope.java new file mode 100644 index 00000000..a5a8b2f4 --- /dev/null +++ b/community/document-readers/gitlab-document-reader/src/main/java/com/alibaba/cloud/ai/reader/gitlab/GitLabScope.java @@ -0,0 +1,50 @@ +/* + * Copyright 2024-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.alibaba.cloud.ai.reader.gitlab; + +/** + * Scope enum. Used to determine the scope of the issue. + * + * @author brianxiadong + */ +public enum GitLabScope { + + /** + * Issues created by the authenticated user + */ + CREATED_BY_ME("created_by_me"), + + /** + * Issues assigned to the authenticated user + */ + ASSIGNED_TO_ME("assigned_to_me"), + + /** + * All issues + */ + ALL("all"); + + private final String value; + + GitLabScope(String value) { + this.value = value; + } + + public String getValue() { + return value; + } + +} diff --git a/community/document-readers/gitlab-document-reader/src/test/java/com/alibaba/cloud/ai/reader/gitlab/GitLabIssueReaderTest.java b/community/document-readers/gitlab-document-reader/src/test/java/com/alibaba/cloud/ai/reader/gitlab/GitLabIssueReaderTest.java new file mode 100644 index 00000000..0938e6aa --- /dev/null +++ b/community/document-readers/gitlab-document-reader/src/test/java/com/alibaba/cloud/ai/reader/gitlab/GitLabIssueReaderTest.java @@ -0,0 +1,121 @@ +/* + * Copyright 2024-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.alibaba.cloud.ai.reader.gitlab; + +import org.gitlab4j.api.GitLabApiException; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.springframework.ai.document.Document; + +import java.time.LocalDateTime; +import java.util.Arrays; +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Test cases for GitLabIssueReader. Using real issues from Spring AI project + * (https://gitlab.com/spring-ai/spring-ai). + * + * @author brianxiadong + */ +class GitLabIssueReaderTest { + + private static final String TEST_HOST_URL = "https://gitlab.com"; + + private static final String TEST_NAMESPACE = ""; + + private static final String TEST_PROJECT_NAME = ""; + + private GitLabIssueReader reader; + + @BeforeEach + void setUp() throws GitLabApiException { + // Create GitLabIssueReader instance for accessing public project + reader = new GitLabIssueReader(TEST_HOST_URL, TEST_NAMESPACE, TEST_PROJECT_NAME); + } + + @Test + void testGetIssuesWithDefaultParameters() { + // Get all open issues directly + List documents = reader.get(); + + // Verify results + assertThat(documents).isNotEmpty(); + + // Verify basic structure of first document + Document doc = documents.get(0); + assertThat(doc.getId()).isNotNull(); + assertThat(doc.getText()).isNotBlank(); + assertThat(doc.getMetadata()).containsKey("state").containsKey("url"); + + // Verify default state is OPEN + assertThat(doc.getMetadata().get("state")).isEqualTo("opened"); + } + + @Test + void testLoadDataWithCustomParameters() throws GitLabApiException { + // Create new reader instance with custom parameters + GitLabIssueConfig config = GitLabIssueConfig.builder() + .confidential(false) + .createdAfter(LocalDateTime.now().minusDays(365)) + .issueType(GitLabIssueType.ISSUE) + .labels(Arrays.asList("enhancement", "feature")) + 
.nonArchived(true) + .scope(GitLabScope.ALL) + .state(GitLabIssueState.CLOSED) + .build(); + + reader = new GitLabIssueReader(TEST_HOST_URL, TEST_NAMESPACE, TEST_PROJECT_NAME, null, config); + + // Get issues + List documents = reader.get(); + + // Verify results + assertThat(documents).isNotEmpty(); + + // Verify all documents match our filter criteria + for (Document doc : documents) { + assertThat(doc.getMetadata()).containsEntry("state", "closed").hasEntrySatisfying("labels", labels -> { + @SuppressWarnings("unchecked") + List labelList = (List) labels; + assertThat(labelList).containsAnyOf("enhancement", "feature"); + }).containsKey("url"); + + // Verify document content + assertThat(doc.getId()).isNotNull(); + assertThat(doc.getText()).isNotBlank(); + } + } + + @Test + void testLoadSpecificIssue() throws GitLabApiException { + // Create configuration to get specific issue + GitLabIssueConfig config = GitLabIssueConfig.builder().iids(Arrays.asList(1)).build(); + + reader = new GitLabIssueReader(TEST_HOST_URL, TEST_NAMESPACE, TEST_PROJECT_NAME, null, config); + + // Get specific issue (#1) + List documents = reader.get(); + + // Verify results + assertThat(documents).hasSize(1); + Document doc = documents.get(0); + assertThat(doc.getId()).isEqualTo("1"); + assertThat(doc.getMetadata()).containsKey("state").containsKey("url"); + } + +} diff --git a/community/document-readers/gitlab-document-reader/src/test/java/com/alibaba/cloud/ai/reader/gitlab/GitLabRepositoryReaderTest.java b/community/document-readers/gitlab-document-reader/src/test/java/com/alibaba/cloud/ai/reader/gitlab/GitLabRepositoryReaderTest.java new file mode 100644 index 00000000..1b1de7a1 --- /dev/null +++ b/community/document-readers/gitlab-document-reader/src/test/java/com/alibaba/cloud/ai/reader/gitlab/GitLabRepositoryReaderTest.java @@ -0,0 +1,163 @@ +/* + * Copyright 2024-2025 the original author or authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.alibaba.cloud.ai.reader.gitlab; + +import org.gitlab4j.api.GitLabApiException; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.springframework.ai.document.Document; + +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Test cases for GitLabRepositoryReader. Using real repository from Spring AI project + * (https://gitlab.com/spring-ai/spring-ai). 
+ * + * @author brianxiadong + */ +class GitLabRepositoryReaderTest { + + private static final String TEST_HOST_URL = "https://gitlab.com"; + + private static final String TEST_NAMESPACE = ""; + + private static final String TEST_PROJECT_NAME = ""; + + private static final String TEST_REF = "master"; + + private static final String TEST_FILE_PATH = "README.md"; + + private GitLabRepositoryReader reader; + + @BeforeEach + void setUp() throws GitLabApiException { + // Create GitLabRepositoryReader instance for accessing public project + reader = new GitLabRepositoryReader(TEST_HOST_URL, TEST_NAMESPACE, TEST_PROJECT_NAME); + } + + @Test + void testGetSingleFile() { + // Configure reader to load a single file + List documents = reader.setRef(TEST_REF).setFilePath(TEST_FILE_PATH).get(); + + // Verify results + assertThat(documents).hasSize(1); + Document doc = documents.get(0); + assertThat(doc.getId()).isNotNull(); + assertThat(doc.getContent()).isNotBlank(); + assertThat(doc.getMetadata()).containsKey("file_path") + .containsKey("file_name") + .containsKey("url") + .containsKey("ref"); + } + + @Test + void testGetAllFiles() { + // Configure reader to load all files from root directory + List documents = reader.setRef(TEST_REF).setRecursive(true).get(); + + // Verify results + assertThat(documents).isNotEmpty(); + + // Verify each document has required metadata + for (Document doc : documents) { + assertThat(doc.getId()).isNotNull(); + assertThat(doc.getText()).isNotBlank(); + assertThat(doc.getMetadata()).containsKey("file_path") + .containsKey("file_name") + .containsKey("url") + .containsKey("ref"); + } + } + + @Test + void testGetMarkdownFiles() { + // Configure reader to load only markdown files + List documents = reader.setRef(TEST_REF).setPattern("*.md").setRecursive(true).get(); + + // Verify results + assertThat(documents).isNotEmpty(); + + // Verify all documents are markdown files + for (Document doc : documents) { + assertThat(doc.getId()).isNotNull(); + 
assertThat(doc.getText()).isNotBlank(); + assertThat(doc.getMetadata()).containsKey("file_path") + .containsKey("file_name") + .containsKey("url") + .containsKey("ref"); + + String filePath = (String) doc.getMetadata().get("file_path"); + assertThat(filePath).endsWith(".md"); + } + } + + @Test + void testGetFilesInDirectory() { + // Configure reader to load files from a specific directory + List documents = reader.setRef(TEST_REF).setFilePath("docs").setRecursive(true).get(); + + // Verify results + assertThat(documents).isNotEmpty(); + + // Verify all files are from the docs directory + for (Document doc : documents) { + String filePath = (String) doc.getMetadata().get("file_path"); + assertThat(filePath).startsWith("docs/"); + } + } + + @Test + void testGetFilesWithComplexPattern() { + // Configure reader to load Java files from src directory and its subdirectories + List documents = reader.setRef(TEST_REF).setPattern("src/**/*.java").setRecursive(true).get(); + + // Verify results + assertThat(documents).isNotEmpty(); + + // Verify all files match the pattern + for (Document doc : documents) { + String filePath = (String) doc.getMetadata().get("file_path"); + assertThat(filePath).startsWith("src/").endsWith(".java"); + } + } + + @Test + void testGetFilesWithMetadata() { + // Configure reader to load a single file and check metadata + List documents = reader.setRef(TEST_REF).setFilePath(TEST_FILE_PATH).get(); + + // Verify results + assertThat(documents).hasSize(1); + Document doc = documents.get(0); + + // Verify required metadata fields + assertThat(doc.getMetadata()).containsKey("file_path") + .containsKey("file_name") + .containsKey("size") + .containsKey("url") + .containsKey("ref"); + + // Verify metadata values + assertThat(doc.getMetadata().get("file_path")).isEqualTo(TEST_FILE_PATH); + assertThat(doc.getMetadata().get("file_name")).isEqualTo("README.md"); + assertThat(doc.getMetadata().get("size")).isInstanceOf(Integer.class); + 
assertThat(doc.getMetadata().get("url")).asString().contains(TEST_FILE_PATH); + } + +} diff --git a/pom.xml b/pom.xml index e6021eb2..6b9d4660 100644 --- a/pom.xml +++ b/pom.xml @@ -67,6 +67,7 @@ community/document-readers/arxiv-document-reader community/document-readers/chatgpt-data-document-reader community/document-readers/gpt-repo-document-reader + community/document-readers/gitlab-document-reader community/document-readers/gitbook-document-reader community/document-readers/huggingface-fs-document-reader