Skip to content

Commit

Permalink
code format
Browse files Browse the repository at this point in the history
  • Loading branch information
xiaohai-78 committed Jan 18, 2025
1 parent f1b87df commit aee0eaa
Show file tree
Hide file tree
Showing 4 changed files with 218 additions and 212 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,86 +33,87 @@

/**
* A class for reading and parsing video information and subtitles from Bilibili.
* Implements the DocumentReader interface to provide methods for obtaining document content.
* Implements the DocumentReader interface to provide methods for obtaining document
* content.
*/
public class BilibiliDocumentReader implements DocumentReader {

    private static final Logger logger = LoggerFactory.getLogger(BilibiliDocumentReader.class);

    /** Video-info endpoint; the extracted BV id is appended as the {@code bvid} query value. */
    private static final String API_BASE_URL = "https://api.bilibili.com/x/web-interface/view?bvid=";

    /**
     * Locates the BV id ("BV" followed by one or more word characters) inside the
     * resource path. The leading {@code .*} is greedy, so when several candidates
     * appear on one line the last one is captured -- the same choice the previous
     * {@code replaceAll}-based implementation made.
     */
    private static final java.util.regex.Pattern BVID_PATTERN = java.util.regex.Pattern.compile(".*(BV\\w+).*");

    /**
     * Shared HTTP client. Sends {@code Accept: application/json} by default and sets
     * the codecs' in-memory limit to 5 * 1024 * 1024 bytes.
     */
    private static final WebClient WEB_CLIENT = WebClient.builder()
        .defaultHeader(HttpHeaders.ACCEPT, MediaType.APPLICATION_JSON_VALUE)
        .codecs(configurer -> configurer.defaultCodecs().maxInMemorySize(5 * 1024 * 1024))
        .build();

    /** Text (typically a video URL) from which the BV id is extracted. */
    private final String resourcePath;

    private final ObjectMapper objectMapper;

    /**
     * Creates a reader for one Bilibili video.
     * @param resourcePath text containing the video's BV id; must contain text
     * @throws IllegalArgumentException if {@code resourcePath} has no text
     */
    public BilibiliDocumentReader(String resourcePath) {
        Assert.hasText(resourcePath, "Query string must not be empty");
        this.resourcePath = resourcePath;
        this.objectMapper = new ObjectMapper();
    }

    /**
     * Fetches the video metadata and, when available, its first subtitle track.
     * <p>
     * On success the list holds a "Video information" document (title and description
     * as metadata) followed by a document with the transcript text. Exceptions are not
     * propagated: each failure is logged and appended to the list as an error document,
     * after whatever documents were already produced.
     * @return the documents produced for this video
     */
    @Override
    public List<Document> get() {
        List<Document> documents = new ArrayList<>();
        try {
            String bvid = extractBvid(resourcePath);
            String videoInfoResponse = fetchVideoInfo(bvid);
            JsonNode videoData = parseJson(videoInfoResponse).path("data");
            String title = videoData.path("title").asText();
            String description = videoData.path("desc").asText();
            Document infoDoc = new Document("Video information", Map.of("title", title, "description", description));
            documents.add(infoDoc);
            String documentContent = fetchAndProcessSubtitles(videoData, title, description);
            documents.add(new Document(documentContent));
        } catch (IllegalArgumentException e) {
            // Raised, among other places, by extractBvid when no BV id is present.
            logger.error("Invalid input: {}", e.getMessage());
            documents.add(new Document("Error: Invalid input"));
        } catch (IOException e) {
            logger.error("Error parsing JSON: {}", e.getMessage(), e);
            documents.add(new Document("Error parsing JSON: " + e.getMessage()));
        } catch (Exception e) {
            logger.error("Unexpected error: {}", e.getMessage(), e);
            documents.add(new Document("Unexpected error: " + e.getMessage()));
        }
        return documents;
    }

    /**
     * Extracts the BV id from the given text.
     * <p>
     * Unlike a plain {@code replaceAll}, this fails fast when nothing matches instead
     * of handing the whole input back as if it were an id.
     * @param resourcePath text expected to contain a BV id
     * @return the captured BV id
     * @throws IllegalArgumentException if no BV id is present
     */
    private String extractBvid(String resourcePath) {
        java.util.regex.Matcher matcher = BVID_PATTERN.matcher(resourcePath);
        if (!matcher.find()) {
            throw new IllegalArgumentException("No Bilibili BV id found in: " + resourcePath);
        }
        return matcher.group(1);
    }

    /**
     * Requests the video-info JSON for the given BV id, blocking until the body arrives.
     * @param bvid the BV id to look up
     * @return the raw response body
     */
    private String fetchVideoInfo(String bvid) {
        return WEB_CLIENT.get()
            .uri(API_BASE_URL + bvid)
            .retrieve()
            .bodyToMono(String.class)
            .block();
    }

    /**
     * Parses JSON text into a tree.
     * @param jsonResponse the JSON text
     * @return the root node
     * @throws IOException if the text cannot be parsed
     */
    private JsonNode parseJson(String jsonResponse) throws IOException {
        return objectMapper.readTree(jsonResponse);
    }

    /**
     * Downloads the first subtitle track listed in the video data and flattens it
     * into a single transcript string.
     * @param videoData the {@code data} node of the video-info response
     * @param title the video title, echoed into the result
     * @param description the video description, echoed into the result
     * @return the formatted title/description/transcript text, or a "no subtitles"
     * message when no usable subtitle URL is listed
     * @throws IOException if the subtitle payload cannot be parsed
     */
    private String fetchAndProcessSubtitles(JsonNode videoData, String title, String description) throws IOException {
        JsonNode subtitleList = videoData.path("subtitle").path("list");
        String subtitleUrl = "";
        if (subtitleList.isArray() && subtitleList.size() > 0) {
            subtitleUrl = subtitleList.get(0).path("subtitle_url").asText().trim();
        }
        // A listed track without a URL cannot be fetched; treat it like "no subtitles"
        // rather than issuing a request against an empty URI.
        if (subtitleUrl.isEmpty()) {
            return String.format("No subtitles found for video: %s. Returning an empty transcript.", resourcePath);
        }
        // NOTE(review): presumably the API may hand out scheme-less URLs ("//host/path");
        // give them an explicit scheme so the client can resolve them -- confirm.
        if (subtitleUrl.startsWith("//")) {
            subtitleUrl = "https:" + subtitleUrl;
        }

        String subtitleResponse = WEB_CLIENT.get()
            .uri(subtitleUrl)
            .retrieve()
            .bodyToMono(String.class)
            .block();

        // Concatenate every cue's "content", separated by single spaces.
        StringBuilder rawTranscript = new StringBuilder();
        for (JsonNode cue : parseJson(subtitleResponse).path("body")) {
            rawTranscript.append(cue.path("content").asText()).append(" ");
        }

        return String.format("Video Title: %s, Description: %s\nTranscript: %s",
                title, description, rawTranscript.toString().trim());
    }
}
// Class-wide logger used by get() to report failures.
private static final Logger logger = LoggerFactory.getLogger(BilibiliDocumentReader.class);

// URL prefix of the video-info endpoint; fetchVideoInfo appends the BV id to it.
private static final String API_BASE_URL = "https://api.bilibili.com/x/web-interface/view?bvid=";

// Constructor argument from which get() extracts the BV id.
private final String resourcePath;

// JSON parser used by parseJson.
private final ObjectMapper objectMapper;

// Shared HTTP client: sends "Accept: application/json" by default and sets the
// codecs' in-memory size limit to 5 * 1024 * 1024 bytes.
private static final WebClient WEB_CLIENT = WebClient.builder()
.defaultHeader(HttpHeaders.ACCEPT, MediaType.APPLICATION_JSON_VALUE)
.codecs(configurer -> configurer.defaultCodecs().maxInMemorySize(5 * 1024 * 1024))
.build();

/**
 * Builds a reader bound to a single Bilibili resource.
 * @param resourcePath text that carries the video's BV id; must contain text
 */
public BilibiliDocumentReader(String resourcePath) {
    // Validate first so that no field is assigned for unusable input.
    Assert.hasText(resourcePath, "Query string must not be empty");
    this.objectMapper = new ObjectMapper();
    this.resourcePath = resourcePath;
}

/**
 * Loads the video's metadata and transcript as documents.
 * <p>
 * The first document carries the title and description as metadata; the second
 * carries the text produced by {@code fetchAndProcessSubtitles}. Exceptions are
 * logged and turned into an additional error document instead of being rethrown.
 * @return the collected documents, including any error document
 */
@Override
public List<Document> get() {
    final List<Document> result = new ArrayList<>();
    try {
        final String rawInfo = fetchVideoInfo(extractBvid(resourcePath));
        final JsonNode data = parseJson(rawInfo).path("data");
        final String videoTitle = data.path("title").asText();
        final String videoDescription = data.path("desc").asText();

        // The info document is stored before subtitles are fetched, so it survives
        // a failure in the subtitle step.
        result.add(new Document("Video information",
                Map.of("title", videoTitle, "description", videoDescription)));

        final String transcriptText = fetchAndProcessSubtitles(data, videoTitle, videoDescription);
        result.add(new Document(transcriptText));
    }
    catch (IllegalArgumentException ex) {
        logger.error("Invalid input: {}", ex.getMessage());
        result.add(new Document("Error: Invalid input"));
    }
    catch (IOException ex) {
        logger.error("Error parsing JSON: {}", ex.getMessage(), ex);
        result.add(new Document("Error parsing JSON: " + ex.getMessage()));
    }
    catch (Exception ex) {
        logger.error("Unexpected error: {}", ex.getMessage(), ex);
        result.add(new Document("Unexpected error: " + ex.getMessage()));
    }
    return result;
}

/**
 * Extracts the BV id ("BV" followed by one or more word characters) from the
 * given text.
 * <p>
 * The expression is unchanged, so the same occurrence is picked as before (the
 * greedy prefix selects the last candidate on a line). What changes is the no-match
 * case: the input used to be returned untouched and was then sent to the API as if
 * it were an id; it is now rejected.
 * @param resourcePath text expected to contain a BV id
 * @return the captured BV id
 * @throws IllegalArgumentException if no BV id is present
 */
private String extractBvid(String resourcePath) {
    java.util.regex.Matcher matcher = java.util.regex.Pattern.compile(".*(BV\\w+).*").matcher(resourcePath);
    if (!matcher.find()) {
        throw new IllegalArgumentException("No Bilibili BV id found in: " + resourcePath);
    }
    return matcher.group(1);
}

/**
 * Performs a blocking GET against the video-info endpoint for one BV id.
 * @param bvid the BV id appended to the endpoint URL
 * @return the response body as text
 */
private String fetchVideoInfo(String bvid) {
    final String requestUrl = API_BASE_URL + bvid;
    return WEB_CLIENT.get()
        .uri(requestUrl)
        .retrieve()
        .bodyToMono(String.class)
        .block();
}

/**
 * Reads JSON text into a node tree using this reader's {@code ObjectMapper}.
 * @param jsonResponse the JSON text to read
 * @return the root of the parsed tree
 * @throws IOException if the text cannot be read as JSON
 */
private JsonNode parseJson(String jsonResponse) throws IOException {
    final JsonNode root = this.objectMapper.readTree(jsonResponse);
    return root;
}

/**
 * Downloads the first subtitle track listed in the video data and flattens it into
 * one transcript string.
 * @param videoData the {@code data} node of the video-info response
 * @param title the video title, echoed into the result
 * @param description the video description, echoed into the result
 * @return the formatted title/description/transcript text, or a "no subtitles"
 * message when no usable subtitle URL is listed
 * @throws IOException if the subtitle payload cannot be parsed
 */
private String fetchAndProcessSubtitles(JsonNode videoData, String title, String description) throws IOException {
    JsonNode subtitleList = videoData.path("subtitle").path("list");
    String subtitleUrl = "";
    if (subtitleList.isArray() && subtitleList.size() > 0) {
        subtitleUrl = subtitleList.get(0).path("subtitle_url").asText().trim();
    }
    // A listed track without a URL cannot be fetched; report it as "no subtitles"
    // instead of issuing a request against an empty URI.
    if (subtitleUrl.isEmpty()) {
        return String.format("No subtitles found for video: %s. Returning an empty transcript.", resourcePath);
    }
    // NOTE(review): presumably the API may hand out scheme-less URLs ("//host/path");
    // give them an explicit scheme so the client can resolve them -- confirm.
    if (subtitleUrl.startsWith("//")) {
        subtitleUrl = "https:" + subtitleUrl;
    }

    String subtitleResponse = WEB_CLIENT.get().uri(subtitleUrl).retrieve().bodyToMono(String.class).block();

    // Concatenate every cue's "content", separated by single spaces.
    StringBuilder rawTranscript = new StringBuilder();
    for (JsonNode cue : parseJson(subtitleResponse).path("body")) {
        rawTranscript.append(cue.path("content").asText()).append(" ");
    }

    return String.format("Video Title: %s, Description: %s\nTranscript: %s", title, description,
            rawTranscript.toString().trim());
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,14 @@
*/
public class BilibiliDocumentReaderTest {

    // Log under the test class itself so output is attributed to the test rather
    // than to the reader under test.
    private static final Logger logger = LoggerFactory.getLogger(BilibiliDocumentReaderTest.class);

    /**
     * Smoke test: runs the reader against a real video URL and logs whatever
     * documents come back. It makes no assertions, and the reader issues live HTTP
     * requests, so the logged output depends on the remote service.
     */
    @Test
    void bilibiliDocumentReaderTest() {
        BilibiliDocumentReader bilibiliDocumentReader = new BilibiliDocumentReader(
                "https://www.bilibili.com/video/BV1KMwgeKECx/?t=7&vd_source=3069f51b168ac07a9e3c4ba94ae26af5");
        List<Document> documents = bilibiliDocumentReader.get();
        logger.info("documents: {}", documents);
    }

}
Loading

0 comments on commit aee0eaa

Please sign in to comment.