Skip to content

Commit

Permalink
Fix parsing issue when channel image is before channel description - #55
Browse files Browse the repository at this point in the history
  • Loading branch information
prof18 committed Jan 4, 2020
1 parent 8abd156 commit d028006
Show file tree
Hide file tree
Showing 7 changed files with 377 additions and 74 deletions.
3 changes: 2 additions & 1 deletion rssparser/src/main/java/com/prof/rssparser/Image.kt
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@ package com.prof.rssparser
data class Image(
var title: String? = null,
var url: String? = null,
var link: String? = null
var link: String? = null,
var description: String? = null
)
170 changes: 97 additions & 73 deletions rssparser/src/main/java/com/prof/rssparser/core/CoreXMLParser.kt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

package com.prof.rssparser.core

import android.util.Log
import com.prof.rssparser.Article
import com.prof.rssparser.Channel
import com.prof.rssparser.Image
Expand Down Expand Up @@ -62,32 +61,42 @@ object CoreXMLParser {
if (eventType == XmlPullParser.START_TAG) {
if (xmlPullParser.name.equals(RSSKeywords.RSS_CHANNEL, ignoreCase = true)) {
insideChannel = true
insideItem = false
insideChannelImage = false

} else if (xmlPullParser.name.equals(RSSKeywords.RSS_ITEM, ignoreCase = true)) {
insideItem = true
insideChannel = false
insideChannelImage = false

} else if (xmlPullParser.name.equals(RSSKeywords.RSS_CHANNEL_IMAGE, ignoreCase = true)) {
insideItem = false
insideChannel = false
insideChannelImage = true
channelImage = Image()

} else if (xmlPullParser.name.equals(RSSKeywords.RSS_ITEM_TITLE, ignoreCase = true)) {
when {
insideChannel -> channelTitle = xmlPullParser.nextText().trim()
insideChannelImage -> channelImage?.title = xmlPullParser.nextText().trim()
insideItem -> currentArticle.title = xmlPullParser.nextText().trim()
if (insideChannel) {
when {
insideChannelImage -> {
channelImage?.title = xmlPullParser.nextText().trim()
}
insideItem -> {
currentArticle.title = xmlPullParser.nextText().trim()
}
else -> {
channelTitle = xmlPullParser.nextText().trim()
}
}
}

} else if (xmlPullParser.name.equals(RSSKeywords.RSS_ITEM_LINK, ignoreCase = true)) {
when {
insideChannel -> channelLink = xmlPullParser.nextText().trim()
insideChannelImage -> channelImage?.link = xmlPullParser.nextText().trim()
insideItem -> currentArticle.link = xmlPullParser.nextText().trim()
if (insideChannel) {
when {
insideChannelImage -> {
channelImage?.link = xmlPullParser.nextText().trim()
}
insideItem -> {
currentArticle.link = xmlPullParser.nextText().trim()
}
else -> {
channelLink = xmlPullParser.nextText().trim()
}
}
}

} else if (xmlPullParser.name.equals(RSSKeywords.RSS_ITEM_AUTHOR, ignoreCase = true)) {
Expand All @@ -108,88 +117,103 @@ object CoreXMLParser {
} else if (xmlPullParser.name.equals(RSSKeywords.RSS_ITEM_URL, ignoreCase = true)) {
if (insideChannelImage) {
channelImage?.url = xmlPullParser.nextText().trim()
Log.d("PARSER", "")
}

} else if (xmlPullParser.name.equals(RSSKeywords.RSS_ITEM_ENCLOSURE, ignoreCase = true)) {
if (insideItem) {
val type = xmlPullParser.getAttributeValue(null, RSSKeywords.RSS_ITEM_TYPE)
if (type != null && type.contains("image/")) {
if (type != null && type.contains("image")) {
currentArticle.image = xmlPullParser.getAttributeValue(null, RSSKeywords.RSS_ITEM_URL)
} else {
// let's try if there is the url
val url = xmlPullParser.getAttributeValue(null, RSSKeywords.RSS_ITEM_URL)
if (url != null) {
currentArticle.image = url
}
}
}

} else if (xmlPullParser.name.equals(RSSKeywords.RSS_ITEM_DESCRIPTION, ignoreCase = true)) {
if (insideChannel) {
channelDescription = xmlPullParser.nextText().trim()
} else if (insideItem) {
val description = xmlPullParser.nextText()
currentArticle.description = description.trim()
if (currentArticle.image == null) {
currentArticle.image = getImageUrl(description)
}
}

} else if (xmlPullParser.name.equals(RSSKeywords.RSS_ITEM_CONTENT, ignoreCase = true)) {
if (insideItem) {
val content = xmlPullParser.nextText().trim()
currentArticle.content = content
if (currentArticle.image == null) {
currentArticle.image = getImageUrl(content)
if (insideItem) {
val description = xmlPullParser.nextText()
currentArticle.description = description.trim()
if (currentArticle.image == null) {
currentArticle.image = getImageUrl(description)
}
} else if (insideChannelImage) {
channelImage?.description = xmlPullParser.nextText().trim()
} else {
channelDescription = xmlPullParser.nextText().trim()
}
}

} else if (xmlPullParser.name.equals(RSSKeywords.RSS_ITEM_PUB_DATE, ignoreCase = true)) {
if (insideItem) {
val nextTokenType = xmlPullParser.next()
if (nextTokenType == XmlPullParser.TEXT) {
currentArticle.pubDate = xmlPullParser.text.trim()
}
// Skip to be able to find date inside 'tag' tag
continue
} else if (xmlPullParser.name.equals(RSSKeywords.RSS_ITEM_CONTENT, ignoreCase = true)) {
if (insideItem) {
val content = xmlPullParser.nextText().trim()
currentArticle.content = content
if (currentArticle.image == null) {
currentArticle.image = getImageUrl(content)
}
}

} else if (xmlPullParser.name.equals(RSSKeywords.RSS_ITEM_TIME, ignoreCase = true)) {
if (insideItem) {
currentArticle.pubDate = xmlPullParser.nextText()
} else if (xmlPullParser.name.equals(RSSKeywords.RSS_ITEM_PUB_DATE, ignoreCase = true)) {
if (insideItem) {
val nextTokenType = xmlPullParser.next()
if (nextTokenType == XmlPullParser.TEXT) {
currentArticle.pubDate = xmlPullParser.text.trim()
}
// Skip to be able to find date inside 'tag' tag
continue
}

} else if (xmlPullParser.name.equals(RSSKeywords.RSS_ITEM_GUID, ignoreCase = true)) {
if (insideItem) {
currentArticle.guid = xmlPullParser.nextText().trim()
}
} else if (xmlPullParser.name.equals(RSSKeywords.RSS_ITEM_TIME, ignoreCase = true)) {
if (insideItem) {
currentArticle.pubDate = xmlPullParser.nextText()
}

} else if (eventType == XmlPullParser.END_TAG && xmlPullParser.name.equals("item", ignoreCase = true)) {
// The item is correctly parsed
insideItem = false
articleList.add(currentArticle)
currentArticle = Article()
} else if (xmlPullParser.name.equals(RSSKeywords.RSS_ITEM_GUID, ignoreCase = true)) {
if (insideItem) {
currentArticle.guid = xmlPullParser.nextText().trim()
}
}
eventType = xmlPullParser.next()

} else if (eventType == XmlPullParser.END_TAG && xmlPullParser.name.equals(RSSKeywords.RSS_ITEM, ignoreCase = true)) {
// The item is correctly parsed
insideItem = false
articleList.add(currentArticle)
currentArticle = Article()
} else if (eventType == XmlPullParser.END_TAG && xmlPullParser.name.equals(RSSKeywords.RSS_CHANNEL, ignoreCase = true)) {
// The channel is correctly parsed
insideChannel = false
} else if (eventType == XmlPullParser.END_TAG && xmlPullParser.name.equals(RSSKeywords.RSS_CHANNEL_IMAGE, ignoreCase = true)) {
// The channel image is correctly parsed
insideChannelImage = false
}
return Channel(channelTitle, channelLink, channelDescription, channelImage, articleList)
eventType = xmlPullParser.next()
}
return Channel(channelTitle, channelLink, channelDescription, channelImage, articleList)
}

/**
* Finds the first img tag and returns its src attribute as the featured image
*
* @param input The content in which to search for the tag
* @return The url, if there is one
*/
private fun getImageUrl(input: String): String? {

var url: String? = null
val patternImg = Pattern.compile("(<img .*?>)")
val matcherImg = patternImg.matcher(input)
if (matcherImg.find()) {
val imgTag = matcherImg.group(1)
val patternLink = Pattern.compile("src\\s*=\\s*\"(.+?)\"")
val matcherLink = patternLink.matcher(imgTag)
if (matcherLink.find()) {
url = matcherLink.group(1).trim()
}
/**
* Finds the first img tag and returns its src attribute as the featured image
*
* @param input The content in which to search for the tag
* @return The url, if there is one
*/
private fun getImageUrl(input: String): String? {

var url: String? = null
val patternImg = Pattern.compile("(<img .*?>)")
val matcherImg = patternImg.matcher(input)
if (matcherImg.find()) {
val imgTag = matcherImg.group(1)
val patternLink = Pattern.compile("src\\s*=\\s*\"(.+?)\"")
val matcherLink = patternLink.matcher(imgTag)
if (matcherLink.find()) {
url = matcherLink.group(1).trim()
}
return url
}
return url
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@ class CoreXMLParserImage2FeedTest {
assertEquals(channel.image?.url, "https://www.mundodeportivo.com/rsc/images/logo_MD_feed.png")
}

@Test
fun channelImageDescription_isCorrect() {
assertEquals(channel.image?.description, "Mundo Deportivo es tu diario deportivo On Line. Noticias de deporte, fútbol y del Barça")
}

@Test
@Throws
fun size_isCorrect() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
package com.prof18.rssparser

import android.os.Build
import com.prof.rssparser.Article
import com.prof.rssparser.Channel
import com.prof.rssparser.core.CoreXMLParser
import org.junit.Assert
import org.junit.Before
import org.junit.Test
import org.junit.runner.RunWith
import org.robolectric.RobolectricTestRunner
import org.robolectric.annotation.Config

/**
 * Parses the fixture at [feedPath] with [CoreXMLParser.parseXML] and checks the
 * resulting channel metadata, channel image and first article.
 *
 * NOTE(review): judging by the fixture name, this feed presumably declares the
 * channel image before the channel description — confirm against the XML file.
 */
@RunWith(RobolectricTestRunner::class)
@Config(sdk = [Build.VERSION_CODES.P])
class CoreXMLParserImageChannelReverseTest {
    private lateinit var articleList: MutableList<Article>
    private lateinit var article: Article
    private val feedPath = "/feed-test-image-channel-reverse.xml"
    private lateinit var channel: Channel

    /** Loads the fixture from the classpath and parses it once before every test. */
    @Before
    fun setUp() {
        // Fail with an explicit message instead of a bare NPE when the fixture is missing.
        val inputStream = requireNotNull(javaClass.getResourceAsStream(feedPath)) {
            "Test feed not found on classpath: $feedPath"
        }
        val feed = inputStream.bufferedReader().use { it.readText() }
        channel = CoreXMLParser.parseXML(feed)
        articleList = channel.articles
        article = articleList[0]
    }

    // All assertions below follow JUnit's (expected, actual) argument order so that
    // failure messages report the values the right way round.

    @Test
    fun channelTitle_isCorrect() {
        Assert.assertEquals("The Joe Rogan Experience", channel.title)
    }

    @Test
    fun channelDesc_isCorrect() {
        Assert.assertEquals("The podcast of Comedian Joe Rogan..", channel.description)
    }

    @Test
    fun channelLink_isCorrect() {
        Assert.assertEquals("https://www.joerogan.com", channel.link)
    }

    @Test
    fun channelImageTitle_isCorrect() {
        Assert.assertEquals("The Joe Rogan Experience", channel.image?.title)
    }

    @Test
    fun channelImageLink_isCorrect() {
        Assert.assertEquals("https://www.joerogan.com", channel.image?.link)
    }

    @Test
    fun channelImageUrl_isCorrect() {
        Assert.assertEquals(
            "http://static.libsyn.com/p/assets/7/1/f/3/71f3014e14ef2722/JREiTunesImage2.jpg",
            channel.image?.url
        )
    }

    @Test
    fun channelImageDescription_isCorrect() {
        Assert.assertNull(channel.image?.description)
    }

    @Test
    fun size_isCorrect() {
        Assert.assertEquals(6, articleList.size)
    }

    @Test
    fun title_isCorrect() {
        Assert.assertEquals("#1405 - Sober October 3 Recap", article.title)
    }

    @Test
    fun author_isCorrect() {
        Assert.assertNull(article.author)
    }

    @Test
    fun link_isCorrect() {
        Assert.assertEquals("http://traffic.libsyn.com/joeroganexp/p1405.mp3", article.link)
    }

    @Test
    fun pubDate_isCorrect() {
        Assert.assertEquals("Tue, 24 Dec 2019 20:00:00 +0000", article.pubDate)
    }

    @Test
    fun description_isPresent() {
        Assert.assertEquals(
            "Joe is joined by Ari Shaffir, Bert Kreischer & Tom Segura to recap their 3rd annual Sober October challenge.",
            article.description
        )
    }

    @Test
    fun content_isCorrect() {
        Assert.assertEquals(
            "Joe is joined by Ari Shaffir, Bert Kreischer & Tom Segura to recap their 3rd annual Sober October challenge.",
            article.content
        )
    }

    @Test
    fun image_isCorrect() {
        Assert.assertEquals("http://traffic.libsyn.com/joeroganexp/p1405.mp3?dest-id=19997", article.image)
    }

    @Test
    fun categories_isCorrect() {
        // Kotlin's built-in assert() is skipped when JVM assertions are disabled;
        // the JUnit assertion always runs.
        Assert.assertTrue(article.categories.isEmpty())
    }

    @Test
    fun guid_isCorrect() {
        Assert.assertEquals("0d7147a3-f1c1-4ae6-bbf8-2e0a493639ca", article.guid)
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import android.os.Build
import com.prof.rssparser.Article
import com.prof.rssparser.Channel
import com.prof.rssparser.core.CoreXMLParser
import org.junit.Assert
import org.junit.Assert.*
import org.junit.Before
import org.junit.Test
Expand Down Expand Up @@ -75,6 +76,11 @@ class CoreXMLParserImageFeedTest {
assertEquals(channel.image?.url, "https://cdn.movieweb.com/assets/1/sites/movieweb.com/chrome-touch-icon-192x192.png")
}

@Test
fun channelImageDescription_isCorrect() {
assertNull(channel.image?.description)
}

@Test
@Throws
fun size_isCorrect() {
Expand Down
Loading

0 comments on commit d028006

Please sign in to comment.