Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import org.scalajs.linker.interface.ModuleSplitStyle

import scala.sys.process.*

lazy val projectVersion = "2.3.2"
lazy val projectVersion = "2.3.3"
lazy val organizationName = "ru.trett"
lazy val scala3Version = "3.7.4"
lazy val circeVersion = "0.14.15"
Expand Down Expand Up @@ -77,15 +77,14 @@ lazy val server = project
dockerRepository := sys.env.get("REGISTRY"),
dockerExposedPorts := Seq(8080),
watchSources ++= (client / Compile / watchSources).value,
Compile / compile := ((Compile / compile).dependsOn(client / Compile / fastLinkJS)).value,
Compile / compile := (Compile / compile).dependsOn(client / Compile / fastLinkJS).value,
javaOptions += "-Dotel.java.global-autoconfigure.enabled=true",
libraryDependencies ++= Seq(
"org.typelevel" %% "cats-effect" % "3.6.3",
"org.slf4j" % "slf4j-api" % "2.0.17",
"ch.qos.logback" % "logback-classic" % "1.5.21",
"org.flywaydb" % "flyway-core" % "11.17.2",
"com.github.pureconfig" %% "pureconfig-core" % "0.17.9",
"com.rometools" % "rome" % "2.1.0"
"com.github.pureconfig" %% "pureconfig-core" % "0.17.9"
),
libraryDependencies ++= Seq(
"org.http4s" %% "http4s-ember-server",
Expand Down
1 change: 1 addition & 0 deletions client/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion scripts/local-docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ services:
- host.docker.internal:host-gateway

server:
image: server:2.3.2
image: server:2.3.3
container_name: rss_server
restart: always
depends_on:
Expand Down
188 changes: 188 additions & 0 deletions server/src/main/scala/ru/trett/rss/server/parser/Atom_1_0_Parser.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
package ru.trett.rss.server.parser

import ru.trett.rss.server.models.Channel
import ru.trett.rss.server.models.Feed

import java.time.OffsetDateTime
import java.time.format.DateTimeFormatter
import java.time.format.DateTimeFormatterBuilder
import java.time.temporal.ChronoField
import javax.xml.stream.XMLEventReader
import javax.xml.stream.events.EndElement
import javax.xml.stream.events.StartElement

import scala.annotation.tailrec
import scala.util.Try
import org.typelevel.log4cats.Logger
import org.typelevel.log4cats.syntax.*
import cats.effect.Sync
import cats.syntax.all.*
import scala.collection.mutable.ListBuffer

/** Streaming parser for Atom 1.0 documents on top of a StAX [[XMLEventReader]].
  *
  * The reader is consumed strictly forward: `parse` walks the feed-level events and hands every
  * `entry` start tag to `parseEntry`, which reads up to the matching `entry` end tag.
  */
class Atom_1_0_Parser[F[_]: Sync: Logger] extends FeedParser[F, XMLEventReader]:

  /** Timestamp format with optional fractional seconds and an offset written without a colon
    * (`Z`, `+0130`). Offsets that contain a colon are handled by the ISO fallback in `parseDate`.
    */
  private lazy val formatRfc3339: DateTimeFormatter = new DateTimeFormatterBuilder()
    .appendPattern("yyyy-MM-dd'T'HH:mm:ss")
    .appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true)
    .appendPattern("XX")
    .toFormatter()

  /** Accumulator for the feed-level pass. `entries` is a mutable buffer that is appended to in
    * place, so every `copy` of a state keeps pointing at the same buffer.
    */
  private case class FeedState(
      title: String = "",
      hasFeed: Boolean = false,
      entries: ListBuffer[Feed] = ListBuffer.empty
  )

  /** Accumulator for the fields collected while reading a single `entry` element. */
  private case class EntryState(
      title: String = "",
      link: String = "",
      summary: String = "",
      content: String = "",
      updated: Option[OffsetDateTime] = None,
      published: Option[OffsetDateTime] = None
  )

  /** Reads the whole document from `eventReader` and builds a [[Channel]].
    *
    * @param eventReader
    *   reader positioned at or before the root start tag; it is consumed until exhausted
    * @param link
    *   URL the feed was fetched from; stored on the resulting channel and used in log messages
    * @return
    *   `Right(channel)` when a `feed` start tag was seen, otherwise
    *   `Left(ParserError.InvalidFeed("Missing <feed> element"))`
    */
  private[parser] def parse(
      eventReader: XMLEventReader,
      link: String
  ): F[Either[ParserError, Channel]] =
    @tailrec
    def loop(state: FeedState): FeedState =
      if (!eventReader.hasNext) state
      else
        eventReader.nextEvent() match {
          case el: StartElement =>
            el.getName.getLocalPart match {
              case "feed" => loop(state.copy(hasFeed = true))
              case "entry" =>
                // parseEntry consumes everything up to the matching </entry>.
                state.entries += parseEntry(eventReader)
                loop(state)
              case "title" => loop(state.copy(title = readElementText(eventReader)))
              case _ => loop(state)
            }
          case _ => loop(state)
        }

    for
      _ <- info"Starting to parse Atom 1.0 feed from link: $link"
      // The reader performs blocking I/O, so the traversal runs as an interruptible blocking effect.
      finalState <- Sync[F].interruptible(loop(FeedState()))
      _ <-
        info"Parsed ${finalState.entries.length} entries from Atom 1.0 feed: ${finalState.title}"
      result =
        if (finalState.hasFeed)
          Right(Channel(0L, finalState.title, link, finalState.entries.toList))
        else Left(ParserError.InvalidFeed("Missing <feed> element"))
    yield result

  /** Reads one entry; must be called right after the `entry` start tag has been consumed.
    *
    * Only elements in the Atom namespace (or without a namespace) are interpreted; every other
    * element is skipped together with its subtree. The description prefers `content` over
    * `summary`; the date prefers `updated`, then `published`, then the current time.
    */
  private def parseEntry(eventReader: XMLEventReader): Feed =
    @tailrec
    def loop(state: EntryState): EntryState =
      if (!eventReader.hasNext) state
      else
        eventReader.nextEvent() match {
          case el: StartElement =>
            val namespace = el.getName.getNamespaceURI
            val isAtomNamespace =
              namespace == "http://www.w3.org/2005/Atom" || namespace == ""

            if (!isAtomNamespace) {
              skipElement(eventReader)
              loop(state)
            } else {
              el.getName.getLocalPart match {
                // Only the first non-empty title is kept; later ones fall through to the skip case.
                case "title" if state.title.isEmpty =>
                  loop(state.copy(title = readElementText(eventReader)))
                case "link" =>
                  // The first link whose rel is "alternate", "self" or absent wins.
                  val (href, rel) = extractLinkAttributes(el)
                  if (
                    state.link.isEmpty && (rel == "alternate" || rel == "self" || rel.isEmpty)
                  )
                    loop(state.copy(link = href))
                  else loop(state)
                case "summary" =>
                  loop(state.copy(summary = readElementText(eventReader)))
                case "content" =>
                  loop(state.copy(content = readElementText(eventReader)))
                case "updated" =>
                  loop(state.copy(updated = parseDate(readElementText(eventReader))))
                case "published" =>
                  loop(state.copy(published = parseDate(readElementText(eventReader))))
                case _ =>
                  skipElement(eventReader)
                  loop(state)
              }
            }
          case el: EndElement if el.getName.getLocalPart == "entry" => state
          case _ => loop(state)
        }

    val finalState = loop(EntryState())
    val description =
      if (finalState.content.nonEmpty) finalState.content else finalState.summary
    val pubDate =
      finalState.updated.orElse(finalState.published).orElse(Some(OffsetDateTime.now()))
    Feed(finalState.link, "", 0L, finalState.title, description, pubDate, false)

  /** Consumes events up to and including the end tag that closes the element whose start tag
    * was just read, tracking nesting depth so child elements are skipped as well.
    */
  private def skipElement(eventReader: XMLEventReader): Unit =
    @tailrec
    def loop(depth: Int): Unit =
      if (eventReader.hasNext && depth > 0) {
        eventReader.nextEvent() match {
          case _: StartElement => loop(depth + 1)
          case _: EndElement => loop(depth - 1)
          case _ => loop(depth)
        }
      }
    loop(1)

  /** Collects the character data that is a direct child of the element whose start tag was just
    * read, consuming events through the matching end tag. Text inside nested child elements is
    * ignored. The concatenated result is trimmed.
    */
  private def readElementText(eventReader: XMLEventReader): String =
    @tailrec
    def loop(depth: Int, textBuffer: StringBuilder): String =
      if (!eventReader.hasNext || depth <= 0) textBuffer.toString().trim()
      else
        eventReader.nextEvent() match {
          case _: StartElement => loop(depth + 1, textBuffer)
          case _: EndElement => loop(depth - 1, textBuffer)
          case el if el.isCharacters =>
            // A reader may deliver one text node as several chunks (for example around entity
            // references). Whitespace-only chunks must be kept, otherwise words on either side
            // are glued together; leading/trailing whitespace is removed by the final trim.
            if (depth == 1) loop(depth, textBuffer.append(el.asCharacters().getData))
            else loop(depth, textBuffer)
          case _ => loop(depth, textBuffer)
        }

    loop(1, new StringBuilder())

  /** Returns the `(href, rel)` attribute values of a `link` start tag; a missing attribute is
    * reported as the empty string.
    */
  private def extractLinkAttributes(startElement: StartElement): (String, String) =
    val attributes = startElement.getAttributes

    @tailrec
    def loop(href: String, rel: String): (String, String) =
      if (!attributes.hasNext) (href, rel)
      else {
        val attr = attributes.next()
        attr.getName.getLocalPart match {
          case "href" => loop(attr.getValue, rel)
          case "rel" => loop(href, attr.getValue)
          case _ => loop(href, rel)
        }
      }

    loop("", "")

  /** Parses a timestamp, trying `formatRfc3339` first and the default ISO offset format second.
    *
    * @return
    *   `None` for an empty string or when neither format matches
    */
  private def parseDate(dateStr: String): Option[OffsetDateTime] =
    if (dateStr.isEmpty) None
    else
      Try(OffsetDateTime.parse(dateStr, formatRfc3339))
        .orElse(Try(OffsetDateTime.parse(dateStr)))
        .toOption

object Atom_1_0_Parser:

  /** Builds a new Atom 1.0 parser, exposed through the generic [[FeedParser]] interface. */
  def make[F[_]: Sync: Logger]: FeedParser[F, XMLEventReader] =
    val atomParser: FeedParser[F, XMLEventReader] = new Atom_1_0_Parser[F]
    atomParser
20 changes: 20 additions & 0 deletions server/src/main/scala/ru/trett/rss/server/parser/FeedParser.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package ru.trett.rss.server.parser

import javax.xml.stream.XMLEventReader
import cats.effect.Sync
import org.typelevel.log4cats.Logger
import ru.trett.rss.server.models.Channel

/** Turns an input of type `A` into a [[Channel]] inside the effect `F`.
  *
  * `A` is contravariant: a parser that accepts a more general input can be used wherever one for
  * a more specific input is expected.
  */
trait FeedParser[F[_], -A]:

  /** Parses `reader` into a channel.
    *
    * @param reader
    *   source to read the feed from
    * @param link
    *   URL associated with the feed
    * @return
    *   the parsed channel, or a [[ParserError]] describing why parsing failed
    */
  private[parser] def parse(reader: A, link: String): F[Either[ParserError, Channel]]

/** Lookup from a detected [[ParserType]] to the parser implementation that handles it. */
trait FeedParserRegistry[F[_]]:

  /** Returns the parser registered for `parserType`, if any. */
  def get(parserType: ParserType): Option[FeedParser[F, XMLEventReader]]

object FeedParserRegistry:

  /** Default registry: a fresh parser instance is created on every lookup. */
  given default[F[_]](using Sync[F], Logger[F]): FeedParserRegistry[F] with
    override def get(parserType: ParserType): Option[FeedParser[F, XMLEventReader]] =
      val selected: FeedParser[F, XMLEventReader] = parserType match
        case ParserType.Rss20 => Rss_2_0_Parser.make[F]
        case ParserType.Atom10 => Atom_1_0_Parser.make[F]
      Some(selected)
91 changes: 91 additions & 0 deletions server/src/main/scala/ru/trett/rss/server/parser/Parser.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package ru.trett.rss.server.parser

import javax.xml.stream.{XMLInputFactory, XMLEventReader}
import javax.xml.stream.events.StartElement
import cats.effect.{Async, Resource}
import cats.syntax.all._
import fs2.Stream
import org.typelevel.log4cats.Logger
import ru.trett.rss.server.models.Channel
import scala.annotation.tailrec
import java.io.InputStream

/** Entry point for feed parsing: detects the feed format from the XML root element and delegates
  * to the matching [[FeedParser]] obtained from the [[FeedParserRegistry]] in scope.
  */
object Parser {

  // One factory shared by all parse calls. DTD processing and external entity resolution are
  // switched off so that feed documents cannot pull in external resources.
  private val xmlInputFactory: XMLInputFactory = {
    val factory = XMLInputFactory.newInstance()
    factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false)
    factory.setProperty(XMLInputFactory.SUPPORT_DTD, false)
    factory
  }

  /** Parses a feed delivered as a byte stream.
    *
    * @param input
    *   raw bytes of the XML document
    * @param link
    *   URL the feed came from; forwarded to the format-specific parser
    * @return
    *   the parsed channel, or a [[ParserError]]: `NoRootElement` when the document has no start
    *   tag, `UnsupportedFormat` when the root element is not recognised, `ParseFailure` when an
    *   exception is raised while reading, and `EmptyFeed` when the stream produced no result
    */
  def parse[F[_]: Async](input: Stream[F, Byte], link: String)(using
      logger: Logger[F],
      registry: FeedParserRegistry[F]
  ): F[Either[ParserError, Channel]] = {

    // Advances to the first start tag WITHOUT consuming it (peek), so the format-specific parser
    // still sees the root element as its first start tag.
    def findRootElement(reader: XMLEventReader): Option[StartElement] = {
      @tailrec
      def loop(): Option[StartElement] =
        if (!reader.hasNext) None
        else
          reader.peek() match {
            case start: StartElement => Some(start)
            case _ =>
              reader.nextEvent()
              loop()
          }

      loop()
    }

    // The release action must use handleErrorWith: with handleError the logging effect would be
    // discarded as a plain value and never executed, so close failures would vanish silently.
    def createReader(inputStream: InputStream): Resource[F, XMLEventReader] =
      Resource.make(Async[F].blocking(xmlInputFactory.createXMLEventReader(inputStream)))(
        reader =>
          Async[F]
            .blocking(reader.close())
            .handleErrorWith(err => logger.error(err)(err.getMessage))
      )

    input
      .through(fs2.io.toInputStream)
      .evalMap { is =>
        Resource
          .fromAutoCloseable(Async[F].blocking(is))
          .use { inputStream =>
            createReader(inputStream).use { reader =>
              Async[F].blocking(findRootElement(reader)).flatMap {
                case Some(el) =>
                  parseChannel(link, registry, reader, el.getName.getLocalPart)
                case None => Async[F].pure(Left(ParserError.NoRootElement))
              }
            }
          }
          // Any exception raised while opening or reading the document becomes a Left.
          .handleError(e => Left(ParserError.ParseFailure(e)))
      }
      .compile
      .last
      .map(_.getOrElse(Left(ParserError.EmptyFeed)))
  }

  /** Resolves the parser for `rootName` and runs it on `reader`.
    *
    * Yields `UnsupportedFormat(rootName)` when the root name maps to no [[ParserType]] or the
    * registry has no parser for it; exceptions raised by the parser become `ParseFailure`.
    */
  private def parseChannel[F[_]: Async](
      link: String,
      registry: FeedParserRegistry[F],
      reader: XMLEventReader,
      rootName: String
  ): F[Either[ParserError, Channel]] =
    ParserType.fromRoot(rootName) match {
      case Some(parserType) =>
        registry.get(parserType) match {
          case Some(feedParser) =>
            feedParser.parse(reader, link).handleError { e =>
              Left(ParserError.ParseFailure(e))
            }
          case None =>
            Async[F].pure(Left(ParserError.UnsupportedFormat(rootName)))
        }
      case None =>
        Async[F].pure(Left(ParserError.UnsupportedFormat(rootName)))
    }

}
22 changes: 22 additions & 0 deletions server/src/main/scala/ru/trett/rss/server/parser/ParserError.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package ru.trett.rss.server.parser

/** Failures that can occur while detecting or parsing a feed.
  *
  * Every error is also a `Throwable`, with `getMessage` returning the error's `message`.
  */
sealed trait ParserError extends Throwable:
  /** Human-readable description of the failure. */
  def message: String
  override def getMessage: String = message

object ParserError:
  /** The root element (`format`) does not correspond to a supported feed type. */
  final case class UnsupportedFormat(format: String) extends ParserError:
    override val message: String = s"Unsupported feed format: $format"

  /** The document was readable but is not a valid feed; `reason` says why. */
  final case class InvalidFeed(reason: String) extends ParserError:
    override val message: String = s"Invalid feed: $reason"

  /** An exception was raised while reading the document; it is exposed through `getCause`. */
  final case class ParseFailure(cause: Throwable) extends ParserError:
    // Many exceptions carry no message (getMessage == null); fall back to the cause's toString
    // so the text never reads "Failed to parse feed: null".
    override val message: String =
      s"Failed to parse feed: ${Option(cause.getMessage).getOrElse(cause.toString)}"
    override def getCause: Throwable = cause

  /** The document contains no start tag at all. */
  case object NoRootElement extends ParserError:
    override val message: String = "No root element found in feed"

  /** The input produced no parse result. */
  case object EmptyFeed extends ParserError:
    override val message: String = "Feed appears to be empty"
20 changes: 20 additions & 0 deletions server/src/main/scala/ru/trett/rss/server/parser/ParserType.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package ru.trett.rss.server.parser

/** Feed formats the parser can recognise, each identified by the local name of its XML root. */
sealed trait ParserType:
  /** Local name of the root element that marks this format. */
  def rootElement: String

object ParserType:
  /** RSS 2.0 — documents rooted at `rss`. */
  case object Rss20 extends ParserType:
    val rootElement: String = "rss"

  /** Atom 1.0 — documents rooted at `feed`. */
  case object Atom10 extends ParserType:
    val rootElement: String = "feed"

  /** All known formats, in lookup order. */
  val values: List[ParserType] = Rss20 :: Atom10 :: Nil

  /** Finds the format whose root element name equals `root`, ignoring case. */
  def fromRoot(root: String): Option[ParserType] =
    values.collectFirst {
      case candidate if candidate.rootElement.equalsIgnoreCase(root) => candidate
    }
Loading