ianhenderson.org / 2023 october 4

how the RSS feed for this site works

this site now has an RSS feed at http://feed.ianhenderson.org/rss. the main design goal was to avoid any extra work outside of just editing HTML files. i'm not interested in making a "static site generator" with a build step, and i don't want to have to update the RSS feed by hand on every post.

the solution i arrived at is to derive the RSS from the HTML of the front page. whenever a request for the RSS feed arrives, the HTML is downloaded, the <a> elements are enumerated, and the ones with a feed-published-at attribute are included as items in the feed. to publish a post, i only have to add a link to it on the front page (which i would do anyway) and include the feed-published-at attribute. if i want to add more information to the feed item in the future (like a description or media enclosure), that can be done with additional feed- attributes.

one technical note is that feed-published-at is not a standard HTML attribute. validators will complain about it. but i couldn't find an alternative i liked. one option would be to use data-* attributes (e.g. data-published-at), but these are meant to be private to the page. it feels weird to me to have an external feed generator program working with data-* attributes. microdata requires you to put the data as text content on the page or add extraneous <data> elements, both of which also feel bad to me.

anyway, here's the complete source code to the feed generator as originally written. save it in a directory on its own as (e.g.) feed.go, then run go mod init the-name-doesnt-matter-for-go-run-so-put-whatever-you-want-here (i hate how go makes you do this btw), go mod tidy, and finally go run feed.go to see it in action. the RSS feed will be served at http://127.0.0.1:8002/rss.

// by ian henderson <ian@ianhenderson.org>
// published on 4 october 2023
// this software belongs to a future without copyright.  please use it however you'd like.

package main

import (
    "encoding/xml"
    "fmt"
    "golang.org/x/net/html"
    "log"
    "net/http"
    "net/url"
    "sort"
    "strings"
    "time"
)

// the page to extract the RSS feed from.  it is also reported as the
// channel's link and used as the base for resolving relative links:
const targetURL = "http://ianhenderson.org/"

// rss is the root <rss> element of the generated feed document.
type rss struct {
    XMLName xml.Name `xml:"rss"`
    // Version is written as the version attribute of <rss>.
    Version string `xml:"version,attr"`
    // Channel is written as the <channel> child element.
    Channel channel `xml:"channel"`
}

// channel is the <channel> element of the feed: the feed-level fields
// followed by one <item> element per entry in Items.
type channel struct {
    // URL is written as the channel's <link> element.
    URL string `xml:"link"`
    // Title is written as the channel's <title> element.
    Title string `xml:"title"`
    // Description is written as the channel's <description> element.
    Description string `xml:"description"`
    // Language is written as the channel's <language> element.
    Language string `xml:"language"`
    // Items holds the feed entries, each written as an <item> element.
    Items []item `xml:"item"`
}

// item is a single <item> element of the feed.
type item struct {
    // PublishedAt is written as <pubDate>, formatted by RSSTime.MarshalText.
    PublishedAt RSSTime `xml:"pubDate"`
    // URL is written as the item's <link> element.
    URL string `xml:"link"`
    // Title is written as the item's <title> element.
    Title string `xml:"title"`
    // GUID is written as the item's <guid> element.
    GUID string `xml:"guid"`
}

// RSSTime is a time.Time that marshals to text in the date layout used for
// the feed's pubDate values (weekday, date, hours and minutes, numeric zone).
type RSSTime time.Time

// MarshalText renders t in the feed's date layout.  it never fails, so the
// returned error is always nil.
func (t RSSTime) MarshalText() ([]byte, error) {
    // date format recommended by https://validator.w3.org/feed/docs/warning/ProblematicalRFC822Date.html
    const layout = "Mon, 02 Jan 2006 15:04 -0700"
    return time.Time(t).AppendFormat(nil, layout), nil
}

// nestedContent appends the data of every text node found among node, its
// following siblings, and all of their descendants to content, in document
// order, and returns the extended slice.  a nil node leaves content as is.
func nestedContent(node *html.Node, content []string) []string {
    for current := node; current != nil; current = current.NextSibling {
        if current.Type == html.TextNode {
            content = append(content, current.Data)
        }
        // descend before moving to the next sibling so that text is
        // collected in the order it appears in the document.
        content = nestedContent(current.FirstChild, content)
    }
    return content
}

// textContent returns the text of every text node beneath node, joined
// together in document order with nothing in between.  node must not be nil.
func textContent(node *html.Node) string {
    var text strings.Builder
    for _, piece := range nestedContent(node.FirstChild, []string{}) {
        text.WriteString(piece)
    }
    return text.String()
}

func (f *channel) addItems(baseURL *url.URL, node *html.Node) error {
    if node == nil {
        return nil
    }
    if node.Type == html.ElementNode {
        if node.Data == "a" {
            item := item{}
            includeInFeed := false
            for _, attr := range node.Attr {
                if attr.Namespace != "" {
                    continue
                }
                if attr.Key == "feed-published-at" {
                    time, err := time.Parse(time.RFC3339, attr.Val)
                    if err != nil {
                        return err
                    }
                    item.PublishedAt = RSSTime(time)
                    includeInFeed = true
                }
                if attr.Key == "href" {
                    url, err := url.Parse(attr.Val)
                    if err != nil {
                        return err
                    }
                    if !url.IsAbs() {
                        url = baseURL.ResolveReference(url)
                    }
                    item.URL = url.String()
                    item.GUID = item.URL
                }
            }
            if includeInFeed {
                item.Title = textContent(node)
                f.Items = append(f.Items, item)
            }
        } else if node.Data == "title" {
            f.Title = textContent(node)
        } else if node.Data == "html" {
            for _, attr := range node.Attr {
                if attr.Key == "lang" {
                    f.Language = attr.Val
                }
            }
        }
        if err := f.addItems(baseURL, node.FirstChild); err != nil {
            return err
        }
    }
    return f.addItems(baseURL, node.NextSibling)
}

// serveRSS downloads targetURL, derives an RSS 2.0 feed from its HTML, and
// writes the feed to response with an application/rss+xml content type.
// items are sorted from the oldest to the newest publication time.
//
// nothing is written to response unless the whole feed was built; a failure
// to fetch the page (including any reply other than 200 OK), to parse it,
// or to encode the feed is returned so the caller can report it.
func serveRSS(response http.ResponseWriter, request *http.Request) error {
    baseURL, err := url.Parse(targetURL)
    if err != nil {
        return fmt.Errorf("parsing target url %q: %w", targetURL, err)
    }

    targetResponse, err := http.Get(targetURL)
    if err != nil {
        return fmt.Errorf("fetching %s: %w", targetURL, err)
    }
    // the body has to be closed on every path, otherwise the underlying
    // connection is leaked on each request for the feed.
    defer targetResponse.Body.Close()

    // an error page is still HTML; parsing it would silently produce an
    // empty feed, so treat anything but success as a failure.
    if targetResponse.StatusCode != http.StatusOK {
        return fmt.Errorf("fetching %s: unexpected status %s", targetURL, targetResponse.Status)
    }

    root, err := html.Parse(targetResponse.Body)
    if err != nil {
        return fmt.Errorf("parsing html from %s: %w", targetURL, err)
    }

    feed := channel{
        URL: targetURL,
        Description: fmt.Sprintf("the latest links from %s", targetURL),
    }
    if err := feed.addItems(baseURL, root.FirstChild); err != nil {
        return fmt.Errorf("extracting feed items from %s: %w", targetURL, err)
    }
    sort.Slice(feed.Items, func (i, j int) bool {
        return time.Time(feed.Items[i].PublishedAt).Before(time.Time(feed.Items[j].PublishedAt))
    })

    output, err := xml.MarshalIndent(rss{
        Version: "2.0",
        Channel: feed,
    }, "", "    ")
    if err != nil {
        return fmt.Errorf("encoding feed: %w", err)
    }

    response.Header().Set("Content-Type", "application/rss+xml")
    // write errors are deliberately not returned: once the body has been
    // started the status line is already sent, so the caller could no longer
    // turn the failure into an error response.
    response.Write([]byte(xml.Header))
    response.Write(output)
    return nil
}

// rssHandler is an http.Handler that answers requests with the RSS feed.
type rssHandler struct{}

// ServeHTTP delegates to serveRSS.  if that fails, the error is logged and
// the client receives a 500 status with a short plain message.
func (rssHandler) ServeHTTP(response http.ResponseWriter, request *http.Request) {
    err := serveRSS(response, request)
    if err == nil {
        return
    }
    log.Print(err)
    response.WriteHeader(http.StatusInternalServerError)
    fmt.Fprint(response, "500 internal error")
}

// main serves the feed at /rss on 127.0.0.1:8002 and runs until the server
// stops with an error.
func main() {
    mux := http.NewServeMux()
    mux.Handle("/rss", rssHandler{})
    // ListenAndServe only returns when it fails (for example when the port
    // is already in use); report that instead of exiting silently.
    log.Fatal(http.ListenAndServe("127.0.0.1:8002", mux))
}