1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
use std::pin::Pin;

use super::Crawler;

/// Returns the built-in set of crawlers.
///
/// The list contains one [`frn::FrnCrawler`] followed by one
/// [`cba::CbaCrawler`], each boxed and pinned as a `dyn Crawler` trait object.
pub fn default_crawlers() -> Vec<Pin<Box<dyn Crawler>>> {
    let frn_crawler: Pin<Box<dyn Crawler>> = Box::pin(frn::FrnCrawler {});
    let cba_crawler: Pin<Box<dyn Crawler>> = Box::pin(cba::CbaCrawler {});
    vec![frn_crawler, cba_crawler]
}

pub mod util {
    use std::collections::HashMap;
    use url::Url;
    /// Collects the query parameters of `url` into an owned map.
    ///
    /// Keys and values are copied out of the URL as owned `String`s. When the
    /// same key occurs more than once, the last occurrence wins.
    pub fn query_map(url: &Url) -> HashMap<String, String> {
        let mut params = HashMap::new();
        for (name, value) in url.query_pairs() {
            params.insert(name.into_owned(), value.into_owned());
        }
        params
    }

    /// Replaces the whole query string of `url` with the pairs in `map`.
    ///
    /// Any existing query is cleared first. Pairs are written in ascending key
    /// order: `HashMap` iteration order is unspecified, so feeding the map to
    /// the serializer directly would produce a differently ordered query string
    /// from one run to the next for the same input.
    pub fn set_query_map(url: &mut Url, map: &HashMap<String, String>) {
        // Keys are unique, so sorting the (key, value) tuples orders by key.
        let mut pairs: Vec<(&String, &String)> = map.iter().collect();
        pairs.sort_unstable();
        url.query_pairs_mut().clear().extend_pairs(pairs);
    }

    /// Advances the numeric entry stored under `key` in `map`.
    ///
    /// * If `key` is absent, it is inserted with the value `default`.
    /// * If `key` is present and parses as a `usize`, it is replaced by the
    ///   parsed value plus `add`, saturating at `usize::MAX`.
    /// * If `key` is present but does not parse as a `usize`, it is reset to
    ///   `default`.
    pub fn insert_or_add(map: &mut HashMap<String, String>, key: &str, default: usize, add: usize) {
        if let Some(value) = map.get_mut(key) {
            // The stored string can hold any value up to `usize::MAX`, so a
            // plain `+` could overflow (panic in debug builds, wrap around in
            // release builds). Saturate instead.
            let next = value
                .parse::<usize>()
                .map_or(default, |current| current.saturating_add(add));
            *value = next.to_string();
        } else {
            map.insert(key.to_owned(), default.to_string());
        }
    }
}

pub mod frn {
    use super::util::{insert_or_add, query_map, set_query_map};
    use crate::rss::{Crawler, FetchedFeedPage, Next};

    /// Crawler registered for the `freie-radios.net` domains.
    pub struct FrnCrawler {}

    #[async_trait::async_trait]
    impl Crawler for FrnCrawler {
        /// Decides what to fetch after `feed_page`.
        ///
        /// A page without items yields `Next::Finished`. Otherwise the page
        /// URL is reused with its `start` query parameter advanced by the
        /// number of items on this page (or set to that number when the
        /// parameter is missing or not numeric), and returned as
        /// `Next::NextPage`.
        async fn next(&self, feed_page: FetchedFeedPage) -> anyhow::Result<Next> {
            let item_count = feed_page.items.len();
            if item_count == 0 {
                return Ok(Next::Finished);
            }

            let mut next_url = feed_page.url;
            let mut query = query_map(&next_url);
            insert_or_add(&mut query, "start", item_count, item_count);
            set_query_map(&mut next_url, &query);
            Ok(Next::NextPage(next_url))
        }

        /// Lists the host names this crawler declares for itself.
        fn domains(&self) -> Vec<String> {
            ["freie-radios.net", "www.freie-radios.net"]
                .iter()
                .map(|domain| domain.to_string())
                .collect()
        }
    }
}

pub mod cba {
    use super::util::{insert_or_add, query_map, set_query_map};
    use crate::rss::{Crawler, FetchedFeedPage, Next};

    /// Crawler registered for the `cba.media` and `cba.fro.at` domains.
    pub struct CbaCrawler {}

    #[async_trait::async_trait]
    impl Crawler for CbaCrawler {
        /// Decides what to fetch after `feed_page`.
        ///
        /// A page without items yields `Next::Finished`. Otherwise the page
        /// URL is reused with its `offset` query parameter advanced by the
        /// number of items on this page (or set to that number when the
        /// parameter is missing or not numeric), and returned as
        /// `Next::NextPage`.
        async fn next(&self, feed_page: FetchedFeedPage) -> anyhow::Result<Next> {
            match feed_page.items.len() {
                0 => Ok(Next::Finished),
                item_count => {
                    let mut next_url = feed_page.url;
                    let mut query = query_map(&next_url);
                    insert_or_add(&mut query, "offset", item_count, item_count);
                    set_query_map(&mut next_url, &query);
                    Ok(Next::NextPage(next_url))
                }
            }
        }

        /// Lists the host names this crawler declares for itself.
        fn domains(&self) -> Vec<String> {
            vec![String::from("cba.media"), String::from("cba.fro.at")]
        }
    }
}