11package de .mediathekview .mserver .crawler .zdf ;
22
3+ import de .mediathekview .mlib .daten .Film ;
34import de .mediathekview .mlib .daten .Sender ;
45import de .mediathekview .mlib .messages .listener .MessageListener ;
56import de .mediathekview .mserver .base .config .MServerConfigManager ;
7+ import de .mediathekview .mserver .base .messages .ServerMessages ;
8+ import de .mediathekview .mserver .crawler .basic .AbstractCrawler ;
69import de .mediathekview .mserver .crawler .basic .CrawlerUrlDTO ;
7- import de .mediathekview .mserver .crawler .zdf . tasks . ZdfDayPageHtmlTask ;
8- import de .mediathekview .mserver .crawler .zdf .tasks .ZdfLetterListHtmlTask ;
9- import de .mediathekview .mserver .crawler .zdf .tasks .ZdfTopicPageHtmlTask ;
10- import de .mediathekview .mserver .crawler .zdf .tasks .ZdfTopicsPageHtmlTask ;
10+ import de .mediathekview .mserver .crawler .basic . TopicUrlDTO ;
11+ import de .mediathekview .mserver .crawler .zdf .tasks .ZdfFilmTask ;
12+ import de .mediathekview .mserver .crawler .zdf .tasks .ZdfLetterPageTask ;
13+ import de .mediathekview .mserver .crawler .zdf .tasks .ZdfTopicSeasonTask ;
1114import de .mediathekview .mserver .progress .listeners .SenderProgressListener ;
12- import org .jetbrains .annotations .NotNull ;
13-
14- import java .time .LocalDateTime ;
15- import java .time .format .DateTimeFormatter ;
16- import java .time .temporal .ChronoUnit ;
1715import java .util .Collection ;
1816import java .util .Queue ;
1917import java .util .Set ;
2018import java .util .concurrent .ConcurrentLinkedQueue ;
2119import java .util .concurrent .ExecutionException ;
2220import java .util .concurrent .ForkJoinPool ;
21+ import java .util .concurrent .RecursiveTask ;
22+ import org .apache .logging .log4j .LogManager ;
23+ import org .apache .logging .log4j .Logger ;
2324
24- public class ZdfCrawler extends AbstractZdfCrawler {
25-
26- private static final int MAXIMUM_DAYS_HTML_PAST = 7 ;
25+ public class ZdfCrawler extends AbstractCrawler {
2726
28- public ZdfCrawler (
29- final ForkJoinPool aForkJoinPool ,
30- final Collection <MessageListener > aMessageListeners ,
31- final Collection <SenderProgressListener > aProgressListeners ,
32- final MServerConfigManager rootConfig ) {
33- super (aForkJoinPool , aMessageListeners , aProgressListeners , rootConfig , ZdfConstants .PARTNER_TO_SENDER );
34- }
35-
36- @ Override
37- protected @ NotNull String getUrlBase () {
38- return ZdfConstants .URL_BASE ;
39- }
40-
41- @ Override
42- protected String getApiUrlBase () {
43- return ZdfConstants .URL_API_BASE ;
44- }
27+ private static final Logger LOG = LogManager .getLogger (ZdfCrawler .class );
28+ private static final int MAX_LETTER_PAGEGS = 27 ;
4529
46- @ Override
47- protected @ NotNull String getUrlDay () {
48- return ZdfConstants .URL_DAY ;
30+ public ZdfCrawler (ForkJoinPool aForkJoinPool , Collection <MessageListener > aMessageListeners , Collection <SenderProgressListener > aProgressListeners , MServerConfigManager rootConfig ) {
31+ super (aForkJoinPool , aMessageListeners , aProgressListeners , rootConfig );
4932 }
5033
5134 @ Override
@@ -54,52 +37,39 @@ public Sender getSender() {
5437 }
5538
5639 @ Override
57- public Queue < CrawlerUrlDTO > getTopicsEntries () throws ExecutionException , InterruptedException {
40+ protected RecursiveTask < Set < Film >> createCrawlerTask () {
5841
59- final Queue <CrawlerUrlDTO > letterListUrl = new ConcurrentLinkedQueue <>();
60- letterListUrl .add (new CrawlerUrlDTO (ZdfConstants .URL_TOPICS ));
42+ final String authKey = "aa3noh4ohz9eeboo8shiesheec9ciequ9Quah7el" ;
43+ try {
44+ ZdfLetterPageTask letterPageTask = new ZdfLetterPageTask (this , createLetterPageUrls (), authKey );
45+ final Set <TopicUrlDTO > topicUrls = forkJoinPool .submit (letterPageTask ).get ();
6146
62- final ZdfLetterListHtmlTask letterTask = new ZdfLetterListHtmlTask ( this , letterListUrl );
63- final Set < CrawlerUrlDTO > letterUrls = forkJoinPool . submit ( letterTask ). get ( );
47+ printMessage (
48+ ServerMessages . DEBUG_ALL_SENDUNG_FOLGEN_COUNT , getSender (). getName (), topicUrls . size () );
6449
65- final ZdfTopicsPageHtmlTask topicsTask =
66- new ZdfTopicsPageHtmlTask (this , new ConcurrentLinkedQueue <>(letterUrls ) );
67- final Set <CrawlerUrlDTO > topicsUrls = forkJoinPool .submit (topicsTask ).get ();
50+ ZdfTopicSeasonTask topicSeasonTask =
51+ new ZdfTopicSeasonTask (this , new ConcurrentLinkedQueue <>(topicUrls ), authKey );
52+ final Set <ZdfFilmDto > shows = forkJoinPool .submit (topicSeasonTask ).get ();
6853
69- final ZdfTopicPageHtmlTask topicTask =
70- new ZdfTopicPageHtmlTask (this , new ConcurrentLinkedQueue <>(topicsUrls ));
71- return new ConcurrentLinkedQueue <>(forkJoinPool .submit (topicTask ).get ());
72- }
54+ printMessage (
55+ ServerMessages .DEBUG_ALL_SENDUNG_FOLGEN_COUNT , getSender ().getName (), shows .size ());
7356
74- @ Override
75- protected Collection <CrawlerUrlDTO > getExtraDaysEntries ()
76- throws ExecutionException , InterruptedException {
77-
78- final ZdfDayPageHtmlTask dayTask =
79- new ZdfDayPageHtmlTask (getApiUrlBase (), this , getExtraDayUrls ());
80- return forkJoinPool .submit (dayTask ).get ();
57+ return new ZdfFilmTask (this , new ConcurrentLinkedQueue <>(shows ), authKey );
58+ } catch (final InterruptedException ex ) {
59+ LOG .debug ("{} crawler interrupted." , getSender ().getName (), ex );
60+ Thread .currentThread ().interrupt ();
61+ } catch (final ExecutionException ex ) {
62+ LOG .fatal ("Exception in {} crawler." , getSender ().getName (), ex );
63+ }
64+ return null ;
8165 }
8266
83- private Queue <CrawlerUrlDTO > getExtraDayUrls () {
67+ private Queue <CrawlerUrlDTO > createLetterPageUrls () {
8468 final Queue <CrawlerUrlDTO > urls = new ConcurrentLinkedQueue <>();
85- for (int i = 0 ; i <= getMaximumDaysPast (); i ++) {
86-
87- final LocalDateTime local = LocalDateTime .now ().minus (i , ChronoUnit .DAYS );
88- final String date = local .format (DateTimeFormatter .ofPattern ("yyyy-MM-dd" ));
89- final String url = String .format (ZdfConstants .URL_HTML_DAY , date );
90- urls .add (new CrawlerUrlDTO (url ));
69+ for (int i = 0 ; i < MAX_LETTER_PAGEGS ; i ++) {
70+ urls .add (new CrawlerUrlDTO (ZdfUrlBuilder .buildLetterPageUrl (i )));
9171 }
9272
9373 return urls ;
9474 }
95-
96- private int getMaximumDaysPast () {
97- final Integer maximumDaysForSendungVerpasstSection =
98- crawlerConfig .getMaximumDaysForSendungVerpasstSection ();
99- if (maximumDaysForSendungVerpasstSection == null
100- || maximumDaysForSendungVerpasstSection > MAXIMUM_DAYS_HTML_PAST ) {
101- return MAXIMUM_DAYS_HTML_PAST ;
102- }
103- return maximumDaysForSendungVerpasstSection ;
104- }
10575}
0 commit comments