Skip to content

Commit 0be2778

Browse files
committed
erster Durchstich
1 parent afc22a9 commit 0be2778

19 files changed

+856
-115
lines changed

src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfConstants.java

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,31 @@ public final class ZdfConstants {
1515
public static final String URL_TOPICS = URL_BASE + "/sendungen-a-z";
1616
/** Base url of the ZDF api. */
1717
public static final String URL_API_BASE = "https://api.zdf.de";
18+
19+
// TODO: handle paging (next page) and verify that the itemsFilter is correct as it stands.
20+
public static final String URL_LETTER_PAGE =
21+
URL_API_BASE
22+
+ "/graphql?operationName=specialPageByCanonical&" +
23+
"variables=%s&" +
24+
"extensions=%s";
25+
public static final String URL_LETTER_PAGE_VARIABLES =
26+
"{\"staticGridClusterPageSize\":6,\"staticGridClusterOffset\":0,\"canonical\":\"sendungen-100\",\"endCursor\":null,\"tabIndex\":%d,\"itemsFilter\":{\"teaserUsageNotIn\":[\"TIVI_HBBTV_ONLY\"]}}";
27+
public static final String URL_LETTER_PAGE_EXTENSIONS =
28+
"{\"persistedQuery\":{\"version\":1,\"sha256Hash\":\"7d33167e7700ba57779f48b28b5d485c8ada0a1d5dfbdc8a261b7bd62ca28ba7\"}}";
29+
30+
// TODO: the following filter was removed from the variables - is that ok? "filterBy":{"idIn":["13-fragen-_season_1"]}
31+
public static final String URL_TOPIC_PAGE = URL_API_BASE + "/graphql?operationName=seasonByCanonical&" +
32+
"variables=%s&" +
33+
"extensions=%s";
34+
public static final String URL_TOPIC_PAGE_VARIABLES = "{\"seasonIndex\":%d,\"episodesPageSize\":%d,\"canonical\":\"%s\",\"sortBy\":[{\"field\":\"EDITORIAL_DATE\",\"direction\":\"DESC\"}]}";
35+
public static final String URL_TOPIC_PAGE_EXTENSIONS =
36+
"{\"persistedQuery\":{\"version\":1,\"sha256Hash\":\"9412a0f4ac55dc37d46975d461ec64bfd14380d815df843a1492348f77b5c99a\"}}";
37+
38+
public static final String URL_FILM_ENRY =
39+
URL_API_BASE + "/graphql?operationName=GetVideoMetaByCanonical&"
40+
+ "variables={\"canonical\"=\"%s\"}&"
41+
+ "extensions={\"persistedQuery\"={\"version\"=1,\"sha256Hash\"=\"737eb4421d274259baa3051929f4ecfef2d2afc59f12a9d82285c14dbdd1dd0d\"}}";
42+
1843
/** Url to search the films. */
1944
public static final String URL_DAY =
2045
URL_API_BASE
Lines changed: 38 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,34 @@
11
package de.mediathekview.mserver.crawler.zdf;
22

3+
import de.mediathekview.mlib.daten.Film;
34
import de.mediathekview.mlib.daten.Sender;
45
import de.mediathekview.mlib.messages.listener.MessageListener;
56
import de.mediathekview.mserver.base.config.MServerConfigManager;
7+
import de.mediathekview.mserver.base.messages.ServerMessages;
8+
import de.mediathekview.mserver.crawler.basic.AbstractCrawler;
69
import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO;
7-
import de.mediathekview.mserver.crawler.zdf.tasks.ZdfDayPageHtmlTask;
8-
import de.mediathekview.mserver.crawler.zdf.tasks.ZdfLetterListHtmlTask;
9-
import de.mediathekview.mserver.crawler.zdf.tasks.ZdfTopicPageHtmlTask;
10-
import de.mediathekview.mserver.crawler.zdf.tasks.ZdfTopicsPageHtmlTask;
10+
import de.mediathekview.mserver.crawler.basic.TopicUrlDTO;
11+
import de.mediathekview.mserver.crawler.zdf.tasks.ZdfFilmTask;
12+
import de.mediathekview.mserver.crawler.zdf.tasks.ZdfLetterPageTask;
13+
import de.mediathekview.mserver.crawler.zdf.tasks.ZdfTopicSeasonTask;
1114
import de.mediathekview.mserver.progress.listeners.SenderProgressListener;
12-
import org.jetbrains.annotations.NotNull;
13-
14-
import java.time.LocalDateTime;
15-
import java.time.format.DateTimeFormatter;
16-
import java.time.temporal.ChronoUnit;
1715
import java.util.Collection;
1816
import java.util.Queue;
1917
import java.util.Set;
2018
import java.util.concurrent.ConcurrentLinkedQueue;
2119
import java.util.concurrent.ExecutionException;
2220
import java.util.concurrent.ForkJoinPool;
21+
import java.util.concurrent.RecursiveTask;
22+
import org.apache.logging.log4j.LogManager;
23+
import org.apache.logging.log4j.Logger;
2324

24-
public class ZdfCrawler extends AbstractZdfCrawler {
25-
26-
private static final int MAXIMUM_DAYS_HTML_PAST = 7;
25+
public class ZdfCrawler extends AbstractCrawler {
2726

28-
public ZdfCrawler(
29-
final ForkJoinPool aForkJoinPool,
30-
final Collection<MessageListener> aMessageListeners,
31-
final Collection<SenderProgressListener> aProgressListeners,
32-
final MServerConfigManager rootConfig) {
33-
super(aForkJoinPool, aMessageListeners, aProgressListeners, rootConfig, ZdfConstants.PARTNER_TO_SENDER);
34-
}
35-
36-
@Override
37-
protected @NotNull String getUrlBase() {
38-
return ZdfConstants.URL_BASE;
39-
}
40-
41-
@Override
42-
protected String getApiUrlBase() {
43-
return ZdfConstants.URL_API_BASE;
44-
}
27+
/** Logger of this crawler. */
private static final Logger LOG = LogManager.getLogger(ZdfCrawler.class);
28+
/**
 * Number of letter pages to request; used as the exclusive upper bound of the page index when
 * the letter page URLs are created.
 * NOTE(review): presumably 26 letters plus one extra page (e.g. digits) - confirm.
 * NOTE(review): the name contains a typo ("PAGEGS"); renaming requires changing its usage too.
 */
private static final int MAX_LETTER_PAGEGS = 27;
4529

46-
@Override
47-
protected @NotNull String getUrlDay() {
48-
return ZdfConstants.URL_DAY;
30+
/**
 * Creates the crawler and hands all collaborators unchanged to the base class constructor.
 *
 * @param aForkJoinPool pool passed to the base class
 * @param aMessageListeners message listeners passed to the base class
 * @param aProgressListeners progress listeners passed to the base class
 * @param rootConfig configuration manager passed to the base class
 */
public ZdfCrawler(
    final ForkJoinPool aForkJoinPool,
    final Collection<MessageListener> aMessageListeners,
    final Collection<SenderProgressListener> aProgressListeners,
    final MServerConfigManager rootConfig) {
  super(aForkJoinPool, aMessageListeners, aProgressListeners, rootConfig);
}
5033

5134
@Override
@@ -54,52 +37,39 @@ public Sender getSender() {
5437
}
5538

5639
@Override
57-
public Queue<CrawlerUrlDTO> getTopicsEntries() throws ExecutionException, InterruptedException {
40+
protected RecursiveTask<Set<Film>> createCrawlerTask() {
5841

59-
final Queue<CrawlerUrlDTO> letterListUrl = new ConcurrentLinkedQueue<>();
60-
letterListUrl.add(new CrawlerUrlDTO(ZdfConstants.URL_TOPICS));
42+
final String authKey = "aa3noh4ohz9eeboo8shiesheec9ciequ9Quah7el";
43+
try {
44+
ZdfLetterPageTask letterPageTask = new ZdfLetterPageTask(this, createLetterPageUrls(), authKey);
45+
final Set<TopicUrlDTO> topicUrls = forkJoinPool.submit(letterPageTask).get();
6146

62-
final ZdfLetterListHtmlTask letterTask = new ZdfLetterListHtmlTask(this, letterListUrl);
63-
final Set<CrawlerUrlDTO> letterUrls = forkJoinPool.submit(letterTask).get();
47+
printMessage(
48+
ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), topicUrls.size());
6449

65-
final ZdfTopicsPageHtmlTask topicsTask =
66-
new ZdfTopicsPageHtmlTask(this, new ConcurrentLinkedQueue<>(letterUrls));
67-
final Set<CrawlerUrlDTO> topicsUrls = forkJoinPool.submit(topicsTask).get();
50+
ZdfTopicSeasonTask topicSeasonTask =
51+
new ZdfTopicSeasonTask(this, new ConcurrentLinkedQueue<>(topicUrls), authKey);
52+
final Set<ZdfFilmDto> shows = forkJoinPool.submit(topicSeasonTask).get();
6853

69-
final ZdfTopicPageHtmlTask topicTask =
70-
new ZdfTopicPageHtmlTask(this, new ConcurrentLinkedQueue<>(topicsUrls));
71-
return new ConcurrentLinkedQueue<>(forkJoinPool.submit(topicTask).get());
72-
}
54+
printMessage(
55+
ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), shows.size());
7356

74-
@Override
75-
protected Collection<CrawlerUrlDTO> getExtraDaysEntries()
76-
throws ExecutionException, InterruptedException {
77-
78-
final ZdfDayPageHtmlTask dayTask =
79-
new ZdfDayPageHtmlTask(getApiUrlBase(), this, getExtraDayUrls());
80-
return forkJoinPool.submit(dayTask).get();
57+
return new ZdfFilmTask(this, new ConcurrentLinkedQueue<>(shows), authKey);
58+
} catch (final InterruptedException ex) {
59+
LOG.debug("{} crawler interrupted.", getSender().getName(), ex);
60+
Thread.currentThread().interrupt();
61+
} catch (final ExecutionException ex) {
62+
LOG.fatal("Exception in {} crawler.", getSender().getName(), ex);
63+
}
64+
return null;
8165
}
8266

83-
private Queue<CrawlerUrlDTO> getExtraDayUrls() {
67+
private Queue<CrawlerUrlDTO> createLetterPageUrls() {
8468
final Queue<CrawlerUrlDTO> urls = new ConcurrentLinkedQueue<>();
85-
for (int i = 0; i <= getMaximumDaysPast(); i++) {
86-
87-
final LocalDateTime local = LocalDateTime.now().minus(i, ChronoUnit.DAYS);
88-
final String date = local.format(DateTimeFormatter.ofPattern("yyyy-MM-dd"));
89-
final String url = String.format(ZdfConstants.URL_HTML_DAY, date);
90-
urls.add(new CrawlerUrlDTO(url));
69+
for (int i = 0; i < MAX_LETTER_PAGEGS; i++) {
70+
urls.add(new CrawlerUrlDTO(ZdfUrlBuilder.buildLetterPageUrl(i)));
9171
}
9272

9373
return urls;
9474
}
95-
96-
private int getMaximumDaysPast() {
97-
final Integer maximumDaysForSendungVerpasstSection =
98-
crawlerConfig.getMaximumDaysForSendungVerpasstSection();
99-
if (maximumDaysForSendungVerpasstSection == null
100-
|| maximumDaysForSendungVerpasstSection > MAXIMUM_DAYS_HTML_PAST) {
101-
return MAXIMUM_DAYS_HTML_PAST;
102-
}
103-
return maximumDaysForSendungVerpasstSection;
104-
}
10575
}
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
package de.mediathekview.mserver.crawler.zdf;
2+
3+
import de.mediathekview.mlib.daten.Sender;
4+
import de.mediathekview.mlib.messages.listener.MessageListener;
5+
import de.mediathekview.mserver.base.config.MServerConfigManager;
6+
import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO;
7+
import de.mediathekview.mserver.crawler.zdf.tasks.ZdfDayPageHtmlTask;
8+
import de.mediathekview.mserver.crawler.zdf.tasks.ZdfLetterListHtmlTask;
9+
import de.mediathekview.mserver.crawler.zdf.tasks.ZdfTopicPageHtmlTask;
10+
import de.mediathekview.mserver.crawler.zdf.tasks.ZdfTopicsPageHtmlTask;
11+
import de.mediathekview.mserver.progress.listeners.SenderProgressListener;
12+
import org.jetbrains.annotations.NotNull;
13+
14+
import java.time.LocalDateTime;
15+
import java.time.format.DateTimeFormatter;
16+
import java.time.temporal.ChronoUnit;
17+
import java.util.Collection;
18+
import java.util.Queue;
19+
import java.util.Set;
20+
import java.util.concurrent.ConcurrentLinkedQueue;
21+
import java.util.concurrent.ExecutionException;
22+
import java.util.concurrent.ForkJoinPool;
23+
24+
public class ZdfCrawlerOld extends AbstractZdfCrawler {
25+
26+
private static final int MAXIMUM_DAYS_HTML_PAST = 7;
27+
28+
public ZdfCrawlerOld(
29+
final ForkJoinPool aForkJoinPool,
30+
final Collection<MessageListener> aMessageListeners,
31+
final Collection<SenderProgressListener> aProgressListeners,
32+
final MServerConfigManager rootConfig) {
33+
super(aForkJoinPool, aMessageListeners, aProgressListeners, rootConfig, ZdfConstants.PARTNER_TO_SENDER);
34+
}
35+
36+
@Override
37+
protected @NotNull String getUrlBase() {
38+
return ZdfConstants.URL_BASE;
39+
}
40+
41+
@Override
42+
protected String getApiUrlBase() {
43+
return ZdfConstants.URL_API_BASE;
44+
}
45+
46+
@Override
47+
protected @NotNull String getUrlDay() {
48+
return ZdfConstants.URL_DAY;
49+
}
50+
51+
@Override
52+
public Sender getSender() {
53+
return Sender.ZDF;
54+
}
55+
56+
@Override
57+
public Queue<CrawlerUrlDTO> getTopicsEntries() throws ExecutionException, InterruptedException {
58+
59+
final Queue<CrawlerUrlDTO> letterListUrl = new ConcurrentLinkedQueue<>();
60+
letterListUrl.add(new CrawlerUrlDTO(ZdfConstants.URL_TOPICS));
61+
62+
final ZdfLetterListHtmlTask letterTask = new ZdfLetterListHtmlTask(this, letterListUrl);
63+
final Set<CrawlerUrlDTO> letterUrls = forkJoinPool.submit(letterTask).get();
64+
65+
final ZdfTopicsPageHtmlTask topicsTask =
66+
new ZdfTopicsPageHtmlTask(this, new ConcurrentLinkedQueue<>(letterUrls));
67+
final Set<CrawlerUrlDTO> topicsUrls = forkJoinPool.submit(topicsTask).get();
68+
69+
final ZdfTopicPageHtmlTask topicTask =
70+
new ZdfTopicPageHtmlTask(this, new ConcurrentLinkedQueue<>(topicsUrls));
71+
return new ConcurrentLinkedQueue<>(forkJoinPool.submit(topicTask).get());
72+
}
73+
74+
@Override
75+
protected Collection<CrawlerUrlDTO> getExtraDaysEntries()
76+
throws ExecutionException, InterruptedException {
77+
78+
final ZdfDayPageHtmlTask dayTask =
79+
new ZdfDayPageHtmlTask(getApiUrlBase(), this, getExtraDayUrls());
80+
return forkJoinPool.submit(dayTask).get();
81+
}
82+
83+
private Queue<CrawlerUrlDTO> getExtraDayUrls() {
84+
final Queue<CrawlerUrlDTO> urls = new ConcurrentLinkedQueue<>();
85+
for (int i = 0; i <= getMaximumDaysPast(); i++) {
86+
87+
final LocalDateTime local = LocalDateTime.now().minus(i, ChronoUnit.DAYS);
88+
final String date = local.format(DateTimeFormatter.ofPattern("yyyy-MM-dd"));
89+
final String url = String.format(ZdfConstants.URL_HTML_DAY, date);
90+
urls.add(new CrawlerUrlDTO(url));
91+
}
92+
93+
return urls;
94+
}
95+
96+
private int getMaximumDaysPast() {
97+
final Integer maximumDaysForSendungVerpasstSection =
98+
crawlerConfig.getMaximumDaysForSendungVerpasstSection();
99+
if (maximumDaysForSendungVerpasstSection == null
100+
|| maximumDaysForSendungVerpasstSection > MAXIMUM_DAYS_HTML_PAST) {
101+
return MAXIMUM_DAYS_HTML_PAST;
102+
}
103+
return maximumDaysForSendungVerpasstSection;
104+
}
105+
}
Lines changed: 63 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,55 +1,89 @@
11
package de.mediathekview.mserver.crawler.zdf;
22

3-
import de.mediathekview.mlib.daten.Film;
3+
import de.mediathekview.mlib.daten.Sender;
44
import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO;
55

6+
import java.time.LocalDateTime;
67
import java.util.Objects;
7-
import java.util.Optional;
88

9-
public class ZdfFilmDto {
9+
public class ZdfFilmDto extends CrawlerUrlDTO {
10+
private final Sender sender;
11+
private final String title;
12+
private final String description;
13+
private final String website;
14+
private final LocalDateTime time;
15+
private final String videoType;
16+
private String topic;
1017

11-
private final Optional<Film> film;
12-
private final Optional<String> urlSignLanguage;
13-
private final Optional<String> videoUrl;
18+
public ZdfFilmDto(
19+
Sender sender,
20+
String title,
21+
String description,
22+
String website,
23+
LocalDateTime time,
24+
String videoType,
25+
String downloadUrl) {
26+
super(downloadUrl);
27+
this.topic = "";
28+
this.title = title;
29+
this.description = description;
30+
this.sender = sender;
31+
this.website = website;
32+
this.time = time;
33+
this.videoType = videoType;
34+
}
1435

15-
public ZdfFilmDto(final Optional<Film> film, final String videoUrl, String urlSignLanguage) {
16-
this.film = film;
17-
if (videoUrl == null) {
18-
this.videoUrl = Optional.empty();
19-
} else {
20-
this.videoUrl = Optional.of(videoUrl);
21-
}
36+
public String getTitle() {
37+
return title;
38+
}
2239

23-
if (urlSignLanguage != null && !urlSignLanguage.isEmpty()) {
24-
this.urlSignLanguage = Optional.of(urlSignLanguage);
25-
} else {
26-
this.urlSignLanguage = Optional.empty();
27-
}
40+
public String getDescription() {
41+
return description;
2842
}
2943

30-
public Optional<String> getUrl() {
31-
return videoUrl;
44+
public Sender getSender() {
45+
return sender;
3246
}
33-
34-
public Optional<Film> getFilm() {
35-
return film;
47+
48+
public String getWebsite() {
49+
return website;
3650
}
3751

38-
public Optional<String> getUrlSignLanguage() {
39-
return urlSignLanguage;
52+
public LocalDateTime getTime() {
53+
return time;
4054
}
4155

4256
@Override
4357
public boolean equals(Object o) {
44-
if (this == o) return true;
4558
if (o == null || getClass() != o.getClass()) return false;
4659
if (!super.equals(o)) return false;
47-
ZdfFilmDto that = (ZdfFilmDto) o;
48-
return Objects.equals(film, that.film) && Objects.equals(urlSignLanguage, that.urlSignLanguage);
60+
61+
ZdfFilmDto filmDto = (ZdfFilmDto) o;
62+
return sender == filmDto.sender && Objects.equals(topic, filmDto.topic) && Objects.equals(title, filmDto.title) && Objects.equals(description, filmDto.description) && Objects.equals(website, filmDto.website) && Objects.equals(time, filmDto.time) && Objects.equals(videoType, filmDto.videoType);
4963
}
5064

5165
@Override
5266
public int hashCode() {
53-
return Objects.hash(super.hashCode(), film, urlSignLanguage);
67+
int result = super.hashCode();
68+
result = 31 * result + Objects.hashCode(sender);
69+
result = 31 * result + Objects.hashCode(topic);
70+
result = 31 * result + Objects.hashCode(title);
71+
result = 31 * result + Objects.hashCode(description);
72+
result = 31 * result + Objects.hashCode(website);
73+
result = 31 * result + Objects.hashCode(time);
74+
result = 31 * result + Objects.hashCode(videoType);
75+
return result;
76+
}
77+
78+
public String getVideoType() {
79+
return videoType;
80+
}
81+
82+
public String getTopic() {
83+
return topic;
84+
}
85+
86+
public void setTopic(String topic) {
87+
this.topic = topic;
5488
}
5589
}

0 commit comments

Comments
 (0)