apache-release
+
+ docs
+
${user.name}
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/config/ConfigExamplesTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/config/ConfigExamplesTest.java
new file mode 100644
index 00000000000..1429984cbf0
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/config/ConfigExamplesTest.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config;
+
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import org.apache.tika.config.loader.TikaLoader;
+import org.apache.tika.parser.Parser;
+
+/**
+ * Validates configuration examples used in documentation.
+ * <p>
+ * The JSON configuration examples are stored in {@code src/test/resources/config-examples/}
+ * and are included directly in the AsciiDoc documentation via the {@code include::} directive.
+ * This test class validates that each example is valid and can be loaded by TikaLoader.
+ * <p>
+ * Important: When modifying examples in the config-examples directory,
+ * ensure the JSON remains valid and these tests pass. The documentation will automatically
+ * reflect your changes.
+ * <p>
+ * TODO: Consider auto-generating the full config JSON files from the actual config classes
+ * (e.g., PDFParserConfig, TesseractOCRConfig) during the build process. This would:
+ * <ul>
+ * <li>Guarantee JSON always matches actual defaults</li>
+ * <li>Automatically catch when fields are added/removed</li>
+ * <li>Use Jackson's ORDER_MAP_ENTRIES_BY_KEYS for consistent ordering</li>
+ * </ul>
+ * Challenge: Jackson doesn't write comments in JSON output, so enum options would need
+ * to be documented via annotations and a post-processor, or in the AsciiDoc directly.
+ */
+public class ConfigExamplesTest {
+
+    private static final String EXAMPLES_DIR = "/config-examples/";
+
+    @TempDir Path tempDir; // scratch directory supplied by JUnit
+
+    // Copies the named classpath example to tika-config.json in the temp directory,
+    // loads it through TikaLoader and returns the parser, asserting it is non-null.
+    private Parser loadAndValidate(String resourceName) throws Exception {
+        try (InputStream in = getClass().getResourceAsStream(EXAMPLES_DIR + resourceName)) {
+            assertNotNull(in, "Resource not found: " + resourceName);
+            Path target = tempDir.resolve("tika-config.json");
+            String contents = new String(in.readAllBytes(), StandardCharsets.UTF_8);
+            Files.writeString(target, contents, StandardCharsets.UTF_8);
+            Parser loaded = TikaLoader.load(target).loadParsers();
+            assertNotNull(loaded, "Parser should not be null for: " + resourceName);
+            return loaded;
+        }
+    }
+
+    @Test
+    public void testPdfParserBasicConfig() throws Exception {
+        loadAndValidate("pdf-parser-basic.json");
+    }
+
+    @Test
+    public void testPdfParserFullConfig() throws Exception {
+        loadAndValidate("pdf-parser-full.json");
+    }
+
+    @Test
+    public void testTesseractBasicConfig() throws Exception {
+        loadAndValidate("tesseract-basic.json");
+    }
+
+    @Test
+    public void testTesseractFullConfig() throws Exception {
+        loadAndValidate("tesseract-full.json");
+    }
+
+    @Test
+    public void testFullMigrationExample() throws Exception {
+        loadAndValidate("migration-full-example.json");
+    }
+}
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/config-examples/migration-full-example.json b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/config-examples/migration-full-example.json
new file mode 100644
index 00000000000..014a7b69d7e
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/config-examples/migration-full-example.json
@@ -0,0 +1,26 @@
+{
+ "parsers": [
+ {
+ "pdf-parser": {
+ "extractInlineImages": true,
+ "extractUniqueInlineImagesOnly": true,
+ "sortByPosition": true,
+ "maxMainMemoryBytes": 1000000000
+ }
+ },
+ {
+ "tesseract-ocr-parser": {
+ "language": "eng+fra",
+ "pageSegMode": "1",
+ "timeoutSeconds": 300,
+ "otherTesseractConfig": {
+ "textord_initialx_ile": "0.75",
+ "textord_noise_hfract": "0.15625"
+ }
+ }
+ },
+ {
+ "default-parser": {}
+ }
+ ]
+}
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/config-examples/pdf-parser-basic.json b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/config-examples/pdf-parser-basic.json
new file mode 100644
index 00000000000..591e214ee67
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/config-examples/pdf-parser-basic.json
@@ -0,0 +1,10 @@
+{
+ "parsers": [
+ {
+ "pdf-parser": {
+ "extractInlineImages": true,
+ "sortByPosition": true
+ }
+ }
+ ]
+}
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/config-examples/pdf-parser-full.json b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/config-examples/pdf-parser-full.json
new file mode 100644
index 00000000000..9f455918de2
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/config-examples/pdf-parser-full.json
@@ -0,0 +1,53 @@
+{
+ "parsers": [
+ {
+ "pdf-parser": {
+ // Options: DONT_CHECK, ALLOW_EXTRACTION_FOR_ACCESSIBILITY, IGNORE_ACCESSIBILITY_ALLOWANCE
+ "accessCheckMode": "DONT_CHECK",
+ "averageCharTolerance": 0.3,
+ "catchIntermediateIOExceptions": true,
+ "detectAngles": false,
+ "dropThreshold": 2.5,
+ "enableAutoSpace": true,
+ "extractAcroFormContent": true,
+ "extractActions": false,
+ "extractAnnotationText": true,
+ "extractBookmarksText": true,
+ "extractFontNames": false,
+ "extractIncrementalUpdateInfo": true,
+ "extractInlineImageMetadataOnly": false,
+ "extractInlineImages": false,
+ "extractMarkedContent": false,
+ "extractUniqueInlineImagesOnly": true,
+ "ifXFAExtractOnlyXFA": false,
+ "ignoreContentStreamSpaceGlyphs": false,
+ // Options: NONE, RAW_IMAGES, RENDER_PAGES_BEFORE_PARSE, RENDER_PAGES_AT_PAGE_END
+ "imageStrategy": "NONE",
+ "maxIncrementalUpdates": 10,
+ "maxMainMemoryBytes": 536870912,
+ "ocr": {
+ "dpi": 300,
+ // Options: PNG, TIFF, JPEG
+ "imageFormat": "PNG",
+ "imageQuality": 1.0,
+ // Options: RGB, GRAY
+ "imageType": "GRAY",
+ // Options: NO_TEXT, TEXT_ONLY, VECTOR_GRAPHICS_ONLY, ALL
+ "renderingStrategy": "ALL",
+ // Options: AUTO, NO_OCR, OCR_ONLY, OCR_AND_TEXT_EXTRACTION
+ "strategy": "AUTO",
+ "strategyAuto": {
+ "totalCharsPerPage": 10,
+ "unmappedUnicodeCharsPerPage": 10
+ }
+ },
+ "parseIncrementalUpdates": false,
+ "setKCMS": false,
+ "sortByPosition": false,
+ "spacingTolerance": 0.5,
+ "suppressDuplicateOverlappingText": false,
+ "throwOnEncryptedPayload": false
+ }
+ }
+ ]
+}
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/config-examples/tesseract-basic.json b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/config-examples/tesseract-basic.json
new file mode 100644
index 00000000000..f41a367acca
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/config-examples/tesseract-basic.json
@@ -0,0 +1,10 @@
+{
+ "parsers": [
+ {
+ "tesseract-ocr-parser": {
+ "language": "eng",
+ "timeoutSeconds": 120
+ }
+ }
+ ]
+}
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/config-examples/tesseract-full.json b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/config-examples/tesseract-full.json
new file mode 100644
index 00000000000..4e3e75aeaed
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/config-examples/tesseract-full.json
@@ -0,0 +1,35 @@
+{
+ "parsers": [
+ {
+ "tesseract-ocr-parser": {
+ "applyRotation": false,
+ "colorspace": "gray",
+ "density": 300,
+ "depth": 4,
+ "enableImagePreprocessing": false,
+ "filter": "triangle",
+ "imageMagickPath": "",
+ "inlineContent": false,
+ "language": "eng",
+ "maxFileSizeToOcr": 2147483647,
+ "minFileSizeToOcr": 0,
+ // Additional Tesseract configuration parameters as key-value pairs
+ "otherTesseractConfig": {
+ "preserve_interword_spaces": "1",
+ "textord_initialx_ile": "0.75",
+ "textord_noise_hfract": "0.15625"
+ },
+ // Options: TXT, HOCR
+ "outputType": "TXT",
+ "pageSeparator": "",
+ "pageSegMode": "1",
+ "preserveInterwordSpacing": false,
+ "resize": 200,
+ "skipOcr": false,
+ "tessdataPath": "",
+ "tesseractPath": "",
+ "timeoutSeconds": 120
+ }
+ }
+ ]
+}
diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/java/org/apache/tika/pipes/fs/ConfigExamplesTest.java b/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/java/org/apache/tika/pipes/fs/ConfigExamplesTest.java
new file mode 100644
index 00000000000..70fe7947bb3
--- /dev/null
+++ b/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/java/org/apache/tika/pipes/fs/ConfigExamplesTest.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fs;
+
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import org.apache.tika.config.loader.TikaLoader;
+
+/**
+ * Validates file system fetcher/emitter configuration examples used in documentation.
+ * <p>
+ * The JSON configuration examples are stored in {@code src/test/resources/config-examples/}
+ * and are included directly in the AsciiDoc documentation via the {@code include::} directive.
+ */
+public class ConfigExamplesTest {
+
+    private static final String EXAMPLES_DIR = "/config-examples/";
+
+    @TempDir Path tempDir; // scratch directory supplied by JUnit
+
+    // Writes the named example to a temp file and asserts that TikaLoader.load yields a loader.
+    private void loadAndValidate(String resourceName) throws Exception {
+        try (InputStream in = getClass().getResourceAsStream(EXAMPLES_DIR + resourceName)) {
+            assertNotNull(in, "Resource not found: " + resourceName);
+            Path target = tempDir.resolve("tika-config.json");
+            String contents = new String(in.readAllBytes(), StandardCharsets.UTF_8);
+            Files.writeString(target, contents, StandardCharsets.UTF_8);
+            assertNotNull(TikaLoader.load(target),
+                    "TikaLoader should not be null for: " + resourceName);
+        }
+    }
+
+    @Test
+    public void testFileSystemFetcherConfig() throws Exception {
+        loadAndValidate("file-system-fetcher.json");
+    }
+
+    @Test
+    public void testFileSystemEmitterConfig() throws Exception {
+        loadAndValidate("file-system-emitter.json");
+    }
+
+    @Test
+    public void testFileSystemPipelineConfig() throws Exception {
+        loadAndValidate("file-system-pipeline.json");
+    }
+}
diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-emitter.json b/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-emitter.json
new file mode 100644
index 00000000000..4f01761e450
--- /dev/null
+++ b/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-emitter.json
@@ -0,0 +1,13 @@
+{
+ "emitters": [
+ {
+ "file-system-emitter": {
+ "id": "my-emitter",
+ "basePath": "/data/output",
+ "fileExtension": "json",
+ "onExists": "REPLACE",
+ "prettyPrint": true
+ }
+ }
+ ]
+}
diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-fetcher.json b/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-fetcher.json
new file mode 100644
index 00000000000..201d4fa099e
--- /dev/null
+++ b/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-fetcher.json
@@ -0,0 +1,11 @@
+{
+ "fetchers": [
+ {
+ "file-system-fetcher": {
+ "id": "my-fetcher",
+ "basePath": "/data/documents",
+ "extractFileSystemMetadata": true
+ }
+ }
+ ]
+}
diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-pipeline.json b/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-pipeline.json
new file mode 100644
index 00000000000..3d95755eff9
--- /dev/null
+++ b/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-pipeline.json
@@ -0,0 +1,27 @@
+{
+ "fetchers": [
+ {
+ "file-system-fetcher": {
+ "id": "input-fetcher",
+ "basePath": "/data/input",
+ "extractFileSystemMetadata": true
+ }
+ }
+ ],
+ "emitters": [
+ {
+ "file-system-emitter": {
+ "id": "output-emitter",
+ "basePath": "/data/output",
+ "fileExtension": "json",
+ "onExists": "SKIP",
+ "prettyPrint": false
+ }
+ }
+ ],
+ "parsers": [
+ {
+ "default-parser": {}
+ }
+ ]
+}
diff --git a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaObjectMapperFactory.java b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaObjectMapperFactory.java
index 39849524858..b8b7e4389d4 100644
--- a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaObjectMapperFactory.java
+++ b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaObjectMapperFactory.java
@@ -16,6 +16,7 @@
*/
package org.apache.tika.config.loader;
+import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
@@ -68,6 +69,9 @@ public static synchronized ObjectMapper getMapper() {
public static ObjectMapper createMapper() {
ObjectMapper mapper = new ObjectMapper();
+ // Allow comments in JSON config files (// and /* */ style)
+ mapper.configure(JsonParser.Feature.ALLOW_COMMENTS, true);
+
// Fail on unknown properties to catch configuration errors early
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, true);
diff --git a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/ConfigExamplesTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/ConfigExamplesTest.java
new file mode 100644
index 00000000000..43dd1391e30
--- /dev/null
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/ConfigExamplesTest.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.server.core;
+
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import org.apache.tika.config.loader.TikaLoader;
+
+/**
+ * Validates server configuration examples used in documentation.
+ * <p>
+ * The JSON configuration examples are stored in {@code src/test/resources/config-examples/}
+ * and are included directly in the AsciiDoc documentation via the {@code include::} directive.
+ */
+public class ConfigExamplesTest {
+
+    private static final String EXAMPLES_DIR = "/config-examples/";
+
+    @TempDir Path tempDir; // scratch directory supplied by JUnit
+
+    // Writes the named example to a temp file and asserts that TikaLoader.load yields a loader.
+    private void loadAndValidate(String resourceName) throws Exception {
+        try (InputStream in = getClass().getResourceAsStream(EXAMPLES_DIR + resourceName)) {
+            assertNotNull(in, "Resource not found: " + resourceName);
+            Path target = tempDir.resolve("tika-config.json");
+            String contents = new String(in.readAllBytes(), StandardCharsets.UTF_8);
+            Files.writeString(target, contents, StandardCharsets.UTF_8);
+            assertNotNull(TikaLoader.load(target),
+                    "TikaLoader should not be null for: " + resourceName);
+        }
+    }
+
+    @Test
+    public void testServerBasicConfig() throws Exception {
+        loadAndValidate("server-basic.json");
+    }
+
+    @Test
+    public void testServerWithParsersConfig() throws Exception {
+        loadAndValidate("server-with-parsers.json");
+    }
+}
diff --git a/tika-server/tika-server-core/src/test/resources/config-examples/server-basic.json b/tika-server/tika-server-core/src/test/resources/config-examples/server-basic.json
new file mode 100644
index 00000000000..d133c0deeb1
--- /dev/null
+++ b/tika-server/tika-server-core/src/test/resources/config-examples/server-basic.json
@@ -0,0 +1,13 @@
+{
+ "server": {
+ "port": 9998,
+ "host": "localhost",
+ "taskTimeoutMillis": 300000,
+ "enableUnsecureFeatures": false
+ },
+ "parsers": [
+ {
+ "default-parser": {}
+ }
+ ]
+}
diff --git a/tika-server/tika-server-core/src/test/resources/config-examples/server-with-parsers.json b/tika-server/tika-server-core/src/test/resources/config-examples/server-with-parsers.json
new file mode 100644
index 00000000000..fadb08a55f7
--- /dev/null
+++ b/tika-server/tika-server-core/src/test/resources/config-examples/server-with-parsers.json
@@ -0,0 +1,24 @@
+{
+ "server": {
+ "port": 9998,
+ "host": "0.0.0.0",
+ "taskTimeoutMillis": 600000,
+ "returnStackTrace": true
+ },
+ "parsers": [
+ {
+ "pdf-parser": {
+ "extractInlineImages": true,
+ "ocrStrategy": "AUTO"
+ }
+ },
+ {
+ "default-parser": {}
+ }
+ ],
+ "detectors": [
+ {
+ "default-detector": {}
+ }
+ ]
+}