Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/run_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ['3.8', '3.9', '3.10', '3.11']
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']

steps:
- uses: actions/checkout@v3
Expand All @@ -22,6 +22,7 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip setuptools wheel
pip install --upgrade -e .[contrib,test]
pip install flake8
- name: Lint with flake8
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -211,3 +211,4 @@ dmypy.json
/lvenv/
/models/*
/cache
.vscode
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ PYTHON_FILES = tests
test:
python -m pytest -m "not performance" tests/

gunicorn:
serve:
gunicorn -w 4 -b 127.0.0.1:5000 --reload wsgi:app

black:
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,6 @@ do not need to run INCEpTION during (early) development.

The simplest way to develop in a deployment setting, i.e. using `gunicorn`, is to just run

make gunicorn
make serve

This starts `gunicorn` with 4 workers and hot-code reloading.
4 changes: 2 additions & 2 deletions ariadne/contrib/adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_
end = token.end
label = Counter([self._label_map[pred] for pred in grouped_prediction]).most_common(1)[0][0]
prediction = create_prediction(cas, layer, feature, begin, end, label)
cas.add_annotation(prediction)
cas.add(prediction)

def _tokenize_bert(self, cas_tokens: List[str]) -> List[torch.Tensor]:
grouped_bert_tokens = [torch.LongTensor([self._tokenizer.cls_token_id])]
Expand Down Expand Up @@ -191,7 +191,7 @@ def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_
label_id = torch.argmax(outputs[0]).item()
label = self._label_map[label_id]
prediction = create_prediction(cas, layer, feature, sentence.begin, sentence.end, label)
cas.add_annotation(prediction)
cas.add(prediction)

def _build_model(self):
model = AutoModelWithHeads.from_pretrained(self._base_model_name)
Expand Down
14 changes: 5 additions & 9 deletions ariadne/contrib/flair.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,7 @@ def fix_whitespaces(cas_tokens):
dist = following_cas_token.begin - cas_token.end
else:
dist = 1
token = Token(
cas_token.get_covered_text(),
whitespace_after=dist,
start_position=cas_token.begin
)
token = Token(cas_token.get_covered_text(), whitespace_after=dist, start_position=cas_token.begin)
tokens.append(token)
return tokens

Expand All @@ -46,7 +42,7 @@ def __init__(self, model_name: str, model_directory: Path = None, split_sentence
self._model = Tagger.load(model_name)
self._split_sentences = split_sentences

def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_id: str, user_id: str):
def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_id: str, user_id: str):
# Extract the sentences from the CAS
if self._split_sentences:
sentences = []
Expand All @@ -67,18 +63,18 @@ def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_
end = named_entity.end_position
label = named_entity.tag
prediction = create_prediction(cas, layer, feature, begin, end, label)
cas.add(prediction)
cas.add(prediction)

else:
cas_tokens = cas.select(TOKEN_TYPE)
text = fix_whitespaces(cas_tokens)
sent = Sentence(text)

self._model.predict(sent)

for named_entity in sent.get_spans():
begin = named_entity.start_position
end = named_entity.end_position
label = named_entity.tag
prediction = create_prediction(cas, layer, feature, begin, end, label)
cas.add(prediction)
cas.add(prediction)
8 changes: 6 additions & 2 deletions ariadne/contrib/log_only.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,11 @@

class LogOnlyRecommender(Classifier):
def fit(self, documents: List[TrainingDocument], layer: str, feature: str, project_id, user_id: str):
print(f'Training triggered for [{feature}] on [{layer}] in [{len(documents)}] documents from project [{project_id}] for user [{user_id}]')
print(
f"Training triggered for [{feature}] on [{layer}] in [{len(documents)}] documents from project [{project_id}] for user [{user_id}]"
)

def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_id: str, user_id: str):
print(f'Prediction triggered on document [{document_id}] for [{feature}] on [{layer}] in project [{project_id}] for user [{user_id}]')
print(
f"Prediction triggered on document [{document_id}] for [{feature}] on [{layer}] in project [{project_id}] for user [{user_id}]"
)
2 changes: 1 addition & 1 deletion ariadne/contrib/nltk.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,4 @@ def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_
begin = cas_token.begin
end = begin + len(stem)
prediction = create_prediction(cas, layer, feature, begin, end, stem)
cas.add_annotation(prediction)
cas.add(prediction)
2 changes: 1 addition & 1 deletion ariadne/contrib/simalign.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,5 +53,5 @@ def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_
prediction = create_relation_prediction(
cas, layer, feature, src_tokens[source_idx], trg_tokens[target_idx], ""
)
cas.add_annotation(prediction)
cas.add(prediction)
break
2 changes: 1 addition & 1 deletion ariadne/contrib/sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_
if begin is not None and end is not None:
if tag == "O" or (tag.startswith("B") and prev_tag.startswith("I")):
prediction = create_prediction(cas, layer, feature, begin, end, "X")
cas.add_annotation(prediction)
cas.add(prediction)

if tag.startswith("B"):
begin = token.begin
Expand Down
2 changes: 1 addition & 1 deletion ariadne/contrib/stringmatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_
for mention, label_id in m.search(term=term, max_dist=2):
label = le.inverse_transform([label_id])[0]
prediction = create_prediction(cas, layer, feature, begin, end, label)
cas.add_annotation(prediction)
cas.add(prediction)

def _generate_candidates(self, cas: Cas, n: int):
# We generate token n-grams
Expand Down
15 changes: 6 additions & 9 deletions ariadne/contrib/transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
from ariadne.classifier import Classifier
from ariadne.contrib.inception_util import create_prediction
from cassis import Cas


class TransformerNerClassifier(Classifier):
def __init__(self, model_name: str):
super().__init__()
Expand All @@ -27,16 +28,12 @@ def __init__(self, model_name: str):
self.model = AutoModelForTokenClassification.from_pretrained(model_name)
self.ner_pipeline = pipeline("ner", model=self.model, tokenizer=self.tokenizer, aggregation_strategy="first")



def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_id: str, user_id: str):

document_text = cas.sofa_string
predictions = self.ner_pipeline(document_text)
for prediction in predictions:
start_char = prediction['start']
end_char = prediction['end']
label = prediction['entity_group']
start_char = prediction["start"]
end_char = prediction["end"]
label = prediction["entity_group"]
cas_prediction = create_prediction(cas, layer, feature, start_char, end_char, label)
cas.add(cas_prediction)

cas.add(cas_prediction)
100 changes: 100 additions & 0 deletions ariadne/demo/demo_link_feature.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Licensed to the Technische Universität Darmstadt under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The Technische Universität Darmstadt
# licenses this file to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List

from cassis import Cas

from ariadne.classifier import Classifier
from ariadne.protocol import TrainingDocument
from collections import defaultdict
from ariadne.contrib.inception_util import create_span_prediction

import logging

logger = logging.getLogger(__name__)


class DemoLinkFeatureRecommender(Classifier):
    """Demo recommender for link features.

    ``fit`` tallies, per lower-cased source mention, how often each lower-cased
    target mention was linked with a given role, and stores the most frequent
    role per (source, target) pair as the model. ``predict`` then suggests a
    link whenever a source and a known target co-occur in the same sentence.
    """

    def fit(self, documents: List[TrainingDocument], layer: str, feature: str, project_id, user_id: str):
        logger.info(
            f"Training triggered for [{feature}] on [{layer}] in [{len(documents)}] documents from project [{project_id}] for user [{user_id}]"
        )

        # tally[source_text][target_text][role] -> number of times this exact
        # (source, target, role) combination was annotated
        tally = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))

        for document in documents:
            for span in document.cas.select(layer):
                mention = span.get_covered_text().lower()
                link_array = span.get(feature)
                if not link_array:
                    continue
                for link in link_array.elements:
                    other = link.target.get_covered_text().lower()
                    tally[mention][other][link.role.lower()] += 1

        # Reduce the tallies: keep only the most frequent role for each
        # (source, target) pair
        best_links = {}
        for mention, per_target in tally.items():
            chosen = {}
            for other, role_counts in per_target.items():
                chosen[other] = max(role_counts, key=role_counts.get) if role_counts else ""
            best_links[mention] = chosen

        logger.info("Best labels: %s", best_links)
        self._save_model(user_id, best_links)

        logger.info("Training finished for user [%s]", user_id)

    def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_id: str, user_id: str):
        logger.info(
            f"Prediction triggered on document [{document_id}] for [{feature}] on [{layer}] in project [{project_id}] for user [{user_id}]"
        )

        model = self._load_model(user_id)
        if model is None:
            # Nothing trained for this user yet
            return

        # For every annotation whose text is a known source mention, look for a
        # known target inside the same sentence and suggest a link with the
        # learned role.
        for candidate in cas.select(layer):
            mention = candidate.get_covered_text().lower()
            if mention not in model:
                continue

            covering = list(
                cas.select_covering("de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence", candidate)
            )
            if not covering:
                continue

            known_targets = model[mention]
            for other in cas.select_covered(layer, covering[0]):
                role = known_targets.get(other.get_covered_text().lower())
                if role is None:
                    continue

                # Both ends exist in this sentence: materialize the link and
                # wrap it in an FSArray, as expected by the link feature
                LinkType = cas.typesystem.get_type("custom.SpanLinksLink")
                link_fs = LinkType(role=role, target=other)
                FSArray = cas.typesystem.get_type("uima.cas.FSArray")
                suggestion = create_span_prediction(
                    cas, layer, feature, candidate.begin, candidate.end, FSArray(elements=[link_fs])
                )
                cas.add(suggestion)

        logger.info("Prediction finished for user [%s]", user_id)
Loading