Skip to content

Commit e2a2a94

Browse files
committed
Added Bray-Curtis dissimilarity
1 parent 425431f commit e2a2a94

4 files changed

Lines changed: 301 additions & 28 deletions

File tree

docs/source/overview.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,3 +71,14 @@ The fixation index is a measure of population differentiation in the presence of
7171
```{math}
7272
\frac{\pi_{\text{Between}} - \pi_{\text{Within}}}{\pi_{\text{Between}}} .
7373
```
74+
75+
This estimate is normally too naive for "real life" data, as it is biased by, for example, very different population sizes, but it can be used in these idealized conditions, especially as the sizes of the sub-populations are quite regular.
76+
77+
78+
#### Bray-Curtis dissimilarity
79+
The Bray-Curtis dissimilarity (Bray and Curtis, 1957) is a measure of how dissimilar two subpopulations are in terms of their species composition. In the context of our model, we define a "species" as a specific set of features. What this implementation does, then, is to count how many individuals possess a certain set of features within a subpopulation and compare this to a second subpopulation. In addition to the dissimilarity index between subpopulations, I implemented it between sets of subpopulations, which is useful, for example, to compare two groups of subpopulations as in the case of continental models. The index is implemented with the package `distancia` and is defined as:
80+
81+
```{math}
82+
D_{\text{BC}} = \frac{\sum_{i=1}^{n}|x_i - y_i|}{\sum_{i=1}^{n}|x_i + y_i|}
83+
```
84+
where, for a species $i$, $x_i$ and $y_i$ are the number of individuals of that species found in subpopulations $X$ and $Y$.

exploration.ipynb

Lines changed: 201 additions & 28 deletions
Large diffs are not rendered by default.

metapypulation/metapopulation.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
"""
55

66
from collections.abc import Set, Iterator
7+
from distancia import BrayCurtis
78
from itertools import pairwise, permutations
89
import numpy as np
910
import pandas as pd
@@ -355,6 +356,91 @@ def fixation_index(self, subpop_id_1: int, subpop_id_2: int) -> float:
355356
fixation_index = float((np.mean(pairwise_differences_between) - np.mean(pairwise_differences_within)) / np.mean(pairwise_differences_between))
356357

357358
return fixation_index
359+
360+
361+
def bray_curtis_by_subpopulation_pair(self, subpop_id_1: int, subpop_id_2: int) -> float:
    """
    Calculate the Bray-Curtis index of dissimilarity between any two subpopulations.

    A "species" is one distinct combination of feature values. The number of individuals
    carrying each combination is counted in both subpopulations, and the two count vectors
    are then compared. The index is 0 when both subpopulations contain the same species in
    the same numbers, and 1 when they have no species in common.

    Args:
        subpop_id_1 (int): id of the first subpopulation.
        subpop_id_2 (int): id of the second subpopulation.

    Returns:
        bray_curtis (float): Bray-Curtis dissimilarity between the two subpopulations.
    """
    number_of_features = self.number_of_features

    # One row per individual, one column per feature. An empty subpopulation still
    # yields a 2-D array so that it can be pooled with the other one below.
    features_pop_1 = [individual.features for individual in self.subpopulations[subpop_id_1].population]
    features_pop_2 = [individual.features for individual in self.subpopulations[subpop_id_2].population]
    traits_pop_1 = np.array(features_pop_1, dtype=float) if features_pop_1 else np.empty((0, number_of_features))
    traits_pop_2 = np.array(features_pop_2, dtype=float) if features_pop_2 else np.empty((0, number_of_features))

    # The species list must be shared by both subpopulations: Bray-Curtis compares
    # x_i with y_i for the *same* species i. Counting unique rows separately in each
    # subpopulation would give two vectors whose positions do not correspond (and
    # whose lengths may differ), so the rows are pooled first.
    # Rows are deliberately not sorted beforehand: np.unique needs no pre-sorting,
    # and sorting within a row would merge individuals that carry the same values
    # on different features.
    pooled_traits = np.concatenate((traits_pop_1, traits_pop_2), axis=0)
    species, species_index = np.unique(pooled_traits, axis=0, return_inverse=True)
    species_index = species_index.ravel()

    # species_index[k] is the species of pooled row k; the first rows belong to
    # subpopulation 1 and the remaining ones to subpopulation 2.
    size_pop_1 = len(traits_pop_1)
    counts_pop_1 = np.bincount(species_index[:size_pop_1], minlength=len(species))
    counts_pop_2 = np.bincount(species_index[size_pop_1:], minlength=len(species))

    # Create an instance of the BrayCurtis class
    bray_curtis_instance = BrayCurtis()

    # Calculate the Bray-Curtis distance between the two aligned count vectors
    bray_curtis_distance = float(bray_curtis_instance.calculate(counts_pop_1, counts_pop_2))

    return bray_curtis_distance
402+
403+
def bray_curtis_by_sets_of_subpopulations(self, group_of_subpop_id_1: list[int], group_of_subpop_id_2: list[int]) -> float:
    """
    Calculate the Bray-Curtis index of dissimilarity between any two sets of subpopulations.

    All individuals of the subpopulations listed in a group are pooled together and treated
    as one population. A "species" is one distinct combination of feature values. The index
    is 0 when both groups contain the same species in the same numbers, and 1 when they have
    no species in common.

    Args:
        group_of_subpop_id_1 (list[int]): list of ids of the first set of subpopulations.
        group_of_subpop_id_2 (list[int]): list of ids of the second set of subpopulations.

    Returns:
        bray_curtis (float): Bray-Curtis dissimilarity between the two sets of subpopulations.
    """
    # Gather the feature vectors of every individual in each group.
    features_group_1 = [
        individual.features
        for subpop_id in group_of_subpop_id_1
        for individual in self.subpopulations[subpop_id].population
    ]
    features_group_2 = [
        individual.features
        for subpop_id in group_of_subpop_id_2
        for individual in self.subpopulations[subpop_id].population
    ]

    # One row per individual, one column per feature. An empty group still yields
    # a 2-D array so that it can be pooled with the other one below.
    traits_pop_1 = np.array(features_group_1) if features_group_1 else np.empty((0, self.number_of_features))
    traits_pop_2 = np.array(features_group_2) if features_group_2 else np.empty((0, self.number_of_features))

    # The species list must be shared by both groups: Bray-Curtis compares x_i with
    # y_i for the *same* species i. Counting unique rows separately in each group
    # would give two vectors whose positions do not correspond (and whose lengths
    # may differ), so the rows are pooled first.
    # Rows are deliberately not sorted beforehand: np.unique needs no pre-sorting,
    # and sorting within a row would merge individuals that carry the same values
    # on different features.
    pooled_traits = np.concatenate((traits_pop_1, traits_pop_2), axis=0)
    species, species_index = np.unique(pooled_traits, axis=0, return_inverse=True)
    species_index = species_index.ravel()

    # species_index[k] is the species of pooled row k; the first rows belong to
    # group 1 and the remaining ones to group 2.
    size_pop_1 = len(traits_pop_1)
    counts_pop_1 = np.bincount(species_index[:size_pop_1], minlength=len(species))
    counts_pop_2 = np.bincount(species_index[size_pop_1:], minlength=len(species))

    # Create an instance of the BrayCurtis class
    bray_curtis_instance = BrayCurtis()

    # Calculate the Bray-Curtis distance between the two aligned count vectors
    bray_curtis_distance = float(bray_curtis_instance.calculate(counts_pop_1, counts_pop_2))

    return bray_curtis_distance
443+
358444

359445
class SubpopulationIterator(object):
360446
"""

plotting.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,9 @@ def metapopulation_plot_comparison(dataset_1, dataset_2, title, legend_1, legend
7171
elif what_measure == 'beta':
7272
dataset_1_global = pd.read_csv(f"{dataset_1}_beta_diversity.csv", index_col=0)
7373
dataset_2_global = pd.read_csv(f"{dataset_2}_beta_diversity.csv", index_col=0)
74+
elif what_measure == 'bray-curtis-0-4':
75+
dataset_1_global = pd.read_csv(f"{dataset_1}_bray-curtis_0-4.csv", index_col=0)
76+
dataset_2_global = pd.read_csv(f"{dataset_2}_bray-curtis_0-4.csv", index_col=0)
7477
else:
7578
raise ValueError("You need to decide what measure you will plot: 'set_counts', 'shannon', 'simpson' or 'gini'?")
7679

0 commit comments

Comments
 (0)