library(revolver)
#>  [ ctree - Clone Trees in cancer ] 
#> Author :  Giulio Caravagna <gcaravagn@gmail.com> 
#> GitHub :  caravagn/ctree 
#> 
#> Available datasets ~ use data('xxx', package='REVOLVER_datasets') to load dataset 'xxx'
#> 
#> ◉ TRACERx_NEJM_2017 Mutations from TRACERx lung (PMID: 28445112). n = 99 patients, multi-region WES, CCF. 
#> ◉ TRACERx_NEJM_2017_REVOLVER REVOLVER analysis of TRACERx_NEJM_2017. 
#> ◉ YATES_BREAST_NATMED_2015 Mutations from breast cancers (PMID: 26099045). n = 50 patients, multi-region WES, binary. 
#> ◉ CROSS_CRC_ADENOCARCINOMA_NATECOEVO_2018 Mutations from colorectal adenocarcinomas (PMID: 30177804). n = 19 patients, multi-region WES, binary. 
#> ◉ MSEQ_CRC_ADENOCARCINOMA_SET6 Mutations, copy number and purity from the Set6 multi-region colorectal adenocarcinoma sample (doi:10.1101/586560). n = 1 patient, multi-region WGS 
#> ◉ MSEQ_CRC_ADENOCARCINOMA_SET7 Mutations, copy number and purity from the Set7 multi-region colorectal adenocarcinoma sample (doi:10.1101/586560). n = 1 patient, multi-region WGS 
#>  [ Mtree - Mutation Trees in cancer ] 
#> Author :  Giulio Caravagna <gcaravagn@gmail.com> 
#> GitHub :  caravagn/mtree 
#>  [ REVOLVER - Repeated Evolution in Cancer ] 
#> Author :  Giulio Caravagna <gcaravagn@gmail.com> 
#> GitHub :  caravagn/revolver 
#>    WWW :  https://caravagn.github.io/revolver/ 
#> 
#>  > REVOLVER is part of the "evoverse" [https://bit.ly/2orn94e] - a collection of packages to implement Cancer Evolution analyses from cancer sequencing data.
library(evoverse.datasets)
# Attach with library() so that a missing tidyverse installation stops the
# script here; require() would only warn and return FALSE, and the pipeline
# further down (%>%, filter, pull) would then fail with a less obvious error.
library(tidyverse)

Data

Cross et al. The evolutionary landscape of colorectal tumorigenesis. Nat Ecol Evol. 2018 2(10). PMID 30177804.

  • n = 19 tumours (9 adenomas and 10 carcinomas, unmatched);
  • multi-region sequencing with binary data;
  • annotated driver events from Cross et al.;

Building the cohort

# Load the Cross et al. input table from the evoverse.datasets package.
# The argument of utils::data() is `package` (singular); a misspelled
# `packages =` is swallowed by `...` and treated as one more dataset name,
# so the lookup would no longer be restricted to the intended package.
data(CROSS_CRC_ADENOCARCINOMA_NATECOEVO_2018, package = "evoverse.datasets")

# Show the input table (one row per patient/variant pair).
print(CROSS_CRC_ADENOCARCINOMA_NATECOEVO_2018)
#> # A tibble: 63 x 7
#>    patientID variantID CCF               is.clonal is.driver Misc   cluster
#>    <chr>     <chr>     <chr>             <lgl>     <lgl>     <chr>  <chr>  
#>  1 adenoma_1 ACVR2A    R1:1;R2:1;R3:1;R… TRUE      TRUE      NOTHI… 1      
#>  2 adenoma_1 APC       R1:1;R2:1;R3:1;R… TRUE      TRUE      NOTHI… 1      
#>  3 adenoma_2 APC       R1:1;R2:1;R3:1;R… TRUE      TRUE      NOTHI… 1      
#>  4 adenoma_2 KRAS      R1:0;R2:0;R3:0;R… FALSE     TRUE      NOTHI… 2      
#>  5 adenoma_2 ARID2     R1:0;R2:0;R3:0;R… FALSE     TRUE      NOTHI… 2      
#>  6 adenoma_2 TP53      R1:1;R2:1;R3:1;R… FALSE     TRUE      NOTHI… 3      
#>  7 adenoma_3 PIK3CA    R1:0;R2:0;R3:0;R… FALSE     TRUE      NOTHI… 1      
#>  8 adenoma_3 FBXW7     R1:1;R2:1;R3:1;R… TRUE      TRUE      NOTHI… 2      
#>  9 adenoma_3 APC       R1:1;R2:1;R3:1;R… TRUE      TRUE      NOTHI… 2      
#> 10 adenoma_3 AKAP9     R1:0;R2:0;R3:1;R… FALSE     TRUE      NOTHI… 3      
#> # … with 53 more rows

# Build the REVOLVER cohort object from the input table.
# MIN.CLUSTER.SIZE = 0 keeps clusters of any size (the constructor log
# reports "Reject clusters with size below 0").
CROSS_CRC_ADENOCARCINOMA_REVOLVER <- revolver_cohort(
  CROSS_CRC_ADENOCARCINOMA_NATECOEVO_2018,
  annotation = "Colorectal adenocarcinomas (Cross et al, PMID 30177804)",
  MIN.CLUSTER.SIZE = 0
)
#>  [ REVOLVER ~ Cohort constructor ] 
#> ◉  Use only drivers 1 
#> ◉ Reject clusters with size below 0 
#> 
#> =-=-=-=-=-=-=-=-=-=-=-
#>  REVOLVER input data
#> =-=-=-=-=-=-=-=-=-=-=-
#> # A tibble: 63 x 9
#>    Misc  patientID variantID cluster is.driver is.clonal CCF   id   
#>    <chr> <chr>     <chr>     <chr>   <lgl>     <lgl>     <chr> <chr>
#>  1 NOTH… adenoma_1 ACVR2A    1       TRUE      TRUE      R1:1… __mu…
#>  2 NOTH… adenoma_1 APC       1       TRUE      TRUE      R1:1… __mu…
#>  3 NOTH… adenoma_2 APC       1       TRUE      TRUE      R1:1… __mu…
#>  4 NOTH… adenoma_2 KRAS      2       TRUE      FALSE     R1:0… __mu…
#>  5 NOTH… adenoma_2 ARID2     2       TRUE      FALSE     R1:0… __mu…
#>  6 NOTH… adenoma_2 TP53      3       TRUE      FALSE     R1:1… __mu…
#>  7 NOTH… adenoma_3 PIK3CA    1       TRUE      FALSE     R1:0… __mu…
#>  8 NOTH… adenoma_3 FBXW7     2       TRUE      TRUE      R1:1… __mu…
#>  9 NOTH… adenoma_3 APC       2       TRUE      TRUE      R1:1… __mu…
#> 10 NOTH… adenoma_3 AKAP9     3       TRUE      FALSE     R1:0… __mu…
#> # … with 53 more rows, and 1 more variable: cluster_size <int>
#> 
#> =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#>  Extracting dataset for each patient (this may take some time)
#> =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#> 
#> adenoma_1 2 entries 
#> 
#> adenoma_2 4 entries 
#> 
#> adenoma_3 8 entries 
#> 
#> adenoma_4 5 entries 
#> 
#> adenoma_5 3 entries 
#> 
#> adenoma_6 2 entries 
#> 
#> adenoma_7 3 entries 
#> 
#> adenoma_8 4 entries 
#> 
#> adenoma_9 1 entries 
#> 
#> carcinoma_1 3 entries 
#> 
#> carcinoma_10 1 entries 
#> 
#> carcinoma_2 5 entries 
#> 
#> carcinoma_3 1 entries 
#> 
#> carcinoma_5 2 entries 
#> 
#> carcinoma_6 5 entries 
#> 
#> carcinoma_7 4 entries 
#> 
#> carcinoma_8 2 entries 
#> 
#> carcinoma_9_distal 3 entries 
#> 
#> carcinoma_9_proximal 5 entries 
#> 
#> =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#>  Extracting clones' table for each patient
#> =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#> adenoma_1 2 entries  -->  1 clones 
#> adenoma_2 4 entries  -->  3 clones 
#> adenoma_3 8 entries  -->  4 clones 
#> adenoma_4 5 entries  -->  3 clones 
#> adenoma_5 3 entries  -->  1 clones 
#> adenoma_6 2 entries  -->  1 clones 
#> adenoma_7 3 entries  -->  1 clones 
#> adenoma_8 4 entries  -->  1 clones 
#> adenoma_9 1 entries  -->  1 clones 
#> carcinoma_1 3 entries  -->  2 clones 
#> carcinoma_10 1 entries  -->  1 clones 
#> carcinoma_2 5 entries  -->  1 clones 
#> carcinoma_3 1 entries  -->  1 clones 
#> carcinoma_5 2 entries  -->  1 clones 
#> carcinoma_6 5 entries  -->  1 clones 
#> carcinoma_7 4 entries  -->  1 clones 
#> carcinoma_8 2 entries  -->  1 clones 
#> carcinoma_9_distal 3 entries  -->  1 clones 
#> carcinoma_9_proximal 5 entries  -->  1 clones

We can check the cohort and flag the drivers that are not recurrent.

# Run the cohort diagnostics. In the recorded output this reports driver
# variantIDs that occur only once, and patients with a single clone
# carrying drivers.
revolver_check_cohort(CROSS_CRC_ADENOCARCINOMA_REVOLVER)
#> 
#>  ERROR  Some driver variantIDs occur only once and should therefore be removed. 
#> 
#>  You can use `revolver::Stats_drivers(x)` to retrieve them. 
#> # A tibble: 9 x 7
#>   variantID numClonal p_clonal numSubclonal p_subclonal N_tot  p_tot
#>   <chr>         <dbl>    <dbl>        <dbl>       <dbl> <dbl>  <dbl>
#> 1 ACVR2A            1   0.0526            0      0          1 0.0526
#> 2 TGIF1             1   0.0526            0      0          1 0.0526
#> 3 SMAD3             1   0.0526            0      0          1 0.0526
#> 4 SOX9              1   0.0526            0      0          1 0.0526
#> 5 ARID2             0   0                 1      0.0526     1 0.0526
#> 6 AKAP9             0   0                 1      0.0526     1 0.0526
#> 7 GNAS              0   0                 1      0.0526     1 0.0526
#> 8 SMAD4             0   0                 1      0.0526     1 0.0526
#> 9 CHD4              0   0                 1      0.0526     1 0.0526
#> 
#>  WARNING  Some patients have only one clone with drivers, and therefore they will just be expanded. 
#> # A tibble: 15 x 7
#>    patientID numBiopsies numMutations numDriverMutati… numClonesWithDr…
#>    <chr>           <int>        <int>            <int>            <int>
#>  1 adenoma_1           6            2                2                1
#>  2 adenoma_5           4            3                3                1
#>  3 adenoma_6           2            2                2                1
#>  4 adenoma_7           2            3                3                1
#>  5 adenoma_8           2            4                4                1
#>  6 adenoma_9           2            1                1                1
#>  7 carcinom…           5            1                1                1
#>  8 carcinom…           7            5                5                1
#>  9 carcinom…           6            1                1                1
#> 10 carcinom…           6            2                2                1
#> 11 carcinom…          13            5                5                1
#> 12 carcinom…           8            4                4                1
#> 13 carcinom…           5            2                2                1
#> 14 carcinom…           5            3                3                1
#> 15 carcinom…           5            5                5                1
#> # … with 2 more variables: numTruncalMutations <int>,
#> #   numSubclonalMutations <int>

# Collect the variantIDs of driver events that occur in exactly one patient
# (N_tot == 1 in the per-driver summary table).
non_recurrent <- CROSS_CRC_ADENOCARCINOMA_REVOLVER %>%
  Stats_drivers() %>%
  filter(N_tot == 1) %>%
  pull(variantID)

# Drop those driver events from the cohort, overwriting the cohort object.
CROSS_CRC_ADENOCARCINOMA_REVOLVER <- remove_drivers(
  CROSS_CRC_ADENOCARCINOMA_REVOLVER,
  non_recurrent
)
#> Removing driver events ACVR2A, TGIF1, SMAD3, SOX9, ARID2, AKAP9, GNAS, SMAD4, CHD4 
#> # A tibble: 9 x 7
#>   variantID numClonal p_clonal numSubclonal p_subclonal N_tot  p_tot
#>   <chr>         <dbl>    <dbl>        <dbl>       <dbl> <dbl>  <dbl>
#> 1 ACVR2A            1   0.0526            0      0          1 0.0526
#> 2 TGIF1             1   0.0526            0      0          1 0.0526
#> 3 SMAD3             1   0.0526            0      0          1 0.0526
#> 4 SOX9              1   0.0526            0      0          1 0.0526
#> 5 ARID2             0   0                 1      0.0526     1 0.0526
#> 6 AKAP9             0   0                 1      0.0526     1 0.0526
#> 7 GNAS              0   0                 1      0.0526     1 0.0526
#> 8 SMAD4             0   0                 1      0.0526     1 0.0526
#> 9 CHD4              0   0                 1      0.0526     1 0.0526
#> Retained patients 19 
#> 
#>  WARNING  Some patients have only one clone with drivers, and therefore they will just be expanded. 
#> # A tibble: 16 x 7
#>    patientID numBiopsies numMutations numDriverMutati… numClonesWithDr…
#>    <chr>           <int>        <int>            <int>            <int>
#>  1 adenoma_1           6            2                1                1
#>  2 adenoma_5           4            3                3                1
#>  3 adenoma_6           2            2                2                1
#>  4 adenoma_7           2            3                3                1
#>  5 adenoma_8           2            4                4                1
#>  6 adenoma_9           2            1                1                1
#>  7 carcinom…           4            3                2                1
#>  8 carcinom…           5            1                1                1
#>  9 carcinom…           7            5                4                1
#> 10 carcinom…           6            1                1                1
#> 11 carcinom…           6            2                2                1
#> 12 carcinom…          13            5                5                1
#> 13 carcinom…           8            4                3                1
#> 14 carcinom…           5            2                2                1
#> 15 carcinom…           5            3                3                1
#> 16 carcinom…           5            5                4                1
#> # … with 2 more variables: numTruncalMutations <int>,
#> #   numSubclonalMutations <int>

Constructing mutation trees

revolver uses the mtree package to implement mutation trees from binary data.

# Build the mutation trees of every patient and store them in the cohort.
CROSS_CRC_ADENOCARCINOMA_REVOLVER <- compute_mutation_trees(
  CROSS_CRC_ADENOCARCINOMA_REVOLVER
)
#> 
#> =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#>  Constructing Mutations Tree objects via mtree - https://caravagn.github.io/mtree/
#> =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#> Input patients.  
#> adenoma_1, adenoma_2, adenoma_3, adenoma_4, adenoma_5, adenoma_6, adenoma_7, adenoma_8, adenoma_9, carcinoma_1, carcinoma_10, carcinoma_2, carcinoma_3, carcinoma_5, carcinoma_6, carcinoma_7, carcinoma_8, carcinoma_9_distal, carcinoma_9_proximal 
#>  [ mtree ~ generate mutation trees for adenoma_1 ] 
#> Sampler :  10000 (cutoff),  5000 (sampling),  100 (max store) 
#> Suppes' conditions :  >= 
#> # A tibble: 1 x 10
#>   cluster nMuts is.driver is.clonal    R1    R2    R3    R4    R5    R6
#>   <chr>   <int> <lgl>     <lgl>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1           2 TRUE      TRUE          1     1     1     1     1     1
#> 
#> Sampler: this model has 1 node, it has trivial models.
#> ✔ Trees with non-zero sscore 1 storing 1 
#> 
#>  Combinations of Information Transfer :  0 
#>  [ mtree ~ generate mutation trees for adenoma_2 ] 
#> Sampler :  10000 (cutoff),  5000 (sampling),  100 (max store) 
#> Suppes' conditions :  >= 
#> # A tibble: 3 x 8
#>   cluster nMuts is.driver is.clonal    R1    R2    R3    R4
#>   <chr>   <int> <lgl>     <lgl>     <dbl> <dbl> <dbl> <dbl>
#> 1 2           2 TRUE      FALSE         0     0     0     1
#> 2 1           1 TRUE      TRUE          1     1     1     1
#> 3 3           1 TRUE      FALSE         1     1     1     0
#> 
#> There are no alternatives!
#> ✔ Trees with non-zero sscore 1 storing 1 
#> 
#>  Combinations of Information Transfer :  0 
#>  [ mtree ~ generate mutation trees for adenoma_3 ] 
#> Sampler :  10000 (cutoff),  5000 (sampling),  100 (max store) 
#> Suppes' conditions :  >= 
#> # A tibble: 4 x 9
#>   cluster nMuts is.driver is.clonal    R1    R2    R3    R4    R5
#>   <chr>   <int> <lgl>     <lgl>     <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 2           5 TRUE      TRUE          1     1     1     1     1
#> 2 1           1 TRUE      FALSE         0     0     0     1     1
#> 3 3           1 FALSE     FALSE         0     0     1     0     0
#> 4 4           1 FALSE     FALSE         0     1     0     0     0
#> 
#> There are no alternatives!
#> ✔ Trees with non-zero sscore 1 storing 1 
#> 
#>  Combinations of Information Transfer :  0 
#>  [ mtree ~ generate mutation trees for adenoma_4 ] 
#> Sampler :  10000 (cutoff),  5000 (sampling),  100 (max store) 
#> Suppes' conditions :  >= 
#> # A tibble: 3 x 8
#>   cluster nMuts is.driver is.clonal    R1    R2    R3    R4
#>   <chr>   <int> <lgl>     <lgl>     <dbl> <dbl> <dbl> <dbl>
#> 1 1           3 TRUE      TRUE          1     1     1     1
#> 2 2           1 TRUE      FALSE         0     0     1     0
#> 3 3           1 FALSE     FALSE         0     0     0     1
#> 
#> There are no alternatives!
#> ✔ Trees with non-zero sscore 1 storing 1 
#> 
#>  Combinations of Information Transfer :  0 
#>  [ mtree ~ generate mutation trees for adenoma_5 ] 
#> Sampler :  10000 (cutoff),  5000 (sampling),  100 (max store) 
#> Suppes' conditions :  >= 
#> # A tibble: 1 x 8
#>   cluster nMuts is.driver is.clonal    R1    R2    R3    R4
#>   <chr>   <int> <lgl>     <lgl>     <dbl> <dbl> <dbl> <dbl>
#> 1 1           3 TRUE      TRUE          1     1     1     1
#> 
#> Sampler: this model has 1 node, it has trivial models.
#> ✔ Trees with non-zero sscore 1 storing 1 
#> 
#>  Combinations of Information Transfer :  0 
#>  [ mtree ~ generate mutation trees for adenoma_6 ] 
#> Sampler :  10000 (cutoff),  5000 (sampling),  100 (max store) 
#> Suppes' conditions :  >= 
#> # A tibble: 1 x 6
#>   cluster nMuts is.driver is.clonal    R1    R2
#>   <chr>   <int> <lgl>     <lgl>     <dbl> <dbl>
#> 1 1           2 TRUE      TRUE          1     1
#> 
#> Sampler: this model has 1 node, it has trivial models.
#> ✔ Trees with non-zero sscore 1 storing 1 
#> 
#>  Combinations of Information Transfer :  0 
#>  [ mtree ~ generate mutation trees for adenoma_7 ] 
#> Sampler :  10000 (cutoff),  5000 (sampling),  100 (max store) 
#> Suppes' conditions :  >= 
#> # A tibble: 1 x 6
#>   cluster nMuts is.driver is.clonal    R1    R2
#>   <chr>   <int> <lgl>     <lgl>     <dbl> <dbl>
#> 1 1           3 TRUE      TRUE          1     1
#> 
#> Sampler: this model has 1 node, it has trivial models.
#> ✔ Trees with non-zero sscore 1 storing 1 
#> 
#>  Combinations of Information Transfer :  0 
#>  [ mtree ~ generate mutation trees for adenoma_8 ] 
#> Sampler :  10000 (cutoff),  5000 (sampling),  100 (max store) 
#> Suppes' conditions :  >= 
#> # A tibble: 1 x 6
#>   cluster nMuts is.driver is.clonal    R1    R2
#>   <chr>   <int> <lgl>     <lgl>     <dbl> <dbl>
#> 1 1           4 TRUE      TRUE          1     1
#> 
#> Sampler: this model has 1 node, it has trivial models.
#> ✔ Trees with non-zero sscore 1 storing 1 
#> 
#>  Combinations of Information Transfer :  0 
#>  [ mtree ~ generate mutation trees for adenoma_9 ] 
#> Sampler :  10000 (cutoff),  5000 (sampling),  100 (max store) 
#> Suppes' conditions :  >= 
#> # A tibble: 1 x 6
#>   cluster nMuts is.driver is.clonal    R1    R2
#>   <chr>   <int> <lgl>     <lgl>     <dbl> <dbl>
#> 1 1           1 TRUE      TRUE          1     1
#> 
#> Sampler: this model has 1 node, it has trivial models.
#> ✔ Trees with non-zero sscore 1 storing 1 
#> 
#>  Combinations of Information Transfer :  0 
#>  [ mtree ~ generate mutation trees for carcinoma_1 ] 
#> Sampler :  10000 (cutoff),  5000 (sampling),  100 (max store) 
#> Suppes' conditions :  >= 
#> # A tibble: 2 x 8
#>   cluster nMuts is.driver is.clonal    R1    R2    R3    R4
#>   <chr>   <int> <lgl>     <lgl>     <dbl> <dbl> <dbl> <dbl>
#> 1 1           2 TRUE      TRUE          1     1     1     1
#> 2 2           1 FALSE     FALSE         1     0     0     0
#> 
#> There are no alternatives!
#> ✔ Trees with non-zero sscore 1 storing 1 
#> 
#>  Combinations of Information Transfer :  0 
#>  [ mtree ~ generate mutation trees for carcinoma_10 ] 
#> Sampler :  10000 (cutoff),  5000 (sampling),  100 (max store) 
#> Suppes' conditions :  >= 
#> # A tibble: 1 x 9
#>   cluster nMuts is.driver is.clonal    R1    R2    R3    R4    R5
#>   <chr>   <int> <lgl>     <lgl>     <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1           1 TRUE      TRUE          1     1     1     1     1
#> 
#> Sampler: this model has 1 node, it has trivial models.
#> ✔ Trees with non-zero sscore 1 storing 1 
#> 
#>  Combinations of Information Transfer :  0 
#>  [ mtree ~ generate mutation trees for carcinoma_2 ] 
#> Sampler :  10000 (cutoff),  5000 (sampling),  100 (max store) 
#> Suppes' conditions :  >= 
#> # A tibble: 1 x 11
#>   cluster nMuts is.driver is.clonal    R1    R2    R3    R4    R5    R6
#>   <chr>   <int> <lgl>     <lgl>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1           5 TRUE      TRUE          1     1     1     1     1     1
#> # … with 1 more variable: R7 <dbl>
#> 
#> Sampler: this model has 1 node, it has trivial models.
#> ✔ Trees with non-zero sscore 1 storing 1 
#> 
#>  Combinations of Information Transfer :  0 
#>  [ mtree ~ generate mutation trees for carcinoma_3 ] 
#> Sampler :  10000 (cutoff),  5000 (sampling),  100 (max store) 
#> Suppes' conditions :  >= 
#> # A tibble: 1 x 10
#>   cluster nMuts is.driver is.clonal    R1    R2    R3    R4    R5    R6
#>   <chr>   <int> <lgl>     <lgl>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1           1 TRUE      TRUE          1     1     1     1     1     1
#> 
#> Sampler: this model has 1 node, it has trivial models.
#> ✔ Trees with non-zero sscore 1 storing 1 
#> 
#>  Combinations of Information Transfer :  0 
#>  [ mtree ~ generate mutation trees for carcinoma_5 ] 
#> Sampler :  10000 (cutoff),  5000 (sampling),  100 (max store) 
#> Suppes' conditions :  >= 
#> # A tibble: 1 x 10
#>   cluster nMuts is.driver is.clonal    R1    R2    R3    R4    R5    R6
#>   <chr>   <int> <lgl>     <lgl>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1           2 TRUE      TRUE          1     1     1     1     1     1
#> 
#> Sampler: this model has 1 node, it has trivial models.
#> ✔ Trees with non-zero sscore 1 storing 1 
#> 
#>  Combinations of Information Transfer :  0 
#>  [ mtree ~ generate mutation trees for carcinoma_6 ] 
#> Sampler :  10000 (cutoff),  5000 (sampling),  100 (max store) 
#> Suppes' conditions :  >= 
#> # A tibble: 1 x 17
#>   cluster nMuts is.driver is.clonal    R1   R10   R11   R12   R13    R2
#>   <chr>   <int> <lgl>     <lgl>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1           5 TRUE      TRUE          1     1     1     1     1     1
#> # … with 7 more variables: R3 <dbl>, R4 <dbl>, R5 <dbl>, R6 <dbl>,
#> #   R7 <dbl>, R8 <dbl>, R9 <dbl>
#> 
#> Sampler: this model has 1 node, it has trivial models.
#> ✔ Trees with non-zero sscore 1 storing 1 
#> 
#>  Combinations of Information Transfer :  0 
#>  [ mtree ~ generate mutation trees for carcinoma_7 ] 
#> Sampler :  10000 (cutoff),  5000 (sampling),  100 (max store) 
#> Suppes' conditions :  >= 
#> # A tibble: 1 x 12
#>   cluster nMuts is.driver is.clonal    R1    R2    R3    R4    R5    R6
#>   <chr>   <int> <lgl>     <lgl>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1           4 TRUE      TRUE          1     1     1     1     1     1
#> # … with 2 more variables: R7 <dbl>, R8 <dbl>
#> 
#> Sampler: this model has 1 node, it has trivial models.
#> ✔ Trees with non-zero sscore 1 storing 1 
#> 
#>  Combinations of Information Transfer :  0 
#>  [ mtree ~ generate mutation trees for carcinoma_8 ] 
#> Sampler :  10000 (cutoff),  5000 (sampling),  100 (max store) 
#> Suppes' conditions :  >= 
#> # A tibble: 1 x 9
#>   cluster nMuts is.driver is.clonal    R1    R2    R3    R4    R5
#>   <chr>   <int> <lgl>     <lgl>     <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1           2 TRUE      TRUE          1     1     1     1     1
#> 
#> Sampler: this model has 1 node, it has trivial models.
#> ✔ Trees with non-zero sscore 1 storing 1 
#> 
#>  Combinations of Information Transfer :  0 
#>  [ mtree ~ generate mutation trees for carcinoma_9_distal ] 
#> Sampler :  10000 (cutoff),  5000 (sampling),  100 (max store) 
#> Suppes' conditions :  >= 
#> # A tibble: 1 x 9
#>   cluster nMuts is.driver is.clonal    R1    R2    R3    R4    R5
#>   <chr>   <int> <lgl>     <lgl>     <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1           3 TRUE      TRUE          1     1     1     1     1
#> 
#> Sampler: this model has 1 node, it has trivial models.
#> ✔ Trees with non-zero sscore 1 storing 1 
#> 
#>  Combinations of Information Transfer :  0 
#>  [ mtree ~ generate mutation trees for carcinoma_9_proximal ] 
#> Sampler :  10000 (cutoff),  5000 (sampling),  100 (max store) 
#> Suppes' conditions :  >= 
#> # A tibble: 1 x 9
#>   cluster nMuts is.driver is.clonal    R1    R2    R3    R4    R5
#>   <chr>   <int> <lgl>     <lgl>     <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1           5 TRUE      TRUE          1     1     1     1     1
#> 
#> Sampler: this model has 1 node, it has trivial models.
#> ✔ Trees with non-zero sscore 1 storing 1 
#> 
#>  Combinations of Information Transfer :  0

Fitting models with REVOLVER

The function revolver_fit implements the 2-step REVOLVER algorithm to fit the data.

We use the following parameters:

  • initial.solution = NA, to sample random initial solutions for every run of EM;
  • n = 3, to repeat the fit 3 times and retain the one with the lowest median goodness-of-fit penalty;
  • parallel = FALSE, to run the fits serially.
# Fit the cohort: 3 repetitions, each from a random initial solution
# (initial.solution = NA), with parallel execution switched off.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned, which would silently flip this setting.
# NOTE(review): the recorded log prints "Parallel exectuion (via 'easypar') :
# TRUE" despite parallel being off here; confirm against revolver_fit itself.
CROSS_CRC_ADENOCARCINOMA_REVOLVER <- revolver_fit(
  CROSS_CRC_ADENOCARCINOMA_REVOLVER,
  parallel = FALSE,
  n = 3,
  initial.solution = NA
)
#>  [ REVOLVER Transfer Learning fit ~ Colorectal adenocarcinomas (Cross et al, PMID 30177804) ] 
#> 
#>  WARNING  Some patients have only one clone with drivers, and therefore they will just be expanded. 
#> # A tibble: 16 x 7
#>    patientID numBiopsies numMutations numDriverMutati… numClonesWithDr…
#>    <chr>           <int>        <int>            <int>            <int>
#>  1 adenoma_1           6            2                1                1
#>  2 adenoma_5           4            3                3                1
#>  3 adenoma_6           2            2                2                1
#>  4 adenoma_7           2            3                3                1
#>  5 adenoma_8           2            4                4                1
#>  6 adenoma_9           2            1                1                1
#>  7 carcinom…           4            3                2                1
#>  8 carcinom…           5            1                1                1
#>  9 carcinom…           7            5                4                1
#> 10 carcinom…           6            1                1                1
#> 11 carcinom…           6            2                2                1
#> 12 carcinom…          13            5                5                1
#> 13 carcinom…           8            4                3                1
#> 14 carcinom…           5            2                2                1
#> 15 carcinom…           5            3                3                1
#> 16 carcinom…           5            5                4                1
#> # … with 2 more variables: numTruncalMutations <int>,
#> #   numSubclonalMutations <int>
#> 
#> Fitting  N = 19 patients 
#> 
#> # A tibble: 19 x 6
#>    patientID            hasTrees numTrees  maxScore  minScore combInfTransf
#>    <chr>                <lgl>       <int>     <dbl>     <dbl>         <int>
#>  1 adenoma_1            TRUE            1 1         1                     1
#>  2 adenoma_2            TRUE            1 0.0113    0.0113                1
#>  3 adenoma_3            TRUE            1 0.0000852 0.0000852             1
#>  4 adenoma_4            TRUE            1 0.00255   0.00255               1
#>  5 adenoma_5            TRUE            1 1         1                     1
#>  6 adenoma_6            TRUE            1 1         1                     1
#>  7 adenoma_7            TRUE            1 1         1                     1
#>  8 adenoma_8            TRUE            1 1         1                     1
#>  9 adenoma_9            TRUE            1 1         1                     1
#> 10 carcinoma_1          TRUE            1 0.0505    0.0505                1
#> 11 carcinoma_10         TRUE            1 1         1                     1
#> 12 carcinoma_2          TRUE            1 1         1                     1
#> 13 carcinoma_3          TRUE            1 1         1                     1
#> 14 carcinoma_5          TRUE            1 1         1                     1
#> 15 carcinoma_6          TRUE            1 1         1                     1
#> 16 carcinoma_7          TRUE            1 1         1                     1
#> 17 carcinoma_8          TRUE            1 1         1                     1
#> 18 carcinoma_9_distal   TRUE            1 1         1                     1
#> 19 carcinoma_9_proximal TRUE            1 1         1                     1
#> 
#> Initial solution : Randomized (uniform probability) 
#> 
#> Sampled solutions:  n = 3 
#> 
#> Parallel exectuion (via 'easypar') : TRUE 
#> [1] "w"
#> 
#> =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#>  1] Expectation Maximization
#> =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#> 
#>         Number of Solutions   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1
#>    Combinations of Transfer   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |
#> 
#>              Initialization   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |
#> 
#> # 1    :     E: OK   M:    1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |# A tibble: 19 x 9
#>    patientID hasTrees numTrees maxScore minScore combInfTransf Solution
#>    <chr>     <lgl>       <int>    <dbl>    <dbl>         <int>    <int>
#>  1 adenoma_1 TRUE            1  1.00e+0  1.00e+0             1        1
#>  2 adenoma_2 TRUE            1  1.13e-2  1.13e-2             1        1
#>  3 adenoma_3 TRUE            1  8.52e-5  8.52e-5             1        1
#>  4 adenoma_4 TRUE            1  2.55e-3  2.55e-3             1        1
#>  5 adenoma_5 TRUE            1  1.00e+0  1.00e+0             1        1
#>  6 adenoma_6 TRUE            1  1.00e+0  1.00e+0             1        1
#>  7 adenoma_7 TRUE            1  1.00e+0  1.00e+0             1        1
#>  8 adenoma_8 TRUE            1  1.00e+0  1.00e+0             1        1
#>  9 adenoma_9 TRUE            1  1.00e+0  1.00e+0             1        1
#> 10 carcinom… TRUE            1  5.05e-2  5.05e-2             1        1
#> 11 carcinom… TRUE            1  1.00e+0  1.00e+0             1        1
#> 12 carcinom… TRUE            1  1.00e+0  1.00e+0             1        1
#> 13 carcinom… TRUE            1  1.00e+0  1.00e+0             1        1
#> 14 carcinom… TRUE            1  1.00e+0  1.00e+0             1        1
#> 15 carcinom… TRUE            1  1.00e+0  1.00e+0             1        1
#> 16 carcinom… TRUE            1  1.00e+0  1.00e+0             1        1
#> 17 carcinom… TRUE            1  1.00e+0  1.00e+0             1        1
#> 18 carcinom… TRUE            1  1.00e+0  1.00e+0             1        1
#> 19 carcinom… TRUE            1  1.00e+0  1.00e+0             1        1
#> # … with 2 more variables: converged <lgl>, penalty <dbl>
#> 
#> =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#>  2] Transfering orderings across patients
#> =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#> [1] "w"
#> 
#> =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#>  1] Expectation Maximization
#> =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#> 
#>         Number of Solutions   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1
#>    Combinations of Transfer   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |
#> 
#>              Initialization   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |
#> 
#> # 1    :     E: OK   M:    1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |# A tibble: 19 x 9
#>    patientID hasTrees numTrees maxScore minScore combInfTransf Solution
#>    <chr>     <lgl>       <int>    <dbl>    <dbl>         <int>    <int>
#>  1 adenoma_1 TRUE            1  1.00e+0  1.00e+0             1        1
#>  2 adenoma_2 TRUE            1  1.13e-2  1.13e-2             1        1
#>  3 adenoma_3 TRUE            1  8.52e-5  8.52e-5             1        1
#>  4 adenoma_4 TRUE            1  2.55e-3  2.55e-3             1        1
#>  5 adenoma_5 TRUE            1  1.00e+0  1.00e+0             1        1
#>  6 adenoma_6 TRUE            1  1.00e+0  1.00e+0             1        1
#>  7 adenoma_7 TRUE            1  1.00e+0  1.00e+0             1        1
#>  8 adenoma_8 TRUE            1  1.00e+0  1.00e+0             1        1
#>  9 adenoma_9 TRUE            1  1.00e+0  1.00e+0             1        1
#> 10 carcinom… TRUE            1  5.05e-2  5.05e-2             1        1
#> 11 carcinom… TRUE            1  1.00e+0  1.00e+0             1        1
#> 12 carcinom… TRUE            1  1.00e+0  1.00e+0             1        1
#> 13 carcinom… TRUE            1  1.00e+0  1.00e+0             1        1
#> 14 carcinom… TRUE            1  1.00e+0  1.00e+0             1        1
#> 15 carcinom… TRUE            1  1.00e+0  1.00e+0             1        1
#> 16 carcinom… TRUE            1  1.00e+0  1.00e+0             1        1
#> 17 carcinom… TRUE            1  1.00e+0  1.00e+0             1        1
#> 18 carcinom… TRUE            1  1.00e+0  1.00e+0             1        1
#> 19 carcinom… TRUE            1  1.00e+0  1.00e+0             1        1
#> # … with 2 more variables: converged <lgl>, penalty <dbl>
#> 
#> =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#>  2] Transfering orderings across patients
#> =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#> [1] "w"
#> 
#> =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#>  1] Expectation Maximization
#> =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#> 
#>         Number of Solutions   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1
#>    Combinations of Transfer   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |
#> 
#>              Initialization   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |
#> 
#> # 1    :     E: OK   M:    1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |   1 |# A tibble: 19 x 9
#>    patientID hasTrees numTrees maxScore minScore combInfTransf Solution
#>    <chr>     <lgl>       <int>    <dbl>    <dbl>         <int>    <int>
#>  1 adenoma_1 TRUE            1  1.00e+0  1.00e+0             1        1
#>  2 adenoma_2 TRUE            1  1.13e-2  1.13e-2             1        1
#>  3 adenoma_3 TRUE            1  8.52e-5  8.52e-5             1        1
#>  4 adenoma_4 TRUE            1  2.55e-3  2.55e-3             1        1
#>  5 adenoma_5 TRUE            1  1.00e+0  1.00e+0             1        1
#>  6 adenoma_6 TRUE            1  1.00e+0  1.00e+0             1        1
#>  7 adenoma_7 TRUE            1  1.00e+0  1.00e+0             1        1
#>  8 adenoma_8 TRUE            1  1.00e+0  1.00e+0             1        1
#>  9 adenoma_9 TRUE            1  1.00e+0  1.00e+0             1        1
#> 10 carcinom… TRUE            1  5.05e-2  5.05e-2             1        1
#> 11 carcinom… TRUE            1  1.00e+0  1.00e+0             1        1
#> 12 carcinom… TRUE            1  1.00e+0  1.00e+0             1        1
#> 13 carcinom… TRUE            1  1.00e+0  1.00e+0             1        1
#> 14 carcinom… TRUE            1  1.00e+0  1.00e+0             1        1
#> 15 carcinom… TRUE            1  1.00e+0  1.00e+0             1        1
#> 16 carcinom… TRUE            1  1.00e+0  1.00e+0             1        1
#> 17 carcinom… TRUE            1  1.00e+0  1.00e+0             1        1
#> 18 carcinom… TRUE            1  1.00e+0  1.00e+0             1        1
#> 19 carcinom… TRUE            1  1.00e+0  1.00e+0             1        1
#> # … with 2 more variables: converged <lgl>, penalty <dbl>
#> 
#> =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#>  2] Transfering orderings across patients
#> =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#> 
#> =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#>  Selecting solution with minimal median penalty
#> =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#> Solution #1 0.857142857142857 
#> Solution #2 0.857142857142857 
#> Solution #3 0.857142857142857 
#>   Best solution is # 1 
#> REVOLVER Transfer Learning fit  COMPLETED

Computing REVOLVER hierarchical clusters

# Plot the clusters of the fitted cohort.
# NOTE(review): the meaning of cutoff_trajectories = 1 and cutoff_drivers = 0
# is not visible here; check the plot_clusters documentation.
# NOTE(review): no explicit clustering call appears before this plot in this
# document; confirm plot_clusters does not require one to be run first.
plot_clusters(CROSS_CRC_ADENOCARCINOMA_REVOLVER, cutoff_trajectories = 1, cutoff_drivers = 0)

# Plot method of the cohort object itself.
plot(CROSS_CRC_ADENOCARCINOMA_REVOLVER)

# Presumably summarises the clonal/subclonal status of each driver; verify
# against the package documentation.
plot_drivers_clonality(CROSS_CRC_ADENOCARCINOMA_REVOLVER)

# Presumably summarises how often each driver occurs across patients; verify
# against the package documentation.
plot_drivers_occurrence(CROSS_CRC_ADENOCARCINOMA_REVOLVER)