This script searches for all relatives in a 23 and Me account that share any chromosomome segments with a “Subject Relative”. This should make it easier to choose a starting point of deeper research into unknown relatives.
df <- read.csv("John_Doe_relatives_download.csv")
df0 <- df %>%
filter(!chromosome_number == "") %>%
mutate(
chromosome_number = ifelse(chromosome_number == "X",
"23", chromosome_number),
percent_dna_shared = gsub("%", "", percent_dna_shared),
chromosome_number = as.numeric(chromosome_number),
chromosome_start_point = as.numeric(chromosome_start_point),
chromosome_end_point = as.numeric(chromosome_end_point),
genetic_distance = as.numeric(genetic_distance),
num_snps = as.numeric(num_snps),
birth_year = as.numeric(birth_year),
percent_dna_shared = as.numeric(percent_dna_shared),
num_segments_shared = as.numeric(num_segments_shared)
)
df1 <- df0 %>%
select(display_name, chromosome_number, chromosome_start_point, chromosome_end_point)
subject <- "John Doe" # User Input: Enter Subject display_name
lower <- df1 %>%
filter(display_name == subject) %>%
select(chromosome_start_point)
upper <- df1 %>%
filter(display_name == subject) %>%
select(chromosome_end_point)
chromosome <- df1 %>%
filter(display_name == subject) %>%
select(chromosome_number)
df2 <- data.frame(
subject,
chromosome,
lower,
upper
)
head(df2)
Each observation of this subject
generates a chromosome
segment range as related to the the Profile Subject
subject is the display_name
lower equals the subject’s
chromosome_start_point
upper eqauls the subject’s
chromosome_end_point
.
chromosome equals the observations
chromosome_number
.
df3 <- left_join(df2, df1, by = 'chromosome_number', relationship = "many-to-many")
By rejoining the two datasets it is possible to see the chromosome segments that the near relative subject has in common with relatives of the Profile Subject.
subject_list <- df3 %>%
select(subject,
chromosome_start_point.x,
chromosome_end_point.x,
chromosome_number,
display_name,
chromosome_start_point.y,
chromosome_end_point.y) %>%
#Filter to Range
filter(!display_name == subject) %>%
filter(
(chromosome_start_point.y <= chromosome_start_point.x &
chromosome_end_point.y >= chromosome_end_point.x) |
(chromosome_start_point.y <= chromosome_end_point.x &
chromosome_end_point.y >= chromosome_end_point.x) |
(chromosome_start_point.y > chromosome_start_point.x &
chromosome_end_point.y < chromosome_end_point.x) |
(chromosome_start_point.y > chromosome_start_point.x &
chromosome_end_point.y < chromosome_end_point.x)
) %>%
group_by(display_name) %>%
arrange(display_name) %>%
ungroup() %>%
group_by(display_name) %>%
summarize(count = n()) %>%
arrange(desc(count)) %>%
filter(count > 1) %>%
select(display_name) %>%
list()
subject_list
This list shows other relatives that the Subject shares segments with.
df3 %>%
filter(display_name %in% c(subject_list[[1]]$display_name)) %>%
mutate(genetic_distance = chromosome_end_point.y - chromosome_start_point.y) %>%
arrange(genetic_distance) %>%
group_by(display_name) %>%
summarize(total_distance = sum(genetic_distance) / 2) %>%
arrange(desc(total_distance)) %>%
View()
df3 %>%
filter(display_name %in% c(subject_list[[1]]$display_name)) %>%
ggplot(aes(x = chromosome_number, color = display_name)) +
geom_linerange(aes(ymin = chromosome_start_point.y,
ymax = chromosome_end_point.y,
x = chromosome_number,
size = 3.5, alpha = 0.2)) +
scale_y_continuous(labels = scales::label_number(accuracy = 1)) +
scale_x_continuous(breaks = 1:23) +
theme_bw() +
labs(title = paste("Shared Chromos with ", subject))