统一脑区命名
2026/5/12 17:59:12 网站建设 项目流程

最近在做全脑跨物种比较,涉及多个物种脑区信息,在数据清洗过程中转换名称步骤比较复杂,所以就整理了一个跨物种脑区统一转换表格

# library(dplyr)  # kept commented out as in the original; this step is base R

# 0. Region name -> metadata column holding that region's original labels.
#    The keys must equal names(all_metadata_list) exactly.
col_mapping <- c(
  "A1" = "Final_Layer_Annotation",
  "Amygdala" = "celltype.V2",
  "Clastrum" = "subtype.V2",
  "Hippocampus" = "celltype.sub",
  "Hypothalamus" = "celltype.V1",
  "Striatum" = "celltype.V2",
  "V1" = "Final_Layer_Annotation"
)

# Stage 1 dictionary: whole-string (exact) replacements for long descriptive
# names and a few special cases. Built once instead of once per region.
mapping_dict <- c(
  "Upper-layer intratelencephalic" = "GLUT_UL_IT",
  "Deep-layer intratelencephalic" = "GLUT_DL_IT",
  "Deep-layer corticothalamic and 6b" = "GLUT_DL_CT6b",
  "Deep-layer near-projecting" = "GLUT_DL_NP",
  "Amygdala excitatory" = "GLUT_AMY",
  "Mammillary body" = "GLUT_MB",
  "Midbrain-derived inhibitory" = "GABA_MDI",
  "Cholinergic_GABA" = "GABA_Cho",
  "Endothelial & Vascular" = "Vasc",
  "Choroid plexus" = "ChP",
  "Lower rhombic lip" = "LRL",
  "Dopaminergic Neuron" = "DA"
)

#' Normalise raw cell-type labels to the unified naming scheme.
#'
#' Applies, in order: exact dictionary replacement, non-neuron abbreviations,
#' MSN naming, GABA prefixing, GLUT / hippocampus / cortical-layer formatting,
#' and a final symbol clean-up. The order matters: later stages rely on the
#' strings produced by earlier ones.
#'
#' @param raw_types Vector of original labels (character or factor).
#' @param exact_map Named character vector, original name -> unified name,
#'   used for the whole-string replacements of stage 1.
#' @return Character vector of the same length as `raw_types`; `NA` stays `NA`.
unify_cell_type_names <- function(raw_types, exact_map = mapping_dict) {
  # Force to character so a factor's integer codes never leak into the labels.
  cleaned_types <- as.character(raw_types)

  # ---- Stage 1: exact matches (long descriptions and special fixes) ----
  # One vectorised lookup; no replacement value is itself a dictionary key,
  # so a single pass gives the same result as replacing key by key.
  dict_idx <- match(cleaned_types, names(exact_map))
  is_mapped <- !is.na(dict_idx)
  cleaned_types[is_mapped] <- unname(exact_map[dict_idx[is_mapped]])

  # ---- Stage 2: abbreviations for non-neuronal cells ----
  cleaned_types <- gsub("Astrocyte", "Astro", cleaned_types)
  cleaned_types <- gsub("Oligodendrocyte", "Oligo", cleaned_types)
  cleaned_types <- gsub("Microglia", "Micro", cleaned_types)
  cleaned_types <- gsub("Vascular", "Vasc", cleaned_types)
  cleaned_types <- gsub("Ependymal", "Epen", cleaned_types)

  # ---- Stage 3: striatal MSN family ----
  # The eccentric variant is handled first because its name contains the
  # plain "medium spiny neuron" phrase.
  cleaned_types <- gsub("(?i)Eccentric medium spiny neuron", "eMSN",
                        cleaned_types, perl = TRUE)
  cleaned_types <- gsub("(?i)Medium spiny neuron", "MSN",
                        cleaned_types, perl = TRUE)

  # ---- Stage 4: inhibitory (GABAergic) neurons ----
  cleaned_types <- gsub("^Gaba_", "GABA_", cleaned_types)
  cleaned_types <- gsub("^(CGE|MGE|LGE)_", "GABA_", cleaned_types)
  cleaned_types <- gsub("(CGE|MGE) interneuron", "GABA_\\1", cleaned_types)
  cleaned_types <- gsub("(PVALB|VIP|SST|LAMP5|RELN) neurons", "GABA_\\1",
                        cleaned_types)
  cleaned_types <- gsub("GABA_Vip", "GABA_VIP", cleaned_types)
  # Chandelier cells.
  cleaned_types <- gsub("PVALB Chandelier neurons|GABA_PVALB Chandelier",
                        "GABA_PV_Cha", cleaned_types)
  cleaned_types <- gsub("LAMP5-LHX6 and Chandelier", "GABA_LAMP5-LHX6_Cha",
                        cleaned_types)
  # Shorten every remaining PVALB to PV (covers the GABA_PVALB made above).
  cleaned_types <- gsub("PVALB", "PV", cleaned_types)

  # ---- Stage 5: excitatory (glutamatergic) neurons and hippocampus ----
  # Unify the prefix to GLUT_ (covers Glut and Glu_ spellings).
  # NOTE(review): "Glut_?" is unanchored and case-insensitive, so a label such
  # as "Glutamatergic" would become "GLUT_amatergic" - confirm no source
  # column contains such labels.
  cleaned_types <- gsub("Glut_?", "GLUT_", cleaned_types, ignore.case = TRUE)
  # Catches the Glu_ spelling (e.g. Glu_SUB).
  cleaned_types <- gsub("Glu_", "GLUT_", cleaned_types, ignore.case = TRUE)
  # Collapse any run of underscores after the prefix.
  cleaned_types <- gsub("GLUT_+", "GLUT_", cleaned_types)

  # Hippocampus-specific names.
  cleaned_types <- gsub("Hippocampal dentate gyrus", "GLUT_DG", cleaned_types)
  cleaned_types <- gsub("Hippocampal ", "GLUT_", cleaned_types)
  cleaned_types <- gsub("GLUT_CA1-3", "GLUT_CA", cleaned_types)

  # Cortical layer labels: anything starting with L1..L6 gets a GLUT_ prefix.
  is_layer_neuron <- grepl("^L[1-6]", cleaned_types)
  cleaned_types[is_layer_neuron] <-
    paste0("GLUT_", cleaned_types[is_layer_neuron])
  # Compress layer ranges: "L4/5" or "L3-L6" -> "L45" / "L36".
  cleaned_types <- gsub("GLUT_L([0-6])[-/]+L?([0-6])", "GLUT_L\\1\\2",
                        cleaned_types)
  # Drop the separator after the layer token: "GLUT_L45 IT" -> "GLUT_L45IT".
  # NOTE(review): the accompanying write-up shows "GLUT_L45_IT" in places;
  # this code yields "GLUT_L45IT" - confirm which form is intended.
  cleaned_types <- gsub("GLUT_L([0-9ab]+)[ -]+", "GLUT_L\\1", cleaned_types)

  # ---- Stage 6: final safety check and symbol clean-up ----
  cleaned_types <- gsub("GABA_GABA", "GABA_Unknown", cleaned_types)
  # Strip special symbols for compatibility with downstream tools.
  cleaned_types <- gsub("-", "_", cleaned_types)   # every hyphen -> underscore
  cleaned_types <- gsub("\\+", "", cleaned_types)  # drop plus signs (AVP+, AVP++)

  cleaned_types
}

for (region in names(all_metadata_list)) {
  # Fail early with a readable message instead of "subscript out of bounds".
  if (!region %in% names(col_mapping)) {
    stop("No source column configured in `col_mapping` for region '",
         region, "'.", call. = FALSE)
  }
  orig_col <- col_mapping[[region]]

  region_meta <- all_metadata_list[[region]]
  # A missing column would otherwise give NULL -> character(0) and only fail
  # later, at assignment, with an unrelated-looking error.
  if (!orig_col %in% names(region_meta)) {
    stop("Column '", orig_col, "' not found in the metadata of region '",
         region, "'.", call. = FALSE)
  }

  # Write the cleaned labels into the Unified_CellType column.
  all_metadata_list[[region]]$Unified_CellType <-
    unify_cell_type_names(region_meta[[orig_col]])
}

cat("✅ 数据已按最新规范统一清洗!\n")
# Add three hierarchical annotation columns (Level_3 -> Level_2 -> Level_1) to
# every region's metadata, derived from the Unified_CellType column.
#
# dplyr calls are namespace-qualified so this step does not depend on dplyr
# being attached (the library(dplyr) line in this file is commented out);
# the package still has to be installed.
for (region in names(all_metadata_list)) {
  meta <- all_metadata_list[[region]]

  meta <- dplyr::mutate(
    meta,

    # ==========================================
    # Level 3: finest sub-class mapping (the base of the hierarchy).
    # case_when() takes the FIRST matching rule, so the order below matters.
    # NOTE(review): marker patterns are unanchored substrings (e.g. "PV",
    # "NP", "CA"); confirm no unrelated label contains them.
    # ==========================================
    Level_3 = dplyr::case_when(
      # 1. Glia and other non-neurons (kept as-is or merged)
      Unified_CellType %in% c("Astro", "Oligo", "Micro", "OPC", "Vasc") ~
        Unified_CellType,
      Unified_CellType %in% c("Epen", "ChP") ~ "Epen_ChP",
      # Lower-rhombic-lip progenitors
      Unified_CellType == "LRL" ~ "Progenitor_LRL",

      # 2. GABA: split by developmental origin
      # MGE origin: PV, SST, LHX6, SOX6 and Chandelier cells
      grepl("PV|SST|LHX6|SOX6|MGE", Unified_CellType) &
        grepl("GABA", Unified_CellType) ~ "GABA_MGE",
      # CGE origin: VIP, LAMP5, RELN, CCK
      grepl("VIP|LAMP5|RELN|CCK|CGE", Unified_CellType) &
        grepl("GABA", Unified_CellType) ~ "GABA_CGE",
      # LGE origin (main striatal projection neurons): MSN family
      grepl("MSN", Unified_CellType) ~ "GABA_LGE_MSN",
      # Any remaining GABA label (e.g. hypothalamus-specific inhibitory cells).
      # The earlier rules already took every GABA label carrying a marker, so
      # no negated marker list is needed here.
      grepl("GABA", Unified_CellType) ~ "GABA_Other_Diencephalon",

      # 3. GLUT: cortex by projection class, subcortex by region
      # Cortical IT (intratelencephalic)
      grepl("GLUT_.*IT$", Unified_CellType) ~ "Cortical_IT",
      # Cortical CT (corticothalamic)
      grepl("GLUT_.*CT", Unified_CellType) ~ "Cortical_CT",
      # Cortical ET/PT (extratelencephalic)
      grepl("GLUT_.*ET", Unified_CellType) ~ "Cortical_ET",
      # Cortical NP (near-projecting)
      grepl("GLUT_.*NP", Unified_CellType) ~ "Cortical_NP",
      # Cortical L6b / CAR3 (special deep-layer populations)
      grepl("L6b|CAR3", Unified_CellType) ~ "Cortical_L6b",
      # Hippocampus
      grepl("GLUT_CA|GLUT_DG|GLUT_SUB", Unified_CellType) ~ "Hippocampal_GLUT",
      # Every other GLUT label (subcortical / hypothalamus / amygdala ...).
      # This used to re-exclude a marker list broader than the rules above,
      # which pushed GLUT labels that merely contained e.g. "CA" or "SUB"
      # into "Unknown" (and hence Level_1 "Other"); order alone decides now.
      grepl("GLUT", Unified_CellType) ~ "Subcortical_GLUT",

      # 4. Other special neurons
      Unified_CellType == "DA" ~ "DA_Neuron",
      Unified_CellType == "HDC neuron" ~ "Histaminergic_Neuron",

      # Fallback (also reached by NA labels)
      TRUE ~ "Unknown"
    ),

    # ==========================================
    # Level 2: lineage class, aggregated upward from Level 3
    # ==========================================
    Level_2 = dplyr::case_when(
      grepl("GABA", Level_3) ~ "GABA",
      grepl("Cortical|Hippocampal|Subcortical|GLUT", Level_3) ~ "GLUT",
      Level_3 %in% c("DA_Neuron", "Histaminergic_Neuron") ~ "Other_Neuron",
      Level_3 == "Progenitor_LRL" ~ "Progenitor",
      # Glia (Astro, Oligo, ...) and "Unknown" keep their Level 3 value
      TRUE ~ Level_3
    ),

    # ==========================================
    # Level 1: neuron vs non-neuron
    # ==========================================
    Level_1 = dplyr::case_when(
      Level_2 %in% c("GABA", "GLUT", "Other_Neuron") ~ "Neuron",
      Level_2 %in% c("Astro", "Oligo", "Micro", "OPC", "Vasc", "Epen_ChP") ~
        "Non_Neuron",
      # Progenitors and unknown labels end up here
      TRUE ~ "Other"
    )
  )

  # Write the annotated metadata back into the list.
  all_metadata_list[[region]] <- meta
}

cat("✅ 成功完成 Level 1, 2, 3 的多层级注释!\n")

细胞类型命名标准化说明 (Cell Type Nomenclature Standardization)

为了在全脑/多脑区尺度上进行无缝的数据整合,同时确保在不同编程环境(如 R 和 Python)中的语法兼容性,我们对各个原始数据集(涵盖 A1, Amygdala, Clastrum, Hippocampus, Hypothalamus, Striatum, V1 等区域)的细胞亚群进行了系统性的重命名与符号清洗。主要标准化原则如下:

1. 系统兼容性符号清洗 (Syntax-Safe Formatting)

为了避免特殊字符在下游分析算法或对象读取中引发报错,执行了严格的符号过滤:

  • 移除加号:移除了所有细胞类型名称中的+符号(例如,GLUT_AVP+/GLUT_AVP++被统一截断为GLUT_AVP)。
  • 下划线替代连字符:将所有的连字符(-)和多余空格全局替换为下划线(_),确保名称格式的连贯性(例如,原先转化生成的GLUT_L45-IT最终变为GLUT_L45_IT)。

2. 非神经元细胞简称映射 (Non-neurons Abbreviation)

对所有非神经元细胞使用了规范化的高级缩写,以优化降维图谱的图例展示效果。

| 原命名 (Original Name) | 标准化简称 (Standardized Name) |
| --- | --- |
| Astrocyte | Astro |
| Oligodendrocyte | Oligo |
| Microglia | Micro |
| Vascular / Endothelial & Vascular | Vasc |
| Ependymal | Epen |

3. 描述性长文本与特殊亚类映射 (Descriptive Nomenclature Mapping)

针对过于口语化的长文本命名及特定高频出现的细胞亚类,提取其核心解剖与投射属性,并赋予机器可读的标准缩写。

| 原命名 (Original Name) | 标准化简称 (Standardized Name) |
| --- | --- |
| Upper-layer intratelencephalic | GLUT_UL_IT |
| Deep-layer intratelencephalic | GLUT_DL_IT |
| Deep-layer corticothalamic and 6b | GLUT_DL_CT6b |
| Deep-layer near-projecting | GLUT_DL_NP |
| Amygdala excitatory | GLUT_AMY |
| Mammillary body | GLUT_MB |
| Midbrain-derived inhibitory | GABA_MDI |
| Cholinergic_GABA | GABA_Cho |
| PVALB (全称或包含 PVALB 的命名) | PV(例如:GABA_PV) |
| PVALB Chandelier neurons | GABA_PV_Cha |
| LAMP5-LHX6 and Chandelier | GABA_LAMP5_LHX6_Cha |

4. 特征神经元与区域特异性整合 (Specific Neuronal Subtypes)

统一了纹状体(Striatum)和海马体(Hippocampus)中具有高度特异性的细胞命名,特别是将海马体的兴奋性群体全部归入GLUT_大类。

| 原命名 (Original Name) | 标准化简称 (Standardized Name) |
| --- | --- |
| Medium spiny neuron | MSN |
| Eccentric medium spiny neuron | eMSN |
| Glu_SUB | GLUT_SUB |
| Hippocampal dentate gyrus | GLUT_DG |
| Hippocampal CA1-3 | GLUT_CA |
| Hippocampal CA4 | GLUT_CA4 |

5. 神经元大类前缀与皮层属性压缩 (Lineage Prefixing & Layer Formatting)

  • 强制前缀统一:所有神经元均使用标准大写递质前缀(GLUT_ 或 GABA_)。原始数据中的发育起源前缀(CGE_、MGE_)及各种变体拼写(Glut、Glu_、Gaba_)均被强制归一化。

  • 皮层属性紧凑化:针对皮层兴奋性神经元,提取了层级范围与投射属性,整合为单一下划线连接的标准结构。

    • L4/5 IT➡️GLUT_L45_IT

    • L3-L6 IT➡️GLUT_L36_IT

    • L6 CT➡️GLUT_L6_CT

层级命名对应表

| Level 1 (大类) | Level 2 (谱系) | Level 3 (发育起源/投射属性/解剖亚区) | 包含的原始细胞类型 (Unified_CellType) |
| --- | --- | --- | --- |
| Non-Neuron | Astro | Astro | Astro |
| | Oligo | Oligo | Oligo |
| | Micro | Micro | Micro |
| | OPC | OPC | OPC |
| | Vasc | Vasc | Vasc |
| | Epen/ChP | Epen/ChP | Epen, ChP |
| Neuron | GABA | GABA_MGE(内侧起源) | GABA_PV, GABA_SST, GABA_SOX6, GABA_LHX6, GABA_PV_Cha 等 |
| | | GABA_CGE(尾侧起源) | GABA_VIP, GABA_LAMP5, GABA_RELN, GABA_CCK 等 |
| | | GABA_LGE / MSN(外侧/纹状体) | MSN, eMSN, MSN_DRD1, MSN_DRD2 |
| | | GABA_Diencephalon(间脑局部) | 下丘脑的大量特征GABA (GABA_AGRP, GABA_POMC 等) |
| | GLUT | Cortical_IT(端脑内) | GLUT_L.*IT (L2IT, L34IT, UL_IT, DL_IT 等) |
| | | Cortical_CT(丘脑投射) | GLUT_L6CT, GLUT_DL_CT6b |
| | | Cortical_ET(端脑外) | GLUT_L5ET |
| | | Cortical_NP(近距离) | GLUT_L56NP, GLUT_DL_NP |
| | | Hippocampal(海马) | GLUT_CA, GLUT_DG, GLUT_CA4, GLUT_SUB |
| | | Subcortical_GLUT(皮层下) | GLUT_AMY, 下丘脑各类 GLUT (GLUT_OXT, AVP 等) |
| | DA/Other | DA_Neuron | DA (多巴胺), HDC neuron (组胺) |

需要专业的网站建设服务?

联系我们获取免费的网站建设咨询和方案报价,让我们帮助您实现业务目标

立即咨询