#########################################
#
# Welch's t-Test Text and Graphics Function
#

def Welchs_t_Test(x1 =        None,
                  x2 =        None,
                test = "two-sided",
       variable_name =          "",
             x1_name =        "x₁",
             x2_name =        "x₂",
               alpha =        0.05):
    """
    Applies a Welch's t-Test to two samples with tablular and graphical output

    Author: WJ Capehart, South Dakota Mines.

    Licensing: Creative Commons BY-NC-SA version 4.0
       (https://creativecommons.org/licenses/by-nc-sa/4.0/)

    Citation: 

    "Welchs_t_Test(): A basic Welch's t-Test Utility Function 
        including textual and graphical output" by William J Capehart 
        is licensed under Creative Commons Attribution-NonCommercial-
        ShareAlike 4.0 
         

    Args:
        x1 (array): Experimental Sample
        x2 (array): Control Sample
        test (string): t-test: ["two-sided","greater","less"]
        variable_name (string): User Text Name for Variable
        x1_name (string): Experimental Sample Name
        x2_name (string): Control Sample Name
        alpha (float): Experimental Sample
        
    Returns:
        t_stat (float): Calculated t-statistic 
        p (float): p-value
        df (int): degrees of freedom
        Verdict (string): Hypothesis Result
    """


    
    #
    # Libraries
    #

    import numpy             as np
    import matplotlib.pyplot as plt
    import seaborn           as sns    
    import scipy.stats       as stats



    #
    # Test Language for Output
    #
    
    if (test == "greater"):
        Ho_sym, Ha_sym = ["≤", ">"]
        x1_fill        = "magenta"
        x2_fill        = "cyan"
        x1_line        = "darkred"
        x2_line        = "darkblue"
        t_color        = "blue"
    elif (test == "less"):
        Ho_sym, Ha_sym = ["≥", "<"]   
        x1_fill        = "cyan"
        x2_fill        = "magenta"
        x1_line        = "darkblue"
        x2_line        = "darkred"
        t_color        = "blue"
    else:
        Ho_sym, Ha_sym = ["=", "≠"] 
        x1_fill        = "cyan"
        x2_fill        = "grey"
        x1_line        = "darkblue"
        x2_line        = "black"
        t_color        = "green"
        
    Ho_Text = x1_name + " " + Ho_sym + " " + x2_name
    Ha_Text = x1_name + " " + Ha_sym + " " + x2_name
    
    #
    # Run T-Test
    #
    
    t_test_out = \
         stats.ttest_ind(a           =    x1,
                         b           =    x2,
                         alternative =  test,
                         equal_var   = False)
    #
    # Output from T-Test
    #
    
    t_stat   = t_test_out.statistic
    p        = t_test_out.pvalue
    df       = t_test_out.df
    
    P_t_stat = stats.t.cdf(x  = t_stat,
                           df =     df)
    
    #
    # Get Crit T-Thresholds, and calculated p vals s (<, >, two-tail)
    #
    
    if (test == "greater"):
        t_threshold = stats.t.ppf(q  =   1 - alpha,
                                  df =          df)
        p_calc      = 1-P_t_stat
        alpha_tail  = "{:02}".format(1-alpha)
    elif (test == "less"):
        t_threshold = stats.t.ppf(q  =       alpha,
                                  df =          df)  
        p_calc      = P_t_stat
        alpha_tail  = "{:02}".format(1-alpha)
    else:
        t_threshold = stats.t.ppf(q  = 1 - alpha/2,
                                  df =          df) 
        p_calc      = 2*(1-P_t_stat)
        alpha_high  = "{:03}".format(1-alpha/2)
        alpha_low   = "{:03}".format(alpha/2)
    
    #
    # Verdict of t-Test given alpha
    #
    
    if (p > alpha):
        Verdict = "Ho cannot be rejected; " + Ho_Text
    else:
        Verdict = "Ho is rejected; " + Ha_Text
    
    
    #
    # Welch's T Test Report
    #

    print("")
    print("╔═══════════════════════════════════════════════════")
    print("║╭──────────────────────────────────────────────")
    print("║│  Welch's t-Test Report (ɑ="+str(alpha)+")")
    if (variable_name != ""):
        print("║│ Variable :", variable_name )
    print("║│       Ho :", Ho_Text)
    print("║│       Ha :", Ha_Text)
    print("║│        t :", t_stat)
    if (test != "two-sided"):
        print("║│   t₍₁₋ₐ₎ :", t_threshold)
    else:
        print("║│  t₍₁₋½ₐ₎ :", t_threshold)
    print("║│        p :", p,p_calc )
    print("║│   Result :", Verdict  ) 
    print("║╰──────────────────────────────────────────────")
    
    #
    # Calucate p(t) for Graphics
    #
    
    p_t_stat = stats.t.pdf(x  = t_stat,
                           df =     df)
    
    
    #
    # X-Axis for T-Values for Plots
    #
    
    t_plotedge  = stats.t.ppf(q  = 0.99999,
                              df =      df)
    
    t_plotedge_max = np.max([t_plotedge,
                                 t_stat])
    
    t_plotedge_min = np.min([-t_plotedge,
                                  t_stat])
    
    t_axis_plot_range = np.linspace(start =  t_plotedge_min,
                                     stop =  t_plotedge_max,
                                     num  =            1000)
    
    #
    # Plot T-Curve
    #

    sns.set_theme(style = "ticks", 
          rc    = {"axes.spines.right":False, 
                     "axes.spines.top":False})
    
    fig, [ax1, ax2] = plt.subplots(nrows = 1,
                                   ncols = 2,
                                   figsize = [8,3])
    
    if (variable_name != ""):
        fig.suptitle("Welch's t-Test : "+ variable_name,fontsize="small")
    else:
        fig.suptitle("Welch's t-Test",fontsize="small")


    #
    # Plot x1 and x2
    #

    sns.kdeplot(x1, 
                color       = x1_fill, 
                ax          =     ax1,
                fill        =    True,
                alpha       =  0.3333,
                common_norm =   False)
    sns.kdeplot(x2, 
                color       = x2_fill, 
                ax          =     ax1,
                fill        =    True,
                alpha       =  0.3333,
                common_norm =   False)
    ax1.legend([x1_name,x2_name],
                framealpha = 0,      # makes legend transparent
                fontsize = "xx-small") 
    sns.kdeplot(x1, 
                color       = x1_line, 
                ax          =     ax1,
                fill        =   False,
                common_norm =   False)   
    sns.kdeplot(x2, 
                color       = x2_line, 
                ax          =     ax1,
                fill        =   False,
                common_norm =   False)
    ax1.set_xlabel(variable_name, fontsize="x-small")
    ax2.set_ylabel(r"$p(x)$", fontsize="x-small")
    ax1.set_title("Probability Distributions", fontsize="x-small")   

    #
    # Plot t-Distribution
    #

    if (p > alpha):
        ax2.set_title(r"$H_0$: " + Ho_Text, fontsize="x-small")
    else:
        ax2.set_title(r"$H_a$: " + Ha_Text, fontsize="x-small")
    
    
    ax2.plot(t_axis_plot_range, 
             stats.t.pdf(x  = t_axis_plot_range,
                         df = df), 
             color = "grey",
             label = r"$p(t)$")
    
    if (test == "greater"):
        t_axis_greater = np.linspace(start = t_plotedge_min,
                                      stop =    t_threshold,
                                       num =           1000)
        ax2.fill_between(t_axis_greater, 
                         stats.t.pdf(x  = t_axis_greater,
                                     df = df), 
                         color = t_color,
                         alpha = 0.33333,
                         label = r"$P(t)$ = "+alpha_tail)
    
    elif (test == "less"):
        t_axis_less = np.linspace(start =    t_threshold,
                                   stop = t_plotedge_max,
                                    num =           1000)
        ax2.fill_between(t_axis_less, 
                         stats.t.pdf(x  = t_axis_less,
                                     df = df), 
                         alpha = 0.33333,
                         color = t_color,
                         label = r"1-$P(t)$ = "+alpha_tail)
    
    else:
        t_axis_2tail = np.linspace(start = -t_threshold,
                                    stop =  t_threshold,
                                     num =         1000)
        ax2.fill_between(t_axis_2tail, 
                         stats.t.pdf(x  = t_axis_2tail,
                                     df = df), 
                         alpha = 0.33333,
                         color = t_color,
                         label = alpha_low+" > P(t) > "+alpha_high)
    
    ax2.plot([t_stat,   t_stat],
             [    -1, p_t_stat],
             marker    = "o",
             color     = "red",
             label     = r"Welch's $t$ value",
             linestyle = "dotted")
    
    ax2.legend(framealpha = 0,      # makes legend transparent
                 fontsize = "xx-small") 
    
    ax2.set_ylim(bottom = 0)
    ax2.set_xlabel(r"$t$",    fontsize="x-small")
    ax2.set_ylabel(r"$p(t)$", fontsize="x-small")


    ax1.tick_params(labelsize = "x-small")
    ax2.tick_params(labelsize = "x-small")

    
    plt.tight_layout
    plt.show()
   
    print("╚═══════════════════════════════════════════════════")
    print("")

    return {"t": t_stat,
            "p": p,
           "df": df,
       "Result": Verdict}
#
#########################################

#########################################
#
# Demonstration Libraries
#

import pandas as pd

#
#########################################

#########################################
#
# Demonstration Libraries
#

url_xlsx = "http://kyrill.ias.sdsmt.edu:8080/thredds/fileServer/CLASS_Examples/CEE_284_Area/Statistics/Air_Quality_Extraction_H0700.xlsx"

training = pd.read_excel(io         = url_xlsx,
                         sheet_name = "Training").  \
              set_index("Time")

testing  = pd.read_excel(io         = url_xlsx,
                         sheet_name = "Testing").  \
              set_index("Time")

display(training)
display(testing)
#
#########################################

#########################################
#
# Demonstration of Function
#

#
# Two-Tail Function Output
#

T_test_Ha_Testing_ne_Training = \
    Welchs_t_Test(x1 =         testing["C6H6 (μg/m³)"],
                  x2 =        training["C6H6 (μg/m³)"],
                test =                     "two-sided",
       variable_name = "Benzine Concentration [μg/m³]",
             x1_name =                       "Testing",
             x2_name =                      "Training",
               alpha =                            0.05)

print("Two-Tail Function Output")
display(T_test_Ha_Testing_ne_Training)
print("")
print("")

#
# Right-Tail Function Output
#

T_test_Ha_Testing_gt_Training = \
    Welchs_t_Test(x1 =         testing["C6H6 (μg/m³)"],
                  x2 =        training["C6H6 (μg/m³)"],
                test =                       "greater",
       variable_name = "Benzine Concentration [μg/m³]",
             x1_name =                       "Testing",
             x2_name =                      "Training",
               alpha =                            0.05)

print("Right-Tail Function Output")
display(T_test_Ha_Testing_gt_Training)
print("")
print("")

#
# Left-Tail Function Output
#

T_test_Ha_Testing_lt_Training = \
    Welchs_t_Test(x1 =         testing["C6H6 (μg/m³)"],
                  x2 =        training["C6H6 (μg/m³)"],
                test =                          "less",
       variable_name = "Benzine Concentration [μg/m³]",
             x1_name =                       "Testing",
             x2_name =                      "Training",
               alpha =                            0.05)

print("Left-Tail Function Output")
display(T_test_Ha_Testing_lt_Training)
print("")
print("")

#
#########################################

╔═══════════════════════════════════════════════════
║╭──────────────────────────────────────────────
║│  Welch's t-Test Report (ɑ=0.05)
║│ Variable : Benzine Concentration [μg/m³]
║│       Ho : Testing = Training
║│       Ha : Testing ≠ Training
║│        t : 1.1986856285319138
║│  t₍₁₋½ₐ₎ : 2.0094557942758993
║│        p : 0.2364019304228048 0.23640193042280466
║│   Result : Ho cannot be rejected; Testing = Training
║╰──────────────────────────────────────────────

╚═══════════════════════════════════════════════════

Two-Tail Function Output

{'t': 1.1986856285319138,
 'p': 0.2364019304228048,
 'df': 49.11541799673543,
 'Result': 'Ho cannot be rejected; Testing = Training'}



╔═══════════════════════════════════════════════════
║╭──────────────────────────────────────────────
║│  Welch's t-Test Report (ɑ=0.05)
║│ Variable : Benzine Concentration [μg/m³]
║│       Ho : Testing ≤ Training
║│       Ha : Testing > Training
║│        t : 1.1986856285319138
║│   t₍₁₋ₐ₎ : 1.676474979734221
║│        p : 0.1182009652114024 0.11820096521140233
║│   Result : Ho cannot be rejected; Testing ≤ Training
║╰──────────────────────────────────────────────

╚═══════════════════════════════════════════════════

Right-Tail Function Output

{'t': 1.1986856285319138,
 'p': 0.1182009652114024,
 'df': 49.11541799673543,
 'Result': 'Ho cannot be rejected; Testing ≤ Training'}



╔═══════════════════════════════════════════════════
║╭──────────────────────────────────────────────
║│  Welch's t-Test Report (ɑ=0.05)
║│ Variable : Benzine Concentration [μg/m³]
║│       Ho : Testing ≥ Training
║│       Ha : Testing < Training
║│        t : 1.1986856285319138
║│   t₍₁₋ₐ₎ : -1.6764749797342218
║│        p : 0.8817990347885977 0.8817990347885977
║│   Result : Ho cannot be rejected; Testing ≥ Training
║╰──────────────────────────────────────────────

╚═══════════════════════════════════════════════════

Left-Tail Function Output

{'t': 1.1986856285319138,
 'p': 0.8817990347885977,
 'df': 49.11541799673543,
 'Result': 'Ho cannot be rejected; Testing ≥ Training'}

#########################################
#
# Version Information Utility
#

%load_ext version_information

%version_information numpy, scipy, matplotlib, pandas, version_information

#
#########################################

	Sample	Month	Hour	C6H6 (μg/m³)	CO (mg/m³)	NOx (ppb)	NO2 (μg/m³)	T (° C)	RH (%)	AH (g/kg)
Time
2004-04-07 07:00:00	Train	Apr	7	12.935403	2.8	152.0	111.0	14.300	58.175	0.943075
2004-04-12 07:00:00	Train	Apr	7	0.945425	0.5	39.0	53.0	13.475	50.100	0.770567
2004-04-13 07:00:00	Train	Apr	7	19.123453	3.9	328.0	130.0	10.975	64.150	0.839760
2004-05-02 07:00:00	Train	May	7	4.755590	0.8	48.0	46.0	14.300	66.175	1.072763
2004-05-04 07:00:00	Train	May	7	23.037221	4.5	302.0	109.0	16.550	78.000	1.457225
...	...	...	...	...	...	...	...	...	...	...
2005-03-03 07:00:00	Train	Mar	7	6.742318	1.4	262.8	135.5	2.850	75.575	0.574151
2005-03-04 07:00:00	Train	Mar	7	7.486889	1.9	408.9	138.7	3.125	77.300	0.598485
2005-03-05 07:00:00	Train	Mar	7	2.042536	0.8	167.5	132.2	4.625	54.575	0.468217
2005-03-21 07:00:00	Train	Mar	7	6.074366	1.3	270.2	112.8	12.800	68.050	1.002346
2005-03-26 07:00:00	Train	Mar	7	5.691083	1.1	174.6	79.7	16.100	69.500	1.262319

	Sample	Month	Hour	C6H6 (μg/m³)	CO (mg/m³)	NOx (ppb)	NO2 (μg/m³)	T (° C)	RH (%)	AH (g/kg)
Time
2004-04-06 07:00:00	Test	Apr	7	24.134258	4.5	329.0	134.0	15.300	55.225	0.953788
2004-04-27 07:00:00	Test	Apr	7	23.419668	4.1	314.0	122.0	15.900	48.525	0.870351
2004-05-01 07:00:00	Test	May	7	6.451626	1.2	75.0	67.0	16.875	66.550	1.268826
2004-05-03 07:00:00	Test	May	7	21.128692	3.4	283.0	101.0	16.825	62.450	1.186945
2004-05-07 07:00:00	Test	May	7	19.779412	3.7	214.0	106.0	14.900	56.900	0.958184
2004-05-14 07:00:00	Test	May	7	22.780588	3.7	272.0	140.0	18.475	49.375	1.039693
2004-05-15 07:00:00	Test	May	7	9.061727	1.4	106.0	68.0	16.025	47.375	0.856424
2004-05-20 07:00:00	Test	May	7	19.618987	2.8	188.0	97.0	16.600	55.950	1.048554
2004-06-08 07:00:00	Test	Jun	7	19.370545	3.3	309.0	100.0	19.525	57.725	1.296638
2004-06-13 07:00:00	Test	Jun	7	1.279695	0.4	25.0	36.0	21.375	47.750	1.200534
2004-06-29 07:00:00	Test	Jun	7	23.496479	3.4	228.0	102.0	24.725	49.475	1.520097
2004-07-08 07:00:00	Test	Jul	7	16.944141	2.2	231.0	115.0	25.875	37.375	1.228885
2004-07-15 07:00:00	Test	Jul	7	15.564889	2.3	168.0	108.0	21.325	48.350	1.211945
2004-08-11 07:00:00	Test	Aug	7	9.468788	1.4	89.0	42.0	26.800	60.525	2.100800
2004-08-12 07:00:00	Test	Aug	7	10.956517	1.7	87.0	60.0	26.400	61.725	2.092947
2004-09-19 07:00:00	Test	Sep	7	4.809205	1.0	139.0	62.0	16.850	66.825	1.272082
2004-09-27 07:00:00	Test	Sep	7	26.900112	5.5	799.0	132.0	15.000	57.700	0.977823
2004-10-28 07:00:00	Test	Oct	7	19.994179	3.4	640.0	88.0	15.725	76.425	1.355764
2004-10-29 07:00:00	Test	Oct	7	15.929432	2.9	366.0	111.0	20.175	66.925	1.564267
2004-11-14 07:00:00	Test	Nov	7	0.212798	0.2	25.0	21.0	10.175	41.925	0.520914
2004-11-18 07:00:00	Test	Nov	7	9.890093	2.1	531.0	162.0	6.825	74.500	0.741531
2004-11-29 07:00:00	Test	Nov	7	9.817125	2.0	325.0	90.0	8.725	83.700	0.945385
2005-01-01 07:00:00	Test	Jan	7	4.458207	1.4	168.1	96.7	3.025	60.700	0.466738
2005-01-20 07:00:00	Test	Jan	7	2.741115	1.0	181.9	104.4	7.450	55.250	0.573390
2005-02-03 07:00:00	Test	Feb	7	4.247511	0.6	215.6	133.9	1.350	63.125	0.432208
2005-02-21 07:00:00	Test	Feb	7	5.462115	1.2	183.3	123.1	4.200	80.900	0.674249
2005-02-24 07:00:00	Test	Feb	7	5.482765	1.5	289.7	169.5	3.750	83.775	0.677052
2005-02-26 07:00:00	Test	Feb	7	1.912104	1.0	143.7	108.2	5.725	54.325	0.502158
2005-02-28 07:00:00	Test	Feb	7	0.655256	0.4	88.8	65.5	2.500	52.700	0.390810
2005-03-10 07:00:00	Test	Mar	7	6.423963	1.3	249.1	100.0	4.325	76.075	0.639467

Software	Version
Python	3.12.8 64bit [Clang 18.1.8 ]
IPython	9.0.2
OS	macOS 15.3.1 arm64 arm 64bit
numpy	1.26.4
scipy	1.15.2
matplotlib	3.9.4
pandas	2.2.3
version_information	1.0.4
Thu Mar 13 09:06:03 2025 MDT

Welch's t-Test Text and Graphics Function¶

License¶

Libraries¶

Welch's t-Test Function Declaration¶

Testing Example¶

Support Libraries for Demonstration¶

Executing the Function¶

Version Information¶