Skip to content

Commit 1f52b08

Browse files
committed
Merge branch 'josesho-v0.2.1' into v0.2.1
2 parents a85a47b + 6842bd5 commit 1f52b08

15 files changed

Lines changed: 181 additions & 31 deletions

dabest/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,4 @@
2323
from ._stats_tools import effsize as effsize
2424
from ._classes import TwoGroupsEffectSize
2525

26-
__version__ = "0.2.0"
26+
__version__ = "0.2.1"

dabest/_api.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
#!/usr/bin/python
2+
# -*-coding: utf-8 -*-
3+
# Author: Joses Ho
4+
# Email : joseshowh@gmail.com
5+
6+
7+
def load(data, idx, x=None, y=None, paired=False, id_col=None,
8+
ci=95, resamples=5000, random_seed=12345):
9+
'''
10+
Loads data in preparation for estimation statistics.
11+
12+
This is designed to work with pandas DataFrames.
13+
14+
Parameters
15+
----------
16+
data : pandas DataFrame
17+
idx : tuple
18+
List of column names (if 'x' is not supplied) or of category names
19+
(if 'x' is supplied). This can be expressed as a tuple of tuples,
20+
with each individual tuple producing its own contrast plot.
21+
x, y : strings, default None
22+
Column names for data to be plotted on the x-axis and y-axis.
23+
paired : boolean, default False.
24+
id_col : default None.
25+
Required if `paired` is True.
26+
ci : integer, default 95
27+
The confidence interval width. The default of 95 produces 95%
28+
confidence intervals.
29+
resamples : integer, default 5000.
30+
The number of resamples taken to generate the bootstraps which are used
31+
to generate the confidence intervals.
32+
random_seed : int, default 12345
33+
This integer is used to seed the random number generator during
34+
bootstrap resampling, ensuring that the confidence intervals
35+
reported are replicable.
36+
37+
Returns
38+
-------
39+
A `Dabest` object.
40+
41+
Examples
42+
--------
43+
Load libraries.
44+
45+
>>> import numpy as np
46+
>>> import pandas as pd
47+
>>> import dabest
48+
49+
Create dummy data for demonstration.
50+
51+
>>> np.random.seed(88888)
52+
>>> N = 10
53+
>>> c1 = np.random.normal(loc=100, scale=5, size=N)
54+
>>> t1 = np.random.normal(loc=115, scale=5, size=N)
55+
>>> df = pd.DataFrame({'Control 1' : c1, 'Test 1': t1})
56+
57+
Load the data.
58+
59+
>>> my_data = dabest.load(df, idx=("Control 1", "Test 1"))
60+
61+
'''
62+
from ._classes import Dabest
63+
64+
return Dabest(data, idx, x, y, paired, id_col, ci, resamples, random_seed)

dabest/_archive/README.md

Lines changed: 0 additions & 3 deletions
This file was deleted.

dabest/_classes.py

Lines changed: 71 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,8 @@ def __init__(self, control, test, effect_size,
325325
326326
Parameters
327327
----------
328-
control, test : array-like
328+
control : array-like
329+
test : array-like
329330
These should be numerical iterables.
330331
effect_size : string.
331332
Any one of the following are accepted inputs:
@@ -344,9 +345,43 @@ def __init__(self, control, test, effect_size,
344345
345346
Returns
346347
-------
347-
A `TwoGroupEffectSize` object.
348-
349-
348+
A :py:class:`TwoGroupsEffectSize` object.
349+
350+
difference : float
351+
The effect size of the difference between the control and the test.
352+
353+
effect_size : string
354+
The type of effect size reported.
355+
356+
is_paired : boolean
357+
Whether or not the difference is paired (i.e. repeated measures).
358+
359+
ci : float
360+
Returns the width of the confidence interval, in percent.
361+
362+
alpha : float
363+
Returns the significance level of the statistical test as a float
364+
between 0 and 1.
365+
366+
resamples : int
367+
The number of resamples performed during the bootstrap procedure.
368+
369+
bootstraps : numpy ndarray
370+
The generated bootstraps of the effect size.
371+
372+
random_seed : int
373+
The number used to seed the numpy random number generator, i.e. the
374+
`seed_value` passed to `numpy.random.seed(seed_value)`.
375+
376+
bca_low, bca_high : float
377+
The bias-corrected and accelerated confidence interval lower and
378+
upper limits, respectively.
379+
380+
pct_low, pct_high : float
381+
The percentile confidence interval lower and upper limits,
382+
respectively.
383+
384+
350385
Examples
351386
--------
352387
>>> import numpy as np
@@ -360,6 +395,36 @@ def __init__(self, control, test, effect_size,
360395
The unpaired mean difference is -0.253 [95%CI -0.782, 0.241]
361396
5000 bootstrap samples. The confidence interval is bias-corrected
362397
and accelerated.
398+
>>> effsize.to_dict()
399+
{'alpha': 0.05,
400+
'bca_high': 0.2413346581369784,
401+
'bca_interval_idx': (109, 4858),
402+
'bca_low': -0.7818088458343655,
403+
'bootstraps': array([-1.09875628, -1.08840014, -1.08258695, ..., 0.66675324,
404+
0.75814087, 0.80848265]),
405+
'ci': 95,
406+
'difference': -0.25315417702752846,
407+
'effect_size': 'mean difference',
408+
'is_paired': False,
409+
'pct_high': 0.25135646125431527,
410+
'pct_interval_idx': (125, 4875),
411+
'pct_low': -0.763588353717278,
412+
'pvalue_brunner_munzel': nan,
413+
'pvalue_kruskal': nan,
414+
'pvalue_mann_whitney': 0.2600723060808019,
415+
'pvalue_paired_students_t': nan,
416+
'pvalue_students_t': 0.34743913903372836,
417+
'pvalue_welch': 0.3474493875548965,
418+
'pvalue_wilcoxon': nan,
419+
'random_seed': 12345,
420+
'resamples': 5000,
421+
'statistic_brunner_munzel': nan,
422+
'statistic_kruskal': nan,
423+
'statistic_mann_whitney': 406.0,
424+
'statistic_paired_students_t': nan,
425+
'statistic_students_t': 0.9472545159069105,
426+
'statistic_welch': 0.9472545159069105,
427+
'statistic_wilcoxon': nan}
363428
"""
364429

365430
from numpy import array, isnan
@@ -532,7 +597,8 @@ def __init__(self, control, test, effect_size,
532597
# Mann-Whitney test: Non parametric,
533598
# does not assume normality of distributions
534599
try:
535-
mann_whitney = spstats.mannwhitneyu(control, test)
600+
mann_whitney = spstats.mannwhitneyu(control, test,
601+
alternative='two-sided')
536602
self.__pvalue_mann_whitney = mann_whitney.pvalue
537603
self.__statistic_mann_whitney = mann_whitney.statistic
538604
except ValueError:
Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,8 +139,15 @@ def gapped_lines(data, x, y, type='mean_sd', offset=0.2, ax=None,
139139
if 'lw' not in keys:
140140
kwargs['lw'] = 2.
141141

142-
# Grab the order in which the groups appear.
143-
group_order = pd.unique(data[x])
142+
# # Grab the order in which the groups appear.
143+
# group_order = pd.unique(data[x])
144+
145+
# Grab the order in which the groups appear,
146+
# depending on whether the x-column is categorical.
147+
if isinstance(data[x].dtype, pd.CategoricalDtype):
148+
group_order = pd.unique(data[x]).categories
149+
else:
150+
group_order = pd.unique(data[x])
144151

145152
means = data.groupby(x)[y].mean().reindex(index=group_order)
146153
sd = data.groupby(x)[y].std().reindex(index=group_order)

dabest/pytest.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
[pytest]
22
filterwarnings =
33
ignore::UserWarning
4-
ignore::DeprecationWarning
4+
ignore::DeprecationWarning

dabest/tests/README.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,11 @@
11
# Testing
22

3-
43
We use [pytest](https://docs.pytest.org/en/latest) to execute the tests. More documentation of the testing paradigm will be added in the near future.
4+
5+
To run the tests, go to the root directory of this repo and run:
6+
7+
```shell
8+
pytest dabest
9+
```
10+
11+

dabest/tests/test_01_effsizes_pvals.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ def test_unpaired_stats():
135135

136136
unpaired_es = TwoGroupsEffectSize(c, t, "mean_diff", is_paired=False)
137137

138-
p1 = sp.stats.mannwhitneyu(c, t).pvalue
138+
p1 = sp.stats.mannwhitneyu(c, t, alternative="two-sided").pvalue
139139
assert unpaired_es.pvalue_mann_whitney == pytest.approx(p1)
140140

141141
p2 = sp.stats.ttest_ind(c, t, nan_policy='omit').pvalue

0 commit comments

Comments
 (0)