Regression & GLM

NextStat includes a full GLM stack — linear, logistic, Poisson, and negative binomial regression — with MLE fitting, Bayesian sampling, cross-validation, and formula interface. All computation runs in Rust; Python provides the API surface.

Linear Regression

import nextstat.glm.linear as lm

fit = lm.fit(X, y)
print(fit.params, fit.se, fit.confint())

# With formula interface
fit = lm.from_formula("y ~ x1 + x2 + x3", data)

# Ridge regression (MAP/L2 prior)
fit = lm.fit(X, y, prior_scale=1.0)

Logistic Regression

import nextstat.glm.logistic as logit

fit = logit.fit(X, y)

# Separation detection warning is automatic
# from_formula interface
fit = logit.from_formula("outcome ~ age + treatment", data)

Poisson & Negative Binomial

import nextstat.glm.poisson as pois
import nextstat.glm.negbin as nb

fit = pois.fit(X, y, exposure=offset)
fit = nb.fit(X, y)

# Formula interface
fit = pois.from_formula("counts ~ x1 + x2", data, exposure="log_pop")

Cross-Validation

from nextstat.glm.cv import cross_val_score, kfold_indices
from nextstat.glm.metrics import rmse, log_loss, poisson_deviance

scores = cross_val_score(lm.fit, X, y, k=5, metric=rmse)

Hierarchical Models

import nextstat.hier as hier

# Random intercept
fit = hier.linear_random_intercept(X, y, groups)

# Random intercept + slope
fit = hier.linear_random_slope(X, y, groups)

# Correlated random effects (LKJ + Cholesky)
fit = hier.logistic_correlated_intercept_slope(X, y, groups)

# Formula interface
fit = hier.linear_random_intercept_from_formula(
    "y ~ x1 + x2", data, group_col="school"
)

Summaries & Robust SE

from nextstat.summary import fit_summary, summary_to_str
from nextstat.robust import hc0, hc1, hc2, hc3, cluster_1way

summary = fit_summary(fit)
print(summary_to_str(summary))

# Robust covariance
robust_cov = hc3(fit)
cluster_cov = cluster_1way(fit, groups)

scikit-learn Adapters

from nextstat.sklearn import (
    NextStatLinearRegression,
    NextStatLogisticRegression,
    NextStatPoissonRegressor,
)

model = NextStatLinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

Gamma GLM

Gamma distribution with log link for strictly positive continuous responses (insurance claims, hospital costs).

import nextstat

fit = nextstat.gamma_glm(y, X)
print(fit['params'])       # coefficients
print(fit['alpha'])        # shared shape parameter
print(fit['se'])           # standard errors
print(fit['nll'])          # negative log-likelihood

Tweedie GLM

Compound Poisson-Gamma with power p ∈ (1, 2) and log link. Handles exact zeros — ideal for insurance aggregate claims, rainfall data.

fit = nextstat.tweedie_glm(y, X, p=1.5)
print(fit['params'])       # coefficients
print(fit['phi'])          # dispersion
print(fit['p'])            # power parameter

GEV & GPD (Extreme Value)

Generalized Extreme Value for block maxima and Generalized Pareto for peaks-over-threshold.

# GEV — block maxima (flood, wind, temperature)
gev = nextstat.gev_fit(block_maxima)
print(f"mu={gev['mu']:.2f}, sigma={gev['sigma']:.2f}, xi={gev['xi']:.3f}")
print(f"100-year return level: {gev['return_level_100']:.1f}")

# GPD — peaks over threshold (VaR, ES)
gpd = nextstat.gpd_fit(exceedances)
print(f"sigma={gpd['sigma']:.2f}, xi={gpd['xi']:.3f}")
print(f"99th percentile excess: {gpd['quantile_99']:.1f}")

Meta-Analysis

Fixed-effects (inverse-variance) and random-effects (DerSimonian-Laird) pooling with heterogeneity diagnostics.

# Fixed-effects meta-analysis
fe = nextstat.meta_fixed(effects, se_values)
print(f"Pooled = {fe['pooled']:.3f} [{fe['ci_lower']:.3f}, {fe['ci_upper']:.3f}]")

# Random-effects meta-analysis
re = nextstat.meta_random(effects, se_values)
print(f"Pooled = {re['pooled']:.3f}, tau2 = {re['tau_squared']:.4f}")
print(f"I2 = {re['i_squared']:.1f}%, Q = {re['q']:.2f}, p = {re['q_p_value']:.4f}")

# Per-study weights for forest plot
for w, lo, hi in zip(re['weights'], re['study_ci_lower'], re['study_ci_upper']):
    print(f"  weight={w:.1f}%  [{lo:.3f}, {hi:.3f}]")