{ "cells": [ { "cell_type": "markdown", "id": "7243286f", "metadata": {}, "source": [ "(tess)=\n", "\n", "# Fitting TESS data" ] }, { "cell_type": "code", "execution_count": null, "id": "4f50b524-485b-433b-bbf7-063133d8bd4c", "metadata": {}, "outputs": [], "source": [ "import exoplanet\n", "\n", "exoplanet.utils.docs_setup()\n", "print(f\"exoplanet.__version__ = '{exoplanet.__version__}'\")" ] }, { "cell_type": "markdown", "id": "b77acbc6", "metadata": {}, "source": [ "In this tutorial, we will reproduce the fits to the transiting planet in the Pi Mensae system discovered by [Huang et al. (2018)](https://arxiv.org/abs/1809.05967).\n", "The data processing and model are similar to the {ref}`joint` case study, but with a few extra bits like aperture selection and de-trending.\n", "\n", "To start, we need to download the target pixel file:" ] }, { "cell_type": "code", "execution_count": null, "id": "46dc3f48", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import lightkurve as lk\n", "import matplotlib.pyplot as plt\n", "from astropy.io import fits\n", "\n", "lc_file = lk.search_lightcurve(\n", " \"TIC 261136679\", sector=1, author=\"SPOC\"\n", ").download(quality_bitmask=\"hardest\", flux_column=\"pdcsap_flux\")\n", "lc = lc_file.remove_nans().normalize().remove_outliers()\n", "time = lc.time.value\n", "flux = lc.flux\n", "\n", "# For the purposes of this example, we'll discard some of the data\n", "m = (lc.quality == 0) & (\n", " np.random.default_rng(261136679).uniform(size=len(time)) < 0.3\n", ")\n", "\n", "with fits.open(lc_file.filename) as hdu:\n", " hdr = hdu[1].header\n", "\n", "texp = hdr[\"FRAMETIM\"] * hdr[\"NUM_FRM\"]\n", "texp /= 60.0 * 60.0 * 24.0\n", "\n", "ref_time = 0.5 * (np.min(time) + np.max(time))\n", "x = np.ascontiguousarray(time[m] - ref_time, dtype=np.float64)\n", "y = np.ascontiguousarray(1e3 * (flux[m] - 1.0), dtype=np.float64)\n", "\n", "plt.plot(x, y, \".k\")\n", "plt.xlabel(\"time [days]\")\n", "plt.ylabel(\"relative flux [ppt]\")\n", "_ = plt.xlim(x.min(), x.max())" ] }, { "cell_type": "markdown", "id": "81ed6be1", "metadata": {}, "source": [ "## Transit search\n", "\n", "Now, let's use [the box least squares periodogram from AstroPy](http://docs.astropy.org/en/latest/timeseries/bls.html)\n", "(Note: you'll need AstroPy v3.1 or more recent to use this feature) to estimate the period, phase, and depth of the transit." ] }, { "cell_type": "code", "execution_count": null, "id": "96b0c78c", "metadata": {}, "outputs": [], "source": [ "from astropy.timeseries import BoxLeastSquares\n", "\n", "period_grid = np.exp(np.linspace(np.log(1), np.log(15), 50000))\n", "\n", "bls = BoxLeastSquares(x, y)\n", "bls_power = bls.power(period_grid, 0.1, oversample=20)\n", "\n", "# Save the highest peak as the planet candidate\n", "index = np.argmax(bls_power.power)\n", "bls_period = bls_power.period[index]\n", "bls_t0 = bls_power.transit_time[index]\n", "bls_depth = bls_power.depth[index]\n", "transit_mask = bls.transit_mask(x, bls_period, 0.2, bls_t0)\n", "\n", "fig, axes = plt.subplots(2, 1, figsize=(10, 10))\n", "\n", "# Plot the periodogram\n", "ax = axes[0]\n", "ax.axvline(np.log10(bls_period), color=\"C1\", lw=5, alpha=0.8)\n", "ax.plot(np.log10(bls_power.period), bls_power.power, \"k\")\n", "ax.annotate(\n", " \"period = {0:.4f} d\".format(bls_period),\n", " (0, 1),\n", " xycoords=\"axes fraction\",\n", " xytext=(5, -5),\n", " textcoords=\"offset points\",\n", " va=\"top\",\n", " ha=\"left\",\n", " fontsize=12,\n", ")\n", "ax.set_ylabel(\"bls power\")\n", "ax.set_yticks([])\n", "ax.set_xlim(np.log10(period_grid.min()), np.log10(period_grid.max()))\n", "ax.set_xlabel(\"log10(period)\")\n", "\n", "# Plot the folded transit\n", "ax = axes[1]\n", "x_fold = (x - bls_t0 + 0.5 * bls_period) % bls_period - 0.5 * bls_period\n", "m = np.abs(x_fold) < 0.4\n", "ax.plot(x_fold[m], y[m], \".k\")\n", "\n", "# Overplot the phase binned light curve\n", "bins = np.linspace(-0.41, 0.41, 32)\n", "denom, _ = np.histogram(x_fold, bins)\n", "num, _ = np.histogram(x_fold, bins, weights=y)\n", "denom[num == 0] = 1.0\n", "ax.plot(0.5 * (bins[1:] + bins[:-1]), num / denom, color=\"C1\")\n", "\n", "ax.set_xlim(-0.3, 0.3)\n", "ax.set_ylabel(\"de-trended flux [ppt]\")\n", "_ = ax.set_xlabel(\"time since transit\")" ] }, { "cell_type": "markdown", "id": "287e3e93", "metadata": {}, "source": [ "## The transit model in PyMC3\n", "\n", "The transit model, initialization, and sampling are all nearly the same as the one in {ref}`joint`." ] }, { "cell_type": "code", "execution_count": null, "id": "fbb85aad", "metadata": { "lines_to_next_cell": 2 }, "outputs": [], "source": [ "import exoplanet as xo\n", "import pymc3 as pm\n", "import aesara_theano_fallback.tensor as tt\n", "\n", "import pymc3_ext as pmx\n", "from celerite2.theano import terms, GaussianProcess\n", "\n", "phase_lc = np.linspace(-0.3, 0.3, 100)\n", "\n", "\n", "def build_model(mask=None, start=None):\n", " if mask is None:\n", " mask = np.ones(len(x), dtype=bool)\n", " with pm.Model() as model:\n", " # Parameters for the stellar properties\n", " mean = pm.Normal(\"mean\", mu=0.0, sd=10.0)\n", " u_star = xo.QuadLimbDark(\"u_star\")\n", " star = xo.LimbDarkLightCurve(u_star)\n", "\n", " # Stellar parameters from Huang et al (2018)\n", " M_star_huang = 1.094, 0.039\n", " R_star_huang = 1.10, 0.023\n", " BoundedNormal = pm.Bound(pm.Normal, lower=0, upper=3)\n", " m_star = BoundedNormal(\n", " \"m_star\", mu=M_star_huang[0], sd=M_star_huang[1]\n", " )\n", " r_star = BoundedNormal(\n", " \"r_star\", mu=R_star_huang[0], sd=R_star_huang[1]\n", " )\n", "\n", " # Orbital parameters for the planets\n", " t0 = pm.Normal(\"t0\", mu=bls_t0, sd=1)\n", " log_period = pm.Normal(\"log_period\", mu=np.log(bls_period), sd=1)\n", " period = pm.Deterministic(\"period\", tt.exp(log_period))\n", "\n", " # Fit in terms of transit depth (assuming b<1)\n", " b = pm.Uniform(\"b\", lower=0, upper=1)\n", " log_depth = pm.Normal(\"log_depth\", mu=np.log(bls_depth), sigma=2.0)\n", " ror = pm.Deterministic(\n", " \"ror\",\n", " star.get_ror_from_approx_transit_depth(\n", " 1e-3 * tt.exp(log_depth), b\n", " ),\n", " )\n", " r_pl = pm.Deterministic(\"r_pl\", ror * r_star)\n", "\n", " # log_r_pl = pm.Normal(\n", " # \"log_r_pl\",\n", " # sd=1.0,\n", " # mu=0.5 * np.log(1e-3 * np.array(bls_depth))\n", " # + np.log(R_star_huang[0]),\n", " # )\n", " # r_pl = pm.Deterministic(\"r_pl\", tt.exp(log_r_pl))\n", " # ror = pm.Deterministic(\"ror\", r_pl / r_star)\n", " # b = xo.distributions.ImpactParameter(\"b\", ror=ror)\n", "\n", " ecs = pmx.UnitDisk(\"ecs\", testval=np.array([0.01, 0.0]))\n", " ecc = pm.Deterministic(\"ecc\", tt.sum(ecs**2))\n", " omega = pm.Deterministic(\"omega\", tt.arctan2(ecs[1], ecs[0]))\n", " xo.eccentricity.kipping13(\"ecc_prior\", fixed=True, observed=ecc)\n", "\n", " # Transit jitter & GP parameters\n", " log_sigma_lc = pm.Normal(\n", " \"log_sigma_lc\", mu=np.log(np.std(y[mask])), sd=10\n", " )\n", " log_rho_gp = pm.Normal(\"log_rho_gp\", mu=0, sd=10)\n", " log_sigma_gp = pm.Normal(\n", " \"log_sigma_gp\", mu=np.log(np.std(y[mask])), sd=10\n", " )\n", "\n", " # Orbit model\n", " orbit = xo.orbits.KeplerianOrbit(\n", " r_star=r_star,\n", " m_star=m_star,\n", " period=period,\n", " t0=t0,\n", " b=b,\n", " ecc=ecc,\n", " omega=omega,\n", " )\n", "\n", " # Compute the model light curve\n", " light_curves = (\n", " star.get_light_curve(orbit=orbit, r=r_pl, t=x[mask], texp=texp)\n", " * 1e3\n", " )\n", " light_curve = tt.sum(light_curves, axis=-1) + mean\n", " resid = y[mask] - light_curve\n", "\n", " # GP model for the light curve\n", " kernel = terms.SHOTerm(\n", " sigma=tt.exp(log_sigma_gp),\n", " rho=tt.exp(log_rho_gp),\n", " Q=1 / np.sqrt(2),\n", " )\n", " gp = GaussianProcess(kernel, t=x[mask], yerr=tt.exp(log_sigma_lc))\n", " gp.marginal(\"gp\", observed=resid)\n", " # pm.Deterministic(\"gp_pred\", gp.predict(resid))\n", "\n", " # Compute and save the phased light curve models\n", " pm.Deterministic(\n", " \"lc_pred\",\n", " 1e3\n", " * star.get_light_curve(\n", " orbit=orbit, r=r_pl, t=t0 + phase_lc, texp=texp\n", " )[..., 0],\n", " )\n", "\n", " # Fit for the maximum a posteriori parameters, I've found that I can get\n", " # a better solution by trying different combinations of parameters in turn\n", " if start is None:\n", " start = model.test_point\n", " map_soln = pmx.optimize(\n", " start=start, vars=[log_sigma_lc, log_sigma_gp, log_rho_gp]\n", " )\n", " map_soln = pmx.optimize(start=map_soln, vars=[log_depth])\n", " map_soln = pmx.optimize(start=map_soln, vars=[b])\n", " map_soln = pmx.optimize(start=map_soln, vars=[log_period, t0])\n", " map_soln = pmx.optimize(start=map_soln, vars=[u_star])\n", " map_soln = pmx.optimize(start=map_soln, vars=[log_depth])\n", " map_soln = pmx.optimize(start=map_soln, vars=[b])\n", " map_soln = pmx.optimize(start=map_soln, vars=[ecs])\n", " map_soln = pmx.optimize(start=map_soln, vars=[mean])\n", " map_soln = pmx.optimize(\n", " start=map_soln, vars=[log_sigma_lc, log_sigma_gp, log_rho_gp]\n", " )\n", " map_soln = pmx.optimize(start=map_soln)\n", "\n", " extras = dict(\n", " zip(\n", " [\"light_curves\", \"gp_pred\"],\n", " pmx.eval_in_model([light_curves, gp.predict(resid)], map_soln),\n", " )\n", " )\n", "\n", " return model, map_soln, extras\n", "\n", "\n", "model0, map_soln0, extras0 = build_model()" ] }, { "cell_type": "markdown", "id": "82a9efd6", "metadata": { "lines_to_next_cell": 2 }, "source": [ "Here's how we plot the initial light curve model:" ] }, { "cell_type": "code", "execution_count": null, "id": "acbc3832", "metadata": {}, "outputs": [], "source": [ "def plot_light_curve(soln, extras, mask=None):\n", " if mask is None:\n", " mask = np.ones(len(x), dtype=bool)\n", "\n", " fig, axes = plt.subplots(3, 1, figsize=(10, 7), sharex=True)\n", "\n", " ax = axes[0]\n", " ax.plot(x[mask], y[mask], \"k\", label=\"data\")\n", " gp_mod = extras[\"gp_pred\"] + soln[\"mean\"]\n", " ax.plot(x[mask], gp_mod, color=\"C2\", label=\"gp model\")\n", " ax.legend(fontsize=10)\n", " ax.set_ylabel(\"relative flux [ppt]\")\n", "\n", " ax = axes[1]\n", " ax.plot(x[mask], y[mask] - gp_mod, \"k\", label=\"de-trended data\")\n", " for i, l in enumerate(\"b\"):\n", " mod = extras[\"light_curves\"][:, i]\n", " ax.plot(x[mask], mod, label=\"planet {0}\".format(l))\n", " ax.legend(fontsize=10, loc=3)\n", " ax.set_ylabel(\"de-trended flux [ppt]\")\n", "\n", " ax = axes[2]\n", " mod = gp_mod + np.sum(extras[\"light_curves\"], axis=-1)\n", " ax.plot(x[mask], y[mask] - mod, \"k\")\n", " ax.axhline(0, color=\"#aaaaaa\", lw=1)\n", " ax.set_ylabel(\"residuals [ppt]\")\n", " ax.set_xlim(x[mask].min(), x[mask].max())\n", " ax.set_xlabel(\"time [days]\")\n", "\n", " return fig\n", "\n", "\n", "_ = plot_light_curve(map_soln0, extras0)" ] }, { "cell_type": "markdown", "id": "b0e2da0a", "metadata": {}, "source": [ "As in {ref}`joint`, we can do some sigma clipping to remove significant outliers." ] }, { "cell_type": "code", "execution_count": null, "id": "55f88cd4", "metadata": {}, "outputs": [], "source": [ "mod = (\n", " extras0[\"gp_pred\"]\n", " + map_soln0[\"mean\"]\n", " + np.sum(extras0[\"light_curves\"], axis=-1)\n", ")\n", "resid = y - mod\n", "rms = np.sqrt(np.median(resid**2))\n", "mask = np.abs(resid) < 5 * rms\n", "\n", "plt.figure(figsize=(10, 5))\n", "plt.plot(x, resid, \"k\", label=\"data\")\n", "plt.plot(x[~mask], resid[~mask], \"xr\", label=\"outliers\")\n", "plt.axhline(0, color=\"#aaaaaa\", lw=1)\n", "plt.ylabel(\"residuals [ppt]\")\n", "plt.xlabel(\"time [days]\")\n", "plt.legend(fontsize=12, loc=3)\n", "_ = plt.xlim(x.min(), x.max())" ] }, { "cell_type": "markdown", "id": "fadaadef", "metadata": {}, "source": [ "And then we re-build the model using the data without outliers." ] }, { "cell_type": "code", "execution_count": null, "id": "2447e838", "metadata": {}, "outputs": [], "source": [ "model, map_soln, extras = build_model(mask, map_soln0)\n", "_ = plot_light_curve(map_soln, extras, mask)" ] }, { "cell_type": "markdown", "id": "f9a72cbc", "metadata": {}, "source": [ "Now that we have the model, we can sample:" ] }, { "cell_type": "code", "execution_count": null, "id": "d0ef07a9", "metadata": {}, "outputs": [], "source": [ "import platform\n", "\n", "with model:\n", " trace = pm.sample(\n", " tune=1500,\n", " draws=1000,\n", " start=map_soln,\n", " # Parallel sampling runs poorly or crashes on macos\n", " cores=1 if platform.system() == \"Darwin\" else 2,\n", " chains=2,\n", " target_accept=0.95,\n", " return_inferencedata=True,\n", " random_seed=[261136679, 261136680],\n", " init=\"adapt_full\",\n", " )" ] }, { "cell_type": "code", "execution_count": null, "id": "9d4f2bc1", "metadata": {}, "outputs": [], "source": [ "import arviz as az\n", "\n", "az.summary(\n", " trace,\n", " var_names=[\n", " \"omega\",\n", " \"ecc\",\n", " \"r_pl\",\n", " \"b\",\n", " \"t0\",\n", " \"period\",\n", " \"r_star\",\n", " \"m_star\",\n", " \"u_star\",\n", " \"mean\",\n", " ],\n", ")" ] }, { "cell_type": "markdown", "id": "d27f96a5", "metadata": {}, "source": [ "## Results\n", "\n", "After sampling, we can make the usual plots.\n", "First, let's look at the folded light curve plot:" ] }, { "cell_type": "code", "execution_count": null, "id": "d409b7d6", "metadata": {}, "outputs": [], "source": [ "flat_samps = trace.posterior.stack(sample=(\"chain\", \"draw\"))\n", "\n", "# Compute the GP prediction\n", "gp_mod = extras[\"gp_pred\"] + map_soln[\"mean\"] # np.median(\n", "# flat_samps[\"gp_pred\"].values + flat_samps[\"mean\"].values[None, :], axis=-1\n", "# )\n", "\n", "# Get the posterior median orbital parameters\n", "p = np.median(flat_samps[\"period\"])\n", "t0 = np.median(flat_samps[\"t0\"])\n", "\n", "# Plot the folded data\n", "x_fold = (x[mask] - t0 + 0.5 * p) % p - 0.5 * p\n", "plt.plot(x_fold, y[mask] - gp_mod, \".k\", label=\"data\", zorder=-1000)\n", "\n", "# Overplot the phase binned light curve\n", "bins = np.linspace(-0.41, 0.41, 50)\n", "denom, _ = np.histogram(x_fold, bins)\n", "num, _ = np.histogram(x_fold, bins, weights=y[mask])\n", "denom[num == 0] = 1.0\n", "plt.plot(\n", " 0.5 * (bins[1:] + bins[:-1]), num / denom, \"o\", color=\"C2\", label=\"binned\"\n", ")\n", "\n", "# Plot the folded model\n", "pred = np.percentile(flat_samps[\"lc_pred\"], [16, 50, 84], axis=-1)\n", "plt.plot(phase_lc, pred[1], color=\"C1\", label=\"model\")\n", "art = plt.fill_between(\n", " phase_lc, pred[0], pred[2], color=\"C1\", alpha=0.5, zorder=1000\n", ")\n", "art.set_edgecolor(\"none\")\n", "\n", "# Annotate the plot with the planet's period\n", "txt = \"period = {0:.5f} +/- {1:.5f} d\".format(\n", " np.mean(flat_samps[\"period\"].values), np.std(flat_samps[\"period\"].values)\n", ")\n", "plt.annotate(\n", " txt,\n", " (0, 0),\n", " xycoords=\"axes fraction\",\n", " xytext=(5, 5),\n", " textcoords=\"offset points\",\n", " ha=\"left\",\n", " va=\"bottom\",\n", " fontsize=12,\n", ")\n", "\n", "plt.legend(fontsize=10, loc=4)\n", "plt.xlim(-0.5 * p, 0.5 * p)\n", "plt.xlabel(\"time since transit [days]\")\n", "plt.ylabel(\"de-trended flux\")\n", "_ = plt.xlim(-0.15, 0.15)" ] }, { "cell_type": "markdown", "id": "49fed894", "metadata": {}, "source": [ "And a corner plot of some of the key parameters:" ] }, { "cell_type": "code", "execution_count": null, "id": "2b890e1e", "metadata": {}, "outputs": [], "source": [ "import corner\n", "import astropy.units as u\n", "\n", "trace.posterior[\"r_earth\"] = (\n", " trace.posterior[\"r_pl\"].coords,\n", " (trace.posterior[\"r_pl\"].values * u.R_sun).to(u.R_earth).value,\n", ")\n", "\n", "_ = corner.corner(\n", " trace,\n", " var_names=[\"period\", \"r_earth\", \"b\", \"ecc\"],\n", " labels=[\n", " \"period [days]\",\n", " \"radius [Earth radii]\",\n", " \"impact param\",\n", " \"eccentricity\",\n", " ],\n", ")" ] }, { "cell_type": "markdown", "id": "b123ab5f", "metadata": {}, "source": [ "These all seem consistent with the previously published values." ] }, { "cell_type": "markdown", "id": "673982ce", "metadata": {}, "source": [ "## Citations\n", "\n", "As described in the [citation tutorial](https://docs.exoplanet.codes/en/stable/tutorials/citation/), we can use [citations.get_citations_for_model](https://docs.exoplanet.codes/en/stable/user/api/#exoplanet.citations.get_citations_for_model) to construct an acknowledgement and BibTeX listing that includes the relevant citations for this model." ] }, { "cell_type": "code", "execution_count": null, "id": "3c7d67b5", "metadata": {}, "outputs": [], "source": [ "with model:\n", " txt, bib = xo.citations.get_citations_for_model()\n", "print(txt)" ] }, { "cell_type": "code", "execution_count": null, "id": "d5718b9b", "metadata": {}, "outputs": [], "source": [ "print(bib.split(\"\\n\\n\")[0] + \"\\n\\n...\")" ] }, { "cell_type": "code", "execution_count": null, "id": "bb47eab4-4908-4177-8d5f-6be14b7a8af0", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.4" } }, "nbformat": 4, "nbformat_minor": 5 }