this is for holding javascript data
Edward Brown added file figures/sample-datasets/FittingData.ipynb
about 9 years ago
Commit id: 094a7f0aa2301cef73c84f12518161626709881b
deletions | additions
diff --git a/figures/sample-datasets/FittingData.ipynb b/figures/sample-datasets/FittingData.ipynb
index f6c5319..28c6022 100644
--- a/figures/sample-datasets/FittingData.ipynb
+++ b/figures/sample-datasets/FittingData.ipynb
...
{
"metadata": {
"name": "",
"signature":
"sha256:3a516480996866b6e191eb0415c8800297bd3471679e625f31f9974815c8534c" "sha256:b382b270d73291c9480f8f29937bcc7418093574952da7f1a728b871fad84908"
},
"nbformat": 3,
"nbformat_minor": 0,
...
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create a class `sampleDataSets`\n",
"We also need to import a custom `Python` class to make the fake datasets. You will need the file `measurements.py` to be in the same directory as this notebook."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from
numpy measurements import
linspace,zeros\n",
"from numpy.random import standard_normal, random, random_integers\n",
"\n",
"class sampleDataSets:\n",
" \"\"\"\n",
" Sets up a linear relation, y = m*x + b. Datasets can be generated \n",
" from this relation by adding gaussian fluctuations to each y. The std. \n",
" deviation of the fluctuations is chosen from a uniform random distribution \n",
" between 0.3 and 0.7. There are 3 choices for datasets.\n",
" 1. Fits the data much better than would be indicated by the size of its\n",
" quoted uncertainties. The real fluctuations have a std. dev. that is \n",
" 1/5 of the quoted one.\n",
" 2. Uncertainties are drawn from a normal distribution with a standard\n",
" deviation matching the size of the errorbars. This should produce\n",
" an ideal chi^2 distribution if many trials are conducted.\n",
" 3. Identical to 2, but 20% of the datapoints are given 5 sigma \n",
" fluctuations.\n",
" \"\"\"\n",
" \n",
" _slope = 3.0\n",
" _intercept = 1.0\n",
" _sig_low = 0.3\n",
" _sig_high = 0.7\n",
"\n",
" def __init__(self):\n",
" \"\"\"\n",
" Sets the relation, the nominal size of the errorbars, and the actual \n",
" size of the errorbars.\n",
" \"\"\"\n",
" a = self._sig_low\n",
" b = self._sig_high\n",
" self._amp = (b-a)*random() + a\n",
" self._fake = 0.2*self._amp\n",
" \n",
" def make_dataset(self,x,use_fake=False,with_outliers=False):\n",
" \"\"\"\n",
" Constructs a dataset from the given relation with errorbars drawn from \n",
" a normal distribution.\n",
" \n",
" Arguments\n",
" ---------\n",
" x := [array-like] the values at which the relation should be \n",
" evaluated\n",
" use_fake := if True, then reduce the standard deviation of the \n",
" fluctuations by a factor of 5. This dataset will \n",
" have an anomalously low chi^2.\n",
" with_outliers:= if True, then 20% of the points will have 5 sigma \n",
" fluctuations.\n",
" \n",
" Returns\n",
" -------\n",
" y := an ndarray of length(x.size) containing the dataset\n",
" \"\"\"\n",
" \n",
" m = self._slope\n",
" b = self._intercept\n",
" if use_fake:\n",
" sig = self._fake\n",
" else:\n",
" sig = self._amp\n",
" y = m*x + b + sig*standard_normal(len(x))\n",
" if with_outliers:\n",
" n = int(0.2*x.size)\n",
" sgn = zeros(n)\n",
" for i in range(n):\n",
" if random() < 0.5:\n",
" sgn[i] = -1.0\n",
" else:\n",
" sgn[i] = 1.0\n",
" indcs = random_integers(0,x.size-1,size=(2))\n",
" y[indcs] = m*x[indcs] + b + sgn*5.0*sig\n",
" return y\n",
" \n",
" def quoted_error(self):\n",
" \"\"\"\n",
" returns the quoted standard deviation\n",
" \"\"\"\n",
" return self._amp\n",
" \n",
" def unrealistic_dataset(self,x):\n",
" \"\"\"\n",
" Returns a dataset with actual uncertainties much less than the quoted \n",
" errorbars.\n",
" \n",
" Arguments\n",
" ---------\n",
" x := [array-like] the values at which the relation should be \n",
" evaluated\n",
" \n",
" Returns\n",
" -------\n",
" y := an ndarray of length(x.size) containing the dataset \n",
" \"\"\"\n",
" \n",
" return self.make_dataset(x,use_fake=True)\n",
"\n",
" def realistic_dataset(self,x):\n",
" \"\"\"\n",
" Returns a dataset with uncertainties that agree with the quoted \n",
" errorbars.\n",
" \n",
" Arguments\n",
" ---------\n",
" x := [array-like] the values at which the relation should be \n",
" evaluated\n",
" \n",
" Returns\n",
" -------\n",
" y := an ndarray of length(x.size) containing the dataset \n",
" \"\"\"\n",
"\n",
" return self.make_dataset(x,use_fake=False)\n",
"\n",
" def dataset_with_outliers(self,x):\n",
" \"\"\"\n",
" Returns a dataset with 80% of the points drawn from the normal \n",
" distribution, and 20% of the points having 5-sigma fluctuations.\n",
"\n",
" Arguments\n",
" ---------\n",
" x := [array-like] the values at which the relation should be \n",
" evaluated\n",
" \n",
" Returns\n",
" -------\n",
" y := an ndarray of length(x.size) containing the dataset \n",
" \"\"\"\n",
"\n",
" return self.make_dataset(x,use_fake=False,with_outliers=True)\n",
" \n",
" def fit(self,x):\n",
" \"\"\"\n",
" Returns the true relation, y = m*x + b\n",
"\n",
" Arguments\n",
" ---------\n",
" x := [array-like] the values at which the relation should be \n",
" evaluated\n",
" \n",
" Returns\n",
" -------\n",
" y := an ndarray of length(x.size) containing the underlying \n",
" relation.\n",
" \"\"\"\n",
" \n",
" m = self._slope\n",
" b = self._intercept\n",
" return m*x + b\n" measurementWithUncertainty"
],
"language": "python",
"metadata": {},