Size: 1025
Comment:
|
Size: 1928
Comment:
|
Deletions are marked like this. | Additions are marked like this. |
Line 15: | Line 15: |
Line 27: | Line 26: |
plot(rain1, 'g', rain2, 'b') | # plot(rain1, 'g', rain2, 'b') |
Line 43: | Line 42: |
# plotting functions | # built-in plotting functions |
Line 47: | Line 46: |
# newer pandas version can drop NaN's, # current one can only fill, # otherwise drop by hand (hint: nan is not equal to nan :) ds1=ds1[ds1==ds1] ds2=ds2[ds2==ds2] # now we have series of different length print ds1.shape[0], ds2.shape[0] # to get the equal length series it's possible to use index from # one of the series ds2_nan = ds2.reindex(ds1.index) ds2_backfill = ds2.reindex(ds1.index, method = 'backfill') # Basic stats print "Max: %.2f Min: %.2f Mean: %.2f Median: %.2f Count: %.2f" % (ds2.max(), ds2.min(), ds2.mean(), ds2.median(), ds2.count()) # Cumulative sum ds2.cumsum() # DataFrame - 2D labelled arrays df=p.DataFrame({"helgoland":ds2, "hamburg":ds1}) # series of different length will share the same (extended) index print ds1.fillna(0).count(), ds2.fillna(0).count() print df['hamburg'].fillna(0).count(), df['helgoland'].fillna(0).count() |
Pandas
"""Pandas walkthrough using rainfall records from two NetCDF files.

Reads the ``time`` and ``rainfall_rate_hour`` variables from
``10147-precip.nc`` (hamburg) and ``10015-precip.nc`` (helgoland), wraps
them in date-indexed pandas Series, replaces negative values with NaN and
then demonstrates plotting, dropping NaN's, reindexing, basic statistics,
cumulative sums and DataFrame construction.
"""
import numpy as np
import pandas as p
import Nio

nc1 = Nio.open_file('10147-precip.nc')  # hamburg
nc2 = Nio.open_file('10015-precip.nc')  # helgoland

time1 = nc1.variables['time'][:]
time2 = nc2.variables['time'][:]

rain1 = nc1.variables['rainfall_rate_hour'][:]
rain2 = nc2.variables['rainfall_rate_hour'][:]


# plot data
# plot(rain1, 'g', rain2, 'b')

# Timestamps shall be python dates
# NOTE(review): num2date and epoch2num are not imported in this script;
# presumably it is run in a pylab session (both live in matplotlib.dates)
# -- confirm before running it standalone.
dates1 = num2date(epoch2num(time1))
dates2 = num2date(epoch2num(time2))

# Indexed arrays - p.Series
ds1 = p.Series(rain1, index=dates1)
ds2 = p.Series(rain2, index=dates2)

# Pandas is using the numpy.nan representation of not-a-number,
# while Nio returns masked arrays.
# Negative values are replaced by NaN (presumably they mark missing
# data -- confirm against the files).
# np.where returns a plain ndarray, so the result is wrapped back into a
# Series with the original index; otherwise the date index and all the
# Series methods used below would be lost.
ds1 = p.Series(np.where(ds1 < 0, np.nan, ds1), index=ds1.index)
ds2 = p.Series(np.where(ds2 < 0, np.nan, ds2), index=ds2.index)

# built-in plotting functions
ds1.plot()
ds2.plot()

# newer pandas version can drop NaN's,
# current one can only fill,
# otherwise drop by hand (hint: nan is not equal to nan :)
ds1 = ds1[ds1 == ds1]
ds2 = ds2[ds2 == ds2]

# now we have series of different length
print("%s %s" % (ds1.shape[0], ds2.shape[0]))

# to get the equal length series it's possible to use index from
# one of the series
ds2_nan = ds2.reindex(ds1.index)
ds2_backfill = ds2.reindex(ds1.index, method='backfill')

# Basic stats
print("Max: %.2f Min: %.2f Mean: %.2f Median: %.2f Count: %.2f" % (ds2.max(), ds2.min(), ds2.mean(), ds2.median(), ds2.count()))

# Cumulative sum (the result is not stored; in an interactive session
# it is simply displayed)
ds2.cumsum()

# DataFrame - 2D labelled arrays
df = p.DataFrame({"helgoland": ds2, "hamburg": ds1})

# series of different length will share the same (extended) index
print("%s %s" % (ds1.fillna(0).count(), ds2.fillna(0).count()))
print("%s %s" % (df['hamburg'].fillna(0).count(), df['helgoland'].fillna(0).count()))