Differences between revisions 11 and 12
Revision 11 as of 2012-10-24 20:20:11
Size: 2024
Editor: MikhailItkin
Comment:
Revision 12 as of 2012-10-24 20:20:34
Size: 2037
Editor: MikhailItkin
Comment:
Deletions are marked like this. Additions are marked like this.
Line 8: Line 8:
 * Grouping

Pandas

  • Indexed arrays
  • DataFrame

  • DateRange

  • Indexing, slicing
  • Apply common numpy statistics
  • Data alignment
  • Grouping

   1 import numpy as np
   2 import pandas as p
   3 import Nio
   4 
   5 nc1 = Nio.open_file('10147-precip.nc') # hamburg
   6 nc2 = Nio.open_file('10015-precip.nc') # helgoland
   7 
   8 time1 = nc1.variables['time'][:]
   9 time2 = nc2.variables['time'][:]
  10 
  11 rain1 = nc1.variables['rainfall_rate_hour'][:]
  12 rain2 = nc2.variables['rainfall_rate_hour'][:]
  13 
  14 
  15 # plot data 
  16 # plot(rain1, 'g', rain2, 'b')
  17 
  18 # Timestamps shall be python dates
  19 dates1 = num2date(epoch2num(time1))
  20 dates2 = num2date(epoch2num(time2))
  21 
  22 # Indexed arrays - p.Series
  23 ds1 = p.Series(rain1, index = dates1)
  24 ds2 = p.Series(rain2, index = dates2)
  25 
  26 # Pandas is using numpy.na representation of not-a-number,
  27 # while Nio returns masked arrays
  28 # Many basic array operations are valid for pandas Series
  29 ds1 = np.where(ds1<0, nan, ds1)
  30 ds2 = np.where(ds2<0, nan, ds2)
  31 
  32 # built-in plotting functions
  33 ds1.plot()
  34 ds2.plot()
  35 
  36 # newer pandas version can drop NaN's, 
  37 # current one can only fill, 
  38 # otherwise drop by hand (hint: nan is not equal to nan :)
  39 ds1=ds1[ds1==ds1]
  40 ds2=ds2[ds2==ds2]
  41 
  42 # now we have series of different length
  43 print ds1.shape[0], ds2.shape[0]
  44 
  45 # to get the equal length series it's possible to use index from 
  46 # one of the series
  47 ds2_nan = ds2.reindex(ds1.index)
  48 ds2_backfill = ds2.reindex(ds1.index, method = 'backfill')
  49 
  50 # Basic stats
  51 print "Max: %.2f Min: %.2f Mean: %.2f Median: %.2f Count: %.2f" % (ds2.max(), ds2.min(), ds2.mean(), ds2.median(), ds2.count())
  52 
  53 # Cumulative sum 
  54 ds2.cumsum()
  55 
  56 # DataFrame - 2D labelled arrays
  57 df=p.DataFrame({"helgoland":ds2, "hamburg":ds1})
  58 
  59 # series of different length will share the same (extended) index
  60 print ds1.fillna(0).count(), ds2.fillna(0).count()
  61 print df['hamburg'].fillna(0).count(), df['helgoland'].fillna(0).count()
  62 
  63 # drop incomplete rows
  64 df.dropIncompleteRows()
  65 
  66 # correlation
  67 df.corr()
  68 
  69 # aggregation

LehreWiki: PythonCourse/PythonLES/Pandas (last edited 2012-11-05 10:53:39 by anonymous)