Differences between revisions 2 and 9 (spanning 7 versions)
Revision 2 as of 2012-10-24 15:14:38
Size: 498
Editor: MikhailItkin
Comment:
Revision 9 as of 2012-10-24 17:28:29
Size: 1456
Editor: MikhailItkin
Comment:
Deletions are marked like this. Additions are marked like this.
Line 3: Line 3:
 * Series
Line 7: Line 6:
 * Apply common numpy statistics
Line 12: Line 12:
import pandas as P import pandas as p
Line 15: Line 15:
hamNc = Nio.open_file('10147-precip.nc')
helNc = Nio.open_file('10015-precip.nc')
nc1 = Nio.open_file('10147-precip.nc') # hamburg
nc2 = Nio.open_file('10015-precip.nc') # helgoland
Line 18: Line 18:
hamTime = hamNc.variables['time'][:]
helTime = helNc.variables['time'][:]
time1 = nc1.variables['time'][:]
time2 = nc2.variables['time'][:]
Line 21: Line 21:
hamRain = hamNc.variables['rainfall_rain_rate'][:]
helRain = helNc.variables['rainfall_rain_rate'][:]
ham = hamNc.variables['rainfall_rain_rate'][:]
rain1 = nc1.variables['rainfall_rate_hour'][:]
rain2 = nc2.variables['rainfall_rate_hour'][:]


# plot data
# plot(rain1, 'g', rain2, 'b')

# Timestamps shall be python dates
dates1 = num2date(epoch2num(time1))
dates2 = num2date(epoch2num(time2))

# Indexed arrays - p.Series
ds1 = p.Series(rain1, index = dates1)
ds2 = p.Series(rain2, index = dates2)

# Pandas is using numpy.na representation of not-a-number,
# while Nio returns masked arrays
# Many basic array operations are valid for pandas Series
ds1 = np.where(ds1<0, nan, ds1)
ds2 = np.where(ds2<0, nan, ds2)

# built-in plotting functions
ds1.plot()
ds2.plot()

# newer pandas version can drop NaN's,
# current one can only fill,
# otherwise drop by hand (hint: nan is not equal to nan :)
ds1=ds1[ds1==ds1]
ds2=ds2[ds2==ds2]

# now we have series of different length
print ds1.shape[0], ds2.shape[0]

# to get the equal index it's possible to use from
# one of the series
ds2_nan = ds2.reindex(ds1.index)
ds2_backfill = ds2.reindex(ds1.index, method = 'backfill')



}}}

Pandas

  • Indexed arrays
  • DataFrame

  • DateRange

  • Indexing, slicing
  • Apply common numpy statistics
  • Data alignment

   1 import numpy as np
   2 import pandas as p
   3 import Nio
   4 
   5 nc1 = Nio.open_file('10147-precip.nc') # hamburg
   6 nc2 = Nio.open_file('10015-precip.nc') # helgoland
   7 
   8 time1 = nc1.variables['time'][:]
   9 time2 = nc2.variables['time'][:]
  10 
  11 rain1 = nc1.variables['rainfall_rate_hour'][:]
  12 rain2 = nc2.variables['rainfall_rate_hour'][:]
  13 
  14 
  15 # plot data 
  16 # plot(rain1, 'g', rain2, 'b')
  17 
  18 # Timestamps shall be python dates
  19 dates1 = num2date(epoch2num(time1))
  20 dates2 = num2date(epoch2num(time2))
  21 
  22 # Indexed arrays - p.Series
  23 ds1 = p.Series(rain1, index = dates1)
  24 ds2 = p.Series(rain2, index = dates2)
  25 
  26 # Pandas is using numpy.na representation of not-a-number,
  27 # while Nio returns masked arrays
  28 # Many basic array operations are valid for pandas Series
  29 ds1 = np.where(ds1<0, nan, ds1)
  30 ds2 = np.where(ds2<0, nan, ds2)
  31 
  32 # built-in plotting functions
  33 ds1.plot()
  34 ds2.plot()
  35 
  36 # newer pandas version can drop NaN's, 
  37 # current one can only fill, 
  38 # otherwise drop by hand (hint: nan is not equal to nan :)
  39 ds1=ds1[ds1==ds1]
  40 ds2=ds2[ds2==ds2]
  41 
  42 # now we have series of different length
  43 print ds1.shape[0], ds2.shape[0]
  44 
  45 # to get the equal index it's possible to use from 
  46 # one of the series
  47 ds2_nan = ds2.reindex(ds1.index)
  48 ds2_backfill = ds2.reindex(ds1.index, method = 'backfill')

LehreWiki: PythonCourse/PythonLES/Pandas (last edited 2012-11-05 10:53:39 by anonymous)