#acl AdminGroup:read,write,revert EditorGroup:read,write,revert StudentGroup:read,write,revert All:read
#format wiki
#language en
#pragma section-numbers off
<<TableOfContents(2)>>

= Reading different data formats =
This lesson deals with the ways of reading and writing data in different formats

= Basic Python =
The [[http://docs.python.org/lib/bltin-file-objects.html|file object]] can be used for reading and  writing plain text as well as unformatted binary data. The following code writes a message in the file with the name ''out.txt'', reads and print the data

{{{
#!python

file('out.txt','w').write('Hallo Datentraeger')
print file('out.txt').read()
}}}
 * {{{write()}}} writes a string to the file
 * {{{read()}}} reads complete file
 * {{{read(N)}}} reads N bytes
 * {{{readlines()}}} reads the file with linebreaks
 * {{{readline()}}} reads only the next line

== Pickle ==
The [[http://docs.python.org/lib/module-pickle.html|pickle module]] implements an algorithm for serializing and de-serializing a Python object structure. ''Pickling'' is the process whereby a Python object hierarchy is converted into a byte stream, and ''unpickling'' is the inverse operation, whereby a byte stream is converted back into an object hierarchy. The [[http://docs.python.org/lib/module-cPickle.html|cPickle module]] is a much faster implementation and should be preferred.

{{{
#!python

a={'A':1}# a python object
pickle.dump(a,open('test.dat','w')) # writes object to file

b=pickle.load(open('test.dat','r')) # reads the object from file
}}}
== Comma Separated Values ==
The so-called CSV (Comma Separated Values) format is the most common import and export format for spreadsheets (Excel) and databases.  The [[http://docs.python.org/lib/module-csv.html|csv module]] enables CSV file reading and writing

= NumPy/SciPy =
Arrays can be read from a file and written to a file using the function and method:

 * {{{fromfile(...)}}}
 * {{{.tofile(...)}}}

= Scientific data format module matrix =
||<tablewidth="200px">Format\module ||[[http://pysclint.sourceforge.net/pyhdf/|pyhdf]] ||[[http://www.gdal.org/|GDAL]] ||[[http://www.pyngl.ucar.edu/|PyNIO]] ||[[http://www.pytables.org/moin|PyTables]] ||
||Netcdf ||read ||read/write ||read/write || ||
||HDF4 ||read/write ||read/write ||read/write || ||
||HDF5 || ||read/write || ||read/write ||
||GRIB || ||read ||read/write || ||

Pyhdf is a simple interface to HDF. If you don't need other formats Pyhdf may be a good choice. GDAL has some overhead because of the single abstract data model for all formats but supports the largest number of different satellite formats. PyNIO is an excellent choice for dealing with meteorological and climatological datasets. Since PyNGL/PyNIO recently became open source it can be highly recommended. PyTables could be a choice for dealing with HDF5 if GDAL is not sufficient.

= HDF =
NASA's standard file format, developed by NCSA. The [[http://hdf.ncsa.uiuc.edu/index.html|HDF format]] is a self-describing data format. HDF files can contain binary data and allow direct access to parts of the file without first parsing the entire contents.

The HDF versions 4 and 5 are not compatible.

Different modules are available for reading and writing HDF files

== HDF4 pyhdf ==
[[http://pysclint.sourceforge.net/pyhdf/|pyhdf]] is a simple python interface to the NCSA HDF4 library.

The following example demonstrates how to read [[http://eosweb.larc.nasa.gov/PRODOCS/misr/level3/download_data.html|level-3 data]] from the [[http://www-misr.jpl.nasa.gov/|Multi-angle Imaging Spectral Radiometer (MISR)]] on the [[http://terra.nasa.gov/|Terra Satellite]]

{{{
#!python

from scipy import array
from pylab import imshow,colorbar,title,savefig
from pyhdf.SD import SD

f=SD('MISR_AM1_CGLS_MAY_2007_F04_0025.hdf')
print f.datasets().keys()
data=array(f.select('NDVI average').get())
data[data<0]=0

imshow(data,interpolation='nearest',cmap=cm.YlGn)
colorbar(shrink=0.5)
title('Normalized Difference Vegetation Index')
}}}
Line 5 opens the HDF file object. line 6 prints the keywords of the included datasets. From this one can identify the keyword for the desired parameter. Line 7 reads the data in a SciPy array. Line 8 selects the negative (bad and missing) data and sets them to zero.

{{attachment:ndvi.png}}

== HDF5 ==
[[http://www.pytables.org/moin|PyTables]] is a package for managing hierarchical datasets and designed to efficiently and easily cope with extremely large amounts of data.

= netCDF =
[[http://www.pyngl.ucar.edu/|PyNIO]] is a Python package that allows read and/or write access to a variety of data formats using an interface modelled on [[http://www.unidata.ucar.edu/software/netcdf/|netCDF]].

The following example demonstrates how to use the [[http://www.pyngl.ucar.edu/|PyNGL]] module to read and display sea ice concentration data.

{{{
#!python

import os
from scipy import array,arange,nan
from PyNGL import Ngl
from PyNGL import Nio

def Ngl_map(C,lat,lon,psfile):
    rlist            = Ngl.Resources()
    rlist.wkColorMap = 'posneg_1'
    wks_type = "ps"
    wks = Ngl.open_wks(wks_type,psfile,rlist)
    resources = Ngl.Resources()
    resources.sfXArray        = lon[:,:]
    resources.sfYArray        = lat[:,:]
    resources.mpProjection          = "Stereographic"
    resources.mpDataBaseVersion     = "MediumRes"
    resources.mpLimitMode           = "LatLon"
    resources.mpMinLonF             = 0
    resources.mpMaxLonF             = 360
    resources.mpMinLatF             = 65
    resources.mpMaxLatF             = 90
    resources.mpCenterLonF           = 0.
    resources.mpFillOn     = True
    igray = Ngl.new_color(wks,0.7,0.7,0.7)
    resources.mpFillColors = [0,-1,igray,-1]
    resources.cnLineDrawOrder      = "Predraw"
    resources.cnFillOn             = True
    resources.cnFillDrawOrder       = "Predraw"
    resources.cnLineLabelsOn        = False
    resources.nglSpreadColorStart  = 8
    resources.nglSpreadColorEnd      = -2
    resources.cnLevelSelectionMode = "ExplicitLevels" # Define own levels.
    resources.cnLevels             = arange(0.,100,10)
    resources.lbTitleString             = 'Concentration [%]'
    resources.lbOrientation             = "Horizontal"
    resources.cnFillMode           = "RasterFill"
    resources.cnLinesOn            = False
    resources.tiMainString     = "~F22~Arctic Sea Ice Coverage~C~~F21~September average from SSM/I"
    map = Ngl.contour_map(wks,C[:,:],resources)

grid = Nio.open_file('grid_north_12km.nc')
lat=array(grid.variables['latitude'])
lon=array(grid.variables['longitude'])
nc = Nio.open_file('climatology_09.nc')
C=array(nc.variables['concentration'])[0,:,:].astype(float)
Ngl_map(C,lat,lon,'map')
os.system('gv map.ps &')
}}}
The data files can be downloaded from the [[ftp://ftp.ifremer.fr/ifremer/cersat/products/gridded/psi-concentration/data|ftp server]] of the [[http://cersat.ifremer.fr/|Center for Satellite Exploitation and Research (CERSAT)]] which is one of the major world data centers for oceanography.

The September mean sea ice concentration values derived from the [[http://nsidc.org/data/docs/daac/ssmi_instrument.gd.html|Special Sensor Microwave Imager (SSM/I)]]  are stored in the netCDF file [[ftp://ftp.ifremer.fr/ifremer/cersat/products/gridded/psi-concentration/data/arctic/climatology/netcdf/climatology_09.nc.Z|climatology_09.nc]] . Compressed data with the extension {{{.Z}}} or {{{.gz}}} can be uncompressed using {{{uncompress}}} or {{{gunzip}}}, respectively. A description of the data and the algorithm can be found [[ftp://ftp.ifremer.fr/ifremer/cersat/products/gridded/psi-drift/documentation/ssmi.pdf|here]].

The main program that reads the data starts in line 40. At first the coordinates of the corresponding pixels  are read into the variables {{{lat}}} and {{{lon}}}.  The ice concentration data are read in the variable {{{C}}}. The function {{{Ngl_map}}} creates a postscript output in the file {{{map.ps}}} which is displayed using the command {{{gv}}}.

{{attachment:september.png}}

= Various Satellite data formats =
The [[http://www.gdal.org/|Geospatial Data Abstraction Library (GDAL)]] has a [[http://www.gdal.org/gdal_datamodel.html|single abstract data model]] for all [[http://www.gdal.org/formats_list.html|supported formats]]

== Envisat ASAR ==
The following code demonstrates how to read in an Envisat ASAR image using the gdal module.

{{{
#!python

import gdal, struct
from scipy import array,empty,uint16
from pylab import imshow

filename='/pf/u/u242023/ASA_IMM_1PNPDK20080410_095538_000001462067_00337_31954_4832.N1'

f=gdal.Open(filename)
a=f.GetRasterBand(1)
img=empty((a.YSize,a.XSize),dtype=uint16)


for yi in xrange(a.YSize):
    scanline=struct.unpack("H"*a.XSize,a.ReadRaster(0,yi,a.XSize,1,a.XSize,1,gdal.GDT_UInt16))
    img[yi,:]=array(scanline).astype(uint16)

imshow(img,vmin=0,vmax=3000,interpolation='nearest',origin='lower')
}}}
{{attachment:asar_weser_elbe.png}}

== Landsat 7 ==
{{{
#!python

import sys,os
home=os.getenv('HOME')
import gdal,glob
from pylab import *

def sensor_params():
    Freqs={'B10':(0.45,0.52),'B20':(0.52,0.60),'B30':(0.63,0.69),'B40':(0.77,0.90),'B50':(1.55,1.75),'B70':(2.09,2.35),'B61':(10.4,12.5),'B62':(10.4,12.5),'B80':(0.52,0.90)}
    Resolution={'B10':1,'B20':1,'B30':1,'B40':1,'B50':1,'B70':1,'B61':0.5,'B62':0.5,'B80':2} # Relative to 30m
    return {'f':Freqs,'r':Resolution}


def read_ls7(datadir,crop):
    SP=sensor_params()
    files=glob.glob(datadir+'*.TIF')
    B={}
    for fn in files:
        band=fn[-7:-4]
        C=((crop*SP['r'][band]).astype(int))
        print 'Reading Band ',band,'Size Position',C
        B[band]=gdal.Open(fn).ReadAsArray(xsize=C[0],ysize=C[1],xoff=C[2],yoff=C[3])
    return B

def display_rgb(B,**kw):
    RGB=array([B['B30'],B['B20'],B['B10']]).transpose()
    if kw.has_key('crop'):
        C=kw['crop']#extent is (left, right, bottom, top) data values of the axes
        extent=(C[2],C[2]+C[0],C[3],C[3]+C[1])
        imshow(RGB,origin='lower',interpolation='nearest',extent=extent)
    else:
        imshow(RGB,origin='lower',interpolation='nearest')
    show()
    return

datadir='/pf/u/u241127/satbild/LE71770012008201SGS00/'
crop=array([600,600,2600,4700]) # Crop region xsize,ysize,xoff,yoff
B=read_ls7(datadir,crop)
display_rgb(B,crop=crop)
savefig('landsat7.png',dpi=100)
}}}
{{attachment:landsat7.png}}