Back to home page

MITgcm

 
 

    


File indexing completed on 2018-03-02 18:45:16 UTC

view on githubraw file Latest commit a15d675b on 1999-05-26 23:19:47 UTC
a15d675b90 Alis*0001 #!/usr/local/bin/perl -w
                0002 
                0003 # MITgcmUV DataSet joining utility.
                0004 # Tested with perl 4.0 and newer.
                0005 # Tested on Linux 2.0.27/I486, Irix 6.2/{IP22,IP25}
                0006 # Zhangfan XING, xing@pacific.jpl.nasa.gov
                0007 # Adapted to work with MDS I/O format by adcroft@mit.edu, 5/7/1999
                0008 #
                0009 # LOGS:
                0010 # 980707, version 0.0.1, basically works
                0011 # 980721, version 0.2.0, proper handling of data file's header and terminator
                0012 #         for diff bytesex.
                0013 # 990507, HACK'd by AJA. Needs to be properly merged with the original joinds
                0014 
                0015 #------
                0016 # usage
                0017 #------
                0018 sub usage {
                0019     print STDERR
                0020         "\nUsage:$0 [-Ddir0 -Ddir1 ...] " .
                0021         "prefix suffix [(little-endian|big-endian)]\n"; 
                0022     print STDERR "\nMITgcmUV DataSet joining utility, version 0.3.0\n";
                0023     print STDERR
                0024     "Check http://escher.jpl.nasa.gov:2000/tools/ for newer version.\n";
                0025     print STDERR "Report problem to xing\@pacific.jpl.nasa.gov\n\n";
                0026     exit 1;
                0027 }
                0028 
                0029 #------------------------------
                0030 # product of a list of integers
                0031 #------------------------------
                0032 sub listprod {
                0033     local ($product) = 1;
                0034     local ($x);
                0035     foreach $x (@_) {
                0036         $product *= $x;
                0037     }
                0038     $product;
                0039 }
                0040 
                0041 #----------------
                0042 # @list1 + @list2
                0043 #----------------
                0044 sub lists_add {
                0045     local (*l1,*l2) = @_;
                0046     ($#l1 == $#l2) || return undef;
                0047 
                0048     local (@l);
                0049     for (local($i)=0;$i<=$#l1;$i++) {
                0050         $l[$i]=$l1[$i]+$l2[$i];
                0051     }
                0052     @l;
                0053 }
                0054 
                0055 #-------------
                0056 # pos to index
                0057 # 0-based.
                0058 #-------------
                0059 sub pos2index {
                0060 
                0061     local ($pos,@dim) = @_;
                0062     local ($rightmost) = pop(@dim);
                0063 
                0064     local (@index,$d);
                0065     foreach $d (@dim) {
                0066         push(@index,$pos%$d);
                0067         $pos = int($pos/$d);
                0068     }
                0069 
                0070     # self-guarding
                0071     unless ($rightmost > $pos) {
                0072         return undef;
                0073     }
                0074 
                0075     push(@index,$pos);
                0076     @index;
                0077 }
                0078 
                0079 #-------------
                0080 # index to pos
                0081 # 0-based.
                0082 #-------------
                0083 sub index2pos {
                0084     local (*index,*dim) = @_;
                0085 
                0086     return undef unless ($#index == $#dim);
                0087 
                0088     local ($pos) = $index[$#index];
                0089     for (local($i)=$#dim;$i>0;$i--) {
                0090         $pos = $pos * $dim[$i-1] + $index[$i-1];
                0091     }
                0092     $pos;
                0093 }
                0094 
                0095 #-------------------------
                0096 # check machine's bytesex.
                0097 # returns "little-endian" or "big-endian"
                0098 # or dies if unable to figure out
                0099 #-------------------------
                0100 sub mach_bytesex {
                0101 
                0102     local ($foo)  = pack("s2",1,2);
                0103     if ($foo eq "\1\0\2\0") {
                0104         return "little-endian";
                0105     } elsif ($foo eq "\0\1\0\2") {
                0106         return "big-endian";
                0107     } else {
                0108         die "Your machine has a strange bytesex.\n".
                0109         "Email your platform info to xing\@pacific.jpl.nasa.gov\n";
                0110     }
                0111 }
                0112 
                0113 #--------------------------------------------------
                0114 # check bytesex of a fortran unformatted data file
                0115 # current machine's bytesex is used as a reference.
                0116 # returns: one of "little-endian", "big-endian", "undecidable" and "unknown"
                0117 #--------------------------------------------------
                0118 sub file_bytesex {
                0119 
                0120     # only if this platform's bytesex is either big- or little-endian
                0121     # otherwise dies. Hope this won't happen.
                0122     local($mach_bytesex) = &mach_bytesex();
                0123 
                0124     local ($file) = shift;
                0125     local (*FILE);
                0126 
                0127     open(FILE,$file) || die "$file: $!\n";
                0128 
                0129     local(@fstat) = stat(FILE);
                0130     local ($size) = $fstat[7] - 8;  # total data size in bytes
                0131 
                0132     local($hdr,$tmr) = ("","");
                0133     read(FILE,$hdr,4);
                0134     seek(FILE,-4,2);
                0135     read(FILE,$tmr,4);
                0136     close(FILE);
                0137 
                0138     # this part checks for self-consistency of Fortran unformatted file
                0139     ($hdr eq $tmr) || die "$file: not a Fortran unformatted data file.\n";
                0140 
                0141     local ($ori) = unpack("I",$hdr);
                0142     local ($rev) = unpack("I",join("",reverse(split(//,$hdr))));
                0143 
                0144     ($ori != $size && $rev != $size) &&
                0145         return "unknown";
                0146 
                0147     ($ori == $size && $rev == $size) &&
                0148         return "undecidable";
                0149 
                0150     local ($opposite) = ($mach_bytesex eq "little-endian") ?
                0151                 "big-endian" : "little-endian";
                0152 
                0153     return ($ori == $size) ? $mach_bytesex : $opposite;
                0154 
                0155 }
                0156 
                0157 #--------------------------------
                0158 # check meta info for one dataset
                0159 #--------------------------------
                0160 
                0161 sub check_meta {
                0162 
                0163     local ($ds,$dir) = @_;
                0164     local ($fmeta) = "$dir/$ds.meta";
                0165 
                0166     #~~~~~~~~~~~~~~~~
                0167     # check meta info
                0168     #~~~~~~~~~~~~~~~~
                0169 
                0170     undef $/;       # read to the end of file
                0171     open(MFILE,"<$fmeta") || die "$fmeta: $!\n";
                0172     $_=<MFILE>;
                0173     close(MFILE);
                0174     $/ = "\n";      # never mess up
                0175     
                0176     s/\([^)]*\)//g;         #rm (.*)
                0177     s/\/\/[^\n]*\n//g;      #rm comment lines
                0178     s/\/\*.*\*\///g;        #rm inline comments
                0179     s/\s+//g;               #rm white spaces
                0180     /nDims=\[(.+)\];dimList=\[(.+)\];format=\['(.+)'\];nrecords=\[(.+)\];timeStepNumber=\[(.+)\];/
                0181         || die "$fmeta: meta file format error\n";
                0182     local ($nDims_,$dimList_,$format_,$nrecords_,$timeStepNumber_) = ($1,$2,$3,$4,$5);
                0183 
                0184     # check Identifier
                0185     (defined $timeStepNumber) || ($timeStepNumber = $timeStepNumber_);
                0186     ($timeStepNumber eq $timeStepNumber_) ||
                0187         die "$fmeta: timeStepNumber $timeStepNumber_ inconsistent with other dataset\n";
                0188 
                0189     # check Number of dimensions
                0190     (defined $nDims) || ($nDims = $nDims_);
                0191     ($nDims eq $nDims_) ||
                0192         die "$fmeta: nDims $nDims_ inconsistent with other dataset\n";
                0193 
                0194     # check Field format
                0195     (defined $format) || ($format = $format_);
                0196     ($format eq $format_) ||
                0197         die "$fmeta: format $format_ inconsistent with other dataset\n";
                0198 
                0199     # check dimList
                0200     # calc dimesions and leading index of this subset
                0201     local (@dimList_) = split(/,/,$dimList_);
                0202 
                0203     ($nDims_*3 == $#dimList_+1) ||
                0204         die "$fmeta: nDims and dimList conflicting\n";
                0205     
                0206     local (@Dim,@dim,@Index0) = ();
                0207     for (local($i)=0;$i<$nDims_;$i++) {
                0208         push(@Dim,$dimList_[$i*3]);
                0209         push(@dim,$dimList_[$i*3+2]-$dimList_[$i*3+1]+1);
                0210         push(@Index0,$dimList_[$i*3+1]-1);
                0211     }
                0212     local ($Dim_) = join(",",@Dim);
                0213     local ($dim_) = join(",",@dim);
                0214 
                0215     (defined $Dim) || ($Dim = $Dim_);
                0216     ($Dim eq $Dim_) ||
                0217         die "$fmeta: dimList Global inconsistent with other dataset\n";
                0218 
                0219     (defined $dim) || ($dim = $dim_);
                0220     ($dim eq $dim_) ||
                0221         die "$fmeta: dimList Local inconsistent with other dataset\n";
                0222 
                0223     $ds_Index0{$ds} = join(",", @Index0);
                0224 
                0225 #   print STDOUT "Okay $fmeta\n";
                0226 }
                0227 
                0228 #-------------------------------
                0229 # check completeness of datasets
                0230 # need to be more sophisticated
                0231 #-------------------------------
                0232 sub check_entirety {
                0233 
                0234     local (*Dim,*dim,*ds_Index0) = @_;
                0235 
                0236     local ($N) = &listprod(@Dim);
                0237     local ($n) = &listprod(@dim);
                0238     ($N) || return 0;       # against null dimension
                0239     ($n) || return 0;       # against null dimension
                0240     ($N%$n) && return 0;        # $N/$n must be a whole number
                0241 
                0242     local (@ds) = keys %ds_Index0;
                0243     ($#ds+1 == $N/$n) || return 0;  # Num of datasets must match subdomain
                0244 
                0245     1;
                0246 }
                0247 
                0248 #------------------
                0249 # merge one dataset
                0250 # assume @Dim, @dim and $bytes existing
                0251 # assume $Byte_Reorder existing
                0252 #------------------
                0253 sub merge_data {
                0254 
                0255     local ($ds,$dir,*Index0) = @_;
                0256     local ($fdata) = "$dir/$ds.data";
                0257 
                0258     # data size of one subset in bytes as told by meta info
                0259     local ($size) = &listprod(@dim) * $bytes;
                0260 
                0261     open(DFILE, "<$fdata") || die "$fdata: $!\n";
                0262 
                0263     local ($raw) = "";
                0264 #aja    sysread(DFILE,$raw,4);
                0265     # Swap header if bytesex is diff from machine's
                0266     local ($hdr);
                0267     if ($Byte_Reorder) {
                0268         $hdr = unpack("I",join("",reverse(split(//,$raw))));
                0269     } else {
                0270         $hdr = unpack("I",$raw);
                0271     }
                0272 
                0273 #aja    ($size == $hdr) ||
                0274 #aja        die "$fdata: $hdr bytes inconsistent with meta info\n";
                0275 
                0276     print STDOUT "$ds.data: $size bytes, okay, ";
                0277 
                0278 #   seek(DFILE,4,0);    # rewind back to the beginning of data
                0279 
                0280     local ($data) = "";     # old perl (< 4.0) needs this to 
                0281     sysread(DFILE,$data,$size); # avoid warning by sysread() 
                0282     local ($len_chunk) = $dim[0] * $bytes;
                0283     local ($num_chunk) = $size/$len_chunk;
                0284 
                0285     local ($pos,@index,$Pos,@Index);
                0286     for (local($i)=0;$i<$num_chunk;$i++) {
                0287         $pos = $i * $dim[0];
                0288         @index = &pos2index($pos,@dim);
                0289         @Index = &lists_add(*index,*Index0);
                0290         $Pos = &index2pos(*Index,*Dim);
                0291 #aja        seek(FILE,$Pos*$bytes+4,0);
                0292         seek(FILE,$Pos*$bytes,0);
                0293         syswrite(FILE,$data,$len_chunk,$pos*$bytes);
                0294     }
                0295 
                0296     close(DFILE);
                0297 
                0298     print STDOUT "merged from $dir\n";
                0299 }
                0300 
                0301 #============
                0302 # main script
                0303 #============
                0304 
                0305 #------------
                0306 # parse @ARGV
                0307 #............
                0308 
                0309 ($#ARGV >= 1) || &usage();
                0310 
                0311 undef @dirs;
                0312 while (1) {
                0313     $x = shift(@ARGV);
                0314     unless ($x =~ /^-D(.+)$/) {
                0315         unshift(@ARGV,$x);
                0316         last;
                0317     }
                0318     push(@dirs,$1);
                0319 }
                0320 (@dirs) || push(@dirs,".");
                0321 # @dirs is not empty after this line.
                0322 #print STDOUT join(" ",@dirs), "\n";
                0323 
                0324 ($#ARGV >= 1) || &usage();
                0325 
                0326 # data set prefix and suffix
                0327 $pref = shift(@ARGV);
                0328 $suff = shift(@ARGV);
                0329 
                0330 ($#ARGV >= 1) && &usage();
                0331 undef $forced_bytesex;
                0332 if (@ARGV) {
                0333     $forced_bytesex = shift(@ARGV);
                0334     $forced_bytesex =~ /^(little|big)-endian$/ || &usage();
                0335 }
                0336 #print STDOUT $forced_bytesex, "\n";
                0337 
                0338 #--------------------------
                0339 # obtain a list of datasets
                0340 #..........................
                0341 
                0342 # %ds_dir is a hash to store the directory that a dataset is in.
                0343 # After this step, it is assured that, for a dataset $ds,
                0344 # both $ds.meta and $ds.data exist in a unique dir $ds_dir{$ds}.
                0345 
                0346 %ds_dir = ();
                0347 foreach $dir (@dirs) {
                0348     opendir(DIR, $dir) || die "$dir: $!\n";
                0349     @fmeta = grep(/^$pref\.$suff\.\d+\.\d+\.meta$/, readdir(DIR));
                0350     closedir(DIR);
                0351     foreach $fmeta (@fmeta) {
                0352         $ds = $fmeta; $ds =~ s/\.meta$//g;
                0353         (defined $ds_dir{$ds}) &&
                0354             die "$fmeta appears in two dirs: $ds_dir{$ds} & $dir\n";
                0355         (-f "$dir/$ds.data") || die "In $dir, $ds.data missing\n";
                0356         $ds_dir{$ds} = $dir;
                0357     }
                0358 }
                0359 
                0360 @ds = sort(keys %ds_dir);    # list of datasets
                0361 (@ds) || die "No dataset found.\n";
                0362 print STDOUT "There are ", $#ds+1, " datasets.\n";
                0363 
                0364 #---------------------------------
                0365 # check meta info for all datasets
                0366 #.................................
                0367 
                0368 undef $timeStepNumber;
                0369 undef $nDims;
                0370 undef $format;
                0371 
                0372 undef $Dim;
                0373 undef $dim;
                0374 undef %ds_Index0;
                0375 
                0376 #..............................................
                0377 # check each meta file and set some global vars
                0378 
                0379 foreach $ds (@ds) {
                0380     &check_meta($ds,$ds_dir{$ds});
                0381 }
                0382 print STDOUT "All existing meta files are self- and mutually consistent.\n";
                0383 
                0384 #print join(" ",$timeStepNumber,$nDims,$format,$Dim,$dim), "\n";
                0385 #foreach $ds (@ds) {
                0386 #   $dir = $ds_dir{$ds};
                0387 #   $Index0 = $ds_Index0{$ds};
                0388 #   print "$ds\n";
                0389 #   print "$Index0\n";
                0390 #}
                0391 
                0392 @Dim = split(/,/,$Dim);
                0393 @dim = split(/,/,$dim);
                0394 
                0395 #................................
                0396 # check meta info in its entirety
                0397 
                0398 &check_entirety(*Dim,*dim,*ds_Index0) ||
                0399     die "Datasets are not complete!\n";
                0400 
                0401 print STDOUT "Datasets are complete.\n";
                0402 
                0403 #...........
                0404 # set $bytes
                0405 
                0406 if ($format eq "float32") {
                0407     $bytes = 4;
                0408 } elsif ($format eq "float64") {
                0409     $bytes = 8
                0410 } else {
                0411     die "format '$format' unknown\n";
                0412 }
                0413 
                0414 #---------------------------
                0415 # check and merge data files
                0416 #...........................
                0417 
                0418 #........................
                0419 # check machine's bytesex
                0420 # it dies if neither little- nor big-endian.
                0421 
                0422 $Mach_Bytesex = &mach_bytesex();
                0423 print STDOUT "Current machine's endianness: $Mach_Bytesex\n";
                0424 
                0425 #...................
                0426 # check file bytesex and resolve related issues
                0427 #aja undef $File_Bytesex;
                0428 #aja foreach $ds (@ds) {
                0429 #aja    $fdata = "$ds.data";
                0430 #aja    $file_bytesex = &file_bytesex($ds_dir{$ds}."/$fdata");
                0431 #aja    ($file_bytesex eq "unknown") &&
                0432 #aja        die "$fdata: endianness is neither little- nor big-endian.\n";
                0433 #aja    print STDOUT "$fdata: $file_bytesex\n";
                0434 #aja    unless ($File_Bytesex) {
                0435 #aja        $File_Bytesex = $file_bytesex;
                0436 #aja    } else {
                0437 #aja        ($File_Bytesex eq $file_bytesex) ||
                0438 #aja        die "Data files are mutually inconsistent in endianness\n";
                0439 #aja    }
                0440 #aja }
                0441 $File_Bytesex = 'big-endian';
                0442 
                0443 #------------------
                0444 # set $Byte_Reorder, which controls swapping of bytes in
                0445 # header and terminator of Fortran unformatted data files.
                0446 #aja $Byte_Reorder = 0;
                0447 $Byte_Reorder = 0;
                0448 
                0449 # if machine and data file have the same bytesex, no need for swapping
                0450 #aja ($File_Bytesex eq $Mach_Bytesex) && ($Byte_Reorder = 0);
                0451 
                0452 # if we can't determine bytesex of data file, need forced one from @ARGV.
                0453 if ($File_Bytesex eq "undecidable") {
                0454     # if no forced bytesex available, dies.
                0455     ($forced_bytesex) ||
                0456         die "Endianness of data files is undecidable, " .
                0457         "you have to give one at command line.\n";
                0458     ($forced_bytesex eq $Mach_Bytesex) && ($Byte_Reorder = 0);
                0459     print STDOUT "Endianness of data files is undecidable.\n";
                0460     print STDOUT "Data file header/tail will be treated as ";
                0461     print STDOUT "$forced_bytesex as you have instructed.\n";
                0462 # otherwise
                0463 } else {
                0464 # give a warining, if swapping is needed.
                0465 ($Byte_Reorder) &&
                0466     print STDOUT
                0467     "Please note: data files have different bytesex than machine!\n";
                0468 }
                0469 
                0470 #................
                0471 # merge data sets
                0472 
                0473 $Size = &listprod(@Dim) * $bytes;
                0474 
                0475 $fout = "$pref.$suff.data";
                0476 
                0477 open(FILE, ">$fout") || die "$fout: $!\n";
                0478 
                0479 # prepare header and teminator. Do byte reordering if necessary
                0480 $HdrTmr = pack("I",$Size);
                0481 ($Byte_Reorder) && ($HdrTmr = join("",reverse(split(//,$HdrTmr))));
                0482 
                0483 # write 4 byte header
                0484 #aja syswrite(FILE,$HdrTmr,4);
                0485 
                0486 # merge each dataset
                0487 foreach $ds (@ds) {
                0488     $dir = $ds_dir{$ds};
                0489     @Index0 = split(/,/,$ds_Index0{$ds});
                0490     &merge_data($ds,$dir,*Index0);
                0491 }
                0492 
                0493 # write 4 byte terminator
                0494 #aja seek(FILE,$Size+4,0);
                0495 #aja syswrite(FILE,$HdrTmr,4);
                0496 
                0497 close(FILE);
                0498 
                0499 print STDOUT "Global data (" .
                0500     join("x",@Dim) .
                0501     ") is in ./$fout (endianness is $File_Bytesex).\n";
                0502 
                0503 exit 0;