Back to home page

MITgcm

 
 

    


File indexing completed on 2018-03-02 18:45:15 UTC

view on githubraw file Latest commit a15d675b on 1999-05-26 23:19:47 UTC
a15d675b90 Alis*0001 #!/usr/local/bin/perl -w
                0002 
                0003 # MITgcmUV dataset joining utility.
                0004 # Tested with perl 4.0 and newer.
                0005 # Tested on Linux 2.0.27/I486, Irix 6.2/{IP22,IP25}
                0006 # Zhangfan XING, xing@pacific.jpl.nasa.gov
                0007 #
                0008 # LOGS:
                0009 # 980707, version 0.0.1, basically works
                0010 # 980721, version 0.2.0, proper handling of data file's header and terminator
                0011 #         for diff bytesex.
                0012 
                0013 #------
                0014 # usage
                0015 #------
                0016 sub usage {
                0017     print STDERR
                0018         "\nUsage:$0 [-Ddir0 -Ddir1 ...] " .
                0019         "prefix suffix [(little-endian|big-endian)]\n"; 
                0020     print STDERR "\nMITgcmUV dataset joining utility, version 0.2.0\n";
                0021     print STDERR
                0022     "Check http://escher.jpl.nasa.gov:2000/tools/ for newer version.\n";
                0023     print STDERR "Report problem to xing\@pacific.jpl.nasa.gov\n\n";
                0024     exit 1;
                0025 }
                0026 
                0027 #------------------------------
                0028 # product of a list of integers
                0029 #------------------------------
                0030 sub listprod {
                0031     local ($product) = 1;
                0032     local ($x);
                0033     foreach $x (@_) {
                0034         $product *= $x;
                0035     }
                0036     $product;
                0037 }
                0038 
                0039 #----------------
                0040 # @list1 + @list2
                0041 #----------------
                0042 sub lists_add {
                0043     local (*l1,*l2) = @_;
                0044     ($#l1 == $#l2) || return undef;
                0045 
                0046     local (@l);
                0047     for (local($i)=0;$i<=$#l1;$i++) {
                0048         $l[$i]=$l1[$i]+$l2[$i];
                0049     }
                0050     @l;
                0051 }
                0052 
                0053 #-------------
                0054 # pos to index
                0055 # 0-based.
                0056 #-------------
                0057 sub pos2index {
                0058 
                0059     local ($pos,@dim) = @_;
                0060     local ($rightmost) = pop(@dim);
                0061 
                0062     local (@index,$d);
                0063     foreach $d (@dim) {
                0064         push(@index,$pos%$d);
                0065         $pos = int($pos/$d);
                0066     }
                0067 
                0068     # self-guarding
                0069     unless ($rightmost > $pos) {
                0070         return undef;
                0071     }
                0072 
                0073     push(@index,$pos);
                0074     @index;
                0075 }
                0076 
                0077 #-------------
                0078 # index to pos
                0079 # 0-based.
                0080 #-------------
                0081 sub index2pos {
                0082     local (*index,*dim) = @_;
                0083 
                0084     return undef unless ($#index == $#dim);
                0085 
                0086     local ($pos) = $index[$#index];
                0087     for (local($i)=$#dim;$i>0;$i--) {
                0088         $pos = $pos * $dim[$i-1] + $index[$i-1];
                0089     }
                0090     $pos;
                0091 }
                0092 
                0093 #-------------------------
                0094 # check machine's bytesex.
                0095 # returns "little-endian" or "big-endian"
                0096 # or dies if unable to figure out
                0097 #-------------------------
                0098 sub mach_bytesex {
                0099 
                0100     local ($foo)  = pack("s2",1,2);
                0101     if ($foo eq "\1\0\2\0") {
                0102         return "little-endian";
                0103     } elsif ($foo eq "\0\1\0\2") {
                0104         return "big-endian";
                0105     } else {
                0106         die "Your machine has a strange bytesex.\n".
                0107         "Email your platform info to xing\@pacific.jpl.nasa.gov\n";
                0108     }
                0109 }
                0110 
                0111 #--------------------------------------------------
                0112 # check bytesex of a fortran unformatted data file
                0113 # current machine's bytesex is used as a reference.
                0114 # returns: one of "little-endian", "big-endian", "undecidable" and "unknown"
                0115 #--------------------------------------------------
                0116 sub file_bytesex {
                0117 
                0118     # only if this platform's bytesex is either big- or little-endian
                0119     # otherwise dies. Hope this won't happen.
                0120     local($mach_bytesex) = &mach_bytesex();
                0121 
                0122     local ($file) = shift;
                0123     local (*FILE);
                0124 
                0125     open(FILE,$file) || die "$file: $!\n";
                0126 
                0127     local(@fstat) = stat(FILE);
                0128     local ($size) = $fstat[7] - 8;  # total data size in bytes
                0129 
                0130     local($hdr,$tmr) = ("","");
                0131     read(FILE,$hdr,4);
                0132     seek(FILE,-4,2);
                0133     read(FILE,$tmr,4);
                0134     close(FILE);
                0135 
                0136     # this part checks for self-consistency of Fortran unformatted file
                0137     ($hdr eq $tmr) || die "$file: not a Fortran unformatted data file.\n";
                0138 
                0139     local ($ori) = unpack("I",$hdr);
                0140     local ($rev) = unpack("I",join("",reverse(split(//,$hdr))));
                0141 
                0142     ($ori != $size && $rev != $size) &&
                0143         return "unknown";
                0144 
                0145     ($ori == $size && $rev == $size) &&
                0146         return "undecidable";
                0147 
                0148     local ($opposite) = ($mach_bytesex eq "little-endian") ?
                0149                 "big-endian" : "little-endian";
                0150 
                0151     return ($ori == $size) ? $mach_bytesex : $opposite;
                0152 
                0153 }
                0154 
                0155 #--------------------------------
                0156 # check meta info for one dataset
                0157 #--------------------------------
                0158 
                0159 sub check_meta {
                0160 
                0161     local ($ds,$dir) = @_;
                0162     local ($fmeta) = "$dir/$ds.meta";
                0163 
                0164     #~~~~~~~~~~~~~~~~
                0165     # check meta info
                0166     #~~~~~~~~~~~~~~~~
                0167 
                0168     undef $/;       # read to the end of file
                0169     open(MFILE,"<$fmeta") || die "$fmeta: $!\n";
                0170     $_=<MFILE>;
                0171     close(MFILE);
                0172     $/ = "\n";      # never mess up
                0173     
                0174     s/\([^)]*\)//g;         #rm (.*)
                0175     s/\/\/[^\n]*\n//g;      #rm comment lines
                0176     s/\/\*.*\*\///g;        #rm inline comments
                0177     s/\s+//g;               #rm white spaces
                0178     /id=\[(.+)\];nDims=\[(.+)\];dimList=\[(.+)\];format=\['(.+)'\];/
                0179         || die "$fmeta: meta file format error\n";
                0180     local ($id_,$nDims_,$dimList_,$format_) = ($1,$2,$3,$4);
                0181 
                0182     # check Identifier
                0183     (defined $id) || ($id = $id_);
                0184     ($id eq $id_) ||
                0185         die "$fmeta: id $id_ inconsistent with other dataset\n";
                0186 
                0187     # check Number of dimensions
                0188     (defined $nDims) || ($nDims = $nDims_);
                0189     ($nDims eq $nDims_) ||
                0190         die "$fmeta: nDims $nDims_ inconsistent with other dataset\n";
                0191 
                0192     # check Field format
                0193     (defined $format) || ($format = $format_);
                0194     ($format eq $format_) ||
                0195         die "$fmeta: format $format_ inconsistent with other dataset\n";
                0196 
                0197     # check dimList
                0198     # calc dimesions and leading index of this subset
                0199     local (@dimList_) = split(/,/,$dimList_);
                0200 
                0201     ($nDims_*3 == $#dimList_+1) ||
                0202         die "$fmeta: nDims and dimList conflicting\n";
                0203     
                0204     local (@Dim,@dim,@Index0) = ();
                0205     for (local($i)=0;$i<$nDims_;$i++) {
                0206         push(@Dim,$dimList_[$i*3]);
                0207         push(@dim,$dimList_[$i*3+2]-$dimList_[$i*3+1]+1);
                0208         push(@Index0,$dimList_[$i*3+1]-1);
                0209     }
                0210     local ($Dim_) = join(",",@Dim);
                0211     local ($dim_) = join(",",@dim);
                0212 
                0213     (defined $Dim) || ($Dim = $Dim_);
                0214     ($Dim eq $Dim_) ||
                0215         die "$fmeta: dimList Global inconsistent with other dataset\n";
                0216 
                0217     (defined $dim) || ($dim = $dim_);
                0218     ($dim eq $dim_) ||
                0219         die "$fmeta: dimList Local inconsistent with other dataset\n";
                0220 
                0221     $ds_Index0{$ds} = join(",", @Index0);
                0222 
                0223 #   print STDOUT "Okay $fmeta\n";
                0224 }
                0225 
                0226 #-------------------------------
                0227 # check completeness of datasets
                0228 # need to be more sophisticated
                0229 #-------------------------------
                0230 sub check_entirety {
                0231 
                0232     local (*Dim,*dim,*ds_Index0) = @_;
                0233 
                0234     local ($N) = &listprod(@Dim);
                0235     local ($n) = &listprod(@dim);
                0236     ($N) || return 0;       # against null dimension
                0237     ($n) || return 0;       # against null dimension
                0238     ($N%$n) && return 0;        # $N/$n must be a whole number
                0239 
                0240     local (@ds) = keys %ds_Index0;
                0241     ($#ds+1 == $N/$n) || return 0;  # Num of datasets must match subdomain
                0242 
                0243     1;
                0244 }
                0245 
                0246 #------------------
                0247 # merge one dataset
                0248 # assume @Dim, @dim and $bytes existing
                0249 # assume $Byte_Reorder existing
                0250 #------------------
                0251 sub merge_data {
                0252 
                0253     local ($ds,$dir,*Index0) = @_;
                0254     local ($fdata) = "$dir/$ds.data";
                0255 
                0256     # data size of one subset in bytes as told by meta info
                0257     local ($size) = &listprod(@dim) * $bytes;
                0258 
                0259     open(DFILE, "<$fdata") || die "$fdata: $!\n";
                0260 
                0261     local ($raw) = "";
                0262     sysread(DFILE,$raw,4);
                0263     # Swap header if bytesex is diff from machine's
                0264     local ($hdr);
                0265     if ($Byte_Reorder) {
                0266         $hdr = unpack("I",join("",reverse(split(//,$raw))));
                0267     } else {
                0268         $hdr = unpack("I",$raw);
                0269     }
                0270 
                0271     ($size == $hdr) ||
                0272         die "$fdata: $hdr bytes inconsistent with meta info\n";
                0273 
                0274     print STDOUT "$ds.data: $size bytes, okay, ";
                0275 
                0276 #   seek(DFILE,4,0);    # rewind back to the beginning of data
                0277 
                0278     local ($data) = "";     # old perl (< 4.0) needs this to 
                0279     sysread(DFILE,$data,$size); # avoid warning by sysread() 
                0280     local ($len_chunk) = $dim[0] * $bytes;
                0281     local ($num_chunk) = $size/$len_chunk;
                0282 
                0283     local ($pos,@index,$Pos,@Index);
                0284     for (local($i)=0;$i<$num_chunk;$i++) {
                0285         $pos = $i * $dim[0];
                0286         @index = &pos2index($pos,@dim);
                0287         @Index = &lists_add(*index,*Index0);
                0288         $Pos = &index2pos(*Index,*Dim);
                0289         seek(FILE,$Pos*$bytes+4,0);
                0290         syswrite(FILE,$data,$len_chunk,$pos*$bytes);
                0291     }
                0292 
                0293     close(DFILE);
                0294 
                0295     print STDOUT "merged from $dir\n";
                0296 }
                0297 
                0298 #============
                0299 # main script
                0300 #============
                0301 
                0302 #------------
                0303 # parse @ARGV
                0304 #............
                0305 
                0306 ($#ARGV >= 1) || &usage();
                0307 
                0308 undef @dirs;
                0309 while (1) {
                0310     $x = shift(@ARGV);
                0311     unless ($x =~ /^-D(.+)$/) {
                0312         unshift(@ARGV,$x);
                0313         last;
                0314     }
                0315     push(@dirs,$1);
                0316 }
                0317 (@dirs) || push(@dirs,".");
                0318 # @dirs is not empty after this line.
                0319 #print STDOUT join(" ",@dirs), "\n";
                0320 
                0321 ($#ARGV >= 1) || &usage();
                0322 
                0323 # data set prefix and suffix
                0324 $pref = shift(@ARGV);
                0325 $suff = shift(@ARGV);
                0326 
                0327 ($#ARGV >= 1) && &usage();
                0328 undef $forced_bytesex;
                0329 if (@ARGV) {
                0330     $forced_bytesex = shift(@ARGV);
                0331     $forced_bytesex =~ /^(little|big)-endian$/ || &usage();
                0332 }
                0333 #print STDOUT $forced_bytesex, "\n";
                0334 
                0335 #--------------------------
                0336 # obtain a list of datasets
                0337 #..........................
                0338 
                0339 # %ds_dir is a hash to store the directory that a dataset is in.
                0340 # After this step, it is assured that, for a dataset $ds,
                0341 # both $ds.meta and $ds.data exist in a unique dir $ds_dir{$ds}.
                0342 
                0343 %ds_dir = ();
                0344 foreach $dir (@dirs) {
                0345     opendir(DIR, $dir) || die "$dir: $!\n";
                0346     @fmeta = grep(/^$pref\.$suff\.p\d+\.t\d+\.meta$/, readdir(DIR));
                0347     closedir(DIR);
                0348     foreach $fmeta (@fmeta) {
                0349         $ds = $fmeta; $ds =~ s/\.meta$//g;
                0350         (defined $ds_dir{$ds}) &&
                0351             die "$fmeta appears in two dirs: $ds_dir{$ds} & $dir\n";
                0352         (-f "$dir/$ds.data") || die "In $dir, $ds.data missing\n";
                0353         $ds_dir{$ds} = $dir;
                0354     }
                0355 }
                0356 
                0357 @ds = sort(keys %ds_dir);    # list of datasets
                0358 (@ds) || die "No dataset found.\n";
                0359 print STDOUT "There are ", $#ds+1, " datasets.\n";
                0360 
                0361 #---------------------------------
                0362 # check meta info for all datasets
                0363 #.................................
                0364 
                0365 undef $id;
                0366 undef $nDims;
                0367 undef $format;
                0368 
                0369 undef $Dim;
                0370 undef $dim;
                0371 undef %ds_Index0;
                0372 
                0373 #..............................................
                0374 # check each meta file and set some global vars
                0375 
                0376 foreach $ds (@ds) {
                0377     &check_meta($ds,$ds_dir{$ds});
                0378 }
                0379 print STDOUT "All existing meta files are self- and mutually consistent.\n";
                0380 
                0381 #print join(" ",$id,$nDims,$format,$Dim,$dim), "\n";
                0382 #foreach $ds (@ds) {
                0383 #   $dir = $ds_dir{$ds};
                0384 #   $Index0 = $ds_Index0{$ds};
                0385 #   print "$ds\n";
                0386 #   print "$Index0\n";
                0387 #}
                0388 
                0389 @Dim = split(/,/,$Dim);
                0390 @dim = split(/,/,$dim);
                0391 
                0392 #................................
                0393 # check meta info in its entirety
                0394 
                0395 &check_entirety(*Dim,*dim,*ds_Index0) ||
                0396     die "Datasets are not complete!\n";
                0397 
                0398 print STDOUT "Datasets are complete.\n";
                0399 
                0400 #...........
                0401 # set $bytes
                0402 
                0403 if ($format eq "float32") {
                0404     $bytes = 4;
                0405 } elsif ($format eq "float64") {
                0406     $bytes = 8
                0407 } else {
                0408     die "format '$format' unknown\n";
                0409 }
                0410 
                0411 #---------------------------
                0412 # check and merge data files
                0413 #...........................
                0414 
                0415 #........................
                0416 # check machine's bytesex
                0417 # it dies if neither little- nor big-endian.
                0418 
                0419 $Mach_Bytesex = &mach_bytesex();
                0420 print STDOUT "Current machine's endianness: $Mach_Bytesex\n";
                0421 
                0422 #...................
                0423 # check file bytesex and resolve realted issues
                0424 undef $File_Bytesex;
                0425 foreach $ds (@ds) {
                0426     $fdata = "$ds.data";
                0427     $file_bytesex = &file_bytesex($ds_dir{$ds}."/$fdata");
                0428     ($file_bytesex eq "unknown") &&
                0429         die "$fdata: endianness is neither little- nor big-endian.\n";
                0430     print STDOUT "$fdata: $file_bytesex\n";
                0431     unless ($File_Bytesex) {
                0432         $File_Bytesex = $file_bytesex;
                0433     } else {
                0434         ($File_Bytesex eq $file_bytesex) ||
                0435         die "Data files are mutually inconsistent in endianness\n";
                0436     }
                0437 }
                0438 
                0439 #------------------
                0440 # set $Byte_Reorder, which controls swapping of bytes in
                0441 # header and terminator of Fortran unformatted data files.
                0442 $Byte_Reorder = 1;
                0443 
                0444 # if machine and data file have the same bytesex, no need for swapping
                0445 ($File_Bytesex eq $Mach_Bytesex) && ($Byte_Reorder = 0);
                0446 
                0447 # if we can't determine bytesex of data file, need forced one from @ARGV.
                0448 if ($File_Bytesex eq "undecidable") {
                0449     # if no forced bytesex available, dies.
                0450     ($forced_bytesex) ||
                0451         die "Endianness of data files is undecidable, " .
                0452         "you have to give one at command line.\n";
                0453     ($forced_bytesex eq $Mach_Bytesex) && ($Byte_Reorder = 0);
                0454     print STDOUT "Endianness of data files is undecidable.\n";
                0455     print STDOUT "Data file header/tail will be treated as ";
                0456     print STDOUT "$forced_bytesex as you have instructed.\n";
                0457 # otherwise
                0458 } else {
                0459 # give a warining, if swapping is needed.
                0460 ($Byte_Reorder) &&
                0461     print STDOUT
                0462     "Please note: data files have different bytesex than machine!\n";
                0463 }
                0464 
                0465 #................
                0466 # merge data sets
                0467 
                0468 $Size = &listprod(@Dim) * $bytes;
                0469 
                0470 $fout = "$pref.$suff.data";
                0471 
                0472 open(FILE, ">$fout") || die "$fout: $!\n";
                0473 
                0474 # prepare header and teminator. Do byte reordering if necessary
                0475 $HdrTmr = pack("I",$Size);
                0476 ($Byte_Reorder) && ($HdrTmr = join("",reverse(split(//,$HdrTmr))));
                0477 
                0478 # write 4 byte header
                0479 syswrite(FILE,$HdrTmr,4);
                0480 
                0481 # merge each dataset
                0482 foreach $ds (@ds) {
                0483     $dir = $ds_dir{$ds};
                0484     @Index0 = split(/,/,$ds_Index0{$ds});
                0485     &merge_data($ds,$dir,*Index0);
                0486 }
                0487 
                0488 # write 4 byte terminator
                0489 seek(FILE,$Size+4,0);
                0490 syswrite(FILE,$HdrTmr,4);
                0491 
                0492 close(FILE);
                0493 
                0494 print STDOUT "Global data (" .
                0495     join("x",@Dim) .
                0496     ") is in ./$fout (endianness is $File_Bytesex).\n";
                0497 
                0498 exit 0;