#!/usr/bin/perl
# Summarise the immediate constituents of each NP in a parse file.
#
# Usage: script INPUT_FILE
#
# Reads INPUT_FILE line by line and writes INPUT_FILE.np.  A line starting
# with "(" in column 0 begins a new NP: its first whitespace-separated token
# is the NP label (including the leading "("), and its second token is the
# first daughter node.  Later lines that have a "(" exactly one column past
# the width of the NP label are taken to be further immediate constituents.
# For every NP, one output line is written holding the daughter labels
# (leading "(" removed) separated by single spaces.
# NOTE(review): this assumes one node per line with column-aligned
# indentation -- confirm against the actual input format.
#
# Dies if no input file is given, if the output file already exists, or if
# either file cannot be opened or the output cannot be closed.
use strict;
use warnings;

my $in_path = $ARGV[0];
die "Usage: $0 INPUT_FILE\n" unless defined $in_path;
my $out_path = "$in_path.np";

# Refuse to overwrite an existing output file.
if (-e $out_path) {
    die "Output file exists!";
}

# Open the input first so a bad input path does not leave an empty output
# file behind.  Three-argument open keeps the mode out of the file name.
open(my $in_fh, '<', $in_path)
    or die "Cannot open '$in_path' for reading: $!\n";
open(my $out_fh, '>', $out_path)
    or die "Cannot open '$out_path' for writing: $!\n";

my $seen_np = 0;     # true once the first NP header line has been read
my $this_np = '';    # space-separated daughter labels of the current NP
my $offset;          # length of the current NP label, including its "("

# Step through the input file, one line at a time.
while (my $line = <$in_fh>) {
    my @fields = split ' ', $line;

    if ($line =~ /^\(/) {
        # A new NP begins, so the previous one (if any) is finished.
        # Nothing is printed before the first NP, which avoids a spurious
        # leading blank line in the output.
        print {$out_fh} "$this_np\n" if $seen_np;
        $seen_np = 1;

        # The label width fixes the column where daughters are expected.
        $offset = length $fields[0];

        # The first daughter shares the line with the NP label; strip its
        # leading "(" and start the constituent string with it.
        my $first_daughter = defined $fields[1] ? $fields[1] : '';
        $this_np = length($first_daughter) ? substr($first_daughter, 1) : '';
    }
    elsif ($seen_np
        && length($line) > $offset + 1
        && substr($line, $offset + 1, 1) eq '(')
    {
        # A node label sits in the immediate-constituent column: strip its
        # leading "(" and append it to the current NP's constituent string.
        # The length test keeps substr from reading past short lines.
        my $this_daughter = substr($fields[0], 1);
        $this_np = $this_np . " " . $this_daughter;
    }
}

# Print the last NP found, if there was one.
print {$out_fh} "$this_np\n" if $seen_np;

# Close input and output files.  Buffered write errors only surface at
# close, so the output handle is checked.
close($in_fh);
close($out_fh) or die "Cannot close '$out_path': $!\n";