#!/usr/bin/perl
# Reads a bracketed parse file and, for every NP in it, writes one line
# listing the labels of that NP's immediate constituents, separated by
# single spaces.
#
# Usage: <this script> INPUT_FILE
#
# The result is written to "INPUT_FILE.np". The script refuses to run if
# that file already exists, so earlier results are never overwritten.
#
# Recognised input layout:
#   * A line whose first character is "(" starts a new NP. Its first
#     whitespace-separated field is the NP label (including the leading
#     "("); its second field, if present, is the first daughter.
#   * A later line that has "(" exactly one column past the width of the
#     NP label holds a further immediate constituent of the current NP.
#   * Every other line is ignored.
use strict;
use warnings;

# Open the input and output files.
my $input_path = $ARGV[0];
die "Usage: $0 INPUT_FILE\n" unless defined $input_path;
my $output_path = "$input_path.np";

open(my $input_fh, '<', $input_path)
    or die "Cannot open input file '$input_path': $!";

if (-e $output_path) {
    die "Output file exists!";
}
open(my $output_fh, '>', $output_path)
    or die "Cannot open output file '$output_path': $!";

my @daughters;      # Daughter labels collected for the NP being read.
my $seen_np = 0;    # True once the first NP line has been encountered.
my $offset;         # Length of the current NP's label, leading "(" included.

# Step through the input file, one line at a time, and collect the
# immediate constituents of each NP.
while (my $line = <$input_fh>) {

    # Check to see if the line is the beginning of a new NP.
    if ($line =~ /^\(/) {

        # The previous NP (if there was one) is finished; print it.
        # Nothing is printed ahead of the very first NP, so the output
        # does not start with an empty line.
        print {$output_fh} join(' ', @daughters), "\n" if $seen_np;
        $seen_np = 1;

        # See how long the label is; it fixes the column at which the
        # remaining immediate constituents must appear.
        my @fields = split ' ', $line;
        $offset = length $fields[0];

        # Start a fresh constituent list with the first node after the
        # label, minus its leading character (normally the "(").
        # The label may stand alone on its line, hence the defined check.
        @daughters = ();
        push @daughters, substr($fields[1], 1) if defined $fields[1];
    }

    # Otherwise, see if there is a node label in the right place for an
    # immediate constituent. Lines before the first NP, and lines too
    # short to reach that column, can never qualify.
    elsif ($seen_np
        && length($line) > $offset + 1
        && substr($line, $offset + 1, 1) eq '(')
    {
        # Get the label of the node (first field, leading character
        # dropped) and add it to the list of immediate constituents.
        my @fields = split ' ', $line;
        push @daughters, substr($fields[0], 1);
    }
}

# Print the last NP found, if the input contained any NP at all.
print {$output_fh} join(' ', @daughters), "\n" if $seen_np;

# Close input and output files. The output close is checked because
# buffered write errors only surface at this point.
close($input_fh);
close($output_fh)
    or die "Cannot close output file '$output_path': $!";