#!/pkg/gnu/bin/perl -w #Kevin, Ken, and Justin #The poetry parser # # use strict; use IO::Socket; &help, exit if $#ARGV < 1; my ($intermediate, $final); $intermediate = ($#ARGV == 1) ? '$intermediate$.int' : $ARGV[2]; $final = $ARGV[1]; my($a_line, $cur_index, @lines_info, $getline_ok, %big_array, $fh, %opened, $mode, $name, $pipe, $what_file); #initialize global variables and set constants my ($EOL, $BLANK) = ("\015\012", "\015\012\015\012"); my (@poem, @sentences, @clauses, $wholepoem, $tagged, $clause, $i, %relations); #load the poem into data open(POEM, $ARGV[0]) or die "Error opening poem\n"; open(PARSE, ">$intermediate") or die "Error opening writeto file\n"; chomp (@poem = ); close POEM; $wholepoem = ''; $wholepoem = $wholepoem . " " . $_ foreach (@poem); $wholepoem =~ s/\s\s+/ /g; #create the tagged poem $tagged = &getTagged($wholepoem); #create sets of sentences and clauess @sentences = &getSentences($tagged); push @clauses, [&getClauses($$_{'combined'})] foreach (@sentences); for $i(0..$#clauses){ foreach $clause(@{$clauses[$i]}){ #parseAdverbs looks for adverbs that are directly before an element #they modify. these work relatively independent of complex structure ($$clause{'parsed'}, $$clause{'diagram'}) = &parseAdverbs($$clause{'complete'}); #parseAdjectives looks for adjectives that are directly before #nouns that they modify. Relatively independent of complex structures #after adverbs are removed ($$clause{'parsed'}, $$clause{'diagram'}) = &parseAdjectives($$clause{'parsed'}, $$clause{'diagram'}); #prepositions looks for prepositions that follow preps, verbs, and nouns #this is also independent, once primary advs + adjs are removed ($$clause{'parsed'}, $$clause{'diagram'}) = &parsePrepositions($$clause{'parsed'}, $$clause{'diagram'}); #performing complex verb grouping : passive voice, present perfect, #etc etc etc. ($$clause{'parsed'}, $$clause{'diagram'}) = &parseVerbs($$clause{'parsed'}, $$clause{'diagram'}); #perform verb-dependent noun processing ($$clause{'parsed'}, $$clause{'diagram'}) = &parseNouns($$clause{'parsed'}, $$clause{'diagram'}); #cleasing removes modifications that were put into the diagram #because of secondary modification ($$clause{'diagram'}) = &cleanse($$clause{'parsed'}, $$clause{'diagram'}); ($$clause{'diagram'}) = &makeRelations($$clause{'parsed'}, $$clause{'diagram'}); &outputIntermediateData($clause); &determineCorrelations($clause); } } close PARSE; ####DEBUG### =not debugging $i = 0; my $c; while($i <= $#sentences){ print "\n\n\nSENTENCE [$sentences[$i]{combined}]\n"; foreach $c(@{$clauses[$i]}){ print "\n\tCLAUSE\t[$$c{complete}]\n\t\t[$$c{parsed}]\n"; &printWords("\t", $$c{'diagram'}); } ++$i; } =cut go back #now that we are done all this crazy information gathering, it's time to rock #we use some of justin's converted code experimentally, too &outputFinalAnalysis; exit(0); ################################################################## sub outputFinalAnalysis{ open (FINAL, ">$final") or die "ERROR OPENING OUTPUT FILE"; &readData('results_file.dat'); #get the super array of justin's my ($first, $second); foreach $first(keys %relations){ foreach $second(keys %{$relations{$first}}){ unless($big_array{$first.','.$second} + $big_array{$first.','.$second} > 1){ print FINAL "The aspect of $second modifying $first ". "may indeed be subject to an interesting interpretation\n" if($relations{$first}{$second} eq 'modifier'); print FINAL "$first\'s $relations{$first}{$second} ". "relationship with $second is very interesting\n" unless($relations{$first}{$second} eq 'modifier'); } } } close FINAL; } sub readData { my($what_file) = @_; while (($a_line = &Getline3($what_file),$getline_ok)) { if ($a_line eq '-') { $cur_index = &Getline3($what_file); } else { @lines_info = split(' ', $a_line, 9999); $big_array{$cur_index . ',' . $lines_info[0]} += $lines_info[1]; } } } sub Getline3 { &Pick('',@_); local($_); if ($getline_ok = (($_ = <$fh>) ne '')) { ; } $_; } sub Pick { my($mode,$name,$pipe) = @_; $fh = $name; open($fh,$mode.$name.$pipe) unless $opened{$name}++; } ################################################################## sub determineCorrelations{ my ($clause) = @_; my ($word, $modifier); foreach $word(values %{$$clause{'diagram'}}){ &makeCorrelations($$word{'word'}, $_, 'modifier') foreach (@{$$word{'modifiers'}}); &makeCorrelations($$word{'word'}, $_, $$_{'type'}) foreach (@{$$word{'relations'}}); } } sub makeCorrelations{ my ($word, $structure, $relation) = @_; $relations{$word}{$$structure{'word'}} = $relation; &makeCorrelations($$structure{'word'}, $_, 'modifier') foreach (@{$$structure{'modifiers'}}); &makeCorrelations($$structure{'word'}, $_, $$_{'type'}) foreach (@{$$structure{'relations'}}); } ################################################################## sub help{ print "\n\nParser. For AI Poetry interpretation\n" ."The current working stage of the parser." ."\n\nARGUMENTS : (poem text file) (output file) [optional intermediate file]" ."\n\tThe output file will be overwritten if it exists" ."\n\nOUTPUT : the output file will be written in a complex coding of" ."words and symbols." ."\n\nNOTE: a 'modifier' may be any number of things, but they no longer exist " ."in the actual sentence. They are relatively atomic only affect the thing it " ."modifies, and hence is not necessary in the clause. A relation is often in" ." the clause still, as a subject-verb or verb-object relationship. An" ." exception to this rule is for conjuncted adverbs and adjectives, which " ." are related but removed." ."\n\nEach line contains exactly one clause. Example:\n" .'+word1-pos1>modifier1-pos2|modifier2-pos3>modifier4-pos5<<*related1-pos6#how=' ."\nEach word-pos, modifier-pos, and related-pos pair represent the word and " ."\npart of speech for that word. Spaces and _ may exist inside, so be careful" ." if your parsing is dependent. Each time a > is used, we go up a 'level', ie." ." everything that we write will modify the word itself. there is no other " ." indication that these words are modifiers. Note (as adverbs) there may be" ." modifiers for modifiers. Every \* signifies the beginning of a relation" ." and the ensuing # is the type of relation. An |, which will only exist" ." up at least one level from the basic words of the clause, signify another" ." modifier or relation for this word. a + signifies a new word on the base level" ." the clause is ended with an = and then, in the current implementation, a" ." newline follows it.\n\n"; print "\n\nPRESS ENTER FOR THE REFERENCE PAGE\n"; ; print "\n" foreach (0..24); print "\n\nHere are the meaning of the symbols:" ."\n\t".' + indicates a new word base' ."\n\t".' - indicates a pos for the word' ."\n\t".' > indicates we are going up a level to a modifier for the word' ."\n\t".' | indicates we are adding another modifier/relation to the current level' ."\n\t".' < indicates we are exiting the current level (the last >)' ."\n\t".' * indicates we are going up a level to a relation' ."\n\t".' # indicates we are presenting the explanation for the relation' ."\n\t".' = indicates the end of all modifications'; print "\n\nHere are the possible parts of speech:" ."\n\tadv - adverb" ."\n\tadj - adjective" ."\n\tnoun - noun" ."\n\tverb - verb" ."\n\tprep phrase - prepositional phrase" ."\n\tcomplex verb - multiple verbs separated by'_' : 'can_be' is an example " ."\n\tparts of speech not listed default to the tagged values\n" ."\n\nHERE ARE THE POSSIBLE RELATIONS" ."\n\tsubject-verb" ."\n\tverb-object" ."\n\tI have to lookup the others... sorry\n"; } ################################################################## sub outputIntermediateData{ #write the clause to output my ($clause) = @_; #here is how we will output #+word-pos>modifier-pos|modifier-pos>modifier-pos|modifier-pos<<*related-pos#how= # '+' indicates a new word base # '-' indicates a pos for the word # '>' indicates we are going up a level to a modifier for the word # '|' indicates we are adding another modifier/relation to the current level # '<' indicates we are exiting the current level (the last >) # '*' indicates we are going up a level to a relation # '#' indicates we are presenting the explanation for the relation # '=' indicates the end of all modifications my ($element, $addition); foreach $element(values %{$$clause{'diagram'}}){ print PARSE '+'; &outputEntry($element); } print PARSE "=\n" if $$clause{'diagram'}; } sub outputEntry{ my ($entry) = @_; my ($element); print PARSE $$entry{'word'}.'-'.$$entry{'type'}; if($$entry{'modifiers'}){ print PARSE '>'; &outputEntry($$entry{'modifiers'}[0]); foreach $element(1..$#{$$entry{'modifiers'}}){ print PARSE '|'; &outputEntry($$entry{'modifiers'}[$element]); } print PARSE '<'; } foreach $element(@{$$entry{'relations'}}){ print PARSE '*'.$$element{'word2'}.'#'.$$element{'type'}; } } ################################################################## sub makeRelations{ my ($clause, $diagram) = @_; my (@relations); #N V DO @relations = $clause =~ /(\w+\/NN\w* \w+\/VB\w* \w+\/NN\w*)/g; foreach (@relations){ $_ =~ /(\w+)\/NN\w* (\w+)\/VB\w* (\w+)\/NN\w*/; &addrelation($1, $2, "subject-verb", $diagram); &addrelation($2, $3, "verb-object", $diagram); } #N V (no-DO) @relations = $clause =~ /(\w+\/NN\w* \w+\/VB\w*\b)(?! \w+\/NN\w*)/g; foreach (@relations){ $_ =~ /(\w+)\/NN\w* (\w+)\/VB\w*/; &addrelation($1, $2, "subject-verb", $diagram); } @relations = $clause =~ /(\w+\/EX \w+\/VB\w* \w+\/NN\w*)/g; foreach (@relations){ $_ =~ /w+\/EX (\w+)\/VB\w* (\w+)\/NN\w*/g; &addrelation($2, $1, "subject-verb", $diagram); } return $diagram; } ################################################################## sub cleanse{ #removes diagramed parts of speech that have been re-diagramed #like an adjective that is diagramed because an adverb modifies it #will be deleted as it also modifies a noun my ($clause, $diagram) = @_; my ($key, @words); #rid ourselves of no-long-useful elements foreach $key(keys %$diagram){ delete $$diagram{$key},next unless $clause =~ /\b$key\//; unless (defined $$diagram{$key}{'word'}){ $$diagram{$key}{'word'} = $key; $clause =~ /$key\/(\w\w)/; $$diagram{$key}{'type'} = ($1 eq 'NN') ? 'noun' : ($1 eq 'VB') ? 'verb' : $1; } } #the following just makes sure everything has a defined word and type @words = $clause =~ /(\w+\/\w+)/g; foreach (@words){ $_ =~ /(\w+)\/(\w\w)/; unless( exists $$diagram{$1}){ $$diagram{$1}{'word'} = $1; $$diagram{$1}{'type'} = ($2 eq 'NN') ? 'noun' : ($2 eq 'VB') ? 'verb' : ($2 eq 'JJ') ? 'adj' : ($2 eq 'RB') ? 'adverb' : $2; } } return $diagram; } ################################################################## ################################################################## sub getTagged{ #run the routine to tag everything my ($poem) = @_; my (@words, $query, $remote, $tagged); #we cosnider the followign punctioation : , . ! ? ; : - ' " $poem =~ s/([\!\.\,\"\?\;\:\-])/ $1/g; @words = split / /, $poem; $query = "/~loui/313f00/tagger/tag.cgi?data=". $words[0]; $query = $query . "+$words[$_]" foreach (1..$#words); $query = $query . "%3F"; $remote = IO::Socket::INET->new( Proto => "tcp", PeerAddr => "www.cs.wustl.edu", PeerPort => "http(80)", ) or die "cannot connect to daytime port at localhost"; $remote->autoflush(1); print $remote "GET $query HTTP/1.0" . $BLANK; while ( <$remote> ) { $tagged = $_; } $tagged =~ s/ ([\!\.\"\?\;\:\-])\/[\!\.\"\?\;\:\-]/$1/g; return $tagged; } ################################################################## sub getSentences{ #separate everything into sentences, do some processing my ($taggedSentence) = @_; my (@sentences, @combined, @partsofspeech, @naked, @words, $sentence); @combined = split /[\.\!\?]/, $taggedSentence; foreach $sentence(@combined){ #FIRST, we cleanse the sentence of unneeded things $sentence =~ s/ ?\w+\/DT//g; #clense articles $sentence =~ s/ ?\w+\/PP\$ (\w+\/NN\w*)/$1/g; #possessive pronouns $sentence =~ s/\w+\/PRP\$ (\w+\/NN\w*)/$1/g; #possessive pronouns $sentence =~ s/ ?\w+\/PP\$ (\w+\/JJ\w*)/$1/g; #possessive pronouns $sentence =~ s/\w+\/PRP\$ (\w+\/JJ\w*)/$1/g; #possessive pronouns $sentence =~ s/ ?\w+\/POS//g; #possessive nouns $sentence =~ s/\/MD/\/VBM/g; #change modal to verb-modal for processing $sentence =~ s/([Tt]here)\/\w+/$1\/EX/; #we only care about existential there $sentence =~ s/([Tt]hat)\/\w+/$1\/TH/; #that is also a relatively special case #change pronouns to be NNN = noun pronouns (nnp is already taken) $sentence =~ s/\/PRP/\/NNN/g; $sentence =~ s/\$//g; #get rid of the incorrect possession for 'her' #get rid of excess spaces $sentence =~ s/\s\s+/ /g; $sentence =~ s/^\s+//; $sentence =~ s/\s+$//; next unless $sentence && $sentence ne '/CD'; push @sentences, { 'combined' => $sentence }; } return @sentences; } ################################################################## sub parseAdverbs{ my ($clause) = @_; my ($type, $diagram); $diagram = {}; #simple parses: adverb adverbs, adverb adjectives, adverb verbs foreach $type('RB', 'JJ', 'VB'){ #first, parse out possible conjuncted adverbs while($clause =~ /((not\/RB )?(\w+)\/RB\w* (and|but|or)\/CC (not\/RB )?)?(\w+)\/RB\w* (\w+)\/($type\w*)/){ my ($modifier, $word, $wordtype, $conj, $neg, $mod2, $neg2) = ($6, $7, $8, $4, $5, $3, $2); if($conj){ if($neg && $neg2){ &addrelation($modifier, $mod2, ($conj eq 'and') ? 'neither' : ($conj eq 'or') ? 'negated either' : 'contrasted neither', $diagram); } elsif($neg || $neg2){ &addrelation($mod2, $modifier, ($conj eq 'and') ? 'exclusion' : ($conj eq 'or') ? 'neg disjunction' : 'neg contrast', $diagram); } else{ &addrelation($mod2, $modifier, ($conj eq 'and') ? 'correspodance' : ($conj eq 'or') ? 'disjunction' : 'pos contrast', $diagram); } &addword($mod2, 'adv', $word, $diagram); } &addword($modifier, 'adv', $word, $diagram); $clause =~ s/((not\/RB )?\w+\/RB\w* (and|but|or)\/CC (not\/RB )?)?\w+\/RB\w* (\w+\/$type\w*)/$5/; } } return ($clause, $diagram); } ############################################################################ sub parseAdjectives{ my ($clause, $diagram) = @_; #simple adjectives: adj nouns while($clause =~ /((\w+)\/JJ\w* (and|but|or)\/CC )?(\w+)\/JJ\w* (\w+)\/(NN\w*)/){ my ($modifier, $word, $wordtype, $conj, $mod2) = ($4, $5, $6, $3, $2); if($conj){ if(&negation($$diagram{$modifier}) && &negation($$diagram{$mod2})){ &addrelation($mod2, $modifier, ($conj eq 'and') ? 'neither' : ($conj eq 'or') ? 'negated either' : 'contrasted neither', $diagram); } elsif(&negation($$diagram{$modifier}) || &negation($$diagram{$mod2})){ &addrelation($mod2, $modifier, ($conj eq 'and') ? 'exclusion' : ($conj eq 'or') ? 'neg disjunction' : 'neg contrast', $diagram); } else{ &addrelation($mod2, $modifier, ($conj eq 'and') ? 'correspodance' : ($conj eq 'or') ? 'disjunction' : 'pos contrast', $diagram); } &addword($mod2, 'adj', $word, $diagram); } &addword($modifier, 'adj', $word, $diagram); $clause =~ s/(\w+\/JJ\w* (and|but|or)\/CC )?\w+\/JJ\w* (\w+\/NN\w*)/$3/; } return ($clause, $diagram); } ############################################################################ sub parsePrepositions{ my ($clause, $diagram) = @_; my ($base, $base2, $wordtype, $type2, $prep, $object); #prep1 prep2 ==> prep1 modified by prep2 while($clause =~ /(\w+)\/(TO|IN) (\w+)\/(NN\w*) (\w+)\/(TO|IN) (\w+)\/NN\w*/){ ($base, $base2, $wordtype, $type2, $prep, $object) = ($1, $3, $2, $4, $5, $7); &addword("$prep $object", 'prep phrase', "$base $base2", $diagram); $clause =~ s/(\w+\/)(TO|IN) (\w+\/NN\w*) \w+\/(TO|IN) \w+\/NN\w*/$1$2 $3/; } #simple prep: verb prep => verb modified by prep while($clause =~ /(\w+)\/(VB\w*) (\w+)\/(IN|TO) (\w+)\/NN\w*/){ ($base, $wordtype, $prep, $object) = ($1, $2, $3, $5); &addword("$prep $object", 'prep phrase', $base, $diagram); $clause =~ s/$base\/$wordtype \w+\/(IN|TO) \w+\/NN\w*/$base\/$wordtype/; } #noun prep => noun modified by prep while($clause =~ /(\w+)\/(NN\w*) (\w+)\/(IN|TO) (\w+)\/NN\w*/){ ($base, $wordtype, $prep, $object) = ($1, $2, $3, $5); &addword("$prep $object", 'prep phrase', $base, $diagram); $clause =~ s/(\w+\/NN\w*) \w+\/(IN|TO) \w+\/NN\w*/$1/; } return ($clause, $diagram); } ############################################################################ sub parseNouns{ my ($clause, $diagram) = @_; my ($base, $modifier, $verb, $type, $noun); #parse through adjective clauses with the word indicating subject: who/that while($clause =~ /(\w+)\/NN\w* (who|that)\/\w+ (\w+)\/VB\w*( (\w+)\/(NN|JJ)\w*)?/){ ($base, $verb, $modifier, $type) = ($1, $3, $5, $6); if($modifier){ # &addrelation($base, $modifier, "action though $verb", $diagram) &addword($modifier, "adj phrase object", $base, $diagram) if ($type eq 'NN'); &addword($modifier, 'adj', $base, $diagram) if ($type eq 'JJ'); } else { # &addrelation($base, $verb, "acts with", $diagram); &addword($verb, "noun-verb", $base, $diagram); } $clause =~ s/(\w+\/NN\w*) (who|that)\/\w+ \w+\/VB\w*( \w+\/(NN|JJ)\w*)?/$1/; } #where when why whose whom which how #parse through adj clauses with the word being merely a symbol while($clause =~ /(\w+)\/NN\w*( (\w+)\/(IN|TO))? (where|when|whom|which|that)\/\w+ (\w+)\/NN\w* (\w+)\/VB\w*/){ ($base, $modifier, $type, $noun, $verb) = ($1, $3, $5, $6, $7); &addword($noun, "object of adj clause $verb $modifier $type", $base, $diagram) if $type; &addword($noun, "object of adj clause $verb $modifier", $base, $diagram) unless $type; # &addrelation($noun, $base, "acted on: $verb $type", $diagram) if $type; # &addrelation($noun, $base, "acted on: $verb", $diagram) unless $type; $clause =~ s/(\w+\/NN\w*)( \w+\/(IN|TO))? (where|when|whom|which|that)\/\w+ \w+\/NN\w* \w+\/VB\w*/$1/; } #parse noun, gerund verb noun while($clause =~ /(\w+)\/NN\w* (,\/, )?(\w+)\/VBG (\w+)\/NN\w*/){ ($base, $verb, $modifier) = ($1, $3, $4); &addword($modifier, "by verb $verb", $base, $diagram); $clause =~ s/(\w+\/NN\w*) (,\/, )?\w+\/VBG \w+\/NN\w*/$1/; } return ($clause, $diagram); } ############################################################################ sub parseVerbs{ my ($clause, $diagram) = @_; my ($base, $modifier, $type, $base2, $type2, $temp, $ex_verb, $conj, $mod2, $object, $object2, $neg2, $neg1); #determine adverbs splitting verbs/helping verbs while($clause =~ /(\w+)\/(TO|VB\w*) (\w+)\/RB\w* (\w+)\/(VB\w*)/){ ($base, $type, $modifier, $base2, $type2) = ($1, $2, $3, $4, $5); &addword($modifier, 'adv', $base2, $diagram); $clause =~ s/(\w+\/(TO|VB\w*)) \w+\/RB\w* (\w+\/VB\w*)/$1 $3/; } #make infinitives into verbs $clause =~ s/(\w+)\/TO (\w+)\/VB\w*/$1_$2\/VBI/g; #bind multiple-verbs while($clause =~ /(\w+)\/(VB[ PZMD]) (\w+)\/(VB\w*)/){ ($base, $type, $base2, $type2) = ($1, $2, $3, $4); #now, if we wanted to do complex calculations, we could push @{$$diagram{"$base\_$base2"}{'modifiers'}}, @{$$diagram{$base}{'modifiers'}} if ($$diagram{$base}{'modifiers'}); push @{$$diagram{"$base\_$base2"}{'modifiers'}}, @{$$diagram{$base2}{'modifiers'}} if ($$diagram{$base2}{'modifiers'}); $$diagram{"$base\_$base2"}{'word'} = "$base\_$base2"; $$diagram{"$base\_$base2"}{'type'} = "complex verb"; #substitute $type2 =~ /VB(\w*)/; $temp = "\_$1" if $1; $temp = '' unless $1; $clause =~ s/(\w+)\/(VB\w*) (\w+)\/(VB\w*)/$1\_$3\/$2$temp/; } #determine adverbs and prepositions directly after a verb, as they may be intersparsed while(($clause =~ /\w+\/VB\w* \w+\/RB\w*( (and|but|or)\/CC \w+\/RB\w*)?/) || ($clause =~ /\w+\/VB\w* \w+\/(IN|TO)\w* \w+\/NN\w*( (and|but|or)\/CC (not\/RB)? \w+\/(IN|TO)\w* \w+\/NN\w*)?/)){ #FIRST, we examine in the case of an adverb if ($clause =~ /(\w+)\/VB\w* (\w+)\/RB\w*( (and|but|or)\/CC (\w+)\/RB\w*)?/){ ($base, $modifier, $conj, $mod2) = ($1, $2, $4, $5); if($conj){ if(&negation($$diagram{$modifier}) && &negation($$diagram{$mod2})){ &addrelation($mod2, $modifier, ($conj eq 'and') ? 'neither' : ($conj eq 'or') ? 'negated either' : 'contrasted neither', $diagram); } elsif(&negation($$diagram{$modifier}) || &negation($$diagram{$mod2})){ &addrelation($mod2, $modifier, ($conj eq 'and') ? 'exclusion' : ($conj eq 'or') ? 'neg disjunction' : 'neg contrast', $diagram); } else{ &addrelation($mod2, $modifier, ($conj eq 'and') ? 'correspodance' : ($conj eq 'or') ? 'disjunction' : 'pos contrast', $diagram); } &addword($mod2, 'adv', $base, $diagram); } &addword($modifier, 'adv', $base, $diagram); $clause =~ s/(\w+\/VB\w*) \w+\/RB\w*( (and|but|or)\/CC \w+\/RB\w*)?/$1/; } #now, in the case of the preposition else{ $clause =~ /(\w+)\/VB\w* (not\/RB )?(\w+)\/(IN|TO)\w* (\w+)\/NN\w*( (and|but|or)\/CC (not\/RB)? (\w+)\/(IN|TO)\w* (\w+)\/NN\w*)?/; ($base, $modifier, $object, $conj, $mod2, $object2, $neg1, $neg2) = ($1, $3, $5, $7, $9, $11, $2, $8); if($conj){ &addword("not", "adv", "$mod2 $object2", $diagram) if ($neg2); &addword("not", "adv", "$modifier $object", $diagram) if ($neg1); if($neg1 && $neg2){ &addrelation("$mod2 $object2", "$modifier $object", ($conj eq 'and') ? 'neither' : ($conj eq 'or') ? 'negated either' : 'contrasted neither', $diagram); } elsif($neg1 || $neg2) { &addrelation("$mod2 $object2", "$modifier $object", ($conj eq 'and') ? 'exclusion' : ($conj eq 'or') ? 'neg disjunction' : 'neg contrast', $diagram); } else{ &addrelation("$mod2 $object2", "$modifier $object", ($conj eq 'and') ? 'correspodance' : ($conj eq 'or') ? 'disjunction' : 'pos contrast', $diagram); } &addword("$mod2 $object2", 'prep phrase', $base, $diagram); } &addword("$modifier $object", 'prep phrase', $base, $diagram); $clause =~ s/(\w+\/VB\w*) (not\/RB )?\w+\/(IN|TO)\w* \w+\/NN\w*( (and|but|or)\/CC (not\/RB)? \w+\/(IN|TO)\w* \w+\/NN\w*)?/$1/; } } #determine existential verbs and their relations while($clause =~ /(\w+)\/NN\w* (is|am|are|were|was|\w*_be|\w*_been|\w*being)\/VB\w* (\w+)\/(JJ|NN)\w*( (and|or|but)\/CC (\w+)\/(JJ|NN)\w*)?/){ ($base, $ex_verb, $modifier, $type, $conj, $mod2, $type2) = ($1, $2, $3, $4, $6, $7, $8); if($conj){ if(&negation($$diagram{$modifier}) && &negation($$diagram{$mod2})){ &addrelation($mod2, $modifier, ($conj eq 'and') ? 'neither' : ($conj eq 'or') ? 'negated either' : 'contrasted neither', $diagram); } elsif(&negation($$diagram{$modifier}) || &negation($$diagram{$mod2})){ &addrelation($mod2, $modifier, ($conj eq 'and') ? 'exclusion' : ($conj eq 'or') ? 'neg disjunction' : 'neg contrast', $diagram); } else{ &addrelation($mod2, $modifier, ($conj eq 'and') ? 'correspodance' : ($conj eq 'or') ? 'disjunction' : 'pos contrast', $diagram); } if($type2 eq 'JJ'){ &addword($mod2, 'adj', $base, $diagram); } elsif(&negation($$diagram{$ex_verb})){ &addword($mod2, "negated noun", $base, $diagram); # &addrelation($base, $mod2, 'negated nouns', $diagram); } else { &addword($mod2, "attrib noun", $base, $diagram); # &addrelation($base, $mod2, 'attrib nouns', $diagram); } } if($type eq 'JJ'){ &addword($modifier, 'adj', $base, $diagram); } elsif(&negation($$diagram{$ex_verb})){ # &addrelation($base, $modifier, 'negated nouns', $diagram); &addword($modifier, "negated noun", $base, $diagram); } else { &addword($modifier, "attrib noun", $base, $diagram); #&addrelation($base, $modifier, 'attrib nouns', $diagram); } $clause =~ s/(\w+\/NN\w*) (is|am|are|were|was|\w*_be|\w*_been|\w*being)\/VB\w* \w+\/(JJ|NN)\w*( (and|or|but)\/CC \w+\/(JJ|NN)\w*)?/$1/; } return ($clause, $diagram); } ############################################################################ sub negation{ #takes in a modifier structure and examines it for a negative #connotation my ($ref) = @_; foreach (@{$$ref{'modifiers'}}){ return 'true' if($$_{'word'} && ($$_{'word'} eq 'not') || ($$_{'word'} eq 'no')); } return 0; } ############################################################################ sub addword{ my ($modifier, $PoS, $word, $dataref) = @_; my (@data); $$dataref{$modifier}{'word'} = $modifier; $$dataref{$modifier}{'type'} = $PoS; @data = split " ", $modifier; foreach (@data){ push @{$$dataref{$modifier}{'modifiers'}}, @{$$dataref{$_}{'modifiers'}} if ($_ ne $modifier) && $$dataref{$_}{'modifiers'}; } push @{$$dataref{$word}{'modifiers'}}, $$dataref{$modifier}; } ############################################################################ sub addrelation{ #presently, a place-place relation is a one-way relation, and will only #be inputted for the one type, so that subject-verb is always instigated #by the subject, and verb-object is always instigated by the verb my ($part1, $part2, $relation, $dataref) = @_; push @{$$dataref{$part1}{'relations'}}, {'word1' => $part1, 'word2' => $part2, 'type' => $relation}; } ############################################################################ ############################################################################ ############################################################################ sub parseSentence{ my ($sentence) = @_; #sentence is expected to be a hash reference of : #'naked', 'speech', 'combined' return unless $sentence; my (%words, @clauses, %final, @whole, $i, $initialPrep, $noun, $verb, $i2, $mod, $word); @whole = split ' ', $$sentence{'combined'}; $initialPrep = &assignPrep(\@whole, -1); #we run through the first time to assign #modifiers that precede their modification $i=0; while($i <= $#whole){ #prenatal adjective if($whole[$i] =~ /\/JJ/){ $i2 = $i; ++$i2 while $i2 < $#whole && ($whole[$i2+1] =~ /\/JJ|\/RB|\/\,/); # ++$i2 while $i2 < $#whole && ($whole[$i2+1] =~ /\/NN/); #currently indexed the the modifier next unless $whole[$i2] =~ /(\w+)\/NN/; $word = $1; $whole[$i] =~ /(\w+)\//; &addword($1, 'adj', $word, \%words); splice @whole, $i, 1; redo; #should advance to the next while } #prenatal adverb if($whole[$i] =~ /\/RB/){ next unless ($whole[$i+1] =~ /(\w+)(\/VB|\/JJ|\/RB)/); $word = $1; $i2 = $i+1; $whole[$i] =~ /(\w+)\//; &addword($1, 'adv', $word, \%words); splice @whole, $i, 1; redo; } #adjective phrase in which the subject is the phase determiner if($whole[$i] =~ /\/NN (who|that)/){ my ($vb, $vbname, $diro, $io, $itr, $search, $tmp); $itr = $i; $diro = $io = -1; ++$itr until $whole[$itr] =~ /\/VB|\/NN/; if($whole[$i] =~ /\/VB([ DPZ])/){ $vb = $itr; ++$itr until $whole[$itr] =~ /(\/VB|\/NN|\/IN)/; $tmp = $1; if($tmp eq "\/IN" && $whole[$itr+1] =~ /\/NN/){ #ind obj $io = ++$itr; ++$itr until $whole[$itr] =~ /(\/VB|\/NN|\/IN)/; $tmp = $1; } if($tmp eq "\/NN"){ #direct object $diro = $itr; } } } ++$i; } $i=0; while ($i <= $#whole){ if($whole[$i] =~ /(\w+?)\/NN/){ #it's a noun! $noun = $1; $words{$noun}{'word'} = $noun; $words{$noun}{'type'} = 'noun'; # $words{$noun}{'modifiers'}[0] = &assignPrep( \@whole, $i) # if $i <= $#whole && $whole[$i+1] =~ /\/IN/; push @{$words{$noun}{'modifiers'}}, &assignAdj(\@whole, \$i) while (($i > 0) && ($whole[$i-1] =~ /\/JJ/)); push @{$words{$noun}{'modifiers'}}, &assignAdjPhrase(\@whole, \$i) while (($i < $#whole) && ($whole[$i-1] =~ /\//)); #FILL IN = who, that, } elsif($whole[$i] =~ /(\w+?)\/VB/){ $verb = $1; $words{$verb}{'word'} = $verb; $words{$verb}{'type'} = 'verb'; push @{$words{$verb}{'modifiers'}}, $initialPrep if defined %$initialPrep; while((($i < $#whole) && ($whole[$i+1] =~ /\/IN|\/RB/)) || (($i > 0) && ($whole[$i-1] =~ /\/RB/))){ push @{$words{$verb}{'modifiers'}}, &assignPrep(\@whole, $i) if ($i < $#whole) && ($whole[$i+1] =~ /\/IN/); push @{$words{$verb}{'modifiers'}}, &assignAdv(\@whole, \$i) if (($i > 0) && ($whole[$i-1] =~ /\/RB/)) || (($i < $#whole) && ($whole[$i+1] =~ /\/RB/)); } } ++$i; } print "Parsed sentence $$sentence{combined}\n"; printWords("\t", %words); ; \return %words; } ################################################################## sub getClauses{ #takes in a sentence, returns an array of clauses in the 'clause form' my ($sentence) = @_; my (@clauses, @temp); #semicolon clause if($sentence =~ /;/){ @temp = split ';', $sentence; push @clauses, &getClauses($_) foreach(@temp); } #complex sentence, dependent clause second elsif ($sentence =~ /(^.*\/NN\w* .*?\/VB\w* .?) (\w+\/IN .*?\/NN\w* .*?\/VB\w* .*?)$/){ @temp = ($1, $2); push @clauses, &getClauses($_) foreach (@temp); } #complex sentence, dependent clause first elsif ($sentence =~ /(^\w+\/IN .*\/NN\w* .*?\/VB\w* .*?) ,\/, (.*?\/NN\w* .*?\/VB\w* .*?)$/){ @temp = ($1, $2); push @clauses, &getClauses($_) foreach (@temp); } #compound sentence elsif ($sentence =~ /(^.*\/NN\w* .*?\/VB\w* .*?)( ,\/,)? \w+\/CC (.*?\/NN\w* .*?\/VB\w* .*)$/){ @temp = ($1, $3); push @clauses, &getClauses($_) foreach (@temp); } #run on sentence # elsif($sentence =~ /(^.*\/NN\w* .*?\/VB\w* .*?)( ,\/,)? (.*?\/NN\w* .*?\/VB\w* .*)$/){ # @temp = ($1, $3); # push @clauses, &getClauses($_) foreach (@temp); # } else { push @clauses, {'complete' => $sentence}; } return @clauses; } ################################################################## ################################################################## ################################################################## sub printClause{ #the clause variable sent to this procedure should be a hash reference of: # 'clause' 'nouns' 'verbs' my ($offset, %c) = @_; } ################################################################## sub printWords{ #The current input is a reference to a hash with the following # word type modifiers relations index # each relation is based on : # word type index my ($offset, $words) = @_; my ($word, @vals); foreach $word(keys %$words){ &printStruct($offset, %{$$words{$word}}); } } sub printStruct{ my ($offset, %struct) = @_; print $offset . $struct{'word'} . "\t" . $struct{'type'} . "\n"; print $offset . "MODIFIERS\n" if ($#{$struct{'modifiers'}} >= 0); foreach (@{$struct{'modifiers'}}){ &printStruct($offset . "\t", %$_) if defined $_; } print $offset . "RELATIONS\n"if ($#{$struct{'relations'}} >= 0); print $offset . "\t" . $$_{'word2'} . "\t" . $$_{'type'} . "\n" foreach (@{$struct{'relations'}}); } ################################################################## =donot need this sub findInitialPrep{ #find an initial preposition my ($ref) = @_; return (0, undef) unless $$ref[0] =~ /(\w+)\/IN/; return (0, undef) unless $$ref[1] =~ /\/DT/; my ($n_i, %result, $i2, %nextPrep); $n_i = 2; while(($n_i <= $#$ref) && !($$ref[$n_i] =~ /\/VB[NPZ]?/) && !($$ref[$n_i] =~ /\/NN/)){} return (0, undef) unless $$ref[$n_i] =~ /\/NN/; #now the preposition is finalized we have the prep (0), article (1), and #object ($n_i) while($n_i > 2){ if($$ref[$n_i-1] =~ /\/JJ/){ $i2 = $n_i; push @{$results{'modifiers'}}, &assignAdj($ref, \$i2); $n_i = $i2 + 1; } else { print "UH OH! keep working on initial preposition\n"; last; } } $result{'word'} = "$$ref[0] $$ref[1] $$ref[2]"; $result{'type'} = "prep phrase"; splice(@$ref, 0, $n_i+1); ($nextPrep[0], $nextPrep[1]) = &findInitialPrep($ref); push @{$result{'modifiers'}}, $nextPrep[1] if $nextPrep[0]; return (1, %result); } sub attribute{ #attribute parses through the current setnecen adn weeds things out #and relates them based on the the interpretation string my ($w_ref, @words, $interpretation) = @_; my (@necessary, $i, @inter); @interp = split " ", $interpretation; } ################################################################## sub assignPrep{ #we assume that there is a potential preposition afterwards #ARGs: reference to record, ref to wordlist, ctr of word #we only need to test forward here! my ($rwords, $ctr) = @_; return (undef) unless ($ctr+3 <= $#$rwords); #no preposition possible return (undef) unless $$rwords[$ctr+1] =~ /\/IN|\/TO/; #no preposition return (undef) unless $$rwords[$ctr+2] =~ /\/DT|\/NN|\/EX/; my ($n_i, %result); if($$rwords[$ctr+2] =~ /there/){ $result{'word'} = $$rwords[$ctr+1]." ".$$rwords[$ctr+2]; $n_i = $ctr+2; } else{ my ($i2, %nextPrep); $n_i = ($$rwords[$ctr+2] =~ /NN/) ? $ctr + 2 : $ctr + 3; while(($n_i <= $#$rwords) && !($$rwords[$n_i] =~ /\/VB[NPZ]?/) && !($$rwords[$n_i] =~ /\/NN/)){} return (undef) unless $$rwords[$n_i] =~ /\/NN/; #now the preposition is finalized we have the prep (0), article (1), and #object ($n_i) while($n_i > $ctr + 3){ if($$rwords[$n_i-1] =~ /\/JJ/){ $i2 = $n_i; push @{$result{'modifiers'}}, &assignAdj($rwords, \$i2); $n_i = $i2 + 1; } else { print "UH OH! keep working on preposition finder\n"; last; } } $result{'word'} = "$$rwords[0] $$rwords[1] $$rwords[2]"; } #need to do this for everything $result{'word'} =~ s/\/\w*//g; #get rid of tags $result{'type'} = "prep phrase"; splice(@$rwords, $ctr+1, ($n_i - $ctr)); push @{$result{'modifiers'}}, &assignPrep($rwords, $ctr); return \%result; } ################################################################## sub assignAdj{ #ARGs: ref to wordlist, ref to ctr of noun #we must test both forward and reverse here my ($rwords, $rctr) = @_; return (undef) unless ($$rctr > 0); #no adj possible return (undef) unless $$rwords[$$rctr-1] =~ /(\w+)\/JJ/; #adj exists! my (@result, $i2); $result[0]{'word'} = $1; $result[0]{'type'} = 'adj'; $i2 = $$rctr-1; $result[0]{'modifiers'} = &assignAdv($rwords, \$i2) if $$rwords[$i2-1] =~ /\/RB/; # print "splicing out middle of ".$$rwords[$$rctr-2]. # $$rwords[$$rctr-1].$$rwords[$$rctr]. " ok?\n"; splice(@$rwords, $$rctr-1, 1); # print "spliced out! ".$$rwords[$$rctr-2]. # $$rwords[$$rctr-1].$$rwords[$$rctr]. " ok?\n"; --$$rctr; #modifier because we delete in front! push @result, &assignAdj($rwords, $rctr); return @result; } ################################################################## sub assignAdv{ #ARGs: rwords to wordlist, ref to ctr of word in modification #we only need to test reverse here! my ($rwords, $rctr) = @_; return (undef) unless (($$rctr > 0) && ($$rwords[$$rctr-1] =~ /(\w+)\/RB/)) || (($$rctr < $#$rwords) && ($$rwords[$$rctr+1] =~ /(\w+)\/RB/)); #adv exists! my (@result, $i2, $temp); if(($$rctr > 0) && ($$rwords[$$rctr-1] =~ /(\w+)\/RB/)){ $temp = {}; $$temp{'word'} = $1; $$temp{'type'} = 'adv'; $i2 = $$rctr-1; $$temp{'modifiers'} = &assignAdv($rwords, \$i2) if $$rwords[$i2-1] =~ /\/RB/; splice(@$rwords, $$rctr-1, 1); $$rctr -= 1; #modifier because we delete in front! push @result, $temp; } if(($$rctr < $#$rwords) && ($$rwords[$$rctr+1] =~ /(\w+)\/RB/)){ $temp = {}; $$temp{'word'} = $1; $$temp{'type'} = 'adv'; $i2 = $$rctr+1; $$temp{'modifiers'} = &assignAdv($rwords, \$i2) if $$rwords[$i2-1] =~ /\/RB/; splice(@$rwords, $$rctr+1, 1); push @result, $temp; } return @result; } =cut return