#!/usr/bin/perl

#### libby.miller@bristol.ac.uk
#### this file takes a list of rdql files and exports them to
#### ntriples. It saves each one as name.nt in an nt/ subdirectory of
#### the directory holding the query file

use strict;
use warnings;

#### give it a list of rdql query files
my @infiles=@ARGV;

#### manifest describing the tests; rewritten at the end of the script
my $manifest="tests/rdql-tests-2003-04-10/Manifest.rdf";

#### for storing 'using x for xxx ns (prefix => namespace URI)
my %hashy=();

#### Convert every query file given on the command line.
#### For each file:
####   - comments are stripped and every ? is rewritten to _:
####   - each "( ... )" triple pattern becomes one "s p o ." line
####   - <prefix:local> names are expanded using the USING ... FOR <...> clauses
####   - the result is written to nt/<name>.nt beside the query file
#### Queries containing constraints (AND / EQ / NE) are reported, not written.
foreach my $infilen (@infiles) {

	print "\nfilen $infilen\n";

	#### the output lives in an nt/ subdirectory next to the query file
	my $outfilen=$infilen;
	unless ($outfilen=~ s/(.*)\/(.*)/$1\/nt\/$2.nt/g) {
		#### no directory part in the name: without this fallback the output
		#### name would equal the input name and the query would be overwritten
		$outfilen="nt/$infilen.nt";
	}

	#### three-argument open: the file name is never interpreted as a mode
	my $in;
	unless (open($in, '<', $infilen)) {
		warn "cannot read $infilen: $!\n";
		next;
	}

#	print "outfile: $outfilen\n";

	#### prefixes declared by one query must not leak into the next one
	%hashy=();

	my $query="";
	my $bool=1;	# set to 0 as soon as a constraint is seen

	while (my $line=<$in>) {

		#### replace comments with words
		#### (protects /* */ and // that sit inside <...> or "..." from
		#### the comment stripping below)
		$line=~ s/<(.*)(\/\*)(.*)(\*\/)(.*)>/<$1startcomment$3endcomment$5>/;
		$line=~ s/<(\S*)\/\/(\S*)>/<$1doublelines$2>/g;
		$line=~ s/\"(\S*)\/\/(\S*)\"/"$1doublelines$2"/g;

		#### remove comments
		$line=~ s/(\/\*)(.*)(\*\/)//;
		$line=~ s/\/\/.*\n//g;

		#### replace ? with _:
		$line=~ s/\?/_:/g;

		#### ignore queries with constraints
		if ($line=~ m/(\s?AND\s|\sEQ\s|\sNE\s)/i) {
			if (!($line=~ m/\/\//)) {
				$bool=0;
				print "match $line\n";
				#print "found a constraint - failing\n";
			}
		}

		#### put the comments and the // back
		$line=~ s/startcomment/\/\*/;
		$line=~ s/endcomment/\*\//;
		$line=~ s/doublelines/\/\//g;

		#### remove newlines where empty
		$line=~ s/\n\s?\n/\n/g;

		#### find the bits of the queries we're interested in; remove ,
		if ($line=~ s/.*?\((.*?)\s?,?(.*?)\s?,?(.*?)\)\s?,?/$1 $2 $3 . \n/g) {
			$line=~ s/,/ /g;
			$query=$query . $line;
		}

		#### find the namespaces
		#### one line may declare several: USING a FOR <x>, b FOR <y>
		#### [^>]* stops at the first > so each URI is captured on its own
		while ($line=~ m/([^\s,<>]+)\s+FOR\s*<([^>]*)>/ig) {
			print "using " . $1 . " " . $2 . "\n";
			$hashy{$1}=$2;
		}

	}

	close($in);

	#### swap in namespaces
	foreach my $key (keys %hashy) {

		my $k=$key;
		my $val=$hashy{$key};

		$k=~ s/\s//g;
		$val=~ s/\s//g;

		#### \Q..\E: the prefix is data, not a pattern
		#### [^>]*: expand every <prefix:local> on a line, not just the first
		$query=~ s/<\Q$k\E:([^>]*)>/<$val$1>/g;

	}

	#### clean up
	$query=~ s/\n\s?\n/\n/g;
	$query=~ s/\n\n/\n/g;
	$query=~ s/\n\s*/\n/g;
	$query=~ s/^\s*/\n/g;

	if ($bool==0) {
		print "found a constraint - failing\n";
	}
	elsif (open(my $out, '>', $outfilen)) {
		print {$out} "$query \n";
		#### buffered write errors only show up when the handle is closed
		close($out) or warn "cannot finish writing $outfilen: $!\n";
	}
	else {
		warn "cannot write $outfilen: $!\n";
	}

}


##### fix manifest to point to the NT files
##### call it ManifestNT.rdf
##### Every line of the manifest is copied; 'file:queries/test-X' references
##### become 'file:queries/nt/test-X.nt' and RDQL-Document becomes NT-Document.

my $manifestnt=$manifest;
##### only the extension at the very end of the name is rewritten
$manifestnt=~ s/\.rdf\z/NT.rdf/;

print "\nMANIFEST " . $manifest;
print "\nMANIFESTNT " . $manifestnt;

my ($inn, $oot);

if ($manifestnt eq $manifest) {
	##### opening the same file for writing would truncate it before it is read
	warn "\n$manifest does not end in .rdf - not rewriting it\n";
}
elsif (!open($inn, '<', $manifest)) {
	##### checked before the output is opened, so a missing manifest no
	##### longer leaves an empty ManifestNT.rdf behind
	warn "\ncannot read $manifest: $!\n";
}
elsif (!open($oot, '>', $manifestnt)) {
	warn "\ncannot write $manifestnt: $!\n";
	close($inn);
}
else {

	while (my $line=<$inn>) {

		##### [^']* keeps the match inside one quoted value, so a later
		##### attribute on the same line is left untouched
		$line=~ s/'file:queries\/test-([^']*)'/'file:queries\/nt\/test-$1.nt'/g;
		$line=~ s/RDQL-Document/NT-Document/g;

		print {$oot} $line;

	}

	##### buffered write errors only show up when the handle is closed
	close($oot) or warn "\ncannot finish writing $manifestnt: $!\n";
	close($inn);
}


