Rev. | dcc972c4aa27a52f4caa7382fcb5b816f47b1fea |
---|---|
Size | 1,931 bytes |
Time | 2008-04-11 21:32:10 |
Author | iselllo |
Log Message | I added the file search_pdf_create_txt.sh which is a simple script to
|
#!/bin/bash
# Interactive helper for a directory of PDF files.
# Place this file in the directory of pdf files you want to search.
#
# This first part prints the menu and stores the answer in $choice, which
# the remainder of the script compares against 1 (create/update the index)
# and 2 (search).
printf '%s\n' \
  "Do you want to: " \
  "1- create/update index" \
  "2- search biblio" \
  "_________ " \
  "NOTE: index only need to be created once"
# NOTE: plain `read` (no -r) treats a backslash in the answer as an escape.
read choice
printf 'your choice [%s]\n' "$choice"
#######################################
# Create or update the text index of the PDF files in the current directory.
#
# Every regular file matching *.pdf is converted with pdftotext into
# ./text_format/<name>.txt, where <name> is the file name with only the
# trailing ".pdf" removed (so "paper.v2.pdf" -> "paper.v2.txt"). Files whose
# .txt already exists are skipped. Progress is printed on stdout and also
# appended to ./log_treat, which is recreated on every run.
#
# Globals:   none
# Arguments: none
# Outputs:   progress messages on stdout, conversion failures on stderr
# Returns:   0 if every pending conversion succeeded, non-zero otherwise
#######################################
build_index() {
  local pdf base
  local count=0 failed=0
  local -a pdfs=()

  # Start from a clean log; also drop the scratch list older versions of
  # this script left behind (it is no longer needed).
  rm -f -- ./tmp_menu_file ./log_treat

  if [[ -d ./text_format ]]; then
    echo "an index has already been created"
    echo "update in progress"
  else
    mkdir ./text_format || return 1
  fi

  echo "reading directory"
  for pdf in *.pdf; do
    # An unmatched glob stays literal ("*.pdf"); the -f test filters it out
    # together with directories that happen to end in .pdf.
    [[ -f "$pdf" ]] || continue
    pdfs+=("$pdf")
  done
  echo "reading done"
  echo "[${#pdfs[@]}] pdf files"

  for pdf in "${pdfs[@]}"; do
    # Strip only the final extension; cutting at the first dot would turn
    # "paper.v2.pdf" into "paper" and point pdftotext at a missing file.
    base=${pdf%.pdf}
    count=$((count + 1))
    echo "loading $base" >>log_treat
    echo "loading $base"
    if [[ -f "./text_format/$base.txt" ]]; then
      echo "file [$base] already treated "
    # The "./" prefix keeps a name starting with "-" from being read as an
    # option by pdftotext.
    elif pdftotext "./$pdf" "./text_format/$base.txt"; then
      echo "file [$count,$base] treated " >>log_treat
      echo "file [$count,$base] treated "
    else
      failed=$((failed + 1))
      # Remove any partial output so the next update retries this file.
      rm -f -- "./text_format/$base.txt"
      echo "file [$count,$base] conversion failed " >>log_treat
      echo "file [$count,$base] conversion failed " >&2
    fi
  done

  (( failed == 0 ))
}

# Menu choice 1: create/update the index. The quoted test also copes with
# an empty or non-numeric answer without a "[: unary operator" error.
if [[ "${choice:-}" == 1 ]]; then
  build_index
fi
# Print all arguments on one line separated by single spaces (an empty
# list prints an empty line).
print_list() {
  local IFS=' '
  printf '%s\n' "$*"
}

#######################################
# Keep the files that contain a pattern.
# Arguments: $1 - pattern (extended regex, matched case-insensitively)
#            $2... - candidate files
# Outputs:   sets the array `matched` (the caller's local one if declared,
#            otherwise a global) to the candidates that contain the pattern
#######################################
filter_files() {
  local pattern=$1 name
  shift
  matched=()
  # With no file operand grep would read stdin and swallow the keywords the
  # user types next, so an empty candidate list must stop here.
  (( $# > 0 )) || return 0
  while IFS= read -r name; do
    matched+=("$name")
  done < <(grep -E -i -l -e "$pattern" -- "$@")
}

#######################################
# Interactive keyword search over ./text_format/*.txt.
#
# Repeatedly reads up to three keywords from stdin. For each keyword the
# files containing it are listed, and for two or three keywords the files
# containing all of them (AND) are listed as well. Results are computed in
# memory, so nothing from one query can leak into the next and no scratch
# files are written into the index directory.
#
# Arguments: none
# Outputs:   prompts and result lists on stdout, problems on stderr
# Returns:   0 when stdin reaches end of input, 1 if the index is missing
#            or empty. Changes the working directory to ./text_format.
#######################################
search_biblio() {
  local key1 key2 key3
  local -a corpus hits matched

  if ! cd ./text_format/; then
    echo "cannot enter ./text_format: create the index first" >&2
    return 1
  fi

  corpus=(*.txt)
  # An unmatched glob stays literal, so test that the first entry exists.
  if [[ ! -e "${corpus[0]}" ]]; then
    echo "no .txt files found in ./text_format" >&2
    return 1
  fi

  while true; do
    echo "enter up to 3 keyword(s) [AND]"
    echo "_____________[crtl-c for end search]"
    # -r keeps backslashes so regex escapes such as "\." reach grep intact.
    # Leave the loop at end of input instead of spinning forever; a final
    # line without a trailing newline is still processed.
    if ! read -r key1 key2 key3 && [[ -z "$key1" ]]; then
      break
    fi
    printf 'keywords: [%s,%s,%s]\n' "$key1" "$key2" "$key3"

    filter_files "$key1" "${corpus[@]}"
    hits=("${matched[@]}")
    printf 'results for [%s] only:\n' "$key1"
    print_list "${hits[@]}"

    [[ -n "$key2" ]] || continue
    printf 'results for [%s] only:\n' "$key2"
    filter_files "$key2" "${corpus[@]}"
    print_list "${matched[@]}"
    # AND: narrow the previous hits instead of searching the whole index.
    filter_files "$key2" "${hits[@]}"
    hits=("${matched[@]}")
    printf 'results for [%s] and [%s]:\n' "$key1" "$key2"
    print_list "${hits[@]}"

    [[ -n "$key3" ]] || continue
    printf 'results for [%s] only:\n' "$key3"
    filter_files "$key3" "${corpus[@]}"
    print_list "${matched[@]}"
    filter_files "$key3" "${hits[@]}"
    hits=("${matched[@]}")
    printf 'results for [%s] and [%s] and [%s]:\n' "$key1" "$key2" "$key3"
    print_list "${hits[@]}"
  done
  return 0
}

# Menu choice 2: search the index, then leave the script.
if [[ "${choice:-}" == 2 ]]; then
  search_biblio
  exit
fi