...
No Format |
---|
# Ocr plugin, version 2
package Ocr;
use strict;
use Mail::SpamAssassin;
use Mail::SpamAssassin::Util;
use Mail::SpamAssassin::Plugin;
our @ISA = qw (Mail::SpamAssassin::Plugin);
# Constructor.
#
# Builds the plugin object through the parent class constructor and
# registers "check_ocr" as an eval rule on it.
#
# Parameters:
#   $class  - class name, or an existing object whose class is reused.
#   $mailsa - value handed through unchanged to the parent constructor.
#
# Returns the blessed plugin object.
sub new {
    my $invocant = shift;
    my $mailsa   = shift;

    # Accept both Ocr->new(...) and $existing_object->new(...).
    my $class = ref($invocant) || $invocant;

    my $plugin = $class->SUPER::new($mailsa);
    bless $plugin, $class;

    $plugin->register_eval_rule("check_ocr");

    return $plugin;
}
# Eval rule: run OCR over every GIF part of a message and look for a fixed
# list of words in the recognised text.
#
# Each MIME part returned by find_parts("image") whose content type is
# image/gif is decoded, piped through "/usr/bin/convert -flatten" and
# "/usr/bin/gocr", and the OCR output is scanned line by line. Every
# (line, word) pair that matches case-insensitively adds one to the hit
# counter; a word is counted at most once per line.
#
# Parameters:
#   $self - the plugin object (not used by this routine).
#   $pms  - per-message object; $pms->{msg} must provide find_parts().
#
# Returns a true value when more than one hit was counted over all GIF
# parts, a false value otherwise. Images whose temp file or pipeline cannot
# be set up are skipped rather than treated as errors.
sub check_ocr {
    my ( $self, $pms ) = @_;

    # Loaded here so this routine does not rely on the file's "use" list.
    require File::Temp;

    my @words =
      ( 'company', 'money', 'stock', 'million', 'thousand', 'buy', 'price', 'don\'t' );
    my $cnt = 0;

    # If convert/gocr exits early, writing to the pipe would otherwise
    # terminate the calling process with SIGPIPE.
    local $SIG{PIPE} = 'IGNORE';

    foreach my $part ( $pms->{msg}->find_parts("image") ) {
        my ($ctype) = Mail::SpamAssassin::Util::parse_content_type(
            $part->get_header('content-type') );
        next unless defined $ctype && $ctype eq "image/gif";

        # Unpredictable, exclusively created file instead of a name derived
        # from the PID; it is removed when $tmp goes out of scope. The
        # generated name contains only shell-safe characters.
        my $tmp = eval {
            File::Temp->new(
                TEMPLATE => 'spamassassin.ocr.XXXXXXXX',
                DIR      => '/tmp',
                UNLINK   => 1,
            );
        } or next;
        my $tmp_path = $tmp->filename;

        # The command is a shell pipeline on purpose: convert feeds gocr,
        # whose output is redirected into the temp file.
        open( my $ocr_in, '|-',
            "/usr/bin/convert -flatten - pnm:-|/usr/bin/gocr -i - > $tmp_path" )
          or next;
        binmode $ocr_in;    # image data is binary
        foreach my $chunk ( $part->decode() ) {
            print {$ocr_in} $chunk;
        }
        close $ocr_in;      # waits for the pipeline to finish writing

        open( my $ocr_out, '<', $tmp_path ) or next;
        while ( my $line = <$ocr_out> ) {
            foreach my $word (@words) {
                $cnt++ if $line =~ m/\Q$word\E/i;
            }
        }
        close $ocr_out;
    }

    return ( $cnt > 1 );
}
1;
|