20181225.  We have a GitHub site now.  Check it out by clicking the link below.

*************************************** https://github.com/hua-lab ***************************************

20181226. A useful regular expression for parsing a fasta sequence

       my ($header, $sequence) = ($fasta =~m/^>?([^\n]*)\n(.*)/s);

20180425. PBIO114O Bioinformatics practice (how_many_sequences.pl)

#! usr/bin/perl

use warnings;

use strict;

# which module is used. Bioperl is a collection of powerful Perl modules for bioinformatics, genomics, and life science

use Bio::DB::Fasta;

# a description for how to use this Perl script

my $USAGE="use the format below"."\n"."perl how_many_sequences.pl -f path to the file"."\n";

# example: perl how_many_sequences.pl -f ./arabidopsis/arabidopsis_genes.fa

# example: perl how_many_sequences.pl -f ./human/human_genome.fa

# An instruction will be printed out on screen for how to use this script


 die "Error opening","\n",$USAGE;


#read file 

my $file=$ARGV[1];

# the file is read by Bio::DB::Fasta

my $db=Bio::DB::Fasta->new("$file");

# Bio::DB::Fasta finds all ids in the database and save them as an array

my @ids=$db->ids;

# sort ids

# my @sort_ids=sort{$a cmp $b}@ids;

#print out ids

# foreach my $id(@sort_ids){    

#       print $id,"\n";

# }

# save the names of species and database into two strings

my ($species,$database)=($file=~/\/(.*)\/(.*)\.fa/);

#print out the inforamtion on screen

print "Species=",$species,"\n";

print "Database=",$database,"\n";

print "total_ids=",scalar @ids,"\n";

20171105.  Read files (e.g. fasta seq files) from a directory (perl script)

                 my $dir="./dir";

                 opendir (DIR,$dir);

                 my @files = grep(/.fa$/,readdir(DIR));

                 closedir (DIR);

20171015. Batch retrieval of RNA-Seq data from an FTP server on a terminal.

               wget -r 'ftp://username:password@ip/directory'

2016.  BLAST Result Parse (PERL, BIOPERL)

sub blast_parse {

    use strict;
    use warnings;
    use Bio::SearchIO;
    use Bio::DB::Fasta;

    my ($query_file, $blast_db)=@_;
    my $query_db=Bio::DB::Fasta->new($query_file);
    my $query_db_index=$query_file.".index";
    my @query_ids=$query_db->ids;
    my @query_ids_sort=sort{$a cmp $b}@query_ids;

    my @hit_ident_aligns;

    LINE: foreach my $query_id(@query_ids_sort){
        my $query_obj=$query_db->get_Seq_by_id($query_id);
        my $query_seq=$query_obj->seq;

        my $seq=">".$id_query."\n".$query_seq."\n";

        open BLAST,">blast.fa";
        print BLAST $seq;
        close BLAST;

        system ("blastx -query  blast.fa -db  $blast_db -evalue 1e-5 -out blast_fa_blastx -num_threads 4");
        # Go through BLAST reports one by one

        my $report = new Bio::SearchIO(        
                     -format => "blast"

        while(my $result = $report->next_result){
                if (! $result->hits()){

                    my $no_hit_query=$result->query_name;
                    my $no_hit=$no_hit_query,"\t","no_hit";
                    next LINE;


                    while(my $hit=$result->next_hit){

                        while(my $hsp=$hit->next_hsp){

                            my $hit_name=$hit->name;
                            my $hit_length=$hit->hit_length();
                            my $align_len=$hsp->length ('hit');
                            my $align_ratio=$align_len/$hit0_length;
                            my $percent_identity=$hsp->percent_identity;
                            my $hit_ident_align=$query_name."\t".$percent_identity."\t".$align_ratio."\t".$hit_name;        



        unlink     $query_db_index;
        unlink "blast.fa";
        unlink "blast_fa_blastx";


    return @hit_ident_aligns;



2016. Remove_add_zeros.pl

#! usr/bin/perl

use strict;
use warnings;

my $mystring="0000120003000";

$mystring =~ s/^0*//;

my  $mystring1 =  sprintf("%010d", $mystring);
 my $mystring2 =  sprintf("%.8f", $mystring);

print "mystring=",$mystring,"\n";
print "mystring1=",$mystring1,"\n";

print "mystring2=",$mystring2,"\n";


2016.  Compare two arrays (compare.pm)

package compare;

use strict;
use warnings;

# Class data and methods are referred to the collection of all objects
# in the class, there is not just one specific object
    my $_count = 0;
    sub get_count {
    sub _incr_count {
    sub _decr_count {

# The constructor for the class
sub new {
    my ($class, %arg) = @_;
    my $self = bless {
        _array1       => $arg{array1}      || croak("Error: no array1"),
        _array2       => $arg{array2}     || croak("Error: no array2"),

    }, $class;
    return $self;

sub same {
    my($self) = @_;
    my $array1=$self->{_array1};
    my $array2=$self->{_array2};

    my @array_one=@$array1;
    my @array_two=@$array2;

    my %array_one= map {$_, 1} @array_one;
    my @same = grep {$array_one {$_}} @array_two;

    return \@same;


sub difference {
    my($self) = @_;
    my $array1=$self->{_array1};
    my $array2=$self->{_array2};

    my @array_one=@$array1;
    my @array_two=@$array2;

    my %array_one= map {$_, 1} @array_one;
    my @difference = grep {!$array_one {$_}} @array_two;

    return \@difference;


    my($self) = @_;


2016.  finding_unique_dup_string_in_array.pl

#! /usr/bin/perl

use strict;
use warnings;

my @array=("rice", "arabidopsis", "rice", "corn","corn", "maize", "arabidopsis","Arabidopsis","F-box", "ubiquitin","ubiquitylation","proteasome","e3","e3","e3","E3");

foreach my $value (@array) {



print "array2=", @array,"\n";

print "Method1\n";

    my %seen1;

    $seen1{$_}++ for @array;

    print $seen1{$_} for @array;

    my @keys=keys %seen1;

    my @common=sort{$seen1{$b} <=> $seen1{$a}}@keys;

    print "unique1_number=",scalar @keys,"\n";
    print "unique=";

    @keys=sort{$a cmp $b}@keys;

    for (@keys){

        print $_,"\t";


    print "\nTop 3 common ones=",@common[0..2],"\n";

print "\n\nMethod2\n";
    my %seen2;
         my @unique;
    my @dups;
    foreach my $value (@array) {


        if (!$seen2{$value}) {    
               push (@unique, $value);
             $seen2{$value} = "true";


            push(@dups, $value);


    @unique=sort{$a cmp $b}@unique;
    @dups=sort{$a cmp $b}@dups;

    print "unique=",@unique,"\n";
    print "dups=",@dups,"\n";

print "\n\nMethod3\n";

    my %seen3;

    my @uniq=grep { !$seen3{$_}++ }@array;

    @uniq=sort{$a cmp $b}@uniq;

    foreach my $uniq(@uniq){

        print $uniq,"\t";


    print "\n";

print "\n\nMethod4\n";

    my @u=uniq(@array);

    @u=sort{$a cmp $b}@u;

    foreach my $u(@u){

        print $u,"\t";


    print "\n";

    sub uniq {
            my %seen;
            return grep { !$seen{$_}++ } @_;

print "\n\nMethod5\n";

    my %unique=map {$_=>1}@array;

    my @unique2= keys %unique,"\n";

    @unique2=sort{$a cmp $b}@unique2;

    print @unique2,"\n";

print "\n\nMethod6\n";

    my %seen6;

    my @joint= keys%seen6;

    @joint=sort @joint;
    print "uniques=", @joint,"\n";

print "\n\nMethod7\n";

    my %seen7;
    my @unis;
    my @dupes;

    ++$seen7{$_} for @array;

    print "uniques=",sort keys%seen7;
    print "\n";


        my $seen7 = $seen7{$_};

        push @unis, $_ if $seen7==1;
        push @dupes, $_ if $seen7>=2;


    print "single=", join(',', sort @unis),"\n";
    print "dups=", join(',', sort @dupes),"\n";



2016. Regular_Expression (Exactly m a's match)

#! usr/bin/perl

use warnings;

use strict;

my $a="ggaabbccaabb";


print $a,"\n";


Make a free website with Yola