Skip to content

Biostumblematic

A biophysicist teaches himself how to code

I’ve started doing some Mass Spec, and one of the issues we have is parsing our results.

The basic workflow is to convert the raw instrument files into peak lists, then use MASCOT to identify the proteins present in the sample. Unfortunately, the MASCOT results themselves can be a bit tedious to work with.

I’ve quickly written a couple of scripts to speed up two common tasks: subtracting the proteins identified in a control experiment from those identified in a sample, and comparing two lists of hits.

Here they are for your enjoyment. I’m trying to get back to a better organization of my scripts, so these are once again available on GitHub as well.

Control_subtractor.py

#! /usr/bin/env python

import sys, csv

def help():
	"""Print the usage instructions for Control_subtractor to stdout.

	Returns None. The name shadows the builtin ``help``; it is kept because
	the script's entry code calls it by this name.
	"""
	heavy_rule = '='*20
	light_rule = '-'*10
	usage = (
		heavy_rule,
		'Subtracts MASCOT hits of control MS from sample.',
		heavy_rule,
		'To use:',
		light_rule,
		'Export your data in CSV format from MASCOT',
		'Invoke the program, followed by the two file names.',
		'The file with your control data should be first',
		light_rule,
		'e.g.: Control_subtractor control.csv sample.csv',
		light_rule,
		'This will print the list to the console. If you',
		'would like to save the list, cat it to a new file',
		light_rule,
		'e.g.: Control_subtractor c.csv s.csv > hits.txt',
		light_rule,
	)
	# Single-argument print(...) parses and behaves the same on Python 2
	# (parenthesised expression) and Python 3 (function call).
	for text in usage:
		print(text)
	return

def _read_hits(path, header_rows):
	"""Return the unique, non-empty values of the second CSV column of *path*.

	The first *header_rows* rows are skipped. Values are returned in the order
	they first appear. Rows with fewer than two columns (e.g. blank lines)
	are ignored instead of raising IndexError.
	"""
	hits = []
	seen = set()
	with open(path) as handle:
		for index, row in enumerate(csv.reader(handle)):
			if index < header_rows:
				continue
			if len(row) < 2 or row[1] == '':
				continue
			if row[1] not in seen:
				seen.add(row[1])
				hits.append(row[1])
	return hits


def subtractor(header_rows=64):
	"""Print the sample hits that do not occur in the control.

	Reads the control CSV from ``sys.argv[1]`` and the sample CSV from
	``sys.argv[2]``, takes the second column of every data row as the hit
	identifier, and prints each sample identifier that is absent from the
	control, one per line, without duplicates.

	Only the hit list is written to stdout, so the output can be redirected
	to a file with one identifier per line.

	header_rows -- number of leading rows to skip as header info. The default
	of 64 matches what the original code skipped (its comment said 65 --
	TODO confirm against a real MASCOT export).

	Raises IOError/OSError if either file cannot be opened, and IndexError if
	fewer than two file names were given on the command line.
	"""
	control_hits = set(_read_hits(sys.argv[1], header_rows))
	# Read the whole sample before printing so that a read error does not
	# leave a partial list on stdout.
	sample_hits = [
		hit for hit in _read_hits(sys.argv[2], header_rows)
		if hit not in control_hits
	]

	for hit in sample_hits:
		print(hit)
	return

# Show the usage text when help is requested, or when the two required file
# names are missing (previously this crashed with IndexError).
if len(sys.argv) < 3 or sys.argv[1] in ('-h', '--help'):
	help()
else:
	subtractor()

Hit_list_compare.py

#!/usr/bin/env python

import sys, string

def help():
	"""Print the usage instructions for Hit_list_compare to stdout.

	Returns None. The name shadows the builtin ``help``; it is kept because
	the script's entry code calls it by this name.
	"""
	heavy_rule = '='*20
	light_rule = '-'*10
	usage = (
		heavy_rule,
		'Compares two lists of hits',
		heavy_rule,
		'To use:',
		light_rule,
		'Generate two lists of IPI identifiers',
		'Files should have one identifier per line',
		'This can be the output of Control_subtractor',
		'Invoke the program, followed by the two file names.',
		light_rule,
		'e.g.: ./Hit_list_compare.py list1.txt list2.txt',
		light_rule,
		'This will print the comparison to the console. If you',
		'would like to save the comparison, cat it to a new file',
		light_rule,
		'e.g.: ./Hit_list_compare.py 1.txt 2.txt > compare.txt',
		light_rule,
	)
	# Single-argument print(...) parses and behaves the same on Python 2
	# (parenthesised expression) and Python 3 (function call).
	for text in usage:
		print(text)
	return

def _strip_txt(path):
	"""Return *path* without a trailing '.txt' suffix, if it has one."""
	# str.rstrip('.txt') would strip any run of the characters '.', 't', 'x'
	# (turning 'output.txt' into 'outpu'), so remove the suffix explicitly.
	if path.endswith('.txt'):
		return path[:-len('.txt')]
	return path


def _read_identifiers(path):
	"""Return the non-blank lines of *path*, in order, stripped of whitespace."""
	with open(path, 'r') as handle:
		stripped = (line.strip() for line in handle)
		return [line for line in stripped if line]


def compare():
	"""Print which identifiers two hit lists share and which are unique.

	Reads one identifier per line from the files named by ``sys.argv[1]`` and
	``sys.argv[2]`` and prints three sections to stdout: the identifiers found
	in both files (in first-file order), those only in the first file, and
	those only in the second file. The two "UNIQUES" headings are labelled
	with the respective file name minus a trailing '.txt'.

	Line endings and surrounding whitespace are removed before comparing, so
	a final line without a newline still matches, and each identifier is
	printed on its own line with no blank line in between. Blank lines in the
	input are ignored.

	Raises IOError/OSError if either file cannot be opened, and IndexError if
	fewer than two file names were given on the command line.
	"""
	list1_name = _strip_txt(sys.argv[1])
	list2_name = _strip_txt(sys.argv[2])

	list1_list = _read_identifiers(sys.argv[1])
	list2_list = _read_identifiers(sys.argv[2])

	# Sets give constant-time membership tests; the lists keep file order
	# (and any repeated identifiers) for the printed output.
	in_list1 = set(list1_list)
	in_list2 = set(list2_list)

	matches = [item for item in list1_list if item in in_list2]
	list1_uniques = [item for item in list1_list if item not in in_list2]
	list2_uniques = [item for item in list2_list if item not in in_list1]

	sections = (
		('MATCHES', matches),
		(list1_name+' UNIQUES', list1_uniques),
		(list2_name+' UNIQUES', list2_uniques),
	)
	for title, items in sections:
		print('='*20)
		print(title)
		print('='*20)
		for item in items:
			print(item)

# Show the usage text when help is requested, or when the two required file
# names are missing (previously this crashed with IndexError).
if len(sys.argv) < 3 or sys.argv[1] in ('-h', '--help'):
	help()
else:
	compare()
Advertisements

%d bloggers like this: