{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Retrieving ChEMBL compounds tested against a target\n",
    "\n",
    "Aim: retrieve compounds from ChEMBL that were tested against a target.\n",
    "\n",
    "Starting point: the UniProt ID of the target (from e.g. www.rcsb.org)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# all imports live in this one cell so that a fresh kernel fails fast on a missing package\n",
    "# some general settings\n",
    "import json\n",
    "import warnings\n",
    "import logging\n",
    "\n",
    "# loading chembl package, creating chembl client\n",
    "# (the star import is kept because CompoundResource below comes from it)\n",
    "import chembl_webresource_client\n",
    "from chembl_webresource_client import *\n",
    "from chembl_webresource_client.settings import Settings\n",
    "from chembl_webresource_client.new_client import new_client as chembl\n",
    "\n",
    "# parsing of the XML assay records\n",
    "import xmltodict\n",
    "\n",
    "# cheminformatics toolkit used for drawing the structures\n",
    "import rdkit\n",
    "from rdkit import Chem\n",
    "from rdkit.Chem import AllChem\n",
    "from rdkit.Chem import Draw"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# configuration: everything a reader might want to tune\n",
    "\n",
    "# UniProt accession of the target (\"P42684\" was tried as an alternative earlier)\n",
    "ABL_kinase_uniprotID = \"P00519\"\n",
    "\n",
    "# ChEMBL id whose activities are downloaded\n",
    "# NOTE(review): presumably the id that step 2 reports for the accession above - confirm\n",
    "ABL_TARGET_CHEMBL_ID = 'CHEMBL1862'\n",
    "\n",
    "# assay filters: only assays with this confidence score and these assay types are kept\n",
    "REQUIRED_CONFIDENCE_SCORE = 9\n",
    "ACCEPTED_ASSAY_TYPES = ('B', 'F')  # binding and functional assays\n",
    "\n",
    "# activity thresholds, applied to standard values reported in nM\n",
    "ACTIVE_BELOW_NM = 10        # better than 10 nM      -> active\n",
    "INACTIVE_ABOVE_NM = 10000   # worse than 10 microM   -> inactive (decoy)\n",
    "\n",
    "# output files\n",
    "ACTIVES_SMILES_FILE = \"ABL_smi\"\n",
    "DRUG_IMAGE_FILE = \"ABL-drugs.png\"\n",
    "\n",
    "chembl.target.set_format('json')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 1: exploring the ChEMBL client"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# step 1: finding CHEMBL id of target\n",
    "# we poke a bit around in the data structure\n",
    "print(dir(chembl))\n",
    "first_drug = chembl.drug[0]  # look the record up once and reuse it for every print\n",
    "print(first_drug)\n",
    "first_drug_synonyms = first_drug['molecule_synonyms'] or []\n",
    "if len(first_drug_synonyms) > 1:\n",
    "    # only shown when a second synonym actually exists\n",
    "    print(first_drug_synonyms[1]['molecule_synonym'])\n",
    "print(first_drug['molecule_chembl_id'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 2: finding the ChEMBL id of the target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# step 2: we search chembl.target list for items that have our\n",
    "# uniprot code in chembl.target[]['target_components'][0]['accession'] dictionary\n",
    "print(ABL_kinase_uniprotID)\n",
    "chembl_abl_candidates = []\n",
    "errorcount = 0\n",
    "for i in range(len(chembl.target)):\n",
    "    target = chembl.target[i]  # one lookup per target, reused below\n",
    "    try:\n",
    "        accession = target['target_components'][0]['accession']\n",
    "    except (KeyError, IndexError, TypeError):\n",
    "        # target without a first component or without an accession entry;\n",
    "        # anything else (e.g. a failed request) is deliberately not swallowed\n",
    "        errorcount += 1\n",
    "        continue\n",
    "    if accession == ABL_kinase_uniprotID:\n",
    "        chembl_abl_candidates.append(target)\n",
    "\n",
    "print(len(chembl_abl_candidates))\n",
    "if chembl_abl_candidates:\n",
    "    best_candidate = chembl_abl_candidates[0]\n",
    "    print(best_candidate['target_components'][0]['component_description'],\n",
    "          best_candidate['target_chembl_id'])\n",
    "else:\n",
    "    print(\"no target found for accession \" + ABL_kinase_uniprotID)\n",
    "\n",
    "print(\"for \" + str(errorcount) + \" targets no accession id was found\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# bingo, the component description matches, we have found our CHEMBL id\n",
    "# this code snippet does a similar job, but returns the CHEMBL id for the ABL family of enzymes as well\n",
    "chembl_abl_candidates_alt = chembl.target.filter(target_components__accession=ABL_kinase_uniprotID)\n",
    "print ([(x['target_chembl_id'], x['pref_name']) for x in chembl_abl_candidates_alt])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 3: collecting the activities measured against the target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "print(chembl.activity)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Let's get all measurements for a target\n",
    "# the query result is materialised once, so the later cells work on an\n",
    "# in-memory list instead of going back to the web service\n",
    "chembl_abl_activities = list(chembl.activity.filter(target_chembl_id=ABL_TARGET_CHEMBL_ID))\n",
    "print(len(chembl_abl_activities))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# next, we are retrieving details from the assay the activity was measured in\n",
    "compounds = CompoundResource()\n",
    "chembl.assay.set_format('xml')\n",
    "\n",
    "# many activities share one assay, so every assay record is downloaded only once\n",
    "assay_details_cache = {}\n",
    "\n",
    "\n",
    "def get_assay_details(assay_chembl_id):\n",
    "    \"\"\"Return the parsed 'assay' record for `assay_chembl_id`.\n",
    "\n",
    "    The XML answer of `chembl.assay.get` is parsed with xmltodict and kept in\n",
    "    `assay_details_cache`, so repeated calls for the same id do not trigger a\n",
    "    new request.\n",
    "    \"\"\"\n",
    "    if assay_chembl_id not in assay_details_cache:\n",
    "        raw_details = chembl.assay.get(assay_chembl_id)\n",
    "        assay_details_cache[assay_chembl_id] = xmltodict.parse(raw_details)['assay']\n",
    "    return assay_details_cache[assay_chembl_id]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# we keep binding and functional assays\n",
    "# compounds of 10nM or better are considered as actives\n",
    "# compounds of 10microM or worse are considered as inactives\n",
    "selected_actives = []\n",
    "selected_decoys = []\n",
    "for x in chembl_abl_activities:\n",
    "    assay = get_assay_details(x['assay_chembl_id'])\n",
    "    if int(assay['confidence_score']) != REQUIRED_CONFIDENCE_SCORE:\n",
    "        continue\n",
    "    if assay['assay_type'] not in ACCEPTED_ASSAY_TYPES:\n",
    "        continue\n",
    "    if str(x['standard_units']) != 'nM':\n",
    "        continue\n",
    "    if x['standard_value'] is None or x['canonical_smiles'] is None:\n",
    "        # nothing to compare against the thresholds / no structure to keep\n",
    "        continue\n",
    "    standard_value = float(x['standard_value'])\n",
    "    if standard_value < ACTIVE_BELOW_NM:\n",
    "        selected_actives.append(x['canonical_smiles'])\n",
    "    elif standard_value > INACTIVE_ABOVE_NM:\n",
    "        selected_decoys.append(x['canonical_smiles'])\n",
    "\n",
    "print (len(selected_actives), len(selected_decoys))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 4: known drugs among the actives"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# de-duplicate the actives; sorted so that file content and printed order\n",
    "# are the same on every run (plain set iteration order is not)\n",
    "selected_actives = sorted(set(selected_actives))\n",
    "\n",
    "known_drugs = []\n",
    "with open(ACTIVES_SMILES_FILE, \"w\") as fh:\n",
    "    for e in selected_actives:\n",
    "        print(e)\n",
    "        fh.write(e + \"\\n\")  # one active SMILES per line\n",
    "        c = compounds.get(smiles=e)\n",
    "        try:\n",
    "            compound_record = c[0]\n",
    "        except (IndexError, KeyError, TypeError):\n",
    "            # the lookup returned nothing that can be indexed - skip this structure\n",
    "            print(\"no compound record returned for \" + e)\n",
    "            continue\n",
    "        print(compound_record['molecularFormula'], compound_record['knownDrug'])\n",
    "        if compound_record['knownDrug'] == 'Yes':\n",
    "            # keep the SMILES together with the complete lookup result\n",
    "            known_drugs.append((e, c))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "print(len(known_drugs))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "for m in known_drugs:\n",
    "    print(m[0],m[1][0]['chemblId'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def drug_display_name(drug):\n",
    "    \"\"\"Return a printable name for a `chembl.drug` record.\n",
    "\n",
    "    The second entry of 'molecule_synonyms' is preferred; when the record has\n",
    "    only one synonym that one is used, and when it has none the\n",
    "    'molecule_chembl_id' is returned instead.\n",
    "    \"\"\"\n",
    "    synonyms = drug['molecule_synonyms'] or []\n",
    "    if len(synonyms) > 1:\n",
    "        return str(synonyms[1]['molecule_synonym'])\n",
    "    if synonyms:\n",
    "        return str(synonyms[0]['molecule_synonym'])\n",
    "    return str(drug['molecule_chembl_id'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "known_drug_smiles = []\n",
    "known_drug_names = []\n",
    "\n",
    "# walk through chembl.drug a single time and remember the records of the\n",
    "# drugs we are interested in, instead of rescanning the list per known drug\n",
    "wanted_chembl_ids = set(m[1][0]['chemblId'] for m in known_drugs)\n",
    "drug_records_by_id = {}\n",
    "if wanted_chembl_ids:\n",
    "    for d in range(len(chembl.drug)):\n",
    "        drug = chembl.drug[d]\n",
    "        if drug['molecule_chembl_id'] in wanted_chembl_ids:\n",
    "            drug_records_by_id.setdefault(drug['molecule_chembl_id'], []).append(drug)\n",
    "\n",
    "for m in known_drugs:\n",
    "    for drug in drug_records_by_id.get(m[1][0]['chemblId'], []):\n",
    "        drug_name = drug_display_name(drug)\n",
    "        print(drug_name)\n",
    "        print(m[0])\n",
    "        known_drug_smiles.append(m[0])\n",
    "        known_drug_names.append(drug_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 5: drawing the known drugs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "mols = []\n",
    "mol_legends = []\n",
    "print(len(known_drug_smiles))\n",
    "for d, drug_name in zip(known_drug_smiles, known_drug_names):\n",
    "    print(d)\n",
    "    m = Chem.MolFromSmiles(d)\n",
    "    if m is None:\n",
    "        # unparsable SMILES: leave it out so that legends stay aligned with mols\n",
    "        print(\"could not parse SMILES, skipping: \" + d)\n",
    "        continue\n",
    "    AllChem.Compute2DCoords(m)\n",
    "    mols.append(m)\n",
    "    mol_legends.append(drug_name)\n",
    "print(len(mols))\n",
    "if mols:\n",
    "    img = Draw.MolsToGridImage(mols, molsPerRow=3, legends=mol_legends)\n",
    "    img.save(DRUG_IMAGE_FILE)\n",
    "else:\n",
    "    print(\"no structures to draw\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}