{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# aim: to retrieve compounds from chembl tested against a target\n",
    "# starting point: UNIPROT ID of target (from e.g. www.rcsb.org)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# some general settings\n",
    "import json\n",
    "import warnings\n",
    "import logging"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# loading chembl package, creating chembl client\n",
    "import chembl_webresource_client\n",
    "from chembl_webresource_client import *\n",
    "from chembl_webresource_client.settings import Settings\n",
    "from chembl_webresource_client.new_client import new_client as chembl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "chembl.target.set_format('json')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# step 1: finding CHEMBL id of target\n",
    "# we poke around a bit in the data structure of the client\n",
    "print(dir(chembl))\n",
    "# look the first drug record up once and reuse it for every print below\n",
    "first_drug = chembl.drug[0]\n",
    "print(first_drug)\n",
    "print(first_drug['molecule_synonyms'][1]['molecule_synonym'])\n",
    "print(first_drug['molecule_chembl_id'])\n",
    "#print(chembl.target)\n",
    "#print(chembl.target[0]['target_components'][0]['accession'] )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# step 2: we search the chembl.target list for items that carry our\n",
    "# uniprot code in chembl.target[]['target_components'][0]['accession']\n",
    "#ABL_kinase_uniprotID =  \"P42684\"\n",
    "ABL_kinase_uniprotID = \"P00519\"\n",
    "print(ABL_kinase_uniprotID)\n",
    "chembl_abl_candidates = []\n",
    "errorcount = 0  # number of targets without a readable accession entry\n",
    "for i in range(len(chembl.target)):\n",
    "    # look the record up once per index and reuse it\n",
    "    target = chembl.target[i]\n",
    "    try:\n",
    "        accession = target['target_components'][0]['accession']\n",
    "    except (KeyError, IndexError, TypeError):\n",
    "        # only a missing or empty component/accession entry is counted here;\n",
    "        # any other error (and Ctrl-C) is allowed to surface\n",
    "        errorcount += 1\n",
    "        continue\n",
    "    if accession == ABL_kinase_uniprotID:\n",
    "        chembl_abl_candidates.append(target)\n",
    "print(len(chembl_abl_candidates))\n",
    "if chembl_abl_candidates:\n",
    "    print(chembl_abl_candidates[0]['target_components'][0]['component_description'],\n",
    "          chembl_abl_candidates[0]['target_chembl_id'] )\n",
    "else:\n",
    "    # avoid an IndexError when no target matched\n",
    "    print(\"no target found for accession \" + ABL_kinase_uniprotID)\n",
    "\n",
    "print(\"for \" + str(errorcount) + \" targets no accession id was found\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# bingo, the component description matches, we have found our CHEMBL id\n",
    "# the filter call below does a similar job, but also returns the CHEMBL id of the ABL family of enzymes\n",
    "chembl_abl_candidates_alt = chembl.target.filter(target_components__accession=ABL_kinase_uniprotID)\n",
    "# collect (target id, preferred name) pairs for every hit\n",
    "id_name_pairs = []\n",
    "for candidate in chembl_abl_candidates_alt:\n",
    "    id_name_pairs.append((candidate['target_chembl_id'], candidate['pref_name']))\n",
    "print (id_name_pairs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "print(chembl.activity)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Let's get all measurements for a target\n",
    "chembl_abl_activities = chembl.activity.filter(target_chembl_id='CHEMBL1862')\n",
    "# number of activity records returned for that target\n",
    "n_abl_activities = len(list(chembl_abl_activities))\n",
    "print(n_abl_activities)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import xmltodict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# next, we are retrieving details of the assay each activity was measured in\n",
    "# only assays with confidence score 9 are used\n",
    "# we keep binding (B) and functional (F) assays\n",
    "# compounds with a standard value below 10 nM are considered as actives\n",
    "# compounds with a standard value above 10 microM (10000 nM) are considered as inactives\n",
    "REQUIRED_CONFIDENCE = 9\n",
    "KEPT_ASSAY_TYPES = ['B', 'F']\n",
    "ACTIVE_BELOW_NM = 10\n",
    "DECOY_ABOVE_NM = 10000\n",
    "\n",
    "compounds = CompoundResource()\n",
    "chembl.assay.set_format('xml')\n",
    "# assay_chembl_id -> parsed assay record, so every assay is fetched and parsed only once\n",
    "assay_cache = {}\n",
    "selected_actives = []\n",
    "selected_decoys = []\n",
    "for i in range(len(chembl_abl_activities)):\n",
    "    x = chembl_abl_activities[i]\n",
    "    ID = x['assay_chembl_id']\n",
    "    if ID not in assay_cache:\n",
    "        assay_cache[ID] = xmltodict.parse(chembl.assay.get(ID))['assay']\n",
    "    assay = assay_cache[ID]\n",
    "    if int(assay['confidence_score']) != REQUIRED_CONFIDENCE:\n",
    "        continue\n",
    "    if assay['assay_type'] not in KEPT_ASSAY_TYPES:\n",
    "        continue\n",
    "    if str(x['standard_units']) != 'nM':\n",
    "        continue\n",
    "    # skip records without a value or a structure instead of failing on None\n",
    "    if x['standard_value'] is None or not x['canonical_smiles']:\n",
    "        continue\n",
    "    value_nM = float(x['standard_value'])\n",
    "    if value_nM < ACTIVE_BELOW_NM:\n",
    "        selected_actives.append(x['canonical_smiles'])\n",
    "    elif value_nM > DECOY_ABOVE_NM:\n",
    "        selected_decoys.append(x['canonical_smiles'])\n",
    "print (len(selected_actives), len(selected_decoys))\n",
    "# drop duplicate structures\n",
    "selected_actives = set(selected_actives)\n",
    "known_drugs = []\n",
    "with open(\"ABL_smi\",\"w\") as fh:\n",
    "    # sorted() gives the same processing order on every run\n",
    "    for e in sorted(selected_actives):\n",
    "        # one active SMILES per line in the output file\n",
    "        print(e, file=fh)\n",
    "        print(e)\n",
    "        c = compounds.get(smiles=e)\n",
    "        if not c:\n",
    "            # the lookup returned nothing for this structure\n",
    "            continue\n",
    "        print(c[0]['molecularFormula'],c[0]['knownDrug'])\n",
    "        if c[0]['knownDrug'] == 'Yes':\n",
    "            known_drugs.append((e,c))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "print(len(known_drugs))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import rdkit"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from rdkit import Chem\n",
    "from rdkit.Chem import AllChem"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# show each known drug's SMILES next to the ChEMBL id of its first lookup hit\n",
    "for smiles, hits in known_drugs:\n",
    "    print(smiles, hits[0]['chemblId'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# SMILES and display names of the known drugs; the two lists are filled in\n",
    "# lockstep so that entry k of one belongs to entry k of the other\n",
    "known_drug_smiles = []\n",
    "known_drug_names = []\n",
    "\n",
    "# the size of the drug list does not change inside the loops, so read it once\n",
    "n_drugs = len(chembl.drug)\n",
    "print(n_drugs)\n",
    "for m in known_drugs:\n",
    "    wanted_id = m[1][0]['chemblId']\n",
    "    for d in range(n_drugs):\n",
    "        # look the record up once per index and reuse it\n",
    "        drug = chembl.drug[d]\n",
    "        if drug['molecule_chembl_id'] == wanted_id:\n",
    "            name = drug['molecule_synonyms'][1]['molecule_synonym']\n",
    "            print(name)\n",
    "            print(m[0])\n",
    "            known_drug_smiles.append(m[0])\n",
    "            known_drug_names.append(name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# draw all known drugs in one grid image, labelled with their names\n",
    "from rdkit.Chem import Draw\n",
    "mols = []\n",
    "mol_legends = []  # built alongside mols so every picture keeps its own label\n",
    "print(len(known_drug_smiles))\n",
    "for d, name in zip(known_drug_smiles, known_drug_names):\n",
    "    print(d)\n",
    "    m = Chem.MolFromSmiles(d)\n",
    "    if m is None:\n",
    "        # RDKit returns None for a SMILES string it cannot parse\n",
    "        print(\"could not parse SMILES, skipping: \" + d)\n",
    "        continue\n",
    "    AllChem.Compute2DCoords(m)\n",
    "    mols.append(m)\n",
    "    mol_legends.append(name)\n",
    "print(len(mols))\n",
    "img = Draw.MolsToGridImage(mols, molsPerRow=3,legends=mol_legends)\n",
    "img.save(\"ABL-drugs.png\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}