Doh,
adding the hydrogens fixes it.
> CDKHydrogenAdder.getInstance(SilentChemObjectBuilder.getInstance()).addImplicitHydrogens(mdlDB00197);
> CDKHydrogenAdder.getInstance(SilentChemObjectBuilder.getInstance()).addImplicitHydrogens(smiDB00197);
On 22 May 2013, at 15:13, John May <[email protected]> wrote:
> Hi Gauri,
>
> Try using one of the other fingerprinters - the issue does need further
> investigation but is likely something messy underneath. If you need a
> substructure-based one, PubChem is probably the best bet.
>
> Here are the results for your molecules, with the atom types and aromaticity set.
>
> SubstructureFingerprinter
> MDL: {17, 87, 95, 142, 273, 274, 294, 301, 306}
> Smiles: {0, 1, 17, 87, 95, 142, 168, 273, 274, 294, 299, 300, 301, 306}
>
> EStateFingerprinter
> MDL: {15, 16, 18, 34, 35, 49}
> Smiles: {6, 8, 11, 12, 15, 16, 18, 23, 33, 34, 35, 49}
>
> MACCSFingerprinter
> MDL: {21, 35, 46, 56, 65, 71, 80, 82, 87, 88, 91, 95, 97, 104, 105, 109,
> 111, 112, 116, 119, 120, 124, 125, 126, 135, 136, 139, 142, 143, 144, 145,
> 149, 151, 153, 155, 156, 157, 158, 160, 161, 162, 163, 164}
> Smiles: {21, 35, 46, 56, 65, 71, 80, 82, 87, 88, 89, 90, 91, 95, 97, 104,
> 105, 107, 108, 109, 111, 112, 114, 115, 116, 117, 119, 120, 124, 125, 126,
> 128, 130, 131, 135, 136, 138, 139, 140, 142, 143, 144, 145, 146, 148, 149,
> 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164}
>
> KlekotaRothFingerprinter
> MDL: {1153, 1192, 2065, 2546, 2854, 2881, 2974, 3024, 3057, 3223, 3294,
> 3359, 3368, 3398, 3407, 3435, 3454, 3527, 3528, 3559, 3573, 3590, 3595, 3605,
> 3639, 3646, 3647, 3658, 3670, 3680, 3681, 3691, 3702, 3703, 3709, 3736, 3739,
> 3741, 3743, 3749, 3772, 3780, 3787, 3808, 3815, 3881, 3925, 3927, 3941, 3955,
> 3999, 4018, 4079, 4191, 4207, 4236, 4295, 4330, 4520, 4727, 4735, 4828, 4842,
> 4851, 4855}
> Smiles: {0, 19, 296, 297, 301, 302, 340, 581, 591, 646, 668, 676, 838, 839,
> 1147, 1153, 1155, 1192, 1641, 2065, 2546, 2710, 2854, 2855, 2881, 2948, 2974,
> 3024, 3057, 3223, 3294, 3359, 3368, 3398, 3407, 3435, 3454, 3527, 3528, 3559,
> 3573, 3590, 3595, 3605, 3639, 3646, 3647, 3658, 3670, 3680, 3681, 3691, 3702,
> 3703, 3709, 3736, 3739, 3741, 3743, 3749, 3772, 3780, 3787, 3808, 3815, 3881,
> 3925, 3927, 3941, 3955, 3999, 4018, 4079, 4191, 4207, 4236, 4295, 4330, 4520,
> 4727, 4735, 4828, 4842, 4851, 4855}
>
> PubchemFingerprinter
> MDL: {9, 10, 11, 12, 14, 18, 19, 20, 33, 143, 145, 146, 178, 179, 184,
> 185, 186, 192, 255, 257, 284, 285, 286, 293, 332, 333, 337, 341, 351, 352,
> 353, 355, 356, 381, 382, 384, 390, 405, 412, 416, 420, 430, 439, 441, 443,
> 451, 470, 476, 489, 490, 498, 507, 520, 524, 535, 541, 548, 552, 556, 564,
> 565, 567, 570, 573, 574, 578, 579, 582, 584, 586, 589, 592, 594, 595, 603,
> 604, 606, 608, 613, 614, 618, 619, 620, 626, 632, 634, 637, 640, 641, 645,
> 650, 651, 655, 660, 662, 664, 666, 668, 677, 678, 679, 680, 681, 683, 684,
> 688, 689, 692, 696, 697, 698, 699, 704, 708, 709, 710, 712, 713, 714, 719,
> 734, 735, 755, 756, 776, 777, 782, 797, 798, 818, 819}
> Smiles: {9, 10, 11, 12, 14, 18, 19, 20, 33, 143, 145, 146, 178, 179, 184,
> 185, 186, 192, 255, 257, 283, 284, 285, 286, 293, 299, 308, 332, 333, 337,
> 341, 344, 346, 349, 351, 352, 353, 355, 356, 366, 368, 370, 371, 374, 381,
> 382, 384, 390, 392, 393, 405, 406, 412, 416, 420, 430, 434, 439, 441, 443,
> 446, 451, 470, 476, 489, 490, 498, 507, 516, 520, 524, 535, 541, 542, 548,
> 552, 556, 564, 565, 567, 570, 573, 574, 578, 579, 582, 584, 586, 589, 590,
> 592, 594, 595, 599, 603, 604, 606, 608, 613, 614, 617, 618, 619, 620, 626,
> 632, 634, 637, 640, 641, 643, 645, 650, 651, 655, 660, 662, 664, 666, 667,
> 668, 677, 678, 679, 680, 681, 683, 684, 688, 689, 692, 696, 697, 698, 699,
> 704, 708, 709, 710, 712, 713, 714, 719, 734, 735, 755, 756, 776, 777, 782,
> 797, 798, 818, 819}
>
> Fingerprinter
> MDL: {0, 4, 17, 21, 25, 34, 45, 48, 50, 52, 54, 57, 65, 66, 70, 76, 77,
> 81, 83, 84, 86, 88, 89, 95, 102, 103, 106, 107, 108, 111, 112, 113, 117, 120,
> 122, 125, 128, 136, 138, 143, 148, 149, 158, 159, 161, 162, 178, 184, 185,
> 188, 191, 197, 205, 213, 215, 222, 233, 235, 236, 238, 239, 245, 251, 253,
> 259, 262, 270, 271, 273, 275, 278, 283, 285, 290, 295, 300, 311, 319, 320,
> 324, 326, 333, 334, 335, 344, 349, 351, 358, 359, 363, 366, 368, 372, 374,
> 376, 382, 388, 391, 396, 397, 406, 409, 413, 414, 418, 419, 422, 425, 429,
> 430, 434, 440, 442, 443, 448, 454, 465, 470, 473, 474, 482, 491, 492, 496,
> 497, 498, 500, 508, 511, 513, 514, 519, 520, 522, 533, 534, 542, 543, 546,
> 547, 549, 550, 554, 557, 566, 570, 574, 580, 587, 590, 592, 594, 599, 600,
> 601, 613, 617, 621, 625, 626, 634, 636, 637, 642, 643, 644, 648, 656, 657,
> 661, 670, 675, 683, 693, 699, 701, 703, 706, 711, 736, 741, 742, 744, 745,
> 748, 752, 755, 763, 766, 768, 770, 771, 774, 793, 800, 808, 809, 810, 829,
> 830, 831, 839, 840, 855, 867, 872, 883, 887, 898, 907, 909, 912, 916, 917,
> 919, 929, 932, 937, 941, 943, 947, 953, 967, 971, 977, 980, 985, 990, 999,
> 1018}
> Smiles: {0, 4, 17, 21, 25, 34, 45, 48, 50, 52, 54, 57, 65, 66, 70, 76, 77,
> 81, 83, 84, 86, 88, 89, 95, 102, 103, 106, 107, 108, 111, 112, 113, 117, 120,
> 122, 125, 128, 136, 138, 143, 148, 149, 158, 159, 161, 162, 178, 184, 185,
> 188, 191, 197, 205, 213, 215, 222, 233, 235, 236, 238, 239, 245, 251, 253,
> 259, 262, 270, 271, 273, 275, 278, 283, 285, 290, 295, 300, 311, 319, 320,
> 324, 326, 333, 334, 335, 344, 349, 351, 358, 359, 363, 366, 368, 372, 374,
> 376, 382, 388, 391, 396, 397, 406, 409, 413, 414, 418, 419, 422, 425, 429,
> 430, 434, 440, 442, 443, 448, 454, 465, 470, 473, 474, 482, 491, 492, 496,
> 497, 498, 500, 508, 511, 513, 514, 519, 520, 522, 533, 534, 542, 543, 546,
> 547, 549, 550, 554, 557, 566, 570, 574, 580, 587, 590, 592, 594, 599, 600,
> 601, 613, 617, 621, 625, 626, 634, 636, 637, 642, 643, 644, 648, 656, 657,
> 661, 670, 675, 683, 693, 699, 701, 703, 706, 711, 736, 741, 742, 744, 745,
> 748, 752, 755, 763, 766, 768, 770, 771, 774, 793, 800, 808, 809, 810, 829,
> 830, 831, 839, 840, 855, 867, 872, 883, 887, 898, 907, 909, 912, 916, 917,
> 919, 929, 932, 937, 941, 943, 947, 953, 967, 971, 977, 980, 985, 990, 999,
> 1018}
>
> ExtendedFingerprinter
> MDL: {0, 8, 13, 14, 16, 18, 19, 22, 25, 28, 30, 32, 38, 41, 53, 57, 58,
> 61, 62, 65, 68, 74, 75, 77, 78, 85, 88, 93, 112, 118, 122, 127, 135, 136,
> 140, 142, 143, 144, 146, 152, 154, 156, 160, 164, 166, 170, 173, 175, 177,
> 181, 183, 184, 186, 187, 190, 195, 197, 206, 227, 231, 236, 241, 243, 246,
> 247, 250, 251, 259, 261, 266, 271, 272, 282, 292, 293, 302, 306, 313, 314,
> 319, 325, 328, 330, 339, 340, 349, 350, 351, 373, 375, 394, 396, 399, 400,
> 408, 413, 417, 418, 421, 423, 438, 441, 446, 450, 453, 457, 459, 467, 468,
> 470, 473, 477, 481, 486, 489, 494, 499, 506, 510, 516, 519, 520, 525, 538,
> 541, 543, 544, 560, 571, 578, 581, 601, 602, 605, 606, 612, 615, 616, 622,
> 628, 629, 632, 648, 651, 654, 656, 657, 660, 662, 666, 671, 678, 681, 682,
> 688, 694, 695, 697, 700, 712, 718, 721, 727, 741, 745, 751, 752, 759, 761,
> 762, 772, 773, 778, 779, 781, 782, 783, 786, 787, 796, 802, 803, 810, 817,
> 826, 840, 844, 849, 857, 859, 861, 864, 868, 877, 882, 888, 892, 900, 901,
> 902, 904, 906, 909, 911, 918, 921, 923, 926, 932, 935, 941, 947, 949, 953,
> 954, 955, 962, 963, 966, 969, 972, 976, 979, 987, 989, 995, 997, 1009, 1010,
> 1011, 1012, 1013, 1014}
> Smiles: {0, 8, 13, 14, 16, 18, 19, 22, 25, 28, 30, 32, 38, 41, 53, 57, 58,
> 61, 62, 65, 68, 74, 75, 77, 78, 85, 88, 93, 112, 118, 122, 127, 135, 136,
> 140, 142, 143, 144, 146, 152, 154, 156, 160, 164, 166, 170, 173, 175, 177,
> 181, 183, 184, 186, 187, 190, 195, 197, 206, 227, 231, 236, 241, 243, 246,
> 247, 250, 251, 259, 261, 266, 271, 272, 282, 292, 293, 302, 306, 313, 314,
> 319, 325, 328, 330, 339, 340, 349, 350, 351, 373, 375, 394, 396, 399, 400,
> 408, 413, 417, 418, 421, 423, 438, 441, 446, 450, 453, 457, 459, 467, 468,
> 470, 473, 477, 481, 486, 489, 494, 499, 506, 510, 516, 519, 520, 525, 538,
> 541, 543, 544, 560, 571, 578, 581, 601, 602, 605, 606, 612, 615, 616, 622,
> 628, 629, 632, 648, 651, 654, 656, 657, 660, 662, 666, 671, 678, 681, 682,
> 688, 694, 695, 697, 700, 712, 718, 721, 727, 741, 745, 751, 752, 759, 761,
> 762, 772, 773, 778, 779, 781, 782, 783, 786, 787, 796, 802, 803, 810, 817,
> 826, 840, 844, 849, 857, 859, 861, 864, 868, 877, 882, 888, 892, 900, 901,
> 902, 904, 906, 909, 911, 918, 921, 923, 926, 932, 935, 941, 947, 949, 953,
> 954, 955, 962, 963, 966, 969, 972, 976, 979, 987, 989, 995, 997, 1009, 1010,
> 1011, 1012, 1013, 1014}
>
> HybridizationFingerprinter
> MDL: {9, 11, 13, 17, 18, 23, 25, 26, 27, 28, 29, 30, 34, 35, 37, 39, 40,
> 42, 43, 45, 48, 57, 58, 62, 63, 70, 71, 72, 73, 74, 75, 77, 81, 83, 86, 88,
> 89, 90, 93, 94, 95, 96, 98, 100, 102, 103, 104, 109, 115, 119, 125, 129, 130,
> 131, 133, 134, 138, 148, 151, 152, 154, 155, 158, 161, 163, 164, 165, 167,
> 168, 178, 182, 183, 184, 191, 192, 195, 196, 197, 198, 199, 200, 203, 205,
> 208, 211, 214, 216, 218, 220, 224, 225, 229, 230, 232, 240, 253, 255, 256,
> 257, 260, 263, 264, 265, 267, 270, 275, 278, 282, 283, 290, 293, 295, 296,
> 299, 300, 303, 304, 311, 319, 320, 321, 322, 331, 336, 337, 341, 342, 346,
> 350, 354, 355, 356, 363, 364, 365, 368, 370, 372, 373, 377, 382, 385, 386,
> 387, 390, 395, 401, 402, 403, 404, 405, 411, 413, 415, 416, 418, 419, 421,
> 423, 424, 427, 430, 431, 437, 438, 440, 441, 446, 447, 448, 450, 452, 458,
> 459, 462, 464, 467, 471, 479, 482, 483, 487, 488, 497, 498, 499, 500, 502,
> 506, 508, 510, 513, 518, 519, 522, 523, 525, 526, 527, 528, 529, 535, 537,
> 538, 539, 546, 547, 548, 550, 552, 553, 554, 556, 558, 559, 562, 563, 567,
> 569, 573, 576, 579, 582, 584, 587, 588, 593, 594, 600, 605, 606, 607, 613,
> 615, 619, 621, 625, 626, 632, 638, 639, 644, 648, 652, 653, 655, 657, 661,
> 662, 663, 666, 667, 672, 673, 674, 675, 681, 686, 688, 692, 695, 703, 705,
> 708, 709, 712, 717, 719, 721, 723, 724, 727, 730, 731, 732, 734, 738, 739,
> 741, 744, 747, 748, 749, 750, 753, 754, 756, 760, 768, 771, 772, 775, 776,
> 778, 782, 790, 798, 800, 806, 807, 809, 813, 815, 816, 824, 831, 832, 833,
> 834, 835, 837, 838, 840, 843, 847, 851, 852, 855, 856, 857, 860, 861, 870,
> 871, 872, 873, 874, 879, 885, 887, 888, 891, 893, 894, 895, 899, 903, 906,
> 907, 909, 912, 913, 915, 918, 919, 923, 927, 929, 932, 934, 938, 941, 943,
> 949, 955, 959, 963, 971, 975, 979, 983, 985, 988, 992, 995, 996, 999, 1000,
> 1001, 1002, 1003, 1004, 1010, 1021, 1022}
> Smiles: {9, 11, 13, 17, 18, 23, 25, 26, 27, 28, 29, 30, 34, 35, 37, 39, 40,
> 42, 43, 45, 48, 57, 58, 62, 63, 70, 71, 72, 73, 74, 75, 77, 81, 83, 86, 88,
> 89, 90, 93, 94, 95, 96, 98, 100, 102, 103, 104, 109, 115, 119, 125, 129, 130,
> 131, 133, 134, 138, 148, 151, 152, 154, 155, 158, 161, 163, 164, 165, 167,
> 168, 178, 182, 183, 184, 191, 192, 195, 196, 197, 198, 199, 200, 203, 205,
> 208, 211, 214, 216, 218, 220, 224, 225, 229, 230, 232, 240, 253, 255, 256,
> 257, 260, 263, 264, 265, 267, 270, 275, 278, 282, 283, 290, 293, 295, 296,
> 299, 300, 303, 304, 311, 319, 320, 321, 322, 331, 336, 337, 341, 342, 346,
> 350, 354, 355, 356, 363, 364, 365, 368, 370, 372, 373, 377, 382, 385, 386,
> 387, 390, 395, 401, 402, 403, 404, 405, 411, 413, 415, 416, 418, 419, 421,
> 423, 424, 427, 430, 431, 437, 438, 440, 441, 446, 447, 448, 450, 452, 458,
> 459, 462, 464, 467, 471, 479, 482, 483, 487, 488, 497, 498, 499, 500, 502,
> 506, 508, 510, 513, 518, 519, 522, 523, 525, 526, 527, 528, 529, 535, 537,
> 538, 539, 546, 547, 548, 550, 552, 553, 554, 556, 558, 559, 562, 563, 567,
> 569, 573, 576, 579, 582, 584, 587, 588, 593, 594, 600, 605, 606, 607, 613,
> 615, 619, 621, 625, 626, 632, 638, 639, 644, 648, 652, 653, 655, 657, 661,
> 662, 663, 666, 667, 672, 673, 674, 675, 681, 686, 688, 692, 695, 703, 705,
> 708, 709, 712, 717, 719, 721, 723, 724, 727, 730, 731, 732, 734, 738, 739,
> 741, 744, 747, 748, 749, 750, 753, 754, 756, 760, 768, 771, 772, 775, 776,
> 778, 782, 790, 798, 800, 806, 807, 809, 813, 815, 816, 824, 831, 832, 833,
> 834, 835, 837, 838, 840, 843, 847, 851, 852, 855, 856, 857, 860, 861, 870,
> 871, 872, 873, 874, 879, 885, 887, 888, 891, 893, 894, 895, 899, 903, 906,
> 907, 909, 912, 913, 915, 918, 919, 923, 927, 929, 932, 934, 938, 941, 943,
> 949, 955, 959, 963, 971, 975, 979, 983, 985, 988, 992, 995, 996, 999, 1000,
> 1001, 1002, 1003, 1004, 1010, 1021, 1022}
>
> GraphOnlyFingerprinter
> MDL: {2, 30, 40, 44, 45, 54, 58, 95, 113, 119, 130, 134, 156, 162, 166,
> 168, 174, 185, 219, 223, 227, 242, 244, 251, 254, 255, 263, 267, 285, 286,
> 288, 293, 295, 297, 302, 308, 314, 315, 316, 317, 318, 320, 333, 338, 347,
> 353, 356, 357, 362, 366, 376, 381, 417, 421, 450, 454, 455, 457, 461, 475,
> 476, 484, 487, 497, 503, 513, 519, 521, 531, 541, 543, 546, 551, 553, 556,
> 559, 562, 575, 585, 587, 595, 604, 608, 634, 640, 641, 661, 677, 685, 688,
> 706, 716, 722, 733, 737, 741, 742, 744, 755, 759, 763, 774, 780, 784, 788,
> 790, 822, 851, 866, 883, 885, 891, 893, 895, 897, 898, 907, 913, 915, 921,
> 927, 961, 962, 964, 966, 969, 976, 1002}
> Smiles: {2, 30, 40, 44, 45, 54, 58, 95, 113, 119, 130, 134, 156, 162, 166,
> 168, 174, 185, 219, 223, 227, 242, 244, 251, 254, 255, 263, 267, 285, 286,
> 288, 293, 295, 297, 302, 308, 314, 315, 316, 317, 318, 320, 333, 338, 347,
> 353, 356, 357, 362, 366, 376, 381, 417, 421, 450, 454, 455, 457, 461, 475,
> 476, 484, 487, 497, 503, 513, 519, 521, 531, 541, 543, 546, 551, 553, 556,
> 559, 562, 575, 585, 587, 595, 604, 608, 634, 640, 641, 661, 677, 685, 688,
> 706, 716, 722, 733, 737, 741, 742, 744, 755, 759, 763, 774, 780, 784, 788,
> 790, 822, 851, 866, 883, 885, 891, 893, 895, 897, 898, 907, 913, 915, 921,
> 927, 961, 962, 964, 966, 969, 976, 1002}
>
> On 22 May 2013, at 13:45, Gauri S <[email protected]> wrote:
>
>>
>> I have used SmilesParser to parse the SMILES string and generate the
>> fingerprint; it prints
>> fingerprints of query:{0, 1, 17, 87, 95, 142, 168, 273, 274, 294, 299, 300,
>> 301, 306}
>>
>> When I used the SDF file of the same molecule and generated the fingerprint, it prints
>> bitsetarray: [{17, 87, 95, 142, 273, 274, 294, 301, 306}]
>>
>> Even though it is the same molecule, it still does not include bits 0, 1, 168, 299 and 300.
>>
>> So, can anyone please tell me why there is this difference and which method is
>> suitable to get the results properly?
>>
>> this is my small part of the code
>>
>> ArrayList<IMolecule> molList= new ArrayList< IMolecule >();
>> ArrayList<BitSet> bitsetarray= new ArrayList< BitSet >();
>> ArrayList<BitSet> bitsetarray1= new ArrayList< BitSet >();
>> ArrayList<String> molidarray= new ArrayList< String >();
>> ArrayList<String> molidarray1= new ArrayList< String >();
>> //ArrayList<String> molidarray2= new ArrayList< String >();
>> IMolecule molecule = null;
>> String query =
>> "CC1=C(C)C2=C(CCC(C)(COC3=CC=C(CC4SC(=O)NC4=O)C=C3)O2)C(C)=C1O";
>> SmilesParser sp = new
>> SmilesParser(DefaultChemObjectBuilder.getInstance());
>>
>> IAtomContainer mol1 = sp.parseSmiles(query);
>> mol1 = new AtomContainer(mol1);
>> BitSet fingerprint1 = fprinter.getFingerprint(mol1);
>> System.out.println("fingerprints of query:"+fingerprint1);
>>
>>
>> File sdfFile = new File("D:/gauri/cdk/Vasodilator/DB00197.sdf");
>>
>> IteratingMDLReader reader = new IteratingMDLReader(
>> new FileInputStream(sdfFile),
>> DefaultIChemObjectBuilder.getInstance());
>>
>> System.out.println("Reading the file...");
>> while (reader.hasNext()) {
>> molecule = (IMolecule)reader.next();
>> molList.add(molecule);
>> fingerprint = fprinter.getFingerprint(molecule);
>> // fprinter.getSize(); // returns 881
>> //fingerprint.length(); // returns the highest set bit
>> bitsetarray.add(fingerprint);
>>
>>
>> molidarray.add(molecule.getProperty("DRUGBANK_ID").toString());
>> //
>> molidarray2.add(molecule.getProperty("SMILES").toString());
>>
>> }
>> --
>> View this message in context:
>> http://old.nabble.com/fingerprints-generated-differently-for-same-molecules-using-different-methods---smileparser-and-reading-the-sdf-file-using-IteratingMDLReader-tp35424370p35424370.html
>> Sent from the cdk-user mailing list archive at Nabble.com.
>>
>>
>> ------------------------------------------------------------------------------
>> Try New Relic Now & We'll Send You this Cool Shirt
>> New Relic is the only SaaS-based application performance monitoring service
>> that delivers powerful full stack analytics. Optimize and monitor your
>> browser, app, & servers with just a few lines of code. Try New Relic
>> and get this awesome Nerd Life shirt! http://p.sf.net/sfu/newrelic_d2d_may
>> _______________________________________________
>> Cdk-user mailing list
>> [email protected]
>> https://lists.sourceforge.net/lists/listinfo/cdk-user
>
------------------------------------------------------------------------------
Try New Relic Now & We'll Send You this Cool Shirt
New Relic is the only SaaS-based application performance monitoring service
that delivers powerful full stack analytics. Optimize and monitor your
browser, app, & servers with just a few lines of code. Try New Relic
and get this awesome Nerd Life shirt! http://p.sf.net/sfu/newrelic_d2d_may
_______________________________________________
Cdk-user mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/cdk-user