From a7794e7da94ac8967095dbc79743a7c6b1760160 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Mon, 13 May 2024 10:42:01 +0200
Subject: [PATCH 01/30] Started gaf2aln dev

---
 gaf2aln.py | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)
 create mode 100644 gaf2aln.py

diff --git a/gaf2aln.py b/gaf2aln.py
new file mode 100644
index 0000000..fea7d30
--- /dev/null
+++ b/gaf2aln.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+gaf2aln
+Convert gaf alignement to sam or paf
+
+@author: alexis.mergez@inrae.fr
+@version: 0.1
+"""
+
+import numpy as np
+import pandas as pd
+import argparse
+import os
+
+version = "0.1"
+
+## Argument parser
+arg_parser = argparse.ArgumentParser(description='GFAvc: GFA version converter')
+arg_parser.add_argument(
+    "--gfa",
+    "-g",
+    dest = "gfa",
+    required = True,
+    help = "Graph (.gfa v1)"
+    ) 
+arg_parser.add_argument(
+    "--gaf",
+    "-a",
+    dest = "gaf",
+    required = True,
+    help = "Alignement file (.gaf)"
+    )  
+arg_parser.add_argument(
+    "--format",
+    "-f",
+    dest = "format",
+    default = "P",
+    help = "Output file format. (S: sam, P: paf (default))"
+    )     
+arg_parser.add_argument(
+    '--version',
+    '-v',
+    action="store_true",
+    dest = "version",
+    help = "Show version"
+)
+args = arg_parser.parse_args()
+
+# Printing version
+if args.version:
+    print(version)
+    os._exit(0)
+
+# Parsing the .gaf file
+with open(args.gaf, 'r') as file:
+    gaf_lines = file.readlines()
+
+gaf_col = [
+    "QRY.NAME", "QRY.LEN", "QRY.START", "QRY.END", "STRAND", 
+    "PATH.MATCH", "PATH.LEN", "ALN.START", "ALN.END",
+    "RES.MATCH", "ALN.BLOCK.LEN", "MAPPING.QUAL"
+    ]
+
+# Creating dictionnary to store alignments
+aln_dict = {}
+for line in range(len(gaf_lines)):
+    ## Splitting the line by tabulation
+    line_content = gaf_lines[line][:-1].split('\t')
+
+    ## Adding alignement info to dictionnary
+    aln_dict[f"ALN_{line+1}"] = {
+        gaf_col[i]: line_content[i] for i in range(len(gaf_col))
+    }
+
+    ## Splitting "PATH.MATCH" into a list
+    aln_dict[f"ALN_{line+1}"]["PATH.MATCH"] = [
+        int(node_id) for node_id in aln_dict[f"ALN_{line+1}"]["PATH.MATCH"].split(">")[1:]
+        ]
+
+    ## Adding tags
+    aln_dict[f"ALN_{line+1}"]["TAGS"] = ",".join(line_content[13:])
+
+# Getting nodes of interest ids
+
+# Debug
+print(aln_dict)
+
+
+
+# Parsing the .gaf
+with open(args.gfa, 'r') as file:
+    gfa_lines = file.readlines()
+
-- 
GitLab


From 0658a3f77518ff5b4c49b461a04d910a3aab673d Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Mon, 13 May 2024 12:44:32 +0200
Subject: [PATCH 02/30] gaf2aln: parse GFA segments, links and paths (WIP)

---
 gaf2aln.py | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 65 insertions(+), 3 deletions(-)

diff --git a/gaf2aln.py b/gaf2aln.py
index fea7d30..ced7e7f 100644
--- a/gaf2aln.py
+++ b/gaf2aln.py
@@ -82,13 +82,75 @@ for line in range(len(gaf_lines)):
     aln_dict[f"ALN_{line+1}"]["TAGS"] = ",".join(line_content[13:])
 
 # Getting nodes of interest ids
+aln_nodes = np.unique([
+    node_id 
+    for aln in aln_dict.keys() 
+    for node_id in aln_dict[aln]["PATH.MATCH"]
+])
+print(aln_nodes)
 
 # Debug
 print(aln_dict)
 
-
-
-# Parsing the .gaf
+# Parsing the .gfa
 with open(args.gfa, 'r') as file:
     gfa_lines = file.readlines()
 
+# Nodes length dictionnary structured as follow :
+# {<NODE.ID>: <NODE.LENGTH>}
+nodes_length = {}
+# Nodes dictionnary structured as follow :
+# {<ALN.NODE.ID> : {PATHS: {<PATH.NAME>: (start, end)}}}
+nodes = {}
+# Paths dictionnary structured as follow :
+# {<PATH.NAME>: {NODES: {NODE.ID: <NODE.ID>, ORIENT: <NODE.ORIENT>}, CIGAR: <CIGAR in comma separated list>}
+paths = {}
+# Links dictionnary structured as follow : 
+# {<FROM.NODE.ID>: {<TO.NODE.ID>: {FROM.ORIENT: <FROM.ORIENT>, TO.ORIENT: <TO.ORIENT>}}}
+links = {}
+
+for line in gfa_lines:
+    line_content = line[:-1].split("\t")
+    line_id = line_content[0]
+    
+    # Segment line
+    if line_id == "S" :
+        
+        nodes_length[str(line_content[1])] = len(line_content[2])
+
+        if line_content[1] in aln_nodes:
+            nodes[str(line_content[1])] = {}
+    
+    # Link line
+    elif line_id == "L":
+        try :
+            links[str(line_content[1])][str(line_content[3])] = {
+                "FROM": str(line_content[2]), 
+                "TO": str(line_content[4])
+            }
+
+        except :
+            links[str(line_content[1])] = {
+                [str(line_content[3])] : {"FROM.ORIENT": str(line_content[2]), "TO.ORIENT": str(line_content[4])}
+            }
+
+    # Path line
+    elif line_id == "P":
+        paths[str(line_content[1])] = {
+            "NODES": [
+                {"NODE.ID": str(node_id[:-1]), "ORIENT": str(node_id[-1])}
+                for node_id in line_content[2].split(',')
+            ],
+            "CIGAR": line_content[3]
+        }
+
+# Getting the start and end position of alignment nodes on each paths
+def getPathPos(node_id, nodes=nodes, nodes_length=nodes_length, links=links, paths=paths)
+
+for node_id in nodes.keys():
+    for path_id in paths.keys():
+        if 
+
+    
+        
+
-- 
GitLab


From 81bae737dd5a53d0bf976f78dcd532cb2db1a439 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Tue, 14 May 2024 10:19:15 +0200
Subject: [PATCH 03/30] gaf2aln: add --threads and compute node positions on paths

---
 gaf2aln.py | 141 ++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 107 insertions(+), 34 deletions(-)

diff --git a/gaf2aln.py b/gaf2aln.py
index ced7e7f..823a51d 100644
--- a/gaf2aln.py
+++ b/gaf2aln.py
@@ -11,6 +11,7 @@ Convert gaf alignement to sam or paf
 import numpy as np
 import pandas as pd
 import argparse
+import concurrent.futures
 import os
 
 version = "0.1"
@@ -38,6 +39,15 @@ arg_parser.add_argument(
     default = "P",
     help = "Output file format. (S: sam, P: paf (default))"
     )     
+arg_parser.add_argument(
+    "--threads",
+    "-t",
+    dest = "threads",
+    required = False,
+    default = 1,
+    type = int,
+    help = "Number of threads"
+    )
 arg_parser.add_argument(
     '--version',
     '-v',
@@ -103,53 +113,116 @@ nodes_length = {}
 # {<ALN.NODE.ID> : {PATHS: {<PATH.NAME>: (start, end)}}}
 nodes = {}
 # Paths dictionnary structured as follow :
-# {<PATH.NAME>: {NODES: {NODE.ID: <NODE.ID>, ORIENT: <NODE.ORIENT>}, CIGAR: <CIGAR in comma separated list>}
+# {<PATH.NAME>: {NODES: {<NODE.ID>: <NODE.ORIENT>}, CIGAR: <CIGAR in comma separated list>}
 paths = {}
 # Links dictionnary structured as follow : 
 # {<FROM.NODE.ID>: {<TO.NODE.ID>: {FROM.ORIENT: <FROM.ORIENT>, TO.ORIENT: <TO.ORIENT>}}}
 links = {}
 
-for line in gfa_lines:
-    line_content = line[:-1].split("\t")
-    line_id = line_content[0]
-    
-    # Segment line
-    if line_id == "S" :
+# Parsing the gfa
+## Multithreading function
+def parse_gfa_line(gfa_lines, aln_nodes=aln_nodes):
+    _nodes, _nodes_length, _links, _paths = {}, {}, {}, {}
+    for line in gfa_lines:
+        line_content = line[:-1].split("\t")
+        line_id = line_content[0]
         
-        nodes_length[str(line_content[1])] = len(line_content[2])
+        # Segment line
+        if line_id == "S" :
+            
+            _nodes_length[str(line_content[1])] = len(line_content[2])
 
-        if line_content[1] in aln_nodes:
-            nodes[str(line_content[1])] = {}
-    
-    # Link line
-    elif line_id == "L":
-        try :
-            links[str(line_content[1])][str(line_content[3])] = {
-                "FROM": str(line_content[2]), 
-                "TO": str(line_content[4])
+            if line_content[1] in aln_nodes:
+                _nodes[str(line_content[1])] = {"PATHS": {}}
+        
+        # Link line
+        elif line_id == "L":
+            try :
+                _links[str(line_content[1])][str(line_content[3])] = {
+                    "FROM": str(line_content[2]), 
+                    "TO": str(line_content[4])
+                }
+
+            except :
+                _links[str(line_content[1])] = {
+                    str(line_content[3]) : {"FROM.ORIENT": str(line_content[2]), "TO.ORIENT": str(line_content[4])}
+                }
+
+        # Path line
+        elif line_id == "P":
+            _paths[str(line_content[1])] = {
+                "NODES": {
+                    str(node_id[:-1]): str(node_id[-1])
+                    for node_id in line_content[2].split(',')
+                },
+                "CIGAR": line_content[3]
             }
 
-        except :
-            links[str(line_content[1])] = {
-                [str(line_content[3])] : {"FROM.ORIENT": str(line_content[2]), "TO.ORIENT": str(line_content[4])}
-            }
+    return [_nodes, _nodes_length, _links, _paths]
+
+## Parsing subsets
+executor = concurrent.futures.ThreadPoolExecutor(max_workers=args.threads)
+res = {}
+quantiles = np.array(range(args.threads+1))/args.threads
+splits = np.quantile(range(len(gfa_lines)), quantiles, method="higher").tolist()
+splits[-1] += 1
+for i in range(len(splits)-1):
+    res[i] = executor.submit(parse_gfa_line, gfa_lines[splits[i]:splits[i+1]])
+executor.shutdown(wait=True)
+
+## Aggregating results
+for _res in res.values():
+    _nodes, _nodes_length, _links, _paths = _res.result()
+    
+    for key, value in _nodes.items(): 
+        nodes[key] = value
+    
+    for key, value in _nodes_length.items():
+        nodes_length[key] = value
 
-    # Path line
-    elif line_id == "P":
-        paths[str(line_content[1])] = {
-            "NODES": [
-                {"NODE.ID": str(node_id[:-1]), "ORIENT": str(node_id[-1])}
-                for node_id in line_content[2].split(',')
-            ],
-            "CIGAR": line_content[3]
-        }
+    for key, value in _links.items():
+        if not key in links.keys():
+            links[key] = value
+        else :
+            for key_sub, value_sub in value.items():
+                links[key][key_sub] = value_sub
+
+    for key, value in _paths.items():
+        paths[key] = value
 
 # Getting the start and end position of alignment nodes on each paths
-def getPathPos(node_id, nodes=nodes, nodes_length=nodes_length, links=links, paths=paths)
+def getPathPos(path_id, nodes=nodes, nodes_length=nodes_length, links=links, paths=paths):
+    cur_pos = 0
+    _dict = nodes.copy()
+    for path_node in paths[path_id]["NODES"].keys():
+        try :
+            _dict[path_node]["PATHS"][path_id] = (cur_pos, cur_pos+nodes_length[path_node]) 
+            cur_pos += nodes_length[path_node]
+        except :
+            cur_pos += nodes_length[path_node]
+    return _dict
+
+# Collecting positions of nodes within paths
+## Searching in each path
+executor = concurrent.futures.ThreadPoolExecutor(max_workers=args.threads)
+res = {}
+for path_name in paths.keys():
+    print(f"Running on {path_name}")
+    res[path_name] = executor.submit(getPathPos, path_name)
+executor.shutdown(wait=True)
+
+## Storing results
+for path in res.keys():
+    _dict = res[path].result()
+    for node_id, path_dict in _dict.items():
+        print(node_id, path_dict)
+        for path_name, coordinates in path_dict["PATHS"].items():
+            nodes[node_id]["PATHS"][path_name] = coordinates
+
+print(nodes)
+
+
 
-for node_id in nodes.keys():
-    for path_id in paths.keys():
-        if 
 
     
         
-- 
GitLab


From 0f682ffdf8b00ee009ac89e8e2d8057d56eeb785 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Tue, 14 May 2024 14:57:45 +0200
Subject: [PATCH 04/30] gaf2aln: parse GFA sequentially and add progress messages

---
 gaf2aln.py | 166 +++++++++++++++++++++++------------------------------
 1 file changed, 72 insertions(+), 94 deletions(-)

diff --git a/gaf2aln.py b/gaf2aln.py
index 823a51d..a684dc8 100644
--- a/gaf2aln.py
+++ b/gaf2aln.py
@@ -63,6 +63,7 @@ if args.version:
     os._exit(0)
 
 # Parsing the .gaf file
+print(f"[gaf2aln::GAF Parser] Reading {args.gaf} ...")
 with open(args.gaf, 'r') as file:
     gaf_lines = file.readlines()
 
@@ -73,6 +74,7 @@ gaf_col = [
     ]
 
 # Creating dictionnary to store alignments
+print(f"[gaf2aln::GAF Parser] Extracting alignments ...")
 aln_dict = {}
 for line in range(len(gaf_lines)):
     ## Splitting the line by tabulation
@@ -85,7 +87,7 @@ for line in range(len(gaf_lines)):
 
     ## Splitting "PATH.MATCH" into a list
     aln_dict[f"ALN_{line+1}"]["PATH.MATCH"] = [
-        int(node_id) for node_id in aln_dict[f"ALN_{line+1}"]["PATH.MATCH"].split(">")[1:]
+        str(node_id) for node_id in aln_dict[f"ALN_{line+1}"]["PATH.MATCH"].split(">")[1:]
         ]
 
     ## Adding tags
@@ -93,16 +95,15 @@ for line in range(len(gaf_lines)):
 
 # Getting nodes of interest ids
 aln_nodes = np.unique([
-    node_id 
+    str(node_id) 
     for aln in aln_dict.keys() 
     for node_id in aln_dict[aln]["PATH.MATCH"]
-])
-print(aln_nodes)
+]).tolist()
 
-# Debug
-print(aln_dict)
+del gaf_lines, gaf_col
 
 # Parsing the .gfa
+print(f"[gaf2aln::GFA Parser] Reading {args.gfa} ...")
 with open(args.gfa, 'r') as file:
     gfa_lines = file.readlines()
 
@@ -111,7 +112,9 @@ with open(args.gfa, 'r') as file:
 nodes_length = {}
 # Nodes dictionnary structured as follow :
 # {<ALN.NODE.ID> : {PATHS: {<PATH.NAME>: (start, end)}}}
-nodes = {}
+nodes = {
+    node_id: {"PATHS": {}} for node_id in aln_nodes
+}
 # Paths dictionnary structured as follow :
 # {<PATH.NAME>: {NODES: {<NODE.ID>: <NODE.ORIENT>}, CIGAR: <CIGAR in comma separated list>}
 paths = {}
@@ -120,106 +123,81 @@ paths = {}
 links = {}
 
 # Parsing the gfa
-## Multithreading function
-def parse_gfa_line(gfa_lines, aln_nodes=aln_nodes):
-    _nodes, _nodes_length, _links, _paths = {}, {}, {}, {}
-    for line in gfa_lines:
-        line_content = line[:-1].split("\t")
-        line_id = line_content[0]
-        
-        # Segment line
-        if line_id == "S" :
-            
-            _nodes_length[str(line_content[1])] = len(line_content[2])
-
-            if line_content[1] in aln_nodes:
-                _nodes[str(line_content[1])] = {"PATHS": {}}
+print(f"[gaf2aln::GFA Parser] Extracting nodes, paths and links ...")
+for line in gfa_lines:
+    line_content = line[:-1].split("\t")
+    line_id = line_content[0]
+    
+    # Segment line
+    if line_id == "S" :
         
-        # Link line
-        elif line_id == "L":
-            try :
-                _links[str(line_content[1])][str(line_content[3])] = {
-                    "FROM": str(line_content[2]), 
-                    "TO": str(line_content[4])
-                }
-
-            except :
-                _links[str(line_content[1])] = {
-                    str(line_content[3]) : {"FROM.ORIENT": str(line_content[2]), "TO.ORIENT": str(line_content[4])}
-                }
-
-        # Path line
-        elif line_id == "P":
-            _paths[str(line_content[1])] = {
-                "NODES": {
-                    str(node_id[:-1]): str(node_id[-1])
-                    for node_id in line_content[2].split(',')
-                },
-                "CIGAR": line_content[3]
+        nodes_length[str(line_content[1])] = len(line_content[2])
+    
+    # Link line
+    elif line_id == "L":
+        try :
+            links[str(line_content[1])][str(line_content[3])] = {
+                "FROM": str(line_content[2]), 
+                "TO": str(line_content[4])
             }
 
-    return [_nodes, _nodes_length, _links, _paths]
-
-## Parsing subsets
-executor = concurrent.futures.ThreadPoolExecutor(max_workers=args.threads)
-res = {}
-quantiles = np.array(range(args.threads+1))/args.threads
-splits = np.quantile(range(len(gfa_lines)), quantiles, method="higher").tolist()
-splits[-1] += 1
-for i in range(len(splits)-1):
-    res[i] = executor.submit(parse_gfa_line, gfa_lines[splits[i]:splits[i+1]])
-executor.shutdown(wait=True)
-
-## Aggregating results
-for _res in res.values():
-    _nodes, _nodes_length, _links, _paths = _res.result()
-    
-    for key, value in _nodes.items(): 
-        nodes[key] = value
-    
-    for key, value in _nodes_length.items():
-        nodes_length[key] = value
+        except :
+            links[str(line_content[1])] = {
+                str(line_content[3]) : {"FROM.ORIENT": str(line_content[2]), "TO.ORIENT": str(line_content[4])}
+            }
 
-    for key, value in _links.items():
-        if not key in links.keys():
-            links[key] = value
-        else :
-            for key_sub, value_sub in value.items():
-                links[key][key_sub] = value_sub
+    # Path line
+    elif line_id == "P":
+        paths[str(line_content[1])] = {
+            "NODES": {
+                str(node_id[:-1]): str(node_id[-1])
+                for node_id in line_content[2].split(',')
+            },
+            "CIGAR": line_content[3]
+        }
 
-    for key, value in _paths.items():
-        paths[key] = value
+del gfa_lines
 
 # Getting the start and end position of alignment nodes on each paths
-def getPathPos(path_id, nodes=nodes, nodes_length=nodes_length, links=links, paths=paths):
+print(f"[gaf2aln::Graph processing] Computing nodes positions ...")
+for path_name in paths.keys():
+    print(f"[gaf2aln::Graph processing] Running on {path_name} ...")
     cur_pos = 0
-    _dict = nodes.copy()
-    for path_node in paths[path_id]["NODES"].keys():
+    for path_node in paths[path_name]["NODES"].keys():
         try :
-            _dict[path_node]["PATHS"][path_id] = (cur_pos, cur_pos+nodes_length[path_node]) 
+            nodes[path_node]["PATHS"][path_name] = (cur_pos, cur_pos+nodes_length[path_node]) 
             cur_pos += nodes_length[path_node]
         except :
             cur_pos += nodes_length[path_node]
-    return _dict
 
-# Collecting positions of nodes within paths
-## Searching in each path
-executor = concurrent.futures.ThreadPoolExecutor(max_workers=args.threads)
-res = {}
-for path_name in paths.keys():
-    print(f"Running on {path_name}")
-    res[path_name] = executor.submit(getPathPos, path_name)
-executor.shutdown(wait=True)
-
-## Storing results
-for path in res.keys():
-    _dict = res[path].result()
-    for node_id, path_dict in _dict.items():
-        print(node_id, path_dict)
-        for path_name, coordinates in path_dict["PATHS"].items():
-            nodes[node_id]["PATHS"][path_name] = coordinates
-
-print(nodes)
+# Reconstructing alignments for each path
+print(f"[gaf2aln::Alignment processing] Computing alignments ...")
+for aln_name in aln_dict.keys():
+
+    print(f"[gaf2aln::Alignment processing] Looking into alignment {aln_name} ...")
+    for path_name in paths.keys():
+
+        print(f"[gaf2aln::Alignment processing] Running on {path_name} ...")
+        cur_pos, cur_aln = 0, []
+
+        # Traversing alignment path
+        for node_id in aln_dict[aln_name]["PATH.MATCH"]:
+
+            # Checking if node is traversed by the current path
+            if path_name in nodes[node_id]["PATHS"].keys():
+                try : 
+                    cur_aln[-1] += [node_id]
+                except :
+                    cur_aln.append([node_id])
+
+            else :
+                # Checking for emptyness
+                if not len(cur_aln) or not len(cur_aln[-1]): 
+                    cur_aln.append([])
+                else :
+
+
+        # Ajouter le noeud au segment contigue ou finir le dernier segment le cas échéant
 
 
 
-- 
GitLab


From 15e32c6bda5f4b8a99a24c778da233c3ba98e12c Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Tue, 14 May 2024 16:30:50 +0200
Subject: [PATCH 05/30] gaf2aln: add walk2path and keep node orientation in PATH.MATCH

---
 gaf2aln.py | 74 +++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 56 insertions(+), 18 deletions(-)

diff --git a/gaf2aln.py b/gaf2aln.py
index a684dc8..cd73cd4 100644
--- a/gaf2aln.py
+++ b/gaf2aln.py
@@ -62,6 +62,15 @@ if args.version:
     print(version)
     os._exit(0)
 
+# Toolbox
+def walk2path(walk):
+    """
+    Takes a walk in a single string and returns a list of nodes id with signs (gfa v1 like)
+    """
+    _ = re.findall(r'>\w+|<\w+', walk)
+    # Converting ['>..', '>..', '<..', '>..'] to '..+,..+,..-,..+'
+    return [f'{elem[1:]}{(elem[0] == ">")*"+"+(elem[0] == "<")*"-"}' for elem in _]
+
 # Parsing the .gaf file
 print(f"[gaf2aln::GAF Parser] Reading {args.gaf} ...")
 with open(args.gaf, 'r') as file:
@@ -84,11 +93,12 @@ for line in range(len(gaf_lines)):
     aln_dict[f"ALN_{line+1}"] = {
         gaf_col[i]: line_content[i] for i in range(len(gaf_col))
     }
-
+    
     ## Splitting "PATH.MATCH" into a list
-    aln_dict[f"ALN_{line+1}"]["PATH.MATCH"] = [
-        str(node_id) for node_id in aln_dict[f"ALN_{line+1}"]["PATH.MATCH"].split(">")[1:]
-        ]
+    aln_dict[f"ALN_{line+1}"]["PATH.MATCH"] = {
+        str(node_id[:-1]): node_id[-1] 
+        for node_id in walk2path(aln_dict[f"ALN_{line+1}"]["PATH.MATCH"])
+    }
 
     ## Adding tags
     aln_dict[f"ALN_{line+1}"]["TAGS"] = ",".join(line_content[13:])
@@ -97,7 +107,7 @@ for line in range(len(gaf_lines)):
 aln_nodes = np.unique([
     str(node_id) 
     for aln in aln_dict.keys() 
-    for node_id in aln_dict[aln]["PATH.MATCH"]
+    for node_id in aln_dict[aln]["PATH.MATCH"].keys()
 ]).tolist()
 
 del gaf_lines, gaf_col
@@ -170,34 +180,62 @@ for path_name in paths.keys():
         except :
             cur_pos += nodes_length[path_node]
 
+final_aln = []
+
 # Reconstructing alignments for each path
 print(f"[gaf2aln::Alignment processing] Computing alignments ...")
 for aln_name in aln_dict.keys():
 
     print(f"[gaf2aln::Alignment processing] Looking into alignment {aln_name} ...")
+    aln_dict[aln_name]["HAP.MATCH"] = {}
+
     for path_name in paths.keys():
 
         print(f"[gaf2aln::Alignment processing] Running on {path_name} ...")
-        cur_pos, cur_aln = 0, []
-
+        cur_aln = []
+        in_aln = False # Keeping track of if we are in an alignment
+        
         # Traversing alignment path
-        for node_id in aln_dict[aln_name]["PATH.MATCH"]:
+        for node_id, strand_on_aln in aln_dict[aln_name]["PATH.MATCH"].items():
+            strand_on_path = paths[path_name]["NODES"][node_id]
+
+            # Checking strand
+            if strand_on_aln == strand_on_path :
+                strand = "S"
+            else :
+                strand = "I"
 
             # Checking if node is traversed by the current path
             if path_name in nodes[node_id]["PATHS"].keys():
-                try : 
-                    cur_aln[-1] += [node_id]
-                except :
-                    cur_aln.append([node_id])
+                if not in_aln:
+                    cur_aln.append({
+                        "IN": True,
+                        "NODES": {
+                            node_id: strand
+                        }
+                    })
+                    in_aln = True
+                
+                else : cur_aln[-1]["NODES"][node_id] = strand
 
             else :
-                # Checking for emptyness
-                if not len(cur_aln) or not len(cur_aln[-1]): 
-                    cur_aln.append([])
-                else :
-
+                if in_aln or not len(cur_aln):
+                    cur_aln.append({
+                        "IN": False,
+                        "NODES": {
+                            node_id: strand
+                        }
+                    })
+                    in_aln = False
+                
+                else : cur_aln[-1]["NODES"][node_id] = strand
+
+        aln_dict[aln_name]["HAP.MATCH"][path_name] = cur_aln.copy()
+        print(f"{path_name} :", cur_aln)
+
+        # Traversing the HAP.MATCH to get alignment
+        for 
 
-        # Ajouter le noeud au segment contigue ou finir le dernier segment le cas échéant
 
 
 
-- 
GitLab


From 146d897c7818d60499b5611294ecdc0d2bcb80e5 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Fri, 17 May 2024 10:31:52 +0200
Subject: [PATCH 06/30] gaf2aln: add CIGAR helpers and per-node graph alignment dict

---
 gaf2aln.py | 175 ++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 121 insertions(+), 54 deletions(-)

diff --git a/gaf2aln.py b/gaf2aln.py
index cd73cd4..afc61e2 100644
--- a/gaf2aln.py
+++ b/gaf2aln.py
@@ -13,6 +13,7 @@ import pandas as pd
 import argparse
 import concurrent.futures
 import os
+import re
 
 version = "0.1"
 
@@ -71,6 +72,35 @@ def walk2path(walk):
     # Converting ['>..', '>..', '<..', '>..'] to '..+,..+,..-,..+'
     return [f'{elem[1:]}{(elem[0] == ">")*"+"+(elem[0] == "<")*"-"}' for elem in _]
 
+def cigar2basealn(cigar):
+    """
+    Takes a CIGAR string and convert it into a list of base level alignment.
+    For example : "345=" -> ["=", "=", ..., "="] of length 345.
+    """
+    _ = re.findall(r'\d+\D', cigar)
+    print(_)
+    final_cigar = []
+    for match in _:
+        final_cigar += [match[-1]]*int(match[:-1])
+
+    print(final_cigar)
+    return final_cigar
+
+def basealn2cigar(base_aln_list):
+    
+    last_elem = base_aln_list[0]
+    CIGAR = [[1, last_elem]]
+    for elem in base_aln_list[1:]:
+        if elem == last_elem:
+            CIGAR[-1][0] += 1
+
+        else :
+            CIGAR[-1][0] = str(CIGAR[-1][0])
+            CIGAR.append([1, elem])
+            last_elem = elem
+    CIGAR[-1][0] = str(CIGAR[-1][0])
+    return "".join(["".join(block) for block in CIGAR if block[1] != ""])
+
 # Parsing the .gaf file
 print(f"[gaf2aln::GAF Parser] Reading {args.gaf} ...")
 with open(args.gaf, 'r') as file:
@@ -100,8 +130,11 @@ for line in range(len(gaf_lines)):
         for node_id in walk2path(aln_dict[f"ALN_{line+1}"]["PATH.MATCH"])
     }
 
+    ## Adding CIGAR
+    aln_dict[f"ALN_{line+1}"]["RAW.CIGAR"] = line_content[-1]
+
     ## Adding tags
-    aln_dict[f"ALN_{line+1}"]["TAGS"] = ",".join(line_content[13:])
+    aln_dict[f"ALN_{line+1}"]["TAGS"] = ",".join(line_content[13:-1])
 
 # Getting nodes of interest ids
 aln_nodes = np.unique([
@@ -168,74 +201,108 @@ for line in gfa_lines:
 
 del gfa_lines
 
-# Getting the start and end position of alignment nodes on each paths
-print(f"[gaf2aln::Graph processing] Computing nodes positions ...")
+# Creating GA (Graph alignment) dictionnary storing given info :
+# {<ALN_ID> : 
+#   { <PATH.ID> : [
+#       { NODE.ID, P.ORIENT, A.ORIENT, P.POS, A.POS, CG }
+#       ]  
+# }
+# }
+
+print(f"[gaf2aln::Graph alignment processing] Computing nodes positions in each paths...")
+# Adding nodes positions relative to path
 for path_name in paths.keys():
     print(f"[gaf2aln::Graph processing] Running on {path_name} ...")
     cur_pos = 0
     for path_node in paths[path_name]["NODES"].keys():
         try :
             nodes[path_node]["PATHS"][path_name] = (cur_pos, cur_pos+nodes_length[path_node]) 
-            cur_pos += nodes_length[path_node]
+            cur_pos += nodes_length[path_node]+1
         except :
-            cur_pos += nodes_length[path_node]
-
-final_aln = []
-
-# Reconstructing alignments for each path
-print(f"[gaf2aln::Alignment processing] Computing alignments ...")
-for aln_name in aln_dict.keys():
-
-    print(f"[gaf2aln::Alignment processing] Looking into alignment {aln_name} ...")
-    aln_dict[aln_name]["HAP.MATCH"] = {}
+            cur_pos += nodes_length[path_node]+1
 
-    for path_name in paths.keys():
-
-        print(f"[gaf2aln::Alignment processing] Running on {path_name} ...")
-        cur_aln = []
-        in_aln = False # Keeping track of if we are in an alignment
+# Calculating CIGAR for each nodes in each aln
+print(f"[gaf2aln::CIGAR processing] Computing nodes cigar from alignement ...")
+# Iterating over alignments
+for aln in aln_dict.keys():
+    
+    print(f"[gaf2aln::CIGAR processing] Running on {aln} ...")
+    # Getting the list of base level alignement (["=", "X", ...] from "1=1X...")
+    raw_cigar = cigar2basealn(aln_dict[aln]["RAW.CIGAR"])
+    
+    cur_pos = 0
+    print(len(raw_cigar))
+    CIGAR={}
+    aln_nodes_id = list(aln_dict[aln]["PATH.MATCH"].keys())
+    for node_id in aln_nodes_id:
+
+        # Starting aln node 
+        if node_id == aln_nodes_id[0] :
+            _cigar = basealn2cigar(raw_cigar[
+                0:nodes_length[node_id]-int(aln_dict[aln]["ALN.START"])
+                ])
+            cur_pos += nodes_length[node_id]-int(aln_dict[aln]["ALN.START"])
+        # Last aln node
+        elif node_id == aln_nodes_id[-1]:
+            _cigar = basealn2cigar(raw_cigar[cur_pos:])
+        else :
+            _cigar = basealn2cigar(raw_cigar[cur_pos:cur_pos+nodes_length[node_id]])
+            cur_pos += nodes_length[node_id]
+
+        CIGAR[node_id] = _cigar
         
-        # Traversing alignment path
-        for node_id, strand_on_aln in aln_dict[aln_name]["PATH.MATCH"].items():
-            strand_on_path = paths[path_name]["NODES"][node_id]
+    aln_dict[aln]["CIGAR"] = CIGAR
+    print(CIGAR)
 
-            # Checking strand
-            if strand_on_aln == strand_on_path :
-                strand = "S"
-            else :
-                strand = "I"
-
-            # Checking if node is traversed by the current path
-            if path_name in nodes[node_id]["PATHS"].keys():
-                if not in_aln:
-                    cur_aln.append({
-                        "IN": True,
-                        "NODES": {
-                            node_id: strand
-                        }
-                    })
-                    in_aln = True
-                
-                else : cur_aln[-1]["NODES"][node_id] = strand
+GA = {}
 
+# Computing alignments nodes positions in paths
+print(f"[gaf2aln::Graph alignment processing] Lifting alignments coordinates paths positions...")
+for aln_name in aln_dict.keys():
+    GA[aln_name] = {}
+    aln_pos = 0
+    _ = list(aln_dict[aln]["PATH.MATCH"].keys())
+    start_end_ids = _[0], _[-1]
+
+    for node_id, orient in aln_dict[aln_name]["PATH.MATCH"].items():
+        for path_name in nodes[node_id]["PATHS"].keys():
+            if node_id == start_end_ids[0] :
+                _apos = (aln_dict[aln_name]["QRY.START"], nodes_length[node_id])
+                _ppos = (
+                    nodes[node_id]["PATHS"][path_name][0]+int(aln_dict[aln_name]["ALN.START"]), 
+                    nodes[node_id]["PATHS"][path_name][1]
+                    )
+            elif node_id == start_end_ids[-1]:
+                _ppos = (
+                    nodes[node_id]["PATHS"][path_name][0], 
+                    nodes[node_id]["PATHS"][path_name][0]+(int(aln_dict[aln_name]["ALN.BLOCK.LEN"]) - aln_pos)
+                    )
+                _apos = (aln_pos, aln_dict[aln_name]["END"])
             else :
-                if in_aln or not len(cur_aln):
-                    cur_aln.append({
-                        "IN": False,
-                        "NODES": {
-                            node_id: strand
-                        }
-                    })
-                    in_aln = False
-                
-                else : cur_aln[-1]["NODES"][node_id] = strand
+                _ppos = nodes[node_id]["PATHS"][path_name]
+                #_apos = (aln_pos, aln_pos+nodes_length[node_id])
 
-        aln_dict[aln_name]["HAP.MATCH"][path_name] = cur_aln.copy()
-        print(f"{path_name} :", cur_aln)
+                
+            _dict = {
+                "NODE.ID": node_id,
+                "P.ORIENT": paths[path_name]["NODES"][node_id],
+                "A.ORIENT": aln_dict[aln_name]["PATH.MATCH"][node_id],
+                "P.POS": _ppos,
+                #"A.POS": _apos,
+                "CIGAR": aln_dict[aln_name]["CIGAR"][node_id]
+            }
 
-        # Traversing the HAP.MATCH to get alignment
-        for 
+            try :
+                GA[aln_name][path_name].append(_dict)
+            except :
+                GA[aln_name][path_name] = [_dict]
+    
+print(GA)
+        
+# Creating the final dictionnary called LA (Linear alignment):
+# Here we merged previous results to get full alignments
 
+for 
 
 
 
-- 
GitLab


From 9c80010ecf0c5d79f0060653e9aaf5acd5f92319 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 17 May 2024 13:46:21 +0200
Subject: [PATCH 07/30] Update gaf2aln.py

---
 gaf2aln.py | 48 ++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 40 insertions(+), 8 deletions(-)

diff --git a/gaf2aln.py b/gaf2aln.py
index afc61e2..e8f8406 100644
--- a/gaf2aln.py
+++ b/gaf2aln.py
@@ -156,7 +156,7 @@ nodes_length = {}
 # Nodes dictionnary structured as follow :
 # {<ALN.NODE.ID> : {PATHS: {<PATH.NAME>: (start, end)}}}
 nodes = {
-    node_id: {"PATHS": {}} for node_id in aln_nodes
+    node_id: {"PATHS": {}, "ALN": {}} for node_id in aln_nodes
 }
 # Paths dictionnary structured as follow :
 # {<PATH.NAME>: {NODES: {<NODE.ID>: <NODE.ORIENT>}, CIGAR: <CIGAR in comma separated list>}
@@ -221,6 +221,29 @@ for path_name in paths.keys():
         except :
             cur_pos += nodes_length[path_node]+1
 
+print(f"[gaf2aln::Graph alignment processing] Computing nodes positions in each alignement...")
+# Adding nodes positions relative to path
+for aln_name in aln_dict.keys():
+    print(f"[gaf2aln::Graph processing] Running on {aln_name} ...")
+    cur_pos = 0
+    _ = list(aln_dict[aln_name]["PATH.MATCH"].keys())
+    start_end_id = (_[0], _[-1])
+    for node_id in aln_dict[aln_name]["PATH.MATCH"].keys():
+        if node_id == start_end_id[0]:
+            start_pos = int(aln_dict[aln_name]["ALN.START"])
+            end_pos = nodes_length[node_id]-int(aln_dict[aln_name]["ALN.START"])
+        elif node_id == start_end_id[1]:
+            start_pos = cur_pos
+            end_pos = int(aln_dict[aln_name]["ALN.END"])
+        else :
+            start_pos = cur_pos
+            end_pos = cur_pos+nodes_length[node_id]
+
+        nodes[node_id]["ALN"][aln_name] = (start_pos, end_pos)
+        cur_pos = end_pos+1
+
+print(nodes)
+
 # Calculating CIGAR for each nodes in each aln
 print(f"[gaf2aln::CIGAR processing] Computing nodes cigar from alignement ...")
 # Iterating over alignments
@@ -260,14 +283,12 @@ GA = {}
 print(f"[gaf2aln::Graph alignment processing] Lifting alignments coordinates paths positions...")
 for aln_name in aln_dict.keys():
     GA[aln_name] = {}
-    aln_pos = 0
     _ = list(aln_dict[aln]["PATH.MATCH"].keys())
     start_end_ids = _[0], _[-1]
 
     for node_id, orient in aln_dict[aln_name]["PATH.MATCH"].items():
         for path_name in nodes[node_id]["PATHS"].keys():
             if node_id == start_end_ids[0] :
-                _apos = (aln_dict[aln_name]["QRY.START"], nodes_length[node_id])
                 _ppos = (
                     nodes[node_id]["PATHS"][path_name][0]+int(aln_dict[aln_name]["ALN.START"]), 
                     nodes[node_id]["PATHS"][path_name][1]
@@ -275,12 +296,10 @@ for aln_name in aln_dict.keys():
             elif node_id == start_end_ids[-1]:
                 _ppos = (
                     nodes[node_id]["PATHS"][path_name][0], 
-                    nodes[node_id]["PATHS"][path_name][0]+(int(aln_dict[aln_name]["ALN.BLOCK.LEN"]) - aln_pos)
+                    nodes[node_id]["PATHS"][path_name][0]+(int(aln_dict[aln_name]["ALN.END"]) - nodes[node_id]["ALN"][aln_name][0])
                     )
-                _apos = (aln_pos, aln_dict[aln_name]["END"])
             else :
                 _ppos = nodes[node_id]["PATHS"][path_name]
-                #_apos = (aln_pos, aln_pos+nodes_length[node_id])
 
                 
             _dict = {
@@ -288,7 +307,6 @@ for aln_name in aln_dict.keys():
                 "P.ORIENT": paths[path_name]["NODES"][node_id],
                 "A.ORIENT": aln_dict[aln_name]["PATH.MATCH"][node_id],
                 "P.POS": _ppos,
-                #"A.POS": _apos,
                 "CIGAR": aln_dict[aln_name]["CIGAR"][node_id]
             }
 
@@ -302,8 +320,22 @@ print(GA)
 # Creating the final dictionnary called LA (Linear alignment):
 # Here we merged previous results to get full alignments
 
-for 
+for aln_name in GA.keys():
 
+    for path_name, node_list in GA[aln_name].items():
+        
+        contiguity = False
+        orient = 1
+        alns = []
+
+        for node_data in node_list:
+            
+            cur_orient = (node_data["P.ORIENT"] == node_data["A.ORIENT"])
+
+            if not contiguity :
+                alns.append(
+                    {"Q.START": nodes[node_data["NODE.ID"]]["ALN"][aln_name]}
+                )
 
 
 
-- 
GitLab


From 662abb3a4d5c573fc1f965bce54729876d9e858b Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 17 May 2024 18:58:10 +0200
Subject: [PATCH 08/30] Update gaf2aln.py

---
 gaf2aln.py | 224 +++++++++++++++++++++++++++++------------------------
 1 file changed, 122 insertions(+), 102 deletions(-)

diff --git a/gaf2aln.py b/gaf2aln.py
index e8f8406..e9850b3 100644
--- a/gaf2aln.py
+++ b/gaf2aln.py
@@ -78,12 +78,10 @@ def cigar2basealn(cigar):
     For example : "345=" -> ["=", "=", ..., "="] of length 345.
     """
     _ = re.findall(r'\d+\D', cigar)
-    print(_)
     final_cigar = []
     for match in _:
         final_cigar += [match[-1]]*int(match[:-1])
 
-    print(final_cigar)
     return final_cigar
 
 def basealn2cigar(base_aln_list):
@@ -125,10 +123,10 @@ for line in range(len(gaf_lines)):
     }
     
     ## Splitting "PATH.MATCH" into a list
-    aln_dict[f"ALN_{line+1}"]["PATH.MATCH"] = {
-        str(node_id[:-1]): node_id[-1] 
+    aln_dict[f"ALN_{line+1}"]["PATH.MATCH"] = [
+        (str(node_id[:-1]), node_id[-1]) 
         for node_id in walk2path(aln_dict[f"ALN_{line+1}"]["PATH.MATCH"])
-    }
+    ]
 
     ## Adding CIGAR
     aln_dict[f"ALN_{line+1}"]["RAW.CIGAR"] = line_content[-1]
@@ -140,7 +138,7 @@ for line in range(len(gaf_lines)):
 aln_nodes = np.unique([
     str(node_id) 
     for aln in aln_dict.keys() 
-    for node_id in aln_dict[aln]["PATH.MATCH"].keys()
+    for node_id, orient in aln_dict[aln]["PATH.MATCH"]
 ]).tolist()
 
 del gaf_lines, gaf_col
@@ -154,10 +152,12 @@ with open(args.gfa, 'r') as file:
 # {<NODE.ID>: <NODE.LENGTH>}
 nodes_length = {}
 # Nodes dictionnary structured as follow :
-# {<ALN.NODE.ID> : {PATHS: {<PATH.NAME>: (start, end)}}}
-nodes = {
-    node_id: {"PATHS": {}, "ALN": {}} for node_id in aln_nodes
-}
+# { <ALN.NODE.ID> : {
+#   <PATH.NAME>: {"START": start, "END": end, "STRAND": strand), 
+#   <ALN.NAME>: {"START": start, "END": end, "S.OFF": start.offset, "E.OFF": end.offset, "STRAND": strand, "CIGAR": CIGAR}
+#   }
+# }
+nodes = {node_id: {} for node_id in aln_nodes}
 # Paths dictionnary structured as follow :
 # {<PATH.NAME>: {NODES: {<NODE.ID>: <NODE.ORIENT>}, CIGAR: <CIGAR in comma separated list>}
 paths = {}
@@ -201,48 +201,84 @@ for line in gfa_lines:
 
 del gfa_lines
 
-# Creating GA (Graph alignment) dictionnary storing given info :
-# {<ALN_ID> : 
-#   { <PATH.ID> : [
-#       { NODE.ID, P.ORIENT, A.ORIENT, P.POS, A.POS, CG }
-#       ]  
-# }
-# }
-
 print(f"[gaf2aln::Graph alignment processing] Computing nodes positions in each paths...")
 # Adding nodes positions relative to path
 for path_name in paths.keys():
     print(f"[gaf2aln::Graph processing] Running on {path_name} ...")
     cur_pos = 0
+
+    # Iterating over nodes in the path
     for path_node in paths[path_name]["NODES"].keys():
+        # Instead of checking if the node is one interesting node, we try to add to the nodes dict
         try :
-            nodes[path_node]["PATHS"][path_name] = (cur_pos, cur_pos+nodes_length[path_node]) 
+            nodes[path_node][path_name] = {
+                "START": cur_pos, # Start position of the node start in the currrent path
+                "END": cur_pos+nodes_length[path_node], # End position of the node end in the current path
+                "STRAND": paths[path_name]["NODES"][node_id] # Orientation of the node in the current path
+                } 
+
             cur_pos += nodes_length[path_node]+1
         except :
             cur_pos += nodes_length[path_node]+1
 
 print(f"[gaf2aln::Graph alignment processing] Computing nodes positions in each alignement...")
 # Adding nodes positions relative to path
-for aln_name in aln_dict.keys():
-    print(f"[gaf2aln::Graph processing] Running on {aln_name} ...")
+
+def get_aln_node_info(aln_name, aln_dict = aln_dict, nodes_length = nodes_length):
+    # Initializing current position in query
     cur_pos = 0
-    _ = list(aln_dict[aln_name]["PATH.MATCH"].keys())
-    start_end_id = (_[0], _[-1])
-    for node_id in aln_dict[aln_name]["PATH.MATCH"].keys():
+
+    # Getting start and end node ids
+    start_end_id = (aln_dict[aln_name]["PATH.MATCH"][0][0], aln_dict[aln_name]["PATH.MATCH"][-1][0])
+
+    # Creating result dictionnary
+    res = {}
+
+    ## Iterating over node_ids from the given alignment
+    for node_id, orient in aln_dict[aln_name]["PATH.MATCH"]:
+        # Adding entry for current node
+        res[node_id] = {aln_name: {}}
+
+        # First node
         if node_id == start_end_id[0]:
-            start_pos = int(aln_dict[aln_name]["ALN.START"])
-            end_pos = nodes_length[node_id]-int(aln_dict[aln_name]["ALN.START"])
+            start_pos = 0
+            s_off = int(aln_dict[aln_name]["ALN.START"])
+            end_pos = nodes_length[node_id]-s_off
+            e_off = 0
+        # End node
         elif node_id == start_end_id[1]:
             start_pos = cur_pos
-            end_pos = int(aln_dict[aln_name]["ALN.END"])
+            s_off = 0
+            end_pos = int(aln_dict[aln_name]["QRY.END"])
+            e_off = nodes_length[node_id]-(end_pos-cur_pos)
+        # Node in between
         else :
             start_pos = cur_pos
+            s_off, e_off = 0, 0
             end_pos = cur_pos+nodes_length[node_id]
 
-        nodes[node_id]["ALN"][aln_name] = (start_pos, end_pos)
-        cur_pos = end_pos+1
+        res[node_id] = {
+            "START": start_pos, # Start position on the query
+            "END": end_pos, # End position on the query
+            "S.OFF": s_off, # Offset between the start of the alignment and the node's start
+            "E.OFF": e_off, # Offset between the end of the alignment and the node's end 
+            "STRAND": orient # Orientation of the node in the alignment
+            }
+        
+        cur_pos = end_pos
+        print(start_pos, end_pos, s_off, e_off, orient, nodes_length[node_id], cur_pos)
 
-print(nodes)
+    return res
+
+# Storing alignement 
+aln_processing = {}
+for aln_name in aln_dict.keys():
+    print(f"[gaf2aln::Graph processing] Running on {aln_name} ...")
+    
+    _ = get_aln_node_info(aln_name, aln_dict = aln_dict, nodes_length = nodes_length)
+
+    for node_id, res in _.items():
+        nodes[node_id][aln_name] = res
 
 # Calculating CIGAR for each nodes in each aln
 print(f"[gaf2aln::CIGAR processing] Computing nodes cigar from alignement ...")
@@ -252,90 +288,74 @@ for aln in aln_dict.keys():
     print(f"[gaf2aln::CIGAR processing] Running on {aln} ...")
     # Getting the list of base level alignement (["=", "X", ...] from "1=1X...")
     raw_cigar = cigar2basealn(aln_dict[aln]["RAW.CIGAR"])
-    
-    cur_pos = 0
-    print(len(raw_cigar))
     CIGAR={}
-    aln_nodes_id = list(aln_dict[aln]["PATH.MATCH"].keys())
-    for node_id in aln_nodes_id:
-
-        # Starting aln node 
-        if node_id == aln_nodes_id[0] :
-            _cigar = basealn2cigar(raw_cigar[
-                0:nodes_length[node_id]-int(aln_dict[aln]["ALN.START"])
-                ])
-            cur_pos += nodes_length[node_id]-int(aln_dict[aln]["ALN.START"])
-        # Last aln node
-        elif node_id == aln_nodes_id[-1]:
-            _cigar = basealn2cigar(raw_cigar[cur_pos:])
-        else :
-            _cigar = basealn2cigar(raw_cigar[cur_pos:cur_pos+nodes_length[node_id]])
-            cur_pos += nodes_length[node_id]
 
-        CIGAR[node_id] = _cigar
-        
-    aln_dict[aln]["CIGAR"] = CIGAR
-    print(CIGAR)
+    for node_id, orient in aln_dict[aln]["PATH.MATCH"]:
+
+        _cigar = basealn2cigar(raw_cigar[
+            nodes[node_id][aln]["START"]:nodes[node_id][aln]["END"]
+            ])
+        nodes[node_id][aln]["CIGAR"] = _cigar
+        #print(_cigar, nodes[node_id][aln]["START"], nodes[node_id][aln]["END"])
+
+#print(nodes)
 
-GA = {}
+# Lifting graph alignements to haplotype alignements
 
-# Computing alignments nodes positions in paths
-print(f"[gaf2aln::Graph alignment processing] Lifting alignments coordinates paths positions...")
+ALNS = {}
 for aln_name in aln_dict.keys():
-    GA[aln_name] = {}
-    _ = list(aln_dict[aln]["PATH.MATCH"].keys())
-    start_end_ids = _[0], _[-1]
-
-    for node_id, orient in aln_dict[aln_name]["PATH.MATCH"].items():
-        for path_name in nodes[node_id]["PATHS"].keys():
-            if node_id == start_end_ids[0] :
-                _ppos = (
-                    nodes[node_id]["PATHS"][path_name][0]+int(aln_dict[aln_name]["ALN.START"]), 
-                    nodes[node_id]["PATHS"][path_name][1]
-                    )
-            elif node_id == start_end_ids[-1]:
-                _ppos = (
-                    nodes[node_id]["PATHS"][path_name][0], 
-                    nodes[node_id]["PATHS"][path_name][0]+(int(aln_dict[aln_name]["ALN.END"]) - nodes[node_id]["ALN"][aln_name][0])
-                    )
-            else :
-                _ppos = nodes[node_id]["PATHS"][path_name]
-
-                
-            _dict = {
-                "NODE.ID": node_id,
-                "P.ORIENT": paths[path_name]["NODES"][node_id],
-                "A.ORIENT": aln_dict[aln_name]["PATH.MATCH"][node_id],
-                "P.POS": _ppos,
-                "CIGAR": aln_dict[aln_name]["CIGAR"][node_id]
-            }
+    
+    for path_name in paths.keys():
+        ALNS[(path_name, aln_name)] = []
 
+        _ = []
+        for node_id, orient in aln_dict[aln_name]["PATH.MATCH"].items():
+            
+            n_info = nodes[node_id]
             try :
-                GA[aln_name][path_name].append(_dict)
-            except :
-                GA[aln_name][path_name] = [_dict]
-    
-print(GA)
-        
-# Creating the final dictionnary called LA (Linear alignment):
-# Here we merged previous results to get full alignments
+                if n_info[aln_name]["STRAND"] == n_info[path_name]["STRAND"] :
+                    t_start = n_info[path_name]["START"]+n_info[aln_name]["S.OFF"]
+                    t_end = n_info[path_name]["END"]+n_info[aln_name]["E.OFF"] 
+                else :
+                    t_end = n_info[path_name]["START"]+n_info[aln_name]["S.OFF"]
+                    t_start = n_info[path_name]["END"]+n_info[aln_name]["E.OFF"]
+
+                q_start = n_info[aln_name]["START"]
+                q_end = n_info[aln_name]["END"]
+                _CG = n_info[aln_name]["CIGAR"]
+
+                # Non empty temporary list of aln and ending of the last block is the same as the start of the new node : 
+                if len(_) and _[-1]["T.END"] == t_start and _[-1]["Q.END"] == q_start: 
+                    tmp_aln["Q.END"] = q_end
+                    tmp_aln["T.END"] = t_end
+                    tmp_aln["CG"] += _CG
+                elif len(_) and _[-1]["T.END"] == t_start: # Following on the target not on the query (i.e. Insertion)
+                    tmp_aln["T.END"] = t_end
+                    tmp_aln["CG"] += f"{nodes_length[node_id]}I"
+                elif len(_) and _[-1]["Q.END"] == q_start: # Following on the query, not on the target (i.e. Deletion)
+                    tmp_aln["Q.END"] = q_end
+                    tmps_aln["CG"] += f"{nodes_length[node_id]}D"
+                else : # Else, completely different
+                    tmp_aln = {
+                        "Q.START": q_start ,
+                        "Q.END": q_end,
+                        "T.START": t_start,
+                        "T.END": t_end,
+                        "CG": _CG,
+                        }
+            except:
+                # Node is not in the path
+                tmp_aln
 
-for aln_name in GA.keys():
+            
 
-    for path_name, node_list in GA[aln_name].items():
         
-        contiguity = False
-        orient = 1
-        alns = []
-
-        for node_data in node_list:
             
-            cur_orient = (node_data["P.ORIENT"] == node_data["A.ORIENT"])
+            
+
+
+
 
-            if not contiguity :
-                alns.append(
-                    {"Q.START": nodes[node_data["NODE.ID"]]["ALN"][aln_name]}
-                )
 
 
 
-- 
GitLab


From a12ec03c1c4e18dea839511ffcf2b6d893cead6b Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Fri, 17 May 2024 20:38:15 +0200
Subject: [PATCH 09/30] Update gaf2aln.py

---
 gaf2aln.py | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/gaf2aln.py b/gaf2aln.py
index e9850b3..60794ea 100644
--- a/gaf2aln.py
+++ b/gaf2aln.py
@@ -309,9 +309,13 @@ for aln_name in aln_dict.keys():
         ALNS[(path_name, aln_name)] = []
 
         _ = []
-        for node_id, orient in aln_dict[aln_name]["PATH.MATCH"].items():
+        for node_id, orient in aln_dict[aln_name]["PATH.MATCH"]:
             
             n_info = nodes[node_id]
+            q_start = n_info[aln_name]["START"]
+            q_end = n_info[aln_name]["END"]
+            _CG = n_info[aln_name]["CIGAR"]
+
             try :
                 if n_info[aln_name]["STRAND"] == n_info[path_name]["STRAND"] :
                     t_start = n_info[path_name]["START"]+n_info[aln_name]["S.OFF"]
@@ -320,10 +324,6 @@ for aln_name in aln_dict.keys():
                     t_end = n_info[path_name]["START"]+n_info[aln_name]["S.OFF"]
                     t_start = n_info[path_name]["END"]+n_info[aln_name]["E.OFF"]
 
-                q_start = n_info[aln_name]["START"]
-                q_end = n_info[aln_name]["END"]
-                _CG = n_info[aln_name]["CIGAR"]
-
                 # Non empty temporary list of aln and ending of the last block is the same as the start of the new node : 
                 if len(_) and _[-1]["T.END"] == t_start and _[-1]["Q.END"] == q_start: 
                     tmp_aln["Q.END"] = q_end
@@ -337,7 +337,7 @@ for aln_name in aln_dict.keys():
                     tmps_aln["CG"] += f"{nodes_length[node_id]}D"
                 else : # Else, completely different
                     tmp_aln = {
-                        "Q.START": q_start ,
+                        "Q.START": q_start,
                         "Q.END": q_end,
                         "T.START": t_start,
                         "T.END": t_end,
@@ -345,7 +345,15 @@ for aln_name in aln_dict.keys():
                         }
             except:
                 # Node is not in the path
-                tmp_aln
+                tmp_aln = {
+                    "Q.START": q_start,
+                    "Q.END": q_end,
+                    "T.START": -1,
+                    "T.END": -1,
+                    "CG": f"{nodes_length[node_id]}D"
+                }
+
+print(ALNS)
 
             
 
-- 
GitLab


From a7751c03ea016645940f4c6653c471c7805ca342 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Tue, 21 May 2024 17:07:23 +0200
Subject: [PATCH 10/30] Update gaf2aln.py

---
 gaf2aln.py | 129 ++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 104 insertions(+), 25 deletions(-)

diff --git a/gaf2aln.py b/gaf2aln.py
index 60794ea..7657046 100644
--- a/gaf2aln.py
+++ b/gaf2aln.py
@@ -167,6 +167,55 @@ links = {}
 
 # Parsing the gfa
 print(f"[gaf2aln::GFA Parser] Extracting nodes, paths and links ...")
+
+def GFA_parser(gfa_lines, nodes = nodes):
+    _links, _nodes, _nodes_length, paths = {}, {}, {}, {}
+    for line in gfa_lines:
+        line_content = line[:-1].split("\t")
+        line_id = line_content[0]
+        
+        # Segment line
+        if line_id == "S" :
+            
+            _nodes_length[str(line_content[1])] = len(line_content[2])
+        
+        # Link line
+        elif line_id == "L":
+            try :
+                _links[str(line_content[1])][str(line_content[3])] = {
+                    "FROM": str(line_content[2]), 
+                    "TO": str(line_content[4])
+                }
+
+            except :
+                _links[str(line_content[1])] = {
+                    str(line_content[3]) : {"FROM.ORIENT": str(line_content[2]), "TO.ORIENT": str(line_content[4])}
+                }
+
+        # Path line
+        elif line_id == "P":
+            _paths[str(line_content[1])] = {
+                "NODES": {
+                    str(node_id[:-1]): str(node_id[-1])
+                    for node_id in line_content[2].split(',')
+                },
+                "CIGAR": line_content[3]
+            }
+
+    return [_links, _nodes, _nodes_length, _paths]
+
+# splits = np.quantile(range(len(gfa_lines)+1), q= np.array(args.threads+1)/args.threads, method='higher').tolist()
+# res = []
+# for i in range(1, len(splits)):
+#     res.append(executor.submit(GFA_parser, gfa_lines[splits[i-1]:splits[i]]))
+
+# for out in res:
+#     results = out.result()
+
+#     for link_id, link_info in results[0].items():
+#         links[]
+
+
 for line in gfa_lines:
     line_content = line[:-1].split("\t")
     line_id = line_content[0]
@@ -201,27 +250,44 @@ for line in gfa_lines:
 
 del gfa_lines
 
-print(f"[gaf2aln::Graph alignment processing] Computing nodes positions in each paths...")
-# Adding nodes positions relative to path
-for path_name in paths.keys():
-    print(f"[gaf2aln::Graph processing] Running on {path_name} ...")
+print(f"[gaf2aln::Graph position processing] Computing nodes positions in each paths...")
+def get_node_pos(path_name, nodes = nodes, paths = paths, nodes_length = nodes_length):
+    print(f"[gaf2aln::Graph position processing] Running on {path_name} ...")
     cur_pos = 0
 
+    out = {}
     # Iterating over nodes in the path
     for path_node in paths[path_name]["NODES"].keys():
         # Instead of checking if the node is one interesting node, we try to add to the nodes dict
-        try :
-            nodes[path_node][path_name] = {
+        if path_node in aln_nodes :
+            out[path_node] = {
                 "START": cur_pos, # Start position of the node start in the currrent path
                 "END": cur_pos+nodes_length[path_node], # End position of the node end in the current path
-                "STRAND": paths[path_name]["NODES"][node_id] # Orientation of the node in the current path
+                "STRAND": paths[path_name]["NODES"][path_node] # Orientation of the node in the current path
                 } 
 
             cur_pos += nodes_length[path_node]+1
-        except :
+        else :
             cur_pos += nodes_length[path_node]+1
 
-print(f"[gaf2aln::Graph alignment processing] Computing nodes positions in each alignement...")
+    return out
+
+res = {}
+executor = concurrent.futures.ThreadPoolExecutor(max_workers=args.threads)
+# Adding nodes positions relative to path
+for path_name in paths.keys():
+    res[path_name] = executor.submit(get_node_pos, path_name)
+
+executor.shutdown(wait=True)
+
+for path_name, out in res.items():
+    results = out.result()
+    for path_node, node_pos in results.items():
+        nodes[path_node][path_name] = node_pos
+
+del res
+
+print(f"[gaf2aln::Alignment position processing] Computing nodes positions in each alignement...")
 # Adding nodes positions relative to path
 
 def get_aln_node_info(aln_name, aln_dict = aln_dict, nodes_length = nodes_length):
@@ -271,14 +337,22 @@ def get_aln_node_info(aln_name, aln_dict = aln_dict, nodes_length = nodes_length
     return res
 
 # Storing alignement 
-aln_processing = {}
+res = {}
+executor = concurrent.futures.ThreadPoolExecutor(max_workers=args.threads)
 for aln_name in aln_dict.keys():
-    print(f"[gaf2aln::Graph processing] Running on {aln_name} ...")
+    print(f"[gaf2aln::Alignment position processing] Running on {aln_name} ...")
     
-    _ = get_aln_node_info(aln_name, aln_dict = aln_dict, nodes_length = nodes_length)
+    res[aln_name] = executor.submit(get_aln_node_info, aln_name)
+    #res[aln_name] = get_aln_node_info(aln_name, aln_dict = aln_dict, nodes_length = nodes_length)
+
+executor.shutdown(wait=True)
 
-    for node_id, res in _.items():
-        nodes[node_id][aln_name] = res
+for aln_name, node_info in res.items():
+    results = node_info.result()
+    for node_id, info in results.items():
+        nodes[node_id][aln_name] = info
+
+del res
 
 # Calculating CIGAR for each nodes in each aln
 print(f"[gaf2aln::CIGAR processing] Computing nodes cigar from alignement ...")
@@ -306,7 +380,6 @@ ALNS = {}
 for aln_name in aln_dict.keys():
     
     for path_name in paths.keys():
-        ALNS[(path_name, aln_name)] = []
 
         _ = []
         for node_id, orient in aln_dict[aln_name]["PATH.MATCH"]:
@@ -316,7 +389,10 @@ for aln_name in aln_dict.keys():
             q_end = n_info[aln_name]["END"]
             _CG = n_info[aln_name]["CIGAR"]
 
-            try :
+            print(node_id, path_name, q_start, q_end)
+            if path_name in list(n_info.keys()):
+                print("\tIn path")
+
                 if n_info[aln_name]["STRAND"] == n_info[path_name]["STRAND"] :
                     t_start = n_info[path_name]["START"]+n_info[aln_name]["S.OFF"]
                     t_end = n_info[path_name]["END"]+n_info[aln_name]["E.OFF"] 
@@ -324,6 +400,8 @@ for aln_name in aln_dict.keys():
                     t_end = n_info[path_name]["START"]+n_info[aln_name]["S.OFF"]
                     t_start = n_info[path_name]["END"]+n_info[aln_name]["E.OFF"]
 
+                print("\t", t_start, t_end)
+
                 # Non empty temporary list of aln and ending of the last block is the same as the start of the new node : 
                 if len(_) and _[-1]["T.END"] == t_start and _[-1]["Q.END"] == q_start: 
                     tmp_aln["Q.END"] = q_end
@@ -343,17 +421,18 @@ for aln_name in aln_dict.keys():
                         "T.END": t_end,
                         "CG": _CG,
                         }
-            except:
+                print("\t", tmp_aln)
+                
+            else : 
+                print("\tNot in path")
                 # Node is not in the path
-                tmp_aln = {
-                    "Q.START": q_start,
-                    "Q.END": q_end,
-                    "T.START": -1,
-                    "T.END": -1,
-                    "CG": f"{nodes_length[node_id]}D"
-                }
 
-print(ALNS)
+        _.append(tmp_aln)
+    ALNS[(path_name, aln_name)] = _
+
+## Debug
+for elem in ALNS[("TO1000#1#chr03", "ALN_1")]:
+    print(elem) 
 
             
 
-- 
GitLab


From 56990c58b0a19b66c42d98e3a5b62dd4a5c90d49 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 31 May 2024 13:42:58 +0200
Subject: [PATCH 11/30] Create Anchors2Path.py

---
 Anchors2Path.py | 166 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 166 insertions(+)
 create mode 100644 Anchors2Path.py

diff --git a/Anchors2Path.py b/Anchors2Path.py
new file mode 100644
index 0000000..e99b48a
--- /dev/null
+++ b/Anchors2Path.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Anchors2Path
+Give anchors nodes ids with their relative positions in a given path.
+
+@author: alexis.mergez@inrae.fr
+@version: 0.1
+"""
+import re
+import argparse
+import os
+import numpy as np
+import time
+import pandas as pd
+from functools import reduce
+import concurrent.futures
+import gzip
+
+version = "0.1"
+
+## Argument parser
+arg_parser = argparse.ArgumentParser(description='Anchors2Path')
+arg_parser.add_argument(
+    "--gfa",
+    "-g",
+    dest = "gfa",
+    required = True,
+    help = "GFA file"
+    )  
+arg_parser.add_argument(
+    "--output",
+    "-o",
+    dest = "output",
+    required = True,
+    help = "Output name"
+    )  
+arg_parser.add_argument(
+    "--threads",
+    "-t",
+    dest = "threads",
+    required = False,
+    default = 1,
+    type = int,
+    help = "Number of threads"
+    )    
+arg_parser.add_argument(
+    '--version',
+    '-v',
+    action="store_true",
+    dest = "version",
+    help = "Show version"
+)
+arg_parser.add_argument(
+    '--progress',
+    '-P',
+    action="store_true",
+    dest = "progress",
+    help = "Show progress to stdout"
+)
+arg_parser.add_argument(
+    '--pathname',
+    '-r',
+    dest = "pathname",
+    required = True,
+    help = "Pathname"
+)
+args = arg_parser.parse_args()
+
+# Printing version and exiting if required
+if args.version:
+    print(version)
+    os._exit(0)
+
+# Timing the script
+start_time = time.time()
+
+## Reading the gfa into a list
+# If not gzipped :
+if args.gfa[-2:] != "gz" :
+    with open(args.gfa, 'r') as file:
+        gfaLines = file.readlines()
+
+# If gzipped :
+else :
+    with gzip.open(args.gfa, 'r') as file:
+        gfaLines = [line.decode() for line in file.readlines()]
+
+# Progress message
+if args.progress: print(f"[GFAstats::{panname}] Parsing gfa file...")
+
+# Initializing dictionnaries
+Anchors = {}
+## {<NODE_ID>: (path_start, path_end)}
+nodes_length = {}
+path_nodes = {}
+## {<path_id>: <nodes_list>}
+
+for line in gfaLines[1:]:
+
+    # Skipping comment lines
+    if line[0] == "#":
+        lineType = "#"
+
+    # Reading 3 first columns of the current line
+    else :
+        lineType, uid, value = line[:-1].split('\t')[:3]
+
+    if lineType == "S": # Segments = Nodes
+        nodes_length[int(uid)] = len(value)
+
+    elif lineType == "P": # Paths
+        
+        path_nodes[uid] = [k[:-1] for k in value.split(",")]
+
+if args.progress:
+    print(f"[GFAstats::{panname}] Parsed in {round(time.time() - start_time, 2)}s")
+
+# Getting the list of anchor nodes
+node_path_count = {}
+## {<NODE_ID>: <Number of path traversing this node>}
+# Computing number of path traversing each nodes
+for path_id, node_list in path_nodes.items():
+    for node_id in node_list:
+        
+        try :
+            node_path_count[node_id] += 1
+        except :
+            node_path_count[node_id] = 1
+
+# Searching anchors
+n_path = len(list(path_nodes.keys()))
+
+for node_id, count in node_path_count.items():
+    if count == n_path :
+        Anchors[node_id] = []
+
+# Computing path position for each node of the path of interest
+current_pos = 0
+for node_id in path_nodes[args.pathname]:
+    _end = current_pos + nodes_length[node_id]
+
+    # Trying to add anchors path position if it is an anchor
+    try :
+        Anchors[node_id].append( (current_pos, _end) )
+    except:
+        pass
+
+    current_pos = _end
+
+# Transforming data into a table
+ID, START, END = [], [], []
+for node_id, positions in Anchors.items():
+    for start, end in positions:
+        ID.append(node_id)
+        START.append(start)
+        END.append(end)
+
+df = pd.DataFrame(data = {
+    "NODE_ID": ID,
+    "START": START,
+    "END": END
+})
+
+df.to_csv(args.output, sep="\t")
+
-- 
GitLab


From 1296dd803f594d2aeea67cb8cc92d9c007492922 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 31 May 2024 13:49:06 +0200
Subject: [PATCH 12/30] Update Anchors2Path.py

---
 Anchors2Path.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Anchors2Path.py b/Anchors2Path.py
index e99b48a..3006ef3 100644
--- a/Anchors2Path.py
+++ b/Anchors2Path.py
@@ -87,7 +87,7 @@ else :
         gfaLines = [line.decode() for line in file.readlines()]
 
 # Progress message
-if args.progress: print(f"[GFAstats::{panname}] Parsing gfa file...")
+if args.progress: print(f"[Anchors2Path] Parsing gfa file...")
 
 # Initializing dictionnaries
 Anchors = {}
@@ -114,7 +114,7 @@ for line in gfaLines[1:]:
         path_nodes[uid] = [k[:-1] for k in value.split(",")]
 
 if args.progress:
-    print(f"[GFAstats::{panname}] Parsed in {round(time.time() - start_time, 2)}s")
+    print(f"[Anchors2Path] Parsed in {round(time.time() - start_time, 2)}s")
 
 # Getting the list of anchor nodes
 node_path_count = {}
-- 
GitLab


From c7d03713a9893274b3f3c106176bfc8bee9b2637 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 31 May 2024 13:52:15 +0200
Subject: [PATCH 13/30] Update Anchors2Path.py

---
 Anchors2Path.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Anchors2Path.py b/Anchors2Path.py
index 3006ef3..2e64c83 100644
--- a/Anchors2Path.py
+++ b/Anchors2Path.py
@@ -111,7 +111,7 @@ for line in gfaLines[1:]:
 
     elif lineType == "P": # Paths
         
-        path_nodes[uid] = [k[:-1] for k in value.split(",")]
+        path_nodes[uid] = [int(k[:-1]) for k in value.split(",")]
 
 if args.progress:
     print(f"[Anchors2Path] Parsed in {round(time.time() - start_time, 2)}s")
@@ -142,7 +142,7 @@ for node_id in path_nodes[args.pathname]:
 
     # Trying to add anchors path position if it is an anchor
     try :
-        Anchors[node_id].append( (current_pos, _end) )
+        Anchors[int(node_id)].append( (current_pos, _end) )
     except:
         pass
 
-- 
GitLab


From 537598f921ec0f2a987f9554109802399fafccdb Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 31 May 2024 13:56:17 +0200
Subject: [PATCH 14/30] Update Anchors2Path.py

---
 Anchors2Path.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Anchors2Path.py b/Anchors2Path.py
index 2e64c83..3111c6f 100644
--- a/Anchors2Path.py
+++ b/Anchors2Path.py
@@ -162,5 +162,5 @@ df = pd.DataFrame(data = {
     "END": END
 })
 
-df.to_csv(args.output, sep="\t")
+df.to_csv(args.output, sep="\t", index = False)
 
-- 
GitLab


From 745eee2cd00e33eb46ce7410baab3f15fdb05ac1 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Mon, 3 Jun 2024 13:20:47 +0200
Subject: [PATCH 15/30] Update

---
 .ipynb_checkpoints/gaf2aln-checkpoint.ipynb | 1723 +++++++++++++
 gaf2aln.ipynb                               | 2443 +++++++++++++++++++
 gaf2aln.py                                  |   38 +-
 3 files changed, 4180 insertions(+), 24 deletions(-)
 create mode 100644 .ipynb_checkpoints/gaf2aln-checkpoint.ipynb
 create mode 100644 gaf2aln.ipynb

diff --git a/.ipynb_checkpoints/gaf2aln-checkpoint.ipynb b/.ipynb_checkpoints/gaf2aln-checkpoint.ipynb
new file mode 100644
index 0000000..effb776
--- /dev/null
+++ b/.ipynb_checkpoints/gaf2aln-checkpoint.ipynb
@@ -0,0 +1,1723 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "4ffaf9f6-cc1e-4190-9351-5431c930d25b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import argparse\n",
+    "import concurrent.futures\n",
+    "import os\n",
+    "import re\n",
+    "\n",
+    "# Replace for argparse arguments\n",
+    "class arguments():\n",
+    "    gfa = \"/home/amergez/Documents/Scratch/LeChou/35Bra-v2a/35Bra-v2a.chr03.gfa\"\n",
+    "    gaf = \"/home/amergez/Documents/Scratch/LeChou/35Bra-v2a/Mapping2Graph/GA.FLC2.aln.gaf\"\n",
+    "    threads = 8\n",
+    "    version = False\n",
+    "args = arguments()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "280c8847-22e8-4063-bde8-3e4e72cf20e7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Toolbox\n",
+    "def walk2path(walk):\n",
+    "    \"\"\"\n",
+    "    Takes a walk in a single string and returns a list of nodes id with signs (gfa v1 like)\n",
+    "    \"\"\"\n",
+    "    _ = re.findall(r'>\\w+|<\\w+', walk)\n",
+    "    # Converting ['>..', '>..', '<..', '>..'] to '..+,..+,..-,..+'\n",
+    "    return [f'{elem[1:]}{(elem[0] == \">\")*\"+\"+(elem[0] == \"<\")*\"-\"}' for elem in _]\n",
+    "\n",
+    "def cigar2basealn(cigar):\n",
+    "    \"\"\"\n",
+    "    Takes a CIGAR string and convert it into a list of base level alignment.\n",
+    "    For example : \"345=\" -> [\"=\", \"=\", ..., \"=\"] of length 345.\n",
+    "    \"\"\"\n",
+    "    _ = re.findall(r'\\d+\\D', cigar)\n",
+    "    final_cigar = []\n",
+    "    for match in _:\n",
+    "        final_cigar += [match[-1]]*int(match[:-1])\n",
+    "\n",
+    "    return final_cigar\n",
+    "\n",
+    "def basealn2cigar(base_aln_list):\n",
+    "    \n",
+    "    last_elem = base_aln_list[0]\n",
+    "    CIGAR = [[1, last_elem]]\n",
+    "    for elem in base_aln_list[1:]:\n",
+    "        if elem == last_elem:\n",
+    "            CIGAR[-1][0] += 1\n",
+    "\n",
+    "        else :\n",
+    "            CIGAR[-1][0] = str(CIGAR[-1][0])\n",
+    "            CIGAR.append([1, elem])\n",
+    "            last_elem = elem\n",
+    "    CIGAR[-1][0] = str(CIGAR[-1][0])\n",
+    "    return \"\".join([\"\".join(block) for block in CIGAR if block[1] != \"\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "be12e9d4-de76-4c8b-af84-6567549483f4",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[gaf2aln::GAF Parser] Reading /home/amergez/Documents/Scratch/LeChou/35Bra-v2a/Mapping2Graph/GA.FLC2.aln.gaf ...\n",
+      "[gaf2aln::GAF Parser] Extracting alignments ...\n",
+      "{'ALN_1': {'QRY.NAME': 'FLC2.TO1000#1#chr03', 'QRY.LEN': '3735', 'QRY.START': '0', 'QRY.END': '3735', 'STRAND': '+', 'PATH.MATCH': [('7046526', '+'), ('7046528', '+'), ('7046530', '+'), ('7046531', '+'), ('7046532', '+'), ('7046533', '+'), ('7046534', '+'), ('7046536', '+'), ('7046537', '+'), ('7046539', '+'), ('7046541', '+'), ('7046542', '+'), ('7046544', '+'), ('7046546', '+'), ('7046547', '+'), ('7046549', '+'), ('7046551', '+'), ('7046552', '+'), ('7046554', '+'), ('7046556', '+'), ('7046556', '+'), ('7046556', '+'), ('7046557', '+'), ('7046558', '+'), ('7046559', '+'), ('7046560', '+'), ('7046561', '+'), ('7046561', '+'), ('7046561', '+'), ('7046561', '+'), ('7046561', '+'), ('7046561', '+'), ('7046561', '+'), ('7046561', '+'), ('7046561', '+'), ('7046561', '+'), ('7046562', '+'), ('7046564', '+'), ('7046565', '+'), ('7046567', '+'), ('7046568', '+'), ('7046570', '+'), ('7046571', '+'), ('7046573', '+'), ('7046574', '+'), ('7046576', '+'), ('7046577', '+'), ('7046579', '+'), ('7046581', '+'), ('7046583', '+'), ('7046584', '+'), ('7046586', '+'), ('7046587', '+'), ('7046589', '+'), ('7046590', '+'), ('7046592', '+'), ('7046593', '+'), ('7046594', '+'), ('7046596', '+'), ('7046597', '+'), ('7046599', '+'), ('7046600', '+'), ('7046601', '+'), ('7046603', '+'), ('7046604', '+'), ('7046606', '+'), ('7046608', '+'), ('7046609', '+'), ('7046621', '+'), ('7046622', '+'), ('7046624', '+'), ('7046625', '+'), ('7046626', '+'), ('7046628', '+'), ('7046631', '+'), ('7046673', '+'), ('7046631', '+'), ('7046673', '+'), ('7046631', '+'), ('7046632', '+'), ('7046634', '+'), ('7046635', '+'), ('7046637', '+'), ('7046638', '+'), ('7046639', '+'), ('7046641', '+'), ('7046644', '+'), ('7046646', '+'), ('7046647', '+'), ('7046649', '+'), ('7046650', '+'), ('7046652', '+'), ('7046653', '+'), ('7046654', '+'), ('7046656', '+'), ('7046657', '+'), ('7046659', '+'), ('7046660', '+'), ('7046662', '+'), ('7046663', '+'), ('7046665', '+'), ('7046667', '+'), ('7046668', '+'), 
('7046670', '+'), ('7046671', '+'), ('7046674', '+'), ('7046675', '+'), ('7046674', '+'), ('7046675', '+'), ('7046676', '+'), ('7046678', '+'), ('7046679', '+'), ('7046680', '+'), ('7046682', '+'), ('7046684', '+'), ('7046685', '+'), ('7046686', '+'), ('7046688', '+'), ('7046690', '+'), ('7046692', '+'), ('7046693', '+'), ('7046695', '+'), ('7046696', '+'), ('7046698', '+'), ('7046700', '+'), ('7046702', '+'), ('7046703', '+'), ('7046704', '+'), ('7046706', '+'), ('7046707', '+'), ('7046709', '+'), ('7046710', '+'), ('7046712', '+'), ('7046713', '+'), ('7046715', '+'), ('7046718', '+'), ('7046717', '+'), ('7046718', '+'), ('7046717', '+'), ('7046718', '+'), ('7046720', '+'), ('7046722', '+'), ('7046724', '+'), ('7046725', '+'), ('7046727', '+'), ('7046728', '+'), ('7046729', '+'), ('7046730', '+'), ('7046731', '+'), ('7046733', '+'), ('7046735', '+'), ('7046736', '+'), ('7046738', '+'), ('7046739', '+'), ('7046740', '+'), ('7046738', '+'), ('7046739', '+'), ('7046740', '+'), ('7046738', '+'), ('7046739', '+'), ('7046741', '+')], 'PATH.LEN': '3822', 'ALN.START': '77', 'ALN.END': '3812', 'RES.MATCH': '3735', 'ALN.BLOCK.LEN': '3735', 'MAPPING.QUAL': '60', 'RAW.CIGAR': 'cg:Z:3735=', 'TAGS': 'AS:f:3735,dv:f:0,id:f:1'}, 'ALN_2': {'QRY.NAME': 'FLC2.TO1000#1#chr03', 'QRY.LEN': '3735', 'QRY.START': '0', 'QRY.END': '3735', 'STRAND': '+', 'PATH.MATCH': [('7594382', '+'), ('7594369', '+'), ('7594371', '+'), ('7594021', '+'), ('7594286', '+'), ('7594374', '+'), ('7594356', '+'), ('7594374', '+'), ('7594374', '+'), ('7594375', '+'), ('7594626', '+'), ('7594011', '+'), ('7594374', '+'), ('7594375', '+'), ('7594369', '+'), ('7594371', '+'), ('7594021', '+'), ('7594021', '+'), ('7594021', '+'), ('7594021', '+'), ('7594241', '+'), ('7594248', '+'), ('7594286', '+'), ('7594311', '+'), ('7594315', '+'), ('7594311', '+'), ('7594330', '+'), ('7594311', '+'), ('7594315', '+'), ('7594374', '+'), ('7594311', '+'), ('7594374', '+'), ('7594369', '+'), ('7594021', '+'), ('7594026', '+'), 
('7594021', '+'), ('7594021', '+'), ('7594026', '+'), ('7594021', '+'), ('7594021', '+'), ('7594021', '+'), ('7594021', '+'), ('7594286', '+'), ('7594374', '+'), ('7594021', '+'), ('7594286', '+'), ('7594311', '+'), ('7594286', '+'), ('7594311', '+'), ('7594286', '+'), ('7594311', '+'), ('7594286', '+'), ('7594311', '+'), ('7594315', '+'), ('7594286', '+'), ('7594311', '+'), ('7594374', '+'), ('7594021', '+'), ('7594286', '+'), ('7594286', '+'), ('7594374', '+'), ('7594356', '+'), ('7594374', '+'), ('7594374', '+'), ('7594375', '+'), ('7594374', '+'), ('7594356', '+'), ('7594374', '+'), ('7594375', '+'), ('7594374', '+'), ('7594350', '+'), ('7594264', '+'), ('7594207', '+'), ('7594225', '+'), ('7594227', '+'), ('7594120', '+'), ('7594132', '+'), ('7594165', '+'), ('7594172', '+')], 'PATH.LEN': '61224', 'ALN.START': '0', 'ALN.END': '3735', 'RES.MATCH': '3734', 'ALN.BLOCK.LEN': '3735', 'MAPPING.QUAL': '0', 'RAW.CIGAR': 'cg:Z:57=1X3677=', 'TAGS': 'AS:f:3732.06,dv:f:0.000267738,id:f:0.999732'}}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Parsing the .gaf file\n",
+    "print(f\"[gaf2aln::GAF Parser] Reading {args.gaf} ...\")\n",
+    "with open(args.gaf, 'r') as file:\n",
+    "    gaf_lines = file.readlines()\n",
+    "\n",
+    "gaf_col = [\n",
+    "    \"QRY.NAME\", \"QRY.LEN\", \"QRY.START\", \"QRY.END\", \"STRAND\", \n",
+    "    \"PATH.MATCH\", \"PATH.LEN\", \"ALN.START\", \"ALN.END\",\n",
+    "    \"RES.MATCH\", \"ALN.BLOCK.LEN\", \"MAPPING.QUAL\"\n",
+    "    ]\n",
+    "\n",
+    "# Creating dictionnary to store alignments\n",
+    "print(f\"[gaf2aln::GAF Parser] Extracting alignments ...\")\n",
+    "aln_dict = {}\n",
+    "for line in range(len(gaf_lines)):\n",
+    "    ## Splitting the line by tabulation\n",
+    "    line_content = gaf_lines[line][:-1].split('\\t')\n",
+    "\n",
+    "    ## Adding alignement info to dictionnary\n",
+    "    aln_dict[f\"ALN_{line+1}\"] = {\n",
+    "        gaf_col[i]: line_content[i] for i in range(len(gaf_col))\n",
+    "    }\n",
+    "    \n",
+    "    ## Splitting \"PATH.MATCH\" into a list\n",
+    "    aln_dict[f\"ALN_{line+1}\"][\"PATH.MATCH\"] = [\n",
+    "        (str(node_id[:-1]), node_id[-1]) \n",
+    "        for node_id in walk2path(aln_dict[f\"ALN_{line+1}\"][\"PATH.MATCH\"])\n",
+    "    ]\n",
+    "\n",
+    "    ## Adding CIGAR\n",
+    "    aln_dict[f\"ALN_{line+1}\"][\"RAW.CIGAR\"] = line_content[-1]\n",
+    "\n",
+    "    ## Adding tags\n",
+    "    aln_dict[f\"ALN_{line+1}\"][\"TAGS\"] = \",\".join(line_content[13:-1])\n",
+    "\n",
+    "# Getting nodes of interest ids\n",
+    "aln_nodes = np.unique([\n",
+    "    str(node_id) \n",
+    "    for aln in aln_dict.keys() \n",
+    "    for node_id, orient in aln_dict[aln][\"PATH.MATCH\"]\n",
+    "]).tolist()\n",
+    "\n",
+    "print(aln_dict)\n",
+    "del gaf_lines, gaf_col"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "2f891424-0d88-4fd3-99ff-b0a8c90587ff",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[gaf2aln::GFA Parser] Reading /home/amergez/Documents/Scratch/LeChou/35Bra-v2a/35Bra-v2a.chr03.gfa ...\n",
+      "[gaf2aln::GFA Parser] Extracting nodes, paths and links ...\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Parsing the .gfa\n",
+    "print(f\"[gaf2aln::GFA Parser] Reading {args.gfa} ...\")\n",
+    "with open(args.gfa, 'r') as file:\n",
+    "    gfa_lines = file.readlines()\n",
+    "\n",
+    "# Nodes length dictionnary structured as follow :\n",
+    "# {<NODE.ID>: <NODE.LENGTH>}\n",
+    "nodes_length = {}\n",
+    "# Nodes dictionnary structured as follow :\n",
+    "# { <ALN.NODE.ID> : {\n",
+    "#   <PATH.NAME>: {\"START\": start, \"END\": end, \"STRAND\": strand), \n",
+    "#   <ALN.NAME>: {\"START\": start, \"END\": end, \"S.OFF\": start.offset, \"E.OFF\": end.offset, \"STRAND\": strand, \"CIGAR\": CIGAR}\n",
+    "#   }\n",
+    "# }\n",
+    "nodes = {node_id: {} for node_id in aln_nodes}\n",
+    "# Paths dictionnary structured as follow :\n",
+    "# {<PATH.NAME>: {NODES: {<NODE.ID>: <NODE.ORIENT>}, CIGAR: <CIGAR in comma separated list>}\n",
+    "paths = {}\n",
+    "# Links dictionnary structured as follow : \n",
+    "# {<FROM.NODE.ID>: {<TO.NODE.ID>: {FROM.ORIENT: <FROM.ORIENT>, TO.ORIENT: <TO.ORIENT>}}}\n",
+    "links = {}\n",
+    "\n",
+    "# Parsing the gfa\n",
+    "print(f\"[gaf2aln::GFA Parser] Extracting nodes, paths and links ...\")\n",
+    "\n",
+    "def GFA_parser(gfa_lines, nodes = nodes):\n",
+    "    _links, _nodes, _nodes_length, paths = {}, {}, {}, {}\n",
+    "    for line in gfa_lines:\n",
+    "        line_content = line[:-1].split(\"\\t\")\n",
+    "        line_id = line_content[0]\n",
+    "        \n",
+    "        # Segment line\n",
+    "        if line_id == \"S\" :\n",
+    "            \n",
+    "            _nodes_length[str(line_content[1])] = len(line_content[2])\n",
+    "        \n",
+    "        # Link line\n",
+    "        elif line_id == \"L\":\n",
+    "            try :\n",
+    "                _links[str(line_content[1])][str(line_content[3])] = {\n",
+    "                    \"FROM\": str(line_content[2]), \n",
+    "                    \"TO\": str(line_content[4])\n",
+    "                }\n",
+    "\n",
+    "            except :\n",
+    "                _links[str(line_content[1])] = {\n",
+    "                    str(line_content[3]) : {\"FROM.ORIENT\": str(line_content[2]), \"TO.ORIENT\": str(line_content[4])}\n",
+    "                }\n",
+    "\n",
+    "        # Path line\n",
+    "        elif line_id == \"P\":\n",
+    "            _paths[str(line_content[1])] = {\n",
+    "                \"NODES\": {\n",
+    "                    str(node_id[:-1]): str(node_id[-1])\n",
+    "                    for node_id in line_content[2].split(',')\n",
+    "                },\n",
+    "                \"CIGAR\": line_content[3]\n",
+    "            }\n",
+    "\n",
+    "    return [_links, _nodes, _nodes_length, _paths]\n",
+    "\n",
+    "# splits = np.quantile(range(len(gfa_lines)+1), q= np.array(args.threads+1)/args.threads, method='higher').tolist()\n",
+    "# res = []\n",
+    "# for i in range(1, len(splits)):\n",
+    "#     res.append(executor.submit(GFA_parser, gfa_lines[splits[i-1]:splits[i]]))\n",
+    "\n",
+    "# for out in res:\n",
+    "#     results = out.result()\n",
+    "\n",
+    "#     for link_id, link_info in results[0].items():\n",
+    "#         links[]\n",
+    "\n",
+    "\n",
+    "for line in gfa_lines:\n",
+    "    line_content = line[:-1].split(\"\\t\")\n",
+    "    line_id = line_content[0]\n",
+    "    \n",
+    "    # Segment line\n",
+    "    if line_id == \"S\" :\n",
+    "        \n",
+    "        nodes_length[str(line_content[1])] = len(line_content[2])\n",
+    "    \n",
+    "    # Link line\n",
+    "    elif line_id == \"L\":\n",
+    "        try :\n",
+    "            links[str(line_content[1])][str(line_content[3])] = {\n",
+    "                \"FROM\": str(line_content[2]), \n",
+    "                \"TO\": str(line_content[4])\n",
+    "            }\n",
+    "\n",
+    "        except :\n",
+    "            links[str(line_content[1])] = {\n",
+    "                str(line_content[3]) : {\"FROM.ORIENT\": str(line_content[2]), \"TO.ORIENT\": str(line_content[4])}\n",
+    "            }\n",
+    "\n",
+    "    # Path line\n",
+    "    elif line_id == \"P\":\n",
+    "        paths[str(line_content[1])] = {\n",
+    "            \"NODES\": {\n",
+    "                str(node_id[:-1]): str(node_id[-1])\n",
+    "                for node_id in line_content[2].split(',')\n",
+    "            },\n",
+    "            \"CIGAR\": line_content[3]\n",
+    "        }\n",
+    "\n",
+    "del gfa_lines"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "a403c88e-54ea-4a67-9047-dc44eba7f51a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[gaf2aln::Graph position processing] Computing nodes positions in each paths...\n",
+      "[gaf2aln::Graph position processing] Running on Capitata#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on D101#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on D134#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on G06-09-28#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on G07-DH-33#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on HDEM#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on Korso#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on M249#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on OX-heart#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on PL021#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on RC34#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T02#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T03#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T04#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T06#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T07#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T08#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T09#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T10#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T11#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T12#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T13#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T14#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T15#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T16#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T17#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T18#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T19#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T21#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T24#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T25#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T26#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T27#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on TO1000#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on W1701#1#chr03 ...\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(f\"[gaf2aln::Graph position processing] Computing nodes positions in each paths...\")\n",
+    "def get_node_pos(path_name, nodes = nodes, paths = paths, nodes_length = nodes_length):\n",
+    "    print(f\"[gaf2aln::Graph position processing] Running on {path_name} ...\")\n",
+    "    cur_pos = 0\n",
+    "\n",
+    "    out = {}\n",
+    "    # Iterating over nodes in the path\n",
+    "    for path_node in paths[path_name][\"NODES\"].keys():\n",
+    "        # Instead of checking if the node is one interesting node, we try to add to the nodes dict\n",
+    "        if path_node in aln_nodes :\n",
+    "            out[path_node] = {\n",
+    "                \"START\": cur_pos, # Start position of the node start in the currrent path\n",
+    "                \"END\": cur_pos+nodes_length[path_node], # End position of the node end in the current path\n",
+    "                \"STRAND\": paths[path_name][\"NODES\"][path_node] # Orientation of the node in the current path\n",
+    "                } \n",
+    "\n",
+    "            cur_pos += nodes_length[path_node]+1\n",
+    "        else :\n",
+    "            cur_pos += nodes_length[path_node]+1\n",
+    "\n",
+    "    return out\n",
+    "\n",
+    "res = {}\n",
+    "executor = concurrent.futures.ThreadPoolExecutor(max_workers=args.threads)\n",
+    "# Adding nodes positions relative to path\n",
+    "for path_name in paths.keys():\n",
+    "    res[path_name] = executor.submit(get_node_pos, path_name)\n",
+    "\n",
+    "executor.shutdown(wait=True)\n",
+    "\n",
+    "for path_name, out in res.items():\n",
+    "    results = out.result()\n",
+    "    for path_node, node_pos in results.items():\n",
+    "        nodes[path_node][path_name] = node_pos\n",
+    "\n",
+    "del res"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "bed36bd5-30eb-4d02-8b52-1ae5d753f8f8",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[gaf2aln::Alignment position processing] Computing nodes positions in each alignement...\n",
+      "[gaf2aln::Alignment position processing] Running on ALN_1 ...\n",
+      "0 77 77 0 + 154 77\n",
+      "77 82 0 0 + 5 82\n",
+      "82 83 0 0 + 1 83\n",
+      "83 138 0 0 + 55 138\n",
+      "138 139 0 0 + 1 139\n",
+      "139 202 0 0 + 63 202\n",
+      "202 203 0 0 + 1 203\n",
+      "203 379 0 0 + 176 379\n",
+      "379 380 0 0 + 1 380\n",
+      "380 429 0 0 + 49 429\n",
+      "429 430 0 0 + 1 430\n",
+      "430 457 0 0 + 27 457\n",
+      "457 492 0 0 + 35 492\n",
+      "492 494 0 0 + 2 494\n",
+      "494 497 0 0 + 3 497\n",
+      "497 507 0 0 + 10 507\n",
+      "507 508 0 0 + 1 508\n",
+      "508 564 0 0 + 56 564\n",
+      "564 566 0 0 + 2 566\n",
+      "566 567 0 0 + 1 567\n",
+      "567 568 0 0 + 1 568\n",
+      "568 569 0 0 + 1 569\n",
+      "569 824 0 0 + 255 824\n",
+      "824 826 0 0 + 2 826\n",
+      "826 858 0 0 + 32 858\n",
+      "858 859 0 0 + 1 859\n",
+      "859 860 0 0 + 1 860\n",
+      "860 861 0 0 + 1 861\n",
+      "861 862 0 0 + 1 862\n",
+      "862 863 0 0 + 1 863\n",
+      "863 864 0 0 + 1 864\n",
+      "864 865 0 0 + 1 865\n",
+      "865 866 0 0 + 1 866\n",
+      "866 867 0 0 + 1 867\n",
+      "867 868 0 0 + 1 868\n",
+      "868 869 0 0 + 1 869\n",
+      "869 913 0 0 + 44 913\n",
+      "913 919 0 0 + 6 919\n",
+      "919 978 0 0 + 59 978\n",
+      "978 979 0 0 + 1 979\n",
+      "979 1038 0 0 + 59 1038\n",
+      "1038 1045 0 0 + 7 1045\n",
+      "1045 1046 0 0 + 1 1046\n",
+      "1046 1080 0 0 + 34 1080\n",
+      "1080 1081 0 0 + 1 1081\n",
+      "1081 1107 0 0 + 26 1107\n",
+      "1107 1108 0 0 + 1 1108\n",
+      "1108 1183 0 0 + 75 1183\n",
+      "1183 1186 0 0 + 3 1186\n",
+      "1186 1224 0 0 + 38 1224\n",
+      "1224 1257 0 0 + 33 1257\n",
+      "1257 1289 0 0 + 32 1289\n",
+      "1289 1311 0 0 + 22 1311\n",
+      "1311 1359 0 0 + 48 1359\n",
+      "1359 1382 0 0 + 23 1382\n",
+      "1382 1434 0 0 + 52 1434\n",
+      "1434 1451 0 0 + 17 1451\n",
+      "1451 1531 0 0 + 80 1531\n",
+      "1531 1532 0 0 + 1 1532\n",
+      "1532 1543 0 0 + 11 1543\n",
+      "1543 1544 0 0 + 1 1544\n",
+      "1544 1572 0 0 + 28 1572\n",
+      "1572 1573 0 0 + 1 1573\n",
+      "1573 1587 0 0 + 14 1587\n",
+      "1587 1588 0 0 + 1 1588\n",
+      "1588 1616 0 0 + 28 1616\n",
+      "1616 1617 0 0 + 1 1617\n",
+      "1617 1646 0 0 + 29 1646\n",
+      "1646 1661 0 0 + 15 1661\n",
+      "1661 1673 0 0 + 12 1673\n",
+      "1673 1674 0 0 + 1 1674\n",
+      "1674 1726 0 0 + 52 1726\n",
+      "1726 1727 0 0 + 1 1727\n",
+      "1727 1762 0 0 + 35 1762\n",
+      "1762 1763 0 0 + 1 1763\n",
+      "1763 1764 0 0 + 1 1764\n",
+      "1764 1765 0 0 + 1 1765\n",
+      "1765 1766 0 0 + 1 1766\n",
+      "1766 1767 0 0 + 1 1767\n",
+      "1767 1824 0 0 + 57 1824\n",
+      "1824 1825 0 0 + 1 1825\n",
+      "1825 1975 0 0 + 150 1975\n",
+      "1975 1976 0 0 + 1 1976\n",
+      "1976 2015 0 0 + 39 2015\n",
+      "2015 2016 0 0 + 1 2016\n",
+      "2016 2047 0 0 + 31 2047\n",
+      "2047 2055 0 0 + 8 2055\n",
+      "2055 2056 0 0 + 1 2056\n",
+      "2056 2120 0 0 + 64 2120\n",
+      "2120 2121 0 0 + 1 2121\n",
+      "2121 2157 0 0 + 36 2157\n",
+      "2157 2158 0 0 + 1 2158\n",
+      "2158 2170 0 0 + 12 2170\n",
+      "2170 2171 0 0 + 1 2171\n",
+      "2171 2205 0 0 + 34 2205\n",
+      "2205 2206 0 0 + 1 2206\n",
+      "2206 2344 0 0 + 138 2344\n",
+      "2344 2345 0 0 + 1 2345\n",
+      "2345 2364 0 0 + 19 2364\n",
+      "2364 2383 0 0 + 19 2383\n",
+      "2383 2408 0 0 + 25 2408\n",
+      "2408 2409 0 0 + 1 2409\n",
+      "2409 2441 0 0 + 32 2441\n",
+      "2441 2442 0 0 + 1 2442\n",
+      "2442 2580 0 0 + 138 2580\n",
+      "2580 2581 0 0 + 1 2581\n",
+      "2581 2582 0 0 + 1 2582\n",
+      "2582 2583 0 0 + 1 2583\n",
+      "2583 2584 0 0 + 1 2584\n",
+      "2584 2764 0 0 + 180 2764\n",
+      "2764 2765 0 0 + 1 2765\n",
+      "2765 2797 0 0 + 32 2797\n",
+      "2797 2798 0 0 + 1 2798\n",
+      "2798 2878 0 0 + 80 2878\n",
+      "2878 2879 0 0 + 1 2879\n",
+      "2879 2951 0 0 + 72 2951\n",
+      "2951 2952 0 0 + 1 2952\n",
+      "2952 3002 0 0 + 50 3002\n",
+      "3002 3077 0 0 + 75 3077\n",
+      "3077 3078 0 0 + 1 3078\n",
+      "3078 3093 0 0 + 15 3093\n",
+      "3093 3094 0 0 + 1 3094\n",
+      "3094 3097 0 0 + 3 3097\n",
+      "3097 3140 0 0 + 43 3140\n",
+      "3140 3210 0 0 + 70 3210\n",
+      "3210 3211 0 0 + 1 3211\n",
+      "3211 3229 0 0 + 18 3229\n",
+      "3229 3230 0 0 + 1 3230\n",
+      "3230 3276 0 0 + 46 3276\n",
+      "3276 3277 0 0 + 1 3277\n",
+      "3277 3315 0 0 + 38 3315\n",
+      "3315 3316 0 0 + 1 3316\n",
+      "3316 3322 0 0 + 6 3322\n",
+      "3322 3323 0 0 + 1 3323\n",
+      "3323 3348 0 0 + 25 3348\n",
+      "3348 3349 0 0 + 1 3349\n",
+      "3349 3350 0 0 + 1 3350\n",
+      "3350 3351 0 0 + 1 3351\n",
+      "3351 3352 0 0 + 1 3352\n",
+      "3352 3353 0 0 + 1 3353\n",
+      "3353 3354 0 0 + 1 3354\n",
+      "3354 3356 0 0 + 2 3356\n",
+      "3356 3357 0 0 + 1[gaf2aln::Alignment position processing] Running on ALN_2 ...\n",
+      " 03357 \n",
+      "13357  03489 0  +0  10  1+\n",
+      " 1132  23489 \n",
+      "03489  03490  +0  10  2\n",
+      "+2  3 10  34900\n",
+      " 3490+  36421  03 \n",
+      "03  +4 0  1520  3642+\n",
+      " 36421  36444 \n",
+      "04  05  +0  20  3644+\n",
+      " 36441 5 \n",
+      "36855  06  00  +0  41+  36851\n",
+      " 36856 \n",
+      "36876  07  00  +0  2+  36871\n",
+      " 36877 \n",
+      "36937  08  00  +0  6+  36931\n",
+      " 36938 \n",
+      "36948  09  00  +0  1+  36941\n",
+      " 36949 \n",
+      "37089  010  00  +0  14+  37081\n",
+      " 370810 \n",
+      "370910  011  00  +0  1+  37091\n",
+      " 370911 \n",
+      "371011  012  00  +0  1+  37101\n",
+      " 371012 \n",
+      "371412  013  00  +0  4+  37141\n",
+      " 371413 \n",
+      "371513  014  00  +0  1+  37151\n",
+      " 371514 \n",
+      "371614  015  00  +0  1+  37161\n",
+      " 371615 \n",
+      "372015  016  00  +0  4+  37201\n",
+      " 372016\n",
+      " 163721  170  00  0+  +1  13721 \n",
+      "173721\n",
+      " 173722  180  00  0+  +1  13722 \n",
+      "183722\n",
+      " 183735 19  00  100  ++  231  373519\n",
+      "\n",
+      "19 20 0 0 + 1 20\n",
+      "20 21 0 0 + 1 21\n",
+      "21 22 0 0 + 1 22\n",
+      "22 23 0 0 + 1 23\n",
+      "23 24 0 0 + 1 24\n",
+      "24 25 0 0 + 1 25\n",
+      "25 26 0 0 + 1 26\n",
+      "26 27 0 0 + 1 27\n",
+      "27 28 0 0 + 1 28\n",
+      "28 29 0 0 + 1 29\n",
+      "29 30 0 0 + 1 30\n",
+      "30 31 0 0 + 1 31\n",
+      "31 32 0 0 + 1 32\n",
+      "32 33 0 0 + 1 33\n",
+      "33 34 0 0 + 1 34\n",
+      "34 35 0 0 + 1 35\n",
+      "35 36 0 0 + 1 36\n",
+      "36 37 0 0 + 1 37\n",
+      "37 38 0 0 + 1 38\n",
+      "38 39 0 0 + 1 39\n",
+      "39 40 0 0 + 1 40\n",
+      "40 41 0 0 + 1 41\n",
+      "41 42 0 0 + 1 42\n",
+      "42 43 0 0 + 1 43\n",
+      "43 44 0 0 + 1 44\n",
+      "44 45 0 0 + 1 45\n",
+      "45 46 0 0 + 1 46\n",
+      "46 47 0 0 + 1 47\n",
+      "47 48 0 0 + 1 48\n",
+      "48 49 0 0 + 1 49\n",
+      "49 50 0 0 + 1 50\n",
+      "50 51 0 0 + 1 51\n",
+      "51 52 0 0 + 1 52\n",
+      "52 53 0 0 + 1 53\n",
+      "53 54 0 0 + 1 54\n",
+      "54 55 0 0 + 1 55\n",
+      "55 56 0 0 + 1 56\n",
+      "56 57 0 0 + 1 57\n",
+      "57 58 0 0 + 1 58\n",
+      "58 59 0 0 + 1 59\n",
+      "59 60 0 0 + 1 60\n",
+      "60 61 0 0 + 1 61\n",
+      "61 62 0 0 + 1 62\n",
+      "62 63 0 0 + 1 63\n",
+      "63 64 0 0 + 1 64\n",
+      "64 65 0 0 + 1 65\n",
+      "65 66 0 0 + 1 66\n",
+      "66 67 0 0 + 1 67\n",
+      "67 68 0 0 + 1 68\n",
+      "68 69 0 0 + 1 69\n",
+      "69 70 0 0 + 1 70\n",
+      "70 71 0 0 + 1 71\n",
+      "71 72 0 0 + 1 72\n",
+      "72 73 0 0 + 1 73\n",
+      "73 74 0 0 + 1 74\n",
+      "74 75 0 0 + 1 75\n",
+      "75 76 0 0 + 1 76\n",
+      "76 77 0 0 + 1 77\n",
+      "77 78 0 0 + 1 78\n",
+      "78 3735 0 57489 + 61146 3735\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(f\"[gaf2aln::Alignment position processing] Computing nodes positions in each alignement...\")\n",
+    "# Adding nodes positions relative to path\n",
+    "\n",
+    "def get_aln_node_info(aln_name, aln_dict = aln_dict, nodes_length = nodes_length):\n",
+    "    # Initializing current position in query\n",
+    "    cur_pos = 0\n",
+    "\n",
+    "    # Getting start and end node ids\n",
+    "    start_end_id = (aln_dict[aln_name][\"PATH.MATCH\"][0][0], aln_dict[aln_name][\"PATH.MATCH\"][-1][0])\n",
+    "\n",
+    "    # Creating result dictionnary\n",
+    "    res = {}\n",
+    "\n",
+    "    ## Iterating over node_ids from the given alignment\n",
+    "    for node_id, orient in aln_dict[aln_name][\"PATH.MATCH\"]:\n",
+    "        # Adding entry for current node\n",
+    "        res[node_id] = {aln_name: {}}\n",
+    "\n",
+    "        # First node\n",
+    "        if node_id == start_end_id[0]:\n",
+    "            start_pos = 0\n",
+    "            s_off = int(aln_dict[aln_name][\"ALN.START\"])\n",
+    "            end_pos = nodes_length[node_id]-s_off\n",
+    "            e_off = 0\n",
+    "        # End node\n",
+    "        elif node_id == start_end_id[1]:\n",
+    "            start_pos = cur_pos\n",
+    "            s_off = 0\n",
+    "            end_pos = int(aln_dict[aln_name][\"QRY.END\"])\n",
+    "            e_off = nodes_length[node_id]-(end_pos-cur_pos)\n",
+    "        # Node in between\n",
+    "        else :\n",
+    "            start_pos = cur_pos\n",
+    "            s_off, e_off = 0, 0\n",
+    "            end_pos = cur_pos+nodes_length[node_id]\n",
+    "\n",
+    "        res[node_id] = {\n",
+    "            \"START\": start_pos, # Start position on the query\n",
+    "            \"END\": end_pos, # End position on the query\n",
+    "            \"S.OFF\": s_off, # Offset between the start of the alignment and the node's start\n",
+    "            \"E.OFF\": e_off, # Offset between the end of the alignment and the node's end \n",
+    "            \"STRAND\": orient # Orientation of the node in the alignment\n",
+    "            }\n",
+    "        \n",
+    "        cur_pos = end_pos\n",
+    "        print(start_pos, end_pos, s_off, e_off, orient, nodes_length[node_id], cur_pos)\n",
+    "\n",
+    "    return res\n",
+    "\n",
+    "# Submitting one node-info job per alignment; futures are stored by alignment name\n",
+    "res = {}\n",
+    "executor = concurrent.futures.ThreadPoolExecutor(max_workers=args.threads)\n",
+    "for aln_name in aln_dict.keys():\n",
+    "    print(f\"[gaf2aln::Alignment position processing] Running on {aln_name} ...\")\n",
+    "    \n",
+    "    res[aln_name] = executor.submit(get_aln_node_info, aln_name)\n",
+    "    #res[aln_name] = get_aln_node_info(aln_name, aln_dict = aln_dict, nodes_length = nodes_length)\n",
+    "\n",
+    "executor.shutdown(wait=True)\n",
+    "\n",
+    "for aln_name, node_info in res.items():\n",
+    "    results = node_info.result()\n",
+    "    for node_id, info in results.items():\n",
+    "        nodes[node_id][aln_name] = info\n",
+    "\n",
+    "del res"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "4c30727c-7ffc-4852-ad81-ca2a5a7f9957",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[gaf2aln::CIGAR processing] Computing nodes cigar from alignement ...\n",
+      "[gaf2aln::CIGAR processing] Running on ALN_1 ...\n",
+      "[gaf2aln::CIGAR processing] Running on ALN_2 ...\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Calculating the CIGAR of each node in each alignment\n",
+    "print(f\"[gaf2aln::CIGAR processing] Computing nodes cigar from alignement ...\")\n",
+    "# Iterating over alignments\n",
+    "for aln in aln_dict.keys():\n",
+    "    \n",
+    "    print(f\"[gaf2aln::CIGAR processing] Running on {aln} ...\")\n",
+    "    # Getting the list of base-level alignment operations ([\"=\", \"X\", ...] from \"1=1X...\")\n",
+    "    raw_cigar = cigar2basealn(aln_dict[aln][\"RAW.CIGAR\"])\n",
+    "    CIGAR={}\n",
+    "\n",
+    "    for node_id, orient in aln_dict[aln][\"PATH.MATCH\"]:\n",
+    "\n",
+    "        _cigar = basealn2cigar(raw_cigar[\n",
+    "            nodes[node_id][aln][\"START\"]:nodes[node_id][aln][\"END\"]\n",
+    "            ])\n",
+    "        nodes[node_id][aln][\"CIGAR\"] = _cigar\n",
+    "        #print(_cigar, nodes[node_id][aln][\"START\"], nodes[node_id][aln][\"END\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "e15e4762-cd71-4afe-bc74-ebe44869fee6",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ALN_1\n",
+      "7046526 D134#1#chr03 0 77\n",
+      "\tIn path\n",
+      "\t 73306158 73306235\n",
+      "skipped\n",
+      "\n",
+      "7046528 D134#1#chr03 77 82\n",
+      "\tIn path\n",
+      "\t 73306238 73306243\n",
+      "{'Q.START': 0, 'Q.END': 77, 'T.START': 73306158, 'T.END': 73306235, 'CG': '77='}\n",
+      "7046530 D134#1#chr03 82 83\n",
+      "\tNot in path\n",
+      "7046531 D134#1#chr03 83 138\n",
+      "\tIn path\n",
+      "\t 73306246 73306301\n",
+      "{'Q.START': 77, 'Q.END': 82, 'T.START': 73306238, 'T.END': 73306243, 'CG': '5='}\n",
+      "7046532 D134#1#chr03 138 139\n",
+      "\tNot in path\n",
+      "7046533 D134#1#chr03 139 202\n",
+      "\tIn path\n",
+      "\t 73306302 73306365\n",
+      "{'Q.START': 83, 'Q.END': 138, 'T.START': 73306246, 'T.END': 73306301, 'CG': '55='}\n",
+      "7046534 D134#1#chr03 202 203\n",
+      "\tIn path\n",
+      "\t 73306366 73306367\n",
+      "{'Q.START': 139, 'Q.END': 202, 'T.START': 73306302, 'T.END': 73306365, 'CG': '63='}\n",
+      "7046536 D134#1#chr03 203 379\n",
+      "\tIn path\n",
+      "\t 73306368 73306544\n",
+      "{'Q.START': 202, 'Q.END': 203, 'T.START': 73306366, 'T.END': 73306367, 'CG': '1='}\n",
+      "7046537 D134#1#chr03 379 380\n",
+      "\tIn path\n",
+      "\t 73306545 73306546\n",
+      "{'Q.START': 203, 'Q.END': 379, 'T.START': 73306368, 'T.END': 73306544, 'CG': '176='}\n",
+      "7046539 D134#1#chr03 380 429\n",
+      "\tIn path\n",
+      "\t 73306547 73306596\n",
+      "{'Q.START': 379, 'Q.END': 380, 'T.START': 73306545, 'T.END': 73306546, 'CG': '1='}\n",
+      "7046541 D134#1#chr03 429 430\n",
+      "\tIn path\n",
+      "\t 73306597 73306598\n",
+      "{'Q.START': 380, 'Q.END': 429, 'T.START': 73306547, 'T.END': 73306596, 'CG': '49='}\n",
+      "7046542 D134#1#chr03 430 457\n",
+      "\tIn path\n",
+      "\t 73306599 73306626\n",
+      "{'Q.START': 429, 'Q.END': 430, 'T.START': 73306597, 'T.END': 73306598, 'CG': '1='}\n",
+      "7046544 D134#1#chr03 457 492\n",
+      "\tIn path\n",
+      "\t 73306641 73306676\n",
+      "{'Q.START': 430, 'Q.END': 457, 'T.START': 73306599, 'T.END': 73306626, 'CG': '27='}\n",
+      "7046546 D134#1#chr03 492 494\n",
+      "\tNot in path\n",
+      "7046547 D134#1#chr03 494 497\n",
+      "\tNot in path\n",
+      "7046549 D134#1#chr03 497 507\n",
+      "\tNot in path\n",
+      "7046551 D134#1#chr03 507 508\n",
+      "\tNot in path\n",
+      "7046552 D134#1#chr03 508 564\n",
+      "\tIn path\n",
+      "\t 73306694 73306750\n",
+      "{'Q.START': 457, 'Q.END': 492, 'T.START': 73306641, 'T.END': 73306676, 'CG': '35='}\n",
+      "7046554 D134#1#chr03 564 566\n",
+      "\tNot in path\n",
+      "7046556 D134#1#chr03 568 569\n",
+      "\tIn path\n",
+      "\t 73306753 73306754\n",
+      "{'Q.START': 508, 'Q.END': 564, 'T.START': 73306694, 'T.END': 73306750, 'CG': '56='}\n",
+      "7046556 D134#1#chr03 568 569\n",
+      "\tIn path\n",
+      "\t 73306753 73306754\n",
+      "{'Q.START': 568, 'Q.END': 569, 'T.START': 73306753, 'T.END': 73306754, 'CG': '1='}\n",
+      "7046556 D134#1#chr03 568 569\n",
+      "\tIn path\n",
+      "\t 73306753 73306754\n",
+      "{'Q.START': 568, 'Q.END': 569, 'T.START': 73306753, 'T.END': 73306754, 'CG': '1='}\n",
+      "7046557 D134#1#chr03 569 824\n",
+      "\tIn path\n",
+      "\t 73306755 73307010\n",
+      "{'Q.START': 568, 'Q.END': 569, 'T.START': 73306753, 'T.END': 73306754, 'CG': '1='}\n",
+      "7046558 D134#1#chr03 824 826\n",
+      "\tNot in path\n",
+      "7046559 D134#1#chr03 826 858\n",
+      "\tIn path\n",
+      "\t 73307011 73307043\n",
+      "{'Q.START': 569, 'Q.END': 824, 'T.START': 73306755, 'T.END': 73307010, 'CG': '255='}\n",
+      "7046560 D134#1#chr03 858 859\n",
+      "\tIn path\n",
+      "\t 73307044 73307045\n",
+      "{'Q.START': 826, 'Q.END': 858, 'T.START': 73307011, 'T.END': 73307043, 'CG': '32='}\n",
+      "7046561 D134#1#chr03 868 869\n",
+      "\tIn path\n",
+      "\t 73307046 73307047\n",
+      "{'Q.START': 858, 'Q.END': 859, 'T.START': 73307044, 'T.END': 73307045, 'CG': '1='}\n",
+      "7046561 D134#1#chr03 868 869\n",
+      "\tIn path\n",
+      "\t 73307046 73307047\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
+      "7046561 D134#1#chr03 868 869\n",
+      "\tIn path\n",
+      "\t 73307046 73307047\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
+      "7046561 D134#1#chr03 868 869\n",
+      "\tIn path\n",
+      "\t 73307046 73307047\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
+      "7046561 D134#1#chr03 868 869\n",
+      "\tIn path\n",
+      "\t 73307046 73307047\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
+      "7046561 D134#1#chr03 868 869\n",
+      "\tIn path\n",
+      "\t 73307046 73307047\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
+      "7046561 D134#1#chr03 868 869\n",
+      "\tIn path\n",
+      "\t 73307046 73307047\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
+      "7046561 D134#1#chr03 868 869\n",
+      "\tIn path\n",
+      "\t 73307046 73307047\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
+      "7046561 D134#1#chr03 868 869\n",
+      "\tIn path\n",
+      "\t 73307046 73307047\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
+      "7046561 D134#1#chr03 868 869\n",
+      "\tIn path\n",
+      "\t 73307046 73307047\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
+      "7046562 D134#1#chr03 869 913\n",
+      "\tIn path\n",
+      "\t 73307048 73307092\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
+      "7046564 D134#1#chr03 913 919\n",
+      "\tIn path\n",
+      "\t 73307093 73307099\n",
+      "{'Q.START': 869, 'Q.END': 913, 'T.START': 73307048, 'T.END': 73307092, 'CG': '44='}\n",
+      "7046565 D134#1#chr03 919 978\n",
+      "\tIn path\n",
+      "\t 73307100 73307159\n",
+      "{'Q.START': 913, 'Q.END': 919, 'T.START': 73307093, 'T.END': 73307099, 'CG': '6='}\n",
+      "7046567 D134#1#chr03 978 979\n",
+      "\tIn path\n",
+      "\t 73307160 73307161\n",
+      "{'Q.START': 919, 'Q.END': 978, 'T.START': 73307100, 'T.END': 73307159, 'CG': '59='}\n",
+      "7046568 D134#1#chr03 979 1038\n",
+      "\tIn path\n",
+      "\t 73307162 73307221\n",
+      "{'Q.START': 978, 'Q.END': 979, 'T.START': 73307160, 'T.END': 73307161, 'CG': '1='}\n",
+      "7046570 D134#1#chr03 1038 1045\n",
+      "\tIn path\n",
+      "\t 73307224 73307231\n",
+      "{'Q.START': 979, 'Q.END': 1038, 'T.START': 73307162, 'T.END': 73307221, 'CG': '59='}\n",
+      "7046571 D134#1#chr03 1045 1046\n",
+      "\tIn path\n",
+      "\t 73307232 73307233\n",
+      "{'Q.START': 1038, 'Q.END': 1045, 'T.START': 73307224, 'T.END': 73307231, 'CG': '7='}\n",
+      "7046573 D134#1#chr03 1046 1080\n",
+      "\tIn path\n",
+      "\t 73307234 73307268\n",
+      "{'Q.START': 1045, 'Q.END': 1046, 'T.START': 73307232, 'T.END': 73307233, 'CG': '1='}\n",
+      "7046574 D134#1#chr03 1080 1081\n",
+      "\tIn path\n",
+      "\t 73307269 73307270\n",
+      "{'Q.START': 1046, 'Q.END': 1080, 'T.START': 73307234, 'T.END': 73307268, 'CG': '34='}\n",
+      "7046576 D134#1#chr03 1081 1107\n",
+      "\tIn path\n",
+      "\t 73307271 73307297\n",
+      "{'Q.START': 1080, 'Q.END': 1081, 'T.START': 73307269, 'T.END': 73307270, 'CG': '1='}\n",
+      "7046577 D134#1#chr03 1107 1108\n",
+      "\tNot in path\n",
+      "7046579 D134#1#chr03 1108 1183\n",
+      "\tIn path\n",
+      "\t 73307300 73307375\n",
+      "{'Q.START': 1081, 'Q.END': 1107, 'T.START': 73307271, 'T.END': 73307297, 'CG': '26='}\n",
+      "7046581 D134#1#chr03 1183 1186\n",
+      "\tIn path\n",
+      "\t 73307376 73307379\n",
+      "{'Q.START': 1108, 'Q.END': 1183, 'T.START': 73307300, 'T.END': 73307375, 'CG': '75='}\n",
+      "7046583 D134#1#chr03 1186 1224\n",
+      "\tNot in path\n",
+      "7046584 D134#1#chr03 1224 1257\n",
+      "\tIn path\n",
+      "\t 73307419 73307452\n",
+      "{'Q.START': 1183, 'Q.END': 1186, 'T.START': 73307376, 'T.END': 73307379, 'CG': '3='}\n",
+      "7046586 D134#1#chr03 1257 1289\n",
+      "\tNot in path\n",
+      "7046587 D134#1#chr03 1289 1311\n",
+      "\tIn path\n",
+      "\t 73307475 73307497\n",
+      "{'Q.START': 1224, 'Q.END': 1257, 'T.START': 73307419, 'T.END': 73307452, 'CG': '33='}\n",
+      "7046589 D134#1#chr03 1311 1359\n",
+      "\tNot in path\n",
+      "7046590 D134#1#chr03 1359 1382\n",
+      "\tIn path\n",
+      "\t 73307546 73307569\n",
+      "{'Q.START': 1289, 'Q.END': 1311, 'T.START': 73307475, 'T.END': 73307497, 'CG': '22='}\n",
+      "7046592 D134#1#chr03 1382 1434\n",
+      "\tNot in path\n",
+      "7046593 D134#1#chr03 1434 1451\n",
+      "\tIn path\n",
+      "\t 73307643 73307660\n",
+      "{'Q.START': 1359, 'Q.END': 1382, 'T.START': 73307546, 'T.END': 73307569, 'CG': '23='}\n",
+      "7046594 D134#1#chr03 1451 1531\n",
+      "\tIn path\n",
+      "\t 73307661 73307741\n",
+      "{'Q.START': 1434, 'Q.END': 1451, 'T.START': 73307643, 'T.END': 73307660, 'CG': '17='}\n",
+      "7046596 D134#1#chr03 1531 1532\n",
+      "\tNot in path\n",
+      "7046597 D134#1#chr03 1532 1543\n",
+      "\tIn path\n",
+      "\t 73307744 73307755\n",
+      "{'Q.START': 1451, 'Q.END': 1531, 'T.START': 73307661, 'T.END': 73307741, 'CG': '80='}\n",
+      "7046599 D134#1#chr03 1543 1544\n",
+      "\tNot in path\n",
+      "7046600 D134#1#chr03 1544 1572\n",
+      "\tIn path\n",
+      "\t 73307758 73307786\n",
+      "{'Q.START': 1532, 'Q.END': 1543, 'T.START': 73307744, 'T.END': 73307755, 'CG': '11='}\n",
+      "7046601 D134#1#chr03 1572 1573\n",
+      "\tIn path\n",
+      "\t 73307787 73307788\n",
+      "{'Q.START': 1544, 'Q.END': 1572, 'T.START': 73307758, 'T.END': 73307786, 'CG': '28='}\n",
+      "7046603 D134#1#chr03 1573 1587\n",
+      "\tIn path\n",
+      "\t 73307789 73307803\n",
+      "{'Q.START': 1572, 'Q.END': 1573, 'T.START': 73307787, 'T.END': 73307788, 'CG': '1='}\n",
+      "7046604 D134#1#chr03 1587 1588\n",
+      "\tNot in path\n",
+      "7046606 D134#1#chr03 1588 1616\n",
+      "\tIn path\n",
+      "\t 73307806 73307834\n",
+      "{'Q.START': 1573, 'Q.END': 1587, 'T.START': 73307789, 'T.END': 73307803, 'CG': '14='}\n",
+      "7046608 D134#1#chr03 1616 1617\n",
+      "\tIn path\n",
+      "\t 73307835 73307836\n",
+      "{'Q.START': 1588, 'Q.END': 1616, 'T.START': 73307806, 'T.END': 73307834, 'CG': '28='}\n",
+      "7046609 D134#1#chr03 1617 1646\n",
+      "\tIn path\n",
+      "\t 73307837 73307866\n",
+      "{'Q.START': 1616, 'Q.END': 1617, 'T.START': 73307835, 'T.END': 73307836, 'CG': '1='}\n",
+      "7046621 D134#1#chr03 1646 1661\n",
+      "\tIn path\n",
+      "\t 73307867 73307882\n",
+      "{'Q.START': 1617, 'Q.END': 1646, 'T.START': 73307837, 'T.END': 73307866, 'CG': '29='}\n",
+      "7046622 D134#1#chr03 1661 1673\n",
+      "\tIn path\n",
+      "\t 73307883 73307895\n",
+      "{'Q.START': 1646, 'Q.END': 1661, 'T.START': 73307867, 'T.END': 73307882, 'CG': '15='}\n",
+      "7046624 D134#1#chr03 1673 1674\n",
+      "\tIn path\n",
+      "\t 73307896 73307897\n",
+      "{'Q.START': 1661, 'Q.END': 1673, 'T.START': 73307883, 'T.END': 73307895, 'CG': '12='}\n",
+      "7046625 D134#1#chr03 1674 1726\n",
+      "\tIn path\n",
+      "\t 73307898 73307950\n",
+      "{'Q.START': 1673, 'Q.END': 1674, 'T.START': 73307896, 'T.END': 73307897, 'CG': '1='}\n",
+      "7046626 D134#1#chr03 1726 1727\n",
+      "\tNot in path\n",
+      "7046628 D134#1#chr03 1727 1762\n",
+      "\tIn path\n",
+      "\t 73307953 73307988\n",
+      "{'Q.START': 1674, 'Q.END': 1726, 'T.START': 73307898, 'T.END': 73307950, 'CG': '52='}\n",
+      "7046631 D134#1#chr03 1766 1767\n",
+      "\tIn path\n",
+      "\t 73307991 73307992\n",
+      "{'Q.START': 1727, 'Q.END': 1762, 'T.START': 73307953, 'T.END': 73307988, 'CG': '35='}\n",
+      "7046673 D134#1#chr03 1765 1766\n",
+      "\tIn path\n",
+      "\t 73307993 73307994\n",
+      "{'Q.START': 1766, 'Q.END': 1767, 'T.START': 73307991, 'T.END': 73307992, 'CG': '1='}\n",
+      "7046631 D134#1#chr03 1766 1767\n",
+      "\tIn path\n",
+      "\t 73307991 73307992\n",
+      "{'Q.START': 1765, 'Q.END': 1766, 'T.START': 73307993, 'T.END': 73307994, 'CG': '1='}\n",
+      "7046673 D134#1#chr03 1765 1766\n",
+      "\tIn path\n",
+      "\t 73307993 73307994\n",
+      "{'Q.START': 1766, 'Q.END': 1767, 'T.START': 73307991, 'T.END': 73307992, 'CG': '1='}\n",
+      "7046631 D134#1#chr03 1766 1767\n",
+      "\tIn path\n",
+      "\t 73307991 73307992\n",
+      "{'Q.START': 1765, 'Q.END': 1766, 'T.START': 73307993, 'T.END': 73307994, 'CG': '1='}\n",
+      "7046632 D134#1#chr03 1767 1824\n",
+      "\tIn path\n",
+      "\t 73307995 73308052\n",
+      "{'Q.START': 1766, 'Q.END': 1767, 'T.START': 73307991, 'T.END': 73307992, 'CG': '1='}\n",
+      "7046634 D134#1#chr03 1824 1825\n",
+      "\tIn path\n",
+      "\t 73308053 73308054\n",
+      "{'Q.START': 1767, 'Q.END': 1824, 'T.START': 73307995, 'T.END': 73308052, 'CG': '57='}\n",
+      "7046635 D134#1#chr03 1825 1975\n",
+      "\tIn path\n",
+      "\t 73308055 73308205\n",
+      "{'Q.START': 1824, 'Q.END': 1825, 'T.START': 73308053, 'T.END': 73308054, 'CG': '1='}\n",
+      "7046637 D134#1#chr03 1975 1976\n",
+      "\tNot in path\n",
+      "7046638 D134#1#chr03 1976 2015\n",
+      "\tIn path\n",
+      "\t 73308208 73308247\n",
+      "{'Q.START': 1825, 'Q.END': 1975, 'T.START': 73308055, 'T.END': 73308205, 'CG': '150='}\n",
+      "7046639 D134#1#chr03 2015 2016\n",
+      "\tNot in path\n",
+      "7046641 D134#1#chr03 2016 2047\n",
+      "\tIn path\n",
+      "\t 73308250 73308281\n",
+      "{'Q.START': 1976, 'Q.END': 2015, 'T.START': 73308208, 'T.END': 73308247, 'CG': '39='}\n",
+      "7046644 D134#1#chr03 2047 2055\n",
+      "\tIn path\n",
+      "\t 73308286 73308294\n",
+      "{'Q.START': 2016, 'Q.END': 2047, 'T.START': 73308250, 'T.END': 73308281, 'CG': '31='}\n",
+      "7046646 D134#1#chr03 2055 2056\n",
+      "\tNot in path\n",
+      "7046647 D134#1#chr03 2056 2120\n",
+      "\tIn path\n",
+      "\t 73308297 73308361\n",
+      "{'Q.START': 2047, 'Q.END': 2055, 'T.START': 73308286, 'T.END': 73308294, 'CG': '8='}\n",
+      "7046649 D134#1#chr03 2120 2121\n",
+      "\tIn path\n",
+      "\t 73308362 73308363\n",
+      "{'Q.START': 2056, 'Q.END': 2120, 'T.START': 73308297, 'T.END': 73308361, 'CG': '64='}\n",
+      "7046650 D134#1#chr03 2121 2157\n",
+      "\tIn path\n",
+      "\t 73308364 73308400\n",
+      "{'Q.START': 2120, 'Q.END': 2121, 'T.START': 73308362, 'T.END': 73308363, 'CG': '1='}\n",
+      "7046652 D134#1#chr03 2157 2158\n",
+      "\tNot in path\n",
+      "7046653 D134#1#chr03 2158 2170\n",
+      "\tIn path\n",
+      "\t 73308403 73308415\n",
+      "{'Q.START': 2121, 'Q.END': 2157, 'T.START': 73308364, 'T.END': 73308400, 'CG': '36='}\n",
+      "7046654 D134#1#chr03 2170 2171\n",
+      "\tIn path\n",
+      "\t 73308416 73308417\n",
+      "{'Q.START': 2158, 'Q.END': 2170, 'T.START': 73308403, 'T.END': 73308415, 'CG': '12='}\n",
+      "7046656 D134#1#chr03 2171 2205\n",
+      "\tIn path\n",
+      "\t 73308418 73308452\n",
+      "{'Q.START': 2170, 'Q.END': 2171, 'T.START': 73308416, 'T.END': 73308417, 'CG': '1='}\n",
+      "7046657 D134#1#chr03 2205 2206\n",
+      "\tNot in path\n",
+      "7046659 D134#1#chr03 2206 2344\n",
+      "\tIn path\n",
+      "\t 73308455 73308593\n",
+      "{'Q.START': 2171, 'Q.END': 2205, 'T.START': 73308418, 'T.END': 73308452, 'CG': '34='}\n",
+      "7046660 D134#1#chr03 2344 2345\n",
+      "\tNot in path\n",
+      "7046662 D134#1#chr03 2345 2364\n",
+      "\tIn path\n",
+      "\t 73308596 73308615\n",
+      "{'Q.START': 2206, 'Q.END': 2344, 'T.START': 73308455, 'T.END': 73308593, 'CG': '138='}\n",
+      "7046663 D134#1#chr03 2364 2383\n",
+      "\tIn path\n",
+      "\t 73308616 73308635\n",
+      "{'Q.START': 2345, 'Q.END': 2364, 'T.START': 73308596, 'T.END': 73308615, 'CG': '19='}\n",
+      "7046665 D134#1#chr03 2383 2408\n",
+      "\tIn path\n",
+      "\t 73308636 73308661\n",
+      "{'Q.START': 2364, 'Q.END': 2383, 'T.START': 73308616, 'T.END': 73308635, 'CG': '19='}\n",
+      "7046667 D134#1#chr03 2408 2409\n",
+      "\tIn path\n",
+      "\t 73308662 73308663\n",
+      "{'Q.START': 2383, 'Q.END': 2408, 'T.START': 73308636, 'T.END': 73308661, 'CG': '25='}\n",
+      "7046668 D134#1#chr03 2409 2441\n",
+      "\tIn path\n",
+      "\t 73308664 73308696\n",
+      "{'Q.START': 2408, 'Q.END': 2409, 'T.START': 73308662, 'T.END': 73308663, 'CG': '1='}\n",
+      "7046670 D134#1#chr03 2441 2442\n",
+      "\tIn path\n",
+      "\t 73308697 73308698\n",
+      "{'Q.START': 2409, 'Q.END': 2441, 'T.START': 73308664, 'T.END': 73308696, 'CG': '32='}\n",
+      "7046671 D134#1#chr03 2442 2580\n",
+      "\tIn path\n",
+      "\t 73308699 73308837\n",
+      "{'Q.START': 2441, 'Q.END': 2442, 'T.START': 73308697, 'T.END': 73308698, 'CG': '1='}\n",
+      "7046674 D134#1#chr03 2582 2583\n",
+      "\tIn path\n",
+      "\t 73308838 73308839\n",
+      "{'Q.START': 2442, 'Q.END': 2580, 'T.START': 73308699, 'T.END': 73308837, 'CG': '138='}\n",
+      "7046675 D134#1#chr03 2583 2584\n",
+      "\tIn path\n",
+      "\t 73308840 73308841\n",
+      "{'Q.START': 2582, 'Q.END': 2583, 'T.START': 73308838, 'T.END': 73308839, 'CG': '1='}\n",
+      "7046674 D134#1#chr03 2582 2583\n",
+      "\tIn path\n",
+      "\t 73308838 73308839\n",
+      "{'Q.START': 2583, 'Q.END': 2584, 'T.START': 73308840, 'T.END': 73308841, 'CG': '1='}\n",
+      "7046675 D134#1#chr03 2583 2584\n",
+      "\tIn path\n",
+      "\t 73308840 73308841\n",
+      "{'Q.START': 2582, 'Q.END': 2583, 'T.START': 73308838, 'T.END': 73308839, 'CG': '1='}\n",
+      "7046676 D134#1#chr03 2584 2764\n",
+      "\tIn path\n",
+      "\t 73308842 73309022\n",
+      "{'Q.START': 2583, 'Q.END': 2584, 'T.START': 73308840, 'T.END': 73308841, 'CG': '1='}\n",
+      "7046678 D134#1#chr03 2764 2765\n",
+      "\tNot in path\n",
+      "7046679 D134#1#chr03 2765 2797\n",
+      "\tIn path\n",
+      "\t 73309025 73309057\n",
+      "{'Q.START': 2584, 'Q.END': 2764, 'T.START': 73308842, 'T.END': 73309022, 'CG': '180='}\n",
+      "7046680 D134#1#chr03 2797 2798\n",
+      "\tNot in path\n",
+      "7046682 D134#1#chr03 2798 2878\n",
+      "\tIn path\n",
+      "\t 73309060 73309140\n",
+      "{'Q.START': 2765, 'Q.END': 2797, 'T.START': 73309025, 'T.END': 73309057, 'CG': '32='}\n",
+      "7046684 D134#1#chr03 2878 2879\n",
+      "\tIn path\n",
+      "\t 73309141 73309142\n",
+      "{'Q.START': 2798, 'Q.END': 2878, 'T.START': 73309060, 'T.END': 73309140, 'CG': '80='}\n",
+      "7046685 D134#1#chr03 2879 2951\n",
+      "\tIn path\n",
+      "\t 73309143 73309215\n",
+      "{'Q.START': 2878, 'Q.END': 2879, 'T.START': 73309141, 'T.END': 73309142, 'CG': '1='}\n",
+      "7046686 D134#1#chr03 2951 2952\n",
+      "\tIn path\n",
+      "\t 73309216 73309217\n",
+      "{'Q.START': 2879, 'Q.END': 2951, 'T.START': 73309143, 'T.END': 73309215, 'CG': '72='}\n",
+      "7046688 D134#1#chr03 2952 3002\n",
+      "\tIn path\n",
+      "\t 73309218 73309268\n",
+      "{'Q.START': 2951, 'Q.END': 2952, 'T.START': 73309216, 'T.END': 73309217, 'CG': '1='}\n",
+      "7046690 D134#1#chr03 3002 3077\n",
+      "\tIn path\n",
+      "\t 73309271 73309346\n",
+      "{'Q.START': 2952, 'Q.END': 3002, 'T.START': 73309218, 'T.END': 73309268, 'CG': '50='}\n",
+      "7046692 D134#1#chr03 3077 3078\n",
+      "\tIn path\n",
+      "\t 73309347 73309348\n",
+      "{'Q.START': 3002, 'Q.END': 3077, 'T.START': 73309271, 'T.END': 73309346, 'CG': '75='}\n",
+      "7046693 D134#1#chr03 3078 3093\n",
+      "\tIn path\n",
+      "\t 73309349 73309364\n",
+      "{'Q.START': 3077, 'Q.END': 3078, 'T.START': 73309347, 'T.END': 73309348, 'CG': '1='}\n",
+      "7046695 D134#1#chr03 3093 3094\n",
+      "\tNot in path\n",
+      "7046696 D134#1#chr03 3094 3097\n",
+      "\tIn path\n",
+      "\t 73309367 73309370\n",
+      "{'Q.START': 3078, 'Q.END': 3093, 'T.START': 73309349, 'T.END': 73309364, 'CG': '15='}\n",
+      "7046698 D134#1#chr03 3097 3140\n",
+      "\tIn path\n",
+      "\t 73309371 73309414\n",
+      "{'Q.START': 3094, 'Q.END': 3097, 'T.START': 73309367, 'T.END': 73309370, 'CG': '3='}\n",
+      "7046700 D134#1#chr03 3140 3210\n",
+      "\tIn path\n",
+      "\t 73309415 73309485\n",
+      "{'Q.START': 3097, 'Q.END': 3140, 'T.START': 73309371, 'T.END': 73309414, 'CG': '43='}\n",
+      "7046702 D134#1#chr03 3210 3211\n",
+      "\tIn path\n",
+      "\t 73309486 73309487\n",
+      "{'Q.START': 3140, 'Q.END': 3210, 'T.START': 73309415, 'T.END': 73309485, 'CG': '70='}\n",
+      "7046703 D134#1#chr03 3211 3229\n",
+      "\tIn path\n",
+      "\t 73309488 73309506\n",
+      "{'Q.START': 3210, 'Q.END': 3211, 'T.START': 73309486, 'T.END': 73309487, 'CG': '1='}\n",
+      "7046704 D134#1#chr03 3229 3230\n",
+      "\tIn path\n",
+      "\t 73309507 73309508\n",
+      "{'Q.START': 3211, 'Q.END': 3229, 'T.START': 73309488, 'T.END': 73309506, 'CG': '18='}\n",
+      "7046706 D134#1#chr03 3230 3276\n",
+      "\tIn path\n",
+      "\t 73309509 73309555\n",
+      "{'Q.START': 3229, 'Q.END': 3230, 'T.START': 73309507, 'T.END': 73309508, 'CG': '1='}\n",
+      "7046707 D134#1#chr03 3276 3277\n",
+      "\tNot in path\n",
+      "7046709 D134#1#chr03 3277 3315\n",
+      "\tIn path\n",
+      "\t 73309558 73309596\n",
+      "{'Q.START': 3230, 'Q.END': 3276, 'T.START': 73309509, 'T.END': 73309555, 'CG': '46='}\n",
+      "7046710 D134#1#chr03 3315 3316\n",
+      "\tNot in path\n",
+      "7046712 D134#1#chr03 3316 3322\n",
+      "\tIn path\n",
+      "\t 73309599 73309605\n",
+      "{'Q.START': 3277, 'Q.END': 3315, 'T.START': 73309558, 'T.END': 73309596, 'CG': '38='}\n",
+      "7046713 D134#1#chr03 3322 3323\n",
+      "\tNot in path\n",
+      "7046715 D134#1#chr03 3323 3348\n",
+      "\tIn path\n",
+      "\t 73309608 73309633\n",
+      "{'Q.START': 3316, 'Q.END': 3322, 'T.START': 73309599, 'T.END': 73309605, 'CG': '6='}\n",
+      "7046718 D134#1#chr03 3352 3353\n",
+      "\tIn path\n",
+      "\t 73309634 73309635\n",
+      "{'Q.START': 3323, 'Q.END': 3348, 'T.START': 73309608, 'T.END': 73309633, 'CG': '25='}\n",
+      "7046717 D134#1#chr03 3351 3352\n",
+      "\tIn path\n",
+      "\t 73309636 73309637\n",
+      "{'Q.START': 3352, 'Q.END': 3353, 'T.START': 73309634, 'T.END': 73309635, 'CG': '1='}\n",
+      "7046718 D134#1#chr03 3352 3353\n",
+      "\tIn path\n",
+      "\t 73309634 73309635\n",
+      "{'Q.START': 3351, 'Q.END': 3352, 'T.START': 73309636, 'T.END': 73309637, 'CG': '1='}\n",
+      "7046717 D134#1#chr03 3351 3352\n",
+      "\tIn path\n",
+      "\t 73309636 73309637\n",
+      "{'Q.START': 3352, 'Q.END': 3353, 'T.START': 73309634, 'T.END': 73309635, 'CG': '1='}\n",
+      "7046718 D134#1#chr03 3352 3353\n",
+      "\tIn path\n",
+      "\t 73309634 73309635\n",
+      "{'Q.START': 3351, 'Q.END': 3352, 'T.START': 73309636, 'T.END': 73309637, 'CG': '1='}\n",
+      "7046720 D134#1#chr03 3353 3354\n",
+      "\tIn path\n",
+      "\t 73309638 73309639\n",
+      "{'Q.START': 3352, 'Q.END': 3353, 'T.START': 73309634, 'T.END': 73309635, 'CG': '1='}\n",
+      "7046722 D134#1#chr03 3354 3356\n",
+      "\tIn path\n",
+      "\t 73309640 73309642\n",
+      "{'Q.START': 3353, 'Q.END': 3354, 'T.START': 73309638, 'T.END': 73309639, 'CG': '1='}\n",
+      "7046724 D134#1#chr03 3356 3357\n",
+      "\tNot in path\n",
+      "7046725 D134#1#chr03 3357 3489\n",
+      "\tIn path\n",
+      "\t 73309645 73309777\n",
+      "{'Q.START': 3354, 'Q.END': 3356, 'T.START': 73309640, 'T.END': 73309642, 'CG': '2='}\n",
+      "7046727 D134#1#chr03 3489 3490\n",
+      "\tNot in path\n",
+      "7046728 D134#1#chr03 3490 3642\n",
+      "\tIn path\n",
+      "\t 73309780 73309932\n",
+      "{'Q.START': 3357, 'Q.END': 3489, 'T.START': 73309645, 'T.END': 73309777, 'CG': '132='}\n",
+      "7046729 D134#1#chr03 3642 3644\n",
+      "\tNot in path\n",
+      "7046730 D134#1#chr03 3644 3685\n",
+      "\tIn path\n",
+      "\t 73309933 73309974\n",
+      "{'Q.START': 3490, 'Q.END': 3642, 'T.START': 73309780, 'T.END': 73309932, 'CG': '152='}\n",
+      "7046731 D134#1#chr03 3685 3687\n",
+      "\tNot in path\n",
+      "7046733 D134#1#chr03 3687 3693\n",
+      "\tIn path\n",
+      "\t 73309977 73309983\n",
+      "{'Q.START': 3644, 'Q.END': 3685, 'T.START': 73309933, 'T.END': 73309974, 'CG': '41='}\n",
+      "7046735 D134#1#chr03 3693 3694\n",
+      "\tNot in path\n",
+      "7046736 D134#1#chr03 3694 3708\n",
+      "\tIn path\n",
+      "\t 73309986 73310000\n",
+      "{'Q.START': 3687, 'Q.END': 3693, 'T.START': 73309977, 'T.END': 73309983, 'CG': '6='}\n",
+      "7046738 D134#1#chr03 3720 3721\n",
+      "\tIn path\n",
+      "\t 73310010 73310011\n",
+      "{'Q.START': 3694, 'Q.END': 3708, 'T.START': 73309986, 'T.END': 73310000, 'CG': '14='}\n",
+      "7046739 D134#1#chr03 3721 3722\n",
+      "\tIn path\n",
+      "\t 73310003 73310004\n",
+      "{'Q.START': 3720, 'Q.END': 3721, 'T.START': 73310010, 'T.END': 73310011, 'CG': '1='}\n",
+      "7046740 D134#1#chr03 3716 3720\n",
+      "\tIn path\n",
+      "\t 73310005 73310009\n",
+      "{'Q.START': 3721, 'Q.END': 3722, 'T.START': 73310003, 'T.END': 73310004, 'CG': '1='}\n",
+      "7046738 D134#1#chr03 3720 3721\n",
+      "\tIn path\n",
+      "\t 73310010 73310011\n",
+      "{'Q.START': 3716, 'Q.END': 3720, 'T.START': 73310005, 'T.END': 73310009, 'CG': '4='}\n",
+      "7046739 D134#1#chr03 3721 3722\n",
+      "\tIn path\n",
+      "\t 73310003 73310004\n",
+      "{'Q.START': 3720, 'Q.END': 3721, 'T.START': 73310010, 'T.END': 73310011, 'CG': '1='}\n",
+      "7046740 D134#1#chr03 3716 3720\n",
+      "\tIn path\n",
+      "\t 73310005 73310009\n",
+      "{'Q.START': 3721, 'Q.END': 3722, 'T.START': 73310003, 'T.END': 73310004, 'CG': '1='}\n",
+      "7046738 D134#1#chr03 3720 3721\n",
+      "\tIn path\n",
+      "\t 73310010 73310011\n",
+      "{'Q.START': 3716, 'Q.END': 3720, 'T.START': 73310005, 'T.END': 73310009, 'CG': '4='}\n",
+      "7046739 D134#1#chr03 3721 3722\n",
+      "\tIn path\n",
+      "\t 73310003 73310004\n",
+      "{'Q.START': 3720, 'Q.END': 3721, 'T.START': 73310010, 'T.END': 73310011, 'CG': '1='}\n",
+      "7046741 D134#1#chr03 3722 3735\n",
+      "\tIn path\n",
+      "\t 73310012 73310045\n",
+      "{'Q.START': 3721, 'Q.END': 3722, 'T.START': 73310003, 'T.END': 73310004, 'CG': '1='}\n",
+      "ALN_2\n",
+      "7594382 D134#1#chr03 0 1\n",
+      "\tIn path\n",
+      "\t 70220037 70220038\n",
+      "skipped\n",
+      "\n",
+      "7594369 D134#1#chr03 32 33\n",
+      "\tIn path\n",
+      "\t 70219216 70219217\n",
+      "{'Q.START': 0, 'Q.END': 1, 'T.START': 70220037, 'T.END': 70220038, 'CG': '1='}\n",
+      "7594371 D134#1#chr03 15 16\n",
+      "\tIn path\n",
+      "\t 70221163 70221164\n",
+      "{'Q.START': 32, 'Q.END': 33, 'T.START': 70219216, 'T.END': 70219217, 'CG': '1='}\n",
+      "7594021 D134#1#chr03 57 58\n",
+      "\tIn path\n",
+      "\t 70219218 70219219\n",
+      "{'Q.START': 15, 'Q.END': 16, 'T.START': 70221163, 'T.END': 70221164, 'CG': '1='}\n",
+      "7594286 D134#1#chr03 59 60\n",
+      "\tIn path\n",
+      "\t 70219349 70219350\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
+      "7594374 D134#1#chr03 69 70\n",
+      "\tIn path\n",
+      "\t 70219092 70219093\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
+      "7594356 D134#1#chr03 66 67\n",
+      "\tIn path\n",
+      "\t 70219570 70219571\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
+      "7594374 D134#1#chr03 69 70\n",
+      "\tIn path\n",
+      "\t 70219092 70219093\n",
+      "{'Q.START': 66, 'Q.END': 67, 'T.START': 70219570, 'T.END': 70219571, 'CG': '1='}\n",
+      "7594374 D134#1#chr03 69 70\n",
+      "\tIn path\n",
+      "\t 70219092 70219093\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
+      "7594375 D134#1#chr03 68 69\n",
+      "\tIn path\n",
+      "\t 70221598 70221599\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
+      "7594626 D134#1#chr03 10 11\n",
+      "\tIn path\n",
+      "\t 70219214 70219215\n",
+      "{'Q.START': 68, 'Q.END': 69, 'T.START': 70221598, 'T.END': 70221599, 'CG': '1='}\n",
+      "7594011 D134#1#chr03 11 12\n",
+      "\tIn path\n",
+      "\t 70219995 70219996\n",
+      "{'Q.START': 10, 'Q.END': 11, 'T.START': 70219214, 'T.END': 70219215, 'CG': '1='}\n",
+      "7594374 D134#1#chr03 69 70\n",
+      "\tIn path\n",
+      "\t 70219092 70219093\n",
+      "{'Q.START': 11, 'Q.END': 12, 'T.START': 70219995, 'T.END': 70219996, 'CG': '1='}\n",
+      "7594375 D134#1#chr03 68 69\n",
+      "\tIn path\n",
+      "\t 70221598 70221599\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
+      "7594369 D134#1#chr03 32 33\n",
+      "\tIn path\n",
+      "\t 70219216 70219217\n",
+      "{'Q.START': 68, 'Q.END': 69, 'T.START': 70221598, 'T.END': 70221599, 'CG': '1='}\n",
+      "7594371 D134#1#chr03 15 16\n",
+      "\tIn path\n",
+      "\t 70221163 70221164\n",
+      "{'Q.START': 32, 'Q.END': 33, 'T.START': 70219216, 'T.END': 70219217, 'CG': '1='}\n",
+      "7594021 D134#1#chr03 57 58\n",
+      "\tIn path\n",
+      "\t 70219218 70219219\n",
+      "{'Q.START': 15, 'Q.END': 16, 'T.START': 70221163, 'T.END': 70221164, 'CG': '1='}\n",
+      "7594021 D134#1#chr03 57 58\n",
+      "\tIn path\n",
+      "\t 70219218 70219219\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
+      "7594021 D134#1#chr03 57 58\n",
+      "\tIn path\n",
+      "\t 70219218 70219219\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
+      "7594021 D134#1#chr03 57 58\n",
+      "\tIn path\n",
+      "\t 70219218 70219219\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
+      "7594241 D134#1#chr03 20 21\n",
+      "\tIn path\n",
+      "\t 70219220 70219221\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
+      "7594248 D134#1#chr03 21 22\n",
+      "\tNot in path\n",
+      "7594286 D134#1#chr03 59 60\n",
+      "\tIn path\n",
+      "\t 70219349 70219350\n",
+      "{'Q.START': 20, 'Q.END': 21, 'T.START': 70219220, 'T.END': 70219221, 'CG': '1='}\n",
+      "7594311 D134#1#chr03 55 56\n",
+      "\tIn path\n",
+      "\t 70219351 70219352\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
+      "7594315 D134#1#chr03 53 54\n",
+      "\tIn path\n",
+      "\t 70219857 70219858\n",
+      "{'Q.START': 55, 'Q.END': 56, 'T.START': 70219351, 'T.END': 70219352, 'CG': '1='}\n",
+      "7594311 D134#1#chr03 55 56\n",
+      "\tIn path\n",
+      "\t 70219351 70219352\n",
+      "{'Q.START': 53, 'Q.END': 54, 'T.START': 70219857, 'T.END': 70219858, 'CG': '1='}\n",
+      "7594330 D134#1#chr03 26 27\n",
+      "\tNot in path\n",
+      "7594311 D134#1#chr03 55 56\n",
+      "\tIn path\n",
+      "\t 70219351 70219352\n",
+      "{'Q.START': 55, 'Q.END': 56, 'T.START': 70219351, 'T.END': 70219352, 'CG': '1='}\n",
+      "7594315 D134#1#chr03 53 54\n",
+      "\tIn path\n",
+      "\t 70219857 70219858\n",
+      "{'Q.START': 55, 'Q.END': 56, 'T.START': 70219351, 'T.END': 70219352, 'CG': '1='}\n",
+      "7594374 D134#1#chr03 69 70\n",
+      "\tIn path\n",
+      "\t 70219092 70219093\n",
+      "{'Q.START': 53, 'Q.END': 54, 'T.START': 70219857, 'T.END': 70219858, 'CG': '1='}\n",
+      "7594311 D134#1#chr03 55 56\n",
+      "\tIn path\n",
+      "\t 70219351 70219352\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
+      "7594374 D134#1#chr03 69 70\n",
+      "\tIn path\n",
+      "\t 70219092 70219093\n",
+      "{'Q.START': 55, 'Q.END': 56, 'T.START': 70219351, 'T.END': 70219352, 'CG': '1='}\n",
+      "7594369 D134#1#chr03 32 33\n",
+      "\tIn path\n",
+      "\t 70219216 70219217\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
+      "7594021 D134#1#chr03 57 58\n",
+      "\tIn path\n",
+      "\t 70219218 70219219\n",
+      "{'Q.START': 32, 'Q.END': 33, 'T.START': 70219216, 'T.END': 70219217, 'CG': '1='}\n",
+      "7594026 D134#1#chr03 37 38\n",
+      "\tIn path\n",
+      "\t 70220249 70220250\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
+      "7594021 D134#1#chr03 57 58\n",
+      "\tIn path\n",
+      "\t 70219218 70219219\n",
+      "{'Q.START': 37, 'Q.END': 38, 'T.START': 70220249, 'T.END': 70220250, 'CG': '1='}\n",
+      "7594021 D134#1#chr03 57 58\n",
+      "\tIn path\n",
+      "\t 70219218 70219219\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
+      "7594026 D134#1#chr03 37 38\n",
+      "\tIn path\n",
+      "\t 70220249 70220250\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
+      "7594021 D134#1#chr03 57 58\n",
+      "\tIn path\n",
+      "\t 70219218 70219219\n",
+      "{'Q.START': 37, 'Q.END': 38, 'T.START': 70220249, 'T.END': 70220250, 'CG': '1='}\n",
+      "7594021 D134#1#chr03 57 58\n",
+      "\tIn path\n",
+      "\t 70219218 70219219\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
+      "7594021 D134#1#chr03 57 58\n",
+      "\tIn path\n",
+      "\t 70219218 70219219\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
+      "7594021 D134#1#chr03 57 58\n",
+      "\tIn path\n",
+      "\t 70219218 70219219\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
+      "7594286 D134#1#chr03 59 60\n",
+      "\tIn path\n",
+      "\t 70219349 70219350\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
+      "7594374 D134#1#chr03 69 70\n",
+      "\tIn path\n",
+      "\t 70219092 70219093\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
+      "7594021 D134#1#chr03 57 58\n",
+      "\tIn path\n",
+      "\t 70219218 70219219\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
+      "7594286 D134#1#chr03 59 60\n",
+      "\tIn path\n",
+      "\t 70219349 70219350\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
+      "7594311 D134#1#chr03 55 56\n",
+      "\tIn path\n",
+      "\t 70219351 70219352\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
+      "7594286 D134#1#chr03 59 60\n",
+      "\tIn path\n",
+      "\t 70219349 70219350\n",
+      "{'Q.START': 55, 'Q.END': 56, 'T.START': 70219351, 'T.END': 70219352, 'CG': '1='}\n",
+      "7594311 D134#1#chr03 55 56\n",
+      "\tIn path\n",
+      "\t 70219351 70219352\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
+      "7594286 D134#1#chr03 59 60\n",
+      "\tIn path\n",
+      "\t 70219349 70219350\n",
+      "{'Q.START': 55, 'Q.END': 56, 'T.START': 70219351, 'T.END': 70219352, 'CG': '1='}\n",
+      "7594311 D134#1#chr03 55 56\n",
+      "\tIn path\n",
+      "\t 70219351 70219352\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
+      "7594286 D134#1#chr03 59 60\n",
+      "\tIn path\n",
+      "\t 70219349 70219350\n",
+      "{'Q.START': 55, 'Q.END': 56, 'T.START': 70219351, 'T.END': 70219352, 'CG': '1='}\n",
+      "7594311 D134#1#chr03 55 56\n",
+      "\tIn path\n",
+      "\t 70219351 70219352\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
+      "7594315 D134#1#chr03 53 54\n",
+      "\tIn path\n",
+      "\t 70219857 70219858\n",
+      "{'Q.START': 55, 'Q.END': 56, 'T.START': 70219351, 'T.END': 70219352, 'CG': '1='}\n",
+      "7594286 D134#1#chr03 59 60\n",
+      "\tIn path\n",
+      "\t 70219349 70219350\n",
+      "{'Q.START': 53, 'Q.END': 54, 'T.START': 70219857, 'T.END': 70219858, 'CG': '1='}\n",
+      "7594311 D134#1#chr03 55 56\n",
+      "\tIn path\n",
+      "\t 70219351 70219352\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
+      "7594374 D134#1#chr03 69 70\n",
+      "\tIn path\n",
+      "\t 70219092 70219093\n",
+      "{'Q.START': 55, 'Q.END': 56, 'T.START': 70219351, 'T.END': 70219352, 'CG': '1='}\n",
+      "7594021 D134#1#chr03 57 58\n",
+      "\tIn path\n",
+      "\t 70219218 70219219\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
+      "7594286 D134#1#chr03 59 60\n",
+      "\tIn path\n",
+      "\t 70219349 70219350\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
+      "7594286 D134#1#chr03 59 60\n",
+      "\tIn path\n",
+      "\t 70219349 70219350\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
+      "7594374 D134#1#chr03 69 70\n",
+      "\tIn path\n",
+      "\t 70219092 70219093\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
+      "7594356 D134#1#chr03 66 67\n",
+      "\tIn path\n",
+      "\t 70219570 70219571\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
+      "7594374 D134#1#chr03 69 70\n",
+      "\tIn path\n",
+      "\t 70219092 70219093\n",
+      "{'Q.START': 66, 'Q.END': 67, 'T.START': 70219570, 'T.END': 70219571, 'CG': '1='}\n",
+      "7594374 D134#1#chr03 69 70\n",
+      "\tIn path\n",
+      "\t 70219092 70219093\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
+      "7594375 D134#1#chr03 68 69\n",
+      "\tIn path\n",
+      "\t 70221598 70221599\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
+      "7594374 D134#1#chr03 69 70\n",
+      "\tIn path\n",
+      "\t 70219092 70219093\n",
+      "{'Q.START': 68, 'Q.END': 69, 'T.START': 70221598, 'T.END': 70221599, 'CG': '1='}\n",
+      "7594356 D134#1#chr03 66 67\n",
+      "\tIn path\n",
+      "\t 70219570 70219571\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
+      "7594374 D134#1#chr03 69 70\n",
+      "\tIn path\n",
+      "\t 70219092 70219093\n",
+      "{'Q.START': 66, 'Q.END': 67, 'T.START': 70219570, 'T.END': 70219571, 'CG': '1='}\n",
+      "7594375 D134#1#chr03 68 69\n",
+      "\tIn path\n",
+      "\t 70221598 70221599\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
+      "7594374 D134#1#chr03 69 70\n",
+      "\tIn path\n",
+      "\t 70219092 70219093\n",
+      "{'Q.START': 68, 'Q.END': 69, 'T.START': 70221598, 'T.END': 70221599, 'CG': '1='}\n",
+      "7594350 D134#1#chr03 70 71\n",
+      "\tIn path\n",
+      "\t 70219226 70219227\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
+      "7594264 D134#1#chr03 71 72\n",
+      "\tIn path\n",
+      "\t 70219228 70219229\n",
+      "{'Q.START': 70, 'Q.END': 71, 'T.START': 70219226, 'T.END': 70219227, 'CG': '1='}\n",
+      "7594207 D134#1#chr03 72 73\n",
+      "\tIn path\n",
+      "\t 70219230 70219231\n",
+      "{'Q.START': 71, 'Q.END': 72, 'T.START': 70219228, 'T.END': 70219229, 'CG': '1='}\n",
+      "7594225 D134#1#chr03 73 74\n",
+      "\tIn path\n",
+      "\t 70219232 70219233\n",
+      "{'Q.START': 72, 'Q.END': 73, 'T.START': 70219230, 'T.END': 70219231, 'CG': '1='}\n",
+      "7594227 D134#1#chr03 74 75\n",
+      "\tIn path\n",
+      "\t 70220150 70220151\n",
+      "{'Q.START': 73, 'Q.END': 74, 'T.START': 70219232, 'T.END': 70219233, 'CG': '1='}\n",
+      "7594120 D134#1#chr03 75 76\n",
+      "\tIn path\n",
+      "\t 70219236 70219237\n",
+      "{'Q.START': 74, 'Q.END': 75, 'T.START': 70220150, 'T.END': 70220151, 'CG': '1='}\n",
+      "7594132 D134#1#chr03 76 77\n",
+      "\tIn path\n",
+      "\t 70219777 70219778\n",
+      "{'Q.START': 75, 'Q.END': 76, 'T.START': 70219236, 'T.END': 70219237, 'CG': '1='}\n",
+      "7594165 D134#1#chr03 77 78\n",
+      "\tIn path\n",
+      "\t 70219240 70219241\n",
+      "{'Q.START': 76, 'Q.END': 77, 'T.START': 70219777, 'T.END': 70219778, 'CG': '1='}\n",
+      "7594172 D134#1#chr03 78 3735\n",
+      "\tNot in path\n"
+     ]
+    }
+   ],
+   "source": [
+    "ALNS = {}\n",
+    "## Iterating over alignments\n",
+    "for aln_name in aln_dict.keys():\n",
+    "    \n",
+    "    ## Iterating over paths of the gfa\n",
+    "    for path_name in paths.keys():\n",
+    "        if path_name in [\"TO1000#1#chr03\", \"D134#1#chr03\"]: print(aln_name)\n",
+    "        _ = [] # Temporary list holding alignment blocks\n",
+    "\n",
+    "        ## Iterating over alignment nodes of the current alignment\n",
+    "        for node_id, orient in aln_dict[aln_name][\"PATH.MATCH\"]:\n",
+    "\n",
+    "            # Getting node info\n",
+    "            n_info = nodes[node_id]\n",
+    "            q_start = n_info[aln_name][\"START\"] # Start position on the query\n",
+    "            q_end = n_info[aln_name][\"END\"] # End position on the query\n",
+    "            _CG = n_info[aln_name][\"CIGAR\"] # Cigar of the alignment on the current node\n",
+    "\n",
+    "            if path_name in [\"TO1000#1#chr03\", \"D134#1#chr03\"]: print(node_id, path_name, q_start, q_end)\n",
+    "\n",
+    "            ## Checking if path is traversing the current node\n",
+    "            if path_name in list(n_info.keys()):\n",
+    "                if path_name == \"D134#1#chr03\": print(\"\\tIn path\")\n",
+    "\n",
+    "                ## Getting start and end position on the target given the orientation of the node in the alignment and the path\n",
+    "                if n_info[aln_name][\"STRAND\"] == n_info[path_name][\"STRAND\"] :\n",
+    "                    t_start = n_info[path_name][\"START\"]+n_info[aln_name][\"S.OFF\"]\n",
+    "                    t_end = n_info[path_name][\"END\"]+n_info[aln_name][\"E.OFF\"] \n",
+    "                else :\n",
+    "                    t_end = n_info[path_name][\"START\"]+n_info[aln_name][\"S.OFF\"]\n",
+    "                    t_start = n_info[path_name][\"END\"]+n_info[aln_name][\"E.OFF\"]\n",
+    "\n",
+    "                if path_name in [\"TO1000#1#chr03\", \"D134#1#chr03\"]: print(\"\\t\", t_start, t_end)\n",
+    "\n",
+    "                \"\"\"\n",
+    "                If the latest block t.end and q.end matches with the current node t.start and q.start, \n",
+    "                the node should be added to the block. Else, we terminate the block and add the node to a new block\n",
+    "                \"\"\"\n",
+    "                \n",
+    "                # Non empty temporary list of aln and ending of the last block is the same as the start of the new node : \n",
+    "                if len(_) and _[-1][\"T.END\"] == t_start and _[-1][\"Q.END\"]+1 == q_start: \n",
+    "                    tmp_aln[\"Q.END\"] = q_end\n",
+    "                    tmp_aln[\"T.END\"] = t_end\n",
+    "                    tmp_aln[\"CG\"] += _CG\n",
+    "#                elif len(_) and _[-1][\"T.END\"] == t_start: # Following on the target not on the query (i.e. Insertion)\n",
+    "#                    tmp_aln[\"T.END\"] = t_end\n",
+    "#                    tmp_aln[\"CG\"] += f\"{nodes_length[node_id]}I\"\n",
+    "#                elif len(_) and _[-1][\"Q.END\"]+1 == q_start: # Following on the query, not on the target (i.e. Deletion)\n",
+    "#                    tmp_aln[\"Q.END\"] = q_end\n",
+    "#                    tmp_aln[\"CG\"] += f\"{nodes_length[node_id]}D\"\n",
+    "                else : # Else, completely different\n",
+    "                    try : \n",
+    "                        _.append(tmp_aln)\n",
+    "                        if path_name in [\"TO1000#1#chr03\", \"D134#1#chr03\"]: print(tmp_aln)\n",
+    "                    except : \n",
+    "                        if path_name in [\"TO1000#1#chr03\", \"D134#1#chr03\"]: print(\"skipped\\n\")\n",
+    "                    tmp_aln = {\n",
+    "                        \"Q.START\": q_start,\n",
+    "                        \"Q.END\": q_end,\n",
+    "                        \"T.START\": t_start,\n",
+    "                        \"T.END\": t_end,\n",
+    "                        \"CG\": _CG,\n",
+    "                    }\n",
+    "            \n",
+    "            else : \n",
+    "                if path_name in [\"TO1000#1#chr03\", \"D134#1#chr03\"]: print(\"\\tNot in path\")\n",
+    "                # Node is not in the path\n",
+    "\n",
+    "        del tmp_aln\n",
+    "        \n",
+    "        ALNS[(path_name, aln_name)] = _"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "547f03fa-cbd5-42f9-b668-1ca4404795ba",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[{'Q.START': 0, 'Q.END': 77, 'T.START': 73306158, 'T.END': 73306235, 'CG': '77='}, {'Q.START': 77, 'Q.END': 82, 'T.START': 73306238, 'T.END': 73306243, 'CG': '5='}, {'Q.START': 83, 'Q.END': 138, 'T.START': 73306246, 'T.END': 73306301, 'CG': '55='}, {'Q.START': 139, 'Q.END': 202, 'T.START': 73306302, 'T.END': 73306365, 'CG': '63='}, {'Q.START': 202, 'Q.END': 203, 'T.START': 73306366, 'T.END': 73306367, 'CG': '1='}, {'Q.START': 203, 'Q.END': 379, 'T.START': 73306368, 'T.END': 73306544, 'CG': '176='}, {'Q.START': 379, 'Q.END': 380, 'T.START': 73306545, 'T.END': 73306546, 'CG': '1='}, {'Q.START': 380, 'Q.END': 429, 'T.START': 73306547, 'T.END': 73306596, 'CG': '49='}, {'Q.START': 429, 'Q.END': 430, 'T.START': 73306597, 'T.END': 73306598, 'CG': '1='}, {'Q.START': 430, 'Q.END': 457, 'T.START': 73306599, 'T.END': 73306626, 'CG': '27='}, {'Q.START': 457, 'Q.END': 492, 'T.START': 73306641, 'T.END': 73306676, 'CG': '35='}, {'Q.START': 508, 'Q.END': 564, 'T.START': 73306694, 'T.END': 73306750, 'CG': '56='}, {'Q.START': 568, 'Q.END': 569, 'T.START': 73306753, 'T.END': 73306754, 'CG': '1='}, {'Q.START': 568, 'Q.END': 569, 'T.START': 73306753, 'T.END': 73306754, 'CG': '1='}, {'Q.START': 568, 'Q.END': 569, 'T.START': 73306753, 'T.END': 73306754, 'CG': '1='}, {'Q.START': 569, 'Q.END': 824, 'T.START': 73306755, 'T.END': 73307010, 'CG': '255='}, {'Q.START': 826, 'Q.END': 858, 'T.START': 73307011, 'T.END': 73307043, 'CG': '32='}, {'Q.START': 858, 'Q.END': 859, 'T.START': 73307044, 'T.END': 73307045, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 
'T.END': 73307047, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}, {'Q.START': 869, 'Q.END': 913, 'T.START': 73307048, 'T.END': 73307092, 'CG': '44='}, {'Q.START': 913, 'Q.END': 919, 'T.START': 73307093, 'T.END': 73307099, 'CG': '6='}, {'Q.START': 919, 'Q.END': 978, 'T.START': 73307100, 'T.END': 73307159, 'CG': '59='}, {'Q.START': 978, 'Q.END': 979, 'T.START': 73307160, 'T.END': 73307161, 'CG': '1='}, {'Q.START': 979, 'Q.END': 1038, 'T.START': 73307162, 'T.END': 73307221, 'CG': '59='}, {'Q.START': 1038, 'Q.END': 1045, 'T.START': 73307224, 'T.END': 73307231, 'CG': '7='}, {'Q.START': 1045, 'Q.END': 1046, 'T.START': 73307232, 'T.END': 73307233, 'CG': '1='}, {'Q.START': 1046, 'Q.END': 1080, 'T.START': 73307234, 'T.END': 73307268, 'CG': '34='}, {'Q.START': 1080, 'Q.END': 1081, 'T.START': 73307269, 'T.END': 73307270, 'CG': '1='}, {'Q.START': 1081, 'Q.END': 1107, 'T.START': 73307271, 'T.END': 73307297, 'CG': '26='}, {'Q.START': 1108, 'Q.END': 1183, 'T.START': 73307300, 'T.END': 73307375, 'CG': '75='}, {'Q.START': 1183, 'Q.END': 1186, 'T.START': 73307376, 'T.END': 73307379, 'CG': '3='}, {'Q.START': 1224, 'Q.END': 1257, 'T.START': 73307419, 'T.END': 73307452, 'CG': '33='}, {'Q.START': 1289, 'Q.END': 1311, 'T.START': 73307475, 'T.END': 73307497, 'CG': '22='}, {'Q.START': 1359, 'Q.END': 1382, 'T.START': 73307546, 'T.END': 73307569, 'CG': '23='}, {'Q.START': 1434, 'Q.END': 1451, 'T.START': 73307643, 'T.END': 73307660, 'CG': '17='}, {'Q.START': 1451, 'Q.END': 1531, 'T.START': 73307661, 'T.END': 73307741, 'CG': '80='}, {'Q.START': 1532, 'Q.END': 1543, 'T.START': 73307744, 'T.END': 73307755, 'CG': '11='}, {'Q.START': 1544, 'Q.END': 1572, 'T.START': 73307758, 'T.END': 73307786, 'CG': 
'28='}, {'Q.START': 1572, 'Q.END': 1573, 'T.START': 73307787, 'T.END': 73307788, 'CG': '1='}, {'Q.START': 1573, 'Q.END': 1587, 'T.START': 73307789, 'T.END': 73307803, 'CG': '14='}, {'Q.START': 1588, 'Q.END': 1616, 'T.START': 73307806, 'T.END': 73307834, 'CG': '28='}, {'Q.START': 1616, 'Q.END': 1617, 'T.START': 73307835, 'T.END': 73307836, 'CG': '1='}, {'Q.START': 1617, 'Q.END': 1646, 'T.START': 73307837, 'T.END': 73307866, 'CG': '29='}, {'Q.START': 1646, 'Q.END': 1661, 'T.START': 73307867, 'T.END': 73307882, 'CG': '15='}, {'Q.START': 1661, 'Q.END': 1673, 'T.START': 73307883, 'T.END': 73307895, 'CG': '12='}, {'Q.START': 1673, 'Q.END': 1674, 'T.START': 73307896, 'T.END': 73307897, 'CG': '1='}, {'Q.START': 1674, 'Q.END': 1726, 'T.START': 73307898, 'T.END': 73307950, 'CG': '52='}, {'Q.START': 1727, 'Q.END': 1762, 'T.START': 73307953, 'T.END': 73307988, 'CG': '35='}, {'Q.START': 1766, 'Q.END': 1767, 'T.START': 73307991, 'T.END': 73307992, 'CG': '1='}, {'Q.START': 1765, 'Q.END': 1766, 'T.START': 73307993, 'T.END': 73307994, 'CG': '1='}, {'Q.START': 1766, 'Q.END': 1767, 'T.START': 73307991, 'T.END': 73307992, 'CG': '1='}, {'Q.START': 1765, 'Q.END': 1766, 'T.START': 73307993, 'T.END': 73307994, 'CG': '1='}, {'Q.START': 1766, 'Q.END': 1767, 'T.START': 73307991, 'T.END': 73307992, 'CG': '1='}, {'Q.START': 1767, 'Q.END': 1824, 'T.START': 73307995, 'T.END': 73308052, 'CG': '57='}, {'Q.START': 1824, 'Q.END': 1825, 'T.START': 73308053, 'T.END': 73308054, 'CG': '1='}, {'Q.START': 1825, 'Q.END': 1975, 'T.START': 73308055, 'T.END': 73308205, 'CG': '150='}, {'Q.START': 1976, 'Q.END': 2015, 'T.START': 73308208, 'T.END': 73308247, 'CG': '39='}, {'Q.START': 2016, 'Q.END': 2047, 'T.START': 73308250, 'T.END': 73308281, 'CG': '31='}, {'Q.START': 2047, 'Q.END': 2055, 'T.START': 73308286, 'T.END': 73308294, 'CG': '8='}, {'Q.START': 2056, 'Q.END': 2120, 'T.START': 73308297, 'T.END': 73308361, 'CG': '64='}, {'Q.START': 2120, 'Q.END': 2121, 'T.START': 73308362, 'T.END': 73308363, 'CG': '1='}, 
{'Q.START': 2121, 'Q.END': 2157, 'T.START': 73308364, 'T.END': 73308400, 'CG': '36='}, {'Q.START': 2158, 'Q.END': 2170, 'T.START': 73308403, 'T.END': 73308415, 'CG': '12='}, {'Q.START': 2170, 'Q.END': 2171, 'T.START': 73308416, 'T.END': 73308417, 'CG': '1='}, {'Q.START': 2171, 'Q.END': 2205, 'T.START': 73308418, 'T.END': 73308452, 'CG': '34='}, {'Q.START': 2206, 'Q.END': 2344, 'T.START': 73308455, 'T.END': 73308593, 'CG': '138='}, {'Q.START': 2345, 'Q.END': 2364, 'T.START': 73308596, 'T.END': 73308615, 'CG': '19='}, {'Q.START': 2364, 'Q.END': 2383, 'T.START': 73308616, 'T.END': 73308635, 'CG': '19='}, {'Q.START': 2383, 'Q.END': 2408, 'T.START': 73308636, 'T.END': 73308661, 'CG': '25='}, {'Q.START': 2408, 'Q.END': 2409, 'T.START': 73308662, 'T.END': 73308663, 'CG': '1='}, {'Q.START': 2409, 'Q.END': 2441, 'T.START': 73308664, 'T.END': 73308696, 'CG': '32='}, {'Q.START': 2441, 'Q.END': 2442, 'T.START': 73308697, 'T.END': 73308698, 'CG': '1='}, {'Q.START': 2442, 'Q.END': 2580, 'T.START': 73308699, 'T.END': 73308837, 'CG': '138='}, {'Q.START': 2582, 'Q.END': 2583, 'T.START': 73308838, 'T.END': 73308839, 'CG': '1='}, {'Q.START': 2583, 'Q.END': 2584, 'T.START': 73308840, 'T.END': 73308841, 'CG': '1='}, {'Q.START': 2582, 'Q.END': 2583, 'T.START': 73308838, 'T.END': 73308839, 'CG': '1='}, {'Q.START': 2583, 'Q.END': 2584, 'T.START': 73308840, 'T.END': 73308841, 'CG': '1='}, {'Q.START': 2584, 'Q.END': 2764, 'T.START': 73308842, 'T.END': 73309022, 'CG': '180='}, {'Q.START': 2765, 'Q.END': 2797, 'T.START': 73309025, 'T.END': 73309057, 'CG': '32='}, {'Q.START': 2798, 'Q.END': 2878, 'T.START': 73309060, 'T.END': 73309140, 'CG': '80='}, {'Q.START': 2878, 'Q.END': 2879, 'T.START': 73309141, 'T.END': 73309142, 'CG': '1='}, {'Q.START': 2879, 'Q.END': 2951, 'T.START': 73309143, 'T.END': 73309215, 'CG': '72='}, {'Q.START': 2951, 'Q.END': 2952, 'T.START': 73309216, 'T.END': 73309217, 'CG': '1='}, {'Q.START': 2952, 'Q.END': 3002, 'T.START': 73309218, 'T.END': 73309268, 'CG': '50='}, 
{'Q.START': 3002, 'Q.END': 3077, 'T.START': 73309271, 'T.END': 73309346, 'CG': '75='}, {'Q.START': 3077, 'Q.END': 3078, 'T.START': 73309347, 'T.END': 73309348, 'CG': '1='}, {'Q.START': 3078, 'Q.END': 3093, 'T.START': 73309349, 'T.END': 73309364, 'CG': '15='}, {'Q.START': 3094, 'Q.END': 3097, 'T.START': 73309367, 'T.END': 73309370, 'CG': '3='}, {'Q.START': 3097, 'Q.END': 3140, 'T.START': 73309371, 'T.END': 73309414, 'CG': '43='}, {'Q.START': 3140, 'Q.END': 3210, 'T.START': 73309415, 'T.END': 73309485, 'CG': '70='}, {'Q.START': 3210, 'Q.END': 3211, 'T.START': 73309486, 'T.END': 73309487, 'CG': '1='}, {'Q.START': 3211, 'Q.END': 3229, 'T.START': 73309488, 'T.END': 73309506, 'CG': '18='}, {'Q.START': 3229, 'Q.END': 3230, 'T.START': 73309507, 'T.END': 73309508, 'CG': '1='}, {'Q.START': 3230, 'Q.END': 3276, 'T.START': 73309509, 'T.END': 73309555, 'CG': '46='}, {'Q.START': 3277, 'Q.END': 3315, 'T.START': 73309558, 'T.END': 73309596, 'CG': '38='}, {'Q.START': 3316, 'Q.END': 3322, 'T.START': 73309599, 'T.END': 73309605, 'CG': '6='}, {'Q.START': 3323, 'Q.END': 3348, 'T.START': 73309608, 'T.END': 73309633, 'CG': '25='}, {'Q.START': 3352, 'Q.END': 3353, 'T.START': 73309634, 'T.END': 73309635, 'CG': '1='}, {'Q.START': 3351, 'Q.END': 3352, 'T.START': 73309636, 'T.END': 73309637, 'CG': '1='}, {'Q.START': 3352, 'Q.END': 3353, 'T.START': 73309634, 'T.END': 73309635, 'CG': '1='}, {'Q.START': 3351, 'Q.END': 3352, 'T.START': 73309636, 'T.END': 73309637, 'CG': '1='}, {'Q.START': 3352, 'Q.END': 3353, 'T.START': 73309634, 'T.END': 73309635, 'CG': '1='}, {'Q.START': 3353, 'Q.END': 3354, 'T.START': 73309638, 'T.END': 73309639, 'CG': '1='}, {'Q.START': 3354, 'Q.END': 3356, 'T.START': 73309640, 'T.END': 73309642, 'CG': '2='}, {'Q.START': 3357, 'Q.END': 3489, 'T.START': 73309645, 'T.END': 73309777, 'CG': '132='}, {'Q.START': 3490, 'Q.END': 3642, 'T.START': 73309780, 'T.END': 73309932, 'CG': '152='}, {'Q.START': 3644, 'Q.END': 3685, 'T.START': 73309933, 'T.END': 73309974, 'CG': '41='}, 
{'Q.START': 3687, 'Q.END': 3693, 'T.START': 73309977, 'T.END': 73309983, 'CG': '6='}, {'Q.START': 3694, 'Q.END': 3708, 'T.START': 73309986, 'T.END': 73310000, 'CG': '14='}, {'Q.START': 3720, 'Q.END': 3721, 'T.START': 73310010, 'T.END': 73310011, 'CG': '1='}, {'Q.START': 3721, 'Q.END': 3722, 'T.START': 73310003, 'T.END': 73310004, 'CG': '1='}, {'Q.START': 3716, 'Q.END': 3720, 'T.START': 73310005, 'T.END': 73310009, 'CG': '4='}, {'Q.START': 3720, 'Q.END': 3721, 'T.START': 73310010, 'T.END': 73310011, 'CG': '1='}, {'Q.START': 3721, 'Q.END': 3722, 'T.START': 73310003, 'T.END': 73310004, 'CG': '1='}, {'Q.START': 3716, 'Q.END': 3720, 'T.START': 73310005, 'T.END': 73310009, 'CG': '4='}, {'Q.START': 3720, 'Q.END': 3721, 'T.START': 73310010, 'T.END': 73310011, 'CG': '1='}, {'Q.START': 3721, 'Q.END': 3722, 'T.START': 73310003, 'T.END': 73310004, 'CG': '1='}]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(ALNS[(\"D134#1#chr03\", \"ALN_1\")])"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/gaf2aln.ipynb b/gaf2aln.ipynb
new file mode 100644
index 0000000..04fe866
--- /dev/null
+++ b/gaf2aln.ipynb
@@ -0,0 +1,2443 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "4ffaf9f6-cc1e-4190-9351-5431c930d25b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import argparse\n",
+    "import concurrent.futures\n",
+    "import os\n",
+    "import re\n",
+    "\n",
+    "# Stand-in for the argparse arguments\n",
+    "class arguments():\n",
+    "    gfa = \"/home/amergez/Documents/Scratch/LeChou/35Bra-v2a/35Bra-v2a.chr03.gfa\"\n",
+    "    gaf = \"/home/amergez/Documents/Scratch/LeChou/35Bra-v2a/Mapping2Graph/GA.FLC2.aln.gaf\"\n",
+    "    threads = 8\n",
+    "    version = False\n",
+    "args = arguments()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "280c8847-22e8-4063-bde8-3e4e72cf20e7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Toolbox\n",
+    "def walk2path(walk):\n",
+    "    \"\"\"\n",
+    "    Takes a walk as a single string and returns a list of node ids with signs (GFA v1 style)\n",
+    "    \"\"\"\n",
+    "    _ = re.findall(r'>\\w+|<\\w+', walk)\n",
+    "    # Converting ['>..', '>..', '<..', '>..'] to ['..+', '..+', '..-', '..+']\n",
+    "    return [f'{elem[1:]}{(elem[0] == \">\")*\"+\"+(elem[0] == \"<\")*\"-\"}' for elem in _]\n",
+    "\n",
+    "def cigar2basealn(cigar):\n",
+    "    \"\"\"\n",
+    "    Takes a CIGAR string and converts it into a list of base-level alignment operations.\n",
+    "    For example : \"345=\" -> [\"=\", \"=\", ..., \"=\"] of length 345.\n",
+    "    \"\"\"\n",
+    "    _ = re.findall(r'\\d+\\D', cigar)\n",
+    "    final_cigar = []\n",
+    "    for match in _:\n",
+    "        final_cigar += [match[-1]]*int(match[:-1])\n",
+    "\n",
+    "    return final_cigar\n",
+    "\n",
+    "def basealn2cigar(base_aln_list):\n",
+    "    \n",
+    "    last_elem = base_aln_list[0]\n",
+    "    CIGAR = [[1, last_elem]]\n",
+    "    for elem in base_aln_list[1:]:\n",
+    "        if elem == last_elem:\n",
+    "            CIGAR[-1][0] += 1\n",
+    "\n",
+    "        else :\n",
+    "            CIGAR[-1][0] = str(CIGAR[-1][0])\n",
+    "            CIGAR.append([1, elem])\n",
+    "            last_elem = elem\n",
+    "    CIGAR[-1][0] = str(CIGAR[-1][0])\n",
+    "    return \"\".join([\"\".join(block) for block in CIGAR if block[1] != \"\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "be12e9d4-de76-4c8b-af84-6567549483f4",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[gaf2aln::GAF Parser] Reading /home/amergez/Documents/Scratch/LeChou/35Bra-v2a/Mapping2Graph/GA.FLC2.aln.gaf ...\n",
+      "[gaf2aln::GAF Parser] Extracting alignments ...\n",
+      "{'ALN_1': {'QRY.NAME': 'FLC2.TO1000#1#chr03', 'QRY.LEN': '3735', 'QRY.START': '0', 'QRY.END': '3735', 'STRAND': '+', 'PATH.MATCH': [('7046526', '+'), ('7046528', '+'), ('7046530', '+'), ('7046531', '+'), ('7046532', '+'), ('7046533', '+'), ('7046534', '+'), ('7046536', '+'), ('7046537', '+'), ('7046539', '+'), ('7046541', '+'), ('7046542', '+'), ('7046544', '+'), ('7046546', '+'), ('7046547', '+'), ('7046549', '+'), ('7046551', '+'), ('7046552', '+'), ('7046554', '+'), ('7046556', '+'), ('7046556', '+'), ('7046556', '+'), ('7046557', '+'), ('7046558', '+'), ('7046559', '+'), ('7046560', '+'), ('7046561', '+'), ('7046561', '+'), ('7046561', '+'), ('7046561', '+'), ('7046561', '+'), ('7046561', '+'), ('7046561', '+'), ('7046561', '+'), ('7046561', '+'), ('7046561', '+'), ('7046562', '+'), ('7046564', '+'), ('7046565', '+'), ('7046567', '+'), ('7046568', '+'), ('7046570', '+'), ('7046571', '+'), ('7046573', '+'), ('7046574', '+'), ('7046576', '+'), ('7046577', '+'), ('7046579', '+'), ('7046581', '+'), ('7046583', '+'), ('7046584', '+'), ('7046586', '+'), ('7046587', '+'), ('7046589', '+'), ('7046590', '+'), ('7046592', '+'), ('7046593', '+'), ('7046594', '+'), ('7046596', '+'), ('7046597', '+'), ('7046599', '+'), ('7046600', '+'), ('7046601', '+'), ('7046603', '+'), ('7046604', '+'), ('7046606', '+'), ('7046608', '+'), ('7046609', '+'), ('7046621', '+'), ('7046622', '+'), ('7046624', '+'), ('7046625', '+'), ('7046626', '+'), ('7046628', '+'), ('7046631', '+'), ('7046673', '+'), ('7046631', '+'), ('7046673', '+'), ('7046631', '+'), ('7046632', '+'), ('7046634', '+'), ('7046635', '+'), ('7046637', '+'), ('7046638', '+'), ('7046639', '+'), ('7046641', '+'), ('7046644', '+'), ('7046646', '+'), ('7046647', '+'), ('7046649', '+'), ('7046650', '+'), ('7046652', '+'), ('7046653', '+'), ('7046654', '+'), ('7046656', '+'), ('7046657', '+'), ('7046659', '+'), ('7046660', '+'), ('7046662', '+'), ('7046663', '+'), ('7046665', '+'), ('7046667', '+'), ('7046668', '+'), 
('7046670', '+'), ('7046671', '+'), ('7046674', '+'), ('7046675', '+'), ('7046674', '+'), ('7046675', '+'), ('7046676', '+'), ('7046678', '+'), ('7046679', '+'), ('7046680', '+'), ('7046682', '+'), ('7046684', '+'), ('7046685', '+'), ('7046686', '+'), ('7046688', '+'), ('7046690', '+'), ('7046692', '+'), ('7046693', '+'), ('7046695', '+'), ('7046696', '+'), ('7046698', '+'), ('7046700', '+'), ('7046702', '+'), ('7046703', '+'), ('7046704', '+'), ('7046706', '+'), ('7046707', '+'), ('7046709', '+'), ('7046710', '+'), ('7046712', '+'), ('7046713', '+'), ('7046715', '+'), ('7046718', '+'), ('7046717', '+'), ('7046718', '+'), ('7046717', '+'), ('7046718', '+'), ('7046720', '+'), ('7046722', '+'), ('7046724', '+'), ('7046725', '+'), ('7046727', '+'), ('7046728', '+'), ('7046729', '+'), ('7046730', '+'), ('7046731', '+'), ('7046733', '+'), ('7046735', '+'), ('7046736', '+'), ('7046738', '+'), ('7046739', '+'), ('7046740', '+'), ('7046738', '+'), ('7046739', '+'), ('7046740', '+'), ('7046738', '+'), ('7046739', '+'), ('7046741', '+')], 'PATH.LEN': '3822', 'ALN.START': '77', 'ALN.END': '3812', 'RES.MATCH': '3735', 'ALN.BLOCK.LEN': '3735', 'MAPPING.QUAL': '60', 'RAW.CIGAR': 'cg:Z:3735=', 'TAGS': 'AS:f:3735,dv:f:0,id:f:1'}, 'ALN_2': {'QRY.NAME': 'FLC2.TO1000#1#chr03', 'QRY.LEN': '3735', 'QRY.START': '0', 'QRY.END': '3735', 'STRAND': '+', 'PATH.MATCH': [('7594382', '+'), ('7594369', '+'), ('7594371', '+'), ('7594021', '+'), ('7594286', '+'), ('7594374', '+'), ('7594356', '+'), ('7594374', '+'), ('7594374', '+'), ('7594375', '+'), ('7594626', '+'), ('7594011', '+'), ('7594374', '+'), ('7594375', '+'), ('7594369', '+'), ('7594371', '+'), ('7594021', '+'), ('7594021', '+'), ('7594021', '+'), ('7594021', '+'), ('7594241', '+'), ('7594248', '+'), ('7594286', '+'), ('7594311', '+'), ('7594315', '+'), ('7594311', '+'), ('7594330', '+'), ('7594311', '+'), ('7594315', '+'), ('7594374', '+'), ('7594311', '+'), ('7594374', '+'), ('7594369', '+'), ('7594021', '+'), ('7594026', '+'), 
('7594021', '+'), ('7594021', '+'), ('7594026', '+'), ('7594021', '+'), ('7594021', '+'), ('7594021', '+'), ('7594021', '+'), ('7594286', '+'), ('7594374', '+'), ('7594021', '+'), ('7594286', '+'), ('7594311', '+'), ('7594286', '+'), ('7594311', '+'), ('7594286', '+'), ('7594311', '+'), ('7594286', '+'), ('7594311', '+'), ('7594315', '+'), ('7594286', '+'), ('7594311', '+'), ('7594374', '+'), ('7594021', '+'), ('7594286', '+'), ('7594286', '+'), ('7594374', '+'), ('7594356', '+'), ('7594374', '+'), ('7594374', '+'), ('7594375', '+'), ('7594374', '+'), ('7594356', '+'), ('7594374', '+'), ('7594375', '+'), ('7594374', '+'), ('7594350', '+'), ('7594264', '+'), ('7594207', '+'), ('7594225', '+'), ('7594227', '+'), ('7594120', '+'), ('7594132', '+'), ('7594165', '+'), ('7594172', '+')], 'PATH.LEN': '61224', 'ALN.START': '0', 'ALN.END': '3735', 'RES.MATCH': '3734', 'ALN.BLOCK.LEN': '3735', 'MAPPING.QUAL': '0', 'RAW.CIGAR': 'cg:Z:57=1X3677=', 'TAGS': 'AS:f:3732.06,dv:f:0.000267738,id:f:0.999732'}}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Parsing the .gaf file\n",
+    "print(f\"[gaf2aln::GAF Parser] Reading {args.gaf} ...\")\n",
+    "with open(args.gaf, 'r') as file:\n",
+    "    gaf_lines = file.readlines()\n",
+    "\n",
+    "gaf_col = [\n",
+    "    \"QRY.NAME\", \"QRY.LEN\", \"QRY.START\", \"QRY.END\", \"STRAND\", \n",
+    "    \"PATH.MATCH\", \"PATH.LEN\", \"ALN.START\", \"ALN.END\",\n",
+    "    \"RES.MATCH\", \"ALN.BLOCK.LEN\", \"MAPPING.QUAL\"\n",
+    "    ]\n",
+    "\n",
+    "# Creating dictionary to store alignments\n",
+    "print(f\"[gaf2aln::GAF Parser] Extracting alignments ...\")\n",
+    "aln_dict = {}\n",
+    "for line in range(len(gaf_lines)):\n",
+    "    ## Splitting the line by tabulation\n",
+    "    line_content = gaf_lines[line][:-1].split('\\t')\n",
+    "\n",
+    "    ## Adding alignment info to dictionary\n",
+    "    aln_dict[f\"ALN_{line+1}\"] = {\n",
+    "        gaf_col[i]: line_content[i] for i in range(len(gaf_col))\n",
+    "    }\n",
+    "    \n",
+    "    ## Splitting \"PATH.MATCH\" into a list\n",
+    "    aln_dict[f\"ALN_{line+1}\"][\"PATH.MATCH\"] = [\n",
+    "        (str(node_id[:-1]), node_id[-1]) \n",
+    "        for node_id in walk2path(aln_dict[f\"ALN_{line+1}\"][\"PATH.MATCH\"])\n",
+    "    ]\n",
+    "\n",
+    "    ## Adding CIGAR\n",
+    "    aln_dict[f\"ALN_{line+1}\"][\"RAW.CIGAR\"] = line_content[-1]\n",
+    "\n",
+    "    ## Adding tags\n",
+    "    aln_dict[f\"ALN_{line+1}\"][\"TAGS\"] = \",\".join(line_content[13:-1])\n",
+    "\n",
+    "# Getting the ids of the nodes of interest\n",
+    "aln_nodes = np.unique([\n",
+    "    str(node_id) \n",
+    "    for aln in aln_dict.keys() \n",
+    "    for node_id, orient in aln_dict[aln][\"PATH.MATCH\"]\n",
+    "]).tolist()\n",
+    "\n",
+    "print(aln_dict)\n",
+    "del gaf_lines, gaf_col"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "2f891424-0d88-4fd3-99ff-b0a8c90587ff",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[gaf2aln::GFA Parser] Reading /home/amergez/Documents/Scratch/LeChou/35Bra-v2a/35Bra-v2a.chr03.gfa ...\n",
+      "[gaf2aln::GFA Parser] Extracting nodes, paths and links ...\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Parsing the .gfa\n",
+    "print(f\"[gaf2aln::GFA Parser] Reading {args.gfa} ...\")\n",
+    "with open(args.gfa, 'r') as file:\n",
+    "    gfa_lines = file.readlines()\n",
+    "\n",
+    "# Node length dictionary structured as follows:\n",
+    "# {<NODE.ID>: <NODE.LENGTH>}\n",
+    "nodes_length = {}\n",
+    "# Nodes dictionary structured as follows:\n",
+    "# { <ALN.NODE.ID> : {\n",
+    "#   <PATH.NAME>: {\"START\": start, \"END\": end, \"STRAND\": strand},\n",
+    "#   <ALN.NAME>: {\"START\": start, \"END\": end, \"S.OFF\": start.offset, \"E.OFF\": end.offset, \"STRAND\": strand, \"CIGAR\": CIGAR}\n",
+    "#   }\n",
+    "# }\n",
+    "nodes = {node_id: {} for node_id in aln_nodes}\n",
+    "# Paths dictionary structured as follows:\n",
+    "# {<PATH.NAME>: {NODES: {<NODE.ID>: <NODE.ORIENT>}, CIGAR: <CIGAR in comma separated list>}}\n",
+    "paths = {}\n",
+    "# Links dictionary structured as follows:\n",
+    "# {<FROM.NODE.ID>: {<TO.NODE.ID>: {FROM.ORIENT: <FROM.ORIENT>, TO.ORIENT: <TO.ORIENT>}}}\n",
+    "links = {}\n",
+    "\n",
+    "# Parsing the gfa\n",
+    "print(f\"[gaf2aln::GFA Parser] Extracting nodes, paths and links ...\")\n",
+    "\n",
+    "def GFA_parser(gfa_lines, nodes = nodes):\n",
+    "    _links, _nodes, _nodes_length, paths = {}, {}, {}, {}\n",
+    "    for line in gfa_lines:\n",
+    "        line_content = line[:-1].split(\"\\t\")\n",
+    "        line_id = line_content[0]\n",
+    "        \n",
+    "        # Segment line\n",
+    "        if line_id == \"S\" :\n",
+    "            \n",
+    "            _nodes_length[str(line_content[1])] = len(line_content[2])\n",
+    "        \n",
+    "        # Link line\n",
+    "        elif line_id == \"L\":\n",
+    "            try :\n",
+    "                _links[str(line_content[1])][str(line_content[3])] = {\n",
+    "                    \"FROM\": str(line_content[2]), \n",
+    "                    \"TO\": str(line_content[4])\n",
+    "                }\n",
+    "\n",
+    "            except :\n",
+    "                _links[str(line_content[1])] = {\n",
+    "                    str(line_content[3]) : {\"FROM.ORIENT\": str(line_content[2]), \"TO.ORIENT\": str(line_content[4])}\n",
+    "                }\n",
+    "\n",
+    "        # Path line\n",
+    "        elif line_id == \"P\":\n",
+    "            _paths[str(line_content[1])] = {\n",
+    "                \"NODES\": {\n",
+    "                    str(node_id[:-1]): str(node_id[-1])\n",
+    "                    for node_id in line_content[2].split(',')\n",
+    "                },\n",
+    "                \"CIGAR\": line_content[3]\n",
+    "            }\n",
+    "\n",
+    "    return [_links, _nodes, _nodes_length, _paths]\n",
+    "\n",
+    "# splits = np.quantile(range(len(gfa_lines)+1), q= np.array(args.threads+1)/args.threads, method='higher').tolist()\n",
+    "# res = []\n",
+    "# for i in range(1, len(splits)):\n",
+    "#     res.append(executor.submit(GFA_parser, gfa_lines[splits[i-1]:splits[i]]))\n",
+    "\n",
+    "# for out in res:\n",
+    "#     results = out.result()\n",
+    "\n",
+    "#     for link_id, link_info in results[0].items():\n",
+    "#         links[]\n",
+    "\n",
+    "\n",
+    "for line in gfa_lines:\n",
+    "    line_content = line[:-1].split(\"\\t\")\n",
+    "    line_id = line_content[0]\n",
+    "    \n",
+    "    # Segment line\n",
+    "    if line_id == \"S\" :\n",
+    "        \n",
+    "        nodes_length[str(line_content[1])] = len(line_content[2])\n",
+    "    \n",
+    "    # Link line\n",
+    "    elif line_id == \"L\":\n",
+    "        try :\n",
+    "            links[str(line_content[1])][str(line_content[3])] = {\n",
+    "                \"FROM\": str(line_content[2]), \n",
+    "                \"TO\": str(line_content[4])\n",
+    "            }\n",
+    "\n",
+    "        except :\n",
+    "            links[str(line_content[1])] = {\n",
+    "                str(line_content[3]) : {\"FROM.ORIENT\": str(line_content[2]), \"TO.ORIENT\": str(line_content[4])}\n",
+    "            }\n",
+    "\n",
+    "    # Path line\n",
+    "    elif line_id == \"P\":\n",
+    "        paths[str(line_content[1])] = {\n",
+    "            \"NODES\": {\n",
+    "                str(node_id[:-1]): str(node_id[-1])\n",
+    "                for node_id in line_content[2].split(',')\n",
+    "            },\n",
+    "            \"CIGAR\": line_content[3]\n",
+    "        }\n",
+    "\n",
+    "del gfa_lines"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "a403c88e-54ea-4a67-9047-dc44eba7f51a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[gaf2aln::Graph position processing] Computing nodes positions in each paths...\n",
+      "[gaf2aln::Graph position processing] Running on Capitata#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on D101#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on D134#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on G06-09-28#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on G07-DH-33#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on HDEM#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on Korso#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on M249#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on OX-heart#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on PL021#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on RC34#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T02#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T03#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T04#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T06#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T07#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T08#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T09#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T10#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T11#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T12#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T13#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T14#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T15#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T16#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T17#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T18#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T19#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T21#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T24#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T25#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T26#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on T27#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on TO1000#1#chr03 ...\n",
+      "[gaf2aln::Graph position processing] Running on W1701#1#chr03 ...\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(f\"[gaf2aln::Graph position processing] Computing nodes positions in each paths...\")\n",
+    "def get_node_pos(path_name, nodes = nodes, paths = paths, nodes_length = nodes_length):\n",
+    "    print(f\"[gaf2aln::Graph position processing] Running on {path_name} ...\")\n",
+    "    cur_pos = 0\n",
+    "\n",
+    "    out = {}\n",
+    "    # Iterating over nodes in the path\n",
+    "    for path_node in paths[path_name][\"NODES\"].keys():\n",
+    "        # Only nodes that appear in at least one alignment get a position entry\n",
+    "        if path_node in aln_nodes :\n",
+    "            out[path_node] = {\n",
+    "                \"START\": cur_pos, # Start position of the node start in the current path\n",
+    "                \"END\": cur_pos+nodes_length[path_node], # End position of the node end in the current path\n",
+    "                \"STRAND\": paths[path_name][\"NODES\"][path_node] # Orientation of the node in the current path\n",
+    "                } \n",
+    "\n",
+    "            cur_pos += nodes_length[path_node]+1\n",
+    "        else :\n",
+    "            cur_pos += nodes_length[path_node]+1\n",
+    "\n",
+    "    return out\n",
+    "\n",
+    "res = {}\n",
+    "executor = concurrent.futures.ThreadPoolExecutor(max_workers=args.threads)\n",
+    "# Adding nodes positions relative to path\n",
+    "for path_name in paths.keys():\n",
+    "    res[path_name] = executor.submit(get_node_pos, path_name)\n",
+    "\n",
+    "executor.shutdown(wait=True)\n",
+    "\n",
+    "for path_name, out in res.items():\n",
+    "    results = out.result()\n",
+    "    for path_node, node_pos in results.items():\n",
+    "        nodes[path_node][path_name] = node_pos\n",
+    "\n",
+    "del res"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "bed36bd5-30eb-4d02-8b52-1ae5d753f8f8",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[gaf2aln::Alignment position processing] Computing nodes positions in each alignement...\n",
+      "[gaf2aln::Alignment position processing] Running on ALN_1 ...\n",
+      "0 77 77 0 + 154 77\n",
+      "77 82 0 0 + 5 82\n",
+      "82 83 0 0 + 1 83\n",
+      "83 138 0 0 + 55 138\n",
+      "138 139 0 0 + 1 139\n",
+      "139 202 0 0 + 63 202\n",
+      "202 203 0 0 + 1 203\n",
+      "203 379 0 0 + 176 379\n",
+      "379 380 0 0 + 1 380\n",
+      "380 429 0 0 + 49 429\n",
+      "429 430 0 0 + 1 430\n",
+      "430 457 0 0 + 27 457\n",
+      "457 492 0 0 + 35 492\n",
+      "492 494 0 0 + 2 494\n",
+      "494 497 0 0 + 3 497\n",
+      "497 507 0 0 + 10 507\n",
+      "507 508 0 0 + 1 508\n",
+      "508 564 0 0 + 56 564\n",
+      "564 566 0 0 + 2 566\n",
+      "566 567 0 0 + 1 567\n",
+      "567 568 0 0 + 1 568\n",
+      "568 569 0 0 + 1 569\n",
+      "569 824 0 0 + 255 824\n",
+      "824 826 0 0 + 2 826\n",
+      "826 858 0 0 + 32 858\n",
+      "858 859 0 0 + 1 859\n",
+      "859 860 0 0 + 1 860\n",
+      "860 861 0 0 + 1 861\n",
+      "861 862 0 0 + 1 862\n",
+      "862 863 0 0 + 1 863\n",
+      "863 864 0 0 + 1 864\n",
+      "864 865 0 0 + 1 865\n",
+      "865 866 0 0 + 1 866\n",
+      "866 867 0 0 + 1 867\n",
+      "867 868 0 0 + 1 868\n",
+      "868 869 0 0 + 1 869\n",
+      "869 913 0 0 + 44 913\n",
+      "913 919 0 0 + 6 919\n",
+      "919 978 0 0 + 59 978\n",
+      "978 979 0 0 + 1 979\n",
+      "979 1038 0 0 + 59 1038\n",
+      "1038 1045 0 0 + 7 1045\n",
+      "1045 1046 0 0 + 1 1046\n",
+      "1046 1080 0 0 + 34 1080\n",
+      "1080 1081 0 0 + 1 1081\n",
+      "1081 1107 0 0 + 26 1107\n",
+      "1107 1108 0 0 + 1 1108\n",
+      "1108 1183 0 0 + 75 1183\n",
+      "1183 1186 0 0 + 3 1186\n",
+      "1186 1224 0 0 + 38 1224\n",
+      "1224 1257 0 0 + 33 1257\n",
+      "1257 1289 0 0 + 32 1289\n",
+      "1289 1311 0 0 + 22 1311\n",
+      "1311 1359 0 0 + 48 1359\n",
+      "1359 1382 0 0 + 23 1382\n",
+      "1382 1434 0 0 + 52 1434\n",
+      "1434 1451 0 0 + 17 1451\n",
+      "1451 1531 0 0 + 80 1531\n",
+      "1531 1532 0 0 + 1 1532\n",
+      "1532 1543 0 0 + 11 1543\n",
+      "1543 1544 0 0 + 1 1544\n",
+      "1544 1572 0 0 + 28 1572\n",
+      "1572 1573 0 0 + 1 1573\n",
+      "1573 1587 0 0 + 14 1587\n",
+      "1587 1588 0 0 + 1 1588\n",
+      "1588 1616 0 0 + 28 1616\n",
+      "1616 1617 0 0 + 1 1617\n",
+      "1617 1646 0 0 + 29 1646\n",
+      "1646 1661 0 0 + 15 1661\n",
+      "1661 1673 0 0 + 12 1673\n",
+      "1673 1674 0 0 + 1 1674\n",
+      "1674 1726 0 0 + 52 1726\n",
+      "1726 1727 0 0 + 1 1727\n",
+      "1727 1762 0 0 + 35 1762\n",
+      "1762 1763 0 0 + 1 1763\n",
+      "1763 1764 0 0 + 1 1764\n",
+      "1764 1765 0 0 + 1 1765\n",
+      "1765 1766 0 0 + 1 1766\n",
+      "1766 1767 0 0 + 1 1767\n",
+      "1767 1824 0 0 + 57 1824\n",
+      "1824 1825 0 0 + 1 1825\n",
+      "1825 1975 0 0 + 150 1975\n",
+      "1975 1976 0 0 + 1 1976\n",
+      "1976 2015 0 0 + 39 2015\n",
+      "2015 2016 0 0 + 1 2016\n",
+      "2016 2047 0 0 + 31 2047\n",
+      "2047 2055 0 0 + 8 2055\n",
+      "2055 2056 0 0 + 1 2056\n",
+      "2056 2120 0 0 + 64 2120\n",
+      "2120 2121 0 0 + 1 2121\n",
+      "2121 2157 0 0 + 36 2157\n",
+      "2157 2158 0 0 + 1 2158\n",
+      "2158 2170 0 0 + 12 2170\n",
+      "2170 2171 0 0 + 1 2171\n",
+      "2171 2205 0 0 + 34 2205\n",
+      "2205 2206 0 0 + 1 2206\n",
+      "2206 2344 0 0 + 138 2344\n",
+      "2344 2345 0 0 + 1 2345\n",
+      "2345 2364 0 0 + 19 2364\n",
+      "2364 2383 0 0 + 19 2383\n",
+      "2383 2408 0 0 + 25 2408\n",
+      "2408 2409 0 0 + 1 2409\n",
+      "2409 2441 0 0 + 32 2441\n",
+      "2441 2442 0 0 + 1 2442\n",
+      "2442 2580 0 0 + 138 2580\n",
+      "2580 2581 0 0 + 1 2581\n",
+      "2581 2582 0 0 + 1 2582\n",
+      "2582 2583 0 0 + 1 2583\n",
+      "2583 2584 0 0 + 1 2584\n",
+      "2584 2764 0 0 + 180 2764\n",
+      "2764 2765 0 0 + 1 2765\n",
+      "2765 2797 0 0 + 32 2797\n",
+      "2797 2798 0 0 + 1 2798\n",
+      "2798 2878 0 0 + 80 2878\n",
+      "2878 2879 0 0 + 1 2879\n",
+      "2879 2951 0 0 + 72 2951\n",
+      "2951 2952 0 0 + 1 2952\n",
+      "2952 3002 0 0 + 50 3002\n",
+      "3002 3077 0 0 + 75 3077\n",
+      "3077 3078 0 0 + 1 3078\n",
+      "3078 3093 0 0 + 15 3093\n",
+      "3093 3094 0 0 + 1 3094\n",
+      "3094 3097 0 0 + 3 3097\n",
+      "3097 3140 0 0 + 43 3140\n",
+      "3140 3210 0 0 + 70 3210\n",
+      "3210 3211 0 0 + 1 3211\n",
+      "3211 3229 0 0 + 18 3229\n",
+      "3229 3230 0 0 + 1 3230\n",
+      "3230 3276 0 0 + 46 3276\n",
+      "3276 3277 0 0 + 1 3277\n",
+      "3277 3315 0 0 + 38 3315\n",
+      "3315 3316 0 0 + 1 3316\n",
+      "3316 3322 0 0 + 6 3322\n",
+      "3322 3323 0 0 + 1 3323\n",
+      "3323 3348 0 0 + 25 3348\n",
+      "3348 3349 0 0 + 1 3349\n",
+      "3349 3350 0 0 + 1 3350\n",
+      "3350 3351 0 0 + 1 3351\n",
+      "3351 3352 0 0 + 1 3352\n",
+      "3352 3353 0 0 + 1 3353\n",
+      "3353 3354 0 0 + 1 3354\n",
+      "3354 3356 0 0 + 2 3356\n",
+      "3356 3357 0 0 + 1 3357\n",
+      "3357 3489 0 0 + 132 3489\n",
+      "3489 3490 0 0 + 1 3490\n",
+      "3490 3642 0 0 + 152 3642\n",
+      "3642 3644 0 0 + 2 3644\n",
+      "3644 3685 0 0 + 41 3685\n",
+      "3685 3687 0 0 + 2 3687\n",
+      "3687 3693 0 0 + 6 3693\n",
+      "3693 3694 0 0 + 1 3694\n",
+      "3694 3708 0 0 + 14 3708\n",
+      "3708 3709 0 0 + 1 3709\n",
+      "3709 3710 0 0 + 1 3710\n",
+      "3710 3714 0 0 + 4 3714\n",
+      "3714 3715 0 0 + 1 3715\n",
+      "3715 3716 0 0 + 1 3716\n",
+      "3716 3720 0 0 + 4 3720\n",
+      "3720 3721 0 0 + 1 3721\n",
+      "3721 3722 0 0 + 1 3722\n",
+      "3722 3735 0 10 + 23 3735\n",
+      "[gaf2aln::Alignment position processing] Running on ALN_2 ...\n",
+      "0 1 0 0 + 1 1\n",
+      "1 2 0 0 + 1 2\n",
+      "2 3 0 0 + 1 3\n",
+      "3 4 0 0 + 1 4\n",
+      "4 5 0 0 + 1 5\n",
+      "5 6 0 0 + 1 6\n",
+      "6 7 0 0 + 1 7\n",
+      "7 8 0 0 + 1 8\n",
+      "8 9 0 0 + 1 9\n",
+      "9 10 0 0 + 1 10\n",
+      "10 11 0 0 + 1 11\n",
+      "11 12 0 0 + 1 12\n",
+      "12 13 0 0 + 1 13\n",
+      "13 14 0 0 + 1 14\n",
+      "14 15 0 0 + 1 15\n",
+      "15 16 0 0 + 1 16\n",
+      "16 17 0 0 + 1 17\n",
+      "17 18 0 0 + 1 18\n",
+      "18 19 0 0 + 1 19\n",
+      "19 20 0 0 + 1 20\n",
+      "20 21 0 0 + 1 21\n",
+      "21 22 0 0 + 1 22\n",
+      "22 23 0 0 + 1 23\n",
+      "23 24 0 0 + 1 24\n",
+      "24 25 0 0 + 1 25\n",
+      "25 26 0 0 + 1 26\n",
+      "26 27 0 0 + 1 27\n",
+      "27 28 0 0 + 1 28\n",
+      "28 29 0 0 + 1 29\n",
+      "29 30 0 0 + 1 30\n",
+      "30 31 0 0 + 1 31\n",
+      "31 32 0 0 + 1 32\n",
+      "32 33 0 0 + 1 33\n",
+      "33 34 0 0 + 1 34\n",
+      "34 35 0 0 + 1 35\n",
+      "35 36 0 0 + 1 36\n",
+      "36 37 0 0 + 1 37\n",
+      "37 38 0 0 + 1 38\n",
+      "38 39 0 0 + 1 39\n",
+      "39 40 0 0 + 1 40\n",
+      "40 41 0 0 + 1 41\n",
+      "41 42 0 0 + 1 42\n",
+      "42 43 0 0 + 1 43\n",
+      "43 44 0 0 + 1 44\n",
+      "44 45 0 0 + 1 45\n",
+      "45 46 0 0 + 1 46\n",
+      "46 47 0 0 + 1 47\n",
+      "47 48 0 0 + 1 48\n",
+      "48 49 0 0 + 1 49\n",
+      "49 50 0 0 + 1 50\n",
+      "50 51 0 0 + 1 51\n",
+      "51 52 0 0 + 1 52\n",
+      "52 53 0 0 + 1 53\n",
+      "53 54 0 0 + 1 54\n",
+      "54 55 0 0 + 1 55\n",
+      "55 56 0 0 + 1 56\n",
+      "56 57 0 0 + 1 57\n",
+      "57 58 0 0 + 1 58\n",
+      "58 59 0 0 + 1 59\n",
+      "59 60 0 0 + 1 60\n",
+      "60 61 0 0 + 1 61\n",
+      "61 62 0 0 + 1 62\n",
+      "62 63 0 0 + 1 63\n",
+      "63 64 0 0 + 1 64\n",
+      "64 65 0 0 + 1 65\n",
+      "65 66 0 0 + 1 66\n",
+      "66 67 0 0 + 1 67\n",
+      "67 68 0 0 + 1 68\n",
+      "68 69 0 0 + 1 69\n",
+      "69 70 0 0 + 1 70\n",
+      "70 71 0 0 + 1 71\n",
+      "71 72 0 0 + 1 72\n",
+      "72 73 0 0 + 1 73\n",
+      "73 74 0 0 + 1 74\n",
+      "74 75 0 0 + 1 75\n",
+      "75 76 0 0 + 1 76\n",
+      "76 77 0 0 + 1 77\n",
+      "77 78 0 0 + 1 78\n",
+      "78 3735 0 57489 + 61146 3735\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(f\"[gaf2aln::Alignment position processing] Computing nodes positions in each alignement...\")\n",
+    "# Adding node positions relative to the query of each alignment\n",
+    "\n",
+    "def get_aln_node_info(aln_name, aln_dict = aln_dict, nodes_length = nodes_length):\n",
+    "    # Initializing current position in query\n",
+    "    cur_pos = 0\n",
+    "\n",
+    "    # Getting start and end node ids\n",
+    "    start_end_id = (aln_dict[aln_name][\"PATH.MATCH\"][0][0], aln_dict[aln_name][\"PATH.MATCH\"][-1][0])\n",
+    "\n",
+    "    # Creating result dictionary\n",
+    "    res = {}\n",
+    "\n",
+    "    ## Iterating over node_ids from the given alignment\n",
+    "    for node_id, orient in aln_dict[aln_name][\"PATH.MATCH\"]:\n",
+    "        # Adding entry for current node\n",
+    "        res[node_id] = {aln_name: {}}\n",
+    "\n",
+    "        # First node\n",
+    "        if node_id == start_end_id[0]:\n",
+    "            start_pos = 0\n",
+    "            s_off = int(aln_dict[aln_name][\"ALN.START\"])\n",
+    "            end_pos = nodes_length[node_id]-s_off\n",
+    "            e_off = 0\n",
+    "        # End node\n",
+    "        elif node_id == start_end_id[1]:\n",
+    "            start_pos = cur_pos\n",
+    "            s_off = 0\n",
+    "            end_pos = int(aln_dict[aln_name][\"QRY.END\"])\n",
+    "            e_off = nodes_length[node_id]-(end_pos-cur_pos)\n",
+    "        # Node in between\n",
+    "        else :\n",
+    "            start_pos = cur_pos\n",
+    "            s_off, e_off = 0, 0\n",
+    "            end_pos = cur_pos+nodes_length[node_id]\n",
+    "\n",
+    "        res[node_id] = {\n",
+    "            \"START\": start_pos, # Start position on the query\n",
+    "            \"END\": end_pos, # End position on the query\n",
+    "            \"S.OFF\": s_off, # Offset between the start of the alignment and the node's start\n",
+    "            \"E.OFF\": e_off, # Offset between the end of the alignment and the node's end \n",
+    "            \"STRAND\": orient # Orientation of the node in the alignment\n",
+    "            }\n",
+    "        \n",
+    "        cur_pos = end_pos\n",
+    "        print(start_pos, end_pos, s_off, e_off, orient, nodes_length[node_id], cur_pos)\n",
+    "\n",
+    "    return res\n",
+    "\n",
+    "# Storing alignment results\n",
+    "res = {}\n",
+    "executor = concurrent.futures.ThreadPoolExecutor(max_workers=args.threads)\n",
+    "for aln_name in aln_dict.keys():\n",
+    "    print(f\"[gaf2aln::Alignment position processing] Running on {aln_name} ...\")\n",
+    "    \n",
+    "    res[aln_name] = executor.submit(get_aln_node_info, aln_name)\n",
+    "    #res[aln_name] = get_aln_node_info(aln_name, aln_dict = aln_dict, nodes_length = nodes_length)\n",
+    "\n",
+    "executor.shutdown(wait=True)\n",
+    "\n",
+    "for aln_name, node_info in res.items():\n",
+    "    results = node_info.result()\n",
+    "    for node_id, info in results.items():\n",
+    "        nodes[node_id][aln_name] = info\n",
+    "\n",
+    "del res"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "4c30727c-7ffc-4852-ad81-ca2a5a7f9957",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[gaf2aln::CIGAR processing] Computing nodes cigar from alignement ...\n",
+      "[gaf2aln::CIGAR processing] Running on ALN_1 ...\n",
+      "[gaf2aln::CIGAR processing] Running on ALN_2 ...\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Calculating CIGAR for each node in each alignment\n",
+    "print(f\"[gaf2aln::CIGAR processing] Computing nodes cigar from alignement ...\")\n",
+    "# Iterating over alignments\n",
+    "for aln in aln_dict.keys():\n",
+    "    \n",
+    "    print(f\"[gaf2aln::CIGAR processing] Running on {aln} ...\")\n",
+    "    # Getting the list of base-level alignment operations ([\"=\", \"X\", ...] from \"1=1X...\")\n",
+    "    raw_cigar = cigar2basealn(aln_dict[aln][\"RAW.CIGAR\"])\n",
+    "    CIGAR={}\n",
+    "\n",
+    "    for node_id, orient in aln_dict[aln][\"PATH.MATCH\"]:\n",
+    "\n",
+    "        _cigar = basealn2cigar(raw_cigar[\n",
+    "            nodes[node_id][aln][\"START\"]:nodes[node_id][aln][\"END\"]\n",
+    "            ])\n",
+    "        nodes[node_id][aln][\"CIGAR\"] = _cigar\n",
+    "        #print(_cigar, nodes[node_id][aln][\"START\"], nodes[node_id][aln][\"END\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "e15e4762-cd71-4afe-bc74-ebe44869fee6",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ALN_1\n",
+      "7046526 D134#1#chr03 0 77\n",
+      "\tIn path\n",
+      "\t 73306158 73306235\n",
+      "skipped\n",
+      "\n",
+      "7046528 D134#1#chr03 77 82\n",
+      "\tIn path\n",
+      "\t 73306238 73306243\n",
+      "{'Q.START': 0, 'Q.END': 77, 'T.START': 73306158, 'T.END': 73306235, 'CG': '77='}\n",
+      "7046530 D134#1#chr03 82 83\n",
+      "\tNot in path\n",
+      "7046531 D134#1#chr03 83 138\n",
+      "\tIn path\n",
+      "\t 73306246 73306301\n",
+      "{'Q.START': 77, 'Q.END': 82, 'T.START': 73306238, 'T.END': 73306243, 'CG': '5='}\n",
+      "7046532 D134#1#chr03 138 139\n",
+      "\tNot in path\n",
+      "7046533 D134#1#chr03 139 202\n",
+      "\tIn path\n",
+      "\t 73306302 73306365\n",
+      "{'Q.START': 83, 'Q.END': 138, 'T.START': 73306246, 'T.END': 73306301, 'CG': '55='}\n",
+      "7046534 D134#1#chr03 202 203\n",
+      "\tIn path\n",
+      "\t 73306366 73306367\n",
+      "{'Q.START': 139, 'Q.END': 202, 'T.START': 73306302, 'T.END': 73306365, 'CG': '63='}\n",
+      "7046536 D134#1#chr03 203 379\n",
+      "\tIn path\n",
+      "\t 73306368 73306544\n",
+      "{'Q.START': 202, 'Q.END': 203, 'T.START': 73306366, 'T.END': 73306367, 'CG': '1='}\n",
+      "7046537 D134#1#chr03 379 380\n",
+      "\tIn path\n",
+      "\t 73306545 73306546\n",
+      "{'Q.START': 203, 'Q.END': 379, 'T.START': 73306368, 'T.END': 73306544, 'CG': '176='}\n",
+      "7046539 D134#1#chr03 380 429\n",
+      "\tIn path\n",
+      "\t 73306547 73306596\n",
+      "{'Q.START': 379, 'Q.END': 380, 'T.START': 73306545, 'T.END': 73306546, 'CG': '1='}\n",
+      "7046541 D134#1#chr03 429 430\n",
+      "\tIn path\n",
+      "\t 73306597 73306598\n",
+      "{'Q.START': 380, 'Q.END': 429, 'T.START': 73306547, 'T.END': 73306596, 'CG': '49='}\n",
+      "7046542 D134#1#chr03 430 457\n",
+      "\tIn path\n",
+      "\t 73306599 73306626\n",
+      "{'Q.START': 429, 'Q.END': 430, 'T.START': 73306597, 'T.END': 73306598, 'CG': '1='}\n",
+      "7046544 D134#1#chr03 457 492\n",
+      "\tIn path\n",
+      "\t 73306641 73306676\n",
+      "{'Q.START': 430, 'Q.END': 457, 'T.START': 73306599, 'T.END': 73306626, 'CG': '27='}\n",
+      "7046546 D134#1#chr03 492 494\n",
+      "\tNot in path\n",
+      "7046547 D134#1#chr03 494 497\n",
+      "\tNot in path\n",
+      "7046549 D134#1#chr03 497 507\n",
+      "\tNot in path\n",
+      "7046551 D134#1#chr03 507 508\n",
+      "\tNot in path\n",
+      "7046552 D134#1#chr03 508 564\n",
+      "\tIn path\n",
+      "\t 73306694 73306750\n",
+      "{'Q.START': 457, 'Q.END': 492, 'T.START': 73306641, 'T.END': 73306676, 'CG': '35='}\n",
+      "7046554 D134#1#chr03 564 566\n",
+      "\tNot in path\n",
+      "7046556 D134#1#chr03 568 569\n",
+      "\tIn path\n",
+      "\t 73306753 73306754\n",
+      "{'Q.START': 508, 'Q.END': 564, 'T.START': 73306694, 'T.END': 73306750, 'CG': '56='}\n",
+      "7046556 D134#1#chr03 568 569\n",
+      "\tIn path\n",
+      "\t 73306753 73306754\n",
+      "{'Q.START': 568, 'Q.END': 569, 'T.START': 73306753, 'T.END': 73306754, 'CG': '1='}\n",
+      "7046556 D134#1#chr03 568 569\n",
+      "\tIn path\n",
+      "\t 73306753 73306754\n",
+      "{'Q.START': 568, 'Q.END': 569, 'T.START': 73306753, 'T.END': 73306754, 'CG': '1='}\n",
+      "7046557 D134#1#chr03 569 824\n",
+      "\tIn path\n",
+      "\t 73306755 73307010\n",
+      "{'Q.START': 568, 'Q.END': 569, 'T.START': 73306753, 'T.END': 73306754, 'CG': '1='}\n",
+      "7046558 D134#1#chr03 824 826\n",
+      "\tNot in path\n",
+      "7046559 D134#1#chr03 826 858\n",
+      "\tIn path\n",
+      "\t 73307011 73307043\n",
+      "{'Q.START': 569, 'Q.END': 824, 'T.START': 73306755, 'T.END': 73307010, 'CG': '255='}\n",
+      "7046560 D134#1#chr03 858 859\n",
+      "\tIn path\n",
+      "\t 73307044 73307045\n",
+      "{'Q.START': 826, 'Q.END': 858, 'T.START': 73307011, 'T.END': 73307043, 'CG': '32='}\n",
+      "7046561 D134#1#chr03 868 869\n",
+      "\tIn path\n",
+      "\t 73307046 73307047\n",
+      "{'Q.START': 858, 'Q.END': 859, 'T.START': 73307044, 'T.END': 73307045, 'CG': '1='}\n",
+      "7046561 D134#1#chr03 868 869\n",
+      "\tIn path\n",
+      "\t 73307046 73307047\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
+      "7046561 D134#1#chr03 868 869\n",
+      "\tIn path\n",
+      "\t 73307046 73307047\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
+      "7046561 D134#1#chr03 868 869\n",
+      "\tIn path\n",
+      "\t 73307046 73307047\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
+      "7046561 D134#1#chr03 868 869\n",
+      "\tIn path\n",
+      "\t 73307046 73307047\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
+      "7046561 D134#1#chr03 868 869\n",
+      "\tIn path\n",
+      "\t 73307046 73307047\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
+      "7046561 D134#1#chr03 868 869\n",
+      "\tIn path\n",
+      "\t 73307046 73307047\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
+      "7046561 D134#1#chr03 868 869\n",
+      "\tIn path\n",
+      "\t 73307046 73307047\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
+      "7046561 D134#1#chr03 868 869\n",
+      "\tIn path\n",
+      "\t 73307046 73307047\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
+      "7046561 D134#1#chr03 868 869\n",
+      "\tIn path\n",
+      "\t 73307046 73307047\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
+      "7046562 D134#1#chr03 869 913\n",
+      "\tIn path\n",
+      "\t 73307048 73307092\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
+      "7046564 D134#1#chr03 913 919\n",
+      "\tIn path\n",
+      "\t 73307093 73307099\n",
+      "{'Q.START': 869, 'Q.END': 913, 'T.START': 73307048, 'T.END': 73307092, 'CG': '44='}\n",
+      "7046565 D134#1#chr03 919 978\n",
+      "\tIn path\n",
+      "\t 73307100 73307159\n",
+      "{'Q.START': 913, 'Q.END': 919, 'T.START': 73307093, 'T.END': 73307099, 'CG': '6='}\n",
+      "7046567 D134#1#chr03 978 979\n",
+      "\tIn path\n",
+      "\t 73307160 73307161\n",
+      "{'Q.START': 919, 'Q.END': 978, 'T.START': 73307100, 'T.END': 73307159, 'CG': '59='}\n",
+      "7046568 D134#1#chr03 979 1038\n",
+      "\tIn path\n",
+      "\t 73307162 73307221\n",
+      "{'Q.START': 978, 'Q.END': 979, 'T.START': 73307160, 'T.END': 73307161, 'CG': '1='}\n",
+      "7046570 D134#1#chr03 1038 1045\n",
+      "\tIn path\n",
+      "\t 73307224 73307231\n",
+      "{'Q.START': 979, 'Q.END': 1038, 'T.START': 73307162, 'T.END': 73307221, 'CG': '59='}\n",
+      "7046571 D134#1#chr03 1045 1046\n",
+      "\tIn path\n",
+      "\t 73307232 73307233\n",
+      "{'Q.START': 1038, 'Q.END': 1045, 'T.START': 73307224, 'T.END': 73307231, 'CG': '7='}\n",
+      "7046573 D134#1#chr03 1046 1080\n",
+      "\tIn path\n",
+      "\t 73307234 73307268\n",
+      "{'Q.START': 1045, 'Q.END': 1046, 'T.START': 73307232, 'T.END': 73307233, 'CG': '1='}\n",
+      "7046574 D134#1#chr03 1080 1081\n",
+      "\tIn path\n",
+      "\t 73307269 73307270\n",
+      "{'Q.START': 1046, 'Q.END': 1080, 'T.START': 73307234, 'T.END': 73307268, 'CG': '34='}\n",
+      "7046576 D134#1#chr03 1081 1107\n",
+      "\tIn path\n",
+      "\t 73307271 73307297\n",
+      "{'Q.START': 1080, 'Q.END': 1081, 'T.START': 73307269, 'T.END': 73307270, 'CG': '1='}\n",
+      "7046577 D134#1#chr03 1107 1108\n",
+      "\tNot in path\n",
+      "7046579 D134#1#chr03 1108 1183\n",
+      "\tIn path\n",
+      "\t 73307300 73307375\n",
+      "{'Q.START': 1081, 'Q.END': 1107, 'T.START': 73307271, 'T.END': 73307297, 'CG': '26='}\n",
+      "7046581 D134#1#chr03 1183 1186\n",
+      "\tIn path\n",
+      "\t 73307376 73307379\n",
+      "{'Q.START': 1108, 'Q.END': 1183, 'T.START': 73307300, 'T.END': 73307375, 'CG': '75='}\n",
+      "7046583 D134#1#chr03 1186 1224\n",
+      "\tNot in path\n",
+      "7046584 D134#1#chr03 1224 1257\n",
+      "\tIn path\n",
+      "\t 73307419 73307452\n",
+      "{'Q.START': 1183, 'Q.END': 1186, 'T.START': 73307376, 'T.END': 73307379, 'CG': '3='}\n",
+      "7046586 D134#1#chr03 1257 1289\n",
+      "\tNot in path\n",
+      "7046587 D134#1#chr03 1289 1311\n",
+      "\tIn path\n",
+      "\t 73307475 73307497\n",
+      "{'Q.START': 1224, 'Q.END': 1257, 'T.START': 73307419, 'T.END': 73307452, 'CG': '33='}\n",
+      "7046589 D134#1#chr03 1311 1359\n",
+      "\tNot in path\n",
+      "7046590 D134#1#chr03 1359 1382\n",
+      "\tIn path\n",
+      "\t 73307546 73307569\n",
+      "{'Q.START': 1289, 'Q.END': 1311, 'T.START': 73307475, 'T.END': 73307497, 'CG': '22='}\n",
+      "7046592 D134#1#chr03 1382 1434\n",
+      "\tNot in path\n",
+      "7046593 D134#1#chr03 1434 1451\n",
+      "\tIn path\n",
+      "\t 73307643 73307660\n",
+      "{'Q.START': 1359, 'Q.END': 1382, 'T.START': 73307546, 'T.END': 73307569, 'CG': '23='}\n",
+      "7046594 D134#1#chr03 1451 1531\n",
+      "\tIn path\n",
+      "\t 73307661 73307741\n",
+      "{'Q.START': 1434, 'Q.END': 1451, 'T.START': 73307643, 'T.END': 73307660, 'CG': '17='}\n",
+      "7046596 D134#1#chr03 1531 1532\n",
+      "\tNot in path\n",
+      "7046597 D134#1#chr03 1532 1543\n",
+      "\tIn path\n",
+      "\t 73307744 73307755\n",
+      "{'Q.START': 1451, 'Q.END': 1531, 'T.START': 73307661, 'T.END': 73307741, 'CG': '80='}\n",
+      "7046599 D134#1#chr03 1543 1544\n",
+      "\tNot in path\n",
+      "7046600 D134#1#chr03 1544 1572\n",
+      "\tIn path\n",
+      "\t 73307758 73307786\n",
+      "{'Q.START': 1532, 'Q.END': 1543, 'T.START': 73307744, 'T.END': 73307755, 'CG': '11='}\n",
+      "7046601 D134#1#chr03 1572 1573\n",
+      "\tIn path\n",
+      "\t 73307787 73307788\n",
+      "{'Q.START': 1544, 'Q.END': 1572, 'T.START': 73307758, 'T.END': 73307786, 'CG': '28='}\n",
+      "7046603 D134#1#chr03 1573 1587\n",
+      "\tIn path\n",
+      "\t 73307789 73307803\n",
+      "{'Q.START': 1572, 'Q.END': 1573, 'T.START': 73307787, 'T.END': 73307788, 'CG': '1='}\n",
+      "7046604 D134#1#chr03 1587 1588\n",
+      "\tNot in path\n",
+      "7046606 D134#1#chr03 1588 1616\n",
+      "\tIn path\n",
+      "\t 73307806 73307834\n",
+      "{'Q.START': 1573, 'Q.END': 1587, 'T.START': 73307789, 'T.END': 73307803, 'CG': '14='}\n",
+      "7046608 D134#1#chr03 1616 1617\n",
+      "\tIn path\n",
+      "\t 73307835 73307836\n",
+      "{'Q.START': 1588, 'Q.END': 1616, 'T.START': 73307806, 'T.END': 73307834, 'CG': '28='}\n",
+      "7046609 D134#1#chr03 1617 1646\n",
+      "\tIn path\n",
+      "\t 73307837 73307866\n",
+      "{'Q.START': 1616, 'Q.END': 1617, 'T.START': 73307835, 'T.END': 73307836, 'CG': '1='}\n",
+      "7046621 D134#1#chr03 1646 1661\n",
+      "\tIn path\n",
+      "\t 73307867 73307882\n",
+      "{'Q.START': 1617, 'Q.END': 1646, 'T.START': 73307837, 'T.END': 73307866, 'CG': '29='}\n",
+      "7046622 D134#1#chr03 1661 1673\n",
+      "\tIn path\n",
+      "\t 73307883 73307895\n",
+      "{'Q.START': 1646, 'Q.END': 1661, 'T.START': 73307867, 'T.END': 73307882, 'CG': '15='}\n",
+      "7046624 D134#1#chr03 1673 1674\n",
+      "\tIn path\n",
+      "\t 73307896 73307897\n",
+      "{'Q.START': 1661, 'Q.END': 1673, 'T.START': 73307883, 'T.END': 73307895, 'CG': '12='}\n",
+      "7046625 D134#1#chr03 1674 1726\n",
+      "\tIn path\n",
+      "\t 73307898 73307950\n",
+      "{'Q.START': 1673, 'Q.END': 1674, 'T.START': 73307896, 'T.END': 73307897, 'CG': '1='}\n",
+      "7046626 D134#1#chr03 1726 1727\n",
+      "\tNot in path\n",
+      "7046628 D134#1#chr03 1727 1762\n",
+      "\tIn path\n",
+      "\t 73307953 73307988\n",
+      "{'Q.START': 1674, 'Q.END': 1726, 'T.START': 73307898, 'T.END': 73307950, 'CG': '52='}\n",
+      "7046631 D134#1#chr03 1766 1767\n",
+      "\tIn path\n",
+      "\t 73307991 73307992\n",
+      "{'Q.START': 1727, 'Q.END': 1762, 'T.START': 73307953, 'T.END': 73307988, 'CG': '35='}\n",
+      "7046673 D134#1#chr03 1765 1766\n",
+      "\tIn path\n",
+      "\t 73307993 73307994\n",
+      "{'Q.START': 1766, 'Q.END': 1767, 'T.START': 73307991, 'T.END': 73307992, 'CG': '1='}\n",
+      "7046631 D134#1#chr03 1766 1767\n",
+      "\tIn path\n",
+      "\t 73307991 73307992\n",
+      "{'Q.START': 1765, 'Q.END': 1766, 'T.START': 73307993, 'T.END': 73307994, 'CG': '1='}\n",
+      "7046673 D134#1#chr03 1765 1766\n",
+      "\tIn path\n",
+      "\t 73307993 73307994\n",
+      "{'Q.START': 1766, 'Q.END': 1767, 'T.START': 73307991, 'T.END': 73307992, 'CG': '1='}\n",
+      "7046631 D134#1#chr03 1766 1767\n",
+      "\tIn path\n",
+      "\t 73307991 73307992\n",
+      "{'Q.START': 1765, 'Q.END': 1766, 'T.START': 73307993, 'T.END': 73307994, 'CG': '1='}\n",
+      "7046632 D134#1#chr03 1767 1824\n",
+      "\tIn path\n",
+      "\t 73307995 73308052\n",
+      "{'Q.START': 1766, 'Q.END': 1767, 'T.START': 73307991, 'T.END': 73307992, 'CG': '1='}\n",
+      "7046634 D134#1#chr03 1824 1825\n",
+      "\tIn path\n",
+      "\t 73308053 73308054\n",
+      "{'Q.START': 1767, 'Q.END': 1824, 'T.START': 73307995, 'T.END': 73308052, 'CG': '57='}\n",
+      "7046635 D134#1#chr03 1825 1975\n",
+      "\tIn path\n",
+      "\t 73308055 73308205\n",
+      "{'Q.START': 1824, 'Q.END': 1825, 'T.START': 73308053, 'T.END': 73308054, 'CG': '1='}\n",
+      "7046637 D134#1#chr03 1975 1976\n",
+      "\tNot in path\n",
+      "7046638 D134#1#chr03 1976 2015\n",
+      "\tIn path\n",
+      "\t 73308208 73308247\n",
+      "{'Q.START': 1825, 'Q.END': 1975, 'T.START': 73308055, 'T.END': 73308205, 'CG': '150='}\n",
+      "7046639 D134#1#chr03 2015 2016\n",
+      "\tNot in path\n",
+      "7046641 D134#1#chr03 2016 2047\n",
+      "\tIn path\n",
+      "\t 73308250 73308281\n",
+      "{'Q.START': 1976, 'Q.END': 2015, 'T.START': 73308208, 'T.END': 73308247, 'CG': '39='}\n",
+      "7046644 D134#1#chr03 2047 2055\n",
+      "\tIn path\n",
+      "\t 73308286 73308294\n",
+      "{'Q.START': 2016, 'Q.END': 2047, 'T.START': 73308250, 'T.END': 73308281, 'CG': '31='}\n",
+      "7046646 D134#1#chr03 2055 2056\n",
+      "\tNot in path\n",
+      "7046647 D134#1#chr03 2056 2120\n",
+      "\tIn path\n",
+      "\t 73308297 73308361\n",
+      "{'Q.START': 2047, 'Q.END': 2055, 'T.START': 73308286, 'T.END': 73308294, 'CG': '8='}\n",
+      "7046649 D134#1#chr03 2120 2121\n",
+      "\tIn path\n",
+      "\t 73308362 73308363\n",
+      "{'Q.START': 2056, 'Q.END': 2120, 'T.START': 73308297, 'T.END': 73308361, 'CG': '64='}\n",
+      "7046650 D134#1#chr03 2121 2157\n",
+      "\tIn path\n",
+      "\t 73308364 73308400\n",
+      "{'Q.START': 2120, 'Q.END': 2121, 'T.START': 73308362, 'T.END': 73308363, 'CG': '1='}\n",
+      "7046652 D134#1#chr03 2157 2158\n",
+      "\tNot in path\n",
+      "7046653 D134#1#chr03 2158 2170\n",
+      "\tIn path\n",
+      "\t 73308403 73308415\n",
+      "{'Q.START': 2121, 'Q.END': 2157, 'T.START': 73308364, 'T.END': 73308400, 'CG': '36='}\n",
+      "7046654 D134#1#chr03 2170 2171\n",
+      "\tIn path\n",
+      "\t 73308416 73308417\n",
+      "{'Q.START': 2158, 'Q.END': 2170, 'T.START': 73308403, 'T.END': 73308415, 'CG': '12='}\n",
+      "7046656 D134#1#chr03 2171 2205\n",
+      "\tIn path\n",
+      "\t 73308418 73308452\n",
+      "{'Q.START': 2170, 'Q.END': 2171, 'T.START': 73308416, 'T.END': 73308417, 'CG': '1='}\n",
+      "7046657 D134#1#chr03 2205 2206\n",
+      "\tNot in path\n",
+      "7046659 D134#1#chr03 2206 2344\n",
+      "\tIn path\n",
+      "\t 73308455 73308593\n",
+      "{'Q.START': 2171, 'Q.END': 2205, 'T.START': 73308418, 'T.END': 73308452, 'CG': '34='}\n",
+      "7046660 D134#1#chr03 2344 2345\n",
+      "\tNot in path\n",
+      "7046662 D134#1#chr03 2345 2364\n",
+      "\tIn path\n",
+      "\t 73308596 73308615\n",
+      "{'Q.START': 2206, 'Q.END': 2344, 'T.START': 73308455, 'T.END': 73308593, 'CG': '138='}\n",
+      "7046663 D134#1#chr03 2364 2383\n",
+      "\tIn path\n",
+      "\t 73308616 73308635\n",
+      "{'Q.START': 2345, 'Q.END': 2364, 'T.START': 73308596, 'T.END': 73308615, 'CG': '19='}\n",
+      "7046665 D134#1#chr03 2383 2408\n",
+      "\tIn path\n",
+      "\t 73308636 73308661\n",
+      "{'Q.START': 2364, 'Q.END': 2383, 'T.START': 73308616, 'T.END': 73308635, 'CG': '19='}\n",
+      "7046667 D134#1#chr03 2408 2409\n",
+      "\tIn path\n",
+      "\t 73308662 73308663\n",
+      "{'Q.START': 2383, 'Q.END': 2408, 'T.START': 73308636, 'T.END': 73308661, 'CG': '25='}\n",
+      "7046668 D134#1#chr03 2409 2441\n",
+      "\tIn path\n",
+      "\t 73308664 73308696\n",
+      "{'Q.START': 2408, 'Q.END': 2409, 'T.START': 73308662, 'T.END': 73308663, 'CG': '1='}\n",
+      "7046670 D134#1#chr03 2441 2442\n",
+      "\tIn path\n",
+      "\t 73308697 73308698\n",
+      "{'Q.START': 2409, 'Q.END': 2441, 'T.START': 73308664, 'T.END': 73308696, 'CG': '32='}\n",
+      "7046671 D134#1#chr03 2442 2580\n",
+      "\tIn path\n",
+      "\t 73308699 73308837\n",
+      "{'Q.START': 2441, 'Q.END': 2442, 'T.START': 73308697, 'T.END': 73308698, 'CG': '1='}\n",
+      "7046674 D134#1#chr03 2582 2583\n",
+      "\tIn path\n",
+      "\t 73308838 73308839\n",
+      "{'Q.START': 2442, 'Q.END': 2580, 'T.START': 73308699, 'T.END': 73308837, 'CG': '138='}\n",
+      "7046675 D134#1#chr03 2583 2584\n",
+      "\tIn path\n",
+      "\t 73308840 73308841\n",
+      "{'Q.START': 2582, 'Q.END': 2583, 'T.START': 73308838, 'T.END': 73308839, 'CG': '1='}\n",
+      "7046674 D134#1#chr03 2582 2583\n",
+      "\tIn path\n",
+      "\t 73308838 73308839\n",
+      "{'Q.START': 2583, 'Q.END': 2584, 'T.START': 73308840, 'T.END': 73308841, 'CG': '1='}\n",
+      "7046675 D134#1#chr03 2583 2584\n",
+      "\tIn path\n",
+      "\t 73308840 73308841\n",
+      "{'Q.START': 2582, 'Q.END': 2583, 'T.START': 73308838, 'T.END': 73308839, 'CG': '1='}\n",
+      "7046676 D134#1#chr03 2584 2764\n",
+      "\tIn path\n",
+      "\t 73308842 73309022\n",
+      "{'Q.START': 2583, 'Q.END': 2584, 'T.START': 73308840, 'T.END': 73308841, 'CG': '1='}\n",
+      "7046678 D134#1#chr03 2764 2765\n",
+      "\tNot in path\n",
+      "7046679 D134#1#chr03 2765 2797\n",
+      "\tIn path\n",
+      "\t 73309025 73309057\n",
+      "{'Q.START': 2584, 'Q.END': 2764, 'T.START': 73308842, 'T.END': 73309022, 'CG': '180='}\n",
+      "7046680 D134#1#chr03 2797 2798\n",
+      "\tNot in path\n",
+      "7046682 D134#1#chr03 2798 2878\n",
+      "\tIn path\n",
+      "\t 73309060 73309140\n",
+      "{'Q.START': 2765, 'Q.END': 2797, 'T.START': 73309025, 'T.END': 73309057, 'CG': '32='}\n",
+      "7046684 D134#1#chr03 2878 2879\n",
+      "\tIn path\n",
+      "\t 73309141 73309142\n",
+      "{'Q.START': 2798, 'Q.END': 2878, 'T.START': 73309060, 'T.END': 73309140, 'CG': '80='}\n",
+      "7046685 D134#1#chr03 2879 2951\n",
+      "\tIn path\n",
+      "\t 73309143 73309215\n",
+      "{'Q.START': 2878, 'Q.END': 2879, 'T.START': 73309141, 'T.END': 73309142, 'CG': '1='}\n",
+      "7046686 D134#1#chr03 2951 2952\n",
+      "\tIn path\n",
+      "\t 73309216 73309217\n",
+      "{'Q.START': 2879, 'Q.END': 2951, 'T.START': 73309143, 'T.END': 73309215, 'CG': '72='}\n",
+      "7046688 D134#1#chr03 2952 3002\n",
+      "\tIn path\n",
+      "\t 73309218 73309268\n",
+      "{'Q.START': 2951, 'Q.END': 2952, 'T.START': 73309216, 'T.END': 73309217, 'CG': '1='}\n",
+      "7046690 D134#1#chr03 3002 3077\n",
+      "\tIn path\n",
+      "\t 73309271 73309346\n",
+      "{'Q.START': 2952, 'Q.END': 3002, 'T.START': 73309218, 'T.END': 73309268, 'CG': '50='}\n",
+      "7046692 D134#1#chr03 3077 3078\n",
+      "\tIn path\n",
+      "\t 73309347 73309348\n",
+      "{'Q.START': 3002, 'Q.END': 3077, 'T.START': 73309271, 'T.END': 73309346, 'CG': '75='}\n",
+      "7046693 D134#1#chr03 3078 3093\n",
+      "\tIn path\n",
+      "\t 73309349 73309364\n",
+      "{'Q.START': 3077, 'Q.END': 3078, 'T.START': 73309347, 'T.END': 73309348, 'CG': '1='}\n",
+      "7046695 D134#1#chr03 3093 3094\n",
+      "\tNot in path\n",
+      "7046696 D134#1#chr03 3094 3097\n",
+      "\tIn path\n",
+      "\t 73309367 73309370\n",
+      "{'Q.START': 3078, 'Q.END': 3093, 'T.START': 73309349, 'T.END': 73309364, 'CG': '15='}\n",
+      "7046698 D134#1#chr03 3097 3140\n",
+      "\tIn path\n",
+      "\t 73309371 73309414\n",
+      "{'Q.START': 3094, 'Q.END': 3097, 'T.START': 73309367, 'T.END': 73309370, 'CG': '3='}\n",
+      "7046700 D134#1#chr03 3140 3210\n",
+      "\tIn path\n",
+      "\t 73309415 73309485\n",
+      "{'Q.START': 3097, 'Q.END': 3140, 'T.START': 73309371, 'T.END': 73309414, 'CG': '43='}\n",
+      "7046702 D134#1#chr03 3210 3211\n",
+      "\tIn path\n",
+      "\t 73309486 73309487\n",
+      "{'Q.START': 3140, 'Q.END': 3210, 'T.START': 73309415, 'T.END': 73309485, 'CG': '70='}\n",
+      "7046703 D134#1#chr03 3211 3229\n",
+      "\tIn path\n",
+      "\t 73309488 73309506\n",
+      "{'Q.START': 3210, 'Q.END': 3211, 'T.START': 73309486, 'T.END': 73309487, 'CG': '1='}\n",
+      "7046704 D134#1#chr03 3229 3230\n",
+      "\tIn path\n",
+      "\t 73309507 73309508\n",
+      "{'Q.START': 3211, 'Q.END': 3229, 'T.START': 73309488, 'T.END': 73309506, 'CG': '18='}\n",
+      "7046706 D134#1#chr03 3230 3276\n",
+      "\tIn path\n",
+      "\t 73309509 73309555\n",
+      "{'Q.START': 3229, 'Q.END': 3230, 'T.START': 73309507, 'T.END': 73309508, 'CG': '1='}\n",
+      "7046707 D134#1#chr03 3276 3277\n",
+      "\tNot in path\n",
+      "7046709 D134#1#chr03 3277 3315\n",
+      "\tIn path\n",
+      "\t 73309558 73309596\n",
+      "{'Q.START': 3230, 'Q.END': 3276, 'T.START': 73309509, 'T.END': 73309555, 'CG': '46='}\n",
+      "7046710 D134#1#chr03 3315 3316\n",
+      "\tNot in path\n",
+      "7046712 D134#1#chr03 3316 3322\n",
+      "\tIn path\n",
+      "\t 73309599 73309605\n",
+      "{'Q.START': 3277, 'Q.END': 3315, 'T.START': 73309558, 'T.END': 73309596, 'CG': '38='}\n",
+      "7046713 D134#1#chr03 3322 3323\n",
+      "\tNot in path\n",
+      "7046715 D134#1#chr03 3323 3348\n",
+      "\tIn path\n",
+      "\t 73309608 73309633\n",
+      "{'Q.START': 3316, 'Q.END': 3322, 'T.START': 73309599, 'T.END': 73309605, 'CG': '6='}\n",
+      "7046718 D134#1#chr03 3352 3353\n",
+      "\tIn path\n",
+      "\t 73309634 73309635\n",
+      "{'Q.START': 3323, 'Q.END': 3348, 'T.START': 73309608, 'T.END': 73309633, 'CG': '25='}\n",
+      "7046717 D134#1#chr03 3351 3352\n",
+      "\tIn path\n",
+      "\t 73309636 73309637\n",
+      "{'Q.START': 3352, 'Q.END': 3353, 'T.START': 73309634, 'T.END': 73309635, 'CG': '1='}\n",
+      "7046718 D134#1#chr03 3352 3353\n",
+      "\tIn path\n",
+      "\t 73309634 73309635\n",
+      "{'Q.START': 3351, 'Q.END': 3352, 'T.START': 73309636, 'T.END': 73309637, 'CG': '1='}\n",
+      "7046717 D134#1#chr03 3351 3352\n",
+      "\tIn path\n",
+      "\t 73309636 73309637\n",
+      "{'Q.START': 3352, 'Q.END': 3353, 'T.START': 73309634, 'T.END': 73309635, 'CG': '1='}\n",
+      "7046718 D134#1#chr03 3352 3353\n",
+      "\tIn path\n",
+      "\t 73309634 73309635\n",
+      "{'Q.START': 3351, 'Q.END': 3352, 'T.START': 73309636, 'T.END': 73309637, 'CG': '1='}\n",
+      "7046720 D134#1#chr03 3353 3354\n",
+      "\tIn path\n",
+      "\t 73309638 73309639\n",
+      "{'Q.START': 3352, 'Q.END': 3353, 'T.START': 73309634, 'T.END': 73309635, 'CG': '1='}\n",
+      "7046722 D134#1#chr03 3354 3356\n",
+      "\tIn path\n",
+      "\t 73309640 73309642\n",
+      "{'Q.START': 3353, 'Q.END': 3354, 'T.START': 73309638, 'T.END': 73309639, 'CG': '1='}\n",
+      "7046724 D134#1#chr03 3356 3357\n",
+      "\tNot in path\n",
+      "7046725 D134#1#chr03 3357 3489\n",
+      "\tIn path\n",
+      "\t 73309645 73309777\n",
+      "{'Q.START': 3354, 'Q.END': 3356, 'T.START': 73309640, 'T.END': 73309642, 'CG': '2='}\n",
+      "7046727 D134#1#chr03 3489 3490\n",
+      "\tNot in path\n",
+      "7046728 D134#1#chr03 3490 3642\n",
+      "\tIn path\n",
+      "\t 73309780 73309932\n",
+      "{'Q.START': 3357, 'Q.END': 3489, 'T.START': 73309645, 'T.END': 73309777, 'CG': '132='}\n",
+      "7046729 D134#1#chr03 3642 3644\n",
+      "\tNot in path\n",
+      "7046730 D134#1#chr03 3644 3685\n",
+      "\tIn path\n",
+      "\t 73309933 73309974\n",
+      "{'Q.START': 3490, 'Q.END': 3642, 'T.START': 73309780, 'T.END': 73309932, 'CG': '152='}\n",
+      "7046731 D134#1#chr03 3685 3687\n",
+      "\tNot in path\n",
+      "7046733 D134#1#chr03 3687 3693\n",
+      "\tIn path\n",
+      "\t 73309977 73309983\n",
+      "{'Q.START': 3644, 'Q.END': 3685, 'T.START': 73309933, 'T.END': 73309974, 'CG': '41='}\n",
+      "7046735 D134#1#chr03 3693 3694\n",
+      "\tNot in path\n",
+      "7046736 D134#1#chr03 3694 3708\n",
+      "\tIn path\n",
+      "\t 73309986 73310000\n",
+      "{'Q.START': 3687, 'Q.END': 3693, 'T.START': 73309977, 'T.END': 73309983, 'CG': '6='}\n",
+      "7046738 D134#1#chr03 3720 3721\n",
+      "\tIn path\n",
+      "\t 73310010 73310011\n",
+      "{'Q.START': 3694, 'Q.END': 3708, 'T.START': 73309986, 'T.END': 73310000, 'CG': '14='}\n",
+      "7046739 D134#1#chr03 3721 3722\n",
+      "\tIn path\n",
+      "\t 73310003 73310004\n",
+      "{'Q.START': 3720, 'Q.END': 3721, 'T.START': 73310010, 'T.END': 73310011, 'CG': '1='}\n",
+      "7046740 D134#1#chr03 3716 3720\n",
+      "\tIn path\n",
+      "\t 73310005 73310009\n",
+      "{'Q.START': 3721, 'Q.END': 3722, 'T.START': 73310003, 'T.END': 73310004, 'CG': '1='}\n",
+      "7046738 D134#1#chr03 3720 3721\n",
+      "\tIn path\n",
+      "\t 73310010 73310011\n",
+      "{'Q.START': 3716, 'Q.END': 3720, 'T.START': 73310005, 'T.END': 73310009, 'CG': '4='}\n",
+      "7046739 D134#1#chr03 3721 3722\n",
+      "\tIn path\n",
+      "\t 73310003 73310004\n",
+      "{'Q.START': 3720, 'Q.END': 3721, 'T.START': 73310010, 'T.END': 73310011, 'CG': '1='}\n",
+      "7046740 D134#1#chr03 3716 3720\n",
+      "\tIn path\n",
+      "\t 73310005 73310009\n",
+      "{'Q.START': 3721, 'Q.END': 3722, 'T.START': 73310003, 'T.END': 73310004, 'CG': '1='}\n",
+      "7046738 D134#1#chr03 3720 3721\n",
+      "\tIn path\n",
+      "\t 73310010 73310011\n",
+      "{'Q.START': 3716, 'Q.END': 3720, 'T.START': 73310005, 'T.END': 73310009, 'CG': '4='}\n",
+      "7046739 D134#1#chr03 3721 3722\n",
+      "\tIn path\n",
+      "\t 73310003 73310004\n",
+      "{'Q.START': 3720, 'Q.END': 3721, 'T.START': 73310010, 'T.END': 73310011, 'CG': '1='}\n",
+      "7046741 D134#1#chr03 3722 3735\n",
+      "\tIn path\n",
+      "\t 73310012 73310045\n",
+      "{'Q.START': 3721, 'Q.END': 3722, 'T.START': 73310003, 'T.END': 73310004, 'CG': '1='}\n",
+      "ALN_1\n",
+      "7046526 TO1000#1#chr03 0 77\n",
+      "\t 64684013 64684090\n",
+      "skipped\n",
+      "\n",
+      "7046528 TO1000#1#chr03 77 82\n",
+      "\t 64684091 64684096\n",
+      "{'Q.START': 0, 'Q.END': 77, 'T.START': 64684013, 'T.END': 64684090, 'CG': '77='}\n",
+      "7046530 TO1000#1#chr03 82 83\n",
+      "\t 64684097 64684098\n",
+      "{'Q.START': 77, 'Q.END': 82, 'T.START': 64684091, 'T.END': 64684096, 'CG': '5='}\n",
+      "7046531 TO1000#1#chr03 83 138\n",
+      "\t 64684099 64684154\n",
+      "{'Q.START': 82, 'Q.END': 83, 'T.START': 64684097, 'T.END': 64684098, 'CG': '1='}\n",
+      "7046532 TO1000#1#chr03 138 139\n",
+      "\t 64684155 64684156\n",
+      "{'Q.START': 83, 'Q.END': 138, 'T.START': 64684099, 'T.END': 64684154, 'CG': '55='}\n",
+      "7046533 TO1000#1#chr03 139 202\n",
+      "\t 64684157 64684220\n",
+      "{'Q.START': 138, 'Q.END': 139, 'T.START': 64684155, 'T.END': 64684156, 'CG': '1='}\n",
+      "7046534 TO1000#1#chr03 202 203\n",
+      "\t 64684221 64684222\n",
+      "{'Q.START': 139, 'Q.END': 202, 'T.START': 64684157, 'T.END': 64684220, 'CG': '63='}\n",
+      "7046536 TO1000#1#chr03 203 379\n",
+      "\t 64684223 64684399\n",
+      "{'Q.START': 202, 'Q.END': 203, 'T.START': 64684221, 'T.END': 64684222, 'CG': '1='}\n",
+      "7046537 TO1000#1#chr03 379 380\n",
+      "\t 64684400 64684401\n",
+      "{'Q.START': 203, 'Q.END': 379, 'T.START': 64684223, 'T.END': 64684399, 'CG': '176='}\n",
+      "7046539 TO1000#1#chr03 380 429\n",
+      "\t 64684402 64684451\n",
+      "{'Q.START': 379, 'Q.END': 380, 'T.START': 64684400, 'T.END': 64684401, 'CG': '1='}\n",
+      "7046541 TO1000#1#chr03 429 430\n",
+      "\t 64684452 64684453\n",
+      "{'Q.START': 380, 'Q.END': 429, 'T.START': 64684402, 'T.END': 64684451, 'CG': '49='}\n",
+      "7046542 TO1000#1#chr03 430 457\n",
+      "\t 64684454 64684481\n",
+      "{'Q.START': 429, 'Q.END': 430, 'T.START': 64684452, 'T.END': 64684453, 'CG': '1='}\n",
+      "7046544 TO1000#1#chr03 457 492\n",
+      "\t 64684482 64684517\n",
+      "{'Q.START': 430, 'Q.END': 457, 'T.START': 64684454, 'T.END': 64684481, 'CG': '27='}\n",
+      "7046546 TO1000#1#chr03 492 494\n",
+      "\t 64684518 64684520\n",
+      "{'Q.START': 457, 'Q.END': 492, 'T.START': 64684482, 'T.END': 64684517, 'CG': '35='}\n",
+      "7046547 TO1000#1#chr03 494 497\n",
+      "\t 64684521 64684524\n",
+      "{'Q.START': 492, 'Q.END': 494, 'T.START': 64684518, 'T.END': 64684520, 'CG': '2='}\n",
+      "7046549 TO1000#1#chr03 497 507\n",
+      "\t 64684525 64684535\n",
+      "{'Q.START': 494, 'Q.END': 497, 'T.START': 64684521, 'T.END': 64684524, 'CG': '3='}\n",
+      "7046551 TO1000#1#chr03 507 508\n",
+      "\t 64684536 64684537\n",
+      "{'Q.START': 497, 'Q.END': 507, 'T.START': 64684525, 'T.END': 64684535, 'CG': '10='}\n",
+      "7046552 TO1000#1#chr03 508 564\n",
+      "\t 64684538 64684594\n",
+      "{'Q.START': 507, 'Q.END': 508, 'T.START': 64684536, 'T.END': 64684537, 'CG': '1='}\n",
+      "7046554 TO1000#1#chr03 564 566\n",
+      "\t 64684595 64684597\n",
+      "{'Q.START': 508, 'Q.END': 564, 'T.START': 64684538, 'T.END': 64684594, 'CG': '56='}\n",
+      "7046556 TO1000#1#chr03 568 569\n",
+      "\t 64684598 64684599\n",
+      "{'Q.START': 564, 'Q.END': 566, 'T.START': 64684595, 'T.END': 64684597, 'CG': '2='}\n",
+      "7046556 TO1000#1#chr03 568 569\n",
+      "\t 64684598 64684599\n",
+      "{'Q.START': 568, 'Q.END': 569, 'T.START': 64684598, 'T.END': 64684599, 'CG': '1='}\n",
+      "7046556 TO1000#1#chr03 568 569\n",
+      "\t 64684598 64684599\n",
+      "{'Q.START': 568, 'Q.END': 569, 'T.START': 64684598, 'T.END': 64684599, 'CG': '1='}\n",
+      "7046557 TO1000#1#chr03 569 824\n",
+      "\t 64684600 64684855\n",
+      "{'Q.START': 568, 'Q.END': 569, 'T.START': 64684598, 'T.END': 64684599, 'CG': '1='}\n",
+      "7046558 TO1000#1#chr03 824 826\n",
+      "\t 64684856 64684858\n",
+      "{'Q.START': 569, 'Q.END': 824, 'T.START': 64684600, 'T.END': 64684855, 'CG': '255='}\n",
+      "7046559 TO1000#1#chr03 826 858\n",
+      "\t 64684859 64684891\n",
+      "{'Q.START': 824, 'Q.END': 826, 'T.START': 64684856, 'T.END': 64684858, 'CG': '2='}\n",
+      "7046560 TO1000#1#chr03 858 859\n",
+      "\t 64684892 64684893\n",
+      "{'Q.START': 826, 'Q.END': 858, 'T.START': 64684859, 'T.END': 64684891, 'CG': '32='}\n",
+      "7046561 TO1000#1#chr03 868 869\n",
+      "\t 64684894 64684895\n",
+      "{'Q.START': 858, 'Q.END': 859, 'T.START': 64684892, 'T.END': 64684893, 'CG': '1='}\n",
+      "7046561 TO1000#1#chr03 868 869\n",
+      "\t 64684894 64684895\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 64684894, 'T.END': 64684895, 'CG': '1='}\n",
+      "7046561 TO1000#1#chr03 868 869\n",
+      "\t 64684894 64684895\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 64684894, 'T.END': 64684895, 'CG': '1='}\n",
+      "7046561 TO1000#1#chr03 868 869\n",
+      "\t 64684894 64684895\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 64684894, 'T.END': 64684895, 'CG': '1='}\n",
+      "7046561 TO1000#1#chr03 868 869\n",
+      "\t 64684894 64684895\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 64684894, 'T.END': 64684895, 'CG': '1='}\n",
+      "7046561 TO1000#1#chr03 868 869\n",
+      "\t 64684894 64684895\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 64684894, 'T.END': 64684895, 'CG': '1='}\n",
+      "7046561 TO1000#1#chr03 868 869\n",
+      "\t 64684894 64684895\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 64684894, 'T.END': 64684895, 'CG': '1='}\n",
+      "7046561 TO1000#1#chr03 868 869\n",
+      "\t 64684894 64684895\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 64684894, 'T.END': 64684895, 'CG': '1='}\n",
+      "7046561 TO1000#1#chr03 868 869\n",
+      "\t 64684894 64684895\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 64684894, 'T.END': 64684895, 'CG': '1='}\n",
+      "7046561 TO1000#1#chr03 868 869\n",
+      "\t 64684894 64684895\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 64684894, 'T.END': 64684895, 'CG': '1='}\n",
+      "7046562 TO1000#1#chr03 869 913\n",
+      "\t 64684896 64684940\n",
+      "{'Q.START': 868, 'Q.END': 869, 'T.START': 64684894, 'T.END': 64684895, 'CG': '1='}\n",
+      "7046564 TO1000#1#chr03 913 919\n",
+      "\t 64684941 64684947\n",
+      "{'Q.START': 869, 'Q.END': 913, 'T.START': 64684896, 'T.END': 64684940, 'CG': '44='}\n",
+      "7046565 TO1000#1#chr03 919 978\n",
+      "\t 64684948 64685007\n",
+      "{'Q.START': 913, 'Q.END': 919, 'T.START': 64684941, 'T.END': 64684947, 'CG': '6='}\n",
+      "7046567 TO1000#1#chr03 978 979\n",
+      "\t 64685008 64685009\n",
+      "{'Q.START': 919, 'Q.END': 978, 'T.START': 64684948, 'T.END': 64685007, 'CG': '59='}\n",
+      "7046568 TO1000#1#chr03 979 1038\n",
+      "\t 64685010 64685069\n",
+      "{'Q.START': 978, 'Q.END': 979, 'T.START': 64685008, 'T.END': 64685009, 'CG': '1='}\n",
+      "7046570 TO1000#1#chr03 1038 1045\n",
+      "\t 64685070 64685077\n",
+      "{'Q.START': 979, 'Q.END': 1038, 'T.START': 64685010, 'T.END': 64685069, 'CG': '59='}\n",
+      "7046571 TO1000#1#chr03 1045 1046\n",
+      "\t 64685078 64685079\n",
+      "{'Q.START': 1038, 'Q.END': 1045, 'T.START': 64685070, 'T.END': 64685077, 'CG': '7='}\n",
+      "7046573 TO1000#1#chr03 1046 1080\n",
+      "\t 64685080 64685114\n",
+      "{'Q.START': 1045, 'Q.END': 1046, 'T.START': 64685078, 'T.END': 64685079, 'CG': '1='}\n",
+      "7046574 TO1000#1#chr03 1080 1081\n",
+      "\t 64685115 64685116\n",
+      "{'Q.START': 1046, 'Q.END': 1080, 'T.START': 64685080, 'T.END': 64685114, 'CG': '34='}\n",
+      "7046576 TO1000#1#chr03 1081 1107\n",
+      "\t 64685117 64685143\n",
+      "{'Q.START': 1080, 'Q.END': 1081, 'T.START': 64685115, 'T.END': 64685116, 'CG': '1='}\n",
+      "7046577 TO1000#1#chr03 1107 1108\n",
+      "\t 64685144 64685145\n",
+      "{'Q.START': 1081, 'Q.END': 1107, 'T.START': 64685117, 'T.END': 64685143, 'CG': '26='}\n",
+      "7046579 TO1000#1#chr03 1108 1183\n",
+      "\t 64685146 64685221\n",
+      "{'Q.START': 1107, 'Q.END': 1108, 'T.START': 64685144, 'T.END': 64685145, 'CG': '1='}\n",
+      "7046581 TO1000#1#chr03 1183 1186\n",
+      "\t 64685222 64685225\n",
+      "{'Q.START': 1108, 'Q.END': 1183, 'T.START': 64685146, 'T.END': 64685221, 'CG': '75='}\n",
+      "7046583 TO1000#1#chr03 1186 1224\n",
+      "\t 64685226 64685264\n",
+      "{'Q.START': 1183, 'Q.END': 1186, 'T.START': 64685222, 'T.END': 64685225, 'CG': '3='}\n",
+      "7046584 TO1000#1#chr03 1224 1257\n",
+      "\t 64685265 64685298\n",
+      "{'Q.START': 1186, 'Q.END': 1224, 'T.START': 64685226, 'T.END': 64685264, 'CG': '38='}\n",
+      "7046586 TO1000#1#chr03 1257 1289\n",
+      "\t 64685299 64685331\n",
+      "{'Q.START': 1224, 'Q.END': 1257, 'T.START': 64685265, 'T.END': 64685298, 'CG': '33='}\n",
+      "7046587 TO1000#1#chr03 1289 1311\n",
+      "\t 64685332 64685354\n",
+      "{'Q.START': 1257, 'Q.END': 1289, 'T.START': 64685299, 'T.END': 64685331, 'CG': '32='}\n",
+      "7046589 TO1000#1#chr03 1311 1359\n",
+      "\t 64685355 64685403\n",
+      "{'Q.START': 1289, 'Q.END': 1311, 'T.START': 64685332, 'T.END': 64685354, 'CG': '22='}\n",
+      "7046590 TO1000#1#chr03 1359 1382\n",
+      "\t 64685404 64685427\n",
+      "{'Q.START': 1311, 'Q.END': 1359, 'T.START': 64685355, 'T.END': 64685403, 'CG': '48='}\n",
+      "7046592 TO1000#1#chr03 1382 1434\n",
+      "\t 64685428 64685480\n",
+      "{'Q.START': 1359, 'Q.END': 1382, 'T.START': 64685404, 'T.END': 64685427, 'CG': '23='}\n",
+      "7046593 TO1000#1#chr03 1434 1451\n",
+      "\t 64685481 64685498\n",
+      "{'Q.START': 1382, 'Q.END': 1434, 'T.START': 64685428, 'T.END': 64685480, 'CG': '52='}\n",
+      "7046594 TO1000#1#chr03 1451 1531\n",
+      "\t 64685499 64685579\n",
+      "{'Q.START': 1434, 'Q.END': 1451, 'T.START': 64685481, 'T.END': 64685498, 'CG': '17='}\n",
+      "7046596 TO1000#1#chr03 1531 1532\n",
+      "\t 64685580 64685581\n",
+      "{'Q.START': 1451, 'Q.END': 1531, 'T.START': 64685499, 'T.END': 64685579, 'CG': '80='}\n",
+      "7046597 TO1000#1#chr03 1532 1543\n",
+      "\t 64685582 64685593\n",
+      "{'Q.START': 1531, 'Q.END': 1532, 'T.START': 64685580, 'T.END': 64685581, 'CG': '1='}\n",
+      "7046599 TO1000#1#chr03 1543 1544\n",
+      "\t 64685594 64685595\n",
+      "{'Q.START': 1532, 'Q.END': 1543, 'T.START': 64685582, 'T.END': 64685593, 'CG': '11='}\n",
+      "7046600 TO1000#1#chr03 1544 1572\n",
+      "\t 64685596 64685624\n",
+      "{'Q.START': 1543, 'Q.END': 1544, 'T.START': 64685594, 'T.END': 64685595, 'CG': '1='}\n",
+      "7046601 TO1000#1#chr03 1572 1573\n",
+      "\t 64685625 64685626\n",
+      "{'Q.START': 1544, 'Q.END': 1572, 'T.START': 64685596, 'T.END': 64685624, 'CG': '28='}\n",
+      "7046603 TO1000#1#chr03 1573 1587\n",
+      "\t 64685627 64685641\n",
+      "{'Q.START': 1572, 'Q.END': 1573, 'T.START': 64685625, 'T.END': 64685626, 'CG': '1='}\n",
+      "7046604 TO1000#1#chr03 1587 1588\n",
+      "\t 64685642 64685643\n",
+      "{'Q.START': 1573, 'Q.END': 1587, 'T.START': 64685627, 'T.END': 64685641, 'CG': '14='}\n",
+      "7046606 TO1000#1#chr03 1588 1616\n",
+      "\t 64685644 64685672\n",
+      "{'Q.START': 1587, 'Q.END': 1588, 'T.START': 64685642, 'T.END': 64685643, 'CG': '1='}\n",
+      "7046608 TO1000#1#chr03 1616 1617\n",
+      "\t 64685673 64685674\n",
+      "{'Q.START': 1588, 'Q.END': 1616, 'T.START': 64685644, 'T.END': 64685672, 'CG': '28='}\n",
+      "7046609 TO1000#1#chr03 1617 1646\n",
+      "\t 64685675 64685704\n",
+      "{'Q.START': 1616, 'Q.END': 1617, 'T.START': 64685673, 'T.END': 64685674, 'CG': '1='}\n",
+      "7046621 TO1000#1#chr03 1646 1661\n",
+      "\t 64685705 64685720\n",
+      "{'Q.START': 1617, 'Q.END': 1646, 'T.START': 64685675, 'T.END': 64685704, 'CG': '29='}\n",
+      "7046622 TO1000#1#chr03 1661 1673\n",
+      "\t 64685721 64685733\n",
+      "{'Q.START': 1646, 'Q.END': 1661, 'T.START': 64685705, 'T.END': 64685720, 'CG': '15='}\n",
+      "7046624 TO1000#1#chr03 1673 1674\n",
+      "\t 64685734 64685735\n",
+      "{'Q.START': 1661, 'Q.END': 1673, 'T.START': 64685721, 'T.END': 64685733, 'CG': '12='}\n",
+      "7046625 TO1000#1#chr03 1674 1726\n",
+      "\t 64685736 64685788\n",
+      "{'Q.START': 1673, 'Q.END': 1674, 'T.START': 64685734, 'T.END': 64685735, 'CG': '1='}\n",
+      "7046626 TO1000#1#chr03 1726 1727\n",
+      "\t 64685789 64685790\n",
+      "{'Q.START': 1674, 'Q.END': 1726, 'T.START': 64685736, 'T.END': 64685788, 'CG': '52='}\n",
+      "7046628 TO1000#1#chr03 1727 1762\n",
+      "\t 64685791 64685826\n",
+      "{'Q.START': 1726, 'Q.END': 1727, 'T.START': 64685789, 'T.END': 64685790, 'CG': '1='}\n",
+      "7046631 TO1000#1#chr03 1766 1767\n",
+      "\t 64685827 64685828\n",
+      "{'Q.START': 1727, 'Q.END': 1762, 'T.START': 64685791, 'T.END': 64685826, 'CG': '35='}\n",
+      "7046673 TO1000#1#chr03 1765 1766\n",
+      "\t 64685829 64685830\n",
+      "{'Q.START': 1766, 'Q.END': 1767, 'T.START': 64685827, 'T.END': 64685828, 'CG': '1='}\n",
+      "7046631 TO1000#1#chr03 1766 1767\n",
+      "\t 64685827 64685828\n",
+      "{'Q.START': 1765, 'Q.END': 1766, 'T.START': 64685829, 'T.END': 64685830, 'CG': '1='}\n",
+      "7046673 TO1000#1#chr03 1765 1766\n",
+      "\t 64685829 64685830\n",
+      "{'Q.START': 1766, 'Q.END': 1767, 'T.START': 64685827, 'T.END': 64685828, 'CG': '1='}\n",
+      "7046631 TO1000#1#chr03 1766 1767\n",
+      "\t 64685827 64685828\n",
+      "{'Q.START': 1765, 'Q.END': 1766, 'T.START': 64685829, 'T.END': 64685830, 'CG': '1='}\n",
+      "7046632 TO1000#1#chr03 1767 1824\n",
+      "\t 64685831 64685888\n",
+      "{'Q.START': 1766, 'Q.END': 1767, 'T.START': 64685827, 'T.END': 64685828, 'CG': '1='}\n",
+      "7046634 TO1000#1#chr03 1824 1825\n",
+      "\t 64685889 64685890\n",
+      "{'Q.START': 1767, 'Q.END': 1824, 'T.START': 64685831, 'T.END': 64685888, 'CG': '57='}\n",
+      "7046635 TO1000#1#chr03 1825 1975\n",
+      "\t 64685891 64686041\n",
+      "{'Q.START': 1824, 'Q.END': 1825, 'T.START': 64685889, 'T.END': 64685890, 'CG': '1='}\n",
+      "7046637 TO1000#1#chr03 1975 1976\n",
+      "\t 64686042 64686043\n",
+      "{'Q.START': 1825, 'Q.END': 1975, 'T.START': 64685891, 'T.END': 64686041, 'CG': '150='}\n",
+      "7046638 TO1000#1#chr03 1976 2015\n",
+      "\t 64686044 64686083\n",
+      "{'Q.START': 1975, 'Q.END': 1976, 'T.START': 64686042, 'T.END': 64686043, 'CG': '1='}\n",
+      "7046639 TO1000#1#chr03 2015 2016\n",
+      "\t 64686084 64686085\n",
+      "{'Q.START': 1976, 'Q.END': 2015, 'T.START': 64686044, 'T.END': 64686083, 'CG': '39='}\n",
+      "7046641 TO1000#1#chr03 2016 2047\n",
+      "\t 64686086 64686117\n",
+      "{'Q.START': 2015, 'Q.END': 2016, 'T.START': 64686084, 'T.END': 64686085, 'CG': '1='}\n",
+      "7046644 TO1000#1#chr03 2047 2055\n",
+      "\t 64686118 64686126\n",
+      "{'Q.START': 2016, 'Q.END': 2047, 'T.START': 64686086, 'T.END': 64686117, 'CG': '31='}\n",
+      "7046646 TO1000#1#chr03 2055 2056\n",
+      "\t 64686127 64686128\n",
+      "{'Q.START': 2047, 'Q.END': 2055, 'T.START': 64686118, 'T.END': 64686126, 'CG': '8='}\n",
+      "7046647 TO1000#1#chr03 2056 2120\n",
+      "\t 64686129 64686193\n",
+      "{'Q.START': 2055, 'Q.END': 2056, 'T.START': 64686127, 'T.END': 64686128, 'CG': '1='}\n",
+      "7046649 TO1000#1#chr03 2120 2121\n",
+      "\t 64686194 64686195\n",
+      "{'Q.START': 2056, 'Q.END': 2120, 'T.START': 64686129, 'T.END': 64686193, 'CG': '64='}\n",
+      "7046650 TO1000#1#chr03 2121 2157\n",
+      "\t 64686196 64686232\n",
+      "{'Q.START': 2120, 'Q.END': 2121, 'T.START': 64686194, 'T.END': 64686195, 'CG': '1='}\n",
+      "7046652 TO1000#1#chr03 2157 2158\n",
+      "\t 64686233 64686234\n",
+      "{'Q.START': 2121, 'Q.END': 2157, 'T.START': 64686196, 'T.END': 64686232, 'CG': '36='}\n",
+      "7046653 TO1000#1#chr03 2158 2170\n",
+      "\t 64686235 64686247\n",
+      "{'Q.START': 2157, 'Q.END': 2158, 'T.START': 64686233, 'T.END': 64686234, 'CG': '1='}\n",
+      "7046654 TO1000#1#chr03 2170 2171\n",
+      "\t 64686248 64686249\n",
+      "{'Q.START': 2158, 'Q.END': 2170, 'T.START': 64686235, 'T.END': 64686247, 'CG': '12='}\n",
+      "7046656 TO1000#1#chr03 2171 2205\n",
+      "\t 64686250 64686284\n",
+      "{'Q.START': 2170, 'Q.END': 2171, 'T.START': 64686248, 'T.END': 64686249, 'CG': '1='}\n",
+      "7046657 TO1000#1#chr03 2205 2206\n",
+      "\t 64686285 64686286\n",
+      "{'Q.START': 2171, 'Q.END': 2205, 'T.START': 64686250, 'T.END': 64686284, 'CG': '34='}\n",
+      "7046659 TO1000#1#chr03 2206 2344\n",
+      "\t 64686287 64686425\n",
+      "{'Q.START': 2205, 'Q.END': 2206, 'T.START': 64686285, 'T.END': 64686286, 'CG': '1='}\n",
+      "7046660 TO1000#1#chr03 2344 2345\n",
+      "\t 64686426 64686427\n",
+      "{'Q.START': 2206, 'Q.END': 2344, 'T.START': 64686287, 'T.END': 64686425, 'CG': '138='}\n",
+      "7046662 TO1000#1#chr03 2345 2364\n",
+      "\t 64686428 64686447\n",
+      "{'Q.START': 2344, 'Q.END': 2345, 'T.START': 64686426, 'T.END': 64686427, 'CG': '1='}\n",
+      "7046663 TO1000#1#chr03 2364 2383\n",
+      "\t 64686448 64686467\n",
+      "{'Q.START': 2345, 'Q.END': 2364, 'T.START': 64686428, 'T.END': 64686447, 'CG': '19='}\n",
+      "7046665 TO1000#1#chr03 2383 2408\n",
+      "\t 64686468 64686493\n",
+      "{'Q.START': 2364, 'Q.END': 2383, 'T.START': 64686448, 'T.END': 64686467, 'CG': '19='}\n",
+      "7046667 TO1000#1#chr03 2408 2409\n",
+      "\t 64686494 64686495\n",
+      "{'Q.START': 2383, 'Q.END': 2408, 'T.START': 64686468, 'T.END': 64686493, 'CG': '25='}\n",
+      "7046668 TO1000#1#chr03 2409 2441\n",
+      "\t 64686496 64686528\n",
+      "{'Q.START': 2408, 'Q.END': 2409, 'T.START': 64686494, 'T.END': 64686495, 'CG': '1='}\n",
+      "7046670 TO1000#1#chr03 2441 2442\n",
+      "\t 64686529 64686530\n",
+      "{'Q.START': 2409, 'Q.END': 2441, 'T.START': 64686496, 'T.END': 64686528, 'CG': '32='}\n",
+      "7046671 TO1000#1#chr03 2442 2580\n",
+      "\t 64686531 64686669\n",
+      "{'Q.START': 2441, 'Q.END': 2442, 'T.START': 64686529, 'T.END': 64686530, 'CG': '1='}\n",
+      "7046674 TO1000#1#chr03 2582 2583\n",
+      "\t 64686670 64686671\n",
+      "{'Q.START': 2442, 'Q.END': 2580, 'T.START': 64686531, 'T.END': 64686669, 'CG': '138='}\n",
+      "7046675 TO1000#1#chr03 2583 2584\n",
+      "\t 64686672 64686673\n",
+      "{'Q.START': 2582, 'Q.END': 2583, 'T.START': 64686670, 'T.END': 64686671, 'CG': '1='}\n",
+      "7046674 TO1000#1#chr03 2582 2583\n",
+      "\t 64686670 64686671\n",
+      "{'Q.START': 2583, 'Q.END': 2584, 'T.START': 64686672, 'T.END': 64686673, 'CG': '1='}\n",
+      "7046675 TO1000#1#chr03 2583 2584\n",
+      "\t 64686672 64686673\n",
+      "{'Q.START': 2582, 'Q.END': 2583, 'T.START': 64686670, 'T.END': 64686671, 'CG': '1='}\n",
+      "7046676 TO1000#1#chr03 2584 2764\n",
+      "\t 64686674 64686854\n",
+      "{'Q.START': 2583, 'Q.END': 2584, 'T.START': 64686672, 'T.END': 64686673, 'CG': '1='}\n",
+      "7046678 TO1000#1#chr03 2764 2765\n",
+      "\t 64686855 64686856\n",
+      "{'Q.START': 2584, 'Q.END': 2764, 'T.START': 64686674, 'T.END': 64686854, 'CG': '180='}\n",
+      "7046679 TO1000#1#chr03 2765 2797\n",
+      "\t 64686857 64686889\n",
+      "{'Q.START': 2764, 'Q.END': 2765, 'T.START': 64686855, 'T.END': 64686856, 'CG': '1='}\n",
+      "7046680 TO1000#1#chr03 2797 2798\n",
+      "\t 64686890 64686891\n",
+      "{'Q.START': 2765, 'Q.END': 2797, 'T.START': 64686857, 'T.END': 64686889, 'CG': '32='}\n",
+      "7046682 TO1000#1#chr03 2798 2878\n",
+      "\t 64686892 64686972\n",
+      "{'Q.START': 2797, 'Q.END': 2798, 'T.START': 64686890, 'T.END': 64686891, 'CG': '1='}\n",
+      "7046684 TO1000#1#chr03 2878 2879\n",
+      "\t 64686973 64686974\n",
+      "{'Q.START': 2798, 'Q.END': 2878, 'T.START': 64686892, 'T.END': 64686972, 'CG': '80='}\n",
+      "7046685 TO1000#1#chr03 2879 2951\n",
+      "\t 64686975 64687047\n",
+      "{'Q.START': 2878, 'Q.END': 2879, 'T.START': 64686973, 'T.END': 64686974, 'CG': '1='}\n",
+      "7046686 TO1000#1#chr03 2951 2952\n",
+      "\t 64687048 64687049\n",
+      "{'Q.START': 2879, 'Q.END': 2951, 'T.START': 64686975, 'T.END': 64687047, 'CG': '72='}\n",
+      "7046688 TO1000#1#chr03 2952 3002\n",
+      "\t 64687050 64687100\n",
+      "{'Q.START': 2951, 'Q.END': 2952, 'T.START': 64687048, 'T.END': 64687049, 'CG': '1='}\n",
+      "7046690 TO1000#1#chr03 3002 3077\n",
+      "\t 64687101 64687176\n",
+      "{'Q.START': 2952, 'Q.END': 3002, 'T.START': 64687050, 'T.END': 64687100, 'CG': '50='}\n",
+      "7046692 TO1000#1#chr03 3077 3078\n",
+      "\t 64687177 64687178\n",
+      "{'Q.START': 3002, 'Q.END': 3077, 'T.START': 64687101, 'T.END': 64687176, 'CG': '75='}\n",
+      "7046693 TO1000#1#chr03 3078 3093\n",
+      "\t 64687179 64687194\n",
+      "{'Q.START': 3077, 'Q.END': 3078, 'T.START': 64687177, 'T.END': 64687178, 'CG': '1='}\n",
+      "7046695 TO1000#1#chr03 3093 3094\n",
+      "\t 64687195 64687196\n",
+      "{'Q.START': 3078, 'Q.END': 3093, 'T.START': 64687179, 'T.END': 64687194, 'CG': '15='}\n",
+      "7046696 TO1000#1#chr03 3094 3097\n",
+      "\t 64687197 64687200\n",
+      "{'Q.START': 3093, 'Q.END': 3094, 'T.START': 64687195, 'T.END': 64687196, 'CG': '1='}\n",
+      "7046698 TO1000#1#chr03 3097 3140\n",
+      "\t 64687201 64687244\n",
+      "{'Q.START': 3094, 'Q.END': 3097, 'T.START': 64687197, 'T.END': 64687200, 'CG': '3='}\n",
+      "7046700 TO1000#1#chr03 3140 3210\n",
+      "\t 64687245 64687315\n",
+      "{'Q.START': 3097, 'Q.END': 3140, 'T.START': 64687201, 'T.END': 64687244, 'CG': '43='}\n",
+      "7046702 TO1000#1#chr03 3210 3211\n",
+      "\t 64687316 64687317\n",
+      "{'Q.START': 3140, 'Q.END': 3210, 'T.START': 64687245, 'T.END': 64687315, 'CG': '70='}\n",
+      "7046703 TO1000#1#chr03 3211 3229\n",
+      "\t 64687318 64687336\n",
+      "{'Q.START': 3210, 'Q.END': 3211, 'T.START': 64687316, 'T.END': 64687317, 'CG': '1='}\n",
+      "7046704 TO1000#1#chr03 3229 3230\n",
+      "\t 64687337 64687338\n",
+      "{'Q.START': 3211, 'Q.END': 3229, 'T.START': 64687318, 'T.END': 64687336, 'CG': '18='}\n",
+      "7046706 TO1000#1#chr03 3230 3276\n",
+      "\t 64687339 64687385\n",
+      "{'Q.START': 3229, 'Q.END': 3230, 'T.START': 64687337, 'T.END': 64687338, 'CG': '1='}\n",
+      "7046707 TO1000#1#chr03 3276 3277\n",
+      "\t 64687386 64687387\n",
+      "{'Q.START': 3230, 'Q.END': 3276, 'T.START': 64687339, 'T.END': 64687385, 'CG': '46='}\n",
+      "7046709 TO1000#1#chr03 3277 3315\n",
+      "\t 64687388 64687426\n",
+      "{'Q.START': 3276, 'Q.END': 3277, 'T.START': 64687386, 'T.END': 64687387, 'CG': '1='}\n",
+      "7046710 TO1000#1#chr03 3315 3316\n",
+      "\t 64687427 64687428\n",
+      "{'Q.START': 3277, 'Q.END': 3315, 'T.START': 64687388, 'T.END': 64687426, 'CG': '38='}\n",
+      "7046712 TO1000#1#chr03 3316 3322\n",
+      "\t 64687429 64687435\n",
+      "{'Q.START': 3315, 'Q.END': 3316, 'T.START': 64687427, 'T.END': 64687428, 'CG': '1='}\n",
+      "7046713 TO1000#1#chr03 3322 3323\n",
+      "\t 64687436 64687437\n",
+      "{'Q.START': 3316, 'Q.END': 3322, 'T.START': 64687429, 'T.END': 64687435, 'CG': '6='}\n",
+      "7046715 TO1000#1#chr03 3323 3348\n",
+      "\t 64687438 64687463\n",
+      "{'Q.START': 3322, 'Q.END': 3323, 'T.START': 64687436, 'T.END': 64687437, 'CG': '1='}\n",
+      "7046718 TO1000#1#chr03 3352 3353\n",
+      "\t 64687464 64687465\n",
+      "{'Q.START': 3323, 'Q.END': 3348, 'T.START': 64687438, 'T.END': 64687463, 'CG': '25='}\n",
+      "7046717 TO1000#1#chr03 3351 3352\n",
+      "\t 64687466 64687467\n",
+      "{'Q.START': 3352, 'Q.END': 3353, 'T.START': 64687464, 'T.END': 64687465, 'CG': '1='}\n",
+      "7046718 TO1000#1#chr03 3352 3353\n",
+      "\t 64687464 64687465\n",
+      "{'Q.START': 3351, 'Q.END': 3352, 'T.START': 64687466, 'T.END': 64687467, 'CG': '1='}\n",
+      "7046717 TO1000#1#chr03 3351 3352\n",
+      "\t 64687466 64687467\n",
+      "{'Q.START': 3352, 'Q.END': 3353, 'T.START': 64687464, 'T.END': 64687465, 'CG': '1='}\n",
+      "7046718 TO1000#1#chr03 3352 3353\n",
+      "\t 64687464 64687465\n",
+      "{'Q.START': 3351, 'Q.END': 3352, 'T.START': 64687466, 'T.END': 64687467, 'CG': '1='}\n",
+      "7046720 TO1000#1#chr03 3353 3354\n",
+      "\t 64687468 64687469\n",
+      "{'Q.START': 3352, 'Q.END': 3353, 'T.START': 64687464, 'T.END': 64687465, 'CG': '1='}\n",
+      "7046722 TO1000#1#chr03 3354 3356\n",
+      "\t 64687470 64687472\n",
+      "{'Q.START': 3353, 'Q.END': 3354, 'T.START': 64687468, 'T.END': 64687469, 'CG': '1='}\n",
+      "7046724 TO1000#1#chr03 3356 3357\n",
+      "\t 64687473 64687474\n",
+      "{'Q.START': 3354, 'Q.END': 3356, 'T.START': 64687470, 'T.END': 64687472, 'CG': '2='}\n",
+      "7046725 TO1000#1#chr03 3357 3489\n",
+      "\t 64687475 64687607\n",
+      "{'Q.START': 3356, 'Q.END': 3357, 'T.START': 64687473, 'T.END': 64687474, 'CG': '1='}\n",
+      "7046727 TO1000#1#chr03 3489 3490\n",
+      "\t 64687608 64687609\n",
+      "{'Q.START': 3357, 'Q.END': 3489, 'T.START': 64687475, 'T.END': 64687607, 'CG': '132='}\n",
+      "7046728 TO1000#1#chr03 3490 3642\n",
+      "\t 64687610 64687762\n",
+      "{'Q.START': 3489, 'Q.END': 3490, 'T.START': 64687608, 'T.END': 64687609, 'CG': '1='}\n",
+      "7046729 TO1000#1#chr03 3642 3644\n",
+      "\t 64687763 64687765\n",
+      "{'Q.START': 3490, 'Q.END': 3642, 'T.START': 64687610, 'T.END': 64687762, 'CG': '152='}\n",
+      "7046730 TO1000#1#chr03 3644 3685\n",
+      "\t 64687766 64687807\n",
+      "{'Q.START': 3642, 'Q.END': 3644, 'T.START': 64687763, 'T.END': 64687765, 'CG': '2='}\n",
+      "7046731 TO1000#1#chr03 3685 3687\n",
+      "\t 64687808 64687810\n",
+      "{'Q.START': 3644, 'Q.END': 3685, 'T.START': 64687766, 'T.END': 64687807, 'CG': '41='}\n",
+      "7046733 TO1000#1#chr03 3687 3693\n",
+      "\t 64687811 64687817\n",
+      "{'Q.START': 3685, 'Q.END': 3687, 'T.START': 64687808, 'T.END': 64687810, 'CG': '2='}\n",
+      "7046735 TO1000#1#chr03 3693 3694\n",
+      "\t 64687818 64687819\n",
+      "{'Q.START': 3687, 'Q.END': 3693, 'T.START': 64687811, 'T.END': 64687817, 'CG': '6='}\n",
+      "7046736 TO1000#1#chr03 3694 3708\n",
+      "\t 64687820 64687834\n",
+      "{'Q.START': 3693, 'Q.END': 3694, 'T.START': 64687818, 'T.END': 64687819, 'CG': '1='}\n",
+      "7046738 TO1000#1#chr03 3720 3721\n",
+      "\t 64687835 64687836\n",
+      "{'Q.START': 3694, 'Q.END': 3708, 'T.START': 64687820, 'T.END': 64687834, 'CG': '14='}\n",
+      "7046739 TO1000#1#chr03 3721 3722\n",
+      "\t 64687837 64687838\n",
+      "{'Q.START': 3720, 'Q.END': 3721, 'T.START': 64687835, 'T.END': 64687836, 'CG': '1='}\n",
+      "7046740 TO1000#1#chr03 3716 3720\n",
+      "\t 64687839 64687843\n",
+      "{'Q.START': 3721, 'Q.END': 3722, 'T.START': 64687837, 'T.END': 64687838, 'CG': '1='}\n",
+      "7046738 TO1000#1#chr03 3720 3721\n",
+      "\t 64687835 64687836\n",
+      "{'Q.START': 3716, 'Q.END': 3720, 'T.START': 64687839, 'T.END': 64687843, 'CG': '4='}\n",
+      "7046739 TO1000#1#chr03 3721 3722\n",
+      "\t 64687837 64687838\n",
+      "{'Q.START': 3720, 'Q.END': 3721, 'T.START': 64687835, 'T.END': 64687836, 'CG': '1='}\n",
+      "7046740 TO1000#1#chr03 3716 3720\n",
+      "\t 64687839 64687843\n",
+      "{'Q.START': 3721, 'Q.END': 3722, 'T.START': 64687837, 'T.END': 64687838, 'CG': '1='}\n",
+      "7046738 TO1000#1#chr03 3720 3721\n",
+      "\t 64687835 64687836\n",
+      "{'Q.START': 3716, 'Q.END': 3720, 'T.START': 64687839, 'T.END': 64687843, 'CG': '4='}\n",
+      "7046739 TO1000#1#chr03 3721 3722\n",
+      "\t 64687837 64687838\n",
+      "{'Q.START': 3720, 'Q.END': 3721, 'T.START': 64687835, 'T.END': 64687836, 'CG': '1='}\n",
+      "7046741 TO1000#1#chr03 3722 3735\n",
+      "\t 64687844 64687877\n",
+      "{'Q.START': 3721, 'Q.END': 3722, 'T.START': 64687837, 'T.END': 64687838, 'CG': '1='}\n",
+      "ALN_2\n",
+      "7594382 D134#1#chr03 0 1\n",
+      "\tIn path\n",
+      "\t 70220037 70220038\n",
+      "skipped\n",
+      "\n",
+      "7594369 D134#1#chr03 32 33\n",
+      "\tIn path\n",
+      "\t 70219216 70219217\n",
+      "{'Q.START': 0, 'Q.END': 1, 'T.START': 70220037, 'T.END': 70220038, 'CG': '1='}\n",
+      "7594371 D134#1#chr03 15 16\n",
+      "\tIn path\n",
+      "\t 70221163 70221164\n",
+      "{'Q.START': 32, 'Q.END': 33, 'T.START': 70219216, 'T.END': 70219217, 'CG': '1='}\n",
+      "7594021 D134#1#chr03 57 58\n",
+      "\tIn path\n",
+      "\t 70219218 70219219\n",
+      "{'Q.START': 15, 'Q.END': 16, 'T.START': 70221163, 'T.END': 70221164, 'CG': '1='}\n",
+      "7594286 D134#1#chr03 59 60\n",
+      "\tIn path\n",
+      "\t 70219349 70219350\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
+      "7594374 D134#1#chr03 69 70\n",
+      "\tIn path\n",
+      "\t 70219092 70219093\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
+      "7594356 D134#1#chr03 66 67\n",
+      "\tIn path\n",
+      "\t 70219570 70219571\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
+      "7594374 D134#1#chr03 69 70\n",
+      "\tIn path\n",
+      "\t 70219092 70219093\n",
+      "{'Q.START': 66, 'Q.END': 67, 'T.START': 70219570, 'T.END': 70219571, 'CG': '1='}\n",
+      "7594374 D134#1#chr03 69 70\n",
+      "\tIn path\n",
+      "\t 70219092 70219093\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
+      "7594375 D134#1#chr03 68 69\n",
+      "\tIn path\n",
+      "\t 70221598 70221599\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
+      "7594626 D134#1#chr03 10 11\n",
+      "\tIn path\n",
+      "\t 70219214 70219215\n",
+      "{'Q.START': 68, 'Q.END': 69, 'T.START': 70221598, 'T.END': 70221599, 'CG': '1='}\n",
+      "7594011 D134#1#chr03 11 12\n",
+      "\tIn path\n",
+      "\t 70219995 70219996\n",
+      "{'Q.START': 10, 'Q.END': 11, 'T.START': 70219214, 'T.END': 70219215, 'CG': '1='}\n",
+      "7594374 D134#1#chr03 69 70\n",
+      "\tIn path\n",
+      "\t 70219092 70219093\n",
+      "{'Q.START': 11, 'Q.END': 12, 'T.START': 70219995, 'T.END': 70219996, 'CG': '1='}\n",
+      "7594375 D134#1#chr03 68 69\n",
+      "\tIn path\n",
+      "\t 70221598 70221599\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
+      "7594369 D134#1#chr03 32 33\n",
+      "\tIn path\n",
+      "\t 70219216 70219217\n",
+      "{'Q.START': 68, 'Q.END': 69, 'T.START': 70221598, 'T.END': 70221599, 'CG': '1='}\n",
+      "7594371 D134#1#chr03 15 16\n",
+      "\tIn path\n",
+      "\t 70221163 70221164\n",
+      "{'Q.START': 32, 'Q.END': 33, 'T.START': 70219216, 'T.END': 70219217, 'CG': '1='}\n",
+      "7594021 D134#1#chr03 57 58\n",
+      "\tIn path\n",
+      "\t 70219218 70219219\n",
+      "{'Q.START': 15, 'Q.END': 16, 'T.START': 70221163, 'T.END': 70221164, 'CG': '1='}\n",
+      "7594021 D134#1#chr03 57 58\n",
+      "\tIn path\n",
+      "\t 70219218 70219219\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
+      "7594021 D134#1#chr03 57 58\n",
+      "\tIn path\n",
+      "\t 70219218 70219219\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
+      "7594021 D134#1#chr03 57 58\n",
+      "\tIn path\n",
+      "\t 70219218 70219219\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
+      "7594241 D134#1#chr03 20 21\n",
+      "\tIn path\n",
+      "\t 70219220 70219221\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
+      "7594248 D134#1#chr03 21 22\n",
+      "\tNot in path\n",
+      "7594286 D134#1#chr03 59 60\n",
+      "\tIn path\n",
+      "\t 70219349 70219350\n",
+      "{'Q.START': 20, 'Q.END': 21, 'T.START': 70219220, 'T.END': 70219221, 'CG': '1='}\n",
+      "7594311 D134#1#chr03 55 56\n",
+      "\tIn path\n",
+      "\t 70219351 70219352\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
+      "7594315 D134#1#chr03 53 54\n",
+      "\tIn path\n",
+      "\t 70219857 70219858\n",
+      "{'Q.START': 55, 'Q.END': 56, 'T.START': 70219351, 'T.END': 70219352, 'CG': '1='}\n",
+      "7594311 D134#1#chr03 55 56\n",
+      "\tIn path\n",
+      "\t 70219351 70219352\n",
+      "{'Q.START': 53, 'Q.END': 54, 'T.START': 70219857, 'T.END': 70219858, 'CG': '1='}\n",
+      "7594330 D134#1#chr03 26 27\n",
+      "\tNot in path\n",
+      "7594311 D134#1#chr03 55 56\n",
+      "\tIn path\n",
+      "\t 70219351 70219352\n",
+      "{'Q.START': 55, 'Q.END': 56, 'T.START': 70219351, 'T.END': 70219352, 'CG': '1='}\n",
+      "7594315 D134#1#chr03 53 54\n",
+      "\tIn path\n",
+      "\t 70219857 70219858\n",
+      "{'Q.START': 55, 'Q.END': 56, 'T.START': 70219351, 'T.END': 70219352, 'CG': '1='}\n",
+      "7594374 D134#1#chr03 69 70\n",
+      "\tIn path\n",
+      "\t 70219092 70219093\n",
+      "{'Q.START': 53, 'Q.END': 54, 'T.START': 70219857, 'T.END': 70219858, 'CG': '1='}\n",
+      "7594311 D134#1#chr03 55 56\n",
+      "\tIn path\n",
+      "\t 70219351 70219352\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
+      "7594374 D134#1#chr03 69 70\n",
+      "\tIn path\n",
+      "\t 70219092 70219093\n",
+      "{'Q.START': 55, 'Q.END': 56, 'T.START': 70219351, 'T.END': 70219352, 'CG': '1='}\n",
+      "7594369 D134#1#chr03 32 33\n",
+      "\tIn path\n",
+      "\t 70219216 70219217\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
+      "7594021 D134#1#chr03 57 58\n",
+      "\tIn path\n",
+      "\t 70219218 70219219\n",
+      "{'Q.START': 32, 'Q.END': 33, 'T.START': 70219216, 'T.END': 70219217, 'CG': '1='}\n",
+      "7594026 D134#1#chr03 37 38\n",
+      "\tIn path\n",
+      "\t 70220249 70220250\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
+      "7594021 D134#1#chr03 57 58\n",
+      "\tIn path\n",
+      "\t 70219218 70219219\n",
+      "{'Q.START': 37, 'Q.END': 38, 'T.START': 70220249, 'T.END': 70220250, 'CG': '1='}\n",
+      "7594021 D134#1#chr03 57 58\n",
+      "\tIn path\n",
+      "\t 70219218 70219219\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
+      "7594026 D134#1#chr03 37 38\n",
+      "\tIn path\n",
+      "\t 70220249 70220250\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
+      "7594021 D134#1#chr03 57 58\n",
+      "\tIn path\n",
+      "\t 70219218 70219219\n",
+      "{'Q.START': 37, 'Q.END': 38, 'T.START': 70220249, 'T.END': 70220250, 'CG': '1='}\n",
+      "7594021 D134#1#chr03 57 58\n",
+      "\tIn path\n",
+      "\t 70219218 70219219\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
+      "7594021 D134#1#chr03 57 58\n",
+      "\tIn path\n",
+      "\t 70219218 70219219\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
+      "7594021 D134#1#chr03 57 58\n",
+      "\tIn path\n",
+      "\t 70219218 70219219\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
+      "7594286 D134#1#chr03 59 60\n",
+      "\tIn path\n",
+      "\t 70219349 70219350\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
+      "7594374 D134#1#chr03 69 70\n",
+      "\tIn path\n",
+      "\t 70219092 70219093\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
+      "7594021 D134#1#chr03 57 58\n",
+      "\tIn path\n",
+      "\t 70219218 70219219\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
+      "7594286 D134#1#chr03 59 60\n",
+      "\tIn path\n",
+      "\t 70219349 70219350\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
+      "7594311 D134#1#chr03 55 56\n",
+      "\tIn path\n",
+      "\t 70219351 70219352\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
+      "7594286 D134#1#chr03 59 60\n",
+      "\tIn path\n",
+      "\t 70219349 70219350\n",
+      "{'Q.START': 55, 'Q.END': 56, 'T.START': 70219351, 'T.END': 70219352, 'CG': '1='}\n",
+      "7594311 D134#1#chr03 55 56\n",
+      "\tIn path\n",
+      "\t 70219351 70219352\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
+      "7594286 D134#1#chr03 59 60\n",
+      "\tIn path\n",
+      "\t 70219349 70219350\n",
+      "{'Q.START': 55, 'Q.END': 56, 'T.START': 70219351, 'T.END': 70219352, 'CG': '1='}\n",
+      "7594311 D134#1#chr03 55 56\n",
+      "\tIn path\n",
+      "\t 70219351 70219352\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
+      "7594286 D134#1#chr03 59 60\n",
+      "\tIn path\n",
+      "\t 70219349 70219350\n",
+      "{'Q.START': 55, 'Q.END': 56, 'T.START': 70219351, 'T.END': 70219352, 'CG': '1='}\n",
+      "7594311 D134#1#chr03 55 56\n",
+      "\tIn path\n",
+      "\t 70219351 70219352\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
+      "7594315 D134#1#chr03 53 54\n",
+      "\tIn path\n",
+      "\t 70219857 70219858\n",
+      "{'Q.START': 55, 'Q.END': 56, 'T.START': 70219351, 'T.END': 70219352, 'CG': '1='}\n",
+      "7594286 D134#1#chr03 59 60\n",
+      "\tIn path\n",
+      "\t 70219349 70219350\n",
+      "{'Q.START': 53, 'Q.END': 54, 'T.START': 70219857, 'T.END': 70219858, 'CG': '1='}\n",
+      "7594311 D134#1#chr03 55 56\n",
+      "\tIn path\n",
+      "\t 70219351 70219352\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
+      "7594374 D134#1#chr03 69 70\n",
+      "\tIn path\n",
+      "\t 70219092 70219093\n",
+      "{'Q.START': 55, 'Q.END': 56, 'T.START': 70219351, 'T.END': 70219352, 'CG': '1='}\n",
+      "7594021 D134#1#chr03 57 58\n",
+      "\tIn path\n",
+      "\t 70219218 70219219\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
+      "7594286 D134#1#chr03 59 60\n",
+      "\tIn path\n",
+      "\t 70219349 70219350\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
+      "7594286 D134#1#chr03 59 60\n",
+      "\tIn path\n",
+      "\t 70219349 70219350\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
+      "7594374 D134#1#chr03 69 70\n",
+      "\tIn path\n",
+      "\t 70219092 70219093\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
+      "7594356 D134#1#chr03 66 67\n",
+      "\tIn path\n",
+      "\t 70219570 70219571\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
+      "7594374 D134#1#chr03 69 70\n",
+      "\tIn path\n",
+      "\t 70219092 70219093\n",
+      "{'Q.START': 66, 'Q.END': 67, 'T.START': 70219570, 'T.END': 70219571, 'CG': '1='}\n",
+      "7594374 D134#1#chr03 69 70\n",
+      "\tIn path\n",
+      "\t 70219092 70219093\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
+      "7594375 D134#1#chr03 68 69\n",
+      "\tIn path\n",
+      "\t 70221598 70221599\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
+      "7594374 D134#1#chr03 69 70\n",
+      "\tIn path\n",
+      "\t 70219092 70219093\n",
+      "{'Q.START': 68, 'Q.END': 69, 'T.START': 70221598, 'T.END': 70221599, 'CG': '1='}\n",
+      "7594356 D134#1#chr03 66 67\n",
+      "\tIn path\n",
+      "\t 70219570 70219571\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
+      "7594374 D134#1#chr03 69 70\n",
+      "\tIn path\n",
+      "\t 70219092 70219093\n",
+      "{'Q.START': 66, 'Q.END': 67, 'T.START': 70219570, 'T.END': 70219571, 'CG': '1='}\n",
+      "7594375 D134#1#chr03 68 69\n",
+      "\tIn path\n",
+      "\t 70221598 70221599\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
+      "7594374 D134#1#chr03 69 70\n",
+      "\tIn path\n",
+      "\t 70219092 70219093\n",
+      "{'Q.START': 68, 'Q.END': 69, 'T.START': 70221598, 'T.END': 70221599, 'CG': '1='}\n",
+      "7594350 D134#1#chr03 70 71\n",
+      "\tIn path\n",
+      "\t 70219226 70219227\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
+      "7594264 D134#1#chr03 71 72\n",
+      "\tIn path\n",
+      "\t 70219228 70219229\n",
+      "{'Q.START': 70, 'Q.END': 71, 'T.START': 70219226, 'T.END': 70219227, 'CG': '1='}\n",
+      "7594207 D134#1#chr03 72 73\n",
+      "\tIn path\n",
+      "\t 70219230 70219231\n",
+      "{'Q.START': 71, 'Q.END': 72, 'T.START': 70219228, 'T.END': 70219229, 'CG': '1='}\n",
+      "7594225 D134#1#chr03 73 74\n",
+      "\tIn path\n",
+      "\t 70219232 70219233\n",
+      "{'Q.START': 72, 'Q.END': 73, 'T.START': 70219230, 'T.END': 70219231, 'CG': '1='}\n",
+      "7594227 D134#1#chr03 74 75\n",
+      "\tIn path\n",
+      "\t 70220150 70220151\n",
+      "{'Q.START': 73, 'Q.END': 74, 'T.START': 70219232, 'T.END': 70219233, 'CG': '1='}\n",
+      "7594120 D134#1#chr03 75 76\n",
+      "\tIn path\n",
+      "\t 70219236 70219237\n",
+      "{'Q.START': 74, 'Q.END': 75, 'T.START': 70220150, 'T.END': 70220151, 'CG': '1='}\n",
+      "7594132 D134#1#chr03 76 77\n",
+      "\tIn path\n",
+      "\t 70219777 70219778\n",
+      "{'Q.START': 75, 'Q.END': 76, 'T.START': 70219236, 'T.END': 70219237, 'CG': '1='}\n",
+      "7594165 D134#1#chr03 77 78\n",
+      "\tIn path\n",
+      "\t 70219240 70219241\n",
+      "{'Q.START': 76, 'Q.END': 77, 'T.START': 70219777, 'T.END': 70219778, 'CG': '1='}\n",
+      "7594172 D134#1#chr03 78 3735\n",
+      "\tNot in path\n",
+      "ALN_2\n",
+      "7594382 TO1000#1#chr03 0 1\n",
+      "\t 61731222 61731223\n",
+      "skipped\n",
+      "\n",
+      "7594369 TO1000#1#chr03 32 33\n",
+      "\t 61731060 61731061\n",
+      "{'Q.START': 0, 'Q.END': 1, 'T.START': 61731222, 'T.END': 61731223, 'CG': '1='}\n",
+      "7594371 TO1000#1#chr03 15 16\n",
+      "\tNot in path\n",
+      "7594021 TO1000#1#chr03 57 58\n",
+      "\t 61730922 61730923\n",
+      "{'Q.START': 32, 'Q.END': 33, 'T.START': 61731060, 'T.END': 61731061, 'CG': '1='}\n",
+      "7594286 TO1000#1#chr03 59 60\n",
+      "\t 61731054 61731055\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 61730922, 'T.END': 61730923, 'CG': '1X'}\n",
+      "7594374 TO1000#1#chr03 69 70\n",
+      "\t 61730920 61730921\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 61731054, 'T.END': 61731055, 'CG': '1='}\n",
+      "7594356 TO1000#1#chr03 66 67\n",
+      "\t 61731519 61731520\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 61730920, 'T.END': 61730921, 'CG': '1='}\n",
+      "7594374 TO1000#1#chr03 69 70\n",
+      "\t 61730920 61730921\n",
+      "{'Q.START': 66, 'Q.END': 67, 'T.START': 61731519, 'T.END': 61731520, 'CG': '1='}\n",
+      "7594374 TO1000#1#chr03 69 70\n",
+      "\t 61730920 61730921\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 61730920, 'T.END': 61730921, 'CG': '1='}\n",
+      "7594375 TO1000#1#chr03 68 69\n",
+      "\t 61733612 61733613\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 61730920, 'T.END': 61730921, 'CG': '1='}\n",
+      "7594626 TO1000#1#chr03 10 11\n",
+      "\t 61731056 61731057\n",
+      "{'Q.START': 68, 'Q.END': 69, 'T.START': 61733612, 'T.END': 61733613, 'CG': '1='}\n",
+      "7594011 TO1000#1#chr03 11 12\n",
+      "\t 61733900 61733901\n",
+      "{'Q.START': 10, 'Q.END': 11, 'T.START': 61731056, 'T.END': 61731057, 'CG': '1='}\n",
+      "7594374 TO1000#1#chr03 69 70\n",
+      "\t 61730920 61730921\n",
+      "{'Q.START': 11, 'Q.END': 12, 'T.START': 61733900, 'T.END': 61733901, 'CG': '1='}\n",
+      "7594375 TO1000#1#chr03 68 69\n",
+      "\t 61733612 61733613\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 61730920, 'T.END': 61730921, 'CG': '1='}\n",
+      "7594369 TO1000#1#chr03 32 33\n",
+      "\t 61731060 61731061\n",
+      "{'Q.START': 68, 'Q.END': 69, 'T.START': 61733612, 'T.END': 61733613, 'CG': '1='}\n",
+      "7594371 TO1000#1#chr03 15 16\n",
+      "\tNot in path\n",
+      "7594021 TO1000#1#chr03 57 58\n",
+      "\t 61730922 61730923\n",
+      "{'Q.START': 32, 'Q.END': 33, 'T.START': 61731060, 'T.END': 61731061, 'CG': '1='}\n",
+      "7594021 TO1000#1#chr03 57 58\n",
+      "\t 61730922 61730923\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 61730922, 'T.END': 61730923, 'CG': '1X'}\n",
+      "7594021 TO1000#1#chr03 57 58\n",
+      "\t 61730922 61730923\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 61730922, 'T.END': 61730923, 'CG': '1X'}\n",
+      "7594021 TO1000#1#chr03 57 58\n",
+      "\t 61730922 61730923\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 61730922, 'T.END': 61730923, 'CG': '1X'}\n",
+      "7594241 TO1000#1#chr03 20 21\n",
+      "\t 61731046 61731047\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 61730922, 'T.END': 61730923, 'CG': '1X'}\n",
+      "7594248 TO1000#1#chr03 21 22\n",
+      "\t 61734261 61734262\n",
+      "{'Q.START': 20, 'Q.END': 21, 'T.START': 61731046, 'T.END': 61731047, 'CG': '1='}\n",
+      "7594286 TO1000#1#chr03 59 60\n",
+      "\t 61731054 61731055\n",
+      "{'Q.START': 21, 'Q.END': 22, 'T.START': 61734261, 'T.END': 61734262, 'CG': '1='}\n",
+      "7594311 TO1000#1#chr03 55 56\n",
+      "\t 61731052 61731053\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 61731054, 'T.END': 61731055, 'CG': '1='}\n",
+      "7594315 TO1000#1#chr03 53 54\n",
+      "\t 61733937 61733938\n",
+      "{'Q.START': 55, 'Q.END': 56, 'T.START': 61731052, 'T.END': 61731053, 'CG': '1='}\n",
+      "7594311 TO1000#1#chr03 55 56\n",
+      "\t 61731052 61731053\n",
+      "{'Q.START': 53, 'Q.END': 54, 'T.START': 61733937, 'T.END': 61733938, 'CG': '1='}\n",
+      "7594330 TO1000#1#chr03 26 27\n",
+      "\t 61731768 61731769\n",
+      "{'Q.START': 55, 'Q.END': 56, 'T.START': 61731052, 'T.END': 61731053, 'CG': '1='}\n",
+      "7594311 TO1000#1#chr03 55 56\n",
+      "\t 61731052 61731053\n",
+      "{'Q.START': 26, 'Q.END': 27, 'T.START': 61731768, 'T.END': 61731769, 'CG': '1='}\n",
+      "7594315 TO1000#1#chr03 53 54\n",
+      "\t 61733937 61733938\n",
+      "{'Q.START': 55, 'Q.END': 56, 'T.START': 61731052, 'T.END': 61731053, 'CG': '1='}\n",
+      "7594374 TO1000#1#chr03 69 70\n",
+      "\t 61730920 61730921\n",
+      "{'Q.START': 53, 'Q.END': 54, 'T.START': 61733937, 'T.END': 61733938, 'CG': '1='}\n",
+      "7594311 TO1000#1#chr03 55 56\n",
+      "\t 61731052 61731053\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 61730920, 'T.END': 61730921, 'CG': '1='}\n",
+      "7594374 TO1000#1#chr03 69 70\n",
+      "\t 61730920 61730921\n",
+      "{'Q.START': 55, 'Q.END': 56, 'T.START': 61731052, 'T.END': 61731053, 'CG': '1='}\n",
+      "7594369 TO1000#1#chr03 32 33\n",
+      "\t 61731060 61731061\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 61730920, 'T.END': 61730921, 'CG': '1='}\n",
+      "7594021 TO1000#1#chr03 57 58\n",
+      "\t 61730922 61730923\n",
+      "{'Q.START': 32, 'Q.END': 33, 'T.START': 61731060, 'T.END': 61731061, 'CG': '1='}\n",
+      "7594026 TO1000#1#chr03 37 38\n",
+      "\t 61734267 61734268\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 61730922, 'T.END': 61730923, 'CG': '1X'}\n",
+      "7594021 TO1000#1#chr03 57 58\n",
+      "\t 61730922 61730923\n",
+      "{'Q.START': 37, 'Q.END': 38, 'T.START': 61734267, 'T.END': 61734268, 'CG': '1='}\n",
+      "7594021 TO1000#1#chr03 57 58\n",
+      "\t 61730922 61730923\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 61730922, 'T.END': 61730923, 'CG': '1X'}\n",
+      "7594026 TO1000#1#chr03 37 38\n",
+      "\t 61734267 61734268\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 61730922, 'T.END': 61730923, 'CG': '1X'}\n",
+      "7594021 TO1000#1#chr03 57 58\n",
+      "\t 61730922 61730923\n",
+      "{'Q.START': 37, 'Q.END': 38, 'T.START': 61734267, 'T.END': 61734268, 'CG': '1='}\n",
+      "7594021 TO1000#1#chr03 57 58\n",
+      "\t 61730922 61730923\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 61730922, 'T.END': 61730923, 'CG': '1X'}\n",
+      "7594021 TO1000#1#chr03 57 58\n",
+      "\t 61730922 61730923\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 61730922, 'T.END': 61730923, 'CG': '1X'}\n",
+      "7594021 TO1000#1#chr03 57 58\n",
+      "\t 61730922 61730923\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 61730922, 'T.END': 61730923, 'CG': '1X'}\n",
+      "7594286 TO1000#1#chr03 59 60\n",
+      "\t 61731054 61731055\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 61730922, 'T.END': 61730923, 'CG': '1X'}\n",
+      "7594374 TO1000#1#chr03 69 70\n",
+      "\t 61730920 61730921\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 61731054, 'T.END': 61731055, 'CG': '1='}\n",
+      "7594021 TO1000#1#chr03 57 58\n",
+      "\t 61730922 61730923\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 61730920, 'T.END': 61730921, 'CG': '1='}\n",
+      "7594286 TO1000#1#chr03 59 60\n",
+      "\t 61731054 61731055\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 61730922, 'T.END': 61730923, 'CG': '1X'}\n",
+      "7594311 TO1000#1#chr03 55 56\n",
+      "\t 61731052 61731053\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 61731054, 'T.END': 61731055, 'CG': '1='}\n",
+      "7594286 TO1000#1#chr03 59 60\n",
+      "\t 61731054 61731055\n",
+      "{'Q.START': 55, 'Q.END': 56, 'T.START': 61731052, 'T.END': 61731053, 'CG': '1='}\n",
+      "7594311 TO1000#1#chr03 55 56\n",
+      "\t 61731052 61731053\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 61731054, 'T.END': 61731055, 'CG': '1='}\n",
+      "7594286 TO1000#1#chr03 59 60\n",
+      "\t 61731054 61731055\n",
+      "{'Q.START': 55, 'Q.END': 56, 'T.START': 61731052, 'T.END': 61731053, 'CG': '1='}\n",
+      "7594311 TO1000#1#chr03 55 56\n",
+      "\t 61731052 61731053\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 61731054, 'T.END': 61731055, 'CG': '1='}\n",
+      "7594286 TO1000#1#chr03 59 60\n",
+      "\t 61731054 61731055\n",
+      "{'Q.START': 55, 'Q.END': 56, 'T.START': 61731052, 'T.END': 61731053, 'CG': '1='}\n",
+      "7594311 TO1000#1#chr03 55 56\n",
+      "\t 61731052 61731053\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 61731054, 'T.END': 61731055, 'CG': '1='}\n",
+      "7594315 TO1000#1#chr03 53 54\n",
+      "\t 61733937 61733938\n",
+      "{'Q.START': 55, 'Q.END': 56, 'T.START': 61731052, 'T.END': 61731053, 'CG': '1='}\n",
+      "7594286 TO1000#1#chr03 59 60\n",
+      "\t 61731054 61731055\n",
+      "{'Q.START': 53, 'Q.END': 54, 'T.START': 61733937, 'T.END': 61733938, 'CG': '1='}\n",
+      "7594311 TO1000#1#chr03 55 56\n",
+      "\t 61731052 61731053\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 61731054, 'T.END': 61731055, 'CG': '1='}\n",
+      "7594374 TO1000#1#chr03 69 70\n",
+      "\t 61730920 61730921\n",
+      "{'Q.START': 55, 'Q.END': 56, 'T.START': 61731052, 'T.END': 61731053, 'CG': '1='}\n",
+      "7594021 TO1000#1#chr03 57 58\n",
+      "\t 61730922 61730923\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 61730920, 'T.END': 61730921, 'CG': '1='}\n",
+      "7594286 TO1000#1#chr03 59 60\n",
+      "\t 61731054 61731055\n",
+      "{'Q.START': 57, 'Q.END': 58, 'T.START': 61730922, 'T.END': 61730923, 'CG': '1X'}\n",
+      "7594286 TO1000#1#chr03 59 60\n",
+      "\t 61731054 61731055\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 61731054, 'T.END': 61731055, 'CG': '1='}\n",
+      "7594374 TO1000#1#chr03 69 70\n",
+      "\t 61730920 61730921\n",
+      "{'Q.START': 59, 'Q.END': 60, 'T.START': 61731054, 'T.END': 61731055, 'CG': '1='}\n",
+      "7594356 TO1000#1#chr03 66 67\n",
+      "\t 61731519 61731520\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 61730920, 'T.END': 61730921, 'CG': '1='}\n",
+      "7594374 TO1000#1#chr03 69 70\n",
+      "\t 61730920 61730921\n",
+      "{'Q.START': 66, 'Q.END': 67, 'T.START': 61731519, 'T.END': 61731520, 'CG': '1='}\n",
+      "7594374 TO1000#1#chr03 69 70\n",
+      "\t 61730920 61730921\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 61730920, 'T.END': 61730921, 'CG': '1='}\n",
+      "7594375 TO1000#1#chr03 68 69\n",
+      "\t 61733612 61733613\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 61730920, 'T.END': 61730921, 'CG': '1='}\n",
+      "7594374 TO1000#1#chr03 69 70\n",
+      "\t 61730920 61730921\n",
+      "{'Q.START': 68, 'Q.END': 69, 'T.START': 61733612, 'T.END': 61733613, 'CG': '1='}\n",
+      "7594356 TO1000#1#chr03 66 67\n",
+      "\t 61731519 61731520\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 61730920, 'T.END': 61730921, 'CG': '1='}\n",
+      "7594374 TO1000#1#chr03 69 70\n",
+      "\t 61730920 61730921\n",
+      "{'Q.START': 66, 'Q.END': 67, 'T.START': 61731519, 'T.END': 61731520, 'CG': '1='}\n",
+      "7594375 TO1000#1#chr03 68 69\n",
+      "\t 61733612 61733613\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 61730920, 'T.END': 61730921, 'CG': '1='}\n",
+      "7594374 TO1000#1#chr03 69 70\n",
+      "\t 61730920 61730921\n",
+      "{'Q.START': 68, 'Q.END': 69, 'T.START': 61733612, 'T.END': 61733613, 'CG': '1='}\n",
+      "7594350 TO1000#1#chr03 70 71\n",
+      "\t 61731066 61731067\n",
+      "{'Q.START': 69, 'Q.END': 70, 'T.START': 61730920, 'T.END': 61730921, 'CG': '1='}\n",
+      "7594264 TO1000#1#chr03 71 72\n",
+      "\t 61731068 61731069\n",
+      "{'Q.START': 70, 'Q.END': 71, 'T.START': 61731066, 'T.END': 61731067, 'CG': '1='}\n",
+      "7594207 TO1000#1#chr03 72 73\n",
+      "\t 61731070 61731071\n",
+      "{'Q.START': 71, 'Q.END': 72, 'T.START': 61731068, 'T.END': 61731069, 'CG': '1='}\n",
+      "7594225 TO1000#1#chr03 73 74\n",
+      "\t 61731072 61731073\n",
+      "{'Q.START': 72, 'Q.END': 73, 'T.START': 61731070, 'T.END': 61731071, 'CG': '1='}\n",
+      "7594227 TO1000#1#chr03 74 75\n",
+      "\tNot in path\n",
+      "7594120 TO1000#1#chr03 75 76\n",
+      "\t 61731076 61731077\n",
+      "{'Q.START': 73, 'Q.END': 74, 'T.START': 61731072, 'T.END': 61731073, 'CG': '1='}\n",
+      "7594132 TO1000#1#chr03 76 77\n",
+      "\t 61733800 61733801\n",
+      "{'Q.START': 75, 'Q.END': 76, 'T.START': 61731076, 'T.END': 61731077, 'CG': '1='}\n",
+      "7594165 TO1000#1#chr03 77 78\n",
+      "\t 61731080 61731081\n",
+      "{'Q.START': 76, 'Q.END': 77, 'T.START': 61733800, 'T.END': 61733801, 'CG': '1='}\n",
+      "7594172 TO1000#1#chr03 78 3735\n",
+      "\tNot in path\n"
+     ]
+    }
+   ],
+   "source": [
+    "ALNS = {}\n",
+    "## Iterating over alignments\n",
+    "for aln_name in aln_dict.keys():\n",
+    "    \n",
+    "    ## Iterating over paths of the gfa\n",
+    "    for path_name in paths.keys():\n",
+    "        if path_name in [\"TO1000#1#chr03\", \"D134#1#chr03\"]: print(aln_name)\n",
+    "        _ = [] # Temporary list holding alignment blocks\n",
+    "\n",
+    "        ## Iterating over alignment nodes of the current alignment\n",
+    "        for node_id, orient in aln_dict[aln_name][\"PATH.MATCH\"]:\n",
+    "\n",
+    "            # Getting node info\n",
+    "            n_info = nodes[node_id]\n",
+    "            q_start = n_info[aln_name][\"START\"] # Start position on the query\n",
+    "            q_end = n_info[aln_name][\"END\"] # End position on the query\n",
+    "            _CG = n_info[aln_name][\"CIGAR\"] # Cigar of the alignment on the current node\n",
+    "\n",
+    "            if path_name in [\"TO1000#1#chr03\", \"D134#1#chr03\"]: print(node_id, path_name, q_start, q_end)\n",
+    "\n",
+    "            ## Checking if path is traversing the current node\n",
+    "            if path_name in list(n_info.keys()):\n",
+    "                if path_name == \"D134#1#chr03\": print(\"\\tIn path\")\n",
+    "\n",
+    "                ## Getting start and end position on the target given the orientation of the node in the alignment and the path\n",
+    "                if n_info[aln_name][\"STRAND\"] == n_info[path_name][\"STRAND\"] :\n",
+    "                    t_start = n_info[path_name][\"START\"]+n_info[aln_name][\"S.OFF\"]\n",
+    "                    t_end = n_info[path_name][\"END\"]+n_info[aln_name][\"E.OFF\"] \n",
+    "                else :\n",
+    "                    t_end = n_info[path_name][\"START\"]+n_info[aln_name][\"S.OFF\"]\n",
+    "                    t_start = n_info[path_name][\"END\"]+n_info[aln_name][\"E.OFF\"]\n",
+    "\n",
+    "                if path_name in [\"TO1000#1#chr03\", \"D134#1#chr03\"]: print(\"\\t\", t_start, t_end)\n",
+    "\n",
+    "                \"\"\"\n",
+    "                If the latest block t.end and q.end matches with the current node t.start and q.start, \n",
+    "                the node should be added to the block. Else, we terminate the block and add the node to a new block\n",
+    "                \"\"\"\n",
+    "                \n",
+    "                # Non empty temporary list of aln and ending of the last block is the same as the start of the new node : \n",
+    "                if len(_) and _[-1][\"T.END\"] == t_start and _[-1][\"Q.END\"]+1 == q_start: \n",
+    "                    tmp_aln[\"Q.END\"] = q_end\n",
+    "                    tmp_aln[\"T.END\"] = t_end\n",
+    "                    tmp_aln[\"CG\"] += _CG\n",
+    "#                elif len(_) and _[-1][\"T.END\"] == t_start: # Following on the target not on the query (i.e. Insertion)\n",
+    "#                    tmp_aln[\"T.END\"] = t_end\n",
+    "#                    tmp_aln[\"CG\"] += f\"{nodes_length[node_id]}I\"\n",
+    "#                elif len(_) and _[-1][\"Q.END\"]+1 == q_start: # Following on the query, not on the target (i.e. Deletion)\n",
+    "#                    tmp_aln[\"Q.END\"] = q_end\n",
+    "#                    tmp_aln[\"CG\"] += f\"{nodes_length[node_id]}D\"\n",
+    "                else : # Else, completely different\n",
+    "                    try : \n",
+    "                        _.append(tmp_aln)\n",
+    "                        if path_name in [\"TO1000#1#chr03\", \"D134#1#chr03\"]: print(tmp_aln)\n",
+    "                    except : \n",
+    "                        if path_name in [\"TO1000#1#chr03\", \"D134#1#chr03\"]: print(\"skipped\\n\")\n",
+    "                    tmp_aln = {\n",
+    "                        \"Q.START\": q_start,\n",
+    "                        \"Q.END\": q_end,\n",
+    "                        \"T.START\": t_start,\n",
+    "                        \"T.END\": t_end,\n",
+    "                        \"CG\": _CG,\n",
+    "                    }\n",
+    "            \n",
+    "            else : \n",
+    "                if path_name in [\"TO1000#1#chr03\", \"D134#1#chr03\"]: print(\"\\tNot in path\")\n",
+    "                # Node is not in the path\n",
+    "\n",
+    "        del tmp_aln\n",
+    "        \n",
+    "        ALNS[(path_name, aln_name)] = _"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "547f03fa-cbd5-42f9-b668-1ca4404795ba",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[{'Q.START': 0, 'Q.END': 77, 'T.START': 73306158, 'T.END': 73306235, 'CG': '77='}, {'Q.START': 77, 'Q.END': 82, 'T.START': 73306238, 'T.END': 73306243, 'CG': '5='}, {'Q.START': 83, 'Q.END': 138, 'T.START': 73306246, 'T.END': 73306301, 'CG': '55='}, {'Q.START': 139, 'Q.END': 202, 'T.START': 73306302, 'T.END': 73306365, 'CG': '63='}, {'Q.START': 202, 'Q.END': 203, 'T.START': 73306366, 'T.END': 73306367, 'CG': '1='}, {'Q.START': 203, 'Q.END': 379, 'T.START': 73306368, 'T.END': 73306544, 'CG': '176='}, {'Q.START': 379, 'Q.END': 380, 'T.START': 73306545, 'T.END': 73306546, 'CG': '1='}, {'Q.START': 380, 'Q.END': 429, 'T.START': 73306547, 'T.END': 73306596, 'CG': '49='}, {'Q.START': 429, 'Q.END': 430, 'T.START': 73306597, 'T.END': 73306598, 'CG': '1='}, {'Q.START': 430, 'Q.END': 457, 'T.START': 73306599, 'T.END': 73306626, 'CG': '27='}, {'Q.START': 457, 'Q.END': 492, 'T.START': 73306641, 'T.END': 73306676, 'CG': '35='}, {'Q.START': 508, 'Q.END': 564, 'T.START': 73306694, 'T.END': 73306750, 'CG': '56='}, {'Q.START': 568, 'Q.END': 569, 'T.START': 73306753, 'T.END': 73306754, 'CG': '1='}, {'Q.START': 568, 'Q.END': 569, 'T.START': 73306753, 'T.END': 73306754, 'CG': '1='}, {'Q.START': 568, 'Q.END': 569, 'T.START': 73306753, 'T.END': 73306754, 'CG': '1='}, {'Q.START': 569, 'Q.END': 824, 'T.START': 73306755, 'T.END': 73307010, 'CG': '255='}, {'Q.START': 826, 'Q.END': 858, 'T.START': 73307011, 'T.END': 73307043, 'CG': '32='}, {'Q.START': 858, 'Q.END': 859, 'T.START': 73307044, 'T.END': 73307045, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 
'T.END': 73307047, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}, {'Q.START': 869, 'Q.END': 913, 'T.START': 73307048, 'T.END': 73307092, 'CG': '44='}, {'Q.START': 913, 'Q.END': 919, 'T.START': 73307093, 'T.END': 73307099, 'CG': '6='}, {'Q.START': 919, 'Q.END': 978, 'T.START': 73307100, 'T.END': 73307159, 'CG': '59='}, {'Q.START': 978, 'Q.END': 979, 'T.START': 73307160, 'T.END': 73307161, 'CG': '1='}, {'Q.START': 979, 'Q.END': 1038, 'T.START': 73307162, 'T.END': 73307221, 'CG': '59='}, {'Q.START': 1038, 'Q.END': 1045, 'T.START': 73307224, 'T.END': 73307231, 'CG': '7='}, {'Q.START': 1045, 'Q.END': 1046, 'T.START': 73307232, 'T.END': 73307233, 'CG': '1='}, {'Q.START': 1046, 'Q.END': 1080, 'T.START': 73307234, 'T.END': 73307268, 'CG': '34='}, {'Q.START': 1080, 'Q.END': 1081, 'T.START': 73307269, 'T.END': 73307270, 'CG': '1='}, {'Q.START': 1081, 'Q.END': 1107, 'T.START': 73307271, 'T.END': 73307297, 'CG': '26='}, {'Q.START': 1108, 'Q.END': 1183, 'T.START': 73307300, 'T.END': 73307375, 'CG': '75='}, {'Q.START': 1183, 'Q.END': 1186, 'T.START': 73307376, 'T.END': 73307379, 'CG': '3='}, {'Q.START': 1224, 'Q.END': 1257, 'T.START': 73307419, 'T.END': 73307452, 'CG': '33='}, {'Q.START': 1289, 'Q.END': 1311, 'T.START': 73307475, 'T.END': 73307497, 'CG': '22='}, {'Q.START': 1359, 'Q.END': 1382, 'T.START': 73307546, 'T.END': 73307569, 'CG': '23='}, {'Q.START': 1434, 'Q.END': 1451, 'T.START': 73307643, 'T.END': 73307660, 'CG': '17='}, {'Q.START': 1451, 'Q.END': 1531, 'T.START': 73307661, 'T.END': 73307741, 'CG': '80='}, {'Q.START': 1532, 'Q.END': 1543, 'T.START': 73307744, 'T.END': 73307755, 'CG': '11='}, {'Q.START': 1544, 'Q.END': 1572, 'T.START': 73307758, 'T.END': 73307786, 'CG': 
'28='}, {'Q.START': 1572, 'Q.END': 1573, 'T.START': 73307787, 'T.END': 73307788, 'CG': '1='}, {'Q.START': 1573, 'Q.END': 1587, 'T.START': 73307789, 'T.END': 73307803, 'CG': '14='}, {'Q.START': 1588, 'Q.END': 1616, 'T.START': 73307806, 'T.END': 73307834, 'CG': '28='}, {'Q.START': 1616, 'Q.END': 1617, 'T.START': 73307835, 'T.END': 73307836, 'CG': '1='}, {'Q.START': 1617, 'Q.END': 1646, 'T.START': 73307837, 'T.END': 73307866, 'CG': '29='}, {'Q.START': 1646, 'Q.END': 1661, 'T.START': 73307867, 'T.END': 73307882, 'CG': '15='}, {'Q.START': 1661, 'Q.END': 1673, 'T.START': 73307883, 'T.END': 73307895, 'CG': '12='}, {'Q.START': 1673, 'Q.END': 1674, 'T.START': 73307896, 'T.END': 73307897, 'CG': '1='}, {'Q.START': 1674, 'Q.END': 1726, 'T.START': 73307898, 'T.END': 73307950, 'CG': '52='}, {'Q.START': 1727, 'Q.END': 1762, 'T.START': 73307953, 'T.END': 73307988, 'CG': '35='}, {'Q.START': 1766, 'Q.END': 1767, 'T.START': 73307991, 'T.END': 73307992, 'CG': '1='}, {'Q.START': 1765, 'Q.END': 1766, 'T.START': 73307993, 'T.END': 73307994, 'CG': '1='}, {'Q.START': 1766, 'Q.END': 1767, 'T.START': 73307991, 'T.END': 73307992, 'CG': '1='}, {'Q.START': 1765, 'Q.END': 1766, 'T.START': 73307993, 'T.END': 73307994, 'CG': '1='}, {'Q.START': 1766, 'Q.END': 1767, 'T.START': 73307991, 'T.END': 73307992, 'CG': '1='}, {'Q.START': 1767, 'Q.END': 1824, 'T.START': 73307995, 'T.END': 73308052, 'CG': '57='}, {'Q.START': 1824, 'Q.END': 1825, 'T.START': 73308053, 'T.END': 73308054, 'CG': '1='}, {'Q.START': 1825, 'Q.END': 1975, 'T.START': 73308055, 'T.END': 73308205, 'CG': '150='}, {'Q.START': 1976, 'Q.END': 2015, 'T.START': 73308208, 'T.END': 73308247, 'CG': '39='}, {'Q.START': 2016, 'Q.END': 2047, 'T.START': 73308250, 'T.END': 73308281, 'CG': '31='}, {'Q.START': 2047, 'Q.END': 2055, 'T.START': 73308286, 'T.END': 73308294, 'CG': '8='}, {'Q.START': 2056, 'Q.END': 2120, 'T.START': 73308297, 'T.END': 73308361, 'CG': '64='}, {'Q.START': 2120, 'Q.END': 2121, 'T.START': 73308362, 'T.END': 73308363, 'CG': '1='}, 
{'Q.START': 2121, 'Q.END': 2157, 'T.START': 73308364, 'T.END': 73308400, 'CG': '36='}, {'Q.START': 2158, 'Q.END': 2170, 'T.START': 73308403, 'T.END': 73308415, 'CG': '12='}, {'Q.START': 2170, 'Q.END': 2171, 'T.START': 73308416, 'T.END': 73308417, 'CG': '1='}, {'Q.START': 2171, 'Q.END': 2205, 'T.START': 73308418, 'T.END': 73308452, 'CG': '34='}, {'Q.START': 2206, 'Q.END': 2344, 'T.START': 73308455, 'T.END': 73308593, 'CG': '138='}, {'Q.START': 2345, 'Q.END': 2364, 'T.START': 73308596, 'T.END': 73308615, 'CG': '19='}, {'Q.START': 2364, 'Q.END': 2383, 'T.START': 73308616, 'T.END': 73308635, 'CG': '19='}, {'Q.START': 2383, 'Q.END': 2408, 'T.START': 73308636, 'T.END': 73308661, 'CG': '25='}, {'Q.START': 2408, 'Q.END': 2409, 'T.START': 73308662, 'T.END': 73308663, 'CG': '1='}, {'Q.START': 2409, 'Q.END': 2441, 'T.START': 73308664, 'T.END': 73308696, 'CG': '32='}, {'Q.START': 2441, 'Q.END': 2442, 'T.START': 73308697, 'T.END': 73308698, 'CG': '1='}, {'Q.START': 2442, 'Q.END': 2580, 'T.START': 73308699, 'T.END': 73308837, 'CG': '138='}, {'Q.START': 2582, 'Q.END': 2583, 'T.START': 73308838, 'T.END': 73308839, 'CG': '1='}, {'Q.START': 2583, 'Q.END': 2584, 'T.START': 73308840, 'T.END': 73308841, 'CG': '1='}, {'Q.START': 2582, 'Q.END': 2583, 'T.START': 73308838, 'T.END': 73308839, 'CG': '1='}, {'Q.START': 2583, 'Q.END': 2584, 'T.START': 73308840, 'T.END': 73308841, 'CG': '1='}, {'Q.START': 2584, 'Q.END': 2764, 'T.START': 73308842, 'T.END': 73309022, 'CG': '180='}, {'Q.START': 2765, 'Q.END': 2797, 'T.START': 73309025, 'T.END': 73309057, 'CG': '32='}, {'Q.START': 2798, 'Q.END': 2878, 'T.START': 73309060, 'T.END': 73309140, 'CG': '80='}, {'Q.START': 2878, 'Q.END': 2879, 'T.START': 73309141, 'T.END': 73309142, 'CG': '1='}, {'Q.START': 2879, 'Q.END': 2951, 'T.START': 73309143, 'T.END': 73309215, 'CG': '72='}, {'Q.START': 2951, 'Q.END': 2952, 'T.START': 73309216, 'T.END': 73309217, 'CG': '1='}, {'Q.START': 2952, 'Q.END': 3002, 'T.START': 73309218, 'T.END': 73309268, 'CG': '50='}, 
{'Q.START': 3002, 'Q.END': 3077, 'T.START': 73309271, 'T.END': 73309346, 'CG': '75='}, {'Q.START': 3077, 'Q.END': 3078, 'T.START': 73309347, 'T.END': 73309348, 'CG': '1='}, {'Q.START': 3078, 'Q.END': 3093, 'T.START': 73309349, 'T.END': 73309364, 'CG': '15='}, {'Q.START': 3094, 'Q.END': 3097, 'T.START': 73309367, 'T.END': 73309370, 'CG': '3='}, {'Q.START': 3097, 'Q.END': 3140, 'T.START': 73309371, 'T.END': 73309414, 'CG': '43='}, {'Q.START': 3140, 'Q.END': 3210, 'T.START': 73309415, 'T.END': 73309485, 'CG': '70='}, {'Q.START': 3210, 'Q.END': 3211, 'T.START': 73309486, 'T.END': 73309487, 'CG': '1='}, {'Q.START': 3211, 'Q.END': 3229, 'T.START': 73309488, 'T.END': 73309506, 'CG': '18='}, {'Q.START': 3229, 'Q.END': 3230, 'T.START': 73309507, 'T.END': 73309508, 'CG': '1='}, {'Q.START': 3230, 'Q.END': 3276, 'T.START': 73309509, 'T.END': 73309555, 'CG': '46='}, {'Q.START': 3277, 'Q.END': 3315, 'T.START': 73309558, 'T.END': 73309596, 'CG': '38='}, {'Q.START': 3316, 'Q.END': 3322, 'T.START': 73309599, 'T.END': 73309605, 'CG': '6='}, {'Q.START': 3323, 'Q.END': 3348, 'T.START': 73309608, 'T.END': 73309633, 'CG': '25='}, {'Q.START': 3352, 'Q.END': 3353, 'T.START': 73309634, 'T.END': 73309635, 'CG': '1='}, {'Q.START': 3351, 'Q.END': 3352, 'T.START': 73309636, 'T.END': 73309637, 'CG': '1='}, {'Q.START': 3352, 'Q.END': 3353, 'T.START': 73309634, 'T.END': 73309635, 'CG': '1='}, {'Q.START': 3351, 'Q.END': 3352, 'T.START': 73309636, 'T.END': 73309637, 'CG': '1='}, {'Q.START': 3352, 'Q.END': 3353, 'T.START': 73309634, 'T.END': 73309635, 'CG': '1='}, {'Q.START': 3353, 'Q.END': 3354, 'T.START': 73309638, 'T.END': 73309639, 'CG': '1='}, {'Q.START': 3354, 'Q.END': 3356, 'T.START': 73309640, 'T.END': 73309642, 'CG': '2='}, {'Q.START': 3357, 'Q.END': 3489, 'T.START': 73309645, 'T.END': 73309777, 'CG': '132='}, {'Q.START': 3490, 'Q.END': 3642, 'T.START': 73309780, 'T.END': 73309932, 'CG': '152='}, {'Q.START': 3644, 'Q.END': 3685, 'T.START': 73309933, 'T.END': 73309974, 'CG': '41='}, 
{'Q.START': 3687, 'Q.END': 3693, 'T.START': 73309977, 'T.END': 73309983, 'CG': '6='}, {'Q.START': 3694, 'Q.END': 3708, 'T.START': 73309986, 'T.END': 73310000, 'CG': '14='}, {'Q.START': 3720, 'Q.END': 3721, 'T.START': 73310010, 'T.END': 73310011, 'CG': '1='}, {'Q.START': 3721, 'Q.END': 3722, 'T.START': 73310003, 'T.END': 73310004, 'CG': '1='}, {'Q.START': 3716, 'Q.END': 3720, 'T.START': 73310005, 'T.END': 73310009, 'CG': '4='}, {'Q.START': 3720, 'Q.END': 3721, 'T.START': 73310010, 'T.END': 73310011, 'CG': '1='}, {'Q.START': 3721, 'Q.END': 3722, 'T.START': 73310003, 'T.END': 73310004, 'CG': '1='}, {'Q.START': 3716, 'Q.END': 3720, 'T.START': 73310005, 'T.END': 73310009, 'CG': '4='}, {'Q.START': 3720, 'Q.END': 3721, 'T.START': 73310010, 'T.END': 73310011, 'CG': '1='}, {'Q.START': 3721, 'Q.END': 3722, 'T.START': 73310003, 'T.END': 73310004, 'CG': '1='}]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(ALNS[(\"D134#1#chr03\", \"ALN_1\")])"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/gaf2aln.py b/gaf2aln.py
index 7657046..25fdebf 100644
--- a/gaf2aln.py
+++ b/gaf2aln.py
@@ -412,40 +412,30 @@ for aln_name in aln_dict.keys():
                     tmp_aln["CG"] += f"{nodes_length[node_id]}I"
                 elif len(_) and _[-1]["Q.END"] == q_start: # Following on the query, not on the target (i.e. Deletion)
                     tmp_aln["Q.END"] = q_end
-                    tmps_aln["CG"] += f"{nodes_length[node_id]}D"
+                    tmp_aln["CG"] += f"{nodes_length[node_id]}D"
                 else : # Else, completely different
+                    try : 
+                        _.append(tmp_aln)
+                    except : pass
                     tmp_aln = {
                         "Q.START": q_start,
                         "Q.END": q_end,
                         "T.START": t_start,
                         "T.END": t_end,
                         "CG": _CG,
-                        }
-                print("\t", tmp_aln)
-                
+                    }
+
             else : 
                 print("\tNot in path")
                 # Node is not in the path
 
-        _.append(tmp_aln)
-    ALNS[(path_name, aln_name)] = _
-
-## Debug
-for elem in ALNS[("TO1000#1#chr03", "ALN_1")]:
-    print(elem) 
-
             
+        ALNS[(path_name, aln_name)] = _
 
-        
-            
-            
-
-
-
-
-
-
-
-    
-        
-
+## Debug
+for elem in ALNS.keys():
+    print(elem)
+ 
+for key, elem in ALNS.items():
+    print(key)
+    print(elem)
\ No newline at end of file
-- 
GitLab


From ba8d0c2bf543e8e82785974a8c4da89487304e75 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Mon, 3 Jun 2024 13:36:27 +0200
Subject: [PATCH 16/30] Update Anchors2Path.py

---
 Anchors2Path.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/Anchors2Path.py b/Anchors2Path.py
index 3111c6f..e9582f8 100644
--- a/Anchors2Path.py
+++ b/Anchors2Path.py
@@ -121,12 +121,14 @@ node_path_count = {}
 ## {<NODE_ID>: <Number of path traversing this node>}
 # Computing number of path traversing each nodes
 for path_id, node_list in path_nodes.items():
-    for node_id in node_list:
+    for node_id, counts in np.unique(node_list, return_counts=True):
         
-        try :
-            node_path_count[node_id] += 1
-        except :
-            node_path_count[node_id] = 1
+        # Filtering anchors based on appearance (we keep unique anchors)
+        if counts == 1 :
+            try :
+                node_path_count[node_id] += 1
+            except :
+                node_path_count[node_id] = 1
 
 # Searching anchors
 n_path = len(list(path_nodes.keys()))
@@ -137,17 +139,23 @@ for node_id, count in node_path_count.items():
 
 # Computing path position for each node of the path of interest
 current_pos = 0
+ordered_anchors = []
 for node_id in path_nodes[args.pathname]:
     _end = current_pos + nodes_length[node_id]
 
     # Trying to add anchors path position if it is an anchor
     try :
         Anchors[int(node_id)].append( (current_pos, _end) )
+        
+        # Keeping track of order of appearance of anchors
+        ordered_anchors.append(int(node_id))
     except:
         pass
 
     current_pos = _end
 
+# Filtering anchors based on 
+
 # Transforming data into a table
 ID, START, END = [], [], []
 for node_id, positions in Anchors.items():
-- 
GitLab


From 022035307bd73924f77b2e7b0995bafe03210e30 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Mon, 3 Jun 2024 13:42:34 +0200
Subject: [PATCH 17/30] Update Anchors2Path.py

---
 Anchors2Path.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/Anchors2Path.py b/Anchors2Path.py
index e9582f8..8a8261d 100644
--- a/Anchors2Path.py
+++ b/Anchors2Path.py
@@ -121,10 +121,11 @@ node_path_count = {}
 ## {<NODE_ID>: <Number of path traversing this node>}
 # Computing number of path traversing each nodes
 for path_id, node_list in path_nodes.items():
-    for node_id, counts in np.unique(node_list, return_counts=True):
-        
+    nodes_counts = np.unique(node_list, return_counts=True)
+    for i in range(len(nodes_counts)):
+        node_id, count = nodes_counts[0][i], nodes_counts[1][i]
         # Filtering anchors based on appearance (we keep unique anchors)
-        if counts == 1 :
+        if count == 1 :
             try :
                 node_path_count[node_id] += 1
             except :
-- 
GitLab


From 8a62837dad2792b87c61deb44cc1927f0dca03a9 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Mon, 3 Jun 2024 14:48:26 +0200
Subject: [PATCH 18/30] Update Anchors2Path.py

---
 Anchors2Path.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Anchors2Path.py b/Anchors2Path.py
index 8a8261d..a3df05c 100644
--- a/Anchors2Path.py
+++ b/Anchors2Path.py
@@ -124,6 +124,7 @@ for path_id, node_list in path_nodes.items():
     nodes_counts = np.unique(node_list, return_counts=True)
     for i in range(len(nodes_counts)):
         node_id, count = nodes_counts[0][i], nodes_counts[1][i]
+        print(node_id, count)
         # Filtering anchors based on appearance (we keep unique anchors)
         if count == 1 :
             try :
-- 
GitLab


From 1a0dd8fb93808ec443394ae2eaf4eaf50955e5a4 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Mon, 3 Jun 2024 15:02:06 +0200
Subject: [PATCH 19/30] Update Anchors2Path.py

---
 Anchors2Path.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Anchors2Path.py b/Anchors2Path.py
index a3df05c..98565e4 100644
--- a/Anchors2Path.py
+++ b/Anchors2Path.py
@@ -121,11 +121,11 @@ node_path_count = {}
 ## {<NODE_ID>: <Number of path traversing this node>}
 # Computing number of path traversing each nodes
 for path_id, node_list in path_nodes.items():
+    print(path_id)
     nodes_counts = np.unique(node_list, return_counts=True)
     for i in range(len(nodes_counts)):
         node_id, count = nodes_counts[0][i], nodes_counts[1][i]
-        print(node_id, count)
-        # Filtering anchors based on appearance (we keep unique anchors)
+        print("\t", node_id, count)
         if count == 1 :
             try :
                 node_path_count[node_id] += 1
-- 
GitLab


From 291de812648d640e807678e771f2198153cc64f0 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Mon, 3 Jun 2024 15:05:03 +0200
Subject: [PATCH 20/30] Update Anchors2Path.py

---
 Anchors2Path.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/Anchors2Path.py b/Anchors2Path.py
index 98565e4..40c119f 100644
--- a/Anchors2Path.py
+++ b/Anchors2Path.py
@@ -17,7 +17,7 @@ from functools import reduce
 import concurrent.futures
 import gzip
 
-version = "0.1"
+version = "0.1.1"
 
 ## Argument parser
 arg_parser = argparse.ArgumentParser(description='Anchors2Path')
@@ -72,6 +72,8 @@ if args.version:
     print(version)
     os._exit(0)
 
+print("Version:\t", version)
+
 # Timing the script
 start_time = time.time()
 
@@ -121,9 +123,10 @@ node_path_count = {}
 ## {<NODE_ID>: <Number of path traversing this node>}
 # Computing number of path traversing each nodes
 for path_id, node_list in path_nodes.items():
-    print(path_id)
+
     nodes_counts = np.unique(node_list, return_counts=True)
-    for i in range(len(nodes_counts)):
+    print(path_id)
+    for i in range(len(nodes_counts[0])):
         node_id, count = nodes_counts[0][i], nodes_counts[1][i]
         print("\t", node_id, count)
         if count == 1 :
-- 
GitLab


From 02a3e63c343d0dc0123c49ba5fe5b46dcbbc0aa8 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Mon, 3 Jun 2024 15:07:48 +0200
Subject: [PATCH 21/30] Update Anchors2Path.py

---
 Anchors2Path.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Anchors2Path.py b/Anchors2Path.py
index 40c119f..eb325ec 100644
--- a/Anchors2Path.py
+++ b/Anchors2Path.py
@@ -17,7 +17,7 @@ from functools import reduce
 import concurrent.futures
 import gzip
 
-version = "0.1.1"
+version = "0.1.2"
 
 ## Argument parser
 arg_parser = argparse.ArgumentParser(description='Anchors2Path')
@@ -125,10 +125,10 @@ node_path_count = {}
 for path_id, node_list in path_nodes.items():
 
     nodes_counts = np.unique(node_list, return_counts=True)
-    print(path_id)
+    print(f"[Anchors2Path] Counting nodes in {path_id}")
     for i in range(len(nodes_counts[0])):
         node_id, count = nodes_counts[0][i], nodes_counts[1][i]
-        print("\t", node_id, count)
+        
         if count == 1 :
             try :
                 node_path_count[node_id] += 1
-- 
GitLab


From c4ff0c313ab30d67b4d66086ac6ccad16f99b2ed Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Thu, 27 Jun 2024 17:11:32 +0200
Subject: [PATCH 22/30] Adding UniP

---
 GFAvc.py       | 6 ++----
 PanGeTools.def | 4 ++++
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/GFAvc.py b/GFAvc.py
index 7ac2173..5cf601f 100644
--- a/GFAvc.py
+++ b/GFAvc.py
@@ -44,6 +44,7 @@ if args.version:
 
 with open(args.gfa, 'r') as file:
     gfa = file.readlines()
+    # for line in file :
 
 ## Changing version number in header
 assert gfa[0].split('\t')[1] == "VN:Z:1.1"
@@ -59,10 +60,7 @@ for lineID in sorted(range(len(gfa)), reverse = True):
 
     if gfa[lineID][0] == "W" :
 
-        curLine = gfa.pop(lineID).split('\t')
-
-        # Removing '\n' at the end of the line
-        curLine[-1] = curLine[-1][:-1]
+        curLine = gfa.pop(lineID).strip().split('\t')
         
         # Transforming '>..>..<..>..' to ['>..', '>..', '<..', '>..']
         curWalk = re.findall(r'>\w+|<\w+', curLine[-1])
diff --git a/PanGeTools.def b/PanGeTools.def
index 41e76ac..0e0a29c 100644
--- a/PanGeTools.def
+++ b/PanGeTools.def
@@ -209,6 +209,10 @@ Stage: build
     mv bin/GraphAligner /apps/bin/
     cd /apps && rm -rf GraphAligner
 
+    # Installing UniP
+    cd /apps
+    git clone https://forgemia.inra.fr/alexis.mergez/unip.git UniP
+
     # Cleaning step
     ## Removing tarballs
     rm /apps/*.tar*
-- 
GitLab


From 734e3396599ba5ab710bfc74ee5232899df1aac8 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Tue, 9 Jul 2024 20:07:20 +0200
Subject: [PATCH 23/30] Linked Unip in container

---
 PanGeTools.def | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/PanGeTools.def b/PanGeTools.def
index 0e0a29c..1f850ac 100644
--- a/PanGeTools.def
+++ b/PanGeTools.def
@@ -212,11 +212,15 @@ Stage: build
     # Installing UniP
     cd /apps
     git clone https://forgemia.inra.fr/alexis.mergez/unip.git UniP
+    ln -s /apps/UniP/UniP.py /apps/bin/UniP.py
 
     # Cleaning step
     ## Removing tarballs
     rm /apps/*.tar*
 
+%apprun unip
+    exec UniP.py "$@"
+
 %apprun gfaffix
     exec gfaffix "$@"
 
-- 
GitLab


From 079dbc0ccf7aa9ec634d3dd288e055d2ca064c78 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Tue, 9 Jul 2024 20:07:49 +0200
Subject: [PATCH 24/30] Created Pancat unofficial container

---
 .gitlab-ci.yml | 30 +++++++++++++++++++++++++++++-
 Pancat.def     | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+), 1 deletion(-)
 create mode 100644 Pancat.def

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 9068aa4..834c834 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -39,9 +39,19 @@ test_for_changes:
       else 
         RUN_panache="0";
       fi
+    
+    # Check block for Pancat.def
+    - curMd5=$(git show ${curTag}:Pancat.def | md5sum | cut -d' ' -f1)
+    - prevMd5=$(git show ${prevTag}:Pancat.def | md5sum | cut -d' ' -f1)
+    - if [ $curMd5 != $prevMd5 ]; then 
+        RUN_Pancat="1"; 
+      else 
+        RUN_Pancat="0";
+      fi
 
     - echo "RUN_PanGeTools=$RUN_PanGeTools" >> build.env
     - echo "RUN_panache=$RUN_panache" >> build.env
+    - echo "RUN_Pancat=$RUN_Pancat" >> build.env
 
   artifacts:
     reports:
@@ -87,4 +97,22 @@ build:panache:
         apptainer push --docker-username "${CI_REGISTRY_USER}" --docker-password "${CI_REGISTRY_PASSWORD}" panache.sif oras://"$CI_REGISTRY_IMAGE"/panache:latest;
       fi
 
-
+build:Pancat:
+  stage: build
+  tags:
+    - stable  # Using Stable runners as test runners don't work properly
+  needs: 
+    - job: "test_for_changes"
+      artifacts: true
+  image:
+    name: kaczmarj/apptainer:latest
+    entrypoint: [""]
+  rules:
+    - if: $CI_COMMIT_TAG                 # Run this job when a tag is created
+  script:
+    - if [ $RUN_Pancat == "1" ]; then
+        sed -i "s/VERSION_NUMBER/${CI_COMMIT_TAG#v}/" Pancat.def;
+        apptainer build Pancat.sif Pancat.def;
+        apptainer push --docker-username "${CI_REGISTRY_USER}" --docker-password "${CI_REGISTRY_PASSWORD}" Pancat.sif oras://"$CI_REGISTRY_IMAGE"/pancat:"$CI_COMMIT_TAG";
+        apptainer push --docker-username "${CI_REGISTRY_USER}" --docker-password "${CI_REGISTRY_PASSWORD}" Pancat.sif oras://"$CI_REGISTRY_IMAGE"/pancat:latest;
+      fi
diff --git a/Pancat.def b/Pancat.def
new file mode 100644
index 0000000..67aac3d
--- /dev/null
+++ b/Pancat.def
@@ -0,0 +1,36 @@
+Bootstrap: docker
+From: ghcr.io/mamba-org/micromamba:latest
+Stage: build
+
+%environment
+    export MAMBA_DOCKERFILE_ACTIVATE=1
+    export PATH="$PATH:/apps/Pancat"
+
+%post
+
+    export MAMBA_DOCKERFILE_ACTIVATE=1
+    apt-get update && apt-get upgrade -y
+    apt-get install -y git
+
+    # Creating base environment with micromamba
+    mkdir /apps
+    micromamba create -y -p /apps/base -c conda-forge -c bioconda \
+        python=3.10.*
+
+    git clone https://github.com/Tharos-ux/pancat.git /apps/Pancat
+    cd /apps/Pancat
+    micromamba run -p /apps/base pip install -r requirements.txt --upgrade
+    micromamba run -p /apps/base python -m pip install . --quiet
+
+%runscript
+    exec micromamba run -p /apps/base "$@"
+
+%labels
+    Author alexis.mergez@inrae.fr
+    Image.version VERSION_NUMBER
+    pancat.home https://github.com/Tharos-ux/pancat.git
+    about.home https://forgemia.inra.fr/alexis.mergez/pan1capps
+
+%help
+    Unofficial Apptainer container for Pancat (https://github.com/Tharos-ux/pancat.git).
+    Image version : VERSION_NUMBER
-- 
GitLab


From db6fa3107a42e3fb6204ca7aacb4541878876a22 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Tue, 9 Jul 2024 20:10:51 +0200
Subject: [PATCH 25/30] Tricking CICD

---
 Pancat.def | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Pancat.def b/Pancat.def
index 67aac3d..f87205f 100644
--- a/Pancat.def
+++ b/Pancat.def
@@ -7,7 +7,6 @@ Stage: build
     export PATH="$PATH:/apps/Pancat"
 
 %post
-
     export MAMBA_DOCKERFILE_ACTIVATE=1
     apt-get update && apt-get upgrade -y
     apt-get install -y git
@@ -33,4 +32,4 @@ Stage: build
 
 %help
     Unofficial Apptainer container for Pancat (https://github.com/Tharos-ux/pancat.git).
-    Image version : VERSION_NUMBER
+    Image version: VERSION_NUMBER
-- 
GitLab


From b20d7852034afeeb4cc03f76ba4f45361623fa05 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 26 Jul 2024 12:00:11 +0200
Subject: [PATCH 26/30] Update Pancat.def

---
 Pancat.def | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Pancat.def b/Pancat.def
index f87205f..56ba7e0 100644
--- a/Pancat.def
+++ b/Pancat.def
@@ -33,3 +33,4 @@ Stage: build
 %help
     Unofficial Apptainer container for Pancat (https://github.com/Tharos-ux/pancat.git).
     Image version: VERSION_NUMBER
+    
-- 
GitLab


From d8cfcfcefe79d85b48b6038eb56ca89b2295ae7a Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Thu, 1 Aug 2024 17:59:44 +0200
Subject: [PATCH 27/30] Update PanGeTools.def

- Added Seqwish v0.7.9
- Updated VG to v1.58.0
---
 PanGeTools.def | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/PanGeTools.def b/PanGeTools.def
index 1f850ac..79465d8 100644
--- a/PanGeTools.def
+++ b/PanGeTools.def
@@ -96,7 +96,7 @@ Stage: build
 
     # Installing vg
     cd /apps/bin
-    wget --no-check-certificate -c https://github.com/vgteam/vg/releases/download/v1.56.0/vg
+    wget --no-check-certificate -c https://github.com/vgteam/vg/releases/download/v1.58.0/vg
     chmod +x vg
 
     # Installing panacus
@@ -126,6 +126,14 @@ Stage: build
     cmake -H. -Bbuild && cmake --build build -- -j $(nproc)
     mv /apps/wfmash-v0.10.5/build/bin/wfmash /apps/bin/wfmash
 
+    # Installing seqwish
+    cd /apps
+    wget --no-check-certificate https://github.com/ekg/seqwish/releases/download/v0.7.9/seqwish-v0.7.9.tar.gz
+    tar -zxvf seqwish-v0.7.9.tar.gz
+    cd seqwish-v0.7.9
+    cmake -H. -Bbuild && cmake --build build -- -j $(nproc)
+    mv /apps/seqwish-v0.7.9/bin/seqwish /apps/bin/seqwish
+
     # Installing bgzip
     cd /apps
     wget --no-check-certificate https://github.com/samtools/htslib/releases/download/1.19.1/htslib-1.19.1.tar.bz2
@@ -257,6 +265,9 @@ Stage: build
 %apprun wfmash
     exec wfmash "$@"
 
+%apprun seqwish
+    exec seqwish "$@"
+
 %apprun minimap2
     exec minimap2 "$@"
 
@@ -320,13 +331,14 @@ Stage: build
     GFAffix.Version 0.1.5
     smoothxg.Version 0.7.2
     Odgi.Version 0.8.6
-    vg.Version 1.56.0
+    vg.Version 1.58.0
     panacus.Version 0.2.3
     gfatools.Version 0.5
     GFAvc.Version 0.3
     GFAstats.Version 0.3.2
     Samtools.Version 1.19
     wfmash.Version 0.10.5
+    seqwish.Version 0.7.9
     htslib.Version 1.19.1
     minimap2.Version 2.26
     vcfbub.Version 0.1.0
@@ -344,7 +356,7 @@ Stage: build
         - GFAffix       v0.1.5
         - smoothxg      v0.7.2
         - odgi          v0.8.6
-        - vg            v1.56.0
+        - vg            v1.58.0
         - panacus       v0.2.3
         - gfatools      v0.5
         - GFAvc         v0.3
@@ -352,6 +364,7 @@ Stage: build
         - samtools      v1.19
         - bgzip         v1.19.1
         - wfmash        v0.10.5
+        - seqwish       v0.7.9
         - minimap2      v2.26
         - vcfbub        v0.1.0
         - vcflib        v1.0.9
-- 
GitLab


From faf2580e937979f6e2baaaf1c4ba790bcb64196b Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 2 Aug 2024 16:55:16 +0200
Subject: [PATCH 28/30] Bumped Wfmash version

---
 .gitignore     |   3 +-
 .gitlab-ci.yml | 119 ++++++++++++++-----------------------------------
 PanGeTools.def |  12 ++---
 3 files changed, 42 insertions(+), 92 deletions(-)

diff --git a/.gitignore b/.gitignore
index 1fcf15d..75c3153 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
-*.sif
\ No newline at end of file
+*.sif
+PGT-beta.def
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 834c834..40251c1 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,118 +1,67 @@
-release_job:
-  stage: .pre
-  image: registry.gitlab.com/gitlab-org/release-cli:latest
-  rules:
-    - if: $CI_COMMIT_TAG                 # Run this job when a tag is created
-  script:
-    - echo "running release_job"
-  release:                               # See https://docs.gitlab.com/ee/ci/yaml/#release for available properties
-    tag_name: '$CI_COMMIT_TAG'
-    description: '$CI_COMMIT_TAG'
-
-test_for_changes:
-  stage: .pre
+Build_PanGeTools:
+  stage: build
   tags:
     - stable  # Using Stable runners as test runners don't work properly
-  image: ubuntu:latest
+  image:
+    name: kaczmarj/apptainer:latest
+    entrypoint: [""]
   rules:
-    - if: $CI_COMMIT_TAG
+    - if: $CI_COMMIT_TAG                 # Run this job when a tag is created
+      when: manual
   script:
-    - apt update -y && apt install -y git
-    # Getting current and previous tags
-    - curTag=$(git describe --abbrev=0 --tags)
-    - prevTag=$(git describe --abbrev=0 --tags HEAD^)
-
-    # Check block for PanGeTools.def
-    - curMd5=$(git show ${curTag}:PanGeTools.def | md5sum | cut -d' ' -f1)
-    - prevMd5=$(git show ${prevTag}:PanGeTools.def | md5sum | cut -d' ' -f1)
-    - if [ $curMd5 != $prevMd5 ]; then 
-        RUN_PanGeTools="1"; 
-      else 
-        RUN_PanGeTools="0";
-      fi
-    
-    # Check block for panache.def
-    - curMd5=$(git show ${curTag}:Panache.def | md5sum | cut -d' ' -f1)
-    - prevMd5=$(git show ${prevTag}:Panache.def | md5sum | cut -d' ' -f1)
-    - if [ $curMd5 != $prevMd5 ]; then 
-        RUN_panache="1"; 
-      else 
-        RUN_panache="0";
-      fi
-    
-    # Check block for Pancat.def
-    - curMd5=$(git show ${curTag}:Pancat.def | md5sum | cut -d' ' -f1)
-    - prevMd5=$(git show ${prevTag}:Pancat.def | md5sum | cut -d' ' -f1)
-    - if [ $curMd5 != $prevMd5 ]; then 
-        RUN_Pancat="1"; 
-      else 
-        RUN_Pancat="0";
-      fi
-
-    - echo "RUN_PanGeTools=$RUN_PanGeTools" >> build.env
-    - echo "RUN_panache=$RUN_panache" >> build.env
-    - echo "RUN_Pancat=$RUN_Pancat" >> build.env
-
+    - sed -i "s/VERSION_NUMBER/${CI_COMMIT_TAG#v}/" PanGeTools.def
+    - apptainer build PanGeTools.sif PanGeTools.def
+    - apptainer push --docker-username "${CI_REGISTRY_USER}" --docker-password "${CI_REGISTRY_PASSWORD}" PanGeTools.sif oras://"$CI_REGISTRY_IMAGE"/pangetools:"$CI_COMMIT_TAG"
   artifacts:
-    reports:
-      dotenv: build.env
+    paths:
+      - PanGeTools.sif
+    expire_in: 1 week
 
-build:PanGeTools:
-  stage: build
+Latest_PanGeTools:
+  stage: deploy
   tags:
     - stable  # Using Stable runners as test runners don't work properly
   needs: 
-    - job: "test_for_changes"
-      artifacts: true
+    - job: "Build_PanGeTools"
   image:
     name: kaczmarj/apptainer:latest
     entrypoint: [""]
   rules:
-    - if: $CI_COMMIT_TAG                 # Run this job when a tag is created
+    - if: $CI_COMMIT_TAG                # Run this job when a tag is created
+      when: manual
   script:
-    - if [ $RUN_PanGeTools == "1" ]; then
-        sed -i "s/VERSION_NUMBER/${CI_COMMIT_TAG#v}/" PanGeTools.def;
-        apptainer build PanGeTools.sif PanGeTools.def;
-        apptainer push --docker-username "${CI_REGISTRY_USER}" --docker-password "${CI_REGISTRY_PASSWORD}" PanGeTools.sif oras://"$CI_REGISTRY_IMAGE"/pangetools:"$CI_COMMIT_TAG";
-        apptainer push --docker-username "${CI_REGISTRY_USER}" --docker-password "${CI_REGISTRY_PASSWORD}" PanGeTools.sif oras://"$CI_REGISTRY_IMAGE"/pangetools:latest;
-      fi
+    - apptainer push --docker-username "${CI_REGISTRY_USER}" --docker-password "${CI_REGISTRY_PASSWORD}" PanGeTools.sif oras://"$CI_REGISTRY_IMAGE"/pangetools:latest
 
-build:panache:
+Build_Pancat:
   stage: build
   tags:
     - stable  # Using Stable runners as test runners don't work properly
-  needs: 
-    - job: "test_for_changes"
-      artifacts: true
   image:
     name: kaczmarj/apptainer:latest
     entrypoint: [""]
   rules:
     - if: $CI_COMMIT_TAG                 # Run this job when a tag is created
+      when: manual
   script:
-    - if [ $RUN_panache == "1" ]; then
-        sed -i "s/VERSION_NUMBER/${CI_COMMIT_TAG#v}/" Panache.def;
-        apptainer build Panache.sif Panache.def;
-        apptainer push --docker-username "${CI_REGISTRY_USER}" --docker-password "${CI_REGISTRY_PASSWORD}" panache.sif oras://"$CI_REGISTRY_IMAGE"/panache:"$CI_COMMIT_TAG";
-        apptainer push --docker-username "${CI_REGISTRY_USER}" --docker-password "${CI_REGISTRY_PASSWORD}" panache.sif oras://"$CI_REGISTRY_IMAGE"/panache:latest;
-      fi
+    - sed -i "s/VERSION_NUMBER/${CI_COMMIT_TAG#v}/" Pancat.def
+    - apptainer build Pancat.sif Pancat.def
+    - apptainer push --docker-username "${CI_REGISTRY_USER}" --docker-password "${CI_REGISTRY_PASSWORD}" Pancat.sif oras://"$CI_REGISTRY_IMAGE"/pancat:"$CI_COMMIT_TAG"
+  artifacts:
+    paths:
+      - Pancat.sif
+    expire_in: 1 week
 
-build:Pancat:
-  stage: build
+Latest_Pancat:
+  stage: deploy
   tags:
     - stable  # Using Stable runners as test runners don't work properly
   needs: 
-    - job: "test_for_changes"
-      artifacts: true
+    - job: "Build_Pancat"
   image:
     name: kaczmarj/apptainer:latest
     entrypoint: [""]
   rules:
-    - if: $CI_COMMIT_TAG                 # Run this job when a tag is created
+    - if: $CI_COMMIT_TAG                # Run this job when a tag is created
+      when: manual
   script:
-    - if [ $RUN_Pancat == "1" ]; then
-        sed -i "s/VERSION_NUMBER/${CI_COMMIT_TAG#v}/" Pancat.def;
-        apptainer build Pancat.sif Pancat.def;
-        apptainer push --docker-username "${CI_REGISTRY_USER}" --docker-password "${CI_REGISTRY_PASSWORD}" Pancat.sif oras://"$CI_REGISTRY_IMAGE"/pancat:"$CI_COMMIT_TAG";
-        apptainer push --docker-username "${CI_REGISTRY_USER}" --docker-password "${CI_REGISTRY_PASSWORD}" Pancat.sif oras://"$CI_REGISTRY_IMAGE"/pancat:latest;
-      fi
+    - apptainer push --docker-username "${CI_REGISTRY_USER}" --docker-password "${CI_REGISTRY_PASSWORD}" Pancat.sif oras://"$CI_REGISTRY_IMAGE"/pancat:latest
\ No newline at end of file
diff --git a/PanGeTools.def b/PanGeTools.def
index 79465d8..ecc05f1 100644
--- a/PanGeTools.def
+++ b/PanGeTools.def
@@ -120,11 +120,11 @@ Stage: build
 
     # Installing wfmash
     cd /apps
-    wget --no-check-certificate https://github.com/waveygang/wfmash/releases/download/v0.10.5/wfmash-v0.10.5.tar.gz
-    tar -zxvf wfmash-v0.10.5.tar.gz
-    cd wfmash-v0.10.5
+    wget --no-check-certificate https://github.com/waveygang/wfmash/releases/download/v0.17.0/wfmash-v0.17.0.tar.gz
+    tar -zxvf wfmash-v0.17.0.tar.gz
+    cd wfmash-v0.17.0
     cmake -H. -Bbuild && cmake --build build -- -j $(nproc)
-    mv /apps/wfmash-v0.10.5/build/bin/wfmash /apps/bin/wfmash
+    mv /apps/wfmash-v0.17.0/build/bin/wfmash /apps/bin/wfmash
 
     # Installing seqwish
     cd /apps
@@ -337,7 +337,7 @@ Stage: build
     GFAvc.Version 0.3
     GFAstats.Version 0.3.2
     Samtools.Version 1.19
-    wfmash.Version 0.10.5
+    wfmash.Version 0.17.0
     seqwish.Version 0.7.9
     htslib.Version 1.19.1
     minimap2.Version 2.26
@@ -363,7 +363,7 @@ Stage: build
         - GFAstats      v0.3.2
         - samtools      v1.19
         - bgzip         v1.19.1
-        - wfmash        v0.10.5
+        - wfmash        v0.17.0
         - seqwish       v0.7.9
         - minimap2      v2.26
         - vcfbub        v0.1.0
-- 
GitLab


From c77499c2e10b85b05cfebf6fc7b0556bbb7b70f1 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Mon, 5 Aug 2024 17:54:08 +0200
Subject: [PATCH 29/30] GFAvc v0.4

Changelog :
- Added support for gzipped GFA
- Stripping trailing whitespace (including the newline escape character) from each line when parsing
- Added GFA1.0 to GFA1.1 conversion direction (intended for PGGB GFA)
- Optional index can be passed to add START-END range to walks when converting from GFA1.0 to GFA1.1

Note on GFA1.0 to GFA1.1 conversion:
As PGGB has no reference, every sample is set as a reference (i.e. added to the space-separated list in the GFA header under RS:Z:...). This simplifies the use of VG tools such as vg surject, which only surjects onto reference walks. This also prevents paths/walks from having different names between vg and non-vg file formats (no phase_block).
---
 GFAvc.py | 187 ++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 150 insertions(+), 37 deletions(-)

diff --git a/GFAvc.py b/GFAvc.py
index 5cf601f..f130f10 100644
--- a/GFAvc.py
+++ b/GFAvc.py
@@ -2,33 +2,45 @@
 # -*- coding: utf-8 -*-
 """
 GFAvc: GFA version converter.
-Convert GFA from v1.1 to v1.0 (Convert walk to paths)
+Convert GFA from v1.1 to v1.0 (Convert walk to paths) and vice versa.
 
 @author: alexis.mergez@inrae.fr
-@version: 0.3
+@version: 0.4
 """
 import re
 import argparse
 import os
+import gzip
 
-version = "0.3"
+version = "0.4"
 
 ## Argument parser
 arg_parser = argparse.ArgumentParser(description='GFAvc: GFA version converter')
 arg_parser.add_argument(
-    "--gfa",
+    "--gfa1",
     "-g",
-    dest = "gfa",
-    required = True,
-    help = "GFA 1.1 file."
-    )  
+    dest = "GFA1",
+    help = "GFA 1.1 file. (Gzip or not)"
+    )
+arg_parser.add_argument(
+    "--gfa",
+    "-G",
+    dest = "GFA",
+    help = "GFA 1.0 file. (Gzip or not)"
+    )
 arg_parser.add_argument(
     "--outName",
     "-o",
     dest = "outName",
     required = True,
     help = "Output file name."
-    )     
+    )
+arg_parser.add_argument(
+    "--index",
+    "-i",
+    dest = "index",
+    help = "TSV containing start stop for each path in order to convert to walk (optional)"
+    )
 arg_parser.add_argument(
     '--version',
     '-v',
@@ -38,45 +50,146 @@ arg_parser.add_argument(
 )
 args = arg_parser.parse_args()
 
+#% Returning version 
 if args.version:
     print(version)
-    os._exit(0)
+    os._exit(0)     
+
+#% Parsing index
+if args.index is not None:
+    with open(args.index, 'r') as handle:
+        file = [line.rstrip() for line in handle.readlines()]
+
+    index = {}
+    for line in file:
+        split = line.split("\t")
+        index[split[0]] = [split[1], split[2]]
+
+else : index = None
+
+#% Conversion functions
+def gfa11_to_gfa10(gfa1_file = args.GFA1):
+    #% Reading GFA
+    # If not gzipped :
+    if gfa1_file[-2:] != "gz" :
+        with open(gfa1_file, 'r') as file:
+            gfa = [line.rstrip() for line in file.readlines()]
+
+    # If gzipped :
+    else :
+        with gzip.open(gfa1_file, 'r') as file:
+            gfa = [line.decode().rstrip() for line in file.readlines()]
+
+    #% Changing version number in header
+    assert gfa[0].split('\t')[1] == "VN:Z:1.1"
+    _ = gfa[0].split('\t')
+    _[1] = "VN:Z:1.0"
+    gfa[0] = "\t".join(_)
+
+    #% Iterating in reverse to put paths at the end.
+    for lineID in sorted(range(len(gfa)), reverse = True):
+        if gfa[lineID][0] == "S" :
+            _ = '\t'.join(gfa[lineID].split('\t')[:3])
+            gfa[lineID] = f"{_}"
+
+        if gfa[lineID][0] == "W" :
+
+            curLine = gfa.pop(lineID).split('\t')
+            
+            # Transforming '>..>..<..>..' to ['>..', '>..', '<..', '>..']
+            curWalk = re.findall(r'>\w+|<\w+', curLine[-1])
+
+            # Converting ['>..', '>..', '<..', '>..'] to '..+,..+,..-,..+'
+            path = [f'{elem[1:]}{(elem[0] == ">")*"+"+(elem[0] == "<")*"-"}' for elem in curWalk]
+
+            newLine = ['P', f"{'#'.join(curLine[1:5])}-{curLine[5]}", ','.join(path), '*']
+
+            gfa.append('\t'.join(newLine))
+
+        #% Moving path lines to the end
+        if gfa[lineID][0] == "P" :
+
+            curLine = gfa.pop(lineID)
+            gfa.append(curLine)
+
+    return gfa
+
+def gfa10_to_gfa11(gfa_file = args.GFA, index = index):
+
+    #% Reading GFA
+    # If not gzipped :
+    if gfa_file[-2:] != "gz" :
+        with open(gfa_file, 'r') as file:
+            gfa = [line.rstrip() for line in file.readlines()]
+
+    # If gzipped :
+    else :
+        with gzip.open(gfa_file, 'r') as file:
+            gfa = [line.decode().rstrip() for line in file.readlines()]
+
+    sign = {"+": ">", "-": "<"}
+    samples = []
+
+    #% Iterating in reverse to put walks at the end.
+    for lineID in sorted(range(len(gfa)), reverse = True):
+
+        if gfa[lineID][0] == "P" :
+
+            curLine = gfa.pop(lineID).split('\t')
 
-with open(args.gfa, 'r') as file:
-    gfa = file.readlines()
-    # for line in file :
+            # Converting '..+,..+,..-,..+' to ['>..', '>..', '<..', '>..']
+            walk = [
+                f"{sign[elem[-1]]}{elem[:-1]}" 
+                for elem in curLine[-2].split(',')
+            ]
 
-## Changing version number in header
-assert gfa[0].split('\t')[1] == "VN:Z:1.1"
-_ = gfa[0].split('\t')
-_[1] = "VN:Z:1.0"
-gfa[0] = "\t".join(_)
+            splittedID = curLine[1].split(":")
+            if len(splittedID) == 2:
+                # Range info is available
+                ID = splittedID[0].split("#")
+                RANGE = splittedID[1].split("-")
+            elif len(splittedID) == 1 and index is not None:
+                ID = splittedID[0].split("#")
+                RANGE = index[splittedID[0]]
+            else :
+                ID = splittedID[0].split("#")
+                RANGE = 2*["*"]
+                    
+            newLine = ['W'] + ID + RANGE + [f"{''.join(walk)}"]
+            samples.append(ID[0])
 
-## Iterating in reverse to put paths at the end.
-for lineID in sorted(range(len(gfa)), reverse = True):
-    if gfa[lineID][0] == "S" :
-        _ = '\t'.join(gfa[lineID].split('\t')[:3])
-        gfa[lineID] = f"{_}\n"
+            gfa.append('\t'.join(newLine))
 
-    if gfa[lineID][0] == "W" :
+        #% Moving walk lines to the end
+        if gfa[lineID][0] == "W" :
 
-        curLine = gfa.pop(lineID).strip().split('\t')
-        
-        # Transforming '>..>..<..>..' to ['>..', '>..', '<..', '>..']
-        curWalk = re.findall(r'>\w+|<\w+', curLine[-1])
+            curLine = gfa.pop(lineID)
+            gfa.append(curLine)
 
-        # Converting ['>..', '>..', '<..', '>..'] to '..+,..+,..-,..+'
-        path = [f'{elem[1:]}{(elem[0] == ">")*"+"+(elem[0] == "<")*"-"}' for elem in curWalk]
+    samples = list(set(samples))
+    #% Changing version number in header
+    assert gfa[0].split('\t')[1] == "VN:Z:1.0"
+    _ = gfa[0].split('\t')
+    _[1] = "VN:Z:1.1"
+    _.append(f"RS:Z:{' '.join(samples)}")
+    gfa[0] = "\t".join(_)
 
-        newLine = ['P', f"{'#'.join(curLine[1:5])}-{curLine[5]}", ','.join(path), '*\n']
+    return gfa
 
-        gfa.append('\t'.join(newLine))
+#% Selecting the conversion direction
+if args.GFA is None and args.GFA1 is not None:
+    print("[GFAvc] Converting from GFA 1.1 to GFA 1.0 ...")
+    gfa = gfa11_to_gfa10()
 
-## Moving path lines to the end
-    if gfa[lineID][0] == "P" :
+elif args.GFA is not None and args.GFA1 is None and args.refname is not None:
+    print("[GFAvc] Converting from GFA 1.0 to GFA 1.1 ...")
+    gfa = gfa10_to_gfa11()
 
-        curLine = gfa.pop(lineID)
-        gfa.append(curLine)
+else:
+    print("[GFAvc] Unable to convert !")
+    os._exit(1)
 
+#% Exporting
+gfa[-1] = f"{gfa[-1]}\n"
 with open(args.outName, "w") as file:
-    file.write("".join(gfa))
\ No newline at end of file
+    file.write("\n".join(gfa))
\ No newline at end of file
-- 
GitLab


From 76a571bbe0328bb858451b95e61d7d88c707a1d8 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Mon, 5 Aug 2024 17:54:21 +0200
Subject: [PATCH 30/30] Removing/moving old scripts

---
 DotPlot_BED_corrector.py |  129 --
 gaf2aln.ipynb            | 2443 --------------------------------------
 gaf2aln.py               |  441 -------
 3 files changed, 3013 deletions(-)
 delete mode 100644 DotPlot_BED_corrector.py
 delete mode 100644 gaf2aln.ipynb
 delete mode 100644 gaf2aln.py

diff --git a/DotPlot_BED_corrector.py b/DotPlot_BED_corrector.py
deleted file mode 100644
index f4858af..0000000
--- a/DotPlot_BED_corrector.py
+++ /dev/null
@@ -1,129 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-DotPlot Bed like file corrector.
-Correct splitted paths from odgi untangle bed like file used for creating dotplots.
-See Odgi documentation for the dot plot tutorial
-
-@author: alexis.mergez@inrae.fr
-@version: 0.1
-"""
-import re
-import argparse
-import os
-import numpy as np
-import time
-import pandas as pd
-from functools import reduce
-import concurrent.futures
-import gzip
-
-version = "0.1"
-
-## Argument parser
-arg_parser = argparse.ArgumentParser(description='GFAstats: GFA statistics')
-arg_parser.add_argument(
-    "--input",
-    "-i",
-    dest = "input",
-    required = True,
-    help = "Bed like file"
-    )  
-arg_parser.add_argument(
-    "--output",
-    "-o",
-    dest = "output",
-    required = True,
-    help = "Output name"
-    )  
-arg_parser.add_argument(
-    '--version',
-    '-v',
-    action="store_true",
-    dest = "version",
-    help = "Show version"
-)
-arg_parser.add_argument(
-    '--progress',
-    '-P',
-    action="store_true",
-    dest = "progress",
-    help = "Show progress to stdout"
-)
-args = arg_parser.parse_args()
-
-# Printing version and exiting if required
-if args.version:
-    print(version)
-    os._exit(0)
-
-# importing bed file with pandas
-if args.progress : print(f"[Bed_corrector::Parsing] Reading {args.input} ...")
-bed = pd.read_csv(
-    args.input,
-    sep = '\t'
-)
-
-# Getting the queries name and checking if we get multiples for one path
-if args.progress : print(f"[Bed_corrector::Identify] Searching for splitted paths ...")
-## Getting unique query names
-queries = bed["query.name"].unique()
-
-## Extracting path name and ranges from unique queries names
-paths = [query.split(":")[0] for query in queries]
-ranges = np.array([query.split(":")[1].split("-") for query in queries])
-
-## Creating temporary dataframe to store previous info
-temp_df = pd.DataFrame({
-    "queries" : queries,
-    "path" : paths,
-    "start" : ranges[:, 0],
-    "end" : ranges[:, 1]
-})
-
-## Getting the path that are splitted (i.e. more than one unique occurence)
-splitted_paths = []
-_ = np.unique(paths, return_counts = True)
-for path, count in zip(_[0], _[1]):
-    if count > 1 : # More than a repeat
-        splitted_paths.append(path)
-        if args.progress : print(f"[Bed_corrector::Identify] {path} is splitted")
-
-# For each splitted path identified, we search the minimum start and the maximum end
-if args.progress : print(f"[Bed_corrector::Identify] Searching for min start and max end of splitted paths ...")
-min_start = {}
-max_end = {} 
-for path_name in splitted_paths:
-    min_start[path_name] = int(temp_df[temp_df.path == path_name].start.min())
-    max_end[path_name] = int(temp_df[temp_df.path == path_name].end.max())
-
-# Traversing bed dataframe and offsetting paths based on the min_start of the main path
-if args.progress : print(f"[Bed_corrector::Patching] Correcting splitted paths ...")
-## Getting a copy of columns to modify
-path_names, starts, ends = bed["query.name"].tolist(), bed["query.start"].tolist(), bed["query.end"].tolist()
-
-## Iterating over lines
-for i in range(len(path_names)):
-    ## Getting the name and the range of the current line query
-    path, ranges = path_names[i].split(":")
-
-    if path in splitted_paths:
-        #print("Before :", path_names[i], starts[i], ends[i])
-        
-        ## Computing offset based on min start for this path
-        offset = (int(ranges.split('-')[0])-min_start[path])
-        #print("Offset:", offset)
-
-        ## Patching the info with new range (min start, max end) and shifted coordinates
-        path_names[i] = f"{path}:{min_start[path]}-{max_end[path]}"
-        starts[i] = int(starts[i])+offset
-        ends[i] = int(ends[i])+offset
-        #print("After :", path_names[i], starts[i], ends[i])
-
-## Patching the bed
-bed["query.name"] = path_names
-bed["query.start"] = starts
-bed["query.end"] = ends
-
-# Exporting
-bed.to_csv(args.output, sep="\t", index = False)
\ No newline at end of file
diff --git a/gaf2aln.ipynb b/gaf2aln.ipynb
deleted file mode 100644
index 04fe866..0000000
--- a/gaf2aln.ipynb
+++ /dev/null
@@ -1,2443 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "4ffaf9f6-cc1e-4190-9351-5431c930d25b",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "import argparse\n",
-    "import concurrent.futures\n",
-    "import os\n",
-    "import re\n",
-    "\n",
-    "# Replace for argparse arguments\n",
-    "class arguments():\n",
-    "    gfa = \"/home/amergez/Documents/Scratch/LeChou/35Bra-v2a/35Bra-v2a.chr03.gfa\"\n",
-    "    gaf = \"/home/amergez/Documents/Scratch/LeChou/35Bra-v2a/Mapping2Graph/GA.FLC2.aln.gaf\"\n",
-    "    threads = 8\n",
-    "    version = False\n",
-    "args = arguments()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "id": "280c8847-22e8-4063-bde8-3e4e72cf20e7",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Toolbox\n",
-    "def walk2path(walk):\n",
-    "    \"\"\"\n",
-    "    Takes a walk in a single string and returns a list of nodes id with signs (gfa v1 like)\n",
-    "    \"\"\"\n",
-    "    _ = re.findall(r'>\\w+|<\\w+', walk)\n",
-    "    # Converting ['>..', '>..', '<..', '>..'] to '..+,..+,..-,..+'\n",
-    "    return [f'{elem[1:]}{(elem[0] == \">\")*\"+\"+(elem[0] == \"<\")*\"-\"}' for elem in _]\n",
-    "\n",
-    "def cigar2basealn(cigar):\n",
-    "    \"\"\"\n",
-    "    Takes a CIGAR string and convert it into a list of base level alignment.\n",
-    "    For example : \"345=\" -> [\"=\", \"=\", ..., \"=\"] of length 345.\n",
-    "    \"\"\"\n",
-    "    _ = re.findall(r'\\d+\\D', cigar)\n",
-    "    final_cigar = []\n",
-    "    for match in _:\n",
-    "        final_cigar += [match[-1]]*int(match[:-1])\n",
-    "\n",
-    "    return final_cigar\n",
-    "\n",
-    "def basealn2cigar(base_aln_list):\n",
-    "    \n",
-    "    last_elem = base_aln_list[0]\n",
-    "    CIGAR = [[1, last_elem]]\n",
-    "    for elem in base_aln_list[1:]:\n",
-    "        if elem == last_elem:\n",
-    "            CIGAR[-1][0] += 1\n",
-    "\n",
-    "        else :\n",
-    "            CIGAR[-1][0] = str(CIGAR[-1][0])\n",
-    "            CIGAR.append([1, elem])\n",
-    "            last_elem = elem\n",
-    "    CIGAR[-1][0] = str(CIGAR[-1][0])\n",
-    "    return \"\".join([\"\".join(block) for block in CIGAR if block[1] != \"\"])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "be12e9d4-de76-4c8b-af84-6567549483f4",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[gaf2aln::GAF Parser] Reading /home/amergez/Documents/Scratch/LeChou/35Bra-v2a/Mapping2Graph/GA.FLC2.aln.gaf ...\n",
-      "[gaf2aln::GAF Parser] Extracting alignments ...\n",
-      "{'ALN_1': {'QRY.NAME': 'FLC2.TO1000#1#chr03', 'QRY.LEN': '3735', 'QRY.START': '0', 'QRY.END': '3735', 'STRAND': '+', 'PATH.MATCH': [('7046526', '+'), ('7046528', '+'), ('7046530', '+'), ('7046531', '+'), ('7046532', '+'), ('7046533', '+'), ('7046534', '+'), ('7046536', '+'), ('7046537', '+'), ('7046539', '+'), ('7046541', '+'), ('7046542', '+'), ('7046544', '+'), ('7046546', '+'), ('7046547', '+'), ('7046549', '+'), ('7046551', '+'), ('7046552', '+'), ('7046554', '+'), ('7046556', '+'), ('7046556', '+'), ('7046556', '+'), ('7046557', '+'), ('7046558', '+'), ('7046559', '+'), ('7046560', '+'), ('7046561', '+'), ('7046561', '+'), ('7046561', '+'), ('7046561', '+'), ('7046561', '+'), ('7046561', '+'), ('7046561', '+'), ('7046561', '+'), ('7046561', '+'), ('7046561', '+'), ('7046562', '+'), ('7046564', '+'), ('7046565', '+'), ('7046567', '+'), ('7046568', '+'), ('7046570', '+'), ('7046571', '+'), ('7046573', '+'), ('7046574', '+'), ('7046576', '+'), ('7046577', '+'), ('7046579', '+'), ('7046581', '+'), ('7046583', '+'), ('7046584', '+'), ('7046586', '+'), ('7046587', '+'), ('7046589', '+'), ('7046590', '+'), ('7046592', '+'), ('7046593', '+'), ('7046594', '+'), ('7046596', '+'), ('7046597', '+'), ('7046599', '+'), ('7046600', '+'), ('7046601', '+'), ('7046603', '+'), ('7046604', '+'), ('7046606', '+'), ('7046608', '+'), ('7046609', '+'), ('7046621', '+'), ('7046622', '+'), ('7046624', '+'), ('7046625', '+'), ('7046626', '+'), ('7046628', '+'), ('7046631', '+'), ('7046673', '+'), ('7046631', '+'), ('7046673', '+'), ('7046631', '+'), ('7046632', '+'), ('7046634', '+'), ('7046635', '+'), ('7046637', '+'), ('7046638', '+'), ('7046639', '+'), ('7046641', '+'), ('7046644', '+'), ('7046646', '+'), ('7046647', '+'), ('7046649', '+'), ('7046650', '+'), ('7046652', '+'), ('7046653', '+'), ('7046654', '+'), ('7046656', '+'), ('7046657', '+'), ('7046659', '+'), ('7046660', '+'), ('7046662', '+'), ('7046663', '+'), ('7046665', '+'), ('7046667', '+'), ('7046668', '+'), 
('7046670', '+'), ('7046671', '+'), ('7046674', '+'), ('7046675', '+'), ('7046674', '+'), ('7046675', '+'), ('7046676', '+'), ('7046678', '+'), ('7046679', '+'), ('7046680', '+'), ('7046682', '+'), ('7046684', '+'), ('7046685', '+'), ('7046686', '+'), ('7046688', '+'), ('7046690', '+'), ('7046692', '+'), ('7046693', '+'), ('7046695', '+'), ('7046696', '+'), ('7046698', '+'), ('7046700', '+'), ('7046702', '+'), ('7046703', '+'), ('7046704', '+'), ('7046706', '+'), ('7046707', '+'), ('7046709', '+'), ('7046710', '+'), ('7046712', '+'), ('7046713', '+'), ('7046715', '+'), ('7046718', '+'), ('7046717', '+'), ('7046718', '+'), ('7046717', '+'), ('7046718', '+'), ('7046720', '+'), ('7046722', '+'), ('7046724', '+'), ('7046725', '+'), ('7046727', '+'), ('7046728', '+'), ('7046729', '+'), ('7046730', '+'), ('7046731', '+'), ('7046733', '+'), ('7046735', '+'), ('7046736', '+'), ('7046738', '+'), ('7046739', '+'), ('7046740', '+'), ('7046738', '+'), ('7046739', '+'), ('7046740', '+'), ('7046738', '+'), ('7046739', '+'), ('7046741', '+')], 'PATH.LEN': '3822', 'ALN.START': '77', 'ALN.END': '3812', 'RES.MATCH': '3735', 'ALN.BLOCK.LEN': '3735', 'MAPPING.QUAL': '60', 'RAW.CIGAR': 'cg:Z:3735=', 'TAGS': 'AS:f:3735,dv:f:0,id:f:1'}, 'ALN_2': {'QRY.NAME': 'FLC2.TO1000#1#chr03', 'QRY.LEN': '3735', 'QRY.START': '0', 'QRY.END': '3735', 'STRAND': '+', 'PATH.MATCH': [('7594382', '+'), ('7594369', '+'), ('7594371', '+'), ('7594021', '+'), ('7594286', '+'), ('7594374', '+'), ('7594356', '+'), ('7594374', '+'), ('7594374', '+'), ('7594375', '+'), ('7594626', '+'), ('7594011', '+'), ('7594374', '+'), ('7594375', '+'), ('7594369', '+'), ('7594371', '+'), ('7594021', '+'), ('7594021', '+'), ('7594021', '+'), ('7594021', '+'), ('7594241', '+'), ('7594248', '+'), ('7594286', '+'), ('7594311', '+'), ('7594315', '+'), ('7594311', '+'), ('7594330', '+'), ('7594311', '+'), ('7594315', '+'), ('7594374', '+'), ('7594311', '+'), ('7594374', '+'), ('7594369', '+'), ('7594021', '+'), ('7594026', '+'), 
('7594021', '+'), ('7594021', '+'), ('7594026', '+'), ('7594021', '+'), ('7594021', '+'), ('7594021', '+'), ('7594021', '+'), ('7594286', '+'), ('7594374', '+'), ('7594021', '+'), ('7594286', '+'), ('7594311', '+'), ('7594286', '+'), ('7594311', '+'), ('7594286', '+'), ('7594311', '+'), ('7594286', '+'), ('7594311', '+'), ('7594315', '+'), ('7594286', '+'), ('7594311', '+'), ('7594374', '+'), ('7594021', '+'), ('7594286', '+'), ('7594286', '+'), ('7594374', '+'), ('7594356', '+'), ('7594374', '+'), ('7594374', '+'), ('7594375', '+'), ('7594374', '+'), ('7594356', '+'), ('7594374', '+'), ('7594375', '+'), ('7594374', '+'), ('7594350', '+'), ('7594264', '+'), ('7594207', '+'), ('7594225', '+'), ('7594227', '+'), ('7594120', '+'), ('7594132', '+'), ('7594165', '+'), ('7594172', '+')], 'PATH.LEN': '61224', 'ALN.START': '0', 'ALN.END': '3735', 'RES.MATCH': '3734', 'ALN.BLOCK.LEN': '3735', 'MAPPING.QUAL': '0', 'RAW.CIGAR': 'cg:Z:57=1X3677=', 'TAGS': 'AS:f:3732.06,dv:f:0.000267738,id:f:0.999732'}}\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Parsing the .gaf file\n",
-    "print(f\"[gaf2aln::GAF Parser] Reading {args.gaf} ...\")\n",
-    "with open(args.gaf, 'r') as file:\n",
-    "    gaf_lines = file.readlines()\n",
-    "\n",
-    "gaf_col = [\n",
-    "    \"QRY.NAME\", \"QRY.LEN\", \"QRY.START\", \"QRY.END\", \"STRAND\", \n",
-    "    \"PATH.MATCH\", \"PATH.LEN\", \"ALN.START\", \"ALN.END\",\n",
-    "    \"RES.MATCH\", \"ALN.BLOCK.LEN\", \"MAPPING.QUAL\"\n",
-    "    ]\n",
-    "\n",
-    "# Creating dictionnary to store alignments\n",
-    "print(f\"[gaf2aln::GAF Parser] Extracting alignments ...\")\n",
-    "aln_dict = {}\n",
-    "for line in range(len(gaf_lines)):\n",
-    "    ## Splitting the line by tabulation\n",
-    "    line_content = gaf_lines[line][:-1].split('\\t')\n",
-    "\n",
-    "    ## Adding alignement info to dictionnary\n",
-    "    aln_dict[f\"ALN_{line+1}\"] = {\n",
-    "        gaf_col[i]: line_content[i] for i in range(len(gaf_col))\n",
-    "    }\n",
-    "    \n",
-    "    ## Splitting \"PATH.MATCH\" into a list\n",
-    "    aln_dict[f\"ALN_{line+1}\"][\"PATH.MATCH\"] = [\n",
-    "        (str(node_id[:-1]), node_id[-1]) \n",
-    "        for node_id in walk2path(aln_dict[f\"ALN_{line+1}\"][\"PATH.MATCH\"])\n",
-    "    ]\n",
-    "\n",
-    "    ## Adding CIGAR\n",
-    "    aln_dict[f\"ALN_{line+1}\"][\"RAW.CIGAR\"] = line_content[-1]\n",
-    "\n",
-    "    ## Adding tags\n",
-    "    aln_dict[f\"ALN_{line+1}\"][\"TAGS\"] = \",\".join(line_content[13:-1])\n",
-    "\n",
-    "# Getting nodes of interest ids\n",
-    "aln_nodes = np.unique([\n",
-    "    str(node_id) \n",
-    "    for aln in aln_dict.keys() \n",
-    "    for node_id, orient in aln_dict[aln][\"PATH.MATCH\"]\n",
-    "]).tolist()\n",
-    "\n",
-    "print(aln_dict)\n",
-    "del gaf_lines, gaf_col"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "2f891424-0d88-4fd3-99ff-b0a8c90587ff",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[gaf2aln::GFA Parser] Reading /home/amergez/Documents/Scratch/LeChou/35Bra-v2a/35Bra-v2a.chr03.gfa ...\n",
-      "[gaf2aln::GFA Parser] Extracting nodes, paths and links ...\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Parsing the .gfa\n",
-    "print(f\"[gaf2aln::GFA Parser] Reading {args.gfa} ...\")\n",
-    "with open(args.gfa, 'r') as file:\n",
-    "    gfa_lines = file.readlines()\n",
-    "\n",
-    "# Nodes length dictionnary structured as follow :\n",
-    "# {<NODE.ID>: <NODE.LENGTH>}\n",
-    "nodes_length = {}\n",
-    "# Nodes dictionnary structured as follow :\n",
-    "# { <ALN.NODE.ID> : {\n",
-    "#   <PATH.NAME>: {\"START\": start, \"END\": end, \"STRAND\": strand), \n",
-    "#   <ALN.NAME>: {\"START\": start, \"END\": end, \"S.OFF\": start.offset, \"E.OFF\": end.offset, \"STRAND\": strand, \"CIGAR\": CIGAR}\n",
-    "#   }\n",
-    "# }\n",
-    "nodes = {node_id: {} for node_id in aln_nodes}\n",
-    "# Paths dictionnary structured as follow :\n",
-    "# {<PATH.NAME>: {NODES: {<NODE.ID>: <NODE.ORIENT>}, CIGAR: <CIGAR in comma separated list>}\n",
-    "paths = {}\n",
-    "# Links dictionnary structured as follow : \n",
-    "# {<FROM.NODE.ID>: {<TO.NODE.ID>: {FROM.ORIENT: <FROM.ORIENT>, TO.ORIENT: <TO.ORIENT>}}}\n",
-    "links = {}\n",
-    "\n",
-    "# Parsing the gfa\n",
-    "print(f\"[gaf2aln::GFA Parser] Extracting nodes, paths and links ...\")\n",
-    "\n",
-    "def GFA_parser(gfa_lines, nodes = nodes):\n",
-    "    _links, _nodes, _nodes_length, paths = {}, {}, {}, {}\n",
-    "    for line in gfa_lines:\n",
-    "        line_content = line[:-1].split(\"\\t\")\n",
-    "        line_id = line_content[0]\n",
-    "        \n",
-    "        # Segment line\n",
-    "        if line_id == \"S\" :\n",
-    "            \n",
-    "            _nodes_length[str(line_content[1])] = len(line_content[2])\n",
-    "        \n",
-    "        # Link line\n",
-    "        elif line_id == \"L\":\n",
-    "            try :\n",
-    "                _links[str(line_content[1])][str(line_content[3])] = {\n",
-    "                    \"FROM\": str(line_content[2]), \n",
-    "                    \"TO\": str(line_content[4])\n",
-    "                }\n",
-    "\n",
-    "            except :\n",
-    "                _links[str(line_content[1])] = {\n",
-    "                    str(line_content[3]) : {\"FROM.ORIENT\": str(line_content[2]), \"TO.ORIENT\": str(line_content[4])}\n",
-    "                }\n",
-    "\n",
-    "        # Path line\n",
-    "        elif line_id == \"P\":\n",
-    "            _paths[str(line_content[1])] = {\n",
-    "                \"NODES\": {\n",
-    "                    str(node_id[:-1]): str(node_id[-1])\n",
-    "                    for node_id in line_content[2].split(',')\n",
-    "                },\n",
-    "                \"CIGAR\": line_content[3]\n",
-    "            }\n",
-    "\n",
-    "    return [_links, _nodes, _nodes_length, _paths]\n",
-    "\n",
-    "# splits = np.quantile(range(len(gfa_lines)+1), q= np.array(args.threads+1)/args.threads, method='higher').tolist()\n",
-    "# res = []\n",
-    "# for i in range(1, len(splits)):\n",
-    "#     res.append(executor.submit(GFA_parser, gfa_lines[splits[i-1]:splits[i]]))\n",
-    "\n",
-    "# for out in res:\n",
-    "#     results = out.result()\n",
-    "\n",
-    "#     for link_id, link_info in results[0].items():\n",
-    "#         links[]\n",
-    "\n",
-    "\n",
-    "for line in gfa_lines:\n",
-    "    line_content = line[:-1].split(\"\\t\")\n",
-    "    line_id = line_content[0]\n",
-    "    \n",
-    "    # Segment line\n",
-    "    if line_id == \"S\" :\n",
-    "        \n",
-    "        nodes_length[str(line_content[1])] = len(line_content[2])\n",
-    "    \n",
-    "    # Link line\n",
-    "    elif line_id == \"L\":\n",
-    "        try :\n",
-    "            links[str(line_content[1])][str(line_content[3])] = {\n",
-    "                \"FROM\": str(line_content[2]), \n",
-    "                \"TO\": str(line_content[4])\n",
-    "            }\n",
-    "\n",
-    "        except :\n",
-    "            links[str(line_content[1])] = {\n",
-    "                str(line_content[3]) : {\"FROM.ORIENT\": str(line_content[2]), \"TO.ORIENT\": str(line_content[4])}\n",
-    "            }\n",
-    "\n",
-    "    # Path line\n",
-    "    elif line_id == \"P\":\n",
-    "        paths[str(line_content[1])] = {\n",
-    "            \"NODES\": {\n",
-    "                str(node_id[:-1]): str(node_id[-1])\n",
-    "                for node_id in line_content[2].split(',')\n",
-    "            },\n",
-    "            \"CIGAR\": line_content[3]\n",
-    "        }\n",
-    "\n",
-    "del gfa_lines"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "a403c88e-54ea-4a67-9047-dc44eba7f51a",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[gaf2aln::Graph position processing] Computing nodes positions in each paths...\n",
-      "[gaf2aln::Graph position processing] Running on Capitata#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on D101#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on D134#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on G06-09-28#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on G07-DH-33#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on HDEM#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on Korso#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on M249#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on OX-heart#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on PL021#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on RC34#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on T02#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on T03#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on T04#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on T06#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on T07#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on T08#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on T09#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on T10#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on T11#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on T12#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on T13#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on T14#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on T15#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on T16#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on T17#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on T18#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on T19#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on T21#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on T24#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on T25#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on T26#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on T27#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on TO1000#1#chr03 ...\n",
-      "[gaf2aln::Graph position processing] Running on W1701#1#chr03 ...\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(f\"[gaf2aln::Graph position processing] Computing nodes positions in each paths...\")\n",
-    "def get_node_pos(path_name, nodes = nodes, paths = paths, nodes_length = nodes_length):\n",
-    "    print(f\"[gaf2aln::Graph position processing] Running on {path_name} ...\")\n",
-    "    cur_pos = 0\n",
-    "\n",
-    "    out = {}\n",
-    "    # Iterating over nodes in the path\n",
-    "    for path_node in paths[path_name][\"NODES\"].keys():\n",
-    "        # Instead of checking if the node is one interesting node, we try to add to the nodes dict\n",
-    "        if path_node in aln_nodes :\n",
-    "            out[path_node] = {\n",
-    "                \"START\": cur_pos, # Start position of the node start in the currrent path\n",
-    "                \"END\": cur_pos+nodes_length[path_node], # End position of the node end in the current path\n",
-    "                \"STRAND\": paths[path_name][\"NODES\"][path_node] # Orientation of the node in the current path\n",
-    "                } \n",
-    "\n",
-    "            cur_pos += nodes_length[path_node]+1\n",
-    "        else :\n",
-    "            cur_pos += nodes_length[path_node]+1\n",
-    "\n",
-    "    return out\n",
-    "\n",
-    "res = {}\n",
-    "executor = concurrent.futures.ThreadPoolExecutor(max_workers=args.threads)\n",
-    "# Adding nodes positions relative to path\n",
-    "for path_name in paths.keys():\n",
-    "    res[path_name] = executor.submit(get_node_pos, path_name)\n",
-    "\n",
-    "executor.shutdown(wait=True)\n",
-    "\n",
-    "for path_name, out in res.items():\n",
-    "    results = out.result()\n",
-    "    for path_node, node_pos in results.items():\n",
-    "        nodes[path_node][path_name] = node_pos\n",
-    "\n",
-    "del res"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "id": "bed36bd5-30eb-4d02-8b52-1ae5d753f8f8",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[gaf2aln::Alignment position processing] Computing nodes positions in each alignement...\n",
-      "[gaf2aln::Alignment position processing] Running on ALN_1 ...\n",
-      "0 77 77 0 + 154 77\n",
-      "77 82 0 0 + 5 82\n",
-      "82 83 0 0 + 1 83\n",
-      "83 138 0 0 + 55 138\n",
-      "138 139 0 0 + 1 139\n",
-      "139 202 0 0 + 63 202\n",
-      "202 203 0 0 + 1 203\n",
-      "203 379 0 0 + 176 379\n",
-      "379 380 0 0 + 1 380\n",
-      "380 429 0 0 + 49 429\n",
-      "429 430 0 0 + 1 430\n",
-      "430 457 0 0 + 27 457\n",
-      "457 492 0 0 + 35 492\n",
-      "492 494 0 0 + 2 494\n",
-      "494 497 0 0 + 3 497\n",
-      "497 507 0 0 + 10 507\n",
-      "507 508 0 0 + 1 508\n",
-      "508 564 0 0 + 56 564\n",
-      "564 566 0 0 + 2 566\n",
-      "566 567 0 0 + 1 567\n",
-      "567 568 0 0 + 1 568\n",
-      "568 569 0 0 + 1 569\n",
-      "569 824 0 0 + 255 824\n",
-      "824 826 0 0 + 2 826\n",
-      "826 858 0 0 + 32 858\n",
-      "858 859 0 0 + 1 859\n",
-      "859 860 0 0 + 1 860\n",
-      "860 861 0 0 + 1 861\n",
-      "861 862 0 0 + 1 862\n",
-      "862 863 0 0 + 1 863\n",
-      "863 864 0 0 + 1 864\n",
-      "864 865 0 0 + 1 865\n",
-      "865 866 0 0 + 1 866\n",
-      "866 867 0 0 + 1 867\n",
-      "867 868 0 0 + 1 868\n",
-      "868 869 0 0 + 1 869\n",
-      "869 913 0 0 + 44 913\n",
-      "913 919 0 0 + 6 919\n",
-      "919 978 0 0 + 59 978\n",
-      "978 979 0 0 + 1 979\n",
-      "979 1038 0 0 + 59 1038\n",
-      "1038 1045 0 0 + 7 1045\n",
-      "1045 1046 0 0 + 1 1046\n",
-      "1046 1080 0 0 + 34 1080\n",
-      "1080 1081 0 0 + 1 1081\n",
-      "1081 1107 0 0 + 26 1107\n",
-      "1107 1108 0 0 + 1 1108\n",
-      "1108 1183 0 0 + 75 1183\n",
-      "1183 1186 0 0 + 3 1186\n",
-      "1186 1224 0 0 + 38 1224\n",
-      "1224 1257 0 0 + 33 1257\n",
-      "1257 1289 0 0 + 32 1289\n",
-      "1289 1311 0 0 + 22 1311\n",
-      "1311 1359 0 0 + 48 1359\n",
-      "1359 1382 0 0 + 23 1382\n",
-      "1382 1434 0 0 + 52 1434\n",
-      "1434 1451 0 0 + 17 1451\n",
-      "1451 1531 0 0 + 80 1531\n",
-      "1531 1532 0 0 + 1 1532\n",
-      "1532 1543 0 0 + 11 1543\n",
-      "1543 1544 0 0 + 1 1544\n",
-      "1544 1572 0 0 + 28 1572\n",
-      "1572 1573 0 0 + 1 1573\n",
-      "1573 1587 0 0 + 14 1587\n",
-      "1587 1588 0 0 + 1 1588\n",
-      "1588 1616 0 0 + 28 1616\n",
-      "1616 1617 0 0 + 1 1617\n",
-      "1617 1646 0 0 + 29 1646\n",
-      "1646 1661 0 0 + 15 1661\n",
-      "1661 1673 0 0 + 12 1673\n",
-      "1673 1674 0 0 + 1 1674\n",
-      "1674 1726 0 0 + 52 1726\n",
-      "1726 1727 0 0 + 1 1727\n",
-      "1727 1762 0 0 + 35 1762\n",
-      "1762 1763 0 0 + 1 1763\n",
-      "1763 1764 0 0 + 1 1764\n",
-      "1764 1765 0 0 + 1 1765\n",
-      "1765 1766 0 0 + 1 1766\n",
-      "1766 1767 0 0 + 1 1767\n",
-      "1767 1824 0 0 + 57 1824\n",
-      "1824 1825 0 0 + 1 1825\n",
-      "1825 1975 0 0 + 150 1975\n",
-      "1975 1976 0 0 + 1 1976\n",
-      "1976 2015 0 0 + 39 2015\n",
-      "2015 2016 0 0 + 1 2016\n",
-      "2016 2047 0 0 + 31 2047\n",
-      "2047 2055 0 0 + 8 2055\n",
-      "2055 2056 0 0 + 1 2056\n",
-      "2056 2120 0 0 + 64 2120\n",
-      "2120 2121 0 0 + 1 2121\n",
-      "2121 2157 0 0 + 36 2157\n",
-      "2157 2158 0 0 + 1 2158\n",
-      "2158 2170 0 0 + 12 2170\n",
-      "2170 2171 0 0 + 1 2171\n",
-      "2171 2205 0 0 + 34 2205\n",
-      "2205 2206 0 0 + 1 2206\n",
-      "2206 2344 0 0 + 138 2344\n",
-      "2344 2345 0 0 + 1 2345\n",
-      "2345 2364 0 0 + 19 2364\n",
-      "2364 2383 0 0 + 19 2383\n",
-      "2383 2408 0 0 + 25 2408\n",
-      "2408 2409 0 0 + 1 2409\n",
-      "2409 2441 0 0 + 32 2441\n",
-      "2441 2442 0 0 + 1 2442\n",
-      "2442 2580 0 0 + 138 2580\n",
-      "2580 2581 0 0 + 1 2581\n",
-      "2581 2582 0 0 + 1 2582\n",
-      "2582 2583 0 0 + 1 2583\n",
-      "2583 2584 0 0 + 1 2584\n",
-      "2584 2764 0 0 + 180 2764\n",
-      "2764 2765 0 0 + 1 2765\n",
-      "2765 2797 0 0 + 32 2797\n",
-      "2797 2798 0 0 + 1 2798\n",
-      "2798 2878 0 0 + 80 2878\n",
-      "2878 2879 0 0 + 1 2879\n",
-      "2879 2951 0 0 + 72 2951\n",
-      "2951 2952 0 0 + 1 2952\n",
-      "2952 3002 0 0 + 50 3002\n",
-      "3002 3077 0 0 + 75 3077\n",
-      "3077 3078 0 0 + 1 3078\n",
-      "3078 3093 0 0 + 15 3093\n",
-      "3093 3094 0 0 + 1 3094\n",
-      "3094 3097 0 0 + 3 3097\n",
-      "3097 3140 0 0 + 43 3140\n",
-      "3140 3210 0 0 + 70 3210\n",
-      "3210 3211 0 0 + 1 3211\n",
-      "3211 3229 0 0 + 18 3229\n",
-      "3229 3230 0 0 + 1 3230\n",
-      "3230 3276 0 0 + 46 3276\n",
-      "3276 3277 0 0 + 1 3277\n",
-      "3277 3315 0 0 + 38 3315\n",
-      "3315 3316 0 0 + 1 3316\n",
-      "3316 3322 0 0 + 6 3322\n",
-      "3322 3323 0 0 + 1 3323\n",
-      "3323 3348 0 0 + 25 3348\n",
-      "3348 3349 0 0 + 1 3349\n",
-      "3349 3350 0 0 + 1 3350\n",
-      "3350 3351 0 0 + 1 3351\n",
-      "3351 3352 0 0 + 1 3352\n",
-      "3352 3353 0 0 + 1 3353\n",
-      "3353 3354 0 0 + 1 3354\n",
-      "3354 3356 0 0 + 2 3356\n",
-      "3356 3357 0 0 + 1 3357\n",
-      "3357 3489 0 0 + 132 3489\n",
-      "3489 3490 0 0 + 1 3490\n",
-      "3490 3642 0 0 + 152 3642\n",
-      "3642 3644 0 0 + 2 3644\n",
-      "3644 3685 0 0 + 41 3685\n",
-      "3685 3687 0 0 + 2 3687\n",
-      "3687 3693 0 0 + 6 3693\n",
-      "3693 3694 0 0 + 1 3694\n",
-      "3694 3708 0 0 + 14 3708\n",
-      "3708 3709 0 0 + 1 3709\n",
-      "3709 3710 0 0 + 1 3710\n",
-      "3710 3714 0 0 + 4 3714\n",
-      "3714 3715 0 0 + 1 3715\n",
-      "3715 3716 0 0 + 1 3716\n",
-      "3716 3720 0 0 + 4 3720\n",
-      "3720 3721 0 0 + 1 3721\n",
-      "3721 3722 0 0 + 1 3722\n",
-      "3722 3735 0 10 + 23 3735\n",
-      "[gaf2aln::Alignment position processing] Running on ALN_2 ...\n",
-      "0 1 0 0 + 1 1\n",
-      "1 2 0 0 + 1 2\n",
-      "2 3 0 0 + 1 3\n",
-      "3 4 0 0 + 1 4\n",
-      "4 5 0 0 + 1 5\n",
-      "5 6 0 0 + 1 6\n",
-      "6 7 0 0 + 1 7\n",
-      "7 8 0 0 + 1 8\n",
-      "8 9 0 0 + 1 9\n",
-      "9 10 0 0 + 1 10\n",
-      "10 11 0 0 + 1 11\n",
-      "11 12 0 0 + 1 12\n",
-      "12 13 0 0 + 1 13\n",
-      "13 14 0 0 + 1 14\n",
-      "14 15 0 0 + 1 15\n",
-      "15 16 0 0 + 1 16\n",
-      "16 17 0 0 + 1 17\n",
-      "17 18 0 0 + 1 18\n",
-      "18 19 0 0 + 1 19\n",
-      "19 20 0 0 + 1 20\n",
-      "20 21 0 0 + 1 21\n",
-      "21 22 0 0 + 1 22\n",
-      "22 23 0 0 + 1 23\n",
-      "23 24 0 0 + 1 24\n",
-      "24 25 0 0 + 1 25\n",
-      "25 26 0 0 + 1 26\n",
-      "26 27 0 0 + 1 27\n",
-      "27 28 0 0 + 1 28\n",
-      "28 29 0 0 + 1 29\n",
-      "29 30 0 0 + 1 30\n",
-      "30 31 0 0 + 1 31\n",
-      "31 32 0 0 + 1 32\n",
-      "32 33 0 0 + 1 33\n",
-      "33 34 0 0 + 1 34\n",
-      "34 35 0 0 + 1 35\n",
-      "35 36 0 0 + 1 36\n",
-      "36 37 0 0 + 1 37\n",
-      "37 38 0 0 + 1 38\n",
-      "38 39 0 0 + 1 39\n",
-      "39 40 0 0 + 1 40\n",
-      "40 41 0 0 + 1 41\n",
-      "41 42 0 0 + 1 42\n",
-      "42 43 0 0 + 1 43\n",
-      "43 44 0 0 + 1 44\n",
-      "44 45 0 0 + 1 45\n",
-      "45 46 0 0 + 1 46\n",
-      "46 47 0 0 + 1 47\n",
-      "47 48 0 0 + 1 48\n",
-      "48 49 0 0 + 1 49\n",
-      "49 50 0 0 + 1 50\n",
-      "50 51 0 0 + 1 51\n",
-      "51 52 0 0 + 1 52\n",
-      "52 53 0 0 + 1 53\n",
-      "53 54 0 0 + 1 54\n",
-      "54 55 0 0 + 1 55\n",
-      "55 56 0 0 + 1 56\n",
-      "56 57 0 0 + 1 57\n",
-      "57 58 0 0 + 1 58\n",
-      "58 59 0 0 + 1 59\n",
-      "59 60 0 0 + 1 60\n",
-      "60 61 0 0 + 1 61\n",
-      "61 62 0 0 + 1 62\n",
-      "62 63 0 0 + 1 63\n",
-      "63 64 0 0 + 1 64\n",
-      "64 65 0 0 + 1 65\n",
-      "65 66 0 0 + 1 66\n",
-      "66 67 0 0 + 1 67\n",
-      "67 68 0 0 + 1 68\n",
-      "68 69 0 0 + 1 69\n",
-      "69 70 0 0 + 1 70\n",
-      "70 71 0 0 + 1 71\n",
-      "71 72 0 0 + 1 72\n",
-      "72 73 0 0 + 1 73\n",
-      "73 74 0 0 + 1 74\n",
-      "74 75 0 0 + 1 75\n",
-      "75 76 0 0 + 1 76\n",
-      "76 77 0 0 + 1 77\n",
-      "77 78 0 0 + 1 78\n",
-      "78 3735 0 57489 + 61146 3735\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(f\"[gaf2aln::Alignment position processing] Computing nodes positions in each alignement...\")\n",
-    "# Adding nodes positions relative to path\n",
-    "\n",
-    "def get_aln_node_info(aln_name, aln_dict = aln_dict, nodes_length = nodes_length):\n",
-    "    # Initializing current position in query\n",
-    "    cur_pos = 0\n",
-    "\n",
-    "    # Getting start and end node ids\n",
-    "    start_end_id = (aln_dict[aln_name][\"PATH.MATCH\"][0][0], aln_dict[aln_name][\"PATH.MATCH\"][-1][0])\n",
-    "\n",
-    "    # Creating result dictionnary\n",
-    "    res = {}\n",
-    "\n",
-    "    ## Iterating over node_ids from the given alignment\n",
-    "    for node_id, orient in aln_dict[aln_name][\"PATH.MATCH\"]:\n",
-    "        # Adding entry for current node\n",
-    "        res[node_id] = {aln_name: {}}\n",
-    "\n",
-    "        # First node\n",
-    "        if node_id == start_end_id[0]:\n",
-    "            start_pos = 0\n",
-    "            s_off = int(aln_dict[aln_name][\"ALN.START\"])\n",
-    "            end_pos = nodes_length[node_id]-s_off\n",
-    "            e_off = 0\n",
-    "        # End node\n",
-    "        elif node_id == start_end_id[1]:\n",
-    "            start_pos = cur_pos\n",
-    "            s_off = 0\n",
-    "            end_pos = int(aln_dict[aln_name][\"QRY.END\"])\n",
-    "            e_off = nodes_length[node_id]-(end_pos-cur_pos)\n",
-    "        # Node in between\n",
-    "        else :\n",
-    "            start_pos = cur_pos\n",
-    "            s_off, e_off = 0, 0\n",
-    "            end_pos = cur_pos+nodes_length[node_id]\n",
-    "\n",
-    "        res[node_id] = {\n",
-    "            \"START\": start_pos, # Start position on the query\n",
-    "            \"END\": end_pos, # End position on the query\n",
-    "            \"S.OFF\": s_off, # Offset between the start of the alignment and the node's start\n",
-    "            \"E.OFF\": e_off, # Offset between the end of the alignment and the node's end \n",
-    "            \"STRAND\": orient # Orientation of the node in the alignment\n",
-    "            }\n",
-    "        \n",
-    "        cur_pos = end_pos\n",
-    "        print(start_pos, end_pos, s_off, e_off, orient, nodes_length[node_id], cur_pos)\n",
-    "\n",
-    "    return res\n",
-    "\n",
-    "# Storing alignement \n",
-    "res = {}\n",
-    "executor = concurrent.futures.ThreadPoolExecutor(max_workers=args.threads)\n",
-    "for aln_name in aln_dict.keys():\n",
-    "    print(f\"[gaf2aln::Alignment position processing] Running on {aln_name} ...\")\n",
-    "    \n",
-    "    res[aln_name] = executor.submit(get_aln_node_info, aln_name)\n",
-    "    #res[aln_name] = get_aln_node_info(aln_name, aln_dict = aln_dict, nodes_length = nodes_length)\n",
-    "\n",
-    "executor.shutdown(wait=True)\n",
-    "\n",
-    "for aln_name, node_info in res.items():\n",
-    "    results = node_info.result()\n",
-    "    for node_id, info in results.items():\n",
-    "        nodes[node_id][aln_name] = info\n",
-    "\n",
-    "del res"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "id": "4c30727c-7ffc-4852-ad81-ca2a5a7f9957",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[gaf2aln::CIGAR processing] Computing nodes cigar from alignement ...\n",
-      "[gaf2aln::CIGAR processing] Running on ALN_1 ...\n",
-      "[gaf2aln::CIGAR processing] Running on ALN_2 ...\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Calculating CIGAR for each nodes in each aln\n",
-    "print(f\"[gaf2aln::CIGAR processing] Computing nodes cigar from alignement ...\")\n",
-    "# Iterating over alignments\n",
-    "for aln in aln_dict.keys():\n",
-    "    \n",
-    "    print(f\"[gaf2aln::CIGAR processing] Running on {aln} ...\")\n",
-    "    # Getting the list of base level alignement ([\"=\", \"X\", ...] from \"1=1X...\")\n",
-    "    raw_cigar = cigar2basealn(aln_dict[aln][\"RAW.CIGAR\"])\n",
-    "    CIGAR={}\n",
-    "\n",
-    "    for node_id, orient in aln_dict[aln][\"PATH.MATCH\"]:\n",
-    "\n",
-    "        _cigar = basealn2cigar(raw_cigar[\n",
-    "            nodes[node_id][aln][\"START\"]:nodes[node_id][aln][\"END\"]\n",
-    "            ])\n",
-    "        nodes[node_id][aln][\"CIGAR\"] = _cigar\n",
-    "        #print(_cigar, nodes[node_id][aln][\"START\"], nodes[node_id][aln][\"END\"])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "id": "e15e4762-cd71-4afe-bc74-ebe44869fee6",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "ALN_1\n",
-      "7046526 D134#1#chr03 0 77\n",
-      "\tIn path\n",
-      "\t 73306158 73306235\n",
-      "skipped\n",
-      "\n",
-      "7046528 D134#1#chr03 77 82\n",
-      "\tIn path\n",
-      "\t 73306238 73306243\n",
-      "{'Q.START': 0, 'Q.END': 77, 'T.START': 73306158, 'T.END': 73306235, 'CG': '77='}\n",
-      "7046530 D134#1#chr03 82 83\n",
-      "\tNot in path\n",
-      "7046531 D134#1#chr03 83 138\n",
-      "\tIn path\n",
-      "\t 73306246 73306301\n",
-      "{'Q.START': 77, 'Q.END': 82, 'T.START': 73306238, 'T.END': 73306243, 'CG': '5='}\n",
-      "7046532 D134#1#chr03 138 139\n",
-      "\tNot in path\n",
-      "7046533 D134#1#chr03 139 202\n",
-      "\tIn path\n",
-      "\t 73306302 73306365\n",
-      "{'Q.START': 83, 'Q.END': 138, 'T.START': 73306246, 'T.END': 73306301, 'CG': '55='}\n",
-      "7046534 D134#1#chr03 202 203\n",
-      "\tIn path\n",
-      "\t 73306366 73306367\n",
-      "{'Q.START': 139, 'Q.END': 202, 'T.START': 73306302, 'T.END': 73306365, 'CG': '63='}\n",
-      "7046536 D134#1#chr03 203 379\n",
-      "\tIn path\n",
-      "\t 73306368 73306544\n",
-      "{'Q.START': 202, 'Q.END': 203, 'T.START': 73306366, 'T.END': 73306367, 'CG': '1='}\n",
-      "7046537 D134#1#chr03 379 380\n",
-      "\tIn path\n",
-      "\t 73306545 73306546\n",
-      "{'Q.START': 203, 'Q.END': 379, 'T.START': 73306368, 'T.END': 73306544, 'CG': '176='}\n",
-      "7046539 D134#1#chr03 380 429\n",
-      "\tIn path\n",
-      "\t 73306547 73306596\n",
-      "{'Q.START': 379, 'Q.END': 380, 'T.START': 73306545, 'T.END': 73306546, 'CG': '1='}\n",
-      "7046541 D134#1#chr03 429 430\n",
-      "\tIn path\n",
-      "\t 73306597 73306598\n",
-      "{'Q.START': 380, 'Q.END': 429, 'T.START': 73306547, 'T.END': 73306596, 'CG': '49='}\n",
-      "7046542 D134#1#chr03 430 457\n",
-      "\tIn path\n",
-      "\t 73306599 73306626\n",
-      "{'Q.START': 429, 'Q.END': 430, 'T.START': 73306597, 'T.END': 73306598, 'CG': '1='}\n",
-      "7046544 D134#1#chr03 457 492\n",
-      "\tIn path\n",
-      "\t 73306641 73306676\n",
-      "{'Q.START': 430, 'Q.END': 457, 'T.START': 73306599, 'T.END': 73306626, 'CG': '27='}\n",
-      "7046546 D134#1#chr03 492 494\n",
-      "\tNot in path\n",
-      "7046547 D134#1#chr03 494 497\n",
-      "\tNot in path\n",
-      "7046549 D134#1#chr03 497 507\n",
-      "\tNot in path\n",
-      "7046551 D134#1#chr03 507 508\n",
-      "\tNot in path\n",
-      "7046552 D134#1#chr03 508 564\n",
-      "\tIn path\n",
-      "\t 73306694 73306750\n",
-      "{'Q.START': 457, 'Q.END': 492, 'T.START': 73306641, 'T.END': 73306676, 'CG': '35='}\n",
-      "7046554 D134#1#chr03 564 566\n",
-      "\tNot in path\n",
-      "7046556 D134#1#chr03 568 569\n",
-      "\tIn path\n",
-      "\t 73306753 73306754\n",
-      "{'Q.START': 508, 'Q.END': 564, 'T.START': 73306694, 'T.END': 73306750, 'CG': '56='}\n",
-      "7046556 D134#1#chr03 568 569\n",
-      "\tIn path\n",
-      "\t 73306753 73306754\n",
-      "{'Q.START': 568, 'Q.END': 569, 'T.START': 73306753, 'T.END': 73306754, 'CG': '1='}\n",
-      "7046556 D134#1#chr03 568 569\n",
-      "\tIn path\n",
-      "\t 73306753 73306754\n",
-      "{'Q.START': 568, 'Q.END': 569, 'T.START': 73306753, 'T.END': 73306754, 'CG': '1='}\n",
-      "7046557 D134#1#chr03 569 824\n",
-      "\tIn path\n",
-      "\t 73306755 73307010\n",
-      "{'Q.START': 568, 'Q.END': 569, 'T.START': 73306753, 'T.END': 73306754, 'CG': '1='}\n",
-      "7046558 D134#1#chr03 824 826\n",
-      "\tNot in path\n",
-      "7046559 D134#1#chr03 826 858\n",
-      "\tIn path\n",
-      "\t 73307011 73307043\n",
-      "{'Q.START': 569, 'Q.END': 824, 'T.START': 73306755, 'T.END': 73307010, 'CG': '255='}\n",
-      "7046560 D134#1#chr03 858 859\n",
-      "\tIn path\n",
-      "\t 73307044 73307045\n",
-      "{'Q.START': 826, 'Q.END': 858, 'T.START': 73307011, 'T.END': 73307043, 'CG': '32='}\n",
-      "7046561 D134#1#chr03 868 869\n",
-      "\tIn path\n",
-      "\t 73307046 73307047\n",
-      "{'Q.START': 858, 'Q.END': 859, 'T.START': 73307044, 'T.END': 73307045, 'CG': '1='}\n",
-      "7046561 D134#1#chr03 868 869\n",
-      "\tIn path\n",
-      "\t 73307046 73307047\n",
-      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
-      "7046561 D134#1#chr03 868 869\n",
-      "\tIn path\n",
-      "\t 73307046 73307047\n",
-      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
-      "7046561 D134#1#chr03 868 869\n",
-      "\tIn path\n",
-      "\t 73307046 73307047\n",
-      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
-      "7046561 D134#1#chr03 868 869\n",
-      "\tIn path\n",
-      "\t 73307046 73307047\n",
-      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
-      "7046561 D134#1#chr03 868 869\n",
-      "\tIn path\n",
-      "\t 73307046 73307047\n",
-      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
-      "7046561 D134#1#chr03 868 869\n",
-      "\tIn path\n",
-      "\t 73307046 73307047\n",
-      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
-      "7046561 D134#1#chr03 868 869\n",
-      "\tIn path\n",
-      "\t 73307046 73307047\n",
-      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
-      "7046561 D134#1#chr03 868 869\n",
-      "\tIn path\n",
-      "\t 73307046 73307047\n",
-      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
-      "7046561 D134#1#chr03 868 869\n",
-      "\tIn path\n",
-      "\t 73307046 73307047\n",
-      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
-      "7046562 D134#1#chr03 869 913\n",
-      "\tIn path\n",
-      "\t 73307048 73307092\n",
-      "{'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}\n",
-      "7046564 D134#1#chr03 913 919\n",
-      "\tIn path\n",
-      "\t 73307093 73307099\n",
-      "{'Q.START': 869, 'Q.END': 913, 'T.START': 73307048, 'T.END': 73307092, 'CG': '44='}\n",
-      "7046565 D134#1#chr03 919 978\n",
-      "\tIn path\n",
-      "\t 73307100 73307159\n",
-      "{'Q.START': 913, 'Q.END': 919, 'T.START': 73307093, 'T.END': 73307099, 'CG': '6='}\n",
-      "7046567 D134#1#chr03 978 979\n",
-      "\tIn path\n",
-      "\t 73307160 73307161\n",
-      "{'Q.START': 919, 'Q.END': 978, 'T.START': 73307100, 'T.END': 73307159, 'CG': '59='}\n",
-      "7046568 D134#1#chr03 979 1038\n",
-      "\tIn path\n",
-      "\t 73307162 73307221\n",
-      "{'Q.START': 978, 'Q.END': 979, 'T.START': 73307160, 'T.END': 73307161, 'CG': '1='}\n",
-      "7046570 D134#1#chr03 1038 1045\n",
-      "\tIn path\n",
-      "\t 73307224 73307231\n",
-      "{'Q.START': 979, 'Q.END': 1038, 'T.START': 73307162, 'T.END': 73307221, 'CG': '59='}\n",
-      "7046571 D134#1#chr03 1045 1046\n",
-      "\tIn path\n",
-      "\t 73307232 73307233\n",
-      "{'Q.START': 1038, 'Q.END': 1045, 'T.START': 73307224, 'T.END': 73307231, 'CG': '7='}\n",
-      "7046573 D134#1#chr03 1046 1080\n",
-      "\tIn path\n",
-      "\t 73307234 73307268\n",
-      "{'Q.START': 1045, 'Q.END': 1046, 'T.START': 73307232, 'T.END': 73307233, 'CG': '1='}\n",
-      "7046574 D134#1#chr03 1080 1081\n",
-      "\tIn path\n",
-      "\t 73307269 73307270\n",
-      "{'Q.START': 1046, 'Q.END': 1080, 'T.START': 73307234, 'T.END': 73307268, 'CG': '34='}\n",
-      "7046576 D134#1#chr03 1081 1107\n",
-      "\tIn path\n",
-      "\t 73307271 73307297\n",
-      "{'Q.START': 1080, 'Q.END': 1081, 'T.START': 73307269, 'T.END': 73307270, 'CG': '1='}\n",
-      "7046577 D134#1#chr03 1107 1108\n",
-      "\tNot in path\n",
-      "7046579 D134#1#chr03 1108 1183\n",
-      "\tIn path\n",
-      "\t 73307300 73307375\n",
-      "{'Q.START': 1081, 'Q.END': 1107, 'T.START': 73307271, 'T.END': 73307297, 'CG': '26='}\n",
-      "7046581 D134#1#chr03 1183 1186\n",
-      "\tIn path\n",
-      "\t 73307376 73307379\n",
-      "{'Q.START': 1108, 'Q.END': 1183, 'T.START': 73307300, 'T.END': 73307375, 'CG': '75='}\n",
-      "7046583 D134#1#chr03 1186 1224\n",
-      "\tNot in path\n",
-      "7046584 D134#1#chr03 1224 1257\n",
-      "\tIn path\n",
-      "\t 73307419 73307452\n",
-      "{'Q.START': 1183, 'Q.END': 1186, 'T.START': 73307376, 'T.END': 73307379, 'CG': '3='}\n",
-      "7046586 D134#1#chr03 1257 1289\n",
-      "\tNot in path\n",
-      "7046587 D134#1#chr03 1289 1311\n",
-      "\tIn path\n",
-      "\t 73307475 73307497\n",
-      "{'Q.START': 1224, 'Q.END': 1257, 'T.START': 73307419, 'T.END': 73307452, 'CG': '33='}\n",
-      "7046589 D134#1#chr03 1311 1359\n",
-      "\tNot in path\n",
-      "7046590 D134#1#chr03 1359 1382\n",
-      "\tIn path\n",
-      "\t 73307546 73307569\n",
-      "{'Q.START': 1289, 'Q.END': 1311, 'T.START': 73307475, 'T.END': 73307497, 'CG': '22='}\n",
-      "7046592 D134#1#chr03 1382 1434\n",
-      "\tNot in path\n",
-      "7046593 D134#1#chr03 1434 1451\n",
-      "\tIn path\n",
-      "\t 73307643 73307660\n",
-      "{'Q.START': 1359, 'Q.END': 1382, 'T.START': 73307546, 'T.END': 73307569, 'CG': '23='}\n",
-      "7046594 D134#1#chr03 1451 1531\n",
-      "\tIn path\n",
-      "\t 73307661 73307741\n",
-      "{'Q.START': 1434, 'Q.END': 1451, 'T.START': 73307643, 'T.END': 73307660, 'CG': '17='}\n",
-      "7046596 D134#1#chr03 1531 1532\n",
-      "\tNot in path\n",
-      "7046597 D134#1#chr03 1532 1543\n",
-      "\tIn path\n",
-      "\t 73307744 73307755\n",
-      "{'Q.START': 1451, 'Q.END': 1531, 'T.START': 73307661, 'T.END': 73307741, 'CG': '80='}\n",
-      "7046599 D134#1#chr03 1543 1544\n",
-      "\tNot in path\n",
-      "7046600 D134#1#chr03 1544 1572\n",
-      "\tIn path\n",
-      "\t 73307758 73307786\n",
-      "{'Q.START': 1532, 'Q.END': 1543, 'T.START': 73307744, 'T.END': 73307755, 'CG': '11='}\n",
-      "7046601 D134#1#chr03 1572 1573\n",
-      "\tIn path\n",
-      "\t 73307787 73307788\n",
-      "{'Q.START': 1544, 'Q.END': 1572, 'T.START': 73307758, 'T.END': 73307786, 'CG': '28='}\n",
-      "7046603 D134#1#chr03 1573 1587\n",
-      "\tIn path\n",
-      "\t 73307789 73307803\n",
-      "{'Q.START': 1572, 'Q.END': 1573, 'T.START': 73307787, 'T.END': 73307788, 'CG': '1='}\n",
-      "7046604 D134#1#chr03 1587 1588\n",
-      "\tNot in path\n",
-      "7046606 D134#1#chr03 1588 1616\n",
-      "\tIn path\n",
-      "\t 73307806 73307834\n",
-      "{'Q.START': 1573, 'Q.END': 1587, 'T.START': 73307789, 'T.END': 73307803, 'CG': '14='}\n",
-      "7046608 D134#1#chr03 1616 1617\n",
-      "\tIn path\n",
-      "\t 73307835 73307836\n",
-      "{'Q.START': 1588, 'Q.END': 1616, 'T.START': 73307806, 'T.END': 73307834, 'CG': '28='}\n",
-      "7046609 D134#1#chr03 1617 1646\n",
-      "\tIn path\n",
-      "\t 73307837 73307866\n",
-      "{'Q.START': 1616, 'Q.END': 1617, 'T.START': 73307835, 'T.END': 73307836, 'CG': '1='}\n",
-      "7046621 D134#1#chr03 1646 1661\n",
-      "\tIn path\n",
-      "\t 73307867 73307882\n",
-      "{'Q.START': 1617, 'Q.END': 1646, 'T.START': 73307837, 'T.END': 73307866, 'CG': '29='}\n",
-      "7046622 D134#1#chr03 1661 1673\n",
-      "\tIn path\n",
-      "\t 73307883 73307895\n",
-      "{'Q.START': 1646, 'Q.END': 1661, 'T.START': 73307867, 'T.END': 73307882, 'CG': '15='}\n",
-      "7046624 D134#1#chr03 1673 1674\n",
-      "\tIn path\n",
-      "\t 73307896 73307897\n",
-      "{'Q.START': 1661, 'Q.END': 1673, 'T.START': 73307883, 'T.END': 73307895, 'CG': '12='}\n",
-      "7046625 D134#1#chr03 1674 1726\n",
-      "\tIn path\n",
-      "\t 73307898 73307950\n",
-      "{'Q.START': 1673, 'Q.END': 1674, 'T.START': 73307896, 'T.END': 73307897, 'CG': '1='}\n",
-      "7046626 D134#1#chr03 1726 1727\n",
-      "\tNot in path\n",
-      "7046628 D134#1#chr03 1727 1762\n",
-      "\tIn path\n",
-      "\t 73307953 73307988\n",
-      "{'Q.START': 1674, 'Q.END': 1726, 'T.START': 73307898, 'T.END': 73307950, 'CG': '52='}\n",
-      "7046631 D134#1#chr03 1766 1767\n",
-      "\tIn path\n",
-      "\t 73307991 73307992\n",
-      "{'Q.START': 1727, 'Q.END': 1762, 'T.START': 73307953, 'T.END': 73307988, 'CG': '35='}\n",
-      "7046673 D134#1#chr03 1765 1766\n",
-      "\tIn path\n",
-      "\t 73307993 73307994\n",
-      "{'Q.START': 1766, 'Q.END': 1767, 'T.START': 73307991, 'T.END': 73307992, 'CG': '1='}\n",
-      "7046631 D134#1#chr03 1766 1767\n",
-      "\tIn path\n",
-      "\t 73307991 73307992\n",
-      "{'Q.START': 1765, 'Q.END': 1766, 'T.START': 73307993, 'T.END': 73307994, 'CG': '1='}\n",
-      "7046673 D134#1#chr03 1765 1766\n",
-      "\tIn path\n",
-      "\t 73307993 73307994\n",
-      "{'Q.START': 1766, 'Q.END': 1767, 'T.START': 73307991, 'T.END': 73307992, 'CG': '1='}\n",
-      "7046631 D134#1#chr03 1766 1767\n",
-      "\tIn path\n",
-      "\t 73307991 73307992\n",
-      "{'Q.START': 1765, 'Q.END': 1766, 'T.START': 73307993, 'T.END': 73307994, 'CG': '1='}\n",
-      "7046632 D134#1#chr03 1767 1824\n",
-      "\tIn path\n",
-      "\t 73307995 73308052\n",
-      "{'Q.START': 1766, 'Q.END': 1767, 'T.START': 73307991, 'T.END': 73307992, 'CG': '1='}\n",
-      "7046634 D134#1#chr03 1824 1825\n",
-      "\tIn path\n",
-      "\t 73308053 73308054\n",
-      "{'Q.START': 1767, 'Q.END': 1824, 'T.START': 73307995, 'T.END': 73308052, 'CG': '57='}\n",
-      "7046635 D134#1#chr03 1825 1975\n",
-      "\tIn path\n",
-      "\t 73308055 73308205\n",
-      "{'Q.START': 1824, 'Q.END': 1825, 'T.START': 73308053, 'T.END': 73308054, 'CG': '1='}\n",
-      "7046637 D134#1#chr03 1975 1976\n",
-      "\tNot in path\n",
-      "7046638 D134#1#chr03 1976 2015\n",
-      "\tIn path\n",
-      "\t 73308208 73308247\n",
-      "{'Q.START': 1825, 'Q.END': 1975, 'T.START': 73308055, 'T.END': 73308205, 'CG': '150='}\n",
-      "7046639 D134#1#chr03 2015 2016\n",
-      "\tNot in path\n",
-      "7046641 D134#1#chr03 2016 2047\n",
-      "\tIn path\n",
-      "\t 73308250 73308281\n",
-      "{'Q.START': 1976, 'Q.END': 2015, 'T.START': 73308208, 'T.END': 73308247, 'CG': '39='}\n",
-      "7046644 D134#1#chr03 2047 2055\n",
-      "\tIn path\n",
-      "\t 73308286 73308294\n",
-      "{'Q.START': 2016, 'Q.END': 2047, 'T.START': 73308250, 'T.END': 73308281, 'CG': '31='}\n",
-      "7046646 D134#1#chr03 2055 2056\n",
-      "\tNot in path\n",
-      "7046647 D134#1#chr03 2056 2120\n",
-      "\tIn path\n",
-      "\t 73308297 73308361\n",
-      "{'Q.START': 2047, 'Q.END': 2055, 'T.START': 73308286, 'T.END': 73308294, 'CG': '8='}\n",
-      "7046649 D134#1#chr03 2120 2121\n",
-      "\tIn path\n",
-      "\t 73308362 73308363\n",
-      "{'Q.START': 2056, 'Q.END': 2120, 'T.START': 73308297, 'T.END': 73308361, 'CG': '64='}\n",
-      "7046650 D134#1#chr03 2121 2157\n",
-      "\tIn path\n",
-      "\t 73308364 73308400\n",
-      "{'Q.START': 2120, 'Q.END': 2121, 'T.START': 73308362, 'T.END': 73308363, 'CG': '1='}\n",
-      "7046652 D134#1#chr03 2157 2158\n",
-      "\tNot in path\n",
-      "7046653 D134#1#chr03 2158 2170\n",
-      "\tIn path\n",
-      "\t 73308403 73308415\n",
-      "{'Q.START': 2121, 'Q.END': 2157, 'T.START': 73308364, 'T.END': 73308400, 'CG': '36='}\n",
-      "7046654 D134#1#chr03 2170 2171\n",
-      "\tIn path\n",
-      "\t 73308416 73308417\n",
-      "{'Q.START': 2158, 'Q.END': 2170, 'T.START': 73308403, 'T.END': 73308415, 'CG': '12='}\n",
-      "7046656 D134#1#chr03 2171 2205\n",
-      "\tIn path\n",
-      "\t 73308418 73308452\n",
-      "{'Q.START': 2170, 'Q.END': 2171, 'T.START': 73308416, 'T.END': 73308417, 'CG': '1='}\n",
-      "7046657 D134#1#chr03 2205 2206\n",
-      "\tNot in path\n",
-      "7046659 D134#1#chr03 2206 2344\n",
-      "\tIn path\n",
-      "\t 73308455 73308593\n",
-      "{'Q.START': 2171, 'Q.END': 2205, 'T.START': 73308418, 'T.END': 73308452, 'CG': '34='}\n",
-      "7046660 D134#1#chr03 2344 2345\n",
-      "\tNot in path\n",
-      "7046662 D134#1#chr03 2345 2364\n",
-      "\tIn path\n",
-      "\t 73308596 73308615\n",
-      "{'Q.START': 2206, 'Q.END': 2344, 'T.START': 73308455, 'T.END': 73308593, 'CG': '138='}\n",
-      "7046663 D134#1#chr03 2364 2383\n",
-      "\tIn path\n",
-      "\t 73308616 73308635\n",
-      "{'Q.START': 2345, 'Q.END': 2364, 'T.START': 73308596, 'T.END': 73308615, 'CG': '19='}\n",
-      "7046665 D134#1#chr03 2383 2408\n",
-      "\tIn path\n",
-      "\t 73308636 73308661\n",
-      "{'Q.START': 2364, 'Q.END': 2383, 'T.START': 73308616, 'T.END': 73308635, 'CG': '19='}\n",
-      "7046667 D134#1#chr03 2408 2409\n",
-      "\tIn path\n",
-      "\t 73308662 73308663\n",
-      "{'Q.START': 2383, 'Q.END': 2408, 'T.START': 73308636, 'T.END': 73308661, 'CG': '25='}\n",
-      "7046668 D134#1#chr03 2409 2441\n",
-      "\tIn path\n",
-      "\t 73308664 73308696\n",
-      "{'Q.START': 2408, 'Q.END': 2409, 'T.START': 73308662, 'T.END': 73308663, 'CG': '1='}\n",
-      "7046670 D134#1#chr03 2441 2442\n",
-      "\tIn path\n",
-      "\t 73308697 73308698\n",
-      "{'Q.START': 2409, 'Q.END': 2441, 'T.START': 73308664, 'T.END': 73308696, 'CG': '32='}\n",
-      "7046671 D134#1#chr03 2442 2580\n",
-      "\tIn path\n",
-      "\t 73308699 73308837\n",
-      "{'Q.START': 2441, 'Q.END': 2442, 'T.START': 73308697, 'T.END': 73308698, 'CG': '1='}\n",
-      "7046674 D134#1#chr03 2582 2583\n",
-      "\tIn path\n",
-      "\t 73308838 73308839\n",
-      "{'Q.START': 2442, 'Q.END': 2580, 'T.START': 73308699, 'T.END': 73308837, 'CG': '138='}\n",
-      "7046675 D134#1#chr03 2583 2584\n",
-      "\tIn path\n",
-      "\t 73308840 73308841\n",
-      "{'Q.START': 2582, 'Q.END': 2583, 'T.START': 73308838, 'T.END': 73308839, 'CG': '1='}\n",
-      "7046674 D134#1#chr03 2582 2583\n",
-      "\tIn path\n",
-      "\t 73308838 73308839\n",
-      "{'Q.START': 2583, 'Q.END': 2584, 'T.START': 73308840, 'T.END': 73308841, 'CG': '1='}\n",
-      "7046675 D134#1#chr03 2583 2584\n",
-      "\tIn path\n",
-      "\t 73308840 73308841\n",
-      "{'Q.START': 2582, 'Q.END': 2583, 'T.START': 73308838, 'T.END': 73308839, 'CG': '1='}\n",
-      "7046676 D134#1#chr03 2584 2764\n",
-      "\tIn path\n",
-      "\t 73308842 73309022\n",
-      "{'Q.START': 2583, 'Q.END': 2584, 'T.START': 73308840, 'T.END': 73308841, 'CG': '1='}\n",
-      "7046678 D134#1#chr03 2764 2765\n",
-      "\tNot in path\n",
-      "7046679 D134#1#chr03 2765 2797\n",
-      "\tIn path\n",
-      "\t 73309025 73309057\n",
-      "{'Q.START': 2584, 'Q.END': 2764, 'T.START': 73308842, 'T.END': 73309022, 'CG': '180='}\n",
-      "7046680 D134#1#chr03 2797 2798\n",
-      "\tNot in path\n",
-      "7046682 D134#1#chr03 2798 2878\n",
-      "\tIn path\n",
-      "\t 73309060 73309140\n",
-      "{'Q.START': 2765, 'Q.END': 2797, 'T.START': 73309025, 'T.END': 73309057, 'CG': '32='}\n",
-      "7046684 D134#1#chr03 2878 2879\n",
-      "\tIn path\n",
-      "\t 73309141 73309142\n",
-      "{'Q.START': 2798, 'Q.END': 2878, 'T.START': 73309060, 'T.END': 73309140, 'CG': '80='}\n",
-      "7046685 D134#1#chr03 2879 2951\n",
-      "\tIn path\n",
-      "\t 73309143 73309215\n",
-      "{'Q.START': 2878, 'Q.END': 2879, 'T.START': 73309141, 'T.END': 73309142, 'CG': '1='}\n",
-      "7046686 D134#1#chr03 2951 2952\n",
-      "\tIn path\n",
-      "\t 73309216 73309217\n",
-      "{'Q.START': 2879, 'Q.END': 2951, 'T.START': 73309143, 'T.END': 73309215, 'CG': '72='}\n",
-      "7046688 D134#1#chr03 2952 3002\n",
-      "\tIn path\n",
-      "\t 73309218 73309268\n",
-      "{'Q.START': 2951, 'Q.END': 2952, 'T.START': 73309216, 'T.END': 73309217, 'CG': '1='}\n",
-      "7046690 D134#1#chr03 3002 3077\n",
-      "\tIn path\n",
-      "\t 73309271 73309346\n",
-      "{'Q.START': 2952, 'Q.END': 3002, 'T.START': 73309218, 'T.END': 73309268, 'CG': '50='}\n",
-      "7046692 D134#1#chr03 3077 3078\n",
-      "\tIn path\n",
-      "\t 73309347 73309348\n",
-      "{'Q.START': 3002, 'Q.END': 3077, 'T.START': 73309271, 'T.END': 73309346, 'CG': '75='}\n",
-      "7046693 D134#1#chr03 3078 3093\n",
-      "\tIn path\n",
-      "\t 73309349 73309364\n",
-      "{'Q.START': 3077, 'Q.END': 3078, 'T.START': 73309347, 'T.END': 73309348, 'CG': '1='}\n",
-      "7046695 D134#1#chr03 3093 3094\n",
-      "\tNot in path\n",
-      "7046696 D134#1#chr03 3094 3097\n",
-      "\tIn path\n",
-      "\t 73309367 73309370\n",
-      "{'Q.START': 3078, 'Q.END': 3093, 'T.START': 73309349, 'T.END': 73309364, 'CG': '15='}\n",
-      "7046698 D134#1#chr03 3097 3140\n",
-      "\tIn path\n",
-      "\t 73309371 73309414\n",
-      "{'Q.START': 3094, 'Q.END': 3097, 'T.START': 73309367, 'T.END': 73309370, 'CG': '3='}\n",
-      "7046700 D134#1#chr03 3140 3210\n",
-      "\tIn path\n",
-      "\t 73309415 73309485\n",
-      "{'Q.START': 3097, 'Q.END': 3140, 'T.START': 73309371, 'T.END': 73309414, 'CG': '43='}\n",
-      "7046702 D134#1#chr03 3210 3211\n",
-      "\tIn path\n",
-      "\t 73309486 73309487\n",
-      "{'Q.START': 3140, 'Q.END': 3210, 'T.START': 73309415, 'T.END': 73309485, 'CG': '70='}\n",
-      "7046703 D134#1#chr03 3211 3229\n",
-      "\tIn path\n",
-      "\t 73309488 73309506\n",
-      "{'Q.START': 3210, 'Q.END': 3211, 'T.START': 73309486, 'T.END': 73309487, 'CG': '1='}\n",
-      "7046704 D134#1#chr03 3229 3230\n",
-      "\tIn path\n",
-      "\t 73309507 73309508\n",
-      "{'Q.START': 3211, 'Q.END': 3229, 'T.START': 73309488, 'T.END': 73309506, 'CG': '18='}\n",
-      "7046706 D134#1#chr03 3230 3276\n",
-      "\tIn path\n",
-      "\t 73309509 73309555\n",
-      "{'Q.START': 3229, 'Q.END': 3230, 'T.START': 73309507, 'T.END': 73309508, 'CG': '1='}\n",
-      "7046707 D134#1#chr03 3276 3277\n",
-      "\tNot in path\n",
-      "7046709 D134#1#chr03 3277 3315\n",
-      "\tIn path\n",
-      "\t 73309558 73309596\n",
-      "{'Q.START': 3230, 'Q.END': 3276, 'T.START': 73309509, 'T.END': 73309555, 'CG': '46='}\n",
-      "7046710 D134#1#chr03 3315 3316\n",
-      "\tNot in path\n",
-      "7046712 D134#1#chr03 3316 3322\n",
-      "\tIn path\n",
-      "\t 73309599 73309605\n",
-      "{'Q.START': 3277, 'Q.END': 3315, 'T.START': 73309558, 'T.END': 73309596, 'CG': '38='}\n",
-      "7046713 D134#1#chr03 3322 3323\n",
-      "\tNot in path\n",
-      "7046715 D134#1#chr03 3323 3348\n",
-      "\tIn path\n",
-      "\t 73309608 73309633\n",
-      "{'Q.START': 3316, 'Q.END': 3322, 'T.START': 73309599, 'T.END': 73309605, 'CG': '6='}\n",
-      "7046718 D134#1#chr03 3352 3353\n",
-      "\tIn path\n",
-      "\t 73309634 73309635\n",
-      "{'Q.START': 3323, 'Q.END': 3348, 'T.START': 73309608, 'T.END': 73309633, 'CG': '25='}\n",
-      "7046717 D134#1#chr03 3351 3352\n",
-      "\tIn path\n",
-      "\t 73309636 73309637\n",
-      "{'Q.START': 3352, 'Q.END': 3353, 'T.START': 73309634, 'T.END': 73309635, 'CG': '1='}\n",
-      "7046718 D134#1#chr03 3352 3353\n",
-      "\tIn path\n",
-      "\t 73309634 73309635\n",
-      "{'Q.START': 3351, 'Q.END': 3352, 'T.START': 73309636, 'T.END': 73309637, 'CG': '1='}\n",
-      "7046717 D134#1#chr03 3351 3352\n",
-      "\tIn path\n",
-      "\t 73309636 73309637\n",
-      "{'Q.START': 3352, 'Q.END': 3353, 'T.START': 73309634, 'T.END': 73309635, 'CG': '1='}\n",
-      "7046718 D134#1#chr03 3352 3353\n",
-      "\tIn path\n",
-      "\t 73309634 73309635\n",
-      "{'Q.START': 3351, 'Q.END': 3352, 'T.START': 73309636, 'T.END': 73309637, 'CG': '1='}\n",
-      "7046720 D134#1#chr03 3353 3354\n",
-      "\tIn path\n",
-      "\t 73309638 73309639\n",
-      "{'Q.START': 3352, 'Q.END': 3353, 'T.START': 73309634, 'T.END': 73309635, 'CG': '1='}\n",
-      "7046722 D134#1#chr03 3354 3356\n",
-      "\tIn path\n",
-      "\t 73309640 73309642\n",
-      "{'Q.START': 3353, 'Q.END': 3354, 'T.START': 73309638, 'T.END': 73309639, 'CG': '1='}\n",
-      "7046724 D134#1#chr03 3356 3357\n",
-      "\tNot in path\n",
-      "7046725 D134#1#chr03 3357 3489\n",
-      "\tIn path\n",
-      "\t 73309645 73309777\n",
-      "{'Q.START': 3354, 'Q.END': 3356, 'T.START': 73309640, 'T.END': 73309642, 'CG': '2='}\n",
-      "7046727 D134#1#chr03 3489 3490\n",
-      "\tNot in path\n",
-      "7046728 D134#1#chr03 3490 3642\n",
-      "\tIn path\n",
-      "\t 73309780 73309932\n",
-      "{'Q.START': 3357, 'Q.END': 3489, 'T.START': 73309645, 'T.END': 73309777, 'CG': '132='}\n",
-      "7046729 D134#1#chr03 3642 3644\n",
-      "\tNot in path\n",
-      "7046730 D134#1#chr03 3644 3685\n",
-      "\tIn path\n",
-      "\t 73309933 73309974\n",
-      "{'Q.START': 3490, 'Q.END': 3642, 'T.START': 73309780, 'T.END': 73309932, 'CG': '152='}\n",
-      "7046731 D134#1#chr03 3685 3687\n",
-      "\tNot in path\n",
-      "7046733 D134#1#chr03 3687 3693\n",
-      "\tIn path\n",
-      "\t 73309977 73309983\n",
-      "{'Q.START': 3644, 'Q.END': 3685, 'T.START': 73309933, 'T.END': 73309974, 'CG': '41='}\n",
-      "7046735 D134#1#chr03 3693 3694\n",
-      "\tNot in path\n",
-      "7046736 D134#1#chr03 3694 3708\n",
-      "\tIn path\n",
-      "\t 73309986 73310000\n",
-      "{'Q.START': 3687, 'Q.END': 3693, 'T.START': 73309977, 'T.END': 73309983, 'CG': '6='}\n",
-      "7046738 D134#1#chr03 3720 3721\n",
-      "\tIn path\n",
-      "\t 73310010 73310011\n",
-      "{'Q.START': 3694, 'Q.END': 3708, 'T.START': 73309986, 'T.END': 73310000, 'CG': '14='}\n",
-      "7046739 D134#1#chr03 3721 3722\n",
-      "\tIn path\n",
-      "\t 73310003 73310004\n",
-      "{'Q.START': 3720, 'Q.END': 3721, 'T.START': 73310010, 'T.END': 73310011, 'CG': '1='}\n",
-      "7046740 D134#1#chr03 3716 3720\n",
-      "\tIn path\n",
-      "\t 73310005 73310009\n",
-      "{'Q.START': 3721, 'Q.END': 3722, 'T.START': 73310003, 'T.END': 73310004, 'CG': '1='}\n",
-      "7046738 D134#1#chr03 3720 3721\n",
-      "\tIn path\n",
-      "\t 73310010 73310011\n",
-      "{'Q.START': 3716, 'Q.END': 3720, 'T.START': 73310005, 'T.END': 73310009, 'CG': '4='}\n",
-      "7046739 D134#1#chr03 3721 3722\n",
-      "\tIn path\n",
-      "\t 73310003 73310004\n",
-      "{'Q.START': 3720, 'Q.END': 3721, 'T.START': 73310010, 'T.END': 73310011, 'CG': '1='}\n",
-      "7046740 D134#1#chr03 3716 3720\n",
-      "\tIn path\n",
-      "\t 73310005 73310009\n",
-      "{'Q.START': 3721, 'Q.END': 3722, 'T.START': 73310003, 'T.END': 73310004, 'CG': '1='}\n",
-      "7046738 D134#1#chr03 3720 3721\n",
-      "\tIn path\n",
-      "\t 73310010 73310011\n",
-      "{'Q.START': 3716, 'Q.END': 3720, 'T.START': 73310005, 'T.END': 73310009, 'CG': '4='}\n",
-      "7046739 D134#1#chr03 3721 3722\n",
-      "\tIn path\n",
-      "\t 73310003 73310004\n",
-      "{'Q.START': 3720, 'Q.END': 3721, 'T.START': 73310010, 'T.END': 73310011, 'CG': '1='}\n",
-      "7046741 D134#1#chr03 3722 3735\n",
-      "\tIn path\n",
-      "\t 73310012 73310045\n",
-      "{'Q.START': 3721, 'Q.END': 3722, 'T.START': 73310003, 'T.END': 73310004, 'CG': '1='}\n",
-      "ALN_1\n",
-      "7046526 TO1000#1#chr03 0 77\n",
-      "\t 64684013 64684090\n",
-      "skipped\n",
-      "\n",
-      "7046528 TO1000#1#chr03 77 82\n",
-      "\t 64684091 64684096\n",
-      "{'Q.START': 0, 'Q.END': 77, 'T.START': 64684013, 'T.END': 64684090, 'CG': '77='}\n",
-      "7046530 TO1000#1#chr03 82 83\n",
-      "\t 64684097 64684098\n",
-      "{'Q.START': 77, 'Q.END': 82, 'T.START': 64684091, 'T.END': 64684096, 'CG': '5='}\n",
-      "7046531 TO1000#1#chr03 83 138\n",
-      "\t 64684099 64684154\n",
-      "{'Q.START': 82, 'Q.END': 83, 'T.START': 64684097, 'T.END': 64684098, 'CG': '1='}\n",
-      "7046532 TO1000#1#chr03 138 139\n",
-      "\t 64684155 64684156\n",
-      "{'Q.START': 83, 'Q.END': 138, 'T.START': 64684099, 'T.END': 64684154, 'CG': '55='}\n",
-      "7046533 TO1000#1#chr03 139 202\n",
-      "\t 64684157 64684220\n",
-      "{'Q.START': 138, 'Q.END': 139, 'T.START': 64684155, 'T.END': 64684156, 'CG': '1='}\n",
-      "7046534 TO1000#1#chr03 202 203\n",
-      "\t 64684221 64684222\n",
-      "{'Q.START': 139, 'Q.END': 202, 'T.START': 64684157, 'T.END': 64684220, 'CG': '63='}\n",
-      "7046536 TO1000#1#chr03 203 379\n",
-      "\t 64684223 64684399\n",
-      "{'Q.START': 202, 'Q.END': 203, 'T.START': 64684221, 'T.END': 64684222, 'CG': '1='}\n",
-      "7046537 TO1000#1#chr03 379 380\n",
-      "\t 64684400 64684401\n",
-      "{'Q.START': 203, 'Q.END': 379, 'T.START': 64684223, 'T.END': 64684399, 'CG': '176='}\n",
-      "7046539 TO1000#1#chr03 380 429\n",
-      "\t 64684402 64684451\n",
-      "{'Q.START': 379, 'Q.END': 380, 'T.START': 64684400, 'T.END': 64684401, 'CG': '1='}\n",
-      "7046541 TO1000#1#chr03 429 430\n",
-      "\t 64684452 64684453\n",
-      "{'Q.START': 380, 'Q.END': 429, 'T.START': 64684402, 'T.END': 64684451, 'CG': '49='}\n",
-      "7046542 TO1000#1#chr03 430 457\n",
-      "\t 64684454 64684481\n",
-      "{'Q.START': 429, 'Q.END': 430, 'T.START': 64684452, 'T.END': 64684453, 'CG': '1='}\n",
-      "7046544 TO1000#1#chr03 457 492\n",
-      "\t 64684482 64684517\n",
-      "{'Q.START': 430, 'Q.END': 457, 'T.START': 64684454, 'T.END': 64684481, 'CG': '27='}\n",
-      "7046546 TO1000#1#chr03 492 494\n",
-      "\t 64684518 64684520\n",
-      "{'Q.START': 457, 'Q.END': 492, 'T.START': 64684482, 'T.END': 64684517, 'CG': '35='}\n",
-      "7046547 TO1000#1#chr03 494 497\n",
-      "\t 64684521 64684524\n",
-      "{'Q.START': 492, 'Q.END': 494, 'T.START': 64684518, 'T.END': 64684520, 'CG': '2='}\n",
-      "7046549 TO1000#1#chr03 497 507\n",
-      "\t 64684525 64684535\n",
-      "{'Q.START': 494, 'Q.END': 497, 'T.START': 64684521, 'T.END': 64684524, 'CG': '3='}\n",
-      "7046551 TO1000#1#chr03 507 508\n",
-      "\t 64684536 64684537\n",
-      "{'Q.START': 497, 'Q.END': 507, 'T.START': 64684525, 'T.END': 64684535, 'CG': '10='}\n",
-      "7046552 TO1000#1#chr03 508 564\n",
-      "\t 64684538 64684594\n",
-      "{'Q.START': 507, 'Q.END': 508, 'T.START': 64684536, 'T.END': 64684537, 'CG': '1='}\n",
-      "7046554 TO1000#1#chr03 564 566\n",
-      "\t 64684595 64684597\n",
-      "{'Q.START': 508, 'Q.END': 564, 'T.START': 64684538, 'T.END': 64684594, 'CG': '56='}\n",
-      "7046556 TO1000#1#chr03 568 569\n",
-      "\t 64684598 64684599\n",
-      "{'Q.START': 564, 'Q.END': 566, 'T.START': 64684595, 'T.END': 64684597, 'CG': '2='}\n",
-      "7046556 TO1000#1#chr03 568 569\n",
-      "\t 64684598 64684599\n",
-      "{'Q.START': 568, 'Q.END': 569, 'T.START': 64684598, 'T.END': 64684599, 'CG': '1='}\n",
-      "7046556 TO1000#1#chr03 568 569\n",
-      "\t 64684598 64684599\n",
-      "{'Q.START': 568, 'Q.END': 569, 'T.START': 64684598, 'T.END': 64684599, 'CG': '1='}\n",
-      "7046557 TO1000#1#chr03 569 824\n",
-      "\t 64684600 64684855\n",
-      "{'Q.START': 568, 'Q.END': 569, 'T.START': 64684598, 'T.END': 64684599, 'CG': '1='}\n",
-      "7046558 TO1000#1#chr03 824 826\n",
-      "\t 64684856 64684858\n",
-      "{'Q.START': 569, 'Q.END': 824, 'T.START': 64684600, 'T.END': 64684855, 'CG': '255='}\n",
-      "7046559 TO1000#1#chr03 826 858\n",
-      "\t 64684859 64684891\n",
-      "{'Q.START': 824, 'Q.END': 826, 'T.START': 64684856, 'T.END': 64684858, 'CG': '2='}\n",
-      "7046560 TO1000#1#chr03 858 859\n",
-      "\t 64684892 64684893\n",
-      "{'Q.START': 826, 'Q.END': 858, 'T.START': 64684859, 'T.END': 64684891, 'CG': '32='}\n",
-      "7046561 TO1000#1#chr03 868 869\n",
-      "\t 64684894 64684895\n",
-      "{'Q.START': 858, 'Q.END': 859, 'T.START': 64684892, 'T.END': 64684893, 'CG': '1='}\n",
-      "7046561 TO1000#1#chr03 868 869\n",
-      "\t 64684894 64684895\n",
-      "{'Q.START': 868, 'Q.END': 869, 'T.START': 64684894, 'T.END': 64684895, 'CG': '1='}\n",
-      "7046561 TO1000#1#chr03 868 869\n",
-      "\t 64684894 64684895\n",
-      "{'Q.START': 868, 'Q.END': 869, 'T.START': 64684894, 'T.END': 64684895, 'CG': '1='}\n",
-      "7046561 TO1000#1#chr03 868 869\n",
-      "\t 64684894 64684895\n",
-      "{'Q.START': 868, 'Q.END': 869, 'T.START': 64684894, 'T.END': 64684895, 'CG': '1='}\n",
-      "7046561 TO1000#1#chr03 868 869\n",
-      "\t 64684894 64684895\n",
-      "{'Q.START': 868, 'Q.END': 869, 'T.START': 64684894, 'T.END': 64684895, 'CG': '1='}\n",
-      "7046561 TO1000#1#chr03 868 869\n",
-      "\t 64684894 64684895\n",
-      "{'Q.START': 868, 'Q.END': 869, 'T.START': 64684894, 'T.END': 64684895, 'CG': '1='}\n",
-      "7046561 TO1000#1#chr03 868 869\n",
-      "\t 64684894 64684895\n",
-      "{'Q.START': 868, 'Q.END': 869, 'T.START': 64684894, 'T.END': 64684895, 'CG': '1='}\n",
-      "7046561 TO1000#1#chr03 868 869\n",
-      "\t 64684894 64684895\n",
-      "{'Q.START': 868, 'Q.END': 869, 'T.START': 64684894, 'T.END': 64684895, 'CG': '1='}\n",
-      "7046561 TO1000#1#chr03 868 869\n",
-      "\t 64684894 64684895\n",
-      "{'Q.START': 868, 'Q.END': 869, 'T.START': 64684894, 'T.END': 64684895, 'CG': '1='}\n",
-      "7046561 TO1000#1#chr03 868 869\n",
-      "\t 64684894 64684895\n",
-      "{'Q.START': 868, 'Q.END': 869, 'T.START': 64684894, 'T.END': 64684895, 'CG': '1='}\n",
-      "7046562 TO1000#1#chr03 869 913\n",
-      "\t 64684896 64684940\n",
-      "{'Q.START': 868, 'Q.END': 869, 'T.START': 64684894, 'T.END': 64684895, 'CG': '1='}\n",
-      "7046564 TO1000#1#chr03 913 919\n",
-      "\t 64684941 64684947\n",
-      "{'Q.START': 869, 'Q.END': 913, 'T.START': 64684896, 'T.END': 64684940, 'CG': '44='}\n",
-      "7046565 TO1000#1#chr03 919 978\n",
-      "\t 64684948 64685007\n",
-      "{'Q.START': 913, 'Q.END': 919, 'T.START': 64684941, 'T.END': 64684947, 'CG': '6='}\n",
-      "7046567 TO1000#1#chr03 978 979\n",
-      "\t 64685008 64685009\n",
-      "{'Q.START': 919, 'Q.END': 978, 'T.START': 64684948, 'T.END': 64685007, 'CG': '59='}\n",
-      "7046568 TO1000#1#chr03 979 1038\n",
-      "\t 64685010 64685069\n",
-      "{'Q.START': 978, 'Q.END': 979, 'T.START': 64685008, 'T.END': 64685009, 'CG': '1='}\n",
-      "7046570 TO1000#1#chr03 1038 1045\n",
-      "\t 64685070 64685077\n",
-      "{'Q.START': 979, 'Q.END': 1038, 'T.START': 64685010, 'T.END': 64685069, 'CG': '59='}\n",
-      "7046571 TO1000#1#chr03 1045 1046\n",
-      "\t 64685078 64685079\n",
-      "{'Q.START': 1038, 'Q.END': 1045, 'T.START': 64685070, 'T.END': 64685077, 'CG': '7='}\n",
-      "7046573 TO1000#1#chr03 1046 1080\n",
-      "\t 64685080 64685114\n",
-      "{'Q.START': 1045, 'Q.END': 1046, 'T.START': 64685078, 'T.END': 64685079, 'CG': '1='}\n",
-      "7046574 TO1000#1#chr03 1080 1081\n",
-      "\t 64685115 64685116\n",
-      "{'Q.START': 1046, 'Q.END': 1080, 'T.START': 64685080, 'T.END': 64685114, 'CG': '34='}\n",
-      "7046576 TO1000#1#chr03 1081 1107\n",
-      "\t 64685117 64685143\n",
-      "{'Q.START': 1080, 'Q.END': 1081, 'T.START': 64685115, 'T.END': 64685116, 'CG': '1='}\n",
-      "7046577 TO1000#1#chr03 1107 1108\n",
-      "\t 64685144 64685145\n",
-      "{'Q.START': 1081, 'Q.END': 1107, 'T.START': 64685117, 'T.END': 64685143, 'CG': '26='}\n",
-      "7046579 TO1000#1#chr03 1108 1183\n",
-      "\t 64685146 64685221\n",
-      "{'Q.START': 1107, 'Q.END': 1108, 'T.START': 64685144, 'T.END': 64685145, 'CG': '1='}\n",
-      "7046581 TO1000#1#chr03 1183 1186\n",
-      "\t 64685222 64685225\n",
-      "{'Q.START': 1108, 'Q.END': 1183, 'T.START': 64685146, 'T.END': 64685221, 'CG': '75='}\n",
-      "7046583 TO1000#1#chr03 1186 1224\n",
-      "\t 64685226 64685264\n",
-      "{'Q.START': 1183, 'Q.END': 1186, 'T.START': 64685222, 'T.END': 64685225, 'CG': '3='}\n",
-      "7046584 TO1000#1#chr03 1224 1257\n",
-      "\t 64685265 64685298\n",
-      "{'Q.START': 1186, 'Q.END': 1224, 'T.START': 64685226, 'T.END': 64685264, 'CG': '38='}\n",
-      "7046586 TO1000#1#chr03 1257 1289\n",
-      "\t 64685299 64685331\n",
-      "{'Q.START': 1224, 'Q.END': 1257, 'T.START': 64685265, 'T.END': 64685298, 'CG': '33='}\n",
-      "7046587 TO1000#1#chr03 1289 1311\n",
-      "\t 64685332 64685354\n",
-      "{'Q.START': 1257, 'Q.END': 1289, 'T.START': 64685299, 'T.END': 64685331, 'CG': '32='}\n",
-      "7046589 TO1000#1#chr03 1311 1359\n",
-      "\t 64685355 64685403\n",
-      "{'Q.START': 1289, 'Q.END': 1311, 'T.START': 64685332, 'T.END': 64685354, 'CG': '22='}\n",
-      "7046590 TO1000#1#chr03 1359 1382\n",
-      "\t 64685404 64685427\n",
-      "{'Q.START': 1311, 'Q.END': 1359, 'T.START': 64685355, 'T.END': 64685403, 'CG': '48='}\n",
-      "7046592 TO1000#1#chr03 1382 1434\n",
-      "\t 64685428 64685480\n",
-      "{'Q.START': 1359, 'Q.END': 1382, 'T.START': 64685404, 'T.END': 64685427, 'CG': '23='}\n",
-      "7046593 TO1000#1#chr03 1434 1451\n",
-      "\t 64685481 64685498\n",
-      "{'Q.START': 1382, 'Q.END': 1434, 'T.START': 64685428, 'T.END': 64685480, 'CG': '52='}\n",
-      "7046594 TO1000#1#chr03 1451 1531\n",
-      "\t 64685499 64685579\n",
-      "{'Q.START': 1434, 'Q.END': 1451, 'T.START': 64685481, 'T.END': 64685498, 'CG': '17='}\n",
-      "7046596 TO1000#1#chr03 1531 1532\n",
-      "\t 64685580 64685581\n",
-      "{'Q.START': 1451, 'Q.END': 1531, 'T.START': 64685499, 'T.END': 64685579, 'CG': '80='}\n",
-      "7046597 TO1000#1#chr03 1532 1543\n",
-      "\t 64685582 64685593\n",
-      "{'Q.START': 1531, 'Q.END': 1532, 'T.START': 64685580, 'T.END': 64685581, 'CG': '1='}\n",
-      "7046599 TO1000#1#chr03 1543 1544\n",
-      "\t 64685594 64685595\n",
-      "{'Q.START': 1532, 'Q.END': 1543, 'T.START': 64685582, 'T.END': 64685593, 'CG': '11='}\n",
-      "7046600 TO1000#1#chr03 1544 1572\n",
-      "\t 64685596 64685624\n",
-      "{'Q.START': 1543, 'Q.END': 1544, 'T.START': 64685594, 'T.END': 64685595, 'CG': '1='}\n",
-      "7046601 TO1000#1#chr03 1572 1573\n",
-      "\t 64685625 64685626\n",
-      "{'Q.START': 1544, 'Q.END': 1572, 'T.START': 64685596, 'T.END': 64685624, 'CG': '28='}\n",
-      "7046603 TO1000#1#chr03 1573 1587\n",
-      "\t 64685627 64685641\n",
-      "{'Q.START': 1572, 'Q.END': 1573, 'T.START': 64685625, 'T.END': 64685626, 'CG': '1='}\n",
-      "7046604 TO1000#1#chr03 1587 1588\n",
-      "\t 64685642 64685643\n",
-      "{'Q.START': 1573, 'Q.END': 1587, 'T.START': 64685627, 'T.END': 64685641, 'CG': '14='}\n",
-      "7046606 TO1000#1#chr03 1588 1616\n",
-      "\t 64685644 64685672\n",
-      "{'Q.START': 1587, 'Q.END': 1588, 'T.START': 64685642, 'T.END': 64685643, 'CG': '1='}\n",
-      "7046608 TO1000#1#chr03 1616 1617\n",
-      "\t 64685673 64685674\n",
-      "{'Q.START': 1588, 'Q.END': 1616, 'T.START': 64685644, 'T.END': 64685672, 'CG': '28='}\n",
-      "7046609 TO1000#1#chr03 1617 1646\n",
-      "\t 64685675 64685704\n",
-      "{'Q.START': 1616, 'Q.END': 1617, 'T.START': 64685673, 'T.END': 64685674, 'CG': '1='}\n",
-      "7046621 TO1000#1#chr03 1646 1661\n",
-      "\t 64685705 64685720\n",
-      "{'Q.START': 1617, 'Q.END': 1646, 'T.START': 64685675, 'T.END': 64685704, 'CG': '29='}\n",
-      "7046622 TO1000#1#chr03 1661 1673\n",
-      "\t 64685721 64685733\n",
-      "{'Q.START': 1646, 'Q.END': 1661, 'T.START': 64685705, 'T.END': 64685720, 'CG': '15='}\n",
-      "7046624 TO1000#1#chr03 1673 1674\n",
-      "\t 64685734 64685735\n",
-      "{'Q.START': 1661, 'Q.END': 1673, 'T.START': 64685721, 'T.END': 64685733, 'CG': '12='}\n",
-      "7046625 TO1000#1#chr03 1674 1726\n",
-      "\t 64685736 64685788\n",
-      "{'Q.START': 1673, 'Q.END': 1674, 'T.START': 64685734, 'T.END': 64685735, 'CG': '1='}\n",
-      "7046626 TO1000#1#chr03 1726 1727\n",
-      "\t 64685789 64685790\n",
-      "{'Q.START': 1674, 'Q.END': 1726, 'T.START': 64685736, 'T.END': 64685788, 'CG': '52='}\n",
-      "7046628 TO1000#1#chr03 1727 1762\n",
-      "\t 64685791 64685826\n",
-      "{'Q.START': 1726, 'Q.END': 1727, 'T.START': 64685789, 'T.END': 64685790, 'CG': '1='}\n",
-      "7046631 TO1000#1#chr03 1766 1767\n",
-      "\t 64685827 64685828\n",
-      "{'Q.START': 1727, 'Q.END': 1762, 'T.START': 64685791, 'T.END': 64685826, 'CG': '35='}\n",
-      "7046673 TO1000#1#chr03 1765 1766\n",
-      "\t 64685829 64685830\n",
-      "{'Q.START': 1766, 'Q.END': 1767, 'T.START': 64685827, 'T.END': 64685828, 'CG': '1='}\n",
-      "7046631 TO1000#1#chr03 1766 1767\n",
-      "\t 64685827 64685828\n",
-      "{'Q.START': 1765, 'Q.END': 1766, 'T.START': 64685829, 'T.END': 64685830, 'CG': '1='}\n",
-      "7046673 TO1000#1#chr03 1765 1766\n",
-      "\t 64685829 64685830\n",
-      "{'Q.START': 1766, 'Q.END': 1767, 'T.START': 64685827, 'T.END': 64685828, 'CG': '1='}\n",
-      "7046631 TO1000#1#chr03 1766 1767\n",
-      "\t 64685827 64685828\n",
-      "{'Q.START': 1765, 'Q.END': 1766, 'T.START': 64685829, 'T.END': 64685830, 'CG': '1='}\n",
-      "7046632 TO1000#1#chr03 1767 1824\n",
-      "\t 64685831 64685888\n",
-      "{'Q.START': 1766, 'Q.END': 1767, 'T.START': 64685827, 'T.END': 64685828, 'CG': '1='}\n",
-      "7046634 TO1000#1#chr03 1824 1825\n",
-      "\t 64685889 64685890\n",
-      "{'Q.START': 1767, 'Q.END': 1824, 'T.START': 64685831, 'T.END': 64685888, 'CG': '57='}\n",
-      "7046635 TO1000#1#chr03 1825 1975\n",
-      "\t 64685891 64686041\n",
-      "{'Q.START': 1824, 'Q.END': 1825, 'T.START': 64685889, 'T.END': 64685890, 'CG': '1='}\n",
-      "7046637 TO1000#1#chr03 1975 1976\n",
-      "\t 64686042 64686043\n",
-      "{'Q.START': 1825, 'Q.END': 1975, 'T.START': 64685891, 'T.END': 64686041, 'CG': '150='}\n",
-      "7046638 TO1000#1#chr03 1976 2015\n",
-      "\t 64686044 64686083\n",
-      "{'Q.START': 1975, 'Q.END': 1976, 'T.START': 64686042, 'T.END': 64686043, 'CG': '1='}\n",
-      "7046639 TO1000#1#chr03 2015 2016\n",
-      "\t 64686084 64686085\n",
-      "{'Q.START': 1976, 'Q.END': 2015, 'T.START': 64686044, 'T.END': 64686083, 'CG': '39='}\n",
-      "7046641 TO1000#1#chr03 2016 2047\n",
-      "\t 64686086 64686117\n",
-      "{'Q.START': 2015, 'Q.END': 2016, 'T.START': 64686084, 'T.END': 64686085, 'CG': '1='}\n",
-      "7046644 TO1000#1#chr03 2047 2055\n",
-      "\t 64686118 64686126\n",
-      "{'Q.START': 2016, 'Q.END': 2047, 'T.START': 64686086, 'T.END': 64686117, 'CG': '31='}\n",
-      "7046646 TO1000#1#chr03 2055 2056\n",
-      "\t 64686127 64686128\n",
-      "{'Q.START': 2047, 'Q.END': 2055, 'T.START': 64686118, 'T.END': 64686126, 'CG': '8='}\n",
-      "7046647 TO1000#1#chr03 2056 2120\n",
-      "\t 64686129 64686193\n",
-      "{'Q.START': 2055, 'Q.END': 2056, 'T.START': 64686127, 'T.END': 64686128, 'CG': '1='}\n",
-      "7046649 TO1000#1#chr03 2120 2121\n",
-      "\t 64686194 64686195\n",
-      "{'Q.START': 2056, 'Q.END': 2120, 'T.START': 64686129, 'T.END': 64686193, 'CG': '64='}\n",
-      "7046650 TO1000#1#chr03 2121 2157\n",
-      "\t 64686196 64686232\n",
-      "{'Q.START': 2120, 'Q.END': 2121, 'T.START': 64686194, 'T.END': 64686195, 'CG': '1='}\n",
-      "7046652 TO1000#1#chr03 2157 2158\n",
-      "\t 64686233 64686234\n",
-      "{'Q.START': 2121, 'Q.END': 2157, 'T.START': 64686196, 'T.END': 64686232, 'CG': '36='}\n",
-      "7046653 TO1000#1#chr03 2158 2170\n",
-      "\t 64686235 64686247\n",
-      "{'Q.START': 2157, 'Q.END': 2158, 'T.START': 64686233, 'T.END': 64686234, 'CG': '1='}\n",
-      "7046654 TO1000#1#chr03 2170 2171\n",
-      "\t 64686248 64686249\n",
-      "{'Q.START': 2158, 'Q.END': 2170, 'T.START': 64686235, 'T.END': 64686247, 'CG': '12='}\n",
-      "7046656 TO1000#1#chr03 2171 2205\n",
-      "\t 64686250 64686284\n",
-      "{'Q.START': 2170, 'Q.END': 2171, 'T.START': 64686248, 'T.END': 64686249, 'CG': '1='}\n",
-      "7046657 TO1000#1#chr03 2205 2206\n",
-      "\t 64686285 64686286\n",
-      "{'Q.START': 2171, 'Q.END': 2205, 'T.START': 64686250, 'T.END': 64686284, 'CG': '34='}\n",
-      "7046659 TO1000#1#chr03 2206 2344\n",
-      "\t 64686287 64686425\n",
-      "{'Q.START': 2205, 'Q.END': 2206, 'T.START': 64686285, 'T.END': 64686286, 'CG': '1='}\n",
-      "7046660 TO1000#1#chr03 2344 2345\n",
-      "\t 64686426 64686427\n",
-      "{'Q.START': 2206, 'Q.END': 2344, 'T.START': 64686287, 'T.END': 64686425, 'CG': '138='}\n",
-      "7046662 TO1000#1#chr03 2345 2364\n",
-      "\t 64686428 64686447\n",
-      "{'Q.START': 2344, 'Q.END': 2345, 'T.START': 64686426, 'T.END': 64686427, 'CG': '1='}\n",
-      "7046663 TO1000#1#chr03 2364 2383\n",
-      "\t 64686448 64686467\n",
-      "{'Q.START': 2345, 'Q.END': 2364, 'T.START': 64686428, 'T.END': 64686447, 'CG': '19='}\n",
-      "7046665 TO1000#1#chr03 2383 2408\n",
-      "\t 64686468 64686493\n",
-      "{'Q.START': 2364, 'Q.END': 2383, 'T.START': 64686448, 'T.END': 64686467, 'CG': '19='}\n",
-      "7046667 TO1000#1#chr03 2408 2409\n",
-      "\t 64686494 64686495\n",
-      "{'Q.START': 2383, 'Q.END': 2408, 'T.START': 64686468, 'T.END': 64686493, 'CG': '25='}\n",
-      "7046668 TO1000#1#chr03 2409 2441\n",
-      "\t 64686496 64686528\n",
-      "{'Q.START': 2408, 'Q.END': 2409, 'T.START': 64686494, 'T.END': 64686495, 'CG': '1='}\n",
-      "7046670 TO1000#1#chr03 2441 2442\n",
-      "\t 64686529 64686530\n",
-      "{'Q.START': 2409, 'Q.END': 2441, 'T.START': 64686496, 'T.END': 64686528, 'CG': '32='}\n",
-      "7046671 TO1000#1#chr03 2442 2580\n",
-      "\t 64686531 64686669\n",
-      "{'Q.START': 2441, 'Q.END': 2442, 'T.START': 64686529, 'T.END': 64686530, 'CG': '1='}\n",
-      "7046674 TO1000#1#chr03 2582 2583\n",
-      "\t 64686670 64686671\n",
-      "{'Q.START': 2442, 'Q.END': 2580, 'T.START': 64686531, 'T.END': 64686669, 'CG': '138='}\n",
-      "7046675 TO1000#1#chr03 2583 2584\n",
-      "\t 64686672 64686673\n",
-      "{'Q.START': 2582, 'Q.END': 2583, 'T.START': 64686670, 'T.END': 64686671, 'CG': '1='}\n",
-      "7046674 TO1000#1#chr03 2582 2583\n",
-      "\t 64686670 64686671\n",
-      "{'Q.START': 2583, 'Q.END': 2584, 'T.START': 64686672, 'T.END': 64686673, 'CG': '1='}\n",
-      "7046675 TO1000#1#chr03 2583 2584\n",
-      "\t 64686672 64686673\n",
-      "{'Q.START': 2582, 'Q.END': 2583, 'T.START': 64686670, 'T.END': 64686671, 'CG': '1='}\n",
-      "7046676 TO1000#1#chr03 2584 2764\n",
-      "\t 64686674 64686854\n",
-      "{'Q.START': 2583, 'Q.END': 2584, 'T.START': 64686672, 'T.END': 64686673, 'CG': '1='}\n",
-      "7046678 TO1000#1#chr03 2764 2765\n",
-      "\t 64686855 64686856\n",
-      "{'Q.START': 2584, 'Q.END': 2764, 'T.START': 64686674, 'T.END': 64686854, 'CG': '180='}\n",
-      "7046679 TO1000#1#chr03 2765 2797\n",
-      "\t 64686857 64686889\n",
-      "{'Q.START': 2764, 'Q.END': 2765, 'T.START': 64686855, 'T.END': 64686856, 'CG': '1='}\n",
-      "7046680 TO1000#1#chr03 2797 2798\n",
-      "\t 64686890 64686891\n",
-      "{'Q.START': 2765, 'Q.END': 2797, 'T.START': 64686857, 'T.END': 64686889, 'CG': '32='}\n",
-      "7046682 TO1000#1#chr03 2798 2878\n",
-      "\t 64686892 64686972\n",
-      "{'Q.START': 2797, 'Q.END': 2798, 'T.START': 64686890, 'T.END': 64686891, 'CG': '1='}\n",
-      "7046684 TO1000#1#chr03 2878 2879\n",
-      "\t 64686973 64686974\n",
-      "{'Q.START': 2798, 'Q.END': 2878, 'T.START': 64686892, 'T.END': 64686972, 'CG': '80='}\n",
-      "7046685 TO1000#1#chr03 2879 2951\n",
-      "\t 64686975 64687047\n",
-      "{'Q.START': 2878, 'Q.END': 2879, 'T.START': 64686973, 'T.END': 64686974, 'CG': '1='}\n",
-      "7046686 TO1000#1#chr03 2951 2952\n",
-      "\t 64687048 64687049\n",
-      "{'Q.START': 2879, 'Q.END': 2951, 'T.START': 64686975, 'T.END': 64687047, 'CG': '72='}\n",
-      "7046688 TO1000#1#chr03 2952 3002\n",
-      "\t 64687050 64687100\n",
-      "{'Q.START': 2951, 'Q.END': 2952, 'T.START': 64687048, 'T.END': 64687049, 'CG': '1='}\n",
-      "7046690 TO1000#1#chr03 3002 3077\n",
-      "\t 64687101 64687176\n",
-      "{'Q.START': 2952, 'Q.END': 3002, 'T.START': 64687050, 'T.END': 64687100, 'CG': '50='}\n",
-      "7046692 TO1000#1#chr03 3077 3078\n",
-      "\t 64687177 64687178\n",
-      "{'Q.START': 3002, 'Q.END': 3077, 'T.START': 64687101, 'T.END': 64687176, 'CG': '75='}\n",
-      "7046693 TO1000#1#chr03 3078 3093\n",
-      "\t 64687179 64687194\n",
-      "{'Q.START': 3077, 'Q.END': 3078, 'T.START': 64687177, 'T.END': 64687178, 'CG': '1='}\n",
-      "7046695 TO1000#1#chr03 3093 3094\n",
-      "\t 64687195 64687196\n",
-      "{'Q.START': 3078, 'Q.END': 3093, 'T.START': 64687179, 'T.END': 64687194, 'CG': '15='}\n",
-      "7046696 TO1000#1#chr03 3094 3097\n",
-      "\t 64687197 64687200\n",
-      "{'Q.START': 3093, 'Q.END': 3094, 'T.START': 64687195, 'T.END': 64687196, 'CG': '1='}\n",
-      "7046698 TO1000#1#chr03 3097 3140\n",
-      "\t 64687201 64687244\n",
-      "{'Q.START': 3094, 'Q.END': 3097, 'T.START': 64687197, 'T.END': 64687200, 'CG': '3='}\n",
-      "7046700 TO1000#1#chr03 3140 3210\n",
-      "\t 64687245 64687315\n",
-      "{'Q.START': 3097, 'Q.END': 3140, 'T.START': 64687201, 'T.END': 64687244, 'CG': '43='}\n",
-      "7046702 TO1000#1#chr03 3210 3211\n",
-      "\t 64687316 64687317\n",
-      "{'Q.START': 3140, 'Q.END': 3210, 'T.START': 64687245, 'T.END': 64687315, 'CG': '70='}\n",
-      "7046703 TO1000#1#chr03 3211 3229\n",
-      "\t 64687318 64687336\n",
-      "{'Q.START': 3210, 'Q.END': 3211, 'T.START': 64687316, 'T.END': 64687317, 'CG': '1='}\n",
-      "7046704 TO1000#1#chr03 3229 3230\n",
-      "\t 64687337 64687338\n",
-      "{'Q.START': 3211, 'Q.END': 3229, 'T.START': 64687318, 'T.END': 64687336, 'CG': '18='}\n",
-      "7046706 TO1000#1#chr03 3230 3276\n",
-      "\t 64687339 64687385\n",
-      "{'Q.START': 3229, 'Q.END': 3230, 'T.START': 64687337, 'T.END': 64687338, 'CG': '1='}\n",
-      "7046707 TO1000#1#chr03 3276 3277\n",
-      "\t 64687386 64687387\n",
-      "{'Q.START': 3230, 'Q.END': 3276, 'T.START': 64687339, 'T.END': 64687385, 'CG': '46='}\n",
-      "7046709 TO1000#1#chr03 3277 3315\n",
-      "\t 64687388 64687426\n",
-      "{'Q.START': 3276, 'Q.END': 3277, 'T.START': 64687386, 'T.END': 64687387, 'CG': '1='}\n",
-      "7046710 TO1000#1#chr03 3315 3316\n",
-      "\t 64687427 64687428\n",
-      "{'Q.START': 3277, 'Q.END': 3315, 'T.START': 64687388, 'T.END': 64687426, 'CG': '38='}\n",
-      "7046712 TO1000#1#chr03 3316 3322\n",
-      "\t 64687429 64687435\n",
-      "{'Q.START': 3315, 'Q.END': 3316, 'T.START': 64687427, 'T.END': 64687428, 'CG': '1='}\n",
-      "7046713 TO1000#1#chr03 3322 3323\n",
-      "\t 64687436 64687437\n",
-      "{'Q.START': 3316, 'Q.END': 3322, 'T.START': 64687429, 'T.END': 64687435, 'CG': '6='}\n",
-      "7046715 TO1000#1#chr03 3323 3348\n",
-      "\t 64687438 64687463\n",
-      "{'Q.START': 3322, 'Q.END': 3323, 'T.START': 64687436, 'T.END': 64687437, 'CG': '1='}\n",
-      "7046718 TO1000#1#chr03 3352 3353\n",
-      "\t 64687464 64687465\n",
-      "{'Q.START': 3323, 'Q.END': 3348, 'T.START': 64687438, 'T.END': 64687463, 'CG': '25='}\n",
-      "7046717 TO1000#1#chr03 3351 3352\n",
-      "\t 64687466 64687467\n",
-      "{'Q.START': 3352, 'Q.END': 3353, 'T.START': 64687464, 'T.END': 64687465, 'CG': '1='}\n",
-      "7046718 TO1000#1#chr03 3352 3353\n",
-      "\t 64687464 64687465\n",
-      "{'Q.START': 3351, 'Q.END': 3352, 'T.START': 64687466, 'T.END': 64687467, 'CG': '1='}\n",
-      "7046717 TO1000#1#chr03 3351 3352\n",
-      "\t 64687466 64687467\n",
-      "{'Q.START': 3352, 'Q.END': 3353, 'T.START': 64687464, 'T.END': 64687465, 'CG': '1='}\n",
-      "7046718 TO1000#1#chr03 3352 3353\n",
-      "\t 64687464 64687465\n",
-      "{'Q.START': 3351, 'Q.END': 3352, 'T.START': 64687466, 'T.END': 64687467, 'CG': '1='}\n",
-      "7046720 TO1000#1#chr03 3353 3354\n",
-      "\t 64687468 64687469\n",
-      "{'Q.START': 3352, 'Q.END': 3353, 'T.START': 64687464, 'T.END': 64687465, 'CG': '1='}\n",
-      "7046722 TO1000#1#chr03 3354 3356\n",
-      "\t 64687470 64687472\n",
-      "{'Q.START': 3353, 'Q.END': 3354, 'T.START': 64687468, 'T.END': 64687469, 'CG': '1='}\n",
-      "7046724 TO1000#1#chr03 3356 3357\n",
-      "\t 64687473 64687474\n",
-      "{'Q.START': 3354, 'Q.END': 3356, 'T.START': 64687470, 'T.END': 64687472, 'CG': '2='}\n",
-      "7046725 TO1000#1#chr03 3357 3489\n",
-      "\t 64687475 64687607\n",
-      "{'Q.START': 3356, 'Q.END': 3357, 'T.START': 64687473, 'T.END': 64687474, 'CG': '1='}\n",
-      "7046727 TO1000#1#chr03 3489 3490\n",
-      "\t 64687608 64687609\n",
-      "{'Q.START': 3357, 'Q.END': 3489, 'T.START': 64687475, 'T.END': 64687607, 'CG': '132='}\n",
-      "7046728 TO1000#1#chr03 3490 3642\n",
-      "\t 64687610 64687762\n",
-      "{'Q.START': 3489, 'Q.END': 3490, 'T.START': 64687608, 'T.END': 64687609, 'CG': '1='}\n",
-      "7046729 TO1000#1#chr03 3642 3644\n",
-      "\t 64687763 64687765\n",
-      "{'Q.START': 3490, 'Q.END': 3642, 'T.START': 64687610, 'T.END': 64687762, 'CG': '152='}\n",
-      "7046730 TO1000#1#chr03 3644 3685\n",
-      "\t 64687766 64687807\n",
-      "{'Q.START': 3642, 'Q.END': 3644, 'T.START': 64687763, 'T.END': 64687765, 'CG': '2='}\n",
-      "7046731 TO1000#1#chr03 3685 3687\n",
-      "\t 64687808 64687810\n",
-      "{'Q.START': 3644, 'Q.END': 3685, 'T.START': 64687766, 'T.END': 64687807, 'CG': '41='}\n",
-      "7046733 TO1000#1#chr03 3687 3693\n",
-      "\t 64687811 64687817\n",
-      "{'Q.START': 3685, 'Q.END': 3687, 'T.START': 64687808, 'T.END': 64687810, 'CG': '2='}\n",
-      "7046735 TO1000#1#chr03 3693 3694\n",
-      "\t 64687818 64687819\n",
-      "{'Q.START': 3687, 'Q.END': 3693, 'T.START': 64687811, 'T.END': 64687817, 'CG': '6='}\n",
-      "7046736 TO1000#1#chr03 3694 3708\n",
-      "\t 64687820 64687834\n",
-      "{'Q.START': 3693, 'Q.END': 3694, 'T.START': 64687818, 'T.END': 64687819, 'CG': '1='}\n",
-      "7046738 TO1000#1#chr03 3720 3721\n",
-      "\t 64687835 64687836\n",
-      "{'Q.START': 3694, 'Q.END': 3708, 'T.START': 64687820, 'T.END': 64687834, 'CG': '14='}\n",
-      "7046739 TO1000#1#chr03 3721 3722\n",
-      "\t 64687837 64687838\n",
-      "{'Q.START': 3720, 'Q.END': 3721, 'T.START': 64687835, 'T.END': 64687836, 'CG': '1='}\n",
-      "7046740 TO1000#1#chr03 3716 3720\n",
-      "\t 64687839 64687843\n",
-      "{'Q.START': 3721, 'Q.END': 3722, 'T.START': 64687837, 'T.END': 64687838, 'CG': '1='}\n",
-      "7046738 TO1000#1#chr03 3720 3721\n",
-      "\t 64687835 64687836\n",
-      "{'Q.START': 3716, 'Q.END': 3720, 'T.START': 64687839, 'T.END': 64687843, 'CG': '4='}\n",
-      "7046739 TO1000#1#chr03 3721 3722\n",
-      "\t 64687837 64687838\n",
-      "{'Q.START': 3720, 'Q.END': 3721, 'T.START': 64687835, 'T.END': 64687836, 'CG': '1='}\n",
-      "7046740 TO1000#1#chr03 3716 3720\n",
-      "\t 64687839 64687843\n",
-      "{'Q.START': 3721, 'Q.END': 3722, 'T.START': 64687837, 'T.END': 64687838, 'CG': '1='}\n",
-      "7046738 TO1000#1#chr03 3720 3721\n",
-      "\t 64687835 64687836\n",
-      "{'Q.START': 3716, 'Q.END': 3720, 'T.START': 64687839, 'T.END': 64687843, 'CG': '4='}\n",
-      "7046739 TO1000#1#chr03 3721 3722\n",
-      "\t 64687837 64687838\n",
-      "{'Q.START': 3720, 'Q.END': 3721, 'T.START': 64687835, 'T.END': 64687836, 'CG': '1='}\n",
-      "7046741 TO1000#1#chr03 3722 3735\n",
-      "\t 64687844 64687877\n",
-      "{'Q.START': 3721, 'Q.END': 3722, 'T.START': 64687837, 'T.END': 64687838, 'CG': '1='}\n",
-      "ALN_2\n",
-      "7594382 D134#1#chr03 0 1\n",
-      "\tIn path\n",
-      "\t 70220037 70220038\n",
-      "skipped\n",
-      "\n",
-      "7594369 D134#1#chr03 32 33\n",
-      "\tIn path\n",
-      "\t 70219216 70219217\n",
-      "{'Q.START': 0, 'Q.END': 1, 'T.START': 70220037, 'T.END': 70220038, 'CG': '1='}\n",
-      "7594371 D134#1#chr03 15 16\n",
-      "\tIn path\n",
-      "\t 70221163 70221164\n",
-      "{'Q.START': 32, 'Q.END': 33, 'T.START': 70219216, 'T.END': 70219217, 'CG': '1='}\n",
-      "7594021 D134#1#chr03 57 58\n",
-      "\tIn path\n",
-      "\t 70219218 70219219\n",
-      "{'Q.START': 15, 'Q.END': 16, 'T.START': 70221163, 'T.END': 70221164, 'CG': '1='}\n",
-      "7594286 D134#1#chr03 59 60\n",
-      "\tIn path\n",
-      "\t 70219349 70219350\n",
-      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
-      "7594374 D134#1#chr03 69 70\n",
-      "\tIn path\n",
-      "\t 70219092 70219093\n",
-      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
-      "7594356 D134#1#chr03 66 67\n",
-      "\tIn path\n",
-      "\t 70219570 70219571\n",
-      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
-      "7594374 D134#1#chr03 69 70\n",
-      "\tIn path\n",
-      "\t 70219092 70219093\n",
-      "{'Q.START': 66, 'Q.END': 67, 'T.START': 70219570, 'T.END': 70219571, 'CG': '1='}\n",
-      "7594374 D134#1#chr03 69 70\n",
-      "\tIn path\n",
-      "\t 70219092 70219093\n",
-      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
-      "7594375 D134#1#chr03 68 69\n",
-      "\tIn path\n",
-      "\t 70221598 70221599\n",
-      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
-      "7594626 D134#1#chr03 10 11\n",
-      "\tIn path\n",
-      "\t 70219214 70219215\n",
-      "{'Q.START': 68, 'Q.END': 69, 'T.START': 70221598, 'T.END': 70221599, 'CG': '1='}\n",
-      "7594011 D134#1#chr03 11 12\n",
-      "\tIn path\n",
-      "\t 70219995 70219996\n",
-      "{'Q.START': 10, 'Q.END': 11, 'T.START': 70219214, 'T.END': 70219215, 'CG': '1='}\n",
-      "7594374 D134#1#chr03 69 70\n",
-      "\tIn path\n",
-      "\t 70219092 70219093\n",
-      "{'Q.START': 11, 'Q.END': 12, 'T.START': 70219995, 'T.END': 70219996, 'CG': '1='}\n",
-      "7594375 D134#1#chr03 68 69\n",
-      "\tIn path\n",
-      "\t 70221598 70221599\n",
-      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
-      "7594369 D134#1#chr03 32 33\n",
-      "\tIn path\n",
-      "\t 70219216 70219217\n",
-      "{'Q.START': 68, 'Q.END': 69, 'T.START': 70221598, 'T.END': 70221599, 'CG': '1='}\n",
-      "7594371 D134#1#chr03 15 16\n",
-      "\tIn path\n",
-      "\t 70221163 70221164\n",
-      "{'Q.START': 32, 'Q.END': 33, 'T.START': 70219216, 'T.END': 70219217, 'CG': '1='}\n",
-      "7594021 D134#1#chr03 57 58\n",
-      "\tIn path\n",
-      "\t 70219218 70219219\n",
-      "{'Q.START': 15, 'Q.END': 16, 'T.START': 70221163, 'T.END': 70221164, 'CG': '1='}\n",
-      "7594021 D134#1#chr03 57 58\n",
-      "\tIn path\n",
-      "\t 70219218 70219219\n",
-      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
-      "7594021 D134#1#chr03 57 58\n",
-      "\tIn path\n",
-      "\t 70219218 70219219\n",
-      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
-      "7594021 D134#1#chr03 57 58\n",
-      "\tIn path\n",
-      "\t 70219218 70219219\n",
-      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
-      "7594241 D134#1#chr03 20 21\n",
-      "\tIn path\n",
-      "\t 70219220 70219221\n",
-      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
-      "7594248 D134#1#chr03 21 22\n",
-      "\tNot in path\n",
-      "7594286 D134#1#chr03 59 60\n",
-      "\tIn path\n",
-      "\t 70219349 70219350\n",
-      "{'Q.START': 20, 'Q.END': 21, 'T.START': 70219220, 'T.END': 70219221, 'CG': '1='}\n",
-      "7594311 D134#1#chr03 55 56\n",
-      "\tIn path\n",
-      "\t 70219351 70219352\n",
-      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
-      "7594315 D134#1#chr03 53 54\n",
-      "\tIn path\n",
-      "\t 70219857 70219858\n",
-      "{'Q.START': 55, 'Q.END': 56, 'T.START': 70219351, 'T.END': 70219352, 'CG': '1='}\n",
-      "7594311 D134#1#chr03 55 56\n",
-      "\tIn path\n",
-      "\t 70219351 70219352\n",
-      "{'Q.START': 53, 'Q.END': 54, 'T.START': 70219857, 'T.END': 70219858, 'CG': '1='}\n",
-      "7594330 D134#1#chr03 26 27\n",
-      "\tNot in path\n",
-      "7594311 D134#1#chr03 55 56\n",
-      "\tIn path\n",
-      "\t 70219351 70219352\n",
-      "{'Q.START': 55, 'Q.END': 56, 'T.START': 70219351, 'T.END': 70219352, 'CG': '1='}\n",
-      "7594315 D134#1#chr03 53 54\n",
-      "\tIn path\n",
-      "\t 70219857 70219858\n",
-      "{'Q.START': 55, 'Q.END': 56, 'T.START': 70219351, 'T.END': 70219352, 'CG': '1='}\n",
-      "7594374 D134#1#chr03 69 70\n",
-      "\tIn path\n",
-      "\t 70219092 70219093\n",
-      "{'Q.START': 53, 'Q.END': 54, 'T.START': 70219857, 'T.END': 70219858, 'CG': '1='}\n",
-      "7594311 D134#1#chr03 55 56\n",
-      "\tIn path\n",
-      "\t 70219351 70219352\n",
-      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
-      "7594374 D134#1#chr03 69 70\n",
-      "\tIn path\n",
-      "\t 70219092 70219093\n",
-      "{'Q.START': 55, 'Q.END': 56, 'T.START': 70219351, 'T.END': 70219352, 'CG': '1='}\n",
-      "7594369 D134#1#chr03 32 33\n",
-      "\tIn path\n",
-      "\t 70219216 70219217\n",
-      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
-      "7594021 D134#1#chr03 57 58\n",
-      "\tIn path\n",
-      "\t 70219218 70219219\n",
-      "{'Q.START': 32, 'Q.END': 33, 'T.START': 70219216, 'T.END': 70219217, 'CG': '1='}\n",
-      "7594026 D134#1#chr03 37 38\n",
-      "\tIn path\n",
-      "\t 70220249 70220250\n",
-      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
-      "7594021 D134#1#chr03 57 58\n",
-      "\tIn path\n",
-      "\t 70219218 70219219\n",
-      "{'Q.START': 37, 'Q.END': 38, 'T.START': 70220249, 'T.END': 70220250, 'CG': '1='}\n",
-      "7594021 D134#1#chr03 57 58\n",
-      "\tIn path\n",
-      "\t 70219218 70219219\n",
-      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
-      "7594026 D134#1#chr03 37 38\n",
-      "\tIn path\n",
-      "\t 70220249 70220250\n",
-      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
-      "7594021 D134#1#chr03 57 58\n",
-      "\tIn path\n",
-      "\t 70219218 70219219\n",
-      "{'Q.START': 37, 'Q.END': 38, 'T.START': 70220249, 'T.END': 70220250, 'CG': '1='}\n",
-      "7594021 D134#1#chr03 57 58\n",
-      "\tIn path\n",
-      "\t 70219218 70219219\n",
-      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
-      "7594021 D134#1#chr03 57 58\n",
-      "\tIn path\n",
-      "\t 70219218 70219219\n",
-      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
-      "7594021 D134#1#chr03 57 58\n",
-      "\tIn path\n",
-      "\t 70219218 70219219\n",
-      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
-      "7594286 D134#1#chr03 59 60\n",
-      "\tIn path\n",
-      "\t 70219349 70219350\n",
-      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
-      "7594374 D134#1#chr03 69 70\n",
-      "\tIn path\n",
-      "\t 70219092 70219093\n",
-      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
-      "7594021 D134#1#chr03 57 58\n",
-      "\tIn path\n",
-      "\t 70219218 70219219\n",
-      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
-      "7594286 D134#1#chr03 59 60\n",
-      "\tIn path\n",
-      "\t 70219349 70219350\n",
-      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
-      "7594311 D134#1#chr03 55 56\n",
-      "\tIn path\n",
-      "\t 70219351 70219352\n",
-      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
-      "7594286 D134#1#chr03 59 60\n",
-      "\tIn path\n",
-      "\t 70219349 70219350\n",
-      "{'Q.START': 55, 'Q.END': 56, 'T.START': 70219351, 'T.END': 70219352, 'CG': '1='}\n",
-      "7594311 D134#1#chr03 55 56\n",
-      "\tIn path\n",
-      "\t 70219351 70219352\n",
-      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
-      "7594286 D134#1#chr03 59 60\n",
-      "\tIn path\n",
-      "\t 70219349 70219350\n",
-      "{'Q.START': 55, 'Q.END': 56, 'T.START': 70219351, 'T.END': 70219352, 'CG': '1='}\n",
-      "7594311 D134#1#chr03 55 56\n",
-      "\tIn path\n",
-      "\t 70219351 70219352\n",
-      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
-      "7594286 D134#1#chr03 59 60\n",
-      "\tIn path\n",
-      "\t 70219349 70219350\n",
-      "{'Q.START': 55, 'Q.END': 56, 'T.START': 70219351, 'T.END': 70219352, 'CG': '1='}\n",
-      "7594311 D134#1#chr03 55 56\n",
-      "\tIn path\n",
-      "\t 70219351 70219352\n",
-      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
-      "7594315 D134#1#chr03 53 54\n",
-      "\tIn path\n",
-      "\t 70219857 70219858\n",
-      "{'Q.START': 55, 'Q.END': 56, 'T.START': 70219351, 'T.END': 70219352, 'CG': '1='}\n",
-      "7594286 D134#1#chr03 59 60\n",
-      "\tIn path\n",
-      "\t 70219349 70219350\n",
-      "{'Q.START': 53, 'Q.END': 54, 'T.START': 70219857, 'T.END': 70219858, 'CG': '1='}\n",
-      "7594311 D134#1#chr03 55 56\n",
-      "\tIn path\n",
-      "\t 70219351 70219352\n",
-      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
-      "7594374 D134#1#chr03 69 70\n",
-      "\tIn path\n",
-      "\t 70219092 70219093\n",
-      "{'Q.START': 55, 'Q.END': 56, 'T.START': 70219351, 'T.END': 70219352, 'CG': '1='}\n",
-      "7594021 D134#1#chr03 57 58\n",
-      "\tIn path\n",
-      "\t 70219218 70219219\n",
-      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
-      "7594286 D134#1#chr03 59 60\n",
-      "\tIn path\n",
-      "\t 70219349 70219350\n",
-      "{'Q.START': 57, 'Q.END': 58, 'T.START': 70219218, 'T.END': 70219219, 'CG': '1X'}\n",
-      "7594286 D134#1#chr03 59 60\n",
-      "\tIn path\n",
-      "\t 70219349 70219350\n",
-      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
-      "7594374 D134#1#chr03 69 70\n",
-      "\tIn path\n",
-      "\t 70219092 70219093\n",
-      "{'Q.START': 59, 'Q.END': 60, 'T.START': 70219349, 'T.END': 70219350, 'CG': '1='}\n",
-      "7594356 D134#1#chr03 66 67\n",
-      "\tIn path\n",
-      "\t 70219570 70219571\n",
-      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
-      "7594374 D134#1#chr03 69 70\n",
-      "\tIn path\n",
-      "\t 70219092 70219093\n",
-      "{'Q.START': 66, 'Q.END': 67, 'T.START': 70219570, 'T.END': 70219571, 'CG': '1='}\n",
-      "7594374 D134#1#chr03 69 70\n",
-      "\tIn path\n",
-      "\t 70219092 70219093\n",
-      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
-      "7594375 D134#1#chr03 68 69\n",
-      "\tIn path\n",
-      "\t 70221598 70221599\n",
-      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
-      "7594374 D134#1#chr03 69 70\n",
-      "\tIn path\n",
-      "\t 70219092 70219093\n",
-      "{'Q.START': 68, 'Q.END': 69, 'T.START': 70221598, 'T.END': 70221599, 'CG': '1='}\n",
-      "7594356 D134#1#chr03 66 67\n",
-      "\tIn path\n",
-      "\t 70219570 70219571\n",
-      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
-      "7594374 D134#1#chr03 69 70\n",
-      "\tIn path\n",
-      "\t 70219092 70219093\n",
-      "{'Q.START': 66, 'Q.END': 67, 'T.START': 70219570, 'T.END': 70219571, 'CG': '1='}\n",
-      "7594375 D134#1#chr03 68 69\n",
-      "\tIn path\n",
-      "\t 70221598 70221599\n",
-      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
-      "7594374 D134#1#chr03 69 70\n",
-      "\tIn path\n",
-      "\t 70219092 70219093\n",
-      "{'Q.START': 68, 'Q.END': 69, 'T.START': 70221598, 'T.END': 70221599, 'CG': '1='}\n",
-      "7594350 D134#1#chr03 70 71\n",
-      "\tIn path\n",
-      "\t 70219226 70219227\n",
-      "{'Q.START': 69, 'Q.END': 70, 'T.START': 70219092, 'T.END': 70219093, 'CG': '1='}\n",
-      "7594264 D134#1#chr03 71 72\n",
-      "\tIn path\n",
-      "\t 70219228 70219229\n",
-      "{'Q.START': 70, 'Q.END': 71, 'T.START': 70219226, 'T.END': 70219227, 'CG': '1='}\n",
-      "7594207 D134#1#chr03 72 73\n",
-      "\tIn path\n",
-      "\t 70219230 70219231\n",
-      "{'Q.START': 71, 'Q.END': 72, 'T.START': 70219228, 'T.END': 70219229, 'CG': '1='}\n",
-      "7594225 D134#1#chr03 73 74\n",
-      "\tIn path\n",
-      "\t 70219232 70219233\n",
-      "{'Q.START': 72, 'Q.END': 73, 'T.START': 70219230, 'T.END': 70219231, 'CG': '1='}\n",
-      "7594227 D134#1#chr03 74 75\n",
-      "\tIn path\n",
-      "\t 70220150 70220151\n",
-      "{'Q.START': 73, 'Q.END': 74, 'T.START': 70219232, 'T.END': 70219233, 'CG': '1='}\n",
-      "7594120 D134#1#chr03 75 76\n",
-      "\tIn path\n",
-      "\t 70219236 70219237\n",
-      "{'Q.START': 74, 'Q.END': 75, 'T.START': 70220150, 'T.END': 70220151, 'CG': '1='}\n",
-      "7594132 D134#1#chr03 76 77\n",
-      "\tIn path\n",
-      "\t 70219777 70219778\n",
-      "{'Q.START': 75, 'Q.END': 76, 'T.START': 70219236, 'T.END': 70219237, 'CG': '1='}\n",
-      "7594165 D134#1#chr03 77 78\n",
-      "\tIn path\n",
-      "\t 70219240 70219241\n",
-      "{'Q.START': 76, 'Q.END': 77, 'T.START': 70219777, 'T.END': 70219778, 'CG': '1='}\n",
-      "7594172 D134#1#chr03 78 3735\n",
-      "\tNot in path\n",
-      "ALN_2\n",
-      "7594382 TO1000#1#chr03 0 1\n",
-      "\t 61731222 61731223\n",
-      "skipped\n",
-      "\n",
-      "7594369 TO1000#1#chr03 32 33\n",
-      "\t 61731060 61731061\n",
-      "{'Q.START': 0, 'Q.END': 1, 'T.START': 61731222, 'T.END': 61731223, 'CG': '1='}\n",
-      "7594371 TO1000#1#chr03 15 16\n",
-      "\tNot in path\n",
-      "7594021 TO1000#1#chr03 57 58\n",
-      "\t 61730922 61730923\n",
-      "{'Q.START': 32, 'Q.END': 33, 'T.START': 61731060, 'T.END': 61731061, 'CG': '1='}\n",
-      "7594286 TO1000#1#chr03 59 60\n",
-      "\t 61731054 61731055\n",
-      "{'Q.START': 57, 'Q.END': 58, 'T.START': 61730922, 'T.END': 61730923, 'CG': '1X'}\n",
-      "7594374 TO1000#1#chr03 69 70\n",
-      "\t 61730920 61730921\n",
-      "{'Q.START': 59, 'Q.END': 60, 'T.START': 61731054, 'T.END': 61731055, 'CG': '1='}\n",
-      "7594356 TO1000#1#chr03 66 67\n",
-      "\t 61731519 61731520\n",
-      "{'Q.START': 69, 'Q.END': 70, 'T.START': 61730920, 'T.END': 61730921, 'CG': '1='}\n",
-      "7594374 TO1000#1#chr03 69 70\n",
-      "\t 61730920 61730921\n",
-      "{'Q.START': 66, 'Q.END': 67, 'T.START': 61731519, 'T.END': 61731520, 'CG': '1='}\n",
-      "7594374 TO1000#1#chr03 69 70\n",
-      "\t 61730920 61730921\n",
-      "{'Q.START': 69, 'Q.END': 70, 'T.START': 61730920, 'T.END': 61730921, 'CG': '1='}\n",
-      "7594375 TO1000#1#chr03 68 69\n",
-      "\t 61733612 61733613\n",
-      "{'Q.START': 69, 'Q.END': 70, 'T.START': 61730920, 'T.END': 61730921, 'CG': '1='}\n",
-      "7594626 TO1000#1#chr03 10 11\n",
-      "\t 61731056 61731057\n",
-      "{'Q.START': 68, 'Q.END': 69, 'T.START': 61733612, 'T.END': 61733613, 'CG': '1='}\n",
-      "7594011 TO1000#1#chr03 11 12\n",
-      "\t 61733900 61733901\n",
-      "{'Q.START': 10, 'Q.END': 11, 'T.START': 61731056, 'T.END': 61731057, 'CG': '1='}\n",
-      "7594374 TO1000#1#chr03 69 70\n",
-      "\t 61730920 61730921\n",
-      "{'Q.START': 11, 'Q.END': 12, 'T.START': 61733900, 'T.END': 61733901, 'CG': '1='}\n",
-      "7594375 TO1000#1#chr03 68 69\n",
-      "\t 61733612 61733613\n",
-      "{'Q.START': 69, 'Q.END': 70, 'T.START': 61730920, 'T.END': 61730921, 'CG': '1='}\n",
-      "7594369 TO1000#1#chr03 32 33\n",
-      "\t 61731060 61731061\n",
-      "{'Q.START': 68, 'Q.END': 69, 'T.START': 61733612, 'T.END': 61733613, 'CG': '1='}\n",
-      "7594371 TO1000#1#chr03 15 16\n",
-      "\tNot in path\n",
-      "7594021 TO1000#1#chr03 57 58\n",
-      "\t 61730922 61730923\n",
-      "{'Q.START': 32, 'Q.END': 33, 'T.START': 61731060, 'T.END': 61731061, 'CG': '1='}\n",
-      "7594021 TO1000#1#chr03 57 58\n",
-      "\t 61730922 61730923\n",
-      "{'Q.START': 57, 'Q.END': 58, 'T.START': 61730922, 'T.END': 61730923, 'CG': '1X'}\n",
-      "7594021 TO1000#1#chr03 57 58\n",
-      "\t 61730922 61730923\n",
-      "{'Q.START': 57, 'Q.END': 58, 'T.START': 61730922, 'T.END': 61730923, 'CG': '1X'}\n",
-      "7594021 TO1000#1#chr03 57 58\n",
-      "\t 61730922 61730923\n",
-      "{'Q.START': 57, 'Q.END': 58, 'T.START': 61730922, 'T.END': 61730923, 'CG': '1X'}\n",
-      "7594241 TO1000#1#chr03 20 21\n",
-      "\t 61731046 61731047\n",
-      "{'Q.START': 57, 'Q.END': 58, 'T.START': 61730922, 'T.END': 61730923, 'CG': '1X'}\n",
-      "7594248 TO1000#1#chr03 21 22\n",
-      "\t 61734261 61734262\n",
-      "{'Q.START': 20, 'Q.END': 21, 'T.START': 61731046, 'T.END': 61731047, 'CG': '1='}\n",
-      "7594286 TO1000#1#chr03 59 60\n",
-      "\t 61731054 61731055\n",
-      "{'Q.START': 21, 'Q.END': 22, 'T.START': 61734261, 'T.END': 61734262, 'CG': '1='}\n",
-      "7594311 TO1000#1#chr03 55 56\n",
-      "\t 61731052 61731053\n",
-      "{'Q.START': 59, 'Q.END': 60, 'T.START': 61731054, 'T.END': 61731055, 'CG': '1='}\n",
-      "7594315 TO1000#1#chr03 53 54\n",
-      "\t 61733937 61733938\n",
-      "{'Q.START': 55, 'Q.END': 56, 'T.START': 61731052, 'T.END': 61731053, 'CG': '1='}\n",
-      "7594311 TO1000#1#chr03 55 56\n",
-      "\t 61731052 61731053\n",
-      "{'Q.START': 53, 'Q.END': 54, 'T.START': 61733937, 'T.END': 61733938, 'CG': '1='}\n",
-      "7594330 TO1000#1#chr03 26 27\n",
-      "\t 61731768 61731769\n",
-      "{'Q.START': 55, 'Q.END': 56, 'T.START': 61731052, 'T.END': 61731053, 'CG': '1='}\n",
-      "7594311 TO1000#1#chr03 55 56\n",
-      "\t 61731052 61731053\n",
-      "{'Q.START': 26, 'Q.END': 27, 'T.START': 61731768, 'T.END': 61731769, 'CG': '1='}\n",
-      "7594315 TO1000#1#chr03 53 54\n",
-      "\t 61733937 61733938\n",
-      "{'Q.START': 55, 'Q.END': 56, 'T.START': 61731052, 'T.END': 61731053, 'CG': '1='}\n",
-      "7594374 TO1000#1#chr03 69 70\n",
-      "\t 61730920 61730921\n",
-      "{'Q.START': 53, 'Q.END': 54, 'T.START': 61733937, 'T.END': 61733938, 'CG': '1='}\n",
-      "7594311 TO1000#1#chr03 55 56\n",
-      "\t 61731052 61731053\n",
-      "{'Q.START': 69, 'Q.END': 70, 'T.START': 61730920, 'T.END': 61730921, 'CG': '1='}\n",
-      "7594374 TO1000#1#chr03 69 70\n",
-      "\t 61730920 61730921\n",
-      "{'Q.START': 55, 'Q.END': 56, 'T.START': 61731052, 'T.END': 61731053, 'CG': '1='}\n",
-      "7594369 TO1000#1#chr03 32 33\n",
-      "\t 61731060 61731061\n",
-      "{'Q.START': 69, 'Q.END': 70, 'T.START': 61730920, 'T.END': 61730921, 'CG': '1='}\n",
-      "7594021 TO1000#1#chr03 57 58\n",
-      "\t 61730922 61730923\n",
-      "{'Q.START': 32, 'Q.END': 33, 'T.START': 61731060, 'T.END': 61731061, 'CG': '1='}\n",
-      "7594026 TO1000#1#chr03 37 38\n",
-      "\t 61734267 61734268\n",
-      "{'Q.START': 57, 'Q.END': 58, 'T.START': 61730922, 'T.END': 61730923, 'CG': '1X'}\n",
-      "7594021 TO1000#1#chr03 57 58\n",
-      "\t 61730922 61730923\n",
-      "{'Q.START': 37, 'Q.END': 38, 'T.START': 61734267, 'T.END': 61734268, 'CG': '1='}\n",
-      "7594021 TO1000#1#chr03 57 58\n",
-      "\t 61730922 61730923\n",
-      "{'Q.START': 57, 'Q.END': 58, 'T.START': 61730922, 'T.END': 61730923, 'CG': '1X'}\n",
-      "7594026 TO1000#1#chr03 37 38\n",
-      "\t 61734267 61734268\n",
-      "{'Q.START': 57, 'Q.END': 58, 'T.START': 61730922, 'T.END': 61730923, 'CG': '1X'}\n",
-      "7594021 TO1000#1#chr03 57 58\n",
-      "\t 61730922 61730923\n",
-      "{'Q.START': 37, 'Q.END': 38, 'T.START': 61734267, 'T.END': 61734268, 'CG': '1='}\n",
-      "7594021 TO1000#1#chr03 57 58\n",
-      "\t 61730922 61730923\n",
-      "{'Q.START': 57, 'Q.END': 58, 'T.START': 61730922, 'T.END': 61730923, 'CG': '1X'}\n",
-      "7594021 TO1000#1#chr03 57 58\n",
-      "\t 61730922 61730923\n",
-      "{'Q.START': 57, 'Q.END': 58, 'T.START': 61730922, 'T.END': 61730923, 'CG': '1X'}\n",
-      "7594021 TO1000#1#chr03 57 58\n",
-      "\t 61730922 61730923\n",
-      "{'Q.START': 57, 'Q.END': 58, 'T.START': 61730922, 'T.END': 61730923, 'CG': '1X'}\n",
-      "7594286 TO1000#1#chr03 59 60\n",
-      "\t 61731054 61731055\n",
-      "{'Q.START': 57, 'Q.END': 58, 'T.START': 61730922, 'T.END': 61730923, 'CG': '1X'}\n",
-      "7594374 TO1000#1#chr03 69 70\n",
-      "\t 61730920 61730921\n",
-      "{'Q.START': 59, 'Q.END': 60, 'T.START': 61731054, 'T.END': 61731055, 'CG': '1='}\n",
-      "7594021 TO1000#1#chr03 57 58\n",
-      "\t 61730922 61730923\n",
-      "{'Q.START': 69, 'Q.END': 70, 'T.START': 61730920, 'T.END': 61730921, 'CG': '1='}\n",
-      "7594286 TO1000#1#chr03 59 60\n",
-      "\t 61731054 61731055\n",
-      "{'Q.START': 57, 'Q.END': 58, 'T.START': 61730922, 'T.END': 61730923, 'CG': '1X'}\n",
-      "7594311 TO1000#1#chr03 55 56\n",
-      "\t 61731052 61731053\n",
-      "{'Q.START': 59, 'Q.END': 60, 'T.START': 61731054, 'T.END': 61731055, 'CG': '1='}\n",
-      "7594286 TO1000#1#chr03 59 60\n",
-      "\t 61731054 61731055\n",
-      "{'Q.START': 55, 'Q.END': 56, 'T.START': 61731052, 'T.END': 61731053, 'CG': '1='}\n",
-      "7594311 TO1000#1#chr03 55 56\n",
-      "\t 61731052 61731053\n",
-      "{'Q.START': 59, 'Q.END': 60, 'T.START': 61731054, 'T.END': 61731055, 'CG': '1='}\n",
-      "7594286 TO1000#1#chr03 59 60\n",
-      "\t 61731054 61731055\n",
-      "{'Q.START': 55, 'Q.END': 56, 'T.START': 61731052, 'T.END': 61731053, 'CG': '1='}\n",
-      "7594311 TO1000#1#chr03 55 56\n",
-      "\t 61731052 61731053\n",
-      "{'Q.START': 59, 'Q.END': 60, 'T.START': 61731054, 'T.END': 61731055, 'CG': '1='}\n",
-      "7594286 TO1000#1#chr03 59 60\n",
-      "\t 61731054 61731055\n",
-      "{'Q.START': 55, 'Q.END': 56, 'T.START': 61731052, 'T.END': 61731053, 'CG': '1='}\n",
-      "7594311 TO1000#1#chr03 55 56\n",
-      "\t 61731052 61731053\n",
-      "{'Q.START': 59, 'Q.END': 60, 'T.START': 61731054, 'T.END': 61731055, 'CG': '1='}\n",
-      "7594315 TO1000#1#chr03 53 54\n",
-      "\t 61733937 61733938\n",
-      "{'Q.START': 55, 'Q.END': 56, 'T.START': 61731052, 'T.END': 61731053, 'CG': '1='}\n",
-      "7594286 TO1000#1#chr03 59 60\n",
-      "\t 61731054 61731055\n",
-      "{'Q.START': 53, 'Q.END': 54, 'T.START': 61733937, 'T.END': 61733938, 'CG': '1='}\n",
-      "7594311 TO1000#1#chr03 55 56\n",
-      "\t 61731052 61731053\n",
-      "{'Q.START': 59, 'Q.END': 60, 'T.START': 61731054, 'T.END': 61731055, 'CG': '1='}\n",
-      "7594374 TO1000#1#chr03 69 70\n",
-      "\t 61730920 61730921\n",
-      "{'Q.START': 55, 'Q.END': 56, 'T.START': 61731052, 'T.END': 61731053, 'CG': '1='}\n",
-      "7594021 TO1000#1#chr03 57 58\n",
-      "\t 61730922 61730923\n",
-      "{'Q.START': 69, 'Q.END': 70, 'T.START': 61730920, 'T.END': 61730921, 'CG': '1='}\n",
-      "7594286 TO1000#1#chr03 59 60\n",
-      "\t 61731054 61731055\n",
-      "{'Q.START': 57, 'Q.END': 58, 'T.START': 61730922, 'T.END': 61730923, 'CG': '1X'}\n",
-      "7594286 TO1000#1#chr03 59 60\n",
-      "\t 61731054 61731055\n",
-      "{'Q.START': 59, 'Q.END': 60, 'T.START': 61731054, 'T.END': 61731055, 'CG': '1='}\n",
-      "7594374 TO1000#1#chr03 69 70\n",
-      "\t 61730920 61730921\n",
-      "{'Q.START': 59, 'Q.END': 60, 'T.START': 61731054, 'T.END': 61731055, 'CG': '1='}\n",
-      "7594356 TO1000#1#chr03 66 67\n",
-      "\t 61731519 61731520\n",
-      "{'Q.START': 69, 'Q.END': 70, 'T.START': 61730920, 'T.END': 61730921, 'CG': '1='}\n",
-      "7594374 TO1000#1#chr03 69 70\n",
-      "\t 61730920 61730921\n",
-      "{'Q.START': 66, 'Q.END': 67, 'T.START': 61731519, 'T.END': 61731520, 'CG': '1='}\n",
-      "7594374 TO1000#1#chr03 69 70\n",
-      "\t 61730920 61730921\n",
-      "{'Q.START': 69, 'Q.END': 70, 'T.START': 61730920, 'T.END': 61730921, 'CG': '1='}\n",
-      "7594375 TO1000#1#chr03 68 69\n",
-      "\t 61733612 61733613\n",
-      "{'Q.START': 69, 'Q.END': 70, 'T.START': 61730920, 'T.END': 61730921, 'CG': '1='}\n",
-      "7594374 TO1000#1#chr03 69 70\n",
-      "\t 61730920 61730921\n",
-      "{'Q.START': 68, 'Q.END': 69, 'T.START': 61733612, 'T.END': 61733613, 'CG': '1='}\n",
-      "7594356 TO1000#1#chr03 66 67\n",
-      "\t 61731519 61731520\n",
-      "{'Q.START': 69, 'Q.END': 70, 'T.START': 61730920, 'T.END': 61730921, 'CG': '1='}\n",
-      "7594374 TO1000#1#chr03 69 70\n",
-      "\t 61730920 61730921\n",
-      "{'Q.START': 66, 'Q.END': 67, 'T.START': 61731519, 'T.END': 61731520, 'CG': '1='}\n",
-      "7594375 TO1000#1#chr03 68 69\n",
-      "\t 61733612 61733613\n",
-      "{'Q.START': 69, 'Q.END': 70, 'T.START': 61730920, 'T.END': 61730921, 'CG': '1='}\n",
-      "7594374 TO1000#1#chr03 69 70\n",
-      "\t 61730920 61730921\n",
-      "{'Q.START': 68, 'Q.END': 69, 'T.START': 61733612, 'T.END': 61733613, 'CG': '1='}\n",
-      "7594350 TO1000#1#chr03 70 71\n",
-      "\t 61731066 61731067\n",
-      "{'Q.START': 69, 'Q.END': 70, 'T.START': 61730920, 'T.END': 61730921, 'CG': '1='}\n",
-      "7594264 TO1000#1#chr03 71 72\n",
-      "\t 61731068 61731069\n",
-      "{'Q.START': 70, 'Q.END': 71, 'T.START': 61731066, 'T.END': 61731067, 'CG': '1='}\n",
-      "7594207 TO1000#1#chr03 72 73\n",
-      "\t 61731070 61731071\n",
-      "{'Q.START': 71, 'Q.END': 72, 'T.START': 61731068, 'T.END': 61731069, 'CG': '1='}\n",
-      "7594225 TO1000#1#chr03 73 74\n",
-      "\t 61731072 61731073\n",
-      "{'Q.START': 72, 'Q.END': 73, 'T.START': 61731070, 'T.END': 61731071, 'CG': '1='}\n",
-      "7594227 TO1000#1#chr03 74 75\n",
-      "\tNot in path\n",
-      "7594120 TO1000#1#chr03 75 76\n",
-      "\t 61731076 61731077\n",
-      "{'Q.START': 73, 'Q.END': 74, 'T.START': 61731072, 'T.END': 61731073, 'CG': '1='}\n",
-      "7594132 TO1000#1#chr03 76 77\n",
-      "\t 61733800 61733801\n",
-      "{'Q.START': 75, 'Q.END': 76, 'T.START': 61731076, 'T.END': 61731077, 'CG': '1='}\n",
-      "7594165 TO1000#1#chr03 77 78\n",
-      "\t 61731080 61731081\n",
-      "{'Q.START': 76, 'Q.END': 77, 'T.START': 61733800, 'T.END': 61733801, 'CG': '1='}\n",
-      "7594172 TO1000#1#chr03 78 3735\n",
-      "\tNot in path\n"
-     ]
-    }
-   ],
-   "source": [
-    "ALNS = {}\n",
-    "## Iterating over alignments\n",
-    "for aln_name in aln_dict.keys():\n",
-    "    \n",
-    "    ## Iterating over paths of the gfa\n",
-    "    for path_name in paths.keys():\n",
-    "        if path_name in [\"TO1000#1#chr03\", \"D134#1#chr03\"]: print(aln_name)\n",
-    "        _ = [] # Temporary list holding alignment blocks\n",
-    "\n",
-    "        ## Iterating over alignment nodes of the current alignment\n",
-    "        for node_id, orient in aln_dict[aln_name][\"PATH.MATCH\"]:\n",
-    "\n",
-    "            # Getting node info\n",
-    "            n_info = nodes[node_id]\n",
-    "            q_start = n_info[aln_name][\"START\"] # Start position on the query\n",
-    "            q_end = n_info[aln_name][\"END\"] # End position on the query\n",
-    "            _CG = n_info[aln_name][\"CIGAR\"] # Cigar of the alignment on the current node\n",
-    "\n",
-    "            if path_name in [\"TO1000#1#chr03\", \"D134#1#chr03\"]: print(node_id, path_name, q_start, q_end)\n",
-    "\n",
-    "            ## Checking if path is traversing the current node\n",
-    "            if path_name in list(n_info.keys()):\n",
-    "                if path_name == \"D134#1#chr03\": print(\"\\tIn path\")\n",
-    "\n",
-    "                ## Getting start and end position on the target given the orientation of the node in the alignment and the path\n",
-    "                if n_info[aln_name][\"STRAND\"] == n_info[path_name][\"STRAND\"] :\n",
-    "                    t_start = n_info[path_name][\"START\"]+n_info[aln_name][\"S.OFF\"]\n",
-    "                    t_end = n_info[path_name][\"END\"]+n_info[aln_name][\"E.OFF\"] \n",
-    "                else :\n",
-    "                    t_end = n_info[path_name][\"START\"]+n_info[aln_name][\"S.OFF\"]\n",
-    "                    t_start = n_info[path_name][\"END\"]+n_info[aln_name][\"E.OFF\"]\n",
-    "\n",
-    "                if path_name in [\"TO1000#1#chr03\", \"D134#1#chr03\"]: print(\"\\t\", t_start, t_end)\n",
-    "\n",
-    "                \"\"\"\n",
-    "                If the latest block t.end and q.end matches with the current node t.start and q.start, \n",
-    "                the node should be added to the block. Else, we terminate the block and add the node to a new block\n",
-    "                \"\"\"\n",
-    "                \n",
-    "                # Non empty temporary list of aln and ending of the last block is the same as the start of the new node : \n",
-    "                if len(_) and _[-1][\"T.END\"] == t_start and _[-1][\"Q.END\"]+1 == q_start: \n",
-    "                    tmp_aln[\"Q.END\"] = q_end\n",
-    "                    tmp_aln[\"T.END\"] = t_end\n",
-    "                    tmp_aln[\"CG\"] += _CG\n",
-    "#                elif len(_) and _[-1][\"T.END\"] == t_start: # Following on the target not on the query (i.e. Insertion)\n",
-    "#                    tmp_aln[\"T.END\"] = t_end\n",
-    "#                    tmp_aln[\"CG\"] += f\"{nodes_length[node_id]}I\"\n",
-    "#                elif len(_) and _[-1][\"Q.END\"]+1 == q_start: # Following on the query, not on the target (i.e. Deletion)\n",
-    "#                    tmp_aln[\"Q.END\"] = q_end\n",
-    "#                    tmp_aln[\"CG\"] += f\"{nodes_length[node_id]}D\"\n",
-    "                else : # Else, completely different\n",
-    "                    try : \n",
-    "                        _.append(tmp_aln)\n",
-    "                        if path_name in [\"TO1000#1#chr03\", \"D134#1#chr03\"]: print(tmp_aln)\n",
-    "                    except : \n",
-    "                        if path_name in [\"TO1000#1#chr03\", \"D134#1#chr03\"]: print(\"skipped\\n\")\n",
-    "                    tmp_aln = {\n",
-    "                        \"Q.START\": q_start,\n",
-    "                        \"Q.END\": q_end,\n",
-    "                        \"T.START\": t_start,\n",
-    "                        \"T.END\": t_end,\n",
-    "                        \"CG\": _CG,\n",
-    "                    }\n",
-    "            \n",
-    "            else : \n",
-    "                if path_name in [\"TO1000#1#chr03\", \"D134#1#chr03\"]: print(\"\\tNot in path\")\n",
-    "                # Node is not in the path\n",
-    "\n",
-    "        del tmp_aln\n",
-    "        \n",
-    "        ALNS[(path_name, aln_name)] = _"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "id": "547f03fa-cbd5-42f9-b668-1ca4404795ba",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[{'Q.START': 0, 'Q.END': 77, 'T.START': 73306158, 'T.END': 73306235, 'CG': '77='}, {'Q.START': 77, 'Q.END': 82, 'T.START': 73306238, 'T.END': 73306243, 'CG': '5='}, {'Q.START': 83, 'Q.END': 138, 'T.START': 73306246, 'T.END': 73306301, 'CG': '55='}, {'Q.START': 139, 'Q.END': 202, 'T.START': 73306302, 'T.END': 73306365, 'CG': '63='}, {'Q.START': 202, 'Q.END': 203, 'T.START': 73306366, 'T.END': 73306367, 'CG': '1='}, {'Q.START': 203, 'Q.END': 379, 'T.START': 73306368, 'T.END': 73306544, 'CG': '176='}, {'Q.START': 379, 'Q.END': 380, 'T.START': 73306545, 'T.END': 73306546, 'CG': '1='}, {'Q.START': 380, 'Q.END': 429, 'T.START': 73306547, 'T.END': 73306596, 'CG': '49='}, {'Q.START': 429, 'Q.END': 430, 'T.START': 73306597, 'T.END': 73306598, 'CG': '1='}, {'Q.START': 430, 'Q.END': 457, 'T.START': 73306599, 'T.END': 73306626, 'CG': '27='}, {'Q.START': 457, 'Q.END': 492, 'T.START': 73306641, 'T.END': 73306676, 'CG': '35='}, {'Q.START': 508, 'Q.END': 564, 'T.START': 73306694, 'T.END': 73306750, 'CG': '56='}, {'Q.START': 568, 'Q.END': 569, 'T.START': 73306753, 'T.END': 73306754, 'CG': '1='}, {'Q.START': 568, 'Q.END': 569, 'T.START': 73306753, 'T.END': 73306754, 'CG': '1='}, {'Q.START': 568, 'Q.END': 569, 'T.START': 73306753, 'T.END': 73306754, 'CG': '1='}, {'Q.START': 569, 'Q.END': 824, 'T.START': 73306755, 'T.END': 73307010, 'CG': '255='}, {'Q.START': 826, 'Q.END': 858, 'T.START': 73307011, 'T.END': 73307043, 'CG': '32='}, {'Q.START': 858, 'Q.END': 859, 'T.START': 73307044, 'T.END': 73307045, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 
'T.END': 73307047, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}, {'Q.START': 868, 'Q.END': 869, 'T.START': 73307046, 'T.END': 73307047, 'CG': '1='}, {'Q.START': 869, 'Q.END': 913, 'T.START': 73307048, 'T.END': 73307092, 'CG': '44='}, {'Q.START': 913, 'Q.END': 919, 'T.START': 73307093, 'T.END': 73307099, 'CG': '6='}, {'Q.START': 919, 'Q.END': 978, 'T.START': 73307100, 'T.END': 73307159, 'CG': '59='}, {'Q.START': 978, 'Q.END': 979, 'T.START': 73307160, 'T.END': 73307161, 'CG': '1='}, {'Q.START': 979, 'Q.END': 1038, 'T.START': 73307162, 'T.END': 73307221, 'CG': '59='}, {'Q.START': 1038, 'Q.END': 1045, 'T.START': 73307224, 'T.END': 73307231, 'CG': '7='}, {'Q.START': 1045, 'Q.END': 1046, 'T.START': 73307232, 'T.END': 73307233, 'CG': '1='}, {'Q.START': 1046, 'Q.END': 1080, 'T.START': 73307234, 'T.END': 73307268, 'CG': '34='}, {'Q.START': 1080, 'Q.END': 1081, 'T.START': 73307269, 'T.END': 73307270, 'CG': '1='}, {'Q.START': 1081, 'Q.END': 1107, 'T.START': 73307271, 'T.END': 73307297, 'CG': '26='}, {'Q.START': 1108, 'Q.END': 1183, 'T.START': 73307300, 'T.END': 73307375, 'CG': '75='}, {'Q.START': 1183, 'Q.END': 1186, 'T.START': 73307376, 'T.END': 73307379, 'CG': '3='}, {'Q.START': 1224, 'Q.END': 1257, 'T.START': 73307419, 'T.END': 73307452, 'CG': '33='}, {'Q.START': 1289, 'Q.END': 1311, 'T.START': 73307475, 'T.END': 73307497, 'CG': '22='}, {'Q.START': 1359, 'Q.END': 1382, 'T.START': 73307546, 'T.END': 73307569, 'CG': '23='}, {'Q.START': 1434, 'Q.END': 1451, 'T.START': 73307643, 'T.END': 73307660, 'CG': '17='}, {'Q.START': 1451, 'Q.END': 1531, 'T.START': 73307661, 'T.END': 73307741, 'CG': '80='}, {'Q.START': 1532, 'Q.END': 1543, 'T.START': 73307744, 'T.END': 73307755, 'CG': '11='}, {'Q.START': 1544, 'Q.END': 1572, 'T.START': 73307758, 'T.END': 73307786, 'CG': 
'28='}, {'Q.START': 1572, 'Q.END': 1573, 'T.START': 73307787, 'T.END': 73307788, 'CG': '1='}, {'Q.START': 1573, 'Q.END': 1587, 'T.START': 73307789, 'T.END': 73307803, 'CG': '14='}, {'Q.START': 1588, 'Q.END': 1616, 'T.START': 73307806, 'T.END': 73307834, 'CG': '28='}, {'Q.START': 1616, 'Q.END': 1617, 'T.START': 73307835, 'T.END': 73307836, 'CG': '1='}, {'Q.START': 1617, 'Q.END': 1646, 'T.START': 73307837, 'T.END': 73307866, 'CG': '29='}, {'Q.START': 1646, 'Q.END': 1661, 'T.START': 73307867, 'T.END': 73307882, 'CG': '15='}, {'Q.START': 1661, 'Q.END': 1673, 'T.START': 73307883, 'T.END': 73307895, 'CG': '12='}, {'Q.START': 1673, 'Q.END': 1674, 'T.START': 73307896, 'T.END': 73307897, 'CG': '1='}, {'Q.START': 1674, 'Q.END': 1726, 'T.START': 73307898, 'T.END': 73307950, 'CG': '52='}, {'Q.START': 1727, 'Q.END': 1762, 'T.START': 73307953, 'T.END': 73307988, 'CG': '35='}, {'Q.START': 1766, 'Q.END': 1767, 'T.START': 73307991, 'T.END': 73307992, 'CG': '1='}, {'Q.START': 1765, 'Q.END': 1766, 'T.START': 73307993, 'T.END': 73307994, 'CG': '1='}, {'Q.START': 1766, 'Q.END': 1767, 'T.START': 73307991, 'T.END': 73307992, 'CG': '1='}, {'Q.START': 1765, 'Q.END': 1766, 'T.START': 73307993, 'T.END': 73307994, 'CG': '1='}, {'Q.START': 1766, 'Q.END': 1767, 'T.START': 73307991, 'T.END': 73307992, 'CG': '1='}, {'Q.START': 1767, 'Q.END': 1824, 'T.START': 73307995, 'T.END': 73308052, 'CG': '57='}, {'Q.START': 1824, 'Q.END': 1825, 'T.START': 73308053, 'T.END': 73308054, 'CG': '1='}, {'Q.START': 1825, 'Q.END': 1975, 'T.START': 73308055, 'T.END': 73308205, 'CG': '150='}, {'Q.START': 1976, 'Q.END': 2015, 'T.START': 73308208, 'T.END': 73308247, 'CG': '39='}, {'Q.START': 2016, 'Q.END': 2047, 'T.START': 73308250, 'T.END': 73308281, 'CG': '31='}, {'Q.START': 2047, 'Q.END': 2055, 'T.START': 73308286, 'T.END': 73308294, 'CG': '8='}, {'Q.START': 2056, 'Q.END': 2120, 'T.START': 73308297, 'T.END': 73308361, 'CG': '64='}, {'Q.START': 2120, 'Q.END': 2121, 'T.START': 73308362, 'T.END': 73308363, 'CG': '1='}, 
{'Q.START': 2121, 'Q.END': 2157, 'T.START': 73308364, 'T.END': 73308400, 'CG': '36='}, {'Q.START': 2158, 'Q.END': 2170, 'T.START': 73308403, 'T.END': 73308415, 'CG': '12='}, {'Q.START': 2170, 'Q.END': 2171, 'T.START': 73308416, 'T.END': 73308417, 'CG': '1='}, {'Q.START': 2171, 'Q.END': 2205, 'T.START': 73308418, 'T.END': 73308452, 'CG': '34='}, {'Q.START': 2206, 'Q.END': 2344, 'T.START': 73308455, 'T.END': 73308593, 'CG': '138='}, {'Q.START': 2345, 'Q.END': 2364, 'T.START': 73308596, 'T.END': 73308615, 'CG': '19='}, {'Q.START': 2364, 'Q.END': 2383, 'T.START': 73308616, 'T.END': 73308635, 'CG': '19='}, {'Q.START': 2383, 'Q.END': 2408, 'T.START': 73308636, 'T.END': 73308661, 'CG': '25='}, {'Q.START': 2408, 'Q.END': 2409, 'T.START': 73308662, 'T.END': 73308663, 'CG': '1='}, {'Q.START': 2409, 'Q.END': 2441, 'T.START': 73308664, 'T.END': 73308696, 'CG': '32='}, {'Q.START': 2441, 'Q.END': 2442, 'T.START': 73308697, 'T.END': 73308698, 'CG': '1='}, {'Q.START': 2442, 'Q.END': 2580, 'T.START': 73308699, 'T.END': 73308837, 'CG': '138='}, {'Q.START': 2582, 'Q.END': 2583, 'T.START': 73308838, 'T.END': 73308839, 'CG': '1='}, {'Q.START': 2583, 'Q.END': 2584, 'T.START': 73308840, 'T.END': 73308841, 'CG': '1='}, {'Q.START': 2582, 'Q.END': 2583, 'T.START': 73308838, 'T.END': 73308839, 'CG': '1='}, {'Q.START': 2583, 'Q.END': 2584, 'T.START': 73308840, 'T.END': 73308841, 'CG': '1='}, {'Q.START': 2584, 'Q.END': 2764, 'T.START': 73308842, 'T.END': 73309022, 'CG': '180='}, {'Q.START': 2765, 'Q.END': 2797, 'T.START': 73309025, 'T.END': 73309057, 'CG': '32='}, {'Q.START': 2798, 'Q.END': 2878, 'T.START': 73309060, 'T.END': 73309140, 'CG': '80='}, {'Q.START': 2878, 'Q.END': 2879, 'T.START': 73309141, 'T.END': 73309142, 'CG': '1='}, {'Q.START': 2879, 'Q.END': 2951, 'T.START': 73309143, 'T.END': 73309215, 'CG': '72='}, {'Q.START': 2951, 'Q.END': 2952, 'T.START': 73309216, 'T.END': 73309217, 'CG': '1='}, {'Q.START': 2952, 'Q.END': 3002, 'T.START': 73309218, 'T.END': 73309268, 'CG': '50='}, 
{'Q.START': 3002, 'Q.END': 3077, 'T.START': 73309271, 'T.END': 73309346, 'CG': '75='}, {'Q.START': 3077, 'Q.END': 3078, 'T.START': 73309347, 'T.END': 73309348, 'CG': '1='}, {'Q.START': 3078, 'Q.END': 3093, 'T.START': 73309349, 'T.END': 73309364, 'CG': '15='}, {'Q.START': 3094, 'Q.END': 3097, 'T.START': 73309367, 'T.END': 73309370, 'CG': '3='}, {'Q.START': 3097, 'Q.END': 3140, 'T.START': 73309371, 'T.END': 73309414, 'CG': '43='}, {'Q.START': 3140, 'Q.END': 3210, 'T.START': 73309415, 'T.END': 73309485, 'CG': '70='}, {'Q.START': 3210, 'Q.END': 3211, 'T.START': 73309486, 'T.END': 73309487, 'CG': '1='}, {'Q.START': 3211, 'Q.END': 3229, 'T.START': 73309488, 'T.END': 73309506, 'CG': '18='}, {'Q.START': 3229, 'Q.END': 3230, 'T.START': 73309507, 'T.END': 73309508, 'CG': '1='}, {'Q.START': 3230, 'Q.END': 3276, 'T.START': 73309509, 'T.END': 73309555, 'CG': '46='}, {'Q.START': 3277, 'Q.END': 3315, 'T.START': 73309558, 'T.END': 73309596, 'CG': '38='}, {'Q.START': 3316, 'Q.END': 3322, 'T.START': 73309599, 'T.END': 73309605, 'CG': '6='}, {'Q.START': 3323, 'Q.END': 3348, 'T.START': 73309608, 'T.END': 73309633, 'CG': '25='}, {'Q.START': 3352, 'Q.END': 3353, 'T.START': 73309634, 'T.END': 73309635, 'CG': '1='}, {'Q.START': 3351, 'Q.END': 3352, 'T.START': 73309636, 'T.END': 73309637, 'CG': '1='}, {'Q.START': 3352, 'Q.END': 3353, 'T.START': 73309634, 'T.END': 73309635, 'CG': '1='}, {'Q.START': 3351, 'Q.END': 3352, 'T.START': 73309636, 'T.END': 73309637, 'CG': '1='}, {'Q.START': 3352, 'Q.END': 3353, 'T.START': 73309634, 'T.END': 73309635, 'CG': '1='}, {'Q.START': 3353, 'Q.END': 3354, 'T.START': 73309638, 'T.END': 73309639, 'CG': '1='}, {'Q.START': 3354, 'Q.END': 3356, 'T.START': 73309640, 'T.END': 73309642, 'CG': '2='}, {'Q.START': 3357, 'Q.END': 3489, 'T.START': 73309645, 'T.END': 73309777, 'CG': '132='}, {'Q.START': 3490, 'Q.END': 3642, 'T.START': 73309780, 'T.END': 73309932, 'CG': '152='}, {'Q.START': 3644, 'Q.END': 3685, 'T.START': 73309933, 'T.END': 73309974, 'CG': '41='}, 
{'Q.START': 3687, 'Q.END': 3693, 'T.START': 73309977, 'T.END': 73309983, 'CG': '6='}, {'Q.START': 3694, 'Q.END': 3708, 'T.START': 73309986, 'T.END': 73310000, 'CG': '14='}, {'Q.START': 3720, 'Q.END': 3721, 'T.START': 73310010, 'T.END': 73310011, 'CG': '1='}, {'Q.START': 3721, 'Q.END': 3722, 'T.START': 73310003, 'T.END': 73310004, 'CG': '1='}, {'Q.START': 3716, 'Q.END': 3720, 'T.START': 73310005, 'T.END': 73310009, 'CG': '4='}, {'Q.START': 3720, 'Q.END': 3721, 'T.START': 73310010, 'T.END': 73310011, 'CG': '1='}, {'Q.START': 3721, 'Q.END': 3722, 'T.START': 73310003, 'T.END': 73310004, 'CG': '1='}, {'Q.START': 3716, 'Q.END': 3720, 'T.START': 73310005, 'T.END': 73310009, 'CG': '4='}, {'Q.START': 3720, 'Q.END': 3721, 'T.START': 73310010, 'T.END': 73310011, 'CG': '1='}, {'Q.START': 3721, 'Q.END': 3722, 'T.START': 73310003, 'T.END': 73310004, 'CG': '1='}]\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(ALNS[(\"D134#1#chr03\", \"ALN_1\")])"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.14"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/gaf2aln.py b/gaf2aln.py
deleted file mode 100644
index 25fdebf..0000000
--- a/gaf2aln.py
+++ /dev/null
@@ -1,441 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-gaf2aln
-Convert gaf alignement to sam or paf
-
-@author: alexis.mergez@inrae.fr
-@version: 0.1
-"""
-
-import numpy as np
-import pandas as pd
-import argparse
-import concurrent.futures
-import os
-import re
-
-version = "0.1"
-
-## Argument parser
-arg_parser = argparse.ArgumentParser(description='GFAvc: GFA version converter')
-arg_parser.add_argument(
-    "--gfa",
-    "-g",
-    dest = "gfa",
-    required = True,
-    help = "Graph (.gfa v1)"
-    ) 
-arg_parser.add_argument(
-    "--gaf",
-    "-a",
-    dest = "gaf",
-    required = True,
-    help = "Alignement file (.gaf)"
-    )  
-arg_parser.add_argument(
-    "--format",
-    "-f",
-    dest = "format",
-    default = "P",
-    help = "Output file format. (S: sam, P: paf (default))"
-    )     
-arg_parser.add_argument(
-    "--threads",
-    "-t",
-    dest = "threads",
-    required = False,
-    default = 1,
-    type = int,
-    help = "Number of threads"
-    )
-arg_parser.add_argument(
-    '--version',
-    '-v',
-    action="store_true",
-    dest = "version",
-    help = "Show version"
-)
-args = arg_parser.parse_args()
-
-# Printing version
-if args.version:
-    print(version)
-    os._exit(0)
-
-# Toolbox
-def walk2path(walk):
-    """
-    Takes a walk in a single string and returns a list of nodes id with signs (gfa v1 like)
-    """
-    _ = re.findall(r'>\w+|<\w+', walk)
-    # Converting ['>..', '>..', '<..', '>..'] to '..+,..+,..-,..+'
-    return [f'{elem[1:]}{(elem[0] == ">")*"+"+(elem[0] == "<")*"-"}' for elem in _]
-
-def cigar2basealn(cigar):
-    """
-    Takes a CIGAR string and convert it into a list of base level alignment.
-    For example : "345=" -> ["=", "=", ..., "="] of length 345.
-    """
-    _ = re.findall(r'\d+\D', cigar)
-    final_cigar = []
-    for match in _:
-        final_cigar += [match[-1]]*int(match[:-1])
-
-    return final_cigar
-
-def basealn2cigar(base_aln_list):
-    
-    last_elem = base_aln_list[0]
-    CIGAR = [[1, last_elem]]
-    for elem in base_aln_list[1:]:
-        if elem == last_elem:
-            CIGAR[-1][0] += 1
-
-        else :
-            CIGAR[-1][0] = str(CIGAR[-1][0])
-            CIGAR.append([1, elem])
-            last_elem = elem
-    CIGAR[-1][0] = str(CIGAR[-1][0])
-    return "".join(["".join(block) for block in CIGAR if block[1] != ""])
-
-# Parsing the .gaf file
-print(f"[gaf2aln::GAF Parser] Reading {args.gaf} ...")
-with open(args.gaf, 'r') as file:
-    gaf_lines = file.readlines()
-
-gaf_col = [
-    "QRY.NAME", "QRY.LEN", "QRY.START", "QRY.END", "STRAND", 
-    "PATH.MATCH", "PATH.LEN", "ALN.START", "ALN.END",
-    "RES.MATCH", "ALN.BLOCK.LEN", "MAPPING.QUAL"
-    ]
-
-# Creating dictionnary to store alignments
-print(f"[gaf2aln::GAF Parser] Extracting alignments ...")
-aln_dict = {}
-for line in range(len(gaf_lines)):
-    ## Splitting the line by tabulation
-    line_content = gaf_lines[line][:-1].split('\t')
-
-    ## Adding alignement info to dictionnary
-    aln_dict[f"ALN_{line+1}"] = {
-        gaf_col[i]: line_content[i] for i in range(len(gaf_col))
-    }
-    
-    ## Splitting "PATH.MATCH" into a list
-    aln_dict[f"ALN_{line+1}"]["PATH.MATCH"] = [
-        (str(node_id[:-1]), node_id[-1]) 
-        for node_id in walk2path(aln_dict[f"ALN_{line+1}"]["PATH.MATCH"])
-    ]
-
-    ## Adding CIGAR
-    aln_dict[f"ALN_{line+1}"]["RAW.CIGAR"] = line_content[-1]
-
-    ## Adding tags
-    aln_dict[f"ALN_{line+1}"]["TAGS"] = ",".join(line_content[13:-1])
-
-# Getting nodes of interest ids
-aln_nodes = np.unique([
-    str(node_id) 
-    for aln in aln_dict.keys() 
-    for node_id, orient in aln_dict[aln]["PATH.MATCH"]
-]).tolist()
-
-del gaf_lines, gaf_col
-
-# Parsing the .gfa
-print(f"[gaf2aln::GFA Parser] Reading {args.gfa} ...")
-with open(args.gfa, 'r') as file:
-    gfa_lines = file.readlines()
-
-# Nodes length dictionnary structured as follow :
-# {<NODE.ID>: <NODE.LENGTH>}
-nodes_length = {}
-# Nodes dictionnary structured as follow :
-# { <ALN.NODE.ID> : {
-#   <PATH.NAME>: {"START": start, "END": end, "STRAND": strand), 
-#   <ALN.NAME>: {"START": start, "END": end, "S.OFF": start.offset, "E.OFF": end.offset, "STRAND": strand, "CIGAR": CIGAR}
-#   }
-# }
-nodes = {node_id: {} for node_id in aln_nodes}
-# Paths dictionnary structured as follow :
-# {<PATH.NAME>: {NODES: {<NODE.ID>: <NODE.ORIENT>}, CIGAR: <CIGAR in comma separated list>}
-paths = {}
-# Links dictionnary structured as follow : 
-# {<FROM.NODE.ID>: {<TO.NODE.ID>: {FROM.ORIENT: <FROM.ORIENT>, TO.ORIENT: <TO.ORIENT>}}}
-links = {}
-
-# Parsing the gfa
-print(f"[gaf2aln::GFA Parser] Extracting nodes, paths and links ...")
-
-def GFA_parser(gfa_lines, nodes = nodes):
-    _links, _nodes, _nodes_length, paths = {}, {}, {}, {}
-    for line in gfa_lines:
-        line_content = line[:-1].split("\t")
-        line_id = line_content[0]
-        
-        # Segment line
-        if line_id == "S" :
-            
-            _nodes_length[str(line_content[1])] = len(line_content[2])
-        
-        # Link line
-        elif line_id == "L":
-            try :
-                _links[str(line_content[1])][str(line_content[3])] = {
-                    "FROM": str(line_content[2]), 
-                    "TO": str(line_content[4])
-                }
-
-            except :
-                _links[str(line_content[1])] = {
-                    str(line_content[3]) : {"FROM.ORIENT": str(line_content[2]), "TO.ORIENT": str(line_content[4])}
-                }
-
-        # Path line
-        elif line_id == "P":
-            _paths[str(line_content[1])] = {
-                "NODES": {
-                    str(node_id[:-1]): str(node_id[-1])
-                    for node_id in line_content[2].split(',')
-                },
-                "CIGAR": line_content[3]
-            }
-
-    return [_links, _nodes, _nodes_length, _paths]
-
-# splits = np.quantile(range(len(gfa_lines)+1), q= np.array(args.threads+1)/args.threads, method='higher').tolist()
-# res = []
-# for i in range(1, len(splits)):
-#     res.append(executor.submit(GFA_parser, gfa_lines[splits[i-1]:splits[i]]))
-
-# for out in res:
-#     results = out.result()
-
-#     for link_id, link_info in results[0].items():
-#         links[]
-
-
-for line in gfa_lines:
-    line_content = line[:-1].split("\t")
-    line_id = line_content[0]
-    
-    # Segment line
-    if line_id == "S" :
-        
-        nodes_length[str(line_content[1])] = len(line_content[2])
-    
-    # Link line
-    elif line_id == "L":
-        try :
-            links[str(line_content[1])][str(line_content[3])] = {
-                "FROM": str(line_content[2]), 
-                "TO": str(line_content[4])
-            }
-
-        except :
-            links[str(line_content[1])] = {
-                str(line_content[3]) : {"FROM.ORIENT": str(line_content[2]), "TO.ORIENT": str(line_content[4])}
-            }
-
-    # Path line
-    elif line_id == "P":
-        paths[str(line_content[1])] = {
-            "NODES": {
-                str(node_id[:-1]): str(node_id[-1])
-                for node_id in line_content[2].split(',')
-            },
-            "CIGAR": line_content[3]
-        }
-
-del gfa_lines
-
-print(f"[gaf2aln::Graph position processing] Computing nodes positions in each paths...")
-def get_node_pos(path_name, nodes = nodes, paths = paths, nodes_length = nodes_length):
-    print(f"[gaf2aln::Graph position processing] Running on {path_name} ...")
-    cur_pos = 0
-
-    out = {}
-    # Iterating over nodes in the path
-    for path_node in paths[path_name]["NODES"].keys():
-        # Instead of checking if the node is one interesting node, we try to add to the nodes dict
-        if path_node in aln_nodes :
-            out[path_node] = {
-                "START": cur_pos, # Start position of the node start in the currrent path
-                "END": cur_pos+nodes_length[path_node], # End position of the node end in the current path
-                "STRAND": paths[path_name]["NODES"][path_node] # Orientation of the node in the current path
-                } 
-
-            cur_pos += nodes_length[path_node]+1
-        else :
-            cur_pos += nodes_length[path_node]+1
-
-    return out
-
-res = {}
-executor = concurrent.futures.ThreadPoolExecutor(max_workers=args.threads)
-# Adding nodes positions relative to path
-for path_name in paths.keys():
-    res[path_name] = executor.submit(get_node_pos, path_name)
-
-executor.shutdown(wait=True)
-
-for path_name, out in res.items():
-    results = out.result()
-    for path_node, node_pos in results.items():
-        nodes[path_node][path_name] = node_pos
-
-del res
-
-print(f"[gaf2aln::Alignment position processing] Computing nodes positions in each alignement...")
-# Adding nodes positions relative to path
-
-def get_aln_node_info(aln_name, aln_dict = aln_dict, nodes_length = nodes_length):
-    # Initializing current position in query
-    cur_pos = 0
-
-    # Getting start and end node ids
-    start_end_id = (aln_dict[aln_name]["PATH.MATCH"][0][0], aln_dict[aln_name]["PATH.MATCH"][-1][0])
-
-    # Creating result dictionnary
-    res = {}
-
-    ## Iterating over node_ids from the given alignment
-    for node_id, orient in aln_dict[aln_name]["PATH.MATCH"]:
-        # Adding entry for current node
-        res[node_id] = {aln_name: {}}
-
-        # First node
-        if node_id == start_end_id[0]:
-            start_pos = 0
-            s_off = int(aln_dict[aln_name]["ALN.START"])
-            end_pos = nodes_length[node_id]-s_off
-            e_off = 0
-        # End node
-        elif node_id == start_end_id[1]:
-            start_pos = cur_pos
-            s_off = 0
-            end_pos = int(aln_dict[aln_name]["QRY.END"])
-            e_off = nodes_length[node_id]-(end_pos-cur_pos)
-        # Node in between
-        else :
-            start_pos = cur_pos
-            s_off, e_off = 0, 0
-            end_pos = cur_pos+nodes_length[node_id]
-
-        res[node_id] = {
-            "START": start_pos, # Start position on the query
-            "END": end_pos, # End position on the query
-            "S.OFF": s_off, # Offset between the start of the alignment and the node's start
-            "E.OFF": e_off, # Offset between the end of the alignment and the node's end 
-            "STRAND": orient # Orientation of the node in the alignment
-            }
-        
-        cur_pos = end_pos
-        print(start_pos, end_pos, s_off, e_off, orient, nodes_length[node_id], cur_pos)
-
-    return res
-
-# Storing alignement 
-res = {}
-executor = concurrent.futures.ThreadPoolExecutor(max_workers=args.threads)
-for aln_name in aln_dict.keys():
-    print(f"[gaf2aln::Alignment position processing] Running on {aln_name} ...")
-    
-    res[aln_name] = executor.submit(get_aln_node_info, aln_name)
-    #res[aln_name] = get_aln_node_info(aln_name, aln_dict = aln_dict, nodes_length = nodes_length)
-
-executor.shutdown(wait=True)
-
-for aln_name, node_info in res.items():
-    results = node_info.result()
-    for node_id, info in results.items():
-        nodes[node_id][aln_name] = info
-
-del res
-
-# Calculating CIGAR for each nodes in each aln
-print(f"[gaf2aln::CIGAR processing] Computing nodes cigar from alignement ...")
-# Iterating over alignments
-for aln in aln_dict.keys():
-    
-    print(f"[gaf2aln::CIGAR processing] Running on {aln} ...")
-    # Getting the list of base level alignement (["=", "X", ...] from "1=1X...")
-    raw_cigar = cigar2basealn(aln_dict[aln]["RAW.CIGAR"])
-    CIGAR={}
-
-    for node_id, orient in aln_dict[aln]["PATH.MATCH"]:
-
-        _cigar = basealn2cigar(raw_cigar[
-            nodes[node_id][aln]["START"]:nodes[node_id][aln]["END"]
-            ])
-        nodes[node_id][aln]["CIGAR"] = _cigar
-        #print(_cigar, nodes[node_id][aln]["START"], nodes[node_id][aln]["END"])
-
-#print(nodes)
-
-# Lifting graph alignements to haplotype alignements
-
-ALNS = {}
-for aln_name in aln_dict.keys():
-    
-    for path_name in paths.keys():
-
-        _ = []
-        for node_id, orient in aln_dict[aln_name]["PATH.MATCH"]:
-            
-            n_info = nodes[node_id]
-            q_start = n_info[aln_name]["START"]
-            q_end = n_info[aln_name]["END"]
-            _CG = n_info[aln_name]["CIGAR"]
-
-            print(node_id, path_name, q_start, q_end)
-            if path_name in list(n_info.keys()):
-                print("\tIn path")
-
-                if n_info[aln_name]["STRAND"] == n_info[path_name]["STRAND"] :
-                    t_start = n_info[path_name]["START"]+n_info[aln_name]["S.OFF"]
-                    t_end = n_info[path_name]["END"]+n_info[aln_name]["E.OFF"] 
-                else :
-                    t_end = n_info[path_name]["START"]+n_info[aln_name]["S.OFF"]
-                    t_start = n_info[path_name]["END"]+n_info[aln_name]["E.OFF"]
-
-                print("\t", t_start, t_end)
-
-                # Non empty temporary list of aln and ending of the last block is the same as the start of the new node : 
-                if len(_) and _[-1]["T.END"] == t_start and _[-1]["Q.END"] == q_start: 
-                    tmp_aln["Q.END"] = q_end
-                    tmp_aln["T.END"] = t_end
-                    tmp_aln["CG"] += _CG
-                elif len(_) and _[-1]["T.END"] == t_start: # Following on the target not on the query (i.e. Insertion)
-                    tmp_aln["T.END"] = t_end
-                    tmp_aln["CG"] += f"{nodes_length[node_id]}I"
-                elif len(_) and _[-1]["Q.END"] == q_start: # Following on the query, not on the target (i.e. Deletion)
-                    tmp_aln["Q.END"] = q_end
-                    tmp_aln["CG"] += f"{nodes_length[node_id]}D"
-                else : # Else, completely different
-                    try : 
-                        _.append(tmp_aln)
-                    except : pass
-                    tmp_aln = {
-                        "Q.START": q_start,
-                        "Q.END": q_end,
-                        "T.START": t_start,
-                        "T.END": t_end,
-                        "CG": _CG,
-                    }
-
-            else : 
-                print("\tNot in path")
-                # Node is not in the path
-
-            
-        ALNS[(path_name, aln_name)] = _
-
-## Debug
-for elem in ALNS.keys():
-    print(elem)
- 
-for key, elem in ALNS.items():
-    print(key)
-    print(elem)
\ No newline at end of file
-- 
GitLab