[asterisk-bugs] [JIRA] (ASTERISK-26344) Asterisk 13.11.0 + PJSIP crash

Ian Gilmour (JIRA) noreply at issues.asterisk.org
Fri Sep 9 06:44:01 CDT 2016


    [ https://issues.asterisk.org/jira/browse/ASTERISK-26344?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=232228#comment-232228 ] 

Ian Gilmour commented on ASTERISK-26344:
----------------------------------------

This morning (after a rebuild), using the same config, I got another crash - this time it occurred immediately on asterisk startup (i.e. no calls running)!!

Could it be caused by some sort of race condition on multiple PJSIP registrations to the same SIP server? I can't think of anything else that's odd about this setup.

{noformat}
Program received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7fffa18ed700 (LWP 16459)]
0x00007ffff741b1b4 in pj_atomic_inc_and_get (atomic_var=0x3536663238336466) at ../src/pj/os_core_unix.c:935
935         pj_mutex_lock( atomic_var->mutex );
(gdb) bt
#0  0x00007ffff741b1b4 in pj_atomic_inc_and_get (atomic_var=0x3536663238336466) at ../src/pj/os_core_unix.c:935
#1  0x00007ffff741b20c in pj_atomic_inc (atomic_var=0x3536663238336466) at ../src/pj/os_core_unix.c:949
#2  0x00007ffff7387836 in pjsip_tx_data_add_ref (tdata=0x7fff8800d958) at ../src/pjsip/sip_transport.c:464
#3  0x00007fff6410e929 in sip_outbound_registration_response_cb (param=0x7fffa18ec5a0) at res_pjsip_outbound_registration.c:939
#4  0x00007ffff7360202 in call_callback (regc=0x7fff88008338, status=0, st_code=200, reason=0x7fffcc006ae0, rdata=0x7fffcc005008, expiration=120, contact_cnt=1, contact=0x7fffa18ec680, is_unreg=0) at ../src/pjsip-ua/sip_reg.c:775
#5  0x00007ffff73614c1 in regc_tsx_callback (token=0x7fff88008338, event=0x7fffa18ec8d0) at ../src/pjsip-ua/sip_reg.c:1345
#6  0x00007ffff739e59f in mod_util_on_tsx_state (tsx=0x7fff88015b98, event=0x7fffa18ec8d0) at ../src/pjsip/sip_util_statefull.c:81
#7  0x00007ffff739ae90 in tsx_set_state (tsx=0x7fff88015b98, state=PJSIP_TSX_STATE_COMPLETED, event_src_type=PJSIP_EVENT_RX_MSG, event_src=0x7fffcc005008, flag=0) at ../src/pjsip/sip_transaction.c:1233
#8  0x00007ffff739ddd1 in tsx_on_state_proceeding_uac (tsx=0x7fff88015b98, event=0x7fffa18eca20) at ../src/pjsip/sip_transaction.c:2957
#9  0x00007ffff739d3ef in tsx_on_state_calling (tsx=0x7fff88015b98, event=0x7fffa18eca20) at ../src/pjsip/sip_transaction.c:2540
#10 0x00007ffff739bd87 in pjsip_tsx_recv_msg (tsx=0x7fff88015b98, rdata=0x7fffcc005008) at ../src/pjsip/sip_transaction.c:1787
#11 0x00007ffff739a322 in mod_tsx_layer_on_rx_response (rdata=0x7fffcc005008) at ../src/pjsip/sip_transaction.c:875
#12 0x00007ffff7380c25 in pjsip_endpt_process_rx_data (endpt=0x123f798, rdata=0x7fffcc005008, p=0x7ffff1035e40, p_handled=0x7fffa18ecb64) at ../src/pjsip/sip_endpoint.c:894
#13 0x00007ffff0e1ae59 in distribute (data=0x7fffcc005008) at res_pjsip/pjsip_distributor.c:765
#14 0x00000000005ed9cb in ast_taskprocessor_execute (tps=0x11a7cb8) at taskprocessor.c:938
#15 0x00000000005f6aca in execute_tasks (data=0x11a7cb8) at threadpool.c:1322
#16 0x00000000005ed9cb in ast_taskprocessor_execute (tps=0x123ce38) at taskprocessor.c:938
#17 0x00000000005f4dd5 in threadpool_execute (pool=0x123d088) at threadpool.c:351
#18 0x00000000005f63e8 in worker_active (worker=0x7fff90000ee8) at threadpool.c:1105
#19 0x00000000005f6194 in worker_start (arg=0x7fff90000ee8) at threadpool.c:1024
#20 0x00000000006023ee in dummy_start (data=0x7fff90000e70) at utils.c:1235
#21 0x00007ffff5589aa1 in start_thread (arg=0x7fffa18ed700) at pthread_create.c:301
#22 0x00007ffff4b95aad in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:115
(gdb) bt full
#0  0x00007ffff741b1b4 in pj_atomic_inc_and_get (atomic_var=0x3536663238336466) at ../src/pj/os_core_unix.c:935
        new_value = 140737354108928
#1  0x00007ffff741b20c in pj_atomic_inc (atomic_var=0x3536663238336466) at ../src/pj/os_core_unix.c:949
No locals.
#2  0x00007ffff7387836 in pjsip_tx_data_add_ref (tdata=0x7fff8800d958) at ../src/pjsip/sip_transport.c:464
No locals.
#3  0x00007fff6410e929 in sip_outbound_registration_response_cb (param=0x7fffa18ec5a0) at res_pjsip_outbound_registration.c:939
        retry_after = 0x0
        tsx = 0x7fff88015b98
        client_state = 0x11a7b78
        response = 0x7fff88018b88
        callback_invoked = 0x7fff88010060
        __PRETTY_FUNCTION__ = "sip_outbound_registration_response_cb"
#4  0x00007ffff7360202 in call_callback (regc=0x7fff88008338, status=0, st_code=200, reason=0x7fffcc006ae0, rdata=0x7fffcc005008, expiration=120, contact_cnt=1, contact=0x7fffa18ec680, is_unreg=0) at ../src/pjsip-ua/sip_reg.c:775
        cbparam = {regc = 0x7fff88008338, token = 0x11a7b78, status = 0, code = 200, reason = {ptr = 0x7fffcc006b40 "OK.5", slen = 2}, rdata = 0x7fffcc005008, expiration = 120, contact_cnt = 1, contact = {0x7fff8800db98, 0xa18ec680, 0x0, 0x7fffa18ec620, 0x7ffff741b75a, 0x7fff8800dc40, 0x7fff88007fa8, 0x7835015fb8, 0x88008338, 0x7fffa18ec640}, is_unreg = 0}
#5  0x00007ffff73614c1 in regc_tsx_callback (token=0x7fff88008338, event=0x7fffa18ec8d0) at ../src/pjsip-ua/sip_reg.c:1345
        is_unreg = 0
        rdata = 0x7fffcc005008
        expiration = 120
        contact_cnt = 1
        contact = {0x7fff8800db98, 0x45e035, 0xffffffffa18ec6b0, 0x7fff880119e8, 0x7fff8800e388, 0x11a7bf0, 0x7fffa18ec830, 0x7fffa18ec820, 0x7fffa18ec710, 0x7ffff741cc2c}
        status = 48
        regc = 0x7fff88008338
        tsx = 0x7fff88015b98
        handled = 0
        update_contact = 0
#6  0x00007ffff739e59f in mod_util_on_tsx_state (tsx=0x7fff88015b98, event=0x7fffa18ec8d0) at ../src/pjsip/sip_util_statefull.c:81
        tsx_data = 0x7fff88015fb8
#7  0x00007ffff739ae90 in tsx_set_state (tsx=0x7fff88015b98, state=PJSIP_TSX_STATE_COMPLETED, event_src_type=PJSIP_EVENT_RX_MSG, event_src=0x7fffcc005008, flag=0) at ../src/pjsip/sip_transaction.c:1233
        e = {prev = 0x7fffa18ec910, next = 0x7ffff7431463, type = PJSIP_EVENT_TSX_STATE, body = {timer = {entry = 0x7fffcc005008}, tsx_state = {src = {rdata = 0x7fffcc005008, tdata = 0x7fffcc005008, timer = 0x7fffcc005008, status = -872394744, data = 0x7fffcc005008}, tsx = 0x7fff88015b98, prev_state = 1, type = PJSIP_EVENT_RX_MSG}, tx_msg = {tdata = 0x7fffcc005008}, tx_error = {tdata = 0x7fffcc005008, tsx = 0x7fff88015b98}, rx_msg = {rdata = 0x7fffcc005008}, user = {user1 = 0x7fffcc005008, user2 = 0x7fff88015b98, user3 = 0x300000001, user4 = 0x288015c40}}}
        prev_state = PJSIP_TSX_STATE_CALLING
#8  0x00007ffff739ddd1 in tsx_on_state_proceeding_uac (tsx=0x7fff88015b98, event=0x7fffa18eca20) at ../src/pjsip/sip_transaction.c:2957
        timeout = {sec = 0, msec = 0}
#9  0x00007ffff739d3ef in tsx_on_state_calling (tsx=0x7fff88015b98, event=0x7fffa18eca20) at ../src/pjsip/sip_transaction.c:2540
        msg = 0x7fffcc006ad0
        code = 200
#10 0x00007ffff739bd87 in pjsip_tsx_recv_msg (tsx=0x7fff88015b98, rdata=0x7fffcc005008) at ../src/pjsip/sip_transaction.c:1787
        event = {prev = 0x0, next = 0x7fff88016438, type = PJSIP_EVENT_RX_MSG, body = {timer = {entry = 0x7fffcc005008}, tsx_state = {src = {rdata = 0x7fffcc005008, tdata = 0x7fffcc005008, timer = 0x7fffcc005008, status = -872394744, data = 0x7fffcc005008}, tsx = 0x7fffa18eca70, prev_state = -146688166, type = 32767}, tx_msg = {tdata = 0x7fffcc005008}, tx_error = {tdata = 0x7fffcc005008, tsx = 0x7fffa18eca70}, rx_msg = {rdata = 0x7fffcc005008}, user = {user1 = 0x7fffcc005008, user2 = 0x7fffa18eca70, user3 = 0x7ffff741b75a, user4 = 0x7fffa18eca70}}}
#11 0x00007ffff739a322 in mod_tsx_layer_on_rx_response (rdata=0x7fffcc005008) at ../src/pjsip/sip_transaction.c:875
        key = {ptr = 0x7fff8800de50 "c$REGISTER$z9hG4bKPjc25c7b82-ee5b-4ac0-9679-dfe63f1cfdef", slen = 56}
        hval = 4161045600
        tsx = 0x7fff88015b98
#12 0x00007ffff7380c25 in pjsip_endpt_process_rx_data (endpt=0x123f798, rdata=0x7fffcc005008, p=0x7ffff1035e40, p_handled=0x7fffa18ecb64) at ../src/pjsip/sip_endpoint.c:894
        msg = 0x7fffcc006ad0
        def_prm = {start_prio = 2710489984, start_mod = 0x6f3e0b, idx_after_start = 2710490000, silent = 32767}
        mod = 0x7ffff76922a0
        handled = 0
        i = 1
        status = 0
#13 0x00007ffff0e1ae59 in distribute (data=0x7fffcc005008) at res_pjsip/pjsip_distributor.c:765
        param = {start_prio = 0, start_mod = 0x7ffff1035ca0, idx_after_start = 1, silent = 0}
        handled = 0
        rdata = 0x7fffcc005008
        is_request = 0
        is_ack = 0
        endpoint = 0x0
#14 0x00000000005ed9cb in ast_taskprocessor_execute (tps=0x11a7cb8) at taskprocessor.c:938
        local = {local_data = 0x7fffa18ed9c0, data = 0x600b26}
        t = 0x7fffcc02a000
        size = 9989040
        __PRETTY_FUNCTION__ = "ast_taskprocessor_execute"
#15 0x00000000005f6aca in execute_tasks (data=0x11a7cb8) at threadpool.c:1322
        tps = 0x11a7cb8
#16 0x00000000005ed9cb in ast_taskprocessor_execute (tps=0x123ce38) at taskprocessor.c:938
        local = {local_data = 0x57d28c4d, data = 0x0}
        t = 0x7fffcc029d10
        size = 140735903880960
        __PRETTY_FUNCTION__ = "ast_taskprocessor_execute"
#17 0x00000000005f4dd5 in threadpool_execute (pool=0x123d088) at threadpool.c:351
        __PRETTY_FUNCTION__ = "threadpool_execute"
#18 0x00000000005f63e8 in worker_active (worker=0x7fff90000ee8) at threadpool.c:1105
        alive = 0
#19 0x00000000005f6194 in worker_start (arg=0x7fff90000ee8) at threadpool.c:1024
        worker = 0x7fff90000ee8
        saved_state = 32767
        __PRETTY_FUNCTION__ = "worker_start"
#20 0x00000000006023ee in dummy_start (data=0x7fff90000e70) at utils.c:1235
        __cancel_buf = {__cancel_jmp_buf = {{__cancel_jmp_buf = {140735903880960, -6756505628295498910, 140737220745856, 140735903881664, 0, 3, -6756505628337441950, 6756334233892352866}, __mask_was_saved = 0}}, __pad = {0x7fffa18ece30, 0x0, 0x0, 0x0}}
        __cancel_routine = 0x451815 <ast_unregister_thread>
        __cancel_arg = 0x7fffa18ed700
        not_first_call = 0
        ret = 0x0
        a = {start_routine = 0x5f610d <worker_start>, data = 0x7fff90000ee8, name = 0x7fff90001000 "worker_start         started at [ 1079] threadpool.c worker_thread_start()"}
#21 0x00007ffff5589aa1 in start_thread (arg=0x7fffa18ed700) at pthread_create.c:301
        __res = <value optimized out>
        pd = 0x7fffa18ed700
        now = <value optimized out>
        unwind_buf = {cancel_jmp_buf = {{jmp_buf = {140735903880960, 6756335057497289570, 140737220745856, 140735903881664, 0, 3, -6756505628289207454, -6756322751223106718}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
        not_first_call = <value optimized out>
        pagesize_m1 = <value optimized out>
        sp = <value optimized out>
        freesize = <value optimized out>
#22 0x00007ffff4b95aad in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:115
No locals.
{noformat}

> Asterisk 13.11.0 + PJSIP crash
> ------------------------------
>
>                 Key: ASTERISK-26344
>                 URL: https://issues.asterisk.org/jira/browse/ASTERISK-26344
>             Project: Asterisk
>          Issue Type: Bug
>      Security Level: None
>          Components: pjproject/pjsip
>    Affects Versions: 13.11.0
>         Environment: Centos 6.8 (64-bit) + Asterisk-13.11.0 (with bundled 2.5.5 pjsip) + libsrtp-1.5.4
>            Reporter: Ian Gilmour
>            Assignee: Unassigned
>         Attachments: cli-and-gdb.tgz
>
>
> Hi,
> I have a development Asterisk 13.11.0 test setup (uses the bundled pjsip-2.5.5).
> Environment is Centos 6.8 (64-bit) + Asterisk-13.11.0 + libsrtp-1.5.4.
> On startup Asterisk registers 5 Asterisk users with a remote OpenSIPS server, over TLS, using PJSIP. As part of the test all 5 Asterisk PJSIP users are reregistered with OpenSIPS Server every couple of mins.
> All outgoing/incoming pjsip call media is encrypted using SRTP and via an external RTPPROXY running alongside the external OpenSIPS Server.
> Asterisk is also configured to use chan_sip on 127.0.0.1:5060 to allow calls from a locally run SIPp process. All SIPp calls are TCP+RTP.
> I use SIPp to run multiple concurrent loopback calls (calls vary in
> duration) through Asterisk to the OpenSIPS server and back to an echo() service running on the same Asterisk).
> i.e.
> {noformat}
>   SIPp <-TCP/RTP-> chan_sip <-> chan_pjsip <-TLS/SRTP->
>       OpenSIPS server (+ rtpproxy) <-TLS/SRTP-> chan_pjsip (echo service).
> {noformat}
> Initially I see all chan_pjsip registrations and reregistrations for all 5 PJSIP users go out through a single TCP port. I then start a SIPp test running multiple concurrent calls. At some point into the test the Asterisk PJSIP TCP port gets closed and reopened - when it does so I see Asterisk crash shortly afterwards. Possibly significantly\(?) the time of the crash was around the time one of the PJSIP users should have reregistered after the TCP outgoing port change (The log shows all 5 PJSIP users reregistering after the PJSIP TCP port change, but only 4 of the 5 reregistering twice before the crash).
> {noformat}
> Program received signal SIGSEGV, Segmentation fault.
> [Switching to Thread 0x7fffa2814700 (LWP 7166)]
> __pthread_mutex_lock (mutex=0x44492d6c6c6143) at pthread_mutex_lock.c:50
> 50        unsigned int type = PTHREAD_MUTEX_TYPE (mutex);
> (gdb) bt
> #0  __pthread_mutex_lock (mutex=0x44492d6c6c6143) at pthread_mutex_lock.c:50
> #1  0x00007ffff78e9d9b in pj_mutex_lock (mutex=0x44492d6c6c6143) at ../src/pj/os_core_unix.c:1265
> #2  0x00007ffff78e9e39 in pj_atomic_dec_and_get (atomic_var=0x7fffd8074630) at ../src/pj/os_core_unix.c:962
> #3  0x00007ffff787d7e0 in pjsip_tx_data_dec_ref (tdata=0x7fff8c3bfab8) at ../src/pjsip/sip_transport.c:495
> #4  0x00007ffff788a087 in tsx_shutdown (tsx=0x7fff94060a98) at ../src/pjsip/sip_transaction.c:1062
> #5  0x00007ffff788b4bc in tsx_set_state (tsx=0x7fff94060a98, state=PJSIP_TSX_STATE_DESTROYED, event_src_type=PJSIP_EVENT_TIMER, event_src=0x7fff94060c50, flag=0) at ../src/pjsip/sip_transaction.c:1271
> #6  0x00007ffff788b88e in tsx_on_state_terminated (tsx=<value optimized out>, event=<value optimized out>) at ../src/pjsip/sip_transaction.c:3337
> #7  0x00007ffff788bcd5 in tsx_timer_callback (theap=<value optimized out>, entry=0x7fff94060c50) at ../src/pjsip/sip_transaction.c:1171
> #8  0x00007ffff78fc449 in pj_timer_heap_poll (ht=0x1137950, next_delay=0x7fffa2813d30) at ../src/pj/timer.c:643
> #9  0x00007ffff7875b19 in pjsip_endpt_handle_events2 (endpt=0x1137668, max_timeout=0x7fffa2813d70, p_count=0x0) at ../src/pjsip/sip_endpoint.c:712
> #10 0x00007ffff1320b00 in monitor_thread_exec (endpt=<value optimized out>) at res_pjsip.c:3889
> #11 0x00007ffff78ea5d6 in thread_main (param=0x114dee8) at ../src/pj/os_core_unix.c:541
> #12 0x00007ffff5a8faa1 in start_thread (arg=0x7fffa2814700) at pthread_create.c:301
> #13 0x00007ffff509baad in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:115
> (gdb) bt full
> #0  __pthread_mutex_lock (mutex=0x44492d6c6c6143) at pthread_mutex_lock.c:50
>         type = <value optimized out>
>         id = <value optimized out>
> #1  0x00007ffff78e9d9b in pj_mutex_lock (mutex=0x44492d6c6c6143) at ../src/pj/os_core_unix.c:1265
>         status = <value optimized out>
> #2  0x00007ffff78e9e39 in pj_atomic_dec_and_get (atomic_var=0x7fffd8074630) at ../src/pj/os_core_unix.c:962
>         new_value = <value optimized out>
> #3  0x00007ffff787d7e0 in pjsip_tx_data_dec_ref (tdata=0x7fff8c3bfab8) at ../src/pjsip/sip_transport.c:495
> No locals.
> #4  0x00007ffff788a087 in tsx_shutdown (tsx=0x7fff94060a98) at ../src/pjsip/sip_transaction.c:1062
> No locals.
> #5  0x00007ffff788b4bc in tsx_set_state (tsx=0x7fff94060a98, state=PJSIP_TSX_STATE_DESTROYED, event_src_type=PJSIP_EVENT_TIMER, event_src=0x7fff94060c50, flag=0) at ../src/pjsip/sip_transaction.c:1271
>         prev_state = PJSIP_TSX_STATE_TERMINATED
> #6  0x00007ffff788b88e in tsx_on_state_terminated (tsx=<value optimized out>, event=<value optimized out>) at ../src/pjsip/sip_transaction.c:3337
> No locals.
> #7  0x00007ffff788bcd5 in tsx_timer_callback (theap=<value optimized out>, entry=0x7fff94060c50) at ../src/pjsip/sip_transaction.c:1171
>         event = {prev = 0x7fff8c5f4908, next = 0x1bfe, type = PJSIP_EVENT_TIMER, body = {timer = {entry = 0x7fff94060c50}, tsx_state = {src = {rdata = 0x7fff94060c50, tdata = 0x7fff94060c50, timer = 0x7fff94060c50, status = -1811542960, data = 0x7fff94060c50}, 
>               tsx = 0x7fffa2813c90, prev_state = -1568588592, type = 32767}, tx_msg = {tdata = 0x7fff94060c50}, tx_error = {tdata = 0x7fff94060c50, tsx = 0x7fffa2813c90}, rx_msg = {rdata = 0x7fff94060c50}, user = {user1 = 0x7fff94060c50, user2 = 0x7fffa2813c90, 
>               user3 = 0x7fffa2813cd0, user4 = 0x0}}}
>         tsx = 0x7fff94060a98
> #8  0x00007ffff78fc449 in pj_timer_heap_poll (ht=0x1137950, next_delay=0x7fffa2813d30) at ../src/pj/timer.c:643
>         node = 0x7fff94060c50
>         grp_lock = 0x7fffd8000ab8
>         now = {sec = 613363, msec = 925}
>         count = 2
> #9  0x00007ffff7875b19 in pjsip_endpt_handle_events2 (endpt=0x1137668, max_timeout=0x7fffa2813d70, p_count=0x0) at ../src/pjsip/sip_endpoint.c:712
>         timeout = {sec = 0, msec = 0}
>         count = 0
>         net_event_count = 0
>         c = <value optimized out>
> #10 0x00007ffff1320b00 in monitor_thread_exec (endpt=<value optimized out>) at res_pjsip.c:3889
>         delay = {sec = 0, msec = 10}
> #11 0x00007ffff78ea5d6 in thread_main (param=0x114dee8) at ../src/pj/os_core_unix.c:541
>         rec = 0x114dee8
>         result = <value optimized out>
> #12 0x00007ffff5a8faa1 in start_thread (arg=0x7fffa2814700) at pthread_create.c:301
>         __res = <value optimized out>
>         pd = 0x7fffa2814700
>         now = <value optimized out>
>         unwind_buf = {cancel_jmp_buf = {{jmp_buf = {140735919769344, -4896504223120570676, 140737488337344, 140735919770048, 0, 3, 4896356555646224076, 4896525551845689036}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, 
>               canceltype = 0}}}
>         not_first_call = <value optimized out>
>         pagesize_m1 = <value optimized out>
>         sp = <value optimized out>
>         freesize = <value optimized out>
> #13 0x00007ffff509baad in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:115
> No locals.
> {noformat}



--
This message was sent by Atlassian JIRA
(v6.2#6252)



More information about the asterisk-bugs mailing list