Seafile + Collabora

gauburtin · December 18, 2016, 4:48pm

here are the files causing the error

utils.py

# Copyright (c) 2012-2016 Seafile Ltd.
import os
import re
import time
import urllib
import urlparse
import requests
import hashlib
import logging
import uuid
from dateutil.relativedelta import relativedelta

try:
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET

from django.core.cache import cache
from django.core.urlresolvers import reverse
from django.utils import timezone

from seahub.utils import get_site_scheme_and_netloc
from .settings import OFFICE_WEB_APP_BASE_URL, WOPI_ACCESS_TOKEN_EXPIRATION, \
    OFFICE_WEB_APP_DISCOVERY_EXPIRATION, OFFICE_WEB_APP_CLIENT_PEM, \
    OFFICE_WEB_APP_CLIENT_CERT, OFFICE_WEB_APP_CLIENT_KEY, \
    OFFICE_WEB_APP_SERVER_CA

logger = logging.getLogger(__name__)


def generate_access_token_cache_key(token):
    """ Build the cache key under which a WOPI access token is stored

    token: the access token; it is converted with str() before use
    """

    token_text = str(token)
    return 'wopi_access_token_{0}'.format(token_text)

def get_file_info_by_token(token):
    """ Get file info from cache by access token

    token: WOPI access token used to build the cache key

    return tuple: (request_user, repo_id, file_path), or
    (None, None, None) when nothing usable is cached for the token
    """

    key = generate_access_token_cache_key(token)

    # Read the cache only once: with two separate lookups the entry could
    # expire in between, returning None and breaking the callers' 3-tuple
    # unpacking.
    file_info = cache.get(key)
    return file_info if file_info else (None, None, None)

def generate_discovery_cache_key(name, ext):
    """ Build the cache key for one office web app hosting discovery entry

    name: Operations that you can perform on an Office document
    ext: The file formats that are supported for the action
    """

    key_parts = ['wopi', name, ext]
    return '_'.join(key_parts)

def get_wopi_dict(request_user, repo_id, file_path, action_name='view'):
    """ Prepare dict data for WOPI host page

    request_user: user the access token is issued for
    repo_id: id of the library containing the file
    file_path: path of the file inside the library
    action_name: WOPI action, either 'view' or 'edit'

    return dict with keys 'action_url', 'access_token' and
    'access_token_ttl'; None if action_name is not supported, the hosting
    discovery page can not be fetched or parsed, or no action url is
    available for this action and file extension
    """

    if action_name not in ('view', 'edit'):
        return None

    file_name = os.path.basename(file_path)
    file_ext = os.path.splitext(file_name)[1][1:].lower()

    wopi_key = generate_discovery_cache_key(action_name, file_ext)
    action_url = cache.get(wopi_key)

    if not action_url:
        # can not get action_url from cache,
        # fetch the hosting discovery page instead

        # client certificate to present, depending on what is configured
        if OFFICE_WEB_APP_CLIENT_CERT and OFFICE_WEB_APP_CLIENT_KEY:
            client_cert = (OFFICE_WEB_APP_CLIENT_CERT,
                           OFFICE_WEB_APP_CLIENT_KEY)
        elif OFFICE_WEB_APP_CLIENT_PEM:
            client_cert = OFFICE_WEB_APP_CLIENT_PEM
        else:
            client_cert = None

        try:
            xml = requests.get(OFFICE_WEB_APP_BASE_URL,
                               cert=client_cert,
                               verify=OFFICE_WEB_APP_SERVER_CA)
        except Exception as e:
            logger.error(e)
            return None

        try:
            root = ET.fromstring(xml.content)
        except Exception as e:
            logger.error(e)
            return None

        # cache every complete action of the discovery page, and pick
        # the one matching the requested action and file extension
        for action in root.getiterator('action'):
            attr = action.attrib
            ext = attr.get('ext')
            name = attr.get('name')
            urlsrc = attr.get('urlsrc')

            if ext and name and urlsrc:
                # strip the '<...>' placeholder part from the url
                tmp_action_url = re.sub(r'<.*>', '', urlsrc)
                tmp_wopi_key = generate_discovery_cache_key(name, ext)
                cache.set(tmp_wopi_key, tmp_action_url,
                          OFFICE_WEB_APP_DISCOVERY_EXPIRATION)

                if wopi_key == tmp_wopi_key:
                    action_url = tmp_action_url

    if not action_url:
        # can not get action_url from hosting discovery page
        return None

    # generate full action url
    # The file id is derived from repo_id and file_path only (request_user
    # is not part of it), so it is the same for every user of the file.
    full_file_info = '_'.join([repo_id, file_path])
    fake_file_id = hashlib.sha1(full_file_info.encode('utf8')).hexdigest()

    base_url = get_site_scheme_and_netloc()
    check_file_info_endpoint = reverse('WOPIFilesView', args=[fake_file_id])
    WOPISrc = urlparse.urljoin(base_url, check_file_info_endpoint)

    query_dict = {'WOPISrc': WOPISrc}
    if action_url[-1] in ('?', '&'):
        full_action_url = action_url + urllib.urlencode(query_dict)
    elif '?' in action_url:
        full_action_url = action_url + '&' + urllib.urlencode(query_dict)
    else:
        full_action_url = action_url + '?' + urllib.urlencode(query_dict)

    # generate access token
    file_info = (request_user, repo_id, file_path)

    # collabora office only allowed alphanumeric and _,
    # so use the hex form of the uuid (no dashes)
    uid = uuid.uuid4()
    access_token = uid.hex

    key = generate_access_token_cache_key(access_token)
    cache.set(key, file_info, WOPI_ACCESS_TOKEN_EXPIRATION)

    # access_token_ttl property tells office web app
    # when access token expires
    # NOTE(review): time.mktime() interprets the time tuple as local time;
    # confirm timezone.now() yields local time in this deployment.
    expire_sec = WOPI_ACCESS_TOKEN_EXPIRATION
    expiration = timezone.now() + relativedelta(seconds=expire_sec)
    milliseconds_ttl = time.mktime(expiration.timetuple()) * 1000
    access_token_ttl = int(milliseconds_ttl)

    wopi_dict = {}
    wopi_dict['action_url'] = full_action_url
    wopi_dict['access_token'] = access_token
    wopi_dict['access_token_ttl'] = access_token_ttl

    return wopi_dict

views.py

# Copyright (c) 2012-2016 Seafile Ltd.
import os
import json
import logging
import urllib2
import requests
import hashlib
# 6.04 debug
import urlparse
# end 6.04 debug

from rest_framework.views import APIView

from django.http import HttpResponse
from django.core.cache import cache

from pysearpc import SearpcError
from seaserv import seafile_api

from seahub.views import check_file_lock
from seahub.utils import gen_inner_file_get_url, \
    gen_file_upload_url, get_file_type_and_ext
from seahub.base.templatetags.seahub_tags import email2nickname

# 6.04 debug
#from seahub.settings import FILE_LOCK_EXPIRATION_DAYS
from seahub.settings import FILE_LOCK_EXPIRATION_DAYS, SITE_ROOT
# end 6.04 debug

from .utils import get_file_info_by_token

from .settings import WOPI_ACCESS_TOKEN_EXPIRATION, \
    ENABLE_OFFICE_WEB_APP_EDIT, OFFICE_WEB_APP_EDIT_FILE_EXTENSION

logger = logging.getLogger(__name__)
json_content_type = 'application/json; charset=utf-8'

class WOPIFilesView(APIView):
    """ WOPI endpoints for file info (GET) and lock handling (POST)

    Both handlers resolve the 'access_token' query parameter to
    (request_user, repo_id, file_path) through the cache.
    """

    def get(self, request, file_id, format=None):
        """ WOPI endpoint for check file info

        Returns a JSON object describing the file (name, size, version,
        user info and, when editing is allowed, write/lock capabilities).
        Responds 401 when the token, library or file can not be resolved
        and 500 when the file size can not be read.
        """

        token = request.GET.get('access_token', None)
        request_user, repo_id, file_path = get_file_info_by_token(token)

        if not request_user or not repo_id or not file_path:
            logger.error('access_token invalid.')
            return HttpResponse(json.dumps({}), status=401,
                                content_type=json_content_type)

        repo = seafile_api.get_repo(repo_id)
        if not repo:
            # pass the value as a logging argument; applying '%' to the
            # result of logger.error() raises TypeError
            logger.error('Library %s not found.', repo_id)
            return HttpResponse(json.dumps({}), status=401,
                                content_type=json_content_type)

        obj_id = seafile_api.get_file_id_by_path(repo_id, file_path)
        if not obj_id:
            logger.error('File %s not found.', file_path)
            return HttpResponse(json.dumps({}), status=401,
                                content_type=json_content_type)

        try:
            file_size = seafile_api.get_file_size(repo.store_id,
                                                  repo.version,
                                                  obj_id)
        except SearpcError as e:
            logger.error(e)
            return HttpResponse(json.dumps({}), status=500,
                                content_type=json_content_type)

        if file_size == -1:
            logger.error('File %s not found.', file_path)
            return HttpResponse(json.dumps({}), status=401,
                                content_type=json_content_type)

        result = {}
        result['BaseFileName'] = os.path.basename(file_path)
        result['Size'] = file_size
        # used for office web app cache
        result['Version'] = obj_id

        # for use of collabora office
        result['UserFriendlyName'] = email2nickname(request_user)
        result['OwnerId'] = seafile_api.get_repo_owner(repo_id)
        result['UserId'] = request_user
        absolute_uri = request.build_absolute_uri('/')
        result['PostMessageOrigin'] = urlparse.urljoin(absolute_uri, SITE_ROOT).strip('/')
        result['HidePrintOption'] = False
        result['HideSaveOption'] = False
        result['HideExportOption'] = False
        result['EnableOwnerTermination'] = True

        filename = os.path.basename(file_path)
        filetype, fileext = get_file_type_and_ext(filename)
        is_locked, locked_by_me = check_file_lock(repo_id,
                file_path, request_user)
        perm = seafile_api.check_permission_by_path(repo_id,
                file_path, request_user)

        # editing is offered only for unencrypted libraries, with 'rw'
        # permission, on editable extensions, and when the file is not
        # locked by somebody else
        if ENABLE_OFFICE_WEB_APP_EDIT and not repo.encrypted and \
            perm == 'rw' and (not is_locked or locked_by_me) and \
                fileext in OFFICE_WEB_APP_EDIT_FILE_EXTENSION:

            result['SupportsLocks'] = True
            result['SupportsUpdate'] = True
            result['UserCanWrite'] = True

        return HttpResponse(json.dumps(result), status=200,
                            content_type=json_content_type)

    def post(self, request, file_id, format=None):
        """ WOPI endpoint for lock / unlock file

        The action is selected by the HTTP_X_WOPI_OVERRIDE header:
        'LOCK' / 'REFRESH_LOCK' lock the file if it is not locked yet,
        'UNLOCK' removes a lock that was recorded as set through this
        endpoint. Responds 401 on invalid token, library, file or header.
        """

        token = request.GET.get('access_token', None)
        request_user, repo_id, file_path = get_file_info_by_token(token)

        # check basic file info
        if not request_user or not repo_id or not file_path:
            logger.error('access_token invalid.')
            return HttpResponse(json.dumps({}), status=401,
                                content_type=json_content_type)

        repo = seafile_api.get_repo(repo_id)
        if not repo:
            logger.error('Library %s not found.', repo_id)
            return HttpResponse(json.dumps({}), status=401,
                                content_type=json_content_type)

        obj_id = seafile_api.get_file_id_by_path(repo_id, file_path)
        if not obj_id:
            logger.error('File %s not found.', file_path)
            return HttpResponse(json.dumps({}), status=401,
                                content_type=json_content_type)

        # check file lock info
        is_locked, locked_by_request_user = check_file_lock(repo_id,
            file_path, request_user)

        # action according to HTTP_X_WOPI_OVERRIDE header from OWA
        x_wopi_override = request.META.get('HTTP_X_WOPI_OVERRIDE', None)
        if not x_wopi_override:
            return HttpResponse(json.dumps({'error_msg': 'HTTP_X_WOPI_OVERRIDE missing'}),
                    status=401, content_type=json_content_type)

        # cache flag remembering that the lock was set through this endpoint
        file_path_hash = hashlib.sha256(file_path).hexdigest()
        key_locked_by_OWA = '_'.join(['WOPI_LOCK',
                repo_id, 'file_path_hash', file_path_hash, 'locked_by_OWA'])

        if x_wopi_override in ('LOCK', 'REFRESH_LOCK'):
            if not is_locked:
                seafile_api.lock_file(repo_id, file_path, request_user,
                    FILE_LOCK_EXPIRATION_DAYS)
                cache.set(key_locked_by_OWA, True, WOPI_ACCESS_TOKEN_EXPIRATION)

        elif x_wopi_override == 'UNLOCK':
            # only remove locks that were recorded by the LOCK branch above
            if is_locked and cache.get(key_locked_by_OWA):
                seafile_api.unlock_file(repo_id, file_path.lstrip('/'))
                cache.delete(key_locked_by_OWA)

        else:
            return HttpResponse(json.dumps({'error_msg': 'HTTP_X_WOPI_OVERRIDE invalid'}),
                    status=401, content_type=json_content_type)

        return HttpResponse()

class WOPIFilesContentsView(APIView):
    """ WOPI endpoints for reading (GET) and updating (POST) file content

    Both handlers resolve the 'access_token' query parameter to
    (request_user, repo_id, file_path) through the cache.
    """

    def get(self, request, file_id, format=None):
        """ WOPI endpoint for get file content

        Responds with the raw file bytes, 401 when the token, library or
        file can not be resolved, and 500 when the content can not be
        fetched from the file server.
        """

        token = request.GET.get('access_token', None)
        request_user, repo_id, file_path = get_file_info_by_token(token=token)

        if not request_user or not repo_id or not file_path:
            logger.error('access_token invalid.')
            return HttpResponse(json.dumps({}), status=401,
                                content_type=json_content_type)

        repo = seafile_api.get_repo(repo_id)
        if not repo:
            # pass the value as a logging argument; applying '%' to the
            # result of logger.error() raises TypeError
            logger.error('Library %s not found.', repo_id)
            return HttpResponse(json.dumps({}), status=401,
                                content_type=json_content_type)

        obj_id = seafile_api.get_file_id_by_path(repo_id, file_path)
        if not obj_id:
            logger.error('File %s not found.', file_path)
            return HttpResponse(json.dumps({}), status=401,
                                content_type=json_content_type)

        file_name = os.path.basename(file_path)
        try:
            fileserver_token = seafile_api.get_fileserver_access_token(repo_id,
                                       obj_id, 'view', '', use_onetime = False)
        except SearpcError as e:
            logger.error(e)
            return HttpResponse(json.dumps({}), status=500,
                                content_type=json_content_type)

        inner_path = gen_inner_file_get_url(fileserver_token, file_name)

        try:
            file_content = urllib2.urlopen(inner_path).read()
        except urllib2.URLError as e:
            logger.error(e)
            return HttpResponse(json.dumps({}), status=500,
                                content_type=json_content_type)

        return HttpResponse(file_content, content_type="application/octet-stream")

    def post(self, request, file_id, format=None):
        """ WOPI endpoint for update file content

        Uploads the request body as the new content of the file.
        Responds 401 when the token, library or file can not be resolved
        and 500 when the upload fails.
        """

        token = request.GET.get('access_token', None)
        request_user, repo_id, file_path = get_file_info_by_token(token=token)

        if not request_user or not repo_id or not file_path:
            logger.error('access_token invalid.')
            return HttpResponse(json.dumps({}), status=401,
                                content_type=json_content_type)

        repo = seafile_api.get_repo(repo_id)
        if not repo:
            logger.error('Library %s not found.', repo_id)
            return HttpResponse(json.dumps({}), status=401,
                                content_type=json_content_type)

        obj_id = seafile_api.get_file_id_by_path(repo_id, file_path)
        if not obj_id:
            logger.error('File %s not found.', file_path)
            return HttpResponse(json.dumps({}), status=401,
                                content_type=json_content_type)

        try:
            file_obj = request.read()

            # get file update url
            token = seafile_api.get_fileserver_access_token(repo_id, 'dummy', 'update', request_user)
            update_url = gen_file_upload_url(token, 'update-api')

            # update file
            files = {
                'file': file_obj,
                'file_name': os.path.basename(file_path),
                'target_file': file_path,
            }
            resp = requests.post(update_url, files=files)
            # an HTTP error from the upload must not be reported as a
            # successful save; it is turned into the 500 response below
            resp.raise_for_status()
        except Exception as e:
            logger.error(e)
            return HttpResponse(json.dumps({}), status=500,
                                content_type=json_content_type)

        return HttpResponse(json.dumps({}), status=200,
                            content_type=json_content_type)

lian · December 19, 2016, 2:14am

Maybe you should install the Python requests module

sudo pip install requests

BTW, a new pro version with the code changes above will be released soon.

daniel.pan · December 19, 2016, 9:43am

I have just uploaded version pro 6.0.5, you can test with this version.

gauburtin · December 19, 2016, 11:06am

Thank you @daniel.pan

I just updated seafile to 6.05 but there is no connection between seafile and collabora when clicking on an office file.

On Collabora/Code server :

I have no log on docker process
I have no access log on nginx

On Seafile server :

I have no log on Apache
I only have these logs on seahub.log (ddd.campus-condorcet.fr is collabora server)

2016-12-19 18:47:24,314 [INFO] urllib3.connectionpool:697 _new_conn Starting new HTTPS connection (1): ddd.campus-condorcet.fr
2016-12-19 18:47:29,812 [INFO] urllib3.connectionpool:697 _new_conn Starting new HTTPS connection (1): ddd.campus-condorcet.fr
2016-12-19 18:47:35,853 [INFO] urllib3.connectionpool:697 _new_conn Starting new HTTPS connection (1): ddd.campus-condorcet.fr
2016-12-19 18:47:45,177 [INFO] urllib3.connectionpool:697 _new_conn Starting new HTTPS connection (1): ddd.campus-condorcet.fr
2016-12-19 18:47:48,981 [INFO] urllib3.connectionpool:697 _new_conn Starting new HTTPS connection (1): ddd.campus-condorcet.fr

Is there any effective way to debug Seafile -> Collabora connection ?

Regards

daniel.pan · December 19, 2016, 11:14am

This is the workflow for MS Office webapp:

(seahub->browser) Seahub will generate a page containing an iframe and send it to the browser

(browser->office web app) With the iframe, the browser will try to load the file preview page from the office web app
(office web app->seahub) Office web app receives the request and sends a request to Seahub to get the file content
(office web app->browser) Office web app sends the file preview page to the browser.

For Collabora Online, the process is the same. Can you check the requests using Chrome/Firefox debug mode to see which step is wrong?

gauburtin · December 19, 2016, 12:41pm

I’m not sure that even the first step occurs.

When i click on a .doc file, here is the debug log (Firefox console)

GET 
https://server.domain.fr/lib/5144d4da-3063-4feb-b820-c7f4c5c9296f/file/collabora/2016-70_Comite_%20achats.doc [HTTP/1.1 200 OK 310 ms]
Unchecked lastError value: Error: Could not establish connection. Receiving end does not exist.  ExtensionUtils.jsm:354
GET 
https://server.domain.fr/seafmedia/assets/css/bootstrap.min.b00faad199b5.css [HTTP/1.1 200 OK 0 ms]
GET 
https://server.domain.fr/seafmedia/css/seahub.min.css [HTTP/1.1 200 OK 0 ms]
GET 
https://server.domain.fr/seafmedia/css/file_view_extra.css [HTTP/1.1 200 OK 0 ms]
GET 
https://server.domain.fr/seafmedia/custom/css/cc.min.css [HTTP/1.1 200 OK 0 ms]
GET 
https://server.domain.fr/seafmedia/js/jquery-1.12.1.min.js [HTTP/1.1 200 OK 0 ms]
GET 
https://server.domain.fr/seafmedia/assets/scripts/lib/jquery.simplemodal.67fb20a63282.js [HTTP/1.1 200 OK 0 ms]
GET 
https://server.domain.fr/seafmedia/assets/scripts/lib/jquery.ui.tabs.7406a3c5d2e3.js [HTTP/1.1 200 OK 0 ms]
GET 
https://server.domain.fr/seafmedia/js/jq.min.js [HTTP/1.1 200 OK 0 ms]
GET 
https://server.domain.fr/seafmedia/js/base.js [HTTP/1.1 200 OK 0 ms]
GET 
https://server.domain.fr/seafmedia/assets/scripts/lib/underscore.1dccc53d7339.js [HTTP/1.1 200 OK 0 ms]
GET 
https://server.domain.fr/seafmedia/assets/scripts/lib/moment-with-locales.c4492e6216cc.js [HTTP/1.1 200 OK 0 ms]
GET 
https://server.domain.fr/seafmedia/assets/scripts/lib/marked.min.c2a88705e206.js [HTTP/1.1 200 OK 0 ms]
Unchecked lastError value: Error: Script returned non-structured-clonable data  ExtensionUtils.jsm:354
Sending message that cannot be cloned. Are you trying to send an XPCOM object?  MessageChannel.jsm:504:4
Sending message that cannot be cloned. Are you trying to send an XPCOM object?  MessageChannel.jsm:504:4
Sending message that cannot be cloned. Are you trying to send an XPCOM object? MessageChannel.jsm:504:4
Invalid chrome URI: /

gauburtin · December 19, 2016, 1:38pm

Ok, i found the problem

The certificate installed on collabora was corrupted.

wget https://ddd.campus-condorcet.fr/hosting/discovery
--2016-12-19 14:19:58--  https://ddd.campus-condorcet.fr/hosting/discovery
Résolution de ddd.campus-condorcet.fr (ddd.campus-condorcet.fr)… 134.158.33.224
Connexion à ddd.campus-condorcet.fr (ddd.campus-condorcet.fr)|134.158.33.224|:443… connecté.
Erreur : le certificat de « ddd.campus-condorcet.fr » n'est pas de confiance.
Erreur : le certificat de « ddd.campus-condorcet.fr » n'est pas d'un émetteur connu.

It now works fine, except for .doc|.xls|.ppt files.

I don’t see these file formats in collabora hosting/discovery xml output

Do you know how to add these file extensions to the Collabora config in a docker install?

My seafile config options includes .doc|.xls|.ppt files

# List of file formats that you want to view through LibreOffice Online.
# You can change this value according to your preferences, but make sure
# your LibreOffice Online installation is able to preview files with the
# specified extensions.
OFFICE_WEB_APP_FILE_EXTENSION = ('ods', 'xls', 'xlsb', 'xlsm', 'xlsx','ppsx', 'ppt',
    'pptm', 'pptx', 'doc', 'docm', 'docx')

# Enable editing files through LibreOffice Online
ENABLE_OFFICE_WEB_APP_EDIT = True

# File types that should be editable through LibreOffice Online
OFFICE_WEB_APP_EDIT_FILE_EXTENSION = ('ods', 'xls', 'xlsb', 'xlsm', 'xlsx','ppsx', 'ppt',
    'pptm', 'pptx', 'doc', 'docm', 'docx')

gauburtin · December 19, 2016, 2:29pm

Hi @daniel.pan ,

Now that I manage to edit files with collabora through seafile, I have a more interesting question.

I noticed that if one (docx) file is opened by two different users from seafile, some buggy behaviour occurs.

Sometimes, the file does not open in collabora (grey window with cursor). You have to quit the file, quit the folder and click on the file many times to open it clearly in collabora.

The updates of one user are cleared by the updates of another user (no merge).
I wonder if you can have concurrent (synchronous merge) editing between seafile and collabora.
It seems that Seafile remembers the last logged-in user and, when saving in collabora, creates a new version (see the history) that erases the last updates of the other user.

Am I right ? Is there any way to enhance the behaviour. For the moment, it’s quite dangerous to use it in a parallel way, because some data seems to be lost.

Assuming that parallel editing is complicated by the version management of seafile (which you may confirm or not), the only way to prevent data loss is to use the locking feature from the web UI.
But, as users may not unlock files they edited, is there any option to unlock files automatically, like there is for Office Automatic locking from the client (see this thread : https://forum.seafile.de/t/file-lock-issues/5204/28), and to set a shorter delay than 12 hours?

Regards

lian · December 20, 2016, 2:12am

These old file formats are not supported by Collabora, so there is nothing we can do.

lian · December 20, 2016, 2:36am

We have added the locking feature already:

the file will be locked automatically if some user A opens and edits it.
then if another user B opens the same file, user B can only preview (not able to edit) it.
after user A finishes his/her work and closes the file, the file will be unlocked automatically.

And of course, the locking feature has been tested when editing file through Office Online 2016, it works well.

As for Collabora Office, we noticed the behaviours you mentioned above too; we are now investigating why this happens.

gauburtin · December 20, 2016, 8:53am

Hi,
From my point of view, these formats will become old only when Office no longer supports them.
But as you mentioned, you are not concerned.
Regards

gauburtin · December 20, 2016, 8:54am

Ok, glad to hear you’re working on it !